/[hyperestraier_wrappers]/trunk/perl/HyperEstraierWrapper.cpp
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/perl/HyperEstraierWrapper.cpp

Parent Directory | Revision Log


Revision 32 - (show annotations)
Tue Oct 11 14:04:20 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 15675 byte(s)
fix for 0.9.2 est_db_set_cache_size

1 /**
2 * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3 */
4 #include <estraier.h>
5 #include <estmtdb.h>
6 #include <cabin.h>
7 #include <cstdlib>
8 #include <string>
9 #include <vector>
10 #include <map>
11 #include <cassert>
12 #include <stdexcept>
13 #include <estnode.h>
14
15 /* backward compatibility for 0.5.4 */
16 /*
17 #ifndef ESTCONDAGITO
18 #define ESTCONDAGITO ESTCONDAGIT
19 #endif
20 */
21
22 namespace estraier {
23
/**
 * Exception type thrown by the wrapper classes; it carries a
 * human-readable message and adds nothing to std::runtime_error.
 */
class IOError : public std::runtime_error {
public:
    explicit IOError (const std::string& message)
        : std::runtime_error(message)
    {
    }
};
28
29 class Condition {
30 public:
31 enum { // enumeration for options
32 SURE = ESTCONDSURE, // check every N-gram key
33 USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
34 FAST = ESTCONDFAST, // check N-gram keys skipping by two
35 AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
36 NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
37 SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
38 };
39 ESTCOND * cond;
40 Condition() {
41 /**
42 * constructor
43 */
44 cond = est_cond_new();
45 }
46 ~Condition() {
47 /**
48 * destructor
49 */
50 est_cond_delete(cond);
51 }
52 void set_phrase(const char *phrase) {
53 /**
54 * set the search phrase
55 */
56 est_cond_set_phrase(cond, phrase);
57 }
58 void add_attr(const char *expr) {
59 /**
60 * set the attribute expression
61 */
62 est_cond_add_attr(cond, expr);
63 }
64 void set_order(const char *expr) {
65 /**
66 * set the order of a condition object
67 */
68 est_cond_set_order(cond, expr);
69 }
70 void set_max(int _max) {
71 /**
72 * set the maximum number of retrieval of a condition object
73 */
74 est_cond_set_max(cond, _max);
75 }
76 void set_options(int options) {
77 /**
78 * set options of retrieval of a condition object
79 */
80 est_cond_set_options(cond, options);
81 }
82 };
83
84 class Document {
85 private:
86 std::string text_buf;
87 public:
88 ESTDOC *doc;
89 Document() {
90 /**
91 * constructor
92 */
93 doc = est_doc_new();
94 }
95 Document(const char* draft) {
96 /**
97 * constructor
98 */
99 doc = est_doc_new_from_draft(draft);
100 }
101 Document(ESTDOC *_doc) {
102 /**
103 * constructor
104 */
105 doc = _doc;
106 }
107 ~Document() {
108 /**
109 * destructor
110 */
111 est_doc_delete(doc);
112 }
113 void add_attr(const char * name, const char*value) {
114 /**
115 * add an attribute to a document object
116 */
117 est_doc_add_attr(doc, name, value);
118 }
119 void add_text(const char *text) {
120 /**
121 * add a sentence of text to a document object
122 */
123 est_doc_add_text(doc, text);
124 }
125 void add_hidden_text(const char * text) {
126 /**
127 * add a hidden sentence to a document object
128 */
129 est_doc_add_hidden_text(doc, text);
130 }
131 int id() {
132 /**
133 * get the ID number of a document object
134 */
135 return est_doc_id(doc);
136 }
137 std::vector<std::string> * attr_names() {
138 /**
139 * get a list of attribute names of a document object
140 */
141 std::vector<std::string> * vs = new std::vector<std::string>;
142 CBLIST * attr_names = est_doc_attr_names(doc);
143 for (int i=0; i < cblistnum(attr_names); i++) {
144 vs->push_back(cblistval(attr_names, i, NULL));
145 }
146 cblistclose(attr_names);
147 return vs;
148 }
149 const char * attr(const char *name) {
150 /**
151 * get the value of an attribute of a document object
152 */
153 return est_doc_attr(doc, name);
154 }
155 const char * cat_texts() {
156 /**
157 * get a list of sentences of the text of a document object
158 */
159 return est_doc_cat_texts(doc);
160 }
161 std::vector<std::string>* texts() {
162 /**
163 * get a list of sentences of the text of a document object
164 */
165 std::vector<std::string> * vs = new std::vector<std::string>;
166 const CBLIST *texts;
167 texts = est_doc_texts(doc);
168 for(int i = 0; i < cblistnum(texts); i++) {
169 vs->push_back(cblistval(texts, i, NULL));
170 }
171 return vs;
172 }
173 const char * dump_draft() {
174 /**
175 * dump draft data of a document object
176 */
177 return est_doc_dump_draft(doc);
178 }
179 const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
180 /**
181 * make a snippet of the body text of a document object
182 */
183 CBLIST * words;
184 std::vector<std::string>::iterator iter;
185 words = cblistopen();
186 for (iter = _words.begin(); _words.end() != iter; iter++) {
187 cblistpush(words, iter->c_str(), -1);
188 }
189 const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
190 cblistclose(words);
191 return result;
192 }
193 const char * hidden_texts() {
194 /**
195 * get the hidden texts of a document object.
196 */
197 return est_doc_hidden_texts(doc);
198 }
199 };
200
201 class Database {
202 private:
203 ESTMTDB *db;
204 int ecode;
205 public:
206 enum { // enumeration for error codes
207 ERRNOERR = ESTENOERR, // no error
208 ERRINVAL = ESTEINVAL, // invalid argument
209 ERRACCES = ESTEACCES, // access forbidden
210 ERRLOCK = ESTELOCK, // lock failure
211 ERRDB = ESTEDB, // database problem
212 ERRIO = ESTEIO, // I/O problem
213 ERRNOITEM = ESTENOITEM, // no item
214 ERRMISC = ESTEMISC // miscellaneous
215 };
216 enum { // enumeration for open modes
217 DBREADER = ESTDBREADER, // open as a reader
218 DBWRITER = ESTDBWRITER, // open as a writer
219 DBCREAT = ESTDBCREAT, // a writer creating
220 DBTRUNC = ESTDBTRUNC, // a writer truncating
221 DBNOLCK = ESTDBNOLCK, // open without locking
222 DBLCKNB = ESTDBLCKNB, // lock without blocking
223 DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
224 };
225 enum { // enumeration for options of document registration
226 PDCLEAN = ESTPDCLEAN // clean up dispensable regions
227 };
228 enum { // enumeration for options of document deletion
229 ODCLEAN = ESTODCLEAN // clean up dispensable regions
230 };
231 enum { // enumeration for options of optimization
232 OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
233 OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
234 };
235 enum { // enumeration for options of document retrieval
236 GDNOATTR = ESTGDNOATTR, // no attributes
237 GDNOTEXT = ESTGDNOTEXT // no text
238 };
239 Database() {
240 /**
241 * constructor(dummy)
242 */
243 db = NULL;
244 ecode = ERRNOERR;
245 }
246 ~Database() {
247 if (db) close();
248 }
249 bool open(const char * dbname, int mode) {
250 /**
251 * open the database
252 */
253 if (db) close();
254 int ec;
255 db = est_mtdb_open(dbname, mode, &ec);
256 if (!db) ecode = ec;
257 return db;
258 }
259 bool close() {
260 /**
261 * close the database
262 */
263 if (!db) throw IOError("closed database");
264 int ec;
265 bool result = est_mtdb_close(db, &ec);
266 if (!result) ecode = ec;
267 db = NULL;
268 return result;
269 }
270 bool put_doc(Document *doc, int options) {
271 /**
272 * add a document to a database
273 */
274 if (!db) throw IOError("closed database");
275 bool result = est_mtdb_put_doc(db, doc->doc, options);
276 if (!result) ecode = est_mtdb_error(db);
277 return result;
278 }
279 std::vector<int> * search(Condition * cond, int options) {
280 /**
281 * search documents corresponding a condition for a database
282 */
283 if (!db) throw IOError("closed database");
284 int resnum;
285 int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
286 std::vector<int> *numbers = new std::vector<int>;
287 for (int i=0; i<resnum; i++) {
288 numbers->push_back(result[i]);
289 }
290 return numbers;
291 }
292 static const char * err_msg(int ecode) {
293 /**
294 * get the string of an error
295 */
296 return est_err_msg(ecode);
297 }
298 int error() {
299 /**
300 * get the last happended error code of a database
301 */
302 return ecode;
303 }
304 bool fatal() {
305 /**
306 * check whether a database has a fatal error
307 */
308 if (!db) throw IOError("closed database");
309 return est_mtdb_fatal(db);
310 }
311 bool flush(int _max) {
312 /**
313 * flush index words in the cache of a database
314 */
315 if (!db) throw IOError("closed database");
316 bool result = est_mtdb_flush(db, _max);
317 if (!result) ecode = est_mtdb_error(db);
318 return result;
319 }
320 bool sync() {
321 /**
322 * synchronize updating contents of a database
323 */
324 if (!db) throw IOError("closed database");
325 bool result = est_mtdb_sync(db);
326 if (!result) ecode = est_mtdb_error(db);
327 return result;
328 }
329 bool optimize(int options) {
330 /**
331 * optimize a database
332 */
333 if (!db) throw IOError("closed database");
334 bool result = est_mtdb_optimize(db, options);
335 if (!result) ecode = est_mtdb_error(db);
336 return result;
337 }
338 bool out_doc(int id, int options) {
339 /**
340 * remove a document from a database
341 */
342 if (!db) throw IOError("closed database");
343 bool result = est_mtdb_out_doc(db, id, options);
344 if (!result) ecode = est_mtdb_error(db);
345 return result;
346 }
347 bool edit_doc(Document *doc) {
348 /**
349 * edit an attribute of a document in a database
350 */
351 if (!db) throw IOError("closed database");
352 bool result = est_mtdb_edit_doc(db, doc->doc);
353 if (!result) ecode = est_mtdb_error(db);
354 return result;
355 }
356 Document * get_doc(int id, int options) {
357 /**
358 * retrieve a document in a database
359 */
360 if (!db) throw IOError("closed database");
361 ESTDOC *doc = est_mtdb_get_doc(db, id, options);
362 if (!doc) {
363 ecode = est_mtdb_error(db);
364 throw est_err_msg(est_mtdb_error(db));
365 } else {
366 return new Document(doc);
367 }
368 }
369 int uri_to_id(const char *uri) {
370 /**
371 * get the ID of a document spacified by URI
372 */
373 if (!db) throw IOError("closed database");
374 int result = est_mtdb_uri_to_id(db, uri);
375 if(result == -1) ecode = est_mtdb_error(db);
376 return result;
377 }
378 std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
379 /**
380 * extract keywords of a document object
381 */
382 if (!db) throw IOError("closed database");
383 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
384 CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
385 cbmapiterinit(keys);
386 int ksiz;
387 while (const char *key = cbmapiternext(keys, &ksiz)) {
388 mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
389 }
390 return mss;
391 }
392 const char * name() {
393 /**
394 * get the name of a database
395 */
396 if (!db) throw IOError("closed database");
397 return est_mtdb_name(db);
398 }
399 int doc_num() {
400 /**
401 * get the number of documents in a database
402 */
403 if (!db) throw IOError("closed database");
404 return est_mtdb_doc_num(db);
405 }
406 int word_num() {
407 /**
408 * get the number of unique words in a database
409 */
410 if (!db) throw IOError("closed database");
411 return est_mtdb_word_num(db);
412 }
413 double size() {
414 /**
415 * get the size of a database
416 */
417 if (!db) throw IOError("closed database");
418 return est_mtdb_size(db);
419 }
420 void set_cache_size(size_t size, int anum, int tnum, int rnum) {
421 /**
422 * set the maximum size of the cache memory of a database
423 */
424 if (!db) throw IOError("closed database");
425 est_mtdb_set_cache_size(db, size, anum, tnum, rnum);
426 }
427 void set_special_cache(const char *name, int num) {
428 /**
429 * Set the special cache for narrowing and sorting
430 * with document attributes
431 */
432 est_mtdb_set_special_cache(db, name, num);
433 }
434 };
435
436 static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) {
437 std::vector<std::string> * vs = new std::vector<std::string>;
438 CBLIST *list;
439 list = cblistopen();
440 est_break_text(text, list, norm, tail);
441 for (int i=0; i < cblistnum(list); i++) {
442 vs->push_back(cblistval(list, i, NULL));
443 }
444 cblistclose(list);
445 return vs;
446 }
447
448 static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) {
449 std::vector<std::string> * vs = new std::vector<std::string>;
450 CBLIST *list;
451 list = cblistopen();
452 est_break_text_perfng(text, list, norm, tail);
453 for (int i=0; i < cblistnum(list); i++) {
454 vs->push_back(cblistval(list, i, NULL));
455 }
456 cblistclose(list);
457 return vs;
458 }
459
460 class ResultDocument {
461 public:
462 ESTRESDOC *rdoc;
463 ResultDocument(ESTRESDOC *_rdoc) {
464 rdoc = _rdoc;
465 }
466 const char *uri(void) {
467 return est_resdoc_uri(rdoc);
468 }
469 std::vector<std::string> * attr_names() {
470 std::vector<std::string> * vs = new std::vector<std::string>;
471 CBLIST * attr_names = est_resdoc_attr_names(rdoc);
472 for (int i=0; i < cblistnum(attr_names); i++) {
473 vs->push_back(cblistval(attr_names, i, NULL));
474 }
475 cblistclose(attr_names);
476 return vs;
477 }
478 const char *attr(const char *name) {
479 return est_resdoc_attr(rdoc, name);
480 }
481 const char *snippet(void) {
482 return est_resdoc_snippet(rdoc);
483 }
484 };
485
486 class NodeRes {
487 private:
488 ESTNODERES *nres;
489 public:
490 NodeRes(ESTNODE *node, Condition *cond, int depth) {
491 nres = est_node_search(node, cond->cond, depth);
492 }
493 ~NodeRes() {
494 est_noderes_delete(nres);
495 }
496 std::map<std::string, std::string> * hints(void) {
497 std::map<std::string, std::string> * hints = new std::map<std::string, std::string>;
498 CBMAP * keys = est_noderes_hints(nres);
499 cbmapiterinit(keys);
500 int ksiz;
501 while (const char *key = cbmapiternext(keys, &ksiz)) {
502 hints->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
503 }
504 return hints;
505 }
506 int doc_num(void) {
507 return est_noderes_doc_num(nres);
508 }
509 ResultDocument * get_doc(int index) {
510 ESTRESDOC *rdoc = est_noderes_get_doc(nres, index);
511 if (rdoc) {
512 return new ResultDocument(rdoc);
513 } else {
514 return NULL;
515 }
516 }
517 };
518
519 class Node {
520 private:
521 ESTNODE *node;
522 int netenv_ok;
523 public:
524 Node(const char *url) {
525 netenv_ok = est_init_net_env();
526 if (! netenv_ok) throw IOError("can't init net env");
527 node = est_node_new(url);
528 if (! node) throw IOError("can't create node");
529 }
530 ~Node() {
531 est_node_delete(node);
532 est_free_net_env();
533 }
534 void set_proxy(const char *host, int port) {
535 est_node_set_proxy(node, host, port);
536 }
537 void set_timeout(int sec) {
538 est_node_set_timeout(node, sec);
539 }
540 void set_auth(const char *name, const char *passwd) {
541 est_node_set_auth(node, name, passwd);
542 }
543 int status(void) {
544 return est_node_status(node);
545 }
546 bool put_doc(Document *doc) {
547 return est_node_put_doc(node, doc->doc);
548 }
549 bool out_doc(int id) {
550 return est_node_out_doc(node, id);
551 }
552 bool out_doc_by_uri(const char *uri) {
553 return est_node_out_doc_by_uri(node, uri);
554 }
555 #ifdef est_node_edit_doc
556 bool edit_doc(Document *doc) {
557 return est_node_edit_doc(node, doc->doc);
558 }
559 #endif
560 Document * get_doc(int id) {
561 ESTDOC *doc = est_node_get_doc(node, id);
562 if (!doc) {
563 return NULL;
564 } else {
565 return new Document(doc);
566 }
567 }
568 Document * get_doc_by_uri(const char *uri) {
569 ESTDOC *doc = est_node_get_doc_by_uri(node, uri);
570 if (!doc) {
571 return NULL;
572 } else {
573 return new Document(doc);
574 }
575 }
576 char * get_doc_attr(int id, const char *name) {
577 /* is this leeking memory? shouldn't I create
578 * object and free memory region returned?
579 */
580 return est_node_get_doc_attr(node, id, name);
581 }
582 char * get_doc_attr_by_uri(const char *uri, const char *name) {
583 return est_node_get_doc_attr_by_uri(node, uri, name);
584 }
585 int uri_to_id(const char *uri) {
586 return est_node_uri_to_id(node, uri);
587 }
588 const char * name(void) {
589 return est_node_name(node);
590 }
591 const char * label(void) {
592 return est_node_label(node);
593 }
594 int doc_num(void) {
595 return est_node_doc_num(node);
596 }
597 int word_num(void) {
598 return est_node_word_num(node);
599 }
600 double size(void) {
601 return est_node_size(node);
602 }
603 NodeRes * search(Condition *cond, int depth) {
604 return new NodeRes(node, cond, depth);
605 }
606 int set_user(const char *name, int mode) {
607 return est_node_set_user(node, name, mode);
608 }
609 int set_link(const char *url, const char *label, int credit) {
610 return est_node_set_link(node, url, label, credit);
611 }
612 };
613
614 };

  ViewVC Help
Powered by ViewVC 1.1.26