/[hyperestraier_wrappers]/trunk/perl/HyperEstraierWrapper.cpp
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/perl/HyperEstraierWrapper.cpp

Parent Directory | Revision Log


Revision 17 - (show annotations)
Sat Sep 10 08:22:04 2005 UTC (16 years, 2 months ago) by dpavlin
File size: 15653 byte(s)
removed throw if no document is found (return NULL)
1 /**
2 * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3 */
4 #include <estraier.h>
5 #include <estmtdb.h>
6 #include <cabin.h>
7 #include <cstdlib>
8 #include <string>
9 #include <vector>
10 #include <map>
11 #include <cassert>
12 #include <stdexcept>
13 #include <estnode.h>
14
15 /* backward compatibility for 0.5.4 */
16 #ifndef ESTCONDAGITO
17 #define ESTCONDAGITO ESTCONDAGIT
18 #endif
19
20 namespace estraier {
21
/**
 * Exception type thrown by the wrapper classes; it is a plain
 * std::runtime_error carrying a human-readable message.
 */
class IOError : public std::runtime_error {
public:
	explicit IOError (const std::string& message)
		: std::runtime_error(message)
	{
		/**
		 * constructor: `message' becomes the text reported by what()
		 */
	}
};
26
27 class Condition {
28 public:
29 enum { // enumeration for options
30 SURE = ESTCONDSURE, // check every N-gram key
31 USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
32 FAST = ESTCONDFAST, // check N-gram keys skipping by two
33 AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
34 NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
35 SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
36 };
37 ESTCOND * cond;
38 Condition() {
39 /**
40 * constructor
41 */
42 cond = est_cond_new();
43 }
44 ~Condition() {
45 /**
46 * destructor
47 */
48 est_cond_delete(cond);
49 }
50 void set_phrase(const char *phrase) {
51 /**
52 * set the search phrase
53 */
54 est_cond_set_phrase(cond, phrase);
55 }
56 void add_attr(const char *expr) {
57 /**
58 * set the attribute expression
59 */
60 est_cond_add_attr(cond, expr);
61 }
62 void set_order(const char *expr) {
63 /**
64 * set the order of a condition object
65 */
66 est_cond_set_order(cond, expr);
67 }
68 void set_max(int _max) {
69 /**
70 * set the maximum number of retrieval of a condition object
71 */
72 est_cond_set_max(cond, _max);
73 }
74 void set_options(int options) {
75 /**
76 * set options of retrieval of a condition object
77 */
78 est_cond_set_options(cond, options);
79 }
80 };
81
82 class Document {
83 private:
84 std::string text_buf;
85 public:
86 ESTDOC *doc;
87 Document() {
88 /**
89 * constructor
90 */
91 doc = est_doc_new();
92 }
93 Document(const char* draft) {
94 /**
95 * constructor
96 */
97 doc = est_doc_new_from_draft(draft);
98 }
99 Document(ESTDOC *_doc) {
100 /**
101 * constructor
102 */
103 doc = _doc;
104 }
105 ~Document() {
106 /**
107 * destructor
108 */
109 est_doc_delete(doc);
110 }
111 void add_attr(const char * name, const char*value) {
112 /**
113 * add an attribute to a document object
114 */
115 est_doc_add_attr(doc, name, value);
116 }
117 void add_text(const char *text) {
118 /**
119 * add a sentence of text to a document object
120 */
121 est_doc_add_text(doc, text);
122 }
123 void add_hidden_text(const char * text) {
124 /**
125 * add a hidden sentence to a document object
126 */
127 est_doc_add_hidden_text(doc, text);
128 }
129 int id() {
130 /**
131 * get the ID number of a document object
132 */
133 return est_doc_id(doc);
134 }
135 std::vector<std::string> * attr_names() {
136 /**
137 * get a list of attribute names of a document object
138 */
139 std::vector<std::string> * vs = new std::vector<std::string>;
140 CBLIST * attr_names = est_doc_attr_names(doc);
141 for (int i=0; i < cblistnum(attr_names); i++) {
142 vs->push_back(cblistval(attr_names, i, NULL));
143 }
144 cblistclose(attr_names);
145 return vs;
146 }
147 const char * attr(const char *name) {
148 /**
149 * get the value of an attribute of a document object
150 */
151 return est_doc_attr(doc, name);
152 }
153 const char * cat_texts() {
154 /**
155 * get a list of sentences of the text of a document object
156 */
157 return est_doc_cat_texts(doc);
158 }
159 std::vector<std::string>* texts() {
160 /**
161 * get a list of sentences of the text of a document object
162 */
163 std::vector<std::string> * vs = new std::vector<std::string>;
164 const CBLIST *texts;
165 texts = est_doc_texts(doc);
166 for(int i = 0; i < cblistnum(texts); i++) {
167 vs->push_back(cblistval(texts, i, NULL));
168 }
169 return vs;
170 }
171 const char * dump_draft() {
172 /**
173 * dump draft data of a document object
174 */
175 return est_doc_dump_draft(doc);
176 }
177 const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
178 /**
179 * make a snippet of the body text of a document object
180 */
181 CBLIST * words;
182 std::vector<std::string>::iterator iter;
183 words = cblistopen();
184 for (iter = _words.begin(); _words.end() != iter; iter++) {
185 cblistpush(words, iter->c_str(), -1);
186 }
187 const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
188 cblistclose(words);
189 return result;
190 }
191 const char * hidden_texts() {
192 /**
193 * get the hidden texts of a document object.
194 */
195 return est_doc_hidden_texts(doc);
196 }
197 };
198
199 class Database {
200 private:
201 ESTMTDB *db;
202 int ecode;
203 public:
204 enum { // enumeration for error codes
205 ERRNOERR = ESTENOERR, // no error
206 ERRINVAL = ESTEINVAL, // invalid argument
207 ERRACCES = ESTEACCES, // access forbidden
208 ERRLOCK = ESTELOCK, // lock failure
209 ERRDB = ESTEDB, // database problem
210 ERRIO = ESTEIO, // I/O problem
211 ERRNOITEM = ESTENOITEM, // no item
212 ERRMISC = ESTEMISC // miscellaneous
213 };
214 enum { // enumeration for open modes
215 DBREADER = ESTDBREADER, // open as a reader
216 DBWRITER = ESTDBWRITER, // open as a writer
217 DBCREAT = ESTDBCREAT, // a writer creating
218 DBTRUNC = ESTDBTRUNC, // a writer truncating
219 DBNOLCK = ESTDBNOLCK, // open without locking
220 DBLCKNB = ESTDBLCKNB, // lock without blocking
221 DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
222 };
223 enum { // enumeration for options of document registration
224 PDCLEAN = ESTPDCLEAN // clean up dispensable regions
225 };
226 enum { // enumeration for options of document deletion
227 ODCLEAN = ESTODCLEAN // clean up dispensable regions
228 };
229 enum { // enumeration for options of optimization
230 OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
231 OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
232 };
233 enum { // enumeration for options of document retrieval
234 GDNOATTR = ESTGDNOATTR, // no attributes
235 GDNOTEXT = ESTGDNOTEXT // no text
236 };
237 Database() {
238 /**
239 * constructor(dummy)
240 */
241 db = NULL;
242 ecode = ERRNOERR;
243 }
244 ~Database() {
245 if (db) close();
246 }
247 bool open(const char * dbname, int mode) {
248 /**
249 * open the database
250 */
251 if (db) close();
252 int ec;
253 db = est_mtdb_open(dbname, mode, &ec);
254 if (!db) ecode = ec;
255 return db;
256 }
257 bool close() {
258 /**
259 * close the database
260 */
261 if (!db) throw IOError("closed database");
262 int ec;
263 bool result = est_mtdb_close(db, &ec);
264 if (!result) ecode = ec;
265 db = NULL;
266 return result;
267 }
268 bool put_doc(Document *doc, int options) {
269 /**
270 * add a document to a database
271 */
272 if (!db) throw IOError("closed database");
273 bool result = est_mtdb_put_doc(db, doc->doc, options);
274 if (!result) ecode = est_mtdb_error(db);
275 return result;
276 }
277 std::vector<int> * search(Condition * cond, int options) {
278 /**
279 * search documents corresponding a condition for a database
280 */
281 if (!db) throw IOError("closed database");
282 int resnum;
283 int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
284 std::vector<int> *numbers = new std::vector<int>;
285 for (int i=0; i<resnum; i++) {
286 numbers->push_back(result[i]);
287 }
288 return numbers;
289 }
290 static const char * err_msg(int ecode) {
291 /**
292 * get the string of an error
293 */
294 return est_err_msg(ecode);
295 }
296 int error() {
297 /**
298 * get the last happended error code of a database
299 */
300 return ecode;
301 }
302 bool fatal() {
303 /**
304 * check whether a database has a fatal error
305 */
306 if (!db) throw IOError("closed database");
307 return est_mtdb_fatal(db);
308 }
309 bool flush(int _max) {
310 /**
311 * flush index words in the cache of a database
312 */
313 if (!db) throw IOError("closed database");
314 bool result = est_mtdb_flush(db, _max);
315 if (!result) ecode = est_mtdb_error(db);
316 return result;
317 }
318 bool sync() {
319 /**
320 * synchronize updating contents of a database
321 */
322 if (!db) throw IOError("closed database");
323 bool result = est_mtdb_sync(db);
324 if (!result) ecode = est_mtdb_error(db);
325 return result;
326 }
327 bool optimize(int options) {
328 /**
329 * optimize a database
330 */
331 if (!db) throw IOError("closed database");
332 bool result = est_mtdb_optimize(db, options);
333 if (!result) ecode = est_mtdb_error(db);
334 return result;
335 }
336 bool out_doc(int id, int options) {
337 /**
338 * remove a document from a database
339 */
340 if (!db) throw IOError("closed database");
341 bool result = est_mtdb_out_doc(db, id, options);
342 if (!result) ecode = est_mtdb_error(db);
343 return result;
344 }
345 bool edit_doc(Document *doc) {
346 /**
347 * edit an attribute of a document in a database
348 */
349 if (!db) throw IOError("closed database");
350 bool result = est_mtdb_edit_doc(db, doc->doc);
351 if (!result) ecode = est_mtdb_error(db);
352 return result;
353 }
354 Document * get_doc(int id, int options) {
355 /**
356 * retrieve a document in a database
357 */
358 if (!db) throw IOError("closed database");
359 ESTDOC *doc = est_mtdb_get_doc(db, id, options);
360 if (!doc) {
361 ecode = est_mtdb_error(db);
362 throw est_err_msg(est_mtdb_error(db));
363 } else {
364 return new Document(doc);
365 }
366 }
367 int uri_to_id(const char *uri) {
368 /**
369 * get the ID of a document spacified by URI
370 */
371 if (!db) throw IOError("closed database");
372 int result = est_mtdb_uri_to_id(db, uri);
373 if(result == -1) ecode = est_mtdb_error(db);
374 return result;
375 }
376 std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
377 /**
378 * extract keywords of a document object
379 */
380 if (!db) throw IOError("closed database");
381 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
382 CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
383 cbmapiterinit(keys);
384 int ksiz;
385 while (const char *key = cbmapiternext(keys, &ksiz)) {
386 mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
387 }
388 return mss;
389 }
390 const char * name() {
391 /**
392 * get the name of a database
393 */
394 if (!db) throw IOError("closed database");
395 return est_mtdb_name(db);
396 }
397 int doc_num() {
398 /**
399 * get the number of documents in a database
400 */
401 if (!db) throw IOError("closed database");
402 return est_mtdb_doc_num(db);
403 }
404 int word_num() {
405 /**
406 * get the number of unique words in a database
407 */
408 if (!db) throw IOError("closed database");
409 return est_mtdb_word_num(db);
410 }
411 double size() {
412 /**
413 * get the size of a database
414 */
415 if (!db) throw IOError("closed database");
416 return est_mtdb_size(db);
417 }
418 void set_cache_size(size_t size, int anum, int tnum) {
419 /**
420 * set the maximum size of the cache memory of a database
421 */
422 if (!db) throw IOError("closed database");
423 est_mtdb_set_cache_size(db, size, anum, tnum);
424 }
425 void set_special_cache(const char *name, int num) {
426 /**
427 * Set the special cache for narrowing and sorting
428 * with document attributes
429 */
430 est_mtdb_set_special_cache(db, name, num);
431 }
432 };
433
434 static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) {
435 std::vector<std::string> * vs = new std::vector<std::string>;
436 CBLIST *list;
437 list = cblistopen();
438 est_break_text(text, list, norm, tail);
439 for (int i=0; i < cblistnum(list); i++) {
440 vs->push_back(cblistval(list, i, NULL));
441 }
442 cblistclose(list);
443 return vs;
444 }
445
446 static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) {
447 std::vector<std::string> * vs = new std::vector<std::string>;
448 CBLIST *list;
449 list = cblistopen();
450 est_break_text_perfng(text, list, norm, tail);
451 for (int i=0; i < cblistnum(list); i++) {
452 vs->push_back(cblistval(list, i, NULL));
453 }
454 cblistclose(list);
455 return vs;
456 }
457
458 class ResultDocument {
459 public:
460 ESTRESDOC *rdoc;
461 ResultDocument(ESTRESDOC *_rdoc) {
462 rdoc = _rdoc;
463 }
464 const char *uri(void) {
465 return est_resdoc_uri(rdoc);
466 }
467 std::vector<std::string> * attr_names() {
468 std::vector<std::string> * vs = new std::vector<std::string>;
469 CBLIST * attr_names = est_resdoc_attr_names(rdoc);
470 for (int i=0; i < cblistnum(attr_names); i++) {
471 vs->push_back(cblistval(attr_names, i, NULL));
472 }
473 cblistclose(attr_names);
474 return vs;
475 }
476 const char *attr(const char *name) {
477 return est_resdoc_attr(rdoc, name);
478 }
479 const char *snippet(void) {
480 return est_resdoc_snippet(rdoc);
481 }
482 };
483
484 class NodeRes {
485 private:
486 ESTNODERES *nres;
487 public:
488 NodeRes(ESTNODE *node, Condition *cond, int depth) {
489 nres = est_node_search(node, cond->cond, depth);
490 }
491 ~NodeRes() {
492 est_noderes_delete(nres);
493 }
494 std::map<std::string, std::string> * hints(void) {
495 std::map<std::string, std::string> * hints = new std::map<std::string, std::string>;
496 CBMAP * keys = est_noderes_hints(nres);
497 cbmapiterinit(keys);
498 int ksiz;
499 while (const char *key = cbmapiternext(keys, &ksiz)) {
500 hints->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
501 }
502 return hints;
503 }
504 int doc_num(void) {
505 return est_noderes_doc_num(nres);
506 }
507 ResultDocument * get_doc(int index) {
508 ESTRESDOC *rdoc = est_noderes_get_doc(nres, index);
509 if (rdoc) {
510 return new ResultDocument(rdoc);
511 } else {
512 return NULL;
513 }
514 }
515 };
516
517 class Node {
518 private:
519 ESTNODE *node;
520 int netenv_ok;
521 public:
522 Node(const char *url) {
523 netenv_ok = est_init_net_env();
524 if (! netenv_ok) throw IOError("can't init net env");
525 node = est_node_new(url);
526 if (! node) throw IOError("can't create node");
527 }
528 ~Node() {
529 est_node_delete(node);
530 est_free_net_env();
531 }
532 void set_proxy(const char *host, int port) {
533 est_node_set_proxy(node, host, port);
534 }
535 void set_timeout(int sec) {
536 est_node_set_timeout(node, sec);
537 }
538 void set_auth(const char *name, const char *passwd) {
539 est_node_set_auth(node, name, passwd);
540 }
541 int status(void) {
542 return est_node_status(node);
543 }
544 bool put_doc(Document *doc) {
545 return est_node_put_doc(node, doc->doc);
546 }
547 bool out_doc(int id) {
548 return est_node_out_doc(node, id);
549 }
550 bool out_doc_by_uri(const char *uri) {
551 return est_node_out_doc_by_uri(node, uri);
552 }
553 #ifdef est_node_edit_doc
554 bool edit_doc(Document *doc) {
555 return est_node_edit_doc(node, doc->doc);
556 }
557 #endif
558 Document * get_doc(int id) {
559 ESTDOC *doc = est_node_get_doc(node, id);
560 if (!doc) {
561 return NULL;
562 } else {
563 return new Document(doc);
564 }
565 }
566 Document * get_doc_by_uri(const char *uri) {
567 ESTDOC *doc = est_node_get_doc_by_uri(node, uri);
568 if (!doc) {
569 return NULL;
570 } else {
571 return new Document(doc);
572 }
573 }
574 char * get_doc_attr(int id, const char *name) {
575 /* is this leeking memory? shouldn't I create
576 * object and free memory region returned?
577 */
578 return est_node_get_doc_attr(node, id, name);
579 }
580 char * get_doc_attr_by_uri(const char *uri, const char *name) {
581 return est_node_get_doc_attr_by_uri(node, uri, name);
582 }
583 int uri_to_id(const char *uri) {
584 return est_node_uri_to_id(node, uri);
585 }
586 const char * name(void) {
587 return est_node_name(node);
588 }
589 const char * label(void) {
590 return est_node_label(node);
591 }
592 int doc_num(void) {
593 return est_node_doc_num(node);
594 }
595 int word_num(void) {
596 return est_node_word_num(node);
597 }
598 double size(void) {
599 return est_node_size(node);
600 }
601 NodeRes * search(Condition *cond, int depth) {
602 return new NodeRes(node, cond, depth);
603 }
604 int set_user(const char *name, int mode) {
605 return est_node_set_user(node, name, mode);
606 }
607 int set_link(const char *url, const char *label, int credit) {
608 return est_node_set_link(node, url, label, credit);
609 }
610 };
611
612 };

  ViewVC Help
Powered by ViewVC 1.1.26