9 |
#include <vector> |
#include <vector> |
10 |
#include <map> |
#include <map> |
11 |
#include <cassert> |
#include <cassert> |
12 |
|
#include <stdexcept> |
13 |
|
#include <estnode.h> |
14 |
|
|
15 |
|
/* backward compatibility for 0.5.4 */ |
16 |
|
#ifndef ESTCONDAGITO |
17 |
|
#define ESTCONDAGITO ESTCONDAGIT |
18 |
|
#endif |
19 |
|
|
20 |
namespace estraier { |
namespace estraier { |
21 |
|
|
22 |
|
/**
 * Exception type used by the wrapper classes in this file to report
 * failures (for example "closed database" or "can't create node").
 * It adds nothing to std::runtime_error beyond a distinct type to catch.
 */
class IOError : public std::runtime_error {
public:
    /** build the exception; `message` is the text later returned by what() */
    explicit IOError (const std::string& message)
        : std::runtime_error(message)
    {
    }
};
26 |
|
|
27 |
class Condition { |
class Condition { |
28 |
public: |
public: |
29 |
enum { // enumeration for options |
enum { // enumeration for options |
84 |
std::string text_buf; |
std::string text_buf; |
85 |
public: |
public: |
86 |
ESTDOC *doc; |
ESTDOC *doc; |
|
|
|
87 |
Document() { |
Document() { |
88 |
/** |
/** |
89 |
* constructor |
* constructor |
154 |
/** |
/** |
155 |
* get a list of sentences of the text of a document object |
* get a list of sentences of the text of a document object |
156 |
*/ |
*/ |
157 |
// return est_doc_cat_texts(doc); |
return est_doc_cat_texts(doc); |
|
return "This is mockup!"; |
|
158 |
} |
} |
159 |
std::vector<std::string>* texts() { |
std::vector<std::string>* texts() { |
160 |
/** |
/** |
180 |
*/ |
*/ |
181 |
CBLIST * words; |
CBLIST * words; |
182 |
std::vector<std::string>::iterator iter; |
std::vector<std::string>::iterator iter; |
|
|
|
183 |
words = cblistopen(); |
words = cblistopen(); |
|
|
|
184 |
for (iter = _words.begin(); _words.end() != iter; iter++) { |
for (iter = _words.begin(); _words.end() != iter; iter++) { |
185 |
cblistpush(words, iter->c_str(), -1); |
cblistpush(words, iter->c_str(), -1); |
186 |
} |
} |
|
|
|
187 |
const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth); |
const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth); |
|
|
|
188 |
cblistclose(words); |
cblistclose(words); |
|
|
|
189 |
return result; |
return result; |
190 |
} |
} |
191 |
|
const char * hidden_texts() { |
192 |
|
/** |
193 |
|
* get the hidden texts of a document object. |
194 |
|
*/ |
195 |
|
return est_doc_hidden_texts(doc); |
196 |
|
} |
197 |
}; |
}; |
198 |
|
|
199 |
class Database { |
class Database { |
200 |
private: |
private: |
201 |
ESTMTDB *db; |
ESTMTDB *db; |
202 |
|
int ecode; |
203 |
public: |
public: |
204 |
enum { // enumeration for error codes |
enum { // enumeration for error codes |
205 |
ERRNOERR = ESTENOERR, // no error |
ERRNOERR = ESTENOERR, // no error |
238 |
/** |
/** |
239 |
* constructor(dummy) |
* constructor(dummy) |
240 |
*/ |
*/ |
241 |
|
db = NULL; |
242 |
|
ecode = ERRNOERR; |
243 |
} |
} |
244 |
~Database() { |
~Database() { |
245 |
close(); |
if (db) close(); |
246 |
} |
} |
247 |
bool open(const char * dbname, int mode) { |
bool open(const char * dbname, int mode) { |
248 |
/** |
/** |
249 |
* open the database |
* open the database |
250 |
*/ |
*/ |
251 |
int ecode; |
if (db) close(); |
252 |
db = est_mtdb_open(dbname, mode, &ecode); |
int ec; |
253 |
|
db = est_mtdb_open(dbname, mode, &ec); |
254 |
|
if (!db) ecode = ec; |
255 |
return db; |
return db; |
256 |
} |
} |
257 |
bool close() { |
bool close() { |
258 |
/** |
/** |
259 |
* close the database |
* close the database |
260 |
*/ |
*/ |
261 |
if (db) { |
if (!db) throw IOError("closed database"); |
262 |
int ecode; |
int ec; |
263 |
bool result = est_mtdb_close(db, &ecode); |
bool result = est_mtdb_close(db, &ec); |
264 |
db = NULL; |
if (!result) ecode = ec; |
265 |
return result; |
db = NULL; |
266 |
} else { |
return result; |
|
return false; |
|
|
} |
|
267 |
} |
} |
268 |
bool put_doc(Document *doc, int options) { |
bool put_doc(Document *doc, int options) { |
269 |
/** |
/** |
270 |
* add a document to a database |
* add a document to a database |
271 |
*/ |
*/ |
272 |
return est_mtdb_put_doc(db, doc->doc, options); |
if (!db) throw IOError("closed database"); |
273 |
|
bool result = est_mtdb_put_doc(db, doc->doc, options); |
274 |
|
if (!result) ecode = est_mtdb_error(db); |
275 |
|
return result; |
276 |
} |
} |
277 |
std::vector<int> * search(Condition * cond, int options) { |
std::vector<int> * search(Condition * cond, int options) { |
278 |
/** |
/** |
279 |
* search documents corresponding to a condition for a database |
* search documents corresponding to a condition for a database |
280 |
*/ |
*/ |
281 |
|
if (!db) throw IOError("closed database"); |
282 |
int resnum; |
int resnum; |
283 |
int * result = est_mtdb_search(db, cond->cond, &resnum, NULL); |
int * result = est_mtdb_search(db, cond->cond, &resnum, NULL); |
284 |
std::vector<int> *numbers = new std::vector<int>; |
std::vector<int> *numbers = new std::vector<int>; |
297 |
/** |
/** |
298 |
* get the last happened error code of a database |
* get the last happened error code of a database |
299 |
*/ |
*/ |
300 |
return est_mtdb_error(db); |
return ecode; |
301 |
} |
} |
302 |
bool fatal() { |
bool fatal() { |
303 |
/** |
/** |
304 |
* check whether a database has a fatal error |
* check whether a database has a fatal error |
305 |
*/ |
*/ |
306 |
|
if (!db) throw IOError("closed database"); |
307 |
return est_mtdb_fatal(db); |
return est_mtdb_fatal(db); |
308 |
} |
} |
309 |
bool flush(int _max) { |
bool flush(int _max) { |
310 |
/** |
/** |
311 |
* flush index words in the cache of a database |
* flush index words in the cache of a database |
312 |
*/ |
*/ |
313 |
return est_mtdb_flush(db, _max); |
if (!db) throw IOError("closed database"); |
314 |
|
bool result = est_mtdb_flush(db, _max); |
315 |
|
if (!result) ecode = est_mtdb_error(db); |
316 |
|
return result; |
317 |
} |
} |
318 |
bool sync() { |
bool sync() { |
319 |
/** |
/** |
320 |
* synchronize updating contents of a database |
* synchronize updating contents of a database |
321 |
*/ |
*/ |
322 |
return est_mtdb_sync(db); |
if (!db) throw IOError("closed database"); |
323 |
|
bool result = est_mtdb_sync(db); |
324 |
|
if (!result) ecode = est_mtdb_error(db); |
325 |
|
return result; |
326 |
} |
} |
327 |
bool optimize(int options) { |
bool optimize(int options) { |
328 |
/** |
/** |
329 |
* optimize a database |
* optimize a database |
330 |
*/ |
*/ |
331 |
return est_mtdb_optimize(db, options); |
if (!db) throw IOError("closed database"); |
332 |
|
bool result = est_mtdb_optimize(db, options); |
333 |
|
if (!result) ecode = est_mtdb_error(db); |
334 |
|
return result; |
335 |
} |
} |
336 |
bool out_doc(int id, int options) { |
bool out_doc(int id, int options) { |
337 |
/** |
/** |
338 |
* remove a document from a database |
* remove a document from a database |
339 |
*/ |
*/ |
340 |
return est_mtdb_out_doc(db, id, options); |
if (!db) throw IOError("closed database"); |
341 |
|
bool result = est_mtdb_out_doc(db, id, options); |
342 |
|
if (!result) ecode = est_mtdb_error(db); |
343 |
|
return result; |
344 |
|
} |
345 |
|
bool edit_doc(Document *doc) { |
346 |
|
/** |
347 |
|
* edit an attribute of a document in a database |
348 |
|
*/ |
349 |
|
if (!db) throw IOError("closed database"); |
350 |
|
bool result = est_mtdb_edit_doc(db, doc->doc); |
351 |
|
if (!result) ecode = est_mtdb_error(db); |
352 |
|
return result; |
353 |
} |
} |
354 |
Document * get_doc(int id, int options) { |
Document * get_doc(int id, int options) { |
355 |
/** |
/** |
356 |
* retrieve a document in a database |
* retrieve a document in a database |
357 |
*/ |
*/ |
358 |
|
if (!db) throw IOError("closed database"); |
359 |
ESTDOC *doc = est_mtdb_get_doc(db, id, options); |
ESTDOC *doc = est_mtdb_get_doc(db, id, options); |
360 |
if (!doc) { |
if (!doc) { |
361 |
|
ecode = est_mtdb_error(db); |
362 |
throw est_err_msg(est_mtdb_error(db)); |
throw est_err_msg(est_mtdb_error(db)); |
363 |
} else { |
} else { |
364 |
return new Document(doc); |
return new Document(doc); |
368 |
/** |
/** |
369 |
* get the ID of a document specified by URI |
* get the ID of a document specified by URI |
370 |
*/ |
*/ |
371 |
return est_mtdb_uri_to_id(db, uri); |
if (!db) throw IOError("closed database"); |
372 |
|
int result = est_mtdb_uri_to_id(db, uri); |
373 |
|
if(result == -1) ecode = est_mtdb_error(db); |
374 |
|
return result; |
375 |
} |
} |
376 |
std::map<std::string, std::string> * etch_doc(Document * doc, int max) { |
std::map<std::string, std::string> * etch_doc(Document * doc, int max) { |
377 |
/** |
/** |
378 |
* extract keywords of a document object |
* extract keywords of a document object |
379 |
*/ |
*/ |
380 |
|
if (!db) throw IOError("closed database"); |
381 |
std::map<std::string, std::string> * mss = new std::map<std::string, std::string>; |
std::map<std::string, std::string> * mss = new std::map<std::string, std::string>; |
|
|
|
382 |
CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max); |
CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max); |
|
|
|
383 |
cbmapiterinit(keys); |
cbmapiterinit(keys); |
384 |
int ksiz; |
int ksiz; |
385 |
while (const char *key = cbmapiternext(keys, &ksiz)) { |
while (const char *key = cbmapiternext(keys, &ksiz)) { |
387 |
} |
} |
388 |
return mss; |
return mss; |
389 |
} |
} |
|
bool iter_init() { |
|
|
/** |
|
|
* initialize the iterator of a database |
|
|
*/ |
|
|
return est_mtdb_iter_init(db); |
|
|
} |
|
|
int iter_next() { |
|
|
/** |
|
|
* get the next ID of the iterator of a database |
|
|
*/ |
|
|
return est_mtdb_iter_next(db); |
|
|
} |
|
390 |
const char * name() { |
const char * name() { |
391 |
/** |
/** |
392 |
* get the name of a database |
* get the name of a database |
393 |
*/ |
*/ |
394 |
|
if (!db) throw IOError("closed database"); |
395 |
return est_mtdb_name(db); |
return est_mtdb_name(db); |
396 |
} |
} |
397 |
int doc_num() { |
int doc_num() { |
398 |
/** |
/** |
399 |
* get the number of documents in a database |
* get the number of documents in a database |
400 |
*/ |
*/ |
401 |
|
if (!db) throw IOError("closed database"); |
402 |
return est_mtdb_doc_num(db); |
return est_mtdb_doc_num(db); |
403 |
} |
} |
404 |
int word_num() { |
int word_num() { |
405 |
/** |
/** |
406 |
* get the number of unique words in a database |
* get the number of unique words in a database |
407 |
*/ |
*/ |
408 |
|
if (!db) throw IOError("closed database"); |
409 |
return est_mtdb_word_num(db); |
return est_mtdb_word_num(db); |
410 |
} |
} |
411 |
double size() { |
double size() { |
412 |
/** |
/** |
413 |
* get the size of a database |
* get the size of a database |
414 |
*/ |
*/ |
415 |
|
if (!db) throw IOError("closed database"); |
416 |
return est_mtdb_size(db); |
return est_mtdb_size(db); |
417 |
} |
} |
418 |
void set_cache_size(size_t size, int anum, int tnum) { |
void set_cache_size(size_t size, int anum, int tnum) { |
419 |
/** |
/** |
420 |
* set the maximum size of the cache memory of a database |
* set the maximum size of the cache memory of a database |
421 |
*/ |
*/ |
422 |
|
if (!db) throw IOError("closed database"); |
423 |
est_mtdb_set_cache_size(db, size, anum, tnum); |
est_mtdb_set_cache_size(db, size, anum, tnum); |
424 |
} |
} |
425 |
void set_special_cache(const char *name, int num) { |
void set_special_cache(const char *name, int num) { |
430 |
est_mtdb_set_special_cache(db, name, num); |
est_mtdb_set_special_cache(db, name, num); |
431 |
} |
} |
432 |
}; |
}; |
433 |
|
|
434 |
|
static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) { |
435 |
|
std::vector<std::string> * vs = new std::vector<std::string>; |
436 |
|
CBLIST *list; |
437 |
|
list = cblistopen(); |
438 |
|
est_break_text(text, list, norm, tail); |
439 |
|
for (int i=0; i < cblistnum(list); i++) { |
440 |
|
vs->push_back(cblistval(list, i, NULL)); |
441 |
|
} |
442 |
|
cblistclose(list); |
443 |
|
return vs; |
444 |
|
} |
445 |
|
|
446 |
|
static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) { |
447 |
|
std::vector<std::string> * vs = new std::vector<std::string>; |
448 |
|
CBLIST *list; |
449 |
|
list = cblistopen(); |
450 |
|
est_break_text_perfng(text, list, norm, tail); |
451 |
|
for (int i=0; i < cblistnum(list); i++) { |
452 |
|
vs->push_back(cblistval(list, i, NULL)); |
453 |
|
} |
454 |
|
cblistclose(list); |
455 |
|
return vs; |
456 |
|
} |
457 |
|
|
458 |
|
class ResultDocument { |
459 |
|
public: |
460 |
|
ESTRESDOC *rdoc; |
461 |
|
ResultDocument(ESTRESDOC *_rdoc) { |
462 |
|
rdoc = _rdoc; |
463 |
|
} |
464 |
|
const char *uri(void) { |
465 |
|
return est_resdoc_uri(rdoc); |
466 |
|
} |
467 |
|
std::vector<std::string> * attr_names() { |
468 |
|
std::vector<std::string> * vs = new std::vector<std::string>; |
469 |
|
CBLIST * attr_names = est_resdoc_attr_names(rdoc); |
470 |
|
for (int i=0; i < cblistnum(attr_names); i++) { |
471 |
|
vs->push_back(cblistval(attr_names, i, NULL)); |
472 |
|
} |
473 |
|
cblistclose(attr_names); |
474 |
|
return vs; |
475 |
|
} |
476 |
|
const char *attr(const char *name) { |
477 |
|
return est_resdoc_attr(rdoc, name); |
478 |
|
} |
479 |
|
const char *snippet(void) { |
480 |
|
return est_resdoc_snippet(rdoc); |
481 |
|
} |
482 |
|
}; |
483 |
|
|
484 |
|
class NodeRes { |
485 |
|
private: |
486 |
|
ESTNODERES *nres; |
487 |
|
public: |
488 |
|
NodeRes(ESTNODE *node, Condition *cond, int depth) { |
489 |
|
nres = est_node_search(node, cond->cond, depth); |
490 |
|
} |
491 |
|
~NodeRes() { |
492 |
|
est_noderes_delete(nres); |
493 |
|
} |
494 |
|
std::map<std::string, std::string> * hints(void) { |
495 |
|
std::map<std::string, std::string> * hints = new std::map<std::string, std::string>; |
496 |
|
CBMAP * keys = est_noderes_hints(nres); |
497 |
|
cbmapiterinit(keys); |
498 |
|
int ksiz; |
499 |
|
while (const char *key = cbmapiternext(keys, &ksiz)) { |
500 |
|
hints->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL))); |
501 |
|
} |
502 |
|
return hints; |
503 |
|
} |
504 |
|
int doc_num(void) { |
505 |
|
return est_noderes_doc_num(nres); |
506 |
|
} |
507 |
|
ResultDocument * get_doc(int index) { |
508 |
|
ESTRESDOC *rdoc = est_noderes_get_doc(nres, index); |
509 |
|
if (rdoc) { |
510 |
|
return new ResultDocument(rdoc); |
511 |
|
} else { |
512 |
|
throw IOError("wtf? no document?"); |
513 |
|
return NULL; |
514 |
|
} |
515 |
|
} |
516 |
|
}; |
517 |
|
|
518 |
|
class Node { |
519 |
|
private: |
520 |
|
ESTNODE *node; |
521 |
|
int netenv_ok; |
522 |
|
public: |
523 |
|
Node(const char *url) { |
524 |
|
netenv_ok = est_init_net_env(); |
525 |
|
if (! netenv_ok) throw IOError("can't init net env"); |
526 |
|
node = est_node_new(url); |
527 |
|
if (! node) throw IOError("can't create node"); |
528 |
|
} |
529 |
|
~Node() { |
530 |
|
est_node_delete(node); |
531 |
|
est_free_net_env(); |
532 |
|
} |
533 |
|
void set_proxy(const char *host, int port) { |
534 |
|
est_node_set_proxy(node, host, port); |
535 |
|
} |
536 |
|
void set_timeout(int sec) { |
537 |
|
est_node_set_timeout(node, sec); |
538 |
|
} |
539 |
|
void set_auth(const char *name, const char *passwd) { |
540 |
|
est_node_set_auth(node, name, passwd); |
541 |
|
} |
542 |
|
int status(void) { |
543 |
|
return est_node_status(node); |
544 |
|
} |
545 |
|
bool put_doc(Document *doc) { |
546 |
|
return est_node_put_doc(node, doc->doc); |
547 |
|
} |
548 |
|
bool out_doc(int id) { |
549 |
|
return est_node_out_doc(node, id); |
550 |
|
} |
551 |
|
bool out_doc_by_uri(const char *uri) { |
552 |
|
return est_node_out_doc_by_uri(node, uri); |
553 |
|
} |
554 |
|
#ifdef est_node_edit_doc |
555 |
|
bool edit_doc(Document *doc) { |
556 |
|
return est_node_edit_doc(node, doc->doc); |
557 |
|
} |
558 |
|
#endif |
559 |
|
Document * get_doc(int id) { |
560 |
|
ESTDOC *doc = est_node_get_doc(node, id); |
561 |
|
if (!doc) { |
562 |
|
return NULL; |
563 |
|
} else { |
564 |
|
return new Document(doc); |
565 |
|
} |
566 |
|
} |
567 |
|
Document * get_doc_by_uri(const char *uri) { |
568 |
|
ESTDOC *doc = est_node_get_doc_by_uri(node, uri); |
569 |
|
if (!doc) { |
570 |
|
return NULL; |
571 |
|
} else { |
572 |
|
return new Document(doc); |
573 |
|
} |
574 |
|
} |
575 |
|
char * get_doc_attr(int id, const char *name) { |
576 |
|
/* is this leeking memory? shouldn't I create |
577 |
|
* object and free memory region returned? |
578 |
|
*/ |
579 |
|
return est_node_get_doc_attr(node, id, name); |
580 |
|
} |
581 |
|
char * get_doc_attr_by_uri(const char *uri, const char *name) { |
582 |
|
return est_node_get_doc_attr_by_uri(node, uri, name); |
583 |
|
} |
584 |
|
int uri_to_id(const char *uri) { |
585 |
|
return est_node_uri_to_id(node, uri); |
586 |
|
} |
587 |
|
const char * name(void) { |
588 |
|
return est_node_name(node); |
589 |
|
} |
590 |
|
const char * label(void) { |
591 |
|
return est_node_label(node); |
592 |
|
} |
593 |
|
int doc_num(void) { |
594 |
|
return est_node_doc_num(node); |
595 |
|
} |
596 |
|
int word_num(void) { |
597 |
|
return est_node_word_num(node); |
598 |
|
} |
599 |
|
double size(void) { |
600 |
|
return est_node_size(node); |
601 |
|
} |
602 |
|
NodeRes * search(Condition *cond, int depth) { |
603 |
|
return new NodeRes(node, cond, depth); |
604 |
|
} |
605 |
|
int set_user(const char *name, int mode) { |
606 |
|
return est_node_set_user(node, name, mode); |
607 |
|
} |
608 |
|
int set_link(const char *url, const char *label, int credit) { |
609 |
|
return est_node_set_link(node, url, label, credit); |
610 |
|
} |
611 |
|
}; |
612 |
|
|
613 |
}; |
}; |