/[hyperestraier_wrappers]/trunk/perl/HyperEstraierWrapper.cpp
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/perl/HyperEstraierWrapper.cpp

Parent Directory | Revision Log


Revision 6 - (show annotations)
Sat Sep 3 20:00:11 2005 UTC (17 years, 2 months ago) by dpavlin
File size: 11751 byte(s)
updated to latest version (from parent directory ;-)
1 /**
2 * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3 */
4 #include <estraier.h>
5 #include <estmtdb.h>
6 #include <cabin.h>
7 #include <cstdlib>
8 #include <string>
9 #include <vector>
10 #include <map>
11 #include <cassert>
12 #include <stdexcept>
13
14 /* backward compatibility for 0.5.4 */
15 #ifndef ESTCONDAGITO
16 #define ESTCONDAGITO ESTCONDAGIT
17 #endif
18
19 namespace estraier {
20
/**
 * Exception type used by this wrapper to report operations attempted on a
 * database that is not open (Database throws it with "closed database").
 * Derives from std::runtime_error, so what() returns the message passed in.
 */
class IOError : public std::runtime_error {
public:
    /** Build the exception from a human-readable message. */
    explicit IOError(const std::string& w) : std::runtime_error(w) {}
};
25
26 class Condition {
27 public:
28 enum { // enumeration for options
29 SURE = ESTCONDSURE, // check every N-gram key
30 USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
31 FAST = ESTCONDFAST, // check N-gram keys skipping by two
32 AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
33 NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
34 SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
35 };
36 ESTCOND * cond;
37 Condition() {
38 /**
39 * constructor
40 */
41 cond = est_cond_new();
42 }
43 ~Condition() {
44 /**
45 * destructor
46 */
47 est_cond_delete(cond);
48 }
49 void set_phrase(const char *phrase) {
50 /**
51 * set the search phrase
52 */
53 est_cond_set_phrase(cond, phrase);
54 }
55 void add_attr(const char *expr) {
56 /**
57 * set the attribute expression
58 */
59 est_cond_add_attr(cond, expr);
60 }
61 void set_order(const char *expr) {
62 /**
63 * set the order of a condition object
64 */
65 est_cond_set_order(cond, expr);
66 }
67 void set_max(int _max) {
68 /**
69 * set the maximum number of retrieval of a condition object
70 */
71 est_cond_set_max(cond, _max);
72 }
73 void set_options(int options) {
74 /**
75 * set options of retrieval of a condition object
76 */
77 est_cond_set_options(cond, options);
78 }
79 };
80
81 class Document {
82 private:
83 std::string text_buf;
84 public:
85 ESTDOC *doc;
86 Document() {
87 /**
88 * constructor
89 */
90 doc = est_doc_new();
91 }
92 Document(const char* draft) {
93 /**
94 * constructor
95 */
96 doc = est_doc_new_from_draft(draft);
97 }
98 Document(ESTDOC *_doc) {
99 /**
100 * constructor
101 */
102 doc = _doc;
103 }
104 ~Document() {
105 /**
106 * destructor
107 */
108 est_doc_delete(doc);
109 }
110 void add_attr(const char * name, const char*value) {
111 /**
112 * add an attribute to a document object
113 */
114 est_doc_add_attr(doc, name, value);
115 }
116 void add_text(const char *text) {
117 /**
118 * add a sentence of text to a document object
119 */
120 est_doc_add_text(doc, text);
121 }
122 void add_hidden_text(const char * text) {
123 /**
124 * add a hidden sentence to a document object
125 */
126 est_doc_add_hidden_text(doc, text);
127 }
128 int id() {
129 /**
130 * get the ID number of a document object
131 */
132 return est_doc_id(doc);
133 }
134 std::vector<std::string> * attr_names() {
135 /**
136 * get a list of attribute names of a document object
137 */
138 std::vector<std::string> * vs = new std::vector<std::string>;
139 CBLIST * attr_names = est_doc_attr_names(doc);
140 for (int i=0; i < cblistnum(attr_names); i++) {
141 vs->push_back(cblistval(attr_names, i, NULL));
142 }
143 cblistclose(attr_names);
144 return vs;
145 }
146 const char * attr(const char *name) {
147 /**
148 * get the value of an attribute of a document object
149 */
150 return est_doc_attr(doc, name);
151 }
152 const char * cat_texts() {
153 /**
154 * get a list of sentences of the text of a document object
155 */
156 return est_doc_cat_texts(doc);
157 }
158 std::vector<std::string>* texts() {
159 /**
160 * get a list of sentences of the text of a document object
161 */
162 std::vector<std::string> * vs = new std::vector<std::string>;
163 const CBLIST *texts;
164 texts = est_doc_texts(doc);
165 for(int i = 0; i < cblistnum(texts); i++) {
166 vs->push_back(cblistval(texts, i, NULL));
167 }
168 return vs;
169 }
170 const char * dump_draft() {
171 /**
172 * dump draft data of a document object
173 */
174 return est_doc_dump_draft(doc);
175 }
176 const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
177 /**
178 * make a snippet of the body text of a document object
179 */
180 CBLIST * words;
181 std::vector<std::string>::iterator iter;
182 words = cblistopen();
183 for (iter = _words.begin(); _words.end() != iter; iter++) {
184 cblistpush(words, iter->c_str(), -1);
185 }
186 const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
187 cblistclose(words);
188 return result;
189 }
190 const char * hidden_texts() {
191 /**
192 * get the hidden texts of a document object.
193 */
194 return est_doc_hidden_texts(doc);
195 }
196 };
197
198 class Database {
199 private:
200 ESTMTDB *db;
201 int ecode;
202 public:
203 enum { // enumeration for error codes
204 ERRNOERR = ESTENOERR, // no error
205 ERRINVAL = ESTEINVAL, // invalid argument
206 ERRACCES = ESTEACCES, // access forbidden
207 ERRLOCK = ESTELOCK, // lock failure
208 ERRDB = ESTEDB, // database problem
209 ERRIO = ESTEIO, // I/O problem
210 ERRNOITEM = ESTENOITEM, // no item
211 ERRMISC = ESTEMISC // miscellaneous
212 };
213 enum { // enumeration for open modes
214 DBREADER = ESTDBREADER, // open as a reader
215 DBWRITER = ESTDBWRITER, // open as a writer
216 DBCREAT = ESTDBCREAT, // a writer creating
217 DBTRUNC = ESTDBTRUNC, // a writer truncating
218 DBNOLCK = ESTDBNOLCK, // open without locking
219 DBLCKNB = ESTDBLCKNB, // lock without blocking
220 DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
221 };
222 enum { // enumeration for options of document registration
223 PDCLEAN = ESTPDCLEAN // clean up dispensable regions
224 };
225 enum { // enumeration for options of document deletion
226 ODCLEAN = ESTODCLEAN // clean up dispensable regions
227 };
228 enum { // enumeration for options of optimization
229 OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
230 OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
231 };
232 enum { // enumeration for options of document retrieval
233 GDNOATTR = ESTGDNOATTR, // no attributes
234 GDNOTEXT = ESTGDNOTEXT // no text
235 };
236 Database() {
237 /**
238 * constructor(dummy)
239 */
240 db = NULL;
241 ecode = ERRNOERR;
242 }
243 ~Database() {
244 if (db) close();
245 }
246 bool open(const char * dbname, int mode) {
247 /**
248 * open the database
249 */
250 if (db) close();
251 int ec;
252 db = est_mtdb_open(dbname, mode, &ec);
253 if (!db) ecode = ec;
254 return db;
255 }
256 bool close() {
257 /**
258 * close the database
259 */
260 if (!db) throw IOError("closed database");
261 int ec;
262 bool result = est_mtdb_close(db, &ec);
263 if (!result) ecode = ec;
264 db = NULL;
265 return result;
266 }
267 bool put_doc(Document *doc, int options) {
268 /**
269 * add a document to a database
270 */
271 if (!db) throw IOError("closed database");
272 bool result = est_mtdb_put_doc(db, doc->doc, options);
273 if (!result) ecode = est_mtdb_error(db);
274 return result;
275 }
276 std::vector<int> * search(Condition * cond, int options) {
277 /**
278 * search documents corresponding a condition for a database
279 */
280 if (!db) throw IOError("closed database");
281 int resnum;
282 int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
283 std::vector<int> *numbers = new std::vector<int>;
284 for (int i=0; i<resnum; i++) {
285 numbers->push_back(result[i]);
286 }
287 return numbers;
288 }
289 static const char * err_msg(int ecode) {
290 /**
291 * get the string of an error
292 */
293 return est_err_msg(ecode);
294 }
295 int error() {
296 /**
297 * get the last happended error code of a database
298 */
299 return ecode;
300 }
301 bool fatal() {
302 /**
303 * check whether a database has a fatal error
304 */
305 if (!db) throw IOError("closed database");
306 return est_mtdb_fatal(db);
307 }
308 bool flush(int _max) {
309 /**
310 * flush index words in the cache of a database
311 */
312 if (!db) throw IOError("closed database");
313 bool result = est_mtdb_flush(db, _max);
314 if (!result) ecode = est_mtdb_error(db);
315 return result;
316 }
317 bool sync() {
318 /**
319 * synchronize updating contents of a database
320 */
321 if (!db) throw IOError("closed database");
322 bool result = est_mtdb_sync(db);
323 if (!result) ecode = est_mtdb_error(db);
324 return result;
325 }
326 bool optimize(int options) {
327 /**
328 * optimize a database
329 */
330 if (!db) throw IOError("closed database");
331 bool result = est_mtdb_optimize(db, options);
332 if (!result) ecode = est_mtdb_error(db);
333 return result;
334 }
335 bool out_doc(int id, int options) {
336 /**
337 * remove a document from a database
338 */
339 if (!db) throw IOError("closed database");
340 bool result = est_mtdb_out_doc(db, id, options);
341 if (!result) ecode = est_mtdb_error(db);
342 return result;
343 }
344 bool edit_doc(Document *doc) {
345 /**
346 * edit an attribute of a document in a database
347 */
348 if (!db) throw IOError("closed database");
349 bool result = est_mtdb_edit_doc(db, doc->doc);
350 if (!result) ecode = est_mtdb_error(db);
351 return result;
352 }
353 Document * get_doc(int id, int options) {
354 /**
355 * retrieve a document in a database
356 */
357 if (!db) throw IOError("closed database");
358 ESTDOC *doc = est_mtdb_get_doc(db, id, options);
359 if (!doc) {
360 ecode = est_mtdb_error(db);
361 throw est_err_msg(est_mtdb_error(db));
362 } else {
363 return new Document(doc);
364 }
365 }
366 int uri_to_id(const char *uri) {
367 /**
368 * get the ID of a document spacified by URI
369 */
370 if (!db) throw IOError("closed database");
371 int result = est_mtdb_uri_to_id(db, uri);
372 if(result == -1) ecode = est_mtdb_error(db);
373 return result;
374 }
375 std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
376 /**
377 * extract keywords of a document object
378 */
379 if (!db) throw IOError("closed database");
380 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
381 CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
382 cbmapiterinit(keys);
383 int ksiz;
384 while (const char *key = cbmapiternext(keys, &ksiz)) {
385 mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
386 }
387 return mss;
388 }
389 const char * name() {
390 /**
391 * get the name of a database
392 */
393 if (!db) throw IOError("closed database");
394 return est_mtdb_name(db);
395 }
396 int doc_num() {
397 /**
398 * get the number of documents in a database
399 */
400 if (!db) throw IOError("closed database");
401 return est_mtdb_doc_num(db);
402 }
403 int word_num() {
404 /**
405 * get the number of unique words in a database
406 */
407 if (!db) throw IOError("closed database");
408 return est_mtdb_word_num(db);
409 }
410 double size() {
411 /**
412 * get the size of a database
413 */
414 if (!db) throw IOError("closed database");
415 return est_mtdb_size(db);
416 }
417 void set_cache_size(size_t size, int anum, int tnum) {
418 /**
419 * set the maximum size of the cache memory of a database
420 */
421 if (!db) throw IOError("closed database");
422 est_mtdb_set_cache_size(db, size, anum, tnum);
423 }
424 void set_special_cache(const char *name, int num) {
425 /**
426 * Set the special cache for narrowing and sorting
427 * with document attributes
428 */
429 est_mtdb_set_special_cache(db, name, num);
430 }
431 };
432
433 static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) {
434 std::vector<std::string> * vs = new std::vector<std::string>;
435 CBLIST *list;
436 list = cblistopen();
437 est_break_text(text, list, norm, tail);
438 for (int i=0; i < cblistnum(list); i++) {
439 vs->push_back(cblistval(list, i, NULL));
440 }
441 cblistclose(list);
442 return vs;
443 }
444
445 static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) {
446 std::vector<std::string> * vs = new std::vector<std::string>;
447 CBLIST *list;
448 list = cblistopen();
449 est_break_text_perfng(text, list, norm, tail);
450 for (int i=0; i < cblistnum(list); i++) {
451 vs->push_back(cblistval(list, i, NULL));
452 }
453 cblistclose(list);
454 return vs;
455 }
456
457 };

  ViewVC Help
Powered by ViewVC 1.1.26