/[hyperestraier_wrappers]/trunk/perl/HyperEstraierWrapper.cpp
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/perl/HyperEstraierWrapper.cpp

Parent Directory | Revision Log


Revision 2 - (show annotations)
Sat Sep 3 18:04:41 2005 UTC (18 years, 6 months ago) by dpavlin
File size: 9407 byte(s)
make working copy
1 /**
2 * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3 */
4 #include <estraier.h>
5 #include <estmtdb.h>
6 #include <cabin.h>
7 #include <cstdlib>
8 #include <string>
9 #include <vector>
10 #include <map>
11 #include <cassert>
12
13 namespace estraier {
14 class Condition {
15 public:
16 enum { // enumeration for options
17 SURE = ESTCONDSURE, // check every N-gram key
18 USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
19 FAST = ESTCONDFAST, // check N-gram keys skipping by two
20 AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
21 NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
22 SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
23 };
24 ESTCOND * cond;
25 Condition() {
26 /**
27 * constructor
28 */
29 cond = est_cond_new();
30 }
31 ~Condition() {
32 /**
33 * destructor
34 */
35 est_cond_delete(cond);
36 }
37 void set_phrase(const char *phrase) {
38 /**
39 * set the search phrase
40 */
41 est_cond_set_phrase(cond, phrase);
42 }
43 void add_attr(const char *expr) {
44 /**
45 * set the attribute expression
46 */
47 est_cond_add_attr(cond, expr);
48 }
49 void set_order(const char *expr) {
50 /**
51 * set the order of a condition object
52 */
53 est_cond_set_order(cond, expr);
54 }
55 void set_max(int _max) {
56 /**
57 * set the maximum number of retrieval of a condition object
58 */
59 est_cond_set_max(cond, _max);
60 }
61 void set_options(int options) {
62 /**
63 * set options of retrieval of a condition object
64 */
65 est_cond_set_options(cond, options);
66 }
67 };
68
69 class Document {
70 private:
71 std::string text_buf;
72 public:
73 ESTDOC *doc;
74
75 Document() {
76 /**
77 * constructor
78 */
79 doc = est_doc_new();
80 }
81 Document(const char* draft) {
82 /**
83 * constructor
84 */
85 doc = est_doc_new_from_draft(draft);
86 }
87 Document(ESTDOC *_doc) {
88 /**
89 * constructor
90 */
91 doc = _doc;
92 }
93 ~Document() {
94 /**
95 * destructor
96 */
97 est_doc_delete(doc);
98 }
99 void add_attr(const char * name, const char*value) {
100 /**
101 * add an attribute to a document object
102 */
103 est_doc_add_attr(doc, name, value);
104 }
105 void add_text(const char *text) {
106 /**
107 * add a sentence of text to a document object
108 */
109 est_doc_add_text(doc, text);
110 }
111 void add_hidden_text(const char * text) {
112 /**
113 * add a hidden sentence to a document object
114 */
115 est_doc_add_hidden_text(doc, text);
116 }
117 int id() {
118 /**
119 * get the ID number of a document object
120 */
121 return est_doc_id(doc);
122 }
123 std::vector<std::string> * attr_names() {
124 /**
125 * get a list of attribute names of a document object
126 */
127 std::vector<std::string> * vs = new std::vector<std::string>;
128 CBLIST * attr_names = est_doc_attr_names(doc);
129 for (int i=0; i < cblistnum(attr_names); i++) {
130 vs->push_back(cblistval(attr_names, i, NULL));
131 }
132 cblistclose(attr_names);
133 return vs;
134 }
135 const char * attr(const char *name) {
136 /**
137 * get the value of an attribute of a document object
138 */
139 return est_doc_attr(doc, name);
140 }
141 const char * cat_texts() {
142 /**
143 * get a list of sentences of the text of a document object
144 */
145 // return est_doc_cat_texts(doc);
146 return "This is mockup!";
147 }
148 std::vector<std::string>* texts() {
149 /**
150 * get a list of sentences of the text of a document object
151 */
152 std::vector<std::string> * vs = new std::vector<std::string>;
153 const CBLIST *texts;
154 texts = est_doc_texts(doc);
155 for(int i = 0; i < cblistnum(texts); i++) {
156 vs->push_back(cblistval(texts, i, NULL));
157 }
158 return vs;
159 }
160 const char * dump_draft() {
161 /**
162 * dump draft data of a document object
163 */
164 return est_doc_dump_draft(doc);
165 }
166 const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
167 /**
168 * make a snippet of the body text of a document object
169 */
170 CBLIST * words;
171 std::vector<std::string>::iterator iter;
172
173 words = cblistopen();
174
175 for (iter = _words.begin(); _words.end() != iter; iter++) {
176 cblistpush(words, iter->c_str(), -1);
177 }
178
179 const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
180
181 cblistclose(words);
182
183 return result;
184 }
185 };
186
187 class Database {
188 private:
189 ESTMTDB *db;
190 public:
191 enum { // enumeration for error codes
192 ERRNOERR = ESTENOERR, // no error
193 ERRINVAL = ESTEINVAL, // invalid argument
194 ERRACCES = ESTEACCES, // access forbidden
195 ERRLOCK = ESTELOCK, // lock failure
196 ERRDB = ESTEDB, // database problem
197 ERRIO = ESTEIO, // I/O problem
198 ERRNOITEM = ESTENOITEM, // no item
199 ERRMISC = ESTEMISC // miscellaneous
200 };
201 enum { // enumeration for open modes
202 DBREADER = ESTDBREADER, // open as a reader
203 DBWRITER = ESTDBWRITER, // open as a writer
204 DBCREAT = ESTDBCREAT, // a writer creating
205 DBTRUNC = ESTDBTRUNC, // a writer truncating
206 DBNOLCK = ESTDBNOLCK, // open without locking
207 DBLCKNB = ESTDBLCKNB, // lock without blocking
208 DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
209 };
210 enum { // enumeration for options of document registration
211 PDCLEAN = ESTPDCLEAN // clean up dispensable regions
212 };
213 enum { // enumeration for options of document deletion
214 ODCLEAN = ESTODCLEAN // clean up dispensable regions
215 };
216 enum { // enumeration for options of optimization
217 OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
218 OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
219 };
220 enum { // enumeration for options of document retrieval
221 GDNOATTR = ESTGDNOATTR, // no attributes
222 GDNOTEXT = ESTGDNOTEXT // no text
223 };
224 Database() {
225 /**
226 * constructor(dummy)
227 */
228 }
229 ~Database() {
230 close();
231 }
232 bool open(const char * dbname, int mode) {
233 /**
234 * open the database
235 */
236 int ecode;
237 db = est_mtdb_open(dbname, mode, &ecode);
238 return db;
239 }
240 bool close() {
241 /**
242 * close the database
243 */
244 if (db) {
245 int ecode;
246 bool result = est_mtdb_close(db, &ecode);
247 db = NULL;
248 return result;
249 } else {
250 return false;
251 }
252 }
253 bool put_doc(Document *doc, int options) {
254 /**
255 * add a document to a database
256 */
257 return est_mtdb_put_doc(db, doc->doc, options);
258 }
259 std::vector<int> * search(Condition * cond, int options) {
260 /**
261 * search documents corresponding a condition for a database
262 */
263 int resnum;
264 int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
265 std::vector<int> *numbers = new std::vector<int>;
266 for (int i=0; i<resnum; i++) {
267 numbers->push_back(result[i]);
268 }
269 return numbers;
270 }
271 static const char * err_msg(int ecode) {
272 /**
273 * get the string of an error
274 */
275 return est_err_msg(ecode);
276 }
277 int error() {
278 /**
279 * get the last happended error code of a database
280 */
281 return est_mtdb_error(db);
282 }
283 bool fatal() {
284 /**
285 * check whether a database has a fatal error
286 */
287 return est_mtdb_fatal(db);
288 }
289 bool flush(int _max) {
290 /**
291 * flush index words in the cache of a database
292 */
293 return est_mtdb_flush(db, _max);
294 }
295 bool sync() {
296 /**
297 * synchronize updating contents of a database
298 */
299 return est_mtdb_sync(db);
300 }
301 bool optimize(int options) {
302 /**
303 * optimize a database
304 */
305 return est_mtdb_optimize(db, options);
306 }
307 bool out_doc(int id, int options) {
308 /**
309 * remove a document from a database
310 */
311 return est_mtdb_out_doc(db, id, options);
312 }
313 Document * get_doc(int id, int options) {
314 /**
315 * retrieve a document in a database
316 */
317 ESTDOC *doc = est_mtdb_get_doc(db, id, options);
318 if (!doc) {
319 throw est_err_msg(est_mtdb_error(db));
320 } else {
321 return new Document(doc);
322 }
323 }
324 int uri_to_id(const char *uri) {
325 /**
326 * get the ID of a document spacified by URI
327 */
328 return est_mtdb_uri_to_id(db, uri);
329 }
330 std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
331 /**
332 * extract keywords of a document object
333 */
334 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
335
336 CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
337
338 cbmapiterinit(keys);
339 int ksiz;
340 while (const char *key = cbmapiternext(keys, &ksiz)) {
341 mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
342 }
343 return mss;
344 }
345 bool iter_init() {
346 /**
347 * initialize the iterator of a database
348 */
349 return est_mtdb_iter_init(db);
350 }
351 int iter_next() {
352 /**
353 * get the next ID of the iterator of a database
354 */
355 return est_mtdb_iter_next(db);
356 }
357 const char * name() {
358 /**
359 * get the name of a database
360 */
361 return est_mtdb_name(db);
362 }
363 int doc_num() {
364 /**
365 * get the number of documents in a database
366 */
367 return est_mtdb_doc_num(db);
368 }
369 int word_num() {
370 /**
371 * get the number of unique words in a database
372 */
373 return est_mtdb_word_num(db);
374 }
375 double size() {
376 /**
377 * get the size of a database
378 */
379 return est_mtdb_size(db);
380 }
381 void set_cache_size(size_t size, int anum, int tnum) {
382 /**
383 * set the maximum size of the cache memory of a database
384 */
385 est_mtdb_set_cache_size(db, size, anum, tnum);
386 }
387 void set_special_cache(const char *name, int num) {
388 /**
389 * Set the special cache for narrowing and sorting
390 * with document attributes
391 */
392 est_mtdb_set_special_cache(db, name, num);
393 }
394 };
395 };

  ViewVC Help
Powered by ViewVC 1.1.26