/[hyperestraier_wrappers]/trunk/ruby/HyperEstraierWrapper.cpp
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/ruby/HyperEstraierWrapper.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 15 - (hide annotations)
Fri Sep 9 15:31:11 2005 UTC (18 years, 6 months ago) by dpavlin
File size: 11652 byte(s)
updated to upstream 0.0.13
1 dpavlin 1 /**
2     * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3     */
4     #include <estraier.h>
5     #include <estmtdb.h>
6     #include <cabin.h>
7     #include <cstdlib>
8     #include <string>
9     #include <vector>
10     #include <map>
11     #include <cassert>
12 dpavlin 15 #include <stdexcept>
13 dpavlin 1
14     namespace estraier {
15 dpavlin 15
/**
 * Exception type thrown by this wrapper; Database throws it with the
 * message "closed database" when a method needs an open handle and there
 * is none.
 *
 * Derives from std::runtime_error, so it can be caught as IOError,
 * std::runtime_error or std::exception.
 */
class IOError : public std::runtime_error {
public:
    /**
     * @param w  human-readable description, later returned by what()
     */
    explicit IOError(const std::string& w) : std::runtime_error(w) {}
};
20    
21 dpavlin 1 class Condition {
22     public:
23     enum { // enumeration for options
24     SURE = ESTCONDSURE, // check every N-gram key
25     USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
26     FAST = ESTCONDFAST, // check N-gram keys skipping by two
27     AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
28     NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
29     SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
30     };
31     ESTCOND * cond;
32     Condition() {
33     /**
34     * constructor
35     */
36     cond = est_cond_new();
37     }
38     ~Condition() {
39     /**
40     * destructor
41     */
42     est_cond_delete(cond);
43     }
44     void set_phrase(const char *phrase) {
45     /**
46     * set the search phrase
47     */
48     est_cond_set_phrase(cond, phrase);
49     }
50     void add_attr(const char *expr) {
51     /**
52     * set the attribute expression
53     */
54     est_cond_add_attr(cond, expr);
55     }
56     void set_order(const char *expr) {
57     /**
58     * set the order of a condition object
59     */
60     est_cond_set_order(cond, expr);
61     }
62     void set_max(int _max) {
63     /**
64     * set the maximum number of retrieval of a condition object
65     */
66     est_cond_set_max(cond, _max);
67     }
68     void set_options(int options) {
69     /**
70     * set options of retrieval of a condition object
71     */
72     est_cond_set_options(cond, options);
73     }
74     };
75    
76     class Document {
77     private:
78     std::string text_buf;
79     public:
80     ESTDOC *doc;
81     Document() {
82     /**
83     * constructor
84     */
85     doc = est_doc_new();
86     }
87     Document(const char* draft) {
88     /**
89     * constructor
90     */
91     doc = est_doc_new_from_draft(draft);
92     }
93     Document(ESTDOC *_doc) {
94     /**
95     * constructor
96     */
97     doc = _doc;
98     }
99     ~Document() {
100     /**
101     * destructor
102     */
103     est_doc_delete(doc);
104     }
105     void add_attr(const char * name, const char*value) {
106     /**
107     * add an attribute to a document object
108     */
109     est_doc_add_attr(doc, name, value);
110     }
111     void add_text(const char *text) {
112     /**
113     * add a sentence of text to a document object
114     */
115     est_doc_add_text(doc, text);
116     }
117     void add_hidden_text(const char * text) {
118     /**
119     * add a hidden sentence to a document object
120     */
121     est_doc_add_hidden_text(doc, text);
122     }
123     int id() {
124     /**
125     * get the ID number of a document object
126     */
127     return est_doc_id(doc);
128     }
129     std::vector<std::string> * attr_names() {
130     /**
131     * get a list of attribute names of a document object
132     */
133     std::vector<std::string> * vs = new std::vector<std::string>;
134     CBLIST * attr_names = est_doc_attr_names(doc);
135     for (int i=0; i < cblistnum(attr_names); i++) {
136     vs->push_back(cblistval(attr_names, i, NULL));
137     }
138     cblistclose(attr_names);
139     return vs;
140     }
141     const char * attr(const char *name) {
142     /**
143     * get the value of an attribute of a document object
144     */
145     return est_doc_attr(doc, name);
146     }
147     const char * cat_texts() {
148     /**
149     * get a list of sentences of the text of a document object
150     */
151 dpavlin 15 return est_doc_cat_texts(doc);
152 dpavlin 1 }
153     std::vector<std::string>* texts() {
154     /**
155     * get a list of sentences of the text of a document object
156     */
157     std::vector<std::string> * vs = new std::vector<std::string>;
158     const CBLIST *texts;
159     texts = est_doc_texts(doc);
160     for(int i = 0; i < cblistnum(texts); i++) {
161     vs->push_back(cblistval(texts, i, NULL));
162     }
163     return vs;
164     }
165     const char * dump_draft() {
166     /**
167     * dump draft data of a document object
168     */
169     return est_doc_dump_draft(doc);
170     }
171     const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
172     /**
173     * make a snippet of the body text of a document object
174     */
175     CBLIST * words;
176     std::vector<std::string>::iterator iter;
177     words = cblistopen();
178     for (iter = _words.begin(); _words.end() != iter; iter++) {
179     cblistpush(words, iter->c_str(), -1);
180     }
181     const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
182     cblistclose(words);
183     return result;
184     }
185 dpavlin 15 const char * hidden_texts() {
186     /**
187     * get the hidden texts of a document object.
188     */
189     return est_doc_hidden_texts(doc);
190     }
191 dpavlin 1 };
192    
193     class Database {
194     private:
195     ESTMTDB *db;
196 dpavlin 15 int ecode;
197 dpavlin 1 public:
198     enum { // enumeration for error codes
199     ERRNOERR = ESTENOERR, // no error
200     ERRINVAL = ESTEINVAL, // invalid argument
201     ERRACCES = ESTEACCES, // access forbidden
202     ERRLOCK = ESTELOCK, // lock failure
203     ERRDB = ESTEDB, // database problem
204     ERRIO = ESTEIO, // I/O problem
205     ERRNOITEM = ESTENOITEM, // no item
206     ERRMISC = ESTEMISC // miscellaneous
207     };
208     enum { // enumeration for open modes
209     DBREADER = ESTDBREADER, // open as a reader
210     DBWRITER = ESTDBWRITER, // open as a writer
211     DBCREAT = ESTDBCREAT, // a writer creating
212     DBTRUNC = ESTDBTRUNC, // a writer truncating
213     DBNOLCK = ESTDBNOLCK, // open without locking
214     DBLCKNB = ESTDBLCKNB, // lock without blocking
215     DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
216     };
217     enum { // enumeration for options of document registration
218     PDCLEAN = ESTPDCLEAN // clean up dispensable regions
219     };
220     enum { // enumeration for options of document deletion
221     ODCLEAN = ESTODCLEAN // clean up dispensable regions
222     };
223     enum { // enumeration for options of optimization
224     OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
225     OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
226     };
227     enum { // enumeration for options of document retrieval
228     GDNOATTR = ESTGDNOATTR, // no attributes
229     GDNOTEXT = ESTGDNOTEXT // no text
230     };
231     Database() {
232     /**
233     * constructor(dummy)
234     */
235 dpavlin 15 db = NULL;
236     ecode = ERRNOERR;
237 dpavlin 1 }
238     ~Database() {
239 dpavlin 15 if (db) close();
240 dpavlin 1 }
241     bool open(const char * dbname, int mode) {
242     /**
243     * open the database
244     */
245 dpavlin 15 if (db) close();
246     int ec;
247     db = est_mtdb_open(dbname, mode, &ec);
248     if (!db) ecode = ec;
249 dpavlin 1 return db;
250     }
251     bool close() {
252     /**
253     * close the database
254     */
255 dpavlin 15 if (!db) throw IOError("closed database");
256     int ec;
257     bool result = est_mtdb_close(db, &ec);
258     if (!result) ecode = ec;
259 dpavlin 1 db = NULL;
260     return result;
261     }
262     bool put_doc(Document *doc, int options) {
263     /**
264     * add a document to a database
265     */
266 dpavlin 15 if (!db) throw IOError("closed database");
267     bool result = est_mtdb_put_doc(db, doc->doc, options);
268     if (!result) ecode = est_mtdb_error(db);
269     return result;
270 dpavlin 1 }
271     std::vector<int> * search(Condition * cond, int options) {
272     /**
273     * search documents corresponding a condition for a database
274     */
275 dpavlin 15 if (!db) throw IOError("closed database");
276 dpavlin 1 int resnum;
277     int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
278     std::vector<int> *numbers = new std::vector<int>;
279     for (int i=0; i<resnum; i++) {
280     numbers->push_back(result[i]);
281     }
282     return numbers;
283     }
284     static const char * err_msg(int ecode) {
285     /**
286     * get the string of an error
287     */
288     return est_err_msg(ecode);
289     }
290     int error() {
291     /**
292     * get the last happended error code of a database
293     */
294 dpavlin 15 return ecode;
295 dpavlin 1 }
296     bool fatal() {
297     /**
298     * check whether a database has a fatal error
299     */
300 dpavlin 15 if (!db) throw IOError("closed database");
301 dpavlin 1 return est_mtdb_fatal(db);
302     }
303     bool flush(int _max) {
304     /**
305     * flush index words in the cache of a database
306     */
307 dpavlin 15 if (!db) throw IOError("closed database");
308     bool result = est_mtdb_flush(db, _max);
309     if (!result) ecode = est_mtdb_error(db);
310     return result;
311 dpavlin 1 }
312     bool sync() {
313     /**
314     * synchronize updating contents of a database
315     */
316 dpavlin 15 if (!db) throw IOError("closed database");
317     bool result = est_mtdb_sync(db);
318     if (!result) ecode = est_mtdb_error(db);
319     return result;
320 dpavlin 1 }
321     bool optimize(int options) {
322     /**
323     * optimize a database
324     */
325 dpavlin 15 if (!db) throw IOError("closed database");
326     bool result = est_mtdb_optimize(db, options);
327     if (!result) ecode = est_mtdb_error(db);
328     return result;
329 dpavlin 1 }
330     bool out_doc(int id, int options) {
331     /**
332     * remove a document from a database
333     */
334 dpavlin 15 if (!db) throw IOError("closed database");
335     bool result = est_mtdb_out_doc(db, id, options);
336     if (!result) ecode = est_mtdb_error(db);
337     return result;
338 dpavlin 1 }
339 dpavlin 15 bool edit_doc(Document *doc) {
340     /**
341     * edit an attribute of a document in a database
342     */
343     if (!db) throw IOError("closed database");
344     bool result = est_mtdb_edit_doc(db, doc->doc);
345     if (!result) ecode = est_mtdb_error(db);
346     return result;
347     }
348 dpavlin 1 Document * get_doc(int id, int options) {
349     /**
350     * retrieve a document in a database
351     */
352 dpavlin 15 if (!db) throw IOError("closed database");
353 dpavlin 1 ESTDOC *doc = est_mtdb_get_doc(db, id, options);
354     if (!doc) {
355 dpavlin 15 ecode = est_mtdb_error(db);
356 dpavlin 1 throw est_err_msg(est_mtdb_error(db));
357     } else {
358     return new Document(doc);
359     }
360     }
361     int uri_to_id(const char *uri) {
362     /**
363     * get the ID of a document spacified by URI
364     */
365 dpavlin 15 if (!db) throw IOError("closed database");
366     int result = est_mtdb_uri_to_id(db, uri);
367     if(result == -1) ecode = est_mtdb_error(db);
368     return result;
369 dpavlin 1 }
370     std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
371     /**
372     * extract keywords of a document object
373     */
374 dpavlin 15 if (!db) throw IOError("closed database");
375 dpavlin 1 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
376     CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
377     cbmapiterinit(keys);
378     int ksiz;
379     while (const char *key = cbmapiternext(keys, &ksiz)) {
380     mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
381     }
382     return mss;
383     }
384     const char * name() {
385     /**
386     * get the name of a database
387     */
388 dpavlin 15 if (!db) throw IOError("closed database");
389 dpavlin 1 return est_mtdb_name(db);
390     }
391     int doc_num() {
392     /**
393     * get the number of documents in a database
394     */
395 dpavlin 15 if (!db) throw IOError("closed database");
396 dpavlin 1 return est_mtdb_doc_num(db);
397     }
398     int word_num() {
399     /**
400     * get the number of unique words in a database
401     */
402 dpavlin 15 if (!db) throw IOError("closed database");
403 dpavlin 1 return est_mtdb_word_num(db);
404     }
405     double size() {
406     /**
407     * get the size of a database
408     */
409 dpavlin 15 if (!db) throw IOError("closed database");
410 dpavlin 1 return est_mtdb_size(db);
411     }
412     void set_cache_size(size_t size, int anum, int tnum) {
413     /**
414     * set the maximum size of the cache memory of a database
415     */
416 dpavlin 15 if (!db) throw IOError("closed database");
417 dpavlin 1 est_mtdb_set_cache_size(db, size, anum, tnum);
418     }
419     void set_special_cache(const char *name, int num) {
420     /**
421     * Set the special cache for narrowing and sorting
422     * with document attributes
423     */
424     est_mtdb_set_special_cache(db, name, num);
425     }
426     };
427 dpavlin 15
428     static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) {
429     std::vector<std::string> * vs = new std::vector<std::string>;
430     CBLIST *list;
431     list = cblistopen();
432     est_break_text(text, list, norm, tail);
433     for (int i=0; i < cblistnum(list); i++) {
434     vs->push_back(cblistval(list, i, NULL));
435     }
436     cblistclose(list);
437     return vs;
438     }
439    
440     static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) {
441     std::vector<std::string> * vs = new std::vector<std::string>;
442     CBLIST *list;
443     list = cblistopen();
444     est_break_text_perfng(text, list, norm, tail);
445     for (int i=0; i < cblistnum(list); i++) {
446     vs->push_back(cblistval(list, i, NULL));
447     }
448     cblistclose(list);
449     return vs;
450     }
451    
452 dpavlin 1 };

  ViewVC Help
Powered by ViewVC 1.1.26