/[hyperestraier_wrappers]/trunk/perl/HyperEstraierWrapper.cpp
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/perl/HyperEstraierWrapper.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6 - (hide annotations)
Sat Sep 3 20:00:11 2005 UTC (18 years, 6 months ago) by dpavlin
File size: 11751 byte(s)
updated to latest version (from parent directory ;-)
1 dpavlin 1 /**
2     * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3     */
4     #include <estraier.h>
5     #include <estmtdb.h>
6     #include <cabin.h>
7     #include <cstdlib>
8     #include <string>
9     #include <vector>
10     #include <map>
11     #include <cassert>
12 dpavlin 6 #include <stdexcept>
13 dpavlin 1
14 dpavlin 4 /* backward compatibility for 0.5.4 */
15     #ifndef ESTCONDAGITO
16     #define ESTCONDAGITO ESTCONDAGIT
17     #endif
18    
19 dpavlin 1 namespace estraier {
20 dpavlin 6
21     class IOError : public std::runtime_error {
22     public:
23     explicit IOError (const std::string& w) : std::runtime_error(w) {}
24     };
25    
26 dpavlin 1 class Condition {
27     public:
28     enum { // enumeration for options
29     SURE = ESTCONDSURE, // check every N-gram key
30     USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
31     FAST = ESTCONDFAST, // check N-gram keys skipping by two
32     AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
33     NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
34     SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
35     };
36     ESTCOND * cond;
37     Condition() {
38     /**
39     * constructor
40     */
41     cond = est_cond_new();
42     }
43     ~Condition() {
44     /**
45     * destructor
46     */
47     est_cond_delete(cond);
48     }
49     void set_phrase(const char *phrase) {
50     /**
51     * set the search phrase
52     */
53     est_cond_set_phrase(cond, phrase);
54     }
55     void add_attr(const char *expr) {
56     /**
57     * set the attribute expression
58     */
59     est_cond_add_attr(cond, expr);
60     }
61     void set_order(const char *expr) {
62     /**
63     * set the order of a condition object
64     */
65     est_cond_set_order(cond, expr);
66     }
67     void set_max(int _max) {
68     /**
69     * set the maximum number of retrieval of a condition object
70     */
71     est_cond_set_max(cond, _max);
72     }
73     void set_options(int options) {
74     /**
75     * set options of retrieval of a condition object
76     */
77     est_cond_set_options(cond, options);
78     }
79     };
80    
81     class Document {
82     private:
83     std::string text_buf;
84     public:
85     ESTDOC *doc;
86     Document() {
87     /**
88     * constructor
89     */
90     doc = est_doc_new();
91     }
92     Document(const char* draft) {
93     /**
94     * constructor
95     */
96     doc = est_doc_new_from_draft(draft);
97     }
98     Document(ESTDOC *_doc) {
99     /**
100     * constructor
101     */
102     doc = _doc;
103     }
104     ~Document() {
105     /**
106     * destructor
107     */
108     est_doc_delete(doc);
109     }
110     void add_attr(const char * name, const char*value) {
111     /**
112     * add an attribute to a document object
113     */
114     est_doc_add_attr(doc, name, value);
115     }
116     void add_text(const char *text) {
117     /**
118     * add a sentence of text to a document object
119     */
120     est_doc_add_text(doc, text);
121     }
122     void add_hidden_text(const char * text) {
123     /**
124     * add a hidden sentence to a document object
125     */
126     est_doc_add_hidden_text(doc, text);
127     }
128     int id() {
129     /**
130     * get the ID number of a document object
131     */
132     return est_doc_id(doc);
133     }
134     std::vector<std::string> * attr_names() {
135     /**
136     * get a list of attribute names of a document object
137     */
138     std::vector<std::string> * vs = new std::vector<std::string>;
139     CBLIST * attr_names = est_doc_attr_names(doc);
140     for (int i=0; i < cblistnum(attr_names); i++) {
141     vs->push_back(cblistval(attr_names, i, NULL));
142     }
143     cblistclose(attr_names);
144     return vs;
145     }
146     const char * attr(const char *name) {
147     /**
148     * get the value of an attribute of a document object
149     */
150     return est_doc_attr(doc, name);
151     }
152     const char * cat_texts() {
153     /**
154     * get a list of sentences of the text of a document object
155     */
156 dpavlin 6 return est_doc_cat_texts(doc);
157 dpavlin 1 }
158     std::vector<std::string>* texts() {
159     /**
160     * get a list of sentences of the text of a document object
161     */
162     std::vector<std::string> * vs = new std::vector<std::string>;
163     const CBLIST *texts;
164     texts = est_doc_texts(doc);
165     for(int i = 0; i < cblistnum(texts); i++) {
166     vs->push_back(cblistval(texts, i, NULL));
167     }
168     return vs;
169     }
170     const char * dump_draft() {
171     /**
172     * dump draft data of a document object
173     */
174     return est_doc_dump_draft(doc);
175     }
176     const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
177     /**
178     * make a snippet of the body text of a document object
179     */
180     CBLIST * words;
181     std::vector<std::string>::iterator iter;
182     words = cblistopen();
183     for (iter = _words.begin(); _words.end() != iter; iter++) {
184     cblistpush(words, iter->c_str(), -1);
185     }
186     const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
187     cblistclose(words);
188     return result;
189     }
190 dpavlin 6 const char * hidden_texts() {
191     /**
192     * get the hidden texts of a document object.
193     */
194     return est_doc_hidden_texts(doc);
195     }
196 dpavlin 1 };
197    
198     class Database {
199     private:
200     ESTMTDB *db;
201 dpavlin 6 int ecode;
202 dpavlin 1 public:
203     enum { // enumeration for error codes
204     ERRNOERR = ESTENOERR, // no error
205     ERRINVAL = ESTEINVAL, // invalid argument
206     ERRACCES = ESTEACCES, // access forbidden
207     ERRLOCK = ESTELOCK, // lock failure
208     ERRDB = ESTEDB, // database problem
209     ERRIO = ESTEIO, // I/O problem
210     ERRNOITEM = ESTENOITEM, // no item
211     ERRMISC = ESTEMISC // miscellaneous
212     };
213     enum { // enumeration for open modes
214     DBREADER = ESTDBREADER, // open as a reader
215     DBWRITER = ESTDBWRITER, // open as a writer
216     DBCREAT = ESTDBCREAT, // a writer creating
217     DBTRUNC = ESTDBTRUNC, // a writer truncating
218     DBNOLCK = ESTDBNOLCK, // open without locking
219     DBLCKNB = ESTDBLCKNB, // lock without blocking
220     DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
221     };
222     enum { // enumeration for options of document registration
223     PDCLEAN = ESTPDCLEAN // clean up dispensable regions
224     };
225     enum { // enumeration for options of document deletion
226     ODCLEAN = ESTODCLEAN // clean up dispensable regions
227     };
228     enum { // enumeration for options of optimization
229     OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
230     OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
231     };
232     enum { // enumeration for options of document retrieval
233     GDNOATTR = ESTGDNOATTR, // no attributes
234     GDNOTEXT = ESTGDNOTEXT // no text
235     };
236     Database() {
237     /**
238     * constructor(dummy)
239     */
240 dpavlin 6 db = NULL;
241     ecode = ERRNOERR;
242 dpavlin 1 }
243     ~Database() {
244 dpavlin 6 if (db) close();
245 dpavlin 1 }
246     bool open(const char * dbname, int mode) {
247     /**
248     * open the database
249     */
250 dpavlin 6 if (db) close();
251     int ec;
252     db = est_mtdb_open(dbname, mode, &ec);
253     if (!db) ecode = ec;
254 dpavlin 1 return db;
255     }
256     bool close() {
257     /**
258     * close the database
259     */
260 dpavlin 6 if (!db) throw IOError("closed database");
261     int ec;
262     bool result = est_mtdb_close(db, &ec);
263     if (!result) ecode = ec;
264     db = NULL;
265     return result;
266 dpavlin 1 }
267     bool put_doc(Document *doc, int options) {
268     /**
269     * add a document to a database
270     */
271 dpavlin 6 if (!db) throw IOError("closed database");
272     bool result = est_mtdb_put_doc(db, doc->doc, options);
273     if (!result) ecode = est_mtdb_error(db);
274     return result;
275 dpavlin 1 }
276     std::vector<int> * search(Condition * cond, int options) {
277     /**
278     * search documents corresponding a condition for a database
279     */
280 dpavlin 6 if (!db) throw IOError("closed database");
281 dpavlin 1 int resnum;
282     int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
283     std::vector<int> *numbers = new std::vector<int>;
284     for (int i=0; i<resnum; i++) {
285     numbers->push_back(result[i]);
286     }
287     return numbers;
288     }
289     static const char * err_msg(int ecode) {
290     /**
291     * get the string of an error
292     */
293     return est_err_msg(ecode);
294     }
295     int error() {
296     /**
297     * get the last happended error code of a database
298     */
299 dpavlin 6 return ecode;
300 dpavlin 1 }
301     bool fatal() {
302     /**
303     * check whether a database has a fatal error
304     */
305 dpavlin 6 if (!db) throw IOError("closed database");
306 dpavlin 1 return est_mtdb_fatal(db);
307     }
308     bool flush(int _max) {
309     /**
310     * flush index words in the cache of a database
311     */
312 dpavlin 6 if (!db) throw IOError("closed database");
313     bool result = est_mtdb_flush(db, _max);
314     if (!result) ecode = est_mtdb_error(db);
315     return result;
316 dpavlin 1 }
317     bool sync() {
318     /**
319     * synchronize updating contents of a database
320     */
321 dpavlin 6 if (!db) throw IOError("closed database");
322     bool result = est_mtdb_sync(db);
323     if (!result) ecode = est_mtdb_error(db);
324     return result;
325 dpavlin 1 }
326     bool optimize(int options) {
327     /**
328     * optimize a database
329     */
330 dpavlin 6 if (!db) throw IOError("closed database");
331     bool result = est_mtdb_optimize(db, options);
332     if (!result) ecode = est_mtdb_error(db);
333     return result;
334 dpavlin 1 }
335     bool out_doc(int id, int options) {
336     /**
337     * remove a document from a database
338     */
339 dpavlin 6 if (!db) throw IOError("closed database");
340     bool result = est_mtdb_out_doc(db, id, options);
341     if (!result) ecode = est_mtdb_error(db);
342     return result;
343 dpavlin 1 }
344 dpavlin 6 bool edit_doc(Document *doc) {
345     /**
346     * edit an attribute of a document in a database
347     */
348     if (!db) throw IOError("closed database");
349     bool result = est_mtdb_edit_doc(db, doc->doc);
350     if (!result) ecode = est_mtdb_error(db);
351     return result;
352     }
353 dpavlin 1 Document * get_doc(int id, int options) {
354     /**
355     * retrieve a document in a database
356     */
357 dpavlin 6 if (!db) throw IOError("closed database");
358 dpavlin 1 ESTDOC *doc = est_mtdb_get_doc(db, id, options);
359     if (!doc) {
360 dpavlin 6 ecode = est_mtdb_error(db);
361 dpavlin 1 throw est_err_msg(est_mtdb_error(db));
362     } else {
363     return new Document(doc);
364     }
365     }
366     int uri_to_id(const char *uri) {
367     /**
368     * get the ID of a document spacified by URI
369     */
370 dpavlin 6 if (!db) throw IOError("closed database");
371     int result = est_mtdb_uri_to_id(db, uri);
372     if(result == -1) ecode = est_mtdb_error(db);
373     return result;
374 dpavlin 1 }
375     std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
376     /**
377     * extract keywords of a document object
378     */
379 dpavlin 6 if (!db) throw IOError("closed database");
380 dpavlin 1 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
381     CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
382     cbmapiterinit(keys);
383     int ksiz;
384     while (const char *key = cbmapiternext(keys, &ksiz)) {
385     mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
386     }
387     return mss;
388     }
389     const char * name() {
390     /**
391     * get the name of a database
392     */
393 dpavlin 6 if (!db) throw IOError("closed database");
394 dpavlin 1 return est_mtdb_name(db);
395     }
396     int doc_num() {
397     /**
398     * get the number of documents in a database
399     */
400 dpavlin 6 if (!db) throw IOError("closed database");
401 dpavlin 1 return est_mtdb_doc_num(db);
402     }
403     int word_num() {
404     /**
405     * get the number of unique words in a database
406     */
407 dpavlin 6 if (!db) throw IOError("closed database");
408 dpavlin 1 return est_mtdb_word_num(db);
409     }
410     double size() {
411     /**
412     * get the size of a database
413     */
414 dpavlin 6 if (!db) throw IOError("closed database");
415 dpavlin 1 return est_mtdb_size(db);
416     }
417     void set_cache_size(size_t size, int anum, int tnum) {
418     /**
419     * set the maximum size of the cache memory of a database
420     */
421 dpavlin 6 if (!db) throw IOError("closed database");
422 dpavlin 1 est_mtdb_set_cache_size(db, size, anum, tnum);
423     }
424     void set_special_cache(const char *name, int num) {
425     /**
426     * Set the special cache for narrowing and sorting
427     * with document attributes
428     */
429     est_mtdb_set_special_cache(db, name, num);
430     }
431     };
432 dpavlin 6
433     static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) {
434     std::vector<std::string> * vs = new std::vector<std::string>;
435     CBLIST *list;
436     list = cblistopen();
437     est_break_text(text, list, norm, tail);
438     for (int i=0; i < cblistnum(list); i++) {
439     vs->push_back(cblistval(list, i, NULL));
440     }
441     cblistclose(list);
442     return vs;
443     }
444    
445     static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) {
446     std::vector<std::string> * vs = new std::vector<std::string>;
447     CBLIST *list;
448     list = cblistopen();
449     est_break_text_perfng(text, list, norm, tail);
450     for (int i=0; i < cblistnum(list); i++) {
451     vs->push_back(cblistval(list, i, NULL));
452     }
453     cblistclose(list);
454     return vs;
455     }
456    
457 dpavlin 1 };

  ViewVC Help
Powered by ViewVC 1.1.26