/[hyperestraier_wrappers]/trunk/perl/HyperEstraierWrapper.cpp
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/perl/HyperEstraierWrapper.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4 - (hide annotations)
Sat Sep 3 18:44:31 2005 UTC (18 years, 6 months ago) by dpavlin
File size: 9508 byte(s)
backward compatibility for 0.5.4
1 dpavlin 1 /**
2     * HyperEstraierWrapper.cpp - C++ wrapper for Hyper Estraier
3     */
4     #include <estraier.h>
5     #include <estmtdb.h>
6     #include <cabin.h>
7     #include <cstdlib>
8     #include <string>
9     #include <vector>
10     #include <map>
11     #include <cassert>
12    
13 dpavlin 4 /* Backward compatibility for 0.5.4: when the header does not define ESTCONDAGITO, alias it to ESTCONDAGIT (presumably the older spelling of the same option -- confirm against the 0.5.4 estraier.h). */
14     #ifndef ESTCONDAGITO
15     #define ESTCONDAGITO ESTCONDAGIT
16     #endif
17    
18 dpavlin 1 namespace estraier {
19     class Condition {
20     public:
21     enum { // enumeration for options
22     SURE = ESTCONDSURE, // check every N-gram key
23     USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one
24     FAST = ESTCONDFAST, // check N-gram keys skipping by two
25     AGITO = ESTCONDAGITO, // check N-gram keys skipping by three
26     NOIDF = ESTCONDNOIDF, // without TF-IDF tuning
27     SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase
28     };
29     ESTCOND * cond;
30     Condition() {
31     /**
32     * constructor
33     */
34     cond = est_cond_new();
35     }
36     ~Condition() {
37     /**
38     * destructor
39     */
40     est_cond_delete(cond);
41     }
42     void set_phrase(const char *phrase) {
43     /**
44     * set the search phrase
45     */
46     est_cond_set_phrase(cond, phrase);
47     }
48     void add_attr(const char *expr) {
49     /**
50     * set the attribute expression
51     */
52     est_cond_add_attr(cond, expr);
53     }
54     void set_order(const char *expr) {
55     /**
56     * set the order of a condition object
57     */
58     est_cond_set_order(cond, expr);
59     }
60     void set_max(int _max) {
61     /**
62     * set the maximum number of retrieval of a condition object
63     */
64     est_cond_set_max(cond, _max);
65     }
66     void set_options(int options) {
67     /**
68     * set options of retrieval of a condition object
69     */
70     est_cond_set_options(cond, options);
71     }
72     };
73    
74     class Document {
75     private:
76     std::string text_buf;
77     public:
78     ESTDOC *doc;
79    
80     Document() {
81     /**
82     * constructor
83     */
84     doc = est_doc_new();
85     }
86     Document(const char* draft) {
87     /**
88     * constructor
89     */
90     doc = est_doc_new_from_draft(draft);
91     }
92     Document(ESTDOC *_doc) {
93     /**
94     * constructor
95     */
96     doc = _doc;
97     }
98     ~Document() {
99     /**
100     * destructor
101     */
102     est_doc_delete(doc);
103     }
104     void add_attr(const char * name, const char*value) {
105     /**
106     * add an attribute to a document object
107     */
108     est_doc_add_attr(doc, name, value);
109     }
110     void add_text(const char *text) {
111     /**
112     * add a sentence of text to a document object
113     */
114     est_doc_add_text(doc, text);
115     }
116     void add_hidden_text(const char * text) {
117     /**
118     * add a hidden sentence to a document object
119     */
120     est_doc_add_hidden_text(doc, text);
121     }
122     int id() {
123     /**
124     * get the ID number of a document object
125     */
126     return est_doc_id(doc);
127     }
128     std::vector<std::string> * attr_names() {
129     /**
130     * get a list of attribute names of a document object
131     */
132     std::vector<std::string> * vs = new std::vector<std::string>;
133     CBLIST * attr_names = est_doc_attr_names(doc);
134     for (int i=0; i < cblistnum(attr_names); i++) {
135     vs->push_back(cblistval(attr_names, i, NULL));
136     }
137     cblistclose(attr_names);
138     return vs;
139     }
140     const char * attr(const char *name) {
141     /**
142     * get the value of an attribute of a document object
143     */
144     return est_doc_attr(doc, name);
145     }
146     const char * cat_texts() {
147     /**
148     * get a list of sentences of the text of a document object
149     */
150     // return est_doc_cat_texts(doc);
151     return "This is mockup!";
152     }
153     std::vector<std::string>* texts() {
154     /**
155     * get a list of sentences of the text of a document object
156     */
157     std::vector<std::string> * vs = new std::vector<std::string>;
158     const CBLIST *texts;
159     texts = est_doc_texts(doc);
160     for(int i = 0; i < cblistnum(texts); i++) {
161     vs->push_back(cblistval(texts, i, NULL));
162     }
163     return vs;
164     }
165     const char * dump_draft() {
166     /**
167     * dump draft data of a document object
168     */
169     return est_doc_dump_draft(doc);
170     }
171     const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) {
172     /**
173     * make a snippet of the body text of a document object
174     */
175     CBLIST * words;
176     std::vector<std::string>::iterator iter;
177    
178     words = cblistopen();
179    
180     for (iter = _words.begin(); _words.end() != iter; iter++) {
181     cblistpush(words, iter->c_str(), -1);
182     }
183    
184     const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth);
185    
186     cblistclose(words);
187    
188     return result;
189     }
190     };
191    
192     class Database {
193     private:
194     ESTMTDB *db;
195     public:
196     enum { // enumeration for error codes
197     ERRNOERR = ESTENOERR, // no error
198     ERRINVAL = ESTEINVAL, // invalid argument
199     ERRACCES = ESTEACCES, // access forbidden
200     ERRLOCK = ESTELOCK, // lock failure
201     ERRDB = ESTEDB, // database problem
202     ERRIO = ESTEIO, // I/O problem
203     ERRNOITEM = ESTENOITEM, // no item
204     ERRMISC = ESTEMISC // miscellaneous
205     };
206     enum { // enumeration for open modes
207     DBREADER = ESTDBREADER, // open as a reader
208     DBWRITER = ESTDBWRITER, // open as a writer
209     DBCREAT = ESTDBCREAT, // a writer creating
210     DBTRUNC = ESTDBTRUNC, // a writer truncating
211     DBNOLCK = ESTDBNOLCK, // open without locking
212     DBLCKNB = ESTDBLCKNB, // lock without blocking
213     DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer
214     };
215     enum { // enumeration for options of document registration
216     PDCLEAN = ESTPDCLEAN // clean up dispensable regions
217     };
218     enum { // enumeration for options of document deletion
219     ODCLEAN = ESTODCLEAN // clean up dispensable regions
220     };
221     enum { // enumeration for options of optimization
222     OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted
223     OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files
224     };
225     enum { // enumeration for options of document retrieval
226     GDNOATTR = ESTGDNOATTR, // no attributes
227     GDNOTEXT = ESTGDNOTEXT // no text
228     };
229     Database() {
230     /**
231     * constructor(dummy)
232     */
233     }
234     ~Database() {
235     close();
236     }
237     bool open(const char * dbname, int mode) {
238     /**
239     * open the database
240     */
241     int ecode;
242     db = est_mtdb_open(dbname, mode, &ecode);
243     return db;
244     }
245     bool close() {
246     /**
247     * close the database
248     */
249     if (db) {
250     int ecode;
251     bool result = est_mtdb_close(db, &ecode);
252     db = NULL;
253     return result;
254     } else {
255     return false;
256     }
257     }
258     bool put_doc(Document *doc, int options) {
259     /**
260     * add a document to a database
261     */
262     return est_mtdb_put_doc(db, doc->doc, options);
263     }
264     std::vector<int> * search(Condition * cond, int options) {
265     /**
266     * search documents corresponding a condition for a database
267     */
268     int resnum;
269     int * result = est_mtdb_search(db, cond->cond, &resnum, NULL);
270     std::vector<int> *numbers = new std::vector<int>;
271     for (int i=0; i<resnum; i++) {
272     numbers->push_back(result[i]);
273     }
274     return numbers;
275     }
276     static const char * err_msg(int ecode) {
277     /**
278     * get the string of an error
279     */
280     return est_err_msg(ecode);
281     }
282     int error() {
283     /**
284     * get the last happended error code of a database
285     */
286     return est_mtdb_error(db);
287     }
288     bool fatal() {
289     /**
290     * check whether a database has a fatal error
291     */
292     return est_mtdb_fatal(db);
293     }
294     bool flush(int _max) {
295     /**
296     * flush index words in the cache of a database
297     */
298     return est_mtdb_flush(db, _max);
299     }
300     bool sync() {
301     /**
302     * synchronize updating contents of a database
303     */
304     return est_mtdb_sync(db);
305     }
306     bool optimize(int options) {
307     /**
308     * optimize a database
309     */
310     return est_mtdb_optimize(db, options);
311     }
312     bool out_doc(int id, int options) {
313     /**
314     * remove a document from a database
315     */
316     return est_mtdb_out_doc(db, id, options);
317     }
318     Document * get_doc(int id, int options) {
319     /**
320     * retrieve a document in a database
321     */
322     ESTDOC *doc = est_mtdb_get_doc(db, id, options);
323     if (!doc) {
324     throw est_err_msg(est_mtdb_error(db));
325     } else {
326     return new Document(doc);
327     }
328     }
329     int uri_to_id(const char *uri) {
330     /**
331     * get the ID of a document spacified by URI
332     */
333     return est_mtdb_uri_to_id(db, uri);
334     }
335     std::map<std::string, std::string> * etch_doc(Document * doc, int max) {
336     /**
337     * extract keywords of a document object
338     */
339     std::map<std::string, std::string> * mss = new std::map<std::string, std::string>;
340    
341     CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max);
342    
343     cbmapiterinit(keys);
344     int ksiz;
345     while (const char *key = cbmapiternext(keys, &ksiz)) {
346     mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL)));
347     }
348     return mss;
349     }
350     bool iter_init() {
351     /**
352     * initialize the iterator of a database
353     */
354     return est_mtdb_iter_init(db);
355     }
356     int iter_next() {
357     /**
358     * get the next ID of the iterator of a database
359     */
360     return est_mtdb_iter_next(db);
361     }
362     const char * name() {
363     /**
364     * get the name of a database
365     */
366     return est_mtdb_name(db);
367     }
368     int doc_num() {
369     /**
370     * get the number of documents in a database
371     */
372     return est_mtdb_doc_num(db);
373     }
374     int word_num() {
375     /**
376     * get the number of unique words in a database
377     */
378     return est_mtdb_word_num(db);
379     }
380     double size() {
381     /**
382     * get the size of a database
383     */
384     return est_mtdb_size(db);
385     }
386     void set_cache_size(size_t size, int anum, int tnum) {
387     /**
388     * set the maximum size of the cache memory of a database
389     */
390     est_mtdb_set_cache_size(db, size, anum, tnum);
391     }
392     void set_special_cache(const char *name, int num) {
393     /**
394     * Set the special cache for narrowing and sorting
395     * with document attributes
396     */
397     est_mtdb_set_special_cache(db, name, num);
398     }
399     };
400     };

  ViewVC Help
Powered by ViewVC 1.1.26