--- 0.0.12/doc/ruby_2HyperEstraierWrapper_8cpp-source.html 2005/09/03 18:02:31 1 +++ 0.0.13/doc/ruby_2HyperEstraierWrapper_8cpp-source.html 2005/09/09 15:26:39 14 @@ -16,258 +16,315 @@ 00009 #include <vector> 00010 #include <map> 00011 #include <cassert> -00012 -00013 namespace estraier { -00014 class Condition { -00015 public: -00016 enum { // enumeration for options -00017 SURE = ESTCONDSURE, // check every N-gram key -00018 USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one -00019 FAST = ESTCONDFAST, // check N-gram keys skipping by two -00020 AGITO = ESTCONDAGITO, // check N-gram keys skipping by three -00021 NOIDF = ESTCONDNOIDF, // without TF-IDF tuning -00022 SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase -00023 }; -00024 ESTCOND * cond; -00025 Condition() { -00029 cond = est_cond_new(); -00030 } -00031 ~Condition() { -00035 est_cond_delete(cond); -00036 } -00037 void set_phrase(const char *phrase) { -00041 est_cond_set_phrase(cond, phrase); -00042 } -00043 void add_attr(const char *expr) { -00047 est_cond_add_attr(cond, expr); -00048 } -00049 void set_order(const char *expr) { -00053 est_cond_set_order(cond, expr); -00054 } -00055 void set_max(int _max) { -00059 est_cond_set_max(cond, _max); -00060 } -00061 void set_options(int options) { -00065 est_cond_set_options(cond, options); -00066 } -00067 }; -00068 -00069 class Document { -00070 private: -00071 std::string text_buf; -00072 public: -00073 ESTDOC *doc; -00074 -00075 Document() { -00079 doc = est_doc_new(); -00080 } -00081 Document(const char* draft) { -00085 doc = est_doc_new_from_draft(draft); +00012 #include <stdexcept> +00013 +00014 namespace estraier { +00015 +00016 class IOError : public std::runtime_error { +00017 public: +00018 explicit IOError (const std::string& w) : std::runtime_error(w) {} +00019 }; +00020 +00021 class Condition { +00022 public: +00023 enum { // enumeration for options +00024 SURE = ESTCONDSURE, // check every N-gram key +00025 USUAL = ESTCONDUSUAL, // check N-gram keys skipping by one +00026 FAST = ESTCONDFAST, // check N-gram keys skipping by two +00027 AGITO = ESTCONDAGITO, // check N-gram keys skipping by three +00028 NOIDF = ESTCONDNOIDF, // without TF-IDF tuning +00029 SIMPLE = ESTCONDSIMPLE, // with the simplefied phrase +00030 }; +00031 ESTCOND * cond; +00032 Condition() { +00036 cond = est_cond_new(); +00037 } +00038 ~Condition() { +00042 est_cond_delete(cond); +00043 } +00044 void set_phrase(const char *phrase) { +00048 est_cond_set_phrase(cond, phrase); +00049 } +00050 void add_attr(const char *expr) { +00054 est_cond_add_attr(cond, expr); +00055 } +00056 void set_order(const char *expr) { +00060 est_cond_set_order(cond, expr); +00061 } +00062 void set_max(int _max) { +00066 est_cond_set_max(cond, _max); +00067 } +00068 void set_options(int options) { +00072 est_cond_set_options(cond, options); +00073 } +00074 }; +00075 +00076 class Document { +00077 private: +00078 std::string text_buf; +00079 public: +00080 ESTDOC *doc; +00081 Document() { +00085 doc = est_doc_new(); 00086 } -00087 Document(ESTDOC *_doc) { -00091 doc = _doc; +00087 Document(const char* draft) { +00091 doc = est_doc_new_from_draft(draft); 00092 } -00093 ~Document() { -00097 est_doc_delete(doc); +00093 Document(ESTDOC *_doc) { +00097 doc = _doc; 00098 } -00099 void add_attr(const char * name, const char*value) { -00103 est_doc_add_attr(doc, name, value); +00099 ~Document() { +00103 est_doc_delete(doc); 00104 } -00105 void add_text(const char *text) { -00109 est_doc_add_text(doc, text); +00105 void add_attr(const char * name, const char*value) { +00109 est_doc_add_attr(doc, name, value); 00110 } -00111 void add_hidden_text(const char * text) { -00115 est_doc_add_hidden_text(doc, text); +00111 void add_text(const char *text) { +00115 est_doc_add_text(doc, text); 00116 } -00117 int id() { -00121 return est_doc_id(doc); +00117 void add_hidden_text(const char * text) { +00121 est_doc_add_hidden_text(doc, text); 00122 } -00123 std::vector<std::string> * attr_names() { -00127 std::vector<std::string> * vs = new std::vector<std::string>; -00128 CBLIST * attr_names = est_doc_attr_names(doc); -00129 for (int i=0; i < cblistnum(attr_names); i++) { -00130 vs->push_back(cblistval(attr_names, i, NULL)); -00131 } -00132 cblistclose(attr_names); -00133 return vs; -00134 } -00135 const char * attr(const char *name) { -00139 return est_doc_attr(doc, name); +00123 int id() { +00127 return est_doc_id(doc); +00128 } +00129 std::vector<std::string> * attr_names() { +00133 std::vector<std::string> * vs = new std::vector<std::string>; +00134 CBLIST * attr_names = est_doc_attr_names(doc); +00135 for (int i=0; i < cblistnum(attr_names); i++) { +00136 vs->push_back(cblistval(attr_names, i, NULL)); +00137 } +00138 cblistclose(attr_names); +00139 return vs; 00140 } -00141 const char * cat_texts() { -00145 // return est_doc_cat_texts(doc); -00146 return "This is mockup!"; -00147 } -00148 std::vector<std::string>* texts() { -00152 std::vector<std::string> * vs = new std::vector<std::string>; -00153 const CBLIST *texts; -00154 texts = est_doc_texts(doc); -00155 for(int i = 0; i < cblistnum(texts); i++) { -00156 vs->push_back(cblistval(texts, i, NULL)); -00157 } -00158 return vs; -00159 } -00160 const char * dump_draft() { -00164 return est_doc_dump_draft(doc); -00165 } -00166 const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) { -00170 CBLIST * words; -00171 std::vector<std::string>::iterator iter; -00172 -00173 words = cblistopen(); -00174 -00175 for (iter = _words.begin(); _words.end() != iter; iter++) { -00176 cblistpush(words, iter->c_str(), -1); -00177 } -00178 -00179 const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth); -00180 -00181 cblistclose(words); -00182 +00141 const char * attr(const char *name) { +00145 return est_doc_attr(doc, name); +00146 } +00147 const char * cat_texts() { +00151 return est_doc_cat_texts(doc); +00152 } +00153 std::vector<std::string>* texts() { +00157 std::vector<std::string> * vs = new std::vector<std::string>; +00158 const CBLIST *texts; +00159 texts = est_doc_texts(doc); +00160 for(int i = 0; i < cblistnum(texts); i++) { +00161 vs->push_back(cblistval(texts, i, NULL)); +00162 } +00163 return vs; +00164 } +00165 const char * dump_draft() { +00169 return est_doc_dump_draft(doc); +00170 } +00171 const char * make_snippet(std::vector<std::string> _words, int wwidth, int hwidth, int awidth) { +00175 CBLIST * words; +00176 std::vector<std::string>::iterator iter; +00177 words = cblistopen(); +00178 for (iter = _words.begin(); _words.end() != iter; iter++) { +00179 cblistpush(words, iter->c_str(), -1); +00180 } +00181 const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth); +00182 cblistclose(words); 00183 return result; 00184 } -00185 }; -00186 -00187 class Database { -00188 private: -00189 ESTMTDB *db; -00190 public: -00191 enum { // enumeration for error codes -00192 ERRNOERR = ESTENOERR, // no error -00193 ERRINVAL = ESTEINVAL, // invalid argument -00194 ERRACCES = ESTEACCES, // access forbidden -00195 ERRLOCK = ESTELOCK, // lock failure -00196 ERRDB = ESTEDB, // database problem -00197 ERRIO = ESTEIO, // I/O problem -00198 ERRNOITEM = ESTENOITEM, // no item -00199 ERRMISC = ESTEMISC // miscellaneous -00200 }; -00201 enum { // enumeration for open modes -00202 DBREADER = ESTDBREADER, // open as a reader -00203 DBWRITER = ESTDBWRITER, // open as a writer -00204 DBCREAT = ESTDBCREAT, // a writer creating -00205 DBTRUNC = ESTDBTRUNC, // a writer truncating -00206 DBNOLCK = ESTDBNOLCK, // open without locking -00207 DBLCKNB = ESTDBLCKNB, // lock without blocking -00208 DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer -00209 }; -00210 enum { // enumeration for options of document registration -00211 PDCLEAN = ESTPDCLEAN // clean up dispensable regions -00212 }; -00213 enum { // enumeration for options of document deletion -00214 ODCLEAN = ESTODCLEAN // clean up dispensable regions -00215 }; -00216 enum { // enumeration for options of optimization -00217 OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted -00218 OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files +00185 const char * hidden_texts() { +00189 return est_doc_hidden_texts(doc); +00190 } +00191 }; +00192 +00193 class Database { +00194 private: +00195 ESTMTDB *db; +00196 int ecode; +00197 public: +00198 enum { // enumeration for error codes +00199 ERRNOERR = ESTENOERR, // no error +00200 ERRINVAL = ESTEINVAL, // invalid argument +00201 ERRACCES = ESTEACCES, // access forbidden +00202 ERRLOCK = ESTELOCK, // lock failure +00203 ERRDB = ESTEDB, // database problem +00204 ERRIO = ESTEIO, // I/O problem +00205 ERRNOITEM = ESTENOITEM, // no item +00206 ERRMISC = ESTEMISC // miscellaneous +00207 }; +00208 enum { // enumeration for open modes +00209 DBREADER = ESTDBREADER, // open as a reader +00210 DBWRITER = ESTDBWRITER, // open as a writer +00211 DBCREAT = ESTDBCREAT, // a writer creating +00212 DBTRUNC = ESTDBTRUNC, // a writer truncating +00213 DBNOLCK = ESTDBNOLCK, // open without locking +00214 DBLCKNB = ESTDBLCKNB, // lock without blocking +00215 DBPERFNG = ESTDBPERFNG // use perfect N-gram analyzer +00216 }; +00217 enum { // enumeration for options of document registration +00218 PDCLEAN = ESTPDCLEAN // clean up dispensable regions 00219 }; -00220 enum { // enumeration for options of document retrieval -00221 GDNOATTR = ESTGDNOATTR, // no attributes -00222 GDNOTEXT = ESTGDNOTEXT // no text -00223 }; -00224 Database() { -00228 } -00229 ~Database() { -00230 close(); -00231 } -00232 bool open(const char * dbname, int mode) { -00236 int ecode; -00237 db = est_mtdb_open(dbname, mode, &ecode); -00238 return db; -00239 } -00240 bool close() { -00244 if (db) { -00245 int ecode; -00246 bool result = est_mtdb_close(db, &ecode); -00247 db = NULL; -00248 return result; -00249 } else { -00250 return false; -00251 } -00252 } -00253 bool put_doc(Document *doc, int options) { -00257 return est_mtdb_put_doc(db, doc->doc, options); -00258 } -00259 std::vector<int> * search(Condition * cond, int options) { -00263 int resnum; -00264 int * result = est_mtdb_search(db, cond->cond, &resnum, NULL); -00265 std::vector<int> *numbers = new std::vector<int>; -00266 for (int i=0; i<resnum; i++) { -00267 numbers->push_back(result[i]); -00268 } -00269 return numbers; +00220 enum { // enumeration for options of document deletion +00221 ODCLEAN = ESTODCLEAN // clean up dispensable regions +00222 }; +00223 enum { // enumeration for options of optimization +00224 OPTNOPURGE = ESTOPTNOPURGE, // omit purging dispensable region of deleted +00225 OPTNODBOPT = ESTOPTNODBOPT // omit optimizization of the database files +00226 }; +00227 enum { // enumeration for options of document retrieval +00228 GDNOATTR = ESTGDNOATTR, // no attributes +00229 GDNOTEXT = ESTGDNOTEXT // no text +00230 }; +00231 Database() { +00235 db = NULL; +00236 ecode = ERRNOERR; +00237 } +00238 ~Database() { +00239 if (db) close(); +00240 } +00241 bool open(const char * dbname, int mode) { +00245 if (db) close(); +00246 int ec; +00247 db = est_mtdb_open(dbname, mode, &ec); +00248 if (!db) ecode = ec; +00249 return db; +00250 } +00251 bool close() { +00255 if (!db) throw IOError("closed database"); +00256 int ec; +00257 bool result = est_mtdb_close(db, &ec); +00258 if (!result) ecode = ec; +00259 db = NULL; +00260 return result; +00261 } +00262 bool put_doc(Document *doc, int options) { +00266 if (!db) throw IOError("closed database"); +00267 bool result = est_mtdb_put_doc(db, doc->doc, options); +00268 if (!result) ecode = est_mtdb_error(db); +00269 return result; 00270 } -00271 static const char * err_msg(int ecode) { -00275 return est_err_msg(ecode); -00276 } -00277 int error() { -00281 return est_mtdb_error(db); -00282 } -00283 bool fatal() { -00287 return est_mtdb_fatal(db); -00288 } -00289 bool flush(int _max) { -00293 return est_mtdb_flush(db, _max); -00294 } -00295 bool sync() { -00299 return est_mtdb_sync(db); -00300 } -00301 bool optimize(int options) { -00305 return est_mtdb_optimize(db, options); -00306 } -00307 bool out_doc(int id, int options) { -00311 return est_mtdb_out_doc(db, id, options); -00312 } -00313 Document * get_doc(int id, int options) { -00317 ESTDOC *doc = est_mtdb_get_doc(db, id, options); -00318 if (!doc) { -00319 throw est_err_msg(est_mtdb_error(db)); -00320 } else { -00321 return new Document(doc); -00322 } -00323 } -00324 int uri_to_id(const char *uri) { -00328 return est_mtdb_uri_to_id(db, uri); +00271 std::vector<int> * search(Condition * cond, int options) { +00275 if (!db) throw IOError("closed database"); +00276 int resnum; +00277 int * result = est_mtdb_search(db, cond->cond, &resnum, NULL); +00278 std::vector<int> *numbers = new std::vector<int>; +00279 for (int i=0; i<resnum; i++) { +00280 numbers->push_back(result[i]); +00281 } +00282 return numbers; +00283 } +00284 static const char * err_msg(int ecode) { +00288 return est_err_msg(ecode); +00289 } +00290 int error() { +00294 return ecode; +00295 } +00296 bool fatal() { +00300 if (!db) throw IOError("closed database"); +00301 return est_mtdb_fatal(db); +00302 } +00303 bool flush(int _max) { +00307 if (!db) throw IOError("closed database"); +00308 bool result = est_mtdb_flush(db, _max); +00309 if (!result) ecode = est_mtdb_error(db); +00310 return result; +00311 } +00312 bool sync() { +00316 if (!db) throw IOError("closed database"); +00317 bool result = est_mtdb_sync(db); +00318 if (!result) ecode = est_mtdb_error(db); +00319 return result; +00320 } +00321 bool optimize(int options) { +00325 if (!db) throw IOError("closed database"); +00326 bool result = est_mtdb_optimize(db, options); +00327 if (!result) ecode = est_mtdb_error(db); +00328 return result; 00329 } -00330 std::map<std::string, std::string> * etch_doc(Document * doc, int max) { -00334 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>; -00335 -00336 CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max); -00337 -00338 cbmapiterinit(keys); -00339 int ksiz; -00340 while (const char *key = cbmapiternext(keys, &ksiz)) { -00341 mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL))); -00342 } -00343 return mss; -00344 } -00345 bool iter_init() { -00349 return est_mtdb_iter_init(db); -00350 } -00351 int iter_next() { -00355 return est_mtdb_iter_next(db); -00356 } -00357 const char * name() { -00361 return est_mtdb_name(db); -00362 } -00363 int doc_num() { -00367 return est_mtdb_doc_num(db); -00368 } -00369 int word_num() { -00373 return est_mtdb_word_num(db); -00374 } -00375 double size() { -00379 return est_mtdb_size(db); -00380 } -00381 void set_cache_size(size_t size, int anum, int tnum) { -00385 est_mtdb_set_cache_size(db, size, anum, tnum); -00386 } -00387 void set_special_cache(const char *name, int num) { -00392 est_mtdb_set_special_cache(db, name, num); -00393 } -00394 }; -00395 }; -
Generated on Thu Sep 1 08:07:53 2005 for HyperEstraierWrapper by  +00330 bool out_doc(int id, int options) { +00334 if (!db) throw IOError("closed database"); +00335 bool result = est_mtdb_out_doc(db, id, options); +00336 if (!result) ecode = est_mtdb_error(db); +00337 return result; +00338 } +00339 bool edit_doc(Document *doc) { +00343 if (!db) throw IOError("closed database"); +00344 bool result = est_mtdb_edit_doc(db, doc->doc); +00345 if (!result) ecode = est_mtdb_error(db); +00346 return result; +00347 } +00348 Document * get_doc(int id, int options) { +00352 if (!db) throw IOError("closed database"); +00353 ESTDOC *doc = est_mtdb_get_doc(db, id, options); +00354 if (!doc) { +00355 ecode = est_mtdb_error(db); +00356 throw est_err_msg(est_mtdb_error(db)); +00357 } else { +00358 return new Document(doc); +00359 } +00360 } +00361 int uri_to_id(const char *uri) { +00365 if (!db) throw IOError("closed database"); +00366 int result = est_mtdb_uri_to_id(db, uri); +00367 if(result == -1) ecode = est_mtdb_error(db); +00368 return result; +00369 } +00370 std::map<std::string, std::string> * etch_doc(Document * doc, int max) { +00374 if (!db) throw IOError("closed database"); +00375 std::map<std::string, std::string> * mss = new std::map<std::string, std::string>; +00376 CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max); +00377 cbmapiterinit(keys); +00378 int ksiz; +00379 while (const char *key = cbmapiternext(keys, &ksiz)) { +00380 mss->insert(std::make_pair(key, cbmapget(keys, key, ksiz, NULL))); +00381 } +00382 return mss; +00383 } +00384 const char * name() { +00388 if (!db) throw IOError("closed database"); +00389 return est_mtdb_name(db); +00390 } +00391 int doc_num() { +00395 if (!db) throw IOError("closed database"); +00396 return est_mtdb_doc_num(db); +00397 } +00398 int word_num() { +00402 if (!db) throw IOError("closed database"); +00403 return est_mtdb_word_num(db); +00404 } +00405 double size() { +00409 if (!db) throw IOError("closed database"); +00410 return est_mtdb_size(db); +00411 } +00412 void set_cache_size(size_t size, int anum, int tnum) { +00416 if (!db) throw IOError("closed database"); +00417 est_mtdb_set_cache_size(db, size, anum, tnum); +00418 } +00419 void set_special_cache(const char *name, int num) { +00424 est_mtdb_set_special_cache(db, name, num); +00425 } +00426 }; +00427 +00428 static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) { +00429 std::vector<std::string> * vs = new std::vector<std::string>; +00430 CBLIST *list; +00431 list = cblistopen(); +00432 est_break_text(text, list, norm, tail); +00433 for (int i=0; i < cblistnum(list); i++) { +00434 vs->push_back(cblistval(list, i, NULL)); +00435 } +00436 cblistclose(list); +00437 return vs; +00438 } +00439 +00440 static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) { +00441 std::vector<std::string> * vs = new std::vector<std::string>; +00442 CBLIST *list; +00443 list = cblistopen(); +00444 est_break_text_perfng(text, list, norm, tail); +00445 for (int i=0; i < cblistnum(list); i++) { +00446 vs->push_back(cblistval(list, i, NULL)); +00447 } +00448 cblistclose(list); +00449 return vs; +00450 } +00451 +00452 }; +
Generated on Thu Sep 8 02:02:20 2005 for HyperEstraierWrapper by  doxygen 1.4.4