9 |
#include <vector> |
#include <vector> |
10 |
#include <map> |
#include <map> |
11 |
#include <cassert> |
#include <cassert> |
12 |
|
#include <stdexcept> |
13 |
|
|
14 |
|
/* backward compatibility for 0.5.4 */ |
15 |
|
#ifndef ESTCONDAGITO |
16 |
|
#define ESTCONDAGITO ESTCONDAGIT |
17 |
|
#endif |
18 |
|
|
19 |
namespace estraier { |
namespace estraier { |
20 |
|
|
21 |
|
/**
 * Exception type thrown by the wrapper when an operation is attempted
 * on a database handle that is not open.
 *
 * Derives from std::runtime_error, so the message passed to the
 * constructor is available through what().
 */
class IOError : public std::runtime_error {
public:
    /** build the exception from a human-readable message */
    explicit IOError (const std::string& w) : std::runtime_error(w) {}
};
25 |
|
|
26 |
class Condition { |
class Condition { |
27 |
public: |
public: |
28 |
enum { // enumeration for options |
enum { // enumeration for options |
83 |
std::string text_buf; |
std::string text_buf; |
84 |
public: |
public: |
85 |
ESTDOC *doc; |
ESTDOC *doc; |
|
|
|
86 |
Document() { |
Document() { |
87 |
/** |
/** |
88 |
* constructor |
* constructor |
153 |
/** |
/** |
154 |
* get a list of sentences of the text of a document object |
* get a list of sentences of the text of a document object |
155 |
*/ |
*/ |
156 |
// return est_doc_cat_texts(doc); |
return est_doc_cat_texts(doc); |
|
return "This is mockup!"; |
|
157 |
} |
} |
158 |
std::vector<std::string>* texts() { |
std::vector<std::string>* texts() { |
159 |
/** |
/** |
179 |
*/ |
*/ |
180 |
CBLIST * words; |
CBLIST * words; |
181 |
std::vector<std::string>::iterator iter; |
std::vector<std::string>::iterator iter; |
|
|
|
182 |
words = cblistopen(); |
words = cblistopen(); |
|
|
|
183 |
for (iter = _words.begin(); _words.end() != iter; iter++) { |
for (iter = _words.begin(); _words.end() != iter; iter++) { |
184 |
cblistpush(words, iter->c_str(), -1); |
cblistpush(words, iter->c_str(), -1); |
185 |
} |
} |
|
|
|
186 |
const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth); |
const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth); |
|
|
|
187 |
cblistclose(words); |
cblistclose(words); |
|
|
|
188 |
return result; |
return result; |
189 |
} |
} |
190 |
|
const char * hidden_texts() { |
191 |
|
/** |
192 |
|
* get the hidden texts of a document object. |
193 |
|
*/ |
194 |
|
return est_doc_hidden_texts(doc); |
195 |
|
} |
196 |
}; |
}; |
197 |
|
|
198 |
class Database { |
class Database { |
199 |
private: |
private: |
200 |
ESTMTDB *db; |
ESTMTDB *db; |
201 |
|
int ecode; |
202 |
public: |
public: |
203 |
enum { // enumeration for error codes |
enum { // enumeration for error codes |
204 |
ERRNOERR = ESTENOERR, // no error |
ERRNOERR = ESTENOERR, // no error |
237 |
/** |
/** |
238 |
* constructor(dummy) |
* constructor(dummy) |
239 |
*/ |
*/ |
240 |
|
db = NULL; |
241 |
|
ecode = ERRNOERR; |
242 |
} |
} |
243 |
~Database() {
    /**
     * destructor
     *
     * close() throws IOError when no handle is held, so it is only
     * invoked while `db` is non-NULL; the destructor itself never
     * reaches that throw.
     */
    if (db) close();
}
246 |
bool open(const char * dbname, int mode) { |
bool open(const char * dbname, int mode) { |
247 |
/** |
/** |
248 |
* open the database |
* open the database |
249 |
*/ |
*/ |
250 |
int ecode; |
if (db) close(); |
251 |
db = est_mtdb_open(dbname, mode, &ecode); |
int ec; |
252 |
|
db = est_mtdb_open(dbname, mode, &ec); |
253 |
|
if (!db) ecode = ec; |
254 |
return db; |
return db; |
255 |
} |
} |
256 |
bool close() { |
bool close() { |
257 |
/** |
/** |
258 |
* close the database |
* close the database |
259 |
*/ |
*/ |
260 |
if (db) { |
if (!db) throw IOError("closed database"); |
261 |
int ecode; |
int ec; |
262 |
bool result = est_mtdb_close(db, &ecode); |
bool result = est_mtdb_close(db, &ec); |
263 |
db = NULL; |
if (!result) ecode = ec; |
264 |
return result; |
db = NULL; |
265 |
} else { |
return result; |
|
return false; |
|
|
} |
|
266 |
} |
} |
267 |
bool put_doc(Document *doc, int options) {
    /**
     * add a document to a database
     *
     * Throws IOError("closed database") when no handle is held.
     * On failure the value of est_mtdb_error() is stored in `ecode`.
     * Returns the result of est_mtdb_put_doc().
     */
    if (!db) throw IOError("closed database");
    bool result = est_mtdb_put_doc(db, doc->doc, options);
    if (!result) ecode = est_mtdb_error(db);
    return result;
}
276 |
std::vector<int> * search(Condition * cond, int options) { |
std::vector<int> * search(Condition * cond, int options) { |
277 |
/** |
/** |
278 |
* search documents corresponding to a condition for a database |
* search documents corresponding to a condition for a database |
279 |
*/ |
*/ |
280 |
|
if (!db) throw IOError("closed database"); |
281 |
int resnum; |
int resnum; |
282 |
int * result = est_mtdb_search(db, cond->cond, &resnum, NULL); |
int * result = est_mtdb_search(db, cond->cond, &resnum, NULL); |
283 |
std::vector<int> *numbers = new std::vector<int>; |
std::vector<int> *numbers = new std::vector<int>; |
296 |
/** |
/** |
297 |
* get the last happened error code of a database |
* get the last happened error code of a database |
298 |
*/ |
*/ |
299 |
return est_mtdb_error(db); |
return ecode; |
300 |
} |
} |
301 |
bool fatal() { |
bool fatal() { |
302 |
/** |
/** |
303 |
* check whether a database has a fatal error |
* check whether a database has a fatal error |
304 |
*/ |
*/ |
305 |
|
if (!db) throw IOError("closed database"); |
306 |
return est_mtdb_fatal(db); |
return est_mtdb_fatal(db); |
307 |
} |
} |
308 |
bool flush(int _max) { |
bool flush(int _max) { |
309 |
/** |
/** |
310 |
* flush index words in the cache of a database |
* flush index words in the cache of a database |
311 |
*/ |
*/ |
312 |
return est_mtdb_flush(db, _max); |
if (!db) throw IOError("closed database"); |
313 |
|
bool result = est_mtdb_flush(db, _max); |
314 |
|
if (!result) ecode = est_mtdb_error(db); |
315 |
|
return result; |
316 |
} |
} |
317 |
bool sync() { |
bool sync() { |
318 |
/** |
/** |
319 |
* synchronize updating contents of a database |
* synchronize updating contents of a database |
320 |
*/ |
*/ |
321 |
return est_mtdb_sync(db); |
if (!db) throw IOError("closed database"); |
322 |
|
bool result = est_mtdb_sync(db); |
323 |
|
if (!result) ecode = est_mtdb_error(db); |
324 |
|
return result; |
325 |
} |
} |
326 |
bool optimize(int options) { |
bool optimize(int options) { |
327 |
/** |
/** |
328 |
* optimize a database |
* optimize a database |
329 |
*/ |
*/ |
330 |
return est_mtdb_optimize(db, options); |
if (!db) throw IOError("closed database"); |
331 |
|
bool result = est_mtdb_optimize(db, options); |
332 |
|
if (!result) ecode = est_mtdb_error(db); |
333 |
|
return result; |
334 |
} |
} |
335 |
bool out_doc(int id, int options) { |
bool out_doc(int id, int options) { |
336 |
/** |
/** |
337 |
* remove a document from a database |
* remove a document from a database |
338 |
*/ |
*/ |
339 |
return est_mtdb_out_doc(db, id, options); |
if (!db) throw IOError("closed database"); |
340 |
|
bool result = est_mtdb_out_doc(db, id, options); |
341 |
|
if (!result) ecode = est_mtdb_error(db); |
342 |
|
return result; |
343 |
|
} |
344 |
|
bool edit_doc(Document *doc) {
    /**
     * edit an attribute of a document in a database
     *
     * Throws IOError("closed database") when no handle is held.
     * On failure the value of est_mtdb_error() is stored in `ecode`.
     * Returns the result of est_mtdb_edit_doc().
     */
    if (!db) throw IOError("closed database");
    bool result = est_mtdb_edit_doc(db, doc->doc);
    if (!result) ecode = est_mtdb_error(db);
    return result;
}
353 |
Document * get_doc(int id, int options) { |
Document * get_doc(int id, int options) { |
354 |
/** |
/** |
355 |
* retrieve a document in a database |
* retrieve a document in a database |
356 |
*/ |
*/ |
357 |
|
if (!db) throw IOError("closed database"); |
358 |
ESTDOC *doc = est_mtdb_get_doc(db, id, options); |
ESTDOC *doc = est_mtdb_get_doc(db, id, options); |
359 |
if (!doc) { |
if (!doc) { |
360 |
|
ecode = est_mtdb_error(db); |
361 |
throw est_err_msg(est_mtdb_error(db)); |
throw est_err_msg(est_mtdb_error(db)); |
362 |
} else { |
} else { |
363 |
return new Document(doc); |
return new Document(doc); |
367 |
/** |
/** |
368 |
* get the ID of a document specified by URI |
* get the ID of a document specified by URI |
369 |
*/ |
*/ |
370 |
return est_mtdb_uri_to_id(db, uri); |
if (!db) throw IOError("closed database"); |
371 |
|
int result = est_mtdb_uri_to_id(db, uri); |
372 |
|
if(result == -1) ecode = est_mtdb_error(db); |
373 |
|
return result; |
374 |
} |
} |
375 |
std::map<std::string, std::string> * etch_doc(Document * doc, int max) { |
std::map<std::string, std::string> * etch_doc(Document * doc, int max) { |
376 |
/** |
/** |
377 |
* extract keywords of a document object |
* extract keywords of a document object |
378 |
*/ |
*/ |
379 |
|
if (!db) throw IOError("closed database"); |
380 |
std::map<std::string, std::string> * mss = new std::map<std::string, std::string>; |
std::map<std::string, std::string> * mss = new std::map<std::string, std::string>; |
|
|
|
381 |
CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max); |
CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max); |
|
|
|
382 |
cbmapiterinit(keys); |
cbmapiterinit(keys); |
383 |
int ksiz; |
int ksiz; |
384 |
while (const char *key = cbmapiternext(keys, &ksiz)) { |
while (const char *key = cbmapiternext(keys, &ksiz)) { |
386 |
} |
} |
387 |
return mss; |
return mss; |
388 |
} |
} |
|
bool iter_init() { |
|
|
/** |
|
|
* initialize the iterator of a database |
|
|
*/ |
|
|
return est_mtdb_iter_init(db); |
|
|
} |
|
|
int iter_next() { |
|
|
/** |
|
|
* get the next ID of the iterator of a database |
|
|
*/ |
|
|
return est_mtdb_iter_next(db); |
|
|
} |
|
389 |
const char * name() { |
const char * name() { |
390 |
/** |
/** |
391 |
* get the name of a database |
* get the name of a database |
392 |
*/ |
*/ |
393 |
|
if (!db) throw IOError("closed database"); |
394 |
return est_mtdb_name(db); |
return est_mtdb_name(db); |
395 |
} |
} |
396 |
int doc_num() { |
int doc_num() { |
397 |
/** |
/** |
398 |
* get the number of documents in a database |
* get the number of documents in a database |
399 |
*/ |
*/ |
400 |
|
if (!db) throw IOError("closed database"); |
401 |
return est_mtdb_doc_num(db); |
return est_mtdb_doc_num(db); |
402 |
} |
} |
403 |
int word_num() { |
int word_num() { |
404 |
/** |
/** |
405 |
* get the number of unique words in a database |
* get the number of unique words in a database |
406 |
*/ |
*/ |
407 |
|
if (!db) throw IOError("closed database"); |
408 |
return est_mtdb_word_num(db); |
return est_mtdb_word_num(db); |
409 |
} |
} |
410 |
double size() { |
double size() { |
411 |
/** |
/** |
412 |
* get the size of a database |
* get the size of a database |
413 |
*/ |
*/ |
414 |
|
if (!db) throw IOError("closed database"); |
415 |
return est_mtdb_size(db); |
return est_mtdb_size(db); |
416 |
} |
} |
417 |
void set_cache_size(size_t size, int anum, int tnum) { |
void set_cache_size(size_t size, int anum, int tnum) { |
418 |
/** |
/** |
419 |
* set the maximum size of the cache memory of a database |
* set the maximum size of the cache memory of a database |
420 |
*/ |
*/ |
421 |
|
if (!db) throw IOError("closed database"); |
422 |
est_mtdb_set_cache_size(db, size, anum, tnum); |
est_mtdb_set_cache_size(db, size, anum, tnum); |
423 |
} |
} |
424 |
void set_special_cache(const char *name, int num) { |
void set_special_cache(const char *name, int num) { |
429 |
est_mtdb_set_special_cache(db, name, num); |
est_mtdb_set_special_cache(db, name, num); |
430 |
} |
} |
431 |
}; |
}; |
432 |
|
|
433 |
|
static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) { |
434 |
|
std::vector<std::string> * vs = new std::vector<std::string>; |
435 |
|
CBLIST *list; |
436 |
|
list = cblistopen(); |
437 |
|
est_break_text(text, list, norm, tail); |
438 |
|
for (int i=0; i < cblistnum(list); i++) { |
439 |
|
vs->push_back(cblistval(list, i, NULL)); |
440 |
|
} |
441 |
|
cblistclose(list); |
442 |
|
return vs; |
443 |
|
} |
444 |
|
|
445 |
|
static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) { |
446 |
|
std::vector<std::string> * vs = new std::vector<std::string>; |
447 |
|
CBLIST *list; |
448 |
|
list = cblistopen(); |
449 |
|
est_break_text_perfng(text, list, norm, tail); |
450 |
|
for (int i=0; i < cblistnum(list); i++) { |
451 |
|
vs->push_back(cblistval(list, i, NULL)); |
452 |
|
} |
453 |
|
cblistclose(list); |
454 |
|
return vs; |
455 |
|
} |
456 |
|
|
457 |
}; |
}; |