9 |
#include <vector> |
#include <vector> |
10 |
#include <map> |
#include <map> |
11 |
#include <cassert> |
#include <cassert> |
12 |
|
#include <stdexcept> |
13 |
|
|
14 |
namespace estraier { |
namespace estraier { |
15 |
|
|
16 |
|
/**
 * Exception type derived from std::runtime_error.
 * In this file it is thrown with the message "closed database" by Database
 * methods that are called while the underlying handle is null.
 */
class IOError : public std::runtime_error {
public:
    /**
     * constructor
     * `w` is forwarded to std::runtime_error and is what what() reports.
     */
    explicit IOError(const std::string& w) : std::runtime_error(w) {}
};
20 |
|
|
21 |
class Condition { |
class Condition { |
22 |
public: |
public: |
23 |
enum { // enumeration for options |
enum { // enumeration for options |
78 |
std::string text_buf; |
std::string text_buf; |
79 |
public: |
public: |
80 |
ESTDOC *doc; |
ESTDOC *doc; |
|
|
|
81 |
Document() { |
Document() { |
82 |
/** |
/** |
83 |
* constructor |
* constructor |
148 |
/** |
/** |
149 |
* get a list of sentences of the text of a document object |
* get a list of sentences of the text of a document object |
150 |
*/ |
*/ |
151 |
// return est_doc_cat_texts(doc); |
return est_doc_cat_texts(doc); |
|
return "This is mockup!"; |
|
152 |
} |
} |
153 |
std::vector<std::string>* texts() { |
std::vector<std::string>* texts() { |
154 |
/** |
/** |
174 |
*/ |
*/ |
175 |
CBLIST * words; |
CBLIST * words; |
176 |
std::vector<std::string>::iterator iter; |
std::vector<std::string>::iterator iter; |
|
|
|
177 |
words = cblistopen(); |
words = cblistopen(); |
|
|
|
178 |
for (iter = _words.begin(); _words.end() != iter; iter++) { |
for (iter = _words.begin(); _words.end() != iter; iter++) { |
179 |
cblistpush(words, iter->c_str(), -1); |
cblistpush(words, iter->c_str(), -1); |
180 |
} |
} |
|
|
|
181 |
const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth); |
const char *result = est_doc_make_snippet(doc, words, wwidth, hwidth, awidth); |
|
|
|
182 |
cblistclose(words); |
cblistclose(words); |
|
|
|
183 |
return result; |
return result; |
184 |
} |
} |
185 |
|
const char * hidden_texts() { |
186 |
|
/** |
187 |
|
* get the hidden texts of a document object. |
188 |
|
*/ |
189 |
|
return est_doc_hidden_texts(doc); |
190 |
|
} |
191 |
}; |
}; |
192 |
|
|
193 |
class Database { |
class Database { |
194 |
private: |
private: |
195 |
ESTMTDB *db; |
ESTMTDB *db; |
196 |
|
int ecode; |
197 |
public: |
public: |
198 |
enum { // enumeration for error codes |
enum { // enumeration for error codes |
199 |
ERRNOERR = ESTENOERR, // no error |
ERRNOERR = ESTENOERR, // no error |
232 |
/** |
/** |
233 |
* constructor(dummy) |
* constructor(dummy) |
234 |
*/ |
*/ |
235 |
|
db = NULL; |
236 |
|
ecode = ERRNOERR; |
237 |
} |
} |
238 |
~Database() {
    /**
     * destructor
     *
     * Closes the handle only when one is still open: close() throws
     * IOError on a null handle, and a destructor must not throw.
     */
    if (db) close();
}
241 |
bool open(const char * dbname, int mode) { |
bool open(const char * dbname, int mode) { |
242 |
/** |
/** |
243 |
* open the database |
* open the database |
244 |
*/ |
*/ |
245 |
int ecode; |
if (db) close(); |
246 |
db = est_mtdb_open(dbname, mode, &ecode); |
int ec; |
247 |
|
db = est_mtdb_open(dbname, mode, &ec); |
248 |
|
if (!db) ecode = ec; |
249 |
return db; |
return db; |
250 |
} |
} |
251 |
bool close() { |
bool close() { |
252 |
/** |
/** |
253 |
* close the database |
* close the database |
254 |
*/ |
*/ |
255 |
if (db) { |
if (!db) throw IOError("closed database"); |
256 |
int ecode; |
int ec; |
257 |
bool result = est_mtdb_close(db, &ecode); |
bool result = est_mtdb_close(db, &ec); |
258 |
|
if (!result) ecode = ec; |
259 |
db = NULL; |
db = NULL; |
260 |
return result; |
return result; |
|
} else { |
|
|
return false; |
|
|
} |
|
261 |
} |
} |
262 |
bool put_doc(Document *doc, int options) {
    /**
     * add a document to a database
     *
     * doc     : wrapper whose `doc` handle is handed to est_mtdb_put_doc()
     * options : passed through to est_mtdb_put_doc()
     * return  : the result of est_mtdb_put_doc()
     * throws  : IOError("closed database") when no handle is open
     *
     * On failure the value of est_mtdb_error() is stored in `ecode`.
     */
    if (!db) throw IOError("closed database");
    bool result = est_mtdb_put_doc(db, doc->doc, options);
    if (!result) ecode = est_mtdb_error(db);
    return result;
}
271 |
std::vector<int> * search(Condition * cond, int options) { |
std::vector<int> * search(Condition * cond, int options) { |
272 |
/** |
/** |
273 |
* search documents corresponding to a condition for a database |
* search documents corresponding to a condition for a database |
274 |
*/ |
*/ |
275 |
|
if (!db) throw IOError("closed database"); |
276 |
int resnum; |
int resnum; |
277 |
int * result = est_mtdb_search(db, cond->cond, &resnum, NULL); |
int * result = est_mtdb_search(db, cond->cond, &resnum, NULL); |
278 |
std::vector<int> *numbers = new std::vector<int>; |
std::vector<int> *numbers = new std::vector<int>; |
291 |
/** |
/** |
292 |
* get the last happened error code of a database |
* get the last happened error code of a database |
293 |
*/ |
*/ |
294 |
return est_mtdb_error(db); |
return ecode; |
295 |
} |
} |
296 |
bool fatal() { |
bool fatal() { |
297 |
/** |
/** |
298 |
* check whether a database has a fatal error |
* check whether a database has a fatal error |
299 |
*/ |
*/ |
300 |
|
if (!db) throw IOError("closed database"); |
301 |
return est_mtdb_fatal(db); |
return est_mtdb_fatal(db); |
302 |
} |
} |
303 |
bool flush(int _max) { |
bool flush(int _max) { |
304 |
/** |
/** |
305 |
* flush index words in the cache of a database |
* flush index words in the cache of a database |
306 |
*/ |
*/ |
307 |
return est_mtdb_flush(db, _max); |
if (!db) throw IOError("closed database"); |
308 |
|
bool result = est_mtdb_flush(db, _max); |
309 |
|
if (!result) ecode = est_mtdb_error(db); |
310 |
|
return result; |
311 |
} |
} |
312 |
bool sync() { |
bool sync() { |
313 |
/** |
/** |
314 |
* synchronize updating contents of a database |
* synchronize updating contents of a database |
315 |
*/ |
*/ |
316 |
return est_mtdb_sync(db); |
if (!db) throw IOError("closed database"); |
317 |
|
bool result = est_mtdb_sync(db); |
318 |
|
if (!result) ecode = est_mtdb_error(db); |
319 |
|
return result; |
320 |
} |
} |
321 |
bool optimize(int options) { |
bool optimize(int options) { |
322 |
/** |
/** |
323 |
* optimize a database |
* optimize a database |
324 |
*/ |
*/ |
325 |
return est_mtdb_optimize(db, options); |
if (!db) throw IOError("closed database"); |
326 |
|
bool result = est_mtdb_optimize(db, options); |
327 |
|
if (!result) ecode = est_mtdb_error(db); |
328 |
|
return result; |
329 |
} |
} |
330 |
bool out_doc(int id, int options) { |
bool out_doc(int id, int options) { |
331 |
/** |
/** |
332 |
* remove a document from a database |
* remove a document from a database |
333 |
*/ |
*/ |
334 |
return est_mtdb_out_doc(db, id, options); |
if (!db) throw IOError("closed database"); |
335 |
|
bool result = est_mtdb_out_doc(db, id, options); |
336 |
|
if (!result) ecode = est_mtdb_error(db); |
337 |
|
return result; |
338 |
|
} |
339 |
|
bool edit_doc(Document *doc) {
    /**
     * edit an attribute of a document in a database
     *
     * doc    : wrapper whose `doc` handle is handed to est_mtdb_edit_doc()
     * return : the result of est_mtdb_edit_doc()
     * throws : IOError("closed database") when no handle is open
     *
     * On failure the value of est_mtdb_error() is stored in `ecode`.
     */
    if (!db) throw IOError("closed database");
    bool result = est_mtdb_edit_doc(db, doc->doc);
    if (!result) ecode = est_mtdb_error(db);
    return result;
}
348 |
Document * get_doc(int id, int options) { |
Document * get_doc(int id, int options) { |
349 |
/** |
/** |
350 |
* retrieve a document in a database |
* retrieve a document in a database |
351 |
*/ |
*/ |
352 |
|
if (!db) throw IOError("closed database"); |
353 |
ESTDOC *doc = est_mtdb_get_doc(db, id, options); |
ESTDOC *doc = est_mtdb_get_doc(db, id, options); |
354 |
if (!doc) { |
if (!doc) { |
355 |
|
ecode = est_mtdb_error(db); |
356 |
throw est_err_msg(est_mtdb_error(db)); |
throw est_err_msg(est_mtdb_error(db)); |
357 |
} else { |
} else { |
358 |
return new Document(doc); |
return new Document(doc); |
362 |
/** |
/** |
363 |
* get the ID of a document specified by URI |
* get the ID of a document specified by URI |
364 |
*/ |
*/ |
365 |
return est_mtdb_uri_to_id(db, uri); |
if (!db) throw IOError("closed database"); |
366 |
|
int result = est_mtdb_uri_to_id(db, uri); |
367 |
|
if(result == -1) ecode = est_mtdb_error(db); |
368 |
|
return result; |
369 |
} |
} |
370 |
std::map<std::string, std::string> * etch_doc(Document * doc, int max) { |
std::map<std::string, std::string> * etch_doc(Document * doc, int max) { |
371 |
/** |
/** |
372 |
* extract keywords of a document object |
* extract keywords of a document object |
373 |
*/ |
*/ |
374 |
|
if (!db) throw IOError("closed database"); |
375 |
std::map<std::string, std::string> * mss = new std::map<std::string, std::string>; |
std::map<std::string, std::string> * mss = new std::map<std::string, std::string>; |
|
|
|
376 |
CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max); |
CBMAP * keys = est_mtdb_etch_doc(db, doc->doc, max); |
|
|
|
377 |
cbmapiterinit(keys); |
cbmapiterinit(keys); |
378 |
int ksiz; |
int ksiz; |
379 |
while (const char *key = cbmapiternext(keys, &ksiz)) { |
while (const char *key = cbmapiternext(keys, &ksiz)) { |
381 |
} |
} |
382 |
return mss; |
return mss; |
383 |
} |
} |
|
bool iter_init() { |
|
|
/** |
|
|
* initialize the iterator of a database |
|
|
*/ |
|
|
return est_mtdb_iter_init(db); |
|
|
} |
|
|
int iter_next() { |
|
|
/** |
|
|
* get the next ID of the iterator of a database |
|
|
*/ |
|
|
return est_mtdb_iter_next(db); |
|
|
} |
|
384 |
const char * name() { |
const char * name() { |
385 |
/** |
/** |
386 |
* get the name of a database |
* get the name of a database |
387 |
*/ |
*/ |
388 |
|
if (!db) throw IOError("closed database"); |
389 |
return est_mtdb_name(db); |
return est_mtdb_name(db); |
390 |
} |
} |
391 |
int doc_num() { |
int doc_num() { |
392 |
/** |
/** |
393 |
* get the number of documents in a database |
* get the number of documents in a database |
394 |
*/ |
*/ |
395 |
|
if (!db) throw IOError("closed database"); |
396 |
return est_mtdb_doc_num(db); |
return est_mtdb_doc_num(db); |
397 |
} |
} |
398 |
int word_num() { |
int word_num() { |
399 |
/** |
/** |
400 |
* get the number of unique words in a database |
* get the number of unique words in a database |
401 |
*/ |
*/ |
402 |
|
if (!db) throw IOError("closed database"); |
403 |
return est_mtdb_word_num(db); |
return est_mtdb_word_num(db); |
404 |
} |
} |
405 |
double size() { |
double size() { |
406 |
/** |
/** |
407 |
* get the size of a database |
* get the size of a database |
408 |
*/ |
*/ |
409 |
|
if (!db) throw IOError("closed database"); |
410 |
return est_mtdb_size(db); |
return est_mtdb_size(db); |
411 |
} |
} |
412 |
void set_cache_size(size_t size, int anum, int tnum) { |
void set_cache_size(size_t size, int anum, int tnum) { |
413 |
/** |
/** |
414 |
* set the maximum size of the cache memory of a database |
* set the maximum size of the cache memory of a database |
415 |
*/ |
*/ |
416 |
|
if (!db) throw IOError("closed database"); |
417 |
est_mtdb_set_cache_size(db, size, anum, tnum); |
est_mtdb_set_cache_size(db, size, anum, tnum); |
418 |
} |
} |
419 |
void set_special_cache(const char *name, int num) { |
void set_special_cache(const char *name, int num) { |
424 |
est_mtdb_set_special_cache(db, name, num); |
est_mtdb_set_special_cache(db, name, num); |
425 |
} |
} |
426 |
}; |
}; |
427 |
|
|
428 |
|
static std::vector<std::string> * break_text(const char *text, bool norm, bool tail) { |
429 |
|
std::vector<std::string> * vs = new std::vector<std::string>; |
430 |
|
CBLIST *list; |
431 |
|
list = cblistopen(); |
432 |
|
est_break_text(text, list, norm, tail); |
433 |
|
for (int i=0; i < cblistnum(list); i++) { |
434 |
|
vs->push_back(cblistval(list, i, NULL)); |
435 |
|
} |
436 |
|
cblistclose(list); |
437 |
|
return vs; |
438 |
|
} |
439 |
|
|
440 |
|
static std::vector<std::string> * break_text_perfng(const char *text, bool norm, bool tail) { |
441 |
|
std::vector<std::string> * vs = new std::vector<std::string>; |
442 |
|
CBLIST *list; |
443 |
|
list = cblistopen(); |
444 |
|
est_break_text_perfng(text, list, norm, tail); |
445 |
|
for (int i=0; i < cblistnum(list); i++) { |
446 |
|
vs->push_back(cblistval(list, i, NULL)); |
447 |
|
} |
448 |
|
cblistclose(list); |
449 |
|
return vs; |
450 |
|
} |
451 |
|
|
452 |
}; |
}; |