/[hyperestraier]/trunk/estraier.h
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/estraier.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 3 - (show annotations)
Fri Jul 29 21:57:20 2005 UTC (18 years, 9 months ago) by dpavlin
File MIME type: text/plain
File size: 39406 byte(s)
make working copy from version 0.5.1

1 /*************************************************************************************************
2 * The core API of Hyper Estraier
3 * Copyright (C) 2004-2005 Mikio Hirabayashi
4 * This file is part of Hyper Estraier.
5 * Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of
6 * the GNU Lesser General Public License as published by the Free Software Foundation; either
7 * version 2.1 of the License or any later version. Hyper Estraier is distributed in the hope
8 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
10 * License for more details.
11 * You should have received a copy of the GNU Lesser General Public License along with Hyper
12 * Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13 * Boston, MA 02111-1307 USA.
14 *************************************************************************************************/
15
16
17 #ifndef _ESTRAIER_H /* duplication check */
18 #define _ESTRAIER_H
19
20 #if defined(__cplusplus) /* export for C++ */
21 extern "C" {
22 #endif
23
24
25
26 /*************************************************************************************************
27 * common settings
28 *************************************************************************************************/
29
30
31 /* version of Hyper Estraier */
32 extern const char *est_version;
33
34
35
36 /*************************************************************************************************
37 * underlying headers
38 *************************************************************************************************/
39
40
41 #include <depot.h>
42 #include <curia.h>
43 #include <cabin.h>
44 #include <villa.h>
45 #include <stdlib.h>
46
47
48
49 /*************************************************************************************************
50 * API for document
51 *************************************************************************************************/
52
53
54 #define ESTDATTRID "@id" /* name of the attribute of ID */
55 #define ESTDATTRURI "@uri" /* name of the attribute of URI */
56 #define ESTDATTRCDATE "@cdate" /* name of the attribute of creation date */
57 #define ESTDATTRMDATE "@mdate" /* name of the attribute of modification date */
58 #define ESTDATTRTITLE "@title" /* name of the attribute of title */
59 #define ESTDATTRAUTHOR "@author" /* name of the attribute of author */
60 #define ESTDATTRTYPE "@type" /* name of the attribute of content type */
61 #define ESTDATTRLANG "@lang" /* name of the attribute of language */
62 #define ESTDATTRSIZE "@size" /* name of the attribute of entity size */
63
64 typedef struct { /* type of structure for a document */
65 int id; /* identification number */
66 CBMAP *attrs; /* map of attributes */
67 CBLIST *dtexts; /* list of shown text */
68 } ESTDOC;
69
70
71 /* Create a document object.
72 The return value is an object of a document. */
73 ESTDOC *est_doc_new(void);
74
75
76 /* Create a document object made from draft data.
77 `draft' specifies a string of draft data.
78 The return value is an object of a document. */
79 ESTDOC *est_doc_new_from_draft(const char *draft);
80
81
82 /* Destroy a document object.
83 `doc' specifies a document object. */
84 void est_doc_delete(ESTDOC *doc);
85
86
87 /* Add an attribute to a document object.
88 `doc' specifies a document object.
89 `name' specifies the name of an attribute.
90 `value' specifies the value of the attribute. If it is `NULL', the attribute is removed. */
91 void est_doc_add_attr(ESTDOC *doc, const char *name, const char *value);
92
93
94 /* Add a sentence of text to a document object.
95 `doc' specifies a document object.
96 `text' specifies a sentence of text. */
97 void est_doc_add_text(ESTDOC *doc, const char *text);
98
99
100 /* Add a hidden sentence to a document object.
101 `doc' specifies a document object.
102 `text' specifies a hidden sentence. */
103 void est_doc_add_hidden_text(ESTDOC *doc, const char *text);
104
105
106 /* Get the ID number of a document object.
107 `doc' specifies a document object.
108 The return value is the ID number of the document object. If the object has never been
109 registered, -1 is returned. */
110 int est_doc_id(ESTDOC *doc);
111
112
113 /* Get a list of attribute names of a document object.
114 `doc' specifies a document object.
115 The return value is a new list object of attribute names of the document object. Because
116 the object of the return value is opened with the function `cblistopen', it should be closed
117 with the function `cblistclose' if it is no longer in use. */
118 CBLIST *est_doc_attr_names(ESTDOC *doc);
119
120
121 /* Get the value of an attribute of a document object.
122 `doc' specifies a document object.
123 `name' specifies the name of an attribute.
124 The return value is the value of the attribute or `NULL' if it does not exist. The life
125 duration of the returned string is synchronous with the one of the document object. */
126 const char *est_doc_attr(ESTDOC *doc, const char *name);
127
128
129 /* Get a list of sentences of the text of a document object.
130 `doc' specifies a document object.
131 The return value is a list object of sentences of the text of the document object. The life
132 duration of the returned object is synchronous with the one of the document object. */
133 const CBLIST *est_doc_texts(ESTDOC *doc);
134
135
136 /* Concatenate sentences of the text of a document object.
137 `doc' specifies a document object.
138 The return value is concatenated sentences of a document object. Because the region of the
139 return value is allocated with the `malloc' call, it should be released with the `free' call
140 if it is no longer in use. */
141 char *est_doc_cat_texts(ESTDOC *doc);
142
143
144 /* Dump draft data of a document object.
145 `doc' specifies a document object.
146 The return value is draft data of a document object. Because the region of the return value
147 is allocated with the `malloc' call, it should be released with the `free' call if it is no
148 longer in use. */
149 char *est_doc_dump_draft(ESTDOC *doc);
150
151
152 /* Make a snippet of the body text of a document object.
153 `doc' specifies a document object.
154 `words' specifies a list object of words to be highlighted.
155 `wwidth' specifies the whole width of the result.
156 `hwidth' specifies the width of strings picked up from the beginning of the text.
157 `awidth' specifies the width of strings picked up around each highlighted word.
158 The return value is a snippet string of the body text of a document object. There are tab
159 separated values. Each line is a string to be shown. Though most lines have only one field,
160 some lines have two fields. If the second field exists, the first field is to be shown with
161 highlighted, and the second field means its normalized form. Because the region of the
162 return value is allocated with the `malloc' call, it should be released with the `free' call
163 if it is no longer in use. */
164 char *est_doc_make_snippet(ESTDOC *doc, const CBLIST *words, int wwidth, int hwidth, int awidth);
165
166
167 /* Check whether the text of a document object includes every specified word.
168 `doc' specifies a document object.
169 `words' specifies a list object of words to be checked.
170 The return value is true if every specified word is found, else it is false. */
171 int est_doc_scan_words(ESTDOC *doc, const CBLIST *words);
172
173
174
175 /*************************************************************************************************
176 * API for search conditions
177 *************************************************************************************************/
178
179
180 #define ESTOPUVSET "[UVSET]" /* universal set */
181 #define ESTOPSIMILAR "[SIMILAR]" /* similarity search */
182
183 #define ESTOPUNION "OR" /* union (disjunction) */
184 #define ESTOPISECT "AND" /* intersection (conjunction) */
185 #define ESTOPDIFF "ANDNOT" /* difference (intersection with negation) */
186 #define ESTOPWITH "WITH" /* delimiter for elements */
187
188 #define ESTOPSTREQ "STREQ" /* string is equal */
189 #define ESTOPSTRNE "STRNE" /* string is not equal */
190 #define ESTOPSTRINC "STRINC" /* string is included in */
191 #define ESTOPSTRBW "STRBW" /* string begins with */
192 #define ESTOPSTREW "STREW" /* string ends with */
193 #define ESTOPNUMEQ "NUMEQ" /* number or date is equal */
194 #define ESTOPNUMNE "NUMNE" /* number or date is not equal */
195 #define ESTOPNUMGT "NUMGT" /* number or date is greater than */
196 #define ESTOPNUMGE "NUMGE" /* number or date is greater than or equal to */
197 #define ESTOPNUMLT "NUMLT" /* number or date is less than */
198 #define ESTOPNUMLE "NUMLE" /* number or date is less than or equal to */
199 #define ESTOPREGEX "REGEX" /* string matches regular expressions */
200
201 #define ESTORDSTRA "STRA" /* strings in ascending order */
202 #define ESTORDSTRD "STRD" /* strings in descending order */
203 #define ESTORDNUMA "NUMA" /* numbers in ascending order */
204 #define ESTORDNUMD "NUMD" /* numbers in descending order */
205
206 typedef struct { /* type of structure for search conditions */
207 char *phrase; /* search phrase */
208 int gstep; /* step of N-gram */
209 int tfidf; /* whether with TF-IDF tuning */
210 int simple; /* whether with the simplified phrase */
211 CBLIST *attrs; /* conditions with attributes */
212 char *order; /* sorting order */
213 int max; /* maximum number of retrieval */
214 int scfb; /* whether to feed back scores */
215 int *scores; /* array of scores */
216 int snum; /* number of elements of the score array */
217 int opts; /* options for preservation */
218 } ESTCOND;
219
220 enum { /* enumeration for options */
221 ESTCONDSURE = 1 << 0, /* check every N-gram key */
222 ESTCONDUSU = 1 << 1, /* check N-gram keys skipping by one */
223 ESTCONDFAST = 1 << 2, /* check N-gram keys skipping by two */
224 ESTCONDAGIT = 1 << 3, /* check N-gram keys skipping by three */
225 ESTCONDNOIDF = 1 << 4, /* without TF-IDF tuning */
226 ESTCONDSIMPLE = 1 << 10, /* with the simplified phrase */
227 ESTCONDSCFB = 1 << 30 /* feed back scores (for debug) */
228 };
229
230
231 /* Create a condition object.
232 The return value is an object of search conditions. */
233 ESTCOND *est_cond_new(void);
234
235
236 /* Destroy a condition object.
237 `cond' specifies a condition object. */
238 void est_cond_delete(ESTCOND *cond);
239
240
241 /* Set a search phrase to a condition object.
242 `cond' specifies a condition object.
243 `phrase' specifies a search phrase. */
244 void est_cond_set_phrase(ESTCOND *cond, const char *phrase);
245
246
247 /* Add an expression for an attribute to a condition object.
248 `cond' specifies a condition object.
249 `expr' specifies an expression for an attribute. */
250 void est_cond_add_attr(ESTCOND *cond, const char *expr);
251
252
253 /* Set the order of a condition object.
254 `cond' specifies a condition object.
255 `expr' specifies an expression for the order.
256 By default, the order is by score descending. */
257 void est_cond_set_order(ESTCOND *cond, const char *expr);
258
259
260 /* Set the maximum number of retrieval of a condition object.
261 `cond' specifies a condition object.
262 `max' specifies the maximum number of retrieval.
263 By default, the number of retrieval is not limited. */
264 void est_cond_set_max(ESTCOND *cond, int max);
265
266
267 /* Set options of retrieval of a condition object.
268 `cond' specifies a condition object.
269 `options' specifies options: `ESTCONDSURE' specifies that it checks every N-gram key,
270 `ESTCONDUSU', which is the default, specifies that it checks N-gram keys with skipping one
271 key, `ESTCONDFAST' skips two keys, `ESTCONDAGIT' skips three keys, `ESTCONDNOIDF' specifies
272 not to perform TF-IDF tuning, `ESTCONDSIMPLE' specifies to use the simplified phrase. Each option
273 can be specified at the same time by bitwise or. If keys are skipped, though search speed is
274 improved, the relevance ratio grows less. */
275 void est_cond_set_options(ESTCOND *cond, int options);
276
277
278
279 /*************************************************************************************************
280 * API for database
281 *************************************************************************************************/
282
283
284 #define ESTIDXDMAX 16 /* max number of the inverted index */
285
286 typedef struct { /* type of structure for the inverted index */
287 char *name; /* name of the database */
288 int omode; /* open mode */
289 VILLA *dbs[ESTIDXDMAX]; /* database handles */
290 int dnum; /* number of divisions */
291 VILLA *cdb; /* current database handle */
292 } ESTIDX;
293
294 typedef struct { /* type of structure for a database object */
295 char *name; /* name of the database */
296 DEPOT *metadb; /* handle of the meta database */
297 ESTIDX *idxdb; /* handles of the inverted indexes */
298 VILLA *fwmdb; /* handle of the database for forward matching */
299 CURIA *attrdb; /* handle of the database for attributes */
300 CURIA *textdb; /* handle of the database for texts */
301 VILLA *listdb; /* handle of the database for document list */
302 int ecode; /* last happened error code */
303 int fatal; /* whether to have a fatal error */
304 int dseq; /* sequence for document IDs */
305 int dnum; /* number of the documents */
306 int amode; /* mode of text analyzer */
307 CBMAP *idxcc; /* cache for the inverted index */
308 size_t icsiz; /* power of the cache */
309 size_t icmax; /* max size of the cache */
310 CBMAP *outcc; /* cache for deleted documents */
311 CBMAP *keycc; /* cache for keys for TF-IDF */
312 int kcmnum; /* max number of the key cache */
313 CBMAP *attrcc; /* cache for attributes */
314 int acmnum; /* max number of the attribute cache */
315 CBMAP *textcc; /* cache for texts */
316 int tcmnum; /* max number of the text cache */
317 CBMAP *spacc; /* special cache for attributes */
318 int scmnum; /* max number of the special cache */
319 char *scname; /* name of the attribute for the special cache */
320 void (*cbinfo)(const char *); /* callback function to inform of events */
321 CBMAP *(*cbvec)(void *, int, void *); /* callback function to create a vector */
322 void *vecdata; /* arbitrary object for the vectorizer */
323 CBMAP *metacc; /* cache for meta data */
324 } ESTDB;
325
326 enum { /* enumeration for error codes */
327 ESTENOERR, /* no error */
328 ESTEINVAL, /* invalid argument */
329 ESTEACCES, /* access forbidden */
330 ESTELOCK, /* lock failure */
331 ESTEDB, /* database problem */
332 ESTEIO, /* I/O problem */
333 ESTENOITEM, /* no item */
334 ESTEMISC = 9999 /* miscellaneous */
335 };
336
337 enum { /* enumeration for open modes */
338 ESTDBREADER = 1 << 0, /* open as a reader */
339 ESTDBWRITER = 1 << 1, /* open as a writer */
340 ESTDBCREAT = 1 << 2, /* a writer creating */
341 ESTDBTRUNC = 1 << 3, /* a writer truncating */
342 ESTDBNOLCK = 1 << 4, /* open without locking */
343 ESTDBLCKNB = 1 << 5, /* lock without blocking */
344 ESTDBPERFNG = 1 << 6 /* use perfect N-gram analyzer */
345 };
346
347 enum { /* enumeration for options of document registration */
348 ESTPDCLEAN = 1 << 0 /* clean up dispensable regions */
349 };
350
351 enum { /* enumeration for options of document deletion */
352 ESTODCLEAN = 1 << 0 /* clean up dispensable regions */
353 };
354
355 enum { /* enumeration for options of optimization */
356 ESTOPTNOPURGE = 1 << 0, /* omit purging dispensable regions of deleted documents */
357 ESTOPTNODBOPT = 1 << 1 /* omit optimization of the database files */
358 };
359
360 enum { /* enumeration for options of document retrieval */
361 ESTGDNOATTR = 1 << 0, /* no attributes */
362 ESTGDNOTEXT = 1 << 1 /* no text */
363 };
364
365
366 /* Get the string of an error code.
367 `ecode' specifies an error code.
368 The return value is the string of the error code. */
369 const char *est_err_msg(int ecode);
370
371
372 /* Open a database.
373 `name' specifies the name of a database directory.
374 `omode' specifies open modes: `ESTDBWRITER' as a writer, `ESTDBREADER' as a reader. If the
375 mode is `ESTDBWRITER', the following may be added by bitwise or: `ESTDBCREAT', which means it
376 creates a new database if not exist, `ESTDBTRUNC', which means it creates a new database
377 regardless if one exists. Both of `ESTDBREADER' and `ESTDBWRITER' can be added to by
378 bitwise or: `ESTDBNOLCK', which means it opens a database file without file locking, or
379 `ESTDBLCKNB', which means locking is performed without blocking. If `ESTDBNOLCK' is used,
380 the application is responsible for exclusion control. `ESTDBCREAT' can be added to by bitwise
381 or: `ESTDBPERFNG', which means N-gram analysis is performed against European text also.
382 `ecp' specifies the pointer to a variable to which the error code is assigned.
383 The return value is a database object of the database or `NULL' if failure. */
384 ESTDB *est_db_open(const char *name, int omode, int *ecp);
385
386
387 /* Close a database.
388 `db' specifies a database object.
389 `ecp' specifies the pointer to a variable to which the error code is assigned.
390 The return value is true if success, else it is false. */
391 int est_db_close(ESTDB *db, int *ecp);
392
393
394 /* Get the last happened error code of a database.
395 `db' specifies a database object.
396 The return value is the last happened error code of the database. */
397 int est_db_error(ESTDB *db);
398
399
400 /* Check whether a database has a fatal error.
401 `db' specifies a database object.
402 The return value is true if the database has a fatal error, else it is false. */
403 int est_db_fatal(ESTDB *db);
404
405
406 /* Flush index words in the cache of a database.
407 `db' specifies a database object connected as a writer.
408 `max' specifies the maximum number of words to be flushed. If it is not more than zero, all
409 words are flushed.
410 The return value is true if success, else it is false. */
411 int est_db_flush(ESTDB *db, int max);
412
413
414 /* Synchronize updating contents of a database.
415 `db' specifies a database object connected as a writer.
416 The return value is true if success, else it is false. */
417 int est_db_sync(ESTDB *db);
418
419
420 /* Optimize a database.
421 `db' specifies a database object connected as a writer.
422 `options' specifies options: `ESTOPTNOPURGE' to omit purging dispensable region of deleted
423 documents, `ESTOPTNODBOPT' to omit optimization of the database files. The two can be
424 specified at the same time by bitwise or.
425 The return value is true if success, else it is false. */
426 int est_db_optimize(ESTDB *db, int options);
427
428
429 /* Add a document to a database.
430 `db' specifies a database object connected as a writer.
431 `doc' specifies a document object. The document object should have the URI attribute.
432 `options' specifies options: `ESTPDCLEAN' to clean up dispensable regions of the overwritten
433 document.
434 The return value is true if success, else it is false.
435 If the URI attribute is the same as that of an existing document in the database, the
436 existing one is deleted. */
437 int est_db_put_doc(ESTDB *db, ESTDOC *doc, int options);
438
439
440 /* Remove a document from a database.
441 `db' specifies a database object connected as a writer.
442 `id' specifies the ID number of a registered document.
443 `options' specifies options: `ESTODCLEAN' to clean up dispensable regions of the deleted
444 document.
445 The return value is true if success, else it is false. */
446 int est_db_out_doc(ESTDB *db, int id, int options);
447
448
449 /* Retrieve a document in a database.
450 `db' specifies a database object.
451 `id' specifies the ID number of a registered document.
452 `options' specifies options: `ESTGDNOATTR' to ignore attributes, `ESTGDNOTEXT' to ignore
453 the body text. The two can be specified at the same time by bitwise or.
454 The return value is a document object. On error, `NULL' is returned. */
455 ESTDOC *est_db_get_doc(ESTDB *db, int id, int options);
456
457
458 /* Retrieve the value of an attribute of a document in a database.
459 `db' specifies a database object.
460 `id' specifies the ID number of a registered document.
461 `name' specifies the name of an attribute.
462 The return value is the value of the attribute or `NULL' if it does not exist. Because the
463 region of the return value is allocated with the `malloc' call, it should be released with
464 the `free' call if it is no longer in use. */
465 char *est_db_get_doc_attr(ESTDB *db, int id, const char *name);
466
467
468 /* Get the ID of a document specified by URI.
469 `db' specifies a database object.
470 `uri' specifies the URI of a registered document.
471 The return value is the ID of the document. On error, -1 is returned. */
472 int est_db_uri_to_id(ESTDB *db, const char *uri);
473
474
475 /* Extract keywords of a document object.
476 `db' specifies a database object for TF-IDF tuning. If it is `NULL', it is not used.
477 `doc' specifies a document object.
478 `max' specifies the maximum number of keywords to be extracted.
479 The return value is a new map object of keywords and their scores in decimal string. Because
480 the object of the return value is opened with the function `cbmapopen', it should be closed
481 with the function `cbmapclose' if it is no longer in use. */
482 CBMAP *est_db_etch_doc(ESTDB *db, ESTDOC *doc, int max);
483
484
485 /* Initialize the iterator of a database.
486 `db' specifies a database object.
487 The return value is true if success, else it is false. */
488 int est_db_iter_init(ESTDB *db);
489
490
491 /* Get the next ID of the iterator of a database.
492 `db' specifies a database object.
493 The return value is the next ID. If there is no more document, 0 is returned. On error,
494 -1 is returned. */
495 int est_db_iter_next(ESTDB *db);
496
497
498 /* Get the name of a database.
499 `db' specifies a database object.
500 The return value is the name of the database. The life duration of the returned string is
501 synchronous with the one of the database object. */
502 const char *est_db_name(ESTDB *db);
503
504
505 /* Get the number of documents in a database.
506 `db' specifies a database object.
507 The return value is the number of documents in the database. */
508 int est_db_doc_num(ESTDB *db);
509
510
511 /* Get the number of unique words in a database.
512 `db' specifies a database object.
513 The return value is the number of unique words in the database. */
514 int est_db_word_num(ESTDB *db);
515
516
517 /* Get the size of a database.
518 `db' specifies a database object.
519 The return value is the size of the database. */
520 double est_db_size(ESTDB *db);
521
522
523 /* Search documents corresponding to a condition for a database.
524 `db' specifies a database object.
525 `cond' specifies a condition object.
526 `nump' specifies the pointer to a variable to which the number of elements in the result is
527 assigned.
528 `hints' specifies a map object into which the number of documents corresponding to each word
529 is stored. If a word is in a negative condition, the number is negative. The element whose
530 key is an empty string specifies the number of whole result. If it is `NULL', it is not used.
531 The return value is an array whose elements are ID numbers of corresponding documents.
532 This function never fails. Even if no document corresponds or an error occurs, an empty
533 array is returned. Because the region of the return value is allocated with the `malloc'
534 call, it should be released with the `free' call if it is no longer in use. */
535 int *est_db_search(ESTDB *db, ESTCOND *cond, int *nump, CBMAP *hints);
536
537
538 /* Set the maximum size of the cache memory of a database.
539 `db' specifies a database object.
540 `size' specifies the maximum size of the index cache. By default, it is 64MB. If it is not
541 more than 0, the current size is not changed.
542 `anum' specifies the maximum number of cached records for document attributes. By default, it
543 is 8192. If it is not more than 0, the current size is not changed.
544 `tnum' specifies the maximum number of cached records for document texts. By default, it is
545 1024. If it is not more than 0, the current size is not changed. */
546 void est_db_set_cache_size(ESTDB *db, size_t size, int anum, int tnum);
547
548
549 /* Set the special cache for narrowing and sorting with document attributes.
550 `db' specifies a database object.
551 `name' specifies the name of a document attribute.
552 `num' specifies the maximum number of cached records. */
553 void est_db_set_special_cache(ESTDB *db, const char *name, int num);
554
555
556
557 /*************************************************************************************************
558 * features for experts
559 *************************************************************************************************/
560
561
562 #define _EST_VERSION "0.5.1"
563 #define _EST_LIBVER 200
564 #define _EST_PROTVER "0.9"
565
566 enum { /* enumeration for languages */
567 ESTLANGEN, /* English */
568 ESTLANGJA, /* Japanese */
569 ESTLANGZH, /* Chinese */
570 ESTLANGKO, /* Korean */
571 ESTLANGMISC /* miscellaneous */
572 };
573
574
575 /* Break a sentence of text and extract words.
576 `text' specifies a sentence of text.
577 `list' specifies a list object to which extracted words are added.
578 `norm' specifies whether to normalize the text.
579 `tail' specifies whether to pick up oddness N-gram at the end. */
580 void est_break_text(const char *text, CBLIST *list, int norm, int tail);
581
582
583 /* Break a sentence of text and extract words using perfect N-gram analyzer.
584 `text' specifies a sentence of text.
585 `list' specifies a list object to which extracted words are added.
586 `norm' specifies whether to normalize the text.
587 `tail' specifies whether to pick up oddness N-gram at the end. */
588 void est_break_text_perfng(const char *text, CBLIST *list, int norm, int tail);
589
590
591 /* Convert the character encoding of a string.
592 `ptr' specifies the pointer to a region.
593 `size' specifies the size of the region. If it is negative, the size is assigned with
594 `strlen(ptr)'.
595 `icode' specifies the name of encoding of the input string.
596 `ocode' specifies the name of encoding of the output string.
597 `sp' specifies the pointer to a variable to which the size of the region of the return
598 value is assigned. If it is `NULL', it is not used.
599 `mp' specifies the pointer to a variable to which the number of missing characters by failure
600 of conversion is assigned. If it is `NULL', it is not used.
601 If successful, the return value is the pointer to the result object, else, it is `NULL'.
602 Because an additional zero code is appended at the end of the region of the return value,
603 the return value can be treated as a character string. Because the region of the return
604 value is allocated with the `malloc' call, it should be released with the `free' call if it
605 is no longer in use. */
606 char *est_iconv(const char *ptr, int size, const char *icode, const char *ocode,
607 int *sp, int *mp);
608
609
610 /* Detect the encoding of a string automatically.
611 `ptr' specifies the pointer to a region.
612 `size' specifies the size of the region. If it is negative, the size is assigned with
613 `strlen(ptr)'.
614 `plang' specifies a preferred language. As for now, `ESTLANGEN', `ESTLANGJA', `ESTLANGZH',
615 and `ESTLANGKO' are supported.
616 The return value is the string of the encoding name of the string. */
617 const char *est_enc_name(const char *ptr, int size, int plang);
618
619
620 /* Convert a UTF-8 string into UTF-16BE.
621 `ptr' specifies the pointer to a region.
622 `size' specifies the size of the region.
623 `sp' specifies the pointer to a variable to which the size of the region of the return
624 value is assigned.
625 The return value is the pointer to the result object. Because an additional zero code is
626 appended at the end of the region of the return value, the return value can be treated as a
627 character string. Because the region of the return value is allocated with the `malloc' call,
628 it should be released with the `free' call if it is no longer in use. */
629 char *est_uconv_in(const char *ptr, int size, int *sp);
630
631
632 /* Convert a UTF-16BE string into UTF-8.
633 `ptr' specifies the pointer to a region.
634 `size' specifies the size of the region.
635 `sp' specifies the pointer to a variable to which the size of the region of the return
636 value is assigned. If it is `NULL', it is not used.
637 The return value is the pointer to the result object. Because an additional zero code is
638 appended at the end of the region of the return value, the return value can be treated as a
639 character string. Because the region of the return value is allocated with the `malloc' call,
640 it should be released with the `free' call if it is no longer in use. */
641 char *est_uconv_out(const char *ptr, int size, int *sp);
642
643
644 /* Compress a serial object with ZLIB.
645 `ptr' specifies the pointer to a region.
646 `size' specifies the size of the region. If it is negative, the size is assigned with
647 `strlen(ptr)'.
648 `sp' specifies the pointer to a variable to which the size of the region of the return
649 value is assigned.
650 If successful, the return value is the pointer to the result object, else, it is `NULL'.
651 Because the region of the return value is allocated with the `malloc' call, it should be
652 released with the `free' call if it is no longer in use. */
653 char *est_deflate(const char *ptr, int size, int *sp);
654
655
656 /* Decompress a serial object compressed with ZLIB.
657 `ptr' specifies the pointer to a region.
658 `size' specifies the size of the region.
659 `sp' specifies the pointer to a variable to which the size of the region of the return
660 value is assigned. If it is `NULL', it is not used.
661 If successful, the return value is the pointer to the result object, else, it is `NULL'.
662 Because an additional zero code is appended at the end of the region of the return value,
663 the return value can be treated as a character string. Because the region of the return
664 value is allocated with the `malloc' call, it should be released with the `free' call if it
665 is no longer in use. */
666 char *est_inflate(const char *ptr, int size, int *sp);
667
668
669 /* Get the border string for draft data of documents.
670 The return value is the border string for draft data of documents. */
671 const char *est_border_str(void);
672
673
674 /* Get the real random number.
675 The return value is the real random number between 0.0 and 1.0. */
676 double est_random(void);
677
678
679 /* Get the random number in normal distribution.
680 The return value is the random number in normal distribution between 0.0 and 1.0. */
681 double est_random_nd(void);
682
683
684 /* Get an MD5 hash string of a key string.
685 `key' specifies a string to be encrypted.
686 The return value is an MD5 hash string of the key string. Because the region of the return
687 value is allocated with the `malloc' call, it should be released with the `free' call if it
688 is no longer in use. */
689 char *est_make_crypt(const char *key);
690
691
692 /* Check whether a key matches an MD5 hash string.
693 `key' specifies a string to be checked.
694 `hash' specifies an MD5 hash string.
695 The return value is true if the key matches the hash string, else it is false. */
696 int est_match_crypt(const char *key, const char *hash);
697
698
699 /* Get the hidden texts of a document object.
700 `doc' specifies a document object.
701 The return value is concatenated sentences of the hidden text of the document object. The
702 life duration of the returned string is synchronous with the one of the document object. */
703 const char *est_doc_hidden_texts(ESTDOC *doc);
704
705
706 /* Get the phrase of a condition object.
707 `cond' specifies a condition object.
708 The return value is the phrase of a condition object or `NULL' if it is not specified. The
709 life duration of the returned string is synchronous with the one of the condition object. */
710 const char *est_cond_phrase(ESTCOND *cond);
711
712
713 /* Get a list object of attribute expressions of a condition object.
714 `cond' specifies a condition object.
715 The return value is a list object of attribute expressions of a condition object or `NULL' if
716 it is not specified. The life duration of the returned object is synchronous with the one of
717 the condition object. */
718 const CBLIST *est_cond_attrs(ESTCOND *cond);
719
720
721 /* Get the order expression of a condition object.
722 `cond' specifies a condition object.
723 The return value is the order expression of a condition object or `NULL' if it is not
724 specified. The life duration of the returned string is synchronous with the one of the
725 condition object. */
726 const char *est_cond_order(ESTCOND *cond);
727
728
729 /* Get the maximum number of retrieval of a condition object.
730 `cond' specifies a condition object.
731 The return value is the maximum number of retrieval of a condition object or -1 if it is not
732 specified. */
733 int est_cond_max(ESTCOND *cond);
734
735
736 /* Get the options of a condition object.
737 `cond' specifies a condition object.
738 The return value is the options of a condition object. */
739 int est_cond_options(ESTCOND *cond);
740
741
742 /* Get the score of a document corresponding to a condition object.
743 `cond' specifies a condition object.
744 `index' specifies the index of an element of the result array of `est_db_search'.
745 The return value is the score of the element or -1 if the index is out of bounds. */
746 int est_cond_score(ESTCOND *cond, int index);
747
748
749 /* Set the error code of a database.
750 `db' specifies a database object.
751 `ecode' specifies an error code to set. */
752 void est_db_set_ecode(ESTDB *db, int ecode);
753
754
755 /* Edit attributes of a document object in a database.
756 `db' specifies a database object connected as a writer.
757 `doc' specifies a document object.
758 The return value is true if success, else it is false. */
759 int est_db_edit_doc(ESTDB *db, ESTDOC *doc);
760
761
762 /* Add a piece of meta data to a database.
763 `db' specifies a database object connected as a writer.
764 `name' specifies the name of a piece of meta data.
765 `value' specifies the value of the meta data. If it is `NULL', the meta data is removed. */
766 void est_db_add_meta(ESTDB *db, const char *name, const char *value);
767
768
769 /* Get a list of names of meta data of a database.
770 `db' specifies a database object.
771 The return value is a new list object of meta data names of the database. Because the
772 object of the return value is opened with the function `cblistopen', it should be closed with
773 the function `cblistclose' if it is no longer in use. */
774 CBLIST *est_db_meta_names(ESTDB *db);
775
776
777 /* Get the value of a piece of meta data of a database.
778 `db' specifies a database object.
779 `name' specifies the name of a piece of meta data.
780 The return value is the value of the meta data or `NULL' if it does not exist. Because the
781 region of the return value is allocated with the `malloc' call, it should be released with
782 the `free' call if it is no longer in use. */
783 char *est_db_meta(ESTDB *db, const char *name);
784
785
786 /* Get the number of records in the cache memory of a database.
787 `db' specifies a database object.
788 The return value is the number of records in the cache memory of a database. */
789 int est_db_cache_num(ESTDB *db);
790
791
792 /* Set the callback function to inform of database events.
793 `db' specifies a database object.
794 `func' specifies the pointer to a function. The argument of the callback specifies a message
795 of each event. */
796 void est_db_set_informer(ESTDB *db, void (*func)(const char *));
797
798
799 /* Set the callback function to create a vector of keywords of a document.
800 `db' specifies a database object.
801 `func' specifies the pointer to a function. The arguments of the callback specify the
802 database object, the ID of a document, and an arbitrary pointer. The return value of the
803 callback is a new map object conforming to the return value of `est_db_etch_doc'.
804 `data' specifies the pointer to an object given as the third argument of the callback. */
805 void est_db_set_vectorizer(ESTDB *db, CBMAP *(*func)(void *, int, void *), void *data);
806
807
808 /* Fill the cache for keys for TF-IDF.
809 `db' specifies a database object. */
810 void est_db_fill_key_cache(ESTDB *db);
811
812
813 /* Make a directory.
814 `path' specifies the path of a new directory.
815 The return value is true if success, else it is false. */
816 int est_mkdir(const char *path);
817
818
819 /* Remove a directory and its contents recursively.
820 `path' specifies the path of a directory.
821 The return value is true if success, else it is false. */
822 int est_rmdir_rec(const char *path);
823
824
825 /* Get the canonicalized absolute pathname of a file.
826 `path' specifies the path of a file.
827 The return value is the canonicalized absolute pathname of a file. Because the region of the
828 return value is allocated with the `malloc' call, it should be released with the `free' call
829 if it is no longer in use. */
830 char *est_realpath(const char *path);
831
832
833 /* Get the time of day in milliseconds.
834 The return value is the time of day in milliseconds. */
835 double est_gettimeofday(void);
836
837
838 /* Suspend execution for microsecond intervals.
839 `usec' specifies microseconds to sleep for. */
840 void est_usleep(unsigned long usec);
841
842
843 /* Send a signal to a process.
844 `pid' specifies the PID of a target process.
845 `sig' specifies a signal code.
846 The return value is true if success, else it is false. */
847 int est_kill(int pid, int sig);
848
849
850 /* Get the media type of an extension.
851 `ext' specifies the extension of a file path.
852 The return value is the media type of the extension. */
853 const char *est_ext_type(const char *ext);
854
855
856
857 #if defined(__cplusplus) /* export for C++ */
858 }
859 #endif
860
861 #endif /* duplication check */
862
863
864 /* END OF FILE */

  ViewVC Help
Powered by ViewVC 1.1.26