/[webpac]/openisis/0.9.9e/core/cdx.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /openisis/0.9.9e/core/cdx.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 604 - (show annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 3 months ago) by dpavlin
File MIME type: text/plain
File size: 20735 byte(s)
import of new openisis release, 0.9.9e

1 /*
2 The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3 Version 0.9.x (patchlevel see file Version)
4 Copyright (C) 2001-2003 by Erik Grziwotz, erik@openisis.org
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 See the GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 see README for more information
21 EOH */
22
23 /*
24 $Id: cdx.c,v 1.9 2004/11/11 18:20:23 kripke Exp $
25 charset collation
26 */
27
28 #include "../core/core.h"
29
30 /*
31 We use
32 - p codes 0,1,..p-1 for primaries (word and nonword)
33 - v codes p..p+v-1 for secondary and tertiary level variants
34 - m codes p+v..p+v+m-1 for maps (targets)
35
36 We index code numbers to an array[p+v+m] of unsigned ints as code values.
37 The highest bit is the nonword indicator.
38 The lower nibble of the highest byte is the length of the code's byte sequence
39 (for maps in #code values, else in # cleartext bytes).
40
41 If the sequence does not fit into the lower 3 bytes,
42 the value&0xffffff is the offset of the bytes.
43
44 For primaries and variants, the bytes are cleartext.
45 For maps, the bytes are code text (length given as #codes),
46 where every code uses one byte, if p+v<=256, else two.
47
48 With variants, we also have an array[p+v] of variant info.
49 For a primary code 0..p-1,
50 this is the number of secondary and tertiary variants,
51 and the code of the first of the (sec+1)*(ter+1) variants - 1.
52
53 For a variant code p..p+v-1, this is its secondary and tertiary weights
54 and the code of the associated primary.
55 */
56
57 typedef union { /* one 32 bit code value, accessible as a whole or bytewise */
58 unsigned u; /* whole value; u&0xffffff is an offset if the bytes do not fit */
59 struct {
60 #ifdef CPU_BIG_ENDIAN
61 unsigned char hi; /* highest byte of u: 0x80 = nonword bit, low nibble = length */
62 unsigned char c[3]; /* up to 3 immediate sequence bytes */
63 #else
64 unsigned char c[3]; /* up to 3 immediate sequence bytes */
65 unsigned char hi; /* highest byte of u: 0x80 = nonword bit, low nibble = length */
66 #endif
67 } b;
68 } Cv;
69
70 typedef struct Var { /* variant info, one entry per primary and per variant code */
71 unsigned char sec; /* secondary variant (count for a primary, weight for a variant) */
72 unsigned char ter; /* tertiary variant (count for a primary, weight for a variant) */
73 unsigned short rel; /* code of related variant: for a primary its first variant - 1, for a variant its primary */
74 } Var;
75
76 /*
77 For the encoding, we map every byte to:
78 - its code number
79 - the "word" bit
80 - the index of a table of possible following bytes
81
82 To cover the BMP with UTF-8, we need 1072 tables:
83 - 32 bytes 110* initiating a 2 byte sequence,
84 using one table each
85 - 16 bytes 1110* initiating a 3 byte sequence,
86 each having a table with 64 second bytes 10*,
87 using 16+16*64 = 1040 tables
88 (- the 32 tables used for the UTF-16 surrogates D800-DFFF)
89 CJK 3400-9FFF alone uses 432 tables (some 27.000 ideographs)
90 */
91 typedef struct Bin { /* byte info: what one byte means in the context of its table */
92 unsigned short cod; /* byte's collation code; 0 if no sequence ends with this byte */
93 unsigned short tab; /* number (1 based) of the table for following bytes; 0 if none */
94 } Bin;
95
96 typedef Bin Bins [256]; /* full table: one Bin for every possible byte value */
97
98 typedef struct Tab { /* table info: locates one compacted (non-root) byte table */
99 unsigned off; /* byte offset, from the start of the Cdx, of the Bin for min */
100 unsigned char min; /* lowest byte value that has an entry in this table */
101 unsigned char max; /* highest byte value that has an entry in this table */
102 unsigned short unu; /* not referenced in this file -- presumably unused padding */
103 } Tab;
104
105
106 enum { /* flag bits and base types for Cdx.typ, plus the builtin table size */
107 BIT_VARIANTS = 0x20, /* set by cMake when there are variant codes (var > pri) */
108 BIT_FRENCH = 0x10,
109 TYP_PLAIN = 0, /* the only base type cMake currently produces */
110 TYP_VARIANTS = BIT_VARIANTS, /* TODO */
111 TYP_FRENCH = BIT_VARIANTS|BIT_FRENCH, /* TODO */
112 CVLAT1 = 104 /* # primary codes for builtin lat1cdx; also the declared size of Cdx.cv */
113 };
114
115 /* header of a dumped/mapped cdx; the variable sized parts follow in one contiguous chunk */
116 struct Cdx {
117 unsigned char mag[3]; /* magic MCX or mcx for mapped cdx; cClose dispatches on mag[0] */
118 unsigned char typ; /* base type | bits per (primary) code-1 */
119 /* currently only 8(7) and 16(15) bits supported */
120 unsigned short pri; /* p # primary codes incl. 0,1 */
121 unsigned short var; /* p + v # variants */
122 unsigned short map; /* p+v + m # maps */
123 unsigned short tab; /* # non-root tables 1..tab */
124 /* redundant offsets */
125 unsigned ovi; /* offset variant_infos */
126 unsigned otp; /* offset table_pointers - sizeof (Tab), so table numbers 1..tab index it directly */
127 unsigned siz; /* total size */
128 Bins bt0; /* root table, always full */
129 unsigned cv[CVLAT1]; /* actually Cv code_values[map=p+v+m] */
130 /* Var variant_infos[var=p+v], if BIT_VARIANTS */
131 /* Tab table_pointers[tab] */
132 /* Bin byte_tables ... */
133 /* unsigned char *more_bytes */
134 };
135
136
137 static const char MAGIC[3] = /* file magic; differs by byte order, since the dump is in native layout */
138 #ifdef CPU_BIG_ENDIAN
139 {'M','C','X'};
140 # define W(b) (0x01000000|(b)<<16) /* code value: word, length 1, immediate byte b */
141 # define N(b) (0x81000000|(b)<<16) /* code value: nonword, length 1, immediate byte b */
142 #else
143 {'m','c','x'};
144 # define W(b) (0x01000000|(b)) /* code value: word, length 1, immediate byte b */
145 # define N(b) (0x81000000|(b)) /* code value: nonword, length 1, immediate byte b */
146 #endif
147
148 #if 0
149 const Cdx lat1cdx = {
150 {'s','t','a'}, /* mark as static */
151 TYP_PLAIN|7, CVLAT1, CVLAT1, CVLAT1, 0,0,0,0,
152 { /* byte infos */
153 #define B(b) {b,0}
154 /* 32 C0 controls */
155 B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
156 B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
157 B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
158 B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
159 /* ! " # $ % & ' */
160 B( 2),B( 3),B( 4),B( 5),B( 6),B( 7),B( 8),B( 9),
161 /* ( ) * + , - . / */
162 B(10),B(11),B(12),B(13),B(14),B(15),B(16),B(17),
163 /* 0 1 2 3 4 5 6 7 */
164 B(18),B(19),B(20),B(21),B(22),B(23),B(24),B(25),
165 /* 8 9 : ; < = > ? */
166 B(26),B(27),B(28),B(29),B(30),B(31),B(32),B(33),
167 /* @ A B C D E F G */
168 B(34),B(35),B(36),B(37),B(38),B(39),B(40),B(41),
169 B(42),B(43),B(44),B(45),B(46),B(47),B(48),B(49), /* H-O */
170 B(50),B(51),B(52),B(53),B(54),B(55),B(56),B(57), /* P-W */
171 /* X Y Z [ \ ] ^ _ */
172 B(58),B(59),B(60),B(61),B(62),B(63),B(64),B(65),
173 /* ` a b c d e f g */
174 B(66),B(35),B(36),B(37),B(38),B(39),B(40),B(41),
175 B(42),B(43),B(44),B(45),B(46),B(47),B(48),B(49), /* h-o */
176 B(50),B(51),B(52),B(53),B(54),B(55),B(56),B(57), /* p-w */
177 /* x y z { | } ~ DEL */
178 B(58),B(59),B(60),B(67),B(68),B(69),B(70),B(0),
179 /* 32 C1 controls */
180 B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
181 B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
182 B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
183 B(0),B(0),B(0),B(0),B(0),B(0),B(0),B(0),
184 /* 32 mostly symbols */
185 B( 2),B(71),B(72),B(73),B(74),B(75),B(76),B(77),
186 B(78),B(79),B(80),B(81),B(82),B(83),B(84),B(85),
187 B(86),B(87),B(88),B(89),B(90),B(91),B(92),B(93),
188 B(94),B(95),B(96),B(97),B(98),B(99),B(100),B(101),
189 /* 64 Latin alphas including 2 symbols */
190 B(35),B(35),B(35),B(35),B(35),B(35),B(35),B(37), /* 7A,1C */
191 B(39),B(39),B(39),B(39),B(43),B(43),B(43),B(43), /* 4E,4I */
192 B(54),B(49),B(49),B(49),B(49),B(49),B(49),B(102), /* TN5O* */
193 B(49),B(55),B(55),B(55),B(55),B(59),B(54),B(53), /* O4UYTS */
194 B(35),B(35),B(35),B(35),B(35),B(35),B(35),B(37), /* 7a,1c */
195 B(39),B(39),B(39),B(39),B(43),B(43),B(43),B(43), /* 4e,4i */
196 B(54),B(49),B(49),B(49),B(49),B(49),B(49),B(103), /* tn5o% */
197 B(49),B(55),B(55),B(55),B(55),B(59),B(54),B(59) /* o4uyty */
198 #undef B
199 },
200 { /* code values, all using immediate bytes */
201 0, N(9),
202 N(32),N(33),N(34),N(35),N(36),N(37),N(38),N(39), /* !"#$%&' */
203 N(40),N(41),N(42),N(43),N(44),N(45),N(46),N(47), /* ()*+,-./ */
204 W(48),W(49),W(50),W(51),W(52),W(53),W(54),W(55), /* 01234567 */
205 W(56),W(57),N(58),N(59),N(60),N(61),N(62),N(63), /* 89:;<=>? */
206 N(64),W(65),W(66),W(67),W(68),W(69),W(70),W(71), /* @ABCDEFG */
207 W(72),W(73),W(74),W(75),W(76),W(77),W(78),W(79), /* HIJKLMNO */
208 W(80),W(81),W(82),W(83),W(84),W(85),W(86),W(87), /* PQRSTUVW */
209 W(88),W(89),W(90),N(91),N(92),N(93),N(94),N(95), /* XYZ[\]^_ */
210 N(96), /*a-z*/ N(123),N(124),N(125),N(126), /* ` {|}~ */
211 N(161),N(162),N(163),N(164),N(165),N(166),N(167),
212 N(168),N(169),N(170),N(171),N(172),N(173),N(174),N(175),
213 N(176),N(177),N(178),N(179),N(180),N(181),N(182),N(183),
214 N(184),N(185),N(186),N(187),N(188),N(189),N(190),N(191),
215 N(215), N(247)
216 }
217 }; /* lat1cdx */
218 #endif
219
220
221 /*
222 encode l byte in b to key.
223 key->len is the max len on input, resulting len on return.
224 stop if max key len is hit or on word boundary, if words.
225 return #used bytes in b.
226 */
227 int cEnc ( const Cdx *cdx, Key *key, unsigned char *b, int l, int words )
228 {
229 const unsigned char * const base = (unsigned char *)cdx;
230 const Bin * const bt0 = cdx->bt0;
231 const Tab * const tp = (Tab*)(base + cdx->otp);
232 const int bits = 1+(0xf&cdx->typ), mapcodeshift = 256<cdx->var ? 1 : 0;
233 const unsigned char *e = b+l;
234 int bitsleft = 8*key->len, pfxlen = 0;
235 int unassigned = 0;
236 Bin seq[CDX_MAXSEQ], *top; /* current sequence */
237 unsigned u, len;
238 unsigned short code;
239 Cv cv;
240
241 key->len = 0;
242 for ( ; b < e && bits <= bitsleft; b++ ) {
243 eRr(LOG_TRACE, "byte %c of %d bits %d/%d", *b, e-b, bitsleft, bits);
244 /* traverse byte info tables to find longest matching sequence */
245 for (*(top = seq) = bt0[*b]; top->tab && b < e; ) {
246 const Tab * const t = tp + top->tab;
247 unsigned char n = b[1];
248 LOG_DBG(LOG_DEBUG, "table %d check %d %d-%d off %d %d",
249 top->tab, n, t->min, t->max, t->off, *(int*)(base+t->off));
250 if ( n < t->min || n > t->max /* out of bounds */
251 || !(u = ((unsigned*)(base+t->off))[n - t->min]) /* unassigned */
252 )
253 break;
254 *++top = *(Bin*)&u;
255 b++;
256 }
257 for ( ; top > seq && !top->cod; top-- ) /* no complete sequence */
258 b--;
259 code = top->cod;
260 eRr(LOG_TRACE, "code %d", code);
261 if (words) {
262 if ( 1<words /* leave 1st words-1 bytes alone */
263 && b+l-e >= words /* ok, passed it for the 1st time */
264 ) {
265 pfxlen = key->len;
266 words = 1;
267 } else if (0x80000000&cdx->cv[code]) { /* hit nonword */
268 if (pfxlen == key->len) /* skip to word */
269 continue;
270 break; /* had something after prefix */
271 }
272 }
273 if (cdx->pri > code) {
274 if ( code )
275 unassigned = 0;
276 else { /* unassigned */
277 if ( unassigned )
278 continue;
279 unassigned = 1;
280 code = 1;
281 }
282 switch (bits) {
283 case 16: key->byt[key->len++] = code >> 8; /* always bigend */
284 case 8: key->byt[key->len++] = (char)code;
285 }
286 bitsleft -= bits;
287 continue;
288 }
289 /* if (cdx->var > top->cod) variant */
290 /* else map: */
291 cv.u = cdx->cv[code];
292 if ((len = (0xf & cv.b.hi)<<mapcodeshift)) { /* len = #codes */
293 /* mapped variants TODO */
294 if ( 0 > (bitsleft -= (len << 3)) )
295 break;
296 memcpy(key->byt+key->len, ~3&len ? base+(0xffffff&cv.u) : cv.b.c, len);
297 key->len += len;
298 }
299 }
300 if (/*words &&*/ key->len == pfxlen) /* if pfxlen: found no words */
301 key->len = 0;
302
303 return b+l-e;
304 } /* cEnc */
305
306
307 int cDec ( const Cdx *cdx, unsigned char *b, int l, Key *key )
308 {
309 const unsigned char * const base = (unsigned char *)cdx;
310 const unsigned char *k = key->byt, *e = b+l-CDX_MAXSEQ;
311 const int bits = 1+(0xf&cdx->typ);
312 int bitsleft = 8*key->len;
313 Cv cv;
314
315 for ( ; b < e && bits <= bitsleft; bitsleft -= bits ) {
316 unsigned short v = *k++;
317 if ( 16 == bits )
318 v = v<<8 | *k++;
319 cv.u = cdx->cv[v];
320 switch ( cv.b.hi &= 0xf ) { /* discard word bit */
321 case 1: *b++ = cv.b.c[0]; continue;
322 case 2: *b++ = cv.b.c[0]; *b++ = cv.b.c[1]; continue;
323 case 3: *b++ = cv.b.c[0]; *b++ = cv.b.c[1]; *b++ = cv.b.c[2]; continue;
324 }
325 memcpy(b, base+(cv.u&0xffffff), cv.b.hi);
326 b += cv.b.hi;
327 }
328 return b+l-CDX_MAXSEQ-e;
329 } /* cDec */
330
331
332
333 typedef struct { /* scratch space for compiling a collation in cMake */
334 Cdx cdx; /* header under construction, incl. root table and first CVLAT1 code values */
335 unsigned cvb[0x10000-CVLAT1]; /* code values beyond cdx.cv; code is indexed as if both were one array */
336 Var *vi, vib[0x10000]; /* variant infos; vi is initialised to vib */
337 Tab tp[1100]; /* table pointers, filled when the tables are compacted */
338 Bins *bt, btb[1100]; /* full (uncompacted) non-root tables; bt is the next free one */
339 unsigned char *p, b[200000]; /* pool for sequences too long for a code value; p is the next free byte */
340 } CdxMake;
341
342
343 /* make byte table entries */
344 static int mapSeq (CdxMake *mk, unsigned char *p, int len,
345 unsigned short code, int save)
346 {
347 Bins *bt = &mk->cdx.bt0;
348 LOG_DBG(LOG_DEBUG, "map '%.*s' -> %d", len, p, code);
349 if (save) {
350 Cv *cv = (Cv*)mk->cdx.cv+code;
351 if (15 < len) {
352 eRr(ERR_INVAL, "sequence '%.*s' too long %d", len, p, len);
353 return 1;
354 }
355 cv->b.hi = (0x80&save) | len;
356 if (!(~3 & len))
357 memcpy(cv->b.c, p, len);
358 else {
359 cv->u |= mk->p - mk->b;
360 memcpy(mk->p, p, len);
361 mk->p += len;
362 }
363 }
364 for (;; p++) {
365 Bin *b = *bt+*p;
366 if (!--len) { /* last byte -- assign code */
367 if (b->cod) {
368 eRr(ERR_INVAL, "attempt to reassign code %d for %d", code, b->cod);
369 return 1;
370 }
371 b->cod = code;
372 return 0;
373 }
374 if (b->tab)
375 bt = mk->btb + (b->tab - 1);
376 else {
377 bt = mk->bt++;
378 b->tab = mk->bt - mk->btb;
379 LOG_DBG(LOG_DEBUG, "new table %d", b->tab);
380 }
381 }
382 return 0;
383 }
384
385 /** compile collation src */
386 static Cdx *cMake ( const Fld *src )
387 {
388 const Fld *eof = REND(src), *f;
389 CdxMake mk;
390 int skipalias = 1, mapcodeshift;
391 unsigned short firstcode = 1; /* of last row */
392 unsigned bins=0, off, u;
393 Cdx *cdx;
394
395 LOG_DBG(LOG_DEBUG, "cMake %d", sizeof mk);
396 memset(&mk, 0, sizeof mk);
397 mk.vi = mk.vib;
398 mk.bt = mk.btb;
399 mk.p = mk.b;
400 mk.cdx.cv[0] = 0x80000000; /* 0: unassigned */
401 mk.cdx.cv[1] = 0x81000009; /* 1: tab */
402 mk.cdx.pri = 2;
403
404 /* first pass: gather primaries and aliases */
405 for (f = src; ++f<eof; ) if (MET_COL == f->tag) {
406 unsigned char *p = (unsigned char*)f->val, *e, *t, save;
407 unsigned short code;
408
409 if (3 > f->len || TAB != p[1]) continue;
410 e = p + f->len;
411 switch (*p) {
412 case 'A': if (skipalias) continue; save = 0; goto alias;
413 case 'W': save = 1; break;
414 case 'N': save = 0x80; break;
415 case 'S':
416 case 'T':
417 skipalias = 1;
418 default: continue;
419 }
420 firstcode = mk.cdx.pri;
421 skipalias = 0;
422 alias:
423 code = firstcode;
424 for (p+=2; e > p; p = t+1) {
425 if (!(t = memchr(p, TAB, e-p))) t = e;
426 if (t > p && mapSeq(&mk, p, t-p, code, save)) return 0;
427 if (save || code < mk.cdx.pri-1) code++;
428 }
429 if (save) mk.cdx.pri = code;
430 }
431 mk.cdx.typ = TYP_PLAIN|(256 < mk.cdx.pri ? 15 : 7);
432
433 mk.cdx.var = mk.cdx.pri; /* second pass: gather variants */
434 for (f = src; ++f<eof; ) if (MET_COL == f->tag) {
435 unsigned char *p = (unsigned char*)f->val;
436
437 if (3 > f->len || TAB != p[1]) continue;
438 switch (*p) {
439 case 'S':
440 case 'T':
441 eRr(ERR_INVAL, "this version does not support multilevel");
442 return 0;
443 }
444 }
445 if (mk.cdx.var > mk.cdx.pri)
446 mk.cdx.typ |= BIT_VARIANTS;
447
448 mk.cdx.map = mk.cdx.var; /* third pass: resolve maps */
449 mapcodeshift = 256<mk.cdx.var ? 1 : 0;
450 for (f = src; ++f<eof; ) if (MET_COL == f->tag) {
451 unsigned short code, codes[CDX_MAXSEQ];
452 unsigned char ccodes[CDX_MAXSEQ], *pcodes;
453 unsigned char *p = (unsigned char*)f->val, *e, *t;
454 unsigned short n = 0;
455 Bin seq[CDX_MAXSEQ], *bin; /* current sequence */
456 Cv *cv;
457
458 if (3 > f->len || 'M' != p[0] || TAB != p[1]) continue;
459 e = p + f->len;
460 p += 2;
461 if (!(t = memchr(p, TAB, e-p))) continue;
462
463 for ( ; p<t && n<CDX_MAXSEQ; p++ ) { /* get n codes for map */
464 for (*(bin = seq) = mk.cdx.bt0[*p]; bin->tab && p < t;) {
465 /* slightly simpler than in cEnc, since we have full tables */
466 int *ent = (int*)mk.btb[bin->tab-1];
467 if (!ent[p[1]]) break; /* unassigned */
468 *++bin = *(Bin*)(ent + *++p);
469 if (seq+CDX_MAXSEQ == bin) break;
470 }
471 for (; bin>seq && (!bin->cod || mk.cdx.var<=bin->cod); bin--)
472 p--;
473 if (mk.cdx.var>bin->cod) /* ignore map codes */
474 codes[n++] = bin->cod ? bin->cod : 1;
475 }
476 p = t+1;
477
478 /* got n codes ... */
479 if (1 == n) /* single code ~ alias */
480 code = codes[0];
481 else { /* make entry cv[mk.cdx.map++] (even for n=0) */
482 if (mapcodeshift) {
483 pcodes = (unsigned char*)codes; /* native */
484 #ifndef CPU_BIG_ENDIAN
485 { unsigned short *us = codes+n;
486 while (us-- > codes) *us = (*us>>8) | (*us<<8); /* mk be */
487 }
488 #endif
489 } else {
490 for (u=n; u--;) ccodes[u] = codes[u];
491 pcodes = ccodes;
492 }
493 cv = (Cv*)mk.cdx.cv + mk.cdx.map;
494 cv->b.hi = n;
495 if (n && 0x80000000&mk.cdx.cv[codes[0]]) /* inherit from 1st code */
496 cv->b.hi |= 0x80;
497 n <<= mapcodeshift;
498 if (!(~3 & n))
499 memcpy(cv->b.c, pcodes, n);
500 else {
501 cv->u |= mk.p - mk.b;
502 memcpy(mk.p, pcodes, n);
503 mk.p += n;
504 }
505 code = mk.cdx.map++;
506 }
507 for (; e > p; p = t+1) { /* map 'em all to mk.cdx.map */
508 if (!(t = memchr(p, TAB, e-p))) t = e;
509 if (t > p && mapSeq(&mk, p, t-p, code, 0)) return 0;
510 }
511 }
512 /* finish: compact */
513 mk.cdx.tab = mk.bt - mk.btb;
514 mk.cdx.otp =
515 mk.cdx.ovi = (((char *)&((Cdx*)0)->cv) - (char*)0) /* offsetoff(cv) */
516 + mk.cdx.map*sizeof (unsigned);
517 if (mk.cdx.var > mk.cdx.pri)
518 mk.cdx.otp += mk.cdx.var * sizeof (Var);
519 off = mk.cdx.otp + mk.cdx.tab * sizeof (Tab);
520 for (u=0; u<mk.cdx.tab; u++) /* compact tables */ {
521 int *base = (int *)(mk.btb + u), *pi = base;
522 mk.tp[u].off = off;
523 while (!*pi) pi++;
524 LOG_DBG(LOG_DEBUG, "found code %x", *pi);
525 mk.tp[u].min = pi - base;
526 for (pi = base+256; !*--pi; ) ;
527 mk.tp[u].max = pi - base;
528 bins += 1+mk.tp[u].max-mk.tp[u].min;
529 off += (1+mk.tp[u].max-mk.tp[u].min)*sizeof (Bin);
530 LOG_DBG(LOG_DEBUG, "table %d %d-%d", u+1, mk.tp[u].min, mk.tp[u].max);
531 }
532 mk.cdx.siz = off + (mk.p - mk.b);
533 eRr(LOG_INFO,
534 "%d primaries %d variants %d maps %d tables %d bins, off %d size %d",
535 mk.cdx.pri, mk.cdx.var-mk.cdx.pri, mk.cdx.map-mk.cdx.var, mk.cdx.tab,
536 bins, off, mk.cdx.siz);
537 /* adjust indirect CVs by off */ {
538 unsigned i = 0, mask = 0x0c000000;
539 for (; i < mk.cdx.var; i++)
540 if (0x0c000000 & mk.cdx.cv[i])
541 mk.cdx.cv[i] += off;
542 if (mapcodeshift) mask |= 0x02000000; /* only 1 code fits */
543 for (; i < mk.cdx.map; i++)
544 if (mask & mk.cdx.cv[i])
545 mk.cdx.cv[i] += off;
546 }
547 cdx = (Cdx*)mAlloc(mk.cdx.siz);
548 memcpy(cdx, &mk.cdx, mk.cdx.ovi);
549 if (cdx->var > cdx->pri)
550 memcpy((char*)cdx + cdx->ovi, mk.vib, mk.cdx.var*sizeof (Var));
551 if (cdx->tab) {
552 memcpy((char*)cdx + cdx->otp, mk.tp, mk.cdx.tab*sizeof (Tab));
553 for (u=0; u<mk.cdx.tab; u++)
554 memcpy((char*)cdx + mk.tp[u].off,
555 mk.btb[u] + mk.tp[u].min,
556 (1+mk.tp[u].max-mk.tp[u].min)*sizeof (Bin));
557 }
558 memcpy((char*)cdx + off, mk.b, mk.p - mk.b);
559 cdx->otp -= sizeof (Tab); /* adjust 0 based */
560
561 return cdx;
562 } /* cMake */
563
564
565
566 /*
567 list of open shared cdx
568 */
569 typedef struct Foo Foo;
570
571 struct Foo { /* registry entry for one named, shared cdx */
572 char nln; /* namelen */
573 char nam[31]; /* name, nln bytes, compared by memcmp (not used as a C string here) */
574 Foo *foo; /* next entry in the list */
575 int ref; /* reference count: incremented by cOpen, decremented by cClose */
576 Cdx *cdx; /* the shared collation */
577 FMap fm; /* the file mapping, if cdx was mapped from a file */
578 };
579 #if 0
580 static Foo lat1foo = { 7, "Latin-1", 0, 1, (Cdx*)&lat1cdx };
581 static Foo *fools = &lat1foo;
582 #endif
583 static Foo *fools = 0; /* head of the list of named cdx; new entries are put in front */
584
585
586
587 /** open or compile collation src */
588 const Cdx *cOpen (const Fld *src)
589 {
590 char *nam = 0, *p;
591 int nln = 0, coldef = 0;
592 Foo *foo = fools;
593 Cdx *cdx = 0;
594 const Fld *eof, *f;
595 char fname[20];
596 FMap fm;
597
598 if (src)
599 for (f = src, eof = REND(src); ++f < eof; )
600 if (MET_COL == f->tag) {
601 coldef = 1;
602 if (2 < f->len && 'C' == f->val[0]
603 && TAB == f->val[1] && TAB != f->val[2]
604 ) { /* named */
605 nam = f->val+2;
606 nln = f->len-2;
607 if ((p = memchr(nam, TAB, nln))) nln = p-nam;
608 if (nln > 15) nln = 15;
609 LOG_DBG(LOG_DEBUG, "collation name '%.*s'", nln, nam);
610 for (; foo; foo = foo->foo)
611 if (nln == foo->nln && !memcmp(nam, foo->nam, nln)) { /* got it */
612 foo->ref++; /* ref it */
613 return foo->cdx; /* ret it */
614 }
615 /* TODO: try to map -- check type */
616 memset(fname, 0, sizeof fname);
617 memcpy(fname, nam, nln);
618 memcpy(fname+nln, ".mcx", 5);
619 memset(&fm, 0, sizeof fm);
620 fm.fil = FIL_NONE;
621 if (!fMOpen(&fm, fname, FIL_RD)) {
622 int size = fSize(fm.fil);
623 fm.lim = (size + env.psz-1)>>env.psh;
624 if ( (int)fm.lim != fMap(&fm, fm.lim)
625 || memcmp(MAGIC, fm.map, 3)
626 || size != (int)((Cdx*)fm.map)->siz
627 ) {
628 eRr(ERR_TRASH, "bad coll file '%s'", fname);
629 fMClose(&fm);
630 } else {
631 cdx = (Cdx*)fm.map;
632 eRr(LOG_VERBOSE, "mapped coll '%s' %d bytes", fname, size);
633 #ifndef WIN32
634 fClose(&fm.fil); /* don't need the file open */
635 #endif
636 }
637 }
638 }
639 }
640 if (!cdx && (!coldef || !(cdx = cMake(src))))
641 return 0;
642 if (nam) { /* foo an mmaped or named made */
643 Foo *newfoo = mAlloc(sizeof *newfoo);
644 memcpy(newfoo->nam, nam, newfoo->nln = nln);
645 newfoo->foo = fools;
646 newfoo->ref = 1;
647 newfoo->cdx = cdx;
648 if (fm.map) /* was mapped */
649 newfoo->fm = fm;
650 else { /* try to write */
651 file fil = FIL_NONE;
652 if (!fOpen(&fil, fname, FIL_WR|FIL_CREAT|FIL_TRUNC)) {
653 memcpy(cdx->mag, MAGIC, 3);
654 fWrite(&fil, cdx, cdx->siz);
655 fClose(&fil);
656 eRr(LOG_INFO, "saved coll '%s' %d bytes", fname, cdx->siz);
657 }
658 memcpy(cdx->mag, "nam", 3); /* yet we keep using our selfmade copy */
659 }
660 fools = newfoo;
661 }
662 return cdx;
663 } /* cOpen */
664
665
666 void cClose (Cdx *cdx)
667 {
668 Foo *foo, *f = 0;
669 switch (cdx->mag[0]) {
670 case 0: /* internal anonymous */
671 mFree(cdx);
672 case 's': /* internal static */
673 return;
674 #ifdef CPU_BIG_ENDIAN
675 case 'M':
676 #else
677 case 'm': /* mapped - always named */
678 #endif
679 case 'n': /* internal named */
680 for (foo = fools; foo; foo = (f = foo)->foo)
681 if (foo->cdx == cdx) {
682 if (!--foo->ref) {
683 if ('n' == cdx->mag[0])
684 mFree(cdx);
685 else
686 fMClose(&foo->fm);
687 if (f)
688 f->foo = foo->foo;
689 else
690 fools = foo->foo;
691 mFree(foo);
692 }
693 return;
694 }
695 /* panic time ? */
696 }
697 } /* cClose */

  ViewVC Help
Powered by ViewVC 1.1.26