/[webpac]/openisis/0.9.9e/tool/iif.c
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /openisis/0.9.9e/tool/iif.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 604 - (show annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 3 months ago) by dpavlin
File MIME type: text/plain
File size: 11644 byte(s)
import of new openisis release, 0.9.9e

1 /*
2 The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3 Version 0.9.x (patchlevel see file Version)
4 Copyright (C) 2001-2004 by Erik Grziwotz, erik@openisis.org
5
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 See the GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
20 see README for more information
21 EOH */
22
23 /*
24 $Id: iif.c,v 1.1 2004/04/13 09:50:38 kripke Exp $
25 malete IIF(ISO2709/Z39.2)/MARC conversion
26 less general than IIF, but less restrictive than MARC
27 */
28
29 #include "../tool/tool.h"
30
31 /*
32 implemented structure of IIF files:
33
34 RT, FT and delimiter as defined by Z39.2-1979
35 other chars as used by WinIsis (esp. delimiter ^) are NOT supported
36
37 the bad habit of WinIsis to insert \r\n every 80 chars is NOT supported
38
39 Z39.2 specifies that
40 tag 1 is for a control number, every record must have exactly one
41 tag 2 is for subrecord purposes
42 tags 3-9,a-z are other control fields
43 control field tags 001-00z must be first and sorted in dict and data,
44 so base of data contains always the "control number" (not nec. a number)
45 control fields contain neither indicators nor subfields/delimiters, but FT
46
47 MARC specifies that
48 tag 5 is date and time of latest transaction
49 there are 2 indicators in [ a-z0-9]
50 unused indicators should be blank; printed as # in specs
51 other unused but fixed coded data should use a '|'
52 */
/*
	the three control characters structuring a record (Z39.2-1979)
	they are the consecutive ASCII codes 29, 30 and 31
*/
enum {
	RT = 0x1d, /* ^] closes a record; MARC specs print it as \ */
	FT = 0x1e, /* ^^ closes a field (and the dictionary); printed ^ or @ */
	DL = 0x1f  /* ^_ introduces a subfield; printed $ */
};
58
/*
	the 24 byte leader in front of every record; mostly ASCII digits
	all members are plain chars, the member order is the byte order
*/
typedef struct {
	char len[5]; /* length of the whole record, RT included */
	char sts;    /* record status (new, updated); "ASCII graphic" */
	char typ;    /* record type; "ASCII graphic" */
	char id0[3]; /* implementation defined, used by some MARCs */
	char ind;    /* number of indicators; MARC uses 2 */
	char idl;    /* identifier length, delimiter included; MARC uses 2 */
	char off[5]; /* base address of data = 24 + dictionary size */
	char id1[3]; /* implementation defined, used by some MARCs */
	/* the entry map describing a dictionary entry; lof+scp > 0 */
	char lof;    /* digits used for the length of field; MARC uses 4 */
	char scp;    /* digits used for the starting char pos; MARC uses 5 */
	char idp;    /* bytes of implementation defined portion; MARC uses 0 */
	char res;    /* reserved; 0 in MARC */
} Leader;
74
/*
	one dictionary entry in the MARC layout (3+4+5 bytes)
	the entries follow the leader and are closed by a single FT
*/
typedef struct {
	char tag[3]; /* field tag; IIF permits alpha, MARC wants digits */
	char lof[4]; /* field length, indicators and FT included */
	             /* a lof of 0 means the next entry adds more length */
	             /* to the same field */
	char scp[5]; /* starting char pos, relative to the leader's off */
	/* char idp[idp]; the implementation defined portion, unused here */
} Entry;
83
84 typedef struct {
85 Fld file; /* points into env->opt */
86 int rid; /* use control number as MFN */
87 int nomarc; /* do not assume MARC */
88 int write; /* write iif */
89 char ind[2];
90 char sub;
91 } IifOpt;
92
93
94 static int iifOpt (IifOpt *opt, Fld *args)
95 {
96 Fld o;
97 memset(opt, 0, sizeof(*opt));
98 for ( o.val = 0; vGet(&o, args, "FNPR"); )
99 switch (o.tag) {
100 case 'F':
101 opt->file = o;
102 continue;
103 case 'N':
104 opt->nomarc = !0;
105 continue;
106 case 'P':
107 opt->ind[0] = opt->ind[1] = ' ';
108 opt->sub = '0';
109 switch (o.len) {
110 default: opt->sub = o.val[2];
111 case 2: opt->ind[1] = o.val[1];
112 case 1: opt->ind[0] = o.val[0];
113 case 0: ;
114 }
115 continue;
116 case 'R':
117 opt->rid = !0;
118 continue;
119 }
120 return 0;
121 } /* iifOpt */
122
123
124 int iifuse ()
125 {
126 return eRr(ERR_INVAL, "[iifimp|iifexp]:\n"
127 "-F<file>: path/filename of iif (default = malete db.iif)\n"
128 #ifndef WIN32
129 "\t-F- for stdin/out\n"
130 #endif
131 "-Nomarc: do not assume MARC structure 22/450# (imp)\n"
132 "-P[<ii><c>]: prepend indicators ii and, where needed, subfield c (exp)\n"
133 "-Rid: use control number as rid (imp)\n"
134 "use tools like yaz-marcdump for character set recoding\n"
135 );
136 } /* iifuse */
137
138
139 static int iifOpen (file *iif, IifOpt *opt, Db *db)
140 {
141 int filmode = opt->write ? FIL_RDWR|FIL_CREAT|FIL_TRUNC : FIL_RD;
142 int ret;
143 char *path;
144 if ( opt->file.len ) {
145 #ifndef WIN32
146 if ( 1 == opt->file.len && '-' == *opt->file.val ) {
147 *iif = opt->write ? 1 : 0;
148 return 0;
149 }
150 #endif
151 path = VDUPZ(&opt->file);
152 } else {
153 int l = strlen(db->pat);
154 path = mAlloc(l+5);
155 memcpy(path, db->pat, l);
156 memcpy(path+l, ".iif", 5);
157 }
158 ret = fOpen(iif, path, filmode) ?
159 eRr(LOG_ERROR, "no access to IIF file '%s'", path) : 0;
160 mFree(path);
161 return ret;
162 } /* iifOpen */
163
164
165 /*
166 tags may contain alpha; uppercase or lowercase ("but not both")
167 UNIMARC and others specify plain numeric tags
168 MARC specs first two to be digits; we map:
169 3 digits to 0-999 (000 actually forbidden)
170 2 digits: 3rd 'a' to 1000-1099, 'b' to 1100-1199 up to 3599
171 1 digit: 1st + 360*2ndbase36 + 10*3rdbase36 up to 12959
172 others are base 36 (>=12960 = 'a00') < 36^3 = 46656
173 */
174 static int rdtag (char *t)
175 {
176 int tag;
177 switch ( a2il(t, 3, &tag) ) {
178 case 3: return tag;
179 case 2: return tag + 100*b36val[(unsigned char)t[2]];
180 case 1: return tag
181 + 360*b36val[(unsigned char)t[1]]
182 + 10*b36val[(unsigned char)t[2]];
183 }
184 return 1296*b36val[(unsigned char)t[0]]
185 + 36*b36val[(unsigned char)t[1]]
186 + b36val[(unsigned char)t[2]];
187 }
188
189 static OPT_INLINE void pr (char *t, unsigned i, unsigned l)
190 {
191 for ( ; l--; i/=10) t[l] = b36dig[i%10];
192 }
193
194 static void prtag (char *t, unsigned tag)
195 {
196 if (1000 > tag)
197 pr(t, tag, 3);
198 else if (3600 > tag) {
199 pr(t, tag%100, 2);
200 t[2] = b36dig[tag/100];
201 } else if (12960 > tag) {
202 t[0] = b36dig[tag%10]; tag /= 10;
203 t[2] = b36dig[tag%36]; tag /= 36;
204 t[1] = b36dig[tag];
205 } else {
206 t[2] = b36dig[tag%36]; tag /= 36;
207 t[1] = b36dig[tag%36]; tag /= 36;
208 t[0] = b36dig[tag%36];
209 }
210 }
211
212
213
214 int iifimp (Db *db, Fld *args)
215 {
216 char buf[99999];
217 IifOpt opt;
218 file iif;
219 Leader ldr;
220 List rec;
221 int ret = 0;
222
223 if ( iifOpt(&opt, args) )
224 return iifuse();
225 if ( iifOpen(&iif, &opt, db) )
226 return eRr(ERR_INVAL, "bad IIF base");
227 #define CLEANUP( args ) do { ret = eRr args; goto cleanup; } while (0)
228 lInit(&rec, "");
229 for (;;) {
230 char head[8];
231 char *ent = buf, *base;
232 int len, off, eln, dln, lof = 0, scp = 0;
233 int got = fRead(&iif, &ldr, sizeof(ldr));
234 int rid = 0;
235 if ( !got )
236 break;
237 if ( (int)sizeof(ldr) != got )
238 CLEANUP((ERR_TRASH, "reading leader got %d", got));
239 LOG_DBG(LOG_TRACE, "leader '%.*s'", 24, &ldr);
240 len = a2i(ldr.len, 5) - 24;
241 off = a2i(ldr.off, 5);
242 if ( opt.nomarc ) {
243 ldr.ind -= '0';
244 ldr.idl -= '0';
245 ldr.lof -= '0';
246 ldr.scp -= '0';
247 ldr.idp -= '0';
248 } else {
249 ldr.ind = 2;
250 ldr.idl = 2;
251 ldr.lof = 4;
252 ldr.scp = 5;
253 ldr.idp = 0;
254 }
255 eln = 3+ldr.lof+ldr.scp+ldr.idp; /* entry length */
256 dln = (off-25)/eln; /* dict len: off - leader - FT */
257 eRr(LOG_VERBOSE,
258 "len %d sts %c typ %c ind %d idl %d off %d map 3+%d+%d+%d=%d %d fields",
259 len, ldr.sts, ldr.typ, ldr.ind, ldr.idl, off,
260 ldr.lof, ldr.scp, ldr.idp, eln, dln);
261 if ( off != 25+eln*dln )
262 eRr(ERR_TRASH, "bad off %d != 25+%d*%d", off, eln, dln);
263 got = fRead(&iif, buf, len);
264 if ( len != got )
265 CLEANUP((ERR_TRASH, "reading body %d got %d", len, got));
266 base = buf+off-24;
267 if ( FT != base[-1] )
268 eRr(ERR_TRASH, "no FT after dict");
269 if ( RT != buf[len-1] )
270 eRr(ERR_TRASH, "no RT");
271 memcpy(head, &ldr.sts, 5); /* sts,typ,id0 */
272 memcpy(head+5, ldr.id1, 3);
273 lClr(&rec);
274 LADD(&rec, -1, head, 8);
275 for ( ; dln--; ent+=eln ) {
276 int tag = rdtag(ent);
277 char *v, *e, *p;
278 if ( ldr.scp )
279 scp = a2i(ent+3+ldr.lof, ldr.scp);
280 else
281 scp += lof;
282 v = base+scp;
283 if ( !ldr.lof ) {
284 if ( !(e = memchr(v, FT, len-off+24)) )
285 CLEANUP((ERR_TRASH, "no FT after field"));
286 lof = e-v+1;
287 } else {
288 if ( !(lof = a2i(ent+3, ldr.lof)) ) { /* long field */
289 int max=1, dig=ldr.lof, add;
290 while (dig--) max *= 10;
291 lof = max;
292 do {
293 ent += eln;
294 if ( !dln-- ) CLEANUP((ERR_TRASH, "eod on long field"));
295 if ( tag != a2i(ent, 3) ) eRr(ERR_TRASH, "bad tag");
296 add = a2i(ent+3, ldr.lof);
297 lof += add ? add : max;
298 } while ( !add );
299 }
300 if ( FT != *(e = v+lof-1) )
301 eRr(ERR_TRASH, "no FT after field");
302 }
303 for ( p=v; p<e; p++ ) if ( DL == *p ) *p = TAB;
304 /* eOut(tag, "%.*s", lof-1, v); */
305 LADD(&rec, tag, v, (unsigned)lof-1);
306 }
307 if ( opt.rid && 1<LLEN(&rec) && 1 == rec.fld[1].tag ) {
308 int num;
309 if ( (int)rec.fld[1].len == a2il(rec.fld[1].val, rec.fld[1].len, &num) )
310 rid = num;
311 else
312 eRr(LOG_WARN, "bad control number '%.*s'",
313 rec.fld[1].len, rec.fld[1].val);
314 }
315 dWrite(db, rec.fld, rid);
316 }
317 cleanup:
318 #undef CLEANUP
319 fClose(&iif);
320 return ret;
321 } /* iifimp */
322
323
324 int iifexp (Db *db, Fld *args)
325 {
326 char buf[99999];
327 IifOpt opt;
328 file iif;
329 Leader ldr;
330 List rec;
331 int i, ret = 0;
332
333 if ( iifOpt(&opt, args) )
334 return iifuse();
335 opt.write = 1;
336 if ( iifOpen(&iif, &opt, db) )
337 return eRr(ERR_INVAL, "opening IIF");
338 ldr.ind = ldr.idl = '2';
339 ldr.lof = '4';
340 ldr.scp = '5';
341 ldr.idp = '0';
342 ldr.res = ' ';
343 lInit(&rec, "");
344 for (i=1; i<=db->rdx.mid; i++) { /* loop records */
345 char head[8], rid[12];
346 char *p;
347 int l, n, o, ll, ridl;
348 Entry *e;
349 Fld *f;
350 if ( 0 >= dRead(lClr(&rec), db, i) ) /* no rec */
351 continue;
352 ll = LLEN(&rec)-1;
353 /* fill head */
354 for (p = rec.fld->val, l = rec.fld->len; l-- && TAB != *p++; ) ;
355 if ( 8 <= l )
356 memcpy(head, p, 8);
357 else {
358 memset(head, ' ', 8);
359 if ( 0 < l )
360 memcpy(head, p, l);
361 }
362 memcpy(&ldr.sts, head, 5);
363 memcpy(ldr.id1, head+5, 3);
364 e = (Entry*)(buf + 24); /* after leader */
365 l = 26; /* leader + dict FT + RT */
366 o = 0; /* offset to base */
367 n = 0; /* # fields used */
368 /* rid -> control number */
369 if ( ll && 1 == rec.fld[1].tag )
370 ridl = 0;
371 else {
372 ridl = i2a(rid, i);
373 prtag(e->tag, 1);
374 pr(e->lof, o = ridl+1, 4);
375 pr(e->scp, 0, 5);
376 l += o + 12;
377 e++;
378 }
379 for ( f = rec.fld+1; l < 99900 && n < ll; n++,e++,f++ ) {
380 int fln = 1 /* FT */, vln = f->len;
381 l += 12; /* Entry */
382 if ( f->tag > 9 ) { /* non control */
383 if ( opt.ind[0] ) fln += 2;
384 if ( opt.sub && vln && TAB != *f->val ) fln += 2;
385 }
386 if ( vln > 9994 ) vln = 9994; /* max field len */
387 if ( vln > 99994-l ) vln = 99994-l; /* max rec len */
388 l += fln += vln;
389 prtag(e->tag, f->tag);
390 pr(e->lof, fln, 4);
391 pr(e->scp, o, 5);
392 o += fln;
393 }
394 p = (char*)e;
395 *p++ = FT;
396 pr(ldr.len, l, 5);
397 pr(ldr.off, p - buf, 5);
398 memcpy(buf, &ldr, 24);
399 /* now add the field data */
400 if ( ridl ) {
401 memcpy(p, rid, ridl);
402 p += ridl;
403 *p++ = FT;
404 }
405 for ( f = rec.fld+1; n--; f++ ) {
406 int vln = f->len;
407 char *v = f->val;
408 if ( f->tag > 9 ) { /* non control */
409 if ( opt.ind[0] ) {
410 *p++ = opt.ind[0];
411 *p++ = opt.ind[1];
412 }
413 if ( opt.sub && vln && TAB != *f->val ) {
414 *p++ = DL;
415 *p++ = opt.sub;
416 }
417 }
418 if ( vln > 9994 ) vln = 9994;
419 if ( vln > 99994-(p-buf) ) vln = 99994-(p-buf);
420 while ( vln-- ) if ( TAB == (*p++ = *v++) ) p[-1] = DL;
421 *p++ = FT;
422 }
423 *p++ = RT;
424 if ( l != p-buf ) {
425 eRr(ERR_IDIOT, "miscalculated len %d got %d rec %d", l, p-buf, i);
426 break;
427 }
428 fWrite(&iif, buf, l);
429 }
430 fClose(&iif);
431 return ret;
432 } /* iifexp */

  ViewVC Help
Powered by ViewVC 1.1.26