1 |
/* |
2 |
The Malete project - the Z39.2/Z39.50 database framework of OpenIsis. |
3 |
Version 0.9.x (patchlevel see file Version) |
4 |
Copyright (C) 2001-2004 by Erik Grziwotz, erik@openisis.org |
5 |
|
6 |
This library is free software; you can redistribute it and/or |
7 |
modify it under the terms of the GNU Lesser General Public |
8 |
License as published by the Free Software Foundation; either |
9 |
version 2.1 of the License, or (at your option) any later version. |
10 |
|
11 |
This library is distributed in the hope that it will be useful, |
12 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
14 |
See the GNU Lesser General Public License for more details. |
15 |
|
16 |
You should have received a copy of the GNU Lesser General Public |
17 |
License along with this library; if not, write to the Free Software |
18 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 |
|
20 |
see README for more information |
21 |
EOH */ |
22 |
|
23 |
/*
	$Id: iif.c,v 1.1 2004/04/13 09:50:38 kripke Exp $
	malete IIF(ISO2709/Z39.2)/MARC conversion
	less general than IIF, but less restrictive than MARC
*/
28 |
|
29 |
#include "../tool/tool.h" |
30 |
|
31 |
/*
	implemented structure of IIF files:

	RT, FT and delimiter as defined by Z39.2-1979
	other chars as used by WinIsis (esp. delimiter ^) are NOT supported

	the bad habit of WinIsis to insert \r\n every 80 chars is NOT supported

	Z39.2 specifies that
	tag 1 is for a control number, every record must have exactly one
	tag 2 is for subrecord purposes
	tags 3-9,a-z are other control fields
	control field tags 001-00z must be first and sorted in dict and data,
	so base of data contains always the "control number" (not nec. a number)
	control fields contain neither indicators nor subfields/delimiters, but FT

	MARC specifies that
	tag 5 is date and time of latest transaction
	there are 2 indicators in [ a-z0-9]
	unused indicators should be blank; printed as # in specs
	other unused but fixed coded data should use a '|'
*/
53 |
/* Structural control characters of an IIF record (ASCII 29..31). */
enum {
	RT = 29,	/* ^] terminates a record; shown as \ in MARC specs */
	FT = 30,	/* ^^ terminates a field; shown as ^ or @ in MARC specs */
	DL = 31		/* ^_ introduces a subfield; shown as $ in MARC specs */
};
58 |
|
59 |
/*
	The 24 byte record leader; mostly ASCII digits.
	All members are plain chars, so the struct mirrors the on-disk layout.
*/
typedef struct {
	char len[5];	/* length of the record, RT included */
	char sts;	/* record status (new, updated); "ASCII graphic" */
	char typ;	/* record type; "ASCII graphic" */
	char id0[3];	/* implementation defined, used by some MARCs */
	char ind;	/* number of indicators; 2 in MARC */
	char idl;	/* identifier length, delimiter included; 2 in MARC */
	char off[5];	/* base address of data = 24 + dict size */
	char id1[3];	/* implementation defined, used by some MARCs */
	/* entry map; lof+scp > 0 */
	char lof;	/* bytes used for the length of field; 4 in MARC */
	char scp;	/* bytes used for the starting character pos; 5 in MARC */
	char idp;	/* bytes used for the impl.def. portion; 0 in MARC */
	char res;	/* reserved; 0 in MARC */
} Leader;
74 |
|
75 |
/*
	One dictionary entry as it follows the leader, in MARC layout (3+4+5).
	The run of entries is closed by a single FT.
*/
typedef struct {
	char tag[3];	/* IIF allows alpha; MARC demands digits */
	char lof[4];	/* [lof] field length, indicators and FT included; */
			/* a 0 means the next entry adds length to the same field */
	char scp[5];	/* [scp] starting char pos, relative to off */
	/* char idp[idp]; impl.def. portion */
} Entry;
83 |
|
84 |
typedef struct { |
85 |
Fld file; /* points into env->opt */ |
86 |
int rid; /* use control number as MFN */ |
87 |
int nomarc; /* do not assume MARC */ |
88 |
int write; /* write iif */ |
89 |
char ind[2]; |
90 |
char sub; |
91 |
} IifOpt; |
92 |
|
93 |
|
94 |
/*
	Reset opt and fill it from the F, N, P and R options found in args.
	-P without a value selects two blank indicators and subfield '0';
	up to three value characters override indicator 1, indicator 2 and
	the subfield code, in that order.
	Always returns 0.
*/
static int iifOpt (IifOpt *opt, Fld *args)
{
	Fld cur;

	memset(opt, 0, sizeof(*opt));
	cur.val = 0;
	while ( vGet(&cur, args, "FNPR") ) {
		switch (cur.tag) {
		case 'F':
			opt->file = cur;
			break;
		case 'N':
			opt->nomarc = !0;
			break;
		case 'R':
			opt->rid = !0;
			break;
		case 'P':
			/* defaults first, then as many overrides as were given */
			opt->ind[0] = opt->ind[1] = ' ';
			opt->sub = '0';
			if ( 0 != cur.len ) {
				opt->ind[0] = cur.val[0];
				if ( 1 != cur.len ) {
					opt->ind[1] = cur.val[1];
					if ( 2 != cur.len )
						opt->sub = cur.val[2];
				}
			}
			break;
		}
	}
	return 0;
} /* iifOpt */
122 |
|
123 |
|
124 |
int iifuse () |
125 |
{ |
126 |
return eRr(ERR_INVAL, "[iifimp|iifexp]:\n" |
127 |
"-F<file>: path/filename of iif (default = malete db.iif)\n" |
128 |
#ifndef WIN32 |
129 |
"\t-F- for stdin/out\n" |
130 |
#endif |
131 |
"-Nomarc: do not assume MARC structure 22/450# (imp)\n" |
132 |
"-P[<ii><c>]: prepend indicators ii and, where needed, subfield c (exp)\n" |
133 |
"-Rid: use control number as rid (imp)\n" |
134 |
"use tools like yaz-marcdump for character set recoding\n" |
135 |
); |
136 |
} /* iifuse */ |
137 |
|
138 |
|
139 |
/*
	Open the IIF file selected by opt into *iif.
	Without -F the name is db->pat with ".iif" appended.
	On non-WIN32 builds "-F-" selects descriptor 1 when writing and
	descriptor 0 otherwise, without opening anything.
	Returns 0 on success, else the result of logging the failure.
*/
static int iifOpen (file *iif, IifOpt *opt, Db *db)
{
	int mode = FIL_RD;
	int rc = 0;
	char *name;

	if ( opt->write )
		mode = FIL_RDWR|FIL_CREAT|FIL_TRUNC;
	if ( !opt->file.len ) {
		int stem = strlen(db->pat);
		name = mAlloc(stem+5);
		memcpy(name, db->pat, stem);
		memcpy(name+stem, ".iif", 5); /* 5 includes the terminating 0 */
	} else {
#ifndef WIN32
		if ( 1 == opt->file.len && '-' == *opt->file.val ) {
			*iif = opt->write ? 1 : 0;
			return 0;
		}
#endif
		name = VDUPZ(&opt->file);
	}
	if ( fOpen(iif, name, mode) )
		rc = eRr(LOG_ERROR, "no access to IIF file '%s'", name);
	mFree(name);
	return rc;
} /* iifOpen */
163 |
|
164 |
|
165 |
/*
	tags may contain alpha; uppercase or lowercase ("but not both")
	UNIMARC and others specify plain numeric tags
	MARC specifies the first two characters to be digits; we map:
	3 digits to 0-999 (000 actually forbidden)
	2 digits: 3rd 'a' to 1000-1099, 'b' to 1100-1199 up to 3599
	1 digit: 1st + 360*2ndbase36 + 10*3rdbase36 up to 12959
	others are base 36 (>=12960 = 'a00') < 36^3 = 46656
*/
174 |
static int rdtag (char *t) |
175 |
{ |
176 |
int tag; |
177 |
switch ( a2il(t, 3, &tag) ) { |
178 |
case 3: return tag; |
179 |
case 2: return tag + 100*b36val[(unsigned char)t[2]]; |
180 |
case 1: return tag |
181 |
+ 360*b36val[(unsigned char)t[1]] |
182 |
+ 10*b36val[(unsigned char)t[2]]; |
183 |
} |
184 |
return 1296*b36val[(unsigned char)t[0]] |
185 |
+ 36*b36val[(unsigned char)t[1]] |
186 |
+ b36val[(unsigned char)t[2]]; |
187 |
} |
188 |
|
189 |
static OPT_INLINE void pr (char *t, unsigned i, unsigned l) |
190 |
{ |
191 |
for ( ; l--; i/=10) t[l] = b36dig[i%10]; |
192 |
} |
193 |
|
194 |
static void prtag (char *t, unsigned tag) |
195 |
{ |
196 |
if (1000 > tag) |
197 |
pr(t, tag, 3); |
198 |
else if (3600 > tag) { |
199 |
pr(t, tag%100, 2); |
200 |
t[2] = b36dig[tag/100]; |
201 |
} else if (12960 > tag) { |
202 |
t[0] = b36dig[tag%10]; tag /= 10; |
203 |
t[2] = b36dig[tag%36]; tag /= 36; |
204 |
t[1] = b36dig[tag]; |
205 |
} else { |
206 |
t[2] = b36dig[tag%36]; tag /= 36; |
207 |
t[1] = b36dig[tag%36]; tag /= 36; |
208 |
t[0] = b36dig[tag%36]; |
209 |
} |
210 |
} |
211 |
|
212 |
|
213 |
|
214 |
/*
	Import the records of an IIF file into db.

	args may carry the options parsed by iifOpt (-F, -N, -R).
	Every record is read as a 24 byte leader followed by the body
	(dictionary, FT, field data, RT). Fields are collected into a list
	whose head holds sts, typ, id0 and id1 of the leader; subfield
	delimiters are turned into TABs. With -R a fully numeric control
	number in the first field is passed to dWrite as record id.

	Lengths and positions taken from the file are checked against the
	record before they are used as offsets into the buffer; a record
	that points outside itself ends the import with ERR_TRASH.

	Returns 0 after reading to end of file, else the result of the
	error that stopped the import.
*/
int iifimp (Db *db, Fld *args)
{
	char buf[99999]; /* body of one record; leader is read separately */
	IifOpt opt;
	file iif;
	Leader ldr;
	List rec;
	int ret = 0;

	if ( iifOpt(&opt, args) )
		return iifuse();
	if ( iifOpen(&iif, &opt, db) )
		return eRr(ERR_INVAL, "bad IIF base");
#define CLEANUP( args ) do { ret = eRr args; goto cleanup; } while (0)
	lInit(&rec, "");
	for (;;) { /* loop records */
		char head[8];
		char *ent = buf, *base;
		int len, off, eln, dln, dat, lof = 0, scp = 0;
		int got = fRead(&iif, &ldr, sizeof(ldr));
		int rid = 0;
		if ( !got ) /* clean end of file */
			break;
		if ( (int)sizeof(ldr) != got )
			CLEANUP((ERR_TRASH, "reading leader got %d", got));
		LOG_DBG(LOG_TRACE, "leader '%.*s'", 24, (char*)&ldr);
		len = a2i(ldr.len, 5) - 24; /* body length */
		off = a2i(ldr.off, 5);
		if ( opt.nomarc ) {
			ldr.ind -= '0';
			ldr.idl -= '0';
			ldr.lof -= '0';
			ldr.scp -= '0';
			ldr.idp -= '0';
			/* anything but a digit would give a bogus entry length */
			if ( 9 < (unsigned char)ldr.lof
				|| 9 < (unsigned char)ldr.scp
				|| 9 < (unsigned char)ldr.idp
			)
				CLEANUP((ERR_TRASH, "bad entry map"));
		} else {
			ldr.ind = 2;
			ldr.idl = 2;
			ldr.lof = 4;
			ldr.scp = 5;
			ldr.idp = 0;
		}
		eln = 3+ldr.lof+ldr.scp+ldr.idp; /* entry length, >= 3 */
		dln = (off-25)/eln; /* dict len: off - leader - FT */
		eRr(LOG_VERBOSE,
			"len %d sts %c typ %c ind %d idl %d off %d map 3+%d+%d+%d=%d %d fields",
			len, ldr.sts, ldr.typ, ldr.ind, ldr.idl, off,
			ldr.lof, ldr.scp, ldr.idp, eln, dln);
		/* body needs at least dict FT and RT and must fit the buffer */
		if ( len < 2 || (int)sizeof(buf) < len )
			CLEANUP((ERR_TRASH, "bad record length %d", len+24));
		/* data must start behind the dict FT and before the end */
		if ( off < 25 || len+24 <= off )
			CLEANUP((ERR_TRASH, "bad base address %d", off));
		dat = len+24-off; /* bytes from base to end of record */
		if ( off != 25+eln*dln )
			eRr(ERR_TRASH, "bad off %d != 25+%d*%d", off, eln, dln);
		got = fRead(&iif, buf, len);
		if ( len != got )
			CLEANUP((ERR_TRASH, "reading body %d got %d", len, got));
		base = buf+off-24;
		if ( FT != base[-1] )
			eRr(ERR_TRASH, "no FT after dict");
		if ( RT != buf[len-1] )
			eRr(ERR_TRASH, "no RT");
		memcpy(head, &ldr.sts, 5); /* sts,typ,id0 */
		memcpy(head+5, ldr.id1, 3);
		lClr(&rec);
		LADD(&rec, -1, head, 8);
		for ( ; dln--; ent+=eln ) { /* loop dictionary entries */
			int tag = rdtag(ent);
			char *v, *e, *p;
			if ( ldr.scp )
				scp = a2i(ent+3+ldr.lof, ldr.scp);
			else /* no positions given: fields are adjacent */
				scp += lof;
			if ( scp < 0 || dat <= scp )
				CLEANUP((ERR_TRASH, "field position %d outside data", scp));
			v = base+scp;
			if ( !ldr.lof ) { /* no lengths given: scan for the FT */
				if ( !(e = memchr(v, FT, dat-scp)) )
					CLEANUP((ERR_TRASH, "no FT after field"));
				lof = e-v+1;
			} else {
				if ( !(lof = a2i(ent+3, ldr.lof)) ) { /* long field */
					int max=1, dig=ldr.lof, add;
					while (dig--) max *= 10;
					lof = max;
					do {
						ent += eln;
						if ( !dln-- ) CLEANUP((ERR_TRASH, "eod on long field"));
						/* decode like the first entry, so alpha tags compare */
						if ( tag != rdtag(ent) ) eRr(ERR_TRASH, "bad tag");
						add = a2i(ent+3, ldr.lof);
						lof += add ? add : max;
						if ( dat < lof )
							CLEANUP((ERR_TRASH, "long field exceeds record"));
					} while ( !add );
				}
				if ( lof < 1 || dat-scp < lof )
					CLEANUP((ERR_TRASH, "field length %d outside data", lof));
				if ( FT != *(e = v+lof-1) )
					eRr(ERR_TRASH, "no FT after field");
			}
			for ( p=v; p<e; p++ ) if ( DL == *p ) *p = TAB;
			/* eOut(tag, "%.*s", lof-1, v); */
			LADD(&rec, tag, v, (unsigned)lof-1);
		}
		if ( opt.rid && 1<LLEN(&rec) && 1 == rec.fld[1].tag ) {
			int num;
			if ( (int)rec.fld[1].len == a2il(rec.fld[1].val, rec.fld[1].len, &num) )
				rid = num;
			else
				eRr(LOG_WARN, "bad control number '%.*s'",
					(int)rec.fld[1].len, rec.fld[1].val);
		}
		dWrite(db, rec.fld, rid);
	}
cleanup:
#undef CLEANUP
	fClose(&iif);
	return ret;
} /* iifimp */
322 |
|
323 |
|
324 |
/*
	Export the records of db as an IIF file in MARC layout
	(2 indicators, identifier length 2, entry map 4/5/0).

	args may carry the options parsed by iifOpt (-F, -P).
	Record ids 1..db->rdx.mid are tried in order; ids without a record
	are skipped. Up to 8 bytes following the first TAB of the list head
	are written to sts, typ, id0 and id1 of the leader, padded with
	blanks. If the first field does not have tag 1, the record id is
	written as control number. With -P, fields with a tag above 9 get
	the indicators and, unless the value starts with a TAB, the
	subfield code prepended. TABs in values become subfield delimiters.

	Each record is built in two passes over the fields: the first
	writes the dictionary and sums up the record length, the second
	appends the data. Both passes clamp a value against the same
	running length, so a truncated field has the same size in both.

	Returns 0, or the result of the error report if the two passes
	disagree on the record length.
	NOTE(review): the result of fWrite is not checked -- its return
	convention is not visible here.
*/
int iifexp (Db *db, Fld *args)
{
	char buf[99999];
	IifOpt opt;
	file iif;
	Leader ldr;
	List rec;
	int i, ret = 0;

	if ( iifOpt(&opt, args) )
		return iifuse();
	opt.write = 1;
	if ( iifOpen(&iif, &opt, db) )
		return eRr(ERR_INVAL, "opening IIF");
	ldr.ind = ldr.idl = '2';
	ldr.lof = '4';
	ldr.scp = '5';
	ldr.idp = '0';
	ldr.res = ' ';
	lInit(&rec, "");
	for (i=1; i<=db->rdx.mid; i++) { /* loop records */
		char head[8], rid[12];
		char *p;
		int l, m, n, o, ll, ridl;
		Entry *e;
		Fld *f;
		if ( 0 >= dRead(lClr(&rec), db, i) ) /* no rec */
			continue;
		ll = LLEN(&rec)-1;
		/* fill head: skip past the first TAB, l counts what is left */
		for (p = rec.fld->val, l = rec.fld->len; l-- && TAB != *p++; ) ;
		if ( 8 <= l )
			memcpy(head, p, 8);
		else {
			memset(head, ' ', 8);
			if ( 0 < l )
				memcpy(head, p, l);
		}
		memcpy(&ldr.sts, head, 5);
		memcpy(ldr.id1, head+5, 3);
		e = (Entry*)(buf + 24); /* after leader */
		l = 26; /* leader + dict FT + RT */
		o = 0; /* offset to base */
		n = 0; /* # fields used */
		/* rid -> control number */
		if ( ll && 1 == rec.fld[1].tag )
			ridl = 0;
		else {
			ridl = i2a(rid, i);
			prtag(e->tag, 1);
			pr(e->lof, o = ridl+1, 4);
			pr(e->scp, 0, 5);
			l += o + 12;
			e++;
		}
		m = l; /* the data pass replays the length count from here */
		/* pass 1: dictionary */
		for ( f = rec.fld+1; l < 99900 && n < ll; n++,e++,f++ ) {
			int fln = 1 /* FT */, vln = f->len;
			l += 12; /* Entry */
			if ( f->tag > 9 ) { /* non control */
				if ( opt.ind[0] ) fln += 2;
				if ( opt.sub && vln && TAB != *f->val ) fln += 2;
			}
			if ( vln > 9994 ) vln = 9994; /* max field len */
			if ( vln > 99994-l ) vln = 99994-l; /* max rec len */
			l += fln += vln;
			prtag(e->tag, f->tag);
			pr(e->lof, fln, 4);
			pr(e->scp, o, 5);
			o += fln;
		}
		p = (char*)e;
		*p++ = FT;
		pr(ldr.len, l, 5);
		pr(ldr.off, (unsigned)(p - buf), 5);
		memcpy(buf, &ldr, 24);
		/* pass 2: now add the field data */
		if ( ridl ) {
			memcpy(p, rid, ridl);
			p += ridl;
			*p++ = FT;
		}
		for ( f = rec.fld+1; n--; f++ ) {
			int vln = f->len, lim;
			char *v = f->val;
			m += 12; /* Entry, as counted in pass 1 */
			lim = 99994-m; /* the very limit pass 1 applied */
			if ( f->tag > 9 ) { /* non control */
				if ( opt.ind[0] ) {
					*p++ = opt.ind[0];
					*p++ = opt.ind[1];
					m += 2;
				}
				if ( opt.sub && vln && TAB != *f->val ) {
					*p++ = DL;
					*p++ = opt.sub;
					m += 2;
				}
			}
			if ( vln > 9994 ) vln = 9994;
			if ( vln > lim ) vln = lim;
			m += vln + 1; /* value + FT */
			while ( vln-- ) if ( TAB == (*p++ = *v++) ) p[-1] = DL;
			*p++ = FT;
		}
		*p++ = RT;
		if ( l != p-buf ) {
			ret = eRr(ERR_IDIOT, "miscalculated len %d got %d rec %d",
				l, (int)(p-buf), i);
			break;
		}
		fWrite(&iif, buf, l);
	}
	fClose(&iif);
	return ret;
} /* iifexp */