1 |
dpavlin |
604 |
/* |
2 |
|
|
The Malete project - the Z39.2/Z39.50 database framework of OpenIsis. |
3 |
|
|
Version 0.9.x (patchlevel see file Version) |
4 |
|
|
Copyright (C) 2001-2004 by Erik Grziwotz, erik@openisis.org |
5 |
|
|
|
6 |
|
|
This library is free software; you can redistribute it and/or |
7 |
|
|
modify it under the terms of the GNU Lesser General Public |
8 |
|
|
License as published by the Free Software Foundation; either |
9 |
|
|
version 2.1 of the License, or (at your option) any later version. |
10 |
|
|
|
11 |
|
|
This library is distributed in the hope that it will be useful, |
12 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
14 |
|
|
See the GNU Lesser General Public License for more details. |
15 |
|
|
|
16 |
|
|
You should have received a copy of the GNU Lesser General Public |
17 |
|
|
License along with this library; if not, write to the Free Software |
18 |
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
19 |
|
|
|
20 |
|
|
see README for more information |
21 |
|
|
EOH */ |
22 |
|
|
|
23 |
|
|
/* |
24 |
|
|
$Id: iif.c,v 1.1 2004/04/13 09:50:38 kripke Exp $ |
25 |
|
|
malete IIF(ISO2709/Z39.2)/MARC conversion |
26 |
|
|
less general than IIF, but less restrictive than MARC |
27 |
|
|
*/ |
28 |
|
|
|
29 |
|
|
#include "../tool/tool.h" |
30 |
|
|
|
31 |
|
|
/*
	implemented structure of IIF files:

	RT, FT and delimiter as defined by Z39.2-1979
	other chars as used by WinIsis (esp. delimiter ^) are NOT supported

	the bad habit of WinIsis to insert \r\n every 80 chars is NOT supported

	Z39.2 specifies that
	tag 1 is for a control number, every record must have exactly one
	tag 2 is for subrecord purposes
	tags 3-9,a-z are other control fields
	control field tags 001-00z must be first and sorted in dict and data,
	so the base of data always contains the "control number" (not nec. a number)
	control fields contain neither indicators nor subfields/delimiters, but FT

	MARC specifies that
	tag 5 is date and time of latest transaction
	there are 2 indicators in [ a-z0-9]
	unused indicators should be blank; printed as # in specs
	other unused but fixed coded data should use a '|'
*/
53 |
|
|
/* control characters that structure a Z39.2 record */
enum {
	/* ^] (decimal 29) record terminator; printed \ in MARC specs */
	RT = 0x1D,
	/* ^^ (decimal 30) field terminator; printed ^ or @ in MARC specs */
	FT = 0x1E,
	/* ^_ (decimal 31) subfield delimiter; printed $ in MARC specs */
	DL = 0x1F
};
58 |
|
|
|
59 |
|
|
/*
	24 byte Leader at the start of every record; mostly ASCII digits.
	All members are plain chars, so the struct is read from and written to
	the file as one raw 24 byte block.
*/
typedef struct {
	/* record length incl. RT */
	char len[5];
	/* status (new, updated); "ASCII graphic" */
	char sts;
	/* type of record; "ASCII graphic" */
	char typ;
	/* impl.def., used by some MARCs */
	char id0[3];
	/* indicator count; 2 in MARC */
	char ind;
	/* identifier length incl delimiter; 2 in MARC */
	char idl;
	/* base address of data = 24 + dict size */
	char off[5];
	/* impl.def., used by some MARCs */
	char id1[3];
	/* --- entry map; lof+scp > 0 --- */
	/* # bytes for length of field; 4 in MARC */
	char lof;
	/* # bytes for starting character pos; 5 in MARC */
	char scp;
	/* # bytes for impl.def. portion; 0 in MARC */
	char idp;
	/* reserved; 0 in MARC */
	char res;
} Leader;
74 |
|
|
|
75 |
|
|
/*
	One dictionary entry as it follows the leader, in the fixed MARC layout
	(3 byte tag, 4 byte length, 5 byte position, no impl.def. portion).
	The run of entries is followed by an FT.
*/
typedef struct {
	/* IIF allows alpha; MARC demands digits */
	char tag[3];
	/*
		[lof] length of field incl indicators and FT;
		if lof is 0, next entry has addtl. lof for same field
	*/
	char lof[4];
	/* [scp]; starting char pos rel to off */
	char scp[5];
	/* char idp[idp]; impl.def. portion */
} Entry;
83 |
|
|
|
84 |
|
|
typedef struct { |
85 |
|
|
Fld file; /* points into env->opt */ |
86 |
|
|
int rid; /* use control number as MFN */ |
87 |
|
|
int nomarc; /* do not assume MARC */ |
88 |
|
|
int write; /* write iif */ |
89 |
|
|
char ind[2]; |
90 |
|
|
char sub; |
91 |
|
|
} IifOpt; |
92 |
|
|
|
93 |
|
|
|
94 |
|
|
/*
	Collect the import/export options from args into *opt.
	*opt is zeroed first, then every option handed out by vGet for the
	option letters "FNPR" is applied:
	F  remember the option field as file name
	N  set nomarc
	P  indicators default to two blanks and the subfield code to '0';
	   the 1st, 2nd and 3rd character of the value, as far as present,
	   override ind[0], ind[1] and sub
	R  set rid
	Always returns 0.
*/
static int iifOpt (IifOpt *opt, Fld *args)
{
	Fld cur;

	memset(opt, 0, sizeof(*opt));
	cur.val = 0;
	while ( vGet(&cur, args, "FNPR") ) {
		if ( 'F' == cur.tag )
			opt->file = cur;
		else if ( 'N' == cur.tag )
			opt->nomarc = 1;
		else if ( 'P' == cur.tag ) {
			opt->ind[0] = opt->ind[1] = ' ';
			opt->sub = '0';
			/* longer values override more of the defaults */
			if ( 0 != cur.len ) {
				if ( 1 != cur.len ) {
					if ( 2 != cur.len )
						opt->sub = cur.val[2];
					opt->ind[1] = cur.val[1];
				}
				opt->ind[0] = cur.val[0];
			}
		} else if ( 'R' == cur.tag )
			opt->rid = 1;
	}
	return 0;
} /* iifOpt */
122 |
|
|
|
123 |
|
|
|
124 |
|
|
int iifuse () |
125 |
|
|
{ |
126 |
|
|
return eRr(ERR_INVAL, "[iifimp|iifexp]:\n" |
127 |
|
|
"-F<file>: path/filename of iif (default = malete db.iif)\n" |
128 |
|
|
#ifndef WIN32 |
129 |
|
|
"\t-F- for stdin/out\n" |
130 |
|
|
#endif |
131 |
|
|
"-Nomarc: do not assume MARC structure 22/450# (imp)\n" |
132 |
|
|
"-P[<ii><c>]: prepend indicators ii and, where needed, subfield c (exp)\n" |
133 |
|
|
"-Rid: use control number as rid (imp)\n" |
134 |
|
|
"use tools like yaz-marcdump for character set recoding\n" |
135 |
|
|
); |
136 |
|
|
} /* iifuse */ |
137 |
|
|
|
138 |
|
|
|
139 |
|
|
/*
	Open the IIF file for import (read) or export (opt->write set:
	read/write, create, truncate).

	iif  receives the opened file
	opt  options; opt->file names the file, if given
	db   database; without -F the name is db->pat with ".iif" appended

	Unless WIN32 is defined, the file name "-" selects descriptor 0 for
	reading or 1 for writing without opening anything.

	Returns 0 on success, else the result of eRr(LOG_ERROR, ...).
*/
static int iifOpen (file *iif, IifOpt *opt, Db *db)
{
	int filmode = opt->write ? FIL_RDWR|FIL_CREAT|FIL_TRUNC : FIL_RD;
	int ret;
	char *path;

	if ( opt->file.len ) {
#ifndef WIN32
		if ( 1 == opt->file.len && '-' == *opt->file.val ) {
			*iif = opt->write ? 1 : 0;
			return 0;
		}
#endif
		/* presumably a 0-terminated heap copy of the option value;
		   it is released with mFree below */
		path = VDUPZ(&opt->file);
	} else {
		int l = strlen(db->pat);
		path = mAlloc(l+5);
		if ( path ) {
			memcpy(path, db->pat, l);
			memcpy(path+l, ".iif", 5); /* includes the terminating 0 */
		}
	}
	/* do not hand a null path to memcpy/fOpen when allocation failed */
	if ( !path )
		return eRr(LOG_ERROR, "out of memory for IIF file name");
	ret = fOpen(iif, path, filmode) ?
		eRr(LOG_ERROR, "no access to IIF file '%s'", path) : 0;
	mFree(path);
	return ret;
} /* iifOpen */
163 |
|
|
|
164 |
|
|
|
165 |
|
|
/*
	tags may contain alpha; uppercase or lowercase ("but not both")
	UNIMARC and others specify plain numeric tags
	MARC specifies the first two characters to be digits; we map:
	3 digits to 0-999 (000 actually forbidden)
	2 digits: 3rd 'a' to 1000-1099, 'b' to 1100-1199 up to 3599
	1 digit: 1st + 360*2ndbase36 + 10*3rdbase36 up to 12959
	others are base 36 (>=12960 = 'a00') < 36^3 = 46656
*/
174 |
|
|
static int rdtag (char *t) |
175 |
|
|
{ |
176 |
|
|
int tag; |
177 |
|
|
switch ( a2il(t, 3, &tag) ) { |
178 |
|
|
case 3: return tag; |
179 |
|
|
case 2: return tag + 100*b36val[(unsigned char)t[2]]; |
180 |
|
|
case 1: return tag |
181 |
|
|
+ 360*b36val[(unsigned char)t[1]] |
182 |
|
|
+ 10*b36val[(unsigned char)t[2]]; |
183 |
|
|
} |
184 |
|
|
return 1296*b36val[(unsigned char)t[0]] |
185 |
|
|
+ 36*b36val[(unsigned char)t[1]] |
186 |
|
|
+ b36val[(unsigned char)t[2]]; |
187 |
|
|
} |
188 |
|
|
|
189 |
|
|
static OPT_INLINE void pr (char *t, unsigned i, unsigned l) |
190 |
|
|
{ |
191 |
|
|
for ( ; l--; i/=10) t[l] = b36dig[i%10]; |
192 |
|
|
} |
193 |
|
|
|
194 |
|
|
static void prtag (char *t, unsigned tag) |
195 |
|
|
{ |
196 |
|
|
if (1000 > tag) |
197 |
|
|
pr(t, tag, 3); |
198 |
|
|
else if (3600 > tag) { |
199 |
|
|
pr(t, tag%100, 2); |
200 |
|
|
t[2] = b36dig[tag/100]; |
201 |
|
|
} else if (12960 > tag) { |
202 |
|
|
t[0] = b36dig[tag%10]; tag /= 10; |
203 |
|
|
t[2] = b36dig[tag%36]; tag /= 36; |
204 |
|
|
t[1] = b36dig[tag]; |
205 |
|
|
} else { |
206 |
|
|
t[2] = b36dig[tag%36]; tag /= 36; |
207 |
|
|
t[1] = b36dig[tag%36]; tag /= 36; |
208 |
|
|
t[0] = b36dig[tag%36]; |
209 |
|
|
} |
210 |
|
|
} |
211 |
|
|
|
212 |
|
|
|
213 |
|
|
|
214 |
|
|
/*
	Import all records of an IIF file into db.

	db    database the records are written to with dWrite
	args  options, see iifuse

	For every record the 24 byte leader is read, then the rest of the
	record (dictionary and data) into a local buffer. Each dictionary
	entry yields one field of the record list; subfield delimiters are
	replaced by TAB in place. The list starts with a field of tag -1
	holding 8 leader bytes (sts, typ, id0, id1). With -R a numeric
	control number (tag 1 as first field) is passed to dWrite as rid.

	All lengths and positions found in the file are checked against the
	buffer before use; a record violating them ends the import.

	Returns 0 when the end of file was reached, else the result of eRr
	for the condition that ended the import.
	NOTE(review): the result of dWrite is not checked.
*/
int iifimp (Db *db, Fld *args)
{
	char buf[99999];
	IifOpt opt;
	file iif;
	Leader ldr;
	List rec;
	int ret = 0;

	if ( iifOpt(&opt, args) )
		return iifuse();
	if ( iifOpen(&iif, &opt, db) )
		return eRr(ERR_INVAL, "bad IIF base");
#define CLEANUP( args ) do { ret = eRr args; goto cleanup; } while (0)
	lInit(&rec, "");
	for (;;) {
		char head[8];
		char *ent = buf, *base;
		int len, off, eln, dln, dat, lof = 0, scp = 0;
		int got = fRead(&iif, &ldr, sizeof(ldr));
		int rid = 0;
		if ( !got )
			break;
		if ( (int)sizeof(ldr) != got )
			CLEANUP((ERR_TRASH, "reading leader got %d", got));
		LOG_DBG(LOG_TRACE, "leader '%.*s'", 24, &ldr);
		len = a2i(ldr.len, 5) - 24; /* bytes following the leader */
		off = a2i(ldr.off, 5);
		if ( opt.nomarc ) { /* take the layout from the leader */
			ldr.ind -= '0';
			ldr.idl -= '0';
			ldr.lof -= '0';
			ldr.scp -= '0';
			ldr.idp -= '0';
		} else { /* fixed MARC layout */
			ldr.ind = 2;
			ldr.idl = 2;
			ldr.lof = 4;
			ldr.scp = 5;
			ldr.idp = 0;
		}
		/* entry map must be single digits, else eln may drop to 0 or below */
		if ( 0 > ldr.lof || 9 < ldr.lof || 0 > ldr.scp || 9 < ldr.scp
			|| 0 > ldr.idp || 9 < ldr.idp
		)
			CLEANUP((ERR_TRASH, "bad entry map"));
		eln = 3+ldr.lof+ldr.scp+ldr.idp; /* entry length */
		dln = (off-25)/eln; /* dict len: off - leader - FT */
		eRr(LOG_VERBOSE,
			"len %d sts %c typ %c ind %d idl %d off %d map 3+%d+%d+%d=%d %d fields",
			len, ldr.sts, ldr.typ, ldr.ind, ldr.idl, off,
			ldr.lof, ldr.scp, ldr.idp, eln, dln);
		if ( off != 25+eln*dln )
			eRr(ERR_TRASH, "bad off %d != 25+%d*%d", off, eln, dln);
		/* the body holds at least the dict FT and the RT and must fit buf */
		if ( 2 > len || (int)sizeof(buf) < len )
			CLEANUP((ERR_TRASH, "bad record length %d", len+24));
		/* data starts behind the dict FT and before the end of the body */
		if ( 25 > off || len <= off-24 )
			CLEANUP((ERR_TRASH, "bad base address %d", off));
		got = fRead(&iif, buf, len);
		if ( len != got )
			CLEANUP((ERR_TRASH, "reading body %d got %d", len, got));
		base = buf+off-24;
		dat = len-off+24; /* bytes from base to the end of the body */
		if ( FT != base[-1] )
			eRr(ERR_TRASH, "no FT after dict");
		if ( RT != buf[len-1] )
			eRr(ERR_TRASH, "no RT");
		memcpy(head, &ldr.sts, 5); /* sts,typ,id0 */
		memcpy(head+5, ldr.id1, 3);
		lClr(&rec);
		LADD(&rec, -1, head, 8);
		for ( ; dln--; ent+=eln ) {
			int tag = rdtag(ent);
			char *v, *e, *p;
			if ( ldr.scp )
				scp = a2i(ent+3+ldr.lof, ldr.scp);
			else /* no positions in dict: fields are adjacent */
				scp += lof;
			if ( 0 > scp || dat <= scp )
				CLEANUP((ERR_TRASH, "bad field position %d", scp));
			v = base+scp;
			if ( !ldr.lof ) { /* no lengths in dict: look for the FT */
				if ( !(e = memchr(v, FT, dat-scp)) )
					CLEANUP((ERR_TRASH, "no FT after field"));
				lof = e-v+1;
			} else {
				if ( !(lof = a2i(ent+3, ldr.lof)) ) { /* long field */
					int max=1, dig=ldr.lof, add;
					while (dig--) max *= 10;
					lof = max;
					do { /* add up the continuation entries */
						ent += eln;
						if ( !dln-- ) CLEANUP((ERR_TRASH, "eod on long field"));
						/* compare as mapped tags, so alpha tags match, too */
						if ( tag != rdtag(ent) ) eRr(ERR_TRASH, "bad tag");
						add = a2i(ent+3, ldr.lof);
						lof += add ? add : max;
						if ( dat < lof ) /* also keeps the sum from overflowing */
							CLEANUP((ERR_TRASH, "bad field length %d", lof));
					} while ( !add );
				}
				if ( 1 > lof || dat-scp < lof )
					CLEANUP((ERR_TRASH, "bad field length %d", lof));
				if ( FT != *(e = v+lof-1) )
					eRr(ERR_TRASH, "no FT after field");
			}
			for ( p=v; p<e; p++ ) if ( DL == *p ) *p = TAB;
			LADD(&rec, tag, v, (unsigned)lof-1);
		}
		if ( opt.rid && 1<LLEN(&rec) && 1 == rec.fld[1].tag ) {
			int num;
			/* usable only if the whole control number is numeric */
			if ( (int)rec.fld[1].len == a2il(rec.fld[1].val, rec.fld[1].len, &num) )
				rid = num;
			else
				eRr(LOG_WARN, "bad control number '%.*s'",
					(int)rec.fld[1].len, rec.fld[1].val);
		}
		dWrite(db, rec.fld, rid);
	}
cleanup:
#undef CLEANUP
	fClose(&iif);
	return ret;
} /* iifimp */
322 |
|
|
|
323 |
|
|
|
324 |
|
|
/*
	Export the records 1..db->rdx.mid of db to an IIF file in MARC layout
	(2 indicators, identifier length 2, entry map 4/5/0).

	db    database the records are read from with dRead
	args  options, see iifuse

	Every record is built in a local buffer in two passes: the first one
	writes the dictionary and computes the lengths, the second one adds
	the field data. Ids for which dRead gives nothing are skipped.
	If the first field is not tag 1, a control field 001 holding the
	record id is generated. With -P, fields with tags above 9 get the
	indicators and, unless the value starts with TAB, a subfield code
	prepended. TABs are written as subfield delimiters.
	Field values are cut to 9994 bytes and to what is left of the record;
	fields beyond the record size limit are dropped with a warning.

	Returns 0, or the result of eRr if a record came out with a length
	other than computed (which ends the export).
	NOTE(review): the result of fWrite is not checked.
*/
int iifexp (Db *db, Fld *args)
{
	char buf[99999];
	IifOpt opt;
	file iif;
	Leader ldr;
	List rec;
	int i, ret = 0;

	if ( iifOpt(&opt, args) )
		return iifuse();
	opt.write = 1;
	if ( iifOpen(&iif, &opt, db) )
		return eRr(ERR_INVAL, "opening IIF");
	/* leader parts which are the same for all records */
	ldr.ind = ldr.idl = '2';
	ldr.lof = '4';
	ldr.scp = '5';
	ldr.idp = '0';
	ldr.res = ' ';
	lInit(&rec, "");
	for (i=1; i<=db->rdx.mid; i++) { /* loop records */
		char head[8], rid[12];
		char *p;
		int l, n, o, ll, ridl;
		Entry *e, *fe;
		Fld *f;
		if ( 0 >= dRead(lClr(&rec), db, i) ) /* no rec */
			continue;
		ll = LLEN(&rec)-1; /* # fields after the header field */
		/* fill head from what follows the first TAB of the header field */
		for (p = rec.fld->val, l = rec.fld->len; l-- && TAB != *p++; ) ;
		if ( 8 <= l )
			memcpy(head, p, 8);
		else { /* pad with blanks */
			memset(head, ' ', 8);
			if ( 0 < l )
				memcpy(head, p, l);
		}
		memcpy(&ldr.sts, head, 5); /* sts,typ,id0 */
		memcpy(ldr.id1, head+5, 3);
		e = (Entry*)(buf + 24); /* after leader */
		l = 26; /* leader + dict FT + RT */
		o = 0; /* offset to base */
		n = 0; /* # fields used */
		/* rid -> control number */
		if ( ll && 1 == rec.fld[1].tag )
			ridl = 0; /* record has its own control number */
		else {
			ridl = i2a(rid, i);
			prtag(e->tag, 1);
			pr(e->lof, o = ridl+1, 4);
			pr(e->scp, 0, 5);
			l += o + 12;
			e++;
		}
		fe = e; /* entry of the first record field */
		/* pass 1: dictionary and lengths */
		for ( f = rec.fld+1; l < 99900 && n < ll; n++,e++,f++ ) {
			int fln = 1 /* FT */, vln = f->len;
			l += 12; /* Entry */
			if ( f->tag > 9 ) { /* non control */
				if ( opt.ind[0] ) fln += 2;
				if ( opt.sub && vln && TAB != *f->val ) fln += 2;
			}
			if ( vln > 9994 ) vln = 9994; /* max field len */
			if ( vln > 99994-l ) vln = 99994-l; /* max rec len */
			l += fln += vln;
			prtag(e->tag, f->tag);
			pr(e->lof, fln, 4);
			pr(e->scp, o, 5);
			o += fln;
		}
		if ( n < ll )
			eRr(LOG_WARN, "record %d too long, dropping %d fields", i, ll-n);
		p = (char*)e;
		*p++ = FT;
		pr(ldr.len, l, 5);
		pr(ldr.off, (unsigned)(p - buf), 5);
		memcpy(buf, &ldr, 24);
		/* pass 2: now add the field data */
		if ( ridl ) {
			memcpy(p, rid, ridl);
			p += ridl;
			*p++ = FT;
		}
		for ( f = rec.fld+1, e = fe; n--; f++, e++ ) {
			/*
				take the length as planned in the dictionary instead of
				redoing the truncation, so both passes agree by construction
			*/
			int vln = a2i(e->lof, 4) - 1; /* w/o FT */
			char *v = f->val;
			if ( f->tag > 9 ) { /* non control */
				if ( opt.ind[0] ) {
					*p++ = opt.ind[0];
					*p++ = opt.ind[1];
					vln -= 2;
				}
				if ( opt.sub && f->len && TAB != *f->val ) {
					*p++ = DL;
					*p++ = opt.sub;
					vln -= 2;
				}
			}
			while ( 0 < vln-- ) if ( TAB == (*p++ = *v++) ) p[-1] = DL;
			*p++ = FT;
		}
		*p++ = RT;
		if ( l != p-buf ) { /* should not happen */
			ret = eRr(ERR_IDIOT, "miscalculated len %d got %d rec %d",
				l, (int)(p-buf), i);
			break;
		}
		fWrite(&iif, buf, l);
	}
	fClose(&iif);
	return ret;
} /* iifexp */