/[webpac]/openisis/0.9.9e/tool/iif.c
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /openisis/0.9.9e/tool/iif.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 604 - (hide annotations)
Mon Dec 27 21:49:01 2004 UTC (19 years, 4 months ago) by dpavlin
File MIME type: text/plain
File size: 11644 byte(s)
import of new openisis release, 0.9.9e

1 dpavlin 604 /*
2     The Malete project - the Z39.2/Z39.50 database framework of OpenIsis.
3     Version 0.9.x (patchlevel see file Version)
4     Copyright (C) 2001-2004 by Erik Grziwotz, erik@openisis.org
5    
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Lesser General Public
8     License as published by the Free Software Foundation; either
9     version 2.1 of the License, or (at your option) any later version.
10    
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14     See the GNU Lesser General Public License for more details.
15    
16     You should have received a copy of the GNU Lesser General Public
17     License along with this library; if not, write to the Free Software
18     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19    
20     see README for more information
21     EOH */
22    
23     /*
24     $Id: iif.c,v 1.1 2004/04/13 09:50:38 kripke Exp $
25     malete IIF(ISO2709/Z39.2)/MARC conversion
26     less general than IIF, but less restrictive than MARC
27     */
28    
29     #include "../tool/tool.h"
30    
31     /*
32     implemented structure of IIF files:
33    
34     RT, FT and delimiter as defined by Z39.2-1979
35     other chars as used by WinIsis (esp. delimiter ^) are NOT supported
36    
37     the bad habit of WinIsis to insert \r\n every 80 chars is NOT supported
38    
39     Z39.2 specifies that
40     tag 1 is for a control number, every record must have exactly one
41     tag 2 is for subrecord purposes
42     tags 3-9,a-z are other control fields
43     control field tags 001-00z must be first and sorted in dict and data,
44     so base of data contains always the "control number" (not nec. a number)
45     control fields contain neither indicators nor subfields/delimiters, but FT
46    
47     MARC specifies that
48     tag 5 is date and time of latest transaction
49     there are 2 indicators in [ a-z0-9]
50     unused indicators should be blank; printed as # in specs
51     other unused but fixed coded data should use a '|'
52     */
/*
	The three ASCII separator characters that structure an IIF record.
	Values are given in hex; the caret notation shows the control key.
*/
enum {
	DL = 0x1F, /* ^_ subfield delimiter; printed $ in MARC specs */
	FT = 0x1E, /* ^^ field terminator; printed ^ or @ in MARC specs */
	RT = 0x1D  /* ^] record terminator; printed as a backslash in MARC specs */
};
58    
/*
	Record leader: the fixed 24 byte header in front of every IIF record.
	The member order IS the on-disk byte layout (5+1+1+3+1+1+5+3+1+1+1+1);
	iifimp reads sizeof(Leader) bytes straight into this struct and
	iifexp copies 24 bytes of it to the output, so members must not be
	reordered, resized or padded.
*/
typedef struct { /* 24 byte Leader; mostly ASCII digits */
	char len[5]; /* record length incl. RT */
	char sts; /* status (new, updated); "ASCII graphic" */
	char typ; /* type of record; "ASCII graphic" */
	char id0[3]; /* impl.def., used by some MARCs */
	char ind; /* indicator count; 2 in MARC */
	char idl; /* identifier length incl delimiter; 2 in MARC */
	char off[5]; /* base address of data = 24 + dict size */
	char id1[3]; /* impl.def., used by some MARCs */
	/* entry map; lof+scp > 0 */
	char lof; /* # bytes for length of field; 4 in MARC */
	char scp; /* # bytes for starting character pos; 5 in MARC */
	char idp; /* # bytes for impl.def. portion; 0 in MARC */
	char res; /* reserved; 0 in MARC */
} Leader;
74    
/*
	One dictionary (directory) entry in the fixed MARC layout 3+4+5 = 12
	bytes. iifexp writes entries through this struct; iifimp does not use
	it but walks the dictionary with the widths taken from the leader's
	entry map, because non-MARC files may use other widths.
*/
typedef struct { /* dictionary entries after leader, MARC layout */
	char tag[3]; /* IIF allows alpha; MARC demands digits */
	char lof[4]; /* [lof] length of field incl indicators and FT */
	/* if lof is 0, next entry has addtl. lof for same field */
	char scp[5]; /* [scp]; starting char pos rel to off */
	/* char idp[idp]; impl.def. portion */
} Entry;
/* entries followed by an FT */
83    
/*
	Options shared by iifimp and iifexp.
	Filled by iifOpt from the -F, -N, -P and -R arguments;
	'write' is not an argument, it is set by iifexp itself.
*/
typedef struct {
	Fld file; /* points into env->opt */ /* -F: IIF file name; empty = default name, see iifOpen */
	int rid; /* use control number as MFN */ /* -R, import only */
	int nomarc; /* do not assume MARC */ /* -N, import only: take the entry map from the leader */
	int write; /* write iif */ /* selects the open mode in iifOpen */
	char ind[2]; /* -P, export only: indicators to prepend; ind[0] == 0 means none */
	char sub; /* -P, export only: subfield code to prepend; 0 means none */
} IifOpt;
92    
93    
94     static int iifOpt (IifOpt *opt, Fld *args)
95     {
96     Fld o;
97     memset(opt, 0, sizeof(*opt));
98     for ( o.val = 0; vGet(&o, args, "FNPR"); )
99     switch (o.tag) {
100     case 'F':
101     opt->file = o;
102     continue;
103     case 'N':
104     opt->nomarc = !0;
105     continue;
106     case 'P':
107     opt->ind[0] = opt->ind[1] = ' ';
108     opt->sub = '0';
109     switch (o.len) {
110     default: opt->sub = o.val[2];
111     case 2: opt->ind[1] = o.val[1];
112     case 1: opt->ind[0] = o.val[0];
113     case 0: ;
114     }
115     continue;
116     case 'R':
117     opt->rid = !0;
118     continue;
119     }
120     return 0;
121     } /* iifOpt */
122    
123    
124     int iifuse ()
125     {
126     return eRr(ERR_INVAL, "[iifimp|iifexp]:\n"
127     "-F<file>: path/filename of iif (default = malete db.iif)\n"
128     #ifndef WIN32
129     "\t-F- for stdin/out\n"
130     #endif
131     "-Nomarc: do not assume MARC structure 22/450# (imp)\n"
132     "-P[<ii><c>]: prepend indicators ii and, where needed, subfield c (exp)\n"
133     "-Rid: use control number as rid (imp)\n"
134     "use tools like yaz-marcdump for character set recoding\n"
135     );
136     } /* iifuse */
137    
138    
139     static int iifOpen (file *iif, IifOpt *opt, Db *db)
140     {
141     int filmode = opt->write ? FIL_RDWR|FIL_CREAT|FIL_TRUNC : FIL_RD;
142     int ret;
143     char *path;
144     if ( opt->file.len ) {
145     #ifndef WIN32
146     if ( 1 == opt->file.len && '-' == *opt->file.val ) {
147     *iif = opt->write ? 1 : 0;
148     return 0;
149     }
150     #endif
151     path = VDUPZ(&opt->file);
152     } else {
153     int l = strlen(db->pat);
154     path = mAlloc(l+5);
155     memcpy(path, db->pat, l);
156     memcpy(path+l, ".iif", 5);
157     }
158     ret = fOpen(iif, path, filmode) ?
159     eRr(LOG_ERROR, "no access to IIF file '%s'", path) : 0;
160     mFree(path);
161     return ret;
162     } /* iifOpen */
163    
164    
/*
	tags may contain alpha; uppercase or lowercase ("but not both")
	UNIMARC and others specify plain numeric tags
	MARC requires the first two characters to be digits.
	We map a 3 character tag to a number by its count of leading digits:
	3 digits: the decimal value, 0-999 (000 actually forbidden)
	2 digits: value + 100*base36(3rd char), so a 3rd char 'a' gives
		1000-1099, 'b' gives 1100-1199, and so on up to 3599
	1 digit: 1st + 360*base36(2nd char) + 10*base36(3rd char),
		i.e. 3600 up to 12959
	others are plain base 36 (>=12960 = 'a00') < 36^3 = 46656
*/
174     static int rdtag (char *t)
175     {
176     int tag;
177     switch ( a2il(t, 3, &tag) ) {
178     case 3: return tag;
179     case 2: return tag + 100*b36val[(unsigned char)t[2]];
180     case 1: return tag
181     + 360*b36val[(unsigned char)t[1]]
182     + 10*b36val[(unsigned char)t[2]];
183     }
184     return 1296*b36val[(unsigned char)t[0]]
185     + 36*b36val[(unsigned char)t[1]]
186     + b36val[(unsigned char)t[2]];
187     }
188    
189     static OPT_INLINE void pr (char *t, unsigned i, unsigned l)
190     {
191     for ( ; l--; i/=10) t[l] = b36dig[i%10];
192     }
193    
194     static void prtag (char *t, unsigned tag)
195     {
196     if (1000 > tag)
197     pr(t, tag, 3);
198     else if (3600 > tag) {
199     pr(t, tag%100, 2);
200     t[2] = b36dig[tag/100];
201     } else if (12960 > tag) {
202     t[0] = b36dig[tag%10]; tag /= 10;
203     t[2] = b36dig[tag%36]; tag /= 36;
204     t[1] = b36dig[tag];
205     } else {
206     t[2] = b36dig[tag%36]; tag /= 36;
207     t[1] = b36dig[tag%36]; tag /= 36;
208     t[0] = b36dig[tag%36];
209     }
210     }
211    
212    
213    
214     int iifimp (Db *db, Fld *args)
215     {
216     char buf[99999];
217     IifOpt opt;
218     file iif;
219     Leader ldr;
220     List rec;
221     int ret = 0;
222    
223     if ( iifOpt(&opt, args) )
224     return iifuse();
225     if ( iifOpen(&iif, &opt, db) )
226     return eRr(ERR_INVAL, "bad IIF base");
227     #define CLEANUP( args ) do { ret = eRr args; goto cleanup; } while (0)
228     lInit(&rec, "");
229     for (;;) {
230     char head[8];
231     char *ent = buf, *base;
232     int len, off, eln, dln, lof = 0, scp = 0;
233     int got = fRead(&iif, &ldr, sizeof(ldr));
234     int rid = 0;
235     if ( !got )
236     break;
237     if ( (int)sizeof(ldr) != got )
238     CLEANUP((ERR_TRASH, "reading leader got %d", got));
239     LOG_DBG(LOG_TRACE, "leader '%.*s'", 24, &ldr);
240     len = a2i(ldr.len, 5) - 24;
241     off = a2i(ldr.off, 5);
242     if ( opt.nomarc ) {
243     ldr.ind -= '0';
244     ldr.idl -= '0';
245     ldr.lof -= '0';
246     ldr.scp -= '0';
247     ldr.idp -= '0';
248     } else {
249     ldr.ind = 2;
250     ldr.idl = 2;
251     ldr.lof = 4;
252     ldr.scp = 5;
253     ldr.idp = 0;
254     }
255     eln = 3+ldr.lof+ldr.scp+ldr.idp; /* entry length */
256     dln = (off-25)/eln; /* dict len: off - leader - FT */
257     eRr(LOG_VERBOSE,
258     "len %d sts %c typ %c ind %d idl %d off %d map 3+%d+%d+%d=%d %d fields",
259     len, ldr.sts, ldr.typ, ldr.ind, ldr.idl, off,
260     ldr.lof, ldr.scp, ldr.idp, eln, dln);
261     if ( off != 25+eln*dln )
262     eRr(ERR_TRASH, "bad off %d != 25+%d*%d", off, eln, dln);
263     got = fRead(&iif, buf, len);
264     if ( len != got )
265     CLEANUP((ERR_TRASH, "reading body %d got %d", len, got));
266     base = buf+off-24;
267     if ( FT != base[-1] )
268     eRr(ERR_TRASH, "no FT after dict");
269     if ( RT != buf[len-1] )
270     eRr(ERR_TRASH, "no RT");
271     memcpy(head, &ldr.sts, 5); /* sts,typ,id0 */
272     memcpy(head+5, ldr.id1, 3);
273     lClr(&rec);
274     LADD(&rec, -1, head, 8);
275     for ( ; dln--; ent+=eln ) {
276     int tag = rdtag(ent);
277     char *v, *e, *p;
278     if ( ldr.scp )
279     scp = a2i(ent+3+ldr.lof, ldr.scp);
280     else
281     scp += lof;
282     v = base+scp;
283     if ( !ldr.lof ) {
284     if ( !(e = memchr(v, FT, len-off+24)) )
285     CLEANUP((ERR_TRASH, "no FT after field"));
286     lof = e-v+1;
287     } else {
288     if ( !(lof = a2i(ent+3, ldr.lof)) ) { /* long field */
289     int max=1, dig=ldr.lof, add;
290     while (dig--) max *= 10;
291     lof = max;
292     do {
293     ent += eln;
294     if ( !dln-- ) CLEANUP((ERR_TRASH, "eod on long field"));
295     if ( tag != a2i(ent, 3) ) eRr(ERR_TRASH, "bad tag");
296     add = a2i(ent+3, ldr.lof);
297     lof += add ? add : max;
298     } while ( !add );
299     }
300     if ( FT != *(e = v+lof-1) )
301     eRr(ERR_TRASH, "no FT after field");
302     }
303     for ( p=v; p<e; p++ ) if ( DL == *p ) *p = TAB;
304     /* eOut(tag, "%.*s", lof-1, v); */
305     LADD(&rec, tag, v, (unsigned)lof-1);
306     }
307     if ( opt.rid && 1<LLEN(&rec) && 1 == rec.fld[1].tag ) {
308     int num;
309     if ( (int)rec.fld[1].len == a2il(rec.fld[1].val, rec.fld[1].len, &num) )
310     rid = num;
311     else
312     eRr(LOG_WARN, "bad control number '%.*s'",
313     rec.fld[1].len, rec.fld[1].val);
314     }
315     dWrite(db, rec.fld, rid);
316     }
317     cleanup:
318     #undef CLEANUP
319     fClose(&iif);
320     return ret;
321     } /* iifimp */
322    
323    
324     int iifexp (Db *db, Fld *args)
325     {
326     char buf[99999];
327     IifOpt opt;
328     file iif;
329     Leader ldr;
330     List rec;
331     int i, ret = 0;
332    
333     if ( iifOpt(&opt, args) )
334     return iifuse();
335     opt.write = 1;
336     if ( iifOpen(&iif, &opt, db) )
337     return eRr(ERR_INVAL, "opening IIF");
338     ldr.ind = ldr.idl = '2';
339     ldr.lof = '4';
340     ldr.scp = '5';
341     ldr.idp = '0';
342     ldr.res = ' ';
343     lInit(&rec, "");
344     for (i=1; i<=db->rdx.mid; i++) { /* loop records */
345     char head[8], rid[12];
346     char *p;
347     int l, n, o, ll, ridl;
348     Entry *e;
349     Fld *f;
350     if ( 0 >= dRead(lClr(&rec), db, i) ) /* no rec */
351     continue;
352     ll = LLEN(&rec)-1;
353     /* fill head */
354     for (p = rec.fld->val, l = rec.fld->len; l-- && TAB != *p++; ) ;
355     if ( 8 <= l )
356     memcpy(head, p, 8);
357     else {
358     memset(head, ' ', 8);
359     if ( 0 < l )
360     memcpy(head, p, l);
361     }
362     memcpy(&ldr.sts, head, 5);
363     memcpy(ldr.id1, head+5, 3);
364     e = (Entry*)(buf + 24); /* after leader */
365     l = 26; /* leader + dict FT + RT */
366     o = 0; /* offset to base */
367     n = 0; /* # fields used */
368     /* rid -> control number */
369     if ( ll && 1 == rec.fld[1].tag )
370     ridl = 0;
371     else {
372     ridl = i2a(rid, i);
373     prtag(e->tag, 1);
374     pr(e->lof, o = ridl+1, 4);
375     pr(e->scp, 0, 5);
376     l += o + 12;
377     e++;
378     }
379     for ( f = rec.fld+1; l < 99900 && n < ll; n++,e++,f++ ) {
380     int fln = 1 /* FT */, vln = f->len;
381     l += 12; /* Entry */
382     if ( f->tag > 9 ) { /* non control */
383     if ( opt.ind[0] ) fln += 2;
384     if ( opt.sub && vln && TAB != *f->val ) fln += 2;
385     }
386     if ( vln > 9994 ) vln = 9994; /* max field len */
387     if ( vln > 99994-l ) vln = 99994-l; /* max rec len */
388     l += fln += vln;
389     prtag(e->tag, f->tag);
390     pr(e->lof, fln, 4);
391     pr(e->scp, o, 5);
392     o += fln;
393     }
394     p = (char*)e;
395     *p++ = FT;
396     pr(ldr.len, l, 5);
397     pr(ldr.off, p - buf, 5);
398     memcpy(buf, &ldr, 24);
399     /* now add the field data */
400     if ( ridl ) {
401     memcpy(p, rid, ridl);
402     p += ridl;
403     *p++ = FT;
404     }
405     for ( f = rec.fld+1; n--; f++ ) {
406     int vln = f->len;
407     char *v = f->val;
408     if ( f->tag > 9 ) { /* non control */
409     if ( opt.ind[0] ) {
410     *p++ = opt.ind[0];
411     *p++ = opt.ind[1];
412     }
413     if ( opt.sub && vln && TAB != *f->val ) {
414     *p++ = DL;
415     *p++ = opt.sub;
416     }
417     }
418     if ( vln > 9994 ) vln = 9994;
419     if ( vln > 99994-(p-buf) ) vln = 99994-(p-buf);
420     while ( vln-- ) if ( TAB == (*p++ = *v++) ) p[-1] = DL;
421     *p++ = FT;
422     }
423     *p++ = RT;
424     if ( l != p-buf ) {
425     eRr(ERR_IDIOT, "miscalculated len %d got %d rec %d", l, p-buf, i);
426     break;
427     }
428     fWrite(&iif, buf, l);
429     }
430     fClose(&iif);
431     return ret;
432     } /* iifexp */

  ViewVC Help
Powered by ViewVC 1.1.26