/[webpac]/trunk/tools/isis2marc.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/tools/isis2marc.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 230 - (hide annotations)
Wed Feb 25 10:49:32 2004 UTC (17 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 6001 byte(s)
Lot of changes and improvements:
- documentation at beginning of script
- use XML file for configuration (as opposed to hard-coded values in script)
- example XML configuration can be created with ./isis2marc.pl > config.xml
- supports unique and non-unique output depending on presence of <key>
  tag in configuration file

1 dpavlin 228 #!/usr/bin/perl -w
2    
3 dpavlin 230 # This utility will convert some (or all, depending on the definition in
4     # the XML configuration file) fields and subfields, with remapping, into
5     # a MARC file from one or more CDS/ISIS files
6 dpavlin 228 #
7     # 2004-02-23 Dobrica Pavlinusic <dpavlin@rot13.org>
8     #
9 dpavlin 230 #
10     # Run without parameters for usage instructions, or run without parameters
11     # and redirect STDOUT to a file to create an example configuration file like
12     # this:
13     #
14     # ./isis2marc.pl > config.xml
15     #
16     # If you want to create unique records, you need to define one or more
17     # fields as key (which will be used to produce just one record for one
18     # key)
19     #
20     # Keys are global for one run of script (that means for all ISIS databases
21     # used in one run), but you can write arbitrary values (as opposed to field
22     # names) inside key tag to produce unique key. For example,
23     #
24     # <key>author</key>
25     # <key>700$a</key>
26     #
27     # WARNING: When using <key> tag you can enter field with subfield
28     # (in format 700$a), just field name (for fields which don't have subfields
29     # like 005) or literal value. Fields which don't exist in that record
30     # will be skipped, and if key is empty no output record will be produced.
31     #
32     # So, the best way to produce just a few records in output is to specify a
33     # field which doesn't exist at all in ISIS database for key, or just one literal value!!
34     #
35     #
36     # If ISIS databases are named the same as the directories in which they
37 dpavlin 228 # reside, you can specify just directories (so that shell globbing works)
38     # like this:
39     #
40 dpavlin 230 # ./isis2marc.pl config.xml all.marc /mnt2/*/LIBRI
41     #
42 dpavlin 228
43     use strict;
44     use OpenIsis;
45     use MARC;
46 dpavlin 230 use XML::Simple;
47 dpavlin 228 use Data::Dumper;
48    
# At least three arguments are required: the configuration file, the MARC
# output file and one or more ISIS databases.  With fewer, print usage
# instructions on STDERR, dump a template configuration on STDOUT (so that
# "./isis2marc.pl > config.xml" captures only the XML) and exit with
# status 1.
if (@ARGV < 3) {
    print STDERR "Usage: $0 config.xml marc_file.iso isis_db [isis_db ...|isis_dir]\n";
    print STDERR <<'_END_OF_USAGE_';

isis_db can be path to directory (if ISIS database is called
same as directory) which will make shell globing work
or full path to ISIS database (without any extension)

Example configuration file will be dumped to standard output
after this, so you can just re-direct output of this script
to produce config file like this:

$ ./isis2marc.pl > config.xml

_END_OF_USAGE_

    # The XML declaration must be the very first thing in the document;
    # a blank line in front of it makes the generated file unparsable,
    # so the heredoc starts directly with "<?xml".
    print <<'_END_OF_CONFIG_';
<?xml version="1.0" encoding="ISO-8859-2"?>
<!-- template configuration file -->
<mapping>
<record>
<key>700$a</key>
<key>700$b</key>
<field tag="700">
<indicator1>0</indicator1>
<indicator2>#</indicator2>
<subfield id="a">700$a</subfield>
<subfield id="b">700$b</subfield>
</field>
<field tag="009">
<nosubfield>900</nosubfield>
</field>
</record>

</mapping>

_END_OF_CONFIG_

    exit 1;
}
90    
# Parser used to read the XML mapping configuration.
my $xml = XML::Simple->new();

my $config_file = shift @ARGV || die "no config file?";

# Parse the mapping configuration.
#  - subfield elements are folded into a hash keyed by their "id" attribute
#  - record, key, field, subfield and nosubfield are always forced into
#    arrays, so that a configuration with just one of them has the same
#    shape as one with several.  Without 'key' in this list a record with
#    a single <key> would be parsed as a plain string and the later
#    array dereference of it would die under "use strict".
my $config = $xml->XMLin($config_file,
    KeyAttr => { subfield => 'id' },
    ForceArray => [ 'record', 'key', 'field', 'subfield', 'nosubfield' ],
    ContentKey => '-content',
) || die "can't open configuration file '$config_file': $!";

my $marc_file = shift @ARGV || die "no marc file?";

# In-memory collection of MARC records; written out once at the very end.
my $marc = MARC->new;

# it seems that I can't specify invalid template for 005 and prevent
# output from creating field 005
#$num->add_005s({record=>1});

# unbuffered STDOUT, so that progress dots show up immediately
select(STDOUT); $|=1;

# keys already emitted (shared by all ISIS databases of this run)
my %stored;
# number of MARC records created so far, over all databases
my $total = 0;
114 dpavlin 230
# True when a value is usable data: defined and not the empty string.
# A plain truth test would also throw away the legitimate value "0".
sub _has_value {
    my ($value) = @_;
    return defined $value && $value ne '';
}

# Return the indicator to use for a field: the configured value, or a
# blank when none is configured.  "0" is a valid indicator and is kept.
# References are treated as "not configured" (presumably what XML::Simple
# hands back for an empty element -- confirm against its documentation).
sub _indicator {
    my ($value) = @_;
    return ' ' if !_has_value($value) || ref $value;
    return $value;
}

# Unroll one ISIS row into a flat hash reference:
#   "fld$sf" => value   for every ^sf subfield found in field fld
#   "fld"    => value   for data left over without subfield delimiter
# Later occurrences of the same field/subfield overwrite earlier ones.
sub _unroll_row {
    my ($row) = @_;
    my %data;

    return \%data unless $row;

    foreach my $fld (keys %{$row}) {

        # entries like mfn are literal values, not arrays of occurrences
        next if ref($row->{$fld}) ne 'ARRAY';

        foreach my $occurrence (@{$row->{$fld}}) {

            # the substitutions below consume the string, work on a copy
            my $rec_data = $occurrence;
            next if !defined $rec_data;

            while ($rec_data =~ s/\^(\w)([^\^]+)//) {
                $data{$fld.'$'.$1} = $2;

                # delete last subfield delimiter
                $rec_data = "" if ($rec_data =~ /(\^\w*$|\^\w\s*$)/);
            }

            # record data still exist? it's field without
            # subfields, then...
            if (_has_value($rec_data)) {
                $data{$fld} = $rec_data;
            }
        }
    }

    return \%data;
}

# Build the uniqueness key for one configured record.
# Returns undef when the configuration has no <key> at all (non-unique
# output), otherwise the concatenation of all key parts -- which may be
# the empty string when none of the referenced fields exist in the row.
# A key part is either a field name ("005"), field$subfield ("700$a") or,
# when it looks like neither, a literal value.
sub _record_key {
    my ($cfg_rec, $data) = @_;

    # accept both an array of keys and a single scalar key
    my $cfg_key = $cfg_rec->{key};
    my @parts = ref($cfg_key) eq 'ARRAY' ? @{$cfg_key}
              : defined $cfg_key         ? ($cfg_key)
              :                            ();
    @parts = grep { defined $_ && !ref $_ } @parts;

    return undef if !@parts;

    my $key = '';
    foreach my $part (@parts) {
        if (_has_value($data->{$part})) {
            $key .= $data->{$part};
        } elsif ($part !~ m/^\d{3,4}(\$\w)*$/) {
            # doesn't look like field name: literal value
            $key .= $part;
        }
    }
    return $key;
}

# Convert every ISIS database given on the command line.
foreach my $db_file (@ARGV) {

    print "reading '$db_file'";

    # directory named like the database inside it: dir -> dir/dir
    if (-d $db_file) {
        $db_file =~ s,([^/]+)/*$,$1/$1,;
    }

    my $db = OpenIsis::open( $db_file );
    my $maxmfn = OpenIsis::maxRowid( $db ) || 1;

    print " [rows: $maxmfn]\n";

    # print roughly $progress_len dots per database
    my $progress_len = 50;

    my $step = int($maxmfn/$progress_len);
    $step = 1 if ($step == 0);

    # MARC records created from this database
    my $new = 0;

    for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
        print "." if ($mfn % $step == 0);

        # unroll this row to in-memory structure
        my $data = _unroll_row( OpenIsis::read( $db, $mfn ) );

        # now, create output MARC record(s)

        foreach my $cfg_rec (@{$config->{record} || []}) {

            # do we have unique key?  Without any <key> in configuration
            # every row produces output; with keys, rows whose key is
            # empty or was already seen are skipped.
            my $key = _record_key($cfg_rec, $data);
            if (defined $key) {
                next if ($key eq '' || $stored{$key}++);
            }

            # this will be new record (if needed)
            my $num;

            # with one or more fields
            foreach my $cfg_fld (@{$cfg_rec->{field} || []}) {

                my $new_fld = $cfg_fld->{tag};
                my $i1 = _indicator($cfg_fld->{indicator1});
                my $i2 = _indicator($cfg_fld->{indicator2});

                #
                # first create fields without subfields
                #

                foreach my $f (@{$cfg_fld->{nosubfield} || []}) {
                    next if (! _has_value($data->{$f}));

                    if (! $num) {
                        $num=$marc->createrecord();
                        $new++;
                    }
                    $marc->addfield({record=>$num,
                        field=>$new_fld,
                        i1=>$i1,
                        i2=>$i2,
                        value=>$data->{$f}
                    });
                }

                #
                # then create fields with subfields
                #

                # this will hold subfield id/value pairs
                my @values;

                # sorted, so that subfields always come out in same order
                my $sf_map = $cfg_fld->{subfield} || {};
                foreach my $new_sf (sort keys %{$sf_map}) {
                    # field$subfield
                    my $f = $sf_map->{$new_sf};
                    next if (! defined $f || ref $f);
                    if (_has_value($data->{$f})) {
                        push @values, $new_sf, $data->{$f};
                    }
                }
                next if (! @values);

                if (! $num) {
                    $num=$marc->createrecord();
                    $new++;
                }
                $marc->addfield({record=>$num,
                    field=>$new_fld,
                    i1=>$i1,
                    i2=>$i2,
                    value=>\@values
                });
            }

        }
    }
    $total += $new;
    printf "\t%d (%0.2f%%) t: %d\n",$new,($new*100/$maxmfn),$total;
}

# write all collected records to the MARC file
$marc->output({file=>"> $marc_file",'format'=>"usmarc"});

Properties

Name Value
cvs2svn:cvs-rev 1.3
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26