/[webpac]/trunk/tools/isis2marc.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/tools/isis2marc.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 230 - (show annotations)
Wed Feb 25 10:49:32 2004 UTC (15 years, 9 months ago) by dpavlin
File MIME type: text/plain
File size: 6001 byte(s)
Lot of changes and improvements:
- documentation at beginning of script
- use XML file for configuration (as opposed to hard-coded values in script)
- example XML configuration can be created with ./isis2marc.pl > config.xml
- supports unique and non-unique output depending on presence of <key>
  tag in configuration file

1 #!/usr/bin/perl -w
2
3 # This utility will convert some (or all, depending on the definitions in
4 # the configuration XML file) fields and subfields, with remapping, into
5 # a MARC file from one or more CDS/ISIS files
6 #
7 # 2004-02-23 Dobrica Pavlinusic <dpavlin@rot13.org>
8 #
9 #
10 # Run without parameters for usage instructions, or run without parameters
11 # and redirect STDOUT to a file to create an example configuration file like
12 # this:
13 #
14 # ./isis2marc.pl > config.xml
15 #
16 # If you want to create unique records, you need to define one or more
17 # fields as key (which will be used to produce just one record for one
18 # key)
19 #
20 # Keys are global for one run of script (that means for all ISIS databases
21 # used in one run), but you can write arbitrary values (as opposed to field
22 # names) inside key tag to produce unique key. For example,
23 #
24 # <key>author</key>
25 # <key>700$a</key>
26 #
27 # WARNING: When using the <key> tag you can enter a field with subfield
28 # (in format 700$a), just a field name (for fields which don't have subfields,
29 # like 005) or a literal value. Fields which don't exist in that record
30 # will be skipped, and if the key is empty no output record will be produced.
31 #
32 # So, the best way to produce just a few records in output is to specify a field
33 # which doesn't exist at all in the ISIS database as key, or just one literal value!!
34 #
35 #
36 # If ISIS databases are named the same as the directories in which they
37 # reside, you can specify just the directories (so that shell globbing works)
38 # like this:
39 #
40 # ./isis2marc.pl config.xml all.marc /mnt2/*/LIBRI
41 #
42
43 use strict;
44 use OpenIsis;
45 use MARC;
46 use XML::Simple;
47 use Data::Dumper;
48
# At least three arguments are required: configuration file, output MARC
# file and one ISIS database. Otherwise print usage instructions to STDERR
# and an example configuration to STDOUT (so that redirecting STDOUT
# captures only the template), then exit with status 1.
if (@ARGV < 3) {
    print STDERR "Usage: $0 config.xml marc_file.iso isis_db [isis_db ...|isis_dir]\n";
    print STDERR <<'_END_OF_USAGE_';

isis_db can be path to directory (if ISIS database is called
same as directory) which will make shell globing work
or full path to ISIS database (without any extension)

Example configuration file will be dumped to standard output
after this, so you can just re-direct output of this script
to produce config file like this:

$ ./isis2marc.pl > config.xml

_END_OF_USAGE_

    # The XML declaration must be the very first thing in the generated
    # file, so the template deliberately does not start with an empty line
    # (a declaration preceded by whitespace is not well-formed XML).
    print <<'_END_OF_CONFIG_';
<?xml version="1.0" encoding="ISO-8859-2"?>
<!-- template configuration file -->
<mapping>
<record>
<key>700$a</key>
<key>700$b</key>
<field tag="700">
<indicator1>0</indicator1>
<indicator2>#</indicator2>
<subfield id="a">700$a</subfield>
<subfield id="b">700$b</subfield>
</field>
<field tag="009">
<nosubfield>900</nosubfield>
</field>
</record>

</mapping>

_END_OF_CONFIG_

    exit 1;
}
90
# Read the configuration and prepare global state for the conversion run.
my $xml = XML::Simple->new();

# first argument: XML configuration file
my $config_file = shift(@ARGV) || die "no config file?";

# 'key' is forced into an array like the other repeatable elements;
# otherwise a configuration with exactly one <key> yields a plain string
# and the later array dereference of $cfg_rec->{key} dies under "use strict".
my $config = $xml->XMLin($config_file,
    KeyAttr    => { subfield => 'id' },
    ForceArray => [ 'record', 'key', 'field', 'subfield', 'nosubfield' ],
    ContentKey => '-content',
) || die "can't open configuration file '$config_file': $!";

# second argument: output MARC file; everything left in @ARGV after this
# is treated as an ISIS database (or directory) to read
my $marc_file = shift(@ARGV) || die "no marc file?";

# container which collects all MARC records created during this run
my $marc = MARC->new;

# it seems that I can't specify invalid template for 005 and prevent
# output from creating field 005
#$num->add_005s({record=>1});

# turn on autoflush for STDOUT so that progress output appears immediately
select(STDOUT); $|=1;

# keys for which a record was already produced (shared by all databases)
my %stored;
# number of MARC records created so far, over all databases
my $total = 0;
114
# Convert every ISIS database left on the command line into MARC records
# (according to $config), printing progress and per-database statistics,
# and finally write all collected records to $marc_file.
foreach my $db_file (@ARGV) {

    print "reading '$db_file'";

    # a directory /path/NAME is expanded to the database path /path/NAME/NAME
    if (-d $db_file) {
        $db_file =~ s,([^/]+)/*$,$1/$1,;
    }

    # NOTE(review): the result of OpenIsis::open is not checked here --
    # confirm how the binding reports a database which can't be opened.
    my $db = OpenIsis::open( $db_file );
    my $maxmfn = OpenIsis::maxRowid( $db ) || 1;

    print " [rows: $maxmfn]\n";

    # print a dot every $step rows (about $progress_len dots per database)
    my $progress_len = 50;
    my $step = int($maxmfn / $progress_len) || 1;

    # number of MARC records created from this database
    my $new = 0;

    for my $mfn (1 .. $maxmfn) {
        print "." if ($mfn % $step == 0);
        my $row = OpenIsis::read( $db, $mfn );

        # unroll this row to in-memory structure: 'field$subfield' => value
        # for subfields and 'field' => value for data without subfields
        my %data;

        # delete mfn from $row because it's literal value and
        # not array, so rest of code would croak
        delete($row->{mfn});

        foreach my $fld (keys %{$row}) {

            foreach my $rec_data (@{$row->{$fld}}) {

                # cut off one ^x subfield at a time
                while ($rec_data =~ s/\^(\w)([^\^]+)//) {
                    $data{$fld.'$'.$1} = $2;

                    # delete last subfield delimiter
                    $rec_data = "" if ($rec_data =~ /(\^\w*$|\^\w\s*$)/);
                }

                # record data still exist? it's field without
                # subfields, then... (a literal "0" counts as data, too)
                if (defined $rec_data && length $rec_data) {
                    $data{$fld} = $rec_data;
                }
            }
        }

        # now, create output MARC record(s)

        RECORD:
        foreach my $cfg_rec (@{$config->{record}}) {

            # <key> entries of this record definition; a single <key> may
            # arrive as plain scalar instead of array reference
            my $key_cfg = $cfg_rec->{key};
            my @key_specs = grep { defined($_) && !ref($_) }
                (ref($key_cfg) eq 'ARRAY' ? @{$key_cfg} : $key_cfg);

            # Uniqueness is enforced only if keys are configured; without
            # any <key> every row produces output (non-unique mode).
            if (@key_specs) {
                my $key = '';
                foreach my $spec (@key_specs) {
                    if (defined $data{$spec} && length $data{$spec}) {
                        # field (or field$subfield) present in this row
                        $key .= $data{$spec};
                    } elsif ($spec !~ m/^\d{3,4}(\$\w)*$/) {
                        # doesn't look like field name: literal value
                        $key .= $spec;
                    }
                    # field name missing in this row adds nothing
                }

                # empty key produces no record; known key was already used
                next RECORD if ($key eq '' || $stored{$key}++);
            }

            # this will be new record (if needed)
            my $num;

            # with one or more fields
            foreach my $cfg_fld (@{$cfg_rec->{field}}) {

                my $new_fld = $cfg_fld->{tag};

                # default to blank only if indicator is missing or empty,
                # so that configured indicator "0" is preserved
                my ($i1, $i2) = map {
                    my $ind = $cfg_fld->{$_};
                    (defined $ind && length $ind) ? $ind : ' ';
                } qw(indicator1 indicator2);

                #
                # first create fields without subfields
                #
                foreach my $f (@{$cfg_fld->{nosubfield}}) {
                    next unless (defined $data{$f} && length $data{$f});

                    if (! $num) {
                        $num = $marc->createrecord();
                        $new++;
                    }
                    $marc->addfield({
                        record => $num,
                        field  => $new_fld,
                        i1     => $i1,
                        i2     => $i2,
                        value  => $data{$f},
                    });
                }

                #
                # then create fields with subfields
                #

                # flat list of (subfield id, value) pairs; ids are sorted
                # so that output doesn't depend on hash ordering
                my @values;
                foreach my $new_sf (sort keys %{$cfg_fld->{subfield}}) {
                    # field$subfield
                    my $f = $cfg_fld->{subfield}->{$new_sf};
                    if (defined $data{$f} && length $data{$f}) {
                        push @values, $new_sf, $data{$f};
                    }
                }
                next if (! @values);

                if (! $num) {
                    $num = $marc->createrecord();
                    $new++;
                }
                $marc->addfield({
                    record => $num,
                    field  => $new_fld,
                    i1     => $i1,
                    i2     => $i2,
                    value  => \@values,
                });
            }

        }
    }

    # new records from this database, their share of rows, running total
    $total += $new;
    printf "\t%d (%0.2f%%) t: %d\n",$new,($new*100/$maxmfn),$total;
}

# write all collected records to output file
$marc->output({file=>"> $marc_file",'format'=>"usmarc"});

Properties

Name Value
cvs2svn:cvs-rev 1.3
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26