/[webpac]/trunk/tools/mods2unimarc.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/tools/mods2unimarc.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 325 - (show annotations)
Fri May 14 16:38:22 2004 UTC (15 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 9742 byte(s)
first try at converting MODS XML records back to MARC

1 #!/usr/bin/perl -w
2
3 =head1 NAME
4
5 mods2marc.pl - convert MODS XML back to MARC (ISO2709)
6
7 =head1 SYNOPSIS
8
9 mods2marc.pl mods.xml export.marc
10
11 =head1 DESCRIPTION
12
13 This script will convert MODS format
14 L<http://www.loc.gov/standards/mods/>
15 back to MARC (ISO2709) format.
16
17 Since conversion back to MARC is not simple, lot of things are hard-coded
18 in this script.
19
20 This script B<is somewhat specific> to MODS export from
21 Faculty of Electrical Engineering and Computing
22 so you might want to edit it
23
24 =head1 WARNING
25
26 This script is in state of flux.
27
28 =cut
29
30 use strict;
31 use XML::Twig;
32 use XML::Simple;
33 use MARC;
34 use Text::Iconv;
35
36 use Data::Dumper;
37
38 my $xml_file = "/data/tehnika/fer/all.xml";
39 $xml_file = "/data/tehnika/fer/modsFER_1.xml";
40 my $marc_file = "fer.marc";
41
42 $|=1;
43 my $nr = 0;
44
45 my $marc = MARC->new;
46
47 my $twig=XML::Twig->new(
48 twig_roots => { 'mods' => \&item },
49 output_encoding => 'iso-8859-2',
50 );
51
52 my $utf2iso = Text::Iconv->new("UTF-8", "ISO-8859-2");
53
54 $twig->parsefile($xml_file);
55 $twig->purge;
56
57 $marc->output({file=>"> $marc_file",'format'=>"usmarc"});
58
59 sub item {
60 my( $t, $elt)= @_;
61
62 my $xml=$elt->xml_string;
63 my $ref = XMLin("<xml>".$xml."</xml>",
64 ForceArray => [
65 'name',
66 'classification',
67 'topic',
68 'udc',
69 ],
70 KeyAttr => {
71 'namePart' => 'type',
72 'identifier' => 'type',
73 'classification' => 'authority',
74 'namePart' => 'type',
75 'role' => 'type',
76 },
77 GroupTags => {
78 'place' => 'placeTerm',
79 'physicalDescription' => 'extent',
80 'roleTerm' => 'content',
81 },
82 ContentKey => '-content',
83 );
84
85 my $m=$marc->createrecord();
86
87 sub marc_arr {
88 my $m = shift || die "no marc record?";
89 my $fld = shift || die "no marc field?";
90 my $sf = shift || '';
91
92 return if (! @_);
93
94 my @a;
95 foreach (@_) {
96 next if (! $_);
97 push @a,$sf;
98 # push @a,$utf2iso->convert($_) || $_;
99 push @a,$_;
100 }
101
102 return if (! @a);
103
104 # print "storing $fld: ",join("|",@a),"\n";
105
106 $marc->addfield({record=>$m,
107 field=>$fld,
108 # i1=>$i1,
109 # i2=>$i2,
110 value=>\@a});
111 }
112
113 marc_arr($m,'610','a',@{$ref->{subject}->{topic}});
114
115 my $fld = '700';
116
117 foreach my $name (@{$ref->{name}}) {
118 my $role = $name->{role}->{roleTerm}->{content};
119 next if (! $role);
120 if ($role eq "author") {
121 marc_arr($m,$fld,'a',$name->{namePart}->{family});
122 marc_arr($m,$fld,'b',$name->{namePart}->{given});
123 marc_arr($m,$fld,'4',$role);
124
125 # first author goes in 700, others in 701
126 $fld = '701';
127 } elsif ($role eq "editor" or $role eq "illustrator") {
128 marc_arr($m,'702','a',$name->{namePart}->{family});
129 marc_arr($m,'702','b',$name->{namePart}->{given});
130 marc_arr($m,'702','4',$role);
131 } else {
132 die "FATAL: don't know how to map role '$role'" if ($role);
133 }
134 }
135
136 my $note = $ref->{note};
137
138 if ($note) {
139 foreach my $n (split(/\s*;\s+/, $note)) {
140 if ($n =~ s/bibliogr:\s+//i) {
141 marc_arr($m,'320','a',"Bibliografija: $n");
142 } elsif ($n =~ s/ilustr:\s+//i) {
143 marc_arr($m,'215','c', $n);
144 } else {
145 marc_arr($m,'320','a',$n);
146 }
147 }
148 }
149
150
151 my $type = $ref->{identifier}->{type};
152
153 if ($type) {
154 if ($type eq "isbn") {
155 marc_arr($m,'010','a',$ref->{identifier}->{content});
156 } elsif ($type eq "issn") {
157 marc_arr($m,'011','a',$ref->{identifier}->{content});
158 } else {
159 die "unknown identifier type $type";
160 }
161 }
162
163 my $phy_desc = $ref->{physicalDescription};
164 if ($phy_desc) {
165 my $tmp;
166 foreach my $t (split(/\s*;\s+/, $phy_desc)) {
167 if ($t =~ m/([^:]+):\s+(.+)$/) {
168 $tmp->{$1} = $2;
169 } else {
170 die "can't parse $t";
171 }
172 }
173 my $data = $tmp->{pagin};
174 $data .= ", " if ($data);
175 if ($tmp->{str}) {
176 $data .= $tmp->{str}." str";
177 }
178 marc_arr($m,'210','a', $data) if ($data);
179 marc_arr($m,'210','d', $tmp->{visina});
180 }
181
182 marc_arr($m,'001','',$ref->{recordInfo}->{recordIdentifier});
183
184 marc_arr($m,'200','a',$ref->{titleInfo}->{title});
185 marc_arr($m,'200','e',$ref->{titleInfo}->{subTitle});
186
187 marc_arr($m,'675','a',$ref->{classification}->{udc});
188
189 my $related = $ref->{relatedItem}->{type};
190 if ($related) {
191 if ($related eq "series") {
192 marc_arr($m,'675','a',$ref->{relatedItem}->{titleInfo}->{title});
193 marc_arr($m,'999','a',$ref->{relatedItem}->{titleInfo}->{partNumber});
194 } elsif ($related eq "preceding") {
195 marc_arr($m,'430','a',$ref->{relatedItem}->{titleInfo}->{title});
196 } else {
197 die "can't parse related item type $related" if ($related);
198 }
199 }
200
201 marc_arr($m,'205','a',$ref->{originInfo}->{edition});
202
203 my $publisher = $ref->{originInfo}->{publisher};
204 if ($publisher =~ m,^(.+?)\s*/\s*(.+)$,) {
205 marc_arr($m,'210','a', $2);
206 marc_arr($m,'210','c', $1);
207 } else {
208 marc_arr($m,'210','c', $publisher);
209 }
210
211 marc_arr($m,'326','a',$ref->{originInfo}->{frequency});
212 marc_arr($m,'326','a',$ref->{originInfo}->{place});
213
214 marc_arr($m,'210','d',$ref->{originInfo}->{dateIssued});
215
216 $nr++;
217 print "$nr " if ($nr % 100 == 0);
218
219 $t->purge; # frees the memory
220 }
221
222 __END__
223
224 KNJIGA = {
225 610a 'subject' => [
226 {
227 'topic' => [
228 'LIBRARIES-AUTOMATION',
229 'ELECTRONIC DATA PROCESSING-LIBRARY SCIENCE'
230 ]
231 }
232 ],
233 'name' => [
234 {
235 'namePart' => {
236 700b,701a... 'given' => 'Robert M.',
237 700a,701b... 'family' => 'Hayes'
238 },
239 'type' => 'personal',
240 'role' => {
241 'roleTerm' => {
242 7004,7014... 'content' => 'author',
243 'type' => 'text'
244 }
245 }
246 },
247 {
248 'namePart' => {
249 702b 'given' => 'Joseph',
250 702a 'family' => 'Becker'
251 },
252 'type' => 'personal',
253 'role' => {
254 'roleTerm' => {
255 7024 'content' => 'editor',
256 'type' => 'text'
257 }
258 }
259 },
260 {
261 'namePart' => {
262 702b 'given' => 'Joseph',
263 702a 'family' => 'Becker'
264 },
265 'type' => 'personal',
266 'role' => {
267 'roleTerm' => {
268 7024 'content' => 'illustrator',
269 'type' => 'text'
270 }
271 }
272 }
273 ],
274 'note' => 'bibliogr: 645-647; kazalo; ilustr: ilustr.',
275 'identifier' => {
276 010a 'content' => '0-471-36483-5',
277 'type' => 'isbn'
278 },
279 215a;215d 'physicalDescription' => 'str: 688; pagin: xvi; visina: 24. cm',
280 001 'recordInfo' => {
281 'recordIdentifier' => 'k-7996-8073'
282 },
283 200a 'titleInfo' => {
284 'title' => 'Handbook of data processing for libraries'
285 },
286 'typeOfResource' => 'text',
287 675a 'classification' => {
288 'udc' => '=20'
289 },
290 225a 'relatedItem' => {
291 'titleInfo' => {
292 'title' => 'A WILEY-BECKER & HAYES SERIES BOOK'
293 },
294 'type' => 'series'
295 },
296 'originInfo' => {
297 'issuance' => 'monographic',
298 205a 'edition' => '2.',
299 210c/210a 'publisher' => 'MELVILLE PUBLISHING COMPANY /LOS ANGELES, CALIFORNIA/',
300 210d 'dateIssued' => '1974'
301 }
302 };
303
304
305
306 CASOPIS = {
307 'identifier' => {
308 011a 'content' => '1041-5173',
309 'type' => 'issn'
310 },
311 'recordInfo' => {
312 'recordIdentifier' => 'c-1'
313 },
314 'titleInfo' => {
315 'title' => 'DBMS - CLIENT/SERVER COMPUTING'
316 },
317 'typeOfResource' => 'text',
318 999a 'relatedItem' => {
319 'titleInfo' => {
320 'partNumber' => 'g. 1990, vol. 137, br. 5'
321 },
322 'type' => 'series'
323 },
324 'classification' => {
325 'udc' => '=20'
326 },
327 'originInfo' => {
328 326a 'frequency' => 'mjeseļæ½no',
329 'issuance' => 'continuing',
330 210a 'place' => 'SAN MATEO, KANADA',
331 210c 'publisher' => 'M&T PUBLISHING INC.'
332 }
333 };
334
335
336 =cut
337

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26