/[webpac]/trunk/tools/mods2unimarc.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/tools/mods2unimarc.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 325 - (hide annotations)
Fri May 14 16:38:22 2004 UTC (19 years, 10 months ago) by dpavlin
File MIME type: text/plain
File size: 9742 byte(s)
first try at converting MODS XML records back to MARC

1 dpavlin 325 #!/usr/bin/perl -w
2    
3     =head1 NAME
4    
5     mods2marc.pl - convert MODS XML back to MARC (ISO2709)
6    
7     =head1 SYNOPSIS
8    
9     mods2marc.pl mods.xml export.marc
10    
11     =head1 DESCRIPTION
12    
13     This script will convert MODS format
14     L<http://www.loc.gov/standards/mods/>
15     back to MARC (ISO2709) format.
16    
17     Since conversion back to MARC is not simple, lot of things are hard-coded
18     in this script.
19    
20     This script B<is somewhat specific> to MODS export from
21     Faculty of Electrical Engineering and Computing
22     so you might want to edit it
23    
24     =head1 WARNING
25    
26     This script is in state of flux.
27    
28     =cut
29    
30     use strict;
31     use XML::Twig;
32     use XML::Simple;
33     use MARC;
34     use Text::Iconv;
35    
36     use Data::Dumper;
37    
38     my $xml_file = "/data/tehnika/fer/all.xml";
39     $xml_file = "/data/tehnika/fer/modsFER_1.xml";
40     my $marc_file = "fer.marc";
41    
42     $|=1;
43     my $nr = 0;
44    
45     my $marc = MARC->new;
46    
47     my $twig=XML::Twig->new(
48     twig_roots => { 'mods' => \&item },
49     output_encoding => 'iso-8859-2',
50     );
51    
52     my $utf2iso = Text::Iconv->new("UTF-8", "ISO-8859-2");
53    
54     $twig->parsefile($xml_file);
55     $twig->purge;
56    
57     $marc->output({file=>"> $marc_file",'format'=>"usmarc"});
58    
59     sub item {
60     my( $t, $elt)= @_;
61    
62     my $xml=$elt->xml_string;
63     my $ref = XMLin("<xml>".$xml."</xml>",
64     ForceArray => [
65     'name',
66     'classification',
67     'topic',
68     'udc',
69     ],
70     KeyAttr => {
71     'namePart' => 'type',
72     'identifier' => 'type',
73     'classification' => 'authority',
74     'namePart' => 'type',
75     'role' => 'type',
76     },
77     GroupTags => {
78     'place' => 'placeTerm',
79     'physicalDescription' => 'extent',
80     'roleTerm' => 'content',
81     },
82     ContentKey => '-content',
83     );
84    
85     my $m=$marc->createrecord();
86    
87     sub marc_arr {
88     my $m = shift || die "no marc record?";
89     my $fld = shift || die "no marc field?";
90     my $sf = shift || '';
91    
92     return if (! @_);
93    
94     my @a;
95     foreach (@_) {
96     next if (! $_);
97     push @a,$sf;
98     # push @a,$utf2iso->convert($_) || $_;
99     push @a,$_;
100     }
101    
102     return if (! @a);
103    
104     # print "storing $fld: ",join("|",@a),"\n";
105    
106     $marc->addfield({record=>$m,
107     field=>$fld,
108     # i1=>$i1,
109     # i2=>$i2,
110     value=>\@a});
111     }
112    
113     marc_arr($m,'610','a',@{$ref->{subject}->{topic}});
114    
115     my $fld = '700';
116    
117     foreach my $name (@{$ref->{name}}) {
118     my $role = $name->{role}->{roleTerm}->{content};
119     next if (! $role);
120     if ($role eq "author") {
121     marc_arr($m,$fld,'a',$name->{namePart}->{family});
122     marc_arr($m,$fld,'b',$name->{namePart}->{given});
123     marc_arr($m,$fld,'4',$role);
124    
125     # first author goes in 700, others in 701
126     $fld = '701';
127     } elsif ($role eq "editor" or $role eq "illustrator") {
128     marc_arr($m,'702','a',$name->{namePart}->{family});
129     marc_arr($m,'702','b',$name->{namePart}->{given});
130     marc_arr($m,'702','4',$role);
131     } else {
132     die "FATAL: don't know how to map role '$role'" if ($role);
133     }
134     }
135    
136     my $note = $ref->{note};
137    
138     if ($note) {
139     foreach my $n (split(/\s*;\s+/, $note)) {
140     if ($n =~ s/bibliogr:\s+//i) {
141     marc_arr($m,'320','a',"Bibliografija: $n");
142     } elsif ($n =~ s/ilustr:\s+//i) {
143     marc_arr($m,'215','c', $n);
144     } else {
145     marc_arr($m,'320','a',$n);
146     }
147     }
148     }
149    
150    
151     my $type = $ref->{identifier}->{type};
152    
153     if ($type) {
154     if ($type eq "isbn") {
155     marc_arr($m,'010','a',$ref->{identifier}->{content});
156     } elsif ($type eq "issn") {
157     marc_arr($m,'011','a',$ref->{identifier}->{content});
158     } else {
159     die "unknown identifier type $type";
160     }
161     }
162    
163     my $phy_desc = $ref->{physicalDescription};
164     if ($phy_desc) {
165     my $tmp;
166     foreach my $t (split(/\s*;\s+/, $phy_desc)) {
167     if ($t =~ m/([^:]+):\s+(.+)$/) {
168     $tmp->{$1} = $2;
169     } else {
170     die "can't parse $t";
171     }
172     }
173     my $data = $tmp->{pagin};
174     $data .= ", " if ($data);
175     if ($tmp->{str}) {
176     $data .= $tmp->{str}." str";
177     }
178     marc_arr($m,'210','a', $data) if ($data);
179     marc_arr($m,'210','d', $tmp->{visina});
180     }
181    
182     marc_arr($m,'001','',$ref->{recordInfo}->{recordIdentifier});
183    
184     marc_arr($m,'200','a',$ref->{titleInfo}->{title});
185     marc_arr($m,'200','e',$ref->{titleInfo}->{subTitle});
186    
187     marc_arr($m,'675','a',$ref->{classification}->{udc});
188    
189     my $related = $ref->{relatedItem}->{type};
190     if ($related) {
191     if ($related eq "series") {
192     marc_arr($m,'675','a',$ref->{relatedItem}->{titleInfo}->{title});
193     marc_arr($m,'999','a',$ref->{relatedItem}->{titleInfo}->{partNumber});
194     } elsif ($related eq "preceding") {
195     marc_arr($m,'430','a',$ref->{relatedItem}->{titleInfo}->{title});
196     } else {
197     die "can't parse related item type $related" if ($related);
198     }
199     }
200    
201     marc_arr($m,'205','a',$ref->{originInfo}->{edition});
202    
203     my $publisher = $ref->{originInfo}->{publisher};
204     if ($publisher =~ m,^(.+?)\s*/\s*(.+)$,) {
205     marc_arr($m,'210','a', $2);
206     marc_arr($m,'210','c', $1);
207     } else {
208     marc_arr($m,'210','c', $publisher);
209     }
210    
211     marc_arr($m,'326','a',$ref->{originInfo}->{frequency});
212     marc_arr($m,'326','a',$ref->{originInfo}->{place});
213    
214     marc_arr($m,'210','d',$ref->{originInfo}->{dateIssued});
215    
216     $nr++;
217     print "$nr " if ($nr % 100 == 0);
218    
219     $t->purge; # frees the memory
220     }
221    
222     __END__
223    
224     KNJIGA = {
225     610a 'subject' => [
226     {
227     'topic' => [
228     'LIBRARIES-AUTOMATION',
229     'ELECTRONIC DATA PROCESSING-LIBRARY SCIENCE'
230     ]
231     }
232     ],
233     'name' => [
234     {
235     'namePart' => {
236     700b,701a... 'given' => 'Robert M.',
237     700a,701b... 'family' => 'Hayes'
238     },
239     'type' => 'personal',
240     'role' => {
241     'roleTerm' => {
242     7004,7014... 'content' => 'author',
243     'type' => 'text'
244     }
245     }
246     },
247     {
248     'namePart' => {
249     702b 'given' => 'Joseph',
250     702a 'family' => 'Becker'
251     },
252     'type' => 'personal',
253     'role' => {
254     'roleTerm' => {
255     7024 'content' => 'editor',
256     'type' => 'text'
257     }
258     }
259     },
260     {
261     'namePart' => {
262     702b 'given' => 'Joseph',
263     702a 'family' => 'Becker'
264     },
265     'type' => 'personal',
266     'role' => {
267     'roleTerm' => {
268     7024 'content' => 'illustrator',
269     'type' => 'text'
270     }
271     }
272     }
273     ],
274     'note' => 'bibliogr: 645-647; kazalo; ilustr: ilustr.',
275     'identifier' => {
276     010a 'content' => '0-471-36483-5',
277     'type' => 'isbn'
278     },
279     215a;215d 'physicalDescription' => 'str: 688; pagin: xvi; visina: 24. cm',
280     001 'recordInfo' => {
281     'recordIdentifier' => 'k-7996-8073'
282     },
283     200a 'titleInfo' => {
284     'title' => 'Handbook of data processing for libraries'
285     },
286     'typeOfResource' => 'text',
287     675a 'classification' => {
288     'udc' => '=20'
289     },
290     225a 'relatedItem' => {
291     'titleInfo' => {
292     'title' => 'A WILEY-BECKER & HAYES SERIES BOOK'
293     },
294     'type' => 'series'
295     },
296     'originInfo' => {
297     'issuance' => 'monographic',
298     205a 'edition' => '2.',
299     210c/210a 'publisher' => 'MELVILLE PUBLISHING COMPANY /LOS ANGELES, CALIFORNIA/',
300     210d 'dateIssued' => '1974'
301     }
302     };
303    
304    
305    
306     CASOPIS = {
307     'identifier' => {
308     011a 'content' => '1041-5173',
309     'type' => 'issn'
310     },
311     'recordInfo' => {
312     'recordIdentifier' => 'c-1'
313     },
314     'titleInfo' => {
315     'title' => 'DBMS - CLIENT/SERVER COMPUTING'
316     },
317     'typeOfResource' => 'text',
318     999a 'relatedItem' => {
319     'titleInfo' => {
320     'partNumber' => 'g. 1990, vol. 137, br. 5'
321     },
322     'type' => 'series'
323     },
324     'classification' => {
325     'udc' => '=20'
326     },
327     'originInfo' => {
328     326a 'frequency' => 'mjeseļæ½no',
329     'issuance' => 'continuing',
330     210a 'place' => 'SAN MATEO, KANADA',
331     210c 'publisher' => 'M&T PUBLISHING INC.'
332     }
333     };
334    
335    
336     =cut
337    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26