/[webpac]/trunk/tools/mods2unimarc.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/tools/mods2unimarc.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 326 - (hide annotations)
Fri May 14 17:22:39 2004 UTC (15 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 5429 byte(s)
fixes, cleanups, you know: on the way to usable version

1 dpavlin 325 #!/usr/bin/perl -w
2    
3     =head1 NAME
4    
5     mods2marc.pl - convert MODS XML back to MARC (ISO2709)
6    
7     =head1 SYNOPSIS
8    
9     mods2marc.pl mods.xml export.marc
10    
11     =head1 DESCRIPTION
12    
13     This script will convert MODS format
14     L<http://www.loc.gov/standards/mods/>
15     back to MARC (ISO2709) format.
16    
17     Since conversion back to MARC is not simple, lot of things are hard-coded
18     in this script.
19    
20     This script B<is somewhat specific> to MODS export from
21     Faculty of Electrical Engineering and Computing
22 dpavlin 326 so you might want to edit it (among other thing, it includes a lot
23     of fields which are in Croatian).
24 dpavlin 325
25 dpavlin 326 Feel free to hack this script and convert it to your own needs.
26    
27 dpavlin 325 =head1 WARNING
28    
29     This script is in state of flux.
30    
31     =cut
32    
33     use strict;
34     use XML::Twig;
35     use XML::Simple;
36     use MARC;
37     use Text::Iconv;
38    
39     use Data::Dumper;
40    
41     my $xml_file = "/data/tehnika/fer/all.xml";
42     $xml_file = "/data/tehnika/fer/modsFER_1.xml";
43     my $marc_file = "fer.marc";
44    
45     $|=1;
46     my $nr = 0;
47    
48     my $marc = MARC->new;
49    
50     my $twig=XML::Twig->new(
51     twig_roots => { 'mods' => \&item },
52     output_encoding => 'iso-8859-2',
53     );
54    
55     my $utf2iso = Text::Iconv->new("UTF-8", "ISO-8859-2");
56    
57     $twig->parsefile($xml_file);
58     $twig->purge;
59    
60     $marc->output({file=>"> $marc_file",'format'=>"usmarc"});
61    
62     sub item {
63     my( $t, $elt)= @_;
64    
65     my $xml=$elt->xml_string;
66     my $ref = XMLin("<xml>".$xml."</xml>",
67     ForceArray => [
68     'name',
69     'classification',
70     'topic',
71     'udc',
72     ],
73     KeyAttr => {
74     'namePart' => 'type',
75     'identifier' => 'type',
76     'classification' => 'authority',
77     'namePart' => 'type',
78     'role' => 'type',
79     },
80     GroupTags => {
81     'place' => 'placeTerm',
82     'physicalDescription' => 'extent',
83     'roleTerm' => 'content',
84     },
85     ContentKey => '-content',
86     );
87    
88 dpavlin 326 my $m_cache;
89 dpavlin 325
90 dpavlin 326 sub marc_add {
91     my $m_cache = \shift;
92     my $fld = shift || die "need field!";
93 dpavlin 325 my $sf = shift || '';
94    
95     return if (! @_);
96    
97     my @a;
98     foreach (@_) {
99     next if (! $_);
100 dpavlin 326 push @a,$sf if ($sf);
101 dpavlin 325 # push @a,$utf2iso->convert($_) || $_;
102     push @a,$_;
103     }
104    
105     return if (! @a);
106    
107     # print "storing $fld: ",join("|",@a),"\n";
108    
109 dpavlin 326 push @{$$m_cache->{$fld}}, @a;
110    
111 dpavlin 325 }
112    
113 dpavlin 326 my $journal = 0;
114     $journal = 1 if ($ref->{recordInfo}->{recordIdentifier} =~ m/^c-/);
115    
116     marc_add($m_cache,'610','a',@{$ref->{subject}->{topic}});
117 dpavlin 325
118     my $fld = '700';
119    
120     foreach my $name (@{$ref->{name}}) {
121     my $role = $name->{role}->{roleTerm}->{content};
122     next if (! $role);
123     if ($role eq "author") {
124 dpavlin 326 marc_add($m_cache,$fld,'a',$name->{namePart}->{family});
125     marc_add($m_cache,$fld,'b',$name->{namePart}->{given});
126     marc_add($m_cache,$fld,'4',$role);
127 dpavlin 325
128     # first author goes in 700, others in 701
129     $fld = '701';
130     } elsif ($role eq "editor" or $role eq "illustrator") {
131 dpavlin 326 marc_add($m_cache,'702','a',$name->{namePart}->{family});
132     marc_add($m_cache,'702','b',$name->{namePart}->{given});
133     marc_add($m_cache,'702','4',$role);
134 dpavlin 325 } else {
135     die "FATAL: don't know how to map role '$role'" if ($role);
136     }
137     }
138    
139     my $note = $ref->{note};
140    
141     if ($note) {
142     foreach my $n (split(/\s*;\s+/, $note)) {
143     if ($n =~ s/bibliogr:\s+//i) {
144 dpavlin 326 marc_add($m_cache,'320','a',"Bibliografija: $n");
145 dpavlin 325 } elsif ($n =~ s/ilustr:\s+//i) {
146 dpavlin 326 marc_add($m_cache,'215','c', $n);
147 dpavlin 325 } else {
148 dpavlin 326 marc_add($m_cache,'320','a',$n);
149 dpavlin 325 }
150     }
151     }
152    
153    
154     my $type = $ref->{identifier}->{type};
155    
156     if ($type) {
157     if ($type eq "isbn") {
158 dpavlin 326 marc_add($m_cache,'010','a',$ref->{identifier}->{content});
159 dpavlin 325 } elsif ($type eq "issn") {
160 dpavlin 326 marc_add($m_cache,'011','a',$ref->{identifier}->{content});
161 dpavlin 325 } else {
162     die "unknown identifier type $type";
163     }
164     }
165    
166     my $phy_desc = $ref->{physicalDescription};
167     if ($phy_desc) {
168     my $tmp;
169     foreach my $t (split(/\s*;\s+/, $phy_desc)) {
170     if ($t =~ m/([^:]+):\s+(.+)$/) {
171     $tmp->{$1} = $2;
172     } else {
173     die "can't parse $t";
174     }
175     }
176     my $data = $tmp->{pagin};
177     $data .= ", " if ($data);
178     if ($tmp->{str}) {
179     $data .= $tmp->{str}." str";
180     }
181 dpavlin 326 marc_add($m_cache,'215','a', $data) if ($data);
182     marc_add($m_cache,'215','d', $tmp->{visina});
183 dpavlin 325 }
184    
185 dpavlin 326 marc_add($m_cache,'001',undef,$ref->{recordInfo}->{recordIdentifier});
186 dpavlin 325
187 dpavlin 326 marc_add($m_cache,'200','a',$ref->{titleInfo}->{title});
188     marc_add($m_cache,'200','e',$ref->{titleInfo}->{subTitle});
189 dpavlin 325
190 dpavlin 326 marc_add($m_cache,'675','a',$ref->{classification}->{udc});
191 dpavlin 325
192     my $related = $ref->{relatedItem}->{type};
193     if ($related) {
194     if ($related eq "series") {
195 dpavlin 326 marc_add($m_cache,'225','a',$ref->{relatedItem}->{titleInfo}->{title});
196     marc_add($m_cache,'999','a',$ref->{relatedItem}->{titleInfo}->{partNumber});
197 dpavlin 325 } elsif ($related eq "preceding") {
198 dpavlin 326 marc_add($m_cache,'430','a',$ref->{relatedItem}->{titleInfo}->{title});
199 dpavlin 325 } else {
200     die "can't parse related item type $related" if ($related);
201     }
202     }
203    
204 dpavlin 326 marc_add($m_cache,'205','a',$ref->{originInfo}->{edition});
205 dpavlin 325
206     my $publisher = $ref->{originInfo}->{publisher};
207     if ($publisher =~ m,^(.+?)\s*/\s*(.+)$,) {
208 dpavlin 326 marc_add($m_cache,'210','a', $2);
209     marc_add($m_cache,'210','c', $1);
210 dpavlin 325 } else {
211 dpavlin 326 marc_add($m_cache,'210','c', $publisher);
212 dpavlin 325 }
213    
214 dpavlin 326 marc_add($m_cache,'326','a',$ref->{originInfo}->{frequency}) if ($journal);
215 dpavlin 325
216 dpavlin 326 marc_add($m_cache,'210','a',$ref->{originInfo}->{place});
217 dpavlin 325
218 dpavlin 326 marc_add($m_cache,'210','d',$ref->{originInfo}->{dateIssued});
219    
220 dpavlin 325 $nr++;
221     print "$nr " if ($nr % 100 == 0);
222    
223 dpavlin 326 # dump record
224     my $m=$marc->createrecord();
225     foreach my $fld (keys %{$m_cache}) {
226     # print "$fld: ",join(" * ",@{$m_cache->{$fld}}),"\n";
227     $marc->addfield({record=>$m,
228     field=>$fld,
229     value=>\@{$m_cache->{$fld}}
230     });
231     }
232    
233 dpavlin 325 $t->purge; # frees the memory
234     }
235    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26