/[webpac]/trunk/tools/mods2unimarc.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/tools/mods2unimarc.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 326 - (show annotations)
Fri May 14 17:22:39 2004 UTC (19 years, 10 months ago) by dpavlin
File MIME type: text/plain
File size: 5429 byte(s)
fixes, cleanups, you know: on the way to usable version

1 #!/usr/bin/perl -w
2
3 =head1 NAME
4
5 mods2marc.pl - convert MODS XML back to MARC (ISO2709)
6
7 =head1 SYNOPSIS
8
9 mods2marc.pl mods.xml export.marc
10
11 =head1 DESCRIPTION
12
13 This script will convert MODS format
14 L<http://www.loc.gov/standards/mods/>
15 back to MARC (ISO2709) format.
16
17 Since conversion back to MARC is not simple, lot of things are hard-coded
18 in this script.
19
20 This script B<is somewhat specific> to MODS export from
21 Faculty of Electrical Engineering and Computing
22 so you might want to edit it (among other thing, it includes a lot
23 of fields which are in Croatian).
24
25 Feel free to hack this script and convert it to your own needs.
26
27 =head1 WARNING
28
29 This script is in state of flux.
30
31 =cut
32
33 use strict;
34 use XML::Twig;
35 use XML::Simple;
36 use MARC;
37 use Text::Iconv;
38
39 use Data::Dumper;
40
41 my $xml_file = "/data/tehnika/fer/all.xml";
42 $xml_file = "/data/tehnika/fer/modsFER_1.xml";
43 my $marc_file = "fer.marc";
44
45 $|=1;
46 my $nr = 0;
47
48 my $marc = MARC->new;
49
50 my $twig=XML::Twig->new(
51 twig_roots => { 'mods' => \&item },
52 output_encoding => 'iso-8859-2',
53 );
54
55 my $utf2iso = Text::Iconv->new("UTF-8", "ISO-8859-2");
56
57 $twig->parsefile($xml_file);
58 $twig->purge;
59
60 $marc->output({file=>"> $marc_file",'format'=>"usmarc"});
61
62 sub item {
63 my( $t, $elt)= @_;
64
65 my $xml=$elt->xml_string;
66 my $ref = XMLin("<xml>".$xml."</xml>",
67 ForceArray => [
68 'name',
69 'classification',
70 'topic',
71 'udc',
72 ],
73 KeyAttr => {
74 'namePart' => 'type',
75 'identifier' => 'type',
76 'classification' => 'authority',
77 'namePart' => 'type',
78 'role' => 'type',
79 },
80 GroupTags => {
81 'place' => 'placeTerm',
82 'physicalDescription' => 'extent',
83 'roleTerm' => 'content',
84 },
85 ContentKey => '-content',
86 );
87
88 my $m_cache;
89
90 sub marc_add {
91 my $m_cache = \shift;
92 my $fld = shift || die "need field!";
93 my $sf = shift || '';
94
95 return if (! @_);
96
97 my @a;
98 foreach (@_) {
99 next if (! $_);
100 push @a,$sf if ($sf);
101 # push @a,$utf2iso->convert($_) || $_;
102 push @a,$_;
103 }
104
105 return if (! @a);
106
107 # print "storing $fld: ",join("|",@a),"\n";
108
109 push @{$$m_cache->{$fld}}, @a;
110
111 }
112
113 my $journal = 0;
114 $journal = 1 if ($ref->{recordInfo}->{recordIdentifier} =~ m/^c-/);
115
116 marc_add($m_cache,'610','a',@{$ref->{subject}->{topic}});
117
118 my $fld = '700';
119
120 foreach my $name (@{$ref->{name}}) {
121 my $role = $name->{role}->{roleTerm}->{content};
122 next if (! $role);
123 if ($role eq "author") {
124 marc_add($m_cache,$fld,'a',$name->{namePart}->{family});
125 marc_add($m_cache,$fld,'b',$name->{namePart}->{given});
126 marc_add($m_cache,$fld,'4',$role);
127
128 # first author goes in 700, others in 701
129 $fld = '701';
130 } elsif ($role eq "editor" or $role eq "illustrator") {
131 marc_add($m_cache,'702','a',$name->{namePart}->{family});
132 marc_add($m_cache,'702','b',$name->{namePart}->{given});
133 marc_add($m_cache,'702','4',$role);
134 } else {
135 die "FATAL: don't know how to map role '$role'" if ($role);
136 }
137 }
138
139 my $note = $ref->{note};
140
141 if ($note) {
142 foreach my $n (split(/\s*;\s+/, $note)) {
143 if ($n =~ s/bibliogr:\s+//i) {
144 marc_add($m_cache,'320','a',"Bibliografija: $n");
145 } elsif ($n =~ s/ilustr:\s+//i) {
146 marc_add($m_cache,'215','c', $n);
147 } else {
148 marc_add($m_cache,'320','a',$n);
149 }
150 }
151 }
152
153
154 my $type = $ref->{identifier}->{type};
155
156 if ($type) {
157 if ($type eq "isbn") {
158 marc_add($m_cache,'010','a',$ref->{identifier}->{content});
159 } elsif ($type eq "issn") {
160 marc_add($m_cache,'011','a',$ref->{identifier}->{content});
161 } else {
162 die "unknown identifier type $type";
163 }
164 }
165
166 my $phy_desc = $ref->{physicalDescription};
167 if ($phy_desc) {
168 my $tmp;
169 foreach my $t (split(/\s*;\s+/, $phy_desc)) {
170 if ($t =~ m/([^:]+):\s+(.+)$/) {
171 $tmp->{$1} = $2;
172 } else {
173 die "can't parse $t";
174 }
175 }
176 my $data = $tmp->{pagin};
177 $data .= ", " if ($data);
178 if ($tmp->{str}) {
179 $data .= $tmp->{str}." str";
180 }
181 marc_add($m_cache,'215','a', $data) if ($data);
182 marc_add($m_cache,'215','d', $tmp->{visina});
183 }
184
185 marc_add($m_cache,'001',undef,$ref->{recordInfo}->{recordIdentifier});
186
187 marc_add($m_cache,'200','a',$ref->{titleInfo}->{title});
188 marc_add($m_cache,'200','e',$ref->{titleInfo}->{subTitle});
189
190 marc_add($m_cache,'675','a',$ref->{classification}->{udc});
191
192 my $related = $ref->{relatedItem}->{type};
193 if ($related) {
194 if ($related eq "series") {
195 marc_add($m_cache,'225','a',$ref->{relatedItem}->{titleInfo}->{title});
196 marc_add($m_cache,'999','a',$ref->{relatedItem}->{titleInfo}->{partNumber});
197 } elsif ($related eq "preceding") {
198 marc_add($m_cache,'430','a',$ref->{relatedItem}->{titleInfo}->{title});
199 } else {
200 die "can't parse related item type $related" if ($related);
201 }
202 }
203
204 marc_add($m_cache,'205','a',$ref->{originInfo}->{edition});
205
206 my $publisher = $ref->{originInfo}->{publisher};
207 if ($publisher =~ m,^(.+?)\s*/\s*(.+)$,) {
208 marc_add($m_cache,'210','a', $2);
209 marc_add($m_cache,'210','c', $1);
210 } else {
211 marc_add($m_cache,'210','c', $publisher);
212 }
213
214 marc_add($m_cache,'326','a',$ref->{originInfo}->{frequency}) if ($journal);
215
216 marc_add($m_cache,'210','a',$ref->{originInfo}->{place});
217
218 marc_add($m_cache,'210','d',$ref->{originInfo}->{dateIssued});
219
220 $nr++;
221 print "$nr " if ($nr % 100 == 0);
222
223 # dump record
224 my $m=$marc->createrecord();
225 foreach my $fld (keys %{$m_cache}) {
226 # print "$fld: ",join(" * ",@{$m_cache->{$fld}}),"\n";
227 $marc->addfield({record=>$m,
228 field=>$fld,
229 value=>\@{$m_cache->{$fld}}
230 });
231 }
232
233 $t->purge; # frees the memory
234 }
235

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26