/[webpac]/trunk/tools/phpmylib2marc.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/tools/phpmylib2marc.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 217 - (hide annotations)
Sun Feb 1 23:28:27 2004 UTC (18 years, 4 months ago) by dpavlin
File MIME type: text/plain
File size: 4909 byte(s)
bugfix and improvements:
- debug and quiet switches
- skip fields which are too short (just subfield without data)
- calculate directory address
- calculate base address in leader

1 dpavlin 208 #!/usr/bin/perl -w
2     #
3     # This script will try (hard) to convert database from
4     # PhpMyLibrary (http://phpmylibrary.sourceforge.net/) back
5 dpavlin 216 # to MARC format (ISO 2709)
6 dpavlin 208 #
7     # 2003-01-31 Dobrica Pavlinusic <dpavlin@rot13.org>
8     #
9     # This script is written for WebPac project available at
10     # http://webpac.sourceforge.net/
11     #
12     # MARC file format documentation is taken from
13     # http://www.ariadne.ac.uk/issue7/marc/
14     #
15     #
16    
17     use DBI;
18     use strict;
19     use Getopt::Long;
20    
21     my $database = "postnuke";
22     my $host = "localhost";
23     my $user = "root";
24     my $passwd = "";
25    
26     my $usage = 0;
27 dpavlin 216 my $debug = 0;
28 dpavlin 217 my $quiet = 0;
29 dpavlin 208
30     my $result = GetOptions(
31     "database=s" => \$database,
32     "host=s" => \$host,
33     "user=s" => \$user,
34     "password=s" => \$passwd,
35 dpavlin 216 "debug!" => \$debug,
36 dpavlin 217 "verbose!" => \$debug,
37     "quiet!" => \$quiet,
38 dpavlin 208 "help!" => \$usage,
39     );
40    
41     if ($usage) {
42     print qq{usage: $0 [--database="$database" --host="$host" --user="$user" --password="$passwd"] > file.marc\n
43     This script will convert PhpMyLibrary database to standard UNIMARC format\n};
44     exit 1;
45     }
46    
47     my $dsn = "DBI:mysql:database=$database;host=$host";
48     my $dbh = DBI->connect($dsn, $user, $passwd, {'RaiseError' => 1});
49    
50     # UNIMARC leader format
51 dpavlin 216 #my $leader_fmt = qq{%05diam0 22%05d 45 };
52     # MARC leader format
53     my $leader_fmt = qq{%05dcas 22%05d a 4500};
54 dpavlin 208
55    
56     my $sth = $dbh->prepare("SELECT marc FROM tblbib");
57     $sth->execute();
58 dpavlin 216
59     my $count = 0;
60     my $rec_nr = 0;
61    
62 dpavlin 208 while (my $row = $sth->fetchrow_hashref()) {
63     my $marc = $row->{'marc'};
64 dpavlin 216 $rec_nr++;
65 dpavlin 208
66     my $real_len = length($marc);
67    
68 dpavlin 216 my $skip = 0; # skip this record?
69    
70 dpavlin 208 # fix PhpMyLibrary MARC (why do I have to do this? It's MARC,
71     # for gaddem sake!!!
72    
73     # Byte Name
74     # ---- ----
75     # 0-4 Record Length
76     # 5 Status (n=new, c=corrected and d=deleted)
77     # 6 Type of Record (a=printed material)
78     # 7 Bibliographic Level (m=monograph)
79     # 8-9 Blanks
80     # 10 Indictator count (2 for monographs)
81     # 11 Subfield code count (2 - 0x1F+subfield code itself)
82     # 12-16 Base address of data
83     # 17 Encoding level (blank=full level, 1=sublevel 1, 2=sublevel 2,
84     # 3=sublevel 3)
85     # 18 Descriptive Cataloguing Form (blank=record is full ISBD,
86     # n=record is in non-ISBD format, i=record is in
87     # an incomplete ISBD format)
88     # 19 Blank
89     # 20 Length of length field in directory (always 4 in UNIMARC)
90     # 21 Length of Starting Character Position in directory (always
91     # 5 in UNIMARC)
92     # 22 Length of implementation defined portion in directory (always
93     # 0 in UNIMARC)
94     # 23 Blank
95     #
96     # |0 4|5 89 |12 16|1n 450 |
97     # (xxxxx)nam 22(.....) 450 <---
98     $marc =~ m/^(.....)......(.....)polerioj/ || die "record: '$marc' unparsable!";
99     my ($reclen,$base_addr) = ($1,$2);
100    
101 dpavlin 216 my $directory = substr($marc,24,$base_addr-24);
102     my $fields = substr($marc,$base_addr-1);
103    
104     print STDERR "# $rec_nr fields: '$fields'\n" if ($debug);
105     print STDERR "# $rec_nr directory: [",length($directory),"]\n" if ($debug);
106    
107     # PhpMyLibrary MARC records don't have indicators, so we'll add them
108    
109     my $o = 0; # offset
110     my $new_dictionary;
111     my $new_fields;
112    
113     while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) {
114     my ($tag,$len,$addr) = ($1,$2,$3);
115    
116     sub check_field($) {
117     my $f = shift;
118     my $del = substr($f,0,1);
119    
120     die "expected 0x1e, got '$del' (".ord($del)."): '$f'" if (ord($del) != 30);
121     }
122    
123     if (($addr+$len) > length($fields)) {
124 dpavlin 217 print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if (! $quiet);
125 dpavlin 216 $skip = 1;
126     next;
127     }
128    
129     # take field
130     my $f = substr($fields,$addr,$len);
131 dpavlin 217 print STDERR "tag/len/addr $tag [$len] $addr: '$f'\n" if ($debug);
132 dpavlin 216
133     my $del = substr($fields,$addr+$len,1);
134    
135     # check field delimiters...
136     if ($del ne chr(30)) {
137 dpavlin 217 print STDERR "WARNING: skipping record $rec_nr, can't find delimiters got: '$del'\n" if (! $quiet);
138 dpavlin 216 $skip = 1;
139     next;
140     }
141    
142     check_field($f);
143 dpavlin 217
144     if (length($f) < 2) {
145     print STDERR "WARNING: skipping field $tag from record $rec_nr because it's too short!\n" if (! $quiet);
146     next;
147     }
148    
149    
150 dpavlin 216 if ($tag =~ m/^00/) {
151     # fields 001-008 doesn't have indicators
152 dpavlin 217 $new_dictionary .= sprintf("%03d%04d%05d",$tag,$len,length($new_fields || ''));
153 dpavlin 216 $new_fields.=$f;
154     } else {
155 dpavlin 217 $new_dictionary .= sprintf("%03d%04d%05d",$tag,($len+2),length($new_fields || ''));
156 dpavlin 216 $new_fields.=chr(30)." ".substr($f,1);
157     $o += 2;
158     }
159     }
160    
161     if (! $skip) {
162 dpavlin 217 my $new_leader = sprintf($leader_fmt,24+length($new_dictionary.$new_fields)+2,length($new_dictionary)+25);
163 dpavlin 216 my $new_marc = $new_leader . $new_dictionary . $new_fields . chr(30);
164     $new_marc .= chr(29); # end of record
165    
166 dpavlin 217 print STDERR "original and new marc: [$rec_nr/$count]\n$marc\n$new_marc\n\n" if ($debug);
167 dpavlin 216 print "$new_marc";
168     $count++;
169     }
170    
171     # last if ($count > 100);
172    
173 dpavlin 208 }
174     $sth->finish();
175     $dbh->disconnect();
176 dpavlin 216
177     print STDERR "$count records from database $database converted...\n";

Properties

Name Value
cvs2svn:cvs-rev 1.3
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26