/[webpac]/trunk/parse_format.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/parse_format.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 176 - (show annotations)
Mon Nov 24 01:16:04 2003 UTC (15 years, 10 months ago) by dpavlin
File size: 6446 byte(s)
fix for wierd prefixes (consisting of chars and numbers)

1 #-------------------------------------------------------------
2 #
3 # parse_format(...)
4 #
5
6 sub parse_format {
7 my $type = shift || die "parset_format must be called with type!";
8 my $format = shift || die "parse_format must be called with format!";
9 my $row = shift || die "parse_format must be called with row!";
10 my $i = shift || 0; # isis repeatable number
11 my $codepage = shift || die "parse_format must be called with codepage!";
12 if ($type eq "isis") {
13 return parse_iso_format($format,$row,$i,$codepage,'isis_sf');
14 } elsif ($type eq "excel") {
15 return parse_excel_format($format,$row,$i,$codepage);
16 } elsif ($type eq "marc") {
17 return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
18 } elsif ($type eq "feed") {
19 return parse_feed_format($format,$row,$i,$codepage);
20 }
21 }
22
23 #-------------------------------------------------------------
24
25 sub parse_iso_format {
26
27 my $format = shift;
28 my $row = shift;
29 my $i = shift;
30 my $codepage = shift;
31
32 my $func = shift || die "need to know which sub-field function to use";
33
34 require $func.".pm";
35
36 my $out;
37 my $out_swish;
38
39 my $display;
40 my $swish;
41
42 sub cnv_cp {
43 my $codepage = shift;
44 my $tmp = shift || return;
45 if ($codepage) {
46 $tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n";
47 }
48 return $tmp;
49 }
50
51 # if format doesn't exits, store it in cache
52 if (! defined($cache->{format}->{$format})) {
53 # print STDERR "parsing format for '$format'\n";
54 my @fmt;
55
56 my $f = $format;
57
58 if ($f =~ s/^([^\d]+)//) {
59 if ($f) { # there is more to parse
60 push @fmt,$1;
61 } else {
62 @fmt = ('',$1,undef,'');
63 #print STDERR "just one field: $1\n";
64 }
65 } else {
66 push @fmt,'';
67 }
68
69 while ($f) {
70 # print STDERR "\n#### $f";
71 # this is EBSCO special to support numeric subfield in
72 # form of 856#3
73 if ($f =~ s/^(\d\d\d)#*(\w?)//) {
74 push @fmt,$1;
75 if ($2) {
76 push @fmt,$2;
77 } else {
78 push @fmt,undef;
79 }
80 # this might be our local scpeciality -- fields 10 and 11
81 # (as opposed to 010 and 011) so they are strictly listed
82 # here
83 } elsif ($f =~ s/^(1[01])//) {
84 push @fmt,$1;
85 push @fmt,undef;
86 } elsif ($f =~ s/^mfn//i) {
87 push @fmt,'mfn';
88 push @fmt,'';
89 } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
90 # still prefix?
91 if ($#fmt == 0) {
92 $fmt[0] .= $1;
93 } else {
94 push @fmt,$1;
95 }
96 } elsif ($f =~ s/^([^\d]+\d{0,2})//) {
97 if ($#fmt == 0) {
98 $fmt[0] .= $1;
99 } else {
100 push @fmt,$1;
101 }
102 } elsif ($f =~ s/^(\d{1,2})//) {
103 if ($#fmt == 0) {
104 $fmt[0] .= $1;
105 } else {
106 push @fmt,$1;
107 }
108 } else {
109 print STDERR "unparsed format: $f\n";
110 $f = "";
111 }
112 }
113 push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix
114 $cache->{format}->{$format} = \@fmt;
115
116 print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
117 # print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
118 # print STDERR Dumper($cache->{format}->{$format});
119 }
120
121 # now produce actual record
122 my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
123 my @fmt = @{$tmp};
124 # print STDERR "using format for '$format':",Dumper(@fmt),"\n";
125 # print STDERR "tmp ",Dumper($tmp);
126 # print STDERR "cache: ",Dumper($cache->{format}->{$format});
127
128 # prefix
129 my $prefix = shift @fmt;
130 my $sufix;
131 while($#fmt > 1) {
132 my $f = shift @fmt || die "BUG: field name can't be empty!";
133 my $sf = shift @fmt;
134
135 if ($f eq 'mfn' && $i == 0) {
136 $display .= $sufix if ($display);
137 $display .= $row->{mfn};
138 } else {
139 my $val = &$func($row,$f,$sf,$i);
140 if ($val) {
141 # print STDERR "val: $val\n";
142 my $tmp = cnv_cp($codepage,$val);
143 if ($display) {
144 $display .= $sufix.$tmp;
145 } else {
146 $display = $tmp;
147 }
148 $swish .= $tmp." ";
149 }
150 }
151 $sufix = shift @fmt;
152 }
153 $display = $prefix.$display.$sufix if ($display);
154 print STDERR "format left unused: [",join("|",@fmt),"]\n" if (@fmt);
155
156 print STDERR "format: [",join("|",@{$tmp}),"]\n" if (@fmt);
157
158 # print STDERR "display: $display swish: $swish\n";
159
160 return ($swish,$display);
161 }
162
163 #-------------------------------------------------------------
164
165 sub parse_excel_format {
166 my $format = shift;
167 my $row = shift;
168 my $i = shift;
169 my $codepage = shift;
170
171 return if ($i > 0); # Excel doesn't support repeatable fields
172
173 my $out;
174 my $out_swish;
175
176 my $prefix = "";
177 if ($format =~ s/^([^A-Z\|]{1,3})//) {
178 $prefix = $1;
179 }
180
181 my $display;
182 my $swish;
183
184 while ($format && length($format) > 0) {
185 #print STDERR "\n#### $format #";
186 if ($format =~ s/^\|([A-Z]{1,2})\|//) {
187 #print STDERR "--$1-> $format -[",length($format),"] ";
188 if ($row->{$1}) {
189 my $tmp = $row->{$1};
190 if ($codepage) {
191 $tmp = $codepage->convert($tmp) || warn "excel: $1 '$tmp' can't convert";
192 }
193 $display .= $prefix . $tmp;
194 $swish .= $tmp." ";
195 #print STDERR " == $tmp";
196 }
197 $prefix = "";
198 } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
199 $prefix .= $1 if ($display);
200 } else {
201 print STDERR "unparsed format: $format\n";
202 $prefix .= $format;
203 $format = "";
204 }
205 #print STDERR " display: $display swish: $swish [format: $format]";
206 }
207 # add suffix
208 $display .= $prefix if ($display);
209
210 return ($swish,$display);
211 }
212
213 #-------------------------------------------------------------
214
215 sub parse_feed_format {
216 my $format = shift;
217 my $data = shift;
218 my $i = shift;
219 my $codepage = shift;
220
221 # XXX feed doesn't support repeatable fields, but they really
222 # should, This is a bug. It should be fixed!
223 return if ($i > 0);
224
225 my $out;
226 my $out_swish;
227
228 my $prefix = "";
229 if ($format =~ s/^([^\d\|]{1,3})//) {
230 $prefix = $1;
231 }
232
233 my $display;
234 my $swish;
235
236 while ($format && length($format) > 0) {
237 #print STDERR "\n#### $format #";
238 if ($format =~ s/^\|(\d+)\|//) {
239 #print STDERR "--$1-> $format -[",length($format),"] ";
240 if ($data->{$1}) {
241 my $tmp = $data->{$1};
242 if ($codepage) {
243 $tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n";
244 }
245 $display .= $prefix . $tmp;
246 $swish .= $tmp." ";
247 #print STDERR " == $tmp";
248 }
249 $prefix = "";
250 } elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) {
251 $prefix .= $1 if ($display);
252 } else {
253 print STDERR "unparsed format: $format\n";
254 $prefix .= $format;
255 $format = "";
256 }
257 #print STDERR " display: $display swish: $swish [format: $format]";
258 }
259 # add suffix
260 $display .= $prefix if ($display);
261
262 return ($swish,$display);
263 }
264
265 #-------------------------------------------------------------
266
267 1;

Properties

Name Value
cvs2svn:cvs-rev 1.16

  ViewVC Help
Powered by ViewVC 1.1.26