Line # Revision Author
1 10 dpavlin #-------------------------------------------------------------
2 #
3 54 dpavlin # parse_format(...)
4 10 dpavlin #
5
6 sub parse_format {
7 54 dpavlin my $type = shift || die "parset_format must be called with type!";
8 43 dpavlin my $format = shift || die "parse_format must be called with format!";
9 my $row = shift || die "parse_format must be called with row!";
10 23 dpavlin my $i = shift || 0; # isis repeatable number
11 43 dpavlin my $codepage = shift || die "parse_format must be called with codepage!";
12 54 dpavlin if ($type eq "isis") {
13 684 dpavlin return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
14 54 dpavlin } elsif ($type eq "excel") {
15 return parse_excel_format($format,$row,$i,$codepage);
16 62 dpavlin } elsif ($type eq "marc") {
17 return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
18 67 dpavlin } elsif ($type eq "feed") {
19 return parse_feed_format($format,$row,$i,$codepage);
20 684 dpavlin } elsif ($type eq "dbf") {
21 return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
22 } else {
23 confess "FATAL: unknown type '$type'";
24 54 dpavlin }
25 }
26 10 dpavlin
27 54 dpavlin #-------------------------------------------------------------
28
29 62 dpavlin sub parse_iso_format {
30 54 dpavlin
31 my $format = shift;
32 my $row = shift;
33 my $i = shift;
34 my $codepage = shift;
35
36 62 dpavlin my $func = shift || die "need to know which sub-field function to use";
37
38 require $func.".pm";
39
40 10 dpavlin my $out;
41 my $out_swish;
42
43 my $display;
44 my $swish;
45
46 62 dpavlin sub cnv_cp {
47 78 dpavlin my $codepage = shift;
48 my $tmp = shift || return;
49 62 dpavlin if ($codepage) {
50 78 dpavlin $tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n";
51 62 dpavlin }
52 return $tmp;
53 }
54
55 170 dpavlin # if format doesn't exits, store it in cache
56 if (! defined($cache->{format}->{$format})) {
57 # print STDERR "parsing format for '$format'\n";
58 my @fmt;
59
60 my $f = $format;
61
62 263 dpavlin my $eval;
63 458 dpavlin $eval = $1 if ($f =~ s/^eval{([^}]+?)}//);
64 263 dpavlin
65 170 dpavlin if ($f =~ s/^([^\d]+)//) {
66 if ($f) { # there is more to parse
67 push @fmt,$1;
68 } else {
69 @fmt = ('',$1,undef,'');
70 #print STDERR "just one field: $1\n";
71 10 dpavlin }
72 170 dpavlin } else {
73 push @fmt,'';
74 }
75
76 while ($f) {
77 # print STDERR "\n#### $f";
78 # this is EBSCO special to support numeric subfield in
79 # form of 856#3
80 610 dpavlin if ($f =~ s/^(\d\d\d*)#*(\w?)//) {
81 170 dpavlin push @fmt,$1;
82 if ($2) {
83 push @fmt,$2;
84 } else {
85 push @fmt,undef;
86 }
87 # this might be our local scpeciality -- fields 10 and 11
88 # (as opposed to 010 and 011) so they are strictly listed
89 # here
90 187 dpavlin } elsif ($f =~ s/^(1[01]\w?)//) {
91 170 dpavlin push @fmt,$1;
92 push @fmt,undef;
93 } elsif ($f =~ s/^mfn//i) {
94 push @fmt,'mfn';
95 push @fmt,'';
96 } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
97 176 dpavlin # still prefix?
98 if ($#fmt == 0) {
99 $fmt[0] .= $1;
100 } else {
101 push @fmt,$1;
102 }
103 170 dpavlin } elsif ($f =~ s/^([^\d]+\d{0,2})//) {
104 176 dpavlin if ($#fmt == 0) {
105 $fmt[0] .= $1;
106 } else {
107 push @fmt,$1;
108 }
109 170 dpavlin } elsif ($f =~ s/^(\d{1,2})//) {
110 176 dpavlin if ($#fmt == 0) {
111 $fmt[0] .= $1;
112 } else {
113 push @fmt,$1;
114 }
115 170 dpavlin } else {
116 print STDERR "unparsed format: $f\n";
117 $f = "";
118 }
119 }
120 push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix
121 263 dpavlin
122 $cache->{format_eval}->{$format} = $eval; # store eval string (if any)
123
124 170 dpavlin $cache->{format}->{$format} = \@fmt;
125
126 187 dpavlin # print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
127 170 dpavlin # print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
128 # print STDERR Dumper($cache->{format}->{$format});
129 }
130
131 # now produce actual record
132 my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
133 my @fmt = @{$tmp};
134 # print STDERR "using format for '$format':",Dumper(@fmt),"\n";
135 # print STDERR "tmp ",Dumper($tmp);
136 # print STDERR "cache: ",Dumper($cache->{format}->{$format});
137
138 # prefix
139 my $prefix = shift @fmt;
140 my $sufix;
141 while($#fmt > 1) {
142 my $f = shift @fmt || die "BUG: field name can't be empty!";
143 my $sf = shift @fmt;
144
145 if ($f eq 'mfn' && $i == 0) {
146 $display .= $sufix if ($display);
147 $display .= $row->{mfn};
148 } else {
149 my $val = &$func($row,$f,$sf,$i);
150 if ($val) {
151 # print STDERR "val: $val\n";
152 my $tmp = cnv_cp($codepage,$val);
153 if ($display) {
154 $display .= $sufix.$tmp;
155 } else {
156 $display = $tmp;
157 }
158 57 dpavlin $swish .= $tmp." ";
159 22 dpavlin }
160 10 dpavlin }
161 170 dpavlin $sufix = shift @fmt;
162 10 dpavlin }
163 170 dpavlin $display = $prefix.$display.$sufix if ($display);
164 10 dpavlin
165 263 dpavlin my $eval = $cache->{format_eval}->{$format};
166 if ($eval) {
167 sub fld2str {
168 my ($func,$row,$f,$sf,$i) = @_;
169 #print STDERR "## in fld2str\n";
170 458 dpavlin my $tmp = $codepage->convert(&$func($row,$f,$sf,$i)) || $codepage->convert(&$func($row,$f,$sf,0)) || '';
171 263 dpavlin return "'$tmp'";
172 }
173 176 dpavlin
174 263 dpavlin $eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg;
175 #print STDERR "## eval: $eval\n";
176 if (eval "$eval") {
177 458 dpavlin die "eval error: eval{$eval}: $@" if ($@);
178 263 dpavlin return ($swish,$display);
179 } else {
180 458 dpavlin die "eval error: eval{$eval}: $@" if ($@);
181 263 dpavlin return (undef,undef);
182 }
183 }
184
185 if (@fmt) {
186 print STDERR "format left unused: [",join("|",@fmt),"]\n";
187 print STDERR "format: [",join("|",@{$tmp}),"]\n";
188 }
189
190 187 dpavlin # print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n";
191 170 dpavlin
192 10 dpavlin return ($swish,$display);
193 }
194
195 #-------------------------------------------------------------
196 54 dpavlin
197 sub parse_excel_format {
198 my $format = shift;
199 my $row = shift;
200 my $i = shift;
201 489 dpavlin #my $codepage = shift;
202 #
203 # data allready comes in utf-8 due to change in
204 # SpreadSheet::ParseExcel::FmtDefault line 69 from
205 # return pack('C*', unpack('n*', $sTxt));
206 # to following which returns utf-8:
207 # return pack('U*', unpack('n*', $sTxt));
208 #
209 54 dpavlin
210 90 dpavlin return if ($i > 0); # Excel doesn't support repeatable fields
211
212 54 dpavlin my $out;
213 my $out_swish;
214
215 my $prefix = "";
216 if ($format =~ s/^([^A-Z\|]{1,3})//) {
217 $prefix = $1;
218 }
219
220 my $display;
221 my $swish;
222
223 while ($format && length($format) > 0) {
224 #print STDERR "\n#### $format #";
225 if ($format =~ s/^\|([A-Z]{1,2})\|//) {
226 #print STDERR "--$1-> $format -[",length($format),"] ";
227 if ($row->{$1}) {
228 my $tmp = $row->{$1};
229 $display .= $prefix . $tmp;
230 $swish .= $tmp." ";
231 #print STDERR " == $tmp";
232 }
233 $prefix = "";
234 } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
235 $prefix .= $1 if ($display);
236 } else {
237 297 dpavlin #print STDERR "unparsed format: $format\n";
238 54 dpavlin $prefix .= $format;
239 $format = "";
240 }
241 #print STDERR " display: $display swish: $swish [format: $format]";
242 }
243 # add suffix
244 $display .= $prefix if ($display);
245
246 return ($swish,$display);
247 }
248
249 67 dpavlin #-------------------------------------------------------------
250
251 sub parse_feed_format {
252 my $format = shift;
253 my $data = shift;
254 my $i = shift;
255 my $codepage = shift;
256
257 92 dpavlin # XXX feed doesn't support repeatable fields, but they really
258 # should, This is a bug. It should be fixed!
259 return if ($i > 0);
260
261 67 dpavlin my $out;
262 my $out_swish;
263
264 my $prefix = "";
265 if ($format =~ s/^([^\d\|]{1,3})//) {
266 $prefix = $1;
267 }
268
269 my $display;
270 my $swish;
271
272 while ($format && length($format) > 0) {
273 #print STDERR "\n#### $format #";
274 if ($format =~ s/^\|(\d+)\|//) {
275 #print STDERR "--$1-> $format -[",length($format),"] ";
276 if ($data->{$1}) {
277 my $tmp = $data->{$1};
278 if ($codepage) {
279 $tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n";
280 }
281 $display .= $prefix . $tmp;
282 $swish .= $tmp." ";
283 #print STDERR " == $tmp";
284 }
285 $prefix = "";
286 } elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) {
287 $prefix .= $1 if ($display);
288 } else {
289 print STDERR "unparsed format: $format\n";
290 $prefix .= $format;
291 $format = "";
292 }
293 #print STDERR " display: $display swish: $swish [format: $format]";
294 }
295 # add suffix
296 $display .= $prefix if ($display);
297
298 return ($swish,$display);
299 }
300
301 #-------------------------------------------------------------
302
303 10 dpavlin 1;