/[webpac]/branches/tehnika/parse_format.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /branches/tehnika/parse_format.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 303 - (show annotations)
Sun Apr 4 22:11:13 2004 UTC (19 years, 11 months ago) by dpavlin
File size: 7054 byte(s)
updated branches to HEAD

1 #-------------------------------------------------------------
2 #
3 # parse_format(...)
4 #
5
6 sub parse_format {
7 my $type = shift || die "parset_format must be called with type!";
8 my $format = shift || die "parse_format must be called with format!";
9 my $row = shift || die "parse_format must be called with row!";
10 my $i = shift || 0; # isis repeatable number
11 my $codepage = shift || die "parse_format must be called with codepage!";
12 if ($type eq "isis") {
13 return parse_iso_format($format,$row,$i,$codepage,'isis_sf');
14 } elsif ($type eq "excel") {
15 return parse_excel_format($format,$row,$i,$codepage);
16 } elsif ($type eq "marc") {
17 return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
18 } elsif ($type eq "feed") {
19 return parse_feed_format($format,$row,$i,$codepage);
20 }
21 }
22
23 #-------------------------------------------------------------
24
25 sub parse_iso_format {
26
27 my $format = shift;
28 my $row = shift;
29 my $i = shift;
30 my $codepage = shift;
31
32 my $func = shift || die "need to know which sub-field function to use";
33
34 require $func.".pm";
35
36 my $out;
37 my $out_swish;
38
39 my $display;
40 my $swish;
41
42 sub cnv_cp {
43 my $codepage = shift;
44 my $tmp = shift || return;
45 if ($codepage) {
46 $tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n";
47 }
48 return $tmp;
49 }
50
51 # if format doesn't exits, store it in cache
52 if (! defined($cache->{format}->{$format})) {
53 # print STDERR "parsing format for '$format'\n";
54 my @fmt;
55
56 my $f = $format;
57
58 my $eval;
59 $eval = $1 if ($f =~ s/^eval{([^}]+)}//);
60
61 if ($f =~ s/^([^\d]+)//) {
62 if ($f) { # there is more to parse
63 push @fmt,$1;
64 } else {
65 @fmt = ('',$1,undef,'');
66 #print STDERR "just one field: $1\n";
67 }
68 } else {
69 push @fmt,'';
70 }
71
72 while ($f) {
73 # print STDERR "\n#### $f";
74 # this is EBSCO special to support numeric subfield in
75 # form of 856#3
76 if ($f =~ s/^(\d\d\d)#*(\w?)//) {
77 push @fmt,$1;
78 if ($2) {
79 push @fmt,$2;
80 } else {
81 push @fmt,undef;
82 }
83 # this might be our local scpeciality -- fields 10 and 11
84 # (as opposed to 010 and 011) so they are strictly listed
85 # here
86 } elsif ($f =~ s/^(1[01]\w?)//) {
87 push @fmt,$1;
88 push @fmt,undef;
89 } elsif ($f =~ s/^mfn//i) {
90 push @fmt,'mfn';
91 push @fmt,'';
92 } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
93 # still prefix?
94 if ($#fmt == 0) {
95 $fmt[0] .= $1;
96 } else {
97 push @fmt,$1;
98 }
99 } elsif ($f =~ s/^([^\d]+\d{0,2})//) {
100 if ($#fmt == 0) {
101 $fmt[0] .= $1;
102 } else {
103 push @fmt,$1;
104 }
105 } elsif ($f =~ s/^(\d{1,2})//) {
106 if ($#fmt == 0) {
107 $fmt[0] .= $1;
108 } else {
109 push @fmt,$1;
110 }
111 } else {
112 print STDERR "unparsed format: $f\n";
113 $f = "";
114 }
115 }
116 push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix
117
118 $cache->{format_eval}->{$format} = $eval; # store eval string (if any)
119
120 $cache->{format}->{$format} = \@fmt;
121
122 # print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
123 # print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
124 # print STDERR Dumper($cache->{format}->{$format});
125 }
126
127 # now produce actual record
128 my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
129 my @fmt = @{$tmp};
130 # print STDERR "using format for '$format':",Dumper(@fmt),"\n";
131 # print STDERR "tmp ",Dumper($tmp);
132 # print STDERR "cache: ",Dumper($cache->{format}->{$format});
133
134 # prefix
135 my $prefix = shift @fmt;
136 my $sufix;
137 while($#fmt > 1) {
138 my $f = shift @fmt || die "BUG: field name can't be empty!";
139 my $sf = shift @fmt;
140
141 if ($f eq 'mfn' && $i == 0) {
142 $display .= $sufix if ($display);
143 $display .= $row->{mfn};
144 } else {
145 my $val = &$func($row,$f,$sf,$i);
146 if ($val) {
147 # print STDERR "val: $val\n";
148 my $tmp = cnv_cp($codepage,$val);
149 if ($display) {
150 $display .= $sufix.$tmp;
151 } else {
152 $display = $tmp;
153 }
154 $swish .= $tmp." ";
155 }
156 }
157 $sufix = shift @fmt;
158 }
159 $display = $prefix.$display.$sufix if ($display);
160
161 my $eval = $cache->{format_eval}->{$format};
162 if ($eval) {
163 sub fld2str {
164 my ($func,$row,$f,$sf,$i) = @_;
165 #print STDERR "## in fld2str\n";
166 my $tmp = $codepage->convert(&$func($row,$f,$sf,$i)) || '';
167 return "'$tmp'";
168 }
169
170 $eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg;
171 #print STDERR "## eval: $eval\n";
172 if (eval "$eval") {
173 return ($swish,$display);
174 } else {
175 return (undef,undef);
176 }
177 }
178
179 if (@fmt) {
180 print STDERR "format left unused: [",join("|",@fmt),"]\n";
181 print STDERR "format: [",join("|",@{$tmp}),"]\n";
182 }
183
184 # print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n";
185
186 return ($swish,$display);
187 }
188
189 #-------------------------------------------------------------
190
191 sub parse_excel_format {
192 my $format = shift;
193 my $row = shift;
194 my $i = shift;
195 my $codepage = shift;
196
197 return if ($i > 0); # Excel doesn't support repeatable fields
198
199 my $out;
200 my $out_swish;
201
202 my $prefix = "";
203 if ($format =~ s/^([^A-Z\|]{1,3})//) {
204 $prefix = $1;
205 }
206
207 my $display;
208 my $swish;
209
210 while ($format && length($format) > 0) {
211 #print STDERR "\n#### $format #";
212 if ($format =~ s/^\|([A-Z]{1,2})\|//) {
213 #print STDERR "--$1-> $format -[",length($format),"] ";
214 if ($row->{$1}) {
215 my $tmp = $row->{$1};
216 if ($codepage) {
217 $tmp = $codepage->convert($tmp) || warn "excel: $1 '$tmp' can't convert";
218 }
219 $display .= $prefix . $tmp;
220 $swish .= $tmp." ";
221 #print STDERR " == $tmp";
222 }
223 $prefix = "";
224 } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
225 $prefix .= $1 if ($display);
226 } else {
227 #print STDERR "unparsed format: $format\n";
228 $prefix .= $format;
229 $format = "";
230 }
231 #print STDERR " display: $display swish: $swish [format: $format]";
232 }
233 # add suffix
234 $display .= $prefix if ($display);
235
236 return ($swish,$display);
237 }
238
239 #-------------------------------------------------------------
240
241 sub parse_feed_format {
242 my $format = shift;
243 my $data = shift;
244 my $i = shift;
245 my $codepage = shift;
246
247 # XXX feed doesn't support repeatable fields, but they really
248 # should, This is a bug. It should be fixed!
249 return if ($i > 0);
250
251 my $out;
252 my $out_swish;
253
254 my $prefix = "";
255 if ($format =~ s/^([^\d\|]{1,3})//) {
256 $prefix = $1;
257 }
258
259 my $display;
260 my $swish;
261
262 while ($format && length($format) > 0) {
263 #print STDERR "\n#### $format #";
264 if ($format =~ s/^\|(\d+)\|//) {
265 #print STDERR "--$1-> $format -[",length($format),"] ";
266 if ($data->{$1}) {
267 my $tmp = $data->{$1};
268 if ($codepage) {
269 $tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n";
270 }
271 $display .= $prefix . $tmp;
272 $swish .= $tmp." ";
273 #print STDERR " == $tmp";
274 }
275 $prefix = "";
276 } elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) {
277 $prefix .= $1 if ($display);
278 } else {
279 print STDERR "unparsed format: $format\n";
280 $prefix .= $format;
281 $format = "";
282 }
283 #print STDERR " display: $display swish: $swish [format: $format]";
284 }
285 # add suffix
286 $display .= $prefix if ($display);
287
288 return ($swish,$display);
289 }
290
291 #-------------------------------------------------------------
292
293 1;

Properties

Name Value
cvs2svn:cvs-rev 1.17

  ViewVC Help
Powered by ViewVC 1.1.26