/[webpac]/trunk/parse_format.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/parse_format.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 488 - (hide annotations)
Wed Sep 29 17:22:24 2004 UTC (16 years, 4 months ago) by dpavlin
File size: 7318 byte(s)
changes to support UTF-8 encoding from
SpreadSheet::ParseExcel::FmtDefault.

You will have to modify line 69 from
	return pack('C*', unpack('n*', $sTxt));
to following which returns utf-8:
	return pack('U*', unpack('n*', $sTxt));


1 dpavlin 10 #-------------------------------------------------------------
2     #
3 dpavlin 54 # parse_format(...)
4 dpavlin 10 #
5    
6     sub parse_format {
7 dpavlin 54 my $type = shift || die "parset_format must be called with type!";
8 dpavlin 43 my $format = shift || die "parse_format must be called with format!";
9     my $row = shift || die "parse_format must be called with row!";
10 dpavlin 23 my $i = shift || 0; # isis repeatable number
11 dpavlin 43 my $codepage = shift || die "parse_format must be called with codepage!";
12 dpavlin 54 if ($type eq "isis") {
13 dpavlin 62 return parse_iso_format($format,$row,$i,$codepage,'isis_sf');
14 dpavlin 54 } elsif ($type eq "excel") {
15     return parse_excel_format($format,$row,$i,$codepage);
16 dpavlin 62 } elsif ($type eq "marc") {
17     return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
18 dpavlin 67 } elsif ($type eq "feed") {
19     return parse_feed_format($format,$row,$i,$codepage);
20 dpavlin 54 }
21     }
22 dpavlin 10
23 dpavlin 54 #-------------------------------------------------------------
24    
25 dpavlin 62 sub parse_iso_format {
26 dpavlin 54
27     my $format = shift;
28     my $row = shift;
29     my $i = shift;
30     my $codepage = shift;
31    
32 dpavlin 62 my $func = shift || die "need to know which sub-field function to use";
33    
34     require $func.".pm";
35    
36 dpavlin 10 my $out;
37     my $out_swish;
38    
39     my $display;
40     my $swish;
41    
42 dpavlin 62 sub cnv_cp {
43 dpavlin 78 my $codepage = shift;
44     my $tmp = shift || return;
45 dpavlin 62 if ($codepage) {
46 dpavlin 78 $tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n";
47 dpavlin 62 }
48     return $tmp;
49     }
50    
51 dpavlin 170 # if format doesn't exits, store it in cache
52     if (! defined($cache->{format}->{$format})) {
53     # print STDERR "parsing format for '$format'\n";
54     my @fmt;
55    
56     my $f = $format;
57    
58 dpavlin 263 my $eval;
59 dpavlin 384 $eval = $1 if ($f =~ s/^eval{([^}]+?)}//);
60 dpavlin 263
61 dpavlin 170 if ($f =~ s/^([^\d]+)//) {
62     if ($f) { # there is more to parse
63     push @fmt,$1;
64     } else {
65     @fmt = ('',$1,undef,'');
66     #print STDERR "just one field: $1\n";
67 dpavlin 10 }
68 dpavlin 170 } else {
69     push @fmt,'';
70     }
71    
72     while ($f) {
73     # print STDERR "\n#### $f";
74     # this is EBSCO special to support numeric subfield in
75     # form of 856#3
76     if ($f =~ s/^(\d\d\d)#*(\w?)//) {
77     push @fmt,$1;
78     if ($2) {
79     push @fmt,$2;
80     } else {
81     push @fmt,undef;
82     }
83     # this might be our local scpeciality -- fields 10 and 11
84     # (as opposed to 010 and 011) so they are strictly listed
85     # here
86 dpavlin 187 } elsif ($f =~ s/^(1[01]\w?)//) {
87 dpavlin 170 push @fmt,$1;
88     push @fmt,undef;
89     } elsif ($f =~ s/^mfn//i) {
90     push @fmt,'mfn';
91     push @fmt,'';
92     } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
93 dpavlin 176 # still prefix?
94     if ($#fmt == 0) {
95     $fmt[0] .= $1;
96     } else {
97     push @fmt,$1;
98     }
99 dpavlin 170 } elsif ($f =~ s/^([^\d]+\d{0,2})//) {
100 dpavlin 176 if ($#fmt == 0) {
101     $fmt[0] .= $1;
102     } else {
103     push @fmt,$1;
104     }
105 dpavlin 170 } elsif ($f =~ s/^(\d{1,2})//) {
106 dpavlin 176 if ($#fmt == 0) {
107     $fmt[0] .= $1;
108     } else {
109     push @fmt,$1;
110     }
111 dpavlin 170 } else {
112     print STDERR "unparsed format: $f\n";
113     $f = "";
114     }
115     }
116     push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix
117 dpavlin 263
118     $cache->{format_eval}->{$format} = $eval; # store eval string (if any)
119    
120 dpavlin 170 $cache->{format}->{$format} = \@fmt;
121    
122 dpavlin 187 # print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
123 dpavlin 170 # print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
124     # print STDERR Dumper($cache->{format}->{$format});
125     }
126    
127     # now produce actual record
128     my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
129     my @fmt = @{$tmp};
130     # print STDERR "using format for '$format':",Dumper(@fmt),"\n";
131     # print STDERR "tmp ",Dumper($tmp);
132     # print STDERR "cache: ",Dumper($cache->{format}->{$format});
133    
134     # prefix
135     my $prefix = shift @fmt;
136     my $sufix;
137     while($#fmt > 1) {
138     my $f = shift @fmt || die "BUG: field name can't be empty!";
139     my $sf = shift @fmt;
140    
141     if ($f eq 'mfn' && $i == 0) {
142     $display .= $sufix if ($display);
143     $display .= $row->{mfn};
144     } else {
145     my $val = &$func($row,$f,$sf,$i);
146     if ($val) {
147     # print STDERR "val: $val\n";
148     my $tmp = cnv_cp($codepage,$val);
149     if ($display) {
150     $display .= $sufix.$tmp;
151     } else {
152     $display = $tmp;
153     }
154 dpavlin 57 $swish .= $tmp." ";
155 dpavlin 22 }
156 dpavlin 10 }
157 dpavlin 170 $sufix = shift @fmt;
158 dpavlin 10 }
159 dpavlin 170 $display = $prefix.$display.$sufix if ($display);
160 dpavlin 10
161 dpavlin 263 my $eval = $cache->{format_eval}->{$format};
162     if ($eval) {
163     sub fld2str {
164     my ($func,$row,$f,$sf,$i) = @_;
165     #print STDERR "## in fld2str\n";
166 dpavlin 381 my $tmp = $codepage->convert(&$func($row,$f,$sf,$i)) || $codepage->convert(&$func($row,$f,$sf,0)) || '';
167 dpavlin 263 return "'$tmp'";
168     }
169 dpavlin 176
170 dpavlin 263 $eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg;
171     #print STDERR "## eval: $eval\n";
172     if (eval "$eval") {
173 dpavlin 381 die "eval error: eval{$eval}: $@" if ($@);
174 dpavlin 263 return ($swish,$display);
175     } else {
176 dpavlin 381 die "eval error: eval{$eval}: $@" if ($@);
177 dpavlin 263 return (undef,undef);
178     }
179     }
180    
181     if (@fmt) {
182     print STDERR "format left unused: [",join("|",@fmt),"]\n";
183     print STDERR "format: [",join("|",@{$tmp}),"]\n";
184     }
185    
186 dpavlin 187 # print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n";
187 dpavlin 170
188 dpavlin 10 return ($swish,$display);
189     }
190    
191     #-------------------------------------------------------------
192 dpavlin 54
193     sub parse_excel_format {
194     my $format = shift;
195     my $row = shift;
196     my $i = shift;
197 dpavlin 488 #my $codepage = shift;
198     #
199     # data allready comes in utf-8 due to change in
200     # SpreadSheet::ParseExcel::FmtDefault line 69 from
201     # return pack('C*', unpack('n*', $sTxt));
202     # to following which returns utf-8:
203     # return pack('U*', unpack('n*', $sTxt));
204     #
205 dpavlin 54
206 dpavlin 90 return if ($i > 0); # Excel doesn't support repeatable fields
207    
208 dpavlin 54 my $out;
209     my $out_swish;
210    
211     my $prefix = "";
212     if ($format =~ s/^([^A-Z\|]{1,3})//) {
213     $prefix = $1;
214     }
215    
216     my $display;
217     my $swish;
218    
219     while ($format && length($format) > 0) {
220     #print STDERR "\n#### $format #";
221     if ($format =~ s/^\|([A-Z]{1,2})\|//) {
222     #print STDERR "--$1-> $format -[",length($format),"] ";
223     if ($row->{$1}) {
224     my $tmp = $row->{$1};
225     $display .= $prefix . $tmp;
226     $swish .= $tmp." ";
227     #print STDERR " == $tmp";
228     }
229     $prefix = "";
230     } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
231     $prefix .= $1 if ($display);
232     } else {
233 dpavlin 297 #print STDERR "unparsed format: $format\n";
234 dpavlin 54 $prefix .= $format;
235     $format = "";
236     }
237     #print STDERR " display: $display swish: $swish [format: $format]";
238     }
239     # add suffix
240     $display .= $prefix if ($display);
241    
242     return ($swish,$display);
243     }
244    
245 dpavlin 67 #-------------------------------------------------------------
246    
247     sub parse_feed_format {
248     my $format = shift;
249     my $data = shift;
250     my $i = shift;
251     my $codepage = shift;
252    
253 dpavlin 92 # XXX feed doesn't support repeatable fields, but they really
254     # should, This is a bug. It should be fixed!
255     return if ($i > 0);
256    
257 dpavlin 67 my $out;
258     my $out_swish;
259    
260     my $prefix = "";
261     if ($format =~ s/^([^\d\|]{1,3})//) {
262     $prefix = $1;
263     }
264    
265     my $display;
266     my $swish;
267    
268     while ($format && length($format) > 0) {
269     #print STDERR "\n#### $format #";
270     if ($format =~ s/^\|(\d+)\|//) {
271     #print STDERR "--$1-> $format -[",length($format),"] ";
272     if ($data->{$1}) {
273     my $tmp = $data->{$1};
274     if ($codepage) {
275     $tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n";
276     }
277     $display .= $prefix . $tmp;
278     $swish .= $tmp." ";
279     #print STDERR " == $tmp";
280     }
281     $prefix = "";
282     } elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) {
283     $prefix .= $1 if ($display);
284     } else {
285     print STDERR "unparsed format: $format\n";
286     $prefix .= $format;
287     $format = "";
288     }
289     #print STDERR " display: $display swish: $swish [format: $format]";
290     }
291     # add suffix
292     $display .= $prefix if ($display);
293    
294     return ($swish,$display);
295     }
296    
297     #-------------------------------------------------------------
298    
299 dpavlin 10 1;

Properties

Name Value
cvs2svn:cvs-rev 1.17

  ViewVC Help
Powered by ViewVC 1.1.26