/[webpac]/trunk/parse_format.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/parse_format.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 678 - (hide annotations)
Sun Feb 27 23:07:35 2005 UTC (14 years, 9 months ago) by dpavlin
File size: 7461 byte(s)
Experimental support for dBase .dbf files. Usege like this in all2xml.conf:

[hda]
       dbf_file=/data/drustvene/hda/ISO.DBF
       type=dbf
       dbf_codepage=cp852
       dbf_mapping=<<_END_OF_MAP_
ID_BROJ                001
ISBN_BROJ      010
SKUPINA1       200
SKUPINA2       205
SKUPINA4       210
SKUPINA5       215
SKUPINA6       225
SKUPINA7       300
ANOTACIJA      330
PREDMET1       610
PREDMET2       610
PREDMET3       510
UDK            675
REDALICA       700
SIGNATURA      990
_END_OF_MAP_

dbf type will use <isis> tag in import_xml and dbf_codepage will
override codepage specified in import_xml file.

Small code refactoring.


1 dpavlin 10 #-------------------------------------------------------------
2     #
3 dpavlin 54 # parse_format(...)
4 dpavlin 10 #
5    
6     sub parse_format {
7 dpavlin 54 my $type = shift || die "parset_format must be called with type!";
8 dpavlin 43 my $format = shift || die "parse_format must be called with format!";
9     my $row = shift || die "parse_format must be called with row!";
10 dpavlin 23 my $i = shift || 0; # isis repeatable number
11 dpavlin 43 my $codepage = shift || die "parse_format must be called with codepage!";
12 dpavlin 54 if ($type eq "isis") {
13 dpavlin 678 return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
14 dpavlin 54 } elsif ($type eq "excel") {
15     return parse_excel_format($format,$row,$i,$codepage);
16 dpavlin 62 } elsif ($type eq "marc") {
17     return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
18 dpavlin 67 } elsif ($type eq "feed") {
19     return parse_feed_format($format,$row,$i,$codepage);
20 dpavlin 678 } elsif ($type eq "dbf") {
21     return parse_iso_format($format,$row,$i,$codepage,'hash_sf');
22     } else {
23     confess "FATAL: unknown type '$type'";
24 dpavlin 54 }
25     }
26 dpavlin 10
27 dpavlin 54 #-------------------------------------------------------------
28    
29 dpavlin 62 sub parse_iso_format {
30 dpavlin 54
31     my $format = shift;
32     my $row = shift;
33     my $i = shift;
34     my $codepage = shift;
35    
36 dpavlin 62 my $func = shift || die "need to know which sub-field function to use";
37    
38     require $func.".pm";
39    
40 dpavlin 10 my $out;
41     my $out_swish;
42    
43     my $display;
44     my $swish;
45    
46 dpavlin 62 sub cnv_cp {
47 dpavlin 78 my $codepage = shift;
48     my $tmp = shift || return;
49 dpavlin 62 if ($codepage) {
50 dpavlin 78 $tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n";
51 dpavlin 62 }
52     return $tmp;
53     }
54    
55 dpavlin 170 # if format doesn't exits, store it in cache
56     if (! defined($cache->{format}->{$format})) {
57     # print STDERR "parsing format for '$format'\n";
58     my @fmt;
59    
60     my $f = $format;
61    
62 dpavlin 263 my $eval;
63 dpavlin 384 $eval = $1 if ($f =~ s/^eval{([^}]+?)}//);
64 dpavlin 263
65 dpavlin 170 if ($f =~ s/^([^\d]+)//) {
66     if ($f) { # there is more to parse
67     push @fmt,$1;
68     } else {
69     @fmt = ('',$1,undef,'');
70     #print STDERR "just one field: $1\n";
71 dpavlin 10 }
72 dpavlin 170 } else {
73     push @fmt,'';
74     }
75    
76     while ($f) {
77     # print STDERR "\n#### $f";
78     # this is EBSCO special to support numeric subfield in
79     # form of 856#3
80     if ($f =~ s/^(\d\d\d)#*(\w?)//) {
81     push @fmt,$1;
82     if ($2) {
83     push @fmt,$2;
84     } else {
85     push @fmt,undef;
86     }
87     # this might be our local scpeciality -- fields 10 and 11
88     # (as opposed to 010 and 011) so they are strictly listed
89     # here
90 dpavlin 187 } elsif ($f =~ s/^(1[01]\w?)//) {
91 dpavlin 170 push @fmt,$1;
92     push @fmt,undef;
93     } elsif ($f =~ s/^mfn//i) {
94     push @fmt,'mfn';
95     push @fmt,'';
96     } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
97 dpavlin 176 # still prefix?
98     if ($#fmt == 0) {
99     $fmt[0] .= $1;
100     } else {
101     push @fmt,$1;
102     }
103 dpavlin 170 } elsif ($f =~ s/^([^\d]+\d{0,2})//) {
104 dpavlin 176 if ($#fmt == 0) {
105     $fmt[0] .= $1;
106     } else {
107     push @fmt,$1;
108     }
109 dpavlin 170 } elsif ($f =~ s/^(\d{1,2})//) {
110 dpavlin 176 if ($#fmt == 0) {
111     $fmt[0] .= $1;
112     } else {
113     push @fmt,$1;
114     }
115 dpavlin 170 } else {
116     print STDERR "unparsed format: $f\n";
117     $f = "";
118     }
119     }
120     push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix
121 dpavlin 263
122     $cache->{format_eval}->{$format} = $eval; # store eval string (if any)
123    
124 dpavlin 170 $cache->{format}->{$format} = \@fmt;
125    
126 dpavlin 187 # print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
127 dpavlin 170 # print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
128     # print STDERR Dumper($cache->{format}->{$format});
129     }
130    
131     # now produce actual record
132     my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
133     my @fmt = @{$tmp};
134     # print STDERR "using format for '$format':",Dumper(@fmt),"\n";
135     # print STDERR "tmp ",Dumper($tmp);
136     # print STDERR "cache: ",Dumper($cache->{format}->{$format});
137    
138     # prefix
139     my $prefix = shift @fmt;
140     my $sufix;
141     while($#fmt > 1) {
142     my $f = shift @fmt || die "BUG: field name can't be empty!";
143     my $sf = shift @fmt;
144    
145     if ($f eq 'mfn' && $i == 0) {
146     $display .= $sufix if ($display);
147     $display .= $row->{mfn};
148     } else {
149     my $val = &$func($row,$f,$sf,$i);
150     if ($val) {
151     # print STDERR "val: $val\n";
152     my $tmp = cnv_cp($codepage,$val);
153     if ($display) {
154     $display .= $sufix.$tmp;
155     } else {
156     $display = $tmp;
157     }
158 dpavlin 57 $swish .= $tmp." ";
159 dpavlin 22 }
160 dpavlin 10 }
161 dpavlin 170 $sufix = shift @fmt;
162 dpavlin 10 }
163 dpavlin 170 $display = $prefix.$display.$sufix if ($display);
164 dpavlin 10
165 dpavlin 263 my $eval = $cache->{format_eval}->{$format};
166     if ($eval) {
167     sub fld2str {
168     my ($func,$row,$f,$sf,$i) = @_;
169     #print STDERR "## in fld2str\n";
170 dpavlin 381 my $tmp = $codepage->convert(&$func($row,$f,$sf,$i)) || $codepage->convert(&$func($row,$f,$sf,0)) || '';
171 dpavlin 263 return "'$tmp'";
172     }
173 dpavlin 176
174 dpavlin 263 $eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg;
175     #print STDERR "## eval: $eval\n";
176     if (eval "$eval") {
177 dpavlin 381 die "eval error: eval{$eval}: $@" if ($@);
178 dpavlin 263 return ($swish,$display);
179     } else {
180 dpavlin 381 die "eval error: eval{$eval}: $@" if ($@);
181 dpavlin 263 return (undef,undef);
182     }
183     }
184    
185     if (@fmt) {
186     print STDERR "format left unused: [",join("|",@fmt),"]\n";
187     print STDERR "format: [",join("|",@{$tmp}),"]\n";
188     }
189    
190 dpavlin 187 # print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n";
191 dpavlin 170
192 dpavlin 10 return ($swish,$display);
193     }
194    
195     #-------------------------------------------------------------
196 dpavlin 54
197     sub parse_excel_format {
198     my $format = shift;
199     my $row = shift;
200     my $i = shift;
201 dpavlin 488 #my $codepage = shift;
202     #
203     # data allready comes in utf-8 due to change in
204     # SpreadSheet::ParseExcel::FmtDefault line 69 from
205     # return pack('C*', unpack('n*', $sTxt));
206     # to following which returns utf-8:
207     # return pack('U*', unpack('n*', $sTxt));
208     #
209 dpavlin 54
210 dpavlin 90 return if ($i > 0); # Excel doesn't support repeatable fields
211    
212 dpavlin 54 my $out;
213     my $out_swish;
214    
215     my $prefix = "";
216     if ($format =~ s/^([^A-Z\|]{1,3})//) {
217     $prefix = $1;
218     }
219    
220     my $display;
221     my $swish;
222    
223     while ($format && length($format) > 0) {
224     #print STDERR "\n#### $format #";
225     if ($format =~ s/^\|([A-Z]{1,2})\|//) {
226     #print STDERR "--$1-> $format -[",length($format),"] ";
227     if ($row->{$1}) {
228     my $tmp = $row->{$1};
229     $display .= $prefix . $tmp;
230     $swish .= $tmp." ";
231     #print STDERR " == $tmp";
232     }
233     $prefix = "";
234     } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
235     $prefix .= $1 if ($display);
236     } else {
237 dpavlin 297 #print STDERR "unparsed format: $format\n";
238 dpavlin 54 $prefix .= $format;
239     $format = "";
240     }
241     #print STDERR " display: $display swish: $swish [format: $format]";
242     }
243     # add suffix
244     $display .= $prefix if ($display);
245    
246     return ($swish,$display);
247     }
248    
249 dpavlin 67 #-------------------------------------------------------------
250    
251     sub parse_feed_format {
252     my $format = shift;
253     my $data = shift;
254     my $i = shift;
255     my $codepage = shift;
256    
257 dpavlin 92 # XXX feed doesn't support repeatable fields, but they really
258     # should, This is a bug. It should be fixed!
259     return if ($i > 0);
260    
261 dpavlin 67 my $out;
262     my $out_swish;
263    
264     my $prefix = "";
265     if ($format =~ s/^([^\d\|]{1,3})//) {
266     $prefix = $1;
267     }
268    
269     my $display;
270     my $swish;
271    
272     while ($format && length($format) > 0) {
273     #print STDERR "\n#### $format #";
274     if ($format =~ s/^\|(\d+)\|//) {
275     #print STDERR "--$1-> $format -[",length($format),"] ";
276     if ($data->{$1}) {
277     my $tmp = $data->{$1};
278     if ($codepage) {
279     $tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n";
280     }
281     $display .= $prefix . $tmp;
282     $swish .= $tmp." ";
283     #print STDERR " == $tmp";
284     }
285     $prefix = "";
286     } elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) {
287     $prefix .= $1 if ($display);
288     } else {
289     print STDERR "unparsed format: $format\n";
290     $prefix .= $format;
291     $format = "";
292     }
293     #print STDERR " display: $display swish: $swish [format: $format]";
294     }
295     # add suffix
296     $display .= $prefix if ($display);
297    
298     return ($swish,$display);
299     }
300    
301     #-------------------------------------------------------------
302    
303 dpavlin 10 1;

Properties

Name Value
cvs2svn:cvs-rev 1.17

  ViewVC Help
Powered by ViewVC 1.1.26