/[webpac]/trunk/parse_format.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/parse_format.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 170 - (hide annotations)
Sun Nov 23 15:42:16 2003 UTC (17 years ago) by dpavlin
File size: 6179 byte(s)
Re-wrote parsing for ISO-type data (isis, marc) to use in-memory cache of
format... 10% speed improvement and cleaner code. Include filter functions
just once.

1 dpavlin 10 #-------------------------------------------------------------
2     #
3 dpavlin 54 # parse_format(...)
4 dpavlin 10 #
5    
6     sub parse_format {
7 dpavlin 54 my $type = shift || die "parset_format must be called with type!";
8 dpavlin 43 my $format = shift || die "parse_format must be called with format!";
9     my $row = shift || die "parse_format must be called with row!";
10 dpavlin 23 my $i = shift || 0; # isis repeatable number
11 dpavlin 43 my $codepage = shift || die "parse_format must be called with codepage!";
12 dpavlin 54 if ($type eq "isis") {
13 dpavlin 62 return parse_iso_format($format,$row,$i,$codepage,'isis_sf');
14 dpavlin 54 } elsif ($type eq "excel") {
15     return parse_excel_format($format,$row,$i,$codepage);
16 dpavlin 62 } elsif ($type eq "marc") {
17     return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
18 dpavlin 67 } elsif ($type eq "feed") {
19     return parse_feed_format($format,$row,$i,$codepage);
20 dpavlin 54 }
21     }
22 dpavlin 10
23 dpavlin 54 #-------------------------------------------------------------
24    
25 dpavlin 62 sub parse_iso_format {
26 dpavlin 54
27     my $format = shift;
28     my $row = shift;
29     my $i = shift;
30     my $codepage = shift;
31    
32 dpavlin 62 my $func = shift || die "need to know which sub-field function to use";
33    
34     require $func.".pm";
35    
36 dpavlin 10 my $out;
37     my $out_swish;
38    
39     my $display;
40     my $swish;
41    
42 dpavlin 62 sub cnv_cp {
43 dpavlin 78 my $codepage = shift;
44     my $tmp = shift || return;
45 dpavlin 62 if ($codepage) {
46 dpavlin 78 $tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n";
47 dpavlin 62 }
48     return $tmp;
49     }
50    
51 dpavlin 170 # if format doesn't exits, store it in cache
52     if (! defined($cache->{format}->{$format})) {
53     # print STDERR "parsing format for '$format'\n";
54     my @fmt;
55    
56     my $f = $format;
57    
58     if ($f =~ s/^([^\d]+)//) {
59     if ($f) { # there is more to parse
60     push @fmt,$1;
61     } else {
62     @fmt = ('',$1,undef,'');
63     #print STDERR "just one field: $1\n";
64 dpavlin 10 }
65 dpavlin 170 } else {
66     push @fmt,'';
67     }
68    
69     while ($f) {
70     # print STDERR "\n#### $f";
71     # this is EBSCO special to support numeric subfield in
72     # form of 856#3
73     if ($f =~ s/^(\d\d\d)#*(\w?)//) {
74     push @fmt,$1;
75     if ($2) {
76     push @fmt,$2;
77     } else {
78     push @fmt,undef;
79     }
80     # this might be our local scpeciality -- fields 10 and 11
81     # (as opposed to 010 and 011) so they are strictly listed
82     # here
83     } elsif ($f =~ s/^(1[01])//) {
84     push @fmt,$1;
85     push @fmt,undef;
86     } elsif ($f =~ s/^mfn//i) {
87     push @fmt,'mfn';
88     push @fmt,'';
89     } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
90     push @fmt,$1;
91     } elsif ($f =~ s/^([^\d]+\d{0,2})//) {
92     push @fmt,$1;
93     } elsif ($f =~ s/^(\d{1,2})//) {
94     push @fmt,$1;
95     } else {
96     print STDERR "unparsed format: $f\n";
97     $f = "";
98     }
99     }
100     push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix
101     $cache->{format}->{$format} = \@fmt;
102    
103     # print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
104     # print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
105     # print STDERR Dumper($cache->{format}->{$format});
106     }
107    
108     # now produce actual record
109     my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
110     my @fmt = @{$tmp};
111     # print STDERR "using format for '$format':",Dumper(@fmt),"\n";
112     # print STDERR "tmp ",Dumper($tmp);
113     # print STDERR "cache: ",Dumper($cache->{format}->{$format});
114    
115     # prefix
116     my $prefix = shift @fmt;
117     my $sufix;
118     while($#fmt > 1) {
119     my $f = shift @fmt || die "BUG: field name can't be empty!";
120     my $sf = shift @fmt;
121    
122     if ($f eq 'mfn' && $i == 0) {
123     $display .= $sufix if ($display);
124     $display .= $row->{mfn};
125     } else {
126     my $val = &$func($row,$f,$sf,$i);
127     if ($val) {
128     # print STDERR "val: $val\n";
129     my $tmp = cnv_cp($codepage,$val);
130     if ($display) {
131     $display .= $sufix.$tmp;
132     } else {
133     $display = $tmp;
134     }
135 dpavlin 57 $swish .= $tmp." ";
136 dpavlin 22 }
137 dpavlin 10 }
138 dpavlin 170 $sufix = shift @fmt;
139 dpavlin 10 }
140 dpavlin 170 $display = $prefix.$display.$sufix if ($display);
141     print STDERR "format left unused: [",join("|",@fmt),"]\n" if (@fmt);
142 dpavlin 10
143 dpavlin 170 # print STDERR "display: $display swish: $swish\n";
144    
145 dpavlin 10 return ($swish,$display);
146     }
147    
148     #-------------------------------------------------------------
149 dpavlin 54
150     sub parse_excel_format {
151     my $format = shift;
152     my $row = shift;
153     my $i = shift;
154     my $codepage = shift;
155    
156 dpavlin 90 return if ($i > 0); # Excel doesn't support repeatable fields
157    
158 dpavlin 54 my $out;
159     my $out_swish;
160    
161     my $prefix = "";
162     if ($format =~ s/^([^A-Z\|]{1,3})//) {
163     $prefix = $1;
164     }
165    
166     my $display;
167     my $swish;
168    
169     while ($format && length($format) > 0) {
170     #print STDERR "\n#### $format #";
171     if ($format =~ s/^\|([A-Z]{1,2})\|//) {
172     #print STDERR "--$1-> $format -[",length($format),"] ";
173     if ($row->{$1}) {
174     my $tmp = $row->{$1};
175 dpavlin 57 if ($codepage) {
176     $tmp = $codepage->convert($tmp) || warn "excel: $1 '$tmp' can't convert";
177     }
178 dpavlin 54 $display .= $prefix . $tmp;
179     $swish .= $tmp." ";
180     #print STDERR " == $tmp";
181     }
182     $prefix = "";
183     } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
184     $prefix .= $1 if ($display);
185     } else {
186     print STDERR "unparsed format: $format\n";
187     $prefix .= $format;
188     $format = "";
189     }
190     #print STDERR " display: $display swish: $swish [format: $format]";
191     }
192     # add suffix
193     $display .= $prefix if ($display);
194    
195     return ($swish,$display);
196     }
197    
198 dpavlin 67 #-------------------------------------------------------------
199    
200     sub parse_feed_format {
201     my $format = shift;
202     my $data = shift;
203     my $i = shift;
204     my $codepage = shift;
205    
206 dpavlin 92 # XXX feed doesn't support repeatable fields, but they really
207     # should, This is a bug. It should be fixed!
208     return if ($i > 0);
209    
210 dpavlin 67 my $out;
211     my $out_swish;
212    
213     my $prefix = "";
214     if ($format =~ s/^([^\d\|]{1,3})//) {
215     $prefix = $1;
216     }
217    
218     my $display;
219     my $swish;
220    
221     while ($format && length($format) > 0) {
222     #print STDERR "\n#### $format #";
223     if ($format =~ s/^\|(\d+)\|//) {
224     #print STDERR "--$1-> $format -[",length($format),"] ";
225     if ($data->{$1}) {
226     my $tmp = $data->{$1};
227     if ($codepage) {
228     $tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n";
229     }
230     $display .= $prefix . $tmp;
231     $swish .= $tmp." ";
232     #print STDERR " == $tmp";
233     }
234     $prefix = "";
235     } elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) {
236     $prefix .= $1 if ($display);
237     } else {
238     print STDERR "unparsed format: $format\n";
239     $prefix .= $format;
240     $format = "";
241     }
242     #print STDERR " display: $display swish: $swish [format: $format]";
243     }
244     # add suffix
245     $display .= $prefix if ($display);
246    
247     return ($swish,$display);
248     }
249    
250     #-------------------------------------------------------------
251    
252 dpavlin 10 1;

Properties

Name Value
cvs2svn:cvs-rev 1.15

  ViewVC Help
Powered by ViewVC 1.1.26