/[webpac]/branches/humanistika/parse_format.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /branches/humanistika/parse_format.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 187 - (hide annotations)
Sat Nov 29 18:58:34 2003 UTC (20 years, 4 months ago) by dpavlin
Original Path: trunk/parse_format.pm
File size: 6502 byte(s)
support for subfields in fields 10/11

1 dpavlin 10 #-------------------------------------------------------------
2     #
3 dpavlin 54 # parse_format(...)
4 dpavlin 10 #
5    
6     sub parse_format {
7 dpavlin 54 my $type = shift || die "parset_format must be called with type!";
8 dpavlin 43 my $format = shift || die "parse_format must be called with format!";
9     my $row = shift || die "parse_format must be called with row!";
10 dpavlin 23 my $i = shift || 0; # isis repeatable number
11 dpavlin 43 my $codepage = shift || die "parse_format must be called with codepage!";
12 dpavlin 54 if ($type eq "isis") {
13 dpavlin 62 return parse_iso_format($format,$row,$i,$codepage,'isis_sf');
14 dpavlin 54 } elsif ($type eq "excel") {
15     return parse_excel_format($format,$row,$i,$codepage);
16 dpavlin 62 } elsif ($type eq "marc") {
17     return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
18 dpavlin 67 } elsif ($type eq "feed") {
19     return parse_feed_format($format,$row,$i,$codepage);
20 dpavlin 54 }
21     }
22 dpavlin 10
23 dpavlin 54 #-------------------------------------------------------------
24    
25 dpavlin 62 sub parse_iso_format {
26 dpavlin 54
27     my $format = shift;
28     my $row = shift;
29     my $i = shift;
30     my $codepage = shift;
31    
32 dpavlin 62 my $func = shift || die "need to know which sub-field function to use";
33    
34     require $func.".pm";
35    
36 dpavlin 10 my $out;
37     my $out_swish;
38    
39     my $display;
40     my $swish;
41    
42 dpavlin 62 sub cnv_cp {
43 dpavlin 78 my $codepage = shift;
44     my $tmp = shift || return;
45 dpavlin 62 if ($codepage) {
46 dpavlin 78 $tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n";
47 dpavlin 62 }
48     return $tmp;
49     }
50    
51 dpavlin 170 # if format doesn't exits, store it in cache
52     if (! defined($cache->{format}->{$format})) {
53     # print STDERR "parsing format for '$format'\n";
54     my @fmt;
55    
56     my $f = $format;
57    
58     if ($f =~ s/^([^\d]+)//) {
59     if ($f) { # there is more to parse
60     push @fmt,$1;
61     } else {
62     @fmt = ('',$1,undef,'');
63     #print STDERR "just one field: $1\n";
64 dpavlin 10 }
65 dpavlin 170 } else {
66     push @fmt,'';
67     }
68    
69     while ($f) {
70     # print STDERR "\n#### $f";
71     # this is EBSCO special to support numeric subfield in
72     # form of 856#3
73     if ($f =~ s/^(\d\d\d)#*(\w?)//) {
74     push @fmt,$1;
75     if ($2) {
76     push @fmt,$2;
77     } else {
78     push @fmt,undef;
79     }
80     # this might be our local scpeciality -- fields 10 and 11
81     # (as opposed to 010 and 011) so they are strictly listed
82     # here
83 dpavlin 187 } elsif ($f =~ s/^(1[01]\w?)//) {
84 dpavlin 170 push @fmt,$1;
85     push @fmt,undef;
86     } elsif ($f =~ s/^mfn//i) {
87     push @fmt,'mfn';
88     push @fmt,'';
89     } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
90 dpavlin 176 # still prefix?
91     if ($#fmt == 0) {
92     $fmt[0] .= $1;
93     } else {
94     push @fmt,$1;
95     }
96 dpavlin 170 } elsif ($f =~ s/^([^\d]+\d{0,2})//) {
97 dpavlin 176 if ($#fmt == 0) {
98     $fmt[0] .= $1;
99     } else {
100     push @fmt,$1;
101     }
102 dpavlin 170 } elsif ($f =~ s/^(\d{1,2})//) {
103 dpavlin 176 if ($#fmt == 0) {
104     $fmt[0] .= $1;
105     } else {
106     push @fmt,$1;
107     }
108 dpavlin 170 } else {
109     print STDERR "unparsed format: $f\n";
110     $f = "";
111     }
112     }
113     push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix
114     $cache->{format}->{$format} = \@fmt;
115    
116 dpavlin 187 # print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
117 dpavlin 170 # print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
118     # print STDERR Dumper($cache->{format}->{$format});
119     }
120    
121     # now produce actual record
122     my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
123     my @fmt = @{$tmp};
124     # print STDERR "using format for '$format':",Dumper(@fmt),"\n";
125     # print STDERR "tmp ",Dumper($tmp);
126     # print STDERR "cache: ",Dumper($cache->{format}->{$format});
127    
128     # prefix
129     my $prefix = shift @fmt;
130     my $sufix;
131     while($#fmt > 1) {
132     my $f = shift @fmt || die "BUG: field name can't be empty!";
133     my $sf = shift @fmt;
134    
135     if ($f eq 'mfn' && $i == 0) {
136     $display .= $sufix if ($display);
137     $display .= $row->{mfn};
138     } else {
139     my $val = &$func($row,$f,$sf,$i);
140     if ($val) {
141     # print STDERR "val: $val\n";
142     my $tmp = cnv_cp($codepage,$val);
143     if ($display) {
144     $display .= $sufix.$tmp;
145     } else {
146     $display = $tmp;
147     }
148 dpavlin 57 $swish .= $tmp." ";
149 dpavlin 22 }
150 dpavlin 10 }
151 dpavlin 170 $sufix = shift @fmt;
152 dpavlin 10 }
153 dpavlin 170 $display = $prefix.$display.$sufix if ($display);
154     print STDERR "format left unused: [",join("|",@fmt),"]\n" if (@fmt);
155 dpavlin 10
156 dpavlin 176 print STDERR "format: [",join("|",@{$tmp}),"]\n" if (@fmt);
157    
158 dpavlin 187 # print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n";
159 dpavlin 170
160 dpavlin 10 return ($swish,$display);
161     }
162    
163     #-------------------------------------------------------------
164 dpavlin 54
165     sub parse_excel_format {
166     my $format = shift;
167     my $row = shift;
168     my $i = shift;
169     my $codepage = shift;
170    
171 dpavlin 90 return if ($i > 0); # Excel doesn't support repeatable fields
172    
173 dpavlin 54 my $out;
174     my $out_swish;
175    
176     my $prefix = "";
177     if ($format =~ s/^([^A-Z\|]{1,3})//) {
178     $prefix = $1;
179     }
180    
181     my $display;
182     my $swish;
183    
184     while ($format && length($format) > 0) {
185     #print STDERR "\n#### $format #";
186     if ($format =~ s/^\|([A-Z]{1,2})\|//) {
187     #print STDERR "--$1-> $format -[",length($format),"] ";
188     if ($row->{$1}) {
189     my $tmp = $row->{$1};
190 dpavlin 57 if ($codepage) {
191     $tmp = $codepage->convert($tmp) || warn "excel: $1 '$tmp' can't convert";
192     }
193 dpavlin 54 $display .= $prefix . $tmp;
194     $swish .= $tmp." ";
195     #print STDERR " == $tmp";
196     }
197     $prefix = "";
198     } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
199     $prefix .= $1 if ($display);
200     } else {
201     print STDERR "unparsed format: $format\n";
202     $prefix .= $format;
203     $format = "";
204     }
205     #print STDERR " display: $display swish: $swish [format: $format]";
206     }
207     # add suffix
208     $display .= $prefix if ($display);
209    
210     return ($swish,$display);
211     }
212    
213 dpavlin 67 #-------------------------------------------------------------
214    
215     sub parse_feed_format {
216     my $format = shift;
217     my $data = shift;
218     my $i = shift;
219     my $codepage = shift;
220    
221 dpavlin 92 # XXX feed doesn't support repeatable fields, but they really
222     # should, This is a bug. It should be fixed!
223     return if ($i > 0);
224    
225 dpavlin 67 my $out;
226     my $out_swish;
227    
228     my $prefix = "";
229     if ($format =~ s/^([^\d\|]{1,3})//) {
230     $prefix = $1;
231     }
232    
233     my $display;
234     my $swish;
235    
236     while ($format && length($format) > 0) {
237     #print STDERR "\n#### $format #";
238     if ($format =~ s/^\|(\d+)\|//) {
239     #print STDERR "--$1-> $format -[",length($format),"] ";
240     if ($data->{$1}) {
241     my $tmp = $data->{$1};
242     if ($codepage) {
243     $tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n";
244     }
245     $display .= $prefix . $tmp;
246     $swish .= $tmp." ";
247     #print STDERR " == $tmp";
248     }
249     $prefix = "";
250     } elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) {
251     $prefix .= $1 if ($display);
252     } else {
253     print STDERR "unparsed format: $format\n";
254     $prefix .= $format;
255     $format = "";
256     }
257     #print STDERR " display: $display swish: $swish [format: $format]";
258     }
259     # add suffix
260     $display .= $prefix if ($display);
261    
262     return ($swish,$display);
263     }
264    
265     #-------------------------------------------------------------
266    
267 dpavlin 10 1;

Properties

Name Value
cvs2svn:cvs-rev 1.17

  ViewVC Help
Powered by ViewVC 1.1.26