/[webpac]/branches/ffzg/all2xml.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /branches/ffzg/all2xml.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4 - (hide annotations)
Sun Dec 1 22:51:29 2002 UTC (21 years, 3 months ago) by dpavlin
Original Path: trunk/all2xml.pl
File MIME type: text/plain
File size: 5065 byte(s)
remove subfield definition from values which are displayed and indexed

1 dpavlin 1 #!/usr/bin/perl -w
2    
3     use strict;
4     use OpenIsis;
5     use Getopt::Std;
6     use Data::Dumper;
7     use XML::Simple;
8 dpavlin 3 use Text::Unaccent;
9     require Unicode::Map8;
10 dpavlin 1
# Load the configuration. Passing undef as the source lets XML::Simple
# locate the file itself (per its documentation: a file named after this
# script with an ".xml" extension). 'isis' elements are always arrays and
# element text is always available under the 'content' key.
my $config=XMLin(undef, forcearray => [ 'isis' ], forcecontent => 1);

# Command line options:
#   -d dir      database directory below isis_data (default "ps")
#   -m a,b,...  comma separated list of sub-directories to scan
#   -q          quiet, don't print the progress indicator
my %opts;

getopts('d:m:q', \%opts)
    or die "usage: $0 [-d database_dir] [-m database1,database2] [-q]\n";

my $db_dir = $opts{d} || "ps";	# FIX

#print Dumper($config->{indexer});
#print "-" x 70,"\n";

# how to convert isis code page to UTF8?
my $isis_map = Unicode::Map8->new($config->{isis_codepage})
    or die "FATAL: can't create Unicode::Map8 mapping for isis_codepage '"
        . (defined $config->{isis_codepage} ? $config->{isis_codepage} : '(undefined)')
        . "'\n";
26    
# Return the text of the first occurrence of one ISIS field.
#
#   $row      - record hash ref (field tag => array ref of occurrences)
#   $isis_id  - field tag, e.g. "200"
#   $subfield - optional subfield code; when undef or empty the whole
#               field is returned with every "^x" subfield marker
#               replaced by a single space
#
# Returns the selected text, or nothing (undef in scalar context) when the
# field or the requested subfield is missing or false.
sub isis_sf {
    my ($row, $isis_id, $subfield) = @_;

    my $value = $row->{$isis_id}->[0];
    return unless ($value);

    if (! defined $subfield || length($subfield) == 0) {
        # no subfield requested: nuke subfield definitions
        (my $all_sf = $value) =~ s/\^./ /g;
        return $all_sf;
    }

    # split into subfields only when one was actually asked for
    my $sf = OpenIsis::subfields($value);
    return $sf->{$subfield} if ($sf && $sf->{$subfield});

    return;
}

# Convert one ISIS record into the XML document handed to the indexer.
#
#   $row - record hash ref as passed to isis_sf()
#
# For every field configured under $config->{indexer} each 'isis' entry is
# read: its 'content' is a format string mixing literal text with
# three-digit field tags, each optionally followed by one subfield
# character (e.g. "200a"); its 'type' may restrict the entry to "display"
# or "index" (default: both).
#
# Display data keeps the literal text around values that exist and is
# converted to UTF-8 through $isis_map; index data is the bare values
# separated by spaces, passed through unac_string().
#
# Returns the XMLout() string (root element 'xml'), or nothing when no
# configured field produced any data.
sub isis2xml {
    my $row = shift @_;

    my $xml;

    foreach my $field (keys %{$config->{indexer}}) {

        my $display_data = "";
        my $index_data = "";

        foreach my $x (@{$config->{indexer}->{$field}->{isis}}) {

            my $display_tmp = "";
            my $index_tmp = "";

            my $format = defined $x->{content} ? $x->{content} : "";
            my $type = defined $x->{type} ? lc($x->{type}) : "";
            my $i = ($type eq "display") ? 0 : 1;	# goes into index
            my $d = ($type eq "index") ? 0 : 1;	# goes into display

            # parse format: leading literal text is the first prefix
            my $prefix = "";
            if ($format =~ s/^([^\d]+)//) {
                $prefix = $1;
            }

            # test length, not truth, so a trailing "0" is reported as
            # unparsed instead of silently ending the loop
            while (length($format)) {
                if ($format =~ s/^(\d\d\d)(\w?)//) {
                    my ($tag, $subfield) = ($1, $2);
                    my $isis_tmp = isis_sf($row, $tag, $subfield);
                    if ($isis_tmp) {
                        $display_tmp .= $prefix . $isis_tmp if ($d);
                        $index_tmp .= $isis_tmp." " if ($i);
                    }
                    # a prefix is used up even if its field was empty
                    $prefix = "";
                } elsif ($format =~ s/^([^\d]+)//) {
                    $prefix = $1;
                } else {
                    print STDERR "WARNING: unparsed format '$format'\n";
                    last;
                }
            }

            # add suffix (trailing literal text) only after real data
            $display_tmp .= $prefix if ($display_tmp);

            $display_data .= $display_tmp;
            $index_data .= $index_tmp;
        }

        $xml->{$field}->{display} .= $isis_map->tou($display_data)->utf8 if ($display_data);
        $xml->{$field}->{index} .= unac_string($config->{isis_codepage},$index_data) if ($index_data);
    }

    return unless ($xml);
    return XMLout($xml, rootname => 'xml', noattr => 1 );
}
108    
109     ##########################################################################
110    
# Main program: find the ISIS databases below
# $config->{isis_data}/$db_dir/<dir>/ and print every readable record to
# STDOUT as a header block (Path-Name, Content-Length, Document-Type)
# followed by the XML produced by isis2xml(). Progress and diagnostics go
# to STDERR.

my @isis_dirs = ( '.' );	# use dirname as database name

if ($opts{m}) {
    @isis_dirs = split(/,/,$opts{m});
}

# Database names looked for in every directory. ARTI is deliberately not
# in this list, so ARTI databases are never indexed.
my @db_names = qw(LIBRI PERI AMS);

my @isis_dbs;

foreach my $dir (@isis_dirs) {
    foreach my $name (@db_names) {
        my $path = $config->{isis_data}."/$db_dir/$dir/$name";
        push @isis_dbs, "$path/$name" if (-e $path);
    }
}

# nothing to do without a database: stop with a non-zero exit status
die "FATAL: Can't find isis database.\nPerhaps isis_data (".$config->{isis_data}.") has wrong value?\n" if (! @isis_dbs);

foreach my $isis_db (@isis_dbs) {

    my $db = OpenIsis::open( $isis_db );
    # TODO: detect a failed open here and skip the database with a
    # warning. A plain "! $db" test is not used -- presumably 0 can be a
    # valid handle; confirm the failure value against the OpenIsis docs.

    my $max_rowid = OpenIsis::maxRowid( $db );

    print STDERR "Reading database: $isis_db [$max_rowid rows]\n";

    my $last_p = 0;

    for my $row_id (1 .. $max_rowid) {
        my $row = OpenIsis::read( $db, $row_id );
        next unless ($row && $row->{mfn});

        # output current process indicator, only when the percentage changes
        my $p = int($row->{mfn} * 100 / $max_rowid);
        if ($p != $last_p) {
            printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$row->{mfn},$max_rowid,"=" x ($p/2).">", $p ) if (! $opts{q});
            $last_p = $p;
        }

        if (my $xml = isis2xml($row)) {
            print "Path-Name: $isis_db#".$row->{mfn}."\n";
            # +1 accounts for the newline printed after the document
            print "Content-Length: ".(length($xml)+1)."\n";
            print "Document-Type: XML\n\n$xml\n";
        }
    }
    print STDERR "\n";
}
179 dpavlin 3
180    
181     1;
182     __END__
183     ##########################################################################
184    
185     =head1 NAME
186    
187     all2xml.pl - read isis file and dump XML
188    
189     =head1 DESCRIPTION
190    
191     This command will read ISIS data file using OpenIsis perl module and
192     create XML file for usage with I<SWISH-E>
193     indexer. Despite its name, this script B<isn't a general XML generator>
194     from isis files (isis already has something like that). Output of this
195     script is tailor-made for SWISH-E.
196    
197     =head1 AUTHOR
198    
199     Dobrica Pavlinusic <dpavlin@rot13.org>
200    
201     =head1 COPYRIGHT
202    
203     GNU General Public License (GPL) v2 or later
204    
205     =head1 SEE ALSO
206    
207     SWISH-E web site at http://www.swish-e.org
208    
209     =cut

Properties

Name Value
cvs2svn:cvs-rev 1.3
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26