/[webpac]/branches/ffzg/all2xml.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /branches/ffzg/all2xml.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 163 - (hide annotations)
Thu Nov 20 21:23:40 2003 UTC (20 years, 4 months ago) by dpavlin
Original Path: trunk/all2xml.pl
File MIME type: text/plain
File size: 16781 byte(s)
Added type="swish_exact" to save data into swish index with boundaries
xxbxx data xxexx. This is helpful to implement exact match from beginning
of query and exact match to full query which are defined using e[nr] field
in web user interface (with same [nr] as f[nr] and v[nr] fields) which
have to have value 1 (from beginning) 2 (from end, not that useful...) or
3 (1+2 - exact match)

1 dpavlin 1 #!/usr/bin/perl -w
2    
3     use strict;
4     use OpenIsis;
5     use Getopt::Std;
6     use Data::Dumper;
7     use XML::Simple;
8 dpavlin 5 use Text::Unaccent 1.02; # 1.01 won't compile on my platform,
9 dpavlin 10 use Text::Iconv;
10 dpavlin 13 use Config::IniFiles;
11 dpavlin 40 use Encode;
12 dpavlin 1
13 dpavlin 10 $|=1;
14 dpavlin 9
15 dpavlin 13 my $config_file = $0;
16     $config_file =~ s/\.pl$/.conf/;
17     die "FATAL: can't find configuration file '$config_file'" if (! -e $config_file);
18    
19 dpavlin 10 my $config;
20    
21 dpavlin 58 #use index_DBI; # default DBI module for index
22     use index_DBI_cache; # faster DBI module using memory cache
23 dpavlin 50 my $index;
24 dpavlin 10
25 dpavlin 1 my %opts;
26    
27 dpavlin 7 # usage:
28     # -d directory name
29     # -m multiple directories
30     # -q quiet
31     # -s run swish
32 dpavlin 1
33 dpavlin 7 getopts('d:m:qs', \%opts);
34    
35 dpavlin 43 my $path; # this is name of database
36 dpavlin 1
37 dpavlin 57 Text::Iconv->raise_error(0); # Conversion errors don't raise exceptions
38 dpavlin 1
39 dpavlin 40 # this is encoding of all files on disk, including import_xml/*.xml file and
40     # filter/*.pm files! It will be used to store strings in perl internally!
41     my $codepage = 'ISO-8859-2';
42 dpavlin 1
43 dpavlin 40 my $utf2cp = Text::Iconv->new('UTF-8',$codepage);
44     # this function will convert data from XML files to local encoding
45     sub x {
46     return $utf2cp->convert($_[0]);
47     }
48 dpavlin 3
49 dpavlin 54 # decode isis/excel or other import codepage
50     my $import2cp;
51 dpavlin 10
52 dpavlin 40 # outgoing xml must be in UTF-8
53     my $cp2utf = Text::Iconv->new($codepage,'UTF-8');
54 dpavlin 29
55 dpavlin 54 # mapping between data type and tag which specify
56     # format in XML file
57     my %type2tag = (
58     'isis' => 'isis',
59 dpavlin 62 'excel' => 'column',
60     'marc' => 'marc',
61 dpavlin 67 'feed' => 'feed'
62 dpavlin 54 );
63 dpavlin 3
64 dpavlin 54 sub data2xml {
65    
66 dpavlin 10 use xmlify;
67    
68 dpavlin 54 my $type = shift @_;
69 dpavlin 3 my $row = shift @_;
70 dpavlin 13 my $add_xml = shift @_;
71 dpavlin 59 # needed to read values from configuration file
72     my $cfg = shift @_;
73     my $database = shift @_;
74 dpavlin 3
75     my $xml;
76    
77 dpavlin 10 use parse_format;
78 dpavlin 3
79 dpavlin 13 my $html = ""; # html formatted display output
80 dpavlin 10
81 dpavlin 13 my %field_usage; # counter for usage of each field
82    
83 dpavlin 32 # sort subroutine using order="" attribute
84     sub by_order {
85 dpavlin 98 my $va = $config->{indexer}->{$a}->{order} ||
86     $config->{indexer}->{$a};
87     my $vb = $config->{indexer}->{$b}->{order} ||
88     $config->{indexer}->{$b};
89 dpavlin 29
90 dpavlin 98 return $va <=> $vb;
91 dpavlin 32 }
92 dpavlin 3
93 dpavlin 32 foreach my $field (sort by_order keys %{$config->{indexer}}) {
94    
95 dpavlin 40 $field=x($field);
96 dpavlin 13 $field_usage{$field}++;
97    
98 dpavlin 10 my $swish_data = "";
99 dpavlin 163 my $swish_exact_data = "";
100 dpavlin 3 my $display_data = "";
101 dpavlin 35 my $line_delimiter;
102 dpavlin 3
103 dpavlin 34 my ($swish,$display);
104    
105 dpavlin 54 my $tag = $type2tag{$type} || die "can't find which tag to use for type $type";
106     foreach my $x (@{$config->{indexer}->{$field}->{$tag}}) {
107 dpavlin 3
108 dpavlin 40 my $format = x($x->{content});
109     my $delimiter = x($x->{delimiter}) || ' ';
110 dpavlin 3
111 dpavlin 54 my $repeat_off = 0; # repeatable offset
112 dpavlin 29
113 dpavlin 163 my ($s,$se,$d,$i) = (1,0,1,0); # swish, display default
114 dpavlin 29 $s = 0 if (lc($x->{type}) eq "display");
115     $d = 0 if (lc($x->{type}) eq "swish");
116 dpavlin 163 $se = 1 if (lc($x->{type}) eq "swish_exact");
117 dpavlin 29 ($s,$d,$i) = (0,0,1) if (lc($x->{type}) eq "index");
118    
119 dpavlin 35 # what will separate last line from this one?
120     if ($display_data && $x->{append} && $x->{append} eq "1") {
121 dpavlin 34 $line_delimiter = ' ';
122 dpavlin 35 } elsif ($display_data) {
123 dpavlin 34 $line_delimiter = '<br/>';
124     }
125 dpavlin 29
126 dpavlin 34 # init vars so that we go into while...
127     ($swish,$display) = (1,1);
128 dpavlin 29
129 dpavlin 97 # placeholder for all repeatable entries for index
130     my @index_data;
131     my $index_filter;
132    
133 dpavlin 153 sub mkformat {
134     my $x = shift || die "mkformat needs tag reference";
135     my $data = shift || return;
136     my $format_name = x($x->{format_name}) || return $data;
137     my $fmt = x($config->{format}->{$format_name}->{content}) || die "<format name=\"$format_name\"> is not defined!";
138     my $format_delimiter = x($x->{format_delimiter});
139     my @data;
140     if ($format_delimiter) {
141     @data = split(/$format_delimiter/,$data);
142     } else {
143     push @data,$data;
144     }
145    
146     if ($fmt) {
147     my $nr = scalar $fmt =~ s/%s/%s/g;
148     if (($#data+1) == $nr) {
149     return sprintf($fmt,@data);
150     } else {
151     print STDERR "mkformat: [$data] can't be split on [$format_delimiter] to $nr fields!\n";
152     return $data;
153     }
154     } else {
155     print STDERR "usage of link '$format_name' without defined format (<link> tag)\n";
156     }
157     }
158    
159 dpavlin 90 # while because of repeatable fields
160     while ($swish || $display) {
161 dpavlin 54 ($swish,$display) = parse_format($type, $format,$row,$repeat_off++,$import2cp);
162 dpavlin 90 if ($repeat_off > 1000) {
163     print STDERR "loop (more than 1000 repeatable fields) deteced in $row, $format\n";
164     last;
165     }
166    
167 dpavlin 29 # filter="name" ; filter this field through
168     # filter/[name].pm
169     my $filter = $x->{filter};
170 dpavlin 20 if ($filter) {
171 dpavlin 29 require "filter/".$filter.".pm";
172 dpavlin 20 }
173 dpavlin 29 # type="swish" ; field for swish
174 dpavlin 163 if ($swish) {
175     if ($filter && ($s || $se)) {
176 dpavlin 29 no strict 'refs';
177 dpavlin 163 my $tmp = join(" ",&$filter($swish)) if ($s || $se);
178     $swish_data .= $tmp if ($s);
179     $swish_exact_data .= $tmp if ($se);
180    
181 dpavlin 29 } else {
182 dpavlin 163 $swish_data .= $swish if ($s);
183     $swish_exact_data .= $swish if ($se);
184 dpavlin 29 }
185     }
186 dpavlin 17
187 dpavlin 29 # type="display" ; field for display
188     if ($d && $display) {
189 dpavlin 35 if ($line_delimiter && $display_data) {
190     $display_data .= $line_delimiter;
191     undef $line_delimiter;
192     }
193 dpavlin 29 if ($filter) {
194     no strict 'refs';
195 dpavlin 138 if ($display_data) {
196 dpavlin 153 $display_data .= $delimiter.join($delimiter,mkformat($x,&$filter($display)));
197 dpavlin 138 } else {
198 dpavlin 153 $display_data = join($delimiter,mkformat($x,&$filter($display)));
199 dpavlin 138 }
200 dpavlin 29 } else {
201 dpavlin 138 if ($display_data) {
202 dpavlin 153 $display_data .= $delimiter.mkformat($x,$display);
203 dpavlin 138 } else {
204 dpavlin 153 $display_data = mkformat($x,$display);
205 dpavlin 138 }
206 dpavlin 29 }
207 dpavlin 20 }
208 dpavlin 29
209     # type="index" ; insert into index
210     if ($i && $display) {
211 dpavlin 97 push @index_data, $display;
212     $index_filter = $filter if ($filter);
213     }
214     }
215    
216     # fill data in index
217     if (@index_data) {
218     if ($index_filter) {
219     no strict 'refs';
220 dpavlin 135 foreach my $d (@index_data) {
221     $index->insert($field, &$index_filter($d), $path);
222 dpavlin 20 }
223 dpavlin 97 } else {
224     foreach my $d (@index_data) {
225     $index->insert($field, $d, $path);
226     }
227 dpavlin 20 }
228 dpavlin 17 }
229 dpavlin 3 }
230 dpavlin 9
231 dpavlin 59 # now try to parse variables from configuration file
232     foreach my $x (@{$config->{indexer}->{$field}->{'config'}}) {
233 dpavlin 13
234 dpavlin 62 my $delimiter = x($x->{delimiter}) || ' ';
235 dpavlin 59 my $val = $cfg->val($database, x($x->{content}));
236    
237     my ($s,$d,$i) = (1,1,0); # swish, display default
238     $s = 0 if (lc($x->{type}) eq "display");
239     $d = 0 if (lc($x->{type}) eq "swish");
240 dpavlin 163 # no support for swish exact in config.
241     # IMHO, it's useless
242 dpavlin 59 ($s,$d,$i) = (0,0,1) if (lc($x->{type}) eq "index");
243    
244     if ($val) {
245 dpavlin 62 $display_data .= $delimiter.$val if ($d);
246 dpavlin 59 $swish_data .= $val if ($s);
247     $index->insert($field, $val, $path) if ($i);
248     }
249    
250     }
251    
252    
253 dpavlin 10 if ($display_data) {
254 dpavlin 29
255 dpavlin 13 if ($field eq "headline") {
256     $xml .= xmlify("headline", $display_data);
257     } else {
258    
259     # find field name (singular, plural)
260     my $field_name = "";
261     if ($config->{indexer}->{$field}->{name_singular} && $field_usage{$field} == 1) {
262     $field_name = $config->{indexer}->{$field}->{name_singular}."#-#";
263     } elsif ($config->{indexer}->{$field}->{name_plural}) {
264     $field_name = $config->{indexer}->{$field}->{name_plural}."#-#";
265 dpavlin 20 } elsif ($config->{indexer}->{$field}->{name}) {
266     $field_name = $config->{indexer}->{$field}->{name}."#-#";
267 dpavlin 13 } else {
268 dpavlin 20 print STDERR "WARNING: field '$field' doesn't have 'name' attribute!";
269 dpavlin 13 }
270     if ($field_name) {
271 dpavlin 40 $html .= x($field_name);
272 dpavlin 13 }
273     $html .= $display_data."###\n";
274     }
275 dpavlin 10 }
276     if ($swish_data) {
277 dpavlin 20 # remove extra spaces
278     $swish_data =~ s/ +/ /g;
279     $swish_data =~ s/ +$//g;
280    
281 dpavlin 40 $xml .= xmlify($field."_swish", unac_string($codepage,$swish_data));
282 dpavlin 10 }
283    
284 dpavlin 163 if ($swish_exact_data) {
285     $swish_exact_data =~ s/ +/ /g;
286     $swish_exact_data =~ s/ +$//g;
287 dpavlin 9
288 dpavlin 163 # add delimiters before and after word.
289     # That is required to produce exact match
290     $xml .= xmlify($field."_swish_exact", unac_string($codepage,'xxbxx '.$swish_exact_data.' xxexx'));
291     }
292    
293    
294 dpavlin 3 }
295 dpavlin 13
296     # dump formatted output in <html>
297     if ($html) {
298 dpavlin 81 #$xml .= xmlify("html",$html);
299     $xml .= "<html><![CDATA[ $html ]]></html>";
300 dpavlin 13 }
301    
302 dpavlin 3 if ($xml) {
303 dpavlin 13 $xml .= $add_xml if ($add_xml);
304 dpavlin 10 return "<xml>\n$xml</xml>\n";
305 dpavlin 3 } else {
306     return;
307     }
308     }
309    
310     ##########################################################################
311    
312 dpavlin 54 # read configuration for this script
313 dpavlin 13 my $cfg = new Config::IniFiles( -file => $config_file );
314 dpavlin 1
315 dpavlin 54 # read global.conf configuration
316     my $cfg_global = new Config::IniFiles( -file => 'global.conf' );
317    
318 dpavlin 50 # open index
319     $index = new index_DBI(
320 dpavlin 54 $cfg_global->val('global', 'dbi_dbd'),
321     $cfg_global->val('global', 'dbi_dsn'),
322     $cfg_global->val('global', 'dbi_user'),
323     $cfg_global->val('global', 'dbi_passwd') || '',
324 dpavlin 50 );
325    
326 dpavlin 97 my $show_progress = $cfg_global->val('global', 'show_progress');
327    
328 dpavlin 13 foreach my $database ($cfg->Sections) {
329 dpavlin 1
330 dpavlin 54 my $type = lc($cfg -> val($database, 'type')) || die "$database doesn't have 'type' defined";
331 dpavlin 40 my $add_xml = $cfg -> val($database, 'xml'); # optional
332 dpavlin 1
333 dpavlin 54 print STDERR "reading ./import_xml/$type.xml\n";
334 dpavlin 1
335 dpavlin 58 # extract just type basic
336     my $type_base = $type;
337     $type_base =~ s/_.+$//g;
338 dpavlin 40
339 dpavlin 153 $config=XMLin("./import_xml/$type.xml", forcearray => [ $type2tag{$type_base}, 'config', 'format' ], forcecontent => 1);
340 dpavlin 58
341 dpavlin 54 # output current progress indicator
342     my $last_p = 0;
343     sub progress {
344 dpavlin 101 return if (! $show_progress);
345 dpavlin 54 my $current = shift;
346 dpavlin 57 my $total = shift || 1;
347 dpavlin 54 my $p = int($current * 100 / $total);
348     if ($p != $last_p) {
349     printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$current,$total,"=" x ($p/2).">", $p );
350     $last_p = $p;
351     }
352 dpavlin 3 }
353    
354 dpavlin 67 my $fake_dir = 1;
355     sub fakeprogress {
356 dpavlin 104 return if (! $show_progress);
357 dpavlin 67 my $current = shift @_;
358    
359     my @ind = ('-','\\','|','/','-','\\','|','/', '-');
360    
361     $last_p += $fake_dir;
362     $fake_dir = -$fake_dir if ($last_p > 1000 || $last_p < 0);
363     if ($last_p % 10 == 0) {
364     printf STDERR ("%5d / %5s [%-51s]\r",$current,"?"," " x ($last_p/20).$ind[($last_p/20) % $#ind]);
365     }
366     }
367    
368 dpavlin 54 # now read database
369     print STDERR "using: $type...\n";
370 dpavlin 1
371 dpavlin 58 if ($type_base eq "isis") {
372    
373 dpavlin 54 my $isis_db = $cfg -> val($database, 'isis_db') || die "$database doesn't have 'isis_db' defined!";
374 dpavlin 1
375 dpavlin 54 $import2cp = Text::Iconv->new($config->{isis_codepage},$codepage);
376     my $db = OpenIsis::open( $isis_db );
377 dpavlin 10
378 dpavlin 106 # check if .txt database for OpenIsis is zero length,
379     # if so, erase it and re-open database
380     sub check_txt_db {
381     my $isis_db = shift || die "need isis database name";
382 dpavlin 109 my $reopen = 0;
383    
384 dpavlin 106 if (-e $isis_db.".TXT") {
385 dpavlin 109 print STDERR "WARNING: removing $isis_db.TXT OpenIsis database...\n";
386 dpavlin 108 unlink $isis_db.".TXT" || warn "FATAL: unlink error on '$isis_db.TXT': $!";
387 dpavlin 109 $reopen++;
388 dpavlin 106 }
389 dpavlin 109 if (-e $isis_db.".PTR") {
390     print STDERR "WARNING: removing $isis_db.PTR OpenIsis database...\n";
391     unlink $isis_db.".PTR" || warn "FATAL: unlink error on '$isis_db.PTR': $!";
392     $reopen++;
393     }
394     return OpenIsis::open( $isis_db ) if ($reopen);
395 dpavlin 106 }
396    
397     # EOF error
398     if ($db == -1) {
399     $db = check_txt_db($isis_db);
400 dpavlin 108 if ($db == -1) {
401 dpavlin 106 print STDERR "FATAL: OpenIsis can't open zero size file $isis_db\n";
402     next;
403     }
404     }
405    
406 dpavlin 101 # OpenIsis::ERR_BADF
407     if ($db == -4) {
408     print STDERR "FATAL: OpenIsis can't find file $isis_db\n";
409     next;
410     # OpenIsis::ERR_IO
411     } elsif ($db == -5) {
412     print STDERR "FATAL: OpenIsis can't access file $isis_db\n";
413     next;
414     } elsif ($db < 0) {
415     print STDERR "FATAL: OpenIsis unknown error $db with file $isis_db\n";
416     next;
417     }
418    
419 dpavlin 54 my $max_rowid = OpenIsis::maxRowid( $db );
420 dpavlin 3
421 dpavlin 106 # if 0 records, try to erase isis .txt database
422     if ($max_rowid == 0) {
423     # force removal of database
424     $db = check_txt_db($isis_db);
425     $max_rowid = OpenIsis::maxRowid( $db );
426     }
427    
428 dpavlin 54 print STDERR "Reading database: $isis_db [$max_rowid rows]\n";
429    
430     my $path = $database;
431    
432     for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
433     my $row = OpenIsis::read( $db, $row_id );
434     if ($row && $row->{mfn}) {
435    
436     progress($row->{mfn}, $max_rowid);
437    
438     my $swishpath = $path."#".int($row->{mfn});
439    
440 dpavlin 59 if (my $xml = data2xml($type_base,$row,$add_xml,$cfg,$database)) {
441 dpavlin 54 $xml = $cp2utf->convert($xml);
442     use bytes; # as opposed to chars
443     print "Path-Name: $swishpath\n";
444     print "Content-Length: ".(length($xml)+1)."\n";
445     print "Document-Type: XML\n\n$xml\n";
446     }
447 dpavlin 3 }
448 dpavlin 54 }
449 dpavlin 108 # for this to work with current version of OpenIsis (0.9.0)
450     # you might need my patch from
451     # http://www.rot13.org/~dpavlin/projects/openisis-0.9.0-perl_close.diff
452     OpenIsis::close($db);
453 dpavlin 54 print STDERR "\n";
454 dpavlin 3
455 dpavlin 58 } elsif ($type_base eq "excel") {
456 dpavlin 54 use Spreadsheet::ParseExcel;
457     use Spreadsheet::ParseExcel::Utility qw(int2col);
458    
459     $import2cp = Text::Iconv->new($config->{excel_codepage},$codepage);
460     my $excel_file = $cfg -> val($database, 'excel_file') || die "$database doesn't have 'excel_file' defined!";
461 dpavlin 43
462 dpavlin 54 my $sheet = x($config->{sheet}) || die "no sheet in $type.xml";
463 dpavlin 56 my $start_row = x($config->{start_row}) - 1 || die "no start_row in $type.xml";
464 dpavlin 54
465     my $oBook = Spreadsheet::ParseExcel::Workbook->Parse($excel_file) || die "can't open Excel file '$excel_file'";
466    
467     my $sheet_nr = 0;
468     foreach my $oWks (@{$oBook->{Worksheet}}) {
469     #print STDERR "-- SHEET $sheet_nr:", $oWks->{Name}, "\n";
470     last if ($oWks->{Name} eq $sheet);
471     $sheet_nr++;
472     }
473    
474     my $oWorksheet = $oBook->{Worksheet}[$sheet_nr];
475    
476     print STDERR "using sheet: ",$oWorksheet->{Name},"\n";
477     defined ($oWorksheet) || die "can't find sheet '$sheet' in $excel_file";
478     my $end_row = x($config->{end_row}) || $oWorksheet->{MaxRow};
479    
480 dpavlin 56 for(my $iR = $start_row ; defined $end_row && $iR <= $end_row ; $iR++) {
481 dpavlin 54 my $row;
482     for(my $iC = $oWorksheet->{MinCol} ; defined $oWorksheet->{MaxCol} && $iC <= $oWorksheet->{MaxCol} ; $iC++) {
483     my $cell = $oWorksheet->{Cells}[$iR][$iC];
484     if ($cell) {
485     $row->{int2col($iC)} = $cell->Value;
486     }
487     }
488    
489     progress($iR, $end_row);
490    
491     # print "row[$iR/$end_row] ";
492     # foreach (keys %{$row}) {
493     # print "$_: ",$row->{$_},"\t";
494     # }
495     # print "\n";
496    
497     my $swishpath = $database."#".$iR;
498    
499     next if (! $row);
500    
501 dpavlin 59 if (my $xml = data2xml($type_base,$row,$add_xml,$cfg,$database)) {
502 dpavlin 44 $xml = $cp2utf->convert($xml);
503 dpavlin 35 use bytes; # as opposed to chars
504 dpavlin 43 print "Path-Name: $swishpath\n";
505 dpavlin 10 print "Content-Length: ".(length($xml)+1)."\n";
506     print "Document-Type: XML\n\n$xml\n";
507 dpavlin 3 }
508 dpavlin 1 }
509 dpavlin 62 } elsif ($type_base eq "marc") {
510 dpavlin 67
511 dpavlin 62 use MARC;
512    
513     $import2cp = Text::Iconv->new($config->{marc_codepage},$codepage);
514     my $marc_file = $cfg -> val($database, 'marc_file') || die "$database doesn't have 'marc_file' defined!";
515    
516     # optional argument is format
517     my $format = x($config->{format}) || 'usmarc';
518    
519     print STDERR "Reading MARC file '$marc_file'\n";
520    
521     my $marc = new MARC;
522     my $nr = $marc->openmarc({
523     file=>$marc_file, format=>$format
524     }) || die "Can't open MARC file '$marc_file'";
525    
526     my $i=0; # record nr.
527    
528     my $rec;
529    
530     while ($marc->nextmarc(1)) {
531    
532     # XXX
533 dpavlin 67 fakeprogress($i++);
534 dpavlin 62
535     my $swishpath = $database."#".$i;
536    
537     if (my $xml = data2xml($type_base,$marc,$add_xml,$cfg,$database)) {
538     $xml = $cp2utf->convert($xml);
539     use bytes; # as opposed to chars
540     print "Path-Name: $swishpath\n";
541     print "Content-Length: ".(length($xml)+1)."\n";
542     print "Document-Type: XML\n\n$xml\n";
543     }
544     }
545 dpavlin 67 } elsif ($type_base eq "feed") {
546    
547     $import2cp = Text::Iconv->new($config->{feed_codepage},$codepage);
548     my $prog = x($config->{prog}) || die "$database doesn't have 'prog' defined!";
549    
550     print STDERR "Reading feed from program '$prog'\n";
551    
552     open(FEED,"feeds/$prog |") || die "can't start $prog: $!";
553    
554 dpavlin 74 my $i=1; # record nr.
555 dpavlin 67
556     my $data;
557     my $line=1;
558    
559     while (<FEED>) {
560     chomp;
561    
562     if (/^$/) {
563 dpavlin 74 my $swishpath = $database."#".$i++;
564 dpavlin 67
565     if (my $xml = data2xml($type_base,$data,$add_xml,$cfg,$database)) {
566     $xml = $cp2utf->convert($xml);
567     use bytes; # as opposed to chars
568     print "Path-Name: $swishpath\n";
569     print "Content-Length: ".(length($xml)+1)."\n";
570     print "Document-Type: XML\n\n$xml\n";
571     }
572     $line = 1;
573     $data = {};
574     next;
575     }
576    
577 dpavlin 74 $line = $1 if (s/^(\d+):\s*//);
578 dpavlin 67 $data->{$line++} = $_;
579    
580 dpavlin 74 fakeprogress($i);
581 dpavlin 67
582     }
583 dpavlin 1 }
584     }
585 dpavlin 3
586 dpavlin 10 # call this to commit index
587     $index->close;
588 dpavlin 3
589     1;
590     __END__
591     ##########################################################################
592    
593     =head1 NAME
594    
595 dpavlin 81 all2xml.pl - read various file formats and dump XML for SWISH-E
596 dpavlin 3
597     =head1 DESCRIPTION
598    
599 dpavlin 81 This command will read ISIS data file using OpenIsis perl module, MARC
600     records using MARC module and optionally Micro$oft Excel files to
601     create one XML file for usage with I<SWISH-E> indexer. Despite its name,
602     this script B<isn't general xml generator> from isis files (isis already
603     has something like that). Output of this script is tailor-made for SWISH-E.
604 dpavlin 3
605 dpavlin 81 =head1 BUGS
606    
607     Documentation is really lacking. However, in true Open Source spirit, source
608     is best documentation. I even made considerable effort to comment parts
609     which are not intuitively clear, so...
610    
611 dpavlin 3 =head1 AUTHOR
612    
613     Dobrica Pavlinusic <dpavlin@rot13.org>
614    
615     =head1 COPYRIGHT
616    
617     GNU Public License (GPL) v2 or later
618    
619     =head1 SEE ALSO
620    
621     SWISH-E web site at http://www.swish-e.org
622    
623     =cut

Properties

Name Value
cvs2svn:cvs-rev 1.45
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26