# Patch for trunk/all2xml.pl, revision 379 -> 619.
#
# Summary of the changes below:
#   * replace the OpenIsis module with IsisDB for reading ISIS databases
#     (constructor call, $db->{maxmfn}, $db->to_hash) and drop the
#     OpenIsis-specific error handling, check_txt_db() and close() call;
#   * run x() on the field name once, before it is compared with
#     $last_field_name;
#   * add fmt_time() and extend progress() with rate and ETA output;
#   * pass Excel cell values through $utf2cp->convert(), falling back to
#     the raw cell value.
#
# NOTE(review): indentation and blank context lines were restored by hand
# from a copy that had lost its line breaks. Hunk line counts match the @@
# headers, but apply with `patch -l` in case whitespace differs.

--- trunk/all2xml.pl	2004/07/07 09:55:45	379
+++ trunk/all2xml.pl	2004/12/31 04:22:49	619
@@ -1,7 +1,7 @@
 #!/usr/bin/perl -w
 
 use strict;
-use OpenIsis;
+use IsisDB;
 use Getopt::Std;
 use Data::Dumper;
 use XML::Simple;
@@ -139,12 +139,14 @@
 	} else {
 		print STDERR "WARNING: field '$field' doesn't have 'name' attribute!";
 	}
+
 	if ($field_name) {
+		$field_name = x($field_name);
 		if (! $last_field_name) {
-			$last_field_name = x($field_name);
+			$last_field_name = $field_name;
 			return $last_field_name;
 		} elsif ($field_name ne $last_field_name) {
-			$last_field_name = x($field_name);
+			$last_field_name = $field_name;
 			return $last_field_name;
 		}
 	}
@@ -665,15 +667,30 @@
 
 	$config=XMLin("./import_xml/$type.xml", ForceArray => [ $type2tag{$type_base}, 'config', 'format' ], ForceContent => 1 );
 
+	# helper for progress bar: format seconds as "MM:SS " or "HhMM:SS"
+	sub fmt_time {
+		my $t = shift || 0;
+		my $out = "";
+
+		my ($hh,$mm,$ss) = (int($t / 3600), int($t / 60) % 60, $t % 60);	# not gmtime(): hours must not wrap at 24
+		$out .= "${hh}h" if ($hh);
+		$out .= sprintf("%02d:%02d", $mm,$ss);
+		$out .= " " if ($hh == 0);
+		return $out;
+	}
+
 	# output current progress indicator
 	my $last_p = 0;
+	my $start_t = time();	# NOTE(review): named sub progress() binds the first instance of this variable; confirm this block runs only once
 	sub progress {
 		return if (! $show_progress);
-		my $current = shift;
+		my $current = shift || 1;
 		my $total = shift || 1;
 		my $p = int($current * 100 / $total);
 		if ($p != $last_p) {
-			printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$current,$total,"=" x ($p/2).">", $p );
+			my $rate = ($current / (time() - $start_t || 1));
+			my $eta = ($total-$current) / ($rate || 1);
+			printf STDERR ("%5d [%-38s] %-5d %0.1f/s %s\r",$current,"=" x ($p/3)."$p%>", $total, $rate, fmt_time($eta));
 			$last_p = $p;
 		}
 	}
@@ -703,66 +720,21 @@
 		my $isis_db = $cfg -> val($database, 'isis_db') || die "$database doesn't have 'isis_db' defined!";
 
 		$import2cp = Text::Iconv->new($config->{isis_codepage},$codepage);
-		my $db = OpenIsis::open( $isis_db );
-
-		# check if .txt database for OpenIsis is zero length,
-		# if so, erase it and re-open database
-		sub check_txt_db {
-			my $isis_db = shift || die "need isis database name";
-			my $reopen = 0;
-
-			if (-e $isis_db.".TXT") {
-				print STDERR "WARNING: removing $isis_db.TXT OpenIsis database...\n";
-				unlink $isis_db.".TXT" || warn "FATAL: unlink error on '$isis_db.TXT': $!";
-				$reopen++;
-			}
-			if (-e $isis_db.".PTR") {
-				print STDERR "WARNING: removing $isis_db.PTR OpenIsis database...\n";
-				unlink $isis_db.".PTR" || warn "FATAL: unlink error on '$isis_db.PTR': $!";
-				$reopen++;
-			}
-			return OpenIsis::open( $isis_db ) if ($reopen);
-		}
-
-		# EOF error
-		if ($db == -1) {
-			$db = check_txt_db($isis_db);
-			if ($db == -1) {
-				print STDERR "FATAL: OpenIsis can't open zero size file $isis_db\n";
-				next;
-			}
-		}
+		my $db = IsisDB->new( isisdb => $isis_db );
 
-		# OpenIsis::ERR_BADF
-		if ($db == -4) {
-			print STDERR "FATAL: OpenIsis can't find file $isis_db\n";
-			next;
-		# OpenIsis::ERR_IO
-		} elsif ($db == -5) {
-			print STDERR "FATAL: OpenIsis can't access file $isis_db\n";
-			next;
-		} elsif ($db < 0) {
-			print STDERR "FATAL: OpenIsis unknown error $db with file $isis_db\n";
-			next;
-		}
-
-		my $max_rowid = OpenIsis::maxRowid( $db );
-
-		# if 0 records, try to rease isis .txt database
-		if ($max_rowid == 0) {
-			# force removal of database
-			$db = check_txt_db($isis_db);
-			$max_rowid = OpenIsis::maxRowid( $db );
-		}
+		my $max_rowid = $db->{'maxmfn'} || die "can't find maxmfn";
 
 		print STDERR "Reading database: $isis_db [$max_rowid rows]\n";
 
 		my $path = $database;
 
 		for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
-			my $row = OpenIsis::read( $db, $row_id );
-			if ($row && $row->{mfn}) {
-
+			my $row = $db->to_hash( $row_id );
+			if ($row) {
+
+				$row->{mfn} = $row_id;
+				$row->{record} = $db->{record};
+
 				progress($row->{mfn}, $max_rowid);
 
 				my $swishpath = $path."#".int($row->{mfn});
@@ -776,10 +748,6 @@
 				}
 			}
 		}
-		# for this to work with current version of OpenIsis (0.9.0)
-		# you might need my patch from
-		# http://www.rot13.org/~dpavlin/projects/openisis-0.9.0-perl_close.diff
-		OpenIsis::close($db);
 		print STDERR "\n";
 
 	} elsif ($type_base eq "excel") {
@@ -813,7 +781,11 @@
 				for(my $iC = $oWorksheet->{MinCol} ; defined $oWorksheet->{MaxCol} && $iC <= $oWorksheet->{MaxCol} ; $iC++) {
 					my $cell = $oWorksheet->{Cells}[$iR][$iC];
 					if ($cell) {
-						$row->{int2col($iC)} = $cell->Value;
+						# this conversion is a kludge.
+						# Files from Excel could have
+						# characters which don't fit into
+						# destination encoding.
+						$row->{int2col($iC)} = $utf2cp->convert($cell->Value) || $cell->Value;
 					}
 				}
 