--- trunk/all2xml.pl 2004/04/18 00:57:39 320 +++ trunk/all2xml.pl 2005/01/01 18:16:21 620 @@ -139,12 +139,14 @@ } else { print STDERR "WARNING: field '$field' doesn't have 'name' attribute!"; } + if ($field_name) { + $field_name = x($field_name); if (! $last_field_name) { - $last_field_name = x($field_name); + $last_field_name = $field_name; return $last_field_name; } elsif ($field_name ne $last_field_name) { - $last_field_name = x($field_name); + $last_field_name = $field_name; return $last_field_name; } } @@ -168,6 +170,8 @@ ($s,$se,$d,$i) = (0,1,0,0); } elsif (lc($type) =~ /^lookup/) { ($s,$se,$d,$i,$il) = (0,1,0,0,1); + } elsif ($type) { + print STDERR "WARNING: unknown type: $type\n"; } return ($s,$se,$d,$i,$il); } @@ -637,6 +641,10 @@ my $lookup_file = $cfg -> val($database, 'lookup_newfile'); # optional if ($lookup_file) { #tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644; + if (! -e $lookup_file) { + open(LOOKUP, "> $lookup_file") || die "can't create $lookup_file': $!"; + close(LOOKUP); + } tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644; print STDERR "creating lookup file '$lookup_file'\n"; # delete memory cache for lookup file @@ -659,15 +667,30 @@ $config=XMLin("./import_xml/$type.xml", ForceArray => [ $type2tag{$type_base}, 'config', 'format' ], ForceContent => 1 ); + # helper for progress bar + sub fmt_time { + my $t = shift || 0; + my $out = ""; + + my ($ss,$mm,$hh) = gmtime($t); + $out .= "${hh}h" if ($hh); + $out .= sprintf("%02d:%02d", $mm,$ss); + $out .= " " if ($hh == 0); + return $out; + } + # output current progress indicator my $last_p = 0; + my $start_t = time(); sub progress { return if (! $show_progress); my $current = shift; my $total = shift || 1; my $p = int($current * 100 / $total); if ($p != $last_p) { - printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$current,$total,"=" x ($p/2).">", $p ); + my $rate = ($current / (time() - $start_t || 1)); + my $eta = ($total-$current) / ($rate || 1); + printf STDERR ("%5d [%-38s] %-5d %0.1f/s %s\r",$current,"=" x ($p/3)."$p%>", $total, $rate, fmt_time($eta)); $last_p = $p; } } @@ -807,7 +830,11 @@ for(my $iC = $oWorksheet->{MinCol} ; defined $oWorksheet->{MaxCol} && $iC <= $oWorksheet->{MaxCol} ; $iC++) { my $cell = $oWorksheet->{Cells}[$iR][$iC]; if ($cell) { - $row->{int2col($iC)} = $cell->Value; + # this conversion is a cludge. + # Files from Excell could have + # characters which don't fit into + # destination encoding. + $row->{int2col($iC)} = $utf2cp->convert($cell->Value) || $cell->Value; } } @@ -833,36 +860,40 @@ } } elsif ($type_base eq "marc") { - require MARC; + require MARC::File::USMARC; $import2cp = Text::Iconv->new($config->{marc_codepage},$codepage); my $marc_file = $cfg -> val($database, 'marc_file') || die "$database doesn't have 'marc_file' defined!"; # optional argument is format - my $format = x($config->{marc_format}) || 'usmarc'; - + warn "marc_format is no longer used!" if ($config->{marc_format}); print STDERR "Reading MARC file '$marc_file'\n"; - my $marc = new MARC; - my $nr = $marc->openmarc({ - file=>$marc_file, format=>$format - }) || die "Can't open MARC file '$marc_file' with format '$format'"; + my $marc = MARC::File::USMARC->in( $marc_file ) + || die "Can't open MARC file '$marc_file': ".$MARC::File::ERROR; - # read MARC file in memory - $marc->nextmarc(-1); + # count records in MARC file + sub marc_count { + my $filename = shift || die; + my $file = MARC::File::USMARC->in($filename) || die $MARC::File::ERROR; + my $count = 0; + while ($file->skip()) { + $count++; + } + return $count; + } - my $max_rec = $marc->marc_count(); + my $count = marc_count($marc_file) || warn "no records in '$marc_file'?"; - for(my $i=1; $i<=$max_rec; $i++) { + my $i = 0; - progress($i,$max_rec); + while( my $rec = $marc->next() ) { - # store value for marc_sf.pm - $main::cache->{marc_record} = $i; + progress($i++,$count); my $swishpath = $database."#".$i; - if (my $xml = data2xml($type_base,$marc,$add_xml,$cfg,$database)) { + if (my $xml = data2xml($type_base,$rec,$add_xml,$cfg,$database)) { $xml = $cp2utf->convert($xml); use bytes; # as opposed to chars print "Path-Name: $swishpath\n";