--- trunk/all2xml.pl 2003/01/11 19:55:30 9
+++ trunk2/all2all.pl 2004/09/13 15:32:55 437
@@ -1,250 +1,438 @@
#!/usr/bin/perl -w
+=head1 NAME
+
+all2all.pl - basic script for all WebPAC needs
+
+=cut
+
use strict;
-use OpenIsis;
-use Getopt::Std;
use Data::Dumper;
-use XML::Simple;
-use Text::Unaccent 1.02; # 1.01 won't compile on my platform,
-require Unicode::Map8;
-use DBI;
-
-my $config=XMLin(undef, forcearray => [ 'isis' ], forcecontent => 1);
-my $dbh = DBI->connect("DBI:Pg:dbname=webpac","","") || die $DBI::errstr; # FIX
-# FIX; select relname from pg_class where relname like 'index_%' ;
-$dbh->begin_work || die $dbh->errstr();
-
-$dbh->do("delete from index_author") || die $dbh->errstr();
-$dbh->do("delete from index_title") || die $dbh->errstr();
-
-my %opts;
-
-# usage:
-# -d directory name
-# -m multiple directories
-# -q quiet
-# -s run swish
-
-getopts('d:m:qs', \%opts);
-
-my $db_dir = $opts{d} || "ps"; # FIX
-
-#die "usage: $0 -d [database_dir] -m [database1,database2] " if (! %opts);
-
-#print Dumper($config->{indexer});
-#print "-" x 70,"\n";
-
-# how to convert isis code page to UTF8?
-my $isis_map = Unicode::Map8->new($config->{isis_codepage}) || die;
-
-sub isis2xml {
-
- my $row = shift @_;
-
- my $xml;
- $xml->{db_dir} = [ $db_dir ]; # FIX remove?
-
- sub isis_sf {
- my $row = shift @_;
- my $isis_id = shift @_;
- my $subfield = shift @_;
- if ($row->{$isis_id}->[0]) {
- my $sf = OpenIsis::subfields($row->{$isis_id}->[0]);
- if (! defined $subfield || length($subfield) == 0) {
- # subfield list undef, empty or no defined subfields for this record
- my $all_sf = $row->{$isis_id}->[0];
- $all_sf =~ s/\^./ /g; nuke definirions
- return $all_sf;
- } elsif ($sf->{$subfield}) {
- return $sf->{$subfield};
- }
- }
+use Carp;
+use Getopt::Long;
+
+use lib './lib';
+use WebPAC;
+use WebPAC::jsFind;
+use WebPAC::Index;
+
+# options which can be changed via command line
+#
+my $code_page = 'ISO-8859-2';
+my ($limit_mfn, $start_mfn, $debug, $low_mem);
+my $index_path = './out/index';
+
+my $result = GetOptions(
+ "code_page=s" => \$code_page,
+ "limit_mfn=i" => \$limit_mfn,
+ "start_mfn=i" => \$start_mfn,
+ "debug!" => \$debug,
+ "low_mem!" => \$low_mem,
+);
+
+# create WebPAC object
+#
+my $webpac = new WebPAC(
+ code_page => $code_page,
+ limit_mfn => $limit_mfn,
+ start_mfn => $start_mfn,
+ debug => $debug,
+ low_mem => $low_mem,
+) || die;
+
+my $log = $webpac->_get_logger() || die "can't get logger";
+
+$log->debug("creating WebPAC::jsFind object");
+
+my $index = new WebPAC::jsFind(
+ index_path => $index_path,
+ keys => 10,
+) || die;
+
+my $thes;
+
+$|=1;
+
+my $maxmfn = $webpac->open_isis(
+ filename => shift @ARGV || '/data/hidra/THS/THS',
+ lookup => [
+ { 'key' => 'd:v900', 'val' => 'v250^a' },
+# { 'eval' => '"v901^a" eq "Područje"', 'key' => 'pa:v561^4:v562^4:v461^1', 'val' => 'v900' },
+# { 'eval '=> '"v901^a" eq "Mikrotezaurus"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' },
+# { 'eval' => '"v901^a" eq "Deskriptor"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' },
+ { 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' },
+ { 'key' => '900_mfn:v900', 'val' => 'v000' },
+ # tree structure
+ { 'eval' => 'length("v251") == 2', 'key' => 'root:v251', 'val' => 'v900' },
+ { 'eval' => '"v251"', 'key' => 'code:v900', 'val' => 'v561^4:v251' },
+ ],
+);
+
+$log->debug("isis file ",$webpac->{'isis_filename'}," opened");
+
+$log->info("rows: $maxmfn");
+
+$webpac->open_import_xml(type => 'isis_hidra_ths');
+
+if(1) { # XXX
+
+while (my $rec = $webpac->fetch_rec) {
+
+ my @ds = $webpac->data_structure($rec);
+
+ if (0 && $log->is_debug) {
+ $log->debug("rec = ",Dumper($rec));
+ $log->debug("ds = ",Dumper(\@ds));
}
- foreach my $field (keys %{$config->{indexer}}) {
+ next if (! @ds);
- my $display_data = "";
- my $swish_data = "";
- my $index_data = "";
-
- foreach my $x (@{$config->{indexer}->{$field}->{isis}}) {
-
- my $display_tmp = "";
- my $swish_tmp = "";
- my $index_tmp = "";
-
- my $format = $x->{content};
- my $s = 1; # swish only
- my $d = 1; # display only
- my $i = 0; # index only
- $s = 0 if (lc($x->{type}) eq "display");
- $d = 0 if (lc($x->{type}) eq "swish");
- ($s,$d,$i) = (0,0,1) if (lc($x->{type}) eq "index");
-#print STDERR "## s: $s d: $d i: $i ## $format ##\n";
- # parse format
- my $prefix = "";
- if ($format =~ s/^([^\d]+)//) {
- $prefix = $1;
- }
- while ($format) {
- if ($format =~ s/^(\d\d\d)(\w?)//) {
- my $isis_tmp = isis_sf($row,$1,$2);
- if ($isis_tmp) {
-# $display_tmp .= $prefix . "/$1/$2/".$isis_tmp if ($d);
- $display_tmp .= $prefix . $isis_tmp if ($d);
- $swish_tmp .= $isis_tmp." " if ($s);
- $index_tmp .= $prefix . $isis_tmp if ($i);
-#print STDERR " $isis_tmp <--\n";
- }
- $prefix = "";
- } elsif ($format =~ s/^([^\d]+)//) {
- $prefix = $1;
- } else {
- print STDERR "WARNING: unparsed format '$format'\n";
- last;
- };
- }
- # add suffix
- $display_tmp .= $prefix if ($display_tmp);
- $index_tmp .= $prefix if ($index_tmp);
-
-# $display_data .= $display_tmp if ($display_tmp ne "");
-# $swish_data .= $swish_tmp if ($swish_tmp ne "");
- $display_data .= $display_tmp;
- $swish_data .= $swish_tmp;
- $index_data .= $index_tmp;
+ my $filename = $webpac->{'current_filename'} || $log->logdie("no current_filename in webpac object");
- }
-#print "--display:$display_data\n--swish:$swish_data\n";
- #$xml->{$field."_display"} = $isis_map->tou($display_data)->utf8 if ($display_data);
- #$xml->{$field."_swish"} = unac_string($config->{isis_codepage},$swish_data) if ($swish_data);
- $xml->{$field."_display" } = [ $isis_map->tou($display_data)->utf8 ] if ($display_data);
- $xml->{$field."_swish"} = [ unac_string($config->{isis_codepage},$swish_data) ] if ($swish_data);
-
- # index
- if ($index_data && $index_data ne "") {
- my $sql = "select $field from index_$field where upper($field)=upper(?)";
- my $sth = $dbh->prepare($sql) || die $dbh->errstr();
- $sth->execute($index_data) || die "SQL: $sql; ".$dbh->errstr();
-#print STDERR "--->$index_data<---\n";
- if (! $sth->fetchrow_hashref) {
- my $sql = "insert into index_$field values (?)";
- my $sth = $dbh->prepare($sql) || die $dbh->errstr();
-#print STDERR "$sql: $index_data
+
+
+
+ search |
+ thesarus |
+ browse
+
+
+
+
+
+};
+
+my $l = $webpac->{'lookup'} || $log->logconfess("can't find lookup");
+
+my @tree = ({
+ # level 0
+ code_arr => sub { sort keys %{$l} },
+ filter_code => sub {
+ my $t = shift;
+ return $t if ($t =~ s/root://);
+ },
+ lookup_v900 => sub { shift @{$l->{"root:".$_[0]}} },
+ lookup_term => sub { shift @{$l->{"d:".$_[1]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[1]}} },
+ have_children => sub { defined($l->{"a:".$_[0]."::"}) },
+ child_code => sub { return $_[0] },
+ style => 'display: none',
+ },{
+ # 1
+ code_arr => sub { @{$l->{"a:".$_[0]."::"}} },
+ filter_code => sub { shift }, # nop
+ lookup_v900 => sub { shift @{$l->{"code:".$_[0]}} },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{"a:".$_[1].":"}) },
+ child_code => sub { return $_[1] },
+ style => 'display: none',
+ },{
+ # 2
+ code_arr => sub { @{$l->{"a:".$_[0].":"}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub { shift },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{"a:".$_[2].":".$_[1]}) },
+ child_code => sub { return "a:".$_[2].":".$_[1] },
+ style => 'display: none',
+ },{
+ # 3 uži pojam
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{$_[1]}) },
+ child_code => sub { return $_[1] },
+ },{
+ # 4
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{$_[1]}) },
+ child_code => sub { return $_[1] },
+ },{
+ # 5
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{$_[1]}) },
+ child_code => sub { return $_[1] },
+ },{
+ # 6
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{$_[1]}) },
+ child_code => sub { return $_[1] },
+ },{
+ # 7
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+ have_children => sub { defined($l->{$_[1]}) },
+ child_code => sub { return $_[1] },
+ },{
+ # 8
+ code_arr => sub { @{$l->{$_[0]}} },
+ filter_code => sub { shift },
+ lookup_v900 => sub {
+ my ($c,$p) = @_;
+ $p =~ s/^a:(..:....):.*$/$1/;
+ return "a:".$p.":".$c;
+ },
+ lookup_term => sub { shift @{$l->{"d:".$_[0]}} },
+ lookup_mfn => sub { shift @{$l->{"900_mfn:".$_[0]}} },
+# have_children => sub { defined($l->{$_[1]}) },
+# child_code => sub { return $_[1] },
+ have_children => sub { 0 },
+ child_code => sub { 0 },
+});
+
+my @show_ids;
+my @hide_ids;
+
+unroll(0,'');
+
+$log->debug("test filter: ",$tree[0]->{'filter_code'}->("root:99"));
+
+sub unroll {
+ my ($level,$start_code) = @_;
+
+ $log->logconfess("need level") unless (defined($level));
+
+ # all levels passed?
+ return if (! defined($tree[$level]));
+
+ $log->debug("unroll level $level, start code $start_code");
+
+ foreach my $code ($tree[$level]->{'code_arr'}->($start_code)) {
+
+ if ($code = $tree[$level]->{'filter_code'}->($code)) {
+
+ $log->debug("# $level filter passed code $code");
+
+ my $v900 = $tree[$level]->{'lookup_v900'}->($code,$start_code) || $log->warn("can't lookup_v900($code,$start_code)");
+ $log->debug("# $level lookup_v900($code,$start_code) = $v900");
+
+ my $term = $tree[$level]->{'lookup_term'}->($code,$v900) || $log->warn("can't lookup_term($code,$v900)");
+ $log->debug("# $level lookup_term($code,$v900) = $term");
+
+ my $mfn = $tree[$level]->{'lookup_mfn'}->($code,$v900) || $log->warn("can't lookup_mfn($code,$v900)");
+ $log->debug("# $level lookup_mfn($code,$v900) = $mfn");
+
+ $log->debug("$code -> $v900 : $term [$mfn]");
+
+ my ($link_start,$link_end) = ('','');
+
+ my $have_children = $tree[$level]->{'have_children'}->($code,$v900,$start_code);
+ if ($have_children) {
+ ($link_start,$link_end) = (qq{},qq{});
+ } else {
+ $log->debug("# $level doesn't have_children($code,$v900,$start_code)");
+ }
-# { my $row_id = 1;
-# FIX
- for (my $row_id = 1; $row_id <= $max_rowid; $row_id++ ) {
- my $row = OpenIsis::read( $db, $row_id );
- if ($row && $row->{mfn}) {
+ my $mfn_link = "thes/$mfn.html";
+ if (-e "out/$mfn_link") {
+ print HTML " " x $level .
+ qq{- ${link_start}${term}${link_end}}.
+ qq{ »
\n};
+ } else {
+ $log->warn("file 'out/$mfn_link' doesn't exist, skipping");
+ }
- # output current process indicator
- my $p = int($row->{mfn} * 100 / $max_rowid);
- if ($p != $last_p) {
- printf STDERR ("%5d / %5d [%-51s] %-2d %% \r",$row->{mfn},$max_rowid,"=" x ($p/2).">", $p ) if (! $opts{q});
- $last_p = $p;
+ unless ($have_children) {
+ next;
}
+ my $style = $tree[$level]->{'style'};
- if (my $xml = isis2xml($row)) {
- my $path = $isis_db;
- $path =~ s#$config->{isis_data}/*##g;
- my $out = "Path-Name: $path#".$row->{mfn}."\n";
- $out .= "Content-Length: ".(length($xml)+1)."\n";
- $out .= "Document-Type: XML\n\n$xml\n";
- print $out;
+ print HTML " " x $level .
+ qq{\n \n};
+
+ if ($style) {
+ if ($style =~ m/display\s*:\s*none/i) {
+ push @hide_ids, "id$mfn";
+ } else {
+ push @show_ids, "id$mfn";
+ }
+ } else {
+ # default: show
+ push @show_ids, "id$mfn";
}
+
+ unroll($level+1, $tree[$level]->{'child_code'}->($code,$v900,$start_code));
+
+ print HTML " " x $level . qq{
\n};
+
}
}
- print STDERR "\n";
}
-$dbh->commit || die $dbh->errstr();
-
-1;
-__END__
-##########################################################################
+print HTML qq{
+
+
+