35 |
my $filter = { |
my $filter = { |
36 |
'CROVOC' => sub { |
'CROVOC' => sub { |
37 |
my $tmp = shift || return; |
my $tmp = shift || return; |
38 |
return undef unless ($tmp =~ s/CROVOC.*$/ */); |
return undef unless ($tmp =~ s/\s*CROVOC.*$/ */); |
39 |
|
return $tmp; |
40 |
|
}, |
41 |
|
'CROVOC_tree' => sub { |
42 |
|
my $tmp = shift || return; |
43 |
|
$tmp =~ s/\s*CROVOC.*$/ */; |
44 |
|
$tmp =~ s/\s*EUROVOC.*//; |
45 |
return $tmp; |
return $tmp; |
46 |
}, |
}, |
47 |
}; |
}; |
73 |
my $maxmfn = $webpac->open_isis( |
my $maxmfn = $webpac->open_isis( |
74 |
filename => shift @ARGV || '/data/hidra/THS/THS', |
filename => shift @ARGV || '/data/hidra/THS/THS', |
75 |
lookup => [ |
lookup => [ |
76 |
{ 'key' => 'd:v900', 'val' => 'v250^a' }, |
{ 'key' => 'd:v900', 'val' => 'filter{CROVOC_tree}v250^a v800' }, |
77 |
# { 'eval' => '"v901^a" eq "Područje"', 'key' => 'pa:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
# { 'eval' => '"v901^a" eq "Područje"', 'key' => 'pa:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
78 |
# { 'eval '=> '"v901^a" eq "Mikrotezaurus"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
# { 'eval '=> '"v901^a" eq "Mikrotezaurus"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
79 |
# { 'eval' => '"v901^a" eq "Deskriptor"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
# { 'eval' => '"v901^a" eq "Deskriptor"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
138 |
$words =~ s/\W*\s+\W*/ /g; |
$words =~ s/\W*\s+\W*/ /g; |
139 |
$words =~ s/\W+$//; |
$words =~ s/\W+$//; |
140 |
|
|
141 |
|
# first try to generate headline for this entry from index |
142 |
|
my $h = shift @{$ds->{'index'}}; |
143 |
|
# then, from display |
144 |
|
$h ||= shift @{$ds->{'display'}}; |
145 |
|
# and as a last resort, fall back to headline
146 |
|
$h ||= $headline; |
147 |
|
|
148 |
$index->insert( |
$index->insert( |
149 |
index_name => $ds->{'tag'}, |
index_name => $ds->{'tag'}, |
150 |
#path => $f, |
#path => $f, |
151 |
path => $webpac->mfn, |
path => $webpac->mfn, |
152 |
headline => $headline, |
headline => $h, |
153 |
words => $words, |
words => $words, |
154 |
); |
); |
155 |
} |
} |
158 |
foreach my $ds (@ds) { |
foreach my $ds (@ds) { |
159 |
next if (! $ds->{'index'}); |
next if (! $ds->{'index'}); |
160 |
|
|
161 |
$thes->{$ds->{'tag'}} ||= new WebPAC::Index; |
$thes->{$ds->{'tag'}} ||= new WebPAC::Index( name => $ds->{'tag'} ); |
162 |
|
|
163 |
foreach my $h (@{$ds->{'index'}}) { |
foreach my $h (@{$ds->{'index'}}) { |
164 |
$thes->{$ds->{'tag'}}->insert( |
$thes->{$ds->{'tag'}}->insert( |