32 |
"low_mem!" => \$low_mem, |
"low_mem!" => \$low_mem, |
33 |
); |
); |
34 |
|
|
35 |
|
my $filter = { |
36 |
|
'CROVOC' => sub { |
37 |
|
my $tmp = shift || return; |
38 |
|
return undef unless ($tmp =~ s/\s*CROVOC.*$/ */); |
39 |
|
return $tmp; |
40 |
|
}, |
41 |
|
'CROVOC_tree' => sub { |
42 |
|
my $tmp = shift || return; |
43 |
|
$tmp =~ s/\s*CROVOC.*$/ */; |
44 |
|
$tmp =~ s/\s*EUROVOC.*//; |
45 |
|
return $tmp; |
46 |
|
}, |
47 |
|
}; |
48 |
|
|
49 |
# create WebPAC object |
# create WebPAC object |
50 |
# |
# |
51 |
my $webpac = new WebPAC( |
my $webpac = new WebPAC( |
54 |
start_mfn => $start_mfn, |
start_mfn => $start_mfn, |
55 |
debug => $debug, |
debug => $debug, |
56 |
low_mem => $low_mem, |
low_mem => $low_mem, |
57 |
|
filter => $filter, |
58 |
) || die; |
) || die; |
59 |
|
|
60 |
my $log = $webpac->_get_logger() || die "can't get logger"; |
my $log = $webpac->_get_logger() || die "can't get logger"; |
73 |
my $maxmfn = $webpac->open_isis( |
my $maxmfn = $webpac->open_isis( |
74 |
filename => shift @ARGV || '/data/hidra/THS/THS', |
filename => shift @ARGV || '/data/hidra/THS/THS', |
75 |
lookup => [ |
lookup => [ |
76 |
{ 'key' => 'd:v900', 'val' => 'v250^a' }, |
{ 'key' => 'd:v900', 'val' => 'filter{CROVOC_tree}v250^a v800' }, |
77 |
# { 'eval' => '"v901^a" eq "Područje"', 'key' => 'pa:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
# { 'eval' => '"v901^a" eq "Područje"', 'key' => 'pa:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
78 |
# { 'eval '=> '"v901^a" eq "Mikrotezaurus"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
# { 'eval '=> '"v901^a" eq "Mikrotezaurus"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
79 |
# { 'eval' => '"v901^a" eq "Deskriptor"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
# { 'eval' => '"v901^a" eq "Deskriptor"', 'key' => 'a:v561^4:v562^4:v461^1', 'val' => 'v900' }, |
138 |
$words =~ s/\W*\s+\W*/ /g; |
$words =~ s/\W*\s+\W*/ /g; |
139 |
$words =~ s/\W+$//; |
$words =~ s/\W+$//; |
140 |
|
|
141 |
|
# first try to generate headline for this entry from index |
142 |
|
my $h = $ds->{'index'}->[0]; |
143 |
|
# then, from display |
144 |
|
$h ||= $ds->{'display'}->[0]; |
145 |
|
# and as a last resort, fall back to headline |
146 |
|
$h ||= $headline; |
147 |
|
|
148 |
$index->insert( |
$index->insert( |
149 |
index_name => $ds->{'tag'}, |
index_name => $ds->{'tag'}, |
150 |
#path => $f, |
#path => $f, |
151 |
path => $webpac->mfn, |
path => $webpac->mfn, |
152 |
headline => $headline, |
headline => $h, |
153 |
words => $words, |
words => $words, |
154 |
); |
); |
155 |
} |
} |
158 |
foreach my $ds (@ds) { |
foreach my $ds (@ds) { |
159 |
next if (! $ds->{'index'}); |
next if (! $ds->{'index'}); |
160 |
|
|
161 |
$thes->{$ds->{'tag'}} ||= new WebPAC::Index; |
$thes->{$ds->{'tag'}} ||= new WebPAC::Index( name => $ds->{'tag'} ); |
162 |
|
|
163 |
foreach my $h (@{$ds->{'index'}}) { |
foreach my $h (@{$ds->{'index'}}) { |
164 |
$thes->{$ds->{'tag'}}->insert( |
$thes->{$ds->{'tag'}}->insert( |