35 |
my $filter = { |
my $filter = { |
36 |
'CROVOC' => sub { |
'CROVOC' => sub { |
37 |
my $tmp = shift || return; |
my $tmp = shift || return; |
38 |
return undef unless ($tmp =~ s/\s+CROVOC.*$/ */); |
return undef unless ($tmp =~ s/\s*CROVOC.*$/ */); |
39 |
return $tmp; |
return $tmp; |
40 |
}, |
}, |
41 |
'CROVOC_tree' => sub { |
'CROVOC_tree' => sub { |
42 |
my $tmp = shift || return; |
my $tmp = shift || return; |
43 |
$tmp =~ s/\s+CROVOC.*$/ */; |
$tmp =~ s/\s*CROVOC.*$/ */; |
44 |
$tmp =~ s/\s+EUROVOC.*//; |
$tmp =~ s/\s*EUROVOC.*//; |
45 |
return $tmp; |
return $tmp; |
46 |
}, |
}, |
47 |
}; |
}; |
138 |
$words =~ s/\W*\s+\W*/ /g; |
$words =~ s/\W*\s+\W*/ /g; |
139 |
$words =~ s/\W+$//; |
$words =~ s/\W+$//; |
140 |
|
|
141 |
|
# first try to generate headline for this entry from index |
142 |
|
my $h = shift @{$ds->{'index'}}; |
143 |
|
# then, from display |
144 |
|
$h ||= shift @{$ds->{'display'}}; |
145 |
|
# and as a last resort, fall back to headline |
146 |
|
$h ||= $headline; |
147 |
|
|
148 |
$index->insert( |
$index->insert( |
149 |
index_name => $ds->{'tag'}, |
index_name => $ds->{'tag'}, |
150 |
#path => $f, |
#path => $f, |
151 |
path => $webpac->mfn, |
path => $webpac->mfn, |
152 |
headline => $headline, |
headline => $h, |
153 |
words => $words, |
words => $words, |
154 |
); |
); |
155 |
} |
} |
158 |
foreach my $ds (@ds) { |
foreach my $ds (@ds) { |
159 |
next if (! $ds->{'index'}); |
next if (! $ds->{'index'}); |
160 |
|
|
161 |
$thes->{$ds->{'tag'}} ||= new WebPAC::Index; |
$thes->{$ds->{'tag'}} ||= new WebPAC::Index( name => $ds->{'tag'} ); |
162 |
|
|
163 |
foreach my $h (@{$ds->{'index'}}) { |
foreach my $h (@{$ds->{'index'}}) { |
164 |
$thes->{$ds->{'tag'}}->insert( |
$thes->{$ds->{'tag'}}->insert( |