8 |
use Lingua::Spelling::Alternative; |
use Lingua::Spelling::Alternative; |
9 |
require Unicode::Map8; |
require Unicode::Map8; |
10 |
use GDBM_File; |
use GDBM_File; |
11 |
|
use lib '/data/swish/html'; |
12 |
|
use FormatResult; |
13 |
|
|
14 |
my $dir='/home/dpavlin/nn'; |
my $dir='/home/dpavlin/nn'; |
15 |
my $prog='/usr/bin/swish-e'; |
my $prog='/usr/bin/swish-e'; |
36 |
my $hr = new Lingua::Spelling::Alternative( DEBUG => 0 ); |
my $hr = new Lingua::Spelling::Alternative( DEBUG => 0 ); |
37 |
$hr->load_findaffix("$dir/prvih_50.txt"); |
$hr->load_findaffix("$dir/prvih_50.txt"); |
38 |
|
|
39 |
print header(-charset=>'iso-8859-2'),start_html(-title=>'Narodne Novine pretrazivanje',-lang=>'hr'),start_form(-name=>'search_form'); |
print header(-charset=>'iso-8859-2'),start_html(-title=>'NN - Narodne Novine pretrazivanje',-lang=>'hr'),start_form(-name=>'search_form'); |
40 |
print "Potra¾i ",popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits)," zakona sa rijeèima: ",textfield('search'); |
print "Potra¾i ",popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits)," zakona sa rijeèima: ",textfield('search'); |
41 |
print " u ",popup_menu(-name=>'index',-values=>[ sort keys %index ],-labels=>\%index,-default=>'title')," zakona "; |
print " u ",popup_menu(-name=>'index',-values=>[ sort keys %index ],-labels=>\%index,-default=>'title')," zakona "; |
42 |
print submit(-value=>'prika¾i'); |
print submit(-value=>'prika¾i'); |
47 |
|
|
48 |
my $s; |
my $s; |
49 |
# re-write query from +/- to and/and not |
# re-write query from +/- to and/and not |
50 |
|
|
51 |
|
my @hl_words; |
52 |
|
|
53 |
foreach (split(/\s+/,param('search'))) { |
foreach (split(/\s+/,param('search'))) { |
54 |
if (m/^([+-])(\S+)/) { |
if (m/^([+-])(\S+)/) { |
55 |
$s.= ($s) ? "and " : ""; |
$s.= ($s) ? "and " : ""; |
56 |
$s.="not " if ($1 eq "-"); |
$s.="not " if ($1 eq "-"); |
57 |
$s.="(".join("* or ",$hr->alternatives($2)).") "; |
my @alt = $hr->alternatives($2); |
58 |
|
$s.="(".join("* or ",@alt).") "; |
59 |
|
push @hl_words, \@alt if ($1 ne "-"); |
60 |
} else { |
} else { |
61 |
$s .= "(".join("* or ",$hr->alternatives($_)).") "; |
my @alt = $hr->alternatives($_); |
62 |
|
$s .= "(".join("* or ",@alt).") "; |
63 |
|
push @hl_words, \@alt; |
64 |
} |
} |
65 |
} |
} |
66 |
$s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2 |
$s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2 |
83 |
|
|
84 |
$us->utf8($hit->swishtitle); |
$us->utf8($hit->swishtitle); |
85 |
|
|
86 |
print "<tt><a href=\"$url",$hit->swishdocpath,"\">NN",$hit->god,"/",$hit->br,"</a> ",$hit->nr," </tt>",$l2_map->to8($us->utf16)," [",$hit->swishrank,"]<br>\n"; |
my $naslov = $l2_map->to8($us->utf16); |
87 |
|
$naslov = FormatResult::highlite_words(\$naslov, \@hl_words); |
88 |
|
print "<tt><a href=\"$url",$hit->swishdocpath,"\">NN",$hit->god,"/",$hit->br,"</a> ",$hit->nr," </tt> $naslov [",$hit->swishrank,"]<br>\n"; |
89 |
# print $_[1]->as_string,"<br>\n"; |
# print $_[1]->as_string,"<br>\n"; |
90 |
# my @fields = $hit->field_names; |
# my @fields = $hit->field_names; |
91 |
# print "Field '$_' = '", $hit->$_, "'<br>\n" for sort @fields; |
# print "Field '$_' = '", $hit->$_, "'<br>\n" for sort @fields; |
112 |
my $path = $hit->swishdocpath; |
my $path = $hit->swishdocpath; |
113 |
if ($file2title{$path}) { |
if ($file2title{$path}) { |
114 |
my ($god,$br,$nr,undef,$naslov) = split(/ /,$file2title{$path},5); |
my ($god,$br,$nr,undef,$naslov) = split(/ /,$file2title{$path},5); |
115 |
|
$naslov = FormatResult::highlite_words(\$naslov, \@hl_words); |
116 |
print "<tt><a href=\"$url",$path,"\">NN$god/$br</a> $nr</tt> $naslov [",$hit->swishrank,"]<br>\n"; |
print "<tt><a href=\"$url",$path,"\">NN$god/$br</a> $nr</tt> $naslov [",$hit->swishrank,"]<br>\n"; |
117 |
|
|
118 |
} else { |
} else { |