--- trunk/search/nn-swish.cgi 2002/06/19 19:54:43 71
+++ trunk/search/nn-swish.cgi 2005/02/22 14:46:28 101
@@ -2,56 +2,160 @@
use strict;
use CGI qw/:standard -no_xhtml/;
-use Text::Query;
use CGI::Carp qw(fatalsToBrowser);
use SWISH;
+use Unicode::String qw(utf8 utf16);
+use Lingua::Spelling::Alternative;
require Unicode::Map8;
-
-my $dir='/home/dpavlin/nn/swish';
+use GDBM_File;
+use lib '/data/swish/html';
+use FormatResult;
+
+my $dir='/home/dpavlin/nn';
+my $prog='/usr/bin/swish-e';
+my $url='http://www.nn.hr/clanci/sluzbeno/';
my $hits=0;
+my $max_hits=100;
+
+my %labels = (100=>' 100', 200=>' 200', 500=>' 500', 0=>'neograničeno');
+my %index = ('title'=>'naslovu', 'text'=>'tekstu');
+
+my %god_lables;
+
+my %brzakona;
+tie %brzakona, 'GDBM_File', "$dir/swish/brzakona.gdbm", &GDBM_READER, 0640 || die "tie: $!";
+foreach (sort keys %brzakona) {
+ $god_lables{$_} = sprintf("%-8s (%d zakona)",$_,$brzakona{$_});
+}
+untie %brzakona;
-print header(-charset=>'iso-8859-2'),start_html(-title=>'NN pretrazivanje',-lang=>'hr'),start_form;
-print "Potraži zakone sa riječima: ",textfield('search');
+my %file2title;
+tie %file2title, 'GDBM_File', "$dir/swish/file2title.gdbm", &GDBM_READER, 0640 || die "tie: $!";
+
+my $hr = new Lingua::Spelling::Alternative( DEBUG => 0 );
+$hr->load_findaffix("$dir/prvih_50.txt");
+
+print header(-charset=>'iso-8859-2'),start_html(-title=>'NN - Narodne Novine pretrazivanje',-lang=>'hr'),start_form(-name=>'search_form');
+print "Potraži ",popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits)," zakona sa riječima: ",textfield('search');
+print " u ",popup_menu(-name=>'index',-values=>[ sort keys %index ],-labels=>\%index,-default=>'title')," zakona ";
print submit(-value=>'prikaži');
+print br,checkbox(-name=>'god_limit', -checked=>0, -label=>"ograniči pretraživanje samo na godinu "),popup_menu(-name=>'god',-values=>[sort keys %god_lables],-labels=>\%god_lables,-onChange=>'this.form.god_limit.checked=true');
print end_form,hr;
if (param('search')) {
- my $s=param('search');
+ my $s;
+ # rewrite the query: words prefixed with + or - become swish-e "and" / "and not" terms
+
+ my @hl_words;
+
+ foreach (split(/\s+/,param('search'))) {
+ if (m/^([+-])(\S+)/) {
+ $s.= ($s) ? "and " : "";
+ $s.="not " if ($1 eq "-");
+ my @alt = $hr->alternatives($2);
+ $s.="(".join("* or ",@alt).") ";
+ push @hl_words, \@alt if ($1 ne "-");
+ } else {
+ my @alt = $hr->alternatives($_);
+ $s .= "(".join("* or ",@alt).") ";
+ push @hl_words, \@alt;
+ }
+ }
$s=~tr/đčćĐČĆ/šđžčćŠĐŽČĆ/; # 1250 -> iso8859-2
$s=~tr/šŠđĐčČćĆžŽ/sSdDcCcCzZ/;
my $l2_map = Unicode::Map8->new("ISO-8859-2") || die;
- #my $utf8_map = Unicode::Map8->new("utf8") || die;
+ my $us = Unicode::String->new();
- my $sh = SWISH->connect('Fork',
- prog => "$dir/swish-e",
- indexes => "$dir/nn.index",
- properties => [qw/god br nr/],
- results => sub {
- my ($sh,$hit) = @_;
+ my $sw_q;
+ my $sh;
- my $us=$hit->swishtitle;
+ if (defined(param('max_hits'))) {
+ $max_hits = param('max_hits');
+ }
- print "swishdocpath,"\">",$hit->god,"/",$hit->br," ",$hit->nr," ",$hit->swishtitle,"x [",$hit->swishrank,"]
\n";
+ if (param('index') eq 'title') {
-# print $_[1]->as_string,"<br>\n";
-# my @fields = $hit->field_names;
-# print "Field '$_' = '", $hit->$_, "'<br>\n" for sort @fields;
- },
- );
+ $sh = SWISH->connect('Fork',
+ prog => $prog,
+ indexes => "$dir/swish/nn.index",
+ properties => [qw/god br nr/],
+ results => sub {
+ my ($sh,$hit) = @_;
+
+ $us->utf8($hit->swishtitle);
+
+ my $naslov = $l2_map->to8($us->utf16);
+ $naslov = FormatResult::highlite_words(\$naslov, \@hl_words);
+ print "<a href=\"$url",$hit->swishdocpath,"\">NN",$hit->god,"/",$hit->br,"</a> ",$hit->nr," $naslov [",$hit->swishrank,"]<br>\n";
+ # print $_[1]->as_string,"<br>\n";
+ # my @fields = $hit->field_names;
+ # print "Field '$_' = '", $hit->$_, "'<br>\n" for sort @fields;
+ },
+ maxhits => $max_hits,
+ );
+
+ die $SWISH::errstr unless $sh;
+
+ $sw_q = "naslov_czs=($s)";
+ if (param('god_limit')) {
+ $sw_q .= " and god=".int(param('god')) if (param('god'));
+ }
- die $SWISH::errstr unless $sh;
+ } else {
+ # search in full text
- $hits = $sh->query("naslov_czs=($s)");
+ $sh = SWISH->connect('Fork',
+ prog => $prog,
+ indexes => "$dir/swish/sluzbeno.index",
+ results => sub {
+ my ($sh,$hit) = @_;
+
+ my $path = $hit->swishdocpath;
+ if ($file2title{$path}) {
+ my ($god,$br,$nr,undef,$naslov) = split(/ /,$file2title{$path},5);
+ $naslov = FormatResult::highlite_words(\$naslov, \@hl_words);
+ print "NN$god/$br $nr $naslov [",$hit->swishrank,"]<br>\n";
+
+ } else {
+ print "\n";
+ }
+
+ },
+ maxhits => $max_hits,
+ );
+
+ die $SWISH::errstr unless $sh;
+
+ $sw_q = $s;
+ if (param('god_limit')) {
+ $sw_q .= " and swishdocpath=".int(param('god')) if (param('god'));
+ }
+
+ }
+
+ print "";
+
+ $hits = $sh->query($sw_q);
if ($hits > 0) {
- print p,hr,"Nađeno je $hits zakona...";
+ print p,hr,"Prikazujem $hits zakona";
+ print " iz godine ",param('god') if (param('god_limit'));
+ print " (maks. $max_hits)... " if ($max_hits);
+ print " [$s]";
} else {
- print p,"Nije nađen niti jedan zakon... (",$sh->errstr,")";
+ print p,"Nije nađen niti jedan zakon... [$s, ",$sh->errstr,"]";
}
} else {
- print p('U jednostavnom pretraživanju pretraživač pronalazi sve zakone u kojima se pojavljuje bilo koja od tih riječi. Da bi našli sve zakone u kojima se pojavljuju sve upisane riječi, upišite ispred svake riječi znak plus (+). Npr: +zakon +kava',br,'Ako ispred riječi upišete minus (-) neće se prikazivati zakoni koji imaju takvu riječ. Npr. +kava +zakon -dopunama');
- print p("Možete pročitati i članak o tome kako je ovaj pretraživač napravljen i zašto.");
+ my $dir=$0;
+ $dir=~s,(^.*?)/[^/]+$,$1,g;
+ open(HELP, "$dir/nn-help.html") || die "can't open '$dir/nn-help.html'";
+ while(<HELP>) {
+ print;
+ }
+ close(HELP);
}
+
+untie %file2title;