--- trunk/search/nn-swish.cgi 2003/09/09 08:20:53 90 +++ trunk/search/nn-swish.cgi 2003/09/28 02:19:59 91 @@ -5,30 +5,40 @@ use CGI::Carp qw(fatalsToBrowser); use SWISH; use Unicode::String qw(utf8 utf16); +use Lingua::Spelling::Alternative; require Unicode::Map8; use GDBM_File; -my $dir='/home/dpavlin/nn/swish'; +my $dir='/home/dpavlin/nn'; my $prog='/usr/bin/swish-e'; +my $url='http://www.nn.hr/clanci/sluzbeno/'; my $hits=0; my $max_hits=100; my %labels = (100=>' 100', 200=>' 200', 500=>' 500', 0=>'neograničeno'); +my %index = ('title'=>'naslovu', 'text'=>'tekstu'); my %brs_labels; -my %tie; -tie %tie, 'GDBM_File', "$dir/brzakona.gdbm", &GDBM_READER, 0640 || die "tie: $!"; -foreach (sort keys %tie) { - $brs_labels{$_} = sprintf("%-8s (%d zakona)",$_,$tie{$_}); +my %brzakona; +tie %brzakona, 'GDBM_File', "$dir/swish/brzakona.gdbm", &GDBM_READER, 0640 || die "tie: $!"; +foreach (sort keys %brzakona) { + $brs_labels{$_} = sprintf("%-8s (%d zakona)",$_,$brzakona{$_}); } -untie %tie; +untie %brzakona; + +my %file2title; +tie %file2title, 'GDBM_File', "$dir/swish/file2title.gdbm", &GDBM_READER, 0640 || die "tie: $!"; + +my $hr = new Lingua::Spelling::Alternative( DEBUG => 0 ); +$hr->load_findaffix("$dir/prvih_50.txt"); print header(-charset=>'iso-8859-2'),start_html(-title=>'NN pretrazivanje',-lang=>'hr'),start_form; print "Potraži ",popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits)," zakona sa riječima: ",textfield('search'); +print " u ",popup_menu(-name=>'index',-values=>[ sort keys %index ],-labels=>\%index,-default=>'title')," zakona "; print submit(-value=>'prikaži'); -print br,checkbox(-name=>'br_limit', -checked=>0, -label=>"ograniči pretraživanje samo na broj "),popup_menu(-name=>'br',-values=>[sort keys %brs_labels],-labels=>\%brs_labels); +print br,checkbox(-name=>'br_limit', -checked=>0, -label=>"ograniči pretraživanje samo na godinu "),popup_menu(-name=>'br',-values=>[sort keys %brs_labels],-labels=>\%brs_labels); print end_form,hr; if (param('search')) { @@ -39,9 +49,9 @@ if (m/^([+-])(\S+)/) { $s.= ($s) ? "and " : ""; $s.="not " if ($1 eq "-"); - $s.="$2* "; + $s.="(".join("* or ",$hr->alternatives($2)).") "; } else { - $s .= "$_* "; + $s .= "(".join("* or ",$hr->alternatives($_)).") "; } } $s=~tr/šđžčćŠĐŽČĆ/šđžčćŠĐŽČĆ/; # 1250 -> iso8859-2 @@ -50,32 +60,65 @@ my $l2_map = Unicode::Map8->new("ISO-8859-2") || die; my $us = Unicode::String->new(); - my $sh = SWISH->connect('Fork', - prog => $prog, - indexes => "$dir/nn.index", - properties => [qw/god br nr/], - results => sub { - my ($sh,$hit) = @_; - - $us->utf8($hit->swishtitle); - - print "swishdocpath,"\">NN",$hit->god,"/",$hit->br," ",$hit->nr," ",$l2_map->to8($us->utf16)," [",$hit->swishrank,"]
\n"; - -# print $_[1]->as_string,"
\n"; -# my @fields = $hit->field_names; -# print "Field '$_' = '", $hit->$_, "'
\n" for sort @fields; - }, - maxhits => param('max_hits') || $max_hits, - ); - - die $SWISH::errstr unless $sh; - - my $sw_q = "naslov_czs=($s)"; - - if (param('br_limit')) { - my ($god,$br) = split(/\//,param('br')); - $sw_q .= " and god=".int($god) if ($god); - $sw_q .= " and br=".int($br) if ($br); + my $sw_q; + my $sh; + + if (param('index') eq 'title') { + + $sh = SWISH->connect('Fork', + prog => $prog, + indexes => "$dir/swish/nn.index", + properties => [qw/god br nr/], + results => sub { + my ($sh,$hit) = @_; + + $us->utf8($hit->swishtitle); + + print "swishdocpath,"\">NN",$hit->god,"/",$hit->br," ",$hit->nr," ",$l2_map->to8($us->utf16)," [",$hit->swishrank,"]
\n"; + + # print $_[1]->as_string,"
\n"; + # my @fields = $hit->field_names; + # print "Field '$_' = '", $hit->$_, "'
\n" for sort @fields; + }, + maxhits => param('max_hits') || $max_hits, + ); + + die $SWISH::errstr unless $sh; + + $sw_q = "naslov_czs=($s)"; + if (param('br_limit')) { + $sw_q .= " and god=".int(param('br')) if (param('br')); + } + + } else { + # search in full text + + $sh = SWISH->connect('Fork', + prog => $prog, + indexes => "$dir/swish/sluzbeno.index", + results => sub { + my ($sh,$hit) = @_; + + my $path = $hit->swishdocpath; + if ($file2title{$path}) { + my ($god,$br,$nr,undef,$naslov) = split(/ /,$file2title{$path},5); + print "NN$god/$br $nr $naslov [",$hit->swishrank,"]
\n"; + + } else { + print "\n"; + } + + }, + maxhits => param('max_hits') || $max_hits, + ); + + die $SWISH::errstr unless $sh; + + $sw_q = $s; + if (param('br_limit')) { + $sw_q .= " and swishdocpath=".int(param('br')) if (param('br')); + } + } print ""; @@ -91,5 +134,7 @@ } } else { print p('Kod pretraživanja pretraživač pronalazi sve zakone u kojima se pojavljuju sve upisanje riječi.',br,'Ako ispred riječi upišete minus (-) neće se prikazivati zakoni koji imaju takvu riječ. Npr. +kava +zakon -dopunama'); - print p("Možete pročitati i članak o tome kako je ovaj pretraživač napravljen i zašto."),p("Ovo je nova i brža verzija pretraživača o kojoj više možete saznati iz članka."); + print p("Možete pročitati i članak o tome kako je ovaj pretraživač napravljen i zašto."),p("Ovo je nova i brža verzija pretraživača o kojoj više možete saznati iz članka."),p("Novosti od rujna 2003.: pretraživanje po godinama i po punom tekstu zakona!"); } + +untie %file2title;