/[nn.old]/trunk/search/nn-swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/search/nn-swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 101 - (show annotations)
Tue Feb 22 14:46:28 2005 UTC (19 years, 2 months ago) by dpavlin
File size: 4524 byte(s)
fix unlimited search

1 #!/usr/bin/perl -w
2
3 use strict;
4 use CGI qw/:standard -no_xhtml/;
5 use CGI::Carp qw(fatalsToBrowser);
6 use SWISH;
7 use Unicode::String qw(utf8 utf16);
8 use Lingua::Spelling::Alternative;
9 require Unicode::Map8;
10 use GDBM_File;
11 use lib '/data/swish/html';
12 use FormatResult;
13
14 my $dir='/home/dpavlin/nn';
15 my $prog='/usr/bin/swish-e';
16 my $url='http://www.nn.hr/clanci/sluzbeno/';
17
18 my $hits=0;
19 my $max_hits=100;
20
21 my %labels = (100=>' 100', 200=>' 200', 500=>' 500', 0=>'neogranièeno');
22 my %index = ('title'=>'naslovu', 'text'=>'tekstu');
23
24 my %god_lables;
25
26 my %brzakona;
27 tie %brzakona, 'GDBM_File', "$dir/swish/brzakona.gdbm", &GDBM_READER, 0640 || die "tie: $!";
28 foreach (sort keys %brzakona) {
29 $god_lables{$_} = sprintf("%-8s (%d zakona)",$_,$brzakona{$_});
30 }
31 untie %brzakona;
32
33 my %file2title;
34 tie %file2title, 'GDBM_File', "$dir/swish/file2title.gdbm", &GDBM_READER, 0640 || die "tie: $!";
35
36 my $hr = new Lingua::Spelling::Alternative( DEBUG => 0 );
37 $hr->load_findaffix("$dir/prvih_50.txt");
38
39 print header(-charset=>'iso-8859-2'),start_html(-title=>'NN - Narodne Novine pretrazivanje',-lang=>'hr'),start_form(-name=>'search_form');
40 print "Potra¾i ",popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits)," zakona sa rijeèima: ",textfield('search');
41 print " u ",popup_menu(-name=>'index',-values=>[ sort keys %index ],-labels=>\%index,-default=>'title')," zakona ";
42 print submit(-value=>'prika¾i');
43 print br,checkbox(-name=>'god_limit', -checked=>0, -label=>"ogranièi pretra¾ivanje samo na godinu "),popup_menu(-name=>'god',-values=>[sort keys %god_lables],-labels=>\%god_lables,-onChange=>'this.form.god_limit.checked=true');
44 print end_form,hr;
45
46 if (param('search')) {
47
48 my $s;
49 # re-write query from +/- to and/and not
50
51 my @hl_words;
52
53 foreach (split(/\s+/,param('search'))) {
54 if (m/^([+-])(\S+)/) {
55 $s.= ($s) ? "and " : "";
56 $s.="not " if ($1 eq "-");
57 my @alt = $hr->alternatives($2);
58 $s.="(".join("* or ",@alt).") ";
59 push @hl_words, \@alt if ($1 ne "-");
60 } else {
61 my @alt = $hr->alternatives($_);
62 $s .= "(".join("* or ",@alt).") ";
63 push @hl_words, \@alt;
64 }
65 }
66 $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
67 $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
68
69 my $l2_map = Unicode::Map8->new("ISO-8859-2") || die;
70 my $us = Unicode::String->new();
71
72 my $sw_q;
73 my $sh;
74
75 if (defined(param('max_hits'))) {
76 $max_hits = param('max_hits');
77 }
78
79 if (param('index') eq 'title') {
80
81 $sh = SWISH->connect('Fork',
82 prog => $prog,
83 indexes => "$dir/swish/nn.index",
84 properties => [qw/god br nr/],
85 results => sub {
86 my ($sh,$hit) = @_;
87
88 $us->utf8($hit->swishtitle);
89
90 my $naslov = $l2_map->to8($us->utf16);
91 $naslov = FormatResult::highlite_words(\$naslov, \@hl_words);
92 print "<tt><a href=\"$url",$hit->swishdocpath,"\">NN",$hit->god,"/",$hit->br,"</a> ",$hit->nr," </tt> $naslov [",$hit->swishrank,"]<br>\n";
93 # print $_[1]->as_string,"<br>\n";
94 # my @fields = $hit->field_names;
95 # print "Field '$_' = '", $hit->$_, "'<br>\n" for sort @fields;
96 },
97 maxhits => $max_hits,
98 );
99
100 die $SWISH::errstr unless $sh;
101
102 $sw_q = "naslov_czs=($s)";
103 if (param('god_limit')) {
104 $sw_q .= " and god=".int(param('god')) if (param('god'));
105 }
106
107 } else {
108 # search in full text
109
110 $sh = SWISH->connect('Fork',
111 prog => $prog,
112 indexes => "$dir/swish/sluzbeno.index",
113 results => sub {
114 my ($sh,$hit) = @_;
115
116 my $path = $hit->swishdocpath;
117 if ($file2title{$path}) {
118 my ($god,$br,$nr,undef,$naslov) = split(/ /,$file2title{$path},5);
119 $naslov = FormatResult::highlite_words(\$naslov, \@hl_words);
120 print "<tt><a href=\"$url",$path,"\">NN$god/$br</a> $nr</tt> $naslov [",$hit->swishrank,"]<br>\n";
121
122 } else {
123 print "<!-- error! can't find $path -->\n";
124 }
125
126 },
127 maxhits => $max_hits,
128 );
129
130 die $SWISH::errstr unless $sh;
131
132 $sw_q = $s;
133 if (param('god_limit')) {
134 $sw_q .= " and swishdocpath=".int(param('god')) if (param('god'));
135 }
136
137 }
138
139 print "<!-- swish query: $sw_q -->";
140
141 $hits = $sh->query($sw_q);
142
143 if ($hits > 0) {
144 print p,hr,"Prikazujem $hits zakona";
145 print " iz godine ",param('god') if (param('god_limit'));
146 print " (maks. $max_hits)... " if ($max_hits);
147 print " <small>[$s]</small>";
148 } else {
149 print p,"Nije naðen niti jedan zakon... <small>[$s, ",$sh->errstr,"]</small>";
150 }
151 } else {
152 my $dir=$0;
153 $dir=~s,(^.*?)/[^/]+$,$1,g;
154 open(HELP, "$dir/nn-help.html") || die "can't open '$dir/nn-help.html'";
155 while(<HELP>) {
156 print;
157 }
158 close(HELP);
159 }
160
161 untie %file2title;

Properties

Name Value
cvs2svn:cvs-rev 1.15
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26