/[nn.old]/trunk/search/nn-swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/search/nn-swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 106 - (show annotations)
Mon Aug 4 15:28:56 2008 UTC (15 years, 8 months ago) by dpavlin
File size: 4478 byte(s)
use SWISH::API and Encode to make it work on recent installations

1 #!/usr/bin/perl -w
2
3 use strict;
4 use CGI qw/:standard -no_xhtml/;
5 use CGI::Carp qw(fatalsToBrowser);
6 use SWISH::API;
7 use Lingua::Spelling::Alternative;
8 use Encode qw/from_to/;
9 use GDBM_File;
10 use lib '/data/swish/html';
11 use FormatResult;
12
13 my $dir='/home/dpavlin/nn';
14 #my $prog='/usr/bin/swish-e';
15 my $url='http://www.nn.hr/clanci/sluzbeno/';
16
17 my $max_hits=100;
18
19 my %labels = (100=>' 100', 200=>' 200', 500=>' 500', 0=>'neogranièeno');
20 my %index = ('title'=>'naslovu', 'text'=>'tekstu');
21
22 my %god_lables;
23
24 my %brzakona;
25 tie %brzakona, 'GDBM_File', "$dir/swish/brzakona.gdbm", &GDBM_READER, 0640 || die "tie: $!";
26 foreach (sort keys %brzakona) {
27 $god_lables{$_} = sprintf("%-8s (%d zakona)",$_,$brzakona{$_});
28 }
29 untie %brzakona;
30
31 my %file2title;
32 tie %file2title, 'GDBM_File', "$dir/swish/file2title.gdbm", &GDBM_READER, 0640 || die "tie: $!";
33
34 my $hr = new Lingua::Spelling::Alternative( DEBUG => 0 );
35 $hr->load_findaffix("$dir/prvih_50.txt");
36
37 print header(-charset=>'iso-8859-2'),start_html(-title=>'NN - Narodne Novine pretrazivanje',-lang=>'hr'),start_form(-name=>'search_form');
38 print "Potra¾i ",popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits)," zakona sa rijeèima: ",textfield('search');
39 print " u ",popup_menu(-name=>'index',-values=>[ sort keys %index ],-labels=>\%index,-default=>'title')," zakona ";
40 print submit(-value=>'prika¾i');
41 print br,checkbox(-name=>'god_limit', -checked=>0, -label=>"ogranièi pretra¾ivanje samo na godinu "),popup_menu(-name=>'god',-values=>[sort keys %god_lables],-labels=>\%god_lables,-onChange=>'this.form.god_limit.checked=true');
42 print end_form,hr;
43
44 if (param('search')) {
45
46 my $s;
47 # re-write query from +/- to and/and not
48
49 my @hl_words;
50
51 foreach (split(/\s+/,param('search'))) {
52 if (m/^([+-])(\S+)/) {
53 $s.= ($s) ? "and " : "";
54 $s.="not " if ($1 eq "-");
55 my @alt = $hr->alternatives($2);
56 $s.="(".join("* or ",@alt).") ";
57 push @hl_words, \@alt if ($1 ne "-");
58 } else {
59 my @alt = $hr->alternatives($_);
60 $s .= "(".join("* or ",@alt).") ";
61 push @hl_words, \@alt;
62 }
63 }
64 $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
65 $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
66
67 my $swish_query;
68
69 if (defined(param('max_hits'))) {
70 $max_hits = param('max_hits');
71 }
72
73 my ( $index_path, $result_coderef );
74
75 if (param('index') eq 'title') {
76
77 $result_coderef = sub {
78 my $hit = shift || die;
79 my $naslov = $hit->property('swishtitle');
80 from_to($naslov, 'utf-8', 'iso-8859-2');
81 $naslov = FormatResult::highlite_words(\$naslov, \@hl_words);
82 print "<tt><a href=\"$url",$hit->property('swishdocpath'),"\">NN",$hit->property('god'),"/",$hit->property('br'),"</a> ",$hit->property('nr')," </tt> $naslov [",$hit->property('swishrank'),"]<br>\n";
83 };
84
85 $index_path = "$dir/swish/nn.index";
86 $swish_query = "naslov_czs=($s)";
87 if (param('god_limit')) {
88 $swish_query .= " and god=".int(param('god')) if (param('god'));
89 }
90
91 } else {
92 # search in full text
93
94 $result_coderef = sub {
95 my $hit = shift || die;
96
97 my $path = $hit->property('swishdocpath');
98 if ($file2title{$path}) {
99 my ($god,$br,$nr,undef,$naslov) = split(/ /,$file2title{$path},5);
100 $naslov = FormatResult::highlite_words(\$naslov, \@hl_words);
101 from_to($naslov, 'utf-8', 'iso-8859-2');
102 print "<tt><a href=\"$url",$path,"\">NN$god/$br</a> $nr</tt> $naslov [",$hit->property('swishrank'),"]<br>\n";
103
104 } else {
105 print "<!-- error! can't find $path -->\n";
106 }
107
108 };
109
110 $index_path = "$dir/swish/sluzbeno.index";
111 $swish_query = $s;
112 if (param('god_limit')) {
113 $swish_query .= " and swishdocpath=".int(param('god')) if (param('god'));
114 }
115
116 }
117
118 print "<!-- swish query: $swish_query -->";
119
120 my $swish = SWISH::API->new( $index_path );
121 $swish->abort_last_error if $swish->Error;
122
123 my $results = $swish->query($swish_query);
124
125 my $hits = $results->hits;
126
127 if ($hits > 0) {
128 my $i = 1;
129 while ( my $hit = $results->next_result ) {
130 $result_coderef->( $hit );
131 last if $i++ == $max_hits;
132 }
133
134 print p,hr,"Pronaðeno $hits zakona";
135 print " iz godine ",param('god') if (param('god_limit'));
136 print " (prikazano prvih $max_hits)... " if ($max_hits);
137 print " <small>[$s]</small>";
138 } else {
139 print p,"Nije naðen niti jedan zakon... <small>[$s, ",$swish->error_string,"]</small>";
140 }
141 } else {
142 my $dir=$0;
143 $dir=~s,(^.*?)/[^/]+$,$1,g;
144 open(HELP, "$dir/nn-help.html") || die "can't open '$dir/nn-help.html'";
145 while(<HELP>) {
146 print;
147 }
148 close(HELP);
149 }
150
151 untie %file2title;

Properties

Name Value
cvs2svn:cvs-rev 1.15
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26