/[nn.old]/trunk/search/nn-swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/search/nn-swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 106 - (hide annotations)
Mon Aug 4 15:28:56 2008 UTC (15 years, 8 months ago) by dpavlin
File size: 4478 byte(s)
use SWISH::API and Encode to make it work on recent installations

1 dpavlin 68 #!/usr/bin/perl -w
2    
3     use strict;
4     use CGI qw/:standard -no_xhtml/;
5     use CGI::Carp qw(fatalsToBrowser);
6 dpavlin 106 use SWISH::API;
7 dpavlin 91 use Lingua::Spelling::Alternative;
8 dpavlin 106 use Encode qw/from_to/;
9 dpavlin 90 use GDBM_File;
10 dpavlin 97 use lib '/data/swish/html';
11     use FormatResult;
12 dpavlin 68
13 dpavlin 91 my $dir='/home/dpavlin/nn';
14 dpavlin 106 #my $prog='/usr/bin/swish-e';
15 dpavlin 91 my $url='http://www.nn.hr/clanci/sluzbeno/';
16 dpavlin 68
17 dpavlin 77 my $max_hits=100;
18 dpavlin 68
19 dpavlin 77 my %labels = (100=>' 100', 200=>' 200', 500=>' 500', 0=>'neogranièeno');
20 dpavlin 91 my %index = ('title'=>'naslovu', 'text'=>'tekstu');
21 dpavlin 77
22 dpavlin 95 my %god_lables;
23 dpavlin 90
24 dpavlin 91 my %brzakona;
25     tie %brzakona, 'GDBM_File', "$dir/swish/brzakona.gdbm", &GDBM_READER, 0640 || die "tie: $!";
26     foreach (sort keys %brzakona) {
27 dpavlin 95 $god_lables{$_} = sprintf("%-8s (%d zakona)",$_,$brzakona{$_});
28 dpavlin 90 }
29 dpavlin 91 untie %brzakona;
30 dpavlin 90
31 dpavlin 91 my %file2title;
32     tie %file2title, 'GDBM_File', "$dir/swish/file2title.gdbm", &GDBM_READER, 0640 || die "tie: $!";
33    
34     my $hr = new Lingua::Spelling::Alternative( DEBUG => 0 );
35     $hr->load_findaffix("$dir/prvih_50.txt");
36    
37 dpavlin 97 print header(-charset=>'iso-8859-2'),start_html(-title=>'NN - Narodne Novine pretrazivanje',-lang=>'hr'),start_form(-name=>'search_form');
38 dpavlin 77 print "Potra¾i ",popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits)," zakona sa rijeèima: ",textfield('search');
39 dpavlin 91 print " u ",popup_menu(-name=>'index',-values=>[ sort keys %index ],-labels=>\%index,-default=>'title')," zakona ";
40 dpavlin 68 print submit(-value=>'prika¾i');
41 dpavlin 95 print br,checkbox(-name=>'god_limit', -checked=>0, -label=>"ogranièi pretra¾ivanje samo na godinu "),popup_menu(-name=>'god',-values=>[sort keys %god_lables],-labels=>\%god_lables,-onChange=>'this.form.god_limit.checked=true');
42 dpavlin 68 print end_form,hr;
43    
44     if (param('search')) {
45    
46 dpavlin 76 my $s;
47     # re-write query from +/- to and/and not
48 dpavlin 97
49     my @hl_words;
50    
51 dpavlin 76 foreach (split(/\s+/,param('search'))) {
52     if (m/^([+-])(\S+)/) {
53     $s.= ($s) ? "and " : "";
54     $s.="not " if ($1 eq "-");
55 dpavlin 97 my @alt = $hr->alternatives($2);
56     $s.="(".join("* or ",@alt).") ";
57     push @hl_words, \@alt if ($1 ne "-");
58 dpavlin 76 } else {
59 dpavlin 97 my @alt = $hr->alternatives($_);
60     $s .= "(".join("* or ",@alt).") ";
61     push @hl_words, \@alt;
62 dpavlin 76 }
63     }
64 dpavlin 68 $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
65     $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
66    
67 dpavlin 106 my $swish_query;
68 dpavlin 71
69 dpavlin 101 if (defined(param('max_hits'))) {
70     $max_hits = param('max_hits');
71     }
72    
73 dpavlin 106 my ( $index_path, $result_coderef );
74    
75 dpavlin 91 if (param('index') eq 'title') {
76 dpavlin 70
77 dpavlin 106 $result_coderef = sub {
78     my $hit = shift || die;
79     my $naslov = $hit->property('swishtitle');
80     from_to($naslov, 'utf-8', 'iso-8859-2');
81     $naslov = FormatResult::highlite_words(\$naslov, \@hl_words);
82     print "<tt><a href=\"$url",$hit->property('swishdocpath'),"\">NN",$hit->property('god'),"/",$hit->property('br'),"</a> ",$hit->property('nr')," </tt> $naslov [",$hit->property('swishrank'),"]<br>\n";
83     };
84 dpavlin 71
85 dpavlin 106 $index_path = "$dir/swish/nn.index";
86     $swish_query = "naslov_czs=($s)";
87 dpavlin 95 if (param('god_limit')) {
88 dpavlin 106 $swish_query .= " and god=".int(param('god')) if (param('god'));
89 dpavlin 91 }
90    
91     } else {
92     # search in full text
93    
94 dpavlin 106 $result_coderef = sub {
95     my $hit = shift || die;
96 dpavlin 91
97 dpavlin 106 my $path = $hit->property('swishdocpath');
98     if ($file2title{$path}) {
99     my ($god,$br,$nr,undef,$naslov) = split(/ /,$file2title{$path},5);
100     $naslov = FormatResult::highlite_words(\$naslov, \@hl_words);
101     from_to($naslov, 'utf-8', 'iso-8859-2');
102     print "<tt><a href=\"$url",$path,"\">NN$god/$br</a> $nr</tt> $naslov [",$hit->property('swishrank'),"]<br>\n";
103 dpavlin 91
104 dpavlin 106 } else {
105     print "<!-- error! can't find $path -->\n";
106     }
107 dpavlin 91
108 dpavlin 106 };
109 dpavlin 91
110 dpavlin 106 $index_path = "$dir/swish/sluzbeno.index";
111     $swish_query = $s;
112 dpavlin 95 if (param('god_limit')) {
113 dpavlin 106 $swish_query .= " and swishdocpath=".int(param('god')) if (param('god'));
114 dpavlin 91 }
115    
116 dpavlin 90 }
117    
118 dpavlin 106 print "<!-- swish query: $swish_query -->";
119 dpavlin 90
120 dpavlin 106 my $swish = SWISH::API->new( $index_path );
121     $swish->abort_last_error if $swish->Error;
122 dpavlin 90
123 dpavlin 106 my $results = $swish->query($swish_query);
124    
125     my $hits = $results->hits;
126    
127 dpavlin 68 if ($hits > 0) {
128 dpavlin 106 my $i = 1;
129     while ( my $hit = $results->next_result ) {
130     $result_coderef->( $hit );
131     last if $i++ == $max_hits;
132     }
133    
134     print p,hr,"Pronaðeno $hits zakona";
135 dpavlin 95 print " iz godine ",param('god') if (param('god_limit'));
136 dpavlin 106 print " (prikazano prvih $max_hits)... " if ($max_hits);
137 dpavlin 101 print " <small>[$s]</small>";
138 dpavlin 68 } else {
139 dpavlin 106 print p,"Nije naðen niti jedan zakon... <small>[$s, ",$swish->error_string,"]</small>";
140 dpavlin 68 }
141     } else {
142 dpavlin 95 my $dir=$0;
143     $dir=~s,(^.*?)/[^/]+$,$1,g;
144     open(HELP, "$dir/nn-help.html") || die "can't open '$dir/nn-help.html'";
145     while(<HELP>) {
146     print;
147     }
148     close(HELP);
149 dpavlin 68 }
150 dpavlin 91
151     untie %file2title;

Properties

Name Value
cvs2svn:cvs-rev 1.15
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26