/[nn.old]/trunk/search/nn-swish.cgi
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/search/nn-swish.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 97 - (show annotations)
Wed Sep 1 14:08:00 2004 UTC (19 years, 8 months ago) by dpavlin
File size: 4491 byte(s)
added syntax highlightning, first commit into subversion

1 #!/usr/bin/perl -w
2
3 use strict;
4 use CGI qw/:standard -no_xhtml/;
5 use CGI::Carp qw(fatalsToBrowser);
6 use SWISH;
7 use Unicode::String qw(utf8 utf16);
8 use Lingua::Spelling::Alternative;
9 require Unicode::Map8;
10 use GDBM_File;
11 use lib '/data/swish/html';
12 use FormatResult;
13
14 my $dir='/home/dpavlin/nn';
15 my $prog='/usr/bin/swish-e';
16 my $url='http://www.nn.hr/clanci/sluzbeno/';
17
18 my $hits=0;
19 my $max_hits=100;
20
21 my %labels = (100=>' 100', 200=>' 200', 500=>' 500', 0=>'neogranièeno');
22 my %index = ('title'=>'naslovu', 'text'=>'tekstu');
23
24 my %god_lables;
25
26 my %brzakona;
27 tie %brzakona, 'GDBM_File', "$dir/swish/brzakona.gdbm", &GDBM_READER, 0640 || die "tie: $!";
28 foreach (sort keys %brzakona) {
29 $god_lables{$_} = sprintf("%-8s (%d zakona)",$_,$brzakona{$_});
30 }
31 untie %brzakona;
32
33 my %file2title;
34 tie %file2title, 'GDBM_File', "$dir/swish/file2title.gdbm", &GDBM_READER, 0640 || die "tie: $!";
35
36 my $hr = new Lingua::Spelling::Alternative( DEBUG => 0 );
37 $hr->load_findaffix("$dir/prvih_50.txt");
38
39 print header(-charset=>'iso-8859-2'),start_html(-title=>'NN - Narodne Novine pretrazivanje',-lang=>'hr'),start_form(-name=>'search_form');
40 print "Potra¾i ",popup_menu(-name=>'max_hits',-values=>[ sort keys %labels ],-labels=>\%labels,-default=>$max_hits)," zakona sa rijeèima: ",textfield('search');
41 print " u ",popup_menu(-name=>'index',-values=>[ sort keys %index ],-labels=>\%index,-default=>'title')," zakona ";
42 print submit(-value=>'prika¾i');
43 print br,checkbox(-name=>'god_limit', -checked=>0, -label=>"ogranièi pretra¾ivanje samo na godinu "),popup_menu(-name=>'god',-values=>[sort keys %god_lables],-labels=>\%god_lables,-onChange=>'this.form.god_limit.checked=true');
44 print end_form,hr;
45
46 if (param('search')) {
47
48 my $s;
49 # re-write query from +/- to and/and not
50
51 my @hl_words;
52
53 foreach (split(/\s+/,param('search'))) {
54 if (m/^([+-])(\S+)/) {
55 $s.= ($s) ? "and " : "";
56 $s.="not " if ($1 eq "-");
57 my @alt = $hr->alternatives($2);
58 $s.="(".join("* or ",@alt).") ";
59 push @hl_words, \@alt if ($1 ne "-");
60 } else {
61 my @alt = $hr->alternatives($_);
62 $s .= "(".join("* or ",@alt).") ";
63 push @hl_words, \@alt;
64 }
65 }
66 $s=~tr/šðžèæŠÐŽÈÆ/¹ð¾èæ©Ð®ÈÆ/; # 1250 -> iso8859-2
67 $s=~tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/;
68
69 my $l2_map = Unicode::Map8->new("ISO-8859-2") || die;
70 my $us = Unicode::String->new();
71
72 my $sw_q;
73 my $sh;
74
75 if (param('index') eq 'title') {
76
77 $sh = SWISH->connect('Fork',
78 prog => $prog,
79 indexes => "$dir/swish/nn.index",
80 properties => [qw/god br nr/],
81 results => sub {
82 my ($sh,$hit) = @_;
83
84 $us->utf8($hit->swishtitle);
85
86 my $naslov = $l2_map->to8($us->utf16);
87 $naslov = FormatResult::highlite_words(\$naslov, \@hl_words);
88 print "<tt><a href=\"$url",$hit->swishdocpath,"\">NN",$hit->god,"/",$hit->br,"</a> ",$hit->nr," </tt> $naslov [",$hit->swishrank,"]<br>\n";
89 # print $_[1]->as_string,"<br>\n";
90 # my @fields = $hit->field_names;
91 # print "Field '$_' = '", $hit->$_, "'<br>\n" for sort @fields;
92 },
93 maxhits => param('max_hits') || $max_hits,
94 );
95
96 die $SWISH::errstr unless $sh;
97
98 $sw_q = "naslov_czs=($s)";
99 if (param('god_limit')) {
100 $sw_q .= " and god=".int(param('god')) if (param('god'));
101 }
102
103 } else {
104 # search in full text
105
106 $sh = SWISH->connect('Fork',
107 prog => $prog,
108 indexes => "$dir/swish/sluzbeno.index",
109 results => sub {
110 my ($sh,$hit) = @_;
111
112 my $path = $hit->swishdocpath;
113 if ($file2title{$path}) {
114 my ($god,$br,$nr,undef,$naslov) = split(/ /,$file2title{$path},5);
115 $naslov = FormatResult::highlite_words(\$naslov, \@hl_words);
116 print "<tt><a href=\"$url",$path,"\">NN$god/$br</a> $nr</tt> $naslov [",$hit->swishrank,"]<br>\n";
117
118 } else {
119 print "<!-- error! can't find $path -->\n";
120 }
121
122 },
123 maxhits => param('max_hits') || $max_hits,
124 );
125
126 die $SWISH::errstr unless $sh;
127
128 $sw_q = $s;
129 if (param('god_limit')) {
130 $sw_q .= " and swishdocpath=".int(param('god')) if (param('god'));
131 }
132
133 }
134
135 print "<!-- swish query: $sw_q -->";
136
137 $hits = $sh->query($sw_q);
138
139 if ($hits > 0) {
140 print p,hr,"Prikazujem $hits zakona";
141 print " iz godine ",param('god') if (param('god_limit'));
142 print " (maks. ",param('max_hits') || $max_hits,")... <small>($s)</small>";
143 } else {
144 print p,"Nije naðen niti jedan zakon... <small>($s, ",$sh->errstr,")</small>";
145 }
146 } else {
147 my $dir=$0;
148 $dir=~s,(^.*?)/[^/]+$,$1,g;
149 open(HELP, "$dir/nn-help.html") || die "can't open '$dir/nn-help.html'";
150 while(<HELP>) {
151 print;
152 }
153 close(HELP);
154 }
155
156 untie %file2title;

Properties

Name Value
cvs2svn:cvs-rev 1.15
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26