/[bfilter]/trunk/bfilter.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bfilter.pl

Parent Directory | Revision Log


Revision 29 - (show annotations)
Fri Sep 24 15:44:27 2004 UTC (19 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 2134 byte(s)
lot of small changes: remove null from display, clear results before filling
new ones, combo that works again...

1 #!/usr/bin/perl -w
2 #
3
4 use strict;
5 use locale;
6
# Maximum number of entries to process; 0 disables the limit (the input
# loop only breaks out when this value is true).
my $max = 0;

# Minimum number of letters to search by.  Taken from the first
# command-line argument, defaulting to 3.  Each headline is grouped by its
# first $min_len characters and the value is written into the generated
# JavaScript, so it has to be a positive integer.
my $min_len = shift @ARGV;
$min_len = 3 unless defined($min_len);
die "usage: $0 [min_len]\nmin_len must be a positive integer, got '$min_len'\n"
    unless $min_len =~ /\A[1-9][0-9]*\z/;

# If a single prefix collects more than this many elements, warn the user
# to increase min_len.
my $increase_at = 500;

# Name of the generated index (the JavaScript variable that is emitted).
my $headlines = 'headlines';

# When true, oversized prefixes are reported on STDERR while processing.
my $debug = 1;
19
# Copy the contents of a file to the currently selected output handle.
#
# Parameters:
#   $f - path of the file to print; a false value (undef, '', '0') makes
#        the sub return immediately without doing anything.
# Returns: the result of closing the file.
# Dies:    with "$f: <system error>" when the file cannot be opened.
sub print_file {
    my $f = shift || return;

    # Three-argument open with an explicit read mode: the name is used
    # verbatim, so surrounding whitespace is kept and characters such as
    # '>' or '|' in the name are never interpreted as an open mode.
    open(my $fh, '<', $f) || die "$f: $!";

    # A lexical loop variable keeps the caller's $_ intact.
    while (my $line = <$fh>) {
        print $line;
    }

    return close($fh);
}
28
# Start the generated JavaScript by declaring the index object.
print "\nvar $headlines = new Object();\n";

# Rows collected for the prefix currently being read.
my @part_arr = ();

# Prefix of the rows in @part_arr, the running entry total, and the size of
# the largest prefix group seen so far.
my ($last_part, $total, $max_elements) = ('', 0, 0);
38
# Turn a value into a JavaScript literal.
#
# Parameters:
#   $t - the value to encode.
# Returns:
#   'undef'         for an undefined or empty value,
#   the bare digits for a plain decimal integer (no leading zeros, at most
#                   15 digits),
#   otherwise the value as a single-quoted string with quotes, backslashes,
#   carriage returns and newlines escaped.
# Side effect: reports on STDERR whenever a quote or backslash was escaped.
sub escape_js {
    my $t = shift;

    # Only "nothing" maps to undef; the value 0 is real data.
    return 'undef' unless defined $t && length $t;

    # escape single quote and backslash
    if ($t =~ s/(['\\])/\\$1/g) {
        print STDERR "ESCAPED '$t'\n";
    }

    # A raw line break would terminate the JavaScript string literal.
    $t =~ s/\r/\\r/g;
    $t =~ s/\n/\\n/g;

    # Emit unquoted only when the digits mean the same thing in JavaScript:
    # a leading zero could be read as a legacy octal literal, and very long
    # digit strings would lose precision as a JavaScript number.
    return $t if $t =~ /\A(?:0|[1-9][0-9]{0,14})\z/;

    # quote string if not number
    return "'$t'";
}
47
# Flush the rows collected for $last_part: emit them as one JavaScript
# assignment of the form
#   headlines['abc'] = [ row, row, ... ];
# then fold their count into the running statistics and empty the buffer.
# Does nothing when the buffer is empty.
sub _flush_part {
    # Number of rows, not the last index ($#part_arr is one less).
    my $count = scalar @part_arr;
    return unless $count;

    print STDERR $last_part, "\t", $count, "\n"
        if ($debug && $count > $increase_at);
    $max_elements = $count if ($count > $max_elements);

    print "${headlines}[", escape_js($last_part), "] = [\n ",
        join(",\n ", @part_arr), "];\n";

    $total += $count;
    @part_arr = ();
    return;
}

# Read tab-separated records from STDIN: the first field is the headline,
# the remaining fields are carried along as data.  Rows are grouped only
# while consecutive lines share the same lowercased $min_len-character
# prefix, so a prefix that reappears later in the input is emitted again
# and its assignment replaces the earlier one in the generated JavaScript.
while (my $line = <STDIN>) {
    chomp $line;
    # Tolerate CRLF input; a stray CR would hide a trailing tab and end up
    # inside the last field.
    $line =~ s/\r\z//;

    if ($line !~ m/\t/ || $line =~ m/\t\z/) {
        print STDERR "SKIP '$line': no tab\n";
        next;
    }

    my @data = split(/\t+/, $line);

    # Test for emptiness rather than truth so a headline of '0' is accepted.
    my $headline = shift @data;
    die "need at least headline!" unless defined $headline && length $headline;

    if (length($headline) < $min_len) {
        print STDERR "SKIP '$line': too short\n";
        next;
    }

    # the lowercased first min_len characters select the group
    my $part = lc(substr($headline, 0, $min_len));

    # The first accepted row starts the first group.  Compare against the
    # empty string, not truth, so a prefix of '0' is not mistaken for
    # "no group yet".
    $last_part = $part if ($last_part eq '');

    # new part?
    if ($part ne $last_part) {
        _flush_part();
        $last_part = $part;
    }
    push @part_arr,
        "[" . escape_js($headline) . "," . join(",", map { escape_js($_) } @data) . "]";

    # break out?
    last if ($max && $total > $max);
}

# The final group also has to be emitted and counted.
_flush_part();

print qq{

${headlines}.min_len = $min_len;
${headlines}.length = $total;

};

print STDERR "You have more than $increase_at elements, so you should\nincrease min_len to ",$min_len+1," or higher for performance benefit.\n" if ($max_elements > $increase_at);

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26