/[bfilter]/trunk/bfilter.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bfilter.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 36 - (show annotations)
Sun Oct 10 08:30:36 2004 UTC (14 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 2301 byte(s)
removed locale fix and locale alltogether -- JavaScript hasn't notion of
locale, so I must sort without locale

1 #!/usr/bin/perl -w
2 #
3
4 use strict;
5
6 # maximum entries
7 my $max = 0;
8 # minimum letters to search by
9 my $min_len = shift @ARGV;
10 $min_len = 3 unless defined($min_len);
11 # if more than x elements, warn to increase min_len
12 my $increase_at = 500;
13
14 # name of generated index
15 my $headlines = 'headlines';
16
17 my $debug = 1;
18
19 sub print_file {
20 my $f = shift || return;
21 open(F, $f) || die "$f: $!";
22 while(<F>) {
23 print;
24 }
25 close(F);
26 }
27
28 print qq{
29 var $headlines = new Object();
30 };
31
32 my @part_arr;
33 my $last_part = '';
34 my $total = 0;
35
36 my $max_elements = 0;
37
38 sub escape_js {
39 my $t = shift || return 'undef';
40 # escape single quote and backspace
41 $t =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED '$t'\n";
42 # quote string if not number
43 $t = "'$t'" unless ($t =~ m/^\d+$/);
44 return $t;
45 }
46
47 my @lines;
48
49 while(<STDIN>) {
50 chomp;
51
52 if (!m/\t/ || m/\t$/) {
53 print STDERR "SKIP '$_': no tab\n";
54 next;
55 }
56
57 # remove leading spaces (which are ignored if source list was
58 # sorted using locale)
59 s/^\s+//;
60
61 push @lines, $_;
62 }
63
64 foreach (sort { lc($a) cmp lc($b) } @lines) {
65
66 my @data = split(/\t+/,$_);
67
68 my $headline = shift @data || die "need at least headline!";
69
70 if (length($headline) < $min_len) {
71 print STDERR "SKIP '$_': too short\n";
72 next;
73 }
74
75
76 # split into min_len part and rest
77 my ($part,$rest) = ( substr($headline,0,$min_len), substr($headline,$min_len) );
78
79 # make part lowercase
80 $part = lc($part);
81
82 $last_part = $part if (! $last_part);
83
84 # new part?
85 if ($part ne $last_part) {
86 print STDERR $last_part,"\t",$#part_arr+1,"\n" if ($debug && $#part_arr > $increase_at);
87 $max_elements = $#part_arr if ($#part_arr > $max_elements);
88 print "${headlines}[",escape_js($last_part),"] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);
89 $total += $#part_arr;
90 @part_arr = ();
91 $last_part = $part;
92 }
93 push @part_arr, "[".escape_js($headline).",".join(",",map { escape_js($_) } @data)."]";
94
95 # break out?
96 last if ($max && $total > $max);
97 }
98
99 print "${headlines}[",escape_js($last_part)."] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);
100 print qq{
101
102 ${headlines}.min_len = $min_len;
103 ${headlines}.length = $total;
104
105 };
106
107 print STDERR "You have more than $increase_at elements, so you should\nincrease min_len to ",$min_len+1," or higher for performance benefit.\n" if ($max_elements > $increase_at);

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26