/[bfilter]/trunk/bfilter.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bfilter.pl

Parent Directory | Revision Log


Revision 26 - (hide annotations)
Wed Sep 15 16:48:24 2004 UTC (19 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 2100 byte(s)
much better quoting for JavaScript (oh, why am I re-inventing
Data::JavaScript?)

#!/usr/bin/perl -w
#
# Reads tab-separated records from STDIN and prints JavaScript that builds
# an index object keyed by the lowercased first $min_len characters of each
# record's first field.
#
# Usage: bfilter.pl [min_len] < input > output.js

use strict;
use locale;

# maximum entries (0 disables the limit)
my $max = 0;

# minimum letters to search by; taken from the first command-line argument,
# defaulting to 3 when the argument is missing (or 0)
my $min_len = shift(@ARGV) || 3;

# The value is used as a substr() length and is written verbatim into the
# generated JavaScript, so accept nothing but a positive integer.
die "usage: $0 [min_len]\nmin_len must be a positive integer, got '$min_len'\n"
    unless ($min_len =~ m/\A[1-9][0-9]*\z/);

# if more than x elements, warn to increase min_len
my $increase_at = 500;

# name of generated index (the JavaScript variable the output assigns to)
my $headlines = 'headlines';

# when true, oversized groups are reported on STDERR while the index is built
my $debug = 1;
18    
# print_file($path)
#
# Copies the contents of the file $path, line by line, to the currently
# selected output handle (STDOUT unless select() was used).
#
# Returns without doing anything when no path (or an empty one) is given.
# Dies with "$path: <system error>" when the file cannot be opened.
sub print_file {
    my $f = shift;
    return unless (defined $f && length $f);

    # Three-argument open: the name is always treated as a plain file to
    # read, never as a mode prefix ('>file') or a command ('cmd |').
    open(my $fh, '<', $f) or die "$f: $!";

    # A lexical line variable keeps the caller's $_ untouched.
    while (defined(my $line = <$fh>)) {
        print $line;
    }

    close($fh);
}
27    
# Start the generated script: declare the index object that every later
# assignment fills in.
print "\nvar $headlines = new Object();\n";

# Rows (already formatted as JavaScript array literals) collected for the
# part that is currently being read.
my @part_arr;

# $last_part    - key of the part the rows in @part_arr belong to
# $total        - running entry counter, written out as the index's .length
# $max_elements - size of the largest part seen, used for the final warning
my ($last_part, $total, $max_elements) = ('', 0, 0);
37    
# escape_js($value)
#
# Returns $value rendered as a JavaScript literal:
#   * undef (or no argument)  -> the bare word  undef
#   * a plain decimal integer -> the digits themselves, unquoted
#   * anything else           -> a single-quoted string with quotes,
#                                backslashes, CR and LF escaped
#
# Whenever a quote or backslash had to be escaped, the escaped text is also
# reported on STDERR.
#
# NOTE(review): 'undef' is not a JavaScript keyword; it is kept as is because
# the page consuming the output may define it -- confirm.
sub escape_js {
    my $t = shift;

    # Only a missing value maps to 'undef'; 0 and '' are real values.
    return 'undef' unless defined $t;

    # escape single quote and backslash
    $t =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED '$t'\n";

    # A raw line terminator inside a quoted literal is a JavaScript syntax
    # error (e.g. the trailing CR of CRLF input), so spell it as an escape.
    $t =~ s/\r/\\r/g;
    $t =~ s/\n/\\n/g;

    # Emit unquoted only what JavaScript reads back unchanged: no leading
    # zeros (legacy octal: 0123 == 83) and at most 15 digits (larger
    # integers lose precision as IEEE doubles).
    return $t if ($t =~ m/\A(?:0|[1-9][0-9]{0,14})\z/);

    # quote string if not number
    return "'$t'";
}
46    
# emit_part($key, \@rows)
#
# Prints the JavaScript assignment that stores @rows (strings that are
# already JavaScript array literals) under $key in the index object.
# Does nothing for an empty row list.  Also records the largest group seen
# in $max_elements and, in debug mode, reports oversized groups on STDERR.
sub emit_part {
    my ($key, $rows) = @_;

    my $count = scalar @$rows;
    return unless $count;

    print STDERR $key, "\t", $count, "\n" if ($debug && $count > $increase_at);
    $max_elements = $count if ($count > $max_elements);

    print $headlines, "[", escape_js($key), "] = [\n ", join(",\n ", @$rows), "];\n";
}

# Main loop: one record per input line, fields separated by tabs.  The first
# field is the headline, the remaining fields are carried along as data.
# Consecutive records whose headline starts with the same $min_len
# characters (compared in lowercase) are written out as one group.
while (<STDIN>) {
    chomp;

    # a record needs a tab, and something after the last one
    if (!m/\t/ || m/\t$/) {
        print STDERR "SKIP '$_': no tab\n";
        next;
    }

    my @data = split(/\t+/, $_);

    # Test for emptiness rather than truth so that a headline of "0" is
    # accepted.
    my $headline = shift @data;
    die "need at least headline!" unless (defined $headline && length $headline);

    if (length($headline) < $min_len) {
        print STDERR "SKIP '$_': too short\n";
        next;
    }

    # group key: first min_len characters, lowercased
    my $part = lc(substr($headline, 0, $min_len));

    if (!@part_arr) {
        # First accepted record.  Checking the row list instead of the
        # truth of $last_part keeps a part of '0' from being mistaken for
        # "no part yet" and merged into the following group.
        $last_part = $part;
    }
    elsif ($part ne $last_part) {
        # new part: write out the finished group and start the next one
        emit_part($last_part, \@part_arr);
        @part_arr  = ();
        $last_part = $part;
    }

    push @part_arr, "[" . escape_js($headline) . "," . join(",", map { escape_js($_) } @data) . "]";

    # Count every stored entry, so the reported length includes each row
    # of each group, the last group as well.
    $total++;

    # break out once the configured maximum is reached (0 = no limit)
    last if ($max && $total >= $max);
}

# write out the group that was still being collected when input ended
emit_part($last_part, \@part_arr);

print qq{

${headlines}.min_len = $min_len;
${headlines}.length = $total;

};

print STDERR "You have more than $increase_at elements, so you should\nincrease min_len to ",$min_len+1," or higher for performance benefit.\n" if ($max_elements > $increase_at);

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26