/[bfilter]/trunk/bfilter.pl
This is a repository of my old source code, which isn't updated anymore. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bfilter.pl

Parent Directory | Revision Log


Revision 9 - (hide annotations)
Wed Sep 8 17:32:20 2004 UTC (19 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 1937 byte(s)
correctly escape ' and \ in input data

1 dpavlin 1 #!/usr/bin/perl -w
2     #
3    
4     use strict;
5     use locale;
6    
7     # maximum entries
8     my $max = 0;
9     # minimum letters to search by
10 dpavlin 9 my $min_len = 3;
11 dpavlin 4 # if more than x elements, warn to increase min_len
12     my $increase_at = 500;
13 dpavlin 1
14 dpavlin 3 my $debug = 1;
15    
16 dpavlin 1 sub print_file {
17     my $f = shift || return;
18     open(F, $f) || die "$f: $!";
19     while(<F>) {
20     print;
21     }
22     close(F);
23     }
24    
25     print qq{
26     var headlines = Array();
27     };
28    
29     my @part_arr;
30     my $last_part = '';
31     my $total = 0;
32    
33 dpavlin 4 my $max_elements = 0;
34    
35 dpavlin 1 while(<STDIN>) {
36     chomp;
37    
38 dpavlin 9 if (!m/\t/ || m/\t$/) {
39     print STDERR "SKIP '$_': no tab\n";
40     next;
41     }
42    
43 dpavlin 7 my ($path,$headline) = split(/\t+/,$_,2);
44    
45 dpavlin 9 if (length($headline) < $min_len) {
46     print STDERR "SKIP '$_': too short\n";
47     next;
48     }
49 dpavlin 1
50 dpavlin 9
51 dpavlin 1 # split into min_len part and rest
52 dpavlin 7 my ($part,$rest) = ( substr($headline,0,$min_len), substr($headline,$min_len) );
53 dpavlin 1
54 dpavlin 9 # escape special chars
55     $part =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED part '$part'\n";
56     $rest =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED rest '$rest'\n";
57     $headline =~ s/(['\\])/\\$1/g;
58    
59 dpavlin 1 # make part lowercase
60     $part = lc($part);
61    
62     $last_part = $part if (! $last_part);
63    
64     # new part?
65     if ($part ne $last_part) {
66 dpavlin 7 print STDERR $last_part,"\t",$#part_arr+1,"\n" if ($debug && $#part_arr > $increase_at);
67 dpavlin 4 $max_elements = $#part_arr if ($#part_arr > $max_elements);
68 dpavlin 7 print "headlines['$last_part'] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);
69 dpavlin 1 $total += $#part_arr;
70     @part_arr = ();
71     $last_part = $part;
72     }
73 dpavlin 7 push @part_arr, "['$path','$headline']";
74 dpavlin 1
75     # break out?
76     last if ($max && $total > $max);
77     }
78    
79 dpavlin 7 print "headlines['$last_part'] = [\n ",join(",\n ",@part_arr),"];\n" if (@part_arr);
80     print qq{
81 dpavlin 4
82 dpavlin 7 var min_len = $min_len;
83 dpavlin 9 var html_pre = '<div><a href="../';
84 dpavlin 7 var html_mid = '">';
85 dpavlin 9 var html_post = '</a></div>';
86 dpavlin 7
87     // index elements: $total
88    
89     };
90    
91     print STDERR "You have more than $increase_at elements, so you should\nincrease min_len to ",$min_len+1," or higher for performance benefit.\n" if ($max_elements > $increase_at);

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26