--- trunk/bfilter.pl 2004/09/07 19:03:11 8 +++ trunk/bfilter.pl 2004/09/08 17:32:20 9 @@ -7,7 +7,7 @@ # maximum entries my $max = 0; # minimum letters to search by -my $min_len = 2; +my $min_len = 3; # if more than x elements, warn to increase min_len my $increase_at = 500; @@ -35,14 +35,27 @@ while() { chomp; + if (!m/\t/ || m/\t$/) { + print STDERR "SKIP '$_': no tab\n"; + next; + } + my ($path,$headline) = split(/\t+/,$_,2); - # escape single quote - $headline =~ s/'/\\'/g; + if (length($headline) < $min_len) { + print STDERR "SKIP '$_': too short\n"; + next; + } + # split into min_len part and rest my ($part,$rest) = ( substr($headline,0,$min_len), substr($headline,$min_len) ); + # escape special chars + $part =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED part '$part'\n"; + $rest =~ s/(['\\])/\\$1/g && print STDERR "ESCAPED rest '$rest'\n"; + $headline =~ s/(['\\])/\\$1/g; + # make part lowercase $part = lc($part); @@ -67,9 +80,9 @@ print qq{ var min_len = $min_len; -var html_pre = '