/[jsFind]/trunk/jsFind.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/jsFind.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 8 by dpavlin, Wed Jul 21 15:44:15 2004 UTC revision 15 by dpavlin, Sun Sep 5 17:57:21 2004 UTC
# Line 1  Line 1 
1  package jsFind;  package jsFind;
2    
3  use 5.008004;  use 5.005;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    use HTML::Entities;
7    
8  our $VERSION = '0.01';  our $VERSION = '0.04';
9    
10    use Exporter 'import';
11    use Carp;
12    
13    our @ISA = qw(Exporter);
14    
15    BEGIN {
16            import 'jsFind::Node';
17    }
18    
19  =head1 NAME  =head1 NAME
20    
# Line 13  jsFind - generate index for jsFind using Line 23  jsFind - generate index for jsFind using
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
25    use jsFind;    use jsFind;
26      my $t = new jsFind(B => 4);
27      my $f = 1;
28      foreach my $k (qw{minima ut dolorem sapiente voluptatem}) {
29            $t->B_search(Key => $k,
30                    Data => {
31                            "path" => {
32                            t => "word $k",
33                            f => $f },
34                    },
35                    Insert => 1,
36                    Append => 1,
37            );
38      }
39    
40  =head1 DESCRIPTION  =head1 DESCRIPTION
41    
# Line 36  You can programatically (and incremental Line 57  You can programatically (and incremental
57    
58  =back  =back
59    
60  =head1 METHODS  You can also examine examples which come as tests with this module,
61    for example C<t/04words.t>.
 This module contains two packages C<jsFind> and C<jsFind::Node>.  
62    
63  =head2 jsFind methods  =head1 jsFind methods
   
 =cut  
64    
65  use Exporter 'import';  C<jsFind> is mode implementing methods which you, the user, are going to
66  use Carp;  use to create indexes.
67    
68  our @ISA = qw(Exporter);  =head2 new
   
 BEGIN {  
         import 'jsFind::Node';  
 }  
   
 =head3 new  
69    
70  Create new tree. Arguments are C<B> which is maximum numbers of keys in  Create new tree. Arguments are C<B> which is maximum numbers of keys in
71  each node and optional C<Root> node. Each root node may have child nodes.  each node and optional C<Root> node. Each root node may have child nodes.
# Line 82  sub new { Line 94  sub new {
94    bless { B => $B, Root => $Root } => $package;    bless { B => $B, Root => $Root } => $package;
95  }  }
96    
97  =head3 B_search  =head2 B_search
98    
99  Search, insert, append or replace data in B-Tree  Search, insert, append or replace data in B-Tree
100    
# Line 224  sub split_and_promote { Line 236  sub split_and_promote {
236    }    }
237  }  }
238    
239  =head3 B  =head2 B
240    
241  Return B (maximum number of keys)  Return B (maximum number of keys)
242    
# Line 236  sub B { Line 248  sub B {
248    $_[0]{B};    $_[0]{B};
249  }  }
250    
251  =head3 root  =head2 root
252    
253  Returns root node  Returns root node
254    
# Line 250  sub root { Line 262  sub root {
262    $self->{Root};    $self->{Root};
263  }  }
264    
265  =head3 node_overfull  =head2 node_overfull
266    
267  Returns if node is overfull  Returns if node is overfull
268    
# Line 264  sub node_overfull { Line 276  sub node_overfull {
276    $node->size > $self->B;    $node->size > $self->B;
277  }  }
278    
279  =head3 to_string  =head2 to_string
280    
281  Returns your tree as formatted string.  Returns your tree as formatted string.
282    
# Line 278  sub to_string { Line 290  sub to_string {
290    $_[0]->root->to_string;    $_[0]->root->to_string;
291  }  }
292    
293  =head3 to_dot  =head2 to_dot
294    
295  Create Graphviz graph of your tree  Create Graphviz graph of your tree
296    
# Line 296  sub to_dot { Line 308  sub to_dot {
308          return $dot;          return $dot;
309  }  }
310    
311  =head3 to_jsfind  =head2 to_jsfind
312    
313  Create xml index files for jsFind. This should be called after  Create xml index files for jsFind. This should be called after
314  your B-Tree has been filled with data.  your B-Tree has been filled with data.
# Line 305  your B-Tree has been filled with data. Line 317  your B-Tree has been filled with data.
317    
318  Returns number of nodes in created tree.  Returns number of nodes in created tree.
319    
320    There is also longer version if you want to recode your data charset
321    into different one (probably UTF-8):
322    
323     $root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8');
324    
325    Destination encoding is UTF-8 by default, so you don't have to specify it.
326    
327     $root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250');
328    
329  =cut  =cut
330    
331    my $iconv;
332    my $iconv_l1;
333    
334  sub to_jsfind {  sub to_jsfind {
335          my $self = shift;          my $self = shift;
336    
337          my $path = shift || confess "to_jsfind need path to your index!";          my $path = shift || confess "to_jsfind need path to your index!";
338    
339            my ($from_cp,$to_cp) = @_;
340    
341            $to_cp ||= 'UTF-8';
342    
343            if ($from_cp && $to_cp) {
344                    $iconv = Text::Iconv->new($from_cp,$to_cp);
345            }
346            $iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp);
347    
348          $path .= "/" if ($path =~ /\/$/);          $path .= "/" if ($path =~ /\/$/);
349          carp "create directory for index '$path': $!" if (! -w $path);          #carp "creating directory for index '$path'" if (! -w $path);
350    
351          return $self->root->to_jsfind($path,"0");          return $self->root->to_jsfind($path,"0");
352  }  }
# Line 324  sub default_cmp { Line 357  sub default_cmp {
357    $_[0] cmp $_[1];    $_[0] cmp $_[1];
358  }  }
359    
360    =head2 _recode
361    
362    This is internal function to recode charset.
363    
364    It will also try to decode entities in data using L<HTML::Entities>.
365    
366    =cut
367    
368    sub _recode {
369            my $self = shift;
370            my $text = shift || return;
371    
372            sub _decode_html_entities {
373                    my $data = shift || return;
374                    $data = $iconv_l1->convert(decode_entities($data)) || croak "entity decode problem: $data";
375            }
376    
377            if ($iconv) {
378                    $text = $iconv->convert($text) || $text && carp "convert problem: $text";
379                    $text =~ s/(\&\w+;)/_decode_html_entities($1)/ges;
380            }
381    
382            return $text;
383    }
384    
385  #####################################################################  #####################################################################
386    
387  =head2 jsFind::Node methods  =head1 jsFind::Node methods
388    
389  Each node has C<k> key-data pairs, with C<B> <= C<k> <= C<2B>, and  Each node has C<k> key-data pairs, with C<B> <= C<k> <= C<2B>, and
390  each has C<k+1> subnodes, which might be null.  each has C<k+1> subnodes, which might be null.
# Line 348  use strict; Line 406  use strict;
406    
407  use Carp;  use Carp;
408  use File::Path;  use File::Path;
409    use Text::Iconv;
410    use POSIX;
411    
412    use base 'jsFind';
413    
414  my $KEYS = 0;  my $KEYS = 0;
415  my $DATA = 1;  my $DATA = 1;
416  my $SUBNODES = 2;  my $SUBNODES = 2;
417    
418  =head3 new  =head2 new
419    
420  Create New node  Create New node
421    
# Line 373  sub new { Line 435  sub new {
435    bless [@_] => $package;    bless [@_] => $package;
436  }  }
437    
438  =head3 locate_key  =head2 locate_key
439    
440  Locate key in node using linear search. This should probably be replaced  Locate key in node using linear search. This should probably be replaced
441  by binary search for better performance.  by binary search for better performance.
# Line 410  sub locate_key { Line 472  sub locate_key {
472  }  }
473    
474    
475  =head3 emptynode  =head2 emptynode
476    
477  Creates new empty node  Creates new empty node
478    
# Line 423  sub emptynode { Line 485  sub emptynode {
485    new($_[0]);                   # Pass package name, but not anything else.    new($_[0]);                   # Pass package name, but not anything else.
486  }  }
487    
488  =head3 is_empty  =head2 is_empty
489    
490  Test if node is empty  Test if node is empty
491    
# Line 437  sub is_empty { Line 499  sub is_empty {
499    !defined($self) || $#$self < 0;    !defined($self) || $#$self < 0;
500  }  }
501    
502  =head3 key  =head2 key
503    
504  Return C<$i>th key from node  Return C<$i>th key from node
505    
# Line 453  sub key { Line 515  sub key {
515     $_[0]->[$KEYS][$_[1]];     $_[0]->[$KEYS][$_[1]];
516  }  }
517    
518  =head3 data  =head2 data
519    
520  Return C<$i>th data from node  Return C<$i>th data from node
521    
# Line 466  sub data { Line 528  sub data {
528    $self->[$DATA][$n];    $self->[$DATA][$n];
529  }  }
530    
531  =head3 kdp_replace  =head2 kdp_replace
532    
533  Set key data pair for C<$i>th element in node  Set key data pair for C<$i>th element in node
534    
# Line 487  sub kdp_replace { Line 549  sub kdp_replace {
549     $self->[$DATA][$n]];     $self->[$DATA][$n]];
550  }  }
551    
552  =head3 kdp_insert  =head2 kdp_insert
553    
554    Insert key/data pair in tree
555    
556   # No return value.    $node->kdp_insert("key value" => "data value");
557    
558    No return value.
559    
560  =cut  =cut
561    
# Line 508  sub kdp_insert { Line 574  sub kdp_insert {
574    splice(@{$self->[$SUBNODES]}, $where, 0, undef);    splice(@{$self->[$SUBNODES]}, $where, 0, undef);
575  }  }
576    
577  =head3 kdp_append  =head2 kdp_append
578    
579  Adds new data keys and values to C<$i>th element in node  Adds new data keys and values to C<$i>th element in node
580    
# Line 529  sub kdp_append { Line 595  sub kdp_append {
595     $self->[$DATA][$n]];     $self->[$DATA][$n]];
596  }  }
597    
598  =head3 subnode  =head2 subnode
599    
600  Set new or return existing subnode  Set new or return existing subnode
601    
# Line 547  sub subnode { Line 613  sub subnode {
613    $self->[$SUBNODES][$n];    $self->[$SUBNODES][$n];
614  }  }
615    
616  =head3 is_leaf  =head2 is_leaf
617    
618  Test if node is leaf  Test if node is leaf
619    
# Line 560  sub is_leaf { Line 626  sub is_leaf {
626    ! defined $self->[$SUBNODES][0]; # undefined subnode means leaf node.    ! defined $self->[$SUBNODES][0]; # undefined subnode means leaf node.
627  }  }
628    
629  =head3 size  =head2 size
630    
631  Return number of keys in the node  Return number of keys in the node
632    
# Line 573  sub size { Line 639  sub size {
639    return scalar(@{$self->[$KEYS]});    return scalar(@{$self->[$KEYS]});
640  }  }
641    
642  =head3 halves  =head2 halves
643    
644    Split node into two halves so that keys C<0 .. $n-1> are in one node
645    and keys C<$n+1 ... $size> are in the other.
646    
647   # Accept an index $n    my ($left_node, $right_node, $kdp) = $node->halves($n);
  # Divide into two nodes so that keys 0 .. $n-1 are in one node  
  # and keys $n+1 ... $size are in the other.  
648    
649  =cut  =cut
650    
# Line 601  sub halves { Line 668  sub halves {
668    ($self->new(@left), $self->new(@right), \@middle);    ($self->new(@left), $self->new(@right), \@middle);
669  }  }
670    
671  =head3 to_string  =head2 to_string
672    
673  Dumps tree as string  Dumps tree as string
674    
# Line 652  sub to_string { Line 719  sub to_string {
719    
720  =end comment  =end comment
721    
722  =head3 to_dot  =head2 to_dot
723    
724  Recursivly walk nodes of tree  Recursivly walk nodes of tree
725    
# Line 691  sub to_dot { Line 758  sub to_dot {
758          $dot;          $dot;
759  }  }
760    
761  =head3 to_jsfind  =head2 to_xml
762    
763    Escape <, >, & and ", and to produce valid XML
764    
765    =cut
766    
767    my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
768    my $escape_re  = join '|' => keys %escape;
769    
770    sub to_xml {
771            my $self = shift || confess "you should call to_xml as object!";
772    
773            my $d = shift || return;
774            $d = $self->SUPER::_recode($d);
775            confess "escape_re undefined!" unless ($escape_re);
776            $d =~ s/($escape_re)/$escape{$1}/g;
777            return $d;
778    }
779    
780    =head2 base62
781    
782    Convert number to base62 (used for jsFind index filenames).
783    
784     my $n = $tree->base62(50);
785    
786    =cut
787    
788    sub base62 {
789            my $self = shift;
790    
791            my $value = shift;
792    
793            confess("need non-negative number") if (! defined($value) || $value < 0);
794    
795            my @digits = qw(
796                    0 1 2 3 4 5 6 7 8 9
797                    a b c d e f g h i j k l m n o p q r s t u v w x y z
798                    A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
799            );
800    
801            my $base = scalar(@digits);
802            my $out = "";
803            my $pow = 1;
804            my $pos = 0;
805    
806    
807            if($value == 0) {
808                    return "0";
809            }
810    
811            while($value > 0) {
812                    $pos = $value % $base;
813                    $out = $digits[$pos] . $out;
814                    $value = floor($value/$base);
815                    $pow *= $base;
816            }
817    
818            return $out;
819    }
820    
821    =head2 to_jsfind
822    
823  Create jsFind xml files  Create jsFind xml files
824    
# Line 710  sub to_jsfind { Line 837  sub to_jsfind {
837          confess("path is undefined.") unless ($path);          confess("path is undefined.") unless ($path);
838          confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file));          confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file));
839    
840            $file = $self->base62($file);
841    
842          my $nr_keys = 0;          my $nr_keys = 0;
843    
844          my ($k, $d, $s) = @$self;          my ($k, $d, $s) = @$self;
# Line 721  sub to_jsfind { Line 850  sub to_jsfind {
850                  my $key = lc($k->[$i]);                  my $key = lc($k->[$i]);
851    
852                  if ($key) {                  if ($key) {
853                          $key_xml .= qq{<k>$key</k>};                          $key_xml .= '<k>'.$self->to_xml($key).'</k>';
854                          $data_xml .= qq{<e>};                          $data_xml .= '<e>';
855          #use Data::Dumper;          #use Data::Dumper;
856          #print Dumper($d->[$i]);          #print Dumper($d->[$i]);
857                          foreach my $path (keys %{$d->[$i]}) {                          foreach my $path (keys %{$d->[$i]}) {
858                                  $data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.($d->[$i]->{$path}->{'t'} || 'no title').'">'.$path.'</l>';                                  $data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.$self->to_xml($d->[$i]->{$path}->{'t'} || 'no title').'">'.$self->to_xml($path).'</l>';
859                                  $nr_keys++;                                  $nr_keys++;
860                          }                          }
861                          $data_xml .= qq{</e>};                          $data_xml .= '</e>';
862                  }                  }
863    
864                  $nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]);                  $nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]);
865          }          }
866    
867          $key_xml .= "</n>";          $key_xml .= '</n>';
868          $data_xml .= "</d>";          $data_xml .= '</d>';
869    
870          if (! -e $path) {          if (! -e $path) {
871                  mkpath($path) || croak "can't create dir '$path': $!";                  mkpath($path) || croak "can't create dir '$path': $!";
# Line 763  jsFind web site L<http://www.elucidsoft. Line 892  jsFind web site L<http://www.elucidsoft.
892    
893  B-Trees in perl web site L<http://perl.plover.com/BTree/>  B-Trees in perl web site L<http://perl.plover.com/BTree/>
894    
895    This module web site L<https://www.rot13.org/~dpavlin/jsFind.html>
896    
897  =head1 AUTHORS  =head1 AUTHORS
898    
899  Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C<BTree.pm> which was  Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C<BTree.pm> which was

Legend:
Removed from v.8  
changed lines
  Added in v.15

  ViewVC Help
Powered by ViewVC 1.1.26