5 |
use warnings; |
use warnings; |
6 |
use HTML::Entities; |
use HTML::Entities; |
7 |
|
|
8 |
our $VERSION = '0.05'; |
our $VERSION = '0.07_01'; |
9 |
|
|
10 |
use Exporter 'import'; |
use Exporter; |
11 |
use Carp; |
use Carp; |
12 |
|
|
13 |
our @ISA = qw(Exporter); |
our @ISA = qw(Exporter); |
14 |
|
|
15 |
BEGIN { |
BEGIN { |
16 |
import 'jsFind::Node'; |
Exporter::import 'jsFind::Node'; |
17 |
} |
} |
18 |
|
|
19 |
=head1 NAME |
=head1 NAME |
358 |
Create xml index files for jsFind. This should be called after |
Create xml index files for jsFind. This should be called after |
359 |
your B-Tree has been filled with data. |
your B-Tree has been filled with data. |
360 |
|
|
361 |
$root->to_jsfind('/full/path/to/index/dir/'); |
$root->to_jsfind( |
362 |
|
dir => '/full/path/to/index/dir/', |
363 |
|
data_codepage => 'ISO-8859-2', |
364 |
|
index_codepage => 'UTF-8', |
365 |
|
output_filter => sub { |
366 |
|
my $t = shift || return; |
367 |
|
$t =~ s/è/e/; |
368 |
|
} |
369 |
|
); |
370 |
|
|
371 |
|
All options except C<dir> are optional. |
372 |
|
|
373 |
Returns number of nodes in created tree. |
Returns number of nodes in created tree. |
374 |
|
|
375 |
There is also longer version if you want to recode your data charset |
Options: |
376 |
into different one (probably UTF-8): |
|
377 |
|
=over 4 |
378 |
|
|
379 |
|
=item dir |
380 |
|
|
381 |
|
Full path to directory for index (which will be created if needed). |
382 |
|
|
383 |
|
=item data_codepage |
384 |
|
|
385 |
|
If your input data isn't in C<ISO-8859-1> encoding, you will have to specify |
386 |
|
this option. |
387 |
|
|
388 |
$root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8'); |
=item index_codepage |
389 |
|
|
390 |
Destination encoding is UTF-8 by default, so you don't have to specify it. |
If your index encoding is not C<UTF-8> use this option. |
391 |
|
|
392 |
$root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250'); |
If you are not using the supplied JavaScript search code, or your browser is |
393 |
|
terribly broken and thinks that index shouldn't be in UTF-8 encoding, use |
394 |
|
this option to specify the encoding for the created XML index. |
395 |
|
|
396 |
|
=item output_filter |
397 |
|
|
398 |
|
B<This is just a draft of the documentation for an option which is not implemented yet!> |
399 |
|
|
400 |
|
Code ref to sub which can do modifications on resulting XML file for node. |
401 |
|
Encoding of this data will be in C<index_codepage> and you have to take care |
402 |
|
not to break XML structure. Calling L<xmllint> on your result index |
403 |
|
(like C<t/90xmllint.t> does in this distribution) is a good idea after using |
404 |
|
this option. |
405 |
|
|
406 |
|
This option is also the right place to plug in an unaccenting function using |
407 |
|
L<Text::Unaccent>. |
408 |
|
|
409 |
|
=back |
410 |
|
|
411 |
=cut |
=cut |
412 |
|
|
416 |
sub to_jsfind { |
sub to_jsfind { |
417 |
my $self = shift; |
my $self = shift; |
418 |
|
|
419 |
my $path = shift || confess "to_jsfind need path to your index!"; |
my %arg = @_; |
420 |
|
|
421 |
my ($from_cp,$to_cp) = @_; |
confess "to_jsfind need path to your index directory !" unless ($arg{'dir'}); |
422 |
|
|
423 |
$to_cp ||= 'UTF-8'; |
my $data_codepage = $arg{'data_codepage'}; |
424 |
|
my $index_codepage = $arg{'index_codepage'} || 'UTF-8'; |
425 |
|
|
426 |
if ($from_cp && $to_cp) { |
# create ISO-8859-1 iconv for HTML::Entities decode |
427 |
$iconv = Text::Iconv->new($from_cp,$to_cp); |
$iconv_l1 = Text::Iconv->new('ISO-8859-1',$index_codepage); |
|
} |
|
|
$iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp); |
|
428 |
|
|
429 |
$path .= "/" if ($path =~ /\/$/); |
# create another iconv for data |
430 |
#carp "creating directory for index '$path'" if (! -w $path); |
if ($data_codepage && $index_codepage) { |
431 |
|
$iconv = Text::Iconv->new($data_codepage,$index_codepage); |
432 |
|
} |
433 |
|
|
434 |
return $self->root->to_jsfind($path,"0"); |
return $self->root->to_jsfind($arg{'dir'},"0"); |
435 |
} |
} |
436 |
|
|
437 |
|
|
860 |
return $d; |
return $d; |
861 |
} |
} |
862 |
|
|
863 |
=head2 base62 |
=head2 base_x |
864 |
|
|
865 |
Convert number to base62 (used for jsFind index filenames). |
Convert number to base x (used for jsFind index filenames). |
866 |
|
|
867 |
my $n = $tree->base62(50); |
my $n = $tree->base_x(50); |
868 |
|
|
869 |
=cut |
=cut |
870 |
|
|
871 |
sub base62 { |
sub base_x { |
872 |
my $self = shift; |
my $self = shift; |
873 |
|
|
874 |
my $value = shift; |
my $value = shift; |
878 |
my @digits = qw( |
my @digits = qw( |
879 |
0 1 2 3 4 5 6 7 8 9 |
0 1 2 3 4 5 6 7 8 9 |
880 |
a b c d e f g h i j k l m n o p q r s t u v w x y z |
a b c d e f g h i j k l m n o p q r s t u v w x y z |
|
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z |
|
881 |
); |
); |
882 |
|
|
883 |
my $base = scalar(@digits); |
my $base = scalar(@digits); |
919 |
confess("path is undefined.") unless ($path); |
confess("path is undefined.") unless ($path); |
920 |
confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file)); |
confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file)); |
921 |
|
|
922 |
$file = $self->base62($file); |
$file = $self->base_x($file); |
923 |
|
|
924 |
my $nr_keys = 0; |
my $nr_keys = 0; |
925 |
|
|