--- trunk/jsFind.pm 2004/07/21 15:34:03 7 +++ trunk/jsFind.pm 2004/07/26 20:30:12 11 @@ -3,8 +3,9 @@ use 5.008004; use strict; use warnings; +use HTML::Entities; -our $VERSION = '0.01'; +our $VERSION = '0.03'; =head1 NAME @@ -305,15 +306,36 @@ Returns number of nodes in created tree. +There is also longer version if you want to recode your data charset +into different one (probably UTF-8): + + $root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8'); + +Destination encoding is UTF-8 by default, so you don't have to specify it. + + $root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250'); + =cut +my $iconv; +my $iconv_l1; + sub to_jsfind { my $self = shift; my $path = shift || confess "to_jsfind need path to your index!"; + my ($from_cp,$to_cp) = @_; + + $to_cp ||= 'UTF-8'; + + if ($from_cp && $to_cp) { + $iconv = Text::Iconv->new($from_cp,$to_cp); + } + $iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp); + $path .= "/" if ($path =~ /\/$/); - carp "create directory for index '$path': $!" if (! -w $path); + #carp "creating directory for index '$path'" if (! -w $path); return $self->root->to_jsfind($path,"0"); } @@ -324,6 +346,31 @@ $_[0] cmp $_[1]; } +=head3 _recode + +This is internal function to recode charset. + +It will also try to decode HTML::Entities in data. + +=cut + +sub _recode { + my $self = shift; + my $text = shift || return; + + sub _decode_html_entities { + my $data = shift || return; + $data = $iconv_l1->convert(decode_entities($data)) || croak "entity decode problem: $data"; + } + + if ($iconv) { + $text = $iconv->convert($text) || $text && carp "convert problem: $text"; + $text =~ s/(\&\w+;)/_decode_html_entities($1)/ges; + } + + return $text; +} + ##################################################################### =head2 jsFind::Node methods @@ -348,6 +395,9 @@ use Carp; use File::Path; +use Text::Iconv; + +use base 'jsFind'; my $KEYS = 0; my $DATA = 1; @@ -691,22 +741,45 @@ $dot; } +=head3 to_xml + +Escape <, >, & and ", and to produce valid XML + +=cut + +my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"'); +my $escape_re = join '|' => keys %escape; + +sub to_xml { + my $self = shift || confess "you should call to_xml as object!"; + + my $d = shift || return; + $d = $self->SUPER::_recode($d); + confess "escape_re undefined!" unless ($escape_re); + $d =~ s/($escape_re)/$escape{$1}/g; + return $d; +} + =head3 to_jsfind Create jsFind xml files - my $nr=$tree->to_dot('/path/to/index','0'); + my $nr=$tree->to_jsfind('/path/to/index','0'); Returns number of elements created =cut + sub to_jsfind { my $self = shift; my ($path,$file) = @_; return 0 if $self->is_empty; + confess("path is undefined.") unless ($path); + confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file)); + my $nr_keys = 0; my ($k, $d, $s) = @$self; @@ -718,22 +791,22 @@ my $key = lc($k->[$i]); if ($key) { - $key_xml .= qq{$key}; - $data_xml .= qq{}; + $key_xml .= ''.$self->to_xml($key).''; + $data_xml .= ''; #use Data::Dumper; #print Dumper($d->[$i]); foreach my $path (keys %{$d->[$i]}) { - $data_xml .= ''.$path.''; + $data_xml .= ''.$self->to_xml($path).''; $nr_keys++; } - $data_xml .= qq{}; + $data_xml .= ''; } $nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]); } - $key_xml .= ""; - $data_xml .= ""; + $key_xml .= ''; + $data_xml .= ''; if (! -e $path) { mkpath($path) || croak "can't create dir '$path': $!";