--- trunk/jsFind.pm 2004/07/26 20:17:57 10 +++ trunk/jsFind.pm 2004/07/26 20:30:12 11 @@ -3,8 +3,9 @@ use 5.008004; use strict; use warnings; +use HTML::Entities; -our $VERSION = '0.02'; +our $VERSION = '0.03'; =head1 NAME @@ -310,9 +311,14 @@ $root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8'); +Destination encoding is UTF-8 by default, so you don't have to specify it. + + $root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250'); + =cut my $iconv; +my $iconv_l1; sub to_jsfind { my $self = shift; @@ -320,12 +326,16 @@ my $path = shift || confess "to_jsfind need path to your index!"; my ($from_cp,$to_cp) = @_; + + $to_cp ||= 'UTF-8'; + if ($from_cp && $to_cp) { $iconv = Text::Iconv->new($from_cp,$to_cp); } + $iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp); $path .= "/" if ($path =~ /\/$/); - carp "create directory for index '$path': $!" if (! -w $path); + #carp "creating directory for index '$path'" if (! -w $path); return $self->root->to_jsfind($path,"0"); } @@ -340,17 +350,25 @@ This is internal function to recode charset. +It will also try to decode HTML::Entities in data. + =cut sub _recode { my $self = shift; my $text = shift || return; + sub _decode_html_entities { + my $data = shift || return; + $data = $iconv_l1->convert(decode_entities($data)) || croak "entity decode problem: $data"; + } + if ($iconv) { - return $iconv->convert($text) || $text; - } else { - return $text; + $text = $iconv->convert($text) || $text && carp "convert problem: $text"; + $text =~ s/(\&\w+;)/_decode_html_entities($1)/ges; } + + return $text; } ##################################################################### @@ -723,6 +741,25 @@ $dot; } +=head3 to_xml + +Escape <, >, & and ", and to produce valid XML + +=cut + +my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"'); +my $escape_re = join '|' => keys %escape; + +sub to_xml { + my $self = shift || confess "you should call to_xml as object!"; + + my $d = shift || return; + $d = $self->SUPER::_recode($d); + confess "escape_re undefined!" unless ($escape_re); + $d =~ s/($escape_re)/$escape{$1}/g; + return $d; +} + =head3 to_jsfind Create jsFind xml files @@ -733,6 +770,7 @@ =cut + sub to_jsfind { my $self = shift; my ($path,$file) = @_; @@ -753,22 +791,22 @@ my $key = lc($k->[$i]); if ($key) { - $key_xml .= qq{$key}; - $data_xml .= qq{}; + $key_xml .= ''.$self->to_xml($key).''; + $data_xml .= ''; #use Data::Dumper; #print Dumper($d->[$i]); foreach my $path (keys %{$d->[$i]}) { - $data_xml .= ''.$path.''; + $data_xml .= ''.$self->to_xml($path).''; $nr_keys++; } - $data_xml .= qq{}; + $data_xml .= ''; } $nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]); } - $key_xml .= ""; - $data_xml .= ""; + $key_xml .= ''; + $data_xml .= ''; if (! -e $path) { mkpath($path) || croak "can't create dir '$path': $!"; @@ -777,8 +815,8 @@ open(K, "> ${path}/${file}.xml") || croak "can't open '$path/$file.xml': $!"; open(D, "> ${path}/_${file}.xml") || croak "can't open '$path/_$file.xml': $!"; - print K $self->SUPER::_recode($key_xml); - print D $self->SUPER::_recode($data_xml); + print K $key_xml; + print D $data_xml; close(K); close(D);