--- trunk/jsFind.pm 2004/07/21 15:34:03 7
+++ trunk/jsFind.pm 2004/07/26 20:30:12 11
@@ -3,8 +3,9 @@
use 5.008004;
use strict;
use warnings;
+use HTML::Entities;
-our $VERSION = '0.01';
+our $VERSION = '0.03';
=head1 NAME
@@ -305,15 +306,36 @@
Returns number of nodes in created tree.
+There is also a longer form, for when you want to recode your data from one
+charset into another (probably UTF-8):
+
+ $root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8');
+
+Destination encoding is UTF-8 by default, so you don't have to specify it.
+
+ $root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250');
+
=cut
+# Charset converters created by to_jsfind and used by _recode. They are
+# file-scoped, so they always describe the most recent to_jsfind call.
+my $iconv;	# source charset -> destination charset; undef when no source charset was given
+my $iconv_l1;	# ISO-8859-1 -> destination charset, applied to decoded HTML entities
+
+# to_jsfind($path [, $from_cp [, $to_cp]])
+#
+# Sets up the charset converters and then delegates to the root node's
+# to_jsfind, starting with file name "0"; returns whatever that call returns.
+# Confesses when $path is missing. $to_cp defaults to 'UTF-8'.
sub to_jsfind {
my $self = shift;
my $path = shift || confess "to_jsfind need path to your index!";
+ my ($from_cp,$to_cp) = @_;
+
+ $to_cp ||= 'UTF-8';
+
+ # Reset the converter on every call: without a source charset it must be
+ # cleared, otherwise a converter left over from an earlier call would
+ # silently be applied to this index as well.
+ $iconv = $from_cp ? Text::Iconv->new($from_cp,$to_cp) : undef;
+ $iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp);
+
+ # NOTE(review): this appends "/" only when $path already ends in "/".
+ # The test looks inverted (!~ was probably intended) - confirm before changing.
$path .= "/" if ($path =~ /\/$/);
- carp "create directory for index '$path': $!" if (! -w $path);
+ #carp "creating directory for index '$path'" if (! -w $path);
return $self->root->to_jsfind($path,"0");
}
@@ -324,6 +346,31 @@
$_[0] cmp $_[1];
}
+=head3 _recode
+
+This is an internal function that recodes text from one charset into another.
+
+It will also try to decode HTML entities in the data.
+
+=cut
+
+# Decode a single HTML entity (as matched by _recode) and convert the result
+# from ISO-8859-1 into the destination charset using the file-level $iconv_l1
+# converter. Returns nothing for a false argument; croaks when the conversion
+# yields a false value.
+sub _decode_html_entities {
+    my $data = shift || return;
+    my $decoded = $iconv_l1->convert(decode_entities($data))
+        || croak "entity decode problem: $data";
+    return $decoded;
+}
+
+# _recode($text)
+#
+# Returns nothing when $text is false. When no source-charset converter
+# ($iconv) is configured, $text is returned unchanged. Otherwise $text is
+# converted and every "&name;" sequence is then replaced by its decoded,
+# recoded character.
+sub _recode {
+    my $self = shift;
+    my $text = shift || return;
+
+    return $text unless ($iconv);
+
+    # Keep the original text when conversion fails. The previous one-liner
+    # "convert(...) || $text && carp ..." parsed as
+    # "convert(...) || ($text && carp(...))", so a failed conversion
+    # replaced the text with carp's return value.
+    my $converted = $iconv->convert($text);
+    if ($converted) {
+        $text = $converted;
+    } else {
+        carp "convert problem: $text";
+    }
+
+    # Only named entities match here: \w+ does not match the "#" of numeric
+    # references such as "&#38;".
+    $text =~ s/(\&\w+;)/_decode_html_entities($1)/ges;
+
+    return $text;
+}
+
#####################################################################
=head2 jsFind::Node methods
@@ -348,6 +395,9 @@
use Carp;
use File::Path;
+use Text::Iconv;
+
+use base 'jsFind';
my $KEYS = 0;
my $DATA = 1;
@@ -691,22 +741,45 @@
$dot;
}
+=head3 to_xml
+
+Escape <, >, & and " in the given string to produce valid XML.
+
+=cut
+
+# Characters that are significant in XML markup, mapped to the entity that
+# replaces each of them. (The table previously mapped every character to
+# itself, which made to_xml a no-op.)
+my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
+# Alternation matching any single character that has an entry in %escape.
+my $escape_re = join '|' => keys %escape;
+
+# to_xml($data)
+#
+# Passes $data through the parent class's _recode and then replaces each
+# character listed in %escape with its entity. Returns the escaped string, or
+# nothing when $data is false. Confesses when called without an invocant or
+# when $escape_re is not set.
+sub to_xml {
+    my $self = shift || confess "you should call to_xml as object!";
+
+    my $d = shift || return;
+    # Recode first, so that "&" characters produced by entity decoding are
+    # escaped as well.
+    $d = $self->SUPER::_recode($d);
+    confess "escape_re undefined!" unless ($escape_re);
+    $d =~ s/($escape_re)/$escape{$1}/g;
+    return $d;
+}
+
=head3 to_jsfind
Create jsFind xml files
- my $nr=$tree->to_dot('/path/to/index','0');
+ my $nr=$tree->to_jsfind('/path/to/index','0');
Returns number of elements created
=cut
+
sub to_jsfind {
my $self = shift;
my ($path,$file) = @_;
return 0 if $self->is_empty;
+ confess("path is undefined.") unless ($path);
+ confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file));
+
my $nr_keys = 0;
my ($k, $d, $s) = @$self;
@@ -718,22 +791,22 @@
my $key = lc($k->[$i]);
if ($key) {
- $key_xml .= qq{$key};
- $data_xml .= qq{};
+ $key_xml .= ''.$self->to_xml($key).'';
+ $data_xml .= '';
#use Data::Dumper;
#print Dumper($d->[$i]);
foreach my $path (keys %{$d->[$i]}) {
- $data_xml .= ''.$path.'';
+ $data_xml .= ''.$self->to_xml($path).'';
$nr_keys++;
}
- $data_xml .= qq{};
+ $data_xml .= '';
}
$nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]);
}
- $key_xml .= "";
- $data_xml .= "";
+ $key_xml .= '';
+ $data_xml .= '';
if (! -e $path) {
mkpath($path) || croak "can't create dir '$path': $!";