--- trunk/jsFind.pm 2004/07/26 20:17:57 10
+++ trunk/jsFind.pm 2004/07/26 20:30:12 11
@@ -3,8 +3,9 @@
use 5.008004;
use strict;
use warnings;
+use HTML::Entities;
-our $VERSION = '0.02';
+our $VERSION = '0.03';
=head1 NAME
@@ -310,9 +311,14 @@
$root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8');
+Destination encoding is UTF-8 by default, so you don't have to specify it.
+
+ $root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250');
+
=cut
my $iconv;
+my $iconv_l1;
sub to_jsfind {
my $self = shift;
@@ -320,12 +326,16 @@
my $path = shift || confess "to_jsfind need path to your index!";
my ($from_cp,$to_cp) = @_;
+
+ $to_cp ||= 'UTF-8';
+
if ($from_cp && $to_cp) {
$iconv = Text::Iconv->new($from_cp,$to_cp);
}
+ $iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp);
$path .= "/" if ($path =~ /\/$/);
- carp "create directory for index '$path': $!" if (! -w $path);
+ #carp "creating directory for index '$path'" if (! -w $path);
return $self->root->to_jsfind($path,"0");
}
@@ -340,17 +350,25 @@
This is internal function to recode charset.
+It will also try to decode named HTML entities in the data (using HTML::Entities).
+
=cut
sub _recode {
my $self = shift;
my $text = shift || return;
+ # Decode a single HTML entity with HTML::Entities and recode the result
+ # through $iconv_l1 (ISO-8859-1 -> destination charset). Kept as a lexical
+ # code ref: a named sub declared inside another sub is still package-wide.
+ my $decode_entity = sub {
+     my $entity = shift;
+     return $iconv_l1->convert(decode_entities($entity))
+         || croak "entity decode problem: $entity";
+ };
+
if ($iconv) {
- return $iconv->convert($text) || $text;
- } else {
- return $text;
+ # Keep the original text when the conversion fails. The one-liner
+ # "a || $text && carp ..." parses as "a || ($text && carp ...)" and would
+ # store carp's return value in $text instead of the text itself.
+ my $converted = $iconv->convert($text);
+ if (defined $converted) {
+     $text = $converted;
+ } else {
+     carp "convert problem: $text";
+ }
+ # Only named entities (&name;) are matched and decoded here.
+ $text =~ s/(&\w+;)/$decode_entity->($1)/ge;
}
+
+ return $text;
}
#####################################################################
@@ -723,6 +741,25 @@
$dot;
}
+=head3 to_xml
+
+Recode text (see _recode) and escape <, >, & and " so that the result is valid XML
+
+=cut
+
+# Characters that are special in XML, mapped to their entity replacements.
+my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
+# quotemeta keeps the alternation safe if a regex metacharacter is ever added.
+my $escape_re = join '|' => map { quotemeta } keys %escape;
+
+# to_xml($text)
+# Recodes $text with the parent class' _recode and then replaces every
+# character listed in %escape with its XML entity.
+# Returns the escaped string, or nothing when $text is empty/false.
+# Confesses when called without an object or when $escape_re is not set.
+sub to_xml {
+ my $self = shift || confess "you should call to_xml as object!";
+
+ my $d = shift || return;
+ # recode first: _recode may turn entities into raw characters that
+ # still need escaping below
+ $d = $self->SUPER::_recode($d);
+ confess "escape_re undefined!" unless ($escape_re);
+ $d =~ s/($escape_re)/$escape{$1}/g;
+ return $d;
+}
+
=head3 to_jsfind
Create jsFind xml files
@@ -733,6 +770,7 @@
=cut
+
sub to_jsfind {
my $self = shift;
my ($path,$file) = @_;
@@ -753,22 +791,22 @@
my $key = lc($k->[$i]);
if ($key) {
- $key_xml .= qq{$key};
- $data_xml .= qq{};
+ $key_xml .= ''.$self->to_xml($key).'';
+ $data_xml .= '';
#use Data::Dumper;
#print Dumper($d->[$i]);
foreach my $path (keys %{$d->[$i]}) {
- $data_xml .= ''.$path.'';
+ $data_xml .= ''.$self->to_xml($path).'';
$nr_keys++;
}
- $data_xml .= qq{};
+ $data_xml .= '';
}
$nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]);
}
- $key_xml .= "";
- $data_xml .= "";
+ $key_xml .= '';
+ $data_xml .= '';
if (! -e $path) {
mkpath($path) || croak "can't create dir '$path': $!";
@@ -777,8 +815,8 @@
open(K, "> ${path}/${file}.xml") || croak "can't open '$path/$file.xml': $!";
open(D, "> ${path}/_${file}.xml") || croak "can't open '$path/_$file.xml': $!";
- print K $self->SUPER::_recode($key_xml);
- print D $self->SUPER::_recode($data_xml);
+ print K $key_xml;
+ print D $data_xml;
close(K);
close(D);