3 |
use 5.008004; |
use 5.008004; |
4 |
use strict; |
use strict; |
5 |
use warnings; |
use warnings; |
6 |
|
use HTML::Entities; |
7 |
|
|
8 |
our $VERSION = '0.02'; |
our $VERSION = '0.03'; |
9 |
|
|
10 |
=head1 NAME |
=head1 NAME |
11 |
|
|
311 |
|
|
312 |
$root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8'); |
$root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8'); |
313 |
|
|
314 |
|
Destination encoding is UTF-8 by default, so you don't have to specify it. |
315 |
|
|
316 |
|
$root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250'); |
317 |
|
|
318 |
=cut |
=cut |
319 |
|
|
320 |
my $iconv; |
my $iconv; |
321 |
|
my $iconv_l1; |
322 |
|
|
323 |
sub to_jsfind { |
sub to_jsfind { |
324 |
my $self = shift; |
my $self = shift; |
326 |
my $path = shift || confess "to_jsfind need path to your index!"; |
my $path = shift || confess "to_jsfind need path to your index!"; |
327 |
|
|
328 |
my ($from_cp,$to_cp) = @_; |
my ($from_cp,$to_cp) = @_; |
329 |
|
|
330 |
|
$to_cp ||= 'UTF-8'; |
331 |
|
|
332 |
if ($from_cp && $to_cp) { |
if ($from_cp && $to_cp) { |
333 |
$iconv = Text::Iconv->new($from_cp,$to_cp); |
$iconv = Text::Iconv->new($from_cp,$to_cp); |
334 |
} |
} |
335 |
|
$iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp); |
336 |
|
|
337 |
$path .= "/" if ($path =~ /\/$/); |
$path .= "/" if ($path =~ /\/$/); |
338 |
carp "create directory for index '$path': $!" if (! -w $path); |
#carp "creating directory for index '$path'" if (! -w $path); |
339 |
|
|
340 |
return $self->root->to_jsfind($path,"0"); |
return $self->root->to_jsfind($path,"0"); |
341 |
} |
} |
350 |
|
|
351 |
This is internal function to recode charset. |
This is internal function to recode charset. |
352 |
|
|
353 |
|
It will also try to decode HTML::Entities in data. |
354 |
|
|
355 |
=cut |
=cut |
356 |
|
|
357 |
sub _recode { |
sub _recode { |
358 |
my $self = shift; |
my $self = shift; |
359 |
my $text = shift || return; |
my $text = shift || return; |
360 |
|
|
361 |
|
sub _decode_html_entities { |
362 |
|
my $data = shift || return; |
363 |
|
$data = $iconv_l1->convert(decode_entities($data)) || croak "entity decode problem: $data"; |
364 |
|
} |
365 |
|
|
366 |
if ($iconv) { |
if ($iconv) { |
367 |
return $iconv->convert($text) || $text; |
$text = $iconv->convert($text) || $text && carp "convert problem: $text"; |
368 |
} else { |
$text =~ s/(\&\w+;)/_decode_html_entities($1)/ges; |
|
return $text; |
|
369 |
} |
} |
370 |
|
|
371 |
|
return $text; |
372 |
} |
} |
373 |
|
|
374 |
##################################################################### |
##################################################################### |
741 |
$dot; |
$dot; |
742 |
} |
} |
743 |
|
|
744 |
|
=head3 to_xml |
745 |
|
|
746 |
|
Escape <, >, & and ", and to produce valid XML |
747 |
|
|
748 |
|
=cut |
749 |
|
|
750 |
|
my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"'); |
751 |
|
my $escape_re = join '|' => keys %escape; |
752 |
|
|
753 |
|
sub to_xml { |
754 |
|
my $self = shift || confess "you should call to_xml as object!"; |
755 |
|
|
756 |
|
my $d = shift || return; |
757 |
|
$d = $self->SUPER::_recode($d); |
758 |
|
confess "escape_re undefined!" unless ($escape_re); |
759 |
|
$d =~ s/($escape_re)/$escape{$1}/g; |
760 |
|
return $d; |
761 |
|
} |
762 |
|
|
763 |
=head3 to_jsfind |
=head3 to_jsfind |
764 |
|
|
765 |
Create jsFind xml files |
Create jsFind xml files |
770 |
|
|
771 |
=cut |
=cut |
772 |
|
|
773 |
|
|
774 |
sub to_jsfind { |
sub to_jsfind { |
775 |
my $self = shift; |
my $self = shift; |
776 |
my ($path,$file) = @_; |
my ($path,$file) = @_; |
791 |
my $key = lc($k->[$i]); |
my $key = lc($k->[$i]); |
792 |
|
|
793 |
if ($key) { |
if ($key) { |
794 |
$key_xml .= qq{<k>$key</k>}; |
$key_xml .= '<k>'.$self->to_xml($key).'</k>'; |
795 |
$data_xml .= qq{<e>}; |
$data_xml .= '<e>'; |
796 |
#use Data::Dumper; |
#use Data::Dumper; |
797 |
#print Dumper($d->[$i]); |
#print Dumper($d->[$i]); |
798 |
foreach my $path (keys %{$d->[$i]}) { |
foreach my $path (keys %{$d->[$i]}) { |
799 |
$data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.($d->[$i]->{$path}->{'t'} || 'no title').'">'.$path.'</l>'; |
$data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.$self->to_xml($d->[$i]->{$path}->{'t'} || 'no title').'">'.$self->to_xml($path).'</l>'; |
800 |
$nr_keys++; |
$nr_keys++; |
801 |
} |
} |
802 |
$data_xml .= qq{</e>}; |
$data_xml .= '</e>'; |
803 |
} |
} |
804 |
|
|
805 |
$nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]); |
$nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]); |
806 |
} |
} |
807 |
|
|
808 |
$key_xml .= "</n>"; |
$key_xml .= '</n>'; |
809 |
$data_xml .= "</d>"; |
$data_xml .= '</d>'; |
810 |
|
|
811 |
if (! -e $path) { |
if (! -e $path) { |
812 |
mkpath($path) || croak "can't create dir '$path': $!"; |
mkpath($path) || croak "can't create dir '$path': $!"; |
815 |
open(K, "> ${path}/${file}.xml") || croak "can't open '$path/$file.xml': $!"; |
open(K, "> ${path}/${file}.xml") || croak "can't open '$path/$file.xml': $!"; |
816 |
open(D, "> ${path}/_${file}.xml") || croak "can't open '$path/_$file.xml': $!"; |
open(D, "> ${path}/_${file}.xml") || croak "can't open '$path/_$file.xml': $!"; |
817 |
|
|
818 |
print K $self->SUPER::_recode($key_xml); |
print K $key_xml; |
819 |
print D $self->SUPER::_recode($data_xml); |
print D $data_xml; |
820 |
|
|
821 |
close(K); |
close(K); |
822 |
close(D); |
close(D); |