31 |
isouc disouc |
isouc disouc |
32 |
isotr disotr |
isotr disotr |
33 |
stop grundform |
stop grundform |
34 |
utf8iso |
utf8iso |
35 |
); |
); |
36 |
# (most are implemented in WAIT.xs) |
# (most are implemented in WAIT.xs) |
37 |
|
|
78 |
*decode_entities = HTML::Entities->can('decode_entities'); |
*decode_entities = HTML::Entities->can('decode_entities'); |
79 |
goto &decode_entities; |
goto &decode_entities; |
80 |
} elsif ($func =~ /^d?utf8iso$/) { |
} elsif ($func =~ /^d?utf8iso$/) { |
|
require WAIT::Filter::utf8iso; |
|
|
croak "Your perl version must at least be 5.00556 to use '$func'" |
|
|
if $] < 5.00556; |
|
81 |
no strict 'refs'; |
no strict 'refs'; |
82 |
*$func = \&{"WAIT::Filter::utf8iso::$func"}; |
*$func = sub { |
83 |
goto &utf8iso; |
# Courtesy JHI |
84 |
|
my $s = shift; |
85 |
|
$s =~ s{([\xC0-\xDF])([\x80-\xBF])} |
86 |
|
{chr(ord($1)<<6&0xC0|ord($2)&0x3F)}eg; |
87 |
|
$s; |
88 |
|
}; |
89 |
|
goto \&$func; |
90 |
} |
} |
91 |
Carp::confess "Class WAIT::Filter::$func not found"; |
Carp::confess "Class WAIT::Filter::$func not found"; |
92 |
} |
} |
582 |
=head1 SYNOPSIS |
=head1 SYNOPSIS |
583 |
|
|
584 |
use WAIT::Filter qw(Stem Soundex Phonix isolc disolc isouc disouc |
use WAIT::Filter qw(Stem Soundex Phonix isolc disolc isouc disouc |
585 |
isotr disotr stop grundform utf8iso); |
isotr disotr stop grundform); |
586 |
|
|
587 |
$stem = Stem($word); |
$stem = Stem($word); |
588 |
$scode = Soundex($word); |
$scode = Soundex($word); |
694 |
|
|
695 |
=item C<$new = >B<utf8iso>C<($word)> |
=item C<$new = >B<utf8iso>C<($word)> |
696 |
|
|
697 |
Convert UTF-8 encoded strings to ISO-8859-1. WAIT is currently
Deprecated due to flux in perl versions between 5.005 and 5.8. The |
698 |
internally based on the Latin1 character set, so if you process |
function converts UTF-8 encoded strings to ISO-8859-1. WAIT is
699 |
|
internally still based on the Latin1 character set, so if you process |
700 |
anything in a different encoding, you should convert to Latin1 as the |
anything in a different encoding, you should convert to Latin1 as the |
701 |
first filter. |
first filter, or refrain from using the ISO Latin-1 based filter
702 |
|
functions. It is recommended that you use your own converter based on |
703 |
|
the perl version you're using. |
704 |
|
|
705 |
=item split, split2, split3, ... |
=item split, split2, split3, ... |
706 |
|
|