| Revision 337 (by dpavlin, 2004/06/10 19:22:40) |
|---|
#!/usr/bin/perl -w
use strict;
use Text::Unaccent 1.02; # 1.01 won't compile on my platform,
require Unicode::Map8;
# How to convert the ISIS code page to UTF-8?
#
# Small experiment: take a string of accented characters encoded as
# ISO-8859-2, then print its byte length, its UTF-8 form (via
# Unicode::Map8) and its unaccented form (via Text::Unaccent).

# Single source of truth for the input encoding used by both converters.
my $charset = 'ISO-8859-2';

# The constructor result is checked because a false value means the
# mapping could not be loaded.
my $isis_map = Unicode::Map8->new($charset)
    or die "can't load $charset mapping for Unicode::Map8: $!";

# Alternate, record-shaped sample input.  It is currently unused; it has
# its own name so that it no longer collides with $test below (two "my"
# declarations of the same variable in one scope trigger a "masks earlier
# declaration" warning under -w).
my $xml_sample = "<xml>
<author_swish>Skinner B. F. Skinner B. F. B. F. Skiner </author_swish>
<title_swish>Nauka i ljudsko ponasanje B. F. Skiner </title_swish>
<headline_swish>Nauka i ljudsko ponasanje B. F. Skiner 1969 </headline_swish>
<db_dir>ps</db_dir>
<title_display>Nauka i ljudsko ponašanje / B. F. Skiner</title_display>
<headline_display>Nauka i ljudsko ponašanje / B. F. Skiner , 1969</headline_display>
<author_display>Skinner, B. F. -- 7oo 'Skinner' 'B. F.'</author_display>
</xml>";

# Ten ISO-8859-2 bytes: s-caron, d-stroke, c-caron, c-acute, z-caron in
# lower case, followed by the same five letters in upper case.  Written
# as hex escapes so the bytes stay the same no matter which encoding this
# source file is saved in or converted to.
my $test = "\xB9\xF0\xE8\xE6\xBE\xA9\xD0\xC8\xC6\xAE";

#print "original len: ",length($test),"\t$test\n";
print "original len: ", length($test), "\n";

# tou() maps the 8-bit string to a Unicode string object; utf8() returns
# its UTF-8 representation.
my $utf8 = $isis_map->tou($test)->utf8;
print "UTF8 len: ", length($utf8), "\t$utf8\n";
#print "UTF8 len: ", length($utf8),"\n";

# Strip the accents, interpreting the input in the same charset.
my $unaccented = unac_string($charset, $test);
print "unacct_len: ", length($unaccented), "\t$unaccented\n";
#print "unacct_len: ", length($unaccented),"\n";