#!/usr/bin/perl

# Smoke test for two ways of processing ISO-8859-2 text:
#   1. converting it to UTF-8 with Unicode::Map8, and
#   2. stripping diacritics from it with Text::Unaccent.
# For the raw input and for each result it prints the length reported by
# length() (and, for the two results, the resulting string itself).

use strict;
use warnings;
use Text::Unaccent 1.02;	# 1.01 won't compile on my platform,
require Unicode::Map8;

# Code page of the input data. Kept in one place because both the UTF-8
# conversion and the unaccenting step must agree on it.
my $charset = 'ISO-8859-2';

# how to convert isis code page to UTF8?
my $isis_map = Unicode::Map8->new($charset)
	or die "can't load Unicode::Map8 mapping for $charset: $!";

# Alternative, larger input: a sample XML record. It is not used by default;
# assign it to $test below to run the conversions on a whole record.
# NOTE(review): the accented letter in the *_display fields was mojibake in
# the original source; it is assumed to be UTF-8 encoded "s with caron"
# (bytes C5 A1) -- confirm against the real data.
my $xml_sample = "<xml>
<author_swish>Skinner B. F. Skinner B. F. B. F. Skiner </author_swish>
<title_swish>Nauka i ljudsko ponasanje B. F. Skiner </title_swish>
<headline_swish>Nauka i ljudsko ponasanje B. F. Skiner 1969 </headline_swish>
<db_dir>ps</db_dir>
<title_display>Nauka i ljudsko pona\xC5\xA1anje / B. F. Skiner</title_display>
<headline_display>Nauka i ljudsko pona\xC5\xA1anje / B. F. Skiner , 1969</headline_display>
<author_display>Skinner, B. F. -- 7oo 'Skinner' 'B. F.'</author_display>
</xml>";

# Ten accented letters as ISO-8859-2 bytes (lower-case then upper-case
# s-caron, d-stroke, c-caron, c-acute, z-caron). Written as hex escapes so
# the input does not depend on the encoding this source file is saved in.
my $test = "\xB9\xF0\xE8\xE6\xBE\xA9\xD0\xC8\xC6\xAE";

#print "original len: ",length($test),"\t$test\n";
print "original len: ",length($test),"\n";

# tou() maps the 8-bit string to a Unicode string object; utf8 returns its
# UTF-8 representation.
my $tmp = $isis_map->tou($test)->utf8;

print "UTF8 len: ", length($tmp),"\t$tmp\n";
#print "UTF8 len: ", length($tmp),"\n";

# Strip the diacritics, telling Text::Unaccent which charset the input is in.
$tmp = unac_string($charset,$test);
print "unacct_len: ", length($tmp),"\t$tmp\n";
#print "unacct_len: ", length($tmp),"\n";