#!/usr/bin/perl -w

# Scratch script that feeds the same accented ISO-8859-2 sample through two
# conversions and prints the length and content of each result so they can
# be compared by eye:
#
#   1. Unicode::Map8  - recode the 8-bit input to UTF-8
#   2. Text::Unaccent - replace accented letters with unaccented ones
#
# Output goes to STDOUT; nothing is read from the command line or STDIN.

use strict;
use warnings;

use Text::Unaccent 1.02;    # 1.01 won't compile on my platform
require Unicode::Map8;

# Character set of the input data. Kept in one place so that the Map8
# recoding and the unac_string() call below can never disagree.
my $charset = 'ISO-8859-2';

# how to convert isis code page to UTF8?
my $isis_map = Unicode::Map8->new($charset)
    or die "can't create Unicode::Map8 for $charset: $!";

# Larger alternative sample: an XML record with *_swish and *_display fields.
# It is not used by default; assign it to $test below to try it.
# (It used to be declared as a second `my $test`, which made perl warn about
# a masked declaration and threw this value away.)
# NOTE(review): the accented letter in the *_display fields was mis-encoded
# in the copy this was restored from; it is written here as the UTF-8 bytes
# of s-caron (\xC5\xA1) -- confirm against the real data.
my $xml_sample = "<xml>
<author_swish>Skinner B. F. Skinner B. F. B. F. Skiner </author_swish>
<title_swish>Nauka i ljudsko ponasanje B. F. Skiner </title_swish>
<headline_swish>Nauka i ljudsko ponasanje B. F. Skiner 1969 </headline_swish>
<db_dir>ps</db_dir>
<title_display>Nauka i ljudsko pona\xC5\xA1anje / B. F. Skiner</title_display>
<headline_display>Nauka i ljudsko pona\xC5\xA1anje / B. F. Skiner , 1969</headline_display>
<author_display>Skinner, B. F. -- 7oo 'Skinner' 'B. F.'</author_display>
</xml>";

# Ten accented letters as ISO-8859-2 bytes: s-caron, d-stroke, c-caron,
# c-acute, z-caron in lower case, followed by the same five in upper case.
# Hex escapes keep the sample independent of the encoding this file is
# saved or displayed in.
my $test = "\xB9\xF0\xE8\xE6\xBE\xA9\xD0\xC8\xC6\xAE";

#print "original len: ",length($test),"\t$test\n";
print "original len: ", length($test), "\n";

# Recode: 8-bit input -> Unicode::String (tou) -> UTF-8 encoded string.
my $tmp = $isis_map->tou($test)->utf8;

print "UTF8 len: ", length($tmp), "\t$tmp\n";
#print "UTF8 len: ", length($tmp),"\n";

# Strip accents; unac_string() takes the charset of its input as first argument.
$tmp = unac_string($charset, $test);
print "unacct_len: ", length($tmp), "\t$tmp\n";
#print "unacct_len: ", length($tmp),"\n";