/[webpac]/trunk/tools/check_utf8.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/tools/check_utf8.pl

Parent Directory | Revision Log


Revision 61 - (show annotations)
Fri Jul 4 19:37:32 2003 UTC (20 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 1115 byte(s)
unsorted perl tools and snippets

#!/usr/bin/perl

# check_utf8.pl - scratch test for character-set handling.
#
# Takes a string of ISO-8859-2 encoded bytes and prints:
#   * its original length,
#   * the length and content of its UTF-8 conversion (Unicode::Map8),
#   * the length and content of its unaccented form (Text::Unaccent).

use strict;
use warnings;

use Text::Unaccent 1.02;    # 1.01 won't compile on my platform
require Unicode::Map8;

# Encoding of the input data; used both for the Map8 table and for unac_string
# so the two conversions always agree.
# how to convert isis code page to UTF8?
my $charset = 'ISO-8859-2';

# Low-precedence "or" so the check applies to the result of new(); the message
# names the charset because a bare "$!" gives no hint about what failed.
my $isis_map = Unicode::Map8->new($charset)
    or die "can't load $charset mapping: $!";

# Alternative, larger input (a sample record). Not used by default; assign it
# to $test below to try it.
# NOTE(review): the accented letter in "ponašanje" may not have been stored as
# ISO-8859-2 in the original file -- confirm its encoding before relying on it.
my $xml_sample = "<xml>
<author_swish>Skinner B. F. Skinner B. F. B. F. Skiner </author_swish>
<title_swish>Nauka i ljudsko ponasanje B. F. Skiner </title_swish>
<headline_swish>Nauka i ljudsko ponasanje B. F. Skiner 1969 </headline_swish>
<db_dir>ps</db_dir>
<title_display>Nauka i ljudsko ponašanje / B. F. Skiner</title_display>
<headline_display>Nauka i ljudsko ponašanje / B. F. Skiner , 1969</headline_display>
<author_display>Skinner, B. F. -- 7oo 'Skinner' 'B. F.'</author_display>
</xml>";

# Ten accented letters (lower case, then upper case) written as explicit
# ISO-8859-2 byte escapes, so the test does not depend on the encoding this
# source file happens to be saved in.
my $test = "\xB9\xF0\xE8\xE6\xBE\xA9\xD0\xC8\xC6\xAE";

#print "original len: ",length($test),"\t$test\n";
print "original len: ", length($test), "\n";

# tou() maps the 8-bit string to Unicode; utf8 returns it UTF-8 encoded.
my $tmp = $isis_map->tou($test)->utf8;

print "UTF8 len: ", length($tmp), "\t$tmp\n";
#print "UTF8 len: ", length($tmp),"\n";

# Strip accents, telling Text::Unaccent which charset the input is in.
$tmp = unac_string($charset, $test);
print "unacct_len: ", length($tmp), "\t$tmp\n";
#print "unacct_len: ", length($tmp),"\n";

Properties

Name Value
cvs2svn:cvs-rev 1.1
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26