/[webpac]/trunk/tools/check_utf8.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/tools/check_utf8.pl

Parent Directory | Revision Log


Revision 61 - (hide annotations)
Fri Jul 4 19:37:32 2003 UTC (16 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 1115 byte(s)
unsorted perl tools and snippets

#!/usr/bin/perl -w

# check_utf8.pl - quick comparison of two treatments of ISO-8859-2 text:
#   1. conversion to UTF-8 via Unicode::Map8
#   2. accent stripping via Text::Unaccent
# For the input and for each result it prints the length (and, for the
# results, the string itself) to STDOUT.

use strict;
use warnings;
use Text::Unaccent 1.02; # 1.01 won't compile on my platform,
require Unicode::Map8;

# how to convert isis code page to UTF8?
# The original reported "$!" on failure; nothing here shows the constructor
# setting errno, so die with a message that names what went wrong instead.
my $isis_map = Unicode::Map8->new('ISO-8859-2')
    or die "can't create Unicode::Map8 mapping for ISO-8859-2\n";

# Larger sample record. It used to be declared as a first "my $test" and was
# then masked by the second declaration below (a warning under -w), so it was
# never actually used. Kept under its own name; assign it to $test to try it.
# NOTE(review): the s-caron in "ponasanje" is written as the two bytes
# C5 A1 (its UTF-8 encoding), which is what the original file appears to
# have contained -- confirm against the repository copy.
my $xml_test = "<xml>
<author_swish>Skinner B. F. Skinner B. F. B. F. Skiner </author_swish>
<title_swish>Nauka i ljudsko ponasanje B. F. Skiner </title_swish>
<headline_swish>Nauka i ljudsko ponasanje B. F. Skiner 1969 </headline_swish>
<db_dir>ps</db_dir>
<title_display>Nauka i ljudsko pona\x{C5}\x{A1}anje / B. F. Skiner</title_display>
<headline_display>Nauka i ljudsko pona\x{C5}\x{A1}anje / B. F. Skiner , 1969</headline_display>
<author_display>Skinner, B. F. -- 7oo 'Skinner' 'B. F.'</author_display>
</xml>";

# Ten ISO-8859-2 bytes: s-caron, d-stroke, c-caron, c-acute, z-caron in
# lower case, then the same five letters in upper case. Written as hex
# escapes so the input bytes do not depend on how this file is stored.
my $test = "\xB9\xF0\xE8\xE6\xBE\xA9\xD0\xC8\xC6\xAE";

#print "original len: ",length($test),"\t$test\n";
print "original len: ", length($test), "\n";

# tou() maps the 8-bit string to a Unicode string object; utf8 serialises it.
my $tmp = $isis_map->tou($test)->utf8;

print "UTF8 len: ", length($tmp), "\t$tmp\n";
#print "UTF8 len: ", length($tmp),"\n";

# Strip accents, telling Text::Unaccent which charset the input is in.
$tmp = unac_string('ISO-8859-2', $test);
print "unacct_len: ", length($tmp), "\t$tmp\n";
#print "unacct_len: ", length($tmp),"\n";

Properties

Name Value
cvs2svn:cvs-rev 1.1
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26