# Revision 61 (by dpavlin, 2003/07/04 19:37:32): unsorted Perl tools and snippets
#!/usr/bin/perl -w

use strict;
use Text::Unaccent 1.02;	# 1.01 won't compile on my platform,
require Unicode::Map8;

# Demo: take a legacy 8-bit string, convert it to UTF-8 with Unicode::Map8
# and strip its accents with Text::Unaccent, printing the length (and text)
# of each result so the three forms can be compared.
#
# Usage: <script> [accents|xml]
#   accents - short string of accented letters (default, as before)
#   xml     - longer XML record sample

# Open question from the original author: how to convert the ISIS code page
# to UTF-8?  ISO-8859-2 is used as the source charset for both steps below.
my $charset = 'ISO-8859-2';

my $isis_map = Unicode::Map8->new($charset)
    or die "can't load $charset mapping: $!";

# Both samples used to be assigned to the same `my $test`, which triggered a
# "masks earlier declaration" warning and made the XML sample unreachable.
# They are now kept side by side and selected by name.
my %sample_for = (
    xml => "<xml>
  <author_swish>Skinner B. F. Skinner B. F. B. F. Skiner </author_swish>
  <title_swish>Nauka i ljudsko ponasanje B. F. Skiner </title_swish>
  <headline_swish>Nauka i ljudsko ponasanje B. F. Skiner 1969 </headline_swish>
  <db_dir>ps</db_dir>
  <title_display>Nauka i ljudsko ponašanje / B. F. Skiner</title_display>
  <headline_display>Nauka i ljudsko ponašanje / B. F. Skiner , 1969</headline_display>
  <author_display>Skinner, B. F. -- 7oo 'Skinner' 'B. F.'</author_display>
</xml>",

    # ISO-8859-2 bytes for s-caron, d-stroke, c-caron, c-acute, z-caron in
    # lower case followed by the same five letters in upper case.  Written
    # as hex escapes so the input stays correct even if this file is saved
    # in a different encoding.
    accents => "\xB9\xF0\xE8\xE6\xBE\xA9\xD0\xC8\xC6\xAE",
);

my $sample_name = @ARGV ? shift @ARGV : 'accents';
exists $sample_for{$sample_name}
    or die "unknown sample '$sample_name' (expected one of: "
         . join(', ', sort keys %sample_for) . ")\n";

my $test = $sample_for{$sample_name};

# Only the length is shown for the original: its raw 8-bit text would not
# display properly on a UTF-8 terminal.
print "original len: ", length($test), "\n";

# tou() maps the 8-bit string to Unicode; utf8() returns it as UTF-8 text.
my $utf8 = $isis_map->tou($test)->utf8;
print "UTF8 len: ", length($utf8), "\t$utf8\n";

# unac_string() is given the original 8-bit string together with its charset.
my $unaccented = unac_string($charset, $test);
print "unacct_len: ", length($unaccented), "\t$unaccented\n";