--- trunk/lib/WebPAC/Normalize/XML.pm	2005/12/16 21:09:42	268
+++ trunk/lib/WebPAC/Normalize/XML.pm	2005/12/16 21:09:48	269
@@ -7,18 +7,19 @@
 use XML::Simple;
 use Data::Dumper;
 use Text::Iconv;
+use YAML qw/Dump LoadFile/;
 
 =head1 NAME
 
-WebPAC::Normalize::XML - apply XML normalisaton rules
+WebPAC::Normalize::XML - apply XML or YAML normalisation rules
 
 =head1 VERSION
 
-Version 0.02
+Version 0.03
 
 =cut
 
-our $VERSION = '0.02';
+our $VERSION = '0.03';
 
 =head1 SYNOPSIS
 
@@ -42,7 +43,7 @@
 
 C defines tag to use within C
 
-C defines path to normalize XML.
+C defines path to normalize XML
 
 C define additional tags that can be forced (and an be array).
 
@@ -78,9 +79,61 @@
 
 	$log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled");
 
+	#print STDERR Dump($self->{import_xml});
+
 	return $self;
 }
 
+=head2 open_yaml
+
+Read normalisation rules from the YAML file given as C<path> and store the
+parsed structure in C<< $self->{import_xml} >>.
+
+ my $n = new WebPAC::Normalize::XML;
+ $n->open_yaml(
+ 	tag => 'isis',
+ 	path => '/path/to/conf/normalize/isis.yml',
+ );
+
+C<tag> is stored on the object as C<< $self->{tag} >>.
+
+C<path> is the YAML file to load; it must exist.
+
+Both arguments are required. Returns the object itself and dies through the
+logger if an argument is missing, the file doesn't exist or nothing could be
+loaded from it.
+
+=cut
+
+sub open_yaml {
+	my $self = shift;
+
+	my $arg = {@_};
+
+	my $log = $self->_get_logger();
+
+	foreach my $req (qw/tag path/) {
+		$log->logconfess("need argument $req") unless $arg->{$req};
+	}
+
+	my $path = $arg->{path};
+	$self->{tag} = $arg->{tag};
+
+	$log->logdie("normalisation yaml file '$path' doesn't exist!") if (! -e $path);
+
+	$log->info("using $path normalization YAML");
+
+	# use logdie like the other fatal checks above; this branch only runs
+	# when LoadFile returns a false value
+	$self->{'import_xml'} = LoadFile( $path ) || $log->logdie("can't load $path: $!");
+
+	$log->debug("import yaml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled");
+
+	# rules loaded from YAML make _x return its input unconverted
+	$self->{_skip_x} = 1;
+
+	return $self;
+}
 
 =head2 _x
 
@@ -96,6 +149,7 @@
 sub _x {
 	my $self = shift;
 	my $utf8 = shift || return;
+	return $utf8 if ($self->{_skip_x});
 
 	# create UTF-8 convertor for import_xml files
 	$self->{'utf2cp'} ||= Text::Iconv->new('UTF-8' ,$self->{'code_page'} || 'ISO-8859-2');