--- swish/html2xml.pl 2002/06/25 19:04:41 1.3 +++ swish/html2xml.pl 2002/09/07 19:06:21 1.4 @@ -27,7 +27,7 @@ my $aname; ## ancor name na originalnim stranicama my $nn_dir="../"; # dir u kojem su wget-ani fileovi -my $url="http://www.nn.hr/CijeliBrojS.asp?god=%d&br=%s&mid=%d#%d"; +my $url="http://www.nn.hr/CijeliBrojS.asp?god=%d&br=%s&mid=%s#%d"; my %opts; getopts("vqdl:", \%opts); @@ -100,7 +100,14 @@ $naslov_czs = join(" ",$hr->alternatives(split(/ /,$naslov_czs))); # $naslov_czs = $hr->minimal(split(/ /,$naslov_czs)); my $xml="\n
$br
\n$god\n$nr\n$aname\n"; - $xml.="". $l2_map->tou($naslov)->utf8 ."\n"; + my $naslov_utf=$l2_map->tou($naslov)->utf8; + + # Escape <, >, & and ", and to produce valid XML + my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"'); + my $escape_re = join '|' => keys %escape; + $naslov_utf =~ s/($escape_re)/$escape{$1}/g; + + $xml.="$naslov_utf\n"; $xml.="$naslov_czs\n
\n\n"; dump_to_swish($xml,$god,$br,$nr,$aname);