/[nn]/swish/html2xml.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /swish/html2xml.pl

Parent Directory | Revision Log | View Patch

revision 1.3 by dpavlin, Tue Jun 25 19:04:41 2002 UTC revision 1.4 by dpavlin, Sat Sep 7 19:06:21 2002 UTC
# Line 27  my $god;       ## godina NN Line 27  my $god;       ## godina NN
27  my $aname;      ## ancor name na originalnim stranicama  my $aname;      ## ancor name na originalnim stranicama
28    
29  my $nn_dir="../";               # dir u kojem su wget-ani fileovi  my $nn_dir="../";               # dir u kojem su wget-ani fileovi
30  my $url="http://www.nn.hr/CijeliBrojS.asp?god=%d&br=%s&mid=%d#%d";  my $url="http://www.nn.hr/CijeliBrojS.asp?god=%d&br=%s&mid=%s#%d";
31    
32  my %opts;  my %opts;
33  getopts("vqdl:", \%opts);  getopts("vqdl:", \%opts);
# Line 100  foreach my $file (@files) { Line 100  foreach my $file (@files) {
100                          $naslov_czs = join(" ",$hr->alternatives(split(/ /,$naslov_czs)));                          $naslov_czs = join(" ",$hr->alternatives(split(/ /,$naslov_czs)));
101  #                       $naslov_czs = $hr->minimal(split(/ /,$naslov_czs));  #                       $naslov_czs = $hr->minimal(split(/ /,$naslov_czs));
102                          my $xml="<nn>\n<br>$br</br>\n<god>$god</god>\n<nr>$nr</nr>\n<aname>$aname</aname>\n";                          my $xml="<nn>\n<br>$br</br>\n<god>$god</god>\n<nr>$nr</nr>\n<aname>$aname</aname>\n";
103                          $xml.="<naslov>". $l2_map->tou($naslov)->utf8 ."</naslov>\n";                          my $naslov_utf=$l2_map->tou($naslov)->utf8;
104    
105                            # Escape <, >, & and ", and to produce valid XML
106                            my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');  
107                            my $escape_re  = join '|' => keys %escape;
108                            $naslov_utf =~ s/($escape_re)/$escape{$1}/g;
109    
110                            $xml.="<naslov>$naslov_utf</naslov>\n";
111                          $xml.="<naslov_czs>$naslov_czs</naslov_czs>\n</nn>\n\n";                          $xml.="<naslov_czs>$naslov_czs</naslov_czs>\n</nn>\n\n";
112                          dump_to_swish($xml,$god,$br,$nr,$aname);                          dump_to_swish($xml,$god,$br,$nr,$aname);
113                                                    

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.4

  ViewVC Help
Powered by ViewVC 1.1.26