/[Z3950-HTML-Scraper]/COBISS.pm
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /COBISS.pm

Parent Directory | Revision Log | View Patch

revision 7 by dpavlin, Sat Jun 20 20:19:49 2009 UTC | revision 8 by dpavlin, Sat Jun 20 22:09:33 2009 UTC
# Line 5  use strict; Line 5  use strict;
5    
6  use WWW::Mechanize;  use WWW::Mechanize;
7  use MARC::Record;  use MARC::Record;
 use File::Slurp;  
8    
9  binmode STDOUT, ':utf8';  binmode STDOUT, ':utf8';
10    
# Line 119  sub fetch_marc { Line 118  sub fetch_marc {
118    
119          my $comarc;          my $comarc;
120    
121          if ( $mech->content =~ m{<pre>\s*(.+?(\d+\.)\s+ID=(\d+).+?)\s*</pre>}s ) {          if ( $mech->content =~ m{<pre>\s*(.+?(\d+)\.\s+ID=(\d+).+?)\s*</pre>}s ) {
122    
123                  my $comarc = $1;                  my $comarc = $1;
124                  my $nr = $2;                  my $nr = $2;
# Line 131  diag "fetch_marc $nr [$id]"; Line 130  diag "fetch_marc $nr [$id]";
130                  $comarc =~ s{<font[^>]*>}{<s>}gs;                  $comarc =~ s{<font[^>]*>}{<s>}gs;
131                  $comarc =~ s{</font>}{<e>}gs;                  $comarc =~ s{</font>}{<e>}gs;
132    
133                  write_file "comarc/$id", $comarc;                  open(my $out, '>:utf8', "comarc/$id");
134                    print $out $comarc;
135                    close($out);
136    
137                  print $comarc;                  print $comarc;
138    
# Line 158  diag "fetch_marc $nr [$id]"; Line 159  diag "fetch_marc $nr [$id]";
159    
160                  diag $marc->as_formatted;                  diag $marc->as_formatted;
161    
162                    $nr++;
163                    $mech->follow_link( url_regex => qr/rec=$nr/ );
164    
165                  return $marc->as_usmarc;                  return $marc->as_usmarc;
166          } else {          } else {
167                  die "can't fetch COMARC format from ", $mech->content;                  die "can't fetch COMARC format from ", $mech->content;

Legend:
Removed from v.7  
changed lines
  Added in v.8

  ViewVC Help
Powered by ViewVC 1.1.26