/[Z3950-HTML-Scraper]/COBISS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /COBISS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 2 - (show annotations)
Fri Jun 19 21:51:28 2009 UTC (14 years, 10 months ago) by dpavlin
File size: 1974 byte(s)
create MARC record

1 package COBISS;
2
3 use warnings;
4 use strict;
5
6 use WWW::Mechanize;
7 use MARC::Record;
8
9 binmode STDOUT, ':utf8';
10
11 my $cobiss_marc21 = {
12 '010' => { a => [ '020', 'a' ] },
13 200 => {
14 a => [ 245 , 'a' ],
15 f => [ 245 , 'f' ],
16 },
17 205 => { a => [ 250 , 'a' ] },
18 210 => {
19 a => [ 250 , 'a' ],
20 c => [ 260 , 'b' ],
21 d => [ 260 , 'c' ],
22 },
23 215 => {
24 a => [ 300 , 'a' ],
25 c => [ 300 , 'b' ],
26 d => [ 300 , 'c' ],
27 },
28 700 => {
29 a => [ 100 , 'a' ],
30 },
31 };
32
33 sub search {
34
35 my $url = 'http://cobiss.izum.si/scripts/cobiss?ukaz=GETID&lani=en';
36
37 warn "# get $url\n";
38
39 my $mech = WWW::Mechanize->new();
40 $mech->get( $url );
41
42 warn "# got session\n";
43
44 $mech->follow_link( text_regex => qr/union/ );
45
46 warn "# submit search\n";
47
48 $mech->submit_form(
49 fields => {
50 'SS1' => 'Krleza',
51 },
52 );
53
54 my $hits = 1;
55 if ( $mech->content =~ m{hits:\s*<b>\s*(\d+)\s*</b>}s ) {
56 $hits = $1;
57 } else {
58 warn "get't find results in ", $mech->content;
59 }
60
61 warn "# got $hits results, get first one\n";
62
63 $mech->follow_link( url_regex => qr/ukaz=DISP/ );
64
65 warn "# in COMARC format\n";
66
67 $mech->follow_link( url_regex => qr/fmt=13/ );
68
69 my $comarc;
70
71 if ( $mech->content =~ m{<pre>\s*(.+1\..+?)\s*</pre>}s ) {
72 my $comarc = $1;
73 $comarc =~ s{</?b>}{}gs;
74 $comarc =~ s{<font[^>]*>}{<s>}gs;
75 $comarc =~ s{</font>}{<e>}gs;
76
77 print $comarc;
78
79 my $marc = MARC::Record->new;
80
81 foreach my $line ( split(/[\r\n]+/, $comarc) ) {
82 our @f;
83
84 if ( $line !~ s{(\d\d\d)([01 ])([01 ])}{} ) {
85 warn "SKIP: $line\n";
86 } else {
87 $line .= "<eol>";
88
89 @f = ( $1, $2, $3 );
90 sub sf { warn "sf",@_,"|",@f; push @f, @_; }
91 $line =~ s{<s>(\w)<e>([^<]+)\s*}{sf($1, $2)}ges;
92 warn "# f:", join(' ', @f), " left:|$line|\n";
93 $marc->add_fields( @f );
94 }
95 }
96
97 open(my $out, '>:utf8', 'out.marc');
98 print $out $marc->as_usmarc;
99 close($out);
100
101 warn $marc->as_formatted;
102
103 return $comarc;
104 } else {
105 die "can't fetch COMARC format from ", $mech->content;
106 }
107
108 }
109
110 1;

  ViewVC Help
Powered by ViewVC 1.1.26