/[nn]/find2.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /find2.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (hide annotations) (vendor branch)
Fri Jan 25 18:36:33 2002 UTC (17 years, 5 months ago) by dpavlin
Branch: dbp, MAIN
CVS Tags: r0, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain
initial import

#!/usr/bin/perl -w
#
# find2.pl - rebuild the "nn" table from previously downloaded HTML pages.
#
# Scans $nn_dir for plain files whose names start with "CijeliBrojS", takes
# the year (god=) and issue number (br=) from each file name, and inserts one
# row into table "nn" for every <div class=sadrzaj> ... </div> entry found in
# the file. The table is emptied first, so every run is a full rebuild.
#
# Input is processed as raw bytes: lines are converted from Windows-1250 to
# ISO-8859-2, and an ASCII-folded copy of each title is stored in title_czs.

use strict;
use warnings;

use DBI;

my $nn_dir = ".";    # directory holding the wget-fetched files

# Collect the input files before touching the database, so that an unreadable
# directory aborts the run instead of leaving an emptied table behind.
opendir(my $dir, $nn_dir) or die "opendir $nn_dir: $!";
my @files = grep { /^CijeliBrojS/ && -f "$nn_dir/$_" } readdir($dir);
closedir($dir);

my $dbh = DBI->connect("DBI:Pg:dbname=nn", "", "") or die $DBI::errstr;

$dbh->do("delete from nn") or die $dbh->errstr();

# Placeholders let the driver do the quoting, so titles containing
# apostrophes or backslashes can no longer break the statement.
my $insert = $dbh->prepare(
    "insert into nn (br,god,nr,aname,title,title_czs) values (?,?,?,?,?,?)"
) or die $dbh->errstr();

foreach my $file (@files) {
    my $god;    # year, taken from the file name
    my $br;     # issue number, taken from the file name

    if ($file =~ m/god=(\d+)\&br=(\d+)/) {
        ($god, $br) = ($1, $2);
        print "$file -- $br -- $god\n";
    }
    else {
        # Without year and issue number the rows could not be attributed to
        # an issue; skip rather than reuse values from a previous file.
        warn "skipping $file: no god=/br= in file name\n";
        next;
    }

    # Three-argument open: the file names contain characters such as '&' and
    # must never be interpreted as an open mode. The path is built from
    # $nn_dir so the script also works when $nn_dir is not ".".
    open(my $in, '<', "$nn_dir/$file") or die "can't open $file: $!";

    # Per-file parser state, so an unterminated <div> in one file cannot
    # leak into the next one.
    my $in_contents = 0;     # > 0 while inside <div class=sadrzaj>
    my $title       = "";    # raw HTML of the current entry, accumulated
    my $nr          = 0;     # entry number within the issue
    my $aname       = "";    # anchor name of the entry on the original page

    while (my $line = <$in>) {
        chomp $line;
        $line =~ s/\015//g;    # strip carriage returns

        # Windows-1250 -> ISO-8859-2. Only s/z with caron (both cases)
        # differ between the two code pages; the other Croatian letters
        # share the same byte values and need no mapping.
        $line =~ tr/\x9A\x9E\x8A\x8E/\xB9\xBE\xA9\xAE/;

        $in_contents++ if $line =~ m,<div class=sadrzaj>,;

        if ($in_contents) {
            # Remove the leading '<a href="#anchor"> 123. <tag>' and remember
            # the full anchor name and the entry number.
            if ($line =~ s/<a href="#([^"]+)">\s*(\d+)\.\s*<[^>]+>//i) {
                ($aname, $nr) = ($1, $2);
            }
            $title .= $line;
        }

        if ($in_contents && $line =~ m,</div>,) {
            $in_contents--;

            # Collapse whitespace, drop remaining tags, trim both ends.
            $title =~ s/\s+/ /g;
            $title =~ s/<[^>]+>//g;
            $title =~ s/^\s+//;
            $title =~ s/\s+$//;
            print "$god $br $nr: $title\n";

            # ASCII-folded copy of the title: ISO-8859-2 Croatian letters
            # become plain s/d/c/z, every run of non-letters becomes a
            # single space.
            # NOTE(review): lc() runs before the fold and, without a locale,
            # only lowercases ASCII, so accented capitals end up as upper
            # case S/D/C/Z. Kept as in the original; confirm whether
            # title_czs is meant to be fully lower case.
            my $title_czs = lc($title);
            $title_czs =~ tr/\xB9\xA9\xF0\xD0\xE8\xC8\xE6\xC6\xBE\xAE/sSdDcCcCzZ/;
            $title_czs =~ tr/a-zA-Z/ /cs;

            $insert->execute($br, $god, $nr, $aname, $title, $title_czs)
                or die $insert->errstr();

            # Reset the per-entry state for the next <div>.
            $title = "";
            $nr    = 0;
            $aname = "";
        }
    }

    close($in);
}

$dbh->do("vacuum") or die $dbh->errstr();

  ViewVC Help
Powered by ViewVC 1.1.26