1 |
dpavlin |
1.1 |
#!/usr/local/bin/perl |
2 |
|
|
|
3 |
|
|
use DBI; |
4 |
|
|
|
5 |
|
|
# Open the database handle used for all inserts below: PostgreSQL database
# "corp", with empty user name and password strings passed to the driver.
# Abort with the DBI error text if the connection cannot be established.
my $dbh = DBI->connect("DBI:Pg:dbname=corp", "", "")
    or die $DBI::errstr;
6 |
|
|
|
7 |
|
|
# nukehtml($text)
#
# Return a copy of $text with every "<...>" / "</...>" tag removed and with
# leading and trailing whitespace trimmed.  The argument itself is not
# modified.  An empty "<>" is not treated as a tag (the pattern needs at
# least one character between the angle brackets).
sub nukehtml {
    my ($text) = @_;

    # Topicalize the copy so the three clean-up passes read as a pipeline;
    # "for" localizes $_, so the caller's $_ is left alone.
    for ($text) {
        s/<\/*[^>]+>//g;    # drop opening and closing tags
        s/^\s+//g;          # trim leading whitespace
        s/\s+$//g;          # trim trailing whitespace
    }

    return $text;
}
14 |
|
|
|
15 |
|
|
# Read the news dump from STDIN (or the files named in @ARGV) and load it
# into the "news" table.
#
# Input format, as recognised by the patterns below:
#   * a line of the form "<id>\t<title>" starts a new record;
#   * a line mentioning "Zagreb" together with a year 1998-2001 is taken as
#     the record's date line (markup stripped with nukehtml());
#   * every other line is appended to the body of the current record.
#
# A record is written when the next header line is seen, and once more after
# the last input line so that the final record in the input is not lost.
#
# State of the record currently being collected.  $date is deliberately not
# reset between records, so a record without its own date line keeps the
# most recently seen date.
my ($id, $title, $date, $body) = ('', '', '', '');

# Print the pending record and insert it into the database.  Does nothing
# while no titled record has been collected yet (e.g. on the very first
# header line, or for input without any header at all).
my $store_record = sub {
    return unless $title;

    # Collapse every run of whitespace in the body to a single space.
    $body =~ s/\s+/ /g;

    print "id: $id\ntitle: $title\ndate: $date\n$body\n----------\n";

    # Bind values instead of interpolating them: quotes and backslashes in
    # the scraped text can then neither break the statement nor inject SQL.
    $dbh->do(
        'insert into news (title,id,date,body) values (?,?,?,?)',
        undef, $title, $id, $date, $body,
    ) or die $dbh->errstr();
};

while (<>) {
    chomp;

    # Windows-1250 -> ISO 8859-2.  Only the four letters whose code points
    # differ between the two code pages need mapping (s/z with caron, lower
    # and upper case); d-stroke, c-caron and c-acute share the same bytes in
    # both.  Hex escapes keep this table independent of the encoding the
    # script file itself is saved in.
    tr/\x9A\x9E\x8A\x8E/\xB9\xBE\xA9\xAE/;

    if (/^(-*\d+)\t(.+)$/) {
        # Header line: save the captures first, because the substitution in
        # $store_record would otherwise clobber $1/$2.
        my ($t_id, $t_title) = ($1, $2);

        # Flush the record collected so far, then start the new one.
        $store_record->();

        $title = nukehtml($t_title);
        $id    = $t_id;
        $body  = "";
    } elsif (/Zagreb/i && (/199[89]/ || /200[01]/)) {
        # Date line: stored as the whole line with tags stripped.
        $date = nukehtml($_);

        # Disabled attempt at extracting day / month / year from the line:
        # if ($date=~/,\s*(\d+).*\s(\w+)\s+(\d+)/) {
        #     $date.="-- $1 $2 $3 --";
        # }
    } else {
        # Body line.  NOTE(review): lines are chomped and appended without a
        # separator, so words are only kept apart if the input line carries
        # its own leading/trailing whitespace (e.g. a CR) -- confirm this
        # matches the dump format.
        $body .= $_;
    }
}

# End of input: write out the record that was still being collected.
$store_record->();