1 |
#!/usr/local/bin/perl |
2 |
|
3 |
use DBI; |
4 |
|
5 |
# Open the handle used for all inserts: PostgreSQL database "corp", with
# empty user name and password. Abort with DBI's error text on failure.
my $dbh = DBI->connect("DBI:Pg:dbname=corp", "", "")
    or die $DBI::errstr;
6 |
|
7 |
# nukehtml($text)
#
# Returns a copy of $text with every "<...>" / "</...>" tag removed and
# with leading and trailing whitespace trimmed. The argument itself is
# not modified.
sub nukehtml {
    my ($text) = @_;

    # Alias the copy to $_ so the three substitutions read as a pipeline.
    for ($text) {
        s/<\/*[^>]+>//g;    # drop opening and closing tags
        s/^\s+//g;          # trim leading whitespace
        s/\s+$//g;          # trim trailing whitespace
    }

    return $text;
}
14 |
|
15 |
# Import loop: reads records from the files named on the command line (or
# STDIN) and stores each one as a row of the "news" table.
#
# Line types, as recognised by the patterns below:
#   header  "<id><TAB><title>" starts a new record; the id is digits with
#           optional leading minus signs, the title has its HTML stripped.
#   date    any line mentioning "Zagreb" (case-insensitive) together with
#           199[89] or 200[01]; stored with HTML stripped.
#   body    everything else, appended to the current record's body.
#
# Each record is echoed to STDOUT and, when it has a title, inserted.
# Dies with the driver's error text if an insert fails.
my ($id, $title, $date, $body) = ('', '', '', '');

# Echo the pending record and insert it if it has a title.
my $store_record = sub {
    $body =~ s/\s+/ /g;    # collapse whitespace runs to single spaces
    print "id: $id\ntitle: $title\ndate: $date\n$body\n----------\n";
    return unless $title;

    # Values are bound, not interpolated: no hand-rolled quote escaping,
    # and input text cannot alter the statement.
    $dbh->do(
        "insert into news (title,id,date,body) values (?,?,?,?)",
        undef, $title, $id, $date, $body
    ) or die $dbh->errstr();
    return;
};

while (<>) {
    chomp;

    # Windows-1250 -> ISO 8859-2. Only s/z/S/Z with caron sit at different
    # code points in the two code pages; the other letters the original
    # table listed map to themselves. Hex escapes keep the table intact
    # regardless of how this source file is encoded.
    tr/\x9a\x9e\x8a\x8e/\xb9\xbe\xa9\xae/;

    if (/^(-*\d+)\t(.+)$/) {
        # Save the captures before any other regex runs.
        my ($next_id, $next_title) = ($1, $2);

        # A header closes the previous record. On the first header this
        # echoes an empty record and inserts nothing.
        $store_record->();

        $title = nukehtml($next_title);
        $id    = $next_id;
        $body  = "";
        # $date is not cleared: a record without a date line keeps the
        # date of the record before it.
    } elsif (/Zagreb/i && (/199[89]/ || /200[01]/)) {
        $date = nukehtml($_);

        # Disabled experiment kept from the original:
        # if ($date=~/,\s*(\d+).*\s(\w+)\s+(\d+)/) {
        #     $date.="-- $1 $2 $3 --";
        # }
    } else {
        # NOTE(review): lines are joined with no separator after chomp;
        # confirm the input does not rely on the line break as a space.
        $body .= $_;
    }
}

# The last record has no following header to trigger its insert, so it is
# written here; without this the final record of the input is lost.
$store_record->() if $title;