| Revision 83 (by dpavlin, 2007/01/17 22:30:14) |
restructure IMDB trivia parser, added db target to create
trivia database in PostgreSQL |
#!/usr/bin/perl -w
use strict;
use parse_trivia;
use DBI;
# score for words in title
my $title_rank = 3;
open(my $t, "gzip -cd trivia.list.gz |") || die "can't open trivia.list.gz: $!";
my $dsn = 'dbi:Pg:dbname=trivia';
my ($user,$password);
my $dbh = DBI->connect($dsn, $user, $password,
{ RaiseError => 1, AutoCommit => 0 });
my $nr = 1;
my $sql = {
insert_movie => qq{
insert into films (title, year) values (?,?)
},
insert_trivia => qq{
insert into trivias (film_id, trivia) values (?,?)
},
insert_quote => qq{
insert into quotes (trivia_id, quote) values (?,?)
},
};
my $sth;
foreach my $op ( keys %$sql ) {
my $q = $sql->{$op};
warn "# prepare $op: $q\n";
$sth->{$op} = $dbh->prepare( $q );
}
my $film_id_by_title;
parse_trivia($t, sub {
my $a = {@_};
my $k = $a->{title};
$k .= $a->{year} if ($a->{year});
my $film_id = $film_id_by_title->{ $k };
if (! $film_id ) {
$sth->{insert_movie}->execute( $a->{title}, $a->{year} );
$film_id = $dbh->last_insert_id(undef,undef,"films",undef);
}
$film_id_by_title->{ $k } = $film_id;
$sth->{insert_trivia}->execute( $film_id, $a->{trivia} );
my $trivia_id = $dbh->last_insert_id(undef,undef,"trivias",undef);
foreach my $q ( @{ $a->{qv} } ) {
$sth->{insert_quote}->execute( $trivia_id, $q );
}
});
$dbh->commit;