Revision 83 (by dpavlin, 2007/01/17 22:30:14) restructure IMDB trivia parser, added db target to create
trivia database in PostgreSQL
#!/usr/bin/perl -w

use strict;
use parse_trivia;
use DBI;

# score for words in title
my $title_rank = 3;

open(my $t, "gzip -cd trivia.list.gz |") || die "can't open trivia.list.gz: $!";

my $dsn = 'dbi:Pg:dbname=trivia';
my ($user,$password);

my $dbh = DBI->connect($dsn, $user, $password,
	{ RaiseError => 1, AutoCommit => 0 });

my $nr = 1;

my $sql = {
	insert_movie => qq{
		insert into films (title, year) values (?,?)
	},
	insert_trivia => qq{
		insert into trivias (film_id, trivia) values (?,?)
	},
	insert_quote => qq{
		insert into quotes (trivia_id, quote) values (?,?)
	},
};

my $sth;

foreach my $op ( keys %$sql ) {
	my $q = $sql->{$op};
	warn "# prepare $op: $q\n";
	$sth->{$op} = $dbh->prepare( $q );
}

my $film_id_by_title;

parse_trivia($t, sub {

		my $a = {@_};

		my $k = $a->{title};
		$k .= $a->{year} if ($a->{year});

		my $film_id = $film_id_by_title->{ $k };
		if (! $film_id ) {
			$sth->{insert_movie}->execute( $a->{title}, $a->{year} );
			$film_id = $dbh->last_insert_id(undef,undef,"films",undef);
		}

		$film_id_by_title->{ $k } = $film_id;

		$sth->{insert_trivia}->execute( $film_id, $a->{trivia} );
		my $trivia_id = $dbh->last_insert_id(undef,undef,"trivias",undef);

		foreach my $q ( @{ $a->{qv} } ) {
			$sth->{insert_quote}->execute( $trivia_id, $q );
		}
});

$dbh->commit;