| 1 |
83 |
dpavlin |
use strict; |
| 2 |
|
|
|
| 3 |
|
|
# File-level parser state shared by the subs in this file.

# True until the "=====" ruler line has been seen; every line read while
# this is set is skipped.
my $header = 1;

# Title and year of the entry currently being read.  The year stays
# undefined when the heading line carries no "(NNNN)" part.
my $title = '';
my $year;

# Set when a "- " line opens a trivia item, cleared when a blank line
# closes it.
my $in = 0;

# Text of the trivia item collected so far, one input line per line.
my $trivia = '';

# Names marked with "(qv)" that were found in the current trivia item.
my @qv;

# Occurrence counters, used as hash references and created on first use:
# one per year, per title and per "(qv)" name.
my ($all_years, $all_titles, $all_quotes);
| 10 |
|
|
|
| 11 |
|
|
# Record one cross-referenced name found by qv().
#
# Appends the name to @qv, bumps its counter in $all_quotes and prints a
# "+ name " progress marker.  Always returns the empty string, which qv()
# uses as the replacement text in its substitution.
#
# This sub used to be written inside the body of qv().  Named subs are
# package-scoped wherever they appear, so it is declared at file level to
# make that visible; its name and behaviour are unchanged.
sub qv_print {
    my ($v) = @_;

    # Test for "no name" explicitly: a plain truth test would also throw
    # away a name that happens to be the string "0".
    return '' unless defined $v && length $v;

    # Disabled in the original source; looks like it would turn
    # "Last, First" into "First Last" - TODO confirm before re-enabling.
    # $v =~ s/(.*)\s*,\s*(.+)/$2 $1/g;

    push @qv, $v;
    $all_quotes->{$v}++;
    print "+ $v ";
    return '';
}

# Scan a piece of trivia text for names that are wrapped in matching
# _..._, '...' or "..." delimiters and immediately followed by " (qv)",
# and hand each such name to qv_print().
#
#   $t - text to scan; a false value makes the sub return at once.
#
# Only a private copy of the text is modified.  The return value is the
# result of the substitution (the number of names found, or a false value
# when there were none).
sub qv {
    my ($t) = @_;
    return unless $t;

    return $t =~ s#([_'"])([^_'"]+?)\1 \(qv\)#qv_print($2)#ge;
}
| 24 |
|
|
|
| 25 |
|
|
# Strip a trailing ", The" (comma, whitespace, "The", optional trailing
# whitespace) from the end of a title and return the result.
# Titles without that suffix are returned unchanged.
sub fix_title {
    my ($name) = @_;

    $name =~ s/,\s+The\s*$//;

    return $name;
}
| 30 |
|
|
|
| 31 |
|
|
# Running count of trivia items handed to the callback so far.
my $i = 0;

# Read a trivia list from a file handle and call a callback once per
# trivia item.
#
#   $t    - file handle to read from
#   $call - code reference, called for every completed item with the
#           key/value list
#               title  => title of the current entry,
#               year   => its year (undef if the heading had none),
#               trivia => text of the item, one line per input line,
#               qv     => array ref of "(qv)" names found in the item
#
# Input format, as recognised by the patterns below:
#   - everything up to and including the first line starting with
#     "=====" is skipped;
#   - "# Title (year)" starts a new entry;
#   - "- text" starts a trivia item, lines indented by two whitespace
#     characters continue it, an empty line ends it;
#   - any other line is echoed to STDOUT prefixed with "#".
#
# Progress ("# title [year]", "[n] ", "+ name ") is printed to STDOUT.
# An item that is still open when the input ends is also passed to the
# callback, so the last item is not lost when the trailing blank line is
# missing.
sub parse_trivia {
    my ($t, $call) = @_;

    # Hand the collected item to the callback and reset per-item state.
    my $emit = sub {
        $i++;
        print "[$i] ";

        $call->(
            title  => $title,
            year   => $year,
            trivia => $trivia,
            qv     => [ @qv ],
        );

        $trivia = '';
        @qv     = ();
        $in     = 0;
    };

    # Read into a lexical: "while (<$t>)" would overwrite the caller's
    # global $_ without localizing it.
    while (defined(my $line = <$t>)) {

        # Still in the preamble: skip lines, and stop skipping after the
        # "=====" ruler.
        if ($header) {
            $header = 0 if $line =~ /^=====+/;
            next;
        }

        if ($line =~ /^#\s+(.*)\s*$/) {
            $title = $1;

            # Split 'Title (1999)' / '"Title" (1999) (tag)' into title and
            # year.
            # NOTE(review): "(:?" matches an optional literal colon and
            # captures; it looks like a typo for the non-capturing "(?:".
            # Left unchanged so the set of matching headings stays the same.
            if ($title =~ m#^("*)(.*)\1\s*\((\d+)\)(:?\s*\(\w+\))*$#) {
                ($title, $year) = (fix_title($2), $3);
                $all_titles->{$title}++;
                $all_years->{$year}++;
            }
            else {
                $year = undef;
            }

            print "# $title ", ( $year ? "[$year]" : "" ), "\n";
        }
        elsif ($line =~ /^-\s(.*)\s*$/) {
            # First line of a trivia item.
            my $text = $1;
            $in     = 1;
            $trivia = "$text\n";
            qv($text);
        }
        elsif ($line =~ /^\s\s(.*)\s*$/) {
            # Continuation line of the current item.
            my $text = $1;
            $trivia .= "$text\n";
            qv($text);
        }
        elsif ($line =~ /^$/ && $in) {
            # Blank line closes the item.
            $emit->();
        }
        else {
            # The line still carries its own newline, so this prints an
            # extra blank line after it (unchanged from the original).
            print "#$line\n";
        }

        # last if ($i > 1000);    # XXX remove this!
    }

    # Input ended in the middle of an item: deliver it instead of dropping
    # it and leaving stale state behind.
    $emit->() if $in;
}
| 84 |
|
|
|
| 85 |
|
|
# Write a hash to a file as "key<TAB>value" lines, sorted by key.
#
#   $name - path of the file to create (an existing file is overwritten)
#   $hash - hash reference to dump
#
# Returns 1 on success.  Dies with a message containing $name and $! when
# the file cannot be opened, written or closed.
#
# The ($$) prototype is kept only so that existing call sites parse
# exactly as before.
sub dump_data($$) {
    my ($name, $hash) = @_;

    # Three-argument open: the name is used literally, so whitespace or
    # mode characters inside it cannot change what gets opened.
    open(my $fh, '>', $name) or die "can't open $name: $!";

    foreach my $k (sort keys %{$hash}) {
        print {$fh} "$k\t", $hash->{$k}, "\n"
            or die "can't write to $name: $!";
    }

    # Buffered write errors only show up at close time, so check it.
    close($fh) or die "can't close $name: $!";

    return 1;
}
| 96 |
|
|
|
| 97 |
|
|
#dump_data('titles.data', $all_titles); |
| 98 |
|
|
#dump_data('quotes.data', $all_quotes); |
| 99 |
|
|
#dump_data('years.data', $all_years); |
| 100 |
|
|
|
| 101 |
|
|
1; |