#!/usr/local/bin/perl

# Running counter the main loop draws on when numbering sections; starts at 1.
$sec_nr = 1;
# nuke_html($text) -> $plain
#
# Returns a copy of $text with every HTML tag removed and with leading and
# trailing spaces stripped.
#
#   $text   string that may contain tags of the form <...> or </...>;
#           an undefined value is treated as the empty string.
#
# Returns the tag-free, space-trimmed string. The caller's variable is not
# modified (the argument is copied before any substitution).
sub nuke_html {
    my ($text) = @_;
    $text = '' unless defined $text;

    # A single pass handles opening and closing tags alike: "</b>" is already
    # matched by <[^>]+>, so no separate closing-tag substitution is needed.
    $text =~ s{<[^>]+>}{}g;

    # Trim only after the tags are gone, so that spaces which sat just inside
    # a tag (e.g. "<b> Foo </b>") are stripped as well. Only the space
    # character is trimmed; tabs and other whitespace are left alone.
    $text =~ s{^ +}{};
    $text =~ s{ +$}{};

    return $text;
}
# Main pass: read every input line (from the files named on the command line,
# or STDIN) and classify the lines that begin with a <b> tag.
#
#   <b>text         no <i> directly after the <b>: the tag-free text is stored
#                   in @products when it is shorter than 80 characters, and
#                   reported as "skip: too long ..." otherwise.
#   <b><i>Word...   heading keyed by its leading run of ASCII letters.
#   <b><i>N. word   numbered heading keyed by the first word after "N.".
#   <b><i>other     reported as "skip: ..." unless empty after tag removal.
#
# For each key, %section counts occurrences, %nr holds the number assigned to
# it and %full the complete tag-free heading text. Once the input is
# exhausted, one summary line per key is printed.
while (my $input = <>) {
    chomp $input;

    # Only lines that start with <b> (case-insensitive) are of interest.
    next unless $input =~ m,^<b>(.+)$,i;
    my $heading = $1;

    if ($heading !~ m,^<i>(.+),i) {
        # Pass $heading explicitly instead of $1: a failed match leaves $1
        # untouched, so $1 would only hold the right text here because the
        # outer <b> match happened to be the last successful one.
        my $product = nuke_html($heading);
        if (length($product) < 80) {
            push @products, $product;
        }
        else {
            print "skip: too long $product\n";
        }
        next;
    }

    $heading = nuke_html($heading);

    if ($heading =~ m/^([a-zA-Z]+)/) {
        # Copy the capture at once so later matches cannot clobber it.
        my $key = $1;
        if (!exists $section{$key}) {
            # First sighting: remember the number as the current parent
            # ($lsec_nr) and advance the counter.
            $lsec_nr    = $sec_nr;
            $nr{$key}   = $sec_nr++;
            $full{$key} = $heading;
        }
        else {
            # NOTE(review): a repeated heading rewinds the counter to that
            # heading's own number, so later new headings can receive numbers
            # that are already in use -- confirm this is intended.
            $sec_nr = $nr{$key};
        }
        $section{$key}++;
    }
    elsif ($heading =~ m/^\d+\.\s+(\w+)/) {
        my $key = $1;
        $section{$key}++;
        # NOTE(review): $lsec_nr is unset if no lettered heading has been seen
        # yet, which yields a number such as ".1" -- confirm inputs rule this out.
        $nr{$key}   = "$lsec_nr." . $sec_nr++;
        $full{$key} = $heading;
    }
    elsif ($heading ne "") {
        print "skip: $heading\n";
    }
}

# Sorted keys keep the report order stable from run to run; plain hash order
# is not.
foreach my $key (sort keys %section) {
    print "$nr{$key} $key [$full{$key}] $section{$key}\n";
}