#!/usr/local/bin/perl

# Counter used to number the headings found by the main loop; starts at 1.
$sec_nr = 1;

# nuke_html($text)
#
# Returns a copy of $text with every "<...>" tag removed and with leading
# and trailing spaces trimmed.  The caller's variable is not modified.
#
# Tags are stripped *before* trimming, so spaces that were only shielded by
# a tag (e.g. "<i> Foo </i>") do not survive at the edges of the result.
sub nuke_html {
    my ($foo) = @_;

    # A single pass handles opening and closing tags alike: "</i>" is also
    # matched by "<[^>]+>".
    $foo =~ s,<[^>]+>,,g;

    # Trim spaces only (not tabs or other whitespace).
    $foo =~ s/^ *//;
    $foo =~ s/ *$//;

    return $foo;
}

# Scan the input (files named on the command line, or STDIN) line by line.
# Only lines that begin with a <b> tag are examined:
#   * <b> not directly followed by <i>: the tag-stripped text is pushed onto
#     @products, unless it is 80 characters or longer.
#   * <b><i>...: the tag-stripped text is treated as a heading and recorded
#     in %section (occurrence count), %nr (number) and %full (full text).
while (<>) {
    chomp;

    next unless m,^<b>(.+)$,i;
    $foo = $1;

    if ($foo !~ m,^<i>(.+),i) {
        # The failed match leaves the captures untouched, so the text to
        # clean is exactly what followed the <b>.
        $line = nuke_html($foo);
        if (length($line) >= 80) {
            print "skip: too long $line\n";
        }
        else {
            push @products, $line;
        }
        next;
    }

    $foo = nuke_html($foo);

    if ($foo =~ m/^([a-zA-Z]+)/) {
        # Heading that starts with a word; the leading word is the key.
        my $key        = $1;
        my $first_seen = !defined($section{$key});

        $section{$key}++;

        if ($first_seen) {
            $lsec_nr    = $sec_nr;      # remembered as prefix for "N.M" numbers
            $nr{$key}   = $sec_nr++;
            $full{$key} = $foo;
        }
        else {
            # NOTE(review): a repeated heading rewinds the counter to that
            # heading's stored number -- confirm this is intended.
            $sec_nr = $nr{$key};
        }
    }
    elsif ($foo =~ m/^\d+\.\s+(\w+)/) {
        # Heading of the form "12. Word": numbered "<last section>.<counter>".
        # NOTE(review): this consumes the same counter as the word headings
        # above -- confirm this is intended.
        my $key = $1;

        $section{$key}++;
        $nr{$key}   = "$lsec_nr." . $sec_nr++;
        $full{$key} = $foo;
    }
    elsif ($foo ne "") {
        print "skip: $foo\n";
    }
}
# Report every recorded heading as "<number> <key> [<full text>] <count>".
# Keys are sorted so that the output order is the same on every run; plain
# "keys %section" order is arbitrary.  (The former close(S) was dropped:
# no filehandle S is ever opened in this script.)
foreach my $k (sort keys %section) {
    print "$nr{$k} $k [$full{$k}] $section{$k}\n";
}