--- trunk/spider/progspider 2004/03/18 11:14:49 68
+++ trunk/spider/progspider 2004/04/06 15:06:58 72
@@ -78,12 +78,17 @@
 	my ($pre_html,$pages,$post_html) = ('
 ',$html,'');
-	($pre_html,$pages,$post_html) = ($1,$2,$3) if ($html =~ m/^(.+<pre>)(.+)(<\/pre>.+)$/si);
+	($pre_html,$pages,$post_html) = ($1,$2,$3) if ($html =~ m/^(.+?<pre>)(.+)(<\/pre>.+?)$/si);
-	$pre_html =~ s/<title>(.+?)<\/title>/<title>$1 :: page ##page_nr##<\/title>/si;
+	if ($collection) {
+		$pre_html =~ s/<title>(.+?)<\/title>/<title>$collection :: page ##page_nr##<\/title>/si;
+	} else {
+		$pre_html =~ s/<title>(.+?)<\/title>/<title>$1 :: page ##page_nr##<\/title>/si;
+	}
 	my $page_nr = 1;
-	foreach my $page (split(/\f/,$pages)) {
+	foreach my $page (split(/\f/s,$pages)) {
+		print STDERR " $page_nr" if ($verbose);
 		my $pre_tmp = $pre_html;
 		$pre_tmp =~ s/##page_nr##/$page_nr<\/title>/s;
 		dump_contents($pre_tmp . $page . $post_html,time(), $path) if ($page !~ m/^\s*$/s);