1 |
#!/usr/bin/perl |
#!/usr/bin/perl |
2 |
# |
# |
3 |
# SLies Copyright 2001 Dobrica Pavlinusic <dpavlin@rot13.org> |
# PLies Copyright 2001 Dobrica Pavlinusic <dpavlin@rot13.org> |
4 |
# |
# |
5 |
# this tool is based on SlideMaker and XLSies tool |
# this tool is based on SlideMaker and XLSies tool |
6 |
# split a all.htm into slide*.htm |
# split a all.htm into slide*.htm |
53 |
## |
## |
54 |
|
|
55 |
## show debug output |
## show debug output |
56 |
my $debug=1; |
my $debug=0; |
57 |
|
|
58 |
## default DOCTYPE added on the slides |
## default DOCTYPE added on the slides |
59 |
$doctype = '<html xmlns="http://www.w3.org/TR/REC-html40">'; |
$doctype = '<html xmlns="http://www.w3.org/TR/REC-html40">'; |
133 |
my %page_data; |
my %page_data; |
134 |
my %overview_data; |
my %overview_data; |
135 |
|
|
136 |
|
my $pack = 0; |
137 |
|
my @pack_additional; # additional files to pack (pictures, logos...) |
138 |
|
my %nesttag = init_nesttag(); |
139 |
|
|
140 |
############################################################################## |
############################################################################## |
141 |
## reading user input from $infos |
## reading user input from $infos |
142 |
## |
## |
143 |
@PARAM = @ARGV; # we keep this for backward compatibility with an old version |
my @file; |
|
# of the slidemaker tool |
|
|
#when the parameters were in Makefile or make.bat |
|
144 |
|
|
145 |
# read parameters from infos.txt and put them in @PARAM |
############################################################################## |
146 |
# parse_infos(@lines) - apply "name = value" parameter lines.
#
# Arguments:
#   @lines - raw lines from an infos.txt file or command line arguments.
#            Blank lines and lines starting with '#' are ignored; UNIX (\n)
#            and Windows (\r) line endings are stripped before parsing.
#
# Side effects:
#   For every accepted line the script variable $name is set to the value
#   and the line is echoed on STDERR.
#
# Errors:
#   dies   when a line has a name but no '=' ("no value defined for ...").
#   warns  (and skips the line) when the name is not a plain identifier or
#          when the assignment itself fails.
sub parse_infos {
    foreach my $line (@_) {
        # skip undefined/empty elements, comments and whitespace-only lines
        next unless defined $line && $line =~ /^[^#\n\r]/ && $line =~ /\S/;

        (my $entry = $line) =~ tr/\r\n//d;    # remove UNIX \n and WINDOWS \r

        my ($var, $value) = split(/ *= */, $entry, 2);
        $var =~ s/^\s+|\s+$//g;
        die "no value defined for $var" unless defined $value;

        # The name is spliced into code below, so it must be a bare
        # identifier; anything else would be executed as Perl.
        unless ($var =~ /^[A-Za-z_]\w*\z/) {
            warn "problem with eval of: invalid parameter name '$var'";
            next;
        }

        # Escape backslashes as well as quotes so the value survives the
        # single-quoted literal unchanged (a trailing \ used to break it).
        (my $quoted = $value) =~ s/([\\'])/\\$1/g;

        # A string eval (rather than a symbolic reference) is used so that
        # file-scoped "my" variables can be assigned as well as globals.
        my $cmd = "no strict 'vars'; \$$var = '$quoted';";
        eval $cmd;
        # Test $@, not the eval result: values like 0 or '' are legitimate.
        warn "problem with eval of: $cmd: $@" if $@;

        print STDERR "$entry\n";
    }
    return;
}
164 |
## @PARAM is now a table with the user preferences for his presentation |
############################################################################## |
165 |
|
|
166 |
## process arguments |
parse_infos(@ARGV); # backward compatibility and for pack |
167 |
## each preset variable is now re-attributed using the user preferences |
|
168 |
foreach (@PARAM) { |
# read parameters from infos.txt and put them in @PARAM |
169 |
my ($var,$value) = split(/ *= */,$_,2); |
if (open(INFOS, $infos)) { |
170 |
$value=~s/'/\\'/g; |
print STDERR "--- Reading parameters file $infos ---\n"; |
171 |
$cmd="\$$var = \'$value\';"; |
@file = <INFOS>; |
172 |
if ($value) { |
parse_infos(@file); |
|
eval($cmd) || die "problem with eval of: $cmd"; |
|
|
} else { |
|
|
die "no value defined for $var"; |
|
|
} |
|
173 |
} |
} |
174 |
|
|
175 |
|
# try to read local infos.txt for template |
176 |
|
if (-f "$template/$infos" && open(INFOS,"$template/$infos")) { |
177 |
|
print STDERR "--- Reading template parameters file $template/$infos ---\n"; |
178 |
|
@file = <INFOS>; |
179 |
|
parse_infos(@file); |
180 |
|
close(INFOS); |
181 |
|
} |
182 |
|
|
183 |
|
## @PARAM is now a table with the user preferences for his presentation |
184 |
|
|
185 |
## use charset |
## use charset |
186 |
|
|
187 |
if ($charset) { |
if ($charset) { |
190 |
$http_equiv=''; |
$http_equiv=''; |
191 |
} |
} |
192 |
|
|
193 |
|
|
194 |
############################################################################## |
############################################################################## |
195 |
## read the raw html presentation |
## read the raw html presentation |
196 |
## |
## |
200 |
$/ = undef; |
$/ = undef; |
201 |
open(ALL, $all) || die "Error: Cannot open file: $all"; |
open(ALL, $all) || die "Error: Cannot open file: $all"; |
202 |
my $buf = <ALL>; |
my $buf = <ALL>; |
203 |
|
$buf =~ s/\r//g; # remove WINDOWS \r |
204 |
close(ALL); |
close(ALL); |
205 |
$/ = $sep; |
$/ = $sep; |
206 |
|
|
208 |
## they do not need to show up on the slides |
## they do not need to show up on the slides |
209 |
$buf =~ s/<!--.*?-->//sgo; |
$buf =~ s/<!--.*?-->//sgo; |
210 |
|
|
211 |
|
## if $pack is set, output name of css (for inclusion in archive), but |
212 |
|
## reset $cssStandard only to filename (without path) |
213 |
|
|
214 |
|
if ($pack) { |
215 |
|
push @pack_additional,$cssStandard; |
216 |
|
$cssStandard =~ s/^.*\/([^\/]+)$/$1/g; |
217 |
|
} |
218 |
|
|
219 |
## the slidemaker tool assumes that each slide is self contained between 2 sets of h1 tags |
## the slidemaker tool assumes that each slide is self contained between 2 sets of h1 tags |
220 |
## if not it will generate a rather weird output |
## if not it will generate a rather weird output |
221 |
## split using <h1...> and </h1...> as separator (ignores attributes!) |
## split using <h1...> and </h1...> as separator (ignores attributes!) |
235 |
############################################################################## |
############################################################################## |
236 |
## processing the slides |
## processing the slides |
237 |
|
|
238 |
print STDOUT "\n--- Processing $total slides ---\n"; |
print STDERR "\n--- Processing $total slides ---\n"; |
239 |
|
|
240 |
## generate the header table of content of the presentation |
## generate the header table of content of the presentation |
241 |
## which is also the first page of the talk |
## which is also the first page of the talk |
252 |
Text::FastTemplate->preload( [ |
Text::FastTemplate->preload( [ |
253 |
{ file => 'slide.html', key => 'slide' }, |
{ file => 'slide.html', key => 'slide' }, |
254 |
{ file => 'overview.html', key => 'overview' }, |
{ file => 'overview.html', key => 'overview' }, |
255 |
|
{ file => 'title.html', key => 'title' }, |
256 |
]); |
]); |
257 |
|
|
258 |
## unroll relative anchors (#something) into links with slides |
## unroll relative anchors (#something) into links with slides |
261 |
|
|
262 |
## step 1: record anchors |
## step 1: record anchors |
263 |
for($i=0; $i<$total; $i++) { |
for($i=0; $i<$total; $i++) { |
264 |
my $tmp = $table[($i*2)]; |
my $tmp = $table[($i*2)].$table[($i*2)+1]; |
265 |
while ($tmp =~ s,<a +name="*([^>"]+)"*>,,i) { |
while ($tmp =~ s,<a +name="*([^>"]+)"*>,,i) { |
266 |
$anchor_on_slide{$1}=($i+1); |
$anchor_on_slide{$1}=($i+1); |
267 |
print "\tslide ",($i+1)," anchor: $1\n" if ($debug); |
print STDERR "\tslide ",($i+1)," anchor: $1\n" if ($debug); |
268 |
} |
} |
269 |
} |
} |
270 |
|
|
299 |
## need to check if the title contains any anchor |
## need to check if the title contains any anchor |
300 |
## if so it needs to be removed |
## if so it needs to be removed |
301 |
## because the title is being used in the table of content to link to the corresponding slide |
## because the title is being used in the table of content to link to the corresponding slide |
302 |
$table[0] =~ s/(.*)<A[^>]*>(.*)<\/A>(.*)/$1$2$3/i; |
$table[0] = remove_anchor($table[0]); |
303 |
|
|
304 |
## grab next slide title $table[2] (if there's a next slide) |
## grab next slide title $table[2] (if there's a next slide) |
305 |
## to be able to use in the 'next' navigation button |
## to be able to use in the 'next' navigation button |
306 |
## keep in mind that $table[1] contains the slide corresponding to the title $table[0] |
## keep in mind that $table[1] contains the slide corresponding to the title $table[0] |
307 |
if ($table[2]) { |
if ($table[2]) { |
308 |
$next_title= $table[2]; |
$next_title= remove_anchor($table[2]); |
|
## remove any anchor from the next slide title |
|
|
$next_title =~ s/(.*)<A[^>]*>(.*)<\/A>(.*)/$1$2$3/i; |
|
309 |
} |
} |
310 |
|
|
311 |
## the current slide content is stored $table[1] |
## the current slide content is stored $table[1] |
316 |
|
|
317 |
## extract slide Sub Title <h2> |
## extract slide Sub Title <h2> |
318 |
undef $slideSubTitle; |
undef $slideSubTitle; |
319 |
if ($slideContent =~ s/<[hH]2[^>]*>([^<]+)<\/[hH]2[^>]*>//) { |
if ($slideContent =~ s/<[hH]2[^>]*>(.+)<\/[hH]2[^>]*>//sm) { |
320 |
$slideSubTitle=$1; |
$slideSubTitle=remove_anchor($1); |
321 |
} |
} |
322 |
|
|
323 |
## add the title of the current slide to the table of content |
## add the title of the current slide to the table of content |
329 |
&createSlide($slideTitle,$slideSubTitle,$slideContent ,$slideCount++,$prev_title,$next_title); |
&createSlide($slideTitle,$slideSubTitle,$slideContent ,$slideCount++,$prev_title,$next_title); |
330 |
|
|
331 |
## save the title of the previous slide to be displayed in the 'previous' navigation button |
## save the title of the previous slide to be displayed in the 'previous' navigation button |
332 |
$prev_title="$table[0]"; |
$prev_title=remove_anchor($table[0]); |
333 |
} |
} |
334 |
## process the next slide |
## process the next slide |
335 |
while (shift(@table)); |
while (shift(@table)); |
343 |
## and would not work on all platforms (ie would fail on Joe's laptop) |
## and would not work on all platforms (ie would fail on Joe's laptop) |
344 |
&generateTOC; |
&generateTOC; |
345 |
|
|
346 |
|
## print additional files to pack |
347 |
|
print STDOUT join("\n",@pack_additional) if ($pack); |
348 |
|
|
349 |
print STDOUT "--- Finished ---\n"; |
print STDERR "--- Finished ---\n"; |
350 |
exit 0; |
exit 0; |
351 |
## |
## |
352 |
## end of the slidemaker main program |
## end of the slidemaker main program |
360 |
{ |
{ |
361 |
## open the file to write to |
## open the file to write to |
362 |
open(FOO, ">$_[0].html") || die "can't open $_[0].html: $!"; |
open(FOO, ">$_[0].html") || die "can't open $_[0].html: $!"; |
363 |
|
push @pack_additional,"$_[0].html" if ($pack); |
364 |
|
|
365 |
## the style sheet used in the table of content is |
## the style sheet used in the table of content is |
366 |
$stylelink = ""; |
$stylelink = ""; |
389 |
author => $author, |
author => $author, |
390 |
authorUrl => $authorUrl, |
authorUrl => $authorUrl, |
391 |
author2 => $author2, |
author2 => $author2, |
392 |
authorUrl2 => $authorUrl2, |
author2Url => $author2Url, |
393 |
|
|
394 |
date => $date, |
date => $date, |
395 |
|
|
396 |
toc => $loc_toc, |
toc_title => $loc_toc, |
397 |
|
template_dir => "$template/", |
398 |
); |
); |
399 |
|
|
400 |
} |
} |
411 |
$overview_data{toc_entries} = [ @toc_entries ]; |
$overview_data{toc_entries} = [ @toc_entries ]; |
412 |
|
|
413 |
my $page= new Text::FastTemplate key => 'overview'; |
my $page= new Text::FastTemplate key => 'overview'; |
414 |
|
$page_data{template_dir}='' if ($pack); |
415 |
print FOO $page->output( \%overview_data ); |
print FOO $page->output( \%overview_data ); |
416 |
|
|
417 |
close(FOO); |
close(FOO); |
438 |
if ($nr % $toc_on_page == 0) { |
if ($nr % $toc_on_page == 0) { |
439 |
my $toc_nr=int($nr/$toc_on_page); |
my $toc_nr=int($nr/$toc_on_page); |
440 |
|
|
441 |
%item = ( |
$item = { |
442 |
pre_html => $pre_ul, |
pre_html => $pre_ul, |
443 |
accesskey => " ", # space |
accesskey => " ", # space |
444 |
href => "index-toc$toc_nr.html", |
href => "index-toc$toc_nr.html", |
445 |
title => "...", |
title => "...", |
446 |
post_html => $post_ul, |
post_html => $post_ul, |
447 |
more => 1, # use style for more pages link (...) |
more => 1, # use style for more pages link (...) |
448 |
) |
}; |
449 |
# push @toc_entries, %item; |
push @toc_entries, $item; |
450 |
|
|
451 |
&closeOverview; |
&closeOverview; |
452 |
|
undef @toc_entries; |
453 |
&openOverview("$overview-toc$toc_nr"); |
&openOverview("$overview-toc$toc_nr"); |
454 |
$last_toc_title=''; |
$last_toc_title=''; |
455 |
} |
} |
456 |
|
|
457 |
$pre_ul=$post_ul=''; |
$pre_ul=$post_ul=''; |
458 |
if ($last_toc_title eq $title) { |
if ($last_toc_title eq $title && $subtitle) { |
459 |
$title = $subtitle; |
$title = $subtitle; |
460 |
$pre_ul='<ul>'; |
$pre_ul='<ul>'; |
461 |
$post_ul='</ul>'; |
$post_ul='</ul>'; |
477 |
}; |
}; |
478 |
push @toc_entries,$item; |
push @toc_entries,$item; |
479 |
} else { |
} else { |
480 |
%item = ( |
$item = { |
481 |
pre_html => $pre_ul, |
pre_html => $pre_ul, |
482 |
tabindex => "$nr", |
tabindex => "$nr", |
483 |
href => "slide$nr.html", |
href => "slide$nr.html", |
484 |
title => $title, |
title => $title, |
485 |
post_html => $post_ul, |
post_html => $post_ul, |
486 |
) |
}; |
487 |
# push @toc_entries,\%item; |
push @toc_entries,$item; |
488 |
} |
} |
489 |
} |
} |
490 |
## |
## |
527 |
|
|
528 |
$status = sprintf "Slide %2d: %s %s\n", $nr, $title, $subtitle; |
$status = sprintf "Slide %2d: %s %s\n", $nr, $title, $subtitle; |
529 |
$status =~ s/<[^>]+>//g; |
$status =~ s/<[^>]+>//g; |
530 |
print STDOUT $status; |
print STDERR $status; |
531 |
|
|
532 |
&verify_html($content); # check the html |
&verify_html($content); # check the html |
533 |
|
&check_tags($content); # check open and closed tags |
534 |
|
|
535 |
## write to the slide |
## write to the slide |
536 |
open(SLIDE, ">slide$nr.html") || die "can't save slide$nr.html: $!"; |
open(SLIDE, ">slide$nr.html") || die "can't save slide$nr.html: $!"; |
537 |
|
push @pack_additional,"slide$nr.html" if ($pack); |
538 |
|
|
539 |
my $toc_link = "$overview\.html"; |
my $toc_link = "$overview\.html"; |
540 |
|
|
559 |
|
|
560 |
my $slide_html=make_progress_bar($nr,$total); |
my $slide_html=make_progress_bar($nr,$total); |
561 |
|
|
562 |
|
# undefine body if no content is found (so that template can show |
563 |
|
# only title and sub-title |
564 |
|
if ($content !~ m/\S/g) { |
565 |
|
undef $content; |
566 |
|
} |
567 |
|
|
568 |
%page_data = ( |
%page_data = ( |
569 |
doctype => $doctype, |
doctype => $doctype, |
570 |
talkTitle => $talkTitle, |
talkTitle => $talkTitle, |
589 |
toc_link => $toc_link, |
toc_link => $toc_link, |
590 |
next_link => $next_link, |
next_link => $next_link, |
591 |
prev_title => $prev_title, |
prev_title => $prev_title, |
592 |
|
toc_title => $loc_toc, |
593 |
next_title => $next_title, |
next_title => $next_title, |
594 |
|
|
595 |
author => $author, |
author => $author, |
596 |
authorUrl => $authorUrl, |
authorUrl => $authorUrl, |
597 |
author2 => $author2, |
author2 => $author2, |
598 |
authorUrl2 => $authorUrl2, |
author2Url => $author2Url, |
599 |
|
|
600 |
date => $date, |
date => $date, |
601 |
|
|
602 |
slide_html => $slide_html, |
slide_html => $slide_html, |
603 |
|
|
604 |
|
template_dir => "$template/", |
605 |
); |
); |
606 |
|
|
607 |
my $page= new Text::FastTemplate key => 'slide'; |
my $page; |
608 |
|
if ($content) { |
609 |
|
$page= new Text::FastTemplate key => 'slide'; |
610 |
|
} else { |
611 |
|
$page= new Text::FastTemplate key => 'title'; |
612 |
|
} |
613 |
|
$page_data{template_dir}='' if ($pack); |
614 |
print SLIDE $page->output( \%page_data ); |
print SLIDE $page->output( \%page_data ); |
615 |
|
extract_files($page->output( \%page_data )) if ($pack); |
616 |
close(SLIDE); |
close(SLIDE); |
617 |
return 0; |
return 0; |
618 |
} |
} |
660 |
|
|
661 |
if ($_[0] =~ /<img([^>]*)>/im) { |
if ($_[0] =~ /<img([^>]*)>/im) { |
662 |
if (!($1 =~ /ALT=/im)) { |
if (!($1 =~ /ALT=/im)) { |
663 |
print STDOUT "WARNING: <IMG> without ALT\n"; |
print STDERR "WARNING: <IMG> without ALT\n"; |
664 |
print STDOUT " <IMG$1>\n" ; |
print STDERR " <IMG$1>\n" ; |
665 |
} |
} |
666 |
} |
} |
667 |
} |
} |
670 |
# clean the html of the slide |
# clean the html of the slide |
671 |
# remove all <div class="comment">blabla</div> |
# remove all <div class="comment">blabla</div> |
672 |
# clean_html($html) - return a cleaned copy of a slide's HTML.
#
# Removes every <div class="comment">...</div> block and every <font ...>
# start or end tag (the text between the font tags is kept).
# The argument itself is left untouched; callers must use the return value.
# An undefined argument is returned as undef.
sub clean_html {
    my $html = $_[0];    # copy, so the caller's variable (aliased through @_) is not modified
    return $html unless defined $html;

    $html =~ s/<div\s+class\s*=\s*(?:comment[\s>]|\"comment\").*?<\/div>//igs;

    # [^>]* (not +) so that attribute-less <font> and the closing </font>
    # are stripped too instead of being left dangling.
    $html =~ s{</?font\b[^>]*>}{}gi;

    return $html;
}
678 |
|
|
679 |
############################################################################## |
############################################################################## |
698 |
my $pcnt_done=int($nr*100/$total); |
my $pcnt_done=int($nr*100/$total); |
699 |
my $pcnt_left=100-$pcnt_done; |
my $pcnt_left=100-$pcnt_done; |
700 |
|
|
701 |
if ($progress_bar) { |
if ($progress_bar && uc($progress_bar) ne "NO") { |
702 |
my $l=$r=" "; |
my $l=$r=" "; |
703 |
my $t="$nr of $total"; |
my $t="$nr of $total"; |
704 |
if ($pcnt_done > 50) { |
if ($pcnt_done > 50) { |
706 |
} else { |
} else { |
707 |
$r=$t; |
$r=$t; |
708 |
} |
} |
709 |
$html='<table border="0" width="50%" cellpadding="0" cellspacing="0" align="right"><tr><td width="'.$pcnt_done.'%" class="pcnt-done">'.$l.'</td><td width="'.$pcnt_left.'%" class="pcnt-left">'.$r.'</td></tr></table>'; |
$html='<table border="0" width="50%" cellpadding="0" cellspacing="0" align="right"><tr>'; |
710 |
|
if ($pcnt_done != 0) { |
711 |
|
$html.='<td width="'.$pcnt_done.'%" class="pcnt-done">'.$l.'</td>'; |
712 |
|
} |
713 |
|
if ($pcnt_left != 0) { |
714 |
|
$html.='<td width="'.$pcnt_left.'%" class="pcnt-left">'.$r.'</td>'; |
715 |
|
} |
716 |
|
$html.='</tr></table>'; |
717 |
} else { |
} else { |
718 |
$html="$loc_slide $nr $loc_of $total"; |
$html="$loc_slide $nr $loc_of $total"; |
719 |
} |
} |
721 |
return $html; |
return $html; |
722 |
} |
} |
723 |
|
|
724 |
|
############################################################################## |
725 |
|
# remove anchors <a href...> from html (for titles) |
726 |
|
# remove_anchor($html) - return a copy of $html without <a ...> / </a> tags.
#
# Only the anchor tags are dropped, the text they enclose is kept. Every
# anchor is handled, including several on one line and anchors that span
# lines. Tags that merely begin with "a" (<abbr>, <address>, ...) are left
# alone. An undefined argument is returned as undef.
sub remove_anchor {
    # Copy first: callers pass $1, which any further match would clobber.
    my $text = $_[0];
    return $text unless defined $text;

    $text =~ s{</?a(?:\s[^>]*)?>}{}gi;

    return $text;
}
731 |
|
|
732 |
|
############################################################################## |
733 |
|
# extract files referenced in presentation |
734 |
|
|
735 |
|
# extract_files($html) - collect local files referenced by a page.
#
# Scans $html for href= and src= attributes (all hrefs first, then all srcs)
# and appends every value that
#   - does not look like an http/ftp URL,
#   - names an existing plain file, and
#   - is not already listed
# to the file-level @pack_additional list.
# Nothing useful is returned.
sub extract_files {
    my $html = $_[0];
    return unless defined $html;

    while ($html =~ s/href="*([^"\s]+)"*//ism ||
           $html =~ s/src="*([^"\s]+)"*//ism) {
        # Save the capture at once: $1 does not survive further matches.
        my $file = $1;

        next if $file =~ m/[hf]t?tp:/;
        next unless -f $file;

        # Exact string comparison; using the name as a regex would treat
        # '.', '+', '(' ... as metacharacters and match substrings.
        next if grep { $_ eq $file } @pack_additional;

        push @pack_additional, $file;
    }
    return;
}
744 |
|
|
745 |
|
############################################################################## |
746 |
|
# check tags in slide |
747 |
|
# based on code from hindent 1.1.2 by Paul Balyoz <pab@domtools.com> |
748 |
|
|
749 |
|
# init_nesttag() - return the table of "nesting" tags as a (name => 1) list.
#
# These are the tags that require their own end tag (<TAG>...</TAG>) and are
# therefore tracked when checking nesting. All other tags are ignored.
# WARNING: names must be lower-case here.
# Intended to be called in list context, e.g. my %nesttag = init_nesttag();
sub init_nesttag {
    my @nesting_tags = (
        qw(html head body title),
        qw(a),
        qw(table tr th td),
        qw(form select textarea),
        # 'p' is deliberately absent: many people use <P> but not </P>
        qw(ul ol dl blockquote center div),
        qw(font pre tt i b u strike big small sub sup em strong),
        qw(dfn code samp kbd var cite),
        qw(h1 h2 h3 h4 h5 h6),
        qw(applet),
        qw(map),
        qw(frameset noframes),
    );

    return map { $_ => 1 } @nesting_tags;
}
813 |
|
|
814 |
|
# check_tags($html) - warn on STDERR about badly nested tags in a slide.
#
# Only tags listed in the file-level %nesttag hash are tracked; every other
# tag is ignored. The tag name is the first run of non-blank characters
# inside <...>, compared case-insensitively.
#
# Warnings are printed when
#   - an end tag has no matching start tag,
#   - an end tag closes an element while inner elements are still open,
#   - start tags are still open after the whole text has been read.
# Nothing useful is returned.
sub check_tags {
    my $html = $_[0];
    return unless defined $html;

    my @open_tags;    # nesting tags opened and not yet closed (original case)

    while ($html =~ /<(.*?)>/gs) {
        my $inside = $1;

        # leading "/" marks an end tag
        my ($is_end, $name) = $inside =~ m{^(/?)(\S+)};
        next unless defined $name && $nesttag{lc $name};

        if (!$is_end) {
            push @open_tags, $name;
            next;
        }

        # Look for the nearest open tag of the same name, innermost first,
        # going all the way down to the bottom of the stack.
        my $wanted = lc $name;
        my $match  = $#open_tags;
        $match-- while $match >= 0 && lc($open_tags[$match]) ne $wanted;

        if ($match < 0) {
            # nothing to close: leave the open tags alone so they are
            # still reported at the end
            print STDERR "WARNING: more end than begin tags around </$wanted> !\n";
            next;
        }

        # Close the matched tag; whatever was opened after it was never closed.
        my @unclosed = splice(@open_tags, $match);
        shift @unclosed;
        if (@unclosed) {
            print STDERR "WARNING: missing end tags before </$wanted>: <",
                join("> <", @unclosed), ">\n";
        }
    }

    if (@open_tags) {
        my $level = scalar @open_tags;
        print STDERR "WARNING: level=$level, ", $level,
            " tags left on stack after done parsing! Specifically:\n<",
            join("> <", @open_tags), ">\n";
    }
    return;
}
852 |
|
|