/[swish]/trunk/spider/progspider
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/spider/progspider

Parent Directory | Revision Log | View Patch

revision 46 by dpavlin, Sat Jan 17 23:57:55 2004 UTC revision 57 by dpavlin, Sun Jan 25 16:49:50 2004 UTC
# Line 1  Line 1 
1  #!/usr/local/bin/perl -w  #!/usr/local/bin/perl -w
2  use strict;  use strict;
3  use File::Find;  use File::Find;
4    use Getopt::Long;
5    
6    my $collection;         # name which will be inserted
7    my $path_add;           # add additional info in path
8    my $verbose;
9    
10    #$verbose = 1;
11    
12    my $result = GetOptions(
13            "collection=s" => \$collection,
14            "path=s" => \$path_add,
15            "verbose!" => \$verbose,
16            "debug!" => \$verbose,
17    );
18    
19  my $dir = shift @ARGV || die "usage: $0 [dir]";  my $dir = shift @ARGV || die "usage: $0 [dir]";
20    
21    
22  my $basedir = $0;  my $basedir = $0;
23  $basedir =~ s,/[^/]+$,/,;  $basedir =~ s,/[^/]+$,/,;
24  require "$basedir/filter.pm";  require "$basedir/filter.pm";
25    
26    
27  find({ wanted => \&file,  find({ wanted => \&file,
28          follow => 1,          follow => 1,
29          no_chdir => 1          no_chdir => 1
# Line 18  sub file { Line 33  sub file {
33    
34          return if (! -f || ! m/\.html*/i);          return if (! -f || ! m/\.html*/i);
35    
36            # skip index files
37            return if (m/index_[a-z]\.html*/i || m/index_symbol\.html*/i);
38    
39          my $path = $_;          my $path = $_;
40    
41          open(F,"$path") || die "can't open file: $path";          open(F,"$path") || die "can't open file: $path";
42          print STDERR "$path";          print STDERR "$path" if ($verbose);
43          my $contents;          my $contents;
44          while(<F>) {          while(<F>) {
45  #               chomp;                  $contents .= "$_";
 #               chomp;  
 #               $contents .= " ".$_;  
                 $contents .= $_;  
46          }          }
47            $contents .= "\n\n";
48    
49  #       $contents =~ s/<(\/*\w+)\s+>/<$1>/g;          $contents = filter($contents,$collection);
50    
51          $contents = filter($contents);  #       die "zero size content in '$path'" if (! $contents);
52            return if (! $contents);        # don't die on empty files
53    
54          my $mtime = time;          my $mtime = time;
55            use bytes;
56          my $size = length $contents;          my $size = length $contents;
57    
58          print STDERR " [$size]\n";          print STDERR " [$size]\n" if ($verbose);
59    
60            # add optional components to path
61            $path .= " $path_add" if ($path_add);
62    
63          # Output the document (to swish)          # Output the document (to swish)
64          print <<EOF;          print <<EOF;
# Line 47  Last-Mtime: $mtime Line 68  Last-Mtime: $mtime
68  Document-Type: HTML  Document-Type: HTML
69    
70  EOF  EOF
   
71          print $contents;          print $contents;
72    
73  }  }

Legend:
Removed from v.46  
changed lines
  Added in v.57

  ViewVC Help
Powered by ViewVC 1.1.26