/[swish]/trunk/spider/progspider
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/spider/progspider

Parent Directory | Revision Log | View Patch

revision 50 by dpavlin, Tue Jan 20 18:13:32 2004 UTC revision 56 by dpavlin, Fri Jan 23 13:10:40 2004 UTC
# Line 1  Line 1 
1  #!/usr/local/bin/perl -w  #!/usr/local/bin/perl -w
2  use strict;  use strict;
3  use File::Find;  use File::Find;
4    use Getopt::Long;
5    
6    my $collection;         # name which will be inserted
7    my $path_add;           # add additional info in path
8    my $verbose;
9    
10    my $result = GetOptions(
11            "collection=s" => \$collection,
12            "path=s" => \$path_add,
13            "verbose!" => \$verbose,
14            "debug!" => \$verbose,
15    );
16    
17  my $dir = shift @ARGV || die "usage: $0 [dir]";  my $dir = shift @ARGV || die "usage: $0 [dir]";
18    
19    
20  my $basedir = $0;  my $basedir = $0;
21  $basedir =~ s,/[^/]+$,/,;  $basedir =~ s,/[^/]+$,/,;
22  require "$basedir/filter.pm";  require "$basedir/filter.pm";
23    
24    
25  find({ wanted => \&file,  find({ wanted => \&file,
26          follow => 1,          follow => 1,
27          no_chdir => 1          no_chdir => 1
# Line 18  sub file { Line 31  sub file {
31    
32          return if (! -f || ! m/\.html*/i);          return if (! -f || ! m/\.html*/i);
33    
34            # skip index files
35            return if (m/index_[a-z]\.html*/i || m/index_symbol\.html*/i);
36    
37          my $path = $_;          my $path = $_;
38    
39          open(F,"$path") || die "can't open file: $path";          open(F,"$path") || die "can't open file: $path";
40  #       print STDERR "$path\n";          print STDERR "$path" if ($verbose);
41          my $contents;          my $contents;
42          while(<F>) {          while(<F>) {
43                  $contents .= $_;                  $contents .= "$_";
44          }          }
45            $contents .= "\n\n";
46    
47          $contents = filter($contents);          $contents = filter($contents,$collection);
48    
49  #       die "zero size content in '$path'" if (! $contents);  #       die "zero size content in '$path'" if (! $contents);
50          return if (! $contents);        # don't die on empty files          return if (! $contents);        # don't die on empty files
51    
52          my $mtime = time;          my $mtime = time;
53            use bytes;
54          my $size = length $contents;          my $size = length $contents;
55    
56  #       print STDERR " [$size]\n";          print STDERR " [$size]\n" if ($verbose);
57    
58            # add optional components to path
59            $path .= " $path_add" if ($path_add);
60    
61          # Output the document (to swish)          # Output the document (to swish)
62          print <<EOF;          print <<EOF;
# Line 45  Last-Mtime: $mtime Line 66  Last-Mtime: $mtime
66  Document-Type: HTML  Document-Type: HTML
67    
68  EOF  EOF
   
69          print $contents;          print $contents;
70    
71  }  }

Legend:
Removed from v.50  
Changed lines
  Added in v.56

  ViewVC Help
Powered by ViewVC 1.1.26