/[swish]/trunk/make_config.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/make_config.pl

Parent Directory | Revision Log | View Patch

revision 40 by dpavlin, Sun Jun 1 11:45:19 2003 UTC | revision 67 by dpavlin, Wed Mar 17 12:22:26 2004 UTC
# Line 6  Line 6 
6  #  #
7    
8  use strict;  use strict;
9    use Getopt::Long;
10    
11    my $stripurl;
12    
13    my $result = GetOptions ("stripurl=s" => \$stripurl);
14    
15  my $name = shift @ARGV;  my $name = shift @ARGV;
16  my $url = shift @ARGV;  my $url = shift @ARGV;
 #my $stripurl = shift @ARGV;  
 my $stripurl;  
17  my $title = join(" ",@ARGV);  my $title = join(" ",@ARGV);
18    
19  if (!$name || !$url) {  if (!$name || !$url) {
20          print "Usage: $0 name URL [strip from url] [optional title]\n";          print "Usage: $0 [--stripurl=path] name URL|path title\n";
21          exit;          exit;
22  }  }
23    
# Line 29  my ($host,$urlpath); Line 32  my ($host,$urlpath);
32    
33  if ($url =~ m#(http://[^/]+)(/.*)$#) {  if ($url =~ m#(http://[^/]+)(/.*)$#) {
34          ($host,$urlpath) = ($1,$2);          ($host,$urlpath) = ($1,$2);
35          if ($stripurl && $urlpath =~ m/^(.*$stripurl)/) {          if ($stripurl && $url =~ m/^(.*$stripurl)/) {
36                  $stripurl = $1;                  $stripurl = $1;
37          } else {          } else {
38                  $stripurl = "";                  $stripurl = $host;
39          }          }
40    } elsif (-d $url && $url =~ m#.+?(/references/)#) {
41            $stripurl = $1;
42            $host = "";
43  } else {  } else {
44          print "Can't parse URL $url into host and path!\n";          print "Can't parse URL $url into host and path!\n";
45          exit 1;          exit 1;
# Line 42  if ($url =~ m#(http://[^/]+)(/.*)$#) { Line 48  if ($url =~ m#(http://[^/]+)(/.*)$#) {
48  my $baseurl = $url;  my $baseurl = $url;
49  $baseurl =~ s#/[^/]+$#/#g;  $baseurl =~ s#/[^/]+$#/#g;
50    
51  print "Config name: $name\nCrawling url: $url [strip $stripurl]\n";  print "Config name: $name\nCrawling: $url [strip $stripurl]\n";
52  print "Title: $title\n" if ($title);  print "Title: $title\n" if ($title);
53    
54    my $indexer_config = << "EOF";
 my $config = <<"EOF";  
   
 ###################################################  
55  IncludeConfigFile $dir/common.config  IncludeConfigFile $dir/common.config
56    
57  # this is a cludge to implement no parent URL feature in swish indexer  # this is a cludge to implement no parent URL feature in swish indexer
58  IndexDir "$baseurl $url"  IndexDir "$baseurl $url"
59  ReplaceRules replace "$baseurl " ""  ReplaceRules replace "$baseurl " ""
60  ReplaceRules replace "${host}${stripurl}" ""  ReplaceRules replace "${stripurl}" ""
61    EOF
62    
63    $indexer_config = << "EOF" if (-d $url);
64    IncludeConfigFile $dir/common-progspider.config
65    IndexDir $dir/spider/progspider
66    SwishProgParameters $url
67    ReplaceRules replace "/rest/references/" "/"
68    EOF
69    
70    my $config = <<"EOF";
71    
72    ###################################################
73    $indexer_config
74  IndexFile $dir/index/$name  IndexFile $dir/index/$name
75    
76  StoreDescription HTML <body> 500  StoreDescription HTML <body> 500
# Line 70  EOF Line 85  EOF
85    
86  my $xml = << "EOF";  my $xml = << "EOF";
87  <config  <config
88          max_hits="1000"          max_hits="10"
89          prog="/usr/bin/swish-e"          prog="/usr/bin/swish-e"
90          index="$dir/index/$name"          index="$dir/index/$name"
91          charset="iso-8859-2"          charset="iso-8859-2"
# Line 81  if ($title) { Line 96  if ($title) {
96  } else {  } else {
97          $xml .= "\ttitle=\"$name search\"\n";          $xml .= "\ttitle=\"$name search\"\n";
98  }  }
99  $xml .= "\turl=\"$stripurl\"\n" if ($stripurl);  
100    my $xml_title = $title || $url;
101    
102    my $xml_url = $url;
103    $xml_url =~ s/^$host//;
104    my $xml_urlprefix = $stripurl;
105    $xml_urlprefix =~ s/^$host//;
106    
107    $xml .= "\turl=\"$xml_urlprefix\"\n" if ($stripurl);
108  $xml .= << "EOF";  $xml .= << "EOF";
109          >          >
110          <labels>          <labels>
111                  <label value="10"> 10</label>                  <label value="10"> 10</label>
112                    <label value="50"> 100</label>
113                  <label value="100"> 100</label>                  <label value="100"> 100</label>
                 <label value="0">unlimited</label>  
114          </labels>          </labels>
115          <text>          <text>
116                  <search>Search for </search>                  <search>Search for </search>
# Line 97  $xml .= << "EOF"; Line 120  $xml .= << "EOF";
120                  <no_properties>don't display results details</no_properties>                  <no_properties>don't display results details</no_properties>
121                  <hits>Showing %s documents (of maximum %d)... (%s)</hits>                  <hits>Showing %s documents (of maximum %d)... (%s)</hits>
122                  <no_hits>Can't find any documents (%s, %s)</no_hits>                  <no_hits>Can't find any documents (%s, %s)</no_hits>
123                  <footer>Searcher will try to create different variations of words using spelling dictionary. If you enter a word with minus (-) before it, it will exclude documents with this word and plus (+) will do the opposite (e.g. -work +play)</footer>                  <footer><![CDATA[
124                    Searcher will try to create different variations of words
125                    using spelling dictionary. If you enter a word with minus
126                    (<tt>-</tt>) before it, it will exclude documents with this
127                    word and plus (<tt>+</tt>) will do the opposite
128                    (e.g. <tt>-work +play</tt>)
129                    <p><small>Content indexed:
130                    <a href="$xml_url">$xml_title</small></p>
131                    ]]></footer>
132          </text>          </text>
133          <!-- additional data to show summary -->          <!-- additional data to show summary -->
134          <properties>swishdescription swishdocsize</properties>          <properties>swishdescription swishdocsize</properties>
# Line 108  $xml .= << "EOF"; Line 139  $xml .= << "EOF";
139  </config>  </config>
140  EOF  EOF
141    
142  print "Creating $dir/$name.config\n";  print "Creating $dir/config/$name\n";
143  open(C,"> $dir/$name.config") || die "can't open $dir/$name.config: $!";  open(C,"> $dir/config/$name") || die "can't open $dir/config/$name: $!";
144  print C $config;  print C $config;
145  close(C);  close(C);
146    
# Line 121  close(C); Line 152  close(C);
152  print "Creating symlink to $dir/html/$name.cgi\n";  print "Creating symlink to $dir/html/$name.cgi\n";
153  symlink "$dir/html/swish.cgi","$dir/html/$name.cgi" || die "can't create symlink $dir/html/$name.cgi: $!";  symlink "$dir/html/swish.cgi","$dir/html/$name.cgi" || die "can't create symlink $dir/html/$name.cgi: $!";
154    
155  #print "Index this with:\nswish-e -S http -c $dir/$name.config\n";  #print "Index this with:\nswish-e -S http -c $dir/config/$name\n";
156  #print "Search using $name.cgi\n";  #print "Search using $name.cgi\n";

Legend:
Removed from v.40  
changed lines
  Added in v.67

  ViewVC Help
Powered by ViewVC 1.1.26