/[swish]/trunk/make_config.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/make_config.pl

Parent Directory | Revision Log | View Patch

revision 32 by dpavlin, Wed Apr 30 12:40:09 2003 UTC | revision 67 by dpavlin, Wed Mar 17 12:22:26 2004 UTC
# Line 6  Line 6 
6  #  #
7    
8  use strict;  use strict;
9    use Getopt::Long;
10    
11  my ($name,$url,$stripurl) = @ARGV;  my $stripurl;
12    
13    my $result = GetOptions ("stripurl=s" => \$stripurl);
14    
15    my $name = shift @ARGV;
16    my $url = shift @ARGV;
17    my $title = join(" ",@ARGV);
18    
19  if (!$name || !$url) {  if (!$name || !$url) {
20          print "Usage: $0 name URL\n";          print "Usage: $0 [--stripurl=path] name URL|path title\n";
21          exit;          exit;
22  }  }
23    
# Line 23  if ($dir !~ m#^/#) { Line 30  if ($dir !~ m#^/#) {
30    
31  my ($host,$urlpath);  my ($host,$urlpath);
32    
33  if ($url =~ m#(http://[^/]+)(/.+)$#) {  if ($url =~ m#(http://[^/]+)(/.*)$#) {
34          ($host,$urlpath) = ($1,$2);          ($host,$urlpath) = ($1,$2);
35          if ($stripurl && $urlpath =~ m/^(.*$stripurl)/) {          if ($stripurl && $url =~ m/^(.*$stripurl)/) {
36                  $stripurl = $1;                  $stripurl = $1;
37          } else {          } else {
38                  $stripurl = "";                  $stripurl = $host;
39          }          }
40    } elsif (-d $url && $url =~ m#.+?(/references/)#) {
41            $stripurl = $1;
42            $host = "";
43  } else {  } else {
44          print "Can't parse URL $url into host and path!\n";          print "Can't parse URL $url into host and path!\n";
45          exit 1;          exit 1;
# Line 38  if ($url =~ m#(http://[^/]+)(/.+)$#) { Line 48  if ($url =~ m#(http://[^/]+)(/.+)$#) {
48  my $baseurl = $url;  my $baseurl = $url;
49  $baseurl =~ s#/[^/]+$#/#g;  $baseurl =~ s#/[^/]+$#/#g;
50    
51  print "Config name: $name\nCrawling url: $url [strip $stripurl]\n";  print "Config name: $name\nCrawling: $url [strip $stripurl]\n";
52    print "Title: $title\n" if ($title);
   
 my $config = <<"EOF";  
53    
54  ###################################################  my $indexer_config = << "EOF";
55  IncludeConfigFile $dir/common.config  IncludeConfigFile $dir/common.config
56    
57  # this is a cludge to implement no parent URL feature in swish indexer  # this is a cludge to implement no parent URL feature in swish indexer
58  IndexDir "$url $baseurl"  IndexDir "$baseurl $url"
59  ReplaceRules replace " $baseurl" ""  ReplaceRules replace "$baseurl " ""
60  ReplaceRules replace "${host}${stripurl}" ""  ReplaceRules replace "${stripurl}" ""
61    EOF
62    
63    $indexer_config = << "EOF" if (-d $url);
64    IncludeConfigFile $dir/common-progspider.config
65    IndexDir $dir/spider/progspider
66    SwishProgParameters $url
67    ReplaceRules replace "/rest/references/" "/"
68    EOF
69    
70    my $config = <<"EOF";
71    
72    ###################################################
73    $indexer_config
74  IndexFile $dir/index/$name  IndexFile $dir/index/$name
75    
76  StoreDescription HTML <body> 500  StoreDescription HTML <body> 500
# Line 65  EOF Line 85  EOF
85    
86  my $xml = << "EOF";  my $xml = << "EOF";
87  <config  <config
88          max_hits="1000"          max_hits="10"
89          prog="/usr/bin/swish-e"          prog="/usr/bin/swish-e"
90          index="$dir/index/$name"          index="$dir/index/$name"
91          charset="iso-8859-2"          charset="iso-8859-2"
         title="$name search"  
92          affix="/usr/lib/ispell/american.aff"          affix="/usr/lib/ispell/american.aff"
93          url="$stripurl"  EOF
94    if ($title) {
95            $xml .= "\ttitle=\"$title\"\n";
96    } else {
97            $xml .= "\ttitle=\"$name search\"\n";
98    }
99    
100    my $xml_title = $title || $url;
101    
102    my $xml_url = $url;
103    $xml_url =~ s/^$host//;
104    my $xml_urlprefix = $stripurl;
105    $xml_urlprefix =~ s/^$host//;
106    
107    $xml .= "\turl=\"$xml_urlprefix\"\n" if ($stripurl);
108    $xml .= << "EOF";
109          >          >
110          <labels>          <labels>
111                  <label value="10"> 10</label>                  <label value="10"> 10</label>
112                    <label value="50"> 100</label>
113                  <label value="100"> 100</label>                  <label value="100"> 100</label>
                 <label value="0">unlimited</label>  
114          </labels>          </labels>
115          <text>          <text>
116                  <search>Search for </search>                  <search>Search for </search>
# Line 86  my $xml = << "EOF"; Line 120  my $xml = << "EOF";
120                  <no_properties>don't display results details</no_properties>                  <no_properties>don't display results details</no_properties>
121                  <hits>Showing %s documents (of maximum %d)... (%s)</hits>                  <hits>Showing %s documents (of maximum %d)... (%s)</hits>
122                  <no_hits>Can't find any documents (%s, %s)</no_hits>                  <no_hits>Can't find any documents (%s, %s)</no_hits>
123                  <footer>Searcher will try to create different variations of words using spelling dictionary. If you enter a word with minus (-) before it, it will exclude documents with this word and plus (+) will do the opposite (e.g. -work +play)</footer>                  <footer><![CDATA[
124                    Searcher will try to create different variations of words
125                    using spelling dictionary. If you enter a word with minus
126                    (<tt>-</tt>) before it, it will exclude documents with this
127                    word and plus (<tt>+</tt>) will do the opposite
128                    (e.g. <tt>-work +play</tt>)
129                    <p><small>Content indexed:
130                    <a href="$xml_url">$xml_title</small></p>
131                    ]]></footer>
132          </text>          </text>
133          <!-- additional data to show summary -->          <!-- additional data to show summary -->
134          <properties>swishdescription swishdocsize</properties>          <properties>swishdescription swishdocsize</properties>
# Line 97  my $xml = << "EOF"; Line 139  my $xml = << "EOF";
139  </config>  </config>
140  EOF  EOF
141    
142  print "Creating $dir/$name.config\n";  print "Creating $dir/config/$name\n";
143  open(C,"> $dir/$name.config") || die "can't open $dir/$name.config: $!";  open(C,"> $dir/config/$name") || die "can't open $dir/config/$name: $!";
144  print C $config;  print C $config;
145  close(C);  close(C);
146    
# Line 110  close(C); Line 152  close(C);
152  print "Creating symlink to $dir/html/$name.cgi\n";  print "Creating symlink to $dir/html/$name.cgi\n";
153  symlink "$dir/html/swish.cgi","$dir/html/$name.cgi" || die "can't create symlink $dir/html/$name.cgi: $!";  symlink "$dir/html/swish.cgi","$dir/html/$name.cgi" || die "can't create symlink $dir/html/$name.cgi: $!";
154    
155  #print "Index this with:\nswish-e -S http -c $dir/$name.config\n";  #print "Index this with:\nswish-e -S http -c $dir/config/$name\n";
156  #print "Search using $name.cgi\n";  #print "Search using $name.cgi\n";

Legend:
Removed from v.32  
changed lines
  Added in v.67

  ViewVC Help
Powered by ViewVC 1.1.26