/[swish]/trunk/crawl-parallel.sh
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/crawl-parallel.sh

Parent Directory | Revision Log


Revision 47 - (hide annotations)
Tue Jan 20 15:58:15 2004 UTC (20 years, 3 months ago) by dpavlin
File MIME type: application/x-sh
File size: 1309 byte(s)
Start parallel swish-e to index multiple sets of documents.
More info at: http://blog.rot13.org/index.cgi/id_14

#!/bin/sh

# Parallel swish-e crawl script
#
# 2003-01-19 Dobrica Pavlinusic <dpavlin@rot13.org>
#
# somewhat documented in my blog at:
# http://blog.rot13.org/index.cgi/id_14
#
# Usage: run from a directory that contains
#   config/  one swish-e configuration file per index
#   index/   where the index files are created
#
# Every regular file config/NAME is indexed by a background swish-e process
# (at most one per processor) unless index/NAME or index/NAME.temp already
# exists. When all crawlers are done, the indexes are merged into index/all.
#
# Throttling and progress reporting count the files matching index/*temp.
# NOTE(review): this presumes that each config writes its index to
# index/NAME and that swish-e keeps NAME*temp files there while it runs --
# confirm against the configuration files, which are not part of this script.

# Print the number of temporary files currently present under index/.
count_temp() {
  # tr strips the padding some wc implementations put in front of the count
  find index -name "*temp" | wc -l | tr -d ' '
}

# max. number of crawlers == nr. of processors
max=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
# A limit of 0 (no /proc/cpuinfo, or no "processor" lines in it) would make
# the throttle loop below spin forever, so always allow at least one crawler.
[ "${max:-0}" -ge 1 ] || max=1

if [ ! -d index ]; then
  echo "This script needs index/ directory in current dir to store" >&2
  echo "created index files." >&2
  exit 1
fi

if [ ! -d config ]; then
  echo "This script needs config/ directory in current dir in which" >&2
  echo "are stored swish-e configuration files for each index." >&2
  exit 1
fi

# Remove temporary files left over from an earlier run; they would otherwise
# be counted as running crawlers. -f keeps rm quiet when there are none.
rm -f index/*temp

for config in config/*; do
  # Only regular files are configurations. This skips sub-directories such
  # as CVS/ and the unexpanded pattern when config/ is empty; dotfiles are
  # not matched by the glob at all.
  [ -f "$config" ] || continue

  # Throttle: do not start another crawler while $max or more temp files exist.
  while [ "$(count_temp)" -ge "$max" ]; do
    echo "sleep"
    sleep 1
  done

  index=${config#config/}
  if [ -e "index/$index" ] || [ -e "index/$index.temp" ]; then
    echo "skip $index"
  else
    (
      echo "indexing $index"
      # grep prints the summary line; when swish-e never reports
      # "files indexed" the run is treated as failed and the temp files of
      # this index (and only this index) are removed so they stop counting
      # as a running crawler.
      if ! swish-e -S prog -c "$config" | grep "files indexed"; then
        rm -f -- "index/$index".*temp
      fi
    ) &
    # Dry-run stand-in for the crawler above, kept for testing the scheduling:
    # ( echo $index && touch index/$index.temp && sleep 3 && rm index/$index.temp ) &
  fi
done

# Report progress until no temporary files are left.
remaining=$(count_temp)
while [ "$remaining" -ne 0 ]; do
  echo "wait - $remaining left"
  sleep 1
  remaining=$(count_temp)
done

# A crawler that was started a moment ago may not have created its temp file
# yet, so the loop above can finish early; wait for the background jobs
# themselves before merging.
wait

rm -f index/all index/all.prop

# Collect the indexes to merge in the positional parameters so that file
# names are passed to swish-e intact. Property files, the merge target and
# anything that is not a regular file (e.g. a CVS directory) are left out.
set --
for file in index/*; do
  [ -f "$file" ] || continue
  case ${file##*/} in
    all | CVS | *.prop) continue ;;
  esac
  set -- "$@" "./$file"
done

if [ "$#" -eq 0 ]; then
  echo "No index files found in index/, nothing to merge." >&2
  exit 1
fi

swish-e -M "$@" index/all

Properties

Name Value
cvs2svn:cvs-rev 1.1
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26