/[swish]/trunk/crawl-parallel.sh
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/crawl-parallel.sh

Parent Directory | Revision Log


Revision 47 - (show annotations)
Tue Jan 20 15:58:15 2004 UTC (16 years, 5 months ago) by dpavlin
File MIME type: application/x-sh
File size: 1309 byte(s)
Start parallel swish-e to index multiple sets of documents.
More info at: http://blog.rot13.org/index.cgi/id_14

#!/bin/sh

# Parallel swish-e crawl script
#
# 2003-01-19 Dobrica Pavlinusic <dpavlin@rot13.org>
#
# somewhat documented in my blog at:
# http://blog.rot13.org/index.cgi/id_14
#
# Must be started from a directory that contains:
#   config/  swish-e configuration files, one per index
#   index/   where the created index files are stored
#
# One swish-e process is started per configuration file, throttled by the
# number of "*temp" files present under index/. When all of them are done,
# the individual indexes are merged into index/all.
#
# NOTE(review): throttling and the final wait both assume that a running
# swish-e keeps a "<index>.temp" file under index/ - confirm against the
# IndexFile setting of each configuration. A crawler that was just started
# but has not created its temp file yet is not counted.

# Print the number of "*temp" files currently below index/.
# The arithmetic expansion strips the padding some wc implementations add.
count_temp() {
  echo $(( $(find index -name '*temp' | wc -l) ))
}

# max. number of crawlers == nr. of processors
max=$(grep -c '^processor' /proc/cpuinfo 2>/dev/null)
# Without /proc/cpuinfo (or without "processor" lines) the count is empty
# or 0, which would make the throttle loop below spin forever.
case "$max" in
  '' | 0) max=1 ;;
esac

if [ ! -d index ]; then
  echo "This script needs index/ directory in current dir to store" >&2
  echo "created index files." >&2
  exit 1
fi

if [ ! -d config ]; then
  echo "This script needs config/ directory in current dir in which" >&2
  echo "are stored swish-e configuration files for each index." >&2
  exit 1
fi

# Remove temp files left over from an earlier run; they would otherwise be
# counted as running crawlers.
rm -f index/*temp

# Only regular files are configurations: skip config/ itself, any
# sub-directories and CVS metadata (the merge step skips CVS as well).
find config -type f ! -path '*/CVS/*' | while IFS= read -r config; do
  # Throttle: wait until fewer than $max crawlers are running.
  while [ "$(count_temp)" -ge "$max" ]; do
    echo "sleep"
    sleep 1
  done

  index=${config#config/}
  if [ -e "index/$index" ] || [ -e "index/$index.temp" ]; then
    # Already indexed, or being indexed right now.
    echo "skip $index"
  else
    (
      echo "indexing $index"
      # grep decides the outcome: when swish-e never reports
      # "files indexed" the run is treated as failed and its temp
      # files are removed so they do not block the loops forever.
      swish-e -S prog -c "$config" | grep "files indexed" \
        || rm -f "index/$index"*temp
    ) &
  fi
done

# The crawlers are children of the pipeline subshell above, so plain
# "wait" cannot see them; poll for their temp files instead.
while left=$(count_temp); [ "$left" -ne 0 ]; do
  echo "wait - $left left"
  sleep 1
done

# Rebuild the merged index from scratch.
rm -f index/all index/all.prop

# Collect every index file as a separate argument, skipping property
# files, the merge target itself and CVS metadata.
set --
for file in index/*; do
  case "${file#index/}" in
    *.prop | all | CVS) continue ;;
  esac
  # Also drops directories and the literal pattern of an empty index/.
  [ -f "$file" ] || continue
  set -- "$@" "./$file"
done

if [ "$#" -eq 0 ]; then
  echo "no index files found in index/ - nothing to merge" >&2
  exit 1
fi

swish-e -M "$@" index/all

Properties

Name Value
cvs2svn:cvs-rev 1.1
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26