Revision 2
Legend:
- Added
- Removed
- Modified
-
crawl.sh
#!/bin/sh
# crawl.sh - crawl a web site into a TSV file, then load that TSV into a
# table database with tctmgr and index its "title" and "body" columns.
#
# Usage: crawl.sh http://www.example.com
#
# Arguments:
#   $1  URL to crawl. A leading "http://" is stripped; the remainder
#       (host plus optional path) is used both as the crawl target and as
#       the path prefix of the output files.
#
# Files (relative to the current directory):
#   <base>.tsv  crawl output of wgettsv; reused as-is when it already exists
#   <base>.tct  table database built from the TSV
#
# Requires: ruby, a "wgettsv" script in the current directory, and tctmgr.
# Exit status: non-zero on usage error or as soon as any step fails.

# Trace every command. Set here rather than in the shebang so tracing is
# also active when the script is started as "sh crawl.sh".
set -x

if [ -z "$1" ]; then
  # Usage errors go to stderr and must not report success.
  echo "Usage: $0 http://www.example.com" >&2
  exit 1
fi

# Strip the scheme only when it is a prefix of the argument.
base=${1#http://}
dir=$(dirname -- "$base")

# Create the directory part of the output prefix when it is missing.
if [ -n "$dir" ] && [ ! -d "$dir" ]; then
  mkdir -v -p -- "$dir" || exit
fi

if [ ! -f "$base.tsv" ]; then
  # Crawl into a temporary file and rename it only on success, so a failed
  # or interrupted crawl never leaves a partial .tsv that a later run
  # would mistake for a finished one.
  tmp="$base.tsv.tmp.$$"
  trap 'rm -f -- "$tmp"' EXIT
  trap 'exit 1' HUP INT TERM
  # NOTE(review): -max 10000 presumably caps the number of fetched pages;
  # confirm against wgettsv's own usage text.
  ruby wgettsv -allow "$base" -deny cgi -max 10000 "http://$base" > "$tmp" || exit
  mv -- "$tmp" "$base.tsv" || exit
fi

# Build the table database, then add q-gram indexes on the text columns.
# Stop at the first failure so the indexing steps never run against a
# database whose import did not succeed.
tctmgr importtsv "$base.tct" "$base.tsv" || exit
tctmgr setindex -it qgram "$base.tct" title || exit
tctmgr setindex -it qgram "$base.tct" body || exit
tctmgr inform "$base.tct"