/[safari]/get_book.sh
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /get_book.sh

Parent Directory | Revision Log | View Patch

revision 1.1 by dpavlin, Sun Dec 14 19:11:30 2003 UTC | revision 1.3 by dpavlin, Mon Dec 15 00:26:57 2003 UTC
# Line 1  Line 1 
1  #!/bin/sh  #!/bin/sh
2    
3  #export http_proxy=http://proxy.pliva.hr:8080  #export http_proxy=http://proxy:8080
4    
5  #isbn="0-201-41975-0"  if [ -z "$1" ] ; then
6  isbn="0-672-32240-4"          echo "Usage: $0 ISBN"
7            exit 1
8    fi
9    
10    isbn=$1;
11    
12  wait=10  wait=10
13    
# Line 21  function mirror() { Line 25  function mirror() {
25  }  }
26    
27  function geturl() {  function geturl() {
28          hindent -s $1 | grep $2 | grep -i href | grep mode=[st][eo]c | \          hindent -s $1 | grep -i href | grep mode=[st][eo]c | \
29          sed -e 's/^.*<a.*href="//i' \          sed -e 's/^.*<a.*href="//i' \
30                  -e 's/".*//' -e 's/amp;//g' \                  -e 's/".*//' -e 's/amp;//g' \
31                  -e 's,^[^\?]*\?,http://safari.oreilly.com/,' \                  -e 's,^[^\?]*\?,http://safari.oreilly.com/,' \
32                  -e 's/#$//' \                  -e 's/#$//' \
33                  -e 's/srchText=//' | \                  -e 's/\&srchText=//' \
34                  grep -v open=false | \                  -e 's/open=false/open=true/' | \
                 grep -v 'view=[A-Z].*%2F[^i]' | \  
                 grep -v 'view=[A-Z].*/[^i]' | \  
35                  grep '&s=1&b=1&f=1&t=1&c=1&u=1&r=&o=1' | \                  grep '&s=1&b=1&f=1&t=1&c=1&u=1&r=&o=1' | \
36                    grep $2 | \
37                  sort -u >> in                  sort -u >> in
38  }  }
39    
40    function uniqurl() {
41            mv in in.tmp
42            grep -v 'view=[A-Z]' in.tmp | sort -u > in
43            grep 'view=[A-Z].*/index' in.tmp | sort -u >> in
44    }
45    
46    function checklogin() {
47            if grep 'promo.asp' index.html* >/dev/null ; then
48                    echo "WARNING: safari seems to logunt you as user. Aborting."
49                    exit 1
50            fi
51    }
52    
53  echo > in  echo > in
54  #mirror "http://safari.oreilly.com/?XmlId=$isbn"  mirror "http://safari.oreilly.com/?XmlId=$isbn"
55    
56  echo "extract URLs from first page..."  echo "extract URLs from first page..."
57  geturl "index.html?XmlId=$isbn" $isbn  geturl "index.html?XmlId=$isbn" $isbn
58    uniqurl
59    
60  mirror "-i in"  mirror "-i in"
61    checklogin
62    
63  echo -n "extracting URLs [1]"  echo -n "extracting URLs [1]"
64  ls index.html* | while read file ; do  ls index.html* | while read file ; do
# Line 49  ls index.html* | while read file ; do Line 67  ls index.html* | while read file ; do
67  done  done
68  echo  echo
69    
70  sort -u in > in2  uniqurl
71    
72  mirror "-i in2"  mirror "-i in"
73    checklogin
74    
75  echo > in  echo > in
76  echo -n "extracting URLs [2]"  echo -n "extracting URLs [2]"
# Line 60  ls index.html* | while read file ; do Line 79  ls index.html* | while read file ; do
79          geturl $file $isbn          geturl $file $isbn
80  done  done
81    
82  sort -u in > in2  uniqurl
83    
84    mirror "-i in"
85    checklogin
86    
87    # convert links in html
88    bn=`basename $0`
89    dir=`echo $0 | sed "s/$bn$//"`
90    ls index.html* | xargs -i $dir/filter.pl {}
91    mkdir orig
92    mv index.html* orig/
93    
 mirror "-i in2"  

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.3

  ViewVC Help
Powered by ViewVC 1.1.26