"Fossies" - the Fresh Open Source Software Archive

Member "citadel/contrib/getdoku.sh" (5 Jun 2021, 1529 Bytes) of package /linux/www/citadel.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) Bash source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 #!/bin/bash
    2 
    # Base URL of the website to mirror; GetIndex and the per-document
    # download below prepend it to every page path they fetch.
    3 BASE_SITE=http://www.citadel.org
    4 
    5 
    6 
    #######################################
    # Retrieves an index document from the website and reduces it to the
    # list of wiki page links it contains, one link per line, rewritten
    # from the "/doku.php/<id>" form to the "/doku.php?id=<id>" form.
    # Links containing "do=" (wiki action links) are dropped.
    #
    # Globals:
    #   BASE_SITE (read) - base URL that the page path is appended to
    # Arguments:
    #   $1 - page path relative to BASE_SITE; the raw download is kept
    #        as /tmp/$1 because callers grep it later for page titles
    #   $2 - output file, relative to /tmp, receiving the filtered links
    # Returns:
    #   non-zero when the download fails (the output file is then left
    #   empty) or when no matching link was found.
    #######################################
    GetIndex() {
      local page=$1
      local outfile="/tmp/$2"
      local raw="/tmp/${page}"

      # -O pins the download location: without it wget saves to "name.1"
      # when the file already exists and a stale copy would be parsed.
      # Using an absolute path also avoids changing the caller's
      # working directory.
      if ! wget -q -O "$raw" "${BASE_SITE}/${page}"; then
        echo "GetIndex: failed to download ${BASE_SITE}/${page}" >&2
        : > "$outfile"   # never leave an outdated link list behind
        return 1
      fi

      grep /doku.php/ "$raw" \
        | grep -v "do=" \
        | sed -e "s;.*href=\";;" \
              -e "s;\" .*;;" \
              -e "s;doku.php/;doku.php?id=;" \
        | grep "^/doku" > "$outfile"
    }
   22 
   # Mirror the FAQ: fetch the main FAQ index, then for every section it
   # lists download each document of that section and assemble them into
   # one HTML page /tmp/<section with ':' replaced by '_'>.html.

   # Remove leftovers of a previous run so stale downloads are not parsed.
   rm -f /tmp/mainindex /tmp/doku.php*
   GetIndex "doku.php?id=faq:start" mainindex || {
     echo "error: could not obtain the FAQ main index" >&2
     exit 1
   }

   # Read the link lists line by line on dedicated descriptors (3 and 4):
   # the URLs contain '?', which an unquoted `for ... in $(cat ...)` would
   # treat as a glob character, and the commands run inside the loops
   # must not be able to consume the list through stdin.
   while IFS= read -r section_url <&3; do
     [[ -n "$section_url" ]] || continue

     # Section id = everything after the last '=' of the URL.
     section=${section_url##*=}
     section_dir="/tmp/${section}"
     echo "$section_url $section"

     mkdir -p -- "$section_dir" || continue
     # Start both collection files empty so a re-run does not append the
     # same entries a second time.
     : > "$section_dir/collect_index"
     : > "$section_dir/collect_bodies"

     GetIndex "$section_url" "${section}/${section}" \
       || echo "warning: no documents listed for $section" >&2

     while IFS= read -r doc_url <&4; do
       [[ -n "$doc_url" ]] || continue
       echo "-----------$doc_url----------------"

       # Last ':'-separated component of the page id names the document.
       doc_name=$(printf '%s\n' "$doc_url" | sed -e "s;/doku.php?id=.*:;;")
       # Human-readable title, taken from the first line of the downloaded
       # section index that mentions the document (text between the last
       # pair of single quotes before ','/doku...).
       plain_name=$(grep -F -- "$doc_name" /tmp/doku*"$section" \
         | head -n1 \
         | sed -e "s;','/doku.*;;" -e "s;.*';;")

       echo "********** retrieving $doc_name ************"
       if ! wget -q -O "$section_dir/$doc_name" \
           "${BASE_SITE}/${doc_url}&do=export_xhtmlbody"; then
         echo "warning: could not retrieve $doc_url" >&2
         rm -f -- "$section_dir/$doc_name"
         continue
       fi

       # Fall back to the document name when no title was found, so the
       # table-of-contents entry never ends up with an empty label.
       echo "<li><a href=\"#$doc_name\">${plain_name:-$doc_name}</a></li>" \
         >> "$section_dir/collect_index"
       echo "<a name=\"$doc_name\"></a>" >> "$section_dir/collect_bodies"
       cat -- "$section_dir/$doc_name" >> "$section_dir/collect_bodies"
     done 4< "$section_dir/$section"

     # Assemble the page: table of contents first, then all bodies.
     {
       echo "<html><head><title>$section</title></head><body><ul>"
       cat -- "$section_dir/collect_index"
       echo "</ul><hr>"
       cat -- "$section_dir/collect_bodies"
       echo "</body></html>"
     } > "/tmp/${section//:/_}.html"
   done 3< /tmp/mainindex