#!/bin/bash
#                       /usr/local/bin/webdump_burp
# https://crystalfaeries.net/posix/bin/webdump_burp
# celeste:crystalfaery WEBDUMP_BURP 2016-09-16 01:37:36+00:00
# DERIVED FROM:
#######################################
# Print the program banner, usage line, port cheat-sheets and to-do notes.
# Arguments: $1 - program name to show in the usage line (default: $0)
# Outputs:   20 lines of text on stdout
#######################################
print_banner() {
  local prog=${1:-$0}
  # The program name is passed as one quoted word so that a path containing
  # runs of blanks or glob characters is shown exactly as invoked.
  printf '%s\n' \
    'WebDump 3.0b+celeste' \
    '======================================' \
    '* USAGE:' \
    "$prog INPUTFILE.CSV" \
    'input files are HOSTNAME,PORT' \
    '======================================' \
    'this app will quickly download a large number of websites first page then remove duplicates.' \
    'it will also automaticly download https sites with port 443' \
    '* Tested with Cygwin' \
    '* using gawk GNU Awk 4.0.0' \
    '* using curl 7.22.0 (i686-pc-cygwin)' \
    ' top web ports ports' \
    '80-83,99,100,443,631,800,1000,1739,2002,2301,2381,3000,5800,5988,5989,8000-8015,8080-808,8099,8100-8105,8443,8888,8900,9999,10000' \
    ' more web ports' \
    '10080,10100,10243,10250,10251,1027,1029,1030,1032,10439,10444,11267,1183,1184,11869,11905,11910,11935,1208,13080,1416,14176,14654,16000,16080,16372,17012,18083,1818,18180,1830,1831,19000,19082,19091,19101,1947,1972,19740,2002,2030,20444,2130,2140,21988,2301,2316,2381,2414,2424,24305,2480,2523,25684,25825,2693,27775,280,28080,2851,2869,30444,30900,31458,31459,3201,3227,32843,3339,34988,35135,35145,3526,3617,3790,37922,3842,3914,3938,4036,4053,41647,4220,4239,4343,443,45000,4680,47001,4723,48018,4848,4864,49152,49157,50000,50001,50038,51785,51905,51908,5225,53001,5357,5440,5447,5449,5469,54850,5500,5501,554,5554,55885,56414,56737,57423,57772,57773,5800,5801,591,593,5985,5989,60000,6001,6002,6003,6004,60213,61000,6107,6108,6113,6114,6160,6161,631,6325,6453,6454,65084,65093,6842,7001,7002,7003,7070,7099,7126,7191,7359,7453,7454,7717,7751,80,8000,8001,8002,8003,8004,8008,8020,8070,8071,8077,8080,8081,8082,8083,8085,8086,8087,8088,8090,8093,8094,8095,8099,81,8107,8113,8114,8115,8118,8120,8123,8126,8133,8135,8138,815,8150,8151,8180,82,8200,8222,8260,8300,8323,8333,84,8444,85,8530,8533,86,8660,8666,8701,8703,8732,8733,8740,8878,8880,8888,8889,8900,90,9000,9001,9002,9005,9006,9073,9080,9081,9084,9086,9087,9090,9091,9191,9300,9310,9444,9501,9510,9595,9642,9675,9676,9797,9823,9887' \
    '======================================' \
    'todo:' \
    'Add support for page reset on burp to hit it again with HTTPS https is only on 443 ' \
    'add support for use proxy flag' \
    '---rmccurdy.com'
}

print_banner

# Safety stop: the script always terminates here, so nothing below runs
# until this guard is removed on purpose.
# Exit statuses are limited to 0-255; 255 is the value bash reports for the
# former out-of-range `exit -1`, now spelled explicitly.
echo 'DEBUG EXIT UNTIL CODE IS VETTED' >&2
exit 255

###########################################
# Fetch the front page of every HOSTNAME,PORT entry through the proxy at
# localhost:8080 and save each one as HOSTNAME_PORT.html in /home/downloads.
#
# Input:  $1 - CSV list of HOSTNAME,PORT lines; with no argument the list
#         is read from standard input (what the former bare `cat $1` did).
# Exits:  254 when the download directory cannot be entered,
#         253 when the input list cannot be read.

# A relative list path is resolved against the caller's directory when the
# file exists there; otherwise it is left alone and resolved after the cd,
# as before.
input=${1:-/dev/stdin}
if [[ "$input" != /* && -e "$input" ]]; then
  input="$PWD/$input"
fi

# 254 is the status the former out-of-range `exit -2` produced.
cd -P /home/downloads || exit 254

if [[ ! -r "$input" ]]; then
  printf '%s: cannot read input list: %s\n' "$0" "$input" >&2
  exit 253
fi

batch_size=30   # transfers started between two `wait` barriers
count=1

# The list is read on fd 3 so nothing inside the loop can consume it.
# The `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
while IFS= read -r line <&3 || [[ -n "$line" ]]; do
  line=${line%$'\r'}              # tolerate CRLF line endings
  [[ -n "$line" ]] || continue

  # Same field split the former sed|gawk pair did: commas become blanks,
  # then the first two whitespace-separated fields are host and port.
  IFS=$' \t' read -r host port _ <<<"${line//,/ }"
  if [[ -z "$host" || -z "$port" ]]; then
    printf 'skipping malformed line: %s\n' "$line" >&2
    continue
  fi

  # Any port containing "443" (443, 8443, ...) is fetched over TLS. Only the
  # port field is inspected, so a hostname that merely contains "443" no
  # longer switches the scheme.
  if [[ "$port" == *443* ]]; then
    scheme=https
  else
    scheme=http
  fi

  # curl is invoked directly with quoted arguments; list contents are never
  # passed through a shell. --location-trusted follows redirects like -L.
  curl -s --insecure -x localhost:8080 --location-trusted -m 3 \
    -o "${host}_${port}.html" "${scheme}://${host}:${port}" &

  # After every $batch_size launches, let the running transfers drain.
  if (( count % batch_size == 0 )); then
    wait
  fi
  count=$(( count + 1 ))
done 3<"$input"

# Barrier: `wait` with no operands returns once every background job of this
# shell has exited, so there is neither a fixed delay nor a race with
# transfers that are still running.
echo waiting for remaining downloads to complete ..
wait
echo removing dupes

#######################################
# Delete saved pages that contain the text "Burp Suite Professional"
# (the original comment calls these "burp failed files").
# Only ./*.html is examined; nothing else in the directory is touched.
# Returns: 0
#######################################
purge_burp_error_pages() {
  local page
  for page in ./*.html; do
    [[ -f "$page" ]] || continue   # also covers the unmatched-glob case
    if grep -q -- 'Burp Suite Professional' "$page"; then
      rm -f -- "$page"
    fi
  done
  return 0
}

#######################################
# Move near-duplicate pages into ./DELETE.
# The ./*.html files are ordered by byte size, largest first, and each file
# is compared with the one before it. When sdiff reports fewer than three
# differing lines, the earlier (larger) file of the pair is moved.
# Files already in ./DELETE and non-.html files are never examined.
# Paths containing tabs or newlines are not supported.
# Outputs: one line on stdout per moved file
# Returns: 1 when ./DELETE cannot be created, otherwise 0
#######################################
quarantine_near_duplicates() {
  local prev='' cur page report status diff_lines

  mkdir -p DELETE || return 1

  while IFS=$'\t' read -r _ cur; do
    if [[ -n "$prev" ]]; then      # the first file has nothing to compare to
      report=$(sdiff -B -b -s "$cur" "$prev")
      status=$?
      # sdiff: 0 = same, 1 = different, anything else = trouble (missing
      # tool, unreadable or binary input). Trouble must never count as
      # "no differences", so such pairs are left in place.
      if (( status <= 1 )); then
        if [[ -z "$report" ]]; then
          diff_lines=0
        else
          diff_lines=$(wc -l <<<"$report")
        fi
        if (( diff_lines < 3 )); then
          echo "$cur" and "$prev" dont look different moving to DELETE
          # Trailing slash: fail instead of renaming if DELETE is not a dir.
          mv -- "$prev" ./DELETE/
        fi
      fi
    fi
    prev=$cur
  done < <(
    for page in ./*.html; do
      [[ -f "$page" ]] || continue
      # Exact byte counts; block-granular sizes would make small pages tie.
      printf '%d\t%s\n' "$(( $(wc -c <"$page") ))" "$page"
    done | LC_ALL=C sort -t $'\t' -k1,1nr -k2
  )
  return 0
}

purge_burp_error_pages
quarantine_near_duplicates



# syntax highlighted by Code2HTML, v. 0.9.1