#!/bin/bash
#                       /usr/local/bin/downloads
#  http://crystalfaeries.net/posix/bin/downloads
# celeste:crystalfaery DOWNLOADS 2020-08-15 17:29:37+00:00
#
# Management tool for the downloads initiated by "download", which:
# may have been aborted due to system reboot,
# may still be running in the background,
# may have completed either with failure or success.
#
# We use the presence of a .wget.txt logfile to indicate status.
# When through downloading, we replace index.html with HEADER.html
#
# as with "download", "downloads" will download any URLs thrown at it.
# the usual -h | --help and -v | --version options are standard, and
# -m does its best to migrate all downloaded websites to removable media.
# more details are included in prompts if no options or arguments provided.
# Number of leading lines of this file that form the help text printed by -h | --help.
help=17

# All work below is done relative to this directory; give up when it is missing.
# NOTE(review): later code uses ~/downloads for the same purpose -- presumably
# one is a symlink to the other; confirm.
if ! cd "${HOME}"/documents/; then
	exit -1	# downloads directory
fi

case $# in
0)	# no arguments
	echo "30" > /etc/downloaddelay	# reset to default for delay between wget requests

	# For each "website" we wish to have a "www.site" symlink to the directory "site".
	# Works on the current directory: every real directory named www.SITE has its
	# contents merged into SITE and is then replaced by a symlink www.SITE -> SITE.
	# Existing www.SITE symlinks are left alone (they are already normalized).
	# A www.SITE directory is only deleted after rsync reported success.
	normalize_www_dirs() {
		local www_dir d stale
		for www_dir in www.*; do
			# real directories only; this also skips the literal pattern when nothing matches
			[[ -d "${www_dir}" && ! -L "${www_dir}" ]] || continue
			d="${www_dir#www.}"
			[[ -n "${d}" ]] || continue

			# robots.txt is usually the ONLY file from an off-site reference
			rm -f -- "www.${d}/robots.txt" "${d}/robots.txt"

			# remove recursive symlinks (left behind when "ln -s" ran against an existing directory)
			for stale in {www.,}"${d}"/{www.,}"${d}"; do
				[[ -L "${stale}" ]] && rm -f -- "${stale}"
			done

			# remove a target that is a symlink or plain file; a real directory is kept
			if [[ -L "${d}" || ! -d "${d}" ]]; then
				rm -f -- "${d}"
			fi

			# remove empty directories (or those which only had a robots.txt)
			rmdir "www.${d}" "${d}" 2>/dev/null
			mkdir -p "${d}" 2>/dev/null	# create target directory

			if [[ -d "www.${d}" ]]; then
				# trailing slashes: copy the directory CONTENTS, hidden files included
				if ! rsync -auvzH "www.${d}/" "${d}/"; then
					echo "downloads: could not merge www.${d} into ${d}; keeping www.${d}" >&2
					continue
				fi
				rm -rf -- "www.${d}"	# remove the www. directory
			fi
			ln -s "${d}" "www.${d}"		# replace with a symlink
		done
	}
	normalize_www_dirs

    # Review pass: list every site that has a .wget.txt log, record how many
    # wgets are still running, then walk through the sites interactively.

    # Filter (stdin -> stdout): from "ps -ef" lines print the unique site names
    # of the wget commands found; scheme, path and a leading "www." are stripped.
    # Lines mentioning "tail" (log watchers) are ignored.  The pattern is quoted
    # so the shell cannot glob it, and keeps the [e] so a grep process listed by
    # ps does not match itself.
    running_wget_sites() {
        grep -v tail \
        | grep 'wg[e]t' \
        | sed 's/^.*https:\/\///g ; s/^.*http:\/\///g ; s/^.* //g ; s/\/.*$//g ; s/^www\.//g' \
        | sort -u
    }

    # Filter (stdin -> stdout): keep the "ps -ef" lines of wget commands whose
    # line contains the literal site name $1.
    wget_lines_for_site() {
        grep -v tail | grep 'wg[e]t' | grep -F -- "$1"
    }

    # Filter (stdin -> stdout): PIDs (second "ps -ef" column) of the wgets for
    # site $1.  Taking the column by position works for any user name.
    wget_pids_for_site() {
        wget_lines_for_site "$1" | awk '{ print $2 }'
    }

    # Post-process the download directory of site $1 ($1 or www.$1):
    # fold index.html into HEADER.html, run the clean-up selected by $2
    # ("dedupe" -> yes|fdedupe, "tidy" -> name_tidy -r), write .tree.txt and
    # .du.txt, and retire the log as ".wget.txt-".  Runs in a subshell so the
    # caller's working directory is untouched even on failure.
    # Prints "ERROR CODE: n" and returns n when a step fails.
    finalize_site() {
        local site=$1 cleanup_mode=$2 status
        (
            cd -- "${site}" 2>/dev/null || cd -- "www.${site}" || exit
            touch index.html HEADER.html || exit
            cat index.html >> HEADER.html || exit
            rm index.html || exit
            case "${cleanup_mode}" in
            dedupe) yes | fdedupe || exit ;;
            tidy)   name_tidy -r || exit ;;
            esac
            tree -d > .tree.txt || exit
            dudir > .du.txt || exit
            mv .wget.txt .wget.txt-
        )
        status=$?
        if (( status != 0 )); then
            echo "ERROR CODE: ${status}"
        fi
        return "${status}"
    }

    # The site list below is relative to this directory, so the loop returns here.
    base_dir="${PWD}"

    # Unpredictable scratch files, removed on every exit path.
    dirs_list="$(mktemp "${TMPDIR:-/tmp}/downloads.dirs.XXXXXX")" || exit 1
    wgets_list="$(mktemp "${TMPDIR:-/tmp}/downloads.wgets.XXXXXX")" \
        || { rm -f -- "${dirs_list}"; exit 1; }
    cleanup_tmp() { rm -f -- "${dirs_list}" "${wgets_list}"; }
    trap cleanup_tmp EXIT

    # find attempted wgets
    for wget_log in */.wget.txt; do
        [[ -e "${wget_log}" ]] || continue      # unmatched glob stays literal
        site="${wget_log%/.wget.txt}"
        printf '%s\n' "${site#www.}"
    done | sort -u > "${dirs_list}"             # attempted downloads

    # set the downloaddelay to the number of wgets still running:
    ps -ef --forest | running_wget_sites > "${wgets_list}"   # running downloads
    echo "$(( $(wc -l < "${wgets_list}") ))" > /etc/downloaddelay

    # uncomment this section to avoid looking at active downloads
    # while IFS= read -r f
    # do
    #     grep -v -- "$f" "${dirs_list}" > "${dirs_list}.tmp"
    #     mv "${dirs_list}.tmp" "${dirs_list}"
    # done < "${wgets_list}"                    # finished or failed

    # review wget logs of downloads
    mapfile -t review_sites < <(grep -v 'ytimg.com$' "${dirs_list}")
    for d in "${review_sites[@]}"; do
        [[ -n "${d}" ]] || continue
        cd "${base_dir}" || exit 1              # every pass starts where the list was taken
        echo ""
        echo "======= ${d} ======= BEGIN"
        head "${d}"/.wget.txt || head www."${d}"/.wget.txt
        echo "------- ------- $(du -s "${d}") ------- -------"
        tail "${d}"/.wget.txt || tail www."${d}"/.wget.txt
        echo "======= ${d} ======= END..."
        echo ""
        printf '%s' "[$(cat /etc/downloaddelay)] D(elete), K(ill), F(dedupe), T(idy), V(iew), R(edownload), A(dd2RAWDOG), URL(download): "
        read -r answer
        # The prompt shows capitals, so both cases are accepted.
        case "${answer}" in
        d | D)
            rm -rf -- "${d}"
            ;;
        k | K)
            ps_snapshot="$(ps -ef --forest)"
            printf '%s\n' "${ps_snapshot}" | wget_lines_for_site "${d}"    # debugging echo
            mapfile -t kill_pids < <(printf '%s\n' "${ps_snapshot}" | wget_pids_for_site "${d}")
            echo "KILL_PID: ${kill_pids[*]}"
            if (( ${#kill_pids[@]} > 0 )); then
                # escalate gently; stop as soon as none of the processes is left
                for signal in HUP TERM KILL; do
                    kill -"${signal}" "${kill_pids[@]}"
                    sleep 6
                    kill -0 "${kill_pids[@]}" 2>/dev/null || break
                done
            fi
            # log our killing the download
            { echo "FINISHED by $(whoami)" >> "${d}"/.wget.txt; } 2>/dev/null \
                || echo "FINISHED by $(whoami)" >> www."${d}"/.wget.txt
            echo "------- ${d} ------- ..."
            tail "${d}"/.wget.txt 2>/dev/null \
                || tail www."${d}"/.wget.txt    # updated log end
            echo "======= ${d} ======= END"
            finalize_site "${d}" dedupe
            ;;
        f | F)
            finalize_site "${d}" dedupe
            ;;
        t | T)
            finalize_site "${d}" tidy
            ;;
        v | V)
            # view the website download directory
            firefox "${d}" < /dev/null > /dev/null 2>&1 &
            disown
            if cd -- "${d}" 2>/dev/null || cd -- www."${d}"; then
                vi .??*
            fi
            cleanup_tmp         # exec replaces this shell, so the EXIT trap would not run
            exec downloads      # start over
            ;;
        a | A)
            # add to RAWDOG feeds list (will hang if busy) and to NewsBoat
            rundog -a "https://${d}" || rundog -a "http://${d}"
            ;;
        n | N | "")
            echo "no-op... ${d}"
            ;;
        r | R)
            if download "${d}"; then
                echo "redownloading... ${d}"
            else
                echo "ERROR CODE: $?"
            fi
            ;;
        *)
            if download "${answer}"; then
                echo "downloading... ${answer}"
            else
                echo "ERROR CODE: $?"
            fi
            ;;
        esac
    done
    ;;
* )	# we have an argument :-(
	# Print the first directory found directly under $1 (with a trailing slash).
	# Returns non-zero and prints nothing when there is none.
	first_media_dir() {
		local candidate
		for candidate in "$1"/*/; do
			[[ -d "${candidate}" ]] || continue	# unmatched glob stays literal
			printf '%s\n' "${candidate}"
			return 0
		done
		return 1
	}

	# Print, one per line, the entries of the current directory that look like
	# downloaded sites (*.com *.gov *.mil *.net *.org), skipping youtube.com.
	migratable_sites() {
		local entry
		for entry in *.com *.gov *.mil *.net *.org; do
			[[ -e "${entry}" || -L "${entry}" ]] || continue
			[[ "${entry}" == *youtube.com* ]] && continue
			printf '%s\n' "${entry}"
		done
	}

	# Handle the arguments left to right: an option is acted on and ends the
	# script; anything else is handed to "download" as a URL.
	while [ $# -gt 0 ]; do
		case "${1}" in
		-v | --version)
			sed -n '4p' "$0"	# line 4 of this file is the version line
			exit
			;;
		-h | --help)
			head -n "${help}" "$0"
			exit
			;;
		-m | --migrate)
			media_root="/media/$(whoami)"

			# release mount points for removed media:
			rmdir "${media_root}"/* 2>/dev/null

			# migrate downloads off the hard disk to any available removable storage
			cd -P ~/downloads || exit 1		# migrate from; never migrate some other directory
			if ! target="$(first_media_dir "${media_root}")"; then	# migrate to
				echo "downloads: no removable media found under ${media_root}" >&2
				exit 1
			fi
			echo "migration target=${target}"
			sleep 15
			mapfile -t sites < <(migratable_sites)
			migrate_status=0
			for d in "${sites[@]}"; do
				# delete the local copy only after rsync succeeded
				if rsync -auvH "${d}" "${target}"; then
					rm -rf -- "${d}" || migrate_status=1
				else
					migrate_status=1
				fi
			done
			exit "${migrate_status}"	# non-zero when any site failed to migrate
			;;
		*)	# treat it as a URL to download
			download "${1}"
			shift			# dispose of that argument
			;;
		esac
	done
	;;
esac
# Pau for Now: hand the status of the last command run above back to the caller.
exit "$?"

# CODE DEVELOPMENT IN PROGRESS:
# We wish to review all webpages stored on the computer,
# those already viewed being logged into ~/.dush.webpages.txt
# so as not to review them again, and
# finding all of them by extension (.htm, .html, .mht),
# which we'll track in ~/.webpages.txt
# We may wish to recycle code from the similarly purposed script "images".