#!/bin/bash
#                       /usr/local/bin/downloads
#  http://crystalfaeries.net/posix/bin/downloads
# celeste:crystalfaery DOWNLOADS 2020-09-09 07:34:31+00:00
#
# Management tool for the downloads initiated by "download", which:
# * may have been aborted (e.g. due to system reboot),
# * may still be running in the background (good worker!),
# * may have completed either with failure or success.
#
# We use the .wget.txt logfile generated by "download" to indicate status.
# When through downloading, we append index.html to HEADER.html
# and move the .wget.txt to .wget.txt-
# so that we can browse without contents hidden behind index.html
#
# As with "download", "downloads" will download any URLs thrown at it.
# The usual -h | --help and -v | --version options are standard.
# More details are included in prompts if no options or arguments provided.
help=18		# how many leading lines of this file "--help" prints

# CONFIGURATION:
# All work below is done relative to this directory.
DOWNLOADS="${HOME}/crystalfaeries.net/documents/"
if ! cd -P "${DOWNLOADS}"	# work in downloads directory
then
	exit -1
fi

case $# in
0)	# no arguments

    # For each site that was fetched as "www.site", fold it into the directory
    # "site" and leave "www.site" behind as a symlink to that directory.
    for www_dir in www.*; do
	# Only real directories qualify: this skips the literal pattern when nothing
	# matches, as well as "www.site" symlinks left by an earlier run.
	[ -d "${www_dir}" ] && [ ! -L "${www_dir}" ] || continue
	d="${www_dir#www.}"
	[ -n "${d}" ] || continue			# a directory named just "www." has no target
	rm		{www.,}"${d}"/robots.txt	2>/dev/null # remove what is usually the ONLY file from an off-site reference
	rm		{www.,}"${d}"/{www.,}"${d}"	2>/dev/null # remove recursive symlinks
	rm	               "${d}"			2>/dev/null # remove target symlinks
	rmdir		{www.,}"${d}"			2>/dev/null # remove empty directories or those which only had a robots.txt
	mkdir	-p	       "${d}"			2>/dev/null # create target directory
	if [ -d www."${d}" ]; then			# still present, i.e. rmdir found it non-empty
	    # The trailing slash copies the directory's whole contents, hidden
	    # entries included, and gives one exit status to act upon.
	    if rsync -auvH www."${d}"/ "${d}"/; then
		rm -rf www."${d}"			# remove the www. directory only once it has been copied
	    else
		echo "downloads: rsync of www.${d} into ${d} failed; www.${d} left untouched" >&2
		continue
	    fi
	fi
	ln -s	"${d}"	   www."${d}"			# replace with a symlink
    done

    # find attempted wgets
    # The scratch files of this run are named after our PID in /tmp; remove
    # them on every exit path instead of leaving them behind.
    trap 'rm -f "/tmp/$$.dirs.txt" "/tmp/$$.wgets.txt" "/tmp/$$.tmp.txt"' EXIT
    # One line per directory holding a .wget.txt log, with any "www." prefix
    # stripped so that "site" and its "www.site" alias collapse into one entry.
    for log in */.wget.txt; do
	if [ -e "${log}" ] || [ -L "${log}" ]; then	# skips the literal pattern when nothing matches
	    printf '%s\n' "${log}"
	fi
    done \
    | sed 's/\/\.wget\.txt$//g ; s/^www\.//g' \
    | sort -u >		/tmp/$$.dirs.txt	# attempted downloads

    # set the downloaddelay to the number of wgets still running:
    # Each ps line mentioning wget is reduced by sed to a bare host name:
    # everything up to the URL scheme (or else up to the last blank) is cut,
    # then the path and a leading "www." are dropped.
    # The pattern 'wg[e]t' is quoted on purpose: the bracket keeps grep from
    # matching its own command line, and the quotes keep the shell from
    # expanding it as a glob to a file named "wget" in the working directory.
    ps -ef --forest \
    | grep -v tail \
    | grep 'wg[e]t' \
    | sed 's/^.*https:\/\///g ; s/^.*http:\/\///g ; s/^.* //g ; s/\/.*$//g ; s/^www\.//g' \
    | sort -u >		/tmp/$$.wgets.txt	# running downloads
    wc -l <		/tmp/$$.wgets.txt > /etc/downloaddelay

    # Drop every download that is still running from the list of attempted
    # ones, so that only finished or failed downloads remain for review.
    while IFS= read -r f
    do	# filter out still-running wgets
	[ -n "${f}" ] || continue	# an empty pattern would match (and drop) every line
	# -F: the host name is literal text, not a regex (its dots are not wildcards);
	# -e: a name starting with "-" is not mistaken for an option.
	grep -vF -e "${f}"	/tmp/$$.dirs.txt >	/tmp/$$.tmp.txt
	mv					/tmp/$$.tmp.txt \
			/tmp/$$.dirs.txt
    done <	/tmp/$$.wgets.txt		# finished or failed

    # review	wget logs of downloads

    # _downloads_dedupe
    # Runs fdedupe in the current directory, answering "y" to whatever it asks.
    # Returns the exit status of fdedupe.
    _downloads_dedupe() {
	yes | fdedupe
    }

    # _downloads_finalize SITE COMMAND [ARG...]
    # Post-processes a download directory:
    #   enters SITE (or www.SITE when SITE cannot be entered), appends
    #   index.html to HEADER.html and removes index.html, runs COMMAND ARG...,
    #   writes .tree.txt and .du.txt, renames .wget.txt to .wget.txt- and
    #   returns to the previous directory.
    # The steps form one chain: the first failure skips all later steps and
    # prints "ERROR CODE: <status>" on stdout.  In that case the shell may be
    # left inside the site directory; the review loop resets its directory at
    # the start of every pass.
    _downloads_finalize() {
	local site="${1}"
	shift
	pushd	"${site}" 2>/dev/null || pushd www."${site}" \
	&& touch	index.html	HEADER.html \
	&& cat		index.html >>	HEADER.html \
	&& rm		index.html \
	&& "$@" \
	&& tree -d > .tree.txt \
	&& dudir > .du.txt \
	&& mv .wget.txt{,-} \
	&& popd \
	|| echo "ERROR CODE: $?"
    }

    if [ -s			/tmp/$$.dirs.txt ]
    then
	# One pass per finished or failed download; ytimg.com entries are never reviewed.
	for d in $(grep -v 'ytimg.com$'	/tmp/$$.dirs.txt)
	do
	    popd 2>/dev/null; cd -P "${DOWNLOADS}"	# in case we failed to pushd+popd in last pass of loop
	    echo ""
	    echo "======= ${d} ======= BEGIN"
	    head "${d}"/.wget.txt || head www."${d}"/.wget.txt
	    echo "------- ------- $(du -s "${d}") ------- -------"
	    tail "${d}"/.wget.txt || tail www."${d}"/.wget.txt
	    echo "======= ${d} ======= END..."
	    echo ""
	    echo -n "[$(cat /etc/downloaddelay)] D(elete), K(ill), F(dedupe), T(idy), V(iew), R(edownload), A(dd2RAWDOG), URL(download): "; read -r answer
	    # The prompt advertises upper-case keys, so both cases are accepted;
	    # anything that is not a known key is treated as a URL to download.
	    case "${answer}" in
	    d | D)
		    rm -rf -- "${d}"
		    ;;
	    k | K)
		    # debugging echo: the matching ps lines without the leading user name
		    ps -ef --forest | grep -v tail | grep 'wg[e]t' | grep -F -e "${d}" | sed 's/^[a-z][a-z]* *//g'
		    # the PID is the second column of "ps -ef", whatever the user name looks like
		    KILL_PID=$(ps -ef --forest | grep -v tail | grep 'wg[e]t' | grep -F -e "${d}" | awk '{print $2}')
		    echo "KILL_PID: $KILL_PID"
		    if [ -n "${KILL_PID}" ]
		    then
			for signal in hup term kill
			do
			    # deliberately unquoted: there may be several PIDs, one per line
			    kill -"$signal" $KILL_PID
			    sleep 6
			done
		    fi
		    echo "FINISHED by $(whoami)" >>	"${d}"/.wget.txt	2>/dev/null || \
		    echo "FINISHED by $(whoami)" >> www."${d}"/.wget.txt	# log our killing the download
		    echo "------- ${d} ------- ..."
		    tail				"${d}"/.wget.txt	2>/dev/null || \
		    tail			    www."${d}"/.wget.txt	# updated log end
		    echo "======= ${d} ======= END"
		    _downloads_finalize "${d}" _downloads_dedupe
		    ;;
	    f | F)
		    _downloads_finalize "${d}" _downloads_dedupe
		    ;;
	    t | T)
		    _downloads_finalize "${d}" name_tidy -r
		    ;;
	    v | V)
		    firefox "${d}"	< /dev/null	>& /dev/null	& disown %1	# view the website download directory
		    pushd	"${d}" 2> /dev/null || pushd www."${d}" && vi .??*
		    exec downloads	# start over
		    ;;
	    a | A)
		    rundog -a "https://${d}"	||	\
		    rundog -a "http://${d}"	# add to RAWDOG feeds list (will hang if busy) and to NewsBoat
		    ;;
	    n | N | "")
		    echo "no-op... ${d}"
		    ;;
	    r | R)
		    if download "${d}"
		    then
			echo "redownloading... ${d}"
		    else
			echo "ERROR CODE: $?"
		    fi
		    ;;
	    *)
		    if download "${answer}"
		    then
			echo "downloading... ${answer}"
		    else
			echo "ERROR CODE: $?"
		    fi
		    ;;
	    esac
	done
    fi
    # Site directories that are never migrated off the hard disk.
    # Each entry is used as an unanchored grep regex, exactly like the former
    # chain of "grep -v" stages, so an entry also keeps every name containing
    # it (e.g. the "www." symlink of the same site).
    # for now we are NOT migrating youtube.com videos to documents/downloads, as
    # they instead are manually sorted into the /video/ hierarchy :-)
    # ...and Lisa Renee's energeticdownloads podcasts stay un-tidied
    keep_on_disk=(
	youtube.com
	1320frequencyshift.com
	absinthes.com
	ags.hawaii.gov
	alexpetty.com
	all-natural.com
	bandshed.net
	battleofearth.wordpress.com
	bikedok.com
	bioethikaoils.com
	bits.debian.org
	blog.diasporafoundation.org
	caitlin-matthews.blogspot.com
	camendesign.com
	cassiopaea.org
	clayandiron.com
	cloudflarestatus.com
	commandlinefu.com
	cosmicconvergence.org
	crystalfaeries.net
	culinarysolvent.com
	cynthiasuelarson.wordpress.com
	davidrevoy.com
	debian-handbook.info
	debian.org
	dedoimedo.com
	defectivebydesign.org
	dgaryyoung.com
	dmzhawaii.org
	dot.kde.org
	drmalcolmkendrick.org
	druidgarden.wordpress.com
	efairies.com
	energeticdownloads.com
	energeticsynthesis.com
	faemagazine.com
	feed.pippa.io
	feedproxy.google.com
	feeds.feedburner.com
	feeds.fireside.fm
	feeds.twit.tv
	feeds2.feedburner.com
	forum.alchemyforums.com
	fossi-foundation.org
	freedomdecrypted.com
	friendi.ca
	gigiyoung.com
	hawaiiassembly.org
	heliastar.com
	herbreath.com
	honolulu.craigslist.org
	hypergridbusiness.com
	hyperledger.org
	iceagefarmer.com
	incoherency.co.uk
	investinblockchain.com
	jami.net
	jmrart.com
	kali.org
	kauaijuiceco.com
	heelslut.com
	linux.org
	livingintheprivate.blogspot.com
	lpg-c.org
	lucycorsetry.com
	metaverseink.com
	micronews.debian.org
	montalk.net
	mysticmamma.com
	neomutt.org
	newagemusicworld.com
	newdawnmagazine.com
	newsboat.org
	nexusnewsfeed.com
	nicolasfella.wordpress.com
	non-gmoreport.com
	nutrimedical.com
	nylonsparade.com
	oedb.org
	offog.org
	ohyesceleste.com
	open-source-energy.org
	opensimulator.org
	opensourceecology.org
	ose-21.org
	oshwa.org
	osseeds.org
	peppercarrot.com
	permaculturenews.org
	pesn.com
	phoronix.com
	phys.org
	pixls.us
	planet.gnu.org
	plantbasednews.org
	priestessalchemy.com
	puri.sm
	quantumweekly.com
	radiationdangers.com
	re3d.org
	retractionwatch.com
	risingmoonastrology.com
	robcourtofrecord.wordpress.com
	rtings.com
	rwgresearch.com
	ryf.fsf.org
	sacred-texts.com
	safecrossroads.net
	savannah.gnu.org
	shekinarose.com
	shop.hak5.org
	sifive.com
	sourceforge.net
	sparkfun.com:443
	sphinxsearch.com
	spreadprivacy.com
	sqrl.grc.com
	stateofthenation.co
	static.fsf.org
	steve.fi
	steve.grc.com
	tabublog.com
	talkshoe.com
	techblog.sethleedy.name
	the3foragers.blogspot.com
	theastrologypodcast.com
	thefreedomarticles.com
	thehutchisoneffect.com
	thelingerieaddict.com
	therightsofnature.org
	thunderbolts.info
	tomshardware.com
	underwater2web.com
	us.whatkatiedid.com
	vimeo.com
	wespenre.com
	wespenrepublications.home.blog
	wespenrevideos.com
	what-if.xkcd.com
	whatonearthishappening.com
	worldgrid.net
	xkcd.com
	www.yandy.com
	youngliving.com
	zodiactruth.com
    )
    # One "-e PATTERN" pair per entry: a single "grep -v" with all of them
    # drops exactly the lines the chain of single-pattern stages dropped.
    keep_args=()
    for keep_pattern in "${keep_on_disk[@]}"
    do
	keep_args+=(-e "${keep_pattern}")
    done

    # Migrate only when no wget is running any more (downloaddelay is 0).
    case $(cat /etc/downloaddelay) in
    0)
	media_root="/media/$(whoami)"

	# release mount points for removed media:
	rmdir "${media_root}"/*			2>/dev/null	# fail on non-empty
	rm    "${media_root}"/README.html	2>/dev/null	# yeah, in case...

	# MIGRATE downloads OFF the HARD DISK TO every available REMOVABLE storage
	# The review loop can leave us inside a site directory, and what follows
	# deletes by relative name: never continue from the wrong place.
	cd -P "${DOWNLOADS}" || exit 1				# migrate from
	# Globbing (rather than parsing ls) keeps media labels containing
	# blanks in one piece.
	for target in "${media_root}"/*/documents/		# migrate to
	do
		[ -d "${target}" ] || continue	# nothing mounted: the pattern stayed unexpanded
		# Site directory names contain no blanks, so splitting the
		# list on whitespace is safe here.
		for dir in $(ls -d *.{mil,gov,edu,org,com,net,show,info} 2>/dev/null	\
		| sed 's/\/$//'								\
		| grep -v "${keep_args[@]}")
		do
			rsync -auvH "${dir}" "${target}"				\
		&&	rm -rf -- "${dir}"	# delete off hard disk ONCE successfully MIGRATED
		done
	done
	;;
    *)	:
	;;
    esac
    ;;
* )	# we have an argument :-(
	# _downloads_self NAME ORIGIN
	# Prints the path under which this script can still be read after the
	# working directory changed: NAME itself when it is absolute, otherwise
	# NAME relative to ORIGIN (the directory we were started from).
	_downloads_self() {
		case "${1}" in
		/*)	printf '%s\n' "${1}" ;;
		*)	printf '%s\n' "${2:-.}/${1}" ;;
		esac
	}

	# Handle the arguments left to right: -v/-h print part of this file's
	# header and stop, anything else is handed to "download" as a URL.
	# OLDPWD is the start directory because the only "cd" so far was the one
	# into the downloads directory.
	while [ $# -gt 0 ]
	do case "${1}" in
	-v | --version )
		head -n 4 "$(_downloads_self "${0}" "${OLDPWD}")" | tail -n 1	# the 4th header line is the version stamp
		exit
		;;
	-h | --help )
		head -n "${help}" "$(_downloads_self "${0}" "${OLDPWD}")"
		exit
		;;
	* )	# treat it as a URL to download
		download	"${1}"
		shift		# dispose of that argument
		;;
	esac;done
	;;
esac
exit	$?	# Pau for Now

