#!/bin/bash
#                       /usr/local/bin/downloads
#  http://crystalfaeries.net/posix/bin/downloads
# celeste:crystalfaery DOWNLOADS 2017-11-29 00:53:00+00:00
#
# Management tool for the downloads initiated by "download", which:
# may have been aborted due to system reboot,
# may still be running in the background,
# may have completed either with failure or success.
#
# We use the presence of a .wget.txt logfile to indicate status.
# When through downloading, we replace that with:
# .hardlinks.txt	if there are hardlinked files
# .du.txt		a "dudir" listing of disk usage
# .tree.txt		a directory tree
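#
# Purely as an illustration of that convention (commented out, not part of this
# script): a hypothetical helper "status_of" could report where a site stands:
# status_of () {				# usage: status_of <site-directory>
# 	if   [ -e "$1"/.wget.txt ]; then echo "download running or aborted"
# 	elif [ -e "$1"/.du.txt   ]; then echo "download finished and summarized"
# 	else                             echo "status unknown"
# 	fi
# }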

cd /home/downloads/		|| exit 1	# downloads directory (abort if missing)

# for each "website" we wish to have a "www.site" symlink to the directory "site"
for d in $(find . -maxdepth 1 -type d -name 'www.*' | sed 's/^\.\/www\.//g' | sort -u);do
	rm		{www.,}"${d}"/robots.txt	2>/dev/null # remove what is usually the ONLY file from an off-site reference
	rm		{www.,}"${d}"/{www.,}"${d}"	2>/dev/null # remove recursive symlinks
	rm	               "${d}"			2>/dev/null # remove target symlinks
	rmdir		{www.,}"${d}"			2>/dev/null # remove empty directories or those which only had a robots.txt
	mkdir	-p	       "${d}"			2>/dev/null # create target directory
	rsync	-auvzH	   www."${d}"/.??*	"${d}"	2>/dev/null # merge hidden files into the target
	rsync	-auvzH	   www."${d}"/*		"${d}"		    # merge regular files into the target
	rm	-rf	   www."${d}"				    # remove the now-merged www. directory
	ln -s	"${d}"	   www."${d}"				    # replace with a symlink
done
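# Illustrative example of the intended result (an assumed site name): a crawl of
# www.example.org ends up as the real directory "example.org" holding the merged
# files, with "www.example.org" left behind only as a symlink pointing to it.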

# find attempted wgets
ls 	  */.wget.txt	2>/dev/null \
| sed 's/\/\.wget\.txt$//g ; s/^www\.//g' \
| sort -u >		/tmp/$$.dirs.txt	# attempted downloads
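# Illustrative example (an assumed site name): the entry www.example.org/.wget.txt
# is reduced to the single line "example.org" in /tmp/$$.dirs.txt.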

# uncomment this section to avoid looking at active downloads
# ps -ef --forest \
# | grep -v tail \
# | grep wg[e]t \
# | sed 's/^.*https:\/\///g ; s/^.*http:\/\///g ; s/^.* //g ; s/\/.*$//g ; s/^www\.//g' \
# | sort -u >	/tmp/$$.wgets.txt		# running downloads
# for f in `cat	/tmp/$$.wgets.txt`
# do
# 	grep -v "$f"	/tmp/$$.dirs.txt >	/tmp/$$.tmp.txt
# 	mv					/tmp/$$.tmp.txt \
# 			/tmp/$$.dirs.txt
# done						# finished or failed
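# Illustrative example (an assumed URL): in that section a ps line ending in
# "wget ... https://www.example.org/some/path" reduces to "example.org",
# which would then be filtered out of /tmp/$$.dirs.txt as still active.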

# review	wget logs of downloads
if [ -s			/tmp/$$.dirs.txt ]
then
    for d in `cat	/tmp/$$.dirs.txt`
    do
	popd 2>/dev/null; cd /home/downloads/	# in case we failed to pushd+popd in last pass of loop
	echo ""
	echo "======= ${d} ======= BEGIN"
	head "${d}"/.wget.txt 2>/dev/null || head www."${d}"/.wget.txt
	echo "------- ------- ${d} ------- -------"
	tail "${d}"/.wget.txt 2>/dev/null || tail www."${d}"/.wget.txt
	echo "======= ${d} ======= END..."
	echo ""
	echo -n "D(elete), K(ill), F(dedupe), T(idy), V(iew), R(edownload), N(oop), URL(download): "; read answer
	case "$answer" in
	d)
		rm -rf "${d}"
		;;
	k)
		ps -ef --forest | grep -v tail | grep wg[e]t | grep "${d}" | sed 's/^[a-z][a-z]* *//g'	# debugging echo
		# strip the leading username column, then keep only the first remaining field: the wget PID(s)
		KILL_PID=`ps -ef --forest | grep -v tail | grep wg[e]t | grep "${d}" | sed 's/^[a-z][a-z]* *//g;s/ .*$//g'`
		echo "KILL_PID: $KILL_PID"
		for signal in hup term kill
		do
			kill -"$signal" $KILL_PID
			sleep 6
		done
		echo "FINISHED by `whoami`" >>	"${d}"/.wget.txt	2>/dev/null || \
		echo "FINISHED by `whoami`" >> www."${d}"/.wget.txt	# log our killing the download
		echo "------- ${d} ------- ..."
		tail				"${d}"/.wget.txt	2>/dev/null || \
		tail			    www."${d}"/.wget.txt	# updated log end
		echo "======= ${d} ======= END"
		pushd	"${d}" 2>/dev/null || pushd www."${d}" \
		&& touch	index.html	HEADER.html \
		&& cat		index.html >>	HEADER.html \
		&& rm		index.html \
		&& yes|fdedupe \
		&& tree -d > .tree.txt \
		&& dudir > .du.txt \
		&& vi .??* \
		&& mv .wget.txt{,-} \
		&& popd \
		|| echo "ERROR CODE: $?"
		;;
	f)	pushd	"${d}" 2>/dev/null || pushd www."${d}" \
		&& touch	index.html	HEADER.html \
		&& cat		index.html >>	HEADER.html \
		&& rm		index.html \
		&& yes|fdedupe \
		&& tree -d > .tree.txt \
		&& dudir > .du.txt \
		&& vi .??* \
		&& mv .wget.txt{,-} \
		&& popd \
		|| echo "ERROR CODE: $?"
		;;
	t)	pushd	"${d}" 2>/dev/null || pushd www."${d}" \
		&& touch	index.html	HEADER.html \
		&& cat		index.html >>	HEADER.html \
		&& rm		index.html \
		&& name_tidy -r \
		&& tree -d > .tree.txt \
		&& dudir > .du.txt \
		&& vi .??* \
		&& mv .wget.txt{,-} \
		&& popd \
		|| echo "ERROR CODE: $?"
		;;
	v)	firefox "${d}"	< /dev/null	>& /dev/null	& disown $!	# view the website download directory
		;;
	n)	   echo "no-op... ${d}"
		;;
	"")	   echo "no-op... ${d}"
		;;
	r)	download "${d}" \
		&& echo "redownloading... ${d}" \
		|| echo "ERROR CODE: $?"
		;;
	*)	download "$answer" \
		&& echo "downloading... $answer" \
		|| echo "ERROR CODE: $?"
		;;
	esac
    done
fi
exit	$?	# Pau for Now

# CODE DEVELOPMENT IN PROGRESS:
# We wish to review all webpages stored on this computer,
# logging those already viewed into ~/.dush.webpages.txt
# so as not to re-view them, and
# finding all webpages by extension .htm, .html, .mht,
# which we'll track in ~/.webpages.txt.
# We may wish to recycle code from the similarly purposed script "images".
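#
# A minimal commented-out sketch of that idea, assuming the two log-file
# names above and (purely for illustration) firefox as the viewer:
# touch						~/.dush.webpages.txt	# ensure the viewed-log exists
# find . -type f \( -iname '*.htm' -o -iname '*.html' -o -iname '*.mht' \) \
# | sort >					~/.webpages.txt		# every webpage found on disk
# grep -v -x -f ~/.dush.webpages.txt		~/.webpages.txt \
# | while read -r page
# do
# 	firefox "$page"						# review the page
# 	echo "$page" >>				~/.dush.webpages.txt	# log it so we do not re-view it
# done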


