#!/bin/bash
#                      /usr/local/bin/de-www
# http://crystalfaeries.net/posix/bin/de-www
# celeste:crystalfaery 2018-08-15 22:40:58+00:00
# de-www cleans our downloads archive created with our "download" script
# moving all content to directories which do NOT have the "www." prefix,
# and replacing the directory paths which DO have the "www." prefix with symlinks
# WHY?
# some websites have FULL equivalence between {www.,}domain.tld
# some websites INSIST upon forcing accesses to the FQDN www.domain.tld
# some websites ALLOW full access via domain.tld
# we wish to not have duplicates of content in different directories
# we MUST try downloading BOTH {www.,}domain.tld to ENSURE successful download
# we have not YET re-written "download" to intelligently probe a domain
# to resolve this question BEFORE it proceeds to wget content appropriately
# THEREFORE we use this to clean up the aftermath, until we intelligently re-write "download"

# NOTE: our script "downloads", which restarts incomplete downloads performed by "download",
# ALSO is involved in this issue, therefore, ideally we will combine all three scripts into one script.

# de_www [DOWNLOADS_DIR]
#
# For every real directory DOWNLOADS_DIR/www.NAME (symlinks are skipped by
# find's -type d, so already-converted entries are left alone):
#   1. merge its entire content, dotfiles included, into DOWNLOADS_DIR/NAME
#   2. remove DOWNLOADS_DIR/www.NAME
#   3. leave a symlink DOWNLOADS_DIR/www.NAME -> DOWNLOADS_DIR/NAME
#
# Arguments: $1 - downloads archive to clean (default: $HOME/downloads)
# Outputs:   rsync's verbose file list on stdout; diagnostics on stderr
# Returns:   0 when every www.* directory was converted (or there was
#            nothing to do), 1 when at least one directory was left untouched
de_www() {
	local root="${1:-${HOME}/downloads}"
	local src name dest status=0

	if [[ ! -d "$root" ]]; then
		printf 'de-www: %s is not a directory, nothing to do\n' "$root" >&2
		return 0
	fi

	# work with an absolute path so the symlink targets stay valid
	# regardless of the directory we were started from
	root="$(cd -- "$root" && pwd)" || return 1

	# NUL-delimited so that no directory name can be split or glob-expanded
	while IFS= read -r -d '' src
	do	# force content into domain.tld, leaving a www.domain.tld symlink to it
		name="${src##*/}"	# www.domain.tld
		name="${name#www.}"	# domain.tld
		if [[ -z "$name" ]]; then
			continue	# a directory literally named "www." has no target name
		fi
		dest="${src%/*}/${name}"

		if ! mkdir -p -- "$dest"; then
			printf 'de-www: cannot create %s, leaving %s alone\n' "$dest" "$src" >&2
			status=1
			continue
		fi

		# if domain.tld already resolves to www.domain.tld (reverse symlink)
		# then removing the source would destroy the only copy
		if [[ "$src" -ef "$dest" ]]; then
			printf 'de-www: %s and %s are the same directory, skipping\n' "$src" "$dest" >&2
			status=1
			continue
		fi

		# trailing slashes: copy the CONTENT of src (hidden files too) into dest
		if ! rsync -auvzH -- "${src}/" "${dest}/"; then
			printf 'de-www: rsync failed, leaving %s alone\n' "$src" >&2
			status=1
			continue
		fi

		# only reached after a successful copy
		if ! rm -rf -- "${src:?}"; then
			status=1
			continue
		fi
		if ! ln -s -- "$dest" "$src"; then
			status=1
		fi
	done < <(find "$root" -mindepth 1 -maxdepth 1 -type d -name 'www.*' -print0)

	return "$status"
}

de_www "$@"
