#!/bin/bash
#                      /usr/local/bin/de-www
# http://crystalfaeries.net/posix/bin/de-www
# celeste:crystalfaery 2015-12-23 17:14:11+00:00
# de-www cleans our downloads archive created with our "download" script
# moving all content to directories which do NOT have the "www." prefix,
# and replacing the directory paths which DO have the "www." prefix with symlinks
# WHY?
# some websites have FULL equivalence between {www.,}domain.tld
# some websites INSIST upon forcing accesses to the FQDN www.domain.tld
# some websites ALLOW full access via domain.tld
# we wish to not have duplicates of content in different directories
# we MUST try downloading BOTH {www.,}domain.tld to ENSURE successful download
# we have not YET re-written "download" to intelligently probe a domain
# to resolve this question BEFORE it proceeds to wget content appropriately
# THEREFORE we use this to clean up the aftermath, until we intelligently re-write "download"

# NOTE: that our script "downloads" which restarts incomplete downloads performed by "download"
# ALSO is involved in this issue, therefore, ideally we will combine all three scripts into one script.

# Merge every www.DOMAIN directory under the downloads root into DOMAIN,
# then replace www.DOMAIN with a symlink pointing at DOMAIN.
# Usage: de_www [root]   — root defaults to /home/downloads, so running the
# script with no arguments behaves exactly as before.
de_www() {
	local root=${1:-/home/downloads}
	local src d

	# Glob instead of parsing `find | sed` output: no word-splitting on
	# odd names, and an unmatched pattern is rejected by the -d test.
	for src in "$root"/www.*
	do	# skip non-directories and symlinks left by a previous run
		[ -d "$src" ] || continue
		[ -L "$src" ] && continue
		d=${src#"$root"/www.}	# bare domain.tld
		mkdir -p "$root/$d" || continue
		# A trailing slash on the rsync source copies ALL of the
		# directory's contents, including single-character dotfiles
		# that the old {.??,}* brace glob missed.  Only delete the
		# www. tree after rsync reports success — the old code
		# removed it unconditionally, risking data loss on a failed
		# or partial transfer.
		if rsync -auvzH "$src"/ "$root/$d"/
		then
			rm -rf "$src"
			ln -s "$root/$d" "$src"
		fi
	done
}

de_www "$@"


# syntax highlighted by Code2HTML, v. 0.9.1