#!/bin/bash
#                       /usr/local/bin/download
# https://crystalfaeries.net/posix/bin/download
# celeste:crystalfaery DOWNLOAD 2018-08-15 22:41:30+00:00
# See Also: /usr/local/bin/downloads for management
# This is a generic website mirroring agent, HOWEVER,
# NOTE: in the wget invocations the REJECT of URL pattern '*showComment=*'
# is intended to NOT download COMMENTS added to BLOG postings,
# as usually the PUBLIC are blithering idiots posting banal drivel.

# Work inside the first download directory that exists; edit this list to match your paths.
# Every candidate is quoted so a $HOME containing spaces or glob characters still works,
# and cd -P resolves symlinks so the working directory is the physical path.
for download_dir in "${HOME}/downloads/" "${HOME}/downloads-/" "${HOME}/Downloads/"
do
	cd -P -- "${download_dir}" 2> /dev/null && break
	download_dir=""		# this candidate failed; forget it
done
if [[ -z "${download_dir}" ]]
then
	# none of the candidates could be entered: say so instead of exiting silently
	echo "$0: no download directory found under ${HOME}" 1>&2
	exit 1
fi

# Print the URL given as $1 with a leading "scheme://" (https, http, ftps or ftp)
# and then a leading "www." removed.
# Only a complete "scheme://" prefix is stripped, so scheme-less hosts such as
# "ftp.gnu.org" or "httpbin.org" are left intact.
_download_strip_prefix() {
	local rest="${1}"
	case "${rest}" in
		https://* | http://* | ftps://* | ftp://*) rest="${rest#*://}" ;;
	esac
	printf '%s\n' "${rest#www.}"
}

# Print the host part of the URL given as $1:
# the prefix-stripped URL up to, but not including, its first "/".
_download_domain() {
	local rest
	rest="$(_download_strip_prefix "${1}")"
	printf '%s\n' "${rest%%/*}"
}

# Start one background wget mirror job per URL argument.
# Arguments:	a list of URLs, with or without scheme and "www." prefix
# Outputs:	appends a banner and all wget output to <domain>/.wget.txt in the
#		current directory; complaints about unusable URLs go to stderr
# Returns:	0 if a job was started for every URL, 1 if any URL was skipped
download_urls() {
	local url myurl domain status=0
	# '*showComment=*' is rejected so that comments added to blog postings are not downloaded
	local -a wget_opts=(-E --follow-ftp -H -k -l 1 -m --no-check-certificate -np -nv -p --unlink --retry-connrefused -R '*showComment=*')

	while [[ $# -ne 0 ]]		# We accept a list of URLs
	do
		url="${1}"		# work on	the first argument
		shift			# consume	the first argument
		myurl="$(_download_strip_prefix "${url}")"
		domain="$(_download_domain "${url}")"
		if [[ -z "${domain}" ]]
		then
			printf '%s\n' "${url} is malformed or edit _download_strip_prefix in $0" 1>&2
			status=1
			continue
		fi

		# in downloads dir, single dir without www. prefix
		if ! mkdir -p -- "${domain}"
		then
			status=1	# mkdir already reported why; the log file could not be written anyway
			continue
		fi
		# symlinked to, from the www.prefixed name (-L also catches a dangling symlink)
		if [[ ! -e "www.${domain}" && ! -L "www.${domain}" ]]
		then
			ln -s -- "${domain}" "www.${domain}"
		fi

		printf '%s\n' "======= Begin downloading ${url} for $(whoami) at $(/usr/local/bin/now) =======" >> "${domain}/.wget.txt"

		# The idea is to try both with and without www. prefix to guarantee some
		# successful download regardless which way the website "canonicalizes" their own URL.
		# If they alias both to same successful content,
		# we would be double attempting each file, at twice the "-w" rate.
		# The www. attempt is currently disabled:
		#wget "${wget_opts[@]}" -- "www.${myurl}" >> "${domain}/.wget.txt" 2>&1 &
		# NOTE(review): the scheme was stripped from ${myurl}, so wget chooses its default
		# protocol -- confirm that is acceptable for https:// and ftp:// URLs.
		wget "${wget_opts[@]}" -- "${myurl}" >> "${domain}/.wget.txt" 2>&1 &
		#			disown %1 %2
	done
	return "${status}"
}

download_urls "$@"
# Finish with the status of the last command executed above
# (a bare exit reuses that status, exactly like an explicit $?).
exit
