#!/bin/bash
#                      /usr/local/bin/downloader
# http://crystalfaeries.net/posix/bin/downloader
# celeste:crystalfaery 2014-09-12 04:59:57+00:00
# Automatically download youtube videos, vodcasts, and podcasts
# We start from ~/Mail/download e-mail mailbox (which we will destroy)
# to collect URLs to download, which once extracted are queued for download into /home/downloads


#######	Configuration	#######
# --- disk-space guard ---
limit='/etc/downloadlimit'	# file whose first line, first field, is the fullness percentage the volume must stay below
downvol='/home/public'		# pattern looked up in the df output to find the download volume

# --- download targets ---
downdir='/home/public/downloads/'		# working directory of the daemon (podcast download target)
youdir='/home/public/downloads/youtube.com'	# youtube download target

# --- queues ---
down_q='/home/downloads/.down_q.txt'			# download queue
e_mail='/home/downloads/.e_mail.txt'			# eMail queue, split into the queues below
podque='/home/downloads/.podque.txt'			# podcast queue
youque='/home/downloads/youtube.com/.youtube.txt'	# youtube queue

# --- logs ---
podlog='/home/downloads/.podlog.txt'			# podcast download log
youlog='/home/downloads/youtube.com/.youlog.txt'	# youtube download log
downlog='/home/downloads/.wget.txt'			# download log

# --- run-time state ---
url=''		# url in progress
sleep=1		# delay between downloads, in minutes (starting value)
sleepmax=60	# upper bound for the delay between failure retries, in minutes

# New files are group-writable and world-readable:
# Guests may view, only Accounted users may delete.
umask 002

#######	Initialize our daemon loop	#######

# on_exit: runs whenever the script exits, for any reason.
# Puts the URL that was in progress (if any) back on the mail queue and
# removes the youtube scratch file.
# Globals:  url, e_mail (read)
# NOTE(review): the main loop only keeps mail-queue lines that contain
# "youtube", so a requeued bare video id or podcast URL may be dropped again
# on the next pass -- confirm the intended requeue format.
on_exit() {
	if [ -n "$url" ]
	then
		printf '%s\n' "$url" >> "$e_mail"
	fi
	rm -f "/tmp/$$.youtube.txt"
}
# The handler is a function so that $url is read when the trap fires; a
# double-quoted trap string would freeze the (empty) start-up value of $url.
# The EXIT handler does not call exit itself, so the specific exit codes used
# throughout the script reach the caller; signals still end with status 1.
trap on_exit 0
trap 'exit 1' 1 2 15

cd "$downdir" || exit 2	# I want it now, and you will have to kill me to unmount the directory I use
echo "$0: $(/usr/local/bin/now): STARTing" >> "$podlog" || exit 3
echo "$0: $(/usr/local/bin/now): STARTing" >> "$youlog" || exit 4

# tidy filenames
name_tidy "$youdir" 2>> "$youlog"

# backdate partials
find "$youdir" -iname '*.part' -print -exec sudo touch -t 197001011001 {} \; 2>> "$youlog"

# update partials: copy each "X.part" to "X".  The target name has to be
# computed per file inside the -exec'd shell; a $( ... ) written directly on
# the find command line is expanded once, before find starts, and would only
# ever yield the literal "{}".  Because of rsync -u and the backdating above,
# an existing target is never newer-overwritten by a stale partial.
find "$youdir" -iname '*.part' -print -exec sh -c 'rsync -auvzH "$1" "${1%.[Pp][Aa][Rr][Tt]}"' sh {} \; 2>> "$youlog"

# remove partials
find "$youdir" -iname '*.part' -print -exec rm {} \; 2>> "$youlog"

# by size playlist
du "$youdir"/* 2>> "$youlog" | sort -n 2>> "$youlog" | cut -f2 > ~/playlists/youtube.pls 2>> "$youlog"

#######	Loop forever as a daemon	#######
# Every pass sleeps for $sleep minutes and then, provided the download volume
# is below the configured fullness limit, fetches at most one YouTube video,
# one vodcast and one podcast.  $sleep doubles on every pass (capped at
# $sleepmax) and is reset to 1 by any successful download.
# Exit codes used here: 5 (disk usage unreadable), 6-10 (podcast queue file
# handling failed), 11 (the loop ended because sleep itself failed).
while sleep "${sleep}m"
do
	sleep=$(( sleep + sleep ))	# binary backoff
	if (( sleep > sleepmax ))
	then				# up to the
		sleep=$sleepmax		# maximum sleep time
	fi

	# Used% of the download volume: take the df line matching $downvol, drop
	# everything from the '%' on, then keep only the last blank-separated field.
	percent="$( /bin/df 2>> "$youlog" | grep "$downvol" 2>> "$youlog" | sed 's/%.*$// ; s/^.*[[:blank:]]//' 2>> "$youlog" )"
	# Validate explicitly instead of relying on the status of `let`, which is
	# non-zero for a value of 0 and would stop the daemon on an empty volume.
	[[ "$percent" =~ ^[0-9]+$ ]] || exit 5

	max_percent="$( head -n 1 "$limit" 2>> "$youlog" | cut -f 1 2>> "$youlog" )"
	if [[ "$percent" -lt "$max_percent" ]]
	then	# we have freespace available, so check for another download

		# Youtubes
		# Reduce every mail-queue line that mentions youtube to a bare video id
		# and append the ids to the youtube queue.  Both http:// and https://
		# prefixes are stripped so that short youtu.be links work either way.
		sed 's/youtu.be/youtube/' < "$e_mail"		\
		| grep 'youtube'				\
		| sed	-e 's/^.*https\{0,1\}:\/\///'		\
			-e 's/^www.youtube.com\/watch?v=//'	\
			-e 's/^www.youtube.com\/v\///'		\
			-e 's/{youtube}//'			\
			-e 's/{\/youtube}//'			\
			-e 's/^youtube\///'			\
			-e 's/&.*$//'				\
			-e 's/".*$//'				\
			-e 's/\[.*$//'				\
			-e 's/<.*$//'				\
			-e 's/^.*watch?v=//'			\
			-e 's/<\/a>.*$//'			\
			-e 's/^.*embed\///'			\
			-e 's/?version=.*$//'			\
			-e 's/&#.*$//'				\
		| sort -u >> "$youque" 2>> "$youlog"
		cp /dev/null "$e_mail" 2>> "$youlog"	# the mail queue has been consumed

		# process the next url: pop the first entry of the sorted, de-duplicated queue
		url=$( sort -u "$youque" 2>> "$youlog" | head -n 1 2>> "$youlog" )
		sort -u "$youque" 2>> "$youlog" | tail -n +2 2>> "$youlog" > "/tmp/$$.youtube.txt"
		cp "/tmp/$$.youtube.txt" "$youque" 2>> "$youlog"
		if [ "$url" != "" ]
		then	# download the next youtube video
			# "--" keeps a video id that starts with '-' from being read as an option.
			# An if/else (rather than "&& ( ... ) ||") keeps the backoff reset in this
			# shell; inside a ( ... ) subshell the assignment to sleep would be lost.
			if youtube-dl -i -c --restrict-filenames --prefer-free-formats --no-progress -- "$url" >> "$youlog" 2>&1
			then
				sleep=1
				name_tidy "$youdir" 2>> "$youlog"
				du "$youdir"/* 2>> "$youlog" | sort -n 2>> "$youlog" | cut -f2 2>> "$youlog" > ~/playlists/youtube.pls
			else
				echo "$0: ERROR $?: $url" >> "$youlog"
			fi
		fi

		# Vodcasts
		if [ -s ~/Mail/vodcast ]
		then
			# we must extract a list of URLs from the Mail file vodcast:
			# for each video extension keep the lines mentioning it, cut away
			# whatever precedes "http" and whatever follows the extension when
			# it is closed by a quote, bracket or brace.
			for ext in avi mov mp4 m4v mv4 ogv webm wmv h.264 264
			do
				# -F: the extension, dot included, is matched literally
				grep -F -- ".$ext" ~/Mail/vodcast		\
				| sed	-e 's/^.*http/http/'		\
					-e "s/\\.$ext\".*\$/.$ext/"	\
					-e "s/\\.$ext\\].*\$/.$ext/"	\
					-e "s/\\.$ext}.*\$/.$ext/"	\
				>> ~/playlists/vodcast.txt
			done
			cp /dev/null ~/Mail/vodcast	# this mail file serves only to queue vodcast URLs
			# drop trailing " (...)" annotations, then sort and de-duplicate the queue
			sed 's/ (.*$//' < ~/playlists/vodcast.txt > /tmp/vodcast.$$.txt
			sort -u < /tmp/vodcast.$$.txt > ~/playlists/vodcast.txt
		fi
		# process the next URL: pop the first line of the queue
		url="$(head -n 1 ~/playlists/vodcast.txt)"
		tail -n +2 ~/playlists/vodcast.txt > /tmp/vodcast.$$.txt
		mv /tmp/vodcast.$$.txt ~/playlists/vodcast.txt
		if [ "$url" != "" ]
		then	# download the next vodcast
			# remove any other queue line containing this URL (-F: literal, not a regex)
			grep -vF -- "$url" ~/playlists/vodcast.txt > /tmp/vodcast.$$.txt
			mv /tmp/vodcast.$$.txt ~/playlists/vodcast.txt
			if wget -nv --mirror --no-check-certificate -c "$url"
			then
				sleep=1
			else
				echo "$url" >> ~/playlists/vodcast.txt-	# remember the failed URL
			fi
			# host part of the URL: text after the last "//" up to the next "/"
			host=${url##*//}
			host=${host%%/*}
			echo "name_tidy -r  $host"
			name_tidy -r "$host"
		fi

		# Podcasts
		if [ -s ~/Mail/podcast ]
		then
			# we must extract a list of URLs from the Mail file podcast
			cp /dev/null /tmp/podcast.$$.txt || exit 6
			for ext in mp3 ogg wma
			do
				# -F: the extension, dot included, is matched literally
				grep -F -- ".$ext" ~/Mail/podcast >> /tmp/podcast.$$.txt
			done
			# cut away whatever precedes "http", whatever follows the audio
			# extension, the {...remote} tags and trailing "&", " (" or ">" text
			sort -u /tmp/podcast.$$.txt		\
			| sed	-e 's/^.*http/http/'		\
				-e 's/\.mp3".*$/.mp3/'		\
				-e 's/\.ogg".*$/.ogg/'		\
				-e 's/\.wma".*$/.wma/'		\
				-e 's/\.mp3}.*$/.mp3/'		\
				-e 's/\.ogg}.*$/.ogg/'		\
				-e 's/\.wma}.*$/.wma/'		\
				-e 's/\.mp3\].*$/.mp3/'		\
				-e 's/\.ogg\].*$/.ogg/'		\
				-e 's/\.wma\].*$/.wma/'		\
				-e 's/{mp3remote}//'		\
				-e 's/{oggremote}//'		\
				-e 's/{wmaremote}//'		\
				-e 's/{\/mp3remote}//'		\
				-e 's/{\/oggremote}//'		\
				-e 's/{\/wmaremote}//'		\
				-e 's/&.*$//'			\
				-e 's/ (.*$//'			\
				-e 's/>.*$//'			\
			| sort -u >> ~/playlists/podcast.txt
			# sort and de-duplicate the whole queue via the scratch file
			cp ~/playlists/podcast.txt /tmp/podcast.$$.txt || exit 7
			sort -u /tmp/podcast.$$.txt > ~/playlists/podcast.txt || exit 8
			rm /tmp/podcast.$$.txt || exit 9
			cp /dev/null ~/Mail/podcast || exit 10	# the mail file has been consumed
		fi
		# process the next URL: pop the first line of the queue
		url="$(head -n 1 ~/playlists/podcast.txt)"
		tail -n +2 ~/playlists/podcast.txt > /tmp/podcast.$$.txt
		mv /tmp/podcast.$$.txt ~/playlists/podcast.txt
		if [ "$url" != "" ]
		then	# download the next podcast audio
			# remove any other queue line containing this URL (-F: literal, not a regex)
			grep -vF -- "$url" ~/playlists/podcast.txt > /tmp/podcast.$$.txt
			mv /tmp/podcast.$$.txt ~/playlists/podcast.txt
			if wget -nv --mirror --no-check-certificate -c "$url"
			then
				sleep=1
			else
				echo "$url" >> ~/playlists/podcast.txt-	# remember the failed URL
			fi
			touch ~/playlists/podcast.txt
			# host part of the URL: text after the last "//" up to the next "/"
			host=${url##*//}
			host=${host%%/*}
			echo "name_tidy -r  $host"
			name_tidy -r "$host"
		fi

	fi
done
exit 11	# there is no error-free exit

# #!/bin/bash
# #                 /usr/local/bin/podcastdownload
# # http://crystalfaeries.net/posix/bin/podcastdownload
# # celeste:crystalfaery 2014-09-09 16:48:00+00:00
# # THIS IS OBSOLETED BY /usr/local/bin/downloader*
# # and only serves to hold code which may get incorporated therein
# 
# MAILTO=`whoami`
# umask 2
# cd /home/downloads || exit 1
# 
# let sleepmax=60	# longest sleep time
# let sleep=1 # reset delay to minimum presuming success
# while sleep "$sleep"m;do
# 	let sleep=$sleep+$sleep # binary backoff
# 	let percent="`df | grep /home/audio | sed 's/%.*$// ; s/^.* // ; s/^.*	//'`"	 || exit 2
# 	if [[ $percent -lt `head -n 1 /etc/downloadlimit | cut -f 1` ]]
# 	then # we have freespace available, so check for another podcast audio
# 		if [ -s ~/Mail/podcast ]
# 		then
# 			# we must extract a list of URLs from the Mail file podcast
# 			cp /dev/null					 /tmp/podcast.$$.txt		 || exit 3
# 			grep '.mp3' 		~/Mail/podcast >>	 /tmp/podcast.$$.txt
# 			grep '.ogg' 		~/Mail/podcast >>	 /tmp/podcast.$$.txt
# 			grep '.wma' 		~/Mail/podcast >>	 /tmp/podcast.$$.txt
# 			sort -u						 /tmp/podcast.$$.txt	\
# 			| sed 's/^.*http/http/'						 	\
# 			| sed 's/\.mp3".*$/.mp3/'					 	\
# 			| sed 's/\.ogg".*$/.ogg/'					 	\
# 			| sed 's/\.wma".*$/.wma/'					 	\
# 			| sed 's/\.mp3}.*$/.mp3/'					 	\
# 			| sed 's/\.ogg}.*$/.ogg/'					 	\
# 			| sed 's/\.wma}.*$/.wma/'					 	\
# 			| sed 's/\.mp3\].*$/.mp3/'					 	\
# 			| sed 's/\.ogg\].*$/.ogg/'					 	\
# 			| sed 's/\.wma\].*$/.wma/'					 	\
# 			| sed 's/{mp3remote}//'						 	\
# 			| sed 's/{oggremote}//'						 	\
# 			| sed 's/{wmaremote}//'						 	\
# 			| sed 's/{\/mp3remote}//'					 	\
# 			| sed 's/{\/oggremote}//'					 	\
# 			| sed 's/{\/wmaremote}//'					 	\
# 			| sed 's/&.*$//'						 	\
# 			| sed 's/ (.*$//'						 	\
# 			| sed 's/>.*$//'						 	\
# 			| sort -u >>	 ~/playlists/podcast.txt
# 			cp		 ~/playlists/podcast.txt	 /tmp/podcast.$$.txt		|| exit 4
# 			sort -u		 /tmp/podcast.$$.txt	 >	 ~/playlists/podcast.txt	|| exit 5
# 			rm		 /tmp/podcast.$$.txt						|| exit 6
# 			cp /dev/null		 ~/Mail/podcast						|| exit 7
# 		fi
# 		# process the next URL
# 		url="`head -n 1								 ~/playlists/podcast.txt`"
# 		tail -n +2								 ~/playlists/podcast.txt >	 /tmp/podcast.$$.txt
# 		mv	      /tmp/podcast.$$.txt					 ~/playlists/podcast.txt
# 		if [ "$url" != "" ]
# 		then # download the next podcast audio
# 			grep -v "$url"							 ~/playlists/podcast.txt >	 /tmp/podcast.$$.txt
# 			mv /tmp/podcast.$$.txt						 ~/playlists/podcast.txt
# 			wget -nv --mirror --no-check-certificate -c "$url" && let sleep=1 || echo "$url" >> ~/playlists/podcast.txt-
# 			touch								 ~/playlists/podcast.txt
# 			echo	"name_tidy -r  `echo $url | sed 's/^.*\/\///' | sed 's/\/.*$//'`"
# 				 name_tidy -r "`echo $url | sed 's/^.*\/\///' | sed 's/\/.*$//'`"
# 		fi
# 	fi
# #	if [ -s ~/playlists/podcast.txt ]
# #	then
# #		echo "~/playlists/podcast.txt has size:"
# #		ls -Flad ~/playlists/podcast.txt
# #	else
# #		echo "~/playlists/podcast.txt is null:"
# #		ls -Flad ~/playlists/podcast.txt
# #		echo "~/playlists/podcast.txt- is:"
# #		ls -Flad ~/playlists/podcast.txt-
# #	fi
# 	if [[ $sleep -gt $sleepmax ]]
# 	then
# 		let sleep=$sleepmax	# maximum time to sleep
# 	fi
# done
# exit -1


# syntax highlighted by Code2HTML, v. 0.9.1