#!/bin/bash
#                       /usr/local/bin/youtubedownload
#  http://crystalfaeries.net/posix/bin/youtubedownload
# celeste:crystalfaery YOUTUBEDOWNLOAD 2017-07-27 01:44:23+00:00
# Automatically download (youtube) videos listed in our download queue

# Launch from your crontab:
# @reboot /usr/local/bin/youtubedownload >> /home/downloads/youtube.com/.youlog.txt 2>&1	# launch by CRON

# Launch as background task:
#	$ /usr/local/bin/youtubedownload >> /home/downloads/youtube.com/.youlog.txt 2>&1 &	# launch daemon
#	$ disown %1										# detach the daemon

# CONFIGURATION:
# Plain assignments are used instead of `let`: the values are literals,
# and `let` returns a failure status whenever the value it assigns is 0.
sleepmax=128						# longest pause between passes, in minutes (backoff ceiling)
sleep=1							# shortest pause between passes, in minutes (backoff start)
cachedir=/var/tmp/youtube-dl				# youtube-dl cache directory
vidlim=/etc/downloadlimit				# file whose first field is the maximum allowed fullness (percent)
vidvol=/dev/mapper/fey--vg-home				# volume whose fullness is compared against that limit
viddir=/home/downloads/youtube.com			# directory the videos are downloaded into
archive=$viddir/.archive.txt				# youtube-dl download archive (everything ever downloaded)
vidlog=$viddir/.youlog.txt				# log file
vidque=$viddir/.youtube.txt				# queue of URLs waiting to be downloaded
vidfil=$viddir/.playou.txt				# playlist of just the downloaded youtubes
playlist=$HOME/documents/playlists/txt/monitor.txt	# playlist we monitor
umask	2						# Guests may view, only accounted users may delete
help=13							# Help displays this many lines of header
if (( $# != 0 ))
then							# treat any argument as a request for help
	# Quoted so that a script path containing whitespace still works.
	head -n "$help" -- "$0"
	exit	0
fi

# Setup our trap handler
vidurl=""						# vidurl in progress

# requeue_and_cleanup
# Runs on shell exit (0) and on HUP/INT/TERM (1 2 15): puts the URL that
# is currently in $vidurl back on the queue, removes the queue scratch
# file and exits with status 1 (whatever status the script was exiting
# with is replaced by 1).
# Everything is evaluated when the handler RUNS.  A double-quoted trap
# string would expand $vidurl once, at installation time, while it is
# still empty -- and the URL in progress would never be re-queued.
requeue_and_cleanup() {
	trap - 0 1 2 15			# run once: the exit below must not re-enter us
	if [ -n "$vidurl" ]
	then				# nothing in progress => nothing to append
		printf '%s\n' "$vidurl" >> "$vidque"
	fi
	rm -f "/tmp/$$.youtube.txt"
	exit 1
}
trap requeue_and_cleanup 0 1 2 15
# NOTE: we mount most of our volumes as "errors=remount-ro", so
# if our disk has errors we WILL lose a URL we can't append back to the queue on disk

# Prepare the daemon's environment before entering the loop
if ! cd "$viddir"
then
	exit 2					# no download directory to work in
fi
if ! mkdir -p "$cachedir"
then
	exit 3					# no youtube-dl cache directory
fi
# Rotate the log: drop the previous backup, keep the current log as the
# new backup, then start from an empty log.  Failures of the first two
# steps (nothing to remove / nothing to move) are silenced on purpose.
rm "${vidlog}-"			2>/dev/null
mv "$vidlog" "${vidlog}-"	2>/dev/null
cp /dev/null "$vidlog"

# Loop forever as a daemon
#
# Each pass: lengthen the backoff, tidy the download directory and -- if
# the video volume is below its fullness limit -- take one URL off the
# queue and hand it to youtube-dl.  The helpers below are used only by
# this loop and rely on the configuration variables set earlier in the file.

# move_existing DEST FILE...
# Move those FILEs that exist into directory DEST.  An unmatched glob
# reaches us as a literal, non-existent name and is skipped quietly.
move_existing() {
	local dest=$1 f
	local -a found=()
	shift
	for f in "$@"
	do
		[[ -e "$f" ]] && found+=("$f")
	done
	if (( ${#found[@]} > 0 ))
	then
		mv -- "${found[@]}" "$dest/"
	fi
}

# collect_thumbnails EXT FILE...
# For each FILE (a name ending in .EXT in the current directory) move
# the thumbnail "$viddir/<name without .EXT>.jpg" here, if it is readable.
collect_thumbnails() {
	local ext=$1 f thumb
	shift
	for f in "$@"
	do
		thumb="$viddir/${f%."$ext"}.jpg"	# the "/" separator is required
		if [[ -r "$thumb" ]]
		then
			mv -- "$thumb" .
		fi
	done
}

# strip_format_tag TAG...
# In the current directory rename every "*.TAG.*" file so that each
# ".TAG" component is removed (e.g. x.f140.m4a -> x.m4a).
strip_format_tag() {
	local tag f
	for tag in "$@"
	do
		for f in *."$tag".*
		do
			[[ -e "$f" ]] || continue	# glob matched nothing
			mv -- "$f" "${f//."$tag"/}"
		done
	done
}

# tidy_downloads
# One housekeeping pass; expects the current directory to be $viddir.
tidy_downloads() {
	local f name
	echo "$(/usr/local/bin/now) TIDY BEGIN"		>> "$vidlog"		# initiate new TIDY
	name_tidy "$viddir"				>> "$vidlog" 2>&1	# tidy the directory
	# delete videos sans audio
	rm -f -- *.f13{3,4,5,6,7}.mp4 *.f24{3,4,7,8}.webm *.f264.mp4 *.f298.mp4
	# move audios to audio hierarchy
	move_existing /home/audio/m4a	*.m4a
	move_existing /home/audio/ogg	*.ogg
	move_existing /home/audio/opus	*.opus
	move_existing /home/audio/webm	*.f171.webm *.f251.webm
	# generate by-size playlist
	du -a "$viddir/"	2>> "$vidlog" \
		| sort -nr	2>> "$vidlog" \
		| cut -f2-	2>> "$vidlog" > "$vidfil"
	# remove orphaned description files: those for which no file named
	# like the description minus its ".description" suffix is readable
	for f in *.description
	do
		[[ -e "$f" ]] || continue
		if [[ ! -r "${f%.description}" ]]
		then
			rm -- "$f"
		fi
	done
	# collect the thumbnails that go with our audio files and drop the
	# format tags from their names; a directory we cannot enter is
	# skipped so the renames never run in the wrong place
	if pushd /home/audio/m4a
	then
		collect_thumbnails m4a *.m4a
		strip_format_tag f140 f141
		popd
	fi
	if pushd /home/audio/ogg
	then
		collect_thumbnails ogg *.ogg
		strip_format_tag f140 f141
		popd
	fi
	if pushd /home/audio/webm
	then
		collect_thumbnails webm *.f171.webm *.f251.webm
		strip_format_tag f251
		popd
	fi
	if pushd /home/audio/opus
	then
		collect_thumbnails opus *.opus
		popd
	fi
	# delete the files the log names in its "Cannot rename" lines
	grep "Cannot rename" "$vidlog" \
		| sed 's/^Cannot rename //;s/ .*$//' \
		| sort -u \
		| while IFS= read -r name
		do
			[[ -n "$name" ]] && rm -f -- "$viddir/$name"
		done
	echo "$(/usr/local/bin/now) TIDY END"		>> "$vidlog" 2>&1	# completed TIDY
}

# disk_percent_used
# Print the fullness (integer percent) df reports on the line mentioning
# $vidvol.  Returns 1 when that is not a plain number (no such line,
# several lines, df failure).  A legitimate 0 is a success -- the former
# `let percent=...` treated 0 as an error and ended the daemon.
disk_percent_used() {
	local pct
	# -P: one line per filesystem, even for long device names
	pct=$(/bin/df -P 2>> "$vidlog" \
		| grep -F -- "$vidvol" 2>> "$vidlog" \
		| sed 's/%.*$// ; s/^.*[[:space:]]//' 2>> "$vidlog")
	[[ "$pct" =~ ^[0-9]+$ ]] || return 1
	printf '%s\n' "$pct"
}

# pop_next_url
# Set $vidurl to the first entry of the sorted, de-duplicated queue and
# write the remaining entries back.  The queue is read once, so an entry
# appended between two reads can no longer be dropped or taken twice.
pop_next_url() {
	local snapshot=/tmp/$$.youtube.txt	# same name the exit trap removes
	if sort -u "$vidque" 2>> "$vidlog" > "$snapshot"
	then
		vidurl=$(head -n 1 "$snapshot" 2>> "$vidlog")
		tail -n +2 "$snapshot" 2>> "$vidlog" > "$vidque"
	else
		vidurl=""			# unreadable queue: leave it untouched
	fi
}

# forget_in_archive
# Merge the current archive into the accumulated master archive
# ("$archive-"), then remove the video id of $vidurl from the current
# archive, or we will fail to (re-)download.
forget_in_archive() {
	local video_id
	cat "${archive}-" "$archive" | sort -u > /tmp/$$.txt
	mv /tmp/$$.txt "${archive}-"		# accumulated master archive file
	# the id is taken as the 11 characters after the last "=" in the URL
	video_id=$(printf '%s\n' "$vidurl" | sed 's/^.*=//' | cut -c 1-11)
	printf '%s\n' "$video_id" >> "$viddir/.hash.txt"	#debug log
	if [[ -n "$video_id" ]]
	then	# -F --: ids are literal text and may begin with "-"
		grep -v -F -- "$video_id" "$archive" > /tmp/$$.txt
		mv /tmp/$$.txt "$archive"	# current archive file
	fi
}

# fetch_video
# Download $vidurl with youtube-dl, logging to $vidlog.  Success resets
# the binary backoff timer; failure is reported on stderr with youtube-dl's
# exit status.  Either way the URL is no longer in progress afterwards,
# so $vidurl is cleared.
fetch_video() {
	local youtube_error
# start	parking for disabled youtube-dl options:	#########################################
#				--embed-subs							\
#				--embed-thumbnail						\
#				--no-warnings							\
#				--quiet								\
#				--write-annotations						\
# endof	parking for disabled youtube-dl options.	#########################################
	if youtube-dl								\
		--cache-dir		"$cachedir"				\
		--continue							\
		--download-archive	"$archive"				\
		--exec			"echo $viddir/{}>> $vidfil 2>> $vidlog"	\
		--extract-audio							\
		--ignore-errors							\
		--keep-video							\
		--no-part							\
		--no-progress							\
		--prefer-free-formats						\
		--restrict-filenames						\
		--sub-lang en							\
		--verbose							\
		--write-description						\
		--write-thumbnail						\
		"$vidurl"				>> "$vidlog" 2>&1
	then
		sleep=1				# success: reset the binary backoff timer
	else
		youtube_error=$?
		echo "$0: ERROR $youtube_error: $vidurl"	>&2
	fi
	vidurl=""
}

while sleep "${sleep}m"
do
	# binary backoff, up to the maximum sleep time
	sleep=$(( sleep + sleep ))
	if (( sleep > sleepmax ))
	then
		sleep=$sleepmax
	fi

	tidy_downloads

	# If we have enough disk space, do the next download
	percent=$(disk_percent_used)	|| exit 4
	limit=$(head -n 1 "$vidlim" 2>> "$vidlog" | cut -f 1 2>> "$vidlog")
	if [[ "$percent" -lt "$limit" ]]
	then	# we have freespace available, so check for another video URL
		pop_next_url
		if [[ -n "$vidurl" ]]
		then	# we have a non-null URL to process and space for it
			echo "$(/usr/local/bin/now)"	>> "$vidlog" 2>&1
			forget_in_archive
			fetch_video
		fi
	fi
done
exit	255	# Error -1 is impossible, right?
# to handle filenames beginning with a "-":
# for f in $(ls ./-* | sed 's/^\.\/-//');do mv ./-"$f" "$f"; done
# see http://knightwise.com/?s=youtube-dl&x=0&y=0
# /usr/share/doc/youtube-dl/NEWS.Debian.gz
# /usr/share/doc/youtube-dl/README.txt.gz


# syntax highlighted by Code2HTML, v. 0.9.1