#!/bin/bash
#                       /usr/local/bin/youtubedownload
# https://crystalfaeries.net/posix/bin/youtubedownload
# celeste:crystalfaery YOUTUBEDOWNLOAD 2020-04-22 02:03:07+00:00
# Automatically download (youtube) videos listed in our download queue
# which is created/edited/monitored by /usr/local/bin/qyou

# Launch from your crontab:
# @reboot /usr/local/bin/youtubedownload >> $HOME/downloads/youtube.com/.youlog.txt 2>&1	# launch by CRON

# Launch this daemon from the command line control/status program:
#	$ /usr/local/bin/qyou -s

# Kill this daemon from the command line control/status program:
#	$ /usr/local/bin/qyou -k

# CONFIGURATION:
umask 0002							# group may write; others may only read (guests view, only account holders delete)
# cache directory alternatives tried before: $XDG_CACHE_HOME/youtube-dl (FAIL: $XDG_CACHE_HOME is undefined)
#                                            /var/tmp/youtube-dl        (OK, but less standard)
cachedir="${HOME}/.cache/youtube-dl"				# cache directory handed to youtube-dl --cache-dir
vidlim="/etc/downloadlimit"					# file whose first field is the maximum percentage fullness of the
vidvol="/dev/mapper/fae--vg-home"				# video volume (device name as listed by df) containing the
viddir="${HOME}/downloads/youtube.com"				# video directory (also the daemon's working directory)
archive="${viddir}/.archive.txt"				# youtube-dl download archive: everything ever downloaded
vidlog="${viddir}/.youlog.txt"					# daemon log
vidque="${viddir}/.youtube.txt"					# queue of URLs waiting to be downloaded into $viddir
vidfil="${viddir}/.playou.txt"					# playlist of just youtubes
playlist="${HOME}/documents/playlists/.monitor.txt"		# playlist we monitor
sleepmax=64							# maximum delay between failure retries (in minutes)
help=30								# --help displays this many lines of the file header
sleep=1								# minimum delay between successful tries (in minutes)
# Command line handling: only the first argument is examined, and every
# branch exits, so the daemon itself only ever starts when run with no arguments.
if [ "$#" -gt 0 ]
then
    case "${1}" in
    --version | -v)
	# line 4 of this file carries the version stamp
	head -n 4 "$0" | tail -n 1	>&2
	exit	0
	;;
    --help | -h)
	# the first $help lines of this file are the usage notes
	head -n "$help" "$0"		>&2
	exit	0
	;;
    *)
	# anything else is rejected
	echo "$0 does not cognize ${1}" >&2
	exit	1
	;;
    esac
fi

# python $HOME/crystalfaeries.net/posix/bin/youtube-dl --update # Software Update the youtube[, etc.] downloader

# Setup our trap handler
vidurl=""						# vidurl in progress

# Exit/signal handler: put the URL that is currently in progress back on the
# queue, remove our temporary queue file, and terminate with status 1.
#
# Globals:  vidurl (read) - URL in progress, empty when there is none
#           vidque (read) - queue file the URL is appended to
# The variables are read when the handler RUNS.  (The previous handler was a
# double-quoted trap string, so $vidurl was expanded when the trap was
# installed - i.e. always empty - and a blank line was appended to the queue
# instead of the URL.)
requeue_vidurl() {
	# Disarm first: the "exit" below would otherwise re-enter this handler
	# through the EXIT trap when we were invoked by a signal, and the URL
	# would be queued twice.
	trap - 0 1 2 15
	if [ -n "$vidurl" ]
	then
		echo "$vidurl" >> "$vidque"
	fi
	rm -f "/tmp/$$.youtube.txt"
	exit 1
}
trap requeue_vidurl 0 1 2 15				# EXIT, HUP, INT, TERM
# NOTE: we mount most of our volumes as "errors=remount-ro", so
# if our disk has errors we WILL lose a URL we can't append back to the queue on disk

# Setup our daemon loop
# Enter the video directory and make sure the cache directory exists;
# either failure is fatal.
cd "${viddir}"			|| exit 2		# working  directory
mkdir -p "${cachedir}"		|| exit 3		# cachedir directory
# Rotate the log: keep exactly one previous generation, named "<log>-".
rm "${vidlog}-"			2>/dev/null		# remove old log (if it exists)
mv "${vidlog}" "${vidlog}-"	2>/dev/null		#   save     log (if it exists)
: > "${vidlog}"						#  start new log empty

# Loop forever as a daemon (until "qyou -k")
# Move the thumbnail that goes with one audio file from $viddir into the
# current directory.
#   $1 - audio file name (in the current directory)
#   $2 - its extension, without the dot; it is swapped for "jpg"
# Globals: viddir (read)
collect_thumbnail() {
	# NB: the "/" matters - $viddir has no trailing slash
	local thumb="$viddir/${1%."$2"}.jpg"
	if [ -r "$thumb" ]
	then
		mv -- "$thumb" .
	fi
}

# Remove a youtube-dl format tag from the names of files in the current
# directory, e.g. "song.f140.m4a" becomes "song.m4a".
#   $1 - the tag, without the dots (e.g. f140)
strip_format_tag() {
	local tag=".$1"
	local f
	for f in *"$tag".*
	do
		# When nothing matches, the glob stays literal: skip it instead of
		# handing mv a pattern (unquoted, "*.*" would expand to every file).
		[ -e "$f" ] || continue
		mv -- "$f" "${f//"$tag"/}"
	done
}

# Loop forever as a daemon (until "qyou -k").  Each pass:
#   1. sleeps $sleep minutes, then doubles $sleep (capped at $sleepmax);
#   2. tidies $viddir and the audio hierarchy;
#   3. exits if the video volume is at or above the limit in $vidlim;
#   4. otherwise pops the first URL of the sorted queue and downloads it,
#      resetting $sleep to 1 minute when the download succeeds.
while sleep "${sleep}m"
do
	sleep=$(( sleep + sleep ))	# binary backoff
	if (( sleep > sleepmax ))
	then				# up to the
		sleep=$sleepmax		# maximum sleep time
	fi

	echo "$(/usr/local/bin/now) YOUTUBE TIDY BEGIN"	>> "$vidlog"		# initiate new TIDY
#	name_tidy	"$viddir"			>> "$vidlog"	2>&1	# tidy the directory
	# "--" keeps file names that begin with "-" from being parsed as options.
	# When a glob matches nothing, rm/mv just complain on stderr.
	rm -- *.f13{3,4,5,6,7}.mp4 *.f24{2,3,4,7,8}.webm *.f264.mp4 *.f298.mp4	# delete videos sans audio
	rm -- *.fdash-fastly_skyfire-video-* *.fdash-fastly_skyfire_sep-video-*	# delete videos sans audio
	mv -- *.m4a		"$HOME/crystalfaeries.net/audio/m4a"		# move audios to audio hierarchy
	mv -- *.ogg		"$HOME/crystalfaeries.net/audio/ogg"		# move audios to audio hierarchy
	mv -- *.opus		"$HOME/crystalfaeries.net/audio/opus"		# move audios to audio hierarchy
	mv -- *.f171.webm	"$HOME/crystalfaeries.net/audio/webm"		# move audios to audio hierarchy
	mv -- *.f251.webm	"$HOME/crystalfaeries.net/audio/webm"		# move audios to audio hierarchy

	# Regenerate the playlist: every path below $viddir (as printed by du),
	# largest first.
	du -a "$viddir/"	2>> "$vidlog" \
	| sort -nr		2>> "$vidlog" \
	| cut -f2-		2>> "$vidlog" > "$vidfil"

	# Remove orphaned description files: a description is kept only while a
	# readable file named like it minus ".description" exists.
	# NOTE(review): that test uses the bare stem (no media extension) -
	# confirm this matches how the downloaded files are actually named.
	touch "TIDY.$$.description"	# guarantees the glob below matches something
	for f in *.description
	do
		if [ ! -r "${f%.description}" ]
		then
			rm -- "$f"
		fi
	done

	# Collect the thumbnails that go with our audio files and drop the
	# format tags from the audio names.  Each directory is only processed
	# when pushd succeeded, so a missing directory can no longer make these
	# loops run in the wrong place.
	if pushd "$HOME/crystalfaeries.net/audio/m4a"
	then
		for f in *.m4a
		do
			collect_thumbnail "$f" m4a
		done
		strip_format_tag f140
		strip_format_tag f141
		popd
	fi
	if pushd "$HOME/crystalfaeries.net/audio/ogg"
	then
		for f in *.ogg
		do
			collect_thumbnail "$f" ogg
		done
		strip_format_tag f140
		strip_format_tag f141
		popd
	fi
	if pushd "$HOME/crystalfaeries.net/audio/webm"
	then
		for f in *.f171.webm *.f251.webm
		do
			collect_thumbnail "$f" webm
		done
		strip_format_tag f251
		popd
	fi
	if pushd "$HOME/crystalfaeries.net/audio/opus"
	then
		for f in *.opus
		do
			collect_thumbnail "$f" opus
		done
		popd
	fi

	# Delete every file the log reports as "Cannot rename <file> ...".
	while IFS= read -r f
	do
		[ -n "$f" ] || continue
		rm -- "$viddir/$f"
	done < <(grep "Cannot rename" "$vidlog" | sed 's/^Cannot rename //;s/ .*$//' | sort -u)
	echo "$(/usr/local/bin/now) YOUTUBE TIDY END"	>> "$vidlog"	2>&1	# completed TIDY

	# If we have enough disk space, do the next download.
	# percent = the Use% figure df reports for $vidvol.  It is validated as a
	# plain number (rather than through the exit status of "let") so that a
	# volume that is 0% full does not kill the daemon.
	percent=$( /bin/df 2>> "$vidlog" | grep "$vidvol" 2>> "$vidlog" | sed 's/%.*$// ; s/^.*[[:blank:]]//' 2>> "$vidlog" )
	[[ $percent =~ ^[0-9]+$ ]]	|| exit 4
	limit=$( head -n 1 "$vidlim" 2>> "$vidlog" | cut -f 1 2>> "$vidlog" )
	if [[ $percent -lt $limit ]]
	then	# we have freespace available, so check for another video URL
		# extract the next vidurl from the queue file: first line of the
		# sorted, de-duplicated queue; the remainder becomes the new queue
		vidurl=$( sort -u "$vidque" 2>> "$vidlog" | head -n 1 )
		sort -u "$vidque" 2>> "$vidlog" | tail -n +2 > "/tmp/$$.youtube.txt"
		cp "/tmp/$$.youtube.txt" "$vidque"	2>> "$vidlog"
		if [ -n "$vidurl" ]
		then	# we have a non-null URL to process and space for it
			echo "$(/usr/local/bin/now)"	>> "$vidlog" 2>&1
			# fold the current archive into the accumulated master archive file
			cat "${archive}-" "$archive" | sort -u > "/tmp/$$.txt"
			mv "/tmp/$$.txt" "${archive}-"
			# remove vidurl from archive file or we will fail to (re-)download
			hash=$( printf '%s\n' "$vidurl" | sed 's/^.*=//' | cut -c 1-11 )
			printf '%s\n' "$hash"	>> "$viddir/.hash.txt"	#debug log
			if [ -n "$hash" ]
			then
				# -F -e: the id is a literal string and may start with "-";
				# without -e grep would take it for options, print nothing,
				# and the archive would be overwritten with an empty file.
				grep -v -F -e "$hash" "$archive" > "/tmp/$$.txt"
				mv "/tmp/$$.txt" "$archive"	# current archive file
			fi
			# the difference between the $hash and $archive should be download failures?

# youtube-dl's --batch-file FILE standard of: Lines starting with '#', ';' or ']' are comments we don't yet implement with .youtube.txt

# start	parking disabled youtube-dl options:	#################################################
#				--audio-format		"aac"					\
#				--audio-format		"flac"					\
#				--audio-format		"m4a"					\
#				--audio-format		"mp3"					\
#				--audio-format		"opus"					\
#				--audio-format		"vorbis"				\
#				--audio-format		"wav"					\
#				--batch-file		$HOME/downloads/youtube.com/.youtube.txt\
#				--embed-subs							\
#				--get-url		Simulate, print URL			\
#				--get-title		Simulate, print title			\
#				--get-id		Simulate, print id			\
#				--get-thumbnail		Simulate, print thumbnail URL		\
#				--get-description	Simulate, print video description	\
#				--get-duration		Simulate, print video length		\
#				--get-filename		Simulate, print output filename		\
#				--get-format		Simulate, print output format		\
#				--netrc								\
#				--no-warnings							\
#				--quiet								\
#				--write-annotations						\
# endof	parking disabled youtube-dl options.	#################################################
			if youtube-dl								\
				--cache-dir		"$cachedir"				\
				--continue							\
				--download-archive	"$archive"				\
				--embed-thumbnail						\
				--exec			"echo $viddir/{}>> $vidfil 2>> $vidlog"	\
				--extract-audio							\
				--ignore-errors							\
				--keep-video							\
				--no-part							\
				--no-progress							\
				--prefer-free-formats						\
				--restrict-filenames						\
				--retries infinite						\
				--sub-lang en							\
				--verbose							\
				--write-description						\
				--write-thumbnail						\
				"$vidurl"				>> "$vidlog" 2>&1
			then
				sleep=1		# success: reset binary backoff timer
				yourerror=0	# and forget any earlier failure
			else
				yourerror=$?	# exit status of youtube-dl
				echo "${0}: ERROR ${yourerror}: ${vidurl}" >> "$vidlog" 2>&1
			fi
			vidurl=""		# this download attempt is over: nothing is in progress
		fi
	else	# out of hard disk space to download to
		exit 1	# pau for now: DAEMON quitting, restartable with "qyou -s"
	fi
done
exit	255	# Reached only when the loop condition (the sleep command itself) fails; every other exit happens inside the loop

# to handle filenames beginning with a "-":
# for f in $(ls ./-* | sed 's/^\.\/-//');do mv ./-"$f" "$f"; done
# see http://knightwise.com/?s=youtube-dl&x=0&y=0
# /usr/share/doc/youtube-dl/NEWS.Debian.gz
# /usr/share/doc/youtube-dl/README.txt.gz
