#!/bin/bash
# /usr/local/bin/youtubedownload
# http://crystalfaeries.net/posix/bin/youtubedownload
# celeste:crystalfaery YOUTUBEDOWNLOAD 2017-07-27 01:44:23+00:00
# Automatically download (youtube) videos listed in our download queue
# Launch from your crontab:
# @reboot /usr/local/bin/youtubedownload >> /home/downloads/youtube.com/.youlog.txt 2>&1 # launch by CRON
# Launch as background task:
# $ /usr/local/bin/youtubedownload >> /home/downloads/youtube.com/.youlog.txt 2>&1 & # launch daemon
# $ disown %1 # detach the daemon
# CONFIGURATION:
let sleepmax=128 # maximum delay between failure retries (in minutes)
let sleep=1 # minimum delay between success tries (in minutes)
# NOTE(review): help output is the first $help lines of this very file, so
# adding or removing lines above this point changes what the help shows.
cachedir=/var/tmp/youtube-dl                        # youtube-dl cache directory
vidlim=/etc/downloadlimit                           # file whose first line/field is the maximum percentage fullness allowed
vidvol=/dev/mapper/fey--vg-home                     # device (as listed by df) of the volume holding $viddir
viddir=/home/downloads/youtube.com                  # directory the videos are downloaded into
archive=$viddir/.archive.txt                        # youtube-dl download archive (everything ever downloaded)
vidlog=$viddir/.youlog.txt                          # log file
vidque=$viddir/.youtube.txt                         # queue of URLs waiting to be downloaded
vidfil=$viddir/.playou.txt                          # playlist of just the downloaded youtubes
playlist=$HOME/documents/playlists/txt/monitor.txt  # playlist we monitor
umask 2                                             # Guests may view, only accounted users may delete
help=13                                             # Help displays this many lines of header

# Treat any argument as a request for help: print the header and stop.
# "$0" is quoted and preceded by "--" so a script path containing spaces or
# starting with "-" is still read as a file name.
if (( $# != 0 )); then
  head -n "$help" -- "$0"
  exit 0
fi
# Setup our trap handler
vidurl="" # URL most recently taken off the queue; assigned by the daemon loop

#######################################
# Exit handler: put the URL held in $vidurl back on the queue, remove our
# temporary queue file, and exit with the status that caused the exit.
# Globals:   vidurl (read), vidque (appended to)
# Returns:   never; exits with the status the script was exiting with
#######################################
requeue_on_exit() {
  local status=$?  # status passed to "exit" (or of the last command run)
  trap - EXIT      # make sure this handler cannot run a second time
  if [[ -n "$vidurl" ]]; then
    # Skip the append when nothing is in flight, so no blank line is queued.
    printf '%s\n' "$vidurl" >> "$vidque"
  fi
  rm -f "/tmp/$$.youtube.txt"
  exit "$status"
}
# The handler is installed by name so $vidurl is read when the handler RUNS.
# A double-quoted trap string would expand $vidurl here, while still empty.
trap requeue_on_exit EXIT
# HUP, INT and TERM just exit with status 1; the EXIT trap does the cleanup.
trap 'exit 1' HUP INT TERM
# NOTE: we mount most of our volumes as "errors=remount-ro", so
# if our disk has errors we WILL lose a URL we can't append back to the queue on disk
# Setup our daemon loop
cd "$viddir" || exit 2         # working directory
mkdir -p "$cachedir" || exit 3 # cache directory
rm -f -- "${vidlog}-" 2>/dev/null          # remove old log (if it exists)
mv -- "$vidlog" "${vidlog}-" 2>/dev/null   # save log (if it exists)
: > "$vidlog"                              # start a new, empty log
# Loop forever as a daemon

#######################################
# Delete each of the given paths that exists. A glob that matched nothing
# arrives as its literal pattern and is skipped.
# Arguments: $@ - candidate paths (normally glob expansions)
#######################################
remove_existing() {
  local path
  for path in "$@"; do
    if [[ -e "$path" ]]; then
      rm -- "$path"
    fi
  done
}

#######################################
# Move each of the given paths that exists into a directory.
# Arguments: $1 - destination directory; $2... - candidate paths
#######################################
move_existing() {
  local dest=$1 path
  shift
  for path in "$@"; do
    if [[ -e "$path" ]]; then
      # Trailing slash: fail rather than rename onto a missing directory.
      mv -- "$path" "$dest/"
    fi
  done
}

#######################################
# Tidy one audio directory: fetch the thumbnails that belong to its audio
# files out of $viddir, then strip youtube-dl format tags from file names.
# Globals:   viddir (read)
# Arguments: $1 - audio directory
#            then audio suffixes (e.g. "m4a", "f251.webm"), then "--",
#            then format tags (e.g. "f140") to strip from names
# Returns:   non-zero if the directory cannot be entered
#######################################
tidy_audio_dir() {
  local dir=$1 suffix tag audio thumb
  shift
  # Never run the rename loops in the wrong directory.
  pushd "$dir" > /dev/null || return
  while (( $# > 0 )) && [[ "$1" != "--" ]]; do
    suffix=$1
    shift
    for audio in *."$suffix"; do
      # collect the thumbnails that go with our audio files
      thumb="$viddir/${audio%.*}.jpg"
      if [[ -r "$thumb" ]]; then
        mv -- "$thumb" .
      fi
    done
  done
  if [[ "${1-}" == "--" ]]; then
    shift
  fi
  for tag in "$@"; do
    for audio in *".${tag}."*; do
      [[ -e "$audio" ]] || continue # glob matched nothing
      mv -- "$audio" "${audio//".${tag}"/}"
    done
  done
  popd > /dev/null
}

#######################################
# One housekeeping pass over the download directory (the current directory,
# which is $viddir) and the audio hierarchy under /home/audio.
# Globals:   viddir, vidlog, vidfil (read)
#######################################
tidy_downloads() {
  local f
  # /usr/local/bin/now and name_tidy are external tools not shown here;
  # presumably a timestamp printer and a file-name normaliser -- TODO confirm.
  echo "$(/usr/local/bin/now) TIDY BEGIN" >> "$vidlog" # initiate new TIDY
  name_tidy "$viddir" >> "$vidlog" 2>&1                # tidy the directory
  # delete videos sans audio
  remove_existing *.f13{3,4,5,6,7}.mp4 *.f24{3,4,7,8}.webm *.f264.mp4 *.f298.mp4
  # move audios to audio hierarchy
  move_existing /home/audio/m4a *.m4a
  move_existing /home/audio/ogg *.ogg
  move_existing /home/audio/opus *.opus
  move_existing /home/audio/webm *.f171.webm *.f251.webm
  # generate by-size playlist (largest first)
  du -a "$viddir/" 2>> "$vidlog" \
    | sort -nr 2>> "$vidlog" \
    | cut -f2- 2>> "$vidlog" > "$vidfil"
  # remove orphaned description files; the touched file guarantees the glob
  # matches at least once and is itself removed as an orphan.
  # NOTE(review): the test looks for the name with ".description" stripped and
  # no media extension added -- confirm that is how the description files are named.
  touch "TIDY.$$.description"
  for f in *.description; do
    if [[ ! -r "${f%.description}" ]]; then
      rm -- "$f"
    fi
  done
  tidy_audio_dir /home/audio/m4a m4a -- f140 f141
  tidy_audio_dir /home/audio/ogg ogg -- f140 f141
  tidy_audio_dir /home/audio/webm f171.webm f251.webm -- f251
  tidy_audio_dir /home/audio/opus opus --
  # delete the files the log reports as "Cannot rename"
  while IFS= read -r f; do
    [[ -n "$f" ]] || continue
    rm -- "$viddir/$f"
  done < <(grep "Cannot rename" "$vidlog" | sed 's/^Cannot rename //;s/ .*$//' | sort -u)
  echo "$(/usr/local/bin/now) TIDY END" >> "$vidlog" 2>&1 # completed TIDY
}

while sleep "${sleep}m"; do
  sleep=$(( sleep + sleep )) # binary backoff
  if (( sleep > sleepmax )); then
    sleep=$sleepmax # up to the maximum sleep time
  fi

  tidy_downloads

  # If we have enough disk space, do the next download
  percent=$(/bin/df 2>> "$vidlog" \
    | grep -F -- "$vidvol" 2>> "$vidlog" \
    | sed 's/%.*$// ; s/^.* // ; s/^.* //' 2>> "$vidlog")
  # Validate explicitly: a usage of 0 is a valid reading, not an error.
  [[ "$percent" =~ ^[0-9]+$ ]] || exit 4
  limit=$(head -n 1 "$vidlim" 2>> "$vidlog" | cut -f 1 2>> "$vidlog")
  limit=${limit//[[:space:]]/}
  if [[ "$limit" =~ ^[0-9]+$ ]] && (( percent < limit )); then
    # we have freespace available, so check for another video URL
    # Snapshot the queue once (sorted, de-duplicated, blank lines dropped):
    # the first line is the next URL and the rest becomes the new queue.
    sort -u "$vidque" 2>> "$vidlog" | sed '/^[[:space:]]*$/d' > "/tmp/$$.youtube.txt"
    vidurl=$(head -n 1 "/tmp/$$.youtube.txt" 2>> "$vidlog")
    tail -n +2 "/tmp/$$.youtube.txt" 2>> "$vidlog" > "$vidque"
    if [[ -n "$vidurl" ]]; then
      # we have a non-null URL to process and space for it
      echo "$(/usr/local/bin/now)" >> "$vidlog" 2>&1
      # accumulated master archive file
      # (cat | sort so a missing input still lets the other one through)
      cat "${archive}-" "$archive" | sort -u > "/tmp/$$.txt"
      mv "/tmp/$$.txt" "${archive}-"
      # remove vidurl from archive file or we will fail to (re-)download
      video_id=${vidurl##*=}   # text after the last "="
      video_id=${video_id:0:11} # first 11 characters
      echo "$video_id" >> "$viddir/.hash.txt" # debug log
      if [[ -n "$video_id" ]]; then
        # -F and "--": the id is literal text and may begin with "-".
        # An empty id is skipped: it would match, and drop, every line.
        grep -vF -- "$video_id" "$archive" > "/tmp/$$.txt"
        mv "/tmp/$$.txt" "$archive" # current archive file
      fi
      # start parking for disabled youtube-dl options: #########################################
      # --embed-subs \
      # --embed-thumbnail \
      # --no-warnings \
      # --quiet \
      # --write-annotations \
      # endof parking for disabled youtube-dl options. #########################################
      if youtube-dl \
        --cache-dir "$cachedir" \
        --continue \
        --download-archive "$archive" \
        --exec "echo $viddir/{}>> $vidfil 2>> $vidlog" \
        --extract-audio \
        --ignore-errors \
        --keep-video \
        --no-part \
        --no-progress \
        --prefer-free-formats \
        --restrict-filenames \
        --sub-lang en \
        --verbose \
        --write-description \
        --write-thumbnail \
        "$vidurl" >> "$vidlog" 2>&1; then
        sleep=1   # success: reset the binary backoff timer
        vidurl="" # finished, so an exit handler reading $vidurl will not re-queue it
      else
        youtube_error=$?
        echo "$0: ERROR $youtube_error: $vidurl" >> "$vidlog"
      fi
    fi
  fi
done
exit 255 # only reached if the sleep that drives the loop fails
# to handle filenames beginning with a "-":
# for f in $(ls ./-* | sed 's/^\.\/-//');do mv ./-"$f" "$f"; done
# see http://knightwise.com/?s=youtube-dl&x=0&y=0
# /usr/share/doc/youtube-dl/NEWS.Debian.gz
# /usr/share/doc/youtube-dl/README.txt.gz
# syntax highlighted by Code2HTML, v. 0.9.1