#!/bin/bash
# /usr/local/bin/downloader
# http://crystalfaeries.net/posix/bin/downloader
# celeste:crystalfaery 2014-09-12 04:59:57+00:00
# Automatically download youtube videos, vodcasts, and podcasts
# We start from ~/Mail/download e-mail mailbox (which we will destroy)
# to collect URLs to download, which once extracted are queued for download into /home/downloads
####### Configuration #######

# --- disk-space guard ---
# First line / first tab-separated field of this file is the maximum
# percentage fullness allowed on the download volume.
limit=/etc/downloadlimit
# Volume whose fullness is compared against that limit.
downvol=/home/public

# --- download targets ---
downdir=/home/public/downloads/			# podcast download target
youdir=/home/public/downloads/youtube.com	# youtube download target

# --- queues ---
down_q=/home/downloads/.down_q.txt		# download queue
e_mail=/home/downloads/.e_mail.txt		# eMail queue, split into the queues below
podque=/home/downloads/.podque.txt		# podcast queue
youque=/home/downloads/youtube.com/.youtube.txt	# youtube queue

# --- logs ---
podlog=/home/downloads/.podlog.txt		# podcast download log
youlog=/home/downloads/youtube.com/.youlog.txt	# youtube download log
downlog=/home/downloads/.wget.txt		# download log

# --- runtime state ---
url=""		# url in progress
sleep=1		# current delay between downloads (in minutes)
sleepmax=60	# maximum delay between failure retries (in minutes)

# Guests may view, only Accounted users may delete
umask 2
####### Initialize our daemon loop #######

# Exit handler: put the URL that was in progress (if any) back into the
# e-mail queue and remove the youtube scratch file.
# NOTE(review): the main loop only keeps $e_mail lines containing "youtube",
# so a requeued bare video id or podcast URL may be dropped on the next pass.
requeue_and_cleanup() {
  if [[ -n "$url" ]]; then
    printf '%s\n' "$url" >> "$e_mail"
  fi
  rm -f "/tmp/$$.youtube.txt"
}
# $url has to be read when the handler fires, not when it is installed: the
# former double-quoted trap string froze it at its initial empty value and so
# appended a blank line to the queue on every exit.
# The handler deliberately does not call exit, so the distinct exit codes
# used by this script reach the caller instead of always being replaced by 1.
trap requeue_and_cleanup EXIT
# Signals still terminate with status 1; that exit runs the handler above once.
trap 'exit 1' HUP INT TERM

# I want it now, and you will have to kill me to unmount the directory I use
cd "$downdir" || exit 2

# Both logs must be writable before we go any further.
echo "$0: $(/usr/local/bin/now): STARTing" >> "$podlog" || exit 3
echo "$0: $(/usr/local/bin/now): STARTing" >> "$youlog" || exit 4

# tidy filenames (name_tidy is an external helper)
name_tidy "$youdir" 2>> "$youlog"

# Leftover partial downloads, step 1: backdate them to 1970-01-01 10:01.
find "$youdir" -iname '*.part' -print \
  -exec sudo touch -t 197001011001 {} \; 2>> "$youlog"
# Step 2: copy each partial to its name without the ".part" suffix.
# The suffix has to be stripped per file inside -exec; a $( ... ) written
# directly on the find command line is expanded by the shell before find
# runs and yields a literal "{}", which made rsync copy each file onto itself.
# rsync -u skips a destination that is newer than the (backdated) source.
find "$youdir" -iname '*.part' -print \
  -exec sh -c 'rsync -auvzH "$1" "${1%.part}"' sh {} \; 2>> "$youlog"
# Step 3: remove the partials.
find "$youdir" -iname '*.part' -print -exec rm {} \; 2>> "$youlog"

# by size playlist: entries of $youdir ordered by disk usage, smallest first
du "$youdir"/* 2>> "$youlog" \
  | sort -n 2>> "$youlog" \
  | cut -f2 > ~/playlists/youtube.pls 2>> "$youlog"
####### Loop forever as a daemon #######
# Each pass sleeps "$sleep" minutes and then, provided the download volume is
# below the configured fullness limit, refreshes the three queues from their
# mail files and downloads at most one youtube video, one vodcast and one
# podcast.  Any successful download resets the delay to 1 minute.
while sleep "${sleep}m"; do
  # binary backoff, up to the maximum sleep time
  sleep=$(( sleep + sleep ))
  if (( sleep > sleepmax )); then
    sleep=$sleepmax
  fi

  # Percentage fullness of the download volume, taken from the first df line
  # that mentions it.  df -P keeps every filesystem on a single line.
  percent=$( /bin/df -P 2>> "$youlog" \
    | grep -- "$downvol" 2>> "$youlog" \
    | head -n 1 \
    | sed 's/%.*$// ; s/^.* //' 2>> "$youlog" )
  # Validate explicitly: relying on the status of `let` made a legitimate
  # reading of 0% terminate the daemon.
  [[ "$percent" =~ ^[0-9]+$ ]] || exit 5

  max_percent=$( head -n 1 "$limit" 2>> "$youlog" | cut -f 1 2>> "$youlog" )
  # no freespace available: skip this pass and sleep again
  [[ "$percent" -lt "$max_percent" ]] || continue

  ####### Youtubes #######
  # Extract video ids from the mail file and append them to the queue.
  # The substitutions run in the listed order on every line that mentions
  # youtube; both http:// and https:// prefixes are stripped.
  sed -e 's|youtu.be|youtube|' \
      -e '/youtube/!d' \
      -e 's|^.*https\{0,1\}://||' \
      -e 's|^www.youtube.com/watch?v=||' \
      -e 's|^www.youtube.com/v/||' \
      -e 's|{youtube}||' \
      -e 's|{/youtube}||' \
      -e 's|^youtube/||' \
      -e 's|&.*$||' \
      -e 's|".*$||' \
      -e 's|\[.*$||' \
      -e 's|<.*$||' \
      -e 's|^.*watch?v=||' \
      -e 's|</a>.*$||' \
      -e 's|^.*embed/||' \
      -e 's|?version=.*$||' \
      -e 's|&#.*$||' \
      < "$e_mail" \
    | sort -u >> "$youque" 2>> "$youlog"
  cp /dev/null "$e_mail" 2>> "$youlog"	# the mail file is consumed

  # pop the first entry of the sorted, de-duplicated queue
  url=$( sort -u "$youque" 2>> "$youlog" | head -n 1 2>> "$youlog" )
  sort -u "$youque" 2>> "$youlog" | tail -n +2 2>> "$youlog" > "/tmp/$$.youtube.txt"
  cp "/tmp/$$.youtube.txt" "$youque" 2>> "$youlog"

  if [[ -n "$url" ]]; then
    # "--" keeps an id that starts with "-" from being parsed as an option.
    if youtube-dl -i -c --restrict-filenames --prefer-free-formats --no-progress \
         -- "$url" >> "$youlog" 2>&1; then
      # Reset the backoff in this shell; inside a ( ... ) group the
      # assignment was lost when the subshell ended.
      sleep=1
      name_tidy "$youdir" 2>> "$youlog"
      du "$youdir"/* 2>> "$youlog" \
        | sort -n 2>> "$youlog" \
        | cut -f2 2>> "$youlog" > ~/playlists/youtube.pls
    else
      echo "$0: ERROR $?: $url" >> "$youlog"
    fi
  fi

  ####### Vodcasts #######
  if [[ -s ~/Mail/vodcast ]]; then
    # Extract a list of URLs from the Mail file vodcast: for each video
    # extension keep the matching lines, drop everything before "http" and
    # everything after the extension when it is followed by ", ] or }.
    # The grep pattern's leading "." is an unescaped regex wildcard, kept
    # exactly as the filter has always been.
    for ext in avi mov mp4 m4v mv4 ogv webm wmv h.264 264; do
      grep ".${ext}" ~/Mail/vodcast \
        | sed -e 's/^.*http/http/' \
              -e "s/\\.${ext}\".*\$/.${ext}/" \
              -e "s/\\.${ext}\\].*\$/.${ext}/" \
              -e "s/\\.${ext}}.*\$/.${ext}/"
    done >> ~/playlists/vodcast.txt
    cp /dev/null ~/Mail/vodcast	# this mail file serves only to queue vodcast URLs
    sed 's/ (.*$//' < ~/playlists/vodcast.txt > /tmp/vodcast.$$.txt
    sort -u < /tmp/vodcast.$$.txt > ~/playlists/vodcast.txt
  fi

  # pop the next URL
  url=$( head -n 1 ~/playlists/vodcast.txt )
  tail -n +2 ~/playlists/vodcast.txt > /tmp/vodcast.$$.txt
  mv /tmp/vodcast.$$.txt ~/playlists/vodcast.txt

  if [[ -n "$url" ]]; then
    # Drop remaining copies of exactly this URL.  -F -x compares whole lines
    # literally: used as a regex, the URL could remove unrelated entries, and
    # a URL that is not a valid regex emptied the whole queue.
    grep -vxF -- "$url" ~/playlists/vodcast.txt > /tmp/vodcast.$$.txt
    mv /tmp/vodcast.$$.txt ~/playlists/vodcast.txt
    if wget -nv --mirror --no-check-certificate -c "$url"; then
      sleep=1
    else
      echo "$url" >> ~/playlists/vodcast.txt-	# failed URLs are kept here
    fi
    # Host part of the URL, passed to name_tidy -r below.
    host=${url##*//}
    host=${host%%/*}
    echo "name_tidy -r $host"
    name_tidy -r "$host"
  fi

  ####### Podcasts #######
  if [[ -s ~/Mail/podcast ]]; then
    # Extract a list of URLs from the Mail file podcast.
    cp /dev/null /tmp/podcast.$$.txt || exit 6
    grep -e '.mp3' -e '.ogg' -e '.wma' ~/Mail/podcast >> /tmp/podcast.$$.txt
    sort -u /tmp/podcast.$$.txt \
      | sed -e 's/^.*http/http/' \
            -e 's/\.mp3".*$/.mp3/' \
            -e 's/\.ogg".*$/.ogg/' \
            -e 's/\.wma".*$/.wma/' \
            -e 's/\.mp3}.*$/.mp3/' \
            -e 's/\.ogg}.*$/.ogg/' \
            -e 's/\.wma}.*$/.wma/' \
            -e 's/\.mp3\].*$/.mp3/' \
            -e 's/\.ogg\].*$/.ogg/' \
            -e 's/\.wma\].*$/.wma/' \
            -e 's/{mp3remote}//' \
            -e 's/{oggremote}//' \
            -e 's/{wmaremote}//' \
            -e 's/{\/mp3remote}//' \
            -e 's/{\/oggremote}//' \
            -e 's/{\/wmaremote}//' \
            -e 's/&.*$//' \
            -e 's/ (.*$//' \
            -e 's/>.*$//' \
      | sort -u >> ~/playlists/podcast.txt
    # re-sort the whole queue so old and new entries are de-duplicated
    cp ~/playlists/podcast.txt /tmp/podcast.$$.txt || exit 7
    sort -u /tmp/podcast.$$.txt > ~/playlists/podcast.txt || exit 8
    rm /tmp/podcast.$$.txt || exit 9
    cp /dev/null ~/Mail/podcast || exit 10
  fi

  # pop the next URL
  url=$( head -n 1 ~/playlists/podcast.txt )
  tail -n +2 ~/playlists/podcast.txt > /tmp/podcast.$$.txt
  mv /tmp/podcast.$$.txt ~/playlists/podcast.txt

  if [[ -n "$url" ]]; then
    # literal whole-line match, for the same reason as in the vodcast queue
    grep -vxF -- "$url" ~/playlists/podcast.txt > /tmp/podcast.$$.txt
    mv /tmp/podcast.$$.txt ~/playlists/podcast.txt
    if wget -nv --mirror --no-check-certificate -c "$url"; then
      sleep=1
    else
      echo "$url" >> ~/playlists/podcast.txt-	# failed URLs are kept here
    fi
    touch ~/playlists/podcast.txt
    host=${url##*//}
    host=${host%%/*}
    echo "name_tidy -r $host"
    name_tidy -r "$host"
  fi
done
exit 11 # there is no error-free exit
# #!/bin/bash
# # /usr/local/bin/podcastdownload
# # http://crystalfaeries.net/posix/bin/podcastdownload
# # celeste:crystalfaery 2014-09-09 16:48:00+00:00
# # THIS IS OBSOLETED BY /usr/local/bin/downloader*
# # and only serves to hold code which may get incorporated therein
#
# MAILTO=`whoami`
# umask 2
# cd /home/downloads || exit 1
#
# let sleepmax=60 # longest sleep time
# let sleep=1 # reset delay to minimum presuming success
# while sleep "$sleep"m;do
# let sleep=$sleep+$sleep # binary backoff
# let percent="`df | grep /home/audio | sed 's/%.*$// ; s/^.* // ; s/^.* //'`" || exit 2
# if [[ $percent -lt `head -n 1 /etc/downloadlimit | cut -f 1` ]]
# then # we have freespace available, so check for another podcast audio
# if [ -s ~/Mail/podcast ]
# then
# # we must extract a list of URLs from the Mail file podcast
# cp /dev/null /tmp/podcast.$$.txt || exit 3
# grep '.mp3' ~/Mail/podcast >> /tmp/podcast.$$.txt
# grep '.ogg' ~/Mail/podcast >> /tmp/podcast.$$.txt
# grep '.wma' ~/Mail/podcast >> /tmp/podcast.$$.txt
# sort -u /tmp/podcast.$$.txt \
# | sed 's/^.*http/http/' \
# | sed 's/\.mp3".*$/.mp3/' \
# | sed 's/\.ogg".*$/.ogg/' \
# | sed 's/\.wma".*$/.wma/' \
# | sed 's/\.mp3}.*$/.mp3/' \
# | sed 's/\.ogg}.*$/.ogg/' \
# | sed 's/\.wma}.*$/.wma/' \
# | sed 's/\.mp3\].*$/.mp3/' \
# | sed 's/\.ogg\].*$/.ogg/' \
# | sed 's/\.wma\].*$/.wma/' \
# | sed 's/{mp3remote}//' \
# | sed 's/{oggremote}//' \
# | sed 's/{wmaremote}//' \
# | sed 's/{\/mp3remote}//' \
# | sed 's/{\/oggremote}//' \
# | sed 's/{\/wmaremote}//' \
# | sed 's/&.*$//' \
# | sed 's/ (.*$//' \
# | sed 's/>.*$//' \
# | sort -u >> ~/playlists/podcast.txt
# cp ~/playlists/podcast.txt /tmp/podcast.$$.txt || exit 4
# sort -u /tmp/podcast.$$.txt > ~/playlists/podcast.txt || exit 5
# rm /tmp/podcast.$$.txt || exit 6
# cp /dev/null ~/Mail/podcast || exit 7
# fi
# # process the next URL
# url="`head -n 1 ~/playlists/podcast.txt`"
# tail -n +2 ~/playlists/podcast.txt > /tmp/podcast.$$.txt
# mv /tmp/podcast.$$.txt ~/playlists/podcast.txt
# if [ "$url" != "" ]
# then # download the next podcast audio
# grep -v "$url" ~/playlists/podcast.txt > /tmp/podcast.$$.txt
# mv /tmp/podcast.$$.txt ~/playlists/podcast.txt
# wget -nv --mirror --no-check-certificate -c "$url" && let sleep=1 || echo "$url" >> ~/playlists/podcast.txt-
# touch ~/playlists/podcast.txt
# echo "name_tidy -r `echo $url | sed 's/^.*\/\///' | sed 's/\/.*$//'`"
# name_tidy -r "`echo $url | sed 's/^.*\/\///' | sed 's/\/.*$//'`"
# fi
# fi
# # if [ -s ~/playlists/podcast.txt ]
# # then
# # echo "~/playlists/podcast.txt has size:"
# # ls -Flad ~/playlists/podcast.txt
# # else
# # echo "~/playlists/podcast.txt is null:"
# # ls -Flad ~/playlists/podcast.txt
# # echo "~/playlists/podcast.txt- is:"
# # ls -Flad ~/playlists/podcast.txt-
# # fi
# if [[ $sleep -gt $sleepmax ]]
# then
# let sleep=$sleepmax # maximum time to sleep
# fi
# done
# exit -1
# syntax highlighted by Code2HTML, v. 0.9.1