#!/bin/bash
# /usr/local/bin/downloads
# http://crystalfaeries.net/posix/bin/downloads
# celeste:crystalfaery DOWNLOADS 2017-11-29 00:53:00+00:00
#
# Management tool for the downloads initiated by "download", which:
# may have been aborted due to system reboot,
# may still be running in the background,
# may have completed, whether with failure or success.
#
# We use the presence of a .wget.txt logfile to indicate status.
# When the download is finished, we replace it with:
# .hardlinks.txt if there are hardlinked files
# .du.txt a "dudir" listing of disk usage
# .tree.txt a directory tree
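#
# A sketch of that convention as a shell check (illustrative only; the
# function name and status wording below are mine, not part of this tool):
# download_status() { # report where a download directory stands
# [ -e "$1"/.wget.txt ] && { echo "running or aborted"; return; }
# [ -e "$1"/.tree.txt ] && { echo "finished and tidied"; return; }
# echo "unknown"
# }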
cd /home/downloads/ || exit 1 # bail out if the downloads directory is missing
# for each website, merge any "www.site" directory into "site", then leave a "www.site" symlink pointing at "site"
for d in $(find . -maxdepth 1 -type d -name 'www.*' | sed 's/^\.\/www\.//g' | sort -u); do
rm {www.,}"${d}"/robots.txt 2>/dev/null # remove what is usually the ONLY file from an off-site reference
rm {www.,}"${d}"/{www.,}"${d}" 2>/dev/null # remove recursive symlinks
rm "${d}" 2>/dev/null # remove target symlinks
rmdir {www.,}"${d}" 2>/dev/null # remove empty directories or those which only had a robots.txt
mkdir -p "${d}" 2>/dev/null # create target directory
rsync -auvzH www."${d}"/.??* "${d}" 2>/dev/null # copy hidden files (the www. tree is removed below)
rsync -auvzH www."${d}"/* "${d}" # copy regular files
rm -rf www."${d}" # remove the www. directory
ln -s "${d}" www."${d}" # replace with a symlink
done
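# e.g. afterwards: "site/" is the real directory and "www.site" is a symlink to it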
# find attempted wgets
ls */.wget.txt 2>/dev/null \
| sed 's/\/\.wget\.txt$//g ; s/^www\.//g' \
| sort -u > /tmp/$$.dirs.txt # attempted downloads
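# NOTE: $$-based temp names are predictable; mktemp(1) is the safer idiom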
# uncomment this section to avoid looking at active downloads
# ps -ef --forest \
# | grep -v tail \
# | grep wg[e]t \
# | sed 's/^.*https:\/\///g ; s/^.*http:\/\///g ; s/^.* //g ; s/\/.*$//g ; s/^www\.//g' \
# | sort -u > /tmp/$$.wgets.txt # running downloads
# for f in `cat /tmp/$$.wgets.txt`
# do
# grep -v "$f" /tmp/$$.dirs.txt > /tmp/$$.tmp.txt
# mv /tmp/$$.tmp.txt \
# /tmp/$$.dirs.txt
# done # finished or failed
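# a shorter sketch of the same filter (untested; assumes procps pgrep):
# pgrep -af wget | grep -v tail \
# | sed 's/^.*https:\/\///g ; s/^.*http:\/\///g ; s/^.* //g ; s/\/.*$//g ; s/^www\.//g' \
# | sort -u > /tmp/$$.wgets.txt # running downloads
# grep -v -x -F -f /tmp/$$.wgets.txt /tmp/$$.dirs.txt > /tmp/$$.tmp.txt
# mv /tmp/$$.tmp.txt /tmp/$$.dirs.txt # keep only finished or failed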
# review wget logs of downloads
if [ -s /tmp/$$.dirs.txt ]
then
for d in $(cat /tmp/$$.dirs.txt)
do
popd >/dev/null 2>&1; cd /home/downloads/ # recover in case the previous pass pushd'ed without popd'ing
echo ""
echo "======= ${d} ======= BEGIN"
head "${d}"/.wget.txt || head www."${d}"/.wget.txt
echo "------- ------- ${d} ------- -------"
tail "${d}"/.wget.txt || tail www."${d}"/.wget.txt
echo "======= ${d} ======= END..."
echo ""
echo -n "D(elete), K(ill), F(dedupe), T(idy), V(iew), R(edownload), N(oop), URL(download): "; read answer
case "$answer" in
d|D)
rm -rf "${d}"
;;
k|K)
ps -ef --forest | grep -v tail | grep 'wg[e]t' | grep "${d}" | sed 's/^[a-z][a-z]* *//g' # show the matching wget process line(s)
KILL_PID=$(ps -ef --forest | grep -v tail | grep 'wg[e]t' | grep "${d}" | sed 's/^[a-z][a-z]* *//g;s/ .*$//g')
echo "KILL_PID: $KILL_PID"
for signal in hup term kill
do
kill -"$signal" $KILL_PID 2>/dev/null || break # stop once nothing is left to signal
sleep 6
done
echo "FINISHED by `whoami`" >> "${d}"/.wget.txt 2>/dev/null || \
echo "FINISHED by `whoami`" >> www."${d}"/.wget.txt # log our killing the download
echo "------- ${d} ------- ..."
tail "${d}"/.wget.txt 2>/dev/null || \
tail www."${d}"/.wget.txt # updated log end
echo "======= ${d} ======= END"
pushd "${d}" 2>/dev/null || pushd www."${d}" \
&& touch index.html HEADER.html \
&& cat index.html >> HEADER.html \
&& rm index.html \
&& yes|fdedupe \
&& tree -d > .tree.txt \
&& dudir > .du.txt \
&& vi .??* \
&& mv .wget.txt{,-} \
&& popd \
|| echo "ERROR CODE: $?"
;;
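# f) runs the same tidy sequence as k) above, without killing anything first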
f|F) pushd "${d}" 2>/dev/null || pushd www."${d}" \
&& touch index.html HEADER.html \
&& cat index.html >> HEADER.html \
&& rm index.html \
&& yes|fdedupe \
&& tree -d > .tree.txt \
&& dudir > .du.txt \
&& vi .??* \
&& mv .wget.txt{,-} \
&& popd \
|| echo "ERROR CODE: $?"
;;
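# t) like f) but runs name_tidy (presumably a recursive filename cleanup) in place of fdedupe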
t|T) pushd "${d}" 2>/dev/null || pushd www."${d}" \
&& touch index.html HEADER.html \
&& cat index.html >> HEADER.html \
&& rm index.html \
&& name_tidy -r \
&& tree -d > .tree.txt \
&& dudir > .du.txt \
&& vi .??* \
&& mv .wget.txt{,-} \
&& popd \
|| echo "ERROR CODE: $?"
;;
v|V) firefox "${d}" < /dev/null >& /dev/null & disown $! # view the downloaded website directory in a detached browser
;;
n|N|"") echo "no-op... ${d}"
;;
r|R) download "${d}" \
&& echo "redownloading... ${d}" \
|| echo "ERROR CODE: $?"
;;
*) download "$answer" \
&& echo "downloading... $answer" \
|| echo "ERROR CODE: $?"
;;
esac
done
fi
exit $? # Pau for Now
# CODE DEVELOPMENT IN PROGRESS:
# We wish to review all webpages stored on this computer,
# logging those already viewed in ~/.dush.webpages.txt
# so as not to re-view them, and
# finding them all by extension (.htm, .html, .mht),
# which we'll track in ~/.webpages.txt;
# we may wish to recycle code from the similarly purposed script "images".
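# a minimal sketch of that plan (untested; assumes one path per line in the log):
# find /home/downloads -type f \( -name '*.htm' -o -name '*.html' -o -name '*.mht' \) \
# | sort > ~/.webpages.txt # every webpage on disk
# touch ~/.dush.webpages.txt # make sure the "already viewed" log exists
# grep -v -x -F -f ~/.dush.webpages.txt ~/.webpages.txt # webpages not yet viewed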