#!/bin/bash
#                       /usr/local/bin/fdedupe
#  http://crystalfaeries.net/posix/bin/fdedupe
# celeste:crystalfaery FDEDUPE 2021-04-01 15:56:24+00:00
# Replace non-zero duplicate files with hard links.
#	WARNING: we use name_tidy to clean-up file names to be handled by fdupes.
#	 EITHER: read and understand the powers and dangers of name_tidy,
#	     OR: make sure you have a full backup of all files and directories and subdirectories,
#	    AND: including those symlinked if you specify '-L' (or even if you do NOT, until we debug the auto-link-follow!)

# RECOMMENDED USAGE:
#	cd	[working directory tree to be de-duplicated]	# DANGER, bug always follows symlinks?
#	fdedupe		# first execution will prompt you to name_tidy, answer "yes".
#   yes|fdedupe		# how to script the built-in name_tidy (better be sure of your target directory!)
#	fdedupe -f	# 2nd through nth executions "force" dedupe without name_tidy (already done once).

# fdedupe should be extended two ways:
# 1. a Bridge	option (-b) where arguments are not separate fdedupe operations, but all bridged together in one fdedupe operation
# 2. An Exclude	option (-e) which excludes some subdirectories of the top level directory being fdeduped

# fdedupe locks to prevent more than one instance simultaneously working on any given filesystem.

let help=22	# line# this line - 1 ("head -n $help" prints the header comment as the help text)

#	CONFIGURATION:
let lockretry=32767	# value passed to filelock's -r option
#	The lock file name is built from this script's base name and the current
#	working directory with every "/" replaced by "_".
#	Everything is quoted so that whitespace in $0 or in the working directory
#	cannot split the name into several words.
lockfile="/run/lock/$(basename -- "$0").$(pwd | sed 's/\//_/g')"

#	DEFAULTS
    true=0			# POSIX
  force=""			# default to prompting; becomes "f" when -f is given
symlink=""			# default to NOT FOLLOW symlinks; becomes "L" when -L is given

#	ARGUMENT PARSING
#	--version / -v and --help / -h are honoured only as the first argument;
#	each prints and exits immediately.
case "${1-}" in
--version|-v)
	head -n 4 "$0" | tail -n 1	# line 4 of this script is the version stamp
	exit
	;;
--help|-h)
	head -n "$help" "$0"		# the leading comment block is the help text
	exit
	;;
esac

#	Flags: -f, -L, -Lf and -fL, accepted in any order.
#	(Every sequence accepted by the former fixed-order checks is still accepted.)
while [[ $# -gt 0 ]]
do
	case "$1" in
	-f)
		force="f"	# force dedupe without prompts
		;;
	-L)
		symlink="L"	# follow symlinks
		;;
	-Lf|-fL)
		force="f"	# force dedupe without prompts
		symlink="L"	# follow symlinks
		;;
	*)
		break		# not a flag we know: leave it in $1 for the check below
		;;
	esac
	shift
done

if [[ -n "${1-}" ]]
then				# an argument wasn't expected, so punt it
	echo	>&2	"As $0 is currently coded, it only works on cwd (current working directory)."
	echo	>&2	"I agree it would be good for $0 to take arguments."
	echo	>&2	"Currently the only arguments accepted are:"
	echo	>&2	"'-L' to follow symlinks."
	echo	>&2	"'-f' to force without prompts."
	echo -n	>&2	"Do you volunteer to do shell hacking today?:"
	read -r answer
	case $answer in
	y*)
		vi	"$0"	# hard coded "vi": $EDITOR was found to be unset on the author's system
		exit	$?
		;;
	*)
				# most probably the intent was to ONLY work on the arguments and NOT on the
				# current working directory, so the worst thing we could do is proceed on cwd.
		echo	>&2	 "I am aborting because I do not beleive you intended me to work on `pwd`."
		exit	-1	# well, technically it is an error, no?
		;;
	esac
fi

# install ERROR / EXIT HANDLER:
exitval=0	# default to success
lockheld=""	# becomes "y" only after we have successfully taken ${lockfile}

#	cleanup: EXIT handler.
#	Removes our temporary files, releases the lock if (and only if) this
#	instance took it, and then exits with the status the script was already
#	exiting with. If the script was succeeding but the unlock fails, the
#	unlock status is used instead.
function cleanup {
	local status=$?		# must be the first command: the status we are exiting with
	local f
	trap - 0		# we are already cleaning up; never re-enter

	# remove temporary files:
	for f in .fdupes.?.$$ /tmp/fdupes.$$
	do  # spawn a ReMove process for each of our temporary files so we can exit soon.
	    if [ -f	 "${f}" ]
	    then
		nohup rm "${f}"	</dev/null >&/dev/null &
		disown "$!"	# detach exactly the job just started, whatever its job number
	    fi
	done

	# remove lock file, but never release a lock we did not acquire:
	if [ -n "${lockheld}" ]
	then
		filelock -u -r "${lockretry}" "${lockfile}"
		exitval=$?
		if [[ $exitval -ne 0 ]]
		then	# ERROR unlocking
			echo "$0: error $exitval unlocking ${lockfile}" 1>&2
			if [[ $status -eq 0 ]]
			then
				status=$exitval
			fi
		fi
	fi
	exit	$status
}
trap cleanup 0		# every exit path runs cleanup exactly once
trap 'exit 129' 1	# HUP : exit (128 + signal number); the EXIT trap then cleans up
trap 'exit 130' 2	# INT
trap 'exit 143' 15	# TERM
filelock -l -r "${lockretry}" "${lockfile}"
exitval=$?
if	[[ $exitval -ne 0 ]]
then	# ERROR locking
	echo "$0: error $exitval locking ${lockfile}" 1>&2
	exit	$exitval
fi
lockheld="y"

#	Probe: can hard links be made in this filesystem?
#	Create a scratch file, hard-link it to a second name, then delete both.
#	Each step that fails aborts with its own exit code: -2 touch, -3 ln, -4 rm.
if ! touch ".fdupes.1.$$"
then
	exit -2
fi
if ! ln ".fdupes.1.$$" ".fdupes.2.$$"
then
	exit -3
fi
if ! rm ".fdupes.1.$$" ".fdupes.2.$$"
then
	exit -4
fi

#	we must canonicalize the file names themselves
#	Unless force ("f") is set, ask for permission to run a recursive name_tidy
#	on the current directory. An answer starting with "y" runs it, and a
#	non-zero result from name_tidy aborts with that same status. Any other
#	answer aborts with -6.
if [[ "$force" != "f" ]]
then
	echo -n	"$0 is too stupid to handle weird filenames, so it wants to do a recursive name_tidy on `pwd`, OK?:"	>&2
	read answer
	if [[ $answer != y* ]]
	then
		exit	-6
	fi
	#	Passing "-r$force$symlink" to name_tidy was tried and always errored,
	#	so only the plain recursive form is used.
	/usr/local/bin/name_tidy -r
	return_code=$?
	if [[ $return_code -ne 0 ]]
	then
		echo "$0 in `pwd` called '/usr/local/bin/name_tidy -r' which returned $return_code"	>&2
		exit $return_code
	fi
#	until such time as we rewrite the code below to handle inconvenient filenames,
#	the forced path deliberately skips name_tidy altogether.
fi

#	fdedupe_link_pair KEEP DUPE
#	Replace the file DUPE with a hard link to KEEP.
#	DUPE is first renamed to "DUPE-" as a backup; the backup is removed once
#	diff confirms the new link has the same content.
#	Returns 0 when the pair was linked.
#	Returns 1 when either file is missing or empty (the pair is skipped untouched).
#	Exits the script with -8 if the backup rename fails, or with -9 if the
#	link fails (after putting the backup back under its original name).
function fdedupe_link_pair {
	local keep="$1" dupe="$2"
	if [ ! -s "$keep" ] || [ ! -s "$dupe" ]
	then	#	We require both files to be non-zero
		return 1
	fi
	mv	-- "$dupe"	"$dupe"-	|| exit -8
	echo "$0 at $(now): ln $keep $dupe"	>&2	#	replace the original with hardlink to duplicate
	if ! ln	-- "$keep"	"$dupe"
	then	#	do not leave the original stranded under its backup name
		mv	-- "$dupe"-	"$dupe"
		exit -9
	fi
	if nice ionice -c3 diff -- "$dupe" "$dupe"-
	then	#	if the backup = current then remove the backup
		rm	-f -- "$dupe"-
	fi
	return 0
}

#	can I make temporary files? (and set-up entry to the while loop (yay, recycled variables))
echo $$ >	/tmp/fdupes.$$		|| exit -7
#	deduplicate files in this directory hierarchy
while [ -s	/tmp/fdupes.$$ ]
do
	#	find the duplicate files: one group per line; strip the trailing space
	#	and turn the first two separators into tabs so that the first two names
	#	become tab-delimited fields
	nice ionice -c3 fdupes -1 -r . 2>/dev/null	| sed $'s/ $//; s/ /\t/; s/ /\t/' > /tmp/fdupes.$$
	#	count the number of pairs (one per line)
	pairs=$(( $(wc -l < /tmp/fdupes.$$) ))
	#	are we there yet?
	if [ "$pairs" -eq 0 ]
	then
		break
	fi
	nop=0		#	counts pairs actually linked during this pass
	#	read the list on fd 3 so nothing inside the loop can consume it via stdin
	while IFS=$'\t' read -r file1 file2 rest <&3
	do
		if fdedupe_link_pair "$file1" "$file2"
		then
			nop=$((nop + 1))	# We worked on a non-zero file
		fi
	done 3< /tmp/fdupes.$$
	#	A pass that linked nothing (e.g. only zero-length duplicates remain)
	#	can never make progress, so stop instead of looping forever.
	#	(The former test "[ $nop ]" was always true and stopped after one pass.)
	if [ "$nop" -eq 0 ]
	then
		break
	fi
done
exit	$exitval	# normal completion; the cleanup handler trapped on exit (signal 0) still runs

