#!/bin/bash
#                       /usr/local/bin/mbox2dir
# https://crystalfaeries.net/posix/bin/mbox2dir
# celeste:crystalfaery EMAILS_BY_DATE 2016-10-09 00:24:26+00:00
# Parses the mailboxfile into one file per e-mail message,
# each named as the subject.date+time of the e-mail, in the directory.
# first  argument is the mailbox file to parse
# second argument is the directory to work in
# KNOWN BUG: WE DO NOT YET HANDLE TIMEZONE IN SETTING FILE TIMESTAMPS! ooopsie

# Resolve the command-line arguments, enter the output directory and split the
# mailbox into it.
#   $1  mailbox file to parse         (default: /var/mail/<current user>)
#   $2  directory to split into       (default: a new temporary directory)
# Sets the globals `mailboxfile` and `directory`; leaves the shell inside the
# output directory.  Exits 1 if the temporary directory cannot be created and
# 2 if the output directory cannot be created or entered.
mbox2dir_setup() {
	mailboxfile="/var/mail/$(/usr/bin/whoami)"					# default mailbox file is user's mail spool
	if [ $# -ne 0 ]
	then
		mailboxfile="${1}"							# first argument is mailbox file
		shift
	fi
	# We are about to change directory, so a relative mailbox path has to be
	# anchored to the caller's working directory first or it would be lost.
	case "${mailboxfile}" in
	/*)	;;
	*)	mailboxfile="$(/bin/pwd)/${mailboxfile}";;
	esac

	if [ $# -ne 0 ]
	then
		directory="${1}"							# second argument is mailfile directory
		shift
	else
		# Create the temporary directory only when no directory was named,
		# so that runs with an explicit directory leave nothing behind.
		directory="$(/bin/mktemp -d)"	|| exit 1
	fi
	/bin/mkdir -p -- "${directory}"	|| exit 2					# create mailfile directory if necessary
	cd -- "${directory}"		|| exit 2					# change to mailfile directory

	# NOTE(review): splitmbox's exit status conventions are not visible here,
	# so its result is deliberately left unchecked - confirm before adding one.
	/usr/local/bin/splitmbox	"${mailboxfile}"	"$(/bin/pwd)"		# parse the file into multiple files 00..nn
}
mbox2dir_setup "$@"

# Rename every message file in the current directory to
#	<subject>.<YYYY>-<MM>-<DD>.<hh>-<mm>-<ss><zone>.mbox
# and set its modification time from the Date: header.  The zone offset is only
# part of the name; it is NOT applied to the timestamp (see KNOWN BUG above).

# Print the header of message file $1: at most its first 255 lines, stopping
# before the first empty line.  A trailing CR is dropped so CRLF mail is handled.
mbox2dir_header() {
	local line count=0
	while IFS= read -r line || [ -n "${line}" ]
	do
		line="${line%$'\r'}"
		[ -n "${line}" ] || break						# blank line separates header from body
		printf '%s\n' "${line}"
		count=$((count + 1))
		[ "${count}" -lt 255 ] || break
	done < "${1}"
}

# Turn the value of a Date: header ($1), e.g. "Sun, 9 Oct 2016 00:24:26 +0000 (UTC)",
# into "2016-10-09.00-24-26+0000" on stdout.  The leading day-of-week is optional.
mbox2dir_stamp() {
	local IFS=$' \t\n'
	local -a fields
	local day month rest
	read -r -a fields <<<"${1}"							# split on whitespace without glob expansion
	if [[ "${fields[0]}" != [0-9]* ]]						# leading day name such as "Sun," - skip it
	then
		fields=("${fields[@]:1}")
	fi
	day="${fields[0]}"
	if [[ "${day}" == [1-9] ]]							# zero-pad single digit days
	then
		day="0${day}"
	fi
	case "${fields[1]}" in
	Jan)	month="01";;
	Feb)	month="02";;
	Mar)	month="03";;
	Apr)	month="04";;
	May)	month="05";;
	Jun)	month="06";;
	Jul)	month="07";;
	Aug)	month="08";;
	Sep)	month="09";;
	Oct)	month="10";;
	Nov)	month="11";;
	Dec)	month="12";;
	*)	month="${fields[1]}";;
	esac
	rest="${fields[*]:3}"								# time, zone and any trailing comment
	rest="${rest//:/-}"
	rest="${rest// /}"
	rest="${rest%%(*}"								# drop a trailing "(UTC)" style comment
	printf '%s\n' "${fields[2]}-${month}-${day}.${rest}"
}

# Print the file-name form of the first Subject: line in the header of message
# file $1 (printable characters only, lower-cased, punctuation normalised).
# Prints nothing when the header has no Subject: line; the body is never searched.
mbox2dir_subject() {
	mbox2dir_header "${1}" | grep '^Subject:' | head -n 1 | tr -cd '[:print:]' | tr '[:upper:]' '[:lower:]' | \
	sed ' s/\$/federal_reserve_notes_price./g; s/ /_/g; s/(//g; s/)//g; s/no_subject//g; s/subject//g; s/:/./g; s/^\.//g; s/^_//g; s/\//+/g; s/^fwd\._//g; s/^re\.//g; s/^_//g; s/\.\././g; s/_+_/+/g; s/_\././g ; s/\._/./g; s/_$//g'
}

# Process every regular file in the current directory.  Files whose header has
# no usable Date: line are left untouched.
mbox2dir_rename_all() {
	local f line stamp subject target digits n
	for f in *									# for each file split from the .mbox file
	do
		[ -f "${f}" ] || continue						# skips directories and an unmatched glob
		stamp=""
		while IFS= read -r line							# scan the e-mail header
		do
			if [[ "${line}" =~ ^Date:[[:space:]]+([^[:space:]].*)$ ]]	# first Date: line with a value
			then
				stamp="$(mbox2dir_stamp "${BASH_REMATCH[1]}")"
				break
			fi
		done < <(mbox2dir_header "${f}")
		[ -n "${stamp}" ] || continue

		subject="$(mbox2dir_subject "${f}")"
		if [ -z "${subject}" ]
		then
			subject="null"
		fi

		# Never overwrite another message that has the same subject and second:
		# add a numeric suffix instead.  A file that already carries its
		# computed name (script re-run) is left in place.
		target="${subject}.${stamp}.mbox"
		n=0
		while [ "${target}" != "${f}" ] && [ -e "${target}" ]
		do
			n=$((n + 1))
			target="${subject}.${stamp}.${n}.mbox"
		done
		if [ "${target}" != "${f}" ]
		then
			mv -- "${f}" "${target}" || continue				# rename the file by its Subject: and Date:
		fi

		digits="${stamp//[-.]/}"						# YYYYMMDDhhmmss followed by the zone
		touch -t "${digits:0:12}.${digits:12:2}" -- "${target}"			# set metadata
	done
}
mbox2dir_rename_all
# Post-processing in the mailfile directory: run the site-local name_tidy helper
# (presumably normalises the new file names - confirm against that script), then
# refresh the directory's own timestamp so that it is later than the newest
# e-mail timestamp set above (unless TZ issues?).
/usr/local/bin/name_tidy
touch -- .


# syntax highlighted by Code2HTML, v. 0.9.1