-- Remove Duplicate Messages
-- https://c-command.com/scripts/eaglefiler/remove-duplicate-messages
-- Summary: Removes duplicate e-mail messages from a mailbox.
-- Requires: EagleFiler 1.6.6
-- Install Location: ~/Library/Scripts/Applications/EagleFiler/
-- Last Modified: 2022-08-11

-- Entry point: processes every mailbox record in the front browser window.
-- Non-mailbox records are reported and skipped. Every alert shown by this
-- script has a "Cancel" button marked as the cancel button, so the user can
-- abort the run from any alert.
tell application "EagleFiler"
	try
		-- requires EagleFiler 1.7
		set _records to current records of browser window 1
	on error
		-- Fall back to the selection when "current records" is unavailable.
		set _records to selected records of browser window 1
	end try
	-- Collected only so we can tell afterwards whether any mailbox was processed.
	set _mailboxes to {}
	repeat with _record in _records
		if my isMailboxRecord(_record) then
			copy _record to end of _mailboxes
			my removeDuplicatesFromMailboxRecord(_record)
		else
			my showAlert("Skipping File", "Skipping “" & _record's filename & "” because it is not a mailbox.")
		end if
	end repeat
	if _mailboxes is {} then
		my showAlert("You did not select any mailboxes.", "Please click on Records or a folder in the source list (at the left) and then select one or more mailboxes in the records list (at the top-right).")
	end if
end tell

-- Returns true when the record's universal type identifier is EagleFiler's
-- mbox type ("com.c-command.mail.mbox"), false otherwise.
on isMailboxRecord(_record)
	tell application "EagleFiler"
		if _record's universal type identifier is "com.c-command.mail.mbox" then return true
		return false
	end tell
end isMailboxRecord

-- Builds a de-duplicated copy of the mailbox behind _record. When duplicates
-- were found, the copy is imported into the record's library, the original
-- record's metadata is copied onto it, the original is moved to the trash and
-- the new record takes over the original filename. When no duplicates were
-- found the library is left untouched.
on removeDuplicatesFromMailboxRecord(_record)
	tell application "EagleFiler"
		set _file to _record's file
		set _path to _file's POSIX path
		set _filename to _record's filename
		set {_newPath, _count} to my removeDuplicatesFromPath(_path)
		if _count is 0 then
			my showAlert("No Duplicates Found", "There were no duplicate messages in “" & _filename & "”.")
			return
		end if
		tell _record's library document
			set {_newRecord} to import files {_newPath}
			-- copyMetadata also moves the new record into the original's container.
			my copyMetadata(_record, _newRecord)
			set container of _record to trash
			set _newRecord's filename to _filename -- Wasn't possible to set it correctly earlier.
		end tell
		my showAlert("Duplicates Removed", "Removed " & _count & " duplicate messages from “" & _filename & "”.")
	end tell
end removeDuplicatesFromMailboxRecord

-- Runs the mailbox at _sourcePath through formail, writing the result to a
-- new mailbox file inside a fresh temporary folder.
-- Returns {POSIX path of the new mailbox, number of duplicates reported}.
-- The temporary folder is not removed here: the caller imports the new
-- mailbox from it and the log file may be opened in Console.
on removeDuplicatesFromPath(_sourcePath)
	set _tempFolder to my makeTemporaryFolder()
	set _cachePath to _tempFolder & "/" & "idcache"
	set _destPath to _tempFolder & "/" & "NewMailbox.mbox"
	set _logPath to _tempFolder & "/" & "Log.log"
	-- cat Old | perl -p -e 's/\r\n/\n/g' | perl -p -e 's/\r/\n/g' | formail -b -e -q- -Y -D 104857600 idcache -s > New
	set _script to "cat " & _sourcePath's quoted form
	-- Normalize CRLF and bare CR line endings to LF before formail sees the data.
	set _script to _script & " | perl -p -e 's/\\r\\n/\\n/g'"
	set _script to _script & " | perl -p -e 's/\\r/\\n/g'"
	-- The formail path is quoted like every other path in this command, so an
	-- EagleFiler.app located under a path containing spaces still works.
	set _script to _script & " | " & quoted form of (my formailPath()) & " -b -e -q- -Y"
	set _script to _script & " -D 104857600 " & _cachePath's quoted form -- 100 MB
	set _script to _script & " -s > " & _destPath's quoted form
	-- formail's stderr goes to the log, which is inspected below.
	set _script to _script & " 2> " & _logPath's quoted form
	with timeout of 24 * 60 * 60 seconds
		do shell script _script
	end timeout
	set _count to my countDuplicatesFoundFromLogPath(_logPath)
	my reportErrorsFromLogPath(_logPath)
	return {_destPath, _count}
end removeDuplicatesFromPath

-- Returns the number of "formail: Duplicate key found:" lines in the log.
on countDuplicatesFoundFromLogPath(_logPath)
	set _script to "grep -c \"^formail: Duplicate key found:\" " & _logPath's quoted form
	try
		set _stdout to do shell script _script
	on error number 1
		-- grep exits with status 1 when no line matches, i.e. no duplicates.
		return 0
	end try
	return _stdout as number
end countDuplicatesFoundFromLogPath

-- Counts the log lines that are neither duplicate notices nor message-ID
-- continuation lines. If there are any, opens the log in Console and asks the
-- user whether to cancel or to ignore them.
on reportErrorsFromLogPath(_logPath)
	-- Skip lines starting with "<" because they are probably a wrapped message ID
	set _script to "grep -vEc \"^(\\s*<|formail: Duplicate key found:)\" " & _logPath's quoted form
	try
		set _stdout to do shell script _script
		set _errorCount to _stdout as number
	on error
		-- Any grep failure (including its "nothing selected" exit status) is
		-- treated as "no unexpected entries".
		set _errorCount to 0
	end try
	if _errorCount > 0 then
		-- Quote the path, consistent with every other shell use of it.
		do shell script "open -a Console " & quoted form of _logPath
		set _title to "Possible Errors Reported"
		set _message to (_errorCount as string) & " unexpected log entries were found when processing the mailbox. Please review them in Console."
		with timeout of 24 * 60 * 60 seconds
			display alert _title message _message buttons {"Cancel", "Ignore the Errors"} cancel button 1
		end timeout
	end if
end reportErrorsFromLogPath

-- Creates a new temporary directory via mktemp and returns its POSIX path.
on makeTemporaryFolder()
	return do shell script "mktemp -d -t 'EFRemoveDuplicateMessages'"
end makeTemporaryFolder

-- Copies EagleFiler metadata from record _source to record _dest: source URL,
-- container, note text, assigned tags, title, from name, label index,
-- creation date and modification date.
on copyMetadata(_source, _dest)
	tell application "EagleFiler"
		set _sourceURL to _source's source URL
		set source URL of _dest to _sourceURL
		set container of _dest to _source's container
		set _noteText to _source's note text
		set note text of _dest to _noteText
		set _tags to _source's assigned tags
		set assigned tags of _dest to _tags
		set _title to _source's title
		set title of _dest to _title
		set _fromName to _source's from name
		set from name of _dest to _fromName
		set _labelIndex to _source's label index
		set label index of _dest to _labelIndex
		set _creationDate to _source's creation date
		set creation date of _dest to _creationDate
		set _modificationDate to _source's modification date
		set modification date of _dest to _modificationDate
	end tell
end copyMetadata

-- Shows an alert with "Cancel" and "OK" buttons. "Cancel" is the cancel
-- button, so choosing it aborts the script. The long timeout keeps the alert
-- from timing out while it waits for the user.
on showAlert(_title, _message)
	with timeout of 24 * 60 * 60 seconds
		display alert _title message _message buttons {"Cancel", "OK"} cancel button 1
	end timeout
end showAlert

-- Returns the (unquoted) POSIX path of the formail binary bundled inside the
-- EagleFiler application; callers must quote it before use in a shell command.
on formailPath()
	-- Prior to Mac OS X 10.11, it's also available at /usr/bin/formail.
	set _path to path to application "EagleFiler"
	-- NOTE(review): the concatenation below relies on the bundle's POSIX path
	-- ending in "/" — confirm.
	set _posixPath to POSIX path of _path
	return _posixPath & "Contents/Frameworks/WashFramework.framework/Versions/A/formail"
end formailPath