Remove Duplicate Messages

Summary: Removes duplicate e-mail messages from a mailbox.
Requires: EagleFiler 1.6.6
Install Location: ~/Library/Scripts/Applications/EagleFiler/
Last Modified: 2026-02-08

Description

This script scans the selected mailboxes in the records list or the source list. (Prior to EagleFiler 1.7, the mailboxes must be selected in the records list; with 1.7 or later, the script will use whichever list has focus.) If a mailbox contains duplicate copies of the same e-mail message, the script creates a new mailbox with the duplicate e-mails removed. (If your duplicates are spread across multiple mailboxes or are stored in .eml files, you should first merge them.) It then moves the original mailbox to the trash. This script has the following limitations:

Mailboxes of a few MB will be processed very quickly. For large mailboxes, the script may take a long time to run. It does not report on the progress while it’s running. If you are wondering whether the script is still doing anything, you can watch the “formail” process in Activity Monitor. To help determine what performance you should expect, here are some processing times from a 2012 Retina MacBook Pro:

Installation Instructions · Download in Compiled Format · Download in Text Format

Script

-- Entry point: run the de-duplication on every mailbox in the current
-- selection, and tell the user about anything selected that is not a mailbox.
tell application "EagleFiler"
    -- "current records" requires EagleFiler 1.7; if it fails, fall back to
    -- the selection in the records list.
    try
        set _selection to current records of browser window 1
    on error
        set _selection to selected records of browser window 1
    end try
    
    -- Remembers which records were mailboxes, so we can tell afterwards
    -- whether anything was processed at all.
    set _processed to {}
    repeat with _item in _selection
        if not (my isMailboxRecord(_item)) then
            my showAlert("Skipping File", "Skipping “" & filename of _item & "” because it is not a mailbox.")
        else
            copy _item to end of _processed
            my removeDuplicatesFromMailboxRecord(_item)
        end if
    end repeat
    
    if _processed is {} then
        my showAlert("You did not select any mailboxes.", "Please click on Records or a folder in the source list (at the left) and then select one or more mailboxes in the records list (at the top-right).")
    end if
end tell

on isMailboxRecord(_record)
    -- Returns true when the record's universal type identifier is
    -- EagleFiler's mbox type, false otherwise.
    tell application "EagleFiler"
        set _typeIdentifier to universal type identifier of _record
        return _typeIdentifier is "com.c-command.mail.mbox"
    end tell
end isMailboxRecord

on removeDuplicatesFromMailboxRecord(_record)
    -- Builds a de-duplicated copy of the mailbox behind _record. If any
    -- duplicates were found, the copy is imported into the record's library,
    -- the original's metadata is copied over, and the original is moved to
    -- the trash. If none were found, the library is left untouched.
    tell application "EagleFiler"
        set _mboxFile to file of _record
        set _mboxPath to POSIX path of _mboxFile
        set _originalName to filename of _record
        
        set {_dedupedPath, _duplicateCount} to my removeDuplicatesFromPath(_mboxPath)
        if _duplicateCount = 0 then
            my showAlert("No Duplicates Found", "There were no duplicate messages in “" & _originalName & "”.")
            return
        end if
        
        -- Allow up to a day, so the import does not hit the Apple event timeout.
        with timeout of 24 * 60 * 60 seconds
            tell library document of _record
                set {_importedRecord} to import files {_dedupedPath}
                my copyMetadata(_record, _importedRecord)
                set container of _record to trash
                -- Wasn't possible to set it correctly earlier.
                set filename of _importedRecord to _originalName
            end tell
        end timeout
        
        my showAlert("Duplicates Removed", "Removed " & _duplicateCount & " duplicate messages from “" & _originalName & "”.")
    end tell
end removeDuplicatesFromMailboxRecord

on removeDuplicatesFromPath(_sourcePath)
    -- Writes a de-duplicated copy of the mailbox at _sourcePath (a POSIX path)
    -- into a newly created temporary folder.
    --
    -- Returns a two-item list: {POSIX path of the new mailbox, number of
    -- duplicates counted in formail's log}.
    --
    -- After the pipeline runs, the log is also checked for unexpected entries
    -- via reportErrorsFromLogPath.
    set _tempFolder to my makeTemporaryFolder()
    set _cachePath to _tempFolder & "/" & "idcache"
    set _destPath to _tempFolder & "/" & "NewMailbox.mbox"
    set _logPath to _tempFolder & "/" & "Log.log"
    
    -- cat Old | perl -p -e 's/\r\n/\n/g' | perl -p -e 's/\r/\n/g' | formail -b -e -q- -Y -D 104857600 idcache -s > New
    set _script to "cat " & quoted form of _sourcePath
    -- Rewrite CRLF, then any remaining bare CR, as LF before formail sees the data.
    set _script to _script & " | perl -p -e 's/\\r\\n/\\n/g'"
    set _script to _script & " | perl -p -e 's/\\r/\\n/g'"
    -- The formail binary lives inside the EagleFiler application bundle, whose
    -- location may contain spaces or other shell metacharacters, so it must be
    -- quoted like every other path in this command.
    set _script to _script & " | " & quoted form of (my formailPath()) & " -b -e -q- -Y"
    set _script to _script & " -D 104857600 " & quoted form of _cachePath -- 100 MB
    set _script to _script & " -s > " & quoted form of _destPath
    -- formail's stderr goes to the log, which is parsed below.
    set _script to _script & " 2> " & quoted form of _logPath
    
    -- Large mailboxes can take a long time; allow up to a day.
    with timeout of 24 * 60 * 60 seconds
        do shell script _script
    end timeout
    
    set _count to my countDuplicatesFoundFromLogPath(_logPath)
    my reportErrorsFromLogPath(_logPath)
    return {_destPath, _count}
end removeDuplicatesFromPath

on countDuplicatesFoundFromLogPath(_logPath)
    -- Returns how many lines of the log at _logPath start with formail's
    -- "Duplicate key found:" notice.
    set _grepCommand to "grep -c \"^formail: Duplicate key found:\" " & quoted form of _logPath
    try
        return (do shell script _grepCommand) as number
    on error number 1
        -- grep exits with status 1 when no line matched; any other failure
        -- is left to propagate.
        return 0
    end try
end countDuplicatesFoundFromLogPath

on reportErrorsFromLogPath(_logPath)
    -- Counts the lines of the log at _logPath that are neither formail
    -- "Duplicate key found:" notices nor lines beginning (after optional
    -- whitespace) with "<". If there are any, opens the log in Console and
    -- shows an alert; choosing "Cancel" in that alert cancels the script.
    
    -- Skip lines starting with "<" because they are probably a wrapped message ID
    set _script to "grep -vEc \"^(\\s*<|formail: Duplicate key found:)\" " & quoted form of _logPath
    try
        set _stdout to do shell script _script
        set _errorCount to _stdout as number
    on error
        -- grep exits non-zero when it counts no lines; treat any failure
        -- here as "nothing to report".
        set _errorCount to 0
    end try
    if _errorCount > 0 then
        -- Quote the path so a temporary folder containing spaces or shell
        -- metacharacters is still passed to "open" as a single argument.
        do shell script "open -a Console " & quoted form of _logPath
        set _title to "Possible Errors Reported"
        set _message to (_errorCount as string) & " unexpected log entries were found when processing the mailbox. Please review them in Console."
        with timeout of 24 * 60 * 60 seconds
            display alert _title message _message buttons {"Cancel", "Ignore the Errors"} cancel button 1
        end timeout
    end if
end reportErrorsFromLogPath

on makeTemporaryFolder()
    -- Creates a new temporary directory with mktemp and returns the path
    -- that mktemp prints.
    set _folderPath to do shell script "mktemp -d -t 'EFRemoveDuplicateMessages'"
    return _folderPath
end makeTemporaryFolder

on copyMetadata(_source, _dest)
    -- Copies EagleFiler metadata from the _source record onto the _dest
    -- record: source URL, container, note text, assigned tags, title,
    -- from name, label index, creation date and modification date, in that
    -- order. Each value is read into a local first and then assigned.
    tell application "EagleFiler"
        set _url to source URL of _source
        set _dest's source URL to _url
        
        set _dest's container to container of _source
        
        set _note to note text of _source
        set _dest's note text to _note
        
        set _tagList to assigned tags of _source
        set _dest's assigned tags to _tagList
        
        set _recordTitle to title of _source
        set _dest's title to _recordTitle
        
        set _sender to from name of _source
        set _dest's from name to _sender
        
        set _label to label index of _source
        set _dest's label index to _label
        
        set _created to creation date of _source
        set _dest's creation date to _created
        
        set _modified to modification date of _source
        set _dest's modification date to _modified
    end tell
end copyMetadata

on showAlert(_title, _message)
    -- Shows an alert with "Cancel" and "OK" buttons. "Cancel" is the cancel
    -- button, so choosing it cancels the script. The long timeout gives the
    -- user up to a day to respond.
    set _buttonTitles to {"Cancel", "OK"}
    with timeout of 24 * 60 * 60 seconds
        display alert _title message _message buttons _buttonTitles cancel button 1
    end timeout
end showAlert

on formailPath()
    -- Returns the POSIX path of the formail binary inside the EagleFiler
    -- application bundle. The result is not shell-quoted.
    -- Prior to Mac OS X 10.11, it's also available at /usr/bin/formail.
    set _bundlePath to POSIX path of (path to application "EagleFiler")
    set _relativePath to "Contents/Frameworks/WashFramework.framework/Versions/A/formail"
    return _bundlePath & _relativePath
end formailPath