use crate::libs::{
args,
config::Config,
imap::{ids_list_to_collapsed_sequence, Imap},
render::{new_renderer, Renderer},
};
use clap::Args;
use derive_more::Display;
use exn::{OptionExt as _, Result, ResultExt as _};
use imap::types::{Fetches, Uid};
use regex::Regex;
use std::collections::{HashMap, HashSet};
/// Error type returned by the duplicate-removal command.
///
/// Wraps a short context message naming the step that failed
/// (for example `"connect"` or `"imap uid fetch failed"`).
#[derive(Debug, Display)]
pub struct DuError(String);
// Marker impl only: `Debug` and `Display` are both derived above.
impl std::error::Error for DuError {}
// Command-line arguments of the "remove duplicate emails" subcommand.
// Plain comments (not doc comments) are used on purpose so the help text stays
// exactly what `about` / `long_about` below specify.
#[derive(Args, Debug, Clone)]
#[command(
about = "Remove duplicate emails",
long_about = "This will cleanup your mailboxes of duplicate emails.
It will search each mailbox and if a message with the same message id is found,
it will delete the duplicates."
)]
pub struct FindDups {
// Generic arguments shared with other commands, flattened into this
// command's own argument list. `process` reads `dry_run` from here.
#[clap(flatten)]
config: args::Generic,
}
/// Type parameter used for `Config` and `Imap` in this command.
// NOTE(review): presumably a catch-all because this command defines no extra
// settings of its own — confirm against `Config`.
type MyExtra = serde_value::Value;
/// Lazily compiled pattern that finds a `Message-ID:` header (case-insensitively)
/// and captures the angle-bracketed identifier, brackets included, as group 1.
///
/// `\s*` also spans line breaks, so an identifier placed on a folded
/// continuation line still matches. The pattern is not anchored to the start
/// of a line.
static MESSAGE_ID_REGEX: std::sync::LazyLock<Regex> = std::sync::LazyLock::new(|| {
// The pattern is a fixed literal, so a compile failure would be a programming error.
#[expect(clippy::unwrap_used, reason = "re is correct")]
Regex::new(r"(?i)Message-ID:\s*(<[^>]+>)")
.unwrap()
});
impl FindDups {
/// Runs the deduplication over every mailbox returned by the server.
///
/// Builds the effective configuration from the command-line arguments,
/// creates the report renderer (titled as a dry run when
/// `config.base.dry_run` is set), connects to the IMAP server and calls
/// [`Self::process`] once per listed mailbox.
///
/// # Errors
///
/// Returns a [`DuError`] naming the step that failed (`"config"`,
/// `"new renderer"`, `"connect"`, `"imap list"` or `"process"`). The first
/// mailbox that fails stops the run.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "trace", skip(self), err(level = "info"))
)]
pub fn execute(&self) -> Result<(), DuError> {
    let config =
        Config::<MyExtra>::new(&self.config).or_raise(|| DuError("config".to_owned()))?;
    #[cfg(feature = "tracing")]
    tracing::trace!(?config);

    // The report title tells the user whether anything will really be deleted.
    let title = if config.base.dry_run {
        "Mailbox Deduplication DRY-RUN"
    } else {
        "Mailbox Deduplication"
    };
    let mut renderer = new_renderer(title, "[{0}] {1} {2}", &["Mailbox", "Dups", "Sequence"])
        .or_raise(|| DuError("new renderer".to_owned()))?;

    let mut imap = Imap::connect(&config).or_raise(|| DuError("connect".to_owned()))?;
    let mailboxes = imap.list().or_raise(|| DuError("imap list".to_owned()))?;
    for (mailbox, _result) in mailboxes {
        self.process(&mut imap, &mut renderer, &mailbox)
            .or_raise(|| DuError("process".to_owned()))?;
    }
    Ok(())
}
/// Deduplicates a single mailbox and reports what was found.
///
/// The mailbox is examined first; with fewer than two messages there is
/// nothing to compare and the function returns immediately. Otherwise the
/// `Message-ID` header of every message is fetched by UID and the duplicate
/// UIDs are computed. When duplicates exist and this is not a dry run, the
/// mailbox is selected, the duplicates are flagged `\Deleted` and the mailbox
/// is closed. A report row (mailbox, duplicate count, collapsed UID sequence)
/// is added whenever duplicates were found, dry run or not.
///
/// # Errors
///
/// Returns a [`DuError`] describing the IMAP or rendering step that failed.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "trace", skip(self, imap, renderer), err(level = "info"))
)]
fn process(
    &self,
    imap: &mut Imap<MyExtra>,
    renderer: &mut Box<dyn Renderer>,
    mailbox: &str,
) -> Result<(), DuError> {
    let examined = imap
        .session
        .examine(mailbox)
        .or_raise(|| DuError(format!("imap examine {mailbox:?} failed")))?;
    // A mailbox needs at least two messages to contain a duplicate.
    if examined.exists < 2 {
        return Ok(());
    }

    let fetched = imap
        .session
        .uid_fetch("1:*", "(BODY.PEEK[HEADER.FIELDS (MESSAGE-ID)])")
        .or_raise(|| DuError("imap uid fetch failed".to_owned()))?;
    let duplicates =
        Self::find_duplicates(&fetched).or_raise(|| DuError("find duplicates".to_owned()))?;
    if duplicates.is_empty() {
        return Ok(());
    }

    let uid_sequence = ids_list_to_collapsed_sequence(&duplicates);
    // NOTE(review): dry-run is read from the raw CLI arguments here, whereas
    // `execute` titles the report from the resolved `config.base.dry_run` —
    // confirm the two can never disagree.
    if !self.config.dry_run {
        // The mailbox was only examined so far; select it before storing flags.
        imap.session
            .select(mailbox)
            .or_raise(|| DuError(format!("imap select {mailbox:?} failed")))?;
        imap.session
            .uid_store(&uid_sequence, "+FLAGS (\\Deleted)")
            .or_raise(|| DuError("imap uid store failed".to_owned()))?;
        imap.session
            .close()
            .or_raise(|| DuError("imap close failed".to_owned()))?;
    }

    renderer
        .add_row(&[&mailbox, &duplicates.len(), &uid_sequence])
        .or_raise(|| DuError("renderer add row".to_owned()))?;
    Ok(())
}
/// Computes the UIDs of messages that duplicate another message's `Message-ID`.
///
/// Messages are grouped by the `Message-ID` parsed from their fetched header;
/// messages without a parsable `Message-ID` are ignored. Within each group the
/// lowest UID is kept and every other UID is returned as a duplicate. Choosing
/// the lowest UID makes the result independent of the order in which the
/// server sent its responses, and a UID that is reported more than once is
/// never flagged as a duplicate of itself.
///
/// # Errors
///
/// Returns a [`DuError`] when a message with a parsable `Message-ID` carries
/// no UID.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "trace", skip(messages), ret, err(level = "info"), fields(messages = messages.len()))
)]
fn find_duplicates(messages: &Fetches) -> Result<HashSet<Uid>, DuError> {
    let mut uids_by_message_id: HashMap<String, Vec<Uid>> = HashMap::new();
    for message in messages.iter() {
        let Some(id) = Self::parse_message_id(message.header()) else {
            continue;
        };
        let uid = message.uid.ok_or_raise(|| DuError("The server does not support the UIDPLUS capability, and all our operations need UIDs for safety".to_owned()))?;
        uids_by_message_id.entry(id).or_default().push(uid);
    }

    let mut duplicates = HashSet::<Uid>::new();
    for uids in uids_by_message_id.values() {
        // Keep the lowest UID of the group; everything else is a duplicate.
        if let Some(&keep) = uids.iter().min() {
            duplicates.extend(uids.iter().filter(|&&uid| uid != keep));
        }
    }
    Ok(duplicates)
}
/// Test-only wrapper that exposes the private `parse_message_id` to the
/// parent module; it forwards its argument and result unchanged.
#[cfg(test)]
pub(super) fn parse_message_id_pub(header: Option<&[u8]>) -> Option<String> {
Self::parse_message_id(header)
}
/// Extracts the angle-bracketed `Message-ID` from raw header bytes.
///
/// Returns `None` when no header was supplied, when the bytes are not valid
/// UTF-8, when `MESSAGE_ID_REGEX` does not match, or when the captured
/// identifier (brackets included) is four bytes long or shorter.
#[cfg_attr(
    feature = "tracing",
    tracing::instrument(level = "trace", skip(header), ret, fields(header = ?header.map(|h| std::str::from_utf8(h))))
)]
fn parse_message_id(header: Option<&[u8]>) -> Option<String> {
    let raw = header?;
    let text = std::str::from_utf8(raw).ok()?;
    let captures = MESSAGE_ID_REGEX.captures(text)?;
    let message_id = captures.get(1)?.as_str();
    // Reject identifiers too short to be meaningful.
    if message_id.len() > 4 {
        Some(message_id.to_owned())
    } else {
        None
    }
}
}
#[cfg(test)]
mod tests {
#![expect(clippy::expect_used, reason = "tests")]
use super::*;
use crate::{
libs::args,
test_helpers::{MockExchange, MockServer},
};
// Builds a base configuration that targets 127.0.0.1 with fixed test
// credentials; every other argument keeps its default.
fn test_base() -> crate::libs::base_config::BaseConfig {
    let generic = args::Generic {
        server: Some("127.0.0.1".to_owned()),
        username: Some("test".to_owned()),
        password: Some("test".to_owned()),
        ..Default::default()
    };
    crate::libs::base_config::BaseConfig::new(&generic).expect("test base config")
}
// A well-formed header yields the identifier, angle brackets included.
#[test]
fn parse_message_id_valid() {
    let raw: &[u8] = b"Message-ID: <abc123@example.com>\r\n\r\n";
    let expected = Some("<abc123@example.com>".to_owned());
    assert_eq!(FindDups::parse_message_id_pub(Some(raw)), expected);
}
// The header name matches in any case; the identifier's own case is preserved.
#[test]
fn parse_message_id_case_insensitive() {
    let raw: &[u8] = b"message-id: <ABC@EXAMPLE.COM>\r\n\r\n";
    let expected = Some("<ABC@EXAMPLE.COM>".to_owned());
    assert_eq!(FindDups::parse_message_id_pub(Some(raw)), expected);
}
// Headers without a Message-ID field produce no identifier.
#[test]
fn parse_message_id_missing() {
    let raw: &[u8] = b"Subject: hello\r\n\r\n";
    let parsed = FindDups::parse_message_id_pub(Some(raw));
    assert_eq!(parsed, None);
}
// A message fetched without any header produces no identifier.
#[test]
fn parse_message_id_none_input() {
    let parsed = FindDups::parse_message_id_pub(None);
    assert!(parsed.is_none());
}
// `process` must succeed and stop right after examining a mailbox that
// reports fewer than two messages (no fetch is scripted on the mock).
#[test]
fn process_skips_mailbox_with_one_message() {
// Single scripted exchange reporting one existing message; presumably served
// as the reply to the EXAMINE that `process` issues first — confirm against `MockServer`.
let server = MockServer::start(
&[],
vec![MockExchange::ok(vec![
"* 1 EXISTS\r\n".into(),
"* 0 RECENT\r\n".into(),
])],
);
let base = test_base();
let mut imap: Imap<serde_value::Value> =
Imap::connect_base_on_port(&base, server.port).expect("connect");
let find_dups = FindDups {
config: args::Generic::default(),
};
let mut renderer = new_renderer("test", "{0}", &["col"]).expect("renderer");
let result = find_dups.process(&mut imap, &mut renderer, "INBOX");
// Drop the client before joining the server thread (presumably so the mock
// sees the connection close and can finish — confirm against `MockServer`).
drop(imap);
server.join();
assert!(result.is_ok(), "expected Ok, got: {result:?}");
}
// In dry-run mode `process` must detect duplicates and succeed; only two
// exchanges are scripted, so no select/store/close traffic is expected.
#[test]
fn process_dry_run_finds_duplicates() {
use crate::test_helpers::header_fetch_line;
let server = MockServer::start(
&[],
vec![
// First exchange: mailbox status reporting three messages.
MockExchange::ok(vec!["* 3 EXISTS\r\n".into(), "* 0 RECENT\r\n".into()]),
// Second exchange: three header lines, two of which share a Message-ID.
// NOTE(review): the two numeric arguments are presumably sequence number
// and UID — confirm against `header_fetch_line`.
MockExchange::ok(vec![
header_fetch_line(1, 1, "<unique@example.com>"),
header_fetch_line(2, 2, "<dup@example.com>"),
header_fetch_line(3, 3, "<dup@example.com>"),
]),
],
);
let base = test_base();
let mut imap: Imap<serde_value::Value> =
Imap::connect_base_on_port(&base, server.port).expect("connect");
// `dry_run: true` makes `process` skip the select / store / close branch.
let find_dups = FindDups {
config: args::Generic {
dry_run: true,
..Default::default()
},
};
let mut renderer = new_renderer("test", "{0}", &["col"]).expect("renderer");
let result = find_dups.process(&mut imap, &mut renderer, "INBOX");
// Drop the client before joining the server thread (presumably so the mock
// sees the connection close and can finish — confirm against `MockServer`).
drop(imap);
server.join();
assert!(result.is_ok(), "expected Ok, got: {result:?}");
}
}