use sha2::{Digest, Sha256};
/// Reply/forward markers that may be stripped from the front of a subject.
///
/// ASCII entries are stored lowercase and compared ASCII-case-insensitively by
/// the matcher; an entry only counts as a marker when a separator character
/// follows it, so ordinary words that merely start with one of these letters
/// are left alone.
const REPLY_PREFIXES: &[&str] = &[
// English reply / forward markers.
"re",
"fwd",
"fw",
// Spanish ("RV:" is exercised by the Spanish test); "rsv" presumably a
// related variant — TODO confirm.
"rv",
"rsv",
// Italian / French markers (covered by the Italian/French test).
"rif",
"tr",
// NOTE(review): "ant" is not covered by any test; presumably an
// abbreviation of "antwort" — confirm.
"ant",
// German markers.
"aw",
"antwort",
"antw",
// Portuguese forward markers.
"encaminhar",
"encam",
// Chinese reply markers.
"回复",
"答复",
];
/// Returns `subject` with surrounding whitespace and every leading
/// reply/forward marker removed.
///
/// Markers are peeled off one at a time until none is left, so stacked markers
/// such as `"Re: Fwd: Re: x"` collapse to `"x"`. The remaining text keeps its
/// original case and internal spacing.
pub fn normalize_subject(subject: &str) -> String {
    let mut remaining = subject.trim();
    loop {
        let candidate = strip_one_prefix(remaining);
        // An unchanged length means no marker matched: we are done.
        // `remaining` is already trimmed at this point.
        if candidate.len() == remaining.len() {
            return remaining.to_string();
        }
        remaining = candidate.trim();
    }
}
/// Removes at most one leading reply/forward marker (and its separator) from
/// `s`.
///
/// The entries of `REPLY_PREFIXES` are tried in declaration order and the
/// first one that matches wins. When none matches, `s` is returned unchanged.
fn strip_one_prefix(s: &str) -> &str {
    REPLY_PREFIXES
        .iter()
        .find_map(|prefix| strip_prefix_with_separator(s, prefix))
        .unwrap_or(s)
}
/// Tries to strip `prefix` plus a following separator from the start of `s`.
///
/// A match requires, in order:
/// 1. `s` starts with `prefix`, compared ASCII-case-insensitively (non-ASCII
///    prefixes must match exactly);
/// 2. optional spaces/tabs;
/// 3. one separator: `:`, `-`, `|`, `.`, `>` or the full-width colon `：`
///    (U+FF1A), which is the customary separator after CJK markers such as
///    `回复`.
///
/// Returns the text after the separator (not trimmed), or `None` when `s` does
/// not start with the marker. Requiring the separator is what keeps words like
/// `"Reservation"` from being treated as `"Re"`.
///
/// NOTE(review): a separator directly followed by a letter also matches, so
/// `"Re-enable"` yields `"enable"`; confirm whether that is intended.
fn strip_prefix_with_separator<'a>(s: &'a str, prefix: &str) -> Option<&'a str> {
    // `get` yields `None` when `s` is too short or the cut would land inside a
    // multi-byte character, so the slice below cannot panic.
    let head = s.get(..prefix.len())?;
    if !head.eq_ignore_ascii_case(prefix) {
        return None;
    }

    let after_marker = s[head.len()..].trim_start_matches(|c: char| c == ' ' || c == '\t');

    // Work on chars rather than bytes so the multi-byte full-width colon can be
    // recognised and skipped as a unit.
    let mut chars = after_marker.chars();
    match chars.next()? {
        ':' | '-' | '|' | '.' | '>' | '\u{FF1A}' => Some(chars.as_str()),
        _ => None,
    }
}
/// Builds a synthetic thread id of the form `synth:` followed by 16 lowercase
/// hex digits.
///
/// The id is derived from the first 8 bytes of a SHA-256 digest over:
/// a `0x01` byte, the ASCII-lowercased `from_email`, a `0x00` byte, and the
/// lowercased, marker-stripped `subject`. As a result, replies and forwards of
/// the same subject from the same sender map to the same id regardless of
/// letter case.
pub fn synth_thread_id(from_email: &str, subject: &str) -> String {
    const TAG: &str = "synth:";
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";
    const DIGEST_BYTES_USED: usize = 8;

    let sender = from_email.to_ascii_lowercase();
    let topic = normalize_subject(subject).to_lowercase();

    let mut hasher = Sha256::new();
    hasher.update(b"\x01");
    hasher.update(sender.as_bytes());
    // The NUL byte keeps the sender and subject fields from running together.
    hasher.update(b"\x00");
    hasher.update(topic.as_bytes());
    let digest = hasher.finalize();

    let mut id = String::with_capacity(TAG.len() + 2 * DIGEST_BYTES_USED);
    id.push_str(TAG);
    for &byte in &digest[..DIGEST_BYTES_USED] {
        id.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
        id.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
    }
    id
}
#[cfg(test)]
mod tests {
    // Unit tests for subject normalization and synthetic thread ids.
    use super::*;

    /// Asserts that each `(raw, expected)` pair normalizes to `expected`.
    fn check(cases: &[(&str, &str)]) {
        for &(raw, expected) in cases {
            assert_eq!(normalize_subject(raw), expected, "input: {raw:?}");
        }
    }

    #[test]
    fn normalize_strips_single_re() {
        check(&[("Re: Hello", "Hello")]);
    }

    #[test]
    fn normalize_strips_recursive_re() {
        check(&[("Re: Re: Re: Hello", "Hello")]);
    }

    #[test]
    fn normalize_strips_mixed_case() {
        check(&[("RE: re: rE: Hello", "Hello")]);
    }

    #[test]
    fn normalize_strips_fwd_variants() {
        check(&[("Fwd: x", "x"), ("FW: x", "x"), ("Fw: Re: x", "x")]);
    }

    #[test]
    fn normalize_strips_spanish_prefixes() {
        check(&[
            ("RV: Cotización", "Cotización"),
            ("Rv: Re: Cotización", "Cotización"),
        ]);
    }

    #[test]
    fn normalize_strips_portuguese_prefixes() {
        check(&[("Encam: Algo", "Algo"), ("Encaminhar: Re: Algo", "Algo")]);
    }

    #[test]
    fn normalize_strips_german_prefixes() {
        check(&[
            ("AW: Hallo", "Hallo"),
            ("Antw: AW: Hallo", "Hallo"),
            ("Antwort: Hallo", "Hallo"),
        ]);
    }

    #[test]
    fn normalize_strips_italian_french() {
        check(&[("Tr: Bonjour", "Bonjour"), ("Rif: Salve", "Salve")]);
    }

    #[test]
    fn normalize_handles_dash_separator() {
        check(&[("Re- Hello", "Hello")]);
    }

    #[test]
    fn normalize_handles_pipe_separator() {
        check(&[("Re| Hello", "Hello")]);
    }

    #[test]
    fn normalize_does_not_strip_word_starting_with_re() {
        // "Re" followed by a letter is part of a word, not a reply marker.
        check(&[("Reservation: Confirmed", "Reservation: Confirmed")]);
    }

    #[test]
    fn normalize_preserves_internal_whitespace() {
        check(&[("Re: Hola mundo", "Hola mundo")]);
    }

    #[test]
    fn normalize_preserves_body_case() {
        check(&[("Re: COTIZACIÓN", "COTIZACIÓN")]);
    }

    #[test]
    fn normalize_empty_input_returns_empty() {
        check(&[("", ""), (" ", "")]);
    }

    #[test]
    fn normalize_only_prefixes_returns_empty() {
        check(&[("Re: Re:", ""), ("Fwd: Re: Re:", "")]);
    }

    #[test]
    fn synth_id_is_deterministic() {
        let first = synth_thread_id("juan@acme.com", "Cotización");
        let second = synth_thread_id("juan@acme.com", "Cotización");
        assert_eq!(first, second);
    }

    #[test]
    fn synth_id_collapses_reply_prefix_variations() {
        let sender = "juan@acme.com";
        let baseline = synth_thread_id(sender, "Cotización");
        for variant in ["Re: Cotización", "Re: Re: Cotización", "Fwd: Cotización"] {
            assert_eq!(baseline, synth_thread_id(sender, variant), "variant: {variant:?}");
        }
    }

    #[test]
    fn synth_id_different_senders_dont_collide() {
        let from_a = synth_thread_id("a@x.com", "Hello");
        let from_b = synth_thread_id("b@x.com", "Hello");
        assert_ne!(from_a, from_b);
    }

    #[test]
    fn synth_id_different_subjects_dont_collide() {
        let hello = synth_thread_id("a@x.com", "Hello");
        let goodbye = synth_thread_id("a@x.com", "Goodbye");
        assert_ne!(hello, goodbye);
    }

    #[test]
    fn synth_id_is_case_insensitive_for_email() {
        let mixed = synth_thread_id("Juan@Acme.com", "Cotización");
        let lower = synth_thread_id("juan@acme.com", "Cotización");
        assert_eq!(mixed, lower);
    }

    #[test]
    fn synth_id_is_case_insensitive_for_subject() {
        let upper = synth_thread_id("a@x.com", "HELLO");
        let lower = synth_thread_id("a@x.com", "hello");
        assert_eq!(upper, lower);
    }

    #[test]
    fn synth_id_carries_synth_prefix() {
        let id = synth_thread_id("a@x.com", "Hello");
        assert!(id.starts_with("synth:"));
        // "synth:" (6 bytes) + 16 hex digits.
        assert_eq!(id.len(), 22);
    }

    #[test]
    fn synth_id_handles_empty_subject() {
        let empty_first = synth_thread_id("a@x.com", "");
        let empty_second = synth_thread_id("a@x.com", "");
        let non_empty = synth_thread_id("a@x.com", "Hello");
        assert_eq!(empty_first, empty_second);
        assert_ne!(empty_first, non_empty);
    }
}