use once_cell::sync::Lazy;
use regex::Regex;
/// Line budget for a signature that is accepted without further evidence: a delimiter match
/// may leave at most this many lines to the end of the body, and a sign-off block of at most
/// this many lines is not checked for corporate markers.
const MAX_SIGNATURE_LINES: usize = 10;
/// Line budget for longer sign-off blocks: it sizes the window at the end of the body that is
/// scanned for sign-off lines and caps the length of a candidate signature block.
const MAX_CORPORATE_SIGNATURE_LINES: usize = 60;
/// Line-anchored regexes that mark the first line of a signature: the `--` delimiter (with or
/// without trailing whitespace) and "Sent from …"-style lines in English, German, French and
/// Spanish.
///
/// Compiled once on first access; a pattern that fails to compile is skipped silently.
static SIGNATURE_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    const SOURCES: [&str; 10] = [
        r"(?m)^-- \s*$",
        r"(?m)^--\s*$",
        r"(?mi)^Sent from my ",
        r"(?mi)^Sent from Mail for ",
        r"(?mi)^Sent from Outlook",
        r"(?mi)^Sent via ",
        r"(?mi)^Gesendet von ",
        r"(?mi)^Envoyé de mon ",
        r"(?mi)^Enviado desde mi ",
        r"(?mi)^Get Outlook for ",
    ];

    let mut compiled = Vec::with_capacity(SOURCES.len());
    for source in SOURCES.iter() {
        if let Ok(regex) = Regex::new(source) {
            compiled.push(regex);
        }
    }
    compiled
});
/// Regexes that match a whole line consisting of a closing phrase (optionally followed by a
/// comma and trailing whitespace), grouped by language: English, German, French, Spanish.
///
/// Compiled once on first access; a pattern that fails to compile is skipped silently.
static SIGNOFF_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    const SOURCES: [&str; 4] = [
        r"(?mi)^(Best( regards)?|Kind regards|Regards|Cheers|Thanks|Thank you|Sincerely|Yours truly|Warm regards|With thanks|Many thanks|All the best|Take care),?\s*$",
        r"(?mi)^(Mit freundlichen Grüßen|Beste Grüße|Viele Grüße|Liebe Grüße|MfG),?\s*$",
        r"(?mi)^(Cordialement|Bien cordialement|Merci|Cdlt),?\s*$",
        r"(?mi)^(Saludos|Atentamente|Gracias),?\s*$",
    ];

    SOURCES
        .iter()
        .map(|source| Regex::new(source))
        .filter_map(Result::ok)
        .collect()
});
/// Splits an email body into its main text and a trailing signature, if one is detected.
///
/// Detection runs in two passes:
///
/// 1. Each regex in `SIGNATURE_PATTERNS` is tried in order against the last
///    `MAX_SIGNATURE_LINES` lines of `body`. On the first hit, the text is everything before
///    the match (byte-for-byte) and the signature is the remainder, trimmed.
/// 2. Otherwise the last `MAX_CORPORATE_SIGNATURE_LINES` lines are scanned top-down for a line
///    matching any regex in `SIGNOFF_PATTERNS`. The first such line whose trailing block
///    passes `looks_like_signature` starts the signature. Both halves are rebuilt by joining
///    lines with `\n`, so line terminators are normalised and a final newline is dropped.
///
/// Returns `(body, None)` with the body unchanged when no signature is detected.
pub fn extract_signature(body: &str) -> (String, Option<String>) {
    let lines: Vec<&str> = body.lines().collect();
    if lines.is_empty() {
        return (body.to_string(), None);
    }

    // Byte offset where the last MAX_SIGNATURE_LINES lines begin (0 for shorter bodies).
    // `split_inclusive` yields the same lines as `lines()` but keeps the terminators, so the
    // lengths of the leading lines add up to a real offset into `body`.
    let leading_lines = lines.len().saturating_sub(MAX_SIGNATURE_LINES);
    let tail_start: usize = body
        .split_inclusive('\n')
        .take(leading_lines)
        .map(str::len)
        .sum();
    let tail = &body[tail_start..];

    // Searching only the tail means an earlier occurrence of a delimiter (a horizontal rule,
    // quoted text, ...) cannot hide a valid delimiter near the end of the message. The tail
    // starts on a line boundary, so `^` anchors behave exactly as in the full body.
    for pattern in SIGNATURE_PATTERNS.iter() {
        if let Some(found) = pattern.find(tail) {
            let (text, signature) = body.split_at(tail_start + found.start());
            return (text.to_string(), Some(signature.trim().to_string()));
        }
    }

    let window_start = lines
        .len()
        .saturating_sub(MAX_CORPORATE_SIGNATURE_LINES);
    for idx in window_start..lines.len() {
        let is_signoff = SIGNOFF_PATTERNS
            .iter()
            .any(|pattern| pattern.is_match(lines[idx]));
        if !is_signoff {
            continue;
        }

        let (text_lines, signature_lines) = lines.split_at(idx);
        if looks_like_signature(signature_lines) {
            return (text_lines.join("\n"), Some(signature_lines.join("\n")));
        }
    }

    (body.to_string(), None)
}
/// Heuristically decides whether `lines`, a candidate block whose first line is a sign-off,
/// is a signature.
///
/// * Empty blocks and blocks longer than `MAX_CORPORATE_SIGNATURE_LINES` are rejected.
/// * If any line has 200 or more characters, or nothing but blank lines follows the first
///   line, the block is accepted only when it has at most two lines.
/// * Otherwise blocks of up to `MAX_SIGNATURE_LINES` lines are accepted outright, and longer
///   ones only if `has_corporate_signature_markers` agrees.
fn looks_like_signature(lines: &[&str]) -> bool {
    /// A line with this many characters or more is not considered signature-like.
    const LONG_LINE_CHARS: usize = 200;

    if lines.is_empty() || lines.len() > MAX_CORPORATE_SIGNATURE_LINES {
        return false;
    }

    // Measure characters, not UTF-8 bytes, so non-ASCII text is held to the same limit as
    // ASCII text. `take` bounds the scan on very long lines.
    let all_short = lines
        .iter()
        .all(|line| line.chars().take(LONG_LINE_CHARS).count() < LONG_LINE_CHARS);
    let has_content = lines.iter().skip(1).any(|line| !line.trim().is_empty());

    if !all_short || !has_content {
        return lines.len() <= 2;
    }

    lines.len() <= MAX_SIGNATURE_LINES || has_corporate_signature_markers(lines)
}
/// Returns `true` when `lines` contain at least two of the three contact markers: a
/// phone-like number (`PHONE_PATTERN`), an email address (`EMAIL_PATTERN`), or a URL
/// (`http://`, `https://` or `www.`).
fn has_corporate_signature_markers(lines: &[&str]) -> bool {
    // Each line is matched on its own. The phone pattern accepts whitespace between digits,
    // so matching against the joined block would let digits on adjacent lines (for example a
    // year followed by a postal code) pass as a phone number.
    let has_phone = lines.iter().any(|line| PHONE_PATTERN.is_match(line));
    let has_email = lines.iter().any(|line| EMAIL_PATTERN.is_match(line));
    let has_url = lines.iter().any(|line| {
        line.contains("http://") || line.contains("https://") || line.contains("www.")
    });

    let marker_count = u8::from(has_phone) + u8::from(has_email) + u8::from(has_url);
    marker_count >= 2
}
/// Loose phone-number matcher: an optional `+`, one digit, then at least six characters that
/// are digits, whitespace, hyphens or parentheses.
static PHONE_PATTERN: Lazy<Regex> = Lazy::new(|| {
    let source = r"\+?\d[\d\s\-()]{6,}";
    Regex::new(source).unwrap()
});
/// Loose email-address matcher: a local part, `@`, a domain, a dot and an alphabetic
/// top-level part of at least two letters.
static EMAIL_PATTERN: Lazy<Regex> = Lazy::new(|| {
    let source = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}";
    Regex::new(source).unwrap()
});
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `extract_signature` on `body` and returns `(text, signature)`, failing the test
    /// when no signature was detected.
    fn split_expecting_signature(body: &str) -> (String, String) {
        let (text, signature) = extract_signature(body);
        assert!(signature.is_some());
        (text, signature.unwrap())
    }

    /// A `-- ` delimiter line starts the signature.
    #[test]
    fn test_standard_delimiter() {
        let (text, signature) = split_expecting_signature(
            "Main content here.\n\n-- \nJohn Doe\nCEO, Acme Corp\n+1 555-0123\n",
        );
        assert!(text.contains("Main content here."));
        assert!(signature.contains("John Doe"));
    }

    /// A "Sent from my …" line is treated as a signature.
    #[test]
    fn test_sent_from_mobile() {
        let (text, signature) = split_expecting_signature("Quick reply.\n\nSent from my iPhone\n");
        assert!(text.contains("Quick reply."));
        assert!(signature.contains("Sent from my iPhone"));
    }

    /// An English sign-off followed by a short block is split off, sign-off included.
    #[test]
    fn test_signoff_pattern() {
        let (text, signature) = split_expecting_signature(
            "Let me know if you have questions.\n\nBest regards,\nAlice Smith\nVP Engineering\n",
        );
        assert!(text.contains("Let me know"));
        assert!(signature.contains("Best regards"));
        assert!(signature.contains("Alice Smith"));
    }

    /// A body without delimiter or sign-off is returned unchanged.
    #[test]
    fn test_no_signature() {
        let body = "Just a plain message with no signature.\n\nSecond paragraph too.\n";
        assert_eq!(extract_signature(body), (body.to_string(), None));
    }

    /// A German sign-off without a trailing comma is recognised.
    #[test]
    fn test_german_signoff() {
        let (text, _signature) = split_expecting_signature(
            "Bitte senden Sie die Unterlagen.\n\nMit freundlichen Grüßen\nHans Schmidt\n",
        );
        assert!(text.contains("Bitte senden"));
    }

    /// A bare `--` line (no trailing space) also works as a delimiter.
    #[test]
    fn test_dash_delimiter_without_space() {
        let (text, _signature) =
            split_expecting_signature("Here is my response.\n\n--\nBob Jones\nbob@company.com\n");
        assert!(text.contains("Here is my response."));
    }

    /// "Beste Grüße," with a trailing comma is recognised.
    #[test]
    fn test_german_beste_gruesse_signoff() {
        let (text, signature) =
            split_expecting_signature("Hier ist meine Antwort.\n\nBeste Grüße,\nThomas\n");
        assert!(text.contains("Hier ist meine Antwort."));
        assert!(signature.contains("Beste Grüße"));
    }

    /// A block longer than the plain limit is still a signature when it carries contact markers.
    #[test]
    fn test_corporate_signature_with_markers() {
        let (text, signature) = split_expecting_signature(
            "Message content.\n\nBest regards,\nJane Doe\nCTO\n\nAcme Corp\n123 Main St\n+1 555-0100\njane@acme.com\nhttps://acme.com\n\nDisclaimer text.\nMore legal.\nLine 3.\nLine 4.\nLine 5.\nLine 6.\n",
        );
        assert!(text.contains("Message content."));
        assert!(signature.contains("Best regards"));
        assert!(signature.contains("Acme Corp"));
    }

    /// A long block after a sign-off without contact markers is left in the body.
    #[test]
    fn test_long_block_without_markers_not_stripped() {
        let filler: String = (0..20).map(|i| format!("Random line {i}\n")).collect();
        let body = format!("Message content.\n\nBest regards,\nSomeone\n{filler}");
        let (text, signature) = extract_signature(&body);
        assert!(text.contains("Message content."));
        assert!(
            signature.is_none(),
            "should not strip long block without corporate markers"
        );
    }
}