use serde::Serialize;
use crate::mail::parse::Parsed;
/// Outcome of running the spam heuristics in [`score()`] over one message.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "specta", derive(specta::Type))]
#[serde(rename_all = "camelCase")]
pub struct SpamReport {
/// Sum of the `score` of every entry in `factors`.
pub score: f32,
/// Classification derived from `score` using fixed thresholds.
pub verdict: SpamVerdict,
/// The heuristics that fired, in the order they were evaluated.
pub factors: Vec<SpamFactor>,
}
/// Coarse classification of a message based on its total spam score.
///
/// Serialized as the lowercased variant name (`"clean"`, `"suspicious"`,
/// `"likelyspam"`).
#[derive(Debug, Clone, Copy, Serialize)]
#[cfg_attr(feature = "specta", derive(specta::Type))]
#[serde(rename_all = "lowercase")]
pub enum SpamVerdict {
/// Total score below `2.0`.
Clean,
/// Total score of at least `2.0` but below `5.0`.
Suspicious,
/// Total score of `5.0` or more.
LikelySpam,
}
/// A single heuristic that fired while scoring a message.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "specta", derive(specta::Type))]
#[serde(rename_all = "camelCase")]
pub struct SpamFactor {
/// Stable identifier of the heuristic, e.g. `"ALL_CAPS_SUBJECT"`.
pub rule: &'static str,
/// Amount this heuristic contributes to the report's total score.
pub score: f32,
/// Human-readable explanation of why the heuristic fired.
pub detail: String,
}
/// Scores a parsed message against a fixed set of spam heuristics.
///
/// Every heuristic that fires contributes one [`SpamFactor`]. The report's `score` is
/// the sum of the factor scores; a total of `5.0` or more yields
/// [`SpamVerdict::LikelySpam`], `2.0` or more yields [`SpamVerdict::Suspicious`], and
/// anything lower yields [`SpamVerdict::Clean`].
///
/// A missing subject, body, or `From` header is treated as an empty string.
///
/// Heuristics, in evaluation order:
/// - `ALL_CAPS_SUBJECT` (1.5): at least four letters in the subject, all uppercase.
/// - `MANY_EXCLAMATIONS` (1.0): three or more `!` in the subject.
/// - `SPAMMY_SUBJECT_PHRASE` (1.0 per phrase): subject contains a known spam phrase.
/// - `MISSING_LIST_UNSUBSCRIBE` (1.2): body mentions "unsubscribe" but the message
///   has no `List-Unsubscribe` header.
/// - `HIGH_IMAGE_TO_TEXT` (1.5): HTML with three or more `<img` tags and fewer than
///   200 bytes of plain text.
/// - `RANDOM_LOOKING_LOCAL_PART` (0.8): sender local part of 16+ characters of which
///   roughly a third or more are ASCII digits.
/// - `MALFORMED_FROM` (1.5): `From` header contains no `@`.
/// - `DISPLAY_NAME_SPOOF` (3.0 per brand): display name mentions a well-known brand
///   that does not appear in the sender's domain.
pub fn score(parsed: &Parsed) -> SpamReport {
    const SPAMMY_PHRASES: &[&str] = &[
        "you have won",
        "click here",
        "act now",
        "limited time",
        "free money",
        "viagra",
        "cialis",
        "nigerian prince",
        "miracle",
        "100% free",
        "no cost",
        "risk-free",
    ];
    const TRUSTED_BRANDS: &[&str] = &["paypal", "google", "amazon", "apple", "microsoft", "bank"];
    const SUSPICIOUS_THRESHOLD: f32 = 2.0;
    const LIKELY_SPAM_THRESHOLD: f32 = 5.0;

    let mut factors: Vec<SpamFactor> = Vec::new();
    let subject = parsed.header_subject.as_deref().unwrap_or("");
    let html = parsed.html_body.as_deref().unwrap_or("");
    let text = parsed.text_body.as_deref().unwrap_or("");
    // Prefer the plain-text part; fall back to HTML when there is no text.
    let body = if text.is_empty() { html } else { text };

    // --- Subject heuristics ---

    // Count letters as characters, not bytes, so a couple of multi-byte uppercase
    // letters cannot satisfy the "at least four letters" requirement on their own.
    let letter_count = subject.chars().filter(|c| c.is_alphabetic()).count();
    let all_upper = subject
        .chars()
        .filter(|c| c.is_alphabetic())
        .all(|c| c.is_uppercase());
    if letter_count >= 4 && all_upper {
        factors.push(SpamFactor {
            rule: "ALL_CAPS_SUBJECT",
            score: 1.5,
            detail: "Subject is entirely uppercase".into(),
        });
    }

    let bangs = subject.chars().filter(|c| *c == '!').count();
    if bangs >= 3 {
        factors.push(SpamFactor {
            rule: "MANY_EXCLAMATIONS",
            score: 1.0,
            detail: format!("Subject contains {bangs} '!' characters"),
        });
    }

    let subject_low = subject.to_lowercase();
    for &phrase in SPAMMY_PHRASES {
        if subject_low.contains(phrase) {
            factors.push(SpamFactor {
                rule: "SPAMMY_SUBJECT_PHRASE",
                score: 1.0,
                detail: format!("Subject contains {phrase:?}"),
            });
        }
    }

    // --- Body / header heuristics ---

    // Header field names are case-insensitive, so accept any casing of the key.
    let has_list_unsub = parsed.headers_json.as_object().map_or(false, |fields| {
        fields
            .keys()
            .any(|name| name.eq_ignore_ascii_case("List-Unsubscribe"))
    });
    if !has_list_unsub && body.to_lowercase().contains("unsubscribe") {
        factors.push(SpamFactor {
            rule: "MISSING_LIST_UNSUBSCRIBE",
            score: 1.2,
            detail: "Body offers to unsubscribe but no List-Unsubscribe header".into(),
        });
    }

    if !html.is_empty() {
        // HTML tag names are case-insensitive; `<IMG` must count as well.
        let img_count = html.to_ascii_lowercase().matches("<img").count();
        // Byte length of the plain-text part (equals the character count for ASCII).
        let text_len = text.len();
        if img_count >= 3 && text_len < 200 {
            factors.push(SpamFactor {
                rule: "HIGH_IMAGE_TO_TEXT",
                score: 1.5,
                detail: format!(
                    "{img_count} images with only {text_len} chars of plain text"
                ),
            });
        }
    }

    // --- Sender heuristics ---

    let from = parsed.header_from.as_deref().unwrap_or("");
    let bracket_pos = from.find('<');
    // The address is the text inside `<...>` when present ("Name <user@host>");
    // otherwise the whole header is taken to be the address.
    let address = match bracket_pos {
        Some(pos) => {
            let rest = &from[pos + 1..];
            rest.split('>').next().unwrap_or(rest)
        }
        None => from,
    };
    let address = address.trim();

    // Inspect only the address' local part so that digits or length in the display
    // name cannot trigger this rule.
    let local = address
        .split('@')
        .next()
        .unwrap_or("")
        .trim_matches(|c: char| c == '<' || c == '>' || c.is_whitespace());
    let local_len = local.chars().count();
    let digit_count = local.chars().filter(|c| c.is_ascii_digit()).count();
    if local_len >= 16 && digit_count >= local_len / 3 {
        factors.push(SpamFactor {
            rule: "RANDOM_LOOKING_LOCAL_PART",
            score: 0.8,
            detail: format!("Sender local part {local:?} looks autogenerated"),
        });
    }

    if !from.contains('@') {
        factors.push(SpamFactor {
            rule: "MALFORMED_FROM",
            score: 1.5,
            detail: format!("From header doesn't look like an address: {from:?}"),
        });
    }

    // A display name only exists when the address is wrapped in angle brackets.
    if let Some(pos) = bracket_pos {
        let display = from[..pos].trim().trim_matches('"').to_lowercase();
        let domain = address.split('@').nth(1).unwrap_or("").to_lowercase();
        for &trusted in TRUSTED_BRANDS {
            if display.contains(trusted) && !domain.contains(trusted) {
                factors.push(SpamFactor {
                    rule: "DISPLAY_NAME_SPOOF",
                    score: 3.0,
                    detail: format!(
                        "Display name claims {trusted:?}-like brand; domain is {domain:?}"
                    ),
                });
            }
        }
    }

    // --- Verdict ---

    let total: f32 = factors.iter().map(|f| f.score).sum();
    let verdict = if total >= LIKELY_SPAM_THRESHOLD {
        SpamVerdict::LikelySpam
    } else if total >= SUSPICIOUS_THRESHOLD {
        SpamVerdict::Suspicious
    } else {
        SpamVerdict::Clean
    };
    SpamReport {
        score: total,
        verdict,
        factors,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a plain-text-only message fixture with the given subject, body,
    /// sender and header map.
    fn plain_message(
        subject: &str,
        body: &str,
        from: &str,
        headers: serde_json::Value,
    ) -> Parsed {
        Parsed {
            header_from: Some(from.to_owned()),
            header_to: None,
            header_cc: None,
            header_subject: Some(subject.to_owned()),
            message_id: None,
            in_reply_to: None,
            text_body: Some(body.to_owned()),
            html_body: None,
            has_text: true,
            has_html: false,
            headers_json: headers,
            attachments: Vec::new(),
        }
    }

    /// An ordinary personal message gets a clean verdict.
    #[test]
    fn clean_email() {
        let message = plain_message("Hello", "How are you?", "alice@friend.example", json!({}));
        let report = score(&message);
        assert!(matches!(report.verdict, SpamVerdict::Clean));
    }

    /// An all-caps subject with several exclamation marks fires at least two rules.
    #[test]
    fn shouty_subject_flagged() {
        let message = plain_message("YOU WON!!!", "Click", "x@y.example", json!({}));
        let report = score(&message);
        let fired = report.factors.len();
        assert!(fired >= 2, "got {:?}", report.factors);
    }

    /// A message stacking many signals reaches the likely-spam score.
    #[test]
    fn likely_spam() {
        // Start from the plain-text fixture, then swap in an image-heavy HTML part
        // and a near-empty text part.
        let message = Parsed {
            html_body: Some(String::from(
                "<html><body><img src=a><img src=b><img src=c><img src=d></body></html>",
            )),
            text_body: Some("hi".to_owned()),
            ..plain_message(
                "ACT NOW! FREE MONEY!!! CLICK HERE",
                "Click here for risk-free money. Unsubscribe.",
                "auto12345abc6789@noreply.example",
                json!({}),
            )
        };
        let report = score(&message);
        assert!(
            report.score >= 5.0,
            "score {} factors {:?}",
            report.score,
            report.factors
        );
    }
}