use std::sync::LazyLock;
use regex::Regex;
use serde::Serialize;
#[derive(Serialize)]
pub struct AiSlop {
pub signals: Vec<Signal>,
pub density_per_1000_words: f64,
pub verdict: &'static str,
}
#[derive(Serialize, Clone)]
pub struct Signal {
pub kind: &'static str,
pub confidence: &'static str,
pub snippet: String,
pub position_pct: f64,
}
struct Pattern {
kind: &'static str,
confidence: &'static str,
re: Regex,
}
fn confidence_weight(c: &str) -> f64 {
match c {
"high" => 1.0,
"medium" => 0.6,
_ => 0.3,
}
}
static PATTERNS: LazyLock<Vec<Pattern>> = LazyLock::new(|| {
let mk = |kind, conf, p: &str| Pattern {
kind,
confidence: conf,
re: Regex::new(p).unwrap_or_else(|e| panic!("bad regex for {kind}: {e}")),
};
vec![
mk(
"negation_pivot_but",
"low",
r"(?i)\b(not|don'?t|doesn'?t|isn'?t|aren'?t|never)\b[^.!?\n]{3,80},?\s+but\b",
),
mk(
"negation_pivot_em_dash",
"high",
r"(?i)\b(not|never)\b[^.!?\n—–]{3,60}[—–]\s*\w+",
),
mk("delve_only", "high", r"(?i)\bdelv(e|es|ed|ing)\b"),
mk(
"leverage_foster_underscore",
"medium",
r"(?i)\b(leverag(e|es|ed|ing)|foster(s|ed|ing)|underscor(e|es|ed|ing))\b",
),
mk(
"tapestry_family",
"high",
r"(?i)\b(tapestry|multifaceted|paradigm|pivotal|intricate|intricacies|burgeoning)\b",
),
mk(
"nuanced_interplay",
"low",
r"(?i)\b(nuanced|interplay)\b",
),
mk(
"realm_landscape",
"high",
r"(?i)\bin\s+the\s+(realm|landscape|world|sphere|domain)\s+of\b",
),
mk(
"era_opener",
"high",
r"(?i)\bin\s+an?\s+era\s+(of|where|when|in\s+which)\b",
),
mk(
"today_fast_paced",
"high",
r"(?i)\bin\s+today'?s\s+(fast.?paced|digital|interconnected|ever.?(changing|evolving))\b",
),
mk(
"ever_evolving",
"high",
r"(?i)\b(ever.?(evolving|changing|growing|expanding)|rapidly\s+evolving)\b",
),
mk(
"testament_to",
"high",
r"(?i)\b(a|stands?\s+as\s+a)\s+testament\s+to\b",
),
mk(
"it_worth_noting",
"high",
r"(?i)\bit('?s|\s+is)\s+(worth\s+noting|important\s+to\s+note)\b|\bit\s+should\s+be\s+noted\b",
),
mk(
"navigate_complexities",
"high",
r"(?i)\bnavigat(e|ing|es|ed)\s+the\s+(complex(it(y|ies))?|landscape|intricacies|challenges|nuances)\b",
),
mk("bold_colon_header", "medium", r"\*\*[^*\n]{1,40}:\*\*\s+\S"),
mk(
"false_conclusion_opener",
"high",
r"(?im)(^|\n)\s*(in\s+conclusion|in\s+summary|to\s+summari[sz]e|to\s+conclude|in\s+closing|all\s+in\s+all)[,\s]",
),
mk(
"not_only_but_also",
"high",
r"(?i)\bnot\s+only\b[^.!?\n]{3,80}\bbut\s+also\b",
),
mk(
"unlock_potential",
"high",
r"(?i)\b(unlock|harness|unleash)\s+(the\s+)?(power|potential|magic|true\s+\w+)\b",
),
mk(
"paramount_importance",
"high",
r"(?i)\b(of\s+paramount\s+importance|of\s+utmost\s+importance|cannot\s+be\s+overstated)\b",
),
mk(
"seamlessly_integrate",
"high",
r"(?i)\bseamlessly\s+(integrat\w+|combin\w+|blend\w+|merge\w+|transition\w+)\b",
),
mk(
"world_of_x",
"high",
r"(?i)\b(welcome\s+to|enter)\s+the\s+world\s+of\b",
),
mk(
"hedge_stack",
"high",
r"(?i)\b(perhaps|arguably|seemingly|possibly|potentially|presumably)\b[^.!?\n]{0,80}\b(perhaps|arguably|seemingly|might|could|may|possibly|potentially|presumably)\b",
),
mk(
"furthermore_moreover",
"medium",
r"(?i)\b(furthermore|moreover|additionally)\b",
),
]
});
pub fn extract(body_text: &str) -> AiSlop {
let words = body_text.split_whitespace().count();
let total_words = words.max(1) as f64;
let mut signals: Vec<Signal> = Vec::new();
let mut weighted: f64 = 0.0;
for p in PATTERNS.iter() {
for m in p.re.find_iter(body_text).take(20) {
let pos = body_text[..m.start()].split_whitespace().count() as f64;
let position_pct = ((pos / total_words) * 1000.0).round() / 10.0;
let snippet: String = m
.as_str()
.chars()
.take(120)
.collect::<String>()
.trim()
.replace('\n', " ");
weighted += confidence_weight(p.confidence);
signals.push(Signal {
kind: p.kind,
confidence: p.confidence,
snippet,
position_pct,
});
}
}
let density = (weighted / total_words) * 1000.0;
let density = (density * 100.0).round() / 100.0;
let verdict = if density < 3.0 {
"clean"
} else if density < 8.0 {
"suspicious"
} else {
"likely_ai"
};
signals.truncate(40);
AiSlop {
signals,
density_per_1000_words: density,
verdict,
}
}
pub fn suggestion(slop: &AiSlop) -> Option<String> {
match slop.verdict {
"clean" => None,
"suspicious" => Some(format!(
"AI-slop density {:.1}/1000 words. {} signals. Review for tells like \"delve\", \"tapestry\", negation pivots.",
slop.density_per_1000_words,
slop.signals.len()
)),
"likely_ai" => Some(format!(
"AI-slop density {:.1}/1000 words ({} signals). Heavy LLM-writing fingerprint. Rewrite the worst sections.",
slop.density_per_1000_words,
slop.signals.len()
)),
_ => None,
}
}