use std::sync::OnceLock;
use regex::Regex;
use super::{CompiledLexicon, ProseLanguage, tokenize};
/// Returns the regex stored in `cell`, compiling it from `pat` on first use.
///
/// Once the cell has been initialised, later calls return the cached regex
/// and `pat` is not consulted again, so each cell should always be paired
/// with the same pattern.
///
/// # Panics
///
/// Panics if the cell is still empty and `pat` is not a valid regex.
fn re(cell: &'static OnceLock<Regex>, pat: &str) -> &'static Regex {
    let compile = || Regex::new(pat).expect("valid prose passive regex");
    cell.get_or_init(compile)
}
/// Heuristically decides whether an English `sentence` contains a passive
/// construction.
///
/// Two checks run in order:
/// 1. a regex for a form of "to be" followed by a word ending in `ed`/`en`;
/// 2. a scan over adjacent token pairs for a form of "to be" immediately
///    followed by a token that `lx.is_passive_exception` accepts.
///    NOTE(review): for English the lexicon entries are presumably irregular
///    participles such as "built" — confirm against the lexicon data.
fn detect_passive_en(sentence: &str, lx: &CompiledLexicon) -> bool {
    static REG: OnceLock<Regex> = OnceLock::new();
    let regular = re(
        &REG,
        r"(?i)\b(was|were|is|are|been|being|be)\s+\w+(ed|en)\b",
    );
    if regular.is_match(sentence) {
        return true;
    }
    // The auxiliaries below are compared verbatim.
    // NOTE(review): this assumes `tokenize` yields lower-cased tokens — confirm.
    let toks = tokenize(sentence);
    toks.windows(2).any(|w| {
        matches!(
            w[0].as_str(),
            "was" | "were" | "is" | "are" | "been" | "being" | "be"
        ) && lx.is_passive_exception(&w[1])
    })
}
/// Heuristically decides whether a Russian `sentence` contains a passive
/// (reflexive-form) construction.
///
/// A sentence is flagged as soon as one token ends in "ся" or "сь" and is
/// not accepted by `lx.is_passive_exception`.
fn detect_passive_ru(sentence: &str, lx: &CompiledLexicon) -> bool {
    let tokens = tokenize(sentence);
    for word in tokens.iter() {
        let has_reflexive_suffix = word.ends_with("ся") || word.ends_with("сь");
        // The lexicon is consulted only for tokens that carry the suffix.
        if has_reflexive_suffix && !lx.is_passive_exception(word) {
            return true;
        }
    }
    false
}
/// Heuristically decides whether a German `sentence` contains a passive
/// construction.
///
/// Two patterns are tried:
/// 1. a form of "werden" followed by a word ending in `t`/`en`
///    (process passive) — a hit returns `true` immediately;
/// 2. a form of "sein" followed by a word ending in `t`/`en`
///    (state passive), unless that very match is one of the listed
///    adjective predicates (e.g. "ist bekannt", "war bereit").
///
/// The exclusion is applied per match rather than to the whole sentence, so
/// an adjective predicate in one clause does not hide a state passive in
/// another clause of the same sentence.
fn detect_passive_de(sentence: &str) -> bool {
    static VORGANG: OnceLock<Regex> = OnceLock::new();
    static ZUSTAND: OnceLock<Regex> = OnceLock::new();
    static EXCL: OnceLock<Regex> = OnceLock::new();
    let vorgang = re(
        &VORGANG,
        r"(?i)\b(wird|wurde|werden|wurden|worden|werde|werdet)\s+\w{3,}(t|en)\b",
    );
    if vorgang.is_match(sentence) {
        return true;
    }
    let zustand = re(
        &ZUSTAND,
        r"(?i)\b(ist|war|sind|waren|sei|wäre)\s+\w{3,}(t|en)\b",
    );
    let excl = re(
        &EXCL,
        r"(?i)\b(ist|war|sind|waren|sei|wäre)\s+(klar|bekannt|bereit|fertig|möglich|nötig)\b",
    );
    // Each candidate spans exactly "<auxiliary> <word>", so testing the
    // exclusion regex against the matched text alone decides whether that
    // candidate is an excluded adjective predicate.
    zustand
        .find_iter(sentence)
        .any(|candidate| !excl.is_match(candidate.as_str()))
}
/// Heuristically decides whether a French `sentence` contains a passive
/// construction.
///
/// Matches one of the listed singular forms of "être" (simple or compound,
/// e.g. "est", "a été") followed by a word with a participle-like ending.
/// The check is purely suffix-based, and plural auxiliaries (e.g. "sont")
/// are not in the pattern, so sentences using them are not flagged.
fn detect_passive_fr(sentence: &str) -> bool {
    static REG: OnceLock<Regex> = OnceLock::new();
    let pattern = re(
        &REG,
        r"(?i)\b(est|était|fut|sera|serait|a été|avait été|aura été)\s+\w+(é|ée|és|ées|i|ie|is|ies|it|ite|its|ites|u|ue|us|ues|ert|erte|eint|eints|aint|aints|oint|oints)\b",
    );
    pattern.is_match(sentence)
}
/// Heuristically decides whether a Spanish `sentence` contains a passive
/// construction.
///
/// Matches one of the listed singular forms of "ser" (simple or compound,
/// e.g. "fue", "ha sido") followed by a word ending in a participle suffix
/// (`-ado`/`-ido` and their gender/number variants). Plural auxiliaries
/// (e.g. "son", "fueron") are not in the pattern, so sentences using them
/// are not flagged.
fn detect_passive_es(sentence: &str) -> bool {
    static REG: OnceLock<Regex> = OnceLock::new();
    let pattern = re(
        &REG,
        r"(?i)\b(es|era|fue|será|sería|ha sido|había sido)\s+\w+(ado|ada|ados|adas|ido|ida|idos|idas)\b",
    );
    pattern.is_match(sentence)
}
/// Dispatches to the language-specific passive-voice heuristic for `lang`.
///
/// The lexicon `lx` is consulted only by the English and Russian detectors.
/// `ProseLanguage::Other` has no detector and always yields `false`.
pub(crate) fn detect_passive(sentence: &str, lang: &ProseLanguage, lx: &CompiledLexicon) -> bool {
    let is_passive = match lang {
        // No heuristic exists for other languages.
        ProseLanguage::Other(_) => false,
        // Lexicon-assisted detectors.
        ProseLanguage::En => detect_passive_en(sentence, lx),
        ProseLanguage::Ru => detect_passive_ru(sentence, lx),
        // Regex-only detectors.
        ProseLanguage::De => detect_passive_de(sentence),
        ProseLanguage::Fr => detect_passive_fr(sentence),
        ProseLanguage::Es => detect_passive_es(sentence),
    };
    is_passive
}
/// Computes the passive-voice ratio over `sentences` for language `lang`.
///
/// Returns `None` when `lang.is_supported()` is false and `Some(0.0)` for an
/// empty slice. Otherwise the result is the number of sentences flagged by
/// [`detect_passive`] divided by the number of unflagged sentences, with the
/// divisor clamped to at least 1.
///
/// NOTE(review): the divisor is the non-passive count, not the total, so the
/// value can exceed 1.0 (one passive sentence out of three yields 0.5).
/// Confirm this is the intended definition of "ratio".
pub(crate) fn passive_ratio(
    sentences: &[String],
    lang: &ProseLanguage,
    lx: &CompiledLexicon,
) -> Option<f32> {
    if !lang.is_supported() {
        return None;
    }
    if sentences.is_empty() {
        return Some(0.0);
    }

    let mut passive_count = 0usize;
    for sentence in sentences {
        if detect_passive(sentence, lang, lx) {
            passive_count += 1;
        }
    }

    // Clamp so that an all-passive input does not divide by zero.
    let active_count = sentences.len() - passive_count;
    let divisor = active_count.max(1);
    Some(passive_count as f32 / divisor as f32)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::prose::CompiledLexicon;
    use crate::prose::ProseLanguage::*;

    /// Builds the compiled lexicon for language `l`.
    fn lx(l: &crate::prose::ProseLanguage) -> CompiledLexicon {
        CompiledLexicon::for_language(l)
    }

    /// Sentences that must be flagged as passive, per language.
    #[test]
    fn passive_per_language() {
        let cases = [
            ("The bridge was built last year.", En),
            ("The door was opened slowly.", En),
            ("Дверь была закрыта, и свет погасился.", Ru),
            ("Die Brücke wurde gebaut.", De),
            ("Le pont a été construit.", Fr),
            ("El puente fue construido.", Es),
        ];
        for (text, lang) in &cases {
            assert!(
                detect_passive(text, lang, &lx(lang)),
                "expected passive: {}",
                text
            );
        }
    }

    /// Active sentences and excluded forms that must not be flagged.
    #[test]
    fn active_and_exclusions_not_passive() {
        let cases = [
            ("She built the bridge.", En),
            ("Ему казалось странно.", Ru),
            ("Es war klar.", De),
            ("Elle marchait vite.", Fr),
        ];
        for (text, lang) in &cases {
            assert!(
                !detect_passive(text, lang, &lx(lang)),
                "expected not passive: {}",
                text
            );
        }
    }

    /// `passive_ratio` yields `None` for an `Other` language.
    #[test]
    fn ratio_none_for_unsupported() {
        let other = Other("it".into());
        let sentences: [String; 1] = ["qualcosa".into()];
        let ratio = passive_ratio(&sentences, &other, &lx(&other));
        assert_eq!(ratio, None);
    }

    /// Three sentences are expected to produce a ratio of 0.5.
    #[test]
    fn ratio_counts() {
        let sents: Vec<String> = [
            "The wall was painted.",
            "She walked home.",
            "He read the book.",
        ]
        .iter()
        .map(|s| String::from(*s))
        .collect();
        let ratio = passive_ratio(&sents, &En, &lx(&En)).unwrap();
        assert!((ratio - 0.5).abs() < 1e-6);
    }
}