use std::sync::LazyLock;
use regex::Regex;
use crate::dictionaries::irregular_verbs::{
is_adjective_exception, is_irregular_past_participle, is_linking_verb,
};
use crate::text;
/// Auxiliary verbs that may introduce a passive construction: the forms of
/// "to be" followed by the forms of "to get".
///
/// The list is fixed at compile time, so it is stored as a plain static slice
/// rather than a lazily initialised, heap-allocated `Vec`.
static PASSIVE_AUXILIARIES: &[&str] = &[
    "am", "is", "are", "was", "were", "be", "been", "being", "get", "gets", "got", "gotten",
    "getting",
];
/// Matches a run of word characters that ends in "ed", delimited by word
/// boundaries; at least one character must precede the "ed".
///
/// This is purely a spelling test, so it also matches non-participles that
/// happen to end in "ed" (for example "bed").
static REGULAR_PARTICIPLE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\b\w+ed\b").expect("valid regex"));
/// Matches "by" at a word boundary, followed by whitespace, an optional "the",
/// and at least one lowercase ASCII letter.
///
/// NOTE(review): `[a-z]` is case-sensitive, so a capitalised agent ("by Alice")
/// only matches if the words fed to this pattern have already been lowercased —
/// confirm against `text::extract_words`.
static BY_PHRASE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"\bby\s+(?:the\s+)?[a-z]+").expect("valid regex"));
/// One detected passive-voice construction (auxiliary + participle pair).
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
pub struct PassiveVoiceMatch {
/// The auxiliary and the participle joined by a single space.
pub text: String,
/// Heuristic score for this match; the scorer clamps it to `0.0..=1.0`.
pub confidence: f64,
/// 1-based index of the sentence in which the pair was found.
pub sentence_num: usize,
/// The auxiliary verb of the pair, as produced by word extraction.
pub auxiliary: String,
/// The word immediately following the auxiliary.
pub participle: String,
/// Whether a "by ..." phrase was found in the short window of words that
/// starts at the participle.
pub has_by_phrase: bool,
}
/// Minimum confidence a candidate must reach to be reported by
/// `detect_passive_voice`.
const DEFAULT_MIN_CONFIDENCE: f64 = 0.6;
/// Detects passive-voice constructions in `text` using the default confidence
/// threshold (`DEFAULT_MIN_CONFIDENCE`).
///
/// Delegates to `detect_passive_voice_with_threshold`; see that function for
/// how candidates are found.
#[tracing::instrument(skip_all, fields(text_len = text.len()))]
pub fn detect_passive_voice(text: &str) -> Vec<PassiveVoiceMatch> {
detect_passive_voice_with_threshold(text, DEFAULT_MIN_CONFIDENCE)
}
/// Scans `text` sentence by sentence for an auxiliary verb immediately followed
/// by a likely past participle, and reports every such pair whose confidence
/// score is at least `min_confidence`.
///
/// Sentences come from `text::split_sentences` and words from
/// `text::extract_words`. Only directly adjacent word pairs are examined, so an
/// auxiliary separated from its participle by another word is not reported.
///
/// Matches are returned in the order they occur; `sentence_num` is 1-based.
#[tracing::instrument(skip_all, fields(text_len = text.len(), min_confidence))]
pub fn detect_passive_voice_with_threshold(
    text: &str,
    min_confidence: f64,
) -> Vec<PassiveVoiceMatch> {
    let mut found = Vec::new();
    let sentence_list = text::split_sentences(text);

    for (sentence_idx, sentence) in sentence_list.iter().enumerate() {
        let tokens = text::extract_words(sentence);
        // Pair each word with its successor; fewer than two words yields no pairs.
        let adjacent = tokens.iter().zip(tokens.iter().skip(1));

        for (aux_idx, (auxiliary, participle)) in adjacent.enumerate() {
            let is_candidate = PASSIVE_AUXILIARIES.contains(&auxiliary.as_str())
                && is_likely_past_participle(participle);
            if !is_candidate {
                continue;
            }

            let confidence = calculate_confidence(auxiliary, participle, &tokens, aux_idx);
            if confidence < min_confidence {
                continue;
            }

            found.push(PassiveVoiceMatch {
                text: format!("{auxiliary} {participle}"),
                confidence,
                sentence_num: sentence_idx + 1,
                auxiliary: auxiliary.clone(),
                participle: participle.clone(),
                // The by-phrase window starts at the participle, not the auxiliary.
                has_by_phrase: has_by_phrase_nearby(&tokens, aux_idx + 1),
            });
        }
    }

    found
}
/// Returns the number of passive-voice matches that `detect_passive_voice`
/// reports for `text` (default confidence threshold).
pub fn count_passive_voice(text: &str) -> usize {
detect_passive_voice(text).len()
}
/// Expresses the number of passive-voice matches in `text` as a percentage of
/// `total_sentences`.
///
/// Returns `0.0` when `total_sentences` is zero. The numerator counts matches,
/// not sentences, so the result can exceed `100.0` when there are more matches
/// than `total_sentences`.
pub fn passive_voice_percentage(text: &str, total_sentences: usize) -> f64 {
    match total_sentences {
        // Nothing to divide by: report zero instead of NaN or infinity.
        0 => 0.0,
        sentence_total => {
            let passive_total = count_passive_voice(text);
            (passive_total as f64 / sentence_total as f64) * 100.0
        }
    }
}
/// Heuristically decides whether `word` looks like a past participle.
///
/// A word listed as an adjective exception is always rejected. Otherwise the
/// word qualifies if it is a known irregular past participle or if it matches
/// `REGULAR_PARTICIPLE` (i.e. it ends in "ed").
fn is_likely_past_participle(word: &str) -> bool {
    // The exception check comes first so it overrides both positive tests.
    !is_adjective_exception(word)
        && (is_irregular_past_participle(word) || REGULAR_PARTICIPLE.is_match(word))
}
/// Scores how likely the pair `auxiliary` + `participle` is a true passive.
///
/// `position` is the index of the auxiliary within `words` and must be a valid
/// index. The score starts at 0.5 and is adjusted, in this order:
///
/// * `+0.2` if the auxiliary is one of was / were / been / being / is / are
/// * `+0.2` if the participle is a known irregular past participle
/// * `-0.3` if the participle is an adjective exception
/// * `+0.3` if a "by ..." phrase is found in the window starting at the participle
/// * `-0.2` if `is_linking_verb` accepts the auxiliary
/// * `+0.1` if the word before the auxiliary is the / a / an / this / that /
///   these / those / it
///
/// The result is clamped to `0.0..=1.0`.
fn calculate_confidence(
    auxiliary: &str,
    participle: &str,
    words: &[String],
    position: usize,
) -> f64 {
    const BASE_SCORE: f64 = 0.5;

    let strong_auxiliary = matches!(auxiliary, "was" | "were" | "been" | "being" | "is" | "are");
    let irregular = is_irregular_past_participle(participle);
    let adjective_like = is_adjective_exception(participle);
    let agent_follows = has_by_phrase_nearby(words, position + 1);
    let linking = is_linking_verb(auxiliary);
    let follows_noun_marker = position > 0
        && matches!(
            words[position - 1].as_str(),
            "the" | "a" | "an" | "this" | "that" | "these" | "those" | "it"
        );

    // Applied strictly in this order so the floating-point sum is reproducible.
    let adjustments = [
        (strong_auxiliary, 0.2),
        (irregular, 0.2),
        (adjective_like, -0.3),
        (agent_follows, 0.3),
        (linking, -0.2),
        (follows_noun_marker, 0.1),
    ];

    adjustments
        .iter()
        .filter(|adjustment| adjustment.0)
        .fold(BASE_SCORE, |score, adjustment| score + adjustment.1)
        .clamp(0.0, 1.0)
}
/// Reports whether a "by ..." agent phrase occurs in the window of up to five
/// words that starts at index `position` of `words`.
///
/// The window is joined with single spaces and tested against `BY_PHRASE`.
/// Returns `false` when `position` lies at or beyond the end of `words`; an
/// out-of-range start is treated as an empty window instead of panicking.
fn has_by_phrase_nearby(words: &[String], position: usize) -> bool {
    const WINDOW_LEN: usize = 5;

    // Saturating add keeps a huge `position` from overflowing before the clamp.
    let end = position.saturating_add(WINDOW_LEN).min(words.len());

    // `get` yields `None` when `position > end`, i.e. the start is out of range.
    words
        .get(position..end)
        .is_some_and(|window| BY_PHRASE.is_match(&window.join(" ")))
}
// Unit tests for the public detection API.
#[cfg(test)]
mod tests {
use super::*;
// A "was" + irregular participle pair followed by "by the ..." is reported
// with its by-phrase flag set.
#[test]
fn detects_simple_passive() {
let matches = detect_passive_voice("The report was written by the team.");
assert!(!matches.is_empty());
assert_eq!(matches[0].auxiliary, "was");
assert_eq!(matches[0].participle, "written");
assert!(matches[0].has_by_phrase);
}
// "tired" ends in "ed" and would pass the spelling test; this relies on it
// being listed as an adjective exception.
#[test]
fn skips_adjective_exceptions() {
let matches = detect_passive_voice("She was tired after the long day.");
assert!(matches.is_empty(), "should not flag 'was tired' as passive");
}
// One passive per sentence across two sentences yields two matches.
#[test]
fn detects_multiple_passive() {
let text = "The code was written by Alice. The bug was found by Bob.";
let matches = detect_passive_voice(text);
assert_eq!(matches.len(), 2);
}
// Two matches over three sentences is roughly 66.7 %, hence the 60..70 bounds.
#[test]
fn percentage_calculation() {
let text = "The code was written. The team celebrated. The bug was fixed.";
let pct = passive_voice_percentage(text, 3);
assert!(pct > 60.0);
assert!(pct < 70.0);
}
// Empty input produces no matches.
#[test]
fn empty_text_returns_empty() {
assert!(detect_passive_voice("").is_empty());
}
}