Skip to main content

writing_analysis/
passive_voice.rs

1use regex::Regex;
2use std::sync::LazyLock;
3
4use crate::error::{Result, WritingAnalysisError};
5use crate::utils::split_sentences;
6
7/// Result of passive voice detection.
8#[derive(Debug, Clone, PartialEq)]
9pub struct PassiveVoiceResult {
10    /// All detected passive voice instances
11    pub instances: Vec<PassiveInstance>,
12    /// Percentage of sentences containing passive voice (0.0-100.0)
13    pub percentage: f64,
14}
15
/// One reported passive-voice phrase together with its location.
#[derive(Clone, Debug, PartialEq)]
pub struct PassiveInstance {
    /// Text of the match: the auxiliary verb plus the participle (e.g., "was written").
    pub phrase: String,
    /// Byte offset of the phrase within the analysed text.
    pub offset: usize,
    /// Complete sentence in which the phrase was found.
    pub sentence: String,
}
26
/// Past participles of irregular verbs.
///
/// The entries are joined with `|` into the participle alternation of `PASSIVE_RE`,
/// alongside the generic `\w+ed` alternative. Entries are grouped by initial letter;
/// the relative order of the entries is part of the generated pattern, so keep it stable.
#[rustfmt::skip] // keep the one-group-per-letter layout
static IRREGULAR_PAST_PARTICIPLES: &[&str] = &[
    // a
    "awoken",
    // b
    "been", "born", "beaten", "become", "begun", "bent", "bitten", "blown", "broken", "brought",
    "built", "burnt", "bought",
    // c
    "caught", "chosen", "come", "cost", "cut",
    // d
    "done", "drawn", "driven", "drunk",
    // e
    "eaten",
    // f
    "fallen", "felt", "found", "flown", "forgotten", "forgiven", "frozen",
    // g
    "given", "gone", "grown",
    // h
    "had", "heard", "hidden", "hit", "held", "hurt",
    // k
    "kept", "known",
    // l
    "laid", "led", "left", "lent", "let", "lain", "lost",
    // m
    "made", "meant", "met",
    // p
    "paid", "put",
    // r
    "read", "ridden", "risen", "run",
    // s
    "said", "seen", "sent", "set", "shaken", "shown", "shut", "slept", "slid", "spoken", "spent",
    "split", "spread", "sung", "stood", "stolen", "stuck", "stung", "struck", "sworn", "swept",
    "swum",
    // t
    "taken", "taught", "thought", "thrown", "told", "torn",
    // u
    "understood",
    // w
    "woken", "worn", "wound", "written",
];
38
/// Words ending in "-ed" that are skipped when they appear as the participle of a match.
///
/// `detect_passive_voice` compares the last word of each match against this list,
/// ignoring ASCII case, and drops the match on a hit (e.g. "was excited").
#[rustfmt::skip] // one row per initial letter
static ADJECTIVE_EXCLUSIONS: &[&str] = &[
    "advanced", "amazed", "associated", "attached",
    "bored",
    "complicated", "concerned", "confused", "connected", "convinced",
    "dedicated", "determined", "disappointed",
    "embarrassed", "excited", "experienced",
    "frustrated",
    "interested", "involved",
    "married",
    "organized", "overwhelmed",
    "pleased", "prepared",
    "related",
    "satisfied", "sophisticated", "supposed", "surprised",
    "tired",
    "used",
];
72
73static PASSIVE_RE: LazyLock<Regex> = LazyLock::new(|| {
74    let irregulars = IRREGULAR_PAST_PARTICIPLES.join("|");
75    let pattern = format!(
76        r"(?i)\b(am|is|are|was|were|be|been|being)\s+(\w+ed|{})\b",
77        irregulars
78    );
79    Regex::new(&pattern).unwrap()
80});
81
82/// Detect passive voice in text.
83pub fn detect_passive_voice(text: &str) -> Result<PassiveVoiceResult> {
84    let sentences = split_sentences(text);
85    if sentences.is_empty() {
86        return Err(WritingAnalysisError::NoSentences);
87    }
88
89    let mut instances = Vec::new();
90    let mut sentences_with_passive = 0;
91    let text_start = text.as_ptr() as usize;
92
93    for sentence in &sentences {
94        let mut found_in_sentence = false;
95
96        for mat in PASSIVE_RE.find_iter(sentence) {
97            let phrase = mat.as_str();
98
99            // Check exclusion list: get the last word (the participle)
100            let participle = phrase.split_whitespace().last().unwrap_or("");
101            if ADJECTIVE_EXCLUSIONS
102                .iter()
103                .any(|&exc| participle.eq_ignore_ascii_case(exc))
104            {
105                continue;
106            }
107
108            let sentence_start = sentence.as_ptr() as usize - text_start;
109            let offset = sentence_start + mat.start();
110
111            instances.push(PassiveInstance {
112                phrase: phrase.to_string(),
113                offset,
114                sentence: sentence.to_string(),
115            });
116            found_in_sentence = true;
117        }
118
119        if found_in_sentence {
120            sentences_with_passive += 1;
121        }
122    }
123
124    let percentage = (sentences_with_passive as f64 / sentences.len() as f64) * 100.0;
125
126    Ok(PassiveVoiceResult {
127        instances,
128        percentage,
129    })
130}
131
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the detector on `text`, panicking if it reports an error.
    fn analyze(text: &str) -> PassiveVoiceResult {
        detect_passive_voice(text).unwrap()
    }

    /// A regular "-ed"-free participle after "was" is reported once.
    #[test]
    fn detect_basic_passive() {
        let PassiveVoiceResult { instances, .. } = analyze("The ball was thrown by the boy.");
        assert_eq!(instances.len(), 1);
        assert_eq!(instances[0].phrase, "was thrown");
    }

    /// An irregular participle from the word list is reported once.
    #[test]
    fn detect_irregular_passive() {
        let PassiveVoiceResult { instances, .. } = analyze("The report was written by the team.");
        assert_eq!(instances.len(), 1);
        assert_eq!(instances[0].phrase, "was written");
    }

    /// An active sentence yields no instances and a zero percentage.
    #[test]
    fn no_passive_active_voice() {
        let outcome = analyze("The boy threw the ball.");
        assert_eq!(outcome.instances.len(), 0);
        assert_eq!(outcome.percentage, 0.0);
    }

    /// Participles on the exclusion list are not reported.
    #[test]
    fn exclude_adjectives() {
        let outcome = analyze("She was excited about the project.");
        assert_eq!(outcome.instances.len(), 0);
    }

    /// Each passive sentence contributes its own instance.
    #[test]
    fn multiple_passive_instances() {
        let outcome = analyze("The cake was eaten. The song was sung. He walked home.");
        assert_eq!(outcome.instances.len(), 2);
    }

    /// One passive sentence out of two gives fifty percent.
    #[test]
    fn passive_percentage_calculation() {
        let outcome = analyze("The ball was thrown. She ran quickly.");
        assert_eq!(outcome.percentage, 50.0);
    }
}