Skip to main content

writing_analysis/
filter_words.rs

1use crate::error::{Result, WritingAnalysisError};
2use crate::utils::{split_sentences, split_words};
3
/// Result of filter word detection.
///
/// Returned by [`detect_filter_words`]; the field notes below describe how
/// that function fills them in.
#[derive(Debug, Clone, PartialEq)]
pub struct FilterWordResult {
    /// All detected filter word instances, in the order the words were scanned
    pub instances: Vec<FilterWordInstance>,
    /// Total number of filter words found (the length of `instances`)
    pub count: usize,
    /// Percentage of total words that are filter words (0.0-100.0),
    /// computed as `count / total_words * 100`
    pub percentage: f64,
}
14
/// A single filter word occurrence.
///
/// All fields are plain owned values, so the type also derives `Eq` and
/// `Hash`; instances can be deduplicated or used as set/map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FilterWordInstance {
    /// The matched filter word, exactly as the word splitter returned it
    /// (it is not lowercased)
    pub word: String,
    /// Byte offset in the original text
    pub offset: usize,
    /// The sentence containing the filter word; empty when no sentence span
    /// covers `offset`
    pub sentence: String,
}
25
/// Words that [`detect_filter_words`] reports as filter words.
///
/// Entries are written in lowercase; detection compares candidate words
/// against them case-insensitively.
static FILTER_WORDS: &[&str] = &[
    "just",
    "really",
    "very",
    "quite",
    "rather",
    "somewhat",
    "somehow",
    "perhaps",
    "basically",
    "actually",
    "literally",
    "definitely",
    "certainly",
    "probably",
    "simply",
    "extremely",
    "absolutely",
    "totally",
    "completely",
    "utterly",
];
48
49/// Detect filter words in text.
50pub fn detect_filter_words(text: &str) -> Result<FilterWordResult> {
51    let words = split_words(text);
52    if words.is_empty() {
53        return Err(WritingAnalysisError::EmptyText);
54    }
55
56    let sentences = split_sentences(text);
57    let text_start = text.as_ptr() as usize;
58    let mut instances = Vec::new();
59
60    for &word_ref in &words {
61        let lower = word_ref.to_lowercase();
62        if FILTER_WORDS.contains(&lower.as_str()) {
63            let word_offset = word_ref.as_ptr() as usize - text_start;
64            let sentence = sentences
65                .iter()
66                .find(|s| {
67                    let s_start = s.as_ptr() as usize - text_start;
68                    let s_end = s_start + s.len();
69                    word_offset >= s_start && word_offset < s_end
70                })
71                .map(|s| s.to_string())
72                .unwrap_or_default();
73
74            instances.push(FilterWordInstance {
75                word: word_ref.to_string(),
76                offset: word_offset,
77                sentence,
78            });
79        }
80    }
81
82    let count = instances.len();
83    let percentage = (count as f64 / words.len() as f64) * 100.0;
84
85    Ok(FilterWordResult {
86        instances,
87        count,
88        percentage,
89    })
90}
91
#[cfg(test)]
mod tests {
    use super::*;

    /// Two filter words in one sentence; also pins the matched text and the
    /// byte offset recorded for each instance.
    #[test]
    fn detect_filter_words_basic() {
        let result = detect_filter_words("She just really wanted to go.").unwrap();
        assert_eq!(result.count, 2);
        assert_eq!(result.instances.len(), result.count);

        let found: Vec<(&str, usize)> = result
            .instances
            .iter()
            .map(|instance| (instance.word.as_str(), instance.offset))
            .collect();
        assert_eq!(found, [("just", 4), ("really", 9)]);
    }

    /// Text without any filter word yields an empty, zero-percent result.
    #[test]
    fn no_filter_words() {
        let result = detect_filter_words("The cat sat on the mat.").unwrap();
        assert_eq!(result.count, 0);
        assert!(result.instances.is_empty());
        assert_eq!(result.percentage, 0.0);
    }

    #[test]
    fn filter_word_percentage() {
        // "I very simply want this" — 5 words, 2 filter words = 40%
        let result = detect_filter_words("I very simply want this.").unwrap();
        assert_eq!(result.count, 2);
        // 2 / 5 * 100 should be 40 up to floating-point rounding, so a tight
        // tolerance catches a wrong word count that a 1.0 tolerance would hide.
        assert!((result.percentage - 40.0).abs() < 1e-9);
    }

    /// Matching ignores case, and the reported word keeps its original casing.
    #[test]
    fn case_insensitive_detection() {
        let result = detect_filter_words("JUST do it. Really.").unwrap();
        assert_eq!(result.count, 2);
        assert_eq!(result.instances[0].word, "JUST");
    }

    /// Empty input must fail with the specific `EmptyText` variant.
    #[test]
    fn empty_text_error() {
        let result = detect_filter_words("");
        assert!(matches!(result, Err(WritingAnalysisError::EmptyText)));
    }
}