writing_analysis/
filter_words.rs1use crate::error::{Result, WritingAnalysisError};
2use crate::utils::{split_sentences, split_words};
3
4#[derive(Debug, Clone, PartialEq)]
6pub struct FilterWordResult {
7 pub instances: Vec<FilterWordInstance>,
9 pub count: usize,
11 pub percentage: f64,
13}
14
/// A single occurrence of a filter word inside the analysed text.
#[derive(Clone, Debug, PartialEq)]
pub struct FilterWordInstance {
    /// The word as it was written in the text (original casing is kept).
    pub word: String,
    /// Byte distance from the start of the analysed text to the start of
    /// the word.
    pub offset: usize,
    /// The sentence that contains the word; empty when no sentence could be
    /// matched to the word's position.
    pub sentence: String,
}
25
/// Words reported by `detect_filter_words`.
///
/// Every entry is lowercase: the detector lowercases each candidate word
/// before looking it up here, which is what makes matching case-insensitive.
#[rustfmt::skip]
static FILTER_WORDS: &[&str] = &[
    "just", "really", "very", "quite", "rather",
    "somewhat", "somehow", "perhaps", "basically", "actually",
    "literally", "definitely", "certainly", "probably", "simply",
    "extremely", "absolutely", "totally", "completely", "utterly",
];
48
49pub fn detect_filter_words(text: &str) -> Result<FilterWordResult> {
51 let words = split_words(text);
52 if words.is_empty() {
53 return Err(WritingAnalysisError::EmptyText);
54 }
55
56 let sentences = split_sentences(text);
57 let text_start = text.as_ptr() as usize;
58 let mut instances = Vec::new();
59
60 for &word_ref in &words {
61 let lower = word_ref.to_lowercase();
62 if FILTER_WORDS.contains(&lower.as_str()) {
63 let word_offset = word_ref.as_ptr() as usize - text_start;
64 let sentence = sentences
65 .iter()
66 .find(|s| {
67 let s_start = s.as_ptr() as usize - text_start;
68 let s_end = s_start + s.len();
69 word_offset >= s_start && word_offset < s_end
70 })
71 .map(|s| s.to_string())
72 .unwrap_or_default();
73
74 instances.push(FilterWordInstance {
75 word: word_ref.to_string(),
76 offset: word_offset,
77 sentence,
78 });
79 }
80 }
81
82 let count = instances.len();
83 let percentage = (count as f64 / words.len() as f64) * 100.0;
84
85 Ok(FilterWordResult {
86 instances,
87 count,
88 percentage,
89 })
90}
91
#[cfg(test)]
mod tests {
    use super::*;

    /// Two table words in one sentence are both counted.
    #[test]
    fn detect_filter_words_basic() {
        let outcome = detect_filter_words("She just really wanted to go.").unwrap();
        assert_eq!(outcome.count, 2);
    }

    /// Text without any table word yields zero hits and a zero percentage.
    #[test]
    fn no_filter_words() {
        let outcome = detect_filter_words("The cat sat on the mat.").unwrap();
        assert_eq!(outcome.count, 0);
        assert_eq!(outcome.percentage, 0.0);
    }

    /// "very" and "simply" out of five words should come out near 40%.
    #[test]
    fn filter_word_percentage() {
        let outcome = detect_filter_words("I very simply want this.").unwrap();
        assert_eq!(outcome.count, 2);
        let deviation = (outcome.percentage - 40.0).abs();
        assert!(deviation < 1.0);
    }

    /// Upper- and mixed-case spellings are matched as well.
    #[test]
    fn case_insensitive_detection() {
        let outcome = detect_filter_words("JUST do it. Really.").unwrap();
        assert_eq!(outcome.count, 2);
    }

    /// An empty input is rejected with an error.
    #[test]
    fn empty_text_error() {
        let outcome = detect_filter_words("");
        assert!(outcome.is_err());
    }
}