scirs2_text/spelling/
utils.rs1use crate::error::{Result, TextError};
7use std::collections::HashMap;
8use std::fs::File;
9use std::io::{BufRead, BufReader};
10use std::path::Path;
11
/// Returns an owned copy of `text`, lowercased unless case matters.
///
/// # Arguments
///
/// * `text` - The string to normalize.
/// * `casesensitive` - When `true`, `text` is copied unchanged; when `false`,
///   the result of [`str::to_lowercase`] is returned.
///
/// # Returns
///
/// A newly allocated `String` in both cases.
#[inline]
#[allow(dead_code)]
pub fn normalize_string(text: &str, casesensitive: bool) -> String {
    if casesensitive {
        return text.to_owned();
    }
    text.to_lowercase()
}
22
/// Splits `text` on whitespace and returns the lowercased words.
///
/// Each whitespace-separated token has its leading and trailing
/// non-alphanumeric characters trimmed (interior ones such as the apostrophe
/// in `don't` are kept) and is then lowercased. Tokens with nothing left after
/// trimming are dropped.
///
/// # Returns
///
/// The words in their original order, each as an owned lowercase `String`.
#[allow(dead_code)]
pub fn extract_words(text: &str) -> Vec<String> {
    let strip_edges = |token: &str| -> String {
        token
            .trim_matches(|ch: char| !ch.is_alphanumeric())
            .to_owned()
    };

    text.split_whitespace()
        .map(strip_edges)
        // Lowercasing never turns a non-empty string into an empty one, so
        // discarding empties before lowercasing yields the same words.
        .filter(|core| !core.is_empty())
        .map(|core| core.to_lowercase())
        .collect()
}
34
/// Splits `text` into sentences at every `.`, `?` or `!`.
///
/// The terminator characters are not included in the output. Each piece is
/// trimmed of surrounding whitespace, and pieces that are empty after
/// trimming (for example between consecutive terminators) are skipped.
///
/// # Returns
///
/// Borrowed slices of `text`, in order of appearance.
#[allow(dead_code)]
pub fn split_sentences(text: &str) -> Vec<&str> {
    let is_terminator = |ch: char| matches!(ch, '.' | '?' | '!');

    text.split(is_terminator)
        .filter_map(|piece| {
            let sentence = piece.trim();
            if sentence.is_empty() {
                None
            } else {
                Some(sentence)
            }
        })
        .collect()
}
43
/// Checks whether two words are close enough in length to possibly be within
/// `max_editdistance` edits of each other.
///
/// Lengths are compared in bytes (via [`str::len`]), not in characters.
///
/// # Arguments
///
/// * `_word1` - First word.
/// * `word2` - Second word.
/// * `max_editdistance` - Largest allowed difference between the two lengths.
///
/// # Returns
///
/// `true` when the absolute difference of the byte lengths is at most
/// `max_editdistance`.
#[inline]
#[allow(dead_code)]
pub fn is_within_length_threshold(_word1: &str, word2: &str, max_editdistance: usize) -> bool {
    // `abs_diff` cannot overflow, unlike `len + max_editdistance`, which
    // panics in debug builds (and wraps in release) for very large thresholds
    // such as `usize::MAX`.
    _word1.len().abs_diff(word2.len()) <= max_editdistance
}
50
/// Checks whether `word` is a key of `dictionary`.
///
/// # Arguments
///
/// * `dictionary` - Map whose keys are the known words (values are not read).
/// * `word` - The word to look up.
/// * `case_sensitive` - When `true`, only an exact key match counts. When
///   `false`, a key matches if its [`str::to_lowercase`] form equals the
///   lowercase form of `word`.
///
/// # Returns
///
/// `true` if a matching key exists, `false` otherwise.
#[inline]
#[allow(dead_code)]
pub fn dictionary_contains(
    dictionary: &HashMap<String, usize>,
    word: &str,
    case_sensitive: bool,
) -> bool {
    // An exact key match satisfies both modes, so try the O(1) lookup first.
    if dictionary.contains_key(word) {
        return true;
    }
    if case_sensitive {
        return false;
    }

    let word_lower = word.to_lowercase();

    // If the lowercased form itself is a key, the hash lookup answers the
    // question without scanning (lowercasing an already-lowercased key
    // leaves it unchanged, so the scan below would match it as well).
    if dictionary.contains_key(word_lower.as_str()) {
        return true;
    }

    // Fallback for keys stored with other casing (e.g. "Hello" vs "hello"):
    // compare the lowercase form of every key.
    dictionary
        .keys()
        .any(|dict_word| dict_word.to_lowercase() == word_lower)
}
68
69#[allow(dead_code)]
71pub fn load_from_file<P, F, T>(_path: P, mut lineprocessor: F) -> Result<T>
72where
73 P: AsRef<Path>,
74 F: FnMut(&str) -> Result<T>,
75 T: Default,
76{
77 let file =
78 File::open(_path).map_err(|e| TextError::IoError(format!("Failed to open file: {e}")))?;
79
80 let reader = BufReader::new(file);
81 let mut result = T::default();
82
83 for line in reader.lines() {
84 let line =
85 line.map_err(|e| TextError::IoError(format!("Failed to read line from file: {e}")))?;
86
87 if line.trim().is_empty() {
89 continue;
90 }
91
92 result = lineprocessor(&line)?;
93 }
94
95 Ok(result)
96}
97
#[cfg(test)]
mod tests {
    use super::*;

    /// Lowercases only when case sensitivity is disabled.
    #[test]
    fn test_normalize_string() {
        let folded = normalize_string("Hello", false);
        let untouched = normalize_string("Hello", true);

        assert_eq!(folded, "hello");
        assert_eq!(untouched, "Hello");
    }

    /// Punctuation at word edges is stripped and words are lowercased.
    #[test]
    fn test_extract_words() {
        let expected = vec!["hello", "world", "this", "is", "a", "test"];

        assert_eq!(extract_words("Hello, world! This is a test."), expected);
    }

    /// Sentences are split at terminators, which are dropped from the output.
    #[test]
    fn test_split_sentences() {
        let expected = vec!["Hello, world", "This is a test", "Another sentence"];
        let actual = split_sentences("Hello, world! This is a test. Another sentence.");

        assert_eq!(actual, expected);
    }

    /// Length differences up to the threshold pass; larger ones fail.
    #[test]
    fn test_is_within_length_threshold() {
        let cases = [
            ("hello", "hello", true),
            ("hello", "hell", true),
            ("hello", "helloo", true),
            ("hello", "hi", false),
            ("hello", "hello world", false),
        ];

        for &(left, right, expected) in cases.iter() {
            // Comparing tuples keeps the offending inputs visible on failure.
            assert_eq!(
                (left, right, is_within_length_threshold(left, right, 2)),
                (left, right, expected)
            );
        }
    }

    /// Exact match in case-sensitive mode; case-folded match otherwise.
    #[test]
    fn test_dictionary_contains() {
        let dictionary: HashMap<String, usize> = [("Hello", 10), ("World", 20)]
            .iter()
            .map(|&(word, count)| (word.to_string(), count))
            .collect();

        // Case-sensitive lookups require the exact key.
        assert!(dictionary_contains(&dictionary, "Hello", true));
        assert!(!dictionary_contains(&dictionary, "hello", true));

        // Case-insensitive lookups ignore the casing of both sides.
        assert!(dictionary_contains(&dictionary, "hello", false));
        assert!(dictionary_contains(&dictionary, "WORLD", false));
        assert!(!dictionary_contains(&dictionary, "test", false));
    }
}