txt_processor/
processor.rs1extern crate itertools;
2extern crate regex;
3
4use self::itertools::Itertools;
5use self::regex::Regex;
6use std::fs::OpenOptions;
7use std::io::Read;
8use std::io::Write;
9
/// Counts the words in `text`.
///
/// A word is a maximal run of characters that contains none of the delimiter
/// characters listed below (whitespace, common punctuation and NUL). Runs of
/// consecutive delimiters never produce empty words.
///
/// Returns `0` for empty or delimiter-only input.
#[inline]
fn count_words(text: &str) -> usize {
    // Same delimiter set as `collect_words`, so the count always matches the
    // number of words that function yields.
    const WORD_DELIMITERS: &[char] = &[
        ' ', '\n', '\t', ',', '.', '!', '?', ';', ':', '"', '(', ')', '\0',
    ];

    text.split(WORD_DELIMITERS)
        // Adjacent delimiters produce empty fragments; those are not words.
        .filter(|fragment| !fragment.is_empty())
        .count()
}
28
/// Splits `text` into its words, in order of appearance.
///
/// A word is a maximal run of characters that contains none of the delimiter
/// characters listed below (whitespace, common punctuation and NUL). The final
/// word is returned even when `text` does not end with a delimiter.
///
/// Returns an empty vector for empty or delimiter-only input.
#[inline]
fn collect_words(text: &str) -> Vec<String> {
    const WORD_DELIMITERS: &[char] = &[
        ' ', '\n', '\t', ',', '.', '!', '?', ';', ':', '"', '(', ')', '\0',
    ];

    text.split(WORD_DELIMITERS)
        // Adjacent delimiters produce empty fragments; those are not words.
        .filter(|fragment| !fragment.is_empty())
        .map(String::from)
        .collect()
}
50
/// A piece of text held in memory together with cached statistics about it.
#[derive(Debug)]
pub struct TxtProcessor {
    /// The text being processed.
    pub(crate) content: String,
    /// Path of the file the text was loaded from; `None` when the processor
    /// was built directly from a string.
    pub(crate) file_path: Option<String>,
    /// Cached number of lines in the text.
    pub(crate) lines: usize,
    /// Cached number of words in the text.
    pub(crate) words: usize,
}
59
/// Error type used by this module: a thin wrapper around a human-readable
/// message.
#[derive(Clone, Debug)]
pub struct Error(pub String);

impl std::fmt::Display for Error {
    /// Writes the wrapped message, honouring any width/alignment flags set on
    /// `formatter`.
    #[inline]
    fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Fully qualified call: method syntax (`self.0.fmt(..)`) only resolves
        // when `Display` has been imported, and would be ambiguous with
        // `Debug::fmt` if both traits were in scope.
        std::fmt::Display::fmt(&self.0, formatter)
    }
}

impl std::error::Error for Error {
    /// Returns the wrapped message.
    ///
    /// `description` is deprecated in the standard library in favour of
    /// `Display`; it is still overridden here so existing callers keep
    /// receiving the message rather than the generic default text.
    #[inline]
    fn description(&self) -> &str {
        &self.0
    }
}
77
78impl TxtProcessor {
79 pub fn from_file(file_path: &str) -> Result<Self, Error> {
81 let mut str = String::new();
82 let file = OpenOptions::new().read(true).open(file_path);
83 let mut file = match file {
84 Ok(file) => file,
85 Err(_) => {
86 return Err(Error(format!(
87 "Can't open file with filename '{file_path}'"
88 )))
89 }
90 };
91
92 let res = file.read_to_string(&mut str);
93 match res {
94 Ok(_) => (),
95 Err(_) => {
96 return Err(Error(format!(
97 "Can't read file with filename '{file_path}'"
98 )))
99 }
100 };
101
102 let lines = str.lines().count();
103 let words = count_words(&str);
104 str.push(' ');
105 Ok(TxtProcessor {
106 content: str,
107 file_path: Some(file_path.to_string()),
108 lines,
109 words,
110 })
111 }
112 pub fn from_str(str: String) -> Self {
114 let lines = str.lines().count();
115 let words = count_words(&str);
116 TxtProcessor {
117 content: str,
118 file_path: None,
119 lines,
120 words,
121 }
122 }
123 pub fn count_word_occurences(&self, word: &str) -> usize {
125 let words = collect_words(&self.content);
126 let mut c = 0;
127 for w in words {
128 if w == word {
129 c += 1;
130 }
131 }
132 c
133 }
134 pub fn contains(&self, word: &str) -> bool {
136 collect_words(&self.content).contains(&word.to_string())
137 }
138 pub fn find_unique(&self) -> Vec<String> {
140 let words = collect_words(&self.content);
141 words.into_iter().unique().collect::<Vec<String>>()
142 }
143 pub fn filter(&self, condition: &dyn Fn(&str) -> bool) -> Vec<String> {
145 let words = collect_words(&self.content);
146 let mut ret = vec![];
147 for w in words {
148 if condition(&w) {
149 ret.push(w);
150 }
151 }
152 ret
153 }
154 pub fn find_first_occurence(&self, word: &str) -> Result<usize, Error> {
156 let words = collect_words(&self.content);
157 let res = words.iter().position(|w| w == word);
158 match res {
159 Some(index) => Ok(index),
160 None => return Err(Error(format!("No word '{word}' in content"))),
161 }
162 }
163 pub fn search_occurences(&self, word: &str) -> Vec<usize> {
165 let words = collect_words(&self.content);
166 let mut res = vec![];
167 let mut c = 0;
168 for w in words {
169 if w == word {
170 res.push(c);
171 }
172 c += 1;
173 }
174 res
175 }
176 pub fn search_with_index(&self, index: usize) -> Result<String, Error> {
178 let words = collect_words(&self.content);
179 if index < words.len() {
180 Ok(words[index].clone())
181 } else {
182 Err(Error(String::from("Wrong index value")))
183 }
184 }
185 pub fn replace_word(&mut self, from: &str, to: &str) {
187 let regex = Regex::new(&(r"\b".to_string() + from + r"\b")).unwrap();
188 self.content = regex.replace_all(&self.content, to).to_string();
189 self.words = count_words(&self.content);
190 }
191 pub fn replace_char(&mut self, from: char, to: char) {
193 self.content = self.content.replace(from, &to.to_string());
194 self.words = count_words(&self.content);
195 }
196 pub fn rewrite_file(&self, file_path: &str) -> Result<(), Error> {
198 let file = OpenOptions::new().write(true).open(file_path);
199 let mut file = match file {
200 Ok(file) => file,
201 Err(_) => {
202 return Err(Error(format!(
203 "Can't open file with filename '{file_path}'"
204 )))
205 }
206 };
207 let res = write!(file, "{}", self.content);
208 match res {
209 Ok(_) => Ok(()),
210 Err(_) => {
211 return Err(Error(format!(
212 "Can't write to file with filename '{file_path}'"
213 )))
214 }
215 }
216 }
217 pub fn append_file(&self, file_path: &str) -> Result<(), Error> {
219 let file = OpenOptions::new().append(true).open(file_path);
220 let mut file = match file {
221 Ok(file) => file,
222 Err(_) => {
223 return Err(Error(format!(
224 "Can't open file with filename '{file_path}'"
225 )))
226 }
227 };
228 let res = write!(file, "{}", self.content);
229 match res {
230 Ok(_) => Ok(()),
231 Err(_) => {
232 return Err(Error(format!(
233 "Can't write to file with filename '{file_path}'"
234 )))
235 }
236 }
237 }
238 pub fn fix(&mut self) {
240 let mut result = String::new();
241 let word_delimiters = [',', '.', '!', '?', ';', ':', '"', '(', ')'];
242 let mut i = 0;
243
244 for c in self.content.chars() {
245 if i < self.content.chars().count()-1 {
246 if !word_delimiters.contains(&c) {
247 result.push(c);
248 }
249 else if self.content.chars().nth(i + 1).unwrap() != ' ' && self.content.chars().nth(i + 1).unwrap() != '\n' {
250 result.push(c);
251 result.push(' ');
252 }
253 else {
254 result.push(c);
255 }
256 }
257 i += 1;
258 }
259 result.push(self.content.chars().last().unwrap());
260 self.content = result;
261 }
262 pub fn search_with_regex(&self, regex: Regex) -> Vec<String> {
264 let mut w = vec![];
265 for cap in regex.find_iter(&self.content) {
266 w.push(String::from(cap.as_str()));
267 }
268 w
269 }
270 pub fn add_to_content(&mut self, text: String) {
272 let w = count_words(&text);
273 self.content = self.content.clone() + &text;
274 self.words = self.words + w;
275 self.lines = self.lines + text.lines().count();
276 }
277
278 pub fn get_lines(&self) -> usize {
280 self.lines
281 }
282 pub fn get_words(&self) -> usize {
284 self.words
285 }
286 pub fn get_content(&self) -> String {
288 self.content.clone()
289 }
290 pub fn get_file_path(&self) -> Option<String> {
292 self.file_path.clone()
293 }
294}