txt_processor/
processor.rs

1extern crate itertools;
2extern crate regex;
3
4use self::itertools::Itertools;
5use self::regex::Regex;
6use std::fs::OpenOptions;
7use std::io::Read;
8use std::io::Write;
9
/// Counts the words in `text`.
///
/// A word is a maximal run of characters that contains none of the delimiter
/// characters listed below. The delimiter set is the same one `collect_words`
/// uses (including `'\0'`), so the count matches the number of words that
/// helper can see.
#[inline]
fn count_words(text: &str) -> usize {
    const WORD_DELIMITERS: [char; 13] = [
        ' ', '\n', '\t', ',', '.', '!', '?', ';', ':', '"', '(', ')', '\0',
    ];

    // Splitting on every delimiter yields empty pieces between adjacent
    // delimiters; only the non-empty pieces are words.
    text.split(&WORD_DELIMITERS[..])
        .filter(|piece| !piece.is_empty())
        .count()
}
28
/// Splits `text` into its words, in order of appearance.
///
/// A word is a maximal run of characters that contains none of the delimiter
/// characters listed below. The final word is returned even when `text` does
/// not end with a delimiter.
#[inline]
fn collect_words(text: &str) -> Vec<String> {
    const WORD_DELIMITERS: [char; 13] = [
        ' ', '\n', '\t', ',', '.', '!', '?', ';', ':', '"', '(', ')', '\0',
    ];

    // Adjacent delimiters produce empty pieces, which are not words.
    text.split(&WORD_DELIMITERS[..])
        .filter(|piece| !piece.is_empty())
        .map(String::from)
        .collect()
}
50
/// A block of text together with cached statistics about it.
#[derive(Debug)]
pub struct TxtProcessor {
    /// The text being processed.
    pub(crate) content: String,
    /// Path the text was loaded from, or `None` when it was built from a string.
    pub(crate) file_path: Option<String>,
    /// Cached number of lines.
    pub(crate) lines: usize,
    /// Cached number of words.
    pub(crate) words: usize,
}
59
/// Error type of this module: a newtype around a human-readable message.
#[derive(Clone, Debug)]
pub struct Error(
    /// The message describing what went wrong.
    pub String,
);
63
64impl std::fmt::Display for Error {
65    #[inline]
66    fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
67        self.0.fmt(formatter)
68    }
69}
70
71impl std::error::Error for Error {
72    #[inline]
73    fn description(&self) -> &str {
74        &self.0
75    }
76}
77
78impl TxtProcessor { 
79    /// Reads text from file
80    pub fn from_file(file_path: &str) -> Result<Self, Error> {
81        let mut str = String::new();
82        let file = OpenOptions::new().read(true).open(file_path);
83        let mut file = match file {
84            Ok(file) => file,
85            Err(_) => {
86                return Err(Error(format!(
87                    "Can't open file with filename '{file_path}'"
88                )))
89            }
90        };
91
92        let res = file.read_to_string(&mut str);
93        match res {
94            Ok(_) => (),
95            Err(_) => {
96                return Err(Error(format!(
97                    "Can't read file with filename '{file_path}'"
98                )))
99            }
100        };
101
102        let lines = str.lines().count();
103        let words = count_words(&str);
104        str.push(' ');
105        Ok(TxtProcessor {
106            content: str,
107            file_path: Some(file_path.to_string()),
108            lines,
109            words,
110        })
111    }
112    /// Reads text from string
113    pub fn from_str(str: String) -> Self {
114        let lines = str.lines().count();
115        let words = count_words(&str);
116        TxtProcessor {
117            content: str,
118            file_path: None,
119            lines,
120            words,
121        }
122    }
123    /// Reads text from file
124    pub fn count_word_occurences(&self, word: &str) -> usize {
125        let words = collect_words(&self.content);
126        let mut c = 0;
127        for w in words {
128            if w == word {
129                c += 1;
130            }
131        }
132        c
133    }
134    /// Counts how many times a word occurs in a text
135    pub fn contains(&self, word: &str) -> bool {
136        collect_words(&self.content).contains(&word.to_string())
137    }
138    /// Finds unique words in a text
139    pub fn find_unique(&self) -> Vec<String> {
140        let words = collect_words(&self.content);
141        words.into_iter().unique().collect::<Vec<String>>()
142    }
143    /// Finds all words matching the condition
144    pub fn filter(&self, condition: &dyn Fn(&str) -> bool) -> Vec<String> {
145        let words = collect_words(&self.content);
146        let mut ret = vec![];
147        for w in words {
148            if condition(&w) {
149                ret.push(w);
150            }
151        }
152        ret
153    }
154    /// Finds the first ocuurence of a word in a text
155    pub fn find_first_occurence(&self, word: &str) -> Result<usize, Error> {
156        let words = collect_words(&self.content);
157        let res = words.iter().position(|w| w == word);
158        match res {
159            Some(index) => Ok(index),
160            None => return Err(Error(format!("No word '{word}' in content"))),
161        }
162    }
163    /// Finds all occurrences of a word in a text
164    pub fn search_occurences(&self, word: &str) -> Vec<usize> {
165        let words = collect_words(&self.content);
166        let mut res = vec![];
167        let mut c = 0;
168        for w in words {
169            if w == word {
170                res.push(c);
171            }
172            c += 1;
173        }
174        res
175    }
176    /// Finds word on that position in a text
177    pub fn search_with_index(&self, index: usize) -> Result<String, Error> {
178        let words = collect_words(&self.content);
179        if index < words.len() {
180            Ok(words[index].clone())
181        } else {
182            Err(Error(String::from("Wrong index value")))
183        }
184    }
185    /// Replaces all words 'from' on word 'to'
186    pub fn replace_word(&mut self, from: &str, to: &str) {
187        let regex = Regex::new(&(r"\b".to_string() + from + r"\b")).unwrap();
188        self.content = regex.replace_all(&self.content, to).to_string();
189        self.words = count_words(&self.content);
190    }
191    /// Replaces all characters 'from' on character 'to'
192    pub fn replace_char(&mut self, from: char, to: char) {
193        self.content = self.content.replace(from, &to.to_string());
194        self.words = count_words(&self.content);
195    }
196    /// Rewrites file with self.content
197    pub fn rewrite_file(&self, file_path: &str) -> Result<(), Error> {
198        let file = OpenOptions::new().write(true).open(file_path);
199        let mut file = match file {
200            Ok(file) => file,
201            Err(_) => {
202                return Err(Error(format!(
203                    "Can't open file with filename '{file_path}'"
204                )))
205            }
206        };
207        let res = write!(file, "{}", self.content);
208        match res {
209            Ok(_) => Ok(()),
210            Err(_) => {
211                return Err(Error(format!(
212                    "Can't write to file with filename '{file_path}'"
213                )))
214            }
215        }
216    }
217    /// Appends self.content to file
218    pub fn append_file(&self, file_path: &str) -> Result<(), Error> {
219        let file = OpenOptions::new().append(true).open(file_path);
220        let mut file = match file {
221            Ok(file) => file,
222            Err(_) => {
223                return Err(Error(format!(
224                    "Can't open file with filename '{file_path}'"
225                )))
226            }
227        };
228        let res = write!(file, "{}", self.content);
229        match res {
230            Ok(_) => Ok(()),
231            Err(_) => {
232                return Err(Error(format!(
233                    "Can't write to file with filename '{file_path}'"
234                )))
235            }
236        }
237    }
238    /// Fix spaces in text
239    pub fn fix(&mut self) {
240        let mut result = String::new();
241        let word_delimiters = [',', '.', '!', '?', ';', ':', '"', '(', ')'];
242        let mut i = 0;
243
244        for c in self.content.chars() {
245            if i < self.content.chars().count()-1 {
246                if !word_delimiters.contains(&c) {
247                    result.push(c);
248                }
249                else if self.content.chars().nth(i + 1).unwrap() != ' ' && self.content.chars().nth(i + 1).unwrap() != '\n' {
250                    result.push(c);
251                    result.push(' ');
252                } 
253                else {
254                    result.push(c);
255                }
256            }
257            i += 1;
258        }
259        result.push(self.content.chars().last().unwrap());
260        self.content = result;
261    }
262    /// Search for all words matching this regular expression
263    pub fn search_with_regex(&self, regex: Regex) -> Vec<String> {
264        let mut w = vec![];
265        for cap in regex.find_iter(&self.content) {
266            w.push(String::from(cap.as_str()));
267        }
268        w
269    }
270    /// Adds text to this TxtProcessor's content
271    pub fn add_to_content(&mut self, text: String) {
272        let w = count_words(&text);
273        self.content = self.content.clone() + &text;
274        self.words = self.words + w;
275        self.lines = self.lines + text.lines().count();
276    }
277
278    /// Returns lines amount
279    pub fn get_lines(&self) -> usize {
280        self.lines
281    }
282    /// Returns words amount
283    pub fn get_words(&self) -> usize {
284        self.words
285    }
286    /// Returns content
287    pub fn get_content(&self) -> String {
288        self.content.clone()
289    }
290    /// Returns file path
291    pub fn get_file_path(&self) -> Option<String> {
292        self.file_path.clone()
293    }
294}