evolution_parser/
trimmer.rs

1//
2// MIT License
3//
4// Copyright (c) 2023-2024 Firelink Data
5//
6// Permission is hereby granted, free of charge, to any person obtaining a copy
7// of this software and associated documentation files (the "Software"), to deal
8// in the Software without restriction, including without limitation the rights
9// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10// copies of the Software, and to permit persons to whom the Software is
11// furnished to do so, subject to the following conditions:
12//
13// The above copyright notice and this permission notice shall be included in all
14// copies or substantial portions of the Software.
15//
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22// SOFTWARE.
23//
24// File created: 2024-05-31
25// Last updated: 2024-06-01
26//
27
28use log::warn;
29use padder::{Alignment, Symbol};
30
31use std::slice::Iter;
32
/// Marker trait implemented by every trimmer type in this module.
///
/// It declares no methods; it exists so that the different trimmers can be
/// stored behind a single trait object type (see [`TrimmerRef`]).
pub trait Trimmer {}
35
/// Owned, heap-allocated trait object holding any [`Trimmer`] implementation.
pub type TrimmerRef = Box<dyn Trimmer>;
38
39///
40pub struct TextTrimmer {
41    alignment: Alignment,
42    symbol: char,
43}
44
45impl TextTrimmer {
46    ///
47    pub fn new(alignment: Alignment, symbol: Symbol) -> Self {
48        Self {
49            alignment,
50            symbol: symbol.into(),
51        }
52    }
53
54    ///
55    pub fn find_byte_indices(&self, bytes: &[u8], n_runes: usize) -> usize {
56        let mut utf8_byte_unit: usize = 1;
57        let mut n_bytes_read: usize = 0;
58        let mut n_found_runes: usize = 0;
59
60        let mut iterator: Iter<u8> = bytes.iter();
61
62        while n_found_runes < n_runes {
63            let byte: u8 = match iterator.nth(utf8_byte_unit - 1) {
64                Some(b) => *b,
65                None => break,
66            };
67
68            utf8_byte_unit = match byte {
69                byte if byte >> 7 == 0 => 1,
70                byte if byte >> 5 == 0b110 => 2,
71                byte if byte >> 4 == 0b1110 => 3,
72                byte if byte >> 3 == 0b11110 => 4,
73                _ => panic!("Couldn't parse byte slice, invalid UTF-8 sequence!"),
74            };
75
76            n_found_runes += 1;
77            n_bytes_read += utf8_byte_unit;
78        }
79
80        if n_found_runes != n_runes {
81            warn!("Read the entire byte slice but did not find enough runes...");
82        }
83
84        n_bytes_read
85    }
86
87    ///
88    pub fn trim<'a>(&self, text: &'a str) -> &'a str {
89        match self.alignment {
90            Alignment::Left => text.trim_end_matches::<char>(self.symbol),
91            Alignment::Right => text.trim_start_matches::<char>(self.symbol),
92            Alignment::Center => text.trim_matches::<char>(self.symbol),
93        }
94    }
95}
96
// Marks `TextTrimmer` as a `Trimmer` so it can be stored as a `Box<dyn Trimmer>`.
impl Trimmer for TextTrimmer {}
98
/// Trimmer for integer fields: locates the run of ASCII digits (optionally
/// preceded by a minus sign) inside a UTF-8 encoded byte slice.
#[derive(Debug, Default)]
pub struct IntTrimmer {}

impl IntTrimmer {
    /// Creates a new integer trimmer. The type carries no state.
    pub fn new() -> Self {
        Self {}
    }

    /// Returns the number of bytes in the UTF-8 sequence introduced by `lead`.
    ///
    /// # Panics
    /// Panics if `lead` is not a valid UTF-8 lead byte (for example a
    /// continuation byte).
    fn utf8_width(lead: u8) -> usize {
        match lead {
            0x00..=0x7F => 1,
            0xC0..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF7 => 4,
            _ => panic!("Couldn't parse byte slice, invalid UTF-8 sequence!"),
        }
    }

    /// Scans at most `n_runes` UTF-8 runes of `bytes` and locates the first number.
    ///
    /// A number starts at the first ASCII digit or `-` and ends at the first
    /// following rune that is not an ASCII digit, or after the last scanned rune.
    ///
    /// Returns `(start_byte_idx, stop_byte_idx, n_bytes_read)` where
    /// `bytes[start_byte_idx..stop_byte_idx]` is the number (the range is empty,
    /// `0..0`, when no number was found) and `n_bytes_read` is the total number of
    /// bytes occupied by the scanned runes, never larger than `bytes.len()`.
    ///
    /// # Panics
    /// Panics if a byte found at a rune boundary is not a valid UTF-8 lead byte.
    pub fn find_byte_indices(&self, bytes: &[u8], n_runes: usize) -> (usize, usize, usize) {
        // Byte offset of the lead byte of the rune currently being inspected.
        let mut offset: usize = 0;
        let mut n_found_runes: usize = 0;
        let mut start: Option<usize> = None;
        let mut stop: Option<usize> = None;

        while n_found_runes < n_runes {
            let byte: u8 = match bytes.get(offset) {
                Some(&b) => b,
                None => break,
            };
            let width = Self::utf8_width(byte);

            if stop.is_none() {
                if byte.is_ascii_digit() {
                    if start.is_none() {
                        start = Some(offset);
                    }
                    // The last requested rune is a digit: the number ends right after it.
                    if n_found_runes + 1 == n_runes {
                        stop = Some(offset + 1);
                    }
                } else if start.is_some() {
                    // First non-digit after the number. The end is this rune's own
                    // offset, independent of how many bytes the rune occupies.
                    stop = Some(offset);
                } else if byte == b'-' {
                    // A negative number starts here!
                    start = Some(offset);
                }
            }

            offset += width;
            n_found_runes += 1;
        }

        // A sequence truncated at the end of the slice must not push the count
        // past the data that actually exists.
        let n_bytes_read = offset.min(bytes.len());

        let stop_byte_idx = match (start, stop) {
            (_, Some(idx)) => idx,
            // The number ran up to the end of the scanned bytes without a terminator.
            (Some(_), None) => n_bytes_read,
            (None, None) => 0,
        };

        (start.unwrap_or(0), stop_byte_idx, n_bytes_read)
    }
}
169
// Marks `IntTrimmer` as a `Trimmer` so it can be stored as a `Box<dyn Trimmer>`.
impl Trimmer for IntTrimmer {}
171
172///
173pub struct FloatTrimmer {
174    alignment: Alignment,
175    symbol: char,
176}
177
178impl FloatTrimmer {
179    ///
180    pub fn new(alignment: Alignment, symbol: Symbol) -> Self {
181        Self {
182            alignment,
183            symbol: symbol.into(),
184        }
185    }
186
187    /// TODO: this can be a specific int implementation, for now it is like any
188    /// other datatype.
189    pub fn find_byte_indices(&self, bytes: &[u8], n_runes: usize) -> usize {
190        let mut utf8_byte_unit: usize = 1;
191        let mut n_bytes: usize = 0;
192        let mut found_runes: usize = 0;
193
194        let mut iterator: Iter<u8> = bytes.iter();
195
196        while found_runes < n_runes {
197            let byte: u8 = match iterator.nth(utf8_byte_unit - 1) {
198                Some(b) => *b,
199                None => break,
200            };
201
202            utf8_byte_unit = match byte {
203                byte if byte >> 7 == 0 => 1,
204                byte if byte >> 5 == 0b110 => 2,
205                byte if byte >> 4 == 0b1110 => 3,
206                byte if byte >> 3 == 0b11110 => 4,
207                _ => panic!("Couldn't parse byte slice, invalid UTF-8 sequence!"),
208            };
209
210            found_runes += 1;
211            n_bytes += utf8_byte_unit;
212        }
213
214        if found_runes != n_runes {
215            warn!("Read the entire byte slice but did not find enough runes...");
216        }
217
218        n_bytes
219    }
220
221    ///
222    pub fn trim<'a>(&self, text: &'a str) -> &'a str {
223        match self.alignment {
224            Alignment::Left => text.trim_end_matches::<char>(self.symbol),
225            Alignment::Right => text.trim_start_matches::<char>(self.symbol),
226            Alignment::Center => text.trim_matches::<char>(self.symbol),
227        }
228    }
229}
230
// Marks `FloatTrimmer` as a `Trimmer` so it can be stored as a `Box<dyn Trimmer>`.
impl Trimmer for FloatTrimmer {}