evolution_parser/
datatype.rs

1//
2// MIT License
3//
4// Copyright (c) 2023-2024 Firelink Data
5//
6// Permission is hereby granted, free of charge, to any person obtaining a copy
7// of this software and associated documentation files (the "Software"), to deal
8// in the Software without restriction, including without limitation the rights
9// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10// copies of the Software, and to permit persons to whom the Software is
11// furnished to do so, subject to the following conditions:
12//
13// The above copyright notice and this permission notice shall be included in all
14// copies or substantial portions of the Software.
15//
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22// SOFTWARE.
23//
24// File created: 2024-05-08
25// Last updated: 2024-06-01
26//
27
28use padder::{Alignment, Symbol};
29
30use std::str::{from_utf8_unchecked, FromStr};
31use std::usize;
32
33use crate::parser::Parser;
34use crate::trimmer::{FloatTrimmer, IntTrimmer, TextTrimmer};
35
36///
37pub struct BooleanParser {
38    trimmer: TextTrimmer,
39}
40
41impl BooleanParser {
42    ///
43    pub fn new(alignment: Alignment, trim_symbol: Symbol) -> Self {
44        Self {
45            trimmer: TextTrimmer::new(alignment, trim_symbol),
46        }
47    }
48
49    /// Try and parse the byte slice as UTF-8 characters and count the number of
50    /// bytes that the boolean was represented as in the byte slice.
51    ///
52    /// # Safety
53    /// This function utilizes the [`from_utf8_unchecked`] function to convert the byte
54    /// slice to a string representation. This method is inherently unsafe and might
55    /// cause the program to panic. We have to assume that the input bytes are valid
56    /// UTF-8, because recovering from the situation where the bytes were not valid UTF-8
57    /// is not possible since then we don't know how far into the buffer we need to read.
58    ///
59    /// # Performance
60    /// The function [`from_utf8_unchecked`] will put the string slice on the stack and not
61    /// perform any heap allocations. As such, we need to know the lifetimes of it.
62    pub fn try_parse(&self, bytes: &[u8], n_runes: usize) -> (usize, Option<bool>) {
63        let end_byte_idx: usize = self.trimmer.find_byte_indices(bytes, n_runes);
64        let text: &str = unsafe { from_utf8_unchecked(&bytes[..end_byte_idx]) };
65
66        (end_byte_idx, self.trimmer.trim(text).parse::<bool>().ok())
67    }
68}
69
70impl Parser for BooleanParser {}
71
72///
73pub struct FloatParser {
74    trimmer: FloatTrimmer,
75}
76
77impl FloatParser {
78    ///
79    pub fn new(alignment: Alignment, trim_symbol: Symbol) -> Self {
80        Self {
81            trimmer: FloatTrimmer::new(alignment, trim_symbol),
82        }
83    }
84
85    /// Try and parse the byte slice as UTF-8 characters and count the number of
86    /// bytes that the boolean was represented as in the byte slice.
87    ///
88    /// # Safety
89    /// This function utilizes the [`from_utf8_unchecked`] function to convert the byte
90    /// slice to a string representation. This method is inherently unsafe and might
91    /// cause the program to panic. We have to assume that the input bytes are valid
92    /// UTF-8, because recovering from the situation where the bytes were not valid UTF-8
93    /// is not possible since then we don't know how far into the buffer we need to read.
94    ///
95    /// # Performance
96    /// The function [`from_utf8_unchecked`] will put the string slice on the stack and not
97    /// perform any heap allocations. As such, we need to know the lifetimes of it.
98    pub fn try_parse<T>(&self, bytes: &[u8], n_runes: usize) -> (usize, Option<T>)
99    where
100        T: FromStr,
101    {
102        let end_byte_idx: usize = self.trimmer.find_byte_indices(bytes, n_runes);
103
104        // TODO THIS SHOULD NOT BE CAST TO STRING SLICE, WE CAN GO DIRECTLY TO
105        // FLOAT WITH SIMD?
106        let text: &str = unsafe { from_utf8_unchecked(&bytes[..end_byte_idx]) };
107
108        (end_byte_idx, self.trimmer.trim(text).parse::<T>().ok())
109    }
110}
111
112impl Parser for FloatParser {}
113
114///
115pub struct IntParser {
116    trimmer: IntTrimmer,
117}
118
119impl IntParser {
120    ///
121    pub fn new() -> Self {
122        Self {
123            trimmer: IntTrimmer::new(),
124        }
125    }
126
127    /// Try and parse the byte slice as UTF-8 characters and count the number of
128    /// bytes that the boolean was represented as in the byte slice.
129    pub fn try_parse<T>(&self, bytes: &[u8], n_runes: usize) -> (usize, Option<T>)
130    where
131        T: atoi_simd::Parse + atoi_simd::ParseNeg,
132    {
133        let (start_byte_idx, end_byte_idx, n_bytes_in_column): (usize, usize, usize) =
134            self.trimmer.find_byte_indices(bytes, n_runes);
135
136        let value: Option<T> = atoi_simd::parse::<T>(&bytes[start_byte_idx..end_byte_idx]).ok();
137
138        (n_bytes_in_column, value)
139    }
140}
141
142impl Parser for IntParser {}
143
144///
145pub struct Utf8Parser {
146    trimmer: TextTrimmer,
147}
148
149impl Utf8Parser {
150    ///
151    pub fn new(alignment: Alignment, trim_symbol: Symbol) -> Self {
152        Self {
153            trimmer: TextTrimmer::new(alignment, trim_symbol),
154        }
155    }
156
157    /// Try and parse the byte slice as UTF-8 characters and count the number of
158    /// bytes that the boolean was represented as in the byte slice.
159    ///
160    /// # Safety
161    /// This function utilizes the [`from_utf8_unchecked`] function to convert the byte
162    /// slice to a string representation. This method is inherently unsafe and might
163    /// cause the program to panic. We have to assume that the input bytes are valid
164    /// UTF-8, because recovering from the situation where the bytes were not valid UTF-8
165    /// is not possible since then we don't know how far into the buffer we need to read.
166    ///
167    /// # Performance
168    /// The function [`from_utf8_unchecked`] will put the string slice on the stack and not
169    /// perform any heap allocations. As such, we need to know the lifetimes of it.
170    pub fn try_parse<'a>(&self, bytes: &'a [u8], n_runes: usize) -> (usize, Option<&'a str>) {
171        let end_byte_idx: usize = self.trimmer.find_byte_indices(bytes, n_runes);
172        let text: &'a str = unsafe { from_utf8_unchecked(&bytes[..end_byte_idx]) };
173
174        (end_byte_idx, Some(self.trimmer.trim(text)))
175    }
176}
177
178impl Parser for Utf8Parser {}