evolution_parser/datatype.rs
1//
2// MIT License
3//
4// Copyright (c) 2023-2024 Firelink Data
5//
6// Permission is hereby granted, free of charge, to any person obtaining a copy
7// of this software and associated documentation files (the "Software"), to deal
8// in the Software without restriction, including without limitation the rights
9// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10// copies of the Software, and to permit persons to whom the Software is
11// furnished to do so, subject to the following conditions:
12//
13// The above copyright notice and this permission notice shall be included in all
14// copies or substantial portions of the Software.
15//
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22// SOFTWARE.
23//
24// File created: 2024-05-08
25// Last updated: 2024-06-01
26//
27
28use padder::{Alignment, Symbol};
29
30use std::str::{from_utf8_unchecked, FromStr};
31use std::usize;
32
33use crate::parser::Parser;
34use crate::trimmer::{FloatTrimmer, IntTrimmer, TextTrimmer};
35
36///
37pub struct BooleanParser {
38 trimmer: TextTrimmer,
39}
40
41impl BooleanParser {
42 ///
43 pub fn new(alignment: Alignment, trim_symbol: Symbol) -> Self {
44 Self {
45 trimmer: TextTrimmer::new(alignment, trim_symbol),
46 }
47 }
48
49 /// Try and parse the byte slice as UTF-8 characters and count the number of
50 /// bytes that the boolean was represented as in the byte slice.
51 ///
52 /// # Safety
53 /// This function utilizes the [`from_utf8_unchecked`] function to convert the byte
54 /// slice to a string representation. This method is inherently unsafe and might
55 /// cause the program to panic. We have to assume that the input bytes are valid
56 /// UTF-8, because recovering from the situation where the bytes were not valid UTF-8
57 /// is not possible since then we don't know how far into the buffer we need to read.
58 ///
59 /// # Performance
60 /// The function [`from_utf8_unchecked`] will put the string slice on the stack and not
61 /// perform any heap allocations. As such, we need to know the lifetimes of it.
62 pub fn try_parse(&self, bytes: &[u8], n_runes: usize) -> (usize, Option<bool>) {
63 let end_byte_idx: usize = self.trimmer.find_byte_indices(bytes, n_runes);
64 let text: &str = unsafe { from_utf8_unchecked(&bytes[..end_byte_idx]) };
65
66 (end_byte_idx, self.trimmer.trim(text).parse::<bool>().ok())
67 }
68}
69
70impl Parser for BooleanParser {}
71
72///
73pub struct FloatParser {
74 trimmer: FloatTrimmer,
75}
76
77impl FloatParser {
78 ///
79 pub fn new(alignment: Alignment, trim_symbol: Symbol) -> Self {
80 Self {
81 trimmer: FloatTrimmer::new(alignment, trim_symbol),
82 }
83 }
84
85 /// Try and parse the byte slice as UTF-8 characters and count the number of
86 /// bytes that the boolean was represented as in the byte slice.
87 ///
88 /// # Safety
89 /// This function utilizes the [`from_utf8_unchecked`] function to convert the byte
90 /// slice to a string representation. This method is inherently unsafe and might
91 /// cause the program to panic. We have to assume that the input bytes are valid
92 /// UTF-8, because recovering from the situation where the bytes were not valid UTF-8
93 /// is not possible since then we don't know how far into the buffer we need to read.
94 ///
95 /// # Performance
96 /// The function [`from_utf8_unchecked`] will put the string slice on the stack and not
97 /// perform any heap allocations. As such, we need to know the lifetimes of it.
98 pub fn try_parse<T>(&self, bytes: &[u8], n_runes: usize) -> (usize, Option<T>)
99 where
100 T: FromStr,
101 {
102 let end_byte_idx: usize = self.trimmer.find_byte_indices(bytes, n_runes);
103
104 // TODO THIS SHOULD NOT BE CAST TO STRING SLICE, WE CAN GO DIRECTLY TO
105 // FLOAT WITH SIMD?
106 let text: &str = unsafe { from_utf8_unchecked(&bytes[..end_byte_idx]) };
107
108 (end_byte_idx, self.trimmer.trim(text).parse::<T>().ok())
109 }
110}
111
112impl Parser for FloatParser {}
113
114///
115pub struct IntParser {
116 trimmer: IntTrimmer,
117}
118
119impl IntParser {
120 ///
121 pub fn new() -> Self {
122 Self {
123 trimmer: IntTrimmer::new(),
124 }
125 }
126
127 /// Try and parse the byte slice as UTF-8 characters and count the number of
128 /// bytes that the boolean was represented as in the byte slice.
129 pub fn try_parse<T>(&self, bytes: &[u8], n_runes: usize) -> (usize, Option<T>)
130 where
131 T: atoi_simd::Parse + atoi_simd::ParseNeg,
132 {
133 let (start_byte_idx, end_byte_idx, n_bytes_in_column): (usize, usize, usize) =
134 self.trimmer.find_byte_indices(bytes, n_runes);
135
136 let value: Option<T> = atoi_simd::parse::<T>(&bytes[start_byte_idx..end_byte_idx]).ok();
137
138 (n_bytes_in_column, value)
139 }
140}
141
142impl Parser for IntParser {}
143
144///
145pub struct Utf8Parser {
146 trimmer: TextTrimmer,
147}
148
149impl Utf8Parser {
150 ///
151 pub fn new(alignment: Alignment, trim_symbol: Symbol) -> Self {
152 Self {
153 trimmer: TextTrimmer::new(alignment, trim_symbol),
154 }
155 }
156
157 /// Try and parse the byte slice as UTF-8 characters and count the number of
158 /// bytes that the boolean was represented as in the byte slice.
159 ///
160 /// # Safety
161 /// This function utilizes the [`from_utf8_unchecked`] function to convert the byte
162 /// slice to a string representation. This method is inherently unsafe and might
163 /// cause the program to panic. We have to assume that the input bytes are valid
164 /// UTF-8, because recovering from the situation where the bytes were not valid UTF-8
165 /// is not possible since then we don't know how far into the buffer we need to read.
166 ///
167 /// # Performance
168 /// The function [`from_utf8_unchecked`] will put the string slice on the stack and not
169 /// perform any heap allocations. As such, we need to know the lifetimes of it.
170 pub fn try_parse<'a>(&self, bytes: &'a [u8], n_runes: usize) -> (usize, Option<&'a str>) {
171 let end_byte_idx: usize = self.trimmer.find_byte_indices(bytes, n_runes);
172 let text: &'a str = unsafe { from_utf8_unchecked(&bytes[..end_byte_idx]) };
173
174 (end_byte_idx, Some(self.trimmer.trim(text)))
175 }
176}
177
178impl Parser for Utf8Parser {}