byte_num/from_ascii.rs
1use std::ops::Mul;
2
3use crate::{constants::*, error::ParseIntErr};
4
5/// This trait converts bytes to integers,
6/// and is implemented on all integer types, except u128 and i128.
7///
8/// The most important method on this trait is [`FromAscii::atoi`], which can be called in a function-like style.
9/// As argument, it takes anything that implements `AsRef<[u8]>`.
10/// The return type is a [`Result`], indicating whether the convertion succeeded or failed
11pub trait FromAscii: Sized {
12 /// The function performing the conversion from a byteslice to a number.
13 /// It takes anything that can be transformed into a byte-slice.
14 /// An empty slice returns the number 0.
15 ///
16 /// # Examples
17 /// ```
18 /// use byte_num::{
19 /// from_ascii::FromAscii,
20 /// error::ParseIntErr,
21 /// };
22 ///
23 /// fn main() {
24 /// assert_eq!(u32::atoi("1928"), Ok(1928));
25 /// assert_eq!(u32::atoi("12e3"), Err(ParseIntErr::with_byte(b'e')));
26 /// }
27 /// ```
28 /// # Safety
29 /// It should be noted that trying to convert a slice that does not fit in the chosen integer type,
30 /// wraps around.
31 /// For example:
32 /// ```
33 /// use byte_num::from_ascii::FromAscii;
34 ///
35 /// fn main () {
36 /// let n = u8::atoi("256");
37 /// assert_eq!(n, Ok(0));
38 /// }
39 /// ```
40 #[inline]
41 fn atoi(s: impl AsRef<[u8]>) -> Result<Self, ParseIntErr> {
42 Self::bytes_to_int(s.as_ref())
43 }
44
45 fn bytes_to_int(s: &[u8]) -> Result<Self, ParseIntErr>;
46}
47
48#[inline(always)]
49fn parse_byte<N>(byte: u8, pow10: N) -> Result<N, ParseIntErr>
50where
51 N: From<u8> + Mul<Output = N>,
52{
53 let d = byte.wrapping_sub(ASCII_TO_INT_FACTOR);
54
55 if d > 9 {
56 return Err(ParseIntErr::with_byte(byte));
57 }
58
59 Ok(N::from(d) * pow10)
60}
61
macro_rules! unsigned_from_ascii {
    ($int:ty, $const_table:ident) => {
        impl FromAscii for $int {
            // Parses ASCII decimal digits into the unsigned integer type.
            //
            // 1) Start at the correct position in the pow10 table
            //    (`const_table.len() - bytes.len()`), so that the first byte lines up
            //    with its power of 10.
            // 2) For each byte:
            //    - subtract the ASCII offset, wrapping, and validate the result is at
            //      most 9 (both done by `parse_byte`)
            //    - multiply with its power of 10, wrapping on overflow
            //    - add it to the running total, wrapping on overflow
            //
            // Every arithmetic step wraps explicitly. Plain `*` and `+` would panic
            // when overflow checks are enabled, although wrapping is the documented
            // behaviour for values that do not fit in the type.
            //
            // Returns `ParseIntErr::Overflow` when the input has more bytes than the
            // table has entries, and the error produced by `parse_byte` for the first
            // byte that is not a digit.
            #[inline]
            fn bytes_to_int(mut bytes: &[u8]) -> Result<Self, ParseIntErr> {
                if bytes.len() > $const_table.len() {
                    return Err(ParseIntErr::Overflow);
                }

                let mut result: Self = 0;

                let mut len = bytes.len();
                let mut idx = $const_table.len().wrapping_sub(len);

                // SAFETY: `bytes.len() <= table.len()` was checked above, so `idx` did
                // not wrap and `idx + len == table.len()` holds on entry. Each loop
                // iteration drops 4 bytes from `bytes`, subtracts 4 from `len` and adds
                // 4 to `idx`, which keeps both `len == bytes.len()` and
                // `idx + len == table.len()` true. Inside the loop `len >= 4`, so
                // `..4` and `idx..idx + 4` are in bounds. In the fix-up loop
                // `offset < len`, so `offset` and `idx + offset` are in bounds.
                unsafe {
                    while len >= 4 {
                        match (
                            bytes.get_unchecked(..4),
                            $const_table.get_unchecked(idx..idx + 4),
                        ) {
                            ([a, b, c, d], [p1, p2, p3, p4]) => {
                                // A multiplier of 1 makes `parse_byte` only validate
                                // and convert the digit; the scaling is done below
                                // with wrapping arithmetic.
                                let d1: Self = parse_byte(*a, 1)?;
                                let d2: Self = parse_byte(*b, 1)?;
                                let d3: Self = parse_byte(*c, 1)?;
                                let d4: Self = parse_byte(*d, 1)?;

                                result = result
                                    .wrapping_add(d1.wrapping_mul(*p1))
                                    .wrapping_add(d2.wrapping_mul(*p2))
                                    .wrapping_add(d3.wrapping_mul(*p3))
                                    .wrapping_add(d4.wrapping_mul(*p4));
                            }
                            _ => unreachable!(),
                        }

                        len -= 4;
                        idx += 4;
                        bytes = bytes.get_unchecked(4..);
                    }

                    // Fix-up loop for the 0 to 3 bytes left over.
                    for offset in 0..len {
                        let a = bytes.get_unchecked(offset);
                        let p = $const_table.get_unchecked(idx + offset);
                        let digit: Self = parse_byte(*a, 1)?;
                        result = result.wrapping_add(digit.wrapping_mul(*p));
                    }
                }

                Ok(result)
            }
        }
    };

    // @NOTE: Specialize implementation for u8, since that's finished within 3 iterations at max.
    (@u8, $const_table:ident) => {
        impl FromAscii for u8 {
            // Same algorithm as the general arm, without the 4-bytes-at-a-time loop.
            #[inline]
            fn bytes_to_int(bytes: &[u8]) -> Result<Self, ParseIntErr> {
                if bytes.len() > $const_table.len() {
                    return Err(ParseIntErr::Overflow);
                }

                let mut result: Self = 0;
                let len = bytes.len();
                let idx = $const_table.len().wrapping_sub(len);

                // SAFETY: `len <= table.len()` was checked above, so `idx` did not wrap
                // and `idx + len == table.len()`. With `offset < len`, both `offset`
                // and `idx + offset` are in bounds.
                unsafe {
                    for offset in 0..len {
                        let a = bytes.get_unchecked(offset);
                        let p = $const_table.get_unchecked(idx + offset);
                        // Validate and convert only (multiplier 1); scale with
                        // wrapping arithmetic so overflow never panics.
                        let digit: Self = parse_byte(*a, 1)?;
                        result = result.wrapping_add(digit.wrapping_mul(*p));
                    }
                }

                Ok(result)
            }
        }
    };
}
149
macro_rules! signed_from_ascii {
    ($int:ty, $unsigned_version:ty) => {
        impl FromAscii for $int {
            // Parses through the unsigned type of the same width and casts the
            // result. A single leading `-` negates the value; the cast and the
            // negation both wrap around rather than fail.
            fn bytes_to_int(bytes: &[u8]) -> Result<Self, ParseIntErr> {
                match bytes.split_first() {
                    Some((b'-', digits)) => {
                        let magnitude = <$unsigned_version>::bytes_to_int(digits)?;
                        Ok((magnitude as Self).wrapping_neg())
                    }
                    _ => {
                        let magnitude = <$unsigned_version>::bytes_to_int(bytes)?;
                        Ok(magnitude as Self)
                    }
                }
            }
        }
    };
}
164
165unsigned_from_ascii!(@u8, POW10_U8);
166unsigned_from_ascii!(u16, POW10_U16);
167unsigned_from_ascii!(u32, POW10_U32);
168unsigned_from_ascii!(u64, POW10_U64);
169unsigned_from_ascii!(usize, POW10_USIZE);
170
171signed_from_ascii!(i8, u8);
172signed_from_ascii!(i16, u16);
173signed_from_ascii!(i32, u32);
174signed_from_ascii!(i64, u64);
175signed_from_ascii!(isize, usize);
176
#[cfg(test)]
mod tests {
    use super::{FromAscii, ParseIntErr};

    /// Covers plain parsing, wrap-around, invalid digits and too many digits for `u8`.
    #[test]
    fn to_u8() {
        assert_eq!(u8::atoi("123"), Ok(123));
        assert_eq!(u8::atoi("256"), Ok(0));

        // Wraps around
        assert_eq!(u8::atoi("257"), Ok(1));

        // Error: InvalidDigit
        assert_eq!(u8::atoi("!23"), Err(ParseIntErr::with_byte(b'!')));

        // Error: Overflow
        assert_eq!(u8::atoi("1000"), Err(ParseIntErr::Overflow));
    }

    /// Checks that values just outside the `isize` range wrap around.
    ///
    /// The integer literals below only fit a 64-bit `isize`. On narrower targets they
    /// are rejected at compile time, so the test is limited to 64-bit targets.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn overflow_isize() {
        // Overflows the minimum value of the isize by 1, but it wraps around.
        assert_eq!(isize::atoi("-9223372036854775809"), Ok(9223372036854775807));

        // Overflows the maximum value of the isize, but it wraps around.
        assert_eq!(isize::atoi("9223372036854775809"), Ok(-9223372036854775807));
    }
}