byte_num/
from_ascii.rs

1use std::ops::Mul;
2
3use crate::{constants::*, error::ParseIntErr};
4
5/// This trait converts bytes to integers,
6/// and is implemented on all integer types, except u128 and i128.
7///
8/// The most important method on this trait is [`FromAscii::atoi`], which can be called in a function-like style.
9/// As argument, it takes anything that implements `AsRef<[u8]>`.
10/// The return type is a [`Result`], indicating whether the convertion succeeded or failed
11pub trait FromAscii: Sized {
12    /// The function performing the conversion from a byteslice to a number.
13    /// It takes anything that can be transformed into a byte-slice.
14    /// An empty slice returns the number 0.
15    ///
16    /// # Examples
17    /// ```
18    /// use byte_num::{
19    ///     from_ascii::FromAscii,
20    ///     error::ParseIntErr,
21    /// };
22    ///
23    /// fn main() {
24    ///     assert_eq!(u32::atoi("1928"), Ok(1928));
25    ///     assert_eq!(u32::atoi("12e3"), Err(ParseIntErr::with_byte(b'e')));
26    /// }
27    /// ```
28    /// # Safety
29    /// It should be noted that trying to convert a slice that does not fit in the chosen integer type,
30    /// wraps around.
31    /// For example:
32    /// ```
33    /// use byte_num::from_ascii::FromAscii;
34    ///
35    /// fn main () {
36    ///     let n = u8::atoi("256");
37    ///     assert_eq!(n, Ok(0));
38    /// }
39    /// ```
40    #[inline]
41    fn atoi(s: impl AsRef<[u8]>) -> Result<Self, ParseIntErr> {
42        Self::bytes_to_int(s.as_ref())
43    }
44
45    fn bytes_to_int(s: &[u8]) -> Result<Self, ParseIntErr>;
46}
47
48#[inline(always)]
49fn parse_byte<N>(byte: u8, pow10: N) -> Result<N, ParseIntErr>
50where
51    N: From<u8> + Mul<Output = N>,
52{
53    let d = byte.wrapping_sub(ASCII_TO_INT_FACTOR);
54
55    if d > 9 {
56        return Err(ParseIntErr::with_byte(byte));
57    }
58
59    Ok(N::from(d) * pow10)
60}
61
// Implements `FromAscii` for an unsigned integer type.
//
// `$const_table` is the table of powers of ten for that type. The code below relies on it
// being ordered from the largest power down to 10^0, so that the last `n` entries are the
// place values of an `n`-digit number.
macro_rules! unsigned_from_ascii {
    // Generic arm, used for every unsigned type wider than `u8`.
    ($int:ty, $const_table:ident) => {
        impl FromAscii for $int {
            // 1) Reject inputs with more digits than the table has entries (`ParseIntErr::Overflow`).
            // 2) Line the digits up with the tail of the pow10 table
            //    (starting at const_table.len() - bytes.len()).
            // 3) For each byte:
            //     - subtract 48, wrapping
            //     - validate the result is at most 9
            //     - multiply with the matching power of 10 and add it to the total
            //
            // Both the multiplication and the additions wrap. A value that does not fit in
            // `Self` therefore wraps around in every build profile, instead of panicking
            // when overflow checks are enabled.
            #[inline]
            fn bytes_to_int(bytes: &[u8]) -> Result<Self, ParseIntErr> {
                if bytes.len() > $const_table.len() {
                    return Err(ParseIntErr::Overflow);
                }

                // Cannot underflow or go out of bounds: the length was checked just above.
                let scales = &$const_table[$const_table.len() - bytes.len()..];

                // Digits are consumed four at a time; `chunks_exact` keeps that shape
                // without any `unsafe` indexing.
                let mut digit_chunks = bytes.chunks_exact(4);
                let mut scale_chunks = scales.chunks_exact(4);
                let mut result: Self = 0;

                for pair in digit_chunks.by_ref().zip(scale_chunks.by_ref()) {
                    match pair {
                        ([a, b, c, d], [p1, p2, p3, p4]) => {
                            // `parse_byte` multiplies with the plain `*` operator, so it is only
                            // used to validate and widen the digit (scale 1 cannot overflow).
                            // The real scaling is done with `wrapping_mul`.
                            let r1 = parse_byte::<Self>(*a, 1)?.wrapping_mul(*p1);
                            let r2 = parse_byte::<Self>(*b, 1)?.wrapping_mul(*p2);
                            let r3 = parse_byte::<Self>(*c, 1)?.wrapping_mul(*p3);
                            let r4 = parse_byte::<Self>(*d, 1)?.wrapping_mul(*p4);

                            result = result
                                .wrapping_add(r1)
                                .wrapping_add(r2)
                                .wrapping_add(r3)
                                .wrapping_add(r4);
                        }
                        // `chunks_exact(4)` only ever yields slices of length 4.
                        _ => unreachable!(),
                    }
                }

                // Fixup loop: the (at most three) digits that did not fill a whole chunk.
                // Both slices have the same length, so their remainders line up.
                let tail = digit_chunks.remainder().iter().zip(scale_chunks.remainder());
                for (byte, pow10) in tail {
                    let digit = parse_byte::<Self>(*byte, 1)?;
                    result = result.wrapping_add(digit.wrapping_mul(*pow10));
                }

                Ok(result)
            }
        }
    };

    // @NOTE: Specialize implementation for u8, since that's finished within 3 iterations at max.
    (@u8, $const_table:ident) => {
        impl FromAscii for u8 {
            // Same algorithm as the generic arm, without the four-digit chunking.
            #[inline]
            fn bytes_to_int(bytes: &[u8]) -> Result<Self, ParseIntErr> {
                if bytes.len() > $const_table.len() {
                    return Err(ParseIntErr::Overflow);
                }

                // Cannot underflow or go out of bounds: the length was checked just above.
                let scales = &$const_table[$const_table.len() - bytes.len()..];
                let mut result: Self = 0;

                for (byte, pow10) in bytes.iter().zip(scales) {
                    // See the generic arm: validate with scale 1, then scale with wrapping.
                    let digit = parse_byte::<Self>(*byte, 1)?;
                    result = result.wrapping_add(digit.wrapping_mul(*pow10));
                }

                Ok(result)
            }
        }
    };
}
149
// Implements `FromAscii` for a signed integer type by delegating to the implementation
// of `$unsigned_version` and reinterpreting the result.
macro_rules! signed_from_ascii {
    ($int:ty, $unsigned_version:ty) => {
        impl FromAscii for $int {
            fn bytes_to_int(bytes: &[u8]) -> Result<Self, ParseIntErr> {
                // A single leading `-` marks a negative number; everything after it is
                // handed to the unsigned parser.
                let (negative, digits) = match bytes.split_first() {
                    Some((&b'-', rest)) => (true, rest),
                    _ => (false, bytes),
                };

                // The cast reinterprets the bits, so magnitudes above `Self::MAX` wrap.
                let magnitude = <$unsigned_version>::bytes_to_int(digits)? as Self;

                Ok(if negative {
                    // .wrapping_neg() wraps around.
                    magnitude.wrapping_neg()
                } else {
                    magnitude
                })
            }
        }
    };
}
164
165unsigned_from_ascii!(@u8, POW10_U8);
166unsigned_from_ascii!(u16, POW10_U16);
167unsigned_from_ascii!(u32, POW10_U32);
168unsigned_from_ascii!(u64, POW10_U64);
169unsigned_from_ascii!(usize, POW10_USIZE);
170
171signed_from_ascii!(i8, u8);
172signed_from_ascii!(i16, u16);
173signed_from_ascii!(i32, u32);
174signed_from_ascii!(i64, u64);
175signed_from_ascii!(isize, usize);
176
#[cfg(test)]
mod tests {
    use super::{FromAscii, ParseIntErr};

    #[test]
    fn to_u8() {
        assert_eq!(u8::atoi("123"), Ok(123));

        // One and two past `u8::MAX`: not an error, the value wraps around.
        assert_eq!(u8::atoi("256"), Ok(0));
        assert_eq!(u8::atoi("257"), Ok(1));

        // Error: a byte that is not an ASCII digit is handed back to the caller.
        assert_eq!(u8::atoi("!23"), Err(ParseIntErr::with_byte(b'!')));

        // Error: Overflow, the input has more digits than any `u8` has.
        assert_eq!(u8::atoi("1000"), Err(ParseIntErr::Overflow));
    }

    #[test]
    fn empty_input_is_zero() {
        assert_eq!(u32::atoi(""), Ok(0));
    }

    #[test]
    fn leading_minus_negates() {
        assert_eq!(i8::atoi("-12"), Ok(-12));
        assert_eq!(i8::atoi("12"), Ok(12));
    }

    // The literals below are chosen around the limits of a 64-bit `isize`.
    // On narrower targets they are out of range for `isize`, so the test is
    // only built where `isize` is 64 bits wide.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn overflow_isize() {
        // One below the minimum value of the isize: wraps around to the maximum.
        assert_eq!(isize::atoi("-9223372036854775809"), Ok(9223372036854775807));

        // Two above the maximum value of the isize: wraps around to the minimum plus one.
        assert_eq!(isize::atoi("9223372036854775809"), Ok(-9223372036854775807));
    }
}