kalosm_sample/structured_parser/
integer.rs

1use crate::bail;
2
3use crate::{
4    CreateParserState, EmptyNumber, InvalidSignLocation, LeadingZeroError, OutOfRangeError,
5    ParseStatus, Parser,
6};
7use std::ops::RangeInclusive;
8
/// A parser for a base-10 integer constrained to an inclusive range.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct IntegerParser {
    /// Inclusive bounds that a parsed value is checked against.
    range: RangeInclusive<i128>,
}
14
15impl IntegerParser {
16    /// Create a new integer parser.
17    pub fn new(range: RangeInclusive<i128>) -> Self {
18        if range.start() > range.end() {
19            Self {
20                range: *range.end()..=*range.start(),
21            }
22        } else {
23            Self { range }
24        }
25    }
26}
27
28impl CreateParserState for IntegerParser {
29    fn create_parser_state(&self) -> <Self as Parser>::PartialState {
30        IntegerParserState::default()
31    }
32}
33
34impl IntegerParser {
35    fn can_be_negative(&self) -> bool {
36        *self.range.start() < 0
37    }
38
39    fn is_number_valid(&self, value: i128) -> bool {
40        self.range.contains(&value)
41    }
42
43    fn should_stop(&self, value: i128) -> bool {
44        match value.checked_mul(10) {
45            Some(after_next_digit) => {
46                (after_next_digit > 0 && after_next_digit > *self.range.end())
47                    || (after_next_digit <= 0 && after_next_digit < *self.range.start())
48            }
49            None => true,
50        }
51    }
52
53    fn could_number_become_valid(&self, value: i128) -> bool {
54        if self.is_number_valid(value) {
55            true
56        } else {
57            let start_value = *self.range.start();
58            let end_value = *self.range.end();
59            let positive = value >= 0;
60            // Check if adding a digit would make the number invalid
61            if positive {
62                if value * 10 > end_value {
63                    return false;
64                }
65            } else if value * 10 < start_value {
66                return false;
67            }
68
69            // Check if the digits are within the range so far
70            let digits = value.abs().checked_ilog10().map(|x| x + 1).unwrap_or(1);
71            let start_digits = start_value
72                .abs()
73                .checked_ilog10()
74                .map(|x| x + 1)
75                .unwrap_or(1);
76            let end_digits = end_value.abs().checked_ilog10().map(|x| x + 1).unwrap_or(1);
77            let mut check_end = true;
78            let mut check_start = true;
79            for digit in 1..(digits + 1) {
80                let selected_digit = value / (10_i128.pow(digits - digit)) % 10;
81                let selected_start_digit = start_value / (10_i128.pow(start_digits - digit)) % 10;
82                let selected_end_digit = end_value / (10_i128.pow(end_digits - digit)) % 10;
83
84                if check_start {
85                    match selected_digit.cmp(&selected_start_digit) {
86                        std::cmp::Ordering::Greater => {
87                            check_start = false;
88                        }
89                        std::cmp::Ordering::Less => {
90                            return false;
91                        }
92                        std::cmp::Ordering::Equal => {}
93                    }
94                }
95                if check_end {
96                    match selected_digit.cmp(&selected_end_digit) {
97                        std::cmp::Ordering::Greater => {
98                            return false;
99                        }
100                        std::cmp::Ordering::Less => {
101                            check_end = false;
102                        }
103                        std::cmp::Ordering::Equal => {}
104                    }
105                }
106            }
107            true
108        }
109    }
110}
111
/// How far the integer parser has advanced through a number.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
enum IntegerParserProgress {
    /// No byte of the number has been consumed yet.
    #[default]
    Initial,
    /// A leading `-` has been consumed, but no digit yet.
    AfterSign,
    /// At least one digit has been consumed.
    AfterDigit,
}
119
120impl IntegerParserProgress {
121    fn is_after_digit(&self) -> bool {
122        matches!(self, IntegerParserProgress::AfterDigit)
123    }
124}
125
126/// The state of an integer parser.
127#[derive(Debug, PartialEq, Eq, Copy, Clone)]
128pub struct IntegerParserState {
129    state: IntegerParserProgress,
130    value: u64,
131    positive: bool,
132}
133
134impl Default for IntegerParserState {
135    fn default() -> Self {
136        IntegerParserState {
137            state: IntegerParserProgress::Initial,
138            value: 0,
139            positive: true,
140        }
141    }
142}
143
144impl Parser for IntegerParser {
145    type Output = i128;
146    type PartialState = IntegerParserState;
147
148    fn parse<'a>(
149        &self,
150        state: &IntegerParserState,
151        input: &'a [u8],
152    ) -> crate::ParseResult<ParseStatus<'a, Self::PartialState, Self::Output>> {
153        let mut value = state.value;
154        let mut positive = state.positive;
155        let mut state = state.state;
156
157        for index in 0..input.len() {
158            let input_byte = input[index];
159            let digit = match input_byte {
160                b'0'..=b'9' => {
161                    if state == IntegerParserProgress::AfterDigit
162                        && value == 0
163                        && input_byte == b'0'
164                    {
165                        bail!(LeadingZeroError);
166                    }
167                    input_byte - b'0'
168                }
169                b'-' => {
170                    if state == IntegerParserProgress::Initial {
171                        state = IntegerParserProgress::AfterSign;
172                        positive = false;
173                        if !self.can_be_negative() {
174                            bail!(OutOfRangeError)
175                        }
176                        continue;
177                    } else {
178                        bail!(InvalidSignLocation)
179                    }
180                }
181                _ => {
182                    if state.is_after_digit() {
183                        let result = value as i128 * if positive { 1 } else { -1 };
184                        if self.is_number_valid(result) {
185                            return Ok(ParseStatus::Finished {
186                                result,
187                                remaining: &input[index..],
188                            });
189                        }
190                        bail!(OutOfRangeError)
191                    } else {
192                        bail!(EmptyNumber)
193                    }
194                }
195            };
196
197            state = IntegerParserProgress::AfterDigit;
198            match value.checked_mul(10) {
199                Some(v) => value = v + u64::from(digit),
200                None => {
201                    let signed_value = value as i128 * if positive { 1 } else { -1 };
202                    if self.is_number_valid(signed_value) {
203                        return Ok(ParseStatus::Finished {
204                            result: signed_value,
205                            remaining: &input[index..],
206                        });
207                    }
208                    bail!(OutOfRangeError)
209                }
210            }
211
212            let signed_value = value as i128 * if positive { 1 } else { -1 };
213
214            if self.should_stop(signed_value) {
215                return Ok(ParseStatus::Finished {
216                    result: signed_value,
217                    remaining: &input[index + 1..],
218                });
219            }
220
221            if !self.could_number_become_valid(signed_value) {
222                if self.is_number_valid(signed_value) {
223                    return Ok(ParseStatus::Finished {
224                        result: signed_value,
225                        remaining: &input[index + 1..],
226                    });
227                }
228                bail!(OutOfRangeError)
229            }
230        }
231
232        Ok(ParseStatus::Incomplete {
233            new_state: IntegerParserState {
234                state,
235                value,
236                positive,
237            },
238            required_next: Default::default(),
239        })
240    }
241}
242
/// Feeds random in-range numbers to the parser in randomly sized chunks and
/// checks that the parsed value and the unconsumed tail are as expected.
#[test]
fn integer_parser() {
    for _ in 0..100 {
        let random_number = i128::from(rand::random::<i64>());
        let range = random_number.saturating_sub(i128::from(rand::random::<u8>()))
            ..=random_number.saturating_add(i128::from(rand::random::<u8>()));
        assert!(range.contains(&random_number));
        println!("range: {:?}", range);
        println!("random_number: {:?}", random_number);

        let parser = IntegerParser { range };
        let mut state = IntegerParserState::default();

        // A random trailing character marks the end of the number.
        let cap_string = rand::random::<char>().to_string();
        let mut as_string = random_number.to_string();
        as_string.push_str(&cap_string);
        let mut bytes = as_string.as_bytes().to_vec();

        let mut finished = false;
        // Chunk sizes span 0..=len so that empty chunks are still exercised
        // and the final byte can actually be fed to the parser; the loop ends
        // once every byte has been handed over.
        while !bytes.is_empty() {
            let take_count = rand::random::<usize>() % (bytes.len() + 1);
            let taken = bytes.drain(..take_count).collect::<Vec<_>>();
            match parser.parse(&state, &taken) {
                Ok(ParseStatus::Incomplete { new_state, .. }) => {
                    state = new_state;
                }
                Ok(ParseStatus::Finished { result, remaining }) => {
                    assert_eq!(result, random_number);
                    assert!(cap_string.as_bytes().starts_with(remaining));
                    finished = true;
                    break;
                }
                Err(_) => panic!("should parse correctly failed to parse {:?}", as_string),
            }
        }
        assert!(finished, "parser never finished on {:?}", as_string);
    }
}