cpclib_common/
parse.rs

1use winnow::ascii::{alphanumeric1, space0};
2use winnow::combinator::{alt, not, opt, terminated};
3use winnow::error::{AddContext, ParserError, StrContext};
4use winnow::stream::{AsBytes, AsChar, Compare, Stream, StreamIsPartial};
5use winnow::token::take_while;
6use winnow::{ModalResult, Parser};
7
8#[inline]
9///  (prefix) space number suffix
10pub fn parse_value<I, Error: ParserError<I>>(input: &mut I) -> ModalResult<u32, Error>
11where
12    I: Stream + StreamIsPartial + for<'a> Compare<&'a str>,
13    <I as Stream>::Slice: AsBytes,
14    <I as Stream>::Token: AsChar,
15    <I as Stream>::Token: Clone,
16    I: for<'a> Compare<&'a [u8; 2]>,
17    I: for<'a> Compare<&'a [u8; 1]>,
18    I: winnow::stream::Compare<u8>,
19    Error: AddContext<I, winnow::error::StrContext>
20{
21    #[derive(Clone, PartialEq, Debug)]
22    #[repr(u32)]
23    enum EncodingKind {
24        Hex = 16,
25        Bin = 2,
26        Dec = 10,
27
28        AmbiguousBinHex = 200,
29        Unk = 255
30    }
31
32    let before_encoding: <I as Stream>::Checkpoint = input.checkpoint();
33
34    // numbers have an optional prefix with an eventual space
35    let encoding = opt(terminated(
36        alt((
37            alt((b"0x", b"0X", b"#", b"$", b"&")).value(EncodingKind::Hex), // hexadecimal number
38            alt((b"0b", b"0B")).value(EncodingKind::AmbiguousBinHex),
39            b"%".value(EncodingKind::Bin) // binary number
40        )),
41        space0
42    )
43    .context(StrContext::Label("Number prefix detection")))
44    .parse_next(input)?
45    .unwrap_or(EncodingKind::Unk);
46
47    let hex_digits_and_sep = || {
48        take_while(1.., (('0'..='9'), ('a'..='f'), ('A'..='F'), '_'))
49            .context(StrContext::Label("Read hexadecimal digits"))
50    };
51    let mut dec_digits_and_sep =
52        take_while(1.., (('0'..='9'), '_')).context(StrContext::Label("Read decimal digits"));
53    let mut bin_digits_and_sep =
54        take_while(1.., (('0'..='1'), '_')).context(StrContext::Label("Read binary digits"));
55
56    let (encoding, digits) = match encoding {
57        EncodingKind::Hex => (EncodingKind::Hex, hex_digits_and_sep().parse_next(input)?),
58        EncodingKind::Bin => (EncodingKind::Bin, bin_digits_and_sep.parse_next(input)?),
59        EncodingKind::Dec => unreachable!("No prefix exist for decimal kind"),
60        EncodingKind::AmbiguousBinHex => {
61            // we parse for hexdecimal then guess the encoding
62            let digits = opt(hex_digits_and_sep()).parse_next(input)?;
63            let suffix = opt(alt((b'h', b'H')))
64                .verify(|s| if digits.is_none() { s.is_some() } else { true })
65                .parse_next(input)?;
66
67            if suffix.is_some() {
68                // this is an hexadecimal number and part of the encoding place was
69                // TODO find a more efficient way to not redo that
70                input.reset(&before_encoding);
71                b'0'.parse_next(input)?; // eat 0
72                let digits = hex_digits_and_sep().parse_next(input)?;
73                let _suffix = alt((b'h', b'H')).parse_next(input)?;
74
75                (EncodingKind::Hex, digits)
76            }
77            else {
78                // this is a decimal number
79                (EncodingKind::Bin, digits.unwrap())
80            }
81        },
82        EncodingKind::Unk => {
83            // we parse for hexdecimal then guess the encoding
84            let backup = input.checkpoint();
85            let digits = hex_digits_and_sep().parse_next(input)?;
86            let suffix = opt(alt((b'h', b'H'))).parse_next(input)?;
87
88            if suffix.is_some() {
89                // we know if is hex
90                (EncodingKind::Hex, digits)
91            }
92            else {
93                // we need to choose between bin and dec so we reparse a second time :()
94                input.reset(&backup);
95                let digits: &[u8] = digits.as_bytes();
96                let last_digit = digits[digits.len() - 1];
97                if last_digit == b'b' || last_digit == b'B' {
98                    // we need to check this is really a binary
99                    let digits = bin_digits_and_sep.parse_next(input)?;
100                    alt((b'b', b'B')).parse_next(input)?;
101                    (EncodingKind::Bin, digits)
102                }
103                else {
104                    (EncodingKind::Dec, dec_digits_and_sep.parse_next(input)?)
105                }
106            }
107        }
108    };
109
110    // ensure there are no more numbers
111    if encoding == EncodingKind::Hex {
112        not(alphanumeric1)
113            .context(StrContext::Label("This is not an hexadecimal number"))
114            .parse_next(input)?;
115    }
116
117    // right here encoding anddigits are compatible
118    debug_assert!(encoding != EncodingKind::Unk);
119    debug_assert!(encoding != EncodingKind::AmbiguousBinHex);
120    let digits: &[u8] = digits.as_bytes();
121
122    let base = encoding as u32;
123    let mut number = 0;
124    for digit in digits.iter().filter(|&&digit| digit != b'_') {
125        let digit = *digit;
126        let digit = if digit.is_ascii_digit() {
127            digit - b'0'
128        }
129        else if (b'a'..=b'f').contains(&digit) {
130            digit - b'a' + 10
131        }
132        else {
133            digit - b'A' + 10
134        } as u32;
135
136        number = base * number + digit;
137    }
138
139    Ok(number)
140}