Skip to main content

qubit_codec/
c_integer_literal_codec.rs

/*******************************************************************************
 *
 *    Copyright (c) 2026 Haixing Hu.
 *
 *    SPDX-License-Identifier: Apache-2.0
 *
 *    Licensed under the Apache License, Version 2.0.
 *
 ******************************************************************************/
//! C integer literal decoder.
11
12use crate::{
13    CodecError,
14    CodecResult,
15    Decoder,
16};
17
/// Codec that turns the text of a non-negative C integer literal into a `u64`.
///
/// Three literal forms are recognised: decimal (for example `123`), octal
/// with a leading zero (for example `0123`), and hexadecimal introduced by
/// `0x` or `0X` (for example `0x123`). Whitespace around the literal is
/// ignored.
///
/// The type is a unit struct, so values carry no state and are freely
/// copyable.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CIntegerLiteralCodec;
25
26impl CIntegerLiteralCodec {
27    /// Creates a C integer literal codec.
28    ///
29    /// # Returns
30    /// A stateless C integer literal codec.
31    pub fn new() -> Self {
32        Self
33    }
34
35    /// Decodes a C integer literal into a `u64`.
36    ///
37    /// # Parameters
38    /// - `text`: C integer literal text.
39    ///
40    /// # Returns
41    /// Parsed integer value.
42    ///
43    /// # Errors
44    /// Returns [`CodecError::InvalidInput`] when the input is empty, lacks digits,
45    /// or overflows `u64`; returns [`CodecError::InvalidDigit`] when a character
46    /// is not valid for the detected radix.
47    pub fn decode(&self, text: &str) -> CodecResult<u64> {
48        let (trimmed, trim_offset) = trim_with_offset(text);
49        if trimmed.is_empty() {
50            return Err(invalid_c_integer_input("expected at least one digit"));
51        }
52        let components = LiteralComponents::parse(trimmed, trim_offset)?;
53        validate_digits(components)?;
54        u64::from_str_radix(components.digits, components.radix).map_err(|error| {
55            invalid_c_integer_input(&format!("integer literal is out of range: {error}"))
56        })
57    }
58}
59
60impl Decoder<str> for CIntegerLiteralCodec {
61    type Error = CodecError;
62    type Output = u64;
63
64    /// Decodes a C integer literal into a `u64`.
65    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
66        CIntegerLiteralCodec::decode(self, input)
67    }
68}
69
/// Pieces of a C integer literal after its radix prefix has been identified.
#[derive(Clone, Copy, Debug)]
struct LiteralComponents<'a> {
    /// Numeric base selected from the literal's prefix.
    radix: u32,
    /// Digit characters that follow the radix prefix (if any).
    digits: &'a str,
    /// Byte offset of `digits` within the original, untrimmed input.
    digits_offset: usize,
}
77
78impl<'a> LiteralComponents<'a> {
79    /// Parses radix and digit slice from trimmed input.
80    ///
81    /// # Parameters
82    /// - `trimmed`: Input after surrounding whitespace has been removed.
83    /// - `trim_offset`: Byte offset of `trimmed` in the original input.
84    ///
85    /// # Returns
86    /// Literal components used by validation and numeric parsing.
87    ///
88    /// # Errors
89    /// Returns [`CodecError::InvalidInput`] when a radix prefix is present without
90    /// any digits after it.
91    fn parse(trimmed: &'a str, trim_offset: usize) -> CodecResult<Self> {
92        if let Some(digits) = trimmed
93            .strip_prefix("0x")
94            .or_else(|| trimmed.strip_prefix("0X"))
95        {
96            if digits.is_empty() {
97                return Err(invalid_c_integer_input(
98                    "hexadecimal literal requires at least one digit",
99                ));
100            }
101            return Ok(Self {
102                radix: 16,
103                digits,
104                digits_offset: trim_offset + 2,
105            });
106        }
107        if trimmed.len() > 1
108            && let Some(digits) = trimmed.strip_prefix('0')
109        {
110            return Ok(Self {
111                radix: 8,
112                digits,
113                digits_offset: trim_offset + 1,
114            });
115        }
116        Ok(Self {
117            radix: 10,
118            digits: trimmed,
119            digits_offset: trim_offset,
120        })
121    }
122}
123
/// Strips whitespace from both ends of `text` and reports where the remaining
/// slice begins.
///
/// # Parameters
/// - `text`: Input text.
///
/// # Returns
/// The trimmed slice together with its starting byte offset inside `text`.
/// When `text` is empty or entirely whitespace, the slice is empty and the
/// offset equals `text.len()`.
fn trim_with_offset(text: &str) -> (&str, usize) {
    // The first non-whitespace character marks the start; if there is none,
    // everything is leading whitespace.
    let start = text
        .find(|ch: char| !ch.is_whitespace())
        .unwrap_or(text.len());
    (text[start..].trim_end(), start)
}
136
137/// Validates that every character is valid for the detected radix.
138///
139/// # Parameters
140/// - `components`: Parsed literal components.
141///
142/// # Errors
143/// Returns [`CodecError::InvalidDigit`] with the original input byte index of
144/// the invalid character.
145fn validate_digits(components: LiteralComponents<'_>) -> CodecResult<()> {
146    for (index, character) in components.digits.char_indices() {
147        if character.is_digit(components.radix) {
148            continue;
149        }
150        return Err(CodecError::InvalidDigit {
151            radix: components.radix,
152            index: components.digits_offset + index,
153            character,
154        });
155    }
156    Ok(())
157}
158
159/// Builds an invalid C integer literal input error.
160///
161/// # Parameters
162/// - `reason`: Human-readable reason the input was rejected.
163///
164/// # Returns
165/// An invalid input error for the C integer literal codec.
166fn invalid_c_integer_input(reason: &str) -> CodecError {
167    CodecError::InvalidInput {
168        codec: "c-integer-literal",
169        reason: reason.to_owned(),
170    }
171}