Skip to main content

qubit_codec_misc/
c_integer_literal_codec.rs

1// =============================================================================
2//    Copyright (c) 2026 Haixing Hu.
3//
4//    SPDX-License-Identifier: Apache-2.0
5//
6//    Licensed under the Apache License, Version 2.0.
7// =============================================================================
8//! C integer literal decoder.
9
10use crate::{
11    MiscCodecError,
12    MiscCodecResult,
13    ValueDecoder,
14};
15
/// Stateless decoder for non-negative C integer literal fragments.
///
/// Three notations are recognised:
///
/// - decimal, e.g. `123`;
/// - octal, introduced by a leading `0`, e.g. `0123`;
/// - hexadecimal, introduced by `0x` or `0X`, e.g. `0x123`.
///
/// Whitespace around the literal is ignored and the decoded value is
/// returned as a `u64`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CIntegerLiteralCodec;
23
24impl CIntegerLiteralCodec {
25    /// Creates a C integer literal codec.
26    ///
27    /// # Returns
28    /// A stateless C integer literal codec.
29    #[inline]
30    pub fn new() -> Self {
31        Self
32    }
33
34    /// Decodes a C integer literal into a `u64`.
35    ///
36    /// # Parameters
37    /// - `text`: C integer literal text.
38    ///
39    /// # Returns
40    /// Parsed integer value.
41    ///
42    /// # Errors
43    /// Returns [`MiscCodecError::InvalidInput`] when the input is empty, lacks
44    /// digits, or overflows `u64`; returns [`MiscCodecError::InvalidDigit`]
45    /// when a character is not valid for the detected radix.
46    #[inline]
47    pub fn decode(&self, text: &str) -> MiscCodecResult<u64> {
48        let (trimmed, trim_offset) = trim_with_offset(text);
49        if trimmed.is_empty() {
50            return Err(invalid_c_integer_input("expected at least one digit"));
51        }
52        let components = LiteralComponents::parse(trimmed, trim_offset)?;
53        validate_digits(components)?;
54        u64::from_str_radix(components.digits, components.radix).map_err(
55            |error| {
56                invalid_c_integer_input(&format!(
57                    "integer literal is out of range: {error}"
58                ))
59            },
60        )
61    }
62}
63
64impl ValueDecoder<str> for CIntegerLiteralCodec {
65    type Error = MiscCodecError;
66    type Output = u64;
67
68    /// Decodes a C integer literal into a `u64`.
69    #[inline]
70    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
71        CIntegerLiteralCodec::decode(self, input)
72    }
73}
74
/// Radix and digit slice extracted from a trimmed C integer literal.
#[derive(Clone, Copy, Debug)]
struct LiteralComponents<'a> {
    /// Numeric base selected from the literal prefix.
    radix: u32,
    /// Literal text with any radix prefix removed.
    digits: &'a str,
    /// Byte offset of `digits` within the original, untrimmed input.
    digits_offset: usize,
}
82
83impl<'a> LiteralComponents<'a> {
84    /// Parses radix and digit slice from trimmed input.
85    ///
86    /// # Parameters
87    /// - `trimmed`: Input after surrounding whitespace has been removed.
88    /// - `trim_offset`: Byte offset of `trimmed` in the original input.
89    ///
90    /// # Returns
91    /// Literal components used by validation and numeric parsing.
92    ///
93    /// # Errors
94    /// Returns [`MiscCodecError::InvalidInput`] when a radix prefix is present
95    /// without any digits after it.
96    #[inline]
97    fn parse(trimmed: &'a str, trim_offset: usize) -> MiscCodecResult<Self> {
98        if let Some(digits) = trimmed
99            .strip_prefix("0x")
100            .or_else(|| trimmed.strip_prefix("0X"))
101        {
102            if digits.is_empty() {
103                return Err(invalid_c_integer_input(
104                    "hexadecimal literal requires at least one digit",
105                ));
106            }
107            return Ok(Self {
108                radix: 16,
109                digits,
110                digits_offset: trim_offset + 2,
111            });
112        }
113        if trimmed.len() > 1
114            && let Some(digits) = trimmed.strip_prefix('0')
115        {
116            return Ok(Self {
117                radix: 8,
118                digits,
119                digits_offset: trim_offset + 1,
120            });
121        }
122        Ok(Self {
123            radix: 10,
124            digits: trimmed,
125            digits_offset: trim_offset,
126        })
127    }
128}
129
/// Strips leading and trailing whitespace and reports where the remaining
/// text begins.
///
/// # Parameters
/// - `text`: Input text.
///
/// # Returns
/// The trimmed slice together with the number of bytes of leading
/// whitespace that were removed, i.e. the byte offset of the slice in
/// `text`. For input consisting only of whitespace the slice is empty and
/// the offset equals `text.len()`.
#[inline]
fn trim_with_offset(text: &str) -> (&str, usize) {
    let leading_len = text.len() - text.trim_start().len();
    (text.trim(), leading_len)
}
143
144/// Validates that every character is valid for the detected radix.
145///
146/// # Parameters
147/// - `components`: Parsed literal components.
148///
149/// # Errors
150/// Returns [`MiscCodecError::InvalidDigit`] with the original input byte index
151/// of the invalid character.
152fn validate_digits(components: LiteralComponents<'_>) -> MiscCodecResult<()> {
153    for (index, character) in components.digits.char_indices() {
154        if character.is_digit(components.radix) {
155            continue;
156        }
157        return Err(MiscCodecError::InvalidDigit {
158            radix: components.radix,
159            index: components.digits_offset + index,
160            character,
161        });
162    }
163    Ok(())
164}
165
166/// Builds an invalid C integer literal input error.
167///
168/// # Parameters
169/// - `reason`: Human-readable reason the input was rejected.
170///
171/// # Returns
172/// An invalid input error for the C integer literal codec.
173fn invalid_c_integer_input(reason: &str) -> MiscCodecError {
174    MiscCodecError::InvalidInput {
175        codec: "c-integer-literal",
176        reason: reason.to_owned(),
177    }
178}