Skip to main content

qubit_codec/
c_integer_literal_codec.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! C integer literal decoder.
11
12use crate::{
13    CodecError,
14    CodecResult,
15    Decoder,
16};
17
/// Stateless decoder for non-negative C integer literal fragments.
///
/// Three literal forms are recognised: decimal (`123`), octal with a leading
/// zero (`0123`), and hexadecimal with a `0x` or `0X` prefix (`0x123`).
/// Whitespace surrounding the literal is trimmed before parsing, and the
/// decoded value is returned as a `u64`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CIntegerLiteralCodec;
25
26impl CIntegerLiteralCodec {
27    /// Creates a C integer literal codec.
28    ///
29    /// # Returns
30    /// A stateless C integer literal codec.
31    pub fn new() -> Self {
32        Self
33    }
34
35    /// Decodes a C integer literal into a `u64`.
36    ///
37    /// # Parameters
38    /// - `text`: C integer literal text.
39    ///
40    /// # Returns
41    /// Parsed integer value.
42    ///
43    /// # Errors
44    /// Returns [`CodecError::InvalidInput`] when the input is empty, lacks digits,
45    /// or overflows `u64`; returns [`CodecError::InvalidDigit`] when a character
46    /// is not valid for the detected radix.
47    pub fn decode(&self, text: &str) -> CodecResult<u64> {
48        let (trimmed, trim_offset) = trim_with_offset(text);
49        if trimmed.is_empty() {
50            return Err(invalid_c_integer_input("expected at least one digit"));
51        }
52        let components = LiteralComponents::parse(trimmed, trim_offset)?;
53        validate_digits(components)?;
54        u64::from_str_radix(components.digits, components.radix)
55            .map_err(|error| invalid_c_integer_input(&format!("integer literal is out of range: {error}")))
56    }
57}
58
59impl Decoder<str> for CIntegerLiteralCodec {
60    type Error = CodecError;
61    type Output = u64;
62
63    /// Decodes a C integer literal into a `u64`.
64    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
65        CIntegerLiteralCodec::decode(self, input)
66    }
67}
68
/// Radix and digit slice extracted from a trimmed C integer literal.
#[derive(Clone, Copy, Debug)]
struct LiteralComponents<'a> {
    /// Numeric base used to interpret `digits`.
    radix: u32,
    /// Digit characters of the literal, without any radix prefix.
    digits: &'a str,
    /// Byte offset of `digits` within the original, untrimmed input.
    digits_offset: usize,
}
76
77impl<'a> LiteralComponents<'a> {
78    /// Parses radix and digit slice from trimmed input.
79    ///
80    /// # Parameters
81    /// - `trimmed`: Input after surrounding whitespace has been removed.
82    /// - `trim_offset`: Byte offset of `trimmed` in the original input.
83    ///
84    /// # Returns
85    /// Literal components used by validation and numeric parsing.
86    ///
87    /// # Errors
88    /// Returns [`CodecError::InvalidInput`] when a radix prefix is present without
89    /// any digits after it.
90    fn parse(trimmed: &'a str, trim_offset: usize) -> CodecResult<Self> {
91        if let Some(digits) = trimmed.strip_prefix("0x").or_else(|| trimmed.strip_prefix("0X")) {
92            if digits.is_empty() {
93                return Err(invalid_c_integer_input(
94                    "hexadecimal literal requires at least one digit",
95                ));
96            }
97            return Ok(Self {
98                radix: 16,
99                digits,
100                digits_offset: trim_offset + 2,
101            });
102        }
103        if trimmed.len() > 1
104            && let Some(digits) = trimmed.strip_prefix('0')
105        {
106            return Ok(Self {
107                radix: 8,
108                digits,
109                digits_offset: trim_offset + 1,
110            });
111        }
112        Ok(Self {
113            radix: 10,
114            digits: trimmed,
115            digits_offset: trim_offset,
116        })
117    }
118}
119
/// Strips whitespace from both ends of `text` and reports where the
/// remaining content begins.
///
/// # Parameters
/// - `text`: Input text.
///
/// # Returns
/// The trimmed slice together with the byte offset of its first character in
/// `text`. When `text` is empty or entirely whitespace, the slice is empty and
/// the offset equals `text.len()`.
fn trim_with_offset(text: &str) -> (&str, usize) {
    // Byte position of the first non-whitespace character, if there is one.
    let offset = text
        .find(|ch: char| !ch.is_whitespace())
        .unwrap_or(text.len());
    (text.trim(), offset)
}
132
133/// Validates that every character is valid for the detected radix.
134///
135/// # Parameters
136/// - `components`: Parsed literal components.
137///
138/// # Errors
139/// Returns [`CodecError::InvalidDigit`] with the original input byte index of
140/// the invalid character.
141fn validate_digits(components: LiteralComponents<'_>) -> CodecResult<()> {
142    for (index, character) in components.digits.char_indices() {
143        if character.is_digit(components.radix) {
144            continue;
145        }
146        return Err(CodecError::InvalidDigit {
147            radix: components.radix,
148            index: components.digits_offset + index,
149            character,
150        });
151    }
152    Ok(())
153}
154
155/// Builds an invalid C integer literal input error.
156///
157/// # Parameters
158/// - `reason`: Human-readable reason the input was rejected.
159///
160/// # Returns
161/// An invalid input error for the C integer literal codec.
162fn invalid_c_integer_input(reason: &str) -> CodecError {
163    CodecError::InvalidInput {
164        codec: "c-integer-literal",
165        reason: reason.to_owned(),
166    }
167}