qubit_codec_misc/c_integer_literal_codec.rs
1// =============================================================================
2// Copyright (c) 2026 Haixing Hu.
3//
4// SPDX-License-Identifier: Apache-2.0
5//
6// Licensed under the Apache License, Version 2.0.
7// =============================================================================
8//! C integer literal decoder.
9
10use crate::{
11 MiscCodecError,
12 MiscCodecResult,
13 ValueDecoder,
14};
15
/// Stateless decoder for non-negative integer literals written in C syntax.
///
/// Three notations are recognised:
///
/// - decimal, e.g. `123`;
/// - octal, introduced by a leading `0`, e.g. `0123`;
/// - hexadecimal, introduced by `0x` or `0X`, e.g. `0x123`.
///
/// Whitespace around the literal is trimmed before parsing, and the decoded
/// value is returned as a `u64`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CIntegerLiteralCodec;
23
24impl CIntegerLiteralCodec {
25 /// Creates a C integer literal codec.
26 ///
27 /// # Returns
28 /// A stateless C integer literal codec.
29 #[inline]
30 pub fn new() -> Self {
31 Self
32 }
33
34 /// Decodes a C integer literal into a `u64`.
35 ///
36 /// # Parameters
37 /// - `text`: C integer literal text.
38 ///
39 /// # Returns
40 /// Parsed integer value.
41 ///
42 /// # Errors
43 /// Returns [`MiscCodecError::InvalidInput`] when the input is empty, lacks
44 /// digits, or overflows `u64`; returns [`MiscCodecError::InvalidDigit`]
45 /// when a character is not valid for the detected radix.
46 #[inline]
47 pub fn decode(&self, text: &str) -> MiscCodecResult<u64> {
48 let (trimmed, trim_offset) = trim_with_offset(text);
49 if trimmed.is_empty() {
50 return Err(invalid_c_integer_input("expected at least one digit"));
51 }
52 let components = LiteralComponents::parse(trimmed, trim_offset)?;
53 validate_digits(components)?;
54 u64::from_str_radix(components.digits, components.radix).map_err(
55 |error| {
56 invalid_c_integer_input(&format!(
57 "integer literal is out of range: {error}"
58 ))
59 },
60 )
61 }
62}
63
64impl ValueDecoder<str> for CIntegerLiteralCodec {
65 type Error = MiscCodecError;
66 type Output = u64;
67
68 /// Decodes a C integer literal into a `u64`.
69 #[inline]
70 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
71 CIntegerLiteralCodec::decode(self, input)
72 }
73}
74
/// The pieces of a C integer literal after its radix prefix has been split off.
#[derive(Clone, Copy, Debug)]
struct LiteralComponents<'a> {
    /// Numeric base of the literal: 8, 10 or 16.
    radix: u32,
    /// Digit characters that follow the radix prefix (if any).
    digits: &'a str,
    /// Byte offset of `digits` within the original, untrimmed input.
    digits_offset: usize,
}
82
83impl<'a> LiteralComponents<'a> {
84 /// Parses radix and digit slice from trimmed input.
85 ///
86 /// # Parameters
87 /// - `trimmed`: Input after surrounding whitespace has been removed.
88 /// - `trim_offset`: Byte offset of `trimmed` in the original input.
89 ///
90 /// # Returns
91 /// Literal components used by validation and numeric parsing.
92 ///
93 /// # Errors
94 /// Returns [`MiscCodecError::InvalidInput`] when a radix prefix is present
95 /// without any digits after it.
96 #[inline]
97 fn parse(trimmed: &'a str, trim_offset: usize) -> MiscCodecResult<Self> {
98 if let Some(digits) = trimmed
99 .strip_prefix("0x")
100 .or_else(|| trimmed.strip_prefix("0X"))
101 {
102 if digits.is_empty() {
103 return Err(invalid_c_integer_input(
104 "hexadecimal literal requires at least one digit",
105 ));
106 }
107 return Ok(Self {
108 radix: 16,
109 digits,
110 digits_offset: trim_offset + 2,
111 });
112 }
113 if trimmed.len() > 1
114 && let Some(digits) = trimmed.strip_prefix('0')
115 {
116 return Ok(Self {
117 radix: 8,
118 digits,
119 digits_offset: trim_offset + 1,
120 });
121 }
122 Ok(Self {
123 radix: 10,
124 digits: trimmed,
125 digits_offset: trim_offset,
126 })
127 }
128}
129
/// Strips leading and trailing whitespace and reports where the result begins.
///
/// # Parameters
/// - `text`: Input text.
///
/// # Returns
/// A pair of the trimmed text and the number of bytes of leading whitespace
/// that were removed, i.e. the byte offset of the trimmed text within `text`.
#[inline]
fn trim_with_offset(text: &str) -> (&str, usize) {
    // The offset is the length difference caused by dropping leading
    // whitespace only; trailing whitespace does not affect it.
    (text.trim(), text.len() - text.trim_start().len())
}
143
144/// Validates that every character is valid for the detected radix.
145///
146/// # Parameters
147/// - `components`: Parsed literal components.
148///
149/// # Errors
150/// Returns [`MiscCodecError::InvalidDigit`] with the original input byte index
151/// of the invalid character.
152fn validate_digits(components: LiteralComponents<'_>) -> MiscCodecResult<()> {
153 for (index, character) in components.digits.char_indices() {
154 if character.is_digit(components.radix) {
155 continue;
156 }
157 return Err(MiscCodecError::InvalidDigit {
158 radix: components.radix,
159 index: components.digits_offset + index,
160 character,
161 });
162 }
163 Ok(())
164}
165
166/// Builds an invalid C integer literal input error.
167///
168/// # Parameters
169/// - `reason`: Human-readable reason the input was rejected.
170///
171/// # Returns
172/// An invalid input error for the C integer literal codec.
173fn invalid_c_integer_input(reason: &str) -> MiscCodecError {
174 MiscCodecError::InvalidInput {
175 codec: "c-integer-literal",
176 reason: reason.to_owned(),
177 }
178}