qubit_codec/c_integer_literal_codec.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! C integer literal decoder.
11
12use crate::{
13 CodecError,
14 CodecResult,
15 Decoder,
16};
17
/// Stateless decoder for non-negative C integer literal fragments.
///
/// Three notations are recognized: decimal (`123`), octal introduced by a
/// leading `0` (`0123`), and hexadecimal introduced by `0x` or `0X`
/// (`0x123`, `0X123`). Whitespace surrounding the literal is ignored and the
/// decoded value is a `u64`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CIntegerLiteralCodec;
25
26impl CIntegerLiteralCodec {
27 /// Creates a C integer literal codec.
28 ///
29 /// # Returns
30 /// A stateless C integer literal codec.
31 pub fn new() -> Self {
32 Self
33 }
34
35 /// Decodes a C integer literal into a `u64`.
36 ///
37 /// # Parameters
38 /// - `text`: C integer literal text.
39 ///
40 /// # Returns
41 /// Parsed integer value.
42 ///
43 /// # Errors
44 /// Returns [`CodecError::InvalidInput`] when the input is empty, lacks digits,
45 /// or overflows `u64`; returns [`CodecError::InvalidDigit`] when a character
46 /// is not valid for the detected radix.
47 pub fn decode(&self, text: &str) -> CodecResult<u64> {
48 let (trimmed, trim_offset) = trim_with_offset(text);
49 if trimmed.is_empty() {
50 return Err(invalid_c_integer_input("expected at least one digit"));
51 }
52 let components = LiteralComponents::parse(trimmed, trim_offset)?;
53 validate_digits(components)?;
54 u64::from_str_radix(components.digits, components.radix).map_err(|error| {
55 invalid_c_integer_input(&format!("integer literal is out of range: {error}"))
56 })
57 }
58}
59
60impl Decoder<str> for CIntegerLiteralCodec {
61 type Error = CodecError;
62 type Output = u64;
63
64 /// Decodes a C integer literal into a `u64`.
65 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
66 CIntegerLiteralCodec::decode(self, input)
67 }
68}
69
/// Radix and digit slice extracted from a C integer literal.
#[derive(Clone, Copy, Debug)]
struct LiteralComponents<'a> {
    /// Numeric base the digits are written in.
    radix: u32,
    /// Digit characters with any radix prefix removed.
    digits: &'a str,
    /// Byte offset of `digits` within the original, untrimmed input.
    digits_offset: usize,
}
77
78impl<'a> LiteralComponents<'a> {
79 /// Parses radix and digit slice from trimmed input.
80 ///
81 /// # Parameters
82 /// - `trimmed`: Input after surrounding whitespace has been removed.
83 /// - `trim_offset`: Byte offset of `trimmed` in the original input.
84 ///
85 /// # Returns
86 /// Literal components used by validation and numeric parsing.
87 ///
88 /// # Errors
89 /// Returns [`CodecError::InvalidInput`] when a radix prefix is present without
90 /// any digits after it.
91 fn parse(trimmed: &'a str, trim_offset: usize) -> CodecResult<Self> {
92 if let Some(digits) = trimmed
93 .strip_prefix("0x")
94 .or_else(|| trimmed.strip_prefix("0X"))
95 {
96 if digits.is_empty() {
97 return Err(invalid_c_integer_input(
98 "hexadecimal literal requires at least one digit",
99 ));
100 }
101 return Ok(Self {
102 radix: 16,
103 digits,
104 digits_offset: trim_offset + 2,
105 });
106 }
107 if trimmed.len() > 1
108 && let Some(digits) = trimmed.strip_prefix('0')
109 {
110 return Ok(Self {
111 radix: 8,
112 digits,
113 digits_offset: trim_offset + 1,
114 });
115 }
116 Ok(Self {
117 radix: 10,
118 digits: trimmed,
119 digits_offset: trim_offset,
120 })
121 }
122}
123
/// Strips leading and trailing whitespace and reports where the result starts.
///
/// # Parameters
/// - `text`: Input text.
///
/// # Returns
/// The trimmed slice together with the byte offset of its first byte in
/// `text`. For input consisting only of whitespace the slice is empty and the
/// offset equals `text.len()`.
fn trim_with_offset(text: &str) -> (&str, usize) {
    // Byte index of the first non-whitespace character, or the end of the
    // input when there is none.
    let start = text
        .find(|character: char| !character.is_whitespace())
        .unwrap_or(text.len());
    (text[start..].trim_end(), start)
}
136
137/// Validates that every character is valid for the detected radix.
138///
139/// # Parameters
140/// - `components`: Parsed literal components.
141///
142/// # Errors
143/// Returns [`CodecError::InvalidDigit`] with the original input byte index of
144/// the invalid character.
145fn validate_digits(components: LiteralComponents<'_>) -> CodecResult<()> {
146 for (index, character) in components.digits.char_indices() {
147 if character.is_digit(components.radix) {
148 continue;
149 }
150 return Err(CodecError::InvalidDigit {
151 radix: components.radix,
152 index: components.digits_offset + index,
153 character,
154 });
155 }
156 Ok(())
157}
158
159/// Builds an invalid C integer literal input error.
160///
161/// # Parameters
162/// - `reason`: Human-readable reason the input was rejected.
163///
164/// # Returns
165/// An invalid input error for the C integer literal codec.
166fn invalid_c_integer_input(reason: &str) -> CodecError {
167 CodecError::InvalidInput {
168 codec: "c-integer-literal",
169 reason: reason.to_owned(),
170 }
171}