qubit_codec/c_integer_literal_codec.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! C integer literal decoder.
11
12use crate::{
13 CodecError,
14 CodecResult,
15 Decoder,
16};
17
/// Stateless decoder for non-negative C integer literal fragments.
///
/// Three notations are recognised: decimal (`123`), octal with a leading `0`
/// (`0123`), and hexadecimal with a `0x` or `0X` prefix (`0x123`, `0X123`).
/// Whitespace surrounding the literal is ignored and the decoded value is
/// returned as a `u64`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct CIntegerLiteralCodec;
25
26impl CIntegerLiteralCodec {
27 /// Creates a C integer literal codec.
28 ///
29 /// # Returns
30 /// A stateless C integer literal codec.
31 pub fn new() -> Self {
32 Self
33 }
34
35 /// Decodes a C integer literal into a `u64`.
36 ///
37 /// # Parameters
38 /// - `text`: C integer literal text.
39 ///
40 /// # Returns
41 /// Parsed integer value.
42 ///
43 /// # Errors
44 /// Returns [`CodecError::InvalidInput`] when the input is empty, lacks digits,
45 /// or overflows `u64`; returns [`CodecError::InvalidDigit`] when a character
46 /// is not valid for the detected radix.
47 pub fn decode(&self, text: &str) -> CodecResult<u64> {
48 let (trimmed, trim_offset) = trim_with_offset(text);
49 if trimmed.is_empty() {
50 return Err(invalid_c_integer_input("expected at least one digit"));
51 }
52 let components = LiteralComponents::parse(trimmed, trim_offset)?;
53 validate_digits(components)?;
54 u64::from_str_radix(components.digits, components.radix)
55 .map_err(|error| invalid_c_integer_input(&format!("integer literal is out of range: {error}")))
56 }
57}
58
59impl Decoder<str> for CIntegerLiteralCodec {
60 type Error = CodecError;
61 type Output = u64;
62
63 /// Decodes a C integer literal into a `u64`.
64 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
65 CIntegerLiteralCodec::decode(self, input)
66 }
67}
68
/// Radix and digit slice extracted from a trimmed C integer literal.
#[derive(Clone, Copy, Debug)]
struct LiteralComponents<'text> {
    /// Numeric base selected from the literal's prefix (16, 8, or 10).
    radix: u32,
    /// Characters that follow the radix prefix, borrowed from the input.
    digits: &'text str,
    /// Byte offset of `digits` within the original, untrimmed input.
    digits_offset: usize,
}
76
77impl<'a> LiteralComponents<'a> {
78 /// Parses radix and digit slice from trimmed input.
79 ///
80 /// # Parameters
81 /// - `trimmed`: Input after surrounding whitespace has been removed.
82 /// - `trim_offset`: Byte offset of `trimmed` in the original input.
83 ///
84 /// # Returns
85 /// Literal components used by validation and numeric parsing.
86 ///
87 /// # Errors
88 /// Returns [`CodecError::InvalidInput`] when a radix prefix is present without
89 /// any digits after it.
90 fn parse(trimmed: &'a str, trim_offset: usize) -> CodecResult<Self> {
91 if let Some(digits) = trimmed.strip_prefix("0x").or_else(|| trimmed.strip_prefix("0X")) {
92 if digits.is_empty() {
93 return Err(invalid_c_integer_input(
94 "hexadecimal literal requires at least one digit",
95 ));
96 }
97 return Ok(Self {
98 radix: 16,
99 digits,
100 digits_offset: trim_offset + 2,
101 });
102 }
103 if trimmed.len() > 1
104 && let Some(digits) = trimmed.strip_prefix('0')
105 {
106 return Ok(Self {
107 radix: 8,
108 digits,
109 digits_offset: trim_offset + 1,
110 });
111 }
112 Ok(Self {
113 radix: 10,
114 digits: trimmed,
115 digits_offset: trim_offset,
116 })
117 }
118}
119
/// Strips leading and trailing whitespace and reports where the result begins.
///
/// # Parameters
/// - `text`: Input text.
///
/// # Returns
/// The trimmed text together with the byte offset of its first character in
/// `text`. When `text` is empty or entirely whitespace, the trimmed text is
/// empty and the offset equals `text.len()`.
fn trim_with_offset(text: &str) -> (&str, usize) {
    // The first non-whitespace character marks where the trimmed text starts;
    // if there is none, all of `text` is leading whitespace.
    let offset = text
        .find(|character: char| !character.is_whitespace())
        .unwrap_or(text.len());
    (text.trim(), offset)
}
132
133/// Validates that every character is valid for the detected radix.
134///
135/// # Parameters
136/// - `components`: Parsed literal components.
137///
138/// # Errors
139/// Returns [`CodecError::InvalidDigit`] with the original input byte index of
140/// the invalid character.
141fn validate_digits(components: LiteralComponents<'_>) -> CodecResult<()> {
142 for (index, character) in components.digits.char_indices() {
143 if character.is_digit(components.radix) {
144 continue;
145 }
146 return Err(CodecError::InvalidDigit {
147 radix: components.radix,
148 index: components.digits_offset + index,
149 character,
150 });
151 }
152 Ok(())
153}
154
155/// Builds an invalid C integer literal input error.
156///
157/// # Parameters
158/// - `reason`: Human-readable reason the input was rejected.
159///
160/// # Returns
161/// An invalid input error for the C integer literal codec.
162fn invalid_c_integer_input(reason: &str) -> CodecError {
163 CodecError::InvalidInput {
164 codec: "c-integer-literal",
165 reason: reason.to_owned(),
166 }
167}