qubit_codec/hex_codec.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Hexadecimal byte codec.
11
12use crate::{
13 CodecError,
14 CodecResult,
15 Decoder,
16 Encoder,
17};
18
/// A configurable codec that converts between bytes and hexadecimal text.
///
/// The configuration covers the digit case used when encoding, an optional
/// per-byte prefix, an optional separator between bytes, and whether ASCII
/// whitespace is skipped while decoding.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HexCodec {
    /// `true` to emit `A`-`F` when encoding, `false` to emit `a`-`f`.
    uppercase: bool,
    /// Text placed in front of every encoded byte, if any.
    prefix: Option<String>,
    /// Text placed between consecutive encoded bytes, if any.
    separator: Option<String>,
    /// `true` to skip ASCII whitespace while decoding.
    ignore_ascii_whitespace: bool,
}
31
32impl HexCodec {
33 /// Creates a lowercase codec without prefix or separators.
34 ///
35 /// # Returns
36 /// A hexadecimal codec using lowercase digits.
37 pub fn new() -> Self {
38 Self {
39 uppercase: false,
40 prefix: None,
41 separator: None,
42 ignore_ascii_whitespace: false,
43 }
44 }
45
46 /// Creates an uppercase codec without prefix or separators.
47 ///
48 /// # Returns
49 /// A hexadecimal codec using uppercase digits.
50 pub fn upper() -> Self {
51 Self::new().with_uppercase(true)
52 }
53
54 /// Sets whether encoded digits should be uppercase.
55 ///
56 /// # Parameters
57 /// - `uppercase`: Whether to use uppercase hexadecimal digits.
58 ///
59 /// # Returns
60 /// The updated codec.
61 pub fn with_uppercase(mut self, uppercase: bool) -> Self {
62 self.uppercase = uppercase;
63 self
64 }
65
66 /// Sets a per-byte prefix.
67 ///
68 /// The prefix is written before every encoded byte and required before
69 /// every decoded byte. For example, using prefix `0x` and separator ` `
70 /// encodes bytes as `0x1F 0x8B`.
71 ///
72 /// This is not a whole-output prefix: `[0x1F, 0x8B]` is encoded as
73 /// `0x1F 0x8B`, not `0x1F 8B`.
74 ///
75 /// # Parameters
76 /// - `prefix`: Prefix text such as `0x`.
77 ///
78 /// # Returns
79 /// The updated codec.
80 pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
81 self.prefix = Some(prefix.into());
82 self
83 }
84
85 /// Sets a separator written and accepted between encoded bytes.
86 ///
87 /// # Parameters
88 /// - `separator`: Separator text.
89 ///
90 /// # Returns
91 /// The updated codec.
92 pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
93 self.separator = Some(separator.into());
94 self
95 }
96
97 /// Sets whether ASCII whitespace is ignored while decoding.
98 ///
99 /// # Parameters
100 /// - `ignore`: Whether to ignore ASCII whitespace.
101 ///
102 /// # Returns
103 /// The updated codec.
104 pub fn with_ignored_ascii_whitespace(mut self, ignore: bool) -> Self {
105 self.ignore_ascii_whitespace = ignore;
106 self
107 }
108
109 /// Encodes bytes into a hexadecimal string.
110 ///
111 /// # Parameters
112 /// - `bytes`: Bytes to encode.
113 ///
114 /// # Returns
115 /// Hexadecimal text.
116 pub fn encode(&self, bytes: &[u8]) -> String {
117 let separator_len = self.separator.as_ref().map_or(0, String::len);
118 let prefix_len = self.prefix.as_ref().map_or(0, String::len);
119 let capacity = bytes
120 .len()
121 .saturating_mul(prefix_len.saturating_add(2))
122 .saturating_add(bytes.len().saturating_sub(1).saturating_mul(separator_len));
123 let mut output = String::with_capacity(capacity);
124 self.encode_into(bytes, &mut output);
125 output
126 }
127
128 /// Encodes bytes into an existing string.
129 ///
130 /// # Parameters
131 /// - `bytes`: Bytes to encode.
132 /// - `output`: Destination string.
133 pub fn encode_into(&self, bytes: &[u8], output: &mut String) {
134 for (index, byte) in bytes.iter().enumerate() {
135 if index > 0
136 && let Some(separator) = &self.separator
137 {
138 output.push_str(separator);
139 }
140 if let Some(prefix) = &self.prefix {
141 output.push_str(prefix);
142 }
143 push_hex_byte(*byte, self.uppercase, output);
144 }
145 }
146
147 /// Decodes hexadecimal text into bytes.
148 ///
149 /// # Parameters
150 /// - `text`: Hexadecimal text.
151 ///
152 /// # Returns
153 /// Decoded bytes.
154 ///
155 /// # Errors
156 /// Returns [`CodecError`] when a configured per-byte prefix is missing,
157 /// when the normalized digit count is odd, or when a non-hex digit is found.
158 pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
159 let mut output = Vec::new();
160 self.decode_into(text, &mut output)?;
161 Ok(output)
162 }
163
164 /// Decodes hexadecimal text into an existing byte vector.
165 ///
166 /// # Parameters
167 /// - `text`: Hexadecimal text.
168 /// - `output`: Destination byte vector.
169 ///
170 /// # Errors
171 /// Returns [`CodecError`] when the input is malformed.
172 pub fn decode_into(&self, text: &str, output: &mut Vec<u8>) -> CodecResult<()> {
173 let digits = self.normalized_digits(text)?;
174 if digits.len() % 2 != 0 {
175 return Err(CodecError::OddHexLength {
176 digits: digits.len(),
177 });
178 }
179 output.reserve(digits.len() / 2);
180 for pair in digits.chunks_exact(2) {
181 let mut pair = pair.iter();
182 let Some(&(high_index, high_char)) = pair.next() else {
183 continue;
184 };
185 let Some(&(low_index, low_char)) = pair.next() else {
186 continue;
187 };
188 let high = hex_value(high_char).ok_or(CodecError::InvalidHexDigit {
189 index: high_index,
190 character: high_char,
191 })?;
192 let low = hex_value(low_char).ok_or(CodecError::InvalidHexDigit {
193 index: low_index,
194 character: low_char,
195 })?;
196 output.push((high << 4) | low);
197 }
198 Ok(())
199 }
200
201 /// Normalizes accepted input characters into hex digits.
202 ///
203 /// # Parameters
204 /// - `text`: Text to decode.
205 ///
206 /// # Returns
207 /// Hex digits paired with their original character indexes.
208 ///
209 /// # Errors
210 /// Returns [`CodecError::InvalidHexDigit`] for unsupported characters.
211 fn normalized_digits(&self, text: &str) -> CodecResult<Vec<(usize, char)>> {
212 if let Some(prefix) = self.prefix.as_deref().filter(|prefix| !prefix.is_empty()) {
213 return self.normalized_prefixed_digits(text, prefix);
214 }
215 self.normalized_unprefixed_digits(text)
216 }
217
218 /// Normalizes unprefixed input characters into hex digits.
219 ///
220 /// # Parameters
221 /// - `text`: Text to decode.
222 ///
223 /// # Returns
224 /// Hex digits paired with their original character indexes.
225 ///
226 /// # Errors
227 /// Returns [`CodecError::InvalidHexDigit`] for unsupported characters.
228 fn normalized_unprefixed_digits(&self, text: &str) -> CodecResult<Vec<(usize, char)>> {
229 let mut digits = Vec::with_capacity(text.len());
230 let separator = self
231 .separator
232 .as_deref()
233 .filter(|separator| !separator.is_empty());
234 let mut index = 0;
235 while index < text.len() {
236 let Some(rest) = text.get(index..) else {
237 break;
238 };
239 if let Some(separator) = separator
240 && rest.starts_with(separator)
241 {
242 index += separator.len();
243 continue;
244 }
245 let Some(ch) = rest.chars().next() else {
246 break;
247 };
248 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
249 index += ch.len_utf8();
250 continue;
251 }
252 if hex_value(ch).is_some() {
253 digits.push((index, ch));
254 index += ch.len_utf8();
255 continue;
256 }
257 return Err(CodecError::InvalidHexDigit {
258 index,
259 character: ch,
260 });
261 }
262 Ok(digits)
263 }
264
265 /// Normalizes prefixed input characters into hex digits.
266 ///
267 /// # Parameters
268 /// - `text`: Text to decode.
269 /// - `prefix`: Required prefix before each byte.
270 ///
271 /// # Returns
272 /// Hex digits paired with their original character indexes.
273 ///
274 /// # Errors
275 /// Returns [`CodecError::MissingPrefix`] when a byte prefix is missing, or
276 /// [`CodecError::InvalidHexDigit`] for unsupported characters.
277 fn normalized_prefixed_digits(
278 &self,
279 text: &str,
280 prefix: &str,
281 ) -> CodecResult<Vec<(usize, char)>> {
282 let mut digits = Vec::with_capacity(text.len());
283 let separator = self
284 .separator
285 .as_deref()
286 .filter(|separator| !separator.is_empty());
287 let mut index = 0;
288 while index < text.len() {
289 index = self.skip_ignored(text, index, separator);
290 if index >= text.len() {
291 break;
292 }
293 let Some(rest) = text.get(index..) else {
294 break;
295 };
296 if !rest.starts_with(prefix) {
297 return Err(CodecError::MissingPrefix {
298 prefix: prefix.to_owned(),
299 });
300 }
301 index += prefix.len();
302
303 let mut digit_count = 0;
304 while digit_count < 2 && index < text.len() {
305 let Some(rest) = text.get(index..) else {
306 break;
307 };
308 let Some(ch) = rest.chars().next() else {
309 break;
310 };
311 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
312 index += ch.len_utf8();
313 continue;
314 }
315 if hex_value(ch).is_some() {
316 digits.push((index, ch));
317 index += ch.len_utf8();
318 digit_count += 1;
319 continue;
320 }
321 return Err(CodecError::InvalidHexDigit {
322 index,
323 character: ch,
324 });
325 }
326 }
327 Ok(digits)
328 }
329
330 /// Skips configured separators and ignored ASCII whitespace.
331 ///
332 /// # Parameters
333 /// - `text`: Text being decoded.
334 /// - `index`: Current byte index.
335 /// - `separator`: Optional configured separator.
336 ///
337 /// # Returns
338 /// The next byte index that should be parsed.
339 fn skip_ignored(&self, text: &str, mut index: usize, separator: Option<&str>) -> usize {
340 loop {
341 let Some(rest) = text.get(index..) else {
342 return index;
343 };
344 if let Some(separator) = separator
345 && rest.starts_with(separator)
346 {
347 index += separator.len();
348 continue;
349 }
350 let Some(ch) = rest.chars().next() else {
351 return index;
352 };
353 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
354 index += ch.len_utf8();
355 continue;
356 }
357 return index;
358 }
359 }
360}
361
362impl Default for HexCodec {
363 /// Creates a lowercase codec without prefix or separators.
364 fn default() -> Self {
365 Self::new()
366 }
367}
368
369impl Encoder<[u8]> for HexCodec {
370 type Error = CodecError;
371 type Output = String;
372
373 /// Encodes bytes into hexadecimal text.
374 fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
375 Ok(HexCodec::encode(self, input))
376 }
377}
378
379impl Decoder<str> for HexCodec {
380 type Error = CodecError;
381 type Output = Vec<u8>;
382
383 /// Decodes hexadecimal text into bytes.
384 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
385 HexCodec::decode(self, input)
386 }
387}
388
/// Maps a hexadecimal digit to the nibble it denotes.
///
/// Accepts `0`-`9`, `a`-`f` and `A`-`F`.
///
/// # Parameters
/// - `ch`: Character to inspect.
///
/// # Returns
/// The value in `0..=15`, or `None` when `ch` is not a hex digit.
fn hex_value(ch: char) -> Option<u8> {
    // A base-16 digit is at most 15, so the narrowing conversion never fails.
    ch.to_digit(16)
        .and_then(|digit| u8::try_from(digit).ok())
}
404
405/// Appends one encoded byte to `output`.
406///
407/// # Parameters
408/// - `byte`: Byte to encode.
409/// - `uppercase`: Whether to use uppercase digits.
410/// - `output`: Destination string.
411fn push_hex_byte(byte: u8, uppercase: bool, output: &mut String) {
412 output.push(hex_digit(byte >> 4, uppercase));
413 output.push(hex_digit(byte & 0x0f, uppercase));
414}
415
/// Renders a nibble as its hexadecimal digit.
///
/// # Parameters
/// - `value`: Nibble value.
/// - `uppercase`: Whether to use uppercase digits.
///
/// # Returns
/// Hexadecimal digit. Only the low four bits of `value` are used.
fn hex_digit(value: u8, uppercase: bool) -> char {
    let nibble = value & 0x0f;
    let code = if nibble < 10 {
        b'0' + nibble
    } else if uppercase {
        b'A' + (nibble - 10)
    } else {
        b'a' + (nibble - 10)
    };
    char::from(code)
}
449}