qubit_codec/hex_codec.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Hexadecimal byte codec.
11
12use crate::{
13 CodecError,
14 CodecResult,
15 Decoder,
16 Encoder,
17};
18
/// Configurable codec that converts between raw bytes and hexadecimal text.
///
/// The prefix and separator options shape both the encoded output and the
/// input accepted by the decoder; the two `ignore_*` options only relax
/// decoding.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HexCodec {
    /// Emit `A`-`F` rather than `a`-`f` when encoding.
    uppercase: bool,
    /// Text written once before the whole encoded string, if any.
    prefix: Option<String>,
    /// Text written before every encoded byte, if any.
    byte_prefix: Option<String>,
    /// Text written between consecutive encoded bytes, if any.
    separator: Option<String>,
    /// Skip ASCII whitespace while decoding instead of rejecting it.
    ignore_ascii_whitespace: bool,
    /// Match the configured prefixes without regard to ASCII case while
    /// decoding.
    ignore_prefix_case: bool,
}
35
36impl HexCodec {
37 /// Creates a lowercase codec without prefix or separators.
38 ///
39 /// # Returns
40 /// A hexadecimal codec using lowercase digits.
41 pub fn new() -> Self {
42 Self {
43 uppercase: false,
44 prefix: None,
45 byte_prefix: None,
46 separator: None,
47 ignore_ascii_whitespace: false,
48 ignore_prefix_case: false,
49 }
50 }
51
52 /// Creates an uppercase codec without prefix or separators.
53 ///
54 /// # Returns
55 /// A hexadecimal codec using uppercase digits.
56 pub fn upper() -> Self {
57 Self::new().with_uppercase(true)
58 }
59
60 /// Sets whether encoded digits should be uppercase.
61 ///
62 /// # Parameters
63 /// - `uppercase`: Whether to use uppercase hexadecimal digits.
64 ///
65 /// # Returns
66 /// The updated codec.
67 pub fn with_uppercase(mut self, uppercase: bool) -> Self {
68 self.uppercase = uppercase;
69 self
70 }
71
72 /// Sets a whole-output prefix.
73 ///
74 /// The prefix is written once before the encoded bytes and required once
75 /// before decoded input. For example, using prefix `0x` encodes bytes as
76 /// `0x1f8b`.
77 ///
78 /// # Parameters
79 /// - `prefix`: Whole-output prefix text such as `0x`.
80 ///
81 /// # Returns
82 /// The updated codec.
83 pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
84 self.prefix = Some(prefix.into());
85 self
86 }
87
88 /// Sets a per-byte prefix.
89 ///
90 /// The prefix is written before every encoded byte and required before
91 /// every decoded byte. For example, using byte prefix `0x` and separator
92 /// ` ` encodes bytes as `0x1f 0x8b`.
93 ///
94 /// # Parameters
95 /// - `prefix`: Per-byte prefix text such as `0x`.
96 ///
97 /// # Returns
98 /// The updated codec.
99 pub fn with_byte_prefix(mut self, prefix: impl Into<String>) -> Self {
100 self.byte_prefix = Some(prefix.into());
101 self
102 }
103
104 /// Sets a separator written and accepted between encoded bytes.
105 ///
106 /// # Parameters
107 /// - `separator`: Separator text.
108 ///
109 /// # Returns
110 /// The updated codec.
111 pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
112 self.separator = Some(separator.into());
113 self
114 }
115
116 /// Sets whether ASCII whitespace is ignored while decoding.
117 ///
118 /// # Parameters
119 /// - `ignore`: Whether to ignore ASCII whitespace.
120 ///
121 /// # Returns
122 /// The updated codec.
123 pub fn with_ignored_ascii_whitespace(mut self, ignore: bool) -> Self {
124 self.ignore_ascii_whitespace = ignore;
125 self
126 }
127
128 /// Sets whether ASCII case is ignored when decoding configured prefixes.
129 ///
130 /// This option affects whole-output prefixes and per-byte prefixes during
131 /// decoding only. Encoding writes prefixes exactly as configured.
132 ///
133 /// # Parameters
134 /// - `ignore`: Whether to ignore ASCII case while matching prefixes.
135 ///
136 /// # Returns
137 /// The updated codec.
138 pub fn with_ignore_prefix_case(mut self, ignore: bool) -> Self {
139 self.ignore_prefix_case = ignore;
140 self
141 }
142
143 /// Encodes bytes into a hexadecimal string.
144 ///
145 /// # Parameters
146 /// - `bytes`: Bytes to encode.
147 ///
148 /// # Returns
149 /// Hexadecimal text.
150 pub fn encode(&self, bytes: &[u8]) -> String {
151 let separator_len = self.separator.as_ref().map_or(0, String::len);
152 let prefix_len = self.prefix.as_ref().map_or(0, String::len);
153 let byte_prefix_len = self.byte_prefix.as_ref().map_or(0, String::len);
154 let capacity = prefix_len.saturating_add(
155 bytes
156 .len()
157 .saturating_mul(byte_prefix_len.saturating_add(2))
158 .saturating_add(bytes.len().saturating_sub(1).saturating_mul(separator_len)),
159 );
160 let mut output = String::with_capacity(capacity);
161 self.encode_into(bytes, &mut output);
162 output
163 }
164
165 /// Encodes bytes into an existing string.
166 ///
167 /// # Parameters
168 /// - `bytes`: Bytes to encode.
169 /// - `output`: Destination string.
170 pub fn encode_into(&self, bytes: &[u8], output: &mut String) {
171 if let Some(prefix) = &self.prefix {
172 output.push_str(prefix);
173 }
174 for (index, byte) in bytes.iter().enumerate() {
175 if index > 0
176 && let Some(separator) = &self.separator
177 {
178 output.push_str(separator);
179 }
180 if let Some(byte_prefix) = &self.byte_prefix {
181 output.push_str(byte_prefix);
182 }
183 push_hex_byte(*byte, self.uppercase, output);
184 }
185 }
186
187 /// Decodes hexadecimal text into bytes.
188 ///
189 /// # Parameters
190 /// - `text`: Hexadecimal text.
191 ///
192 /// # Returns
193 /// Decoded bytes.
194 ///
195 /// # Errors
196 /// Returns [`CodecError`] when a configured whole or per-byte prefix is missing,
197 /// when the normalized digit count is odd, or when a non-hex digit is found.
198 pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
199 let mut output = Vec::new();
200 self.decode_into(text, &mut output)?;
201 Ok(output)
202 }
203
204 /// Decodes hexadecimal text into an existing byte vector.
205 ///
206 /// # Parameters
207 /// - `text`: Hexadecimal text.
208 /// - `output`: Destination byte vector.
209 ///
210 /// # Errors
211 /// Returns [`CodecError`] when the input is malformed.
212 pub fn decode_into(&self, text: &str, output: &mut Vec<u8>) -> CodecResult<()> {
213 let digits = self.normalized_digits(text)?;
214 if digits.len() % 2 != 0 {
215 return Err(invalid_hex_length(digits.len()));
216 }
217 output.reserve(digits.len() / 2);
218 for pair in digits.chunks_exact(2) {
219 let mut pair = pair.iter();
220 let Some(&(high_index, high_char)) = pair.next() else {
221 continue;
222 };
223 let Some(&(low_index, low_char)) = pair.next() else {
224 continue;
225 };
226 let high = hex_value(high_char).ok_or(invalid_hex_digit(high_index, high_char))?;
227 let low = hex_value(low_char).ok_or(invalid_hex_digit(low_index, low_char))?;
228 output.push((high << 4) | low);
229 }
230 Ok(())
231 }
232
233 /// Normalizes accepted input characters into hex digits.
234 ///
235 /// # Parameters
236 /// - `text`: Text to decode.
237 ///
238 /// # Returns
239 /// Hex digits paired with their original character indexes.
240 ///
241 /// # Errors
242 /// Returns [`CodecError::InvalidDigit`] for unsupported characters.
243 fn normalized_digits(&self, text: &str) -> CodecResult<Vec<(usize, char)>> {
244 let start_index = self.consume_prefix(text)?;
245 if let Some(byte_prefix) = self.byte_prefix.as_deref().filter(|prefix| !prefix.is_empty()) {
246 return self.normalized_byte_prefixed_digits(text, byte_prefix, start_index);
247 }
248 self.normalized_unprefixed_digits(text, start_index)
249 }
250
251 /// Consumes the configured whole-output prefix.
252 ///
253 /// # Parameters
254 /// - `text`: Text to decode.
255 ///
256 /// # Returns
257 /// Byte index where byte parsing should start.
258 ///
259 /// # Errors
260 /// Returns [`CodecError::MissingPrefix`] when a non-empty whole-output
261 /// prefix is configured but absent.
262 fn consume_prefix(&self, text: &str) -> CodecResult<usize> {
263 let Some(prefix) = self.prefix.as_deref().filter(|prefix| !prefix.is_empty()) else {
264 return Ok(0);
265 };
266 let index = self.skip_ascii_whitespace(text, 0);
267 let Some(rest) = text.get(index..) else {
268 return Err(CodecError::MissingPrefix {
269 prefix: prefix.to_owned(),
270 });
271 };
272 if self.starts_with_prefix(rest, prefix) {
273 Ok(index + prefix.len())
274 } else {
275 Err(CodecError::MissingPrefix {
276 prefix: prefix.to_owned(),
277 })
278 }
279 }
280
281 /// Normalizes unprefixed input characters into hex digits.
282 ///
283 /// # Parameters
284 /// - `text`: Text to decode.
285 ///
286 /// # Returns
287 /// Hex digits paired with their original character indexes.
288 ///
289 /// # Errors
290 /// Returns [`CodecError::InvalidDigit`] for unsupported characters.
291 fn normalized_unprefixed_digits(&self, text: &str, mut index: usize) -> CodecResult<Vec<(usize, char)>> {
292 let mut digits = Vec::with_capacity(text.len());
293 let separator = self.separator.as_deref().filter(|separator| !separator.is_empty());
294 while index < text.len() {
295 let Some(rest) = text.get(index..) else {
296 break;
297 };
298 if let Some(separator) = separator
299 && rest.starts_with(separator)
300 {
301 index += separator.len();
302 continue;
303 }
304 let Some(ch) = rest.chars().next() else {
305 break;
306 };
307 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
308 index += ch.len_utf8();
309 continue;
310 }
311 if hex_value(ch).is_some() {
312 digits.push((index, ch));
313 index += ch.len_utf8();
314 continue;
315 }
316 return Err(invalid_hex_digit(index, ch));
317 }
318 Ok(digits)
319 }
320
321 /// Normalizes byte-prefixed input characters into hex digits.
322 ///
323 /// # Parameters
324 /// - `text`: Text to decode.
325 /// - `prefix`: Required prefix before each byte.
326 /// - `index`: Byte index where parsing should start.
327 ///
328 /// # Returns
329 /// Hex digits paired with their original character indexes.
330 ///
331 /// # Errors
332 /// Returns [`CodecError::MissingPrefix`] when a byte prefix is missing, or
333 /// [`CodecError::InvalidDigit`] for unsupported characters.
334 fn normalized_byte_prefixed_digits(
335 &self,
336 text: &str,
337 prefix: &str,
338 mut index: usize,
339 ) -> CodecResult<Vec<(usize, char)>> {
340 let mut digits = Vec::with_capacity(text.len());
341 let separator = self.separator.as_deref().filter(|separator| !separator.is_empty());
342 while index < text.len() {
343 index = self.skip_ignored(text, index, separator);
344 if index >= text.len() {
345 break;
346 }
347 let Some(rest) = text.get(index..) else {
348 break;
349 };
350 if !self.starts_with_prefix(rest, prefix) {
351 return Err(CodecError::MissingPrefix {
352 prefix: prefix.to_owned(),
353 });
354 }
355 index += prefix.len();
356
357 let mut digit_count = 0;
358 while digit_count < 2 && index < text.len() {
359 let Some(rest) = text.get(index..) else {
360 break;
361 };
362 let Some(ch) = rest.chars().next() else {
363 break;
364 };
365 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
366 index += ch.len_utf8();
367 continue;
368 }
369 if hex_value(ch).is_some() {
370 digits.push((index, ch));
371 index += ch.len_utf8();
372 digit_count += 1;
373 continue;
374 }
375 return Err(invalid_hex_digit(index, ch));
376 }
377 }
378 Ok(digits)
379 }
380
381 /// Skips configured separators and ignored ASCII whitespace.
382 ///
383 /// # Parameters
384 /// - `text`: Text being decoded.
385 /// - `index`: Current byte index.
386 /// - `separator`: Optional configured separator.
387 ///
388 /// # Returns
389 /// The next byte index that should be parsed.
390 fn skip_ignored(&self, text: &str, mut index: usize, separator: Option<&str>) -> usize {
391 loop {
392 let Some(rest) = text.get(index..) else {
393 return index;
394 };
395 if let Some(separator) = separator
396 && rest.starts_with(separator)
397 {
398 index += separator.len();
399 continue;
400 }
401 let Some(ch) = rest.chars().next() else {
402 return index;
403 };
404 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
405 index += ch.len_utf8();
406 continue;
407 }
408 return index;
409 }
410 }
411
412 /// Skips ignored leading ASCII whitespace.
413 ///
414 /// # Parameters
415 /// - `text`: Text being decoded.
416 /// - `index`: Current byte index.
417 ///
418 /// # Returns
419 /// The next byte index after ignored ASCII whitespace.
420 fn skip_ascii_whitespace(&self, text: &str, mut index: usize) -> usize {
421 while self.ignore_ascii_whitespace && index < text.len() {
422 let Some(rest) = text.get(index..) else {
423 return index;
424 };
425 let Some(ch) = rest.chars().next() else {
426 return index;
427 };
428 if !ch.is_ascii_whitespace() {
429 return index;
430 }
431 index += ch.len_utf8();
432 }
433 index
434 }
435
436 /// Tests whether `text` starts with a configured prefix.
437 ///
438 /// # Parameters
439 /// - `text`: Text slice to inspect.
440 /// - `prefix`: Configured prefix.
441 ///
442 /// # Returns
443 /// `true` when `text` starts with `prefix`, honoring the configured
444 /// ASCII case sensitivity for decoding prefixes.
445 fn starts_with_prefix(&self, text: &str, prefix: &str) -> bool {
446 if !self.ignore_prefix_case {
447 return text.starts_with(prefix);
448 }
449 let Some(candidate) = text.get(..prefix.len()) else {
450 return false;
451 };
452 candidate.eq_ignore_ascii_case(prefix)
453 }
454}
455
456impl Default for HexCodec {
457 /// Creates a lowercase codec without prefix or separators.
458 fn default() -> Self {
459 Self::new()
460 }
461}
462
463impl Encoder<[u8]> for HexCodec {
464 type Error = CodecError;
465 type Output = String;
466
467 /// Encodes bytes into hexadecimal text.
468 fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
469 Ok(HexCodec::encode(self, input))
470 }
471}
472
473impl Decoder<str> for HexCodec {
474 type Error = CodecError;
475 type Output = Vec<u8>;
476
477 /// Decodes hexadecimal text into bytes.
478 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
479 HexCodec::decode(self, input)
480 }
481}
482
/// Maps a single hexadecimal digit to the nibble it denotes.
///
/// # Parameters
/// - `ch`: Character to inspect.
///
/// # Returns
/// The value `0..=15` for `0`-`9`, `a`-`f` and `A`-`F`; `None` for every
/// other character.
fn hex_value(ch: char) -> Option<u8> {
    // With radix 16, `to_digit` accepts exactly the ASCII hex digits.
    let digit = ch.to_digit(16)?;
    // `digit` is below 16, so narrowing to `u8` cannot truncate.
    Some(digit as u8)
}
498
499/// Builds an invalid hexadecimal digit error.
500///
501/// # Parameters
502/// - `index`: Byte index of the invalid character in the original input.
503/// - `character`: Invalid character.
504///
505/// # Returns
506/// A radix-16 digit error.
507fn invalid_hex_digit(index: usize, character: char) -> CodecError {
508 CodecError::InvalidDigit {
509 radix: 16,
510 index,
511 character,
512 }
513}
514
515/// Builds an invalid hexadecimal length error.
516///
517/// # Parameters
518/// - `actual`: Number of normalized hexadecimal digits.
519///
520/// # Returns
521/// An invalid length error describing the even-digit requirement.
522fn invalid_hex_length(actual: usize) -> CodecError {
523 CodecError::InvalidLength {
524 context: "hex digits",
525 expected: "an even number of digits".to_owned(),
526 actual,
527 }
528}
529
530/// Appends one encoded byte to `output`.
531///
532/// # Parameters
533/// - `byte`: Byte to encode.
534/// - `uppercase`: Whether to use uppercase digits.
535/// - `output`: Destination string.
536fn push_hex_byte(byte: u8, uppercase: bool, output: &mut String) {
537 output.push(hex_digit(byte >> 4, uppercase));
538 output.push(hex_digit(byte & 0x0f, uppercase));
539}
540
/// Looks up the hexadecimal digit for a nibble.
///
/// # Parameters
/// - `value`: Nibble value.
/// - `uppercase`: Whether to use uppercase digits.
///
/// # Returns
/// Hexadecimal digit. Values above `0x0f` are masked to their low nibble.
fn hex_digit(value: u8, uppercase: bool) -> char {
    const LOWER: &[u8; 16] = b"0123456789abcdef";
    const UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let table = if uppercase { UPPER } else { LOWER };
    // Masking keeps the index within the 16-entry table.
    char::from(table[usize::from(value & 0x0f)])
}