qubit_codec/hex_codec.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Hexadecimal byte codec.
11
12use crate::{
13 CodecError,
14 CodecResult,
15 Decoder,
16 Encoder,
17};
18
/// A configurable codec that converts between bytes and hexadecimal text.
///
/// Digit case, an optional whole-output prefix, an optional per-byte prefix
/// and an optional separator shape the encoded text. Two further switches
/// relax how input is matched while decoding.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HexCodec {
    /// Emit `A`-`F` rather than `a`-`f` when encoding.
    uppercase: bool,
    /// Text placed once in front of the entire encoded string, if any.
    prefix: Option<String>,
    /// Text placed in front of every encoded byte, if any.
    byte_prefix: Option<String>,
    /// Text placed between consecutive encoded bytes, if any.
    separator: Option<String>,
    /// When set, ASCII whitespace in decoder input is skipped.
    ignore_ascii_whitespace: bool,
    /// When set, configured prefixes are matched without regard to ASCII
    /// case while decoding.
    ignore_prefix_case: bool,
}
35
36impl HexCodec {
37 /// Creates a lowercase codec without prefix or separators.
38 ///
39 /// # Returns
40 /// A hexadecimal codec using lowercase digits.
41 pub fn new() -> Self {
42 Self {
43 uppercase: false,
44 prefix: None,
45 byte_prefix: None,
46 separator: None,
47 ignore_ascii_whitespace: false,
48 ignore_prefix_case: false,
49 }
50 }
51
52 /// Creates an uppercase codec without prefix or separators.
53 ///
54 /// # Returns
55 /// A hexadecimal codec using uppercase digits.
56 pub fn upper() -> Self {
57 Self::new().with_uppercase(true)
58 }
59
60 /// Sets whether encoded digits should be uppercase.
61 ///
62 /// # Parameters
63 /// - `uppercase`: Whether to use uppercase hexadecimal digits.
64 ///
65 /// # Returns
66 /// The updated codec.
67 pub fn with_uppercase(mut self, uppercase: bool) -> Self {
68 self.uppercase = uppercase;
69 self
70 }
71
72 /// Sets a whole-output prefix.
73 ///
74 /// The prefix is written once before the encoded bytes and required once
75 /// before decoded input. For example, using prefix `0x` encodes bytes as
76 /// `0x1f8b`.
77 ///
78 /// # Parameters
79 /// - `prefix`: Whole-output prefix text such as `0x`.
80 ///
81 /// # Returns
82 /// The updated codec.
83 pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
84 self.prefix = Some(prefix.into());
85 self
86 }
87
88 /// Sets a per-byte prefix.
89 ///
90 /// The prefix is written before every encoded byte and required before
91 /// every decoded byte. For example, using byte prefix `0x` and separator
92 /// ` ` encodes bytes as `0x1f 0x8b`.
93 ///
94 /// # Parameters
95 /// - `prefix`: Per-byte prefix text such as `0x`.
96 ///
97 /// # Returns
98 /// The updated codec.
99 pub fn with_byte_prefix(mut self, prefix: impl Into<String>) -> Self {
100 self.byte_prefix = Some(prefix.into());
101 self
102 }
103
104 /// Sets a separator written and accepted between encoded bytes.
105 ///
106 /// # Parameters
107 /// - `separator`: Separator text.
108 ///
109 /// # Returns
110 /// The updated codec.
111 pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
112 self.separator = Some(separator.into());
113 self
114 }
115
116 /// Sets whether ASCII whitespace is ignored while decoding.
117 ///
118 /// # Parameters
119 /// - `ignore`: Whether to ignore ASCII whitespace.
120 ///
121 /// # Returns
122 /// The updated codec.
123 pub fn with_ignored_ascii_whitespace(mut self, ignore: bool) -> Self {
124 self.ignore_ascii_whitespace = ignore;
125 self
126 }
127
128 /// Sets whether ASCII case is ignored when decoding configured prefixes.
129 ///
130 /// This option affects whole-output prefixes and per-byte prefixes during
131 /// decoding only. Encoding writes prefixes exactly as configured.
132 ///
133 /// # Parameters
134 /// - `ignore`: Whether to ignore ASCII case while matching prefixes.
135 ///
136 /// # Returns
137 /// The updated codec.
138 pub fn with_ignore_prefix_case(mut self, ignore: bool) -> Self {
139 self.ignore_prefix_case = ignore;
140 self
141 }
142
143 /// Encodes bytes into a hexadecimal string.
144 ///
145 /// # Parameters
146 /// - `bytes`: Bytes to encode.
147 ///
148 /// # Returns
149 /// Hexadecimal text.
150 pub fn encode(&self, bytes: &[u8]) -> String {
151 let separator_len = self.separator.as_ref().map_or(0, String::len);
152 let prefix_len = self.prefix.as_ref().map_or(0, String::len);
153 let byte_prefix_len = self.byte_prefix.as_ref().map_or(0, String::len);
154 let capacity = prefix_len.saturating_add(
155 bytes
156 .len()
157 .saturating_mul(byte_prefix_len.saturating_add(2))
158 .saturating_add(bytes.len().saturating_sub(1).saturating_mul(separator_len)),
159 );
160 let mut output = String::with_capacity(capacity);
161 self.encode_into(bytes, &mut output);
162 output
163 }
164
165 /// Encodes bytes into an existing string.
166 ///
167 /// # Parameters
168 /// - `bytes`: Bytes to encode.
169 /// - `output`: Destination string.
170 pub fn encode_into(&self, bytes: &[u8], output: &mut String) {
171 if let Some(prefix) = &self.prefix {
172 output.push_str(prefix);
173 }
174 for (index, byte) in bytes.iter().enumerate() {
175 if index > 0
176 && let Some(separator) = &self.separator
177 {
178 output.push_str(separator);
179 }
180 if let Some(byte_prefix) = &self.byte_prefix {
181 output.push_str(byte_prefix);
182 }
183 push_hex_byte(*byte, self.uppercase, output);
184 }
185 }
186
187 /// Decodes hexadecimal text into bytes.
188 ///
189 /// # Parameters
190 /// - `text`: Hexadecimal text.
191 ///
192 /// # Returns
193 /// Decoded bytes.
194 ///
195 /// # Errors
196 /// Returns [`CodecError`] when a configured whole or per-byte prefix is missing,
197 /// when the normalized digit count is odd, or when a non-hex digit is found.
198 pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
199 let mut output = Vec::new();
200 self.decode_into(text, &mut output)?;
201 Ok(output)
202 }
203
204 /// Decodes hexadecimal text into an existing byte vector.
205 ///
206 /// # Parameters
207 /// - `text`: Hexadecimal text.
208 /// - `output`: Destination byte vector.
209 ///
210 /// # Errors
211 /// Returns [`CodecError`] when the input is malformed.
212 pub fn decode_into(&self, text: &str, output: &mut Vec<u8>) -> CodecResult<()> {
213 let digits = self.normalized_digits(text)?;
214 if digits.len() % 2 != 0 {
215 return Err(invalid_hex_length(digits.len()));
216 }
217 output.reserve(digits.len() / 2);
218 for pair in digits.chunks_exact(2) {
219 let mut pair = pair.iter();
220 let Some(&(high_index, high_char)) = pair.next() else {
221 continue;
222 };
223 let Some(&(low_index, low_char)) = pair.next() else {
224 continue;
225 };
226 let high = hex_value(high_char).ok_or(invalid_hex_digit(high_index, high_char))?;
227 let low = hex_value(low_char).ok_or(invalid_hex_digit(low_index, low_char))?;
228 output.push((high << 4) | low);
229 }
230 Ok(())
231 }
232
233 /// Normalizes accepted input characters into hex digits.
234 ///
235 /// # Parameters
236 /// - `text`: Text to decode.
237 ///
238 /// # Returns
239 /// Hex digits paired with their original character indexes.
240 ///
241 /// # Errors
242 /// Returns [`CodecError::InvalidDigit`] for unsupported characters.
243 fn normalized_digits(&self, text: &str) -> CodecResult<Vec<(usize, char)>> {
244 let start_index = self.consume_prefix(text)?;
245 if let Some(byte_prefix) = self
246 .byte_prefix
247 .as_deref()
248 .filter(|prefix| !prefix.is_empty())
249 {
250 return self.normalized_byte_prefixed_digits(text, byte_prefix, start_index);
251 }
252 self.normalized_unprefixed_digits(text, start_index)
253 }
254
255 /// Consumes the configured whole-output prefix.
256 ///
257 /// # Parameters
258 /// - `text`: Text to decode.
259 ///
260 /// # Returns
261 /// Byte index where byte parsing should start.
262 ///
263 /// # Errors
264 /// Returns [`CodecError::MissingPrefix`] when a non-empty whole-output
265 /// prefix is configured but absent.
266 fn consume_prefix(&self, text: &str) -> CodecResult<usize> {
267 let Some(prefix) = self.prefix.as_deref().filter(|prefix| !prefix.is_empty()) else {
268 return Ok(0);
269 };
270 let index = self.skip_ascii_whitespace(text, 0);
271 let Some(rest) = text.get(index..) else {
272 return Err(CodecError::MissingPrefix {
273 prefix: prefix.to_owned(),
274 });
275 };
276 if self.starts_with_prefix(rest, prefix) {
277 Ok(index + prefix.len())
278 } else {
279 Err(CodecError::MissingPrefix {
280 prefix: prefix.to_owned(),
281 })
282 }
283 }
284
285 /// Normalizes unprefixed input characters into hex digits.
286 ///
287 /// # Parameters
288 /// - `text`: Text to decode.
289 ///
290 /// # Returns
291 /// Hex digits paired with their original character indexes.
292 ///
293 /// # Errors
294 /// Returns [`CodecError::InvalidDigit`] for unsupported characters.
295 fn normalized_unprefixed_digits(
296 &self,
297 text: &str,
298 mut index: usize,
299 ) -> CodecResult<Vec<(usize, char)>> {
300 let mut digits = Vec::with_capacity(text.len());
301 let separator = self
302 .separator
303 .as_deref()
304 .filter(|separator| !separator.is_empty());
305 while index < text.len() {
306 let Some(rest) = text.get(index..) else {
307 break;
308 };
309 if let Some(separator) = separator
310 && rest.starts_with(separator)
311 {
312 index += separator.len();
313 continue;
314 }
315 let Some(ch) = rest.chars().next() else {
316 break;
317 };
318 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
319 index += ch.len_utf8();
320 continue;
321 }
322 if hex_value(ch).is_some() {
323 digits.push((index, ch));
324 index += ch.len_utf8();
325 continue;
326 }
327 return Err(invalid_hex_digit(index, ch));
328 }
329 Ok(digits)
330 }
331
332 /// Normalizes byte-prefixed input characters into hex digits.
333 ///
334 /// # Parameters
335 /// - `text`: Text to decode.
336 /// - `prefix`: Required prefix before each byte.
337 /// - `index`: Byte index where parsing should start.
338 ///
339 /// # Returns
340 /// Hex digits paired with their original character indexes.
341 ///
342 /// # Errors
343 /// Returns [`CodecError::MissingPrefix`] when a byte prefix is missing, or
344 /// [`CodecError::InvalidDigit`] for unsupported characters.
345 fn normalized_byte_prefixed_digits(
346 &self,
347 text: &str,
348 prefix: &str,
349 mut index: usize,
350 ) -> CodecResult<Vec<(usize, char)>> {
351 let mut digits = Vec::with_capacity(text.len());
352 let separator = self
353 .separator
354 .as_deref()
355 .filter(|separator| !separator.is_empty());
356 while index < text.len() {
357 index = self.skip_ignored(text, index, separator);
358 if index >= text.len() {
359 break;
360 }
361 let Some(rest) = text.get(index..) else {
362 break;
363 };
364 if !self.starts_with_prefix(rest, prefix) {
365 return Err(CodecError::MissingPrefix {
366 prefix: prefix.to_owned(),
367 });
368 }
369 index += prefix.len();
370
371 let mut digit_count = 0;
372 while digit_count < 2 && index < text.len() {
373 let Some(rest) = text.get(index..) else {
374 break;
375 };
376 let Some(ch) = rest.chars().next() else {
377 break;
378 };
379 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
380 index += ch.len_utf8();
381 continue;
382 }
383 if hex_value(ch).is_some() {
384 digits.push((index, ch));
385 index += ch.len_utf8();
386 digit_count += 1;
387 continue;
388 }
389 return Err(invalid_hex_digit(index, ch));
390 }
391 }
392 Ok(digits)
393 }
394
395 /// Skips configured separators and ignored ASCII whitespace.
396 ///
397 /// # Parameters
398 /// - `text`: Text being decoded.
399 /// - `index`: Current byte index.
400 /// - `separator`: Optional configured separator.
401 ///
402 /// # Returns
403 /// The next byte index that should be parsed.
404 fn skip_ignored(&self, text: &str, mut index: usize, separator: Option<&str>) -> usize {
405 loop {
406 let Some(rest) = text.get(index..) else {
407 return index;
408 };
409 if let Some(separator) = separator
410 && rest.starts_with(separator)
411 {
412 index += separator.len();
413 continue;
414 }
415 let Some(ch) = rest.chars().next() else {
416 return index;
417 };
418 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
419 index += ch.len_utf8();
420 continue;
421 }
422 return index;
423 }
424 }
425
426 /// Skips ignored leading ASCII whitespace.
427 ///
428 /// # Parameters
429 /// - `text`: Text being decoded.
430 /// - `index`: Current byte index.
431 ///
432 /// # Returns
433 /// The next byte index after ignored ASCII whitespace.
434 fn skip_ascii_whitespace(&self, text: &str, mut index: usize) -> usize {
435 while self.ignore_ascii_whitespace && index < text.len() {
436 let Some(rest) = text.get(index..) else {
437 return index;
438 };
439 let Some(ch) = rest.chars().next() else {
440 return index;
441 };
442 if !ch.is_ascii_whitespace() {
443 return index;
444 }
445 index += ch.len_utf8();
446 }
447 index
448 }
449
450 /// Tests whether `text` starts with a configured prefix.
451 ///
452 /// # Parameters
453 /// - `text`: Text slice to inspect.
454 /// - `prefix`: Configured prefix.
455 ///
456 /// # Returns
457 /// `true` when `text` starts with `prefix`, honoring the configured
458 /// ASCII case sensitivity for decoding prefixes.
459 fn starts_with_prefix(&self, text: &str, prefix: &str) -> bool {
460 if !self.ignore_prefix_case {
461 return text.starts_with(prefix);
462 }
463 let Some(candidate) = text.get(..prefix.len()) else {
464 return false;
465 };
466 candidate.eq_ignore_ascii_case(prefix)
467 }
468}
469
470impl Default for HexCodec {
471 /// Creates a lowercase codec without prefix or separators.
472 fn default() -> Self {
473 Self::new()
474 }
475}
476
477impl Encoder<[u8]> for HexCodec {
478 type Error = CodecError;
479 type Output = String;
480
481 /// Encodes bytes into hexadecimal text.
482 fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
483 Ok(HexCodec::encode(self, input))
484 }
485}
486
487impl Decoder<str> for HexCodec {
488 type Error = CodecError;
489 type Output = Vec<u8>;
490
491 /// Decodes hexadecimal text into bytes.
492 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
493 HexCodec::decode(self, input)
494 }
495}
496
/// Maps a hexadecimal digit character to the nibble it denotes.
///
/// Both `a`-`f` and `A`-`F` are accepted alongside `0`-`9`.
///
/// # Parameters
/// - `ch`: Character to inspect.
///
/// # Returns
/// Nibble value, or `None` when `ch` is not a hex digit.
fn hex_value(ch: char) -> Option<u8> {
    // A radix-16 digit is always below 16, so narrowing to `u8` is lossless.
    ch.to_digit(16).map(|digit| digit as u8)
}
512
513/// Builds an invalid hexadecimal digit error.
514///
515/// # Parameters
516/// - `index`: Byte index of the invalid character in the original input.
517/// - `character`: Invalid character.
518///
519/// # Returns
520/// A radix-16 digit error.
521fn invalid_hex_digit(index: usize, character: char) -> CodecError {
522 CodecError::InvalidDigit {
523 radix: 16,
524 index,
525 character,
526 }
527}
528
529/// Builds an invalid hexadecimal length error.
530///
531/// # Parameters
532/// - `actual`: Number of normalized hexadecimal digits.
533///
534/// # Returns
535/// An invalid length error describing the even-digit requirement.
536fn invalid_hex_length(actual: usize) -> CodecError {
537 CodecError::InvalidLength {
538 context: "hex digits",
539 expected: "an even number of digits".to_owned(),
540 actual,
541 }
542}
543
544/// Appends one encoded byte to `output`.
545///
546/// # Parameters
547/// - `byte`: Byte to encode.
548/// - `uppercase`: Whether to use uppercase digits.
549/// - `output`: Destination string.
550fn push_hex_byte(byte: u8, uppercase: bool, output: &mut String) {
551 output.push(hex_digit(byte >> 4, uppercase));
552 output.push(hex_digit(byte & 0x0f, uppercase));
553}
554
/// Looks up the hexadecimal digit character for a nibble.
///
/// # Parameters
/// - `value`: Nibble value.
/// - `uppercase`: Whether to use uppercase digits.
///
/// # Returns
/// Hexadecimal digit. Values above `0x0f` are masked to their low nibble.
fn hex_digit(value: u8, uppercase: bool) -> char {
    const LOWER: &[u8; 16] = b"0123456789abcdef";
    const UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let table = if uppercase { UPPER } else { LOWER };
    // The mask keeps the index within 0..=15, so the lookup cannot panic.
    char::from(table[usize::from(value & 0x0f)])
}