Skip to main content

qubit_codec/
hex_codec.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Hexadecimal byte codec.
11
12use crate::{
13    CodecError,
14    CodecResult,
15    Decoder,
16    Encoder,
17};
18
/// Encodes and decodes hexadecimal byte strings.
///
/// A codec is configured through the `with_*` builder methods. Encoding
/// writes the optional whole-output prefix once, then every byte as two
/// hexadecimal digits, each preceded by the optional per-byte prefix and
/// separated by the optional separator. Decoding accepts digits of either
/// case and treats an empty prefix or separator as if none were configured.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HexCodec {
    /// Whether to use uppercase hexadecimal digits.
    ///
    /// Only affects encoding; decoding accepts both `a`-`f` and `A`-`F`.
    uppercase: bool,
    /// The prefix to use before the whole encoded string.
    ///
    /// Written once when encoding and, when non-empty, required once at the
    /// start of decoded input.
    prefix: Option<String>,
    /// The prefix to use before each encoded byte.
    ///
    /// Written before every byte when encoding and, when non-empty, required
    /// before every byte when decoding.
    byte_prefix: Option<String>,
    /// The separator to use between bytes in the encoded string.
    ///
    /// Non-empty separators are skipped while decoding.
    separator: Option<String>,
    /// Whether to ignore ASCII whitespace while decoding.
    ignore_ascii_whitespace: bool,
    /// Whether to ignore ASCII case when matching configured prefixes.
    ///
    /// Applies to both the whole-output prefix and the per-byte prefix, and
    /// only while decoding.
    ignore_prefix_case: bool,
}
35
36impl HexCodec {
37    /// Creates a lowercase codec without prefix or separators.
38    ///
39    /// # Returns
40    /// A hexadecimal codec using lowercase digits.
41    pub fn new() -> Self {
42        Self {
43            uppercase: false,
44            prefix: None,
45            byte_prefix: None,
46            separator: None,
47            ignore_ascii_whitespace: false,
48            ignore_prefix_case: false,
49        }
50    }
51
52    /// Creates an uppercase codec without prefix or separators.
53    ///
54    /// # Returns
55    /// A hexadecimal codec using uppercase digits.
56    pub fn upper() -> Self {
57        Self::new().with_uppercase(true)
58    }
59
60    /// Sets whether encoded digits should be uppercase.
61    ///
62    /// # Parameters
63    /// - `uppercase`: Whether to use uppercase hexadecimal digits.
64    ///
65    /// # Returns
66    /// The updated codec.
67    pub fn with_uppercase(mut self, uppercase: bool) -> Self {
68        self.uppercase = uppercase;
69        self
70    }
71
72    /// Sets a whole-output prefix.
73    ///
74    /// The prefix is written once before the encoded bytes and required once
75    /// before decoded input. For example, using prefix `0x` encodes bytes as
76    /// `0x1f8b`.
77    ///
78    /// # Parameters
79    /// - `prefix`: Whole-output prefix text such as `0x`.
80    ///
81    /// # Returns
82    /// The updated codec.
83    pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
84        self.prefix = Some(prefix.into());
85        self
86    }
87
88    /// Sets a per-byte prefix.
89    ///
90    /// The prefix is written before every encoded byte and required before
91    /// every decoded byte. For example, using byte prefix `0x` and separator
92    /// ` ` encodes bytes as `0x1f 0x8b`.
93    ///
94    /// # Parameters
95    /// - `prefix`: Per-byte prefix text such as `0x`.
96    ///
97    /// # Returns
98    /// The updated codec.
99    pub fn with_byte_prefix(mut self, prefix: impl Into<String>) -> Self {
100        self.byte_prefix = Some(prefix.into());
101        self
102    }
103
104    /// Sets a separator written and accepted between encoded bytes.
105    ///
106    /// # Parameters
107    /// - `separator`: Separator text.
108    ///
109    /// # Returns
110    /// The updated codec.
111    pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
112        self.separator = Some(separator.into());
113        self
114    }
115
116    /// Sets whether ASCII whitespace is ignored while decoding.
117    ///
118    /// # Parameters
119    /// - `ignore`: Whether to ignore ASCII whitespace.
120    ///
121    /// # Returns
122    /// The updated codec.
123    pub fn with_ignored_ascii_whitespace(mut self, ignore: bool) -> Self {
124        self.ignore_ascii_whitespace = ignore;
125        self
126    }
127
128    /// Sets whether ASCII case is ignored when decoding configured prefixes.
129    ///
130    /// This option affects whole-output prefixes and per-byte prefixes during
131    /// decoding only. Encoding writes prefixes exactly as configured.
132    ///
133    /// # Parameters
134    /// - `ignore`: Whether to ignore ASCII case while matching prefixes.
135    ///
136    /// # Returns
137    /// The updated codec.
138    pub fn with_ignore_prefix_case(mut self, ignore: bool) -> Self {
139        self.ignore_prefix_case = ignore;
140        self
141    }
142
143    /// Encodes bytes into a hexadecimal string.
144    ///
145    /// # Parameters
146    /// - `bytes`: Bytes to encode.
147    ///
148    /// # Returns
149    /// Hexadecimal text.
150    pub fn encode(&self, bytes: &[u8]) -> String {
151        let separator_len = self.separator.as_ref().map_or(0, String::len);
152        let prefix_len = self.prefix.as_ref().map_or(0, String::len);
153        let byte_prefix_len = self.byte_prefix.as_ref().map_or(0, String::len);
154        let capacity = prefix_len.saturating_add(
155            bytes
156                .len()
157                .saturating_mul(byte_prefix_len.saturating_add(2))
158                .saturating_add(bytes.len().saturating_sub(1).saturating_mul(separator_len)),
159        );
160        let mut output = String::with_capacity(capacity);
161        self.encode_into(bytes, &mut output);
162        output
163    }
164
165    /// Encodes bytes into an existing string.
166    ///
167    /// # Parameters
168    /// - `bytes`: Bytes to encode.
169    /// - `output`: Destination string.
170    pub fn encode_into(&self, bytes: &[u8], output: &mut String) {
171        if let Some(prefix) = &self.prefix {
172            output.push_str(prefix);
173        }
174        for (index, byte) in bytes.iter().enumerate() {
175            if index > 0
176                && let Some(separator) = &self.separator
177            {
178                output.push_str(separator);
179            }
180            if let Some(byte_prefix) = &self.byte_prefix {
181                output.push_str(byte_prefix);
182            }
183            push_hex_byte(*byte, self.uppercase, output);
184        }
185    }
186
187    /// Decodes hexadecimal text into bytes.
188    ///
189    /// # Parameters
190    /// - `text`: Hexadecimal text.
191    ///
192    /// # Returns
193    /// Decoded bytes.
194    ///
195    /// # Errors
196    /// Returns [`CodecError`] when a configured whole or per-byte prefix is missing,
197    /// when the normalized digit count is odd, or when a non-hex digit is found.
198    pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
199        let mut output = Vec::new();
200        self.decode_into(text, &mut output)?;
201        Ok(output)
202    }
203
204    /// Decodes hexadecimal text into an existing byte vector.
205    ///
206    /// # Parameters
207    /// - `text`: Hexadecimal text.
208    /// - `output`: Destination byte vector.
209    ///
210    /// # Errors
211    /// Returns [`CodecError`] when the input is malformed.
212    pub fn decode_into(&self, text: &str, output: &mut Vec<u8>) -> CodecResult<()> {
213        let digits = self.normalized_digits(text)?;
214        if digits.len() % 2 != 0 {
215            return Err(invalid_hex_length(digits.len()));
216        }
217        output.reserve(digits.len() / 2);
218        for pair in digits.chunks_exact(2) {
219            let mut pair = pair.iter();
220            let Some(&(high_index, high_char)) = pair.next() else {
221                continue;
222            };
223            let Some(&(low_index, low_char)) = pair.next() else {
224                continue;
225            };
226            let high = hex_value(high_char).ok_or(invalid_hex_digit(high_index, high_char))?;
227            let low = hex_value(low_char).ok_or(invalid_hex_digit(low_index, low_char))?;
228            output.push((high << 4) | low);
229        }
230        Ok(())
231    }
232
233    /// Normalizes accepted input characters into hex digits.
234    ///
235    /// # Parameters
236    /// - `text`: Text to decode.
237    ///
238    /// # Returns
239    /// Hex digits paired with their original character indexes.
240    ///
241    /// # Errors
242    /// Returns [`CodecError::InvalidDigit`] for unsupported characters.
243    fn normalized_digits(&self, text: &str) -> CodecResult<Vec<(usize, char)>> {
244        let start_index = self.consume_prefix(text)?;
245        if let Some(byte_prefix) = self
246            .byte_prefix
247            .as_deref()
248            .filter(|prefix| !prefix.is_empty())
249        {
250            return self.normalized_byte_prefixed_digits(text, byte_prefix, start_index);
251        }
252        self.normalized_unprefixed_digits(text, start_index)
253    }
254
255    /// Consumes the configured whole-output prefix.
256    ///
257    /// # Parameters
258    /// - `text`: Text to decode.
259    ///
260    /// # Returns
261    /// Byte index where byte parsing should start.
262    ///
263    /// # Errors
264    /// Returns [`CodecError::MissingPrefix`] when a non-empty whole-output
265    /// prefix is configured but absent.
266    fn consume_prefix(&self, text: &str) -> CodecResult<usize> {
267        let Some(prefix) = self.prefix.as_deref().filter(|prefix| !prefix.is_empty()) else {
268            return Ok(0);
269        };
270        let index = self.skip_ascii_whitespace(text, 0);
271        let Some(rest) = text.get(index..) else {
272            return Err(CodecError::MissingPrefix {
273                prefix: prefix.to_owned(),
274            });
275        };
276        if self.starts_with_prefix(rest, prefix) {
277            Ok(index + prefix.len())
278        } else {
279            Err(CodecError::MissingPrefix {
280                prefix: prefix.to_owned(),
281            })
282        }
283    }
284
285    /// Normalizes unprefixed input characters into hex digits.
286    ///
287    /// # Parameters
288    /// - `text`: Text to decode.
289    ///
290    /// # Returns
291    /// Hex digits paired with their original character indexes.
292    ///
293    /// # Errors
294    /// Returns [`CodecError::InvalidDigit`] for unsupported characters.
295    fn normalized_unprefixed_digits(
296        &self,
297        text: &str,
298        mut index: usize,
299    ) -> CodecResult<Vec<(usize, char)>> {
300        let mut digits = Vec::with_capacity(text.len());
301        let separator = self
302            .separator
303            .as_deref()
304            .filter(|separator| !separator.is_empty());
305        while index < text.len() {
306            let Some(rest) = text.get(index..) else {
307                break;
308            };
309            if let Some(separator) = separator
310                && rest.starts_with(separator)
311            {
312                index += separator.len();
313                continue;
314            }
315            let Some(ch) = rest.chars().next() else {
316                break;
317            };
318            if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
319                index += ch.len_utf8();
320                continue;
321            }
322            if hex_value(ch).is_some() {
323                digits.push((index, ch));
324                index += ch.len_utf8();
325                continue;
326            }
327            return Err(invalid_hex_digit(index, ch));
328        }
329        Ok(digits)
330    }
331
332    /// Normalizes byte-prefixed input characters into hex digits.
333    ///
334    /// # Parameters
335    /// - `text`: Text to decode.
336    /// - `prefix`: Required prefix before each byte.
337    /// - `index`: Byte index where parsing should start.
338    ///
339    /// # Returns
340    /// Hex digits paired with their original character indexes.
341    ///
342    /// # Errors
343    /// Returns [`CodecError::MissingPrefix`] when a byte prefix is missing, or
344    /// [`CodecError::InvalidDigit`] for unsupported characters.
345    fn normalized_byte_prefixed_digits(
346        &self,
347        text: &str,
348        prefix: &str,
349        mut index: usize,
350    ) -> CodecResult<Vec<(usize, char)>> {
351        let mut digits = Vec::with_capacity(text.len());
352        let separator = self
353            .separator
354            .as_deref()
355            .filter(|separator| !separator.is_empty());
356        while index < text.len() {
357            index = self.skip_ignored(text, index, separator);
358            if index >= text.len() {
359                break;
360            }
361            let Some(rest) = text.get(index..) else {
362                break;
363            };
364            if !self.starts_with_prefix(rest, prefix) {
365                return Err(CodecError::MissingPrefix {
366                    prefix: prefix.to_owned(),
367                });
368            }
369            index += prefix.len();
370
371            let mut digit_count = 0;
372            while digit_count < 2 && index < text.len() {
373                let Some(rest) = text.get(index..) else {
374                    break;
375                };
376                let Some(ch) = rest.chars().next() else {
377                    break;
378                };
379                if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
380                    index += ch.len_utf8();
381                    continue;
382                }
383                if hex_value(ch).is_some() {
384                    digits.push((index, ch));
385                    index += ch.len_utf8();
386                    digit_count += 1;
387                    continue;
388                }
389                return Err(invalid_hex_digit(index, ch));
390            }
391        }
392        Ok(digits)
393    }
394
395    /// Skips configured separators and ignored ASCII whitespace.
396    ///
397    /// # Parameters
398    /// - `text`: Text being decoded.
399    /// - `index`: Current byte index.
400    /// - `separator`: Optional configured separator.
401    ///
402    /// # Returns
403    /// The next byte index that should be parsed.
404    fn skip_ignored(&self, text: &str, mut index: usize, separator: Option<&str>) -> usize {
405        loop {
406            let Some(rest) = text.get(index..) else {
407                return index;
408            };
409            if let Some(separator) = separator
410                && rest.starts_with(separator)
411            {
412                index += separator.len();
413                continue;
414            }
415            let Some(ch) = rest.chars().next() else {
416                return index;
417            };
418            if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
419                index += ch.len_utf8();
420                continue;
421            }
422            return index;
423        }
424    }
425
426    /// Skips ignored leading ASCII whitespace.
427    ///
428    /// # Parameters
429    /// - `text`: Text being decoded.
430    /// - `index`: Current byte index.
431    ///
432    /// # Returns
433    /// The next byte index after ignored ASCII whitespace.
434    fn skip_ascii_whitespace(&self, text: &str, mut index: usize) -> usize {
435        while self.ignore_ascii_whitespace && index < text.len() {
436            let Some(rest) = text.get(index..) else {
437                return index;
438            };
439            let Some(ch) = rest.chars().next() else {
440                return index;
441            };
442            if !ch.is_ascii_whitespace() {
443                return index;
444            }
445            index += ch.len_utf8();
446        }
447        index
448    }
449
450    /// Tests whether `text` starts with a configured prefix.
451    ///
452    /// # Parameters
453    /// - `text`: Text slice to inspect.
454    /// - `prefix`: Configured prefix.
455    ///
456    /// # Returns
457    /// `true` when `text` starts with `prefix`, honoring the configured
458    /// ASCII case sensitivity for decoding prefixes.
459    fn starts_with_prefix(&self, text: &str, prefix: &str) -> bool {
460        if !self.ignore_prefix_case {
461            return text.starts_with(prefix);
462        }
463        let Some(candidate) = text.get(..prefix.len()) else {
464            return false;
465        };
466        candidate.eq_ignore_ascii_case(prefix)
467    }
468}
469
470impl Default for HexCodec {
471    /// Creates a lowercase codec without prefix or separators.
472    fn default() -> Self {
473        Self::new()
474    }
475}
476
477impl Encoder<[u8]> for HexCodec {
478    type Error = CodecError;
479    type Output = String;
480
481    /// Encodes bytes into hexadecimal text.
482    fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
483        Ok(HexCodec::encode(self, input))
484    }
485}
486
487impl Decoder<str> for HexCodec {
488    type Error = CodecError;
489    type Output = Vec<u8>;
490
491    /// Decodes hexadecimal text into bytes.
492    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
493        HexCodec::decode(self, input)
494    }
495}
496
/// Maps a hexadecimal digit character to the nibble it represents.
///
/// Accepts `0`-`9`, `a`-`f`, and `A`-`F`; every other character, including
/// non-ASCII digits, is rejected.
///
/// # Parameters
/// - `ch`: Character to inspect.
///
/// # Returns
/// Nibble value, or `None` when `ch` is not a hex digit.
fn hex_value(ch: char) -> Option<u8> {
    // `to_digit(16)` yields values below 16, so the narrowing never fails.
    ch.to_digit(16)
        .and_then(|nibble| u8::try_from(nibble).ok())
}
512
513/// Builds an invalid hexadecimal digit error.
514///
515/// # Parameters
516/// - `index`: Byte index of the invalid character in the original input.
517/// - `character`: Invalid character.
518///
519/// # Returns
520/// A radix-16 digit error.
521fn invalid_hex_digit(index: usize, character: char) -> CodecError {
522    CodecError::InvalidDigit {
523        radix: 16,
524        index,
525        character,
526    }
527}
528
529/// Builds an invalid hexadecimal length error.
530///
531/// # Parameters
532/// - `actual`: Number of normalized hexadecimal digits.
533///
534/// # Returns
535/// An invalid length error describing the even-digit requirement.
536fn invalid_hex_length(actual: usize) -> CodecError {
537    CodecError::InvalidLength {
538        context: "hex digits",
539        expected: "an even number of digits".to_owned(),
540        actual,
541    }
542}
543
544/// Appends one encoded byte to `output`.
545///
546/// # Parameters
547/// - `byte`: Byte to encode.
548/// - `uppercase`: Whether to use uppercase digits.
549/// - `output`: Destination string.
550fn push_hex_byte(byte: u8, uppercase: bool, output: &mut String) {
551    output.push(hex_digit(byte >> 4, uppercase));
552    output.push(hex_digit(byte & 0x0f, uppercase));
553}
554
/// Maps a nibble to the hexadecimal digit that represents it.
///
/// # Parameters
/// - `value`: Nibble value.
/// - `uppercase`: Whether to use uppercase digits.
///
/// # Returns
/// Hexadecimal digit. Values above `0x0f` are masked to their low nibble.
fn hex_digit(value: u8, uppercase: bool) -> char {
    const LOWER: &[u8; 16] = b"0123456789abcdef";
    const UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let table = if uppercase { UPPER } else { LOWER };
    // The mask keeps the index within the 16-entry table.
    char::from(table[usize::from(value & 0x0f)])
}