Skip to main content

qubit_codec/
hex_codec.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Hexadecimal byte codec.
11
12use crate::{
13    CodecError,
14    CodecResult,
15    Decoder,
16    Encoder,
17};
18
/// Configurable codec that converts between bytes and hexadecimal text.
///
/// The fields hold the formatting options applied while encoding (digit
/// case, whole-output prefix, per-byte prefix, separator) and the leniency
/// options applied while decoding (whitespace skipping, case-insensitive
/// prefix matching).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HexCodec {
    /// `true` to emit `A`-`F`, `false` to emit `a`-`f`.
    uppercase: bool,
    /// Text written once before all encoded bytes, if any.
    prefix: Option<String>,
    /// Text written in front of every encoded byte, if any.
    byte_prefix: Option<String>,
    /// Text written between adjacent encoded bytes, if any.
    separator: Option<String>,
    /// `true` to skip ASCII whitespace while decoding.
    ignore_ascii_whitespace: bool,
    /// `true` to match configured prefixes without regard to ASCII case
    /// while decoding.
    ignore_prefix_case: bool,
}
35
36impl HexCodec {
37    /// Creates a lowercase codec without prefix or separators.
38    ///
39    /// # Returns
40    /// A hexadecimal codec using lowercase digits.
41    pub fn new() -> Self {
42        Self {
43            uppercase: false,
44            prefix: None,
45            byte_prefix: None,
46            separator: None,
47            ignore_ascii_whitespace: false,
48            ignore_prefix_case: false,
49        }
50    }
51
52    /// Creates an uppercase codec without prefix or separators.
53    ///
54    /// # Returns
55    /// A hexadecimal codec using uppercase digits.
56    pub fn upper() -> Self {
57        Self::new().with_uppercase(true)
58    }
59
60    /// Sets whether encoded digits should be uppercase.
61    ///
62    /// # Parameters
63    /// - `uppercase`: Whether to use uppercase hexadecimal digits.
64    ///
65    /// # Returns
66    /// The updated codec.
67    pub fn with_uppercase(mut self, uppercase: bool) -> Self {
68        self.uppercase = uppercase;
69        self
70    }
71
72    /// Sets a whole-output prefix.
73    ///
74    /// The prefix is written once before the encoded bytes and required once
75    /// before decoded input. For example, using prefix `0x` encodes bytes as
76    /// `0x1f8b`.
77    ///
78    /// # Parameters
79    /// - `prefix`: Whole-output prefix text such as `0x`.
80    ///
81    /// # Returns
82    /// The updated codec.
83    pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
84        self.prefix = Some(prefix.into());
85        self
86    }
87
88    /// Sets a per-byte prefix.
89    ///
90    /// The prefix is written before every encoded byte and required before
91    /// every decoded byte. For example, using byte prefix `0x` and separator
92    /// ` ` encodes bytes as `0x1f 0x8b`.
93    ///
94    /// # Parameters
95    /// - `prefix`: Per-byte prefix text such as `0x`.
96    ///
97    /// # Returns
98    /// The updated codec.
99    pub fn with_byte_prefix(mut self, prefix: impl Into<String>) -> Self {
100        self.byte_prefix = Some(prefix.into());
101        self
102    }
103
104    /// Sets a separator written and accepted between encoded bytes.
105    ///
106    /// # Parameters
107    /// - `separator`: Separator text.
108    ///
109    /// # Returns
110    /// The updated codec.
111    pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
112        self.separator = Some(separator.into());
113        self
114    }
115
116    /// Sets whether ASCII whitespace is ignored while decoding.
117    ///
118    /// # Parameters
119    /// - `ignore`: Whether to ignore ASCII whitespace.
120    ///
121    /// # Returns
122    /// The updated codec.
123    pub fn with_ignored_ascii_whitespace(mut self, ignore: bool) -> Self {
124        self.ignore_ascii_whitespace = ignore;
125        self
126    }
127
128    /// Sets whether ASCII case is ignored when decoding configured prefixes.
129    ///
130    /// This option affects whole-output prefixes and per-byte prefixes during
131    /// decoding only. Encoding writes prefixes exactly as configured.
132    ///
133    /// # Parameters
134    /// - `ignore`: Whether to ignore ASCII case while matching prefixes.
135    ///
136    /// # Returns
137    /// The updated codec.
138    pub fn with_ignore_prefix_case(mut self, ignore: bool) -> Self {
139        self.ignore_prefix_case = ignore;
140        self
141    }
142
143    /// Encodes bytes into a hexadecimal string.
144    ///
145    /// # Parameters
146    /// - `bytes`: Bytes to encode.
147    ///
148    /// # Returns
149    /// Hexadecimal text.
150    pub fn encode(&self, bytes: &[u8]) -> String {
151        let separator_len = self.separator.as_ref().map_or(0, String::len);
152        let prefix_len = self.prefix.as_ref().map_or(0, String::len);
153        let byte_prefix_len = self.byte_prefix.as_ref().map_or(0, String::len);
154        let capacity = prefix_len.saturating_add(
155            bytes
156                .len()
157                .saturating_mul(byte_prefix_len.saturating_add(2))
158                .saturating_add(bytes.len().saturating_sub(1).saturating_mul(separator_len)),
159        );
160        let mut output = String::with_capacity(capacity);
161        self.encode_into(bytes, &mut output);
162        output
163    }
164
165    /// Encodes bytes into an existing string.
166    ///
167    /// # Parameters
168    /// - `bytes`: Bytes to encode.
169    /// - `output`: Destination string.
170    pub fn encode_into(&self, bytes: &[u8], output: &mut String) {
171        if let Some(prefix) = &self.prefix {
172            output.push_str(prefix);
173        }
174        for (index, byte) in bytes.iter().enumerate() {
175            if index > 0
176                && let Some(separator) = &self.separator
177            {
178                output.push_str(separator);
179            }
180            if let Some(byte_prefix) = &self.byte_prefix {
181                output.push_str(byte_prefix);
182            }
183            push_hex_byte(*byte, self.uppercase, output);
184        }
185    }
186
187    /// Decodes hexadecimal text into bytes.
188    ///
189    /// # Parameters
190    /// - `text`: Hexadecimal text.
191    ///
192    /// # Returns
193    /// Decoded bytes.
194    ///
195    /// # Errors
196    /// Returns [`CodecError`] when a configured whole or per-byte prefix is missing,
197    /// when the normalized digit count is odd, or when a non-hex digit is found.
198    pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
199        let mut output = Vec::new();
200        self.decode_into(text, &mut output)?;
201        Ok(output)
202    }
203
204    /// Decodes hexadecimal text into an existing byte vector.
205    ///
206    /// # Parameters
207    /// - `text`: Hexadecimal text.
208    /// - `output`: Destination byte vector.
209    ///
210    /// # Errors
211    /// Returns [`CodecError`] when the input is malformed.
212    pub fn decode_into(&self, text: &str, output: &mut Vec<u8>) -> CodecResult<()> {
213        let digits = self.normalized_digits(text)?;
214        if digits.len() % 2 != 0 {
215            return Err(invalid_hex_length(digits.len()));
216        }
217        output.reserve(digits.len() / 2);
218        for pair in digits.chunks_exact(2) {
219            let mut pair = pair.iter();
220            let Some(&(high_index, high_char)) = pair.next() else {
221                continue;
222            };
223            let Some(&(low_index, low_char)) = pair.next() else {
224                continue;
225            };
226            let high = hex_value(high_char).ok_or(invalid_hex_digit(high_index, high_char))?;
227            let low = hex_value(low_char).ok_or(invalid_hex_digit(low_index, low_char))?;
228            output.push((high << 4) | low);
229        }
230        Ok(())
231    }
232
233    /// Normalizes accepted input characters into hex digits.
234    ///
235    /// # Parameters
236    /// - `text`: Text to decode.
237    ///
238    /// # Returns
239    /// Hex digits paired with their original character indexes.
240    ///
241    /// # Errors
242    /// Returns [`CodecError::InvalidDigit`] for unsupported characters.
243    fn normalized_digits(&self, text: &str) -> CodecResult<Vec<(usize, char)>> {
244        let start_index = self.consume_prefix(text)?;
245        if let Some(byte_prefix) = self.byte_prefix.as_deref().filter(|prefix| !prefix.is_empty()) {
246            return self.normalized_byte_prefixed_digits(text, byte_prefix, start_index);
247        }
248        self.normalized_unprefixed_digits(text, start_index)
249    }
250
251    /// Consumes the configured whole-output prefix.
252    ///
253    /// # Parameters
254    /// - `text`: Text to decode.
255    ///
256    /// # Returns
257    /// Byte index where byte parsing should start.
258    ///
259    /// # Errors
260    /// Returns [`CodecError::MissingPrefix`] when a non-empty whole-output
261    /// prefix is configured but absent.
262    fn consume_prefix(&self, text: &str) -> CodecResult<usize> {
263        let Some(prefix) = self.prefix.as_deref().filter(|prefix| !prefix.is_empty()) else {
264            return Ok(0);
265        };
266        let index = self.skip_ascii_whitespace(text, 0);
267        let Some(rest) = text.get(index..) else {
268            return Err(CodecError::MissingPrefix {
269                prefix: prefix.to_owned(),
270            });
271        };
272        if self.starts_with_prefix(rest, prefix) {
273            Ok(index + prefix.len())
274        } else {
275            Err(CodecError::MissingPrefix {
276                prefix: prefix.to_owned(),
277            })
278        }
279    }
280
281    /// Normalizes unprefixed input characters into hex digits.
282    ///
283    /// # Parameters
284    /// - `text`: Text to decode.
285    ///
286    /// # Returns
287    /// Hex digits paired with their original character indexes.
288    ///
289    /// # Errors
290    /// Returns [`CodecError::InvalidDigit`] for unsupported characters.
291    fn normalized_unprefixed_digits(&self, text: &str, mut index: usize) -> CodecResult<Vec<(usize, char)>> {
292        let mut digits = Vec::with_capacity(text.len());
293        let separator = self.separator.as_deref().filter(|separator| !separator.is_empty());
294        while index < text.len() {
295            let Some(rest) = text.get(index..) else {
296                break;
297            };
298            if let Some(separator) = separator
299                && rest.starts_with(separator)
300            {
301                index += separator.len();
302                continue;
303            }
304            let Some(ch) = rest.chars().next() else {
305                break;
306            };
307            if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
308                index += ch.len_utf8();
309                continue;
310            }
311            if hex_value(ch).is_some() {
312                digits.push((index, ch));
313                index += ch.len_utf8();
314                continue;
315            }
316            return Err(invalid_hex_digit(index, ch));
317        }
318        Ok(digits)
319    }
320
321    /// Normalizes byte-prefixed input characters into hex digits.
322    ///
323    /// # Parameters
324    /// - `text`: Text to decode.
325    /// - `prefix`: Required prefix before each byte.
326    /// - `index`: Byte index where parsing should start.
327    ///
328    /// # Returns
329    /// Hex digits paired with their original character indexes.
330    ///
331    /// # Errors
332    /// Returns [`CodecError::MissingPrefix`] when a byte prefix is missing, or
333    /// [`CodecError::InvalidDigit`] for unsupported characters.
334    fn normalized_byte_prefixed_digits(
335        &self,
336        text: &str,
337        prefix: &str,
338        mut index: usize,
339    ) -> CodecResult<Vec<(usize, char)>> {
340        let mut digits = Vec::with_capacity(text.len());
341        let separator = self.separator.as_deref().filter(|separator| !separator.is_empty());
342        while index < text.len() {
343            index = self.skip_ignored(text, index, separator);
344            if index >= text.len() {
345                break;
346            }
347            let Some(rest) = text.get(index..) else {
348                break;
349            };
350            if !self.starts_with_prefix(rest, prefix) {
351                return Err(CodecError::MissingPrefix {
352                    prefix: prefix.to_owned(),
353                });
354            }
355            index += prefix.len();
356
357            let mut digit_count = 0;
358            while digit_count < 2 && index < text.len() {
359                let Some(rest) = text.get(index..) else {
360                    break;
361                };
362                let Some(ch) = rest.chars().next() else {
363                    break;
364                };
365                if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
366                    index += ch.len_utf8();
367                    continue;
368                }
369                if hex_value(ch).is_some() {
370                    digits.push((index, ch));
371                    index += ch.len_utf8();
372                    digit_count += 1;
373                    continue;
374                }
375                return Err(invalid_hex_digit(index, ch));
376            }
377        }
378        Ok(digits)
379    }
380
381    /// Skips configured separators and ignored ASCII whitespace.
382    ///
383    /// # Parameters
384    /// - `text`: Text being decoded.
385    /// - `index`: Current byte index.
386    /// - `separator`: Optional configured separator.
387    ///
388    /// # Returns
389    /// The next byte index that should be parsed.
390    fn skip_ignored(&self, text: &str, mut index: usize, separator: Option<&str>) -> usize {
391        loop {
392            let Some(rest) = text.get(index..) else {
393                return index;
394            };
395            if let Some(separator) = separator
396                && rest.starts_with(separator)
397            {
398                index += separator.len();
399                continue;
400            }
401            let Some(ch) = rest.chars().next() else {
402                return index;
403            };
404            if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
405                index += ch.len_utf8();
406                continue;
407            }
408            return index;
409        }
410    }
411
412    /// Skips ignored leading ASCII whitespace.
413    ///
414    /// # Parameters
415    /// - `text`: Text being decoded.
416    /// - `index`: Current byte index.
417    ///
418    /// # Returns
419    /// The next byte index after ignored ASCII whitespace.
420    fn skip_ascii_whitespace(&self, text: &str, mut index: usize) -> usize {
421        while self.ignore_ascii_whitespace && index < text.len() {
422            let Some(rest) = text.get(index..) else {
423                return index;
424            };
425            let Some(ch) = rest.chars().next() else {
426                return index;
427            };
428            if !ch.is_ascii_whitespace() {
429                return index;
430            }
431            index += ch.len_utf8();
432        }
433        index
434    }
435
436    /// Tests whether `text` starts with a configured prefix.
437    ///
438    /// # Parameters
439    /// - `text`: Text slice to inspect.
440    /// - `prefix`: Configured prefix.
441    ///
442    /// # Returns
443    /// `true` when `text` starts with `prefix`, honoring the configured
444    /// ASCII case sensitivity for decoding prefixes.
445    fn starts_with_prefix(&self, text: &str, prefix: &str) -> bool {
446        if !self.ignore_prefix_case {
447            return text.starts_with(prefix);
448        }
449        let Some(candidate) = text.get(..prefix.len()) else {
450            return false;
451        };
452        candidate.eq_ignore_ascii_case(prefix)
453    }
454}
455
456impl Default for HexCodec {
457    /// Creates a lowercase codec without prefix or separators.
458    fn default() -> Self {
459        Self::new()
460    }
461}
462
463impl Encoder<[u8]> for HexCodec {
464    type Error = CodecError;
465    type Output = String;
466
467    /// Encodes bytes into hexadecimal text.
468    fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
469        Ok(HexCodec::encode(self, input))
470    }
471}
472
473impl Decoder<str> for HexCodec {
474    type Error = CodecError;
475    type Output = Vec<u8>;
476
477    /// Decodes hexadecimal text into bytes.
478    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
479        HexCodec::decode(self, input)
480    }
481}
482
/// Maps a hexadecimal digit character to its numeric value.
///
/// Only the ASCII characters `0`-`9`, `a`-`f`, and `A`-`F` are accepted.
///
/// # Parameters
/// - `ch`: Character to inspect.
///
/// # Returns
/// The nibble value in `0..=15`, or `None` when `ch` is not a hex digit.
fn hex_value(ch: char) -> Option<u8> {
    let nibble = ch.to_digit(16)?;
    // A radix-16 digit is always below 16, so the conversion cannot fail.
    u8::try_from(nibble).ok()
}
498
499/// Builds an invalid hexadecimal digit error.
500///
501/// # Parameters
502/// - `index`: Byte index of the invalid character in the original input.
503/// - `character`: Invalid character.
504///
505/// # Returns
506/// A radix-16 digit error.
507fn invalid_hex_digit(index: usize, character: char) -> CodecError {
508    CodecError::InvalidDigit {
509        radix: 16,
510        index,
511        character,
512    }
513}
514
515/// Builds an invalid hexadecimal length error.
516///
517/// # Parameters
518/// - `actual`: Number of normalized hexadecimal digits.
519///
520/// # Returns
521/// An invalid length error describing the even-digit requirement.
522fn invalid_hex_length(actual: usize) -> CodecError {
523    CodecError::InvalidLength {
524        context: "hex digits",
525        expected: "an even number of digits".to_owned(),
526        actual,
527    }
528}
529
530/// Appends one encoded byte to `output`.
531///
532/// # Parameters
533/// - `byte`: Byte to encode.
534/// - `uppercase`: Whether to use uppercase digits.
535/// - `output`: Destination string.
536fn push_hex_byte(byte: u8, uppercase: bool, output: &mut String) {
537    output.push(hex_digit(byte >> 4, uppercase));
538    output.push(hex_digit(byte & 0x0f, uppercase));
539}
540
/// Maps a nibble to its hexadecimal digit character.
///
/// # Parameters
/// - `value`: Nibble value.
/// - `uppercase`: Whether to use uppercase digits.
///
/// # Returns
/// Hexadecimal digit. Values above `0x0f` are masked to their low nibble.
fn hex_digit(value: u8, uppercase: bool) -> char {
    const LOWER: &[u8; 16] = b"0123456789abcdef";
    const UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let table = if uppercase { UPPER } else { LOWER };
    // Masking keeps the index within the 16-entry table.
    char::from(table[usize::from(value & 0x0f)])
}