Skip to main content

qubit_codec/
hex_codec.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Hexadecimal byte codec.
11
12use crate::{
13    CodecError,
14    CodecResult,
15    Decoder,
16    Encoder,
17};
18
/// Codec that converts between raw bytes and hexadecimal text.
///
/// The whole-output prefix, per-byte prefix and separator are written when
/// encoding and expected when decoding. The digit case only affects encoding,
/// and the two `ignore_*` switches only affect decoding.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HexCodec {
    /// Emit `A`-`F` rather than `a`-`f` when encoding.
    uppercase: bool,
    /// Text placed once in front of the whole encoded string.
    prefix: Option<String>,
    /// Text placed in front of every encoded byte.
    byte_prefix: Option<String>,
    /// Text placed between consecutive encoded bytes.
    separator: Option<String>,
    /// Skip ASCII whitespace while decoding.
    ignore_ascii_whitespace: bool,
    /// Compare configured prefixes without regard to ASCII case while decoding.
    ignore_prefix_case: bool,
}
35
36impl HexCodec {
37    /// Creates a lowercase codec without prefix or separators.
38    ///
39    /// # Returns
40    /// A hexadecimal codec using lowercase digits.
41    pub fn new() -> Self {
42        Self {
43            uppercase: false,
44            prefix: None,
45            byte_prefix: None,
46            separator: None,
47            ignore_ascii_whitespace: false,
48            ignore_prefix_case: false,
49        }
50    }
51
52    /// Creates an uppercase codec without prefix or separators.
53    ///
54    /// # Returns
55    /// A hexadecimal codec using uppercase digits.
56    pub fn upper() -> Self {
57        Self::new().with_uppercase(true)
58    }
59
60    /// Sets whether encoded digits should be uppercase.
61    ///
62    /// # Parameters
63    /// - `uppercase`: Whether to use uppercase hexadecimal digits.
64    ///
65    /// # Returns
66    /// The updated codec.
67    pub fn with_uppercase(mut self, uppercase: bool) -> Self {
68        self.uppercase = uppercase;
69        self
70    }
71
72    /// Sets a whole-output prefix.
73    ///
74    /// The prefix is written once before the encoded bytes and required once
75    /// before decoded input. For example, using prefix `0x` encodes bytes as
76    /// `0x1f8b`.
77    ///
78    /// # Parameters
79    /// - `prefix`: Whole-output prefix text such as `0x`.
80    ///
81    /// # Returns
82    /// The updated codec.
83    pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
84        self.prefix = Some(prefix.into());
85        self
86    }
87
88    /// Sets a per-byte prefix.
89    ///
90    /// The prefix is written before every encoded byte and required before
91    /// every decoded byte. For example, using byte prefix `0x` and separator
92    /// ` ` encodes bytes as `0x1f 0x8b`.
93    ///
94    /// # Parameters
95    /// - `prefix`: Per-byte prefix text such as `0x`.
96    ///
97    /// # Returns
98    /// The updated codec.
99    pub fn with_byte_prefix(mut self, prefix: impl Into<String>) -> Self {
100        self.byte_prefix = Some(prefix.into());
101        self
102    }
103
104    /// Sets a separator written and accepted between encoded bytes.
105    ///
106    /// # Parameters
107    /// - `separator`: Separator text.
108    ///
109    /// # Returns
110    /// The updated codec.
111    pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
112        self.separator = Some(separator.into());
113        self
114    }
115
116    /// Sets whether ASCII whitespace is ignored while decoding.
117    ///
118    /// # Parameters
119    /// - `ignore`: Whether to ignore ASCII whitespace.
120    ///
121    /// # Returns
122    /// The updated codec.
123    pub fn with_ignored_ascii_whitespace(mut self, ignore: bool) -> Self {
124        self.ignore_ascii_whitespace = ignore;
125        self
126    }
127
128    /// Sets whether ASCII case is ignored when decoding configured prefixes.
129    ///
130    /// This option affects whole-output prefixes and per-byte prefixes during
131    /// decoding only. Encoding writes prefixes exactly as configured.
132    ///
133    /// # Parameters
134    /// - `ignore`: Whether to ignore ASCII case while matching prefixes.
135    ///
136    /// # Returns
137    /// The updated codec.
138    pub fn with_ignore_prefix_case(mut self, ignore: bool) -> Self {
139        self.ignore_prefix_case = ignore;
140        self
141    }
142
143    /// Encodes bytes into a hexadecimal string.
144    ///
145    /// # Parameters
146    /// - `bytes`: Bytes to encode.
147    ///
148    /// # Returns
149    /// Hexadecimal text.
150    pub fn encode(&self, bytes: &[u8]) -> String {
151        let separator_len = self.separator.as_ref().map_or(0, String::len);
152        let prefix_len = self.prefix.as_ref().map_or(0, String::len);
153        let byte_prefix_len = self.byte_prefix.as_ref().map_or(0, String::len);
154        let capacity = prefix_len.saturating_add(
155            bytes
156                .len()
157                .saturating_mul(byte_prefix_len.saturating_add(2))
158                .saturating_add(bytes.len().saturating_sub(1).saturating_mul(separator_len)),
159        );
160        let mut output = String::with_capacity(capacity);
161        self.encode_into(bytes, &mut output);
162        output
163    }
164
165    /// Encodes bytes into an existing string.
166    ///
167    /// # Parameters
168    /// - `bytes`: Bytes to encode.
169    /// - `output`: Destination string.
170    pub fn encode_into(&self, bytes: &[u8], output: &mut String) {
171        if let Some(prefix) = &self.prefix {
172            output.push_str(prefix);
173        }
174        for (index, byte) in bytes.iter().enumerate() {
175            if index > 0
176                && let Some(separator) = &self.separator
177            {
178                output.push_str(separator);
179            }
180            if let Some(byte_prefix) = &self.byte_prefix {
181                output.push_str(byte_prefix);
182            }
183            push_hex_byte(*byte, self.uppercase, output);
184        }
185    }
186
187    /// Decodes hexadecimal text into bytes.
188    ///
189    /// # Parameters
190    /// - `text`: Hexadecimal text.
191    ///
192    /// # Returns
193    /// Decoded bytes.
194    ///
195    /// # Errors
196    /// Returns [`CodecError`] when a configured whole or per-byte prefix is missing,
197    /// when the normalized digit count is odd, or when a non-hex digit is found.
198    pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
199        let mut output = Vec::new();
200        self.decode_into(text, &mut output)?;
201        Ok(output)
202    }
203
204    /// Decodes hexadecimal text into an existing byte vector.
205    ///
206    /// # Parameters
207    /// - `text`: Hexadecimal text.
208    /// - `output`: Destination byte vector.
209    ///
210    /// # Errors
211    /// Returns [`CodecError`] when the input is malformed.
212    pub fn decode_into(&self, text: &str, output: &mut Vec<u8>) -> CodecResult<()> {
213        let digits = self.normalized_digits(text)?;
214        if digits.len() % 2 != 0 {
215            return Err(CodecError::OddHexLength {
216                digits: digits.len(),
217            });
218        }
219        output.reserve(digits.len() / 2);
220        for pair in digits.chunks_exact(2) {
221            let mut pair = pair.iter();
222            let Some(&(high_index, high_char)) = pair.next() else {
223                continue;
224            };
225            let Some(&(low_index, low_char)) = pair.next() else {
226                continue;
227            };
228            let high = hex_value(high_char).ok_or(CodecError::InvalidHexDigit {
229                index: high_index,
230                character: high_char,
231            })?;
232            let low = hex_value(low_char).ok_or(CodecError::InvalidHexDigit {
233                index: low_index,
234                character: low_char,
235            })?;
236            output.push((high << 4) | low);
237        }
238        Ok(())
239    }
240
241    /// Normalizes accepted input characters into hex digits.
242    ///
243    /// # Parameters
244    /// - `text`: Text to decode.
245    ///
246    /// # Returns
247    /// Hex digits paired with their original character indexes.
248    ///
249    /// # Errors
250    /// Returns [`CodecError::InvalidHexDigit`] for unsupported characters.
251    fn normalized_digits(&self, text: &str) -> CodecResult<Vec<(usize, char)>> {
252        let start_index = self.consume_prefix(text)?;
253        if let Some(byte_prefix) = self
254            .byte_prefix
255            .as_deref()
256            .filter(|prefix| !prefix.is_empty())
257        {
258            return self.normalized_byte_prefixed_digits(text, byte_prefix, start_index);
259        }
260        self.normalized_unprefixed_digits(text, start_index)
261    }
262
263    /// Consumes the configured whole-output prefix.
264    ///
265    /// # Parameters
266    /// - `text`: Text to decode.
267    ///
268    /// # Returns
269    /// Byte index where byte parsing should start.
270    ///
271    /// # Errors
272    /// Returns [`CodecError::MissingPrefix`] when a non-empty whole-output
273    /// prefix is configured but absent.
274    fn consume_prefix(&self, text: &str) -> CodecResult<usize> {
275        let Some(prefix) = self.prefix.as_deref().filter(|prefix| !prefix.is_empty()) else {
276            return Ok(0);
277        };
278        let index = self.skip_ascii_whitespace(text, 0);
279        let Some(rest) = text.get(index..) else {
280            return Err(CodecError::MissingPrefix {
281                prefix: prefix.to_owned(),
282            });
283        };
284        if self.starts_with_prefix(rest, prefix) {
285            Ok(index + prefix.len())
286        } else {
287            Err(CodecError::MissingPrefix {
288                prefix: prefix.to_owned(),
289            })
290        }
291    }
292
293    /// Normalizes unprefixed input characters into hex digits.
294    ///
295    /// # Parameters
296    /// - `text`: Text to decode.
297    ///
298    /// # Returns
299    /// Hex digits paired with their original character indexes.
300    ///
301    /// # Errors
302    /// Returns [`CodecError::InvalidHexDigit`] for unsupported characters.
303    fn normalized_unprefixed_digits(
304        &self,
305        text: &str,
306        mut index: usize,
307    ) -> CodecResult<Vec<(usize, char)>> {
308        let mut digits = Vec::with_capacity(text.len());
309        let separator = self
310            .separator
311            .as_deref()
312            .filter(|separator| !separator.is_empty());
313        while index < text.len() {
314            let Some(rest) = text.get(index..) else {
315                break;
316            };
317            if let Some(separator) = separator
318                && rest.starts_with(separator)
319            {
320                index += separator.len();
321                continue;
322            }
323            let Some(ch) = rest.chars().next() else {
324                break;
325            };
326            if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
327                index += ch.len_utf8();
328                continue;
329            }
330            if hex_value(ch).is_some() {
331                digits.push((index, ch));
332                index += ch.len_utf8();
333                continue;
334            }
335            return Err(CodecError::InvalidHexDigit {
336                index,
337                character: ch,
338            });
339        }
340        Ok(digits)
341    }
342
343    /// Normalizes byte-prefixed input characters into hex digits.
344    ///
345    /// # Parameters
346    /// - `text`: Text to decode.
347    /// - `prefix`: Required prefix before each byte.
348    /// - `index`: Byte index where parsing should start.
349    ///
350    /// # Returns
351    /// Hex digits paired with their original character indexes.
352    ///
353    /// # Errors
354    /// Returns [`CodecError::MissingPrefix`] when a byte prefix is missing, or
355    /// [`CodecError::InvalidHexDigit`] for unsupported characters.
356    fn normalized_byte_prefixed_digits(
357        &self,
358        text: &str,
359        prefix: &str,
360        mut index: usize,
361    ) -> CodecResult<Vec<(usize, char)>> {
362        let mut digits = Vec::with_capacity(text.len());
363        let separator = self
364            .separator
365            .as_deref()
366            .filter(|separator| !separator.is_empty());
367        while index < text.len() {
368            index = self.skip_ignored(text, index, separator);
369            if index >= text.len() {
370                break;
371            }
372            let Some(rest) = text.get(index..) else {
373                break;
374            };
375            if !self.starts_with_prefix(rest, prefix) {
376                return Err(CodecError::MissingPrefix {
377                    prefix: prefix.to_owned(),
378                });
379            }
380            index += prefix.len();
381
382            let mut digit_count = 0;
383            while digit_count < 2 && index < text.len() {
384                let Some(rest) = text.get(index..) else {
385                    break;
386                };
387                let Some(ch) = rest.chars().next() else {
388                    break;
389                };
390                if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
391                    index += ch.len_utf8();
392                    continue;
393                }
394                if hex_value(ch).is_some() {
395                    digits.push((index, ch));
396                    index += ch.len_utf8();
397                    digit_count += 1;
398                    continue;
399                }
400                return Err(CodecError::InvalidHexDigit {
401                    index,
402                    character: ch,
403                });
404            }
405        }
406        Ok(digits)
407    }
408
409    /// Skips configured separators and ignored ASCII whitespace.
410    ///
411    /// # Parameters
412    /// - `text`: Text being decoded.
413    /// - `index`: Current byte index.
414    /// - `separator`: Optional configured separator.
415    ///
416    /// # Returns
417    /// The next byte index that should be parsed.
418    fn skip_ignored(&self, text: &str, mut index: usize, separator: Option<&str>) -> usize {
419        loop {
420            let Some(rest) = text.get(index..) else {
421                return index;
422            };
423            if let Some(separator) = separator
424                && rest.starts_with(separator)
425            {
426                index += separator.len();
427                continue;
428            }
429            let Some(ch) = rest.chars().next() else {
430                return index;
431            };
432            if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
433                index += ch.len_utf8();
434                continue;
435            }
436            return index;
437        }
438    }
439
440    /// Skips ignored leading ASCII whitespace.
441    ///
442    /// # Parameters
443    /// - `text`: Text being decoded.
444    /// - `index`: Current byte index.
445    ///
446    /// # Returns
447    /// The next byte index after ignored ASCII whitespace.
448    fn skip_ascii_whitespace(&self, text: &str, mut index: usize) -> usize {
449        while self.ignore_ascii_whitespace && index < text.len() {
450            let Some(rest) = text.get(index..) else {
451                return index;
452            };
453            let Some(ch) = rest.chars().next() else {
454                return index;
455            };
456            if !ch.is_ascii_whitespace() {
457                return index;
458            }
459            index += ch.len_utf8();
460        }
461        index
462    }
463
464    /// Tests whether `text` starts with a configured prefix.
465    ///
466    /// # Parameters
467    /// - `text`: Text slice to inspect.
468    /// - `prefix`: Configured prefix.
469    ///
470    /// # Returns
471    /// `true` when `text` starts with `prefix`, honoring the configured
472    /// ASCII case sensitivity for decoding prefixes.
473    fn starts_with_prefix(&self, text: &str, prefix: &str) -> bool {
474        if !self.ignore_prefix_case {
475            return text.starts_with(prefix);
476        }
477        let Some(candidate) = text.get(..prefix.len()) else {
478            return false;
479        };
480        candidate.eq_ignore_ascii_case(prefix)
481    }
482}
483
484impl Default for HexCodec {
485    /// Creates a lowercase codec without prefix or separators.
486    fn default() -> Self {
487        Self::new()
488    }
489}
490
491impl Encoder<[u8]> for HexCodec {
492    type Error = CodecError;
493    type Output = String;
494
495    /// Encodes bytes into hexadecimal text.
496    fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
497        Ok(HexCodec::encode(self, input))
498    }
499}
500
501impl Decoder<str> for HexCodec {
502    type Error = CodecError;
503    type Output = Vec<u8>;
504
505    /// Decodes hexadecimal text into bytes.
506    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
507        HexCodec::decode(self, input)
508    }
509}
510
/// Maps a hexadecimal digit to its numeric value.
///
/// Accepts `0`-`9`, `a`-`f` and `A`-`F`.
///
/// # Parameters
/// - `ch`: Character to inspect.
///
/// # Returns
/// Nibble value, or `None` when `ch` is not a hex digit.
fn hex_value(ch: char) -> Option<u8> {
    // `to_digit(16)` yields values below 16, so narrowing to `u8` is lossless.
    ch.to_digit(16).map(|digit| digit as u8)
}
526
527/// Appends one encoded byte to `output`.
528///
529/// # Parameters
530/// - `byte`: Byte to encode.
531/// - `uppercase`: Whether to use uppercase digits.
532/// - `output`: Destination string.
533fn push_hex_byte(byte: u8, uppercase: bool, output: &mut String) {
534    output.push(hex_digit(byte >> 4, uppercase));
535    output.push(hex_digit(byte & 0x0f, uppercase));
536}
537
/// Maps a nibble to its hexadecimal digit character.
///
/// # Parameters
/// - `value`: Nibble value.
/// - `uppercase`: Whether to use uppercase digits.
///
/// # Returns
/// Hexadecimal digit. Values above `0x0f` are masked to their low nibble.
fn hex_digit(value: u8, uppercase: bool) -> char {
    const LOWER: &[u8; 16] = b"0123456789abcdef";
    const UPPER: &[u8; 16] = b"0123456789ABCDEF";

    let table = if uppercase { UPPER } else { LOWER };
    // Masking keeps the index within the 16-entry table.
    let nibble = usize::from(value & 0x0f);
    char::from(table[nibble])
}
571}