Skip to main content

qubit_codec/
hex_codec.rs

/*******************************************************************************
 *
 *    Copyright (c) 2026 Haixing Hu.
 *
 *    SPDX-License-Identifier: Apache-2.0
 *
 *    Licensed under the Apache License, Version 2.0.
 *
 ******************************************************************************/
//! Hexadecimal byte codec.
11
12use crate::{
13    CodecError,
14    CodecResult,
15    Decoder,
16    Encoder,
17};
18
/// Configurable codec that converts between raw bytes and hexadecimal text.
///
/// The codec controls the digit case used when encoding, an optional per-byte
/// prefix, an optional separator between bytes, and whether ASCII whitespace
/// is skipped when decoding.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HexCodec {
    /// `true` to encode with `A`-`F`, `false` to encode with `a`-`f`.
    uppercase: bool,
    /// Text placed in front of every encoded byte, if any.
    prefix: Option<String>,
    /// Text placed between consecutive encoded bytes, if any.
    separator: Option<String>,
    /// `true` to skip ASCII whitespace in the input while decoding.
    ignore_ascii_whitespace: bool,
}
31
32impl HexCodec {
33    /// Creates a lowercase codec without prefix or separators.
34    ///
35    /// # Returns
36    /// A hexadecimal codec using lowercase digits.
37    pub fn new() -> Self {
38        Self {
39            uppercase: false,
40            prefix: None,
41            separator: None,
42            ignore_ascii_whitespace: false,
43        }
44    }
45
46    /// Creates an uppercase codec without prefix or separators.
47    ///
48    /// # Returns
49    /// A hexadecimal codec using uppercase digits.
50    pub fn upper() -> Self {
51        Self::new().with_uppercase(true)
52    }
53
54    /// Sets whether encoded digits should be uppercase.
55    ///
56    /// # Parameters
57    /// - `uppercase`: Whether to use uppercase hexadecimal digits.
58    ///
59    /// # Returns
60    /// The updated codec.
61    pub fn with_uppercase(mut self, uppercase: bool) -> Self {
62        self.uppercase = uppercase;
63        self
64    }
65
66    /// Sets a per-byte prefix.
67    ///
68    /// The prefix is written before every encoded byte and required before
69    /// every decoded byte. For example, using prefix `0x` and separator ` `
70    /// encodes bytes as `0x1F 0x8B`.
71    ///
72    /// This is not a whole-output prefix: `[0x1F, 0x8B]` is encoded as
73    /// `0x1F 0x8B`, not `0x1F 8B`.
74    ///
75    /// # Parameters
76    /// - `prefix`: Prefix text such as `0x`.
77    ///
78    /// # Returns
79    /// The updated codec.
80    pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
81        self.prefix = Some(prefix.into());
82        self
83    }
84
85    /// Sets a separator written and accepted between encoded bytes.
86    ///
87    /// # Parameters
88    /// - `separator`: Separator text.
89    ///
90    /// # Returns
91    /// The updated codec.
92    pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
93        self.separator = Some(separator.into());
94        self
95    }
96
97    /// Sets whether ASCII whitespace is ignored while decoding.
98    ///
99    /// # Parameters
100    /// - `ignore`: Whether to ignore ASCII whitespace.
101    ///
102    /// # Returns
103    /// The updated codec.
104    pub fn with_ignored_ascii_whitespace(mut self, ignore: bool) -> Self {
105        self.ignore_ascii_whitespace = ignore;
106        self
107    }
108
109    /// Encodes bytes into a hexadecimal string.
110    ///
111    /// # Parameters
112    /// - `bytes`: Bytes to encode.
113    ///
114    /// # Returns
115    /// Hexadecimal text.
116    pub fn encode(&self, bytes: &[u8]) -> String {
117        let separator_len = self.separator.as_ref().map_or(0, String::len);
118        let prefix_len = self.prefix.as_ref().map_or(0, String::len);
119        let capacity = bytes
120            .len()
121            .saturating_mul(prefix_len.saturating_add(2))
122            .saturating_add(bytes.len().saturating_sub(1).saturating_mul(separator_len));
123        let mut output = String::with_capacity(capacity);
124        self.encode_into(bytes, &mut output);
125        output
126    }
127
128    /// Encodes bytes into an existing string.
129    ///
130    /// # Parameters
131    /// - `bytes`: Bytes to encode.
132    /// - `output`: Destination string.
133    pub fn encode_into(&self, bytes: &[u8], output: &mut String) {
134        for (index, byte) in bytes.iter().enumerate() {
135            if index > 0
136                && let Some(separator) = &self.separator
137            {
138                output.push_str(separator);
139            }
140            if let Some(prefix) = &self.prefix {
141                output.push_str(prefix);
142            }
143            push_hex_byte(*byte, self.uppercase, output);
144        }
145    }
146
147    /// Decodes hexadecimal text into bytes.
148    ///
149    /// # Parameters
150    /// - `text`: Hexadecimal text.
151    ///
152    /// # Returns
153    /// Decoded bytes.
154    ///
155    /// # Errors
156    /// Returns [`CodecError`] when a configured per-byte prefix is missing,
157    /// when the normalized digit count is odd, or when a non-hex digit is found.
158    pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
159        let mut output = Vec::new();
160        self.decode_into(text, &mut output)?;
161        Ok(output)
162    }
163
164    /// Decodes hexadecimal text into an existing byte vector.
165    ///
166    /// # Parameters
167    /// - `text`: Hexadecimal text.
168    /// - `output`: Destination byte vector.
169    ///
170    /// # Errors
171    /// Returns [`CodecError`] when the input is malformed.
172    pub fn decode_into(&self, text: &str, output: &mut Vec<u8>) -> CodecResult<()> {
173        let digits = self.normalized_digits(text)?;
174        if digits.len() % 2 != 0 {
175            return Err(CodecError::OddHexLength {
176                digits: digits.len(),
177            });
178        }
179        output.reserve(digits.len() / 2);
180        for pair in digits.chunks_exact(2) {
181            let mut pair = pair.iter();
182            let Some(&(high_index, high_char)) = pair.next() else {
183                continue;
184            };
185            let Some(&(low_index, low_char)) = pair.next() else {
186                continue;
187            };
188            let high = hex_value(high_char).ok_or(CodecError::InvalidHexDigit {
189                index: high_index,
190                character: high_char,
191            })?;
192            let low = hex_value(low_char).ok_or(CodecError::InvalidHexDigit {
193                index: low_index,
194                character: low_char,
195            })?;
196            output.push((high << 4) | low);
197        }
198        Ok(())
199    }
200
201    /// Normalizes accepted input characters into hex digits.
202    ///
203    /// # Parameters
204    /// - `text`: Text to decode.
205    ///
206    /// # Returns
207    /// Hex digits paired with their original character indexes.
208    ///
209    /// # Errors
210    /// Returns [`CodecError::InvalidHexDigit`] for unsupported characters.
211    fn normalized_digits(&self, text: &str) -> CodecResult<Vec<(usize, char)>> {
212        if let Some(prefix) = self.prefix.as_deref().filter(|prefix| !prefix.is_empty()) {
213            return self.normalized_prefixed_digits(text, prefix);
214        }
215        self.normalized_unprefixed_digits(text)
216    }
217
218    /// Normalizes unprefixed input characters into hex digits.
219    ///
220    /// # Parameters
221    /// - `text`: Text to decode.
222    ///
223    /// # Returns
224    /// Hex digits paired with their original character indexes.
225    ///
226    /// # Errors
227    /// Returns [`CodecError::InvalidHexDigit`] for unsupported characters.
228    fn normalized_unprefixed_digits(&self, text: &str) -> CodecResult<Vec<(usize, char)>> {
229        let mut digits = Vec::with_capacity(text.len());
230        let separator = self
231            .separator
232            .as_deref()
233            .filter(|separator| !separator.is_empty());
234        let mut index = 0;
235        while index < text.len() {
236            let Some(rest) = text.get(index..) else {
237                break;
238            };
239            if let Some(separator) = separator
240                && rest.starts_with(separator)
241            {
242                index += separator.len();
243                continue;
244            }
245            let Some(ch) = rest.chars().next() else {
246                break;
247            };
248            if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
249                index += ch.len_utf8();
250                continue;
251            }
252            if hex_value(ch).is_some() {
253                digits.push((index, ch));
254                index += ch.len_utf8();
255                continue;
256            }
257            return Err(CodecError::InvalidHexDigit {
258                index,
259                character: ch,
260            });
261        }
262        Ok(digits)
263    }
264
265    /// Normalizes prefixed input characters into hex digits.
266    ///
267    /// # Parameters
268    /// - `text`: Text to decode.
269    /// - `prefix`: Required prefix before each byte.
270    ///
271    /// # Returns
272    /// Hex digits paired with their original character indexes.
273    ///
274    /// # Errors
275    /// Returns [`CodecError::MissingPrefix`] when a byte prefix is missing, or
276    /// [`CodecError::InvalidHexDigit`] for unsupported characters.
277    fn normalized_prefixed_digits(
278        &self,
279        text: &str,
280        prefix: &str,
281    ) -> CodecResult<Vec<(usize, char)>> {
282        let mut digits = Vec::with_capacity(text.len());
283        let separator = self
284            .separator
285            .as_deref()
286            .filter(|separator| !separator.is_empty());
287        let mut index = 0;
288        while index < text.len() {
289            index = self.skip_ignored(text, index, separator);
290            if index >= text.len() {
291                break;
292            }
293            let Some(rest) = text.get(index..) else {
294                break;
295            };
296            if !rest.starts_with(prefix) {
297                return Err(CodecError::MissingPrefix {
298                    prefix: prefix.to_owned(),
299                });
300            }
301            index += prefix.len();
302
303            let mut digit_count = 0;
304            while digit_count < 2 && index < text.len() {
305                let Some(rest) = text.get(index..) else {
306                    break;
307                };
308                let Some(ch) = rest.chars().next() else {
309                    break;
310                };
311                if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
312                    index += ch.len_utf8();
313                    continue;
314                }
315                if hex_value(ch).is_some() {
316                    digits.push((index, ch));
317                    index += ch.len_utf8();
318                    digit_count += 1;
319                    continue;
320                }
321                return Err(CodecError::InvalidHexDigit {
322                    index,
323                    character: ch,
324                });
325            }
326        }
327        Ok(digits)
328    }
329
330    /// Skips configured separators and ignored ASCII whitespace.
331    ///
332    /// # Parameters
333    /// - `text`: Text being decoded.
334    /// - `index`: Current byte index.
335    /// - `separator`: Optional configured separator.
336    ///
337    /// # Returns
338    /// The next byte index that should be parsed.
339    fn skip_ignored(&self, text: &str, mut index: usize, separator: Option<&str>) -> usize {
340        loop {
341            let Some(rest) = text.get(index..) else {
342                return index;
343            };
344            if let Some(separator) = separator
345                && rest.starts_with(separator)
346            {
347                index += separator.len();
348                continue;
349            }
350            let Some(ch) = rest.chars().next() else {
351                return index;
352            };
353            if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
354                index += ch.len_utf8();
355                continue;
356            }
357            return index;
358        }
359    }
360}
361
362impl Default for HexCodec {
363    /// Creates a lowercase codec without prefix or separators.
364    fn default() -> Self {
365        Self::new()
366    }
367}
368
369impl Encoder<[u8]> for HexCodec {
370    type Error = CodecError;
371    type Output = String;
372
373    /// Encodes bytes into hexadecimal text.
374    fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
375        Ok(HexCodec::encode(self, input))
376    }
377}
378
379impl Decoder<str> for HexCodec {
380    type Error = CodecError;
381    type Output = Vec<u8>;
382
383    /// Decodes hexadecimal text into bytes.
384    fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
385        HexCodec::decode(self, input)
386    }
387}
388
/// Maps a hexadecimal digit to the nibble it represents.
///
/// Accepts `0`-`9`, `a`-`f` and `A`-`F`.
///
/// # Parameters
/// - `ch`: Character to inspect.
///
/// # Returns
/// The value `0..=15`, or `None` when `ch` is not a hex digit.
fn hex_value(ch: char) -> Option<u8> {
    // Radix-16 `to_digit` yields at most 15, so narrowing to `u8` is lossless.
    ch.to_digit(16).map(|nibble| nibble as u8)
}
404
405/// Appends one encoded byte to `output`.
406///
407/// # Parameters
408/// - `byte`: Byte to encode.
409/// - `uppercase`: Whether to use uppercase digits.
410/// - `output`: Destination string.
411fn push_hex_byte(byte: u8, uppercase: bool, output: &mut String) {
412    output.push(hex_digit(byte >> 4, uppercase));
413    output.push(hex_digit(byte & 0x0f, uppercase));
414}
415
/// Returns the hexadecimal digit for a nibble.
///
/// # Parameters
/// - `value`: Nibble value.
/// - `uppercase`: Whether to use uppercase digits.
///
/// # Returns
/// Hexadecimal digit. Only the low four bits of `value` are used, so values
/// above `0x0f` map to the digit of their low nibble.
fn hex_digit(value: u8, uppercase: bool) -> char {
    const LOWER_DIGITS: &[u8; 16] = b"0123456789abcdef";
    const UPPER_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let table = if uppercase {
        UPPER_DIGITS
    } else {
        LOWER_DIGITS
    };
    // Masking keeps the index within the 16-entry table.
    char::from(table[usize::from(value & 0x0f)])
}