qubit_codec/hex_codec.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10//! Hexadecimal byte codec.
11
12use crate::{
13 CodecError,
14 CodecResult,
15 Decoder,
16 Encoder,
17};
18
/// Encodes and decodes hexadecimal byte strings.
///
/// A codec value is plain configuration: it is built with [`HexCodec::new`]
/// (or [`HexCodec::upper`]) and adjusted through the `with_*` builder
/// methods. Some options affect both directions, others only decoding, as
/// noted on each field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HexCodec {
    /// Whether to use uppercase hexadecimal digits.
    ///
    /// Only encoding reads this flag; decoding accepts digits in either case.
    uppercase: bool,
    /// The prefix to use before the whole encoded string.
    ///
    /// Written once when encoding. When decoding, a non-empty prefix must be
    /// present at the start of the input; `None` and the empty string both
    /// mean "no prefix required".
    prefix: Option<String>,
    /// The prefix to use before each encoded byte.
    ///
    /// Written before every byte when encoding. When decoding, a non-empty
    /// prefix must precede every byte; `None` and the empty string both mean
    /// "no per-byte prefix".
    byte_prefix: Option<String>,
    /// The separator to use between bytes in the encoded string.
    ///
    /// Written between consecutive bytes when encoding. When decoding, a
    /// non-empty separator is skipped where the decoder looks for it; it is
    /// accepted but never required.
    separator: Option<String>,
    /// Whether to ignore ASCII whitespace while decoding.
    ignore_ascii_whitespace: bool,
    /// Whether to ignore ASCII case when matching configured prefixes.
    ///
    /// Applies to both the whole-output prefix and the per-byte prefix, and
    /// only while decoding.
    ignore_prefix_case: bool,
}
35
36impl HexCodec {
37 /// Creates a lowercase codec without prefix or separators.
38 ///
39 /// # Returns
40 /// A hexadecimal codec using lowercase digits.
41 pub fn new() -> Self {
42 Self {
43 uppercase: false,
44 prefix: None,
45 byte_prefix: None,
46 separator: None,
47 ignore_ascii_whitespace: false,
48 ignore_prefix_case: false,
49 }
50 }
51
52 /// Creates an uppercase codec without prefix or separators.
53 ///
54 /// # Returns
55 /// A hexadecimal codec using uppercase digits.
56 pub fn upper() -> Self {
57 Self::new().with_uppercase(true)
58 }
59
60 /// Sets whether encoded digits should be uppercase.
61 ///
62 /// # Parameters
63 /// - `uppercase`: Whether to use uppercase hexadecimal digits.
64 ///
65 /// # Returns
66 /// The updated codec.
67 pub fn with_uppercase(mut self, uppercase: bool) -> Self {
68 self.uppercase = uppercase;
69 self
70 }
71
72 /// Sets a whole-output prefix.
73 ///
74 /// The prefix is written once before the encoded bytes and required once
75 /// before decoded input. For example, using prefix `0x` encodes bytes as
76 /// `0x1f8b`.
77 ///
78 /// # Parameters
79 /// - `prefix`: Whole-output prefix text such as `0x`.
80 ///
81 /// # Returns
82 /// The updated codec.
83 pub fn with_prefix(mut self, prefix: impl Into<String>) -> Self {
84 self.prefix = Some(prefix.into());
85 self
86 }
87
88 /// Sets a per-byte prefix.
89 ///
90 /// The prefix is written before every encoded byte and required before
91 /// every decoded byte. For example, using byte prefix `0x` and separator
92 /// ` ` encodes bytes as `0x1f 0x8b`.
93 ///
94 /// # Parameters
95 /// - `prefix`: Per-byte prefix text such as `0x`.
96 ///
97 /// # Returns
98 /// The updated codec.
99 pub fn with_byte_prefix(mut self, prefix: impl Into<String>) -> Self {
100 self.byte_prefix = Some(prefix.into());
101 self
102 }
103
104 /// Sets a separator written and accepted between encoded bytes.
105 ///
106 /// # Parameters
107 /// - `separator`: Separator text.
108 ///
109 /// # Returns
110 /// The updated codec.
111 pub fn with_separator(mut self, separator: impl Into<String>) -> Self {
112 self.separator = Some(separator.into());
113 self
114 }
115
116 /// Sets whether ASCII whitespace is ignored while decoding.
117 ///
118 /// # Parameters
119 /// - `ignore`: Whether to ignore ASCII whitespace.
120 ///
121 /// # Returns
122 /// The updated codec.
123 pub fn with_ignored_ascii_whitespace(mut self, ignore: bool) -> Self {
124 self.ignore_ascii_whitespace = ignore;
125 self
126 }
127
128 /// Sets whether ASCII case is ignored when decoding configured prefixes.
129 ///
130 /// This option affects whole-output prefixes and per-byte prefixes during
131 /// decoding only. Encoding writes prefixes exactly as configured.
132 ///
133 /// # Parameters
134 /// - `ignore`: Whether to ignore ASCII case while matching prefixes.
135 ///
136 /// # Returns
137 /// The updated codec.
138 pub fn with_ignore_prefix_case(mut self, ignore: bool) -> Self {
139 self.ignore_prefix_case = ignore;
140 self
141 }
142
143 /// Encodes bytes into a hexadecimal string.
144 ///
145 /// # Parameters
146 /// - `bytes`: Bytes to encode.
147 ///
148 /// # Returns
149 /// Hexadecimal text.
150 pub fn encode(&self, bytes: &[u8]) -> String {
151 let separator_len = self.separator.as_ref().map_or(0, String::len);
152 let prefix_len = self.prefix.as_ref().map_or(0, String::len);
153 let byte_prefix_len = self.byte_prefix.as_ref().map_or(0, String::len);
154 let capacity = prefix_len.saturating_add(
155 bytes
156 .len()
157 .saturating_mul(byte_prefix_len.saturating_add(2))
158 .saturating_add(bytes.len().saturating_sub(1).saturating_mul(separator_len)),
159 );
160 let mut output = String::with_capacity(capacity);
161 self.encode_into(bytes, &mut output);
162 output
163 }
164
165 /// Encodes bytes into an existing string.
166 ///
167 /// # Parameters
168 /// - `bytes`: Bytes to encode.
169 /// - `output`: Destination string.
170 pub fn encode_into(&self, bytes: &[u8], output: &mut String) {
171 if let Some(prefix) = &self.prefix {
172 output.push_str(prefix);
173 }
174 for (index, byte) in bytes.iter().enumerate() {
175 if index > 0
176 && let Some(separator) = &self.separator
177 {
178 output.push_str(separator);
179 }
180 if let Some(byte_prefix) = &self.byte_prefix {
181 output.push_str(byte_prefix);
182 }
183 push_hex_byte(*byte, self.uppercase, output);
184 }
185 }
186
187 /// Decodes hexadecimal text into bytes.
188 ///
189 /// # Parameters
190 /// - `text`: Hexadecimal text.
191 ///
192 /// # Returns
193 /// Decoded bytes.
194 ///
195 /// # Errors
196 /// Returns [`CodecError`] when a configured whole or per-byte prefix is missing,
197 /// when the normalized digit count is odd, or when a non-hex digit is found.
198 pub fn decode(&self, text: &str) -> CodecResult<Vec<u8>> {
199 let mut output = Vec::new();
200 self.decode_into(text, &mut output)?;
201 Ok(output)
202 }
203
204 /// Decodes hexadecimal text into an existing byte vector.
205 ///
206 /// # Parameters
207 /// - `text`: Hexadecimal text.
208 /// - `output`: Destination byte vector.
209 ///
210 /// # Errors
211 /// Returns [`CodecError`] when the input is malformed.
212 pub fn decode_into(&self, text: &str, output: &mut Vec<u8>) -> CodecResult<()> {
213 let digits = self.normalized_digits(text)?;
214 if digits.len() % 2 != 0 {
215 return Err(CodecError::OddHexLength {
216 digits: digits.len(),
217 });
218 }
219 output.reserve(digits.len() / 2);
220 for pair in digits.chunks_exact(2) {
221 let mut pair = pair.iter();
222 let Some(&(high_index, high_char)) = pair.next() else {
223 continue;
224 };
225 let Some(&(low_index, low_char)) = pair.next() else {
226 continue;
227 };
228 let high = hex_value(high_char).ok_or(CodecError::InvalidHexDigit {
229 index: high_index,
230 character: high_char,
231 })?;
232 let low = hex_value(low_char).ok_or(CodecError::InvalidHexDigit {
233 index: low_index,
234 character: low_char,
235 })?;
236 output.push((high << 4) | low);
237 }
238 Ok(())
239 }
240
241 /// Normalizes accepted input characters into hex digits.
242 ///
243 /// # Parameters
244 /// - `text`: Text to decode.
245 ///
246 /// # Returns
247 /// Hex digits paired with their original character indexes.
248 ///
249 /// # Errors
250 /// Returns [`CodecError::InvalidHexDigit`] for unsupported characters.
251 fn normalized_digits(&self, text: &str) -> CodecResult<Vec<(usize, char)>> {
252 let start_index = self.consume_prefix(text)?;
253 if let Some(byte_prefix) = self
254 .byte_prefix
255 .as_deref()
256 .filter(|prefix| !prefix.is_empty())
257 {
258 return self.normalized_byte_prefixed_digits(text, byte_prefix, start_index);
259 }
260 self.normalized_unprefixed_digits(text, start_index)
261 }
262
263 /// Consumes the configured whole-output prefix.
264 ///
265 /// # Parameters
266 /// - `text`: Text to decode.
267 ///
268 /// # Returns
269 /// Byte index where byte parsing should start.
270 ///
271 /// # Errors
272 /// Returns [`CodecError::MissingPrefix`] when a non-empty whole-output
273 /// prefix is configured but absent.
274 fn consume_prefix(&self, text: &str) -> CodecResult<usize> {
275 let Some(prefix) = self.prefix.as_deref().filter(|prefix| !prefix.is_empty()) else {
276 return Ok(0);
277 };
278 let index = self.skip_ascii_whitespace(text, 0);
279 let Some(rest) = text.get(index..) else {
280 return Err(CodecError::MissingPrefix {
281 prefix: prefix.to_owned(),
282 });
283 };
284 if self.starts_with_prefix(rest, prefix) {
285 Ok(index + prefix.len())
286 } else {
287 Err(CodecError::MissingPrefix {
288 prefix: prefix.to_owned(),
289 })
290 }
291 }
292
293 /// Normalizes unprefixed input characters into hex digits.
294 ///
295 /// # Parameters
296 /// - `text`: Text to decode.
297 ///
298 /// # Returns
299 /// Hex digits paired with their original character indexes.
300 ///
301 /// # Errors
302 /// Returns [`CodecError::InvalidHexDigit`] for unsupported characters.
303 fn normalized_unprefixed_digits(
304 &self,
305 text: &str,
306 mut index: usize,
307 ) -> CodecResult<Vec<(usize, char)>> {
308 let mut digits = Vec::with_capacity(text.len());
309 let separator = self
310 .separator
311 .as_deref()
312 .filter(|separator| !separator.is_empty());
313 while index < text.len() {
314 let Some(rest) = text.get(index..) else {
315 break;
316 };
317 if let Some(separator) = separator
318 && rest.starts_with(separator)
319 {
320 index += separator.len();
321 continue;
322 }
323 let Some(ch) = rest.chars().next() else {
324 break;
325 };
326 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
327 index += ch.len_utf8();
328 continue;
329 }
330 if hex_value(ch).is_some() {
331 digits.push((index, ch));
332 index += ch.len_utf8();
333 continue;
334 }
335 return Err(CodecError::InvalidHexDigit {
336 index,
337 character: ch,
338 });
339 }
340 Ok(digits)
341 }
342
343 /// Normalizes byte-prefixed input characters into hex digits.
344 ///
345 /// # Parameters
346 /// - `text`: Text to decode.
347 /// - `prefix`: Required prefix before each byte.
348 /// - `index`: Byte index where parsing should start.
349 ///
350 /// # Returns
351 /// Hex digits paired with their original character indexes.
352 ///
353 /// # Errors
354 /// Returns [`CodecError::MissingPrefix`] when a byte prefix is missing, or
355 /// [`CodecError::InvalidHexDigit`] for unsupported characters.
356 fn normalized_byte_prefixed_digits(
357 &self,
358 text: &str,
359 prefix: &str,
360 mut index: usize,
361 ) -> CodecResult<Vec<(usize, char)>> {
362 let mut digits = Vec::with_capacity(text.len());
363 let separator = self
364 .separator
365 .as_deref()
366 .filter(|separator| !separator.is_empty());
367 while index < text.len() {
368 index = self.skip_ignored(text, index, separator);
369 if index >= text.len() {
370 break;
371 }
372 let Some(rest) = text.get(index..) else {
373 break;
374 };
375 if !self.starts_with_prefix(rest, prefix) {
376 return Err(CodecError::MissingPrefix {
377 prefix: prefix.to_owned(),
378 });
379 }
380 index += prefix.len();
381
382 let mut digit_count = 0;
383 while digit_count < 2 && index < text.len() {
384 let Some(rest) = text.get(index..) else {
385 break;
386 };
387 let Some(ch) = rest.chars().next() else {
388 break;
389 };
390 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
391 index += ch.len_utf8();
392 continue;
393 }
394 if hex_value(ch).is_some() {
395 digits.push((index, ch));
396 index += ch.len_utf8();
397 digit_count += 1;
398 continue;
399 }
400 return Err(CodecError::InvalidHexDigit {
401 index,
402 character: ch,
403 });
404 }
405 }
406 Ok(digits)
407 }
408
409 /// Skips configured separators and ignored ASCII whitespace.
410 ///
411 /// # Parameters
412 /// - `text`: Text being decoded.
413 /// - `index`: Current byte index.
414 /// - `separator`: Optional configured separator.
415 ///
416 /// # Returns
417 /// The next byte index that should be parsed.
418 fn skip_ignored(&self, text: &str, mut index: usize, separator: Option<&str>) -> usize {
419 loop {
420 let Some(rest) = text.get(index..) else {
421 return index;
422 };
423 if let Some(separator) = separator
424 && rest.starts_with(separator)
425 {
426 index += separator.len();
427 continue;
428 }
429 let Some(ch) = rest.chars().next() else {
430 return index;
431 };
432 if self.ignore_ascii_whitespace && ch.is_ascii_whitespace() {
433 index += ch.len_utf8();
434 continue;
435 }
436 return index;
437 }
438 }
439
440 /// Skips ignored leading ASCII whitespace.
441 ///
442 /// # Parameters
443 /// - `text`: Text being decoded.
444 /// - `index`: Current byte index.
445 ///
446 /// # Returns
447 /// The next byte index after ignored ASCII whitespace.
448 fn skip_ascii_whitespace(&self, text: &str, mut index: usize) -> usize {
449 while self.ignore_ascii_whitespace && index < text.len() {
450 let Some(rest) = text.get(index..) else {
451 return index;
452 };
453 let Some(ch) = rest.chars().next() else {
454 return index;
455 };
456 if !ch.is_ascii_whitespace() {
457 return index;
458 }
459 index += ch.len_utf8();
460 }
461 index
462 }
463
464 /// Tests whether `text` starts with a configured prefix.
465 ///
466 /// # Parameters
467 /// - `text`: Text slice to inspect.
468 /// - `prefix`: Configured prefix.
469 ///
470 /// # Returns
471 /// `true` when `text` starts with `prefix`, honoring the configured
472 /// ASCII case sensitivity for decoding prefixes.
473 fn starts_with_prefix(&self, text: &str, prefix: &str) -> bool {
474 if !self.ignore_prefix_case {
475 return text.starts_with(prefix);
476 }
477 let Some(candidate) = text.get(..prefix.len()) else {
478 return false;
479 };
480 candidate.eq_ignore_ascii_case(prefix)
481 }
482}
483
484impl Default for HexCodec {
485 /// Creates a lowercase codec without prefix or separators.
486 fn default() -> Self {
487 Self::new()
488 }
489}
490
491impl Encoder<[u8]> for HexCodec {
492 type Error = CodecError;
493 type Output = String;
494
495 /// Encodes bytes into hexadecimal text.
496 fn encode(&self, input: &[u8]) -> Result<Self::Output, Self::Error> {
497 Ok(HexCodec::encode(self, input))
498 }
499}
500
501impl Decoder<str> for HexCodec {
502 type Error = CodecError;
503 type Output = Vec<u8>;
504
505 /// Decodes hexadecimal text into bytes.
506 fn decode(&self, input: &str) -> Result<Self::Output, Self::Error> {
507 HexCodec::decode(self, input)
508 }
509}
510
/// Maps one hexadecimal digit to the nibble it denotes.
///
/// Only the ASCII characters `0`-`9`, `a`-`f` and `A`-`F` are accepted.
///
/// # Parameters
/// - `ch`: Character to inspect.
///
/// # Returns
/// Nibble value, or `None` when `ch` is not a hex digit.
fn hex_value(ch: char) -> Option<u8> {
    // With radix 16 `to_digit` yields 0..=15, so the narrowing always succeeds.
    ch.to_digit(16)
        .and_then(|nibble| u8::try_from(nibble).ok())
}
526
527/// Appends one encoded byte to `output`.
528///
529/// # Parameters
530/// - `byte`: Byte to encode.
531/// - `uppercase`: Whether to use uppercase digits.
532/// - `output`: Destination string.
533fn push_hex_byte(byte: u8, uppercase: bool, output: &mut String) {
534 output.push(hex_digit(byte >> 4, uppercase));
535 output.push(hex_digit(byte & 0x0f, uppercase));
536}
537
/// Maps one nibble to the hexadecimal digit that denotes it.
///
/// # Parameters
/// - `value`: Nibble value.
/// - `uppercase`: Whether to use uppercase digits.
///
/// # Returns
/// Hexadecimal digit. Values above `0x0f` are masked to their low nibble.
fn hex_digit(value: u8, uppercase: bool) -> char {
    const LOWERCASE: &[u8; 16] = b"0123456789abcdef";
    const UPPERCASE: &[u8; 16] = b"0123456789ABCDEF";

    let alphabet = if uppercase { UPPERCASE } else { LOWERCASE };
    // The mask keeps the index within 0..=15, so the lookup cannot go out of
    // bounds.
    char::from(alphabet[usize::from(value & 0x0f)])
}