hex_conservative/lib.rs
1// SPDX-License-Identifier: CC0-1.0
2
3//! # Hex encoding and decoding
4//!
5//! General purpose hex encoding/decoding library with a conservative MSRV and dependency policy.
6//!
7//! ## Const hex literals
8//!
9//! ```
10//! use hex_conservative::hex;
11//!
12//! const GENESIS: [u8; 32] = hex!("000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f");
13//! ```
14//!
15//! ## Runtime hex parsing
16//!
17//! ```
18//! # #[cfg(feature = "alloc")] {
19//! // In your manifest use the `package` key to improve import ergonomics.
20//! // hex = { package = "hex-conservative", version = "*" }
21//! # use hex_conservative as hex; // No need for this if using `package` as above.
22//! use hex::prelude::*;
23//!
24//! // Decode an arbitrary length hex string into a vector.
25//! let v = hex::decode_to_vec("deadbeef").expect("valid hex digits");
26//! // Or a known length hex string into a fixed size array.
27//! let a = hex::decode_to_array::<4>("deadbeef").expect("valid length and valid hex digits");
28//!
29//! // We support `LowerHex` and `UpperHex` out of the box for `[u8]` slices.
30//! println!("An array as lower hex: {:x}", a.as_hex());
31//! // And for vecs since `Vec` derefs to byte slice.
32//! println!("A vector as upper hex: {:X}", v.as_hex());
33//!
34//! // Allocate a new string (also `to_upper_hex_string`).
35//! let s = v.to_lower_hex_string();
36//!
37//! // Please note, mixed case strings will still parse successfully but we only
38//! // support displaying hex in a single case.
39//! assert_eq!(
40//! hex::decode_to_vec("dEaDbEeF").expect("valid mixed case hex digits"),
41//! hex::decode_to_vec("deadbeef").expect("valid hex digits"),
42//! );
43//! # }
44//! ```
45//!
46//! ## Crate feature flags
47//!
48//! * `std` - enables the standard library, on by default.
49//! * `alloc` - enables features that require allocation such as decoding into `Vec<u8>`, implied
50//! by `std`.
51//! * `newer-rust-version` - enables Rust version detection and thus newer features, may add
52//! dependency on a feature detection crate to reduce compile times. This feature is expected to
53//! do nothing once the native detection is in Rust and our MSRV is at least that version. We may
54//! also remove the feature gate in 2.0 with semver trick once that happens.
55//!
56//! ## Minimum Supported Rust Version (MSRV)
57//!
58//! The current MSRV is Rust `1.74.0`. Policy is to never use an MSRV that is less than two years
59//! old and also that ships in Debian stable. We may bump our MSRV in a minor version, but we have
60//! no plans to.
61//!
62//! Note though that the dependencies may have looser policy. This is not considered breaking/wrong
63//! - you would just need to pin them in `Cargo.lock` (not `.toml`).
64
65#![no_std]
66// Experimental features we need.
67#![cfg_attr(docsrs, feature(doc_cfg))]
68// Coding conventions
69#![warn(missing_docs)]
70
71#[cfg(feature = "std")]
72extern crate std;
73
74#[cfg(feature = "alloc")]
75#[allow(unused_imports)] // false positive regarding macro
76#[macro_use]
77extern crate alloc;
78
// Kept out of the rendered documentation via `#[doc(hidden)]`.
#[doc(hidden)]
pub mod _export {
    /// A re-export of `core::*`.
    ///
    /// Everything publicly exported by `core` is reachable through this module.
    pub mod _core {
        pub use core::*;
    }
}
86
87pub mod buf_encoder;
88pub mod display;
89pub mod error;
90mod iter;
91
/// Re-exports of the common crate traits.
///
/// Glob-import this module (`use hex_conservative::prelude::*;`) to bring [`DisplayHex`] into
/// scope.
pub mod prelude {
    // `doc(inline)` makes rustdoc document the trait here instead of only linking to it.
    #[doc(inline)]
    pub use crate::display::DisplayHex;
}
97
98#[cfg(feature = "alloc")]
99use alloc::vec::Vec;
100use core::fmt;
101
102pub(crate) use table::Table;
103
104#[rustfmt::skip] // Keep public re-exports separate.
105#[doc(inline)]
106pub use self::{
107 display::DisplayHex,
108 iter::{BytesToHexIter, HexToBytesIter, HexSliceToBytesIter},
109};
110#[doc(no_inline)]
111pub use self::error::{
112 DecodeFixedLengthBytesError, DecodeVariableLengthBytesError, InvalidCharError,
113 InvalidLengthError, OddLengthStringError,
114};
115
/// Decodes a hex string of arbitrary (even) length into a freshly allocated `Vec<u8>`.
///
/// The length of the returned `Vec` follows from the length of the input, so every even-length
/// input is accepted. When the required length is known at compile time, [`decode_to_array`] is
/// most likely the better choice.
///
/// # Errors
///
/// Returns an error if `hex` contains invalid characters or doesn't have even length.
#[cfg(feature = "alloc")]
pub fn decode_to_vec(hex: &str) -> Result<Vec<u8>, DecodeVariableLengthBytesError> {
    // Each `?` converts the step's own error type into `DecodeVariableLengthBytesError`.
    let digits = HexToBytesIter::new(hex)?;
    let bytes = digits.drain_to_vec()?;
    Ok(bytes)
}
129
130/// Decodes a hex string with an expected length known at compile time.
131///
132/// If you don't know the required length at compile time you need to use [`decode_to_vec`]
133/// instead.
134///
135/// # Errors
136///
137/// Returns an error if `hex` contains invalid characters or has incorrect length. (Should be
138/// `N * 2`.)
139pub fn decode_to_array<const N: usize>(hex: &str) -> Result<[u8; N], DecodeFixedLengthBytesError> {
140 if hex.len() == N * 2 {
141 let mut ret = [0u8; N];
142 // checked above
143 HexToBytesIter::new_unchecked(hex).drain_to_slice(&mut ret)?;
144 Ok(ret)
145 } else {
146 Err(InvalidLengthError { invalid: hex.len(), expected: 2 * N }.into())
147 }
148}
149
150/// Parses hex strings in const contexts.
151///
152/// This is primarily useful for testing, panics on all error paths.
153///
154/// # Returns
155///
156/// `[u8; N]` array containing the parsed data if valid.
157///
158/// # Panics
159///
160/// Panics on all error paths:
161///
162/// * If input string is not even length.
163/// * If input string contains non-hex characters.
164#[macro_export]
165macro_rules! hex {
166 ($hex:expr) => {{
167 const _: () = assert!($hex.len() % 2 == 0, "hex string must have even length");
168
169 const fn decode_digit(digit: u8) -> u8 {
170 match digit {
171 b'0'..=b'9' => digit - b'0',
172 b'a'..=b'f' => digit - b'a' + 10,
173 b'A'..=b'F' => digit - b'A' + 10,
174 _ => panic!("invalid hex digit"),
175 }
176 }
177
178 let mut output = [0u8; $hex.len() / 2];
179 let bytes = $hex.as_bytes();
180
181 let mut i = 0;
182 while i < output.len() {
183 let high = decode_digit(bytes[i * 2]);
184 let low = decode_digit(bytes[i * 2 + 1]);
185 output[i] = (high << 4) | low;
186 i += 1;
187 }
188
189 output
190 }};
191}
192
/// Possible case of hex.
///
/// The [`Default`] implementation is derived and yields [`Case::Lower`].
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Hash)]
pub enum Case {
    /// Produce lower-case chars (`[0-9a-f]`).
    ///
    /// This is the default.
    #[default]
    Lower,

    /// Produce upper-case chars (`[0-9A-F]`).
    Upper,
}
209
210impl Case {
211 /// Returns the encoding table.
212 ///
213 /// The returned table may only contain displayable ASCII chars.
214 #[inline]
215 #[rustfmt::skip]
216 pub(crate) fn table(self) -> &'static Table {
217 match self {
218 Case::Lower => &Table::LOWER,
219 Case::Upper => &Table::UPPER,
220 }
221 }
222}
223
/// A single valid hex digit: one of `[0-9a-fA-F]`.
//
// Thanks to `repr(u8)` and the explicit discriminants below, the in-memory representation of
// every variant is exactly the ASCII byte of the character it names. That is what makes
// transmuting between `Char` and `u8` sound whenever the byte is a valid hex digit.
#[repr(u8)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Char {
    /// The digit `'0'`.
    Zero = b'0',
    /// The digit `'1'`.
    One = b'1',
    /// The digit `'2'`.
    Two = b'2',
    /// The digit `'3'`.
    Three = b'3',
    /// The digit `'4'`.
    Four = b'4',
    /// The digit `'5'`.
    Five = b'5',
    /// The digit `'6'`.
    Six = b'6',
    /// The digit `'7'`.
    Seven = b'7',
    /// The digit `'8'`.
    Eight = b'8',
    /// The digit `'9'`.
    Nine = b'9',
    /// Lower-case `'a'`.
    LowerA = b'a',
    /// Lower-case `'b'`.
    LowerB = b'b',
    /// Lower-case `'c'`.
    LowerC = b'c',
    /// Lower-case `'d'`.
    LowerD = b'd',
    /// Lower-case `'e'`.
    LowerE = b'e',
    /// Lower-case `'f'`.
    LowerF = b'f',
    /// Upper-case `'A'`.
    UpperA = b'A',
    /// Upper-case `'B'`.
    UpperB = b'B',
    /// Upper-case `'C'`.
    UpperC = b'C',
    /// Upper-case `'D'`.
    UpperD = b'D',
    /// Upper-case `'E'`.
    UpperE = b'E',
    /// Upper-case `'F'`.
    UpperF = b'F',
}
276
277impl Char {
278 /// Returns the nibble value (0–15) of this hex character.
279 #[inline]
280 pub(crate) fn decode_nibble(b: u8) -> Option<u8> {
281 // Each valid hex byte maps to its nibble value; 0xFF marks invalid entries.
282 // Char variant discriminants equal their ASCII byte values, so they index directly.
283 #[rustfmt::skip]
284 static TABLE: [u8; 256] = {
285 let mut t = [0xFF_u8; 256];
286 // Each Char variant is a u8. So all `as usize` casts are safe.
287 t[Char::Zero as usize] = 0; t[Char::One as usize] = 1;
288 t[Char::Two as usize] = 2; t[Char::Three as usize] = 3;
289 t[Char::Four as usize] = 4; t[Char::Five as usize] = 5;
290 t[Char::Six as usize] = 6; t[Char::Seven as usize] = 7;
291 t[Char::Eight as usize] = 8; t[Char::Nine as usize] = 9;
292 t[Char::LowerA as usize] = 10; t[Char::UpperA as usize] = 10;
293 t[Char::LowerB as usize] = 11; t[Char::UpperB as usize] = 11;
294 t[Char::LowerC as usize] = 12; t[Char::UpperC as usize] = 12;
295 t[Char::LowerD as usize] = 13; t[Char::UpperD as usize] = 13;
296 t[Char::LowerE as usize] = 14; t[Char::UpperE as usize] = 14;
297 t[Char::LowerF as usize] = 15; t[Char::UpperF as usize] = 15;
298 t
299 };
300 let n = TABLE[usize::from(b)];
301 if n == 0xFF {
302 None
303 } else {
304 Some(n)
305 }
306 }
307
308 /// Casts a slice of `Char`s to `&str`.
309 ///
310 /// This conversion is zero-cost.
311 #[inline]
312 pub fn slice_as_str(slice: &[Self]) -> &str {
313 let bytes = Self::slice_as_bytes(slice);
314 // Guaranteed becuase it's all ASCII.
315 unsafe { core::str::from_utf8_unchecked(bytes) }
316 }
317
318 /// Casts a slice of `Char`s to `&[u8]`.
319 ///
320 /// This conversion is zero-cost.
321 #[inline]
322 pub fn slice_as_bytes(slice: &[Self]) -> &[u8] {
323 let ptr = slice.as_ptr().cast();
324 let len = slice.len();
325 // SOUNDNESS: `Self` is repr(u8)
326 // Because all chars are ASCII a slice of chars is also guaranteed to be valid slice of
327 // bytes.
328 unsafe { core::slice::from_raw_parts(ptr, len) }
329 }
330}
331
332impl fmt::Display for Char {
333 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
334 // This should be the most efficient way of formatting because it avoids encoding `char`
335 // and it fully supports all formatting options.
336 let slice = core::slice::from_ref(self);
337 fmt::Display::fmt(Self::slice_as_str(slice), f)
338 }
339}
340
341impl fmt::Debug for Char {
342 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
343 // the Debug impl of char puts quotes around it so we do it as well for consistency.
344 let buf = [b'\'', u8::from(*self), b'\''];
345 // SOUNDNESS: every single byte is guaranteed to be ASCII.
346 let buf = unsafe { core::str::from_utf8_unchecked(&buf) };
347 // Yes, Display is correct here since Debug would put "" around it and that would be
348 // incorrect.
349 fmt::Display::fmt(buf, f)
350 }
351}
352
353impl From<Char> for char {
354 #[inline]
355 fn from(c: Char) -> char { char::from(c as u8) }
356}
357
358impl From<Char> for u8 {
359 #[inline]
360 fn from(c: Char) -> u8 { c as u8 }
361}
362
363/// Correctness boundary for `Table`.
364mod table {
365 use super::Char;
366
367 /// Table of hex chars.
368 //
369 // Correctness invariant: each byte in the table must be ASCII.
370 #[allow(clippy::derived_hash_with_manual_eq)] // The Eq impl distinguishes the two possible values of Table
371 #[derive(Debug, Hash)]
372 pub(crate) struct Table([Char; 16]);
373
374 impl Table {
375 #[rustfmt::skip] // rustfmt wants to make these one per line.
376 pub(crate) const LOWER: Self = Table([
377 Char::Zero, Char::One, Char::Two, Char::Three,
378 Char::Four, Char::Five, Char::Six, Char::Seven,
379 Char::Eight, Char::Nine, Char::LowerA, Char::LowerB,
380 Char::LowerC, Char::LowerD, Char::LowerE, Char::LowerF,
381 ]);
382 #[rustfmt::skip] // rustfmt wants to make these one per line.
383 pub(crate) const UPPER: Self = Table([
384 Char::Zero, Char::One, Char::Two, Char::Three,
385 Char::Four, Char::Five, Char::Six, Char::Seven,
386 Char::Eight, Char::Nine, Char::UpperA, Char::UpperB,
387 Char::UpperC, Char::UpperD, Char::UpperE, Char::UpperF,
388 ]);
389
390 /// Encodes single byte as two ASCII chars using the given table.
391 ///
392 /// The function guarantees only returning values from the provided table.
393 #[inline]
394 pub(crate) fn byte_to_chars(&self, byte: u8) -> [char; 2] {
395 self.byte_to_hex_chars(byte).map(char::from)
396 }
397
398 /// Writes the single byte as two ASCII chars in the provided buffer, and returns a `&str`
399 /// to that buffer.
400 ///
401 /// The function guarantees only returning values from the provided table.
402 #[inline]
403 pub(crate) fn byte_to_str<'a>(&self, dest: &'a mut [u8; 2], byte: u8) -> &'a str {
404 dest[0] = self.0[usize::from(byte >> 4)].into();
405 dest[1] = self.0[usize::from(byte & 0x0F)].into();
406 // SAFETY: Table inner array contains only valid ascii
407 let hex_str = unsafe { core::str::from_utf8_unchecked(dest) };
408 hex_str
409 }
410
411 /// Encodes a single byte as two [`Char`] values using the given table.
412 ///
413 /// The function guarantees only returning values from the provided table.
414 #[inline]
415 pub(crate) fn byte_to_hex_chars(&self, byte: u8) -> [Char; 2] {
416 let left = self.0[usize::from(byte >> 4)];
417 let right = self.0[usize::from(byte & 0x0F)];
418 [left, right]
419 }
420 }
421
422 impl PartialEq for Table {
423 // Table can only be Table::LOWER or Table::UPPER. These differ in any of the Chars from
424 // indices 10-15.
425 fn eq(&self, other: &Self) -> bool { self.0[10] == other.0[10] }
426 }
427 impl Eq for Table {}
428}
429
// Unit tests for the `hex!` macro and the `Char` type. They need `alloc` for `format!` and
// `ToString`.
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    /// The macro decodes a lower-case literal into the expected bytes.
    #[test]
    fn hex_macro() {
        let data = hex!("deadbeef");
        assert_eq!(data, [0xde, 0xad, 0xbe, 0xef]);
    }

    /// Upper-case and lower-case input decode to the same bytes.
    #[test]
    fn hex_macro_case_insensitive() {
        assert_eq!(hex!("DEADBEEF"), hex!("deadbeef"));
    }

    /// The macro can initialize a `const`.
    #[test]
    fn hex_macro_const_context() {
        const HASH: [u8; 32] =
            hex!("000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f");
        assert_eq!(HASH[0], 0x00);
        assert_eq!(HASH[31], 0x6f);
    }

    /// `Char` slices cast to byte slices and strings, including empty and partial slices.
    #[test]
    fn char_slice_casts() {
        use super::Char;

        const BEEF: &[Char] = &[Char::LowerB, Char::LowerE, Char::LowerE, Char::LowerF];

        assert_eq!(Char::slice_as_bytes(&[]), &[]);
        assert_eq!(Char::slice_as_bytes(&BEEF[..1]), b"b");
        assert_eq!(Char::slice_as_bytes(BEEF), b"beef");
        assert_eq!(Char::slice_as_str(&[]), "");
        assert_eq!(Char::slice_as_str(&BEEF[..1]), "b");
        assert_eq!(Char::slice_as_str(BEEF), "beef");
    }

    /// `Display` prints the bare character and honors width/alignment.
    #[test]
    fn char_display() {
        use alloc::string::ToString;

        use super::Char;

        assert_eq!(Char::Zero.to_string(), "0");
        assert_eq!(Char::LowerB.to_string(), "b");
        assert_eq!(Char::UpperB.to_string(), "B");
        // Width 3 around a single character means two fill characters in total.
        assert_eq!(format!("{: >3}", Char::UpperB), "  B");
        assert_eq!(format!("{: <3}", Char::UpperB), "B  ");
        assert_eq!(format!("{: ^3}", Char::UpperB), " B ");
    }

    /// `Debug` matches `char`'s quoted output and honors width/alignment.
    #[test]
    fn char_debug() {
        use super::Char;

        assert_eq!(format!("{:?}", Char::Zero), format!("{:?}", '0'));
        assert_eq!(format!("{:?}", Char::LowerB), format!("{:?}", 'b'));
        assert_eq!(format!("{:?}", Char::UpperB), format!("{:?}", 'B'));
        // We don't test alignment against `char` because it's not supported by `char` which is
        // considered a bug - see https://github.com/rust-lang/rust/issues/30164
        // Width 5 around the three-character `'B'` means two fill characters in total.
        assert_eq!(format!("{: >5?}", Char::UpperB), "  'B'");
        assert_eq!(format!("{: <5?}", Char::UpperB), "'B'  ");
        assert_eq!(format!("{: ^5?}", Char::UpperB), " 'B' ");
    }
}
493}