Skip to main content

hex_conservative/
lib.rs

// SPDX-License-Identifier: CC0-1.0

//! # Hex encoding and decoding
//!
//! General purpose hex encoding/decoding library with a conservative MSRV and dependency policy.
//!
//! ## Const hex literals
//!
//! ```
//! use hex_conservative::hex;
//!
//! const GENESIS: [u8; 32] = hex!("000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f");
//! ```
//!
//! ## Runtime hex parsing
//!
//! ```
//! # #[cfg(feature = "alloc")] {
//! // In your manifest use the `package` key to improve import ergonomics.
//! // hex = { package = "hex-conservative", version = "*" }
//! # use hex_conservative as hex; // No need for this if using `package` as above.
//! use hex::prelude::*;
//!
//! // Decode an arbitrary length hex string into a vector.
//! let v = hex::decode_to_vec("deadbeef").expect("valid hex digits");
//! // Or a known length hex string into a fixed size array.
//! let a = hex::decode_to_array::<4>("deadbeef").expect("valid length and valid hex digits");
//!
//! // We support `LowerHex` and `UpperHex` out of the box for `[u8]` slices.
//! println!("An array as lower hex: {:x}", a.as_hex());
//! // And for vecs since `Vec` derefs to byte slice.
//! println!("A vector as upper hex: {:X}", v.as_hex());
//!
//! // Allocate a new string (also `to_upper_hex_string`).
//! let s = v.to_lower_hex_string();
//!
//! // Please note, mixed case strings will still parse successfully but we only
//! // support displaying hex in a single case.
//! assert_eq!(
//!     hex::decode_to_vec("dEaDbEeF").expect("valid mixed case hex digits"),
//!     hex::decode_to_vec("deadbeef").expect("valid hex digits"),
//! );
//! # }
//! ```
//!
//! ## Crate feature flags
//!
//! * `std` - enables the standard library, on by default.
//! * `alloc` - enables features that require allocation such as decoding into `Vec<u8>`, implied
//!   by `std`.
//! * `newer-rust-version` - enables Rust version detection and thus newer features, may add
//!   dependency on a feature detection crate to reduce compile times. This feature is expected to
//!   do nothing once the native detection is in Rust and our MSRV is at least that version. We may
//!   also remove the feature gate in 2.0 with semver trick once that happens.
//!
//! ## Minimum Supported Rust Version (MSRV)
//!
//! The current MSRV is Rust `1.74.0`. Policy is to never use an MSRV that is less than two years
//! old and also that ships in Debian stable. We may bump our MSRV in a minor version, but we have
//! no plans to.
//!
//! Note though that the dependencies may have a looser policy. This is not considered
//! breaking/wrong - you would just need to pin them in `Cargo.lock` (not `.toml`).

65#![no_std]
66// Experimental features we need.
67#![cfg_attr(docsrs, feature(doc_cfg))]
68// Coding conventions
69#![warn(missing_docs)]
70
71#[cfg(feature = "std")]
72extern crate std;
73
74#[cfg(feature = "alloc")]
75#[allow(unused_imports)] // false positive regarding macro
76#[macro_use]
77extern crate alloc;
78
// Hidden from the rendered documentation, so not part of the documented public API.
// NOTE(review): presumably reached from exported macros as `$crate::_export::_core::…` so that
// they do not depend on what the caller has in scope - no such use is visible in this file.
#[doc(hidden)]
pub mod _export {
    /// A re-export of `core::*`.
    pub mod _core {
        pub use core::*;
    }
}
86
87pub mod buf_encoder;
88pub mod display;
89pub mod error;
90mod iter;
91
92/// Re-exports of the common crate traits.
93pub mod prelude {
94    #[doc(inline)]
95    pub use crate::display::DisplayHex;
96}
97
98#[cfg(feature = "alloc")]
99use alloc::vec::Vec;
100use core::fmt;
101
102pub(crate) use table::Table;
103
104#[rustfmt::skip]                // Keep public re-exports separate.
105#[doc(inline)]
106pub use self::{
107    display::DisplayHex,
108    iter::{BytesToHexIter, HexToBytesIter, HexSliceToBytesIter},
109};
110#[doc(no_inline)]
111pub use self::error::{
112    DecodeFixedLengthBytesError, DecodeVariableLengthBytesError, InvalidCharError,
113    InvalidLengthError, OddLengthStringError,
114};
115
/// Decodes a hex string with variable length.
///
/// The length of the returned `Vec` is determined by the length of the input, meaning all even
/// lengths of the input string are allowed. If you know the required length at compile time using
/// [`decode_to_array`] is most likely a better choice.
///
/// # Errors
///
/// Returns an error if `hex` contains invalid characters or doesn't have even length.
#[cfg(feature = "alloc")]
pub fn decode_to_vec(hex: &str) -> Result<Vec<u8>, DecodeVariableLengthBytesError> {
    // Two fallible steps: constructing the iterator and draining it. Each `?` converts (via
    // `From`) the error of its step into the error type returned by this function.
    Ok(HexToBytesIter::new(hex)?.drain_to_vec()?)
}
129
130/// Decodes a hex string with an expected length known at compile time.
131///
132/// If you don't know the required length at compile time you need to use [`decode_to_vec`]
133/// instead.
134///
135/// # Errors
136///
137/// Returns an error if `hex` contains invalid characters or has incorrect length. (Should be
138/// `N * 2`.)
139pub fn decode_to_array<const N: usize>(hex: &str) -> Result<[u8; N], DecodeFixedLengthBytesError> {
140    if hex.len() == N * 2 {
141        let mut ret = [0u8; N];
142        // checked above
143        HexToBytesIter::new_unchecked(hex).drain_to_slice(&mut ret)?;
144        Ok(ret)
145    } else {
146        Err(InvalidLengthError { invalid: hex.len(), expected: 2 * N }.into())
147    }
148}
149
/// Parses hex strings in const contexts.
///
/// This is primarily useful for testing, panics on all error paths.
///
/// The argument has to be a constant expression (typically a string literal) because its length
/// is used as the length of the returned array. Upper-case, lower-case and mixed-case digits are
/// all accepted.
///
/// # Returns
///
/// `[u8; N]` array containing the parsed data if valid, where `N` is half the length of the input.
///
/// # Panics
///
/// Panics on all error paths:
///
/// * If input string is not even length. This is checked by a `const` assertion, so it is always
///   reported at compile time.
/// * If input string contains non-hex characters. When the macro is used to initialize a `const`
///   this is reported at compile time as well.
///
/// # Examples
///
/// ```
/// use hex_conservative::hex;
///
/// const BYTES: [u8; 4] = hex!("deadBEEF");
/// assert_eq!(BYTES, [0xde, 0xad, 0xbe, 0xef]);
/// ```
#[macro_export]
macro_rules! hex {
    ($hex:expr) => {{
        // Evaluated at compile time regardless of where the macro is invoked.
        const _: () = assert!($hex.len() % 2 == 0, "hex string must have even length");

        // Maps one ASCII hex digit to its value (0-15).
        const fn decode_digit(digit: u8) -> u8 {
            match digit {
                b'0'..=b'9' => digit - b'0',
                b'a'..=b'f' => digit - b'a' + 10,
                b'A'..=b'F' => digit - b'A' + 10,
                _ => panic!("invalid hex digit"),
            }
        }

        let mut output = [0u8; $hex.len() / 2];
        let bytes = $hex.as_bytes();

        // `for` loops are not usable in const contexts, hence the manual `while` loop.
        let mut i = 0;
        while i < output.len() {
            // The first digit of each pair is the high nibble of the byte.
            let high = decode_digit(bytes[i * 2]);
            let low = decode_digit(bytes[i * 2 + 1]);
            output[i] = (high << 4) | low;
            i += 1;
        }

        output
    }};
}
192
/// Possible case of hex.
///
/// [`Case::Lower`] is the [`Default`].
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Default)]
pub enum Case {
    /// Produce lower-case chars (`[0-9a-f]`).
    ///
    /// This is the default.
    #[default]
    Lower,

    /// Produce upper-case chars (`[0-9A-F]`).
    Upper,
}
209
210impl Case {
211    /// Returns the encoding table.
212    ///
213    /// The returned table may only contain displayable ASCII chars.
214    #[inline]
215    #[rustfmt::skip]
216    pub(crate) fn table(self) -> &'static Table {
217        match self {
218            Case::Lower => &Table::LOWER,
219            Case::Upper => &Table::UPPER,
220        }
221    }
222}
223
/// A valid hex character: one of `[0-9a-fA-F]`.
//
// The `repr(u8)` guarantees that representation matches the ASCII byte value of the character,
// making transmute between `Char` and `u8` sound whenever the byte is a valid hex digit.
//
// The derived ordering compares discriminants, i.e. the ASCII byte values: digits sort before
// upper-case letters, which sort before lower-case letters.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(u8)]
pub enum Char {
    /// `'0'`
    Zero = b'0',
    /// `'1'`
    One = b'1',
    /// `'2'`
    Two = b'2',
    /// `'3'`
    Three = b'3',
    /// `'4'`
    Four = b'4',
    /// `'5'`
    Five = b'5',
    /// `'6'`
    Six = b'6',
    /// `'7'`
    Seven = b'7',
    /// `'8'`
    Eight = b'8',
    /// `'9'`
    Nine = b'9',
    /// `'a'`
    LowerA = b'a',
    /// `'b'`
    LowerB = b'b',
    /// `'c'`
    LowerC = b'c',
    /// `'d'`
    LowerD = b'd',
    /// `'e'`
    LowerE = b'e',
    /// `'f'`
    LowerF = b'f',
    /// `'A'`
    UpperA = b'A',
    /// `'B'`
    UpperB = b'B',
    /// `'C'`
    UpperC = b'C',
    /// `'D'`
    UpperD = b'D',
    /// `'E'`
    UpperE = b'E',
    /// `'F'`
    UpperF = b'F',
}

impl Char {
    /// Decodes an ASCII byte into the value (0–15) of the hex digit it represents.
    ///
    /// Lower-case and upper-case letters decode to the same value.
    ///
    /// Returns `None` if `b` is not one of `[0-9a-fA-F]`.
    #[inline]
    pub(crate) fn decode_nibble(b: u8) -> Option<u8> {
        // Each valid hex byte maps to its nibble value; 0xFF marks invalid entries.
        // Char variant discriminants equal their ASCII byte values, so they index directly.
        #[rustfmt::skip]
        static TABLE: [u8; 256] = {
            let mut t = [0xFF_u8; 256];
            // Each Char variant is a u8, so all `as usize` casts are lossless.
            t[Char::Zero  as usize] = 0;  t[Char::One   as usize] = 1;
            t[Char::Two   as usize] = 2;  t[Char::Three as usize] = 3;
            t[Char::Four  as usize] = 4;  t[Char::Five  as usize] = 5;
            t[Char::Six   as usize] = 6;  t[Char::Seven as usize] = 7;
            t[Char::Eight as usize] = 8;  t[Char::Nine  as usize] = 9;
            t[Char::LowerA as usize] = 10; t[Char::UpperA as usize] = 10;
            t[Char::LowerB as usize] = 11; t[Char::UpperB as usize] = 11;
            t[Char::LowerC as usize] = 12; t[Char::UpperC as usize] = 12;
            t[Char::LowerD as usize] = 13; t[Char::UpperD as usize] = 13;
            t[Char::LowerE as usize] = 14; t[Char::UpperE as usize] = 14;
            t[Char::LowerF as usize] = 15; t[Char::UpperF as usize] = 15;
            t
        };
        // The table has 256 entries, so indexing with any `u8` is always in bounds.
        let n = TABLE[usize::from(b)];
        if n == 0xFF {
            None
        } else {
            Some(n)
        }
    }

    /// Casts a slice of `Char`s to `&str`.
    ///
    /// This conversion is zero-cost.
    #[inline]
    pub fn slice_as_str(slice: &[Self]) -> &str {
        let bytes = Self::slice_as_bytes(slice);
        // SAFETY: every `Char` is an ASCII byte and any sequence of ASCII bytes is valid UTF-8.
        unsafe { core::str::from_utf8_unchecked(bytes) }
    }

    /// Casts a slice of `Char`s to `&[u8]`.
    ///
    /// This conversion is zero-cost.
    #[inline]
    pub fn slice_as_bytes(slice: &[Self]) -> &[u8] {
        let ptr = slice.as_ptr().cast::<u8>();
        let len = slice.len();
        // SAFETY: `Self` is `repr(u8)`, so it has the same size and alignment as `u8` and every
        // value of it is a valid byte. Pointer and length come from a live slice and the returned
        // slice borrows for the same lifetime.
        unsafe { core::slice::from_raw_parts(ptr, len) }
    }
}

impl fmt::Display for Char {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // This should be the most efficient way of formatting because it avoids encoding `char`
        // and it fully supports all formatting options.
        let slice = core::slice::from_ref(self);
        fmt::Display::fmt(Self::slice_as_str(slice), f)
    }
}

impl fmt::Debug for Char {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // The Debug impl of char puts quotes around it so we do it as well for consistency.
        let buf = [b'\'', u8::from(*self), b'\''];
        // SAFETY: every single byte is guaranteed to be ASCII.
        let buf = unsafe { core::str::from_utf8_unchecked(&buf) };
        // Yes, Display is correct here since Debug would put "" around it and that would be
        // incorrect.
        fmt::Display::fmt(buf, f)
    }
}

impl From<Char> for char {
    /// Returns the ASCII character this hex digit stands for.
    #[inline]
    fn from(c: Char) -> char { char::from(c as u8) }
}

impl From<Char> for u8 {
    /// Returns the ASCII byte value of this hex digit (its discriminant).
    #[inline]
    fn from(c: Char) -> u8 { c as u8 }
}
362
363/// Correctness boundary for `Table`.
364mod table {
365    use super::Char;
366
367    /// Table of hex chars.
368    //
369    // Correctness invariant: each byte in the table must be ASCII.
370    #[allow(clippy::derived_hash_with_manual_eq)] // The Eq impl distinguishes the two possible values of Table
371    #[derive(Debug, Hash)]
372    pub(crate) struct Table([Char; 16]);
373
374    impl Table {
375        #[rustfmt::skip] // rustfmt wants to make these one per line.
376        pub(crate) const LOWER: Self = Table([
377            Char::Zero, Char::One, Char::Two, Char::Three,
378            Char::Four, Char::Five, Char::Six, Char::Seven,
379            Char::Eight, Char::Nine, Char::LowerA, Char::LowerB,
380            Char::LowerC, Char::LowerD, Char::LowerE, Char::LowerF,
381        ]);
382        #[rustfmt::skip] // rustfmt wants to make these one per line.
383        pub(crate) const UPPER: Self = Table([
384            Char::Zero, Char::One, Char::Two, Char::Three,
385            Char::Four, Char::Five, Char::Six, Char::Seven,
386            Char::Eight, Char::Nine, Char::UpperA, Char::UpperB,
387            Char::UpperC, Char::UpperD, Char::UpperE, Char::UpperF,
388        ]);
389
390        /// Encodes single byte as two ASCII chars using the given table.
391        ///
392        /// The function guarantees only returning values from the provided table.
393        #[inline]
394        pub(crate) fn byte_to_chars(&self, byte: u8) -> [char; 2] {
395            self.byte_to_hex_chars(byte).map(char::from)
396        }
397
398        /// Writes the single byte as two ASCII chars in the provided buffer, and returns a `&str`
399        /// to that buffer.
400        ///
401        /// The function guarantees only returning values from the provided table.
402        #[inline]
403        pub(crate) fn byte_to_str<'a>(&self, dest: &'a mut [u8; 2], byte: u8) -> &'a str {
404            dest[0] = self.0[usize::from(byte >> 4)].into();
405            dest[1] = self.0[usize::from(byte & 0x0F)].into();
406            // SAFETY: Table inner array contains only valid ascii
407            let hex_str = unsafe { core::str::from_utf8_unchecked(dest) };
408            hex_str
409        }
410
411        /// Encodes a single byte as two [`Char`] values using the given table.
412        ///
413        /// The function guarantees only returning values from the provided table.
414        #[inline]
415        pub(crate) fn byte_to_hex_chars(&self, byte: u8) -> [Char; 2] {
416            let left = self.0[usize::from(byte >> 4)];
417            let right = self.0[usize::from(byte & 0x0F)];
418            [left, right]
419        }
420    }
421
422    impl PartialEq for Table {
423        // Table can only be Table::LOWER or Table::UPPER. These differ in any of the Chars from
424        // indices 10-15.
425        fn eq(&self, other: &Self) -> bool { self.0[10] == other.0[10] }
426    }
427    impl Eq for Table {}
428}
429
// Gated on `alloc` because several tests use `format!` and `ToString`.
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    #[test]
    fn hex_macro() {
        let data = hex!("deadbeef");
        assert_eq!(data, [0xde, 0xad, 0xbe, 0xef]);
    }

    #[test]
    fn hex_macro_case_insensitive() {
        assert_eq!(hex!("DEADBEEF"), hex!("deadbeef"));
    }

    #[test]
    fn hex_macro_const_context() {
        const HASH: [u8; 32] =
            hex!("000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f");
        assert_eq!(HASH[0], 0x00);
        assert_eq!(HASH[31], 0x6f);
    }

    #[test]
    fn char_slice_casts() {
        use super::Char;

        const BEEF: &[Char] = &[Char::LowerB, Char::LowerE, Char::LowerE, Char::LowerF];

        // Empty, single-element and multi-element slices.
        assert_eq!(Char::slice_as_bytes(&[]), &[]);
        assert_eq!(Char::slice_as_bytes(&BEEF[..1]), b"b");
        assert_eq!(Char::slice_as_bytes(BEEF), b"beef");
        assert_eq!(Char::slice_as_str(&[]), "");
        assert_eq!(Char::slice_as_str(&BEEF[..1]), "b");
        assert_eq!(Char::slice_as_str(BEEF), "beef");
    }

    #[test]
    fn char_display() {
        use alloc::string::ToString;

        use super::Char;

        assert_eq!(Char::Zero.to_string(), "0");
        assert_eq!(Char::LowerB.to_string(), "b");
        assert_eq!(Char::UpperB.to_string(), "B");
        // Padding and alignment have to be honoured.
        assert_eq!(format!("{: >3}", Char::UpperB), "  B");
        assert_eq!(format!("{: <3}", Char::UpperB), "B  ");
        assert_eq!(format!("{: ^3}", Char::UpperB), " B ");
    }

    #[test]
    fn char_debug() {
        use super::Char;

        assert_eq!(format!("{:?}", Char::Zero), format!("{:?}", '0'));
        assert_eq!(format!("{:?}", Char::LowerB), format!("{:?}", 'b'));
        assert_eq!(format!("{:?}", Char::UpperB), format!("{:?}", 'B'));
        // We don't test alignment against `char` because it's not supported by `char` which is
        // considered a bug - see https://github.com/rust-lang/rust/issues/30164
        assert_eq!(format!("{: >5?}", Char::UpperB), "  'B'");
        assert_eq!(format!("{: <5?}", Char::UpperB), "'B'  ");
        assert_eq!(format!("{: ^5?}", Char::UpperB), " 'B' ");
    }
}