lowercase_hex/
lib.rs

1//! [![github]](https://github.com/andunieee/lowercase-hex) [![crates-io]](https://crates.io/crates/lowercase-hex) [![docs-rs]](https://docs.rs/lowercase-hex)
2//!
3//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
4//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
5//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs
6//!
7//! This crate provides a fast conversion of byte arrays to **lowercase** hexadecimal strings,
8//! both at compile time, and at run time.
9//!
10//! This is a fork of <https://github.com/danipopes/const-hex> that enforces strict lowercase.
11//!
12//! _Version requirement: rustc 1.64+_
13
14#![cfg_attr(not(feature = "std"), no_std)]
15#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
16#![cfg_attr(
17    feature = "nightly",
18    feature(core_intrinsics, inline_const),
19    allow(internal_features, stable_features)
20)]
21#![cfg_attr(feature = "portable-simd", feature(portable_simd))]
22#![warn(
23    missing_copy_implementations,
24    missing_debug_implementations,
25    missing_docs,
26    unreachable_pub,
27    unsafe_op_in_unsafe_fn,
28    clippy::missing_const_for_fn,
29    clippy::missing_inline_in_public_items,
30    clippy::all,
31    rustdoc::all
32)]
33#![cfg_attr(not(any(test, feature = "__fuzzing")), warn(unused_crate_dependencies))]
34#![deny(unused_must_use, rust_2018_idioms)]
35#![allow(
36    clippy::cast_lossless,
37    clippy::inline_always,
38    clippy::let_unit_value,
39    clippy::must_use_candidate,
40    clippy::wildcard_imports,
41    unsafe_op_in_unsafe_fn,
42    unused_unsafe
43)]
44
45#[cfg(feature = "alloc")]
46#[allow(unused_imports)]
47#[macro_use]
48extern crate alloc;
49
50use cfg_if::cfg_if;
51
52#[cfg(feature = "alloc")]
53#[allow(unused_imports)]
54use alloc::{string::String, vec::Vec};
55
56// `cpufeatures` may be unused when `force-generic` is enabled.
57#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
58use cpufeatures as _;
59
60mod arch;
61use arch::imp;
62
63mod impl_core;
64
65pub mod traits;
66#[cfg(feature = "alloc")]
67pub use traits::ToHexExt;
68
69// If the `hex` feature is enabled, re-export the `hex` crate's traits.
70// Otherwise, use our own with the more optimized implementation.
71cfg_if! {
72    if #[cfg(feature = "hex")] {
73        pub use hex;
74        #[doc(inline)]
75        pub use hex::{FromHex, FromHexError, ToHex};
76    } else {
77        mod error;
78        pub use error::FromHexError;
79
80        #[allow(deprecated)]
81        pub use traits::{FromHex, ToHex};
82    }
83}
84
85// Support for nightly features.
cfg_if! {
    if #[cfg(feature = "nightly")] {
        // Branch prediction hints.
        #[allow(unused_imports)]
        use core::intrinsics::{likely, unlikely};

        // `inline_const`: [#76001](https://github.com/rust-lang/rust/issues/76001)
        // With `nightly`, the assertion is wrapped in a `const { ... }` block.
        macro_rules! maybe_const_assert {
            ($($tt:tt)*) => {
                const { assert!($($tt)*) }
            };
        }
    } else {
        // Without `nightly`, `likely` and `unlikely` are aliases of
        // `core::convert::identity`, so they return their argument unchanged.
        #[allow(unused_imports)]
        use core::convert::{identity as likely, identity as unlikely};

        // Without `nightly`, this expands to a plain `assert!`.
        macro_rules! maybe_const_assert {
            ($($tt:tt)*) => {
                assert!($($tt)*)
            };
        }
    }
}
109
110// Serde support.
111cfg_if! {
112    if #[cfg(feature = "serde")] {
113        pub mod serde;
114
115        #[doc(no_inline)]
116        pub use self::serde::deserialize;
117        #[cfg(feature = "alloc")]
118        #[doc(no_inline)]
119        pub use self::serde::serialize;
120    }
121}
122
123mod buffer;
124pub use buffer::Buffer;
125
/// The table of lowercase characters used for hex encoding.
///
/// Entry `i` is the ASCII hex digit for the nibble value `i`.
pub const HEX_CHARS_LOWER: &[u8; 16] = b"0123456789abcdef";

/// The lookup table of hex byte to value, used for hex decoding.
///
/// The table is indexed by the input byte. Only `0-9` and `a-f` map to a
/// value; every other byte, including uppercase `A-F`, maps to [`NIL`].
///
/// [`NIL`] is used for invalid values.
pub const HEX_DECODE_LUT: &[u8; 256] = &make_decode_lut();

/// Represents an invalid value in the [`HEX_DECODE_LUT`] table.
pub const NIL: u8 = u8::MAX;
136
137/// Encodes `input` as a hex string into a [`Buffer`].
138///
139/// # Examples
140///
141/// ```
142/// const BUFFER: lowercase_hex::Buffer<4> = lowercase_hex::const_encode(b"kiwi");
143/// assert_eq!(BUFFER.as_str(), "6b697769");
144/// ```
145#[inline]
146pub const fn const_encode<const N: usize, const PREFIX: bool>(
147    input: &[u8; N],
148) -> Buffer<N, PREFIX> {
149    Buffer::new().const_format(input)
150}
151
152/// Encodes `input` as a hex string using lowercase characters into a mutable
153/// slice of bytes `output`.
154///
155/// # Errors
156///
157/// If the output buffer is not exactly `input.len() * 2` bytes long.
158///
159/// # Examples
160///
161/// ```
162/// let mut bytes = [0u8; 4 * 2];
163/// lowercase_hex::encode_to_slice(b"kiwi", &mut bytes)?;
164/// assert_eq!(&bytes, b"6b697769");
165/// # Ok::<_, lowercase_hex::FromHexError>(())
166/// ```
167#[inline]
168pub fn encode_to_slice<T: AsRef<[u8]>>(input: T, output: &mut [u8]) -> Result<(), FromHexError> {
169    encode_to_slice_inner(input.as_ref(), output)
170}
171
/// Returns the lowercase hex encoding of `data` as a newly allocated
/// [`String`].
///
/// Every input byte becomes exactly two lowercase hex digits (e.g.
/// `f9b4ca`), so the result is always twice as long as `data` and never
/// carries a prefix.
///
/// # Examples
///
/// ```
/// assert_eq!(lowercase_hex::encode("Hello world!"), "48656c6c6f20776f726c6421");
/// assert_eq!(lowercase_hex::encode([1, 2, 3, 15, 16]), "0102030f10");
/// ```
#[cfg(feature = "alloc")]
#[inline]
pub fn encode<T: AsRef<[u8]>>(data: T) -> String {
    let bytes: &[u8] = data.as_ref();
    encode_inner::<false>(bytes)
}
190
191/// Decode a hex string into a fixed-length byte-array.
192///
193/// Only lowercase characters are valid in the input string (e.g. `f9b4ca`).
194///
195/// Prefer using [`decode_to_array`] instead when possible (at runtime), as it is likely to be faster.
196///
197/// # Errors
198///
199/// This function returns an error if the input is not an even number of
200/// characters long or contains invalid hex characters, or if the input is not
201/// exactly `N * 2` bytes long.
202///
203/// # Example
204///
205/// ```
206/// const _: () = {
207///     let bytes = lowercase_hex::const_decode_to_array(b"6b697769");
208///     assert!(matches!(bytes.as_ref(), Ok(b"kiwi")));
209/// };
210/// ```
211#[inline]
212pub const fn const_decode_to_array<const N: usize>(input: &[u8]) -> Result<[u8; N], FromHexError> {
213    if input.len() % 2 != 0 {
214        return Err(FromHexError::OddLength);
215    }
216    if input.len() != N * 2 {
217        return Err(FromHexError::InvalidStringLength);
218    }
219    match const_decode_to_array_impl(input) {
220        Some(output) => Ok(output),
221        None => Err(unsafe { invalid_hex_error(input) }),
222    }
223}
224
225const fn const_decode_to_array_impl<const N: usize>(input: &[u8]) -> Option<[u8; N]> {
226    macro_rules! next {
227        ($var:ident, $i:expr) => {
228            let hex = unsafe { *input.as_ptr().add($i) };
229            let $var = HEX_DECODE_LUT[hex as usize];
230            if $var == NIL {
231                return None;
232            }
233        };
234    }
235
236    let mut output = [0; N];
237    debug_assert!(input.len() == N * 2);
238    let mut i = 0;
239    while i < output.len() {
240        next!(high, i * 2);
241        next!(low, i * 2 + 1);
242        output[i] = high << 4 | low;
243        i += 1;
244    }
245    Some(output)
246}
247
/// Decodes a hex string into raw bytes.
///
/// Only lowercase characters are valid in the input string (e.g. `f9b4ca`).
///
/// The returned vector holds `input.len() / 2` bytes.
///
/// # Errors
///
/// This function returns an error if the input is not an even number of
/// characters long or contains invalid hex characters.
///
/// # Example
///
/// ```
/// assert_eq!(
///     lowercase_hex::decode("48656c6c6f20776f726c6421"),
///     Ok("Hello world!".to_owned().into_bytes())
/// );
///
/// assert_eq!(lowercase_hex::decode("123"), Err(lowercase_hex::FromHexError::OddLength));
/// assert!(lowercase_hex::decode("foo").is_err());
/// ```
#[cfg(feature = "alloc")]
#[inline]
pub fn decode<T: AsRef<[u8]>>(input: T) -> Result<Vec<u8>, FromHexError> {
    // Non-generic inner function; the generic wrapper only performs the
    // `as_ref` conversion.
    fn decode_inner(input: &[u8]) -> Result<Vec<u8>, FromHexError> {
        if unlikely(input.len() % 2 != 0) {
            return Err(FromHexError::OddLength);
        }

        // Do not initialize memory since it will be entirely overwritten.
        let len = input.len() / 2;
        let mut output = Vec::with_capacity(len);
        // SAFETY: The entire vec is never read from, and gets dropped if decoding fails.
        // NOTE(review): this relies on the decoder writing every output byte
        // on success; that lives in `imp` and is not visible here.
        #[allow(clippy::uninit_vec)]
        unsafe {
            output.set_len(len);
        }

        // SAFETY: Lengths are checked above.
        // On error the `map` closure is skipped and `output` is dropped.
        unsafe { decode_checked(input, &mut output) }.map(|()| output)
    }

    decode_inner(input.as_ref())
}
291
292/// Decode a hex string into a mutable bytes slice.
293///
294/// Only lowercase characters are valid in the input string (e.g. `f9b4ca`).
295///
296/// # Errors
297///
298/// This function returns an error if the input is not an even number of
299/// characters long or contains invalid hex characters, or if the output slice
300/// is not exactly half the length of the input.
301///
302/// # Example
303///
304/// ```
305/// let mut bytes = [0u8; 4];
306/// lowercase_hex::decode_to_slice("6b697769", &mut bytes).unwrap();
307/// assert_eq!(&bytes, b"kiwi");
308///
309/// let res = lowercase_hex::decode_to_slice("6B697769", &mut bytes);
310/// assert!(res.is_err());
311/// ```
312#[inline]
313pub fn decode_to_slice<T: AsRef<[u8]>>(input: T, output: &mut [u8]) -> Result<(), FromHexError> {
314    decode_to_slice_inner(input.as_ref(), output)
315}
316
/// Decode a hex string into a fixed-length byte-array.
///
/// Only lowercase characters are valid in the input string (e.g. `f9b4ca`).
///
/// # Errors
///
/// This function returns an error if the input is not an even number of
/// characters long or contains invalid hex characters, or if the input is not
/// exactly `N * 2` bytes long.
///
/// # Example
///
/// ```
/// let bytes = lowercase_hex::decode_to_array(b"6b697769").unwrap();
/// assert_eq!(&bytes, b"kiwi");
/// ```
#[inline]
pub fn decode_to_array<T: AsRef<[u8]>, const N: usize>(input: T) -> Result<[u8; N], FromHexError> {
    // Non-generic (over `T`) inner function; the wrapper only performs the
    // `as_ref` conversion.
    fn decode_to_array_inner<const N: usize>(input: &[u8]) -> Result<[u8; N], FromHexError> {
        let mut output = impl_core::uninit_array();
        // SAFETY: The entire array is never read from.
        let output_slice = unsafe { impl_core::slice_assume_init_mut(&mut output) };
        // The length and content checks happen inside `decode_to_slice_inner`;
        // the array is only assumed initialized on the `Ok` path.
        // SAFETY (for `array_assume_init`): All elements are initialized.
        // NOTE(review): relies on a successful decode having written all `N`
        // bytes; that is implemented in `imp` and is not visible here.
        decode_to_slice_inner(input, output_slice)
            .map(|()| unsafe { impl_core::array_assume_init(output) })
    }

    decode_to_array_inner(input.as_ref())
}
346
/// Encodes `data` as lowercase hex into a new `String`, writing a leading
/// `0x` first when `PREFIX` is `true`.
///
/// The resulting string is `data.len() * 2` bytes long, plus 2 for the prefix.
#[cfg(feature = "alloc")]
fn encode_inner<const PREFIX: bool>(data: &[u8]) -> String {
    // `PREFIX as usize` is 0 or 1, so this reserves 2 extra bytes for "0x".
    let capacity = PREFIX as usize * 2 + data.len() * 2;
    let mut buf = Vec::<u8>::with_capacity(capacity);
    // SAFETY: The vec is not read before the writes below fill it.
    // NOTE(review): relies on `imp::encode` writing all `data.len() * 2`
    // bytes; that implementation is not visible here.
    #[allow(clippy::uninit_vec)]
    unsafe {
        buf.set_len(capacity)
    };
    let mut output = buf.as_mut_ptr();
    if PREFIX {
        // SAFETY: `output` is long enough.
        unsafe {
            output.add(0).write(b'0');
            output.add(1).write(b'x');
            // Skip past the prefix so the digits are written after it.
            output = output.add(2);
        }
    }
    // SAFETY: `output` is long enough (input.len() * 2).
    unsafe { imp::encode(data, output) };
    // SAFETY: We only write only ASCII bytes.
    unsafe { String::from_utf8_unchecked(buf) }
}
370
371fn encode_to_slice_inner(input: &[u8], output: &mut [u8]) -> Result<(), FromHexError> {
372    if unlikely(output.len() != 2 * input.len()) {
373        return Err(FromHexError::InvalidStringLength);
374    }
375    // SAFETY: Lengths are checked above.
376    unsafe { imp::encode(input, output.as_mut_ptr()) };
377    Ok(())
378}
379
380fn decode_to_slice_inner(input: &[u8], output: &mut [u8]) -> Result<(), FromHexError> {
381    if unlikely(input.len() % 2 != 0) {
382        return Err(FromHexError::OddLength);
383    }
384    if unlikely(output.len() != input.len() / 2) {
385        return Err(FromHexError::InvalidStringLength);
386    }
387    // SAFETY: Lengths are checked above.
388    unsafe { decode_checked(input, output) }
389}
390
391/// # Safety
392///
393/// Assumes `output.len() == input.len() / 2`.
394#[inline]
395unsafe fn decode_checked(input: &[u8], output: &mut [u8]) -> Result<(), FromHexError> {
396    debug_assert_eq!(output.len(), input.len() / 2);
397
398    if imp::USE_CHECK_FN {
399        // check then decode
400        if imp::check(input) {
401            unsafe { imp::decode_unchecked(input, output) };
402            return Ok(());
403        }
404    } else {
405        // check and decode at the same time
406        if unsafe { imp::decode_checked(input, output) } {
407            return Ok(());
408        }
409    }
410
411    Err(unsafe { invalid_hex_error(input) })
412}
413
414#[inline]
415const fn byte2hex(byte: u8) -> (u8, u8) {
416    let table = HEX_CHARS_LOWER;
417    let high = table[(byte >> 4) as usize];
418    let low = table[(byte & 0x0f) as usize];
419    (high, low)
420}
421
422/// Creates an invalid hex error from the input.
423///
424/// # Safety
425///
426/// Assumes `input` contains at least one invalid character.
427#[cold]
428#[cfg_attr(debug_assertions, track_caller)]
429const unsafe fn invalid_hex_error(input: &[u8]) -> FromHexError {
430    // Find the first invalid character.
431    let mut index = None;
432    let mut iter = input;
433    while let [byte, rest @ ..] = iter {
434        if HEX_DECODE_LUT[*byte as usize] == NIL {
435            index = Some(input.len() - rest.len() - 1);
436            break;
437        }
438        iter = rest;
439    }
440
441    let index = match index {
442        Some(index) => index,
443        None => {
444            if cfg!(debug_assertions) {
445                panic!("input was valid but `check` failed")
446            } else {
447                unsafe { core::hint::unreachable_unchecked() }
448            }
449        }
450    };
451
452    FromHexError::InvalidHexCharacter {
453        c: input[index] as char,
454        index,
455    }
456}
457
458const fn make_decode_lut() -> [u8; 256] {
459    let mut lut = [0; 256];
460    let mut i = 0u8;
461    loop {
462        lut[i as usize] = match i {
463            b'0'..=b'9' => i - b'0',
464            b'a'..=b'f' => i - b'a' + 10,
465            // use max value for invalid characters
466            _ => NIL,
467        };
468        if i == NIL {
469            break;
470        }
471        i += 1;
472    }
473    lut
474}
475
#[allow(
    missing_docs,
    unused,
    clippy::all,
    clippy::missing_inline_in_public_items
)]
#[cfg(all(feature = "__fuzzing", not(miri)))]
#[doc(hidden)]
pub mod fuzzing {
    //! Property-test and fuzzing entry points; only built with the
    //! `__fuzzing` feature and never under Miri.

    use super::*;
    use proptest::test_runner::TestCaseResult;
    use proptest::{prop_assert, prop_assert_eq};
    use std::fmt::Write;

    /// Runs both the encode and the decode checks on the same input.
    pub fn fuzz(data: &[u8]) -> TestCaseResult {
        self::encode(&data)?;
        self::decode(&data)?;
        Ok(())
    }

    /// Checks encoding against a `{:02x}` reference and round-trips the result.
    pub fn encode(input: &[u8]) -> TestCaseResult {
        // Each call is a no-op unless `input.len()` equals the first parameter.
        test_buffer::<8, 16>(input)?;
        test_buffer::<20, 40>(input)?;
        test_buffer::<32, 64>(input)?;
        test_buffer::<64, 128>(input)?;
        test_buffer::<128, 256>(input)?;

        let encoded = crate::encode(input);
        let expected = mk_expected(input);
        prop_assert_eq!(&encoded, &expected);

        let decoded = crate::decode(&encoded).unwrap();
        prop_assert_eq!(decoded, input);

        Ok(())
    }

    /// Checks that a successful decode yields `input.len() / 2` bytes.
    pub fn decode(input: &[u8]) -> TestCaseResult {
        if let Ok(decoded) = crate::decode(input) {
            let input_len = input.len() / 2;
            prop_assert_eq!(decoded.len(), input_len);
        }

        Ok(())
    }

    /// Reference encoder built on `{:02x}` formatting.
    fn mk_expected(bytes: &[u8]) -> String {
        let mut s = String::with_capacity(bytes.len() * 2);
        for i in bytes {
            write!(s, "{i:02x}").unwrap();
        }
        s
    }

    /// Exercises `Buffer` and the array decoders when `bytes` is exactly `N`
    /// bytes long; `LEN` must be `N * 2`.
    fn test_buffer<const N: usize, const LEN: usize>(bytes: &[u8]) -> TestCaseResult {
        if let Ok(bytes) = <&[u8; N]>::try_from(bytes) {
            let mut buffer = Buffer::<N, false>::new();
            let string = buffer.format(bytes).to_string();
            prop_assert_eq!(string.len(), bytes.len() * 2);
            prop_assert_eq!(string.as_bytes(), buffer.as_byte_array::<LEN>());
            prop_assert_eq!(string.as_str(), buffer.as_str());
            prop_assert_eq!(string.as_str(), mk_expected(bytes));

            let mut buffer = Buffer::<N, true>::new();
            let prefixed = buffer.format(bytes).to_string();
            prop_assert_eq!(prefixed.len(), 2 + bytes.len() * 2);
            prop_assert_eq!(prefixed.as_str(), buffer.as_str());
            prop_assert_eq!(prefixed.as_str(), format!("0x{string}"));

            prop_assert_eq!(decode_to_array(&string), Ok(*bytes));
            prop_assert_eq!(const_decode_to_array(string.as_bytes()), Ok(*bytes));

            // The decoders do not strip a `0x` prefix. The prefixed string is
            // `N * 2 + 2` bytes long (even, but not `N * 2`), so both must
            // reject it with a length error.
            prop_assert_eq!(
                decode_to_array::<_, N>(&prefixed),
                Err(FromHexError::InvalidStringLength)
            );
            prop_assert_eq!(
                const_decode_to_array::<N>(prefixed.as_bytes()),
                Err(FromHexError::InvalidStringLength)
            );
        }

        Ok(())
    }

    proptest::proptest! {
        #![proptest_config(proptest::prelude::ProptestConfig {
            cases: 1024,
            ..Default::default()
        })]

        #[test]
        fn fuzz_encode(s in ".+") {
            encode(s.as_bytes())?;
        }

        #[test]
        fn fuzz_check_true(s in "[0-9a-f]+") {
            let s = s.as_bytes();
            prop_assert!(crate::check_raw(s));
            prop_assert!(crate::const_check_raw(s));
            if s.len() % 2 == 0 {
                prop_assert!(crate::check(s).is_ok());
                prop_assert!(crate::const_check(s).is_ok());
            }
        }

        #[test]
        fn fuzz_check_false(s in ".{16}[0-9a-f]+") {
            let s = s.as_bytes();
            prop_assert!(crate::check(s).is_err());
            prop_assert!(crate::const_check(s).is_err());
            prop_assert!(!crate::check_raw(s));
            prop_assert!(!crate::const_check_raw(s));
        }
    }
}
586
587/// Returns `true` if the input is a valid hex string and can be decoded successfully.
588///
589/// Prefer using [`check`] instead when possible (at runtime), as it is likely to be faster.
590///
591/// # Examples
592///
593/// ```
594/// const _: () = {
595///     assert!(lowercase_hex::const_check(b"48656c6c6f20776f726c6421").is_ok());
596///
597///     assert!(lowercase_hex::const_check(b"48656c6c6f20776f726c642").is_err());
598///     assert!(lowercase_hex::const_check(b"Hello world!").is_err());
599/// };
600/// ```
601#[inline]
602pub const fn const_check(input: &[u8]) -> Result<(), FromHexError> {
603    if input.len() % 2 != 0 {
604        return Err(FromHexError::OddLength);
605    }
606    if const_check_raw(input) {
607        Ok(())
608    } else {
609        Err(unsafe { invalid_hex_error(input) })
610    }
611}
612
613/// Returns `true` if the input is a valid hex string.
614///
615/// Note that this does not check prefixes or length, but just the contents of the string.
616///
617/// Prefer using [`check_raw`] instead when possible (at runtime), as it is likely to be faster.
618///
619/// # Examples
620///
621/// ```
622/// const _: () = {
623///     assert!(lowercase_hex::const_check_raw(b"48656c6c6f20776f726c6421"));
624///
625///     // Odd length, but valid hex
626///     assert!(lowercase_hex::const_check_raw(b"48656c6c6f20776f726c642"));
627///
628///     // Valid hex string, but the prefix is not valid
629///     assert!(!lowercase_hex::const_check_raw(b"0x48656c6c6f20776f726c6421"));
630///
631///     assert!(!lowercase_hex::const_check_raw(b"Hello world!"));
632/// };
633/// ```
634#[inline]
635pub const fn const_check_raw(input: &[u8]) -> bool {
636    let mut i = 0;
637    while i < input.len() {
638        let byte = input[i];
639        if HEX_DECODE_LUT[byte as usize] == NIL {
640            return false;
641        }
642        i += 1;
643    }
644    true
645}
646
647/// Returns `true` if the input is a valid hex string and can be decoded successfully.
648///
649/// # Examples
650///
651/// ```
652/// assert!(lowercase_hex::check("48656c6c6f20776f726c6421").is_ok());
653///
654/// assert!(lowercase_hex::check("48656c6c6f20776f726c642").is_err());
655/// assert!(lowercase_hex::check("Hello world!").is_err());
656/// ```
657#[inline]
658pub fn check<T: AsRef<[u8]>>(input: T) -> Result<(), FromHexError> {
659    #[allow(clippy::missing_const_for_fn)]
660    fn check_inner(input: &[u8]) -> Result<(), FromHexError> {
661        if input.len() % 2 != 0 {
662            return Err(FromHexError::OddLength);
663        }
664        if imp::check(input) {
665            Ok(())
666        } else {
667            Err(unsafe { invalid_hex_error(input) })
668        }
669    }
670
671    check_inner(input.as_ref())
672}
673
674/// Returns `true` if the input is a valid hex string.
675///
676/// Note that this does not check prefixes or length, but just the contents of the string.
677///
678/// # Examples
679///
680/// ```
681/// assert!(lowercase_hex::check_raw("48656c6c6f20776f726c6421"));
682///
683/// // Odd length, but valid hex
684/// assert!(lowercase_hex::check_raw("48656c6c6f20776f726c642"));
685///
686/// // Valid hex string, but the prefix is not valid
687/// assert!(!lowercase_hex::check_raw("0x48656c6c6f20776f726c6421"));
688///
689/// assert!(!lowercase_hex::check_raw("Hello world!"));
690/// ```
691#[inline]
692pub fn check_raw<T: AsRef<[u8]>>(input: T) -> bool {
693    imp::check(input.as_ref())
694}