fashex 0.0.8

Hexadecimal string encoding and decoding with best-effort SIMD acceleration.
Documentation
#![doc = include_str!("../README.md")]
#![no_std]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(feature = "portable-simd", feature(portable_simd))]
#![cfg_attr(
    all(feature = "experimental-loongarch64-simd", target_arch = "loongarch64"),
    feature(stdarch_loongarch)
)]

#[cfg(any(test, feature = "alloc"))]
extern crate alloc;

#[cfg(any(test, feature = "std"))]
extern crate std;

mod backend;
mod buffer;
#[cfg(feature = "alloc")]
mod display;
mod error;
#[cfg(feature = "__internal_fuzz")]
pub mod fuzz;
mod traits;
mod util;

use core::mem::MaybeUninit;
use core::str;

pub use crate::buffer::Buffer;
#[cfg(feature = "alloc")]
pub use crate::display::Display;
pub use crate::error::InvalidInput;
pub use crate::traits::FromHex;
#[cfg(feature = "alloc")]
pub use crate::traits::ToHex;
pub use crate::util::{HEX_CHARS_LOWER, HEX_CHARS_UPPER};

/// Encodes `src` as a hexadecimal string into `dst` and returns a reference
/// to the written string.
///
/// ```rust
/// let decoded = b"Hello, world!";
///
/// let mut encoded = Vec::with_capacity(decoded.len() * 2);
///
/// assert_eq!(
///     fashex::encode::<false>(decoded, &mut encoded)
///         .expect("pre-allocated capacity is sufficient"),
///     "48656c6c6f2c20776f726c6421"
/// );
/// # assert_eq!(&*encoded, b"48656c6c6f2c20776f726c6421");
///
/// let mut encoded = Vec::with_capacity(decoded.len() * 2);
/// assert_eq!(
///     fashex::encode::<true>(decoded, &mut encoded)
///         .expect("pre-allocated capacity is sufficient"),
///     "48656C6C6F2C20776F726C6421"
/// );
/// # assert_eq!(&*encoded, b"48656C6C6F2C20776F726C6421");
/// ```
///
/// If you like, `&mut [MaybeUninit<u8>]` can also be used as the output buffer:
///
/// ```rust
/// use core::mem::MaybeUninit;
///
/// const CASE: &[u8; 13] = b"Hello, world!";
///
/// let mut encoded = [MaybeUninit::uninit(); CASE.len() * 2 + 1];
///
/// assert_eq!(
///     fashex::encode::<false>(CASE, &mut encoded)
///         .expect("the len of the uninitialized buffer is sufficient"),
///     "48656c6c6f2c20776f726c6421"
/// );
///
/// assert_eq!(
///     unsafe { &*(&raw const encoded[..CASE.len() * 2] as *const [u8] as *const str) },
///     "48656c6c6f2c20776f726c6421"
/// );
/// ```
///
/// ## Errors
///
/// The len (spare capacity) of the output buffer is insufficient.
pub fn encode<const UPPER: bool>(
    src: impl AsRef<[u8]>,
    dst: &mut (impl Buffer + ?Sized),
) -> Result<&str, InvalidInput> {
    let src = src.as_ref();

    // Write the hex digits into the unused tail of `dst`. On failure `?`
    // returns before `dst` is advanced.
    backend::encode::<UPPER>(src, dst.spare_capacity_mut())?;

    #[allow(
        unsafe_code,
        reason = "committing the bytes that `backend::encode` has just written"
    )]
    // SAFETY: `backend::encode` returned `Ok`.
    // NOTE(review): this presumes that on success it has initialized the first
    // `src.len() * 2` bytes of the spare capacity, and that this is what
    // `Buffer::advance` requires of its caller - confirm against both.
    let bytes: &[u8] = unsafe { dst.advance(src.len() * 2) };

    #[allow(
        unsafe_code,
        reason = "hexadecimal digits are ASCII and therefore valid UTF-8"
    )]
    // SAFETY: the encoded hexadecimal string only contains ASCII characters,
    // which are valid UTF-8 bytes.
    let encoded = unsafe { str::from_utf8_unchecked(bytes) };

    Ok(encoded)
}

/// Non-generic entry point for [`encode()`] with lowercase output and a plain
/// `&mut [MaybeUninit<u8>]` destination.
///
/// Only compiled with the internal `__internal_cargo_asm` feature and hidden
/// from the rendered documentation.
#[cfg(feature = "__internal_cargo_asm")]
#[doc(hidden)]
pub fn __cargo_asm_encode<'dst>(
    src: &[u8],
    dst: &'dst mut [MaybeUninit<u8>],
) -> Result<&'dst str, InvalidInput> {
    // `false` selects the lowercase alphabet.
    const UPPERCASE: bool = false;

    encode::<UPPERCASE>(src, dst)
}

/// Decodes `src` (a hexadecimal string, case insensitive) into bytes, writes
/// them into `dst`, and returns a reference to the written bytes.
///
/// ```rust
/// let encoded = b"48656c6c6f2c20776f726c6421";
///
/// let mut decoded = Vec::with_capacity(encoded.len() / 2);
///
/// assert_eq!(
///     fashex::decode(encoded, &mut decoded)
///         .expect("the input is valid hex and the spare capacity of the buffer is sufficient"),
///     b"Hello, world!"
/// );
/// # assert_eq!(&*decoded, b"Hello, world!");
/// ```
///
/// If you like, `&mut [MaybeUninit<u8>]` can also be used as the output buffer:
///
/// ```rust
/// use core::mem::MaybeUninit;
///
/// const CASE: &[u8; 26] = b"48656c6c6f2c20776f726c6421";
///
/// let mut decoded = [MaybeUninit::uninit(); CASE.len() / 2 + 1];
///
/// assert_eq!(
///     fashex::decode(CASE, &mut decoded)
///         .expect("the input is valid hex and the len of the uninitialized buffer is sufficient"),
///     b"Hello, world!"
/// );
///
/// assert_eq!(
///     unsafe { &*(&raw const decoded[..CASE.len() / 2] as *const [u8]) },
///     b"Hello, world!"
/// );
/// ```
///
/// ## Errors
///
/// 1. The input contains invalid characters.
/// 1. The len (spare capacity) of the output buffer is insufficient.
pub fn decode(
    src: impl AsRef<[u8]>,
    dst: &mut (impl Buffer + ?Sized),
) -> Result<&[u8], InvalidInput> {
    let src = src.as_ref();

    // Decode into the unused tail of `dst`. On failure `?` returns before
    // `dst` is advanced.
    //
    // NOTE(review): `src.len() / 2` below rounds down, so an odd-length input
    // is only handled correctly if `backend::decode` rejects it (as
    // `decode_generic` does explicitly) - confirm against `backend::decode`.
    backend::decode(src, dst.spare_capacity_mut())?;

    #[allow(
        unsafe_code,
        reason = "committing the bytes that `backend::decode` has just written"
    )]
    // SAFETY: `backend::decode` returned `Ok`.
    // NOTE(review): this presumes that on success it has initialized the first
    // `src.len() / 2` bytes of the spare capacity, and that this is what
    // `Buffer::advance` requires of its caller - confirm against both.
    let decoded: &[u8] = unsafe { dst.advance(src.len() / 2) };

    Ok(decoded)
}

/// Non-generic entry point for [`decode()`] with a plain
/// `&mut [MaybeUninit<u8>]` destination.
///
/// Only compiled with the internal `__internal_cargo_asm` feature and hidden
/// from the rendered documentation.
#[cfg(feature = "__internal_cargo_asm")]
#[doc(hidden)]
pub fn __cargo_asm_decode<'dst>(
    src: &[u8],
    dst: &'dst mut [MaybeUninit<u8>],
) -> Result<&'dst [u8], InvalidInput> {
    let hex: &[u8] = src;

    decode(hex, dst)
}

/// [`encode()`], but const-evaluable at the cost of performance.
///
/// The [`encode!`] macro wraps this function and is the preferred way to call
/// it in const contexts, such as when initializing a `const` or `static`
/// variable.
///
/// Only the first `src.len() * 2` bytes of `dst` are handed to the encoder.
/// The returned string borrows exactly that prefix, so `dst` may be larger
/// than strictly required.
///
/// ## Errors
///
/// 1. The len of the output buffer is insufficient.
pub const fn encode_generic<'dst, const UPPER: bool>(
    src: &[u8],
    dst: &'dst mut [MaybeUninit<u8>],
) -> Result<&'dst str, InvalidInput> {
    // View the output as two-byte cells, one per input byte. A trailing odd
    // byte can never hold a full pair of digits and is ignored.
    let (dst, _) = dst.as_chunks_mut::<2>();

    // Keep exactly one cell per input byte. This is both the capacity check
    // and what guarantees that the slice returned below never covers cells
    // that were not written.
    let Some((dst, _)) = dst.split_at_mut_checked(src.len()) else {
        return Err(InvalidInput);
    };

    #[allow(unsafe_code, reason = "The length is validated")]
    // SAFETY: `dst` holds exactly `src.len()` cells at this point.
    // NOTE(review): the precise length contract of `encode_generic_unchecked`
    // is not visible here - confirm that equal lengths satisfy it.
    unsafe {
        backend::generic::encode_generic_unchecked::<UPPER>(src, dst);
    };

    let dst = dst.as_flattened();

    #[allow(
        unsafe_code,
        reason = "reinterpreting the written hexadecimal digits as `str`"
    )]
    // SAFETY: `dst` was trimmed to `src.len()` cells above, so - provided the
    // encoder wrote a pair of ASCII hexadecimal digits into every cell - all
    // bytes behind the pointer are initialized and valid UTF-8.
    let dst = unsafe { &*(&raw const *dst as *const [u8] as *const str) };

    Ok(dst)
}

/// [`decode()`], but const-evaluable at the cost of performance.
///
/// The [`decode!`] macro wraps this function and is the preferred way to call
/// it in const contexts, such as when initializing a `const` or `static`
/// variable.
///
/// Only the first `src.len() / 2` bytes of `dst` are handed to the decoder.
/// The returned slice borrows exactly that prefix, so `dst` may be larger
/// than strictly required.
///
/// ## Errors
///
/// 1. The input contains invalid characters.
/// 1. The input contains an odd number of nibbles.
/// 1. The len of the output buffer is insufficient.
pub const fn decode_generic<'dst>(
    src: &[u8],
    dst: &'dst mut [MaybeUninit<u8>],
) -> Result<&'dst [u8], InvalidInput> {
    // Group the input into nibble pairs. A non-empty remainder means the
    // input has an odd length.
    let (src, &[]) = src.as_chunks::<2>() else {
        // Cannot have odd nibbles.
        return Err(InvalidInput);
    };

    // Keep exactly one output byte per nibble pair. This is both the capacity
    // check and what guarantees that the slice returned below never covers
    // bytes that were not written.
    let Some((dst, _)) = dst.split_at_mut_checked(src.len()) else {
        // The nibble pairs cannot fit in the output buffer.
        return Err(InvalidInput);
    };

    #[allow(unsafe_code, reason = "The length is validated")]
    // SAFETY: `dst` holds exactly `src.len()` bytes at this point.
    // NOTE(review): the precise length contract of `decode_generic_unchecked`
    // is not visible here - confirm that equal lengths satisfy it.
    let ret = unsafe { backend::generic::decode_generic_unchecked::<false>(src, dst) };

    // `?` is not usable in a `const fn`, hence the explicit match.
    match ret {
        Ok(()) => {
            #[allow(
                unsafe_code,
                reason = "We have decoded the input hexadecimal string to bytes and fully \
                          initialized the output buffer"
            )]
            // SAFETY: `dst` was trimmed to `src.len()` bytes above, so -
            // provided a successful decode writes one byte per nibble pair -
            // every byte behind the pointer is initialized.
            let dst = unsafe { &*((&raw const *dst) as *const [u8]) };

            Ok(dst)
        }
        Err(e) => Err(e),
    }
}

#[macro_export]
/// Helper macro for encoding byte arrays as hexadecimal strings in const
/// contexts.
///
/// The first argument is the constant input (the examples below use a byte
/// string, a `&str` and a byte array). The optional second argument is a
/// constant `bool`: `true` selects uppercase digits, `false` (the default)
/// lowercase digits.
///
/// ## Examples
///
/// ```rust
/// const HELLO_WORLD_LOWERCASE: &str = fashex::encode!(b"Hello, world!");
/// assert_eq!(HELLO_WORLD_LOWERCASE, "48656c6c6f2c20776f726c6421");
/// const HELLO_WORLD_UPPERCASE: &str = fashex::encode!(b"Hello, world!", true);
/// assert_eq!(HELLO_WORLD_UPPERCASE, "48656C6C6F2C20776F726C6421");
/// # const HELLO_WORLD_STR: &str = fashex::encode!("Hello, world!");
/// # assert_eq!(HELLO_WORLD_STR, "48656c6c6f2c20776f726c6421");
/// # const FROM_BYTES_LOWERCASE: &str = fashex::encode!([0x12, 0x34, 0xab, 0xcd]);
/// # assert_eq!(FROM_BYTES_LOWERCASE, "1234abcd");
/// ```
macro_rules! encode {
    // No case flag given: lowercase.
    ($bytes:expr) => {
        $crate::encode!($bytes, false)
    };
    ($bytes:expr, $uppercase:expr) => {{
        // `macro_rules!` hygiene does not cover items, so a short name such as
        // `ENCODED` would capture a caller constant of the same name used
        // inside `$bytes`. Hence the deliberately unlikely identifier.
        const __FASHEX_ENCODED: [u8; $bytes.len() * 2] = {
            let buf: &mut [::core::mem::MaybeUninit<u8>; const { $bytes.len() * 2 }] =
                &mut [::core::mem::MaybeUninit::uninit(); _];

            #[allow(
                unsafe_code,
                reason = "viewing the constant input uniformly as a byte slice"
            )]
            // SAFETY: the pointer and the length are both taken from `$bytes`,
            // so they describe the same run of initialized bytes.
            // NOTE(review): relies on `$bytes` being a constant whose storage
            // is still live here (e.g. a literal or a named constant).
            let bytes = unsafe { ::core::slice::from_raw_parts($bytes.as_ptr(), $bytes.len()) };

            match $crate::encode_generic::<{ $uppercase }>(bytes, buf) {
                Ok(_) => {}
                // `buf` is exactly `2 * len` bytes long, so the only
                // documented failure (insufficient buffer) cannot occur.
                Err(_) => unreachable!(),
            };

            #[allow(
                unsafe_code,
                reason = "converting the filled buffer into an initialized array"
            )]
            // SAFETY: `encode_generic` returned `Ok` for a buffer of exactly
            // `2 * len` bytes, so - presuming it initializes every byte it
            // reports as written - the whole array is initialized.
            unsafe {
                ::core::mem::transmute::<_, _>(*buf)
            }
        };

        #[allow(
            unsafe_code,
            reason = "hexadecimal digits are ASCII and therefore valid UTF-8"
        )]
        // SAFETY: the array only contains ASCII hexadecimal digits.
        unsafe {
            ::core::str::from_utf8_unchecked(&__FASHEX_ENCODED)
        }
    }};
}

#[macro_export]
/// Helper macro for decoding hexadecimal strings to byte arrays in const
/// contexts.
///
/// Compilation fails if the input has an odd length or is rejected by
/// `decode_generic`.
///
/// ## Examples
///
/// ```rust
/// const FOOBAR: &[u8] = fashex::decode!("48656c6c6f2c20776f726c6421");
/// assert_eq!(FOOBAR, b"Hello, world!");
/// # const FOOBAR_ARRAY: &[u8; 13] = fashex::decode!("48656c6c6f2c20776f726c6421");
/// # assert_eq!(FOOBAR_ARRAY, b"Hello, world!");
/// # const FOOBAR_RIG: &[u8; 13] = fashex::decode!("48656c6c6f2C20776F726c6421");
/// # assert_eq!(FOOBAR_RIG, b"Hello, world!");
/// ```
macro_rules! decode {
    ($bytes:expr) => {{
        // `macro_rules!` hygiene does not cover items, so a short name such as
        // `DECODED` would capture a caller constant of the same name used
        // inside `$bytes`. Hence the deliberately unlikely identifier.
        const __FASHEX_DECODED: [u8; $bytes.len() / 2] = {
            // Reject odd-length input with a readable message.
            assert!(
                $bytes.len() % 2 == 0,
                "the length of the input must be even"
            );

            let buf: &mut [::core::mem::MaybeUninit<u8>; const { $bytes.len() / 2 }] =
                &mut [::core::mem::MaybeUninit::uninit(); _];

            #[allow(
                unsafe_code,
                reason = "viewing the constant input uniformly as a byte slice"
            )]
            // SAFETY: the pointer and the length are both taken from `$bytes`,
            // so they describe the same run of initialized bytes.
            // NOTE(review): relies on `$bytes` being a constant whose storage
            // is still live here (e.g. a literal or a named constant).
            let bytes = unsafe { ::core::slice::from_raw_parts($bytes.as_ptr(), $bytes.len()) };

            match $crate::decode_generic(bytes, buf) {
                Ok(_) => {}
                Err(_) => panic!("invalid hexadecimal string"),
            };

            #[allow(
                unsafe_code,
                reason = "converting the filled buffer into an initialized array"
            )]
            // SAFETY: `decode_generic` returned `Ok` for a buffer of exactly
            // `len / 2` bytes, so - presuming it initializes every byte it
            // reports as written - the whole array is initialized.
            unsafe {
                ::core::mem::transmute::<_, _>(*buf)
            }
        };

        &__FASHEX_DECODED
    }};
}