fashex 0.0.6

Hexadecimal string encoding and decoding with best-effort SIMD acceleration.
Documentation
#![doc = include_str!("../README.md")]
#![no_std]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(feature = "portable-simd", feature(portable_simd))]
#![cfg_attr(
    all(feature = "experimental-loongarch64-simd", target_arch = "loongarch64"),
    feature(stdarch_loongarch)
)]

#[cfg(any(test, feature = "alloc"))]
extern crate alloc;

#[cfg(any(test, feature = "std"))]
extern crate std;

mod backend;
mod buffer;
#[cfg(feature = "alloc")]
mod display;
mod error;
#[cfg(feature = "__internal_fuzz")]
pub mod fuzz;
mod util;

use core::mem::MaybeUninit;

pub use crate::buffer::Buffer;
#[cfg(feature = "alloc")]
pub use crate::display::Display;
pub use crate::error::InvalidInput;
pub use crate::util::{HEX_CHARS_LOWER, HEX_CHARS_UPPER};

/// Hex-encodes `src` into the spare capacity of `dst` and returns a reference
/// to the bytes that were just written.
///
/// The `UPPER` const parameter selects the alphabet: `false` produces
/// lowercase digits (`a`-`f`), `true` produces uppercase digits (`A`-`F`).
///
/// ## Examples
///
/// ```rust
/// let decoded = b"Hello, world!";
///
/// let mut encoded = Vec::with_capacity(decoded.len() * 2);
///
/// assert_eq!(
///     fashex::encode::<_, false>(decoded, &mut encoded)
///         .expect("pre-allocated capacity is sufficient"),
///     b"48656c6c6f2c20776f726c6421"
/// );
/// # assert_eq!(&*encoded, b"48656c6c6f2c20776f726c6421");
///
/// let mut encoded = Vec::with_capacity(decoded.len() * 2);
/// assert_eq!(
///     fashex::encode::<_, true>(decoded, &mut encoded)
///         .expect("pre-allocated capacity is sufficient"),
///     b"48656C6C6F2C20776F726C6421"
/// );
/// # assert_eq!(&*encoded, b"48656C6C6F2C20776F726C6421");
/// ```
///
/// ## Notes
///
/// If the output buffer is a `&mut [MaybeUninit<u8>]`, only its first
/// `2 * src.len()` bytes are guaranteed to be initialized afterwards; the
/// caller must not read the remaining, uninitialized portion.
///
/// ## Errors
///
/// Returns [`InvalidInput`] when the spare capacity of the output buffer is
/// insufficient.
pub fn encode<'dst, B: Buffer + ?Sized, const UPPER: bool>(
    src: &[u8],
    dst: &'dst mut B,
) -> Result<&'dst [u8], InvalidInput> {
    // Every input byte becomes two hex digits.
    let written = src.len() * 2;

    backend::encode::<UPPER>(src, dst.spare_capacity_mut())?;

    // NOTE(review): `Buffer::advance` is defined elsewhere; presumably it
    // requires the first `written` bytes of the spare capacity to be
    // initialized, which a successful `backend::encode` is taken to
    // guarantee — confirm against the trait's safety contract.
    #[allow(unsafe_code, reason = "XXX")]
    let encoded = unsafe { dst.advance(written) };

    Ok(encoded)
}

/// Non-generic lowercase [`encode()`] entry point over a raw
/// `[MaybeUninit<u8>]` buffer.
///
/// Only compiled with the internal `__internal_cargo_asm` feature and hidden
/// from the documentation; not part of the public API.
#[cfg(feature = "__internal_cargo_asm")]
#[doc(hidden)]
pub fn __cargo_asm_encode<'dst>(
    src: &[u8],
    dst: &'dst mut [MaybeUninit<u8>],
) -> Result<&'dst [u8], InvalidInput> {
    // Spell out the buffer type instead of leaving it to inference.
    encode::<[MaybeUninit<u8>], false>(src, dst)
}

/// Decodes `src` (a hexadecimal string) into raw bytes, writes them into `dst`,
/// and returns a reference to the written bytes.
///
/// Lowercase digits, uppercase digits, and any mix of the two are accepted,
/// as the examples below show.
///
/// The returned bytes are arbitrary binary data. Unlike the output of
/// [`encode()`], they are **not** guaranteed to be ASCII or valid UTF-8, so
/// they must not be reinterpreted as `&str` without validation.
///
/// ## Examples
///
/// ```rust
/// let encoded = b"48656c6c6f2c20776f726c6421";
///
/// let mut decoded = Vec::with_capacity(encoded.len() / 2);
///
/// assert_eq!(
///     fashex::decode(encoded, &mut decoded)
///         .expect("the input is valid hex and capacity is sufficient"),
///     b"Hello, world!"
/// );
/// # assert_eq!(&*decoded, b"Hello, world!");
/// ```
///
/// ```rust
/// let encoded = b"48656C6C6F2C20776F726C6421";
///
/// let mut decoded = Vec::with_capacity(encoded.len() / 2);
///
/// assert_eq!(
///     fashex::decode(encoded, &mut decoded)
///         .expect("the input is valid hex and capacity is sufficient"),
///     b"Hello, world!"
/// );
/// # assert_eq!(&*decoded, b"Hello, world!");
/// ```
///
/// ```rust
/// let encoded = b"48656c6c6f2C20776F726c6421";
///
/// let mut decoded = Vec::with_capacity(encoded.len() / 2);
///
/// assert_eq!(
///     fashex::decode(encoded, &mut decoded)
///         .expect("the input is valid hex and capacity is sufficient"),
///     b"Hello, world!"
/// );
/// # assert_eq!(&*decoded, b"Hello, world!");
/// ```
///
/// ## Notes
///
/// When passing a `&mut [MaybeUninit<u8>]` as the output buffer, we only
/// guarantee that the first `src.len() / 2` portion is properly initialized,
/// and the caller should not read the uninitialized portion.
///
/// ## Errors
///
/// 1. The input contains invalid characters.
/// 1. The spare capacity of the output buffer is insufficient.
pub fn decode<'dst, B: Buffer + ?Sized>(
    src: &[u8],
    dst: &'dst mut B,
) -> Result<&'dst [u8], InvalidInput> {
    // Two hex digits decode to one byte.
    let written = src.len() / 2;

    backend::decode(src, dst.spare_capacity_mut())?;

    // NOTE(review): `Buffer::advance` is defined elsewhere; presumably it
    // requires the first `written` bytes of the spare capacity to be
    // initialized, which a successful `backend::decode` is taken to
    // guarantee — confirm against the trait's safety contract.
    #[allow(unsafe_code, reason = "XXX")]
    let decoded = unsafe { dst.advance(written) };

    Ok(decoded)
}

/// Non-generic [`decode()`] entry point over a raw `[MaybeUninit<u8>]`
/// buffer.
///
/// Only compiled with the internal `__internal_cargo_asm` feature and hidden
/// from the documentation; not part of the public API.
#[cfg(feature = "__internal_cargo_asm")]
#[doc(hidden)]
pub fn __cargo_asm_decode<'dst>(
    src: &[u8],
    dst: &'dst mut [MaybeUninit<u8>],
) -> Result<&'dst [u8], InvalidInput> {
    // Spell out the buffer type instead of leaving it to inference.
    decode::<[MaybeUninit<u8>]>(src, dst)
}

/// [`encode()`], but const-evaluable at the cost of performance.
///
/// The [`encode!`] macro wraps this function and is the preferred way to call
/// it in const contexts, such as when initializing a `const` or `static`
/// variable.
///
/// Only the first `2 * src.len()` bytes of `dst` are written, and the returned
/// slice covers exactly those bytes. Any bytes of `dst` beyond that are left
/// untouched and are not part of the returned slice.
///
/// ## Errors
///
/// 1. The spare capacity of the output buffer is insufficient
///    (`dst.len() < 2 * src.len()`).
pub const fn encode_generic<'dst, const UPPER: bool>(
    src: &[u8],
    dst: &'dst mut [MaybeUninit<u8>],
) -> Result<&'dst [u8], InvalidInput> {
    // View the output as pairs of bytes: one pair per input byte. A trailing
    // odd byte can never be used and is dropped here.
    let (dst, _) = dst.as_chunks_mut::<2>();

    if src.len() > dst.len() {
        return Err(InvalidInput);
    }

    // Keep only the pairs that will actually be written. Without this, an
    // oversized `dst` would be returned in full as `&[u8]` below, exposing
    // its uninitialized tail as if it were initialized.
    let (dst, _) = dst.split_at_mut(src.len());

    // `dst` now holds exactly `src.len()` pairs.
    // NOTE(review): the contract of `encode_generic_unchecked` is defined
    // elsewhere; it is presumed to need one output pair per input byte.
    #[allow(unsafe_code, reason = "The length is validated")]
    unsafe {
        backend::generic::encode_generic_unchecked::<UPPER>(src, dst);
    };

    let dst = dst.as_flattened();

    // SAFETY: `dst` was truncated to the `2 * src.len()` bytes handed to the
    // encoder, so the slice covers only bytes it was asked to fill, and
    // `MaybeUninit<u8>` has the same layout as `u8`.
    #[allow(
        unsafe_code,
        reason = "We have encoded the input bytes to a hexadecimal string and fully initialized \
                  the output buffer"
    )]
    let dst = unsafe { &*(&raw const *dst as *const [u8]) };

    Ok(dst)
}

/// [`decode()`], but const-evaluable at the cost of performance.
///
/// The [`decode!`] macro wraps this function and is the preferred way to call
/// it in const contexts, such as when initializing a `const` or `static`
/// variable.
///
/// On success, only the first `src.len() / 2` bytes of `dst` are part of the
/// returned slice. Any bytes of `dst` beyond that are left untouched.
///
/// ## Errors
///
/// 1. The input contains invalid characters.
/// 1. The input contains an odd number of nibbles.
/// 1. The output buffer is too small to hold the decoded bytes.
pub const fn decode_generic<'dst>(
    src: &[u8],
    dst: &'dst mut [MaybeUninit<u8>],
) -> Result<&'dst [u8], InvalidInput> {
    // View the input as nibble pairs; a non-empty remainder means the input
    // length is odd.
    let (src, &[]) = src.as_chunks::<2>() else {
        // Cannot have odd nibbles.
        return Err(InvalidInput);
    };

    if src.len() > dst.len() {
        // The nibble pairs cannot fit in the output buffer.
        return Err(InvalidInput);
    }

    // Keep only the bytes that will actually be written. Without this, an
    // oversized `dst` would be returned in full as `&[u8]` below, exposing
    // its uninitialized tail as if it were initialized.
    let (dst, _) = dst.split_at_mut(src.len());

    // `dst` now holds exactly one output byte per input nibble pair.
    // NOTE(review): the contract of `decode_generic_unchecked` (including the
    // meaning of its `false` const argument) is defined elsewhere — confirm.
    #[allow(unsafe_code, reason = "The length is validated")]
    let ret = unsafe { backend::generic::decode_generic_unchecked::<false>(src, dst) };

    // `?` is not used here; the explicit `match` keeps this function `const`.
    match ret {
        Ok(()) => {
            // SAFETY: `dst` was truncated to the `src.len()` bytes handed to
            // the decoder, so the slice covers only bytes it was asked to
            // fill, and `MaybeUninit<u8>` has the same layout as `u8`.
            #[allow(
                unsafe_code,
                reason = "We have decoded the input hexadecimal string to bytes and fully \
                          initialized the output buffer"
            )]
            let dst = unsafe { &*((&raw const *dst) as *const [u8]) };

            Ok(dst)
        }
        Err(e) => Err(e),
    }
}

#[macro_export]
/// Helper macro for encoding byte arrays as hexadecimal strings in const
/// contexts.
///
/// Expands to a `&str` holding the hexadecimal representation of the input.
/// The one-argument form produces lowercase digits; the optional second
/// argument is passed to [`encode_generic()`] as its `UPPER` parameter, so
/// `true` selects uppercase digits.
///
/// The input may be a byte string, a string literal, or a byte array, as the
/// examples show.
///
/// ## Examples
///
/// ```rust
/// const HELLO_WORLD_LOWERCASE: &str = fashex::encode!(b"Hello, world!");
/// assert_eq!(HELLO_WORLD_LOWERCASE, "48656c6c6f2c20776f726c6421");
/// const HELLO_WORLD_UPPERCASE: &str = fashex::encode!(b"Hello, world!", true);
/// assert_eq!(HELLO_WORLD_UPPERCASE, "48656C6C6F2C20776F726C6421");
/// # const HELLO_WORLD_STR: &str = fashex::encode!("Hello, world!");
/// # assert_eq!(HELLO_WORLD_STR, "48656c6c6f2c20776f726c6421");
/// # const FROM_BYTES_LOWERCASE: &str = fashex::encode!([0x12, 0x34, 0xab, 0xcd]);
/// # assert_eq!(FROM_BYTES_LOWERCASE, "1234abcd");
/// ```
macro_rules! encode {
    // Lowercase by default: forward to the two-argument form.
    ($bytes:expr) => {
        $crate::encode!($bytes, false)
    };
    ($bytes:expr, $uppercase:expr) => {{
        // The encoded text is computed once, at compile time, into a const
        // array of exactly two bytes per input byte.
        const ENCODED: [u8; $bytes.len() * 2] = {
            let buf: &mut [::core::mem::MaybeUninit<u8>; const { $bytes.len() * 2 }] =
                &mut [::core::mem::MaybeUninit::uninit(); _];

            // Going through `as_ptr()`/`len()` gives a `&[u8]` view of the
            // input whatever its concrete type is.
            // SAFETY: the pointer and the length come from the same value.
            #[allow(unsafe_code, reason = "XXX")]
            let bytes = unsafe { ::core::slice::from_raw_parts($bytes.as_ptr(), $bytes.len()) };

            // `buf` is sized to exactly `2 * len`, so the capacity check in
            // `encode_generic` is not expected to fail.
            match $crate::encode_generic::<{ $uppercase }>(bytes, buf) {
                Ok(_) => {}
                Err(_) => unreachable!(),
            };

            // SAFETY: relies on `encode_generic` having initialized every
            // byte of `buf` on `Ok`; source and target arrays have the same
            // length.
            #[allow(unsafe_code, reason = "XXX")]
            unsafe {
                ::core::mem::transmute::<_, _>(*buf)
            }
        };

        // SAFETY: relies on the encoder emitting only hexadecimal digit
        // characters, which are ASCII and therefore valid UTF-8.
        #[allow(unsafe_code, reason = "XXX")]
        unsafe {
            ::core::str::from_utf8_unchecked(&ENCODED)
        }
    }};
}

#[macro_export]
/// Helper macro for decoding hexadecimal strings to byte arrays in const
/// contexts.
///
/// Expands to a reference to a `[u8; N]` array, where `N` is half the length
/// of the input. Const evaluation panics (and compilation therefore fails) if
/// the input has an odd length or is rejected by [`decode_generic()`].
///
/// ## Examples
///
/// ```rust
/// const FOOBAR: &[u8] = fashex::decode!("48656c6c6f2c20776f726c6421");
/// assert_eq!(FOOBAR, b"Hello, world!");
/// # const FOOBAR_ARRAY: &[u8; 13] = fashex::decode!("48656c6c6f2c20776f726c6421");
/// # assert_eq!(FOOBAR_ARRAY, b"Hello, world!");
/// # const FOOBAR_RIG: &[u8; 13] = fashex::decode!("48656c6c6f2C20776F726c6421");
/// # assert_eq!(FOOBAR_RIG, b"Hello, world!");
/// ```
macro_rules! decode {
    ($bytes:expr) => {{
        // The decoded bytes are computed once, at compile time, into a const
        // array of one byte per pair of input characters.
        const DECODED: [u8; $bytes.len() / 2] = {
            // Checked up front so that an odd-length input gets its own
            // error message.
            assert!(
                $bytes.len() % 2 == 0,
                "the length of the input must be even"
            );

            let buf: &mut [::core::mem::MaybeUninit<u8>; const { $bytes.len() / 2 }] =
                &mut [::core::mem::MaybeUninit::uninit(); _];

            // Going through `as_ptr()`/`len()` gives a `&[u8]` view of the
            // input whatever its concrete type is.
            // SAFETY: the pointer and the length come from the same value.
            #[allow(unsafe_code, reason = "XXX")]
            let bytes = unsafe { ::core::slice::from_raw_parts($bytes.as_ptr(), $bytes.len()) };

            match $crate::decode_generic(bytes, buf) {
                Ok(_) => {}
                Err(_) => panic!("invalid hexadecimal string"),
            };

            // SAFETY: relies on `decode_generic` having initialized every
            // byte of `buf` on `Ok`; source and target arrays have the same
            // length.
            #[allow(unsafe_code, reason = "XXX")]
            unsafe {
                ::core::mem::transmute::<_, _>(*buf)
            }
        };

        &DECODED
    }};
}