fashex 0.0.10

Hexadecimal string encoding and decoding with best-effort SIMD acceleration.
Documentation
//! Optimized implementations based on unstable [`core::simd`] APIs.

#![allow(clippy::similar_names, reason = "XXX")]

use core::mem::MaybeUninit;
use core::simd::prelude::*;
use core::slice;

use crate::backend::generic::{decode_generic_unchecked, encode_generic_unchecked};
use crate::error::InvalidInput;
use crate::util::lut16;

/// Encodes `src` into pairs of hexadecimal digits in `dst`, processing 16
/// input bytes per SIMD iteration.
///
/// Every input byte produces one `[hi, lo]` pair whose two bytes are looked up
/// in the 16-entry table returned by `lut16::<UPPER>()`. Once fewer than 16
/// bytes remain, the rest of the input is handed to
/// [`encode_generic_unchecked`].
///
/// ## Safety
///
/// We assume that:
///
/// 1. `src.len() <= dst.len()`.
pub(crate) unsafe fn encode_simd128_unchecked<const UPPER: bool>(
    mut src: &[u8],
    mut dst: &mut [[MaybeUninit<u8>; 2]],
) {
    /// Process 16 bytes of input, and produce 16 * 2 bytes of output.
    const BATCH: usize = size_of::<Simd<u8, 16>>();

    // Only set up the vector constants when at least one full batch exists.
    if src.len() >= BATCH {
        let m = Simd::splat(0b_0000_1111);
        let lut = Simd::from_array(*lut16::<UPPER>());

        while src.len() >= BATCH {
            // let [byte @ u8; 16]
            let chunk = Simd::<u8, 16>::from_slice(src);

            // let [hi; 16] = [byte >> 4; 16];
            let mut hi = chunk >> 4;
            // let [lo; 16] = [byte & 0b_0000_1111; 16];
            let mut lo = chunk & m;

            // let [hi; 16] = [lut[hi]; 16];
            hi = lut.swizzle_dyn(hi);
            // let [lo; 16] = [lut[lo]; 16];
            lo = lut.swizzle_dyn(lo);

            // Interleave the nibbles ([hi[0], lo[0], hi[1], lo[1], ...]).
            let (out0, out1) = Simd::<u8, 16>::interleave(hi, lo);

            // Store the result.
            {
                // SAFETY: `src.len() >= BATCH` holds here and the caller
                // guarantees `src.len() <= dst.len()`, so the first `BATCH`
                // pairs of `dst` (`2 * BATCH` bytes) are in bounds. The bytes
                // are viewed as `MaybeUninit<u8>`, which places no
                // initialization requirement on the destination memory.
                let out = unsafe {
                    slice::from_raw_parts_mut(
                        dst.as_mut_ptr().cast::<MaybeUninit<u8>>(),
                        2 * BATCH,
                    )
                };
                let (first, second) = out.split_at_mut(BATCH);

                // SAFETY: `first` and `second` are each exactly `BATCH` bytes
                // long, matching the size of `out0` / `out1`, and the local
                // vectors cannot overlap the destination buffer.
                unsafe {
                    first
                        .as_mut_ptr()
                        .copy_from_nonoverlapping(out0.as_array().as_ptr().cast(), first.len());
                    second
                        .as_mut_ptr()
                        .copy_from_nonoverlapping(out1.as_array().as_ptr().cast(), second.len());
                }
            }

            src = &src[BATCH..];
            // SAFETY: `dst.len() >= BATCH`, as established for the store above.
            dst = unsafe { dst.get_unchecked_mut(BATCH..) };
        }
    }

    // Fewer than `BATCH` bytes remain; the generic implementation finishes them.
    encode_generic_unchecked::<UPPER>(src, dst);
}

/// Decodes pairs of hexadecimal digits from `src` into bytes in `dst`,
/// validating and converting 16 pairs per SIMD iteration.
///
/// Returns `Err(InvalidInput)` as soon as a batch contains a byte that does
/// not map to a nibble in `0..=15`. Once fewer than 16 pairs remain, the rest
/// of the input is handed to [`decode_generic_unchecked`].
///
/// ## Safety
///
/// We assume that:
///
/// 1. `src.len() <= dst.len()`.
#[allow(clippy::cast_possible_wrap, reason = "XXX")]
pub(crate) unsafe fn decode_simd128_unchecked(
    mut src: &[[u8; 2]],
    mut dst: &mut [MaybeUninit<u8>],
) -> Result<(), InvalidInput> {
    /// Process 16 * 2 bytes of input, and produce 16 bytes of output.
    const BATCH: usize = size_of::<i8x16>();

    if src.len() >= BATCH {
        // Constants for the digit path.
        let digit_bias = i8x16::splat((0xFF_u8 - b'9') as i8);
        let six = u8x16::splat(0x06);
        let high_bits = i8x16::splat(0xF0_u8 as i8);

        // Constants for the letter path.
        let case_fold = i8x16::splat(0xDF_u8 as i8);
        let letter_a = i8x16::splat(b'A' as i8);
        let ten = u8x16::splat(0x0A);

        // Largest value a valid nibble can take.
        let max_nibble = u8x16::splat(15);

        // Maps 16 ASCII bytes to nibbles; any lane that is not a hex digit
        // ends up greater than 15.
        let to_nibbles = |ascii: i8x16| -> u8x16 {
            // '0'..='9' become 0..=9, every other byte becomes > 15.
            let digit = {
                let shifted = (ascii + digit_bias).cast::<u8>();
                let clamped = shifted.saturating_sub(six);
                (clamped.cast::<i8>() - high_bits).cast::<u8>()
            };

            // 'A'..='F' and 'a'..='f' become 10..=15, every other byte
            // becomes > 15.
            let letter = {
                let folded = (ascii & case_fold) - letter_a;
                folded.cast::<u8>().saturating_add(ten)
            };

            // At most one path yields a value <= 15, so the minimum picks it.
            digit.simd_min(letter)
        };

        while src.len() >= BATCH {
            // SAFETY: the loop condition guarantees at least `BATCH` pairs.
            let (head, rest) = unsafe { src.split_at_unchecked(BATCH) };
            src = rest;

            // `BATCH` pairs flatten to `2 * BATCH` bytes, i.e. two vectors.
            let (first, second) = head.as_flattened().split_at(BATCH);

            let n0 = to_nibbles(u8x16::from_slice(first).cast::<i8>());
            let n1 = to_nibbles(u8x16::from_slice(second).cast::<i8>());

            // Reject the batch if any lane is not a valid nibble.
            let invalid = n0.simd_gt(max_nibble) | n1.simd_gt(max_nibble);
            if invalid.any() {
                return Err(InvalidInput);
            }

            // Even lanes hold the high nibbles, odd lanes the low nibbles.
            let (hi, lo) = n0.deinterleave(n1);
            let bytes = (hi << 4) | lo;

            // SAFETY: `dst.len() >= src.len() >= BATCH` held at the top of
            // this iteration (caller contract), so the split is in bounds.
            let (out, tail) = unsafe { dst.split_at_mut_unchecked(BATCH) };

            // SAFETY: `out` is exactly `BATCH` bytes long, the same size as
            // `bytes`, and the local vector cannot overlap the destination.
            unsafe {
                out.as_mut_ptr()
                    .copy_from_nonoverlapping(bytes.as_array().as_ptr().cast(), out.len());
            }

            dst = tail;
        }
    }

    // Fewer than `BATCH` pairs remain; the generic implementation finishes them.
    decode_generic_unchecked::<false>(src, dst)
}

#[cfg(test)]
mod smoking {
    use alloc::string::String;
    use alloc::vec;
    use alloc::vec::Vec;
    use core::mem::MaybeUninit;
    use core::slice;

    use super::*;
    use crate::util::{HEX_CHARS_LOWER, HEX_CHARS_UPPER};

    /// Round-trips one input through an encoder (lower- and upper-case) and
    /// through every listed decoder, comparing against a scalar reference.
    macro_rules! test {
        (
            Encode = $encode_f:ident;
            Decode = $($decode_f:ident),*;
            Case = $i:expr
        ) => {{
            let raw: &[u8] = $i;

            // Reference encoding built byte by byte from a digit table.
            let render = |alphabet: &[u8]| -> String {
                let mut text = String::with_capacity(raw.len() * 2);
                for byte in raw.iter() {
                    text.push(alphabet[(*byte >> 4) as usize] as char);
                    text.push(alphabet[(*byte & 0b1111) as usize] as char);
                }
                text
            };
            let expected_lower = render(&HEX_CHARS_LOWER[..]);
            let expected_upper = render(&HEX_CHARS_UPPER[..]);

            let mut encoded_lower = vec![[MaybeUninit::<u8>::uninit(); 2]; raw.len()];
            let mut encoded_upper = vec![[MaybeUninit::<u8>::uninit(); 2]; raw.len()];

            unsafe {
                $encode_f::<false>(raw, &mut encoded_lower);
                $encode_f::<true>(raw, &mut encoded_upper);
            }

            // The encoder has written every pair, so view them as plain bytes.
            let encoded_lower: &[[u8; 2]] = unsafe {
                slice::from_raw_parts(encoded_lower.as_ptr().cast(), encoded_lower.len())
            };
            let encoded_upper: &[[u8; 2]] = unsafe {
                slice::from_raw_parts(encoded_upper.as_ptr().cast(), encoded_upper.len())
            };

            let flat_lower = encoded_lower.as_flattened();
            let flat_upper = encoded_upper.as_flattened();

            assert_eq!(
                flat_lower,
                expected_lower.as_bytes(),
                "Encode error, expect \"{expected_lower}\", got \"{}\" ({:?})",
                str::from_utf8(flat_lower).unwrap_or("<invalid utf-8>"),
                flat_lower
            );
            assert_eq!(
                flat_upper,
                expected_upper.as_bytes(),
                "Encode error, expect \"{expected_upper}\", got \"{}\" ({:?})",
                str::from_utf8(flat_upper).unwrap_or("<invalid utf-8>"),
                flat_upper
            );

            $({
                let mut decoded_lower = vec![MaybeUninit::<u8>::uninit(); raw.len()];
                let mut decoded_upper = vec![MaybeUninit::<u8>::uninit(); raw.len()];

                unsafe {
                    $decode_f(encoded_lower, &mut decoded_lower).unwrap();
                    $decode_f(encoded_upper, &mut decoded_upper).unwrap();
                }

                // Decoding succeeded, so every output byte has been written.
                let decoded_lower = unsafe { decoded_lower.assume_init_ref() };
                let decoded_upper = unsafe { decoded_upper.assume_init_ref() };

                assert_eq!(
                    decoded_lower,
                    raw,
                    "Decode error for {}, expect {:?}, got {:?}",
                    stringify!($decode_f),
                    raw,
                    decoded_lower
                );
                assert_eq!(
                    decoded_upper,
                    raw,
                    "Decode error for {}, expect {:?}, got {:?}",
                    stringify!($decode_f),
                    raw,
                    decoded_upper
                );
            })*
        }};
    }

    /// Fixed sample bytes; the tests slice prefixes of various lengths from it.
    const CASE: &[u8; 513] = &[
        0xBD, 0xE8, 0xAC, 0xA5, 0x82, 0x41, 0x8A, 0x10, 0x66, 0x56, 0xE4, 0xF3, 0x06, 0x13, 0xD0,
        0x06, 0x3E, 0x19, 0x4B, 0x7E, 0xE1, 0xAB, 0x24, 0x03, 0x29, 0xD0, 0x8B, 0x91, 0x06, 0x56,
        0xF4, 0x44, 0x4E, 0x7B, 0x00, 0x76, 0xFB, 0xA3, 0xB4, 0x4F, 0x9E, 0x4E, 0x3E, 0x20, 0x89,
        0x29, 0x17, 0x47, 0x4D, 0x59, 0xF7, 0x9E, 0xAE, 0x0A, 0xB4, 0x16, 0xEB, 0x2B, 0x0D, 0xA2,
        0x35, 0x99, 0x1D, 0x94, 0xA0, 0x23, 0xFF, 0x60, 0x0F, 0x67, 0xDB, 0xB5, 0xEF, 0x89, 0xC2,
        0x3C, 0x2C, 0x24, 0x0E, 0x04, 0x05, 0x35, 0x31, 0xAA, 0x88, 0xB4, 0x04, 0x82, 0x21, 0x8B,
        0x24, 0x88, 0x3F, 0x19, 0x94, 0x36, 0xDB, 0x52, 0x9E, 0x89, 0x7D, 0x53, 0x6D, 0x8D, 0xDF,
        0xF7, 0xFD, 0x2A, 0x8F, 0x4B, 0x20, 0xAB, 0xAC, 0xA4, 0x4B, 0xBB, 0x5C, 0x10, 0x0D, 0x7B,
        0xEF, 0x3A, 0x03, 0xF7, 0x4D, 0x15, 0x10, 0x8C, 0xB1, 0x0A, 0x86, 0x6A, 0x19, 0x6F, 0x25,
        0xA6, 0xE3, 0x4B, 0xA8, 0x9D, 0x78, 0xC7, 0x19, 0x19, 0x09, 0x05, 0x08, 0x9A, 0xA1, 0x67,
        0x48, 0xF7, 0x9E, 0x3C, 0xFA, 0xD3, 0xFD, 0x5E, 0x1A, 0x09, 0xD8, 0x85, 0x7F, 0xA5, 0x73,
        0x34, 0xBF, 0x93, 0xCC, 0xF4, 0x8D, 0x8A, 0x62, 0xBD, 0xD5, 0x67, 0x39, 0x0D, 0xB7, 0x41,
        0x94, 0x7D, 0xB5, 0xB3, 0x5B, 0x95, 0x1F, 0x43, 0xE4, 0x77, 0x40, 0x41, 0x9E, 0x26, 0x34,
        0x73, 0x0D, 0x93, 0x0C, 0xE9, 0xB7, 0x3C, 0x97, 0x3D, 0xA4, 0xBC, 0xAA, 0xDA, 0xA9, 0xFB,
        0x78, 0xD8, 0xE4, 0xB4, 0xE8, 0x88, 0x29, 0x9B, 0xE4, 0x5B, 0xF4, 0x56, 0xC4, 0x0D, 0x50,
        0x05, 0x0F, 0x84, 0x51, 0xD4, 0x96, 0x3E, 0xC5, 0x4F, 0xCD, 0xEF, 0x2B, 0x0F, 0x78, 0x1D,
        0xE6, 0x4A, 0x90, 0xC6, 0xD8, 0xF7, 0x88, 0x0D, 0x58, 0x2C, 0xE7, 0x37, 0x4A, 0x94, 0x5F,
        0x56, 0x68, 0x84, 0xEE, 0xD2, 0xD6, 0x8A, 0xC9, 0x8A, 0x90, 0x70, 0xF7, 0x51, 0xC9, 0xD1,
        0x86, 0x5A, 0xB2, 0xD5, 0x91, 0xDB, 0xDF, 0x36, 0xF1, 0xD3, 0x69, 0xB1, 0x7D, 0x39, 0x0E,
        0xCC, 0x86, 0xEF, 0xBD, 0xBD, 0x13, 0x52, 0x2A, 0xFC, 0x72, 0x78, 0x14, 0x28, 0xDD, 0xD5,
        0xEE, 0xF8, 0x72, 0x0F, 0x26, 0x76, 0xC6, 0x5E, 0x1B, 0x50, 0x30, 0xDB, 0x93, 0xD9, 0x20,
        0xA3, 0x07, 0x4D, 0x85, 0x50, 0x40, 0x28, 0x1E, 0x40, 0x4B, 0x96, 0xD6, 0x8C, 0xAF, 0x8E,
        0xD4, 0xD7, 0x81, 0x31, 0x97, 0x47, 0x2A, 0x95, 0xC3, 0x03, 0xA2, 0x40, 0xC9, 0x55, 0xBF,
        0x64, 0x1A, 0xAB, 0x81, 0xA1, 0x6B, 0x6A, 0x56, 0x81, 0xDD, 0xD2, 0x68, 0x1D, 0xB7, 0xDB,
        0xD6, 0x9E, 0xDA, 0x84, 0xFC, 0x5B, 0xE0, 0x34, 0xAD, 0x61, 0x5E, 0xD1, 0xF5, 0x74, 0x79,
        0xE9, 0xED, 0xB5, 0x31, 0x3C, 0x7F, 0xB1, 0x44, 0xE0, 0x23, 0xAE, 0xBD, 0x9E, 0x13, 0x8A,
        0x9D, 0xAF, 0x48, 0x75, 0x06, 0x16, 0x58, 0x4A, 0x8B, 0xD3, 0xB7, 0x06, 0x14, 0xB5, 0x92,
        0xE2, 0xA1, 0x9F, 0xCF, 0x42, 0x3E, 0x99, 0x24, 0xE4, 0x65, 0x93, 0x84, 0x83, 0x66, 0x26,
        0x28, 0xEA, 0x3F, 0x05, 0x4E, 0xAC, 0x7C, 0x96, 0xF2, 0x50, 0x22, 0xF3, 0xCD, 0x90, 0x81,
        0x73, 0xBD, 0x3D, 0xCA, 0xD1, 0x2F, 0xC2, 0x3F, 0x20, 0xF0, 0x1C, 0x41, 0x9D, 0x9A, 0x85,
        0x1A, 0xC4, 0xB1, 0xE3, 0xBA, 0x52, 0xE7, 0xE3, 0x22, 0x72, 0x98, 0x76, 0xEC, 0x0B, 0xC4,
        0x07, 0xA3, 0x05, 0x01, 0xC0, 0x40, 0xA7, 0x0E, 0x8A, 0x0F, 0xDE, 0x5F, 0x65, 0xA3, 0x89,
        0x34, 0x3B, 0xFD, 0x9F, 0xE4, 0xB1, 0x6C, 0x1B, 0x40, 0xE6, 0xC2, 0x58, 0xE3, 0x62, 0xFC,
        0xB0, 0x22, 0x02, 0xD2, 0xE2, 0xF6, 0xFD, 0x4D, 0x64, 0xF5, 0x17, 0x07, 0x04, 0x34, 0x50,
        0x04, 0xEF, 0xAB,
    ];

    /// Round-trips prefixes of `CASE` whose lengths sit on and around
    /// multiples of the 16-byte batch size, plus the empty input.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_simd128() {
        for len in [0, 15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129] {
            test! {
                Encode = encode_simd128_unchecked;
                Decode = decode_simd128_unchecked;
                Case = &CASE[..len]
            }
        }
    }

    /// Places every possible byte value at index `l` of an otherwise valid
    /// input and checks that decoding succeeds exactly for ASCII hex digits.
    #[test]
    #[cfg_attr(miri, ignore)]
    fn test_validation() {
        const LENGTHS: [usize; 18] = [
            15, 16, 17, 31, 32, 33, 63, 64, 65, 127, 128, 129, 255, 256, 257, 511, 512, 513,
        ];

        for l in LENGTHS {
            for c in u8::MIN..=u8::MAX {
                let mut bytes = vec![b'a'; l * 2];
                bytes[l] = c;

                // `bytes.len()` is `l * 2`, so it splits evenly into pairs.
                let pairs: &[[u8; 2]] = unsafe { bytes.as_chunks_unchecked() };

                let mut scratch: Vec<u8> = Vec::with_capacity(l);
                let outcome =
                    unsafe { decode_simd128_unchecked(pairs, scratch.spare_capacity_mut()) };

                assert!(
                    outcome.is_ok() == c.is_ascii_hexdigit(),
                    "simd128 validation failed for byte {c} (l={l})"
                );
            }
        }
    }
}