lz4_flex 0.12.1

Fastest LZ4 implementation in Rust, no unsafe by default.
Documentation
//! # FastCpy
//!
//! The Rust Compiler calls `memcpy` for slices of unknown length.
//! This crate provides a faster implementation of `memcpy` for slices up to 32bytes (64bytes with `avx`).
//! If you know most of you copy operations are not too big you can use `fastcpy` to speed up your program.
//!
//! `fastcpy` is designed to contain not too much assembly, so the overhead is low.
//!
//! As fall back the standard `memcpy` is called
//!
//! ## Double Copy Trick
//! `fastcpy` employs a double copy trick to copy slices of length 4-32bytes (64bytes with `avx`).
//! E.g. Slice of length 6 can be copied with two uncoditional copy operations.
//!
//! /// [1, 2, 3, 4, 5, 6]
//! /// [1, 2, 3, 4]
//! ///       [3, 4, 5, 6]
//!

#[inline]
pub fn slice_copy(src: *const u8, dst: *mut u8, num_bytes: usize) {
    if num_bytes < 4 {
        short_copy(src, dst, num_bytes);
        return;
    }

    if num_bytes < 8 {
        double_copy_trick::<4>(src, dst, num_bytes);
        return;
    }

    if num_bytes <= 16 {
        double_copy_trick::<8>(src, dst, num_bytes);
        return;
    }

    //if num_bytes <= 32 {
    //double_copy_trick::<16>(src, dst, num_bytes);
    //return;
    //}

    // /// The code will use the vmovdqu instruction to copy 32 bytes at a time.
    //#[cfg(target_feature = "avx")]
    //{
    //if num_bytes <= 64 {
    //double_copy_trick::<32>(src, dst, num_bytes);
    //return;
    //}
    //}

    // For larger sizes we use the default, which calls memcpy
    // memcpy does some virtual memory tricks to copy large chunks of memory.
    //
    // The theory should be that the checks above don't cost much relative to the copy call for
    // larger copies.
    // The bounds checks in `copy_from_slice` are elided.

    //unsafe { core::ptr::copy_nonoverlapping(src, dst, num_bytes) }
    wild_copy_from_src::<16>(src, dst, num_bytes)
}

// Inline never because otherwise we get a call to memcpy -.-
#[inline]
fn wild_copy_from_src<const SIZE: usize>(
    mut source: *const u8,
    mut dst: *mut u8,
    num_bytes: usize,
) {
    // Note: if the compiler auto-vectorizes this it'll hurt performance!
    // It's not the case for 16 bytes stepsize, but for 8 bytes.
    let l_last = unsafe { source.add(num_bytes - SIZE) };
    let r_last = unsafe { dst.add(num_bytes - SIZE) };
    let num_bytes = (num_bytes / SIZE) * SIZE;

    unsafe {
        let dst_ptr_end = dst.add(num_bytes);
        loop {
            core::ptr::copy_nonoverlapping(source, dst, SIZE);
            source = source.add(SIZE);
            dst = dst.add(SIZE);
            if dst >= dst_ptr_end {
                break;
            }
        }
    }

    unsafe {
        core::ptr::copy_nonoverlapping(l_last, r_last, SIZE);
    }
}

#[inline]
fn short_copy(src: *const u8, dst: *mut u8, len: usize) {
    unsafe {
        *dst = *src;
    }
    if len >= 2 {
        double_copy_trick::<2>(src, dst, len);
    }
}

#[inline(always)]
/// [1, 2, 3, 4, 5, 6]
/// [1, 2, 3, 4]
///       [3, 4, 5, 6]
fn double_copy_trick<const SIZE: usize>(src: *const u8, dst: *mut u8, len: usize) {
    let l_end = unsafe { src.add(len - SIZE) };
    let r_end = unsafe { dst.add(len - SIZE) };

    unsafe {
        core::ptr::copy_nonoverlapping(src, dst, SIZE);
        core::ptr::copy_nonoverlapping(l_end, r_end, SIZE);
    }
}

#[cfg(test)]
mod tests {
    use super::slice_copy;
    use alloc::vec::Vec;
    use proptest::prelude::*;
    proptest! {
        #[test]
        fn test_fast_short_slice_copy(left: Vec<u8>) {
            if left.is_empty() {
                return Ok(());
            }
            let mut right = vec![0u8; left.len()];
            slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len());
            prop_assert_eq!(&left, &right);
        }
    }

    #[test]
    fn test_fast_short_slice_copy_edge_cases() {
        for len in 1..(512 * 2) {
            let left = (0..len).map(|i| i as u8).collect::<Vec<_>>();
            let mut right = vec![0u8; len];
            slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len());
            assert_eq!(left, right);
        }
    }

    #[test]
    fn test_fail2() {
        let left = vec![
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
            24, 25, 26, 27, 28, 29, 30, 31, 32,
        ];
        let mut right = vec![0u8; left.len()];
        slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len());
        assert_eq!(left, right);
    }

    #[test]
    fn test_fail() {
        let left = vec![
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let mut right = vec![0u8; left.len()];
        slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len());
        assert_eq!(left, right);
    }
}