libblur 0.9.3

High-performance blur in pure Rust
Documentation
/// NEON helpers that load the leading channels of a single pixel into a vector register.
///
/// The module is gated on `aarch64` only: every intrinsic used here comes from
/// `std::arch::aarch64`, which is not available when `target_arch = "arm"`.
///
/// # Caller contract
///
/// All loaders take a raw pointer and dereference it, although they are declared as safe
/// functions. Callers must guarantee that:
///
/// * with `use_vld == true`, a full vector's worth of memory is readable at `ptr`
///   (8 bytes for the `u8` loaders, four `f32` values for [`load_f32`]);
/// * with `use_vld == false`, `channels_count` elements are readable at `ptr`. The elements
///   are copied into a zero-initialised scratch buffer first, so lanes beyond
///   `channels_count` read as zero. `channels_count` is clamped to the scratch buffer
///   length, so an oversized value can never write past the buffer.
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
pub(crate) mod neon_utils {
    use std::arch::aarch64::{
        float32x4_t, int16x4_t, int32x4_t, uint16x4_t, uint32x4_t, uint8x8_t, vget_low_u16,
        vld1_u8, vld1q_f32, vmovl_u16, vmovl_u8, vreinterpret_s16_u16, vreinterpretq_s32_u32,
    };
    use std::ptr;

    /// Loads eight bytes into a `uint8x8_t`, either straight from `ptr` (`use_vld == true`)
    /// or through a zero-padded scratch buffer holding the first `channels_count` bytes.
    #[inline(always)]
    fn load_u8x8(ptr: *const u8, use_vld: bool, channels_count: usize) -> uint8x8_t {
        if use_vld {
            // SAFETY: the caller guarantees that 8 bytes are readable at `ptr`.
            return unsafe { vld1_u8(ptr) };
        }
        let mut scratch = [0u8; 8];
        // Never copy more than the scratch buffer can hold.
        let count = channels_count.min(scratch.len());
        // SAFETY: the caller guarantees `channels_count` readable bytes at `ptr`, `count` does
        // not exceed the scratch length, and the local buffer cannot overlap the source.
        // The vector load then reads exactly the 8 bytes of `scratch`.
        unsafe {
            ptr::copy_nonoverlapping(ptr, scratch.as_mut_ptr(), count);
            vld1_u8(scratch.as_ptr())
        }
    }

    /// Loads the first four `u8` values at `ptr`, zero-extended to four `i32` lanes.
    ///
    /// See the module documentation for the meaning of `use_vld` and `channels_count`.
    #[allow(dead_code)]
    #[inline(always)]
    pub(crate) fn load_u8_s32(ptr: *const u8, use_vld: bool, channels_count: usize) -> int32x4_t {
        let bytes = load_u8x8(ptr, use_vld, channels_count);
        // SAFETY: register-only intrinsics; NEON is enabled by the module's `cfg`.
        unsafe { vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(vmovl_u8(bytes)))) }
    }

    /// Loads the first four `u8` values at `ptr`, zero-extended to four `i16` lanes.
    ///
    /// See the module documentation for the meaning of `use_vld` and `channels_count`.
    #[allow(dead_code)]
    #[inline(always)]
    pub(crate) fn load_u8_s16(ptr: *const u8, use_vld: bool, channels_count: usize) -> int16x4_t {
        let bytes = load_u8x8(ptr, use_vld, channels_count);
        // SAFETY: register-only intrinsics; NEON is enabled by the module's `cfg`.
        unsafe { vreinterpret_s16_u16(vget_low_u16(vmovl_u8(bytes))) }
    }

    /// Loads the first four `u8` values at `ptr`, zero-extended to four `u16` lanes.
    ///
    /// See the module documentation for the meaning of `use_vld` and `channels_count`.
    #[allow(dead_code)]
    #[inline(always)]
    pub(crate) fn load_u8_u16(ptr: *const u8, use_vld: bool, channels_count: usize) -> uint16x4_t {
        let bytes = load_u8x8(ptr, use_vld, channels_count);
        // SAFETY: register-only intrinsics; NEON is enabled by the module's `cfg`.
        unsafe { vget_low_u16(vmovl_u8(bytes)) }
    }

    /// Loads the first four `u8` values at `ptr`, zero-extended to four `u32` lanes.
    ///
    /// See the module documentation for the meaning of `use_vld` and `channels_count`.
    #[allow(dead_code)]
    #[inline(always)]
    pub(crate) fn load_u8_u32(ptr: *const u8, use_vld: bool, channels_count: usize) -> uint32x4_t {
        let bytes = load_u8x8(ptr, use_vld, channels_count);
        // SAFETY: register-only intrinsics; NEON is enabled by the module's `cfg`.
        unsafe { vmovl_u16(vget_low_u16(vmovl_u8(bytes))) }
    }

    /// Loads four `f32` lanes, either straight from `ptr` (`use_vld == true`) or through a
    /// zero-padded scratch buffer holding the first `channels_count` values.
    ///
    /// See the module documentation for the caller contract.
    #[allow(dead_code)]
    #[inline(always)]
    pub(crate) fn load_f32(ptr: *const f32, use_vld: bool, channels_count: usize) -> float32x4_t {
        if use_vld {
            // SAFETY: the caller guarantees that four `f32` values are readable at `ptr`.
            return unsafe { vld1q_f32(ptr) };
        }
        let mut scratch = [0f32; 4];
        // Never copy more than the scratch buffer can hold.
        let count = channels_count.min(scratch.len());
        // SAFETY: the caller guarantees `channels_count` readable values at `ptr`, `count`
        // does not exceed the scratch length, and the local buffer cannot overlap the source.
        // The vector load then reads exactly the four values of `scratch`.
        unsafe {
            ptr::copy_nonoverlapping(ptr, scratch.as_mut_ptr(), count);
            vld1q_f32(scratch.as_ptr())
        }
    }
}