//! AVX-512 utility intrinsics shared across Q4_0, Q8_0, Q4_K, and Q1_0_G128 kernels.
//!
//! All functions in this module are `unsafe` and require the `avx512f` CPU feature.
//! The feature gate at the top restricts compilation to the correct platform.
use *;
/// Horizontal sum of a 512-bit packed-float register.
///
/// Reduces sixteen FP32 lanes to a single `f32` scalar using the
/// `_mm512_reduce_add_ps` intrinsic.
///
/// # Safety
/// Requires the `avx512f` CPU feature.
pub unsafe
/// Reads two bytes from `bytes` as a little-endian IEEE 754 FP16 value and
/// returns the FP32 equivalent.
///
/// Uses the `half` crate for the conversion, which handles denormals,
/// infinities, and NaNs correctly.
///
/// # Safety
/// `bytes` must be at least 2 bytes long. The caller is responsible for
/// ensuring the slice bounds are valid before calling this function.
pub unsafe