zerovec 0.9.2

Zero-copy vector backed by a byte array
Documentation
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

#![allow(clippy::upper_case_acronyms)]
//! ULE implementation for Plain Old Data types, including all sized integers.

use super::*;
use crate::ZeroSlice;
use core::num::{NonZeroI8, NonZeroU8};

/// A u8 array of little-endian data with infallible conversions to and from &[u8].
#[repr(transparent)]
#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Hash)]
#[allow(clippy::exhaustive_structs)] // newtype
pub struct RawBytesULE<const N: usize>(pub [u8; N]);

macro_rules! impl_byte_slice_size {
    ($unsigned:ty, $size:literal) => {
        impl From<[u8; $size]> for RawBytesULE<$size> {
            #[inline]
            fn from(le_bytes: [u8; $size]) -> Self {
                Self(le_bytes)
            }
        }
        impl RawBytesULE<$size> {
            #[inline]
            pub fn as_bytes(&self) -> &[u8] {
                &self.0
            }
        }
        // Safety (based on the safety checklist on the ULE trait):
        //  1. RawBytesULE does not include any uninitialized or padding bytes.
        //     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
        //  2. RawBytesULE is aligned to 1 byte.
        //     (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
        //  3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
        //  4. The impl of validate_byte_slice() returns an error if there are leftover bytes.
        //  5. The other ULE methods use the default impl.
        //  6. RawBytesULE byte equality is semantic equality
        unsafe impl ULE for RawBytesULE<$size> {
            #[inline]
            fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
                if bytes.len() % $size == 0 {
                    // Safe because Self is transparent over [u8; $size]
                    Ok(())
                } else {
                    Err(ZeroVecError::length::<Self>(bytes.len()))
                }
            }
        }

        impl RawBytesULE<$size> {
            #[inline]
            pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
                let data = bytes.as_mut_ptr();
                let len = bytes.len() / $size;
                // Safe because Self is transparent over [u8; $size]
                unsafe { core::slice::from_raw_parts_mut(data as *mut Self, len) }
            }

            /// Gets this RawBytesULE as an unsigned int. This is equivalent to calling
            /// [AsULE::from_unaligned()] on the appropriately sized type.
            #[inline]
            pub fn as_unsigned_int(&self) -> $unsigned {
                <$unsigned as $crate::ule::AsULE>::from_unaligned(*self)
            }

            /// Convert an array of native-endian aligned integers to an array of RawBytesULE.
            pub const fn from_array<const N: usize>(arr: [$unsigned; N]) -> [Self; N] {
                let mut result = [RawBytesULE([0; $size]); N];
                let mut i = 0;
                // Won't panic because i < N and arr has length N
                #[allow(clippy::indexing_slicing)]
                while i < N {
                    result[i].0 = arr[i].to_le_bytes();
                    i += 1;
                }
                result
            }
        }
    };
}

macro_rules! impl_const_constructors {
    ($base:ty, $size:literal) => {
        impl ZeroSlice<$base> {
            /// This function can be used for constructing ZeroVecs in a const context, avoiding
            /// parsing checks.
            ///
            /// This cannot be generic over T because of current limitations in `const`, but if
            /// this method is needed in a non-const context, check out [`ZeroSlice::parse_byte_slice()`]
            /// instead.
            ///
            /// See [`ZeroSlice::cast()`] for an example.
            pub const fn try_from_bytes(bytes: &[u8]) -> Result<&Self, ZeroVecError> {
                let len = bytes.len();
                #[allow(clippy::modulo_one)]
                if len % $size == 0 {
                    Ok(unsafe { Self::from_bytes_unchecked(bytes) })
                } else {
                    Err(ZeroVecError::InvalidLength {
                        ty: concat!("<const construct: ", $size, ">"),
                        len,
                    })
                }
            }
        }
    };
}

macro_rules! impl_byte_slice_type {
    ($type:ty, $size:literal) => {
        impl From<$type> for RawBytesULE<$size> {
            #[inline]
            fn from(value: $type) -> Self {
                Self(value.to_le_bytes())
            }
        }
        impl AsULE for $type {
            type ULE = RawBytesULE<$size>;
            #[inline]
            fn to_unaligned(self) -> Self::ULE {
                RawBytesULE(self.to_le_bytes())
            }
            #[inline]
            fn from_unaligned(unaligned: Self::ULE) -> Self {
                <$type>::from_le_bytes(unaligned.0)
            }
        }
        // EqULE is true because $type and RawBytesULE<$size>
        // have the same byte sequence on little-endian
        unsafe impl EqULE for $type {}
    };
}

impl_byte_slice_size!(u16, 2);
impl_byte_slice_size!(u32, 4);
impl_byte_slice_size!(u64, 8);
impl_byte_slice_size!(u128, 16);

impl_byte_slice_type!(u16, 2);
impl_byte_slice_type!(u32, 4);
impl_byte_slice_type!(u64, 8);
impl_byte_slice_type!(u128, 16);

impl_byte_slice_type!(i16, 2);
impl_byte_slice_type!(i32, 4);
impl_byte_slice_type!(i64, 8);
impl_byte_slice_type!(i128, 16);

impl_const_constructors!(u8, 1);
impl_const_constructors!(u16, 2);
impl_const_constructors!(u32, 4);
impl_const_constructors!(u64, 8);
impl_const_constructors!(u128, 16);

// Note: The f32 and f64 const constructors currently have limited use because
// `f32::to_le_bytes` is not yet const.

impl_const_constructors!(bool, 1);

// Safety (based on the safety checklist on the ULE trait):
//  1. u8 does not include any uninitialized or padding bytes.
//  2. u8 is aligned to 1 byte.
//  3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
//  4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
//  5. The other ULE methods use the default impl.
//  6. u8 byte equality is semantic equality
unsafe impl ULE for u8 {
    #[inline]
    fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> {
        Ok(())
    }
}

impl AsULE for u8 {
    type ULE = Self;
    #[inline]
    fn to_unaligned(self) -> Self::ULE {
        self
    }
    #[inline]
    fn from_unaligned(unaligned: Self::ULE) -> Self {
        unaligned
    }
}

// EqULE is true because u8 is its own ULE.
unsafe impl EqULE for u8 {}

// Safety (based on the safety checklist on the ULE trait):
//  1. NonZeroU8 does not include any uninitialized or padding bytes.
//  2. NonZeroU8 is aligned to 1 byte.
//  3. The impl of validate_byte_slice() returns an error if any byte is not valid (0x00).
//  4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
//  5. The other ULE methods use the default impl.
//  6. NonZeroU8 byte equality is semantic equality
unsafe impl ULE for NonZeroU8 {
    #[inline]
    fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
        bytes.iter().try_for_each(|b| {
            if *b == 0x00 {
                Err(ZeroVecError::parse::<Self>())
            } else {
                Ok(())
            }
        })
    }
}

impl AsULE for NonZeroU8 {
    type ULE = Self;
    #[inline]
    fn to_unaligned(self) -> Self::ULE {
        self
    }
    #[inline]
    fn from_unaligned(unaligned: Self::ULE) -> Self {
        unaligned
    }
}

unsafe impl EqULE for NonZeroU8 {}

impl NicheBytes<1> for NonZeroU8 {
    const NICHE_BIT_PATTERN: [u8; 1] = [0x00];
}

// Safety (based on the safety checklist on the ULE trait):
//  1. i8 does not include any uninitialized or padding bytes.
//  2. i8 is aligned to 1 byte.
//  3. The impl of validate_byte_slice() returns an error if any byte is not valid (never).
//  4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
//  5. The other ULE methods use the default impl.
//  6. i8 byte equality is semantic equality
unsafe impl ULE for i8 {
    #[inline]
    fn validate_byte_slice(_bytes: &[u8]) -> Result<(), ZeroVecError> {
        Ok(())
    }
}

impl AsULE for i8 {
    type ULE = Self;
    #[inline]
    fn to_unaligned(self) -> Self::ULE {
        self
    }
    #[inline]
    fn from_unaligned(unaligned: Self::ULE) -> Self {
        unaligned
    }
}

// EqULE is true because i8 is its own ULE.
unsafe impl EqULE for i8 {}

impl AsULE for NonZeroI8 {
    type ULE = NonZeroU8;
    #[inline]
    fn to_unaligned(self) -> Self::ULE {
        // Safety: NonZeroU8 and NonZeroI8 have same size
        unsafe { core::mem::transmute(self) }
    }

    #[inline]
    fn from_unaligned(unaligned: Self::ULE) -> Self {
        // Safety: NonZeroU8 and NonZeroI8 have same size
        unsafe { core::mem::transmute(unaligned) }
    }
}

// These impls are actually safe and portable due to Rust always using IEEE 754, see the documentation
// on f32::from_bits: https://doc.rust-lang.org/stable/std/primitive.f32.html#method.from_bits
//
// The only potential problem is that some older platforms treat signaling NaNs differently. This is
// still quite portable, signalingness is not typically super important.

impl AsULE for f32 {
    type ULE = RawBytesULE<4>;
    #[inline]
    fn to_unaligned(self) -> Self::ULE {
        self.to_bits().to_unaligned()
    }
    #[inline]
    fn from_unaligned(unaligned: Self::ULE) -> Self {
        Self::from_bits(u32::from_unaligned(unaligned))
    }
}

impl AsULE for f64 {
    type ULE = RawBytesULE<8>;
    #[inline]
    fn to_unaligned(self) -> Self::ULE {
        self.to_bits().to_unaligned()
    }
    #[inline]
    fn from_unaligned(unaligned: Self::ULE) -> Self {
        Self::from_bits(u64::from_unaligned(unaligned))
    }
}

// The from_bits documentation mentions that they have identical byte representations to integers
// and EqULE only cares about LE systems
unsafe impl EqULE for f32 {}
unsafe impl EqULE for f64 {}

// The bool impl is not as efficient as it could be
// We can, in the future, have https://github.com/unicode-org/icu4x/blob/main/utils/zerovec/design_doc.md#bitpacking
// for better bitpacking

// Safety (based on the safety checklist on the ULE trait):
//  1. bool does not include any uninitialized or padding bytes (the remaining 7 bytes in bool are by definition zero)
//  2. bool is aligned to 1 byte.
//  3. The impl of validate_byte_slice() returns an error if any byte is not valid (bytes that are not 0 or 1).
//  4. The impl of validate_byte_slice() returns an error if there are leftover bytes (never).
//  5. The other ULE methods use the default impl.
//  6. bool byte equality is semantic equality
unsafe impl ULE for bool {
    #[inline]
    fn validate_byte_slice(bytes: &[u8]) -> Result<(), ZeroVecError> {
        for byte in bytes {
            // https://doc.rust-lang.org/reference/types/boolean.html
            // Rust booleans are always size 1, align 1 values with valid bit patterns 0x0 or 0x1
            if *byte > 1 {
                return Err(ZeroVecError::parse::<Self>());
            }
        }
        Ok(())
    }
}

impl AsULE for bool {
    type ULE = Self;
    #[inline]
    fn to_unaligned(self) -> Self::ULE {
        self
    }
    #[inline]
    fn from_unaligned(unaligned: Self::ULE) -> Self {
        unaligned
    }
}

// EqULE is true because bool is its own ULE.
unsafe impl EqULE for bool {}