use crate::mask8x8;
/// Eight `u8` lanes packed into one `u64` word.
///
/// `#[repr(transparent)]` gives the type the same size and alignment as `u64`.
/// Equality compares the packed word, i.e. all eight lanes at once.
#[repr(transparent)]
// The lowercase, primitive-looking type name is intentional.
#[allow(non_camel_case_types)]
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct u8x8 {
    /// Packed storage for the eight lanes.
    pub(crate) n: u64,
}
impl u8x8 {
pub const ZEROES: Self = Self::new(0);
#[inline(always)]
pub const fn from_array(a: [u8; 8]) -> Self {
Self {
n: u64::from_ne_bytes(a),
}
}
#[inline(always)]
pub fn from_byte_slice<'a>(s: &'a [u8]) -> (&'a [u8], &'a [Self], &'a [u8]) {
const ALIGN: usize = core::mem::align_of::<u8x8>();
let addr = s.as_ptr().cast::<u8>() as usize;
let prior_count = core::cmp::min(addr.next_multiple_of(ALIGN) - addr, s.len());
let (prior, remain) = s.split_at(prior_count);
if remain.is_empty() {
let empty = unsafe { core::slice::from_raw_parts(core::ptr::dangling::<Self>(), 0) };
return (prior, empty, remain);
}
let s = remain; let u8x8_len = s.len() / 8;
let use_len = u8x8_len * 8;
let (for_u8u8, remain) = s.split_at(use_len);
let ptr = for_u8u8.as_ptr().cast::<Self>();
let u8x8s = unsafe { core::slice::from_raw_parts(ptr, u8x8_len) };
(prior, u8x8s, remain)
}
#[inline(always)]
pub fn from_byte_slice_mut<'a>(
s: &'a mut [u8],
) -> (&'a mut [u8], &'a mut [Self], &'a mut [u8]) {
const ALIGN: usize = core::mem::align_of::<u8x8>();
let addr = s.as_ptr().cast::<u8>() as usize;
let prior_count = core::cmp::min(addr.next_multiple_of(ALIGN) - addr, s.len());
let (prior, remain) = s.split_at_mut(prior_count);
if remain.is_empty() {
let empty =
unsafe { core::slice::from_raw_parts_mut(core::ptr::dangling_mut::<Self>(), 0) };
return (prior, empty, remain);
}
let s = remain; let u8x8_len = s.len() / 8;
let use_len = u8x8_len * 8;
let (for_u8u8, remain) = s.split_at_mut(use_len);
let ptr = for_u8u8.as_mut_ptr().cast::<Self>();
let u8x8s = unsafe { core::slice::from_raw_parts_mut(ptr, u8x8_len) };
(prior, u8x8s, remain)
}
#[inline(always)]
pub(crate) const fn new(n: u64) -> Self {
Self { n }
}
#[inline(always)]
pub const fn splat(v: u8) -> Self {
Self::new(v as u64 * ALL_ONES)
}
#[inline(always)]
pub const fn to_array(self) -> [u8; 8] {
self.n.to_ne_bytes()
}
#[inline(always)]
pub const fn complement(self) -> Self {
Self::new(!self.n)
}
#[inline(always)]
pub const fn bitor(self, other: Self) -> Self {
Self::new(self.n | other.n)
}
#[inline(always)]
pub const fn bitand(self, other: Self) -> Self {
Self::new(self.n & other.n)
}
#[inline(always)]
pub const fn bitxor(self, other: Self) -> Self {
Self::new(self.n ^ other.n)
}
#[inline(always)]
pub const fn equals(self, other: Self) -> mask8x8 {
let xo = self.n ^ other.n;
let lo = ((xo & WITHOUT_HIGH_BITS) + WITHOUT_HIGH_BITS) | xo;
let hi = !lo & ONLY_HIGH_BITS;
mask8x8::new(hi >> 7)
}
#[inline(always)]
pub const fn less_than(self, other: Self) -> mask8x8 {
let diff = (self.n | ONLY_HIGH_BITS).wrapping_sub(other.n & !ONLY_HIGH_BITS);
let select =
((self.n & (self.n ^ other.n)) | (diff & !(self.n ^ other.n))) & ONLY_HIGH_BITS;
let inv = select ^ ONLY_HIGH_BITS;
mask8x8::new(inv >> 7)
}
#[inline(always)]
pub const fn greater_than(self, other: Self) -> mask8x8 {
let diff = (other.n | ONLY_HIGH_BITS).wrapping_sub(self.n & !ONLY_HIGH_BITS);
let select =
((other.n & (other.n ^ self.n)) | (diff & !(other.n ^ self.n))) & ONLY_HIGH_BITS;
let inv = select ^ ONLY_HIGH_BITS;
mask8x8::new(inv >> 7)
}
#[inline(always)]
pub const fn wrapping_add(self, other: Self) -> Self {
let low = (self.n & WITHOUT_HIGH_BITS).wrapping_add(other.n & WITHOUT_HIGH_BITS);
Self::new(low ^ ((self.n ^ other.n) & ONLY_HIGH_BITS))
}
#[inline(always)]
pub const fn saturating_add(self, other: Self) -> Self {
let sum = self.wrapping_add(other).n;
let carry = ((self.n & other.n) | ((self.n | other.n) & !sum)) & ONLY_HIGH_BITS;
Self::new(sum | msb_mask(carry))
}
#[inline(always)]
pub const fn reduce_sum(self) -> u64 {
const ALT_8S: u64 = 0x00ff00ff00ff00ff;
const ALT_16S: u64 = 0x0000ffff0000ffff;
const ALT_32S: u64 = 0x00000000ffffffff;
let mut raw = self.n;
raw = (raw & ALT_8S) + ((raw >> 8) & ALT_8S);
raw = (raw & ALT_16S) + ((raw >> 16) & ALT_16S);
raw = (raw & ALT_32S) + ((raw >> 32) & ALT_32S);
raw
}
#[inline(always)]
pub const fn wrapping_sub(self, other: Self) -> Self {
Self::new(
(self.n | ONLY_HIGH_BITS).wrapping_sub(other.n & WITHOUT_HIGH_BITS)
^ ((self.n ^ !other.n) & ONLY_HIGH_BITS),
)
}
#[inline(always)]
pub const fn saturating_sub(self, other: Self) -> Self {
let diff = self.wrapping_sub(other).n;
let borrow = ((!self.n & other.n) | ((!self.n | other.n) & diff)) & ONLY_HIGH_BITS;
Self::new(diff & !msb_mask(borrow))
}
#[inline(always)]
pub const fn abs_difference(self, other: Self) -> Self {
let diff = self.n.wrapping_sub(other.n);
let borrow = ((!self.n & other.n) | ((!self.n | other.n) & diff)) & ONLY_HIGH_BITS;
let msb_mask = msb_mask(borrow);
let lo = (self.n & !msb_mask) | (other.n & msb_mask);
let hi = (self.n & msb_mask) | (other.n & !msb_mask);
Self::new(
((lo | ONLY_HIGH_BITS) - (hi & WITHOUT_HIGH_BITS)) ^ ((lo ^ !hi) & ONLY_HIGH_BITS),
)
}
#[inline(always)]
pub const fn max(self, other: Self) -> Self {
let diff = self.n.wrapping_sub(other.n);
let borrow = ((!self.n & other.n) | ((!self.n | other.n) & diff)) & ONLY_HIGH_BITS;
let msb_mask = msb_mask(borrow);
Self::new((self.n & !msb_mask) | (other.n & msb_mask))
}
#[inline(always)]
pub const fn min(self, other: Self) -> Self {
let diff = self.n.wrapping_sub(other.n);
let borrow = ((!self.n & other.n) | ((!self.n | other.n) & diff)) & ONLY_HIGH_BITS;
let msb_mask = msb_mask(borrow);
Self::new((self.n & msb_mask) | (other.n & !msb_mask))
}
#[inline(always)]
pub const fn mean(self, other: Self) -> Self {
let shared = self.n & other.n;
let diff = (self.n ^ other.n) & 0xfefefefefefefefe;
Self::new(shared + (diff >> 1))
}
#[inline(always)]
pub const fn popcount(self) -> Self {
let a = self.n - ((self.n >> 1) & 0x5555555555555555);
let b = (a & 0x3333333333333333).wrapping_add((a >> 2) & 0x3333333333333333);
Self::new((b + (b >> 4)) & 0x0f0f0f0f0f0f0f0f)
}
}
impl core::ops::Not for u8x8 {
    type Output = Self;
    /// `!v` flips every bit of every lane; forwards to the inherent `complement`.
    // Marked inline like the other operator impls so the operator form costs the
    // same as calling the inherent method directly.
    #[inline(always)]
    fn not(self) -> Self {
        self.complement()
    }
}
impl core::ops::BitOr for u8x8 {
    type Output = Self;
    /// `a | b`: bitwise OR of the two vectors.
    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self {
        // Method-call syntax picks the inherent `u8x8::bitor` ahead of this trait
        // method, so this is a forward, not a recursive call.
        self.bitor(rhs)
    }
}
impl core::ops::BitOrAssign for u8x8 {
    /// `a |= b`: replaces `a` with the bitwise OR of `a` and `b`.
    #[inline(always)]
    fn bitor_assign(&mut self, rhs: Self) {
        // `u8x8` is `Copy`, so the by-value inherent `bitor` can be called
        // through the `&mut` receiver.
        *self = self.bitor(rhs);
    }
}
impl core::ops::BitAnd for u8x8 {
    type Output = Self;
    /// `a & b`: bitwise AND of the two vectors.
    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self {
        // Method-call syntax picks the inherent `u8x8::bitand` ahead of this
        // trait method, so this is a forward, not a recursive call.
        self.bitand(rhs)
    }
}
impl core::ops::BitAndAssign for u8x8 {
    /// `a &= b`: replaces `a` with the bitwise AND of `a` and `b`.
    #[inline(always)]
    fn bitand_assign(&mut self, rhs: Self) {
        // `u8x8` is `Copy`, so the by-value inherent `bitand` can be called
        // through the `&mut` receiver.
        *self = self.bitand(rhs);
    }
}
impl core::ops::BitXor for u8x8 {
    type Output = Self;
    /// `a ^ b`: bitwise XOR of the two vectors.
    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self {
        // Method-call syntax picks the inherent `u8x8::bitxor` ahead of this
        // trait method, so this is a forward, not a recursive call.
        self.bitxor(rhs)
    }
}
impl core::ops::BitXorAssign for u8x8 {
    /// `a ^= b`: replaces `a` with the bitwise XOR of `a` and `b`.
    #[inline(always)]
    fn bitxor_assign(&mut self, rhs: Self) {
        // `u8x8` is `Copy`, so the by-value inherent `bitxor` can be called
        // through the `&mut` receiver.
        *self = self.bitxor(rhs);
    }
}
impl core::ops::Add for u8x8 {
    type Output = Self;
    /// `a + b`: forwards to `wrapping_add`.
    #[inline(always)]
    fn add(self, rhs: Self) -> Self::Output {
        Self::wrapping_add(self, rhs)
    }
}
impl core::ops::Add<u8> for u8x8 {
    type Output = Self;
    /// `a + byte`: splats `rhs` across all lanes, then forwards to `wrapping_add`.
    #[inline(always)]
    fn add(self, rhs: u8) -> Self::Output {
        let broadcast = Self::splat(rhs);
        Self::wrapping_add(self, broadcast)
    }
}
impl core::ops::AddAssign for u8x8 {
    /// `a += b`: stores the result of `wrapping_add` back into `a`.
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        let current = *self;
        *self = Self::wrapping_add(current, rhs);
    }
}
impl core::ops::AddAssign<u8> for u8x8 {
    /// `a += byte`: splats `rhs` across all lanes and stores the result of
    /// `wrapping_add` back into `a`.
    #[inline(always)]
    fn add_assign(&mut self, rhs: u8) {
        let current = *self;
        let broadcast = Self::splat(rhs);
        *self = Self::wrapping_add(current, broadcast);
    }
}
impl core::ops::Sub for u8x8 {
    type Output = Self;
    /// `a - b`: forwards to `wrapping_sub`.
    #[inline(always)]
    fn sub(self, rhs: Self) -> Self {
        Self::wrapping_sub(self, rhs)
    }
}
impl core::ops::Sub<u8> for u8x8 {
    type Output = Self;
    /// `a - byte`: splats `rhs` across all lanes, then forwards to `wrapping_sub`.
    #[inline(always)]
    fn sub(self, rhs: u8) -> Self::Output {
        let broadcast = Self::splat(rhs);
        Self::wrapping_sub(self, broadcast)
    }
}
impl core::ops::SubAssign for u8x8 {
    /// `a -= b`: stores the result of `wrapping_sub` back into `a`.
    #[inline(always)]
    fn sub_assign(&mut self, rhs: Self) {
        let current = *self;
        *self = Self::wrapping_sub(current, rhs);
    }
}
impl core::ops::SubAssign<u8> for u8x8 {
    /// `a -= byte`: splats `rhs` across all lanes and stores the result of
    /// `wrapping_sub` back into `a`.
    #[inline(always)]
    fn sub_assign(&mut self, rhs: u8) {
        let current = *self;
        let broadcast = Self::splat(rhs);
        *self = Self::wrapping_sub(current, broadcast);
    }
}
impl IntoIterator for u8x8 {
    type Item = u8;
    type IntoIter = core::array::IntoIter<u8, 8>;
    /// Iterates over the eight lanes by value, in `to_array` order.
    #[inline(always)]
    fn into_iter(self) -> Self::IntoIter {
        let lanes: [u8; 8] = self.to_array();
        IntoIterator::into_iter(lanes)
    }
}
impl core::fmt::Debug for u8x8 {
    /// Formats the vector as a tuple struct named `u8x8` wrapping the lane array
    /// returned by `to_array`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let lanes: [u8; 8] = self.to_array();
        let mut tuple = f.debug_tuple("u8x8");
        tuple.field(&lanes);
        tuple.finish()
    }
}
/// `0x01` in each of the eight byte lanes.
pub(crate) const ALL_ONES: u64 = u64::from_ne_bytes([0x01; 8]);
/// The low seven bits of every lane (`0x7f` per lane).
const WITHOUT_HIGH_BITS: u64 = u64::from_ne_bytes([0x7f; 8]);
/// Only the top bit of every lane (`0x80` per lane).
const ONLY_HIGH_BITS: u64 = u64::from_ne_bytes([0x80; 8]);
/// Expands per-lane top-bit flags into full-lane masks.
///
/// For an `n` that has only lane top bits (`0x80`) set, each flagged lane
/// becomes `0xff` and every other lane stays `0x00`.
#[inline(always)]
const fn msb_mask(n: u64) -> u64 {
    const LANE_FILL: u64 = 255;
    // Move each flag down to bit 0 of its lane, then widen 0x01 to 0xff.
    let flags = n >> 7;
    flags * LANE_FILL
}