use std::mem::transmute;
use crate::S;
use packed_seq::Delay;
use wide::{CmpGt, i32x8, u32x8};
/// Builds the scalar (one character per call) strand tracker for windows of `l` characters.
///
/// Returns `Delay(l - 1)` together with a stateful closure. Each call takes a pair `(a, r)`,
/// adds bit 1 of `a` (value 0 or 2) to a running balance that starts at `-l`, reports whether
/// the balance is strictly positive, and only then subtracts bit 1 of `r`.
///
/// NOTE(review): presumably the caller feeds `a` as the character entering the window and
/// `r` as the character `l - 1` positions behind it (hence the returned `Delay`), so that at
/// report time the balance equals `2 * k - l`, with `k` the number of window characters that
/// have bit 1 set. With `l` odd that value is never zero, so the answer is never a tie.
/// Which bases have bit 1 set depends on the 2-bit packing used by the caller — confirm.
///
/// # Panics
///
/// Panics if `l` is even.
pub fn canonical_mapper_scalar(l: usize) -> (Delay, impl FnMut((u8, u8)) -> bool) {
    assert!(
        l % 2 != 0,
        "Window length l={l} must be odd to guarantee canonicality"
    );
    // `l` is odd, hence at least 1, so `l - 1` cannot underflow.
    let delay = Delay(l - 1);
    let mut balance = -(l as isize);
    (
        delay,
        #[inline(always)]
        move |(a, r)| {
            // Balance including the new character; this is the value that gets reported.
            let with_entering = balance + isize::from(a & 2);
            // Drop the contribution of `r` so the next call starts from the shrunken window.
            balance = with_entering - isize::from(r & 2);
            with_entering > 0
        },
    )
}
/// Builds the SIMD strand tracker for windows of `l` characters, processing eight lanes
/// per call.
///
/// Returns `Delay(l - 1)` together with a stateful closure. Each call takes a pair `(a, r)`
/// of vectors and, independently in every 32-bit lane, adds bit 1 of `a` (value 0 or 2) to a
/// running counter that starts at `-l`, compares the counter against zero, and only then
/// subtracts bit 1 of `r`. The returned `u32x8` is the raw result of `cmp_gt` reinterpreted
/// as unsigned lanes (per `wide`, a lane mask: all bits set where the counter is positive,
/// zero otherwise).
///
/// NOTE(review): presumably the caller feeds `a` as the characters entering the windows and
/// `r` as the characters `l - 1` positions behind them (hence the returned `Delay`), so that
/// each counter equals `2 * k - l` at comparison time, with `k` the number of window
/// characters that have bit 1 set. With `l` odd this is never zero. Confirm against the
/// caller.
///
/// # Panics
///
/// Panics if `l` is even, or if `l` does not fit in an `i32` (the per-lane counter type).
#[inline(always)]
pub fn canonical_mapper_simd(l: usize) -> (Delay, impl FnMut((S, S)) -> u32x8) {
    assert!(
        l % 2 == 1,
        "Window length l={l} must be odd to guarantee canonicality"
    );
    // The lane counters are `i32` and start at `-l`. Without this check `l as i32` would
    // silently wrap for oversized windows and seed the counters with a wrong value.
    assert!(
        l <= i32::MAX as usize,
        "Window length l={l} must fit in an i32 SIMD lane"
    );
    // Lossless thanks to the range check above.
    let mut cnt = i32x8::splat(-(l as i32));
    // Mask selecting bit 1 of every lane.
    let two = i32x8::splat(2);
    (
        Delay(l - 1),
        #[inline(always)]
        move |(a, r)| {
            // SAFETY: `transmute` only compiles when `S` and `i32x8` have the same size, and
            // `i32x8` is made of plain integer lanes, so every bit pattern is a valid value.
            // NOTE(review): assumes `S` is a vector of eight 32-bit lanes — confirm.
            cnt += unsafe { transmute::<_, i32x8>(a) } & two;
            // SAFETY: the comparison result is an `i32x8`; `u32x8` has the same size and
            // accepts every bit pattern, so this is a pure signedness reinterpretation.
            let out = unsafe { transmute::<_, u32x8>(cnt.cmp_gt(i32x8::ZERO)) };
            // SAFETY: same reasoning as for `a` above.
            cnt -= unsafe { transmute::<_, i32x8>(r) } & two;
            out
        },
    )
}