#[inline]
pub fn node16_find_byte(keys: &[u8; 16], count: u8, byte: u8) -> Option<u8> {
#[cfg(target_arch = "x86_64")]
unsafe {
x86::find_byte_in_16(keys.as_ptr(), count, byte)
}
#[cfg(target_arch = "aarch64")]
unsafe {
arm::find_byte_in_16(keys.as_ptr(), count, byte)
}
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
{
node16_find_byte_scalar(keys, count, byte)
}
}
#[cfg(any(test, not(any(target_arch = "x86_64", target_arch = "aarch64"))))]
#[inline]
pub(crate) fn node16_find_byte_scalar(keys: &[u8; 16], count: u8, byte: u8) -> Option<u8> {
let n = (count as usize).min(16);
let mut i = 0;
while i < n {
if keys[i] == byte {
return Some(i as u8);
}
i += 1;
}
None
}
#[inline]
pub fn longest_common_prefix(a: &[u8], b: &[u8]) -> usize {
let limit = a.len().min(b.len());
let mut i = 0;
#[cfg(target_arch = "x86_64")]
if limit >= 64 && x86::avx2_available() {
while i + 32 <= limit {
let mask = unsafe { x86::cmp_32_bytes_bitmask(a[i..].as_ptr(), b[i..].as_ptr()) };
if mask != u32::MAX {
return i + mask.trailing_ones() as usize;
}
i += 32;
}
}
#[cfg(target_arch = "x86_64")]
while i + 16 <= limit {
let mask = unsafe { x86::cmp_16_bytes_bitmask(a[i..].as_ptr(), b[i..].as_ptr()) };
if mask != 0xFFFF {
return i + mask.trailing_ones() as usize;
}
i += 16;
}
#[cfg(target_arch = "aarch64")]
while i + 16 <= limit {
let mask = unsafe { arm::cmp_16_bytes_nibble(a[i..].as_ptr(), b[i..].as_ptr()) };
if mask != u64::MAX {
return i + (mask.trailing_ones() / 4) as usize;
}
i += 16;
}
while i < limit && a[i] == b[i] {
i += 1;
}
i
}
#[inline]
pub fn find_byte(bytes: &[u8], needle: u8, start: usize) -> Option<usize> {
let len = bytes.len();
if start >= len {
return None;
}
let mut i = start;
#[cfg(target_arch = "x86_64")]
if len - i >= 64 && x86::avx2_available() {
while i + 32 <= len {
let ptr = unsafe { bytes.as_ptr().add(i) };
let mask = unsafe { x86::cmp_byte_eq_mask_32(ptr, needle) };
if mask != 0 {
return Some(i + mask.trailing_zeros() as usize);
}
i += 32;
}
}
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
while i + 16 <= len {
let ptr = unsafe { bytes.as_ptr().add(i) };
let mask = unsafe { byte_eq_mask_16(ptr, needle) };
if mask != 0 {
return Some(i + mask.trailing_zeros() as usize);
}
i += 16;
}
while i < len {
if bytes[i] == needle {
return Some(i);
}
i += 1;
}
None
}
#[inline]
pub fn find_next_nonzero_byte(bytes: &[u8], start: usize) -> Option<usize> {
let len = bytes.len();
if start >= len {
return None;
}
let mut i = start;
#[cfg(target_arch = "x86_64")]
if len - i >= 64 && x86::avx2_available() {
while i + 32 <= len {
let ptr = unsafe { bytes.as_ptr().add(i) };
let mask = unsafe { x86::cmp_byte_neq_zero_mask_32(ptr) };
if mask != 0 {
return Some(i + mask.trailing_zeros() as usize);
}
i += 32;
}
}
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
while i + 16 <= len {
let ptr = unsafe { bytes.as_ptr().add(i) };
let mask = unsafe { nonzero_byte_mask_16(ptr) };
if mask != 0 {
return Some(i + mask.trailing_zeros() as usize);
}
i += 16;
}
while i < len {
if bytes[i] != 0 {
return Some(i);
}
i += 1;
}
None
}
#[inline]
pub fn find_next_nonzero_u32(words: &[u32], start: usize) -> Option<usize> {
let len = words.len();
if start >= len {
return None;
}
let mut i = start;
#[cfg(target_arch = "x86_64")]
if len - i >= 16 && x86::avx2_available() {
while i + 8 <= len {
let ptr = unsafe { words.as_ptr().add(i) };
let mask = unsafe { x86::cmp_u32_neq_zero_mask_8(ptr) };
if mask != 0 {
return Some(i + mask.trailing_zeros() as usize);
}
i += 8;
}
}
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
while i + 4 <= len {
let ptr = unsafe { words.as_ptr().add(i) };
let mask = unsafe { nonzero_u32_mask_4(ptr) };
if mask != 0 {
return Some(i + mask.trailing_zeros() as usize);
}
i += 4;
}
while i < len {
if words[i] != 0 {
return Some(i);
}
i += 1;
}
None
}
#[cfg(target_arch = "x86_64")]
#[inline]
unsafe fn byte_eq_mask_16(ptr: *const u8, needle: u8) -> u32 {
unsafe { x86::cmp_byte_eq_mask_16(ptr, needle) }
}
#[cfg(target_arch = "x86_64")]
#[inline]
unsafe fn nonzero_byte_mask_16(ptr: *const u8) -> u32 {
unsafe { x86::cmp_byte_neq_zero_mask_16(ptr) }
}
#[cfg(target_arch = "x86_64")]
#[inline]
unsafe fn nonzero_u32_mask_4(ptr: *const u32) -> u32 {
unsafe { x86::cmp_u32_neq_zero_mask_4(ptr) }
}
#[cfg(target_arch = "aarch64")]
#[inline]
unsafe fn byte_eq_mask_16(ptr: *const u8, needle: u8) -> u32 {
unsafe { arm::cmp_byte_eq_mask_16(ptr, needle) }
}
#[cfg(target_arch = "aarch64")]
#[inline]
unsafe fn nonzero_byte_mask_16(ptr: *const u8) -> u32 {
unsafe { arm::cmp_byte_neq_zero_mask_16(ptr) }
}
#[cfg(target_arch = "aarch64")]
#[inline]
unsafe fn nonzero_u32_mask_4(ptr: *const u32) -> u32 {
unsafe { arm::cmp_u32_neq_zero_mask_4(ptr) }
}
#[cfg(target_arch = "x86_64")]
mod x86 {
use std::arch::x86_64::{
__m128i, __m256i, _mm256_cmpeq_epi32, _mm256_cmpeq_epi8, _mm256_loadu_si256,
_mm256_movemask_epi8, _mm256_movemask_ps, _mm256_set1_epi8, _mm256_setzero_si256,
_mm_cmpeq_epi32, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_movemask_ps,
_mm_set1_epi8, _mm_setzero_si128,
};
#[inline]
pub(super) fn avx2_available() -> bool {
cfg!(target_feature = "avx2") || std::arch::is_x86_feature_detected!("avx2")
}
#[inline]
pub(super) unsafe fn cmp_16_bytes_bitmask(a: *const u8, b: *const u8) -> u32 {
let va = unsafe { _mm_loadu_si128(a.cast::<__m128i>()) };
let vb = unsafe { _mm_loadu_si128(b.cast::<__m128i>()) };
let cmp = _mm_cmpeq_epi8(va, vb);
_mm_movemask_epi8(cmp) as u32
}
#[target_feature(enable = "avx2")]
#[inline]
pub(super) unsafe fn cmp_32_bytes_bitmask(a: *const u8, b: *const u8) -> u32 {
let va = unsafe { _mm256_loadu_si256(a.cast::<__m256i>()) };
let vb = unsafe { _mm256_loadu_si256(b.cast::<__m256i>()) };
let cmp = _mm256_cmpeq_epi8(va, vb);
_mm256_movemask_epi8(cmp) as u32
}
#[inline]
pub(super) unsafe fn cmp_byte_eq_mask_16(ptr: *const u8, needle: u8) -> u32 {
let vec = unsafe { _mm_loadu_si128(ptr.cast::<__m128i>()) };
let needle = _mm_set1_epi8(needle as i8);
let cmp = _mm_cmpeq_epi8(vec, needle);
_mm_movemask_epi8(cmp) as u32
}
#[target_feature(enable = "avx2")]
#[inline]
pub(super) unsafe fn cmp_byte_eq_mask_32(ptr: *const u8, needle: u8) -> u32 {
let vec = unsafe { _mm256_loadu_si256(ptr.cast::<__m256i>()) };
let needle = _mm256_set1_epi8(needle as i8);
let cmp = _mm256_cmpeq_epi8(vec, needle);
_mm256_movemask_epi8(cmp) as u32
}
#[inline]
pub(super) unsafe fn find_byte_in_16(keys: *const u8, count: u8, byte: u8) -> Option<u8> {
let vec = unsafe { _mm_loadu_si128(keys.cast::<__m128i>()) };
let needle = _mm_set1_epi8(byte as i8);
let cmp = _mm_cmpeq_epi8(vec, needle);
let mask = _mm_movemask_epi8(cmp) as u32;
let count_mask = if count >= 16 {
0xFFFF
} else {
(1u32 << count) - 1
};
let masked = mask & count_mask;
if masked == 0 {
None
} else {
Some(masked.trailing_zeros() as u8)
}
}
#[inline]
pub(super) unsafe fn cmp_byte_neq_zero_mask_16(ptr: *const u8) -> u32 {
let vec = unsafe { _mm_loadu_si128(ptr.cast::<__m128i>()) };
let zero = _mm_setzero_si128();
let cmp_eq_zero = _mm_cmpeq_epi8(vec, zero); let zero_mask = _mm_movemask_epi8(cmp_eq_zero) as u32;
(!zero_mask) & 0xFFFF
}
#[target_feature(enable = "avx2")]
#[inline]
pub(super) unsafe fn cmp_byte_neq_zero_mask_32(ptr: *const u8) -> u32 {
let vec = unsafe { _mm256_loadu_si256(ptr.cast::<__m256i>()) };
let zero = _mm256_setzero_si256();
let cmp_eq_zero = _mm256_cmpeq_epi8(vec, zero);
let zero_mask = _mm256_movemask_epi8(cmp_eq_zero) as u32;
!zero_mask
}
#[inline]
pub(super) unsafe fn cmp_u32_neq_zero_mask_4(ptr: *const u32) -> u32 {
let vec = unsafe { _mm_loadu_si128(ptr.cast::<__m128i>()) };
let zero = _mm_setzero_si128();
let cmp_eq_zero = _mm_cmpeq_epi32(vec, zero);
let as_ps =
unsafe { std::mem::transmute::<__m128i, std::arch::x86_64::__m128>(cmp_eq_zero) };
let zero_mask = (_mm_movemask_ps(as_ps) as u32) & 0xF;
(!zero_mask) & 0xF
}
#[target_feature(enable = "avx2")]
#[inline]
pub(super) unsafe fn cmp_u32_neq_zero_mask_8(ptr: *const u32) -> u32 {
let vec = unsafe { _mm256_loadu_si256(ptr.cast::<__m256i>()) };
let zero = _mm256_setzero_si256();
let cmp_eq_zero = _mm256_cmpeq_epi32(vec, zero);
let as_ps =
unsafe { std::mem::transmute::<__m256i, std::arch::x86_64::__m256>(cmp_eq_zero) };
let zero_mask = (_mm256_movemask_ps(as_ps) as u32) & 0xFF;
(!zero_mask) & 0xFF
}
}
#[cfg(target_arch = "aarch64")]
mod arm {
use std::arch::aarch64::{
uint8x16_t, vceqq_u32, vceqq_u8, vdupq_n_u32, vdupq_n_u8, vget_lane_u64, vld1q_u32,
vld1q_u8, vmvnq_u32, vmvnq_u8, vreinterpret_u64_u8, vreinterpretq_u16_u8,
vreinterpretq_u8_u32, vshrn_n_u16,
};
#[inline]
unsafe fn byte_mask_to_nibble_u64(cmp: uint8x16_t) -> u64 {
let narrow = vshrn_n_u16::<4>(vreinterpretq_u16_u8(cmp));
vget_lane_u64::<0>(vreinterpret_u64_u8(narrow))
}
#[inline]
fn nibble_mask_to_bitmask_16(nib: u64) -> u32 {
let mut x = nib & 0x1111_1111_1111_1111;
x = (x | (x >> 3)) & 0x0303_0303_0303_0303;
x = (x | (x >> 6)) & 0x000f_000f_000f_000f;
x = (x | (x >> 12)) & 0x0000_00ff_0000_00ff;
x = (x | (x >> 24)) & 0x0000_0000_0000_ffff;
x as u32
}
#[inline]
pub(super) unsafe fn cmp_16_bytes_nibble(a: *const u8, b: *const u8) -> u64 {
let va = unsafe { vld1q_u8(a) };
let vb = unsafe { vld1q_u8(b) };
let cmp = vceqq_u8(va, vb);
unsafe { byte_mask_to_nibble_u64(cmp) }
}
#[inline]
pub(super) unsafe fn find_byte_in_16(keys: *const u8, count: u8, byte: u8) -> Option<u8> {
let vec = unsafe { vld1q_u8(keys) };
let needle = vdupq_n_u8(byte);
let cmp = vceqq_u8(vec, needle);
let mask64 = unsafe { byte_mask_to_nibble_u64(cmp) };
let count_bits = (count.min(16) as u32) * 4;
let count_mask = if count_bits == 64 {
u64::MAX
} else {
(1u64 << count_bits) - 1
};
let masked = mask64 & count_mask;
if masked == 0 {
None
} else {
Some((masked.trailing_zeros() / 4) as u8)
}
}
#[inline]
pub(super) unsafe fn cmp_byte_eq_mask_16(ptr: *const u8, needle: u8) -> u32 {
let vec = unsafe { vld1q_u8(ptr) };
let needle = vdupq_n_u8(needle);
let cmp = vceqq_u8(vec, needle);
let nib = unsafe { byte_mask_to_nibble_u64(cmp) };
nibble_mask_to_bitmask_16(nib)
}
#[inline]
pub(super) unsafe fn cmp_byte_neq_zero_mask_16(ptr: *const u8) -> u32 {
let vec = unsafe { vld1q_u8(ptr) };
let zero = vdupq_n_u8(0);
let cmp_eq_zero = vceqq_u8(vec, zero);
let cmp_neq_zero = vmvnq_u8(cmp_eq_zero);
let nib = unsafe { byte_mask_to_nibble_u64(cmp_neq_zero) };
nibble_mask_to_bitmask_16(nib)
}
#[inline]
pub(super) unsafe fn cmp_u32_neq_zero_mask_4(ptr: *const u32) -> u32 {
let vec = unsafe { vld1q_u32(ptr) };
let zero = vdupq_n_u32(0);
let cmp_eq_zero = vceqq_u32(vec, zero);
let cmp_neq_zero = vmvnq_u32(cmp_eq_zero);
let as_bytes = vreinterpretq_u8_u32(cmp_neq_zero);
let nib = unsafe { byte_mask_to_nibble_u64(as_bytes) };
let byte_bits = nibble_mask_to_bitmask_16(nib);
nibble_mask_to_bitmask_16(u64::from(byte_bits & 0x1111)) & 0xF
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn find_byte_at_index_zero() {
let mut keys = [0u8; 16];
keys[0] = 0x42;
assert_eq!(node16_find_byte(&keys, 1, 0x42), Some(0));
}
#[test]
fn find_byte_at_last_valid_index() {
let mut keys = [0u8; 16];
keys[15] = 0xAB;
assert_eq!(node16_find_byte(&keys, 16, 0xAB), Some(15));
}
#[test]
fn find_byte_middle() {
let mut keys = [0u8; 16];
for (i, slot) in keys.iter_mut().enumerate().take(10) {
*slot = b'a' + i as u8;
}
assert_eq!(node16_find_byte(&keys, 10, b'f'), Some(5));
}
#[test]
fn find_byte_absent_returns_none() {
let mut keys = [0u8; 16];
for (i, slot) in keys.iter_mut().enumerate().take(8) {
*slot = b'a' + i as u8;
}
assert_eq!(node16_find_byte(&keys, 8, b'z'), None);
}
#[test]
fn find_byte_count_zero_returns_none() {
let keys = [0xAB; 16];
assert_eq!(node16_find_byte(&keys, 0, 0xAB), None);
}
#[test]
fn find_byte_ignores_unused_tail() {
let mut keys = [0u8; 16];
keys[10] = 0x77;
assert_eq!(node16_find_byte(&keys, 4, 0x77), None);
}
#[test]
fn find_byte_first_of_duplicates() {
let mut keys = [0u8; 16];
keys[3] = 0x55;
keys[7] = 0x55;
assert_eq!(node16_find_byte(&keys, 16, 0x55), Some(3));
}
#[test]
fn find_byte_matches_scalar_random() {
use std::collections::HashSet;
let mut state: u64 = 0xDEAD_BEEF_CAFE_BABE;
let next = |s: &mut u64| -> u8 {
*s = s
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(*s >> 33) as u8
};
for _ in 0..1000 {
let count = next(&mut state) % 17; let mut keys = [0u8; 16];
let mut used = HashSet::new();
for k in keys.iter_mut().take(count as usize) {
loop {
let b = next(&mut state);
if used.insert(b) {
*k = b;
break;
}
}
}
let query = next(&mut state);
let got = node16_find_byte(&keys, count, query);
let expected = node16_find_byte_scalar(&keys, count, query);
assert_eq!(
got, expected,
"mismatch on keys={keys:?} count={count} q={query}"
);
}
}
#[test]
fn lcp_empty_inputs() {
assert_eq!(longest_common_prefix(b"", b""), 0);
assert_eq!(longest_common_prefix(b"abc", b""), 0);
assert_eq!(longest_common_prefix(b"", b"abc"), 0);
}
#[test]
fn lcp_identical() {
assert_eq!(longest_common_prefix(b"hello", b"hello"), 5);
}
#[test]
fn lcp_strict_prefix() {
assert_eq!(longest_common_prefix(b"abc", b"abcdef"), 3);
assert_eq!(longest_common_prefix(b"abcdef", b"abc"), 3);
}
#[test]
fn lcp_no_common() {
assert_eq!(longest_common_prefix(b"abc", b"xyz"), 0);
}
#[test]
fn lcp_divergence_at_boundary() {
let a = b"0123456789ABCDEFhello"; let b = b"0123456789ABCDEFworld"; assert_eq!(longest_common_prefix(a, b), 16);
}
#[test]
fn lcp_long_match_then_diverge_in_chunk() {
let a = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa01"; let b = b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa99"; assert_eq!(longest_common_prefix(a, b), 35);
}
#[test]
fn lcp_match_then_diverge_at_byte_15() {
let a = b"aaaaaaaaaaaaaaaXrest";
let b = b"aaaaaaaaaaaaaaaYrest";
assert_eq!(longest_common_prefix(a, b), 15);
}
#[test]
fn lcp_match_then_diverge_at_byte_16() {
let a = b"aaaaaaaaaaaaaaaaXrest";
let b = b"aaaaaaaaaaaaaaaaYrest";
assert_eq!(longest_common_prefix(a, b), 16);
}
fn scalar_find_byte(bytes: &[u8], needle: u8, start: usize) -> Option<usize> {
bytes
.iter()
.enumerate()
.skip(start)
.find(|(_, b)| **b == needle)
.map(|(i, _)| i)
}
#[test]
fn find_byte_respects_start_and_boundaries() {
let mut bytes = [b'a'; 96];
for pos in [0usize, 1, 15, 16, 17, 31, 32, 63, 64, 95] {
bytes.fill(b'a');
bytes[pos] = b'/';
assert_eq!(find_byte(&bytes, b'/', 0), Some(pos), "pos={pos}");
assert_eq!(find_byte(&bytes, b'/', pos), Some(pos), "pos={pos}");
if pos + 1 < bytes.len() {
assert_eq!(find_byte(&bytes, b'/', pos + 1), None, "pos={pos}");
}
}
}
#[test]
fn find_byte_random_matches_scalar() {
let mut state: u64 = 0xA11C_E55E_D15C_0DED;
let step = |s: &mut u64| -> u8 {
*s = s
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(*s >> 33) as u8
};
for len in [0usize, 1, 3, 15, 16, 17, 31, 32, 33, 127, 255] {
let mut bytes = vec![0u8; len];
for b in &mut bytes {
*b = step(&mut state);
}
for _ in 0..32 {
let needle = step(&mut state);
let start = if len == 0 {
0
} else {
(step(&mut state) as usize) % (len + 1)
};
assert_eq!(
find_byte(&bytes, needle, start),
scalar_find_byte(&bytes, needle, start),
"len={len} start={start} needle={needle}",
);
}
}
}
fn scalar_next_nonzero_byte(bytes: &[u8], start: usize) -> Option<usize> {
bytes
.iter()
.enumerate()
.skip(start)
.find(|(_, b)| **b != 0)
.map(|(i, _)| i)
}
#[test]
fn find_next_nonzero_byte_empty() {
let bytes = [0u8; 256];
assert_eq!(find_next_nonzero_byte(&bytes, 0), None);
assert_eq!(find_next_nonzero_byte(&bytes, 100), None);
assert_eq!(find_next_nonzero_byte(&bytes, 256), None);
}
#[test]
fn find_next_nonzero_byte_at_chunk_boundaries() {
for pos in [0usize, 1, 15, 16, 17, 31, 32, 240, 254, 255] {
let mut bytes = [0u8; 256];
bytes[pos] = 0xAB;
assert_eq!(find_next_nonzero_byte(&bytes, 0), Some(pos), "pos={pos}");
assert_eq!(find_next_nonzero_byte(&bytes, pos), Some(pos), "pos={pos}");
if pos + 1 < 256 {
assert_eq!(find_next_nonzero_byte(&bytes, pos + 1), None, "pos+1={pos}");
}
}
}
#[test]
fn find_next_nonzero_byte_random_matches_scalar() {
let mut state: u64 = 0xCAFE_F00D_1234_5678;
let step = |s: &mut u64| -> u8 {
*s = s
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(*s >> 33) as u8
};
for _ in 0..500 {
let mut bytes = [0u8; 256];
for b in &mut bytes {
if step(&mut state) < 13 {
*b = step(&mut state).max(1);
}
}
let start = (step(&mut state) as usize) % 257;
assert_eq!(
find_next_nonzero_byte(&bytes, start),
scalar_next_nonzero_byte(&bytes, start),
"start={start}",
);
}
}
fn scalar_next_nonzero_u32(words: &[u32], start: usize) -> Option<usize> {
words
.iter()
.enumerate()
.skip(start)
.find(|(_, w)| **w != 0)
.map(|(i, _)| i)
}
#[test]
fn find_next_nonzero_u32_empty() {
let words = [0u32; 256];
assert_eq!(find_next_nonzero_u32(&words, 0), None);
assert_eq!(find_next_nonzero_u32(&words, 100), None);
assert_eq!(find_next_nonzero_u32(&words, 256), None);
}
#[test]
fn find_next_nonzero_u32_at_chunk_boundaries() {
for pos in [0usize, 1, 3, 4, 5, 7, 8, 9, 240, 254, 255] {
let mut words = [0u32; 256];
words[pos] = 0xABCD_1234;
assert_eq!(find_next_nonzero_u32(&words, 0), Some(pos), "pos={pos}");
assert_eq!(find_next_nonzero_u32(&words, pos), Some(pos), "pos={pos}");
if pos + 1 < 256 {
assert_eq!(find_next_nonzero_u32(&words, pos + 1), None);
}
}
}
#[test]
fn find_next_nonzero_u32_random_matches_scalar() {
let mut state: u64 = 0xF00D_CAFE_8765_4321;
let step = |s: &mut u64| -> u32 {
*s = s
.wrapping_mul(6_364_136_223_846_793_005)
.wrapping_add(1_442_695_040_888_963_407);
(*s >> 32) as u32
};
for _ in 0..500 {
let mut words = [0u32; 256];
for w in &mut words {
if step(&mut state).trailing_zeros() >= 4 {
*w = step(&mut state).max(1);
}
}
let start = (step(&mut state) as usize) % 257;
assert_eq!(
find_next_nonzero_u32(&words, start),
scalar_next_nonzero_u32(&words, start),
"start={start}",
);
}
}
}