#[inline(always)]
pub(crate) unsafe fn count_forward(ip: *const u8, match_ptr: *const u8, iend: *const u8) -> usize {
let p_start = ip;
let mut ip = ip;
let mut m = match_ptr;
const CHUNK_SIZE: usize = core::mem::size_of::<usize>();
while (ip as usize) + CHUNK_SIZE <= (iend as usize) {
let a = unsafe { core::ptr::read_unaligned(ip.cast::<usize>()) };
let b = unsafe { core::ptr::read_unaligned(m.cast::<usize>()) };
let diff = a ^ b;
if diff != 0 {
#[cfg(target_endian = "little")]
let common = (diff.trailing_zeros() / 8) as usize;
#[cfg(target_endian = "big")]
let common = (diff.leading_zeros() / 8) as usize;
return unsafe { (ip.add(common) as usize) - (p_start as usize) };
}
unsafe {
ip = ip.add(CHUNK_SIZE);
m = m.add(CHUNK_SIZE);
}
}
#[cfg(target_pointer_width = "64")]
if (ip as usize) + 4 <= (iend as usize) {
let a = unsafe { core::ptr::read_unaligned(ip.cast::<u32>()) };
let b = unsafe { core::ptr::read_unaligned(m.cast::<u32>()) };
if a == b {
unsafe {
ip = ip.add(4);
m = m.add(4);
}
}
}
if (ip as usize) + 2 <= (iend as usize) {
let a = unsafe { core::ptr::read_unaligned(ip.cast::<u16>()) };
let b = unsafe { core::ptr::read_unaligned(m.cast::<u16>()) };
if a == b {
unsafe {
ip = ip.add(2);
m = m.add(2);
}
}
}
if (ip as usize) < (iend as usize) {
let a = unsafe { *ip };
let b = unsafe { *m };
if a == b {
unsafe {
ip = ip.add(1);
}
}
}
(ip as usize) - (p_start as usize)
}
#[inline]
pub(crate) fn count_forward_dict_2segment(
dict: &[u8],
cand: usize,
inp: &[u8],
cur: usize,
) -> usize {
assert!(
cand < dict.len(),
"count_forward_dict_2segment requires cand ({cand}) < dict.len() ({})",
dict.len(),
);
assert!(
cur <= inp.len(),
"count_forward_dict_2segment requires cur ({cur}) <= inp.len() ({})",
inp.len(),
);
let dict_len = dict.len();
let inp_len = inp.len();
let cur_avail = inp_len - cur;
if cur_avail == 0 {
return 0;
}
let seg1 = (dict_len - cand).min(cur_avail);
let m1 = unsafe {
count_forward(
inp.as_ptr().add(cur),
dict.as_ptr().add(cand),
inp.as_ptr().add(cur + seg1),
)
};
if m1 < seg1 || seg1 == cur_avail {
return m1;
}
let cur2 = cur + m1;
let m2 = unsafe {
count_forward(
inp.as_ptr().add(cur2),
inp.as_ptr(),
inp.as_ptr().add(inp_len),
)
};
m1 + m2
}
#[cfg(test)]
mod tests {
use super::*;
fn count(a: &[u8], b: &[u8]) -> usize {
let min_len = a.len().min(b.len());
unsafe { count_forward(a.as_ptr(), b.as_ptr(), a.as_ptr().add(min_len)) }
}
#[test]
fn empty_inputs_return_zero() {
let a: [u8; 0] = [];
let b: [u8; 0] = [];
let n = unsafe { count_forward(a.as_ptr(), b.as_ptr(), a.as_ptr()) };
assert_eq!(n, 0);
}
#[test]
fn full_match_inside_8_byte_chunk() {
let a = [1, 2, 3, 4, 5, 6, 7, 8];
let b = [1, 2, 3, 4, 5, 6, 7, 8];
assert_eq!(count(&a, &b), 8);
}
#[test]
fn diff_at_byte_3_in_first_chunk() {
let a = [1, 2, 3, 9, 5, 6, 7, 8];
let b = [1, 2, 3, 4, 5, 6, 7, 8];
assert_eq!(count(&a, &b), 3);
}
#[test]
fn match_spanning_two_chunks() {
let mut a = [0u8; 16];
let mut b = [0u8; 16];
for i in 0..16 {
a[i] = i as u8;
b[i] = i as u8;
}
a[13] = 99;
assert_eq!(count(&a, &b), 13);
}
#[test]
fn match_terminates_at_iend_within_tail() {
let a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
let b = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
assert_eq!(count(&a, &b), 11);
}
#[test]
fn diff_in_u32_tail() {
let a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 99, 11, 12];
let b = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
assert_eq!(count(&a, &b), 9);
}
#[test]
fn diff_in_u16_tail_after_u32_match() {
let a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 99, 14];
let b = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14];
assert_eq!(count(&a, &b), 12);
}
#[test]
fn diff_in_single_byte_tail() {
let a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 99];
let b = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13];
assert_eq!(count(&a, &b), 12);
}
#[test]
fn long_match_thousand_bytes() {
let a = [0x5Au8; 1024];
let b = [0x5Au8; 1024];
assert_eq!(count(&a, &b), 1024);
}
#[test]
fn no_match_first_byte() {
let a = [0u8, 1, 2, 3, 4, 5, 6, 7];
let b = [9u8, 1, 2, 3, 4, 5, 6, 7];
assert_eq!(count(&a, &b), 0);
}
#[test]
fn dict_2segment_within_dict_only() {
let dict = [10u8, 20, 30, 40];
let inp = [30u8, 40, 99];
assert_eq!(count_forward_dict_2segment(&dict, 2, &inp, 0), 2);
}
#[test]
fn dict_2segment_crosses_boundary_into_input() {
let dict = [1u8, 2, 3];
let inp = [1u8, 2, 3, 1, 2, 3, 9]; assert_eq!(count_forward_dict_2segment(&dict, 0, &inp, 3), 3);
}
#[test]
fn dict_2segment_continues_word_at_a_time_past_boundary() {
let dict = [1u8, 2, 3];
let inp = [1u8, 2, 3, 1, 2, 3, 1, 2]; assert_eq!(count_forward_dict_2segment(&dict, 0, &inp, 3), 5);
}
#[test]
fn dict_2segment_stops_at_input_end() {
let dict = [7u8, 7];
let inp = [7u8, 7, 7, 7]; assert_eq!(count_forward_dict_2segment(&dict, 0, &inp, 2), 2);
}
}