1#![doc = include_str!("../README.md")]
2#![no_std]
3#![cfg_attr(feature = "nightly", allow(internal_features), feature(core_intrinsics))]
4#![cfg_attr(feature = "nightly", feature(portable_simd))]
5
6#[cfg(not(feature = "nightly"))]
11#[allow(unused)]
12pub(crate) use core::convert::{identity as likely, identity as unlikely};
13#[cfg(feature = "nightly")]
14#[allow(unused)]
15pub(crate) use core::intrinsics::{likely, unlikely};
16
17const PAGE_SIZE: usize = 4096;
62
63#[inline(always)]
64unsafe fn same_page<const VECTOR_SIZE: usize>(slice: &[u8]) -> bool {
65 let address = slice.as_ptr() as usize;
66 let offset_within_page = address & (PAGE_SIZE - 1);
68 offset_within_page < PAGE_SIZE - VECTOR_SIZE
70}
71
72fn count_shared_reference(p: &[u8], q: &[u8]) -> usize {
74 p.iter().zip(q)
75 .take_while(|(x, y)| x == y).count()
76}
77
78#[cold]
79fn count_shared_cold(a: &[u8], b: &[u8]) -> usize {
80 count_shared_reference(a, b)
81}
82
83#[cfg(all(target_feature="avx2", not(miri)))]
84#[inline(always)]
85fn count_shared_avx2(p: &[u8], q: &[u8]) -> usize {
86 use core::arch::x86_64::*;
87 unsafe {
88 let pl = p.len();
89 let ql = q.len();
90 let max_shared = pl.min(ql);
91 if unlikely(max_shared == 0) { return 0 }
92 if likely(same_page::<32>(p) && same_page::<32>(q)) {
93 let pv = _mm256_loadu_si256(p.as_ptr() as _);
94 let qv = _mm256_loadu_si256(q.as_ptr() as _);
95 let ev = _mm256_cmpeq_epi8(pv, qv);
96 let ne = !(_mm256_movemask_epi8(ev) as u32);
97 let count = _tzcnt_u32(ne);
98 if count != 32 || max_shared < 33 {
99 (count as usize).min(max_shared)
100 } else {
101 let new_len = max_shared-32;
102 32 + count_shared_avx2(core::slice::from_raw_parts(p.as_ptr().add(32), new_len), core::slice::from_raw_parts(q.as_ptr().add(32), new_len))
103 }
104 } else {
105 count_shared_cold(p, q)
106 }
107 }
108}
109
110#[cfg(all(not(feature = "nightly"), target_arch = "aarch64", target_feature = "neon", not(miri)))]
111#[inline(always)]
112fn count_shared_neon(p: &[u8], q: &[u8]) -> usize {
113 use core::arch::aarch64::*;
114 unsafe {
115 let pl = p.len();
116 let ql = q.len();
117 let max_shared = pl.min(ql);
118 if unlikely(max_shared == 0) { return 0 }
119
120 if same_page::<16>(p) && same_page::<16>(q) {
121 let pv = vld1q_u8(p.as_ptr());
122 let qv = vld1q_u8(q.as_ptr());
123 let eq = vceqq_u8(pv, qv);
124
125 let mut bytes = [core::mem::MaybeUninit::<u8>::uninit(); 16];
138 vst1q_u8(bytes.as_mut_ptr().cast(), eq);
139 let scalar128 = u128::from_le_bytes(core::mem::transmute(bytes));
140 let count = scalar128.trailing_ones() / 8;
141
142 if count != 16 || max_shared < 17 {
143 (count as usize).min(max_shared)
144 } else {
145 let new_len = max_shared-16;
146 16 + count_shared_neon(core::slice::from_raw_parts(p.as_ptr().add(16), new_len), core::slice::from_raw_parts(q.as_ptr().add(16), new_len))
147 }
148 } else {
149 return count_shared_cold(p, q);
150 }
151 }
152}
153
154#[cfg(all(feature = "nightly", not(miri)))]
155#[inline(always)]
156fn count_shared_simd(p: &[u8], q: &[u8]) -> usize {
157 use core::simd::{u8x32, cmp::SimdPartialEq};
158 unsafe {
159 let pl = p.len();
160 let ql = q.len();
161 let max_shared = pl.min(ql);
162 if unlikely(max_shared == 0) { return 0 }
163 if same_page::<32>(p) && same_page::<32>(q) {
164 let mut p_array = [core::mem::MaybeUninit::<u8>::uninit(); 32];
165 core::ptr::copy_nonoverlapping(p.as_ptr().cast(), (&mut p_array).as_mut_ptr(), 32);
166 let pv = u8x32::from_array(core::mem::transmute(p_array));
167 let mut q_array = [core::mem::MaybeUninit::<u8>::uninit(); 32];
168 core::ptr::copy_nonoverlapping(q.as_ptr().cast(), (&mut q_array).as_mut_ptr(), 32);
169 let qv = u8x32::from_array(core::mem::transmute(q_array));
170 let ev = pv.simd_eq(qv);
171 let mask = ev.to_bitmask();
172 let count = mask.trailing_ones();
173 if count != 32 || max_shared < 33 {
174 (count as usize).min(max_shared)
175 } else {
176 let new_len = max_shared-32;
177 32 + count_shared_simd(core::slice::from_raw_parts(p.as_ptr().add(32), new_len), core::slice::from_raw_parts(q.as_ptr().add(32), new_len))
178 }
179 } else {
180 return count_shared_cold(p, q);
181 }
182 }
183}
184
185#[inline]
203pub fn find_prefix_overlap(a: &[u8], b: &[u8]) -> usize {
204 #[cfg(all(target_feature="avx2", not(miri)))]
205 {
206 count_shared_avx2(a, b)
207 }
208 #[cfg(all(not(feature = "nightly"), target_arch = "aarch64", target_feature = "neon", not(miri)))]
209 {
210 count_shared_neon(a, b)
211 }
212 #[cfg(all(feature = "nightly", target_arch = "aarch64", target_feature = "neon", not(miri)))]
213 {
214 count_shared_simd(a, b)
215 }
216 #[cfg(any(all(not(target_feature="avx2"), not(target_feature="neon")), miri))]
217 {
218 count_shared_reference(a, b)
219 }
220}
221
222#[test]
223fn find_prefix_overlap_test() {
224 let tests = [
225 ("12345", "67890", 0),
226 ("", "12300", 0),
227 ("12345", "", 0),
228 ("12345", "12300", 3),
229 ("123", "123000000", 3),
230 ("123456789012345678901234567890xxxx", "123456789012345678901234567890yy", 30),
231 ("123456789012345678901234567890123456789012345678901234567890xxxx", "123456789012345678901234567890123456789012345678901234567890yy", 60),
232 ("1234567890123456xxxx", "1234567890123456yyyyyyy", 16),
233 ("123456789012345xxxx", "123456789012345yyyyyyy", 15),
234 ("12345678901234567xxxx", "12345678901234567yyyyyyy", 17),
235 ("1234567890123456789012345678901xxxx", "1234567890123456789012345678901yy", 31),
236 ("12345678901234567890123456789012xxxx", "12345678901234567890123456789012yy", 32),
237 ("123456789012345678901234567890123xxxx", "123456789012345678901234567890123yy", 33),
238 ("123456789012345678901234567890123456789012345678901234567890123xxxx", "123456789012345678901234567890123456789012345678901234567890123yy", 63),
239 ("1234567890123456789012345678901234567890123456789012345678901234xxxx", "1234567890123456789012345678901234567890123456789012345678901234yy", 64),
240 ("12345678901234567890123456789012345678901234567890123456789012345xxxx", "12345678901234567890123456789012345678901234567890123456789012345yy", 65),
241 ];
242
243 for test in tests {
244 let overlap = find_prefix_overlap(test.0.as_bytes(), test.1.as_bytes());
245 assert_eq!(overlap, test.2);
246 }
247}
248
249#[inline(always)]
251pub fn starts_with(x: &[u8], y: &[u8]) -> bool {
252 if y.len() == 0 { return true }
253 if x.len() == 0 { return false }
254 if y.len() > x.len() { return false }
255 find_prefix_overlap(x, y) == y.len()
256}