use needle::*;
use haystack::{Hay, Span};
use std::cmp::{Ordering, max, min};
use std::usize;
use std::ops::Range;
type FastSkipByteset = u64;
trait FastSkipOptimization {
fn byteset_mask(&self) -> FastSkipByteset;
}
impl<T: ?Sized> FastSkipOptimization for T {
#[inline]
default fn byteset_mask(&self) -> FastSkipByteset { !0 }
}
impl FastSkipOptimization for u8 {
#[inline]
fn byteset_mask(&self) -> FastSkipByteset { 1 << (self & 63) }
}
trait MaximalSuffix: Sized {
fn maximal_suffix(arr: &[Self], order: Ordering) -> (usize, usize);
fn reverse_maximal_suffix(arr: &[Self], known_period: usize, order: Ordering) -> usize;
}
impl<T: PartialEq> MaximalSuffix for T {
default fn maximal_suffix(_: &[Self], _: Ordering) -> (usize, usize) {
(0, 1)
}
default fn reverse_maximal_suffix(_: &[Self], _: usize, _: Ordering) -> usize {
0
}
}
impl<T: Ord> MaximalSuffix for T {
fn maximal_suffix(arr: &[Self], order: Ordering) -> (usize, usize) {
let mut left = 0; let mut right = 1; let mut offset = 0; let mut period = 1;
while let Some(a) = arr.get(right + offset) {
let b = &arr[left + offset];
match a.cmp(b) {
Ordering::Equal => {
if offset + 1 == period {
right += offset + 1;
offset = 0;
} else {
offset += 1;
}
}
o if o == order => {
right += offset + 1;
offset = 0;
period = right - left;
}
_ => {
left = right;
right += 1;
offset = 0;
period = 1;
}
};
}
(left, period)
}
fn reverse_maximal_suffix(arr: &[Self], known_period: usize, order: Ordering) -> usize {
let mut left = 0; let mut right = 1; let mut offset = 0; let mut period = 1; let n = arr.len();
while right + offset < n {
let a = &arr[n - (1 + right + offset)];
let b = &arr[n - (1 + left + offset)];
match a.cmp(b) {
Ordering::Equal => {
if offset + 1 == period {
right += offset + 1;
offset = 0;
} else {
offset += 1;
}
}
o if o == order => {
right += offset + 1;
offset = 0;
period = right - left;
}
_ => {
left = right;
right += 1;
offset = 0;
period = 1;
}
}
if period == known_period {
break;
}
}
debug_assert!(period <= known_period);
left
}
}
struct LongPeriod;
struct ShortPeriod;
trait Period {
const IS_LONG_PERIOD: bool;
}
impl Period for LongPeriod {
const IS_LONG_PERIOD: bool = true;
}
impl Period for ShortPeriod {
const IS_LONG_PERIOD: bool = false;
}
#[derive(Debug)]
pub struct TwoWaySearcher<'p, T: 'p> {
crit_pos: usize,
crit_pos_back: usize,
period: usize,
byteset: FastSkipByteset,
needle: &'p [T],
memory: usize,
memory_back: usize,
}
impl<'p, T: 'p> Clone for TwoWaySearcher<'p, T> {
fn clone(&self) -> Self {
*self
}
}
impl<'p, T: 'p> Copy for TwoWaySearcher<'p, T> {}
impl<'p, T> TwoWaySearcher<'p, T>
where
T: PartialEq + 'p,
{
#[inline]
fn do_next<P: Period>(&mut self, hay: &[T], range: Range<usize>) -> Option<Range<usize>> {
let needle = self.needle;
let mut position = range.start;
'search: loop {
let i = position + (needle.len() - 1);
if i >= range.end {
return None;
}
let tail_item = unsafe { hay.get_unchecked(i) };
if !self.byteset_contains(tail_item) {
position += needle.len();
if !P::IS_LONG_PERIOD {
self.memory = 0;
}
continue 'search;
}
let start = if P::IS_LONG_PERIOD {
self.crit_pos
} else {
max(self.crit_pos, self.memory)
};
for i in start..needle.len() {
if unsafe { needle.get_unchecked(i) != hay.get_unchecked(position + i) } {
position += i - self.crit_pos + 1;
if !P::IS_LONG_PERIOD {
self.memory = 0;
}
continue 'search;
}
}
let start = if P::IS_LONG_PERIOD { 0 } else { self.memory };
for i in (start..self.crit_pos).rev() {
if unsafe { needle.get_unchecked(i) != hay.get_unchecked(position + i) } {
position += self.period;
if !P::IS_LONG_PERIOD {
self.memory = needle.len() - self.period;
}
continue 'search;
}
}
if !P::IS_LONG_PERIOD {
self.memory = 0; }
return Some(position..(position + needle.len()));
}
}
#[inline]
pub(crate) fn next(&mut self, hay: &[T], range: Range<usize>) -> Option<Range<usize>> {
if self.memory != usize::MAX {
self.do_next::<ShortPeriod>(hay, range)
} else {
self.do_next::<LongPeriod>(hay, range)
}
}
#[inline]
fn do_next_back<P: Period>(&mut self, hay: &[T], range: Range<usize>) -> Option<Range<usize>> {
let needle = self.needle;
let mut end = range.end;
'search: loop {
if needle.len() + range.start > end {
return None;
}
let front_item = unsafe { hay.get_unchecked(end.wrapping_sub(needle.len())) };
if !self.byteset_contains(front_item) {
end -= needle.len();
if !P::IS_LONG_PERIOD {
self.memory_back = needle.len();
}
continue 'search;
}
let crit = if P::IS_LONG_PERIOD {
self.crit_pos_back
} else {
min(self.crit_pos_back, self.memory_back)
};
for i in (0..crit).rev() {
if unsafe { needle.get_unchecked(i) != hay.get_unchecked(end - needle.len() + i) } {
end -= self.crit_pos_back - i;
if !P::IS_LONG_PERIOD {
self.memory_back = needle.len();
}
continue 'search;
}
}
let needle_end = if P::IS_LONG_PERIOD { needle.len() } else { self.memory_back };
for i in self.crit_pos_back..needle_end {
if unsafe { needle.get_unchecked(i) != hay.get_unchecked(end - needle.len() + i) } {
end -= self.period;
if !P::IS_LONG_PERIOD {
self.memory_back = self.period;
}
continue 'search;
}
}
if !P::IS_LONG_PERIOD {
self.memory_back = needle.len();
}
return Some((end - needle.len())..end);
}
}
#[inline]
pub(crate) fn next_back(&mut self, hay: &[T], range: Range<usize>) -> Option<Range<usize>> {
if self.memory != usize::MAX {
self.do_next_back::<ShortPeriod>(hay, range)
} else {
self.do_next_back::<LongPeriod>(hay, range)
}
}
#[inline]
pub(crate) fn new(needle: &'p [T]) -> Self {
let res_lt = T::maximal_suffix(needle, Ordering::Less);
let res_gt = T::maximal_suffix(needle, Ordering::Greater);
let (crit_pos, period) = max(res_lt, res_gt);
let byteset = Self::byteset_create(needle);
if needle[..crit_pos] == needle[period..(period + crit_pos)] {
let crit_pos_back = needle.len() - max(
T::reverse_maximal_suffix(needle, period, Ordering::Greater),
T::reverse_maximal_suffix(needle, period, Ordering::Less),
);
Self {
crit_pos,
crit_pos_back,
period,
byteset,
needle,
memory: 0,
memory_back: needle.len(),
}
} else {
Self {
crit_pos,
crit_pos_back: crit_pos,
period: max(crit_pos, needle.len() - crit_pos) + 1,
byteset,
needle,
memory: usize::MAX, memory_back: usize::MAX,
}
}
}
#[inline]
fn byteset_create(needle: &[T]) -> FastSkipByteset {
needle.iter().fold(0, |a, b| b.byteset_mask() | a)
}
#[inline]
fn byteset_contains(&self, item: &T) -> bool {
(self.byteset & item.byteset_mask()) != 0
}
}
unsafe impl<'p, T> Searcher<[T]> for TwoWaySearcher<'p, T>
where
T: PartialEq + 'p,
{
#[inline]
fn search(&mut self, span: Span<&[T]>) -> Option<Range<usize>> {
let (hay, range) = span.into_parts();
self.next(hay, range)
}
}
unsafe impl<'p, T> ReverseSearcher<[T]> for TwoWaySearcher<'p, T>
where
T: PartialEq + 'p,
{
#[inline]
fn rsearch(&mut self, span: Span<&[T]>) -> Option<Range<usize>> {
let (hay, range) = span.into_parts();
self.next_back(hay, range)
}
}
#[derive(Debug)]
pub struct NaiveSearcher<'p, T: 'p>(&'p [T]);
impl<'p, T: 'p> Clone for NaiveSearcher<'p, T> {
fn clone(&self) -> Self {
*self
}
}
impl<'p, T: 'p> Copy for NaiveSearcher<'p, T> {}
unsafe impl<'p, T> Consumer<[T]> for NaiveSearcher<'p, T>
where
T: PartialEq + 'p,
{
#[inline]
fn consume(&mut self, span: Span<&[T]>) -> Option<usize> {
let (hay, range) = span.into_parts();
let check_end = range.start + self.0.len();
if range.end < check_end {
return None;
}
if unsafe { hay.get_unchecked(range.start..check_end) } == self.0 {
Some(check_end)
} else {
None
}
}
}
unsafe impl<'p, T> ReverseConsumer<[T]> for NaiveSearcher<'p, T>
where
T: PartialEq + 'p,
{
#[inline]
fn rconsume(&mut self, span: Span<&[T]>) -> Option<usize> {
let (hay, range) = span.into_parts();
if range.start + self.0.len() > range.end {
return None;
}
let index = range.end - self.0.len();
if unsafe { hay.get_unchecked(index..range.end) } == self.0 {
Some(index)
} else {
None
}
}
}
impl<'p, T: 'p> NaiveSearcher<'p, T> {
#[inline]
pub fn new(slice: &'p [T]) -> Self {
NaiveSearcher(slice)
}
#[inline]
pub fn needle(&self) -> &'p [T] {
self.0
}
}
#[derive(Debug)]
pub enum SliceSearcher<'p, T: 'p> {
TwoWay(TwoWaySearcher<'p, T>),
Empty(EmptySearcher),
}
impl<'p, T: PartialEq + 'p> SliceSearcher<'p, T> {
#[inline]
pub fn new(slice: &'p [T]) -> Self {
if slice.is_empty() {
SliceSearcher::Empty(EmptySearcher::default())
} else {
SliceSearcher::TwoWay(TwoWaySearcher::new(slice))
}
}
#[inline]
pub fn needle(&self) -> &'p [T] {
match self {
SliceSearcher::TwoWay(s) => s.needle,
SliceSearcher::Empty(_) => &[],
}
}
}
impl<'p, T: 'p> Clone for SliceSearcher<'p, T> {
#[inline]
fn clone(&self) -> Self {
match self {
SliceSearcher::TwoWay(s) => SliceSearcher::TwoWay(*s),
SliceSearcher::Empty(s) => SliceSearcher::Empty(s.clone()),
}
}
}
macro_rules! forward {
(searcher: $self:expr, $s:ident => $e:expr) => {
match $self {
SliceSearcher::TwoWay($s) => $e,
SliceSearcher::Empty($s) => $e,
}
};
}
unsafe impl<'p, T, A> Searcher<A> for SliceSearcher<'p, T>
where
A: Hay<Index = usize> + ?Sized,
TwoWaySearcher<'p, T>: Searcher<A>,
{
#[inline]
fn search(&mut self, span: Span<&A>) -> Option<Range<usize>> {
forward!(searcher: self, s => s.search(span))
}
}
unsafe impl<'p, T, A> ReverseSearcher<A> for SliceSearcher<'p, T>
where
A: Hay<Index = usize> + ?Sized,
TwoWaySearcher<'p, T>: ReverseSearcher<A>,
{
#[inline]
fn rsearch(&mut self, span: Span<&A>) -> Option<Range<usize>> {
forward!(searcher: self, s => s.rsearch(span))
}
}
macro_rules! impl_needle {
(<[$($gen:tt)*]> $ty:ty) => {
impl<$($gen)*> Needle<$ty> for &'p [T]
where
T: PartialEq + 'p,
{
type Searcher = SliceSearcher<'p, T>;
type Consumer = NaiveSearcher<'p, T>;
#[inline]
fn into_searcher(self) -> Self::Searcher {
SliceSearcher::new(self)
}
#[inline]
fn into_consumer(self) -> Self::Consumer {
NaiveSearcher::new(self)
}
}
}
}
impl_needle!(<['p, 'h, T]> &'h [T]);
impl_needle!(<['p, 'h, T]> &'h mut [T]);
#[cfg(feature = "std")]
impl_needle!(<['p, T]> Vec<T>);