use std::cmp::{max, min};
pub struct MaxShiftHorspool {
skip_table: [usize; 256],
suffix: Vec<usize>,
pattern: Vec<u8>,
}
impl MaxShiftHorspool {
pub fn new(pattern: &str) -> Self {
let pattern_bytes = pattern.as_bytes().to_vec();
let length = pattern_bytes.len();
let mut skip_table = [length; 256];
for i in 0..length - 1 {
skip_table[pattern_bytes[i] as usize] = length - 1 - i;
}
let suffix = Self::build_suffix_array(&pattern_bytes);
Self {
skip_table,
suffix,
pattern: pattern_bytes,
}
}
fn build_suffix_array(pattern: &[u8]) -> Vec<usize> {
let length = pattern.len();
let mut suffix = vec![0; length];
let mut z = vec![0; length + 1];
if length == 0 {
return suffix;
}
let reversed: Vec<u8> = pattern.iter().rev().copied().collect();
z[0] = length;
let mut l = 0;
let mut r = 0;
for i in 1..length {
if i <= r {
z[i] = min(r - i + 1, z[i - l]);
}
while i + z[i] < length && reversed[z[i]] == reversed[i + z[i]] {
z[i] += 1;
}
if i + z[i] - 1 > r {
l = i;
r = i + z[i] - 1;
}
}
(0..length).for_each(|i| {
suffix[i] = length;
});
(0..length).for_each(|i| {
let j = i + z[i] - 1;
if j < length {
suffix[j] = length - z[i];
}
});
suffix
}
pub fn find_all(&self, text: &str) -> Vec<usize> {
let text_bytes = text.as_bytes();
let text_length = text_bytes.len();
let pattern_length = self.pattern.len();
if pattern_length == 0 || pattern_length > text_length {
return Vec::new();
}
let mut position = Vec::new();
let mut pos = 0;
while pos <= text_length - pattern_length {
let mut i = pattern_length - 1;
while i < pattern_length && self.pattern[i] == text_bytes[pos + i] {
if i == 0 {
position.push(pos);
break;
}
i -= 1;
}
if i < pattern_length && i != usize::MAX {
let bad_apple = self.skip_table[text_bytes[pos + pattern_length - 1] as usize];
let good_girl = if i < pattern_length - 1 {
self.suffix[i + 1]
} else {
1
};
pos += max(bad_apple, good_girl);
} else {
pos += 1;
}
}
position
}
}