use crate::error::{SeqError, SeqResult};
pub fn z_array(s: &[u8]) -> SeqResult<Vec<usize>> {
if s.is_empty() {
return Err(SeqError::EmptyInput);
}
let n = s.len();
let mut z = vec![0usize; n];
let mut l = 0usize;
let mut r = 0usize;
for i in 1..n {
if i < r {
let mirror = i - l;
z[i] = z[mirror].min(r - i);
}
while i + z[i] < n && s[z[i]] == s[i + z[i]] {
z[i] += 1;
}
if i + z[i] > r {
l = i;
r = i + z[i];
}
}
Ok(z)
}
fn naive_match(p: &[u8], t: &[u8]) -> Vec<usize> {
let m = p.len();
let n = t.len();
if m == 0 || m > n {
return Vec::new();
}
let mut out = Vec::new();
for start in 0..=(n - m) {
if &t[start..start + m] == p {
out.push(start);
}
}
out
}
fn free_separator(p: &[u8], t: &[u8]) -> Option<u8> {
let mut used = [false; 256];
for &b in p {
used[b as usize] = true;
}
for &b in t {
used[b as usize] = true;
}
(0u16..256).find(|&v| !used[v as usize]).map(|v| v as u8)
}
pub fn z_search(pattern: &[u8], text: &[u8]) -> SeqResult<Vec<usize>> {
if pattern.is_empty() {
return Err(SeqError::EmptyInput);
}
let m = pattern.len();
if m > text.len() {
return Ok(Vec::new());
}
let sep = match free_separator(pattern, text) {
Some(sep) => sep,
None => return Ok(naive_match(pattern, text)),
};
let mut s = Vec::with_capacity(m + 1 + text.len());
s.extend_from_slice(pattern);
s.push(sep);
s.extend_from_slice(text);
let z = z_array(&s)?;
let mut out = Vec::new();
let offset = m + 1; for i in offset..s.len() {
if z[i] >= m {
out.push(i - offset);
}
}
Ok(out)
}
#[cfg(test)]
mod tests {
use super::*;
fn brute_force_z(s: &[u8]) -> Vec<usize> {
let n = s.len();
let mut z = vec![0usize; n];
for i in 1..n {
let mut k = 0;
while i + k < n && s[k] == s[i + k] {
k += 1;
}
z[i] = k;
}
z
}
fn naive_search(p: &[u8], t: &[u8]) -> Vec<usize> {
let (m, n) = (p.len(), t.len());
if m == 0 || m > n {
return Vec::new();
}
(0..=(n - m)).filter(|&i| &t[i..i + m] == p).collect()
}
fn random_bytes(rng: &mut crate::handle::LcgRng, alphabet: &[u8], len: usize) -> Vec<u8> {
(0..len)
.map(|_| alphabet[rng.next_usize(alphabet.len())])
.collect()
}
#[test]
fn z_array_matches_brute_force() {
let mut rng = crate::handle::LcgRng::new(0xED);
for &alphabet in &[b"ab".as_slice(), b"abc", b"abcd"] {
for _ in 0..500 {
let len = 1 + rng.next_usize(40);
let s = random_bytes(&mut rng, alphabet, len);
let fast = z_array(&s).expect("non-empty");
let slow = brute_force_z(&s);
assert_eq!(fast, slow, "Z-array mismatch for {s:?}");
}
}
}
#[test]
fn search_finds_all_occurrences() {
let mut rng = crate::handle::LcgRng::new(7);
for &alphabet in &[b"ab".as_slice(), b"abc"] {
for _ in 0..500 {
let tlen = 1 + rng.next_usize(40);
let plen = 1 + rng.next_usize(6);
let t = random_bytes(&mut rng, alphabet, tlen);
let p = random_bytes(&mut rng, alphabet, plen);
let got = z_search(&p, &t).expect("non-empty pattern");
let want = naive_search(&p, &t);
assert_eq!(got, want, "search mismatch for p={p:?} t={t:?}");
}
}
}
#[test]
fn periodic_strings_have_expected_z() {
let z = z_array(b"aaaa").expect("non-empty");
assert_eq!(z, vec![0, 3, 2, 1]);
let z = z_array(b"abcabc").expect("non-empty");
assert_eq!(z, vec![0, 0, 0, 3, 0, 0]);
let z = z_array(b"ababab").expect("non-empty");
assert_eq!(z, vec![0, 0, 4, 0, 2, 0]);
}
#[test]
fn z0_convention_is_zero() {
for s in [
b"a".as_slice(),
b"abc",
b"aaaa",
b"abacabad",
b"mississippi",
] {
let z = z_array(s).expect("non-empty");
assert_eq!(z[0], 0, "z[0] must be 0 by convention for {s:?}");
}
}
#[test]
fn single_char_and_empty() {
let z = z_array(b"x").expect("non-empty");
assert_eq!(z, vec![0]);
assert!(matches!(z_array(b""), Err(SeqError::EmptyInput)));
assert!(matches!(z_search(b"", b"abc"), Err(SeqError::EmptyInput)));
assert!(z_search(b"abcd", b"ab").expect("ok").is_empty());
}
#[test]
fn no_false_positives_across_separator() {
assert!(z_search(b"ba", b"xb").expect("ok").is_empty());
assert!(z_search(b"ab", b"zzza").expect("ok").is_empty());
assert_eq!(z_search(b"ab", b"zzzab").expect("ok"), vec![3]);
}
#[test]
fn boundary_patterns() {
assert_eq!(z_search(b"abc", b"abc").expect("ok"), vec![0]);
assert_eq!(z_search(b"c", b"aabc").expect("ok"), vec![3]);
assert_eq!(z_search(b"aa", b"aab").expect("ok"), vec![0]);
}
#[test]
fn full_alphabet_fallback() {
let text: Vec<u8> = (0u16..256).map(|v| v as u8).collect();
let pattern = &text[100..104];
assert!(free_separator(pattern, &text).is_none());
let got = z_search(pattern, &text).expect("ok");
assert_eq!(got, vec![100]);
let absent = [200u8, 5u8, 200u8];
let got = z_search(&absent, &text).expect("ok");
assert!(got.is_empty());
}
}