fn kmp_failure_function(pattern: &[u8]) -> Vec<usize> {
let m = pattern.len();
if m == 0 {
return Vec::new();
}
let mut failure = vec![0usize; m];
let mut k = 0usize;
let mut i = 1usize;
while i < m {
while k > 0 && pattern[k] != pattern[i] {
k = failure[k - 1];
}
if pattern[k] == pattern[i] {
k += 1;
}
failure[i] = k;
i += 1;
}
failure
}
pub fn kmp_search(text: &[u8], pattern: &[u8]) -> Vec<usize> {
let n = text.len();
let m = pattern.len();
if m == 0 || m > n {
return Vec::new();
}
let failure = kmp_failure_function(pattern);
let mut results = Vec::new();
let mut q = 0usize;
for (i, &c) in text.iter().enumerate() {
while q > 0 && pattern[q] != c {
q = failure[q - 1];
}
if pattern[q] == c {
q += 1;
}
if q == m {
results.push(i + 1 - m);
q = failure[q - 1];
}
}
results
}
pub fn rabin_karp_search(text: &[u8], pattern: &[u8]) -> Vec<usize> {
let n = text.len();
let m = pattern.len();
if m == 0 || m > n {
return Vec::new();
}
const BASE: u64 = 131;
const MOD: u64 = 1_000_000_007;
let mut high_power = 1u64;
for _ in 0..m.saturating_sub(1) {
high_power = high_power.wrapping_mul(BASE) % MOD;
}
let mut pat_hash = 0u64;
for &b in pattern {
pat_hash = (pat_hash.wrapping_mul(BASE) + b as u64) % MOD;
}
let mut txt_hash = 0u64;
for &b in &text[..m] {
txt_hash = (txt_hash.wrapping_mul(BASE) + b as u64) % MOD;
}
let mut results = Vec::new();
if txt_hash == pat_hash && text[..m] == *pattern {
results.push(0);
}
for i in 1..=(n - m) {
let remove_contribution = (text[i - 1] as u64).wrapping_mul(high_power) % MOD;
let h = (txt_hash + MOD - remove_contribution) % MOD;
txt_hash = (h.wrapping_mul(BASE) + text[i + m - 1] as u64) % MOD;
if txt_hash == pat_hash && text[i..i + m] == *pattern {
results.push(i);
}
}
results
}
pub fn boyer_moore_search(text: &[u8], pattern: &[u8]) -> Vec<usize> {
let n = text.len();
let m = pattern.len();
if m == 0 || m > n {
return Vec::new();
}
let mut shift = [m; 256];
for (i, &b) in pattern[..m - 1].iter().enumerate() {
shift[b as usize] = m - 1 - i;
}
let mut results = Vec::new();
let mut i = 0usize;
while i + m <= n {
let mut j = m;
while j > 0 && text[i + j - 1] == pattern[j - 1] {
j -= 1;
}
if j == 0 {
results.push(i);
i += 1; } else {
let bad_char_shift = shift[text[i + m - 1] as usize];
i += if bad_char_shift == 0 {
1
} else {
bad_char_shift
};
}
}
results
}
pub fn levenshtein_distance(a: &str, b: &str) -> usize {
let a = a.as_bytes();
let b = b.as_bytes();
let m = a.len();
let n = b.len();
if m == 0 {
return n;
}
if n == 0 {
return m;
}
let mut prev: Vec<usize> = (0..=n).collect();
let mut curr: Vec<usize> = vec![0; n + 1];
for i in 1..=m {
curr[0] = i;
for j in 1..=n {
let cost = if a[i - 1] == b[j - 1] { 0 } else { 1 };
curr[j] = (prev[j] + 1).min(curr[j - 1] + 1).min(prev[j - 1] + cost);
}
std::mem::swap(&mut prev, &mut curr);
}
prev[n]
}
pub fn lcs_length(a: &str, b: &str) -> usize {
let a = a.as_bytes();
let b = b.as_bytes();
let m = a.len();
let n = b.len();
if m == 0 || n == 0 {
return 0;
}
let mut prev = vec![0usize; n + 1];
let mut curr = vec![0usize; n + 1];
for i in 1..=m {
for j in 1..=n {
curr[j] = if a[i - 1] == b[j - 1] {
prev[j - 1] + 1
} else {
prev[j].max(curr[j - 1])
};
}
std::mem::swap(&mut prev, &mut curr);
for x in curr.iter_mut() {
*x = 0;
}
}
prev[n]
}
pub fn lcs_string(a: &str, b: &str) -> String {
let a_bytes = a.as_bytes();
let b_bytes = b.as_bytes();
let m = a_bytes.len();
let n = b_bytes.len();
if m == 0 || n == 0 {
return String::new();
}
let mut dp = vec![vec![0usize; n + 1]; m + 1];
for i in 1..=m {
for j in 1..=n {
dp[i][j] = if a_bytes[i - 1] == b_bytes[j - 1] {
dp[i - 1][j - 1] + 1
} else {
dp[i - 1][j].max(dp[i][j - 1])
};
}
}
let mut result = Vec::with_capacity(dp[m][n]);
let mut i = m;
let mut j = n;
while i > 0 && j > 0 {
if a_bytes[i - 1] == b_bytes[j - 1] {
result.push(a_bytes[i - 1]);
i -= 1;
j -= 1;
} else if dp[i - 1][j] > dp[i][j - 1] {
i -= 1;
} else {
j -= 1;
}
}
result.reverse();
String::from_utf8(result).unwrap_or_else(|e| String::from_utf8_lossy(e.as_bytes()).into_owned())
}
#[derive(Debug, Clone)]
pub struct SuffixArray {
text: Vec<u8>,
sa: Vec<usize>,
isa: Vec<usize>,
lcp: Vec<usize>,
}
impl SuffixArray {
pub fn new(text: &[u8]) -> Self {
let n = text.len();
if n == 0 {
return SuffixArray {
text: Vec::new(),
sa: Vec::new(),
isa: Vec::new(),
lcp: Vec::new(),
};
}
let mut sa: Vec<usize> = (0..n).collect();
let mut rank: Vec<i64> = text.iter().map(|&b| b as i64).collect();
sa.sort_by_key(|&i| rank[i]);
let mut gap = 1usize;
while gap < n {
let cur_rank = rank.clone();
let second_rank = |i: usize| -> i64 {
if i + gap < n {
cur_rank[i + gap]
} else {
-1
}
};
sa.sort_by(|&a, &b| {
let ka = (cur_rank[a], second_rank(a));
let kb = (cur_rank[b], second_rank(b));
ka.cmp(&kb)
});
let mut new_rank = vec![0i64; n];
new_rank[sa[0]] = 0;
for i in 1..n {
let prev = sa[i - 1];
let cur = sa[i];
let same = cur_rank[prev] == cur_rank[cur] && second_rank(prev) == second_rank(cur);
new_rank[cur] = new_rank[prev] + if same { 0 } else { 1 };
}
rank = new_rank;
if rank[sa[n - 1]] == (n as i64 - 1) {
break;
}
gap <<= 1;
}
let mut isa = vec![0usize; n];
for (i, &s) in sa.iter().enumerate() {
isa[s] = i;
}
let lcp = Self::build_lcp(text, &sa, &isa);
SuffixArray {
text: text.to_vec(),
sa,
isa,
lcp,
}
}
fn build_lcp(text: &[u8], sa: &[usize], isa: &[usize]) -> Vec<usize> {
let n = text.len();
let mut lcp = vec![0usize; n];
let mut h = 0usize;
for i in 0..n {
if isa[i] > 0 {
let j = sa[isa[i] - 1];
while i + h < n && j + h < n && text[i + h] == text[j + h] {
h += 1;
}
lcp[isa[i]] = h;
h = h.saturating_sub(1);
}
}
lcp
}
pub fn search(&self, pattern: &[u8]) -> Vec<usize> {
if pattern.is_empty() || self.text.is_empty() {
return Vec::new();
}
let n = self.text.len();
let m = pattern.len();
let lo = {
let mut low = 0usize;
let mut high = n;
while low < high {
let mid = low + (high - low) / 2;
let start = self.sa[mid];
let end = (start + m).min(n);
if self.text[start..end] < *pattern {
low = mid + 1;
} else {
high = mid;
}
}
low
};
let hi = {
let mut low = lo;
let mut high = n;
while low < high {
let mid = low + (high - low) / 2;
let start = self.sa[mid];
let end = (start + m).min(n);
if self.text[start..end] <= *pattern {
low = mid + 1;
} else {
high = mid;
}
}
low
};
if lo < hi {
let mut positions: Vec<usize> = self.sa[lo..hi].to_vec();
positions.sort_unstable();
positions
} else {
Vec::new()
}
}
pub fn lcp_array(&self) -> Vec<usize> {
self.lcp.clone()
}
pub fn sa(&self) -> &[usize] {
&self.sa
}
pub fn isa(&self) -> &[usize] {
&self.isa
}
pub fn text(&self) -> &[u8] {
&self.text
}
}
pub fn bwt_encode(text: &[u8]) -> (Vec<u8>, usize) {
let n = text.len();
if n == 0 {
return (Vec::new(), 0);
}
let sa = SuffixArray::new(text);
let mut bwt = Vec::with_capacity(n);
let mut original_pos = 0usize;
for (i, &start) in sa.sa.iter().enumerate() {
if start == 0 {
bwt.push(text[n - 1]);
original_pos = i;
} else {
bwt.push(text[start - 1]);
}
}
(bwt, original_pos)
}
pub fn bwt_decode(bwt: &[u8], idx: usize) -> Vec<u8> {
let n = bwt.len();
if n == 0 {
return Vec::new();
}
let mut count = [0usize; 256];
for &b in bwt {
count[b as usize] += 1;
}
let mut first_occ = [0usize; 256];
let mut total = 0usize;
for (b, c) in count.iter().enumerate() {
first_occ[b] = total;
total += c;
}
let mut byte_rank = [0usize; 256];
let mut lf = vec![0usize; n];
for (i, &b) in bwt.iter().enumerate() {
lf[i] = first_occ[b as usize] + byte_rank[b as usize];
byte_rank[b as usize] += 1;
}
let mut result = vec![0u8; n];
let mut row = idx;
for slot in result.iter_mut().rev() {
*slot = bwt[row];
row = lf[row];
}
result
}
pub fn fnv1a_hash(data: &[u8]) -> u64 {
const FNV_OFFSET: u64 = 14_695_981_039_346_656_037;
const FNV_PRIME: u64 = 1_099_511_628_211;
let mut hash = FNV_OFFSET;
for &b in data {
hash ^= b as u64;
hash = hash.wrapping_mul(FNV_PRIME);
}
hash
}
pub fn djb2_hash(data: &[u8]) -> u64 {
let mut hash: u64 = 5381;
for &b in data {
hash = hash.wrapping_mul(33).wrapping_add(b as u64);
}
hash
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_string_algo_kmp_basic() {
let pos = kmp_search(b"aababcab", b"ab");
assert_eq!(pos, vec![1, 3, 6]);
}
#[test]
fn test_string_algo_kmp_no_match() {
let pos = kmp_search(b"hello", b"xyz");
assert!(pos.is_empty());
}
#[test]
fn test_string_algo_kmp_overlapping() {
let pos = kmp_search(b"aaa", b"aa");
assert_eq!(pos, vec![0, 1]);
}
#[test]
fn test_string_algo_kmp_full_text_match() {
let pos = kmp_search(b"abcabc", b"abcabc");
assert_eq!(pos, vec![0]);
}
#[test]
fn test_string_algo_rabin_karp_basic() {
let pos = rabin_karp_search(b"ababab", b"ab");
assert_eq!(pos, vec![0, 2, 4]);
}
#[test]
fn test_string_algo_rabin_karp_no_match() {
let pos = rabin_karp_search(b"hello world", b"xyz");
assert!(pos.is_empty());
}
#[test]
fn test_string_algo_rabin_karp_overlapping() {
let pos = rabin_karp_search(b"aaa", b"aa");
assert_eq!(pos, vec![0, 1]);
}
#[test]
fn test_string_algo_boyer_moore_basic() {
let pos = boyer_moore_search(b"AABAAABAAABAA", b"AAB");
assert_eq!(pos, vec![0, 4, 8]);
}
#[test]
fn test_string_algo_boyer_moore_no_match() {
let pos = boyer_moore_search(b"hello world", b"xyz");
assert!(pos.is_empty());
}
#[test]
fn test_string_algo_boyer_moore_single_char() {
let pos = boyer_moore_search(b"aaa", b"a");
assert_eq!(pos, vec![0, 1, 2]);
}
#[test]
fn test_string_algo_levenshtein_kitten_sitting() {
assert_eq!(levenshtein_distance("kitten", "sitting"), 3);
}
#[test]
fn test_string_algo_levenshtein_empty() {
assert_eq!(levenshtein_distance("", "abc"), 3);
assert_eq!(levenshtein_distance("abc", ""), 3);
assert_eq!(levenshtein_distance("", ""), 0);
}
#[test]
fn test_string_algo_levenshtein_identical() {
assert_eq!(levenshtein_distance("hello", "hello"), 0);
}
#[test]
fn test_string_algo_levenshtein_single_substitution() {
assert_eq!(levenshtein_distance("abc", "axc"), 1);
}
#[test]
fn test_string_algo_lcs_length_basic() {
assert_eq!(lcs_length("ABCBDAB", "BDCAB"), 4);
}
#[test]
fn test_string_algo_lcs_length_identical() {
assert_eq!(lcs_length("abc", "abc"), 3);
}
#[test]
fn test_string_algo_lcs_length_disjoint() {
assert_eq!(lcs_length("abc", "xyz"), 0);
}
#[test]
fn test_string_algo_lcs_string_length() {
let result = lcs_string("ABCBDAB", "BDCAB");
assert_eq!(result.len(), 4);
}
#[test]
fn test_string_algo_lcs_string_valid_subsequence() {
let a = "ABCBDAB";
let b = "BDCAB";
let seq = lcs_string(a, b);
let a_bytes = a.as_bytes();
let mut ai = 0usize;
for c in seq.bytes() {
while ai < a_bytes.len() && a_bytes[ai] != c {
ai += 1;
}
assert!(ai < a_bytes.len(), "LCS element not found in a");
ai += 1;
}
}
#[test]
fn test_string_algo_suffix_array_banana() {
let sa = SuffixArray::new(b"banana");
assert_eq!(sa.sa(), &[5, 3, 1, 0, 4, 2]);
}
#[test]
fn test_string_algo_suffix_array_search() {
let sa = SuffixArray::new(b"banana");
let mut positions = sa.search(b"ana");
positions.sort_unstable();
assert_eq!(positions, vec![1, 3]);
}
#[test]
fn test_string_algo_suffix_array_search_not_found() {
let sa = SuffixArray::new(b"banana");
let positions = sa.search(b"xyz");
assert!(positions.is_empty());
}
#[test]
fn test_string_algo_suffix_array_lcp() {
let sa = SuffixArray::new(b"banana");
let lcp = sa.lcp_array();
assert_eq!(lcp[0], 0);
assert_eq!(lcp[1], 1); assert_eq!(lcp[2], 3); assert_eq!(lcp[3], 0); assert_eq!(lcp[4], 0); assert_eq!(lcp[5], 2); }
#[test]
fn test_string_algo_suffix_array_empty() {
let sa = SuffixArray::new(b"");
assert!(sa.sa().is_empty());
assert!(sa.lcp_array().is_empty());
}
#[test]
fn test_string_algo_bwt_roundtrip_banana() {
let (bwt, pos) = bwt_encode(b"banana");
let decoded = bwt_decode(&bwt, pos);
assert_eq!(decoded, b"banana");
}
#[test]
fn test_string_algo_bwt_roundtrip_mississippi() {
let (bwt, pos) = bwt_encode(b"mississippi");
let decoded = bwt_decode(&bwt, pos);
assert_eq!(decoded, b"mississippi");
}
#[test]
fn test_string_algo_bwt_roundtrip_empty() {
let (bwt, pos) = bwt_encode(b"");
let decoded = bwt_decode(&bwt, pos);
assert_eq!(decoded, b"");
}
#[test]
fn test_string_algo_bwt_roundtrip_repeated() {
let original = b"aaaaaa";
let (bwt, pos) = bwt_encode(original);
let decoded = bwt_decode(&bwt, pos);
assert_eq!(decoded, original.as_ref());
}
#[test]
fn test_string_algo_fnv1a_deterministic() {
assert_eq!(fnv1a_hash(b"hello"), fnv1a_hash(b"hello"));
}
#[test]
fn test_string_algo_fnv1a_different_strings() {
assert_ne!(fnv1a_hash(b"hello"), fnv1a_hash(b"world"));
}
#[test]
fn test_string_algo_fnv1a_empty() {
assert_eq!(fnv1a_hash(b""), 14_695_981_039_346_656_037u64);
}
#[test]
fn test_string_algo_djb2_deterministic() {
assert_eq!(djb2_hash(b"hello"), djb2_hash(b"hello"));
}
#[test]
fn test_string_algo_djb2_different_strings() {
assert_ne!(djb2_hash(b"foo"), djb2_hash(b"bar"));
}
#[test]
fn test_string_algo_all_search_agree() {
let text = b"abcabcabc";
let pattern = b"abc";
let kmp_result = kmp_search(text, pattern);
let rk_result = rabin_karp_search(text, pattern);
let bm_result = boyer_moore_search(text, pattern);
assert_eq!(kmp_result, rk_result);
assert_eq!(kmp_result, bm_result);
}
#[test]
fn test_string_algo_pattern_longer_than_text() {
assert!(kmp_search(b"ab", b"abc").is_empty());
assert!(rabin_karp_search(b"ab", b"abc").is_empty());
assert!(boyer_moore_search(b"ab", b"abc").is_empty());
}
}