use crate::learn::Alphabet;
use crate::sfa::{BytePred, Sfa, StateId};
fn kmp_sfa(needle: &[u8]) -> Sfa {
assert!(!needle.is_empty(), "attack needle must be non-empty");
let m = needle.len();
let mut fail = vec![0usize; m];
let mut k = 0;
for i in 1..m {
while k > 0 && needle[i] != needle[k] {
k = fail[k - 1];
}
if needle[i] == needle[k] {
k += 1;
}
fail[i] = k;
}
let kmp_next = |mut j: usize, c: u8| -> usize {
if j == m {
return m; }
while j > 0 && c != needle[j] {
j = fail[j - 1];
}
if c == needle[j] {
j += 1;
}
j
};
let n_states = m + 1;
let mut accept = vec![false; n_states];
accept[m] = true;
let mut delta: Vec<Vec<(BytePred, StateId)>> = Vec::with_capacity(n_states);
for st in 0..n_states {
if st == m {
delta.push(vec![(BytePred::any(), m)]); continue;
}
let mut groups: Vec<Option<BytePred>> = vec![None; n_states];
for b in 0u8..=255u8 {
let t = kmp_next(st, b);
match &mut groups[t] {
Some(pred) => pred.insert(b),
slot @ None => {
let mut pred = BytePred::none();
pred.insert(b);
*slot = Some(pred);
}
}
}
let mut row: Vec<(BytePred, StateId)> = Vec::new();
for (t, opt) in groups.into_iter().enumerate() {
if let Some(pred) = opt {
row.push((pred, t));
}
}
delta.push(row);
}
Sfa::new(0, accept, delta)
}
#[must_use]
pub fn attack_grammar(_alpha: &Alphabet, needles: &[&[u8]]) -> Sfa {
let mut it = needles.iter();
let Some(first) = it.next() else {
return Sfa::new(0, vec![false], vec![vec![(BytePred::any(), 0)]]);
};
let mut g = kmp_sfa(first);
for n in it {
g = g.union(&kmp_sfa(n));
}
g
}
#[must_use]
pub fn mine_bypasses(learned: &Sfa, attack: &Sfa, max: usize, max_len: usize) -> Vec<Vec<u8>> {
learned.intersect(attack).enumerate_accepted(max, max_len)
}
#[must_use]
pub fn minimal_bypass(learned: &Sfa, attack: &Sfa) -> Option<Vec<u8>> {
learned.intersect(attack).shortest_accepted()
}
#[must_use]
pub fn waf_diff(a: &Sfa, b: &Sfa, max: usize, max_len: usize) -> Vec<Vec<u8>> {
a.difference(b)
.union(&b.difference(a))
.enumerate_accepted(max, max_len)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::learn::Alphabet;
fn pass_all(alpha: &Alphabet) -> Sfa {
let mut row = Vec::with_capacity(alpha.len());
for a in 0..alpha.len() {
row.push((alpha.guard(a), 0));
}
Sfa::new(0, vec![true], vec![row])
}
#[test]
fn kmp_sfa_matches_needle_with_catch_all_bytes() {
let alpha = Alphabet::new(vec![], b'Z');
let g = attack_grammar(&alpha, &[b"select"]);
let learned = pass_all(&alpha);
let bypasses = mine_bypasses(&learned, &g, 10, 20);
assert!(
!bypasses.is_empty(),
"attack_grammar must find words containing b\"select\" \
even when all needle bytes are in the catch-all class; \
got zero (F-MINE-01 regression)"
);
for w in &bypasses {
assert!(
w.windows(6).any(|s| s == b"select"),
"mined word {w:?} does not contain the needle b\"select\""
);
}
}
#[test]
fn kmp_sfa_matches_needle_with_mixed_distinguished_and_catch_all_bytes() {
let alpha = Alphabet::new(vec![b'\''], b'Z');
let g = attack_grammar(&alpha, &[b"'or'"]);
let learned = pass_all(&alpha);
let bypasses = mine_bypasses(&learned, &g, 10, 12);
assert!(
!bypasses.is_empty(),
"attack_grammar must find words containing b\"'or'\" \
with mixed distinguished/catch-all bytes; got zero (F-MINE-01)"
);
for w in &bypasses {
assert!(
w.windows(4).any(|s| s == b"'or'"),
"mined word {w:?} does not contain the needle"
);
}
}
#[test]
fn kmp_sfa_matches_needle_whose_bytes_happen_to_be_the_representative() {
let alpha = Alphabet::new(vec![], b'Z');
let g = attack_grammar(&alpha, &[b"ZZZ"]);
let learned = pass_all(&alpha);
let bypasses = mine_bypasses(&learned, &g, 5, 10);
assert!(
!bypasses.is_empty(),
"attack_grammar must find words containing b\"ZZZ\" \
when the needle IS the representative"
);
for w in &bypasses {
assert!(
w.windows(3).any(|s| s == b"ZZZ"),
"mined word {w:?} does not contain b\"ZZZ\""
);
}
}
#[test]
fn kmp_sfa_matches_fully_distinguished_needle() {
let alpha = Alphabet::new(vec![b'a', b'b'], b'Z');
let g = attack_grammar(&alpha, &[b"aba"]);
let learned = pass_all(&alpha);
let bypasses = mine_bypasses(&learned, &g, 5, 10);
assert!(
!bypasses.is_empty(),
"attack_grammar must find words containing b\"aba\""
);
for w in &bypasses {
assert!(
w.windows(3).any(|s| s == b"aba"),
"mined word {w:?} does not contain b\"aba\""
);
}
}
#[test]
fn attack_grammar_empty_needles_accepts_nothing() {
let alpha = Alphabet::new(vec![b'a'], b'Z');
let g = attack_grammar(&alpha, &[]);
let learned = pass_all(&alpha);
let bypasses = mine_bypasses(&learned, &g, 10, 20);
assert!(
bypasses.is_empty(),
"empty needle set must produce zero bypasses, got {bypasses:?}"
);
}
#[test]
fn kmp_sfa_self_overlapping_catch_all_needle() {
let alpha = Alphabet::new(vec![], b'X');
let g = attack_grammar(&alpha, &[b"aaa"]);
let learned = pass_all(&alpha);
let bypasses = mine_bypasses(&learned, &g, 5, 10);
assert!(
!bypasses.is_empty(),
"attack_grammar must find words containing b\"aaa\" (all catch-all, self-overlapping)"
);
for w in &bypasses {
assert!(
w.windows(3).any(|s| s == b"aaa"),
"mined word {w:?} does not contain b\"aaa\""
);
}
}
#[test]
fn attack_grammar_union_of_distinguished_and_catch_all_needles() {
let alpha = Alphabet::new(vec![b'<', b'/'], b'Z');
let g = attack_grammar(&alpha, &[b"</", b"union"]);
let learned = pass_all(&alpha);
let g_angle = attack_grammar(&alpha, &[b"</"]);
let shortest_angle = learned
.intersect(&g_angle)
.shortest_accepted()
.expect("union grammar must accept words containing b\"</\"");
assert!(
shortest_angle.windows(2).any(|s| s == b"</"),
"shortest word {shortest_angle:?} does not contain b\"</\""
);
let g_union = attack_grammar(&alpha, &[b"union"]);
let shortest_union = learned
.intersect(&g_union)
.shortest_accepted()
.expect("union grammar must accept words containing b\"union\" (catch-all needle)");
assert!(
shortest_union.windows(5).any(|s| s == b"union"),
"shortest word {shortest_union:?} does not contain b\"union\""
);
assert!(
!learned.intersect(&g).is_language_empty(),
"combined union grammar must be non-empty"
);
}
}