use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct BypassPayload {
pub id: String,
pub payload: String,
pub rule_classes: Vec<String>,
pub score: f64,
}
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MinBypassSetResult {
pub min_set: Vec<BypassPayload>,
pub classes_covered: usize,
pub input_count: usize,
pub compression_ratio: f64,
pub likely_optimal: bool,
}
#[must_use]
pub fn compute_min_bypass_set(payloads: &[BypassPayload]) -> MinBypassSetResult {
let input_count = payloads.len();
if payloads.is_empty() {
return MinBypassSetResult {
min_set: Vec::new(),
classes_covered: 0,
input_count: 0,
compression_ratio: 1.0,
likely_optimal: true,
};
}
let universe: HashSet<&str> = payloads
.iter()
.flat_map(|p| p.rule_classes.iter().map(String::as_str))
.collect();
let total_classes = universe.len();
if total_classes == 0 {
return MinBypassSetResult {
min_set: Vec::new(),
classes_covered: 0,
input_count,
compression_ratio: input_count as f64,
likely_optimal: true,
};
}
let payload_sets: Vec<HashSet<&str>> = payloads
.iter()
.map(|p| p.rule_classes.iter().map(String::as_str).collect())
.collect();
let mut uncovered: HashSet<&str> = universe.clone();
let mut selected: Vec<usize> = Vec::new();
let mut available: Vec<bool> = vec![true; payloads.len()];
while !uncovered.is_empty() {
let best = (0..payloads.len())
.filter(|&i| available[i])
.max_by(|&a, &b| {
let cover_a = payload_sets[a].intersection(&uncovered).count();
let cover_b = payload_sets[b].intersection(&uncovered).count();
let cmp = cover_a.cmp(&cover_b);
if cmp != std::cmp::Ordering::Equal {
return cmp;
}
let score_a = finite_score(payloads[a].score);
let score_b = finite_score(payloads[b].score);
score_a
.partial_cmp(&score_b)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| payloads[b].id.cmp(&payloads[a].id))
});
match best {
None => break, Some(idx) => {
let new_coverage: usize = payload_sets[idx].intersection(&uncovered).count();
if new_coverage == 0 {
break;
}
for class in &payload_sets[idx] {
uncovered.remove(class);
}
selected.push(idx);
available[idx] = false;
}
}
}
let min_set: Vec<BypassPayload> = selected.iter().map(|&idx| payloads[idx].clone()).collect();
let classes_covered = total_classes - uncovered.len();
let min_len = min_set.len().max(1);
let compression_ratio = input_count as f64 / min_len as f64;
let likely_optimal =
min_set.len() == 1 || (classes_covered > 0 && classes_covered == min_set.len());
MinBypassSetResult {
min_set,
classes_covered,
input_count,
compression_ratio,
likely_optimal,
}
}
#[must_use]
pub fn compute_min_bypass_set_streaming<I>(payloads: I) -> MinBypassSetResult
where
I: IntoIterator<Item = BypassPayload>,
{
let collected: Vec<BypassPayload> = payloads.into_iter().collect();
compute_min_bypass_set(&collected)
}
#[must_use]
pub fn format_min_bypass_summary(result: &MinBypassSetResult) -> String {
let mut out = String::new();
out.push_str(&format!(
"Min-bypass set: {} / {} payloads cover {} rule classes ({:.1}x compression){}\n",
result.min_set.len(),
result.input_count,
result.classes_covered,
result.compression_ratio,
if result.likely_optimal {
" [likely optimal]"
} else {
""
},
));
for (i, p) in result.min_set.iter().enumerate() {
let classes = p.rule_classes.join(", ");
out.push_str(&format!(
" {}. [{}] {} (score={:.3})\n",
i + 1,
p.id,
classes,
finite_score(p.score),
));
}
out
}
#[must_use]
pub fn class_coverage_map<'a>(result: &'a MinBypassSetResult) -> HashMap<&'a str, &'a str> {
let mut map: HashMap<&'a str, &'a str> = HashMap::new();
for p in &result.min_set {
for class in &p.rule_classes {
map.entry(class.as_str()).or_insert(p.id.as_str());
}
}
map
}
#[inline]
fn finite_score(s: f64) -> f64 {
if s.is_finite() { s } else { 0.0 }
}
#[cfg(test)]
mod tests {
use super::*;
fn bp(id: &str, classes: &[&str], score: f64) -> BypassPayload {
BypassPayload {
id: id.into(),
payload: format!("payload_for_{id}"),
rule_classes: classes.iter().map(|s| (*s).into()).collect(),
score,
}
}
#[test]
fn empty_input_returns_empty_result() {
let r = compute_min_bypass_set(&[]);
assert!(r.min_set.is_empty());
assert_eq!(r.classes_covered, 0);
assert_eq!(r.input_count, 0);
assert_eq!(r.compression_ratio, 1.0);
}
#[test]
fn single_payload_is_its_own_min_set() {
let p = bp("p1", &["sqli"], 0.9);
let r = compute_min_bypass_set(std::slice::from_ref(&p));
assert_eq!(r.min_set.len(), 1);
assert_eq!(r.min_set[0].id, "p1");
assert_eq!(r.classes_covered, 1);
}
#[test]
fn no_classes_produces_empty_min_set() {
let p = BypassPayload {
id: "p1".into(),
payload: "x".into(),
rule_classes: Vec::new(),
score: 0.5,
};
let r = compute_min_bypass_set(&[p]);
assert!(r.min_set.is_empty());
assert_eq!(r.classes_covered, 0);
}
#[test]
fn two_disjoint_classes_require_two_payloads() {
let payloads = vec![bp("p1", &["sqli"], 0.9), bp("p2", &["xss"], 0.8)];
let r = compute_min_bypass_set(&payloads);
assert_eq!(r.min_set.len(), 2, "two disjoint classes need two payloads");
assert_eq!(r.classes_covered, 2);
}
#[test]
fn single_payload_covering_all_classes_wins() {
let payloads = vec![
bp("p1", &["a"], 0.5),
bp("p2", &["b"], 0.5),
bp("p3", &["a", "b"], 0.9), ];
let r = compute_min_bypass_set(&payloads);
assert_eq!(r.min_set.len(), 1);
assert_eq!(r.min_set[0].id, "p3");
assert_eq!(r.classes_covered, 2);
}
#[test]
fn greedy_picks_higher_score_on_tie() {
let payloads = vec![
bp("p1", &["sqli"], 0.7),
bp("p2", &["sqli"], 0.9), ];
let r = compute_min_bypass_set(&payloads);
assert_eq!(r.min_set.len(), 1);
assert_eq!(r.min_set[0].id, "p2", "higher-score payload should win tie");
}
#[test]
fn min_set_is_subset_of_input() {
let payloads = vec![
bp("p1", &["a", "b"], 0.9),
bp("p2", &["b", "c"], 0.8),
bp("p3", &["c", "d"], 0.7),
bp("p4", &["a"], 0.6),
bp("p5", &["d"], 0.5),
];
let r = compute_min_bypass_set(&payloads);
for p in &r.min_set {
assert!(
payloads.iter().any(|orig| orig.id == p.id),
"min_set must be a subset of input: {} not found",
p.id
);
}
}
#[test]
fn result_covers_all_classes() {
let payloads = vec![
bp("p1", &["sqli", "xss"], 0.9),
bp("p2", &["cmdi"], 0.8),
bp("p3", &["path", "ssrf"], 0.7),
];
let r = compute_min_bypass_set(&payloads);
assert_eq!(r.classes_covered, 5);
let covered: HashSet<&str> = r
.min_set
.iter()
.flat_map(|p| p.rule_classes.iter().map(String::as_str))
.collect();
assert_eq!(covered.len(), 5);
}
#[test]
fn compression_ratio_is_positive() {
let payloads: Vec<BypassPayload> = (0..20)
.map(|i| bp(&format!("p{i}"), &[&format!("class_{i}")], i as f64 / 20.0))
.collect();
let r = compute_min_bypass_set(&payloads);
assert!((r.compression_ratio - 1.0).abs() < 1e-9);
}
#[test]
fn large_redundant_set_compresses_dramatically() {
let payloads: Vec<BypassPayload> = (0..100)
.map(|i| bp(&format!("p{i}"), &["a", "b", "c"], i as f64 / 100.0))
.collect();
let r = compute_min_bypass_set(&payloads);
assert_eq!(r.min_set.len(), 1, "100 redundant payloads → 1 needed");
assert!(r.compression_ratio > 50.0);
}
#[test]
fn deterministic_output() {
let payloads = vec![
bp("alpha", &["a", "b"], 0.5),
bp("beta", &["b", "c"], 0.5),
bp("gamma", &["c", "d"], 0.5),
];
let r1 = compute_min_bypass_set(&payloads);
let r2 = compute_min_bypass_set(&payloads);
assert_eq!(r1.min_set.len(), r2.min_set.len());
for (a, b) in r1.min_set.iter().zip(r2.min_set.iter()) {
assert_eq!(a.id, b.id);
}
}
#[test]
fn streaming_matches_batch() {
let payloads = vec![
bp("p1", &["a", "b"], 0.9),
bp("p2", &["c"], 0.8),
bp("p3", &["b", "c", "d"], 0.7),
];
let batch = compute_min_bypass_set(&payloads);
let stream = compute_min_bypass_set_streaming(payloads);
assert_eq!(batch.min_set.len(), stream.min_set.len());
assert_eq!(batch.classes_covered, stream.classes_covered);
}
#[test]
fn format_summary_contains_key_fields() {
let payloads = vec![bp("p1", &["sqli"], 0.9), bp("p2", &["xss"], 0.8)];
let r = compute_min_bypass_set(&payloads);
let summary = format_min_bypass_summary(&r);
assert!(summary.contains("2"), "summary must mention class count");
assert!(
summary.contains("compression"),
"summary must mention compression"
);
}
#[test]
fn class_coverage_map_is_complete() {
let payloads = vec![bp("p1", &["a", "b"], 0.9), bp("p2", &["c"], 0.8)];
let r = compute_min_bypass_set(&payloads);
let map = class_coverage_map(&r);
assert!(map.contains_key("a"));
assert!(map.contains_key("b"));
assert!(map.contains_key("c"));
}
#[test]
fn class_coverage_map_respects_greedy_order() {
let payloads = vec![
bp("p1", &["a"], 0.5),
bp("p2", &["b"], 0.5),
bp("p3", &["a", "b"], 0.9), ];
let r = compute_min_bypass_set(&payloads);
let map = class_coverage_map(&r);
assert_eq!(map.get("a"), Some(&"p3"), "p3 should cover class 'a'");
assert_eq!(map.get("b"), Some(&"p3"), "p3 should cover class 'b'");
}
#[test]
fn nan_score_treated_as_zero() {
let payloads = vec![bp("p_nan", &["a"], f64::NAN), bp("p_good", &["a"], 0.9)];
let r = compute_min_bypass_set(&payloads);
assert_eq!(r.min_set.len(), 1);
assert_eq!(r.min_set[0].id, "p_good");
}
#[test]
fn inf_score_treated_as_zero() {
let payloads = vec![
bp("p_inf", &["a"], f64::INFINITY),
bp("p_neginf", &["a"], f64::NEG_INFINITY),
];
let r = compute_min_bypass_set(&payloads);
assert_eq!(r.min_set.len(), 1);
}
#[test]
fn min_set_never_larger_than_input() {
for n in [0, 1, 2, 5, 10, 50] {
let payloads: Vec<BypassPayload> = (0..n)
.map(|i| bp(&format!("p{i}"), &[&format!("c{i}")], 0.5))
.collect();
let r = compute_min_bypass_set(&payloads);
assert!(
r.min_set.len() <= payloads.len(),
"min_set must not exceed input size for n={n}"
);
}
}
#[test]
fn classes_covered_equals_universe_size_when_fully_covered() {
let payloads = vec![bp("p1", &["a", "b", "c"], 0.9), bp("p2", &["d", "e"], 0.8)];
let r = compute_min_bypass_set(&payloads);
assert_eq!(
r.classes_covered, 5,
"all 5 distinct classes must be counted"
);
}
}