use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use crate::coverage_feedback::{PayloadClass, RuleId};
/// Schema version stamped into every corpus created by `RuleBypassCorpus::new`
/// and written by `RuleBypassCorpus::save_atomic`. A loaded corpus whose
/// `schema_version` is 0 (the serde default for a missing field) is normalised
/// to this value by `RuleBypassCorpus::load_or_default`.
pub const CORPUS_SCHEMA_VERSION: u32 = 1;
/// One payload observation stored in a `RuleBucket`'s `blocked` list.
///
/// `RuleBypassCorpus::record_block` creates these and treats two attempts as
/// duplicates when both `payload` and `response_hash` match.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RecordedAttempt {
/// Payload text exactly as passed to `record_block`.
pub payload: String,
/// Class label of the payload; `summary` groups counts by its `as_str()` value.
pub payload_class: PayloadClass,
/// Encoding/technique names supplied by the caller (presumably in the order
/// they were applied — confirm against the caller).
pub encoding_chain: Vec<String>,
/// Caller-supplied hash used, together with `payload`, as the dedup key.
/// NOTE(review): presumably a hash of the observed response — confirm.
pub response_hash: u64,
/// Seconds since the Unix epoch at which the attempt was recorded.
pub observed_at_secs: u64,
}
/// One payload observation stored in a `RuleBucket`'s `bypassed` list.
///
/// Carries the same observation fields as `RecordedAttempt` plus the
/// submission lifecycle state and an optional delivery description.
/// `RuleBypassCorpus::record_bypass` dedups on (`payload`, `response_hash`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RecordedBypass {
/// Payload text exactly as passed to `record_bypass`.
pub payload: String,
/// Class label of the payload; `summary` groups counts by its `as_str()` value.
pub payload_class: PayloadClass,
/// Encoding/technique names supplied by the caller (presumably in the order
/// they were applied — confirm against the caller).
pub encoding_chain: Vec<String>,
/// Caller-supplied hash used, together with `payload`, as the dedup key.
pub response_hash: u64,
/// Seconds since the Unix epoch at which the bypass was recorded.
pub observed_at_secs: u64,
/// Submission lifecycle state; starts as `SubmissionStatus::Queued` and
/// defaults to it when the field is absent from a stored corpus.
#[serde(default)]
pub submission: SubmissionStatus,
/// Free-form delivery description set via `RuleBypassCorpus::set_delivery`.
/// Empty until set, and empty when the field is absent from a stored corpus.
#[serde(default)]
pub delivery: String,
}
/// Lifecycle state of a recorded bypass with respect to submission.
///
/// Serialised in serde's adjacently tagged form: the variant name goes under
/// the `stage` key and the variant's fields (if any) under the `data` key.
///
/// Only `Queued` and `DryRunHold` can make a bypass appear in
/// `RuleBypassCorpus::novel_bypasses_pending_submission`; every other variant
/// is treated there as not pending.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(tag = "stage", content = "data")]
pub enum SubmissionStatus {
/// Initial state. Becomes pending once the bypass is at least the caller's
/// default dry-run interval old (measured from `observed_at_secs`).
#[default]
Queued,
/// Held back explicitly; becomes pending once the current time reaches
/// `release_at_secs` (seconds since the Unix epoch).
DryRunHold { release_at_secs: u64 },
/// Presumably: a report was filed and `report_id` identifies it — confirm
/// with the submission code, which is not visible here.
Submitted { report_id: String },
/// Presumably: the report identified by `report_id` was accepted — confirm.
Accepted { report_id: String },
/// Presumably: the report was closed as a duplicate of `duplicate_of` — confirm.
Duplicate { duplicate_of: String },
/// Presumably: the report was rejected for the given `reason` — confirm.
Rejected { reason: String },
}
/// Everything recorded for a single rule: blocked attempts, bypasses, and
/// drift bookkeeping. Buckets live in `RuleBypassCorpus::buckets`, keyed by
/// the rule id string.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RuleBucket {
/// Identifier of the rule this bucket belongs to. `bucket_mut` builds it
/// from the same string that is used as the map key.
pub rule_id: RuleId,
/// Optional human-readable description; `None` when absent from a stored corpus.
#[serde(default)]
pub description: Option<String>,
/// Attempts recorded through `record_block`.
#[serde(default)]
pub blocked: Vec<RecordedAttempt>,
/// Bypasses recorded through `record_bypass`.
#[serde(default)]
pub bypassed: Vec<RecordedBypass>,
/// Seconds since the Unix epoch of the latest `mark_drift` call, if any.
#[serde(default)]
pub last_drift_at_secs: Option<u64>,
}
/// Persistent per-target record of blocked and bypassing payloads, grouped by rule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuleBypassCorpus {
/// Schema version of the stored data; a missing field deserialises as 0.
#[serde(default)]
pub schema_version: u32,
/// Identifier of the target this corpus describes. This is the only field
/// without a serde default, so a stored corpus lacking it fails to parse.
pub target_fingerprint: String,
/// Per-rule buckets keyed by rule id. A `BTreeMap` keeps iteration (and
/// therefore serialisation) in key order.
#[serde(default)]
pub buckets: BTreeMap<String, RuleBucket>,
/// Seconds since the Unix epoch stamped by the last `save_atomic`; 0 for a
/// corpus that has never been saved.
#[serde(default)]
pub last_saved_at_secs: u64,
}
impl RuleBypassCorpus {
#[must_use]
pub fn new(target_fingerprint: impl Into<String>) -> Self {
Self {
schema_version: CORPUS_SCHEMA_VERSION,
target_fingerprint: target_fingerprint.into(),
buckets: BTreeMap::new(),
last_saved_at_secs: 0,
}
}
const CORPUS_READ_CEILING_BYTES: usize = 1024 * 1024 * 1024;
pub fn load_or_default(path: &Path, target_fingerprint: impl Into<String>) -> Self {
if !path.exists() {
return Self::new(target_fingerprint);
}
let raw = match crate::safe_io::read_capped_text(path, Self::CORPUS_READ_CEILING_BYTES) {
Ok(s) => s,
Err(e) => {
preserve_unreadable_corpus(path, &format!("read failed: {e}"));
return Self::new(target_fingerprint);
}
};
if raw.trim().is_empty() {
return Self::new(target_fingerprint);
}
match serde_json::from_str::<Self>(&raw) {
Ok(mut corpus) => {
if corpus.schema_version == 0 {
corpus.schema_version = CORPUS_SCHEMA_VERSION;
}
for bucket in corpus.buckets.values_mut() {
bucket.blocked.truncate(Self::MAX_BLOCKED_PER_BUCKET);
bucket.bypassed.truncate(Self::MAX_BYPASSED_PER_BUCKET);
}
corpus
}
Err(e) => {
preserve_unreadable_corpus(path, &format!("parse failed: {e}"));
Self::new(target_fingerprint)
}
}
}
pub fn save_atomic(&self, path: &Path) -> std::io::Result<()> {
backup_before_overwrite(path);
let mut snap = self.clone();
snap.schema_version = CORPUS_SCHEMA_VERSION;
snap.last_saved_at_secs = current_epoch_secs();
let body = serde_json::to_vec_pretty(&snap)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
wafrift_types::loaders::write_atomic(path, &body)
}
pub fn bucket_mut(&mut self, rule_id: &str) -> &mut RuleBucket {
self.buckets
.entry(rule_id.to_string())
.or_insert_with(|| RuleBucket {
rule_id: RuleId::new(rule_id),
..RuleBucket::default()
})
}
const MAX_BLOCKED_PER_BUCKET: usize = 512;
const MAX_BYPASSED_PER_BUCKET: usize = 4096;
pub fn record_block(
&mut self,
rule_id: &str,
payload: &str,
payload_class: PayloadClass,
encoding_chain: Vec<String>,
response_hash: u64,
) {
let entry = RecordedAttempt {
payload: payload.to_string(),
payload_class,
encoding_chain,
response_hash,
observed_at_secs: current_epoch_secs(),
};
let bucket = self.bucket_mut(rule_id);
if bucket.blocked.len() >= Self::MAX_BLOCKED_PER_BUCKET {
return;
}
if !bucket
.blocked
.iter()
.any(|a| a.response_hash == entry.response_hash && a.payload == entry.payload)
{
bucket.blocked.push(entry);
}
}
pub fn record_bypass(
&mut self,
rule_id: &str,
payload: &str,
payload_class: PayloadClass,
encoding_chain: Vec<String>,
response_hash: u64,
) {
let entry = RecordedBypass {
payload: payload.to_string(),
payload_class,
encoding_chain,
response_hash,
observed_at_secs: current_epoch_secs(),
submission: SubmissionStatus::Queued,
delivery: String::new(),
};
let bucket = self.bucket_mut(rule_id);
if bucket.bypassed.len() >= Self::MAX_BYPASSED_PER_BUCKET {
return;
}
if !bucket
.bypassed
.iter()
.any(|b| b.response_hash == entry.response_hash && b.payload == entry.payload)
{
bucket.bypassed.push(entry);
}
}
pub fn mark_drift(&mut self, rule_id: &str) {
let bucket = self.bucket_mut(rule_id);
bucket.last_drift_at_secs = Some(current_epoch_secs());
}
pub fn set_submission(
&mut self,
rule_id: &str,
payload: &str,
new_status: SubmissionStatus,
) -> bool {
if let Some(bucket) = self.buckets.get_mut(rule_id)
&& let Some(b) = bucket.bypassed.iter_mut().find(|b| b.payload == payload)
{
b.submission = new_status;
return true;
}
false
}
pub fn set_delivery(&mut self, rule_id: &str, payload: &str, delivery: String) -> bool {
if delivery.is_empty() {
return false;
}
if let Some(bucket) = self.buckets.get_mut(rule_id)
&& let Some(b) = bucket.bypassed.iter_mut().find(|b| b.payload == payload)
{
b.delivery = delivery;
return true;
}
false
}
#[must_use]
pub fn unexplored_rules(&self, min_attempts: usize) -> Vec<String> {
self.buckets
.iter()
.filter(|(_, b)| b.blocked.len() < min_attempts && b.bypassed.is_empty())
.map(|(k, _)| k.clone())
.collect()
}
#[must_use]
pub fn rules_due_for_retry(&self, window_secs: u64) -> Vec<String> {
let now = current_epoch_secs();
self.buckets
.iter()
.filter(|(_, b)| {
b.last_drift_at_secs
.is_some_and(|d| now.saturating_sub(d) <= window_secs)
&& !b.blocked.is_empty()
})
.map(|(k, _)| k.clone())
.collect()
}
#[must_use]
pub fn bypasses_for_rule(&self, rule_id: &str) -> &[RecordedBypass] {
self.buckets
.get(rule_id)
.map(|b| b.bypassed.as_slice())
.unwrap_or(&[])
}
#[must_use]
pub fn blocked_for_rule(&self, rule_id: &str) -> &[RecordedAttempt] {
self.buckets
.get(rule_id)
.map(|b| b.blocked.as_slice())
.unwrap_or(&[])
}
#[must_use]
pub fn novel_bypasses_pending_submission(
&self,
default_dry_run_secs: u64,
) -> Vec<(&str, &RecordedBypass)> {
let now = current_epoch_secs();
let mut out = vec![];
for (rule_id, bucket) in &self.buckets {
for b in &bucket.bypassed {
let ready = match &b.submission {
SubmissionStatus::Queued => {
now.saturating_sub(b.observed_at_secs) >= default_dry_run_secs
}
SubmissionStatus::DryRunHold { release_at_secs } => now >= *release_at_secs,
_ => false,
};
if ready {
out.push((rule_id.as_str(), b));
}
}
}
out
}
#[must_use]
pub fn total_bypasses(&self) -> usize {
self.buckets.values().map(|b| b.bypassed.len()).sum()
}
#[must_use]
pub fn total_blocks(&self) -> usize {
self.buckets.values().map(|b| b.blocked.len()).sum()
}
#[must_use]
pub fn rules_seen(&self) -> usize {
self.buckets.len()
}
#[must_use]
pub fn summary(&self) -> CoverageSummary {
let mut per_class: BTreeMap<String, ClassStats> = BTreeMap::new();
for bucket in self.buckets.values() {
for b in &bucket.blocked {
let entry = per_class
.entry(b.payload_class.as_str().to_string())
.or_default();
entry.blocks += 1;
}
for b in &bucket.bypassed {
let entry = per_class
.entry(b.payload_class.as_str().to_string())
.or_default();
entry.bypasses += 1;
}
}
CoverageSummary {
target_fingerprint: self.target_fingerprint.clone(),
rules_seen: self.rules_seen(),
total_blocks: self.total_blocks(),
total_bypasses: self.total_bypasses(),
per_class,
}
}
}
/// Per-payload-class counters produced by `RuleBypassCorpus::summary`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ClassStats {
/// Number of blocked attempts recorded for the class.
pub blocks: usize,
/// Number of bypasses recorded for the class.
pub bypasses: usize,
}
/// Aggregate snapshot of a corpus, as returned by `RuleBypassCorpus::summary`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoverageSummary {
/// Copy of the corpus's `target_fingerprint`.
pub target_fingerprint: String,
/// Number of rule buckets in the corpus.
pub rules_seen: usize,
/// Blocked attempts summed over all buckets.
pub total_blocks: usize,
/// Bypasses summed over all buckets.
pub total_bypasses: usize,
/// Counters per payload class, keyed by the class's string form.
pub per_class: BTreeMap<String, ClassStats>,
}
/// Default on-disk location of the corpus for `target_fingerprint`.
///
/// The file is named after the sanitised fingerprint with a `.json`
/// extension. It lives under `<home>/.wafrift/corpus/` when a home directory
/// can be determined, and under the relative path
/// `wafrift-bench/results/corpus/` otherwise.
#[must_use]
pub fn default_corpus_path(target_fingerprint: &str) -> PathBuf {
    let file_name = format!(
        "{}.json",
        sanitize_fingerprint_for_filename(target_fingerprint)
    );
    match dirs_home() {
        Some(home) => home.join(".wafrift").join("corpus").join(file_name),
        None => PathBuf::from("wafrift-bench/results/corpus").join(file_name),
    }
}
/// Maps a fingerprint to a string that is safe to use as a file name:
/// ASCII letters, ASCII digits, `-` and `_` are kept, and every other
/// character (including each non-ASCII character) becomes a single `_`.
fn sanitize_fingerprint_for_filename(fp: &str) -> String {
    let mut safe = String::with_capacity(fp.len());
    for ch in fp.chars() {
        let keep = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_');
        safe.push(if keep { ch } else { '_' });
    }
    safe
}
/// Whole seconds elapsed since the Unix epoch according to the system clock,
/// or 0 when the clock reports a time before the epoch.
fn current_epoch_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
/// Moves a corpus file that could not be loaded out of the way so a later
/// save cannot overwrite it, and reports the outcome on stderr.
///
/// The file is renamed to `<path>.corrupt-<epoch secs>-<pid>-<epoch nanos>`.
/// `reason` is included in the message. A failed rename is reported, not
/// propagated.
fn preserve_unreadable_corpus(path: &Path, reason: &str) {
    let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_nanos(),
        Err(_) => 0,
    };
    // Seconds + pid + nanoseconds keep sidecar names from colliding.
    let suffix = format!(
        ".corrupt-{}-{}-{}",
        current_epoch_secs(),
        std::process::id(),
        nanos
    );
    let mut aside_name = path.as_os_str().to_owned();
    aside_name.push(suffix);
    let aside = PathBuf::from(aside_name);

    if let Err(e) = std::fs::rename(path, &aside) {
        eprintln!(
            "wafrift: ERROR — corpus at {} could not be loaded ({reason}) AND \
             could not be moved aside ({e}). Back this file up MANUALLY before \
             the next run — a save may otherwise overwrite it.",
            path.display(),
        );
        return;
    }
    eprintln!(
        "wafrift: WARNING — corpus at {} could not be loaded ({reason}). \
         Your data was PRESERVED at {} and a fresh corpus was started. \
         Rename it back once the cause is addressed.",
        path.display(),
        aside.display(),
    );
}
/// Best-effort snapshot of the file at `path` to `<path>.bak` before it is
/// overwritten.
///
/// Nothing happens when `path` is missing, cannot be inspected, or is empty.
/// A failed copy never aborts the caller's save, but it is reported on
/// stderr so the user knows no backup was taken.
fn backup_before_overwrite(path: &Path) {
    let has_content = std::fs::metadata(path)
        .map(|meta| meta.len() > 0)
        .unwrap_or(false);
    if !has_content {
        return;
    }
    let mut bak = path.as_os_str().to_owned();
    bak.push(".bak");
    let bak = PathBuf::from(bak);
    if let Err(e) = std::fs::copy(path, &bak) {
        eprintln!(
            "wafrift: WARNING — could not snapshot {} to {} before overwrite ({e}); \
             the save will proceed without a backup.",
            path.display(),
            bak.display(),
        );
    }
}
/// Resolves the user's home directory from the environment.
///
/// `HOME` is consulted first, then `USERPROFILE`; the first one that is set
/// to a non-empty value wins. Values are read with `var_os`, so a home
/// directory whose path is not valid Unicode is still honoured instead of
/// being skipped. Returns `None` when neither variable yields a value.
fn dirs_home() -> Option<PathBuf> {
    ["HOME", "USERPROFILE"]
        .iter()
        .filter_map(|key| std::env::var_os(key))
        .find(|value| !value.is_empty())
        .map(PathBuf::from)
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
fn cls(s: &str) -> PayloadClass {
PayloadClass::new(s)
}
#[test]
fn new_corpus_is_empty() {
let c = RuleBypassCorpus::new("cf:managed-ruleset:cumulusfire.cloudflare.com");
assert_eq!(c.rules_seen(), 0);
assert_eq!(c.total_blocks(), 0);
assert_eq!(c.total_bypasses(), 0);
assert_eq!(
c.target_fingerprint,
"cf:managed-ruleset:cumulusfire.cloudflare.com"
);
assert_eq!(c.schema_version, CORPUS_SCHEMA_VERSION);
}
#[test]
fn record_block_dedups_by_payload_and_hash() {
let mut c = RuleBypassCorpus::new("t");
c.record_block(
"942100",
"' OR 1=1--",
cls("sql"),
vec!["url".into()],
0xCAFE,
);
c.record_block(
"942100",
"' OR 1=1--",
cls("sql"),
vec!["url".into()],
0xCAFE,
);
c.record_block(
"942100",
"' OR 1=1--",
cls("sql"),
vec!["url".into()],
0xCAFE,
);
assert_eq!(c.blocked_for_rule("942100").len(), 1);
}
#[test]
fn record_block_keeps_distinct_payloads_per_rule() {
let mut c = RuleBypassCorpus::new("t");
c.record_block("942100", "' OR 1=1--", cls("sql"), vec![], 1);
c.record_block("942100", "UNION SELECT 1", cls("sql"), vec![], 2);
c.record_block("942100", "1' AND 1=1--", cls("sql"), vec![], 3);
assert_eq!(c.blocked_for_rule("942100").len(), 3);
}
#[test]
fn record_block_caps_blocked_per_bucket() {
let mut c = RuleBypassCorpus::new("t");
let over = RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET + 200;
for i in 0..over {
c.record_block("r", &format!("p{i}"), cls("sql"), vec![], i as u64);
}
assert_eq!(
c.blocked_for_rule("r").len(),
RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET,
"blocked must be capped per bucket"
);
let n_bypass = RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET + 50;
for i in 0..n_bypass {
c.record_bypass(
"r",
&format!("b{i}"),
cls("sql"),
vec![],
1_000_000 + i as u64,
);
}
assert_eq!(
c.total_bypasses(),
n_bypass,
"bypasses under MAX_BYPASSED_PER_BUCKET must all persist"
);
}
#[test]
fn record_bypass_caps_bypassed_per_bucket() {
let mut c = RuleBypassCorpus::new("t");
let over = RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET + 500;
for i in 0..over {
c.record_bypass("r", &format!("b{i}"), cls("sql"), vec![], i as u64);
}
assert_eq!(
c.bypasses_for_rule("r").len(),
RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET,
"bypassed must be capped at MAX_BYPASSED_PER_BUCKET"
);
}
#[test]
fn load_or_default_heals_pre_cap_oversized_blocked() {
    // Insert the bucket directly so the blocked list exceeds the cap that
    // `record_block` would otherwise enforce.
    let mut c = RuleBypassCorpus::new("heal-test");
    let over = RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET + 300;
    let blocked: Vec<RecordedAttempt> = (0..over)
        .map(|i| RecordedAttempt {
            payload: format!("p{i}"),
            payload_class: cls("sql"),
            encoding_chain: vec![],
            response_hash: i as u64,
            observed_at_secs: 0,
        })
        .collect();
    c.buckets.insert(
        "r".to_string(),
        RuleBucket {
            blocked,
            ..RuleBucket::default()
        },
    );
    c.record_bypass("r", "winner", cls("sql"), vec![], 42);
    // A private temp directory is removed on drop, even when an assertion
    // below panics, and cannot collide with files from other runs.
    let dir = tempdir().expect("tempdir");
    let path = dir.path().join("corpus.json");
    c.save_atomic(&path).expect("save oversized corpus");
    let healed = RuleBypassCorpus::load_or_default(&path, "heal-test");
    assert_eq!(
        healed.blocked_for_rule("r").len(),
        RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET,
        "load must truncate over-cap blocked to reclaim the bloat"
    );
    assert_eq!(healed.total_bypasses(), 1, "bypasses survive the heal");
}
#[test]
fn load_or_default_heals_pre_cap_oversized_bypassed() {
    // Insert the bucket directly so the bypassed list exceeds the cap that
    // `record_bypass` would otherwise enforce.
    let mut c = RuleBypassCorpus::new("bypass-heal-test");
    let over = RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET + 200;
    let bypassed: Vec<RecordedBypass> = (0..over)
        .map(|i| RecordedBypass {
            payload: format!("b{i}"),
            payload_class: cls("sql"),
            encoding_chain: vec![],
            response_hash: i as u64,
            observed_at_secs: 0,
            submission: SubmissionStatus::Queued,
            delivery: String::new(),
        })
        .collect();
    c.buckets.insert(
        "r".to_string(),
        RuleBucket {
            bypassed,
            ..RuleBucket::default()
        },
    );
    c.record_block("r", "blocker", cls("sql"), vec![], 1);
    // A private temp directory is removed on drop, even when an assertion
    // below panics, and cannot collide with files from other runs.
    let dir = tempdir().expect("tempdir");
    let path = dir.path().join("corpus.json");
    c.save_atomic(&path).expect("save oversized bypass corpus");
    let healed = RuleBypassCorpus::load_or_default(&path, "bypass-heal-test");
    assert_eq!(
        healed.bypasses_for_rule("r").len(),
        RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET,
        "load must truncate over-cap bypassed to MAX_BYPASSED_PER_BUCKET"
    );
    assert_eq!(healed.total_blocks(), 1, "blocked entries survive the heal");
}
#[test]
fn record_bypass_dedups() {
let mut c = RuleBypassCorpus::new("t");
c.record_bypass("942100", "Ω union select", cls("sql"), vec![], 1);
c.record_bypass("942100", "Ω union select", cls("sql"), vec![], 1);
assert_eq!(c.bypasses_for_rule("942100").len(), 1);
}
#[test]
fn record_bypass_default_status_is_queued() {
let mut c = RuleBypassCorpus::new("t");
c.record_bypass("942100", "payload", cls("sql"), vec![], 1);
let b = &c.bypasses_for_rule("942100")[0];
assert!(matches!(b.submission, SubmissionStatus::Queued));
}
#[test]
fn set_submission_updates_lifecycle() {
let mut c = RuleBypassCorpus::new("t");
c.record_bypass("942100", "payload", cls("sql"), vec![], 1);
let ok = c.set_submission(
"942100",
"payload",
SubmissionStatus::Submitted {
report_id: "H1-12345".into(),
},
);
assert!(ok);
let b = &c.bypasses_for_rule("942100")[0];
assert!(matches!(
&b.submission,
SubmissionStatus::Submitted { report_id } if report_id == "H1-12345"
));
}
#[test]
fn set_submission_missing_returns_false() {
let mut c = RuleBypassCorpus::new("t");
let ok = c.set_submission(
"doesnt-exist",
"payload",
SubmissionStatus::Accepted {
report_id: "X".into(),
},
);
assert!(!ok);
}
#[test]
fn record_bypass_default_delivery_is_empty() {
let mut c = RuleBypassCorpus::new("t");
c.record_bypass("R1", "p", cls("sql"), vec![], 1);
assert_eq!(c.bypasses_for_rule("R1")[0].delivery, "");
}
#[test]
fn set_delivery_attaches_shape_to_recorded_bypass() {
let mut c = RuleBypassCorpus::new("t");
c.record_bypass("R1", "p", cls("sql"), vec![], 1);
let ok = c.set_delivery("R1", "p", "{\"Query\":{\"param\":\"q\"}}".into());
assert!(ok);
assert_eq!(
c.bypasses_for_rule("R1")[0].delivery,
"{\"Query\":{\"param\":\"q\"}}"
);
}
#[test]
fn set_delivery_missing_bypass_returns_false() {
let mut c = RuleBypassCorpus::new("t");
assert!(!c.set_delivery("nope", "p", "{\"PathSegment\":null}".into()));
}
#[test]
fn set_delivery_ignores_empty_string() {
let mut c = RuleBypassCorpus::new("t");
c.record_bypass("R1", "p", cls("sql"), vec![], 1);
assert!(c.set_delivery("R1", "p", "\"PathSegment\"".into()));
assert!(!c.set_delivery("R1", "p", String::new()));
assert_eq!(c.bypasses_for_rule("R1")[0].delivery, "\"PathSegment\"");
}
#[test]
fn delivery_round_trips_through_save_load() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("c.json");
let mut c = RuleBypassCorpus::new("cf:mr:cumulus");
c.record_bypass("942100", "1 OR 1=1 --", cls("sql"), vec![], 9);
c.set_delivery(
"942100",
"1 OR 1=1 --",
"{\"HppSplit\":{\"param\":\"q\",\"parts\":3}}".into(),
);
c.save_atomic(&path).expect("save");
let r = RuleBypassCorpus::load_or_default(&path, "ignored");
assert_eq!(
r.bypasses_for_rule("942100")[0].delivery,
"{\"HppSplit\":{\"param\":\"q\",\"parts\":3}}"
);
}
#[test]
fn delivery_defaults_empty_for_corpus_without_the_field() {
let mut c = RuleBypassCorpus::new("t");
c.record_bypass("R1", "old", cls("sql"), vec![], 1);
let mut v: serde_json::Value =
serde_json::from_str(&serde_json::to_string(&c).unwrap()).unwrap();
for bucket in v["buckets"].as_object_mut().unwrap().values_mut() {
for bp in bucket["bypassed"].as_array_mut().unwrap() {
assert!(
bp.as_object_mut().unwrap().remove("delivery").is_some(),
"serialization must include the delivery key to strip"
);
}
}
let dir = tempdir().expect("tempdir");
let path = dir.path().join("old.json");
std::fs::write(&path, serde_json::to_string(&v).unwrap()).expect("write");
let r = RuleBypassCorpus::load_or_default(&path, "ignored");
let b = &r.bypasses_for_rule("R1")[0];
assert_eq!(b.payload, "old");
assert_eq!(b.delivery, "", "missing delivery must default to empty");
}
#[test]
fn unexplored_rules_skips_ones_with_bypass() {
let mut c = RuleBypassCorpus::new("t");
c.record_block("R1", "p1", cls("sql"), vec![], 1);
c.record_bypass("R2", "p2", cls("sql"), vec![], 2);
let unexplored = c.unexplored_rules(3);
assert!(unexplored.contains(&"R1".to_string()));
assert!(!unexplored.contains(&"R2".to_string()));
}
#[test]
fn rules_due_for_retry_respects_window() {
let mut c = RuleBypassCorpus::new("t");
c.record_block("R1", "p", cls("sql"), vec![], 1);
c.mark_drift("R1");
let due = c.rules_due_for_retry(60);
assert_eq!(due, vec!["R1".to_string()]);
}
#[test]
fn rules_due_for_retry_skips_rules_with_no_blocks() {
let mut c = RuleBypassCorpus::new("t");
c.mark_drift("R1");
assert!(c.rules_due_for_retry(60).is_empty());
}
#[test]
fn total_counts_aggregate_across_rules() {
let mut c = RuleBypassCorpus::new("t");
c.record_block("R1", "p1", cls("sql"), vec![], 1);
c.record_block("R2", "p2", cls("xss"), vec![], 2);
c.record_bypass("R1", "p3", cls("sql"), vec![], 3);
assert_eq!(c.total_blocks(), 2);
assert_eq!(c.total_bypasses(), 1);
assert_eq!(c.rules_seen(), 2);
}
#[test]
fn summary_breaks_down_by_class() {
let mut c = RuleBypassCorpus::new("cf:mr:foo");
c.record_block("R1", "p1", cls("sql"), vec![], 1);
c.record_block("R1", "p2", cls("sql"), vec![], 2);
c.record_block("R2", "p3", cls("xss"), vec![], 3);
c.record_bypass("R1", "p4", cls("sql"), vec![], 4);
let s = c.summary();
assert_eq!(s.target_fingerprint, "cf:mr:foo");
assert_eq!(s.rules_seen, 2);
assert_eq!(s.total_blocks, 3);
assert_eq!(s.total_bypasses, 1);
let sql_stats = s.per_class.get("sql").unwrap();
assert_eq!(sql_stats.blocks, 2);
assert_eq!(sql_stats.bypasses, 1);
let xss_stats = s.per_class.get("xss").unwrap();
assert_eq!(xss_stats.blocks, 1);
assert_eq!(xss_stats.bypasses, 0);
}
#[test]
fn save_load_round_trip() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("corpus.json");
let mut c = RuleBypassCorpus::new("cf:mr:cumulus");
c.record_block("942100", "payload-1", cls("sql"), vec!["url".into()], 1);
c.record_bypass(
"942100",
"payload-2",
cls("sql"),
vec!["unicode".into(), "case".into()],
2,
);
c.save_atomic(&path).expect("save");
let reloaded = RuleBypassCorpus::load_or_default(&path, "ignored");
assert_eq!(reloaded.target_fingerprint, "cf:mr:cumulus");
assert_eq!(reloaded.rules_seen(), 1);
assert_eq!(reloaded.total_blocks(), 1);
assert_eq!(reloaded.total_bypasses(), 1);
let bp = &reloaded.bypasses_for_rule("942100")[0];
assert_eq!(bp.payload, "payload-2");
assert_eq!(
bp.encoding_chain,
vec!["unicode".to_string(), "case".to_string()]
);
}
#[test]
fn load_missing_file_returns_default() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("nope.json");
let c = RuleBypassCorpus::load_or_default(&path, "cf:mr:x");
assert_eq!(c.target_fingerprint, "cf:mr:x");
assert_eq!(c.rules_seen(), 0);
}
#[test]
fn load_corrupted_file_preserves_original_then_defaults() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("trash.json");
let original = b"{not valid json !!! but represents 500 lost bypasses";
std::fs::write(&path, original).expect("write");
let c = RuleBypassCorpus::load_or_default(&path, "fallback");
assert_eq!(c.target_fingerprint, "fallback");
assert_eq!(c.rules_seen(), 0);
assert!(!path.exists(), "the unparseable file must be moved aside");
let aside: Vec<_> = std::fs::read_dir(dir.path())
.unwrap()
.filter_map(Result::ok)
.filter(|e| {
e.file_name()
.to_string_lossy()
.contains("trash.json.corrupt-")
})
.collect();
assert_eq!(aside.len(), 1, "exactly one preserved sidecar must exist");
let preserved = std::fs::read(aside[0].path()).expect("read sidecar");
assert_eq!(
preserved, original,
"preserved bytes must be byte-identical"
);
}
#[test]
fn load_empty_file_returns_default_without_preserving() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("empty.json");
std::fs::write(&path, b"").expect("write");
let c = RuleBypassCorpus::load_or_default(&path, "fallback");
assert_eq!(c.target_fingerprint, "fallback");
let has_sidecar = std::fs::read_dir(dir.path())
.unwrap()
.filter_map(Result::ok)
.any(|e| e.file_name().to_string_lossy().contains(".corrupt-"));
assert!(!has_sidecar, "empty file must not spawn a preserve sidecar");
}
#[test]
fn save_atomic_backs_up_prior_corpus_before_overwrite() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("corpus.json");
let mut a = RuleBypassCorpus::new("cf:mr:cumulus");
a.record_bypass("942100", "winner-A", cls("xss"), vec![], 1);
a.save_atomic(&path).expect("save A");
let empty = RuleBypassCorpus::new("cf:mr:cumulus");
empty.save_atomic(&path).expect("save empty over A");
let bak = dir.path().join("corpus.json.bak");
assert!(
bak.exists(),
"a .bak snapshot of the prior corpus must exist"
);
let recovered = RuleBypassCorpus::load_or_default(&bak, "ignored");
assert_eq!(
recovered.total_bypasses(),
1,
"the prior bypass must be recoverable from the .bak snapshot"
);
assert_eq!(recovered.bypasses_for_rule("942100")[0].payload, "winner-A");
}
#[test]
fn corrupt_then_save_does_not_destroy_preserved_bypasses() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("corpus.json");
let mut real = RuleBypassCorpus::new("cf:mr:cumulus");
for i in 0..50 {
real.record_bypass("942100", &format!("bypass-{i}"), cls("xss"), vec![], i);
}
let real_bytes = serde_json::to_vec_pretty(&real).unwrap();
let mut corrupt = real_bytes.clone();
corrupt.truncate(corrupt.len() / 2);
std::fs::write(&path, &corrupt).expect("write corrupt");
let fresh = RuleBypassCorpus::load_or_default(&path, "cf:mr:cumulus");
assert_eq!(fresh.total_bypasses(), 0);
fresh.save_atomic(&path).expect("save fresh");
let aside: Vec<_> = std::fs::read_dir(dir.path())
.unwrap()
.filter_map(Result::ok)
.filter(|e| e.file_name().to_string_lossy().contains(".corrupt-"))
.collect();
assert_eq!(aside.len(), 1, "corrupt bytes must be preserved aside");
assert_eq!(
std::fs::read(aside[0].path()).unwrap(),
corrupt,
"preserved sidecar must hold the exact corrupt bytes for manual recovery"
);
}
#[test]
fn save_creates_parent_directory() {
let dir = tempdir().expect("tempdir");
let nested = dir.path().join("deep/nested/path/corpus.json");
let c = RuleBypassCorpus::new("t");
c.save_atomic(&nested).expect("save creates parents");
assert!(nested.exists());
}
#[test]
fn save_atomic_no_torn_write_on_existing_file() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("corpus.json");
std::fs::write(&path, b"prior-garbage-bytes").expect("seed");
let c = RuleBypassCorpus::new("cf:mr:t");
c.save_atomic(&path).expect("save");
let bytes = std::fs::read(&path).expect("read");
assert!(
!std::str::from_utf8(&bytes)
.unwrap()
.contains("prior-garbage")
);
}
#[test]
fn novel_bypasses_pending_submission_honors_dry_run() {
let mut c = RuleBypassCorpus::new("t");
c.record_bypass("R1", "fresh", cls("sql"), vec![], 1);
let pending = c.novel_bypasses_pending_submission(86400);
assert!(pending.is_empty(), "fresh bypass should not be pending");
let pending = c.novel_bypasses_pending_submission(0);
assert_eq!(pending.len(), 1);
assert_eq!(pending[0].0, "R1");
}
#[test]
fn novel_bypasses_pending_submission_skips_already_submitted() {
let mut c = RuleBypassCorpus::new("t");
c.record_bypass("R1", "p", cls("sql"), vec![], 1);
c.set_submission(
"R1",
"p",
SubmissionStatus::Submitted {
report_id: "H1-X".into(),
},
);
let pending = c.novel_bypasses_pending_submission(0);
assert!(
pending.is_empty(),
"Submitted bypass should not appear pending"
);
}
#[test]
fn novel_bypasses_pending_submission_honors_explicit_hold() {
let mut c = RuleBypassCorpus::new("t");
c.record_bypass("R1", "p", cls("sql"), vec![], 1);
let future = current_epoch_secs() + 3600;
c.set_submission(
"R1",
"p",
SubmissionStatus::DryRunHold {
release_at_secs: future,
},
);
let pending = c.novel_bypasses_pending_submission(0);
assert!(pending.is_empty(), "explicit DryRunHold must be honored");
}
#[test]
fn schema_version_normalized_on_load() {
let raw = r#"{"target_fingerprint":"t","buckets":{}}"#;
let dir = tempdir().expect("tempdir");
let path = dir.path().join("c.json");
std::fs::write(&path, raw).expect("write");
let c = RuleBypassCorpus::load_or_default(&path, "ignored");
assert_eq!(c.schema_version, CORPUS_SCHEMA_VERSION);
}
#[test]
fn sanitize_fingerprint_strips_path_separators() {
assert_eq!(
sanitize_fingerprint_for_filename("cf:managed-ruleset:host/foo"),
"cf_managed-ruleset_host_foo"
);
assert_eq!(
sanitize_fingerprint_for_filename("..\\..\\evil"),
"______evil"
);
}
#[test]
fn sanitize_fingerprint_preserves_safe_chars() {
assert_eq!(
sanitize_fingerprint_for_filename("cf-managed_ruleset_v1"),
"cf-managed_ruleset_v1"
);
assert_eq!(
sanitize_fingerprint_for_filename("cf-managed.ruleset_v1"),
"cf-managed_ruleset_v1"
);
}
#[test]
fn default_corpus_path_uses_fingerprint() {
let p = default_corpus_path("cf:mr:x.com");
let s = p.to_string_lossy();
assert!(s.contains("cf_mr_x_com"));
assert!(s.ends_with(".json"));
}
#[test]
fn determinism_serialization_btree_order() {
let mut c = RuleBypassCorpus::new("t");
for i in (0..50).rev() {
c.record_block(
&format!("R{i}"),
&format!("p{i}"),
cls("sql"),
vec![],
i as u64,
);
}
let a = serde_json::to_string(&c).unwrap();
let b = serde_json::to_string(&c).unwrap();
assert_eq!(a, b);
}
#[test]
fn description_field_persists() {
let mut c = RuleBypassCorpus::new("t");
c.record_block("942100", "p", cls("sql"), vec![], 1);
c.bucket_mut("942100").description = Some("SQL injection — OWASP CRS 942100".into());
let dir = tempdir().expect("tempdir");
let path = dir.path().join("c.json");
c.save_atomic(&path).expect("save");
let r = RuleBypassCorpus::load_or_default(&path, "ignored");
let desc = r
.buckets
.get("942100")
.and_then(|b| b.description.as_deref());
assert_eq!(desc, Some("SQL injection — OWASP CRS 942100"));
}
#[test]
fn mark_drift_updates_timestamp() {
let mut c = RuleBypassCorpus::new("t");
c.record_block("R1", "p", cls("sql"), vec![], 1);
c.mark_drift("R1");
let t1 = c.buckets["R1"].last_drift_at_secs.unwrap();
std::thread::sleep(std::time::Duration::from_millis(1100));
c.mark_drift("R1");
let t2 = c.buckets["R1"].last_drift_at_secs.unwrap();
assert!(t2 >= t1);
}
#[test]
fn adversarial_large_chain_no_panic() {
let big_chain: Vec<String> = (0..1000).map(|i| format!("technique-{i}")).collect();
let mut c = RuleBypassCorpus::new("t");
c.record_bypass("R1", "p", cls("sql"), big_chain.clone(), 1);
assert_eq!(c.bypasses_for_rule("R1")[0].encoding_chain.len(), 1000);
}
#[test]
fn adversarial_huge_payload_no_panic() {
let big = "A".repeat(1_000_000);
let mut c = RuleBypassCorpus::new("t");
c.record_block("R1", &big, cls("sql"), vec![], 1);
let dir = tempdir().expect("tempdir");
let path = dir.path().join("c.json");
c.save_atomic(&path).expect("save");
let r = RuleBypassCorpus::load_or_default(&path, "ignored");
assert_eq!(r.blocked_for_rule("R1").len(), 1);
assert_eq!(r.blocked_for_rule("R1")[0].payload.len(), 1_000_000);
}
#[test]
fn unicode_in_payload_round_trips() {
let mut c = RuleBypassCorpus::new("t");
c.record_bypass(
"R1",
"SELECT Ω 中文 \u{200B} \u{E0041}",
cls("sql"),
vec![],
1,
);
let dir = tempdir().expect("tempdir");
let path = dir.path().join("c.json");
c.save_atomic(&path).expect("save");
let r = RuleBypassCorpus::load_or_default(&path, "ignored");
let b = &r.bypasses_for_rule("R1")[0];
assert!(b.payload.contains("SELECT"));
assert!(b.payload.contains("中文"));
assert!(b.payload.contains('\u{200B}'));
assert!(b.payload.contains('\u{E0041}'));
}
#[test]
fn dedup_distinguishes_different_response_hashes() {
    // Same rule and payload, two different response hashes: both must be kept,
    // because the dedup key is the (payload, response_hash) pair.
    let mut corpus = RuleBypassCorpus::new("t");
    for hash in [1, 2] {
        corpus.record_block("R1", "p", cls("sql"), vec![], hash);
    }
    assert_eq!(corpus.blocked_for_rule("R1").len(), 2);
}
fn corrupt_sidecars(dir: &Path, base: &str) -> Vec<PathBuf> {
std::fs::read_dir(dir)
.unwrap()
.filter_map(Result::ok)
.filter(|e| {
let name = e.file_name().to_string_lossy().into_owned();
name.starts_with(base) && name.contains(".corrupt-")
})
.map(|e| e.path())
.collect()
}
fn assert_preserved_fresh(
dir: &Path,
path: &Path,
base: &str,
original: &[u8],
fingerprint: &str,
) {
let c = RuleBypassCorpus::load_or_default(path, fingerprint);
assert_eq!(
c.target_fingerprint, fingerprint,
"fresh corpus uses fallback fp"
);
assert_eq!(c.rules_seen(), 0, "returned corpus must be fresh/empty");
assert_eq!(c.total_bypasses(), 0);
assert_eq!(c.total_blocks(), 0);
assert!(
!path.exists(),
"the unreadable original must be moved aside"
);
let aside = corrupt_sidecars(dir, base);
assert_eq!(aside.len(), 1, "exactly one preserved sidecar must exist");
let preserved = std::fs::read(&aside[0]).expect("read sidecar");
assert_eq!(
preserved, original,
"preserved sidecar bytes must be byte-identical to the original"
);
}
#[test]
fn preserve_non_utf8_file_byte_identical() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("nonutf8.json");
let original: &[u8] = &[0x7B, 0xFF, 0xFE, 0x80, 0xC0, 0x22, 0x6B, 0x65, 0x79];
std::fs::write(&path, original).expect("write");
assert_preserved_fresh(dir.path(), &path, "nonutf8.json", original, "fb");
}
#[test]
fn preserve_truncated_mid_json_byte_identical() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("trunc.json");
let original = br#"{"schema_version":1,"target_fingerprint":"cf:mr:x","buckets":{"942100":{"rule_id":{"#;
std::fs::write(&path, original).expect("write");
assert_preserved_fresh(dir.path(), &path, "trunc.json", original, "fb");
}
#[test]
fn preserve_lone_open_brace_byte_identical() {
let dir = tempdir().expect("tempdir");
let path = dir.path().join("brace.json");
let original = b"{";
std::fs::write(&path, original).expect("write");
assert_preserved_fresh(dir.path(), &path, "brace.json", original, "fb");
}
/// Well-formed JSON whose shape is not a corpus must be set aside unchanged.
#[test]
fn preserve_valid_json_wrong_schema_byte_identical() {
    const FILE_NAME: &str = "wrongschema.json";
    let raw: &[u8] = br#"{"completely":"different","shape":[1,2,3],"nested":{"a":true}}"#;

    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join(FILE_NAME);
    std::fs::write(&corpus_path, raw).expect("write");

    assert_preserved_fresh(tmp.path(), &corpus_path, FILE_NAME, raw, "fb");
}
/// A top-level JSON array (rather than an object) must be set aside unchanged.
#[test]
fn preserve_json_array_instead_of_object_byte_identical() {
    const FILE_NAME: &str = "arr.json";
    let raw: &[u8] = br#"["this","is","not","a","corpus"]"#;

    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join(FILE_NAME);
    std::fs::write(&corpus_path, raw).expect("write");

    assert_preserved_fresh(tmp.path(), &corpus_path, FILE_NAME, raw, "fb");
}
/// Free-form text with trailing control bytes must be set aside unchanged.
#[test]
fn preserve_garbage_text_byte_identical() {
    const FILE_NAME: &str = "garbage.json";
    let raw: &[u8] = b"this is not json at all -- 500 lost bypasses live here\n\x01\x02";

    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join(FILE_NAME);
    std::fs::write(&corpus_path, raw).expect("write");

    assert_preserved_fresh(tmp.path(), &corpus_path, FILE_NAME, raw, "fb");
}
/// Two corruption events in a row on the same path: each load must return an
/// empty corpus and remove the original, and the bytes of the most recent
/// corruption must be found among the sidecars.
#[test]
fn preserve_moves_aside_on_every_corruption_event() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("multi.json");

    // First corruption event.
    let first = b"FIRST corrupt corpus bytes !!!";
    std::fs::write(&corpus_path, first).expect("write 1");
    let after_first = RuleBypassCorpus::load_or_default(&corpus_path, "fb");
    assert_eq!(
        after_first.rules_seen(),
        0,
        "fresh corpus after first corruption"
    );
    assert!(
        !corpus_path.exists(),
        "original moved aside after first corruption"
    );

    // Second corruption event on the very same path.
    let second = b"SECOND corrupt corpus bytes ???";
    std::fs::write(&corpus_path, second).expect("write 2");
    let after_second = RuleBypassCorpus::load_or_default(&corpus_path, "fb");
    assert_eq!(
        after_second.rules_seen(),
        0,
        "fresh corpus after second corruption"
    );
    assert!(
        !corpus_path.exists(),
        "original moved aside after second corruption"
    );

    // Read every sidecar up front, then look for the latest payload.
    let mut preserved: Vec<Vec<u8>> = Vec::new();
    for sidecar in corrupt_sidecars(tmp.path(), "multi.json") {
        preserved.push(std::fs::read(&sidecar).unwrap());
    }
    let latest_kept = preserved.iter().any(|bytes| bytes[..] == second[..]);
    assert!(
        latest_kept,
        "latest corruption's exact bytes must be preserved aside"
    );
}
/// A whitespace-only file yields a fresh corpus with the fallback fingerprint,
/// creates no sidecar, and is left where it is.
#[test]
fn whitespace_only_file_is_fresh_no_sidecar() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("ws.json");
    std::fs::write(&corpus_path, b" \n\t \r\n ").expect("write");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "fb");
    assert_eq!(loaded.target_fingerprint, "fb");
    assert_eq!(loaded.rules_seen(), 0);

    let sidecars = corrupt_sidecars(tmp.path(), "ws.json");
    assert!(
        sidecars.is_empty(),
        "whitespace-only file must NOT spawn a preserve sidecar"
    );
    assert!(corpus_path.exists(), "whitespace file is not moved aside");
}
/// A zero-byte file loads as an empty corpus and produces no sidecar.
#[test]
fn empty_file_leaves_no_sidecar_and_returns_fresh() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("zero.json");
    std::fs::write(&corpus_path, b"").expect("write");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "fb");
    assert_eq!(loaded.rules_seen(), 0);

    let sidecars = corrupt_sidecars(tmp.path(), "zero.json");
    assert!(sidecars.is_empty());
}
/// Saving an empty corpus over a populated one must leave a `c.json.bak` from
/// which the earlier bypasses (in order, with their encoding chain) and the
/// earlier block can be loaded.
#[test]
fn bak_recovers_first_corpus_after_empty_second_save() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    // Populate and persist the corpus that should end up in the backup.
    let mut populated = RuleBypassCorpus::new("cf:mr:cumulus");
    populated.record_bypass("942100", "winner-A", cls("xss"), vec!["b64".into()], 7);
    populated.record_bypass("942100", "winner-B", cls("sql"), vec![], 8);
    populated.record_block("942100", "blk", cls("sql"), vec![], 9);
    populated.save_atomic(&corpus_path).expect("save first");

    // Overwrite it with an empty corpus.
    let blank = RuleBypassCorpus::new("cf:mr:cumulus");
    blank.save_atomic(&corpus_path).expect("save empty");

    let backup_path = tmp.path().join("c.json.bak");
    assert!(
        backup_path.exists(),
        ".bak must exist after overwriting a non-empty corpus"
    );

    let recovered = RuleBypassCorpus::load_or_default(&backup_path, "ignored");
    assert_eq!(
        recovered.total_bypasses(),
        2,
        "both prior bypasses recoverable"
    );
    assert_eq!(recovered.total_blocks(), 1, "prior block recoverable");

    let recovered_bypasses = recovered.bypasses_for_rule("942100");
    let mut payloads = Vec::new();
    for bypass in recovered_bypasses.iter() {
        payloads.push(bypass.payload.clone());
    }
    let expected_payloads = vec!["winner-A".to_string(), "winner-B".to_string()];
    assert_eq!(payloads, expected_payloads);

    let expected_chain = vec!["b64".to_string()];
    assert_eq!(recovered_bypasses[0].encoding_chain, expected_chain);
}
/// The very first save, with nothing on disk beforehand, must not create a
/// `.bak` file.
#[test]
fn bak_skipped_when_no_prior_file() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "p", cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_path).expect("save");

    let backup_path = tmp.path().join("c.json.bak");
    assert!(
        !backup_path.exists(),
        "no .bak on the first save (no prior file)"
    );
}
/// Saving over a zero-byte file must not produce a `.bak` file.
#[test]
fn bak_skipped_when_prior_file_empty() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");
    std::fs::write(&corpus_path, b"").expect("seed empty");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "p", cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_path).expect("save over empty");

    let backup_path = tmp.path().join("c.json.bak");
    assert!(
        !backup_path.exists(),
        "empty prior file must not be backed up"
    );
}
/// After a second save, `c.json.bak` must contain exactly the bytes that
/// `c.json` held after the first save.
#[test]
fn bak_holds_exact_prior_bytes() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    let mut earlier = RuleBypassCorpus::new("cf:mr:x");
    earlier.record_bypass("R1", "p", cls("sql"), vec![], 1);
    earlier.save_atomic(&corpus_path).expect("save first");
    let snapshot = std::fs::read(&corpus_path).expect("read prior");

    let mut later = RuleBypassCorpus::new("cf:mr:x");
    later.record_bypass("R2", "q", cls("xss"), vec![], 2);
    later.save_atomic(&corpus_path).expect("save second");

    let backup_path = tmp.path().join("c.json.bak");
    let backup_bytes = std::fs::read(backup_path).expect("read bak");
    assert_eq!(
        backup_bytes, snapshot,
        ".bak must be a byte-exact snapshot of the prior file"
    );
}
/// Restore flow: a good corpus is overwritten by an empty one, the `.bak` is
/// loaded and saved back over the main path, and the main path then loads
/// with the original bypass.
#[test]
fn bak_round_trips_then_main_continues() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    let mut good = RuleBypassCorpus::new("cf:mr:x");
    good.record_bypass("R1", "keep-me", cls("sql"), vec![], 1);
    good.save_atomic(&corpus_path).expect("save good");

    let wiped = RuleBypassCorpus::new("cf:mr:x");
    wiped.save_atomic(&corpus_path).expect("save empty");

    // Restore from the backup onto the main path.
    let backup_path = tmp.path().join("c.json.bak");
    let from_backup = RuleBypassCorpus::load_or_default(&backup_path, "x");
    from_backup.save_atomic(&corpus_path).expect("restore");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_path, "x");
    let restored = reloaded.bypasses_for_rule("R1");
    assert_eq!(restored.len(), 1);
    assert_eq!(restored[0].payload, "keep-me");
}
/// End-to-end scenario: a saved corpus is overwritten with non-UTF-8 bytes,
/// loaded (yielding an empty corpus) and saved again. Exactly one sidecar must
/// still hold the corrupt bytes afterwards.
#[test]
fn end_to_end_corpus_disappeared_non_utf8() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("corpus.json");

    let mut real = RuleBypassCorpus::new("cf:mr:cumulus");
    for seq in 0..30u64 {
        let payload = format!("bypass-{seq}");
        real.record_bypass("942100", &payload, cls("xss"), vec![], seq);
    }
    real.save_atomic(&corpus_path).expect("save real");

    // Clobber the saved corpus with bytes that are not valid UTF-8.
    let corrupt: &[u8] = &[0x00, 0xFF, 0x80, 0x7B, 0xC3, 0x28, 0x42];
    std::fs::write(&corpus_path, corrupt).expect("corrupt");

    let fresh = RuleBypassCorpus::load_or_default(&corpus_path, "cf:mr:cumulus");
    assert_eq!(fresh.total_bypasses(), 0);
    fresh.save_atomic(&corpus_path).expect("save fresh empty");

    let sidecars = corrupt_sidecars(tmp.path(), "corpus.json");
    assert_eq!(sidecars.len(), 1, "corrupt non-UTF8 bytes preserved aside");
    let sidecar_bytes = std::fs::read(&sidecars[0]).unwrap();
    assert_eq!(
        sidecar_bytes, corrupt,
        "sidecar holds the exact corrupt bytes"
    );
}
/// A bucket saved with more than `MAX_BLOCKED_PER_BUCKET` blocked entries must
/// load with exactly that many (starting from the first), while its ten
/// bypasses all remain.
#[test]
fn heal_truncates_blocked_but_keeps_all_bypasses() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");
    let mut corpus = RuleBypassCorpus::new("heal");

    // Build an over-cap blocked list directly, bypassing the recording API.
    let over = RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET + 100;
    let mut blocked: Vec<RecordedAttempt> = Vec::with_capacity(over);
    for idx in 0..over {
        blocked.push(RecordedAttempt {
            payload: format!("blk{idx}"),
            payload_class: cls("sql"),
            encoding_chain: vec![],
            response_hash: idx as u64,
            observed_at_secs: 0,
        });
    }

    let mut bypassed: Vec<RecordedBypass> = Vec::with_capacity(10);
    for idx in 0..10u64 {
        bypassed.push(RecordedBypass {
            payload: format!("by{idx}"),
            payload_class: cls("xss"),
            encoding_chain: vec![],
            response_hash: 1_000 + idx,
            observed_at_secs: 0,
            submission: SubmissionStatus::Queued,
            delivery: String::new(),
        });
    }

    let bucket = RuleBucket {
        blocked,
        bypassed,
        ..RuleBucket::default()
    };
    corpus.buckets.insert("r".into(), bucket);
    corpus.save_atomic(&corpus_path).expect("save");

    let healed = RuleBypassCorpus::load_or_default(&corpus_path, "heal");

    let healed_blocked = healed.blocked_for_rule("r");
    assert_eq!(
        healed_blocked.len(),
        RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET
    );
    assert_eq!(healed_blocked[0].payload, "blk0");

    let healed_bypasses = healed.bypasses_for_rule("r");
    assert_eq!(healed_bypasses.len(), 10, "under-cap bypasses untouched");
    assert_eq!(healed_bypasses[9].payload, "by9");
}
/// A bucket holding five blocks and five bypasses must come back from a
/// save/load cycle with the same counts and the same first/last entries.
#[test]
fn heal_leaves_under_cap_bucket_untouched() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    for seq in 0..5u64 {
        let block_payload = format!("b{seq}");
        corpus.record_block("r", &block_payload, cls("sql"), vec![], seq);
        let bypass_payload = format!("p{seq}");
        corpus.record_bypass("r", &bypass_payload, cls("sql"), vec![], 100 + seq);
    }
    corpus.save_atomic(&corpus_path).expect("save");

    let healed = RuleBypassCorpus::load_or_default(&corpus_path, "t");
    let healed_blocked = healed.blocked_for_rule("r");
    let healed_bypasses = healed.bypasses_for_rule("r");
    assert_eq!(healed_blocked.len(), 5);
    assert_eq!(healed_bypasses.len(), 5);
    assert_eq!(healed_blocked[4].payload, "b4");
    assert_eq!(healed_bypasses[0].payload, "p0");
}
/// A bucket saved with more than `MAX_BYPASSED_PER_BUCKET` bypasses must load
/// with exactly that many, starting from the first, and its single blocked
/// entry must still be present.
#[test]
fn heal_truncated_bypassed_keeps_blocked_and_prefix() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");
    let mut corpus = RuleBypassCorpus::new("t");

    // Build an over-cap bypass list directly, bypassing the recording API.
    let over = RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET + 17;
    let mut bypassed: Vec<RecordedBypass> = Vec::with_capacity(over);
    for idx in 0..over {
        bypassed.push(RecordedBypass {
            payload: format!("by{idx}"),
            payload_class: cls("sql"),
            encoding_chain: vec![],
            response_hash: idx as u64,
            observed_at_secs: 0,
            submission: SubmissionStatus::Queued,
            delivery: String::new(),
        });
    }
    let bucket = RuleBucket {
        bypassed,
        ..RuleBucket::default()
    };
    corpus.buckets.insert("r".into(), bucket);

    // Add the single blocked attempt through `bucket_mut`, as a second step.
    let survivor = RecordedAttempt {
        payload: "survivor".into(),
        payload_class: cls("sql"),
        encoding_chain: vec![],
        response_hash: 9,
        observed_at_secs: 0,
    };
    corpus.bucket_mut("r").blocked.push(survivor);
    corpus.save_atomic(&corpus_path).expect("save");

    let healed = RuleBypassCorpus::load_or_default(&corpus_path, "t");

    let healed_bypasses = healed.bypasses_for_rule("r");
    assert_eq!(
        healed_bypasses.len(),
        RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET
    );
    assert_eq!(healed_bypasses[0].payload, "by0", "kept prefix");

    let healed_blocked = healed.blocked_for_rule("r");
    assert_eq!(healed_blocked.len(), 1);
    assert_eq!(healed_blocked[0].payload, "survivor");
}
/// A file with `schema_version` 0 must load with `CORPUS_SCHEMA_VERSION`,
/// keeping the fingerprint stored in the file.
#[test]
fn schema_version_zero_normalized_to_current() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");
    let on_disk = r#"{"schema_version":0,"target_fingerprint":"t","buckets":{}}"#;
    std::fs::write(&corpus_path, on_disk).expect("write");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "ignored");

    assert_eq!(loaded.schema_version, CORPUS_SCHEMA_VERSION);
    assert_eq!(
        loaded.target_fingerprint, "t",
        "embedded fingerprint wins for valid file"
    );
}
/// A file with no `schema_version` key must load with `CORPUS_SCHEMA_VERSION`,
/// keeping the fingerprint stored in the file.
#[test]
fn schema_version_missing_normalized_to_current() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");
    let on_disk = r#"{"target_fingerprint":"emb","buckets":{}}"#;
    std::fs::write(&corpus_path, on_disk).expect("write");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "ignored");

    assert_eq!(loaded.schema_version, CORPUS_SCHEMA_VERSION);
    assert_eq!(loaded.target_fingerprint, "emb");
}
/// When a valid corpus file is loaded, the fingerprint stored in the file is
/// used rather than the fallback passed to `load_or_default`.
#[test]
fn valid_file_fingerprint_overrides_fallback() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    let mut saved = RuleBypassCorpus::new("embedded-fp");
    saved.record_bypass("R1", "p", cls("sql"), vec![], 1);
    saved.save_atomic(&corpus_path).expect("save");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "fallback-should-be-ignored");
    assert_eq!(loaded.target_fingerprint, "embedded-fp");
}
/// Simulates an older on-disk format by stripping the `delivery` key from
/// every serialized bypass. Loading must succeed with `delivery` defaulting to
/// the empty string and the other fields intact.
#[test]
fn old_corpus_loads_with_default_delivery_for_every_bypass() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("old.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "a", cls("sql"), vec!["x".into()], 1);
    corpus.record_bypass("R1", "b", cls("xss"), vec![], 2);

    // Round-trip through text into a generic JSON tree so keys can be removed.
    let serialized = serde_json::to_string(&corpus).unwrap();
    let mut doc: serde_json::Value = serde_json::from_str(&serialized).unwrap();
    let buckets = doc["buckets"].as_object_mut().unwrap();
    for bucket in buckets.values_mut() {
        let entries = bucket["bypassed"].as_array_mut().unwrap();
        for entry in entries.iter_mut() {
            entry.as_object_mut().unwrap().remove("delivery");
        }
    }
    let stripped = serde_json::to_string(&doc).unwrap();
    std::fs::write(&corpus_path, stripped).expect("write");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "ignored");
    let bypasses = loaded.bypasses_for_rule("R1");
    assert_eq!(bypasses.len(), 2);
    assert_eq!(bypasses[0].delivery, "");
    assert_eq!(bypasses[1].delivery, "");
    assert_eq!(bypasses[0].encoding_chain, vec!["x".to_string()]);
}
/// Calling `set_delivery` twice on the same bypass succeeds both times and
/// leaves the second value stored.
#[test]
fn set_delivery_overwrites_existing_shape_with_non_empty() {
    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "p", cls("sql"), vec![], 1);

    let first_applied = corpus.set_delivery("R1", "p", String::from("\"first\""));
    assert!(first_applied);
    let second_applied = corpus.set_delivery("R1", "p", String::from("\"second\""));
    assert!(second_applied);

    let stored = corpus.bypasses_for_rule("R1");
    assert_eq!(stored[0].delivery, "\"second\"");
}
/// `set_delivery` on a rule that has no bucket reports `false`.
#[test]
fn set_delivery_empty_on_missing_bucket_returns_false() {
    let mut corpus = RuleBypassCorpus::new("t");
    let applied = corpus.set_delivery("nope", "p", String::new());
    assert!(!applied);
}
/// `set_submission` on a corpus with no recorded bypasses reports `false`.
#[test]
fn set_submission_empty_corpus_returns_false() {
    let mut corpus = RuleBypassCorpus::new("t");
    let applied = corpus.set_submission("R1", "p", SubmissionStatus::Queued);
    assert!(!applied);
}
/// `set_submission` for a payload that is not in an existing bucket reports
/// `false` and leaves the recorded bypass at `Queued`.
#[test]
fn set_submission_bucket_exists_but_payload_absent_returns_false() {
    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "present", cls("sql"), vec![], 1);

    let attempted = SubmissionStatus::Accepted {
        report_id: "X".into(),
    };
    let matched = corpus.set_submission("R1", "absent", attempted);
    assert!(
        !matched,
        "wrong payload in an existing bucket must not match"
    );

    let stored = corpus.bypasses_for_rule("R1");
    assert!(matches!(stored[0].submission, SubmissionStatus::Queued));
}
/// Every `SubmissionStatus` variant must survive a save/load cycle unchanged.
///
/// One bypass per variant is recorded under rule `R1`, its status is set, the
/// corpus is saved and reloaded, and then each payload's reloaded status is
/// compared against the value that was written. The comparison is driven by
/// the `variants` table so that no variant can be left unchecked.
#[test]
fn submission_status_round_trips_all_variants() {
    let dir = tempdir().expect("tempdir");
    let path = dir.path().join("c.json");
    let mut c = RuleBypassCorpus::new("t");
    let variants = [
        ("p0", SubmissionStatus::Queued),
        (
            "p1",
            SubmissionStatus::DryRunHold {
                release_at_secs: 1234,
            },
        ),
        (
            "p2",
            SubmissionStatus::Submitted {
                report_id: "H1-1".into(),
            },
        ),
        (
            "p3",
            SubmissionStatus::Accepted {
                report_id: "H1-2".into(),
            },
        ),
        (
            "p4",
            SubmissionStatus::Duplicate {
                duplicate_of: "H1-3".into(),
            },
        ),
        (
            "p5",
            SubmissionStatus::Rejected {
                reason: "informative".into(),
            },
        ),
    ];
    for (p, _) in &variants {
        c.record_bypass("R1", p, cls("sql"), vec![], 0);
    }
    for (p, st) in &variants {
        assert!(c.set_submission("R1", p, st.clone()));
    }
    c.save_atomic(&path).expect("save");
    let r = RuleBypassCorpus::load_or_default(&path, "ignored");
    let by_payload: BTreeMap<_, _> = r
        .bypasses_for_rule("R1")
        .iter()
        .map(|b| (b.payload.clone(), b.submission.clone()))
        .collect();
    // Check every entry of the table, including `Queued` and `Accepted`.
    for (p, expected) in &variants {
        let actual = by_payload
            .get(*p)
            .unwrap_or_else(|| panic!("payload {p} missing after reload"));
        assert_eq!(actual, expected, "status for payload {p} must round-trip");
    }
}
/// Serializing the same corpus twice yields the same string, and after a
/// save/load cycle the bucket keys iterate in sorted order.
#[test]
fn determinism_identical_serialization_after_save_load() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    // Insert rule ids in descending order so insertion order differs from key order.
    let mut corpus = RuleBypassCorpus::new("t");
    for seq in (0..40u64).rev() {
        let rule = format!("R{seq:03}");
        let payload = format!("p{seq}");
        corpus.record_bypass(&rule, &payload, cls("sql"), vec![], seq);
    }

    let first_pass = serde_json::to_string(&corpus).unwrap();
    let second_pass = serde_json::to_string(&corpus).unwrap();
    assert_eq!(first_pass, second_pass);

    corpus.save_atomic(&corpus_path).expect("save");
    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "ignored");

    let iterated: Vec<_> = loaded.buckets.keys().cloned().collect();
    let expected_order = {
        let mut ordered = iterated.clone();
        ordered.sort();
        ordered
    };
    assert_eq!(
        iterated, expected_order,
        "BTreeMap keys must iterate in sorted order"
    );
}
/// Two corpora filled with the same rule ids in opposite insertion orders
/// must serialize to the same JSON string.
#[test]
fn btreemap_order_independent_of_insertion_order() {
    let rule_ids = ["R5", "R1", "R9", "R3", "R7"];

    let mut forward = RuleBypassCorpus::new("t");
    for &rule in &rule_ids {
        forward.record_block(rule, "p", cls("sql"), vec![], 1);
    }

    let mut backward = RuleBypassCorpus::new("t");
    for &rule in rule_ids.iter().rev() {
        backward.record_block(rule, "p", cls("sql"), vec![], 1);
    }

    let forward_json = serde_json::to_string(&forward).unwrap();
    let backward_json = serde_json::to_string(&backward).unwrap();
    assert_eq!(
        forward_json, backward_json,
        "BTreeMap makes serialization insertion-order-independent"
    );
}
/// A payload mixing astral-plane characters, CJK text, zero-width characters
/// and an emoji must be identical after a save/load cycle.
#[test]
fn unicode_payload_round_trips_with_exact_bytes() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");
    let payload = "𝕊𝔼𝕃𝔼ℂ𝕋 ' OR 𝟙=𝟙 -- 中文 \u{200B}\u{FEFF}\u{1F4A9} emoji";

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", payload, cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_path).expect("save");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "ignored");
    let stored = loaded.bypasses_for_rule("R1");
    assert_eq!(stored[0].payload, payload, "unicode payload exact");
}
/// A single 1.2-million-byte payload must come back from a save/load cycle
/// with the same length and content.
#[test]
fn one_mb_bypass_payload_round_trips_no_oom() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");
    let big = "A".repeat(1_200_000);

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", &big, cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_path).expect("save");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "ignored");
    let stored = loaded.bypasses_for_rule("R1");
    let reloaded_payload = &stored[0].payload;
    assert_eq!(reloaded_payload.len(), 1_200_000);
    let only_a = reloaded_payload.bytes().all(|byte| byte == b'A');
    assert!(only_a);
}
/// A 5000-element encoding chain must come back from a save/load cycle with
/// the same length and the same first and last elements.
#[test]
fn huge_encoding_chain_round_trips() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    let mut chain: Vec<String> = Vec::with_capacity(5000);
    for step in 0..5000 {
        chain.push(format!("t{step}"));
    }

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "p", cls("sql"), chain, 1);
    corpus.save_atomic(&corpus_path).expect("save");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "ignored");
    let stored = loaded.bypasses_for_rule("R1");
    let reloaded_chain = &stored[0].encoding_chain;
    assert_eq!(reloaded_chain.len(), 5000);
    assert_eq!(reloaded_chain[0], "t0");
    assert_eq!(reloaded_chain[4999], "t4999");
}
/// Recording seven bypasses that contain only three distinct
/// (payload, response hash) pairs must leave exactly those three stored.
#[test]
fn dedup_bypass_by_response_hash_and_payload_property() {
    let mut corpus = RuleBypassCorpus::new("t");
    let observations: [(&str, u64); 7] = [
        ("p", 1),
        ("q", 1),
        ("p", 2),
        ("p", 1),
        ("q", 1),
        ("p", 2),
        ("p", 2),
    ];
    for (payload, hash) in observations {
        corpus.record_bypass("R1", payload, cls("sql"), vec![], hash);
    }

    let stored = corpus.bypasses_for_rule("R1");
    assert_eq!(stored.len(), 3, "only distinct (hash,payload) survive");

    let mut seen: std::collections::BTreeSet<(String, u64)> = std::collections::BTreeSet::new();
    for bypass in stored.iter() {
        seen.insert((bypass.payload.clone(), bypass.response_hash));
    }
    assert!(seen.contains(&("p".to_string(), 1)));
    assert!(seen.contains(&("p".to_string(), 2)));
    assert!(seen.contains(&("q".to_string(), 1)));
}
/// Marking drift twice on the same rule must never move the recorded drift
/// timestamp backwards.
#[test]
fn drift_timestamp_monotonic_across_remarks() {
    // Reads the drift timestamp of rule `R1`, panicking if none is set.
    let drift_stamp =
        |corpus: &RuleBypassCorpus| corpus.buckets["R1"].last_drift_at_secs.unwrap();

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_block("R1", "p", cls("sql"), vec![], 1);

    corpus.mark_drift("R1");
    let earlier = drift_stamp(&corpus);
    corpus.mark_drift("R1");
    let later = drift_stamp(&corpus);

    assert!(
        later >= earlier,
        "drift timestamp must be monotonic non-decreasing"
    );
}
/// Even when the in-memory `schema_version` is 0, the file written by
/// `save_atomic` must carry `CORPUS_SCHEMA_VERSION`.
#[test]
fn first_save_writes_current_schema_version_to_disk() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.schema_version = 0;
    corpus.record_bypass("R1", "p", cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_path).expect("save");

    // Inspect the raw JSON rather than going through `load_or_default`.
    let text = std::fs::read_to_string(&corpus_path).unwrap();
    let doc: serde_json::Value = serde_json::from_str(&text).unwrap();
    let written_version = doc["schema_version"].as_u64().unwrap();
    assert_eq!(written_version, u64::from(CORPUS_SCHEMA_VERSION));
}
/// A new corpus starts with `last_saved_at_secs == 0`; the file produced by
/// saving it must load with a non-zero value.
#[test]
fn save_stamps_last_saved_at_secs() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    let unsaved = RuleBypassCorpus::new("t");
    assert_eq!(unsaved.last_saved_at_secs, 0);
    unsaved.save_atomic(&corpus_path).expect("save");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "t");
    let stamped = loaded.last_saved_at_secs > 0;
    assert!(stamped, "save must stamp a real epoch second");
}
/// A valid corpus larger than one megabyte on disk (30 rules x 50 bypasses
/// with ~2 KB payloads) must load completely and never be set aside as
/// corrupt.
#[test]
fn valid_oversize_under_ceiling_is_preserved_not_dropped() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    let filler = "X".repeat(2000);
    let mut corpus = RuleBypassCorpus::new("t");
    for rule_no in 0..30u64 {
        let rule = format!("R{rule_no}");
        for item_no in 0..50u64 {
            let payload = format!("{filler}-{rule_no}-{item_no}");
            let hash = rule_no * 1000 + item_no;
            corpus.record_bypass(&rule, &payload, cls("sql"), vec![], hash);
        }
    }
    corpus.save_atomic(&corpus_path).expect("save");

    let size_on_disk = std::fs::metadata(&corpus_path).unwrap().len();
    assert!(size_on_disk > 1_000_000, "test corpus should be multi-MB");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "ignored");
    assert_eq!(
        loaded.total_bypasses(),
        30 * 50,
        "all valid bypasses load intact"
    );

    let sidecars = corrupt_sidecars(tmp.path(), "c.json");
    assert!(sidecars.is_empty(), "valid file never preserved-aside");
}
/// After a save, the directory must contain `c.json` and nothing other than
/// `c.json` / `c.json.bak`.
#[test]
fn save_atomic_leaves_no_tempfiles_behind() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "p", cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_path).expect("save");

    // Directory entries that fail to read are skipped.
    let mut names: Vec<String> = Vec::new();
    for entry in std::fs::read_dir(tmp.path()).unwrap().flatten() {
        names.push(entry.file_name().to_string_lossy().into_owned());
    }

    let has_main_file = names.iter().any(|name| name == "c.json");
    assert!(has_main_file);

    let has_stray = names
        .iter()
        .any(|name| name != "c.json" && name != "c.json.bak");
    assert!(
        !has_stray,
        "no stray temp files left behind, got: {names:?}"
    );
}
/// Three rules — block-only, bypass-only and mixed — must keep their exact
/// per-rule block and bypass counts across a save/load cycle.
#[test]
fn empty_buckets_and_blocks_persist_exact_counts() {
    let tmp = tempdir().expect("tempdir");
    let corpus_path = tmp.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_block("only-block", "b", cls("sql"), vec![], 1);
    corpus.record_bypass("only-bypass", "p", cls("xss"), vec![], 2);
    corpus.record_block("mixed", "b", cls("cmd"), vec![], 3);
    corpus.record_bypass("mixed", "p", cls("cmd"), vec![], 4);
    corpus.save_atomic(&corpus_path).expect("save");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_path, "t");

    // Rule that only ever saw a block.
    assert_eq!(loaded.blocked_for_rule("only-block").len(), 1);
    assert_eq!(loaded.bypasses_for_rule("only-block").len(), 0);

    // Rule that only ever saw a bypass.
    assert_eq!(loaded.bypasses_for_rule("only-bypass").len(), 1);
    assert_eq!(loaded.blocked_for_rule("only-bypass").len(), 0);

    // Rule that saw one of each.
    assert_eq!(loaded.blocked_for_rule("mixed").len(), 1);
    assert_eq!(loaded.bypasses_for_rule("mixed").len(), 1);

    assert_eq!(loaded.rules_seen(), 3);
}
}