1use serde::{Deserialize, Serialize};
52use std::collections::BTreeMap;
53use std::path::{Path, PathBuf};
54use std::time::{SystemTime, UNIX_EPOCH};
55
56use crate::coverage_feedback::{PayloadClass, RuleId};
57
/// Schema version stamped onto every snapshot written by
/// `RuleBypassCorpus::save_atomic`. A loaded corpus whose `schema_version`
/// is 0 (the serde default when the field is absent) is normalized to this
/// value by `RuleBypassCorpus::load_or_default`.
pub const CORPUS_SCHEMA_VERSION: u32 = 1;
61
/// One entry in a rule bucket's `blocked` list, as built by
/// `RuleBypassCorpus::record_block`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RecordedAttempt {
    /// The payload string exactly as passed to `record_block`.
    pub payload: String,
    /// Class label of the payload; `summary` groups counts by its `as_str()` value.
    pub payload_class: PayloadClass,
    /// Caller-supplied list of encoding step names.
    /// NOTE(review): presumably in application order — confirm with the producer.
    pub encoding_chain: Vec<String>,
    /// Caller-supplied response hash; together with `payload` it forms the
    /// de-duplication key used by `record_block`.
    pub response_hash: u64,
    /// Seconds since the Unix epoch at the moment the entry was recorded.
    pub observed_at_secs: u64,
}
88
/// One entry in a rule bucket's `bypassed` list, as built by
/// `RuleBypassCorpus::record_bypass`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct RecordedBypass {
    /// The payload string exactly as passed to `record_bypass`. It is also the
    /// lookup key used by `set_submission` and `set_delivery`.
    pub payload: String,
    /// Class label of the payload; `summary` groups counts by its `as_str()` value.
    pub payload_class: PayloadClass,
    /// Caller-supplied list of encoding step names.
    pub encoding_chain: Vec<String>,
    /// Caller-supplied response hash; together with `payload` it forms the
    /// de-duplication key used by `record_bypass`.
    pub response_hash: u64,
    /// Seconds since the Unix epoch at the moment the entry was recorded.
    pub observed_at_secs: u64,
    /// Submission lifecycle state. Starts as `Queued`, which is also the
    /// value used when the field is missing from a loaded file.
    #[serde(default)]
    pub submission: SubmissionStatus,
    /// Delivery descriptor stored by `set_delivery`. It is empty until one is
    /// set, and empty when the field is missing from a loaded file.
    /// NOTE(review): callers appear to store a JSON-encoded delivery shape
    /// here — confirm the expected format.
    #[serde(default)]
    pub delivery: String,
}
117
/// Submission lifecycle of a recorded bypass.
///
/// Serialized in serde's adjacently tagged form: the variant name goes under
/// the `"stage"` key and the variant's fields under `"data"`.
#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(tag = "stage", content = "data")]
pub enum SubmissionStatus {
    /// Initial state. `novel_bypasses_pending_submission` reports the bypass
    /// once its age reaches the caller-supplied dry-run window.
    #[default]
    Queued,
    /// Explicit hold. The bypass is reported as pending only once the current
    /// time reaches `release_at_secs` (seconds since the Unix epoch).
    DryRunHold { release_at_secs: u64 },
    /// Carries a report identifier; never reported as pending.
    Submitted { report_id: String },
    /// Carries a report identifier; never reported as pending.
    Accepted { report_id: String },
    /// Carries the identifier it duplicates; never reported as pending.
    Duplicate { duplicate_of: String },
    /// Carries a rejection reason; never reported as pending.
    Rejected { reason: String },
}
138
/// Everything recorded for a single rule: the attempts that were blocked, the
/// bypasses that were found, and optional metadata.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct RuleBucket {
    /// Identifier of the rule this bucket belongs to. `bucket_mut` sets it
    /// from the same string it uses as the map key.
    pub rule_id: RuleId,
    /// Optional free-text description; `None` when absent from a loaded file.
    #[serde(default)]
    pub description: Option<String>,
    /// Attempts recorded through `record_block`, oldest first.
    #[serde(default)]
    pub blocked: Vec<RecordedAttempt>,
    /// Bypasses recorded through `record_bypass`, oldest first.
    #[serde(default)]
    pub bypassed: Vec<RecordedBypass>,
    /// Seconds since the Unix epoch of the most recent `mark_drift` call for
    /// this rule, or `None` if it was never marked.
    #[serde(default)]
    pub last_drift_at_secs: Option<u64>,
}
161
/// Persistent per-target corpus of blocked attempts and bypasses, grouped by rule.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RuleBypassCorpus {
    /// Schema version of the stored data. It defaults to 0 when missing from
    /// a file, and `load_or_default` rewrites 0 to `CORPUS_SCHEMA_VERSION`.
    #[serde(default)]
    pub schema_version: u32,
    /// Identifier of the target this corpus describes. This is the only field
    /// without a serde default, so a file lacking it fails to parse.
    pub target_fingerprint: String,
    /// Buckets keyed by rule-id string. A `BTreeMap` keeps iteration (and so
    /// serialization) in sorted key order.
    #[serde(default)]
    pub buckets: BTreeMap<String, RuleBucket>,
    /// Seconds since the Unix epoch stamped onto the snapshot by
    /// `save_atomic`; 0 for a corpus created by `new`.
    #[serde(default)]
    pub last_saved_at_secs: u64,
}
185
186impl RuleBypassCorpus {
187 #[must_use]
189 pub fn new(target_fingerprint: impl Into<String>) -> Self {
190 Self {
191 schema_version: CORPUS_SCHEMA_VERSION,
192 target_fingerprint: target_fingerprint.into(),
193 buckets: BTreeMap::new(),
194 last_saved_at_secs: 0,
195 }
196 }
197
198 const CORPUS_READ_CEILING_BYTES: usize = 1024 * 1024 * 1024; pub fn load_or_default(path: &Path, target_fingerprint: impl Into<String>) -> Self {
234 if !path.exists() {
236 return Self::new(target_fingerprint);
237 }
238 let raw = match crate::safe_io::read_capped_text(path, Self::CORPUS_READ_CEILING_BYTES) {
239 Ok(s) => s,
240 Err(e) => {
241 preserve_unreadable_corpus(path, &format!("read failed: {e}"));
244 return Self::new(target_fingerprint);
245 }
246 };
247 if raw.trim().is_empty() {
250 return Self::new(target_fingerprint);
251 }
252 match serde_json::from_str::<Self>(&raw) {
253 Ok(mut corpus) => {
254 if corpus.schema_version == 0 {
255 corpus.schema_version = CORPUS_SCHEMA_VERSION;
256 }
257 for bucket in corpus.buckets.values_mut() {
263 bucket.blocked.truncate(Self::MAX_BLOCKED_PER_BUCKET);
264 bucket.bypassed.truncate(Self::MAX_BYPASSED_PER_BUCKET);
265 }
266 corpus
267 }
268 Err(e) => {
269 preserve_unreadable_corpus(path, &format!("parse failed: {e}"));
273 Self::new(target_fingerprint)
274 }
275 }
276 }
277
278 pub fn save_atomic(&self, path: &Path) -> std::io::Result<()> {
283 backup_before_overwrite(path);
289 let mut snap = self.clone();
290 snap.schema_version = CORPUS_SCHEMA_VERSION;
291 snap.last_saved_at_secs = current_epoch_secs();
292 let body = serde_json::to_vec_pretty(&snap)
293 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
294 wafrift_types::loaders::write_atomic(path, &body)
300 }
301
302 pub fn bucket_mut(&mut self, rule_id: &str) -> &mut RuleBucket {
305 self.buckets
306 .entry(rule_id.to_string())
307 .or_insert_with(|| RuleBucket {
308 rule_id: RuleId::new(rule_id),
309 ..RuleBucket::default()
310 })
311 }
312
    /// Upper bound on `blocked` entries kept per bucket. `record_block` drops
    /// new entries once it is reached, and `load_or_default` truncates to it.
    const MAX_BLOCKED_PER_BUCKET: usize = 512;

    /// Upper bound on `bypassed` entries kept per bucket. `record_bypass` drops
    /// new entries once it is reached, and `load_or_default` truncates to it.
    const MAX_BYPASSED_PER_BUCKET: usize = 4096;
333
334 pub fn record_block(
337 &mut self,
338 rule_id: &str,
339 payload: &str,
340 payload_class: PayloadClass,
341 encoding_chain: Vec<String>,
342 response_hash: u64,
343 ) {
344 let entry = RecordedAttempt {
345 payload: payload.to_string(),
346 payload_class,
347 encoding_chain,
348 response_hash,
349 observed_at_secs: current_epoch_secs(),
350 };
351 let bucket = self.bucket_mut(rule_id);
352 if bucket.blocked.len() >= Self::MAX_BLOCKED_PER_BUCKET {
357 return;
358 }
359 if !bucket
362 .blocked
363 .iter()
364 .any(|a| a.response_hash == entry.response_hash && a.payload == entry.payload)
365 {
366 bucket.blocked.push(entry);
367 }
368 }
369
370 pub fn record_bypass(
374 &mut self,
375 rule_id: &str,
376 payload: &str,
377 payload_class: PayloadClass,
378 encoding_chain: Vec<String>,
379 response_hash: u64,
380 ) {
381 let entry = RecordedBypass {
382 payload: payload.to_string(),
383 payload_class,
384 encoding_chain,
385 response_hash,
386 observed_at_secs: current_epoch_secs(),
387 submission: SubmissionStatus::Queued,
388 delivery: String::new(),
389 };
390 let bucket = self.bucket_mut(rule_id);
391 if bucket.bypassed.len() >= Self::MAX_BYPASSED_PER_BUCKET {
395 return;
396 }
397 if !bucket
398 .bypassed
399 .iter()
400 .any(|b| b.response_hash == entry.response_hash && b.payload == entry.payload)
401 {
402 bucket.bypassed.push(entry);
403 }
404 }
405
406 pub fn mark_drift(&mut self, rule_id: &str) {
410 let bucket = self.bucket_mut(rule_id);
411 bucket.last_drift_at_secs = Some(current_epoch_secs());
412 }
413
414 pub fn set_submission(
417 &mut self,
418 rule_id: &str,
419 payload: &str,
420 new_status: SubmissionStatus,
421 ) -> bool {
422 if let Some(bucket) = self.buckets.get_mut(rule_id)
423 && let Some(b) = bucket.bypassed.iter_mut().find(|b| b.payload == payload)
424 {
425 b.submission = new_status;
426 return true;
427 }
428 false
429 }
430
431 pub fn set_delivery(&mut self, rule_id: &str, payload: &str, delivery: String) -> bool {
441 if delivery.is_empty() {
442 return false;
443 }
444 if let Some(bucket) = self.buckets.get_mut(rule_id)
445 && let Some(b) = bucket.bypassed.iter_mut().find(|b| b.payload == payload)
446 {
447 b.delivery = delivery;
448 return true;
449 }
450 false
451 }
452
453 #[must_use]
457 pub fn unexplored_rules(&self, min_attempts: usize) -> Vec<String> {
458 self.buckets
459 .iter()
460 .filter(|(_, b)| b.blocked.len() < min_attempts && b.bypassed.is_empty())
461 .map(|(k, _)| k.clone())
462 .collect()
463 }
464
465 #[must_use]
468 pub fn rules_due_for_retry(&self, window_secs: u64) -> Vec<String> {
469 let now = current_epoch_secs();
470 self.buckets
471 .iter()
472 .filter(|(_, b)| {
473 b.last_drift_at_secs
474 .is_some_and(|d| now.saturating_sub(d) <= window_secs)
475 && !b.blocked.is_empty()
476 })
477 .map(|(k, _)| k.clone())
478 .collect()
479 }
480
481 #[must_use]
484 pub fn bypasses_for_rule(&self, rule_id: &str) -> &[RecordedBypass] {
485 self.buckets
486 .get(rule_id)
487 .map(|b| b.bypassed.as_slice())
488 .unwrap_or(&[])
489 }
490
491 #[must_use]
493 pub fn blocked_for_rule(&self, rule_id: &str) -> &[RecordedAttempt] {
494 self.buckets
495 .get(rule_id)
496 .map(|b| b.blocked.as_slice())
497 .unwrap_or(&[])
498 }
499
500 #[must_use]
508 pub fn novel_bypasses_pending_submission(
509 &self,
510 default_dry_run_secs: u64,
511 ) -> Vec<(&str, &RecordedBypass)> {
512 let now = current_epoch_secs();
513 let mut out = vec![];
514 for (rule_id, bucket) in &self.buckets {
515 for b in &bucket.bypassed {
516 let ready = match &b.submission {
517 SubmissionStatus::Queued => {
518 now.saturating_sub(b.observed_at_secs) >= default_dry_run_secs
519 }
520 SubmissionStatus::DryRunHold { release_at_secs } => now >= *release_at_secs,
521 _ => false,
522 };
523 if ready {
524 out.push((rule_id.as_str(), b));
525 }
526 }
527 }
528 out
529 }
530
531 #[must_use]
533 pub fn total_bypasses(&self) -> usize {
534 self.buckets.values().map(|b| b.bypassed.len()).sum()
535 }
536
537 #[must_use]
539 pub fn total_blocks(&self) -> usize {
540 self.buckets.values().map(|b| b.blocked.len()).sum()
541 }
542
543 #[must_use]
545 pub fn rules_seen(&self) -> usize {
546 self.buckets.len()
547 }
548
549 #[must_use]
552 pub fn summary(&self) -> CoverageSummary {
553 let mut per_class: BTreeMap<String, ClassStats> = BTreeMap::new();
554 for bucket in self.buckets.values() {
555 for b in &bucket.blocked {
556 let entry = per_class
557 .entry(b.payload_class.as_str().to_string())
558 .or_default();
559 entry.blocks += 1;
560 }
561 for b in &bucket.bypassed {
562 let entry = per_class
563 .entry(b.payload_class.as_str().to_string())
564 .or_default();
565 entry.bypasses += 1;
566 }
567 }
568 CoverageSummary {
569 target_fingerprint: self.target_fingerprint.clone(),
570 rules_seen: self.rules_seen(),
571 total_blocks: self.total_blocks(),
572 total_bypasses: self.total_bypasses(),
573 per_class,
574 }
575 }
576}
577
/// Per-payload-class counters produced by `RuleBypassCorpus::summary`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ClassStats {
    /// Number of blocked attempts recorded for the class.
    pub blocks: usize,
    /// Number of bypasses recorded for the class.
    pub bypasses: usize,
}
584
/// Aggregate snapshot of a corpus, as returned by `RuleBypassCorpus::summary`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CoverageSummary {
    /// Copy of the corpus's `target_fingerprint`.
    pub target_fingerprint: String,
    /// Number of rule buckets in the corpus.
    pub rules_seen: usize,
    /// Blocked attempts summed over all buckets.
    pub total_blocks: usize,
    /// Bypasses summed over all buckets.
    pub total_bypasses: usize,
    /// Counters keyed by payload-class string, in sorted key order.
    pub per_class: BTreeMap<String, ClassStats>,
}
595
596#[must_use]
600pub fn default_corpus_path(target_fingerprint: &str) -> PathBuf {
601 let safe = sanitize_fingerprint_for_filename(target_fingerprint);
602 if let Some(home) = dirs_home() {
603 return home
604 .join(".wafrift")
605 .join("corpus")
606 .join(format!("{safe}.json"));
607 }
608 PathBuf::from("wafrift-bench/results/corpus").join(format!("{safe}.json"))
609}
610
/// Maps a fingerprint to a string that is safe to use as a file name.
///
/// ASCII letters, ASCII digits, `-` and `_` are kept. Every other character,
/// including `.`, path separators and non-ASCII characters, becomes a single
/// `_`. Distinct fingerprints can therefore map to the same name.
fn sanitize_fingerprint_for_filename(fp: &str) -> String {
    let mut safe = String::with_capacity(fp.len());
    for ch in fp.chars() {
        let keep = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_');
        safe.push(if keep { ch } else { '_' });
    }
    safe
}
629
/// Current wall-clock time as whole seconds since the Unix epoch, or 0 when
/// the system clock reports a time before the epoch.
fn current_epoch_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
636
637fn preserve_unreadable_corpus(path: &Path, reason: &str) {
648 let nanos = SystemTime::now()
654 .duration_since(UNIX_EPOCH)
655 .map(|d| d.as_nanos())
656 .unwrap_or(0);
657 let mut aside = path.as_os_str().to_owned();
658 aside.push(format!(
659 ".corrupt-{}-{}-{}",
660 current_epoch_secs(),
661 std::process::id(),
662 nanos
663 ));
664 let aside = PathBuf::from(aside);
665 match std::fs::rename(path, &aside) {
666 Ok(()) => eprintln!(
667 "wafrift: WARNING — corpus at {} could not be loaded ({reason}). \
668 Your data was PRESERVED at {} and a fresh corpus was started. \
669 Rename it back once the cause is addressed.",
670 path.display(),
671 aside.display(),
672 ),
673 Err(e) => eprintln!(
674 "wafrift: ERROR — corpus at {} could not be loaded ({reason}) AND \
675 could not be moved aside ({e}). Back this file up MANUALLY before \
676 the next run — a save may otherwise overwrite it.",
677 path.display(),
678 ),
679 }
680}
681
/// Best-effort snapshot of the file at `path` to `<path>.bak` before it is
/// overwritten.
///
/// Nothing happens when `path` is missing, cannot be inspected, or is empty.
/// A failed copy never aborts the caller, but it is reported on stderr so
/// the operator knows the upcoming overwrite is not covered by a backup.
fn backup_before_overwrite(path: &Path) {
    let has_content = std::fs::metadata(path).is_ok_and(|meta| meta.len() > 0);
    if !has_content {
        return;
    }

    let mut bak = path.as_os_str().to_owned();
    bak.push(".bak");
    let bak = PathBuf::from(bak);

    if let Err(e) = std::fs::copy(path, &bak) {
        eprintln!(
            "wafrift: WARNING — could not snapshot {} to {} before overwriting it ({e}). \
             Continuing WITHOUT a backup.",
            path.display(),
            bak.display(),
        );
    }
}
696
/// Resolves the user's home directory from the environment.
///
/// `HOME` is consulted first, then `USERPROFILE`; the first variable that is
/// set to a non-empty value wins. Values are read with `var_os`, so a home
/// directory whose path is not valid Unicode is still honored instead of
/// being treated as unset. Returns `None` when neither variable is usable.
fn dirs_home() -> Option<PathBuf> {
    ["HOME", "USERPROFILE"]
        .iter()
        .filter_map(|key| std::env::var_os(key))
        .find(|value| !value.is_empty())
        .map(PathBuf::from)
}
712
713#[cfg(test)]
714mod tests {
715 use super::*;
716 use tempfile::tempdir;
717
718 fn cls(s: &str) -> PayloadClass {
719 PayloadClass::new(s)
720 }
721
722 #[test]
723 fn new_corpus_is_empty() {
724 let c = RuleBypassCorpus::new("cf:managed-ruleset:cumulusfire.cloudflare.com");
725 assert_eq!(c.rules_seen(), 0);
726 assert_eq!(c.total_blocks(), 0);
727 assert_eq!(c.total_bypasses(), 0);
728 assert_eq!(
729 c.target_fingerprint,
730 "cf:managed-ruleset:cumulusfire.cloudflare.com"
731 );
732 assert_eq!(c.schema_version, CORPUS_SCHEMA_VERSION);
733 }
734
735 #[test]
736 fn record_block_dedups_by_payload_and_hash() {
737 let mut c = RuleBypassCorpus::new("t");
738 c.record_block(
739 "942100",
740 "' OR 1=1--",
741 cls("sql"),
742 vec!["url".into()],
743 0xCAFE,
744 );
745 c.record_block(
746 "942100",
747 "' OR 1=1--",
748 cls("sql"),
749 vec!["url".into()],
750 0xCAFE,
751 );
752 c.record_block(
753 "942100",
754 "' OR 1=1--",
755 cls("sql"),
756 vec!["url".into()],
757 0xCAFE,
758 );
759 assert_eq!(c.blocked_for_rule("942100").len(), 1);
760 }
761
762 #[test]
763 fn record_block_keeps_distinct_payloads_per_rule() {
764 let mut c = RuleBypassCorpus::new("t");
765 c.record_block("942100", "' OR 1=1--", cls("sql"), vec![], 1);
766 c.record_block("942100", "UNION SELECT 1", cls("sql"), vec![], 2);
767 c.record_block("942100", "1' AND 1=1--", cls("sql"), vec![], 3);
768 assert_eq!(c.blocked_for_rule("942100").len(), 3);
769 }
770
771 #[test]
772 fn record_block_caps_blocked_per_bucket() {
773 let mut c = RuleBypassCorpus::new("t");
778 let over = RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET + 200;
779 for i in 0..over {
780 c.record_block("r", &format!("p{i}"), cls("sql"), vec![], i as u64);
783 }
784 assert_eq!(
785 c.blocked_for_rule("r").len(),
786 RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET,
787 "blocked must be capped per bucket"
788 );
789 let n_bypass = RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET + 50;
791 for i in 0..n_bypass {
792 c.record_bypass(
793 "r",
794 &format!("b{i}"),
795 cls("sql"),
796 vec![],
797 1_000_000 + i as u64,
798 );
799 }
800 assert_eq!(
801 c.total_bypasses(),
802 n_bypass,
803 "bypasses under MAX_BYPASSED_PER_BUCKET must all persist"
804 );
805 }
806
807 #[test]
808 fn record_bypass_caps_bypassed_per_bucket() {
809 let mut c = RuleBypassCorpus::new("t");
813 let over = RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET + 500;
814 for i in 0..over {
815 c.record_bypass("r", &format!("b{i}"), cls("sql"), vec![], i as u64);
817 }
818 assert_eq!(
819 c.bypasses_for_rule("r").len(),
820 RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET,
821 "bypassed must be capped at MAX_BYPASSED_PER_BUCKET"
822 );
823 }
824
825 #[test]
826 fn load_or_default_heals_pre_cap_oversized_blocked() {
827 use std::env::temp_dir;
828 let mut c = RuleBypassCorpus::new("heal-test");
833 let over = RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET + 300;
834 let blocked: Vec<RecordedAttempt> = (0..over)
835 .map(|i| RecordedAttempt {
836 payload: format!("p{i}"),
837 payload_class: cls("sql"),
838 encoding_chain: vec![],
839 response_hash: i as u64,
840 observed_at_secs: 0,
841 })
842 .collect();
843 c.buckets.insert(
844 "r".to_string(),
845 RuleBucket {
846 blocked,
847 ..RuleBucket::default()
848 },
849 );
850 c.record_bypass("r", "winner", cls("sql"), vec![], 42);
851
852 let path = temp_dir().join(format!("wafrift-corpus-heal-{}.json", std::process::id()));
853 let _ = std::fs::remove_file(&path);
854 c.save_atomic(&path).expect("save oversized corpus");
855 let healed = RuleBypassCorpus::load_or_default(&path, "heal-test");
856 assert_eq!(
857 healed.blocked_for_rule("r").len(),
858 RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET,
859 "load must truncate over-cap blocked to reclaim the bloat"
860 );
861 assert_eq!(healed.total_bypasses(), 1, "bypasses survive the heal");
862 let _ = std::fs::remove_file(&path);
863 }
864
865 #[test]
866 fn load_or_default_heals_pre_cap_oversized_bypassed() {
867 use std::env::temp_dir;
868 let mut c = RuleBypassCorpus::new("bypass-heal-test");
874 let over = RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET + 200;
875 let bypassed: Vec<RecordedBypass> = (0..over)
878 .map(|i| RecordedBypass {
879 payload: format!("b{i}"),
880 payload_class: cls("sql"),
881 encoding_chain: vec![],
882 response_hash: i as u64,
883 observed_at_secs: 0,
884 submission: SubmissionStatus::Queued,
885 delivery: String::new(),
886 })
887 .collect();
888 c.buckets.insert(
889 "r".to_string(),
890 RuleBucket {
891 bypassed,
892 ..RuleBucket::default()
893 },
894 );
895 c.record_block("r", "blocker", cls("sql"), vec![], 1);
897
898 let path = temp_dir().join(format!(
899 "wafrift-corpus-bypass-heal-{}.json",
900 std::process::id()
901 ));
902 let _ = std::fs::remove_file(&path);
903 c.save_atomic(&path).expect("save oversized bypass corpus");
904 let healed = RuleBypassCorpus::load_or_default(&path, "bypass-heal-test");
905 assert_eq!(
906 healed.bypasses_for_rule("r").len(),
907 RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET,
908 "load must truncate over-cap bypassed to MAX_BYPASSED_PER_BUCKET"
909 );
910 assert_eq!(healed.total_blocks(), 1, "blocked entries survive the heal");
911 let _ = std::fs::remove_file(&path);
912 }
913
914 #[test]
915 fn record_bypass_dedups() {
916 let mut c = RuleBypassCorpus::new("t");
917 c.record_bypass("942100", "Ω union select", cls("sql"), vec![], 1);
918 c.record_bypass("942100", "Ω union select", cls("sql"), vec![], 1);
919 assert_eq!(c.bypasses_for_rule("942100").len(), 1);
920 }
921
922 #[test]
923 fn record_bypass_default_status_is_queued() {
924 let mut c = RuleBypassCorpus::new("t");
925 c.record_bypass("942100", "payload", cls("sql"), vec![], 1);
926 let b = &c.bypasses_for_rule("942100")[0];
927 assert!(matches!(b.submission, SubmissionStatus::Queued));
928 }
929
930 #[test]
931 fn set_submission_updates_lifecycle() {
932 let mut c = RuleBypassCorpus::new("t");
933 c.record_bypass("942100", "payload", cls("sql"), vec![], 1);
934 let ok = c.set_submission(
935 "942100",
936 "payload",
937 SubmissionStatus::Submitted {
938 report_id: "H1-12345".into(),
939 },
940 );
941 assert!(ok);
942 let b = &c.bypasses_for_rule("942100")[0];
943 assert!(matches!(
944 &b.submission,
945 SubmissionStatus::Submitted { report_id } if report_id == "H1-12345"
946 ));
947 }
948
949 #[test]
950 fn set_submission_missing_returns_false() {
951 let mut c = RuleBypassCorpus::new("t");
952 let ok = c.set_submission(
953 "doesnt-exist",
954 "payload",
955 SubmissionStatus::Accepted {
956 report_id: "X".into(),
957 },
958 );
959 assert!(!ok);
960 }
961
962 #[test]
963 fn record_bypass_default_delivery_is_empty() {
964 let mut c = RuleBypassCorpus::new("t");
965 c.record_bypass("R1", "p", cls("sql"), vec![], 1);
966 assert_eq!(c.bypasses_for_rule("R1")[0].delivery, "");
967 }
968
969 #[test]
970 fn set_delivery_attaches_shape_to_recorded_bypass() {
971 let mut c = RuleBypassCorpus::new("t");
972 c.record_bypass("R1", "p", cls("sql"), vec![], 1);
973 let ok = c.set_delivery("R1", "p", "{\"Query\":{\"param\":\"q\"}}".into());
974 assert!(ok);
975 assert_eq!(
976 c.bypasses_for_rule("R1")[0].delivery,
977 "{\"Query\":{\"param\":\"q\"}}"
978 );
979 }
980
981 #[test]
982 fn set_delivery_missing_bypass_returns_false() {
983 let mut c = RuleBypassCorpus::new("t");
984 assert!(!c.set_delivery("nope", "p", "{\"PathSegment\":null}".into()));
985 }
986
987 #[test]
988 fn set_delivery_ignores_empty_string() {
989 let mut c = RuleBypassCorpus::new("t");
991 c.record_bypass("R1", "p", cls("sql"), vec![], 1);
992 assert!(c.set_delivery("R1", "p", "\"PathSegment\"".into()));
993 assert!(!c.set_delivery("R1", "p", String::new()));
994 assert_eq!(c.bypasses_for_rule("R1")[0].delivery, "\"PathSegment\"");
995 }
996
997 #[test]
998 fn delivery_round_trips_through_save_load() {
999 let dir = tempdir().expect("tempdir");
1000 let path = dir.path().join("c.json");
1001 let mut c = RuleBypassCorpus::new("cf:mr:cumulus");
1002 c.record_bypass("942100", "1 OR 1=1 --", cls("sql"), vec![], 9);
1003 c.set_delivery(
1004 "942100",
1005 "1 OR 1=1 --",
1006 "{\"HppSplit\":{\"param\":\"q\",\"parts\":3}}".into(),
1007 );
1008 c.save_atomic(&path).expect("save");
1009 let r = RuleBypassCorpus::load_or_default(&path, "ignored");
1010 assert_eq!(
1011 r.bypasses_for_rule("942100")[0].delivery,
1012 "{\"HppSplit\":{\"param\":\"q\",\"parts\":3}}"
1013 );
1014 }
1015
1016 #[test]
1017 fn delivery_defaults_empty_for_corpus_without_the_field() {
1018 let mut c = RuleBypassCorpus::new("t");
1023 c.record_bypass("R1", "old", cls("sql"), vec![], 1);
1024 let mut v: serde_json::Value =
1025 serde_json::from_str(&serde_json::to_string(&c).unwrap()).unwrap();
1026 for bucket in v["buckets"].as_object_mut().unwrap().values_mut() {
1027 for bp in bucket["bypassed"].as_array_mut().unwrap() {
1028 assert!(
1029 bp.as_object_mut().unwrap().remove("delivery").is_some(),
1030 "serialization must include the delivery key to strip"
1031 );
1032 }
1033 }
1034 let dir = tempdir().expect("tempdir");
1035 let path = dir.path().join("old.json");
1036 std::fs::write(&path, serde_json::to_string(&v).unwrap()).expect("write");
1037 let r = RuleBypassCorpus::load_or_default(&path, "ignored");
1038 let b = &r.bypasses_for_rule("R1")[0];
1039 assert_eq!(b.payload, "old");
1040 assert_eq!(b.delivery, "", "missing delivery must default to empty");
1041 }
1042
1043 #[test]
1044 fn unexplored_rules_skips_ones_with_bypass() {
1045 let mut c = RuleBypassCorpus::new("t");
1046 c.record_block("R1", "p1", cls("sql"), vec![], 1);
1047 c.record_bypass("R2", "p2", cls("sql"), vec![], 2);
1048 let unexplored = c.unexplored_rules(3);
1051 assert!(unexplored.contains(&"R1".to_string()));
1052 assert!(!unexplored.contains(&"R2".to_string()));
1053 }
1054
1055 #[test]
1056 fn rules_due_for_retry_respects_window() {
1057 let mut c = RuleBypassCorpus::new("t");
1058 c.record_block("R1", "p", cls("sql"), vec![], 1);
1059 c.mark_drift("R1");
1061 let due = c.rules_due_for_retry(60);
1062 assert_eq!(due, vec!["R1".to_string()]);
1063 }
1064
1065 #[test]
1066 fn rules_due_for_retry_skips_rules_with_no_blocks() {
1067 let mut c = RuleBypassCorpus::new("t");
1068 c.mark_drift("R1");
1069 assert!(c.rules_due_for_retry(60).is_empty());
1071 }
1072
1073 #[test]
1074 fn total_counts_aggregate_across_rules() {
1075 let mut c = RuleBypassCorpus::new("t");
1076 c.record_block("R1", "p1", cls("sql"), vec![], 1);
1077 c.record_block("R2", "p2", cls("xss"), vec![], 2);
1078 c.record_bypass("R1", "p3", cls("sql"), vec![], 3);
1079 assert_eq!(c.total_blocks(), 2);
1080 assert_eq!(c.total_bypasses(), 1);
1081 assert_eq!(c.rules_seen(), 2);
1082 }
1083
1084 #[test]
1085 fn summary_breaks_down_by_class() {
1086 let mut c = RuleBypassCorpus::new("cf:mr:foo");
1087 c.record_block("R1", "p1", cls("sql"), vec![], 1);
1088 c.record_block("R1", "p2", cls("sql"), vec![], 2);
1089 c.record_block("R2", "p3", cls("xss"), vec![], 3);
1090 c.record_bypass("R1", "p4", cls("sql"), vec![], 4);
1091 let s = c.summary();
1092 assert_eq!(s.target_fingerprint, "cf:mr:foo");
1093 assert_eq!(s.rules_seen, 2);
1094 assert_eq!(s.total_blocks, 3);
1095 assert_eq!(s.total_bypasses, 1);
1096 let sql_stats = s.per_class.get("sql").unwrap();
1097 assert_eq!(sql_stats.blocks, 2);
1098 assert_eq!(sql_stats.bypasses, 1);
1099 let xss_stats = s.per_class.get("xss").unwrap();
1100 assert_eq!(xss_stats.blocks, 1);
1101 assert_eq!(xss_stats.bypasses, 0);
1102 }
1103
1104 #[test]
1105 fn save_load_round_trip() {
1106 let dir = tempdir().expect("tempdir");
1107 let path = dir.path().join("corpus.json");
1108 let mut c = RuleBypassCorpus::new("cf:mr:cumulus");
1109 c.record_block("942100", "payload-1", cls("sql"), vec!["url".into()], 1);
1110 c.record_bypass(
1111 "942100",
1112 "payload-2",
1113 cls("sql"),
1114 vec!["unicode".into(), "case".into()],
1115 2,
1116 );
1117 c.save_atomic(&path).expect("save");
1118
1119 let reloaded = RuleBypassCorpus::load_or_default(&path, "ignored");
1120 assert_eq!(reloaded.target_fingerprint, "cf:mr:cumulus");
1121 assert_eq!(reloaded.rules_seen(), 1);
1122 assert_eq!(reloaded.total_blocks(), 1);
1123 assert_eq!(reloaded.total_bypasses(), 1);
1124 let bp = &reloaded.bypasses_for_rule("942100")[0];
1125 assert_eq!(bp.payload, "payload-2");
1126 assert_eq!(
1127 bp.encoding_chain,
1128 vec!["unicode".to_string(), "case".to_string()]
1129 );
1130 }
1131
1132 #[test]
1133 fn load_missing_file_returns_default() {
1134 let dir = tempdir().expect("tempdir");
1135 let path = dir.path().join("nope.json");
1136 let c = RuleBypassCorpus::load_or_default(&path, "cf:mr:x");
1137 assert_eq!(c.target_fingerprint, "cf:mr:x");
1138 assert_eq!(c.rules_seen(), 0);
1139 }
1140
1141 #[test]
1142 fn load_corrupted_file_preserves_original_then_defaults() {
1143 let dir = tempdir().expect("tempdir");
1149 let path = dir.path().join("trash.json");
1150 let original = b"{not valid json !!! but represents 500 lost bypasses";
1151 std::fs::write(&path, original).expect("write");
1152
1153 let c = RuleBypassCorpus::load_or_default(&path, "fallback");
1154 assert_eq!(c.target_fingerprint, "fallback");
1155 assert_eq!(c.rules_seen(), 0);
1156
1157 assert!(!path.exists(), "the unparseable file must be moved aside");
1160 let aside: Vec<_> = std::fs::read_dir(dir.path())
1161 .unwrap()
1162 .filter_map(Result::ok)
1163 .filter(|e| {
1164 e.file_name()
1165 .to_string_lossy()
1166 .contains("trash.json.corrupt-")
1167 })
1168 .collect();
1169 assert_eq!(aside.len(), 1, "exactly one preserved sidecar must exist");
1170 let preserved = std::fs::read(aside[0].path()).expect("read sidecar");
1171 assert_eq!(
1172 preserved, original,
1173 "preserved bytes must be byte-identical"
1174 );
1175 }
1176
1177 #[test]
1178 fn load_empty_file_returns_default_without_preserving() {
1179 let dir = tempdir().expect("tempdir");
1182 let path = dir.path().join("empty.json");
1183 std::fs::write(&path, b"").expect("write");
1184 let c = RuleBypassCorpus::load_or_default(&path, "fallback");
1185 assert_eq!(c.target_fingerprint, "fallback");
1186 let has_sidecar = std::fs::read_dir(dir.path())
1187 .unwrap()
1188 .filter_map(Result::ok)
1189 .any(|e| e.file_name().to_string_lossy().contains(".corrupt-"));
1190 assert!(!has_sidecar, "empty file must not spawn a preserve sidecar");
1191 }
1192
1193 #[test]
1194 fn save_atomic_backs_up_prior_corpus_before_overwrite() {
1195 let dir = tempdir().expect("tempdir");
1199 let path = dir.path().join("corpus.json");
1200
1201 let mut a = RuleBypassCorpus::new("cf:mr:cumulus");
1202 a.record_bypass("942100", "winner-A", cls("xss"), vec![], 1);
1203 a.save_atomic(&path).expect("save A");
1204
1205 let empty = RuleBypassCorpus::new("cf:mr:cumulus");
1208 empty.save_atomic(&path).expect("save empty over A");
1209
1210 let bak = dir.path().join("corpus.json.bak");
1211 assert!(
1212 bak.exists(),
1213 "a .bak snapshot of the prior corpus must exist"
1214 );
1215 let recovered = RuleBypassCorpus::load_or_default(&bak, "ignored");
1216 assert_eq!(
1217 recovered.total_bypasses(),
1218 1,
1219 "the prior bypass must be recoverable from the .bak snapshot"
1220 );
1221 assert_eq!(recovered.bypasses_for_rule("942100")[0].payload, "winner-A");
1222 }
1223
1224 #[test]
1225 fn corrupt_then_save_does_not_destroy_preserved_bypasses() {
1226 let dir = tempdir().expect("tempdir");
1231 let path = dir.path().join("corpus.json");
1232
1233 let mut real = RuleBypassCorpus::new("cf:mr:cumulus");
1236 for i in 0..50 {
1237 real.record_bypass("942100", &format!("bypass-{i}"), cls("xss"), vec![], i);
1238 }
1239 let real_bytes = serde_json::to_vec_pretty(&real).unwrap();
1240 let mut corrupt = real_bytes.clone();
1242 corrupt.truncate(corrupt.len() / 2);
1243 std::fs::write(&path, &corrupt).expect("write corrupt");
1244
1245 let fresh = RuleBypassCorpus::load_or_default(&path, "cf:mr:cumulus");
1247 assert_eq!(fresh.total_bypasses(), 0);
1248 fresh.save_atomic(&path).expect("save fresh");
1249
1250 let aside: Vec<_> = std::fs::read_dir(dir.path())
1252 .unwrap()
1253 .filter_map(Result::ok)
1254 .filter(|e| e.file_name().to_string_lossy().contains(".corrupt-"))
1255 .collect();
1256 assert_eq!(aside.len(), 1, "corrupt bytes must be preserved aside");
1257 assert_eq!(
1258 std::fs::read(aside[0].path()).unwrap(),
1259 corrupt,
1260 "preserved sidecar must hold the exact corrupt bytes for manual recovery"
1261 );
1262 }
1263
1264 #[test]
1265 fn save_creates_parent_directory() {
1266 let dir = tempdir().expect("tempdir");
1267 let nested = dir.path().join("deep/nested/path/corpus.json");
1268 let c = RuleBypassCorpus::new("t");
1269 c.save_atomic(&nested).expect("save creates parents");
1270 assert!(nested.exists());
1271 }
1272
1273 #[test]
1274 fn save_atomic_no_torn_write_on_existing_file() {
1275 let dir = tempdir().expect("tempdir");
1278 let path = dir.path().join("corpus.json");
1279 std::fs::write(&path, b"prior-garbage-bytes").expect("seed");
1280 let c = RuleBypassCorpus::new("cf:mr:t");
1281 c.save_atomic(&path).expect("save");
1282 let bytes = std::fs::read(&path).expect("read");
1283 assert!(
1285 !std::str::from_utf8(&bytes)
1286 .unwrap()
1287 .contains("prior-garbage")
1288 );
1289 }
1290
1291 #[test]
1292 fn novel_bypasses_pending_submission_honors_dry_run() {
1293 let mut c = RuleBypassCorpus::new("t");
1294 c.record_bypass("R1", "fresh", cls("sql"), vec![], 1);
1295 let pending = c.novel_bypasses_pending_submission(86400);
1297 assert!(pending.is_empty(), "fresh bypass should not be pending");
1298
1299 let pending = c.novel_bypasses_pending_submission(0);
1301 assert_eq!(pending.len(), 1);
1302 assert_eq!(pending[0].0, "R1");
1303 }
1304
1305 #[test]
1306 fn novel_bypasses_pending_submission_skips_already_submitted() {
1307 let mut c = RuleBypassCorpus::new("t");
1308 c.record_bypass("R1", "p", cls("sql"), vec![], 1);
1309 c.set_submission(
1310 "R1",
1311 "p",
1312 SubmissionStatus::Submitted {
1313 report_id: "H1-X".into(),
1314 },
1315 );
1316 let pending = c.novel_bypasses_pending_submission(0);
1317 assert!(
1318 pending.is_empty(),
1319 "Submitted bypass should not appear pending"
1320 );
1321 }
1322
1323 #[test]
1324 fn novel_bypasses_pending_submission_honors_explicit_hold() {
1325 let mut c = RuleBypassCorpus::new("t");
1326 c.record_bypass("R1", "p", cls("sql"), vec![], 1);
1327 let future = current_epoch_secs() + 3600;
1329 c.set_submission(
1330 "R1",
1331 "p",
1332 SubmissionStatus::DryRunHold {
1333 release_at_secs: future,
1334 },
1335 );
1336 let pending = c.novel_bypasses_pending_submission(0);
1337 assert!(pending.is_empty(), "explicit DryRunHold must be honored");
1338 }
1339
1340 #[test]
1341 fn schema_version_normalized_on_load() {
1342 let raw = r#"{"target_fingerprint":"t","buckets":{}}"#;
1344 let dir = tempdir().expect("tempdir");
1345 let path = dir.path().join("c.json");
1346 std::fs::write(&path, raw).expect("write");
1347 let c = RuleBypassCorpus::load_or_default(&path, "ignored");
1348 assert_eq!(c.schema_version, CORPUS_SCHEMA_VERSION);
1349 }
1350
1351 #[test]
1352 fn sanitize_fingerprint_strips_path_separators() {
1353 assert_eq!(
1354 sanitize_fingerprint_for_filename("cf:managed-ruleset:host/foo"),
1355 "cf_managed-ruleset_host_foo"
1356 );
1357 assert_eq!(
1360 sanitize_fingerprint_for_filename("..\\..\\evil"),
1361 "______evil"
1362 );
1363 }
1364
1365 #[test]
1366 fn sanitize_fingerprint_preserves_safe_chars() {
1367 assert_eq!(
1369 sanitize_fingerprint_for_filename("cf-managed_ruleset_v1"),
1370 "cf-managed_ruleset_v1"
1371 );
1372 assert_eq!(
1374 sanitize_fingerprint_for_filename("cf-managed.ruleset_v1"),
1375 "cf-managed_ruleset_v1"
1376 );
1377 }
1378
1379 #[test]
1380 fn default_corpus_path_uses_fingerprint() {
1381 let p = default_corpus_path("cf:mr:x.com");
1382 let s = p.to_string_lossy();
1383 assert!(s.contains("cf_mr_x_com"));
1385 assert!(s.ends_with(".json"));
1386 }
1387
1388 #[test]
1389 fn determinism_serialization_btree_order() {
1390 let mut c = RuleBypassCorpus::new("t");
1393 for i in (0..50).rev() {
1394 c.record_block(
1395 &format!("R{i}"),
1396 &format!("p{i}"),
1397 cls("sql"),
1398 vec![],
1399 i as u64,
1400 );
1401 }
1402 let a = serde_json::to_string(&c).unwrap();
1403 let b = serde_json::to_string(&c).unwrap();
1404 assert_eq!(a, b);
1405 }
1406
/// A bucket's `description` survives a save/load round trip unchanged.
#[test]
fn description_field_persists() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_block("942100", "p", cls("sql"), vec![], 1);
    corpus.bucket_mut("942100").description = Some("SQL injection — OWASP CRS 942100".into());
    corpus.save_atomic(&corpus_file).expect("save");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "ignored");
    let bucket = reloaded.buckets.get("942100");
    let description = bucket.and_then(|b| b.description.as_deref());
    assert_eq!(description, Some("SQL injection — OWASP CRS 942100"));
}
1422
/// Marks drift on the same rule twice, 1.1 s apart, and checks that the
/// recorded timestamp did not move backwards.
// NOTE(review): after the sleep a strict `>` would presumably hold as well;
// the comparison is kept at `>=` here — confirm whether that is intentional.
#[test]
fn mark_drift_updates_timestamp() {
    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_block("R1", "p", cls("sql"), vec![], 1);

    corpus.mark_drift("R1");
    let before = corpus.buckets["R1"].last_drift_at_secs.unwrap();

    std::thread::sleep(std::time::Duration::from_millis(1100));

    corpus.mark_drift("R1");
    let after = corpus.buckets["R1"].last_drift_at_secs.unwrap();
    assert!(after >= before);
}
1435
/// Recording a bypass with a 1000-element encoding chain keeps every element.
#[test]
fn adversarial_large_chain_no_panic() {
    let mut techniques: Vec<String> = Vec::new();
    for i in 0..1000 {
        techniques.push(format!("technique-{i}"));
    }

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "p", cls("sql"), techniques.clone(), 1);

    let stored = corpus.bypasses_for_rule("R1");
    assert_eq!(stored[0].encoding_chain.len(), 1000);
}
1443
/// A one-million-byte blocked payload saves and reloads at full length.
#[test]
fn adversarial_huge_payload_no_panic() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let huge_payload = "A".repeat(1_000_000);
    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_block("R1", &huge_payload, cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_file).expect("save");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "ignored");
    assert_eq!(reloaded.blocked_for_rule("R1").len(), 1);
    assert_eq!(reloaded.blocked_for_rule("R1")[0].payload.len(), 1_000_000);
}
1457
/// Non-ASCII content (Greek, CJK, a zero-width space and a tag character)
/// is still present in the payload after a save/load round trip.
#[test]
fn unicode_in_payload_round_trips() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass(
        "R1",
        "SELECT Ω 中文 \u{200B} \u{E0041}",
        cls("sql"),
        vec![],
        1,
    );
    corpus.save_atomic(&corpus_file).expect("save");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "ignored");
    let stored_payload = &reloaded.bypasses_for_rule("R1")[0].payload;
    assert!(stored_payload.contains("SELECT"));
    assert!(stored_payload.contains("中文"));
    assert!(stored_payload.contains('\u{200B}'));
    assert!(stored_payload.contains('\u{E0041}'));
}
1478
/// The same payload recorded with two different response hashes is kept as
/// two separate blocked attempts.
#[test]
fn dedup_distinguishes_different_response_hashes() {
    let mut corpus = RuleBypassCorpus::new("t");
    for response_hash in [1, 2] {
        corpus.record_block("R1", "p", cls("sql"), vec![], response_hash);
    }
    assert_eq!(corpus.blocked_for_rule("R1").len(), 2);
}
1488
/// Lists the path of every entry in `dir` whose file name starts with `base`
/// and contains the `.corrupt-` marker.
///
/// Panics if `dir` cannot be read; individual entries that fail to enumerate
/// are skipped.
fn corrupt_sidecars(dir: &Path, base: &str) -> Vec<PathBuf> {
    let mut matches = Vec::new();
    for entry in std::fs::read_dir(dir).unwrap().flatten() {
        let file_name = entry.file_name();
        let name = file_name.to_string_lossy();
        if name.starts_with(base) && name.contains(".corrupt-") {
            matches.push(entry.path());
        }
    }
    matches
}
1514
1515 fn assert_preserved_fresh(
1518 dir: &Path,
1519 path: &Path,
1520 base: &str,
1521 original: &[u8],
1522 fingerprint: &str,
1523 ) {
1524 let c = RuleBypassCorpus::load_or_default(path, fingerprint);
1525 assert_eq!(
1526 c.target_fingerprint, fingerprint,
1527 "fresh corpus uses fallback fp"
1528 );
1529 assert_eq!(c.rules_seen(), 0, "returned corpus must be fresh/empty");
1530 assert_eq!(c.total_bypasses(), 0);
1531 assert_eq!(c.total_blocks(), 0);
1532 assert!(
1533 !path.exists(),
1534 "the unreadable original must be moved aside"
1535 );
1536 let aside = corrupt_sidecars(dir, base);
1537 assert_eq!(aside.len(), 1, "exactly one preserved sidecar must exist");
1538 let preserved = std::fs::read(&aside[0]).expect("read sidecar");
1539 assert_eq!(
1540 preserved, original,
1541 "preserved sidecar bytes must be byte-identical to the original"
1542 );
1543 }
1544
/// A file containing invalid UTF-8 is preserved aside byte-for-byte.
#[test]
fn preserve_non_utf8_file_byte_identical() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("nonutf8.json");
    let invalid_utf8: &[u8] = &[0x7B, 0xFF, 0xFE, 0x80, 0xC0, 0x22, 0x6B, 0x65, 0x79];
    std::fs::write(&corpus_file, invalid_utf8).expect("write");

    assert_preserved_fresh(
        scratch.path(),
        &corpus_file,
        "nonutf8.json",
        invalid_utf8,
        "fb",
    );
}
1559
/// A corpus file cut off in the middle of a JSON object is preserved aside
/// byte-for-byte.
#[test]
fn preserve_truncated_mid_json_byte_identical() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("trunc.json");
    let truncated: &[u8] = br#"{"schema_version":1,"target_fingerprint":"cf:mr:x","buckets":{"942100":{"rule_id":{"#;
    std::fs::write(&corpus_file, truncated).expect("write");

    assert_preserved_fresh(scratch.path(), &corpus_file, "trunc.json", truncated, "fb");
}
1570
/// A file consisting of a single `{` is preserved aside byte-for-byte.
#[test]
fn preserve_lone_open_brace_byte_identical() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("brace.json");
    let lone_brace: &[u8] = b"{";
    std::fs::write(&corpus_file, lone_brace).expect("write");

    assert_preserved_fresh(scratch.path(), &corpus_file, "brace.json", lone_brace, "fb");
}
1579
/// Well-formed JSON that does not match the corpus shape is preserved aside
/// byte-for-byte.
#[test]
fn preserve_valid_json_wrong_schema_byte_identical() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("wrongschema.json");
    let foreign_json: &[u8] =
        br#"{"completely":"different","shape":[1,2,3],"nested":{"a":true}}"#;
    std::fs::write(&corpus_file, foreign_json).expect("write");

    assert_preserved_fresh(
        scratch.path(),
        &corpus_file,
        "wrongschema.json",
        foreign_json,
        "fb",
    );
}
1590
/// A top-level JSON array where an object is required is preserved aside
/// byte-for-byte.
#[test]
fn preserve_json_array_instead_of_object_byte_identical() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("arr.json");
    let json_array: &[u8] = br#"["this","is","not","a","corpus"]"#;
    std::fs::write(&corpus_file, json_array).expect("write");

    assert_preserved_fresh(scratch.path(), &corpus_file, "arr.json", json_array, "fb");
}
1600
/// Arbitrary non-JSON text (including control bytes) is preserved aside
/// byte-for-byte.
#[test]
fn preserve_garbage_text_byte_identical() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("garbage.json");
    let garbage: &[u8] = b"this is not json at all -- 500 lost bypasses live here\n\x01\x02";
    std::fs::write(&corpus_file, garbage).expect("write");

    assert_preserved_fresh(scratch.path(), &corpus_file, "garbage.json", garbage, "fb");
}
1609
/// Two corruption events in a row on the same path: each load returns an
/// empty corpus and moves the file aside, and the bytes of the most recent
/// corruption are found among the sidecars.
#[test]
fn preserve_moves_aside_on_every_corruption_event() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("multi.json");

    // First corruption event.
    let first: &[u8] = b"FIRST corrupt corpus bytes !!!";
    std::fs::write(&corpus_file, first).expect("write 1");
    let after_first = RuleBypassCorpus::load_or_default(&corpus_file, "fb");
    assert_eq!(
        after_first.rules_seen(),
        0,
        "fresh corpus after first corruption"
    );
    assert!(
        !corpus_file.exists(),
        "original moved aside after first corruption"
    );

    // Second corruption event at the very same path.
    let second: &[u8] = b"SECOND corrupt corpus bytes ???";
    std::fs::write(&corpus_file, second).expect("write 2");
    let after_second = RuleBypassCorpus::load_or_default(&corpus_file, "fb");
    assert_eq!(
        after_second.rules_seen(),
        0,
        "fresh corpus after second corruption"
    );
    assert!(
        !corpus_file.exists(),
        "original moved aside after second corruption"
    );

    // Read every sidecar up front, then look for the latest bytes.
    let mut sidecar_contents: Vec<Vec<u8>> = Vec::new();
    for sidecar in corrupt_sidecars(scratch.path(), "multi.json") {
        sidecar_contents.push(std::fs::read(&sidecar).unwrap());
    }
    let latest_preserved = sidecar_contents
        .iter()
        .any(|contents| contents.as_slice() == second);
    assert!(
        latest_preserved,
        "latest corruption's exact bytes must be preserved aside"
    );
}
1653
/// A whitespace-only file yields an empty corpus with the fallback
/// fingerprint, produces no sidecar, and is left in place.
#[test]
fn whitespace_only_file_is_fresh_no_sidecar() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("ws.json");
    std::fs::write(&corpus_file, b" \n\t \r\n ").expect("write");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_file, "fb");
    assert_eq!(loaded.target_fingerprint, "fb");
    assert_eq!(loaded.rules_seen(), 0);

    let sidecars = corrupt_sidecars(scratch.path(), "ws.json");
    assert!(
        sidecars.is_empty(),
        "whitespace-only file must NOT spawn a preserve sidecar"
    );
    assert!(corpus_file.exists(), "whitespace file is not moved aside");
}
1672
/// A zero-byte file yields an empty corpus and produces no sidecar.
#[test]
fn empty_file_leaves_no_sidecar_and_returns_fresh() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("zero.json");
    std::fs::write(&corpus_file, b"").expect("write");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_file, "fb");
    assert_eq!(loaded.rules_seen(), 0);

    let sidecars = corrupt_sidecars(scratch.path(), "zero.json");
    assert!(sidecars.is_empty());
}
1682
/// After a populated corpus is overwritten by an empty one, the `.bak`
/// file still loads as the populated corpus: same counts, same payload
/// order, same encoding chain.
#[test]
fn bak_recovers_first_corpus_after_empty_second_save() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    // Save a corpus with two bypasses and one block.
    let mut populated = RuleBypassCorpus::new("cf:mr:cumulus");
    populated.record_bypass("942100", "winner-A", cls("xss"), vec!["b64".into()], 7);
    populated.record_bypass("942100", "winner-B", cls("sql"), vec![], 8);
    populated.record_block("942100", "blk", cls("sql"), vec![], 9);
    populated.save_atomic(&corpus_file).expect("save first");

    // Overwrite it with an empty corpus for the same fingerprint.
    RuleBypassCorpus::new("cf:mr:cumulus")
        .save_atomic(&corpus_file)
        .expect("save empty");

    let backup_file = scratch.path().join("c.json.bak");
    assert!(
        backup_file.exists(),
        ".bak must exist after overwriting a non-empty corpus"
    );

    let recovered = RuleBypassCorpus::load_or_default(&backup_file, "ignored");
    assert_eq!(
        recovered.total_bypasses(),
        2,
        "both prior bypasses recoverable"
    );
    assert_eq!(recovered.total_blocks(), 1, "prior block recoverable");

    let mut payloads = Vec::new();
    for bypass in recovered.bypasses_for_rule("942100").iter() {
        payloads.push(bypass.payload.clone());
    }
    assert_eq!(
        payloads,
        vec!["winner-A".to_string(), "winner-B".to_string()]
    );
    assert_eq!(
        recovered.bypasses_for_rule("942100")[0].encoding_chain,
        vec!["b64".to_string()]
    );
}
1727
/// The very first save to a path must not create a `.bak` file.
#[test]
fn bak_skipped_when_no_prior_file() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "p", cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_file).expect("save");

    let backup_file = scratch.path().join("c.json.bak");
    assert!(
        !backup_file.exists(),
        "no .bak on the first save (no prior file)"
    );
}
1742
/// Saving over a zero-byte file must not create a `.bak` file.
#[test]
fn bak_skipped_when_prior_file_empty() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");
    std::fs::write(&corpus_file, b"").expect("seed empty");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "p", cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_file).expect("save over empty");

    let backup_file = scratch.path().join("c.json.bak");
    assert!(
        !backup_file.exists(),
        "empty prior file must not be backed up"
    );
}
1757
/// After a second save, `.bak` contains exactly the bytes the main file
/// held before that save.
#[test]
fn bak_holds_exact_prior_bytes() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut earlier = RuleBypassCorpus::new("cf:mr:x");
    earlier.record_bypass("R1", "p", cls("sql"), vec![], 1);
    earlier.save_atomic(&corpus_file).expect("save first");
    let bytes_before_overwrite = std::fs::read(&corpus_file).expect("read prior");

    let mut later = RuleBypassCorpus::new("cf:mr:x");
    later.record_bypass("R2", "q", cls("xss"), vec![], 2);
    later.save_atomic(&corpus_file).expect("save second");

    let backup_file = scratch.path().join("c.json.bak");
    let backup_bytes = std::fs::read(backup_file).expect("read bak");
    assert_eq!(
        backup_bytes, bytes_before_overwrite,
        ".bak must be a byte-exact snapshot of the prior file"
    );
}
1779
/// A corpus recovered from `.bak` can be saved back over the main file and
/// then reloaded with its bypass intact.
#[test]
fn bak_round_trips_then_main_continues() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut populated = RuleBypassCorpus::new("cf:mr:x");
    populated.record_bypass("R1", "keep-me", cls("sql"), vec![], 1);
    populated.save_atomic(&corpus_file).expect("save good");

    let emptied = RuleBypassCorpus::new("cf:mr:x");
    emptied.save_atomic(&corpus_file).expect("save empty");

    // Restore the main file from the backup.
    let backup_file = scratch.path().join("c.json.bak");
    let from_backup = RuleBypassCorpus::load_or_default(&backup_file, "x");
    from_backup.save_atomic(&corpus_file).expect("restore");

    let restored = RuleBypassCorpus::load_or_default(&corpus_file, "x");
    assert_eq!(restored.bypasses_for_rule("R1").len(), 1);
    assert_eq!(restored.bypasses_for_rule("R1")[0].payload, "keep-me");
}
1800
/// End-to-end scenario: a saved corpus is clobbered with non-UTF-8 bytes,
/// loaded (yielding an empty corpus), and saved again. The corrupt bytes
/// must survive in exactly one sidecar.
#[test]
fn end_to_end_corpus_disappeared_non_utf8() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("corpus.json");

    let mut populated = RuleBypassCorpus::new("cf:mr:cumulus");
    for seq in 0..30 {
        let payload = format!("bypass-{seq}");
        populated.record_bypass("942100", &payload, cls("xss"), vec![], seq);
    }
    populated.save_atomic(&corpus_file).expect("save real");

    // Clobber the saved file with bytes that are not valid UTF-8.
    let corrupt_bytes: &[u8] = &[0x00, 0xFF, 0x80, 0x7B, 0xC3, 0x28, 0x42];
    std::fs::write(&corpus_file, corrupt_bytes).expect("corrupt");

    let fresh = RuleBypassCorpus::load_or_default(&corpus_file, "cf:mr:cumulus");
    assert_eq!(fresh.total_bypasses(), 0);
    fresh.save_atomic(&corpus_file).expect("save fresh empty");

    let sidecars = corrupt_sidecars(scratch.path(), "corpus.json");
    assert_eq!(sidecars.len(), 1, "corrupt non-UTF8 bytes preserved aside");
    let sidecar_bytes = std::fs::read(&sidecars[0]).unwrap();
    assert_eq!(
        sidecar_bytes, corrupt_bytes,
        "sidecar holds the exact corrupt bytes"
    );
}
1833
/// A bucket saved with more blocked attempts than
/// `MAX_BLOCKED_PER_BUCKET` reloads with exactly the cap (starting at the
/// first entry), while its ten bypasses are all still present.
#[test]
fn heal_truncates_blocked_but_keeps_all_bypasses() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    // 100 more blocked attempts than the per-bucket cap allows.
    let over = RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET + 100;
    let mut blocked: Vec<RecordedAttempt> = Vec::new();
    for i in 0..over {
        blocked.push(RecordedAttempt {
            payload: format!("blk{i}"),
            payload_class: cls("sql"),
            encoding_chain: vec![],
            response_hash: i as u64,
            observed_at_secs: 0,
        });
    }

    let mut bypassed: Vec<RecordedBypass> = Vec::new();
    for i in 0..10u64 {
        bypassed.push(RecordedBypass {
            payload: format!("by{i}"),
            payload_class: cls("xss"),
            encoding_chain: vec![],
            response_hash: 1_000 + i,
            observed_at_secs: 0,
            submission: SubmissionStatus::Queued,
            delivery: String::new(),
        });
    }

    // Insert the bucket directly so the oversize list reaches the disk.
    let oversize_bucket = RuleBucket {
        blocked,
        bypassed,
        ..RuleBucket::default()
    };
    let mut corpus = RuleBypassCorpus::new("heal");
    corpus.buckets.insert("r".into(), oversize_bucket);
    corpus.save_atomic(&corpus_file).expect("save");

    let healed = RuleBypassCorpus::load_or_default(&corpus_file, "heal");
    assert_eq!(
        healed.blocked_for_rule("r").len(),
        RuleBypassCorpus::MAX_BLOCKED_PER_BUCKET
    );
    assert_eq!(healed.blocked_for_rule("r")[0].payload, "blk0");
    assert_eq!(
        healed.bypasses_for_rule("r").len(),
        10,
        "under-cap bypasses untouched"
    );
    assert_eq!(healed.bypasses_for_rule("r")[9].payload, "by9");
}
1887
/// A bucket with five blocks and five bypasses reloads with all ten
/// records in place.
#[test]
fn heal_leaves_under_cap_bucket_untouched() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    for seq in 0..5 {
        let blocked_payload = format!("b{seq}");
        let bypass_payload = format!("p{seq}");
        corpus.record_block("r", &blocked_payload, cls("sql"), vec![], seq);
        corpus.record_bypass("r", &bypass_payload, cls("sql"), vec![], 100 + seq);
    }
    corpus.save_atomic(&corpus_file).expect("save");

    let healed = RuleBypassCorpus::load_or_default(&corpus_file, "t");
    assert_eq!(healed.blocked_for_rule("r").len(), 5);
    assert_eq!(healed.bypasses_for_rule("r").len(), 5);
    assert_eq!(healed.blocked_for_rule("r")[4].payload, "b4");
    assert_eq!(healed.bypasses_for_rule("r")[0].payload, "p0");
}
1905
/// A bucket saved with more bypasses than `MAX_BYPASSED_PER_BUCKET`
/// reloads with exactly the cap, starting at the first entry, and its
/// single blocked attempt is still there.
#[test]
fn heal_truncated_bypassed_keeps_blocked_and_prefix() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    // 17 more bypasses than the per-bucket cap allows.
    let over = RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET + 17;
    let mut bypassed: Vec<RecordedBypass> = Vec::new();
    for i in 0..over {
        bypassed.push(RecordedBypass {
            payload: format!("by{i}"),
            payload_class: cls("sql"),
            encoding_chain: vec![],
            response_hash: i as u64,
            observed_at_secs: 0,
            submission: SubmissionStatus::Queued,
            delivery: String::new(),
        });
    }

    let mut corpus = RuleBypassCorpus::new("t");
    let oversize_bucket = RuleBucket {
        bypassed,
        ..RuleBucket::default()
    };
    corpus.buckets.insert("r".into(), oversize_bucket);

    let lone_block = RecordedAttempt {
        payload: "survivor".into(),
        payload_class: cls("sql"),
        encoding_chain: vec![],
        response_hash: 9,
        observed_at_secs: 0,
    };
    corpus.bucket_mut("r").blocked.push(lone_block);
    corpus.save_atomic(&corpus_file).expect("save");

    let healed = RuleBypassCorpus::load_or_default(&corpus_file, "t");
    assert_eq!(
        healed.bypasses_for_rule("r").len(),
        RuleBypassCorpus::MAX_BYPASSED_PER_BUCKET
    );
    assert_eq!(
        healed.bypasses_for_rule("r")[0].payload,
        "by0",
        "kept prefix"
    );
    assert_eq!(healed.blocked_for_rule("r").len(), 1);
    assert_eq!(healed.blocked_for_rule("r")[0].payload, "survivor");
}
1951
/// An explicit `schema_version` of 0 is rewritten to
/// `CORPUS_SCHEMA_VERSION` on load, and the fingerprint stored in the
/// file is the one returned.
#[test]
fn schema_version_zero_normalized_to_current() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");
    let version_zero_json = r#"{"schema_version":0,"target_fingerprint":"t","buckets":{}}"#;
    std::fs::write(&corpus_file, version_zero_json).expect("write");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_file, "ignored");
    assert_eq!(loaded.schema_version, CORPUS_SCHEMA_VERSION);
    assert_eq!(
        loaded.target_fingerprint, "t",
        "embedded fingerprint wins for valid file"
    );
}
1969
/// A file with no `schema_version` key loads with
/// `CORPUS_SCHEMA_VERSION` and keeps its embedded fingerprint.
#[test]
fn schema_version_missing_normalized_to_current() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");
    let versionless_json = r#"{"target_fingerprint":"emb","buckets":{}}"#;
    std::fs::write(&corpus_file, versionless_json).expect("write");

    let loaded = RuleBypassCorpus::load_or_default(&corpus_file, "ignored");
    assert_eq!(loaded.schema_version, CORPUS_SCHEMA_VERSION);
    assert_eq!(loaded.target_fingerprint, "emb");
}
1981
/// When the file on disk is valid, its fingerprint is returned and the
/// fallback argument is not used.
#[test]
fn valid_file_fingerprint_overrides_fallback() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut saved = RuleBypassCorpus::new("embedded-fp");
    saved.record_bypass("R1", "p", cls("sql"), vec![], 1);
    saved.save_atomic(&corpus_file).expect("save");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "fallback-should-be-ignored");
    assert_eq!(reloaded.target_fingerprint, "embedded-fp");
}
1994
/// A corpus file whose bypass records have no `delivery` key loads with
/// an empty `delivery` on every bypass and the other fields intact.
#[test]
fn old_corpus_loads_with_default_delivery_for_every_bypass() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("old.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "a", cls("sql"), vec!["x".into()], 1);
    corpus.record_bypass("R1", "b", cls("xss"), vec![], 2);

    // Re-parse the serialized corpus as generic JSON and strip `delivery`
    // from every bypass record.
    let serialized = serde_json::to_string(&corpus).unwrap();
    let mut document: serde_json::Value = serde_json::from_str(&serialized).unwrap();
    let buckets = document["buckets"].as_object_mut().unwrap();
    for bucket in buckets.values_mut() {
        let bypass_records = bucket["bypassed"].as_array_mut().unwrap();
        for record in bypass_records {
            record.as_object_mut().unwrap().remove("delivery");
        }
    }
    let stripped = serde_json::to_string(&document).unwrap();
    std::fs::write(&corpus_file, stripped).expect("write");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "ignored");
    let bypasses = reloaded.bypasses_for_rule("R1");
    assert_eq!(bypasses.len(), 2);
    assert_eq!(bypasses[0].delivery, "");
    assert_eq!(bypasses[1].delivery, "");
    assert_eq!(bypasses[0].encoding_chain, vec!["x".to_string()]);
}
2021
/// Setting `delivery` twice on the same bypass succeeds both times and
/// leaves the second value in place.
#[test]
fn set_delivery_overwrites_existing_shape_with_non_empty() {
    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "p", cls("sql"), vec![], 1);

    let first_applied = corpus.set_delivery("R1", "p", "\"first\"".into());
    assert!(first_applied);
    let second_applied = corpus.set_delivery("R1", "p", "\"second\"".into());
    assert!(second_applied);

    assert_eq!(corpus.bypasses_for_rule("R1")[0].delivery, "\"second\"");
}
2030
/// `set_delivery` on a rule that has no bucket reports `false`.
#[test]
fn set_delivery_empty_on_missing_bucket_returns_false() {
    let mut corpus = RuleBypassCorpus::new("t");
    let applied = corpus.set_delivery("nope", "p", String::new());
    assert!(!applied);
}
2037
/// `set_submission` on an empty corpus reports `false`.
#[test]
fn set_submission_empty_corpus_returns_false() {
    let mut corpus = RuleBypassCorpus::new("t");
    let applied = corpus.set_submission("R1", "p", SubmissionStatus::Queued);
    assert!(!applied);
}
2043
/// `set_submission` with a payload that is not in an existing bucket
/// reports `false` and leaves the stored bypass at `Queued`.
#[test]
fn set_submission_bucket_exists_but_payload_absent_returns_false() {
    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "present", cls("sql"), vec![], 1);

    let accepted = SubmissionStatus::Accepted {
        report_id: "X".into(),
    };
    let applied = corpus.set_submission("R1", "absent", accepted);
    assert!(
        !applied,
        "wrong payload in an existing bucket must not match"
    );

    let stored_status = &corpus.bypasses_for_rule("R1")[0].submission;
    assert!(matches!(stored_status, SubmissionStatus::Queued));
}
2064
/// Every `SubmissionStatus` variant survives a save/load round trip.
///
/// One bypass per variant is recorded, its status is set, the corpus is
/// saved and reloaded, and each reloaded status is compared against the
/// value that was set. All six variants are checked, including `Queued`
/// and `Accepted`.
#[test]
fn submission_status_round_trips_all_variants() {
    let dir = tempdir().expect("tempdir");
    let path = dir.path().join("c.json");
    let mut c = RuleBypassCorpus::new("t");
    let variants = [
        ("p0", SubmissionStatus::Queued),
        (
            "p1",
            SubmissionStatus::DryRunHold {
                release_at_secs: 1234,
            },
        ),
        (
            "p2",
            SubmissionStatus::Submitted {
                report_id: "H1-1".into(),
            },
        ),
        (
            "p3",
            SubmissionStatus::Accepted {
                report_id: "H1-2".into(),
            },
        ),
        (
            "p4",
            SubmissionStatus::Duplicate {
                duplicate_of: "H1-3".into(),
            },
        ),
        (
            "p5",
            SubmissionStatus::Rejected {
                reason: "informative".into(),
            },
        ),
    ];
    for (p, _) in &variants {
        c.record_bypass("R1", p, cls("sql"), vec![], 0);
    }
    for (p, st) in &variants {
        assert!(c.set_submission("R1", p, st.clone()));
    }
    c.save_atomic(&path).expect("save");

    let r = RuleBypassCorpus::load_or_default(&path, "ignored");
    let by_payload: BTreeMap<_, _> = r
        .bypasses_for_rule("R1")
        .iter()
        .map(|b| (b.payload.clone(), b.submission.clone()))
        .collect();

    // One reloaded entry per variant: nothing dropped, nothing merged.
    assert_eq!(by_payload.len(), variants.len());

    // Compare every variant so a regression in any single one is caught.
    for (p, expected) in &variants {
        assert_eq!(
            by_payload.get(*p),
            Some(expected),
            "submission status for {p} must round-trip"
        );
    }
}
2143
/// Serialization is repeatable, and after a save/load round trip the
/// bucket keys iterate in sorted order even though they were inserted
/// in descending order.
#[test]
fn determinism_identical_serialization_after_save_load() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    for idx in (0..40).rev() {
        let rule = format!("R{idx:03}");
        let payload = format!("p{idx}");
        corpus.record_bypass(&rule, &payload, cls("sql"), vec![], idx);
    }

    let first_pass = serde_json::to_string(&corpus).unwrap();
    let second_pass = serde_json::to_string(&corpus).unwrap();
    assert_eq!(first_pass, second_pass);

    corpus.save_atomic(&corpus_file).expect("save");
    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "ignored");

    let keys: Vec<_> = reloaded.buckets.keys().cloned().collect();
    let mut sorted = keys.clone();
    sorted.sort();
    assert_eq!(keys, sorted, "BTreeMap keys must iterate in sorted order");
}
2167
/// Two corpora that record the same rules in opposite orders serialize
/// to the same JSON.
#[test]
fn btreemap_order_independent_of_insertion_order() {
    let rule_ids = ["R5", "R1", "R9", "R3", "R7"];

    let mut forward = RuleBypassCorpus::new("t");
    let mut backward = RuleBypassCorpus::new("t");
    for id in rule_ids {
        forward.record_block(id, "p", cls("sql"), vec![], 1);
    }
    for id in rule_ids.iter().rev() {
        backward.record_block(id, "p", cls("sql"), vec![], 1);
    }

    let forward_json = serde_json::to_string(&forward).unwrap();
    let backward_json = serde_json::to_string(&backward).unwrap();
    assert_eq!(
        forward_json, backward_json,
        "BTreeMap makes serialization insertion-order-independent"
    );
}
2187
/// A payload mixing mathematical alphanumerics, CJK, zero-width
/// characters and an emoji reloads as exactly the same string.
#[test]
fn unicode_payload_round_trips_with_exact_bytes() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");
    let payload = "𝕊𝔼𝕃𝔼ℂ𝕋 ' OR 𝟙=𝟙 -- 中文 \u{200B}\u{FEFF}\u{1F4A9} emoji";

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", payload, cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_file).expect("save");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "ignored");
    let stored = &reloaded.bypasses_for_rule("R1")[0];
    assert_eq!(stored.payload, payload, "unicode payload exact");
}
2203
/// A 1.2-million-byte bypass payload reloads at full length with every
/// byte still `A`.
#[test]
fn one_mb_bypass_payload_round_trips_no_oom() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let huge_payload = "A".repeat(1_200_000);
    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", &huge_payload, cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_file).expect("save");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "ignored");
    let stored_payload = &reloaded.bypasses_for_rule("R1")[0].payload;
    assert_eq!(stored_payload.len(), 1_200_000);
    let only_a = stored_payload.bytes().all(|byte| byte == b'A');
    assert!(only_a);
}
2221
/// A 5000-element encoding chain reloads with the same length and the
/// same first and last elements.
#[test]
fn huge_encoding_chain_round_trips() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut techniques: Vec<String> = Vec::new();
    for i in 0..5000 {
        techniques.push(format!("t{i}"));
    }

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "p", cls("sql"), techniques.clone(), 1);
    corpus.save_atomic(&corpus_file).expect("save");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "ignored");
    let stored_chain = &reloaded.bypasses_for_rule("R1")[0].encoding_chain;
    assert_eq!(stored_chain.len(), 5000);
    assert_eq!(stored_chain[0], "t0");
    assert_eq!(stored_chain[4999], "t4999");
}
2236
/// Seven recorded bypasses containing three distinct (payload, hash)
/// pairs leave exactly those three pairs in the bucket.
#[test]
fn dedup_bypass_by_response_hash_and_payload_property() {
    let recordings = [
        ("p", 1u64),
        ("q", 1),
        ("p", 2),
        ("p", 1),
        ("q", 1),
        ("p", 2),
        ("p", 2),
    ];

    let mut corpus = RuleBypassCorpus::new("t");
    for (payload, response_hash) in recordings {
        corpus.record_bypass("R1", payload, cls("sql"), vec![], response_hash);
    }

    assert_eq!(
        corpus.bypasses_for_rule("R1").len(),
        3,
        "only distinct (hash,payload) survive"
    );

    let mut surviving: std::collections::BTreeSet<(String, u64)> =
        std::collections::BTreeSet::new();
    for bypass in corpus.bypasses_for_rule("R1").iter() {
        surviving.insert((bypass.payload.clone(), bypass.response_hash));
    }
    assert!(surviving.contains(&("p".to_string(), 1)));
    assert!(surviving.contains(&("p".to_string(), 2)));
    assert!(surviving.contains(&("q".to_string(), 1)));
}
2270
/// Marking drift twice back-to-back never moves the recorded timestamp
/// backwards.
#[test]
fn drift_timestamp_monotonic_across_remarks() {
    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_block("R1", "p", cls("sql"), vec![], 1);

    corpus.mark_drift("R1");
    let earlier = corpus.buckets["R1"].last_drift_at_secs.unwrap();

    corpus.mark_drift("R1");
    let later = corpus.buckets["R1"].last_drift_at_secs.unwrap();

    assert!(
        later >= earlier,
        "drift timestamp must be monotonic non-decreasing"
    );
}
2282
/// Even when the in-memory `schema_version` is 0, the saved JSON carries
/// `CORPUS_SCHEMA_VERSION`.
#[test]
fn first_save_writes_current_schema_version_to_disk() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.schema_version = 0;
    corpus.record_bypass("R1", "p", cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_file).expect("save");

    let on_disk_text = std::fs::read_to_string(&corpus_file).unwrap();
    let on_disk: serde_json::Value = serde_json::from_str(&on_disk_text).unwrap();
    let written_version = on_disk["schema_version"].as_u64().unwrap();
    assert_eq!(written_version, u64::from(CORPUS_SCHEMA_VERSION));
}
2301
/// A new corpus starts with `last_saved_at_secs == 0`; after saving, the
/// reloaded value is non-zero.
#[test]
fn save_stamps_last_saved_at_secs() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let corpus = RuleBypassCorpus::new("t");
    assert_eq!(corpus.last_saved_at_secs, 0);
    corpus.save_atomic(&corpus_file).expect("save");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "t");
    let stamped = reloaded.last_saved_at_secs > 0;
    assert!(stamped, "save must stamp a real epoch second");
}
2315
/// A valid corpus larger than 1 MB on disk (30 rules × 50 bypasses with
/// ~2 KB payloads) reloads with every bypass and creates no sidecar.
#[test]
fn valid_oversize_under_ceiling_is_preserved_not_dropped() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let padding = "X".repeat(2000);
    let mut corpus = RuleBypassCorpus::new("t");
    for rule in 0..30u64 {
        let rule_id = format!("R{rule}");
        for seq in 0..50u64 {
            let payload = format!("{padding}-{rule}-{seq}");
            corpus.record_bypass(&rule_id, &payload, cls("sql"), vec![], rule * 1000 + seq);
        }
    }
    corpus.save_atomic(&corpus_file).expect("save");

    let size_on_disk = std::fs::metadata(&corpus_file).unwrap().len();
    assert!(size_on_disk > 1_000_000, "test corpus should be multi-MB");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "ignored");
    assert_eq!(
        reloaded.total_bypasses(),
        30 * 50,
        "all valid bypasses load intact"
    );

    let sidecars = corrupt_sidecars(scratch.path(), "c.json");
    assert!(sidecars.is_empty(), "valid file never preserved-aside");
}
2349
/// After a save the directory contains `c.json` and nothing other than
/// `c.json` / `c.json.bak`.
#[test]
fn save_atomic_leaves_no_tempfiles_behind() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_bypass("R1", "p", cls("sql"), vec![], 1);
    corpus.save_atomic(&corpus_file).expect("save");

    let mut entries: Vec<String> = Vec::new();
    for entry in std::fs::read_dir(scratch.path()).unwrap().flatten() {
        entries.push(entry.file_name().to_string_lossy().into_owned());
    }

    let has_main_file = entries.iter().any(|name| name == "c.json");
    assert!(has_main_file);

    let only_expected = entries
        .iter()
        .all(|name| matches!(name.as_str(), "c.json" | "c.json.bak"));
    assert!(
        only_expected,
        "no stray temp files left behind, got: {entries:?}"
    );
}
2370
/// Block-only, bypass-only and mixed buckets each reload with exactly
/// the per-list counts that were recorded.
#[test]
fn empty_buckets_and_blocks_persist_exact_counts() {
    let scratch = tempdir().expect("tempdir");
    let corpus_file = scratch.path().join("c.json");

    let mut corpus = RuleBypassCorpus::new("t");
    corpus.record_block("only-block", "b", cls("sql"), vec![], 1);
    corpus.record_bypass("only-bypass", "p", cls("xss"), vec![], 2);
    corpus.record_block("mixed", "b", cls("cmd"), vec![], 3);
    corpus.record_bypass("mixed", "p", cls("cmd"), vec![], 4);
    corpus.save_atomic(&corpus_file).expect("save");

    let reloaded = RuleBypassCorpus::load_or_default(&corpus_file, "t");

    // Bucket that only ever saw a block.
    assert_eq!(reloaded.blocked_for_rule("only-block").len(), 1);
    assert_eq!(reloaded.bypasses_for_rule("only-block").len(), 0);

    // Bucket that only ever saw a bypass.
    assert_eq!(reloaded.bypasses_for_rule("only-bypass").len(), 1);
    assert_eq!(reloaded.blocked_for_rule("only-bypass").len(), 0);

    // Bucket that saw one of each.
    assert_eq!(reloaded.blocked_for_rule("mixed").len(), 1);
    assert_eq!(reloaded.bypasses_for_rule("mixed").len(), 1);

    assert_eq!(reloaded.rules_seen(), 3);
}
2392}