1use crate::evolution::Chromosome;
4use serde::{Deserialize, Serialize};
5use std::sync::Arc;
6
7#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
9pub struct MutationOp {
10 pub gene_name: String,
12 pub from: String,
14 pub to: String,
16 pub operator: String,
18}
19
20#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
29pub struct ParentSnapshot {
30 pub genes: Vec<(String, String)>,
31}
32
33impl ParentSnapshot {
34 fn from_chromosome(c: &Chromosome) -> Self {
35 Self {
36 genes: c.genes.clone(),
37 }
38 }
39}
40
41#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
43pub enum Lineage {
44 Genesis {
46 generation: u32,
48 },
49 Crossover {
51 parent_a: Arc<ParentSnapshot>,
53 parent_b: Arc<ParentSnapshot>,
55 strategy: String,
57 generation: u32,
59 },
60 Mutation {
62 parent: Arc<ParentSnapshot>,
64 log: Vec<MutationOp>,
66 generation: u32,
68 },
69}
70
71impl Lineage {
72 #[must_use]
74 pub fn genesis(generation: u32) -> Self {
75 Self::Genesis { generation }
76 }
77
78 #[must_use]
80 pub fn crossover(
81 parent_a: &Chromosome,
82 parent_b: &Chromosome,
83 strategy: &str,
84 generation: u32,
85 ) -> Self {
86 Self::Crossover {
87 parent_a: Arc::new(ParentSnapshot::from_chromosome(parent_a)),
88 parent_b: Arc::new(ParentSnapshot::from_chromosome(parent_b)),
89 strategy: strategy.to_string(),
90 generation,
91 }
92 }
93
94 #[must_use]
96 pub fn mutation(parent: &Chromosome, log: Vec<MutationOp>, generation: u32) -> Self {
97 Self::Mutation {
98 parent: Arc::new(ParentSnapshot::from_chromosome(parent)),
99 log,
100 generation,
101 }
102 }
103
104 #[must_use]
106 pub fn to_trace(&self) -> String {
107 match self {
108 Self::Genesis { generation } => format!("genesis[gen={generation}]"),
109 Self::Crossover {
110 parent_a,
111 parent_b,
112 strategy,
113 generation,
114 } => {
115 format!(
116 "crossover[gen={generation},strategy={strategy},a={{{}}},b={{{}}}]",
117 genes_to_string(&parent_a.genes),
118 genes_to_string(&parent_b.genes)
119 )
120 }
121 Self::Mutation {
122 parent,
123 log,
124 generation,
125 } => {
126 let ops: Vec<String> = log
127 .iter()
128 .map(|op| format!("{}:{}->{}[{}]", op.gene_name, op.from, op.to, op.operator))
129 .collect();
130 format!(
131 "mutation[gen={generation},parent={{{}}},ops=[{}]]",
132 genes_to_string(&parent.genes),
133 ops.join(",")
134 )
135 }
136 }
137 }
138}
139
140fn genes_to_string(genes: &[(String, String)]) -> String {
141 genes
142 .iter()
143 .map(|(n, v)| format!("{n}={v}"))
144 .collect::<Vec<_>>()
145 .join(",")
146}
147
148#[derive(Debug, Clone, Serialize, Deserialize)]
150pub struct BypassEntry {
151 pub payload_hash: String,
153 pub genes: Vec<(String, String)>,
155 pub lineage_trace: String,
157 pub fitness: f64,
159 pub evaluations: u32,
161 pub target_waf: Option<String>,
163 pub verified: bool,
165 pub schema_version: u32,
167}
168
169impl BypassEntry {
170 pub const CURRENT_SCHEMA: u32 = 1;
171
172 #[must_use]
173 pub fn from_chromosome(chromosome: &Chromosome, target_waf: Option<String>) -> Self {
174 use sha2::{Digest, Sha256};
184 let mut hasher = Sha256::new();
185 for (k, v) in &chromosome.genes {
186 hasher.update(k.as_bytes());
187 hasher.update([0u8]); hasher.update(v.as_bytes());
189 hasher.update([0u8]);
190 }
191 let digest = hasher.finalize();
192 let payload_hash = digest
193 .iter()
194 .map(|b| format!("{b:02x}"))
195 .collect::<String>();
196
197 Self {
198 payload_hash,
199 genes: chromosome.genes.clone(),
200 lineage_trace: chromosome.lineage.to_trace(),
201 fitness: chromosome.fitness,
202 evaluations: chromosome.evaluations,
203 target_waf,
204 verified: true,
205 schema_version: Self::CURRENT_SCHEMA,
206 }
207 }
208}
209
210#[derive(Debug, Clone, Default, Serialize, Deserialize)]
212pub struct BypassCorpus {
213 pub entries: Vec<BypassEntry>,
214 pub schema_version: u32,
215 #[serde(skip)]
225 seen_hashes: std::collections::HashSet<String>,
226}
227
228impl BypassCorpus {
229 pub const CURRENT_SCHEMA: u32 = 1;
230
231 pub const MAX_ENTRIES: usize = 100_000;
239
240 #[must_use]
241 pub fn new() -> Self {
242 Self {
243 entries: Vec::new(),
244 schema_version: Self::CURRENT_SCHEMA,
245 seen_hashes: std::collections::HashSet::new(),
246 }
247 }
248
249 fn ensure_index(&mut self) {
253 if self.seen_hashes.len() != self.entries.len() {
254 self.seen_hashes = self
255 .entries
256 .iter()
257 .map(|e| e.payload_hash.clone())
258 .collect();
259 }
260 }
261
262 pub fn add(&mut self, entry: BypassEntry) {
267 self.ensure_index();
268 if !self.seen_hashes.insert(entry.payload_hash.clone()) {
270 return;
271 }
272 if self.entries.len() >= Self::MAX_ENTRIES {
273 self.seen_hashes.remove(&entry.payload_hash);
276 return;
277 }
278 self.entries.push(entry);
279 }
280
281 const MAX_CORPUS_BYTES: usize = 256 * 1024 * 1024;
284
285 const MAX_JSONL_LINE_BYTES: usize = 16 * 1024 * 1024;
287
288 pub fn save(&self, path: &std::path::Path) -> Result<(), crate::types::EvolutionError> {
290 use crate::types::EvolutionError;
291 let mut buf = Vec::new();
292 for entry in &self.entries {
293 let json = serde_json::to_string(entry).map_err(EvolutionError::SerializationFailed)?;
294 if json.len() > Self::MAX_JSONL_LINE_BYTES {
295 tracing::warn!(
296 line_len = json.len(),
297 max = Self::MAX_JSONL_LINE_BYTES,
298 "skipping oversized corpus entry"
299 );
300 continue;
301 }
302 if !buf.is_empty() {
303 buf.push(b'\n');
304 }
305 buf.extend_from_slice(json.as_bytes());
306 if buf.len() > Self::MAX_CORPUS_BYTES {
307 return Err(EvolutionError::OversizedData {
308 context: format!("corpus {}", path.display()),
309 size: buf.len(),
310 max: Self::MAX_CORPUS_BYTES,
311 });
312 }
313 }
314 std::fs::write(path, buf)?;
315 Ok(())
316 }
317
318 pub fn load(path: &std::path::Path) -> Result<Self, crate::types::EvolutionError> {
320 use crate::types::EvolutionError;
321 let meta = std::fs::metadata(path)?;
322 let len = usize::try_from(meta.len()).unwrap_or(usize::MAX);
326 if len > Self::MAX_CORPUS_BYTES {
327 return Err(EvolutionError::OversizedData {
328 context: format!("corpus {}", path.display()),
329 size: len,
330 max: Self::MAX_CORPUS_BYTES,
331 });
332 }
333 let content = crate::safe_io::read_capped_text(path, Self::MAX_CORPUS_BYTES)?;
336 let mut entries = Vec::new();
337 for line in content.lines().filter(|l| !l.trim().is_empty()) {
338 if line.len() > Self::MAX_JSONL_LINE_BYTES {
339 tracing::warn!(
340 line_len = line.len(),
341 max = Self::MAX_JSONL_LINE_BYTES,
342 "skipping oversized corpus line"
343 );
344 continue;
345 }
346 let entry: BypassEntry =
347 serde_json::from_str(line).map_err(EvolutionError::DeserializationFailed)?;
348 if entry.schema_version != BypassEntry::CURRENT_SCHEMA {
360 tracing::warn!(
361 entry_schema = entry.schema_version,
362 current_schema = BypassEntry::CURRENT_SCHEMA,
363 "BypassCorpus::load skipping entry from a different schema \
364 version — re-run scan to rebuild at current schema"
365 );
366 continue;
367 }
368 entries.push(entry);
369 }
370 Ok(Self {
371 entries,
372 schema_version: Self::CURRENT_SCHEMA,
373 seen_hashes: std::collections::HashSet::new(),
377 })
378 }
379}
380
381#[cfg(test)]
382mod tests {
383 use super::*;
384 use crate::evolution::Chromosome;
385
386 #[test]
387 fn bypass_entry_deduplicates() {
388 let mut corpus = BypassCorpus::new();
389 let chrom = Chromosome::new(vec![("encoding".into(), "UrlEncode".into())]);
390 let entry = BypassEntry::from_chromosome(&chrom, None);
391 corpus.add(entry.clone());
392 corpus.add(entry);
393 assert_eq!(corpus.entries.len(), 1);
394 }
395
396 #[test]
401 fn dedup_survives_serde_round_trip() {
402 let mut corpus = BypassCorpus::new();
403 let chrom = Chromosome::new(vec![("encoding".into(), "UrlEncode".into())]);
404 let entry = BypassEntry::from_chromosome(&chrom, None);
405 corpus.add(entry.clone());
406 assert_eq!(corpus.entries.len(), 1);
407
408 let json = serde_json::to_string(&corpus).unwrap();
410 let mut reloaded: BypassCorpus = serde_json::from_str(&json).unwrap();
411 assert!(
412 reloaded.seen_hashes.is_empty(),
413 "precondition: the dedup index is not serialized"
414 );
415 reloaded.add(entry);
417 assert_eq!(
418 reloaded.entries.len(),
419 1,
420 "dedup must hold across a load (ensure_index rebuild)"
421 );
422 }
423
424 #[test]
427 fn add_after_load_accepts_new_and_indexes_it() {
428 let mut corpus = BypassCorpus::new();
429 let a = BypassEntry::from_chromosome(
430 &Chromosome::new(vec![("encoding".into(), "UrlEncode".into())]),
431 None,
432 );
433 corpus.add(a);
434 let json = serde_json::to_string(&corpus).unwrap();
435 let mut reloaded: BypassCorpus = serde_json::from_str(&json).unwrap();
436
437 let b = BypassEntry::from_chromosome(
438 &Chromosome::new(vec![("encoding".into(), "Base64".into())]),
439 None,
440 );
441 reloaded.add(b.clone());
442 assert_eq!(reloaded.entries.len(), 2);
443 assert_eq!(
444 reloaded.seen_hashes.len(),
445 reloaded.entries.len(),
446 "index must mirror entries after a post-load add"
447 );
448 reloaded.add(b);
450 assert_eq!(reloaded.entries.len(), 2);
451 }
452
453 #[test]
460 fn add_is_bounded_and_index_stays_consistent_at_cap() {
461 let mut corpus = BypassCorpus::new();
462 for tag in ["UrlEncode", "Base64"] {
464 corpus.add(BypassEntry::from_chromosome(
465 &Chromosome::new(vec![("encoding".into(), tag.into())]),
466 None,
467 ));
468 }
469 assert_eq!(corpus.entries.len(), 2);
471 assert_eq!(corpus.seen_hashes.len(), corpus.entries.len());
472 for i in 0..1000u32 {
476 corpus.add(BypassEntry::from_chromosome(
477 &Chromosome::new(vec![("encoding".into(), format!("E{i}"))]),
478 None,
479 ));
480 }
481 assert!(
482 corpus.entries.len() <= BypassCorpus::MAX_ENTRIES,
483 "entries must never exceed MAX_ENTRIES"
484 );
485 assert_eq!(
486 corpus.seen_hashes.len(),
487 corpus.entries.len(),
488 "index length must equal entries length after a batch of adds"
489 );
490 }
491
492 #[test]
493 fn lineage_trace_roundtrips() {
494 let chrom = Chromosome::new(vec![("a".into(), "1".into())]);
495 let lineage = Lineage::genesis(0);
496 assert!(lineage.to_trace().contains("genesis"));
497
498 let cross = Lineage::crossover(&chrom, &chrom, "uniform", 1);
499 assert!(cross.to_trace().contains("crossover"));
500
501 let mutation = Lineage::mutation(&chrom, vec![], 2);
502 assert!(mutation.to_trace().contains("mutation"));
503 }
504
505 #[test]
506 fn empty_lineage_trace_is_serializable() {
507 let chrom = Chromosome::new(Vec::new());
508 let cross = Lineage::crossover(&chrom, &chrom, "single_point", 1);
509 let trace = cross.to_trace();
510 assert!(trace.contains("crossover"));
511 assert!(trace.contains("a={}"));
512 assert!(trace.contains("b={}"));
513 }
514
515 #[test]
516 fn payload_hash_is_order_sensitive() {
517 let chrom_a = Chromosome::new(vec![
518 ("encoding".into(), "UrlEncode".into()),
519 ("content_type".into(), "JsonNested".into()),
520 ]);
521 let chrom_b = Chromosome::new(vec![
522 ("content_type".into(), "JsonNested".into()),
523 ("encoding".into(), "UrlEncode".into()),
524 ]);
525 let a = BypassEntry::from_chromosome(&chrom_a, None);
526 let b = BypassEntry::from_chromosome(&chrom_b, None);
527 assert_ne!(a.payload_hash, b.payload_hash);
528 }
529}