1use crate::evolution::Chromosome;
4use serde::{Deserialize, Serialize};
5use std::sync::Arc;
6
/// One entry of the `log` carried by [`Lineage::Mutation`].
///
/// [`Lineage::to_trace`] renders each op as `gene_name:from->to[operator]`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct MutationOp {
    /// Name of the gene this operation refers to.
    pub gene_name: String,
    /// Presumably the gene's value before the mutation — TODO confirm against the producers.
    pub from: String,
    /// Presumably the gene's value after the mutation — TODO confirm against the producers.
    pub to: String,
    /// Free-form operator label; appears inside `[...]` in the trace.
    pub operator: String,
}
19
/// Owned copy of a chromosome's gene list, as stored inside a [`Lineage`] record.
///
/// The snapshot is detached from the source chromosome: it holds a clone of the
/// genes, not a reference to them.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ParentSnapshot {
    /// Gene pairs in their original order; traces render each pair as `first=second`.
    pub genes: Vec<(String, String)>,
}
32
33impl ParentSnapshot {
34 fn from_chromosome(c: &Chromosome) -> Self {
35 Self {
36 genes: c.genes.clone(),
37 }
38 }
39}
40
/// Provenance record attached to a chromosome.
///
/// Parent gene lists are stored behind [`Arc`], so cloning a `Lineage` bumps a
/// reference count instead of copying the genes.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum Lineage {
    /// No parents recorded.
    Genesis {
        /// Generation number supplied by the caller.
        generation: u32,
    },
    /// Produced from two parents.
    Crossover {
        /// Snapshot of the first parent's genes.
        parent_a: Arc<ParentSnapshot>,
        /// Snapshot of the second parent's genes.
        parent_b: Arc<ParentSnapshot>,
        /// Caller-provided strategy label (the tests use `"uniform"` and `"single_point"`).
        strategy: String,
        /// Generation number supplied by the caller.
        generation: u32,
    },
    /// Produced from a single parent plus a list of recorded operations.
    Mutation {
        /// Snapshot of the parent's genes.
        parent: Arc<ParentSnapshot>,
        /// Operations recorded for this mutation; may be empty.
        log: Vec<MutationOp>,
        /// Generation number supplied by the caller.
        generation: u32,
    },
}
70
71impl Lineage {
72 #[must_use]
74 pub fn genesis(generation: u32) -> Self {
75 Self::Genesis { generation }
76 }
77
78 #[must_use]
80 pub fn crossover(
81 parent_a: &Chromosome,
82 parent_b: &Chromosome,
83 strategy: &str,
84 generation: u32,
85 ) -> Self {
86 Self::Crossover {
87 parent_a: Arc::new(ParentSnapshot::from_chromosome(parent_a)),
88 parent_b: Arc::new(ParentSnapshot::from_chromosome(parent_b)),
89 strategy: strategy.to_string(),
90 generation,
91 }
92 }
93
94 #[must_use]
96 pub fn mutation(parent: &Chromosome, log: Vec<MutationOp>, generation: u32) -> Self {
97 Self::Mutation {
98 parent: Arc::new(ParentSnapshot::from_chromosome(parent)),
99 log,
100 generation,
101 }
102 }
103
104 #[must_use]
106 pub fn to_trace(&self) -> String {
107 match self {
108 Self::Genesis { generation } => format!("genesis[gen={generation}]"),
109 Self::Crossover {
110 parent_a,
111 parent_b,
112 strategy,
113 generation,
114 } => {
115 format!(
116 "crossover[gen={generation},strategy={strategy},a={{{}}},b={{{}}}]",
117 genes_to_string(&parent_a.genes),
118 genes_to_string(&parent_b.genes)
119 )
120 }
121 Self::Mutation {
122 parent,
123 log,
124 generation,
125 } => {
126 let ops: Vec<String> = log
127 .iter()
128 .map(|op| format!("{}:{}->{}[{}]", op.gene_name, op.from, op.to, op.operator))
129 .collect();
130 format!(
131 "mutation[gen={generation},parent={{{}}},ops=[{}]]",
132 genes_to_string(&parent.genes),
133 ops.join(",")
134 )
135 }
136 }
137 }
138}
139
/// Formats gene pairs as `name=value` items joined by commas.
///
/// An empty slice yields an empty string. No escaping is applied.
fn genes_to_string(genes: &[(String, String)]) -> String {
    let mut rendered = String::new();
    for (idx, (name, value)) in genes.iter().enumerate() {
        // Separator goes between items only, never before the first one.
        if idx > 0 {
            rendered.push(',');
        }
        rendered.push_str(name);
        rendered.push('=');
        rendered.push_str(value);
    }
    rendered
}
147
/// One record in a [`BypassCorpus`], serialized as a single JSON object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BypassEntry {
    /// Lowercase hex SHA-256 over the gene list; the key [`BypassCorpus::add`] deduplicates on.
    pub payload_hash: String,
    /// Copy of the chromosome's gene pairs, in their original order.
    pub genes: Vec<(String, String)>,
    /// Output of [`Lineage::to_trace`] for the chromosome's lineage.
    pub lineage_trace: String,
    /// Fitness value copied from the chromosome.
    pub fitness: f64,
    /// Evaluation count copied from the chromosome.
    pub evaluations: u32,
    /// Optional caller-supplied label — presumably the WAF the payload was tested against.
    pub target_waf: Option<String>,
    /// Set to `true` by [`BypassEntry::from_chromosome`].
    pub verified: bool,
    /// Schema version of this entry; `from_chromosome` writes [`BypassEntry::CURRENT_SCHEMA`].
    pub schema_version: u32,
}
168
169impl BypassEntry {
170 pub const CURRENT_SCHEMA: u32 = 1;
171
172 #[must_use]
173 pub fn from_chromosome(chromosome: &Chromosome, target_waf: Option<String>) -> Self {
174 use sha2::{Digest, Sha256};
184 let mut hasher = Sha256::new();
185 for (k, v) in &chromosome.genes {
186 hasher.update(k.as_bytes());
187 hasher.update([0u8]); hasher.update(v.as_bytes());
189 hasher.update([0u8]);
190 }
191 let digest = hasher.finalize();
192 let payload_hash = digest
193 .iter()
194 .map(|b| format!("{b:02x}"))
195 .collect::<String>();
196
197 Self {
198 payload_hash,
199 genes: chromosome.genes.clone(),
200 lineage_trace: chromosome.lineage.to_trace(),
201 fitness: chromosome.fitness,
202 evaluations: chromosome.evaluations,
203 target_waf,
204 verified: true,
205 schema_version: Self::CURRENT_SCHEMA,
206 }
207 }
208}
209
/// In-memory collection of [`BypassEntry`] records.
///
/// NOTE(review): the derived `Default` yields `schema_version == 0`, whereas
/// [`BypassCorpus::new`] stamps [`BypassCorpus::CURRENT_SCHEMA`]. Prefer `new()`
/// unless a zero version is intended.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BypassCorpus {
    /// Stored entries, in insertion order.
    pub entries: Vec<BypassEntry>,
    /// Schema version of the corpus as a whole.
    pub schema_version: u32,
}
216
217impl BypassCorpus {
218 pub const CURRENT_SCHEMA: u32 = 1;
219
220 #[must_use]
221 pub fn new() -> Self {
222 Self {
223 entries: Vec::new(),
224 schema_version: Self::CURRENT_SCHEMA,
225 }
226 }
227
228 pub fn add(&mut self, entry: BypassEntry) {
230 if !self
232 .entries
233 .iter()
234 .any(|e| e.payload_hash == entry.payload_hash)
235 {
236 self.entries.push(entry);
237 }
238 }
239
240 const MAX_CORPUS_BYTES: usize = 256 * 1024 * 1024;
243
244 const MAX_JSONL_LINE_BYTES: usize = 16 * 1024 * 1024;
246
247 pub fn save(&self, path: &std::path::Path) -> Result<(), crate::types::EvolutionError> {
249 use crate::types::EvolutionError;
250 let mut buf = Vec::new();
251 for entry in &self.entries {
252 let json = serde_json::to_string(entry).map_err(EvolutionError::SerializationFailed)?;
253 if json.len() > Self::MAX_JSONL_LINE_BYTES {
254 tracing::warn!(
255 line_len = json.len(),
256 max = Self::MAX_JSONL_LINE_BYTES,
257 "skipping oversized corpus entry"
258 );
259 continue;
260 }
261 if !buf.is_empty() {
262 buf.push(b'\n');
263 }
264 buf.extend_from_slice(json.as_bytes());
265 if buf.len() > Self::MAX_CORPUS_BYTES {
266 return Err(EvolutionError::OversizedData {
267 context: format!("corpus {}", path.display()),
268 size: buf.len(),
269 max: Self::MAX_CORPUS_BYTES,
270 });
271 }
272 }
273 std::fs::write(path, buf)?;
274 Ok(())
275 }
276
277 pub fn load(path: &std::path::Path) -> Result<Self, crate::types::EvolutionError> {
279 use crate::types::EvolutionError;
280 let meta = std::fs::metadata(path)?;
281 let len = meta.len() as usize;
282 if len > Self::MAX_CORPUS_BYTES {
283 return Err(EvolutionError::OversizedData {
284 context: format!("corpus {}", path.display()),
285 size: len,
286 max: Self::MAX_CORPUS_BYTES,
287 });
288 }
289 let content = std::fs::read_to_string(path)?;
290 let mut entries = Vec::new();
291 for line in content.lines().filter(|l| !l.trim().is_empty()) {
292 if line.len() > Self::MAX_JSONL_LINE_BYTES {
293 tracing::warn!(
294 line_len = line.len(),
295 max = Self::MAX_JSONL_LINE_BYTES,
296 "skipping oversized corpus line"
297 );
298 continue;
299 }
300 let entry: BypassEntry =
301 serde_json::from_str(line).map_err(EvolutionError::DeserializationFailed)?;
302 entries.push(entry);
303 }
304 Ok(Self {
305 entries,
306 schema_version: Self::CURRENT_SCHEMA,
307 })
308 }
309}
310
#[cfg(test)]
mod tests {
    use super::*;
    use crate::evolution::Chromosome;

    /// Adding the same entry twice leaves exactly one copy in the corpus.
    #[test]
    fn bypass_entry_deduplicates() {
        let chromosome = Chromosome::new(vec![("encoding".into(), "UrlEncode".into())]);
        let first = BypassEntry::from_chromosome(&chromosome, None);
        let second = first.clone();

        let mut corpus = BypassCorpus::new();
        corpus.add(first);
        corpus.add(second);

        assert_eq!(corpus.entries.len(), 1);
    }

    /// Every lineage variant mentions its own kind in the rendered trace.
    #[test]
    fn lineage_trace_roundtrips() {
        let parent = Chromosome::new(vec![("a".into(), "1".into())]);

        let genesis_trace = Lineage::genesis(0).to_trace();
        assert!(genesis_trace.contains("genesis"));

        let crossover_trace = Lineage::crossover(&parent, &parent, "uniform", 1).to_trace();
        assert!(crossover_trace.contains("crossover"));

        let mutation_trace = Lineage::mutation(&parent, vec![], 2).to_trace();
        assert!(mutation_trace.contains("mutation"));
    }

    /// Parents without genes render as empty `{}` groups in a crossover trace.
    #[test]
    fn empty_lineage_trace_is_serializable() {
        let empty = Chromosome::new(Vec::new());
        let trace = Lineage::crossover(&empty, &empty, "single_point", 1).to_trace();

        for needle in ["crossover", "a={}", "b={}"] {
            assert!(trace.contains(needle));
        }
    }

    /// The same genes in a different order produce a different payload hash.
    #[test]
    fn payload_hash_is_order_sensitive() {
        let forward = Chromosome::new(vec![
            ("encoding".into(), "UrlEncode".into()),
            ("content_type".into(), "JsonNested".into()),
        ]);
        let reversed = Chromosome::new(vec![
            ("content_type".into(), "JsonNested".into()),
            ("encoding".into(), "UrlEncode".into()),
        ]);

        let hash_forward = BypassEntry::from_chromosome(&forward, None).payload_hash;
        let hash_reversed = BypassEntry::from_chromosome(&reversed, None).payload_hash;

        assert_ne!(hash_forward, hash_reversed);
    }
}