// Source: wafrift_evolution/lineage.rs

//! Lineage tracking for replayable bypass discovery.
2
3use crate::evolution::Chromosome;
4use serde::{Deserialize, Serialize};
5use std::sync::Arc;
6
7/// A single mutation operation log entry.
8#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
9pub struct MutationOp {
10    /// Gene name that was mutated.
11    pub gene_name: String,
12    /// Previous value.
13    pub from: String,
14    /// New value.
15    pub to: String,
16    /// Mutation operator name.
17    pub operator: String,
18}
19
20/// Compact, transitive-closure-safe snapshot of a parent chromosome's
21/// gene tuple. Stored inside `Lineage::Crossover` / `Lineage::Mutation`
22/// instead of `Arc<Chromosome>` so the lineage tree of a long-running
23/// scan is bounded by `O(genes per chromosome)` per ancestor instead
24/// of `O(full ancestry chain)` — the earlier full-Chromosome arcs
25/// transitively dragged the parent's own `Lineage` field along, so
26/// every grandchild kept its grandparents alive forever and a long
27/// scan would OOM.
28#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
29pub struct ParentSnapshot {
30    pub genes: Vec<(String, String)>,
31}
32
33impl ParentSnapshot {
34    fn from_chromosome(c: &Chromosome) -> Self {
35        Self {
36            genes: c.genes.clone(),
37        }
38    }
39}
40
41/// Lineage of a chromosome: how it was derived from seeds.
42#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
43pub enum Lineage {
44    /// Original randomly-generated chromosome.
45    Genesis {
46        /// Generation when created.
47        generation: u32,
48    },
49    /// Created via crossover of two parents.
50    Crossover {
51        /// Parent A snapshot — genes only, breaks ancestry chain.
52        parent_a: Arc<ParentSnapshot>,
53        /// Parent B snapshot — genes only, breaks ancestry chain.
54        parent_b: Arc<ParentSnapshot>,
55        /// Strategy used.
56        strategy: String,
57        /// Generation when created.
58        generation: u32,
59    },
60    /// Created via mutation of a single parent.
61    Mutation {
62        /// Parent snapshot — genes only, breaks ancestry chain.
63        parent: Arc<ParentSnapshot>,
64        /// Log of applied mutation operations.
65        log: Vec<MutationOp>,
66        /// Generation when created.
67        generation: u32,
68    },
69}
70
71impl Lineage {
72    /// Create a genesis lineage.
73    #[must_use]
74    pub fn genesis(generation: u32) -> Self {
75        Self::Genesis { generation }
76    }
77
78    /// Create a crossover lineage.
79    #[must_use]
80    pub fn crossover(
81        parent_a: &Chromosome,
82        parent_b: &Chromosome,
83        strategy: &str,
84        generation: u32,
85    ) -> Self {
86        Self::Crossover {
87            parent_a: Arc::new(ParentSnapshot::from_chromosome(parent_a)),
88            parent_b: Arc::new(ParentSnapshot::from_chromosome(parent_b)),
89            strategy: strategy.to_string(),
90            generation,
91        }
92    }
93
94    /// Create a mutation lineage.
95    #[must_use]
96    pub fn mutation(parent: &Chromosome, log: Vec<MutationOp>, generation: u32) -> Self {
97        Self::Mutation {
98            parent: Arc::new(ParentSnapshot::from_chromosome(parent)),
99            log,
100            generation,
101        }
102    }
103
104    /// Serialize lineage to a compact string representation.
105    #[must_use]
106    pub fn to_trace(&self) -> String {
107        match self {
108            Self::Genesis { generation } => format!("genesis[gen={generation}]"),
109            Self::Crossover {
110                parent_a,
111                parent_b,
112                strategy,
113                generation,
114            } => {
115                format!(
116                    "crossover[gen={generation},strategy={strategy},a={{{}}},b={{{}}}]",
117                    genes_to_string(&parent_a.genes),
118                    genes_to_string(&parent_b.genes)
119                )
120            }
121            Self::Mutation {
122                parent,
123                log,
124                generation,
125            } => {
126                let ops: Vec<String> = log
127                    .iter()
128                    .map(|op| format!("{}:{}->{}[{}]", op.gene_name, op.from, op.to, op.operator))
129                    .collect();
130                format!(
131                    "mutation[gen={generation},parent={{{}}},ops=[{}]]",
132                    genes_to_string(&parent.genes),
133                    ops.join(",")
134                )
135            }
136        }
137    }
138}
139
/// Renders gene pairs as `name=value` items joined by commas.
///
/// An empty slice yields an empty string. Names and values are copied
/// verbatim, without escaping.
fn genes_to_string(genes: &[(String, String)]) -> String {
    // Upper bound: every pair contributes its two strings plus `=` and a
    // separating `,`, so the buffer never has to grow.
    let capacity = genes
        .iter()
        .map(|(name, value)| name.len() + value.len() + 2)
        .sum();
    let mut rendered = String::with_capacity(capacity);

    for (index, (name, value)) in genes.iter().enumerate() {
        if index > 0 {
            rendered.push(',');
        }
        rendered.push_str(name);
        rendered.push('=');
        rendered.push_str(value);
    }
    rendered
}
147
148/// Serialize a bypass corpus including full lineage trees.
149#[derive(Debug, Clone, Serialize, Deserialize)]
150pub struct BypassEntry {
151    /// Payload hash (SHA-256 hex of serialized genes).
152    pub payload_hash: String,
153    /// Genes that produced the bypass.
154    pub genes: Vec<(String, String)>,
155    /// Full lineage trace.
156    pub lineage_trace: String,
157    /// Final fitness score.
158    pub fitness: f64,
159    /// Number of evaluations.
160    pub evaluations: u32,
161    /// Target WAF identifier (optional).
162    pub target_waf: Option<String>,
163    /// Whether this bypass was verified.
164    pub verified: bool,
165    /// Schema version for forward/backward compatibility.
166    pub schema_version: u32,
167}
168
169impl BypassEntry {
170    pub const CURRENT_SCHEMA: u32 = 1;
171
172    #[must_use]
173    pub fn from_chromosome(chromosome: &Chromosome, target_waf: Option<String>) -> Self {
174        // SHA-256 over a deterministic gene encoding. Earlier versions
175        // used the 64-bit DefaultHasher, which collides via birthday
176        // attack at roughly 2^32 chromosomes — well within reach of a
177        // long-running scan, causing BypassCorpus::add to silently
178        // dedupe distinct bypass discoveries.
179        //
180        // Important: gene order is part of the payload identity. Two
181        // chromosomes with the same set of genes in different order
182        // intentionally produce different hashes.
183        use sha2::{Digest, Sha256};
184        let mut hasher = Sha256::new();
185        for (k, v) in &chromosome.genes {
186            hasher.update(k.as_bytes());
187            hasher.update([0u8]); // delimiter so ("ab", "c") != ("a", "bc")
188            hasher.update(v.as_bytes());
189            hasher.update([0u8]);
190        }
191        let digest = hasher.finalize();
192        let payload_hash = digest
193            .iter()
194            .map(|b| format!("{b:02x}"))
195            .collect::<String>();
196
197        Self {
198            payload_hash,
199            genes: chromosome.genes.clone(),
200            lineage_trace: chromosome.lineage.to_trace(),
201            fitness: chromosome.fitness,
202            evaluations: chromosome.evaluations,
203            target_waf,
204            verified: true,
205            schema_version: Self::CURRENT_SCHEMA,
206        }
207    }
208}
209
210/// A serializable bypass corpus.
211#[derive(Debug, Clone, Default, Serialize, Deserialize)]
212pub struct BypassCorpus {
213    pub entries: Vec<BypassEntry>,
214    pub schema_version: u32,
215}
216
217impl BypassCorpus {
218    pub const CURRENT_SCHEMA: u32 = 1;
219
220    #[must_use]
221    pub fn new() -> Self {
222        Self {
223            entries: Vec::new(),
224            schema_version: Self::CURRENT_SCHEMA,
225        }
226    }
227
228    /// Add a bypass entry.
229    pub fn add(&mut self, entry: BypassEntry) {
230        // Deduplicate by payload hash
231        if !self
232            .entries
233            .iter()
234            .any(|e| e.payload_hash == entry.payload_hash)
235        {
236            self.entries.push(entry);
237        }
238    }
239
240    /// Save corpus to disk as JSONL (one JSON object per line).
241    pub fn save(&self, path: &std::path::Path) -> Result<(), crate::types::EvolutionError> {
242        use crate::types::EvolutionError;
243        let mut lines = Vec::new();
244        for entry in &self.entries {
245            let json = serde_json::to_string(entry)
246                .map_err(|e| EvolutionError::SerializationFailed(e.to_string()))?;
247            lines.push(json);
248        }
249        std::fs::write(path, lines.join("\n"))
250            .map_err(|e| EvolutionError::SerializationFailed(e.to_string()))?;
251        Ok(())
252    }
253
254    /// Load corpus from JSONL.
255    pub fn load(path: &std::path::Path) -> Result<Self, crate::types::EvolutionError> {
256        use crate::types::EvolutionError;
257        let content = std::fs::read_to_string(path)
258            .map_err(|e| EvolutionError::DeserializationFailed(e.to_string()))?;
259        let mut entries = Vec::new();
260        for line in content.lines().filter(|l| !l.trim().is_empty()) {
261            let entry: BypassEntry = serde_json::from_str(line)
262                .map_err(|e| EvolutionError::DeserializationFailed(e.to_string()))?;
263            entries.push(entry);
264        }
265        Ok(Self {
266            entries,
267            schema_version: Self::CURRENT_SCHEMA,
268        })
269    }
270}
271
#[cfg(test)]
mod tests {
    use super::*;
    use crate::evolution::Chromosome;

    /// Adding the same entry twice must leave exactly one copy behind.
    #[test]
    fn bypass_entry_deduplicates() {
        let chromosome = Chromosome::new(vec![("encoding".into(), "UrlEncode".into())]);
        let entry = BypassEntry::from_chromosome(&chromosome, None);

        let mut corpus = BypassCorpus::new();
        corpus.add(entry.clone());
        corpus.add(entry);

        assert_eq!(corpus.entries.len(), 1);
    }

    /// Every lineage variant names itself in its trace.
    #[test]
    fn lineage_trace_roundtrips() {
        let parent = Chromosome::new(vec![("a".into(), "1".into())]);

        let genesis_trace = Lineage::genesis(0).to_trace();
        assert!(genesis_trace.contains("genesis"));

        let crossover_trace = Lineage::crossover(&parent, &parent, "uniform", 1).to_trace();
        assert!(crossover_trace.contains("crossover"));

        let mutation_trace = Lineage::mutation(&parent, Vec::new(), 2).to_trace();
        assert!(mutation_trace.contains("mutation"));
    }

    /// Parents without genes render as empty `{}` groups.
    #[test]
    fn empty_lineage_trace_is_serializable() {
        let empty = Chromosome::new(Vec::new());
        let trace = Lineage::crossover(&empty, &empty, "single_point", 1).to_trace();

        for needle in ["crossover", "a={}", "b={}"] {
            assert!(
                trace.contains(needle),
                "trace {trace:?} is missing {needle:?}"
            );
        }
    }

    /// The same genes in a different order must hash differently.
    #[test]
    fn payload_hash_is_order_sensitive() {
        let forward = Chromosome::new(vec![
            ("encoding".into(), "UrlEncode".into()),
            ("content_type".into(), "JsonNested".into()),
        ]);
        let reversed = Chromosome::new(vec![
            ("content_type".into(), "JsonNested".into()),
            ("encoding".into(), "UrlEncode".into()),
        ]);

        let forward_hash = BypassEntry::from_chromosome(&forward, None).payload_hash;
        let reversed_hash = BypassEntry::from_chromosome(&reversed, None).payload_hash;

        assert_ne!(forward_hash, reversed_hash);
    }
}