Skip to main content

wafrift_evolution/evolution/
population.rs

1use crate::lineage::Lineage;
2use rand::Rng;
3use serde::{Deserialize, Serialize};
4use wafrift_types::pick::pick_ref_from_rng;
5
6/// A chromosome representing a combination of evasion techniques.
7#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub struct Chromosome {
9    /// Named technique genes: `(gene_name, gene_value)`.
10    pub genes: Vec<(String, String)>,
11    /// Fitness score (0.0 = always blocked, 1.0 = always passes).
12    pub fitness: f64,
13    /// Number of times this chromosome has been evaluated.
14    pub evaluations: u32,
15    /// Full lineage tree for replayability.
16    #[serde(default = "default_lineage")]
17    pub lineage: Lineage,
18}
19
20fn default_lineage() -> Lineage {
21    Lineage::genesis(0)
22}
23
24impl Chromosome {
25    /// Create a new chromosome with zero fitness and genesis lineage.
26    #[must_use]
27    pub fn new(genes: Vec<(String, String)>) -> Self {
28        Self {
29            genes,
30            fitness: 0.0,
31            evaluations: 0,
32            lineage: Lineage::genesis(0),
33        }
34    }
35
36    /// Create a new chromosome with explicit lineage.
37    #[must_use]
38    pub fn with_lineage(genes: Vec<(String, String)>, lineage: Lineage) -> Self {
39        Self {
40            genes,
41            fitness: 0.0,
42            evaluations: 0,
43            lineage,
44        }
45    }
46
47    /// Record an evaluation result using a rich oracle verdict.
48    pub fn record_verdict(&mut self, verdict: &crate::types::OracleVerdict) {
49        self.evaluations += 1;
50        let value = verdict.to_fitness();
51        let alpha = 2.0 / (f64::from(self.evaluations) + 1.0);
52        self.fitness = alpha * value + (1.0 - alpha) * self.fitness;
53    }
54
55    /// Legacy record for backward compatibility.
56    pub fn record(&mut self, passed: bool) {
57        self.record_verdict(&crate::types::OracleVerdict::from_bool(passed));
58    }
59
60    /// Get a specific gene's value by name.
61    #[must_use]
62    pub fn gene(&self, name: &str) -> Option<&str> {
63        self.genes
64            .iter()
65            .find(|(gene_name, _)| gene_name == name)
66            .map(|(_, value)| value.as_str())
67    }
68
69    /// Check if this chromosome has a specific gene.
70    #[must_use]
71    pub fn has_gene(&self, name: &str) -> bool {
72        self.genes.iter().any(|(gene_name, _)| gene_name == name)
73    }
74
75    /// Count genes that actively apply an evasion technique.
76    #[must_use]
77    pub fn active_gene_count(&self) -> usize {
78        self.genes
79            .iter()
80            .filter(|(_, value)| value != "None")
81            .count()
82    }
83
84    /// Compute a hash of this chromosome for deduplication.
85    ///
86    /// R48 pass-10 I4 (CLAUDE.md §15 AUDIT): pre-fix used
87    /// `DefaultHasher` which is non-collision-resistant — an adversary
88    /// controlling gene names (e.g. via crafted `--technique` flags or
89    /// `.wafrift.toml`) could engineer collisions to silently dedupe
90    /// distinct bypass discoveries from the corpus or mark live
91    /// candidates as already-visited in tabu search. Switched to
92    /// SHA-256, truncated to u64 — matches the lineage.rs::BypassEntry
93    /// collision-fix that the rest of the crate already adopted.
94    #[must_use]
95    pub fn hash(&self) -> u64 {
96        use sha2::{Digest, Sha256};
97        let mut hasher = Sha256::new();
98        for (name, value) in &self.genes {
99            hasher.update((name.len() as u64).to_le_bytes());
100            hasher.update(name.as_bytes());
101            hasher.update((value.len() as u64).to_le_bytes());
102            hasher.update(value.as_bytes());
103        }
104        let digest = hasher.finalize();
105        let mut out = [0u8; 8];
106        out.copy_from_slice(&digest[..8]);
107        u64::from_le_bytes(out)
108    }
109}
110
111/// Gene pool: the possible values for each gene type.
112#[derive(Debug, Clone, Serialize, Deserialize)]
113pub struct GenePool {
114    /// Available gene types and their possible values.
115    pub pools: Vec<(String, Vec<String>)>,
116}
117
118impl GenePool {
119    /// Create a gene pool with WAF Rift's built-in technique space.
120    #[must_use]
121    pub fn default_wafrift() -> Self {
122        Self {
123            pools: vec![
124                (
125                    "encoding".into(),
126                    vec![
127                        "None".into(),
128                        "CaseAlternation".into(),
129                        "UrlEncode".into(),
130                        "DoubleUrlEncode".into(),
131                        "TripleUrlEncode".into(),
132                        "UnicodeEncode".into(),
133                        "HtmlEntityEncode".into(),
134                        "OverlongUtf8".into(),
135                        "WhitespaceInsertion".into(),
136                        "SqlCommentInsertion".into(),
137                        "NullByteInsertion".into(),
138                        "ChunkedSplit".into(),
139                        "ParameterPollution".into(),
140                    ],
141                ),
142                (
143                    "content_type".into(),
144                    vec![
145                        "None".into(),
146                        "Multipart".into(),
147                        "MultipartQuotedBoundary".into(),
148                        "JsonNested".into(),
149                        "JsonUnicodeKeys".into(),
150                        "JsonWithComments".into(),
151                        "XmlCdata".into(),
152                        "XmlNamespace".into(),
153                        "MixedContentType".into(),
154                    ],
155                ),
156                (
157                    "header_obfuscation".into(),
158                    vec![
159                        "None".into(),
160                        "CaseMixing".into(),
161                        "TabSeparator".into(),
162                        "WhitespacePadding".into(),
163                        "LineFolding".into(),
164                        "UnderscoreSubstitution".into(),
165                    ],
166                ),
167                (
168                    "grammar_rule".into(),
169                    vec![
170                        "None".into(),
171                        "tautology_swap".into(),
172                        "comment_swap".into(),
173                        "whitespace_swap".into(),
174                        "equality_swap".into(),
175                        "union_swap".into(),
176                        "string_split".into(),
177                        "mysql_conditional".into(),
178                        "tag_event_swap".into(),
179                        "exec_fn_swap".into(),
180                        "uri_scheme".into(),
181                        "separator_swap".into(),
182                        "command_obfuscate".into(),
183                        "ifs_swap".into(),
184                        "path_obfuscate".into(),
185                        "variable_indirection".into(),
186                    ],
187                ),
188            ],
189        }
190    }
191
192    /// Get the possible values for a gene type.
193    #[must_use]
194    pub fn values_for(&self, gene_name: &str) -> Option<&[String]> {
195        self.pools
196            .iter()
197            .find(|(name, _)| name == gene_name)
198            .map(|(_, values)| values.as_slice())
199    }
200
201    /// Get all gene type names.
202    #[must_use]
203    pub fn gene_names(&self) -> Vec<&str> {
204        self.pools.iter().map(|(name, _)| name.as_str()).collect()
205    }
206
207    /// Pick a random value for a gene type using the provided RNG.
208    #[must_use]
209    pub fn random_value(&self, gene_name: &str, rng: &mut impl Rng) -> Option<String> {
210        let values = self.values_for(gene_name)?;
211        pick_ref_from_rng(values, rng).cloned()
212    }
213
214    /// Return all unique values across all gene pools.
215    #[must_use]
216    pub fn all_values(&self) -> Vec<String> {
217        let mut values = Vec::new();
218        for (_, pool_values) in &self.pools {
219            for v in pool_values {
220                if !values.contains(v) {
221                    values.push(v.clone());
222                }
223            }
224        }
225        values
226    }
227}
228
229/// Generate a random chromosome from the gene pool.
230#[must_use]
231pub fn random_chromosome(gene_pool: &GenePool, rng: &mut impl Rng) -> Chromosome {
232    let genes = gene_pool
233        .gene_names()
234        .into_iter()
235        .map(|name| {
236            let value = gene_pool
237                .random_value(name, rng)
238                .unwrap_or_else(|| String::from("None"));
239            (name.to_string(), value)
240        })
241        .collect();
242    Chromosome::new(genes)
243}
244
245/// Generate a baseline chromosome with all genes set to "None".
246#[must_use]
247pub fn baseline_chromosome(gene_pool: &GenePool) -> Chromosome {
248    let genes = gene_pool
249        .gene_names()
250        .into_iter()
251        .map(|name| (name.to_string(), String::from("None")))
252        .collect();
253    Chromosome::new(genes)
254}
255
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand::rngs::StdRng;

    /// Turn string-literal pairs into the owned gene list `Chromosome` takes.
    fn genes(pairs: &[(&str, &str)]) -> Vec<(String, String)> {
        pairs
            .iter()
            .map(|(name, value)| ((*name).to_owned(), (*value).to_owned()))
            .collect()
    }

    // --- Chromosome -------------------------------------------------------

    #[test]
    fn chromosome_new_zero_fitness() {
        let fresh = Chromosome::new(genes(&[("a", "1")]));
        assert_eq!(fresh.fitness, 0.0);
        assert_eq!(fresh.evaluations, 0);
    }

    #[test]
    fn chromosome_record_updates_fitness() {
        let mut subject = Chromosome::new(genes(&[("a", "1")]));
        subject.record(true);
        assert_eq!(subject.evaluations, 1);
        assert!(subject.fitness > 0.0);
    }

    #[test]
    fn chromosome_record_verdict_smoothing() {
        let mut subject = Chromosome::new(genes(&[("a", "1")]));
        subject.record_verdict(&crate::types::OracleVerdict::from_bool(true));
        let after_pass = subject.fitness;
        subject.record_verdict(&crate::types::OracleVerdict::from_bool(false));
        // A failing verdict must pull the smoothed score down.
        assert!(subject.fitness < after_pass);
    }

    #[test]
    fn chromosome_gene_lookup() {
        let subject = Chromosome::new(genes(&[
            ("encoding", "UrlEncode"),
            ("content_type", "None"),
        ]));
        assert_eq!(subject.gene("encoding"), Some("UrlEncode"));
        assert_eq!(subject.gene("missing"), None);
    }

    #[test]
    fn chromosome_has_gene() {
        let subject = Chromosome::new(genes(&[("encoding", "UrlEncode")]));
        assert!(subject.has_gene("encoding"));
        assert!(!subject.has_gene("missing"));
    }

    #[test]
    fn chromosome_active_gene_count_skips_none() {
        let subject = Chromosome::new(genes(&[
            ("a", "None"),
            ("b", "1"),
            ("c", "None"),
            ("d", "2"),
        ]));
        assert_eq!(subject.active_gene_count(), 2);
    }

    #[test]
    fn chromosome_hash_equal_for_equal_genes() {
        let left = Chromosome::new(genes(&[("a", "1"), ("b", "2")]));
        let right = Chromosome::new(genes(&[("a", "1"), ("b", "2")]));
        assert_eq!(left.hash(), right.hash());
    }

    #[test]
    fn chromosome_hash_different_for_different_genes() {
        let left = Chromosome::new(genes(&[("a", "1")]));
        let right = Chromosome::new(genes(&[("a", "2")]));
        assert_ne!(left.hash(), right.hash());
    }

    // --- GenePool ---------------------------------------------------------

    #[test]
    fn gene_pool_default_has_encoding() {
        let pool = GenePool::default_wafrift();
        assert!(pool.values_for("encoding").is_some());
        assert!(pool.values_for("content_type").is_some());
        assert!(pool.values_for("header_obfuscation").is_some());
        assert!(pool.values_for("grammar_rule").is_some());
    }

    #[test]
    fn gene_pool_gene_names() {
        let pool = GenePool::default_wafrift();
        assert_eq!(pool.gene_names().len(), 4);
    }

    #[test]
    fn gene_pool_random_value_returns_some() {
        let pool = GenePool::default_wafrift();
        let mut rng = StdRng::seed_from_u64(42);
        assert!(pool.random_value("encoding", &mut rng).is_some());
    }

    #[test]
    fn gene_pool_random_value_missing_returns_none() {
        let pool = GenePool::default_wafrift();
        let mut rng = StdRng::seed_from_u64(42);
        assert!(pool.random_value("missing", &mut rng).is_none());
    }

    #[test]
    fn gene_pool_all_values_unique() {
        let pool = GenePool::default_wafrift();
        let values = pool.all_values();
        let distinct: std::collections::HashSet<_> = values.iter().collect();
        assert_eq!(values.len(), distinct.len());
    }

    // --- Free constructors ------------------------------------------------

    #[test]
    fn baseline_chromosome_all_none() {
        let pool = GenePool::default_wafrift();
        let baseline = baseline_chromosome(&pool);
        for (_, value) in &baseline.genes {
            assert_eq!(value, "None");
        }
        assert_eq!(baseline.genes.len(), pool.gene_names().len());
    }

    #[test]
    fn random_chromosome_has_all_genes() {
        let pool = GenePool::default_wafrift();
        let mut rng = StdRng::seed_from_u64(42);
        let sampled = random_chromosome(&pool, &mut rng);
        assert_eq!(sampled.genes.len(), pool.gene_names().len());
    }
}