Skip to main content

wafrift_evolution/evolution/
population.rs

1use crate::lineage::Lineage;
2use rand::Rng;
3use serde::{Deserialize, Serialize};
4
5/// A chromosome representing a combination of evasion techniques.
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
7pub struct Chromosome {
8    /// Named technique genes: `(gene_name, gene_value)`.
9    pub genes: Vec<(String, String)>,
10    /// Fitness score (0.0 = always blocked, 1.0 = always passes).
11    pub fitness: f64,
12    /// Number of times this chromosome has been evaluated.
13    pub evaluations: u32,
14    /// Full lineage tree for replayability.
15    #[serde(default = "default_lineage")]
16    pub lineage: Lineage,
17}
18
19fn default_lineage() -> Lineage {
20    Lineage::genesis(0)
21}
22
23impl Chromosome {
24    /// Create a new chromosome with zero fitness and genesis lineage.
25    #[must_use]
26    pub fn new(genes: Vec<(String, String)>) -> Self {
27        Self {
28            genes,
29            fitness: 0.0,
30            evaluations: 0,
31            lineage: Lineage::genesis(0),
32        }
33    }
34
35    /// Create a new chromosome with explicit lineage.
36    #[must_use]
37    pub fn with_lineage(genes: Vec<(String, String)>, lineage: Lineage) -> Self {
38        Self {
39            genes,
40            fitness: 0.0,
41            evaluations: 0,
42            lineage,
43        }
44    }
45
46    /// Record an evaluation result using a rich oracle verdict.
47    pub fn record_verdict(&mut self, verdict: &crate::types::OracleVerdict) {
48        self.evaluations += 1;
49        let value = verdict.to_fitness();
50        let alpha = 2.0 / (f64::from(self.evaluations) + 1.0);
51        self.fitness = alpha * value + (1.0 - alpha) * self.fitness;
52    }
53
54    /// Legacy record for backward compatibility.
55    pub fn record(&mut self, passed: bool) {
56        self.record_verdict(&crate::types::OracleVerdict::from_bool(passed));
57    }
58
59    /// Get a specific gene's value by name.
60    #[must_use]
61    pub fn gene(&self, name: &str) -> Option<&str> {
62        self.genes
63            .iter()
64            .find(|(gene_name, _)| gene_name == name)
65            .map(|(_, value)| value.as_str())
66    }
67
68    /// Check if this chromosome has a specific gene.
69    #[must_use]
70    pub fn has_gene(&self, name: &str) -> bool {
71        self.genes.iter().any(|(gene_name, _)| gene_name == name)
72    }
73
74    /// Count genes that actively apply an evasion technique.
75    #[must_use]
76    pub fn active_gene_count(&self) -> usize {
77        self.genes
78            .iter()
79            .filter(|(_, value)| value != "None")
80            .count()
81    }
82
83    /// Compute a hash of this chromosome for deduplication.
84    #[must_use]
85    pub fn hash(&self) -> u64 {
86        use std::collections::hash_map::DefaultHasher;
87        use std::hash::{Hash, Hasher};
88        let mut hasher = DefaultHasher::new();
89        for (name, value) in &self.genes {
90            name.hash(&mut hasher);
91            value.hash(&mut hasher);
92        }
93        hasher.finish()
94    }
95}
96
97/// Gene pool: the possible values for each gene type.
98#[derive(Debug, Clone, Serialize, Deserialize)]
99pub struct GenePool {
100    /// Available gene types and their possible values.
101    pub pools: Vec<(String, Vec<String>)>,
102}
103
104impl GenePool {
105    /// Create a gene pool with WAF Rift's built-in technique space.
106    #[must_use]
107    pub fn default_wafrift() -> Self {
108        Self {
109            pools: vec![
110                (
111                    "encoding".into(),
112                    vec![
113                        "None".into(),
114                        "CaseAlternation".into(),
115                        "UrlEncode".into(),
116                        "DoubleUrlEncode".into(),
117                        "TripleUrlEncode".into(),
118                        "UnicodeEncode".into(),
119                        "HtmlEntityEncode".into(),
120                        "OverlongUtf8".into(),
121                        "WhitespaceInsertion".into(),
122                        "SqlCommentInsertion".into(),
123                        "NullByteInsertion".into(),
124                        "ChunkedSplit".into(),
125                        "ParameterPollution".into(),
126                    ],
127                ),
128                (
129                    "content_type".into(),
130                    vec![
131                        "None".into(),
132                        "Multipart".into(),
133                        "MultipartQuotedBoundary".into(),
134                        "JsonNested".into(),
135                        "JsonUnicodeKeys".into(),
136                        "JsonWithComments".into(),
137                        "XmlCdata".into(),
138                        "XmlNamespace".into(),
139                        "MixedContentType".into(),
140                    ],
141                ),
142                (
143                    "header_obfuscation".into(),
144                    vec![
145                        "None".into(),
146                        "CaseMixing".into(),
147                        "TabSeparator".into(),
148                        "WhitespacePadding".into(),
149                        "LineFolding".into(),
150                        "UnderscoreSubstitution".into(),
151                    ],
152                ),
153                (
154                    "grammar_rule".into(),
155                    vec![
156                        "None".into(),
157                        "tautology_swap".into(),
158                        "comment_swap".into(),
159                        "whitespace_swap".into(),
160                        "equality_swap".into(),
161                        "union_swap".into(),
162                        "string_split".into(),
163                        "mysql_conditional".into(),
164                        "tag_event_swap".into(),
165                        "exec_fn_swap".into(),
166                        "uri_scheme".into(),
167                        "separator_swap".into(),
168                        "command_obfuscate".into(),
169                        "ifs_swap".into(),
170                        "path_obfuscate".into(),
171                        "variable_indirection".into(),
172                    ],
173                ),
174            ],
175        }
176    }
177
178    /// Get the possible values for a gene type.
179    #[must_use]
180    pub fn values_for(&self, gene_name: &str) -> Option<&[String]> {
181        self.pools
182            .iter()
183            .find(|(name, _)| name == gene_name)
184            .map(|(_, values)| values.as_slice())
185    }
186
187    /// Get all gene type names.
188    #[must_use]
189    pub fn gene_names(&self) -> Vec<&str> {
190        self.pools.iter().map(|(name, _)| name.as_str()).collect()
191    }
192
193    /// Pick a random value for a gene type using the provided RNG.
194    #[must_use]
195    pub fn random_value(&self, gene_name: &str, rng: &mut impl Rng) -> Option<String> {
196        let values = self.values_for(gene_name)?;
197        if values.is_empty() {
198            return None;
199        }
200        Some(values[rng.gen_range(0..values.len())].clone())
201    }
202
203    /// Return all unique values across all gene pools.
204    #[must_use]
205    pub fn all_values(&self) -> Vec<String> {
206        let mut values = Vec::new();
207        for (_, pool_values) in &self.pools {
208            for v in pool_values {
209                if !values.contains(v) {
210                    values.push(v.clone());
211                }
212            }
213        }
214        values
215    }
216}
217
218/// Generate a random chromosome from the gene pool.
219#[must_use]
220pub fn random_chromosome(gene_pool: &GenePool, rng: &mut impl Rng) -> Chromosome {
221    let genes = gene_pool
222        .gene_names()
223        .into_iter()
224        .map(|name| {
225            let value = gene_pool
226                .random_value(name, rng)
227                .unwrap_or_else(|| String::from("None"));
228            (name.to_string(), value)
229        })
230        .collect();
231    Chromosome::new(genes)
232}
233
234/// Generate a baseline chromosome with all genes set to "None".
235#[must_use]
236pub fn baseline_chromosome(gene_pool: &GenePool) -> Chromosome {
237    let genes = gene_pool
238        .gene_names()
239        .into_iter()
240        .map(|name| (name.to_string(), String::from("None")))
241        .collect();
242    Chromosome::new(genes)
243}
244
#[cfg(test)]
mod tests {
    use super::*;
    use rand::rngs::StdRng;
    use rand::SeedableRng;

    /// Build a chromosome from borrowed `(name, value)` pairs.
    fn chromosome_of(pairs: &[(&str, &str)]) -> Chromosome {
        Chromosome::new(
            pairs
                .iter()
                .map(|&(name, value)| (name.to_owned(), value.to_owned()))
                .collect(),
        )
    }

    /// Deterministic RNG shared by the randomized tests.
    fn seeded_rng() -> StdRng {
        StdRng::seed_from_u64(42)
    }

    #[test]
    fn chromosome_new_zero_fitness() {
        let fresh = chromosome_of(&[("a", "1")]);
        assert_eq!(fresh.fitness, 0.0);
        assert_eq!(fresh.evaluations, 0);
    }

    #[test]
    fn chromosome_record_updates_fitness() {
        let mut subject = chromosome_of(&[("a", "1")]);
        subject.record(true);
        assert_eq!(subject.evaluations, 1);
        assert!(subject.fitness > 0.0);
    }

    #[test]
    fn chromosome_record_verdict_smoothing() {
        let mut subject = chromosome_of(&[("a", "1")]);
        subject.record_verdict(&crate::types::OracleVerdict::from_bool(true));
        let after_pass = subject.fitness;
        subject.record_verdict(&crate::types::OracleVerdict::from_bool(false));
        assert!(subject.fitness < after_pass);
    }

    #[test]
    fn chromosome_gene_lookup() {
        let subject = chromosome_of(&[("encoding", "UrlEncode"), ("content_type", "None")]);
        assert_eq!(subject.gene("encoding"), Some("UrlEncode"));
        assert_eq!(subject.gene("missing"), None);
    }

    #[test]
    fn chromosome_has_gene() {
        let subject = chromosome_of(&[("encoding", "UrlEncode")]);
        assert!(subject.has_gene("encoding"));
        assert!(!subject.has_gene("missing"));
    }

    #[test]
    fn chromosome_active_gene_count_skips_none() {
        let subject = chromosome_of(&[("a", "None"), ("b", "1"), ("c", "None"), ("d", "2")]);
        assert_eq!(subject.active_gene_count(), 2);
    }

    #[test]
    fn chromosome_hash_equal_for_equal_genes() {
        let left = chromosome_of(&[("a", "1"), ("b", "2")]);
        let right = chromosome_of(&[("a", "1"), ("b", "2")]);
        assert_eq!(left.hash(), right.hash());
    }

    #[test]
    fn chromosome_hash_different_for_different_genes() {
        let left = chromosome_of(&[("a", "1")]);
        let right = chromosome_of(&[("a", "2")]);
        assert_ne!(left.hash(), right.hash());
    }

    #[test]
    fn gene_pool_default_has_encoding() {
        let pool = GenePool::default_wafrift();
        for gene_type in [
            "encoding",
            "content_type",
            "header_obfuscation",
            "grammar_rule",
        ] {
            assert!(pool.values_for(gene_type).is_some());
        }
    }

    #[test]
    fn gene_pool_gene_names() {
        let pool = GenePool::default_wafrift();
        assert_eq!(pool.gene_names().len(), 4);
    }

    #[test]
    fn gene_pool_random_value_returns_some() {
        let pool = GenePool::default_wafrift();
        let mut rng = seeded_rng();
        assert!(pool.random_value("encoding", &mut rng).is_some());
    }

    #[test]
    fn gene_pool_random_value_missing_returns_none() {
        let pool = GenePool::default_wafrift();
        let mut rng = seeded_rng();
        assert!(pool.random_value("missing", &mut rng).is_none());
    }

    #[test]
    fn gene_pool_all_values_unique() {
        let everything = GenePool::default_wafrift().all_values();
        let distinct: std::collections::HashSet<_> = everything.iter().collect();
        assert_eq!(everything.len(), distinct.len());
    }

    #[test]
    fn baseline_chromosome_all_none() {
        let pool = GenePool::default_wafrift();
        let baseline = baseline_chromosome(&pool);
        for (_, value) in &baseline.genes {
            assert_eq!(value.as_str(), "None");
        }
        assert_eq!(baseline.genes.len(), pool.gene_names().len());
    }

    #[test]
    fn random_chromosome_has_all_genes() {
        let pool = GenePool::default_wafrift();
        let mut rng = seeded_rng();
        let sampled = random_chromosome(&pool, &mut rng);
        assert_eq!(sampled.genes.len(), pool.gene_names().len());
    }
}