wafrift_evolution/evolution/
population.rs1use crate::lineage::Lineage;
2use rand::Rng;
3use serde::{Deserialize, Serialize};
4use wafrift_types::pick::pick_ref_from_rng;
5
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
8pub struct Chromosome {
9 pub genes: Vec<(String, String)>,
11 pub fitness: f64,
13 pub evaluations: u32,
15 #[serde(default = "default_lineage")]
17 pub lineage: Lineage,
18}
19
20fn default_lineage() -> Lineage {
21 Lineage::genesis(0)
22}
23
24impl Chromosome {
25 #[must_use]
27 pub fn new(genes: Vec<(String, String)>) -> Self {
28 Self {
29 genes,
30 fitness: 0.0,
31 evaluations: 0,
32 lineage: Lineage::genesis(0),
33 }
34 }
35
36 #[must_use]
38 pub fn with_lineage(genes: Vec<(String, String)>, lineage: Lineage) -> Self {
39 Self {
40 genes,
41 fitness: 0.0,
42 evaluations: 0,
43 lineage,
44 }
45 }
46
47 pub fn record_verdict(&mut self, verdict: &crate::types::OracleVerdict) {
49 self.evaluations += 1;
50 let value = verdict.to_fitness();
51 let alpha = 2.0 / (f64::from(self.evaluations) + 1.0);
52 self.fitness = alpha * value + (1.0 - alpha) * self.fitness;
53 }
54
55 pub fn record(&mut self, passed: bool) {
57 self.record_verdict(&crate::types::OracleVerdict::from_bool(passed));
58 }
59
60 #[must_use]
62 pub fn gene(&self, name: &str) -> Option<&str> {
63 self.genes
64 .iter()
65 .find(|(gene_name, _)| gene_name == name)
66 .map(|(_, value)| value.as_str())
67 }
68
69 #[must_use]
71 pub fn has_gene(&self, name: &str) -> bool {
72 self.genes.iter().any(|(gene_name, _)| gene_name == name)
73 }
74
75 #[must_use]
77 pub fn active_gene_count(&self) -> usize {
78 self.genes
79 .iter()
80 .filter(|(_, value)| value != "None")
81 .count()
82 }
83
84 #[must_use]
95 pub fn hash(&self) -> u64 {
96 use sha2::{Digest, Sha256};
97 let mut hasher = Sha256::new();
98 for (name, value) in &self.genes {
99 hasher.update((name.len() as u64).to_le_bytes());
100 hasher.update(name.as_bytes());
101 hasher.update((value.len() as u64).to_le_bytes());
102 hasher.update(value.as_bytes());
103 }
104 let digest = hasher.finalize();
105 let mut out = [0u8; 8];
106 out.copy_from_slice(&digest[..8]);
107 u64::from_le_bytes(out)
108 }
109}
110
111#[derive(Debug, Clone, Serialize, Deserialize)]
113pub struct GenePool {
114 pub pools: Vec<(String, Vec<String>)>,
116}
117
118impl GenePool {
119 #[must_use]
121 pub fn default_wafrift() -> Self {
122 Self {
123 pools: vec![
124 (
125 "encoding".into(),
126 vec![
127 "None".into(),
128 "CaseAlternation".into(),
129 "UrlEncode".into(),
130 "DoubleUrlEncode".into(),
131 "TripleUrlEncode".into(),
132 "UnicodeEncode".into(),
133 "HtmlEntityEncode".into(),
134 "OverlongUtf8".into(),
135 "WhitespaceInsertion".into(),
136 "SqlCommentInsertion".into(),
137 "NullByteInsertion".into(),
138 "ChunkedSplit".into(),
139 "ParameterPollution".into(),
140 ],
141 ),
142 (
143 "content_type".into(),
144 vec![
145 "None".into(),
146 "Multipart".into(),
147 "MultipartQuotedBoundary".into(),
148 "JsonNested".into(),
149 "JsonUnicodeKeys".into(),
150 "JsonWithComments".into(),
151 "XmlCdata".into(),
152 "XmlNamespace".into(),
153 "MixedContentType".into(),
154 ],
155 ),
156 (
157 "header_obfuscation".into(),
158 vec![
159 "None".into(),
160 "CaseMixing".into(),
161 "TabSeparator".into(),
162 "WhitespacePadding".into(),
163 "LineFolding".into(),
164 "UnderscoreSubstitution".into(),
165 ],
166 ),
167 (
168 "grammar_rule".into(),
169 vec![
170 "None".into(),
171 "tautology_swap".into(),
172 "comment_swap".into(),
173 "whitespace_swap".into(),
174 "equality_swap".into(),
175 "union_swap".into(),
176 "string_split".into(),
177 "mysql_conditional".into(),
178 "tag_event_swap".into(),
179 "exec_fn_swap".into(),
180 "uri_scheme".into(),
181 "separator_swap".into(),
182 "command_obfuscate".into(),
183 "ifs_swap".into(),
184 "path_obfuscate".into(),
185 "variable_indirection".into(),
186 ],
187 ),
188 ],
189 }
190 }
191
192 #[must_use]
194 pub fn values_for(&self, gene_name: &str) -> Option<&[String]> {
195 self.pools
196 .iter()
197 .find(|(name, _)| name == gene_name)
198 .map(|(_, values)| values.as_slice())
199 }
200
201 #[must_use]
203 pub fn gene_names(&self) -> Vec<&str> {
204 self.pools.iter().map(|(name, _)| name.as_str()).collect()
205 }
206
207 #[must_use]
209 pub fn random_value(&self, gene_name: &str, rng: &mut impl Rng) -> Option<String> {
210 let values = self.values_for(gene_name)?;
211 pick_ref_from_rng(values, rng).cloned()
212 }
213
214 #[must_use]
216 pub fn all_values(&self) -> Vec<String> {
217 let mut values = Vec::new();
218 for (_, pool_values) in &self.pools {
219 for v in pool_values {
220 if !values.contains(v) {
221 values.push(v.clone());
222 }
223 }
224 }
225 values
226 }
227}
228
229#[must_use]
231pub fn random_chromosome(gene_pool: &GenePool, rng: &mut impl Rng) -> Chromosome {
232 let genes = gene_pool
233 .gene_names()
234 .into_iter()
235 .map(|name| {
236 let value = gene_pool
237 .random_value(name, rng)
238 .unwrap_or_else(|| String::from("None"));
239 (name.to_string(), value)
240 })
241 .collect();
242 Chromosome::new(genes)
243}
244
245#[must_use]
247pub fn baseline_chromosome(gene_pool: &GenePool) -> Chromosome {
248 let genes = gene_pool
249 .gene_names()
250 .into_iter()
251 .map(|name| (name.to_string(), String::from("None")))
252 .collect();
253 Chromosome::new(genes)
254}
255
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand::rngs::StdRng;

    /// Single-gene chromosome shared by the scoring tests.
    fn single_gene() -> Chromosome {
        Chromosome::new(vec![("a".into(), "1".into())])
    }

    // A freshly built chromosome has no score and no recorded verdicts.
    #[test]
    fn chromosome_new_zero_fitness() {
        let fresh = single_gene();
        assert_eq!(fresh.fitness, 0.0);
        assert_eq!(fresh.evaluations, 0);
    }

    // One passing verdict bumps the counter and lifts fitness above zero.
    #[test]
    fn chromosome_record_updates_fitness() {
        let mut chromosome = single_gene();
        chromosome.record(true);
        assert_eq!(chromosome.evaluations, 1);
        assert!(chromosome.fitness > 0.0);
    }

    // A failing verdict after a passing one pulls the score back down.
    #[test]
    fn chromosome_record_verdict_smoothing() {
        let mut chromosome = single_gene();
        chromosome.record_verdict(&crate::types::OracleVerdict::from_bool(true));
        let after_pass = chromosome.fitness;
        chromosome.record_verdict(&crate::types::OracleVerdict::from_bool(false));
        assert!(chromosome.fitness < after_pass);
    }

    // Lookup by name returns the stored value, or `None` for unknown genes.
    #[test]
    fn chromosome_gene_lookup() {
        let genes = vec![
            ("encoding".into(), "UrlEncode".into()),
            ("content_type".into(), "None".into()),
        ];
        let chromosome = Chromosome::new(genes);
        assert_eq!(chromosome.gene("encoding"), Some("UrlEncode"));
        assert_eq!(chromosome.gene("missing"), None);
    }

    #[test]
    fn chromosome_has_gene() {
        let chromosome = Chromosome::new(vec![("encoding".into(), "UrlEncode".into())]);
        assert!(chromosome.has_gene("encoding"));
        assert!(!chromosome.has_gene("missing"));
    }

    // Genes set to "None" are not counted as active.
    #[test]
    fn chromosome_active_gene_count_skips_none() {
        let genes = vec![
            ("a".into(), "None".into()),
            ("b".into(), "1".into()),
            ("c".into(), "None".into()),
            ("d".into(), "2".into()),
        ];
        let chromosome = Chromosome::new(genes);
        assert_eq!(chromosome.active_gene_count(), 2);
    }

    // Identical gene lists must produce identical fingerprints.
    #[test]
    fn chromosome_hash_equal_for_equal_genes() {
        let left = Chromosome::new(vec![("a".into(), "1".into()), ("b".into(), "2".into())]);
        let right = Chromosome::new(vec![("a".into(), "1".into()), ("b".into(), "2".into())]);
        assert_eq!(left.hash(), right.hash());
    }

    // Changing a gene value must change the fingerprint.
    #[test]
    fn chromosome_hash_different_for_different_genes() {
        let left = Chromosome::new(vec![("a".into(), "1".into())]);
        let right = Chromosome::new(vec![("a".into(), "2".into())]);
        assert_ne!(left.hash(), right.hash());
    }

    // The default pool exposes all four built-in genes.
    #[test]
    fn gene_pool_default_has_encoding() {
        let gene_pool = GenePool::default_wafrift();
        assert!(gene_pool.values_for("encoding").is_some());
        assert!(gene_pool.values_for("content_type").is_some());
        assert!(gene_pool.values_for("header_obfuscation").is_some());
        assert!(gene_pool.values_for("grammar_rule").is_some());
    }

    #[test]
    fn gene_pool_gene_names() {
        let gene_pool = GenePool::default_wafrift();
        assert_eq!(gene_pool.gene_names().len(), 4);
    }

    #[test]
    fn gene_pool_random_value_returns_some() {
        let gene_pool = GenePool::default_wafrift();
        let mut seeded = StdRng::seed_from_u64(42);
        let picked = gene_pool.random_value("encoding", &mut seeded);
        assert!(picked.is_some());
    }

    #[test]
    fn gene_pool_random_value_missing_returns_none() {
        let gene_pool = GenePool::default_wafrift();
        let mut seeded = StdRng::seed_from_u64(42);
        let picked = gene_pool.random_value("missing", &mut seeded);
        assert!(picked.is_none());
    }

    // `all_values` must not repeat a value shared between pools.
    #[test]
    fn gene_pool_all_values_unique() {
        let gene_pool = GenePool::default_wafrift();
        let listed = gene_pool.all_values();
        let distinct: std::collections::HashSet<_> = listed.iter().collect();
        assert_eq!(listed.len(), distinct.len());
    }

    // The baseline has one gene per pool entry, each set to "None".
    #[test]
    fn baseline_chromosome_all_none() {
        let gene_pool = GenePool::default_wafrift();
        let baseline = baseline_chromosome(&gene_pool);
        for (_, value) in &baseline.genes {
            assert_eq!(value, "None");
        }
        assert_eq!(baseline.genes.len(), gene_pool.gene_names().len());
    }

    // A random chromosome carries exactly one gene per pool entry.
    #[test]
    fn random_chromosome_has_all_genes() {
        let gene_pool = GenePool::default_wafrift();
        let mut seeded = StdRng::seed_from_u64(42);
        let sampled = random_chromosome(&gene_pool, &mut seeded);
        assert_eq!(sampled.genes.len(), gene_pool.gene_names().len());
    }
}