1use crate::evolution::crossover::mutation::mutate_with_log;
2use crate::evolution::{Chromosome, GenePool, population::random_chromosome};
3use crate::lineage::Lineage;
4use crate::search::{EvalCandidate, SearchAlgorithm, comparable_fitness, fitness_cmp};
5use crate::types::{Budget, EvolutionError, OracleVerdict, SearchStats};
6use rand::Rng;
7use rand::rngs::StdRng;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use wafrift_types::pick::pick_from_rng;
11
12#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
14pub struct FeatureDescriptor {
15 pub encoding: String,
16 pub grammar: String,
17 pub content_type: String,
18}
19
20impl FeatureDescriptor {
21 #[must_use]
22 pub fn from_chromosome(chromosome: &Chromosome) -> Self {
23 Self {
24 encoding: chromosome.gene("encoding").unwrap_or("None").to_string(),
25 grammar: chromosome
26 .gene("grammar_rule")
27 .unwrap_or("None")
28 .to_string(),
29 content_type: chromosome
30 .gene("content_type")
31 .unwrap_or("None")
32 .to_string(),
33 }
34 }
35}
36
37#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct MapElites {
57 #[serde(with = "grid_as_pairs")]
58 grid: HashMap<FeatureDescriptor, Chromosome>,
59 gene_pool: GenePool,
60 generation: u32,
61 eval_counter: u64,
62 #[serde(skip)]
63 in_flight: HashMap<u64, Chromosome>,
64}
65
66mod grid_as_pairs {
75 use super::{Chromosome, FeatureDescriptor};
76 use serde::{Deserialize, Deserializer, Serialize, Serializer};
77 use std::collections::HashMap;
78
79 pub(super) fn serialize<S: Serializer>(
80 grid: &HashMap<FeatureDescriptor, Chromosome>,
81 serializer: S,
82 ) -> Result<S::Ok, S::Error> {
83 let pairs: Vec<(&FeatureDescriptor, &Chromosome)> = grid.iter().collect();
84 pairs.serialize(serializer)
85 }
86
87 pub(super) fn deserialize<'de, D: Deserializer<'de>>(
88 deserializer: D,
89 ) -> Result<HashMap<FeatureDescriptor, Chromosome>, D::Error> {
90 let pairs: Vec<(FeatureDescriptor, Chromosome)> = Vec::deserialize(deserializer)?;
91 Ok(pairs.into_iter().collect())
92 }
93}
94
95impl MapElites {
96 #[must_use]
97 pub fn new() -> Self {
98 Self {
99 grid: HashMap::new(),
100 gene_pool: GenePool::default_wafrift(),
101 generation: 0,
102 eval_counter: 0,
103 in_flight: HashMap::new(),
104 }
105 }
106
107 fn sample_parent(&self, rng: &mut StdRng) -> Option<Chromosome> {
108 if self.grid.is_empty() {
109 return None;
110 }
111 if rng.gen_bool(0.5) {
114 let values: Vec<&Chromosome> = self.grid.values().collect();
119 Some((*pick_from_rng(&values, values[0], rng)).clone())
120 } else {
121 let encoding = self
124 .gene_pool
125 .random_value("encoding", rng)
126 .unwrap_or_else(|| "None".into());
127 let grammar = self
128 .gene_pool
129 .random_value("grammar_rule", rng)
130 .unwrap_or_else(|| "None".into());
131 let content_type = self
132 .gene_pool
133 .random_value("content_type", rng)
134 .unwrap_or_else(|| "None".into());
135 let descriptor = FeatureDescriptor {
136 encoding,
137 grammar,
138 content_type,
139 };
140 if let Some(c) = self.grid.get(&descriptor) {
143 Some(c.clone())
144 } else {
145 let values: Vec<&Chromosome> = self.grid.values().collect();
146 Some((*pick_from_rng(&values, values[0], rng)).clone())
147 }
148 }
149 }
150
151 fn generate_individual(&self, rng: &mut StdRng) -> Chromosome {
152 match self.sample_parent(rng) {
153 Some(parent) => {
154 let mut child = parent.clone();
155 let log = mutate_with_log(&mut child, &self.gene_pool, 0.25, rng);
156 child.lineage = Lineage::mutation(&parent, log, self.generation);
157 child
158 }
159 None => random_chromosome(&self.gene_pool, rng),
160 }
161 }
162}
163
164impl Default for MapElites {
165 fn default() -> Self {
166 Self::new()
167 }
168}
169
170impl SearchAlgorithm for MapElites {
171 fn name(&self) -> &'static str {
172 "map_elites"
173 }
174
175 fn initialize(&mut self, population: Vec<Chromosome>, gene_pool: &GenePool, _rng: &mut StdRng) {
176 self.gene_pool = gene_pool.clone();
177 self.grid.clear();
178 self.in_flight.clear();
179 for chromosome in population {
180 let descriptor = FeatureDescriptor::from_chromosome(&chromosome);
181 self.grid.entry(descriptor).or_insert(chromosome);
185 }
186 }
187
188 fn request_evaluations(&mut self, n: usize, rng: &mut StdRng) -> Vec<EvalCandidate> {
189 let mut out = Vec::with_capacity(n);
190 for _ in 0..n {
191 self.eval_counter = self.eval_counter.saturating_add(1);
192 let candidate = self.generate_individual(rng);
193 self.in_flight.insert(self.eval_counter, candidate.clone());
194 out.push(EvalCandidate {
195 id: self.eval_counter,
196 chromosome: candidate,
197 });
198 }
199 out
200 }
201
202 fn submit_evaluations(&mut self, results: Vec<(u64, OracleVerdict)>) {
203 for (id, verdict) in results {
204 if let Some(mut candidate) = self.in_flight.remove(&id) {
205 candidate.record_verdict(&verdict);
206 let descriptor = FeatureDescriptor::from_chromosome(&candidate);
207 use std::collections::hash_map::Entry;
216 match self.grid.entry(descriptor) {
217 Entry::Vacant(e) => {
218 e.insert(candidate);
219 }
220 Entry::Occupied(mut e) => {
221 if comparable_fitness(candidate.fitness)
222 > comparable_fitness(e.get().fitness)
223 {
224 *e.get_mut() = candidate;
225 }
226 }
227 }
228 }
229 }
230 self.generation = self.generation.saturating_add(1);
231 }
232
233 fn should_terminate(&self, stats: &SearchStats, budget: &Budget) -> bool {
234 stats.evaluations >= budget.max_requests
235 || stats.generation >= budget.max_generations
236 || stats.stagnation_counter >= budget.stagnation_limit
237 }
238
239 fn best(&self) -> Option<&Chromosome> {
240 self.grid
246 .values()
247 .max_by(|a, b| fitness_cmp(a.fitness, b.fitness))
248 }
249
250 fn checkpoint(&self) -> Result<Vec<u8>, EvolutionError> {
251 serde_json::to_vec(self).map_err(EvolutionError::SerializationFailed)
252 }
253
254 fn restore(&mut self, bytes: &[u8]) -> Result<(), EvolutionError> {
255 if bytes.len() > crate::types::MAX_CHECKPOINT_BYTES {
256 return Err(EvolutionError::OversizedData {
257 context: "map_elites checkpoint restore".into(),
258 size: bytes.len(),
259 max: crate::types::MAX_CHECKPOINT_BYTES,
260 });
261 }
262 *self = serde_json::from_slice(bytes).map_err(EvolutionError::DeserializationFailed)?;
263 self.in_flight.clear();
264 Ok(())
265 }
266
267 fn population_snapshot(&self) -> Vec<Chromosome> {
270 self.grid.values().cloned().collect()
271 }
272
273 fn clone_box(&self) -> Box<dyn SearchAlgorithm> {
274 Box::new(self.clone())
275 }
276}
277
278#[cfg(test)]
279mod tests {
280 use super::*;
281 use rand::SeedableRng;
282
283 fn dummy_chromosome(encoding: &str, grammar: &str, content_type: &str) -> Chromosome {
284 Chromosome::new(vec![
285 ("encoding".into(), encoding.into()),
286 ("grammar_rule".into(), grammar.into()),
287 ("content_type".into(), content_type.into()),
288 ])
289 }
290
291 #[test]
292 fn initialize_populates_grid() {
293 let mut alg = MapElites::new();
294 let pool = GenePool::default_wafrift();
295 let mut rng = StdRng::seed_from_u64(1);
296 let pop = vec![
297 dummy_chromosome("UrlEncode", "sqli", "json"),
298 dummy_chromosome("CaseAlternation", "cmdi", "form"),
299 ];
300 alg.initialize(pop, &pool, &mut rng);
301 assert_eq!(alg.grid.len(), 2);
302 }
303
304 #[test]
305 fn request_evaluations_returns_unique_ids() {
306 let mut alg = MapElites::new();
307 let pool = GenePool::default_wafrift();
308 let mut rng = StdRng::seed_from_u64(2);
309 alg.initialize(
310 vec![dummy_chromosome("UrlEncode", "sqli", "json")],
311 &pool,
312 &mut rng,
313 );
314
315 let c1 = alg.request_evaluations(2, &mut rng);
316 let c2 = alg.request_evaluations(2, &mut rng);
317 let ids: Vec<_> = c1.iter().chain(c2.iter()).map(|c| c.id).collect();
318 let unique: std::collections::HashSet<_> = ids.iter().copied().collect();
319 assert_eq!(ids.len(), unique.len());
320 }
321
322 #[test]
323 fn submit_evaluation_inserts_into_grid() {
324 let mut alg = MapElites::new();
325 let pool = GenePool::default_wafrift();
326 let mut rng = StdRng::seed_from_u64(3);
327 alg.initialize(vec![], &pool, &mut rng);
328
329 let candidates = alg.request_evaluations(1, &mut rng);
330 let id = candidates[0].id;
331
332 alg.submit_evaluations(vec![(
333 id,
334 OracleVerdict {
335 passed: true,
336 status_delta: 1,
337 body_delta: 1,
338 latency_ms: 10,
339 confidence: 0.9,
340 triggered_rules: 0,
341 ..Default::default()
342 },
343 )]);
344
345 assert!(!alg.grid.is_empty());
346 assert!(alg.best().is_some());
347 assert!(alg.best().unwrap().fitness > 0.0);
348 }
349
350 #[test]
351 fn higher_fitness_replaces_existing_grid_cell() {
352 let mut alg = MapElites::new();
353 let pool = GenePool::default_wafrift();
354 let mut rng = StdRng::seed_from_u64(4);
355 let mut low = dummy_chromosome("UrlEncode", "sqli", "json");
356 low.fitness = 0.1;
357 alg.initialize(vec![low], &pool, &mut rng);
358
359 let mut high = dummy_chromosome("UrlEncode", "sqli", "json");
361 high.fitness = 0.9;
362 alg.in_flight.insert(42, high);
363 alg.submit_evaluations(vec![(
364 42,
365 OracleVerdict {
366 passed: true,
367 status_delta: 1,
368 body_delta: 1,
369 latency_ms: 10,
370 confidence: 0.9,
371 triggered_rules: 0,
372 ..Default::default()
373 },
374 )]);
375
376 assert!(alg.best().unwrap().fitness > 0.5);
377 }
378
379 #[test]
380 fn nan_poisoned_grid_cell_can_still_be_replaced() {
381 let mut alg = MapElites::new();
391 let pool = GenePool::default_wafrift();
392 let mut rng = StdRng::seed_from_u64(7);
393 let mut poisoned = dummy_chromosome("UrlEncode", "sqli", "json");
395 poisoned.fitness = f64::NAN;
396 alg.initialize(vec![poisoned], &pool, &mut rng);
397 assert_eq!(alg.grid.len(), 1);
398
399 let mut healer = dummy_chromosome("UrlEncode", "sqli", "json");
403 healer.fitness = 0.0; alg.in_flight.insert(99, healer);
405 alg.submit_evaluations(vec![(
406 99,
407 OracleVerdict {
408 passed: true,
409 status_delta: 0,
410 body_delta: 0,
411 latency_ms: 0,
412 confidence: 1.0,
413 triggered_rules: 0,
414 ..Default::default()
415 },
416 )]);
417
418 let cell_fitness = alg
422 .grid
423 .iter()
424 .find(|(d, _)| d.encoding == "UrlEncode")
425 .map(|(_, c)| c.fitness)
426 .expect("descriptor must still exist");
427 assert!(
428 cell_fitness.is_finite(),
429 "NaN cell must be evictable: got {cell_fitness}"
430 );
431 assert!(
432 cell_fitness > 0.0,
433 "healer must have replaced poisoned cell"
434 );
435 }
436
437 #[test]
438 fn checkpoint_roundtrip_clears_in_flight() {
439 let mut alg = MapElites::new();
440 let pool = GenePool::default_wafrift();
441 let mut rng = StdRng::seed_from_u64(5);
442 alg.initialize(
443 vec![dummy_chromosome("UrlEncode", "sqli", "json")],
444 &pool,
445 &mut rng,
446 );
447 let _ = alg.request_evaluations(3, &mut rng);
448 assert!(!alg.in_flight.is_empty());
449
450 let bytes = alg.checkpoint().expect("checkpoint must serialize");
451 let mut restored = MapElites::new();
452 restored.restore(&bytes).expect("restore must succeed");
453 assert!(restored.in_flight.is_empty());
454 assert_eq!(restored.grid.len(), alg.grid.len());
455 }
456
457 #[test]
458 fn should_terminate_respects_budget() {
459 let alg = MapElites::new();
460 let budget = Budget::default_wafrift();
461 let stats = SearchStats {
462 evaluations: budget.max_requests - 1,
463 ..SearchStats::default()
464 };
465 assert!(!alg.should_terminate(&stats, &budget));
466 let stats = SearchStats {
467 evaluations: budget.max_requests,
468 ..SearchStats::default()
469 };
470 assert!(alg.should_terminate(&stats, &budget));
471 }
472
473 #[test]
474 fn best_returns_none_for_empty_grid() {
475 let alg = MapElites::new();
476 assert!(alg.best().is_none());
477 }
478
479 #[test]
485 fn eval_counter_saturates_at_u64_max() {
486 let mut alg = MapElites::new();
487 let pool = GenePool::default_wafrift();
488 let mut rng = StdRng::seed_from_u64(99);
489 alg.initialize(
490 vec![dummy_chromosome("UrlEncode", "sqli", "json")],
491 &pool,
492 &mut rng,
493 );
494 alg.eval_counter = u64::MAX;
495 let _ = alg.request_evaluations(1, &mut rng);
497 assert_eq!(
498 alg.eval_counter,
499 u64::MAX,
500 "eval_counter must saturate at u64::MAX, not wrap to 0"
501 );
502 }
503
504 #[test]
506 fn generation_saturates_at_u32_max() {
507 let mut alg = MapElites::new();
508 let pool = GenePool::default_wafrift();
509 let mut rng = StdRng::seed_from_u64(100);
510 alg.initialize(vec![], &pool, &mut rng);
511 alg.generation = u32::MAX;
512 alg.submit_evaluations(vec![]);
514 assert_eq!(
515 alg.generation,
516 u32::MAX,
517 "generation must saturate at u32::MAX, not wrap to 0"
518 );
519 }
520
521 #[test]
525 fn eval_counter_is_strictly_increasing() {
526 let mut alg = MapElites::new();
527 let pool = GenePool::default_wafrift();
528 let mut rng = StdRng::seed_from_u64(101);
529 alg.initialize(
530 vec![dummy_chromosome("CaseAlternation", "xss", "json")],
531 &pool,
532 &mut rng,
533 );
534 let mut all_ids: Vec<u64> = Vec::new();
535 for _ in 0..5 {
536 let batch = alg.request_evaluations(3, &mut rng);
537 for c in &batch {
538 all_ids.push(c.id);
539 }
540 let verdicts: Vec<_> = batch
541 .into_iter()
542 .map(|c| (c.id, OracleVerdict::from_bool(false)))
543 .collect();
544 alg.submit_evaluations(verdicts);
545 }
546 let unique: std::collections::HashSet<_> = all_ids.iter().copied().collect();
547 assert_eq!(
548 unique.len(),
549 all_ids.len(),
550 "all eval_counter-derived IDs must be unique across generations"
551 );
552 }
553}