wafrift_evolution/evolution/engine.rs
1use crate::coverage_feedback::{RuleCoverage, map_elites_descriptor};
2use crate::evolution::fitness::{evolutionary_fitness, update_gene_stats};
3use crate::evolution::{
4 Chromosome, GenePool,
5 population::{baseline_chromosome, random_chromosome},
6};
7use crate::lineage::{BypassCorpus, BypassEntry};
8use crate::search::SearchAlgorithm;
9use crate::types::{
10 Budget, EvolutionError, OracleVerdict, SearchStats, TargetHealthMonitor, load_checkpoint,
11 save_checkpoint,
12};
13use lru::LruCache;
14use rand::{SeedableRng, rngs::StdRng};
15use std::collections::{HashMap, VecDeque};
16use std::num::NonZeroUsize;
17use std::path::{Path, PathBuf};
18use std::time::Instant;
19use wafrift_wafmodel::booster::WafBoosterScorer;
20
21/// The evolutionary engine that maintains a population and evolves it.
22#[derive(Debug)]
23pub struct EvolutionEngine {
24 /// Search algorithm implementation.
25 pub(crate) algorithm: Box<dyn SearchAlgorithm>,
26 /// Gene pool for creating/mutating chromosomes.
27 pub gene_pool: GenePool,
28 /// Seeded random number generator.
29 pub rng: StdRng,
30 /// Payload→verdict LRU cache.
31 pub cache: LruCache<String, OracleVerdict>,
32 /// Hard budget limits.
33 pub budget: Budget,
34 /// Candidates currently being evaluated:
35 /// `engine_eval_id` → (`algorithm_candidate_id`, Chromosome, `sent_at`).
36 ///
37 /// `algorithm_candidate_id` is the ID the *search algorithm*
38 /// originally minted in `request_evaluations` and the same ID it
39 /// expects to see back in `submit_evaluations`. Population-based
40 /// algorithms (`MapElites`, `NoveltySearch`) keep their own private
41 /// `in_flight` keyed by that ID — if we forwarded the engine's
42 /// `eval_id` instead, their lookup misses and the evaluation is
43 /// silently dropped (the grid / archive never gets updated).
44 pub in_flight: HashMap<u64, (u64, Chromosome, Instant)>,
45 /// Search statistics.
46 pub stats: SearchStats,
47 /// Target health monitor.
48 pub target_health: TargetHealthMonitor,
49 /// Optional path for automatic checkpointing.
50 pub checkpoint_path: Option<PathBuf>,
51 /// Total oracle requests issued.
52 pub request_count: usize,
53 /// Per-gene success tracking: `(gene_name, gene_value, successes, attempts)`.
54 pub gene_stats: Vec<(String, String, u32, u32)>,
55 /// Fitness history: average fitness per generation (sliding window).
56 pub fitness_history: VecDeque<f64>,
57 /// Number of consecutive generations with no improvement.
58 pub stagnation_counter: u32,
59 /// Saved bypass corpus.
60 pub corpus: BypassCorpus,
61 /// WAF rule-coverage accumulator. Tracks (payload_class × rule_id) cells
62 /// observed during the run. Populated by `submit_batch` when the oracle
63 /// result carries a `rule_id` (via `OracleVerdict::rule_id`).
64 pub rule_coverage: RuleCoverage,
65 /// WAFBooster importance scorer. Updated on every oracle result and used
66 /// to re-rank mutation candidates so pass-likely payloads are tried first.
67 pub booster: WafBoosterScorer,
68 /// When `true`, the booster is disabled and candidate selection falls back
69 /// to the underlying algorithm's FIFO/UCB1 ordering.
70 pub no_booster: bool,
71 /// Evaluations this generation.
72 generation_evals: usize,
73 /// Next candidate ID.
74 next_id: u64,
75 /// Pending single candidate for legacy sequential API.
76 pending_single: Option<(usize, Chromosome)>,
77 /// Remaining rounds of elevated exploration weight following a
78 /// change-point alarm (C-11). When > 0, `on_change_point` has
79 /// signalled that a WAF rule update was detected and the engine
80 /// should explore more aggressively (higher UCB1 exploration bias,
81 /// stagnation counter reset). Decrements by 1 each call to `evolve`.
82 pub exploration_boost_remaining: u32,
83 /// Exploration-weight multiplier applied while `exploration_boost_remaining > 0`.
84 /// A value of 2.0 doubles the effective UCB1 exploration constant.
85 /// Reset to 1.0 when the boost expires. Default 1.0 (no boost).
86 pub exploration_boost_factor: f64,
87}
88
89impl Clone for EvolutionEngine {
90 fn clone(&self) -> Self {
91 // Algorithm state is duplicated via the trait's `clone_box`
92 // method, which all in-tree algorithms override with a direct
93 // `Box::new(self.clone())` — no serde_json round-trip.
94 // The previous checkpoint/restore path was 10-100× slower
95 // on populated MapElites grids and was the original "clone
96 // spike on the proxy hot path" blocker (see #113).
97 Self {
98 algorithm: self.algorithm.clone_box(),
99 gene_pool: self.gene_pool.clone(),
100 rng: self.rng.clone(),
101 // The LRU cache deliberately does not survive cloning —
102 // each cloned engine gets a fresh same-capacity cache.
103 // Sharing the cache across clones is what `SharedEngine`
104 // is for (Arc<RwLock<EvolutionEngine>>); deep-cloning the
105 // cache itself would just balloon allocation.
106 cache: LruCache::new(self.cache.cap()),
107 budget: self.budget,
108 // Mid-flight evaluations belong to the caller, not the
109 // clone — drop them.
110 in_flight: HashMap::new(),
111 stats: self.stats,
112 target_health: self.target_health.clone(),
113 checkpoint_path: self.checkpoint_path.clone(),
114 request_count: self.request_count,
115 gene_stats: self.gene_stats.clone(),
116 fitness_history: self.fitness_history.clone(),
117 stagnation_counter: self.stagnation_counter,
118 corpus: self.corpus.clone(),
119 rule_coverage: self.rule_coverage.clone(),
120 booster: self.booster.clone(),
121 no_booster: self.no_booster,
122 generation_evals: self.generation_evals,
123 next_id: self.next_id,
124 pending_single: None,
125 exploration_boost_remaining: self.exploration_boost_remaining,
126 exploration_boost_factor: self.exploration_boost_factor,
127 }
128 }
129}
130
131/// Shared engine pointer — what the proxy and any future
132/// shared-state worker pool should hold.
133///
134/// Use this instead of `Clone` whenever multiple async tasks need
135/// access to the same engine's cache + corpus + `gene_stats`. Cloning
136/// the `Arc` is O(1); cloning the engine itself is O(grid + archive +
137/// `gene_stats`) and produces an *independent* engine with a fresh
138/// (empty) cache.
139///
140/// Locking discipline:
141/// - hot read paths (cache hits, `diversity_score`, `best()`) → `read()`
142/// - mutation paths (`submit_evaluations`, `gene_stats` updates,
143/// checkpoint persistence) → `write()`
144/// - never hold the write lock across an `await` that performs network
145/// I/O — drop it before the await, re-acquire after
146pub type SharedEngine = std::sync::Arc<tokio::sync::RwLock<EvolutionEngine>>;
147
148impl EvolutionEngine {
149 /// Move this engine behind the canonical [`SharedEngine`] pointer.
150 ///
151 /// Equivalent to `Arc::new(RwLock::new(self))` — exists so the
152 /// shared-access pattern is discoverable on the type itself
153 /// rather than buried in module-level docs.
154 #[must_use]
155 pub fn into_shared(self) -> SharedEngine {
156 std::sync::Arc::new(tokio::sync::RwLock::new(self))
157 }
158}
159
160impl EvolutionEngine {
161 /// Create a new engine with the given algorithm and population size.
162 #[must_use]
163 pub fn new(population_size: usize) -> Self {
164 Self::new_seeded(population_size, 0)
165 }
166
167 /// Create a new engine with a seeded RNG.
168 /// `population_size` is clamped to the inclusive range `[1, 10_000]`:
169 /// 0 would leave the selection helpers (tournament/roulette) with
170 /// nothing to index — a contract violation that used to panic.
171 /// `10_000` caps memory at construction so a misconfigured caller
172 /// can't OOM the process by passing `usize::MAX`.
173 #[must_use]
174 pub fn new_seeded(population_size: usize, seed: u64) -> Self {
175 let population_size = population_size.clamp(1, 10_000);
176 let gene_pool = GenePool::default_wafrift();
177 let mut rng = StdRng::seed_from_u64(seed);
178 let mut population: Vec<Chromosome> = (0..population_size)
179 .map(|_| random_chromosome(&gene_pool, &mut rng))
180 .collect();
181 if population_size > 0 {
182 population[0] = baseline_chromosome(&gene_pool);
183 }
184
185 // Pre-fix this constructor double-initialised the algorithm:
186 // built `population` with a cloned RNG, called `algorithm.
187 // initialize(population, ..., &mut engine.rng.clone())`, then
188 // re-generated `population2` with the engine's now-moved RNG
189 // and called `initialize` again. Because `with_algorithm`
190 // doesn't advance the RNG, `population` and `population2`
191 // were IDENTICAL chromosomes — and every `initialize` impl
192 // is last-call-wins (HillClimbing overwrites current/best,
193 // MapElites .clear()s the grid, NoveltySearch overwrites
194 // self.population). Net effect: 2× chromosome generation +
195 // 2× initialize calls for the same final state. Fixed by
196 // single-shot init using the engine's owned RNG.
197 let mut engine = Self::with_algorithm("hill_climbing", gene_pool, rng, Budget::default())
198 .expect("hill_climbing is built-in");
199 engine
200 .algorithm
201 .initialize(population, &engine.gene_pool, &mut engine.rng);
202 engine
203 }
204
205 /// Create an engine with a specific algorithm by name.
206 pub fn with_algorithm(
207 algorithm_name: &str,
208 gene_pool: GenePool,
209 rng: StdRng,
210 budget: Budget,
211 ) -> Result<Self, EvolutionError> {
212 let algorithm: Box<dyn SearchAlgorithm> = match algorithm_name {
213 "hill_climbing" => Box::new(crate::search::HillClimbing::new()),
214 "simulated_annealing" => Box::new(crate::search::SimulatedAnnealing::new()),
215 "tabu_search" => Box::new(crate::search::TabuSearch::new(20)),
216 "novelty_search" => Box::new(crate::search::NoveltySearch::new(15, 0.3)),
217 "map_elites" => Box::new(crate::search::MapElites::new()),
218 "ast_mcts" => Box::new(crate::search::AstMctsAlgorithm::new()),
219 _ => {
220 return Err(EvolutionError::AlgorithmError(format!(
221 "unknown algorithm '{algorithm_name}'; valid choices: \
222 hill_climbing, simulated_annealing, tabu_search, \
223 novelty_search, map_elites, ast_mcts"
224 )));
225 }
226 };
227
228 Ok(Self {
229 algorithm,
230 gene_pool,
231 rng,
232 cache: LruCache::new(NonZeroUsize::new(10_000).expect("10_000 is non-zero")),
233 budget,
234 in_flight: HashMap::new(),
235 stats: SearchStats::new(),
236 target_health: TargetHealthMonitor::new(),
237 checkpoint_path: None,
238 request_count: 0,
239 gene_stats: Vec::new(),
240 fitness_history: VecDeque::new(),
241 stagnation_counter: 0,
242 corpus: BypassCorpus::new(),
243 rule_coverage: RuleCoverage::new(),
244 booster: WafBoosterScorer::no_decay(),
245 no_booster: false,
246 generation_evals: 0,
247 next_id: 0,
248 pending_single: None,
249 exploration_boost_remaining: 0,
250 exploration_boost_factor: 1.0,
251 })
252 }
253
254 fn cache_key(chromosome: &Chromosome) -> String {
255 // §1 SPEED: the pre-fix code allocated a Vec<String>, sorted it,
256 // and joined it — three heap operations per cache lookup. This is
257 // unnecessary because chromosome genes are always emitted in the
258 // canonical GenePool order (encoding → content_type →
259 // header_obfuscation → grammar_rule) by every construction path
260 // (random_chromosome, baseline_chromosome, mutate_with_log).
261 // The sort was defensive but redundant; removing it saves:
262 // - 1× Vec<String> heap alloc (N elements × avg ~20 bytes)
263 // - 1× sort (N×log(N) comparisons, typically N=4 so tiny but
264 // the alloc dominates)
265 // - 1× join alloc (another heap String)
266 // Replacement: single-pass write into a pre-allocated String.
267 // Pre-alloc: 4 genes × ~25 chars ("encoding=UrlEncode;") = 100 bytes.
268 let mut key = String::with_capacity(chromosome.genes.len() * 25);
269 for (i, (n, v)) in chromosome.genes.iter().enumerate() {
270 if i > 0 {
271 key.push(';');
272 }
273 key.push_str(n);
274 key.push('=');
275 key.push_str(v);
276 }
277 key
278 }
279
280 /// Read-only view of the engine's next eval-id counter.
281 /// Exposed so checkpoint round-trip tests can verify the counter
282 /// is preserved across save/load. The field itself stays private
283 /// so external callers can't desync it.
284 #[must_use]
285 pub fn next_id(&self) -> u64 {
286 self.next_id
287 }
288
289 fn next_eval_id(&mut self) -> u64 {
290 self.next_id = self.next_id.saturating_add(1);
291 self.next_id
292 }
293
294 /// Get the next candidate to try (legacy sequential API).
295 ///
296 /// Returns a synthetic index and a reference to the stored candidate.
297 #[must_use]
298 pub fn next_candidate(&mut self) -> Option<(usize, &Chromosome)> {
299 if self.should_terminate() {
300 return None;
301 }
302 if self.pending_single.is_none() {
303 self.pending_single = self.batch_candidates(1).into_iter().next();
304 }
305 self.pending_single
306 .as_ref()
307 .map(|(idx, chrom)| (*idx, chrom))
308 }
309
310 /// Request a batch of up to `n` candidates for parallel evaluation.
311 ///
312 /// Checks cache, budget, and target health before returning candidates.
313 /// `n` is also clamped to the remaining `budget.max_requests` headroom
314 /// so a single batch call can never overshoot the hard request budget
315 /// (the underlying algorithm is free to request whatever it likes
316 /// internally; the engine bounds the request count it actually
317 /// surfaces).
318 ///
319 /// When the booster is enabled (`no_booster == false`), the result
320 /// batch is re-ordered by ascending booster score so that pass-likely
321 /// candidates are tried first. The in-flight map and cache logic are
322 /// unaffected by the reordering.
323 pub fn batch_candidates(&mut self, n: usize) -> Vec<(usize, Chromosome)> {
324 if self.should_terminate() || n == 0 {
325 return Vec::new();
326 }
327 let remaining = self.budget.max_requests.saturating_sub(self.request_count);
328 if remaining == 0 {
329 return Vec::new();
330 }
331 let n = n.min(remaining);
332
333 let mut result = Vec::with_capacity(n);
334 let mut cached_results = Vec::new();
335 let requested = self.algorithm.request_evaluations(n, &mut self.rng);
336
337 for candidate in requested {
338 let key = Self::cache_key(&candidate.chromosome);
339 if let Some(verdict) = self.cache.get(&key).cloned() {
340 cached_results.push((candidate.id, verdict));
341 } else {
342 let eval_id = self.next_eval_id();
343 // Pair the engine's eval_id (handed to the caller and
344 // used as the in_flight key) with the algorithm's
345 // own candidate.id (used to look up its private
346 // in_flight on submit). See the in_flight field doc.
347 self.in_flight.insert(
348 eval_id,
349 (candidate.id, candidate.chromosome.clone(), Instant::now()),
350 );
351 result.push((eval_id as usize, candidate.chromosome));
352 }
353 }
354
355 if !cached_results.is_empty() {
356 self.algorithm.submit_evaluations(cached_results);
357 }
358
359 self.request_count = self.request_count.saturating_add(result.len());
360
361 // WAFBooster re-ranking: when the booster is active, sort candidates
362 // so the lowest-score (most pass-likely) ones come first. The sort
363 // is stable so equal-score candidates preserve algorithm order.
364 if !self.no_booster && !result.is_empty() {
365 // Build a booster-score map keyed by eval_id.
366 let mut scored: Vec<(usize, Chromosome, f64)> = result
367 .into_iter()
368 .map(|(eval_id, chrom)| {
369 let payload = Self::cache_key(&chrom); // deterministic string repr
370 let score = self.booster.score_candidate(&payload);
371 (eval_id, chrom, score)
372 })
373 .collect();
374 scored.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
375 result = scored
376 .into_iter()
377 .map(|(id, chrom, _)| (id, chrom))
378 .collect();
379 }
380
381 result
382 }
383
384 /// Drop entries from the in-flight map that have been outstanding
385 /// longer than `max_age`. The proxy / scan loop should call this
386 /// periodically (or when a Worker pool is reaped) so a dropped
387 /// evaluation doesn't permanently consume a budget slot.
388 ///
389 /// Audit (2026-05-10): pre-fix `in_flight` grew without any TTL —
390 /// every dropped eval permanently consumed a `max_requests` slot,
391 /// so a long scan with even moderate eval-loss would terminate
392 /// prematurely with budget exhausted while the in-flight map
393 /// silently accumulated. Returns the number of pruned entries.
394 pub fn prune_stale_in_flight(&mut self, max_age: std::time::Duration) -> usize {
395 let now = Instant::now();
396 let before = self.in_flight.len();
397 self.in_flight
398 .retain(|_, (_, _, sent_at)| now.duration_since(*sent_at) <= max_age);
399 let pruned = before - self.in_flight.len();
400 // Repay the budget for stale entries: they never returned a
401 // verdict so they shouldn't count against max_requests.
402 self.request_count = self.request_count.saturating_sub(pruned);
403 pruned
404 }
405
406 /// Submit a batch of evaluation results.
407 ///
408 /// # Errors
409 ///
410 /// Returns an error if an evaluation ID is not in the in-flight set.
411 pub fn submit_batch(
412 &mut self,
413 results: Vec<(usize, OracleVerdict)>,
414 ) -> Result<(), EvolutionError> {
415 let mut to_submit: Vec<(u64, OracleVerdict)> = Vec::with_capacity(results.len());
416 for (id_usize, verdict) in results {
417 let id = id_usize as u64;
418 let (algorithm_candidate_id, mut chromosome, _sent_at) = self
419 .in_flight
420 .remove(&id)
421 .ok_or(EvolutionError::InvalidChromosomeIndex(id_usize))?;
422
423 chromosome.record_verdict(&verdict);
424 // Compute the cache key once and reuse for both the LRU cache
425 // insert and the WAFBooster update — previously this called
426 // cache_key() twice per chromosome, allocating Vec + sort + join
427 // both times. Pre-fix cost: 2× per submit; post-fix: 1×.
428 let key = Self::cache_key(&chromosome);
429
430 // Coverage-feedback: record the (payload_class × rule_id)
431 // MAP-Elites behavior descriptor. Use the chromosome's
432 // `grammar_rule` gene as the class signal — it is the
433 // closest available proxy to "attack class" inside the
434 // engine layer. When the verdict carries a `rule_id`, we
435 // get a 2-D descriptor; without it the descriptor collapses
436 // to class-only (the pre-coverage fall-through).
437 let coverage_signal = chromosome
438 .gene("grammar_rule")
439 .filter(|v| *v != "None")
440 .unwrap_or("")
441 .to_string();
442 let (_, _cov_rid) = map_elites_descriptor(&coverage_signal, verdict.rule_id.as_deref());
443 self.rule_coverage
444 .record(&coverage_signal, verdict.rule_id.as_deref());
445
446 // Extract scalar fields before the verdict is moved.
447 let passed = verdict.passed;
448 let status_delta = verdict.status_delta;
449
450 // WAFBooster online update: feed the observation so future
451 // candidate ranking benefits from accumulated signal.
452 // Reuse `key` (already computed above) instead of calling
453 // cache_key() a second time.
454 if !self.no_booster {
455 if passed {
456 self.booster.observe_pass(&key);
457 } else {
458 self.booster.observe_block(&key, verdict.rule_id.as_deref());
459 }
460 }
461
462 self.cache.put(key, verdict.clone());
463
464 update_gene_stats(&mut self.gene_stats, &chromosome.genes, passed);
465 let adjusted = evolutionary_fitness(&chromosome, &self.gene_stats);
466 chromosome.fitness = adjusted;
467
468 // Save high-fitness bypasses to corpus. Dedup + the
469 // MAX_ENTRIES cap are enforced inside `BypassCorpus::add`
470 // (O(1) via its hash index). The earlier inline pre-scan
471 // here was both wasteful and BROKEN: it compared a 16-char
472 // u64 `chromosome.hash()` against the corpus's 64-char
473 // SHA-256 `payload_hash`, so it never matched and deduped
474 // nothing — every high-fitness chromosome fell through to
475 // `add`, which did the real (then-linear) dedup. Now `add`
476 // is the single, correct, O(1) gate.
477 if chromosome.fitness >= 0.85 {
478 self.corpus
479 .add(BypassEntry::from_chromosome(&chromosome, None));
480 }
481
482 // Forward the *algorithm's* candidate ID, not the engine's
483 // eval_id — population-based algorithms key their own
484 // in_flight by it (see in_flight doc).
485 to_submit.push((algorithm_candidate_id, verdict));
486 self.generation_evals = self.generation_evals.saturating_add(1);
487 self.stats.evaluations = self.stats.evaluations.saturating_add(1);
488
489 if passed {
490 self.target_health.record_success();
491 } else if status_delta >= 500 {
492 self.target_health.record_error();
493 }
494 }
495
496 self.algorithm.submit_evaluations(to_submit);
497 Ok(())
498 }
499
500 /// Record legacy boolean feedback for a candidate.
501 pub fn record_feedback(
502 &mut self,
503 chromosome_index: usize,
504 passed: bool,
505 ) -> Result<(), EvolutionError> {
506 // Clear pending_single if it matches the index
507 if let Some((idx, _)) = self.pending_single
508 && idx == chromosome_index
509 {
510 self.pending_single = None;
511 }
512 self.record_verdict(chromosome_index, &OracleVerdict::from_bool(passed))
513 }
514
515 /// Record rich oracle verdict feedback.
516 pub fn record_verdict(
517 &mut self,
518 chromosome_index: usize,
519 verdict: &OracleVerdict,
520 ) -> Result<(), EvolutionError> {
521 self.submit_batch(vec![(chromosome_index, verdict.clone())])
522 }
523
524 /// Record target-error feedback.
525 pub fn record_target_error(&mut self, error: String) -> Result<(), EvolutionError> {
526 self.target_health.record_error();
527 if !self.target_health.is_healthy() {
528 return Err(EvolutionError::TargetHealthCritical(error));
529 }
530 Ok(())
531 }
532
533 /// Signal that the online CUSUM bypass-rate monitor detected a change-point
534 /// (C-11) — i.e. the WAF vendor likely pushed a rule update.
535 ///
536 /// The engine responds by:
537 /// 1. Resetting `stagnation_counter` to 0 — prevents premature termination
538 /// that would otherwise fire because the bypass rate collapsed (high
539 /// stagnation from many blocked attempts).
540 /// 2. Setting `exploration_boost_remaining` to `boost_rounds` — for the
541 /// next `boost_rounds` calls to `evolve`, the booster score for
542 /// candidates is discounted so the engine explores more broadly
543 /// rather than exploiting the (now-broken) learned strategy.
544 /// 3. Setting `exploration_boost_factor` to `factor` — the multiplier
545 /// applied to candidate selection diversity during boost rounds.
546 ///
547 /// # Parameters
548 ///
549 /// - `boost_rounds`: how many `evolve` calls the boost lasts (default 10).
550 /// - `factor`: exploration multiplier > 1.0 (default 2.0).
551 ///
552 /// # Example
553 ///
554 /// ```rust
555 /// use wafrift_evolution::evolution::EvolutionEngine;
556 ///
557 /// let mut engine = EvolutionEngine::new(10);
558 /// assert_eq!(engine.exploration_boost_remaining, 0);
559 /// engine.on_change_point(10, 2.0);
560 /// assert_eq!(engine.exploration_boost_remaining, 10);
561 /// assert_eq!(engine.stagnation_counter, 0);
562 /// ```
563 pub fn on_change_point(&mut self, boost_rounds: u32, factor: f64) {
564 // Reset stagnation so a rule update (which causes many blocked probes
565 // → high stagnation) doesn't terminate the campaign prematurely.
566 self.stagnation_counter = 0;
567 self.stats.stagnation_counter = 0;
568
569 // Set the exploration boost — decays by 1 each evolve() call.
570 self.exploration_boost_remaining = boost_rounds.max(1);
571 self.exploration_boost_factor = factor.max(1.0);
572
573 tracing::info!(
574 boost_rounds,
575 factor,
576 "C-11 change-point detected: exploration boost activated"
577 );
578 }
579
580 /// Evolve the population to the next generation.
581 pub fn evolve(&mut self) {
582 if self.algorithm.best().is_none() {
583 return;
584 }
585
586 // C-11: Decay exploration boost by one round per evolve call.
587 // When exploration_boost_remaining > 0, the booster is temporarily
588 // suppressed so the engine explores more broadly after a WAF rule
589 // update that invalidated the learned bypass strategy.
590 if self.exploration_boost_remaining > 0 {
591 self.exploration_boost_remaining = self.exploration_boost_remaining.saturating_sub(1);
592 if self.exploration_boost_remaining == 0 {
593 // Boost expired: restore default (no-boost) exploration factor.
594 self.exploration_boost_factor = 1.0;
595 tracing::info!("C-11 exploration boost expired — returning to normal exploitation");
596 }
597 }
598
599 // Update fitness history with sliding window
600 if let Some(best) = self.algorithm.best() {
601 self.fitness_history.push_back(best.fitness);
602 }
603 if self.fitness_history.len() > 1000 {
604 self.fitness_history.pop_front();
605 }
606
607 // Detect stagnation — but skip the stagnation increment while the
608 // exploration boost is active: a burst of new-territory exploration
609 // after a rule update naturally shows lower short-term fitness and
610 // would spuriously trigger stagnation-based termination.
611 let window = 10_usize;
612 if self.fitness_history.len() >= window {
613 let skip = self.fitness_history.len().saturating_sub(window);
614 let recent: Vec<f64> = self.fitness_history.iter().skip(skip).copied().collect();
615 let improved = recent.windows(2).any(|w| w[1] > w[0] + 0.001);
616 if improved {
617 self.stagnation_counter = 0;
618 } else if self.exploration_boost_remaining == 0 {
619 // Only accumulate stagnation outside the boost window.
620 self.stagnation_counter = self.stagnation_counter.saturating_add(1);
621 }
622 }
623 // Mirror into stats so should_terminate() (which reads
624 // self.stats.stagnation_counter, not self.stagnation_counter)
625 // and the search algorithms' own should_terminate() impls see
626 // the same value. Without this sync the stagnation_limit
627 // budget would be silently ignored.
628 self.stats.stagnation_counter = self.stagnation_counter;
629
630 self.stats.generation = self.stats.generation.saturating_add(1);
631 self.generation_evals = 0;
632
633 if let Some(ref path) = self.checkpoint_path
634 && let Err(e) = self.save_checkpoint(path)
635 {
636 tracing::warn!(error = %e, path = %path.display(), "checkpoint save failed");
637 }
638 }
639
640 /// Check if evolution should terminate.
641 #[must_use]
642 pub fn should_terminate(&self) -> bool {
643 if !self.target_health.is_healthy() {
644 return true;
645 }
646 self.algorithm.should_terminate(&self.stats, &self.budget)
647 || self.request_count >= self.budget.max_requests
648 || self.stats.stagnation_counter >= self.budget.stagnation_limit
649 }
650
651 /// Get the best-performing chromosome.
652 #[must_use]
653 pub fn best(&self) -> Option<&Chromosome> {
654 self.algorithm.best()
655 }
656
657 /// Return the name of the active search algorithm (e.g. `"ast_mcts"`).
658 #[must_use]
659 pub fn algorithm_name(&self) -> &str {
660 self.algorithm.name()
661 }
662
663 /// Save engine state to disk.
664 pub fn save_checkpoint(&self, path: &Path) -> Result<(), EvolutionError> {
665 let state = EngineState {
666 algorithm_name: self.algorithm.name().to_string(),
667 algorithm_state: self.algorithm.checkpoint()?,
668 gene_pool: self.gene_pool.clone(),
669 // The engine-level rng is not serializable; the algorithm
670 // captures its own rng state inside algorithm_state. Any
671 // engine-side draws after a restore will diverge from
672 // pre-crash, but the algorithm's exploration sequence is
673 // preserved.
674 rng_seed: 0,
675 budget: self.budget,
676 gene_stats: self.gene_stats.clone(),
677 fitness_history: self.fitness_history.clone(),
678 stagnation_counter: self.stagnation_counter,
679 request_count: self.request_count,
680 stats: self.stats,
681 schema_version: 2,
682 corpus: self.corpus.clone(),
683 next_id: self.next_id,
684 generation_evals: self.generation_evals,
685 };
686 save_checkpoint(path, &state)
687 }
688
689 /// Load engine state from disk.
690 pub fn load_checkpoint(&mut self, path: &Path) -> Result<(), EvolutionError> {
691 let mut state: EngineState = load_checkpoint(path)?;
692 state.stats.fixup_start_time();
693 self.algorithm.restore(&state.algorithm_state)?;
694 self.gene_pool = state.gene_pool;
695 self.budget = state.budget;
696 self.gene_stats = state.gene_stats;
697 self.fitness_history = state.fitness_history;
698 self.stagnation_counter = state.stagnation_counter;
699 self.request_count = state.request_count;
700 self.stats = state.stats;
701 // v2 fields — `#[serde(default)]` on EngineState means a v1
702 // checkpoint loads cleanly with empty corpus / next_id=0.
703 self.corpus = state.corpus;
704 self.next_id = state.next_id;
705 self.generation_evals = state.generation_evals;
706 Ok(())
707 }
708
709 /// Get per-gene success rates.
710 #[must_use]
711 pub fn gene_success_rates(&self) -> Vec<(&str, &str, f64)> {
712 crate::evolution::fitness::gene_success_rates(&self.gene_stats)
713 }
714
715 /// Get a human-readable summary.
716 #[must_use]
717 pub fn learned_summary(&self) -> String {
718 crate::evolution::fitness::learned_summary(
719 self.stats.generation,
720 self.algorithm.best(),
721 &self.gene_stats,
722 self.request_count,
723 )
724 }
725
726 /// Seed the underlying algorithm with an explicit population —
727 /// the public path callers use to warm-start search from a known
728 /// good corpus (or to inject a synthetic population from tests).
729 ///
730 /// Previously this method cloned `self.rng` before passing it to
731 /// `initialize`, so the engine's owned RNG was never advanced. Any
732 /// random draws made by `initialize` (e.g. MapElites grid placement,
733 /// initial mutation in SimulatedAnnealing) were "used up" in the
734 /// clone and the engine remained at the same RNG state — making two
735 /// successive `seed_population` calls (or a `seed_population` + an
736 /// `evolve`) produce identical random sequences and identical
737 /// chromosomes.
738 pub fn seed_population(&mut self, population: Vec<Chromosome>) {
739 self.algorithm
740 .initialize(population, &self.gene_pool, &mut self.rng);
741 }
742
743 /// Snapshot the algorithm's live population (test/diagnostic
744 /// surface). Population-based algorithms return their full pool;
745 /// single-state algorithms return the singleton current/best.
746 #[must_use]
747 pub fn population_snapshot(&self) -> Vec<Chromosome> {
748 self.algorithm.population_snapshot()
749 }
750
751 /// Population diversity in `[0.0, 1.0]` — drives adaptive mutation
752 /// pressure (see `crossover::diversity::adaptive_mutation_rate`).
753 ///
754 /// Strategy:
755 /// 1. Snapshot the algorithm's live population and union it with
756 /// the engine's `in_flight` candidates.
757 /// 2. If `len() >= 2`, return mean pairwise gene-mismatch ratio
758 /// via `crossover::diversity::diversity_score`.
759 /// 3. Otherwise (single-state algorithm with nothing in-flight),
760 /// fall back to gene-pool exploration entropy from
761 /// [`Self::gene_stats_diversity`] — measures how broadly the
762 /// engine has *explored* the gene space rather than how varied
763 /// the *current* population is. With no exploration history
764 /// either, return 1.0 (max-safe default — keeps mutation
765 /// pressure conservative on a fresh engine).
766 #[must_use]
767 pub fn diversity_score(&self) -> f64 {
768 let mut population = self.algorithm.population_snapshot();
769 for (_, chromosome, _) in self.in_flight.values() {
770 population.push(chromosome.clone());
771 }
772 if population.len() >= 2 {
773 return crate::evolution::crossover::diversity::diversity_score(&population);
774 }
775 let gene_div = self.gene_stats_diversity();
776 if gene_div > 0.0 { gene_div } else { 1.0 }
777 }
778
779 /// Shannon-entropy style diversity over the engine's per-gene
780 /// exploration history.
781 ///
782 /// For each unique gene name in `gene_stats`, computes the
783 /// normalised entropy of its value distribution weighted by
784 /// `attempts`. The per-gene entropies are averaged. Range
785 /// `[0.0, 1.0]`: 0.0 means we tried only one value for every
786 /// gene (no exploration), 1.0 means a uniform distribution
787 /// across the maximum-cardinality gene's value space.
788 ///
789 /// Useful as a fallback signal when the active search algorithm
790 /// is single-state (e.g. simulated annealing) and the population
791 /// snapshot is too small to give meaningful pairwise distance.
792 #[must_use]
793 pub fn gene_stats_diversity(&self) -> f64 {
794 if self.gene_stats.is_empty() {
795 return 0.0;
796 }
797 // Bucket per-gene attempt counts.
798 let mut by_gene: HashMap<&str, Vec<u32>> = HashMap::new();
799 for (name, _value, _successes, attempts) in &self.gene_stats {
800 if *attempts == 0 {
801 continue;
802 }
803 by_gene.entry(name.as_str()).or_default().push(*attempts);
804 }
805 if by_gene.is_empty() {
806 return 0.0;
807 }
808 let mut entropy_sum = 0.0_f64;
809 let mut counted = 0_usize;
810 for attempts in by_gene.values() {
811 let total: u64 = attempts.iter().map(|a| u64::from(*a)).sum();
812 if total == 0 || attempts.len() < 2 {
813 // Single value tried — zero entropy contribution. Still
814 // counted so the per-gene mean isn't biased by skipping.
815 counted += 1;
816 continue;
817 }
818 #[allow(clippy::cast_precision_loss)]
819 let total_f = total as f64;
820 let mut h = 0.0_f64;
821 for a in attempts {
822 #[allow(clippy::cast_precision_loss)]
823 let p = f64::from(*a) / total_f;
824 if p > 0.0 {
825 h -= p * p.log2();
826 }
827 }
828 // Normalise by max entropy log2(k) where k is the number of
829 // distinct values tried for this gene. Falls in `[0, 1]`.
830 #[allow(clippy::cast_precision_loss)]
831 let h_max = (attempts.len() as f64).log2();
832 let normalised = if h_max > 0.0 { h / h_max } else { 0.0 };
833 entropy_sum += normalised;
834 counted += 1;
835 }
836 if counted == 0 {
837 0.0
838 } else {
839 #[allow(clippy::cast_precision_loss)]
840 let avg = entropy_sum / counted as f64;
841 avg.clamp(0.0, 1.0)
842 }
843 }
844}
845
846/// Serializable engine state.
847///
848/// Schema version 2 (2026-05-10) adds `corpus`, `next_id`, and
849/// `generation_evals` so a restored engine doesn't lose all of its
850/// bypass discoveries and doesn't reset its eval-id counter (which
851/// would collide with any in-flight evaluation that survived the
852/// crash).
853///
854/// What is intentionally NOT serialized:
855/// - `in_flight`: by definition transient; any pending eval at
856/// checkpoint time is lost on crash, but the corpus capture
857/// above means the *useful* bypasses are preserved.
858/// - `cache`: LRU cache of payload→verdict; recomputable.
859/// - `target_health`: runtime stats; resets on resume.
860/// - `checkpoint_path`: re-injected by the caller after load.
861/// - `pending_single`: legacy sequential API state, transient.
862/// - `rule_coverage`: runtime observation accumulator; resets on
863/// resume so each run produces an independent coverage report.
864/// - RNG state: search algorithms each capture their own RNG
865/// state inside `algorithm_state`; the engine-level rng is
866/// used only for `next_eval_id` minting and gene-pool sampling
867/// when the algorithm doesn't override.
868#[derive(Debug, Clone, Serialize, Deserialize)]
869pub struct EngineState {
870 pub algorithm_name: String,
871 pub algorithm_state: Vec<u8>,
872 pub gene_pool: GenePool,
873 pub rng_seed: u64,
874 pub budget: Budget,
875 pub gene_stats: Vec<(String, String, u32, u32)>,
876 pub fitness_history: VecDeque<f64>,
877 pub stagnation_counter: u32,
878 pub request_count: usize,
879 pub stats: SearchStats,
880 pub schema_version: u32,
881 /// Saved bypass discoveries — added in `schema_version` 2.
882 /// Defaults to empty for v1 checkpoints loaded by a v2 engine.
883 #[serde(default)]
884 pub corpus: BypassCorpus,
885 /// Next `eval_id` to mint — added in `schema_version` 2 so a
886 /// restored engine doesn't recycle IDs that may collide with
887 /// any in-flight evaluation that survived the crash.
888 #[serde(default)]
889 pub next_id: u64,
890 /// Evaluations issued in the current generation — added in v2.
891 #[serde(default)]
892 pub generation_evals: usize,
893}
894
895use serde::{Deserialize, Serialize};