wafrift_evolution/evolution/
engine.rs

1use crate::coverage_feedback::{RuleCoverage, map_elites_descriptor};
2use crate::evolution::fitness::{evolutionary_fitness, update_gene_stats};
3use crate::evolution::{
4    Chromosome, GenePool,
5    population::{baseline_chromosome, random_chromosome},
6};
7use crate::lineage::{BypassCorpus, BypassEntry};
8use crate::search::SearchAlgorithm;
9use crate::types::{
10    Budget, EvolutionError, OracleVerdict, SearchStats, TargetHealthMonitor, load_checkpoint,
11    save_checkpoint,
12};
13use lru::LruCache;
14use rand::{SeedableRng, rngs::StdRng};
15use std::collections::{HashMap, VecDeque};
16use std::num::NonZeroUsize;
17use std::path::{Path, PathBuf};
18use std::time::Instant;
19use wafrift_wafmodel::booster::WafBoosterScorer;
20
21/// The evolutionary engine that maintains a population and evolves it.
22#[derive(Debug)]
23pub struct EvolutionEngine {
24    /// Search algorithm implementation.
25    pub(crate) algorithm: Box<dyn SearchAlgorithm>,
26    /// Gene pool for creating/mutating chromosomes.
27    pub gene_pool: GenePool,
28    /// Seeded random number generator.
29    pub rng: StdRng,
30    /// Payload→verdict LRU cache.
31    pub cache: LruCache<String, OracleVerdict>,
32    /// Hard budget limits.
33    pub budget: Budget,
34    /// Candidates currently being evaluated:
35    ///   `engine_eval_id` → (`algorithm_candidate_id`, Chromosome, `sent_at`).
36    ///
37    /// `algorithm_candidate_id` is the ID the *search algorithm*
38    /// originally minted in `request_evaluations` and the same ID it
39    /// expects to see back in `submit_evaluations`. Population-based
40    /// algorithms (`MapElites`, `NoveltySearch`) keep their own private
41    /// `in_flight` keyed by that ID — if we forwarded the engine's
42    /// `eval_id` instead, their lookup misses and the evaluation is
43    /// silently dropped (the grid / archive never gets updated).
44    pub in_flight: HashMap<u64, (u64, Chromosome, Instant)>,
45    /// Search statistics.
46    pub stats: SearchStats,
47    /// Target health monitor.
48    pub target_health: TargetHealthMonitor,
49    /// Optional path for automatic checkpointing.
50    pub checkpoint_path: Option<PathBuf>,
51    /// Total oracle requests issued.
52    pub request_count: usize,
53    /// Per-gene success tracking: `(gene_name, gene_value, successes, attempts)`.
54    pub gene_stats: Vec<(String, String, u32, u32)>,
55    /// Fitness history: average fitness per generation (sliding window).
56    pub fitness_history: VecDeque<f64>,
57    /// Number of consecutive generations with no improvement.
58    pub stagnation_counter: u32,
59    /// Saved bypass corpus.
60    pub corpus: BypassCorpus,
61    /// WAF rule-coverage accumulator.  Tracks (payload_class × rule_id) cells
62    /// observed during the run.  Populated by `submit_batch` when the oracle
63    /// result carries a `rule_id` (via `OracleVerdict::rule_id`).
64    pub rule_coverage: RuleCoverage,
65    /// WAFBooster importance scorer.  Updated on every oracle result and used
66    /// to re-rank mutation candidates so pass-likely payloads are tried first.
67    pub booster: WafBoosterScorer,
68    /// When `true`, the booster is disabled and candidate selection falls back
69    /// to the underlying algorithm's FIFO/UCB1 ordering.
70    pub no_booster: bool,
71    /// Evaluations this generation.
72    generation_evals: usize,
73    /// Next candidate ID.
74    next_id: u64,
75    /// Pending single candidate for legacy sequential API.
76    pending_single: Option<(usize, Chromosome)>,
77    /// Remaining rounds of elevated exploration weight following a
78    /// change-point alarm (C-11). When > 0, `on_change_point` has
79    /// signalled that a WAF rule update was detected and the engine
80    /// should explore more aggressively (higher UCB1 exploration bias,
81    /// stagnation counter reset). Decrements by 1 each call to `evolve`.
82    pub exploration_boost_remaining: u32,
83    /// Exploration-weight multiplier applied while `exploration_boost_remaining > 0`.
84    /// A value of 2.0 doubles the effective UCB1 exploration constant.
85    /// Reset to 1.0 when the boost expires. Default 1.0 (no boost).
86    pub exploration_boost_factor: f64,
87}
88
89impl Clone for EvolutionEngine {
90    fn clone(&self) -> Self {
91        // Algorithm state is duplicated via the trait's `clone_box`
92        // method, which all in-tree algorithms override with a direct
93        // `Box::new(self.clone())` — no serde_json round-trip.
94        // The previous checkpoint/restore path was 10-100× slower
95        // on populated MapElites grids and was the original "clone
96        // spike on the proxy hot path" blocker (see #113).
97        Self {
98            algorithm: self.algorithm.clone_box(),
99            gene_pool: self.gene_pool.clone(),
100            rng: self.rng.clone(),
101            // The LRU cache deliberately does not survive cloning —
102            // each cloned engine gets a fresh same-capacity cache.
103            // Sharing the cache across clones is what `SharedEngine`
104            // is for (Arc<RwLock<EvolutionEngine>>); deep-cloning the
105            // cache itself would just balloon allocation.
106            cache: LruCache::new(self.cache.cap()),
107            budget: self.budget,
108            // Mid-flight evaluations belong to the caller, not the
109            // clone — drop them.
110            in_flight: HashMap::new(),
111            stats: self.stats,
112            target_health: self.target_health.clone(),
113            checkpoint_path: self.checkpoint_path.clone(),
114            request_count: self.request_count,
115            gene_stats: self.gene_stats.clone(),
116            fitness_history: self.fitness_history.clone(),
117            stagnation_counter: self.stagnation_counter,
118            corpus: self.corpus.clone(),
119            rule_coverage: self.rule_coverage.clone(),
120            booster: self.booster.clone(),
121            no_booster: self.no_booster,
122            generation_evals: self.generation_evals,
123            next_id: self.next_id,
124            pending_single: None,
125            exploration_boost_remaining: self.exploration_boost_remaining,
126            exploration_boost_factor: self.exploration_boost_factor,
127        }
128    }
129}
130
131/// Shared engine pointer — what the proxy and any future
132/// shared-state worker pool should hold.
133///
134/// Use this instead of `Clone` whenever multiple async tasks need
135/// access to the same engine's cache + corpus + `gene_stats`. Cloning
136/// the `Arc` is O(1); cloning the engine itself is O(grid + archive +
137/// `gene_stats`) and produces an *independent* engine with a fresh
138/// (empty) cache.
139///
140/// Locking discipline:
141/// - hot read paths (cache hits, `diversity_score`, `best()`) → `read()`
142/// - mutation paths (`submit_evaluations`, `gene_stats` updates,
143///   checkpoint persistence) → `write()`
144/// - never hold the write lock across an `await` that performs network
145///   I/O — drop it before the await, re-acquire after
146pub type SharedEngine = std::sync::Arc<tokio::sync::RwLock<EvolutionEngine>>;
147
148impl EvolutionEngine {
149    /// Move this engine behind the canonical [`SharedEngine`] pointer.
150    ///
151    /// Equivalent to `Arc::new(RwLock::new(self))` — exists so the
152    /// shared-access pattern is discoverable on the type itself
153    /// rather than buried in module-level docs.
154    #[must_use]
155    pub fn into_shared(self) -> SharedEngine {
156        std::sync::Arc::new(tokio::sync::RwLock::new(self))
157    }
158}
159
160impl EvolutionEngine {
161    /// Create a new engine with the given algorithm and population size.
162    #[must_use]
163    pub fn new(population_size: usize) -> Self {
164        Self::new_seeded(population_size, 0)
165    }
166
167    /// Create a new engine with a seeded RNG.
168    /// `population_size` is clamped to the inclusive range `[1, 10_000]`:
169    /// 0 would leave the selection helpers (tournament/roulette) with
170    /// nothing to index — a contract violation that used to panic.
171    /// `10_000` caps memory at construction so a misconfigured caller
172    /// can't OOM the process by passing `usize::MAX`.
173    #[must_use]
174    pub fn new_seeded(population_size: usize, seed: u64) -> Self {
175        let population_size = population_size.clamp(1, 10_000);
176        let gene_pool = GenePool::default_wafrift();
177        let mut rng = StdRng::seed_from_u64(seed);
178        let mut population: Vec<Chromosome> = (0..population_size)
179            .map(|_| random_chromosome(&gene_pool, &mut rng))
180            .collect();
181        if population_size > 0 {
182            population[0] = baseline_chromosome(&gene_pool);
183        }
184
185        // Pre-fix this constructor double-initialised the algorithm:
186        // built `population` with a cloned RNG, called `algorithm.
187        // initialize(population, ..., &mut engine.rng.clone())`, then
188        // re-generated `population2` with the engine's now-moved RNG
189        // and called `initialize` again. Because `with_algorithm`
190        // doesn't advance the RNG, `population` and `population2`
191        // were IDENTICAL chromosomes — and every `initialize` impl
192        // is last-call-wins (HillClimbing overwrites current/best,
193        // MapElites .clear()s the grid, NoveltySearch overwrites
194        // self.population). Net effect: 2× chromosome generation +
195        // 2× initialize calls for the same final state. Fixed by
196        // single-shot init using the engine's owned RNG.
197        let mut engine = Self::with_algorithm("hill_climbing", gene_pool, rng, Budget::default())
198            .expect("hill_climbing is built-in");
199        engine
200            .algorithm
201            .initialize(population, &engine.gene_pool, &mut engine.rng);
202        engine
203    }
204
205    /// Create an engine with a specific algorithm by name.
206    pub fn with_algorithm(
207        algorithm_name: &str,
208        gene_pool: GenePool,
209        rng: StdRng,
210        budget: Budget,
211    ) -> Result<Self, EvolutionError> {
212        let algorithm: Box<dyn SearchAlgorithm> = match algorithm_name {
213            "hill_climbing" => Box::new(crate::search::HillClimbing::new()),
214            "simulated_annealing" => Box::new(crate::search::SimulatedAnnealing::new()),
215            "tabu_search" => Box::new(crate::search::TabuSearch::new(20)),
216            "novelty_search" => Box::new(crate::search::NoveltySearch::new(15, 0.3)),
217            "map_elites" => Box::new(crate::search::MapElites::new()),
218            "ast_mcts" => Box::new(crate::search::AstMctsAlgorithm::new()),
219            _ => {
220                return Err(EvolutionError::AlgorithmError(format!(
221                    "unknown algorithm '{algorithm_name}'; valid choices: \
222                     hill_climbing, simulated_annealing, tabu_search, \
223                     novelty_search, map_elites, ast_mcts"
224                )));
225            }
226        };
227
228        Ok(Self {
229            algorithm,
230            gene_pool,
231            rng,
232            cache: LruCache::new(NonZeroUsize::new(10_000).expect("10_000 is non-zero")),
233            budget,
234            in_flight: HashMap::new(),
235            stats: SearchStats::new(),
236            target_health: TargetHealthMonitor::new(),
237            checkpoint_path: None,
238            request_count: 0,
239            gene_stats: Vec::new(),
240            fitness_history: VecDeque::new(),
241            stagnation_counter: 0,
242            corpus: BypassCorpus::new(),
243            rule_coverage: RuleCoverage::new(),
244            booster: WafBoosterScorer::no_decay(),
245            no_booster: false,
246            generation_evals: 0,
247            next_id: 0,
248            pending_single: None,
249            exploration_boost_remaining: 0,
250            exploration_boost_factor: 1.0,
251        })
252    }
253
254    fn cache_key(chromosome: &Chromosome) -> String {
255        // §1 SPEED: the pre-fix code allocated a Vec<String>, sorted it,
256        // and joined it — three heap operations per cache lookup. This is
257        // unnecessary because chromosome genes are always emitted in the
258        // canonical GenePool order (encoding → content_type →
259        // header_obfuscation → grammar_rule) by every construction path
260        // (random_chromosome, baseline_chromosome, mutate_with_log).
261        // The sort was defensive but redundant; removing it saves:
262        //   - 1× Vec<String> heap alloc (N elements × avg ~20 bytes)
263        //   - 1× sort (N×log(N) comparisons, typically N=4 so tiny but
264        //     the alloc dominates)
265        //   - 1× join alloc (another heap String)
266        // Replacement: single-pass write into a pre-allocated String.
267        // Pre-alloc: 4 genes × ~25 chars ("encoding=UrlEncode;") = 100 bytes.
268        let mut key = String::with_capacity(chromosome.genes.len() * 25);
269        for (i, (n, v)) in chromosome.genes.iter().enumerate() {
270            if i > 0 {
271                key.push(';');
272            }
273            key.push_str(n);
274            key.push('=');
275            key.push_str(v);
276        }
277        key
278    }
279
280    /// Read-only view of the engine's next eval-id counter.
281    /// Exposed so checkpoint round-trip tests can verify the counter
282    /// is preserved across save/load. The field itself stays private
283    /// so external callers can't desync it.
284    #[must_use]
285    pub fn next_id(&self) -> u64 {
286        self.next_id
287    }
288
289    fn next_eval_id(&mut self) -> u64 {
290        self.next_id = self.next_id.saturating_add(1);
291        self.next_id
292    }
293
294    /// Get the next candidate to try (legacy sequential API).
295    ///
296    /// Returns a synthetic index and a reference to the stored candidate.
297    #[must_use]
298    pub fn next_candidate(&mut self) -> Option<(usize, &Chromosome)> {
299        if self.should_terminate() {
300            return None;
301        }
302        if self.pending_single.is_none() {
303            self.pending_single = self.batch_candidates(1).into_iter().next();
304        }
305        self.pending_single
306            .as_ref()
307            .map(|(idx, chrom)| (*idx, chrom))
308    }
309
310    /// Request a batch of up to `n` candidates for parallel evaluation.
311    ///
312    /// Checks cache, budget, and target health before returning candidates.
313    /// `n` is also clamped to the remaining `budget.max_requests` headroom
314    /// so a single batch call can never overshoot the hard request budget
315    /// (the underlying algorithm is free to request whatever it likes
316    /// internally; the engine bounds the request count it actually
317    /// surfaces).
318    ///
319    /// When the booster is enabled (`no_booster == false`), the result
320    /// batch is re-ordered by ascending booster score so that pass-likely
321    /// candidates are tried first.  The in-flight map and cache logic are
322    /// unaffected by the reordering.
323    pub fn batch_candidates(&mut self, n: usize) -> Vec<(usize, Chromosome)> {
324        if self.should_terminate() || n == 0 {
325            return Vec::new();
326        }
327        let remaining = self.budget.max_requests.saturating_sub(self.request_count);
328        if remaining == 0 {
329            return Vec::new();
330        }
331        let n = n.min(remaining);
332
333        let mut result = Vec::with_capacity(n);
334        let mut cached_results = Vec::new();
335        let requested = self.algorithm.request_evaluations(n, &mut self.rng);
336
337        for candidate in requested {
338            let key = Self::cache_key(&candidate.chromosome);
339            if let Some(verdict) = self.cache.get(&key).cloned() {
340                cached_results.push((candidate.id, verdict));
341            } else {
342                let eval_id = self.next_eval_id();
343                // Pair the engine's eval_id (handed to the caller and
344                // used as the in_flight key) with the algorithm's
345                // own candidate.id (used to look up its private
346                // in_flight on submit). See the in_flight field doc.
347                self.in_flight.insert(
348                    eval_id,
349                    (candidate.id, candidate.chromosome.clone(), Instant::now()),
350                );
351                result.push((eval_id as usize, candidate.chromosome));
352            }
353        }
354
355        if !cached_results.is_empty() {
356            self.algorithm.submit_evaluations(cached_results);
357        }
358
359        self.request_count = self.request_count.saturating_add(result.len());
360
361        // WAFBooster re-ranking: when the booster is active, sort candidates
362        // so the lowest-score (most pass-likely) ones come first.  The sort
363        // is stable so equal-score candidates preserve algorithm order.
364        if !self.no_booster && !result.is_empty() {
365            // Build a booster-score map keyed by eval_id.
366            let mut scored: Vec<(usize, Chromosome, f64)> = result
367                .into_iter()
368                .map(|(eval_id, chrom)| {
369                    let payload = Self::cache_key(&chrom); // deterministic string repr
370                    let score = self.booster.score_candidate(&payload);
371                    (eval_id, chrom, score)
372                })
373                .collect();
374            scored.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
375            result = scored
376                .into_iter()
377                .map(|(id, chrom, _)| (id, chrom))
378                .collect();
379        }
380
381        result
382    }
383
384    /// Drop entries from the in-flight map that have been outstanding
385    /// longer than `max_age`. The proxy / scan loop should call this
386    /// periodically (or when a Worker pool is reaped) so a dropped
387    /// evaluation doesn't permanently consume a budget slot.
388    ///
389    /// Audit (2026-05-10): pre-fix `in_flight` grew without any TTL —
390    /// every dropped eval permanently consumed a `max_requests` slot,
391    /// so a long scan with even moderate eval-loss would terminate
392    /// prematurely with budget exhausted while the in-flight map
393    /// silently accumulated. Returns the number of pruned entries.
394    pub fn prune_stale_in_flight(&mut self, max_age: std::time::Duration) -> usize {
395        let now = Instant::now();
396        let before = self.in_flight.len();
397        self.in_flight
398            .retain(|_, (_, _, sent_at)| now.duration_since(*sent_at) <= max_age);
399        let pruned = before - self.in_flight.len();
400        // Repay the budget for stale entries: they never returned a
401        // verdict so they shouldn't count against max_requests.
402        self.request_count = self.request_count.saturating_sub(pruned);
403        pruned
404    }
405
406    /// Submit a batch of evaluation results.
407    ///
408    /// # Errors
409    ///
410    /// Returns an error if an evaluation ID is not in the in-flight set.
411    pub fn submit_batch(
412        &mut self,
413        results: Vec<(usize, OracleVerdict)>,
414    ) -> Result<(), EvolutionError> {
415        let mut to_submit: Vec<(u64, OracleVerdict)> = Vec::with_capacity(results.len());
416        for (id_usize, verdict) in results {
417            let id = id_usize as u64;
418            let (algorithm_candidate_id, mut chromosome, _sent_at) = self
419                .in_flight
420                .remove(&id)
421                .ok_or(EvolutionError::InvalidChromosomeIndex(id_usize))?;
422
423            chromosome.record_verdict(&verdict);
424            // Compute the cache key once and reuse for both the LRU cache
425            // insert and the WAFBooster update — previously this called
426            // cache_key() twice per chromosome, allocating Vec + sort + join
427            // both times. Pre-fix cost: 2× per submit; post-fix: 1×.
428            let key = Self::cache_key(&chromosome);
429
430            // Coverage-feedback: record the (payload_class × rule_id)
431            // MAP-Elites behavior descriptor.  Use the chromosome's
432            // `grammar_rule` gene as the class signal — it is the
433            // closest available proxy to "attack class" inside the
434            // engine layer.  When the verdict carries a `rule_id`, we
435            // get a 2-D descriptor; without it the descriptor collapses
436            // to class-only (the pre-coverage fall-through).
437            let coverage_signal = chromosome
438                .gene("grammar_rule")
439                .filter(|v| *v != "None")
440                .unwrap_or("")
441                .to_string();
442            let (_, _cov_rid) = map_elites_descriptor(&coverage_signal, verdict.rule_id.as_deref());
443            self.rule_coverage
444                .record(&coverage_signal, verdict.rule_id.as_deref());
445
446            // Extract scalar fields before the verdict is moved.
447            let passed = verdict.passed;
448            let status_delta = verdict.status_delta;
449
450            // WAFBooster online update: feed the observation so future
451            // candidate ranking benefits from accumulated signal.
452            // Reuse `key` (already computed above) instead of calling
453            // cache_key() a second time.
454            if !self.no_booster {
455                if passed {
456                    self.booster.observe_pass(&key);
457                } else {
458                    self.booster.observe_block(&key, verdict.rule_id.as_deref());
459                }
460            }
461
462            self.cache.put(key, verdict.clone());
463
464            update_gene_stats(&mut self.gene_stats, &chromosome.genes, passed);
465            let adjusted = evolutionary_fitness(&chromosome, &self.gene_stats);
466            chromosome.fitness = adjusted;
467
468            // Save high-fitness bypasses to corpus. Dedup + the
469            // MAX_ENTRIES cap are enforced inside `BypassCorpus::add`
470            // (O(1) via its hash index). The earlier inline pre-scan
471            // here was both wasteful and BROKEN: it compared a 16-char
472            // u64 `chromosome.hash()` against the corpus's 64-char
473            // SHA-256 `payload_hash`, so it never matched and deduped
474            // nothing — every high-fitness chromosome fell through to
475            // `add`, which did the real (then-linear) dedup. Now `add`
476            // is the single, correct, O(1) gate.
477            if chromosome.fitness >= 0.85 {
478                self.corpus
479                    .add(BypassEntry::from_chromosome(&chromosome, None));
480            }
481
482            // Forward the *algorithm's* candidate ID, not the engine's
483            // eval_id — population-based algorithms key their own
484            // in_flight by it (see in_flight doc).
485            to_submit.push((algorithm_candidate_id, verdict));
486            self.generation_evals = self.generation_evals.saturating_add(1);
487            self.stats.evaluations = self.stats.evaluations.saturating_add(1);
488
489            if passed {
490                self.target_health.record_success();
491            } else if status_delta >= 500 {
492                self.target_health.record_error();
493            }
494        }
495
496        self.algorithm.submit_evaluations(to_submit);
497        Ok(())
498    }
499
500    /// Record legacy boolean feedback for a candidate.
501    pub fn record_feedback(
502        &mut self,
503        chromosome_index: usize,
504        passed: bool,
505    ) -> Result<(), EvolutionError> {
506        // Clear pending_single if it matches the index
507        if let Some((idx, _)) = self.pending_single
508            && idx == chromosome_index
509        {
510            self.pending_single = None;
511        }
512        self.record_verdict(chromosome_index, &OracleVerdict::from_bool(passed))
513    }
514
515    /// Record rich oracle verdict feedback.
516    pub fn record_verdict(
517        &mut self,
518        chromosome_index: usize,
519        verdict: &OracleVerdict,
520    ) -> Result<(), EvolutionError> {
521        self.submit_batch(vec![(chromosome_index, verdict.clone())])
522    }
523
524    /// Record target-error feedback.
525    pub fn record_target_error(&mut self, error: String) -> Result<(), EvolutionError> {
526        self.target_health.record_error();
527        if !self.target_health.is_healthy() {
528            return Err(EvolutionError::TargetHealthCritical(error));
529        }
530        Ok(())
531    }
532
533    /// Signal that the online CUSUM bypass-rate monitor detected a change-point
534    /// (C-11) — i.e. the WAF vendor likely pushed a rule update.
535    ///
536    /// The engine responds by:
537    /// 1. Resetting `stagnation_counter` to 0 — prevents premature termination
538    ///    that would otherwise fire because the bypass rate collapsed (high
539    ///    stagnation from many blocked attempts).
540    /// 2. Setting `exploration_boost_remaining` to `boost_rounds` — for the
541    ///    next `boost_rounds` calls to `evolve`, the booster score for
542    ///    candidates is discounted so the engine explores more broadly
543    ///    rather than exploiting the (now-broken) learned strategy.
544    /// 3. Setting `exploration_boost_factor` to `factor` — the multiplier
545    ///    applied to candidate selection diversity during boost rounds.
546    ///
547    /// # Parameters
548    ///
549    /// - `boost_rounds`: how many `evolve` calls the boost lasts (default 10).
550    /// - `factor`: exploration multiplier > 1.0 (default 2.0).
551    ///
552    /// # Example
553    ///
554    /// ```rust
555    /// use wafrift_evolution::evolution::EvolutionEngine;
556    ///
557    /// let mut engine = EvolutionEngine::new(10);
558    /// assert_eq!(engine.exploration_boost_remaining, 0);
559    /// engine.on_change_point(10, 2.0);
560    /// assert_eq!(engine.exploration_boost_remaining, 10);
561    /// assert_eq!(engine.stagnation_counter, 0);
562    /// ```
563    pub fn on_change_point(&mut self, boost_rounds: u32, factor: f64) {
564        // Reset stagnation so a rule update (which causes many blocked probes
565        // → high stagnation) doesn't terminate the campaign prematurely.
566        self.stagnation_counter = 0;
567        self.stats.stagnation_counter = 0;
568
569        // Set the exploration boost — decays by 1 each evolve() call.
570        self.exploration_boost_remaining = boost_rounds.max(1);
571        self.exploration_boost_factor = factor.max(1.0);
572
573        tracing::info!(
574            boost_rounds,
575            factor,
576            "C-11 change-point detected: exploration boost activated"
577        );
578    }
579
580    /// Evolve the population to the next generation.
581    pub fn evolve(&mut self) {
582        if self.algorithm.best().is_none() {
583            return;
584        }
585
586        // C-11: Decay exploration boost by one round per evolve call.
587        // When exploration_boost_remaining > 0, the booster is temporarily
588        // suppressed so the engine explores more broadly after a WAF rule
589        // update that invalidated the learned bypass strategy.
590        if self.exploration_boost_remaining > 0 {
591            self.exploration_boost_remaining = self.exploration_boost_remaining.saturating_sub(1);
592            if self.exploration_boost_remaining == 0 {
593                // Boost expired: restore default (no-boost) exploration factor.
594                self.exploration_boost_factor = 1.0;
595                tracing::info!("C-11 exploration boost expired — returning to normal exploitation");
596            }
597        }
598
599        // Update fitness history with sliding window
600        if let Some(best) = self.algorithm.best() {
601            self.fitness_history.push_back(best.fitness);
602        }
603        if self.fitness_history.len() > 1000 {
604            self.fitness_history.pop_front();
605        }
606
607        // Detect stagnation — but skip the stagnation increment while the
608        // exploration boost is active: a burst of new-territory exploration
609        // after a rule update naturally shows lower short-term fitness and
610        // would spuriously trigger stagnation-based termination.
611        let window = 10_usize;
612        if self.fitness_history.len() >= window {
613            let skip = self.fitness_history.len().saturating_sub(window);
614            let recent: Vec<f64> = self.fitness_history.iter().skip(skip).copied().collect();
615            let improved = recent.windows(2).any(|w| w[1] > w[0] + 0.001);
616            if improved {
617                self.stagnation_counter = 0;
618            } else if self.exploration_boost_remaining == 0 {
619                // Only accumulate stagnation outside the boost window.
620                self.stagnation_counter = self.stagnation_counter.saturating_add(1);
621            }
622        }
623        // Mirror into stats so should_terminate() (which reads
624        // self.stats.stagnation_counter, not self.stagnation_counter)
625        // and the search algorithms' own should_terminate() impls see
626        // the same value. Without this sync the stagnation_limit
627        // budget would be silently ignored.
628        self.stats.stagnation_counter = self.stagnation_counter;
629
630        self.stats.generation = self.stats.generation.saturating_add(1);
631        self.generation_evals = 0;
632
633        if let Some(ref path) = self.checkpoint_path
634            && let Err(e) = self.save_checkpoint(path)
635        {
636            tracing::warn!(error = %e, path = %path.display(), "checkpoint save failed");
637        }
638    }
639
640    /// Check if evolution should terminate.
641    #[must_use]
642    pub fn should_terminate(&self) -> bool {
643        if !self.target_health.is_healthy() {
644            return true;
645        }
646        self.algorithm.should_terminate(&self.stats, &self.budget)
647            || self.request_count >= self.budget.max_requests
648            || self.stats.stagnation_counter >= self.budget.stagnation_limit
649    }
650
651    /// Get the best-performing chromosome.
652    #[must_use]
653    pub fn best(&self) -> Option<&Chromosome> {
654        self.algorithm.best()
655    }
656
657    /// Return the name of the active search algorithm (e.g. `"ast_mcts"`).
658    #[must_use]
659    pub fn algorithm_name(&self) -> &str {
660        self.algorithm.name()
661    }
662
663    /// Save engine state to disk.
664    pub fn save_checkpoint(&self, path: &Path) -> Result<(), EvolutionError> {
665        let state = EngineState {
666            algorithm_name: self.algorithm.name().to_string(),
667            algorithm_state: self.algorithm.checkpoint()?,
668            gene_pool: self.gene_pool.clone(),
669            // The engine-level rng is not serializable; the algorithm
670            // captures its own rng state inside algorithm_state. Any
671            // engine-side draws after a restore will diverge from
672            // pre-crash, but the algorithm's exploration sequence is
673            // preserved.
674            rng_seed: 0,
675            budget: self.budget,
676            gene_stats: self.gene_stats.clone(),
677            fitness_history: self.fitness_history.clone(),
678            stagnation_counter: self.stagnation_counter,
679            request_count: self.request_count,
680            stats: self.stats,
681            schema_version: 2,
682            corpus: self.corpus.clone(),
683            next_id: self.next_id,
684            generation_evals: self.generation_evals,
685        };
686        save_checkpoint(path, &state)
687    }
688
689    /// Load engine state from disk.
690    pub fn load_checkpoint(&mut self, path: &Path) -> Result<(), EvolutionError> {
691        let mut state: EngineState = load_checkpoint(path)?;
692        state.stats.fixup_start_time();
693        self.algorithm.restore(&state.algorithm_state)?;
694        self.gene_pool = state.gene_pool;
695        self.budget = state.budget;
696        self.gene_stats = state.gene_stats;
697        self.fitness_history = state.fitness_history;
698        self.stagnation_counter = state.stagnation_counter;
699        self.request_count = state.request_count;
700        self.stats = state.stats;
701        // v2 fields — `#[serde(default)]` on EngineState means a v1
702        // checkpoint loads cleanly with empty corpus / next_id=0.
703        self.corpus = state.corpus;
704        self.next_id = state.next_id;
705        self.generation_evals = state.generation_evals;
706        Ok(())
707    }
708
709    /// Get per-gene success rates.
710    #[must_use]
711    pub fn gene_success_rates(&self) -> Vec<(&str, &str, f64)> {
712        crate::evolution::fitness::gene_success_rates(&self.gene_stats)
713    }
714
715    /// Get a human-readable summary.
716    #[must_use]
717    pub fn learned_summary(&self) -> String {
718        crate::evolution::fitness::learned_summary(
719            self.stats.generation,
720            self.algorithm.best(),
721            &self.gene_stats,
722            self.request_count,
723        )
724    }
725
726    /// Seed the underlying algorithm with an explicit population —
727    /// the public path callers use to warm-start search from a known
728    /// good corpus (or to inject a synthetic population from tests).
729    ///
730    /// Previously this method cloned `self.rng` before passing it to
731    /// `initialize`, so the engine's owned RNG was never advanced. Any
732    /// random draws made by `initialize` (e.g. MapElites grid placement,
733    /// initial mutation in SimulatedAnnealing) were "used up" in the
734    /// clone and the engine remained at the same RNG state — making two
735    /// successive `seed_population` calls (or a `seed_population` + an
736    /// `evolve`) produce identical random sequences and identical
737    /// chromosomes.
738    pub fn seed_population(&mut self, population: Vec<Chromosome>) {
739        self.algorithm
740            .initialize(population, &self.gene_pool, &mut self.rng);
741    }
742
743    /// Snapshot the algorithm's live population (test/diagnostic
744    /// surface). Population-based algorithms return their full pool;
745    /// single-state algorithms return the singleton current/best.
746    #[must_use]
747    pub fn population_snapshot(&self) -> Vec<Chromosome> {
748        self.algorithm.population_snapshot()
749    }
750
751    /// Population diversity in `[0.0, 1.0]` — drives adaptive mutation
752    /// pressure (see `crossover::diversity::adaptive_mutation_rate`).
753    ///
754    /// Strategy:
755    /// 1. Snapshot the algorithm's live population and union it with
756    ///    the engine's `in_flight` candidates.
757    /// 2. If `len() >= 2`, return mean pairwise gene-mismatch ratio
758    ///    via `crossover::diversity::diversity_score`.
759    /// 3. Otherwise (single-state algorithm with nothing in-flight),
760    ///    fall back to gene-pool exploration entropy from
761    ///    [`Self::gene_stats_diversity`] — measures how broadly the
762    ///    engine has *explored* the gene space rather than how varied
763    ///    the *current* population is. With no exploration history
764    ///    either, return 1.0 (max-safe default — keeps mutation
765    ///    pressure conservative on a fresh engine).
766    #[must_use]
767    pub fn diversity_score(&self) -> f64 {
768        let mut population = self.algorithm.population_snapshot();
769        for (_, chromosome, _) in self.in_flight.values() {
770            population.push(chromosome.clone());
771        }
772        if population.len() >= 2 {
773            return crate::evolution::crossover::diversity::diversity_score(&population);
774        }
775        let gene_div = self.gene_stats_diversity();
776        if gene_div > 0.0 { gene_div } else { 1.0 }
777    }
778
779    /// Shannon-entropy style diversity over the engine's per-gene
780    /// exploration history.
781    ///
782    /// For each unique gene name in `gene_stats`, computes the
783    /// normalised entropy of its value distribution weighted by
784    /// `attempts`. The per-gene entropies are averaged. Range
785    /// `[0.0, 1.0]`: 0.0 means we tried only one value for every
786    /// gene (no exploration), 1.0 means a uniform distribution
787    /// across the maximum-cardinality gene's value space.
788    ///
789    /// Useful as a fallback signal when the active search algorithm
790    /// is single-state (e.g. simulated annealing) and the population
791    /// snapshot is too small to give meaningful pairwise distance.
792    #[must_use]
793    pub fn gene_stats_diversity(&self) -> f64 {
794        if self.gene_stats.is_empty() {
795            return 0.0;
796        }
797        // Bucket per-gene attempt counts.
798        let mut by_gene: HashMap<&str, Vec<u32>> = HashMap::new();
799        for (name, _value, _successes, attempts) in &self.gene_stats {
800            if *attempts == 0 {
801                continue;
802            }
803            by_gene.entry(name.as_str()).or_default().push(*attempts);
804        }
805        if by_gene.is_empty() {
806            return 0.0;
807        }
808        let mut entropy_sum = 0.0_f64;
809        let mut counted = 0_usize;
810        for attempts in by_gene.values() {
811            let total: u64 = attempts.iter().map(|a| u64::from(*a)).sum();
812            if total == 0 || attempts.len() < 2 {
813                // Single value tried — zero entropy contribution. Still
814                // counted so the per-gene mean isn't biased by skipping.
815                counted += 1;
816                continue;
817            }
818            #[allow(clippy::cast_precision_loss)]
819            let total_f = total as f64;
820            let mut h = 0.0_f64;
821            for a in attempts {
822                #[allow(clippy::cast_precision_loss)]
823                let p = f64::from(*a) / total_f;
824                if p > 0.0 {
825                    h -= p * p.log2();
826                }
827            }
828            // Normalise by max entropy log2(k) where k is the number of
829            // distinct values tried for this gene. Falls in `[0, 1]`.
830            #[allow(clippy::cast_precision_loss)]
831            let h_max = (attempts.len() as f64).log2();
832            let normalised = if h_max > 0.0 { h / h_max } else { 0.0 };
833            entropy_sum += normalised;
834            counted += 1;
835        }
836        if counted == 0 {
837            0.0
838        } else {
839            #[allow(clippy::cast_precision_loss)]
840            let avg = entropy_sum / counted as f64;
841            avg.clamp(0.0, 1.0)
842        }
843    }
844}
845
846/// Serializable engine state.
847///
848/// Schema version 2 (2026-05-10) adds `corpus`, `next_id`, and
849/// `generation_evals` so a restored engine doesn't lose all of its
850/// bypass discoveries and doesn't reset its eval-id counter (which
851/// would collide with any in-flight evaluation that survived the
852/// crash).
853///
854/// What is intentionally NOT serialized:
855///   - `in_flight`: by definition transient; any pending eval at
856///     checkpoint time is lost on crash, but the corpus capture
857///     above means the *useful* bypasses are preserved.
858///   - `cache`: LRU cache of payload→verdict; recomputable.
859///   - `target_health`: runtime stats; resets on resume.
860///   - `checkpoint_path`: re-injected by the caller after load.
861///   - `pending_single`: legacy sequential API state, transient.
862///   - `rule_coverage`: runtime observation accumulator; resets on
863///     resume so each run produces an independent coverage report.
864///   - RNG state: search algorithms each capture their own RNG
865///     state inside `algorithm_state`; the engine-level rng is
866///     used only for `next_eval_id` minting and gene-pool sampling
867///     when the algorithm doesn't override.
868#[derive(Debug, Clone, Serialize, Deserialize)]
869pub struct EngineState {
870    pub algorithm_name: String,
871    pub algorithm_state: Vec<u8>,
872    pub gene_pool: GenePool,
873    pub rng_seed: u64,
874    pub budget: Budget,
875    pub gene_stats: Vec<(String, String, u32, u32)>,
876    pub fitness_history: VecDeque<f64>,
877    pub stagnation_counter: u32,
878    pub request_count: usize,
879    pub stats: SearchStats,
880    pub schema_version: u32,
881    /// Saved bypass discoveries — added in `schema_version` 2.
882    /// Defaults to empty for v1 checkpoints loaded by a v2 engine.
883    #[serde(default)]
884    pub corpus: BypassCorpus,
885    /// Next `eval_id` to mint — added in `schema_version` 2 so a
886    /// restored engine doesn't recycle IDs that may collide with
887    /// any in-flight evaluation that survived the crash.
888    #[serde(default)]
889    pub next_id: u64,
890    /// Evaluations issued in the current generation — added in v2.
891    #[serde(default)]
892    pub generation_evals: usize,
893}
894
895use serde::{Deserialize, Serialize};
wafrift_evolution/evolution/engine.rs

wafrift_evolution/evolution/
engine.rs