Skip to main content

wafrift_evolution/search/
novelty.rs

1use crate::evolution::crossover::mutation::mutate_with_log;
2use crate::evolution::{Chromosome, GenePool, population::random_chromosome};
3use crate::lineage::Lineage;
4use crate::search::{EvalCandidate, SearchAlgorithm};
5use crate::types::{Budget, EvolutionError, OracleVerdict, SearchStats};
6use rand::Rng;
7use rand::rngs::StdRng;
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10
11/// Novelty search with k-NN behavioral distance archive.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct NoveltySearch {
14    population: Vec<Chromosome>,
15    archive: Vec<Chromosome>,
16    gene_pool: GenePool,
17    generation: u32,
18    eval_counter: u64,
19    k: usize,
20    threshold: f64,
21    #[serde(skip)]
22    in_flight: HashMap<u64, Chromosome>,
23}
24
25impl NoveltySearch {
26    #[must_use]
27    pub fn new(k: usize, threshold: f64) -> Self {
28        Self {
29            population: Vec::new(),
30            archive: Vec::new(),
31            gene_pool: GenePool::default_wafrift(),
32            generation: 0,
33            eval_counter: 0,
34            k,
35            threshold,
36            in_flight: HashMap::new(),
37        }
38    }
39
40    fn phenotypic_distance(a: &Chromosome, b: &Chromosome) -> f64 {
41        let genes_a: Vec<_> = a.genes.iter().map(|(n, v)| format!("{n}={v}")).collect();
42        let genes_b: Vec<_> = b.genes.iter().map(|(n, v)| format!("{n}={v}")).collect();
43        levenshtein_distance(&genes_a.join("|"), &genes_b.join("|")) as f64
44            / (genes_a.len().max(genes_b.len()).max(1) as f64)
45    }
46
47    fn novelty_score(&self, chromosome: &Chromosome) -> f64 {
48        let mut neighbors: Vec<f64> = self
49            .archive
50            .iter()
51            .chain(self.population.iter())
52            .map(|other| Self::phenotypic_distance(chromosome, other))
53            .collect();
54        neighbors.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
55        neighbors.truncate(self.k);
56        if neighbors.is_empty() {
57            return f64::INFINITY;
58        }
59        neighbors.iter().sum::<f64>() / neighbors.len() as f64
60    }
61
62    fn generate_individual(&self, rng: &mut StdRng) -> Chromosome {
63        if self.population.is_empty() {
64            return random_chromosome(&self.gene_pool, rng);
65        }
66        let parent = &self.population[rng.gen_range(0..self.population.len())];
67        let mut child = parent.clone();
68        let log = mutate_with_log(&mut child, &self.gene_pool, 0.3, rng);
69        child.lineage = Lineage::mutation(parent, log, self.generation);
70        child
71    }
72}
73
74impl Default for NoveltySearch {
75    fn default() -> Self {
76        Self::new(15, 0.3)
77    }
78}
79
80impl SearchAlgorithm for NoveltySearch {
81    fn name(&self) -> &'static str {
82        "novelty_search"
83    }
84
85    fn initialize(&mut self, population: Vec<Chromosome>, gene_pool: &GenePool, _rng: &mut StdRng) {
86        self.gene_pool = gene_pool.clone();
87        self.population = population;
88        self.archive.clear();
89        self.in_flight.clear();
90    }
91
92    fn request_evaluations(&mut self, n: usize, rng: &mut StdRng) -> Vec<EvalCandidate> {
93        let mut out = Vec::with_capacity(n);
94        for _ in 0..n {
95            self.eval_counter += 1;
96            let candidate = self.generate_individual(rng);
97            self.in_flight.insert(self.eval_counter, candidate.clone());
98            out.push(EvalCandidate {
99                id: self.eval_counter,
100                chromosome: candidate,
101            });
102        }
103        out
104    }
105
106    fn submit_evaluations(&mut self, results: Vec<(u64, OracleVerdict)>) {
107        let mut evaluated: Vec<Chromosome> = Vec::with_capacity(results.len());
108        for (id, verdict) in results {
109            if let Some(mut candidate) = self.in_flight.remove(&id) {
110                candidate.record_verdict(&verdict);
111                evaluated.push(candidate);
112            }
113        }
114
115        // Add to archive based on novelty. Cap the archive at 10_000
116        // to prevent unbounded growth on long-running scans (every
117        // novel candidate would otherwise stay alive forever, leaking
118        // memory until OOM). When full, evict the least-novel entry
119        // by score so the highest-novelty history is retained.
120        const ARCHIVE_CAP: usize = 10_000;
121        for candidate in evaluated {
122            let score = self.novelty_score(&candidate);
123            if score > self.threshold {
124                if self.archive.len() >= ARCHIVE_CAP
125                    && let Some((min_idx, _)) = self
126                        .archive
127                        .iter()
128                        .enumerate()
129                        .map(|(i, c)| (i, self.novelty_score(c)))
130                        .min_by(|(_, a), (_, b)| {
131                            a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)
132                        })
133                {
134                    self.archive.swap_remove(min_idx);
135                }
136                self.archive.push(candidate.clone());
137            }
138            self.population.push(candidate);
139        }
140
141        // Cull population to reasonable size, keeping most novel
142        if self.population.len() > 100 {
143            let temp: Vec<Chromosome> = self.population.drain(..).collect();
144            let mut scored: Vec<(f64, Chromosome)> = temp
145                .into_iter()
146                .map(|c| (self.novelty_score(&c), c))
147                .collect();
148            scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
149            scored.truncate(100);
150            self.population = scored.into_iter().map(|(_, c)| c).collect();
151        }
152
153        self.generation += 1;
154    }
155
156    fn should_terminate(&self, stats: &SearchStats, budget: &Budget) -> bool {
157        stats.evaluations >= budget.max_requests
158            || stats.generation >= budget.max_generations
159            || stats.stagnation_counter >= budget.stagnation_limit
160    }
161
162    fn best(&self) -> Option<&Chromosome> {
163        self.population
164            .iter()
165            .chain(self.archive.iter())
166            .max_by(|a, b| {
167                a.fitness
168                    .partial_cmp(&b.fitness)
169                    .unwrap_or(std::cmp::Ordering::Equal)
170            })
171    }
172
173    fn checkpoint(&self) -> Result<Vec<u8>, EvolutionError> {
174        serde_json::to_vec(self).map_err(|e| EvolutionError::SerializationFailed(e.to_string()))
175    }
176
177    fn restore(&mut self, bytes: &[u8]) -> Result<(), EvolutionError> {
178        *self = serde_json::from_slice(bytes)
179            .map_err(|e| EvolutionError::DeserializationFailed(e.to_string()))?;
180        self.in_flight.clear();
181        Ok(())
182    }
183}
184
185fn levenshtein_distance(a: &str, b: &str) -> usize {
186    let a_chars: Vec<char> = a.chars().collect();
187    let b_chars: Vec<char> = b.chars().collect();
188    let mut prev = vec![0; b_chars.len() + 1];
189    let mut curr = vec![0; b_chars.len() + 1];
190    for (j, slot) in prev.iter_mut().enumerate().take(b_chars.len() + 1) {
191        *slot = j;
192    }
193    for i in 1..=a_chars.len() {
194        curr[0] = i;
195        for j in 1..=b_chars.len() {
196            let cost = if a_chars[i - 1] == b_chars[j - 1] {
197                0
198            } else {
199                1
200            };
201            curr[j] = (curr[j - 1] + 1).min(prev[j] + 1).min(prev[j - 1] + cost);
202        }
203        std::mem::swap(&mut prev, &mut curr);
204    }
205    prev[b_chars.len()]
206}