Skip to main content

radiate_engines/steps/
audit.rs

1use crate::steps::EngineStep;
2use radiate_core::{
3    Chromosome, Ecosystem, Lineage, Metric, MetricSet, Objective, metric_names,
4    phenotype::PhenotypeId,
5};
6use radiate_error::Result;
7use radiate_utils::intern;
8use std::{
9    cmp::Ordering,
10    collections::HashSet,
11    sync::{Arc, RwLock},
12};
13
/// Absolute tolerance used when counting distinct score values: two adjacent
/// sorted scores are treated as different only if they differ by more than
/// this amount.
///
/// NOTE(review): 1e-9 is smaller than the spacing between neighbouring `f32`
/// values once magnitudes exceed roughly 0.02, so for typical scores this
/// behaves like an exact-equality test — confirm that is intended.
const EPS: f32 = 1e-9;
15
/// Engine step that records per-generation audit metrics (scores, ages,
/// genome sizes, membership turnover, species and lineage statistics) into a
/// `MetricSet`.
///
/// The vectors and sets below are kept on the struct so that their
/// allocations can be reused from one `execute` call to the next.
#[derive(Default)]
pub struct AuditStep {
    /// Objective consulted for `dims()` (number of score buffers) and
    /// `is_single()` (how score metrics are named).
    objective: Objective,
    /// Shared lineage tracker; read-locked during `execute` to derive the
    /// lineage metrics.
    lineage: Arc<RwLock<Lineage>>,
    /// One buffer per objective dimension holding the scores seen in the
    /// current generation, in population order.
    score_distribution: Vec<Vec<f32>>,
    /// Scratch copy of `score_distribution` that is sorted in place in order
    /// to count distinct score values.
    unique_score_work: Vec<Vec<f32>>,
    /// Ages of the current generation's members.
    age_distribution: Vec<usize>,
    /// Every phenotype id observed in any generation processed so far; it is
    /// only ever extended.
    seen_ids: HashSet<PhenotypeId>,
    /// Phenotype ids that made up the previously processed generation.
    last_gen_ids: HashSet<PhenotypeId>,
}
26
27impl AuditStep {
28    pub fn new(objective: Objective, lineage: Arc<RwLock<Lineage>>) -> Self {
29        Self {
30            objective,
31            lineage,
32            ..Default::default()
33        }
34    }
35}
36
37impl AuditStep {
38    #[inline]
39    fn calc_species_metrics<C: Chromosome>(
40        generation: usize,
41        metrics: &mut MetricSet,
42        ecosystem: &Ecosystem<C>,
43    ) {
44        if let Some(species) = ecosystem.species() {
45            let mut new_species_count = 0;
46            let mut species_ages = Vec::with_capacity(species.len());
47            let mut species_size = Vec::with_capacity(species.len());
48
49            let pop_len = ecosystem.population().len().max(1);
50
51            let mut max_size = 0;
52            let mut size_sum = 0;
53
54            let mut size_vec = Vec::with_capacity(species.len());
55
56            for spec in species.iter() {
57                let spec_age = spec.age(generation);
58
59                if spec_age == 0 {
60                    new_species_count += 1;
61                }
62
63                let len = spec.len();
64
65                species_ages.push(spec_age);
66                species_size.push(len);
67
68                max_size = max_size.max(len);
69                size_sum += len;
70                size_vec.push(len);
71            }
72
73            // Largest species share (how dominant is the biggest species)
74            let largest_share = if pop_len > 0 {
75                max_size as f32 / pop_len as f32
76            } else {
77                0.0
78            };
79
80            let mut largest_share_metric = Metric::new(metric_names::LARGEST_SPECIES_SHARE);
81            largest_share_metric.apply_update(largest_share);
82
83            // Species evenness via normalized Shannon entropy
84            let mut evenness = 0.0_f32;
85            let s_count = species.len();
86            if s_count > 1 && size_sum > 0 {
87                let size_sum_f = size_sum as f32;
88                let mut h = 0.0_f32;
89                for sz in size_vec {
90                    if sz > 0 {
91                        let p = sz as f32 / size_sum_f;
92                        h -= p * p.ln();
93                    }
94                }
95                let h_max = (s_count as f32).ln();
96                if h_max > 0.0 {
97                    evenness = h / h_max;
98                }
99            }
100
101            // Species churn ratio: new species / total species
102            let churn_ratio = if s_count > 0 {
103                new_species_count as f32 / s_count as f32
104            } else {
105                0.0
106            };
107            let mut churn_metric = Metric::new(metric_names::SPECIES_NEW_RATIO);
108            churn_metric.apply_update(churn_ratio);
109
110            metrics.upsert((metric_names::SPECIES_AGE, &species_ages));
111            metrics.upsert((metric_names::SPECIES_SIZE, &species_size));
112            metrics.upsert((metric_names::SPECIES_COUNT, species.len()));
113            metrics.upsert((metric_names::SPECIES_CREATED, new_species_count));
114            metrics.upsert((metric_names::SPECIES_EVENNESS, evenness));
115            metrics.upsert((metric_names::SPECIES_NEW_RATIO, churn_ratio));
116            metrics.upsert(largest_share_metric);
117            metrics.upsert(churn_metric);
118        } else {
119            let population_unique_rc_count = ecosystem.population().shared_count();
120            assert_eq!(
121                population_unique_rc_count, 0,
122                "Ecosystem has no species, but population has {} non-unique ptrs",
123                population_unique_rc_count
124            );
125        }
126    }
127
128    #[inline]
129    fn calc_membership_metrics<C: Chromosome>(
130        &mut self,
131        metrics: &mut MetricSet,
132        ecosystem: &Ecosystem<C>,
133    ) {
134        let mut curr_ids = HashSet::with_capacity(ecosystem.population().len());
135        for p in ecosystem.population().iter() {
136            curr_ids.insert(p.id());
137        }
138
139        let pop_len = curr_ids.len();
140
141        let new_this_gen = curr_ids.difference(&self.seen_ids).count();
142        let survivor_count = curr_ids.intersection(&self.last_gen_ids).count();
143
144        let carryover_rate = if pop_len > 0 {
145            survivor_count as f32 / pop_len as f32
146        } else {
147            0.0
148        };
149
150        self.seen_ids.extend(curr_ids.iter().copied());
151        drop(std::mem::replace(&mut self.last_gen_ids, curr_ids));
152
153        metrics.upsert((metric_names::CARRYOVER_RATE, carryover_rate));
154        metrics.upsert((metric_names::NEW_CHILDREN, new_this_gen));
155        metrics.upsert((metric_names::SURVIVOR_COUNT, survivor_count));
156    }
157
158    #[inline]
159    fn calc_derived_metrics<C: Chromosome>(
160        _: usize,
161        metrics: &mut MetricSet,
162        ecosystem: &Ecosystem<C>,
163    ) {
164        let pop_len = ecosystem.population().len() as f32;
165        // Will only compute for single-objective for now
166        if let Some(scores) = metrics.get(metric_names::SCORES) {
167            let score_coeff = match (scores.value_std_dev(), scores.value_mean()) {
168                (Some(std_dev), Some(mean)) if mean != 0.0 => std_dev / mean,
169                _ => 0.0,
170            };
171
172            metrics.upsert((metric_names::SCORE_VOLATILITY, score_coeff));
173        }
174
175        let diversity_ratio = if ecosystem.population().len() > 0 {
176            metrics
177                .get(metric_names::UNIQUE_MEMBERS)
178                .map(|m| m.last_value() / pop_len)
179                .unwrap_or(0.0)
180        } else {
181            0.0
182        };
183
184        metrics.upsert((metric_names::DIVERSITY_RATIO, diversity_ratio));
185    }
186
187    fn clear_state(&mut self) {
188        self.age_distribution.clear();
189
190        let dims = self.objective.dims();
191        if self.score_distribution.len() < dims {
192            self.score_distribution.resize_with(dims, Vec::new);
193        }
194        if self.unique_score_work.len() < dims {
195            self.unique_score_work.resize_with(dims, Vec::new);
196        }
197
198        for v in &mut self.score_distribution {
199            v.clear();
200        }
201
202        for v in &mut self.unique_score_work {
203            v.clear();
204        }
205    }
206
207    fn calc_lineage_metrics<C: Chromosome>(
208        _: usize,
209        metrics: &mut MetricSet,
210        ecosystem: &Ecosystem<C>,
211        lineage: &Lineage,
212    ) {
213        let stats = lineage.stats();
214        let parent_usage = &stats.parent_usage;
215        let family_usage = &stats.family_usage;
216        let family_pairs = &stats.family_pairs;
217
218        let family_pair_dist = family_pairs.values().copied().collect::<Vec<usize>>();
219        let parent_usage_dist = parent_usage.values().cloned().collect::<Vec<usize>>();
220        let family_usage_dist = family_usage.values().cloned().collect::<Vec<usize>>();
221
222        let pair_entropy = normalized_shannon_entropy(&family_pair_dist);
223        let pair_unique = family_pair_dist.iter().filter(|&&c| c > 0).count();
224        let top1_pair_share = topk_share(family_pair_dist.clone(), 1);
225
226        metrics.upsert((
227            metric_names::LINEAGE_PARENTS_USED_UNIQUE,
228            parent_usage.len(),
229        ));
230        metrics.upsert((
231            metric_names::LINEAGE_PARENTS_USED_RATIO,
232            if parent_usage.len() > 0 {
233                parent_usage.len() as f32 / ecosystem.population().len() as f32
234            } else {
235                0.0
236            },
237        ));
238
239        metrics.upsert((metric_names::ALTER_PARENT_REUSE, &parent_usage_dist));
240        metrics.upsert((metric_names::ALTER_WITHIN_FAMILY, &family_usage_dist));
241        metrics.upsert((metric_names::ALTER_CROSS_FAMILY, &family_pair_dist));
242        metrics.upsert((metric_names::LINEAGE_EVENTS, stats.updates));
243        metrics.upsert((metric_names::LINEAGE_FAMILY_PAIR_ENTROPY, pair_entropy));
244        metrics.upsert((metric_names::LINEAGE_TOP1_PAIR_SHARE, top1_pair_share));
245        metrics.upsert((metric_names::LINEAGE_FAMILY_PAIR_UNIQUE, pair_unique));
246    }
247}
248
249impl<C: Chromosome> EngineStep<C> for AuditStep {
250    #[inline]
251    fn execute(
252        &mut self,
253        generation: usize,
254        ecosystem: &mut Ecosystem<C>,
255        metrics: &mut MetricSet,
256    ) -> Result<()> {
257        self.clear_state();
258
259        {
260            let lineage = self.lineage.read().unwrap();
261            Self::calc_lineage_metrics(generation, metrics, ecosystem, &lineage);
262        }
263
264        let pop = ecosystem.population();
265        let n = pop.len();
266        let dims = self.objective.dims();
267
268        for i in 0..dims {
269            self.score_distribution
270                .get_mut(i)
271                .map(|v| v.reserve_exact(n));
272            self.unique_score_work
273                .get_mut(i)
274                .map(|v| v.reserve_exact(n));
275        }
276
277        let mut size_metric = Vec::with_capacity(n);
278        let mut unique_members = HashSet::with_capacity(n);
279
280        for p in pop.iter() {
281            unique_members.insert(p.id());
282
283            self.age_distribution.push(p.age(generation));
284
285            let geno_size = p
286                .genotype()
287                .iter()
288                .map(|chromosome| chromosome.len())
289                .sum::<usize>();
290            size_metric.push(geno_size);
291
292            if let Some(score) = p.score() {
293                for (idx, val) in score.iter().enumerate() {
294                    self.score_distribution[idx].push(*val);
295                    self.unique_score_work[idx].push(*val);
296                }
297            }
298        }
299
300        for vec in &mut self.unique_score_work {
301            vec.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
302        }
303
304        for (idx, v) in self.unique_score_work.iter().enumerate() {
305            let mut unique_count = 0;
306            let mut last: Option<f32> = None;
307            for val in v {
308                if last.map(|l| (l - val).abs() > EPS).unwrap_or(true) {
309                    unique_count += 1;
310                    last = Some(*val);
311                }
312            }
313
314            let metric_name = if self.objective.is_single() {
315                metric_names::UNIQUE_SCORES
316            } else {
317                intern!(format!("{}_{}", metric_names::UNIQUE_SCORES, idx))
318            };
319            metrics.upsert((metric_name, unique_count));
320        }
321
322        if !self.score_distribution.is_empty() {
323            if self.objective.is_single() {
324                metrics.upsert((metric_names::SCORES, &self.score_distribution[0]));
325            } else {
326                for (idx, vec) in self.score_distribution.iter().enumerate() {
327                    let metric_name = intern!(format!("{}_{}", metric_names::SCORES, idx));
328                    metrics.upsert((metric_name, vec));
329                }
330            }
331        }
332
333        metrics.upsert((metric_names::AGE, &self.age_distribution));
334        metrics.upsert((metric_names::GENOME_SIZE, &size_metric));
335        metrics.upsert((metric_names::UNIQUE_MEMBERS, unique_members.len()));
336
337        self.calc_membership_metrics(metrics, ecosystem);
338        Self::calc_species_metrics(generation, metrics, ecosystem);
339        Self::calc_derived_metrics(generation, metrics, ecosystem);
340
341        Ok(())
342    }
343}
344
/// Returns the fraction of the total count held by the `k` largest entries.
///
/// Yields 0.0 when `counts` is empty or sums to zero. If `k` is at least
/// `counts.len()` every entry is included, giving 1.0 for a non-zero total.
fn topk_share(mut counts: Vec<usize>, k: usize) -> f32 {
    let total = counts.iter().sum::<usize>();
    // An empty vector also sums to zero, so this covers both cases.
    if total == 0 {
        return 0.0;
    }

    // Ascending sort; the largest entries are then read from the back.
    counts.sort_unstable();
    let top: usize = counts.iter().rev().take(k).sum();

    top as f32 / total as f32
}
357
/// Shannon entropy of `counts`, normalized by `ln(k)` where `k` is the
/// number of non-zero entries.
///
/// Returns 0.0 when the counts sum to zero or when at most one entry is
/// non-zero. Zero entries are ignored both in the entropy sum and in `k`.
fn normalized_shannon_entropy(counts: &[usize]) -> f32 {
    let total = counts.iter().sum::<usize>();
    if total == 0 {
        return 0.0;
    }
    let denom = total as f32;

    // Accumulate the entropy and the number of non-zero entries together.
    let (entropy, support) = counts
        .iter()
        .filter(|&&c| c != 0)
        .fold((0.0f32, 0usize), |(h, k), &c| {
            let p = c as f32 / denom;
            (h - p * p.ln(), k + 1)
        });

    if support <= 1 {
        return 0.0;
    }

    let h_max = (support as f32).ln();
    if h_max <= 0.0 { 0.0 } else { entropy / h_max }
}