1use crate::steps::EngineStep;
2use radiate_core::{
3 Chromosome, Ecosystem, Lineage, Metric, MetricSet, Objective, metric_names,
4 phenotype::PhenotypeId,
5};
6use radiate_error::Result;
7use radiate_utils::intern;
8use std::{
9 cmp::Ordering,
10 collections::HashSet,
11 sync::{Arc, RwLock},
12};
13
/// Absolute tolerance used when counting unique scores: while walking a sorted
/// score list, a value is counted as a new distinct score only if it differs
/// from the previously counted value by more than this amount.
///
/// NOTE(review): neighbouring `f32` values are already more than `1e-9` apart
/// once magnitudes exceed roughly 0.016, so for larger scores this behaves like
/// an exact inequality test — confirm the tolerance is what was intended.
const EPS: f32 = 1e-9_f32;
15
16#[derive(Default)]
17pub struct AuditStep {
18 objective: Objective,
19 lineage: Arc<RwLock<Lineage>>,
20 score_distribution: Vec<Vec<f32>>,
21 unique_score_work: Vec<Vec<f32>>,
22 age_distribution: Vec<usize>,
23 seen_ids: HashSet<PhenotypeId>,
24 last_gen_ids: HashSet<PhenotypeId>,
25}
26
27impl AuditStep {
28 pub fn new(objective: Objective, lineage: Arc<RwLock<Lineage>>) -> Self {
29 Self {
30 objective,
31 lineage,
32 ..Default::default()
33 }
34 }
35}
36
37impl AuditStep {
38 #[inline]
39 fn calc_species_metrics<C: Chromosome>(
40 generation: usize,
41 metrics: &mut MetricSet,
42 ecosystem: &Ecosystem<C>,
43 ) {
44 if let Some(species) = ecosystem.species() {
45 let mut new_species_count = 0;
46 let mut species_ages = Vec::with_capacity(species.len());
47 let mut species_size = Vec::with_capacity(species.len());
48
49 let pop_len = ecosystem.population().len().max(1);
50
51 let mut max_size = 0;
52 let mut size_sum = 0;
53
54 let mut size_vec = Vec::with_capacity(species.len());
55
56 for spec in species.iter() {
57 let spec_age = spec.age(generation);
58
59 if spec_age == 0 {
60 new_species_count += 1;
61 }
62
63 let len = spec.len();
64
65 species_ages.push(spec_age);
66 species_size.push(len);
67
68 max_size = max_size.max(len);
69 size_sum += len;
70 size_vec.push(len);
71 }
72
73 let largest_share = if pop_len > 0 {
75 max_size as f32 / pop_len as f32
76 } else {
77 0.0
78 };
79
80 let mut largest_share_metric = Metric::new(metric_names::LARGEST_SPECIES_SHARE);
81 largest_share_metric.apply_update(largest_share);
82
83 let mut evenness = 0.0_f32;
85 let s_count = species.len();
86 if s_count > 1 && size_sum > 0 {
87 let size_sum_f = size_sum as f32;
88 let mut h = 0.0_f32;
89 for sz in size_vec {
90 if sz > 0 {
91 let p = sz as f32 / size_sum_f;
92 h -= p * p.ln();
93 }
94 }
95 let h_max = (s_count as f32).ln();
96 if h_max > 0.0 {
97 evenness = h / h_max;
98 }
99 }
100
101 let churn_ratio = if s_count > 0 {
103 new_species_count as f32 / s_count as f32
104 } else {
105 0.0
106 };
107 let mut churn_metric = Metric::new(metric_names::SPECIES_NEW_RATIO);
108 churn_metric.apply_update(churn_ratio);
109
110 metrics.upsert((metric_names::SPECIES_AGE, &species_ages));
111 metrics.upsert((metric_names::SPECIES_SIZE, &species_size));
112 metrics.upsert((metric_names::SPECIES_COUNT, species.len()));
113 metrics.upsert((metric_names::SPECIES_CREATED, new_species_count));
114 metrics.upsert((metric_names::SPECIES_EVENNESS, evenness));
115 metrics.upsert((metric_names::SPECIES_NEW_RATIO, churn_ratio));
116 metrics.upsert(largest_share_metric);
117 metrics.upsert(churn_metric);
118 } else {
119 let population_unique_rc_count = ecosystem.population().shared_count();
120 assert_eq!(
121 population_unique_rc_count, 0,
122 "Ecosystem has no species, but population has {} non-unique ptrs",
123 population_unique_rc_count
124 );
125 }
126 }
127
128 #[inline]
129 fn calc_membership_metrics<C: Chromosome>(
130 &mut self,
131 metrics: &mut MetricSet,
132 ecosystem: &Ecosystem<C>,
133 ) {
134 let mut curr_ids = HashSet::with_capacity(ecosystem.population().len());
135 for p in ecosystem.population().iter() {
136 curr_ids.insert(p.id());
137 }
138
139 let pop_len = curr_ids.len();
140
141 let new_this_gen = curr_ids.difference(&self.seen_ids).count();
142 let survivor_count = curr_ids.intersection(&self.last_gen_ids).count();
143
144 let carryover_rate = if pop_len > 0 {
145 survivor_count as f32 / pop_len as f32
146 } else {
147 0.0
148 };
149
150 self.seen_ids.extend(curr_ids.iter().copied());
151 drop(std::mem::replace(&mut self.last_gen_ids, curr_ids));
152
153 metrics.upsert((metric_names::CARRYOVER_RATE, carryover_rate));
154 metrics.upsert((metric_names::NEW_CHILDREN, new_this_gen));
155 metrics.upsert((metric_names::SURVIVOR_COUNT, survivor_count));
156 }
157
158 #[inline]
159 fn calc_derived_metrics<C: Chromosome>(
160 _: usize,
161 metrics: &mut MetricSet,
162 ecosystem: &Ecosystem<C>,
163 ) {
164 let pop_len = ecosystem.population().len() as f32;
165 if let Some(scores) = metrics.get(metric_names::SCORES) {
167 let score_coeff = match (scores.value_std_dev(), scores.value_mean()) {
168 (Some(std_dev), Some(mean)) if mean != 0.0 => std_dev / mean,
169 _ => 0.0,
170 };
171
172 metrics.upsert((metric_names::SCORE_VOLATILITY, score_coeff));
173 }
174
175 let diversity_ratio = if ecosystem.population().len() > 0 {
176 metrics
177 .get(metric_names::UNIQUE_MEMBERS)
178 .map(|m| m.last_value() / pop_len)
179 .unwrap_or(0.0)
180 } else {
181 0.0
182 };
183
184 metrics.upsert((metric_names::DIVERSITY_RATIO, diversity_ratio));
185 }
186
187 fn clear_state(&mut self) {
188 self.age_distribution.clear();
189
190 let dims = self.objective.dims();
191 if self.score_distribution.len() < dims {
192 self.score_distribution.resize_with(dims, Vec::new);
193 }
194 if self.unique_score_work.len() < dims {
195 self.unique_score_work.resize_with(dims, Vec::new);
196 }
197
198 for v in &mut self.score_distribution {
199 v.clear();
200 }
201
202 for v in &mut self.unique_score_work {
203 v.clear();
204 }
205 }
206
207 fn calc_lineage_metrics<C: Chromosome>(
208 _: usize,
209 metrics: &mut MetricSet,
210 ecosystem: &Ecosystem<C>,
211 lineage: &Lineage,
212 ) {
213 let stats = lineage.stats();
214 let parent_usage = &stats.parent_usage;
215 let family_usage = &stats.family_usage;
216 let family_pairs = &stats.family_pairs;
217
218 let family_pair_dist = family_pairs.values().copied().collect::<Vec<usize>>();
219 let parent_usage_dist = parent_usage.values().cloned().collect::<Vec<usize>>();
220 let family_usage_dist = family_usage.values().cloned().collect::<Vec<usize>>();
221
222 let pair_entropy = normalized_shannon_entropy(&family_pair_dist);
223 let pair_unique = family_pair_dist.iter().filter(|&&c| c > 0).count();
224 let top1_pair_share = topk_share(family_pair_dist.clone(), 1);
225
226 metrics.upsert((
227 metric_names::LINEAGE_PARENTS_USED_UNIQUE,
228 parent_usage.len(),
229 ));
230 metrics.upsert((
231 metric_names::LINEAGE_PARENTS_USED_RATIO,
232 if parent_usage.len() > 0 {
233 parent_usage.len() as f32 / ecosystem.population().len() as f32
234 } else {
235 0.0
236 },
237 ));
238
239 metrics.upsert((metric_names::ALTER_PARENT_REUSE, &parent_usage_dist));
240 metrics.upsert((metric_names::ALTER_WITHIN_FAMILY, &family_usage_dist));
241 metrics.upsert((metric_names::ALTER_CROSS_FAMILY, &family_pair_dist));
242 metrics.upsert((metric_names::LINEAGE_EVENTS, stats.updates));
243 metrics.upsert((metric_names::LINEAGE_FAMILY_PAIR_ENTROPY, pair_entropy));
244 metrics.upsert((metric_names::LINEAGE_TOP1_PAIR_SHARE, top1_pair_share));
245 metrics.upsert((metric_names::LINEAGE_FAMILY_PAIR_UNIQUE, pair_unique));
246 }
247}
248
249impl<C: Chromosome> EngineStep<C> for AuditStep {
250 #[inline]
251 fn execute(
252 &mut self,
253 generation: usize,
254 ecosystem: &mut Ecosystem<C>,
255 metrics: &mut MetricSet,
256 ) -> Result<()> {
257 self.clear_state();
258
259 {
260 let lineage = self.lineage.read().unwrap();
261 Self::calc_lineage_metrics(generation, metrics, ecosystem, &lineage);
262 }
263
264 let pop = ecosystem.population();
265 let n = pop.len();
266 let dims = self.objective.dims();
267
268 for i in 0..dims {
269 self.score_distribution
270 .get_mut(i)
271 .map(|v| v.reserve_exact(n));
272 self.unique_score_work
273 .get_mut(i)
274 .map(|v| v.reserve_exact(n));
275 }
276
277 let mut size_metric = Vec::with_capacity(n);
278 let mut unique_members = HashSet::with_capacity(n);
279
280 for p in pop.iter() {
281 unique_members.insert(p.id());
282
283 self.age_distribution.push(p.age(generation));
284
285 let geno_size = p
286 .genotype()
287 .iter()
288 .map(|chromosome| chromosome.len())
289 .sum::<usize>();
290 size_metric.push(geno_size);
291
292 if let Some(score) = p.score() {
293 for (idx, val) in score.iter().enumerate() {
294 self.score_distribution[idx].push(*val);
295 self.unique_score_work[idx].push(*val);
296 }
297 }
298 }
299
300 for vec in &mut self.unique_score_work {
301 vec.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
302 }
303
304 for (idx, v) in self.unique_score_work.iter().enumerate() {
305 let mut unique_count = 0;
306 let mut last: Option<f32> = None;
307 for val in v {
308 if last.map(|l| (l - val).abs() > EPS).unwrap_or(true) {
309 unique_count += 1;
310 last = Some(*val);
311 }
312 }
313
314 let metric_name = if self.objective.is_single() {
315 metric_names::UNIQUE_SCORES
316 } else {
317 intern!(format!("{}_{}", metric_names::UNIQUE_SCORES, idx))
318 };
319 metrics.upsert((metric_name, unique_count));
320 }
321
322 if !self.score_distribution.is_empty() {
323 if self.objective.is_single() {
324 metrics.upsert((metric_names::SCORES, &self.score_distribution[0]));
325 } else {
326 for (idx, vec) in self.score_distribution.iter().enumerate() {
327 let metric_name = intern!(format!("{}_{}", metric_names::SCORES, idx));
328 metrics.upsert((metric_name, vec));
329 }
330 }
331 }
332
333 metrics.upsert((metric_names::AGE, &self.age_distribution));
334 metrics.upsert((metric_names::GENOME_SIZE, &size_metric));
335 metrics.upsert((metric_names::UNIQUE_MEMBERS, unique_members.len()));
336
337 self.calc_membership_metrics(metrics, ecosystem);
338 Self::calc_species_metrics(generation, metrics, ecosystem);
339 Self::calc_derived_metrics(generation, metrics, ecosystem);
340
341 Ok(())
342 }
343}
344
/// Returns the fraction of the total count that is held by the `k` largest
/// entries of `counts`.
///
/// Yields `0.0` when `counts` is empty or sums to zero. If `k` exceeds the
/// number of entries, every entry is included.
fn topk_share(mut counts: Vec<usize>, k: usize) -> f32 {
    let total: usize = counts.iter().sum();
    if total == 0 {
        // Covers both the empty input and the all-zero input.
        return 0.0;
    }

    // Ascending sort; the largest entries are then read from the back.
    counts.sort_unstable();
    let top: usize = counts.iter().rev().take(k).sum();

    top as f32 / total as f32
}
357
/// Computes the Shannon entropy of `counts`, normalized by the natural log of
/// the number of non-zero entries so the result is 1 for a uniform
/// distribution.
///
/// Zero entries are ignored. Returns `0.0` when the counts sum to zero or when
/// at most one entry is non-zero.
fn normalized_shannon_entropy(counts: &[usize]) -> f32 {
    let total: usize = counts.iter().sum();
    if total == 0 {
        return 0.0;
    }
    let total_f = total as f32;

    // Accumulate the entropy and the number of non-zero categories together.
    let (entropy, support) = counts
        .iter()
        .filter(|&&c| c != 0)
        .fold((0.0_f32, 0_usize), |(h, k), &c| {
            let p = c as f32 / total_f;
            (h - p * p.ln(), k + 1)
        });

    if support <= 1 {
        return 0.0;
    }

    let max_entropy = (support as f32).ln();
    if max_entropy <= 0.0 {
        0.0
    } else {
        entropy / max_entropy
    }
}