radiate_core/domain/math/distribution.rs
1use num_traits::ToPrimitive;
2use radiate_utils::Primitive;
3
4pub struct DistShape {
5 /// Number of distinct values (richness).
6 pub unique: usize,
7 /// Pielou evenness of the value frequencies in `[0, 1]` — how uniformly the
8 /// population is spread across distinct fitness values. Low evenness means
9 /// the population has piled onto a few scores (premature convergence).
10 pub evenness: f32,
11 /// Gini coefficient of the distribution in `[0, 1]` — fitness inequality /
12 /// selection pressure. 0 = every member shares the same score, → 1 = one
13 /// member dominates.
14 pub gini: f32,
15}
16
17/// Single descending-cost pass over an **ascending-sorted** slice computing its
18/// richness, Pielou evenness, and Gini coefficient together. `sorted` must be
19/// sorted ascending; callers already sort for the unique-count walk.
20/// O(m) time, O(1) extra space.
21///
22/// The input values are generic over [`Primitive`], but the running statistics
23/// are accumulated in `f32` (each value is converted once via [`Primitive::extract`]).
24#[inline]
25pub fn shape<P: Primitive>(sorted: &[P]) -> DistShape {
26 let m = sorted.len();
27 if m == 0 {
28 return DistShape {
29 unique: 0,
30 evenness: 0.0,
31 gini: 0.0,
32 };
33 }
34
35 // Gini is only defined for non-negative quantities; if the distribution
36 // dips below zero (e.g. minimization or signed fitness), shift it up so its
37 // floor sits at zero. This keeps the standard formula well-defined across
38 // arbitrary fitness scales at the cost of making it translation-invariant.
39 // `safe_sub` rather than `-sorted[0]` because `Primitive` (which includes
40 // unsigned ints) carries no `Neg` bound; the branch is unreachable for them.
41 let shift = if sorted[0] < P::ZERO {
42 P::ZERO.safe_sub(sorted[0])
43 } else {
44 P::ZERO
45 };
46
47 let total = m as f32;
48 let mut unique = 0_f32;
49 let mut entropy = 0_f32; // Shannon entropy of the value frequencies
50 let mut run_len = 0_f32; // length of the current run of equal values
51 let mut last: Option<P> = None;
52
53 let mut gini_sum = 0.0_f32; // Σ (xᵢ + shift)
54 let mut gini_weighted = 0.0_f32; // Σ (i+1)(xᵢ + shift), 1-indexed
55
56 for (i, &val) in sorted.iter().enumerate() {
57 match last {
58 Some(l) if l.is_equal(val) => run_len += 1.0,
59 _ => {
60 if run_len > 0.0 {
61 let p = run_len / total;
62 entropy -= p * p.ln();
63 }
64
65 unique += 1.0;
66 run_len = 1.0;
67 last = Some(val);
68 }
69 }
70
71 let x = val.safe_add(shift).extract::<f32>().unwrap_or(0.0);
72 gini_sum += x;
73 gini_weighted += (i as f32 + 1.0) * x;
74 }
75
76 if run_len > 0.0 {
77 let p = run_len / total;
78 entropy -= p * p.ln();
79 }
80
81 let evenness = if unique > 1.0 {
82 // Bounded by 1.0 in theory; clamp away f32 rounding overshoot.
83 (entropy / unique.ln()).min(1.0)
84 } else {
85 0.0
86 };
87
88 let gini = if gini_sum > 0.0 {
89 ((2.0 * gini_weighted) / (total * gini_sum) - (total + 1.0) / total).max(0.0)
90 } else {
91 0.0
92 };
93
94 DistShape {
95 unique: unique as usize,
96 evenness,
97 gini,
98 }
99}
100
101/// Pielou evenness of a categorical distribution given each category's weight
102/// (e.g. species member counts, histogram bin counts, fitness shares), in
103/// `[0, 1]`.
104///
105/// This is the same measure as [`DistShape::evenness`], but for data whose
106/// per-category weights are already known — rather than a flat sample whose
107/// frequencies [`shape`] has to derive by counting runs of equal values:
108/// - `~1.0`: every non-empty category holds the same weight — maximally even.
109/// - `→ 0`: the weight is concentrated in a few categories.
110///
111/// Generic over any numeric weight (`usize` counts, integer or float shares);
112/// non-positive entries are treated as empty. Normalized by `ln(k)` where `k`
113/// is the number of non-empty categories; returns `0.0` when fewer than two
114/// categories carry any weight (no diversity to be even about). O(n) time,
115/// O(1) extra space.
116#[inline]
117pub fn evenness<T: ToPrimitive>(weights: &[T]) -> f32 {
118 let mut total = 0.0_f32;
119 let mut weighted_log = 0.0_f32; // Σ wᵢ ln wᵢ
120 let mut nonzero = 0_f32; // categories carrying weight
121
122 for w in weights {
123 let w = w.to_f32().unwrap_or(0.0);
124 if w > 0.0 {
125 total += w;
126 weighted_log += w * w.ln();
127 nonzero += 1.0;
128 }
129 }
130
131 if nonzero <= 1.0 {
132 return 0.0;
133 }
134
135 // Single-pass identity: H = -Σ pᵢ ln pᵢ with pᵢ = wᵢ/total expands to
136 // ln(total) - (Σ wᵢ ln wᵢ)/total, so we never materialize the pᵢ.
137 let entropy = total.ln() - weighted_log / total;
138 (entropy / nonzero.ln()).min(1.0)
139}
140
141#[cfg(test)]
142mod tests {
143 use super::*;
144
145 fn approx(a: f32, b: f32) {
146 assert!((a - b).abs() < 1e-4, "expected {b}, got {a}");
147 }
148
149 #[test]
150 fn evenness_empty_or_single_category_is_zero() {
151 approx(evenness::<usize>(&[]), 0.0);
152 approx(evenness(&[7]), 0.0);
153 // A single non-empty category among empties still has nothing to spread.
154 approx(evenness(&[0, 5, 0]), 0.0);
155 }
156
157 #[test]
158 fn evenness_equal_counts_is_maximal() {
159 approx(evenness(&[4, 4, 4]), 1.0);
160 approx(evenness(&[1, 1]), 1.0);
161 }
162
163 #[test]
164 fn evenness_accepts_fractional_weights() {
165 // Same shape as [15,10,5] counts, expressed as float shares → same J.
166 approx(evenness(&[1.5_f32, 1.0, 0.5]), 0.9206);
167 }
168
169 #[test]
170 fn evenness_skewed_counts_drops_below_one() {
171 // [15,10,5]: H = 1.0115, H_max = ln 3 → ~0.921.
172 approx(evenness(&[15, 10, 5]), 0.9206);
173 }
174
175 #[test]
176 fn evenness_ignores_empty_categories_in_the_denominator() {
177 // Empty categories carry no weight and are not counted toward k, so this
178 // matches the two-category even case rather than being dragged down.
179 approx(evenness(&[5, 0, 5, 0]), 1.0);
180 }
181}