Skip to main content

radiate_core/domain/math/
distribution.rs

1use num_traits::ToPrimitive;
2use radiate_utils::Primitive;
3
4pub struct DistShape {
5    /// Number of distinct values (richness).
6    pub unique: usize,
7    /// Pielou evenness of the value frequencies in `[0, 1]` — how uniformly the
8    /// population is spread across distinct fitness values. Low evenness means
9    /// the population has piled onto a few scores (premature convergence).
10    pub evenness: f32,
11    /// Gini coefficient of the distribution in `[0, 1]` — fitness inequality /
12    /// selection pressure. 0 = every member shares the same score, → 1 = one
13    /// member dominates.
14    pub gini: f32,
15}
16
17/// Single descending-cost pass over an **ascending-sorted** slice computing its
18/// richness, Pielou evenness, and Gini coefficient together. `sorted` must be
19/// sorted ascending; callers already sort for the unique-count walk.
20/// O(m) time, O(1) extra space.
21///
22/// The input values are generic over [`Primitive`], but the running statistics
23/// are accumulated in `f32` (each value is converted once via [`Primitive::extract`]).
24#[inline]
25pub fn shape<P: Primitive>(sorted: &[P]) -> DistShape {
26    let m = sorted.len();
27    if m == 0 {
28        return DistShape {
29            unique: 0,
30            evenness: 0.0,
31            gini: 0.0,
32        };
33    }
34
35    // Gini is only defined for non-negative quantities; if the distribution
36    // dips below zero (e.g. minimization or signed fitness), shift it up so its
37    // floor sits at zero. This keeps the standard formula well-defined across
38    // arbitrary fitness scales at the cost of making it translation-invariant.
39    // `safe_sub` rather than `-sorted[0]` because `Primitive` (which includes
40    // unsigned ints) carries no `Neg` bound; the branch is unreachable for them.
41    let shift = if sorted[0] < P::ZERO {
42        P::ZERO.safe_sub(sorted[0])
43    } else {
44        P::ZERO
45    };
46
47    let total = m as f32;
48    let mut unique = 0_f32;
49    let mut entropy = 0_f32; // Shannon entropy of the value frequencies
50    let mut run_len = 0_f32; // length of the current run of equal values
51    let mut last: Option<P> = None;
52
53    let mut gini_sum = 0.0_f32; // Σ (xᵢ + shift)
54    let mut gini_weighted = 0.0_f32; // Σ (i+1)(xᵢ + shift), 1-indexed
55
56    for (i, &val) in sorted.iter().enumerate() {
57        match last {
58            Some(l) if l.is_equal(val) => run_len += 1.0,
59            _ => {
60                if run_len > 0.0 {
61                    let p = run_len / total;
62                    entropy -= p * p.ln();
63                }
64
65                unique += 1.0;
66                run_len = 1.0;
67                last = Some(val);
68            }
69        }
70
71        let x = val.safe_add(shift).extract::<f32>().unwrap_or(0.0);
72        gini_sum += x;
73        gini_weighted += (i as f32 + 1.0) * x;
74    }
75
76    if run_len > 0.0 {
77        let p = run_len / total;
78        entropy -= p * p.ln();
79    }
80
81    let evenness = if unique > 1.0 {
82        // Bounded by 1.0 in theory; clamp away f32 rounding overshoot.
83        (entropy / unique.ln()).min(1.0)
84    } else {
85        0.0
86    };
87
88    let gini = if gini_sum > 0.0 {
89        ((2.0 * gini_weighted) / (total * gini_sum) - (total + 1.0) / total).max(0.0)
90    } else {
91        0.0
92    };
93
94    DistShape {
95        unique: unique as usize,
96        evenness,
97        gini,
98    }
99}
100
101/// Pielou evenness of a categorical distribution given each category's weight
102/// (e.g. species member counts, histogram bin counts, fitness shares), in
103/// `[0, 1]`.
104///
105/// This is the same measure as [`DistShape::evenness`], but for data whose
106/// per-category weights are already known — rather than a flat sample whose
107/// frequencies [`shape`] has to derive by counting runs of equal values:
108/// - `~1.0`: every non-empty category holds the same weight — maximally even.
109/// - `→ 0`: the weight is concentrated in a few categories.
110///
111/// Generic over any numeric weight (`usize` counts, integer or float shares);
112/// non-positive entries are treated as empty. Normalized by `ln(k)` where `k`
113/// is the number of non-empty categories; returns `0.0` when fewer than two
114/// categories carry any weight (no diversity to be even about). O(n) time,
115/// O(1) extra space.
116#[inline]
117pub fn evenness<T: ToPrimitive>(weights: &[T]) -> f32 {
118    let mut total = 0.0_f32;
119    let mut weighted_log = 0.0_f32; // Σ wᵢ ln wᵢ
120    let mut nonzero = 0_f32; // categories carrying weight
121
122    for w in weights {
123        let w = w.to_f32().unwrap_or(0.0);
124        if w > 0.0 {
125            total += w;
126            weighted_log += w * w.ln();
127            nonzero += 1.0;
128        }
129    }
130
131    if nonzero <= 1.0 {
132        return 0.0;
133    }
134
135    // Single-pass identity: H = -Σ pᵢ ln pᵢ with pᵢ = wᵢ/total expands to
136    // ln(total) - (Σ wᵢ ln wᵢ)/total, so we never materialize the pᵢ.
137    let entropy = total.ln() - weighted_log / total;
138    (entropy / nonzero.ln()).min(1.0)
139}
140
141#[cfg(test)]
142mod tests {
143    use super::*;
144
145    fn approx(a: f32, b: f32) {
146        assert!((a - b).abs() < 1e-4, "expected {b}, got {a}");
147    }
148
149    #[test]
150    fn evenness_empty_or_single_category_is_zero() {
151        approx(evenness::<usize>(&[]), 0.0);
152        approx(evenness(&[7]), 0.0);
153        // A single non-empty category among empties still has nothing to spread.
154        approx(evenness(&[0, 5, 0]), 0.0);
155    }
156
157    #[test]
158    fn evenness_equal_counts_is_maximal() {
159        approx(evenness(&[4, 4, 4]), 1.0);
160        approx(evenness(&[1, 1]), 1.0);
161    }
162
163    #[test]
164    fn evenness_accepts_fractional_weights() {
165        // Same shape as [15,10,5] counts, expressed as float shares → same J.
166        approx(evenness(&[1.5_f32, 1.0, 0.5]), 0.9206);
167    }
168
169    #[test]
170    fn evenness_skewed_counts_drops_below_one() {
171        // [15,10,5]: H = 1.0115, H_max = ln 3 → ~0.921.
172        approx(evenness(&[15, 10, 5]), 0.9206);
173    }
174
175    #[test]
176    fn evenness_ignores_empty_categories_in_the_denominator() {
177        // Empty categories carry no weight and are not counted toward k, so this
178        // matches the two-category even case rather than being dragged down.
179        approx(evenness(&[5, 0, 5, 0]), 1.0);
180    }
181}