Skip to main content

cyanea_stats/
rank.rs

1//! Ranking methods for numeric data.
2//!
3//! Provides [`rank`] which assigns ranks to data values using various
4//! tie-breaking strategies via [`RankMethod`].
5
6/// Strategy for handling tied values when ranking.
7#[derive(Debug, Clone, Copy, PartialEq, Eq)]
8pub enum RankMethod {
9    /// Tied values receive the average of their would-be ranks (default for
10    /// most statistical methods).
11    Average,
12    /// Tied values receive the minimum rank in the group.
13    Min,
14    /// Tied values receive the maximum rank in the group.
15    Max,
16    /// Tied values receive sequential ranks (first occurrence gets lower rank).
17    Ordinal,
18    /// Like `Min`, but ranks always increase by 1 (no gaps).
19    Dense,
20}
21
22/// Assign ranks to `data` using the given [`RankMethod`].
23///
24/// Returns a `Vec<f64>` of the same length as `data`, where each element is
25/// the rank of the corresponding input value.
26///
27/// Empty input produces empty output.
28pub fn rank(data: &[f64], method: RankMethod) -> Vec<f64> {
29    let n = data.len();
30    if n == 0 {
31        return Vec::new();
32    }
33
34    // Build (value, original_index) and sort by value.
35    let mut indexed: Vec<(f64, usize)> = data.iter().copied().enumerate().map(|(i, v)| (v, i)).collect();
36    indexed.sort_by(|a, b| a.0.total_cmp(&b.0));
37
38    let mut ranks = vec![0.0; n];
39
40    if method == RankMethod::Ordinal {
41        for (rank_minus_1, &(_, orig_idx)) in indexed.iter().enumerate() {
42            ranks[orig_idx] = (rank_minus_1 + 1) as f64;
43        }
44    } else {
45        let mut dense_rank = 0.0;
46        let mut i = 0;
47        while i < n {
48            // Find the end of the tie group.
49            let mut j = i + 1;
50            while j < n && indexed[j].0.total_cmp(&indexed[i].0).is_eq() {
51                j += 1;
52            }
53            dense_rank += 1.0;
54
55            // Ranks in the group are (i+1)..=(j) (1-based).
56            let group_len = j - i;
57            let rank_val = match method {
58                RankMethod::Average => {
59                    let sum: f64 = (i + 1..=j).map(|r| r as f64).sum();
60                    sum / group_len as f64
61                }
62                RankMethod::Min => (i + 1) as f64,
63                RankMethod::Max => j as f64,
64                RankMethod::Dense => dense_rank,
65                RankMethod::Ordinal => unreachable!(),
66            };
67
68            for k in i..j {
69                ranks[indexed[k].1] = rank_val;
70            }
71
72            i = j;
73        }
74    }
75
76    ranks
77}
78
79// ── Tests ──────────────────────────────────────────────────────────────────
80
81#[cfg(test)]
82mod tests {
83    use super::*;
84
85    #[test]
86    fn rank_average_no_ties() {
87        let data = [3.0, 1.0, 2.0];
88        assert_eq!(rank(&data, RankMethod::Average), vec![3.0, 1.0, 2.0]);
89    }
90
91    #[test]
92    fn rank_average_with_ties() {
93        let data = [3.0, 1.0, 2.0, 2.0];
94        // sorted: 1(1), 2(2), 2(3), 3(4) → ties at 2 get (2+3)/2 = 2.5
95        assert_eq!(rank(&data, RankMethod::Average), vec![4.0, 1.0, 2.5, 2.5]);
96    }
97
98    #[test]
99    fn rank_min_with_ties() {
100        let data = [3.0, 1.0, 2.0, 2.0];
101        assert_eq!(rank(&data, RankMethod::Min), vec![4.0, 1.0, 2.0, 2.0]);
102    }
103
104    #[test]
105    fn rank_max_with_ties() {
106        let data = [3.0, 1.0, 2.0, 2.0];
107        assert_eq!(rank(&data, RankMethod::Max), vec![4.0, 1.0, 3.0, 3.0]);
108    }
109
110    #[test]
111    fn rank_dense_with_ties() {
112        let data = [3.0, 1.0, 2.0, 2.0];
113        assert_eq!(rank(&data, RankMethod::Dense), vec![3.0, 1.0, 2.0, 2.0]);
114    }
115
116    #[test]
117    fn rank_ordinal() {
118        let data = [3.0, 1.0, 2.0, 2.0];
119        // Ties broken by original position in sorted order.
120        let r = rank(&data, RankMethod::Ordinal);
121        assert_eq!(r[1], 1.0); // 1.0 is smallest
122        assert_eq!(r[0], 4.0); // 3.0 is largest
123        // The two 2.0s get ranks 2 and 3 (order depends on stable sort).
124        assert!(r[2] == 2.0 || r[2] == 3.0);
125        assert!(r[3] == 2.0 || r[3] == 3.0);
126        assert!((r[2] - r[3]).abs() > 0.5); // they must differ
127    }
128
129    #[test]
130    fn rank_all_equal() {
131        let data = [5.0, 5.0, 5.0];
132        assert_eq!(rank(&data, RankMethod::Average), vec![2.0, 2.0, 2.0]);
133        assert_eq!(rank(&data, RankMethod::Dense), vec![1.0, 1.0, 1.0]);
134    }
135
136    #[test]
137    fn rank_empty() {
138        let empty: Vec<f64> = vec![];
139        assert_eq!(rank(&empty, RankMethod::Average), Vec::<f64>::new());
140    }
141}