flow_gates/
statistics.rs

1use crate::filtering::filter_events_by_gate;
2use crate::types::Gate;
3use flow_fcs::Fcs;
4use anyhow::{Context, Result};
5use serde::{Deserialize, Serialize};
6
7/// Statistics calculated for a gated population.
8///
9/// This structure provides comprehensive statistical analysis of events that
10/// pass through a gate, including event counts, percentages, centroids, and
11/// detailed parameter statistics.
12///
13/// # Example
14///
15/// ```rust,no_run
16/// use flow_gates::{GateStatistics, Gate};
17/// use flow_fcs::Fcs;
18///
19/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
20/// // Load FCS file and create gate (implementation depends on your FCS library)
21/// // let fcs = /* load FCS file */;
22/// // let gate = /* ... create gate ... */;
23///
24/// // Calculate statistics
25/// // let stats = GateStatistics::calculate(&fcs, &gate)?;
26///
27/// // println!("Event count: {}", stats.event_count);
28/// // println!("Percentage: {:.2}%", stats.percentage);
29/// // println!("Centroid: ({:.2}, {:.2})", stats.centroid.0, stats.centroid.1);
30/// // println!("X mean: {:.2}", stats.x_stats.mean);
31/// // println!("Y median: {:.2}", stats.y_stats.median);
32/// # Ok(())
33/// # }
34/// ```
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct GateStatistics {
37    /// Number of events in the gate
38    pub event_count: usize,
39    /// Percentage of total events (0.0 to 100.0)
40    pub percentage: f64,
41    /// 2D centroid (x, y) in raw data space
42    pub centroid: (f64, f64),
43    /// Statistics for the X parameter
44    pub x_stats: ParameterStatistics,
45    /// Statistics for the Y parameter
46    pub y_stats: ParameterStatistics,
47}
48
49/// Statistics for a single parameter (channel) within a gate.
50///
51/// Provides comprehensive statistical measures including central tendency
52/// (mean, median, geometric mean), dispersion (std dev, CV), and distribution
53/// (min, max, quartiles).
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct ParameterStatistics {
56    /// Parameter (channel) name
57    pub parameter: String,
58    /// Mean value
59    pub mean: f64,
60    /// Geometric mean
61    pub geometric_mean: f64,
62    /// Median value
63    pub median: f64,
64    /// Standard deviation
65    pub std_dev: f64,
66    /// Minimum value
67    pub min: f64,
68    /// Maximum value
69    pub max: f64,
70    /// 25th percentile (Q1)
71    pub q1: f64,
72    /// 75th percentile (Q3)
73    pub q3: f64,
74    /// Coefficient of variation (CV) = std_dev / mean
75    pub cv: f64,
76}
77
78impl GateStatistics {
79    /// Calculate statistics for a gate applied to FCS data
80    pub fn calculate(fcs: &Fcs, gate: &Gate) -> Result<Self> {
81        // Get filtered event indices
82        let indices = filter_events_by_gate(fcs, gate, None)?;
83        let event_count = indices.len();
84
85        if event_count == 0 {
86            return Ok(Self::empty(gate));
87        }
88
89        // Get parameter data as views (no full allocation)
90        let x_param = gate.x_parameter_channel_name();
91        let y_param = gate.y_parameter_channel_name();
92
93        let x_slice = fcs
94            .get_parameter_events_slice(x_param)
95            .with_context(|| format!("Failed to get parameter data for {}", x_param))?;
96        let y_slice = fcs
97            .get_parameter_events_slice(y_param)
98            .with_context(|| format!("Failed to get parameter data for {}", y_param))?;
99
100        // Extract filtered values (only allocate the filtered subset)
101        let x_values: Vec<f64> = indices.iter().map(|&i| x_slice[i] as f64).collect();
102        let y_values: Vec<f64> = indices.iter().map(|&i| y_slice[i] as f64).collect();
103
104        // Calculate total events for percentage
105        let total_events = fcs.data_frame.height();
106        let percentage = (event_count as f64 / total_events as f64) * 100.0;
107
108        // Calculate centroid
109        let centroid = (
110            x_values.iter().sum::<f64>() / event_count as f64,
111            y_values.iter().sum::<f64>() / event_count as f64,
112        );
113
114        // Calculate parameter statistics
115        let x_stats = ParameterStatistics::calculate(x_param, &x_values)?;
116        let y_stats = ParameterStatistics::calculate(y_param, &y_values)?;
117
118        Ok(Self {
119            event_count,
120            percentage,
121            centroid,
122            x_stats,
123            y_stats,
124        })
125    }
126
127    /// Create empty statistics (for gates with no events)
128    fn empty(gate: &Gate) -> Self {
129        Self {
130            event_count: 0,
131            percentage: 0.0,
132            centroid: (0.0, 0.0),
133            x_stats: ParameterStatistics::empty(gate.x_parameter_channel_name()),
134            y_stats: ParameterStatistics::empty(gate.y_parameter_channel_name()),
135        }
136    }
137}
138
139impl ParameterStatistics {
140    /// Calculate statistics for a parameter
141    pub fn calculate(parameter: &str, values: &[f64]) -> Result<Self> {
142        if values.is_empty() {
143            return Ok(Self::empty(parameter));
144        }
145
146        let n = values.len() as f64;
147
148        // Mean
149        let mean = values.iter().sum::<f64>() / n;
150
151        // Geometric mean (only for positive values)
152        let geometric_mean = if values.iter().all(|&v| v > 0.0) {
153            let log_sum: f64 = values.iter().map(|&v| v.ln()).sum();
154            (log_sum / n).exp()
155        } else {
156            f64::NAN
157        };
158
159        // Variance and standard deviation
160        let variance = values.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / n;
161        let std_dev = variance.sqrt();
162
163        // Coefficient of variation
164        let cv = if mean != 0.0 {
165            (std_dev / mean.abs()) * 100.0
166        } else {
167            f64::NAN
168        };
169
170        // Min and Max
171        let min = values
172            .iter()
173            .copied()
174            .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
175            .unwrap_or(f64::NAN);
176        let max = values
177            .iter()
178            .copied()
179            .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
180            .unwrap_or(f64::NAN);
181
182        // Median and percentiles (requires sorting)
183        let mut sorted = values.to_vec();
184        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
185
186        let median = percentile(&sorted, 50.0);
187        let q1 = percentile(&sorted, 25.0);
188        let q3 = percentile(&sorted, 75.0);
189
190        Ok(Self {
191            parameter: parameter.to_string(),
192            mean,
193            geometric_mean,
194            median,
195            std_dev,
196            min,
197            max,
198            q1,
199            q3,
200            cv,
201        })
202    }
203
204    /// Create empty statistics
205    fn empty(parameter: &str) -> Self {
206        Self {
207            parameter: parameter.to_string(),
208            mean: f64::NAN,
209            geometric_mean: f64::NAN,
210            median: f64::NAN,
211            std_dev: f64::NAN,
212            min: f64::NAN,
213            max: f64::NAN,
214            q1: f64::NAN,
215            q3: f64::NAN,
216            cv: f64::NAN,
217        }
218    }
219}
220
221/// Calculate percentile from sorted data
222///
223/// Uses linear interpolation between ranks
224fn percentile(sorted_values: &[f64], p: f64) -> f64 {
225    if sorted_values.is_empty() {
226        return f64::NAN;
227    }
228
229    let n = sorted_values.len();
230    if n == 1 {
231        return sorted_values[0];
232    }
233
234    // Calculate the rank
235    let rank = (p / 100.0) * (n - 1) as f64;
236    let lower_index = rank.floor() as usize;
237    let upper_index = rank.ceil() as usize;
238
239    if lower_index == upper_index {
240        sorted_values[lower_index]
241    } else {
242        // Linear interpolation
243        let lower_value = sorted_values[lower_index];
244        let upper_value = sorted_values[upper_index];
245        let fraction = rank - lower_index as f64;
246        lower_value + fraction * (upper_value - lower_value)
247    }
248}
249
250#[cfg(test)]
251mod tests {
252    use super::*;
253
254    #[test]
255    fn test_percentile() {
256        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
257        assert_eq!(percentile(&data, 0.0), 1.0);
258        assert_eq!(percentile(&data, 50.0), 3.0);
259        assert_eq!(percentile(&data, 100.0), 5.0);
260    }
261
262    #[test]
263    fn test_parameter_statistics() {
264        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
265        let stats = ParameterStatistics::calculate("test", &values).expect("stats");
266
267        assert_eq!(stats.mean, 3.0);
268        assert_eq!(stats.median, 3.0);
269        assert_eq!(stats.min, 1.0);
270        assert_eq!(stats.max, 5.0);
271        assert!((stats.std_dev - 1.4142).abs() < 0.01);
272    }
273
274    #[test]
275    fn test_geometric_mean() {
276        let values = vec![1.0, 2.0, 4.0, 8.0];
277        let stats = ParameterStatistics::calculate("test", &values).expect("stats");
278
279        // Geometric mean of [1,2,4,8] = (1*2*4*8)^(1/4) = 64^0.25 = sqrt(8) ≈ 2.828
280        assert!((stats.geometric_mean - 2.828).abs() < 0.01);
281    }
282
283    #[test]
284    fn test_coefficient_of_variation() {
285        let values = vec![10.0, 20.0, 30.0, 40.0, 50.0];
286        let stats = ParameterStatistics::calculate("test", &values).expect("stats");
287
288        // Mean = 30, StdDev ≈ 14.14, CV = (14.14/30)*100 ≈ 47.14%
289        assert!((stats.cv - 47.14).abs() < 1.0);
290    }
291
292    #[test]
293    fn test_empty_statistics() {
294        let stats = ParameterStatistics::calculate("test", &[]).expect("stats");
295        assert!(stats.mean.is_nan());
296        assert!(stats.median.is_nan());
297    }
298}