flow_gates/
statistics.rs

1use crate::filtering::filter_events_by_gate;
2use crate::types::Gate;
3use flow_fcs::Fcs;
4use anyhow::{Context, Result};
5use serde::{Deserialize, Serialize};
6
7/// Statistics calculated for a gated population.
8///
9/// This structure provides comprehensive statistical analysis of events that
10/// pass through a gate, including event counts, percentages, centroids, and
11/// detailed parameter statistics.
12///
13/// # Example
14///
15/// ```rust
16/// use flow_gates::{GateStatistics, Gate};
17/// use flow_fcs::Fcs;
18///
19/// // Load FCS file and create gate
20/// let fcs = Fcs::from_file("data.fcs")?;
21/// let gate = /* ... create gate ... */;
22///
23/// // Calculate statistics
24/// let stats = GateStatistics::calculate(&fcs, &gate)?;
25///
26/// println!("Event count: {}", stats.event_count);
27/// println!("Percentage: {:.2}%", stats.percentage);
28/// println!("Centroid: ({:.2}, {:.2})", stats.centroid.0, stats.centroid.1);
29/// println!("X mean: {:.2}", stats.x_stats.mean);
30/// println!("Y median: {:.2}", stats.y_stats.median);
31/// ```
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct GateStatistics {
34    /// Number of events in the gate
35    pub event_count: usize,
36    /// Percentage of total events (0.0 to 100.0)
37    pub percentage: f64,
38    /// 2D centroid (x, y) in raw data space
39    pub centroid: (f64, f64),
40    /// Statistics for the X parameter
41    pub x_stats: ParameterStatistics,
42    /// Statistics for the Y parameter
43    pub y_stats: ParameterStatistics,
44}
45
46/// Statistics for a single parameter (channel) within a gate.
47///
48/// Provides comprehensive statistical measures including central tendency
49/// (mean, median, geometric mean), dispersion (std dev, CV), and distribution
50/// (min, max, quartiles).
51#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct ParameterStatistics {
53    /// Parameter (channel) name
54    pub parameter: String,
55    /// Mean value
56    pub mean: f64,
57    /// Geometric mean
58    pub geometric_mean: f64,
59    /// Median value
60    pub median: f64,
61    /// Standard deviation
62    pub std_dev: f64,
63    /// Minimum value
64    pub min: f64,
65    /// Maximum value
66    pub max: f64,
67    /// 25th percentile (Q1)
68    pub q1: f64,
69    /// 75th percentile (Q3)
70    pub q3: f64,
71    /// Coefficient of variation (CV) = std_dev / mean
72    pub cv: f64,
73}
74
75impl GateStatistics {
76    /// Calculate statistics for a gate applied to FCS data
77    pub fn calculate(fcs: &Fcs, gate: &Gate) -> Result<Self> {
78        // Get filtered event indices
79        let indices = filter_events_by_gate(fcs, gate, None)?;
80        let event_count = indices.len();
81
82        if event_count == 0 {
83            return Ok(Self::empty(gate));
84        }
85
86        // Get parameter data as views (no full allocation)
87        let x_param = gate.x_parameter_channel_name();
88        let y_param = gate.y_parameter_channel_name();
89
90        let x_slice = fcs
91            .get_parameter_events_slice(x_param)
92            .with_context(|| format!("Failed to get parameter data for {}", x_param))?;
93        let y_slice = fcs
94            .get_parameter_events_slice(y_param)
95            .with_context(|| format!("Failed to get parameter data for {}", y_param))?;
96
97        // Extract filtered values (only allocate the filtered subset)
98        let x_values: Vec<f64> = indices.iter().map(|&i| x_slice[i] as f64).collect();
99        let y_values: Vec<f64> = indices.iter().map(|&i| y_slice[i] as f64).collect();
100
101        // Calculate total events for percentage
102        let total_events = fcs.data_frame.height();
103        let percentage = (event_count as f64 / total_events as f64) * 100.0;
104
105        // Calculate centroid
106        let centroid = (
107            x_values.iter().sum::<f64>() / event_count as f64,
108            y_values.iter().sum::<f64>() / event_count as f64,
109        );
110
111        // Calculate parameter statistics
112        let x_stats = ParameterStatistics::calculate(x_param, &x_values)?;
113        let y_stats = ParameterStatistics::calculate(y_param, &y_values)?;
114
115        Ok(Self {
116            event_count,
117            percentage,
118            centroid,
119            x_stats,
120            y_stats,
121        })
122    }
123
124    /// Create empty statistics (for gates with no events)
125    fn empty(gate: &Gate) -> Self {
126        Self {
127            event_count: 0,
128            percentage: 0.0,
129            centroid: (0.0, 0.0),
130            x_stats: ParameterStatistics::empty(gate.x_parameter_channel_name()),
131            y_stats: ParameterStatistics::empty(gate.y_parameter_channel_name()),
132        }
133    }
134}
135
136impl ParameterStatistics {
137    /// Calculate statistics for a parameter
138    pub fn calculate(parameter: &str, values: &[f64]) -> Result<Self> {
139        if values.is_empty() {
140            return Ok(Self::empty(parameter));
141        }
142
143        let n = values.len() as f64;
144
145        // Mean
146        let mean = values.iter().sum::<f64>() / n;
147
148        // Geometric mean (only for positive values)
149        let geometric_mean = if values.iter().all(|&v| v > 0.0) {
150            let log_sum: f64 = values.iter().map(|&v| v.ln()).sum();
151            (log_sum / n).exp()
152        } else {
153            f64::NAN
154        };
155
156        // Variance and standard deviation
157        let variance = values.iter().map(|&v| (v - mean).powi(2)).sum::<f64>() / n;
158        let std_dev = variance.sqrt();
159
160        // Coefficient of variation
161        let cv = if mean != 0.0 {
162            (std_dev / mean.abs()) * 100.0
163        } else {
164            f64::NAN
165        };
166
167        // Min and Max
168        let min = values
169            .iter()
170            .copied()
171            .min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
172            .unwrap_or(f64::NAN);
173        let max = values
174            .iter()
175            .copied()
176            .max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
177            .unwrap_or(f64::NAN);
178
179        // Median and percentiles (requires sorting)
180        let mut sorted = values.to_vec();
181        sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
182
183        let median = percentile(&sorted, 50.0);
184        let q1 = percentile(&sorted, 25.0);
185        let q3 = percentile(&sorted, 75.0);
186
187        Ok(Self {
188            parameter: parameter.to_string(),
189            mean,
190            geometric_mean,
191            median,
192            std_dev,
193            min,
194            max,
195            q1,
196            q3,
197            cv,
198        })
199    }
200
201    /// Create empty statistics
202    fn empty(parameter: &str) -> Self {
203        Self {
204            parameter: parameter.to_string(),
205            mean: f64::NAN,
206            geometric_mean: f64::NAN,
207            median: f64::NAN,
208            std_dev: f64::NAN,
209            min: f64::NAN,
210            max: f64::NAN,
211            q1: f64::NAN,
212            q3: f64::NAN,
213            cv: f64::NAN,
214        }
215    }
216}
217
218/// Calculate percentile from sorted data
219///
220/// Uses linear interpolation between ranks
221fn percentile(sorted_values: &[f64], p: f64) -> f64 {
222    if sorted_values.is_empty() {
223        return f64::NAN;
224    }
225
226    let n = sorted_values.len();
227    if n == 1 {
228        return sorted_values[0];
229    }
230
231    // Calculate the rank
232    let rank = (p / 100.0) * (n - 1) as f64;
233    let lower_index = rank.floor() as usize;
234    let upper_index = rank.ceil() as usize;
235
236    if lower_index == upper_index {
237        sorted_values[lower_index]
238    } else {
239        // Linear interpolation
240        let lower_value = sorted_values[lower_index];
241        let upper_value = sorted_values[upper_index];
242        let fraction = rank - lower_index as f64;
243        lower_value + fraction * (upper_value - lower_value)
244    }
245}
246
247#[cfg(test)]
248mod tests {
249    use super::*;
250
251    #[test]
252    fn test_percentile() {
253        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
254        assert_eq!(percentile(&data, 0.0), 1.0);
255        assert_eq!(percentile(&data, 50.0), 3.0);
256        assert_eq!(percentile(&data, 100.0), 5.0);
257    }
258
259    #[test]
260    fn test_parameter_statistics() {
261        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0];
262        let stats = ParameterStatistics::calculate("test", &values).expect("stats");
263
264        assert_eq!(stats.mean, 3.0);
265        assert_eq!(stats.median, 3.0);
266        assert_eq!(stats.min, 1.0);
267        assert_eq!(stats.max, 5.0);
268        assert!((stats.std_dev - 1.4142).abs() < 0.01);
269    }
270
271    #[test]
272    fn test_geometric_mean() {
273        let values = vec![1.0, 2.0, 4.0, 8.0];
274        let stats = ParameterStatistics::calculate("test", &values).expect("stats");
275
276        // Geometric mean of [1,2,4,8] = (1*2*4*8)^(1/4) = 64^0.25 = sqrt(8) ≈ 2.828
277        assert!((stats.geometric_mean - 2.828).abs() < 0.01);
278    }
279
280    #[test]
281    fn test_coefficient_of_variation() {
282        let values = vec![10.0, 20.0, 30.0, 40.0, 50.0];
283        let stats = ParameterStatistics::calculate("test", &values).expect("stats");
284
285        // Mean = 30, StdDev ≈ 14.14, CV = (14.14/30)*100 ≈ 47.14%
286        assert!((stats.cv - 47.14).abs() < 1.0);
287    }
288
289    #[test]
290    fn test_empty_statistics() {
291        let stats = ParameterStatistics::calculate("test", &[]).expect("stats");
292        assert!(stats.mean.is_nan());
293        assert!(stats.median.is_nan());
294    }
295}