peacoqc-rs 0.2.0

PeacoQC quality control algorithms for flow cytometry
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
//! PeacoQC quality control algorithms for flow cytometry
//!
//! This crate implements all PeacoQC algorithms and provides
//! a trait-based interface that works with any FCS data structure.
//!
//! # Quick Start
//!
//! ```no_run
//! use peacoqc_rs::{PeacoQCConfig, PeacoQCData, QCMode, peacoqc};
//!
//! // Assuming you have an FCS struct that implements PeacoQCData
//! # struct MyFcs;
//! # impl PeacoQCData for MyFcs {
//! #     fn n_events(&self) -> usize { 0 }
//! #     fn channel_names(&self) -> Vec<String> { vec![] }
//! #     fn get_channel_range(&self, _: &str) -> Option<(f64, f64)> { None }
//! #     fn get_channel_f64(&self, _: &str) -> peacoqc_rs::Result<Vec<f64>> {
//! #         Ok(vec![])
//! #     }
//! # }
//! # let fcs = MyFcs;
//! let config = PeacoQCConfig {
//!     channels: vec!["FL1-A".to_string(), "FL2-A".to_string()],
//!     determine_good_cells: QCMode::All,
//!     ..Default::default()
//! };
//!
//! let result = peacoqc(&fcs, &config)?;
//! println!("Removed {:.2}% of events", result.percentage_removed);
//! # Ok::<(), peacoqc_rs::PeacoQCError>(())
//! ```
//!
//! # Integration Example
//!
//! Here's a complete example of integrating PeacoQC into an application:
//!
//! ```rust,no_run
//! use peacoqc_rs::{PeacoQCConfig, PeacoQCData, QCMode, peacoqc, remove_margins, remove_doublets};
//! use flow_fcs::Fcs; // If using flow-fcs crate
//! use std::time::Instant;
//!
//! // Load FCS file (example using flow-fcs)
//! let mut fcs = Fcs::open("data.fcs")?;
//! let n_events_initial = fcs.n_events();
//!
//! // Step 1: Remove margin events (optional)
//! let margin_config = peacoqc_rs::MarginConfig {
//!     channels: fcs.channel_names(),
//!     ..Default::default()
//! };
//! let margin_result = remove_margins(&fcs, &margin_config)?;
//! if margin_result.percentage_removed > 0.0 {
//!     fcs = fcs.filter(&margin_result.mask)?;
//! }
//!
//! // Step 2: Remove doublets (optional)
//! let doublet_config = peacoqc_rs::DoubletConfig::default();
//! let doublet_result = remove_doublets(&fcs, &doublet_config)?;
//! if doublet_result.percentage_removed > 0.0 {
//!     fcs = fcs.filter(&doublet_result.mask)?;
//! }
//!
//! // Step 3: Run PeacoQC
//! let start_time = Instant::now();
//! let channels = fcs.get_fluorescence_channels(); // Auto-detect channels
//! let config = PeacoQCConfig {
//!     channels,
//!     determine_good_cells: QCMode::All,
//!     mad: 6.0,
//!     it_limit: 0.6,
//!     consecutive_bins: 5,
//!     ..Default::default()
//! };
//!
//! let peacoqc_result = peacoqc(&fcs, &config)?;
//!
//! // Step 4: Apply filter
//! let clean_fcs = fcs.filter(&peacoqc_result.good_cells)?;
//! let n_events_final = clean_fcs.n_events();
//!
//! println!("Events: {} → {} ({:.2}% removed)",
//!     n_events_initial,
//!     n_events_final,
//!     peacoqc_result.percentage_removed);
//! println!("Processing time: {:.2}s", start_time.elapsed().as_secs_f64());
//! # Ok::<(), Box<dyn std::error::Error>>(())
//! ```
//!
//! See `examples/basic_usage.rs` and `examples/tauri_command.rs` for more complete examples.

pub mod error;
pub mod qc;
pub mod stats;

#[cfg(feature = "gpu")]
pub mod gpu;

// fcs module provides SimpleFcs for testing and examples
pub mod fcs;

pub use error::{PeacoQCError, Result};
pub use qc::{
    DoubletConfig, DoubletResult, MarginConfig, MarginResult, PeacoQCConfig, PeacoQCResult,
    QCExportFormat, QCExportOptions, QCMode, QCPlotConfig, create_qc_plots, export_csv_boolean,
    export_csv_numeric, export_json_metadata, peacoqc, remove_doublets, remove_margins,
};

#[cfg(feature = "flow-fcs")]
pub use crate::flow_fcs_impl::preprocess_fcs;

/// Trait for data structures that can be used with PeacoQC
///
/// Implement this trait on your FCS data structure to enable PeacoQC analysis.
/// This trait-based design allows PeacoQC to work with any data format, but we recommend
/// using the `flow-fcs` crate, which itself utilizes the `polars` crate for production applications.
///
/// # Example Implementation
///
/// ```rust
/// use peacoqc_rs::{PeacoQCData, Result};
///
/// struct MyFcs {
///     data: Vec<HashMap<String, Vec<f64>>>,
///     channels: Vec<String>,
/// }
///
/// impl PeacoQCData for MyFcs {
///     fn n_events(&self) -> usize {
///         self.data.get(0).map(|d| d.values().next().map(|v| v.len())).flatten().unwrap_or(0)
///     }
///
///     fn channel_names(&self) -> Vec<String> {
///         self.channels.clone()
///     }
///
///     fn get_channel_range(&self, _channel: &str) -> Option<(f64, f64)> {
///         Some((0.0, 262144.0)) // Return channel range if available
///     }
///
///     fn get_channel_f64(&self, channel: &str) -> Result<Vec<f64>> {
///         self.data.get(0)
///             .and_then(|d| d.get(channel).cloned())
///             .ok_or_else(|| peacoqc_rs::PeacoQCError::ChannelNotFound(channel.to_string()))
///     }
/// }
/// ```
pub trait PeacoQCData {
    fn n_events(&self) -> usize;
    fn channel_names(&self) -> Vec<String>;
    fn get_channel_range(&self, channel: &str) -> Option<(f64, f64)>;

    /// Get channel data as f64 values
    fn get_channel_f64(&self, channel: &str) -> Result<Vec<f64>>;

    fn get_fluorescence_channels(&self) -> Vec<String> {
        self.channel_names()
            .into_iter()
            .filter(|name| {
                let upper = name.to_uppercase();
                !upper.contains("FSC") && !upper.contains("SSC") && !upper.contains("TIME")
            })
            .collect()
    }
}

/// Extension trait for FCS data structures to add filtering capabilities
///
/// Implement this trait to enable filtering of events using a boolean mask.
/// This is required to apply PeacoQC results to your data.
///
/// # Example Implementation
///
/// ```rust
/// use peacoqc_rs::{FcsFilter, Result, PeacoQCError};
///
/// struct MyFcs {
///     data: Vec<Vec<f64>>, // rows x channels
/// }
///
/// impl FcsFilter for MyFcs {
///     fn filter(&self, mask: &[bool]) -> Result<Self> {
///         if mask.len() != self.data.len() {
///             return Err(PeacoQCError::StatsError(format!(
///                 "Mask length {} doesn't match event count {}",
///                 mask.len(),
///                 self.data.len()
///             )));
///         }
///
///         let filtered_data: Vec<Vec<f64>> = self.data
///             .iter()
///             .enumerate()
///             .filter_map(|(i, row)| if mask[i] { Some(row.clone()) } else { None })
///             .collect();
///
///         Ok(MyFcs { data: filtered_data })
///     }
/// }
/// ```
pub trait FcsFilter: Sized {
    /// Filter events using a boolean mask
    ///
    /// # Arguments
    /// * `mask` - Boolean slice where `true` means keep the event, `false` means remove
    ///
    /// # Returns
    /// A new instance with filtered data
    ///
    /// # Errors
    /// Returns an error if the mask length doesn't match the number of events
    fn filter(&self, mask: &[bool]) -> Result<Self>;
}

#[cfg(feature = "flow-fcs")]
mod flow_fcs_impl {
    use super::*;
    use flow_fcs::{file::Fcs, keyword::FloatableKeyword};
    use polars::prelude::*;
    use std::sync::Arc;

    impl PeacoQCData for Fcs {
        fn n_events(&self) -> usize {
            self.get_event_count_from_dataframe()
        }

        fn channel_names(&self) -> Vec<String> {
            self.parameters
                .values()
                .map(|p| p.channel_name.to_string())
                .collect()
        }

        fn get_channel_range(&self, channel: &str) -> Option<(f64, f64)> {
            // Find parameter by channel name
            let param = self
                .parameters
                .values()
                .find(|p| p.channel_name.as_ref() == channel)?;

            // Get range from metadata using $PnR keyword
            let key = format!("$P{}R", param.parameter_number);
            let max_range = self.metadata.get_float_keyword(&key).ok()?.get_f32();

            Some((0.0, *max_range as f64))
        }

        fn get_channel_f64(&self, channel: &str) -> Result<Vec<f64>> {
            let series = self
                .data_frame
                .column(channel)
                .map_err(|_| PeacoQCError::ChannelNotFound(channel.to_string()))?;

            // Handle both f32 and f64 columns (FCS files typically use f32)
            let values = if let Ok(f64_vals) = series.f64() {
                f64_vals.into_iter().filter_map(|x| x).collect()
            } else if let Ok(f32_vals) = series.f32() {
                // Cast f32 to f64
                f32_vals
                    .into_iter()
                    .filter_map(|x| x.map(|v| v as f64))
                    .collect()
            } else {
                return Err(PeacoQCError::InvalidChannel(format!(
                    "Channel {} is not numeric (dtype: {:?})",
                    channel,
                    series.dtype()
                )));
            };
            Ok(values)
        }

        fn get_fluorescence_channels(&self) -> Vec<String> {
            // Use the existing is_fluorescence() method
            self.parameters
                .values()
                .filter(|p| p.is_fluorescence())
                .map(|p| p.channel_name.to_string())
                .collect()
        }
    }

    impl FcsFilter for Fcs {
        fn filter(&self, mask: &[bool]) -> Result<Self> {
            let n_events = self.get_event_count_from_dataframe();
            if mask.len() != n_events {
                return Err(PeacoQCError::StatsError(format!(
                    "Mask length {} doesn't match event count {}",
                    mask.len(),
                    n_events
                )));
            }

            // Convert boolean slice to Vec<bool> for Series creation
            let mask_vec: Vec<bool> = mask.to_vec();

            // Create a boolean Series from the mask
            let mask_series = Series::new("mask".into(), mask_vec);

            // Get the boolean ChunkedArray from the Series
            let mask_ca = mask_series.bool().map_err(|e| {
                PeacoQCError::StatsError(format!("Failed to convert mask to boolean array: {}", e))
            })?;

            // Filter DataFrame using boolean mask
            let filtered_df = self
                .data_frame
                .filter(&mask_ca)
                .map_err(|e| PeacoQCError::PolarsError(e))?;

            // Create new Fcs with filtered data
            // Clone the Fcs and replace the data_frame
            let mut filtered_fcs = self.clone();
            filtered_fcs.data_frame = Arc::new(filtered_df);

            // Note: Metadata and parameters are preserved, but the event count
            // in the DataFrame will reflect the filtered data
            Ok(filtered_fcs)
        }
    }

    /// Apply preprocessing steps (compensation and transformation) to FCS data
    ///
    /// This function applies compensation and/or transformation before running PeacoQC,
    /// matching the original R implementation's preprocessing steps:
    /// ```r
    /// ff <- flowCore::compensate(ff, flowCore::keyword(ff)$SPILL)
    /// ff <- flowCore::transform(ff, flowCore::estimateLogicle(ff, ...))
    /// ```
    ///
    /// **Important**: Transformation (biexponential/logicle) should almost always be applied before PeacoQC.
    /// Without transformation, raw fluorescence values have huge dynamic ranges that cause
    /// the MAD detection to remove far more events than expected. The R implementation
    /// typically works with FlowJo-exported data that has already been biexponentially transformed.
    ///
    /// # Arguments
    /// * `fcs` - FCS data structure
    /// * `apply_compensation` - Whether to apply compensation from file's $SPILLOVER keyword
    /// * `apply_transformation` - Whether to apply biexponential transformation to fluorescence channels (matching FlowJo defaults)
    /// * `transform_cofactor` - Unused parameter (kept for API compatibility)
    ///
    /// # Returns
    /// A new Fcs instance with preprocessing applied, or the original if no preprocessing requested
    ///
    /// # Errors
    /// Returns an error if compensation is requested but no $SPILLOVER keyword is found
    pub fn preprocess_fcs(
        mut fcs: Fcs,
        apply_compensation: bool,
        apply_transformation: bool,
        _transform_cofactor: f32, // Currently unused, apply_default_arcsinh_transform uses its own default
    ) -> anyhow::Result<Fcs> {
        use tracing::info;

        // Step 1: Apply compensation (if requested)
        if apply_compensation {
            if !fcs.has_compensation() {
                return Err(anyhow::anyhow!(
                    "Compensation requested but no $SPILLOVER keyword found in FCS file"
                ));
            }
            let compensated_df = fcs
                .apply_file_compensation()
                .map_err(|e| anyhow::anyhow!("Failed to apply compensation: {}", e))?;
            // EventDataFrame is already Arc<DataFrame>, no need to wrap again
            fcs.data_frame = compensated_df;
            info!("Applied compensation from $SPILLOVER keyword");
        }

        // Step 2: Apply transformation (if requested)
        // R PeacoQC logic:
        //   - If compensation available: use estimateLogicle (biexponential/logicle)
        //   - If no compensation: use arcsinh with cofactor=2000
        // This matches R's behavior exactly
        if apply_transformation {
            let has_comp = fcs.has_compensation();
            let transformed_df = if has_comp {
                // R uses estimateLogicle (biexponential) when compensation is available
                fcs.apply_default_biexponential_transform().map_err(|e| {
                    anyhow::anyhow!("Failed to apply biexponential transformation: {}", e)
                })?
            } else {
                // R falls back to arcsinh if no compensation (cofactor=2000)
                fcs.apply_default_arcsinh_transform()
                    .map_err(|e| anyhow::anyhow!("Failed to apply arcsinh transformation: {}", e))?
            };

            // EventDataFrame is already Arc<DataFrame>, no need to wrap again
            fcs.data_frame = transformed_df;

            if has_comp {
                info!(
                    "Applied biexponential (logicle) transformation to fluorescence channels (matching R's estimateLogicle)"
                );
            } else {
                info!(
                    "Applied arcsinh transformation to fluorescence channels with cofactor=2000 (matching R's fallback)"
                );
            }
        }

        Ok(fcs)
    }
}