Skip to main content

peacoqc_rs/
lib.rs

1//! PeacoQC quality control algorithms for flow cytometry
2//!
3//! This crate implements all PeacoQC algorithms and provides
4//! a trait-based interface that works with any FCS data structure.
5//!
6//! # Quick Start
7//!
8//! ```no_run
9//! use peacoqc_rs::{PeacoQCConfig, PeacoQCData, QCMode, peacoqc};
10//!
11//! // Assuming you have an FCS struct that implements PeacoQCData
12//! # struct MyFcs;
13//! # impl PeacoQCData for MyFcs {
14//! #     fn n_events(&self) -> usize { 0 }
15//! #     fn channel_names(&self) -> Vec<String> { vec![] }
16//! #     fn get_channel_range(&self, _: &str) -> Option<(f64, f64)> { None }
17//! #     fn get_channel_f64(&self, _: &str) -> peacoqc_rs::Result<Vec<f64>> {
18//! #         Ok(vec![])
19//! #     }
20//! # }
21//! # let fcs = MyFcs;
22//! let config = PeacoQCConfig {
23//!     channels: vec!["FL1-A".to_string(), "FL2-A".to_string()],
24//!     determine_good_cells: QCMode::All,
25//!     ..Default::default()
26//! };
27//!
28//! let result = peacoqc(&fcs, &config)?;
29//! println!("Removed {:.2}% of events", result.percentage_removed);
30//! # Ok::<(), peacoqc_rs::PeacoQCError>(())
31//! ```
32//!
33//! # Integration Example
34//!
35//! Here's a complete example of integrating PeacoQC into an application:
36//!
37//! ```rust,no_run
38//! use peacoqc_rs::{PeacoQCConfig, PeacoQCData, QCMode, peacoqc, remove_margins, remove_doublets};
39//! use flow_fcs::Fcs; // If using flow-fcs crate
40//! use std::time::Instant;
41//!
42//! // Load FCS file (example using flow-fcs)
43//! let mut fcs = Fcs::open("data.fcs")?;
44//! let n_events_initial = fcs.n_events();
45//!
46//! // Step 1: Remove margin events (optional)
47//! let margin_config = peacoqc_rs::MarginConfig {
48//!     channels: fcs.channel_names(),
49//!     ..Default::default()
50//! };
51//! let margin_result = remove_margins(&fcs, &margin_config)?;
52//! if margin_result.percentage_removed > 0.0 {
53//!     fcs = fcs.filter(&margin_result.mask)?;
54//! }
55//!
56//! // Step 2: Remove doublets (optional)
57//! let doublet_config = peacoqc_rs::DoubletConfig::default();
58//! let doublet_result = remove_doublets(&fcs, &doublet_config)?;
59//! if doublet_result.percentage_removed > 0.0 {
60//!     fcs = fcs.filter(&doublet_result.mask)?;
61//! }
62//!
63//! // Step 3: Run PeacoQC
64//! let start_time = Instant::now();
65//! let channels = fcs.get_fluorescence_channels(); // Auto-detect channels
66//! let config = PeacoQCConfig {
67//!     channels,
68//!     determine_good_cells: QCMode::All,
69//!     mad: 6.0,
70//!     it_limit: 0.6,
71//!     consecutive_bins: 5,
72//!     ..Default::default()
73//! };
74//!
75//! let peacoqc_result = peacoqc(&fcs, &config)?;
76//!
77//! // Step 4: Apply filter
78//! let clean_fcs = fcs.filter(&peacoqc_result.good_cells)?;
79//! let n_events_final = clean_fcs.n_events();
80//!
81//! println!("Events: {} → {} ({:.2}% removed)",
82//!     n_events_initial,
83//!     n_events_final,
84//!     peacoqc_result.percentage_removed);
85//! println!("Processing time: {:.2}s", start_time.elapsed().as_secs_f64());
86//! # Ok::<(), Box<dyn std::error::Error>>(())
87//! ```
88//!
89//! See `examples/basic_usage.rs` and `examples/tauri_command.rs` for more complete examples.
90
91pub mod error;
92pub mod qc;
93pub mod stats;
94
95#[cfg(feature = "gpu")]
96pub mod gpu;
97
98// fcs module provides SimpleFcs for testing and examples
99pub mod fcs;
100
101pub use error::{PeacoQCError, Result};
102pub use qc::{
103    DoubletConfig, DoubletResult, MarginConfig, MarginResult, PeacoQCConfig, PeacoQCResult,
104    QCExportFormat, QCExportOptions, QCMode, QCPlotConfig, create_qc_plots, export_csv_boolean,
105    export_csv_numeric, export_json_metadata, peacoqc, remove_doublets, remove_margins,
106};
107
108#[cfg(feature = "flow-fcs")]
109pub use crate::flow_fcs_impl::preprocess_fcs;
110
111/// Trait for data structures that can be used with PeacoQC
112///
113/// Implement this trait on your FCS data structure to enable PeacoQC analysis.
114/// This trait-based design allows PeacoQC to work with any data format, but we recommend
115/// using the `flow-fcs` crate, which itself utilizes the `polars` crate for production applications.
116///
117/// # Example Implementation
118///
119/// ```rust
120/// use peacoqc_rs::{PeacoQCData, Result};
121///
122/// struct MyFcs {
123///     data: Vec<HashMap<String, Vec<f64>>>,
124///     channels: Vec<String>,
125/// }
126///
127/// impl PeacoQCData for MyFcs {
128///     fn n_events(&self) -> usize {
129///         self.data.get(0).map(|d| d.values().next().map(|v| v.len())).flatten().unwrap_or(0)
130///     }
131///
132///     fn channel_names(&self) -> Vec<String> {
133///         self.channels.clone()
134///     }
135///
136///     fn get_channel_range(&self, _channel: &str) -> Option<(f64, f64)> {
137///         Some((0.0, 262144.0)) // Return channel range if available
138///     }
139///
140///     fn get_channel_f64(&self, channel: &str) -> Result<Vec<f64>> {
141///         self.data.get(0)
142///             .and_then(|d| d.get(channel).cloned())
143///             .ok_or_else(|| peacoqc_rs::PeacoQCError::ChannelNotFound(channel.to_string()))
144///     }
145/// }
146/// ```
147pub trait PeacoQCData {
148    fn n_events(&self) -> usize;
149    fn channel_names(&self) -> Vec<String>;
150    fn get_channel_range(&self, channel: &str) -> Option<(f64, f64)>;
151
152    /// Get channel data as f64 values
153    fn get_channel_f64(&self, channel: &str) -> Result<Vec<f64>>;
154
155    fn get_fluorescence_channels(&self) -> Vec<String> {
156        self.channel_names()
157            .into_iter()
158            .filter(|name| {
159                let upper = name.to_uppercase();
160                !upper.contains("FSC") && !upper.contains("SSC") && !upper.contains("TIME")
161            })
162            .collect()
163    }
164}
165
166/// Extension trait for FCS data structures to add filtering capabilities
167///
168/// Implement this trait to enable filtering of events using a boolean mask.
169/// This is required to apply PeacoQC results to your data.
170///
171/// # Example Implementation
172///
173/// ```rust
174/// use peacoqc_rs::{FcsFilter, Result, PeacoQCError};
175///
176/// struct MyFcs {
177///     data: Vec<Vec<f64>>, // rows x channels
178/// }
179///
180/// impl FcsFilter for MyFcs {
181///     fn filter(&self, mask: &[bool]) -> Result<Self> {
182///         if mask.len() != self.data.len() {
183///             return Err(PeacoQCError::StatsError(format!(
184///                 "Mask length {} doesn't match event count {}",
185///                 mask.len(),
186///                 self.data.len()
187///             )));
188///         }
189///
190///         let filtered_data: Vec<Vec<f64>> = self.data
191///             .iter()
192///             .enumerate()
193///             .filter_map(|(i, row)| if mask[i] { Some(row.clone()) } else { None })
194///             .collect();
195///
196///         Ok(MyFcs { data: filtered_data })
197///     }
198/// }
199/// ```
200pub trait FcsFilter: Sized {
201    /// Filter events using a boolean mask
202    ///
203    /// # Arguments
204    /// * `mask` - Boolean slice where `true` means keep the event, `false` means remove
205    ///
206    /// # Returns
207    /// A new instance with filtered data
208    ///
209    /// # Errors
210    /// Returns an error if the mask length doesn't match the number of events
211    fn filter(&self, mask: &[bool]) -> Result<Self>;
212}
213
214#[cfg(feature = "flow-fcs")]
215mod flow_fcs_impl {
216    use super::*;
217    use flow_fcs::{file::Fcs, keyword::FloatableKeyword};
218    use polars::prelude::*;
219    use std::sync::Arc;
220
221    impl PeacoQCData for Fcs {
222        fn n_events(&self) -> usize {
223            self.get_event_count_from_dataframe()
224        }
225
226        fn channel_names(&self) -> Vec<String> {
227            self.parameters
228                .values()
229                .map(|p| p.channel_name.to_string())
230                .collect()
231        }
232
233        fn get_channel_range(&self, channel: &str) -> Option<(f64, f64)> {
234            // Find parameter by channel name
235            let param = self
236                .parameters
237                .values()
238                .find(|p| p.channel_name.as_ref() == channel)?;
239
240            // Get range from metadata using $PnR keyword
241            let key = format!("$P{}R", param.parameter_number);
242            let max_range = self.metadata.get_float_keyword(&key).ok()?.get_f32();
243
244            Some((0.0, *max_range as f64))
245        }
246
247        fn get_channel_f64(&self, channel: &str) -> Result<Vec<f64>> {
248            let series = self
249                .data_frame
250                .column(channel)
251                .map_err(|_| PeacoQCError::ChannelNotFound(channel.to_string()))?;
252
253            // Handle both f32 and f64 columns (FCS files typically use f32)
254            let values = if let Ok(f64_vals) = series.f64() {
255                f64_vals.into_iter().filter_map(|x| x).collect()
256            } else if let Ok(f32_vals) = series.f32() {
257                // Cast f32 to f64
258                f32_vals
259                    .into_iter()
260                    .filter_map(|x| x.map(|v| v as f64))
261                    .collect()
262            } else {
263                return Err(PeacoQCError::InvalidChannel(format!(
264                    "Channel {} is not numeric (dtype: {:?})",
265                    channel,
266                    series.dtype()
267                )));
268            };
269            Ok(values)
270        }
271
272        fn get_fluorescence_channels(&self) -> Vec<String> {
273            // Use the existing is_fluorescence() method
274            self.parameters
275                .values()
276                .filter(|p| p.is_fluorescence())
277                .map(|p| p.channel_name.to_string())
278                .collect()
279        }
280    }
281
282    impl FcsFilter for Fcs {
283        fn filter(&self, mask: &[bool]) -> Result<Self> {
284            let n_events = self.get_event_count_from_dataframe();
285            if mask.len() != n_events {
286                return Err(PeacoQCError::StatsError(format!(
287                    "Mask length {} doesn't match event count {}",
288                    mask.len(),
289                    n_events
290                )));
291            }
292
293            // Convert boolean slice to Vec<bool> for Series creation
294            let mask_vec: Vec<bool> = mask.to_vec();
295
296            // Create a boolean Series from the mask
297            let mask_series = Series::new("mask".into(), mask_vec);
298
299            // Get the boolean ChunkedArray from the Series
300            let mask_ca = mask_series.bool().map_err(|e| {
301                PeacoQCError::StatsError(format!("Failed to convert mask to boolean array: {}", e))
302            })?;
303
304            // Filter DataFrame using boolean mask
305            let filtered_df = self
306                .data_frame
307                .filter(&mask_ca)
308                .map_err(|e| PeacoQCError::PolarsError(e))?;
309
310            // Create new Fcs with filtered data
311            // Clone the Fcs and replace the data_frame
312            let mut filtered_fcs = self.clone();
313            filtered_fcs.data_frame = Arc::new(filtered_df);
314
315            // Note: Metadata and parameters are preserved, but the event count
316            // in the DataFrame will reflect the filtered data
317            Ok(filtered_fcs)
318        }
319    }
320
321    /// Apply preprocessing steps (compensation and transformation) to FCS data
322    ///
323    /// This function applies compensation and/or transformation before running PeacoQC,
324    /// matching the original R implementation's preprocessing steps:
325    /// ```r
326    /// ff <- flowCore::compensate(ff, flowCore::keyword(ff)$SPILL)
327    /// ff <- flowCore::transform(ff, flowCore::estimateLogicle(ff, ...))
328    /// ```
329    ///
330    /// **Important**: Transformation (biexponential/logicle) should almost always be applied before PeacoQC.
331    /// Without transformation, raw fluorescence values have huge dynamic ranges that cause
332    /// the MAD detection to remove far more events than expected. The R implementation
333    /// typically works with FlowJo-exported data that has already been biexponentially transformed.
334    ///
335    /// # Arguments
336    /// * `fcs` - FCS data structure
337    /// * `apply_compensation` - Whether to apply compensation from file's $SPILLOVER keyword
338    /// * `apply_transformation` - Whether to apply biexponential transformation to fluorescence channels (matching FlowJo defaults)
339    /// * `transform_cofactor` - Unused parameter (kept for API compatibility)
340    ///
341    /// # Returns
342    /// A new Fcs instance with preprocessing applied, or the original if no preprocessing requested
343    ///
344    /// # Errors
345    /// Returns an error if compensation is requested but no $SPILLOVER keyword is found
346    pub fn preprocess_fcs(
347        mut fcs: Fcs,
348        apply_compensation: bool,
349        apply_transformation: bool,
350        _transform_cofactor: f32, // Currently unused, apply_default_arcsinh_transform uses its own default
351    ) -> anyhow::Result<Fcs> {
352        use tracing::info;
353
354        // Step 1: Apply compensation (if requested and available)
355        // Skip compensation when $SPILLOVER is missing (e.g. some IntelliCyt iQue3 exports);
356        // transformation will use arcsinh fallback instead of biexponential.
357        if apply_compensation && fcs.has_compensation() {
358            let compensated_df = fcs
359                .apply_file_compensation()
360                .map_err(|e| anyhow::anyhow!("Failed to apply compensation: {}", e))?;
361            // EventDataFrame is already Arc<DataFrame>, no need to wrap again
362            fcs.data_frame = compensated_df;
363            info!("Applied compensation from $SPILLOVER keyword");
364        } else if apply_compensation && !fcs.has_compensation() {
365            info!("No $SPILLOVER keyword found; skipping compensation (will use arcsinh transform)");
366        }
367
368        // Step 2: Apply transformation (if requested)
369        // R PeacoQC logic:
370        //   - If compensation available: use estimateLogicle (biexponential/logicle)
371        //   - If no compensation: use arcsinh with cofactor=2000
372        // This matches R's behavior exactly
373        if apply_transformation {
374            let has_comp = fcs.has_compensation();
375            let transformed_df = if has_comp {
376                // R uses estimateLogicle (biexponential) when compensation is available
377                fcs.apply_default_biexponential_transform().map_err(|e| {
378                    anyhow::anyhow!("Failed to apply biexponential transformation: {}", e)
379                })?
380            } else {
381                // R falls back to arcsinh if no compensation (cofactor=2000)
382                fcs.apply_default_arcsinh_transform()
383                    .map_err(|e| anyhow::anyhow!("Failed to apply arcsinh transformation: {}", e))?
384            };
385
386            // EventDataFrame is already Arc<DataFrame>, no need to wrap again
387            fcs.data_frame = transformed_df;
388
389            if has_comp {
390                info!(
391                    "Applied biexponential (logicle) transformation to fluorescence channels (matching R's estimateLogicle)"
392                );
393            } else {
394                info!(
395                    "Applied arcsinh transformation to fluorescence channels with cofactor=2000 (matching R's fallback)"
396                );
397            }
398        }
399
400        Ok(fcs)
401    }
402}