peacoqc_rs/lib.rs
1//! PeacoQC quality control algorithms for flow cytometry
2//!
3//! This crate implements all PeacoQC algorithms and provides
4//! a trait-based interface that works with any FCS data structure.
5//!
6//! # Quick Start
7//!
8//! ```no_run
9//! use peacoqc_rs::{PeacoQCConfig, PeacoQCData, QCMode, peacoqc};
10//!
11//! // Assuming you have an FCS struct that implements PeacoQCData
12//! # struct MyFcs;
13//! # impl PeacoQCData for MyFcs {
14//! # fn n_events(&self) -> usize { 0 }
15//! # fn channel_names(&self) -> Vec<String> { vec![] }
16//! # fn get_channel_range(&self, _: &str) -> Option<(f64, f64)> { None }
17//! # fn get_channel_f64(&self, _: &str) -> peacoqc_rs::Result<Vec<f64>> {
18//! # Ok(vec![])
19//! # }
20//! # }
21//! # let fcs = MyFcs;
22//! let config = PeacoQCConfig {
23//! channels: vec!["FL1-A".to_string(), "FL2-A".to_string()],
24//! determine_good_cells: QCMode::All,
25//! ..Default::default()
26//! };
27//!
28//! let result = peacoqc(&fcs, &config)?;
29//! println!("Removed {:.2}% of events", result.percentage_removed);
30//! # Ok::<(), peacoqc_rs::PeacoQCError>(())
31//! ```
32//!
33//! # Integration Example
34//!
35//! Here's a complete example of integrating PeacoQC into an application:
36//!
37//! ```rust,no_run
38//! use peacoqc_rs::{PeacoQCConfig, PeacoQCData, QCMode, peacoqc, remove_margins, remove_doublets};
39//! use flow_fcs::Fcs; // If using flow-fcs crate
40//! use std::time::Instant;
41//!
42//! // Load FCS file (example using flow-fcs)
43//! let mut fcs = Fcs::open("data.fcs")?;
44//! let n_events_initial = fcs.n_events();
45//!
46//! // Step 1: Remove margin events (optional)
47//! let margin_config = peacoqc_rs::MarginConfig {
48//! channels: fcs.channel_names(),
49//! ..Default::default()
50//! };
51//! let margin_result = remove_margins(&fcs, &margin_config)?;
52//! if margin_result.percentage_removed > 0.0 {
53//! fcs = fcs.filter(&margin_result.mask)?;
54//! }
55//!
56//! // Step 2: Remove doublets (optional)
57//! let doublet_config = peacoqc_rs::DoubletConfig::default();
58//! let doublet_result = remove_doublets(&fcs, &doublet_config)?;
59//! if doublet_result.percentage_removed > 0.0 {
60//! fcs = fcs.filter(&doublet_result.mask)?;
61//! }
62//!
63//! // Step 3: Run PeacoQC
64//! let start_time = Instant::now();
65//! let channels = fcs.get_fluorescence_channels(); // Auto-detect channels
66//! let config = PeacoQCConfig {
67//! channels,
68//! determine_good_cells: QCMode::All,
69//! mad: 6.0,
70//! it_limit: 0.6,
71//! consecutive_bins: 5,
72//! ..Default::default()
73//! };
74//!
75//! let peacoqc_result = peacoqc(&fcs, &config)?;
76//!
77//! // Step 4: Apply filter
78//! let clean_fcs = fcs.filter(&peacoqc_result.good_cells)?;
79//! let n_events_final = clean_fcs.n_events();
80//!
81//! println!("Events: {} → {} ({:.2}% removed)",
82//! n_events_initial,
83//! n_events_final,
84//! peacoqc_result.percentage_removed);
85//! println!("Processing time: {:.2}s", start_time.elapsed().as_secs_f64());
86//! # Ok::<(), Box<dyn std::error::Error>>(())
87//! ```
88//!
89//! See `examples/basic_usage.rs` and `examples/tauri_command.rs` for more complete examples.
90
91pub mod error;
92pub mod qc;
93pub mod stats;
94
95#[cfg(feature = "gpu")]
96pub mod gpu;
97
98// fcs module provides SimpleFcs for testing and examples
99pub mod fcs;
100
101pub use error::{PeacoQCError, Result};
102pub use qc::{
103 DoubletConfig, DoubletResult, MarginConfig, MarginResult, PeacoQCConfig, PeacoQCResult,
104 QCExportFormat, QCExportOptions, QCMode, QCPlotConfig, create_qc_plots, export_csv_boolean,
105 export_csv_numeric, export_json_metadata, peacoqc, remove_doublets, remove_margins,
106};
107
108#[cfg(feature = "flow-fcs")]
109pub use crate::flow_fcs_impl::preprocess_fcs;
110
111/// Trait for data structures that can be used with PeacoQC
112///
113/// Implement this trait on your FCS data structure to enable PeacoQC analysis.
114/// This trait-based design allows PeacoQC to work with any data format, but we recommend
115/// using the `flow-fcs` crate, which itself utilizes the `polars` crate for production applications.
116///
117/// # Example Implementation
118///
119/// ```rust
120/// use peacoqc_rs::{PeacoQCData, Result};
121///
122/// struct MyFcs {
123/// data: Vec<HashMap<String, Vec<f64>>>,
124/// channels: Vec<String>,
125/// }
126///
127/// impl PeacoQCData for MyFcs {
128/// fn n_events(&self) -> usize {
129/// self.data.get(0).map(|d| d.values().next().map(|v| v.len())).flatten().unwrap_or(0)
130/// }
131///
132/// fn channel_names(&self) -> Vec<String> {
133/// self.channels.clone()
134/// }
135///
136/// fn get_channel_range(&self, _channel: &str) -> Option<(f64, f64)> {
137/// Some((0.0, 262144.0)) // Return channel range if available
138/// }
139///
140/// fn get_channel_f64(&self, channel: &str) -> Result<Vec<f64>> {
141/// self.data.get(0)
142/// .and_then(|d| d.get(channel).cloned())
143/// .ok_or_else(|| peacoqc_rs::PeacoQCError::ChannelNotFound(channel.to_string()))
144/// }
145/// }
146/// ```
147pub trait PeacoQCData {
148 fn n_events(&self) -> usize;
149 fn channel_names(&self) -> Vec<String>;
150 fn get_channel_range(&self, channel: &str) -> Option<(f64, f64)>;
151
152 /// Get channel data as f64 values
153 fn get_channel_f64(&self, channel: &str) -> Result<Vec<f64>>;
154
155 fn get_fluorescence_channels(&self) -> Vec<String> {
156 self.channel_names()
157 .into_iter()
158 .filter(|name| {
159 let upper = name.to_uppercase();
160 !upper.contains("FSC") && !upper.contains("SSC") && !upper.contains("TIME")
161 })
162 .collect()
163 }
164}
165
166/// Extension trait for FCS data structures to add filtering capabilities
167///
168/// Implement this trait to enable filtering of events using a boolean mask.
169/// This is required to apply PeacoQC results to your data.
170///
171/// # Example Implementation
172///
173/// ```rust
174/// use peacoqc_rs::{FcsFilter, Result, PeacoQCError};
175///
176/// struct MyFcs {
177/// data: Vec<Vec<f64>>, // rows x channels
178/// }
179///
180/// impl FcsFilter for MyFcs {
181/// fn filter(&self, mask: &[bool]) -> Result<Self> {
182/// if mask.len() != self.data.len() {
183/// return Err(PeacoQCError::StatsError(format!(
184/// "Mask length {} doesn't match event count {}",
185/// mask.len(),
186/// self.data.len()
187/// )));
188/// }
189///
190/// let filtered_data: Vec<Vec<f64>> = self.data
191/// .iter()
192/// .enumerate()
193/// .filter_map(|(i, row)| if mask[i] { Some(row.clone()) } else { None })
194/// .collect();
195///
196/// Ok(MyFcs { data: filtered_data })
197/// }
198/// }
199/// ```
200pub trait FcsFilter: Sized {
201 /// Filter events using a boolean mask
202 ///
203 /// # Arguments
204 /// * `mask` - Boolean slice where `true` means keep the event, `false` means remove
205 ///
206 /// # Returns
207 /// A new instance with filtered data
208 ///
209 /// # Errors
210 /// Returns an error if the mask length doesn't match the number of events
211 fn filter(&self, mask: &[bool]) -> Result<Self>;
212}
213
214#[cfg(feature = "flow-fcs")]
215mod flow_fcs_impl {
216 use super::*;
217 use flow_fcs::{file::Fcs, keyword::FloatableKeyword};
218 use polars::prelude::*;
219 use std::sync::Arc;
220
221 impl PeacoQCData for Fcs {
222 fn n_events(&self) -> usize {
223 self.get_event_count_from_dataframe()
224 }
225
226 fn channel_names(&self) -> Vec<String> {
227 self.parameters
228 .values()
229 .map(|p| p.channel_name.to_string())
230 .collect()
231 }
232
233 fn get_channel_range(&self, channel: &str) -> Option<(f64, f64)> {
234 // Find parameter by channel name
235 let param = self
236 .parameters
237 .values()
238 .find(|p| p.channel_name.as_ref() == channel)?;
239
240 // Get range from metadata using $PnR keyword
241 let key = format!("$P{}R", param.parameter_number);
242 let max_range = self.metadata.get_float_keyword(&key).ok()?.get_f32();
243
244 Some((0.0, *max_range as f64))
245 }
246
247 fn get_channel_f64(&self, channel: &str) -> Result<Vec<f64>> {
248 let series = self
249 .data_frame
250 .column(channel)
251 .map_err(|_| PeacoQCError::ChannelNotFound(channel.to_string()))?;
252
253 // Handle both f32 and f64 columns (FCS files typically use f32)
254 let values = if let Ok(f64_vals) = series.f64() {
255 f64_vals.into_iter().filter_map(|x| x).collect()
256 } else if let Ok(f32_vals) = series.f32() {
257 // Cast f32 to f64
258 f32_vals
259 .into_iter()
260 .filter_map(|x| x.map(|v| v as f64))
261 .collect()
262 } else {
263 return Err(PeacoQCError::InvalidChannel(format!(
264 "Channel {} is not numeric (dtype: {:?})",
265 channel,
266 series.dtype()
267 )));
268 };
269 Ok(values)
270 }
271
272 fn get_fluorescence_channels(&self) -> Vec<String> {
273 // Use the existing is_fluorescence() method
274 self.parameters
275 .values()
276 .filter(|p| p.is_fluorescence())
277 .map(|p| p.channel_name.to_string())
278 .collect()
279 }
280 }
281
282 impl FcsFilter for Fcs {
283 fn filter(&self, mask: &[bool]) -> Result<Self> {
284 let n_events = self.get_event_count_from_dataframe();
285 if mask.len() != n_events {
286 return Err(PeacoQCError::StatsError(format!(
287 "Mask length {} doesn't match event count {}",
288 mask.len(),
289 n_events
290 )));
291 }
292
293 // Convert boolean slice to Vec<bool> for Series creation
294 let mask_vec: Vec<bool> = mask.to_vec();
295
296 // Create a boolean Series from the mask
297 let mask_series = Series::new("mask".into(), mask_vec);
298
299 // Get the boolean ChunkedArray from the Series
300 let mask_ca = mask_series.bool().map_err(|e| {
301 PeacoQCError::StatsError(format!("Failed to convert mask to boolean array: {}", e))
302 })?;
303
304 // Filter DataFrame using boolean mask
305 let filtered_df = self
306 .data_frame
307 .filter(&mask_ca)
308 .map_err(|e| PeacoQCError::PolarsError(e))?;
309
310 // Create new Fcs with filtered data
311 // Clone the Fcs and replace the data_frame
312 let mut filtered_fcs = self.clone();
313 filtered_fcs.data_frame = Arc::new(filtered_df);
314
315 // Note: Metadata and parameters are preserved, but the event count
316 // in the DataFrame will reflect the filtered data
317 Ok(filtered_fcs)
318 }
319 }
320
321 /// Apply preprocessing steps (compensation and transformation) to FCS data
322 ///
323 /// This function applies compensation and/or transformation before running PeacoQC,
324 /// matching the original R implementation's preprocessing steps:
325 /// ```r
326 /// ff <- flowCore::compensate(ff, flowCore::keyword(ff)$SPILL)
327 /// ff <- flowCore::transform(ff, flowCore::estimateLogicle(ff, ...))
328 /// ```
329 ///
330 /// **Important**: Transformation (biexponential/logicle) should almost always be applied before PeacoQC.
331 /// Without transformation, raw fluorescence values have huge dynamic ranges that cause
332 /// the MAD detection to remove far more events than expected. The R implementation
333 /// typically works with FlowJo-exported data that has already been biexponentially transformed.
334 ///
335 /// # Arguments
336 /// * `fcs` - FCS data structure
337 /// * `apply_compensation` - Whether to apply compensation from file's $SPILLOVER keyword
338 /// * `apply_transformation` - Whether to apply biexponential transformation to fluorescence channels (matching FlowJo defaults)
339 /// * `transform_cofactor` - Unused parameter (kept for API compatibility)
340 ///
341 /// # Returns
342 /// A new Fcs instance with preprocessing applied, or the original if no preprocessing requested
343 ///
344 /// # Errors
345 /// Returns an error if compensation is requested but no $SPILLOVER keyword is found
346 pub fn preprocess_fcs(
347 mut fcs: Fcs,
348 apply_compensation: bool,
349 apply_transformation: bool,
350 _transform_cofactor: f32, // Currently unused, apply_default_arcsinh_transform uses its own default
351 ) -> anyhow::Result<Fcs> {
352 use tracing::info;
353
354 // Step 1: Apply compensation (if requested and available)
355 // Skip compensation when $SPILLOVER is missing (e.g. some IntelliCyt iQue3 exports);
356 // transformation will use arcsinh fallback instead of biexponential.
357 if apply_compensation && fcs.has_compensation() {
358 let compensated_df = fcs
359 .apply_file_compensation()
360 .map_err(|e| anyhow::anyhow!("Failed to apply compensation: {}", e))?;
361 // EventDataFrame is already Arc<DataFrame>, no need to wrap again
362 fcs.data_frame = compensated_df;
363 info!("Applied compensation from $SPILLOVER keyword");
364 } else if apply_compensation && !fcs.has_compensation() {
365 info!("No $SPILLOVER keyword found; skipping compensation (will use arcsinh transform)");
366 }
367
368 // Step 2: Apply transformation (if requested)
369 // R PeacoQC logic:
370 // - If compensation available: use estimateLogicle (biexponential/logicle)
371 // - If no compensation: use arcsinh with cofactor=2000
372 // This matches R's behavior exactly
373 if apply_transformation {
374 let has_comp = fcs.has_compensation();
375 let transformed_df = if has_comp {
376 // R uses estimateLogicle (biexponential) when compensation is available
377 fcs.apply_default_biexponential_transform().map_err(|e| {
378 anyhow::anyhow!("Failed to apply biexponential transformation: {}", e)
379 })?
380 } else {
381 // R falls back to arcsinh if no compensation (cofactor=2000)
382 fcs.apply_default_arcsinh_transform()
383 .map_err(|e| anyhow::anyhow!("Failed to apply arcsinh transformation: {}", e))?
384 };
385
386 // EventDataFrame is already Arc<DataFrame>, no need to wrap again
387 fcs.data_frame = transformed_df;
388
389 if has_comp {
390 info!(
391 "Applied biexponential (logicle) transformation to fluorescence channels (matching R's estimateLogicle)"
392 );
393 } else {
394 info!(
395 "Applied arcsinh transformation to fluorescence channels with cofactor=2000 (matching R's fallback)"
396 );
397 }
398 }
399
400 Ok(fcs)
401 }
402}