use anyhow::{Context, Result};
use clap::Subcommand;
use faer::Mat;
use flow_fcs::{EventDataFrame, Fcs};
use flow_gates::automated::{
DoubletGateConfig, DoubletMethod, PreprocessingConfig, PreprocessingGates, ScatterGateConfig,
ScatterGateMethod, create_preprocessing_gates,
};
use flow_gates::filtering::EventIndex;
use flow_plots::options::{
AxisOptions, BasePlotOptions, DensityPlotOptions, SpectralSignaturePlotOptions,
};
use flow_plots::render::RenderConfig;
use flow_plots::{DensityPlot, Plot, SpectralSignaturePlot};
use flow_tru_ols::{TruOlsUnmixing, UnmixingStrategy};
use flow_utils::KernelDensity;
use ndarray::Array2;
use serde_json;
use std::collections::HashSet;
use std::fs;
use std::io::{Write, stdin, stdout};
use std::path::PathBuf;
use tracing::{info, warn};
fn count_delimiters(name: &str) -> usize {
name.chars()
.filter(|c| c.is_whitespace() || *c == '-' || *c == '_')
.count()
}
fn find_most_ambiguous_endmember(control_files: &[(String, PathBuf)]) -> Option<(usize, usize)> {
if control_files.is_empty() {
return None;
}
let mut max_delim = 0;
let mut max_idx = 0;
for (idx, (endmember, _)) in control_files.iter().enumerate() {
let delim_count = count_delimiters(endmember);
if delim_count > max_delim {
max_delim = delim_count;
max_idx = idx;
}
}
if max_delim > 0 {
Some((max_idx, max_delim))
} else {
None
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct DelimiterPreference {
use_space: bool,
use_hyphen: bool,
use_underscore: bool,
}
impl DelimiterPreference {
fn infer(original: &str, chosen: &str) -> Self {
if original == chosen {
Self {
use_space: true,
use_hyphen: true,
use_underscore: true,
}
} else {
let parts_space: Vec<&str> = original.split_whitespace().collect();
let parts_hyphen: Vec<&str> = original.split('-').collect();
let parts_underscore: Vec<&str> = original.split('_').collect();
Self {
use_space: parts_space.iter().any(|p| p == &chosen),
use_hyphen: parts_hyphen.iter().any(|p| p.trim() == chosen),
use_underscore: parts_underscore.iter().any(|p| p.trim() == chosen),
}
}
}
fn apply(&self, name: &str) -> Vec<String> {
let mut parts: Vec<String> = Vec::new();
let full = name.trim().to_string();
if !full.is_empty() {
parts.push(full.clone());
}
if self.use_space {
for p in name.split_whitespace() {
let s = p.trim();
if !s.is_empty() && !parts.contains(&s.to_string()) {
parts.push(s.to_string());
}
}
}
if self.use_underscore {
for p in name.split('_') {
let s = p.trim();
if !s.is_empty() && !parts.contains(&s.to_string()) {
parts.push(s.to_string());
}
}
}
if self.use_hyphen {
for p in name.split('-') {
let s = p.trim();
if !s.is_empty() && !parts.contains(&s.to_string()) {
parts.push(s.to_string());
}
}
}
parts
}
}
fn print_detailed_help() {
println!("TRU-OLS CLI - Detailed Arguments Reference");
println!("==========================================\n");
println!("REQUIRED ARGUMENTS");
println!("------------------\n");
println!("The CLI requires at least ONE of the following mixing matrix sources:\n");
println!(" 1. --mixing-matrix (CSV file) - OR");
println!(" 2. --use-spill (extract from FCS file SPILL keyword) - OR");
println!(" 3. --single-stain-controls (directory with single-stain control files)\n");
println!("Always Required:");
println!(" -s, --stained <PATH>");
println!(" Path to stained sample FCS file or directory containing stained FCS files");
println!(" If a directory is provided, all FCS files in it will be processed\n");
println!(" -u, --unstained <PATH>");
println!(" Path to unstained control FCS file");
println!(
" Optional if using --controls (auto-detected from filename containing 'unstained')\n"
);
println!(" -e, --endmembers <NAMES>");
println!(" Comma-separated endmember names (e.g., \"AF488,PE,APC,Autofluorescence\")");
println!(
" Optional when using --single-stain-controls or --controls (auto-detected from filenames)\n"
);
println!("Conditionally Required:");
println!(" -d, --detectors <NAMES>");
println!(" Required if NOT using --use-spill or --single-stain-controls");
println!(" • When using --use-spill: Detector names are extracted from SPILL keyword");
println!(
" • When using --single-stain-controls: Auto-detected from FCS parameters (optional)"
);
println!(" • When using --mixing-matrix: Detector names must be provided\n");
println!("OPTIONAL ARGUMENTS AND DEFAULTS");
println!("--------------------------------\n");
println!("Mixing Matrix Options:");
println!(" -m, --mixing-matrix <PATH>");
println!(
" Path to CSV mixing matrix file (optional if using --use-spill, --single-stain-controls, or --controls)\n"
);
println!(" --use-spill");
println!(" Use SPILL/SPILLOVER keyword from FCS file");
println!(" Default: false\n");
println!(" -c, --controls <PATH>");
println!(" Directory containing all control files (single-stain controls + unstained)");
println!(" Unstained control is auto-detected from filename containing 'unstained'");
println!(" Single-stain controls are all other FCS files in the directory\n");
println!(" --single-stain-controls <PATH>");
println!(" Directory containing single-stain control FCS files only");
println!(" Optional if using --controls (auto-detected)\n");
println!("Basic Unmixing Parameters:");
println!(" -a, --autofluorescence <NAME>");
println!(" Autofluorescence endmember name");
println!(" Default: \"Autofluorescence\"\n");
println!(" -p, --cutoff-percentile <VALUE>");
println!(" Cutoff percentile");
println!(" Default: 0.995\n");
println!(" --strategy <STRATEGY>");
println!(" Unmixing strategy: \"zero\" or \"ucm\"");
println!(" Default: \"ucm\"\n");
println!(" -o, --output <PATH>");
println!(" Output FCS file path (optional, no output file created if omitted)\n");
println!("Plotting Options:");
println!(" --plot");
println!(" Generate comparison plots");
println!(" Default: true\n");
println!(" --plot-format <FORMAT>");
println!(" Plot format: png, svg, or pdf");
println!(" Default: \"png\"\n");
println!(" --plot-output-dir <PATH>");
println!(" Directory for plot outputs (optional, defaults to current directory)\n");
println!(" --compare-ols");
println!(" Also run standard OLS and compare");
println!(" Default: true\n");
println!(" --plot-both");
println!(" Generate plots for both OLS and TRU-OLS");
println!(" Default: false\n");
println!("Peak Detection Options (for single-stain controls):");
println!(" --peak-detection");
println!(" Enable peak-based median selection");
println!(" Default: false\n");
println!(" --peak-threshold <VALUE>");
println!(" Peak detection threshold (fraction of max density)");
println!(" Lower values detect more peaks, higher values detect only strong peaks");
println!(" Default: 0.3\n");
println!(" --peak-bias <VALUE>");
println!(" Peak bias fraction for positive peaks (0.5 = upper 50%%)");
println!(" Default: 0.5\n");
println!(" --peak-bias-negative <VALUE>");
println!(" Peak bias fraction for negative peaks (0.5 = lower 50%%)");
println!(" Default: 0.5\n");
println!("Negative Event Options:");
println!(" --use-negative-events");
println!(" Use negative events from single-stain controls for autofluorescence");
println!(" Default: false\n");
println!(" --min-negative-events <COUNT>");
println!(" Minimum number of negative events required");
println!(" Default: 100\n");
println!(" --autofluorescence-mode <MODE>");
println!(" Autofluorescence mode: \"universal\", \"negative-events\", or \"hybrid\"");
println!(" Default: \"universal\"\n");
println!(" --af-weight <VALUE>");
println!(" Autofluorescence weight for hybrid mode (0.0-1.0)");
println!(" Weight of unstained control vs negative events");
println!(" Default: 0.7\n");
println!("Automated Gating Options:");
println!(" --auto-gate");
println!(" Enable automated scatter and doublet gating before processing");
println!(" Default: false\n");
println!("USAGE EXAMPLES");
println!("--------------\n");
println!("Using SPILL Matrix (No Detector List Required):");
println!(" tru-ols unmix \\");
println!(" --stained stained.fcs \\");
println!(" --unstained unstained.fcs \\");
println!(" --use-spill \\");
println!(" --endmembers AF488,PE,APC,Autofluorescence \\");
println!(" --output unmixed.fcs\n");
println!("Using --controls (Simplest - All Auto-Detection):");
println!(" tru-ols unmix \\");
println!(" --stained stained.fcs \\");
println!(" --controls ./controls/ \\");
println!(" --output unmixed.fcs\n");
println!(" # Unstained control, detectors, and endmembers are all auto-detected\n");
println!("Batch Processing (Directory of Stained Files):");
println!(" tru-ols unmix \\");
println!(" --stained ./samples/ \\");
println!(" --controls ./controls/ \\");
println!(" --output ./unmixed/\n");
println!(
" # Processes all FCS files in ./samples/, outputs to ./unmixed/ with _unmixed suffix\n"
);
println!("Using Single-Stain Controls (Auto-Detection Available):");
println!(" tru-ols unmix \\");
println!(" --stained stained.fcs \\");
println!(" --unstained unstained.fcs \\");
println!(" --single-stain-controls ./controls/ \\");
println!(" --output unmixed.fcs\n");
println!(" # Detectors and endmembers are auto-detected from FCS files and filenames");
println!(" # You can still provide them explicitly if needed:\n");
println!(" tru-ols unmix \\");
println!(" --stained stained.fcs \\");
println!(" --unstained unstained.fcs \\");
println!(" --single-stain-controls ./controls/ \\");
println!(" --detectors FL1-A,FL2-A,FL3-A,FL4-A \\");
println!(" --endmembers AF488,PE,APC,Autofluorescence \\");
println!(" --output unmixed.fcs\n");
println!("Using CSV Mixing Matrix (Detector List Required):");
println!(" tru-ols unmix \\");
println!(" --stained stained.fcs \\");
println!(" --unstained unstained.fcs \\");
println!(" --mixing-matrix matrix.csv \\");
println!(" --detectors FL1-A,FL2-A,FL3-A,FL4-A \\");
println!(" --endmembers AF488,PE,APC,Autofluorescence \\");
println!(" --output unmixed.fcs\n");
println!("QUICK REFERENCE");
println!("---------------\n");
println!("Can you run without detector list?");
println!(" ✅ YES - If using --use-spill (detectors extracted from SPILL keyword)");
println!(" ✅ YES - If using --single-stain-controls or --controls (detectors auto-detected)");
println!(" ❌ NO - If using --mixing-matrix (detectors must be provided)\n");
println!("Can you run without endmembers?");
println!(
" ✅ YES - If using --single-stain-controls or --controls (endmembers auto-detected)"
);
println!(" ❌ NO - If using --use-spill or --mixing-matrix (endmembers must be provided)\n");
println!("Can you run without unstained control?");
println!(
" ✅ YES - If using --controls (unstained auto-detected from filename containing 'unstained')"
);
println!(" ❌ NO - Otherwise (must provide --unstained)\n");
println!("For more information, see: CLI_ARGUMENTS_REFERENCE.md\n");
}
#[derive(Subcommand, Debug)]
pub enum Command {
Args,
Unmix {
#[arg(short, long)]
stained: PathBuf,
#[arg(short, long)]
unstained: Option<PathBuf>,
#[arg(short = 'c', long)]
controls: Option<PathBuf>,
#[arg(short = 'm', long)]
mixing_matrix: Option<PathBuf>,
#[arg(long)]
use_spill: bool,
#[arg(long)]
single_stain_controls: Option<PathBuf>,
#[arg(short, long, value_delimiter = ',')]
detectors: Vec<String>,
#[arg(short, long, value_delimiter = ',')]
endmembers: Vec<String>,
#[arg(short = 'a', long, default_value = "Autofluorescence")]
autofluorescence: String,
#[arg(short = 'p', long, default_value = "0.995")]
cutoff_percentile: f64,
#[arg(long, default_value = "ucm")]
strategy: String,
#[arg(short, long)]
output: Option<PathBuf>,
#[arg(long)]
plot: bool,
#[arg(long, default_value = "png")]
plot_format: String,
#[arg(long)]
plot_output_dir: Option<PathBuf>,
#[arg(long)]
compare_ols: bool,
#[arg(long)]
plot_both: bool,
#[arg(long)]
export_mixing_matrix: Option<PathBuf>,
#[arg(long, default_value_t = true)]
peak_detection: bool,
#[arg(long, default_value = "0.3")]
peak_threshold: f64,
#[arg(long, default_value = "0.5")]
peak_bias: f64,
#[arg(long, default_value = "0.5")]
peak_bias_negative: f64,
#[arg(long, default_value = "100")]
min_negative_events: usize,
#[arg(long)]
use_negative_events: bool,
#[arg(long, default_value = "universal")]
autofluorescence_mode: String,
#[arg(long, default_value = "0.7")]
af_weight: f64,
#[arg(long, default_value_t = true)]
auto_gate: bool,
},
}
pub fn run_command(command: &Command) -> Result<()> {
match command {
Command::Args => {
print_detailed_help();
Ok(())
}
Command::Unmix {
stained,
unstained,
controls,
mixing_matrix,
use_spill,
single_stain_controls,
detectors,
endmembers,
autofluorescence,
cutoff_percentile,
strategy,
output,
plot,
plot_format,
plot_output_dir,
compare_ols,
plot_both,
peak_detection,
peak_threshold,
peak_bias,
peak_bias_negative,
use_negative_events,
autofluorescence_mode,
af_weight,
min_negative_events,
auto_gate,
export_mixing_matrix,
} => run_unmix_command(
stained,
unstained.as_ref(),
controls.as_ref(),
mixing_matrix.as_ref(),
*use_spill,
single_stain_controls.as_ref(),
detectors,
endmembers,
autofluorescence,
*cutoff_percentile,
strategy,
output.as_ref(),
*plot,
plot_format,
plot_output_dir.as_ref(),
*compare_ols,
*plot_both,
*peak_detection,
*peak_threshold,
*peak_bias,
*peak_bias_negative,
*use_negative_events,
autofluorescence_mode,
*af_weight,
*min_negative_events,
*auto_gate,
export_mixing_matrix.as_ref(),
),
}
}
fn run_unmix_command(
stained_path: &PathBuf,
unstained_path: Option<&PathBuf>,
controls_dir: Option<&PathBuf>,
mixing_matrix_path: Option<&PathBuf>,
use_spill: bool,
single_stain_controls_dir: Option<&PathBuf>,
detectors: &[String],
endmembers: &[String],
autofluorescence: &str,
cutoff_percentile: f64,
strategy_str: &str,
output: Option<&PathBuf>,
plot: bool,
plot_format: &str,
plot_output_dir: Option<&PathBuf>,
compare_ols: bool,
plot_both: bool,
peak_detection: bool,
peak_threshold: f64,
peak_bias: f64,
peak_bias_negative: f64,
use_negative_events: bool,
autofluorescence_mode: &str,
af_weight: f64,
min_negative_events: usize,
auto_gate: bool,
export_mixing_matrix: Option<&PathBuf>,
) -> Result<()> {
if stained_path.is_dir() {
info!(
"Processing directory of stained FCS files: {}",
stained_path.display()
);
process_directory_of_stained_files(
stained_path,
unstained_path,
controls_dir,
mixing_matrix_path,
use_spill,
single_stain_controls_dir,
detectors,
endmembers,
autofluorescence,
cutoff_percentile,
strategy_str,
output,
plot,
plot_format,
plot_output_dir,
compare_ols,
plot_both,
peak_detection,
peak_threshold,
peak_bias,
peak_bias_negative,
use_negative_events,
autofluorescence_mode,
af_weight,
min_negative_events,
auto_gate,
export_mixing_matrix,
)
} else {
process_single_stained_file(
stained_path,
unstained_path,
controls_dir,
mixing_matrix_path,
use_spill,
single_stain_controls_dir,
detectors,
endmembers,
autofluorescence,
cutoff_percentile,
strategy_str,
output,
plot,
plot_format,
plot_output_dir,
compare_ols,
plot_both,
peak_detection,
peak_threshold,
peak_bias,
peak_bias_negative,
use_negative_events,
autofluorescence_mode,
af_weight,
min_negative_events,
auto_gate,
export_mixing_matrix,
)
}
}
fn process_single_stained_file(
stained_path: &PathBuf,
unstained_path: Option<&PathBuf>,
controls_dir: Option<&PathBuf>,
mixing_matrix_path: Option<&PathBuf>,
use_spill: bool,
single_stain_controls_dir: Option<&PathBuf>,
detectors: &[String],
endmembers: &[String],
autofluorescence: &str,
cutoff_percentile: f64,
strategy_str: &str,
output: Option<&PathBuf>,
plot: bool,
plot_format: &str,
plot_output_dir: Option<&PathBuf>,
compare_ols: bool,
plot_both: bool,
peak_detection: bool,
peak_threshold: f64,
peak_bias: f64,
peak_bias_negative: f64,
use_negative_events: bool,
autofluorescence_mode: &str,
af_weight: f64,
min_negative_events: usize,
auto_gate: bool,
export_mixing_matrix: Option<&PathBuf>,
) -> Result<()> {
info!("Loading FCS files...");
let stained_fcs = Fcs::open(stained_path.to_str().context("Invalid stained file path")?)?;
let unstained_path_final = if let Some(path) = unstained_path {
Some(path.clone())
} else if let Some(controls_dir) = controls_dir {
info!("Auto-detecting unstained control from --controls directory...");
let detected = find_unstained_control(controls_dir)?;
info!("Auto-detected unstained control: {}", detected.display());
Some(detected)
} else {
return Err(anyhow::anyhow!(
"Unstained control must be provided via --unstained or auto-detected via --controls"
));
};
let unstained_fcs = Fcs::open(
unstained_path_final
.as_ref()
.unwrap()
.to_str()
.context("Invalid unstained file path")?,
)?;
let single_stain_controls_dir_final = if let Some(dir) = single_stain_controls_dir {
Some(dir.clone())
} else if let Some(controls_dir) = controls_dir {
Some(controls_dir.clone())
} else {
None
};
let (final_detectors, mut final_endmembers) = if let Some(controls_dir) =
&single_stain_controls_dir_final
{
if detectors.is_empty() || endmembers.is_empty() {
info!("Auto-detecting detectors and endmembers from single-stain controls...");
let (auto_detectors, auto_endmembers) =
auto_detect_from_single_stains(controls_dir, &stained_fcs)?;
let final_detectors = if detectors.is_empty() {
info!("Auto-detected detectors: {}", auto_detectors.join(", "));
auto_detectors
} else {
detectors.to_vec()
};
let mut final_endmembers = if endmembers.is_empty() {
info!("Auto-detected endmembers: {}", auto_endmembers.join(", "));
auto_endmembers
} else {
endmembers.to_vec()
};
if !final_endmembers.contains(&autofluorescence.to_string()) {
info!(
"Adding autofluorescence endmember '{}' to endmembers list",
autofluorescence
);
final_endmembers.push(autofluorescence.to_string());
}
(final_detectors, final_endmembers)
} else {
let mut final_endmembers = endmembers.to_vec();
if !final_endmembers.contains(&autofluorescence.to_string()) {
info!(
"Adding autofluorescence endmember '{}' to endmembers list",
autofluorescence
);
final_endmembers.push(autofluorescence.to_string());
}
(detectors.to_vec(), final_endmembers)
}
} else {
let mut final_endmembers = endmembers.to_vec();
if !final_endmembers.is_empty() && !final_endmembers.contains(&autofluorescence.to_string())
{
info!(
"Adding autofluorescence endmember '{}' to endmembers list",
autofluorescence
);
final_endmembers.push(autofluorescence.to_string());
}
(detectors.to_vec(), final_endmembers)
};
let (mixing_matrix, detector_names_from_matrix, mut primary_detector_info) = if use_spill {
info!("Extracting mixing matrix from SPILL keyword...");
let (matrix, detectors) =
extract_mixing_matrix_from_spill(&stained_fcs, &final_endmembers)?;
let mut info = Vec::new();
for endmember in &final_endmembers {
info.push(PrimaryDetectorInfo {
endmember_name: endmember.clone(),
is_autofluorescence: endmember == autofluorescence,
primary_detector_name: None,
primary_detector_pn_name: None,
primary_detector_pn_label: None,
selected_marker_name: None,
selected_fluor_name: None,
});
}
(matrix, detectors, info)
} else if let Some(controls_dir) = &single_stain_controls_dir_final {
info!("Creating mixing matrix from single-stain controls...");
let single_stain_config = SingleStainConfig {
peak_detection,
peak_threshold,
peak_bias,
peak_bias_negative,
use_negative_events,
autofluorescence_mode: autofluorescence_mode.to_string(),
af_weight,
min_negative_events,
};
create_mixing_matrix_from_single_stains(
controls_dir,
&unstained_fcs,
&final_detectors,
&final_endmembers,
&autofluorescence,
&single_stain_config,
auto_gate,
None, )?
} else if let Some(matrix_path) = mixing_matrix_path {
info!("Loading mixing matrix from CSV file...");
let matrix = load_mixing_matrix(matrix_path)?;
let mut info = Vec::new();
for endmember in &final_endmembers {
info.push(PrimaryDetectorInfo {
endmember_name: endmember.clone(),
is_autofluorescence: endmember == autofluorescence,
primary_detector_name: None,
primary_detector_pn_name: None,
primary_detector_pn_label: None,
selected_marker_name: None,
selected_fluor_name: None,
});
}
(matrix, final_detectors.clone(), info)
} else {
return Err(anyhow::anyhow!(
"Must provide one of: --mixing-matrix, --use-spill, or --single-stain-controls"
));
};
let detector_names: Vec<String> = if !detector_names_from_matrix.is_empty() {
detector_names_from_matrix
} else if !final_detectors.is_empty() {
final_detectors.clone()
} else {
return Err(anyhow::anyhow!(
"Detector names must be provided or extracted from SPILL keyword"
));
};
let endmember_names = final_endmembers;
if mixing_matrix.nrows() != detector_names.len() {
return Err(anyhow::anyhow!(
"Mixing matrix rows ({}) don't match number of detectors ({})",
mixing_matrix.nrows(),
detector_names.len()
));
}
if mixing_matrix.ncols() != endmember_names.len() {
return Err(anyhow::anyhow!(
"Mixing matrix columns ({}) don't match number of endmembers ({})",
mixing_matrix.ncols(),
endmember_names.len()
));
}
if let Some(export_path) = export_mixing_matrix {
export_mixing_matrix_to_csv(
&mixing_matrix,
export_path,
&detector_names,
&endmember_names,
)?;
info!("Exported mixing matrix to: {}", export_path.display());
}
let strategy = match strategy_str.to_lowercase().as_str() {
"zero" => Some(UnmixingStrategy::Zero),
"ucm" => Some(UnmixingStrategy::UnstainedControlMapping),
_ => {
warn!("Unknown strategy '{}', using default (zero)", strategy_str);
Some(UnmixingStrategy::Zero)
}
};
let stained_param_names = stained_fcs.get_parameter_names_from_dataframe();
let stained_param_set: std::collections::HashSet<&str> =
stained_param_names.iter().map(|s| s.as_str()).collect();
let mut filtered_matrix_rows = Vec::new();
let mut filtered_detector_names = Vec::new();
let mut filtered_primary_pn_names = Vec::new();
let mut filtered_primary_pn_labels = Vec::new();
let mut filtered_primary_detector_names = Vec::new();
for (det_idx, det_name) in detector_names.iter().enumerate() {
if stained_param_set.contains(det_name.as_str()) {
filtered_matrix_rows.push(det_idx);
filtered_detector_names.push(det_name.clone());
}
}
if filtered_detector_names.is_empty() {
return Err(anyhow::anyhow!(
"No detectors from mixing matrix found in stained file. Stained file parameters: {:?}, Expected detectors: {:?}",
stained_param_names,
detector_names
));
}
if filtered_detector_names.len() < endmember_names.len() {
return Err(anyhow::anyhow!(
"Stained file has {} detectors but requires at least {} for unmixing {} endmembers (underdetermined system). \
Consider using a stained file with more detector channels or reducing the number of endmembers.",
filtered_detector_names.len(),
endmember_names.len(),
endmember_names.len()
));
}
use ndarray::Array2;
let n_filtered = filtered_matrix_rows.len();
let mut filtered_mixing_matrix = Array2::<f64>::zeros((n_filtered, mixing_matrix.ncols()));
for (new_idx, &orig_idx) in filtered_matrix_rows.iter().enumerate() {
let src_row = mixing_matrix.row(orig_idx);
filtered_mixing_matrix.row_mut(new_idx).assign(&src_row);
}
info!(
"Filtered mixing matrix: {} detectors (from {}) × {} endmembers",
n_filtered,
detector_names.len(),
endmember_names.len()
);
let mut filtered_selected_marker_names = Vec::new();
let mut filtered_selected_fluor_names = Vec::new();
for pn_name in &primary_detector_info {
filtered_primary_pn_names.push(pn_name.primary_detector_pn_name.clone());
filtered_primary_pn_labels.push(pn_name.primary_detector_pn_label.clone());
filtered_primary_detector_names.push(pn_name.primary_detector_name.clone());
filtered_selected_marker_names.push(pn_name.selected_marker_name.clone());
filtered_selected_fluor_names.push(pn_name.selected_fluor_name.clone());
}
let detector_names_slices: Vec<&str> =
filtered_detector_names.iter().map(|s| s.as_str()).collect();
let endmember_names: Vec<&str> = endmember_names.iter().map(|s| s.as_str()).collect();
info!("Running TRU-OLS unmixing...");
let mixing_mat = Mat::from_fn(
filtered_mixing_matrix.nrows(),
filtered_mixing_matrix.ncols(),
|i, j| filtered_mixing_matrix[(i, j)],
);
let unmixed_fcs = stained_fcs.apply_tru_ols_unmixing(
&unstained_fcs,
mixing_mat,
&detector_names_slices,
&endmember_names,
autofluorescence,
strategy,
&filtered_primary_detector_names,
&filtered_primary_pn_names,
&filtered_primary_pn_labels,
&filtered_selected_marker_names,
&filtered_selected_fluor_names,
)?;
info!("TRU-OLS unmixing complete!");
if let Some(output_path) = output {
info!("Writing unmixed FCS file to: {}", output_path.display());
use flow_fcs::write_fcs_file;
write_fcs_file(unmixed_fcs.clone(), output_path)?;
info!("Successfully wrote unmixed FCS file");
}
if plot || plot_both {
let plot_dir = plot_output_dir
.map(|p| p.clone())
.unwrap_or_else(|| PathBuf::from("."));
std::fs::create_dir_all(&plot_dir)?;
if plot_both && compare_ols {
info!("Generating comparison plots...");
generate_ols_comparison_plots(
&stained_fcs,
&unstained_fcs,
&mixing_matrix,
&detector_names_slices,
&endmember_names,
&unmixed_fcs.data_frame,
&plot_dir,
plot_format,
)?;
} else if plot {
info!("Generating TRU-OLS plots...");
generate_tru_ols_plots(
&unmixed_fcs.data_frame,
&endmember_names,
&plot_dir,
plot_format,
)?;
}
}
Ok(())
}
fn process_directory_of_stained_files(
stained_dir: &PathBuf,
unstained_path: Option<&PathBuf>,
controls_dir: Option<&PathBuf>,
mixing_matrix_path: Option<&PathBuf>,
use_spill: bool,
single_stain_controls_dir: Option<&PathBuf>,
detectors: &[String],
endmembers: &[String],
autofluorescence: &str,
cutoff_percentile: f64,
strategy_str: &str,
output: Option<&PathBuf>,
plot: bool,
plot_format: &str,
plot_output_dir: Option<&PathBuf>,
compare_ols: bool,
plot_both: bool,
peak_detection: bool,
peak_threshold: f64,
peak_bias: f64,
peak_bias_negative: f64,
use_negative_events: bool,
autofluorescence_mode: &str,
af_weight: f64,
min_negative_events: usize,
auto_gate: bool,
export_mixing_matrix: Option<&PathBuf>,
) -> Result<()> {
use std::fs;
let entries = fs::read_dir(stained_dir)
.with_context(|| format!("Failed to read directory: {}", stained_dir.display()))?;
let mut stained_files: Vec<PathBuf> = Vec::new();
for entry in entries {
let entry = entry?;
let path = entry.path();
if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("fcs") {
stained_files.push(path);
}
}
if stained_files.is_empty() {
return Err(anyhow::anyhow!(
"No FCS files found in directory: {}",
stained_dir.display()
));
}
stained_files.sort();
info!("Found {} FCS files to process", stained_files.len());
info!("Preparing mixing matrix and configuration (this will be reused for all files)...");
let output_dir = if let Some(output_path) = output {
if output_path.is_dir() {
Some(output_path.clone())
} else {
output_path.parent().map(|p| p.to_path_buf())
}
} else {
Some(stained_dir.clone())
};
if let Some(ref out_dir) = output_dir {
std::fs::create_dir_all(out_dir)?;
}
let first_stained_fcs = Fcs::open(
stained_files[0]
.to_str()
.context("Invalid stained file path")?,
)?;
let (mixing_matrix, detector_names, endmember_names, unstained_fcs, primary_detector_info) =
prepare_mixing_matrix_for_batch(
&first_stained_fcs,
unstained_path,
controls_dir,
mixing_matrix_path,
use_spill,
single_stain_controls_dir,
detectors,
endmembers,
autofluorescence,
peak_detection,
peak_threshold,
peak_bias,
peak_bias_negative,
use_negative_events,
autofluorescence_mode,
af_weight,
min_negative_events,
auto_gate,
export_mixing_matrix,
)?;
let strategy = match strategy_str {
"ucm" => UnmixingStrategy::UnstainedControlMapping,
_ => UnmixingStrategy::Zero,
};
let mut success_count = 0;
let mut error_count = 0;
for (idx, stained_file) in stained_files.iter().enumerate() {
info!(
"\n[{}/{}] Processing: {}",
idx + 1,
stained_files.len(),
stained_file.display()
);
let output_file = if let Some(ref out_dir) = output_dir {
let stem = stained_file
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("unmixed");
let mut output_path = out_dir.clone();
output_path.push(format!("{}_unmixed.fcs", stem));
Some(output_path)
} else {
None
};
let file_plot_dir: Option<PathBuf> = if plot || plot_both {
plot_output_dir.cloned().or_else(|| {
output_dir
.as_ref()
.map(|out_dir| {
let mut plot_path = out_dir.clone();
plot_path.push("plots");
plot_path
})
.or(Some(PathBuf::from("plots")))
})
} else {
None
};
match process_stained_file_with_matrix(
stained_file,
&mixing_matrix,
&detector_names,
&endmember_names,
&unstained_fcs,
&primary_detector_info,
autofluorescence,
cutoff_percentile,
&strategy,
output_file.as_ref(),
plot,
plot_format,
file_plot_dir.as_ref(),
compare_ols,
plot_both,
) {
Ok(()) => {
success_count += 1;
info!("✓ Successfully processed: {}", stained_file.display());
}
Err(e) => {
error_count += 1;
warn!("✗ Failed to process {}: {}", stained_file.display(), e);
}
}
}
info!("\n=== Batch Processing Complete ===");
info!("Successfully processed: {} files", success_count);
if error_count > 0 {
warn!("Failed to process: {} files", error_count);
}
if error_count > 0 {
Err(anyhow::anyhow!(
"Batch processing completed with {} errors out of {} files",
error_count,
stained_files.len()
))
} else {
Ok(())
}
}
fn prepare_mixing_matrix_for_batch(
sample_fcs: &Fcs,
unstained_path: Option<&PathBuf>,
controls_dir: Option<&PathBuf>,
mixing_matrix_path: Option<&PathBuf>,
use_spill: bool,
single_stain_controls_dir: Option<&PathBuf>,
detectors: &[String],
endmembers: &[String],
autofluorescence: &str,
peak_detection: bool,
peak_threshold: f64,
peak_bias: f64,
peak_bias_negative: f64,
use_negative_events: bool,
autofluorescence_mode: &str,
af_weight: f64,
min_negative_events: usize,
auto_gate: bool,
export_mixing_matrix: Option<&PathBuf>,
) -> Result<(
Array2<f64>,
Vec<String>,
Vec<String>,
Fcs,
Vec<PrimaryDetectorInfo>,
)> {
let unstained_path_final = if let Some(path) = unstained_path {
Some(path.clone())
} else if let Some(controls_dir) = controls_dir {
info!("Auto-detecting unstained control from --controls directory...");
let detected = find_unstained_control(controls_dir)?;
info!("Auto-detected unstained control: {}", detected.display());
Some(detected)
} else {
return Err(anyhow::anyhow!(
"Unstained control must be provided via --unstained or auto-detected via --controls"
));
};
let unstained_fcs = Fcs::open(
unstained_path_final
.as_ref()
.unwrap()
.to_str()
.context("Invalid unstained file path")?,
)?;
let single_stain_controls_dir_final = if let Some(dir) = single_stain_controls_dir {
Some(dir.clone())
} else if let Some(controls_dir) = controls_dir {
Some(controls_dir.clone())
} else {
None
};
let (final_detectors, mut final_endmembers) = if let Some(controls_dir) =
&single_stain_controls_dir_final
{
if detectors.is_empty() || endmembers.is_empty() {
info!("Auto-detecting detectors and endmembers from single-stain controls...");
let (auto_detectors, auto_endmembers) =
auto_detect_from_single_stains(controls_dir, sample_fcs)?;
let final_detectors = if detectors.is_empty() {
info!(
"Auto-detected {} detectors and {} endmembers from single-stain controls",
auto_detectors.len(),
auto_endmembers.len()
);
info!("Auto-detected detectors: {}", auto_detectors.join(", "));
auto_detectors
} else {
detectors.to_vec()
};
let mut final_endmembers = if endmembers.is_empty() {
info!("Auto-detected endmembers: {}", auto_endmembers.join(", "));
auto_endmembers
} else {
endmembers.to_vec()
};
if !final_endmembers.contains(&autofluorescence.to_string()) {
info!(
"Adding autofluorescence endmember '{}' to endmembers list",
autofluorescence
);
final_endmembers.push(autofluorescence.to_string());
}
(final_detectors, final_endmembers)
} else {
let mut final_endmembers = endmembers.to_vec();
if !final_endmembers.contains(&autofluorescence.to_string()) {
info!(
"Adding autofluorescence endmember '{}' to endmembers list",
autofluorescence
);
final_endmembers.push(autofluorescence.to_string());
}
(detectors.to_vec(), final_endmembers)
}
} else {
let mut final_endmembers = endmembers.to_vec();
if !final_endmembers.is_empty() && !final_endmembers.contains(&autofluorescence.to_string())
{
info!(
"Adding autofluorescence endmember '{}' to endmembers list",
autofluorescence
);
final_endmembers.push(autofluorescence.to_string());
}
(detectors.to_vec(), final_endmembers)
};
let (mixing_matrix, detector_names_from_matrix, primary_detector_info) = if use_spill {
info!("Extracting mixing matrix from SPILL keyword...");
let (matrix, detectors) = extract_mixing_matrix_from_spill(sample_fcs, &final_endmembers)?;
let mut info = Vec::new();
for endmember in &final_endmembers {
info.push(PrimaryDetectorInfo {
endmember_name: endmember.clone(),
is_autofluorescence: endmember == autofluorescence,
primary_detector_name: None,
primary_detector_pn_name: None,
primary_detector_pn_label: None,
selected_marker_name: None,
selected_fluor_name: None,
});
}
(matrix, detectors, info)
} else if let Some(controls_dir) = &single_stain_controls_dir_final {
info!("Creating mixing matrix from single-stain controls...");
let single_stain_config = SingleStainConfig {
peak_detection,
peak_threshold,
peak_bias,
peak_bias_negative,
use_negative_events,
autofluorescence_mode: autofluorescence_mode.to_string(),
af_weight,
min_negative_events,
};
create_mixing_matrix_from_single_stains(
controls_dir,
&unstained_fcs,
&final_detectors,
&final_endmembers,
&autofluorescence,
&single_stain_config,
auto_gate,
None, )?
} else if let Some(matrix_path) = mixing_matrix_path {
info!("Loading mixing matrix from CSV file...");
let matrix = load_mixing_matrix(matrix_path)?;
let mut info = Vec::new();
for endmember in &final_endmembers {
info.push(PrimaryDetectorInfo {
endmember_name: endmember.clone(),
is_autofluorescence: endmember == autofluorescence,
primary_detector_name: None,
primary_detector_pn_name: None,
primary_detector_pn_label: None,
selected_marker_name: None,
selected_fluor_name: None,
});
}
(matrix, final_detectors.clone(), info)
} else {
return Err(anyhow::anyhow!(
"Must provide --mixing-matrix, --use-spill, or --single-stain-controls/--controls"
));
};
let final_detector_names: Vec<String> = if !detector_names_from_matrix.is_empty() {
detector_names_from_matrix
} else if !final_detectors.is_empty() {
final_detectors.clone()
} else {
return Err(anyhow::anyhow!(
"Detector names must be provided or extracted from SPILL keyword"
));
};
let n_detectors_in_matrix = mixing_matrix.nrows();
if n_detectors_in_matrix != final_detector_names.len() {
return Err(anyhow::anyhow!(
"Mixing matrix rows ({}) don't match number of detectors ({})",
n_detectors_in_matrix,
final_detector_names.len()
));
}
if mixing_matrix.ncols() != final_endmembers.len() {
return Err(anyhow::anyhow!(
"Mixing matrix columns ({}) don't match number of endmembers ({})",
mixing_matrix.ncols(),
final_endmembers.len()
));
}
info!(
"Prepared mixing matrix: {} detectors × {} endmembers",
final_detector_names.len(),
final_endmembers.len()
);
if let Some(export_path) = export_mixing_matrix {
export_mixing_matrix_to_csv(
&mixing_matrix,
export_path,
&final_detector_names,
&final_endmembers,
)?;
info!("Exported mixing matrix to: {}", export_path.display());
}
Ok((
mixing_matrix,
final_detector_names,
final_endmembers,
unstained_fcs,
primary_detector_info,
))
}
fn process_stained_file_with_matrix(
stained_path: &PathBuf,
mixing_matrix: &Array2<f64>,
detector_names: &[String],
endmember_names: &[String],
unstained_fcs: &Fcs,
primary_detector_info: &[PrimaryDetectorInfo],
autofluorescence: &str,
cutoff_percentile: f64,
strategy: &UnmixingStrategy,
output: Option<&PathBuf>,
plot: bool,
plot_format: &str,
plot_output_dir: Option<&PathBuf>,
compare_ols: bool,
plot_both: bool,
) -> Result<()> {
info!("Loading stained FCS file...");
let stained_fcs = Fcs::open(stained_path.to_str().context("Invalid stained file path")?)?;
let stained_param_names = stained_fcs.get_parameter_names_from_dataframe();
let stained_param_set: std::collections::HashSet<&str> =
stained_param_names.iter().map(|s| s.as_str()).collect();
let mut filtered_matrix_rows = Vec::new();
let mut filtered_detector_names = Vec::new();
for (det_idx, det_name) in detector_names.iter().enumerate() {
if stained_param_set.contains(det_name.as_str()) {
filtered_matrix_rows.push(det_idx);
filtered_detector_names.push(det_name.clone());
}
}
if filtered_detector_names.is_empty() {
return Err(anyhow::anyhow!(
"No detectors from mixing matrix found in stained file. Stained file parameters: {:?}, Expected detectors: {:?}",
stained_param_names,
detector_names
));
}
if filtered_detector_names.len() < endmember_names.len() {
return Err(anyhow::anyhow!(
"Stained file has {} detectors but requires at least {} for unmixing {} endmembers (underdetermined system). \
Consider using a stained file with more detector channels or reducing the number of endmembers.",
filtered_detector_names.len(),
endmember_names.len(),
endmember_names.len()
));
}
use ndarray::Array2;
let n_filtered = filtered_matrix_rows.len();
let mut filtered_mixing_matrix = Array2::<f64>::zeros((n_filtered, mixing_matrix.ncols()));
for (new_idx, &orig_idx) in filtered_matrix_rows.iter().enumerate() {
let src_row = mixing_matrix.row(orig_idx);
filtered_mixing_matrix.row_mut(new_idx).assign(&src_row);
}
info!(
"Filtered mixing matrix: {} detectors (from {}) × {} endmembers",
n_filtered,
detector_names.len(),
endmember_names.len()
);
info!("Running TRU-OLS unmixing...");
let detector_names_str: Vec<&str> =
filtered_detector_names.iter().map(|s| s.as_str()).collect();
let endmember_names_str: Vec<&str> = endmember_names.iter().map(|s| s.as_str()).collect();
let primary_detector_names: Vec<Option<String>> = primary_detector_info
.iter()
.map(|p| p.primary_detector_name.clone())
.collect();
let primary_pn_names: Vec<Option<String>> = primary_detector_info
.iter()
.map(|p| p.primary_detector_pn_name.clone())
.collect();
let primary_pn_labels: Vec<Option<String>> = primary_detector_info
.iter()
.map(|p| p.primary_detector_pn_label.clone())
.collect();
let selected_marker_names: Vec<Option<String>> = primary_detector_info
.iter()
.map(|p| p.selected_marker_name.clone())
.collect();
let selected_fluor_names: Vec<Option<String>> = primary_detector_info
.iter()
.map(|p| p.selected_fluor_name.clone())
.collect();
let mixing_mat = Mat::from_fn(
filtered_mixing_matrix.nrows(),
filtered_mixing_matrix.ncols(),
|i, j| filtered_mixing_matrix[(i, j)],
);
let unmixed_fcs = stained_fcs.apply_tru_ols_unmixing(
unstained_fcs,
mixing_mat,
&detector_names_str,
&endmember_names_str,
autofluorescence,
Some(*strategy),
&primary_detector_names,
&primary_pn_names,
&primary_pn_labels,
&selected_marker_names,
&selected_fluor_names,
)?;
info!("TRU-OLS unmixing complete!");
if let Some(output_path) = output {
info!("Writing unmixed FCS file to: {}", output_path.display());
use flow_fcs::write_fcs_file;
write_fcs_file(unmixed_fcs.clone(), output_path)?;
info!("Successfully wrote unmixed FCS file");
}
if plot || plot_both {
let plot_dir = plot_output_dir
.map(|p| p.clone())
.unwrap_or_else(|| PathBuf::from("."));
std::fs::create_dir_all(&plot_dir)?;
if plot_both && compare_ols {
info!("Generating comparison plots...");
generate_ols_comparison_plots(
&stained_fcs,
unstained_fcs,
mixing_matrix,
&detector_names_str,
&endmember_names_str,
&unmixed_fcs.data_frame,
&plot_dir,
plot_format,
)?;
} else {
generate_tru_ols_plots(
&unmixed_fcs.data_frame,
&endmember_names_str,
&plot_dir,
plot_format,
)?;
}
}
Ok(())
}
fn extract_mixing_matrix_from_spill(
fcs: &Fcs,
endmember_names: &[String],
) -> Result<(Array2<f64>, Vec<String>)> {
let (spill_matrix_f32, detector_names) = fcs
.get_spillover_matrix()
.map_err(|e| anyhow::anyhow!("Failed to extract SPILL/SPILLOVER keyword: {}", e))?
.ok_or_else(|| anyhow::anyhow!("No SPILL/SPILLOVER keyword found in FCS file"))?;
let spill_matrix = faer::Mat::from_fn(
spill_matrix_f32.nrows(),
spill_matrix_f32.ncols(),
|i, j| spill_matrix_f32[(i, j)] as f64,
);
let n_detectors = spill_matrix.nrows();
let n_endmembers_in_matrix = spill_matrix.ncols();
if n_detectors == 0 || n_endmembers_in_matrix == 0 {
return Err(anyhow::anyhow!(
"SPILL matrix has invalid dimensions: {} × {}",
n_detectors,
n_endmembers_in_matrix
));
}
if n_endmembers_in_matrix != endmember_names.len() {
warn!(
"SPILL matrix has {} endmembers, but {} were specified. Using matrix dimensions.",
n_endmembers_in_matrix,
endmember_names.len()
);
}
for i in 0..n_detectors {
for j in 0..n_endmembers_in_matrix {
let value = spill_matrix[(i, j)];
if !value.is_finite() {
return Err(anyhow::anyhow!(
"SPILL matrix contains non-finite value at position [{}, {}]",
i,
j
));
}
if value < 0.0 {
warn!(
"SPILL matrix contains negative value at [{}, {}]: {}",
i, j, value
);
}
}
}
let mut has_primary_detectors = true;
for j in 0..n_endmembers_in_matrix {
let column_max: f64 = (0..n_detectors)
.map(|i| spill_matrix[(i, j)])
.fold(0.0_f64, |a, b| a.max(b));
if column_max < 0.1 {
warn!(
"Endmember {} has very low maximum signal ({}) in SPILL matrix",
j, column_max
);
has_primary_detectors = false;
}
}
if !has_primary_detectors {
warn!(
"SPILL matrix may not be a valid spectral mixing matrix (low primary detector signals)"
);
}
info!(
"Extracted mixing matrix from SPILL keyword: {} detectors × {} endmembers",
n_detectors, n_endmembers_in_matrix
);
let mixing_matrix =
Array2::from_shape_fn((spill_matrix.nrows(), spill_matrix.ncols()), |(i, j)| {
spill_matrix[(i, j)]
});
Ok((mixing_matrix, detector_names))
}
fn find_unstained_control(controls_dir: &PathBuf) -> Result<PathBuf> {
use std::fs;
let entries = fs::read_dir(controls_dir)
.with_context(|| format!("Failed to read directory: {}", controls_dir.display()))?;
let mut unstained_candidates: Vec<PathBuf> = Vec::new();
for entry in entries {
let entry = entry?;
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("fcs") {
if let Some(filename) = path.file_name().and_then(|s| s.to_str()) {
if filename.to_lowercase().contains("unstained") {
unstained_candidates.push(path);
}
}
}
}
if unstained_candidates.is_empty() {
return Err(anyhow::anyhow!(
"No unstained control file found in {} (looking for filename containing 'unstained')",
controls_dir.display()
));
}
if unstained_candidates.len() > 1 {
warn!(
"Multiple files with 'unstained' in filename found: {:?}. Using first: {}",
unstained_candidates
.iter()
.map(|p| p.file_name().unwrap().to_string_lossy())
.collect::<Vec<_>>(),
unstained_candidates[0].display()
);
}
Ok(unstained_candidates[0].clone())
}
fn auto_detect_from_single_stains(
controls_dir: &PathBuf,
sample_fcs: &Fcs,
) -> Result<(Vec<String>, Vec<String>)> {
use std::fs;
let all_params = sample_fcs.get_parameter_names_from_dataframe();
let detectors: Vec<String> = all_params
.into_iter()
.filter(|name| {
let name_upper = name.to_uppercase();
!name_upper.contains("FSC")
&& !name_upper.contains("SSC")
&& !name_upper.contains("TIME")
&& !name_upper.contains("TIME ")
})
.collect();
if detectors.is_empty() {
return Err(anyhow::anyhow!(
"No fluorescent detector parameters found in FCS file. Found parameters: {}",
sample_fcs.get_parameter_names_from_dataframe().join(", ")
));
}
let entries = fs::read_dir(controls_dir)
.with_context(|| format!("Failed to read directory: {}", controls_dir.display()))?;
let mut endmembers: Vec<String> = Vec::new();
for entry in entries {
let entry = entry?;
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("fcs") {
if let Some(filename) = path.file_name().and_then(|s| s.to_str()) {
if filename.to_lowercase().contains("unstained") {
continue;
}
}
if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
if !endmembers.contains(&stem.to_string()) {
endmembers.push(stem.to_string());
}
}
}
}
if endmembers.is_empty() {
return Err(anyhow::anyhow!(
"No FCS files found in single-stain controls directory: {}",
controls_dir.display()
));
}
endmembers.sort();
info!(
"Auto-detected {} detectors and {} endmembers from single-stain controls",
detectors.len(),
endmembers.len()
);
Ok((detectors, endmembers))
}
fn candidate_fragments(name: &str) -> Vec<String> {
let mut parts: Vec<String> = Vec::new();
let full = name.trim().to_string();
if !full.is_empty() {
parts.push(full.clone());
}
for p in name.split_whitespace() {
let s = p.trim();
if !s.is_empty() && !parts.contains(&s.to_string()) {
parts.push(s.to_string());
}
}
for p in name.split('_') {
let s = p.trim();
if !s.is_empty() && !parts.contains(&s.to_string()) {
parts.push(s.to_string());
}
}
for p in name.split('-') {
let s = p.trim();
if !s.is_empty() && !parts.contains(&s.to_string()) {
parts.push(s.to_string());
}
}
parts
}
fn extract_fluor_candidates(filename: &str, pn_label: Option<&str>) -> Vec<String> {
let mut candidates: Vec<String> = Vec::new();
if let Some(label) = pn_label {
if !label.is_empty() {
candidates.push(label.to_string());
if label.len() < 15 {
candidates.extend(candidate_fragments(label));
}
}
}
if let Some(paren_start) = filename.find('(') {
let before_paren = filename[..paren_start].trim();
let sections: Vec<&str> = before_paren.split('_').collect();
if sections.len() >= 3 {
let last_section = sections[sections.len() - 1].trim();
let words: Vec<&str> = last_section.split_whitespace().collect();
let mut dye_start_idx = 0;
for (i, word) in words.iter().enumerate() {
if is_dye_name_start(word) {
dye_start_idx = i;
break;
}
}
if dye_start_idx < words.len() {
let dye_name = words[dye_start_idx..].join(" ");
if is_likely_fluor_name(&dye_name) && !candidates.contains(&dye_name) {
candidates.push(dye_name);
}
}
}
}
candidates.retain(|c| {
let lower = c.to_lowercase();
!lower.contains("plate")
&& !lower.contains("beads")
&& !lower.contains("cells")
&& !lower.contains("filtered")
&& !lower.contains("reference")
&& !lower.contains("group")
&& !lower.contains("non-debris")
&& !c.chars().all(|ch| ch.is_numeric() || ch == '_')
&& (c.len() <= 20 && c.len() >= 2)
});
let mut seen = std::collections::HashSet::new();
candidates.retain(|c| seen.insert(c.clone()));
candidates
}
fn is_dye_name_start(word: &str) -> bool {
let word_upper = word.to_uppercase();
let dye_prefixes = [
"SPARK", "UV", "BV", "BUV", "BB", "PE", "APC", "FITC", "RB", "RY", "LD", "AF", "ALEXA",
"NEAR", "FAR", "LIVE", "DEAD", "R", "V", "B", "YG",
];
dye_prefixes
.iter()
.any(|&prefix| word_upper.starts_with(prefix))
|| word_upper.starts_with("R")
&& word.len() >= 2
&& word.chars().nth(1).map_or(false, |c| c.is_numeric())
|| word_upper.starts_with("V")
&& word.len() >= 2
&& word.chars().nth(1).map_or(false, |c| c.is_numeric())
}
fn is_well_identifier(word: &str) -> bool {
if word.len() < 2 || word.len() > 3 {
return false;
}
let first_char = word.chars().next().unwrap();
let rest = &word[1..];
first_char.is_ascii_uppercase()
&& first_char >= 'A'
&& first_char <= 'H'
&& rest.chars().all(|c| c.is_numeric())
&& rest.parse::<u32>().map_or(false, |n| n >= 1 && n <= 12)
}
fn is_detector_channel_name(s: &str) -> bool {
if let Some(dash_pos) = s.find('-') {
if dash_pos > 0 && dash_pos < s.len() - 1 {
let before_dash = &s[..dash_pos];
let after_dash = &s[dash_pos + 1..];
return before_dash
.chars()
.next()
.map_or(false, |c| c.is_ascii_alphabetic())
&& after_dash.len() == 1
&& after_dash
.chars()
.next()
.map_or(false, |c| c.is_ascii_alphabetic());
}
}
false
}
fn extract_marker_and_fluor_from_text(text: &str) -> (String, String) {
let cleaned = text.trim();
let words: Vec<&str> = cleaned.split_whitespace().collect();
if words.is_empty() {
return (String::new(), String::new());
}
let mut dye_start_idx = words.len();
for (i, word) in words.iter().enumerate() {
if is_dye_name_start(word) {
dye_start_idx = i;
break;
}
}
let marker_words: Vec<&str> = words[..dye_start_idx]
.iter()
.filter(|&w| !is_well_identifier(w) && !is_detector_channel_name(w))
.copied()
.collect();
let marker_name = if !marker_words.is_empty() {
marker_words.join(" ")
} else if dye_start_idx > 0 {
words[..dye_start_idx].join(" ")
} else {
String::new()
};
let fluor_name = if dye_start_idx < words.len() {
words[dye_start_idx..].join(" ")
} else {
String::new()
};
(marker_name, fluor_name)
}
fn is_likely_fluor_name(s: &str) -> bool {
let s_trim = s.trim();
let has_letter = s_trim.chars().any(|c| c.is_alphabetic());
let has_number = s_trim.chars().any(|c| c.is_numeric());
let common_dyes = [
"UV", "Spark", "PE", "APC", "FITC", "BV", "BUV", "BB", "RB", "RY", "LD", "Near IR",
];
let has_common_pattern = common_dyes.iter().any(|&dye| s_trim.contains(dye));
has_letter && (has_number || has_common_pattern) && s_trim.len() >= 2 && s_trim.len() <= 30
}
fn choose_fragment_interactive(
control_path: &PathBuf,
candidates: &[String],
original_name: &str,
) -> (String, DelimiterPreference) {
if candidates.len() <= 1 {
let choice = candidates
.get(0)
.cloned()
.unwrap_or_else(|| "UNKNOWN".to_string());
let pref = DelimiterPreference::infer(original_name, &choice);
return (choice, pref);
}
println!(
"Ambiguous marker name extracted from control file: {}",
control_path.display()
);
println!("Select the best marker name from the candidates below:");
for (i, c) in candidates.iter().enumerate() {
println!(" {}) {}", i + 1, c);
}
print!("Enter selection (1-{}) [default: 1]: ", candidates.len());
let _ = stdout().flush();
let mut input = String::new();
let choice = match stdin().read_line(&mut input) {
Ok(_) => {
let trimmed = input.trim();
if trimmed.is_empty() {
candidates[0].clone()
} else if let Ok(idx) = trimmed.parse::<usize>() {
if idx >= 1 && idx <= candidates.len() {
candidates[idx - 1].clone()
} else {
candidates[0].clone()
}
} else {
candidates[0].clone()
}
}
Err(_) => candidates[0].clone(),
};
let pref = DelimiterPreference::infer(original_name, &choice);
(choice, pref)
}
#[derive(Debug, Clone)]
pub struct PrimaryDetectorInfo {
pub endmember_name: String,
pub is_autofluorescence: bool,
pub primary_detector_name: Option<String>,
pub primary_detector_pn_name: Option<String>,
pub primary_detector_pn_label: Option<String>,
pub selected_marker_name: Option<String>,
pub selected_fluor_name: Option<String>,
}
#[derive(Debug, Clone)]
pub struct SingleStainConfig {
pub peak_detection: bool,
pub peak_threshold: f64,
pub peak_bias: f64,
pub peak_bias_negative: f64,
pub use_negative_events: bool,
pub autofluorescence_mode: String,
pub af_weight: f64,
pub min_negative_events: usize,
}
impl Default for SingleStainConfig {
fn default() -> Self {
Self {
peak_detection: true,
peak_threshold: 0.3,
peak_bias: 0.5,
peak_bias_negative: 0.5,
use_negative_events: false,
autofluorescence_mode: "universal".to_string(),
af_weight: 0.7,
min_negative_events: 100,
}
}
}
pub fn create_mixing_matrix_from_single_stains(
controls_dir: &PathBuf,
unstained_fcs: &Fcs,
detector_names: &[String],
endmember_names: &[String],
autofluorescence_name: &str,
config: &SingleStainConfig,
auto_gate: bool,
diagnostic_plot_dir: Option<&PathBuf>,
) -> Result<(Array2<f64>, Vec<String>, Vec<PrimaryDetectorInfo>)> {
use std::fs;
info!(
"Scanning single-stain control directory: {}",
controls_dir.display()
);
let entries = fs::read_dir(controls_dir)
.with_context(|| format!("Failed to read directory: {}", controls_dir.display()))?;
let mut control_files: Vec<(String, PathBuf)> = Vec::new();
for entry in entries {
let entry = entry?;
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("fcs") {
if let Some(filename) = path.file_name().and_then(|s| s.to_str()) {
if filename.to_lowercase().contains("unstained") {
continue;
}
}
let filename = path
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("")
.to_lowercase();
for endmember in endmember_names {
if filename.contains(&endmember.to_lowercase()) {
control_files.push((endmember.clone(), path));
break;
}
}
}
}
if control_files.is_empty() {
return Err(anyhow::anyhow!(
"No matching single-stain control files found in {}",
controls_dir.display()
));
}
info!("Found {} single-stain control files", control_files.len());
let mut delimiter_preference = DelimiterPreference {
use_space: true,
use_hyphen: true,
use_underscore: true,
};
if let Some((most_ambig_idx, delim_count)) = find_most_ambiguous_endmember(&control_files) {
info!(
"Most ambiguous control file at index {}: {} delimiters",
most_ambig_idx, delim_count
);
}
let mut autofluorescence_medians: Vec<f32> = Vec::new();
for detector_name in detector_names {
let values = unstained_fcs
.get_parameter_events_slice(detector_name)
.with_context(|| {
format!("Failed to extract {} from unstained control", detector_name)
})?;
let mut sorted_values: Vec<f32> = values.iter().copied().collect();
sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap());
let median = if sorted_values.is_empty() {
0.0
} else {
sorted_values[sorted_values.len() / 2]
};
autofluorescence_medians.push(median);
}
let mut negative_event_af: std::collections::HashMap<String, Vec<f32>> =
std::collections::HashMap::new();
let n_detectors = detector_names.len();
let n_endmembers = endmember_names.len();
let mut mixing_matrix = Array2::<f64>::zeros((n_detectors, n_endmembers));
let mut fluorophore_endmembers: Vec<(usize, String)> = Vec::new();
let mut autofluorescence_idx: Option<usize> = None;
let mut primary_detector_info: Vec<Option<PrimaryDetectorInfo>> = vec![None; n_endmembers];
let mut primary_detectors_used: Vec<(usize, String)> = Vec::new();
info!(
"Looking for autofluorescence '{}' in {} endmembers: {:?}",
autofluorescence_name,
endmember_names.len(),
endmember_names
);
for (endmember_idx, endmember_name) in endmember_names.iter().enumerate() {
if endmember_name == autofluorescence_name {
info!("Found autofluorescence at index {}", endmember_idx);
autofluorescence_idx = Some(endmember_idx);
} else {
if control_files.iter().any(|(name, _)| name == endmember_name) {
fluorophore_endmembers.push((endmember_idx, endmember_name.clone()));
} else {
return Err(anyhow::anyhow!(
"No single-stain control file found for endmember: {}",
endmember_name
));
}
}
}
if autofluorescence_idx.is_none() {
return Err(anyhow::anyhow!(
"Autofluorescence endmember '{}' not found in endmember names",
autofluorescence_name
));
}
let autofluorescence_idx = autofluorescence_idx.unwrap();
for (control_file_idx, (endmember_idx, endmember_name)) in
fluorophore_endmembers.iter().enumerate()
{
let control_path = control_files
.iter()
.find(|(name, _)| name == endmember_name)
.map(|(_, path)| path)
.ok_or_else(|| {
anyhow::anyhow!(
"No single-stain control file found for endmember: {}",
endmember_name
)
})?;
info!(
"Processing single-stain control: {} -> {}",
endmember_name,
control_path.display()
);
info!(" auto_gate enabled: {}", auto_gate);
let control_fcs_before_gating =
Fcs::open(control_path.to_str().context("Invalid control file path")?)?;
let control_fcs = if auto_gate {
info!("Applying automated gating to {} control...", endmember_name);
apply_automated_gating(&control_fcs_before_gating)?
} else {
control_fcs_before_gating.clone()
};
if config.use_negative_events {
let negative_af = extract_negative_event_autofluorescence(
&control_fcs,
detector_names,
endmember_name,
config,
)?;
if let Some(af) = negative_af {
let universal_af = &autofluorescence_medians;
let mut af_differences = Vec::new();
for (det_idx, detector_name) in detector_names.iter().enumerate() {
let diff = (af[det_idx] - universal_af[det_idx]).abs();
let diff_percent = if universal_af[det_idx] > 0.0 {
(diff / universal_af[det_idx]) * 100.0
} else {
0.0
};
af_differences.push((detector_name.clone(), diff, diff_percent));
}
let max_diff = af_differences
.iter()
.map(|(_, _, p)| *p)
.fold(0.0f32, f32::max);
info!(
"Extracted negative event autofluorescence for {} ({} detectors, max diff: {:.1}%)",
endmember_name,
detector_names.len(),
max_diff
);
if max_diff > 20.0 {
info!(
"Significant difference between negative-event AF and universal AF for {}:",
endmember_name
);
for (det_idx, (det_name, _diff, diff_pct)) in af_differences.iter().enumerate()
{
if *diff_pct > 10.0 {
info!(
" {}: negative={:.2}, universal={:.2}, diff={:.1}%",
det_name, af[det_idx], universal_af[det_idx], diff_pct
);
}
}
}
negative_event_af.insert(endmember_name.clone(), af);
} else {
warn!(
"Insufficient negative events for {} (need at least {}), using universal AF only",
endmember_name, config.min_negative_events
);
}
}
let mut medians: Vec<f32> = Vec::new();
let mut mads: Vec<f32> = Vec::new();
let control_filename = control_path
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or(endmember_name);
let (marker_name, fluor_name) = {
let mut marker = String::new();
let mut fluor = String::new();
let mut found_useful_pns = false;
use std::sync::Arc;
for det_name in detector_names.iter() {
if let Some(param) = control_fcs.parameters.get(&Arc::from(det_name.as_str())) {
if !param.label_name.is_empty() {
let pns_label = param.label_name.to_string();
if !is_detector_channel_name(&pns_label) {
let (m, f) = extract_marker_and_fluor_from_text(&pns_label);
if !m.is_empty() && !f.is_empty() {
marker = m;
fluor = f;
found_useful_pns = true;
info!(
"Using $PnS label '{}' from detector {} for marker/fluor extraction",
pns_label, det_name
);
break;
}
}
}
}
}
if !found_useful_pns {
info!("No useful $PnS labels found, using filename extraction");
let text_to_parse = if let Some(paren_start) = control_filename.find('(') {
let before_paren = control_filename[..paren_start].trim();
let sections: Vec<&str> = before_paren.split('_').collect();
if sections.len() >= 2 {
sections[sections.len() - 1].trim()
} else {
before_paren
}
} else {
control_filename
};
let (m, f) = extract_marker_and_fluor_from_text(text_to_parse);
marker = m;
fluor = f;
}
if marker.is_empty() {
marker = endmember_name.to_string();
}
if fluor.is_empty() {
fluor = marker.clone();
}
(marker, fluor)
};
info!(
"=== Analyzing control: {} (marker: {}, fluor: {}) ===",
endmember_name, marker_name, fluor_name
);
for detector_name in detector_names.iter() {
let values = control_fcs
.get_parameter_events_slice(detector_name)
.with_context(|| {
format!(
"Failed to extract {} from control file {}",
detector_name,
control_path.display()
)
})?;
let n_events = values.len();
let min_val = values.iter().cloned().fold(f32::INFINITY, f32::min);
let max_val = values.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
let sum: f64 = values.iter().map(|&v| v as f64).sum();
let mean = (sum / n_events as f64) as f32;
let above_100 = values.iter().filter(|&&v| v > 100.0).count();
let above_1000 = values.iter().filter(|&&v| v > 1000.0).count();
let above_10000 = values.iter().filter(|&&v| v > 10000.0).count();
let values_f64: Vec<f64> = values.iter().map(|&v| v as f64).collect();
let median = if config.peak_detection {
match calculate_peak_based_median(
&values_f64,
config.peak_threshold,
config.peak_bias,
) {
Some(peak_median) => {
let simple_median = calculate_simple_median(values);
let diff_percent =
((peak_median - simple_median).abs() / simple_median.max(1.0)) * 100.0;
info!(
"Peak-based median for {} in {}: {:.2} (simple: {:.2}, diff: {:.1}%)",
detector_name, endmember_name, peak_median, simple_median, diff_percent
);
peak_median
}
None => {
warn!(
"Peak detection failed for {} in {}, falling back to simple median",
detector_name, endmember_name
);
calculate_simple_median(values)
}
}
} else {
calculate_simple_median(values)
};
info!(
" {} -> n={}, min={:.1}, max={:.1}, mean={:.1}, median={:.1}, >100: {}, >1k: {}, >10k: {}",
detector_name,
n_events,
min_val,
max_val,
mean,
median,
above_100,
above_1000,
above_10000
); let deviations: Vec<f32> = values.iter().map(|&v| (v - median).abs()).collect();
let mut sorted_deviations = deviations;
sorted_deviations.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mad = if sorted_deviations.is_empty() {
0.0
} else {
sorted_deviations[sorted_deviations.len() / 2]
};
medians.push(median);
mads.push(mad.max(f32::EPSILON)); }
let effective_af: Vec<f32> = match config.autofluorescence_mode.as_str() {
"negative-events" => {
if let Some(negative_af) = negative_event_af.get(endmember_name) {
info!(
"Using negative-event autofluorescence for {}",
endmember_name
);
negative_af.clone()
} else {
warn!(
"Negative events not available for {}, falling back to universal AF",
endmember_name
);
autofluorescence_medians.clone()
}
}
"hybrid" => {
if let Some(negative_af) = negative_event_af.get(endmember_name) {
info!(
"Using hybrid autofluorescence for {} (weight: {:.2})",
endmember_name, config.af_weight
);
autofluorescence_medians
.iter()
.zip(negative_af.iter())
.map(|(&af_universal, &af_negative)| {
(config.af_weight * af_universal as f64
+ (1.0 - config.af_weight) * af_negative as f64)
as f32
})
.collect()
} else {
warn!(
"Negative events not available for {}, using universal AF only",
endmember_name
);
autofluorescence_medians.clone()
}
}
_ => {
if config.use_negative_events {
info!(
"Using universal autofluorescence for {} (negative events available but mode=universal)",
endmember_name
);
}
autofluorescence_medians.clone()
}
};
let corrected_medians: Vec<f32> = medians
.iter()
.zip(effective_af.iter())
.map(|(median, &af)| (median - af).max(0.0))
.collect();
info!(
" --- After AF subtraction (ALL {} detectors) ---",
detector_names.len()
);
for (det_idx, detector_name) in detector_names.iter().enumerate() {
info!(
" {} -> raw={:.1}, af={:.1}, corrected={:.1}",
detector_name, medians[det_idx], effective_af[det_idx], corrected_medians[det_idx]
);
}
let primary_idx = corrected_medians
.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
.map(|(idx, _)| idx)
.ok_or_else(|| anyhow::anyhow!("No valid signal found in control file"))?;
let primary_median = corrected_medians[primary_idx];
info!(
" PRIMARY DETECTOR SELECTION: {} with corrected value {:.1}",
detector_names[primary_idx], primary_median
);
info!(" Top 3 corrected values:");
let mut sorted_corrected: Vec<(usize, f32)> = corrected_medians
.iter()
.enumerate()
.map(|(idx, &val)| (idx, val))
.collect();
sorted_corrected.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
for (i, (det_idx, val)) in sorted_corrected.iter().take(3).enumerate() {
info!(
" {}. {} = {:.1} (raw: {:.1}, af: {:.1})",
i + 1,
detector_names[*det_idx],
val,
medians[*det_idx],
effective_af[*det_idx]
);
}
if primary_median <= 0.0 {
return Err(anyhow::anyhow!(
"Primary detector has zero or negative signal after autofluorescence subtraction"
));
}
let primary_detector_name = detector_names[primary_idx].clone();
primary_detectors_used.push((*endmember_idx, primary_detector_name.clone()));
let mut pn_name: Option<String> = None;
let mut pn_label: Option<String> = None;
{
use std::sync::Arc;
if let Some(param) = control_fcs
.parameters
.get(&Arc::from(primary_detector_name.as_str()))
{
if !param.channel_name.is_empty() {
pn_name = Some(param.channel_name.to_string());
}
if !param.label_name.is_empty() {
pn_label = Some(param.label_name.to_string());
}
}
}
primary_detector_info[*endmember_idx] = Some(PrimaryDetectorInfo {
endmember_name: endmember_name.clone(),
is_autofluorescence: false,
primary_detector_name: Some(primary_detector_name.clone()),
primary_detector_pn_name: pn_name.clone(),
primary_detector_pn_label: pn_label.clone(),
selected_marker_name: Some(marker_name.clone()),
selected_fluor_name: if fluor_name.is_empty() {
pn_label.clone()
} else {
Some(fluor_name.clone())
},
});
for (detector_idx, corrected_median) in corrected_medians.iter().enumerate() {
mixing_matrix[(detector_idx, *endmember_idx)] =
(*corrected_median / primary_median) as f64;
}
let max_spillover = corrected_medians
.iter()
.enumerate()
.filter(|(idx, _)| *idx != primary_idx)
.map(|(_, &val)| val / primary_median)
.fold(0.0f32, f32::max);
info!(
"Created spectral signature for {}: primary detector {} (normalized to 1.0, max spillover: {:.3})",
endmember_name, detector_names[primary_idx], max_spillover
);
{
use std::fs::OpenOptions;
use std::io::Write;
if let Ok(mut file) = OpenOptions::new()
.create(true)
.append(true)
.open("/Users/kfls271/Rust/.cursor/debug.log")
{
let normalized_sig: Vec<f64> = (0..detector_names.len())
.map(|idx| mixing_matrix[(idx, *endmember_idx)])
.collect();
let non_zero_count = normalized_sig.iter().filter(|&&v| v > 1e-6).count();
let max_non_primary = normalized_sig
.iter()
.enumerate()
.filter(|(idx, _)| *idx != primary_idx)
.map(|(_, &v)| v)
.fold(0.0f64, f64::max);
let log_entry = serde_json::json!({
"sessionId": "debug-session",
"runId": "signature-extraction",
"hypothesisId": "A,B,C,D",
"location": "commands.rs:1792",
"message": "Spectral signature extracted",
"data": {
"endmember": endmember_name,
"primary_detector": detector_names[primary_idx],
"primary_idx": primary_idx,
"raw_medians": medians.iter().map(|&v| v as f64).collect::<Vec<f64>>(),
"corrected_medians": corrected_medians.iter().map(|&v| v as f64).collect::<Vec<f64>>(),
"normalized_signature": normalized_sig,
"non_zero_detectors": non_zero_count,
"max_non_primary": max_non_primary,
"max_spillover": max_spillover
},
"timestamp": std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_millis()
});
let _ = writeln!(file, "{}", log_entry);
}
}
if let Some(plot_dir) = diagnostic_plot_dir {
let normalized_signature: Vec<f64> = corrected_medians
.iter()
.map(|&val| (val / primary_median) as f64)
.collect();
if let Err(e) = generate_control_diagnostic_plots(
&control_fcs_before_gating,
&control_fcs,
endmember_name,
detector_names,
&normalized_signature,
plot_dir,
"jpg", ) {
warn!(
"Failed to generate diagnostic plots for {}: {}",
endmember_name, e
);
}
}
if max_spillover > 0.5 {
warn!(
"High spillover detected for {}: max spillover = {:.1}% - verify control quality",
endmember_name,
max_spillover * 100.0
);
}
}
info!("Adding autofluorescence column to mixing matrix...");
let max_af = autofluorescence_medians
.iter()
.fold(0.0f32, |a, &b| a.max(b));
if max_af > 0.0 {
for (detector_idx, &af_median) in autofluorescence_medians.iter().enumerate() {
mixing_matrix[(detector_idx, autofluorescence_idx)] = (af_median / max_af) as f64;
}
info!(
"Created autofluorescence signature: normalized to max = 1.0 (detector with max AF: {:.2})",
max_af
);
} else {
warn!(
"Autofluorescence medians are all zero - this may indicate an issue with the unstained control"
);
for detector_idx in 0..n_detectors {
mixing_matrix[(detector_idx, autofluorescence_idx)] = 1e-6;
}
}
info!(
"Created mixing matrix from single-stain controls: {} detectors × {} endmembers",
n_detectors, n_endmembers
);
if config.peak_detection {
info!(
"Peak detection: ENABLED (threshold: {:.2}, bias: {:.2})",
config.peak_threshold, config.peak_bias
);
} else {
info!("Peak detection: DISABLED (using simple median)");
}
if config.use_negative_events {
info!(
"Negative event extraction: ENABLED (min events: {}, mode: {})",
config.min_negative_events, config.autofluorescence_mode
);
info!(
"Negative event AF available for {} endmembers",
negative_event_af.len()
);
} else {
info!("Negative event extraction: DISABLED");
}
for endmember_idx in 0..n_endmembers {
let column = mixing_matrix.column(endmember_idx);
let max_val = column.iter().fold(0.0f64, |a, &b| a.max(b));
let min_val = column.iter().fold(f64::INFINITY, |a, &b| a.min(b));
if max_val <= 0.0 {
warn!(
"Endmember {} has zero or negative maximum value in mixing matrix",
endmember_names[endmember_idx]
);
}
if min_val < -0.1 {
warn!(
"Endmember {} has negative values in mixing matrix (min: {:.3})",
endmember_names[endmember_idx], min_val
);
}
}
if n_endmembers > 1 {
let mut similar_pairs = Vec::new();
for i in 0..n_endmembers {
for j in (i + 1)..n_endmembers {
let has_control_i = control_files
.iter()
.any(|(name, _)| name == &endmember_names[i]);
let has_control_j = control_files
.iter()
.any(|(name, _)| name == &endmember_names[j]);
if !has_control_i || !has_control_j {
continue; }
let col_i = mixing_matrix.column(i);
let col_j = mixing_matrix.column(j);
let dot_product: f64 = col_i.iter().zip(col_j.iter()).map(|(a, b)| a * b).sum();
let norm_i: f64 = col_i.iter().map(|x| x * x).sum::<f64>().sqrt();
let norm_j: f64 = col_j.iter().map(|x| x * x).sum::<f64>().sqrt();
{
use std::fs::OpenOptions;
use std::io::Write;
if let Ok(mut file) = OpenOptions::new()
.create(true)
.append(true)
.open("/Users/kfls271/Rust/.cursor/debug.log")
{
let col_i_vec: Vec<f64> = col_i.iter().copied().collect();
let col_j_vec: Vec<f64> = col_j.iter().copied().collect();
let diff_vec: Vec<f64> = col_i_vec
.iter()
.zip(col_j_vec.iter())
.map(|(a, b)| (a - b).abs())
.collect();
let max_diff = diff_vec.iter().fold(0.0f64, |a, &b| a.max(b));
let mean_diff = diff_vec.iter().sum::<f64>() / diff_vec.len() as f64;
let log_entry = serde_json::json!({
"sessionId": "debug-session",
"runId": "similarity-check",
"hypothesisId": "A,B,C,D,E",
"location": "commands.rs:1792",
"message": "Cosine similarity calculation",
"data": {
"endmember_i": endmember_names[i],
"endmember_j": endmember_names[j],
"col_i": col_i_vec,
"col_j": col_j_vec,
"dot_product": dot_product,
"norm_i": norm_i,
"norm_j": norm_j,
"similarity": if norm_i > 0.0 && norm_j > 0.0 { dot_product / (norm_i * norm_j) } else { -1.0 },
"max_diff": max_diff,
"mean_diff": mean_diff,
"diff_vector": diff_vec
},
"timestamp": std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).unwrap().as_millis()
});
let _ = writeln!(file, "{}", log_entry);
}
}
if norm_i > 0.0 && norm_j > 0.0 {
let similarity = dot_product / (norm_i * norm_j);
if similarity > 0.99 {
let primary_i = col_i
.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
.map(|(idx, _)| idx);
let primary_j = col_j
.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
.map(|(idx, _)| idx);
let same_primary = primary_i == primary_j;
let non_zero_i = col_i.iter().filter(|&&v| v > 1e-6).count();
let non_zero_j = col_j.iter().filter(|&&v| v > 1e-6).count();
let max_i = col_i.iter().fold(0.0f64, |a, &b| a.max(b));
let max_j = col_j.iter().fold(0.0f64, |a, &b| a.max(b));
similar_pairs.push((
i,
j,
similarity,
same_primary,
primary_i,
primary_j,
non_zero_i,
non_zero_j,
max_i,
max_j,
));
}
}
}
}
if !similar_pairs.is_empty() {
warn!(
"Detected highly similar endmember pairs (cosine similarity > 0.99 on normalized spectra), which may cause solve failures:"
);
for (i, j, sim, same_primary, prim_i, prim_j, nz_i, nz_j, max_i, max_j) in similar_pairs
{
let primary_note = if same_primary {
format!(" (both primary: {})", detector_names[prim_i.unwrap()])
} else {
format!(
" (primary: {} vs {})",
detector_names[prim_i.unwrap()],
detector_names[prim_j.unwrap()]
)
};
warn!(
" - {} and {}: similarity = {:.4}{}",
endmember_names[i], endmember_names[j], sim, primary_note
);
warn!(
" Non-zero detectors: {} vs {}, max values: {:.3} vs {:.3}",
nz_i, nz_j, max_i, max_j
);
if nz_i <= 2 && nz_j <= 2 {
warn!(
" ⚠ Both spectra have very few non-zero detectors - this may indicate over-aggressive autofluorescence subtraction"
);
}
}
}
}
info!(
"Mixing matrix dimensions: {} detectors × {} endmembers (overdetermined system)",
n_detectors, n_endmembers
);
if n_detectors < n_endmembers {
return Err(anyhow::anyhow!(
"Mixing matrix is underdetermined: {} detectors < {} endmembers. Cannot solve uniquely.",
n_detectors,
n_endmembers
));
}
info!("Primary detector assignments:");
let mut detector_to_endmembers: std::collections::HashMap<String, Vec<String>> =
std::collections::HashMap::new();
for (endmember_idx, endmember_name) in endmember_names.iter().enumerate() {
if let Some(Some(info)) = primary_detector_info.get(endmember_idx) {
if let Some(ref primary_det) = info.primary_detector_name {
detector_to_endmembers
.entry(primary_det.clone())
.or_insert_with(Vec::new)
.push(endmember_name.clone());
info!(
" [{}] {} -> primary detector: {}",
endmember_idx, endmember_name, primary_det
);
}
}
}
for (detector, endmembers) in detector_to_endmembers.iter() {
if endmembers.len() > 1 {
warn!(
"⚠️ {} endmembers share primary detector '{}' - may indicate weak control signals or cross-reactivity:",
endmembers.len(),
detector
);
for (i, em) in endmembers.iter().enumerate() {
warn!(" {}. {}", i + 1, em);
}
}
}
primary_detector_info[autofluorescence_idx] = Some(PrimaryDetectorInfo {
endmember_name: endmember_names[autofluorescence_idx].clone(),
is_autofluorescence: true,
primary_detector_name: None,
primary_detector_pn_name: None,
primary_detector_pn_label: None,
selected_marker_name: Some("Autofluorescence".to_string()),
selected_fluor_name: None,
});
let primary_detector_info: Vec<PrimaryDetectorInfo> = primary_detector_info
.into_iter()
.map(|opt| {
opt.unwrap_or_else(|| {
panic!("PrimaryDetectorInfo not populated for all endmembers");
})
})
.collect();
Ok((
mixing_matrix,
detector_names.to_vec(),
primary_detector_info,
))
}
fn calculate_simple_median(values: &[f32]) -> f32 {
if values.is_empty() {
return 0.0;
}
let mut sorted_values: Vec<f32> = values.iter().copied().collect();
sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap());
sorted_values[sorted_values.len() / 2]
}
fn export_mixing_matrix_to_csv(
matrix: &ndarray::Array2<f64>,
path: &PathBuf,
detector_names: &[String],
endmember_names: &[String],
) -> Result<()> {
use std::fs::File;
use std::io::Write;
let mut file = File::create(path)?;
write!(file, "RowName,")?;
for (i, col_name) in endmember_names.iter().enumerate() {
write!(file, "{}", col_name)?;
if i < endmember_names.len() - 1 {
write!(file, ",")?;
}
}
writeln!(file)?;
for (row_idx, row_name) in detector_names.iter().enumerate() {
write!(file, "{}", row_name)?;
for col_idx in 0..matrix.ncols() {
write!(file, ",{:.10e}", matrix[(row_idx, col_idx)])?;
}
writeln!(file)?;
}
Ok(())
}
fn calculate_simple_median_f64(values: &[f64]) -> f64 {
if values.is_empty() {
return 0.0;
}
let mut sorted_values: Vec<f64> = values.iter().copied().collect();
sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap());
sorted_values[sorted_values.len() / 2]
}
fn extract_negative_event_autofluorescence(
control_fcs: &Fcs,
detector_names: &[String],
endmember_name: &str,
config: &SingleStainConfig,
) -> Result<Option<Vec<f32>>> {
use std::collections::HashSet;
let mut primary_detector_idx = 0;
let mut max_median = 0.0f32;
for (idx, detector_name) in detector_names.iter().enumerate() {
let values = control_fcs
.get_parameter_events_slice(detector_name)
.with_context(|| format!("Failed to extract {} from control", detector_name))?;
let median = calculate_simple_median(values);
if median > max_median {
max_median = median;
primary_detector_idx = idx;
}
}
let primary_detector = &detector_names[primary_detector_idx];
let primary_values = control_fcs
.get_parameter_events_slice(primary_detector)
.with_context(|| format!("Failed to extract {} from control", primary_detector))?;
let primary_values_f64: Vec<f64> = primary_values.iter().map(|&v| v as f64).collect();
let negative_events_mask = if config.peak_detection {
find_negative_peak_events(
&primary_values_f64,
config.peak_threshold,
config.peak_bias_negative,
)?
} else {
let threshold = calculate_simple_median(primary_values);
primary_values
.iter()
.map(|&v| v < threshold * 0.5) .collect()
};
let n_negative = negative_events_mask.iter().filter(|&&x| x).count();
if n_negative < config.min_negative_events {
return Ok(None);
}
let negative_percent = (n_negative as f64 / primary_values.len() as f64) * 100.0;
info!(
"Found {} negative events ({:.1}%) in {} control",
n_negative, negative_percent, endmember_name
);
if negative_percent < 5.0 {
warn!(
"Very few negative events ({:.1}%) in {} - may indicate poor staining or gating issues",
negative_percent, endmember_name
);
} else if negative_percent > 50.0 {
warn!(
"Unusually high negative event percentage ({:.1}%) in {} - verify control quality",
negative_percent, endmember_name
);
}
let mut negative_af: Vec<f32> = Vec::new();
for detector_name in detector_names.iter() {
let values = control_fcs
.get_parameter_events_slice(detector_name)
.with_context(|| format!("Failed to extract {} from control", detector_name))?;
let negative_values: Vec<f32> = values
.iter()
.zip(negative_events_mask.iter())
.filter_map(|(&value, &is_negative)| if is_negative { Some(value) } else { None })
.collect();
if negative_values.is_empty() {
return Ok(None);
}
let median = calculate_simple_median(&negative_values);
negative_af.push(median);
}
Ok(Some(negative_af))
}
fn find_negative_peak_events(
values: &[f64],
peak_threshold: f64,
peak_bias_negative: f64,
) -> Result<Vec<bool>> {
if values.is_empty() {
return Ok(vec![]);
}
let kde = match KernelDensity::estimate(values, 1.0, 512) {
Ok(kde) => kde,
Err(_) => {
let threshold = calculate_simple_median_f64(values);
return Ok(values.iter().map(|&v| v < threshold * 0.5).collect());
}
};
let peaks = kde.find_peaks(peak_threshold);
if peaks.is_empty() {
let threshold = calculate_simple_median_f64(values);
return Ok(values.iter().map(|&v| v < threshold * 0.5).collect());
}
let negative_peak = peaks
.iter()
.min_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.copied()
.ok_or_else(|| anyhow::anyhow!("No negative peak found"))?;
let mut sorted_values: Vec<f64> = values.iter().copied().collect();
sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap());
let median_all = sorted_values[sorted_values.len() / 2];
let deviations: Vec<f64> = sorted_values
.iter()
.map(|&v| (v - median_all).abs())
.collect();
let mut sorted_deviations = deviations;
sorted_deviations.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mad = sorted_deviations[sorted_deviations.len() / 2];
let peak_width = 2.0 * mad;
let peak_min = negative_peak - peak_width;
let peak_max = negative_peak + peak_width;
let mut peak_events: Vec<(usize, f64)> = values
.iter()
.enumerate()
.filter_map(|(idx, &v)| {
if v >= peak_min && v <= peak_max {
Some((idx, v))
} else {
None
}
})
.collect();
if peak_events.is_empty() {
let threshold = calculate_simple_median_f64(values);
return Ok(values.iter().map(|&v| v < threshold * 0.5).collect());
}
peak_events.sort_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap());
let bias_end_idx = ((peak_events.len() as f64) * peak_bias_negative) as usize;
let biased_indices: HashSet<usize> = peak_events[..bias_end_idx]
.iter()
.map(|(idx, _)| *idx)
.collect();
Ok(values
.iter()
.enumerate()
.map(|(idx, _)| biased_indices.contains(&idx))
.collect())
}
fn calculate_peak_based_median(values: &[f64], peak_threshold: f64, peak_bias: f64) -> Option<f32> {
if values.is_empty() {
return None;
}
let kde = match KernelDensity::estimate(values, 1.0, 512) {
Ok(kde) => kde,
Err(_) => return None,
};
let peaks = kde.find_peaks(peak_threshold);
if peaks.is_empty() {
return None;
}
if peaks.len() > 1 {
info!(
"Detected {} peaks (using highest at {:.2})",
peaks.len(),
peaks
.iter()
.max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.unwrap()
);
}
let main_peak = peaks
.iter()
.max_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))?;
let mut sorted_values: Vec<f64> = values.iter().copied().collect();
sorted_values.sort_by(|a, b| a.partial_cmp(b).unwrap());
let median_all = sorted_values[sorted_values.len() / 2];
let deviations: Vec<f64> = sorted_values
.iter()
.map(|&v| (v - median_all).abs())
.collect();
let mut sorted_deviations = deviations;
sorted_deviations.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mad = sorted_deviations[sorted_deviations.len() / 2];
let peak_width = 2.0 * mad;
let peak_min = main_peak - peak_width;
let peak_max = main_peak + peak_width;
let mut peak_events: Vec<f64> = values
.iter()
.filter(|&&v| v >= peak_min && v <= peak_max)
.copied()
.collect();
if peak_events.is_empty() {
peak_events = values.to_vec();
}
peak_events.sort_by(|a, b| a.partial_cmp(b).unwrap());
let bias_start_idx = ((peak_events.len() as f64) * (1.0 - peak_bias)) as usize;
let biased_events = &peak_events[bias_start_idx..];
if biased_events.is_empty() {
return None;
}
let median_idx = biased_events.len() / 2;
Some(biased_events[median_idx] as f32)
}
fn load_mixing_matrix(path: &PathBuf) -> Result<Array2<f64>> {
use std::fs::File;
use std::io::BufReader;
let file = File::open(path)
.with_context(|| format!("Failed to open mixing matrix file: {}", path.display()))?;
let reader = BufReader::new(file);
let mut csv_reader = csv::Reader::from_reader(reader);
let mut rows = Vec::new();
for result in csv_reader.records() {
let record = result?;
let row: Result<Vec<f64>, _> = record.iter().map(|s| s.parse::<f64>()).collect();
rows.push(row?);
}
if rows.is_empty() {
return Err(anyhow::anyhow!("Mixing matrix file is empty"));
}
let n_cols = rows[0].len();
for (idx, row) in rows.iter().enumerate() {
if row.len() != n_cols {
return Err(anyhow::anyhow!(
"Row {} has {} columns, expected {}",
idx + 1,
row.len(),
n_cols
));
}
}
let n_rows = rows.len();
let mut matrix = Array2::<f64>::zeros((n_rows, n_cols));
for (i, row) in rows.iter().enumerate() {
for (j, &value) in row.iter().enumerate() {
matrix[(i, j)] = value;
}
}
Ok(matrix)
}
fn generate_tru_ols_plots(
unmixed_df: &EventDataFrame,
endmember_names: &[&str],
plot_dir: &PathBuf,
plot_format: &str,
) -> Result<()> {
use flow_plots::{DensityPlot, DensityPlotOptions};
use flow_tru_ols::plot_abundance_distribution;
let n_events = unmixed_df.height();
let n_endmembers = endmember_names.len();
let mut unmixed_array = Array2::<f64>::zeros((n_events, n_endmembers));
for (idx, &endmember_name) in endmember_names.iter().enumerate() {
let series = unmixed_df
.column(endmember_name)
.with_context(|| format!("Failed to find column for endmember: {}", endmember_name))?;
let values = series
.f32()
.with_context(|| format!("Failed to extract f32 values for {}", endmember_name))?;
for (event_idx, opt_val) in values.iter().enumerate() {
if let Some(val) = opt_val {
unmixed_array[(event_idx, idx)] = val as f64;
}
}
}
let unmixed_mat = Mat::from_fn(n_events, n_endmembers, |i, j| unmixed_array[(i, j)]);
for (idx, &endmember_name) in endmember_names.iter().enumerate() {
let plot_bytes = plot_abundance_distribution(unmixed_mat.as_ref(), endmember_names, idx)
.with_context(|| {
format!(
"Failed to plot abundance distribution for {}",
endmember_name
)
})?;
let filename = format!(
"tru_ols_{}_distribution.{}",
endmember_name.replace(" ", "_"),
plot_format
);
let filepath = plot_dir.join(&filename);
fs::write(&filepath, plot_bytes)
.with_context(|| format!("Failed to write plot to {}", filepath.display()))?;
info!("Saved plot: {}", filepath.display());
}
if endmember_names.len() >= 2 {
for i in 0..(endmember_names.len().min(4)) {
for j in (i + 1)..(endmember_names.len().min(4)) {
let x_col = endmember_names[i];
let y_col = endmember_names[j];
let x_series = unmixed_df
.column(x_col)
.with_context(|| format!("Failed to find column: {}", x_col))?;
let y_series = unmixed_df
.column(y_col)
.with_context(|| format!("Failed to find column: {}", y_col))?;
let x_values = x_series
.f32()
.with_context(|| format!("Failed to extract f32 values for {}", x_col))?;
let y_values = y_series
.f32()
.with_context(|| format!("Failed to extract f32 values for {}", y_col))?;
let pairs: Vec<(f32, f32)> = x_values
.iter()
.zip(y_values.iter())
.filter_map(|(x_opt, y_opt)| x_opt.and_then(|x| y_opt.map(|y| (x, y))))
.collect();
if !pairs.is_empty() {
let base = BasePlotOptions::new()
.width(800u32)
.height(600u32)
.build()
.context("Failed to create base plot options")?;
let options = DensityPlotOptions::new()
.base(base)
.build()
.context("Failed to create plot options")?;
let plot = DensityPlot::new();
let plot_bytes = plot
.render(
pairs.into(),
&options,
&mut flow_plots::render::RenderConfig::default(),
)
.context("Failed to render plot")?;
let filename = format!(
"tru_ols_{}_vs_{}.{}",
endmember_names[i].replace(" ", "_"),
endmember_names[j].replace(" ", "_"),
plot_format
);
let filepath = plot_dir.join(&filename);
fs::write(&filepath, plot_bytes).with_context(|| {
format!("Failed to write plot to {}", filepath.display())
})?;
info!("Saved comparison plot: {}", filepath.display());
}
}
}
}
Ok(())
}
fn generate_ols_comparison_plots(
stained_fcs: &Fcs,
_unstained_fcs: &Fcs,
mixing_matrix: &Array2<f64>,
detector_names: &[&str],
endmember_names: &[&str],
tru_ols_unmixed_df: &EventDataFrame,
plot_dir: &PathBuf,
plot_format: &str,
) -> Result<()> {
use flow_plots::{DensityPlot, DensityPlotOptions};
info!("Running OLS unmixing for comparison...");
let mixing_matrix_f32 =
faer::Mat::from_fn(mixing_matrix.nrows(), mixing_matrix.ncols(), |i, j| {
mixing_matrix[(i, j)] as f32
});
let ols_unmixed_df = stained_fcs
.apply_spectral_unmixing(
mixing_matrix_f32.as_ref(),
detector_names,
Some(endmember_names),
)
.context("Failed to run OLS unmixing")?;
for i in 0..(endmember_names.len().min(4)) {
for j in (i + 1)..(endmember_names.len().min(4)) {
let x_col = endmember_names[i];
let y_col = endmember_names[j];
let ols_x_series = ols_unmixed_df
.column(x_col)
.with_context(|| format!("Failed to find OLS column: {}", x_col))?;
let ols_y_series = ols_unmixed_df
.column(y_col)
.with_context(|| format!("Failed to find OLS column: {}", y_col))?;
let ols_x_values = ols_x_series
.f32()
.with_context(|| format!("Failed to extract f32 values for OLS {}", x_col))?;
let ols_y_values = ols_y_series
.f32()
.with_context(|| format!("Failed to extract f32 values for OLS {}", y_col))?;
let tru_ols_x_series = tru_ols_unmixed_df
.column(x_col)
.with_context(|| format!("Failed to find TRU-OLS column: {}", x_col))?;
let tru_ols_y_series = tru_ols_unmixed_df
.column(y_col)
.with_context(|| format!("Failed to find TRU-OLS column: {}", y_col))?;
let tru_ols_x_values = tru_ols_x_series
.f32()
.with_context(|| format!("Failed to extract f32 values for TRU-OLS {}", x_col))?;
let tru_ols_y_values = tru_ols_y_series
.f32()
.with_context(|| format!("Failed to extract f32 values for TRU-OLS {}", y_col))?;
let ols_pairs: Vec<(f32, f32)> = ols_x_values
.iter()
.zip(ols_y_values.iter())
.filter_map(|(x_opt, y_opt)| x_opt.and_then(|x| y_opt.map(|y| (x, y))))
.collect();
let tru_ols_pairs: Vec<(f32, f32)> = tru_ols_x_values
.iter()
.zip(tru_ols_y_values.iter())
.filter_map(|(x_opt, y_opt)| x_opt.and_then(|x| y_opt.map(|y| (x, y))))
.collect();
if !ols_pairs.is_empty() {
let base = BasePlotOptions::new()
.width(800u32)
.height(600u32)
.build()
.context("Failed to create base plot options")?;
let options = DensityPlotOptions::new()
.base(base)
.build()
.context("Failed to create plot options")?;
let plot = DensityPlot::new();
let plot_bytes = plot
.render(
ols_pairs.into(),
&options,
&mut flow_plots::render::RenderConfig::default(),
)
.context("Failed to render OLS plot")?;
let filename = format!(
"comparison_ols_{}_vs_{}.{}",
endmember_names[i].replace(" ", "_"),
endmember_names[j].replace(" ", "_"),
plot_format
);
let filepath = plot_dir.join(&filename);
fs::write(&filepath, plot_bytes)
.with_context(|| format!("Failed to write plot to {}", filepath.display()))?;
info!("Saved OLS plot: {}", filepath.display());
}
if !tru_ols_pairs.is_empty() {
let base = BasePlotOptions::new()
.width(800u32)
.height(600u32)
.build()
.context("Failed to create base plot options")?;
let options = DensityPlotOptions::new()
.base(base)
.build()
.context("Failed to create plot options")?;
let plot = DensityPlot::new();
let plot_bytes = plot
.render(
tru_ols_pairs.into(),
&options,
&mut flow_plots::render::RenderConfig::default(),
)
.context("Failed to render TRU-OLS plot")?;
let filename = format!(
"comparison_tru_ols_{}_vs_{}.{}",
endmember_names[i].replace(" ", "_"),
endmember_names[j].replace(" ", "_"),
plot_format
);
let filepath = plot_dir.join(&filename);
fs::write(&filepath, plot_bytes)
.with_context(|| format!("Failed to write plot to {}", filepath.display()))?;
info!("Saved TRU-OLS plot: {}", filepath.display());
}
}
}
Ok(())
}
pub fn clean_fcs_data(fcs: &Fcs) -> Result<Fcs> {
use peacoqc_rs::{DoubletConfig, FcsFilter, MarginConfig, remove_doublets, remove_margins};
let mut cleaned_fcs = fcs.clone();
let fluorescence_channels: Vec<String> = cleaned_fcs
.parameters
.values()
.filter(|p| p.is_fluorescence())
.map(|p| p.channel_name.to_string())
.collect();
if !fluorescence_channels.is_empty() {
let margin_config = MarginConfig {
channels: fluorescence_channels.clone(),
..Default::default()
};
if let Ok(margin_result) = remove_margins(&cleaned_fcs, &margin_config) {
if margin_result.percentage_removed > 0.0 {
cleaned_fcs = cleaned_fcs
.filter(&margin_result.mask)
.map_err(|e| anyhow::anyhow!("Failed to filter margin events: {}", e))?;
info!(
"PeacoQC margin removal: removed {:.2}% events",
margin_result.percentage_removed
);
}
}
let doublet_config = DoubletConfig::default();
if let Ok(doublet_result) = remove_doublets(&cleaned_fcs, &doublet_config) {
if doublet_result.percentage_removed > 0.0 {
cleaned_fcs = cleaned_fcs
.filter(&doublet_result.mask)
.map_err(|e| anyhow::anyhow!("Failed to filter doublet events: {}", e))?;
info!(
"PeacoQC doublet removal: removed {:.2}% events",
doublet_result.percentage_removed
);
}
}
}
cleaned_fcs = remove_debris_heuristic(&cleaned_fcs)?;
Ok(cleaned_fcs)
}
fn apply_automated_gating(fcs: &Fcs) -> Result<Fcs> {
use flow_gates::filtering::filter_events_by_gate;
let cleaned_fcs = clean_fcs_data(fcs)?;
let scatter_config = ScatterGateConfig {
fsc_channel: "FSC-A".to_string(),
ssc_channel: "SSC-A".to_string(),
method: ScatterGateMethod::DensityContour { threshold: 0.5 },
min_events: 100,
density_threshold: Some(0.5),
cluster_eps: None,
cluster_min_samples: None,
};
let doublet_config = DoubletGateConfig {
channels: vec![
("FSC-A".to_string(), "FSC-H".to_string()),
("FSC-W".to_string(), "FSC-H".to_string()),
],
method: DoubletMethod::Hybrid, nmad: Some(3.5), density_threshold: Some(0.15), cluster_eps: None,
cluster_min_samples: None,
};
let preprocessing_config = PreprocessingConfig {
scatter_config,
doublet_config,
};
let gates = create_preprocessing_gates(&cleaned_fcs, preprocessing_config)
.map_err(|e| anyhow::anyhow!("Failed to create preprocessing gates: {}", e))?;
let mut gated_indices: HashSet<usize> = HashSet::new();
if let Some(scatter_gate) = &gates.scatter_gate {
let scatter_indices = filter_events_by_gate(&cleaned_fcs, scatter_gate, None)
.map_err(|e| anyhow::anyhow!("Failed to apply scatter gate: {}", e))?;
gated_indices.extend(scatter_indices.iter().copied());
info!("Scatter gate: {} events passed", scatter_indices.len());
} else {
let n_events = fcs.data_frame.height();
gated_indices.extend(0..n_events);
}
if let Some(doublet_gate) = &gates.doublet_gate {
let doublet_indices = filter_events_by_gate(&cleaned_fcs, doublet_gate, None)
.map_err(|e| anyhow::anyhow!("Failed to apply doublet gate: {}", e))?;
let doublet_set: HashSet<usize> = doublet_indices.iter().copied().collect();
gated_indices.retain(|&idx| !doublet_set.contains(&idx));
info!(
"Doublet exclusion: {} doublets removed, {} events remaining",
doublet_set.len(),
gated_indices.len()
);
}
let n_events = cleaned_fcs.data_frame.height();
info!(
"Automated gating complete: {} events passed gates (out of {})",
gated_indices.len(),
n_events
);
let mut mask = vec![false; n_events];
for &idx in &gated_indices {
if idx < n_events {
mask[idx] = true;
}
}
use polars::prelude::Series;
use std::sync::Arc;
let mask_series = Series::from_iter(mask.iter().copied());
let mask_ca = mask_series
.bool()
.map_err(|e| anyhow::anyhow!("Failed to create boolean mask: {}", e))?;
let filtered_df = cleaned_fcs
.data_frame
.filter(&mask_ca)
.map_err(|e| anyhow::anyhow!("Failed to filter DataFrame: {}", e))?;
let mut filtered_fcs = cleaned_fcs.clone();
filtered_fcs.data_frame = Arc::new(filtered_df);
let n_events_after = filtered_fcs.get_event_count_from_dataframe();
use flow_fcs::keyword::{
IntegerKeyword, Keyword, KeywordCreationResult, match_and_parse_keyword,
};
let tot_keyword = match_and_parse_keyword("$TOT", &n_events_after.to_string());
if let KeywordCreationResult::Int(IntegerKeyword::TOT(tot)) = tot_keyword {
filtered_fcs
.metadata
.keywords
.insert("$TOT".to_string(), Keyword::Int(IntegerKeyword::TOT(tot)));
}
Ok(filtered_fcs)
}
pub fn remove_debris_heuristic(fcs: &Fcs) -> Result<Fcs> {
use flow_utils::clustering::{KMeans, KMeansConfig};
use ndarray::Array2;
use polars::prelude::Series;
use std::sync::Arc;
let fsc_a_values = fcs
.get_parameter_events_slice("FSC-A")
.map_err(|e| anyhow::anyhow!("Failed to get FSC-A: {}", e))?;
let ssc_a_values = fcs
.get_parameter_events_slice("SSC-A")
.map_err(|e| anyhow::anyhow!("Failed to get SSC-A: {}", e))?;
if fsc_a_values.len() != ssc_a_values.len() {
return Err(anyhow::anyhow!("FSC-A and SSC-A have different lengths"));
}
let n_events = fsc_a_values.len();
if n_events < 100 {
return Ok(fcs.clone());
}
if n_events > 100_000 {
info!(
"Large dataset ({} events), using fast percentile-based debris removal",
n_events
);
return remove_debris_percentile(fcs);
}
let data_rows: Vec<Vec<f64>> = (0..n_events)
.map(|i| vec![fsc_a_values[i] as f64, ssc_a_values[i] as f64])
.collect();
let kmeans_config = KMeansConfig {
n_clusters: 3,
max_iterations: 50, tolerance: 1e-3, seed: Some(42), };
let result = match KMeans::fit_from_rows(data_rows, &kmeans_config) {
Ok(r) => r,
Err(e) => {
info!(
"K-means clustering failed: {:?}, falling back to percentile method",
e
);
return remove_debris_percentile(fcs);
}
};
let mut cluster_counts = vec![0; result.centroids.nrows()];
for &assignment in &result.assignments {
cluster_counts[assignment] += 1;
}
info!("Cluster sizes: {:?}", cluster_counts);
let debris_cluster = cluster_counts
.iter()
.enumerate()
.min_by(|(_, a), (_, b)| a.cmp(b))
.map(|(idx, _)| idx)
.unwrap_or(0);
let debris_cluster_size = cluster_counts[debris_cluster];
let debris_percentage = (debris_cluster_size as f64 / n_events as f64) * 100.0;
let mut debris_sum_fsc = 0.0;
let mut debris_sum_ssc = 0.0;
let mut debris_count = 0;
let mut total_fsc = 0.0;
let mut total_ssc = 0.0;
for (i, &cluster) in result.assignments.iter().enumerate() {
let fsc = fsc_a_values[i] as f64;
let ssc = ssc_a_values[i] as f64;
total_fsc += fsc;
total_ssc += ssc;
if cluster == debris_cluster {
debris_sum_fsc += fsc;
debris_sum_ssc += ssc;
debris_count += 1;
}
}
if debris_count == 0 {
info!("No events in smallest cluster, skipping debris removal");
return Ok(fcs.clone());
}
let debris_centroid_fsc = debris_sum_fsc / debris_count as f64;
let debris_centroid_ssc = debris_sum_ssc / debris_count as f64;
let overall_centroid_fsc = total_fsc / n_events as f64;
let overall_centroid_ssc = total_ssc / n_events as f64;
let is_bottom_left =
debris_centroid_fsc < overall_centroid_fsc && debris_centroid_ssc < overall_centroid_ssc;
info!(
"Debris cluster: size={} ({:.2}%), centroid=(FSC={:.1}, SSC={:.1}), overall_centroid=(FSC={:.1}, SSC={:.1}), is_bottom_left={}",
debris_cluster_size,
debris_percentage,
debris_centroid_fsc,
debris_centroid_ssc,
overall_centroid_fsc,
overall_centroid_ssc,
is_bottom_left
);
let very_low_threshold_fsc = overall_centroid_fsc * 0.3; let very_low_threshold_ssc = overall_centroid_ssc * 0.3;
let is_very_low = debris_centroid_fsc < very_low_threshold_fsc
&& debris_centroid_ssc < very_low_threshold_ssc;
let mask: Vec<bool> = if is_bottom_left || debris_percentage < 20.0 || is_very_low {
info!(
"Removing debris cluster: is_bottom_left={}, percentage={:.2}%, is_very_low={}",
is_bottom_left, debris_percentage, is_very_low
);
result
.assignments
.iter()
.map(|&cluster| cluster != debris_cluster)
.collect()
} else {
info!(
"Debris cluster is large ({:.2}%) and not in bottom-left, skipping removal",
debris_percentage
);
vec![true; n_events]
};
let n_events_before = fcs.data_frame.height();
let n_removed = mask.iter().filter(|&&keep| !keep).count();
if n_removed > 0 {
info!(
"Debris removal (clustering): removed {} events ({:.2}%) from smallest cluster (centroid: FSC={:.1}, SSC={:.1})",
n_removed,
(n_removed as f64 / n_events_before as f64) * 100.0,
debris_centroid_fsc,
debris_centroid_ssc
);
}
let mask_series = Series::from_iter(mask.iter().copied());
let mask_ca = mask_series
.bool()
.map_err(|e| anyhow::anyhow!("Failed to create boolean mask: {}", e))?;
let filtered_df = fcs
.data_frame
.filter(&mask_ca)
.map_err(|e| anyhow::anyhow!("Failed to filter DataFrame: {}", e))?;
let mut filtered_fcs = fcs.clone();
filtered_fcs.data_frame = Arc::new(filtered_df);
let n_events_after = filtered_fcs.get_event_count_from_dataframe();
use flow_fcs::keyword::{
IntegerKeyword, Keyword, KeywordCreationResult, match_and_parse_keyword,
};
let tot_keyword = match_and_parse_keyword("$TOT", &n_events_after.to_string());
if let KeywordCreationResult::Int(IntegerKeyword::TOT(tot)) = tot_keyword {
filtered_fcs
.metadata
.keywords
.insert("$TOT".to_string(), Keyword::Int(IntegerKeyword::TOT(tot)));
}
Ok(filtered_fcs)
}
fn remove_debris_percentile(fcs: &Fcs) -> Result<Fcs> {
use polars::prelude::Series;
use std::sync::Arc;
let fsc_a_values = fcs
.get_parameter_events_slice("FSC-A")
.map_err(|e| anyhow::anyhow!("Failed to get FSC-A: {}", e))?;
let ssc_a_values = fcs
.get_parameter_events_slice("SSC-A")
.map_err(|e| anyhow::anyhow!("Failed to get SSC-A: {}", e))?;
let mut fsc_sorted = fsc_a_values.to_vec();
fsc_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mut ssc_sorted = ssc_a_values.to_vec();
ssc_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
let fsc_threshold_idx = (fsc_sorted.len() as f64 * 0.02).floor() as usize;
let ssc_threshold_idx = (ssc_sorted.len() as f64 * 0.02).floor() as usize;
let fsc_threshold = fsc_sorted.get(fsc_threshold_idx).copied().unwrap_or(0.0);
let ssc_threshold = ssc_sorted.get(ssc_threshold_idx).copied().unwrap_or(0.0);
let mut total_fsc = 0.0;
let mut total_ssc = 0.0;
for (&fsc, &ssc) in fsc_a_values.iter().zip(ssc_a_values.iter()) {
total_fsc += fsc as f64;
total_ssc += ssc as f64;
}
let overall_centroid_fsc = total_fsc / fsc_a_values.len() as f64;
let overall_centroid_ssc = total_ssc / ssc_a_values.len() as f64;
let fsc_debris_threshold = (fsc_threshold as f64 * 2.5).max(overall_centroid_fsc * 0.30) as f32;
let ssc_debris_threshold = (ssc_threshold as f64 * 2.5).max(overall_centroid_ssc * 0.30) as f32;
let mask: Vec<bool> = fsc_a_values
.iter()
.zip(ssc_a_values.iter())
.map(|(&fsc, &ssc)| {
fsc > fsc_debris_threshold && ssc > ssc_debris_threshold
})
.collect();
let n_events_before = fcs.data_frame.height();
let n_removed = mask.iter().filter(|&&keep| !keep).count();
if n_removed > 0 {
info!(
"Debris removal (percentile fallback): removed {} events ({:.2}%)",
n_removed,
(n_removed as f64 / n_events_before as f64) * 100.0
);
}
let mask_series = Series::from_iter(mask.iter().copied());
let mask_ca = mask_series
.bool()
.map_err(|e| anyhow::anyhow!("Failed to create boolean mask: {}", e))?;
let filtered_df = fcs
.data_frame
.filter(&mask_ca)
.map_err(|e| anyhow::anyhow!("Failed to filter DataFrame: {}", e))?;
let mut filtered_fcs = fcs.clone();
filtered_fcs.data_frame = Arc::new(filtered_df);
let n_events_after = filtered_fcs.get_event_count_from_dataframe();
use flow_fcs::keyword::{
IntegerKeyword, Keyword, KeywordCreationResult, match_and_parse_keyword,
};
let tot_keyword = match_and_parse_keyword("$TOT", &n_events_after.to_string());
if let KeywordCreationResult::Int(IntegerKeyword::TOT(tot)) = tot_keyword {
filtered_fcs
.metadata
.keywords
.insert("$TOT".to_string(), Keyword::Int(IntegerKeyword::TOT(tot)));
}
Ok(filtered_fcs)
}
pub fn isolate_positive_peak_mask(
values: &[f64],
peak_threshold: f64,
peak_bias: f64,
) -> Result<Vec<bool>> {
use flow_utils::kde::KernelDensity;
if values.is_empty() {
return Ok(vec![]);
}
let mut sorted_all = values.to_vec();
sorted_all.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let q1_idx = sorted_all.len() / 4;
let intensity_threshold = sorted_all[q1_idx];
let median_all = sorted_all[sorted_all.len() / 2];
let q3_idx = (sorted_all.len() * 3) / 4;
let q3_value = sorted_all[q3_idx];
info!(
"Intensity statistics: Q1={:.2}, median={:.2}, Q3={:.2}, max={:.2}",
intensity_threshold,
median_all,
q3_value,
sorted_all[sorted_all.len() - 1]
);
let mut positive_events: Vec<(usize, f64)> = values
.iter()
.enumerate()
.filter(|(_, v)| **v > intensity_threshold)
.map(|(idx, v)| (idx, *v))
.collect();
if positive_events.is_empty() {
info!("No positive events found after filtering, using all events");
positive_events = values
.iter()
.enumerate()
.map(|(idx, v)| (idx, *v))
.collect();
}
let positive_values: Vec<f64> = positive_events.iter().map(|(_, v)| *v).collect();
info!(
"Filtered to {} positive events (above Q1={:.2}) out of {} total",
positive_values.len(),
intensity_threshold,
values.len()
);
let kde = match KernelDensity::estimate(&positive_values, 0.5, 1024) {
Ok(kde) => kde,
Err(e) => {
info!("KDE estimation failed: {:?}, returning all events", e);
return Ok(vec![true; values.len()]);
}
};
let adjusted_threshold = peak_threshold.min(0.2);
let mut peaks = kde.find_peaks(adjusted_threshold);
if peaks.is_empty() {
info!("No peaks detected in positive events, returning all events");
return Ok(vec![true; values.len()]);
}
peaks.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
if peaks.len() > 3 {
peaks.truncate(3);
}
info!(
"Detected {} candidate peaks in positive region: {:?}",
peaks.len(),
peaks
);
struct PeakCandidate {
x: f64,
density: f64,
intensity: f64,
combined_score: f64, }
let mut candidates: Vec<PeakCandidate> = peaks
.iter()
.map(|&peak_x| {
let density = kde.density_at(peak_x);
let intensity = peak_x; let max_density = kde.y.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let normalized_density = if max_density > 0.0 {
density / max_density
} else {
0.0
};
let log_intensity = (intensity + 1.0).ln();
let combined_score = normalized_density * log_intensity;
PeakCandidate {
x: peak_x,
density,
intensity,
combined_score,
}
})
.collect();
candidates.sort_by(|a, b| {
match b
.combined_score
.partial_cmp(&a.combined_score)
.unwrap_or(std::cmp::Ordering::Equal)
{
std::cmp::Ordering::Equal => b
.intensity
.partial_cmp(&a.intensity)
.unwrap_or(std::cmp::Ordering::Equal),
other => other,
}
});
let main_peak = candidates[0].x;
let main_density = candidates[0].density;
let main_intensity = candidates[0].intensity;
let main_score = candidates[0].combined_score;
info!(
"Selected peak at {:.2} (density: {:.6}, intensity: {:.2}, combined_score: {:.6}) from {} candidates",
main_peak,
main_density,
main_intensity,
main_score,
candidates.len()
);
for (i, cand) in candidates.iter().enumerate() {
info!(
" Candidate {}: x={:.2}, density={:.6}, intensity={:.2}, combined_score={:.6}",
i + 1,
cand.x,
cand.density,
cand.intensity,
cand.combined_score
);
}
let mut sorted_all = values.to_vec();
sorted_all.sort_by(|a, b| a.partial_cmp(b).unwrap());
let q1_idx = sorted_all.len() / 4;
let q3_idx = (sorted_all.len() * 3) / 4;
let iqr = sorted_all[q3_idx] - sorted_all[q1_idx];
let window = iqr * 2.0;
info!("IQR: {:.2}, initial window: {:.2} (IQR * 2.0)", iqr, window);
let mut peak_region_values: Vec<f64> = values
.iter()
.filter(|&&v| (v - main_peak).abs() < window)
.copied()
.collect();
if peak_region_values.is_empty() {
peak_region_values = values.to_vec();
}
peak_region_values.sort_by(|a, b| a.partial_cmp(b).unwrap());
let median_peak_region = peak_region_values[peak_region_values.len() / 2];
let deviations: Vec<f64> = peak_region_values
.iter()
.map(|&v| (v - median_peak_region).abs())
.collect();
let mut sorted_deviations = deviations;
sorted_deviations.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mad1 = sorted_deviations[sorted_deviations.len() / 2];
let peak_width1 = 2.0 * mad1;
let peak_min1 = main_peak - peak_width1;
let peak_max1 = main_peak + peak_width1;
info!(
"First-stage MAD: {:.2}, peak region: [{:.2}, {:.2}], width: {:.2}",
mad1, peak_min1, peak_max1, peak_width1
);
let peak_indices: Vec<usize> = values
.iter()
.enumerate()
.filter(|(_, v)| **v >= peak_min1 && **v <= peak_max1)
.map(|(idx, _)| idx)
.collect();
if peak_indices.is_empty() {
info!("No events in first-stage peak region, returning all events");
return Ok(vec![true; values.len()]);
}
info!(
"Found {} events in first-stage peak region (out of {})",
peak_indices.len(),
values.len()
);
let mut filtered_values: Vec<f64> = peak_indices.iter().map(|&idx| values[idx]).collect();
filtered_values.sort_by(|a, b| a.partial_cmp(b).unwrap());
let median_filtered = filtered_values[filtered_values.len() / 2];
let deviations2: Vec<f64> = filtered_values
.iter()
.map(|&v| (v - median_filtered).abs())
.collect();
let mut sorted_deviations2 = deviations2;
sorted_deviations2.sort_by(|a, b| a.partial_cmp(b).unwrap());
let mad2 = sorted_deviations2[sorted_deviations2.len() / 2];
let peak_width2 = 2.0 * mad2;
let peak_min2 = main_peak - peak_width2;
let peak_max2 = main_peak + peak_width2;
info!(
"Second-stage MAD: {:.2}, refined peak region: [{:.2}, {:.2}], width: {:.2}",
mad2, peak_min2, peak_max2, peak_width2
);
let mut refined_indices: Vec<usize> = values
.iter()
.enumerate()
.filter(|(_, v)| **v >= peak_min2 && **v <= peak_max2)
.map(|(idx, _)| idx)
.collect();
if refined_indices.is_empty() {
info!("No events in refined peak region, using first-stage region");
refined_indices = peak_indices;
} else {
info!(
"Found {} events in refined peak region",
refined_indices.len()
);
}
let mut peak_values: Vec<(usize, f64)> = refined_indices
.iter()
.map(|&idx| (idx, values[idx]))
.collect();
peak_values.sort_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap());
let bias_start_idx = ((peak_values.len() as f64) * (1.0 - peak_bias)) as usize;
let biased_indices: std::collections::HashSet<usize> = peak_values[bias_start_idx..]
.iter()
.map(|(idx, _)| *idx)
.collect();
info!(
"After right-bias ({}): kept {} events",
peak_bias,
biased_indices.len()
);
if biased_indices.is_empty() {
info!("No events after bias filtering, returning all events");
return Ok(vec![true; values.len()]);
}
let mask: Vec<bool> = (0..values.len())
.map(|idx| biased_indices.contains(&idx))
.collect();
let n_kept = mask.iter().filter(|&&keep| keep).count();
info!(
"Positive peak isolation: kept {} events ({:.2}%) from peak region (peak: {:.2}, density: {:.6}, bias: {:.2})",
n_kept,
(n_kept as f64 / values.len() as f64) * 100.0,
main_peak,
main_density,
peak_bias
);
Ok(mask)
}
pub fn apply_mask_to_fcs(fcs: &Fcs, mask: &[bool]) -> Result<Fcs> {
use polars::prelude::Series;
use std::sync::Arc;
let n_events = fcs.data_frame.height();
if mask.len() != n_events {
return Err(anyhow::anyhow!(
"Mask length {} doesn't match FCS event count {}",
mask.len(),
n_events
));
}
let mask_series = Series::from_iter(mask.iter().copied());
let mask_ca = mask_series
.bool()
.map_err(|e| anyhow::anyhow!("Failed to create boolean mask: {}", e))?;
let filtered_df = fcs
.data_frame
.filter(&mask_ca)
.map_err(|e| anyhow::anyhow!("Failed to filter DataFrame: {}", e))?;
let mut filtered_fcs = fcs.clone();
filtered_fcs.data_frame = Arc::new(filtered_df);
let n_events_after = filtered_fcs.get_event_count_from_dataframe();
use flow_fcs::keyword::{
IntegerKeyword, Keyword, KeywordCreationResult, match_and_parse_keyword,
};
let tot_keyword = match_and_parse_keyword("$TOT", &n_events_after.to_string());
if let KeywordCreationResult::Int(IntegerKeyword::TOT(tot)) = tot_keyword {
filtered_fcs
.metadata
.keywords
.insert("$TOT".to_string(), Keyword::Int(IntegerKeyword::TOT(tot)));
}
Ok(filtered_fcs)
}
fn generate_control_diagnostic_plots(
control_fcs_before: &Fcs,
control_fcs_after: &Fcs,
endmember_name: &str,
detector_names: &[String],
normalized_signature: &[f64],
plot_dir: &PathBuf,
plot_format: &str,
) -> Result<()> {
use std::fs;
let control_plot_dir = plot_dir.join(endmember_name);
fs::create_dir_all(&control_plot_dir)?;
info!(
"Generating diagnostic plots for {} in {}",
endmember_name,
control_plot_dir.display()
);
generate_scatter_diagnostic_plots(
control_fcs_before,
control_fcs_after,
endmember_name,
&control_plot_dir,
plot_format,
)?;
generate_channel_density_plot(
control_fcs_after,
endmember_name,
detector_names,
&control_plot_dir,
plot_format,
)?;
generate_spectral_signature_plot(
endmember_name,
detector_names,
normalized_signature,
&control_plot_dir,
plot_format,
)?;
Ok(())
}
fn generate_scatter_diagnostic_plots(
fcs_before: &Fcs,
fcs_after: &Fcs,
endmember_name: &str,
plot_dir: &PathBuf,
plot_format: &str,
) -> Result<()> {
use flow_fcs::TransformType;
let fsc_a_before = fcs_before.get_parameter_events_slice("FSC-A").ok();
let ssc_a_before = fcs_before.get_parameter_events_slice("SSC-A").ok();
let fsc_h_before = fcs_before.get_parameter_events_slice("FSC-H").ok();
let fsc_a_after = fcs_after.get_parameter_events_slice("FSC-A").ok();
let ssc_a_after = fcs_after.get_parameter_events_slice("SSC-A").ok();
let fsc_h_after = fcs_after.get_parameter_events_slice("FSC-H").ok();
let mut render_config = RenderConfig::default();
let plot = DensityPlot::new();
if let (Some(fsc_a), Some(ssc_a)) = (fsc_a_before, ssc_a_before) {
let data: Vec<(f32, f32)> = fsc_a
.iter()
.zip(ssc_a.iter())
.map(|(a, b)| (*a, *b))
.collect();
let base_opts = BasePlotOptions::new()
.width(800u32)
.height(600u32)
.build()?;
let options = DensityPlotOptions::new()
.base(base_opts)
.x_axis(
AxisOptions::new()
.label("FSC-A".to_string())
.range(0.0..=262144.0)
.transform(TransformType::Linear)
.build()?,
)
.y_axis(
AxisOptions::new()
.label("SSC-A".to_string())
.range(0.0..=262144.0)
.transform(TransformType::Linear)
.build()?,
)
.build()?;
let bytes = plot.render(data.into(), &options, &mut render_config)?;
let output_path = plot_dir.join(format!(
"{}_fsca_vs_ssca_before.{}",
endmember_name, plot_format
));
std::fs::write(&output_path, bytes)?;
info!(" ✓ Saved: {}", output_path.display());
}
if let (Some(fsc_a), Some(ssc_a)) = (fsc_a_after, ssc_a_after) {
let data: Vec<(f32, f32)> = fsc_a
.iter()
.zip(ssc_a.iter())
.map(|(a, b)| (*a, *b))
.collect();
let base_opts = BasePlotOptions::new()
.width(800u32)
.height(600u32)
.build()?;
let options = DensityPlotOptions::new()
.base(base_opts)
.x_axis(
AxisOptions::new()
.label("FSC-A".to_string())
.range(0.0..=262144.0)
.transform(TransformType::Linear)
.build()?,
)
.y_axis(
AxisOptions::new()
.label("SSC-A".to_string())
.range(0.0..=262144.0)
.transform(TransformType::Linear)
.build()?,
)
.build()?;
let bytes = plot.render(data.into(), &options, &mut render_config)?;
let output_path = plot_dir.join(format!(
"{}_fsca_vs_ssca_after.{}",
endmember_name, plot_format
));
std::fs::write(&output_path, bytes)?;
info!(" ✓ Saved: {}", output_path.display());
}
if let (Some(fsc_a), Some(fsc_h)) = (fsc_a_before, fsc_h_before) {
let data: Vec<(f32, f32)> = fsc_a
.iter()
.zip(fsc_h.iter())
.map(|(a, b)| (*a, *b))
.collect();
let base_opts = BasePlotOptions::new()
.width(800u32)
.height(600u32)
.build()?;
let options = DensityPlotOptions::new()
.base(base_opts)
.x_axis(
AxisOptions::new()
.label("FSC-A".to_string())
.range(0.0..=262144.0)
.transform(TransformType::Linear)
.build()?,
)
.y_axis(
AxisOptions::new()
.label("FSC-H".to_string())
.range(0.0..=262144.0)
.transform(TransformType::Linear)
.build()?,
)
.build()?;
let bytes = plot.render(data.into(), &options, &mut render_config)?;
let output_path = plot_dir.join(format!(
"{}_fsca_vs_fsch_before.{}",
endmember_name, plot_format
));
std::fs::write(&output_path, bytes)?;
info!(" ✓ Saved: {}", output_path.display());
}
if let (Some(fsc_a), Some(fsc_h)) = (fsc_a_after, fsc_h_after) {
let data: Vec<(f32, f32)> = fsc_a
.iter()
.zip(fsc_h.iter())
.map(|(a, b)| (*a, *b))
.collect();
let base_opts = BasePlotOptions::new()
.width(800u32)
.height(600u32)
.build()?;
let options = DensityPlotOptions::new()
.base(base_opts)
.x_axis(
AxisOptions::new()
.label("FSC-A".to_string())
.range(0.0..=262144.0)
.transform(TransformType::Linear)
.build()?,
)
.y_axis(
AxisOptions::new()
.label("FSC-H".to_string())
.range(0.0..=262144.0)
.transform(TransformType::Linear)
.build()?,
)
.build()?;
let bytes = plot.render(data.into(), &options, &mut render_config)?;
let output_path = plot_dir.join(format!(
"{}_fsca_vs_fsch_after.{}",
endmember_name, plot_format
));
std::fs::write(&output_path, bytes)?;
info!(" ✓ Saved: {}", output_path.display());
}
Ok(())
}
fn generate_channel_density_plot(
fcs: &Fcs,
endmember_name: &str,
detector_names: &[String],
plot_dir: &PathBuf,
plot_format: &str,
) -> Result<()> {
let mut medians = Vec::new();
for detector_name in detector_names {
if let Ok(values) = fcs.get_parameter_events_slice(detector_name) {
let median = calculate_simple_median(values);
medians.push(median);
} else {
medians.push(0.0);
}
}
let data: Vec<(f32, f32)> = medians
.iter()
.enumerate()
.map(|(idx, &val)| (idx as f32, val))
.collect();
let mut render_config = RenderConfig::default();
let plot = DensityPlot::new();
let base_opts = BasePlotOptions::new()
.width(1200u32)
.height(400u32)
.build()?;
let options = DensityPlotOptions::new()
.base(base_opts)
.x_axis(
AxisOptions::new()
.label("Channel Index".to_string())
.range(0.0..=detector_names.len() as f32)
.transform(flow_fcs::TransformType::Linear)
.build()?,
)
.y_axis(
AxisOptions::new()
.label("Median Signal".to_string())
.range(0.0..=medians.iter().fold(0.0f32, |a, &b| a.max(b)) * 1.1)
.transform(flow_fcs::TransformType::Linear)
.build()?,
)
.build()?;
let bytes = plot.render(data.into(), &options, &mut render_config)?;
let output_path = plot_dir.join(format!(
"{}_channel_signals.{}",
endmember_name, plot_format
));
std::fs::write(&output_path, bytes)?;
info!(" ✓ Saved: {}", output_path.display());
Ok(())
}
fn generate_spectral_signature_plot(
endmember_name: &str,
detector_names: &[String],
normalized_signature: &[f64],
plot_dir: &PathBuf,
plot_format: &str,
) -> Result<()> {
let spectrum_data: Vec<(usize, f64)> = normalized_signature
.iter()
.enumerate()
.map(|(idx, &val)| (idx, val))
.collect();
let mut render_config = RenderConfig::default();
let plot = SpectralSignaturePlot::new();
let base_opts = BasePlotOptions::new()
.width(1200u32)
.height(600u32)
.build()?;
let options = SpectralSignaturePlotOptions::new()
.base(base_opts)
.x_axis(Some(
AxisOptions::new()
.label("Detector Channel".to_string())
.build()?,
))
.y_axis(Some(
AxisOptions::new()
.label("Normalized Intensity (1.0 to 0.0)".to_string())
.build()?,
))
.line_color("#1f77b4".to_string())
.line_width(2.0)
.show_grid(true)
.build()?;
let bytes = plot.render(
(spectrum_data, detector_names.to_vec()),
&options,
&mut render_config,
)?;
let output_path = plot_dir.join(format!(
"{}_spectral_signature.{}",
endmember_name, plot_format
));
std::fs::write(&output_path, bytes)?;
info!(" ✓ Saved: {}", output_path.display());
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_count_delimiters() {
assert_eq!(count_delimiters("PD-1"), 1);
assert_eq!(count_delimiters("HLA-DR_DQ"), 2);
assert_eq!(count_delimiters("simple"), 0);
assert_eq!(count_delimiters("a b c"), 2);
assert_eq!(count_delimiters("a_b-c d"), 3);
}
#[test]
fn test_candidate_fragments() {
let frags = candidate_fragments("PD-1");
assert!(frags.contains(&"PD-1".to_string()));
assert!(frags.contains(&"PD".to_string()));
assert!(frags.contains(&"1".to_string()));
let frags = candidate_fragments("HLA-DR_DQ");
assert!(frags.contains(&"HLA-DR_DQ".to_string()));
assert!(frags.contains(&"HLA".to_string()));
assert!(frags.contains(&"DR_DQ".to_string()));
assert!(frags.contains(&"DQ".to_string()));
let frags = candidate_fragments("simple");
assert_eq!(frags.len(), 1);
assert_eq!(frags[0], "simple");
}
#[test]
fn test_delimiter_preference_infer_full_name() {
let pref = DelimiterPreference::infer("PD-1", "PD-1");
assert!(pref.use_space);
assert!(pref.use_hyphen);
assert!(pref.use_underscore);
}
#[test]
fn test_delimiter_preference_infer_hyphen_split() {
let pref = DelimiterPreference::infer("HLA-DR_DQ", "HLA");
assert!(!pref.use_space);
assert!(pref.use_hyphen);
assert!(!pref.use_underscore);
}
#[test]
fn test_delimiter_preference_infer_underscore_split() {
let pref = DelimiterPreference::infer("HLA-DR_DQ", "DQ");
assert!(!pref.use_space);
assert!(!pref.use_hyphen);
assert!(pref.use_underscore);
}
#[test]
fn test_delimiter_preference_apply_hyphen_only() {
let pref = DelimiterPreference {
use_space: false,
use_hyphen: true,
use_underscore: false,
};
let frags = pref.apply("HLA-DR_DQ");
assert!(frags.contains(&"HLA-DR_DQ".to_string()));
assert!(frags.contains(&"HLA".to_string()));
assert!(frags.contains(&"DR_DQ".to_string()));
assert!(!frags.contains(&"DQ".to_string()));
}
#[test]
fn test_delimiter_preference_apply_space_only() {
let pref = DelimiterPreference {
use_space: true,
use_hyphen: false,
use_underscore: false,
};
let frags = pref.apply("Panel A CD4-T Cells");
assert!(frags.contains(&"Panel A CD4-T Cells".to_string()));
assert!(frags.contains(&"Panel".to_string()));
assert!(frags.contains(&"A".to_string()));
assert!(frags.contains(&"CD4-T".to_string()));
assert!(frags.contains(&"Cells".to_string()));
assert!(!frags.contains(&"CD4".to_string()));
}
#[test]
fn test_find_most_ambiguous_endmember() {
let controls = vec![
("CD4".to_string(), PathBuf::from("cd4.fcs")),
("HLA-DR_DQ".to_string(), PathBuf::from("hla.fcs")),
("PD-1".to_string(), PathBuf::from("pd1.fcs")),
];
if let Some((idx, delim)) = find_most_ambiguous_endmember(&controls) {
assert_eq!(idx, 1); assert_eq!(delim, 2);
} else {
panic!("Expected to find most ambiguous endmember");
}
}
#[test]
fn test_find_most_ambiguous_endmember_empty() {
let controls: Vec<(String, PathBuf)> = vec![];
assert!(find_most_ambiguous_endmember(&controls).is_none());
}
#[test]
fn test_find_most_ambiguous_endmember_no_delimiters() {
let controls = vec![
("CD4".to_string(), PathBuf::from("cd4.fcs")),
("CD8".to_string(), PathBuf::from("cd8.fcs")),
];
assert!(find_most_ambiguous_endmember(&controls).is_none());
}
}