use anyhow::{Context, Result};
use flow_fcs::Transformable;
use flow_fcs::file::AccessWrapper;
use flow_fcs::parameter::ParameterMap;
use flow_fcs::{Fcs, Header, Metadata, Parameter, TransformType, write_fcs_file};
use flow_plots::colormap::ColorMaps;
use flow_plots::options::{
AxisOptions, BasePlotOptions, DensityPlotOptions, SpectralSignaturePlotOptions,
};
use flow_plots::render::RenderConfig;
use flow_plots::{DensityPlot, Plot, SpectralSignaturePlot};
use flow_plots::{generate_normalized_spectral_signature_plot, generate_signal_heatmap};
use ndarray::Array2;
use polars::prelude::*;
use rand::{Rng, RngExt};
use rand_distr::{Distribution, Normal};
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;
use std::sync::Arc;
/// Emission profile of one fluorophore across a set of detectors.
///
/// The generators in this file multiply a per-event signal intensity by the
/// coefficients stored here to synthesise detector readings.
#[derive(Debug, Clone)]
pub struct SpectralSignature {
/// Label of the fluorophore; also used to build output file names
/// (for example `<name>.fcs` and `<name>_signal_heatmap.<ext>`).
pub name: String,
/// Name of the detector regarded as this fluorophore's main channel.
/// NOTE(review): not read by any code visible in this file — confirm how callers use it.
pub primary_detector: String,
/// Detector name -> multiplicative signal coefficient. Detectors that are
/// absent from the map are treated as `0.0` by the generators in this file.
pub detector_signals: HashMap<String, f64>,
}
pub fn generate_single_stain_control(
signature: &SpectralSignature,
detector_names: &[String],
n_events: usize,
autofluorescence: &HashMap<String, f32>,
noise_level: f32,
output_path: &PathBuf,
) -> Result<()> {
use rand::RngExt;
let mut rng = rand::rng();
let signal_mean = 50000.0;
let signal_std = 10000.0;
let signal_dist = Normal::new(signal_mean as f64, signal_std as f64)
.context("Failed to create signal distribution")?;
let mut columns: Vec<Column> = Vec::new();
let mut params = ParameterMap::default();
let fsc_a_dist =
Normal::new(100000.0, 25000.0).context("Failed to create FSC-A distribution")?;
let fsc_h_dist =
Normal::new(95000.0, 20000.0).context("Failed to create FSC-H distribution")?;
let ssc_a_dist =
Normal::new(50000.0, 15000.0).context("Failed to create SSC-A distribution")?;
let fsc_a: Vec<f32> = (0..n_events)
.map(|_| (fsc_a_dist.sample(&mut rng) as f32).max(1000.0))
.collect();
let fsc_h: Vec<f32> = (0..n_events)
.map(|i| {
let correlated = fsc_a[i] * 0.92;
let noise = fsc_h_dist.sample(&mut rng) as f32 - 95000.0;
(correlated + noise * 0.1).max(1000.0)
})
.collect();
let ssc_a: Vec<f32> = (0..n_events)
.map(|_| (ssc_a_dist.sample(&mut rng) as f32).max(500.0))
.collect();
columns.push(Column::new("FSC-A".into(), fsc_a.clone()));
columns.push(Column::new("FSC-H".into(), fsc_h.clone()));
columns.push(Column::new("SSC-A".into(), ssc_a.clone()));
params.insert(
"FSC-A".into(),
Parameter::new(&1, "FSC-A", "FSC-A", &TransformType::Linear),
);
params.insert(
"FSC-H".into(),
Parameter::new(&2, "FSC-H", "FSC-H", &TransformType::Linear),
);
params.insert(
"SSC-A".into(),
Parameter::new(&3, "SSC-A", "SSC-A", &TransformType::Linear),
);
let mut param_idx = 4;
for detector_name in detector_names {
let base_signal = signature
.detector_signals
.get(detector_name)
.copied()
.unwrap_or(0.0);
let af = autofluorescence
.get(detector_name)
.copied()
.unwrap_or(100.0);
let values: Vec<f32> = (0..n_events)
.map(|_| {
let event_signal = signal_dist.sample(&mut rng) as f32;
let spectral_component = event_signal * base_signal as f32;
let af_component = af;
let noise = rng.random_range(-noise_level..noise_level) * event_signal;
(spectral_component + af_component + noise).max(0.0)
})
.collect();
columns.push(Column::new(detector_name.clone().into(), values));
params.insert(
detector_name.clone().into(),
Parameter::new(
¶m_idx,
detector_name,
detector_name,
&TransformType::Linear,
),
);
param_idx += 1;
}
let df = DataFrame::new(n_events, columns)
.context("Failed to create DataFrame for synthetic control")?;
{
use std::fs::OpenOptions;
use std::io::Write;
if let Ok(mut file) = OpenOptions::new()
.create(true)
.append(true)
.open("/Users/kfls271/Rust/.cursor/debug.log")
{
let log_entry = serde_json::json!({
"sessionId": "debug-session",
"runId": "synthetic-generation",
"hypothesisId": "file-write-error",
"location": "synthetic_data.rs:130",
"message": "Before creating FCS struct and writing file",
"data": {
"output_path": output_path.display().to_string(),
"parent_dir": output_path.parent().map(|p| p.display().to_string()),
"n_events": n_events,
"n_detectors": detector_names.len()
},
"timestamp": std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).map(|d| d.as_millis()).unwrap_or(0)
});
let _ = writeln!(file, "{}", log_entry);
}
}
if let Some(parent) = output_path.parent() {
fs::create_dir_all(parent).with_context(|| {
format!(
"Failed to create parent directory for {}",
output_path.display()
)
})?;
}
{
use std::fs::OpenOptions;
use std::io::Write;
if let Ok(mut file) = OpenOptions::new()
.create(true)
.append(true)
.open("/Users/kfls271/Rust/.cursor/debug.log")
{
let log_entry = serde_json::json!({
"sessionId": "debug-session",
"runId": "synthetic-generation",
"hypothesisId": "file-write-error",
"location": "synthetic_data.rs:145",
"message": "After creating parent directory, before creating FCS struct",
"data": {
"output_path": output_path.display().to_string(),
"parent_exists": output_path.parent().map(|p| p.exists())
},
"timestamp": std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).map(|d| d.as_millis()).unwrap_or(0)
});
let _ = writeln!(file, "{}", log_entry);
}
}
let temp_file = std::env::temp_dir().join(format!("synthetic_temp_{}.fcs", std::process::id()));
{
std::fs::File::create(&temp_file)
.with_context(|| format!("Failed to create temporary file: {}", temp_file.display()))?;
}
{
use std::fs::OpenOptions;
use std::io::Write;
if let Ok(mut file) = OpenOptions::new()
.create(true)
.append(true)
.open("/Users/kfls271/Rust/.cursor/debug.log")
{
let log_entry = serde_json::json!({
"sessionId": "debug-session",
"runId": "synthetic-generation",
"hypothesisId": "file-write-error",
"location": "synthetic_data.rs:182",
"message": "Created temp file for AccessWrapper",
"data": {
"temp_file": temp_file.display().to_string(),
"temp_exists": temp_file.exists(),
"output_path": output_path.display().to_string()
},
"timestamp": std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).map(|d| d.as_millis()).unwrap_or(0)
});
let _ = writeln!(file, "{}", log_entry);
}
}
let metadata = Metadata::from_dataframe_and_parameters(&df, ¶ms)
.with_context(|| "Failed to create metadata from DataFrame and ParameterMap")?;
let fcs = Fcs {
header: Header::new(),
metadata,
parameters: params,
data_frame: Arc::new(df),
file_access: AccessWrapper::new(temp_file.to_str().ok_or_else(|| {
anyhow::anyhow!(
"Temp file path contains invalid UTF-8: {}",
temp_file.display()
)
})?)
.with_context(|| {
format!(
"Failed to create AccessWrapper from temp file: {}",
temp_file.display()
)
})?,
};
{
use std::fs::OpenOptions;
use std::io::Write;
if let Ok(mut file) = OpenOptions::new()
.create(true)
.append(true)
.open("/Users/kfls271/Rust/.cursor/debug.log")
{
let log_entry = serde_json::json!({
"sessionId": "debug-session",
"runId": "synthetic-generation",
"hypothesisId": "file-write-error",
"location": "synthetic_data.rs:165",
"message": "Before write_fcs_file call",
"data": {
"output_path": output_path.display().to_string(),
"path_exists": output_path.exists(),
"parent_exists": output_path.parent().map(|p| p.exists())
},
"timestamp": std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).map(|d| d.as_millis()).unwrap_or(0)
});
let _ = writeln!(file, "{}", log_entry);
}
}
write_fcs_file(fcs, output_path).with_context(|| {
format!(
"Failed to write synthetic control to {}",
output_path.display()
)
})?;
{
use std::fs::OpenOptions;
use std::io::Write;
if let Ok(mut file) = OpenOptions::new()
.create(true)
.append(true)
.open("/Users/kfls271/Rust/.cursor/debug.log")
{
let log_entry = serde_json::json!({
"sessionId": "debug-session",
"runId": "synthetic-generation",
"hypothesisId": "file-write-error",
"location": "synthetic_data.rs:185",
"message": "After write_fcs_file call - success",
"data": {
"output_path": output_path.display().to_string(),
"file_exists": output_path.exists(),
"file_size": output_path.exists().then(|| std::fs::metadata(output_path).ok().map(|m| m.len())).flatten()
},
"timestamp": std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH).map(|d| d.as_millis()).unwrap_or(0)
});
let _ = writeln!(file, "{}", log_entry);
}
}
Ok(())
}
/// Writes a synthetic multi-fluorophore ("fully stained") sample as an FCS file.
///
/// `abundances` must have one row per event and one column per entry of `signatures`.
/// Every event receives three scatter channels (`FSC-A`, `FSC-H`, `SSC-A`) followed by one
/// channel per entry of `detector_names`. A detector value is
/// `sum_over_signatures(50000 * abundance * coefficient) + autofluorescence + noise`, clamped
/// at zero, where missing coefficients count as `0.0`, missing autofluorescence entries count
/// as `100.0`, and `noise` is uniform in `[-noise_level, noise_level)` scaled by `50000`.
/// A `noise_level` of exactly `0.0` disables noise.
///
/// The `$FIL` keyword is set to the file name of `output_path` (falling back to
/// `synthetic.fcs`). Parent directories of `output_path` are created when missing. The empty
/// temporary file that backs the `AccessWrapper` is removed before returning, on success and
/// on failure.
///
/// # Errors
///
/// Returns an error when the shape of `abundances` does not match `n_events` /
/// `signatures.len()`, when `noise_level` is negative or not finite, when a distribution, the
/// `DataFrame`, the metadata or the `AccessWrapper` cannot be built, or when creating
/// directories/files or writing the FCS file fails.
pub fn generate_mixed_sample(
    signatures: &[SpectralSignature],
    detector_names: &[String],
    n_events: usize,
    abundances: &Array2<f64>,
    autofluorescence: &HashMap<String, f32>,
    noise_level: f32,
    output_path: &PathBuf,
) -> Result<()> {
    use rand::RngExt;

    if abundances.nrows() != n_events {
        return Err(anyhow::anyhow!(
            "Abundances matrix has {} rows but need {} events",
            abundances.nrows(),
            n_events
        ));
    }
    if abundances.ncols() != signatures.len() {
        return Err(anyhow::anyhow!(
            "Abundances matrix has {} columns but have {} signatures",
            abundances.ncols(),
            signatures.len()
        ));
    }
    // `random_range` panics on an empty range, so reject values that would produce one.
    anyhow::ensure!(
        noise_level.is_finite() && noise_level >= 0.0,
        "noise_level must be a finite, non-negative number (got {})",
        noise_level
    );

    let mut rng = rand::rng();
    let fsc_a_dist = Normal::new(100_000.0_f64, 25_000.0)
        .context("Failed to create FSC-A distribution")?;
    let fsc_h_dist = Normal::new(95_000.0_f64, 20_000.0)
        .context("Failed to create FSC-H distribution")?;
    let ssc_a_dist = Normal::new(50_000.0_f64, 15_000.0)
        .context("Failed to create SSC-A distribution")?;

    let mut columns: Vec<Column> = Vec::new();
    let mut params = ParameterMap::default();

    // Scatter channels: FSC-H tracks FSC-A (92 %) plus a small independent jitter.
    let fsc_a: Vec<f32> = (0..n_events)
        .map(|_| (fsc_a_dist.sample(&mut rng) as f32).max(1000.0))
        .collect();
    let fsc_h: Vec<f32> = fsc_a
        .iter()
        .map(|&area| {
            let jitter = fsc_h_dist.sample(&mut rng) as f32 - 95000.0;
            (area * 0.92 + jitter * 0.1).max(1000.0)
        })
        .collect();
    let ssc_a: Vec<f32> = (0..n_events)
        .map(|_| (ssc_a_dist.sample(&mut rng) as f32).max(500.0))
        .collect();

    columns.push(Column::new("FSC-A".into(), fsc_a));
    columns.push(Column::new("FSC-H".into(), fsc_h));
    columns.push(Column::new("SSC-A".into(), ssc_a));
    params.insert(
        "FSC-A".into(),
        Parameter::new(&1, "FSC-A", "FSC-A", &TransformType::Linear),
    );
    params.insert(
        "FSC-H".into(),
        Parameter::new(&2, "FSC-H", "FSC-H", &TransformType::Linear),
    );
    params.insert(
        "SSC-A".into(),
        Parameter::new(&3, "SSC-A", "SSC-A", &TransformType::Linear),
    );

    // Fluorescence detectors are numbered after the three scatter parameters.
    let mut param_idx = 4;
    for detector_name in detector_names {
        let af = autofluorescence
            .get(detector_name)
            .copied()
            .unwrap_or(100.0);
        // The coefficient of each signature on this detector does not depend on the event,
        // so look it up once instead of once per event.
        let coefficients: Vec<f32> = signatures
            .iter()
            .map(|signature| {
                signature
                    .detector_signals
                    .get(detector_name)
                    .copied()
                    .unwrap_or(0.0) as f32
            })
            .collect();
        let values: Vec<f32> = (0..n_events)
            .map(|event_idx| {
                let mut total_signal = 0.0_f32;
                for (sig_idx, &coefficient) in coefficients.iter().enumerate() {
                    let base_signal = 50000.0 * abundances[(event_idx, sig_idx)] as f32;
                    total_signal += base_signal * coefficient;
                }
                total_signal += af;
                let relative_noise = if noise_level > 0.0 {
                    rng.random_range(-noise_level..noise_level)
                } else {
                    0.0
                };
                (total_signal + relative_noise * 50000.0).max(0.0)
            })
            .collect();
        columns.push(Column::new(detector_name.clone().into(), values));
        params.insert(
            detector_name.clone().into(),
            Parameter::new(
                &param_idx,
                detector_name,
                detector_name,
                &TransformType::Linear,
            ),
        );
        param_idx += 1;
    }

    let df = DataFrame::new(n_events, columns)
        .context("Failed to create DataFrame for synthetic mixed sample")?;

    if let Some(parent) = output_path.parent() {
        fs::create_dir_all(parent).with_context(|| {
            format!(
                "Failed to create parent directory for {}",
                output_path.display()
            )
        })?;
    }

    // `Fcs` needs a file-backed `AccessWrapper`; an empty placeholder file serves that purpose.
    let temp_file = std::env::temp_dir().join(format!("synthetic_temp_{}.fcs", std::process::id()));
    fs::File::create(&temp_file)
        .with_context(|| format!("Failed to create temporary file: {}", temp_file.display()))?;

    // Everything that can fail after the placeholder exists runs inside this closure so the
    // placeholder is removed no matter how the closure exits.
    let write_result = (|| -> Result<()> {
        let mut metadata = Metadata::from_dataframe_and_parameters(&df, &params)
            .with_context(|| "Failed to create metadata from DataFrame and ParameterMap")?;
        let filename = output_path
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("synthetic.fcs");
        metadata.insert_string_keyword("$FIL".to_string(), filename.to_string());

        let temp_str = temp_file.to_str().ok_or_else(|| {
            anyhow::anyhow!(
                "Temp file path contains invalid UTF-8: {}",
                temp_file.display()
            )
        })?;
        let file_access = AccessWrapper::new(temp_str).with_context(|| {
            format!(
                "Failed to create AccessWrapper from temp file: {}",
                temp_file.display()
            )
        })?;
        let fcs = Fcs {
            header: Header::new(),
            metadata,
            parameters: params,
            data_frame: Arc::new(df),
            file_access,
        };
        write_fcs_file(fcs, output_path).with_context(|| {
            format!(
                "Failed to write synthetic mixed sample to {}",
                output_path.display()
            )
        })?;
        Ok(())
    })();

    // Best-effort cleanup: a failure to delete the placeholder must not mask the real result.
    let _ = fs::remove_file(&temp_file);
    write_result
}
/// Builds the two-fluorophore reference panel (`BUV395`, `BUV496`) used by the small
/// synthetic data set.
///
/// Each signature lists coefficients for `UV1-A`, `UV2-A`, `UV3-A` and `V1-A`; the primary
/// detector carries the coefficient `1.0`.
pub fn create_test_signatures() -> Vec<SpectralSignature> {
    // (name, primary detector, [(detector, coefficient)])
    let panel: [(&str, &str, [(&str, f64); 4]); 2] = [
        (
            "BUV395",
            "UV1-A",
            [("UV1-A", 1.0), ("UV2-A", 0.15), ("UV3-A", 0.05), ("V1-A", 0.02)],
        ),
        (
            "BUV496",
            "UV2-A",
            [("UV2-A", 1.0), ("UV1-A", 0.20), ("UV3-A", 0.30), ("V1-A", 0.10)],
        ),
    ];

    panel
        .iter()
        .map(|(name, primary, coefficients)| SpectralSignature {
            name: name.to_string(),
            primary_detector: primary.to_string(),
            detector_signals: coefficients
                .iter()
                .map(|(detector, value)| (detector.to_string(), *value))
                .collect::<HashMap<String, f64>>(),
        })
        .collect()
}
/// Generates the visualization plots for one signature without any overlay statistics.
///
/// This is a convenience wrapper around
/// `generate_spectral_visualization_plots_with_overlay`: every argument is forwarded
/// unchanged and the three optional overlay maps (unstained medians, positive medians,
/// positive geometric means) are passed as `None`.
///
/// # Errors
///
/// Propagates any error returned by `generate_spectral_visualization_plots_with_overlay`.
pub fn generate_spectral_visualization_plots(
signature: &SpectralSignature,
detector_names: &[String],
raw_signals: &HashMap<String, f32>,
output_dir: &PathBuf,
plot_format: &str,
fcs_file_path: Option<&PathBuf>,
colormap: Option<ColorMaps>,
) -> Result<()> {
generate_spectral_visualization_plots_with_overlay(
signature,
detector_names,
raw_signals,
output_dir,
plot_format,
fcs_file_path,
colormap,
// In order: unstained_medians, positive_medians, positive_geometric_means.
None, None, None, )
}
pub fn generate_spectral_visualization_plots_with_overlay(
signature: &SpectralSignature,
detector_names: &[String],
raw_signals: &HashMap<String, f32>,
output_dir: &PathBuf,
plot_format: &str,
fcs_file_path: Option<&PathBuf>,
colormap: Option<ColorMaps>,
unstained_medians: Option<&HashMap<String, f32>>,
positive_medians: Option<&HashMap<String, f32>>,
positive_geometric_means: Option<&HashMap<String, f32>>,
) -> Result<()> {
fs::create_dir_all(output_dir).with_context(|| {
format!(
"Failed to create output directory: {}",
output_dir.display()
)
})?;
write_signal_heatmap_to_file(
signature,
detector_names,
raw_signals,
output_dir,
plot_format,
fcs_file_path,
colormap,
unstained_medians,
positive_medians,
positive_geometric_means,
)?;
generate_normalized_signature_plot(
signature,
detector_names,
output_dir,
plot_format,
fcs_file_path,
)?;
Ok(())
}
pub fn generate_signal_heatmap_only(
signature: &SpectralSignature,
detector_names: &[String],
raw_signals: &HashMap<String, f32>,
output_dir: &PathBuf,
plot_format: &str,
fcs_file_path: Option<&PathBuf>,
colormap: Option<ColorMaps>,
unstained_medians: Option<&HashMap<String, f32>>,
positive_medians: Option<&HashMap<String, f32>>,
) -> Result<()> {
fs::create_dir_all(output_dir).with_context(|| {
format!(
"Failed to create output directory: {}",
output_dir.display()
)
})?;
write_signal_heatmap_to_file(
signature,
detector_names,
raw_signals,
output_dir,
plot_format,
fcs_file_path,
colormap,
unstained_medians,
positive_medians,
None, )
}
/// Renders the signal heatmap for `signature` and stores it as
/// `<output_dir>/<signature.name>_signal_heatmap.<plot_format>`.
///
/// The rendering itself is delegated to `generate_signal_heatmap`; this function only
/// forwards the arguments and writes the returned bytes to disk.
///
/// # Errors
///
/// Fails when rendering fails or when the file cannot be written.
fn write_signal_heatmap_to_file(
    signature: &SpectralSignature,
    detector_names: &[String],
    raw_signals: &HashMap<String, f32>,
    output_dir: &PathBuf,
    plot_format: &str,
    fcs_file_path: Option<&PathBuf>,
    colormap: Option<ColorMaps>,
    unstained_medians: Option<&HashMap<String, f32>>,
    positive_medians: Option<&HashMap<String, f32>>,
    positive_geometric_means: Option<&HashMap<String, f32>>,
) -> Result<()> {
    let source_file = fcs_file_path.map(|path| path.as_path());
    let image = generate_signal_heatmap(
        &signature.name,
        detector_names,
        raw_signals,
        source_file,
        colormap,
        unstained_medians,
        positive_medians,
        positive_geometric_means,
    )?;

    let file_name = format!("{}_signal_heatmap.{}", signature.name, plot_format);
    let destination = output_dir.join(file_name);
    fs::write(&destination, image)
        .with_context(|| format!("Failed to write heatmap to {}", destination.display()))
}
/// Renders the normalized spectral-signature plot for `signature` and stores it as
/// `<output_dir>/<signature.name>_normalized_signature.<plot_format>`.
///
/// The rendering itself is delegated to `generate_normalized_spectral_signature_plot`,
/// which receives the signature's coefficient map.
///
/// # Errors
///
/// Fails when rendering fails or when the file cannot be written.
fn generate_normalized_signature_plot(
    signature: &SpectralSignature,
    detector_names: &[String],
    output_dir: &PathBuf,
    plot_format: &str,
    fcs_file_path: Option<&PathBuf>,
) -> Result<()> {
    let source_file = fcs_file_path.map(|path| path.as_path());
    let image = generate_normalized_spectral_signature_plot(
        &signature.name,
        detector_names,
        &signature.detector_signals,
        source_file,
    )?;

    let file_name = format!("{}_normalized_signature.{}", signature.name, plot_format);
    let destination = output_dir.join(file_name);
    fs::write(&destination, image).with_context(|| {
        format!(
            "Failed to write normalized signature plot to {}",
            destination.display()
        )
    })
}
/// Writes a single-stain control and, optionally, its visualization plots.
///
/// The control file is produced by `generate_single_stain_control`. Afterwards the expected
/// signal of every detector is computed as `50000 * coefficient + autofluorescence`
/// (missing coefficients count as `0.0`, missing autofluorescence entries as `100.0`).
/// When `plot_output_dir` is `Some`, the plots are generated there from these expected
/// signals, with `output_path` passed as the source file and no colormap.
///
/// # Returns
///
/// The map of detector name to expected signal.
///
/// # Errors
///
/// Propagates failures from control generation and from plot generation.
pub fn generate_single_stain_control_with_plots(
    signature: &SpectralSignature,
    detector_names: &[String],
    n_events: usize,
    autofluorescence: &HashMap<String, f32>,
    noise_level: f32,
    output_path: &PathBuf,
    plot_output_dir: Option<&PathBuf>,
    plot_format: &str,
) -> Result<HashMap<String, f32>> {
    generate_single_stain_control(
        signature,
        detector_names,
        n_events,
        autofluorescence,
        noise_level,
        output_path,
    )?;

    let signal_mean: f32 = 50000.0;
    let raw_signals: HashMap<String, f32> = detector_names
        .iter()
        .map(|detector| {
            let coefficient = signature
                .detector_signals
                .get(detector)
                .copied()
                .unwrap_or(0.0) as f32;
            let baseline = autofluorescence.get(detector).copied().unwrap_or(100.0);
            (detector.clone(), signal_mean * coefficient + baseline)
        })
        .collect();

    match plot_output_dir {
        Some(plot_dir) => generate_spectral_visualization_plots(
            signature,
            detector_names,
            &raw_signals,
            plot_dir,
            plot_format,
            Some(output_path),
            None,
        )?,
        None => {}
    }

    Ok(raw_signals)
}
pub fn generate_test_synthetic_data_with_plots(
output_dir: &PathBuf,
plot_format: &str,
) -> Result<()> {
use std::fs;
fs::create_dir_all(output_dir).with_context(|| {
format!(
"Failed to create output directory: {}",
output_dir.display()
)
})?;
let controls_dir = output_dir.join("controls");
let plots_dir = output_dir.join("plots");
fs::create_dir_all(&controls_dir)?;
fs::create_dir_all(&plots_dir)?;
let signatures = create_test_signatures();
let detector_names = vec![
"UV1-A".to_string(),
"UV2-A".to_string(),
"UV3-A".to_string(),
"V1-A".to_string(),
"V2-A".to_string(),
];
let mut autofluorescence = HashMap::new();
for det_name in &detector_names {
autofluorescence.insert(det_name.clone(), 100.0);
}
for signature in &signatures {
let control_path = controls_dir.join(format!("{}.fcs", signature.name));
let plot_dir = plots_dir.join(&signature.name);
println!("Generating synthetic control: {}", signature.name);
let raw_signals = generate_single_stain_control_with_plots(
signature,
&detector_names,
50000, &autofluorescence,
0.05, &control_path,
Some(&plot_dir),
plot_format,
)?;
println!(" ✓ Generated FCS file: {}", control_path.display());
println!(" ✓ Generated plots in: {}", plot_dir.display());
println!(" - {}_signal_heatmap.{}", signature.name, plot_format);
println!(
" - {}_normalized_signature.{}",
signature.name, plot_format
);
}
let unstained_path = controls_dir.join("Unstained.fcs");
let mut unstained_signature = SpectralSignature {
name: "Unstained".to_string(),
primary_detector: "UV1-A".to_string(),
detector_signals: HashMap::new(), };
generate_single_stain_control(
&unstained_signature,
&detector_names,
50000,
&autofluorescence,
0.05,
&unstained_path,
)?;
println!(
"✓ Generated unstained control: {}",
unstained_path.display()
);
println!(
"\nAll synthetic test data generated in: {}",
output_dir.display()
);
Ok(())
}
/// Generates the comprehensive synthetic data set: ten single-stain controls with plots, an
/// unstained control and three fully-stained samples.
///
/// Layout below `output_dir`:
///
/// * `controls/<name>.fcs` for every signature from `create_10_fluorophore_signatures`,
/// * `controls/Unstained_Control.fcs` (a signature with no detector coefficients),
/// * `plots/<name>/` with the plots of each signature,
/// * `samples/FullyStained_Sample_<1..=3>.fcs`.
///
/// Every file holds 50000 events over 25 detectors with a noise level of `0.05`.
/// Autofluorescence is `500.0` for `UV` detectors containing `446` or `582`, `400.0` for `V`
/// detectors containing `525` or `660`, and `100.0` otherwise. Progress is reported on
/// standard output.
///
/// # Errors
///
/// Fails when a directory cannot be created or when generating any file or plot fails.
pub fn generate_comprehensive_synthetic_data(
    output_dir: &PathBuf,
    plot_format: &str,
) -> Result<()> {
    use std::fs;

    const EVENTS_PER_FILE: usize = 50000;
    const NOISE_LEVEL: f32 = 0.05;
    const DETECTORS: [&str; 25] = [
        "UV379-A", "UV446-A", "UV582-A", "UV736-A", "UV812-A",
        "V508-A", "V525-A", "V660-A", "V720-A", "V780-A",
        "B510-A", "B560-A", "B610-A", "B660-A", "B710-A",
        "YG585-A", "YG615-A", "YG660-A", "YG750-A", "YG812-A",
        "R660-A", "R710-A", "R750-A", "R780-A", "R810-A",
    ];

    fs::create_dir_all(output_dir).with_context(|| {
        format!(
            "Failed to create output directory: {}",
            output_dir.display()
        )
    })?;
    let controls_dir = output_dir.join("controls");
    let plots_dir = output_dir.join("plots");
    let samples_dir = output_dir.join("samples");
    fs::create_dir_all(&controls_dir)?;
    fs::create_dir_all(&plots_dir)?;
    fs::create_dir_all(&samples_dir)?;

    let detector_names: Vec<String> = DETECTORS.iter().map(|name| name.to_string()).collect();

    // Detector-specific autofluorescence baseline.
    let baseline_for = |detector: &str| -> f32 {
        let elevated_uv = detector.starts_with("UV")
            && (detector.contains("446") || detector.contains("582"));
        let elevated_violet = detector.starts_with("V")
            && (detector.contains("525") || detector.contains("660"));
        if elevated_uv {
            500.0
        } else if elevated_violet {
            400.0
        } else {
            100.0
        }
    };
    let autofluorescence: HashMap<String, f32> = detector_names
        .iter()
        .map(|detector| (detector.clone(), baseline_for(detector)))
        .collect();

    let signatures = create_10_fluorophore_signatures();

    println!("Generating single-stain controls...");
    for signature in &signatures {
        let control_path = controls_dir.join(format!("{}.fcs", signature.name));
        let plot_dir = plots_dir.join(&signature.name);
        println!(" Generating: {}", signature.name);
        generate_single_stain_control_with_plots(
            signature,
            &detector_names,
            EVENTS_PER_FILE,
            &autofluorescence,
            NOISE_LEVEL,
            &control_path,
            Some(&plot_dir),
            plot_format,
        )?;
    }

    println!("\nGenerating unstained control...");
    let unstained_path = controls_dir.join("Unstained_Control.fcs");
    let unstained_signature = SpectralSignature {
        name: "Unstained_Control".to_string(),
        primary_detector: "UV446-A".to_string(),
        detector_signals: HashMap::new(),
    };
    generate_single_stain_control(
        &unstained_signature,
        &detector_names,
        EVENTS_PER_FILE,
        &autofluorescence,
        NOISE_LEVEL,
        &unstained_path,
    )?;

    println!("\nGenerating fully-stained samples...");
    for sample_idx in 1..=3 {
        let sample_path = samples_dir.join(format!("FullyStained_Sample_{}.fcs", sample_idx));
        println!(" Generating: FullyStained_Sample_{}", sample_idx);
        let abundances =
            create_varying_expression_abundances(EVENTS_PER_FILE, &signatures, sample_idx);
        generate_mixed_sample(
            &signatures,
            &detector_names,
            EVENTS_PER_FILE,
            &abundances,
            &autofluorescence,
            NOISE_LEVEL,
            &sample_path,
        )?;
    }

    println!(
        "\n✓ All synthetic test data generated in: {}",
        output_dir.display()
    );
    println!(" - Controls: {}", controls_dir.display());
    println!(" - Samples: {}", samples_dir.display());
    println!(" - Plots: {}", plots_dir.display());
    Ok(())
}
/// Builds the ten-fluorophore panel used by the comprehensive synthetic data set.
///
/// Every signature carries the coefficient `1.0` on its primary detector and smaller
/// spill-over coefficients on a handful of other detectors. The signatures are returned in
/// the order of the table below.
fn create_10_fluorophore_signatures() -> Vec<SpectralSignature> {
    // (name, primary detector, [(detector, coefficient)])
    const PANEL: &[(&str, &str, &[(&str, f64)])] = &[
        (
            "Fluor_UV379",
            "UV379-A",
            &[
                ("UV379-A", 1.0),
                ("UV446-A", 0.18),
                ("V508-A", 0.12),
                ("V525-A", 0.08),
                ("B510-A", 0.04),
            ],
        ),
        (
            "Fluor_UV446",
            "UV446-A",
            &[
                ("UV446-A", 1.0),
                ("UV379-A", 0.12),
                ("UV582-A", 0.25),
                ("V508-A", 0.22),
                ("V525-A", 0.15),
                ("B510-A", 0.08),
            ],
        ),
        (
            "Fluor_UV736",
            "UV736-A",
            &[
                ("UV736-A", 1.0),
                ("UV582-A", 0.15),
                ("UV812-A", 0.20),
                ("V720-A", 0.18),
                ("V780-A", 0.12),
                ("YG750-A", 0.10),
                ("B710-A", 0.06),
            ],
        ),
        (
            "Fluor_V660",
            "V660-A",
            &[
                ("V660-A", 1.0),
                ("V525-A", 0.10),
                ("V720-A", 0.15),
                ("B660-A", 0.25),
                ("B610-A", 0.12),
                ("R660-A", 0.20),
                ("R710-A", 0.08),
            ],
        ),
        (
            "Fluor_V720",
            "V720-A",
            &[
                ("V720-A", 1.0),
                ("V660-A", 0.20),
                ("V780-A", 0.18),
                ("UV736-A", 0.15),
                ("UV812-A", 0.08),
                ("B710-A", 0.10),
            ],
        ),
        (
            "Fluor_B510",
            "B510-A",
            &[
                ("B510-A", 1.0),
                ("B560-A", 0.22),
                ("V508-A", 0.18),
                ("V525-A", 0.12),
                ("YG585-A", 0.08),
                ("UV446-A", 0.05),
            ],
        ),
        (
            "Fluor_B660",
            "B660-A",
            &[
                ("B660-A", 1.0),
                ("B610-A", 0.18),
                ("B710-A", 0.20),
                ("V660-A", 0.25),
                ("V720-A", 0.10),
                ("R660-A", 0.22),
                ("R710-A", 0.12),
            ],
        ),
        (
            "Fluor_YG585",
            "YG585-A",
            &[
                ("YG585-A", 1.0),
                ("YG615-A", 0.25),
                ("YG660-A", 0.10),
                ("B560-A", 0.18),
                ("B610-A", 0.12),
                ("V525-A", 0.08),
            ],
        ),
        (
            "Fluor_YG750",
            "YG750-A",
            &[
                ("YG750-A", 1.0),
                ("YG660-A", 0.20),
                ("YG812-A", 0.18),
                ("UV736-A", 0.15),
                ("UV812-A", 0.10),
                ("V720-A", 0.12),
                ("V780-A", 0.10),
                ("R750-A", 0.08),
            ],
        ),
        (
            "Fluor_R710",
            "R710-A",
            &[
                ("R710-A", 1.0),
                ("R660-A", 0.15),
                ("R750-A", 0.22),
                ("R780-A", 0.12),
                ("B710-A", 0.18),
                ("B660-A", 0.10),
                ("V720-A", 0.08),
                ("YG750-A", 0.10),
            ],
        ),
    ];

    PANEL
        .iter()
        .map(|&(name, primary, coefficients)| SpectralSignature {
            name: name.to_string(),
            primary_detector: primary.to_string(),
            detector_signals: coefficients
                .iter()
                .map(|&(detector, value)| (detector.to_string(), value))
                .collect::<HashMap<String, f64>>(),
        })
        .collect()
}
/// Builds a random `n_events x signatures.len()` abundance matrix for a fully-stained sample.
///
/// `sample_idx` selects a set of co-expression patterns (`1`, `2` or `3`; any other value
/// uses a single pattern covering fluorophores 0 through 9). For every event one pattern is
/// picked at random:
///
/// * each fluorophore in the pattern gets a high abundance in `[0.6, 1.0)` with probability
///   0.7, otherwise a low abundance in `[0.1, 0.5)`;
/// * each fluorophore outside the pattern gets a background abundance in `[0.05, 0.2)` with
///   probability 0.15 and stays at `0.0` otherwise.
///
/// The patterns are written for a ten-fluorophore panel. Indices that do not exist in
/// `signatures` are dropped from the patterns, so a smaller panel no longer triggers an
/// out-of-bounds panic.
fn create_varying_expression_abundances(
    n_events: usize,
    signatures: &[SpectralSignature],
    sample_idx: usize,
) -> Array2<f64> {
    use rand::RngExt;

    let n_fluors = signatures.len();
    let mut rng = rand::rng();
    let mut abundances = Array2::zeros((n_events, n_fluors));

    let templates: Vec<Vec<usize>> = match sample_idx {
        1 => vec![
            vec![0, 1, 2],
            vec![3, 4],
            vec![5, 6],
            vec![7, 8],
            vec![9],
            vec![0, 3, 5, 7, 9],
        ],
        2 => vec![
            vec![1, 2, 3],
            vec![4, 5, 6],
            vec![7, 8, 9],
            vec![0, 4, 7],
            vec![2, 6, 9],
        ],
        3 => vec![
            vec![0, 1, 2, 3, 4],
            vec![5, 6, 7, 8, 9],
            vec![0, 3, 5, 7, 9],
            vec![1, 2, 4, 6, 8],
        ],
        _ => vec![vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]],
    };
    // Clip every pattern to the fluorophores that actually exist in this panel.
    let patterns: Vec<Vec<usize>> = templates
        .into_iter()
        .map(|pattern| pattern.into_iter().filter(|&idx| idx < n_fluors).collect())
        .collect();

    for event_idx in 0..n_events {
        let pattern = &patterns[rng.random_range(0..patterns.len())];

        for &fluor_idx in pattern {
            let base_abundance = if rng.random_bool(0.7) {
                rng.random_range(0.6..1.0)
            } else {
                rng.random_range(0.1..0.5)
            };
            abundances[(event_idx, fluor_idx)] = base_abundance;
        }

        // Sparse low-level background for fluorophores outside the chosen pattern.
        for fluor_idx in 0..n_fluors {
            if !pattern.contains(&fluor_idx) && rng.random_bool(0.15) {
                abundances[(event_idx, fluor_idx)] = rng.random_range(0.05..0.2);
            }
        }
    }
    abundances
}