use crate::{
Fcs,
byteorder::ByteOrder,
keyword::{ByteKeyword, IntegerKeyword, Keyword, StringableKeyword},
metadata::Metadata,
version::Version,
};
use anyhow::{Result, anyhow};
use byteorder::{LittleEndian, WriteBytesExt};
use polars::prelude::*;
use std::fs::File;
use std::io::Write;
use std::path::Path;
use std::sync::Arc;
/// Write `fcs` to `path` as an FCS file: 58-byte header, TEXT segment, then a
/// single DATA segment (no ANALYSIS segment).
///
/// Because `$BEGINDATA`/`$ENDDATA` live *inside* the TEXT segment, the DATA
/// offsets depend on the TEXT length and vice versa. We therefore iterate to a
/// fixed point so the offsets embedded in TEXT match the bytes actually
/// written. (Previously TEXT was serialized once with *estimated* offsets and
/// could carry stale `$BEGINDATA`/`$ENDDATA` values.)
///
/// # Errors
/// Returns an error when the path lacks a `.fcs` extension, the frame has zero
/// events or parameters, serialization fails, offsets do not stabilize, or any
/// filesystem operation fails.
pub fn write_fcs_file(fcs: Fcs, path: impl AsRef<Path>) -> Result<()> {
    let path = path.as_ref();
    if path.extension().and_then(|s| s.to_str()) != Some("fcs") {
        return Err(anyhow!("Output file must have .fcs extension"));
    }
    let df = &*fcs.data_frame;
    let n_events = df.height();
    let n_params = df.width();
    if n_events == 0 {
        return Err(anyhow!("Cannot write FCS file with 0 events"));
    }
    if n_params == 0 {
        return Err(anyhow!("Cannot write FCS file with 0 parameters"));
    }
    let data_segment = serialize_data(df, &fcs.metadata)?;
    let header_size = 58; // fixed-size FCS header
    let text_start = header_size;
    // Initial guess for where DATA begins; refined by the loop below.
    let mut data_start =
        text_start + estimate_text_segment_size(&fcs.metadata, n_events, n_params);
    // Fixed-point iteration: the digit counts of the embedded offsets can only
    // change a handful of times, so this converges almost immediately; the
    // bound guards against pathological oscillation.
    let mut segments = None;
    for _ in 0..10 {
        let data_end = data_start + data_segment.len() - 1;
        let text_segment =
            serialize_metadata(&fcs.metadata, n_events, n_params, data_start, data_end)?;
        let text_end = text_start + text_segment.len() - 1;
        if text_end + 1 == data_start {
            // TEXT length agrees with the offsets serialized into it.
            segments = Some((text_segment, text_end, data_start, data_end));
            break;
        }
        data_start = text_end + 1;
    }
    let (text_segment, text_end, data_start, data_end) =
        segments.ok_or_else(|| anyhow!("Could not compute stable FCS segment offsets"))?;
    let header = build_header(
        &fcs.header.version,
        text_start,
        text_end,
        data_start,
        data_end,
    )?;
    let mut file = File::create(path)?;
    file.write_all(&header)?;
    file.write_all(&text_segment)?;
    file.write_all(&data_segment)?;
    file.sync_all()?;
    Ok(())
}
/// Duplicate the on-disk file backing `fcs` to `path` byte-for-byte.
///
/// # Errors
/// Propagates any I/O error from the filesystem copy.
pub fn duplicate_fcs_file(fcs: &Fcs, path: impl AsRef<Path>) -> Result<()> {
    std::fs::copy(&fcs.file_access.path, path.as_ref())?;
    Ok(())
}
/// Apply `updates` to the metadata of `fcs`, re-sync `$TOT` with the actual
/// event count, write the result to `path`, and return the freshly re-opened
/// file.
///
/// # Errors
/// Fails if writing the file or re-opening it fails, or if `path` is not
/// valid UTF-8.
pub fn edit_metadata_and_save<F>(mut fcs: Fcs, path: impl AsRef<Path>, updates: F) -> Result<Fcs>
where
    F: FnOnce(&mut Metadata),
{
    updates(&mut fcs.metadata);
    // $TOT must always reflect the DataFrame, regardless of what `updates` did.
    let n_events = fcs.get_event_count_from_dataframe();
    use crate::keyword::{match_and_parse_keyword, KeywordCreationResult};
    if let KeywordCreationResult::Int(int_kw) =
        match_and_parse_keyword("$TOT", &n_events.to_string())
    {
        fcs.metadata
            .keywords
            .insert("$TOT".to_string(), Keyword::Int(int_kw));
    }
    let path = path.as_ref();
    // `fcs` is not used after this point, so pass ownership instead of cloning.
    write_fcs_file(fcs, path)?;
    Fcs::open(path.to_str().ok_or_else(|| anyhow!("Invalid path"))?)
}
/// Keep only the events where `mask` is `true`, update `$TOT`, write the
/// filtered dataset to `path`, and return the freshly re-opened file.
///
/// # Errors
/// Fails if `mask.len()` differs from the event count, if filtering, writing,
/// or re-opening fails, or if `path` is not valid UTF-8.
pub fn filter_events(fcs: Fcs, path: impl AsRef<Path>, mask: &[bool]) -> Result<Fcs> {
    let n_events = fcs.data_frame.height();
    if mask.len() != n_events {
        return Err(anyhow!(
            "Mask length {} doesn't match number of events {}",
            mask.len(),
            n_events
        ));
    }
    // Series::new accepts &[bool] directly — no intermediate Vec copy needed.
    let mask_series = Series::new("mask".into(), mask);
    let filtered_df = fcs.data_frame.filter(mask_series.bool()?)?;
    // `fcs` is owned, so mutate it in place rather than cloning.
    let mut new_fcs = fcs;
    new_fcs.data_frame = Arc::new(filtered_df);
    // Keep $TOT consistent with the filtered event count.
    let n_events_after = new_fcs.get_event_count_from_dataframe();
    use crate::keyword::{match_and_parse_keyword, KeywordCreationResult};
    if let KeywordCreationResult::Int(int_kw) =
        match_and_parse_keyword("$TOT", &n_events_after.to_string())
    {
        new_fcs
            .metadata
            .keywords
            .insert("$TOT".to_string(), Keyword::Int(int_kw));
    }
    let path = path.as_ref();
    // `new_fcs` is not used after this point — pass ownership, no clone.
    write_fcs_file(new_fcs, path)?;
    Fcs::open(path.to_str().ok_or_else(|| anyhow!("Invalid path"))?)
}
/// Concatenate the events of several FCS files — which must expose identical
/// parameter names in identical order — write the combined dataset to `path`,
/// and return the freshly re-opened file.
///
/// # Errors
/// Fails on an empty input list, mismatched parameters, a vstack failure
/// (e.g. incompatible column dtypes), a write/open failure, or a non-UTF-8
/// path. Unlike the previous implementation, a vstack error is *propagated*
/// rather than silently dropping that file's events.
pub fn concatenate_events(files: Vec<Fcs>, path: impl AsRef<Path>) -> Result<Fcs> {
    let path = path.as_ref();
    if files.is_empty() {
        return Err(anyhow!("Cannot concatenate empty list of files"));
    }
    // Single file: a byte-for-byte copy of the original is sufficient.
    if files.len() == 1 {
        duplicate_fcs_file(&files[0], path)?;
        return Fcs::open(path.to_str().ok_or_else(|| anyhow!("Invalid path"))?);
    }
    // Every file must expose the same parameter names, in the same order.
    let first_params: Vec<String> = files[0].get_parameter_names_from_dataframe();
    for (idx, fcs) in files.iter().enumerate().skip(1) {
        if fcs.get_parameter_names_from_dataframe() != first_params {
            return Err(anyhow!("File {} has different parameters than file 0", idx));
        }
    }
    // Take ownership of the first file and stack the rest onto its frame,
    // propagating any vstack failure.
    let mut iter = files.into_iter();
    let mut new_fcs = iter.next().expect("non-empty: checked above");
    let mut concatenated_df = (*new_fcs.data_frame).clone();
    for fcs in iter {
        concatenated_df = concatenated_df
            .vstack(&fcs.data_frame)
            .map_err(|e| anyhow!("Failed to concatenate events: {}", e))?;
    }
    new_fcs.data_frame = Arc::new(concatenated_df);
    // Keep $TOT consistent with the combined event count.
    let n_events_after = new_fcs.get_event_count_from_dataframe();
    use crate::keyword::{match_and_parse_keyword, KeywordCreationResult};
    if let KeywordCreationResult::Int(int_kw) =
        match_and_parse_keyword("$TOT", &n_events_after.to_string())
    {
        new_fcs
            .metadata
            .keywords
            .insert("$TOT".to_string(), Keyword::Int(int_kw));
    }
    // NOTE(review): presumably refreshes/validates $GUID for the new dataset —
    // confirm against Metadata::validate_guid.
    new_fcs.metadata.validate_guid();
    // `new_fcs` is not used after this point — pass ownership, no clone.
    write_fcs_file(new_fcs, path)?;
    Fcs::open(path.to_str().ok_or_else(|| anyhow!("Invalid path"))?)
}
pub fn add_column(
mut fcs: Fcs,
path: impl AsRef<Path>,
column_name: &str,
values: Vec<f32>,
) -> Result<Fcs> {
let df = &*fcs.data_frame;
let n_events = df.height();
if values.len() != n_events {
return Err(anyhow!(
"Values length {} doesn't match number of events {}",
values.len(),
n_events
));
}
if df
.get_column_names()
.iter()
.any(|&name| name == column_name)
{
return Err(anyhow!("Column {} already exists", column_name));
}
let mut new_df = df.clone();
let new_series = Series::new(column_name.into(), values);
new_df
.with_column(new_series.into())
.map_err(|e| anyhow!("Failed to add column: {}", e))?;
fcs.data_frame = Arc::new(new_df);
let n_params = fcs.get_parameter_count_from_dataframe();
let param_num = n_params;
use crate::keyword::match_and_parse_keyword;
let par_keyword = match_and_parse_keyword("$PAR", &n_params.to_string());
if let crate::keyword::KeywordCreationResult::Int(int_kw) = par_keyword {
fcs.metadata
.keywords
.insert("$PAR".to_string(), Keyword::Int(int_kw));
}
fcs.metadata
.insert_string_keyword(format!("$P{}N", param_num), column_name.to_string());
let pnb_keyword = match_and_parse_keyword(&format!("$P{}B", param_num), "32");
if let crate::keyword::KeywordCreationResult::Int(int_kw) = pnb_keyword {
fcs.metadata
.keywords
.insert(format!("$P{}B", param_num), Keyword::Int(int_kw));
}
let pnr_keyword = match_and_parse_keyword(&format!("$P{}R", param_num), "262144");
if let crate::keyword::KeywordCreationResult::Int(int_kw) = pnr_keyword {
fcs.metadata
.keywords
.insert(format!("$P{}R", param_num), Keyword::Int(int_kw));
}
fcs.metadata
.insert_string_keyword(format!("$P{}E", param_num), "0,0".to_string());
use crate::TransformType;
use crate::parameter::Parameter;
fcs.parameters.insert(
column_name.to_string().into(),
Parameter::new(¶m_num, column_name, column_name, &TransformType::Linear),
);
write_fcs_file(fcs.clone(), &path)?;
Fcs::open(
path.as_ref()
.to_str()
.ok_or_else(|| anyhow!("Invalid path"))?,
)
}
/// Rough upper-bound guess at the TEXT segment's size in bytes, used only as
/// the initial offset guess before the exact length is known.
fn estimate_text_segment_size(metadata: &Metadata, _n_events: usize, n_params: usize) -> usize {
    const BASE_SIZE: usize = 200; // required keywords and delimiters
    const BYTES_PER_KEYWORD: usize = 50; // average key + value + delimiters
    const BYTES_PER_PARAM: usize = 100; // $PnN/$PnB/$PnR/$PnE per parameter
    BASE_SIZE + metadata.keywords.len() * BYTES_PER_KEYWORD + n_params * BYTES_PER_PARAM
}
/// Serialize the TEXT segment: a delimiter-separated list of keyword/value
/// pairs starting with the required segment-offset keywords, followed by all
/// remaining metadata keywords in sorted (deterministic) order.
///
/// `data_start`/`data_end` are the DATA segment byte offsets computed by the
/// caller; they are embedded as `$BEGINDATA`/`$ENDDATA`.
///
/// # Errors
/// Returns an error if a sorted key vanishes from the keyword map (broken
/// invariant).
fn serialize_metadata(
    metadata: &Metadata,
    n_events: usize,
    n_params: usize,
    data_start: usize,
    data_end: usize,
) -> Result<Vec<u8>> {
    let delimiter = metadata.delimiter as u8;
    let mut text_segment = Vec::new();
    // `key` is written verbatim: standard keywords carry their "$" prefix,
    // custom keywords are emitted unprefixed. (Previously a "$" was forced
    // onto every key, which corrupted custom, non-"$" keywords.)
    let mut add_keyword = |key: &str, value: &str| {
        text_segment.push(delimiter);
        text_segment.extend_from_slice(key.as_bytes());
        text_segment.push(delimiter);
        // FCS 3.1: a delimiter byte inside a value is escaped by doubling it.
        for &byte in value.as_bytes() {
            text_segment.push(byte);
            if byte == delimiter {
                text_segment.push(byte);
            }
        }
    };
    // Required segment-offset keywords; no ANALYSIS or supplemental TEXT.
    add_keyword("$BEGINANALYSIS", "0");
    add_keyword("$ENDANALYSIS", "0");
    add_keyword("$BEGINSTEXT", "0");
    add_keyword("$ENDSTEXT", "0");
    add_keyword("$BEGINDATA", &data_start.to_string());
    add_keyword("$ENDDATA", &data_end.to_string());
    // $BYTEORD defaults to little-endian ("1,2,3,4") when absent.
    let byteord_value = metadata
        .keywords
        .get("$BYTEORD")
        .and_then(|k| match k {
            Keyword::Byte(ByteKeyword::BYTEORD(bo)) => Some(bo.to_keyword_str()),
            _ => None,
        })
        .unwrap_or("1,2,3,4");
    add_keyword("$BYTEORD", byteord_value);
    // $DATATYPE defaults to "F" (single-precision float) when absent.
    let datatype_value = metadata
        .keywords
        .get("$DATATYPE")
        .and_then(|k| match k {
            Keyword::Byte(ByteKeyword::DATATYPE(dt)) => Some(dt.to_keyword_str()),
            _ => None,
        })
        .unwrap_or("F");
    add_keyword("$DATATYPE", datatype_value);
    // $MODE defaults to "L" (list mode) when absent.
    let mode_value = metadata
        .keywords
        .get("$MODE")
        .and_then(|k| match k {
            Keyword::String(sk) => Some(sk.get_str().to_string()),
            _ => None,
        })
        .unwrap_or_else(|| "L".to_string());
    add_keyword("$MODE", &mode_value);
    // $PAR/$TOT always reflect the frame actually being written.
    add_keyword("$PAR", &n_params.to_string());
    add_keyword("$TOT", &n_events.to_string());
    // $NEXTDATA defaults to "0" (no chained dataset) when absent.
    let nextdata_value = metadata
        .keywords
        .get("$NEXTDATA")
        .and_then(|k| match k {
            Keyword::String(sk) => Some(sk.get_str().to_string()),
            _ => None,
        })
        .unwrap_or_else(|| "0".to_string());
    add_keyword("$NEXTDATA", &nextdata_value);
    // Remaining keywords in sorted order; skip those already written above.
    let mut sorted_keys: Vec<_> = metadata.keywords.keys().collect();
    sorted_keys.sort();
    for key in sorted_keys {
        if matches!(
            key.as_str(),
            "$BEGINANALYSIS"
                | "$ENDANALYSIS"
                | "$BEGINSTEXT"
                | "$ENDSTEXT"
                | "$BEGINDATA"
                | "$ENDDATA"
                | "$BYTEORD"
                | "$DATATYPE"
                | "$MODE"
                | "$PAR"
                | "$TOT"
                | "$NEXTDATA"
        ) {
            continue;
        }
        let keyword = metadata
            .keywords
            .get(key)
            .ok_or_else(|| anyhow!("Keyword '{}' not found in metadata", key))?;
        let value_str = match keyword {
            Keyword::Int(int_kw) => match int_kw {
                // Override any stale stored counts with the real ones.
                IntegerKeyword::TOT(_) => n_events.to_string(),
                IntegerKeyword::PAR(_) => n_params.to_string(),
                _ => int_kw.get_str().to_string(),
            },
            Keyword::String(str_kw) => str_kw.get_str().to_string(),
            Keyword::Float(float_kw) => float_kw.to_string(),
            Keyword::Byte(byte_kw) => byte_kw.get_str().to_string(),
            Keyword::Mixed(mixed_kw) => {
                use crate::keyword::MixedKeyword;
                match mixed_kw {
                    MixedKeyword::PnE(f1, f2) => format!("{},{}", f1, f2),
                    MixedKeyword::PnL(wavelengths) => format!(
                        "({})",
                        wavelengths
                            .iter()
                            .map(|w| w.to_string())
                            .collect::<Vec<_>>()
                            .join(",")
                    ),
                    MixedKeyword::PnD(scale_type, lower, upper) => {
                        format!("({},{},{})", scale_type, lower, upper)
                    }
                    MixedKeyword::PnCalibration(f1, s) => format!("{}/{}", f1, s),
                    MixedKeyword::RnW(widths) => format!(
                        "({})",
                        widths
                            .iter()
                            .map(|w| w.to_string())
                            .collect::<Vec<_>>()
                            .join(",")
                    ),
                    MixedKeyword::SPILLOVER {
                        n_parameters,
                        parameter_names,
                        matrix_values,
                    } => {
                        // "$SPILLOVER": n, name_1..name_n, then n*n matrix values.
                        let mut result = n_parameters.to_string();
                        for name in parameter_names {
                            result.push(',');
                            result.push_str(name);
                        }
                        for val in matrix_values {
                            result.push(',');
                            result.push_str(&val.to_string());
                        }
                        result
                    }
                    MixedKeyword::GnE(f1, f2) => format!("{},{}", f1, f2),
                }
            }
        };
        // Written verbatim — "$..." for standard keys, bare for custom keys.
        add_keyword(key, &value_str);
    }
    // The TEXT segment is terminated by a final delimiter.
    text_segment.push(delimiter);
    Ok(text_segment)
}
/// Serialize the DATA segment as row-major 4-byte floats, honoring the byte
/// order from metadata (defaulting to little-endian when absent).
///
/// Every column must be a contiguous f32 Series.
///
/// # Errors
/// Fails when a column is not f32 or its backing memory is not contiguous.
fn serialize_data(df: &DataFrame, metadata: &Metadata) -> Result<Vec<u8>> {
    use byteorder::BigEndian; // hoisted out of the per-value loop
    let n_events = df.height();
    let n_params = df.width();
    // Capacity hint only — every value below is written as a 4-byte f32.
    // checked_div guards n_params == 0 (previously a potential panic).
    let bytes_per_param = metadata
        .calculate_bytes_per_event()
        .and_then(|bytes_per_event| bytes_per_event.checked_div(n_params))
        .unwrap_or(4);
    let mut data = Vec::with_capacity(n_events * n_params * bytes_per_param);
    let byte_order = metadata
        .get_byte_order()
        .unwrap_or(&ByteOrder::LittleEndian);
    let is_little_endian = matches!(byte_order, ByteOrder::LittleEndian);
    // Resolve every column to a contiguous f32 slice up front so the hot loop
    // below is plain slice indexing.
    let column_names = df.get_column_names();
    let mut column_data: Vec<&[f32]> = Vec::with_capacity(n_params);
    for col_name in &column_names {
        let series = df.column(col_name)?;
        let f32_series = series
            .f32()
            .map_err(|e| anyhow!("Column {} is not f32: {}", col_name, e))?;
        let slice = f32_series
            .cont_slice()
            .map_err(|e| anyhow!("Column {} data is not contiguous: {}", col_name, e))?;
        column_data.push(slice);
    }
    // Row-major ("list mode") layout: all of event 0's parameters, then
    // event 1's, and so on.
    for row_idx in 0..n_events {
        for col_data in &column_data {
            let value = col_data[row_idx];
            if is_little_endian {
                data.write_f32::<LittleEndian>(value)?;
            } else {
                data.write_f32::<BigEndian>(value)?;
            }
        }
    }
    Ok(data)
}
/// Build the fixed 58-byte FCS header: a 6-byte version string, 4 spaces,
/// then six right-justified 8-byte ASCII offset fields (TEXT start/end,
/// DATA start/end, ANALYSIS start/end — the last two are 0 since no
/// ANALYSIS segment is written).
///
/// # Errors
/// Fails when the version string exceeds 6 bytes or any offset exceeds the
/// 8 decimal digits a header field can hold (previously `copy_from_slice`
/// would panic on an oversized offset).
fn build_header(
    version: &Version,
    text_start: usize,
    text_end: usize,
    data_start: usize,
    data_end: usize,
) -> Result<Vec<u8>> {
    /// Right-justify `value` into the 8-byte field starting at `start`.
    fn put_offset(header: &mut [u8], start: usize, value: usize) -> Result<()> {
        const MAX_OFFSET: usize = 99_999_999; // 8 ASCII digits
        if value > MAX_OFFSET {
            return Err(anyhow!(
                "Offset {} does not fit in the 8-byte FCS header field",
                value
            ));
        }
        let field = format!("{:>8}", value);
        header[start..start + 8].copy_from_slice(field.as_bytes());
        Ok(())
    }
    // Space-fill the whole header so a version shorter than 6 bytes is padded
    // with spaces (not NUL bytes) and bytes 6..10 are the required spaces.
    let mut header = vec![b' '; 58];
    let version_str = version.to_string();
    if version_str.len() > 6 {
        return Err(anyhow!("Version string too long: {}", version_str));
    }
    header[..version_str.len()].copy_from_slice(version_str.as_bytes());
    put_offset(&mut header, 10, text_start)?;
    put_offset(&mut header, 18, text_end)?;
    put_offset(&mut header, 26, data_start)?;
    put_offset(&mut header, 34, data_end)?;
    put_offset(&mut header, 42, 0)?; // ANALYSIS start: none
    put_offset(&mut header, 50, 0)?; // ANALYSIS end: none
    Ok(header)
}