use std::ops::RangeInclusive;
use std::collections::HashMap;
use std::io::{self, Read, Seek};
use std::io::{BufReader, SeekFrom};
use std::fs::File;
use std::str;
use byteorder::{ReadBytesExt, LittleEndian, BigEndian};
use regex::Regex;
use serde::{Serialize, Deserialize};
use thiserror::Error;
/// Version strings accepted in the first six bytes of the file header.
const VALID_FCS_VERSIONS: [&[u8]; 2] = [b"FCS3.0", b"FCS3.1"];

/// Non-parameter keywords from the "required" set.
///
/// `validate_text` accepts each of these as a known TEXT keyword.
const REQUIRED_KEYWORDS: [&str; 12] = [
    "$BEGINANALYSIS",
    "$BEGINDATA",
    "$BEGINSTEXT",
    "$BYTEORD",
    "$DATATYPE",
    "$ENDANALYSIS",
    "$ENDDATA",
    "$ENDSTEXT",
    "$MODE",
    "$NEXTDATA",
    "$PAR",
    "$TOT",
];

/// Non-parameter keywords from the "optional" set.
///
/// `validate_text` accepts each of these as a known TEXT keyword.
const OPTIONAL_KEYWORDS: [&str; 31] = [
    "$ABRT",
    "$BTIM",
    "$CELLS",
    "$COM",
    "$CSMODE",
    "$CSVBITS",
    "$CYT",
    "$CYTSN",
    "$DATE",
    "$ETIM",
    "$EXP",
    "$FIL",
    "$GATE",
    "$GATING",
    "$INST",
    "$LAST_MODIFIED",
    "$LAST_MODIFIER",
    "$LOST",
    "$OP",
    "$ORIGINALITY",
    "$PLATEID",
    "$PLATENAME",
    "$PROJ",
    "$SMNO",
    "$SPILLOVER",
    "$SRC",
    "$SYS",
    "$TIMESTEP",
    "$TR",
    "$VOL",
    "$WELLID",
];
/// Errors reported while opening or parsing an FCS file.
#[derive(Debug, Error)]
pub enum FcsError {
/// An underlying I/O operation failed; converted automatically from `io::Error` via `?`.
#[error("IO Error: {0}")]
IoError(#[from] io::Error),
/// The bytes following the version string in the file header are malformed.
#[error("Invalid Fcs Header. File may be corrupted or not a Fcs file.")]
InvalidHeader,
/// The version string at the start of the file is not an accepted one;
/// carries the (lossily decoded) version bytes that were read.
#[error("Fcs version `{0}` not supported. Must be either FCS3.0 or FCS3.1")]
InvalidVersion(String),
/// The TEXT segment could not be decoded or contains an unrecognised keyword.
#[error("Invalid Fcs Metadata")]
InvalidMetadata,
/// The DATA segment cannot be read as described by the metadata; carries a description.
#[error("Invalid Fcs Data: {0}")]
InvalidData(String),
}
/// Handle to an FCS file on disk; create one with `FcsFile::open` and parse it with `read`.
pub struct FcsFile {
// Open file handle that `read` wraps in a `BufReader`.
inner: File,
}
impl FcsFile {
    /// Opens the file at `path` for reading.
    ///
    /// Nothing is parsed at this point; call [`FcsFile::read`] to parse.
    ///
    /// # Errors
    /// Returns [`FcsError::IoError`] if the file cannot be opened.
    pub fn open(path: &str) -> Result<FcsFile, FcsError> {
        let file = File::open(path)?;
        Ok(Self { inner: file })
    }

    /// Parses the metadata and the per-parameter event data of the file.
    ///
    /// May be called more than once on the same handle; every call parses
    /// the file again from its first byte.
    ///
    /// # Errors
    /// Propagates any [`FcsError`] raised while reading the header, the TEXT
    /// segment or the DATA segment.
    pub fn read(&self) -> Result<FcsData, FcsError> {
        let mut reader = BufReader::new(&self.inner);
        // The header is parsed from the reader's current position and the
        // cursor of the underlying `File` is shared by every `&File` borrow.
        // Without this rewind a second `read()` would start wherever the
        // previous call stopped and fail with `InvalidVersion`.
        reader.seek(SeekFrom::Start(0))?;

        let metadata = read_metadata(&mut reader)?;
        let parameters = read_param_data(&mut reader, &metadata)?;
        Ok(FcsData {
            metadata,
            parameters,
        })
    }
}
/// Parsed contents of an FCS file, as produced by `FcsFile::read`.
#[derive(Serialize, Deserialize, Debug)]
pub struct FcsData {
/// Keyword/value pairs collected from the TEXT segment by `read_metadata`.
pub metadata: HashMap<String, String>,
/// Event values for each parameter, keyed by the parameter name taken from the metadata.
pub parameters: HashMap<String, Vec<f64>>,
}
/// Reads the file header and the TEXT segment it points to, returning the
/// standard (`$`-prefixed) keyword/value pairs.
///
/// The first byte of the TEXT segment is taken as the delimiter. The closing
/// delimiter (the last byte of the segment) is not read.
///
/// # Errors
/// * [`FcsError::InvalidHeader`] / [`FcsError::InvalidVersion`] from the
///   header, or when the TEXT offsets describe an empty or inverted segment.
/// * [`FcsError::InvalidMetadata`] when the segment is not valid UTF-8 or
///   fails `validate_text`.
/// * [`FcsError::IoError`] on any read/seek failure.
pub fn read_metadata(reader: &mut BufReader<&File>) -> Result<HashMap<String, String>, FcsError> {
    let text_offset = read_header(reader)?;

    // The payload between the opening and closing delimiter is
    // `end - start - 1` bytes long. Checked arithmetic turns a corrupt header
    // (end <= start) into an error instead of an integer underflow.
    let bytes_to_read = text_offset
        .end()
        .checked_sub(*text_offset.start())
        .and_then(|span| span.checked_sub(1))
        .ok_or(FcsError::InvalidHeader)?;

    reader.seek(SeekFrom::Start(*text_offset.start() as u64))?;
    let delimiter = reader.read_u8()? as char;
    let mut buffer = vec![0u8; bytes_to_read];
    reader.read_exact(&mut buffer)?;
    let text = String::from_utf8(buffer).map_err(|_| FcsError::InvalidMetadata)?;

    let metadata = parse_text_segment(&text, delimiter);
    validate_text(&metadata)?;
    Ok(metadata)
}

/// Splits a TEXT payload (without its opening and closing delimiter) into the
/// `$`-prefixed keyword/value pairs it contains.
///
/// * A token starting with `$` begins a new keyword; the token after it is
///   that keyword's value (possibly empty).
/// * An empty token right after a value marks a doubled (escaped) delimiter:
///   the delimiter character and the following token are appended to the value.
/// * Any other token belongs to a non-standard keyword or its value and is
///   skipped, so it can no longer be glued onto the preceding standard value.
fn parse_text_segment(text: &str, delimiter: char) -> HashMap<String, String> {
    #[derive(Clone, Copy)]
    enum State {
        /// Outside a standard pair; waiting for the next `$` keyword.
        Skipping,
        /// A `$` keyword was read; the next token is its value.
        AwaitingValue,
        /// The current keyword has a value; an empty token would mean an escape.
        InValue,
        /// An escaped delimiter was seen; the next token continues the value.
        Escaped,
    }

    let mut metadata = HashMap::new();
    let mut state = State::Skipping;
    let mut keyword = String::new();
    let mut value = String::new();

    for token in text.split(delimiter) {
        state = match state {
            State::Escaped => {
                value.push(delimiter);
                value.push_str(token);
                metadata.insert(keyword.clone(), value.clone());
                State::InValue
            }
            _ if token.starts_with('$') => {
                keyword = token.to_string();
                value.clear();
                State::AwaitingValue
            }
            State::AwaitingValue => {
                value.push_str(token);
                metadata.insert(keyword.clone(), value.clone());
                State::InValue
            }
            State::InValue if token.is_empty() => State::Escaped,
            State::InValue | State::Skipping => State::Skipping,
        };
    }
    metadata
}
/// Parses the start of the FCS header from the reader's current position and
/// returns the inclusive byte range of the TEXT segment.
///
/// Layout consumed, in order: a 6-byte version string, 4 space characters,
/// then two 8-byte ASCII decimal fields (surrounding ASCII whitespace is
/// ignored) holding the first and last byte offset of the TEXT segment.
///
/// # Errors
/// * [`FcsError::InvalidVersion`] when the version string is not accepted.
/// * [`FcsError::InvalidHeader`] when the padding is not four spaces, an
///   offset is not a number, or the end offset is not after the start offset.
/// * [`FcsError::IoError`] when the header cannot be read in full.
fn read_header(reader: &mut BufReader<&File>) -> Result<RangeInclusive<usize>, FcsError> {
    let mut version = [0u8; 6];
    reader.read_exact(&mut version)?;
    if !VALID_FCS_VERSIONS.iter().any(|accepted| *accepted == &version[..]) {
        return Err(FcsError::InvalidVersion(String::from_utf8_lossy(&version).to_string()));
    }

    // Check exactly the four bytes just read rather than counting spaces over
    // a larger scratch buffer that still holds bytes from the previous read.
    let mut padding = [0u8; 4];
    reader.read_exact(&mut padding)?;
    if padding != *b"    " {
        return Err(FcsError::InvalidHeader);
    }

    let mut offsets = [0usize; 2];
    let mut field = [0u8; 8];
    for offset in offsets.iter_mut() {
        reader.read_exact(&mut field)?;
        *offset = str::from_utf8(field.trim_ascii())
            .map_err(|_| FcsError::InvalidHeader)?
            .parse::<usize>()
            .map_err(|_| FcsError::InvalidHeader)?;
    }

    // A TEXT segment needs at least an opening and a closing delimiter, so an
    // end offset at or before the start can only come from a corrupt header.
    // Rejecting it here keeps callers from computing a negative length.
    let [start, end] = offsets;
    if end <= start {
        return Err(FcsError::InvalidHeader);
    }
    Ok(start..=end)
}
/// Checks that every keyword in `text` is a recognised one.
///
/// A keyword is accepted when it is listed in `REQUIRED_KEYWORDS` or
/// `OPTIONAL_KEYWORDS`, or when it contains a per-parameter pattern: `P` or
/// `R`, then between one and *d* digits, then one of the letters
/// `BENRDFGLOPSTVIW`, where *d* is the number of characters in the `$PAR`
/// value. The pattern is not anchored, so it may match anywhere in the keyword.
///
/// Only the keys are inspected; the presence of the required keywords and the
/// content of the values are not checked.
///
/// # Errors
/// Returns [`FcsError::InvalidMetadata`] when `$PAR` is missing, when its
/// value cannot be turned into a valid pattern (for example when it is
/// empty), or when an unrecognised keyword is present.
fn validate_text(text: &HashMap<String,String>) -> Result<(), FcsError> {
    let n_params = text.get("$PAR").ok_or(FcsError::InvalidMetadata)?;
    let n_digits = n_params.chars().count();

    let regex_string = format!("{}{}{}", r"[PR]\d{1,", n_digits, "}[BENRDFGLOPSTVIW]");
    // An empty `$PAR` yields the invalid repetition `{1,0}`; report that as
    // bad metadata instead of panicking.
    let param_keywords = Regex::new(&regex_string).map_err(|_| FcsError::InvalidMetadata)?;

    let is_known = |keyword: &str| {
        REQUIRED_KEYWORDS.contains(&keyword)
            || OPTIONAL_KEYWORDS.contains(&keyword)
            || param_keywords.is_match(keyword)
    };

    if text.keys().all(|keyword| is_known(keyword.as_str())) {
        Ok(())
    } else {
        Err(FcsError::InvalidMetadata)
    }
}
/// Reads the event values of every parameter described by `metadata`.
///
/// Seeks `reader` to `$BEGINDATA`, then calls `read_events` once per
/// parameter (1-based index) with the byte order selected by `$BYTEORD`.
/// Each result is stored under the parameter's `$PnS` name, falling back to
/// `$PnN` when `$PnS` is absent or blank. Numeric keyword values may be
/// padded with whitespace.
///
/// # Errors
/// Returns [`FcsError::InvalidData`] when
/// * a needed keyword is missing or not a valid number,
/// * `$MODE` is not `L`,
/// * `$PAR` or `$TOT` is zero,
/// * `$BYTEORD` is neither `1,2,3,4` nor `4,3,2,1`,
/// * a parameter has no usable name.
///
/// I/O failures and errors from `read_events` are propagated.
fn read_param_data(reader: &mut BufReader<&File>, metadata: &HashMap<String, String>) -> Result<HashMap<String, Vec<f64>>, FcsError> {
    /// Returns the trimmed value of `keyword`, or an error when it is absent.
    fn text_value<'a>(metadata: &'a HashMap<String, String>, keyword: &str) -> Result<&'a str, FcsError> {
        metadata
            .get(keyword)
            .map(|value| value.trim())
            .ok_or_else(|| FcsError::InvalidData(format!("Missing required keyword {}", keyword)))
    }

    /// Returns the value of `keyword` parsed as a number.
    fn numeric_value<T: std::str::FromStr>(metadata: &HashMap<String, String>, keyword: &str) -> Result<T, FcsError> {
        text_value(metadata, keyword)?
            .parse::<T>()
            .map_err(|_| FcsError::InvalidData(format!("Keyword {} does not hold a valid number", keyword)))
    }

    if text_value(metadata, "$MODE")? != "L" {
        return Err(FcsError::InvalidData("Data must be in list (L) mode".to_string()));
    }

    let data_type = text_value(metadata, "$DATATYPE")?;
    let n_params: usize = numeric_value(metadata, "$PAR")?;
    let n_events: usize = numeric_value(metadata, "$TOT")?;
    let data_start: u64 = numeric_value(metadata, "$BEGINDATA")?;

    // Test the factors rather than their product so that absurd values from a
    // corrupt file cannot overflow.
    if n_params == 0 || n_events == 0 {
        return Err(FcsError::InvalidData("Fcs file may be corrupted. No data found".to_string()));
    }

    // The byte order is the same for every parameter; decide it once.
    let little_endian = match text_value(metadata, "$BYTEORD")? {
        "1,2,3,4" => true,
        "4,3,2,1" => false,
        _ => return Err(FcsError::InvalidData("Could not determine byte order.".to_string())),
    };

    reader.seek(SeekFrom::Start(data_start))?;

    let mut parameters: HashMap<String, Vec<f64>> = HashMap::new();
    for i in 1..=n_params {
        let events = if little_endian {
            read_events::<LittleEndian>(reader, data_type, n_events, i, metadata)?
        } else {
            read_events::<BigEndian>(reader, data_type, n_events, i, metadata)?
        };

        // `$PnS` is an optional keyword, so fall back to the short name `$PnN`
        // instead of panicking when it is absent or blank.
        let id = [format!("$P{}S", i), format!("$P{}N", i)]
            .iter()
            .filter_map(|keyword| metadata.get(keyword))
            .find(|name| !name.trim().is_empty())
            .ok_or_else(|| FcsError::InvalidData(format!("Parameter {} has neither a $PnS nor a $PnN name", i)))?
            .to_owned();
        parameters.insert(id, events);
    }
    Ok(parameters)
}
/// Reads the value of one parameter for every event in the DATA segment.
///
/// List-mode data is stored event by event: each event holds one value per
/// parameter, in parameter order. The values of parameter `param_idx` are
/// therefore one event-size apart. This function seeks to that parameter's
/// slot in the first event (located through `$BEGINDATA`) and strides through
/// the segment, so the result does not depend on where `reader` is positioned
/// when it is called.
///
/// * `B` – byte order used to decode multi-byte values.
/// * `data_type` – the `$DATATYPE` value: `F` (32-bit float), `D` (64-bit
///   float) or `I` (unsigned integer whose width is given by `$PnB`).
/// * `n_events` – number of events to read.
/// * `param_idx` – 1-based parameter index.
/// * `metadata` – TEXT keywords; `$PAR`, `$BEGINDATA` and, for `I`, every
///   `$PnB` are consulted.
///
/// Integer values are converted with `as f64`, which rounds values that do
/// not fit a double exactly.
///
/// # Errors
/// Returns [`FcsError::InvalidData`] for an unsupported data type or bit
/// width (integers must be 8, 16, 32, 64 or 128 bits), a missing or
/// non-numeric keyword, or an out-of-range `param_idx`. Returns
/// [`FcsError::IoError`] when the file ends before all events are read.
fn read_events<B: byteorder::ByteOrder>(reader: &mut BufReader<&File>, data_type: &str, n_events: usize, param_idx: usize, metadata: &HashMap<String, String>) -> Result<Vec<f64>, FcsError> {
    /// Returns the value of `keyword` parsed as a number (whitespace ignored).
    fn numeric_value<T: std::str::FromStr>(metadata: &HashMap<String, String>, keyword: &str) -> Result<T, FcsError> {
        metadata
            .get(keyword)
            .and_then(|value| value.trim().parse::<T>().ok())
            .ok_or_else(|| FcsError::InvalidData(format!("Missing or non-numeric keyword {}", keyword)))
    }

    /// Returns the number of bytes one value of parameter `index` occupies.
    fn value_width(data_type: &str, index: usize, metadata: &HashMap<String, String>) -> Result<usize, FcsError> {
        match data_type {
            "F" => Ok(std::mem::size_of::<f32>()),
            "D" => Ok(std::mem::size_of::<f64>()),
            "I" => {
                let bits: usize = numeric_value(metadata, &format!("$P{}B", index))?;
                match bits {
                    8 | 16 | 32 | 64 | 128 => Ok(bits / 8),
                    _ => Err(FcsError::InvalidData(("Bits for param type not supported").to_string())),
                }
            }
            _ => Err(FcsError::InvalidData("FCS data type not supported. Must be F, D, or I".to_string())),
        }
    }

    /// Decodes one raw value; for integers the slice length selects the width.
    fn decode_value<O: byteorder::ByteOrder>(data_type: &str, raw: &[u8]) -> f64 {
        match (data_type, raw.len()) {
            ("F", _) => O::read_f32(raw) as f64,
            ("D", _) => O::read_f64(raw),
            (_, 1) => raw[0] as f64,
            (_, 2) => O::read_u16(raw) as f64,
            (_, 4) => O::read_u32(raw) as f64,
            (_, 8) => O::read_u64(raw) as f64,
            _ => O::read_u128(raw) as f64,
        }
    }

    let too_large = || FcsError::InvalidData("Data segment layout is too large".to_string());

    let data_type = data_type.trim();
    let n_params: usize = numeric_value(metadata, "$PAR")?;
    if param_idx == 0 || param_idx > n_params {
        return Err(FcsError::InvalidData(format!("Parameter index {} is outside 1..={}", param_idx, n_params)));
    }
    let data_start: u64 = numeric_value(metadata, "$BEGINDATA")?;
    let width = value_width(data_type, param_idx, metadata)?;

    // `offset` is where this parameter sits inside one event, `event_size` is
    // the byte length of a whole event. Integer parameters may each have their
    // own width; floating-point parameters all share one.
    let (offset, event_size) = if data_type == "I" {
        let mut offset = 0usize;
        let mut event_size = 0usize;
        for index in 1..=n_params {
            let other = value_width(data_type, index, metadata)?;
            if index < param_idx {
                offset += other;
            }
            event_size += other;
        }
        (offset, event_size)
    } else {
        let event_size = width.checked_mul(n_params).ok_or_else(too_large)?;
        (width * (param_idx - 1), event_size)
    };

    let gap = event_size - width;
    if gap > i64::MAX as usize {
        return Err(too_large());
    }
    let gap = gap as i64;
    let first = data_start.checked_add(offset as u64).ok_or_else(too_large)?;

    reader.seek(SeekFrom::Start(first))?;

    let mut scratch = [0u8; 16];
    let raw = &mut scratch[..width];
    // `n_events` comes from the file, so let the vector grow instead of
    // trusting it for an up-front allocation; a short file ends the loop with
    // an I/O error.
    let mut data: Vec<f64> = Vec::new();
    for event in 0..n_events {
        if event > 0 {
            // Skip the other parameters' values between two consecutive
            // events. `seek_relative` keeps the read buffer when the target
            // is already buffered.
            reader.seek_relative(gap)?;
        }
        reader.read_exact(raw)?;
        data.push(decode_value::<B>(data_type, raw));
    }
    Ok(data)
}