flowfairy_api 0.2.2

Library for processing flow cytometry data.
Documentation
//! Fcs File manipulation operations.
//!
//! This module contains basic methods to manipulate the contents of fcs files.

use std::ops::RangeInclusive;
use std::collections::HashMap;
use std::io::{self, Read, Seek};
use std::io::{BufReader, SeekFrom};
use std::fs::File;
use std::str;
use byteorder::{ReadBytesExt, LittleEndian, BigEndian};
use regex::Regex;
use serde::{Serialize, Deserialize};
use thiserror::Error;

/// Version identifiers accepted in the first six bytes of the header segment.
const VALID_FCS_VERSIONS: [&[u8]; 2] = [
    b"FCS3.0",
    b"FCS3.1",
];

/// Keywords (not indexed by parameter number) that the text segment of an fcs
/// file is required to contain.
const REQUIRED_KEYWORDS: [&str; 12] = [
    // Byte-offset to the beginning of the analysis segment.
    "$BEGINANALYSIS",
    // Byte-offset to the beginning of the data segment.
    "$BEGINDATA",
    // Byte-offset to the beginning of the supplemental text segment.
    "$BEGINSTEXT",
    // Byte order of the data acquisition computer.
    "$BYTEORD",
    // Type of data in the data segment (ASCII, int, float).
    "$DATATYPE",
    // Byte-offset to the end of the analysis segment.
    "$ENDANALYSIS",
    // Byte-offset to the end of the data segment.
    "$ENDDATA",
    // Byte-offset to the end of the supplemental text segment.
    "$ENDSTEXT",
    // Data mode (list mode - preferred, histogram - deprecated).
    "$MODE",
    // Byte-offset to the next data set in the file.
    "$NEXTDATA",
    // Number of parameters in an event.
    "$PAR",
    // Total number of events in the data set.
    "$TOT",
];

/// Optional non-parameter indexed keywords.
///
/// The list covers both supported versions: `$COMP` and `$UNICODE` are FCS3.0
/// keywords that FCS3.1 dropped, the remaining entries follow FCS3.1.
const OPTIONAL_KEYWORDS: [&str; 33] = [
    "$ABRT", // events lost due to acquisition electronic coincidence
    "$BTIM", // clock time at beginning of data acquisition
    "$CELLS", // description of objects measured
    "$COM", // comment
    "$COMP", // fluorescence compensation matrix (FCS3.0)
    "$CSMODE", // cell subset mode, number of subsets an object may belong
    "$CSVBITS", // number of bits used to encode cell subset identifier
    "$CYT", // cytometer type
    "$CYTSN", // cytometer serial number
    "$DATE", // date of data acquisition
    "$ETIM", // clock time at end of data acquisition
    "$EXP", // investigator name initiating experiment
    "$FIL", // name of data file containing data set
    "$GATE", // number of gating parameters
    "$GATING", // region combinations used for gating
    "$INST", // institution where data was acquired
    "$LAST_MODIFIED", // timestamp of last modification
    "$LAST_MODIFIER", // person performing last modification
    "$LOST", // number events lost due to computer busy
    "$OP", // name of flow cytometry operator
    "$ORIGINALITY", // information whether FCS data set has been modified or not
    "$PLATEID", // plate identifier
    "$PLATENAME", // plate name
    "$PROJ", // project name
    "$SMNO", // specimen (i.e., tube) label
    "$SPILLOVER", // spillover matrix
    "$SRC", // source of specimen (cell type, name, etc.)
    "$SYS", // type of computer and OS
    "$TIMESTEP", // time step for time parameter
    "$TR", // trigger parameter and its threshold
    "$UNICODE", // keywords whose values are UNICODE encoded (FCS3.0)
    "$VOL", // volume of sample run during data acquisition
    "$WELLID" // well identifier
];

// FIXME: Update unwrap or expect to return a proper error
/// Errors returned while opening or reading an fcs file.
#[derive(Debug, Error)]
pub enum FcsError {
    /// An underlying I/O operation failed; wraps the original `io::Error`.
    #[error("IO Error: {0}")]
    IoError(#[from] io::Error),
    /// The header segment could not be interpreted.
    #[error("Invalid Fcs Header. File may be corrupted or not a Fcs file.")]
    InvalidHeader,
    /// The version identifier is not one of the supported versions; carries
    /// the identifier that was found.
    #[error("Fcs version `{0}` not supported. Must be either FCS3.0 or FCS3.1")]
    InvalidVersion(String),
    /// The text segment could not be decoded or failed validation.
    #[error("Invalid Fcs Metadata")]
    InvalidMetadata,
    /// The data segment could not be read; carries a description of the problem.
    #[error("Invalid Fcs Data: {0}")]
    InvalidData(String),
}

/// An object providing access to an fcs file.
///
/// Created with `FcsFile::open`; the contents are parsed by `FcsFile::read`.
pub struct FcsFile {
    /// Handle of the opened file; readers borrow it when the file is parsed.
    inner: File,
}

impl FcsFile {
    /// Open an Fcs file in read-only mode.
    pub fn open(path: &str) -> Result<FcsFile, FcsError> {
        let file = File::open(path)?;

        Ok(Self { inner: file })
    }

    /// Read fcs file and return metadata and parameter data in an `FcsData` struct
    pub fn read(&self) -> Result<FcsData, FcsError> {
        let mut reader = BufReader::new(&self.inner);
        let metadata = read_metadata(&mut reader)?;
        let parameters = read_param_data(&mut reader, &metadata)?;
        let fcs_data = FcsData {
            metadata,
            parameters,
        };

        Ok(fcs_data)
    }
}

/// Contains metadata and parameter data from fcs file
#[derive(Serialize, Deserialize, Debug)]
pub struct FcsData {
    /// Keyword/value pairs taken from the text segment.
    pub metadata: HashMap<String, String>,
    /// Event values per parameter, keyed by a parameter name taken from the
    /// metadata. Every value is stored as `f64` regardless of the type used
    /// in the file.
    pub parameters: HashMap<String, Vec<f64>>,
}

/*
impl FcsData {
    fn logicle_transform(&mut self, w: usize, t: usize, m: usize, a: usize, x: usize, channels: Vec<String>, q: f64) -> Result<(), io::Error> {
        todo!("Implement transform function")
        for channel in channels.iter() {
            let mut data = self.parameters.get(channel).unwrap();
            // apply transformation to data
            // store data in self.parameters
        }
    }
    // fn hyperlog_transform()...etc.
}
*/

/// Reads the text segment of the fcs file and returns its standard keywords.
///
/// The first byte of the segment is the delimiter; the bytes that follow are
/// split on it and interpreted as strictly alternating keyword / value tokens.
/// Only keywords starting with `$` are kept: vendor specific keywords are
/// parsed (so they cannot bleed into neighbouring values) and then dropped,
/// because `validate_text` only accepts standard keywords.
///
/// A delimiter that is part of a value is written twice in the file. After
/// splitting, that shows up as an empty token where a keyword is expected, so
/// such a token glues the following token back onto the previous value. An
/// empty token in value position is kept as an empty value.
///
/// # Errors
///
/// * `FcsError::InvalidHeader` if the header offsets do not describe a text
///   segment of at least two bytes.
/// * `FcsError::InvalidMetadata` if the segment is not valid UTF-8, a keyword
///   has no value, or validation fails.
/// * `FcsError::IoError` on any read or seek failure.
pub fn read_metadata(reader: &mut BufReader<&File>) -> Result<HashMap<String, String>, FcsError> {
    /// Stores a finished pair, keeping only standard (`$`-prefixed) keywords.
    fn store(metadata: &mut HashMap<String, String>, pair: Option<(String, String)>) {
        if let Some((keyword, value)) = pair {
            if keyword.starts_with('$') {
                metadata.insert(keyword, value);
            }
        }
    }

    let text_offset = read_header(reader)?;
    // The segment spans start..=end; the first byte is the delimiter and the
    // last byte is the closing delimiter, neither belongs to the text itself.
    // Checked arithmetic keeps bogus offsets from underflowing.
    let bytes_to_read = text_offset
        .end()
        .checked_sub(*text_offset.start())
        .and_then(|span| span.checked_sub(1))
        .ok_or(FcsError::InvalidHeader)?;

    reader.seek(SeekFrom::Start(*text_offset.start() as u64))?;
    let delimiter = reader.read_u8()? as char;
    let mut buffer = vec![0u8; bytes_to_read];
    reader.read_exact(&mut buffer)?;
    // convert text segment buffer to string
    let text = String::from_utf8(buffer)
        .map_err(|_| FcsError::InvalidMetadata)?;

    // extract keyword value pairs
    let mut metadata: HashMap<String, String> = HashMap::new();
    let mut pending: Option<(String, String)> = None;
    let mut pieces = text.split(delimiter);

    while let Some(piece) = pieces.next() {
        if piece.is_empty() {
            // Empty token in keyword position: the two delimiters around it
            // were an escaped delimiter inside the previous value.
            if let Some((_, value)) = pending.as_mut() {
                if let Some(rest) = pieces.next() {
                    value.push(delimiter);
                    value.push_str(rest);
                }
            }
            continue;
        }

        // `piece` starts a new pair, so the previous one is complete.
        store(&mut metadata, pending.take());
        match pieces.next() {
            Some(value) => pending = Some((piece.to_string(), value.to_string())),
            None => {
                // Blank padding after the last pair is harmless; a real
                // keyword without a value is not.
                let is_padding = piece
                    .trim_matches(|c: char| c.is_whitespace() || c == '\0')
                    .is_empty();
                if !is_padding {
                    return Err(FcsError::InvalidMetadata);
                }
            }
        }
    }
    store(&mut metadata, pending);

    validate_text(&metadata)?;
    Ok(metadata)
}

/// Reads the header segment of the fcs file and returns the byte offsets of
/// the text segment as an inclusive range (first byte ..= last byte).
///
/// The reader is rewound to the start of the file first, so the function does
/// not depend on where a previous read left the cursor. The header is then
/// read as a 6 byte version identifier, 4 spaces, and two 8 byte ASCII fields
/// holding the start and end offset of the text segment.
///
/// # Errors
///
/// * `FcsError::InvalidVersion` if the version identifier is not supported.
/// * `FcsError::InvalidHeader` if the padding is not four spaces, an offset is
///   not a number, or the end offset is not greater than the start offset.
/// * `FcsError::IoError` on any read or seek failure.
fn read_header(reader: &mut BufReader<&File>) -> Result<RangeInclusive<usize>, FcsError> {
    reader.seek(SeekFrom::Start(0))?;

    let mut version = [0u8; 6];
    reader.read_exact(&mut version)?;
    if !VALID_FCS_VERSIONS.contains(&&version[..]) {
        return Err(FcsError::InvalidVersion(String::from_utf8_lossy(&version).to_string()));
    }

    // Only the four bytes just read are inspected.
    let mut padding = [0u8; 4];
    reader.read_exact(&mut padding)?;
    if padding != *b"    " {
        return Err(FcsError::InvalidHeader);
    }

    let mut offsets = [0usize; 2];
    let mut field = [0u8; 8];
    for offset in offsets.iter_mut() {
        reader.read_exact(&mut field)?;
        // Offsets are padded with spaces inside their 8 byte field.
        *offset = str::from_utf8(field.trim_ascii())
            .map_err(|_| FcsError::InvalidHeader)?
            .parse::<usize>()
            .map_err(|_| FcsError::InvalidHeader)?;
    }

    let [start, end] = offsets;
    // A text segment needs at least its opening and closing delimiter.
    if end <= start {
        return Err(FcsError::InvalidHeader);
    }

    Ok(start..=end)
}

/// Check that required keys are present in the text segment and that no
/// unknown key is.
///
/// A key is known when it is listed in `REQUIRED_KEYWORDS` or
/// `OPTIONAL_KEYWORDS`, or when it contains a parameter-indexed pattern: `P`
/// or `R`, an index with at most as many digits as the `$PAR` value, and one
/// of the letters `BENRDFGLOPSTVIW`. The pattern is not anchored.
///
/// # Errors
///
/// Returns `FcsError::InvalidMetadata` if a required keyword is missing,
/// `$PAR` is not a number, or an unknown keyword is present.
fn validate_text(text: &HashMap<String,String>) -> Result<(), FcsError> {
    // Later stages look these keywords up, so their presence is checked here.
    if !REQUIRED_KEYWORDS.iter().all(|keyword| text.contains_key(*keyword)) {
        return Err(FcsError::InvalidMetadata);
    }

    let n_params = text
        .get("$PAR")
        .map(|value| value.trim())
        .ok_or(FcsError::InvalidMetadata)?;
    if n_params.parse::<usize>().is_err() {
        return Err(FcsError::InvalidMetadata);
    }

    // A parameter index cannot have more digits than the parameter count.
    let n_digits = n_params.chars().count();
    let regex_string = format!(r"[PR]\d{{1,{}}}[BENRDFGLOPSTVIW]", n_digits);
    let param_keywords = Regex::new(&regex_string)
        .map_err(|_| FcsError::InvalidMetadata)?;

    let is_known = |keyword: &str| {
        REQUIRED_KEYWORDS.contains(&keyword)
            || OPTIONAL_KEYWORDS.contains(&keyword)
            || param_keywords.is_match(keyword)
    };

    if text.keys().all(|keyword| is_known(keyword)) {
        Ok(())
    } else {
        Err(FcsError::InvalidMetadata)
    }
}

/// Reads the data segment of the fcs file and returns a vector of FcsParameter structs
fn read_param_data(reader: &mut BufReader<&File>, metadata: &HashMap<String, String>) -> Result<HashMap<String, Vec<f64>>, FcsError> {
    let mode = metadata.get("$MODE").unwrap(); // just unwrap here since we already checked for all required keywords 
    if mode != "L" {
        return Err(FcsError::InvalidData("Data must be in list (L) mode".to_string()));
    }
    // same here, we already checked for these...unwrap all the things. Please don't hate me 
    let data_type = metadata.get("$DATATYPE").unwrap() as &str;
    let n_params = metadata.get("$PAR").unwrap().parse::<usize>().unwrap();
    let n_events = metadata.get("$TOT").unwrap().parse::<usize>().unwrap();
    let data_start = metadata.get("$BEGINDATA").unwrap().parse::<u64>().unwrap();
    let byte_order = metadata.get("$BYTEORD").unwrap();
    let capacity = n_params * n_events;
    if capacity == 0 {
        return Err(FcsError::InvalidData("Fcs file may be corrupted. No data found".to_string()));
    }

    reader.seek(SeekFrom::Start(data_start))?;
    let mut parameters: HashMap<String, Vec<f64>> = HashMap::new();
    let mut events = Vec::with_capacity(n_events);
    for i in 1..=n_params {
        if byte_order == "1,2,3,4" {
            events = read_events::<LittleEndian>(reader, data_type, n_events, i, metadata)?;
        } else if byte_order == "4,3,2,1" {
            events = read_events::<BigEndian>(reader, data_type, n_events, i, metadata)?;
        } else {
            return Err(FcsError::InvalidData("Could not determine byte order.".to_string()));
        }

        let id = metadata.get(&format!("$P{}S", i)).unwrap();
        parameters.insert(id.to_owned(), events);
    }

    Ok(parameters)
}

/// Reads `n_events` consecutive values from the current reader position and
/// returns them converted to `f64`.
///
/// * `data_type` - `F` (32 bit float), `D` (64 bit float) or `I` (unsigned
///   integer whose width is taken from `$PnB` of parameter `param_idx`).
/// * `B` - byte order used to decode every value.
///
/// Integer widths of 8, 16, 32, 64 and 128 bits are supported.
///
/// # Errors
///
/// * `FcsError::InvalidData` if the data type or bit width is not supported,
///   or `$PnB` is missing or not a number.
/// * `FcsError::IoError` if the reader holds fewer bytes than requested.
fn read_events<B: byteorder::ByteOrder>(reader: &mut BufReader<&File>, data_type: &str, n_events: usize, param_idx: usize, metadata: &HashMap<String, String>) -> Result<Vec<f64>, FcsError> {
    /// Reads `n_events` values of `width` bytes each and decodes them one by one.
    fn read_values(
        reader: &mut BufReader<&File>,
        n_events: usize,
        width: usize,
        decode: impl Fn(&[u8]) -> f64,
    ) -> Result<Vec<f64>, FcsError> {
        let n_bytes = n_events
            .checked_mul(width)
            .ok_or_else(|| FcsError::InvalidData("Data segment is too large".to_string()))?;
        let mut raw = vec![0u8; n_bytes];
        reader.read_exact(&mut raw)?;
        Ok(raw.chunks_exact(width).map(decode).collect())
    }

    match data_type {
        "F" => read_values(reader, n_events, 4, |bytes| f64::from(B::read_f32(bytes))),
        "D" => read_values(reader, n_events, 8, |bytes| B::read_f64(bytes)),
        "I" => {
            let bits_per_param = metadata
                .get(&format!("$P{}B", param_idx))
                .and_then(|bits| bits.trim().parse::<usize>().ok())
                .ok_or_else(|| FcsError::InvalidData(format!("Missing or invalid $P{}B keyword", param_idx)))?;
            // Match on the exact bit count so widths that are not a whole
            // number of bytes are rejected instead of being rounded down.
            match bits_per_param {
                8 => read_values(reader, n_events, 1, |bytes| f64::from(bytes[0])),
                16 => read_values(reader, n_events, 2, |bytes| f64::from(B::read_u16(bytes))),
                32 => read_values(reader, n_events, 4, |bytes| f64::from(B::read_u32(bytes))),
                // Values above 2^53 lose precision in an f64.
                64 => read_values(reader, n_events, 8, |bytes| B::read_u64(bytes) as f64),
                128 => read_values(reader, n_events, 16, |bytes| B::read_u128(bytes) as f64),
                _ => Err(FcsError::InvalidData(("Bits for param type not supported").to_string())),
            }
        }
        _ => Err(FcsError::InvalidData("FCS data type not supported. Must be F, D, or I".to_string())),
    }
}