1use crate::fcs::{EventData, Metadata, Sample};
2use atoi::atoi;
3use byteorder::ReadBytesExt;
4use derive_more::{Display, From};
5use nom::bytes::complete::{is_not, tag, take};
6use nom::combinator::map_res;
7use nom::error::ErrorKind;
8use nom::multi::fold_many1;
9use nom::sequence::{separated_pair, terminated, tuple};
10use nom::IResult;
11use regex::Regex;
12use std::collections::HashMap;
13use std::fs::File;
14use std::io::{BufReader, Read, Seek, SeekFrom};
15use std::num::ParseIntError;
16use std::ops::RangeInclusive;
17use std::path::Path;
18use std::str::FromStr;
19
20#[derive(Display, From, Debug)]
22pub enum Error {
23 #[from]
24 IO(std::io::Error),
25
26 #[display("Invalid FCS version: {}", version)]
27 InvalidVersion {
28 version: String,
29 },
30
31 #[display("Invalid file type found. File must be fcs.")]
32 InvalidFileType,
33
34 #[display("Failed to parse header segment offset.")]
35 FailedHeaderOffsetParse,
36
37 #[display("Failed to parse text segment delimiter.")]
38 FailedDelimiterParse,
39
40 #[display("Metadata and header segment offsets don't match.")]
41 MetadataOffsetMismatch,
42
43 FailedMetadataParse,
44
45 #[from]
46 FailedIntParse(ParseIntError),
47
48 InvalidMetadata,
49
50 #[display("Invalid data mode: {data_mode} for version {version}")]
51 InvalidDataMode {
52 data_mode: String,
53 version: String,
54 },
55
56 #[display("Invalid data type: {kind} for version {version}")]
57 InvalidDataType {
58 kind: String,
59 version: String,
60 },
61
62 #[display("Could not find key: {key}, in FCS metadata")]
63 MetadataKeyNotFound {
64 key: String,
65 },
66
67 NoDataFound,
68
69 #[display("Invalid bit param length: {bit_length} for parameter index {index}")]
70 InvalidParamBitLength {
71 bit_length: usize,
72 index: usize,
73 },
74
75 InvalidByteOrder {
76 byte_order: String,
77 },
78
79 #[from]
80 FromUtf8Error(std::string::FromUtf8Error),
81}
82
83type Result<T> = core::result::Result<T, Error>;
85
86pub fn read<P: AsRef<Path>>(path: P) -> Result<Sample> {
88 if path.as_ref().extension() != Some("fcs".as_ref()) {
89 return Err(Error::InvalidFileType);
90 }
91
92 let file = File::open(path)?;
93 let mut reader = BufReader::new(file);
94
95 let header = read_header(&mut reader)?;
96 let metadata = read_metadata(&mut reader, &header)?;
97 let event_data = read_event_data(&mut reader, &metadata)?;
98
99 Ok(Sample {
100 metadata,
101 event_data,
102 })
103}
104
/// FCS versions this reader can identify from the file header.
#[derive(Debug, PartialEq)]
enum Version {
    /// Header identifier `FCS3.1`.
    FCS3_1,
    /// Header identifier `FCS3.0`.
    FCS3_0,
}
111
112impl FromStr for Version {
113 type Err = Error;
114
115 fn from_str(s: &str) -> Result<Self> {
117 match s {
118 "FCS3.1" => Ok(Version::FCS3_1),
119 "FCS3.0" => Ok(Version::FCS3_0),
121 _ => Err(Error::InvalidVersion {
122 version: s.to_string(),
123 }),
124 }
125 }
126}
127
128impl Display for Version {
129 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
130 let str = match self {
131 Version::FCS3_1 => "FCS3.1".to_string(),
132 Version::FCS3_0 => "FCS3.0".to_string(),
133 };
134 write!(f, "{}", str)
135 }
136}
137
138struct Header {
140 version: Version,
141 text_offsets: RangeInclusive<usize>,
142 data_offsets: RangeInclusive<usize>,
143 analysis_offsets: RangeInclusive<usize>,
144}
145
146fn read_header(reader: &mut BufReader<File>) -> Result<Header> {
148 let mut version_buffer = [0u8; 6];
149 reader.read_exact(&mut version_buffer)?;
150 let version = String::from_utf8(version_buffer.to_vec())?.parse::<Version>()?;
151
152 reader.seek(SeekFrom::Current(4))?; let mut offset_buffer = [0u8; 48]; reader.read_exact(&mut offset_buffer)?;
156
157 let (offset_buffer, text_offsets) = parse_segment_offsets(&offset_buffer)?;
158 let (offset_buffer, data_offsets) = parse_segment_offsets(&offset_buffer)?;
159 let (_, analysis_offsets) = parse_segment_offsets(&offset_buffer)?;
160
161 Ok(Header {
162 version,
163 text_offsets,
164 data_offsets,
165 analysis_offsets,
166 })
167}
168
169fn parse_segment_offsets(input: &[u8]) -> Result<(&[u8], RangeInclusive<usize>)> {
171 let (input, (start, stop)) = tuple((parse_offset_bytes, parse_offset_bytes))(input)
172 .map_err(|_| Error::FailedHeaderOffsetParse)?;
173
174 Ok((input, start..=stop))
175}
176
177fn parse_offset_bytes(input: &[u8]) -> IResult<&[u8], usize> {
179 map_res(take(8usize), |bytes: &[u8]| {
180 atoi::<usize>(bytes.trim_ascii_start()).ok_or(ErrorKind::Fail)
181 })(input)
182}
183
/// Placeholder that stands in for a doubled (escaped) delimiter while the text
/// segment is being split; `parse_metadata_string` turns it back into a single
/// delimiter afterwards.
const DOUBLE_DELIMITER_TRANSFORM: &str = "@ESCAPED@";
188
/// Non-indexed keywords from the "required" group.
///
/// `is_valid` only uses this list to recognise keys; it does not check that
/// each of them is present.
const REQUIRED_KEYWORDS: [&str; 12] = [
    "$BEGINANALYSIS",
    "$BEGINDATA",
    "$BEGINSTEXT",
    "$BYTEORD",
    "$DATATYPE",
    "$ENDANALYSIS",
    "$ENDDATA",
    "$ENDSTEXT",
    "$MODE",
    "$NEXTDATA",
    "$PAR",
    "$TOT",
];
204
/// Non-indexed keywords from the "optional" group that `is_valid` accepts.
const OPTIONAL_KEYWORDS: [&str; 31] = [
    "$ABRT",
    "$BTIM",
    "$CELLS",
    "$COM",
    "$CSMODE",
    "$CSVBITS",
    "$CYT",
    "$CYTSN",
    "$DATE",
    "$ETIM",
    "$EXP",
    "$FIL",
    "$GATE",
    "$GATING",
    "$INST",
    "$LAST_MODIFIED",
    "$LAST_MODIFIER",
    "$LOST",
    "$OP",
    "$ORIGINALITY",
    "$PLATEID",
    "$PLATENAME",
    "$PROJ",
    "$SMNO",
    "$SPILLOVER",
    "$SRC",
    "$SYS",
    "$TIMESTEP",
    "$TR",
    "$VOL",
    "$WELLID",
];
239
240fn read_metadata(reader: &mut BufReader<File>, header: &Header) -> Result<Metadata> {
242 reader.seek(SeekFrom::Start(*header.text_offsets.start() as u64))?;
243 let mut metadata_buf = vec![0u8; *header.text_offsets.end() - *header.text_offsets.start()];
244 reader.read_exact(&mut metadata_buf)?;
245
246 let metadata_txt = String::from_utf8(metadata_buf)?;
247
248 let (metadata_txt, delimiter) =
249 parse_delimiter(&metadata_txt).map_err(|_| Error::FailedDelimiterParse)?;
250
251 let metadata_txt = metadata_txt.replace(&delimiter.repeat(2), DOUBLE_DELIMITER_TRANSFORM);
256
257 let (_, metadata) = fold_many1(
258 |input| parse_metadata_pairs(input, delimiter),
259 HashMap::new,
260 |mut acc: HashMap<String, String>, (key, value)| {
261 acc.insert(key, value);
262 acc
263 },
264 )(&metadata_txt)
265 .map_err(|_| Error::FailedMetadataParse)?;
266
267 metadata.is_valid()?;
268 cross_validate(&metadata, &header)?;
269 Ok(metadata)
270}
271
272fn parse_delimiter(input: &str) -> IResult<&str, &str> {
274 take(1u8)(input)
275}
276
277fn parse_metadata_string<'a>(input: &'a str, delimiter: &str) -> IResult<&'a str, String> {
279 map_res(is_not(delimiter), |s: &str| {
280 Ok::<String, std::io::Error>(s.replace(DOUBLE_DELIMITER_TRANSFORM, delimiter))
283 })(input)
284}
285
286fn parse_metadata_pairs<'a>(input: &'a str, delimiter: &str) -> IResult<&'a str, (String, String)> {
288 separated_pair(
289 |input| parse_metadata_string(input, delimiter), tag(delimiter), terminated(
292 |input| parse_metadata_string(input, delimiter),
294 tag(delimiter),
295 ),
296 )(input)
297}
298
299fn validate_metadata_offsets(
301 seg_start: usize,
302 seg_end: usize,
303 seg_offsets: &RangeInclusive<usize>,
304) -> Result<()> {
305 if seg_start != *seg_offsets.start() || seg_end != *seg_offsets.end() {
306 return Err(Error::InvalidMetadata);
307 }
308
309 Ok(())
310}
311
312trait GetRequiredKey {
313 fn get_required_key(&self, key: &str) -> Result<&str>;
314}
315
316impl GetRequiredKey for Metadata {
317 fn get_required_key(&self, key: &str) -> Result<&str> {
320 self.get(key)
321 .ok_or(Error::MetadataKeyNotFound {
322 key: key.to_string(),
323 })
324 .map(|s| s.as_str())
325 }
326}
327
328trait IsValid {
330 fn is_valid(&self) -> Result<()>;
331}
332
333impl IsValid for Metadata {
334 fn is_valid(&self) -> Result<()> {
336 let n_params = self.get_required_key("$PAR")?;
338
339 let n_digits = n_params.chars().count().to_string();
340 let parameter_indexed_regex = r"[PR]\d{1,".to_string() + &n_digits + "}[BENRDFGLOPSTVIW]";
341
342 let param_keywords = Regex::new(¶meter_indexed_regex).unwrap();
344
345 for key in self.keys() {
347 if !REQUIRED_KEYWORDS.contains(&key.as_str())
348 && !param_keywords.is_match(key)
349 && !OPTIONAL_KEYWORDS.contains(&key.as_str())
350 {
351 return Err(Error::InvalidMetadata);
352 }
353 }
354
355 Ok(())
356 }
357}
358
359fn cross_validate(metadata: &Metadata, header: &Header) -> Result<()> {
361 let begin_data = metadata.get_required_key("$BEGINDATA")?;
363 let end_data = metadata.get_required_key("$ENDDATA")?;
364 validate_metadata_offsets(
365 begin_data.parse::<usize>()?,
366 end_data.parse::<usize>()?,
367 &header.data_offsets,
368 )?;
369
370 let begin_analysis = metadata.get_required_key("$BEGINANALYSIS")?;
372 let end_analysis = metadata.get_required_key("$ENDANALYSIS")?;
373 validate_metadata_offsets(
374 begin_analysis.parse::<usize>()?,
375 end_analysis.parse::<usize>()?,
376 &header.analysis_offsets,
377 )?;
378
379 match header.version {
381 Version::FCS3_1 => {
382 let data_mode = metadata.get_required_key("$MODE")?;
383 if data_mode != "L" {
384 return Err(Error::InvalidDataMode {
385 data_mode: data_mode.to_string(),
386 version: header.version.to_string(),
387 });
388 }
389
390 let data_type = metadata.get_required_key("$DATATYPE")?;
391 if data_type != "I" && data_type != "F" && data_type != "D" {
392 return Err(Error::InvalidDataType {
393 kind: data_type.to_string(),
394 version: header.version.to_string(),
395 });
396 }
397 }
398 Version::FCS3_0 => {
399 todo!()
400 }
401 }
402 Ok(())
403}
404
405fn read_event_data(
407 reader: &mut BufReader<std::fs::File>,
408 metadata: &Metadata,
409) -> Result<EventData> {
410 let n_params = metadata.get_required_key("$PAR")?.parse::<usize>()?;
411 let n_events = metadata.get_required_key("$TOT")?.parse::<usize>()?;
412 let capacity = n_params * n_events;
413
414 if capacity == 0 {
415 return Err(Error::NoDataFound);
416 }
417
418 let byte_order = metadata.get_required_key("$BYTEORD")?;
419 let data_type = metadata.get_required_key("$DATATYPE")?;
420 let data_start = metadata.get_required_key("$BEGINDATA")?.parse::<u64>()?;
421
422 reader.seek(SeekFrom::Start(data_start))?;
423 let mut events: Vec<f64>;
424 let mut data: HashMap<String, Vec<f64>> = HashMap::with_capacity(n_params);
425
426 match metadata.get_required_key("$MODE")? {
427 "L" => {
429 for i in 1..=n_params {
430 match byte_order {
431 "1,2,3,4" => {
432 events = parse_events::<byteorder::LittleEndian>(
433 reader, &data_type, n_events, metadata, i,
434 )?;
435 }
436 "4,3,2,1" => {
437 events = parse_events::<byteorder::BigEndian>(
438 reader, &data_type, n_events, metadata, i,
439 )?;
440 }
441 _ => {
442 return Err(Error::InvalidByteOrder {
443 byte_order: byte_order.to_string(),
444 })
445 }
446 }
447 let id = metadata.get_required_key(&format!("$P{}N", i))?;
448 data.insert(id.to_string(), events);
449 }
450 Ok(data)
451 }
452 "H" => todo!(),
453 _ => unreachable!(),
454 }
455}
456
457fn parse_events<B: byteorder::ByteOrder>(
458 reader: &mut BufReader<std::fs::File>,
459 data_type: &str,
460 n_events: usize,
461 metadata: &Metadata,
462 index: usize,
463) -> Result<Vec<f64>> {
464 let mut data: Vec<f64> = Vec::with_capacity(n_events);
465 match data_type {
466 "I" => {
468 let bit_length = metadata
469 .get_required_key(&format!("P{}B", index))?
470 .parse::<usize>()?;
471 match bit_length {
472 16 => {
473 for _ in 0..n_events {
474 let event = reader.read_u16::<B>()? as f64;
475 data.push(event);
476 }
477 }
478 32 => {
479 for _ in 0..n_events {
480 let event = reader.read_u32::<B>()? as f64;
481 data.push(event);
482 }
483 }
484 64 => {
485 for _ in 0..n_events {
486 let event = reader.read_u64::<B>()? as f64;
487 data.push(event);
488 }
489 }
490 128 => {
491 for _ in 0..n_events {
492 let event = reader.read_u128::<B>()? as f64;
493 data.push(event);
494 }
495 }
496 _ => return Err(Error::InvalidParamBitLength { bit_length, index }),
497 }
498 }
499 "F" => {
501 for _ in 0..n_events {
502 let event = reader.read_f32::<B>()? as f64;
503 data.push(event);
504 }
505 }
506 "D" => {
508 for _ in 0..n_events {
509 let event = reader.read_f64::<B>()?;
510 data.push(event);
511 }
512 }
513 "A" => {
514 unimplemented!()
515 }
516 _ => unreachable!(),
517 }
518 Ok(data)
519}
520
#[cfg(test)]
mod tests {
    use super::*;

    /// FCS 3.1 fixture shared by the file-based tests.
    const FIXTURE_PATH: &str = "tests/data/test_fcs_3_1.fcs";

    /// The header of the fixture yields the expected version and offsets.
    #[test]
    fn fcs_header_parser() -> Result<()> {
        let mut reader = BufReader::new(File::open(FIXTURE_PATH)?);

        let Header {
            version,
            text_offsets,
            data_offsets,
            analysis_offsets,
        } = read_header(&mut reader)?;

        assert_eq!(version, Version::FCS3_1);
        assert_eq!(text_offsets, 64..=1717);
        assert_eq!(data_offsets, 1718..=5201717);
        assert_eq!(analysis_offsets, 0..=0);

        Ok(())
    }

    /// Key/value splitting keeps escaped (doubled) delimiters inside keys and values.
    #[test]
    fn fcs_metadata_parser() {
        let raw_text =
            "\\Key1\\Value1\\Escaped\\\\Key2\\Value2\\Key3\\Escaped\\\\Value3\\Key 4\\Value-4\\";

        let expected: HashMap<String, String> = [
            ("Key1", "Value1"),
            ("Escaped\\Key2", "Value2"),
            ("Key3", "Escaped\\Value3"),
            ("Key 4", "Value-4"),
        ]
        .into_iter()
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect();

        let (body, delimiter) = parse_delimiter(raw_text).unwrap();
        let escaped_body = body.replace(delimiter.repeat(2).as_str(), DOUBLE_DELIMITER_TRANSFORM);

        let (_, parsed) = fold_many1(
            |input| parse_metadata_pairs(input, delimiter),
            HashMap::new,
            |mut acc: HashMap<String, String>, (key, value)| {
                acc.insert(key, value);
                acc
            },
        )(&escaped_body)
        .unwrap();

        assert_eq!(parsed, expected);
    }

    /// Reading the whole fixture yields one column per parameter and one value per event.
    #[test]
    fn full_fcs_parser() -> Result<()> {
        let sample = read(FIXTURE_PATH)?;

        let declared_params = sample.metadata.get_required_key("$PAR")?.parse::<usize>()?;
        assert_eq!(declared_params, sample.event_data.len());

        let declared_events = sample.metadata.get_required_key("$TOT")?.parse::<usize>()?;
        let first_param_name = sample.metadata.get_required_key("$P1N")?;
        let first_column = sample.event_data.get(first_param_name).unwrap();
        assert_eq!(declared_events, first_column.len());

        Ok(())
    }
}