flow_fcs/metadata.rs
1use super::{
2 byteorder::ByteOrder,
3 datatype::FcsDataType,
4 header::Header,
5 keyword::{
6 ByteKeyword, FloatKeyword, IntegerKeyword, IntegerableKeyword, Keyword,
7 KeywordCreationResult, MixedKeyword, StringKeyword, match_and_parse_keyword,
8 },
9};
10use anyhow::{Result, anyhow};
11use memmap3::Mmap;
12use regex::bytes::Regex;
13use rustc_hash::FxHashMap;
14use serde::{Deserialize, Serialize};
15use std::sync::Arc;
16use uuid::Uuid;
/// Map from keyword name to its parsed [`Keyword`] value, backed by `FxHashMap`.
pub type KeywordMap = FxHashMap<String, Keyword>;
18
/// Contains keyword-value pairs and delimiter from the TEXT segment of an FCS file
///
/// The TEXT segment contains all metadata about the FCS file, including:
/// - File information (GUID, filename, cytometer type)
/// - Data structure information (number of events, parameters, data type, byte order)
/// - Parameter metadata (names, labels, ranges, transforms)
/// - Optional information (compensation matrices, timestamps, etc.)
///
/// Keywords are stored in a hashmap for fast lookup, with type-safe accessors
/// for different keyword types (integer, float, string, byte, mixed).
///
/// Note: the derived `Default` produces `'\0'` as the delimiter (the default `char`),
/// whereas [`Metadata::new`] uses a space.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct Metadata {
    /// Parsed keywords keyed by name. `from_mmap` and `insert_string_keyword` store
    /// every key with a leading `$`.
    pub keywords: KeywordMap,
    /// Delimiter character that separates keywords and values in the TEXT segment.
    pub delimiter: char,
}
34
35impl Metadata {
36 #[must_use]
37 pub fn new() -> Self {
38 Self {
39 keywords: FxHashMap::default(),
40 delimiter: ' ',
41 }
42 }
43 /// Prints all keywords sorted alphabetically by key name
44 ///
45 /// This is a debugging utility that displays all keyword-value pairs
46 /// in the metadata, sorted for easy reading.
47 pub fn print_sorted_by_keyword(&self) {
48 // Step 1: Get a Vector from existing text HashMap.
49 let mut sorted: Vec<_> = self.keywords.iter().collect();
50
51 // Step 2: sort Vector by key from HashMap.
52 // ... This sorts by HashMap keys.
53 // Each tuple is sorted by its first item [.0] (the key).
54 sorted.sort_by_key(|a| a.0);
55
56 // Step 3: loop over sorted vector.
57 for (key, value) in &sorted {
58 println!("{key}: {value}");
59 }
60 }
61 /// Reads the text segment of the fcs file and returns an `Metadata` struct
62 ///
63 /// Uses memchr for fast delimiter finding (5-10x faster than byte-by-byte iteration)
64 #[must_use]
65 pub fn from_mmap(mmap: &Mmap, header: &Header) -> Self {
66 let text_start = header.text_offset.start();
67
68 // Read the first byte of the text segment to determine the delimiter:
69 let delimiter = mmap[*text_start];
70
71 // Determine the number of bytes to read, excluding the delimiter:
72 let text_end = header.text_offset.end();
73 let text_slice = &mmap[(*text_start + 1)..*text_end];
74
75 // Extract keyword value pairs using memchr for fast delimiter finding
76 let mut keywords: KeywordMap = FxHashMap::default();
77
78 // Find all delimiter positions using SIMD-accelerated search
79 // This is 5-10x faster than manual iteration
80 let delimiter_positions: Vec<usize> = memchr::memchr_iter(delimiter, text_slice).collect();
81
82 // Parse keyword-value pairs
83 // FCS format: |KEY1|VALUE1|KEY2|VALUE2|...
84 // delimiter_positions gives us the split points
85 let mut prev_pos = 0;
86 let mut is_keyword = true;
87 let mut current_key = String::new();
88
89 for &pos in &delimiter_positions {
90 // Extract the slice between delimiters
91 let segment = &text_slice[prev_pos..pos];
92
93 // SAFETY: FCS spec requires TEXT segment to be ASCII/UTF-8
94 let text = std::str::from_utf8(segment).unwrap_or_default();
95
96 if is_keyword {
97 // This is a keyword
98 current_key = text.to_string();
99 is_keyword = false;
100 } else {
101 // This is a value - parse and store the keyword-value pair
102 if !current_key.is_empty() {
103 // Normalize key: ensure it has $ prefix (FCS spec requires it)
104 // Store with $ prefix for consistent lookups
105 let normalized_key: String = if current_key.starts_with('$') {
106 current_key.clone()
107 } else {
108 format!("${}", current_key)
109 };
110
111 match match_and_parse_keyword(¤t_key, text) {
112 KeywordCreationResult::Int(int_keyword) => {
113 keywords.insert(normalized_key.clone(), Keyword::Int(int_keyword));
114 }
115 KeywordCreationResult::Float(float_keyword) => {
116 keywords.insert(normalized_key.clone(), Keyword::Float(float_keyword));
117 }
118 KeywordCreationResult::String(string_keyword) => {
119 keywords
120 .insert(normalized_key.clone(), Keyword::String(string_keyword));
121 }
122 KeywordCreationResult::Byte(byte_keyword) => {
123 keywords.insert(normalized_key.clone(), Keyword::Byte(byte_keyword));
124 }
125 KeywordCreationResult::Mixed(mixed_keyword) => {
126 keywords.insert(normalized_key.clone(), Keyword::Mixed(mixed_keyword));
127 }
128 KeywordCreationResult::UnableToParse => {
129 eprintln!(
130 "Unable to parse keyword: {} with value: {}",
131 current_key, text
132 );
133 }
134 }
135 }
136 current_key.clear();
137 is_keyword = true;
138 }
139
140 prev_pos = pos + 1;
141 }
142
143 Self {
144 keywords,
145 delimiter: delimiter as char,
146 }
147 }
148
149 /// Check that required keys are present in the TEXT segment of the metadata
150 /// # Errors
151 /// Will return `Err` if:
152 /// - any of the required keywords are missing from the keywords hashmap
153 /// - the number of parameters can't be obtained from the $PAR keyword in the TEXT section
154 /// - any keyword has a Pn[X] value where n is greater than the number of parameters indicated by the $PAR keyword
155 pub fn validate_text_segment_keywords(&self, header: &Header) -> Result<()> {
156 println!("Validating FCS file...{}", header.version);
157 let required_keywords = header.version.get_required_keywords();
158 for keyword in required_keywords {
159 if !self.keywords.contains_key(*keyword) {
160 // println!("Invalid FCS file: Missing keyword: {:#?}", self.keywords);
161 return Err(anyhow!("Invalid FCS file: Missing keyword: {}", keyword));
162 }
163 }
164
165 Ok(())
166 }
167
168 /// Validates if a GUID is present in the file's metadata, and if not, generates a new one.
169 pub fn validate_guid(&mut self) {
170 if self.get_string_keyword("GUID").is_err() {
171 self.insert_string_keyword("GUID".to_string(), Uuid::new_v4().to_string());
172 }
173 }
174
175 /// Confirm that no stored keyword has a value greater than the $PAR keyword indicates
176 #[allow(unused)]
177 fn validate_number_of_parameters(&self) -> Result<()> {
178 let n_params = self.get_number_of_parameters()?;
179 let n_params_string = n_params.to_string();
180 let n_digits = n_params_string.chars().count().to_string();
181 let regex_string = r"[PR]\d{1,".to_string() + &n_digits + "}[BENRDFGLOPSTVIW]";
182 let param_keywords = Regex::new(®ex_string)?;
183
184 for keyword in self.keywords.keys() {
185 if !param_keywords.is_match(keyword.as_bytes()) {
186 continue; // Skip to the next iteration if the keyword doesn't match
187 }
188
189 // If the keyword starts with a $P, then the value of the next non-terminal characters should be less than or equal to the number of parameters
190 if keyword.starts_with("$P") {
191 let param_number = keyword
192 .chars()
193 .nth(1)
194 .expect("should have a second character in {keyword}")
195 .to_digit(10)
196 .expect("should be able to convert the character to a digit to count the parameters") as usize;
197 if param_number > *n_params {
198 return Err(anyhow!(
199 "Invalid FCS file: {} keyword value exceeds number of parameters",
200 keyword
201 ));
202 }
203 }
204 }
205
206 Ok(())
207 }
208 /// Generic function to get the unwrapped unsigned integer value associated with a numeric keyword (e.g. $PAR, $TOT, etc.)
209 fn get_keyword_value_as_usize(&self, keyword: &str) -> Result<&usize> {
210 Ok(self.get_integer_keyword(keyword)?.get_usize())
211 }
212
213 /// Return the number of parameters in the file from the $PAR keyword in the metadata TEXT section
214 /// # Errors
215 /// Will return `Err` if the $PAR keyword is not present in the metadata keywords hashmap
216 pub fn get_number_of_parameters(&self) -> Result<&usize> {
217 self.get_keyword_value_as_usize("$PAR")
218 }
219
220 /// Return the number of events in the file from the $TOT keyword in the metadata TEXT section
221 /// # Errors
222 /// Will return `Err` if the $TOT keyword is not present in the metadata keywords hashmap
223 pub fn get_number_of_events(&self) -> Result<&usize> {
224 self.get_keyword_value_as_usize("$TOT")
225 }
226
227 /// Return the data type from the $DATATYPE keyword in the metadata TEXT section, unwraps and returns it if it exists.
228 /// # Errors
229 /// Will return `Err` if the $DATATYPE keyword is not present in the metadata keywords hashmap
230 pub fn get_data_type(&self) -> Result<&FcsDataType> {
231 let keyword = self.get_byte_keyword("$DATATYPE")?;
232 if let ByteKeyword::DATATYPE(data_type) = keyword {
233 Ok(data_type)
234 } else {
235 Err(anyhow!("No $DATATYPE value stored."))
236 }
237 }
238
239 /// Get the data type for a specific channel/parameter (FCS 3.2+)
240 ///
241 /// First checks for `$PnDATATYPE` keyword to see if this parameter has a specific data type override.
242 /// If not found, falls back to the default `$DATATYPE` keyword.
243 ///
244 /// # Arguments
245 /// * `parameter_number` - 1-based parameter index
246 ///
247 /// # Errors
248 /// Will return `Err` if neither `$PnDATATYPE` nor `$DATATYPE` is present
249 pub fn get_data_type_for_channel(&self, parameter_number: usize) -> Result<FcsDataType> {
250 // First try to get parameter-specific data type (FCS 3.2+)
251 if let Ok(pn_datatype_keyword) =
252 self.get_parameter_numeric_metadata(parameter_number, "DATATYPE")
253 {
254 if let IntegerKeyword::PnDATATYPE(datatype_code) = pn_datatype_keyword {
255 // Map datatype code to enum: 0=I, 1=F, 2=D
256 match datatype_code {
257 0 => Ok(FcsDataType::I),
258 1 => Ok(FcsDataType::F),
259 2 => Ok(FcsDataType::D),
260 _ => Err(anyhow!(
261 "Invalid $P{}DATATYPE code: {}",
262 parameter_number,
263 datatype_code
264 )),
265 }
266 } else {
267 // Shouldn't happen, but fall back to default
268 Ok(self.get_data_type()?.clone())
269 }
270 } else {
271 // Fall back to default $DATATYPE
272 Ok(self.get_data_type()?.clone())
273 }
274 }
275
276 /// Calculate the total bytes per event by summing bytes per parameter
277 ///
278 /// Uses `$PnB` (bits per parameter) divided by 8 to get bytes per parameter,
279 /// then sums across all parameters. This is more accurate than using `$DATATYPE`
280 /// which only provides a default value.
281 ///
282 /// # Errors
283 /// Will return `Err` if the number of parameters cannot be determined or
284 /// if any required `$PnB` keyword is missing
285 pub fn calculate_bytes_per_event(&self) -> Result<usize> {
286 let number_of_parameters = self.get_number_of_parameters()?;
287 let mut total_bytes = 0;
288
289 for param_num in 1..=*number_of_parameters {
290 // Get $PnB (bits per parameter)
291 let bits = self.get_parameter_numeric_metadata(param_num, "B")?;
292 if let IntegerKeyword::PnB(bits_value) = bits {
293 // Convert bits to bytes (round up if not divisible by 8)
294 let bytes = (bits_value + 7) / 8;
295 total_bytes += bytes;
296 } else {
297 return Err(anyhow!(
298 "$P{}B keyword found but is not the expected PnB variant",
299 param_num
300 ));
301 }
302 }
303
304 Ok(total_bytes)
305 }
306
307 /// Get bytes per parameter for a specific channel
308 ///
309 /// Uses `$PnB` (bits per parameter) divided by 8 to get bytes per parameter.
310 ///
311 /// # Arguments
312 /// * `parameter_number` - 1-based parameter index
313 ///
314 /// # Errors
315 /// Will return `Err` if the `$PnB` keyword is missing for this parameter
316 pub fn get_bytes_per_parameter(&self, parameter_number: usize) -> Result<usize> {
317 let bits = self.get_parameter_numeric_metadata(parameter_number, "B")?;
318 if let IntegerKeyword::PnB(bits_value) = bits {
319 // Convert bits to bytes (round up if not divisible by 8)
320 Ok((bits_value + 7) / 8)
321 } else {
322 Err(anyhow!(
323 "$P{}B keyword found but is not the expected PnB variant",
324 parameter_number
325 ))
326 }
327 }
328
329 /// Return the byte order from the $BYTEORD keyword in the metadata TEXT section, unwraps and returns it if it exists.
330 /// # Errors
331 /// Will return `Err` if the $BYTEORD keyword is not present in the keywords hashmap
332 pub fn get_byte_order(&self) -> Result<&ByteOrder> {
333 let keyword = self.get_byte_keyword("$BYTEORD")?;
334 if let ByteKeyword::BYTEORD(byte_order) = keyword {
335 Ok(byte_order)
336 } else {
337 Err(anyhow!("No $BYTEORD value stored."))
338 }
339 }
340 /// Returns a keyword that holds numeric data from the keywords hashmap, if it exists
341 /// # Errors
342 /// Will return `Err` if the keyword is not present in the keywords hashmap
343 pub fn get_integer_keyword(&self, keyword: &str) -> Result<&IntegerKeyword> {
344 if let Some(keyword) = self.keywords.get(keyword) {
345 match keyword {
346 Keyword::Int(integer) => Ok(integer),
347 _ => Err(anyhow!("Keyword is not integer variant")),
348 }
349 } else {
350 Err(anyhow!("No {keyword} keyword stored."))
351 }
352 }
353
354 /// Returns a keyword that holds numeric data from the keywords hashmap, if it exists
355 /// # Errors
356 /// Will return `Err` if the keyword is not present in the keywords hashmap
357 pub fn get_float_keyword(&self, keyword: &str) -> Result<&FloatKeyword> {
358 if let Some(keyword) = self.keywords.get(keyword) {
359 match keyword {
360 Keyword::Float(float) => Ok(float),
361 _ => Err(anyhow!("Keyword is not float variant")),
362 }
363 } else {
364 Err(anyhow!("No {keyword} keyword stored."))
365 }
366 }
367
368 /// Returns a keyword that holds string data from the keywords hashmap, if it exists
369 /// # Errors
370 /// Will return `Err` if the keyword is not present in the keywords hashmap
371 pub fn get_string_keyword(&self, keyword: &str) -> Result<&StringKeyword> {
372 if let Some(keyword) = self.keywords.get(keyword) {
373 match keyword {
374 Keyword::String(string) => Ok(string),
375 _ => Err(anyhow!("Keyword is not a string variant")),
376 }
377 } else {
378 Err(anyhow!("No {keyword} keyword stored."))
379 }
380 }
381
382 /// Returns a keyword that holds byte-orientation data from the keywords hashmap, if it exists
383 /// # Errors
384 /// Will return `Err` if the keyword is not present in the keywords hashmap
385 pub fn get_byte_keyword(&self, keyword: &str) -> Result<&ByteKeyword> {
386 if let Some(keyword) = self.keywords.get(keyword) {
387 match keyword {
388 Keyword::Byte(byte) => Ok(byte),
389 _ => Err(anyhow!("Keyword is not a byte variant")),
390 }
391 } else {
392 Err(anyhow!("No {keyword} keyword stored."))
393 }
394 }
395
396 /// Returns a keyword that holds mixed data from the keywords hashmap, if it exists
397 /// # Errors
398 /// Will return `Err` if the keyword is not present in the keywords hashmap
399 pub fn get_mixed_keyword(&self, keyword: &str) -> Result<&MixedKeyword> {
400 if let Some(keyword) = self.keywords.get(keyword) {
401 match keyword {
402 Keyword::Mixed(mixed) => Ok(mixed),
403 _ => Err(anyhow!("Keyword is not a mixed variant")),
404 }
405 } else {
406 Err(anyhow!("No {keyword} keyword stored."))
407 }
408 }
409
410 /// General function to get a given parameter's string keyword from the file's metadata (e.g. `$PnN` or `$PnS`)
411 /// # Errors
412 /// Will return `Err` if the keyword is not present in the keywords hashmap
413 pub fn get_parameter_string_metadata(
414 &self,
415 parameter_number: usize,
416 suffix: &str,
417 ) -> Result<&StringKeyword> {
418 // Interpolate the parameter number into the keyword:
419 let keyword = format!("$P{parameter_number}{suffix}");
420 self.get_string_keyword(&keyword)
421 }
422
423 /// Generic function to get a given parameter's integer keyword from the file's metadata (e.g. `$PnN`, `$PnS`, `$PnDATATYPE`)
424 /// # Errors
425 /// Will return `Err` if the keyword is not present in the keywords hashmap
426 pub fn get_parameter_numeric_metadata(
427 &self,
428 parameter_number: usize,
429 suffix: &str,
430 ) -> Result<&IntegerKeyword> {
431 // Interpolate the parameter number into the keyword:
432 let keyword = format!("$P{parameter_number}{suffix}");
433 self.get_integer_keyword(&keyword)
434 }
435
436 /// Get excitation wavelength(s) for a parameter from `$PnL` keyword
437 /// Returns the first wavelength if multiple are present (for co-axial lasers)
438 /// # Errors
439 /// Will return `Err` if the keyword is not present in the keywords hashmap
440 pub fn get_parameter_excitation_wavelength(
441 &self,
442 parameter_number: usize,
443 ) -> Result<Option<usize>> {
444 let keyword = format!("$P{parameter_number}L");
445
446 // Try as integer keyword first (older FCS format)
447 if let Ok(int_keyword) = self.get_integer_keyword(&keyword) {
448 if let IntegerKeyword::PnL(wavelength) = int_keyword {
449 return Ok(Some(*wavelength));
450 }
451 }
452
453 // Try as mixed keyword (FCS 3.1+ format, can have multiple wavelengths)
454 if let Ok(mixed_keyword) = self.get_mixed_keyword(&keyword) {
455 if let MixedKeyword::PnL(wavelengths) = mixed_keyword {
456 // Return the first wavelength if multiple are present
457 return Ok(wavelengths.first().copied());
458 }
459 }
460
461 Ok(None)
462 }
463
464 /// Return the name of the parameter's channel from the `$PnN` keyword in the metadata TEXT section, where `n` is the provided parameter index (1-based)
465 /// # Errors
466 /// Will return `Err` if the keyword is not present in the keywords hashmap
467 pub fn get_parameter_channel_name(&self, parameter_number: usize) -> Result<&str> {
468 if let StringKeyword::PnN(name) =
469 self.get_parameter_string_metadata(parameter_number, "N")?
470 {
471 Ok(name.as_ref())
472 } else {
473 Err(anyhow!(
474 "$P{parameter_number}N keyword not found in metadata TEXT section",
475 ))
476 }
477 }
478
479 /// Return the label name of the parameter from the `$PnS` keyword in the metadata TEXT section, where `n` is the provided parameter number
480 /// # Errors
481 /// Will return `Err` if the keyword is not present in the keywords hashmap
482 pub fn get_parameter_label(&self, parameter_number: usize) -> Result<&str> {
483 if let StringKeyword::PnS(label) =
484 self.get_parameter_string_metadata(parameter_number, "S")?
485 {
486 Ok(label.as_ref())
487 } else {
488 Err(anyhow!(
489 "$P{parameter_number}S keyword not found in metadata TEXT section",
490 ))
491 }
492 }
493
494 /// Transform the metadata keywords hashmap into a JSON object via serde
495 /// # Errors
496 /// Will return `Err` if the metadata keywords hashmap is empty
497 pub fn get_metadata_as_json_string(&self) -> Result<String> {
498 if self.keywords.is_empty() {
499 Err(anyhow!("No metadata keywords stored."))
500 } else {
501 let json = serde_json::to_string(&self.keywords)?;
502 Ok(json)
503 }
504 }
505
506 /// Insert or update a string keyword in the metadata
507 pub fn insert_string_keyword(&mut self, key: String, value: String) {
508 let normalized_key = if key.starts_with('$') {
509 key
510 } else {
511 format!("${key}")
512 };
513
514 let parsed = match_and_parse_keyword(&normalized_key, value.as_str());
515 let string_keyword = match parsed {
516 KeywordCreationResult::String(string_keyword) => string_keyword,
517 // If parsing fails (or parses to a non-string keyword), fall back to `Other`.
518 _ => StringKeyword::Other(Arc::from(value)),
519 };
520
521 self.keywords
522 .insert(normalized_key, Keyword::String(string_keyword));
523 }
524}