flow_fcs/
metadata.rs

1use super::{
2    byteorder::ByteOrder,
3    datatype::FcsDataType,
4    header::Header,
5    keyword::{
6        ByteKeyword, FloatKeyword, IntegerKeyword, IntegerableKeyword, Keyword,
7        KeywordCreationResult, MixedKeyword, StringKeyword, match_and_parse_keyword,
8    },
9};
10use anyhow::{Result, anyhow};
11use memmap3::Mmap;
12use regex::bytes::Regex;
13use rustc_hash::FxHashMap;
14use serde::{Deserialize, Serialize};
15use std::sync::Arc;
16use uuid::Uuid;
17pub type KeywordMap = FxHashMap<String, Keyword>;
18
/// Contains keyword-value pairs and delimiter from the TEXT segment of an FCS file
///
/// The TEXT segment contains all metadata about the FCS file, including:
/// - File information (GUID, filename, cytometer type)
/// - Data structure information (number of events, parameters, data type, byte order)
/// - Parameter metadata (names, labels, ranges, transforms)
/// - Optional information (compensation matrices, timestamps, etc.)
///
/// Keywords are stored in a hashmap for fast lookup, with type-safe accessors
/// for different keyword types (integer, float, string, byte, mixed).
///
/// Note: the derived `Default` yields a `'\0'` delimiter, whereas [`Metadata::new`]
/// uses a space.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct Metadata {
    /// Parsed keywords keyed by keyword name. Keys written by `from_mmap` and
    /// `insert_string_keyword` always carry a leading `$`.
    pub keywords: KeywordMap,
    /// Delimiter of the TEXT segment (`from_mmap` takes it from the segment's first byte).
    pub delimiter: char,
}
34
35impl Metadata {
36    #[must_use]
37    pub fn new() -> Self {
38        Self {
39            keywords: FxHashMap::default(),
40            delimiter: ' ',
41        }
42    }
43    /// Prints all keywords sorted alphabetically by key name
44    ///
45    /// This is a debugging utility that displays all keyword-value pairs
46    /// in the metadata, sorted for easy reading.
47    pub fn print_sorted_by_keyword(&self) {
48        // Step 1: Get a Vector from existing text HashMap.
49        let mut sorted: Vec<_> = self.keywords.iter().collect();
50
51        // Step 2: sort Vector by key from HashMap.
52        // ... This sorts by HashMap keys.
53        //     Each tuple is sorted by its first item [.0] (the key).
54        sorted.sort_by_key(|a| a.0);
55
56        // Step 3: loop over sorted vector.
57        for (key, value) in &sorted {
58            println!("{key}: {value}");
59        }
60    }
61    /// Reads the text segment of the fcs file and returns an `Metadata` struct
62    ///
63    /// Uses memchr for fast delimiter finding (5-10x faster than byte-by-byte iteration)
64    #[must_use]
65    pub fn from_mmap(mmap: &Mmap, header: &Header) -> Self {
66        let text_start = header.text_offset.start();
67
68        // Read the first byte of the text segment to determine the delimiter:
69        let delimiter = mmap[*text_start];
70
71        // Determine the number of bytes to read, excluding the delimiter:
72        let text_end = header.text_offset.end();
73        let text_slice = &mmap[(*text_start + 1)..*text_end];
74
75        // Extract keyword value pairs using memchr for fast delimiter finding
76        let mut keywords: KeywordMap = FxHashMap::default();
77
78        // Find all delimiter positions using SIMD-accelerated search
79        // This is 5-10x faster than manual iteration
80        let delimiter_positions: Vec<usize> = memchr::memchr_iter(delimiter, text_slice).collect();
81
82        // Parse keyword-value pairs
83        // FCS format: |KEY1|VALUE1|KEY2|VALUE2|...
84        // delimiter_positions gives us the split points
85        let mut prev_pos = 0;
86        let mut is_keyword = true;
87        let mut current_key = String::new();
88
89        for &pos in &delimiter_positions {
90            // Extract the slice between delimiters
91            let segment = &text_slice[prev_pos..pos];
92
93            // SAFETY: FCS spec requires TEXT segment to be ASCII/UTF-8
94            let text = std::str::from_utf8(segment).unwrap_or_default();
95
96            if is_keyword {
97                // This is a keyword
98                current_key = text.to_string();
99                is_keyword = false;
100            } else {
101                // This is a value - parse and store the keyword-value pair
102                if !current_key.is_empty() {
103                    // Normalize key: ensure it has $ prefix (FCS spec requires it)
104                    // Store with $ prefix for consistent lookups
105                    let normalized_key: String = if current_key.starts_with('$') {
106                        current_key.clone()
107                    } else {
108                        format!("${}", current_key)
109                    };
110
111                    match match_and_parse_keyword(&current_key, text) {
112                        KeywordCreationResult::Int(int_keyword) => {
113                            keywords.insert(normalized_key.clone(), Keyword::Int(int_keyword));
114                        }
115                        KeywordCreationResult::Float(float_keyword) => {
116                            keywords.insert(normalized_key.clone(), Keyword::Float(float_keyword));
117                        }
118                        KeywordCreationResult::String(string_keyword) => {
119                            keywords
120                                .insert(normalized_key.clone(), Keyword::String(string_keyword));
121                        }
122                        KeywordCreationResult::Byte(byte_keyword) => {
123                            keywords.insert(normalized_key.clone(), Keyword::Byte(byte_keyword));
124                        }
125                        KeywordCreationResult::Mixed(mixed_keyword) => {
126                            keywords.insert(normalized_key.clone(), Keyword::Mixed(mixed_keyword));
127                        }
128                        KeywordCreationResult::UnableToParse => {
129                            eprintln!(
130                                "Unable to parse keyword: {} with value: {}",
131                                current_key, text
132                            );
133                        }
134                    }
135                }
136                current_key.clear();
137                is_keyword = true;
138            }
139
140            prev_pos = pos + 1;
141        }
142
143        Self {
144            keywords,
145            delimiter: delimiter as char,
146        }
147    }
148
149    /// Check that required keys are present in the TEXT segment of the metadata
150    /// # Errors
151    /// Will return `Err` if:
152    /// - any of the required keywords are missing from the keywords hashmap
153    /// - the number of parameters can't be obtained from the $PAR keyword in the TEXT section
154    /// - any keyword has a Pn[X] value where n is greater than the number of parameters indicated by the $PAR keyword
155    pub fn validate_text_segment_keywords(&self, header: &Header) -> Result<()> {
156        println!("Validating FCS file...{}", header.version);
157        let required_keywords = header.version.get_required_keywords();
158        for keyword in required_keywords {
159            if !self.keywords.contains_key(*keyword) {
160                // println!("Invalid FCS file: Missing keyword: {:#?}", self.keywords);
161                return Err(anyhow!("Invalid FCS file: Missing keyword: {}", keyword));
162            }
163        }
164
165        Ok(())
166    }
167
168    /// Validates if a GUID is present in the file's metadata, and if not, generates a new one.
169    pub fn validate_guid(&mut self) {
170        if self.get_string_keyword("GUID").is_err() {
171            self.insert_string_keyword("GUID".to_string(), Uuid::new_v4().to_string());
172        }
173    }
174
175    /// Confirm that no stored keyword has a value greater than the $PAR keyword indicates
176    #[allow(unused)]
177    fn validate_number_of_parameters(&self) -> Result<()> {
178        let n_params = self.get_number_of_parameters()?;
179        let n_params_string = n_params.to_string();
180        let n_digits = n_params_string.chars().count().to_string();
181        let regex_string = r"[PR]\d{1,".to_string() + &n_digits + "}[BENRDFGLOPSTVIW]";
182        let param_keywords = Regex::new(&regex_string)?;
183
184        for keyword in self.keywords.keys() {
185            if !param_keywords.is_match(keyword.as_bytes()) {
186                continue; // Skip to the next iteration if the keyword doesn't match
187            }
188
189            // If the keyword starts with a $P, then the value of the next non-terminal characters should be less than or equal to the number of parameters
190            if keyword.starts_with("$P") {
191                let param_number = keyword
192                    .chars()
193                    .nth(1)
194                    .expect("should have a second character in {keyword}")
195                    .to_digit(10)
196                    .expect("should be able to convert the character to a digit to count the parameters") as usize;
197                if param_number > *n_params {
198                    return Err(anyhow!(
199                        "Invalid FCS file: {} keyword value exceeds number of parameters",
200                        keyword
201                    ));
202                }
203            }
204        }
205
206        Ok(())
207    }
208    /// Generic function to get the unwrapped unsigned integer value associated with a numeric keyword (e.g. $PAR, $TOT, etc.)
209    fn get_keyword_value_as_usize(&self, keyword: &str) -> Result<&usize> {
210        Ok(self.get_integer_keyword(keyword)?.get_usize())
211    }
212
213    /// Return the number of parameters in the file from the $PAR keyword in the metadata TEXT section
214    /// # Errors
215    /// Will return `Err` if the $PAR keyword is not present in the metadata keywords hashmap
216    pub fn get_number_of_parameters(&self) -> Result<&usize> {
217        self.get_keyword_value_as_usize("$PAR")
218    }
219
220    /// Return the number of events in the file from the $TOT keyword in the metadata TEXT section
221    /// # Errors
222    /// Will return `Err` if the $TOT keyword is not present in the metadata keywords hashmap
223    pub fn get_number_of_events(&self) -> Result<&usize> {
224        self.get_keyword_value_as_usize("$TOT")
225    }
226
227    /// Return the data type from the $DATATYPE keyword in the metadata TEXT section, unwraps and returns it if it exists.
228    /// # Errors
229    /// Will return `Err` if the $DATATYPE keyword is not present in the metadata keywords hashmap
230    pub fn get_data_type(&self) -> Result<&FcsDataType> {
231        let keyword = self.get_byte_keyword("$DATATYPE")?;
232        if let ByteKeyword::DATATYPE(data_type) = keyword {
233            Ok(data_type)
234        } else {
235            Err(anyhow!("No $DATATYPE value stored."))
236        }
237    }
238
239    /// Get the data type for a specific channel/parameter (FCS 3.2+)
240    ///
241    /// First checks for `$PnDATATYPE` keyword to see if this parameter has a specific data type override.
242    /// If not found, falls back to the default `$DATATYPE` keyword.
243    ///
244    /// # Arguments
245    /// * `parameter_number` - 1-based parameter index
246    ///
247    /// # Errors
248    /// Will return `Err` if neither `$PnDATATYPE` nor `$DATATYPE` is present
249    pub fn get_data_type_for_channel(&self, parameter_number: usize) -> Result<FcsDataType> {
250        // First try to get parameter-specific data type (FCS 3.2+)
251        if let Ok(pn_datatype_keyword) =
252            self.get_parameter_numeric_metadata(parameter_number, "DATATYPE")
253        {
254            if let IntegerKeyword::PnDATATYPE(datatype_code) = pn_datatype_keyword {
255                // Map datatype code to enum: 0=I, 1=F, 2=D
256                match datatype_code {
257                    0 => Ok(FcsDataType::I),
258                    1 => Ok(FcsDataType::F),
259                    2 => Ok(FcsDataType::D),
260                    _ => Err(anyhow!(
261                        "Invalid $P{}DATATYPE code: {}",
262                        parameter_number,
263                        datatype_code
264                    )),
265                }
266            } else {
267                // Shouldn't happen, but fall back to default
268                Ok(self.get_data_type()?.clone())
269            }
270        } else {
271            // Fall back to default $DATATYPE
272            Ok(self.get_data_type()?.clone())
273        }
274    }
275
276    /// Calculate the total bytes per event by summing bytes per parameter
277    ///
278    /// Uses `$PnB` (bits per parameter) divided by 8 to get bytes per parameter,
279    /// then sums across all parameters. This is more accurate than using `$DATATYPE`
280    /// which only provides a default value.
281    ///
282    /// # Errors
283    /// Will return `Err` if the number of parameters cannot be determined or
284    /// if any required `$PnB` keyword is missing
285    pub fn calculate_bytes_per_event(&self) -> Result<usize> {
286        let number_of_parameters = self.get_number_of_parameters()?;
287        let mut total_bytes = 0;
288
289        for param_num in 1..=*number_of_parameters {
290            // Get $PnB (bits per parameter)
291            let bits = self.get_parameter_numeric_metadata(param_num, "B")?;
292            if let IntegerKeyword::PnB(bits_value) = bits {
293                // Convert bits to bytes (round up if not divisible by 8)
294                let bytes = (bits_value + 7) / 8;
295                total_bytes += bytes;
296            } else {
297                return Err(anyhow!(
298                    "$P{}B keyword found but is not the expected PnB variant",
299                    param_num
300                ));
301            }
302        }
303
304        Ok(total_bytes)
305    }
306
307    /// Get bytes per parameter for a specific channel
308    ///
309    /// Uses `$PnB` (bits per parameter) divided by 8 to get bytes per parameter.
310    ///
311    /// # Arguments
312    /// * `parameter_number` - 1-based parameter index
313    ///
314    /// # Errors
315    /// Will return `Err` if the `$PnB` keyword is missing for this parameter
316    pub fn get_bytes_per_parameter(&self, parameter_number: usize) -> Result<usize> {
317        let bits = self.get_parameter_numeric_metadata(parameter_number, "B")?;
318        if let IntegerKeyword::PnB(bits_value) = bits {
319            // Convert bits to bytes (round up if not divisible by 8)
320            Ok((bits_value + 7) / 8)
321        } else {
322            Err(anyhow!(
323                "$P{}B keyword found but is not the expected PnB variant",
324                parameter_number
325            ))
326        }
327    }
328
329    /// Return the byte order from the $BYTEORD keyword in the metadata TEXT section, unwraps and returns it if it exists.
330    /// # Errors
331    /// Will return `Err` if the $BYTEORD keyword is not present in the keywords hashmap
332    pub fn get_byte_order(&self) -> Result<&ByteOrder> {
333        let keyword = self.get_byte_keyword("$BYTEORD")?;
334        if let ByteKeyword::BYTEORD(byte_order) = keyword {
335            Ok(byte_order)
336        } else {
337            Err(anyhow!("No $BYTEORD value stored."))
338        }
339    }
340    /// Returns a keyword that holds numeric data from the keywords hashmap, if it exists
341    /// # Errors
342    /// Will return `Err` if the keyword is not present in the keywords hashmap
343    pub fn get_integer_keyword(&self, keyword: &str) -> Result<&IntegerKeyword> {
344        if let Some(keyword) = self.keywords.get(keyword) {
345            match keyword {
346                Keyword::Int(integer) => Ok(integer),
347                _ => Err(anyhow!("Keyword is not integer variant")),
348            }
349        } else {
350            Err(anyhow!("No {keyword} keyword stored."))
351        }
352    }
353
354    /// Returns a keyword that holds numeric data from the keywords hashmap, if it exists
355    /// # Errors
356    /// Will return `Err` if the keyword is not present in the keywords hashmap
357    pub fn get_float_keyword(&self, keyword: &str) -> Result<&FloatKeyword> {
358        if let Some(keyword) = self.keywords.get(keyword) {
359            match keyword {
360                Keyword::Float(float) => Ok(float),
361                _ => Err(anyhow!("Keyword is not float variant")),
362            }
363        } else {
364            Err(anyhow!("No {keyword} keyword stored."))
365        }
366    }
367
368    /// Returns a keyword that holds string data from the keywords hashmap, if it exists
369    /// # Errors
370    /// Will return `Err` if the keyword is not present in the keywords hashmap
371    pub fn get_string_keyword(&self, keyword: &str) -> Result<&StringKeyword> {
372        if let Some(keyword) = self.keywords.get(keyword) {
373            match keyword {
374                Keyword::String(string) => Ok(string),
375                _ => Err(anyhow!("Keyword is not a string variant")),
376            }
377        } else {
378            Err(anyhow!("No {keyword} keyword stored."))
379        }
380    }
381
382    /// Returns a keyword that holds byte-orientation data from the keywords hashmap, if it exists
383    /// # Errors
384    /// Will return `Err` if the keyword is not present in the keywords hashmap
385    pub fn get_byte_keyword(&self, keyword: &str) -> Result<&ByteKeyword> {
386        if let Some(keyword) = self.keywords.get(keyword) {
387            match keyword {
388                Keyword::Byte(byte) => Ok(byte),
389                _ => Err(anyhow!("Keyword is not a byte variant")),
390            }
391        } else {
392            Err(anyhow!("No {keyword} keyword stored."))
393        }
394    }
395
396    /// Returns a keyword that holds mixed data from the keywords hashmap, if it exists
397    /// # Errors
398    /// Will return `Err` if the keyword is not present in the keywords hashmap
399    pub fn get_mixed_keyword(&self, keyword: &str) -> Result<&MixedKeyword> {
400        if let Some(keyword) = self.keywords.get(keyword) {
401            match keyword {
402                Keyword::Mixed(mixed) => Ok(mixed),
403                _ => Err(anyhow!("Keyword is not a mixed variant")),
404            }
405        } else {
406            Err(anyhow!("No {keyword} keyword stored."))
407        }
408    }
409
410    /// General function to get a given parameter's string keyword from the file's metadata (e.g. `$PnN` or `$PnS`)
411    /// # Errors
412    /// Will return `Err` if the keyword is not present in the keywords hashmap
413    pub fn get_parameter_string_metadata(
414        &self,
415        parameter_number: usize,
416        suffix: &str,
417    ) -> Result<&StringKeyword> {
418        // Interpolate the parameter number into the keyword:
419        let keyword = format!("$P{parameter_number}{suffix}");
420        self.get_string_keyword(&keyword)
421    }
422
423    /// Generic function to get a given parameter's integer keyword from the file's metadata (e.g. `$PnN`, `$PnS`, `$PnDATATYPE`)
424    /// # Errors
425    /// Will return `Err` if the keyword is not present in the keywords hashmap
426    pub fn get_parameter_numeric_metadata(
427        &self,
428        parameter_number: usize,
429        suffix: &str,
430    ) -> Result<&IntegerKeyword> {
431        // Interpolate the parameter number into the keyword:
432        let keyword = format!("$P{parameter_number}{suffix}");
433        self.get_integer_keyword(&keyword)
434    }
435
436    /// Get excitation wavelength(s) for a parameter from `$PnL` keyword
437    /// Returns the first wavelength if multiple are present (for co-axial lasers)
438    /// # Errors
439    /// Will return `Err` if the keyword is not present in the keywords hashmap
440    pub fn get_parameter_excitation_wavelength(
441        &self,
442        parameter_number: usize,
443    ) -> Result<Option<usize>> {
444        let keyword = format!("$P{parameter_number}L");
445
446        // Try as integer keyword first (older FCS format)
447        if let Ok(int_keyword) = self.get_integer_keyword(&keyword) {
448            if let IntegerKeyword::PnL(wavelength) = int_keyword {
449                return Ok(Some(*wavelength));
450            }
451        }
452
453        // Try as mixed keyword (FCS 3.1+ format, can have multiple wavelengths)
454        if let Ok(mixed_keyword) = self.get_mixed_keyword(&keyword) {
455            if let MixedKeyword::PnL(wavelengths) = mixed_keyword {
456                // Return the first wavelength if multiple are present
457                return Ok(wavelengths.first().copied());
458            }
459        }
460
461        Ok(None)
462    }
463
464    /// Return the name of the parameter's channel from the `$PnN` keyword in the metadata TEXT section, where `n` is the provided parameter index (1-based)
465    /// # Errors
466    /// Will return `Err` if the keyword is not present in the keywords hashmap
467    pub fn get_parameter_channel_name(&self, parameter_number: usize) -> Result<&str> {
468        if let StringKeyword::PnN(name) =
469            self.get_parameter_string_metadata(parameter_number, "N")?
470        {
471            Ok(name.as_ref())
472        } else {
473            Err(anyhow!(
474                "$P{parameter_number}N keyword not found in metadata TEXT section",
475            ))
476        }
477    }
478
479    /// Return the label name of the parameter from the `$PnS` keyword in the metadata TEXT section, where `n` is the provided parameter number
480    /// # Errors
481    /// Will return `Err` if the keyword is not present in the keywords hashmap
482    pub fn get_parameter_label(&self, parameter_number: usize) -> Result<&str> {
483        if let StringKeyword::PnS(label) =
484            self.get_parameter_string_metadata(parameter_number, "S")?
485        {
486            Ok(label.as_ref())
487        } else {
488            Err(anyhow!(
489                "$P{parameter_number}S keyword not found in metadata TEXT section",
490            ))
491        }
492    }
493
494    /// Transform the metadata keywords hashmap into a JSON object via serde
495    /// # Errors
496    /// Will return `Err` if the metadata keywords hashmap is empty
497    pub fn get_metadata_as_json_string(&self) -> Result<String> {
498        if self.keywords.is_empty() {
499            Err(anyhow!("No metadata keywords stored."))
500        } else {
501            let json = serde_json::to_string(&self.keywords)?;
502            Ok(json)
503        }
504    }
505
506    /// Insert or update a string keyword in the metadata
507    pub fn insert_string_keyword(&mut self, key: String, value: String) {
508        let normalized_key = if key.starts_with('$') {
509            key
510        } else {
511            format!("${key}")
512        };
513
514        let parsed = match_and_parse_keyword(&normalized_key, value.as_str());
515        let string_keyword = match parsed {
516            KeywordCreationResult::String(string_keyword) => string_keyword,
517            // If parsing fails (or parses to a non-string keyword), fall back to `Other`.
518            _ => StringKeyword::Other(Arc::from(value)),
519        };
520
521        self.keywords
522            .insert(normalized_key, Keyword::String(string_keyword));
523    }
524}