// flow_fcs/metadata.rs

1use super::{
2    byteorder::ByteOrder,
3    datatype::FcsDataType,
4    header::Header,
5    keyword::{
6        ByteKeyword, FloatKeyword, IntegerKeyword, IntegerableKeyword, Keyword,
7        KeywordCreationResult, MixedKeyword, StringKeyword, match_and_parse_keyword,
8    },
9};
10use anyhow::{Result, anyhow};
11use memmap3::Mmap;
12use regex::bytes::Regex;
13use rustc_hash::FxHashMap;
14use serde::{Deserialize, Serialize};
15use std::sync::Arc;
16use uuid::Uuid;
/// Map from a keyword name (as a `String`) to its parsed [`Keyword`] value,
/// backed by `FxHashMap`.
17pub type KeywordMap = FxHashMap<String, Keyword>;
18
19/// Contains keyword-value pairs and delimiter from the TEXT segment of an FCS file
20///
21/// The TEXT segment contains all metadata about the FCS file, including:
22/// - File information (GUID, filename, cytometer type)
23/// - Data structure information (number of events, parameters, data type, byte order)
24/// - Parameter metadata (names, labels, ranges, transforms)
25/// - Optional information (compensation matrices, timestamps, etc.)
26///
27/// Keywords are stored in a hashmap for fast lookup, with type-safe accessors
28/// for different keyword types (integer, float, string, byte, mixed).
29#[derive(Default, Debug, Clone, Serialize, Deserialize)]
30pub struct Metadata {
31    pub keywords: KeywordMap,
32    pub delimiter: char,
33}
34
35impl Metadata {
36    #[must_use]
37    pub fn new() -> Self {
38        Self {
39            keywords: FxHashMap::default(),
40            delimiter: ' ',
41        }
42    }
43    /// Prints all keywords sorted alphabetically by key name
44    ///
45    /// This is a debugging utility that displays all keyword-value pairs
46    /// in the metadata, sorted for easy reading.
47    pub fn print_sorted_by_keyword(&self) {
48        // Step 1: Get a Vector from existing text HashMap.
49        let mut sorted: Vec<_> = self.keywords.iter().collect();
50
51        // Step 2: sort Vector by key from HashMap.
52        // ... This sorts by HashMap keys.
53        //     Each tuple is sorted by its first item [.0] (the key).
54        sorted.sort_by_key(|a| a.0);
55
56        // Step 3: loop over sorted vector.
57        for (key, value) in &sorted {
58            println!("{key}: {value}");
59        }
60    }
61    /// Reads the text segment of the fcs file and returns an `Metadata` struct
62    ///
63    /// Uses memchr for fast delimiter finding (5-10x faster than byte-by-byte iteration)
64    #[must_use]
65    pub fn from_mmap(mmap: &Mmap, header: &Header) -> Self {
66        let text_start = header.text_offset.start();
67
68        // Read the first byte of the text segment to determine the delimiter:
69        let delimiter = mmap[*text_start];
70
71        // Determine the number of bytes to read, excluding the delimiter:
72        let text_end = header.text_offset.end();
73        let text_slice = &mmap[(*text_start + 1)..*text_end];
74
75        // Extract keyword value pairs using memchr for fast delimiter finding
76        let mut keywords: KeywordMap = FxHashMap::default();
77
78        // Find all delimiter positions using SIMD-accelerated search
79        // This is 5-10x faster than manual iteration
80        let delimiter_positions: Vec<usize> = memchr::memchr_iter(delimiter, text_slice).collect();
81
82        // Parse keyword-value pairs
83        // FCS format: |KEY1|VALUE1|KEY2|VALUE2|...
84        // delimiter_positions gives us the split points
85        let mut prev_pos = 0;
86        let mut is_keyword = true;
87        let mut current_key = String::new();
88
89        for &pos in &delimiter_positions {
90            // Extract the slice between delimiters
91            let segment = &text_slice[prev_pos..pos];
92
93            // SAFETY: FCS spec requires TEXT segment to be ASCII/UTF-8
94            let text = std::str::from_utf8(segment).unwrap_or_default();
95
96            if is_keyword {
97                // This is a keyword
98                current_key = text.to_string();
99                is_keyword = false;
100            } else {
101                // This is a value - parse and store the keyword-value pair
102                if !current_key.is_empty() {
103                    match match_and_parse_keyword(&current_key, text) {
104                        KeywordCreationResult::Int(int_keyword) => {
105                            keywords.insert(current_key.clone(), Keyword::Int(int_keyword));
106                        }
107                        KeywordCreationResult::Float(float_keyword) => {
108                            keywords.insert(current_key.clone(), Keyword::Float(float_keyword));
109                        }
110                        KeywordCreationResult::String(string_keyword) => {
111                            keywords.insert(current_key.clone(), Keyword::String(string_keyword));
112                        }
113                        KeywordCreationResult::Byte(byte_keyword) => {
114                            keywords.insert(current_key.clone(), Keyword::Byte(byte_keyword));
115                        }
116                        KeywordCreationResult::Mixed(mixed_keyword) => {
117                            keywords.insert(current_key.clone(), Keyword::Mixed(mixed_keyword));
118                        }
119                        KeywordCreationResult::UnableToParse => {
120                            eprintln!(
121                                "Unable to parse keyword: {} with value: {}",
122                                current_key, text
123                            );
124                        }
125                    }
126                }
127                current_key.clear();
128                is_keyword = true;
129            }
130
131            prev_pos = pos + 1;
132        }
133
134        Self {
135            keywords,
136            delimiter: delimiter as char,
137        }
138    }
139
140    /// Check that required keys are present in the TEXT segment of the metadata
141    /// # Errors
142    /// Will return `Err` if:
143    /// - any of the required keywords are missing from the keywords hashmap
144    /// - the number of parameters can't be obtained from the $PAR keyword in the TEXT section
145    /// - any keyword has a Pn[X] value where n is greater than the number of parameters indicated by the $PAR keyword
146    pub fn validate_text_segment_keywords(&self, header: &Header) -> Result<()> {
147        println!("Validating FCS file...{}", header.version);
148        let required_keywords = header.version.get_required_keywords();
149        for keyword in required_keywords {
150            if !self.keywords.contains_key(*keyword) {
151                // println!("Invalid FCS file: Missing keyword: {:#?}", self.keywords);
152                return Err(anyhow!("Invalid FCS file: Missing keyword: {}", keyword));
153            }
154        }
155
156        Ok(())
157    }
158
159    /// Validates if a GUID is present in the file's metadata, and if not, generates a new one.
160    pub fn validate_guid(&mut self) {
161        if self.get_string_keyword("GUID").is_err() {
162            self.insert_string_keyword("GUID".to_string(), Uuid::new_v4().to_string());
163        }
164    }
165
166    /// Confirm that no stored keyword has a value greater than the $PAR keyword indicates
167    #[allow(unused)]
168    fn validate_number_of_parameters(&self) -> Result<()> {
169        let n_params = self.get_number_of_parameters()?;
170        let n_params_string = n_params.to_string();
171        let n_digits = n_params_string.chars().count().to_string();
172        let regex_string = r"[PR]\d{1,".to_string() + &n_digits + "}[BENRDFGLOPSTVIW]";
173        let param_keywords = Regex::new(&regex_string)?;
174
175        for keyword in self.keywords.keys() {
176            if !param_keywords.is_match(keyword.as_bytes()) {
177                continue; // Skip to the next iteration if the keyword doesn't match
178            }
179
180            // If the keyword starts with a $P, then the value of the next non-terminal characters should be less than or equal to the number of parameters
181            if keyword.starts_with("$P") {
182                let param_number = keyword
183                    .chars()
184                    .nth(1)
185                    .expect("should have a second character in {keyword}")
186                    .to_digit(10)
187                    .expect("should be able to convert the character to a digit to count the parameters") as usize;
188                if param_number > *n_params {
189                    return Err(anyhow!(
190                        "Invalid FCS file: {} keyword value exceeds number of parameters",
191                        keyword
192                    ));
193                }
194            }
195        }
196
197        Ok(())
198    }
199    /// Generic function to get the unwrapped unsigned integer value associated with a numeric keyword (e.g. $PAR, $TOT, etc.)
200    fn get_keyword_value_as_usize(&self, keyword: &str) -> Result<&usize> {
201        Ok(self.get_integer_keyword(keyword)?.get_usize())
202    }
203
204    /// Return the number of parameters in the file from the $PAR keyword in the metadata TEXT section
205    /// # Errors
206    /// Will return `Err` if the $PAR keyword is not present in the metadata keywords hashmap
207    pub fn get_number_of_parameters(&self) -> Result<&usize> {
208        self.get_keyword_value_as_usize("$PAR")
209    }
210
211    /// Return the number of events in the file from the $TOT keyword in the metadata TEXT section
212    /// # Errors
213    /// Will return `Err` if the $TOT keyword is not present in the metadata keywords hashmap
214    pub fn get_number_of_events(&self) -> Result<&usize> {
215        self.get_keyword_value_as_usize("$TOT")
216    }
217
218    /// Return the data type from the $DATATYPE keyword in the metadata TEXT section, unwraps and returns it if it exists.
219    /// # Errors
220    /// Will return `Err` if the $DATATYPE keyword is not present in the metadata keywords hashmap
221    pub fn get_data_type(&self) -> Result<&FcsDataType> {
222        let keyword = self.get_byte_keyword("$DATATYPE")?;
223        if let ByteKeyword::DATATYPE(data_type) = keyword {
224            Ok(data_type)
225        } else {
226            Err(anyhow!("No $DATATYPE value stored."))
227        }
228    }
229
230    /// Return the byte order from the $BYTEORD keyword in the metadata TEXT section, unwraps and returns it if it exists.
231    /// # Errors
232    /// Will return `Err` if the $BYTEORD keyword is not present in the keywords hashmap
233    pub fn get_byte_order(&self) -> Result<&ByteOrder> {
234        let keyword = self.get_byte_keyword("$BYTEORD")?;
235        if let ByteKeyword::BYTEORD(byte_order) = keyword {
236            Ok(byte_order)
237        } else {
238            Err(anyhow!("No $BYTEORD value stored."))
239        }
240    }
241    /// Returns a keyword that holds numeric data from the keywords hashmap, if it exists
242    /// # Errors
243    /// Will return `Err` if the keyword is not present in the keywords hashmap
244    pub fn get_integer_keyword(&self, keyword: &str) -> Result<&IntegerKeyword> {
245        if let Some(keyword) = self.keywords.get(keyword) {
246            match keyword {
247                Keyword::Int(integer) => Ok(integer),
248                _ => Err(anyhow!("Keyword is not integer variant")),
249            }
250        } else {
251            Err(anyhow!("No {keyword} keyword stored."))
252        }
253    }
254
255    /// Returns a keyword that holds numeric data from the keywords hashmap, if it exists
256    /// # Errors
257    /// Will return `Err` if the keyword is not present in the keywords hashmap
258    pub fn get_float_keyword(&self, keyword: &str) -> Result<&FloatKeyword> {
259        if let Some(keyword) = self.keywords.get(keyword) {
260            match keyword {
261                Keyword::Float(float) => Ok(float),
262                _ => Err(anyhow!("Keyword is not float variant")),
263            }
264        } else {
265            Err(anyhow!("No {keyword} keyword stored."))
266        }
267    }
268
269    /// Returns a keyword that holds string data from the keywords hashmap, if it exists
270    /// # Errors
271    /// Will return `Err` if the keyword is not present in the keywords hashmap
272    pub fn get_string_keyword(&self, keyword: &str) -> Result<&StringKeyword> {
273        if let Some(keyword) = self.keywords.get(keyword) {
274            match keyword {
275                Keyword::String(string) => Ok(string),
276                _ => Err(anyhow!("Keyword is not a string variant")),
277            }
278        } else {
279            Err(anyhow!("No {keyword} keyword stored."))
280        }
281    }
282
283    /// Returns a keyword that holds byte-orientation data from the keywords hashmap, if it exists
284    /// # Errors
285    /// Will return `Err` if the keyword is not present in the keywords hashmap
286    pub fn get_byte_keyword(&self, keyword: &str) -> Result<&ByteKeyword> {
287        if let Some(keyword) = self.keywords.get(keyword) {
288            match keyword {
289                Keyword::Byte(byte) => Ok(byte),
290                _ => Err(anyhow!("Keyword is not a byte variant")),
291            }
292        } else {
293            Err(anyhow!("No {keyword} keyword stored."))
294        }
295    }
296
297    /// Returns a keyword that holds mixed data from the keywords hashmap, if it exists
298    /// # Errors
299    /// Will return `Err` if the keyword is not present in the keywords hashmap
300    pub fn get_mixed_keyword(&self, keyword: &str) -> Result<&MixedKeyword> {
301        if let Some(keyword) = self.keywords.get(keyword) {
302            match keyword {
303                Keyword::Mixed(mixed) => Ok(mixed),
304                _ => Err(anyhow!("Keyword is not a mixed variant")),
305            }
306        } else {
307            Err(anyhow!("No {keyword} keyword stored."))
308        }
309    }
310
311    /// General function to get a given parameter's string keyword from the file's metadata (e.g. `$PnN` or `$PnS`)
312    /// # Errors
313    /// Will return `Err` if the keyword is not present in the keywords hashmap
314    pub fn get_parameter_string_metadata(
315        &self,
316        parameter_number: usize,
317        suffix: &str,
318    ) -> Result<&StringKeyword> {
319        // Interpolate the parameter number into the keyword:
320        let keyword = format!("$P{parameter_number}{suffix}");
321        self.get_string_keyword(&keyword)
322    }
323
324    /// Generic function to get a given parameter's string keyword from the file's metadata (e.g. `$PnN` or `$PnS`)
325    /// # Errors
326    /// Will return `Err` if the keyword is not present in the keywords hashmap
327    pub fn get_parameter_numeric_metadata(
328        &self,
329        parameter_number: usize,
330        suffix: &str,
331    ) -> Result<&IntegerKeyword> {
332        // Interpolate the parameter number into the keyword:
333        let keyword = format!("$P{parameter_number}{suffix}");
334        self.get_integer_keyword(&keyword)
335    }
336
337    /// Get excitation wavelength(s) for a parameter from `$PnL` keyword
338    /// Returns the first wavelength if multiple are present (for co-axial lasers)
339    /// # Errors
340    /// Will return `Err` if the keyword is not present in the keywords hashmap
341    pub fn get_parameter_excitation_wavelength(
342        &self,
343        parameter_number: usize,
344    ) -> Result<Option<usize>> {
345        let keyword = format!("$P{parameter_number}L");
346
347        // Try as integer keyword first (older FCS format)
348        if let Ok(int_keyword) = self.get_integer_keyword(&keyword) {
349            if let IntegerKeyword::PnL(wavelength) = int_keyword {
350                return Ok(Some(*wavelength));
351            }
352        }
353
354        // Try as mixed keyword (FCS 3.1+ format, can have multiple wavelengths)
355        if let Ok(mixed_keyword) = self.get_mixed_keyword(&keyword) {
356            if let MixedKeyword::PnL(wavelengths) = mixed_keyword {
357                // Return the first wavelength if multiple are present
358                return Ok(wavelengths.first().copied());
359            }
360        }
361
362        Ok(None)
363    }
364
365    /// Return the name of the parameter's channel from the `$PnN` keyword in the metadata TEXT section, where `n` is the provided parameter index (1-based)
366    /// # Errors
367    /// Will return `Err` if the keyword is not present in the keywords hashmap
368    pub fn get_parameter_channel_name(&self, parameter_number: usize) -> Result<&str> {
369        if let StringKeyword::PnN(name) =
370            self.get_parameter_string_metadata(parameter_number, "N")?
371        {
372            Ok(name.as_ref())
373        } else {
374            Err(anyhow!(
375                "$P{parameter_number}N keyword not found in metadata TEXT section",
376            ))
377        }
378    }
379
380    /// Return the label name of the parameter from the `$PnS` keyword in the metadata TEXT section, where `n` is the provided parameter number
381    /// # Errors
382    /// Will return `Err` if the keyword is not present in the keywords hashmap
383    pub fn get_parameter_label(&self, parameter_number: usize) -> Result<&str> {
384        if let StringKeyword::PnS(label) =
385            self.get_parameter_string_metadata(parameter_number, "S")?
386        {
387            Ok(label.as_ref())
388        } else {
389            Err(anyhow!(
390                "$P{parameter_number}S keyword not found in metadata TEXT section",
391            ))
392        }
393    }
394
395    /// Transform the metadata keywords hashmap into a JSON object via serde
396    /// # Errors
397    /// Will return `Err` if the metadata keywords hashmap is empty
398    pub fn get_metadata_as_json_string(&self) -> Result<String> {
399        if self.keywords.is_empty() {
400            Err(anyhow!("No metadata keywords stored."))
401        } else {
402            let json = serde_json::to_string(&self.keywords)?;
403            Ok(json)
404        }
405    }
406
407    /// Insert or update a string keyword in the metadata
408    pub fn insert_string_keyword(&mut self, key: String, value: String) {
409        let normalized_key = if key.starts_with('$') {
410            key
411        } else {
412            format!("${key}")
413        };
414
415        let parsed = match_and_parse_keyword(&normalized_key, value.as_str());
416        let string_keyword = match parsed {
417            KeywordCreationResult::String(string_keyword) => string_keyword,
418            // If parsing fails (or parses to a non-string keyword), fall back to `Other`.
419            _ => StringKeyword::Other(Arc::from(value)),
420        };
421
422        self.keywords
423            .insert(normalized_key, Keyword::String(string_keyword));
424    }
425}