// flow_fcs/keyword/mod.rs

1#![allow(deprecated)]
2mod helpers;
3mod parsing;
4#[cfg(test)]
5mod tests;
6use parsing::*;
7
8use crate::{byteorder::ByteOrder, datatype::FcsDataType};
9use serde::{Deserialize, Serialize};
10use std::{borrow::Cow, hash::Hash, sync::Arc};
11use strum_macros::Display;
12
13/// Result of parsing a keyword-value pair from the FCS TEXT segment
14///
15/// This enum represents the possible types a keyword can be parsed as.
16/// The parsing logic attempts to match the keyword name and value format
17/// to determine the appropriate type.
18#[derive(Debug)]
19pub enum KeywordCreationResult {
20    /// Successfully parsed as an integer keyword (e.g., `$PAR`, `$TOT`)
21    Int(IntegerKeyword),
22    /// Successfully parsed as a float keyword (e.g., `$PnG`)
23    Float(FloatKeyword),
24    /// Successfully parsed as a string keyword (e.g., `$CYT`, `$FIL`, `$GUID`)
25    String(StringKeyword),
26    /// Successfully parsed as a byte-oriented keyword (e.g., `$BYTEORD`, `$DATATYPE`)
27    Byte(ByteKeyword),
28    /// Successfully parsed as a mixed-type keyword (e.g., `$SPILLOVER`, `$PnD`, `$PnE`)
29    Mixed(MixedKeyword),
30    /// Unable to parse the keyword-value pair (fallback to generic string storage)
31    UnableToParse,
32}
33
34#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Display)]
35pub enum Keyword {
36    Int(IntegerKeyword),
37    Float(FloatKeyword),
38    String(StringKeyword),
39    Byte(ByteKeyword),
40    Mixed(MixedKeyword),
41}
42
43type LowerBound = f32;
44type UpperBound = f32;
45
46#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq)]
47#[allow(deprecated)]
48pub enum MixedKeyword {
49    /// Specifies the conversion of arbitrary signal units, recorded as parameter values (uncompensated or compensated)
50    /// to some well defined unit. For example, mean equivalent soluble fluorochrome (MESF) or antibody molecules.
51    /// * f1 - the number of calibrated units corresponding to a unit signal value of parameter n
52    ///
53    /// * str - name of the units corresponding to calibration value
54    ///
55    /// **Example:** If the signal on parameter n has the scale value X then the calibrated value is X * f units
56    PnCalibration(f32, String),
57    /// Recommends visualization scale for parameter `n`.
58    /// * String is either "Linear" or "Logarithmic".
59    /// * f1 and f2 parameter values are in "scale" units, not "channel" units, see below for details.
60    /// * For linear scaling:
61    ///   - f1: Lower bound - the scale value corresponding to the left edge of the display
62    ///   - f2: Upper bound - the scale value corresponding to the right edge of the display
63    /// * *For logarithmic scaling:*
64    ///   - f1: Decades - The number of decades to display.
65    ///   - f2: Offset - The scale value corresponding to the left edge of the display
66    ///
67    /// **Example**: `$P3D (Linear,0,1024)`
68    /// - Specifies a linear display range with scale parameter values ranging from 0 to 1024.
69    ///
70    /// **Example**: `$P2D (Logarithmic,4,0.1)`
71    /// - Specifies a logarithmic display ranging from 0.1 to 1000 (scale value), which is *4* decades of display width.
72    ///
73    /// **Example**: `$P1D (Logarithmic,5,0.01)`
74    /// - Specifies a logarithmic display ranging from 0.01 to 1000 (scale value), which is 5 decades of display width.
75    ///
76    /// **Example**: `$P3B (8) | $P3R (256) | $P3G (4) | $P3E (0,0) | $P3D (Linear,0,32)`:
77    /// - This is a linear parameter with channel values going from 0 to 255 (`$P3R`). Taking account the gain (`$PnG`),
78    /// the *scale* values go from 0 to 64 (256/4 = 64). The $P3D specifies a linear display from 0 to 32
79    /// scale units, which only encompasses the bottom half of the collected data range on this scale.
80    ///
81    /// **Example**: `$P4B (16) | $P4R (1024) | $P4E (4,1) | $P4D (Linear,0,1000)`
82    /// - Specifies a linear display, with channel values going from 0 to 1023 (`$P4R`).
83    /// Only the bottom 10th of the scale values shown.
84    /// This will restrict the display to channel values between 0 and 768 (the bottom 3 decades),
85    /// with channels being distributed exponentially in the linear display.
86    ///
87    /// **Example**: `$P4B (16) | $P4R (1024) | $P4E (4,1) | $P4D (Logarithmic,3,1)`:
88    /// - The display keyword specifies that the data should be shown in logarithmic scaling, with only the bottom 3 decades shown.
89    /// This will restrict the display to channel values between 0 and 768 (1024*3/4).
90    ///
91    PnD(String, LowerBound, UpperBound),
92
93    /// (f1, f2) -Amplification type for parameter n. (FCS 1.0+)
94    /// * f1 - number of logarithmic decades
95    /// * f2 - linear value obtained for a signal with log value = 0
96    /// * 0,0 when the parameter is Linear.
97    /// * Also 0,0 when floating-point data (`$DATATYPE` = F or `$DATATYPE` = D) is stored.
98    /// **Example**: `$P3E (4,1)` - 4 decades with offset of 1
99    PnE(f32, f32),
100
101    /// Gate n amplification type.
102    ///
103    /// *<small>(FCS v2.0-3.1, deprecated)</small>*
104    GnE(f32, f32),
105
106    /// Region n width values - vector of width values for region boundaries
107    /// **Example**: `$R1W (0.5,1.2,0.8)` - Three width values
108    RnW(Vec<f32>),
109
110    /// Spillover matrix for compensation
111    /// Format: n, [param_names...], [matrix_values...]
112    /// **Example**: `$SPILLOVER/3,FL2-A,FL1-A,FL3-A,1.0,0.03,0.2,0.1,1.0,0.0,0.05,0,1.0`
113    SPILLOVER {
114        n_parameters: usize,
115        parameter_names: Vec<String>,
116        matrix_values: Vec<f32>,
117    },
118
119    /// Excitation wavelength(s) for parameter n in nanometers (FCS 1.0+, format updated in FCS 3.1)
120    /// Can contain single or multiple wavelengths for co-axial lasers
121    /// **Example**: `$P3L (488)` - single wavelength
122    /// **Example**: `$P4L (488,532,633)` - multiple co-axial lasers
123    PnL(Vec<usize>),
124}
125
126impl StringableKeyword for MixedKeyword {
127    fn get_str(&self) -> Cow<'_, str> {
128        match self {
129            Self::PnCalibration(f1, s) => Cow::Owned(format!("PnCalibration({}, {})", f1, s)),
130            Self::PnD(s, f1, f2) => Cow::Owned(format!("PnD({}, {}, {})", s, f1, f2)),
131            Self::PnE(f1, f2) => Cow::Owned(format!("PnE({}, {})", f1, f2)),
132            Self::GnE(f1, f2) => Cow::Owned(format!("GnE({}, {})", f1, f2)),
133            Self::PnL(vec) => Cow::Owned(format!(
134                "PnL({})",
135                vec.iter()
136                    .map(|v| v.to_string())
137                    .collect::<Vec<_>>()
138                    .join(", ")
139            )),
140            Self::RnW(vec) => Cow::Owned(format!(
141                "RnW({})",
142                vec.iter()
143                    .map(|v| v.to_string())
144                    .collect::<Vec<_>>()
145                    .join(", ")
146            )),
147            Self::SPILLOVER {
148                n_parameters,
149                parameter_names,
150                matrix_values,
151            } => Cow::Owned(format!(
152                "SPILLOVER({}, {}, {})",
153                n_parameters,
154                parameter_names.join(", "),
155                matrix_values
156                    .iter()
157                    .map(|v| v.to_string())
158                    .collect::<Vec<_>>()
159                    .join(", ")
160            )),
161        }
162    }
163}
164
165impl Eq for MixedKeyword {}
166impl Hash for MixedKeyword {
167    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
168        match self {
169            Self::PnCalibration(f1, s) => {
170                f1.to_bits().hash(state);
171                s.hash(state);
172            }
173            Self::PnD(s, f1, f2) => {
174                s.hash(state);
175                f1.to_bits().hash(state);
176                f2.to_bits().hash(state);
177            }
178            Self::PnE(f1, f2) | Self::GnE(f1, f2) => {
179                f1.to_bits().hash(state);
180                f2.to_bits().hash(state);
181            }
182            Self::PnL(vec) => {
183                for v in vec {
184                    v.hash(state);
185                }
186            }
187            Self::RnW(vec) => {
188                for f in vec {
189                    f.to_bits().hash(state);
190                }
191            }
192            Self::SPILLOVER {
193                n_parameters,
194                parameter_names,
195                matrix_values,
196            } => {
197                n_parameters.hash(state);
198                parameter_names.hash(state);
199                for f in matrix_values {
200                    f.to_bits().hash(state);
201                }
202            }
203        }
204    }
205}
206
207#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq, Eq, Hash)]
208pub enum IntegerKeyword {
209    /// The offset to the beginning of the DATA segment (FCS 1.0+)
210    BeginData(usize),
211    /// The offset to the end of the DATA segment (FCS 1.0+)
212    EndData(usize),
213    /// The offset to the beginning of the ANALYSIS segment (FCS 2.0+)
214    BeginAnalysis(usize),
215    /// The offset to the end of the ANALYSIS segment (FCS 2.0+)
216    EndAnalysis(usize),
217    /// The offset to the beginning of the TEXT segment (FCS 1.0+)
218    BeginText(usize),
219    /// The offset to the end of the TEXT segment (FCS 1.0+)
220    EndText(usize),
221    /// The number of parameters in the dataset (FCS 1.0+)
222    PAR(usize),
223    /// The number of events in the dataset (FCS 1.0+)
224    TOT(usize),
225    /// Range for parameter `n` (FCS 1.0+)
226    PnR(usize),
227    /// Number of bits reserved for parameter `n` (FCS 1.0+)
228    PnB(usize),
229    /// Voltage range for parameter `n` (FCS 1.0+)
230    PnV(usize),
231    /// Excitation wavelength for parameter `n` (FCS 1.0+)
232    PnL(usize),
233    /// The transformation to apply when displaying the data (FCS 1.0+)
234    PnDisplay(usize),
235}
236
237#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq)]
238pub enum FloatKeyword {
239    /// Gain for parameter n
240    PnG(f32),
241}
242
243impl Eq for FloatKeyword {}
244impl Hash for FloatKeyword {
245    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
246        match self {
247            FloatKeyword::PnG(f) => f.to_bits().hash(state),
248        }
249    }
250}
251
252#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq, Eq, Hash)]
253pub enum StringKeyword {
254    /// The name of the cytometer used to acquire the data (FCS 1.0+)
255    CYT(Arc<str>),
256    /// The name of the file containing the dataset (FCS 1.0+)
257    FIL(Arc<str>),
258    /// The globally unique identifier for the dataset (FCS 2.0+)
259    GUID(Arc<str>),
260
261    /// Begin date and time of data acquisition (FCS 3.2+)
262    BEGINDATETIME(Arc<str>),
263    /// End date and time of data acquisition (FCS 3.2+)
264    ENDDATETIME(Arc<str>),
265
266    /// Generic sample carrier identifier (FCS 3.2+, replaces $PLATEID)
267    CARRIERID(Arc<str>),
268    /// Type of sample carrier (FCS 3.2+, replaces $PLATENAME)
269    CARRIERTYPE(Arc<str>),
270    /// Location identifier within carrier (FCS 3.2+, replaces $WELLID)
271    LOCATIONID(Arc<str>),
272
273    /// 'Short name' for parameter `n` (FCS 1.0+)
274    PnN(Arc<str>),
275    /// Label name for parameter `n` (FCS 1.0+)
276    PnS(Arc<str>),
277    /// Name of the optical filter for parameter `n` (FCS 1.0+)
278    PnF(Arc<str>),
279    /// The FCS measurement signal types and evaluation features (e.g., area, height, or width) (FCS 1.0+)
280    PnType(Arc<str>),
281    /// Display scale for parameter `n` - typically "LOG" for logarithmic or "LIN" for linear (FCS 1.0+)
282    /// Note: Some FCS files use this as a string, others as numeric. We store as string for flexibility.
283    PnDISPLAY(Arc<str>),
284
285    /// Detector name for parameter `n` (FCS 3.2+)
286    PnDET(Arc<str>),
287    /// Dye specification for parameter `n` (FCS 3.2+)
288    PnTAG(Arc<str>),
289    /// Target molecule or process for parameter `n` (FCS 3.2+)
290    PnANALYTE(Arc<str>),
291    /// Evaluation features for parameter `n` (FCS 3.2+)
292    PnFEATURE(Arc<str>),
293
294    /// Acquisition flow rate setting (FCS 3.2+)
295    FLOWRATE(Arc<str>),
296
297    /// Sample volume (FCS 3.1+)
298    VOL(Arc<str>),
299
300    /// Distinguish between original and altered data set (FCS 3.1+)
301    ORIGINALITY(Arc<str>),
302    /// Who last modified the data set (FCS 3.1+)
303    LastModifier(Arc<str>),
304    /// When the data set was last modified (FCS 3.1+)
305    LastModified(Arc<str>),
306
307    /// Date of data acquisition
308    ///
309    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $BEGINDATETIME)</small>
310    DATE(Arc<str>),
311
312    /// Begin time of data acquisition
313    ///
314    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $BEGINDATETIME)</small>
315    BTIM(Arc<str>),
316
317    /// End time of data acquisition
318    ///
319    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $ENDDATETIME)</small>
320    ETIM(Arc<str>),
321
322    /// Data acquisition mode
323    ///
324    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2, fixed to "L" list mode)</small>
325    MODE(Arc<str>),
326
327    /// Plate identifier
328    ///
329    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $CARRIERID)</small>
330    PLATEID(Arc<str>),
331
332    /// Platform/plate name
333    ///
334    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $CARRIERTYPE)</small>
335    PLATENAME(Arc<str>),
336
337    /// Well identifier
338    ///
339    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $LOCATIONID)</small>
340    WELLID(Arc<str>),
341
342    /// Gate definition
343    ///
344    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
345    GATE(Arc<str>),
346
347    /// Gate n optical filter
348    ///
349    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
350    GnF(Arc<str>),
351
352    /// Gate n short name
353    ///
354    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
355    GnN(Arc<str>),
356
357    /// Gate n population name
358    ///
359    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
360    GnP(Arc<str>),
361
362    /// Gate n range
363    ///
364    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
365    GnR(Arc<str>),
366
367    /// Gate n label name
368    ///
369    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
370    GnS(Arc<str>),
371
372    /// Gate n threshold
373    ///
374    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
375    GnT(Arc<str>),
376
377    /// Gate n voltage range
378    ///
379    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
380    GnV(Arc<str>),
381
382    /// A catch-all for other keywords, to be stored as Arc<str>
383    Other(Arc<str>),
384}
385
386// Keywords regarding the data-layout, lacking any associated values
387#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq, Eq, Hash)]
388pub enum ByteKeyword {
389    /// The byte order (endianness) of the data
390    BYTEORD(ByteOrder),
391    /// The data type of the FCS file (integer, float, double, ascii)
392    DATATYPE(FcsDataType),
393    /// Data type for parameter `n` (FCS 3.2+), overriding the default $DATATYPE for a given parameter
394    PnDATATYPE(FcsDataType),
395}
396
/// Keywords that can render their value as text.
pub trait StringableKeyword {
    /// Returns the keyword's value as a string, borrowed when the
    /// implementor can hand out existing text and owned otherwise.
    fn get_str(&self) -> Cow<'_, str>;
}
/// Keywords that expose their value as a `usize`.
pub trait IntegerableKeyword {
    /// Returns a reference to the keyword's integer value.
    fn get_usize(&self) -> &usize;
}
/// Keywords that expose their value as an `f32`.
// NOTE(review): the lint suppression presumably covers `get_f32` having no
// callers yet — confirm before removing it.
#[allow(unused)]
pub trait FloatableKeyword {
    /// Returns a reference to the keyword's float value.
    fn get_f32(&self) -> &f32;
}
407
408impl IntegerableKeyword for IntegerKeyword {
409    fn get_usize(&self) -> &usize {
410        match self {
411            Self::TOT(value)
412            | Self::BeginData(value)
413            | Self::EndData(value)
414            | Self::BeginAnalysis(value)
415            | Self::EndAnalysis(value)
416            | Self::BeginText(value)
417            | Self::EndText(value)
418            | Self::PnR(value)
419            | Self::PnB(value)
420            | Self::PnV(value)
421            | Self::PnL(value)
422            | Self::PnDisplay(value)
423            | Self::PAR(value) => value,
424        }
425    }
426}
427
428impl FloatableKeyword for FloatKeyword {
429    fn get_f32(&self) -> &f32 {
430        match self {
431            Self::PnG(value) => value,
432        }
433    }
434}
435
436impl StringableKeyword for StringKeyword {
437    /// Get a reference to the string value (if it exists) from a StringKeyword variant
438    fn get_str(&self) -> Cow<'_, str> {
439        match self {
440            Self::CYT(value)
441            | Self::FIL(value)
442            | Self::GUID(value)
443            | Self::BEGINDATETIME(value)
444            | Self::ENDDATETIME(value)
445            | Self::CARRIERID(value)
446            | Self::CARRIERTYPE(value)
447            | Self::LOCATIONID(value)
448            | Self::PnN(value)
449            | Self::PnS(value)
450            | Self::PnF(value)
451            | Self::PnType(value)
452            | Self::PnDISPLAY(value)
453            | Self::PnDET(value)
454            | Self::PnTAG(value)
455            | Self::PnANALYTE(value)
456            | Self::PnFEATURE(value)
457            | Self::FLOWRATE(value)
458            | Self::VOL(value)
459            | Self::ORIGINALITY(value)
460            | Self::LastModifier(value)
461            | Self::LastModified(value)
462            | Self::DATE(value)
463            | Self::BTIM(value)
464            | Self::ETIM(value)
465            | Self::MODE(value)
466            | Self::PLATEID(value)
467            | Self::PLATENAME(value)
468            | Self::WELLID(value)
469            | Self::GATE(value)
470            | Self::GnF(value)
471            | Self::GnN(value)
472            | Self::GnP(value)
473            | Self::GnR(value)
474            | Self::GnS(value)
475            | Self::GnT(value)
476            | Self::GnV(value)
477            | Self::Other(value) => Cow::Borrowed(value.as_ref()),
478        }
479    }
480}
481
482impl StringableKeyword for ByteKeyword {
483    /// Get a reference to the string value (if it exists) from a ByteKeyword variant
484    fn get_str(&self) -> Cow<'_, str> {
485        match self {
486            Self::DATATYPE(data_type) | Self::PnDATATYPE(data_type) => {
487                Cow::Borrowed(data_type.to_keyword_str())
488            }
489            Self::BYTEORD(byte_order) => Cow::Borrowed(byte_order.to_keyword_str()),
490        }
491    }
492}
493
494impl StringableKeyword for IntegerKeyword {
495    fn get_str(&self) -> Cow<'_, str> {
496        match self {
497            Self::BeginData(value)
498            | Self::EndData(value)
499            | Self::BeginAnalysis(value)
500            | Self::EndAnalysis(value)
501            | Self::BeginText(value)
502            | Self::EndText(value)
503            | Self::PAR(value)
504            | Self::TOT(value)
505            | Self::PnR(value)
506            | Self::PnB(value)
507            | Self::PnV(value)
508            | Self::PnL(value)
509            | Self::PnDisplay(value) => Cow::Owned(value.to_string()),
510        }
511    }
512}
513
514impl StringableKeyword for FloatKeyword {
515    fn get_str(&self) -> Cow<'_, str> {
516        match self {
517            Self::PnG(value) => Cow::Owned(value.to_string()),
518        }
519    }
520}
521
522/// Main parsing entry point for FCS keywords
523///
524/// Dispatches to appropriate parsing functions based on keyword name pattern.
525/// Attempts to match the keyword against known patterns (fixed keywords, parameter keywords,
526/// gate keywords, region keywords) and parse the value accordingly.
527///
528/// # Arguments
529/// * `key` - The keyword name (with or without `$` prefix)
530/// * `value` - The keyword value as a string
531///
532/// # Returns
533/// A `KeywordCreationResult` indicating the parsed type, or `UnableToParse` if no pattern matches
534///
535/// # Example
536/// ```ignore
537/// let result = match_and_parse_keyword("$PAR", "10");
538/// // Returns KeywordCreationResult::Int(IntegerKeyword::PAR(10))
539/// ```
540pub fn match_and_parse_keyword(key: &str, value: &str) -> KeywordCreationResult {
541    // Keywords without $ prefix should be treated as Other, not parsed
542    // Exception: GUID keyword doesn't always have $ prefix in some FCS files
543    let dollarless_key = if let Some(key) = key.strip_prefix('$') {
544        key
545    } else if key == "GUID" {
546        // GUID is a special case - it can appear without $ prefix
547        "GUID"
548    } else {
549        // No $ prefix - treat as unknown keyword
550        return KeywordCreationResult::String(StringKeyword::Other(Arc::from(value.trim())));
551    };
552
553    parse_fixed_keywords(dollarless_key, value)
554        .or_else(|| parse_parameter_keywords(dollarless_key, value))
555        .or_else(|| parse_gate_keywords(dollarless_key, value))
556        .or_else(|| parse_region_keywords(dollarless_key, value))
557        .unwrap_or_else(|| {
558            KeywordCreationResult::String(StringKeyword::Other(Arc::from(value.trim())))
559        })
560}
561
562impl From<&StringKeyword> for Arc<str> {
563    fn from(keyword: &StringKeyword) -> Self {
564        keyword.get_str().into()
565    }
566}
567
568// Extract the variant's value and convert it to a string
569impl From<&IntegerKeyword> for String {
570    fn from(keyword: &IntegerKeyword) -> Self {
571        keyword.get_usize().to_string()
572    }
573}