flow_fcs/keyword/mod.rs
1#![allow(deprecated)]
2mod helpers;
3mod parsing;
4#[cfg(test)]
5mod tests;
6use parsing::*;
7
8use crate::{byteorder::ByteOrder, datatype::FcsDataType};
9use serde::{Deserialize, Serialize};
10use std::{borrow::Cow, hash::Hash, sync::Arc};
11use strum_macros::Display;
12
/// Result of parsing a keyword-value pair from the FCS TEXT segment.
///
/// Each variant wraps the typed keyword family the pair was recognised as.
/// The parsing logic matches on the keyword name and on the value format to
/// decide which family applies.
#[derive(Debug)]
pub enum KeywordCreationResult {
    /// Successfully parsed as an integer keyword (e.g., `$PAR`, `$TOT`)
    Int(IntegerKeyword),
    /// Successfully parsed as a float keyword (e.g., `$PnG`)
    Float(FloatKeyword),
    /// Successfully parsed as a string keyword (e.g., `$CYT`, `$FIL`, `$GUID`)
    String(StringKeyword),
    /// Successfully parsed as a byte-oriented keyword (e.g., `$BYTEORD`, `$DATATYPE`)
    Byte(ByteKeyword),
    /// Successfully parsed as a mixed-type keyword (e.g., `$SPILLOVER`, `$PnD`, `$PnE`)
    Mixed(MixedKeyword),
    /// Unable to parse the keyword-value pair (fallback to generic string storage).
    ///
    /// `match_and_parse_keyword` in this module never builds this variant
    /// directly; its own fallback is `String(StringKeyword::Other(..))`.
    UnableToParse,
}
33
/// A typed FCS keyword value.
///
/// Offers the same five keyword families as [`KeywordCreationResult`], minus
/// its `UnableToParse` case.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Display)]
pub enum Keyword {
    /// Keyword whose value is a single unsigned integer.
    Int(IntegerKeyword),
    /// Keyword whose value is a single `f32`.
    Float(FloatKeyword),
    /// Keyword whose value is kept as text.
    String(StringKeyword),
    /// Keyword describing byte order or data type.
    Byte(ByteKeyword),
    /// Keyword whose value combines several fields.
    Mixed(MixedKeyword),
}
42
/// First numeric field of `MixedKeyword::PnD`: the lower display bound for
/// linear scaling, or the number of decades for logarithmic scaling.
type LowerBound = f32;
/// Second numeric field of `MixedKeyword::PnD`: the upper display bound for
/// linear scaling, or the offset for logarithmic scaling.
type UpperBound = f32;
45
/// Keywords whose value combines several fields, or mixes numbers with text.
///
/// `Eq` and `Hash` are implemented by hand in this file rather than derived,
/// since the `f32` fields rule out deriving them.
#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq)]
// NOTE(review): the module already starts with `#![allow(deprecated)]`, so this
// item-level allow looks redundant — confirm before removing it.
#[allow(deprecated)]
pub enum MixedKeyword {
    /// Specifies the conversion of arbitrary signal units, recorded as parameter values (uncompensated or compensated),
    /// to some well-defined unit. For example, mean equivalent soluble fluorochrome (MESF) or antibody molecules.
    ///
    /// * f1 - the number of calibrated units corresponding to a unit signal value of parameter n
    /// * str - name of the units corresponding to the calibration value
    ///
    /// **Example:** If the signal on parameter n has the scale value X, then the calibrated value is X * f1 units.
    PnCalibration(f32, String),
    /// Recommends a visualization scale for parameter `n`.
    ///
    /// * String is either "Linear" or "Logarithmic".
    /// * f1 and f2 parameter values are in "scale" units, not "channel" units; see below for details.
    /// * For linear scaling:
    ///     - f1: Lower bound - the scale value corresponding to the left edge of the display
    ///     - f2: Upper bound - the scale value corresponding to the right edge of the display
    /// * For logarithmic scaling:
    ///     - f1: Decades - the number of decades to display
    ///     - f2: Offset - the scale value corresponding to the left edge of the display
    ///
    /// The field types are named `LowerBound` and `UpperBound` after the linear case;
    /// for logarithmic scaling the same two slots hold decades and offset.
    ///
    /// **Example**: `$P3D (Linear,0,1024)`
    /// - Specifies a linear display range with scale parameter values ranging from 0 to 1024.
    ///
    /// **Example**: `$P2D (Logarithmic,4,0.1)`
    /// - Specifies a logarithmic display ranging from 0.1 to 1000 (scale value), which is 4 decades of display width.
    ///
    /// **Example**: `$P1D (Logarithmic,5,0.01)`
    /// - Specifies a logarithmic display ranging from 0.01 to 1000 (scale value), which is 5 decades of display width.
    ///
    /// **Example**: `$P3B (8) | $P3R (256) | $P3G (4) | $P3E (0,0) | $P3D (Linear,0,32)`:
    /// - This is a linear parameter with channel values going from 0 to 255 (`$P3R`). Taking the gain (`$PnG`) into account,
    ///   the *scale* values go from 0 to 64 (256/4 = 64). The `$P3D` specifies a linear display from 0 to 32
    ///   scale units, which only encompasses the bottom half of the collected data range on this scale.
    ///
    /// **Example**: `$P4B (16) | $P4R (1024) | $P4E (4,1) | $P4D (Linear,0,1000)`
    /// - Specifies a linear display, with channel values going from 0 to 1023 (`$P4R`).
    ///   Only the bottom 10th of the scale values are shown.
    ///   This will restrict the display to channel values between 0 and 768 (the bottom 3 decades),
    ///   with channels being distributed exponentially in the linear display.
    ///
    /// **Example**: `$P4B (16) | $P4R (1024) | $P4E (4,1) | $P4D (Logarithmic,3,1)`:
    /// - The display keyword specifies that the data should be shown in logarithmic scaling, with only the bottom 3 decades shown.
    ///   This will restrict the display to channel values between 0 and 768 (1024*3/4).
    PnD(String, LowerBound, UpperBound),

    /// Amplification type for parameter n, given as `(f1, f2)`. (FCS 1.0+)
    ///
    /// * f1 - number of logarithmic decades
    /// * f2 - linear value obtained for a signal with log value = 0
    /// * 0,0 when the parameter is linear.
    /// * Also 0,0 when floating-point data (`$DATATYPE` = F or `$DATATYPE` = D) is stored.
    ///
    /// **Example**: `$P3E (4,1)` - 4 decades with offset of 1
    PnE(f32, f32),

    /// Gate n amplification type.
    ///
    /// *<small>(FCS v2.0-3.1, deprecated)</small>*
    GnE(f32, f32),

    /// Region n width values - vector of width values for region boundaries.
    ///
    /// **Example**: `$R1W (0.5,1.2,0.8)` - Three width values
    RnW(Vec<f32>),

    /// Spillover matrix for compensation.
    ///
    /// Format: n, [param_names...], [matrix_values...]
    ///
    /// **Example**: `$SPILLOVER/3,FL2-A,FL1-A,FL3-A,1.0,0.03,0.2,0.1,1.0,0.0,0.05,0,1.0`
    SPILLOVER {
        n_parameters: usize,
        parameter_names: Vec<String>,
        matrix_values: Vec<f32>,
    },

    /// Excitation wavelength(s) for parameter n in nanometers (FCS 1.0+, format updated in FCS 3.1).
    /// Can contain single or multiple wavelengths for co-axial lasers.
    ///
    /// `IntegerKeyword::PnL` in this file is the single-integer counterpart.
    ///
    /// **Example**: `$P3L (488)` - single wavelength
    ///
    /// **Example**: `$P4L (488,532,633)` - multiple co-axial lasers
    PnL(Vec<usize>),
}
125
126impl StringableKeyword for MixedKeyword {
127 fn get_str(&self) -> Cow<'_, str> {
128 match self {
129 Self::PnCalibration(f1, s) => Cow::Owned(format!("PnCalibration({}, {})", f1, s)),
130 Self::PnD(s, f1, f2) => Cow::Owned(format!("PnD({}, {}, {})", s, f1, f2)),
131 Self::PnE(f1, f2) => Cow::Owned(format!("PnE({}, {})", f1, f2)),
132 Self::GnE(f1, f2) => Cow::Owned(format!("GnE({}, {})", f1, f2)),
133 Self::PnL(vec) => Cow::Owned(format!(
134 "PnL({})",
135 vec.iter()
136 .map(|v| v.to_string())
137 .collect::<Vec<_>>()
138 .join(", ")
139 )),
140 Self::RnW(vec) => Cow::Owned(format!(
141 "RnW({})",
142 vec.iter()
143 .map(|v| v.to_string())
144 .collect::<Vec<_>>()
145 .join(", ")
146 )),
147 Self::SPILLOVER {
148 n_parameters,
149 parameter_names,
150 matrix_values,
151 } => Cow::Owned(format!(
152 "SPILLOVER({}, {}, {})",
153 n_parameters,
154 parameter_names.join(", "),
155 matrix_values
156 .iter()
157 .map(|v| v.to_string())
158 .collect::<Vec<_>>()
159 .join(", ")
160 )),
161 }
162 }
163}
164
165impl Eq for MixedKeyword {}
166impl Hash for MixedKeyword {
167 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
168 match self {
169 Self::PnCalibration(f1, s) => {
170 f1.to_bits().hash(state);
171 s.hash(state);
172 }
173 Self::PnD(s, f1, f2) => {
174 s.hash(state);
175 f1.to_bits().hash(state);
176 f2.to_bits().hash(state);
177 }
178 Self::PnE(f1, f2) | Self::GnE(f1, f2) => {
179 f1.to_bits().hash(state);
180 f2.to_bits().hash(state);
181 }
182 Self::PnL(vec) => {
183 for v in vec {
184 v.hash(state);
185 }
186 }
187 Self::RnW(vec) => {
188 for f in vec {
189 f.to_bits().hash(state);
190 }
191 }
192 Self::SPILLOVER {
193 n_parameters,
194 parameter_names,
195 matrix_values,
196 } => {
197 n_parameters.hash(state);
198 parameter_names.hash(state);
199 for f in matrix_values {
200 f.to_bits().hash(state);
201 }
202 }
203 }
204 }
205}
206
/// Keywords whose value is a single unsigned integer.
#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum IntegerKeyword {
    /// The offset to the beginning of the DATA segment (FCS 1.0+)
    BeginData(usize),
    /// The offset to the end of the DATA segment (FCS 1.0+)
    EndData(usize),
    /// The offset to the beginning of the ANALYSIS segment (FCS 2.0+)
    BeginAnalysis(usize),
    /// The offset to the end of the ANALYSIS segment (FCS 2.0+)
    EndAnalysis(usize),
    /// The offset to the beginning of the TEXT segment (FCS 1.0+)
    BeginText(usize),
    /// The offset to the end of the TEXT segment (FCS 1.0+)
    EndText(usize),
    /// The number of parameters in the dataset (FCS 1.0+)
    PAR(usize),
    /// The number of events in the dataset (FCS 1.0+)
    TOT(usize),
    /// Range for parameter `n` (FCS 1.0+)
    PnR(usize),
    /// Number of bits reserved for parameter `n` (FCS 1.0+)
    PnB(usize),
    /// Voltage range for parameter `n` (FCS 1.0+)
    ///
    /// NOTE(review): the FCS specification appears to describe `$PnV` as the
    /// detector voltage and to permit non-integer values — confirm that
    /// `usize` storage is intended.
    PnV(usize),
    /// Excitation wavelength for parameter `n` (FCS 1.0+)
    ///
    /// `MixedKeyword::PnL` in this file holds the multi-wavelength form.
    PnL(usize),
    /// The transformation to apply when displaying the data (FCS 1.0+)
    ///
    /// `StringKeyword::PnDISPLAY` in this file is the textual counterpart.
    PnDisplay(usize),
}
236
/// Keywords whose value is a single `f32`.
///
/// `Eq` and `Hash` are implemented by hand in this file, since `f32` rules out
/// deriving them.
#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq)]
pub enum FloatKeyword {
    /// Gain for parameter `n`
    PnG(f32),
}
242
243impl Eq for FloatKeyword {}
244impl Hash for FloatKeyword {
245 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
246 match self {
247 FloatKeyword::PnG(f) => f.to_bits().hash(state),
248 }
249 }
250}
251
/// Keywords whose value is kept as text.
///
/// Every variant stores its value as an `Arc<str>`.
#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum StringKeyword {
    /// The name of the cytometer used to acquire the data (FCS 1.0+)
    CYT(Arc<str>),
    /// The name of the file containing the dataset (FCS 1.0+)
    FIL(Arc<str>),
    /// The globally unique identifier for the dataset (FCS 2.0+)
    GUID(Arc<str>),

    /// Begin date and time of data acquisition (FCS 3.2+)
    BEGINDATETIME(Arc<str>),
    /// End date and time of data acquisition (FCS 3.2+)
    ENDDATETIME(Arc<str>),

    /// Generic sample carrier identifier (FCS 3.2+, replaces `$PLATEID`)
    CARRIERID(Arc<str>),
    /// Type of sample carrier (FCS 3.2+, replaces `$PLATENAME`)
    CARRIERTYPE(Arc<str>),
    /// Location identifier within carrier (FCS 3.2+, replaces `$WELLID`)
    LOCATIONID(Arc<str>),

    /// 'Short name' for parameter `n` (FCS 1.0+)
    PnN(Arc<str>),
    /// Label name for parameter `n` (FCS 1.0+)
    PnS(Arc<str>),
    /// Name of the optical filter for parameter `n` (FCS 1.0+)
    PnF(Arc<str>),
    /// The FCS measurement signal types and evaluation features (e.g., area, height, or width) (FCS 1.0+)
    ///
    /// NOTE(review): the listed examples overlap with `PnFEATURE` below —
    /// confirm the intended meaning and the FCS version for this keyword.
    PnType(Arc<str>),
    /// Display scale for parameter `n` - typically "LOG" for logarithmic or "LIN" for linear (FCS 1.0+)
    ///
    /// Note: Some FCS files use this as a string, others as numeric. We store it as a string for flexibility.
    PnDISPLAY(Arc<str>),

    /// Detector name for parameter `n` (FCS 3.2+)
    PnDET(Arc<str>),
    /// Dye specification for parameter `n` (FCS 3.2+)
    PnTAG(Arc<str>),
    /// Target molecule or process for parameter `n` (FCS 3.2+)
    PnANALYTE(Arc<str>),
    /// Evaluation features for parameter `n` (FCS 3.2+)
    PnFEATURE(Arc<str>),

    /// Acquisition flow rate setting (FCS 3.2+)
    FLOWRATE(Arc<str>),

    /// Sample volume (FCS 3.1+)
    VOL(Arc<str>),

    /// Distinguishes between an original and an altered data set (FCS 3.1+)
    ORIGINALITY(Arc<str>),
    /// Who last modified the data set (FCS 3.1+)
    LastModifier(Arc<str>),
    /// When the data set was last modified (FCS 3.1+)
    LastModified(Arc<str>),

    /// Date of data acquisition
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $BEGINDATETIME)</small>
    DATE(Arc<str>),

    /// Begin time of data acquisition
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $BEGINDATETIME)</small>
    BTIM(Arc<str>),

    /// End time of data acquisition
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $ENDDATETIME)</small>
    ETIM(Arc<str>),

    /// Data acquisition mode
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2, fixed to "L" list mode)</small>
    MODE(Arc<str>),

    /// Plate identifier
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $CARRIERID)</small>
    PLATEID(Arc<str>),

    /// Platform/plate name
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $CARRIERTYPE)</small>
    PLATENAME(Arc<str>),

    /// Well identifier
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of $LOCATIONID)</small>
    WELLID(Arc<str>),

    /// Gate definition
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    GATE(Arc<str>),

    /// Gate n optical filter
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    GnF(Arc<str>),

    /// Gate n short name
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    GnN(Arc<str>),

    /// Gate n population name
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    GnP(Arc<str>),

    /// Gate n range
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    GnR(Arc<str>),

    /// Gate n label name
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    GnS(Arc<str>),

    /// Gate n threshold
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    GnT(Arc<str>),

    /// Gate n voltage range
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    GnV(Arc<str>),

    /// A catch-all for other keywords, to be stored as `Arc<str>`.
    ///
    /// `match_and_parse_keyword` uses this variant, holding the trimmed value,
    /// whenever it cannot interpret a key.
    Other(Arc<str>),
}
385
/// Keywords regarding the data layout.
///
/// Each variant carries a project-defined layout value (`ByteOrder` or
/// `FcsDataType`) rather than a number or free text.
#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum ByteKeyword {
    /// The byte order (endianness) of the data
    BYTEORD(ByteOrder),
    /// The data type of the FCS file (integer, float, double, ascii)
    DATATYPE(FcsDataType),
    /// Data type for parameter `n` (FCS 3.2+), overriding the default `$DATATYPE` for a given parameter
    PnDATATYPE(FcsDataType),
}
396
/// Keywords whose value can be rendered as text.
pub trait StringableKeyword {
    /// Returns the keyword's value as a string.
    ///
    /// The `Cow` lets an implementation borrow text it already stores and
    /// allocate only when the value has to be formatted first.
    fn get_str(&self) -> Cow<'_, str>;
}
/// Keywords whose value is an unsigned integer.
pub trait IntegerableKeyword {
    /// Borrows the keyword's integer value.
    fn get_usize(&self) -> &usize;
}
/// Keywords whose value is a single-precision float.
// NOTE(review): the allow presumably silences dead-code warnings while nothing
// calls `get_f32` — confirm, and drop it once the trait is in use.
#[allow(unused)]
pub trait FloatableKeyword {
    /// Borrows the keyword's float value.
    fn get_f32(&self) -> &f32;
}
407
408impl IntegerableKeyword for IntegerKeyword {
409 fn get_usize(&self) -> &usize {
410 match self {
411 Self::TOT(value)
412 | Self::BeginData(value)
413 | Self::EndData(value)
414 | Self::BeginAnalysis(value)
415 | Self::EndAnalysis(value)
416 | Self::BeginText(value)
417 | Self::EndText(value)
418 | Self::PnR(value)
419 | Self::PnB(value)
420 | Self::PnV(value)
421 | Self::PnL(value)
422 | Self::PnDisplay(value)
423 | Self::PAR(value) => value,
424 }
425 }
426}
427
428impl FloatableKeyword for FloatKeyword {
429 fn get_f32(&self) -> &f32 {
430 match self {
431 Self::PnG(value) => value,
432 }
433 }
434}
435
436impl StringableKeyword for StringKeyword {
437 /// Get a reference to the string value (if it exists) from a StringKeyword variant
438 fn get_str(&self) -> Cow<'_, str> {
439 match self {
440 Self::CYT(value)
441 | Self::FIL(value)
442 | Self::GUID(value)
443 | Self::BEGINDATETIME(value)
444 | Self::ENDDATETIME(value)
445 | Self::CARRIERID(value)
446 | Self::CARRIERTYPE(value)
447 | Self::LOCATIONID(value)
448 | Self::PnN(value)
449 | Self::PnS(value)
450 | Self::PnF(value)
451 | Self::PnType(value)
452 | Self::PnDISPLAY(value)
453 | Self::PnDET(value)
454 | Self::PnTAG(value)
455 | Self::PnANALYTE(value)
456 | Self::PnFEATURE(value)
457 | Self::FLOWRATE(value)
458 | Self::VOL(value)
459 | Self::ORIGINALITY(value)
460 | Self::LastModifier(value)
461 | Self::LastModified(value)
462 | Self::DATE(value)
463 | Self::BTIM(value)
464 | Self::ETIM(value)
465 | Self::MODE(value)
466 | Self::PLATEID(value)
467 | Self::PLATENAME(value)
468 | Self::WELLID(value)
469 | Self::GATE(value)
470 | Self::GnF(value)
471 | Self::GnN(value)
472 | Self::GnP(value)
473 | Self::GnR(value)
474 | Self::GnS(value)
475 | Self::GnT(value)
476 | Self::GnV(value)
477 | Self::Other(value) => Cow::Borrowed(value.as_ref()),
478 }
479 }
480}
481
482impl StringableKeyword for ByteKeyword {
483 /// Get a reference to the string value (if it exists) from a ByteKeyword variant
484 fn get_str(&self) -> Cow<'_, str> {
485 match self {
486 Self::DATATYPE(data_type) | Self::PnDATATYPE(data_type) => {
487 Cow::Borrowed(data_type.to_keyword_str())
488 }
489 Self::BYTEORD(byte_order) => Cow::Borrowed(byte_order.to_keyword_str()),
490 }
491 }
492}
493
494impl StringableKeyword for IntegerKeyword {
495 fn get_str(&self) -> Cow<'_, str> {
496 match self {
497 Self::BeginData(value)
498 | Self::EndData(value)
499 | Self::BeginAnalysis(value)
500 | Self::EndAnalysis(value)
501 | Self::BeginText(value)
502 | Self::EndText(value)
503 | Self::PAR(value)
504 | Self::TOT(value)
505 | Self::PnR(value)
506 | Self::PnB(value)
507 | Self::PnV(value)
508 | Self::PnL(value)
509 | Self::PnDisplay(value) => Cow::Owned(value.to_string()),
510 }
511 }
512}
513
514impl StringableKeyword for FloatKeyword {
515 fn get_str(&self) -> Cow<'_, str> {
516 match self {
517 Self::PnG(value) => Cow::Owned(value.to_string()),
518 }
519 }
520}
521
522/// Main parsing entry point for FCS keywords
523///
524/// Dispatches to appropriate parsing functions based on keyword name pattern.
525/// Attempts to match the keyword against known patterns (fixed keywords, parameter keywords,
526/// gate keywords, region keywords) and parse the value accordingly.
527///
528/// # Arguments
529/// * `key` - The keyword name (with or without `$` prefix)
530/// * `value` - The keyword value as a string
531///
532/// # Returns
533/// A `KeywordCreationResult` indicating the parsed type, or `UnableToParse` if no pattern matches
534///
535/// # Example
536/// ```ignore
537/// let result = match_and_parse_keyword("$PAR", "10");
538/// // Returns KeywordCreationResult::Int(IntegerKeyword::PAR(10))
539/// ```
540pub fn match_and_parse_keyword(key: &str, value: &str) -> KeywordCreationResult {
541 // Keywords without $ prefix should be treated as Other, not parsed
542 // Exception: GUID keyword doesn't always have $ prefix in some FCS files
543 let dollarless_key = if let Some(key) = key.strip_prefix('$') {
544 key
545 } else if key == "GUID" {
546 // GUID is a special case - it can appear without $ prefix
547 "GUID"
548 } else {
549 // No $ prefix - treat as unknown keyword
550 return KeywordCreationResult::String(StringKeyword::Other(Arc::from(value.trim())));
551 };
552
553 parse_fixed_keywords(dollarless_key, value)
554 .or_else(|| parse_parameter_keywords(dollarless_key, value))
555 .or_else(|| parse_gate_keywords(dollarless_key, value))
556 .or_else(|| parse_region_keywords(dollarless_key, value))
557 .unwrap_or_else(|| {
558 KeywordCreationResult::String(StringKeyword::Other(Arc::from(value.trim())))
559 })
560}
561
562impl From<&StringKeyword> for Arc<str> {
563 fn from(keyword: &StringKeyword) -> Self {
564 keyword.get_str().into()
565 }
566}
567
568// Extract the variant's value and convert it to a string
569impl From<&IntegerKeyword> for String {
570 fn from(keyword: &IntegerKeyword) -> Self {
571 keyword.get_usize().to_string()
572 }
573}