flow_fcs/keyword/mod.rs
1#![allow(deprecated)]
2mod helpers;
3mod parsing;
4#[cfg(test)]
5mod tests;
6use parsing::*;
7
8use crate::{byteorder::ByteOrder, datatype::FcsDataType};
9use serde::{Deserialize, Serialize};
10use std::{borrow::Cow, hash::Hash, sync::Arc};
11use strum_macros::Display;
12
/// Result of parsing a keyword-value pair from the FCS TEXT segment
///
/// This enum represents the possible types a keyword can be parsed as.
/// The parsing logic attempts to match the keyword name and value format
/// to determine the appropriate type. Every data-carrying variant wraps the
/// keyword enum of the matching family, so callers can match on the family
/// first and on the concrete keyword second.
#[derive(Debug)]
pub enum KeywordCreationResult {
    /// Successfully parsed as an integer keyword (e.g., `$PAR`, `$TOT`)
    Int(IntegerKeyword),
    /// Successfully parsed as a float keyword (e.g., `$PnG`)
    Float(FloatKeyword),
    /// Successfully parsed as a string keyword (e.g., `$CYT`, `$FIL`, `$GUID`)
    String(StringKeyword),
    /// Successfully parsed as a byte-oriented keyword (e.g., `$BYTEORD`, `$DATATYPE`)
    Byte(ByteKeyword),
    /// Successfully parsed as a mixed-type keyword (e.g., `$SPILLOVER`, `$PnD`, `$PnE`)
    Mixed(MixedKeyword),
    /// Unable to parse the keyword-value pair (fallback to generic string storage)
    ///
    /// NOTE(review): nothing in the visible part of this module constructs this
    /// variant; `match_and_parse_keyword` falls back to `StringKeyword::Other`
    /// itself. Confirm how the parsing helpers and callers use it.
    UnableToParse,
}
33
/// A typed FCS keyword value, grouped by the kind of payload it carries.
///
/// The variants mirror the data-carrying variants of `KeywordCreationResult`:
/// each one wraps the keyword enum of the corresponding family.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, Display)]
pub enum Keyword {
    /// Keyword holding a single `usize` (see `IntegerKeyword`).
    Int(IntegerKeyword),
    /// Keyword holding a single `f32` (see `FloatKeyword`).
    Float(FloatKeyword),
    /// Keyword holding text (see `StringKeyword`).
    String(StringKeyword),
    /// Keyword holding a byte-order or data-type value (see `ByteKeyword`).
    Byte(ByteKeyword),
    /// Keyword holding several fields of differing types (see `MixedKeyword`).
    Mixed(MixedKeyword),
}
42
/// First numeric field of `MixedKeyword::PnD`.
///
/// Holds the lower display bound for linear scaling; per the `PnD` documentation the
/// same slot carries the number of decades when the scaling is logarithmic.
type LowerBound = f32;
/// Second numeric field of `MixedKeyword::PnD`.
///
/// Holds the upper display bound for linear scaling; per the `PnD` documentation the
/// same slot carries the offset when the scaling is logarithmic.
type UpperBound = f32;
45
/// Keywords whose value is made of several fields or of a list of values.
///
/// `PartialEq` is derived field by field. `Eq` and `Hash` are not derived because of
/// the `f32` payloads; they are implemented by hand further down in this module.
#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq)]
// NOTE(review): presumably silences deprecation warnings caused by the `GnE`
// variant being referenced from derive-generated code — confirm before removing.
#[allow(deprecated)]
pub enum MixedKeyword {
    /// Specifies the conversion of arbitrary signal units, recorded as parameter values (uncompensated or compensated)
    /// to some well defined unit. For example, mean equivalent soluble fluorochrome (MESF) or antibody molecules.
    /// * f1 - the number of calibrated units corresponding to a unit signal value of parameter n
    ///
    /// * str - name of the units corresponding to calibration value
    ///
    /// **Example:** If the signal on parameter n has the scale value X then the calibrated value is X * f units
    PnCalibration(f32, String),
    /// Recommends visualization scale for parameter `n`.
    /// * String is either "Linear" or "Logarithmic".
    /// * f1 and f2 parameter values are in "scale" units, not "channel" units, see below for details.
    /// * For linear scaling:
    ///   - f1: Lower bound - the scale value corresponding to the left edge of the display
    ///   - f2: Upper bound - the scale value corresponding to the right edge of the display
    /// * For logarithmic scaling:
    ///   - f1: Decades - The number of decades to display.
    ///   - f2: Offset - The scale value corresponding to the left edge of the display
    ///
    /// **Example**: `$P3D (Linear,0,1024)`
    /// - Specifies a linear display range with scale parameter values ranging from 0 to 1024.
    ///
    /// **Example**: `$P2D (Logarithmic,4,0.1)`
    /// - Specifies a logarithmic display ranging from 0.1 to 1000 (scale value), which is *4* decades of display width.
    ///
    /// **Example**: `$P1D (Logarithmic,5,0.01)`
    /// - Specifies a logarithmic display ranging from 0.01 to 1000 (scale value), which is 5 decades of display width.
    ///
    /// **Example**: `$P3B (8) | $P3R (256) | $P3G (4) | $P3E (0,0) | $P3D (Linear,0,32)`:
    /// - This is a linear parameter with channel values going from 0 to 255 (`$P3R`). Taking into account the gain (`$PnG`),
    ///   the *scale* values go from 0 to 64 (256/4 = 64). The `$P3D` specifies a linear display from 0 to 32
    ///   scale units, which only encompasses the bottom half of the collected data range on this scale.
    ///
    /// **Example**: `$P4B (16) | $P4R (1024) | $P4E (4,1) | $P4D (Linear,0,1000)`
    /// - Specifies a linear display, with channel values going from 0 to 1023 (`$P4R`).
    ///   Only the bottom 10th of the scale values shown.
    ///   This will restrict the display to channel values between 0 and 768 (the bottom 3 decades),
    ///   with channels being distributed exponentially in the linear display.
    ///
    /// **Example**: `$P4B (16) | $P4R (1024) | $P4E (4,1) | $P4D (Logarithmic,3,1)`:
    /// - The display keyword specifies that the data should be shown in logarithmic scaling, with only the bottom 3 decades shown.
    ///   This will restrict the display to channel values between 0 and 768 (1024*3/4).
    ///
    PnD(String, LowerBound, UpperBound),

    /// (f1, f2) - Amplification type for parameter n. (FCS 1.0+)
    /// * f1 - number of logarithmic decades
    /// * f2 - linear value obtained for a signal with log value = 0
    /// * 0,0 when the parameter is Linear.
    /// * Also 0,0 when floating-point data (`$DATATYPE` = F or `$DATATYPE` = D) is stored.
    ///
    /// **Example**: `$P3E (4,1)` - 4 decades with offset of 1
    PnE(f32, f32),

    /// Gate n amplification type.
    ///
    /// *<small>(FCS v2.0-3.1, deprecated)</small>*
    #[deprecated(since = "3.1.0", note = "Use PnE instead")]
    GnE(f32, f32),

    /// Region n width values - vector of width values for region boundaries
    ///
    /// **Example**: `$R1W (0.5,1.2,0.8)` - Three width values
    RnW(Vec<f32>),

    /// Spillover matrix for compensation
    ///
    /// Format: n, [param_names...], [matrix_values...]
    ///
    /// **Example**: `$SPILLOVER/3,FL2-A,FL1-A,FL3-A,1.0,0.03,0.2,0.1,1.0,0.0,0.05,0,1.0`
    SPILLOVER {
        /// The leading `n` of the format above.
        n_parameters: usize,
        /// The parameter names listed after `n`.
        parameter_names: Vec<String>,
        /// The matrix entries, in the order they appear in the keyword value.
        matrix_values: Vec<f32>,
    },

    /// Excitation wavelength(s) for parameter n in nanometers (FCS 1.0+, format updated in FCS 3.1)
    /// Can contain single or multiple wavelengths for co-axial lasers
    ///
    /// **Example**: `$P3L (488)` - single wavelength
    ///
    /// **Example**: `$P4L (488,532,633)` - multiple co-axial lasers
    PnL(Vec<usize>),
}
126
127impl StringableKeyword for MixedKeyword {
128 #[allow(deprecated)]
129 fn get_str(&self) -> Cow<'_, str> {
130 match self {
131 Self::PnCalibration(f1, s) => Cow::Owned(format!("PnCalibration({}, {})", f1, s)),
132 Self::PnD(s, f1, f2) => Cow::Owned(format!("PnD({}, {}, {})", s, f1, f2)),
133 Self::PnE(f1, f2) => Cow::Owned(format!("PnE({}, {})", f1, f2)),
134 Self::GnE(f1, f2) => Cow::Owned(format!("GnE({}, {})", f1, f2)),
135 Self::PnL(vec) => Cow::Owned(format!(
136 "PnL({})",
137 vec.iter()
138 .map(|v| v.to_string())
139 .collect::<Vec<_>>()
140 .join(", ")
141 )),
142 Self::RnW(vec) => Cow::Owned(format!(
143 "RnW({})",
144 vec.iter()
145 .map(|v| v.to_string())
146 .collect::<Vec<_>>()
147 .join(", ")
148 )),
149 Self::SPILLOVER {
150 n_parameters,
151 parameter_names,
152 matrix_values,
153 } => Cow::Owned(format!(
154 "SPILLOVER({}, {}, {})",
155 n_parameters,
156 parameter_names.join(", "),
157 matrix_values
158 .iter()
159 .map(|v| v.to_string())
160 .collect::<Vec<_>>()
161 .join(", ")
162 )),
163 }
164 }
165}
166
167impl Eq for MixedKeyword {}
168impl Hash for MixedKeyword {
169 #[allow(deprecated)]
170 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
171 match self {
172 Self::PnCalibration(f1, s) => {
173 f1.to_bits().hash(state);
174 s.hash(state);
175 }
176 Self::PnD(s, f1, f2) => {
177 s.hash(state);
178 f1.to_bits().hash(state);
179 f2.to_bits().hash(state);
180 }
181 Self::PnE(f1, f2) | Self::GnE(f1, f2) => {
182 f1.to_bits().hash(state);
183 f2.to_bits().hash(state);
184 }
185 Self::PnL(vec) => {
186 for v in vec {
187 v.hash(state);
188 }
189 }
190 Self::RnW(vec) => {
191 for f in vec {
192 f.to_bits().hash(state);
193 }
194 }
195 Self::SPILLOVER {
196 n_parameters,
197 parameter_names,
198 matrix_values,
199 } => {
200 n_parameters.hash(state);
201 parameter_names.hash(state);
202 for f in matrix_values {
203 f.to_bits().hash(state);
204 }
205 }
206 }
207 }
208}
209
/// Keywords whose value is a single non-negative integer, stored as `usize`.
///
/// The wrapped value of any variant can be read through
/// `IntegerableKeyword::get_usize`.
#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum IntegerKeyword {
    /// The offset to the beginning of the DATA segment (FCS 1.0+)
    BeginData(usize),
    /// The offset to the end of the DATA segment (FCS 1.0+)
    EndData(usize),
    /// The offset to the beginning of the ANALYSIS segment (FCS 2.0+)
    BeginAnalysis(usize),
    /// The offset to the end of the ANALYSIS segment (FCS 2.0+)
    EndAnalysis(usize),
    /// The offset to the beginning of the TEXT segment (FCS 1.0+)
    BeginText(usize),
    /// The offset to the end of the TEXT segment (FCS 1.0+)
    EndText(usize),
    /// The number of parameters in the dataset (FCS 1.0+)
    PAR(usize),
    /// The number of events in the dataset (FCS 1.0+)
    TOT(usize),
    /// Range for parameter `n` (FCS 1.0+)
    PnR(usize),
    /// Number of bits reserved for parameter `n` (FCS 1.0+)
    PnB(usize),
    /// Voltage range for parameter `n` (FCS 1.0+)
    PnV(usize),
    /// Excitation wavelength for parameter `n` (FCS 1.0+)
    ///
    /// Single-value form; `MixedKeyword::PnL` holds a list of wavelengths.
    PnL(usize),
    /// The transformation to apply when displaying the data (FCS 1.0+)
    PnDisplay(usize),
    /// Data type for parameter `n` (FCS 3.2+), overriding the default `$DATATYPE` for a given parameter
    PnDATATYPE(usize),
}
241
/// Keywords whose value is a single `f32`.
///
/// `Eq` and `Hash` are implemented by hand below because `f32` supports neither.
#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq)]
pub enum FloatKeyword {
    /// Gain for parameter n
    PnG(f32),
}
247
248impl Eq for FloatKeyword {}
249impl Hash for FloatKeyword {
250 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
251 match self {
252 FloatKeyword::PnG(f) => f.to_bits().hash(state),
253 }
254 }
255}
256
/// Keywords whose value is kept as text.
///
/// Every variant wraps an `Arc<str>`. The text of any variant can be read through
/// `StringableKeyword::get_str`.
#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq, Eq, Hash)]
// NOTE(review): presumably silences deprecation warnings caused by the deprecated
// variants being referenced from derive-generated code — confirm before removing.
#[allow(deprecated)]
pub enum StringKeyword {
    /// The name of the cytometer used to acquire the data (FCS 1.0+)
    CYT(Arc<str>),
    /// The name of the file containing the dataset (FCS 1.0+)
    FIL(Arc<str>),
    /// The globally unique identifier for the dataset (FCS 2.0+)
    GUID(Arc<str>),

    /// Begin date and time of data acquisition (FCS 3.2+)
    BEGINDATETIME(Arc<str>),
    /// End date and time of data acquisition (FCS 3.2+)
    ENDDATETIME(Arc<str>),

    /// Generic sample carrier identifier (FCS 3.2+, replaces `$PLATEID`)
    CARRIERID(Arc<str>),
    /// Type of sample carrier (FCS 3.2+, replaces `$PLATENAME`)
    CARRIERTYPE(Arc<str>),
    /// Location identifier within carrier (FCS 3.2+, replaces `$WELLID`)
    LOCATIONID(Arc<str>),

    /// 'Short name' for parameter `n` (FCS 1.0+)
    PnN(Arc<str>),
    /// Label name for parameter `n` (FCS 1.0+)
    PnS(Arc<str>),
    /// Name of the optical filter for parameter `n` (FCS 1.0+)
    PnF(Arc<str>),
    /// The FCS measurement signal types and evaluation features (e.g., area, height, or width) (FCS 1.0+)
    PnType(Arc<str>),

    /// Detector name for parameter `n` (FCS 3.2+)
    PnDET(Arc<str>),
    /// Dye specification for parameter `n` (FCS 3.2+)
    PnTAG(Arc<str>),
    /// Target molecule or process for parameter `n` (FCS 3.2+)
    PnANALYTE(Arc<str>),
    /// Evaluation features for parameter `n` (FCS 3.2+)
    PnFEATURE(Arc<str>),

    /// Acquisition flow rate setting (FCS 3.2+)
    FLOWRATE(Arc<str>),

    /// Sample volume (FCS 3.1+)
    VOL(Arc<str>),

    /// Distinguish between original and altered data set (FCS 3.1+)
    ORIGINALITY(Arc<str>),
    /// Who last modified the data set (FCS 3.1+)
    LastModifier(Arc<str>),
    /// When the data set was last modified (FCS 3.1+)
    LastModified(Arc<str>),

    /// Date of data acquisition
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of `$BEGINDATETIME`)</small>
    #[deprecated(since = "3.2.0", note = "Use BEGINDATETIME instead")]
    DATE(Arc<str>),

    /// Begin time of data acquisition
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of `$BEGINDATETIME`)</small>
    #[deprecated(since = "3.2.0", note = "Use BEGINDATETIME instead")]
    BTIM(Arc<str>),

    /// End time of data acquisition
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of `$ENDDATETIME`)</small>
    #[deprecated(since = "3.2.0", note = "Use ENDDATETIME instead")]
    ETIM(Arc<str>),

    /// Data acquisition mode
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2, fixed to "L" list mode)</small>
    #[deprecated(since = "3.2.0", note = "Fixed to 'L' list mode in FCS 3.2")]
    MODE(Arc<str>),

    /// Plate identifier
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of `$CARRIERID`)</small>
    #[deprecated(since = "3.2.0", note = "Use CARRIERID instead")]
    PLATEID(Arc<str>),

    /// Platform/plate name
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of `$CARRIERTYPE`)</small>
    #[deprecated(since = "3.2.0", note = "Use CARRIERTYPE instead")]
    PLATENAME(Arc<str>),

    /// Well identifier
    ///
    /// <small>(FCS 2.0-3.1, deprecated in FCS 3.2 in favor of `$LOCATIONID`)</small>
    #[deprecated(since = "3.2.0", note = "Use LOCATIONID instead")]
    WELLID(Arc<str>),

    /// Gate definition
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    #[deprecated(since = "3.2.0", note = "Gate definitions deprecated")]
    GATE(Arc<str>),

    /// Gate n optical filter
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    #[deprecated(since = "3.2.0", note = "Gate definitions deprecated")]
    GnF(Arc<str>),

    /// Gate n short name
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    #[deprecated(since = "3.2.0", note = "Gate definitions deprecated")]
    GnN(Arc<str>),

    /// Gate n population name
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    #[deprecated(since = "3.2.0", note = "Gate definitions deprecated")]
    GnP(Arc<str>),

    /// Gate n range
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    #[deprecated(since = "3.2.0", note = "Gate definitions deprecated")]
    GnR(Arc<str>),

    /// Gate n label name
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    #[deprecated(since = "3.2.0", note = "Gate definitions deprecated")]
    GnS(Arc<str>),

    /// Gate n threshold
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    #[deprecated(since = "3.2.0", note = "Gate definitions deprecated")]
    GnT(Arc<str>),

    /// Gate n voltage range
    ///
    /// <small>(FCS 2.0-3.1, removed in FCS 3.2)</small>
    #[deprecated(since = "3.2.0", note = "Gate definitions deprecated")]
    GnV(Arc<str>),

    /// A catch-all for other keywords, to be stored as `Arc<str>`
    ///
    /// `match_and_parse_keyword` uses this variant for keys without a `$` prefix and
    /// for keys that none of its parsers recognise.
    Other(Arc<str>),
}
403
/// Keywords regarding the data layout.
///
/// Unlike the string keywords, the value is not kept as free text: each variant wraps
/// a dedicated type (`ByteOrder` or `FcsDataType`).
#[derive(Clone, Debug, Display, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub enum ByteKeyword {
    /// The byte order (endianness) of the data
    BYTEORD(ByteOrder),
    /// The data type of the FCS file, held as an `FcsDataType`
    DATATYPE(FcsDataType),
}
412
/// Keywords whose value can be rendered as text.
pub trait StringableKeyword {
    /// Returns the keyword's value as a string.
    ///
    /// The `Cow` lets implementors that already store text hand out a borrow, while
    /// implementors that must format a number or several fields return an owned string.
    fn get_str(&self) -> Cow<'_, str>;
}
/// Keywords whose value is a single `usize`.
pub trait IntegerableKeyword {
    /// Borrows the integer stored in the keyword.
    fn get_usize(&self) -> &usize;
}
/// Keywords whose value is a single `f32`.
// NOTE(review): the `allow(unused)` suggests this trait has no callers yet — confirm.
#[allow(unused)]
pub trait FloatableKeyword {
    /// Borrows the float stored in the keyword.
    fn get_f32(&self) -> &f32;
}
423
424impl IntegerableKeyword for IntegerKeyword {
425 fn get_usize(&self) -> &usize {
426 match self {
427 Self::TOT(value)
428 | Self::BeginData(value)
429 | Self::EndData(value)
430 | Self::BeginAnalysis(value)
431 | Self::EndAnalysis(value)
432 | Self::BeginText(value)
433 | Self::EndText(value)
434 | Self::PnR(value)
435 | Self::PnB(value)
436 | Self::PnV(value)
437 | Self::PnL(value)
438 | Self::PnDisplay(value)
439 | Self::PnDATATYPE(value)
440 | Self::PAR(value) => value,
441 }
442 }
443}
444
445impl FloatableKeyword for FloatKeyword {
446 fn get_f32(&self) -> &f32 {
447 match self {
448 Self::PnG(value) => value,
449 }
450 }
451}
452
453impl StringableKeyword for StringKeyword {
454 /// Get a reference to the string value (if it exists) from a StringKeyword variant
455 #[allow(deprecated)]
456 fn get_str(&self) -> Cow<'_, str> {
457 match self {
458 Self::CYT(value)
459 | Self::FIL(value)
460 | Self::GUID(value)
461 | Self::BEGINDATETIME(value)
462 | Self::ENDDATETIME(value)
463 | Self::CARRIERID(value)
464 | Self::CARRIERTYPE(value)
465 | Self::LOCATIONID(value)
466 | Self::PnN(value)
467 | Self::PnS(value)
468 | Self::PnF(value)
469 | Self::PnType(value)
470 | Self::PnDET(value)
471 | Self::PnTAG(value)
472 | Self::PnANALYTE(value)
473 | Self::PnFEATURE(value)
474 | Self::FLOWRATE(value)
475 | Self::VOL(value)
476 | Self::ORIGINALITY(value)
477 | Self::LastModifier(value)
478 | Self::LastModified(value)
479 | Self::DATE(value)
480 | Self::BTIM(value)
481 | Self::ETIM(value)
482 | Self::MODE(value)
483 | Self::PLATEID(value)
484 | Self::PLATENAME(value)
485 | Self::WELLID(value)
486 | Self::GATE(value)
487 | Self::GnF(value)
488 | Self::GnN(value)
489 | Self::GnP(value)
490 | Self::GnR(value)
491 | Self::GnS(value)
492 | Self::GnT(value)
493 | Self::GnV(value)
494 | Self::Other(value) => Cow::Borrowed(value.as_ref()),
495 }
496 }
497}
498
499impl StringableKeyword for ByteKeyword {
500 /// Get a reference to the string value (if it exists) from a ByteKeyword variant
501 fn get_str(&self) -> Cow<'_, str> {
502 match self {
503 Self::DATATYPE(data_type) => Cow::Borrowed(data_type.to_keyword_str()),
504 Self::BYTEORD(byte_order) => Cow::Borrowed(byte_order.to_keyword_str()),
505 }
506 }
507}
508
509impl StringableKeyword for IntegerKeyword {
510 fn get_str(&self) -> Cow<'_, str> {
511 match self {
512 Self::BeginData(value)
513 | Self::EndData(value)
514 | Self::BeginAnalysis(value)
515 | Self::EndAnalysis(value)
516 | Self::BeginText(value)
517 | Self::EndText(value)
518 | Self::PAR(value)
519 | Self::TOT(value)
520 | Self::PnR(value)
521 | Self::PnB(value)
522 | Self::PnV(value)
523 | Self::PnL(value)
524 | Self::PnDATATYPE(value)
525 | Self::PnDisplay(value) => Cow::Owned(value.to_string()),
526 }
527 }
528}
529
530impl StringableKeyword for FloatKeyword {
531 fn get_str(&self) -> Cow<'_, str> {
532 match self {
533 Self::PnG(value) => Cow::Owned(value.to_string()),
534 }
535 }
536}
537
538/// Main parsing entry point for FCS keywords
539///
540/// Dispatches to appropriate parsing functions based on keyword name pattern.
541/// Attempts to match the keyword against known patterns (fixed keywords, parameter keywords,
542/// gate keywords, region keywords) and parse the value accordingly.
543///
544/// # Arguments
545/// * `key` - The keyword name (with or without `$` prefix)
546/// * `value` - The keyword value as a string
547///
548/// # Returns
549/// A `KeywordCreationResult` indicating the parsed type, or `UnableToParse` if no pattern matches
550///
551/// # Example
552/// ```ignore
553/// let result = match_and_parse_keyword("$PAR", "10");
554/// // Returns KeywordCreationResult::Int(IntegerKeyword::PAR(10))
555/// ```
556pub fn match_and_parse_keyword(key: &str, value: &str) -> KeywordCreationResult {
557 // Keywords without $ prefix should be treated as Other, not parsed
558 // Exception: GUID keyword doesn't always have $ prefix in some FCS files
559 let dollarless_key = if let Some(key) = key.strip_prefix('$') {
560 key
561 } else if key == "GUID" {
562 // GUID is a special case - it can appear without $ prefix
563 "GUID"
564 } else {
565 // No $ prefix - treat as unknown keyword
566 return KeywordCreationResult::String(StringKeyword::Other(Arc::from(value.trim())));
567 };
568
569 parse_fixed_keywords(dollarless_key, value)
570 .or_else(|| parse_parameter_keywords(dollarless_key, value))
571 .or_else(|| parse_gate_keywords(dollarless_key, value))
572 .or_else(|| parse_region_keywords(dollarless_key, value))
573 .unwrap_or_else(|| {
574 KeywordCreationResult::String(StringKeyword::Other(Arc::from(value.trim())))
575 })
576}
577
578impl From<&StringKeyword> for Arc<str> {
579 fn from(keyword: &StringKeyword) -> Self {
580 keyword.get_str().into()
581 }
582}
583
584// Extract the variant's value and convert it to a string
585impl From<&IntegerKeyword> for String {
586 fn from(keyword: &IntegerKeyword) -> Self {
587 keyword.get_usize().to_string()
588 }
589}