gfa/
optfields.rs

1use bstr::ByteSlice;
2
3use lazy_static::lazy_static;
4use regex::bytes::Regex;
5
6/// These type aliases are useful for configuring the parsers, as the
7/// type of the optional field container must be given when creating a
8/// GFAParser or GFA object.
9pub type OptionalFields = Vec<OptField>;
10pub type NoOptionalFields = ();
11
12/// An optional field a la SAM. Identified by its tag, which is any
13/// two characters matching [A-Za-z][A-Za-z0-9].
14#[derive(Debug, Clone, PartialEq, PartialOrd)]
15pub struct OptField {
16    pub tag: [u8; 2],
17    pub value: OptFieldVal,
18}
19
/// The value of an optional field, with one variant per SAM optional
/// field type.
///
/// The `B` type, which denotes either an integer or a float array, is
/// represented by two variants. Both ignore the size modifiers in the
/// spec and always hold i64 or f32 elements.
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub enum OptFieldVal {
    /// `A`: a single character, stored as one byte.
    A(u8),
    /// `i`: a single integer.
    Int(i64),
    /// `f`: a single float.
    Float(f32),
    /// `Z`: a string, stored as raw bytes.
    Z(Vec<u8>),
    /// `J`: a JSON string, stored as raw bytes without being decoded.
    J(Vec<u8>),
    /// `H`: a byte array in hexadecimal notation. As produced by
    /// `OptField::parse()`, every element holds the value of one
    /// hexadecimal digit.
    H(Vec<u32>),
    /// `B`: an array of integers.
    BInt(Vec<i64>),
    /// `B`: an array of floats.
    BFloat(Vec<f32>),
}
35
36impl OptField {
37    /// Panics if the provided tag doesn't match the regex
38    /// [A-Za-z][A-Za-z0-9].
39    pub fn tag(t: &[u8]) -> [u8; 2] {
40        assert_eq!(t.len(), 2);
41        assert!(t[0].is_ascii_alphabetic());
42        assert!(t[1].is_ascii_alphanumeric());
43        [t[0], t[1]]
44    }
45
46    /// Create a new OptField from a tag name and a value, panicking
47    /// if the provided tag doesn't fulfill the requirements of
48    /// OptField::tag().
49    pub fn new(tag: &[u8], value: OptFieldVal) -> Self {
50        let tag = OptField::tag(tag);
51        OptField { tag, value }
52    }
53
54    /// Parses an optional field from a bytestring in the format
55    /// <TAG>:<TYPE>:<VALUE>
56    pub fn parse(input: &[u8]) -> Option<Self> {
57        lazy_static! {
58            static ref RE_TAG: Regex =
59                Regex::new(r"(?-u)[A-Za-z][A-Za-z0-9]").unwrap();
60            static ref RE_CHAR: Regex = Regex::new(r"(?-u)[!-~]").unwrap();
61            static ref RE_INT: Regex = Regex::new(r"(?-u)[-+]?[0-9]+").unwrap();
62            static ref RE_FLOAT: Regex =
63                Regex::new(r"(?-u)[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?")
64                    .unwrap();
65            static ref RE_STRING: Regex = Regex::new(r"(?-u)[ !-~]+").unwrap();
66            static ref RE_BYTES: Regex = Regex::new(r"(?-u)[0-9A-F]+").unwrap();
67        }
68
69        use OptFieldVal::*;
70
71        let o_tag = input.get(0..=1)?;
72
73        let o_type = input.get(3)?;
74
75        let o_contents = input.get(5..)?;
76
77        let o_val = match o_type {
78            // char
79            b'A' => RE_CHAR.find(o_contents).map(|s| s.as_bytes()[0]).map(A),
80            // int
81            b'i' => RE_INT
82                .find(o_contents)
83                .and_then(|s| s.as_bytes().to_str().ok())
84                .and_then(|s| s.parse().ok())
85                .map(Int),
86            // float
87            b'f' => RE_FLOAT
88                .find(o_contents)
89                .and_then(|s| s.as_bytes().to_str().ok())
90                .and_then(|s| s.parse().ok())
91                .map(Float),
92            // string
93            b'Z' => RE_STRING
94                .find(o_contents)
95                .map(|s| s.as_bytes().into())
96                .map(Z),
97            // JSON string
98            b'J' => RE_STRING
99                .find(o_contents)
100                .map(|s| s.as_bytes().into())
101                .map(J),
102            // bytearray
103            b'H' => RE_BYTES
104                .find(o_contents)
105                .and_then(|s| s.as_bytes().to_str().ok())
106                .map(|s| s.chars().filter_map(|c| c.to_digit(16)))
107                .map(|s| H(s.collect())),
108            // float or int array
109            b'B' => {
110                let first = o_contents[0];
111                let rest = o_contents[1..]
112                    .split_str(b",")
113                    .filter_map(|s| s.as_bytes().to_str().ok());
114                if first == b'f' {
115                    Some(BFloat(rest.filter_map(|s| s.parse().ok()).collect()))
116                } else {
117                    Some(BInt(rest.filter_map(|s| s.parse().ok()).collect()))
118                }
119            }
120            _ => None,
121        }?;
122
123        Some(Self::new(o_tag, o_val))
124    }
125}
126
// Expands to an expression that extracts the payload of variant
// `$var` from `$from.value`, or evaluates to `None` if the value is a
// different variant. With `ref` the payload is borrowed; with `copy`
// it is copied out of the value.
macro_rules! get_variant {
    ($from:ident, ref $var:path) => {
        match &$from.value {
            $var(inner) => Some(&inner),
            _ => None,
        }
    };
    ($from:ident, copy $var:path) => {
        match $from.value {
            $var(inner) => Some(inner),
            _ => None,
        }
    };
}
143
// Generates a public getter method called `$name` that returns the
// contents of variant `$variant` as an `Option<$ret>`, which is `None`
// when the field holds another variant. `$mode` is either `ref` or
// `copy` and is passed on to `get_variant!`: `ref` returns a reference
// to the contents, `copy` returns an owned copy. Any attributes (such
// as doc comments) placed before the variant are attached to the
// generated method.
macro_rules! get_opt_field_val {
    ($(#[$attr:meta])* $variant:path, $mode:tt $ret:ty, $name:ident) => {
        $(#[$attr])*
        pub fn $name(&self) -> Option<$ret> {
            get_variant!(self, $mode $variant)
        }
    };
}
157
158impl OptField {
159    get_opt_field_val!(
160        /// If this field contains a single character, return it.
161        OptFieldVal::A,      copy   u8,  get_char);
162
163    get_opt_field_val!(
164        /// If this field contains a single integer, return it.
165        OptFieldVal::Int,    copy  i64,  get_int);
166
167    get_opt_field_val!(
168        /// If this field contains a single float, return it.
169        OptFieldVal::Float,  copy  f32,  get_float);
170
171    get_opt_field_val!(
172        /// If this field contains a string, return a slice of it.
173        OptFieldVal::Z,      ref &[ u8], get_string);
174
175    get_opt_field_val!(
176        /// If this field contains a JSON string, return a slice of it.
177        OptFieldVal::J,      ref &[ u8], get_json);
178
179    get_opt_field_val!(
180        /// If this field contains a byte array, return a slice of it.
181        OptFieldVal::H,      ref &[u32], get_byte_array);
182
183    get_opt_field_val!(
184        /// If this field contains an array of integers, return a slice of it.
185        OptFieldVal::BInt,   ref &[i64], get_int_array);
186
187    get_opt_field_val!(
188        /// If this field contains an array of floats, return a slice of it.
189        OptFieldVal::BFloat, ref &[f32], get_float_array);
190}
191
192/// The Display implementation produces spec-compliant strings in the
193/// <TAG>:<TYPE>:<VALUE> format, and can be parsed back using
194/// OptField::parse().
195impl std::fmt::Display for OptField {
196    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
197        use OptFieldVal::*;
198
199        write!(f, "{}{}:", char::from(self.tag[0]), char::from(self.tag[1]))?;
200
201        match &self.value {
202            A(x) => write!(f, "A:{}", char::from(*x)),
203            Int(x) => write!(f, "i:{}", x),
204            Float(x) => write!(f, "f:{}", x),
205            Z(x) => write!(f, "Z:{}", x.as_bstr()),
206            J(x) => write!(f, "J:{}", x.as_bstr()),
207            H(x) => {
208                write!(f, "H:")?;
209                for a in x {
210                    write!(f, "{:x}", a)?
211                }
212                Ok(())
213            }
214            BInt(x) => {
215                write!(f, "B:I{}", x[0])?;
216                for a in x[1..].iter() {
217                    write!(f, ",{}", a)?
218                }
219                Ok(())
220            }
221            BFloat(x) => {
222                write!(f, "B:F{}", x[0])?;
223                for a in x[1..].iter() {
224                    write!(f, ",{}", a)?
225                }
226                Ok(())
227            }
228        }
229    }
230}
231
232/// The OptFields trait describes how to parse, store, and query
233/// optional fields. Each of the GFA line types and the GFA struct
234/// itself are generic over the optional fields, so the choice of
235/// OptFields implementor can impact memory usage, which optional
236/// fields are parsed, and possibly more in the future
237pub trait OptFields: Sized + Default + Clone {
238    /// Return the optional field with the given tag, if it exists.
239    fn get_field(&self, tag: &[u8]) -> Option<&OptField>;
240
241    /// Return a slice over all optional fields. NB: This may be
242    /// replaced by an iterator or something else in the future
243    fn fields(&self) -> &[OptField];
244
245    /// Given an iterator over bytestrings, each expected to hold one
246    /// optional field (in the <TAG>:<TYPE>:<VALUE> format), parse
247    /// them as optional fields to create a collection. Returns `Self`
248    /// rather than `Option<Self>` for now, but this may be changed to
249    /// become fallible in the future.
250    fn parse<T>(input: T) -> Self
251    where
252        T: IntoIterator,
253        T::Item: AsRef<[u8]>;
254}
255
256/// This implementation is useful for performance if we don't actually
257/// need any optional fields. () takes up zero space, and all
258/// methods are no-ops.
259impl OptFields for () {
260    fn get_field(&self, _: &[u8]) -> Option<&OptField> {
261        None
262    }
263
264    fn fields(&self) -> &[OptField] {
265        &[]
266    }
267
268    fn parse<T>(_input: T) -> Self
269    where
270        T: IntoIterator,
271        T::Item: AsRef<[u8]>,
272    {
273    }
274}
275
276/// Stores all the optional fields in a vector. `get_field` simply
277/// uses std::iter::Iterator::find(), but as there are only a
278/// relatively small number of optional fields in practice, it should
279/// be efficient enough.
280impl OptFields for Vec<OptField> {
281    fn get_field(&self, tag: &[u8]) -> Option<&OptField> {
282        self.iter().find(|o| o.tag == tag)
283    }
284
285    fn fields(&self) -> &[OptField] {
286        self.as_slice()
287    }
288
289    fn parse<T>(input: T) -> Self
290    where
291        T: IntoIterator,
292        T::Item: AsRef<[u8]>,
293    {
294        input
295            .into_iter()
296            .filter_map(|f| OptField::parse(f.as_ref()))
297            .collect()
298    }
299}