eightyseven/
reader.rs

1use std::io::BufReader;
2use std::path::Path;
3use std::{io, str::FromStr};
4
5use glam::Vec3;
6
7use crate::structure::{Atom, AtomName, AtomNum, BoxVecs, ResName, ResNum, Structure};
8
9/// The error type for reading `gro` files.
10///
11/// A distinction is made between _I/O_, _unexpected end-of_, and _bad-value_ errors.
12///
13/// A [`ParseGroError`] can always be cast to an [`io::Error`]. If the error is of the
14/// [`ParseGroError::IOError`] variant, the inner `io::Error` will be used directly. Otherwise, the
15/// error is converted to an instance of [`io::ErrorKind::Other`].
16///
17/// Similarly, any `io::Error` can be cast to a `ParseGroError`. This makes conversion between
18/// error types convenient and allows for using the try-operator (`?`) in both
19/// [`reader::Result`][`Result`] and [`io::Result`]-returning function.
20#[non_exhaustive]
21#[derive(Debug)]
22pub enum ParseGroError {
23    IOError(io::Error),
24
25    UnexpectedEOF(ExpectedItem),
26    UnexpectedEOL(ExpectedItem),
27
28    BadNAtoms(std::num::ParseIntError),
29    InsufficientAtoms { exp: usize, enc: usize },
30    BadResnum(std::num::ParseIntError),
31    BadAtomnum(std::num::ParseIntError),
32    BadPositionValue(std::num::ParseFloatError),
33    BadVelocityValue(std::num::ParseFloatError),
34    BadBoxVecsLength(usize),
35    BadBoxVecsValue(std::num::ParseFloatError),
36}
37
38#[non_exhaustive]
39#[derive(Debug, Clone, Copy)]
40/// The expected values for both the [`ParseGroError::UnexpectedEOF`] and
41/// [`ParseGroError::UnexpectedEOL`] variants.
42pub enum ExpectedItem {
43    // UnexpectedEOF.
44    Title,   // "title"
45    NAtoms,  // "number of atoms"
46    BoxVecs, // "box vectors"
47
48    // UnexpectedEOL.
49    Resnum,   // "resnum"
50    Resname,  // "resname"
51    Atomname, // "atomname"
52    Atomnum,  // "atomnum"
53    Position, // "position"
54    Velocity, // "velocity"
55}
56
57impl std::fmt::Display for ParseGroError {
58    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59        match self {
60            Self::IOError(err) => write!(f, "I/O error: {err:?}"),
61
62            Self::UnexpectedEOF(expected) => {
63                write!(f, "unexpected end of file: expected {expected}")
64            }
65            Self::UnexpectedEOL(expected) => {
66                write!(f, "unexpected end of line: while reading {expected}")
67            }
68
69            Self::BadNAtoms(err) => write!(f, "could not read the number of atoms: {err}"),
70            Self::InsufficientAtoms { exp, enc } => write!(
71                f,
72                "could not read the specified number of atoms: \
73                    expected {exp} but could only read {enc}"
74            ),
75            Self::BadResnum(err) => write!(f, "could not read the residue number: {err}"),
76            Self::BadAtomnum(err) => write!(f, "could not read the atom number: {err}"),
77            Self::BadPositionValue(err) => write!(f, "could not read position value: {err}"),
78            Self::BadVelocityValue(err) => write!(f, "could not read velocity value: {err}"),
79            Self::BadBoxVecsLength(n) => {
80                write!(
81                    f,
82                    "could not read box vectors: expected 3 or 9 values, found {n}"
83                )
84            }
85            Self::BadBoxVecsValue(err) => write!(f, "could not read box vectors value: {err}"),
86        }
87    }
88}
89
90impl std::error::Error for ParseGroError {}
91
92impl std::fmt::Display for ExpectedItem {
93    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
94        f.write_str(match self {
95            Self::Title => "title",
96            Self::NAtoms => "number of atoms",
97            Self::BoxVecs => "box vectors",
98            Self::Resnum => "resnum",
99            Self::Resname => "resname",
100            Self::Atomname => "atomname",
101            Self::Atomnum => "atomnum",
102            Self::Position => "position",
103            Self::Velocity => "velocity",
104        })
105    }
106}
107
108impl From<ParseGroError> for io::Error {
109    fn from(err: ParseGroError) -> Self {
110        match err {
111            // Just give the inner io::Error.
112            ParseGroError::IOError(err) => err,
113            // Wrap them as an io::ErrorKind::Other.
114            other => io::Error::other(other),
115        }
116    }
117}
118
119impl From<io::Error> for ParseGroError {
120    fn from(err: io::Error) -> Self {
121        ParseGroError::IOError(err)
122    }
123}
124
125pub type Result<T> = std::result::Result<T, ParseGroError>;
126
127/// The ranges within a single atom line of a `gro` file.
128pub mod ranges {
129    use std::ops::Range;
130
131    pub const RESNUM: Range<usize> = 0..5;
132    pub const RESNAME: Range<usize> = 5..10;
133    pub const ATOMNAME: Range<usize> = 10..15;
134    pub const ATOMNUM: Range<usize> = 15..20;
135
136    // NOTE: These values only hold when the default `f8.3` and `f8.4` precisions are used. This
137    // may change in the future when I implement non-standard precisions.
138    pub const POSITION: Range<usize> = 20..44;
139    pub const VELOCITY: Range<usize> = 44..68;
140
141    /// Fields at least up to and including `position` should be there for a valid `gro` atom line.
142    pub const MIN_LINE_LEN: usize = POSITION.end;
143}
144
145/// Specify which fields must be read and which must be skipped while reading a `gro` atom line.
146///
147/// [`ParseList::ALL`] is considered the default configuration.
148///
149/// If a value is set to `true` (as set by [`ParseList::ALL`]), the `parse_*` for that value will
150/// be called and the result will be presented as `Some(value)` to [`ReadGro<A>::build_atom`].
151pub struct ParseList {
152    pub resnum: bool,
153    pub resname: bool,
154    pub atomname: bool,
155    pub atomnum: bool,
156    pub position: bool,
157    pub velocity: bool,
158}
159
160impl ParseList {
161    pub const ALL: Self = Self {
162        resnum: true,
163        resname: true,
164        atomname: true,
165        atomnum: true,
166        position: true,
167        velocity: true,
168    };
169}
170
171/// A trait for reading `gro` files int.
172///
173/// `ReadGro` implements robust default implementations for reading from generic readers
174/// ([`ReadGro::read`]), and to files ([`ReadGro::read_from_file`]) and
175/// ([`ReadGro::open_gro`]).
176///
177/// To implement this trait for a type, only [`ReadGro::build_atom`] and
178/// [`ReadGro::build_structure`] need to be specified. The default read implementations are based
179/// on these building blocks.
180///
181/// It is possible to read only the parts from the atom lines that are necessary for the atom (`A`)
182/// type with the [`ReadGro::PARSE_LIST`] const struct. For example, if you are only interested in
183/// the positions, the preceding fields can be skipped and only the positions field will be read.
184/// Care must be taken to set up a good match between the implementation of `ReadGro::build_atom`
185/// and the `ReadGro::PARSE_LIST`. The implementation of `ReadGro` for [`Structure`] is a good
186/// example of this.
187pub trait ReadGro<A>: Sized {
188    const PARSE_LIST: ParseList = ParseList::ALL;
189
190    /// Build an atom (`A`) from options of components.
191    ///
192    /// Together with setting the [`ReadGro<A>::PARSE_LIST`], it is possible to read only the
193    /// values that are necessary to construct `A`.
194    ///
195    /// # Note
196    ///
197    /// It is very advisable to mark the implementation for this function as `#[inline]`. This
198    /// function will be called in a rather hot loop, internally.
199    fn build_atom(
200        resnum: Option<ResNum>,
201        resname: Option<ResName>,
202        atomname: Option<AtomName>,
203        atomnum: Option<AtomNum>,
204        position: Option<[f32; 3]>,
205        velocity: Option<[f32; 3]>,
206    ) -> A;
207
208    /// Build the value from a title, atoms, and box vectors.
209    fn build_structure(title: String, atoms: Vec<A>, boxvecs: BoxVecs) -> Self;
210
211    #[inline]
212    fn parse_resnum(line: &str) -> Result<ResNum> {
213        line.get(ranges::RESNUM)
214            .ok_or(ParseGroError::UnexpectedEOL(ExpectedItem::Resnum))?
215            .trim()
216            .parse()
217            .map_err(ParseGroError::BadResnum)
218    }
219
220    #[inline]
221    fn parse_resname(line: &str) -> Result<ResName> {
222        line.get(ranges::RESNAME)
223            .ok_or(ParseGroError::UnexpectedEOL(ExpectedItem::Resname))
224            .map(|s| s.trim().into())
225    }
226
227    #[inline]
228    fn parse_atomname(line: &str) -> Result<AtomName> {
229        line.get(ranges::ATOMNAME)
230            .ok_or(ParseGroError::UnexpectedEOL(ExpectedItem::Atomname))
231            .map(|s| s.trim().into())
232    }
233
234    #[inline]
235    fn parse_atomnum(line: &str) -> Result<AtomNum> {
236        line.get(ranges::ATOMNUM)
237            .ok_or(ParseGroError::UnexpectedEOL(ExpectedItem::Atomnum))?
238            .trim()
239            .parse()
240            .map_err(ParseGroError::BadAtomnum)
241    }
242
243    #[inline]
244    fn parse_position(line: &str) -> Result<[f32; 3]> {
245        parse_floats(
246            transpose_option_array([20..28, 28..36, 36..44].map(|r| line.get(r)))
247                .ok_or(ParseGroError::UnexpectedEOL(ExpectedItem::Position))?
248                .map(str::trim),
249        )
250        .map_err(ParseGroError::BadPositionValue)
251    }
252
253    #[inline]
254    fn parse_velocity(line: &str) -> Result<[f32; 3]> {
255        parse_floats(
256            transpose_option_array([44..52, 52..60, 60..68].map(|r| line.get(r)))
257                .ok_or(ParseGroError::UnexpectedEOL(ExpectedItem::Velocity))?
258                .map(str::trim),
259        )
260        .map_err(ParseGroError::BadVelocityValue)
261    }
262
263    /// Parse an atom line.
264    ///
265    /// # Examples
266    ///
267    /// [`ReadGro`] is implement for [`Structure`].
268    ///
269    /// ```
270    /// use eightyseven::reader::ReadGro;
271    /// use eightyseven::structure::Structure;
272    ///
273    /// let line = "    1MET     BB    1   7.508   4.691   2.177  0.0306 -0.1635  0.1420";
274    /// let atom = Structure::parse_atom_line(line).unwrap();
275    ///
276    /// assert_eq!(atom.resnum, 1);
277    /// assert_eq!(atom.resname.as_str(), "MET");
278    /// assert_eq!(atom.atomname.as_str(), "BB");
279    /// assert_eq!(atom.atomnum, 1);
280    /// ```
281    #[rustfmt::skip]
282    #[inline]
283    fn parse_atom_line(line: &str) -> Result<A> {
284        assert!(line.len() >= ranges::MIN_LINE_LEN);
285        let resnum =   if Self::PARSE_LIST.resnum   { Some(Self::parse_resnum(line)?)   } else { None };
286        let resname =  if Self::PARSE_LIST.resname  { Some(Self::parse_resname(line)?)  } else { None };
287        let atomname = if Self::PARSE_LIST.atomname { Some(Self::parse_atomname(line)?) } else { None };
288        let atomnum =  if Self::PARSE_LIST.atomnum  { Some(Self::parse_atomnum(line)?)  } else { None };
289        let position = if Self::PARSE_LIST.position { Some(Self::parse_position(line)?) } else { None };
290        let velocity = if Self::PARSE_LIST.velocity {
291            if line.len() > 44 {
292                Some(Self::parse_velocity(line)?)
293            } else {
294                None
295            }
296        } else {
297            None
298        };
299
300        Ok(Self::build_atom(
301            resnum, resname, atomname, atomnum, position, velocity,
302        ))
303    }
304
305    /// Parse a box vectors line.
306    ///
307    /// # Examples
308    ///
309    /// [`ReadGro`] is implement for [`Structure`].
310    ///
311    /// ```
312    /// use eightyseven::reader::ReadGro;
313    /// use eightyseven::structure::{BoxVecs, Structure};
314    ///
315    /// let short = "123.0 456.0 789.0";
316    /// let full = "0.0 1.0 2.0 3.0 4.0 5.0 6.0 8.0 9.0";
317    ///
318    /// assert_eq!(Structure::parse_boxvecs(short).unwrap(), BoxVecs::Short([123.0, 456.0, 789.0]));
319    /// assert_eq!(Structure::parse_boxvecs(full).unwrap(), BoxVecs::Full([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 8.0, 9.0]));
320    ///
321    /// let broken = "51.72 51.12";
322    ///
323    /// assert!(Structure::parse_boxvecs(broken).is_err());
324    /// ```
325    fn parse_boxvecs(line: &str) -> Result<BoxVecs> {
326        let vs: Vec<&str> = line.split_ascii_whitespace().collect();
327        match vs.len() {
328            // The try_into().unwrap()s here are safe, since we just checked the len.
329            3 => Ok(BoxVecs::Short(
330                parse_floats(vs.try_into().unwrap()).map_err(ParseGroError::BadBoxVecsValue)?,
331            )),
332            9 => Ok(BoxVecs::Full(
333                parse_floats(vs.try_into().unwrap()).map_err(ParseGroError::BadBoxVecsValue)?,
334            )),
335            n => Err(ParseGroError::BadBoxVecsLength(n)),
336        }
337    }
338
339    /// Read `gro` data.
340    ///
341    /// # Examples
342    ///
343    /// [`ReadGro`] is implement for [`Structure`].
344    ///
345    /// ```
346    /// use std::io::{BufReader, Read};
347    ///
348    /// use eightyseven::reader::ReadGro;
349    /// use eightyseven::structure::Structure;
350    ///
351    /// let mut buf = Vec::new();
352    /// std::fs::File::open("tests/eq.gro").unwrap().read_to_end(&mut buf).unwrap();
353    /// let reader = BufReader::new(buf.as_slice());
354    /// let structure = Structure::read(reader).expect("should be a valid gro file");
355    /// println!("{}", structure.title);
356    /// ```
357    fn read(reader: impl io::BufRead) -> Result<Self> {
358        // We will iterate over lines.
359        let mut lines = reader.lines();
360
361        // Read the header.
362        let title = String::from(
363            lines
364                .next()
365                .ok_or(ParseGroError::UnexpectedEOF(ExpectedItem::Title))??
366                .trim(),
367        );
368        let natoms = lines
369            .next()
370            .ok_or(ParseGroError::UnexpectedEOF(ExpectedItem::NAtoms))??
371            .trim()
372            .parse()
373            .map_err(ParseGroError::BadNAtoms)?;
374
375        // Read all of the atoms.
376        let mut atoms = Vec::with_capacity(natoms);
377        for idx in 0..natoms {
378            let atom = Self::parse_atom_line(&lines.next().ok_or(
379                ParseGroError::InsufficientAtoms {
380                    exp: natoms,
381                    enc: idx,
382                },
383            )??)?;
384            atoms.push(atom);
385        }
386
387        // Finally, the box vectors.
388        let boxvecs = Self::parse_boxvecs(
389            &lines
390                .next()
391                .ok_or(ParseGroError::UnexpectedEOF(ExpectedItem::BoxVecs))??,
392        )?;
393
394        Ok(Self::build_structure(title, atoms, boxvecs))
395    }
396
397    /// Read a `gro` file from a [`std::fs::File`].
398    ///
399    /// # Examples
400    ///
401    /// [`ReadGro`] is implement for [`Structure`].
402    ///
403    /// ```
404    /// use eightyseven::reader::ReadGro;
405    /// use eightyseven::structure::Structure;
406    ///
407    /// let file = std::fs::File::open("tests/eq.gro").unwrap();
408    /// let structure = Structure::read_from_file(file).expect("should be a valid gro file");
409    /// println!("{}", structure.title);
410    /// ```
411    fn read_from_file(file: std::fs::File) -> Result<Self> {
412        let reader = BufReader::new(file);
413        Self::read(reader)
414    }
415
416    /// Read a `gro` file from a file at a path.
417    ///
418    /// # Examples
419    ///
420    /// [`ReadGro`] is implement for [`Structure`].
421    ///
422    /// ```
423    /// use eightyseven::reader::ReadGro;
424    /// use eightyseven::structure::Structure;
425    ///
426    /// let structure = Structure::open_gro("tests/eq.gro").expect("should be a valid gro file");
427    /// println!("{}", structure.title);
428    /// ```
429    fn open_gro<P: AsRef<Path>>(path: P) -> Result<Self> {
430        let file = std::fs::File::open(path)?;
431        Self::read_from_file(file)
432    }
433}
434
435impl ReadGro<Atom> for Structure {
436    #[inline]
437    fn build_atom(
438        resnum: Option<ResNum>,
439        resname: Option<ResName>,
440        atomname: Option<AtomName>,
441        atomnum: Option<AtomNum>,
442        position: Option<[f32; 3]>,
443        velocity: Option<[f32; 3]>,
444    ) -> Atom {
445        // We know that the values we unwrap here should be `Some`. Any errors were handled
446        // upstream and we use the default `ParseList::All` configuration.
447        Atom {
448            resnum: resnum.unwrap(),
449            resname: resname.unwrap(),
450            atomname: atomname.unwrap(),
451            atomnum: atomnum.unwrap(),
452            position: Vec3::from_array(position.unwrap()),
453            velocity: Vec3::from_array(velocity.unwrap_or_default()),
454        }
455    }
456
457    fn build_structure(title: String, atoms: Vec<Atom>, boxvecs: BoxVecs) -> Self {
458        Self {
459            title,
460            atoms,
461            boxvecs,
462        }
463    }
464}
465
466impl FromStr for Structure {
467    type Err = ParseGroError;
468
469    fn from_str(gro: &str) -> Result<Self> {
470        Self::read(gro.as_bytes())
471    }
472}
473
474/// Transpose an array of options into an option of an array.
475///
476/// If any of the values is [`None`], this function will return [`None`].
477/// If all values are [`Some`], an array of the unwrapped values is returned.
478fn transpose_option_array<const N: usize, T>(vs: [Option<T>; N]) -> Option<[T; N]> {
479    if vs.iter().any(Option::is_none) {
480        None
481    } else {
482        Some(vs.map(Option::unwrap))
483    }
484}
485
486/// Parse an array of `&str`s into `f32`s.
487#[inline]
488fn parse_floats<const N: usize>(
489    vs: [&str; N],
490) -> std::result::Result<[f32; N], std::num::ParseFloatError> {
491    vs.iter()
492        .map(|v| v.parse())
493        .collect::<std::result::Result<Vec<_>, _>>()
494        .map(|vs| vs.try_into().unwrap()) // Safe, since we know N_in == N_out.
495}
496
497#[cfg(test)]
498mod tests {
499    use std::io;
500
501    use super::*;
502
503    const EPS: f32 = 0.0001; // For approximate float comparisons.
504
505    #[test]
506    fn open_gro() -> io::Result<()> {
507        let structure = Structure::open_gro(crate::tests::PATH)?;
508        assert_eq!(structure.title, "cg protein in water");
509        assert_eq!(structure.natoms(), 2869);
510
511        let center = Vec3::new(3.9875, 3.9760, 2.7035);
512        assert!(structure.center().abs_diff_eq(center, EPS));
513        assert_eq!(
514            structure.atoms[123].position,
515            Vec3::new(5.589, 5.256, 3.264)
516        );
517        assert_eq!(
518            structure.atoms[456].velocity,
519            Vec3::new(-0.144, -0.2159, -0.1276)
520        );
521
522        assert_eq!(
523            structure.boxvecs,
524            BoxVecs::Full([7.59661, 7.59661, 5.37162, 0.0, 0.0, 0.0, 0.0, 3.79831, 3.79831])
525        );
526
527        Ok(())
528    }
529}