molly/
lib.rs

1use std::fs::File;
2use std::io::{self, Read, Seek, SeekFrom};
3use std::{cell::Cell, path::Path};
4
5use glam::{Mat3, Vec3};
6use reader::read_nbytes;
7
8use crate::buffer::{Buffer, UnBuffered};
9use crate::reader::{
10    read_boxvec, read_compressed_positions, read_f32, read_f32s, read_i32, read_u32,
11};
12use crate::selection::{AtomSelection, FrameSelection};
13
14pub mod buffer;
15pub mod reader;
16pub mod selection;
17
18// See https://gitlab.com/gromacs/gromacs/-/blob/v2024.1/src/gromacs/fileio/xdrf.h?ref_type=tags#L78
19pub const XTC_1995_MAX_NATOMS: usize = 298261617;
20
21thread_local! {
22    /// A scratch buffer to read encoded bytes into for subsequent decoding.
23    static SCRATCH: Cell<Vec<u8>> = const { Cell::new(Vec::new()) };
24}
25
26pub type BoxVec = Mat3;
27
28#[repr(i32)]
29#[derive(Debug, Clone, Copy, PartialEq, Eq)]
30pub enum Magic {
31    Xtc1995 = 1995,
32    Xtc2023 = 2023,
33}
34
35impl Magic {
36    pub const XTC_1995: i32 = Magic::Xtc1995 as _;
37    pub const XTC_2023: i32 = Magic::Xtc2023 as _;
38
39    fn to_be_bytes(&self) -> [u8; 4] {
40        (*self as i32).to_be_bytes()
41    }
42}
43
44impl TryFrom<i32> for Magic {
45    type Error = String;
46
47    fn try_from(value: i32) -> Result<Self, Self::Error> {
48        match value {
49            Magic::XTC_1995 => Ok(Self::Xtc1995),
50            Magic::XTC_2023 => Ok(Self::Xtc2023),
51            unknown => Err(format!(
52                "found invalid magic number '{unknown}' ({unknown:#0x}), {} and {} are supported",
53                Self::XTC_1995,
54                Self::XTC_2023
55            )),
56        }
57    }
58}
59
60impl std::fmt::Display for Magic {
61    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62        write!(f, "{}", *self as i32)
63    }
64}
65
66/// The header of a single xtc frame.
67pub struct Header {
68    pub magic: Magic,
69    pub natoms: usize,
70    pub step: u32,
71    pub time: f32,
72    pub boxvec: Mat3,
73    pub natoms_repeated: usize,
74}
75
76impl Header {
77    pub const SIZE: usize = 4 * (5 + 9);
78
79    pub fn read(file: &mut impl Read) -> io::Result<Self> {
80        let magic = Magic::try_from(read_i32(file)?)
81            .map_err(|err| io::Error::other(format!("could not read header: {err}")))?;
82        let natoms: usize = read_u32(file)?
83            .try_into()
84            .map_err(|err| io::Error::other(format!("could not read natoms: {err}")))?;
85        let step: u32 = read_i32(file)?
86            .try_into()
87            .map_err(|err| io::Error::other(format!("could not read step: {err}")))?;
88        let time = read_f32(file)?;
89
90        // Read the frame data.
91        let boxvec = read_boxvec(file)?;
92        let natoms_repeated = read_u32(file)?
93            .try_into()
94            .map_err(|err| io::Error::other(format!("could not read second natoms: {err}")))?;
95        assert_eq!(natoms, natoms_repeated);
96
97        Ok(Header {
98            magic,
99            natoms,
100            step,
101            time,
102            boxvec,
103            natoms_repeated,
104        })
105    }
106
107    pub fn to_be_bytes(&self) -> [u8; Self::SIZE] {
108        let mut bytes = Vec::new();
109        bytes.extend(self.magic.to_be_bytes()); // i32
110
111        let natoms = u32::try_from(self.natoms).unwrap().to_be_bytes();
112        bytes.extend(natoms); // u32
113        bytes.extend(i32::try_from(self.step).unwrap().to_be_bytes()); // i32
114        bytes.extend(self.time.to_be_bytes()); // f32
115        bytes.extend(
116            self.boxvec
117                .to_cols_array()
118                .map(f32::to_be_bytes)
119                .iter()
120                .flatten(),
121        ); // 9 × f32
122        assert_eq!(self.natoms, self.natoms_repeated);
123        bytes.extend(natoms); // u32
124
125        bytes.try_into().unwrap()
126    }
127}
128
129#[derive(Debug, Default, Clone, PartialEq)]
130pub struct Frame {
131    pub step: u32,
132    /// Time in picoseconds.
133    pub time: f32,
134    pub boxvec: BoxVec,
135    pub precision: f32,
136    pub positions: Vec<f32>,
137}
138
139impl Frame {
140    /// Returns an iterator over the coordinates stored in this [`Frame`].
141    pub fn coords(&self) -> impl Iterator<Item = Vec3> + '_ {
142        self.positions.chunks_exact(3).map(Vec3::from_slice)
143    }
144
145    /// Returns the number of atoms in this [`Frame`].
146    pub fn natoms(&self) -> usize {
147        let npos = self.positions.len();
148        assert_eq!(
149            npos % 3,
150            0,
151            "the number of single positions in a frame must always be a multiple of 3"
152        );
153        npos / 3
154    }
155}
156
157/// Calculate the xdr padding for some number of bytes.
158#[doc(hidden)]
159pub fn padding(n: usize) -> usize {
160    (4 - (n % 4)) % 4
161}
162
163/// Read the positions in a frame after the header.
164///
165/// If successful, returns the number of compressed bytes that were read.
166///
167/// Internal use.
168#[doc(hidden)]
169pub fn read_positions<'s, 'r, B: buffer::Buffered<'s, 'r, R>, R: Read>(
170    file: &'r mut R,
171    header_natoms: usize,
172    scratch: &'s mut Vec<u8>,
173    frame: &mut Frame,
174    atom_selection: &AtomSelection,
175    magic: Magic,
176) -> io::Result<usize> {
177    // If the atom_selection specifies fewer atoms, we will only allocate up to that point.
178    let natoms_selected = atom_selection.natoms_selected(header_natoms);
179
180    // Resize the positions array for the selected number of atoms.
181    frame.positions.resize(natoms_selected * 3, f32::NAN);
182    frame.precision = read_f32(file)?;
183    read_compressed_positions::<B, R>(
184        file,
185        header_natoms,
186        &mut frame.positions,
187        frame.precision,
188        scratch,
189        atom_selection,
190        magic,
191    )
192}
193
194#[derive(Debug, Clone)]
195pub struct XTCReader<R> {
196    pub file: R,
197    pub step: usize,
198}
199
200impl XTCReader<std::fs::File> {
201    pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
202        let file = std::fs::File::open(path)?;
203        Ok(Self::new(file))
204    }
205}
206
207impl<R: Read> XTCReader<R> {
208    pub fn new(reader: R) -> Self {
209        Self {
210            file: reader,
211            step: 0,
212        }
213    }
214
215    /// Read the header at the start of a frame.
216    ///
217    /// Assumes the internal reader is at the start of a new frame header.
218    pub fn read_header(&mut self) -> io::Result<Header> {
219        Header::read(&mut self.file)
220    }
221
222    /// Read a small number of uncompressed positions.
223    ///
224    /// If successful, returns the number of compressed bytes that were read.
225    ///
226    /// # Panics
227    ///
228    /// `natoms` must be 9 or less, otherwise the positions must be decompressed and cannot be read
229    /// directly through this function.  
230    ///
231    /// Oh xtc, you are so fucking weird.
232    pub fn read_smol_positions(
233        &mut self,
234        natoms: usize,
235        frame: &mut Frame,
236        atom_selection: &AtomSelection,
237    ) -> io::Result<usize> {
238        assert!(
239            natoms <= 9,
240            "only read uncomprossed positions when the number of atoms is 9 or less"
241        );
242
243        // In case the number of atoms is very small, just read their uncompressed positions.
244        frame.positions.resize(natoms * 3, 0.0);
245        let mut buf = [0.0; 9 * 3]; // We have at most 9 atoms, so we handle them on the stack.
246        let buf = &mut buf[..natoms * 3];
247        read_f32s(&mut self.file, buf)?;
248        frame.positions.clear();
249        frame.positions.extend(
250            buf.chunks_exact(3)
251                .enumerate()
252                .filter_map(|(idx, pos): (usize, &[f32])| -> Option<[f32; 3]> {
253                    if atom_selection.is_included(idx).unwrap_or_default() {
254                        Some(pos.try_into().unwrap())
255                    } else {
256                        None
257                    }
258                })
259                .flatten(),
260        );
261        // TODO: It is unclear to me what to do with the precision in this case. It is
262        // basically invalid, or just irrelevant here, since we don't decode them. They were
263        // never compressed to begin with.
264
265        Ok(buf.len() * std::mem::size_of::<f32>())
266    }
267
268    /// A convenience function to read all frames in a trajectory.
269    ///
270    /// It is likely more efficient to use [`XTCReader::read_frame`] if you are only interested in
271    /// the values of a single frame at a time.
272    pub fn read_all_frames(&mut self) -> io::Result<Box<[Frame]>> {
273        let mut frames = Vec::new();
274        loop {
275            let mut frame = Frame::default();
276            if let Err(err) = self.read_frame(&mut frame) {
277                match err.kind() {
278                    // We have found the end of the file. No more frames, we're done.
279                    io::ErrorKind::UnexpectedEof => break,
280                    // Something else went wrong...
281                    _ => Err(err)?,
282                }
283            }
284            frames.push(frame);
285        }
286        Ok(frames.into_boxed_slice())
287    }
288
289    /// Reads and returns a [`Frame`] and advances one step.
290    pub fn read_frame(&mut self, frame: &mut Frame) -> io::Result<()> {
291        self.read_frame_with_selection(frame, &AtomSelection::All)
292    }
293
294    /// Reads and returns a [`Frame`] according to the [`AtomSelection`], and advances one step.
295    pub fn read_frame_with_selection(
296        &mut self,
297        frame: &mut Frame,
298        atom_selection: &AtomSelection,
299    ) -> io::Result<()> {
300        // Take the thread-local SCRATCH and use that while decoding the values.
301        let mut scratch = SCRATCH.take();
302        self.read_frame_with_scratch(frame, &mut scratch, atom_selection)
303    }
304
305    /// Reads and returns a [`Frame`] and advances one step, internally reading the compressed data
306    /// into `scratch`.
307    ///
308    /// # Note
309    ///
310    /// This function performs the work of [`XTCReader::read_frame`], but leaves all allocations to
311    /// the caller.
312    ///
313    /// The contents of `scratch` should not be depended upon! It just serves as a scratch buffer
314    /// for the inner workings of decoding.
315    ///
316    /// In most cases, [`XTCReader::read_frame`] is more than sufficient. This function only serves
317    /// to make specific optimization possible.
318    pub fn read_frame_with_scratch(
319        &mut self,
320        frame: &mut Frame,
321        scratch: &mut Vec<u8>,
322        atom_selection: &AtomSelection,
323    ) -> io::Result<()> {
324        self.read_frame_with_scratch_impl::<UnBuffered>(frame, scratch, atom_selection)
325    }
326
327    /// Implementation of reading a frame with a scratch buffer.
328    fn read_frame_with_scratch_impl<'s, 'r, B: buffer::Buffered<'s, 'r, R>>(
329        &'r mut self,
330        frame: &mut Frame,
331        scratch: &'s mut Vec<u8>,
332        atom_selection: &AtomSelection,
333    ) -> io::Result<()> {
334        // Start of by reading the header.
335        let header = self.read_header()?;
336
337        // Now, we read the atoms.
338        if header.natoms <= 9 {
339            self.read_smol_positions(header.natoms, frame, atom_selection)?;
340        } else {
341            read_positions::<B, R>(
342                &mut self.file,
343                header.natoms,
344                scratch,
345                frame,
346                atom_selection,
347                header.magic,
348            )?;
349        }
350
351        self.step += 1;
352
353        frame.step = header.step;
354        frame.time = header.time;
355        frame.boxvec = header.boxvec;
356
357        Ok(())
358    }
359}
360
361impl XTCReader<File> {
362    /// Reset the reader to its initial position.
363    ///
364    /// Go back to the first frame.
365    pub fn home(&mut self) -> io::Result<()> {
366        self.file.seek(SeekFrom::Start(0))?;
367        self.step = 0;
368        Ok(())
369    }
370
371    /// Returns the offsets from the headers in this [`XTCReader<R>`] from its current position.
372    ///
373    /// The last value points one byte after the last byte in the reader.
374    ///
375    /// If this function is called when the internal reader is not at its starting position, the
376    /// frame offsets _from_ its position are determined. If you wish to determine the offsets from
377    /// the initial reader position, call [`XTCReader::home`] before calling this function.
378    ///
379    /// # Errors
380    ///
381    /// This function will pass through any reader errors.
382    pub fn determine_offsets_exclusive(&mut self, until: Option<usize>) -> io::Result<Box<[u64]>> {
383        let file = &mut self.file;
384        // Remember where we start so we can return to it later.
385        let start_pos = file.stream_position()?;
386
387        let mut offsets = Vec::new();
388
389        while until.map_or(true, |until| offsets.len() < until) {
390            let header = match Header::read(file) {
391                Ok(header) => header,
392                Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => break,
393                Err(err) => Err(err)?,
394            };
395
396            let skip = if header.natoms <= 9 {
397                // Know how many bytes are in this frame until the next header since the positions
398                // are uncompressed.
399                header.natoms as u64 * 3 * 4
400            } else {
401                // We need to read the nbytes value to get the offset until the next header.
402                file.seek(SeekFrom::Current(32))?;
403                // The size of the buffer is stored either as a 64 or 32-bit integer, depending on
404                // the magic number in the header.
405                let nbytes = read_nbytes(file, header.magic)? as u64;
406                nbytes + padding(nbytes as usize) as u64
407            };
408            let offset = file.seek(SeekFrom::Current(skip as i64))?;
409            offsets.push(offset);
410        }
411
412        // Return back to where we started.
413        file.seek(SeekFrom::Start(start_pos))?;
414
415        Ok(offsets.into_boxed_slice())
416    }
417
418    /// Returns the offsets of this [`XTCReader<R>`] from its current position.
419    ///
420    /// The last value points to the start of the last frame.
421    ///
422    /// If this function is called when the internal reader is not at its starting position, the
423    /// frame offsets _from_ its position are determined. If you wish to determine the offsets from
424    /// the initial reader position, call [`XTCReader::home`] before calling this function.
425    ///
426    /// # Errors
427    ///
428    /// This function will pass through any reader errors.
429    pub fn determine_offsets(&mut self, until: Option<usize>) -> io::Result<Box<[u64]>> {
430        let mut offsets = vec![0];
431        let exclusive = self.determine_offsets_exclusive(until)?;
432        offsets.extend(exclusive.iter().take(exclusive.len().saturating_sub(1)));
433        Ok(offsets.into_boxed_slice())
434    }
435
436    /// Returns the frame sizes of this [`XTCReader<R>`].
437    ///
438    /// # Errors
439    ///
440    /// This function will pass through any reader errors.
441    pub fn determine_frame_sizes(&mut self, until: Option<usize>) -> io::Result<Box<[u64]>> {
442        let starts = self.determine_offsets_exclusive(until)?;
443        let ends = starts.iter().clone().skip(1);
444        Ok(starts
445            .iter()
446            .zip(ends)
447            .map(|(s, e)| e - s)
448            .collect::<Vec<_>>()
449            .into_boxed_slice())
450    }
451
452    /// Seeks to offset, then reads and returns a [`Frame`] and advances one step.
453    ///
454    /// # Note
455    ///
456    /// The `BUFFERED` const generic value can be used to set whether the frame reader will read in
457    /// a buffered manner or not at compile time.
458    ///
459    /// Buffered reading is most favorable when a small number of positions are read from the top of
460    /// the frame (leaving many positions that do not need to be read at the bottom), especially at the
461    /// point where disk read speed is a bottleneck.
462    pub fn read_frame_at_offset<const BUFFERED: bool>(
463        &mut self,
464        frame: &mut Frame,
465        offset: u64,
466        atom_selection: &AtomSelection,
467    ) -> io::Result<()> {
468        self.file.seek(SeekFrom::Start(offset))?;
469        match BUFFERED {
470            false => self.read_frame_with_selection(frame, atom_selection),
471            true => self.read_frame_with_selection_buffered(frame, atom_selection),
472        }
473    }
474
475    /// Append [`Frame`]s to the `frames` buffer according to a [`Selection`].
476    ///
477    /// If successful, it will return the number of frames that were read.
478    /// This can be useful since the selection itself is not enough to tell how many frames will
479    /// actually be read.
480    ///
481    /// # Note
482    ///
483    /// The `BUFFERED` const generic value can be used to set whether the frame reader will read in
484    /// a buffered manner or not at compile time.
485    ///
486    /// Buffered reading is most favorable when a small number of positions are read from the top of
487    /// the frame (leaving many positions that do not need to be read at the bottom), especially at the
488    /// point where disk read speed is a bottleneck.
489    pub fn read_frames<const BUFFERED: bool>(
490        &mut self,
491        frames: &mut impl Extend<Frame>,
492        frame_selection: &FrameSelection,
493        atom_selection: &AtomSelection,
494    ) -> io::Result<usize> {
495        let offsets = self.determine_offsets(frame_selection.until())?;
496        let mut n = 0;
497        for (idx, &offset) in offsets.iter().enumerate() {
498            match frame_selection.is_included(idx) {
499                Some(true) => {}
500                Some(false) => continue,
501                None => break,
502            }
503            let mut frame = Frame::default();
504            self.read_frame_at_offset::<BUFFERED>(&mut frame, offset, atom_selection)?;
505            frames.extend(Some(frame));
506            n += 1;
507        }
508
509        Ok(n)
510    }
511
512    /// Reads and returns a [`Frame`] according to the [`AtomSelection`], and advances one step.
513    pub fn read_frame_with_selection_buffered(
514        &mut self,
515        frame: &mut Frame,
516        atom_selection: &AtomSelection,
517    ) -> io::Result<()> {
518        // Take the thread-local SCRATCH and use that while decoding the values.
519        let mut scratch = SCRATCH.take();
520        self.read_frame_with_scratch_buffered(frame, &mut scratch, atom_selection)
521    }
522
523    /// Reads and returns a [`Frame`] and advances one step, internally reading the compressed data
524    /// into `scratch`.
525    ///
526    /// # Note
527    ///
528    /// This function performs the work of [`XTCReader::read_frame`], but leaves all allocations to
529    /// the caller.
530    ///
531    /// The contents of `scratch` should not be depended upon! It just serves as a scratch buffer
532    /// for the inner workings of decoding.
533    ///
534    /// In most cases, [`XTCReader::read_frame`] is more than sufficient. This function only serves
535    /// to make specific optimization possible.
536    pub fn read_frame_with_scratch_buffered(
537        &mut self,
538        frame: &mut Frame,
539        scratch: &mut Vec<u8>,
540        atom_selection: &AtomSelection,
541    ) -> io::Result<()> {
542        self.read_frame_with_scratch_impl::<Buffer>(frame, scratch, atom_selection)
543    }
544}