molly/
lib.rs

1use std::fs::File;
2use std::io::{self, Read, Seek, SeekFrom};
3use std::{cell::Cell, path::Path};
4
5use glam::{Mat3, Vec3};
6use reader::read_nbytes;
7
8use crate::buffer::{Buffer, UnBuffered};
9use crate::reader::{
10    read_boxvec, read_compressed_positions, read_f32, read_f32s, read_i32, read_u32,
11};
12use crate::selection::{AtomSelection, FrameSelection};
13
14pub mod buffer;
15pub mod reader;
16pub mod selection;
17
18// See https://gitlab.com/gromacs/gromacs/-/blob/v2024.1/src/gromacs/fileio/xdrf.h?ref_type=tags#L78
19pub const XTC_1995_MAX_NATOMS: usize = 298261617;
20
21thread_local! {
22    /// A scratch buffer to read encoded bytes into for subsequent decoding.
23    static SCRATCH: Cell<Vec<u8>> = const { Cell::new(Vec::new()) };
24}
25
26pub type BoxVec = Mat3;
27
28#[repr(i32)]
29#[derive(Debug, Clone, Copy, PartialEq, Eq)]
30pub enum Magic {
31    Xtc1995 = 1995,
32    Xtc2023 = 2023,
33}
34
35impl Magic {
36    pub const XTC_1995: i32 = Magic::Xtc1995 as _;
37    pub const XTC_2023: i32 = Magic::Xtc2023 as _;
38
39    fn to_be_bytes(&self) -> [u8; 4] {
40        (*self as i32).to_be_bytes()
41    }
42}
43
44impl TryFrom<i32> for Magic {
45    type Error = String;
46
47    fn try_from(value: i32) -> Result<Self, Self::Error> {
48        match value {
49            Magic::XTC_1995 => Ok(Self::Xtc1995),
50            Magic::XTC_2023 => Ok(Self::Xtc2023),
51            unknown => Err(format!(
52                "found invalid magic number '{unknown}' ({unknown:#0x}), {} and {} are supported",
53                Self::XTC_1995,
54                Self::XTC_2023
55            )),
56        }
57    }
58}
59
60impl std::fmt::Display for Magic {
61    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62        write!(f, "{}", *self as i32)
63    }
64}
65
66/// The header of a single xtc frame.
67pub struct Header {
68    pub magic: Magic,
69    pub natoms: usize,
70    pub step: u32,
71    pub time: f32,
72    pub boxvec: Mat3,
73    pub natoms_repeated: usize,
74}
75
76impl Header {
77    pub const SIZE: usize = 4 * (5 + 9);
78
79    pub fn read(file: &mut impl Read) -> io::Result<Self> {
80        let magic = Magic::try_from(read_i32(file)?)
81            .map_err(|err| io::Error::other(format!("could not read header: {err}")))?;
82        let natoms: usize = read_u32(file)?
83            .try_into()
84            .map_err(|err| io::Error::other(format!("could not read natoms: {err}")))?;
85        let step: u32 = read_u32(file)?;
86        let time = read_f32(file)?;
87
88        // Read the frame data.
89        let boxvec = read_boxvec(file)?;
90        let natoms_repeated = read_u32(file)?
91            .try_into()
92            .map_err(|err| io::Error::other(format!("could not read second natoms: {err}")))?;
93        assert_eq!(natoms, natoms_repeated);
94
95        Ok(Header {
96            magic,
97            natoms,
98            step,
99            time,
100            boxvec,
101            natoms_repeated,
102        })
103    }
104
105    pub fn to_be_bytes(&self) -> [u8; Self::SIZE] {
106        let mut bytes = Vec::new();
107        bytes.extend(self.magic.to_be_bytes()); // i32
108
109        let natoms = u32::try_from(self.natoms).unwrap().to_be_bytes();
110        bytes.extend(natoms); // u32
111        bytes.extend(self.step.to_be_bytes()); // u32
112        bytes.extend(self.time.to_be_bytes()); // f32
113        bytes.extend(
114            self.boxvec
115                .to_cols_array()
116                .map(f32::to_be_bytes)
117                .iter()
118                .flatten(),
119        ); // 9 × f32
120        assert_eq!(self.natoms, self.natoms_repeated);
121        bytes.extend(natoms); // u32
122
123        bytes.try_into().unwrap()
124    }
125}
126
127#[derive(Debug, Default, Clone, PartialEq)]
128pub struct Frame {
129    pub step: u32,
130    /// Time in picoseconds.
131    pub time: f32,
132    pub boxvec: BoxVec,
133    pub precision: f32,
134    pub positions: Vec<f32>,
135}
136
137impl Frame {
138    /// Returns an iterator over the coordinates stored in this [`Frame`].
139    pub fn coords(&self) -> impl Iterator<Item = Vec3> + '_ {
140        self.positions.chunks_exact(3).map(Vec3::from_slice)
141    }
142
143    /// Returns the number of atoms in this [`Frame`].
144    pub fn natoms(&self) -> usize {
145        let npos = self.positions.len();
146        assert_eq!(
147            npos % 3,
148            0,
149            "the number of single positions in a frame must always be a multiple of 3"
150        );
151        npos / 3
152    }
153}
154
155/// Calculate the xdr padding for some number of bytes.
156#[doc(hidden)]
157pub fn padding(n: usize) -> usize {
158    (4 - (n % 4)) % 4
159}
160
161/// Read the positions in a frame after the header.
162///
163/// If successful, returns the number of compressed bytes that were read.
164///
165/// Internal use.
166#[doc(hidden)]
167pub fn read_positions<'s, 'r, B: buffer::Buffered<'s, 'r, R>, R: Read>(
168    file: &'r mut R,
169    header_natoms: usize,
170    scratch: &'s mut Vec<u8>,
171    frame: &mut Frame,
172    atom_selection: &AtomSelection,
173    magic: Magic,
174) -> io::Result<usize> {
175    // If the atom_selection specifies fewer atoms, we will only allocate up to that point.
176    let natoms_selected = atom_selection.natoms_selected(header_natoms);
177
178    // Resize the positions array for the selected number of atoms.
179    frame.positions.resize(natoms_selected * 3, f32::NAN);
180    frame.precision = read_f32(file)?;
181    read_compressed_positions::<B, R>(
182        file,
183        header_natoms,
184        &mut frame.positions,
185        frame.precision,
186        scratch,
187        atom_selection,
188        magic,
189    )
190}
191
192#[derive(Debug, Clone)]
193pub struct XTCReader<R> {
194    pub file: R,
195    pub step: usize,
196}
197
198impl XTCReader<std::fs::File> {
199    pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
200        let file = std::fs::File::open(path)?;
201        Ok(Self::new(file))
202    }
203}
204
205impl<R: Read> XTCReader<R> {
206    pub fn new(reader: R) -> Self {
207        Self {
208            file: reader,
209            step: 0,
210        }
211    }
212
213    /// Read the header at the start of a frame.
214    ///
215    /// Assumes the internal reader is at the start of a new frame header.
216    pub fn read_header(&mut self) -> io::Result<Header> {
217        Header::read(&mut self.file)
218    }
219
220    /// Read a small number of uncompressed positions.
221    ///
222    /// If successful, returns the number of compressed bytes that were read.
223    ///
224    /// # Panics
225    ///
226    /// `natoms` must be 9 or less, otherwise the positions must be decompressed and cannot be read
227    /// directly through this function.  
228    ///
229    /// Oh xtc, you are so fucking weird.
230    pub fn read_smol_positions(
231        &mut self,
232        natoms: usize,
233        frame: &mut Frame,
234        atom_selection: &AtomSelection,
235    ) -> io::Result<usize> {
236        assert!(
237            natoms <= 9,
238            "only read uncomprossed positions when the number of atoms is 9 or less"
239        );
240
241        // In case the number of atoms is very small, just read their uncompressed positions.
242        frame.positions.resize(natoms * 3, 0.0);
243        let mut buf = [0.0; 9 * 3]; // We have at most 9 atoms, so we handle them on the stack.
244        let buf = &mut buf[..natoms * 3];
245        read_f32s(&mut self.file, buf)?;
246        frame.positions.clear();
247        frame.positions.extend(
248            buf.chunks_exact(3)
249                .enumerate()
250                .filter_map(|(idx, pos): (usize, &[f32])| -> Option<[f32; 3]> {
251                    if atom_selection.is_included(idx).unwrap_or_default() {
252                        Some(pos.try_into().unwrap())
253                    } else {
254                        None
255                    }
256                })
257                .flatten(),
258        );
259        // TODO: It is unclear to me what to do with the precision in this case. It is
260        // basically invalid, or just irrelevant here, since we don't decode them. They were
261        // never compressed to begin with.
262
263        Ok(buf.len() * std::mem::size_of::<f32>())
264    }
265
266    /// A convenience function to read all frames in a trajectory.
267    ///
268    /// It is likely more efficient to use [`XTCReader::read_frame`] if you are only interested in
269    /// the values of a single frame at a time.
270    pub fn read_all_frames(&mut self) -> io::Result<Box<[Frame]>> {
271        let mut frames = Vec::new();
272        loop {
273            let mut frame = Frame::default();
274            if let Err(err) = self.read_frame(&mut frame) {
275                match err.kind() {
276                    // We have found the end of the file. No more frames, we're done.
277                    io::ErrorKind::UnexpectedEof => break,
278                    // Something else went wrong...
279                    _ => Err(err)?,
280                }
281            }
282            frames.push(frame);
283        }
284        Ok(frames.into_boxed_slice())
285    }
286
287    /// Reads and returns a [`Frame`] and advances one step.
288    pub fn read_frame(&mut self, frame: &mut Frame) -> io::Result<()> {
289        self.read_frame_with_selection(frame, &AtomSelection::All)
290    }
291
292    /// Reads and returns a [`Frame`] according to the [`AtomSelection`], and advances one step.
293    pub fn read_frame_with_selection(
294        &mut self,
295        frame: &mut Frame,
296        atom_selection: &AtomSelection,
297    ) -> io::Result<()> {
298        // Take the thread-local SCRATCH and use that while decoding the values.
299        let mut scratch = SCRATCH.take();
300        self.read_frame_with_scratch(frame, &mut scratch, atom_selection)
301    }
302
303    /// Reads and returns a [`Frame`] and advances one step, internally reading the compressed data
304    /// into `scratch`.
305    ///
306    /// # Note
307    ///
308    /// This function performs the work of [`XTCReader::read_frame`], but leaves all allocations to
309    /// the caller.
310    ///
311    /// The contents of `scratch` should not be depended upon! It just serves as a scratch buffer
312    /// for the inner workings of decoding.
313    ///
314    /// In most cases, [`XTCReader::read_frame`] is more than sufficient. This function only serves
315    /// to make specific optimization possible.
316    pub fn read_frame_with_scratch(
317        &mut self,
318        frame: &mut Frame,
319        scratch: &mut Vec<u8>,
320        atom_selection: &AtomSelection,
321    ) -> io::Result<()> {
322        self.read_frame_with_scratch_impl::<UnBuffered>(frame, scratch, atom_selection)
323    }
324
325    /// Implementation of reading a frame with a scratch buffer.
326    fn read_frame_with_scratch_impl<'s, 'r, B: buffer::Buffered<'s, 'r, R>>(
327        &'r mut self,
328        frame: &mut Frame,
329        scratch: &'s mut Vec<u8>,
330        atom_selection: &AtomSelection,
331    ) -> io::Result<()> {
332        // Start of by reading the header.
333        let header = self.read_header()?;
334
335        // Now, we read the atoms.
336        if header.natoms <= 9 {
337            self.read_smol_positions(header.natoms, frame, atom_selection)?;
338        } else {
339            read_positions::<B, R>(
340                &mut self.file,
341                header.natoms,
342                scratch,
343                frame,
344                atom_selection,
345                header.magic,
346            )?;
347        }
348
349        self.step += 1;
350
351        frame.step = header.step;
352        frame.time = header.time;
353        frame.boxvec = header.boxvec;
354
355        Ok(())
356    }
357}
358
359impl XTCReader<File> {
360    /// Reset the reader to its initial position.
361    ///
362    /// Go back to the first frame.
363    pub fn home(&mut self) -> io::Result<()> {
364        self.file.seek(SeekFrom::Start(0))?;
365        self.step = 0;
366        Ok(())
367    }
368
369    /// Returns the offsets from the headers in this [`XTCReader<R>`] from its current position.
370    ///
371    /// The last value points one byte after the last byte in the reader.
372    ///
373    /// If this function is called when the internal reader is not at its starting position, the
374    /// frame offsets _from_ its position are determined. If you wish to determine the offsets from
375    /// the initial reader position, call [`XTCReader::home`] before calling this function.
376    ///
377    /// # Errors
378    ///
379    /// This function will pass through any reader errors.
380    pub fn determine_offsets_exclusive(&mut self, until: Option<usize>) -> io::Result<Box<[u64]>> {
381        let file = &mut self.file;
382        // Remember where we start so we can return to it later.
383        let start_pos = file.stream_position()?;
384
385        let mut offsets = Vec::new();
386
387        while until.map_or(true, |until| offsets.len() < until) {
388            let header = match Header::read(file) {
389                Ok(header) => header,
390                Err(err) if err.kind() == io::ErrorKind::UnexpectedEof => break,
391                Err(err) => Err(err)?,
392            };
393
394            let skip = if header.natoms <= 9 {
395                // Know how many bytes are in this frame until the next header since the positions
396                // are uncompressed.
397                header.natoms as u64 * 3 * 4
398            } else {
399                // We need to read the nbytes value to get the offset until the next header.
400                file.seek(SeekFrom::Current(32))?;
401                // The size of the buffer is stored either as a 64 or 32-bit integer, depending on
402                // the magic number in the header.
403                let nbytes = read_nbytes(file, header.magic)? as u64;
404                nbytes + padding(nbytes as usize) as u64
405            };
406            let offset = file.seek(SeekFrom::Current(skip as i64))?;
407            offsets.push(offset);
408        }
409
410        // Return back to where we started.
411        file.seek(SeekFrom::Start(start_pos))?;
412
413        Ok(offsets.into_boxed_slice())
414    }
415
416    /// Returns the offsets of this [`XTCReader<R>`] from its current position.
417    ///
418    /// The last value points to the start of the last frame.
419    ///
420    /// If this function is called when the internal reader is not at its starting position, the
421    /// frame offsets _from_ its position are determined. If you wish to determine the offsets from
422    /// the initial reader position, call [`XTCReader::home`] before calling this function.
423    ///
424    /// # Errors
425    ///
426    /// This function will pass through any reader errors.
427    pub fn determine_offsets(&mut self, until: Option<usize>) -> io::Result<Box<[u64]>> {
428        let mut offsets = vec![0];
429        let exclusive = self.determine_offsets_exclusive(until)?;
430        offsets.extend(exclusive.iter().take(exclusive.len().saturating_sub(1)));
431        Ok(offsets.into_boxed_slice())
432    }
433
434    /// Returns the frame sizes of this [`XTCReader<R>`].
435    ///
436    /// # Errors
437    ///
438    /// This function will pass through any reader errors.
439    pub fn determine_frame_sizes(&mut self, until: Option<usize>) -> io::Result<Box<[u64]>> {
440        let starts = self.determine_offsets_exclusive(until)?;
441        let ends = starts.iter().clone().skip(1);
442        Ok(starts
443            .iter()
444            .zip(ends)
445            .map(|(s, e)| e - s)
446            .collect::<Vec<_>>()
447            .into_boxed_slice())
448    }
449
450    /// Seeks to offset, then reads and returns a [`Frame`] and advances one step.
451    ///
452    /// # Note
453    ///
454    /// The `BUFFERED` const generic value can be used to set whether the frame reader will read in
455    /// a buffered manner or not at compile time.
456    ///
457    /// Buffered reading is most favorable when a small number of positions are read from the top of
458    /// the frame (leaving many positions that do not need to be read at the bottom), especially at the
459    /// point where disk read speed is a bottleneck.
460    pub fn read_frame_at_offset<const BUFFERED: bool>(
461        &mut self,
462        frame: &mut Frame,
463        offset: u64,
464        atom_selection: &AtomSelection,
465    ) -> io::Result<()> {
466        self.file.seek(SeekFrom::Start(offset))?;
467        match BUFFERED {
468            false => self.read_frame_with_selection(frame, atom_selection),
469            true => self.read_frame_with_selection_buffered(frame, atom_selection),
470        }
471    }
472
473    /// Append [`Frame`]s to the `frames` buffer according to a [`Selection`].
474    ///
475    /// If successful, it will return the number of frames that were read.
476    /// This can be useful since the selection itself is not enough to tell how many frames will
477    /// actually be read.
478    ///
479    /// # Note
480    ///
481    /// The `BUFFERED` const generic value can be used to set whether the frame reader will read in
482    /// a buffered manner or not at compile time.
483    ///
484    /// Buffered reading is most favorable when a small number of positions are read from the top of
485    /// the frame (leaving many positions that do not need to be read at the bottom), especially at the
486    /// point where disk read speed is a bottleneck.
487    pub fn read_frames<const BUFFERED: bool>(
488        &mut self,
489        frames: &mut impl Extend<Frame>,
490        frame_selection: &FrameSelection,
491        atom_selection: &AtomSelection,
492    ) -> io::Result<usize> {
493        let offsets = self.determine_offsets(frame_selection.until())?;
494        let mut n = 0;
495        for (idx, &offset) in offsets.iter().enumerate() {
496            match frame_selection.is_included(idx) {
497                Some(true) => {}
498                Some(false) => continue,
499                None => break,
500            }
501            let mut frame = Frame::default();
502            self.read_frame_at_offset::<BUFFERED>(&mut frame, offset, atom_selection)?;
503            frames.extend(Some(frame));
504            n += 1;
505        }
506
507        Ok(n)
508    }
509
510    /// Reads and returns a [`Frame`] according to the [`AtomSelection`], and advances one step.
511    pub fn read_frame_with_selection_buffered(
512        &mut self,
513        frame: &mut Frame,
514        atom_selection: &AtomSelection,
515    ) -> io::Result<()> {
516        // Take the thread-local SCRATCH and use that while decoding the values.
517        let mut scratch = SCRATCH.take();
518        self.read_frame_with_scratch_buffered(frame, &mut scratch, atom_selection)
519    }
520
521    /// Reads and returns a [`Frame`] and advances one step, internally reading the compressed data
522    /// into `scratch`.
523    ///
524    /// # Note
525    ///
526    /// This function performs the work of [`XTCReader::read_frame`], but leaves all allocations to
527    /// the caller.
528    ///
529    /// The contents of `scratch` should not be depended upon! It just serves as a scratch buffer
530    /// for the inner workings of decoding.
531    ///
532    /// In most cases, [`XTCReader::read_frame`] is more than sufficient. This function only serves
533    /// to make specific optimization possible.
534    pub fn read_frame_with_scratch_buffered(
535        &mut self,
536        frame: &mut Frame,
537        scratch: &mut Vec<u8>,
538        atom_selection: &AtomSelection,
539    ) -> io::Result<()> {
540        self.read_frame_with_scratch_impl::<Buffer>(frame, scratch, atom_selection)
541    }
542}