readcon_core/
iterators.rs

1//=============================================================================
2// The Public API - A clean iterator for users of our library
3//=============================================================================
4
5use crate::parser::{parse_single_frame, parse_velocity_section};
6use crate::{error, types};
7use std::iter::Peekable;
8use std::path::Path;
9
/// A lazy iterator over the simulation frames stored in the text of a `.con`
/// or `.convel` file.
///
/// The iterator borrows the file contents and walks them line by line. Every
/// call to `next()` attempts to parse one complete `ConFrame`. Velocity
/// sections are detected automatically: when a blank line follows the
/// coordinate blocks, the velocity data is parsed into the atoms.
///
/// Items are `Result<ConFrame, ParseError>`, so a malformed frame is reported
/// to the caller instead of aborting the whole iteration.
///
/// The type is `Debug` for diagnostics and `Clone` so a cursor position can be
/// saved; cloning copies only the line cursor, never the underlying text.
#[derive(Debug, Clone)]
pub struct ConFrameIterator<'a> {
    /// Remaining lines of the input. Peekable so that end-of-input and the
    /// blank velocity separator can be detected without consuming a line.
    lines: Peekable<std::str::Lines<'a>>,
}
23
24impl<'a> ConFrameIterator<'a> {
25    /// Creates a new `ConFrameIterator` from a string slice of the entire file.
26    ///
27    /// # Arguments
28    ///
29    /// * `file_contents` - A string slice containing the text of one or more `.con` frames.
30    pub fn new(file_contents: &'a str) -> Self {
31        ConFrameIterator {
32            lines: file_contents.lines().peekable(),
33        }
34    }
35
36    /// Skips the next frame without fully parsing its atomic data.
37    ///
38    /// This is more efficient than `next()` if you only need to advance the
39    /// iterator. It reads the frame's header to determine how many lines to skip,
40    /// including any velocity section if present.
41    ///
42    /// # Returns
43    ///
44    /// * `Some(Ok(()))` on a successful skip.
45    /// * `Some(Err(ParseError::...))` if there's an error parsing the header.
46    /// * `None` if the iterator is already at the end.
47    pub fn forward(&mut self) -> Option<Result<(), error::ParseError>> {
48        // Skip frame by parsing only required header fields to avoid full parsing overhead
49        if self.lines.peek().is_none() {
50            return None;
51        }
52
53        // Manually consume the first 6 lines of the header, which we don't need for skipping.
54        for _ in 0..6 {
55            if self.lines.next().is_none() {
56                return Some(Err(error::ParseError::IncompleteHeader));
57            }
58        }
59
60        // Line 7: natm_types. We need to parse this.
61        let natm_types: usize = match self.lines.next() {
62            Some(line) => match crate::parser::parse_line_of_n::<usize>(line, 1) {
63                Ok(v) => v[0],
64                Err(e) => return Some(Err(e)),
65            },
66            None => return Some(Err(error::ParseError::IncompleteHeader)),
67        };
68
69        // Line 8: natms_per_type. We need this to sum the total number of atoms.
70        let natms_per_type: Vec<usize> = match self.lines.next() {
71            Some(line) => match crate::parser::parse_line_of_n(line, natm_types) {
72                Ok(v) => v,
73                Err(e) => return Some(Err(e)),
74            },
75            None => return Some(Err(error::ParseError::IncompleteHeader)),
76        };
77
78        // Line 9: masses_per_type. We just need to consume this line.
79        if self.lines.next().is_none() {
80            return Some(Err(error::ParseError::IncompleteHeader));
81        }
82
83        // Calculate how many more lines to skip for coordinate blocks.
84        let total_atoms: usize = natms_per_type.iter().sum();
85        // For each atom type, there is a symbol line and a "Coordinates..." line.
86        let non_atom_lines = natm_types * 2;
87        let lines_to_skip = total_atoms + non_atom_lines;
88
89        // Advance the iterator by skipping the coordinate block lines.
90        for _ in 0..lines_to_skip {
91            if self.lines.next().is_none() {
92                // The file ended before the header's promise was fulfilled.
93                return Some(Err(error::ParseError::IncompleteFrame));
94            }
95        }
96
97        // Check for an optional velocity section (blank separator followed by
98        // velocity blocks with the same structure as coordinate blocks).
99        if let Some(line) = self.lines.peek() {
100            if line.trim().is_empty() {
101                // Consume the blank separator
102                self.lines.next();
103                // Skip the velocity blocks: same structure as coordinate blocks
104                let vel_lines_to_skip = total_atoms + non_atom_lines;
105                for _ in 0..vel_lines_to_skip {
106                    if self.lines.next().is_none() {
107                        return Some(Err(error::ParseError::IncompleteVelocitySection));
108                    }
109                }
110            }
111        }
112
113        Some(Ok(()))
114    }
115}
116
117impl<'a> Iterator for ConFrameIterator<'a> {
118    /// The type of item yielded by the iterator.
119    ///
120    /// Each item is a `Result` that contains a successfully parsed `ConFrame` or a
121    /// `ParseError` if the frame's data is malformed.
122    type Item = Result<types::ConFrame, error::ParseError>;
123
124    /// Advances the iterator and attempts to parse the next frame.
125    ///
126    /// This method will return `None` only when there are no more lines to consume.
127    /// If there are lines but they do not form a complete frame, it will return
128    /// `Some(Err(ParseError::...))`.
129    fn next(&mut self) -> Option<Self::Item> {
130        // If there are no more lines at all, the iterator is exhausted.
131        if self.lines.peek().is_none() {
132            return None;
133        }
134        // Otherwise, attempt to parse the next frame from the available lines.
135        let mut frame = match parse_single_frame(&mut self.lines) {
136            Ok(f) => f,
137            Err(e) => return Some(Err(e)),
138        };
139        // Attempt to parse optional velocity section
140        match parse_velocity_section(&mut self.lines, &frame.header, &mut frame.atom_data) {
141            Ok(_) => {}
142            Err(e) => return Some(Err(e)),
143        }
144        Some(Ok(frame))
145    }
146}
147
148/// Reads all frames from a file using memory-mapped I/O.
149///
150/// This avoids the `read_to_string` allocation for large trajectory files by
151/// letting the OS page cache handle the data. The mmap is read-only and the
152/// file contents are validated as UTF-8 before parsing.
153pub fn read_all_frames(path: &Path) -> Result<Vec<types::ConFrame>, Box<dyn std::error::Error>> {
154    let file = std::fs::File::open(path)?;
155    let mmap = unsafe { memmap2::Mmap::map(&file)? };
156    let contents = std::str::from_utf8(&mmap)?;
157    let iter = ConFrameIterator::new(contents);
158    let frames: Result<Vec<_>, _> = iter.collect();
159    Ok(frames?)
160}
161
/// Parses frames in parallel using rayon, splitting on frame boundaries.
///
/// Phase 1 (sequential): scan just the header fields that determine each
/// frame's length and cut `file_contents` into one string slice per frame.
/// Phase 2 (parallel): parse every slice independently with a
/// `ConFrameIterator`.
///
/// The returned vector holds one entry per detected frame, in file order. If
/// the scan meets a header whose size fields cannot be read, everything from
/// that frame's start to the end of the input becomes the final slice, so the
/// parser itself reports the error for it.
///
/// Requires the `parallel` feature.
#[cfg(feature = "parallel")]
pub fn parse_frames_parallel(
    file_contents: &str,
) -> Vec<Result<types::ConFrame, error::ParseError>> {
    use rayon::prelude::*;

    split_frame_chunks(file_contents)
        .into_par_iter()
        .map(|chunk| {
            ConFrameIterator::new(chunk)
                .next()
                // A chunk always starts on an existing line, so `None` is not
                // expected; it is mapped to an error rather than a panic.
                .unwrap_or(Err(error::ParseError::IncompleteFrame))
        })
        .collect()
}

/// Cuts `file_contents` into one slice per frame by reading only the header
/// fields that determine how many lines each frame occupies.
#[cfg(feature = "parallel")]
fn split_frame_chunks(file_contents: &str) -> Vec<&str> {
    // Record each line's text together with the byte offset where it starts.
    // Offsets come from the raw pieces (terminator included), so they are
    // correct for both `\n` and `\r\n` line endings. The stripped text mirrors
    // what `str::lines` yields.
    let mut lines: Vec<(usize, &str)> = Vec::new();
    let mut cursor = 0usize;
    for raw in file_contents.split_inclusive('\n') {
        let text = match raw.strip_suffix('\n') {
            Some(body) => body.strip_suffix('\r').unwrap_or(body),
            None => raw,
        };
        lines.push((cursor, text));
        cursor += raw.len();
    }

    let mut starts: Vec<usize> = Vec::new();
    let mut line_idx = 0usize;
    while let Some(&(frame_start, _)) = lines.get(line_idx) {
        starts.push(frame_start);

        // Header line 7 (after the 6 box/comment lines): number of atom types.
        let natm_types: usize = match lines
            .get(line_idx + 6)
            .map(|&(_, l)| crate::parser::parse_line_of_n::<usize>(l, 1))
        {
            Some(Ok(v)) => match v.first() {
                Some(&n) => n,
                None => break,
            },
            _ => break,
        };

        // Header line 8: atom count for each type.
        let natms_per_type: Vec<usize> = match lines
            .get(line_idx + 7)
            .map(|&(_, l)| crate::parser::parse_line_of_n::<usize>(l, natm_types))
        {
            Some(Ok(v)) => v,
            _ => break,
        };

        // One line per atom plus a symbol line and a "Coordinates..." line per
        // type. The counts come from the file, so saturate instead of
        // overflowing; a saturated index simply ends the scan.
        let total_atoms = natms_per_type
            .iter()
            .fold(0usize, |acc, &n| acc.saturating_add(n));
        let block_lines = total_atoms.saturating_add(natm_types.saturating_mul(2));

        // 9 header lines (6 skipped + types + counts + masses), then coordinates.
        let mut next_frame = line_idx.saturating_add(9).saturating_add(block_lines);

        // A blank line right after the coordinates announces velocity blocks
        // with the same layout as the coordinate blocks.
        let has_velocities = lines
            .get(next_frame)
            .map_or(false, |&(_, l)| l.trim().is_empty());
        if has_velocities {
            next_frame = next_frame.saturating_add(1).saturating_add(block_lines);
        }
        line_idx = next_frame;
    }

    // Each frame runs from its own start to the next frame's start (or to the
    // end of the input for the last one). Starts are line starts, hence always
    // valid `char` boundaries.
    starts
        .iter()
        .enumerate()
        .map(|(i, &start)| {
            let end = starts.get(i + 1).copied().unwrap_or(file_contents.len());
            &file_contents[start..end]
        })
        .collect()
}
readcon_core/iterators.rs

readcon_core/
iterators.rs