readcon_core/
parser.rs

1use crate::error::ParseError;
2use crate::types::{AtomDatum, ConFrame, FrameHeader};
3use std::rc::Rc;
4
5/// Parses a line of whitespace-separated values into a vector of a specific type.
6///
7/// This generic helper function takes a string slice, splits it by whitespace,
8/// and attempts to parse each substring into the target type `T`. The type `T`
9/// must implement `std::str::FromStr`.
10///
11/// # Arguments
12///
13/// * `line` - A string slice representing a single line of data.
14/// * `n` - The exact number of values expected on the line.
15///
16/// # Errors
17///
18/// * `ParseError::InvalidVectorLength` if the number of parsed values is not equal to `n`.
19/// * Propagates any error from the `parse()` method of the target type `T`.
20///
21/// # Example
22///
23/// ```
24/// use readcon_core::parser::parse_line_of_n;
25/// let line = "10.5 20.0 30.5";
26/// let values: Vec<f64> = parse_line_of_n(line, 3).unwrap();
27/// assert_eq!(values, vec![10.5, 20.0, 30.5]);
28///
29/// let result = parse_line_of_n::<i32>(line, 2);
30/// assert!(result.is_err());
31/// ```
32pub fn parse_line_of_n<T: std::str::FromStr>(line: &str, n: usize) -> Result<Vec<T>, ParseError>
33where
34    ParseError: From<<T as std::str::FromStr>::Err>,
35{
36    let values: Vec<T> = line
37        .split_whitespace()
38        .map(|s| s.parse::<T>())
39        .collect::<Result<_, _>>()?;
40
41    if values.len() == n {
42        Ok(values)
43    } else {
44        Err(ParseError::InvalidVectorLength {
45            expected: n,
46            found: values.len(),
47        })
48    }
49}
50
51/// Parses the 9-line header of a `.con` file frame from an iterator.
52///
53/// This function consumes the next 9 lines from the given line iterator to
54/// construct a `FrameHeader`. The iterator is advanced by 9 lines on success.
55///
56/// # Arguments
57///
58/// * `lines` - A mutable reference to an iterator that yields string slices.
59///
60/// # Errors
61///
62/// * `ParseError::IncompleteHeader` if the iterator has fewer than 9 lines remaining.
63/// * Propagates any errors from `parse_line_of_n` if the numeric data within
64///   the header is malformed.
65///
66/// # Panics
67///
68/// This function will panic if the intermediate vectors for box dimensions or angles,
69/// after being successfully parsed, cannot be converted into fixed-size arrays.
70/// This should not happen if `parse_line_of_n` is used correctly with `n=3`.
71pub fn parse_frame_header<'a>(
72    lines: &mut impl Iterator<Item = &'a str>,
73) -> Result<FrameHeader, ParseError> {
74    let prebox1 = lines
75        .next()
76        .ok_or(ParseError::IncompleteHeader)?
77        .to_string();
78    let prebox2 = lines
79        .next()
80        .ok_or(ParseError::IncompleteHeader)?
81        .to_string();
82    let boxl_vec = parse_line_of_n::<f64>(lines.next().ok_or(ParseError::IncompleteHeader)?, 3)?;
83    let angles_vec = parse_line_of_n::<f64>(lines.next().ok_or(ParseError::IncompleteHeader)?, 3)?;
84    let postbox1 = lines
85        .next()
86        .ok_or(ParseError::IncompleteHeader)?
87        .to_string();
88    let postbox2 = lines
89        .next()
90        .ok_or(ParseError::IncompleteHeader)?
91        .to_string();
92    let natm_types =
93        parse_line_of_n::<usize>(lines.next().ok_or(ParseError::IncompleteHeader)?, 1)?[0];
94    let natms_per_type = parse_line_of_n::<usize>(
95        lines.next().ok_or(ParseError::IncompleteHeader)?,
96        natm_types,
97    )?;
98    let masses_per_type = parse_line_of_n::<f64>(
99        lines.next().ok_or(ParseError::IncompleteHeader)?,
100        natm_types,
101    )?;
102    Ok(FrameHeader {
103        prebox_header: [prebox1, prebox2],
104        boxl: boxl_vec.try_into().unwrap(),
105        angles: angles_vec.try_into().unwrap(),
106        postbox_header: [postbox1, postbox2],
107        natm_types,
108        natms_per_type,
109        masses_per_type,
110    })
111}
112
113/// Parses a complete frame from a `.con` file, including its header and atomic data.
114///
115/// This function first parses the complete frame header and then uses the information within it
116/// (specifically the number of atom types and atoms per type) to parse the subsequent
117/// atom coordinate blocks.
118///
119/// # Arguments
120///
121/// * `lines` - A mutable reference to an iterator that yields string slices for the frame.
122///
123/// # Errors
124///
125/// * `ParseError::IncompleteFrame` if the iterator ends before all expected
126///   atomic data has been read.
127/// * Propagates any errors from the underlying calls to `parse_frame_header` and
128///   `parse_line_of_n`.
129///
130/// # Example
131///
132/// ```
133/// use readcon_core::parser::parse_single_frame;
134///
135/// let frame_text = r#"
136///PREBOX LINE 1
137///PREBOX LINE 2
138///10.0 10.0 10.0
139///90.0 90.0 90.0
140///POSTBOX LINE 1
141///POSTBOX LINE 2
142///2
143///1 1
144///12.011 1.008
145///C
146///Coordinates of Component 1
147///1.0 1.0 1.0 0.0 1
148///H
149///Coordinates of Component 2
150///2.0 2.0 2.0 0.0 2
151/// "#;
152///
153/// let mut lines = frame_text.trim().lines();
154/// let con_frame = parse_single_frame(&mut lines).unwrap();
155///
156/// assert_eq!(con_frame.header.natm_types, 2);
157/// assert_eq!(con_frame.atom_data.len(), 2);
158/// assert_eq!(&*con_frame.atom_data[0].symbol, "C");
159/// assert_eq!(con_frame.atom_data[1].atom_id, 2);
160/// ```
161pub fn parse_single_frame<'a>(
162    lines: &mut impl Iterator<Item = &'a str>,
163) -> Result<ConFrame, ParseError> {
164    let header = parse_frame_header(lines)?;
165    let total_atoms: usize = header.natms_per_type.iter().sum();
166    let mut atom_data = Vec::with_capacity(total_atoms);
167
168    for num_atoms in &header.natms_per_type {
169        // Create a reference-counted string for the symbol once per component.
170        let symbol = Rc::new(
171            lines
172                .next()
173                .ok_or(ParseError::IncompleteFrame)?
174                .trim()
175                .to_string(),
176        );
177        // Consume and discard the "Coordinates of Component X" line.
178        lines.next().ok_or(ParseError::IncompleteFrame)?;
179        for _ in 0..*num_atoms {
180            let coord_line = lines.next().ok_or(ParseError::IncompleteFrame)?;
181            let vals = parse_line_of_n::<f64>(coord_line, 5)?;
182            atom_data.push(AtomDatum {
183                // This is now a cheap reference-count increment, not a full string clone.
184                symbol: Rc::clone(&symbol),
185                x: vals[0],
186                y: vals[1],
187                z: vals[2],
188                is_fixed: vals[3] != 0.0,
189                atom_id: vals[4] as u64,
190            });
191        }
192    }
193    Ok(ConFrame { header, atom_data })
194}
195
#[cfg(test)]
mod tests {
    use super::*;

    /// The first six header lines shared by every fixture: two pre-box lines,
    /// the box lengths, the box angles, and two post-box lines.
    const BOX_PREAMBLE: [&str; 6] = [
        "PREBOX1",
        "PREBOX2",
        "10.0 20.0 30.0",
        "90.0 90.0 90.0",
        "POSTBOX1",
        "POSTBOX2",
    ];

    /// Builds a fixture consisting of the shared preamble followed by `tail`.
    fn fixture(tail: &[&'static str]) -> Vec<&'static str> {
        let mut all_lines = BOX_PREAMBLE.to_vec();
        all_lines.extend_from_slice(tail);
        all_lines
    }

    // ---- parse_line_of_n -------------------------------------------------

    #[test]
    fn test_parse_line_of_n_success() {
        let parsed = parse_line_of_n::<f64>("1.0 2.5 -3.0", 3).unwrap();
        assert_eq!(parsed, [1.0, 2.5, -3.0]);
    }

    #[test]
    fn test_parse_line_of_n_too_short() {
        let outcome = parse_line_of_n::<f64>("1.0 2.5", 3);
        assert!(matches!(
            outcome,
            Err(ParseError::InvalidVectorLength {
                expected: 3,
                found: 2
            })
        ));
    }

    #[test]
    fn test_parse_line_of_n_too_long() {
        let outcome = parse_line_of_n::<f64>("1.0 2.5 -3.0 4.0", 3);
        assert!(matches!(
            outcome,
            Err(ParseError::InvalidVectorLength {
                expected: 3,
                found: 4
            })
        ));
    }

    #[test]
    fn test_parse_line_of_n_invalid_float() {
        let outcome = parse_line_of_n::<f64>("1.0 abc -3.0", 3);
        assert!(matches!(outcome, Err(ParseError::InvalidNumberFormat(_))));
    }

    // ---- parse_frame_header ----------------------------------------------

    #[test]
    fn test_parse_frame_header_success() {
        let text = fixture(&["2", "1 1", "12.011 1.008"]);
        let mut cursor = text.iter().copied();

        let header = parse_frame_header(&mut cursor).unwrap_or_else(|e| {
            panic!(
                "Parsing failed when it should have succeeded. Error: {:?}",
                e
            )
        });

        assert_eq!(header.prebox_header, ["PREBOX1", "PREBOX2"]);
        assert_eq!(header.boxl, [10.0, 20.0, 30.0]);
        assert_eq!(header.angles, [90.0, 90.0, 90.0]);
        assert_eq!(header.postbox_header, ["POSTBOX1", "POSTBOX2"]);
        assert_eq!(header.natm_types, 2);
        assert_eq!(header.natms_per_type, vec![1, 1]);
        assert_eq!(header.masses_per_type, vec![12.011, 1.008]);
    }

    #[test]
    fn test_parse_frame_header_missing_line() {
        // The masses line (ninth header line) is absent.
        let text = fixture(&["2", "1 1"]);
        let mut cursor = text.iter().copied();

        let outcome = parse_frame_header(&mut cursor);
        assert!(matches!(outcome, Err(ParseError::IncompleteHeader)));
    }

    #[test]
    fn test_parse_frame_header_invalid_natms_per_type() {
        // Two atom types are declared, but three per-type counts are given.
        let text = fixture(&["2", "1 1 1", "12.011 1.008"]);
        let mut cursor = text.iter().copied();

        let outcome = parse_frame_header(&mut cursor);
        assert!(matches!(
            outcome,
            Err(ParseError::InvalidVectorLength {
                expected: 2,
                found: 3
            })
        ));
    }

    // ---- parse_single_frame ----------------------------------------------

    #[test]
    fn test_parse_single_frame_success() {
        let text = fixture(&[
            "2",
            "3 3",
            "12.011 1.008",
            "1",
            "Coordinates of Component 1",
            "0.0 0.0 0.0 0.0 1",
            "1.0940 0.0 0.0 0.0 2",
            "-0.5470 0.9499 0.0 0.0 3",
            "2",
            "Coordinates of Component 2",
            "5.0 5.0 5.0 0.0 4",
            "6.0940 5.0 5.0 0.0 5",
            "5.5470 5.9499 5.0 0.0 6",
        ]);
        let mut cursor = text.iter().copied();
        let frame = parse_single_frame(&mut cursor).unwrap();

        assert_eq!(frame.header.natm_types, 2);
        assert_eq!(frame.header.natms_per_type, vec![3, 3]);
        assert_eq!(frame.header.masses_per_type, vec![12.011, 1.008]);
        assert_eq!(frame.atom_data.len(), 6);

        let (first_atom, sixth_atom) = (&frame.atom_data[0], &frame.atom_data[5]);
        assert_eq!(&*first_atom.symbol, "1");
        assert_eq!(first_atom.atom_id, 1);
        assert_eq!(&*sixth_atom.symbol, "2");
        assert_eq!(sixth_atom.atom_id, 6);
    }

    #[test]
    fn test_parse_single_frame_missing_line() {
        // The input stops after the first component; the second component's
        // symbol line and everything after it are absent.
        let text = fixture(&[
            "2",
            "3 3",
            "12.011 1.008",
            "1",
            "Coordinates of Component 1",
            "0.0 0.0 0.0 0.0 1",
            "1.0940 0.0 0.0 0.0 2",
            "-0.5470 0.9499 0.0 0.0 3",
        ]);
        let mut cursor = text.iter().copied();

        let outcome = parse_single_frame(&mut cursor);
        assert!(matches!(outcome, Err(ParseError::IncompleteFrame)));
    }

    #[test]
    fn test_parse_single_frame_invalid_atom_coords() {
        let text = fixture(&[
            "2",
            "3 3",
            "12.011 1.008",
            "1",
            "Coordinates of Component 1",
            "0.0 0.0 0.0 0.0 1",
            "1.0940 0.0 0.0 0.0 2",
            "-0.5470 0.9499 0.0 0.0 3",
            "2",
            "Coordinates of Component 2",
            // Only four values: the atom id is missing.
            "5.0 5.0 5.0 0.0",
            "6.0940 5.0 5.0 0.0 5",
            "5.5470 5.9499 5.0 0.0 6",
        ]);
        let mut cursor = text.iter().copied();

        let outcome = parse_single_frame(&mut cursor);
        assert!(matches!(
            outcome,
            Err(ParseError::InvalidVectorLength {
                expected: 5,
                found: 4
            })
        ));
    }
}