Skip to main content

extxyz/
read.rs

/* Native reader for the extended XYZ format, implemented with `nom` parser combinators.
*/
3use crate::error::ExtxyzError;
4use extxyz_types::{FloatNum, Frame, Text, Value};
5use nom::{
6    self,
7    branch::alt,
8    bytes::complete::{tag, take_while1},
9    character::{
10        complete::{self, multispace0, space0, space1},
11        streaming,
12    },
13    combinator::{all_consuming, map, map_res, not, opt, peek, recognize, verify},
14    multi::{many0, separated_list0, separated_list1},
15    number,
16    sequence::{delimited, separated_pair, terminated},
17    IResult, Parser,
18};
19use std::{
20    collections::BTreeMap,
21    io::{self, BufRead},
22};
23
24/// read from a buf reader and return an `FrameReader` which is an interator.
25/// Creates a streaming frame iterator over an extended XYZ input.
26///
27/// This function wraps a mutable buffered reader and returns a [`FrameReader`]
28/// that yields [`Frame`]s one at a time. It is suitable for processing large
29/// `.xyz` files without loading the entire contents into memory.
30///
31/// # Type Parameters
32/// - `R`: A type that implements [`BufRead`], such as `BufReader<File>` or
33///   an in-memory buffer.
34///
35/// # Arguments
36/// - `rd`: A mutable reference to a buffered reader to read frames from.
37///
38/// # Returns
39/// A [`FrameReader`] that implements an iterator-like interface over parsed
40/// frames.
41///
42/// # Notes
43/// - The returned reader borrows `rd` for its lifetime, so `rd` cannot be used
44///   elsewhere while the iterator is alive.
45/// - Frames are parsed lazily as the iterator advances.
46/// - Errors are typically surfaced during iteration (depending on the
47///   [`FrameReader`] implementation).
48///
49/// # Example
50/// ```no_run
51/// use std::fs::File;
52/// use std::io::BufReader;
53/// use extxyz::read_frames;
54///
55/// let file = File::open("input.xyz").unwrap();
56/// let mut reader = BufReader::new(file);
57///
58/// let mut frames = read_frames(&mut reader);
59///
60/// // assuming `FrameReader` implements `Iterator<Item = Result<Frame, _>>`
61/// // for frame in frames {
62/// //     let frame = frame?;
63/// //     // process frame
64/// // }
65/// ```
66pub fn read_frames<'a, R>(rd: &'a mut R) -> FrameReader<'a, R>
67where
68    R: BufRead,
69{
70    FrameReader::new(rd)
71}
72
/// Streaming frame reader for extended XYZ input.
///
/// `FrameReader` provides an iterator interface over [`Frame`]s read from a
/// buffered input source. It borrows the underlying reader and parses frames
/// lazily as iteration progresses.
///
/// # Type Parameters
/// - `R`: The underlying reader type; the `Iterator` implementation requires
///   it to implement [`BufRead`].
///
/// # Fields
/// - `rd`: Mutable reference to the buffered reader.
/// - `finished`: When `true`, `next` returns `None` immediately without
///   touching the reader.
///
/// # Usage
/// This type is typically constructed via [`read_frames`], and then used
/// to iterate over frames:
///
/// ```no_run
/// use std::fs::File;
/// use std::io::BufReader;
/// use extxyz::read_frames;
///
/// let file = File::open("input.xyz").unwrap();
/// let mut reader = BufReader::new(file);
///
/// let frames = read_frames(&mut reader);
///
/// // `FrameReader` implements `Iterator<Item = Result<Frame, _>>`
/// // for frame in frames {
/// //     let frame = frame?;
/// //     // process frame
/// // }
/// ```
///
/// # Notes
/// - The reader is borrowed for the lifetime `'a`, so it cannot be accessed
///   elsewhere while the `FrameReader` is in use.
/// - Frames are parsed on demand rather than at construction time.
/// - Errors are returned during iteration rather than at creation.
pub struct FrameReader<'a, R> {
    // Borrowed source of the frames.
    rd: &'a mut R,
    // Done marker: once set, iteration yields nothing more.
    finished: bool,
}
119
120impl<'a, R> FrameReader<'a, R> {
121    pub fn new(rd: &'a mut R) -> Self {
122        FrameReader {
123            rd,
124            finished: false,
125        }
126    }
127}
128
129impl<'a, R> Iterator for FrameReader<'a, R>
130where
131    R: BufRead,
132{
133    type Item = Result<Frame, ExtxyzError>;
134
135    fn next(&mut self) -> Option<Self::Item> {
136        // fast finished
137        if self.finished {
138            return None;
139        }
140
141        match _read_frame_native_new(self.rd, None) {
142            Ok(Some(frame)) => Some(Ok(frame)),
143            Ok(None) => None,
144            Err(err) => Some(Err(ExtxyzError::Io(err))),
145        }
146    }
147}
148
/// Owned frame reader for extended XYZ input.
///
/// `FrameReaderOwned` wraps a reader and incrementally parses [`Frame`]s
/// from it, keeping its `finished` state between reads. Unlike the borrowing
/// [`FrameReader`], this type *owns* the underlying reader, which avoids a
/// lifetime parameter; the original author notes this is intended for FFI
/// scenarios such as Python bindings.
///
/// # Type Parameters
/// - `R`: The underlying reader type; the `Iterator` implementation requires
///   it to implement [`BufRead`] (e.g. `BufReader<File>`).
///
/// # Fields
/// - `rd`: The owned reader from which frames are read.
/// - `finished`: Done marker carried over between calls to `next`.
///
/// # Notes
/// - Once `finished` is `true`, no further frames will be produced.
/// - Used through the `Iterator` API (`next()`).
pub struct FrameReaderOwned<R> {
    rd: R,
    finished: bool,
}
180
181impl<R> FrameReaderOwned<R> {
182    pub fn new(rd: R) -> Self {
183        FrameReaderOwned {
184            rd,
185            finished: false,
186        }
187    }
188}
189
190impl<R> Iterator for FrameReaderOwned<R>
191where
192    R: BufRead,
193{
194    type Item = Result<Frame, ExtxyzError>;
195
196    fn next(&mut self) -> Option<Self::Item> {
197        let mut rd = FrameReader {
198            rd: &mut self.rd,
199            finished: self.finished,
200        };
201
202        let out = rd.next();
203        self.finished = rd.finished;
204        out
205    }
206}
207
208/// Reads a single frame from a buffered reader in extended XYZ format.
209///
210/// This function attempts to parse exactly one [`Frame`] from the given reader.
211/// It delegates the parsing to an internal implementation and returns the first
212/// successfully parsed frame.
213///
214/// # Type Parameters
215/// - `R`: A type that implements [`BufRead`], such as `BufReader<File>` or
216///   an in-memory buffer.
217///
218/// # Arguments
219/// - `rd`: A mutable reference to a buffered reader to read from.
220///
221/// # Returns
222/// - `Ok(Frame)` if a frame is successfully parsed.
223/// - `Err(ExtxyzError)` if parsing fails or no frame can be read.
224///
225/// # Errors
226/// Returns an [`ExtxyzError`] in the following cases:
227/// - If an I/O error occurs while reading from the input.
228/// - If the input does not contain a valid frame in extended XYZ format.
229/// - If the end of input is reached before any frame could be parsed
230///   (`UnexpectedEof`).
231///
232/// # Notes
233/// - This function reads from the current position of the reader and may
234///   consume input.
235/// - If no frame is found, an `UnexpectedEof` error is returned rather than
236///   `Ok(None)`.
237///
238/// # Example
239/// ```no_run
240/// use std::fs::File;
241/// use std::io::BufReader;
242/// use extxyz::read_frame;
243///
244/// let file = File::open("input.xyz").unwrap();
245/// let mut reader = BufReader::new(file);
246///
247/// let frame = read_frame(&mut reader).unwrap();
248/// ```
249pub fn read_frame<R>(rd: &mut R) -> Result<Frame, ExtxyzError>
250where
251    R: BufRead,
252{
253    let Some(frame) = _read_frame_native_new(rd, None)? else {
254        return Err(ExtxyzError::Io(io::Error::new(
255            io::ErrorKind::UnexpectedEof,
256            "does not parse anything from reader",
257        )));
258    };
259    Ok(frame)
260}
261
262pub(crate) fn _read_frame_native_new<R>(
263    rd: &mut R,
264    comment_override: Option<&str>,
265) -> io::Result<Option<Frame>>
266where
267    R: BufRead,
268{
269    let mut maybe_natoms_line = String::new();
270    rd.read_line(&mut maybe_natoms_line)?;
271    if maybe_natoms_line.is_empty() {
272        return Ok(None);
273    }
274
275    // parse number of lines
276    let natoms_line_as_bytes = maybe_natoms_line.as_bytes();
277    let (_, natoms) = parse_natoms(natoms_line_as_bytes).map_err(|e| {
278        let es = match e {
279            nom::Err::Incomplete(_) => "nom incomplete streaming".to_string(),
280            nom::Err::Error(err) | nom::Err::Failure(err) => {
281                format!(
282                    "{:?}: {}",
283                    err.code,
284                    str::from_utf8(err.input).unwrap_or("unrecognized u8 input")
285                )
286            }
287        };
288        io::Error::new(io::ErrorKind::InvalidData, es)
289    })?;
290
291    let mut maybe_info_line = String::new();
292    rd.read_line(&mut maybe_info_line)?;
293    if maybe_info_line.is_empty() {
294        return Ok(None);
295    }
296
297    let info_line_as_bytes = maybe_info_line.as_bytes();
298    let (_, (info, prop_shape)) = parse_info(info_line_as_bytes).map_err(|e| {
299        let es = match e {
300            nom::Err::Incomplete(_) => "nom incomplete streaming".to_string(),
301            nom::Err::Error(err) | nom::Err::Failure(err) => {
302                format!(
303                    "{:?}: {}",
304                    err.code,
305                    str::from_utf8(err.input).unwrap_or("unrecognized u8 input")
306                )
307            }
308        };
309        io::Error::new(io::ErrorKind::InvalidData, es)
310    })?;
311
312    // init the arrs from the shape, in order to avoid innermiddle allocation
313    let mut arrs: Vec<(String, Value)> = prop_shape
314        .iter()
315        .map(|(name, ty, n)| {
316            let value = match (ty, n) {
317                (Ty::I, 1) => Value::VecInteger(Vec::with_capacity(natoms), natoms as u32),
318                (Ty::R, 1) => Value::VecFloat(Vec::with_capacity(natoms), natoms as u32),
319                (Ty::L, 1) => Value::VecBool(Vec::with_capacity(natoms), natoms as u32),
320                (Ty::S, 1) => Value::VecText(Vec::with_capacity(natoms), natoms as u32),
321
322                (Ty::I, nc) => {
323                    Value::MatrixInteger(Vec::with_capacity(natoms), (natoms as u32, *nc as u32))
324                }
325                (Ty::R, nc) => {
326                    Value::MatrixFloat(Vec::with_capacity(natoms), (natoms as u32, *nc as u32))
327                }
328                (Ty::L, nc) => {
329                    Value::MatrixBool(Vec::with_capacity(natoms), (natoms as u32, *nc as u32))
330                }
331                (Ty::S, nc) => {
332                    Value::MatrixText(Vec::with_capacity(natoms), (natoms as u32, *nc as u32))
333                }
334            };
335
336            (name.to_string(), value)
337        })
338        .collect();
339
340    let mut natoms_to_read = natoms;
341    // TODO: validate natoms and number of rows of the arr
342    loop {
343        let buf = rd.fill_buf()?;
344        if buf.is_empty() {
345            return Err(io::Error::new(
346                io::ErrorKind::UnexpectedEof,
347                "EOF reached before parsing frame",
348            ));
349        }
350
351        match parse_xyz_by_lines(buf, natoms_to_read, &prop_shape, &mut arrs) {
352            Ok((remain, nat)) => {
353                let len_read = buf.len() - remain.len();
354                rd.consume(len_read);
355
356                natoms_to_read -= nat;
357                if natoms_to_read == 0 {
358                    break;
359                } else if natoms_to_read > 0 {
360                    continue;
361                } else {
362                    // < 0
363                    return Err(io::Error::new(
364                        io::ErrorKind::InvalidData,
365                        "too many atoms than expected",
366                    ));
367                }
368            }
369            Err(e) => {
370                let es = match e {
371                    nom::Err::Incomplete(_) => "nom incomplete streaming".to_string(),
372                    nom::Err::Error(err) | nom::Err::Failure(err) => {
373                        format!(
374                            "{:?}: {}",
375                            err.code,
376                            str::from_utf8(err.input).unwrap_or("unrecognized u8 input")
377                        )
378                    }
379                };
380                return Err(io::Error::new(io::ErrorKind::InvalidData, es));
381            }
382        }
383    }
384
385    let mut frame = Frame::new(natoms as u32, info, arrs);
386
387    // // any remain spaces
388    // let (input, _) = multispace0::<_, nom::error::Error<&[u8]>>(input)
389    //     .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?;
390
391    if let Some(comment) = comment_override {
392        frame.set_comment(comment);
393    }
394
395    Ok(Some(frame))
396}
397
398// XXX: can I lru cache this call? this can be useful when parsing frames because the same info
399// lines will be kept on parsed again and again.
400fn key_value(inp: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
401    let (inp, (k, v)) = separated_pair(
402        delimited(
403            multispace0,
404            verify(
405                take_while1(|c: u8| c != b'=' && !c.is_ascii_whitespace()),
406                |s: &[u8]| recognize_kv_left(s).is_ok(),
407            ),
408            multispace0,
409        ),
410        tag(&b"="[..]),
411        delimited(multispace0, recognize_kv_right, multispace0),
412    )
413    .parse(inp)?;
414    Ok((inp, (k, v)))
415}
416
/// Returns `true` for bytes allowed in a bare identifier: ASCII letters,
/// ASCII digits and the underscore.
fn is_ident_char(c: u8) -> bool {
    matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
420
421fn recognize_int(inp: &[u8]) -> IResult<&[u8], &[u8]> {
422    terminated(
423        recognize(complete::i32),
424        peek(not(take_while1(is_ident_char))),
425    )
426    .parse(inp)
427}
428
429// i32
430fn parse_int(inp: &[u8]) -> IResult<&[u8], Value> {
431    map_res(recognize_int, |bytes: &[u8]| {
432        let s = std::str::from_utf8(bytes)
433            .map_err(|_| nom::error::Error::new(bytes, nom::error::ErrorKind::Char))?;
434        let i = s
435            .parse::<i32>()
436            .map_err(|_| nom::error::Error::new(bytes, nom::error::ErrorKind::Digit))?;
437        Ok::<Value, nom::error::Error<&[u8]>>(Value::Integer(i.into()))
438    })
439    .parse(inp)
440}
441
442fn recognize_float(inp: &[u8]) -> IResult<&[u8], &[u8]> {
443    // number::complete::double will parse an integer into a float, this is what I don't want
444    // I parse twice here, using recognize_float_parts to get the fraction part and error out if it
445    // is a pure integer.
446    // More performant one is reimplement number::complete::double, but I dont bother do it.
447    let (inp_, (_, _, fraction, _)) = number::complete::recognize_float_parts(inp)?;
448    if fraction.is_empty() {
449        return Err(nom::Err::Error(nom::error::Error::new(
450            inp_,
451            nom::error::ErrorKind::Float,
452        )));
453    }
454    let len = inp.len() - inp_.len();
455    Ok((inp_, &inp[..len]))
456}
457
458fn parse_float(inp: &[u8]) -> IResult<&[u8], Value> {
459    let (remain, inp) = recognize_float.parse(inp)?;
460    let (_, float) = number::complete::double
461        .map(|i| Value::Float(i.into()))
462        .parse(inp)?;
463
464    Ok((remain, float))
465}
466
467fn parse_bool(inp: &[u8]) -> IResult<&[u8], Value> {
468    // T or F or [tT]rue or [fF]alse or TRUE or FALSE
469    alt((
470        tag("true").map(|_| Value::Bool(true.into())),
471        tag("false").map(|_| Value::Bool(false.into())),
472        tag("True").map(|_| Value::Bool(true.into())),
473        tag("False").map(|_| Value::Bool(false.into())),
474        tag("TRUE").map(|_| Value::Bool(true.into())),
475        tag("FALSE").map(|_| Value::Bool(false.into())),
476        tag("T").map(|_| Value::Bool(true.into())),
477        tag("F").map(|_| Value::Bool(false.into())),
478    ))
479    .parse(inp)
480}
481
482fn recognize_bool(inp: &[u8]) -> IResult<&[u8], &[u8]> {
483    // XXX: should to v.v
484    recognize(parse_bool).parse(inp)
485}
486
487fn parse_bare_str(inp: &[u8]) -> IResult<&[u8], Value> {
488    let (remain, inp) = recognize_bare_str.parse(inp)?;
489    let s = String::from_utf8(inp.to_vec()).map_err(|_| {
490        nom::Err::Failure(nom::error::Error::new(inp, nom::error::ErrorKind::Verify))
491    })?;
492    Ok((remain, Value::Str(Text::from(s))))
493}
494
495fn recognize_bare_str(inp: &[u8]) -> IResult<&[u8], &[u8]> {
496    let (linp, s) = take_while1(|c: u8| is_ident_char(c)).parse(inp)?;
497    if !s[0].is_ascii_alphanumeric() && s[0] != b'_' {
498        return Err(nom::Err::Error(nom::error::Error::new(
499            linp,
500            nom::error::ErrorKind::Verify,
501        )));
502    }
503    let len = inp.len() - linp.len();
504    Ok((linp, &inp[..len]))
505}
506
507fn parse_quote_str(inp: &[u8]) -> IResult<&[u8], Value> {
508    let parse_inner = map_res(
509        many0(alt((
510            take_while1(|b| b != b'\\' && b != b'"'),
511            map(tag(r#"\""#), |_| b"\"".as_ref()),
512            map(tag(r#"\\"#), |_| b"\\".as_ref()),
513            map(tag(r#"\n"#), |_| b"\n".as_ref()),
514        ))),
515        |chunks: Vec<&[u8]>| {
516            let s = chunks.concat();
517            String::from_utf8(s).map(|s| Value::Str(Text::from(s)))
518        },
519    );
520
521    let (inp, xx) = delimited(tag(b"\"".as_ref()), parse_inner, tag(b"\"".as_ref())).parse(inp)?;
522    Ok((inp, xx))
523}
524
525fn parse_bare_properties_str(inp: &[u8]) -> IResult<&[u8], Value> {
526    let (remain, inp) = take_while1(|c: u8| {
527        c.is_ascii_alphanumeric() || c == b'_' || c == b':' || c == b'@' || c == b'/'
528    })
529    .parse(inp)?;
530    let s = String::from_utf8(inp.to_vec()).map_err(|_| {
531        nom::Err::Failure(nom::error::Error::new(inp, nom::error::ErrorKind::Verify))
532    })?;
533    let v = Value::Str(Text::from(s));
534    Ok((remain, v))
535}
536
/// Parses the right-hand side (the value) of a `key=value` pair.
///
/// The alternatives are tried in the order listed and the first one that
/// succeeds wins, so the order is significant.
///
/// NOTE(review): `parse_1d_array` is not among the alternatives here; confirm
/// whether one-dimensional arrays are handled by a different path.
fn parse_kv_right(inp: &[u8]) -> IResult<&[u8], Value> {
    // The order conforms to the spec; see the README for the spec definition.
    alt((
        parse_2d_array,
        // float before int, to avoid 3.14 -> 3
        parse_float,
        parse_int,
        // bool comes before str, to avoid bool `true` -> str "true"
        parse_bool,
        // quoted string next: it is the pickiest string form, requiring
        // surrounding double quotes
        parse_quote_str,
        // a quoted properties string is already covered by parse_quote_str
        parse_bare_properties_str,
        parse_bare_str,
    ))
    .parse(inp)
}
554
555// left part of kv, i.e. the key part, which need to be a parsable string.
556fn parse_kv_left(inp: &[u8]) -> IResult<&[u8], Value> {
557    alt((parse_bare_str, parse_quote_str)).parse(inp)
558}
559
560fn recognize_kv_right(inp: &[u8]) -> IResult<&[u8], &[u8]> {
561    recognize(parse_kv_right).parse(inp)
562}
563
564fn recognize_kv_left(inp: &[u8]) -> IResult<&[u8], &[u8]> {
565    recognize(parse_kv_left).parse(inp)
566}
567
568fn parse_2d_arr_3x3_flatten(inp: &[u8]) -> IResult<&[u8], Value> {
569    let (inp, mut vals) = separated_list0(space1, parse_kv_right).parse(inp)?;
570    if vals.len() != 9 {
571        return Err(nom::Err::Failure(nom::error::Error::new(
572            inp,
573            nom::error::ErrorKind::Verify,
574        )));
575    }
576    promote_values_1d(&mut vals).map_err(|_| {
577        nom::Err::Failure(nom::error::Error::new(inp, nom::error::ErrorKind::Verify))
578    })?;
579
580    match &vals[0] {
581        Value::Integer(_) => {
582            let vals = vals
583                .into_iter()
584                .map(|v| v.as_integer().expect("not an integer"))
585                .collect::<Vec<_>>();
586            let row1 = vec![vals[0], vals[3], vals[6]];
587            let row2 = vec![vals[1], vals[4], vals[7]];
588            let row3 = vec![vals[2], vals[5], vals[8]];
589            let vs = vec![row1, row2, row3];
590            Ok((inp, Value::MatrixInteger(vs, (3, 3))))
591        }
592        Value::Float(_) => {
593            let vals = vals
594                .into_iter()
595                .map(|v| v.as_float().expect("not a float"))
596                .collect::<Vec<_>>();
597            let row1 = vec![vals[0], vals[3], vals[6]];
598            let row2 = vec![vals[1], vals[4], vals[7]];
599            let row3 = vec![vals[2], vals[5], vals[8]];
600            let vs = vec![row1, row2, row3];
601            Ok((inp, Value::MatrixFloat(vs, (3, 3))))
602        }
603        _ => Err(nom::Err::Failure(nom::error::Error::new(
604            inp,
605            nom::error::ErrorKind::Verify,
606        ))),
607    }
608}
609
610fn parse_2d_array(inp: &[u8]) -> IResult<&[u8], Value> {
611    let (inp_, vals) = delimited(
612        tag(b"[".as_ref()),
613        separated_list0(
614            tag(b",".as_ref()),
615            delimited(multispace0, parse_1d_array, multispace0),
616        ),
617        tag(b"]".as_ref()),
618    )
619    .parse(inp)?;
620
621    debug_assert!(!vals.is_empty());
622
623    match &vals[0] {
624        Value::VecInteger(_, nc) => {
625            let nc = *nc;
626            let nr = vals.len();
627            let vs = vals
628                .into_iter()
629                .map(|v| {
630                    let Value::VecInteger(i, x) = v else {
631                        unreachable!()
632                    };
633                    debug_assert_eq!(x, nc);
634                    i
635                })
636                .collect::<Vec<_>>();
637            Ok((inp_, Value::MatrixInteger(vs, (nr as u32, nc))))
638        }
639        Value::VecFloat(_, nc) => {
640            let nc = *nc;
641            let nr = vals.len();
642            let vs = vals
643                .into_iter()
644                .map(|v| {
645                    let Value::VecFloat(i, x) = v else {
646                        unreachable!()
647                    };
648                    debug_assert_eq!(x, nc);
649                    i
650                })
651                .collect::<Vec<_>>();
652            Ok((inp_, Value::MatrixFloat(vs, (nr as u32, nc))))
653        }
654        Value::VecBool(_, nc) => {
655            let nc = *nc;
656            let nr = vals.len();
657            let vs = vals
658                .into_iter()
659                .map(|v| {
660                    let Value::VecBool(i, x) = v else {
661                        unreachable!()
662                    };
663                    debug_assert_eq!(x, nc);
664                    i
665                })
666                .collect::<Vec<_>>();
667            Ok((inp_, Value::MatrixBool(vs, (nr as u32, nc))))
668        }
669        Value::VecText(_, nc) => {
670            let nc = *nc;
671            let nr = vals.len();
672            let vs = vals
673                .into_iter()
674                .map(|v| {
675                    let Value::VecText(i, x) = v else {
676                        unreachable!()
677                    };
678                    debug_assert_eq!(x, nc);
679                    i
680                })
681                .collect::<Vec<_>>();
682            Ok((inp_, Value::MatrixText(vs, (nr as u32, nc))))
683        }
684        _ => unreachable!(),
685    }
686}
687
688// fn recognize_2d_array(inp: &[u8]) -> IResult<&[u8], &[u8]> {
689//     recognize(parse_2d_array).parse(inp)
690// }
691
692fn parse_1d_array(inp: &[u8]) -> IResult<&[u8], Value> {
693    let (inp_, mut vals) = delimited(
694        tag(b"[".as_ref()),
695        separated_list0(
696            tag(b",".as_ref()),
697            delimited(multispace0, parse_kv_right, multispace0),
698        ),
699        tag(b"]".as_ref()),
700    )
701    .parse(inp)?;
702
703    debug_assert!(!vals.is_empty());
704
705    // promote by single rule:
706    // only int -> float when mixed, all other mixture will fail.
707    promote_values_1d(&mut vals).map_err(|_| {
708        nom::Err::Failure(nom::error::Error::new(inp_, nom::error::ErrorKind::Verify))
709    })?;
710
711    match &vals[0] {
712        Value::Integer(_) => {
713            let n = vals.len();
714            let vs = vals
715                .into_iter()
716                .map(|v| {
717                    let Value::Integer(i) = v else { unreachable!() };
718                    i
719                })
720                .collect::<Vec<_>>();
721            Ok((inp_, Value::VecInteger(vs, n as u32)))
722        }
723        Value::Float(_) => {
724            let n = vals.len();
725            let vs = vals
726                .into_iter()
727                .map(|v| {
728                    let Value::Float(i) = v else { unreachable!() };
729                    i
730                })
731                .collect::<Vec<_>>();
732            Ok((inp_, Value::VecFloat(vs, n as u32)))
733        }
734        Value::Bool(_) => {
735            let n = vals.len();
736            let vs = vals
737                .into_iter()
738                .map(|v| {
739                    let Value::Bool(i) = v else { unreachable!() };
740                    i
741                })
742                .collect::<Vec<_>>();
743            Ok((inp_, Value::VecBool(vs, n as u32)))
744        }
745        Value::Str(_) => {
746            let n = vals.len();
747            let vs = vals
748                .into_iter()
749                .map(|v| {
750                    let Value::Str(i) = v else { unreachable!() };
751                    i
752                })
753                .collect::<Vec<_>>();
754            Ok((inp_, Value::VecText(vs, n as u32)))
755        }
756        // safe unreachable: because all branches are ruled out in promote_values_1d call.
757        _ => unreachable!(),
758    }
759}
760
761// fn recognize_1d_array(inp: &[u8]) -> IResult<&[u8], &[u8]> {
762//     recognize(parse_1d_array).parse(inp)
763// }
764
/// Marker error for failures inside the value-promotion helpers; it carries
/// no further detail.
#[derive(Debug)]
struct InnerParseError;

impl std::fmt::Display for InnerParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("inner parse error")
    }
}

impl std::error::Error for InnerParseError {}
775
776fn promote_values_1d(vals: &mut [Value]) -> Result<(), InnerParseError> {
777    if vals.is_empty() {
778        return Ok(());
779    }
780
781    if vals.iter().any(|v| {
782        matches!(
783            v,
784            Value::VecBool(_, _)
785                | Value::VecText(_, _)
786                | Value::VecFloat(_, _)
787                | Value::VecInteger(_, _)
788                | Value::MatrixBool(_, _)
789                | Value::MatrixText(_, _)
790                | Value::MatrixFloat(_, _)
791                | Value::MatrixInteger(_, _)
792                | Value::Unsupported
793        )
794    }) {
795        return Err(InnerParseError);
796    }
797
798    let has_bool = vals.iter().any(|v| matches!(v, Value::Bool(_)));
799    let has_float = vals.iter().any(|v| matches!(v, Value::Float(_)));
800    let has_str = vals.iter().any(|v| matches!(v, Value::Str(_)));
801    let has_int = vals.iter().any(|v| matches!(v, Value::Integer(_)));
802
803    match (has_int, has_float, has_bool, has_str) {
804        // homogeneous types in array, no promotion needed
805        (true, false, false, false)
806        | (false, true, false, false)
807        | (false, false, true, false)
808        | (false, false, false, true) => Ok(()),
809        // int and float in array, promote all int to float
810        (true, true, false, false) => {
811            vals.iter_mut().for_each(|v| {
812                if let Value::Integer(i) = v {
813                    *v = Value::Float(FloatNum::from(f64::from(**i)));
814                }
815            });
816            Ok(())
817        }
818        // error out if more mixture types
819        (true, true, true, true)
820        | (true, true, true, false)
821        | (true, true, false, true)
822        | (true, false, true, true)
823        | (true, false, true, false)
824        | (true, false, false, true)
825        | (false, true, true, true)
826        | (false, true, true, false)
827        | (false, true, false, true)
828        | (false, false, true, true) => Err(InnerParseError),
829        (false, false, false, false) => unreachable!(),
830    }
831}
832
833#[allow(clippy::type_complexity)]
834fn parse_info_line(inp: &[u8]) -> IResult<&[u8], Vec<(&[u8], &[u8])>> {
835    let (inp, kv) = delimited(
836        multispace0,
837        all_consuming(separated_list1(space0, key_value)),
838        multispace0,
839    )
840    .parse(inp)?;
841    Ok((inp, kv))
842}
843
844#[allow(clippy::type_complexity)]
845fn parse_no_equal_sign_line(inp: &[u8]) -> IResult<&[u8], Vec<(&[u8], &[u8])>> {
846    let (inp, ln) = take_while1(|c: u8| c != b'=').parse(inp)?;
847    Ok((inp, vec![(&b"comment"[..], ln)]))
848}
849
/// Element type of a per-atom property column, as given by the single-letter
/// type code in a `Properties` specification.
#[derive(Debug, Hash, PartialEq, Eq)]
enum Ty {
    /// Integer (`I`).
    I,
    /// Real, i.e. floating point (`R`).
    R,
    /// Logical, i.e. boolean (`L`).
    L,
    /// String (`S`).
    S,
}
861
/// Shape of the per-atom columns: one `(name, element type, column count)`
/// triple per property, with the name borrowed from the parsed input.
type PropShape<'a> = Vec<(&'a str, Ty, u8)>;
863
864fn parse_properties<'a>(inp: &'a [u8]) -> IResult<&'a [u8], PropShape<'a>> {
865    // into triple elements chunk
866    let (inp_, segments) =
867        separated_list1(tag(b":".as_ref()), take_while1(|c: u8| c != b':')).parse(inp)?;
868
869    if segments.len() % 3 != 0 {
870        // TODO: verbose context error
871        return Err(nom::Err::Failure(nom::error::Error::new(
872            inp,
873            nom::error::ErrorKind::Verify,
874        )));
875    }
876
877    // TODO: check key name should not duplicate, because that is the name as keys for arrs
878    let mut mp = Vec::new();
879    for chunk in segments.chunks(3) {
880        let id = chunk[0];
881        let ty = match chunk[1] {
882            b"I" => Ty::I,
883            b"R" => Ty::R,
884            b"L" => Ty::L,
885            b"S" => Ty::S,
886            _ => {
887                // TODO: verbose context error
888                return Err(nom::Err::Failure(nom::error::Error::new(
889                    inp,
890                    nom::error::ErrorKind::Verify,
891                )));
892            }
893        };
894        let nc = str::from_utf8(chunk[2])
895            .map_err(|_| {
896                nom::Err::Failure(nom::error::Error::new(inp, nom::error::ErrorKind::Verify))
897            })?
898            .parse::<u8>()
899            .map_err(|_| {
900                nom::Err::Failure(nom::error::Error::new(inp, nom::error::ErrorKind::Verify))
901            })?;
902
903        let id = str::from_utf8(id).unwrap();
904        mp.push((id, ty, nc));
905    }
906    Ok((inp_, mp))
907}
908
/// Parsed info entries as `(key, value)` pairs.
type TypInfo = Vec<(String, Value)>;
/// Per-atom column shape as `(name, element type, column count)` triples;
/// structurally identical to `PropShape`.
type TypPropShape<'a> = Vec<(&'a str, Ty, u8)>;
911
912fn parse_info<'a>(input: &'a [u8]) -> IResult<&'a [u8], (TypInfo, TypPropShape<'a>)> {
913    let (input, line) = terminated(
914        nom::bytes::complete::take_until(&b"\n"[..]),
915        complete::newline,
916    )
917    .parse(input)?;
918
919    let (_, info_kv) = alt((
920        all_consuming(parse_info_line),
921        all_consuming(parse_no_equal_sign_line),
922    ))
923    .parse(line)?;
924
925    // use BTreeMap so the info is stored in order
926    let mut kv = BTreeMap::new();
927
928    for (k, v) in info_kv {
929        let old_val = kv.insert(k, v);
930        // fatal when key duplicate in the info line
931        // TODO: verbose context error
932        if old_val.is_some() {
933            return Err(nom::Err::Failure(nom::error::Error::new(
934                k,
935                nom::error::ErrorKind::Verify,
936            )));
937        }
938    }
939
940    // TODO: check "properties"/"property"/"Property" and raise error with help message.
941    // TODO: check "lattice" and raise error with help message.
942    //
943    // The default (fallback) shape is: Properties=species:S:1:pos:R:3
944    let prop_shape = kv
945        .remove("Properties".as_bytes())
946        .unwrap_or(b"species:S:1:pos:R:3");
947
948    let utf8_str = str::from_utf8(prop_shape).map_err(|_| {
949        nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Float))
950    })?;
951    let prop_shape_value = Value::Str(Text::from(utf8_str));
952    let (_, prop_shape) = parse_properties(prop_shape)?;
953
954    let maybe_latt = kv.remove("Lattice".as_bytes());
955
956    // XXX: comment, latt and prop_shape better to be stored separatly from pure_kv
957    let mut info = Vec::with_capacity(kv.len() + 2);
958    for (k, v) in kv {
959        if k == &b"comment"[..] {
960            let utf8_str = str::from_utf8(v).map_err(|_| {
961                nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Verify))
962            })?;
963            info.push(("comment".to_string(), Value::Str(Text::from(utf8_str))));
964        } else {
965            let (_, v) = parse_kv_right(v)?;
966            info.push((String::from_utf8(k.to_vec()).expect("utf8"), v));
967        }
968    }
969
970    // TODO: latt can be a matrix wrapped inside a pair of double quotes, test it
971    if let Some(latt) = maybe_latt {
972        let opt_quote_parse_2d_array = delimited(
973            opt(tag(b"\"".as_ref())),
974            parse_2d_array,
975            opt(tag(b"\"".as_ref())),
976        );
977        let opt_quote_parse_2d_arr_3x3_flatten = delimited(
978            opt(tag(b"\"".as_ref())),
979            parse_2d_arr_3x3_flatten,
980            opt(tag(b"\"".as_ref())),
981        );
982        let (_, latt) =
983            alt((opt_quote_parse_2d_array, opt_quote_parse_2d_arr_3x3_flatten)).parse(latt)?;
984        info.push(("Lattice".to_string(), latt));
985    }
986    info.push(("Properties".to_string(), prop_shape_value));
987    Ok((input, (info, prop_shape)))
988}
989
990fn parse_natoms(input: &[u8]) -> IResult<&[u8], usize> {
991    let (input, _) = complete::multispace0(input)?;
992    let (input, natoms) = map_res(complete::digit1, |digits: &[u8]| {
993        let s = std::str::from_utf8(digits).expect("digit1 expect ASCII");
994        s.parse::<usize>()
995    })
996    .parse(input)?;
997    let (input, _) = complete::multispace0(input)?;
998    Ok((input, natoms))
999}
1000
1001fn parse_xyz_by_lines<'a>(
1002    input: &'a [u8],
1003    natoms_to_read: usize,
1004    prop_shape: &Vec<(&'a str, Ty, u8)>,
1005    arrs: &mut [(String, Value)],
1006) -> IResult<&'a [u8], usize> {
1007    let mut nat = 0;
1008    let mut proc_input = input;
1009    while !input.is_empty() && nat < natoms_to_read {
1010        let res = terminated(
1011            nom::bytes::streaming::take_until(&b"\n"[..]),
1012            streaming::newline,
1013        )
1014        .parse(proc_input);
1015
1016        let (rest, line) = match res {
1017            Ok((rest, line)) => (rest, line),
1018            Err(nom::Err::Incomplete(_)) => {
1019                return Ok((input, nat));
1020            }
1021            Err(err) => return Err(err),
1022        };
1023        proc_input = rest;
1024
1025        let (_, mut vs_raw) = delimited(
1026            multispace0,
1027            separated_list1(
1028                space1,
1029                alt((
1030                    recognize_float,
1031                    recognize_int,
1032                    recognize_bool,
1033                    // string is the least special element, so parsed in the end
1034                    recognize_bare_str,
1035                )),
1036            ),
1037            multispace0,
1038        )
1039        .parse(line)?;
1040
1041        let mut loc = 0;
1042        for ((_, ty, n), (_, ref mut arr)) in prop_shape.iter().zip(arrs.iter_mut()) {
1043            match (ty, n, arr) {
1044                (_, 0, _) => unreachable!(),
1045                (Ty::I, 1, Value::VecInteger(v, _)) => {
1046                    let x = std::mem::take(&mut vs_raw[loc]);
1047                    let (_, x) = parse_int(x).expect("parse int");
1048                    let Value::Integer(x) = x else { unreachable!() };
1049                    v.push(x);
1050                    loc += 1;
1051                }
1052                (Ty::R, 1, Value::VecFloat(v, _)) => {
1053                    let x = std::mem::take(&mut vs_raw[loc]);
1054                    let (_, x) = parse_float(x).expect("parse float");
1055                    let Value::Float(x) = x else { unreachable!() };
1056                    v.push(x);
1057                    loc += 1;
1058                }
1059                (Ty::L, 1, Value::VecBool(v, _)) => {
1060                    let x = std::mem::take(&mut vs_raw[loc]);
1061                    let (_, x) = parse_bool(x).expect("parse bool");
1062                    let Value::Bool(x) = x else { unreachable!() };
1063                    v.push(x);
1064                    loc += 1;
1065                }
1066                (Ty::S, 1, Value::VecText(v, _)) => {
1067                    let x = std::mem::take(&mut vs_raw[loc]);
1068                    let (_, x) = parse_bare_str(x).expect("parse str");
1069                    let Value::Str(x) = x else { unreachable!() };
1070                    v.push(x);
1071                    loc += 1;
1072                }
1073                (Ty::I, nc, Value::MatrixInteger(m, _)) => {
1074                    let vv = vs_raw[loc..(loc + *nc as usize)]
1075                        .iter()
1076                        .map(|x| {
1077                            let (_, x) = parse_int(x).expect("parse float");
1078                            let Value::Integer(x) = x else { unreachable!() };
1079                            x
1080                        })
1081                        .collect::<Vec<_>>();
1082                    m.push(vv);
1083                    loc += *nc as usize;
1084                }
1085                (Ty::R, nc, Value::MatrixFloat(m, _)) => {
1086                    let vv = vs_raw[loc..(loc + *nc as usize)]
1087                        .iter()
1088                        .map(|x| {
1089                            let (_, x) = parse_float(x).expect("parse float");
1090                            let Value::Float(x) = x else { unreachable!() };
1091                            x
1092                        })
1093                        .collect::<Vec<_>>();
1094                    m.push(vv);
1095                    loc += *nc as usize;
1096                }
1097                (Ty::L, nc, Value::MatrixBool(m, _)) => {
1098                    let vv = vs_raw[loc..(loc + *nc as usize)]
1099                        .iter()
1100                        .map(|x| {
1101                            let (_, x) = parse_bool(x).expect("parse float");
1102                            let Value::Bool(x) = x else { unreachable!() };
1103                            x
1104                        })
1105                        .collect::<Vec<_>>();
1106                    m.push(vv);
1107                    loc += *nc as usize;
1108                }
1109                (Ty::S, nc, Value::MatrixText(m, _)) => {
1110                    let vv = vs_raw[loc..(loc + *nc as usize)]
1111                        .iter()
1112                        .map(|x| {
1113                            let (_, mut x) = parse_bare_str(x).expect("parse float");
1114                            let Value::Str(x) = std::mem::take(&mut x) else {
1115                                unreachable!()
1116                            };
1117                            x
1118                        })
1119                        .collect::<Vec<_>>();
1120                    m.push(vv);
1121                    loc += *nc as usize;
1122                }
1123                _ => unreachable!(),
1124            }
1125        }
1126
1127        nat += 1;
1128    }
1129
1130    Ok((proc_input, nat))
1131}
1132
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use extxyz_types::{Boolean, Integer};

    use crate::write_frame;

    use super::*;

    /// Renders a raw key/value pair as `key=value`; both parts must be valid UTF-8.
    fn kv_text(key: &[u8], val: &[u8]) -> String {
        format!(
            "{}={}",
            str::from_utf8(key).unwrap(),
            str::from_utf8(val).unwrap()
        )
    }

    /// Wrapper that displays a frame the way `write_frame` serialises it.
    struct TFrame(Frame);

    impl std::fmt::Display for TFrame {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let mut out = Vec::new();
            write_frame(&mut out, &self.0).map_err(|_| std::fmt::Error)?;
            match std::str::from_utf8(&out) {
                Ok(text) => f.write_str(text),
                Err(_) => Err(std::fmt::Error),
            }
        }
    }

    /// Reads one frame from `inp` and returns it written back out as text.
    fn roundtrip(inp: &str) -> String {
        let mut rd = Cursor::new(inp.as_bytes());
        let frame = TFrame(read_frame(&mut rd).unwrap());
        format!("{frame}")
    }

    #[test]
    fn test_parse_properties() {
        let (_, prop) = parse_properties(b"species:S:1:pos:R:3").unwrap();
        let wanted = [("species", Ty::S, 1), ("pos", Ty::R, 3)];
        for (idx, want) in wanted.into_iter().enumerate() {
            assert_eq!(prop[idx], want);
        }

        // TODO: if size is 0, raise parsing error
    }

    #[test]
    fn test_promote_values_1d() {
        // Nothing to promote in an empty list.
        let mut vals = [];
        promote_values_1d(&mut vals).unwrap();
        assert!(vals.is_empty());

        // An integer next to floats is promoted to a float.
        let mut vals = [
            Value::Float(FloatNum::from(0.0)),
            Value::Float(FloatNum::from(0.0)),
            Value::Integer(Integer::from(1)),
        ];
        promote_values_1d(&mut vals).unwrap();
        assert!(matches!(vals[2], Value::Float(_)));

        // A boolean next to floats cannot be promoted.
        let mut vals = [
            Value::Float(FloatNum::from(0.0)),
            Value::Float(FloatNum::from(0.0)),
            Value::Bool(Boolean::from(true)),
        ];
        assert!(promote_values_1d(&mut vals).is_err());
    }

    #[test]
    fn test_parse_1d_array() {
        match parse_1d_array(b"[0,1]").unwrap().1 {
            Value::VecInteger(vs, 2) => {
                assert_eq!(*vs[0], 0);
                assert_eq!(*vs[1], 1);
            }
            _ => panic!("not a VecInteger"),
        }

        let float_cases: &[&[u8]] = &[
            b"[0.1, 0.2, 0]",
            // TODO: should support extra trailing ',' in the end of array
            // b"[0.1, 0.2, 0,]",
            b"[ 0.1, 0.2, 0.0]",
            b"[0.1, \t0.2, 0.0]",
            b"[0.1, 0.2,      0]",
            b"[0.1  , 0.2   , 0.0    ]",
        ];
        for &raw in float_cases {
            match parse_1d_array(raw).unwrap().1 {
                Value::VecFloat(vs, 3) => {
                    assert_eq!(*vs[0], 0.1);
                    assert_eq!(*vs[1], 0.2);
                    assert_eq!(*vs[2], 0.0);
                }
                _ => panic!("not a VecFloat"),
            }
        }
    }

    #[test]
    fn test_parse_2d_array() {
        let int_cases: &[&[u8]] = &[
            b"[[-0,1],[2,2],[10,-1]]",
            b"[ [  -0, 1], \t[2,  2], [   10, -1]]",
            b"[[-0, 1  ], [ 2  , 2], [10 , -1]]",
            b"[[-0    \t , 1], [2, 2], [10, -1]]",
        ];
        for &raw in int_cases {
            match parse_2d_array(raw).unwrap().1 {
                Value::MatrixInteger(ms, (3, 2)) => {
                    assert_eq!(*ms[0][0], 0);
                    assert_eq!(*ms[0][1], 1);
                    assert_eq!(*ms[1][0], 2);
                    assert_eq!(*ms[1][1], 2);
                    assert_eq!(*ms[2][0], 10);
                    assert_eq!(*ms[2][1], -1);
                }
                _ => panic!("not a MatrixInteger"),
            }
        }

        // TODO: test array of other types
    }

    #[test]
    fn test_parse_info_line_default() {
        let lines: &[&[u8]] = &[
            b"key1=aa key2=bb",
            b"  key1=aa key2=bb",
            b"  key1=aa key2=bb  ",
            b"key1=aa  \t \t  key2=bb",
            b" key1 =aa key2=bb",
            b" key1= aa key2 =bb",
            b" key1  =  aa key2  =  bb",
            // TODO: move to final value test
            // b"key1= \"aa\" key2  =  \"bb\"",
        ];
        for &raw in lines {
            let (remain, v) = parse_info_line(raw).unwrap();
            assert!(remain.is_empty());
            assert_eq!(kv_text(v[0].0, v[0].1), "key1=aa".to_string());
            assert_eq!(kv_text(v[1].0, v[1].1), "key2=bb".to_string());
        }
    }

    #[test]
    fn test_parse_info_line_with_array() {
        let lines: &[&[u8]] = &[
            b"key1=aa key2=bb Lattice=[[0,0,0],[10,4,4]]",
            b"key1=aa key2=bb Lattice=[[ 0,0 ,0],[10, 4,4]]",
            b"key1=aa key2=bb Lattice=[[0,0,0], [10,4,4]]",
        ];
        for &raw in lines {
            let (remain, v) = parse_info_line(raw).unwrap();
            assert!(remain.is_empty());
            assert_eq!(kv_text(v[0].0, v[0].1), "key1=aa".to_string());
            assert_eq!(kv_text(v[1].0, v[1].1), "key2=bb".to_string());

            // The raw array text keeps its inner whitespace; drop it before comparing.
            let squeezed: String = str::from_utf8(v[2].1)
                .unwrap()
                .chars()
                .filter(|c| !c.is_whitespace())
                .collect();
            assert_eq!(
                format!("{}={}", str::from_utf8(v[2].0).unwrap(), squeezed),
                "Lattice=[[0,0,0],[10,4,4]]".to_string()
            );
        }
    }

    #[test]
    fn test_parse_info_line_with_str() {
        // key_value use recognize instead of doing further parse no extxyz::Value
        let bare: &[&[u8]] = &[
            br#"key1=aa key2=bb pp=what"#,
            br#"key1=aa key2=bb pp= what"#,
        ];
        let quoted: &[&[u8]] = &[
            br#"key1=aa key2=bb pp="what""#,
            br#"key1=aa key2=bb pp=  "what""#,
        ];

        for &raw in bare {
            let (remain, v) = parse_info_line(raw).unwrap();
            assert!(remain.is_empty());
            assert_eq!(kv_text(v[2].0, v[2].1), "pp=what".to_string());
        }

        for &raw in quoted {
            let (remain, v) = parse_info_line(raw).unwrap();
            assert!(remain.is_empty());
            assert_eq!(kv_text(v[2].0, v[2].1), "pp=\"what\"".to_string());
        }
    }

    #[test]
    fn test_parse_frame_default() {
        let got = roundtrip(
            r#"2
Properties=species:S:1:pos:R:3 key1=aa key2=87 key3=thisisaverylongstring ZZPnonsense=65.9
Mn 0.0 0.5 0.5
C 0.0 0.5 0.3
"#,
        );

        let expect = r#"2
ZZPnonsense=65.90000000 key1=aa key2=87 key3=thisisaverylongstring Properties=species:S:1:pos:R:3
Mn          0.00000000       0.50000000       0.50000000
C           0.00000000       0.50000000       0.30000000
"#;
        assert_eq!(got, expect);
    }

    #[test]
    fn test_parse_frame_numeric_start_str_in_arrs() {
        let got = roundtrip(
            r#"2
Properties=species:S:1:pos:R:3:s:S:1 key1=aa key2=87 key3=thisisaverylongstring ZZPnonsense=65.9
Mn 0.0 0.5 0.5 0000
C 0.0 0.5 0.3 878X
"#,
        );

        let expect = r#"2
ZZPnonsense=65.90000000 key1=aa key2=87 key3=thisisaverylongstring Properties=species:S:1:pos:R:3:s:S:1
Mn          0.00000000       0.50000000       0.50000000 0000 
C           0.00000000       0.50000000       0.30000000 878X 
"#;
        assert_eq!(got, expect);
    }

    #[test]
    fn test_parse_frame_without_properties() {
        let got = roundtrip(
            r#"2
key1=aa key2=87 key3=thisisaverylongstring ZZPnonsense=65.9
Mn 0.0 0.5 0.5
C 0.0 0.5 0.3
"#,
        );

        let expect = r#"2
ZZPnonsense=65.90000000 key1=aa key2=87 key3=thisisaverylongstring Properties=species:S:1:pos:R:3
Mn          0.00000000       0.50000000       0.50000000
C           0.00000000       0.50000000       0.30000000
"#;
        assert_eq!(got, expect);
    }

    #[test]
    fn test_parse_lattice_from_flatten() {
        let got = roundtrip(
            r#"3
Lattice="5.0 1.0 0.0 0.0 5.0 2.0 1.0 0.4 5.0" Properties=species:S:1:pos:R:3
Si    0.0    0.0    0.0
Si    2.5    2.5    2.5
O     1.25   1.25   1.25
"#,
        );

        let expect = r#"3
Lattice=[[5.00000000, 0.00000000, 1.00000000], [1.00000000, 5.00000000, 0.40000000], [0.00000000, 2.00000000, 5.00000000]] Properties=species:S:1:pos:R:3
Si          0.00000000       0.00000000       0.00000000
Si          2.50000000       2.50000000       2.50000000
O           1.25000000       1.25000000       1.25000000
"#;
        assert_eq!(got, expect);
    }

    #[test]
    fn test_no_equal_sign_line() {
        let got = roundtrip(
            r#"3
full line that has no equal will be a comment line
Si    0.0    0.0    0.0
Si    2.5    2.5    2.5
O     1.25   1.25   1.25
"#,
        );

        let expect = r#"3
comment="full line that has no equal will be a comment line" Properties=species:S:1:pos:R:3
Si          0.00000000       0.00000000       0.00000000
Si          2.50000000       2.50000000       2.50000000
O           1.25000000       1.25000000       1.25000000
"#;
        assert_eq!(got, expect);
    }

    #[test]
    fn test_read_frames_default() {
        // The same four-atom frame, written twice back to back.
        let one_frame = r#"4
key1=a key2=a/b key3=a@b key4="a@b"
Mg        -4.25650        3.79180       -2.54123
C         -1.15405        2.86652       -1.26699
C         -5.53758        3.70936        0.63504
C         -7.28250        4.71303       -3.82016
"#;
        let inp = one_frame.repeat(2);

        let mut rd = Cursor::new(inp.as_bytes());
        let frames: Vec<_> = read_frames(&mut rd)
            .into_iter()
            .map(|frame| frame.unwrap())
            .collect();

        assert_eq!(frames.len(), 2);
    }
}