1use crate::error::ExtxyzError;
4use extxyz_types::{FloatNum, Frame, Text, Value};
5use nom::{
6 self,
7 branch::alt,
8 bytes::complete::{tag, take_while1},
9 character::{
10 complete::{self, multispace0, space0, space1},
11 streaming,
12 },
13 combinator::{all_consuming, map, map_res, not, opt, peek, recognize, verify},
14 multi::{many0, separated_list0, separated_list1},
15 number,
16 sequence::{delimited, separated_pair, terminated},
17 IResult, Parser,
18};
19use std::{
20 collections::BTreeMap,
21 io::{self, BufRead},
22};
23
24pub fn read_frames<'a, R>(rd: &'a mut R) -> FrameReader<'a, R>
67where
68 R: BufRead,
69{
70 FrameReader::new(rd)
71}
72
/// Iterator state for reading frames from a mutably borrowed reader.
pub struct FrameReader<'a, R> {
    /// Source the frames are read from.
    rd: &'a mut R,
    /// When `true`, `next` returns `None` without touching the reader.
    finished: bool,
}

impl<'a, R> FrameReader<'a, R> {
    /// Wraps `rd` in a reader that has not yet finished.
    pub fn new(rd: &'a mut R) -> Self {
        Self {
            finished: false,
            rd,
        }
    }
}
128
129impl<'a, R> Iterator for FrameReader<'a, R>
130where
131 R: BufRead,
132{
133 type Item = Result<Frame, ExtxyzError>;
134
135 fn next(&mut self) -> Option<Self::Item> {
136 if self.finished {
138 return None;
139 }
140
141 match _read_frame_native_new(self.rd, None) {
142 Ok(Some(frame)) => Some(Ok(frame)),
143 Ok(None) => None,
144 Err(err) => Some(Err(ExtxyzError::Io(err))),
145 }
146 }
147}
148
/// Iterator state for reading frames from a reader that this value owns.
pub struct FrameReaderOwned<R> {
    /// Source the frames are read from.
    rd: R,
    /// Mirrors the `finished` flag of the borrowed [`FrameReader`] between calls.
    finished: bool,
}

impl<R> FrameReaderOwned<R> {
    /// Takes ownership of `rd` and starts in the not-finished state.
    pub fn new(rd: R) -> Self {
        Self {
            finished: false,
            rd,
        }
    }
}
189
190impl<R> Iterator for FrameReaderOwned<R>
191where
192 R: BufRead,
193{
194 type Item = Result<Frame, ExtxyzError>;
195
196 fn next(&mut self) -> Option<Self::Item> {
197 let mut rd = FrameReader {
198 rd: &mut self.rd,
199 finished: self.finished,
200 };
201
202 let out = rd.next();
203 self.finished = rd.finished;
204 out
205 }
206}
207
208pub fn read_frame<R>(rd: &mut R) -> Result<Frame, ExtxyzError>
250where
251 R: BufRead,
252{
253 let Some(frame) = _read_frame_native_new(rd, None)? else {
254 return Err(ExtxyzError::Io(io::Error::new(
255 io::ErrorKind::UnexpectedEof,
256 "does not parse anything from reader",
257 )));
258 };
259 Ok(frame)
260}
261
262pub(crate) fn _read_frame_native_new<R>(
263 rd: &mut R,
264 comment_override: Option<&str>,
265) -> io::Result<Option<Frame>>
266where
267 R: BufRead,
268{
269 let mut maybe_natoms_line = String::new();
270 rd.read_line(&mut maybe_natoms_line)?;
271 if maybe_natoms_line.is_empty() {
272 return Ok(None);
273 }
274
275 let natoms_line_as_bytes = maybe_natoms_line.as_bytes();
277 let (_, natoms) = parse_natoms(natoms_line_as_bytes).map_err(|e| {
278 let es = match e {
279 nom::Err::Incomplete(_) => "nom incomplete streaming".to_string(),
280 nom::Err::Error(err) | nom::Err::Failure(err) => {
281 format!(
282 "{:?}: {}",
283 err.code,
284 str::from_utf8(err.input).unwrap_or("unrecognized u8 input")
285 )
286 }
287 };
288 io::Error::new(io::ErrorKind::InvalidData, es)
289 })?;
290
291 let mut maybe_info_line = String::new();
292 rd.read_line(&mut maybe_info_line)?;
293 if maybe_info_line.is_empty() {
294 return Ok(None);
295 }
296
297 let info_line_as_bytes = maybe_info_line.as_bytes();
298 let (_, (info, prop_shape)) = parse_info(info_line_as_bytes).map_err(|e| {
299 let es = match e {
300 nom::Err::Incomplete(_) => "nom incomplete streaming".to_string(),
301 nom::Err::Error(err) | nom::Err::Failure(err) => {
302 format!(
303 "{:?}: {}",
304 err.code,
305 str::from_utf8(err.input).unwrap_or("unrecognized u8 input")
306 )
307 }
308 };
309 io::Error::new(io::ErrorKind::InvalidData, es)
310 })?;
311
312 let mut arrs: Vec<(String, Value)> = prop_shape
314 .iter()
315 .map(|(name, ty, n)| {
316 let value = match (ty, n) {
317 (Ty::I, 1) => Value::VecInteger(Vec::with_capacity(natoms), natoms as u32),
318 (Ty::R, 1) => Value::VecFloat(Vec::with_capacity(natoms), natoms as u32),
319 (Ty::L, 1) => Value::VecBool(Vec::with_capacity(natoms), natoms as u32),
320 (Ty::S, 1) => Value::VecText(Vec::with_capacity(natoms), natoms as u32),
321
322 (Ty::I, nc) => {
323 Value::MatrixInteger(Vec::with_capacity(natoms), (natoms as u32, *nc as u32))
324 }
325 (Ty::R, nc) => {
326 Value::MatrixFloat(Vec::with_capacity(natoms), (natoms as u32, *nc as u32))
327 }
328 (Ty::L, nc) => {
329 Value::MatrixBool(Vec::with_capacity(natoms), (natoms as u32, *nc as u32))
330 }
331 (Ty::S, nc) => {
332 Value::MatrixText(Vec::with_capacity(natoms), (natoms as u32, *nc as u32))
333 }
334 };
335
336 (name.to_string(), value)
337 })
338 .collect();
339
340 let mut natoms_to_read = natoms;
341 loop {
343 let buf = rd.fill_buf()?;
344 if buf.is_empty() {
345 return Err(io::Error::new(
346 io::ErrorKind::UnexpectedEof,
347 "EOF reached before parsing frame",
348 ));
349 }
350
351 match parse_xyz_by_lines(buf, natoms_to_read, &prop_shape, &mut arrs) {
352 Ok((remain, nat)) => {
353 let len_read = buf.len() - remain.len();
354 rd.consume(len_read);
355
356 natoms_to_read -= nat;
357 if natoms_to_read == 0 {
358 break;
359 } else if natoms_to_read > 0 {
360 continue;
361 } else {
362 return Err(io::Error::new(
364 io::ErrorKind::InvalidData,
365 "too many atoms than expected",
366 ));
367 }
368 }
369 Err(e) => {
370 let es = match e {
371 nom::Err::Incomplete(_) => "nom incomplete streaming".to_string(),
372 nom::Err::Error(err) | nom::Err::Failure(err) => {
373 format!(
374 "{:?}: {}",
375 err.code,
376 str::from_utf8(err.input).unwrap_or("unrecognized u8 input")
377 )
378 }
379 };
380 return Err(io::Error::new(io::ErrorKind::InvalidData, es));
381 }
382 }
383 }
384
385 let mut frame = Frame::new(natoms as u32, info, arrs);
386
387 if let Some(comment) = comment_override {
392 frame.set_comment(comment);
393 }
394
395 Ok(Some(frame))
396}
397
398fn key_value(inp: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
401 let (inp, (k, v)) = separated_pair(
402 delimited(
403 multispace0,
404 verify(
405 take_while1(|c: u8| c != b'=' && !c.is_ascii_whitespace()),
406 |s: &[u8]| recognize_kv_left(s).is_ok(),
407 ),
408 multispace0,
409 ),
410 tag(&b"="[..]),
411 delimited(multispace0, recognize_kv_right, multispace0),
412 )
413 .parse(inp)?;
414 Ok((inp, (k, v)))
415}
416
/// Returns `true` for ASCII letters, ASCII digits and the underscore.
fn is_ident_char(c: u8) -> bool {
    matches!(c, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_')
}
420
421fn recognize_int(inp: &[u8]) -> IResult<&[u8], &[u8]> {
422 terminated(
423 recognize(complete::i32),
424 peek(not(take_while1(is_ident_char))),
425 )
426 .parse(inp)
427}
428
429fn parse_int(inp: &[u8]) -> IResult<&[u8], Value> {
431 map_res(recognize_int, |bytes: &[u8]| {
432 let s = std::str::from_utf8(bytes)
433 .map_err(|_| nom::error::Error::new(bytes, nom::error::ErrorKind::Char))?;
434 let i = s
435 .parse::<i32>()
436 .map_err(|_| nom::error::Error::new(bytes, nom::error::ErrorKind::Digit))?;
437 Ok::<Value, nom::error::Error<&[u8]>>(Value::Integer(i.into()))
438 })
439 .parse(inp)
440}
441
442fn recognize_float(inp: &[u8]) -> IResult<&[u8], &[u8]> {
443 let (inp_, (_, _, fraction, _)) = number::complete::recognize_float_parts(inp)?;
448 if fraction.is_empty() {
449 return Err(nom::Err::Error(nom::error::Error::new(
450 inp_,
451 nom::error::ErrorKind::Float,
452 )));
453 }
454 let len = inp.len() - inp_.len();
455 Ok((inp_, &inp[..len]))
456}
457
458fn parse_float(inp: &[u8]) -> IResult<&[u8], Value> {
459 let (remain, inp) = recognize_float.parse(inp)?;
460 let (_, float) = number::complete::double
461 .map(|i| Value::Float(i.into()))
462 .parse(inp)?;
463
464 Ok((remain, float))
465}
466
467fn parse_bool(inp: &[u8]) -> IResult<&[u8], Value> {
468 alt((
470 tag("true").map(|_| Value::Bool(true.into())),
471 tag("false").map(|_| Value::Bool(false.into())),
472 tag("True").map(|_| Value::Bool(true.into())),
473 tag("False").map(|_| Value::Bool(false.into())),
474 tag("TRUE").map(|_| Value::Bool(true.into())),
475 tag("FALSE").map(|_| Value::Bool(false.into())),
476 tag("T").map(|_| Value::Bool(true.into())),
477 tag("F").map(|_| Value::Bool(false.into())),
478 ))
479 .parse(inp)
480}
481
482fn recognize_bool(inp: &[u8]) -> IResult<&[u8], &[u8]> {
483 recognize(parse_bool).parse(inp)
485}
486
487fn parse_bare_str(inp: &[u8]) -> IResult<&[u8], Value> {
488 let (remain, inp) = recognize_bare_str.parse(inp)?;
489 let s = String::from_utf8(inp.to_vec()).map_err(|_| {
490 nom::Err::Failure(nom::error::Error::new(inp, nom::error::ErrorKind::Verify))
491 })?;
492 Ok((remain, Value::Str(Text::from(s))))
493}
494
495fn recognize_bare_str(inp: &[u8]) -> IResult<&[u8], &[u8]> {
496 let (linp, s) = take_while1(|c: u8| is_ident_char(c)).parse(inp)?;
497 if !s[0].is_ascii_alphanumeric() && s[0] != b'_' {
498 return Err(nom::Err::Error(nom::error::Error::new(
499 linp,
500 nom::error::ErrorKind::Verify,
501 )));
502 }
503 let len = inp.len() - linp.len();
504 Ok((linp, &inp[..len]))
505}
506
507fn parse_quote_str(inp: &[u8]) -> IResult<&[u8], Value> {
508 let parse_inner = map_res(
509 many0(alt((
510 take_while1(|b| b != b'\\' && b != b'"'),
511 map(tag(r#"\""#), |_| b"\"".as_ref()),
512 map(tag(r#"\\"#), |_| b"\\".as_ref()),
513 map(tag(r#"\n"#), |_| b"\n".as_ref()),
514 ))),
515 |chunks: Vec<&[u8]>| {
516 let s = chunks.concat();
517 String::from_utf8(s).map(|s| Value::Str(Text::from(s)))
518 },
519 );
520
521 let (inp, xx) = delimited(tag(b"\"".as_ref()), parse_inner, tag(b"\"".as_ref())).parse(inp)?;
522 Ok((inp, xx))
523}
524
525fn parse_bare_properties_str(inp: &[u8]) -> IResult<&[u8], Value> {
526 let (remain, inp) = take_while1(|c: u8| {
527 c.is_ascii_alphanumeric() || c == b'_' || c == b':' || c == b'@' || c == b'/'
528 })
529 .parse(inp)?;
530 let s = String::from_utf8(inp.to_vec()).map_err(|_| {
531 nom::Err::Failure(nom::error::Error::new(inp, nom::error::ErrorKind::Verify))
532 })?;
533 let v = Value::Str(Text::from(s));
534 Ok((remain, v))
535}
536
/// Parses the value side of a `key=value` pair into a [`Value`].
///
/// The alternatives are tried in the order listed and the first one that succeeds wins, so
/// the order is significant: floats are tried before integers, and the wider bare
/// "properties" string is tried before the plain bare string.
fn parse_kv_right(inp: &[u8]) -> IResult<&[u8], Value> {
    alt((
        parse_2d_array,
        parse_float,
        parse_int,
        parse_bool,
        parse_quote_str,
        parse_bare_properties_str,
        parse_bare_str,
    ))
    .parse(inp)
}
554
555fn parse_kv_left(inp: &[u8]) -> IResult<&[u8], Value> {
557 alt((parse_bare_str, parse_quote_str)).parse(inp)
558}
559
560fn recognize_kv_right(inp: &[u8]) -> IResult<&[u8], &[u8]> {
561 recognize(parse_kv_right).parse(inp)
562}
563
564fn recognize_kv_left(inp: &[u8]) -> IResult<&[u8], &[u8]> {
565 recognize(parse_kv_left).parse(inp)
566}
567
568fn parse_2d_arr_3x3_flatten(inp: &[u8]) -> IResult<&[u8], Value> {
569 let (inp, mut vals) = separated_list0(space1, parse_kv_right).parse(inp)?;
570 if vals.len() != 9 {
571 return Err(nom::Err::Failure(nom::error::Error::new(
572 inp,
573 nom::error::ErrorKind::Verify,
574 )));
575 }
576 promote_values_1d(&mut vals).map_err(|_| {
577 nom::Err::Failure(nom::error::Error::new(inp, nom::error::ErrorKind::Verify))
578 })?;
579
580 match &vals[0] {
581 Value::Integer(_) => {
582 let vals = vals
583 .into_iter()
584 .map(|v| v.as_integer().expect("not an integer"))
585 .collect::<Vec<_>>();
586 let row1 = vec![vals[0], vals[3], vals[6]];
587 let row2 = vec![vals[1], vals[4], vals[7]];
588 let row3 = vec![vals[2], vals[5], vals[8]];
589 let vs = vec![row1, row2, row3];
590 Ok((inp, Value::MatrixInteger(vs, (3, 3))))
591 }
592 Value::Float(_) => {
593 let vals = vals
594 .into_iter()
595 .map(|v| v.as_float().expect("not a float"))
596 .collect::<Vec<_>>();
597 let row1 = vec![vals[0], vals[3], vals[6]];
598 let row2 = vec![vals[1], vals[4], vals[7]];
599 let row3 = vec![vals[2], vals[5], vals[8]];
600 let vs = vec![row1, row2, row3];
601 Ok((inp, Value::MatrixFloat(vs, (3, 3))))
602 }
603 _ => Err(nom::Err::Failure(nom::error::Error::new(
604 inp,
605 nom::error::ErrorKind::Verify,
606 ))),
607 }
608}
609
610fn parse_2d_array(inp: &[u8]) -> IResult<&[u8], Value> {
611 let (inp_, vals) = delimited(
612 tag(b"[".as_ref()),
613 separated_list0(
614 tag(b",".as_ref()),
615 delimited(multispace0, parse_1d_array, multispace0),
616 ),
617 tag(b"]".as_ref()),
618 )
619 .parse(inp)?;
620
621 debug_assert!(!vals.is_empty());
622
623 match &vals[0] {
624 Value::VecInteger(_, nc) => {
625 let nc = *nc;
626 let nr = vals.len();
627 let vs = vals
628 .into_iter()
629 .map(|v| {
630 let Value::VecInteger(i, x) = v else {
631 unreachable!()
632 };
633 debug_assert_eq!(x, nc);
634 i
635 })
636 .collect::<Vec<_>>();
637 Ok((inp_, Value::MatrixInteger(vs, (nr as u32, nc))))
638 }
639 Value::VecFloat(_, nc) => {
640 let nc = *nc;
641 let nr = vals.len();
642 let vs = vals
643 .into_iter()
644 .map(|v| {
645 let Value::VecFloat(i, x) = v else {
646 unreachable!()
647 };
648 debug_assert_eq!(x, nc);
649 i
650 })
651 .collect::<Vec<_>>();
652 Ok((inp_, Value::MatrixFloat(vs, (nr as u32, nc))))
653 }
654 Value::VecBool(_, nc) => {
655 let nc = *nc;
656 let nr = vals.len();
657 let vs = vals
658 .into_iter()
659 .map(|v| {
660 let Value::VecBool(i, x) = v else {
661 unreachable!()
662 };
663 debug_assert_eq!(x, nc);
664 i
665 })
666 .collect::<Vec<_>>();
667 Ok((inp_, Value::MatrixBool(vs, (nr as u32, nc))))
668 }
669 Value::VecText(_, nc) => {
670 let nc = *nc;
671 let nr = vals.len();
672 let vs = vals
673 .into_iter()
674 .map(|v| {
675 let Value::VecText(i, x) = v else {
676 unreachable!()
677 };
678 debug_assert_eq!(x, nc);
679 i
680 })
681 .collect::<Vec<_>>();
682 Ok((inp_, Value::MatrixText(vs, (nr as u32, nc))))
683 }
684 _ => unreachable!(),
685 }
686}
687
688fn parse_1d_array(inp: &[u8]) -> IResult<&[u8], Value> {
693 let (inp_, mut vals) = delimited(
694 tag(b"[".as_ref()),
695 separated_list0(
696 tag(b",".as_ref()),
697 delimited(multispace0, parse_kv_right, multispace0),
698 ),
699 tag(b"]".as_ref()),
700 )
701 .parse(inp)?;
702
703 debug_assert!(!vals.is_empty());
704
705 promote_values_1d(&mut vals).map_err(|_| {
708 nom::Err::Failure(nom::error::Error::new(inp_, nom::error::ErrorKind::Verify))
709 })?;
710
711 match &vals[0] {
712 Value::Integer(_) => {
713 let n = vals.len();
714 let vs = vals
715 .into_iter()
716 .map(|v| {
717 let Value::Integer(i) = v else { unreachable!() };
718 i
719 })
720 .collect::<Vec<_>>();
721 Ok((inp_, Value::VecInteger(vs, n as u32)))
722 }
723 Value::Float(_) => {
724 let n = vals.len();
725 let vs = vals
726 .into_iter()
727 .map(|v| {
728 let Value::Float(i) = v else { unreachable!() };
729 i
730 })
731 .collect::<Vec<_>>();
732 Ok((inp_, Value::VecFloat(vs, n as u32)))
733 }
734 Value::Bool(_) => {
735 let n = vals.len();
736 let vs = vals
737 .into_iter()
738 .map(|v| {
739 let Value::Bool(i) = v else { unreachable!() };
740 i
741 })
742 .collect::<Vec<_>>();
743 Ok((inp_, Value::VecBool(vs, n as u32)))
744 }
745 Value::Str(_) => {
746 let n = vals.len();
747 let vs = vals
748 .into_iter()
749 .map(|v| {
750 let Value::Str(i) = v else { unreachable!() };
751 i
752 })
753 .collect::<Vec<_>>();
754 Ok((inp_, Value::VecText(vs, n as u32)))
755 }
756 _ => unreachable!(),
758 }
759}
760
/// Marker error returned by `promote_values_1d` when values cannot be unified.
#[derive(Debug)]
struct InnerParseError;

impl std::fmt::Display for InnerParseError {
    /// Writes the fixed message `inner parse error`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("inner parse error")
    }
}

impl std::error::Error for InnerParseError {}
775
776fn promote_values_1d(vals: &mut [Value]) -> Result<(), InnerParseError> {
777 if vals.is_empty() {
778 return Ok(());
779 }
780
781 if vals.iter().any(|v| {
782 matches!(
783 v,
784 Value::VecBool(_, _)
785 | Value::VecText(_, _)
786 | Value::VecFloat(_, _)
787 | Value::VecInteger(_, _)
788 | Value::MatrixBool(_, _)
789 | Value::MatrixText(_, _)
790 | Value::MatrixFloat(_, _)
791 | Value::MatrixInteger(_, _)
792 | Value::Unsupported
793 )
794 }) {
795 return Err(InnerParseError);
796 }
797
798 let has_bool = vals.iter().any(|v| matches!(v, Value::Bool(_)));
799 let has_float = vals.iter().any(|v| matches!(v, Value::Float(_)));
800 let has_str = vals.iter().any(|v| matches!(v, Value::Str(_)));
801 let has_int = vals.iter().any(|v| matches!(v, Value::Integer(_)));
802
803 match (has_int, has_float, has_bool, has_str) {
804 (true, false, false, false)
806 | (false, true, false, false)
807 | (false, false, true, false)
808 | (false, false, false, true) => Ok(()),
809 (true, true, false, false) => {
811 vals.iter_mut().for_each(|v| {
812 if let Value::Integer(i) = v {
813 *v = Value::Float(FloatNum::from(f64::from(**i)));
814 }
815 });
816 Ok(())
817 }
818 (true, true, true, true)
820 | (true, true, true, false)
821 | (true, true, false, true)
822 | (true, false, true, true)
823 | (true, false, true, false)
824 | (true, false, false, true)
825 | (false, true, true, true)
826 | (false, true, true, false)
827 | (false, true, false, true)
828 | (false, false, true, true) => Err(InnerParseError),
829 (false, false, false, false) => unreachable!(),
830 }
831}
832
833#[allow(clippy::type_complexity)]
834fn parse_info_line(inp: &[u8]) -> IResult<&[u8], Vec<(&[u8], &[u8])>> {
835 let (inp, kv) = delimited(
836 multispace0,
837 all_consuming(separated_list1(space0, key_value)),
838 multispace0,
839 )
840 .parse(inp)?;
841 Ok((inp, kv))
842}
843
844#[allow(clippy::type_complexity)]
845fn parse_no_equal_sign_line(inp: &[u8]) -> IResult<&[u8], Vec<(&[u8], &[u8])>> {
846 let (inp, ln) = take_while1(|c: u8| c != b'=').parse(inp)?;
847 Ok((inp, vec![(&b"comment"[..], ln)]))
848}
849
/// Element type of a per-atom column, as spelled by the type letter in a `Properties`
/// descriptor such as `species:S:1:pos:R:3`.
#[derive(Debug, Hash, PartialEq, Eq)]
enum Ty {
    /// `I`: the column is stored as integers.
    I,
    /// `R`: the column is stored as floats.
    R,
    /// `L`: the column is stored as booleans.
    L,
    /// `S`: the column is stored as text.
    S,
}
861
/// Parsed `Properties` descriptor: one `(name, element type, number of columns)` entry per
/// per-atom property, in declaration order. Names borrow from the parsed input.
type PropShape<'a> = Vec<(&'a str, Ty, u8)>;
863
864fn parse_properties<'a>(inp: &'a [u8]) -> IResult<&'a [u8], PropShape<'a>> {
865 let (inp_, segments) =
867 separated_list1(tag(b":".as_ref()), take_while1(|c: u8| c != b':')).parse(inp)?;
868
869 if segments.len() % 3 != 0 {
870 return Err(nom::Err::Failure(nom::error::Error::new(
872 inp,
873 nom::error::ErrorKind::Verify,
874 )));
875 }
876
877 let mut mp = Vec::new();
879 for chunk in segments.chunks(3) {
880 let id = chunk[0];
881 let ty = match chunk[1] {
882 b"I" => Ty::I,
883 b"R" => Ty::R,
884 b"L" => Ty::L,
885 b"S" => Ty::S,
886 _ => {
887 return Err(nom::Err::Failure(nom::error::Error::new(
889 inp,
890 nom::error::ErrorKind::Verify,
891 )));
892 }
893 };
894 let nc = str::from_utf8(chunk[2])
895 .map_err(|_| {
896 nom::Err::Failure(nom::error::Error::new(inp, nom::error::ErrorKind::Verify))
897 })?
898 .parse::<u8>()
899 .map_err(|_| {
900 nom::Err::Failure(nom::error::Error::new(inp, nom::error::ErrorKind::Verify))
901 })?;
902
903 let id = str::from_utf8(id).unwrap();
904 mp.push((id, ty, nc));
905 }
906 Ok((inp_, mp))
907}
908
/// Parsed info-line entries as `(key, value)` pairs.
type TypInfo = Vec<(String, Value)>;
/// Parsed `Properties` descriptor; structurally the same type as `PropShape`.
type TypPropShape<'a> = Vec<(&'a str, Ty, u8)>;
911
912fn parse_info<'a>(input: &'a [u8]) -> IResult<&'a [u8], (TypInfo, TypPropShape<'a>)> {
913 let (input, line) = terminated(
914 nom::bytes::complete::take_until(&b"\n"[..]),
915 complete::newline,
916 )
917 .parse(input)?;
918
919 let (_, info_kv) = alt((
920 all_consuming(parse_info_line),
921 all_consuming(parse_no_equal_sign_line),
922 ))
923 .parse(line)?;
924
925 let mut kv = BTreeMap::new();
927
928 for (k, v) in info_kv {
929 let old_val = kv.insert(k, v);
930 if old_val.is_some() {
933 return Err(nom::Err::Failure(nom::error::Error::new(
934 k,
935 nom::error::ErrorKind::Verify,
936 )));
937 }
938 }
939
940 let prop_shape = kv
945 .remove("Properties".as_bytes())
946 .unwrap_or(b"species:S:1:pos:R:3");
947
948 let utf8_str = str::from_utf8(prop_shape).map_err(|_| {
949 nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Float))
950 })?;
951 let prop_shape_value = Value::Str(Text::from(utf8_str));
952 let (_, prop_shape) = parse_properties(prop_shape)?;
953
954 let maybe_latt = kv.remove("Lattice".as_bytes());
955
956 let mut info = Vec::with_capacity(kv.len() + 2);
958 for (k, v) in kv {
959 if k == &b"comment"[..] {
960 let utf8_str = str::from_utf8(v).map_err(|_| {
961 nom::Err::Error(nom::error::Error::new(input, nom::error::ErrorKind::Verify))
962 })?;
963 info.push(("comment".to_string(), Value::Str(Text::from(utf8_str))));
964 } else {
965 let (_, v) = parse_kv_right(v)?;
966 info.push((String::from_utf8(k.to_vec()).expect("utf8"), v));
967 }
968 }
969
970 if let Some(latt) = maybe_latt {
972 let opt_quote_parse_2d_array = delimited(
973 opt(tag(b"\"".as_ref())),
974 parse_2d_array,
975 opt(tag(b"\"".as_ref())),
976 );
977 let opt_quote_parse_2d_arr_3x3_flatten = delimited(
978 opt(tag(b"\"".as_ref())),
979 parse_2d_arr_3x3_flatten,
980 opt(tag(b"\"".as_ref())),
981 );
982 let (_, latt) =
983 alt((opt_quote_parse_2d_array, opt_quote_parse_2d_arr_3x3_flatten)).parse(latt)?;
984 info.push(("Lattice".to_string(), latt));
985 }
986 info.push(("Properties".to_string(), prop_shape_value));
987 Ok((input, (info, prop_shape)))
988}
989
990fn parse_natoms(input: &[u8]) -> IResult<&[u8], usize> {
991 let (input, _) = complete::multispace0(input)?;
992 let (input, natoms) = map_res(complete::digit1, |digits: &[u8]| {
993 let s = std::str::from_utf8(digits).expect("digit1 expect ASCII");
994 s.parse::<usize>()
995 })
996 .parse(input)?;
997 let (input, _) = complete::multispace0(input)?;
998 Ok((input, natoms))
999}
1000
1001fn parse_xyz_by_lines<'a>(
1002 input: &'a [u8],
1003 natoms_to_read: usize,
1004 prop_shape: &Vec<(&'a str, Ty, u8)>,
1005 arrs: &mut [(String, Value)],
1006) -> IResult<&'a [u8], usize> {
1007 let mut nat = 0;
1008 let mut proc_input = input;
1009 while !input.is_empty() && nat < natoms_to_read {
1010 let res = terminated(
1011 nom::bytes::streaming::take_until(&b"\n"[..]),
1012 streaming::newline,
1013 )
1014 .parse(proc_input);
1015
1016 let (rest, line) = match res {
1017 Ok((rest, line)) => (rest, line),
1018 Err(nom::Err::Incomplete(_)) => {
1019 return Ok((input, nat));
1020 }
1021 Err(err) => return Err(err),
1022 };
1023 proc_input = rest;
1024
1025 let (_, mut vs_raw) = delimited(
1026 multispace0,
1027 separated_list1(
1028 space1,
1029 alt((
1030 recognize_float,
1031 recognize_int,
1032 recognize_bool,
1033 recognize_bare_str,
1035 )),
1036 ),
1037 multispace0,
1038 )
1039 .parse(line)?;
1040
1041 let mut loc = 0;
1042 for ((_, ty, n), (_, ref mut arr)) in prop_shape.iter().zip(arrs.iter_mut()) {
1043 match (ty, n, arr) {
1044 (_, 0, _) => unreachable!(),
1045 (Ty::I, 1, Value::VecInteger(v, _)) => {
1046 let x = std::mem::take(&mut vs_raw[loc]);
1047 let (_, x) = parse_int(x).expect("parse int");
1048 let Value::Integer(x) = x else { unreachable!() };
1049 v.push(x);
1050 loc += 1;
1051 }
1052 (Ty::R, 1, Value::VecFloat(v, _)) => {
1053 let x = std::mem::take(&mut vs_raw[loc]);
1054 let (_, x) = parse_float(x).expect("parse float");
1055 let Value::Float(x) = x else { unreachable!() };
1056 v.push(x);
1057 loc += 1;
1058 }
1059 (Ty::L, 1, Value::VecBool(v, _)) => {
1060 let x = std::mem::take(&mut vs_raw[loc]);
1061 let (_, x) = parse_bool(x).expect("parse bool");
1062 let Value::Bool(x) = x else { unreachable!() };
1063 v.push(x);
1064 loc += 1;
1065 }
1066 (Ty::S, 1, Value::VecText(v, _)) => {
1067 let x = std::mem::take(&mut vs_raw[loc]);
1068 let (_, x) = parse_bare_str(x).expect("parse str");
1069 let Value::Str(x) = x else { unreachable!() };
1070 v.push(x);
1071 loc += 1;
1072 }
1073 (Ty::I, nc, Value::MatrixInteger(m, _)) => {
1074 let vv = vs_raw[loc..(loc + *nc as usize)]
1075 .iter()
1076 .map(|x| {
1077 let (_, x) = parse_int(x).expect("parse float");
1078 let Value::Integer(x) = x else { unreachable!() };
1079 x
1080 })
1081 .collect::<Vec<_>>();
1082 m.push(vv);
1083 loc += *nc as usize;
1084 }
1085 (Ty::R, nc, Value::MatrixFloat(m, _)) => {
1086 let vv = vs_raw[loc..(loc + *nc as usize)]
1087 .iter()
1088 .map(|x| {
1089 let (_, x) = parse_float(x).expect("parse float");
1090 let Value::Float(x) = x else { unreachable!() };
1091 x
1092 })
1093 .collect::<Vec<_>>();
1094 m.push(vv);
1095 loc += *nc as usize;
1096 }
1097 (Ty::L, nc, Value::MatrixBool(m, _)) => {
1098 let vv = vs_raw[loc..(loc + *nc as usize)]
1099 .iter()
1100 .map(|x| {
1101 let (_, x) = parse_bool(x).expect("parse float");
1102 let Value::Bool(x) = x else { unreachable!() };
1103 x
1104 })
1105 .collect::<Vec<_>>();
1106 m.push(vv);
1107 loc += *nc as usize;
1108 }
1109 (Ty::S, nc, Value::MatrixText(m, _)) => {
1110 let vv = vs_raw[loc..(loc + *nc as usize)]
1111 .iter()
1112 .map(|x| {
1113 let (_, mut x) = parse_bare_str(x).expect("parse float");
1114 let Value::Str(x) = std::mem::take(&mut x) else {
1115 unreachable!()
1116 };
1117 x
1118 })
1119 .collect::<Vec<_>>();
1120 m.push(vv);
1121 loc += *nc as usize;
1122 }
1123 _ => unreachable!(),
1124 }
1125 }
1126
1127 nat += 1;
1128 }
1129
1130 Ok((proc_input, nat))
1131}
1132
#[cfg(test)]
mod tests {
    use std::io::Cursor;

    use extxyz_types::{Boolean, Integer};

    use crate::write_frame;

    use super::*;

    // The default descriptor splits into (name, type, column count) triples.
    #[test]
    fn test_parse_properties() {
        let expect = b"species:S:1:pos:R:3";
        let (_, prop) = parse_properties(expect).unwrap();
        assert_eq!(prop[0], ("species", Ty::S, 1));
        assert_eq!(prop[1], ("pos", Ty::R, 3));

    }

    // Empty input is accepted, integers are widened next to floats, and a float/bool mix is
    // rejected.
    #[test]
    fn test_promote_values_1d() {
        let mut vals = [];
        promote_values_1d(&mut vals).unwrap();

        assert!(vals.is_empty());

        let mut vals = [
            Value::Float(FloatNum::from(0.0)),
            Value::Float(FloatNum::from(0.0)),
            Value::Integer(Integer::from(1)),
        ];
        promote_values_1d(&mut vals).unwrap();

        assert!(matches!(vals[2], Value::Float(_)));

        let mut vals = [
            Value::Float(FloatNum::from(0.0)),
            Value::Float(FloatNum::from(0.0)),
            Value::Bool(Boolean::from(true)),
        ];
        assert!(promote_values_1d(&mut vals).is_err());
    }

    // Integer lists stay integer; lists mixing floats and integers become float lists,
    // regardless of whitespace around the elements.
    #[test]
    fn test_parse_1d_array() {
        let arr = b"[0,1]";
        let (_, val) = parse_1d_array(arr).unwrap();
        let Value::VecInteger(vs, 2) = val else {
            panic!("not a VecInteger")
        };
        assert_eq!(*vs[0], 0);
        assert_eq!(*vs[1], 1);

        let valid_expects: &[&[u8]] = &[
            b"[0.1, 0.2, 0]",
            b"[ 0.1, 0.2, 0.0]",
            b"[0.1, \t0.2, 0.0]",
            b"[0.1, 0.2, 0]",
            b"[0.1 , 0.2 , 0.0 ]",
        ];
        for expect in valid_expects {
            let (_, val) = parse_1d_array(expect).unwrap();
            let Value::VecFloat(vs, 3) = val else {
                panic!("not a VecFloat")
            };
            assert_eq!(*vs[0], 0.1);
            assert_eq!(*vs[1], 0.2);
            assert_eq!(*vs[2], 0.0);
        }
    }

    // A 3x2 integer matrix parses identically for several whitespace layouts.
    #[test]
    fn test_parse_2d_array() {
        let valid_expects: &[&[u8]] = &[
            b"[[-0,1],[2,2],[10,-1]]",
            b"[ [ -0, 1], \t[2, 2], [ 10, -1]]",
            b"[[-0, 1 ], [ 2 , 2], [10 , -1]]",
            b"[[-0 \t , 1], [2, 2], [10, -1]]",
        ];
        for expect in valid_expects {
            let (_, val) = parse_2d_array(expect).unwrap();
            let Value::MatrixInteger(ms, (3, 2)) = val else {
                panic!("not a MatrixInteger")
            };
            assert_eq!(*ms[0][0], 0);
            assert_eq!(*ms[0][1], 1);
            assert_eq!(*ms[1][0], 2);
            assert_eq!(*ms[1][1], 2);
            assert_eq!(*ms[2][0], 10);
            assert_eq!(*ms[2][1], -1);
        }

    }

    // Whitespace around keys, `=` and values does not change the recognized pairs.
    #[test]
    fn test_parse_info_line_default() {
        let valid_expects: &[&[u8]] = &[
            b"key1=aa key2=bb",
            b" key1=aa key2=bb",
            b" key1=aa key2=bb ",
            b"key1=aa \t \t key2=bb",
            b" key1 =aa key2=bb",
            b" key1= aa key2 =bb",
            b" key1 = aa key2 = bb",
        ];
        for expect in valid_expects {
            let (remain, v) = parse_info_line(expect).unwrap();
            assert!(remain.is_empty());
            assert_eq!(
                format!(
                    "{}={}",
                    str::from_utf8(v[0].0).unwrap(),
                    str::from_utf8(v[0].1).unwrap()
                ),
                "key1=aa".to_string()
            );
            assert_eq!(
                format!(
                    "{}={}",
                    str::from_utf8(v[1].0).unwrap(),
                    str::from_utf8(v[1].1).unwrap()
                ),
                "key2=bb".to_string()
            );
        }
    }

    // A nested-array value is recognized as a single value; whitespace inside it is
    // stripped before comparing.
    #[test]
    fn test_parse_info_line_with_array() {
        let valid_expects: &[&[u8]] = &[
            b"key1=aa key2=bb Lattice=[[0,0,0],[10,4,4]]",
            b"key1=aa key2=bb Lattice=[[ 0,0 ,0],[10, 4,4]]",
            b"key1=aa key2=bb Lattice=[[0,0,0], [10,4,4]]",
        ];
        for expect in valid_expects {
            let (remain, v) = parse_info_line(expect).unwrap();
            assert!(remain.is_empty());
            assert_eq!(
                format!(
                    "{}={}",
                    str::from_utf8(v[0].0).unwrap(),
                    str::from_utf8(v[0].1).unwrap()
                ),
                "key1=aa".to_string()
            );
            assert_eq!(
                format!(
                    "{}={}",
                    str::from_utf8(v[1].0).unwrap(),
                    str::from_utf8(v[1].1).unwrap()
                ),
                "key2=bb".to_string()
            );
            assert_eq!(
                format!(
                    "{}={}",
                    str::from_utf8(v[2].0).unwrap(),
                    str::from_utf8(v[2].1)
                        .unwrap()
                        .chars()
                        .filter(|c| !c.is_whitespace())
                        .collect::<String>()
                ),
                "Lattice=[[0,0,0],[10,4,4]]".to_string()
            );
        }
    }

    // Bare and double-quoted string values; the recognized slice of a quoted value keeps
    // its quotes.
    #[test]
    fn test_parse_info_line_with_str() {
        let valid_expects: &[&[u8]] = &[
            br#"key1=aa key2=bb pp=what"#,
            br#"key1=aa key2=bb pp= what"#,
        ];
        for expect in valid_expects {
            let (remain, v) = parse_info_line(expect).unwrap();
            assert!(remain.is_empty());
            assert_eq!(
                format!(
                    "{}={}",
                    str::from_utf8(v[2].0).unwrap(),
                    str::from_utf8(v[2].1).unwrap(),
                ),
                "pp=what".to_string()
            );
        }

        let valid_expects: &[&[u8]] = &[
            br#"key1=aa key2=bb pp="what""#,
            br#"key1=aa key2=bb pp= "what""#,
        ];
        for expect in valid_expects {
            let (remain, v) = parse_info_line(expect).unwrap();
            assert!(remain.is_empty());
            assert_eq!(
                format!(
                    "{}={}",
                    str::from_utf8(v[2].0).unwrap(),
                    str::from_utf8(v[2].1).unwrap(),
                ),
                "pp=\"what\"".to_string()
            );
        }
    }

    // Wrapper that renders a frame through `write_frame`, so tests can compare text.
    struct TFrame(Frame);

    impl std::fmt::Display for TFrame {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            let mut buf = Vec::new();
            write_frame(&mut buf, &self.0).map_err(|_| std::fmt::Error)?;
            let s = std::str::from_utf8(&buf).map_err(|_| std::fmt::Error)?;
            f.write_str(s)
        }
    }

    // Round trip of a frame with an explicit `Properties` key.
    #[test]
    fn test_parse_frame_default() {
        let inp = r#"2
Properties=species:S:1:pos:R:3 key1=aa key2=87 key3=thisisaverylongstring ZZPnonsense=65.9
Mn 0.0 0.5 0.5
C 0.0 0.5 0.3
"#;

        let mut rd = Cursor::new(inp.as_bytes());
        let frame = read_frame(&mut rd).unwrap();
        let frame = TFrame(frame);

        let expect = r#"2
ZZPnonsense=65.90000000 key1=aa key2=87 key3=thisisaverylongstring Properties=species:S:1:pos:R:3
Mn 0.00000000 0.50000000 0.50000000
C 0.00000000 0.50000000 0.30000000
"#;
        assert_eq!(format!("{frame}"), expect);
    }

    // String columns may hold values that start with digits (`0000`, `878X`).
    #[test]
    fn test_parse_frame_numeric_start_str_in_arrs() {
        let inp = r#"2
Properties=species:S:1:pos:R:3:s:S:1 key1=aa key2=87 key3=thisisaverylongstring ZZPnonsense=65.9
Mn 0.0 0.5 0.5 0000
C 0.0 0.5 0.3 878X
"#;

        let mut rd = Cursor::new(inp.as_bytes());
        let frame = read_frame(&mut rd).unwrap();
        let frame = TFrame(frame);

        let expect = r#"2
ZZPnonsense=65.90000000 key1=aa key2=87 key3=thisisaverylongstring Properties=species:S:1:pos:R:3:s:S:1
Mn 0.00000000 0.50000000 0.50000000 0000
C 0.00000000 0.50000000 0.30000000 878X
"#;
        assert_eq!(format!("{frame}"), expect);
    }

    // Without a `Properties` key the default `species:S:1:pos:R:3` layout is used.
    #[test]
    fn test_parse_frame_without_properties() {
        let inp = r#"2
key1=aa key2=87 key3=thisisaverylongstring ZZPnonsense=65.9
Mn 0.0 0.5 0.5
C 0.0 0.5 0.3
"#;

        let mut rd = Cursor::new(inp.as_bytes());
        let frame = read_frame(&mut rd).unwrap();
        let frame = TFrame(frame);

        let expect = r#"2
ZZPnonsense=65.90000000 key1=aa key2=87 key3=thisisaverylongstring Properties=species:S:1:pos:R:3
Mn 0.00000000 0.50000000 0.50000000
C 0.00000000 0.50000000 0.30000000
"#;
        assert_eq!(format!("{frame}"), expect);
    }

    // A quoted, flat nine-number `Lattice` is written back as a nested 3x3 array.
    #[test]
    fn test_parse_lattice_from_flatten() {
        let inp = r#"3
Lattice="5.0 1.0 0.0 0.0 5.0 2.0 1.0 0.4 5.0" Properties=species:S:1:pos:R:3
Si 0.0 0.0 0.0
Si 2.5 2.5 2.5
O 1.25 1.25 1.25
"#;

        let mut rd = Cursor::new(inp.as_bytes());
        let frame = read_frame(&mut rd).unwrap();
        let frame = TFrame(frame);

        let expect = r#"3
Lattice=[[5.00000000, 0.00000000, 1.00000000], [1.00000000, 5.00000000, 0.40000000], [0.00000000, 2.00000000, 5.00000000]] Properties=species:S:1:pos:R:3
Si 0.00000000 0.00000000 0.00000000
Si 2.50000000 2.50000000 2.50000000
O 1.25000000 1.25000000 1.25000000
"#;
        assert_eq!(format!("{frame}"), expect);
    }

    // A second line without any `=` is kept as a `comment` entry.
    #[test]
    fn test_no_equal_sign_line() {
        let inp = r#"3
full line that has no equal will be a comment line
Si 0.0 0.0 0.0
Si 2.5 2.5 2.5
O 1.25 1.25 1.25
"#;

        let mut rd = Cursor::new(inp.as_bytes());
        let frame = read_frame(&mut rd).unwrap();
        let frame = TFrame(frame);

        let expect = r#"3
comment="full line that has no equal will be a comment line" Properties=species:S:1:pos:R:3
Si 0.00000000 0.00000000 0.00000000
Si 2.50000000 2.50000000 2.50000000
O 1.25000000 1.25000000 1.25000000
"#;
        assert_eq!(format!("{frame}"), expect);
    }

    // Two concatenated frames are both yielded by the iterator.
    #[test]
    fn test_read_frames_default() {
        let inp = r#"4
key1=a key2=a/b key3=a@b key4="a@b"
Mg -4.25650 3.79180 -2.54123
C -1.15405 2.86652 -1.26699
C -5.53758 3.70936 0.63504
C -7.28250 4.71303 -3.82016
4
key1=a key2=a/b key3=a@b key4="a@b"
Mg -4.25650 3.79180 -2.54123
C -1.15405 2.86652 -1.26699
C -5.53758 3.70936 0.63504
C -7.28250 4.71303 -3.82016
"#;
        let mut rd = Cursor::new(inp.as_bytes());
        let mut frames = vec![];
        for frame in read_frames(&mut rd) {
            let frame = frame.unwrap();
            frames.push(frame);
        }

        assert_eq!(frames.len(), 2);
    }
}