gchemol_readwrite/formats/
xyz.rs

1// [[file:../../gchemol-readwrite.note::*imports][imports:1]]
2use super::parser::*;
3use super::*;
4// imports:1 ends here
5
6// [[file:../../gchemol-readwrite.note::bdc38ed5][bdc38ed5]]
7type Point3 = [f64; 3];
8
9// C -10.0949 -0.5455  0.0000
10fn read_atom_xyz(s: &str) -> IResult<&str, (&str, Point3, &str)> {
11    let mut element = alt((digit1, alpha1));
12    do_parse!(
13        s,
14        space0 >> s: element >> space1 >> p: xyz_array >>
15        remained: read_line >>              // ignore the remaing
16        ((s, p, remained))
17    )
18}
19
20#[test]
21fn test_xyz_read_atom() {
22    let line = "C -11.4286 -1.3155  0.0000\n";
23    let (_, (symbol, _, _)) = read_atom_xyz(line).expect("xyz atom");
24    assert_eq!("C", symbol);
25
26    let line = "6 -11.4286 -1.3155  0.0000 0.0 0.0 0.0\n";
27    let (_, (symbol, position, _)) = read_atom_xyz(line).expect("xyz atom velocity");
28    assert_eq!("6", symbol);
29    assert_eq!(0.0, position[2]);
30}
31
32/// Create a list of atoms from many lines in xyz format
33/// # Example
34/// C -11.4286  1.7645  0.0000
35/// C -10.0949  0.9945  0.0000
36/// C -10.0949 -0.5455  0.0000
37fn read_atoms_pxyz(s: &str) -> IResult<&str, Vec<(&str, Point3, &str)>> {
38    many1(read_atom_xyz)(s)
39}
40
41#[test]
42fn test_xyz_read_atoms() {
43    let txt = "C -11.4286  1.7645  0.0000
44C -10.0949  0.9945  0.0000
45C -10.0949 -0.5455  0.0000
46C -11.4286 -1.3155  0.0000
47\n";
48    let (_, atoms) = read_atoms_pxyz(txt).expect("read_atoms");
49    assert_eq!(4, atoms.len());
50}
51// bdc38ed5 ends here
52
53// [[file:../../gchemol-readwrite.note::f20b5155][f20b5155]]
54fn read_atoms_xyz(s: &str) -> IResult<&str, (&str, Vec<(&str, Point3, &str)>)> {
55    do_parse!(
56        s,
57        n: read_usize >>          // the number of atoms
58        title: read_line >>       // title line
59        atoms: read_atoms_pxyz >> // symbols and positions
60        ({
61            if n != atoms.len() {
62                warn!("Informal xyz format: expect {} atoms, but found {}", n, atoms.len());
63                debug!("{:?}", s);
64            }
65            (title.trim(), atoms)
66        })
67    )
68}
69
70#[test]
71fn test_xyz_read_molecule() {
72    let txt = "12
73
74C -11.4286  1.7645  0.0000
75C -10.0949  0.9945  0.0000
76C -10.0949 -0.5455  0.0000
77C -11.4286 -1.3155  0.0000
78C -12.7623 -0.5455  0.0000
79C -12.7623  0.9945  0.0000
80H -11.4286  2.8545  0.0000
81H -9.15090  1.5395  0.0000
82H -9.15090 -1.0905  0.0000
83H -11.4286 -2.4055  0.0000
84H -13.7062 -1.0905  0.0000
85H -13.7062  1.5395  0.0000";
86
87    let (_, (_, atoms)) = read_atoms_xyz(txt).expect("xyz atoms");
88    assert_eq!(12, atoms.len());
89}
90// f20b5155 ends here
91
92// [[file:../../gchemol-readwrite.note::ed71e42e][ed71e42e]]
93fn parse_molecule(input: &str, plain: bool) -> Result<Molecule> {
94    // plain xyz style with coordinates only?
95    let lines: Vec<_> = input.trim().lines().collect();
96    ensure!(lines.len() > 2, "invalid xyz part: {lines:?}");
97
98    let mol = if plain {
99        build_mol_xyz(&lines[..])?
100    } else {
101        let natoms: usize = lines[0].trim().parse()?;
102        let title = lines[1].trim();
103        let mut mol = build_mol_xyz(&lines[2..])?;
104        mol.set_title(title.to_owned());
105        let natoms_ = mol.natoms();
106        if natoms_ != natoms {
107            warn!("found xyz format error: expand {natoms}, but found {natoms_}");
108        }
109        mol
110    };
111
112    Ok(mol)
113}
114
115/// Handle dummy TV atoms (transitional vector, traditionally used in
116/// Gaussian/MOPAC package for periodic system)
117fn build_mol_xyz(lines: &[&str]) -> Result<Molecule> {
118    let mut atoms = vec![];
119    for line in lines.iter() {
120        let a: Atom = line.parse()?;
121        atoms.push(a);
122    }
123
124    // HACK: parse TV/VEC for lattice vectors
125    let mut lat_vectors = vec![];
126    let atoms = atoms.into_iter().filter_map(|a| match a.kind() {
127        AtomKind::Dummy(x) => {
128            // ASE/ADF writes cell vectors using VEC line
129            if x == "TV" || x == "VEC1" || x == "VEC2" || x == "VEC3" {
130                trace!("found TV dummy atom.");
131                lat_vectors.push(a.position());
132            }
133            None
134        }
135        AtomKind::Element(x) => Some(a),
136    });
137
138    let mut mol = Molecule::from_atoms(atoms);
139
140    // construct lattice parsed from three "TV" dummy atoms.
141    if lat_vectors.len() == 3 {
142        let lat = Lattice::new([lat_vectors[0], lat_vectors[1], lat_vectors[2]]);
143        mol.set_lattice(lat);
144    } else if !lat_vectors.is_empty() {
145        error!("Expect 3, but found {} TV atoms.", lat_vectors.len());
146    }
147
148    Ok(mol)
149}
150// ed71e42e ends here
151
152// [[file:../../gchemol-readwrite.note::fa6b0b98][fa6b0b98]]
153/// Classical XYZ format
154#[derive(Copy, Clone, Debug)]
155pub(super) struct XyzFile();
156
157impl ChemicalFile for XyzFile {
158    fn ftype(&self) -> &str {
159        "text/xyz"
160    }
161
162    fn possible_extensions(&self) -> Vec<&str> {
163        vec![".xyz"]
164    }
165
166    fn format_molecule(&self, mol: &Molecule) -> Result<String> {
167        // meta information
168        let mut lines = String::new();
169        if mol.is_periodic() {
170            writeln!(&mut lines, "{}", mol.natoms() + 3)?;
171        } else {
172            writeln!(&mut lines, "{}", mol.natoms())?;
173        }
174        writeln!(&mut lines, "{}", mol.title())?;
175
176        // only write velocities when they are meaningful
177        let write_velocity = !mol.atoms().all(|(_, a)| {
178            let [vx, vy, vz] = a.velocity();
179            vx == 0.0 && vy == 0.0 && vz == 0.0
180        });
181        for (_, a) in mol.atoms() {
182            let p = a.position();
183            let v = a.velocity();
184            let sym = a.symbol();
185            if write_velocity {
186                writeln!(
187                    &mut lines,
188                    "{:6} {:-18.6}{:-18.6}{:-18.6}{:-18.6}{:-18.6}{:-18.6}",
189                    sym, p[0], p[1], p[2], v[0], v[1], v[2]
190                )?;
191            } else {
192                writeln!(&mut lines, "{:6} {:-18.6}{:-18.6}{:-18.6}", sym, p[0], p[1], p[2])?;
193            }
194        }
195
196        // write lattice transition vectors using TV symbol.
197        if let Some(lat) = &mol.lattice {
198            for v in lat.vectors().iter() {
199                writeln!(&mut lines, "TV {:-12.8} {:-12.8} {:-12.8}", v[0], v[1], v[2]);
200            }
201        }
202
203        Ok(lines)
204    }
205}
206
207impl ParseMolecule for XyzFile {
208    fn parse_molecule(&self, input: &str) -> Result<Molecule> {
209        parse_molecule(input, false)
210    }
211}
212// fa6b0b98 ends here
213
214// [[file:../../gchemol-readwrite.note::*plain xyz][plain xyz:1]]
215/// Plain xyz coordinates with atom symbols (no atom count line and title line)
216#[derive(Debug, Clone, Copy)]
217pub(super) struct PlainXyzFile();
218
219impl ParseMolecule for PlainXyzFile {
220    fn parse_molecule(&self, input: &str) -> Result<Molecule> {
221        // remove starting empty line
222        parse_molecule(input.trim_start(), true)
223    }
224}
225
226impl ChemicalFile for PlainXyzFile {
227    /// Possible file extensions
228    fn possible_extensions(&self) -> Vec<&str> {
229        [".coord", ".pxyz", ".coords"].to_vec()
230    }
231
232    fn ftype(&self) -> &str {
233        "text/pxyz"
234    }
235
236    /// Return a string representation of molecule
237    /// Multiple molecules will be separated by a blank line
238    fn format_molecule(&self, mol: &Molecule) -> Result<String> {
239        let mut lines = String::new();
240
241        for (_, a) in mol.atoms() {
242            lines.push_str(format!("{}\n", a.to_string()).as_ref());
243        }
244
245        // write lattice transition vectors using TV symbol.
246        if let Some(lat) = &mol.lattice {
247            for v in lat.vectors().iter() {
248                let line = format!("TV {:-12.8} {:-12.8} {:-12.8}\n", v[0], v[1], v[2]);
249                lines.push_str(&line);
250            }
251        }
252
253        // append a blank line as a separator between multiple molecules
254        lines.push_str("\n");
255
256        Ok(lines)
257    }
258}
259// plain xyz:1 ends here
260
261// [[file:../../gchemol-readwrite.note::c62a0af4][c62a0af4]]
262impl XyzFile {
263    pub fn partitions<R: BufRead + Seek>(&self, mut reader: TextReader<R>) -> Result<impl Iterator<Item = String>> {
264        let get_natoms = |line: &str| line.trim().parse::<usize>().ok();
265
266        let iter = std::iter::from_fn(move || {
267            let mut buf = String::new();
268            let _ = reader.read_line(&mut buf)?;
269            let natoms: usize = get_natoms(&buf)?;
270            // skip comment line
271            let _ = reader.read_line(&mut buf)?;
272            // skip lines for n atoms
273            for _ in 0..natoms {
274                let n = reader.read_line(&mut buf)?;
275            }
276            // read extra lines which may exists as TV or VEC for cell vectors
277            if let Some(line) = reader.peek_line() {
278                if get_natoms(&line).is_some() {
279                    return Some(buf);
280                } else {
281                    for _ in 0..3 {
282                        reader.read_line(&mut buf);
283                    }
284                }
285            }
286            return Some(buf);
287        });
288        Ok(iter)
289    }
290}
291// c62a0af4 ends here
292
293// [[file:../../gchemol-readwrite.note::d27ea4ee][d27ea4ee]]
294impl PlainXyzFile {
295    pub fn partitions<R: BufRead + Seek>(&self, mut reader: TextReader<R>) -> Result<impl Iterator<Item = String>> {
296        let iter = std::iter::from_fn(move || {
297            let mut buf = String::new();
298            let mut eof = false;
299            // stop when found an empty line or reach EOF
300            loop {
301                if let Some(n) = reader.read_line(&mut buf) {
302                    let m = buf.len();
303                    if buf[m - n..].trim().is_empty() {
304                        break Some(buf);
305                    }
306                } else {
307                    // we should not miss the last part when reach EOF
308                    if buf.is_empty() {
309                        break None;
310                    } else {
311                        break Some(buf);
312                    }
313                }
314            }
315        });
316        Ok(iter)
317    }
318}
319// d27ea4ee ends here