gchemol_readwrite/formats/
mol2.rs

1// [[file:../../gchemol-readwrite.note::*imports][imports:1]]
2/// Tripos Mol2 File Format
3///
4/// Reference
5/// ---------
6/// http://tripos.com/tripos_resources/fileroot/pdfs/mol2_format.pdf
7/// http://chemyang.ccnu.edu.cn/ccb/server/AIMMS/mol2.pdf
8///
9use super::{parser::*, *};
10// imports:1 ends here
11
12// [[file:../../gchemol-readwrite.note::5b121788][5b121788]]
13fn read_atom_record(s: &str) -> IResult<&str, (usize, Atom)> {
14    let mut optional = opt(atom_subst_and_charge);
15    do_parse!(
16        s,
17        space0 >> id: unsigned_digit >> space1 >>  // atom index
18        name: not_space >> space1 >> // atom name
19        xyz : xyz_array >> space1 >> // cartesian coordinates
20        emtype: mm_type >> space0 >> // Element and Atom type
21        optional >> eol >>  // substructure and partial charge, which could be omitted
22        ({
23            let (e, mtype) = emtype;
24            let mut a = Atom::new(e, xyz);
25            a.set_label(name.trim());
26            (id, a)
27        })
28    )
29}
30
31#[test]
32fn test_formats_mol2_atom() {
33    let line = " 3	C3	2.414	0.000	0.000	C.ar	1	BENZENE	0.000	DICT\n";
34    let (r, (_, a)) = read_atom_record(line).expect("mol2 full");
35    assert_eq!("C", a.symbol());
36
37    let line = " 3	C3	2.414	0.000	0.000	C.ar	1	BENZENE	0.000\n";
38    let (r, (_, a)) = read_atom_record(line).expect("mol2 atom: missing status bit");
39    assert_eq!("C", a.symbol());
40
41    let line = " 3	C3	2.414	0.000	0.000	C.ar	1	BENZENE \n";
42    let (r, (_, a)) = read_atom_record(line).expect("mol2 atom: missing partial charge");
43    assert_eq!("C", a.symbol());
44
45    let line = " 3	C3	2.414	0.000	0.000	C.ar\n";
46    let (r, (_, a)) = read_atom_record(line).expect("mol2 atom: missing substructure");
47    assert_eq!("C", a.symbol());
48}
49
50// parse mol2 atom type. example records:
51// C, C.2, C.3, C.ar
52fn mm_type(s: &str) -> IResult<&str, (&str, Option<&str>)> {
53    let mut mtype = opt(preceded(tag("."), alphanumeric1));
54    do_parse!(
55        s,
56        s: alpha1 >> // element symbol
57        t: mtype  >> // atom type for force field
58        ((s, t))
59    )
60}
61
62#[test]
63fn test_mol2_mmtype() {
64    let (_, (sym, mtype)) = mm_type("C.ar\n").expect("mol2 atom type");
65    assert_eq!("C", sym);
66    assert_eq!(Some("ar"), mtype);
67
68    let (_, (sym, mtype)) = mm_type("C.4\n").expect("mol2 atom type 2");
69    assert_eq!("C", sym);
70    assert_eq!(Some("4"), mtype);
71
72    let (_, (sym, mtype)) = mm_type("C ").expect("mol atom type: missing mm type");
73    assert_eq!("C", sym);
74    assert_eq!(None, mtype);
75}
76
77// substructure id and subtructure name
78fn atom_subst_and_charge(s: &str) -> IResult<&str, (usize, &str, Option<f64>)> {
79    let mut charge = opt(double);
80    let mut status_bit = opt(alpha1);
81    do_parse!(
82        s,
83        subst_id   : unsigned_digit >> space1 >> // xx
84        subst_name : not_space      >> space1 >> // xx
85        charge     : charge         >> space0 >> // xx
86        status_bit : status_bit     >> // xx
87        ((subst_id, subst_name, charge))
88    )
89}
90
91/// simple translation without considering the bonding pattern
92/// http://www.sdsc.edu/CCMS/Packages/cambridge/pluto/atom_types.html
93/// I just want material studio happy to accept my .mol2 file
94fn get_atom_type(atom: &Atom) -> &str {
95    match atom.symbol() {
96        "C" => "C.3",
97        "P" => "P.3",
98        "Co" => "Co.oh",
99        "Ru" => "Ru.oh",
100        "O" => "O.2",
101        "N" => "N.3",
102        "S" => "S.2",
103        "Ti" => "Ti.oh",
104        "Cr" => "Cr.oh",
105        _ => atom.symbol(),
106    }
107}
108
109fn format_atom(a: &Atom) -> String {
110    let position = a.position();
111    format!(
112        "{name:8} {x:-12.5} {y:-12.5} {z:-12.5} {symbol:8} {subst_id:5} {subst_name:8} {charge:-6.4}\n",
113        name = a.get_label().unwrap_or(a.symbol()),
114        x = position[0],
115        y = position[1],
116        z = position[2],
117        // FIXME:
118        symbol = get_atom_type(a),
119        subst_id = 1,
120        subst_name = "SUBUNIT",
121        charge = 0.0,
122    )
123}
124// 5b121788 ends here
125
126// [[file:../../gchemol-readwrite.note::*atoms][atoms:1]]
127/// Parse Tripos Atom section
128fn read_atoms(s: &str) -> IResult<&str, Vec<(usize, Atom)>> {
129    let mut tag_atom = tag("@<TRIPOS>ATOM");
130    let mut atoms = many1(read_atom_record);
131    do_parse!(s, tag_atom >> eol >> s: atoms >> (s))
132}
133
134#[test]
135fn test_mol2_get_atoms() {
136    let lines = "@<TRIPOS>ATOM
137      1 N           1.3863   -0.2920    0.0135 N.ar    1  UNL1       -0.2603
138      2 N          -1.3863    0.2923    0.0068 N.ar    1  UNL1       -0.2603
139      3 C           0.9188    0.9708   -0.0188 C.ar    1  UNL1        0.0456
140      4 C          -0.4489    1.2590   -0.0221 C.ar    1  UNL1        0.0456
141      5 C          -0.9188   -0.9709    0.0073 C.ar    1  UNL1        0.0456
142      6 C           0.4489   -1.2591    0.0106 C.ar    1  UNL1        0.0456
143      7 H           1.6611    1.7660   -0.0258 H       1  UNL1        0.0845
144      8 H          -0.8071    2.2860   -0.0318 H       1  UNL1        0.0845
145      9 H           0.8071   -2.2861    0.0273 H       1  UNL1        0.0845
146     10 H          -1.6611   -1.7660    0.0214 H       1  UNL1        0.0845
147
148";
149    let (_, atoms) = read_atoms(lines).expect("mol2 atoms");
150    assert_eq!(10, atoms.len());
151}
152// atoms:1 ends here
153
154// [[file:../../gchemol-readwrite.note::*bonds][bonds:1]]
155/// Parse Tripos Bond section
156fn read_bonds(s: &str) -> IResult<&str, Vec<(usize, usize, Bond)>> {
157    let mut tag_bond = tag("@<TRIPOS>BOND");
158    let mut bonds = many0(read_bond_record);
159    do_parse!(s, tag_bond >> eol >> bonds: bonds >> (bonds))
160}
161
162#[test]
163fn test_mol2_bonds() {
164    let lines = "@<TRIPOS>BOND
165     1    13    11    1
166     2    11    12    1
167     3     8     4    1
168     4     7     3    1
169     5     4     3   ar
170
171";
172
173    let (_, x) = read_bonds(lines).expect("mol2 bonds");
174    assert_eq!(5, x.len());
175}
176
177fn read_bond_record(s: &str) -> IResult<&str, (usize, usize, Bond)> {
178    do_parse!(
179        s,
180        space0 >>                        // ignore leading space
181        unsigned_digit >> space1 >>      // bond_id
182        n1: unsigned_digit >> space1 >>  // origin_atom_id
183        n2: unsigned_digit >> space1 >>  // target_atom_id
184        bo: alphanumeric1  >> space0 >>  // bond_type
185        read_line >>                     // ignore status_bits
186        ({
187            let bond = match bo.to_lowercase().as_ref() {
188                "1"  => Bond::single(),
189                "2"  => Bond::double(),
190                "3"  => Bond::triple(),
191                "ar" => Bond::aromatic(),
192                "am" => Bond::aromatic(),
193                "nc" => Bond::dummy(),
194                "wk" => Bond::partial(), // gaussian view use this
195                _    => Bond::single()
196            };
197            (n1, n2, bond)
198        })
199    )
200}
201
202#[test]
203fn test_formats_mol2_bond_record() {
204    let (_, (i, j, b)) = read_bond_record("1	1	2	1 BACKBONE\n").expect("mol2 bond: full");
205    assert_eq!(BondKind::Single, b.kind());
206
207    let (_, (i, j, b)) = read_bond_record("1	1	2	1\n").expect("mol2 bond: missing status bits");
208    assert_eq!(BondKind::Single, b.kind());
209
210    let (_, (i, j, b)) = read_bond_record("1	1	2	ar\n").expect("mol2 bond: aromatic bond type");
211    assert_eq!(BondKind::Aromatic, b.kind());
212}
213
214fn format_bond_order(bond: &Bond) -> &str {
215    match bond.kind() {
216        BondKind::Single => "1",
217        BondKind::Double => "2",
218        BondKind::Triple => "3",
219        BondKind::Quadruple => "4",
220        BondKind::Aromatic => "ar",
221        BondKind::Partial => "wk", // gaussian view use this
222        BondKind::Dummy => "nc",
223    }
224}
225// bonds:1 ends here
226
227// [[file:../../gchemol-readwrite.note::*lattice][lattice:1]]
228fn read_lattice(s: &str) -> IResult<&str, Lattice> {
229    let tag_crysin = tag("@<TRIPOS>CRYSIN");
230    do_parse!(
231        s,
232        tag_crysin >> eol >>    // goto section header
233        space0 >>               // ignore leading spaces
234        a: double >> space1 >>  // a
235        b: double >> space1 >>  // b
236        c: double >> space1 >>  // c
237        alpha: double >> space1 >> // angle alpha
238        beta : double >> space1 >> // angle beta
239        gamma: double >> space1 >> // angle gamma
240        space_grp : unsigned_digit >> space1    >>
241        setting   : unsigned_digit >> read_line >>
242        ({
243            Lattice::from_params(a, b, c, alpha, beta, gamma)
244        })
245    )
246}
247
248#[test]
249fn test_formats_mol2_crystal() {
250    let txt = "@<TRIPOS>CRYSIN
251 12.312000 4.959000 15.876000 90.000000 99.070000 90.000000 4 1\n";
252
253    let (_, mut x) = read_lattice(txt).expect("mol2 crystal");
254    assert_eq!([12.312, 4.959, 15.876], x.lengths());
255}
256// lattice:1 ends here
257
258// [[file:../../gchemol-readwrite.note::*parse][parse:1]]
259fn read_molecule(s: &str) -> IResult<&str, Molecule> {
260    let mut jump = opt(take_until("@<TRIPOS>"));
261    let mut read_bonds = opt(read_bonds);
262    let mut read_lattice = opt(read_lattice);
263    do_parse!(
264        s,
265        jump >> meta: read_molecule_meta >> // meta data
266        jump >> atoms: read_atoms >>        // atoms
267        jump >> bonds: read_bonds >>        // optional bonds
268        jump >> lattice: read_lattice >>    // optional lattice
269        ({
270            let (title, natoms, nbonds) = meta;
271            let mut mol = Molecule::new(title);
272
273            // assign atoms
274            if natoms != atoms.len() {
275                warn!("Inconsistency: expected {} atoms, but found {}", natoms, atoms.len());
276            }
277            mol.add_atoms_from(atoms);
278
279            // assign bonds
280            if let Some(bonds) = bonds {
281                mol.add_bonds_from(bonds);
282            }
283
284            // assign lattice
285            if let Some(lattice) = lattice {
286                mol.set_lattice(lattice);
287            }
288
289            mol
290        })
291    )
292}
293
294fn read_counts(s: &str) -> IResult<&str, (usize, Option<usize>)> {
295    let mut opt_num_bonds = opt(unsigned_digit);
296    do_parse!(
297        s,
298        space0 >> n: unsigned_digit >> space0 >> m: opt_num_bonds >> read_line >> ((n, m))
299    )
300}
301
302fn read_molecule_meta(s: &str) -> IResult<&str, (&str, usize, Option<usize>)> {
303    let mut tag_mol = tag("@<TRIPOS>MOLECULE");
304    do_parse!(
305        s,
306        tag_mol >> eol >>        // section header
307        title: read_until_eol >> // mol_name
308        counts: read_counts   >> // num_aatoms, num_bonds
309        read_line >>             // ignore mol_type
310        read_line >>             // ignore charge_type
311        ({
312            let (natoms, nbonds) = counts;
313            (title, natoms, nbonds)
314        })
315    )
316}
317
318#[test]
319fn test_mol2_meta() {
320    let txt = "@<TRIPOS>MOLECULE
321Molecule Name
3225 4
323SMALL
324NO_CHARGES
325
326";
327    let (_, (title, natoms, nbonds)) = read_molecule_meta(txt).expect("mol2 meta");
328    assert_eq!(title, "Molecule Name");
329    assert_eq!(natoms, 5);
330    assert_eq!(nbonds, Some(4));
331}
332// parse:1 ends here
333
334// [[file:../../gchemol-readwrite.note::*format][format:1]]
335fn format_molecule(mol: &Molecule) -> Result<String> {
336    let natoms = mol.natoms();
337    let nbonds = mol.nbonds();
338
339    let mut lines = String::new();
340    lines += "#	Created by gchemol\n\n";
341    lines += "@<TRIPOS>MOLECULE\n";
342    lines += &format!("{}\n", mol.title());
343
344    // atom count, bond numbers, substructure numbers
345    lines += &format!("{:>5} {:>5}\n", natoms, nbonds);
346    // molecule type
347    lines += "SMALL\n";
348    // customed charges
349    lines += "USER CHARGES\n";
350    // atoms
351    lines += "@<TRIPOS>ATOM\n";
352
353    // format atoms
354    for (i, a) in mol.atoms() {
355        lines += &format!("{:5} {}", i, format_atom(&a));
356    }
357
358    // format bonds
359    if nbonds > 0 {
360        lines += "@<TRIPOS>BOND\n";
361        let mut sn = 1;
362        for (i, j, b) in mol.bonds() {
363            lines += &format!(
364                "{sn:4} {bond_i:4} {bond_j:4} {bond_type:3}\n",
365                sn = sn,
366                bond_i = i,
367                bond_j = j,
368                bond_type = format_bond_order(&b)
369            );
370            sn += 1;
371        }
372    }
373
374    // format crystal
375    if let Some(lat) = &mol.lattice {
376        lines += "@<TRIPOS>CRYSIN\n";
377        let [a, b, c] = lat.lengths();
378        let [alpha, beta, gamma] = lat.angles();
379        lines += &format!(
380            "{a:10.4} {b:10.4} {c:10.4} {alpha:5.2} {beta:5.2} {gamma:5.2} {sgrp} 1\n",
381            a = a,
382            b = b,
383            c = c,
384            alpha = alpha,
385            beta = beta,
386            gamma = gamma,
387            // FIXME: crystal space group
388            sgrp = 4
389        );
390    }
391
392    // Final blank line
393    lines += "\n";
394
395    Ok(lines)
396}
397// format:1 ends here
398
399// [[file:../../gchemol-readwrite.note::dfc305c8][dfc305c8]]
400#[derive(Copy, Clone, Debug)]
401pub struct Mol2File();
402
403impl ChemicalFile for Mol2File {
404    fn ftype(&self) -> &str {
405        "text/mol2"
406    }
407
408    fn possible_extensions(&self) -> Vec<&str> {
409        vec![".mol2"]
410    }
411
412    fn format_molecule(&self, mol: &Molecule) -> Result<String> {
413        format_molecule(mol)
414    }
415}
416
417impl ParseMolecule for Mol2File {
418    fn parse_molecule(&self, input: &str) -> Result<Molecule> {
419        let (_, mol) = read_molecule(input).map_err(|e| format_err!("{:}", e))?;
420        Ok(mol)
421    }
422}
423// dfc305c8 ends here
424
425// [[file:../../gchemol-readwrite.note::3137ddbd][3137ddbd]]
426impl ReadPart for Mol2File {
427    fn read_next(&self, context: ReadContext) -> ReadAction {
428        Preceded(|line: &str| line.starts_with("@<TRIPOS>MOLECULE")).read_next(context)
429    }
430}
431
432impl Mol2File {
433    pub fn partitions<R: BufRead + Seek>(&self, mut r: TextReader<R>) -> Result<impl Iterator<Item = String>> {
434        r.seek_line(|line| line.starts_with("@<TRIPOS>MOLECULE"))?;
435        Ok(r.partitions(*self))
436    }
437}
438// 3137ddbd ends here