gchemol_readwrite/formats/
sdf.rs

1// [[file:../../gchemol-readwrite.note::*header][header:1]]
2// MDL SD file format
3//
4// SD file format reference
5// ------------------------
6// Ctab block format for V2000
7// - http://download.accelrys.com/freeware/ctfile-formats/ctfile-formats.zip
8// header:1 ends here
9
10// [[file:../../gchemol-readwrite.note::*imports][imports:1]]
11use super::parser::*;
12use super::*;
13// imports:1 ends here
14
15// [[file:../../gchemol-readwrite.note::*counts line][counts line:1]]
16// aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv
17// aaa = number of atoms
18// bbb = number of bonds
19fn counts_line(s: &str) -> IResult<&str, (usize, usize)> {
20    let mut read_count = map_res(take_s(3), |x: &str| x.trim().parse::<usize>());
21    do_parse!(
22        s,
23        na: read_count >> // number of atoms
24        nb: read_count >> // number of bonds
25        read_line >>  // ignore the remaining
26        ((na, nb))
27    )
28}
29
30#[test]
31fn test_sdf_counts_line() {
32    let line = " 16 14  0  0  0  0  0  0  0  0999 V2000\n";
33    let (_, (na, nb)) = counts_line(line).expect("sdf counts line");
34    assert_eq!(16, na);
35    assert_eq!(14, nb);
36}
37// counts line:1 ends here
38
39// [[file:../../gchemol-readwrite.note::*atoms][atoms:1]]
40// Example input
41// -------------
42//    -1.2940   -0.5496   -0.0457 C   0  0  0  0  0  0  0  0  0  0  0  0
43fn get_atom_from(s: &str) -> IResult<&str, Atom> {
44    let mut read_coord = map_res(take_s(10), |x| x.trim().parse::<f64>());
45    let mut read_symbol = map(take_s(3), |x| x.trim());
46    do_parse!(
47        s,
48        x: read_coord  >> // x coords
49        y: read_coord  >> // y coords
50        z: read_coord  >> // z coords
51        s: read_symbol >> // element symbol
52        read_line >>      // ignore remaing part
53        ({
54            Atom::new(s, [x, y, z])
55        })
56    )
57}
58
59// output atom line in .sdf format
60fn format_atom(i: usize, a: &Atom) -> String {
61    let pos = a.position();
62    format!(
63        "{x:-10.4} {y:-9.4} {z:-9.4} {sym:3} 0  0  0  0  0  0  0  0  0 {index:2}\n",
64        x = pos[0],
65        y = pos[1],
66        z = pos[2],
67        sym = a.symbol(),
68        index = i,
69    )
70}
71
72#[test]
73fn test_sdf_atom() {
74    let line = "  -13.5661  206.9157  111.5569 C   0  0  0  0  0  0  0  0  0 12 \n\n";
75    let (_, a) = get_atom_from(line).expect("sdf atom");
76    let line2 = format_atom(12, &a);
77    assert_eq!(line[..60], line2[..60]);
78}
79// atoms:1 ends here
80
81// [[file:../../gchemol-readwrite.note::*bonds][bonds:1]]
82//   1  4  1  0  0  0  0
83fn get_bond_from(s: &str) -> IResult<&str, (usize, usize, Bond)> {
84    let mut read_number = map_res(take_s(3), |x| x.trim().parse::<usize>());
85    do_parse!(
86        s,
87        i: read_number >> // atom i
88        j: read_number >> // atom j
89        b: read_number >> read_line >> // bond order
90        ({
91            let bond = match b {
92                1 => Bond::single(),
93                2 => Bond::double(),
94                3 => Bond::triple(),
95                4 => Bond::aromatic(),
96                _ => {
97                    warn!("ignore sdf bond type: {}", b);
98                    Bond::single()
99                },
100            };
101            (i, j, bond)
102        })
103    )
104}
105
106use std::fmt::Display;
107fn format_bond<T: Display>(index1: T, index2: T, bond: &Bond) -> String {
108    format!(
109        "{index1:>3}{index2:>3}{order:3}  0  0  0 \n",
110        index1 = index1,
111        index2 = index2,
112        order = 1
113    )
114}
115
116#[test]
117fn test_sdf_bond() {
118    let line = "  6  7  1  0  0  0 \n";
119    let (_, (index1, index2, bond)) = get_bond_from(line).expect("sdf bond");
120    let line2 = format_bond(index1, index2, &bond);
121    assert_eq!(line[..9], line2[..9]);
122}
123// bonds:1 ends here
124
125// [[file:../../gchemol-readwrite.note::*molecule][molecule:1]]
126pub fn get_molecule_from(input: &str) -> IResult<&str, Molecule> {
127    let mut read_atoms = many1(get_atom_from);
128    let mut read_bonds = many0(get_bond_from);
129    let (input, mol) = do_parse!(
130        input,
131        title   : read_line     >> // molecule title
132        software: read_line     >> // version?
133        comment : read_line     >> // user comments
134        counts  : counts_line   >> // number of atoms and bonds
135        atoms   : read_atoms    >> // atoms
136        bonds   : read_bonds    >> // bonds
137        (
138            {
139                let naa = atoms.len();
140                let nbb = bonds.len();
141                let (na, nb) = counts;
142                if na != naa {
143                    eprintln!("expect {} atoms, but found {}", na, naa);
144                }
145                if nb != nbb {
146                    eprintln!("expect {} bonds, but found {}", nb, nbb);
147                }
148
149                let mut mol = Molecule::from_atoms(atoms);
150                mol.set_title(title);
151
152                for (i, j, b) in bonds {
153                    mol.add_bond(i, j, b);
154                }
155                mol
156            }
157        )
158    )?;
159
160    Ok((input, mol))
161}
162
163fn format_molecule(mol: &Molecule) -> String {
164    let mut lines = String::new();
165
166    // molecule title
167    lines.push_str(&format!("{}\n", mol.title()));
168    // software
169    lines.push_str("gchemol\n");
170    // comment
171    lines.push_str("\n");
172    // counts line
173    let line = format!(
174        "{natoms:3}{nbonds:3}  0  0  0  0  0  0  0  0999 V2000 \n",
175        natoms = mol.natoms(),
176        nbonds = mol.nbonds()
177    );
178
179    lines.push_str(&line);
180
181    for (i, a) in mol.atoms() {
182        lines.push_str(&format_atom(i, a));
183    }
184
185    for (i, j, b) in mol.bonds() {
186        lines.push_str(&format_bond(i, j, &b));
187    }
188
189    lines.push_str("M  END\n$$$$\n");
190
191    lines
192}
193// molecule:1 ends here
194
195// [[file:../../gchemol-readwrite.note::*chemfile][chemfile:1]]
196#[derive(Clone, Copy, Debug)]
197pub struct SdfFile();
198
199impl ChemicalFile for SdfFile {
200    fn ftype(&self) -> &str {
201        "text/sdf"
202    }
203
204    fn possible_extensions(&self) -> Vec<&str> {
205        vec![".sd", ".sdf", ".mol"]
206    }
207
208    fn format_molecule(&self, mol: &Molecule) -> Result<String> {
209        if mol.lattice.is_some() {
210            eprintln!("WARNING: cannot render Lattice in SDF format!");
211        }
212        Ok(format_molecule(mol))
213    }
214}
215
216impl ParseMolecule for SdfFile {
217    fn parse_molecule(&self, input: &str) -> Result<Molecule> {
218        let (_, mol) = get_molecule_from(input).map_err(|e| format_err!("parse SDF format failure: {:?}", e))?;
219        Ok(mol)
220    }
221}
222// chemfile:1 ends here
223
224// [[file:../../gchemol-readwrite.note::b6db584e][b6db584e]]
225impl ReadPart for SdfFile {
226    fn read_next(&self, context: ReadContext) -> ReadAction {
227        Terminated(|line: &str| line == "$$$$\n").read_next(context)
228    }
229}
230
231impl SdfFile {
232    pub fn partitions<R: BufRead + Seek>(&self, mut r: TextReader<R>) -> Result<impl Iterator<Item = String>> {
233        Ok(r.partitions(*self))
234    }
235}
236// b6db584e ends here