Skip to main content

sdfrust/writer/
mol2.rs

1//! TRIPOS MOL2 format writer.
2//!
3//! This module provides functions to write molecules to the MOL2 format.
4//! The MOL2 format uses sections marked by `@<TRIPOS>SECTION_NAME`.
5
6use std::io::Write;
7use std::path::Path;
8
9use crate::bond::BondOrder;
10use crate::error::Result;
11use crate::molecule::Molecule;
12
13/// Converts a BondOrder to MOL2 bond type string.
14fn order_to_mol2_bond_type(order: BondOrder) -> &'static str {
15    match order {
16        BondOrder::Single => "1",
17        BondOrder::Double => "2",
18        BondOrder::Triple => "3",
19        BondOrder::Aromatic => "ar",
20        BondOrder::SingleOrDouble => "1",
21        BondOrder::SingleOrAromatic => "ar",
22        BondOrder::DoubleOrAromatic => "ar",
23        BondOrder::Any => "1",
24        BondOrder::Coordination => "1",
25        BondOrder::Hydrogen => "1",
26    }
27}
28
29/// Infers a simple SYBYL atom type from element and bond information.
30///
31/// Since the parser discards SYBYL subtypes, we infer from:
32/// - Element symbol
33/// - Presence of aromatic bonds
34/// - Bond count for hybridization hints
35fn infer_sybyl_type(mol: &Molecule, atom_index: usize) -> String {
36    let atom = &mol.atoms[atom_index];
37    let element = &atom.element;
38
39    // Check if atom has aromatic bonds
40    let has_aromatic = mol
41        .bonds
42        .iter()
43        .any(|b| (b.atom1 == atom_index || b.atom2 == atom_index) && b.is_aromatic());
44
45    // Count bonds for hybridization hints
46    let bond_count = mol
47        .bonds
48        .iter()
49        .filter(|b| b.atom1 == atom_index || b.atom2 == atom_index)
50        .count();
51
52    // Calculate sum of bond orders for this atom
53    let bond_order_sum: f64 = mol
54        .bonds
55        .iter()
56        .filter(|b| b.atom1 == atom_index || b.atom2 == atom_index)
57        .map(|b| b.order.order())
58        .sum();
59
60    match element.as_str() {
61        "C" => {
62            if has_aromatic {
63                "C.ar".to_string()
64            } else if bond_order_sum > 3.5 {
65                "C.1".to_string() // sp hybridization (triple bond)
66            } else if bond_order_sum > 2.5 || bond_count == 3 {
67                "C.2".to_string() // sp2 hybridization
68            } else {
69                "C.3".to_string() // sp3 hybridization
70            }
71        }
72        "N" => {
73            if has_aromatic {
74                "N.ar".to_string()
75            } else if bond_count == 4 || atom.formal_charge > 0 {
76                "N.4".to_string() // quaternary nitrogen
77            } else if bond_order_sum > 2.5 {
78                "N.1".to_string() // sp
79            } else if bond_order_sum > 1.5 || bond_count == 3 {
80                "N.pl3".to_string() // planar sp2
81            } else {
82                "N.3".to_string() // sp3
83            }
84        }
85        "O" => {
86            if has_aromatic {
87                "O.ar".to_string()
88            } else if atom.formal_charge < 0 {
89                "O.co2".to_string() // carboxylate oxygen
90            } else if bond_order_sum > 1.5 {
91                "O.2".to_string() // sp2 (carbonyl)
92            } else {
93                "O.3".to_string() // sp3 (hydroxyl, ether)
94            }
95        }
96        "S" => {
97            if has_aromatic {
98                "S.ar".to_string()
99            } else if bond_order_sum > 1.5 {
100                "S.2".to_string()
101            } else {
102                "S.3".to_string()
103            }
104        }
105        "P" => "P.3".to_string(),
106        "H" => "H".to_string(),
107        "F" => "F".to_string(),
108        "Cl" => "Cl".to_string(),
109        "Br" => "Br".to_string(),
110        "I" => "I".to_string(),
111        // Default: just use the element symbol
112        _ => element.clone(),
113    }
114}
115
116/// Writes a molecule to MOL2 format.
117pub fn write_mol2<W: Write>(writer: &mut W, molecule: &Molecule) -> Result<()> {
118    // @<TRIPOS>MOLECULE section
119    writeln!(writer, "@<TRIPOS>MOLECULE")?;
120    writeln!(
121        writer,
122        "{}",
123        if molecule.name.is_empty() {
124            "unnamed"
125        } else {
126            &molecule.name
127        }
128    )?;
129    writeln!(
130        writer,
131        " {} {} 0 0 0",
132        molecule.atom_count(),
133        molecule.bond_count()
134    )?;
135    writeln!(writer, "SMALL")?;
136    writeln!(writer, "NO_CHARGES")?;
137    writeln!(writer)?;
138
139    // @<TRIPOS>ATOM section
140    writeln!(writer, "@<TRIPOS>ATOM")?;
141    for (i, atom) in molecule.atoms.iter().enumerate() {
142        let atom_name = format!("{}{}", atom.element, i + 1);
143        let atom_type = infer_sybyl_type(molecule, i);
144        writeln!(
145            writer,
146            "{:>7} {:<4} {:>10.4} {:>10.4} {:>10.4} {:<6} {:>3} {:<4} {:>8.4}",
147            i + 1,                     // atom_id (1-based)
148            atom_name,                 // atom_name
149            atom.x,                    // x coordinate
150            atom.y,                    // y coordinate
151            atom.z,                    // z coordinate
152            atom_type,                 // SYBYL atom type
153            1,                         // subst_id
154            "MOL",                     // subst_name
155            atom.formal_charge as f64  // charge (as float)
156        )?;
157    }
158
159    // @<TRIPOS>BOND section
160    writeln!(writer, "@<TRIPOS>BOND")?;
161    for (i, bond) in molecule.bonds.iter().enumerate() {
162        let bond_type = order_to_mol2_bond_type(bond.order);
163        writeln!(
164            writer,
165            "{:>6} {:>5} {:>5} {}",
166            i + 1,          // bond_id (1-based)
167            bond.atom1 + 1, // origin_atom_id (1-based)
168            bond.atom2 + 1, // target_atom_id (1-based)
169            bond_type       // bond_type
170        )?;
171    }
172
173    Ok(())
174}
175
176/// Writes a molecule to a MOL2 string.
177pub fn write_mol2_string(molecule: &Molecule) -> Result<String> {
178    let mut buffer = Vec::new();
179    write_mol2(&mut buffer, molecule)?;
180    Ok(String::from_utf8_lossy(&buffer).to_string())
181}
182
183/// Writes multiple molecules to MOL2 format.
184pub fn write_mol2_multi<W: Write>(writer: &mut W, molecules: &[Molecule]) -> Result<()> {
185    for mol in molecules {
186        write_mol2(writer, mol)?;
187    }
188    Ok(())
189}
190
191/// Writes a molecule to a MOL2 file.
192pub fn write_mol2_file<P: AsRef<Path>>(path: P, molecule: &Molecule) -> Result<()> {
193    let file = std::fs::File::create(path)?;
194    let mut writer = std::io::BufWriter::new(file);
195    write_mol2(&mut writer, molecule)
196}
197
198/// Writes multiple molecules to a MOL2 file.
199pub fn write_mol2_file_multi<P: AsRef<Path>>(path: P, molecules: &[Molecule]) -> Result<()> {
200    let file = std::fs::File::create(path)?;
201    let mut writer = std::io::BufWriter::new(file);
202    write_mol2_multi(&mut writer, molecules)
203}
204
205#[cfg(test)]
206mod tests {
207    use super::*;
208    use crate::atom::Atom;
209    use crate::bond::Bond;
210    use crate::parser::parse_mol2_string;
211
212    #[test]
213    fn test_write_simple_molecule() {
214        let mut mol = Molecule::new("methane");
215        mol.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
216        mol.atoms.push(Atom::new(1, "H", 0.6289, 0.6289, 0.6289));
217        mol.atoms.push(Atom::new(2, "H", -0.6289, -0.6289, 0.6289));
218        mol.atoms.push(Atom::new(3, "H", -0.6289, 0.6289, -0.6289));
219        mol.atoms.push(Atom::new(4, "H", 0.6289, -0.6289, -0.6289));
220        mol.bonds.push(Bond::new(0, 1, BondOrder::Single));
221        mol.bonds.push(Bond::new(0, 2, BondOrder::Single));
222        mol.bonds.push(Bond::new(0, 3, BondOrder::Single));
223        mol.bonds.push(Bond::new(0, 4, BondOrder::Single));
224
225        let output = write_mol2_string(&mol).unwrap();
226
227        assert!(output.contains("@<TRIPOS>MOLECULE"));
228        assert!(output.contains("methane"));
229        assert!(output.contains("@<TRIPOS>ATOM"));
230        assert!(output.contains("@<TRIPOS>BOND"));
231        assert!(output.contains("5 4 0 0 0")); // 5 atoms, 4 bonds
232    }
233
234    #[test]
235    fn test_round_trip() {
236        let mut mol = Molecule::new("test_molecule");
237        mol.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
238        mol.atoms.push(Atom::new(1, "O", 1.2, 0.0, 0.0));
239        mol.bonds.push(Bond::new(0, 1, BondOrder::Double));
240
241        let mol2_string = write_mol2_string(&mol).unwrap();
242        let parsed = parse_mol2_string(&mol2_string).unwrap();
243
244        assert_eq!(parsed.name, mol.name);
245        assert_eq!(parsed.atom_count(), mol.atom_count());
246        assert_eq!(parsed.bond_count(), mol.bond_count());
247
248        // Check coordinates (with some tolerance for formatting)
249        assert!((parsed.atoms[0].x - mol.atoms[0].x).abs() < 0.001);
250        assert!((parsed.atoms[1].x - mol.atoms[1].x).abs() < 0.001);
251
252        // Check bond order
253        assert_eq!(parsed.bonds[0].order, mol.bonds[0].order);
254    }
255
256    #[test]
257    fn test_write_aromatic_bonds() {
258        let mut mol = Molecule::new("benzene");
259        // Simple benzene ring (6 carbons)
260        for i in 0..6 {
261            let angle = std::f64::consts::PI * 2.0 * (i as f64) / 6.0;
262            let x = 1.4 * angle.cos();
263            let y = 1.4 * angle.sin();
264            mol.atoms.push(Atom::new(i, "C", x, y, 0.0));
265        }
266        // Aromatic bonds
267        for i in 0..6 {
268            mol.bonds
269                .push(Bond::new(i, (i + 1) % 6, BondOrder::Aromatic));
270        }
271
272        let output = write_mol2_string(&mol).unwrap();
273
274        // All bonds should be written as "ar"
275        assert!(output.contains(" ar"));
276        // Atoms should have aromatic type
277        assert!(output.contains("C.ar"));
278    }
279
280    #[test]
281    fn test_bond_type_conversion() {
282        assert_eq!(order_to_mol2_bond_type(BondOrder::Single), "1");
283        assert_eq!(order_to_mol2_bond_type(BondOrder::Double), "2");
284        assert_eq!(order_to_mol2_bond_type(BondOrder::Triple), "3");
285        assert_eq!(order_to_mol2_bond_type(BondOrder::Aromatic), "ar");
286    }
287
288    #[test]
289    fn test_write_multi() {
290        let mut mol1 = Molecule::new("mol1");
291        mol1.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
292
293        let mut mol2 = Molecule::new("mol2");
294        mol2.atoms.push(Atom::new(0, "O", 0.0, 0.0, 0.0));
295
296        let mut buffer = Vec::new();
297        write_mol2_multi(&mut buffer, &[mol1, mol2]).unwrap();
298        let output = String::from_utf8_lossy(&buffer);
299
300        // Should contain two MOLECULE sections
301        assert_eq!(output.matches("@<TRIPOS>MOLECULE").count(), 2);
302        assert!(output.contains("mol1"));
303        assert!(output.contains("mol2"));
304    }
305}