use std::collections::{HashMap, HashSet};
use chematic_core::{AtomIdx, BondIdx, BondOrder, Molecule};
pub fn write(mol: &Molecule) -> String {
if mol.atom_count() == 0 {
return String::new();
}
SmilesWriter::new(mol).write_all()
}
struct SmilesWriter<'a> {
mol: &'a Molecule,
ring_bonds: HashSet<BondIdx>,
atom_ring_nums: HashMap<AtomIdx, Vec<(u8, BondOrder)>>,
written: Vec<bool>,
next_ring: u8,
out: String,
}
impl<'a> SmilesWriter<'a> {
fn new(mol: &'a Molecule) -> Self {
let n = mol.atom_count();
Self {
mol,
ring_bonds: HashSet::new(),
atom_ring_nums: HashMap::new(),
written: vec![false; n],
next_ring: 1,
out: String::new(),
}
}
fn write_all(mut self) -> String {
self.find_ring_closures();
let mut first = true;
for i in 0..self.mol.atom_count() {
if !self.written[i] {
if !first { self.out.push('.'); }
first = false;
self.write_chain(AtomIdx(i as u32), None, None);
}
}
self.out
}
fn find_ring_closures(&mut self) {
let n = self.mol.atom_count();
let mut visited = vec![false; n];
let mut in_stack = vec![false; n];
for start in 0..n {
if !visited[start] {
self.dfs_mark(AtomIdx(start as u32), None, &mut visited, &mut in_stack);
}
}
}
fn dfs_mark(
&mut self,
atom: AtomIdx,
from_bond: Option<BondIdx>,
visited: &mut Vec<bool>,
in_stack: &mut Vec<bool>,
) {
visited[atom.0 as usize] = true;
in_stack[atom.0 as usize] = true;
for (neighbor, bidx) in self.mol.neighbors(atom) {
if Some(bidx) == from_bond { continue; }
if self.ring_bonds.contains(&bidx) { continue; }
if !visited[neighbor.0 as usize] {
self.dfs_mark(neighbor, Some(bidx), visited, in_stack);
} else if in_stack[neighbor.0 as usize] {
self.ring_bonds.insert(bidx);
let rn = self.next_ring;
self.next_ring += 1;
let bond_order = self.mol.bond(bidx).order;
self.atom_ring_nums.entry(neighbor).or_default().push((rn, bond_order)); self.atom_ring_nums.entry(atom).or_default().push((rn, bond_order)); }
}
in_stack[atom.0 as usize] = false;
}
fn write_chain(
&mut self,
atom: AtomIdx,
from_atom: Option<AtomIdx>,
incoming_bond: Option<BondOrder>,
) {
self.written[atom.0 as usize] = true;
if let Some(bond) = incoming_bond {
self.out.push(bond.smiles_char());
}
self.emit_atom(atom);
if let Some(rings) = self.atom_ring_nums.remove(&atom) {
for (rn, bond_order) in rings {
let atom_aromatic = self.mol.atom(atom).aromatic;
if !(bond_order == BondOrder::Aromatic && atom_aromatic) && bond_order != BondOrder::Single {
self.out.push(bond_order.smiles_char());
}
if rn >= 10 {
self.out.push('%');
self.out.push(char::from_digit((rn / 10) as u32, 10).unwrap());
self.out.push(char::from_digit((rn % 10) as u32, 10).unwrap());
} else {
self.out.push(char::from_digit(rn as u32, 10).unwrap());
}
}
}
let children: Vec<(AtomIdx, BondOrder)> = self.mol
.neighbors(atom)
.filter(|(nb, bidx)| {
Some(*nb) != from_atom
&& !self.written[nb.0 as usize]
&& !self.ring_bonds.contains(bidx)
})
.map(|(nb, bidx)| (nb, self.mol.bond(bidx).order))
.collect();
let n = children.len();
for (i, (child, bond_order)) in children.into_iter().enumerate() {
let is_last = i == n - 1;
let parent_arom = self.mol.atom(atom).aromatic;
let child_arom = self.mol.atom(child).aromatic;
let implicit = match bond_order {
BondOrder::Single => !(parent_arom && child_arom), BondOrder::Aromatic => parent_arom && child_arom, _ => false,
};
let written_bond = if implicit { None } else { Some(bond_order) };
if !is_last {
self.out.push('(');
self.write_chain(child, Some(atom), written_bond);
self.out.push(')');
} else {
self.write_chain(child, Some(atom), written_bond);
}
}
}
fn emit_atom(&mut self, idx: AtomIdx) {
let atom = self.mol.atom(idx);
let needs_bracket = atom.isotope.is_some()
|| atom.charge != 0
|| atom.hydrogen_count.is_some()
|| !atom.element.is_organic_subset()
|| atom.atom_map.is_some();
if needs_bracket {
self.out.push('[');
if let Some(iso) = atom.isotope {
self.out.push_str(&iso.to_string());
}
let sym = if atom.aromatic {
atom.element.symbol().to_lowercase()
} else {
atom.element.symbol().to_string()
};
self.out.push_str(&sym);
match atom.chirality {
chematic_core::Chirality::CounterClockwise => self.out.push('@'),
chematic_core::Chirality::Clockwise => self.out.push_str("@@"),
chematic_core::Chirality::None => {}
}
if let Some(h) = atom.hydrogen_count {
if h > 0 {
self.out.push('H');
if h > 1 { self.out.push_str(&h.to_string()); }
}
}
match atom.charge {
0 => {}
1 => self.out.push('+'),
-1 => self.out.push('-'),
c if c > 0 => { self.out.push('+'); self.out.push_str(&c.to_string()); }
c => self.out.push_str(&c.to_string()),
}
if let Some(m) = atom.atom_map {
self.out.push(':');
self.out.push_str(&m.to_string());
}
self.out.push(']');
} else if atom.aromatic {
self.out.push_str(&atom.element.symbol().to_lowercase());
} else {
self.out.push_str(atom.element.symbol());
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::parser::parse;
fn roundtrip(smiles: &str) {
let mol1 = parse(smiles).expect(smiles);
let out = write(&mol1);
let mol2 = parse(&out).unwrap_or_else(|e| {
panic!("roundtrip failed for '{}': wrote '{}', error: {e}", smiles, out)
});
assert_eq!(mol1.atom_count(), mol2.atom_count(),
"atom count mismatch: input='{}' output='{}'", smiles, out);
assert_eq!(mol1.bond_count(), mol2.bond_count(),
"bond count mismatch: input='{}' output='{}'", smiles, out);
}
#[test]
fn test_write_methane() { assert_eq!(write(&parse("C").unwrap()), "C"); }
#[test]
fn test_write_ethane() { assert_eq!(write(&parse("CC").unwrap()), "CC"); }
#[test]
fn test_roundtrip_propane() { roundtrip("CCC"); }
#[test]
fn test_roundtrip_isobutane() { roundtrip("CC(C)C"); }
#[test]
fn test_roundtrip_ethanol() { roundtrip("CCO"); }
#[test]
fn test_roundtrip_acetic_acid() { roundtrip("CC(=O)O"); }
#[test]
fn test_roundtrip_cyclohexane() { roundtrip("C1CCCCC1"); }
#[test]
fn test_roundtrip_benzene_kekule() { roundtrip("C1=CC=CC=C1"); }
#[test]
fn test_roundtrip_benzene_arom() { roundtrip("c1ccccc1"); }
#[test]
fn test_roundtrip_pyridine() { roundtrip("c1ccncc1"); }
#[test]
fn test_roundtrip_naphthalene() { roundtrip("c1ccc2ccccc2c1"); }
#[test]
fn test_roundtrip_chlorobenzene() { roundtrip("c1ccccc1Cl"); }
#[test]
fn test_roundtrip_13c() { roundtrip("[13C]"); }
#[test]
fn test_roundtrip_ammonium() { roundtrip("[NH4+]"); }
#[test]
fn test_roundtrip_disconnected() { roundtrip("[Na+].[Cl-]"); }
#[test]
fn test_roundtrip_aspirin() { roundtrip("CC(=O)Oc1ccccc1C(=O)O"); }
#[test]
fn test_roundtrip_caffeine() { roundtrip("Cn1cnc2c1c(=O)n(c(=O)n2C)C"); }
}