use crate::atom::Atom;
use crate::bond::{BondEntry, BondOrder};
use crate::element::Element;
use crate::stereo_group::StereoGroup;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct AtomIdx(pub u32);
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct BondIdx(pub u32);
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MolError {
InvalidAtomIdx(AtomIdx),
DuplicateBond(AtomIdx, AtomIdx),
}
impl core::fmt::Display for MolError {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Self::InvalidAtomIdx(idx) => write!(f, "invalid atom index: {}", idx.0),
Self::DuplicateBond(a, b) => write!(f, "duplicate bond between atoms {} and {}", a.0, b.0),
}
}
}
pub struct Molecule {
atoms: Vec<Atom>,
bonds: Vec<BondEntry>,
adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
stereo_groups: Vec<StereoGroup>,
}
impl Molecule {
pub fn atom_count(&self) -> usize {
self.atoms.len()
}
pub fn bond_count(&self) -> usize {
self.bonds.len()
}
pub fn atom(&self, idx: AtomIdx) -> &Atom {
&self.atoms[idx.0 as usize]
}
pub fn bond(&self, idx: BondIdx) -> &BondEntry {
&self.bonds[idx.0 as usize]
}
pub fn atoms(&self) -> impl Iterator<Item = (AtomIdx, &Atom)> {
self.atoms
.iter()
.enumerate()
.map(|(i, a)| (AtomIdx(i as u32), a))
}
pub fn bonds(&self) -> impl Iterator<Item = (BondIdx, &BondEntry)> {
self.bonds
.iter()
.enumerate()
.map(|(i, b)| (BondIdx(i as u32), b))
}
pub fn neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (AtomIdx, BondIdx)> + '_ {
self.adjacency[idx.0 as usize].iter().copied()
}
pub fn degree(&self, idx: AtomIdx) -> usize {
self.adjacency[idx.0 as usize].len()
}
pub fn bond_between(&self, a: AtomIdx, b: AtomIdx) -> Option<(BondIdx, &BondEntry)> {
self.adjacency[a.0 as usize]
.iter()
.find(|&&(nb, _)| nb == b)
.map(|&(_, bidx)| (bidx, &self.bonds[bidx.0 as usize]))
}
pub fn formula(&self) -> String {
use std::collections::BTreeMap;
let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
for (_, atom) in self.atoms() {
*counts.entry(atom.element.symbol()).or_insert(0) += 1;
}
let mut result = String::new();
let push_count = |sym: &str, n: u32, out: &mut String| {
out.push_str(sym);
if n > 1 {
out.push_str(&n.to_string());
}
};
if let Some(c) = counts.remove("C") {
push_count("C", c, &mut result);
}
if let Some(h) = counts.remove("H") {
push_count("H", h, &mut result);
}
for (sym, count) in &counts {
push_count(sym, *count, &mut result);
}
result
}
}
impl Molecule {
pub fn with_atom_added(&self, atom: Atom) -> (Molecule, AtomIdx) {
let mut builder = MoleculeBuilder::new();
for (_, a) in self.atoms() {
builder.add_atom(a.clone());
}
for (_, b) in self.bonds() {
let _ = builder.add_bond(b.atom1, b.atom2, b.order);
}
let new_idx = builder.add_atom(atom);
(builder.build(), new_idx)
}
pub fn with_bond_added(
&self,
a: AtomIdx,
b: AtomIdx,
order: BondOrder,
) -> Result<(Molecule, BondIdx), MolError> {
let mut builder = MoleculeBuilder::new();
for (_, atom) in self.atoms() {
builder.add_atom(atom.clone());
}
for (_, bond) in self.bonds() {
let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
}
let bond_idx = builder.add_bond(a, b, order)?;
Ok((builder.build(), bond_idx))
}
pub fn with_atom_charge(&self, idx: AtomIdx, charge: i8) -> Molecule {
let mut builder = MoleculeBuilder::new();
for (aidx, atom) in self.atoms() {
let mut a = atom.clone();
if aidx == idx { a.charge = charge; }
builder.add_atom(a);
}
for (_, bond) in self.bonds() {
let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
}
builder.build()
}
pub fn with_atom_element(&self, idx: AtomIdx, el: Element) -> Molecule {
let mut builder = MoleculeBuilder::new();
for (aidx, atom) in self.atoms() {
let mut a = atom.clone();
if aidx == idx {
a.element = el;
a.chirality = crate::atom::Chirality::None;
a.hydrogen_count = None;
a.aromatic = false;
}
builder.add_atom(a);
}
for (_, bond) in self.bonds() {
let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
}
builder.build()
}
pub fn with_atom_removed(&self, idx: AtomIdx) -> (Molecule, Vec<Option<AtomIdx>>) {
let n = self.atom_count();
let removed = idx.0 as usize;
let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
let mut new_pos = 0u32;
for old in 0..n {
if old == removed {
continue;
}
remap[old] = Some(AtomIdx(new_pos));
new_pos += 1;
}
let mut builder = MoleculeBuilder::new();
for (aidx, atom) in self.atoms() {
if aidx == idx { continue; }
builder.add_atom(atom.clone());
}
for (_, bond) in self.bonds() {
if bond.atom1 == idx || bond.atom2 == idx { continue; }
if let (Some(a1), Some(a2)) = (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize]) {
let _ = builder.add_bond(a1, a2, bond.order);
}
}
(builder.build(), remap)
}
pub fn implicit_hydrogen_count(&self, idx: AtomIdx) -> u8 {
crate::valence::implicit_hcount(self, idx)
}
pub fn total_formula(&self) -> String {
use std::collections::BTreeMap;
let mut counts: BTreeMap<&str, u32> = BTreeMap::new();
let mut implicit_h: u32 = 0;
for (aidx, atom) in self.atoms() {
*counts.entry(atom.element.symbol()).or_insert(0) += 1;
implicit_h += crate::valence::implicit_hcount(self, aidx) as u32;
}
*counts.entry("H").or_insert(0) += implicit_h;
let mut result = String::new();
let push_count = |sym: &str, n: u32, out: &mut String| {
out.push_str(sym);
if n > 1 {
out.push_str(&n.to_string());
}
};
if let Some(c) = counts.remove("C") {
push_count("C", c, &mut result);
}
if let Some(h) = counts.remove("H") {
if h > 0 {
push_count("H", h, &mut result);
}
}
for (sym, count) in &counts {
push_count(sym, *count, &mut result);
}
result
}
pub fn formula_with_isotopes(&self) -> String {
use std::collections::BTreeMap;
let mut counts: BTreeMap<String, u32> = BTreeMap::new();
let mut has_carbon = false;
let mut has_explicit_h = false;
for (_, atom) in self.atoms() {
let sym = atom.element.symbol();
let key = match atom.isotope {
Some(n) => format!("{n}{sym}"),
None => sym.to_string(),
};
if sym == "C" && atom.isotope.is_none() { has_carbon = true; }
if sym == "H" { has_explicit_h = true; }
*counts.entry(key).or_insert(0) += 1;
}
let push_count = |key: &str, n: u32, out: &mut String| {
out.push_str(key);
if n > 1 { out.push_str(&n.to_string()); }
};
let mut result = String::new();
if has_carbon {
if let Some(c) = counts.remove("C") {
push_count("C", c, &mut result);
}
}
if has_explicit_h {
if let Some(h) = counts.remove("H") {
push_count("H", h, &mut result);
}
}
for (key, count) in &counts {
push_count(key, *count, &mut result);
}
result
}
pub fn with_atom_aromatic(&self, idx: AtomIdx, aromatic: bool) -> Molecule {
let mut builder = MoleculeBuilder::new();
for (aidx, atom) in self.atoms() {
let mut a = atom.clone();
if aidx == idx { a.aromatic = aromatic; }
builder.add_atom(a);
}
for (_, bond) in self.bonds() {
let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
}
builder.build()
}
pub fn with_bond_order(&self, idx: BondIdx, order: BondOrder) -> Molecule {
let mut builder = MoleculeBuilder::new();
for (_, atom) in self.atoms() {
builder.add_atom(atom.clone());
}
for (bidx, bond) in self.bonds() {
let o = if bidx == idx { order } else { bond.order };
let _ = builder.add_bond(bond.atom1, bond.atom2, o);
}
builder.build()
}
pub fn with_bond_removed(&self, idx: BondIdx) -> Molecule {
let mut builder = MoleculeBuilder::new();
for (_, atom) in self.atoms() {
builder.add_atom(atom.clone());
}
for (bidx, bond) in self.bonds() {
if bidx == idx { continue; }
let _ = builder.add_bond(bond.atom1, bond.atom2, bond.order);
}
builder.build()
}
}
impl Molecule {
pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
let idx = AtomIdx(self.atoms.len() as u32);
self.atoms.push(atom);
self.adjacency.push(vec![]);
idx
}
pub fn remove_atom(&mut self, idx: AtomIdx) -> Vec<Option<AtomIdx>> {
let n = self.atoms.len();
let removed = idx.0 as usize;
let mut remap: Vec<Option<AtomIdx>> = vec![None; n];
let mut new_pos = 0u32;
for old in 0..n {
if old == removed { continue; }
remap[old] = Some(AtomIdx(new_pos));
new_pos += 1;
}
self.atoms.remove(removed);
let mut new_bonds: Vec<BondEntry> = Vec::new();
for bond in &self.bonds {
if bond.atom1 == idx || bond.atom2 == idx { continue; }
if let (Some(a1), Some(a2)) = (remap[bond.atom1.0 as usize], remap[bond.atom2.0 as usize]) {
new_bonds.push(BondEntry { atom1: a1, atom2: a2, order: bond.order });
}
}
self.bonds = new_bonds;
let new_n = self.atoms.len();
self.adjacency = vec![vec![]; new_n];
for (bidx, bond) in self.bonds.iter().enumerate() {
let bi = BondIdx(bidx as u32);
self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
}
remap
}
pub fn add_bond(&mut self, a: AtomIdx, b: AtomIdx, order: BondOrder) -> Result<BondIdx, MolError> {
let n = self.atoms.len() as u32;
if a.0 >= n { return Err(MolError::InvalidAtomIdx(a)); }
if b.0 >= n { return Err(MolError::InvalidAtomIdx(b)); }
if self.adjacency[a.0 as usize].iter().any(|&(nb, _)| nb == b) {
return Err(MolError::DuplicateBond(a, b));
}
let bidx = BondIdx(self.bonds.len() as u32);
self.bonds.push(BondEntry { atom1: a, atom2: b, order });
self.adjacency[a.0 as usize].push((b, bidx));
self.adjacency[b.0 as usize].push((a, bidx));
Ok(bidx)
}
pub fn remove_bond(&mut self, idx: BondIdx) {
let removed = idx.0 as usize;
if removed >= self.bonds.len() { return; }
self.bonds.remove(removed);
let n = self.atoms.len();
self.adjacency = vec![vec![]; n];
for (bidx, bond) in self.bonds.iter().enumerate() {
let bi = BondIdx(bidx as u32);
self.adjacency[bond.atom1.0 as usize].push((bond.atom2, bi));
self.adjacency[bond.atom2.0 as usize].push((bond.atom1, bi));
}
}
pub fn set_charge(&mut self, idx: AtomIdx, charge: i8) {
self.atoms[idx.0 as usize].charge = charge;
}
pub fn set_element(&mut self, idx: AtomIdx, el: Element) {
let a = &mut self.atoms[idx.0 as usize];
a.element = el;
a.chirality = crate::atom::Chirality::None;
a.hydrogen_count = None;
a.aromatic = false;
}
pub fn set_cip_code(&mut self, idx: AtomIdx, code: Option<crate::atom::CipCode>) {
self.atoms[idx.0 as usize].cip_code = code;
}
pub fn stereo_groups(&self) -> &[StereoGroup] {
&self.stereo_groups
}
pub fn set_stereo_groups(&mut self, groups: Vec<StereoGroup>) {
self.stereo_groups = groups;
}
pub fn add_stereo_group(&mut self, group: StereoGroup) {
self.stereo_groups.push(group);
}
}
impl Molecule {
pub fn is_connected(&self) -> bool {
let n = self.atoms.len();
if n == 0 { return true; }
let mut visited = vec![false; n];
let mut stack = vec![AtomIdx(0)];
visited[0] = true;
let mut count = 1;
while let Some(cur) = stack.pop() {
for (nb, _) in self.neighbors(cur) {
if !visited[nb.0 as usize] {
visited[nb.0 as usize] = true;
count += 1;
stack.push(nb);
}
}
}
count == n
}
pub fn fragments(&self) -> Vec<Molecule> {
let n = self.atoms.len();
if n == 0 { return vec![]; }
let mut component: Vec<usize> = vec![usize::MAX; n];
let mut comp_id = 0;
for start in 0..n {
if component[start] != usize::MAX { continue; }
let mut stack = vec![start];
component[start] = comp_id;
while let Some(cur) = stack.pop() {
for (nb, _) in self.neighbors(AtomIdx(cur as u32)) {
let ni = nb.0 as usize;
if component[ni] == usize::MAX {
component[ni] = comp_id;
stack.push(ni);
}
}
}
comp_id += 1;
}
(0..comp_id).map(|cid| {
let mut builder = MoleculeBuilder::new();
let mut old_to_new: std::collections::HashMap<AtomIdx, AtomIdx> = std::collections::HashMap::new();
for (aidx, atom) in self.atoms() {
if component[aidx.0 as usize] == cid {
let new_idx = builder.add_atom(atom.clone());
old_to_new.insert(aidx, new_idx);
}
}
for (_, bond) in self.bonds() {
if let (Some(&a1), Some(&a2)) = (old_to_new.get(&bond.atom1), old_to_new.get(&bond.atom2)) {
let _ = builder.add_bond(a1, a2, bond.order);
}
}
builder.build()
}).collect()
}
}
#[derive(Default)]
pub struct MoleculeBuilder {
atoms: Vec<Atom>,
bonds: Vec<BondEntry>,
adjacency: Vec<Vec<(AtomIdx, BondIdx)>>,
stereo_groups: Vec<StereoGroup>,
}
impl MoleculeBuilder {
pub fn new() -> Self {
Self::default()
}
pub fn from_molecule(mol: &Molecule) -> Self {
let mut b = Self::new();
for (_, atom) in mol.atoms() {
b.add_atom(atom.clone());
}
for (_, bond) in mol.bonds() {
let _ = b.add_bond(bond.atom1, bond.atom2, bond.order);
}
b.stereo_groups = mol.stereo_groups.clone();
b
}
pub fn add_stereo_group(&mut self, group: StereoGroup) {
self.stereo_groups.push(group);
}
pub fn atom_at(&self, idx: AtomIdx) -> &Atom {
&self.atoms[idx.0 as usize]
}
pub fn atom_count(&self) -> usize {
self.atoms.len()
}
pub fn atom_neighbors(&self, idx: AtomIdx) -> impl Iterator<Item = (BondIdx, AtomIdx)> + '_ {
self.adjacency[idx.0 as usize].iter().map(|&(nb, bidx)| (bidx, nb))
}
pub fn add_atom(&mut self, atom: Atom) -> AtomIdx {
let idx = AtomIdx(self.atoms.len() as u32);
self.atoms.push(atom);
self.adjacency.push(Vec::new());
idx
}
pub fn add_bond(&mut self, a: AtomIdx, b: AtomIdx, order: BondOrder) -> Result<BondIdx, MolError> {
let n = self.atoms.len() as u32;
if a.0 >= n { return Err(MolError::InvalidAtomIdx(a)); }
if b.0 >= n { return Err(MolError::InvalidAtomIdx(b)); }
for &(nb, _) in &self.adjacency[a.0 as usize] {
if nb == b {
return Err(MolError::DuplicateBond(a, b));
}
}
let bidx = BondIdx(self.bonds.len() as u32);
self.bonds.push(BondEntry { atom1: a, atom2: b, order });
self.adjacency[a.0 as usize].push((b, bidx));
self.adjacency[b.0 as usize].push((a, bidx));
Ok(bidx)
}
pub fn build(self) -> Molecule {
Molecule {
atoms: self.atoms,
bonds: self.bonds,
adjacency: self.adjacency,
stereo_groups: self.stereo_groups,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::atom::Atom;
use crate::element::Element;
fn ethane() -> Molecule {
let mut b = MoleculeBuilder::new();
let c1 = b.add_atom(Atom::new(Element::C));
let c2 = b.add_atom(Atom::new(Element::C));
b.add_bond(c1, c2, BondOrder::Single).unwrap();
b.build()
}
#[test]
fn test_basic_molecule() {
let mol = ethane();
assert_eq!(mol.atom_count(), 2);
assert_eq!(mol.bond_count(), 1);
}
#[test]
fn test_adjacency() {
let mol = ethane();
let neighbors: Vec<_> = mol.neighbors(AtomIdx(0)).collect();
assert_eq!(neighbors.len(), 1);
assert_eq!(neighbors[0].0, AtomIdx(1));
}
#[test]
fn test_bond_between() {
let mol = ethane();
assert!(mol.bond_between(AtomIdx(0), AtomIdx(1)).is_some());
assert!(mol.bond_between(AtomIdx(1), AtomIdx(0)).is_some());
}
#[test]
fn test_duplicate_bond_error() {
let mut b = MoleculeBuilder::new();
let c1 = b.add_atom(Atom::new(Element::C));
let c2 = b.add_atom(Atom::new(Element::C));
b.add_bond(c1, c2, BondOrder::Single).unwrap();
let err = b.add_bond(c1, c2, BondOrder::Double);
assert!(matches!(err, Err(MolError::DuplicateBond(_, _))));
}
#[test]
fn test_formula() {
let mut b = MoleculeBuilder::new();
let c = b.add_atom(Atom::new(Element::C));
let n = b.add_atom(Atom::new(Element::N));
b.add_bond(c, n, BondOrder::Single).unwrap();
let mol = b.build();
assert_eq!(mol.formula(), "CN");
}
#[test]
fn test_implicit_hydrogen_count() {
let mut b = MoleculeBuilder::new();
b.add_atom(Atom::organic(Element::C));
let mol = b.build();
assert_eq!(mol.implicit_hydrogen_count(AtomIdx(0)), 4);
}
#[test]
fn test_total_formula_methane() {
let mut b = MoleculeBuilder::new();
b.add_atom(Atom::organic(Element::C));
let mol = b.build();
assert_eq!(mol.total_formula(), "CH4");
}
#[test]
fn test_total_formula_no_hydrogen() {
let mut b = MoleculeBuilder::new();
let na = b.add_atom(Atom::new(Element::NA));
let cl = b.add_atom(Atom::new(Element::CL));
b.add_bond(na, cl, BondOrder::Single).unwrap();
let mol = b.build();
assert_eq!(mol.total_formula(), "ClNa");
}
#[test]
fn test_with_atom_aromatic() {
let mol = ethane();
let updated = mol.with_atom_aromatic(AtomIdx(0), true);
assert!(updated.atom(AtomIdx(0)).aromatic);
assert!(!updated.atom(AtomIdx(1)).aromatic);
}
#[test]
fn test_with_bond_order() {
let mol = ethane();
let updated = mol.with_bond_order(BondIdx(0), BondOrder::Double);
assert_eq!(updated.bond(BondIdx(0)).order, BondOrder::Double);
}
#[test]
fn test_add_remove_atom() {
let mut mol = ethane();
let n_idx = mol.add_atom(Atom::new(Element::N));
assert_eq!(mol.atom_count(), 3);
assert_eq!(mol.atom(n_idx).element.atomic_number(), 7);
let remap = mol.remove_atom(n_idx);
assert_eq!(mol.atom_count(), 2);
assert!(remap[n_idx.0 as usize].is_none());
}
#[test]
fn test_add_remove_bond() {
let mut mol = ethane();
let n_idx = mol.add_atom(Atom::new(Element::N));
let bidx = mol.add_bond(AtomIdx(0), n_idx, BondOrder::Single).unwrap();
assert_eq!(mol.bond_count(), 2);
mol.remove_bond(bidx);
assert_eq!(mol.bond_count(), 1);
}
#[test]
fn test_set_charge_element() {
let mut mol = ethane();
mol.set_charge(AtomIdx(0), 1);
assert_eq!(mol.atom(AtomIdx(0)).charge, 1);
mol.set_element(AtomIdx(0), Element::N);
assert_eq!(mol.atom(AtomIdx(0)).element.atomic_number(), 7);
}
#[test]
fn test_is_connected() {
let mol = ethane();
assert!(mol.is_connected());
let mut b = MoleculeBuilder::new();
b.add_atom(Atom::new(Element::C));
b.add_atom(Atom::new(Element::N));
let disconnected = b.build();
assert!(!disconnected.is_connected());
}
#[test]
fn test_fragments() {
let mut b = MoleculeBuilder::new();
let c1 = b.add_atom(Atom::organic(Element::C));
let c2 = b.add_atom(Atom::organic(Element::C));
b.add_bond(c1, c2, BondOrder::Single).unwrap();
b.add_atom(Atom::new(Element::N)); let mol = b.build();
let frags = mol.fragments();
assert_eq!(frags.len(), 2);
let sizes: std::collections::HashSet<usize> = frags.iter().map(|f| f.atom_count()).collect();
assert!(sizes.contains(&2));
assert!(sizes.contains(&1));
}
#[test]
fn test_builder_from_molecule() {
let mol = ethane();
let mut b = MoleculeBuilder::from_molecule(&mol);
b.add_atom(Atom::new(Element::O));
let mol2 = b.build();
assert_eq!(mol2.atom_count(), 3);
assert_eq!(mol2.bond_count(), 1); }
}