use crate::{AdjacencyList, Atom, Bond};
use glam::{DVec2, DVec3};
use std::collections::BTreeMap;
use std::sync::Arc;
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum SmilesParseError {
#[error("SMILES parser is not implemented yet")]
NotImplemented,
#[error("{0}")]
ParseError(String),
}
pub use crate::smiles_write::SmilesWriteError;
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum CoordinateDimension {
TwoD,
ThreeD,
}
#[derive(Debug, Clone, PartialEq, Default)]
pub struct TopologyData {
pub atoms: Vec<Atom>,
pub bonds: Vec<Bond>,
pub adjacency: Option<AdjacencyList>,
}
#[derive(Debug, Clone, PartialEq, Default)]
pub struct ConformerStore {
pub coords_2d: Option<Vec<DVec2>>,
pub conformers_3d: Vec<Vec<DVec3>>,
pub source_coordinate_dim: Option<CoordinateDimension>,
}
#[derive(Debug, Clone, PartialEq, Default)]
pub struct PropertyStore {
pub name: Option<String>,
pub sdf_data_fields: Vec<(String, String)>,
pub props: BTreeMap<String, String>,
}
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Molecule {
topology: Arc<TopologyData>,
conformers: Arc<ConformerStore>,
props: Arc<PropertyStore>,
}
impl Molecule {
#[must_use]
pub fn new() -> Self {
Self::default()
}
pub(crate) fn from_owned_blocks(
topology: TopologyData,
conformers: ConformerStore,
props: PropertyStore,
) -> Self {
Self {
topology: Arc::new(topology),
conformers: Arc::new(conformers),
props: Arc::new(props),
}
}
pub fn add_atom(&mut self, atom: Atom) -> usize {
let index = self.atoms().len();
let mut atom = atom;
atom.index = index;
self.atoms_mut().push(atom);
self.clear_conformers();
self.clear_adjacency_cache();
index
}
pub fn add_bond(&mut self, bond: Bond) -> usize {
let index = self.bonds().len();
let mut bond = bond;
bond.index = index;
self.bonds_mut().push(bond);
self.clear_adjacency_cache();
index
}
pub fn rebuild_adjacency(&mut self) {
let adjacency = AdjacencyList::from_topology(self.atoms().len(), self.bonds());
self.topology_mut().adjacency = Some(adjacency);
}
pub fn from_smiles(smiles: &str) -> Result<Self, SmilesParseError> {
Self::from_smiles_with_sanitize(smiles, true)
}
pub fn from_smiles_with_sanitize(
smiles: &str,
sanitize: bool,
) -> Result<Self, SmilesParseError> {
crate::smiles::parse_smiles_with_sanitize(smiles, sanitize)
}
pub fn from_mol_block(block: &str) -> Result<Self, crate::io::sdf::SdfReadError> {
crate::io::molfile::read_mol_record_from_str(block).map(|record| record.molecule)
}
pub fn from_mol_file(
path: impl AsRef<std::path::Path>,
) -> Result<Self, crate::io::sdf::SdfReadError> {
crate::io::molfile::read_mol_file(path).map(|record| record.molecule)
}
pub fn to_smiles(&self, isomeric_smiles: bool) -> Result<String, SmilesWriteError> {
let params = crate::smiles_write::SmilesWriteParams {
do_isomeric_smiles: isomeric_smiles,
..Default::default()
};
self.to_smiles_with_params(¶ms)
}
pub fn to_smiles_with_params(
&self,
params: &crate::smiles_write::SmilesWriteParams,
) -> Result<String, SmilesWriteError> {
crate::smiles_write::mol_to_smiles(self, params)
}
pub fn compute_2d_coords(&mut self) -> Result<&mut Self, crate::io::molblock::MolWriteError> {
let coords = crate::io::molblock::compute_2d_coords(self)?;
let conformers = self.conformers_mut();
conformers.coords_2d = Some(coords.into_iter().map(|(x, y)| DVec2::new(x, y)).collect());
conformers.source_coordinate_dim = Some(CoordinateDimension::TwoD);
Ok(self)
}
pub fn with_hydrogens(&self) -> Result<Self, crate::hydrogens::AddHydrogensError> {
let mut out = self.clone();
crate::hydrogens::add_hydrogens_in_place(&mut out)?;
Ok(out)
}
pub fn without_hydrogens(&self) -> Result<Self, crate::hydrogens::RemoveHydrogensError> {
self.without_hydrogens_with_sanitize(true)
}
pub fn without_hydrogens_with_sanitize(
&self,
sanitize: bool,
) -> Result<Self, crate::hydrogens::RemoveHydrogensError> {
let mut out = self.clone();
crate::hydrogens::remove_hydrogens_with_sanitize_in_place(&mut out, sanitize)?;
Ok(out)
}
pub fn with_2d_coords(&self) -> Result<Self, crate::io::molblock::MolWriteError> {
let mut out = self.clone();
out.compute_2d_coords()?;
Ok(out)
}
pub fn with_kekulized_bonds(
&self,
sanitize: bool,
) -> Result<Self, crate::kekulize::KekulizeError> {
let mut out = self.clone();
crate::kekulize::kekulize_in_place(&mut out, sanitize)?;
Ok(out)
}
pub fn sanitize(&self) -> Result<Self, crate::sanitize::SanitizeError> {
self.sanitize_with_ops(crate::sanitize::SanitizeOps::SUPPORTED_ALL)
}
pub fn sanitize_with_ops(
&self,
ops: crate::sanitize::SanitizeOps,
) -> Result<Self, crate::sanitize::SanitizeError> {
let mut out = self.clone();
crate::sanitize::apply_sanitize_pipeline(&mut out, ops)?;
Ok(out)
}
#[must_use]
pub fn with_name(&self, name: impl Into<String>) -> Self {
let mut out = self.clone();
out.props_mut().name = Some(name.into());
out
}
#[must_use]
pub fn topology(&self) -> &TopologyData {
&self.topology
}
pub fn topology_mut(&mut self) -> &mut TopologyData {
Arc::make_mut(&mut self.topology)
}
#[must_use]
pub fn atoms(&self) -> &[Atom] {
&self.topology.atoms
}
pub fn atoms_mut(&mut self) -> &mut Vec<Atom> {
&mut self.topology_mut().atoms
}
#[must_use]
pub fn bonds(&self) -> &[Bond] {
&self.topology.bonds
}
pub fn bonds_mut(&mut self) -> &mut Vec<Bond> {
&mut self.topology_mut().bonds
}
#[must_use]
pub fn adjacency(&self) -> Option<&AdjacencyList> {
self.topology.adjacency.as_ref()
}
pub fn adjacency_mut(&mut self) -> &mut Option<AdjacencyList> {
&mut self.topology_mut().adjacency
}
pub fn clear_adjacency_cache(&mut self) {
self.topology_mut().adjacency = None;
}
#[must_use]
pub fn conformers(&self) -> &ConformerStore {
&self.conformers
}
pub fn conformers_mut(&mut self) -> &mut ConformerStore {
Arc::make_mut(&mut self.conformers)
}
pub fn coords_2d_mut(&mut self) -> &mut Option<Vec<DVec2>> {
&mut self.conformers_mut().coords_2d
}
pub fn set_coords_2d(&mut self, coords: Option<Vec<DVec2>>) {
self.conformers_mut().coords_2d = coords;
}
#[must_use]
pub fn conformers_3d(&self) -> &[Vec<DVec3>] {
&self.conformers.conformers_3d
}
pub fn conformers_3d_mut(&mut self) -> &mut Vec<Vec<DVec3>> {
&mut self.conformers_mut().conformers_3d
}
#[must_use]
pub fn source_coordinate_dim(&self) -> Option<CoordinateDimension> {
self.conformers.source_coordinate_dim
}
pub fn set_source_coordinate_dim(&mut self, coordinate_dim: Option<CoordinateDimension>) {
self.conformers_mut().source_coordinate_dim = coordinate_dim;
}
pub fn clear_conformers(&mut self) {
let conformers = self.conformers_mut();
conformers.coords_2d = None;
conformers.conformers_3d.clear();
conformers.source_coordinate_dim = None;
}
#[must_use]
pub fn props(&self) -> &PropertyStore {
&self.props
}
pub fn props_mut(&mut self) -> &mut PropertyStore {
Arc::make_mut(&mut self.props)
}
#[must_use]
pub fn prop(&self, key: &str) -> Option<&str> {
self.props.props.get(key).map(String::as_str)
}
#[must_use]
pub fn coords_2d(&self) -> Option<&[DVec2]> {
self.conformers.coords_2d.as_deref()
}
#[must_use]
pub fn coords_3d(&self) -> Option<&[DVec3]> {
self.conformers.conformers_3d.first().map(Vec::as_slice)
}
#[must_use]
pub fn conformer_3d(&self, index: usize) -> Option<&[DVec3]> {
self.conformers.conformers_3d.get(index).map(Vec::as_slice)
}
#[must_use]
pub fn num_3d_conformers(&self) -> usize {
self.conformers.conformers_3d.len()
}
#[must_use]
pub fn atomic_numbers(&self) -> Vec<u8> {
self.atoms().iter().map(|atom| atom.atomic_num).collect()
}
pub fn dg_bounds_matrix(&self) -> Result<Vec<Vec<f64>>, crate::DgBoundsError> {
crate::distgeom::dg_bounds_matrix(self)
}
pub fn morgan_fingerprint(
&self,
params: &crate::MorganFingerprintParams,
) -> Result<crate::Fingerprint, crate::FingerprintError> {
crate::fingerprint::morgan_fingerprint(self, params)
}
pub fn morgan_fingerprint_with_output(
&self,
params: &crate::MorganFingerprintParams,
) -> Result<crate::MorganFingerprintOutput, crate::FingerprintError> {
crate::fingerprint::morgan_fingerprint_with_output(self, params)
}
pub fn to_svg(&self, width: u32, height: u32) -> Result<String, crate::SvgDrawError> {
crate::draw::mol_to_svg(self, width, height)
}
pub fn to_png(&self, width: u32, height: u32) -> Result<Vec<u8>, crate::SvgDrawError> {
crate::draw::mol_to_png(self, width, height)
}
pub fn prepare_for_drawing_parity(
&self,
) -> Result<crate::PreparedDrawMolecule, crate::SvgDrawError> {
crate::draw::prepare_mol_for_drawing_parity(self)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{BondDirection, BondOrder, BondStereo, ChiralTag};
fn carbon(index: usize) -> Atom {
Atom {
index,
atomic_num: 6,
is_aromatic: false,
formal_charge: 0,
explicit_hydrogens: 0,
no_implicit: false,
num_radical_electrons: 0,
chiral_tag: ChiralTag::Unspecified,
isotope: None,
atom_map_num: None,
props: Default::default(),
query: None,
rdkit_cip_rank: None,
}
}
fn single_bond(begin_atom: usize, end_atom: usize) -> Bond {
Bond {
index: 0,
begin_atom,
end_atom,
order: BondOrder::Single,
is_aromatic: false,
direction: BondDirection::None,
stereo: BondStereo::None,
stereo_atoms: Vec::new(),
molfile_query_bond_code: None,
props: Default::default(),
query: None,
}
}
fn ethane_with_2d_coords() -> Molecule {
let mut mol = Molecule::new();
mol.add_atom(carbon(0));
mol.add_atom(carbon(1));
mol.add_bond(single_bond(0, 1));
mol.set_coords_2d(Some(vec![DVec2::new(0.0, 0.0), DVec2::new(1.0, 0.0)]));
mol
}
fn assert_coordinate_rows_match_atoms(mol: &Molecule) {
let atom_count = mol.atoms().len();
if let Some(coords) = mol.coords_2d() {
assert_eq!(coords.len(), atom_count, "2D coordinate row count mismatch");
}
for (idx, coords) in mol.conformers_3d().iter().enumerate() {
assert_eq!(
coords.len(),
atom_count,
"3D conformer {idx} coordinate row count mismatch"
);
}
}
fn interleaved_hydrogen_sdf() -> &'static str {
"interleaved_h
COSMolKit 3D
4 3 0 0 0 0 0 0 0 0999 V2000
0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0
1.0000 0.0000 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
2.0000 0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0
3.0000 0.0000 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0
1 2 1 0
1 3 1 0
3 4 1 0
M END
$$$$
"
}
#[test]
fn clone_shares_storage_blocks_until_touched_block_mutates() {
let mol = ethane_with_2d_coords();
let cloned = mol.clone();
assert!(Arc::ptr_eq(&mol.topology, &cloned.topology));
assert!(Arc::ptr_eq(&mol.conformers, &cloned.conformers));
assert!(Arc::ptr_eq(&mol.props, &cloned.props));
let mut topology_edit = mol.clone();
topology_edit.atoms_mut()[0].formal_charge = 1;
assert!(!Arc::ptr_eq(&mol.topology, &topology_edit.topology));
assert!(Arc::ptr_eq(&mol.conformers, &topology_edit.conformers));
assert!(Arc::ptr_eq(&mol.props, &topology_edit.props));
let mut conformer_edit = mol.clone();
conformer_edit.set_coords_2d(None);
assert!(Arc::ptr_eq(&mol.topology, &conformer_edit.topology));
assert!(!Arc::ptr_eq(&mol.conformers, &conformer_edit.conformers));
assert!(Arc::ptr_eq(&mol.props, &conformer_edit.props));
let mut props_edit = mol.clone();
props_edit.props_mut().name = Some("ligand".to_owned());
assert!(Arc::ptr_eq(&mol.topology, &props_edit.topology));
assert!(Arc::ptr_eq(&mol.conformers, &props_edit.conformers));
assert!(!Arc::ptr_eq(&mol.props, &props_edit.props));
}
#[test]
fn value_coordinate_transform_detaches_only_conformers() {
let mut mol = Molecule::new();
mol.add_atom(carbon(0));
let with_coords = mol
.with_2d_coords()
.expect("single atom 2D coordinates should compute");
assert!(Arc::ptr_eq(&mol.topology, &with_coords.topology));
assert!(!Arc::ptr_eq(&mol.conformers, &with_coords.conformers));
assert!(Arc::ptr_eq(&mol.props, &with_coords.props));
assert!(mol.coords_2d().is_none());
assert!(with_coords.coords_2d().is_some());
}
#[test]
fn public_transforms_keep_coordinate_rows_aligned_with_atoms() {
let base_2d = Molecule::from_smiles("CCO")
.expect("SMILES should parse")
.with_2d_coords()
.expect("2D coordinates should compute");
assert_coordinate_rows_match_atoms(&base_2d);
assert_coordinate_rows_match_atoms(
&base_2d
.with_hydrogens()
.expect("hydrogens should add and clear stale coordinates"),
);
assert_coordinate_rows_match_atoms(
&base_2d
.sanitize()
.expect("sanitize should preserve coordinate alignment"),
);
let benzene = Molecule::from_smiles("c1ccccc1")
.expect("benzene should parse")
.with_2d_coords()
.expect("benzene 2D coordinates should compute");
assert_coordinate_rows_match_atoms(
&benzene
.with_kekulized_bonds(false)
.expect("kekulize should preserve coordinate alignment"),
);
let base_3d = crate::io::sdf::read_sdf_from_str(interleaved_hydrogen_sdf())
.expect("3D SDF should parse")
.molecule;
assert_coordinate_rows_match_atoms(&base_3d);
assert_coordinate_rows_match_atoms(
&base_3d
.without_hydrogens_with_sanitize(false)
.expect("hydrogens should remove"),
);
let base_both = base_3d
.with_2d_coords()
.expect("2D coordinates should compute while preserving 3D conformer");
assert_coordinate_rows_match_atoms(&base_both);
assert_coordinate_rows_match_atoms(
&base_both
.without_hydrogens_with_sanitize(false)
.expect("hydrogens should remove"),
);
}
#[test]
fn value_metadata_transform_detaches_only_props() {
let mol = ethane_with_2d_coords();
let named = mol.with_name("ligand");
assert!(Arc::ptr_eq(&mol.topology, &named.topology));
assert!(Arc::ptr_eq(&mol.conformers, &named.conformers));
assert!(!Arc::ptr_eq(&mol.props, &named.props));
assert_eq!(mol.props().name, None);
assert_eq!(named.props().name.as_deref(), Some("ligand"));
}
#[test]
fn value_hydrogen_transform_detaches_topology_only() {
let mol = Molecule::from_smiles("CCO")
.expect("SMILES should parse")
.with_name("ethanol")
.with_2d_coords()
.expect("2D coordinates should compute")
.with_hydrogens()
.expect("hydrogens should add");
let without_h = mol.without_hydrogens().expect("hydrogens should remove");
assert!(!Arc::ptr_eq(&mol.topology, &without_h.topology));
assert!(Arc::ptr_eq(&mol.conformers, &without_h.conformers));
assert!(Arc::ptr_eq(&mol.props, &without_h.props));
assert!(mol.atoms().len() > without_h.atoms().len());
assert_eq!(mol.props().name, without_h.props().name);
}
#[test]
fn reassignment_drops_replaced_topology_block() {
let mut mol = Molecule::from_smiles("CCO")
.expect("SMILES should parse")
.with_hydrogens()
.expect("hydrogens should add");
let old_topology: std::sync::Weak<TopologyData> = Arc::downgrade(&mol.topology);
mol = mol.without_hydrogens().expect("hydrogens should remove");
assert!(old_topology.upgrade().is_none());
assert_eq!(mol.atoms().len(), 3);
}
#[test]
fn retaining_old_and_new_keeps_both_topology_blocks() {
let mol = Molecule::from_smiles("CCO")
.expect("SMILES should parse")
.with_hydrogens()
.expect("hydrogens should add");
let old_topology: std::sync::Weak<TopologyData> = Arc::downgrade(&mol.topology);
let without_h = mol.without_hydrogens().expect("hydrogens should remove");
assert!(old_topology.upgrade().is_some());
assert!(mol.atoms().len() > without_h.atoms().len());
}
}