use crate::{
AdjacencyList, Atom, AtomSpec, Bond, BondSpec, Molecule, MoleculeBuilder,
error::MoleculeBuildError,
};
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum FragmentError {
#[error("molecule has no atoms")]
EmptyMolecule,
#[error("failed to build fragment molecule: {0}")]
BuildError(#[from] MoleculeBuildError),
}
fn find_connected_components(mol: &Molecule) -> Vec<usize> {
let num_atoms = mol.num_atoms();
if num_atoms == 0 {
return Vec::new();
}
let adjacency = AdjacencyList::from_topology(num_atoms, mol.bonds());
let mut component_of = vec![usize::MAX; num_atoms];
let mut next_component: usize = 0;
for start in 0..num_atoms {
if component_of[start] != usize::MAX {
continue;
}
let mut stack: Vec<usize> = Vec::new();
stack.push(start);
component_of[start] = next_component;
while let Some(atom_idx) = stack.pop() {
for neighbor_ref in adjacency.neighbors_of(atom_idx) {
let nbr = neighbor_ref.atom_index;
if component_of[nbr] == usize::MAX {
component_of[nbr] = next_component;
stack.push(nbr);
}
}
}
next_component += 1;
}
component_of
}
pub fn get_fragment_atom_mapping(mol: &Molecule) -> Vec<usize> {
find_connected_components(mol)
}
pub fn get_num_fragments(mol: &Molecule) -> usize {
let component_of = find_connected_components(mol);
if component_of.is_empty() {
return 0;
}
*component_of.iter().max().unwrap_or(&0) + 1
}
fn group_atoms_by_fragment(mol: &Molecule) -> Vec<Vec<usize>> {
let component_of = find_connected_components(mol);
if component_of.is_empty() {
return Vec::new();
}
let num_fragments = get_num_fragments(mol);
let mut frags: Vec<Vec<usize>> = vec![Vec::new(); num_fragments];
for (atom_idx, frag_idx) in component_of.into_iter().enumerate() {
frags[frag_idx].push(atom_idx);
}
frags
}
fn build_fragment_molecule(
mol: &Molecule,
fragment_atoms: &[usize],
copy_conformers: bool,
) -> Result<Molecule, MoleculeBuildError> {
let mut builder = MoleculeBuilder::new();
let num_frag_atoms = fragment_atoms.len();
let mut old_to_new = vec![usize::MAX; mol.num_atoms()];
for (new_idx, old_idx) in fragment_atoms.iter().enumerate() {
old_to_new[*old_idx] = new_idx;
}
for old_idx in fragment_atoms {
let atom = mol
.atoms()
.get(*old_idx)
.expect("fragment atom index out of range");
let spec = atom_to_spec(atom);
builder.add_atom(spec);
}
for bond in mol.bonds() {
let begin_old = bond.begin().index();
let end_old = bond.end().index();
if old_to_new[begin_old] != usize::MAX && old_to_new[end_old] != usize::MAX {
let begin_new = AtomId_usize(old_to_new[begin_old]);
let end_new = AtomId_usize(old_to_new[end_old]);
let stereo_atoms = bond.stereo_atoms().map(|[sa, sb]| {
(
AtomId_usize(old_to_new[sa.index()]),
AtomId_usize(old_to_new[sb.index()]),
)
});
let mut spec = BondSpec::new(begin_new, end_new, bond.order())
.with_aromatic(bond.is_aromatic())
.with_conjugated(bond.is_conjugated())
.with_direction(bond.direction())
.with_stereo(bond.stereo());
if let Some((sa, sb)) = stereo_atoms {
spec = spec.with_stereo_atoms(sa, sb);
}
for (key, value) in bond.props() {
spec = spec.with_prop(key.clone(), value.clone());
}
builder.add_bond(spec)?;
}
}
if copy_conformers {
if let Some(coords_2d) = mol.coords_2d() {
let frag_coords: Vec<[f64; 2]> = fragment_atoms
.iter()
.filter_map(|old_idx| coords_2d.get(*old_idx).copied())
.collect();
if frag_coords.len() == num_frag_atoms {
builder.set_2d_coordinates(frag_coords)?;
}
}
for conformer in mol.conformers_3d() {
let frag_coords: Vec<[f64; 3]> = fragment_atoms
.iter()
.filter_map(|old_idx| conformer.coords().get(*old_idx).copied())
.collect();
if frag_coords.len() == num_frag_atoms {
builder.add_3d_conformer(frag_coords)?;
}
}
}
builder.build()
}
fn atom_to_spec(atom: &Atom) -> AtomSpec {
let mut spec = AtomSpec::new(atom.element())
.with_formal_charge(atom.formal_charge())
.with_explicit_hydrogens(atom.explicit_hydrogens())
.with_chiral_tag(atom.chiral_tag())
.with_aromatic(atom.is_aromatic())
.with_no_implicit(atom.no_implicit())
.with_radical_electrons(atom.radical_electrons())
.with_hybridization(atom.hybridization());
if let Some(isotope) = atom.isotope() {
spec = spec.with_isotope(isotope);
}
if let Some(atom_map) = atom.atom_map() {
spec = spec.with_atom_map(atom_map);
}
if let Some(chiral_perm) = atom.chiral_permutation() {
spec = spec.with_chiral_permutation(chiral_perm);
}
if atom.unknown_stereo() {
spec = spec.with_unknown_stereo(true);
}
if let Some(mol_parity) = atom.mol_parity() {
spec = spec.with_mol_parity(mol_parity);
}
if let Some(mol_inv) = atom.mol_inversion_flag() {
spec = spec.with_mol_inversion_flag(mol_inv);
}
if atom.implicit_hydrogen() {
spec = spec.with_implicit_hydrogen(true);
}
let tracked: Vec<u16> = atom.tracked_isotopic_hydrogens().to_vec();
if !tracked.is_empty() {
spec = spec.with_tracked_isotopic_hydrogens(tracked);
}
if let Some(query) = atom.query() {
spec = spec.with_query(query.clone());
}
for (key, value) in atom.props() {
spec = spec.with_prop(key.clone(), value.clone());
}
if let Some(pdb_info) = atom.pdb_residue_info() {
spec = spec.with_pdb_residue_info(pdb_info.clone());
}
spec
}
fn AtomId_usize(index: usize) -> crate::AtomId {
crate::AtomId::new(index)
}
pub fn get_mol_frags(mol: &Molecule) -> Result<Vec<Molecule>, FragmentError> {
if mol.num_atoms() == 0 {
return Err(FragmentError::EmptyMolecule);
}
let frag_groups = group_atoms_by_fragment(mol);
if frag_groups.len() == 1 {
return Ok(vec![mol.clone()]);
}
let mut result = Vec::with_capacity(frag_groups.len());
for group in &frag_groups {
let fragment = build_fragment_molecule(mol, group, true)?;
result.push(fragment);
}
Ok(result)
}
pub fn get_largest_fragment(mol: &Molecule) -> Result<Molecule, FragmentError> {
let fragments = get_mol_frags(mol)?;
fragments
.into_iter()
.max_by_key(|frag| frag.num_atoms())
.ok_or(FragmentError::EmptyMolecule)
}