use std::collections::HashSet;
use crate::{
AtomId, BondId, BondStereo, ChiralTag, Molecule, stereo::assign_atom_cip_ranks,
stereo::is_atom_potential_chiral_center,
};
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum EnumerationError {
#[error("no stereo centers found in molecule")]
NoStereoCenters,
#[error("too many stereo centers ({0}), maximum supported is 20")]
TooManyCenters(usize),
#[error("too many E/Z bonds ({0}), maximum supported is 20")]
TooManyBonds(usize),
#[error("failed to generate isomer at configuration index {0}")]
IsomerGenerationFailed(usize),
#[error("stereo error: {0}")]
StereoError(#[from] crate::StereoError),
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EnumerationStrategy {
Default,
Random,
Symmetry,
}
#[derive(Debug, Clone)]
pub struct EnumerationParams {
pub strategy: EnumerationStrategy,
pub max_isomers: u64,
pub only_unique: bool,
pub sample_seed: i32,
pub max_tries: u64,
}
impl Default for EnumerationParams {
fn default() -> Self {
Self {
strategy: EnumerationStrategy::Default,
max_isomers: 0,
only_unique: true,
sample_seed: -1,
max_tries: 1_000_000,
}
}
}
struct SimpleRng(u64);
impl SimpleRng {
fn new(seed: u64) -> Self {
let mut state = seed;
state = (state ^ (state >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
state = (state ^ (state >> 27)).wrapping_mul(0x94d049bb133111eb);
state ^= state >> 31;
SimpleRng(state)
}
fn next_u64(&mut self) -> u64 {
self.0 ^= self.0 >> 12;
self.0 ^= self.0 << 25;
self.0 ^= self.0 >> 27;
self.0.wrapping_mul(0x2545f4914f6cdd1d)
}
fn next_bool(&mut self) -> bool {
self.next_u64() & 1 == 1
}
}
#[derive(Debug, Clone, Copy)]
struct TetrahedralCenter {
atom: AtomId,
}
#[derive(Debug, Clone, Copy)]
struct StereoBond {
bond: BondId,
begin: AtomId,
end: AtomId,
}
fn find_tetrahedral_centers(mol: &Molecule) -> Vec<TetrahedralCenter> {
let ranks = assign_atom_cip_ranks(mol).unwrap_or_default();
mol.atoms()
.iter()
.filter_map(|atom| {
let idx = atom.id().index();
let tag = atom.chiral_tag();
let explicit_tagged =
tag == ChiralTag::TetrahedralCw || tag == ChiralTag::TetrahedralCcw;
let (legal_center, has_dupes, _) = is_atom_potential_chiral_center(mol, idx, &ranks);
if legal_center && !has_dupes && (explicit_tagged || !ranks.is_empty()) {
Some(TetrahedralCenter { atom: atom.id() })
} else {
None
}
})
.collect()
}
fn find_stereo_bonds(mol: &Molecule) -> Vec<StereoBond> {
mol.bonds()
.iter()
.filter(|bond| {
let order = bond.order();
if order != crate::BondOrder::Double && order != crate::BondOrder::Aromatic {
return false;
}
let stereo = bond.stereo();
matches!(
stereo,
BondStereo::E | BondStereo::Z | BondStereo::Any | BondStereo::None
)
})
.filter(|bond| {
let begin = bond.begin();
let end = bond.end();
let begin_nbrs: Vec<AtomId> = mol
.bonds()
.iter()
.filter(|b| (b.begin() == begin || b.end() == begin) && b.id() != bond.id())
.map(|b| {
if b.begin() == begin {
b.end()
} else {
b.begin()
}
})
.collect();
let end_nbrs: Vec<AtomId> = mol
.bonds()
.iter()
.filter(|b| (b.begin() == end || b.end() == end) && b.id() != bond.id())
.map(|b| if b.begin() == end { b.end() } else { b.begin() })
.collect();
begin_nbrs.len() >= 2 && end_nbrs.len() >= 2
})
.map(|bond| StereoBond {
bond: bond.id(),
begin: bond.begin(),
end: bond.end(),
})
.collect()
}
fn generate_combinations(n: usize) -> Vec<Vec<bool>> {
if n == 0 {
return vec![vec![]];
}
let count = 1usize << n;
let mut result = Vec::with_capacity(count);
for i in 0..count {
let mut combo = Vec::with_capacity(n);
for j in 0..n {
combo.push((i >> j) & 1 == 1);
}
result.push(combo);
}
result
}
fn build_tetrahedral_isomer(
base: &Molecule,
centers: &[TetrahedralCenter],
config: &[bool],
) -> Result<Molecule, EnumerationError> {
let mut isomer = base.clone();
for (center, &flip) in centers.iter().zip(config.iter()) {
let idx = center.atom.index();
let Some(atom) = isomer.topology_block_mut().atoms.get_mut(idx) else {
return Err(EnumerationError::IsomerGenerationFailed(
config
.iter()
.enumerate()
.find(|(_, f)| **f)
.map_or(0, |(i, _)| i),
));
};
let current_tag = atom.chiral_tag();
let new_tag = if flip {
match current_tag {
ChiralTag::TetrahedralCw => ChiralTag::TetrahedralCcw,
ChiralTag::TetrahedralCcw => ChiralTag::TetrahedralCw,
other => other,
}
} else {
current_tag
};
atom.set_chiral_tag(new_tag);
}
Ok(isomer)
}
fn build_double_bond_isomer(
base: &Molecule,
bonds: &[StereoBond],
config: &[bool],
) -> Result<Molecule, EnumerationError> {
let mut isomer = base.clone();
for (sb, &flip) in bonds.iter().zip(config.iter()) {
let idx = sb.bond.index();
let Some(bond) = isomer.topology_block_mut().bonds.get_mut(idx) else {
return Err(EnumerationError::IsomerGenerationFailed(
config
.iter()
.enumerate()
.find(|(_, f)| **f)
.map_or(0, |(i, _)| i),
));
};
let new_stereo = if flip {
match bond.stereo() {
BondStereo::E => BondStereo::Z,
BondStereo::Z => BondStereo::E,
BondStereo::Any => BondStereo::E,
BondStereo::None => BondStereo::E,
BondStereo::AtropCw => BondStereo::AtropCcw,
BondStereo::AtropCcw => BondStereo::AtropCw,
other => other,
}
} else {
match bond.stereo() {
BondStereo::Any => BondStereo::E,
BondStereo::None => BondStereo::E,
other => other,
}
};
bond.set_stereo(new_stereo);
}
Ok(isomer)
}
fn is_valid_tetrahedral_config(
_base: &Molecule,
_centers: &[TetrahedralCenter],
_config: &[bool],
) -> bool {
true
}
fn is_valid_double_bond_config(base: &Molecule, bonds: &[StereoBond], config: &[bool]) -> bool {
for (sb, &flip) in bonds.iter().zip(config.iter()) {
let idx = sb.bond.index();
let Some(bond) = base.bonds().get(idx) else {
return false;
};
let desired_stereo = if flip {
match bond.stereo() {
BondStereo::E => BondStereo::Z,
BondStereo::Z => BondStereo::E,
BondStereo::Any | BondStereo::None => BondStereo::E,
_ => return false,
}
} else {
match bond.stereo() {
BondStereo::E => BondStereo::E,
BondStereo::Z => BondStereo::Z,
BondStereo::Any | BondStereo::None => BondStereo::E,
_ => return false,
}
};
if let Some(ring_info) = base.derived_cache().rings.as_ref() {
let min_ring_size = ring_info.min_bond_ring_size(sb.bond);
if desired_stereo == BondStereo::E && min_ring_size > 0 && min_ring_size <= 7 {
return false;
}
}
}
true
}
fn canonical_smiles(mol: &Molecule) -> Result<String, EnumerationError> {
let params = crate::SmilesWriteParams {
do_isomeric_smiles: true,
..Default::default()
};
mol.to_smiles_with_params(¶ms)
.map_err(|_| EnumerationError::IsomerGenerationFailed(0))
}
pub fn enum_stereoisomers(
mol: &Molecule,
params: &EnumerationParams,
) -> Result<Vec<Molecule>, EnumerationError> {
let centers = find_tetrahedral_centers(mol);
if centers.is_empty() {
return Err(EnumerationError::NoStereoCenters);
}
let n = centers.len();
if n > 20 {
return Err(EnumerationError::TooManyCenters(n));
}
let total_possible = 1u64 << n;
let num_to_return = if params.max_isomers > 0 {
total_possible.min(params.max_isomers)
} else {
total_possible
};
match params.strategy {
EnumerationStrategy::Default | EnumerationStrategy::Symmetry => {
let combinations = generate_combinations(n);
let mut result: Vec<Molecule> = Vec::new();
let mut seen: HashSet<String> = HashSet::new();
for config in &combinations {
if result.len() >= num_to_return as usize {
break;
}
if !is_valid_tetrahedral_config(mol, ¢ers, config) {
continue;
}
let isomer = build_tetrahedral_isomer(mol, ¢ers, config)?;
if params.only_unique {
let smi = canonical_smiles(&isomer)?;
if !seen.insert(smi) {
continue;
}
}
result.push(isomer);
}
Ok(result)
}
EnumerationStrategy::Random => {
let seed = if params.sample_seed == -1 {
let mol_hash = mol.to_smiles(false).unwrap_or_default();
let h: u64 = mol_hash.bytes().fold(0u64, |acc, b| {
acc.wrapping_mul(6364136223846793005).wrapping_add(b as u64)
});
h.wrapping_add(
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos() as u64)
.unwrap_or(42),
)
} else {
params.sample_seed as u64
};
let mut rng = SimpleRng::new(seed);
let mut result: Vec<Molecule> = Vec::new();
let mut seen_configs: HashSet<Vec<bool>> = HashSet::new();
let mut seen_smiles: HashSet<String> = HashSet::new();
let mut tries: u64 = 0;
while result.len() < num_to_return as usize && tries < params.max_tries {
tries += 1;
let config: Vec<bool> = (0..n).map(|_| rng.next_bool()).collect();
if !seen_configs.insert(config.clone()) {
continue;
}
if !is_valid_tetrahedral_config(mol, ¢ers, &config) {
continue;
}
let isomer = build_tetrahedral_isomer(mol, ¢ers, &config)?;
if params.only_unique {
let smi = canonical_smiles(&isomer)?;
if !seen_smiles.insert(smi) {
continue;
}
}
result.push(isomer);
}
Ok(result)
}
}
}
pub fn enum_double_bond_stereoisomers(
mol: &Molecule,
params: &EnumerationParams,
) -> Result<Vec<Molecule>, EnumerationError> {
let bonds = find_stereo_bonds(mol);
if bonds.is_empty() {
return Err(EnumerationError::NoStereoCenters);
}
let m = bonds.len();
if m > 20 {
return Err(EnumerationError::TooManyBonds(m));
}
let total_possible = 1u64 << m;
let num_to_return = if params.max_isomers > 0 {
total_possible.min(params.max_isomers)
} else {
total_possible
};
match params.strategy {
EnumerationStrategy::Default | EnumerationStrategy::Symmetry => {
let combinations = generate_combinations(m);
let mut result: Vec<Molecule> = Vec::new();
let mut seen: HashSet<String> = HashSet::new();
for config in &combinations {
if result.len() >= num_to_return as usize {
break;
}
if !is_valid_double_bond_config(mol, &bonds, config) {
continue;
}
let isomer = build_double_bond_isomer(mol, &bonds, config)?;
if params.only_unique {
let smi = canonical_smiles(&isomer)?;
if !seen.insert(smi) {
continue;
}
}
result.push(isomer);
}
Ok(result)
}
EnumerationStrategy::Random => {
let seed = if params.sample_seed == -1 {
let mol_hash = mol.to_smiles(false).unwrap_or_default();
let h: u64 = mol_hash.bytes().fold(0u64, |acc, b| {
acc.wrapping_mul(6364136223846793005).wrapping_add(b as u64)
});
h.wrapping_add(
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos() as u64)
.unwrap_or(42),
)
} else {
params.sample_seed as u64
};
let mut rng = SimpleRng::new(seed);
let mut result: Vec<Molecule> = Vec::new();
let mut seen_configs: HashSet<Vec<bool>> = HashSet::new();
let mut seen_smiles: HashSet<String> = HashSet::new();
let mut tries: u64 = 0;
while result.len() < num_to_return as usize && tries < params.max_tries {
tries += 1;
let config: Vec<bool> = (0..m).map(|_| rng.next_bool()).collect();
if !seen_configs.insert(config.clone()) {
continue;
}
if !is_valid_double_bond_config(mol, &bonds, &config) {
continue;
}
let isomer = build_double_bond_isomer(mol, &bonds, &config)?;
if params.only_unique {
let smi = canonical_smiles(&isomer)?;
if !seen_smiles.insert(smi) {
continue;
}
}
result.push(isomer);
}
Ok(result)
}
}
}
pub fn enum_all_stereoisomers(
mol: &Molecule,
params: &EnumerationParams,
) -> Result<Vec<Molecule>, EnumerationError> {
let centers = find_tetrahedral_centers(mol);
let bonds = find_stereo_bonds(mol);
let n = centers.len();
let m = bonds.len();
if n == 0 && m == 0 {
return Err(EnumerationError::NoStereoCenters);
}
let total_centers = n + m;
if total_centers > 20 {
if n > 20 {
return Err(EnumerationError::TooManyCenters(n));
}
if m > 20 {
return Err(EnumerationError::TooManyBonds(m));
}
}
let total_possible = 1u64 << total_centers;
let num_to_return = if params.max_isomers > 0 {
total_possible.min(params.max_isomers)
} else {
total_possible
};
match params.strategy {
EnumerationStrategy::Default | EnumerationStrategy::Symmetry => {
let combinations = generate_combinations(total_centers);
let mut result: Vec<Molecule> = Vec::new();
let mut seen: HashSet<String> = HashSet::new();
for config in &combinations {
if result.len() >= num_to_return as usize {
break;
}
let tetra_config: Vec<bool> = config[..n].to_vec();
let bond_config: Vec<bool> = config[n..].to_vec();
if !is_valid_tetrahedral_config(mol, ¢ers, &tetra_config) {
continue;
}
if !is_valid_double_bond_config(mol, &bonds, &bond_config) {
continue;
}
let isomer = build_tetrahedral_isomer(mol, ¢ers, &tetra_config)?;
let isomer = build_double_bond_isomer(&isomer, &bonds, &bond_config)?;
if params.only_unique {
let smi = canonical_smiles(&isomer)?;
if !seen.insert(smi) {
continue;
}
}
result.push(isomer);
}
Ok(result)
}
EnumerationStrategy::Random => {
let seed = if params.sample_seed == -1 {
let mol_hash = mol.to_smiles(false).unwrap_or_default();
let h: u64 = mol_hash.bytes().fold(0u64, |acc, b| {
acc.wrapping_mul(6364136223846793005).wrapping_add(b as u64)
});
h.wrapping_add(
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_nanos() as u64)
.unwrap_or(42),
)
} else {
params.sample_seed as u64
};
let mut rng = SimpleRng::new(seed);
let mut result: Vec<Molecule> = Vec::new();
let mut seen_configs: HashSet<Vec<bool>> = HashSet::new();
let mut seen_smiles: HashSet<String> = HashSet::new();
let mut tries: u64 = 0;
while result.len() < num_to_return as usize && tries < params.max_tries {
tries += 1;
let config: Vec<bool> = (0..total_centers).map(|_| rng.next_bool()).collect();
if !seen_configs.insert(config.clone()) {
continue;
}
let tetra_config: Vec<bool> = config[..n].to_vec();
let bond_config: Vec<bool> = config[n..].to_vec();
if !is_valid_tetrahedral_config(mol, ¢ers, &tetra_config) {
continue;
}
if !is_valid_double_bond_config(mol, &bonds, &bond_config) {
continue;
}
let isomer = build_tetrahedral_isomer(mol, ¢ers, &tetra_config)?;
let isomer = build_double_bond_isomer(&isomer, &bonds, &bond_config)?;
if params.only_unique {
let smi = canonical_smiles(&isomer)?;
if !seen_smiles.insert(smi) {
continue;
}
}
result.push(isomer);
}
Ok(result)
}
}
}
#[must_use]
pub fn count_stereoisomers(mol: &Molecule) -> u64 {
let n = find_tetrahedral_centers(mol).len();
if n == 0 { 1 } else { 1u64 << n }
}
#[must_use]
pub fn count_double_bond_stereoisomers(mol: &Molecule) -> u64 {
let m = find_stereo_bonds(mol).len();
if m == 0 { 1 } else { 1u64 << m }
}
#[must_use]
pub fn count_all_stereoisomers(mol: &Molecule) -> u64 {
let n = find_tetrahedral_centers(mol).len();
let m = find_stereo_bonds(mol).len();
match n + m {
0 => 1,
total => 1u64 << total,
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::Molecule;
fn make_test_mol() -> Molecule {
Molecule::from_smiles("Cl[C@H](Br)I").expect("failed to parse test SMILES")
}
fn make_meso_mol() -> Molecule {
Molecule::from_smiles("O[C@@H](C(=O)O)[C@@H](C(=O)O)O")
.expect("failed to parse meso test SMILES")
}
fn make_double_bond_mol() -> Molecule {
Molecule::from_smiles("C/C=C/Cl").expect("failed to parse double bond test SMILES")
}
#[test]
fn test_find_tetrahedral_centers() {
let mol = make_test_mol();
let centers = find_tetrahedral_centers(&mol);
assert_eq!(centers.len(), 1, "expected 1 tetrahedral center");
}
#[test]
fn test_enum_stereoisomers_basic() {
let mol = make_test_mol();
let params = EnumerationParams::default();
let isomers = enum_stereoisomers(&mol, ¶ms).expect("enumeration failed");
assert_eq!(isomers.len(), 2, "expected 2 isomers for one center");
}
#[test]
fn test_enum_stereoisomers_two_centers() {
let mol = make_meso_mol();
let params = EnumerationParams::default();
let isomers = enum_stereoisomers(&mol, ¶ms).expect("enumeration failed");
assert!(
isomers.len() <= 4,
"expected at most 4 isomers for two centers"
);
assert!(!isomers.is_empty(), "expected at least one isomer");
}
#[test]
fn test_generate_combinations() {
let combos = generate_combinations(3);
assert_eq!(combos.len(), 8, "expected 8 combinations for 3 centers");
for combo in &combos {
assert_eq!(combo.len(), 3, "each combo must have 3 elements");
}
}
#[test]
fn test_enum_double_bond_stereoisomers() {
let mol = make_double_bond_mol();
let params = EnumerationParams::default();
let isomers = enum_double_bond_stereoisomers(&mol, ¶ms);
if let Ok(isomers) = isomers {
assert!(
!isomers.is_empty(),
"expected at least one double bond isomer"
);
}
}
#[test]
fn test_no_stereo_centers() {
let mol = Molecule::from_smiles("CC").expect("failed to parse ethane");
let params = EnumerationParams::default();
let result = enum_stereoisomers(&mol, ¶ms);
assert!(
result.is_err(),
"expected error for molecule with no centers"
);
assert_eq!(result.unwrap_err(), EnumerationError::NoStereoCenters);
}
#[test]
fn test_count_functions() {
let mol = make_test_mol();
assert_eq!(count_stereoisomers(&mol), 2, "one center -> 2 isomers");
let mol2 = make_meso_mol();
assert_eq!(count_stereoisomers(&mol2), 4, "two centers -> 4 isomers");
let ethane = Molecule::from_smiles("CC").expect("failed to parse ethane");
assert_eq!(count_stereoisomers(ðane), 1, "no centers -> 1 isomer");
}
#[test]
fn test_unique_isomers() {
let mol = make_meso_mol();
let params_unique = EnumerationParams {
only_unique: true,
..Default::default()
};
let unique_isomers = enum_stereoisomers(&mol, ¶ms_unique).expect("enumeration failed");
let params_all = EnumerationParams {
only_unique: false,
..Default::default()
};
let all_isomers = enum_stereoisomers(&mol, ¶ms_all).expect("enumeration failed");
assert!(unique_isomers.len() <= all_isomers.len());
}
}