use std::cmp::Ordering;
use crate::bond::{BondOrder, BondStereo};
use crate::descriptors::elements::get_element;
use crate::descriptors::hybridization::{Hybridization, all_hybridizations};
use crate::descriptors::valence::implicit_hydrogen_count;
use crate::graph::{AdjacencyList, is_hydrogen};
use crate::molecule::Molecule;
const MAX_CIP_DEPTH: usize = 8;
const VOLUME_THRESHOLD: f64 = 0.01;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChiralTag {
Unspecified,
CW,
CCW,
Other,
}
impl ChiralTag {
pub fn to_ogb_index(&self) -> u8 {
match self {
ChiralTag::Unspecified => 0,
ChiralTag::CW => 1,
ChiralTag::CCW => 2,
ChiralTag::Other => 3,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct CipPriority {
layers: Vec<Vec<u8>>,
}
impl Ord for CipPriority {
fn cmp(&self, other: &Self) -> Ordering {
for (a, b) in self.layers.iter().zip(other.layers.iter()) {
let cmp = a.cmp(b);
if cmp != Ordering::Equal {
return cmp;
}
}
self.layers.len().cmp(&other.layers.len())
}
}
impl PartialOrd for CipPriority {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
fn atomic_number_of(mol: &Molecule, idx: usize) -> u8 {
get_element(&mol.atoms[idx].element)
.map(|e| e.atomic_number)
.unwrap_or(0)
}
fn bond_multiplicity(order: BondOrder) -> usize {
match order {
BondOrder::Single => 1,
BondOrder::Double => 2,
BondOrder::Triple => 3,
BondOrder::Aromatic => 1,
_ => 1,
}
}
fn compute_cip_priority(
mol: &Molecule,
adj: &AdjacencyList,
center: usize,
start_neighbor: usize,
) -> CipPriority {
let n = mol.atom_count();
let start_an = if start_neighbor == usize::MAX {
1 } else {
atomic_number_of(mol, start_neighbor)
};
let mut layers = vec![vec![start_an]];
if start_neighbor == usize::MAX {
return CipPriority { layers };
}
let start_mult = find_bond_multiplicity(mol, center, start_neighbor);
let mut current_real: Vec<usize> = vec![start_neighbor];
let mut phantom_ans: Vec<u8> = Vec::new();
for _ in 1..start_mult {
phantom_ans.push(atomic_number_of(mol, start_neighbor));
}
let mut visited = vec![false; n];
visited[center] = true;
visited[start_neighbor] = true;
for _depth in 1..MAX_CIP_DEPTH {
let mut next_real: Vec<usize> = Vec::new();
let mut layer_ans: Vec<u8> = Vec::new();
layer_ans.extend_from_slice(&phantom_ans);
phantom_ans.clear();
for &node_idx in ¤t_real {
for &(neighbor, bond_idx) in adj.neighbors(node_idx) {
let an = atomic_number_of(mol, neighbor);
let mult = mol
.bonds
.get(bond_idx)
.map(|b| bond_multiplicity(b.order))
.unwrap_or(1);
if visited[neighbor] {
for _ in 0..mult {
layer_ans.push(an);
}
} else {
layer_ans.push(an);
next_real.push(neighbor);
visited[neighbor] = true;
for _ in 1..mult {
layer_ans.push(an);
}
}
}
let impl_h = implicit_hydrogen_count(mol, node_idx);
layer_ans.extend(std::iter::repeat_n(1u8, impl_h as usize));
}
if layer_ans.is_empty() {
break;
}
layer_ans.sort_unstable_by(|a, b| b.cmp(a));
layers.push(layer_ans);
current_real = next_real;
if current_real.is_empty() {
break;
}
}
CipPriority { layers }
}
fn find_bond_multiplicity(mol: &Molecule, a: usize, b: usize) -> usize {
for bond in &mol.bonds {
if (bond.atom1 == a && bond.atom2 == b) || (bond.atom1 == b && bond.atom2 == a) {
return bond_multiplicity(bond.order);
}
}
1
}
fn normalize_element(element: &str) -> String {
let elem = element.trim();
let mut chars = elem.chars();
match chars.next() {
None => String::new(),
Some(first) => first.to_uppercase().collect::<String>() + &chars.as_str().to_lowercase(),
}
}
fn is_potential_stereocenter(
mol: &Molecule,
idx: usize,
adj: &AdjacencyList,
hybridizations: &[Hybridization],
) -> bool {
if hybridizations[idx] != Hybridization::SP3 {
return false;
}
if is_hydrogen(&mol.atoms[idx].element) {
return false;
}
let elem = normalize_element(&mol.atoms[idx].element);
if elem == "N" {
return false;
}
let explicit = adj.degree(idx);
let impl_h = implicit_hydrogen_count(mol, idx);
let total = explicit + impl_h as usize;
if total != 4 {
return false;
}
if impl_h >= 2 {
return false;
}
matches!(elem.as_str(), "C" | "S" | "P" | "Se" | "Si" | "Ge")
}
fn is_2d(mol: &Molecule) -> bool {
mol.atoms.iter().all(|a| a.z.abs() < 1e-6)
}
fn get_z_offset(mol: &Molecule, center: usize, neighbor: usize) -> f64 {
for bond in &mol.bonds {
if bond.atom1 == center && bond.atom2 == neighbor {
return match bond.stereo {
BondStereo::Up => 1.0,
BondStereo::Down => -1.0,
_ => 0.0,
};
}
if bond.atom2 == center && bond.atom1 == neighbor {
return match bond.stereo {
BondStereo::Up => -1.0,
BondStereo::Down => 1.0,
_ => 0.0,
};
}
}
0.0
}
fn has_stereo_bonds(mol: &Molecule, center: usize) -> bool {
mol.bonds.iter().any(|bond| {
bond.stereo != BondStereo::None && (bond.atom1 == center || bond.atom2 == center)
})
}
fn substituent_position(
mol: &Molecule,
center: usize,
sub_idx: usize,
mol_is_2d: bool,
) -> (f64, f64, f64) {
let atom = &mol.atoms[sub_idx];
let z = if mol_is_2d {
get_z_offset(mol, center, sub_idx)
} else {
atom.z
};
(atom.x, atom.y, z)
}
fn synthesize_implicit_h_position(
center_pos: (f64, f64, f64),
explicit_positions: &[(f64, f64, f64)],
) -> (f64, f64, f64) {
let (cx, cy, cz) = center_pos;
if explicit_positions.is_empty() {
return (cx + 1.0, cy, cz);
}
let n = explicit_positions.len() as f64;
let avg_x: f64 = explicit_positions.iter().map(|p| p.0).sum::<f64>() / n;
let avg_y: f64 = explicit_positions.iter().map(|p| p.1).sum::<f64>() / n;
let avg_z: f64 = explicit_positions.iter().map(|p| p.2).sum::<f64>() / n;
let dx = cx - avg_x;
let dy = cy - avg_y;
let dz = cz - avg_z;
let len = (dx * dx + dy * dy + dz * dz).sqrt();
if len < 1e-10 {
return (cx, cy, cz + 1.0);
}
let scale = 1.0 / len;
(cx + dx * scale, cy + dy * scale, cz + dz * scale)
}
fn signed_volume(subs: &[(f64, f64, f64); 4], center: &(f64, f64, f64)) -> f64 {
let v: [(f64, f64, f64); 4] = [
(
subs[0].0 - center.0,
subs[0].1 - center.1,
subs[0].2 - center.2,
),
(
subs[1].0 - center.0,
subs[1].1 - center.1,
subs[1].2 - center.2,
),
(
subs[2].0 - center.0,
subs[2].1 - center.1,
subs[2].2 - center.2,
),
(
subs[3].0 - center.0,
subs[3].1 - center.1,
subs[3].2 - center.2,
),
];
let a = (v[0].0 - v[3].0, v[0].1 - v[3].1, v[0].2 - v[3].2);
let b = (v[1].0 - v[3].0, v[1].1 - v[3].1, v[1].2 - v[3].2);
let c = (v[2].0 - v[3].0, v[2].1 - v[3].1, v[2].2 - v[3].2);
let bxc = (
b.1 * c.2 - b.2 * c.1,
b.2 * c.0 - b.0 * c.2,
b.0 * c.1 - b.1 * c.0,
);
a.0 * bxc.0 + a.1 * bxc.1 + a.2 * bxc.2
}
pub fn atom_chirality(mol: &Molecule, idx: usize) -> ChiralTag {
all_chiralities(mol)
.get(idx)
.copied()
.unwrap_or(ChiralTag::Unspecified)
}
pub fn all_chiralities(mol: &Molecule) -> Vec<ChiralTag> {
let n = mol.atom_count();
if n == 0 {
return Vec::new();
}
let adj = AdjacencyList::from_molecule(mol);
let hybridizations = all_hybridizations(mol);
let mol_is_2d = is_2d(mol);
let mut result = vec![ChiralTag::Unspecified; n];
for (i, chirality) in result.iter_mut().enumerate() {
if !is_potential_stereocenter(mol, i, &adj, &hybridizations) {
continue;
}
if mol_is_2d && !has_stereo_bonds(mol, i) {
continue;
}
let mut explicit_neighbors = adj.neighbor_atoms(i);
explicit_neighbors.sort_unstable(); let impl_h = implicit_hydrogen_count(mol, i);
let mut sub_priorities: Vec<(CipPriority, usize)> = Vec::with_capacity(4);
for &neighbor in &explicit_neighbors {
let priority = compute_cip_priority(mol, &adj, i, neighbor);
sub_priorities.push((priority, neighbor));
}
for _ in 0..impl_h {
let priority = CipPriority {
layers: vec![vec![1]],
};
sub_priorities.push((priority, usize::MAX));
}
if sub_priorities.len() != 4 {
continue;
}
let mut sorted_prios: Vec<&CipPriority> = sub_priorities.iter().map(|(p, _)| p).collect();
sorted_prios.sort();
if sorted_prios[0] == sorted_prios[1]
|| sorted_prios[1] == sorted_prios[2]
|| sorted_prios[2] == sorted_prios[3]
{
continue;
}
let center_pos = if mol_is_2d {
(mol.atoms[i].x, mol.atoms[i].y, 0.0)
} else {
(mol.atoms[i].x, mol.atoms[i].y, mol.atoms[i].z)
};
let ordered_neighbors: Vec<usize> = {
let mut v: Vec<usize> = explicit_neighbors.clone();
for _ in 0..impl_h {
v.push(usize::MAX);
}
v
};
let explicit_positions: Vec<(f64, f64, f64)> = ordered_neighbors
.iter()
.filter(|idx| **idx != usize::MAX)
.map(|idx| substituent_position(mol, i, *idx, mol_is_2d))
.collect();
let mut positions = [(0.0, 0.0, 0.0); 4];
for (k, &sub_idx) in ordered_neighbors.iter().enumerate() {
if sub_idx == usize::MAX {
positions[k] = synthesize_implicit_h_position(center_pos, &explicit_positions);
} else {
positions[k] = substituent_position(mol, i, sub_idx, mol_is_2d);
}
}
let vol = signed_volume(&positions, ¢er_pos);
if vol < -VOLUME_THRESHOLD {
*chirality = ChiralTag::CW;
} else if vol > VOLUME_THRESHOLD {
*chirality = ChiralTag::CCW;
}
}
result
}
#[cfg(test)]
mod tests {
use super::*;
use crate::atom::Atom;
use crate::bond::{Bond, BondOrder, BondStereo};
#[test]
fn test_achiral_methane() {
let mut mol = Molecule::new("methane");
mol.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
mol.atoms.push(Atom::new(1, "H", 1.0, 0.0, 0.0));
mol.atoms.push(Atom::new(2, "H", -1.0, 0.0, 0.0));
mol.atoms.push(Atom::new(3, "H", 0.0, 1.0, 0.0));
mol.atoms.push(Atom::new(4, "H", 0.0, -1.0, 0.0));
mol.bonds.push(Bond::new(0, 1, BondOrder::Single));
mol.bonds.push(Bond::new(0, 2, BondOrder::Single));
mol.bonds.push(Bond::new(0, 3, BondOrder::Single));
mol.bonds.push(Bond::new(0, 4, BondOrder::Single));
assert_eq!(atom_chirality(&mol, 0), ChiralTag::Unspecified);
}
#[test]
fn test_sp2_not_stereocenter() {
let mut mol = Molecule::new("ethylene");
mol.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
mol.atoms.push(Atom::new(1, "C", 1.3, 0.0, 0.0));
mol.bonds.push(Bond::new(0, 1, BondOrder::Double));
assert_eq!(atom_chirality(&mol, 0), ChiralTag::Unspecified);
}
#[test]
fn test_nitrogen_not_stereocenter() {
let mut mol = Molecule::new("amine");
mol.atoms.push(Atom::new(0, "N", 0.0, 0.0, 0.0));
mol.atoms.push(Atom::new(1, "C", 1.0, 0.0, 0.0));
mol.atoms.push(Atom::new(2, "C", 0.0, 1.0, 0.0));
mol.atoms.push(Atom::new(3, "C", 0.0, 0.0, 1.0));
mol.bonds.push(Bond::new(0, 1, BondOrder::Single));
mol.bonds.push(Bond::new(0, 2, BondOrder::Single));
mol.bonds.push(Bond::new(0, 3, BondOrder::Single));
assert_eq!(atom_chirality(&mol, 0), ChiralTag::Unspecified);
}
#[test]
fn test_chiral_3d_r() {
let mut mol = Molecule::new("r_chiral");
mol.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
mol.atoms.push(Atom::new(1, "Br", -1.0, -1.0, 1.0));
mol.atoms.push(Atom::new(2, "Cl", -1.0, 1.0, -1.0));
mol.atoms.push(Atom::new(3, "F", 1.0, -1.0, -1.0));
mol.atoms.push(Atom::new(4, "H", 1.0, 1.0, 1.0));
mol.bonds.push(Bond::new(0, 1, BondOrder::Single));
mol.bonds.push(Bond::new(0, 2, BondOrder::Single));
mol.bonds.push(Bond::new(0, 3, BondOrder::Single));
mol.bonds.push(Bond::new(0, 4, BondOrder::Single));
let tag = atom_chirality(&mol, 0);
assert!(
tag == ChiralTag::CW || tag == ChiralTag::CCW,
"Expected CW or CCW, got {:?}",
tag
);
}
#[test]
fn test_chiral_3d_cw_ccw_differ() {
let mut mol_r = Molecule::new("r");
mol_r.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
mol_r.atoms.push(Atom::new(1, "Br", 1.0, 0.0, 0.0));
mol_r.atoms.push(Atom::new(2, "Cl", 0.0, 1.0, 0.0));
mol_r.atoms.push(Atom::new(3, "F", 0.0, 0.0, 1.0));
mol_r.atoms.push(Atom::new(4, "H", -1.0, -1.0, -1.0));
mol_r.bonds.push(Bond::new(0, 1, BondOrder::Single));
mol_r.bonds.push(Bond::new(0, 2, BondOrder::Single));
mol_r.bonds.push(Bond::new(0, 3, BondOrder::Single));
mol_r.bonds.push(Bond::new(0, 4, BondOrder::Single));
let mut mol_s = Molecule::new("s");
mol_s.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
mol_s.atoms.push(Atom::new(1, "Br", 1.0, 0.0, 0.0));
mol_s.atoms.push(Atom::new(2, "Cl", 0.0, 1.0, 0.0));
mol_s.atoms.push(Atom::new(3, "F", 0.0, 0.0, -1.0));
mol_s.atoms.push(Atom::new(4, "H", -1.0, -1.0, 1.0));
mol_s.bonds.push(Bond::new(0, 1, BondOrder::Single));
mol_s.bonds.push(Bond::new(0, 2, BondOrder::Single));
mol_s.bonds.push(Bond::new(0, 3, BondOrder::Single));
mol_s.bonds.push(Bond::new(0, 4, BondOrder::Single));
let tag_r = atom_chirality(&mol_r, 0);
let tag_s = atom_chirality(&mol_s, 0);
assert_ne!(tag_r, ChiralTag::Unspecified, "R should be chiral");
assert_ne!(tag_s, ChiralTag::Unspecified, "S should be chiral");
assert_ne!(
tag_r, tag_s,
"Enantiomers should have different chirality tags"
);
}
#[test]
fn test_2d_with_wedge() {
let mut mol = Molecule::new("chiral_2d");
mol.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
mol.atoms.push(Atom::new(1, "F", 1.0, 0.0, 0.0));
mol.atoms.push(Atom::new(2, "Cl", -1.0, 0.0, 0.0));
mol.atoms.push(Atom::new(3, "Br", 0.0, 1.0, 0.0));
mol.atoms.push(Atom::new(4, "H", 0.0, -1.0, 0.0));
mol.bonds
.push(Bond::with_stereo(0, 1, BondOrder::Single, BondStereo::Up));
mol.bonds.push(Bond::new(0, 2, BondOrder::Single));
mol.bonds.push(Bond::new(0, 3, BondOrder::Single));
mol.bonds.push(Bond::new(0, 4, BondOrder::Single));
let tag = atom_chirality(&mol, 0);
assert!(
tag == ChiralTag::CW || tag == ChiralTag::CCW,
"Expected CW or CCW for 2D chiral center with wedge bond, got {:?}",
tag
);
}
#[test]
fn test_2d_no_wedge_unspecified() {
let mut mol = Molecule::new("flat");
mol.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
mol.atoms.push(Atom::new(1, "F", 1.0, 0.0, 0.0));
mol.atoms.push(Atom::new(2, "Cl", -1.0, 0.0, 0.0));
mol.atoms.push(Atom::new(3, "Br", 0.0, 1.0, 0.0));
mol.atoms.push(Atom::new(4, "H", 0.0, -1.0, 0.0));
mol.bonds.push(Bond::new(0, 1, BondOrder::Single));
mol.bonds.push(Bond::new(0, 2, BondOrder::Single));
mol.bonds.push(Bond::new(0, 3, BondOrder::Single));
mol.bonds.push(Bond::new(0, 4, BondOrder::Single));
assert_eq!(atom_chirality(&mol, 0), ChiralTag::Unspecified);
}
#[test]
fn test_symmetric_substituents_not_stereocenter() {
let mut mol = Molecule::new("symmetric");
mol.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
mol.atoms.push(Atom::new(1, "C", 1.0, 0.0, 0.0)); mol.atoms.push(Atom::new(2, "C", -1.0, 0.0, 0.0)); mol.atoms.push(Atom::new(3, "F", 0.0, 1.0, 0.0));
mol.atoms.push(Atom::new(4, "H", 0.0, 0.0, 1.0));
mol.bonds.push(Bond::new(0, 1, BondOrder::Single));
mol.bonds.push(Bond::new(0, 2, BondOrder::Single));
mol.bonds.push(Bond::new(0, 3, BondOrder::Single));
mol.bonds.push(Bond::new(0, 4, BondOrder::Single));
assert_eq!(atom_chirality(&mol, 0), ChiralTag::Unspecified);
}
#[test]
fn test_ogb_index() {
assert_eq!(ChiralTag::Unspecified.to_ogb_index(), 0);
assert_eq!(ChiralTag::CW.to_ogb_index(), 1);
assert_eq!(ChiralTag::CCW.to_ogb_index(), 2);
assert_eq!(ChiralTag::Other.to_ogb_index(), 3);
}
#[test]
fn test_empty_molecule() {
let mol = Molecule::new("empty");
assert!(all_chiralities(&mol).is_empty());
}
#[test]
fn test_all_chiralities_length() {
let mut mol = Molecule::new("test");
mol.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
mol.atoms.push(Atom::new(1, "H", 1.0, 0.0, 0.0));
mol.bonds.push(Bond::new(0, 1, BondOrder::Single));
let chiralities = all_chiralities(&mol);
assert_eq!(chiralities.len(), 2);
}
#[test]
fn test_cip_double_bond_phantom() {
let mut mol = Molecule::new("cip_test");
mol.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0)); mol.atoms.push(Atom::new(1, "O", 1.0, 0.0, 1.0)); mol.atoms.push(Atom::new(2, "O", -1.0, 0.0, 0.0)); mol.atoms.push(Atom::new(3, "N", 0.0, 1.0, 0.0)); mol.atoms.push(Atom::new(4, "H", 0.0, -1.0, -1.0));
mol.bonds.push(Bond::new(0, 1, BondOrder::Double));
mol.bonds.push(Bond::new(0, 2, BondOrder::Single));
mol.bonds.push(Bond::new(0, 3, BondOrder::Single));
mol.bonds.push(Bond::new(0, 4, BondOrder::Single));
assert_eq!(atom_chirality(&mol, 0), ChiralTag::Unspecified);
}
#[test]
fn test_aspirin_no_stereocenters() {
let path = std::path::Path::new("tests/test_data/aspirin.sdf");
if path.exists() {
let mol = crate::parse_sdf_file(path).unwrap();
let chiralities = all_chiralities(&mol);
assert!(
chiralities.iter().all(|c| *c == ChiralTag::Unspecified),
"Aspirin should have no stereocenters"
);
}
}
#[test]
fn test_methionine_has_stereocenter() {
let path = std::path::Path::new("tests/test_data/methionine.sdf");
if path.exists() {
let mol = crate::parse_sdf_file(path).unwrap();
let chiralities = all_chiralities(&mol);
let chiral_count = chiralities
.iter()
.filter(|c| **c != ChiralTag::Unspecified)
.count();
assert_eq!(
chiral_count, 1,
"Methionine should have exactly 1 stereocenter, got {}",
chiral_count
);
}
}
#[test]
fn test_caffeine_no_stereocenters() {
let path = std::path::Path::new("tests/test_data/caffeine_pubchem.sdf");
if path.exists() {
let mol = crate::parse_sdf_file(path).unwrap();
let chiralities = all_chiralities(&mol);
assert!(
chiralities.iter().all(|c| *c == ChiralTag::Unspecified),
"Caffeine should have no stereocenters"
);
}
}
#[test]
fn test_glucose_multiple_stereocenters() {
let path = std::path::Path::new("tests/test_data/glucose.sdf");
if path.exists() {
let mol = crate::parse_sdf_file(path).unwrap();
let chiralities = all_chiralities(&mol);
let chiral_count = chiralities
.iter()
.filter(|c| **c != ChiralTag::Unspecified)
.count();
assert!(
chiral_count >= 5,
"Glucose should have multiple stereocenters, got {}",
chiral_count
);
}
}
#[test]
fn test_2d_down_bond_stereocenter() {
let mut mol = Molecule::new("down_stereo");
mol.atoms.push(Atom::new(0, "C", 0.0, 0.0, 0.0));
mol.atoms.push(Atom::new(1, "Br", 1.0, 0.0, 0.0));
mol.atoms.push(Atom::new(2, "Cl", 0.0, 1.0, 0.0));
mol.atoms.push(Atom::new(3, "F", -1.0, 0.0, 0.0));
mol.atoms.push(Atom::new(4, "H", 0.0, -1.0, 0.0));
mol.bonds
.push(Bond::with_stereo(0, 1, BondOrder::Single, BondStereo::Down));
mol.bonds.push(Bond::new(0, 2, BondOrder::Single));
mol.bonds.push(Bond::new(0, 3, BondOrder::Single));
mol.bonds.push(Bond::new(0, 4, BondOrder::Single));
let tag = atom_chirality(&mol, 0);
assert_ne!(
tag,
ChiralTag::Unspecified,
"Should detect chirality from Down bond"
);
}
}