use std::fmt;
use chematic_core::{AtomIdx, Molecule};
use crate::coords::Coords3D;
use crate::shape_descriptors::jacobi3;
#[derive(Debug, PartialEq)]
pub enum ConformerError {
AtomCountMismatch { expected: usize, got: usize },
}
impl fmt::Display for ConformerError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ConformerError::AtomCountMismatch { expected, got } => {
write!(f, "conformer has {got} atoms but molecule has {expected}")
}
}
}
}
impl std::error::Error for ConformerError {}
pub struct ConformerEnsemble {
mol: Molecule,
conformers: Vec<Coords3D>,
}
impl ConformerEnsemble {
pub fn new(mol: Molecule) -> Self {
Self {
mol,
conformers: Vec::new(),
}
}
pub fn with_conformer(mol: Molecule, coords: Coords3D) -> Result<Self, ConformerError> {
let expected = mol.atom_count();
let got = coords.atom_count();
if got != expected {
return Err(ConformerError::AtomCountMismatch { expected, got });
}
Ok(Self {
mol,
conformers: vec![coords],
})
}
pub fn mol(&self) -> &Molecule {
&self.mol
}
pub fn conformer_count(&self) -> usize {
self.conformers.len()
}
pub fn add_conformer(&mut self, coords: Coords3D) -> Result<usize, ConformerError> {
let expected = self.mol.atom_count();
let got = coords.atom_count();
if got != expected {
return Err(ConformerError::AtomCountMismatch { expected, got });
}
let idx = self.conformers.len();
self.conformers.push(coords);
Ok(idx)
}
pub fn get_conformer(&self, idx: usize) -> Option<&Coords3D> {
self.conformers.get(idx)
}
pub fn get_conformer_mut(&mut self, idx: usize) -> Option<&mut Coords3D> {
self.conformers.get_mut(idx)
}
pub fn remove_conformer(&mut self, idx: usize) -> Option<Coords3D> {
if idx < self.conformers.len() {
Some(self.conformers.remove(idx))
} else {
None
}
}
pub fn conformer_rmsd_no_align(&self, a: usize, b: usize) -> Option<f64> {
let ca = self.conformers.get(a)?;
let cb = self.conformers.get(b)?;
let n = self.mol.atom_count();
if n == 0 {
return Some(0.0);
}
let sum_sq: f64 = (0..n)
.map(|i| {
let idx = AtomIdx(i as u32);
let pa = ca.get(idx);
let pb = cb.get(idx);
let dx = pa.x - pb.x;
let dy = pa.y - pb.y;
let dz = pa.z - pb.z;
dx * dx + dy * dy + dz * dz
})
.sum();
Some((sum_sq / n as f64).sqrt())
}
pub fn conformer_rmsd(&self, a: usize, b: usize) -> Option<f64> {
let ca = self.conformers.get(a)?;
let cb = self.conformers.get(b)?;
let n = self.mol.atom_count();
Some(kabsch_rmsd(ca, cb, n))
}
pub fn conformer_usr_descriptors(&self, idx: usize) -> Option<[f64; 12]> {
let c = self.conformers.get(idx)?;
let pts: Vec<[f64; 3]> = c.points.iter().map(|p| [p.x, p.y, p.z]).collect();
Some(crate::usr::usr_descriptors(&pts))
}
pub fn cluster_conformers_by_rms(&self, rms_threshold: f64) -> Vec<usize> {
let n = self.conformers.len();
if n == 0 {
return vec![];
}
if rms_threshold <= 0.0 {
return (0..n).collect();
}
let mut leaders: Vec<usize> = Vec::new();
'outer: for i in 0..n {
for &leader in &leaders {
let rmsd = self.conformer_rmsd(i, leader).unwrap_or(f64::INFINITY);
if rmsd < rms_threshold {
continue 'outer; }
}
leaders.push(i); }
leaders
}
pub fn conformer_diversity_usr(&self) -> f64 {
let n = self.conformers.len();
if n < 2 {
return 0.0;
}
let descs: Vec<[f64; 12]> = (0..n)
.filter_map(|i| self.conformer_usr_descriptors(i))
.collect();
let mut total = 0.0;
let mut count = 0usize;
for i in 0..descs.len() {
for j in i + 1..descs.len() {
total += 1.0 - crate::usr::usr_similarity(&descs[i], &descs[j]);
count += 1;
}
}
if count == 0 { 0.0 } else { total / count as f64 }
}
}
fn kabsch_rmsd(coords_a: &Coords3D, coords_b: &Coords3D, n: usize) -> f64 {
if n == 0 {
return 0.0;
}
let nf = n as f64;
let mut ca = [0.0f64; 3];
let mut cb = [0.0f64; 3];
for i in 0..n {
let idx = AtomIdx(i as u32);
let pa = coords_a.get(idx);
let pb = coords_b.get(idx);
ca[0] += pa.x;
ca[1] += pa.y;
ca[2] += pa.z;
cb[0] += pb.x;
cb[1] += pb.y;
cb[2] += pb.z;
}
for k in 0..3 {
ca[k] /= nf;
cb[k] /= nf;
}
let mut p = vec![[0.0f64; 3]; n];
let mut q = vec![[0.0f64; 3]; n];
for i in 0..n {
let idx = AtomIdx(i as u32);
let pa = coords_a.get(idx);
let pb = coords_b.get(idx);
p[i] = [pa.x - ca[0], pa.y - ca[1], pa.z - ca[2]];
q[i] = [pb.x - cb[0], pb.y - cb[1], pb.z - cb[2]];
}
let mut h = [[0.0f64; 3]; 3];
for i in 0..n {
for r in 0..3 {
for c in 0..3 {
h[r][c] += p[i][r] * q[i][c];
}
}
}
let mut hth = [[0.0f64; 3]; 3];
for r in 0..3 {
for c in 0..3 {
for k in 0..3 {
hth[r][c] += h[k][r] * h[k][c];
}
}
}
let (evals, v) = jacobi3(hth);
let mut hv = [[0.0f64; 3]; 3];
for r in 0..3 {
for c in 0..3 {
for k in 0..3 {
hv[r][c] += h[r][k] * v[k][c];
}
}
}
let mut u = [[0.0f64; 3]; 3];
for j in 0..3 {
let sigma = evals[j].max(0.0).sqrt();
for r in 0..3 {
u[r][j] = if sigma > 1e-10 { hv[r][j] / sigma } else { 0.0 };
}
}
let mut r_mat = [[0.0f64; 3]; 3];
for r in 0..3 {
for c in 0..3 {
for k in 0..3 {
r_mat[r][c] += u[r][k] * v[c][k];
}
}
}
let det = det3(r_mat);
let mut v_final = v;
if det < 0.0 {
for r in 0..3 {
v_final[r][0] *= -1.0;
}
r_mat = [[0.0f64; 3]; 3];
for r in 0..3 {
for c in 0..3 {
for k in 0..3 {
r_mat[r][c] += u[r][k] * v_final[c][k];
}
}
}
}
let mut sum_sq = 0.0f64;
for i in 0..n {
for row in 0..3 {
let rotated =
r_mat[row][0] * q[i][0] + r_mat[row][1] * q[i][1] + r_mat[row][2] * q[i][2];
let diff = p[i][row] - rotated;
sum_sq += diff * diff;
}
}
(sum_sq / nf).sqrt()
}
fn det3(m: [[f64; 3]; 3]) -> f64 {
m[0][0] * (m[1][1] * m[2][2] - m[1][2] * m[2][1])
- m[0][1] * (m[1][0] * m[2][2] - m[1][2] * m[2][0])
+ m[0][2] * (m[1][0] * m[2][1] - m[1][1] * m[2][0])
}
#[cfg(test)]
mod tests {
use super::*;
use chematic_smiles::parse;
use crate::{coords::Point3, dg::generate_coords};
fn make_ensemble() -> ConformerEnsemble {
let mol = parse("CCC").unwrap();
let c = generate_coords(&mol);
ConformerEnsemble::with_conformer(mol, c).unwrap()
}
#[test]
fn new_has_zero_conformers() {
let mol = parse("C").unwrap();
let ens = ConformerEnsemble::new(mol);
assert_eq!(ens.conformer_count(), 0);
}
#[test]
fn with_conformer_has_one() {
let ens = make_ensemble();
assert_eq!(ens.conformer_count(), 1);
}
#[test]
fn add_conformer_increments_count() {
let mol = parse("CC").unwrap();
let c1 = generate_coords(&mol);
let c2 = generate_coords(&mol);
let mut ens = ConformerEnsemble::with_conformer(mol, c1).unwrap();
let idx = ens.add_conformer(c2).unwrap();
assert_eq!(idx, 1);
assert_eq!(ens.conformer_count(), 2);
}
#[test]
fn add_conformer_wrong_atom_count_errors() {
let mol = parse("CC").unwrap();
let wrong = Coords3D::new_zeroed(5);
let mut ens = ConformerEnsemble::new(mol);
let err = ens.add_conformer(wrong).unwrap_err();
assert!(matches!(
err,
ConformerError::AtomCountMismatch {
expected: 2,
got: 5
}
));
}
#[test]
fn get_conformer_out_of_range_returns_none() {
let ens = make_ensemble();
assert!(ens.get_conformer(99).is_none());
}
#[test]
fn remove_conformer_decrements_count() {
let mut ens = make_ensemble();
let removed = ens.remove_conformer(0);
assert!(removed.is_some());
assert_eq!(ens.conformer_count(), 0);
}
#[test]
fn remove_conformer_shifts_indices() {
let mol = parse("C").unwrap();
let n = mol.atom_count();
let mut ens = ConformerEnsemble::new(mol);
for x in [1.0f64, 2.0, 3.0] {
let mut c = Coords3D::new_zeroed(n);
c.set(AtomIdx(0), Point3::new(x, 0.0, 0.0));
ens.add_conformer(c).unwrap();
}
ens.remove_conformer(0).unwrap();
assert_eq!(ens.conformer_count(), 2);
assert!((ens.get_conformer(0).unwrap().get(AtomIdx(0)).x - 2.0).abs() < 1e-10);
}
#[test]
fn remove_conformer_out_of_range_returns_none() {
let mut ens = make_ensemble();
assert!(ens.remove_conformer(99).is_none());
}
#[test]
fn rmsd_no_align_same_conformer_is_zero() {
let ens = make_ensemble();
let rmsd = ens.conformer_rmsd_no_align(0, 0).unwrap();
assert!(rmsd.abs() < 1e-10, "self-RMSD should be 0, got {rmsd}");
}
#[test]
fn rmsd_no_align_translated_is_nonzero() {
let mol = parse("CC").unwrap();
let n = mol.atom_count();
let mut c1 = Coords3D::new_zeroed(n);
let mut c2 = Coords3D::new_zeroed(n);
for i in 0..n {
c1.set(AtomIdx(i as u32), Point3::new(i as f64, 0.0, 0.0));
c2.set(AtomIdx(i as u32), Point3::new(i as f64 + 10.0, 0.0, 0.0));
}
let mut ens = ConformerEnsemble::with_conformer(mol, c1).unwrap();
ens.add_conformer(c2).unwrap();
let rmsd = ens.conformer_rmsd_no_align(0, 1).unwrap();
assert!(
rmsd > 0.0,
"translated conformers should have non-zero RMSD"
);
}
#[test]
fn kabsch_rmsd_same_conformer_is_zero() {
let ens = make_ensemble();
let rmsd = ens.conformer_rmsd(0, 0).unwrap();
assert!(
rmsd.abs() < 1e-8,
"Kabsch self-RMSD should be 0, got {rmsd}"
);
}
#[test]
fn kabsch_rmsd_pure_translation_is_zero() {
let mol = parse("CCC").unwrap();
let n = mol.atom_count();
let base = generate_coords(&mol);
let mut shifted = Coords3D::new_zeroed(n);
let offset = 5.0;
for i in 0..n {
let p = base.get(AtomIdx(i as u32));
shifted.set(
AtomIdx(i as u32),
Point3::new(p.x + offset, p.y + offset, p.z + offset),
);
}
let mut ens = ConformerEnsemble::with_conformer(mol, base).unwrap();
ens.add_conformer(shifted).unwrap();
let rmsd = ens.conformer_rmsd(0, 1).unwrap();
assert!(
rmsd < 1e-6,
"pure-translation Kabsch RMSD should be ~0, got {rmsd}"
);
}
#[test]
fn kabsch_rmsd_pure_rotation_is_zero() {
let mol = parse("CCC").unwrap();
let n = mol.atom_count();
let base = generate_coords(&mol);
let mut rotated = Coords3D::new_zeroed(n);
for i in 0..n {
let p = base.get(AtomIdx(i as u32));
rotated.set(AtomIdx(i as u32), Point3::new(-p.y, p.x, p.z));
}
let mut ens = ConformerEnsemble::with_conformer(mol, base).unwrap();
ens.add_conformer(rotated).unwrap();
let rmsd = ens.conformer_rmsd(0, 1).unwrap();
assert!(rmsd < 1e-6, "pure-rotation Kabsch RMSD should be ~0, got {rmsd}");
}
#[test]
fn kabsch_rmsd_different_conformers_nonzero() {
let mol = parse("CCC").unwrap();
let c1 = generate_coords(&mol);
let n = mol.atom_count();
let mut c2 = Coords3D::new_zeroed(n);
for i in 0..n {
let p = c1.get(AtomIdx(i as u32));
c2.set(AtomIdx(i as u32), Point3::new(-p.x, p.y, p.z));
}
let mut ens = ConformerEnsemble::with_conformer(mol, c1).unwrap();
ens.add_conformer(c2).unwrap();
let rmsd = ens.conformer_rmsd(0, 1).unwrap();
assert!(rmsd >= 0.0, "RMSD must be non-negative, got {rmsd}");
}
#[test]
fn kabsch_rmsd_out_of_range_returns_none() {
let ens = make_ensemble();
assert!(ens.conformer_rmsd(0, 99).is_none());
assert!(ens.conformer_rmsd(99, 0).is_none());
}
#[test]
fn rmsd_no_align_out_of_range_returns_none() {
let ens = make_ensemble();
assert!(ens.conformer_rmsd_no_align(0, 99).is_none());
}
#[test]
fn usr_descriptors_single_conformer() {
let ens = make_ensemble();
let d = ens.conformer_usr_descriptors(0);
assert!(d.is_some(), "valid index must return Some");
assert!(d.unwrap().iter().all(|v| v.is_finite()), "all USR values finite");
}
#[test]
fn usr_descriptors_out_of_range() {
let ens = make_ensemble();
assert!(ens.conformer_usr_descriptors(99).is_none());
}
#[test]
fn diversity_usr_single_conformer_is_zero() {
let ens = make_ensemble();
assert_eq!(ens.conformer_diversity_usr(), 0.0,
"single conformer → diversity 0");
}
#[test]
fn diversity_usr_identical_conformers_is_zero() {
use chematic_core::{Atom, Element, MoleculeBuilder};
use crate::coords::Point3;
let mut b = MoleculeBuilder::new();
let a0 = b.add_atom(Atom::new(Element::C));
let a1 = b.add_atom(Atom::new(Element::C));
let mol = b.build();
let mut c = Coords3D::new_zeroed(2);
c.set(a0, Point3::new(0.0, 0.0, 0.0));
c.set(a1, Point3::new(1.5, 0.0, 0.0));
let mut ens = ConformerEnsemble::with_conformer(mol, c.clone()).unwrap();
ens.add_conformer(c).unwrap();
let div = ens.conformer_diversity_usr();
assert!(div.abs() < 1e-9, "identical conformers → diversity ~0, got {div}");
}
#[test]
fn diversity_usr_different_shapes_positive() {
use crate::coords::Point3;
use chematic_core::{Atom, Element, MoleculeBuilder};
let mut b = MoleculeBuilder::new();
let a0 = b.add_atom(Atom::new(Element::C));
let a1 = b.add_atom(Atom::new(Element::C));
let a2 = b.add_atom(Atom::new(Element::C));
let mol = b.build();
let mut c1 = Coords3D::new_zeroed(3);
c1.set(a0, Point3::new(0.0, 0.0, 0.0));
c1.set(a1, Point3::new(1.0, 0.0, 0.0));
c1.set(a2, Point3::new(2.0, 0.0, 0.0));
let mut c2 = Coords3D::new_zeroed(3);
c2.set(a0, Point3::new(0.0, 0.0, 0.0));
c2.set(a1, Point3::new(0.0, 10.0, 0.0));
c2.set(a2, Point3::new(0.0, 0.0, 10.0));
let mut ens = ConformerEnsemble::with_conformer(mol, c1).unwrap();
ens.add_conformer(c2).unwrap();
let div = ens.conformer_diversity_usr();
assert!(div > 0.0 && div <= 1.0, "diverse ensemble → diversity in (0,1], got {div}");
}
#[test]
fn cluster_empty_ensemble() {
let mol = parse("C").unwrap();
let ens = ConformerEnsemble::new(mol);
assert!(ens.cluster_conformers_by_rms(0.5).is_empty());
}
#[test]
fn cluster_single_conformer() {
let ens = make_ensemble();
assert_eq!(ens.cluster_conformers_by_rms(0.5), vec![0]);
}
#[test]
fn cluster_zero_threshold_keeps_all() {
let mol = parse("CCC").unwrap();
let c = generate_coords(&mol);
let mut ens = ConformerEnsemble::with_conformer(mol, c.clone()).unwrap();
ens.add_conformer(c.clone()).unwrap();
ens.add_conformer(c).unwrap();
let kept = ens.cluster_conformers_by_rms(0.0);
assert_eq!(kept, vec![0, 1, 2], "threshold ≤ 0 → keep all");
}
#[test]
fn cluster_negative_threshold_keeps_all() {
let mol = parse("CCC").unwrap();
let c = generate_coords(&mol);
let mut ens = ConformerEnsemble::with_conformer(mol, c.clone()).unwrap();
ens.add_conformer(c).unwrap();
assert_eq!(ens.cluster_conformers_by_rms(-1.0), vec![0, 1]);
}
#[test]
fn cluster_identical_conformers_keeps_first() {
let mol = parse("CCC").unwrap();
let c = generate_coords(&mol);
let mut ens = ConformerEnsemble::with_conformer(mol, c.clone()).unwrap();
ens.add_conformer(c.clone()).unwrap();
ens.add_conformer(c).unwrap();
let kept = ens.cluster_conformers_by_rms(0.01);
assert_eq!(kept, vec![0], "three identical conformers → keep only first");
}
#[test]
fn cluster_distinct_conformers_keeps_all() {
let mol = parse("CCC").unwrap();
let n = mol.atom_count();
let mut ca = Coords3D::new_zeroed(n);
let mut cb = Coords3D::new_zeroed(n);
for i in 0..n {
ca.set(chematic_core::AtomIdx(i as u32), Point3 { x: i as f64, y: 0.0, z: 0.0 });
cb.set(chematic_core::AtomIdx(i as u32), Point3 { x: 0.0, y: i as f64 * 100.0, z: 0.0 });
}
let mut ens = ConformerEnsemble::with_conformer(mol, ca).unwrap();
ens.add_conformer(cb).unwrap();
let kept = ens.cluster_conformers_by_rms(0.5);
assert_eq!(kept, vec![0, 1], "two distinct conformers → both kept");
}
#[test]
fn cluster_ascending_order() {
let mol = parse("CCC").unwrap();
let c0 = generate_coords(&mol);
let mut ens = ConformerEnsemble::with_conformer(mol, c0.clone()).unwrap();
ens.add_conformer(c0).unwrap(); let n = ens.mol().atom_count();
let mut far = Coords3D::new_zeroed(n);
for i in 0..n {
far.set(chematic_core::AtomIdx(i as u32), Point3 { x: 0.0, y: i as f64 * 100.0, z: 0.0 });
}
ens.add_conformer(far).unwrap(); let kept = ens.cluster_conformers_by_rms(0.1);
for w in kept.windows(2) {
assert!(w[0] < w[1], "kept indices not ascending");
}
}
}