1use std::fmt;
4
5use chematic_core::{AtomIdx, Molecule};
6
7use crate::coords::Coords3D;
8use crate::shape_descriptors::jacobi3;
9
10#[derive(Debug, PartialEq)]
15pub enum ConformerError {
16 AtomCountMismatch { expected: usize, got: usize },
17}
18
19impl fmt::Display for ConformerError {
20 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
21 match self {
22 ConformerError::AtomCountMismatch { expected, got } => {
23 write!(f, "conformer has {got} atoms but molecule has {expected}")
24 }
25 }
26 }
27}
28
29impl std::error::Error for ConformerError {}
30
31pub struct ConformerEnsemble {
40 mol: Molecule,
41 conformers: Vec<Coords3D>,
42}
43
44impl ConformerEnsemble {
45 pub fn new(mol: Molecule) -> Self {
47 Self {
48 mol,
49 conformers: Vec::new(),
50 }
51 }
52
53 pub fn with_conformer(mol: Molecule, coords: Coords3D) -> Result<Self, ConformerError> {
57 let expected = mol.atom_count();
58 let got = coords.atom_count();
59 if got != expected {
60 return Err(ConformerError::AtomCountMismatch { expected, got });
61 }
62 Ok(Self {
63 mol,
64 conformers: vec![coords],
65 })
66 }
67
68 pub fn mol(&self) -> &Molecule {
70 &self.mol
71 }
72
73 pub fn conformer_count(&self) -> usize {
75 self.conformers.len()
76 }
77
78 pub fn add_conformer(&mut self, coords: Coords3D) -> Result<usize, ConformerError> {
83 let expected = self.mol.atom_count();
84 let got = coords.atom_count();
85 if got != expected {
86 return Err(ConformerError::AtomCountMismatch { expected, got });
87 }
88 let idx = self.conformers.len();
89 self.conformers.push(coords);
90 Ok(idx)
91 }
92
93 pub fn get_conformer(&self, idx: usize) -> Option<&Coords3D> {
95 self.conformers.get(idx)
96 }
97
98 pub fn get_conformer_mut(&mut self, idx: usize) -> Option<&mut Coords3D> {
100 self.conformers.get_mut(idx)
101 }
102
103 pub fn remove_conformer(&mut self, idx: usize) -> Option<Coords3D> {
108 if idx < self.conformers.len() {
109 Some(self.conformers.remove(idx))
110 } else {
111 None
112 }
113 }
114
115 pub fn conformer_rmsd_no_align(&self, a: usize, b: usize) -> Option<f64> {
119 let ca = self.conformers.get(a)?;
120 let cb = self.conformers.get(b)?;
121 let n = self.mol.atom_count();
122 if n == 0 {
123 return Some(0.0);
124 }
125 let sum_sq: f64 = (0..n)
126 .map(|i| {
127 let idx = AtomIdx(i as u32);
128 let pa = ca.get(idx);
129 let pb = cb.get(idx);
130 let dx = pa.x - pb.x;
131 let dy = pa.y - pb.y;
132 let dz = pa.z - pb.z;
133 dx * dx + dy * dy + dz * dz
134 })
135 .sum();
136 Some((sum_sq / n as f64).sqrt())
137 }
138
139 pub fn conformer_rmsd(&self, a: usize, b: usize) -> Option<f64> {
145 let ca = self.conformers.get(a)?;
146 let cb = self.conformers.get(b)?;
147 let n = self.mol.atom_count();
148 Some(kabsch_rmsd(ca, cb, n))
149 }
150
151 pub fn conformer_usr_descriptors(&self, idx: usize) -> Option<[f64; 12]> {
155 let c = self.conformers.get(idx)?;
156 let pts: Vec<[f64; 3]> = c.points.iter().map(|p| [p.x, p.y, p.z]).collect();
157 Some(crate::usr::usr_descriptors(&pts))
158 }
159
160 pub fn cluster_conformers_by_rms(&self, rms_threshold: f64) -> Vec<usize> {
181 let n = self.conformers.len();
182 if n == 0 {
183 return vec![];
184 }
185 if rms_threshold <= 0.0 {
186 return (0..n).collect();
187 }
188 let mut leaders: Vec<usize> = Vec::new();
189 'outer: for i in 0..n {
190 for &leader in &leaders {
191 let rmsd = self.conformer_rmsd(i, leader).unwrap_or(f64::INFINITY);
192 if rmsd < rms_threshold {
193 continue 'outer; }
195 }
196 leaders.push(i); }
198 leaders
199 }
200
201 pub fn conformer_diversity_usr(&self) -> f64 {
207 let n = self.conformers.len();
208 if n < 2 {
209 return 0.0;
210 }
211 let descs: Vec<[f64; 12]> = (0..n)
212 .filter_map(|i| self.conformer_usr_descriptors(i))
213 .collect();
214 let mut total = 0.0;
215 let mut count = 0usize;
216 for i in 0..descs.len() {
217 for j in i + 1..descs.len() {
218 total += 1.0 - crate::usr::usr_similarity(&descs[i], &descs[j]);
219 count += 1;
220 }
221 }
222 if count == 0 { 0.0 } else { total / count as f64 }
223 }
224}
225
226fn kabsch_rmsd(coords_a: &Coords3D, coords_b: &Coords3D, n: usize) -> f64 {
231 if n == 0 {
232 return 0.0;
233 }
234
235 let nf = n as f64;
236
237 let mut ca = [0.0f64; 3];
239 let mut cb = [0.0f64; 3];
240 for i in 0..n {
241 let idx = AtomIdx(i as u32);
242 let pa = coords_a.get(idx);
243 let pb = coords_b.get(idx);
244 ca[0] += pa.x;
245 ca[1] += pa.y;
246 ca[2] += pa.z;
247 cb[0] += pb.x;
248 cb[1] += pb.y;
249 cb[2] += pb.z;
250 }
251 for k in 0..3 {
252 ca[k] /= nf;
253 cb[k] /= nf;
254 }
255
256 let mut p = vec![[0.0f64; 3]; n];
258 let mut q = vec![[0.0f64; 3]; n];
259 for i in 0..n {
260 let idx = AtomIdx(i as u32);
261 let pa = coords_a.get(idx);
262 let pb = coords_b.get(idx);
263 p[i] = [pa.x - ca[0], pa.y - ca[1], pa.z - ca[2]];
264 q[i] = [pb.x - cb[0], pb.y - cb[1], pb.z - cb[2]];
265 }
266
267 let mut h = [[0.0f64; 3]; 3];
269 for i in 0..n {
270 for r in 0..3 {
271 for c in 0..3 {
272 h[r][c] += p[i][r] * q[i][c];
273 }
274 }
275 }
276
277 let mut hth = [[0.0f64; 3]; 3];
279 for r in 0..3 {
280 for c in 0..3 {
281 for k in 0..3 {
282 hth[r][c] += h[k][r] * h[k][c];
283 }
284 }
285 }
286
287 let (evals, v) = jacobi3(hth);
290
291 let mut hv = [[0.0f64; 3]; 3];
293 for r in 0..3 {
294 for c in 0..3 {
295 for k in 0..3 {
296 hv[r][c] += h[r][k] * v[k][c];
297 }
298 }
299 }
300 let mut u = [[0.0f64; 3]; 3];
301 for j in 0..3 {
302 let sigma = evals[j].max(0.0).sqrt();
303 for r in 0..3 {
304 u[r][j] = if sigma > 1e-10 { hv[r][j] / sigma } else { 0.0 };
305 }
306 }
307
308 let mut r_mat = [[0.0f64; 3]; 3];
310 for r in 0..3 {
311 for c in 0..3 {
312 for k in 0..3 {
313 r_mat[r][c] += u[r][k] * v[c][k];
314 }
315 }
316 }
317
318 let det = det3(r_mat);
320 let mut v_final = v;
321 if det < 0.0 {
322 for r in 0..3 {
323 v_final[r][0] *= -1.0;
324 }
325 r_mat = [[0.0f64; 3]; 3];
327 for r in 0..3 {
328 for c in 0..3 {
329 for k in 0..3 {
330 r_mat[r][c] += u[r][k] * v_final[c][k];
331 }
332 }
333 }
334 }
335
336 let mut sum_sq = 0.0f64;
338 for i in 0..n {
339 for row in 0..3 {
340 let rotated =
341 r_mat[row][0] * q[i][0] + r_mat[row][1] * q[i][1] + r_mat[row][2] * q[i][2];
342 let diff = p[i][row] - rotated;
343 sum_sq += diff * diff;
344 }
345 }
346 (sum_sq / nf).sqrt()
347}
348
349fn det3(m: [[f64; 3]; 3]) -> f64 {
350 m[0][0] * (m[1][1] * m[2][2] - m[1][2] * m[2][1])
351 - m[0][1] * (m[1][0] * m[2][2] - m[1][2] * m[2][0])
352 + m[0][2] * (m[1][0] * m[2][1] - m[1][1] * m[2][0])
353}
354
355#[cfg(test)]
360mod tests {
361 use super::*;
362 use chematic_smiles::parse;
363
364 use crate::{coords::Point3, dg::generate_coords};
365
366 fn make_ensemble() -> ConformerEnsemble {
367 let mol = parse("CCC").unwrap();
368 let c = generate_coords(&mol);
369 ConformerEnsemble::with_conformer(mol, c).unwrap()
370 }
371
372 #[test]
375 fn new_has_zero_conformers() {
376 let mol = parse("C").unwrap();
377 let ens = ConformerEnsemble::new(mol);
378 assert_eq!(ens.conformer_count(), 0);
379 }
380
381 #[test]
382 fn with_conformer_has_one() {
383 let ens = make_ensemble();
384 assert_eq!(ens.conformer_count(), 1);
385 }
386
387 #[test]
388 fn add_conformer_increments_count() {
389 let mol = parse("CC").unwrap();
390 let c1 = generate_coords(&mol);
391 let c2 = generate_coords(&mol);
392 let mut ens = ConformerEnsemble::with_conformer(mol, c1).unwrap();
393 let idx = ens.add_conformer(c2).unwrap();
394 assert_eq!(idx, 1);
395 assert_eq!(ens.conformer_count(), 2);
396 }
397
398 #[test]
399 fn add_conformer_wrong_atom_count_errors() {
400 let mol = parse("CC").unwrap();
401 let wrong = Coords3D::new_zeroed(5);
402 let mut ens = ConformerEnsemble::new(mol);
403 let err = ens.add_conformer(wrong).unwrap_err();
404 assert!(matches!(
405 err,
406 ConformerError::AtomCountMismatch {
407 expected: 2,
408 got: 5
409 }
410 ));
411 }
412
413 #[test]
414 fn get_conformer_out_of_range_returns_none() {
415 let ens = make_ensemble();
416 assert!(ens.get_conformer(99).is_none());
417 }
418
419 #[test]
422 fn remove_conformer_decrements_count() {
423 let mut ens = make_ensemble();
424 let removed = ens.remove_conformer(0);
425 assert!(removed.is_some());
426 assert_eq!(ens.conformer_count(), 0);
427 }
428
429 #[test]
430 fn remove_conformer_shifts_indices() {
431 let mol = parse("C").unwrap();
432 let n = mol.atom_count();
433 let mut ens = ConformerEnsemble::new(mol);
434
435 for x in [1.0f64, 2.0, 3.0] {
437 let mut c = Coords3D::new_zeroed(n);
438 c.set(AtomIdx(0), Point3::new(x, 0.0, 0.0));
439 ens.add_conformer(c).unwrap();
440 }
441
442 ens.remove_conformer(0).unwrap();
444 assert_eq!(ens.conformer_count(), 2);
445 assert!((ens.get_conformer(0).unwrap().get(AtomIdx(0)).x - 2.0).abs() < 1e-10);
446 }
447
448 #[test]
449 fn remove_conformer_out_of_range_returns_none() {
450 let mut ens = make_ensemble();
451 assert!(ens.remove_conformer(99).is_none());
452 }
453
454 #[test]
457 fn rmsd_no_align_same_conformer_is_zero() {
458 let ens = make_ensemble();
459 let rmsd = ens.conformer_rmsd_no_align(0, 0).unwrap();
460 assert!(rmsd.abs() < 1e-10, "self-RMSD should be 0, got {rmsd}");
461 }
462
463 #[test]
464 fn rmsd_no_align_translated_is_nonzero() {
465 let mol = parse("CC").unwrap();
466 let n = mol.atom_count();
467 let mut c1 = Coords3D::new_zeroed(n);
468 let mut c2 = Coords3D::new_zeroed(n);
469 for i in 0..n {
470 c1.set(AtomIdx(i as u32), Point3::new(i as f64, 0.0, 0.0));
471 c2.set(AtomIdx(i as u32), Point3::new(i as f64 + 10.0, 0.0, 0.0));
472 }
473 let mut ens = ConformerEnsemble::with_conformer(mol, c1).unwrap();
474 ens.add_conformer(c2).unwrap();
475 let rmsd = ens.conformer_rmsd_no_align(0, 1).unwrap();
476 assert!(
477 rmsd > 0.0,
478 "translated conformers should have non-zero RMSD"
479 );
480 }
481
482 #[test]
483 fn kabsch_rmsd_same_conformer_is_zero() {
484 let ens = make_ensemble();
485 let rmsd = ens.conformer_rmsd(0, 0).unwrap();
486 assert!(
487 rmsd.abs() < 1e-8,
488 "Kabsch self-RMSD should be 0, got {rmsd}"
489 );
490 }
491
492 #[test]
493 fn kabsch_rmsd_pure_translation_is_zero() {
494 let mol = parse("CCC").unwrap();
496 let n = mol.atom_count();
497 let base = generate_coords(&mol);
498 let mut shifted = Coords3D::new_zeroed(n);
499 let offset = 5.0;
500 for i in 0..n {
501 let p = base.get(AtomIdx(i as u32));
502 shifted.set(
503 AtomIdx(i as u32),
504 Point3::new(p.x + offset, p.y + offset, p.z + offset),
505 );
506 }
507 let mut ens = ConformerEnsemble::with_conformer(mol, base).unwrap();
508 ens.add_conformer(shifted).unwrap();
509 let rmsd = ens.conformer_rmsd(0, 1).unwrap();
510 assert!(
511 rmsd < 1e-6,
512 "pure-translation Kabsch RMSD should be ~0, got {rmsd}"
513 );
514 }
515
516 #[test]
517 fn kabsch_rmsd_pure_rotation_is_zero() {
518 let mol = parse("CCC").unwrap();
520 let n = mol.atom_count();
521 let base = generate_coords(&mol);
522 let mut rotated = Coords3D::new_zeroed(n);
524 for i in 0..n {
525 let p = base.get(AtomIdx(i as u32));
526 rotated.set(AtomIdx(i as u32), Point3::new(-p.y, p.x, p.z));
527 }
528 let mut ens = ConformerEnsemble::with_conformer(mol, base).unwrap();
529 ens.add_conformer(rotated).unwrap();
530 let rmsd = ens.conformer_rmsd(0, 1).unwrap();
531 assert!(rmsd < 1e-6, "pure-rotation Kabsch RMSD should be ~0, got {rmsd}");
532 }
533
534 #[test]
535 fn kabsch_rmsd_different_conformers_nonzero() {
536 let mol = parse("CCC").unwrap();
537 let c1 = generate_coords(&mol);
538 let n = mol.atom_count();
539 let mut c2 = Coords3D::new_zeroed(n);
541 for i in 0..n {
542 let p = c1.get(AtomIdx(i as u32));
543 c2.set(AtomIdx(i as u32), Point3::new(-p.x, p.y, p.z));
544 }
545 let mut ens = ConformerEnsemble::with_conformer(mol, c1).unwrap();
546 ens.add_conformer(c2).unwrap();
547 let rmsd = ens.conformer_rmsd(0, 1).unwrap();
548 assert!(rmsd >= 0.0, "RMSD must be non-negative, got {rmsd}");
551 }
552
553 #[test]
554 fn kabsch_rmsd_out_of_range_returns_none() {
555 let ens = make_ensemble();
556 assert!(ens.conformer_rmsd(0, 99).is_none());
557 assert!(ens.conformer_rmsd(99, 0).is_none());
558 }
559
560 #[test]
561 fn rmsd_no_align_out_of_range_returns_none() {
562 let ens = make_ensemble();
563 assert!(ens.conformer_rmsd_no_align(0, 99).is_none());
564 }
565
566 #[test]
569 fn usr_descriptors_single_conformer() {
570 let ens = make_ensemble();
571 let d = ens.conformer_usr_descriptors(0);
572 assert!(d.is_some(), "valid index must return Some");
573 assert!(d.unwrap().iter().all(|v| v.is_finite()), "all USR values finite");
574 }
575
576 #[test]
577 fn usr_descriptors_out_of_range() {
578 let ens = make_ensemble();
579 assert!(ens.conformer_usr_descriptors(99).is_none());
580 }
581
582 #[test]
583 fn diversity_usr_single_conformer_is_zero() {
584 let ens = make_ensemble();
585 assert_eq!(ens.conformer_diversity_usr(), 0.0,
586 "single conformer → diversity 0");
587 }
588
589 #[test]
590 fn diversity_usr_identical_conformers_is_zero() {
591 use chematic_core::{Atom, Element, MoleculeBuilder};
592 use crate::coords::Point3;
593
594 let mut b = MoleculeBuilder::new();
595 let a0 = b.add_atom(Atom::new(Element::C));
596 let a1 = b.add_atom(Atom::new(Element::C));
597 let mol = b.build();
598
599 let mut c = Coords3D::new_zeroed(2);
600 c.set(a0, Point3::new(0.0, 0.0, 0.0));
601 c.set(a1, Point3::new(1.5, 0.0, 0.0));
602
603 let mut ens = ConformerEnsemble::with_conformer(mol, c.clone()).unwrap();
604 ens.add_conformer(c).unwrap();
605
606 let div = ens.conformer_diversity_usr();
607 assert!(div.abs() < 1e-9, "identical conformers → diversity ~0, got {div}");
608 }
609
610 #[test]
611 fn diversity_usr_different_shapes_positive() {
612 use crate::coords::Point3;
613 use chematic_core::{Atom, Element, MoleculeBuilder};
614
615 let mut b = MoleculeBuilder::new();
617 let a0 = b.add_atom(Atom::new(Element::C));
618 let a1 = b.add_atom(Atom::new(Element::C));
619 let a2 = b.add_atom(Atom::new(Element::C));
620 let mol = b.build();
621
622 let mut c1 = Coords3D::new_zeroed(3);
623 c1.set(a0, Point3::new(0.0, 0.0, 0.0));
624 c1.set(a1, Point3::new(1.0, 0.0, 0.0));
625 c1.set(a2, Point3::new(2.0, 0.0, 0.0));
626
627 let mut c2 = Coords3D::new_zeroed(3);
628 c2.set(a0, Point3::new(0.0, 0.0, 0.0));
629 c2.set(a1, Point3::new(0.0, 10.0, 0.0));
630 c2.set(a2, Point3::new(0.0, 0.0, 10.0));
631
632 let mut ens = ConformerEnsemble::with_conformer(mol, c1).unwrap();
633 ens.add_conformer(c2).unwrap();
634
635 let div = ens.conformer_diversity_usr();
636 assert!(div > 0.0 && div <= 1.0, "diverse ensemble → diversity in (0,1], got {div}");
637 }
638
639 #[test]
642 fn cluster_empty_ensemble() {
643 let mol = parse("C").unwrap();
644 let ens = ConformerEnsemble::new(mol);
645 assert!(ens.cluster_conformers_by_rms(0.5).is_empty());
646 }
647
648 #[test]
649 fn cluster_single_conformer() {
650 let ens = make_ensemble();
651 assert_eq!(ens.cluster_conformers_by_rms(0.5), vec![0]);
652 }
653
654 #[test]
655 fn cluster_zero_threshold_keeps_all() {
656 let mol = parse("CCC").unwrap();
657 let c = generate_coords(&mol);
658 let mut ens = ConformerEnsemble::with_conformer(mol, c.clone()).unwrap();
659 ens.add_conformer(c.clone()).unwrap();
660 ens.add_conformer(c).unwrap();
661 let kept = ens.cluster_conformers_by_rms(0.0);
662 assert_eq!(kept, vec![0, 1, 2], "threshold ≤ 0 → keep all");
663 }
664
665 #[test]
666 fn cluster_negative_threshold_keeps_all() {
667 let mol = parse("CCC").unwrap();
668 let c = generate_coords(&mol);
669 let mut ens = ConformerEnsemble::with_conformer(mol, c.clone()).unwrap();
670 ens.add_conformer(c).unwrap();
671 assert_eq!(ens.cluster_conformers_by_rms(-1.0), vec![0, 1]);
672 }
673
674 #[test]
675 fn cluster_identical_conformers_keeps_first() {
676 let mol = parse("CCC").unwrap();
677 let c = generate_coords(&mol);
678 let mut ens = ConformerEnsemble::with_conformer(mol, c.clone()).unwrap();
679 ens.add_conformer(c.clone()).unwrap();
680 ens.add_conformer(c).unwrap();
681 let kept = ens.cluster_conformers_by_rms(0.01);
682 assert_eq!(kept, vec![0], "three identical conformers → keep only first");
683 }
684
685 #[test]
686 fn cluster_distinct_conformers_keeps_all() {
687 let mol = parse("CCC").unwrap();
689 let n = mol.atom_count();
690 let mut ca = Coords3D::new_zeroed(n);
691 let mut cb = Coords3D::new_zeroed(n);
692 for i in 0..n {
693 ca.set(chematic_core::AtomIdx(i as u32), Point3 { x: i as f64, y: 0.0, z: 0.0 });
694 cb.set(chematic_core::AtomIdx(i as u32), Point3 { x: 0.0, y: i as f64 * 100.0, z: 0.0 });
695 }
696 let mut ens = ConformerEnsemble::with_conformer(mol, ca).unwrap();
697 ens.add_conformer(cb).unwrap();
698 let kept = ens.cluster_conformers_by_rms(0.5);
699 assert_eq!(kept, vec![0, 1], "two distinct conformers → both kept");
700 }
701
702 #[test]
703 fn cluster_ascending_order() {
704 let mol = parse("CCC").unwrap();
706 let c0 = generate_coords(&mol);
707 let mut ens = ConformerEnsemble::with_conformer(mol, c0.clone()).unwrap();
708 ens.add_conformer(c0).unwrap(); let n = ens.mol().atom_count();
710 let mut far = Coords3D::new_zeroed(n);
711 for i in 0..n {
712 far.set(chematic_core::AtomIdx(i as u32), Point3 { x: 0.0, y: i as f64 * 100.0, z: 0.0 });
713 }
714 ens.add_conformer(far).unwrap(); let kept = ens.cluster_conformers_by_rms(0.1);
716 for w in kept.windows(2) {
717 assert!(w[0] < w[1], "kept indices not ascending");
718 }
719 }
720}