use std::collections::{HashSet, VecDeque};
use chematic_core::{Atom, AtomIdx, BondOrder, Molecule, implicit_hcount};
use crate::bitvec::BitVec2048;
use crate::ecfp::fnv1a;
fn identify_functional_groups(mol: &Molecule) -> Vec<Vec<usize>> {
let n = mol.atom_count();
if n == 0 {
return Vec::new();
}
let mut is_hetero = vec![false; n];
for (idx, atom) in mol.atoms() {
let an = atom.element.atomic_number();
if an != 1 && an != 6 {
is_hetero[idx.0 as usize] = true;
}
}
let hetero_idxs: Vec<usize> = (0..n).filter(|&i| is_hetero[i]).collect();
if hetero_idxs.is_empty() {
return Vec::new();
}
let mut parent: Vec<usize> = (0..n).collect();
for &hi in &hetero_idxs {
for (nb, _) in mol.neighbors(AtomIdx(hi as u32)) {
let nbi = nb.0 as usize;
if is_hetero[nbi] {
uf_union(&mut parent, hi, nbi);
}
}
}
for ci in 0..n {
if mol.atom(AtomIdx(ci as u32)).element.atomic_number() != 6 {
continue;
}
let hetero_nbs: Vec<usize> = mol.neighbors(AtomIdx(ci as u32))
.filter(|(nb, _)| is_hetero[nb.0 as usize])
.map(|(nb, _)| nb.0 as usize)
.collect();
if hetero_nbs.len() >= 2 {
for &hi in &hetero_nbs[1..] {
uf_union(&mut parent, hetero_nbs[0], hi);
}
}
}
let mut cluster_map: std::collections::HashMap<usize, Vec<usize>> =
std::collections::HashMap::new();
for &hi in &hetero_idxs {
let root = uf_find(&mut parent, hi);
cluster_map.entry(root).or_default().push(hi);
}
cluster_map.into_values().map(|mut atoms| {
let hetero_set: std::collections::HashSet<usize> = atoms.iter().copied().collect();
let snapshot = atoms.clone();
for &ai in &snapshot {
for (nb, _) in mol.neighbors(AtomIdx(ai as u32)) {
let nbi = nb.0 as usize;
let nb_an = mol.atom(nb).element.atomic_number();
if nb_an == 6 && !hetero_set.contains(&nbi) {
atoms.push(nbi);
}
}
}
atoms.sort_unstable();
atoms.dedup();
atoms
}).collect()
}
fn uf_find(parent: &mut Vec<usize>, mut x: usize) -> usize {
while parent[x] != x {
parent[x] = parent[parent[x]]; x = parent[x];
}
x
}
fn uf_union(parent: &mut Vec<usize>, a: usize, b: usize) {
let ra = uf_find(parent, a);
let rb = uf_find(parent, b);
if ra != rb {
parent[rb] = ra;
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct ErgNodeType(pub u8);
impl ErgNodeType {
const AROMATIC: u8 = 1;
const DONOR: u8 = 2;
const ACCEPTOR: u8 = 4;
const HYDROPHOBIC: u8 = 8;
const POSITIVE: u8 = 16;
const NEGATIVE: u8 = 32;
pub fn new() -> Self {
ErgNodeType(0)
}
}
impl Default for ErgNodeType {
fn default() -> Self {
Self::new()
}
}
impl ErgNodeType {
pub fn with_aromatic(mut self) -> Self {
self.0 |= Self::AROMATIC;
self
}
pub fn with_donor(mut self) -> Self {
self.0 |= Self::DONOR;
self
}
pub fn with_acceptor(mut self) -> Self {
self.0 |= Self::ACCEPTOR;
self
}
pub fn with_hydrophobic(mut self) -> Self {
self.0 |= Self::HYDROPHOBIC;
self
}
pub fn with_positive(mut self) -> Self {
self.0 |= Self::POSITIVE;
self
}
pub fn with_negative(mut self) -> Self {
self.0 |= Self::NEGATIVE;
self
}
}
#[derive(Clone, Debug)]
pub struct ErgNode {
pub ntype: ErgNodeType,
pub atom_indices: Vec<usize>,
}
#[derive(Clone, Debug)]
pub struct ErgEdge {
pub node_a: usize,
pub node_b: usize,
pub linker_len: u32,
}
fn is_amide_like_nitrogen(mol: &Molecule, n_idx: AtomIdx) -> bool {
mol.neighbors(n_idx).any(|(nb, _)| {
let nb_an = mol.atom(nb).element.atomic_number();
match nb_an {
6 => mol.neighbors(nb).any(|(c_nb, bond_idx)| {
mol.atom(c_nb).element.atomic_number() == 8
&& mol.bond(bond_idx).order == BondOrder::Double
}),
16 => mol.neighbors(nb).any(|(s_nb, bond_idx)| {
mol.atom(s_nb).element.atomic_number() == 8
&& mol.bond(bond_idx).order == BondOrder::Double
}),
_ => false,
}
})
}
fn assign_pharmacophore_features(mol: &Molecule, atom_indices: &[usize]) -> ErgNodeType {
let mut ntype = ErgNodeType::new();
for &i in atom_indices {
let idx = AtomIdx(i as u32);
let atom = mol.atom(idx);
let an = atom.element.atomic_number();
if atom.aromatic {
ntype = ntype.with_aromatic();
}
let explicit_h: usize = mol.neighbors(idx)
.filter(|(nb, _)| mol.atom(*nb).element.atomic_number() == 1)
.count();
let impl_h = implicit_hcount(mol, idx) as usize;
let total_h = explicit_h + impl_h;
if (an == 8 || an == 16) && total_h > 0 {
ntype = ntype.with_donor();
} else if an == 7 && total_h > 0 && !is_amide_like_nitrogen(mol, idx) {
ntype = ntype.with_donor();
}
match an {
7 if !atom.aromatic => ntype = ntype.with_acceptor(),
7 if atom.aromatic && total_h == 0 => ntype = ntype.with_acceptor(),
8 | 9 => ntype = ntype.with_acceptor(),
_ => {}
}
if atom.charge > 0 { ntype = ntype.with_positive(); }
if atom.charge < 0 { ntype = ntype.with_negative(); }
}
let all_c_or_h = atom_indices.iter().all(|&i| {
let an = mol.atom(AtomIdx(i as u32)).element.atomic_number();
an == 6 || an == 1
});
if all_c_or_h && ntype.0 == 0 {
ntype = ntype.with_hydrophobic();
}
ntype
}
fn build_reduced_graph(mol: &Molecule) -> (Vec<ErgNode>, Vec<ErgEdge>) {
let fg_groups = identify_functional_groups(mol);
let mut nodes: Vec<ErgNode> = fg_groups
.into_iter()
.map(|atom_indices| {
let ntype = assign_pharmacophore_features(mol, &atom_indices);
ErgNode { ntype, atom_indices }
})
.collect();
if nodes.is_empty() {
let all_atoms: Vec<usize> = (0..mol.atom_count()).collect();
let ntype = assign_pharmacophore_features(mol, &all_atoms);
nodes.push(ErgNode { ntype, atom_indices: all_atoms });
}
const MAX_ERG_NODES: usize = 128;
if nodes.len() > MAX_ERG_NODES {
nodes.truncate(MAX_ERG_NODES);
}
let mut edges = Vec::new();
let fg_set: HashSet<usize> = nodes.iter().flat_map(|n| n.atom_indices.clone()).collect();
for i in 0..nodes.len() {
for j in (i + 1)..nodes.len() {
let linker_len = shortest_path_linker(mol, &nodes[i], &nodes[j], &fg_set);
edges.push(ErgEdge {
node_a: i,
node_b: j,
linker_len,
});
}
}
(nodes, edges)
}
fn shortest_path_linker(
mol: &Molecule,
node_a: &ErgNode,
node_b: &ErgNode,
fg_set: &HashSet<usize>,
) -> u32 {
let mut dist = vec![u32::MAX; mol.atom_count()];
let mut queue = VecDeque::new();
for &i in &node_a.atom_indices {
dist[i] = 0;
queue.push_back(i);
}
while let Some(cur) = queue.pop_front() {
for (nb, _) in mol.neighbors(AtomIdx(cur as u32)) {
let nbi = nb.0 as usize;
if node_b.atom_indices.contains(&nbi) {
return dist[cur] + 1;
}
if dist[nbi] == u32::MAX {
let new_dist = dist[cur] + if fg_set.contains(&nbi) { 0 } else { 1 };
dist[nbi] = new_dist;
queue.push_back(nbi);
}
}
}
u32::MAX
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ErgAtomType {
CAliphatic = 0,
CAromatic = 1,
N = 2,
O = 3,
S = 4,
Halogen = 5,
Other = 6,
}
impl ErgAtomType {
pub fn from_atom(atom: &Atom) -> Self {
let an = atom.element.atomic_number();
let aromatic = atom.aromatic;
match an {
6 => {
if aromatic {
ErgAtomType::CAromatic
} else {
ErgAtomType::CAliphatic
}
}
7 => ErgAtomType::N,
8 => ErgAtomType::O,
16 => ErgAtomType::S,
9 | 17 | 35 | 53 => ErgAtomType::Halogen,
_ => ErgAtomType::Other,
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ErgBondType {
Single = 0,
Double = 1,
Triple = 2,
Aromatic = 3,
}
impl ErgBondType {
pub fn from_bond(order: BondOrder) -> Self {
match order {
BondOrder::Single => ErgBondType::Single,
BondOrder::Double => ErgBondType::Double,
BondOrder::Triple => ErgBondType::Triple,
BondOrder::Aromatic => ErgBondType::Aromatic,
_ => ErgBondType::Single,
}
}
}
#[derive(Clone, Debug)]
pub struct ErgConfig {
pub use_atom_counts: bool,
pub use_bond_types: bool,
}
impl Default for ErgConfig {
fn default() -> Self {
ErgConfig {
use_atom_counts: true,
use_bond_types: true,
}
}
}
#[derive(Clone, Debug)]
pub struct ErgFingerprint {
pub bits: BitVec2048,
pub atom_counts: [u32; 7],
pub bond_counts: [u32; 4],
}
impl ErgFingerprint {
pub fn tanimoto(&self, other: &ErgFingerprint) -> f64 {
self.bits.tanimoto(&other.bits)
}
}
pub fn erg(mol: &chematic_core::Molecule) -> ErgFingerprint {
erg_with_config(mol, &ErgConfig::default())
}
pub fn erg_with_config(
mol: &chematic_core::Molecule,
config: &ErgConfig,
) -> ErgFingerprint {
let mut bits = BitVec2048::new();
let mut atom_counts = [0u32; 7];
let mut bond_counts = [0u32; 4];
for (idx, atom) in mol.atoms() {
let erg_type = ErgAtomType::from_atom(atom);
atom_counts[erg_type as usize] += 1;
let bit_pos = (erg_type as usize) * 16;
if bit_pos < 2048 {
bits.set(bit_pos);
}
let degree = mol.neighbors(idx).count();
let degree_bits = (degree.min(4) << 2) + (erg_type as usize);
let degree_bit_pos = 512 + degree_bits.min(127);
if degree_bit_pos < 2048 {
bits.set(degree_bit_pos);
}
}
for (_, bond) in mol.bonds() {
let erg_type = ErgBondType::from_bond(bond.order);
bond_counts[erg_type as usize] += 1;
let bit_pos = 112 + (erg_type as usize) * 16;
if bit_pos < 2048 {
bits.set(bit_pos);
}
}
if config.use_atom_counts {
for (i, &count) in atom_counts.iter().enumerate() {
for j in 0..4 {
if ((count >> j) & 1) != 0 {
let bit_pos = 200 + i * 4 + j;
if bit_pos < 2048 {
bits.set(bit_pos);
}
}
}
}
}
if config.use_bond_types {
for (i, &count) in bond_counts.iter().enumerate() {
for j in 0..4 {
if ((count >> j) & 1) != 0 {
let bit_pos = 228 + i * 4 + j;
if bit_pos < 2048 {
bits.set(bit_pos);
}
}
}
}
}
let (nodes, edges) = build_reduced_graph(mol);
for edge in &edges {
if edge.linker_len == u32::MAX {
continue; }
let ta = nodes[edge.node_a].ntype.0;
let tb = nodes[edge.node_b].ntype.0;
let bin: u8 = match edge.linker_len {
0 => 0,
1..=2 => 1,
3..=5 => 2,
_ => 3,
};
let (t_lo, t_hi) = if ta <= tb { (ta, tb) } else { (tb, ta) };
let h = fnv1a(&[t_lo, t_hi, bin, 0xE7]) as usize;
bits.set(259 + h % (2048 - 259));
}
if nodes.iter().any(|n| n.ntype.0 & ErgNodeType::AROMATIC != 0) {
bits.set(256);
}
if nodes.iter().any(|n| n.ntype.0 != 0) {
bits.set(257);
}
if !nodes.iter().any(|n| n.ntype.0 & ErgNodeType::AROMATIC != 0)
&& atom_counts[ErgAtomType::CAliphatic as usize] > 0
{
bits.set(258);
}
ErgFingerprint {
bits,
atom_counts,
bond_counts,
}
}
pub fn erg_extended(mol: &chematic_core::Molecule) -> ErgFingerprint {
erg(mol)
}
pub fn tanimoto_erg(mol1: &chematic_core::Molecule, mol2: &chematic_core::Molecule) -> f64 {
let fp1 = erg(mol1);
let fp2 = erg(mol2);
fp1.tanimoto(&fp2)
}
pub const ERG_VEC_LEN: usize = 315;
const N_FEAT: usize = 6;
const N_BINS: usize = 15;
const FUZZ: f64 = 0.3;
const FEATURE_BITS: [(u8, usize); N_FEAT] = [
(ErgNodeType::AROMATIC, 0),
(ErgNodeType::DONOR, 1),
(ErgNodeType::ACCEPTOR, 2),
(ErgNodeType::POSITIVE, 3),
(ErgNodeType::NEGATIVE, 4),
(ErgNodeType::HYDROPHOBIC, 5),
];
#[inline]
fn pair_idx(a: usize, b: usize) -> usize {
let (lo, hi) = if a <= b { (a, b) } else { (b, a) };
lo * N_FEAT - lo * (lo.wrapping_sub(1)) / 2 + (hi - lo)
}
#[inline]
fn fuzz_add(vec: &mut [f64; ERG_VEC_LEN], base: usize, d: usize, weight: f64) {
vec[base + d] += weight;
if d > 0 { vec[base + d - 1] += weight * FUZZ; }
if d + 1 < N_BINS { vec[base + d + 1] += weight * FUZZ; }
}
pub fn erg_vec(mol: &Molecule) -> [f64; ERG_VEC_LEN] {
let mut vec = [0.0f64; ERG_VEC_LEN];
let (nodes, edges) = build_reduced_graph(mol);
for node in &nodes {
let ntype = node.ntype.0;
for &(bit, fi) in &FEATURE_BITS {
if ntype & bit == 0 { continue; }
let base = pair_idx(fi, fi) * N_BINS;
fuzz_add(&mut vec, base, 0, 1.0);
}
}
for edge in &edges {
let dist = edge.linker_len;
if dist == u32::MAX {
continue;
}
let bin = (dist as usize).min(N_BINS - 1);
let ntype_a = nodes[edge.node_a].ntype.0;
let ntype_b = nodes[edge.node_b].ntype.0;
for &(bit_a, fi) in &FEATURE_BITS {
if ntype_a & bit_a == 0 { continue; }
for &(bit_b, fj) in &FEATURE_BITS {
if ntype_b & bit_b == 0 { continue; }
let base = pair_idx(fi, fj) * N_BINS;
fuzz_add(&mut vec, base, bin, 1.0);
}
}
}
vec
}
pub fn cosine_erg_vec(v1: &[f64; ERG_VEC_LEN], v2: &[f64; ERG_VEC_LEN]) -> f64 {
let dot: f64 = v1.iter().zip(v2.iter()).map(|(a, b)| a * b).sum();
let n1: f64 = v1.iter().map(|x| x * x).sum::<f64>().sqrt();
let n2: f64 = v2.iter().map(|x| x * x).sum::<f64>().sqrt();
if n1 < 1e-12 || n2 < 1e-12 { return 0.0; }
(dot / (n1 * n2)).min(1.0)
}
pub fn tanimoto_erg_vec(v1: &[f64; ERG_VEC_LEN], v2: &[f64; ERG_VEC_LEN]) -> f64 {
let dot: f64 = v1.iter().zip(v2.iter()).map(|(a, b)| a * b).sum();
let n1: f64 = v1.iter().map(|x| x * x).sum();
let n2: f64 = v2.iter().map(|x| x * x).sum();
let denom = n1 + n2 - dot;
if denom < 1e-12 { return 1.0; }
(dot / denom).max(0.0).min(1.0)
}
#[cfg(test)]
mod tests {
use super::*;
use chematic_smiles::parse;
#[test]
fn test_erg_simple() {
let mol = parse("CC").unwrap();
let fp = erg(&mol);
assert_eq!(fp.atom_counts[ErgAtomType::CAliphatic as usize], 2);
assert!(fp.bits.popcount() > 0);
}
#[test]
fn test_erg_identical() {
let mol = parse("CC").unwrap();
let fp1 = erg(&mol);
let fp2 = erg(&mol);
assert_eq!(fp1.bits.tanimoto(&fp2.bits), 1.0);
assert_eq!(fp1.atom_counts, fp2.atom_counts);
}
#[test]
fn test_erg_different_molecules() {
let mol1 = parse("CC").unwrap();
let mol2 = parse("c1ccccc1").unwrap();
let fp1 = erg(&mol1);
let fp2 = erg(&mol2);
assert!(fp1.atom_counts[ErgAtomType::CAromatic as usize] == 0);
assert!(fp2.atom_counts[ErgAtomType::CAromatic as usize] > 0);
}
#[test]
fn test_erg_symmetry() {
let mol1 = parse("CC").unwrap();
let mol2 = parse("c1ccccc1").unwrap();
let sim12 = tanimoto_erg(&mol1, &mol2);
let sim21 = tanimoto_erg(&mol2, &mol1);
assert!((sim12 - sim21).abs() < 1e-10);
}
#[test]
fn test_erg_heteroatom_detection() {
let mol = parse("CCO").unwrap();
let fp = erg(&mol);
assert!(fp.atom_counts[ErgAtomType::O as usize] > 0);
}
#[test]
fn test_erg_config() {
let mol = parse("CC").unwrap();
let config = ErgConfig {
use_atom_counts: false,
use_bond_types: true,
};
let fp = erg_with_config(&mol, &config);
assert!(fp.bits.popcount() > 0);
}
#[test]
fn test_erg_aromatic_vs_aliphatic() {
let aliphatic = parse("CCCC").unwrap();
let aromatic = parse("c1ccccc1").unwrap();
let fp_aliphatic = erg(&aliphatic);
let fp_aromatic = erg(&aromatic);
assert_eq!(fp_aliphatic.atom_counts[ErgAtomType::CAromatic as usize], 0);
assert!(fp_aromatic.atom_counts[ErgAtomType::CAromatic as usize] > 0);
}
#[test]
fn test_erg_bond_counting() {
let single_bond = parse("CC").unwrap();
let double_bond = parse("C=C").unwrap();
let fp_single = erg(&single_bond);
let fp_double = erg(&double_bond);
assert!(fp_single.bond_counts[ErgBondType::Single as usize] > 0);
assert!(fp_double.bond_counts[ErgBondType::Double as usize] > 0);
}
#[test]
fn test_erg_functional_group_aromatic_bit() {
let aliphatic = parse("CCCC").unwrap();
let aromatic = parse("c1ccccc1").unwrap();
let fp_aliphatic = erg(&aliphatic);
let fp_aromatic = erg(&aromatic);
assert!(!fp_aliphatic.bits.get(256), "aliphatic should not have aromatic bit");
assert!(fp_aromatic.bits.get(256), "aromatic should have aromatic bit");
}
#[test]
fn test_erg_functional_group_heteroatom_bit() {
let alkane = parse("CC").unwrap();
let alcohol = parse("CCO").unwrap();
let amine = parse("CCN").unwrap();
let fp_alkane = erg(&alkane);
let fp_alcohol = erg(&alcohol);
let fp_amine = erg(&amine);
assert!(fp_alkane.bits.get(257), "alkane gets HYDROPHOBIC → bit 257 set");
assert!(fp_alcohol.bits.get(257), "alcohol gets ACCEPTOR/DONOR → bit 257 set");
assert!(fp_amine.bits.get(257), "amine gets ACCEPTOR/DONOR → bit 257 set");
let topo_alkane: usize = (259..2048).filter(|&b| fp_alkane.bits.get(b)).count();
let topo_alcohol: usize = (259..2048).filter(|&b| fp_alcohol.bits.get(b)).count();
let topo_amine: usize = (259..2048).filter(|&b| fp_amine.bits.get(b)).count();
let _ = (topo_alkane, topo_alcohol, topo_amine);
assert!(fp_alkane.tanimoto(&fp_alcohol) < 1.0, "alkane vs alcohol should differ");
assert!(fp_alkane.tanimoto(&fp_amine) < 1.0, "alkane vs amine should differ");
}
#[test]
fn test_erg_functional_group_improved_discrimination() {
let methane = parse("C").unwrap();
let ethanol = parse("CCO").unwrap();
let pyridine = parse("c1ccncc1").unwrap();
let fp_methane = erg(&methane);
let fp_ethanol = erg(ðanol);
let fp_pyridine = erg(&pyridine);
let sim_methane_ethanol = fp_methane.tanimoto(&fp_ethanol);
let sim_methane_pyridine = fp_methane.tanimoto(&fp_pyridine);
let sim_ethanol_pyridine = fp_ethanol.tanimoto(&fp_pyridine);
assert!((0.0..=1.0).contains(&sim_methane_ethanol));
assert!((0.0..=1.0).contains(&sim_methane_pyridine));
assert!((0.0..=1.0).contains(&sim_ethanol_pyridine));
}
#[test]
fn test_erg_linker_distance_changes_fingerprint() {
let short = parse("NCC(=O)O").unwrap(); let long = parse("NCCCCCC(=O)O").unwrap();
let v_short = erg_vec(&short);
let v_long = erg_vec(&long);
assert_ne!(v_short, v_long,
"different linker lengths must produce different erg_vec entries");
let sim = tanimoto_erg_vec(&v_short, &v_long);
assert!(sim < 1.0,
"different linker lengths should give Tanimoto < 1.0, got {sim:.4}");
}
#[test]
fn test_erg_nh_donor() {
let aniline = parse("c1ccccc1N").unwrap();
let fp = erg(&aniline);
let benzene = parse("c1ccccc1").unwrap();
let fp_benz = erg(&benzene);
assert!(fp.tanimoto(&fp_benz) < 1.0, "aniline should differ from benzene");
assert!(fp.bits.popcount() > 0);
}
#[test]
fn test_erg_carbonyl_acceptor() {
let acetone = parse("CC(C)=O").unwrap();
let propane = parse("CCC").unwrap();
let fp_acetone = erg(&acetone);
let fp_propane = erg(&propane);
assert!(fp_acetone.tanimoto(&fp_propane) < 1.0,
"acetone vs propane should differ due to acceptor O");
}
#[test]
fn test_erg_carboxylate_negative() {
let acetate = parse("CC(=O)[O-]").unwrap();
let groups = identify_functional_groups(&acetate);
assert!(!groups.is_empty(), "acetate should have a functional group");
let ntype = assign_pharmacophore_features(&acetate, &groups[0]);
assert!(
ntype.0 & ErgNodeType::NEGATIVE != 0,
"acetate O- should produce NEGATIVE pharmacophore feature, got ntype={}", ntype.0
);
let acid = parse("CC(=O)O").unwrap();
let acid_groups = identify_functional_groups(&acid);
assert!(!acid_groups.is_empty());
let acid_ntype = assign_pharmacophore_features(&acid, &acid_groups[0]);
assert_eq!(
acid_ntype.0 & ErgNodeType::NEGATIVE, 0,
"acetic acid should NOT have NEGATIVE feature"
);
assert!(
acid_ntype.0 & ErgNodeType::DONOR != 0,
"acetic acid O-H should have DONOR feature"
);
}
#[test]
fn test_erg_hydrophobic_chain() {
let hexane = parse("CCCCCC").unwrap();
let fp = erg(&hexane);
assert!(fp.bits.get(257), "hexane should have a pharmacophore feature (HYDROPHOBIC)");
assert!(!fp.bits.get(256), "hexane should not have aromatic bit");
}
#[test]
fn test_erg_pyridine_vs_pyrrole_acceptor() {
let pyridine = parse("c1ccncc1").unwrap();
let pyrrole = parse("c1cc[nH]c1").unwrap();
let fp_pyr = erg(&pyridine);
let fp_rol = erg(&pyrrole);
assert!(fp_pyr.tanimoto(&fp_rol) < 1.0,
"pyridine (N acceptor) vs pyrrole (N-H donor) should differ");
}
#[test]
fn test_erg_adjacent_groups_bin0() {
let direct = parse("NO").unwrap(); let indirect = parse("NCCCO").unwrap();
let fp_direct = erg(&direct);
let fp_indirect = erg(&indirect);
assert!(fp_direct.tanimoto(&fp_indirect) < 1.0,
"adjacent vs. 3-linker groups should differ");
}
#[test]
fn test_erg_amide_n_is_acceptor_not_donor() {
use super::{assign_pharmacophore_features, identify_functional_groups};
let acetamide = parse("CC(=O)N").unwrap(); let groups = identify_functional_groups(&acetamide);
let n_group = groups.iter().find(|g| {
g.iter().any(|&i| acetamide.atom(AtomIdx(i as u32)).element.atomic_number() == 7)
}).expect("should find N-containing group");
let ntype = assign_pharmacophore_features(&acetamide, n_group);
assert!(ntype.0 & ErgNodeType::ACCEPTOR != 0,
"amide N should be ACCEPTOR (bits={:#010b})", ntype.0);
assert!(ntype.0 & ErgNodeType::DONOR == 0,
"amide N should NOT be DONOR (bits={:#010b})", ntype.0);
}
#[test]
fn test_erg_amine_n_is_donor_and_acceptor() {
use super::{assign_pharmacophore_features, identify_functional_groups};
let ethylamine = parse("CCN").unwrap(); let groups = identify_functional_groups(ðylamine);
let n_group = groups.iter().find(|g| {
g.iter().any(|&i| ethylamine.atom(AtomIdx(i as u32)).element.atomic_number() == 7)
}).expect("should find N-containing group");
let ntype = assign_pharmacophore_features(ðylamine, n_group);
assert!(ntype.0 & ErgNodeType::DONOR != 0,
"amine N should be DONOR (bits={:#010b})", ntype.0);
assert!(ntype.0 & ErgNodeType::ACCEPTOR != 0,
"amine N should be ACCEPTOR (bits={:#010b})", ntype.0);
}
#[test]
fn test_erg_vec_length() {
let mol = parse("CC").unwrap();
let v = erg_vec(&mol);
assert_eq!(v.len(), ERG_VEC_LEN, "erg_vec must return exactly 315 floats");
}
#[test]
fn test_erg_vec_consistency() {
let mol = parse("c1ccccc1N").unwrap();
let v1 = erg_vec(&mol);
let v2 = erg_vec(&mol);
assert_eq!(v1, v2, "erg_vec must be deterministic");
}
#[test]
fn test_erg_vec_nonnegative() {
for smi in &["CC", "CCO", "c1ccccc1", "c1ccncc1", "CC(=O)N", "c1ccccc1N"] {
let mol = parse(smi).unwrap();
let v = erg_vec(&mol);
for (i, &x) in v.iter().enumerate() {
assert!(x >= 0.0, "erg_vec[{i}] negative for {smi}: {x}");
}
}
}
#[test]
fn test_erg_vec_same_mol_self_similarity() {
let mol = parse("c1ccccc1").unwrap();
let v = erg_vec(&mol);
let sim = tanimoto_erg_vec(&v, &v);
assert!((sim - 1.0).abs() < 1e-9, "self-similarity must be 1.0 (got {sim})");
}
#[test]
fn test_erg_vec_different_molecules() {
let mol1 = parse("CC").unwrap();
let mol2 = parse("c1ccccc1N").unwrap();
let v1 = erg_vec(&mol1);
let v2 = erg_vec(&mol2);
let sim = tanimoto_erg_vec(&v1, &v2);
assert!((0.0..=1.0).contains(&sim), "Tanimoto must be in [0,1] (got {sim})");
assert!(sim < 1.0, "ethane vs aniline must not be identical");
}
#[test]
fn test_erg_vec_donor_acceptor_linker_distance() {
let gaba = parse("NCCCC(=O)O").unwrap();
let glycine = parse("NCC(=O)O").unwrap();
let v_gaba = erg_vec(&gaba);
let v_glycine = erg_vec(&glycine);
let sim = tanimoto_erg_vec(&v_gaba, &v_glycine);
assert!(sim < 1.0,
"GABA vs glycine differ only in linker length — Tanimoto must be < 1 (got {sim:.3})");
}
#[test]
fn test_erg_vec_fuzz_spreads_adjacent_bins() {
let mol = parse("NCC(=O)O").unwrap(); let v = erg_vec(&mol);
let da_pair = pair_idx(1, 2) * N_BINS;
let sum: f64 = v[da_pair..da_pair + N_BINS].iter().sum();
assert!(sum > 0.0, "Donor–Acceptor bins must be non-zero for glycine analogue");
}
#[test]
fn test_pair_idx_all_21() {
let mut seen = std::collections::HashSet::new();
for a in 0..N_FEAT {
for b in a..N_FEAT {
let idx = pair_idx(a, b);
assert!(idx < 21, "pair_idx({a},{b}) = {idx} out of range");
assert!(seen.insert(idx), "pair_idx({a},{b}) = {idx} duplicates earlier pair");
}
}
assert_eq!(seen.len(), 21);
}
#[test]
fn test_erg_sulfonamide_n_is_acceptor_not_donor() {
use super::{assign_pharmacophore_features, identify_functional_groups};
let sulfonamide = parse("CS(=O)(=O)N").unwrap(); let groups = identify_functional_groups(&sulfonamide);
let n_group = groups.iter().find(|g| {
g.iter().any(|&i| sulfonamide.atom(AtomIdx(i as u32)).element.atomic_number() == 7)
}).expect("should find N-containing group");
let ntype = assign_pharmacophore_features(&sulfonamide, n_group);
assert!(ntype.0 & ErgNodeType::ACCEPTOR != 0,
"sulfonamide N should be ACCEPTOR (bits={:#010b})", ntype.0);
assert!(ntype.0 & ErgNodeType::DONOR == 0,
"sulfonamide N should NOT be DONOR (bits={:#010b})", ntype.0);
}
}