use chematic_core::{Atom, BondOrder};
use crate::bitvec::BitVec2048;
/// Coarse atom classes used by the fingerprint.
///
/// The explicit discriminants are load-bearing: each variant is cast to
/// `usize` and used as an index into per-class count arrays and as a
/// multiplier for bit positions, so the values must stay dense in `0..=6`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ErgAtomType {
    /// Carbon that is not flagged aromatic.
    CAliphatic = 0,
    /// Carbon flagged aromatic.
    CAromatic = 1,
    /// Nitrogen (atomic number 7).
    N = 2,
    /// Oxygen (atomic number 8).
    O = 3,
    /// Sulfur (atomic number 16).
    S = 4,
    /// Fluorine, chlorine, bromine or iodine (atomic numbers 9, 17, 35, 53).
    Halogen = 5,
    /// Every element not covered by the variants above.
    Other = 6,
}
impl ErgAtomType {
pub fn from_atom(atom: &Atom) -> Self {
let an = atom.element.atomic_number();
let aromatic = atom.aromatic;
match an {
6 => {
if aromatic {
ErgAtomType::CAromatic
} else {
ErgAtomType::CAliphatic
}
}
7 => ErgAtomType::N,
8 => ErgAtomType::O,
16 => ErgAtomType::S,
9 | 17 | 35 | 53 => ErgAtomType::Halogen,
_ => ErgAtomType::Other,
}
}
}
/// Bond classes used by the fingerprint.
///
/// The discriminants are cast to `usize` and used as indices into the
/// per-class bond count array, so they must stay dense in `0..=3`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ErgBondType {
    /// Single bond.
    Single = 0,
    /// Double bond.
    Double = 1,
    /// Triple bond.
    Triple = 2,
    /// Aromatic bond.
    Aromatic = 3,
}
impl ErgBondType {
    /// Maps a [`BondOrder`] onto the fingerprint's bond classes.
    ///
    /// Double, triple and aromatic orders map to the variant of the same
    /// name. Every other order, including `BondOrder::Single`, is treated as
    /// [`ErgBondType::Single`].
    pub fn from_bond(order: BondOrder) -> Self {
        match order {
            BondOrder::Aromatic => Self::Aromatic,
            BondOrder::Triple => Self::Triple,
            BondOrder::Double => Self::Double,
            // `Single` and any order without a dedicated class.
            _ => Self::Single,
        }
    }
}
/// Switches controlling which optional sections of the fingerprint are
/// written into the bit vector.
///
/// The type is a pair of flags, so it is `Copy` and comparable; this lets
/// callers pass it by value and assert on it in tests.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ErgConfig {
    /// When `true`, the per-atom-class counts are encoded as bits.
    pub use_atom_counts: bool,
    /// When `true`, the per-bond-class counts are encoded as bits.
    pub use_bond_types: bool,
}
impl Default for ErgConfig {
fn default() -> Self {
ErgConfig {
use_atom_counts: true,
use_bond_types: true,
}
}
}
/// Result of fingerprinting one molecule: the bit vector plus the raw
/// per-class tallies it was derived from.
#[derive(Debug, Clone)]
pub struct ErgFingerprint {
    /// Bit vector holding the encoded features.
    pub bits: BitVec2048,
    /// Number of atoms per class, indexed by `ErgAtomType as usize`.
    pub atom_counts: [u32; 7],
    /// Number of bonds per class, indexed by `ErgBondType as usize`.
    pub bond_counts: [u32; 4],
}
impl ErgFingerprint {
    /// Tanimoto similarity between the bit vectors of `self` and `other`.
    ///
    /// Only `bits` takes part in the comparison; the `atom_counts` and
    /// `bond_counts` arrays are ignored.
    pub fn tanimoto(&self, other: &ErgFingerprint) -> f64 {
        let (lhs, rhs) = (&self.bits, &other.bits);
        lhs.tanimoto(rhs)
    }
}
/// Fingerprints `mol` using the default [`ErgConfig`].
pub fn erg(mol: &chematic_core::Molecule) -> ErgFingerprint {
    let defaults = ErgConfig::default();
    erg_with_config(mol, &defaults)
}
pub fn erg_with_config(
mol: &chematic_core::Molecule,
config: &ErgConfig,
) -> ErgFingerprint {
let mut bits = BitVec2048::new();
let mut atom_counts = [0u32; 7];
let mut bond_counts = [0u32; 4];
for (_, atom) in mol.atoms() {
let erg_type = ErgAtomType::from_atom(atom);
atom_counts[erg_type as usize] += 1;
let bit_pos = (erg_type as usize) * 16;
if bit_pos < 2048 {
bits.set(bit_pos);
}
}
for (_, bond) in mol.bonds() {
let erg_type = ErgBondType::from_bond(bond.order);
bond_counts[erg_type as usize] += 1;
let bit_pos = 112 + (erg_type as usize) * 16;
if bit_pos < 2048 {
bits.set(bit_pos);
}
}
if config.use_atom_counts {
for (i, &count) in atom_counts.iter().enumerate() {
for j in 0..4 {
if ((count >> j) & 1) != 0 {
let bit_pos = 200 + i * 4 + j;
if bit_pos < 2048 {
bits.set(bit_pos);
}
}
}
}
}
if config.use_bond_types {
for (i, &count) in bond_counts.iter().enumerate() {
for j in 0..4 {
if ((count >> j) & 1) != 0 {
let bit_pos = 228 + i * 4 + j;
if bit_pos < 2048 {
bits.set(bit_pos);
}
}
}
}
}
if atom_counts[ErgAtomType::CAromatic as usize] > 0 {
bits.set(256); }
let has_heteroatom = atom_counts[ErgAtomType::N as usize] > 0
|| atom_counts[ErgAtomType::O as usize] > 0
|| atom_counts[ErgAtomType::S as usize] > 0
|| atom_counts[ErgAtomType::Halogen as usize] > 0;
if has_heteroatom {
bits.set(257); }
if atom_counts[ErgAtomType::CAromatic as usize] == 0 && atom_counts[ErgAtomType::CAliphatic as usize] > 0 {
bits.set(258); }
ErgFingerprint {
bits,
atom_counts,
bond_counts,
}
}
pub fn erg_extended(mol: &chematic_core::Molecule) -> ErgFingerprint {
erg(mol)
}
/// Convenience wrapper: fingerprints both molecules with the default
/// configuration and returns the Tanimoto similarity of their bit vectors.
pub fn tanimoto_erg(mol1: &chematic_core::Molecule, mol2: &chematic_core::Molecule) -> f64 {
    erg(mol1).tanimoto(&erg(mol2))
}
#[cfg(test)]
mod tests {
    //! Unit tests for the fingerprint: class tallies, summary bits and basic
    //! similarity properties, driven by SMILES strings.

    use super::*;
    use chematic_smiles::parse;

    /// Ethane yields two aliphatic carbons and a non-empty bit vector.
    #[test]
    fn test_erg_simple() {
        let mol = parse("CC").unwrap();
        let fp = erg(&mol);
        assert_eq!(fp.atom_counts[ErgAtomType::CAliphatic as usize], 2);
        assert!(fp.bits.popcount() > 0);
    }

    /// Fingerprinting the same molecule twice gives identical results.
    #[test]
    fn test_erg_identical() {
        let mol = parse("CC").unwrap();
        let fp1 = erg(&mol);
        let fp2 = erg(&mol);
        assert_eq!(fp1.bits.tanimoto(&fp2.bits), 1.0);
        assert_eq!(fp1.atom_counts, fp2.atom_counts);
    }

    /// Ethane has no aromatic carbons, benzene does.
    #[test]
    fn test_erg_different_molecules() {
        let mol1 = parse("CC").unwrap();
        let mol2 = parse("c1ccccc1").unwrap();
        let fp1 = erg(&mol1);
        let fp2 = erg(&mol2);
        assert!(fp1.atom_counts[ErgAtomType::CAromatic as usize] == 0);
        assert!(fp2.atom_counts[ErgAtomType::CAromatic as usize] > 0);
    }

    /// Similarity does not depend on argument order.
    #[test]
    fn test_erg_symmetry() {
        let mol1 = parse("CC").unwrap();
        let mol2 = parse("c1ccccc1").unwrap();
        let sim12 = tanimoto_erg(&mol1, &mol2);
        let sim21 = tanimoto_erg(&mol2, &mol1);
        assert!((sim12 - sim21).abs() < 1e-10);
    }

    /// The oxygen in ethanol is tallied.
    #[test]
    fn test_erg_heteroatom_detection() {
        let mol = parse("CCO").unwrap();
        let fp = erg(&mol);
        assert!(fp.atom_counts[ErgAtomType::O as usize] > 0);
    }

    /// Disabling atom counts still produces a non-empty fingerprint.
    #[test]
    fn test_erg_config() {
        let mol = parse("CC").unwrap();
        let config = ErgConfig {
            use_atom_counts: false,
            use_bond_types: true,
        };
        let fp = erg_with_config(&mol, &config);
        assert!(fp.bits.popcount() > 0);
    }

    /// Aromatic carbons are only tallied for the aromatic molecule.
    #[test]
    fn test_erg_aromatic_vs_aliphatic() {
        let aliphatic = parse("CCCC").unwrap();
        let aromatic = parse("c1ccccc1").unwrap();
        let fp_aliphatic = erg(&aliphatic);
        let fp_aromatic = erg(&aromatic);
        assert_eq!(fp_aliphatic.atom_counts[ErgAtomType::CAromatic as usize], 0);
        assert!(fp_aromatic.atom_counts[ErgAtomType::CAromatic as usize] > 0);
    }

    /// Single and double bonds land in their own tallies.
    #[test]
    fn test_erg_bond_counting() {
        let single_bond = parse("CC").unwrap();
        let double_bond = parse("C=C").unwrap();
        let fp_single = erg(&single_bond);
        let fp_double = erg(&double_bond);
        assert!(fp_single.bond_counts[ErgBondType::Single as usize] > 0);
        assert!(fp_double.bond_counts[ErgBondType::Double as usize] > 0);
    }

    /// Bit 256 flags the presence of aromatic carbon.
    #[test]
    fn test_erg_functional_group_aromatic_bit() {
        let aliphatic = parse("CCCC").unwrap();
        let aromatic = parse("c1ccccc1").unwrap();
        let fp_aliphatic = erg(&aliphatic);
        let fp_aromatic = erg(&aromatic);
        assert!(!fp_aliphatic.bits.get(256), "aliphatic should not have aromatic bit");
        assert!(fp_aromatic.bits.get(256), "aromatic should have aromatic bit");
    }

    /// Bit 257 flags the presence of a heteroatom.
    #[test]
    fn test_erg_functional_group_heteroatom_bit() {
        let alkane = parse("CC").unwrap();
        let alcohol = parse("CCO").unwrap();
        let amine = parse("CCN").unwrap();
        let fp_alkane = erg(&alkane);
        let fp_alcohol = erg(&alcohol);
        let fp_amine = erg(&amine);
        assert!(!fp_alkane.bits.get(257), "alkane should not have heteroatom bit");
        assert!(fp_alcohol.bits.get(257), "alcohol should have heteroatom bit");
        assert!(fp_amine.bits.get(257), "amine should have heteroatom bit");
    }

    /// Pairwise similarities between unrelated molecules stay within [0, 1].
    #[test]
    fn test_erg_functional_group_improved_discrimination() {
        let methane = parse("C").unwrap();
        let ethanol = parse("CCO").unwrap();
        let pyridine = parse("c1ccncc1").unwrap();
        let fp_methane = erg(&methane);
        let fp_ethanol = erg(&ethanol);
        let fp_pyridine = erg(&pyridine);
        let sim_methane_ethanol = fp_methane.tanimoto(&fp_ethanol);
        let sim_methane_pyridine = fp_methane.tanimoto(&fp_pyridine);
        let sim_ethanol_pyridine = fp_ethanol.tanimoto(&fp_pyridine);
        assert!((0.0..=1.0).contains(&sim_methane_ethanol));
        assert!((0.0..=1.0).contains(&sim_methane_pyridine));
        assert!((0.0..=1.0).contains(&sim_ethanol_pyridine));
    }
}