use chematic_core::{AtomIdx, BondOrder, Molecule, implicit_hcount};
use crate::bitvec::BitVec2048;
use crate::ecfp::{EcfpConfig, bond_type_int, fnv1a};
const DONOR: u8 = 1 << 0;
const ACCEPTOR: u8 = 1 << 1;
const AROMATIC: u8 = 1 << 2;
const HYDROPHOBIC: u8 = 1 << 3;
const POS_IONIZABLE: u8 = 1 << 4;
const NEG_IONIZABLE: u8 = 1 << 5;
fn feature_class(mol: &Molecule, idx: AtomIdx) -> u8 {
let atom = mol.atom(idx);
let an = atom.element.atomic_number();
let h = implicit_hcount(mol, idx);
let mut cls = 0u8;
if atom.aromatic {
cls |= AROMATIC;
}
if (an == 7 || an == 8) && h > 0 {
cls |= DONOR;
}
if an == 7 || an == 8 {
cls |= ACCEPTOR;
}
if an == 6 && !atom.aromatic {
let bonded_to_hetero = mol.neighbors(idx).any(|(nb, _)| {
let nban = mol.atom(nb).element.atomic_number();
nban != 6 && nban != 1
});
if !bonded_to_hetero {
cls |= HYDROPHOBIC;
}
}
if matches!(an, 9 | 17 | 35 | 53) {
cls |= HYDROPHOBIC;
}
if an == 7 && !atom.aromatic {
if atom.charge > 0 {
cls |= POS_IONIZABLE;
} else {
let is_amide_n = mol.neighbors(idx).any(|(nb, _)| {
if mol.atom(nb).element.atomic_number() == 6 {
mol.neighbors(nb).any(|(nb2, nb2bidx)| {
nb2 != idx
&& mol.atom(nb2).element.atomic_number() == 8
&& mol.bond(nb2bidx).order == BondOrder::Double
})
} else {
false
}
});
if !is_amide_n {
cls |= POS_IONIZABLE;
}
}
}
if an == 8 && atom.charge <= 0 {
cls |= NEG_IONIZABLE;
}
cls
}
pub fn fcfp(mol: &Molecule, config: &EcfpConfig) -> BitVec2048 {
let n = mol.atom_count();
let nbits = config.nbits;
let mut fp = BitVec2048::new();
if n == 0 {
return fp;
}
let mut ids: Vec<u64> = Vec::with_capacity(n);
for i in 0..n {
let idx = AtomIdx(i as u32);
let cls = feature_class(mol, idx);
let id = fnv1a(&[cls]);
fp.set((id % nbits as u64) as usize);
ids.push(id);
}
let mut new_ids: Vec<u64> = vec![0u64; n];
for r in 1..=config.radius {
for i in 0..n {
let idx = AtomIdx(i as u32);
let mut neighbor_info: Vec<(u8, u64)> = mol
.neighbors(idx)
.map(|(nb_idx, bond_idx)| {
let bond = mol.bond(bond_idx);
let btype = bond_type_int(bond.order);
let nb_id = ids[nb_idx.0 as usize];
(btype, nb_id)
})
.collect();
neighbor_info.sort_unstable();
let mut bytes: Vec<u8> = Vec::with_capacity(1 + 8 + neighbor_info.len() * 9);
bytes.push(r as u8);
bytes.extend_from_slice(&ids[i].to_le_bytes());
for (btype, nb_id) in &neighbor_info {
bytes.push(*btype);
bytes.extend_from_slice(&nb_id.to_le_bytes());
}
let new_id = fnv1a(&bytes);
new_ids[i] = new_id;
fp.set((new_id % nbits as u64) as usize);
}
core::mem::swap(&mut ids, &mut new_ids);
}
fp
}
pub fn fcfp4(mol: &Molecule) -> BitVec2048 {
fcfp(mol, &EcfpConfig::default())
}
pub fn fcfp6(mol: &Molecule) -> BitVec2048 {
fcfp(
mol,
&EcfpConfig {
radius: 3,
nbits: 2048,
use_chirality: false,
use_double_fold: false,
},
)
}
pub fn tanimoto_fcfp4(a: &Molecule, b: &Molecule) -> f64 {
fcfp4(a).tanimoto(&fcfp4(b))
}
#[cfg(test)]
mod tests {
use super::*;
use chematic_smiles::parse;
fn mol(s: &str) -> Molecule {
parse(s).unwrap()
}
#[test]
fn test_fcfp4_same_molecule() {
let m = mol("c1ccccc1");
assert!((fcfp4(&m).tanimoto(&fcfp4(&m)) - 1.0).abs() < 1e-9);
}
#[test]
fn test_fcfp4_different_molecules() {
let benzene = mol("c1ccccc1");
let aspirin = mol("CC(=O)Oc1ccccc1C(=O)O");
let sim = tanimoto_fcfp4(&benzene, &aspirin);
assert!(sim < 1.0, "FCFP4 of benzene vs aspirin should differ");
assert!(
sim > 0.0,
"FCFP4 of benzene vs aspirin should share some bits"
);
}
#[test]
fn test_fcfp4_bioisostere_closer_than_ecfp4() {
use crate::ecfp::tanimoto_ecfp4;
let benzene = mol("c1ccccc1");
let pyridine = mol("c1ccncc1");
let fcfp_sim = tanimoto_fcfp4(&benzene, &pyridine);
let ecfp_sim = tanimoto_ecfp4(&benzene, &pyridine);
assert!(
fcfp_sim >= ecfp_sim,
"FCFP4({fcfp_sim:.3}) should be ≥ ECFP4({ecfp_sim:.3}) for bioisosteres"
);
}
#[test]
fn test_fcfp4_nonempty() {
let fp = fcfp4(&mol("CCO"));
assert!(fp.popcount() > 0, "FCFP4 should set at least one bit");
}
}