use crate::bitvec::BitVec2048;
use chematic_core::{AtomIdx, Molecule};
/// Modulus applied to every path hash to turn it into a bit position in the fingerprint.
///
/// NOTE(review): presumably equal to the bit capacity of `BitVec2048` — confirm against
/// that type's definition.
const HASH_MOD: usize = 2_048;
/// Tunable parameters for the path-based fingerprint built in this module.
#[derive(Debug, Clone)]
pub struct RdkitPathConfig {
    /// Upper bound on the number of atoms in an enumerated path.
    ///
    /// A path is reported once it holds at least two atoms and is no longer
    /// extended once it holds this many.
    pub max_path_len: usize,
    /// Presumably meant to toggle whether atom types contribute to the hash
    /// (inferred from the name only).
    ///
    /// NOTE(review): no code visible in this file reads this field, so changing
    /// it currently has no effect on the fingerprint.
    pub use_atom_types: bool,
}

impl Default for RdkitPathConfig {
    /// Stock settings: paths of up to 7 atoms, `use_atom_types` switched off.
    fn default() -> Self {
        RdkitPathConfig {
            use_atom_types: false,
            max_path_len: 7,
        }
    }
}
/// Computes the path fingerprint of `mol` with the default configuration.
///
/// Equivalent to calling `rdkit_path_fp_with_config` with
/// `RdkitPathConfig::default()`.
pub fn rdkit_path_fp(mol: &Molecule) -> BitVec2048 {
    let defaults = RdkitPathConfig::default();
    rdkit_path_fp_with_config(mol, &defaults)
}
pub fn rdkit_path_fp_with_config(
mol: &Molecule,
config: &RdkitPathConfig,
) -> BitVec2048 {
let mut fp = BitVec2048::new();
if mol.atom_count() == 0 {
return fp;
}
for start_idx in 0..mol.atom_count() {
let start = AtomIdx(start_idx as u32);
enumerate_paths_from(
mol,
start,
vec![start],
&mut |path| {
let hash = hash_path(mol, path);
let bit_idx = hash % HASH_MOD;
fp.set(bit_idx);
},
config.max_path_len,
);
}
fp
}
/// Depth-first enumeration of the paths that extend `path`.
///
/// `callback` receives every path that holds at least two atoms and at most
/// `max_len` atoms. A path is reported before any of its extensions, and
/// neighbors are tried in the order `mol.neighbors` yields them. A path is
/// never extended through an atom it already contains.
///
/// * `current` - atom whose neighbors are used to extend the path; callers in
///   this file pass the last atom of `path` (not validated here).
/// * `path` - atoms visited so far; used as the working buffer.
/// * `max_len` - maximum number of atoms allowed in a reported path.
fn enumerate_paths_from<F>(
    mol: &Molecule,
    current: AtomIdx,
    mut path: Vec<AtomIdx>,
    callback: &mut F,
    max_len: usize,
) where
    F: FnMut(&[AtomIdx]),
{
    // Recursive worker. It extends one shared buffer with push/pop instead of
    // cloning the whole path for every neighbor, so the traversal performs no
    // per-step allocation while visiting paths in exactly the same order.
    fn walk<G>(
        mol: &Molecule,
        tail: AtomIdx,
        path: &mut Vec<AtomIdx>,
        callback: &mut G,
        max_len: usize,
    ) where
        G: FnMut(&[AtomIdx]),
    {
        let len = path.len();
        if len > max_len {
            return;
        }
        // Single-atom paths are not reported.
        if len > 1 {
            callback(path.as_slice());
        }
        // Already at the limit: report (above) but do not grow further.
        if len == max_len {
            return;
        }
        for (neighbor, _) in mol.neighbors(tail) {
            if path.contains(&neighbor) {
                continue;
            }
            path.push(neighbor);
            walk(mol, neighbor, path, callback, max_len);
            // Backtrack so the buffer again ends at `tail` for the next neighbor.
            path.pop();
        }
    }

    walk(mol, current, &mut path, callback, max_len);
}
/// Hashes the atomic numbers along `path` into a single `usize`.
///
/// Starting from 2166136261, each atom's atomic number is XOR-ed into the
/// accumulator, which is then multiplied by 16777619 with wrapping arithmetic
/// at the width of `usize`. Atoms are consumed in slice order, so the result
/// is order-sensitive. Only the atomic number of each atom enters the hash.
fn hash_path(mol: &Molecule, path: &[AtomIdx]) -> usize {
    const FNV_OFFSET_BASIS: usize = 2166136261;
    const FNV_PRIME: usize = 16777619;

    path.iter().fold(FNV_OFFSET_BASIS, |acc, &idx| {
        let atom = mol.atom(idx);
        let atomic_number = atom.element.atomic_number() as usize;
        (acc ^ atomic_number).wrapping_mul(FNV_PRIME)
    })
}
/// Similarity between two path fingerprints, as computed by `BitVec2048::tanimoto`.
///
/// Thin convenience wrapper: `a` and `b` are forwarded unchanged and the
/// method's result is returned as-is.
///
/// NOTE(review): the value range and the handling of two empty fingerprints
/// are determined by `BitVec2048::tanimoto`, which is not visible here.
pub fn tanimoto_rdkit_path(a: &BitVec2048, b: &BitVec2048) -> f64 {
a.tanimoto(b)
}
#[cfg(test)]
mod tests {
    use super::*;
    use chematic_smiles::parse;

    /// Parses `smiles`, aborting the test with the parser's error on failure.
    fn mol(smiles: &str) -> Molecule {
        match parse(smiles) {
            Ok(parsed) => parsed,
            Err(e) => panic!("failed to parse {smiles:?}: {e}"),
        }
    }

    /// Default-configuration fingerprint of the molecule written as `smiles`.
    fn fingerprint_of(smiles: &str) -> BitVec2048 {
        let molecule = mol(smiles);
        rdkit_path_fp(&molecule)
    }

    #[test]
    fn test_rdkit_path_fp_ethane() {
        let bits = fingerprint_of("CC");
        assert!(bits.popcount() > 0, "ethane should have non-zero bits");
    }

    #[test]
    fn test_rdkit_path_fp_propane() {
        let bits = fingerprint_of("CCC");
        assert!(bits.popcount() > 0, "propane should have non-zero bits");
    }

    #[test]
    fn test_rdkit_path_fp_benzene() {
        let bits = fingerprint_of("c1ccccc1");
        assert!(bits.popcount() > 0, "benzene should have non-zero bits");
    }

    #[test]
    fn test_rdkit_path_fp_identical() {
        // Two independently parsed copies of the same molecule.
        let first = fingerprint_of("CC");
        let second = fingerprint_of("CC");
        assert_eq!(
            first.tanimoto(&second),
            1.0,
            "identical molecules should have tanimoto=1.0"
        );
    }

    #[test]
    fn test_rdkit_path_fp_single_atom() {
        // A lone atom has no path of two or more atoms, so no bit is set.
        let bits = fingerprint_of("C");
        assert_eq!(bits.popcount(), 0, "single atom should have zero bits");
    }
}