Skip to main content

sci_form/ml/
pharmacophore.rs

1//! Pharmacophore fingerprints: encode molecular pharmacophoric features.
2//!
3//! Detects pharmacophoric feature points (HBD, HBA, aromatic, hydrophobic,
4//! positive, negative) and encodes pairwise distance bins into a fingerprint.
5
6use serde::{Deserialize, Serialize};
7use std::collections::BTreeSet;
8
9/// Type of pharmacophoric feature.
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
11pub enum PharmFeatureType {
12    /// Hydrogen bond donor (N-H, O-H).
13    HBondDonor,
14    /// Hydrogen bond acceptor (N, O lone pairs).
15    HBondAcceptor,
16    /// Aromatic ring centroid.
17    Aromatic,
18    /// Hydrophobic group.
19    Hydrophobic,
20    /// Positively ionizable (amines).
21    Positive,
22    /// Negatively ionizable (carboxylates, phosphates, sulfonates).
23    Negative,
24}
25
26/// A detected pharmacophoric feature.
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct PharmFeature {
29    /// Feature type.
30    pub feature_type: PharmFeatureType,
31    /// Atom indices contributing to this feature.
32    pub atom_indices: Vec<usize>,
33    /// 3D centroid of the feature (if coordinates available).
34    pub centroid: Option<[f64; 3]>,
35}
36
37/// Pharmacophore fingerprint result.
38#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct PharmacophoreFingerprint {
40    /// Number of bits in the fingerprint.
41    pub n_bits: usize,
42    /// Set of on-bits.
43    pub on_bits: BTreeSet<usize>,
44    /// Detected features.
45    pub features: Vec<PharmFeature>,
46    /// Bit density (fraction of on bits).
47    pub density: f64,
48}
49
50/// Detect pharmacophoric features from molecular topology.
51///
52/// `elements`: atomic numbers.
53/// `bonds`: (atom_i, atom_j, bond_order) list.
54/// `coords`: flat xyz coordinates (optional, for 3D fingerpoints). Empty = 2D only.
55/// `aromatic_atoms`: boolean aromatic flags per atom (or empty).
56pub fn detect_features(
57    elements: &[u8],
58    bonds: &[(usize, usize, u8)],
59    coords: &[f64],
60    aromatic_atoms: &[bool],
61) -> Vec<PharmFeature> {
62    let n = elements.len();
63    let has_3d = coords.len() >= n * 3;
64
65    // Build adjacency
66    let mut adj: Vec<Vec<(usize, u8)>> = vec![vec![]; n];
67    for &(i, j, ord) in bonds {
68        if i < n && j < n {
69            adj[i].push((j, ord));
70            adj[j].push((i, ord));
71        }
72    }
73
74    let mut features = Vec::new();
75
76    for i in 0..n {
77        let z = elements[i];
78        let centroid = if has_3d {
79            Some([coords[i * 3], coords[i * 3 + 1], coords[i * 3 + 2]])
80        } else {
81            None
82        };
83
84        // H-bond donor: N-H, O-H, S-H
85        if z == 7 || z == 8 || z == 16 {
86            let has_h = adj[i].iter().any(|&(nb, _)| elements[nb] == 1);
87            if has_h {
88                features.push(PharmFeature {
89                    feature_type: PharmFeatureType::HBondDonor,
90                    atom_indices: vec![i],
91                    centroid,
92                });
93            }
94        }
95
96        // H-bond acceptor: N, O with lone pairs
97        if z == 7 || z == 8 {
98            features.push(PharmFeature {
99                feature_type: PharmFeatureType::HBondAcceptor,
100                atom_indices: vec![i],
101                centroid,
102            });
103        }
104
105        // Positive ionizable: primary, secondary, tertiary amines
106        if z == 7 {
107            let heavy_neighbors = adj[i].iter().filter(|&&(nb, _)| elements[nb] != 1).count();
108            if heavy_neighbors <= 3 {
109                let has_h = adj[i].iter().any(|&(nb, _)| elements[nb] == 1);
110                if has_h {
111                    features.push(PharmFeature {
112                        feature_type: PharmFeatureType::Positive,
113                        atom_indices: vec![i],
114                        centroid,
115                    });
116                }
117            }
118        }
119
120        // Negative ionizable: carboxylate O, phosphate O, sulfonate O
121        if z == 8 {
122            let is_terminal = adj[i].len() == 1;
123            if is_terminal {
124                let &(nb, ord) = &adj[i][0];
125                // Check if neighbor C/P/S has double-bonded O
126                if (elements[nb] == 6 || elements[nb] == 15 || elements[nb] == 16)
127                    && (ord == 1 || ord == 2)
128                {
129                    let neighbor_has_other_o =
130                        adj[nb].iter().any(|&(k, _)| k != i && elements[k] == 8);
131                    if neighbor_has_other_o {
132                        features.push(PharmFeature {
133                            feature_type: PharmFeatureType::Negative,
134                            atom_indices: vec![i],
135                            centroid,
136                        });
137                    }
138                }
139            }
140        }
141
142        // Hydrophobic: C, S, halogen not bonded to polar atoms
143        if z == 6 || z == 16 || z == 9 || z == 17 || z == 35 || z == 53 {
144            let is_polar_neighbor = adj[i]
145                .iter()
146                .any(|&(nb, _)| elements[nb] == 7 || elements[nb] == 8);
147            if !is_polar_neighbor && z == 6 {
148                features.push(PharmFeature {
149                    feature_type: PharmFeatureType::Hydrophobic,
150                    atom_indices: vec![i],
151                    centroid,
152                });
153            }
154            if z == 17 || z == 35 || z == 53 {
155                features.push(PharmFeature {
156                    feature_type: PharmFeatureType::Hydrophobic,
157                    atom_indices: vec![i],
158                    centroid,
159                });
160            }
161        }
162    }
163
164    // Aromatic ring centroids
165    if !aromatic_atoms.is_empty() {
166        // Detect aromatic ring groups via connected aromatic atoms
167        let mut visited = vec![false; n];
168        for start in 0..n {
169            if !aromatic_atoms.get(start).copied().unwrap_or(false) || visited[start] {
170                continue;
171            }
172            // BFS to find connected aromatic cluster
173            let mut ring_atoms = Vec::new();
174            let mut queue = std::collections::VecDeque::new();
175            queue.push_back(start);
176            visited[start] = true;
177            while let Some(u) = queue.pop_front() {
178                ring_atoms.push(u);
179                for &(v, _) in &adj[u] {
180                    if !visited[v] && aromatic_atoms.get(v).copied().unwrap_or(false) {
181                        visited[v] = true;
182                        queue.push_back(v);
183                    }
184                }
185            }
186            if ring_atoms.len() >= 5 {
187                let centroid = if has_3d {
188                    let mut c = [0.0; 3];
189                    for &a in &ring_atoms {
190                        for k in 0..3 {
191                            c[k] += coords[a * 3 + k];
192                        }
193                    }
194                    let n_r = ring_atoms.len() as f64;
195                    Some([c[0] / n_r, c[1] / n_r, c[2] / n_r])
196                } else {
197                    None
198                };
199                features.push(PharmFeature {
200                    feature_type: PharmFeatureType::Aromatic,
201                    atom_indices: ring_atoms,
202                    centroid,
203                });
204            }
205        }
206    }
207
208    features
209}
210
211/// Compute pharmacophore fingerprint from detected features.
212///
213/// Encodes feature-type pairs and (optionally) distance bins into a bit vector.
214/// Without 3D coords, encodes only feature-pair counts.
215pub fn compute_pharmacophore_fingerprint(
216    features: &[PharmFeature],
217    n_bits: usize,
218) -> PharmacophoreFingerprint {
219    let mut on_bits = BTreeSet::new();
220
221    // One-point features: hash feature type × count
222    let type_counts: std::collections::HashMap<PharmFeatureType, usize> =
223        features
224            .iter()
225            .fold(std::collections::HashMap::new(), |mut m, f| {
226                *m.entry(f.feature_type).or_insert(0) += 1;
227                m
228            });
229
230    for (&ft, &count) in &type_counts {
231        let base = ft as usize * 31;
232        on_bits.insert((base + count.min(10)) % n_bits);
233    }
234
235    // Two-point features: hash pairs of feature types
236    for i in 0..features.len() {
237        for j in (i + 1)..features.len() {
238            let ft1 = features[i].feature_type as usize;
239            let ft2 = features[j].feature_type as usize;
240            let (lo, hi) = if ft1 <= ft2 { (ft1, ft2) } else { (ft2, ft1) };
241
242            // If 3D, add distance bin encoding
243            if let (Some(c1), Some(c2)) = (&features[i].centroid, &features[j].centroid) {
244                let dist =
245                    ((c1[0] - c2[0]).powi(2) + (c1[1] - c2[1]).powi(2) + (c1[2] - c2[2]).powi(2))
246                        .sqrt();
247                // Bin distance: 0-2Å, 2-4Å, 4-6Å, 6-8Å, 8-10Å, 10+Å
248                let bin = ((dist / 2.0) as usize).min(5);
249                let hash = (lo * 7 + hi * 13 + bin * 37 + 1000) % n_bits;
250                on_bits.insert(hash);
251            } else {
252                let hash = (lo * 7 + hi * 13 + 500) % n_bits;
253                on_bits.insert(hash);
254            }
255        }
256    }
257
258    let density = on_bits.len() as f64 / n_bits as f64;
259
260    PharmacophoreFingerprint {
261        n_bits,
262        on_bits,
263        features: features.to_vec(),
264        density,
265    }
266}
267
268/// Compute Tanimoto similarity between two pharmacophore fingerprints.
269pub fn pharmacophore_tanimoto(
270    fp1: &PharmacophoreFingerprint,
271    fp2: &PharmacophoreFingerprint,
272) -> f64 {
273    let intersection = fp1.on_bits.intersection(&fp2.on_bits).count();
274    let union = fp1.on_bits.union(&fp2.on_bits).count();
275    if union == 0 {
276        0.0
277    } else {
278        intersection as f64 / union as f64
279    }
280}
281
282#[cfg(test)]
283mod tests {
284    use super::*;
285
286    #[test]
287    fn test_detect_features_ethanol() {
288        // Ethanol: CCO → has HBD (O-H), HBA (O), hydrophobic (C)
289        let elements = [6u8, 6, 8, 1, 1, 1, 1, 1, 1];
290        let bonds = [
291            (0, 1, 1u8),
292            (1, 2, 1),
293            (0, 3, 1),
294            (0, 4, 1),
295            (0, 5, 1),
296            (1, 6, 1),
297            (1, 7, 1),
298            (2, 8, 1),
299        ];
300        let features = detect_features(&elements, &bonds, &[], &[]);
301        let has_hbd = features
302            .iter()
303            .any(|f| f.feature_type == PharmFeatureType::HBondDonor);
304        let has_hba = features
305            .iter()
306            .any(|f| f.feature_type == PharmFeatureType::HBondAcceptor);
307        assert!(has_hbd, "Ethanol should have an H-bond donor");
308        assert!(has_hba, "Ethanol should have an H-bond acceptor");
309    }
310
311    #[test]
312    fn test_pharmacophore_fingerprint() {
313        let elements = [6u8, 6, 8, 1, 1, 1, 1, 1, 1];
314        let bonds = [
315            (0, 1, 1u8),
316            (1, 2, 1),
317            (0, 3, 1),
318            (0, 4, 1),
319            (0, 5, 1),
320            (1, 6, 1),
321            (1, 7, 1),
322            (2, 8, 1),
323        ];
324        let features = detect_features(&elements, &bonds, &[], &[]);
325        let fp = compute_pharmacophore_fingerprint(&features, 1024);
326        assert!(!fp.on_bits.is_empty(), "Fingerprint should have on-bits");
327        assert!(fp.density > 0.0 && fp.density < 1.0);
328    }
329
330    #[test]
331    fn test_pharmacophore_tanimoto_self() {
332        let elements = [6u8, 8, 1, 1, 1];
333        let bonds = [(0, 1, 1u8), (0, 2, 1), (0, 3, 1), (1, 4, 1)];
334        let features = detect_features(&elements, &bonds, &[], &[]);
335        let fp = compute_pharmacophore_fingerprint(&features, 1024);
336        let sim = pharmacophore_tanimoto(&fp, &fp);
337        assert!((sim - 1.0).abs() < 1e-10, "Self-similarity should be 1.0");
338    }
339}