Skip to main content

sci_form/ml/
whim.rs

1//! WHIM (Weighted Holistic Invariant Molecular) descriptors.
2//!
3//! 3D molecular descriptors based on statistical indices of the atomic
4//! coordinate distribution projected onto principal axes. Includes:
5//! - Directional WHIM (per principal axis): λ, ν, γ, η
6//! - Global WHIM: T (total spread), A (anisotropy), D (directional), K (kurtosis)
7//!
8//! Reference: Todeschini & Gramatica, 3D Molecular Descriptors (1997).
9
10use serde::{Deserialize, Serialize};
11
12/// WHIM descriptor result for a molecule.
13#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct WhimDescriptors {
15    /// Eigenvalues of the weighted covariance matrix (λ1 ≥ λ2 ≥ λ3).
16    pub eigenvalues: [f64; 3],
17    /// Directional WHIM: spread along each principal axis.
18    pub lambda: [f64; 3],
19    /// Directional shape: ν_i = λ_i / (λ1 + λ2 + λ3).
20    pub nu: [f64; 3],
21    /// Directional symmetry: γ_i (third moment / λ_i^1.5).
22    pub gamma: [f64; 3],
23    /// Directional kurtosis: η_i (fourth moment / λ_i^2).
24    pub eta: [f64; 3],
25    /// Total spread: T = λ1 + λ2 + λ3.
26    pub total_spread: f64,
27    /// Anisotropy: A = 1 − (3 * product / T^2)^(1/3).
28    /// 0 = isotropic (sphere), 1 = maximally anisotropic.
29    pub anisotropy: f64,
30    /// Directional index: D = product of directional symmetries.
31    pub directional: f64,
32    /// Global kurtosis: K = sum of η_i.
33    pub kurtosis: f64,
34}
35
36/// Atomic weighting scheme for WHIM descriptors.
37#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
38pub enum WhimWeighting {
39    /// Unit weights (unweighted).
40    Unit,
41    /// Atomic mass weights.
42    Mass,
43    /// Van der Waals volume weights.
44    Volume,
45    /// Pauling electronegativity weights.
46    Electronegativity,
47    /// Atomic polarizability weights.
48    Polarizability,
49}
50
51/// Compute WHIM descriptors from 3D coordinates with specified weighting.
52pub fn compute_whim(
53    elements: &[u8],
54    positions: &[[f64; 3]],
55    weighting: WhimWeighting,
56) -> WhimDescriptors {
57    let n = elements.len().min(positions.len());
58    if n == 0 {
59        return WhimDescriptors {
60            eigenvalues: [0.0; 3],
61            lambda: [0.0; 3],
62            nu: [1.0 / 3.0; 3],
63            gamma: [0.0; 3],
64            eta: [0.0; 3],
65            total_spread: 0.0,
66            anisotropy: 0.0,
67            directional: 0.0,
68            kurtosis: 0.0,
69        };
70    }
71
72    // Compute weights
73    let weights: Vec<f64> = elements[..n]
74        .iter()
75        .map(|&z| atom_weight(z, weighting))
76        .collect();
77    let w_sum: f64 = weights.iter().sum();
78    if w_sum < 1e-12 {
79        return WhimDescriptors {
80            eigenvalues: [0.0; 3],
81            lambda: [0.0; 3],
82            nu: [1.0 / 3.0; 3],
83            gamma: [0.0; 3],
84            eta: [0.0; 3],
85            total_spread: 0.0,
86            anisotropy: 0.0,
87            directional: 0.0,
88            kurtosis: 0.0,
89        };
90    }
91
92    // Weighted centroid
93    let mut centroid = [0.0f64; 3];
94    for i in 0..n {
95        for d in 0..3 {
96            centroid[d] += weights[i] * positions[i][d];
97        }
98    }
99    for d in 0..3 {
100        centroid[d] /= w_sum;
101    }
102
103    // Weighted covariance matrix (3×3 symmetric)
104    let mut cov = [[0.0f64; 3]; 3];
105    for i in 0..n {
106        let dx = [
107            positions[i][0] - centroid[0],
108            positions[i][1] - centroid[1],
109            positions[i][2] - centroid[2],
110        ];
111        for r in 0..3 {
112            for c in r..3 {
113                cov[r][c] += weights[i] * dx[r] * dx[c];
114            }
115        }
116    }
117    for r in 0..3 {
118        for c in r..3 {
119            cov[r][c] /= w_sum;
120            if c != r {
121                cov[c][r] = cov[r][c];
122            }
123        }
124    }
125
126    // Eigendecomposition of 3×3 symmetric matrix (Jacobi iteration)
127    let (eigenvalues, eigenvectors) = eigendecompose_3x3(&cov);
128
129    // Sort eigenvalues descending
130    let mut sorted: Vec<(f64, usize)> = eigenvalues
131        .iter()
132        .enumerate()
133        .map(|(i, &v)| (v, i))
134        .collect();
135    sorted.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
136
137    let lambda = [
138        sorted[0].0.max(0.0),
139        sorted[1].0.max(0.0),
140        sorted[2].0.max(0.0),
141    ];
142
143    // Project onto principal axes and compute higher moments
144    let mut gamma = [0.0f64; 3];
145    let mut eta = [0.0f64; 3];
146
147    for axis in 0..3 {
148        let ax_idx = sorted[axis].1;
149        let ev = &eigenvectors[ax_idx];
150        let lam = lambda[axis];
151
152        if lam < 1e-12 {
153            continue;
154        }
155
156        let mut m3 = 0.0;
157        let mut m4 = 0.0;
158        for i in 0..n {
159            let dx = [
160                positions[i][0] - centroid[0],
161                positions[i][1] - centroid[1],
162                positions[i][2] - centroid[2],
163            ];
164            let proj = dx[0] * ev[0] + dx[1] * ev[1] + dx[2] * ev[2];
165            let p2 = proj * proj;
166            m3 += weights[i] * proj * p2;
167            m4 += weights[i] * p2 * p2;
168        }
169        m3 /= w_sum;
170        m4 /= w_sum;
171
172        gamma[axis] = m3 / lam.powf(1.5);
173        eta[axis] = m4 / (lam * lam);
174    }
175
176    let total_spread = lambda[0] + lambda[1] + lambda[2];
177    let nu = if total_spread > 1e-12 {
178        [
179            lambda[0] / total_spread,
180            lambda[1] / total_spread,
181            lambda[2] / total_spread,
182        ]
183    } else {
184        [1.0 / 3.0; 3]
185    };
186
187    let product = lambda[0] * lambda[1] * lambda[2];
188    let anisotropy = if total_spread > 1e-12 {
189        let ratio = (3.0 * product) / (total_spread * total_spread);
190        1.0 - ratio.cbrt().min(1.0)
191    } else {
192        0.0
193    };
194
195    let directional = nu[0] * nu[1] * nu[2];
196    let kurtosis = eta[0] + eta[1] + eta[2];
197
198    WhimDescriptors {
199        eigenvalues: lambda,
200        lambda,
201        nu,
202        gamma,
203        eta,
204        total_spread,
205        anisotropy,
206        directional,
207        kurtosis,
208    }
209}
210
211/// Eigendecompose a 3×3 symmetric matrix using Jacobi rotations.
212/// Returns (eigenvalues, eigenvectors) where eigenvectors[i] is the i-th eigenvector.
213fn eigendecompose_3x3(mat: &[[f64; 3]; 3]) -> ([f64; 3], [[f64; 3]; 3]) {
214    let mut a = *mat;
215    let mut v = [[0.0f64; 3]; 3];
216    for i in 0..3 {
217        v[i][i] = 1.0;
218    }
219
220    for _ in 0..100 {
221        // Find largest off-diagonal element
222        let mut p = 0;
223        let mut q = 1;
224        let mut max_val = a[0][1].abs();
225        for i in 0..3 {
226            for j in (i + 1)..3 {
227                if a[i][j].abs() > max_val {
228                    max_val = a[i][j].abs();
229                    p = i;
230                    q = j;
231                }
232            }
233        }
234
235        if max_val < 1e-14 {
236            break;
237        }
238
239        let theta = if (a[p][p] - a[q][q]).abs() < 1e-14 {
240            std::f64::consts::FRAC_PI_4
241        } else {
242            0.5 * (2.0 * a[p][q] / (a[p][p] - a[q][q])).atan()
243        };
244
245        let c = theta.cos();
246        let s = theta.sin();
247
248        // Rotate matrix
249        let mut new_a = a;
250        new_a[p][p] = c * c * a[p][p] + 2.0 * s * c * a[p][q] + s * s * a[q][q];
251        new_a[q][q] = s * s * a[p][p] - 2.0 * s * c * a[p][q] + c * c * a[q][q];
252        new_a[p][q] = 0.0;
253        new_a[q][p] = 0.0;
254
255        for r in 0..3 {
256            if r != p && r != q {
257                new_a[r][p] = c * a[r][p] + s * a[r][q];
258                new_a[p][r] = new_a[r][p];
259                new_a[r][q] = -s * a[r][p] + c * a[r][q];
260                new_a[q][r] = new_a[r][q];
261            }
262        }
263        a = new_a;
264
265        // Rotate eigenvectors
266        let mut new_v = v;
267        for r in 0..3 {
268            new_v[r][p] = c * v[r][p] + s * v[r][q];
269            new_v[r][q] = -s * v[r][p] + c * v[r][q];
270        }
271        v = new_v;
272    }
273
274    let eigenvalues = [a[0][0], a[1][1], a[2][2]];
275    // Transpose v so eigenvectors[i] is column i
276    let eigenvectors = [
277        [v[0][0], v[1][0], v[2][0]],
278        [v[0][1], v[1][1], v[2][1]],
279        [v[0][2], v[1][2], v[2][2]],
280    ];
281
282    (eigenvalues, eigenvectors)
283}
284
285/// Get atomic weight for WHIM weighting scheme.
286fn atom_weight(z: u8, scheme: WhimWeighting) -> f64 {
287    match scheme {
288        WhimWeighting::Unit => 1.0,
289        WhimWeighting::Mass => atomic_mass(z),
290        WhimWeighting::Volume => vdw_volume(z),
291        WhimWeighting::Electronegativity => electronegativity(z),
292        WhimWeighting::Polarizability => polarizability(z),
293    }
294}
295
296fn atomic_mass(z: u8) -> f64 {
297    match z {
298        1 => 1.008,
299        5 => 10.81,
300        6 => 12.011,
301        7 => 14.007,
302        8 => 15.999,
303        9 => 18.998,
304        14 => 28.086,
305        15 => 30.974,
306        16 => 32.065,
307        17 => 35.453,
308        35 => 79.904,
309        53 => 126.904,
310        _ => z as f64 * 1.5, // rough fallback
311    }
312}
313
314fn vdw_volume(z: u8) -> f64 {
315    // Approximate van der Waals volumes in ų
316    match z {
317        1 => 7.24,
318        6 => 20.58,
319        7 => 15.60,
320        8 => 14.71,
321        9 => 13.31,
322        15 => 24.43,
323        16 => 24.43,
324        17 => 22.45,
325        35 => 26.52,
326        53 => 32.52,
327        _ => 20.0,
328    }
329}
330
331fn electronegativity(z: u8) -> f64 {
332    match z {
333        1 => 2.20,
334        5 => 2.04,
335        6 => 2.55,
336        7 => 3.04,
337        8 => 3.44,
338        9 => 3.98,
339        14 => 1.90,
340        15 => 2.19,
341        16 => 2.58,
342        17 => 3.16,
343        35 => 2.96,
344        53 => 2.66,
345        _ => 2.00,
346    }
347}
348
349fn polarizability(z: u8) -> f64 {
350    // Approximate atomic polarizabilities in ų
351    match z {
352        1 => 0.387,
353        6 => 1.026,
354        7 => 0.802,
355        8 => 0.637,
356        9 => 0.440,
357        14 => 3.707,
358        15 => 3.222,
359        16 => 2.900,
360        17 => 2.315,
361        35 => 3.013,
362        53 => 5.415,
363        _ => 1.0,
364    }
365}
366
367#[cfg(test)]
368mod tests {
369    use super::*;
370
371    #[test]
372    fn test_whim_water() {
373        let elements = vec![8, 1, 1];
374        let positions = vec![
375            [0.0, 0.0, 0.117],
376            [0.0, 0.757, -0.469],
377            [0.0, -0.757, -0.469],
378        ];
379
380        let w = compute_whim(&elements, &positions, WhimWeighting::Unit);
381        assert!(w.total_spread > 0.0);
382        assert!(w.lambda[0] >= w.lambda[1]);
383        assert!(w.lambda[1] >= w.lambda[2]);
384        assert!((w.nu[0] + w.nu[1] + w.nu[2] - 1.0).abs() < 1e-10);
385    }
386
387    #[test]
388    fn test_whim_mass_weighted() {
389        let elements = vec![8, 1, 1];
390        let positions = vec![
391            [0.0, 0.0, 0.117],
392            [0.0, 0.757, -0.469],
393            [0.0, -0.757, -0.469],
394        ];
395
396        let w = compute_whim(&elements, &positions, WhimWeighting::Mass);
397        assert!(w.total_spread > 0.0);
398    }
399
400    #[test]
401    fn test_whim_empty() {
402        let w = compute_whim(&[], &[], WhimWeighting::Unit);
403        assert_eq!(w.total_spread, 0.0);
404    }
405}