Skip to main content

cyanea_struct/
analysis.rs

1//! B-factor analysis for macromolecular structures.
2//!
3//! Provides per-residue and per-chain B-factor statistics, as well as normalized
4//! flexibility scores (Z-scores) to identify flexible and rigid regions.
5
6use cyanea_core::{CyaneaError, Result};
7
8use crate::types::Structure;
9
10use alloc::string::String;
11use alloc::vec::Vec;
12
13/// Descriptive statistics for B-factor values.
14#[derive(Debug, Clone, PartialEq)]
15#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
16pub struct BFactorStats {
17    /// Arithmetic mean of B-factors.
18    pub mean: f64,
19    /// Population standard deviation.
20    pub std_dev: f64,
21    /// Minimum B-factor.
22    pub min: f64,
23    /// Maximum B-factor.
24    pub max: f64,
25    /// Median B-factor.
26    pub median: f64,
27}
28
29/// Compute per-residue average B-factors across all chains.
30///
31/// Returns a list of `(residue_seq_num, residue_name, average_b_factor)` for
32/// each residue in the structure.
33///
34/// # Errors
35///
36/// Returns an error if the structure has no atoms.
37pub fn residue_bfactors(structure: &Structure) -> Result<Vec<(usize, String, f64)>> {
38    if structure.atom_count() == 0 {
39        return Err(CyaneaError::InvalidInput(
40            "cannot compute B-factors for structure with no atoms".into(),
41        ));
42    }
43
44    let mut result = Vec::new();
45
46    for chain in &structure.chains {
47        for residue in &chain.residues {
48            if residue.atoms.is_empty() {
49                continue;
50            }
51            let sum: f64 = residue.atoms.iter().map(|a| a.temp_factor).sum();
52            let avg = sum / residue.atoms.len() as f64;
53            result.push((residue.seq_num as usize, residue.name.clone(), avg));
54        }
55    }
56
57    Ok(result)
58}
59
60/// Compute per-chain B-factor statistics.
61///
62/// Returns a list of `(chain_id, stats)` where `stats` contains mean, std_dev,
63/// min, max, and median B-factors for all atoms in the chain.
64///
65/// # Errors
66///
67/// Returns an error if the structure has no chains or if a chain has no atoms.
68pub fn chain_bfactors(structure: &Structure) -> Result<Vec<(String, BFactorStats)>> {
69    if structure.chains.is_empty() {
70        return Err(CyaneaError::InvalidInput(
71            "cannot compute B-factors for structure with no chains".into(),
72        ));
73    }
74
75    let mut result = Vec::new();
76
77    for chain in &structure.chains {
78        let mut bfactors: Vec<f64> = Vec::new();
79        for residue in &chain.residues {
80            for atom in &residue.atoms {
81                bfactors.push(atom.temp_factor);
82            }
83        }
84
85        if bfactors.is_empty() {
86            continue;
87        }
88
89        let stats = compute_stats(&mut bfactors);
90        let chain_name = alloc::format!("{}", chain.id);
91        result.push((chain_name, stats));
92    }
93
94    Ok(result)
95}
96
97/// Compute normalized B-factor Z-scores per residue.
98///
99/// For each residue, the flexibility score is:
100///   `Z = (residue_avg_B - global_mean_B) / global_std_B`
101///
102/// High Z-scores indicate flexible regions; low Z-scores indicate rigid regions.
103///
104/// Returns a list of `(residue_seq_num, z_score)`.
105///
106/// # Errors
107///
108/// Returns an error if the structure has no atoms or if the global B-factor
109/// standard deviation is zero (all atoms have the same B-factor).
110pub fn flexibility_score(structure: &Structure) -> Result<Vec<(usize, f64)>> {
111    let residue_bfs = residue_bfactors(structure)?;
112
113    if residue_bfs.is_empty() {
114        return Err(CyaneaError::InvalidInput(
115            "no residues to compute flexibility scores".into(),
116        ));
117    }
118
119    // Compute global mean and std dev from all atoms
120    let mut all_bfactors: Vec<f64> = Vec::new();
121    for chain in &structure.chains {
122        for residue in &chain.residues {
123            for atom in &residue.atoms {
124                all_bfactors.push(atom.temp_factor);
125            }
126        }
127    }
128
129    let global_mean = mean_f64(&all_bfactors);
130    let global_std = std_dev_f64(&all_bfactors, global_mean);
131
132    if global_std < 1e-15 {
133        // All B-factors are identical — Z-scores are all 0
134        return Ok(residue_bfs.iter().map(|(seq, _, _)| (*seq, 0.0)).collect());
135    }
136
137    let result: Vec<(usize, f64)> = residue_bfs
138        .iter()
139        .map(|(seq, _, avg_b)| (*seq, (avg_b - global_mean) / global_std))
140        .collect();
141
142    Ok(result)
143}
144
145// ---- Internal helper functions ----
146
147/// Compute BFactorStats from a mutable slice (sorted in place for median).
148fn compute_stats(values: &mut Vec<f64>) -> BFactorStats {
149    let n = values.len();
150    debug_assert!(n > 0);
151
152    let mean = mean_f64(values);
153    let std_dev = std_dev_f64(values, mean);
154
155    // Sort for min, max, median
156    values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(core::cmp::Ordering::Equal));
157
158    let min = values[0];
159    let max = values[n - 1];
160    let median = if n % 2 == 0 {
161        (values[n / 2 - 1] + values[n / 2]) / 2.0
162    } else {
163        values[n / 2]
164    };
165
166    BFactorStats {
167        mean,
168        std_dev,
169        min,
170        max,
171        median,
172    }
173}
174
175/// Arithmetic mean.
176fn mean_f64(values: &[f64]) -> f64 {
177    if values.is_empty() {
178        return 0.0;
179    }
180    let sum: f64 = values.iter().sum();
181    sum / values.len() as f64
182}
183
184/// Population standard deviation.
185fn std_dev_f64(values: &[f64], mean: f64) -> f64 {
186    if values.len() < 2 {
187        return 0.0;
188    }
189    let variance: f64 = values.iter().map(|v| (v - mean) * (v - mean)).sum::<f64>()
190        / values.len() as f64;
191    variance.sqrt()
192}
193
194#[cfg(test)]
195mod tests {
196    use super::*;
197    use crate::types::{Atom, Chain, Point3D, Residue, Structure};
198    use alloc::vec;
199
200    fn make_atom_with_b(name: &str, b: f64) -> Atom {
201        Atom {
202            serial: 1,
203            name: name.into(),
204            alt_loc: None,
205            coords: Point3D::new(0.0, 0.0, 0.0),
206            occupancy: 1.0,
207            temp_factor: b,
208            element: None,
209            charge: None,
210            is_hetatm: false,
211        }
212    }
213
214    fn test_structure() -> Structure {
215        Structure {
216            id: "TEST".into(),
217            chains: vec![Chain::new(
218                'A',
219                vec![
220                    Residue {
221                        name: "ALA".into(),
222                        seq_num: 1,
223                        i_code: None,
224                        atoms: vec![
225                            make_atom_with_b("N", 10.0),
226                            make_atom_with_b("CA", 12.0),
227                            make_atom_with_b("C", 11.0),
228                            make_atom_with_b("O", 13.0),
229                        ],
230                    },
231                    Residue {
232                        name: "GLY".into(),
233                        seq_num: 2,
234                        i_code: None,
235                        atoms: vec![
236                            make_atom_with_b("N", 20.0),
237                            make_atom_with_b("CA", 22.0),
238                            make_atom_with_b("C", 21.0),
239                            make_atom_with_b("O", 23.0),
240                        ],
241                    },
242                    Residue {
243                        name: "VAL".into(),
244                        seq_num: 3,
245                        i_code: None,
246                        atoms: vec![
247                            make_atom_with_b("N", 5.0),
248                            make_atom_with_b("CA", 6.0),
249                            make_atom_with_b("C", 5.5),
250                            make_atom_with_b("O", 6.5),
251                        ],
252                    },
253                ],
254            )],
255        }
256    }
257
258    #[test]
259    fn residue_bfactors_basic() {
260        let s = test_structure();
261        let bfs = residue_bfactors(&s).unwrap();
262        assert_eq!(bfs.len(), 3);
263
264        // ALA: (10+12+11+13)/4 = 11.5
265        assert!((bfs[0].2 - 11.5).abs() < 1e-10);
266        assert_eq!(bfs[0].1, "ALA");
267
268        // GLY: (20+22+21+23)/4 = 21.5
269        assert!((bfs[1].2 - 21.5).abs() < 1e-10);
270
271        // VAL: (5+6+5.5+6.5)/4 = 5.75
272        assert!((bfs[2].2 - 5.75).abs() < 1e-10);
273    }
274
275    #[test]
276    fn chain_bfactors_basic() {
277        let s = test_structure();
278        let cbs = chain_bfactors(&s).unwrap();
279        assert_eq!(cbs.len(), 1);
280        assert_eq!(cbs[0].0, "A");
281
282        let stats = &cbs[0].1;
283        // All 12 atoms: 10,12,11,13,20,22,21,23,5,6,5.5,6.5
284        // Sum = 155, mean = 155/12 ≈ 12.9167
285        assert!((stats.mean - 155.0 / 12.0).abs() < 1e-4);
286        assert!((stats.min - 5.0).abs() < 1e-10);
287        assert!((stats.max - 23.0).abs() < 1e-10);
288
289        // Median of sorted [5,5.5,6,6.5,10,11,12,13,20,21,22,23]: (11+12)/2 = 11.5
290        assert!((stats.median - 11.5).abs() < 1e-10);
291    }
292
293    #[test]
294    fn flexibility_score_basic() {
295        let s = test_structure();
296        let scores = flexibility_score(&s).unwrap();
297        assert_eq!(scores.len(), 3);
298
299        // GLY (seq=2) should have the highest Z-score (most flexible)
300        let gly_score = scores.iter().find(|(seq, _)| *seq == 2).unwrap().1;
301        // VAL (seq=3) should have the lowest Z-score (most rigid)
302        let val_score = scores.iter().find(|(seq, _)| *seq == 3).unwrap().1;
303        assert!(gly_score > val_score);
304        assert!(gly_score > 0.0); // above mean
305        assert!(val_score < 0.0); // below mean
306    }
307
308    #[test]
309    fn flexibility_identifies_flexible_residue() {
310        let s = test_structure();
311        let scores = flexibility_score(&s).unwrap();
312        // The most flexible residue should be GLY (highest avg B = 21.5)
313        let max_score = scores
314            .iter()
315            .max_by(|a, b| a.1.partial_cmp(&b.1).unwrap())
316            .unwrap();
317        assert_eq!(max_score.0, 2); // GLY at seq_num 2
318    }
319
320    #[test]
321    fn empty_structure_error() {
322        let s = Structure {
323            id: "EMPTY".into(),
324            chains: vec![],
325        };
326        assert!(residue_bfactors(&s).is_err());
327        assert!(chain_bfactors(&s).is_err());
328        assert!(flexibility_score(&s).is_err());
329    }
330
331    #[test]
332    fn multi_chain_bfactors() {
333        let s = Structure {
334            id: "MULTI".into(),
335            chains: vec![
336                Chain::new(
337                    'A',
338                    vec![Residue {
339                        name: "ALA".into(),
340                        seq_num: 1,
341                        i_code: None,
342                        atoms: vec![make_atom_with_b("CA", 10.0)],
343                    }],
344                ),
345                Chain::new(
346                    'B',
347                    vec![Residue {
348                        name: "GLY".into(),
349                        seq_num: 1,
350                        i_code: None,
351                        atoms: vec![make_atom_with_b("CA", 20.0)],
352                    }],
353                ),
354            ],
355        };
356
357        let cbs = chain_bfactors(&s).unwrap();
358        assert_eq!(cbs.len(), 2);
359        assert!((cbs[0].1.mean - 10.0).abs() < 1e-10);
360        assert!((cbs[1].1.mean - 20.0).abs() < 1e-10);
361    }
362
363    #[test]
364    fn uniform_bfactors_zscore_zero() {
365        let s = Structure {
366            id: "UNI".into(),
367            chains: vec![Chain::new(
368                'A',
369                vec![
370                    Residue {
371                        name: "ALA".into(),
372                        seq_num: 1,
373                        i_code: None,
374                        atoms: vec![make_atom_with_b("CA", 15.0)],
375                    },
376                    Residue {
377                        name: "GLY".into(),
378                        seq_num: 2,
379                        i_code: None,
380                        atoms: vec![make_atom_with_b("CA", 15.0)],
381                    },
382                ],
383            )],
384        };
385
386        let scores = flexibility_score(&s).unwrap();
387        for (_, z) in &scores {
388            assert!(z.abs() < 1e-10, "uniform B-factors should give Z=0");
389        }
390    }
391
392    #[test]
393    fn bfactor_stats_std_dev() {
394        // Known values: [2, 4, 4, 4, 5, 5, 7, 9]
395        // Mean = 5.0, Variance = 4.0, StdDev = 2.0
396        let mut vals = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
397        let stats = compute_stats(&mut vals);
398        assert!((stats.mean - 5.0).abs() < 1e-10);
399        assert!((stats.std_dev - 2.0).abs() < 1e-10);
400        assert!((stats.min - 2.0).abs() < 1e-10);
401        assert!((stats.max - 9.0).abs() < 1e-10);
402        assert!((stats.median - 4.5).abs() < 1e-10);
403    }
404}