1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
//! RustSASA is a Rust library for computing the absolute solvent accessible surface area (ASA/SASA) of each atom in a given protein structure using the Shrake-Rupley algorithm[1].
mod consts;
mod test;
mod utils;

use crate::consts::POLAR_AMINO_ACIDS;
use crate::utils::{serialize_chain_id, simd_sum};
use nalgebra::{Point3, Vector3};
use pdbtbx::PDB;
use rayon::prelude::*;
use rstar::{PointDistance, RTree, RTreeObject, AABB};
use snafu::prelude::*;
use snafu::OptionExt;
use std::collections::HashMap;
use std::sync::Arc;

/// This struct represents an individual Atom
#[derive(Clone)]
pub struct Atom {
    /// The 3D position of the atom
    pub position: Point3<f32>,
    /// The Van Der Walls radius of the atom
    pub radius: f32,
    /// A unique Id for the atom
    pub id: usize,
    /// Parent Id
    pub parent_id: Option<isize>,
}

/// Can be used to specify output resolution of SASA computation for convenience.
pub enum SASALevel {
    Atom,
    Residue,
    Chain,
    Protein,
}

#[derive(Debug, PartialEq)]
pub struct ChainResult {
    pub name: String,
    pub value: f32,
}

#[derive(Debug, PartialEq)]
pub struct ResidueResult {
    pub serial_number: isize,
    pub value: f32,
    pub name: String,
    pub is_polar: bool,
}

#[derive(Debug, PartialEq)]
pub struct ProteinResult {
    /// The total SASA value for the entire protein
    pub global_total: f32,
    /// The total polar SASA value for the entire protein
    pub polar_total: f32,
    /// The total *non*-polar SASA value for the entire protein
    pub non_polar_total: f32,
}

#[derive(Debug, PartialEq)]
pub enum SASAResult {
    Atom(Vec<f32>),
    Residue(Vec<ResidueResult>),
    Chain(Vec<ChainResult>),
    Protein(ProteinResult),
}

#[derive(Debug, Snafu)]
pub enum SASACalcError {
    #[snafu(display("Element missing for atom"))]
    ElementMissing,

    #[snafu(display("Van der Waals radius missing for element"))]
    VanDerWaalsMissing,

    #[snafu(display("Failed to map atoms back to level element"))]
    AtomMapToLevelElementFailed,

    #[snafu(display("Failed to get residue name"))]
    FailedToGetResidueName,
}

impl RTreeObject for Atom {
    type Envelope = AABB<[f32; 3]>;

    fn envelope(&self) -> Self::Envelope {
        AABB::from_point(<[f32; 3]>::from(self.position))
    }
}

impl PointDistance for Atom {
    fn distance_2(&self, other: &[f32; 3]) -> f32 {
        let xyz = self.position.coords.xyz();
        let z = xyz[2];
        let y = xyz[1];
        let x = xyz[0];
        // No square root as that is required by the package
        (other[2] - z).mul_add(
            other[2] - z,
            (other[1] - y).mul_add(other[1] - y, (other[0] - x).powi(2)),
        )
    }
}

/// Generates points on a sphere using the Golden Section Spiral algorithm
fn generate_sphere_points(n_points: usize) -> Vec<Vector3<f32>> {
    let mut points = Vec::with_capacity(n_points);
    let golden_ratio = (1.0 + 5f32.sqrt()) / 2.0;
    let angle_increment = 2.0 * std::f32::consts::PI * golden_ratio;

    for i in 0..n_points {
        let t = i as f32 / n_points as f32;
        let inclination = (1.0 - 2.0 * t).acos();
        let azimuth = angle_increment * i as f32;

        let x = inclination.sin() * azimuth.cos();
        let y = inclination.sin() * azimuth.sin();
        let z = inclination.cos();

        points.push(Vector3::new(x, y, z));
    }

    points
}

fn is_accessible_rstar(
    test_point: &Point3<f32>,
    atom: &Atom,
    atoms: &RTree<Atom>,
    probe_radius: f32,
    max_radii: f32,
) -> bool {
    let xyz = test_point.coords.xyz();
    let sr = probe_radius + (max_radii * 2.0);
    let candidates = atoms.locate_within_distance([xyz[0], xyz[1], xyz[2]], sr * sr);
    for candidate in candidates {
        if atom.id != candidate.id
            && (test_point - candidate.position).norm() < (candidate.radius + probe_radius)
        {
            return false;
        }
    }
    true
}

/// Takes the probe radius and number of points to use along with a list of Atoms as inputs and returns a Vec with SASA values for each atom.
/// For most users it is recommend that you use `calculate_sasa` instead. This method can be used directly if you do not want to use pdbtbx to load PDB/mmCIF files or want to load them from a different source.
/// Probe Radius Default: 1.4
/// Point Count Default: 100
/// ## Example using pdbtbx:
/// ```
/// use nalgebra::{Point3, Vector3};
/// use pdbtbx::StrictnessLevel;
/// use rust_sasa::{Atom, calculate_sasa_internal};
/// let (mut pdb, _errors) = pdbtbx::open(
///             "./example.cif",
///             StrictnessLevel::Medium
///         ).unwrap();
/// let mut atoms = vec![];
/// for atom in pdb.atoms() {
///     atoms.push(Atom {
///                 position: Point3::new(atom.pos().0 as f32, atom.pos().1 as f32, atom.pos().2 as f32),
///                 radius: atom.element().unwrap().atomic_radius().van_der_waals.unwrap() as f32,
///                 id: atom.serial_number(),
///                 parent_id: None
///     })
///  }
///  let sasa = calculate_sasa_internal(&atoms, None, None);
/// ```
pub fn calculate_sasa_internal(
    atoms: &[Atom],
    in_probe_radius: Option<f32>,
    in_n_points: Option<usize>,
) -> Vec<f32> {
    // Load defaults if not specified
    let mut probe_radius = 1.4;
    let mut n_points = 100;
    if let Some(in_probe_radius) = in_probe_radius {
        probe_radius = in_probe_radius;
    }
    if let Some(in_n_points) = in_n_points {
        n_points = in_n_points;
    }
    //
    let sphere_points = generate_sphere_points(n_points);

    // Create R*-tree from atoms for spatial lookup
    let tree = RTree::bulk_load(atoms.to_vec());
    let tree_arc = Arc::new(tree); // Use Arc for safe sharing among threads
    let mut max_radii = 0.0;
    for atom in atoms {
        if atom.radius > max_radii {
            max_radii = atom.radius;
        }
    }
    atoms
        .par_iter()
        .map(|atom| {
            let mut accessible_points = 0;

            for sphere_point in &sphere_points {
                let test_point = atom.position + sphere_point * (atom.radius + probe_radius);
                if is_accessible_rstar(&test_point, atom, &tree_arc, probe_radius, max_radii) {
                    accessible_points += 1;
                }
            }
            4.0 * std::f32::consts::PI
                * (atom.radius + probe_radius).powi(2)
                * (accessible_points as f32)
                / (n_points as f32)
        })
        .collect()
}

/// This function calculates the SASA for a given protein. The output level can be specified with the level attribute e.g: (SASALevel::Atom,SASALevel::Residue,etc...).
/// Probe radius and n_points can be customized if not customized will default to 1.4, and 100 respectively.
/// If you want more fine-grained control you may want to use [calculate_sasa_internal] instead.
/// ## Example
/// ```
/// use pdbtbx::StrictnessLevel;
/// use rust_sasa::{Atom, calculate_sasa, calculate_sasa_internal, SASALevel};
/// let (mut pdb, _errors) = pdbtbx::open(
///             "./example.cif",
///             StrictnessLevel::Medium
/// ).unwrap();
/// let result = calculate_sasa(&pdb,None,None,SASALevel::Residue);
/// ```
pub fn calculate_sasa(
    pdb: &PDB,
    probe_radius: Option<f32>,
    n_points: Option<usize>,
    level: SASALevel,
) -> Result<SASAResult, SASACalcError> {
    let mut atoms = vec![];
    let mut parent_to_atoms = HashMap::new();
    match level {
        SASALevel::Atom => {
            for atom in pdb.atoms() {
                atoms.push(Atom {
                    position: Point3::new(
                        atom.pos().0 as f32,
                        atom.pos().1 as f32,
                        atom.pos().2 as f32,
                    ),
                    radius: atom
                        .element()
                        .context(ElementMissingSnafu)?
                        .atomic_radius()
                        .van_der_waals
                        .context(VanDerWaalsMissingSnafu)? as f32,
                    id: atom.serial_number(),
                    parent_id: None,
                })
            }
        }
        SASALevel::Residue | SASALevel::Protein => {
            let mut i = 0;
            for residue in pdb.residues() {
                let mut temp = vec![];
                for atom in residue.atoms() {
                    atoms.push(Atom {
                        position: Point3::new(
                            atom.pos().0 as f32,
                            atom.pos().1 as f32,
                            atom.pos().2 as f32,
                        ),
                        radius: atom
                            .element()
                            .context(ElementMissingSnafu)?
                            .atomic_radius()
                            .van_der_waals
                            .context(VanDerWaalsMissingSnafu)?
                            as f32,
                        id: atom.serial_number(),
                        parent_id: Some(residue.serial_number()),
                    });
                    temp.push(i);
                    i += 1;
                }
                parent_to_atoms.insert(residue.serial_number(), temp);
            }
        }
        SASALevel::Chain => {
            let mut i = 0;
            for chain in pdb.chains() {
                let mut temp = vec![];
                let chain_id = serialize_chain_id(chain.id());
                for atom in chain.atoms() {
                    atoms.push(Atom {
                        position: Point3::new(
                            atom.pos().0 as f32,
                            atom.pos().1 as f32,
                            atom.pos().2 as f32,
                        ),
                        radius: atom
                            .element()
                            .context(ElementMissingSnafu)?
                            .atomic_radius()
                            .van_der_waals
                            .context(VanDerWaalsMissingSnafu)?
                            as f32,
                        id: atom.serial_number(),
                        parent_id: Some(chain_id),
                    });
                    temp.push(i);
                    i += 1
                }
                parent_to_atoms.insert(chain_id, temp);
            }
        }
    }
    let atom_sasa = calculate_sasa_internal(&atoms, probe_radius, n_points);
    return match level {
        SASALevel::Atom => Ok(SASAResult::Atom(atom_sasa)),
        SASALevel::Chain => {
            let mut chain_sasa = vec![];
            for chain in pdb.chains() {
                let chain_id = serialize_chain_id(chain.id());
                let chain_atom_index = parent_to_atoms
                    .get(&chain_id)
                    .context(AtomMapToLevelElementFailedSnafu)?;
                let chain_atoms: Vec<_> = chain_atom_index
                    .iter()
                    .map(|&index| atom_sasa[index])
                    .collect();
                let sum = simd_sum(chain_atoms.as_slice());
                chain_sasa.push(ChainResult {
                    name: chain.id().to_string(),
                    value: sum,
                })
            }
            Ok(SASAResult::Chain(chain_sasa))
        }
        SASALevel::Residue => {
            let mut residue_sasa = vec![];
            for residue in pdb.residues() {
                let residue_atom_index = parent_to_atoms
                    .get(&residue.serial_number())
                    .context(AtomMapToLevelElementFailedSnafu)?;
                let residue_atoms: Vec<_> = residue_atom_index
                    .iter()
                    .map(|&index| atom_sasa[index])
                    .collect();
                let sum = simd_sum(residue_atoms.as_slice());
                let name = residue
                    .name()
                    .context(FailedToGetResidueNameSnafu)?
                    .to_string();
                residue_sasa.push(ResidueResult {
                    serial_number: residue.serial_number(),
                    value: sum,
                    is_polar: POLAR_AMINO_ACIDS.contains(&name),
                    name,
                })
            }
            Ok(SASAResult::Residue(residue_sasa))
        }
        SASALevel::Protein => {
            let mut polar_total: f32 = 0.0;
            let mut non_polar_total: f32 = 0.0;
            for residue in pdb.residues() {
                let residue_atom_index = parent_to_atoms
                    .get(&residue.serial_number())
                    .context(AtomMapToLevelElementFailedSnafu)?;
                let residue_atoms: Vec<_> = residue_atom_index
                    .iter()
                    .map(|&index| atom_sasa[index])
                    .collect();
                let sum = simd_sum(residue_atoms.as_slice());
                let name = residue
                    .name()
                    .context(FailedToGetResidueNameSnafu)?
                    .to_string();
                if POLAR_AMINO_ACIDS.contains(&name) {
                    polar_total += sum
                } else {
                    non_polar_total += sum
                }
            }
            let global_sum = simd_sum(atom_sasa.as_slice());
            Ok(SASAResult::Protein(ProteinResult {
                global_total: global_sum,
                polar_total,
                non_polar_total,
            }))
        }
    };
}