dynamics 0.1.8

Molecular dynamics
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
//! Contains setup code, including applying forcefield data to our specific
//! atoms.

// Notes to square away the 3 "atom name" / "Amber atom type" / "force field type" keys.
// This guide shows Type 1. https://emleddin.github.io/comp-chem-website/AMBERguide-AMBER-atom-types.html,
//
// Update: "Type 1" = "type_in_res" in our code now. "Type 2" = "ff_type" for AAs, and "Type 3" = "ff_type" for small mols.
//
// Type 1 Examples: "CA", "HA", "CZ", "HB3", "HH22", HZ2", "N", "H", "HG3", "O", "CD", "C", "HG23", "CG", "CB", "CG1", "HE2", "HB3",
// Type 1 Sources: `amino19.lib`, col 0. mmCIF atom coordinate files.
//
// Type 2 Examples:  "HC", "C8", "HC", "H"(both), "XC", "N"(both), "H"(both), "H1", "CT", "OH", "HO", "2C",
// Type 2 Sources: `amino19.lib` (AA/protein partial charges), col 1. `frcmod.ff19SB`. (AA/protein params)
//
// Small Mol/lig:
// Type 3 Examples: "oh", "h1", "ca", "o", "os", "c6", "n3", "c3"
// Type 3 Sources: `.mol2` generated by Amber. (Small mol coordinates and partial charges) `gaff2.dat` (Small molg params)
//
// MOl2 for ligands also have "C10", "O7" etc, which is ambiguous here, and not required, as their params
// use Type 3, which is present. It's clear what to do for ligand
//
// Best guess: Type 1 identifies labels within the residue only. Type 2 (AA) and Type 3 (small mol) are the FF types.

use std::{collections::HashSet, fmt};

#[cfg(feature = "encode")]
use bincode::{Decode, Encode};
use bio_files::{
    gromacs::mdp::{ConstraintAlgorithm, Constraints},
    md_params::{AngleBendingParams, BondStretchingParams, ForceFieldParams, LjParams, MassParams},
};
use itertools::Itertools;
use na_seq::Element::Hydrogen;

use crate::{
    AtomDynamics, MdState, ParamError, bonded::SHAKE_TOL_DEFAULT, params::ForceFieldParamsIndexed,
};

/// Add items from one parameter set to the other. If there are duplicates, the second set's overrides
/// the baseline.
pub fn merge_params(baseline: &ForceFieldParams, add_this: &ForceFieldParams) -> ForceFieldParams {
    let mut merged = baseline.clone();

    merged.mass.extend(add_this.mass.clone());
    merged.lennard_jones.extend(add_this.lennard_jones.clone());

    merged.bond.extend(add_this.bond.clone());
    merged.angle.extend(add_this.angle.clone());
    merged.dihedral.extend(add_this.dihedral.clone());
    merged.improper.extend(add_this.improper.clone());

    merged
}

/// We use this variant in the configuration API. Deferrs to `HydrogenConstraintInner` for holding
/// constraints.
#[cfg_attr(feature = "encode", derive(Encode, Decode))]
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum HydrogenConstraint {
    /// A linear constraint solver. Similar to GROMACS' LINCS. This is slightly faster,
    /// and slightly more stable than SHAKE.
    Linear {
        order: u8,
        iter: u8,
    },
    Shake {
        shake_tolerance: f32,
    },
    /// Uses Shake and Rattle to fix the hydrogen positions. This allows for a larger timestep,
    /// e.g. 2fs instead of 1fs.
    /// Uses the same bonded parameters as elsewhere: A spring model
    Flexible,
}

impl Default for HydrogenConstraint {
    fn default() -> Self {
        // HydrogenConstraint::Shake {
        //     shake_tolerance: SHAKE_TOL_DEFAULT,
        // }
        // GROMACS defaults here.
        HydrogenConstraint::Linear { order: 4, iter: 1 }
    }
}

impl HydrogenConstraint {
    pub fn to_gromacs(&self) -> Constraints {
        match self {
            Self::Linear { order, iter } => Constraints::HBonds(ConstraintAlgorithm::Lincs {
                order: *order,
                iter: *iter,
            }),
            Self::Shake {
                shake_tolerance: tolerance,
            } => Constraints::HBonds(ConstraintAlgorithm::Shake { tol: *tolerance }),
            Self::Flexible => Constraints::None,
        }
    }
}

impl fmt::Display for HydrogenConstraint {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            HydrogenConstraint::Linear { .. } => {
                write!(f, "Constrained (LINCS)")
            }
            HydrogenConstraint::Shake { shake_tolerance: _ } => {
                write!(f, "Constrained (SHAKE)")
            }
            HydrogenConstraint::Flexible => write!(f, "Flexible"),
        }
    }
}

/// Associate loaded Force field data (e.g. from Amber) into the atom indices used in a specific
/// dynamics sim. This handles combining general and molecule-specific parameter sets, and converting
/// between atom name, and the specific indices of the atoms we're using.
///
/// This code is straightforward if params are available; much of the logic here is related to handling
/// missing parameters.
impl ForceFieldParamsIndexed {
    pub fn new(
        params: &ForceFieldParams,
        // params_specific: Option<&ForceFieldParams>,
        // atoms: &[AtomGeneric],
        atoms: &[AtomDynamics],
        // bonds: &[BondGeneric],
        adjacency_list: &[Vec<usize>],
        // Mutable, since we load the hydrogen r0s into it, instead of adding bond stretching params
        // in case of fixed hydrogen.
        // h_constraints: &mut HydrogenConstraintInner,
        h_constraint: HydrogenConstraint,
        // allow_missing_dihedral_params: bool,
    ) -> Result<Self, ParamError> {
        let mut result = Self::default();

        for (i, atom) in atoms.iter().enumerate() {
            let ff_type = &atom.force_field_type;

            // Mass
            if let Some(mass) = params.mass.get(ff_type) {
                result.mass.insert(i, mass.clone());
            } else if ff_type.starts_with("C") {
                match params.mass.get("C") {
                    Some(m) => {
                        result.mass.insert(i, m.clone());
                        println!("Using C fallback mass for {ff_type}");
                    }
                    None => {
                        return Err(ParamError::new(&format!(
                            "\nMD failure: Missing mass params for {ff_type}"
                        )));
                    }
                }
            } else if ff_type.starts_with("N") {
                match params.mass.get("N") {
                    Some(m) => {
                        result.mass.insert(i, m.clone());
                        println!("Using N fallback mass for {ff_type}");
                    }
                    None => {
                        return Err(ParamError::new(&format!(
                            "\nMD failure: Missing mass params for {ff_type}"
                        )));
                    }
                }
            } else if ff_type.starts_with("O") {
                match params.mass.get("O") {
                    Some(m) => {
                        result.mass.insert(i, m.clone());
                        println!("Using O fallback mass for {ff_type}");
                    }
                    None => {
                        return Err(ParamError::new(&format!(
                            "\nMD failure: Missing mass params for {ff_type}"
                        )));
                    }
                }
            } else {
                result.mass.insert(
                    i,
                    MassParams {
                        atom_type: "".to_string(),
                        mass: atom.element.atomic_weight(),
                        comment: None,
                    },
                );

                println!("\nMissing mass params on {atom}; using element default.");

                // return Err(ParamError::new(&format!(
                //     "MD failure: Missing mass params for {ff_type}"
                // )));
            }

            // Lennard-Jones / van der Waals
            if let Some(vdw) = params.lennard_jones.get(ff_type) {
                result.lennard_jones.insert(i, vdw.clone());
                // If the key is missing for the given FF type in our loaded data, check for certain
                // special cases.
            } else {
                // The mass values for all 4 of these are present in frcmod.ff19sb.
                if ff_type == "2C" || ff_type == "3C" || ff_type == "C8" {
                    result
                        .lennard_jones
                        .insert(i, params.lennard_jones.get("CT").unwrap().clone());
                } else if ff_type == "CO" {
                    result
                        .lennard_jones
                        .insert(i, params.lennard_jones.get("C").unwrap().clone());
                } else if ff_type == "OXT" {
                    result
                        .lennard_jones
                        .insert(i, params.lennard_jones.get("O2").unwrap().clone());
                } else if ff_type.starts_with("N") {
                    result
                        .lennard_jones
                        .insert(i, params.lennard_jones.get("N").unwrap().clone());
                    println!("Using N fallback VdW for {atom}");
                } else if ff_type.starts_with("O") {
                    result
                        .lennard_jones
                        .insert(i, params.lennard_jones.get("O").unwrap().clone());
                    println!("Using O fallback LJ for {atom}");
                } else {
                    println!("\nMissing LJ params for {atom}; setting to 0.");
                    // 0. no interaction.
                    // todo: If this is "CG" etc, fall back to other carbon params instead.
                    result.lennard_jones.insert(
                        i,
                        LjParams {
                            atom_type: "".to_string(),
                            sigma: 0.,
                            eps: 0.,
                        },
                    );
                }

                // return Err(ParamError::new(&format!(
                //     "MD failure: Missing Van der Waals params for {ff_type}"
                // )));
            }
        }

        // Map from serial number fo index, for bonds and the atoms they point ot.
        // let mut index_map = HashMap::new();
        // for (i, atom) in atoms.iter().enumerate() {
        //     index_map.insert(atom.serial_number, i);
        // }

        // Bond lengths.
        // for bond in bonds {
        for (i0, neighbors) in adjacency_list.iter().enumerate() {
            for &i1 in neighbors {
                if i0 >= i1 {
                    continue; // Only add each bond once.
                }

                let type_0 = &atoms[i0].force_field_type;
                let type_1 = &atoms[i1].force_field_type;

                let data = params.get_bond(&(type_0.clone(), type_1.clone()), true);

                let Some(data) = data else {
                    // todo: Consider removing this, and return an error.
                    eprintln!(
                        "\nMissing bond parameters for {type_0}-{type_1} on {} - {}. Using a safe default.",
                        atoms[i0], atoms[i1]
                    );
                    result.bond_stretching.insert(
                        (i0.min(i1), i0.max(i1)),
                        BondStretchingParams {
                            atom_types: (String::new(), String::new()),
                            k_b: 300.,
                            r_0: (atoms[i0].posit - atoms[i1].posit).magnitude(),
                            comment: None,
                        },
                    );
                    continue;
                };
                let data = data.clone();

                // If using fixed hydrogens, don't add these to our bond stretching params;
                // add to a separate hydrogen rigid param variable.
                if matches!(
                    h_constraint,
                    HydrogenConstraint::Shake { shake_tolerance: _ }
                        | HydrogenConstraint::Linear { .. }
                ) && (atoms[i0].element == Hydrogen || atoms[i1].element == Hydrogen)
                {
                    // Set up inverse mass using params directly, so we don't have to have
                    // mass loaded into the atom directly yet.
                    let ff_type_0 = &atoms[i0].force_field_type;
                    let ff_type_1 = &atoms[i1].force_field_type;

                    // Mass
                    let (Some(mass_0), Some(mass_1)) =
                        (params.mass.get(ff_type_0), params.mass.get(ff_type_1))
                    else {
                        return Err(ParamError::new(&format!(
                            "MD failure: Missing mass params for {ff_type_0} or {ff_type_1}"
                        )));
                    };

                    // `bonds_topology` exists separately from `bond_params` specifically so we can
                    // account for bonds to H in exclusions.
                    // We will populate inverse mass in a second loop.
                    let inv_mass = 1. / mass_0.mass + 1. / mass_1.mass;

                    result
                        .bond_rigid_constraints
                        .insert((i0, i1), (data.r_0.powi(2), inv_mass));
                    result.bonds_topology.insert((i0, i1));
                    continue;
                }

                result.bond_stretching.insert((i0, i1), data);
                result.bonds_topology.insert((i0, i1));
            }
        }

        // Valence angles: Every connection between 3 atoms bonded linearly.
        for (ctr, neighbors) in adjacency_list.iter().enumerate() {
            if neighbors.len() < 2 {
                continue;
            }
            for (&n0, &n1) in neighbors.iter().tuple_combinations() {
                let type_n0 = &atoms[n0].force_field_type;
                let type_ctr = &atoms[ctr].force_field_type;
                let type_n1 = &atoms[n1].force_field_type;

                let data = params
                    .get_valence_angle(&(type_n0.clone(), type_ctr.clone(), type_n1.clone()), true);

                let Some(data) = data else {
                    // This comes up with the Hydrogen bound to NB in His. I don't know what to make of it.
                    // I'm not sure exactly why I can't find this CR-NB-H angle, but
                    // try subbing the NA variant:
                    // "CR-NA-H     50.0      120.00    AA his,    changed based on NMA nmodes"
                    if (type_n0 == "H" || type_n1 == "H")
                        && type_ctr == "NB"
                        && (type_n0 == "CR"
                            || type_n1 == "CR"
                            || type_n0 == "CV"
                            || type_n1 == "CV")
                    {
                        // todo: Get to the bottom of this.
                        println!(
                            "His HB: Skipping valence angle. For now, inserting a dummy one with no force."
                        );
                        result.angle.insert(
                            (n0, ctr, n1),
                            AngleBendingParams {
                                atom_types: (String::new(), String::new(), String::new()),
                                k: 0.,
                                theta_0: 0.,
                                comment: None,
                            },
                        );
                        continue;

                        // if let Some(v) = params.get_valence_angle(&(
                        //     type_n0.clone(),
                        //     "NA".to_string(),
                        //     type_n1.clone(),
                        // )) {
                        //     println!("Substituting NA for NB in valence angle params");
                        //     result.angle.insert((n0, ctr, n1), v.clone());
                        //     continue;
                        // }
                    }

                    return Err(ParamError::new(&format!(
                        "\nMD failure: Missing valence angle params for {type_n0}-{type_ctr}-{type_n1}. (sns: {} - {} - {})",
                        atoms[n0].serial_number, atoms[ctr].serial_number, atoms[n1].serial_number,
                    )));
                    // }
                };
                let data = data.clone();

                result.angle.insert((n0, ctr, n1), data);
            }
        }

        // Proper and improper dihedral angles.
        let mut seen = HashSet::<(usize, usize, usize, usize)>::new();

        // Proper dihedrals: Atoms 1-2-3-4 bonded linearly
        for (i1, nbr_j) in adjacency_list.iter().enumerate() {
            for &i2 in nbr_j {
                if i1 >= i2 {
                    continue;
                } // handle each j-k bond once

                for &i0 in adjacency_list[i1].iter().filter(|&&x| x != i2) {
                    for &i3 in adjacency_list[i2].iter().filter(|&&x| x != i1) {
                        if i0 == i3 {
                            continue;
                        }

                        // Canonicalise so (i1, i2) is always (min, max)
                        let idx_key = if i1 < i2 {
                            (i0, i1, i2, i3)
                        } else {
                            (i3, i2, i1, i0)
                        };
                        if !seen.insert(idx_key) {
                            continue;
                        }

                        let type_0 = &atoms[i0].force_field_type;
                        let type_1 = &atoms[i1].force_field_type;
                        let type_2 = &atoms[i2].force_field_type;
                        let type_3 = &atoms[i3].force_field_type;

                        let key = (
                            type_0.clone(),
                            type_1.clone(),
                            type_2.clone(),
                            type_3.clone(),
                        );

                        if let Some(dihes) = params.get_dihedral(&key, true, true) {
                            let mut dihes = dihes.clone();

                            for d in &mut dihes {
                                // Divide here; then don't do it during the dynamics run. Optimization.
                                d.barrier_height /= d.divider as f32;
                                d.divider = 1;
                            }
                            result.dihedral.insert(idx_key, dihes);
                        // }
                        // else if allow_missing_dihedral_params {
                        //     // Default of no constraint
                        //     result.dihedral.insert(
                        //         idx_key,
                        //         vec![DihedralParams {
                        //             atom_types: key,
                        //             divider: 1,
                        //             barrier_height: 0.,
                        //             phase: 0.,
                        //             periodicity: 1,
                        //             comment: None,
                        //         }],
                        //     );
                        } else {
                            return Err(ParamError::new(&format!(
                                "\nMD failure: Missing dihedral params for {type_0}-{type_1}-{type_2}-{type_3}. (atom0 sn: {})",
                                atoms[i0].serial_number
                            )));
                        }
                    }
                }
            }
        }

        // Improper dihedrals 2-1-3-4. Atom 3 is the hub, with the other 3 atoms bonded to it.
        // The order of the others in the angle calculation affects the sign of the result.
        // Generally only for planar configs.
        //
        // Note: The sattelites are expected to be in alphabetical order, re their FF types.
        // So, for the hub of "ca" with sattelites of "ca", "ca", and "os", the correct combination
        // to look for in the params is "ca-ca-ca-os"
        for (ctr, satellites) in adjacency_list.iter().enumerate() {
            if satellites.len() < 3 {
                continue;
            }

            // Unique unordered triples of neighbours
            for a in 0..satellites.len() - 2 {
                for b in a + 1..satellites.len() - 1 {
                    for d in b + 1..satellites.len() {
                        let (sat0, sat1, sat2) = (satellites[a], satellites[b], satellites[d]);

                        let idx_key = (sat0, sat1, ctr, sat2); // Order is fixed; no swap
                        if !seen.insert(idx_key) {
                            continue;
                        }

                        let type_0 = &atoms[sat0].force_field_type;
                        let type_1 = &atoms[sat1].force_field_type;
                        let type_ctr = &atoms[ctr].force_field_type;
                        let type_2 = &atoms[sat2].force_field_type;

                        // Sort satellites alphabetically; required to ensure we don't miss combinations.
                        let mut sat_types = [type_0.clone(), type_1.clone(), type_2.clone()];
                        sat_types.sort();

                        let key = (
                            sat_types[0].clone(),
                            sat_types[1].clone(),
                            type_ctr.clone(),
                            sat_types[2].clone(),
                        );

                        // In the case of improper, unlike all other param types, we are allowed to
                        // have missing values. Impropers are only, by Amber convention, for planar
                        // hub and spoke setups, so non-planar ones will be omitted. These may occur,
                        // for example, at ring intersections.
                        if let Some(dihes) = params.get_dihedral(&key, false, true) {
                            let mut dihes = dihes.clone();
                            for d in &mut dihes {
                                // Generally, there is no divisor for impropers, but set it up here
                                // to be more general.
                                d.barrier_height /= d.divider as f32;
                                d.divider = 1;
                            }
                            result.improper.insert(idx_key, dihes);
                        }
                    }
                }
            }
        }

        Ok(result)
    }
}

impl MdState {
    /// We use this to set up optimizations defined in the Amber reference manual. `excluded` deals
    /// with sections were we skip coulomb and Vdw interactions for atoms separated by 1 or 2 bonds. `scaled14` applies a force
    /// scaler for these interactions, when separated by 3 bonds.
    pub(crate) fn setup_nonbonded_exclusion_scale_flags(&mut self) {
        // Helper to store pairs in canonical (low,high) order
        let push = |set: &mut HashSet<(usize, usize)>, i: usize, j: usize| {
            if i < j {
                set.insert((i, j));
            } else {
                set.insert((j, i));
            }
        };

        // 1-2
        for indices in &self.force_field_params.bonds_topology {
            push(&mut self.pairs_excluded_12_13, indices.0, indices.1);
        }

        // 1-3
        for indices in self.force_field_params.angle.keys() {
            push(&mut self.pairs_excluded_12_13, indices.0, indices.2);
        }

        // 1-4. We do not count improper dihedrals here.
        for indices in self.force_field_params.dihedral.keys() {
            push(&mut self.pairs_14_scaled, indices.0, indices.3);
        }

        // Make sure no 1-4 pair is also in the excluded set
        for p in &self.pairs_14_scaled {
            self.pairs_excluded_12_13.remove(p);
        }
    }
}