dynamics 0.1.9

Molecular dynamics
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
//! For Amber and other parameters.

use std::{
    collections::{HashMap, HashSet},
    io,
    path::PathBuf,
};

use bio_files::{
    AtomGeneric, BondGeneric, ChainGeneric, LipidStandard, MmCif, ResidueEnd, ResidueGeneric,
    ResidueType, create_bonds,
    md_params::{
        AngleBendingParams, BondStretchingParams, ChargeParams, ChargeParamsProtein,
        DihedralParams, ForceFieldParams, LjParams, MassParams, NucleotideTemplate,
        load_amino_charges, parse_lib_lipid, parse_lib_nucleic_acid, parse_lib_peptide,
    },
};
use na_seq::{AminoAcid, AminoAcidGeneral, AminoAcidProtenationVariant, AtomTypeInRes, Element};

use crate::{Dihedral, ParamError, merge_params, populate_hydrogens_dihedrals};

pub type ProtFfChargeMap = HashMap<AminoAcidGeneral, Vec<ChargeParamsProtein>>;
pub type LipidFfChargeMap = HashMap<LipidStandard, Vec<ChargeParams>>;
pub type NucleicAcidFfChargeMap = HashMap<NucleotideTemplate, Vec<ChargeParams>>;

// We include Amber parameter files with this package.
// Proteins and amino acids:
const PARM_19: &str = include_str!("../param_data/parm19.dat"); // Bonded, and LJ
const FRCMOD_FF19SB: &str = include_str!("../param_data/frcmod.ff19SB"); // Bonded, and LJ: overrides and new types
pub const AMINO_19: &str = include_str!("../param_data/amino19.lib"); // Charge; internal residues
const AMINO_NT12: &str = include_str!("../param_data/aminont12.lib"); // Charge; protonated N-terminus residues
const AMINO_CT12: &str = include_str!("../param_data/aminoct12.lib"); // Charge; protonated C-terminus residues

// Ligands/small organic molecules: *General Amber Force Fields*.
const GAFF2: &str = include_str!("../param_data/gaff2.dat");
// Lipids
const LIPID_21: &str = include_str!("../param_data/lipid21.dat"); // Bonded and LJ

// Public, so we can use it for lipid templates.
pub const LIPID_21_LIB: &str = include_str!("../param_data/lipid21.lib"); // Charge and FF names

// DNA (OL24) and RNA (OL3)
pub const OL24_LIB: &str = include_str!("../param_data/ff-nucleic-OL24.lib");
const OL24_FRCMOD: &str = include_str!("../param_data/ff-nucleic-OL24.frcmod");
// todo: frcmod.protonated_nucleic?
// RNA (I believe this is the OL3 Amber's FF page recommends?)
pub const RNA_LIB: &str = include_str!("../param_data/RNA.lib");
// todo: RNA.YIL.lib? RNA_CI.lib? RNA_Shaw.lib? These are, I believe, "alternative" libraries,
// todo, and not required. YIL: Yildirim torsion refit. CI: Legacy Cornell-style. SHAW: incomplete,
// todo from a person named Shaw.

// Note: Water parameters are concise; we store them directly.

#[derive(Default, Debug)]
/// A set of general parameters that aren't molecule-specific. E.g. from GAFF2, OL3, RNA, or amino19.
/// These are used as a baseline, and in some cases, overridden by molecule-specific parameters.
pub struct FfParamSet {
    pub peptide: Option<ForceFieldParams>,
    pub small_mol: Option<ForceFieldParams>,
    pub dna: Option<ForceFieldParams>,
    pub rna: Option<ForceFieldParams>,
    pub lipids: Option<ForceFieldParams>,
    pub carbohydrates: Option<ForceFieldParams>,
    /// In addition to charge, this also contains the mapping of res type to FF type; required to map
    /// other parameters to protein atoms. E.g. from `amino19.lib`, and its N and C-terminus variants.
    pub peptide_ff_q_map: Option<ProtFfChargeMapSet>,
    pub lipid_ff_q_map: Option<LipidFfChargeMap>,
    // todo: QC these types; lipid as place holder. See how they parse.
    pub dna_ff_q_map: Option<NucleicAcidFfChargeMap>,
    pub rna_ff_q_map: Option<NucleicAcidFfChargeMap>,
}

/// Paths for to general parameter files. Used to create a FfParamSet.
#[derive(Clone, Debug, Default)]
pub struct ParamGeneralPaths {
    /// E.g. parm19.dat
    pub peptide: Option<PathBuf>,
    /// E.g. ff19sb.dat
    pub peptide_mod: Option<PathBuf>,
    /// E.g. amino19.lib
    pub peptide_ff_q: Option<PathBuf>,
    /// E.g. aminoct12.lib
    pub peptide_ff_q_c: Option<PathBuf>,
    /// E.g. aminont12.lib
    pub peptide_ff_q_n: Option<PathBuf>,
    /// e.g. gaff2.dat
    pub small_organic: Option<PathBuf>,
    /// e.g. ff-nucleic-OL24.lib
    pub dna: Option<PathBuf>,
    /// e.g. ff-nucleic-OL24.frcmod
    pub dna_mod: Option<PathBuf>,
    /// e.g. RNA.lib
    pub rna: Option<PathBuf>,
    pub lipid: Option<PathBuf>,
    pub carbohydrate: Option<PathBuf>,
}

impl FfParamSet {
    /// Load general parameter files for proteins, and small organic molecules.
    /// This also populates ff type and charge for protein atoms.
    pub fn new(paths: &ParamGeneralPaths) -> io::Result<Self> {
        let mut result = FfParamSet::default();

        if let Some(p) = &paths.peptide {
            let peptide = ForceFieldParams::load_dat(p)?;

            if let Some(p_mod) = &paths.peptide_mod {
                let frcmod = ForceFieldParams::load_frcmod(p_mod)?;
                result.peptide = Some(merge_params(&peptide, &frcmod));
            } else {
                result.peptide = Some(peptide);
            }
        }

        let mut ff_map = ProtFfChargeMapSet::default();
        if let Some(p) = &paths.peptide_ff_q {
            ff_map.internal = load_amino_charges(p)?;
        }
        if let Some(p) = &paths.peptide_ff_q_c {
            ff_map.internal = load_amino_charges(p)?;
        }
        if let Some(p) = &paths.peptide_ff_q_n {
            ff_map.internal = load_amino_charges(p)?;
        }

        result.peptide_ff_q_map = Some(ff_map);

        if let Some(p) = &paths.small_organic {
            result.small_mol = Some(ForceFieldParams::load_dat(p)?);
        }

        if let Some(p) = &paths.dna {
            let peptide = ForceFieldParams::load_dat(p)?;

            if let Some(p_mod) = &paths.dna_mod {
                let frcmod = ForceFieldParams::load_frcmod(p_mod)?;
                result.dna = Some(merge_params(&peptide, &frcmod));
            } else {
                result.dna = Some(peptide);
            }
        }

        if let Some(p) = &paths.rna {
            result.rna = Some(ForceFieldParams::load_dat(p)?);
        }

        if let Some(p) = &paths.lipid {
            result.lipids = Some(ForceFieldParams::load_dat(p)?);
        }

        if let Some(p) = &paths.carbohydrate {
            result.carbohydrates = Some(ForceFieldParams::load_dat(p)?);
        }

        Ok(result)
    }

    /// Create a parameter set using Amber parameters included with this library. This uses
    /// the param sets recommended by Amber, CAO Sept 2025: ff19SB, OL24, OL3, GLYCAM_06j, lipids21,
    /// and gaff2.
    pub fn new_amber() -> io::Result<Self> {
        let mut result = FfParamSet::default();

        // We use parm19 for both peptides, and nucleic acids.
        let parm19 = ForceFieldParams::from_dat(PARM_19)?;

        let peptide_frcmod = ForceFieldParams::from_frcmod(FRCMOD_FF19SB)?;
        result.peptide = Some(merge_params(&parm19, &peptide_frcmod));

        {
            let internal = parse_lib_peptide(AMINO_19)?;
            let n_terminus = parse_lib_peptide(AMINO_NT12)?;
            let c_terminus = parse_lib_peptide(AMINO_CT12)?;

            result.peptide_ff_q_map = Some(ProtFfChargeMapSet {
                internal,
                n_terminus,
                c_terminus,
            });
        }

        let lipid_dat = ForceFieldParams::from_dat(LIPID_21)?;
        result.lipids = Some(lipid_dat);

        let lipid_charges = parse_lib_lipid(LIPID_21_LIB)?;
        result.lipid_ff_q_map = Some(lipid_charges);

        result.small_mol = Some(ForceFieldParams::from_dat(GAFF2)?);

        // todo: Load these, and get them working. They currently trigger a mass-parsing error.
        // todo: You must update your Lib parser in bio_files to handle this variant.

        let dna_frcmod = ForceFieldParams::from_frcmod(OL24_FRCMOD)?;
        result.dna = Some(merge_params(&parm19, &dna_frcmod));

        // todo: A/R
        result.rna = Some(parm19.clone());

        // todo: Currently hardcoded peptide/lipid versions for this lib parsing. Generalize?
        let dna_charges = parse_lib_nucleic_acid(OL24_LIB)?;
        result.dna_ff_q_map = Some(dna_charges);

        // todo: Currently hardcoded peptide/lipid versions for this lib parsing. Generalize?
        let rna_charges = parse_lib_nucleic_acid(RNA_LIB)?;
        result.rna_ff_q_map = Some(rna_charges);

        Ok(result)
    }
}

/// This variant of forcefield parameters offers the fastest lookups. Unlike the Vec and Hashmap
/// based parameter structs, this is specific to the atom in our docking setup: The indices are provincial
/// to specific sets of atoms. For a description of fields, see `ForceFieldParams`, or the individual
/// param-type structs here.
///
/// Note: The single-atom fields of `mass` and `partial_charges` are omitted: They're part of our
/// `AtomDynamics` struct.`
#[derive(Clone, Debug, Default)]
pub(crate) struct ForceFieldParamsIndexed {
    pub mass: HashMap<usize, MassParams>,
    pub bond_stretching: HashMap<(usize, usize), BondStretchingParams>,
    /// Any bond to Hydrogen if configured as constrained. (Distance^2 in Ã…, 1 / mass in Daltons)
    pub bond_rigid_constraints: HashMap<(usize, usize), (f32, f32)>,
    pub angle: HashMap<(usize, usize, usize), AngleBendingParams>,
    pub dihedral: HashMap<(usize, usize, usize, usize), Vec<DihedralParams>>,
    pub improper: HashMap<(usize, usize, usize, usize), Vec<DihedralParams>>,
    /// We use this to determine which 1-2 exclusions to apply for non-bonded forces. We use this
    /// instead of `bond_stretching`, because `bond_stretching` omits bonds to Hydrogen, which we need
    /// to account when applying exclusions.
    pub bonds_topology: HashSet<(usize, usize)>,
    pub lennard_jones: HashMap<usize, LjParams>,
}

#[derive(Clone, Default, Debug)]
/// Maps type-in-residue (found in, e.g. mmCIF and PDB files) to Amber FF type, and partial charge.
/// We assume that if one of these is loaded, so are the others. So, these aren't `Options`s, but
/// the field that holds this struct should be one.
pub struct ProtFfChargeMapSet {
    pub internal: ProtFfChargeMap,
    pub n_terminus: ProtFfChargeMap,
    pub c_terminus: ProtFfChargeMap,
}

/// Populate forcefield type, and partial charge on atoms. This should be run on mmCIF
/// files prior to running molecular dynamics on them. These files from RCSB PDB do not
/// natively have this data.
///
/// `residues` must be the full set; this is relevant to how we index it.
pub fn populate_peptide_ff_and_q(
    atoms: &mut [AtomGeneric],
    residues: &[ResidueGeneric],
    ff_type_charge: &ProtFfChargeMapSet,
) -> Result<(), ParamError> {
    // Tis is slower than if we had an index map already.
    let mut index_map = HashMap::new();
    for (i, atom) in atoms.iter().enumerate() {
        index_map.insert(atom.serial_number, i);
    }

    for res in residues {
        for sn in &res.atom_sns {
            let atom = match atoms.get_mut(index_map[sn]) {
                Some(a) => a,
                None => {
                    return Err(ParamError::new(&format!(
                        "Unable to populate Charge or FF type for atom {sn}"
                    )));
                }
            };

            if atom.hetero {
                continue;
            }

            let Some(type_in_res) = &atom.type_in_res else {
                return Err(ParamError::new(&format!(
                    "MD failure: Missing type in residue for atom: {atom}"
                )));
            };

            let ResidueType::AminoAcid(aa) = &res.res_type else {
                // e.g. solvent or other hetero atoms; skip.
                continue;
            };

            // todo: Eventually, determine how to load non-standard AA variants from files; set up your
            // todo state to use those labels. They are available in the params.
            let aa_gen = AminoAcidGeneral::Standard(*aa);

            let charge_map = match res.end {
                ResidueEnd::Internal => &ff_type_charge.internal,
                ResidueEnd::NTerminus => &ff_type_charge.n_terminus,
                ResidueEnd::CTerminus => &ff_type_charge.c_terminus,
                ResidueEnd::Hetero => {
                    return Err(ParamError::new(&format!(
                        "Error: Encountered hetero atom when parsing amino acid FF types: {atom}"
                    )));
                }
            };

            let charges = match charge_map.get(&aa_gen) {
                Some(c) => c,
                // A specific workaround to plain "HIS" being absent from amino19.lib (2025.
                // Choose one of "HID", "HIE", "HIP arbitrarily.
                // todo: Re-evaluate this, e.g. which one of the three to load.
                None if aa_gen == AminoAcidGeneral::Standard(AminoAcid::His) => charge_map
                    .get(&AminoAcidGeneral::Variant(AminoAcidProtenationVariant::Hid))
                    .ok_or_else(|| ParamError::new("Unable to find AA mapping"))?,
                None => return Err(ParamError::new("Unable to find AA mapping")),
            };

            let mut found = false;

            for charge in charges {
                // todo: Note that we have multiple branches in some case, due to Amber names like
                // todo: "HYP" for variants on AAs for different protenation states. Handle this.
                if charge.type_in_res == *type_in_res {
                    atom.force_field_type = Some(charge.ff_type.clone());
                    atom.partial_charge = Some(charge.charge);

                    found = true;
                    break;
                }
            }

            // Code below is mainly for the case of missing data; otherwise, the logic for this operation
            // is complete.

            if !found {
                match type_in_res {
                    // todo: This is a workaround for having trouble with H types. LIkely
                    // todo when we create them. For now, this meets the intent.
                    AtomTypeInRes::H(_) => {
                        // todo: This is a workaround for the above; try other HIS variants.
                        if aa_gen == AminoAcidGeneral::Standard(AminoAcid::His) {
                            let charges = charge_map
                                .get(&AminoAcidGeneral::Variant(AminoAcidProtenationVariant::Hie))
                                .ok_or_else(|| {
                                    ParamError::new("Unable to find AA mapping for HIE")
                                })?;

                            // todo: You may need HIP too, even with this workaround.
                            // todo: DRY

                            for charge in charges {
                                if charge.type_in_res == *type_in_res {
                                    atom.force_field_type = Some(charge.ff_type.clone());
                                    atom.partial_charge = Some(charge.charge);

                                    found = true;
                                    break;
                                }
                            }
                            if found {
                                break;
                            }
                        }

                        // The amber template doesn't have HH23; only 2 Hs on that. I believe
                        // this may be an omission.
                        if aa_gen == AminoAcidGeneral::Standard(AminoAcid::Arg)
                            && *type_in_res == AtomTypeInRes::H("HH23".to_owned())
                        {
                            for charge in charges {
                                if charge.type_in_res == AtomTypeInRes::H("HH22".to_string()) {
                                    atom.force_field_type = Some("HH22".to_string());
                                    atom.partial_charge = Some(charge.charge);

                                    found = true;
                                    break;
                                }
                            }
                            if found {
                                break;
                            }
                        }

                        // Note: We've witnessed this due to errors in the mmCIF file, e.g. on ASP #88 on 9GLS.
                        eprintln!(
                            "Error assigning FF type and q based on atom type in res: Failed to match H type. Res #{}, Atom #{}, {type_in_res}, {aa_gen:?}. \
                         Falling back to a generic H",
                            res.serial_number, atom.serial_number,
                        );

                        for charge in charges {
                            if charge.type_in_res == AtomTypeInRes::H("H".to_string())
                                || charge.type_in_res == AtomTypeInRes::H("HA".to_string())
                            {
                                atom.force_field_type = Some("HB2".to_string());
                                atom.partial_charge = Some(charge.charge);

                                found = true;
                                break;
                            }
                        }
                    }
                    _ => (),
                }

                // i.e. if still not found after our specific workarounds above.
                if !found {
                    eprintln!("Problem populating FF or Q: {}", atom);
                    continue;
                }
            }
        }
    }

    Ok(())
}

/// Combines several functions that should be run after loading protein files from PDB. Add hydrogens,
/// load force field parameters and partial charge, and add bonds.
pub fn prepare_peptide(
    atoms: &mut Vec<AtomGeneric>,
    bonds: &mut Vec<BondGeneric>,
    residues: &mut Vec<ResidueGeneric>,
    chains: &mut [ChainGeneric],
    ff_map: &ProtFfChargeMapSet,
    ph: f32, // todo: Implement.
) -> Result<Vec<Dihedral>, ParamError> {
    let mut dihedrals = Vec::new();

    let h_count = atoms
        .iter()
        .filter(|a| a.element == Element::Hydrogen)
        .count();
    if h_count < 10 {
        dihedrals = populate_hydrogens_dihedrals(atoms, residues, chains, ff_map, ph)?;
    }

    // todo: Similar checks for empty etc.
    populate_peptide_ff_and_q(atoms, residues, ff_map)?;

    if bonds.is_empty() {
        *bonds = create_bonds(atoms);
    }

    Ok(dihedrals)
}

/// See docs on `prepare_peptide`. This is a convenience variant that uses an `MmCif` file.
pub fn prepare_peptide_mmcif(
    mol: &mut MmCif,
    ff_map: &ProtFfChargeMapSet,
    ph: f32, // todo: Implement.
) -> Result<(Vec<BondGeneric>, Vec<Dihedral>), ParamError> {
    let mut dihedrals = Vec::new();

    let h_count = mol
        .atoms
        .iter()
        .filter(|a| a.element == Element::Hydrogen)
        .count();
    if h_count < 10 {
        dihedrals = populate_hydrogens_dihedrals(
            &mut mol.atoms,
            &mut mol.residues,
            &mut mol.chains,
            ff_map,
            ph,
        )?;
    }

    // todo: Similar checks for empty etc.
    populate_peptide_ff_and_q(&mut mol.atoms, &mol.residues, ff_map)?;

    let bonds = create_bonds(&mol.atoms);

    Ok((bonds, dihedrals))
}