1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
#![allow(dead_code)]
use crate::reference_tables;
use crate::structs::*;
use crate::transformation::*;

#[derive(Debug, Clone, PartialEq)]
/// A PDB file containing the 3D coordinates of many atoms making up the
/// 3D structure of a protein, but it can also be used for other molecules.
///
/// The optional crystallographic metadata is exposed as public fields, while the
/// remarks, MtriX records and Models are private and reached through the
/// accessor methods on this struct.
pub struct PDB {
    /// The identifier as posed in the PDB Header or mmCIF entry.id, normally a 4 char string like '1UBQ'
    pub identifier: Option<String>,
    /// The remarks above the PDB file, each stored as a pair of the remark-type-number and a line of free text
    remarks: Vec<(usize, String)>,
    /// The Scale needed to transform orthogonal coordinates to fractional coordinates, if available
    pub scale: Option<TransformationMatrix>,
    /// The OrigX needed to transform orthogonal coordinates to submitted coordinates, if available
    pub origx: Option<TransformationMatrix>,
    /// The MtriXs needed to transform the Models to the full asymmetric subunit, if needed to contain the non-crystallographic symmetry
    mtrix: Vec<MtriX>,
    /// The unit cell of the crystal, containing its size and shape, if available
    pub unit_cell: Option<UnitCell>,
    /// The Symmetry or space group of the crystal, if available
    pub symmetry: Option<Symmetry>,
    /// The Models making up this PDB, in the order in which they were added
    models: Vec<Model>,
}

impl PDB {
    /// Create an empty PDB struct
    pub fn new() -> PDB {
        PDB {
            identifier: None,
            remarks: Vec::new(),
            scale: None,
            origx: None,
            mtrix: Vec::new(),
            unit_cell: None,
            symmetry: None,
            models: Vec::new(),
        }
    }

    /// Get the number of REMARK records stored in this PDB.
    pub fn remark_count(&self) -> usize {
        self.remarks.len()
    }

    /// Get an iterator over the remarks in the order they were added.
    /// Every item is a pair of the remark-type-number and a line of free text.
    pub fn remarks(&self) -> impl DoubleEndedIterator<Item = &(usize, String)> + '_ {
        self.remarks.iter()
    }

    /// Get an iterator over the remarks as mutable references, in the order they were added.
    /// Every item is a pair of the remark-type-number and a line of free text.
    ///
    /// Edits made through these references are not validated, unlike remarks passed to `add_remark`.
    pub fn remarks_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut (usize, String)> + '_ {
        self.remarks.iter_mut()
    }

    /// Add a remark to the end of the list of remarks.
    ///
    /// ## Arguments
    /// * `remark_type` - the remark-type-number
    /// * `remark_text` - the free line of text, containing the actual remark
    ///
    /// ## Panics
    /// It panics if the text contains invalid characters or the remark-type-number is not valid (wwPDB v3.30).
    ///
    /// A text that is too long does not cause a panic: a warning is printed to stderr
    /// and the remark is added regardless.
    pub fn add_remark(&mut self, remark_type: usize, remark_text: String) {
        assert!(
            reference_tables::valid_remark_type_number(remark_type),
            "The given remark-type-number is not valid, see wwPDB v3.30 for valid remark-type-numbers"
        );
        assert!(
            valid_text(&remark_text),
            "The given remark text contains invalid characters."
        );
        // `len()` returns the length in bytes, which equals the number of characters
        // as long as `valid_text` only accepts ASCII (presumably so - verify against its definition).
        // NOTE(review): the threshold (70) and the limit named in the message (68) disagree;
        // confirm which one is intended before changing either.
        if remark_text.len() > 70 {
            // Diagnostics go to stderr so they cannot end up in data written to stdout.
            eprintln!("WARNING: The given remark text is too long, the maximal length is 68 characters, the given string is {} characters.", remark_text.len());
        }

        self.remarks.push((remark_type, remark_text));
    }

    /// Get an iterator over the MtriX records of this PDB, in the order they were added.
    pub fn mtrix(&self) -> impl DoubleEndedIterator<Item = &MtriX> + '_ {
        self.mtrix.iter()
    }

    /// Get an iterator over the MtriX records of this PDB as mutable references,
    /// in the order they were added.
    pub fn mtrix_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut MtriX> + '_ {
        self.mtrix.iter_mut()
    }

    /// Get a specific MtriX.
    ///
    /// ## Arguments
    /// * `index` - the index of the MtriX to return
    ///
    /// ## Returns
    /// The MtriX at the given index, or `None` when the index is out of bounds.
    pub fn get_mtrix(&self, index: usize) -> Option<&MtriX> {
        self.mtrix.get(index)
    }

    /// Get a specific MtriX as a mutable reference.
    ///
    /// ## Arguments
    /// * `index` - the index of the MtriX to return
    ///
    /// ## Returns
    /// The MtriX at the given index, or `None` when the index is out of bounds.
    pub fn get_mtrix_mut(&mut self, index: usize) -> Option<&mut MtriX> {
        self.mtrix.get_mut(index)
    }

    /// Add a MtriX to the end of the list of MtriX records of this PDB.
    pub fn add_mtrix(&mut self, mtrix: MtriX) {
        self.mtrix.push(mtrix);
    }

    /// Add a Model to the end of the list of Models of this PDB.
    pub fn add_model(&mut self, new_model: Model) {
        self.models.push(new_model);
    }

    /// Get the number of Models making up this PDB.
    pub fn model_count(&self) -> usize {
        self.models.len()
    }

    /// Get the number of Chains in the first Model of this PDB.
    /// Only the first Model is counted; a PDB without Models yields 0.
    pub fn chain_count(&self) -> usize {
        self.models
            .first()
            .map_or(0, |first_model| first_model.chain_count())
    }

    /// Get the number of Residues in the first Model of this PDB.
    /// Only the first Model is counted; a PDB without Models yields 0.
    pub fn residue_count(&self) -> usize {
        self.models
            .first()
            .map_or(0, |first_model| first_model.residue_count())
    }

    /// Get the number of Conformers in the first Model of this PDB.
    /// Only the first Model is counted; a PDB without Models yields 0.
    pub fn conformer_count(&self) -> usize {
        self.models
            .first()
            .map_or(0, |first_model| first_model.conformer_count())
    }

    /// Get the number of Atoms in the first Model of this PDB.
    /// Only the first Model is counted; a PDB without Models yields 0.
    pub fn atom_count(&self) -> usize {
        self.models
            .first()
            .map_or(0, |first_model| first_model.atom_count())
    }

    /// Get the number of Chains in this PDB, summed over all Models.
    pub fn total_chain_count(&self) -> usize {
        self.models.iter().map(|model| model.chain_count()).sum()
    }

    /// Get the number of Residues in this PDB, summed over all Models.
    pub fn total_residue_count(&self) -> usize {
        self.models.iter().map(|model| model.residue_count()).sum()
    }

    /// Get the number of Conformers in this PDB, summed over all Models.
    pub fn total_conformer_count(&self) -> usize {
        self.models
            .iter()
            .map(|model| model.conformer_count())
            .sum()
    }

    /// Get the number of Atoms in this PDB, summed over all Models.
    pub fn total_atom_count(&self) -> usize {
        self.models.iter().map(|model| model.atom_count()).sum()
    }

    /// Get a specific Model from list of Models making up this PDB.
    ///
    /// ## Arguments
    /// * `index` - the index of the Model
    ///
    /// ## Returns
    /// The Model at the given index, or `None` when the index is outside bounds.
    pub fn model(&self, index: usize) -> Option<&Model> {
        self.models.get(index)
    }

    /// Get a specific Model as a mutable reference from list of Models making up this PDB.
    ///
    /// ## Arguments
    /// * `index` - the index of the Model
    ///
    /// ## Returns
    /// The Model at the given index, or `None` when the index is outside bounds.
    pub fn model_mut(&mut self, index: usize) -> Option<&mut Model> {
        self.models.get_mut(index)
    }

    /// Look up a Chain by its position among the Chains of all Models, visited Model by Model.
    ///
    /// ## Arguments
    /// * `index` - the index of the Chain
    ///
    /// ## Returns
    /// The Chain at that position, or `None` when the index is outside bounds.
    /// The lookup walks the Chains one by one until the index is reached.
    pub fn chain(&self, index: usize) -> Option<&Chain> {
        self.models
            .iter()
            .flat_map(|model| model.chains())
            .nth(index)
    }

    /// Look up a Chain, as a mutable reference, by its position among the Chains of all Models,
    /// visited Model by Model.
    ///
    /// ## Arguments
    /// * `index` - the index of the Chain
    ///
    /// ## Returns
    /// The Chain at that position, or `None` when the index is outside bounds.
    /// The lookup walks the Chains one by one until the index is reached.
    pub fn chain_mut(&mut self, index: usize) -> Option<&mut Chain> {
        self.models
            .iter_mut()
            .flat_map(|model| model.chains_mut())
            .nth(index)
    }

    /// Look up a Residue by its position among the Residues of all Models, visited Model by Model.
    ///
    /// ## Arguments
    /// * `index` - the index of the Residue
    ///
    /// ## Returns
    /// The Residue at that position, or `None` when the index is outside bounds.
    /// The lookup walks the Residues one by one until the index is reached.
    pub fn residue(&self, index: usize) -> Option<&Residue> {
        self.models
            .iter()
            .flat_map(|model| model.residues())
            .nth(index)
    }

    /// Look up a Residue, as a mutable reference, by its position among the Residues of all Models,
    /// visited Model by Model.
    ///
    /// ## Arguments
    /// * `index` - the index of the Residue
    ///
    /// ## Returns
    /// The Residue at that position, or `None` when the index is outside bounds.
    /// The lookup walks the Residues one by one until the index is reached.
    pub fn residue_mut(&mut self, index: usize) -> Option<&mut Residue> {
        self.models
            .iter_mut()
            .flat_map(|model| model.residues_mut())
            .nth(index)
    }

    /// Look up a Conformer by its position among the Conformers of all Models, visited Model by Model.
    ///
    /// ## Arguments
    /// * `index` - the index of the Conformer
    ///
    /// ## Returns
    /// The Conformer at that position, or `None` when the index is outside bounds.
    /// The lookup walks the Conformers one by one until the index is reached.
    pub fn conformer(&self, index: usize) -> Option<&Conformer> {
        self.models
            .iter()
            .flat_map(|model| model.conformers())
            .nth(index)
    }

    /// Look up a Conformer, as a mutable reference, by its position among the Conformers of all Models,
    /// visited Model by Model.
    ///
    /// ## Arguments
    /// * `index` - the index of the Conformer
    ///
    /// ## Returns
    /// The Conformer at that position, or `None` when the index is outside bounds.
    /// The lookup walks the Conformers one by one until the index is reached.
    pub fn conformer_mut(&mut self, index: usize) -> Option<&mut Conformer> {
        self.models
            .iter_mut()
            .flat_map(|model| model.conformers_mut())
            .nth(index)
    }

    /// Look up an Atom by its position among the Atoms of all Models, visited Model by Model.
    ///
    /// ## Arguments
    /// * `index` - the index of the Atom
    ///
    /// ## Returns
    /// The Atom at that position, or `None` when the index is outside bounds.
    /// The lookup walks the Atoms one by one until the index is reached.
    pub fn atom(&self, index: usize) -> Option<&Atom> {
        self.models
            .iter()
            .flat_map(|model| model.atoms())
            .nth(index)
    }

    /// Look up an Atom, as a mutable reference, by its position among the Atoms of all Models,
    /// visited Model by Model.
    ///
    /// ## Arguments
    /// * `index` - the index of the Atom
    ///
    /// ## Returns
    /// The Atom at that position, or `None` when the index is outside bounds.
    /// The lookup walks the Atoms one by one until the index is reached.
    pub fn atom_mut(&mut self, index: usize) -> Option<&mut Atom> {
        self.models
            .iter_mut()
            .flat_map(|model| model.atoms_mut())
            .nth(index)
    }

    /// Get an iterator over the Models making up this PDB, in stored order.
    /// The iterator is double ended, so it can also be walked from the back.
    pub fn models(&self) -> impl DoubleEndedIterator<Item = &Model> + '_ {
        self.models.iter()
    }

    /// Get an iterator over the Models making up this PDB as mutable references, in stored order.
    /// The iterator is double ended, so it can also be walked from the back.
    pub fn models_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut Model> + '_ {
        self.models.iter_mut()
    }

    /// Get an iterator over the Chains of every Model in this PDB, Model by Model.
    /// The iterator is double ended, so it can also be walked from the back.
    pub fn chains(&self) -> impl DoubleEndedIterator<Item = &Chain> + '_ {
        self.models().flat_map(|model| model.chains())
    }

    /// Get an iterator over the Chains of every Model in this PDB as mutable references, Model by Model.
    /// The iterator is double ended, so it can also be walked from the back.
    pub fn chains_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut Chain> + '_ {
        self.models_mut().flat_map(|model| model.chains_mut())
    }

    /// Get an iterator over the Residues of every Model in this PDB, Model by Model.
    /// The iterator is double ended, so it can also be walked from the back.
    pub fn residues(&self) -> impl DoubleEndedIterator<Item = &Residue> + '_ {
        self.models().flat_map(|model| model.residues())
    }

    /// Get an iterator over the Residues of every Model in this PDB as mutable references, Model by Model.
    /// The iterator is double ended, so it can also be walked from the back.
    pub fn residues_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut Residue> + '_ {
        self.models_mut().flat_map(|model| model.residues_mut())
    }

    /// Get an iterator over the Conformers of every Model in this PDB, Model by Model.
    /// The iterator is double ended, so it can also be walked from the back.
    pub fn conformers(&self) -> impl DoubleEndedIterator<Item = &Conformer> + '_ {
        self.models().flat_map(|model| model.conformers())
    }

    /// Get an iterator over the Conformers of every Model in this PDB as mutable references, Model by Model.
    /// The iterator is double ended, so it can also be walked from the back.
    pub fn conformers_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut Conformer> + '_ {
        self.models_mut().flat_map(|model| model.conformers_mut())
    }

    /// Get an iterator over the Atoms of every Model in this PDB, Model by Model.
    /// The iterator is double ended, so it can also be walked from the back.
    pub fn atoms(&self) -> impl DoubleEndedIterator<Item = &Atom> + '_ {
        self.models().flat_map(|model| model.atoms())
    }

    /// Get an iterator over the Atoms of every Model in this PDB as mutable references, Model by Model.
    /// The iterator is double ended, so it can also be walked from the back.
    pub fn atoms_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut Atom> + '_ {
        self.models_mut().flat_map(|model| model.atoms_mut())
    }

    /// Remove every Atom for which the given predicate returns `true`.
    /// The removal is delegated to `remove_atoms_by` on each Residue of each Model.
    pub fn remove_atoms_by<F>(&mut self, predicate: F)
    where
        F: Fn(&Atom) -> bool,
    {
        self.residues_mut().for_each(|residue| {
            residue.remove_atoms_by(&predicate);
        });
    }

    /// Remove every Conformer for which the given predicate returns `true`.
    /// The removal is delegated to `remove_conformers_by` on each Chain of each Model.
    pub fn remove_conformers_by<F>(&mut self, predicate: F)
    where
        F: Fn(&Conformer) -> bool,
    {
        self.chains_mut().for_each(|chain| {
            chain.remove_conformers_by(&predicate);
        });
    }

    /// Remove every Residue for which the given predicate returns `true`.
    /// The removal is delegated to `remove_residues_by` on each Chain of each Model.
    pub fn remove_residues_by<F>(&mut self, predicate: F)
    where
        F: Fn(&Residue) -> bool,
    {
        self.chains_mut().for_each(|chain| {
            chain.remove_residues_by(&predicate);
        });
    }

    /// Remove all Chains matching the given predicate. The predicate will be run on all Chains.
    /// The removal is delegated to `remove_chains_by` on each Model of this PDB.
    pub fn remove_chains_by<F>(&mut self, predicate: F)
    where
        F: Fn(&Chain) -> bool,
    {
        for model in self.models_mut() {
            model.remove_chains_by(&predicate);
        }
    }

    /// Remove all Models matching the given predicate. The predicate will be run on all Models.
    /// The Models that do not match are kept in place, in their original order.
    pub fn remove_models_by<F>(&mut self, predicate: F)
    where
        F: Fn(&Model) -> bool,
    {
        // `retain` keeps elements for which the closure is true, hence the negation.
        self.models.retain(|model| !predicate(model));
    }

    /// Remove the Model at the given index; all later Models move one position forward.
    ///
    /// ## Arguments
    /// * `index` - the index of the Model to remove
    ///
    /// ## Panics
    /// It panics when the index is outside bounds.
    pub fn remove_model(&mut self, index: usize) {
        self.models.remove(index);
    }

    /// Remove the first Model that has the given serial number.
    ///
    /// ## Arguments
    /// * `serial_number` - the serial number of the Model to remove
    ///
    /// ## Returns
    /// `true` if a matching Model was found and removed, `false` if no Model matched.
    pub fn remove_model_serial_number(&mut self, serial_number: usize) -> bool {
        let found = self
            .models
            .iter()
            .position(|model| model.serial_number() == serial_number);

        match found {
            Some(position) => {
                // `position` comes from the list itself, so it is always in bounds.
                self.models.remove(position);
                true
            }
            None => false,
        }
    }

    /// Remove everything that is empty: first `remove_empty` is called on every Model,
    /// after which every Model that is left without Chains is removed from this PDB.
    pub fn remove_empty(&mut self) {
        for model in &mut self.models {
            model.remove_empty();
        }
        self.models.retain(|model| model.chain_count() != 0);
    }

    /// This renumbers the numbered structs in the PDB.
    ///
    /// Models get serial numbers counting up from 1. Within every Model:
    /// * Atoms get serial numbers counting up from 1 (restarting for every Model).
    /// * Residues get serial numbers counting up from 1 and their insertion codes are removed.
    /// * In a Residue with multiple Conformers, the Conformers get alternative locations
    ///   generated by `number_to_base26`, counting up from 0. If a Residue has exactly one
    ///   Conformer its alternative location is removed.
    /// * Chains get ids generated by `number_to_base26`, counting up from 0.
    ///
    /// MtriX records are not touched by this function.
    pub fn renumber(&mut self) {
        let mut model_serial = 1;
        for model in self.models_mut() {
            model.set_serial_number(model_serial);
            model_serial += 1;

            let mut atom_serial = 1;
            for atom in model.atoms_mut() {
                atom.set_serial_number(atom_serial);
                atom_serial += 1;
            }

            let mut residue_serial = 1;
            for residue in model.residues_mut() {
                residue.set_serial_number(residue_serial);
                residue.remove_insertion_code();
                residue_serial += 1;

                match residue.conformer_count() {
                    // Nothing to relabel in a Residue without Conformers.
                    0 => {}
                    // A lone Conformer needs no alternative location to tell it apart.
                    1 => {
                        if let Some(conformer) = residue.conformer_mut(0) {
                            conformer.remove_alternative_location();
                        }
                    }
                    // Multiple Conformers each get their own generated alternative location.
                    _ => {
                        let mut conformer_index = 0;
                        for conformer in residue.conformers_mut() {
                            conformer.set_alternative_location(&number_to_base26(conformer_index));
                            conformer_index += 1;
                        }
                    }
                }
            }

            let mut chain_index = 0;
            for chain in model.chains_mut() {
                chain.set_id(&number_to_base26(chain_index));
                chain_index += 1;
            }
        }
    }

    /// Apply the given transformation to every Atom of every Model in this PDB,
    /// by calling `apply_transformation` on each Atom.
    pub fn apply_transformation(&mut self, transformation: &TransformationMatrix) {
        self.atoms_mut().for_each(|atom| {
            atom.apply_transformation(transformation);
        });
    }

    /// Joins two PDBs, consuming the other PDB. Only the Models of the other PDB are used.
    ///
    /// * If either PDB has multiple Models, the Models of the other PDB are appended to the Models of this PDB.
    /// * Otherwise, if this PDB has no Models, it takes over the Models of the other PDB.
    /// * Otherwise, if the other PDB has no Models, nothing happens.
    /// * Otherwise both have exactly one Model, and the Model of the other PDB is joined
    ///   into the Model of this PDB by calling `join` on that Model.
    pub fn join(&mut self, mut other: PDB) {
        let own_count = self.models.len();
        let other_count = other.models.len();

        if own_count > 1 || other_count > 1 {
            self.models.extend(other.models);
        } else if own_count == 0 {
            self.models = other.models;
        } else if other_count == 1 {
            // Exactly one Model on each side: merge them into a single Model.
            if let Some(own_model) = self.models.first_mut() {
                own_model.join(other.models.remove(0));
            }
        }
    }

    /// Extend the Models on this PDB by the given iterator.
    /// The new Models are appended after the existing Models, in iteration order.
    pub fn extend<T: IntoIterator<Item = Model>>(&mut self, iter: T) {
        self.models.extend(iter);
    }

    /// Sort the Models of this PDB, using the ordering defined for Model.
    /// Only the order of the Models changes, their contents are not sorted (see `full_sort`).
    pub fn sort(&mut self) {
        self.models.sort();
    }

    /// Sort all structs in this PDB, from the outside in: first the Models of this PDB,
    /// then `sort` is called on every Model, every Chain, every Residue and every Conformer.
    pub fn full_sort(&mut self) {
        self.sort();
        self.models_mut().for_each(|model| {
            model.sort();
        });
        self.chains_mut().for_each(|chain| {
            chain.sort();
        });
        self.residues_mut().for_each(|residue| {
            residue.sort();
        });
        self.conformers_mut().for_each(|conformer| {
            conformer.sort();
        });
    }
}

use std::fmt;
impl fmt::Display for PDB {
    /// Writes a short summary of this PDB: the text `PDB Models: ` followed by the number of Models.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let model_total = self.model_count();
        write!(f, "PDB Models: {}", model_total)
    }
}

impl Default for PDB {
    /// The default PDB is the empty PDB, identical to the result of `PDB::new()`.
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// `full_sort` has to reorder the Atoms within the PDB: two Atoms are added
    /// in descending serial number order and are expected in ascending order afterwards.
    #[test]
    fn sort_atoms() {
        // `a` has serial number 0 and `b` has serial number 1 (the second argument).
        let a = Atom::new(false, 0, "", 0.0, 0.0, 0.0, 0.0, 0.0, "", 0).unwrap();
        let b = Atom::new(false, 1, "", 0.0, 0.0, 0.0, 0.0, 0.0, "", 0).unwrap();
        let mut model = Model::new(0);
        // Add both Atoms with identical chain and residue arguments, `b` first so they start out unsorted.
        model.add_atom(b, "A", (0, None), ("LYS", None));
        model.add_atom(a, "A", (0, None), ("LYS", None));
        let mut pdb = PDB::new();
        pdb.add_model(model);
        // Before sorting the Atoms are in insertion order.
        assert_eq!(pdb.atom(0).unwrap().serial_number(), 1);
        assert_eq!(pdb.atom(1).unwrap().serial_number(), 0);
        pdb.full_sort();
        // After sorting the Atoms are ordered by ascending serial number.
        assert_eq!(pdb.atom(0).unwrap().serial_number(), 0);
        assert_eq!(pdb.atom(1).unwrap().serial_number(), 1);
    }
}