1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
#![allow(dead_code)]
use crate::structs::*;
use crate::transformation::TransformationMatrix;
use doc_cfg::doc_cfg;
#[cfg(feature = "rayon")]
use rayon::prelude::*;
use std::cmp::Ordering;
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
/// A Chain containing multiple Residues.
///
/// Equality is derived, so two Chains are equal only when their identifier, Residues and
/// database reference are all equal. The ordering implemented for Chain in this file compares
/// the identifier only, so two Chains can compare as `Ordering::Equal` without being `==`.
pub struct Chain {
/// The identifier of this Chain, stored as returned by `prepare_identifier`
id: String,
/// The Residues making up this Chain, in the order they were added or sorted
residues: Vec<Residue>,
/// A possible reference to a database for this chain, `None` until one is set
database_reference: Option<DatabaseReference>,
}
impl<'a> Chain {
/// Build an empty Chain (no Residues, no database reference) with the given identifier.
///
/// ## Arguments
/// * `id` - the identifier, validated by `prepare_identifier`
///
/// ## Fails
/// It returns `None` if `prepare_identifier` rejects the identifier.
#[must_use]
pub fn new(id: impl AsRef<str>) -> Option<Chain> {
    let id = prepare_identifier(id)?;
    Some(Chain {
        id,
        residues: Vec::new(),
        database_reference: None,
    })
}
/// Build a Chain with the given identifier that holds all Residues yielded by the iterator.
///
/// ## Arguments
/// * `id` - the identifier, validated by `prepare_identifier`
/// * `residues` - the Residues to store, kept in iteration order
///
/// ## Fails
/// It returns `None` if `prepare_identifier` rejects the identifier; in that case the
/// iterator is not consumed.
pub fn from_iter(
    id: impl AsRef<str>,
    residues: impl Iterator<Item = Residue>,
) -> Option<Chain> {
    let id = prepare_identifier(id)?;
    let residues: Vec<Residue> = residues.collect();
    Some(Chain {
        id,
        residues,
        database_reference: None,
    })
}
/// The identifier of this Chain, borrowed as a string slice.
pub fn id(&self) -> &str {
    self.id.as_str()
}
/// Replace the identifier of this Chain.
///
/// The new identifier is passed through `prepare_identifier`; the stored value is whatever
/// that helper returns. NOTE(review): earlier documentation stated the id is uppercased, but
/// the test in this file expects `set_id("atom")` to yield `"atom"` - confirm the intended
/// normalisation against `prepare_identifier`.
///
/// ## Fails
/// It returns `false`, leaving the current identifier untouched, if `prepare_identifier`
/// rejects the new identifier. It returns `true` if the identifier was replaced.
pub fn set_id(&mut self, new_id: impl AsRef<str>) -> bool {
    match prepare_identifier(new_id) {
        Some(id) => {
            self.id = id;
            true
        }
        None => false,
    }
}
/// Borrow the database reference of this Chain, or `None` if none has been set.
pub const fn database_reference(&self) -> Option<&DatabaseReference> {
    Option::as_ref(&self.database_reference)
}
/// Mutably borrow the database reference of this Chain, or `None` if none has been set.
pub fn database_reference_mut(&mut self) -> Option<&mut DatabaseReference> {
    Option::as_mut(&mut self.database_reference)
}
/// Store the given database reference on this Chain, discarding any previous one.
pub fn set_database_reference(&mut self, reference: DatabaseReference) {
    let _previous = self.database_reference.replace(reference);
}
/// Get the number of Residues making up this Chain
pub fn residue_count(&self) -> usize {
self.residues.len()
}
/// Get the number of Conformers making up this Chain
pub fn conformer_count(&self) -> usize {
self.residues().map(Residue::conformer_count).sum()
}
/// The total number of Conformers in this Chain, summed over all of its Residues using a
/// parallel iterator over the Residues.
#[doc_cfg(feature = "rayon")]
pub fn par_conformer_count(&self) -> usize {
    self.residues
        .par_iter()
        .map(Residue::conformer_count)
        .sum()
}
/// Get the number of Atoms making up this Chain
pub fn atom_count(&self) -> usize {
self.residues().map(Residue::atom_count).sum()
}
/// The total number of Atoms in this Chain, using a parallel iterator over the Residues and
/// the parallel atom count of every Residue.
#[doc_cfg(feature = "rayon")]
pub fn par_atom_count(&self) -> usize {
    self.residues
        .par_iter()
        .map(Residue::par_atom_count)
        .sum()
}
/// Borrow the Residue at the given position in this Chain's list of Residues.
///
/// ## Arguments
/// * `index` - the position of the Residue in the list
///
/// ## Fails
/// It returns `None` if the index is out of bounds.
pub fn residue(&self, index: usize) -> Option<&Residue> {
    self.residues.as_slice().get(index)
}
/// Mutably borrow the Residue at the given position in this Chain's list of Residues.
///
/// ## Arguments
/// * `index` - the position of the Residue in the list
///
/// ## Fails
/// It returns `None` if the index is out of bounds.
pub fn residue_mut(&mut self, index: usize) -> Option<&mut Residue> {
    self.residues.as_mut_slice().get_mut(index)
}
/// Get a reference to a specific Conformer from list of Conformers making up this Chain.
///
/// ## Arguments
/// * `index` - the index of the Conformer
///
/// ## Fails
/// It returns `None` if the index is out of bounds.
pub fn conformer(&self, index: usize) -> Option<&Conformer> {
self.conformers().nth(index)
}
/// Get a mutable reference to a specific Conformer from list of Conformers making up this Chain.
///
/// ## Arguments
/// * `index` - the index of the Conformer
///
/// ## Fails
/// It returns `None` if the index is out of bounds.
pub fn conformer_mut(&mut self, index: usize) -> Option<&mut Conformer> {
self.conformers_mut().nth(index)
}
/// Get a reference to a specific Atom from the Atoms making up this Chain.
///
/// ## Arguments
/// * `index` - the index of the Atom
///
/// ## Fails
/// It returns `None` if the index is out of bounds.
pub fn atom(&self, index: usize) -> Option<&Atom> {
self.atoms().nth(index)
}
/// Get a mutable reference to a specific Atom from the Atoms making up this Chain.
///
/// ## Arguments
/// * `index` - the index of the Atom
///
/// ## Fails
/// It returns `None` if the index is out of bounds.
pub fn atom_mut(&mut self, index: usize) -> Option<&mut Atom> {
self.atoms_mut().nth(index)
}
/// Get a reference to the specified atom. Its uniqueness is guaranteed by including the
/// `alternative_location`, with its full hierarchy. The algorithm is based
/// on binary search so it is faster than an exhaustive search, but the
/// full structure is assumed to be sorted. This assumption can be enforced
/// by using `pdb.full_sort()`.
///
/// ## Arguments
/// * `serial_number` - the serial number of the Atom to look for
/// * `alternative_location` - the alternative location, passed on to the Residue level search
///
/// ## Fails
/// It returns `None` if no Residue spans the serial number (this includes an empty Chain), or
/// if the search inside the selected Residue does not find a match.
///
/// ## Panics
/// It panics if a Residue visited by the search does not contain any Atoms.
pub fn binary_find_atom(
    &'a self,
    serial_number: usize,
    alternative_location: Option<&str>,
) -> Option<hierarchy::AtomConformerResidue<'a>> {
    let index = self
        .residues
        .binary_search_by(|residue| {
            let low = residue.atoms().next().expect(
                "All residues should have at least a single atom for binary_find_atom",
            );
            let high = residue.atoms().next_back().expect(
                "All residues should have at least a single atom for binary_find_atom",
            );
            // The comparator has to report where the *residue* lies relative to the target:
            // a residue whose serial numbers all lie below the target is `Less` (the search
            // continues to the right), one whose serial numbers all lie above it is `Greater`.
            if high.serial_number() < serial_number {
                Ordering::Less
            } else if serial_number < low.serial_number() {
                Ordering::Greater
            } else {
                Ordering::Equal
            }
        })
        .ok()?;
    let residue = self.residues.get(index)?;
    residue
        .binary_find_atom(serial_number, alternative_location)
        .map(|h| h.extend(residue))
}
/// Get a mutable reference to the specified atom. Its uniqueness is guaranteed by including the
/// `alternative_location`, with its full hierarchy. The algorithm is based
/// on binary search so it is faster than an exhaustive search, but the
/// full structure is assumed to be sorted. This assumption can be enforced
/// by using `pdb.full_sort()`.
///
/// ## Arguments
/// * `serial_number` - the serial number of the Atom to look for
/// * `alternative_location` - the alternative location, passed on to the Residue level search
///
/// ## Fails
/// It returns `None` if no Residue spans the serial number (this includes an empty Chain), or
/// if the search inside the selected Residue does not find a match.
///
/// ## Panics
/// It panics if a Residue visited by the search does not contain any Atoms.
pub fn binary_find_atom_mut(
    &'a mut self,
    serial_number: usize,
    alternative_location: Option<&str>,
) -> Option<hierarchy::AtomConformerResidueMut<'a>> {
    let index = self
        .residues
        .binary_search_by(|residue| {
            let low = residue.atoms().next().expect(
                "All residues should have at least a single atom for binary_find_atom",
            );
            let high = residue.atoms().next_back().expect(
                "All residues should have at least a single atom for binary_find_atom",
            );
            // The comparator has to report where the *residue* lies relative to the target:
            // a residue whose serial numbers all lie below the target is `Less` (the search
            // continues to the right), one whose serial numbers all lie above it is `Greater`.
            if high.serial_number() < serial_number {
                Ordering::Less
            } else if serial_number < low.serial_number() {
                Ordering::Greater
            } else {
                Ordering::Equal
            }
        })
        .ok()?;
    let residue = self.residues.get_mut(index)?;
    // The mutable hierarchy takes its parent Residue as a raw pointer; it is taken from the
    // same borrow that is searched, the same way `find_mut` and `atoms_with_hierarchy_mut` do.
    let residue_pointer: *mut Residue = residue;
    residue
        .binary_find_atom_mut(serial_number, alternative_location)
        .map(|h| h.extend(residue_pointer))
}
/// Find all hierarchies (Atom, Conformer and Residue) matching the given search.
///
/// For every Residue the search is narrowed with that Residue's information. Residues for
/// which the narrowed search is `Search::Known(false)` are skipped, all others are searched
/// and their results extended with the Residue.
pub fn find(
    &'a self,
    search: Search,
) -> impl DoubleEndedIterator<Item = AtomConformerResidue<'a>> + '_ {
    self.residues()
        .filter_map(move |residue| {
            let narrowed = search.clone().add_residue_info(residue);
            if matches!(narrowed, Search::Known(false)) {
                None
            } else {
                Some((residue, narrowed))
            }
        })
        .flat_map(|(residue, narrowed)| {
            residue
                .find(narrowed)
                .map(move |hierarchy| hierarchy.extend(residue))
        })
}
/// Find all hierarchies (Atom, Conformer and Residue) matching the given search, mutably.
///
/// For every Residue the search is narrowed with that Residue's information. Residues for
/// which the narrowed search is `Search::Known(false)` are skipped, all others are searched
/// and their results extended with a raw pointer to the Residue.
pub fn find_mut(
    &'a mut self,
    search: Search,
) -> impl DoubleEndedIterator<Item = AtomConformerResidueMut<'a>> + '_ {
    self.residues_mut()
        .filter_map(move |residue| {
            let narrowed = search.clone().add_residue_info(residue);
            if matches!(narrowed, Search::Known(false)) {
                None
            } else {
                Some((residue, narrowed))
            }
        })
        .flat_map(|(residue, narrowed)| {
            let parent: *mut Residue = residue;
            residue
                .find_mut(narrowed)
                .map(move |hierarchy| hierarchy.extend(parent))
        })
}
/// Iterate over references to the Residues of this Chain, in stored order.
/// The iterator is double ended, so it can be walked from the back as well.
pub fn residues(&self) -> impl DoubleEndedIterator<Item = &Residue> + '_ {
    self.residues.as_slice().iter()
}
/// Iterate in parallel over references to the Residues of this Chain.
#[doc_cfg(feature = "rayon")]
pub fn par_residues(&self) -> impl ParallelIterator<Item = &Residue> + '_ {
    self.residues.as_slice().par_iter()
}
/// Iterate over mutable references to the Residues of this Chain, in stored order.
/// The iterator is double ended, so it can be walked from the back as well.
pub fn residues_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut Residue> + '_ {
    self.residues.as_mut_slice().iter_mut()
}
/// Iterate in parallel over mutable references to the Residues of this Chain.
#[doc_cfg(feature = "rayon")]
pub fn par_residues_mut(&mut self) -> impl ParallelIterator<Item = &mut Residue> + '_ {
    self.residues.as_mut_slice().par_iter_mut()
}
/// Iterate over references to all Conformers of this Chain, Residue by Residue.
/// The iterator is double ended, so it can be walked from the back as well.
pub fn conformers(&self) -> impl DoubleEndedIterator<Item = &Conformer> + '_ {
    self.residues.iter().flat_map(Residue::conformers)
}
/// Iterate in parallel over references to all Conformers of this Chain.
#[doc_cfg(feature = "rayon")]
pub fn par_conformers(&self) -> impl ParallelIterator<Item = &Conformer> + '_ {
    self.residues
        .par_iter()
        .flat_map(Residue::par_conformers)
}
/// Iterate over mutable references to all Conformers of this Chain, Residue by Residue.
/// The iterator is double ended, so it can be walked from the back as well.
pub fn conformers_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut Conformer> + '_ {
    self.residues
        .iter_mut()
        .flat_map(Residue::conformers_mut)
}
/// Iterate in parallel over mutable references to all Conformers of this Chain.
#[doc_cfg(feature = "rayon")]
pub fn par_conformers_mut(&mut self) -> impl ParallelIterator<Item = &mut Conformer> + '_ {
    self.residues
        .par_iter_mut()
        .flat_map(Residue::par_conformers_mut)
}
/// Iterate over references to all Atoms of this Chain, Residue by Residue.
/// The iterator is double ended, so it can be walked from the back as well.
pub fn atoms(&self) -> impl DoubleEndedIterator<Item = &Atom> + '_ {
    self.residues.iter().flat_map(Residue::atoms)
}
/// Iterate in parallel over references to all Atoms of this Chain.
#[doc_cfg(feature = "rayon")]
pub fn par_atoms(&self) -> impl ParallelIterator<Item = &Atom> + '_ {
    self.residues.par_iter().flat_map(Residue::par_atoms)
}
/// Iterate over mutable references to all Atoms of this Chain, Residue by Residue.
/// The iterator is double ended, so it can be walked from the back as well.
pub fn atoms_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut Atom> + '_ {
    self.residues.iter_mut().flat_map(Residue::atoms_mut)
}
/// Iterate in parallel over mutable references to all Atoms of this Chain.
#[doc_cfg(feature = "rayon")]
pub fn par_atoms_mut(&mut self) -> impl ParallelIterator<Item = &mut Atom> + '_ {
    self.residues
        .par_iter_mut()
        .flat_map(Residue::par_atoms_mut)
}
/// Iterate over all Atoms of this Chain together with their hierarchy: every item produced
/// by a Residue is extended with a reference to that Residue.
pub fn atoms_with_hierarchy(
    &'a self,
) -> impl DoubleEndedIterator<Item = hierarchy::AtomConformerResidue<'a>> + '_ {
    self.residues().flat_map(|residue| {
        residue
            .atoms_with_hierarchy()
            .map(move |hierarchy| hierarchy.extend(residue))
    })
}
/// Iterate mutably over all Atoms of this Chain together with their hierarchy: every item
/// produced by a Residue is extended with a raw pointer to that Residue.
pub fn atoms_with_hierarchy_mut(
    &'a mut self,
) -> impl DoubleEndedIterator<Item = hierarchy::AtomConformerResidueMut<'a>> + '_ {
    self.residues_mut().flat_map(|current| {
        let parent: *mut Residue = current;
        current
            .atoms_with_hierarchy_mut()
            .map(move |hierarchy| hierarchy.extend(parent))
    })
}
/// Add a new Atom to this Chain. If a Residue with the given id already exists, the
/// Atom will be added to it, otherwise a new Residue is created to hold the created atom
/// and added to the end of the list of Residues in this chain.
///
/// The existing Residues are searched from the back, so if several Residues share the id
/// the last one receives the Atom.
///
/// ## Arguments
/// * `new_atom` - the new Atom to add
/// * `residue_id` - the id construct of the Residue to add the Atom to
/// * `conformer_id` - the id construct of the Conformer to add the Atom to
///
/// ## Panics
/// It panics if a new Residue has to be created and the given Residue ID contains any
/// invalid characters.
pub fn add_atom(
    &mut self,
    new_atom: Atom,
    residue_id: (isize, Option<&str>),
    conformer_id: (impl AsRef<str>, Option<&str>),
) {
    let existing = self
        .residues
        .iter()
        .rposition(|residue| residue.id() == residue_id);
    let index = match existing {
        Some(index) => index,
        None => {
            // Only build a Residue when none matches, instead of creating (and dropping) a
            // placeholder on every call.
            let residue = Residue::new(residue_id.0, residue_id.1, None)
                .expect("Invalid chars in Residue creation");
            self.residues.push(residue);
            self.residues.len() - 1
        }
    };
    self.residues[index].add_atom(new_atom, conformer_id);
}
/// Append a Residue to the end of the list of Residues making up this Chain. Duplicate
/// names or serial numbers in the list of Residues are not detected.
pub fn add_residue(&mut self, residue: Residue) {
    let end = self.residues.len();
    self.residues.insert(end, residue);
}
/// Insert a Residue at the given index into the list of Residues making up this Chain,
/// moving all later Residues one position back. Duplicate names or serial numbers in the
/// list of Residues are not detected.
///
/// ## Panics
/// It panics if `index > len`.
pub fn insert_residue(&mut self, index: usize, residue: Residue) {
    Vec::insert(&mut self.residues, index, residue);
}
/// Remove all Atoms for which the predicate returns `true`, by forwarding the predicate to
/// every Residue of this Chain. Residues are kept even if they end up without Atoms.
pub fn remove_atoms_by<F>(&mut self, predicate: F)
where
    F: Fn(&Atom) -> bool,
{
    self.residues.iter_mut().for_each(|residue| {
        residue.remove_atoms_by(&predicate);
    });
}
/// Remove all Conformers for which the predicate returns `true`, by forwarding the predicate
/// to every Residue of this Chain. Residues are kept even if they end up without Conformers.
pub fn remove_conformers_by<F>(&mut self, predicate: F)
where
    F: Fn(&Conformer) -> bool,
{
    self.residues.iter_mut().for_each(|residue| {
        residue.remove_conformers_by(&predicate);
    });
}
/// Remove all Residues for which the predicate returns `true`. The remaining Residues keep
/// their relative order and the removal is done in place.
pub fn remove_residues_by<F>(&mut self, predicate: F)
where
    F: Fn(&Residue) -> bool,
{
    self.residues.retain(|candidate| {
        let discard = predicate(candidate);
        !discard
    });
}
/// Remove the specified Residue.
///
/// ## Arguments
/// * `index` - the index of the Residue to remove
///
/// ## Panics
/// It panics if the index is out of bounds.
pub fn remove_residue(&mut self, index: usize) {
self.residues.remove(index);
}
/// Remove the first Residue in the list whose id equals the given id.
///
/// ## Arguments
/// * `id` - the id construct of the Residue to remove (see Residue.id())
///
/// ## Fails
/// It returns `false` if no Residue has the given id, `true` if one was found and removed.
pub fn remove_residue_by_id(&mut self, id: (isize, Option<&str>)) -> bool {
    let found = self.residues.iter().position(|residue| residue.id() == id);
    found.map_or(false, |index| {
        self.remove_residue(index);
        true
    })
}
/// Remove the first Residue in the list whose id equals the given id, searching for it with
/// a parallel iterator.
///
/// ## Arguments
/// * `id` - the id construct of the Residue to remove (see Residue.id())
///
/// ## Fails
/// It returns `false` if no Residue has the given id, `true` if one was found and removed.
#[doc_cfg(feature = "rayon")]
pub fn par_remove_residue_by_id(&mut self, id: (isize, Option<&str>)) -> bool {
    let found = self
        .residues
        .par_iter()
        .position_first(|residue| residue.id() == id);
    found.map_or(false, |index| {
        self.remove_residue(index);
        true
    })
}
/// Remove all empty Residues from this Chain, and all empty Conformers from the Residues.
pub fn remove_empty(&mut self) {
self.residues_mut().for_each(Residue::remove_empty);
self.residues.retain(|r| r.conformer_count() > 0);
}
/// Apply the given transformation to every Atom of this Chain, one Atom after the other.
/// Each Atom is updated in place.
pub fn apply_transformation(&mut self, transformation: &TransformationMatrix) {
    self.atoms_mut()
        .for_each(|atom| atom.apply_transformation(transformation));
}
/// Apply a transformation to the position of all atoms making up this Chain, the new position is immediately set.
/// Done in parallel.
#[doc_cfg(feature = "rayon")]
pub fn par_apply_transformation(&mut self, transformation: &TransformationMatrix) {
self.par_atoms_mut()
.for_each(|atom| atom.apply_transformation(transformation));
}
/// Join this Chain with another Chain, this moves all atoms from the other Chain
/// to this Chain. All other (meta) data of this Chain will stay the same.
pub fn join(&mut self, other: Chain) {
self.residues.extend(other.residues);
}
/// Sort the Residues of this Chain according to the ordering defined on Residue.
pub fn sort(&mut self) {
    self.residues.as_mut_slice().sort();
}
/// Sort the Residues of this Chain according to the ordering defined on Residue, using the
/// parallel sort.
#[doc_cfg(feature = "rayon")]
pub fn par_sort(&mut self) {
    self.residues.as_mut_slice().par_sort();
}
}
use std::fmt;
impl fmt::Display for Chain {
    /// Write a one line summary: the identifier followed by the number of Residues.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!(
            "CHAIN ID:{}, Residues: {}",
            self.id(),
            self.residues.len()
        ))
    }
}
impl PartialOrd for Chain {
    /// Compare two Chains; always `Some`, as Chains are totally ordered.
    ///
    /// This delegates to `Ord::cmp` so both orderings are defined in a single place and
    /// cannot drift apart. The ordering only looks at the identifier.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for Chain {
    /// Order Chains by their identifier only; Residues and database reference are ignored.
    fn cmp(&self, other: &Self) -> Ordering {
        let (mine, theirs) = (self.id(), other.id());
        mine.cmp(theirs)
    }
}
impl Extend<Residue> for Chain {
    /// Append every Residue yielded by the given iterator to the end of this Chain, in
    /// iteration order.
    fn extend<T: IntoIterator<Item = Residue>>(&mut self, iter: T) {
        let incoming = iter.into_iter();
        self.residues.extend(incoming);
    }
}
// Unit tests for Chain; `unwrap` is allowed here because a failed construction should
// simply fail the test.
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
use super::*;
// Identifiers are validated on construction and on `set_id`.
#[test]
fn test_id_validation() {
let mut a = Chain::new("A").unwrap();
// Non-ASCII identifiers such as these are rejected by `new` and `set_id`
assert_eq!(Chain::new("R̊"), None);
assert!(!a.set_id("Oͦ"));
// A rejected id leaves the previous id in place
assert_eq!(a.id(), "A");
// A lowercase id is accepted and stored as given
a.set_id("atom");
assert_eq!(a.id(), "atom");
}
// Chains with the same id and no content are equal; ordering follows the id.
#[test]
fn ordering_and_equality() {
let a = Chain::new("A").unwrap();
let b = Chain::new("A").unwrap();
let c = Chain::new("B").unwrap();
assert_eq!(a, b);
assert_ne!(a, c);
assert!(a < c);
assert!(b < c);
}
// A freshly created Chain has no database reference and all counts are zero.
#[test]
fn test_empty_chain() {
// `mut` is needed for `database_reference_mut`
let mut a = Chain::new("A").unwrap();
assert_eq!(a.database_reference(), None);
assert_eq!(a.database_reference_mut(), None);
assert_eq!(a.residue_count(), 0);
assert_eq!(a.conformer_count(), 0);
assert_eq!(a.atom_count(), 0);
}
// Residues can be added, looked up by index and removed by index or by id.
#[test]
fn test_residue() {
let mut a = Chain::new("A").unwrap();
let mut r = Residue::new(1, None, None).unwrap();
a.add_residue(r.clone());
a.add_residue(Residue::new(13, None, None).unwrap());
assert_eq!(a.residue(0), Some(&r));
assert_eq!(a.residue_mut(0), Some(&mut r));
// Removing index 0 leaves only residue 13, which is then removed by id
a.remove_residue(0);
assert!(a.remove_residue_by_id((13, None)));
assert_eq!(a.residue_count(), 0);
// Removing an id that is no longer present reports `false`
assert!(!a.remove_residue_by_id((13, None)));
}
// Smoke test: both Debug and Display formatting run without panicking.
#[test]
fn check_display() {
let a = Chain::new("A").unwrap();
format!("{a:?}");
format!("{a}");
}
}