1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
#![allow(dead_code)]
use crate::structs::*;
use crate::transformation::TransformationMatrix;
use doc_cfg::doc_cfg;
#[cfg(feature = "rayon")]
use rayon::prelude::*;
use std::cmp::Ordering;
use std::fmt;

#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
/// A Residue containing multiple Conformers.
///
/// A Residue is identified by its serial number combined with its optional insertion code.
/// The derived equality compares every field, including the list of Conformers.
pub struct Residue {
    /// The serial number of this Residue, can be negative as that is used sometimes. See <https://proteopedia.org/wiki/index.php/Unusual_sequence_numbering>.
    serial_number: isize,
    /// The insertion code of this Residue, used in conjunction with the serial number to uniquely identify Residues.
    /// `None` if this Residue has no insertion code.
    insertion_code: Option<String>,
    /// The list of Conformers making up this Residue, in the order in which they were added (or last sorted).
    conformers: Vec<Conformer>,
}

impl<'a> Residue {
    /// Build a new Residue from its identifying parts.
    ///
    /// ## Arguments
    /// * `number` - the serial number
    /// * `insertion_code` - the insertion code, if the Residue has one
    /// * `conformer` - an optional first Conformer to store in the new Residue
    ///
    /// ## Fails
    /// It fails and returns `None` if the `insertion_code` is rejected because it contains invalid characters.
    #[must_use]
    pub fn new(
        number: isize,
        insertion_code: Option<&str>,
        conformer: Option<Conformer>,
    ) -> Option<Self> {
        let mut residue = Self {
            serial_number: number,
            insertion_code: None,
            // An `Option` iterates over zero or one item, so this yields an empty or single element list.
            conformers: conformer.into_iter().collect(),
        };

        // Without an insertion code there is nothing to validate.
        let code_accepted =
            insertion_code.map_or(true, |code| residue.set_insertion_code(code));

        if code_accepted {
            Some(residue)
        } else {
            None
        }
    }

    /// Get the serial number of the Residue.
    /// The number is signed, so it can be negative.
    #[must_use]
    pub const fn serial_number(&self) -> isize {
        self.serial_number
    }

    /// Set the serial number of the Residue.
    ///
    /// ## Arguments
    /// * `new_number` - the new serial number, stored as given without any validation
    pub fn set_serial_number(&mut self, new_number: isize) {
        self.serial_number = new_number;
    }

    /// Get the insertion code of the Residue.
    /// Returns `None` if this Residue has no insertion code.
    #[must_use]
    pub fn insertion_code(&self) -> Option<&str> {
        self.insertion_code.as_deref()
    }

    /// Replace the insertion code of the Residue.
    /// The code is first passed through `prepare_identifier_uppercase`; the prepared text is what gets stored.
    ///
    /// ## Fails
    /// Fails and returns `false` if the `new_code` contains invalid characters, in that case
    /// the current insertion code is left untouched.
    pub fn set_insertion_code(&mut self, new_code: impl AsRef<str>) -> bool {
        match prepare_identifier_uppercase(new_code) {
            Some(prepared) => {
                self.insertion_code = Some(prepared);
                true
            }
            None => false,
        }
    }

    /// Set the insertion code of the Residue to None.
    /// After this call `insertion_code()` returns `None`.
    pub fn remove_insertion_code(&mut self) {
        self.insertion_code = None;
    }

    /// Get the construct that uniquely identifies this Residue:
    /// the serial number paired with the (optional) insertion code.
    #[must_use]
    pub fn id(&self) -> (isize, Option<&str>) {
        let number = self.serial_number();
        let code = self.insertion_code.as_deref();
        (number, code)
    }

    /// The name of the Residue as shared by its Conformers.
    ///
    /// Returns `None` if there are no Conformers, or if the Conformers do not all have the
    /// same name. A single Conformer trivially determines the name.
    #[must_use]
    pub fn name(&self) -> Option<&str> {
        // An empty list has no first element, so there is no name to report.
        let (first, rest) = self.conformers.split_first()?;
        let shared = first.name();

        if rest.iter().all(|conformer| conformer.name() == shared) {
            Some(shared)
        } else {
            None
        }
    }

    /// The number of Conformers making up this Residue.
    /// Empty Conformers are counted as well.
    #[must_use]
    pub fn conformer_count(&self) -> usize {
        self.conformers.len()
    }

    /// Get the number of Atoms making up this Residue.
    #[must_use]
    pub fn atom_count(&self) -> usize {
        self.conformers().fold(0, |sum, res| res.atom_count() + sum)
    }

    /// Get the total number of Atoms in this Residue, summing the Atom counts
    /// of all its Conformers in parallel.
    #[doc_cfg(feature = "rayon")]
    #[must_use]
    pub fn par_atom_count(&self) -> usize {
        self.conformers
            .par_iter()
            .map(|conformer| conformer.atom_count())
            .sum()
    }

    /// Get a reference to a specific Conformer from the list of Conformers making up this Residue.
    ///
    /// ## Arguments
    /// * `index` - the index of the conformer in the list of Conformers
    ///
    /// ## Fails
    /// Returns `None` if the index is out of bounds.
    #[must_use]
    pub fn conformer(&self, index: usize) -> Option<&Conformer> {
        self.conformers.get(index)
    }

    /// Get a mutable reference to a specific Conformer from the list of Conformers making up this Residue.
    ///
    /// ## Arguments
    /// * `index` - the index of the conformer in the list of Conformers
    ///
    /// ## Fails
    /// Returns `None` if the index is out of bounds.
    #[must_use]
    pub fn conformer_mut(&mut self, index: usize) -> Option<&mut Conformer> {
        self.conformers.get_mut(index)
    }

    /// Get a reference to a specific Atom from the list of Conformers making up this Residue.
    /// The Atoms are numbered in the order in which `atoms()` yields them, so across all
    /// Conformers, and the iterator is walked up to the requested index.
    ///
    /// ## Arguments
    /// * `index` - the index of the Atom
    ///
    /// ## Fails
    /// Returns `None` if the index is out of bounds.
    #[must_use]
    pub fn atom(&self, index: usize) -> Option<&Atom> {
        self.atoms().nth(index)
    }

    /// Get a mutable reference to a specific Atom from the list of Conformers making up this Residue.
    /// The Atoms are numbered in the order in which `atoms_mut()` yields them, so across all
    /// Conformers, and the iterator is walked up to the requested index.
    ///
    /// ## Arguments
    /// * `index` - the index of the Atom
    ///
    /// ## Fails
    /// Returns `None` if the index is out of bounds.
    #[must_use]
    pub fn atom_mut(&mut self, index: usize) -> Option<&mut Atom> {
        self.atoms_mut().nth(index)
    }

    /// Get a reference to the specified Atom, together with the Conformer that contains it.
    /// The Atom is identified by its serial number combined with the alternative location
    /// of its Conformer. The search within a Conformer is based on binary search so it is
    /// faster than an exhaustive search, but the full structure is assumed to be sorted.
    /// This assumption can be enforced by using `pdb.full_sort()`.
    ///
    /// ## Arguments
    /// * `serial_number` - the serial number of the Atom to find
    /// * `alternative_location` - the alternative location of the Conformer holding the Atom
    ///
    /// ## Fails
    /// Returns `None` if no Conformer with the given alternative location contains the Atom.
    #[must_use]
    pub fn binary_find_atom(
        &'a self,
        serial_number: usize,
        alternative_location: Option<&str>,
    ) -> Option<hierarchy::AtomConformer<'a>> {
        self.conformers()
            .filter(move |conformer| conformer.alternative_location() == alternative_location)
            .find_map(move |conformer| {
                // An empty Conformer has neither a first nor a last Atom and cannot match.
                let first = conformer.atoms().next()?;
                let last = conformer.atoms().next_back()?;

                // Skip the search if the number lies outside of the range covered by this Conformer.
                if serial_number < first.serial_number() || last.serial_number() < serial_number {
                    return None;
                }

                conformer
                    .binary_find_atom(serial_number)
                    .map(|atom| hierarchy::AtomConformer::new(atom, conformer))
            })
    }

    /// Get a mutable reference to the specified Atom, together with the Conformer that contains it.
    /// The Atom is identified by its serial number combined with the alternative location
    /// of its Conformer. The algorithm is based on binary search so it is faster than an
    /// exhaustive search, but the full structure is assumed to be sorted. This assumption
    /// can be enforced by using `pdb.full_sort()`.
    ///
    /// ## Arguments
    /// * `serial_number` - the serial number of the Atom to find
    /// * `alternative_location` - the alternative location of the Conformer holding the Atom
    ///
    /// ## Fails
    /// Returns `None` if no Conformer with the given alternative location contains the Atom.
    // `unwrap` is only called on pointers derived from `&mut Conformer` references, which are never null.
    #[allow(clippy::unwrap_used)]
    #[must_use]
    pub fn binary_find_atom_mut(
        &'a mut self,
        serial_number: usize,
        alternative_location: Option<&str>,
    ) -> Option<hierarchy::AtomConformerMut<'a>> {
        // SAFETY: `c_ptr` is created from a `&mut Conformer` yielded by `conformers_mut()`,
        // so it is non-null and points to a Conformer owned by `self`, which is mutably
        // borrowed for `'a`.
        // NOTE(review): two `&mut Conformer` are created from the same pointer (`conformer`
        // for the range check and the receiver of `binary_find_atom_mut`), and the returned
        // hierarchy keeps both the Atom reference and the raw Conformer pointer. Confirm that
        // this aliasing is sound, e.g. by running the tests under Miri.
        unsafe {
            for c in self.conformers_mut() {
                let c_ptr: *mut Conformer = c;
                let conformer = c_ptr.as_mut().unwrap();
                if conformer.alternative_location() == alternative_location {
                    // Only search Conformers whose first and last Atom enclose the serial number.
                    if let Some(f) = conformer.atoms().next() {
                        if let Some(b) = conformer.atoms().next_back() {
                            if f.serial_number() <= serial_number
                                && serial_number <= b.serial_number()
                            {
                                if let Some(atom) =
                                    c_ptr.as_mut().unwrap().binary_find_atom_mut(serial_number)
                                {
                                    return Some(hierarchy::AtomConformerMut::new(atom, c_ptr));
                                }
                            }
                        }
                    }
                }
            }
            None
        }
    }

    /// Find all hierarchies matching the given search. For more details see [Search].
    ///
    /// For every Conformer the search is extended with the information of that Conformer.
    /// Conformers for which the search is then known to fail are skipped entirely, for all
    /// others the matching Atoms are returned together with their Conformer.
    #[must_use]
    pub fn find(
        &'a self,
        search: Search,
    ) -> impl DoubleEndedIterator<Item = AtomConformer<'a>> + '_ {
        self.conformers()
            .filter_map(move |conformer| {
                let narrowed = search.clone().add_conformer_info(conformer);
                if matches!(narrowed, Search::Known(false)) {
                    None
                } else {
                    Some((conformer, narrowed))
                }
            })
            .flat_map(move |(conformer, narrowed)| {
                conformer
                    .find(narrowed)
                    .map(move |atom| hierarchy::AtomConformer::new(atom, conformer))
            })
    }

    /// Find all hierarchies matching the given search, with mutable access. For more details see [Search].
    ///
    /// For every Conformer the search is extended with the information of that Conformer.
    /// Conformers for which the search is then known to fail are skipped entirely, for all
    /// others the matching Atoms are returned together with their Conformer.
    #[must_use]
    pub fn find_mut(
        &'a mut self,
        search: Search,
    ) -> impl DoubleEndedIterator<Item = AtomConformerMut<'a>> + '_ {
        self.conformers_mut()
            .map(move |c| {
                let search = search.clone().add_conformer_info(c);
                (c, search)
            })
            // Drop the Conformers that cannot contain any match.
            .filter(|(_c, search)| !matches!(search, Search::Known(false)))
            .flat_map(move |(c, search)| {
                // The Conformer is handed to the hierarchy as a raw pointer while `c` itself is
                // mutably borrowed to find the Atoms.
                // NOTE(review): the soundness of this aliasing depends on how `AtomConformerMut`
                // uses the pointer — confirm.
                let c_ptr: *mut Conformer = c;
                c.find_mut(search)
                    .map(move |a| hierarchy::AtomConformerMut::new(a, c_ptr))
            })
    }

    /// Get an iterator of references to Conformers making up this Residue.
    /// Double ended so iterating from the end is just as fast as from the start.
    #[must_use]
    pub fn conformers(&self) -> impl DoubleEndedIterator<Item = &Conformer> + '_ {
        self.conformers.iter()
    }

    /// Get a parallel iterator of references to Conformers making up this Residue.
    #[doc_cfg(feature = "rayon")]
    #[must_use]
    pub fn par_conformers(&self) -> impl ParallelIterator<Item = &Conformer> + '_ {
        self.conformers.par_iter()
    }

    /// Get an iterator of mutable references to Conformers making up this Residue.
    /// Double ended so iterating from the end is just as fast as from the start.
    #[must_use]
    pub fn conformers_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut Conformer> + '_ {
        self.conformers.iter_mut()
    }

    /// Get a parallel iterator of mutable references to Conformers making up this Residue.
    #[doc_cfg(feature = "rayon")]
    #[must_use]
    pub fn par_conformers_mut(&mut self) -> impl ParallelIterator<Item = &mut Conformer> + '_ {
        self.conformers.par_iter_mut()
    }

    /// Get an iterator of references to Atoms making up this Residue.
    /// The Atoms are yielded Conformer by Conformer, in the order of the Conformers.
    /// Double ended so iterating from the end is just as fast as from the start.
    #[must_use]
    pub fn atoms(&self) -> impl DoubleEndedIterator<Item = &Atom> + '_ {
        self.conformers().flat_map(Conformer::atoms)
    }

    /// Get a parallel iterator of references to Atoms making up this Residue.
    #[doc_cfg(feature = "rayon")]
    #[must_use]
    pub fn par_atoms(&self) -> impl ParallelIterator<Item = &Atom> + '_ {
        self.par_conformers().flat_map(Conformer::par_atoms)
    }

    /// Get an iterator of mutable references to Atoms making up this Residue.
    /// The Atoms are yielded Conformer by Conformer, in the order of the Conformers.
    /// Double ended so iterating from the end is just as fast as from the start.
    #[must_use]
    pub fn atoms_mut(&mut self) -> impl DoubleEndedIterator<Item = &mut Atom> + '_ {
        self.conformers_mut().flat_map(Conformer::atoms_mut)
    }

    /// Get a parallel iterator of mutable references to Atoms making up this Residue.
    #[doc_cfg(feature = "rayon")]
    #[must_use]
    pub fn par_atoms_mut(&mut self) -> impl ParallelIterator<Item = &mut Atom> + '_ {
        self.par_conformers_mut().flat_map(Conformer::par_atoms_mut)
    }

    /// Get an iterator over all Atoms of this Residue, where every Atom is bundled with a
    /// reference to the Conformer containing it.
    #[must_use]
    pub fn atoms_with_hierarchy(
        &'a self,
    ) -> impl DoubleEndedIterator<Item = hierarchy::AtomConformer<'a>> + '_ {
        self.conformers().flat_map(|conformer| {
            conformer
                .atoms()
                .map(move |atom| hierarchy::AtomConformer::from_tuple((atom, conformer)))
        })
    }

    /// Get an iterator of mutable references to a struct containing all atoms with their hierarchy making up this Residue.
    /// Every Atom is bundled with the Conformer containing it.
    // The cast of `&mut Atom` to `*mut Atom` below is flagged by the `trivial_casts` lint.
    #[allow(trivial_casts)]
    #[must_use]
    pub fn atoms_with_hierarchy_mut(
        &'a mut self,
    ) -> impl DoubleEndedIterator<Item = hierarchy::AtomConformerMut<'a>> + '_ {
        self.conformers_mut()
            .flat_map(|c| {
                // Both the Atom and its Conformer are passed on as raw pointers, as the Atom is
                // reached through a mutable borrow of that same Conformer.
                // NOTE(review): soundness depends on how `AtomConformerMut` uses these pointers — confirm.
                let conformer: *mut Conformer = c;
                c.atoms_mut().map(move |a| (a as *mut Atom, conformer))
            })
            .map(hierarchy::AtomConformerMut::from_tuple)
    }

    /// Add a new conformer to the list of conformers making up this Residue.
    /// The conformer is appended to the end of the list, no check for duplicates is done.
    ///
    /// ## Arguments
    /// * `new_conformer` - the new conformer to add
    pub fn add_conformer(&mut self, new_conformer: Conformer) {
        self.conformers.push(new_conformer);
    }

    /// Add a new Atom to this Residue. If a Conformer with the given id already exists in this
    /// Residue the Atom is added to the first such Conformer, otherwise a new Conformer is
    /// created to hold the Atom and appended to the list of Conformers of this Residue.
    ///
    /// ## Arguments
    /// * `new_atom` - the new Atom to add
    /// * `conformer_id` - the id of the Conformer to add the Atom to: its name and its
    ///   optional alternative location. The name is passed through
    ///   `prepare_identifier_uppercase` before it is compared or used.
    ///
    /// ## Panics
    /// It panics if the Conformer name contains any invalid characters, or if a new Conformer
    /// has to be created and the given id is rejected by `Conformer::new`.
    pub fn add_atom(&mut self, new_atom: Atom, conformer_id: (impl AsRef<str>, Option<&str>)) {
        let name = prepare_identifier_uppercase(conformer_id.0).expect("Invalid Conformer ID");
        let conformer_id = (name.as_str(), conformer_id.1);

        let existing = self
            .conformers
            .iter()
            .position(|conformer| conformer.id() == conformer_id);

        // Only build a new Conformer if none with this id is present yet.
        let index = existing.unwrap_or_else(|| {
            self.conformers.push(
                Conformer::new(conformer_id.0, conformer_id.1, None)
                    .expect("Invalid chars in Residue creation"),
            );
            self.conformers.len() - 1
        });

        // `index` is either a position that was just found or the position of the Conformer
        // that was just pushed, so it is always in bounds.
        self.conformers[index].add_atom(new_atom);
    }

    /// Remove all empty Conformers from this Residue.
    /// A Conformer is empty if it does not contain any Atoms.
    pub fn remove_empty(&mut self) {
        self.conformers.retain(|c| c.atom_count() > 0);
    }

    /// Remove all conformers matching the given predicate. As this is done in place this is the fastest way to remove conformers from this Residue.
    ///
    /// ## Arguments
    /// * `predicate` - returns `true` for every Conformer that has to be removed
    pub fn remove_conformers_by<F>(&mut self, predicate: F)
    where
        F: Fn(&Conformer) -> bool,
    {
        // `retain` keeps the elements for which the closure is true, hence the negation.
        self.conformers.retain(|conformer| !predicate(conformer));
    }

    /// Remove all atoms matching the given predicate from every Conformer of this Residue.
    /// As this is done in place this is the fastest way to remove atoms from this Residue.
    ///
    /// ## Arguments
    /// * `predicate` - returns `true` for every Atom that has to be removed
    pub fn remove_atoms_by<F>(&mut self, predicate: F)
    where
        F: Fn(&Atom) -> bool,
    {
        // The predicate is lent to each Conformer in turn so it can be reused for all of them.
        for conformer in &mut self.conformers {
            conformer.remove_atoms_by(&predicate);
        }
    }

    /// Remove the specified conformer. The Conformers after it keep their relative order.
    ///
    /// ## Arguments
    /// * `index` - the index of the Conformer to remove
    ///
    /// ## Panics
    /// Panics when the index is outside bounds.
    pub fn remove_conformer(&mut self, index: usize) {
        self.conformers.remove(index);
    }

    /// Remove the Conformer with the given id. Returns `true` if a matching Conformer was
    /// found and removed, `false` if no Conformer has this id.
    /// Only the first matching Conformer in the list is removed.
    ///
    /// ## Arguments
    /// * `id` - the identifying construct of the Conformer to remove
    pub fn remove_conformer_by_id(&mut self, id: (&str, Option<&str>)) -> bool {
        let position = self.conformers().position(|candidate| candidate.id() == id);

        if let Some(found) = position {
            // The position was just found in the list, so it is in bounds.
            self.remove_conformer(found);
            true
        } else {
            false
        }
    }

    /// Remove the Conformer with the given id. Returns `true` if a matching Conformer was
    /// found and removed, `false` if no Conformer has this id.
    /// Only the first matching Conformer in the list is removed. Searching is done in parallel.
    ///
    /// ## Arguments
    /// * `id` - the identifying construct of the Conformer to remove
    #[doc_cfg(feature = "rayon")]
    pub fn par_remove_conformer_by_id(&mut self, id: (&str, Option<&str>)) -> bool {
        let position = self
            .conformers
            .par_iter()
            .position_first(|candidate| candidate.id() == id);

        match position {
            Some(found) => {
                // The position was just found in the list, so it is in bounds.
                self.remove_conformer(found);
                true
            }
            None => false,
        }
    }

    /// Apply a transformation to every Conformer making up this Residue, one after the other.
    /// The new positions are immediately set.
    ///
    /// ## Arguments
    /// * `transformation` - the transformation to apply
    pub fn apply_transformation(&mut self, transformation: &TransformationMatrix) {
        self.conformers
            .iter_mut()
            .for_each(|conformer| conformer.apply_transformation(transformation));
    }

    /// Apply a transformation to the position of all Conformers making up this Residue, the new position is immediately set.
    /// Done in parallel, every Conformer is handled as a separate unit of work.
    ///
    /// ## Arguments
    /// * `transformation` - the transformation to apply
    #[doc_cfg(feature = "rayon")]
    pub fn par_apply_transformation(&mut self, transformation: &TransformationMatrix) {
        self.par_conformers_mut()
            .for_each(|conformer| conformer.apply_transformation(transformation));
    }

    /// Join this Residue with another Residue, this moves all Conformers from the other Residue
    /// to this Residue. All other (meta) data of this Residue will stay the same.
    /// The moved Conformers are appended after the existing ones, no merging of Conformers is done.
    pub fn join(&mut self, other: Self) {
        self.conformers.extend(other.conformers);
    }

    /// Sort the Conformers of this Residue, using the ordering defined on Conformers.
    pub fn sort(&mut self) {
        self.conformers.sort();
    }

    /// Sort the Conformers of this Residue in parallel, using the ordering defined on Conformers.
    #[doc_cfg(feature = "rayon")]
    pub fn par_sort(&mut self) {
        self.conformers.par_sort();
    }
}

impl fmt::Display for Residue {
    /// Write a one line summary of this Residue: its serial number, its insertion code
    /// (left empty if there is none), and the number of Conformers.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let number = self.serial_number;
        let code = self.insertion_code.as_deref().unwrap_or("");
        let conformer_total = self.conformer_count();

        write!(
            f,
            "RESIDUE Number:{}, InsertionCode:{}, Conformers:{}",
            number, code, conformer_total,
        )
    }
}

impl PartialOrd for Residue {
    /// Compare two Residues, this always gives an ordering.
    /// It delegates to the total ordering on Residues so both comparisons can never disagree.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Residue {
    /// Order Residues by their id: first on the serial number, and for equal serial numbers
    /// on the insertion code (where no insertion code sorts before any insertion code).
    /// The Conformers do not take part in the ordering.
    fn cmp(&self, other: &Self) -> Ordering {
        self.serial_number
            .cmp(&other.serial_number)
            .then_with(|| self.insertion_code().cmp(&other.insertion_code()))
    }
}

impl Extend<Conformer> for Residue {
    /// Extend the Conformers on this Residue by the given iterator.
    /// The new Conformers are appended after the existing ones, in the order of the iterator.
    fn extend<T: IntoIterator<Item = Conformer>>(&mut self, iter: T) {
        self.conformers.extend(iter);
    }
}

#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Invalid insertion codes are rejected and leave the Residue untouched,
    /// valid ones are stored in uppercase.
    #[test]
    fn test_text_validation() {
        let mut a = Residue::new(1, Some("A"), None).unwrap();
        assert_eq!(Residue::new(2, Some("Rͦ"), None), None);
        assert!(!a.set_insertion_code("Oͦ"));
        assert_eq!(a.insertion_code(), Some("A"));
        assert!(a.set_insertion_code("Conformer"));
        assert_eq!(a.insertion_code(), Some("CONFORMER"));
    }

    /// Residues are ordered by their serial number.
    #[test]
    fn ordering_and_equality() {
        let a = Residue::new(1, None, None).unwrap();
        let b = Residue::new(1, None, None).unwrap();
        let c = Residue::new(2, None, None).unwrap();
        assert_eq!(a, b);
        assert_ne!(a, c);
        assert!(a < c);
        assert!(b < c);
    }

    /// A Residue created without a Conformer is empty.
    #[test]
    fn test_empty() {
        let a = Residue::new(1, None, None).unwrap();
        assert_eq!(a.conformer_count(), 0);
    }

    /// Conformers can be added, retrieved, and removed by index or by id.
    #[test]
    fn test_conformer() {
        let mut a = Residue::new(1, None, None).unwrap();
        let mut conformer1 = Conformer::new("A", None, None).unwrap();
        a.add_conformer(conformer1.clone());
        a.add_conformer(Conformer::new("B", None, None).unwrap());
        assert_eq!(a.conformer(0), Some(&conformer1));
        assert_eq!(a.conformer_mut(0), Some(&mut conformer1));
        a.remove_conformer(0);
        assert!(a.remove_conformer_by_id(("B", None)));
        assert_eq!(a.conformer_count(), 0);
    }

    /// Joining and extending both append Conformers.
    #[test]
    fn test_join() {
        let mut a = Residue::new(1, None, None).unwrap();
        let mut b = Residue::new(1, None, None).unwrap();
        let conformer1 = Conformer::new("A", None, None).unwrap();
        b.add_conformer(conformer1.clone());

        a.join(b);
        a.extend(vec![conformer1]);

        assert_eq!(a.conformer_count(), 2);
    }

    /// Both formatting traits work, and `Display` gives the documented summary line.
    #[test]
    fn check_display() {
        let a = Residue::new(1, None, None).unwrap();
        let debug = format!("{a:?}");
        assert!(debug.contains("Residue"));
        assert_eq!(
            format!("{a}"),
            "RESIDUE Number:1, InsertionCode:, Conformers:0"
        );
    }
}