compact_genome/interface/sequence/
neighbor_iterators.rs

1//! Iterators over different neighborhoods of genome sequences.
2
3use crate::interface::alphabet::{Alphabet, AlphabetCharacter};
4use crate::interface::sequence::{GenomeSequenceMut, OwnedGenomeSequence};
5use std::marker::PhantomData;
6
7/// Returns an iterator over the genome sequences that are exactly one substitution away from this genome sequence.
8///
9/// In other words, all sequences with hamming distance one of the same length.
10/// While the returned type works like an iterator, it does not implement the iterator trait due to limitations of the type system.
11pub fn substitution_distance_one_neighbor_iterator<
12    AlphabetType: Alphabet,
13    Sequence: OwnedGenomeSequence<AlphabetType, Subsequence>
14        + GenomeSequenceMut<AlphabetType, Subsequence>
15        + Sized,
16    Subsequence: GenomeSequenceMut<AlphabetType, Subsequence> + ?Sized,
17>(
18    sequence: Sequence,
19) -> SubstitutionDistanceOneNeighborIterator<AlphabetType, Sequence, Subsequence> {
20    SubstitutionDistanceOneNeighborIterator::new(sequence)
21}
22
23/// An iterator over the genome sequences that are exactly one substitution away from the given genome sequence.
24///
25/// In other words, all sequences with hamming distance one of the same length.
26/// While the type works like an iterator, it does not implement the iterator trait due to limitations of the type system.
27pub struct SubstitutionDistanceOneNeighborIterator<
28    AlphabetType: Alphabet,
29    Sequence: OwnedGenomeSequence<AlphabetType, Subsequence>
30        + GenomeSequenceMut<AlphabetType, Subsequence>
31        + Sized,
32    Subsequence: GenomeSequenceMut<AlphabetType, Subsequence> + ?Sized,
33> {
34    current_index: usize,
35    current_character: u8,
36    original_character: AlphabetType::CharacterType,
37    sequence: Sequence,
38    subsequence: PhantomData<Subsequence>,
39    alphabet_type: PhantomData<AlphabetType>,
40}
41
42impl<
43        AlphabetType: Alphabet,
44        Sequence: OwnedGenomeSequence<AlphabetType, Subsequence>
45            + GenomeSequenceMut<AlphabetType, Subsequence>
46            + Sized,
47        Subsequence: GenomeSequenceMut<AlphabetType, Subsequence> + ?Sized,
48    > SubstitutionDistanceOneNeighborIterator<AlphabetType, Sequence, Subsequence>
49{
50    fn new(sequence: Sequence) -> Self {
51        Self {
52            current_index: 0,
53            current_character: 0,
54            original_character: if sequence.len() > 0 {
55                sequence[0].clone()
56            } else {
57                AlphabetType::CharacterType::from_index(0).unwrap()
58            },
59            sequence,
60            subsequence: Default::default(),
61            alphabet_type: Default::default(),
62        }
63    }
64
65    /// Like the `Iterator::next` function.
66    /// The returned reference must be dropped before next is called a second time.
67    pub fn next<'this: 'returned_reference, 'returned_reference>(
68        &'this mut self,
69    ) -> Option<&'returned_reference Subsequence> {
70        while self.current_index < self.sequence.len() {
71            while self.current_character < AlphabetType::SIZE {
72                let current_character =
73                    AlphabetType::CharacterType::from_index(self.current_character).unwrap();
74                self.current_character += 1;
75
76                if self.original_character != current_character {
77                    self.sequence[self.current_index] = current_character;
78                    return Some(self.sequence.as_genome_subsequence());
79                }
80            }
81
82            self.sequence[self.current_index] = self.original_character.clone();
83            self.current_index += 1;
84            self.current_character = 0;
85            if self.current_index < self.sequence.len() {
86                self.original_character = self.sequence[self.current_index].clone();
87            }
88        }
89
90        None
91    }
92}
93
94#[cfg(test)]
95mod tests {
96    use crate::implementation::alphabets::dna_alphabet::DnaAlphabet;
97    use crate::implementation::vec_sequence::VectorGenome;
98    use crate::interface::sequence::neighbor_iterators::substitution_distance_one_neighbor_iterator;
99    use crate::interface::sequence::OwnedGenomeSequence;
100
101    #[test]
102    fn test_substitution_distance_one_neighbor_iterator() {
103        let sequence = VectorGenome::<DnaAlphabet>::from_slice_u8(b"ACCGTTA").unwrap();
104        let mut neighbors = Vec::new();
105        let mut neighbor_iterator = substitution_distance_one_neighbor_iterator(sequence);
106        while let Some(neighbor) = neighbor_iterator.next() {
107            neighbors.push(neighbor.to_owned());
108        }
109        neighbors.sort();
110        let mut expected = vec![
111            VectorGenome::from_slice_u8(b"CCCGTTA").unwrap(),
112            VectorGenome::from_slice_u8(b"GCCGTTA").unwrap(),
113            VectorGenome::from_slice_u8(b"TCCGTTA").unwrap(),
114            VectorGenome::from_slice_u8(b"AACGTTA").unwrap(),
115            VectorGenome::from_slice_u8(b"AGCGTTA").unwrap(),
116            VectorGenome::from_slice_u8(b"ATCGTTA").unwrap(),
117            VectorGenome::from_slice_u8(b"ACAGTTA").unwrap(),
118            VectorGenome::from_slice_u8(b"ACGGTTA").unwrap(),
119            VectorGenome::from_slice_u8(b"ACTGTTA").unwrap(),
120            VectorGenome::from_slice_u8(b"ACCATTA").unwrap(),
121            VectorGenome::from_slice_u8(b"ACCCTTA").unwrap(),
122            VectorGenome::from_slice_u8(b"ACCTTTA").unwrap(),
123            VectorGenome::from_slice_u8(b"ACCGATA").unwrap(),
124            VectorGenome::from_slice_u8(b"ACCGCTA").unwrap(),
125            VectorGenome::from_slice_u8(b"ACCGGTA").unwrap(),
126            VectorGenome::from_slice_u8(b"ACCGTAA").unwrap(),
127            VectorGenome::from_slice_u8(b"ACCGTCA").unwrap(),
128            VectorGenome::from_slice_u8(b"ACCGTGA").unwrap(),
129            VectorGenome::from_slice_u8(b"ACCGTTC").unwrap(),
130            VectorGenome::from_slice_u8(b"ACCGTTG").unwrap(),
131            VectorGenome::from_slice_u8(b"ACCGTTT").unwrap(),
132        ];
133        expected.sort();
134        debug_assert_eq!(neighbors, expected);
135    }
136
137    #[test]
138    fn test_substitution_distance_one_neighbor_iterator_empty() {
139        let sequence = VectorGenome::from_slice_u8(b"").unwrap();
140        let mut neighbors = Vec::new();
141        let mut neighbor_iterator = substitution_distance_one_neighbor_iterator(sequence);
142        while let Some(neighbor) = neighbor_iterator.next() {
143            neighbors.push(neighbor.to_owned());
144        }
145        neighbors.sort();
146        let mut expected: Vec<VectorGenome<DnaAlphabet>> = Vec::new();
147        expected.sort();
148        debug_assert_eq!(neighbors, expected);
149    }
150}