bio_seq/seq/
iterators.rs

1// Copyright 2021, 2022 Jeff Knaggs
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use crate::codec::Codec;
7use crate::kmer::KmerIter;
8use crate::seq::{Seq, SeqSlice};
9use core::iter::Chain;
10use core::marker::PhantomData;
11
12/// An iterator over fixed-size non-overlapping chunks of a sequence
13pub struct SeqChunks<'a, A: Codec> {
14    slice: &'a SeqSlice<A>,
15    width: usize,
16    skip: usize,
17    index: usize,
18}
19
20/// An iterator over the elements of a sequence
21pub struct SeqIter<'a, A: Codec> {
22    slice: &'a SeqSlice<A>,
23    index: usize,
24}
25
26impl<'a, A: Codec> SeqSlice<A> {
27    pub fn chain(
28        self: &'a SeqSlice<A>,
29        second: &'a SeqSlice<A>,
30    ) -> Chain<SeqIter<'a, A>, SeqIter<'a, A>> {
31        self.into_iter().chain(second)
32    }
33
34    pub fn iter(&'a self) -> SeqIter<'a, A> {
35        <&Self as IntoIterator>::into_iter(self)
36    }
37
38    /// Iterate over sliding windows of length K
39    pub fn kmers<const K: usize>(&self) -> KmerIter<A, K> {
40        KmerIter::<A, K> {
41            slice: self,
42            index: 0,
43            len: self.len(),
44            _p: PhantomData,
45        }
46    }
47
48    /// Iterate over the sequence in reverse order
49    pub fn rev_iter(&self) -> RevIter<A> {
50        RevIter {
51            slice: self,
52            index: self.len(),
53        }
54    }
55
56    /// Iterate over the sequence in overlapping windows of a specified width
57    ///
58    /// ```
59    /// use bio_seq::prelude::*;
60    ///
61    /// let seq: Seq<Dna> = "ACTGATCG".try_into().unwrap();
62    /// let windows: Vec<String> = seq.windows(3).map(String::from).collect();
63    /// assert_eq!(windows, vec!["ACT", "CTG", "TGA", "GAT", "ATC", "TCG"]);
64    /// ```
65    pub fn windows(&self, width: usize) -> SeqChunks<A> {
66        SeqChunks {
67            slice: self,
68            width,
69            skip: 1,
70            index: 0,
71        }
72    }
73
74    /// Iterate over the sequence in non-overlapping chunks of a specified width
75    ///
76    /// The last incomplete chunk will be excluded if the sequence length is not divisible by the specified
77    /// width.
78    ///
79    /// ```
80    /// use bio_seq::prelude::*;
81    ///
82    /// let seq: Seq<Dna> = "ACTGATCG".try_into().unwrap();
83    /// let chunks: Vec<Seq<Dna>> = seq.chunks(3).collect();
84    /// assert_eq!(chunks, vec![dna!("ACT"), dna!("GAT")]);
85    /// ```
86    pub fn chunks(&self, width: usize) -> SeqChunks<A> {
87        SeqChunks {
88            slice: self,
89            width,
90            skip: width,
91            index: 0,
92        }
93    }
94}
95
96/// An iterator over the elements of a sequence in reverse order
97pub struct RevIter<'a, A: Codec> {
98    pub slice: &'a SeqSlice<A>,
99    pub index: usize,
100}
101
102impl<A: Codec> Iterator for RevIter<'_, A> {
103    type Item = A;
104    fn next(&mut self) -> Option<A> {
105        let i = self.index;
106
107        if self.index == 0 {
108            return None;
109        }
110        self.index -= 1;
111        Some(A::unsafe_from_bits(self.slice[i - 1].into()))
112    }
113}
114
115impl<'a, A: Codec + core::fmt::Debug> Iterator for SeqChunks<'a, A> {
116    type Item = &'a SeqSlice<A>;
117
118    fn next(&mut self) -> Option<Self::Item> {
119        if self.index + self.width > self.slice.len() {
120            return None;
121        }
122        let i = self.index;
123        self.index += self.skip;
124        if i + self.width > self.slice.len() {
125            return None;
126        }
127        Some(&self.slice[i..i + self.width])
128    }
129}
130
131impl<'a, A: Codec> IntoIterator for &'a Seq<A> {
132    type Item = A;
133    type IntoIter = SeqIter<'a, A>;
134
135    fn into_iter(self) -> Self::IntoIter {
136        self.as_ref().into_iter()
137    }
138}
139
140impl<'a, A: Codec> IntoIterator for &'a SeqSlice<A> {
141    type Item = A;
142    type IntoIter = SeqIter<'a, A>;
143
144    fn into_iter(self) -> Self::IntoIter {
145        SeqIter {
146            slice: self,
147            index: 0,
148        }
149    }
150}
151
152impl<'a, A: Codec> FromIterator<&'a SeqSlice<A>> for Vec<Seq<A>> {
153    fn from_iter<T: IntoIterator<Item = &'a SeqSlice<A>>>(iter: T) -> Self {
154        iter.into_iter().map(ToOwned::to_owned).collect()
155    }
156}
157
158impl<A: Codec> Iterator for SeqIter<'_, A> {
159    type Item = A;
160    fn next(&mut self) -> Option<A> {
161        let i = self.index;
162        if self.index >= self.slice.len() {
163            return None;
164        }
165        self.index += 1;
166        Some(A::unsafe_from_bits(self.slice[i].into()))
167    }
168}
169
#[cfg(test)]
mod tests {
    use crate::codec::dna::Dna::*;
    use crate::prelude::*;

    // Forward iteration over an owned sequence yields every base in order.
    #[test]
    fn seq_iter() {
        let seq: Seq<Dna> = dna!("ACTGATCGATAC").into();
        let forward = vec![A, C, T, G, A, T, C, G, A, T, A, C];
        let too_long = vec![A, C, T, G, A, T, C, G, A, T, A, C, A];
        let backward = vec![C, A, T, A, G, C, T, A, G, T, C, A];

        let elements: Vec<Dna> = seq.into_iter().collect();

        assert_eq!(elements, forward);
        assert_ne!(elements, too_long);
        assert_ne!(elements, backward);
    }

    // Reverse iteration yields the bases last-to-first.
    #[test]
    fn rev_iter() {
        let seq: Seq<Dna> = dna!("ACTGATCGATAC").into();
        let forward = vec![A, C, T, G, A, T, C, G, A, T, A, C];
        let backward = vec![C, A, T, A, G, C, T, A, G, T, C, A];

        let rev_elements: Vec<Dna> = seq.rev_iter().collect();

        assert_ne!(rev_elements, forward);
        assert_eq!(rev_elements, backward);
    }

    // Iterating a subslice only visits the bases inside the slice bounds.
    #[test]
    fn iterators() {
        let seq: Seq<Dna> = dna!("ACTGATCGATAC").into();
        let middle = &seq[2..9];

        let elements: Vec<Dna> = middle.into_iter().collect();

        assert_eq!(elements, vec![T, G, A, T, C, G, A]);
    }

    // Twelve bases in chunks of five: two full chunks, the two-base tail is dropped.
    #[test]
    fn chunks() {
        let seq: Seq<Dna> = dna!("ACTGATCGATAC").into();

        let cs: Vec<Seq<Dna>> = seq.chunks(5).collect();

        assert_eq!(cs.len(), 2);
        assert_eq!(cs[0], dna!("ACTGA"));
        assert_eq!(cs[1], dna!("TCGAT"));
    }

    // Chaining two sequences matches iterating their concatenation.
    #[test]
    fn test_chain() {
        let seq1 = Seq::<Dna>::try_from("ATG").unwrap();
        let seq2 = Seq::<Dna>::try_from("TAC").unwrap();
        let expected_seq = Seq::<Dna>::try_from("ATGTAC").unwrap();

        seq1.chain(&seq2)
            .zip(expected_seq.into_iter())
            .for_each(|(got, want)| assert_eq!(got, want));

        // Complementing every base must make each position differ.
        seq1.chain(&seq2)
            .map(|base| base.to_comp())
            .zip(expected_seq.into_iter())
            .for_each(|(got, want)| assert_ne!(got, want));
    }

    // Nine bases with width five give five overlapping windows, one per start position.
    #[test]
    fn windows() {
        let seq: Seq<Dna> = dna!("ACTGATACG").into();
        let expected = vec!["ACTGA", "CTGAT", "TGATA", "GATAC", "ATACG"];

        let windows: Vec<String> = seq.windows(5).map(String::from).collect();

        assert_eq!(windows, expected);
    }
}
231        assert_eq!(windows, vec!["ACTGA", "CTGAT", "TGATA", "GATAC", "ATACG"]);
232    }
233}