Skip to main content

cyanea_seq/
kmer.rs

//! Zero-allocation k-mer iterator.
//!
//! Wraps [`std::slice::Windows`] over a pre-validated uppercase sequence.
//! Since input is already validated and normalized, no per-kmer processing
//! is needed.
6
7use cyanea_core::{CyaneaError, Result};
8
/// Iterator over the k-mer windows of a byte slice.
///
/// Yields `&[u8]` slices of length `k`, borrowed from the underlying
/// sequence. Implements [`ExactSizeIterator`] and [`DoubleEndedIterator`]
/// for efficient bidirectional traversal.
///
/// Cloning copies only the window cursor and the counter, never the sequence
/// bytes, so a partially consumed iterator can be forked cheaply.
#[derive(Debug, Clone)]
pub struct KmerIter<'a> {
    /// Sliding window over the sequence; each item it yields is one k-mer.
    inner: std::slice::Windows<'a, u8>,
    /// Number of k-mers not yet yielded from either end.
    remaining: usize,
}
17
18impl<'a> KmerIter<'a> {
19    /// Create a new k-mer iterator.
20    ///
21    /// `k` must be in `[1, 31]` and `k <= seq.len()` (unless the sequence
22    /// is empty, in which case `k >= 1` is accepted and yields nothing — wait,
23    /// actually: if k > seq.len() we return an error per the plan).
24    pub fn new(seq: &'a [u8], k: usize) -> Result<Self> {
25        if k == 0 {
26            return Err(CyaneaError::InvalidInput(
27                "k-mer size must be at least 1".into(),
28            ));
29        }
30        if k > 31 {
31            return Err(CyaneaError::InvalidInput(
32                "k-mer size must be at most 31".into(),
33            ));
34        }
35        if k > seq.len() {
36            return Err(CyaneaError::InvalidInput(format!(
37                "k-mer size {} exceeds sequence length {}",
38                k,
39                seq.len()
40            )));
41        }
42        let remaining = seq.len() - k + 1;
43        Ok(Self {
44            inner: seq.windows(k),
45            remaining,
46        })
47    }
48}
49
50impl<'a> Iterator for KmerIter<'a> {
51    type Item = &'a [u8];
52
53    fn next(&mut self) -> Option<Self::Item> {
54        let item = self.inner.next()?;
55        self.remaining -= 1;
56        Some(item)
57    }
58
59    fn size_hint(&self) -> (usize, Option<usize>) {
60        (self.remaining, Some(self.remaining))
61    }
62}
63
64impl<'a> ExactSizeIterator for KmerIter<'a> {}
65
66impl<'a> DoubleEndedIterator for KmerIter<'a> {
67    fn next_back(&mut self) -> Option<Self::Item> {
68        let item = self.inner.next_back()?;
69        self.remaining -= 1;
70        Some(item)
71    }
72}
73
#[cfg(test)]
mod tests {
    use super::*;

    /// Every overlapping window is yielded in sequence order.
    #[test]
    fn basic_k2() {
        let seq = b"ACGT";
        let kmers: Vec<&[u8]> = KmerIter::new(seq, 2).unwrap().collect();
        assert_eq!(kmers, vec![b"AC", b"CG", b"GT"]);
    }

    /// `len()` equals `seq.len() - k + 1` before anything is consumed.
    #[test]
    fn exact_size() {
        let seq = b"ACGTACGT";
        let iter = KmerIter::new(seq, 3).unwrap();
        assert_eq!(iter.len(), 6);
    }

    /// `len()` shrinks by one for each item taken from either end.
    #[test]
    fn len_tracks_both_ends() {
        let seq = b"ACGTACGT";
        let mut iter = KmerIter::new(seq, 3).unwrap();
        iter.next();
        assert_eq!(iter.len(), 5);
        iter.next_back();
        assert_eq!(iter.len(), 4);
    }

    #[test]
    fn k_zero_error() {
        let result = KmerIter::new(b"ACGT", 0);
        assert!(result.is_err());
    }

    /// The upper bound on `k` is inclusive at 31.
    #[test]
    fn k_above_31_error() {
        let seq = [b'A'; 40];
        assert!(KmerIter::new(&seq, 32).is_err());
        assert!(KmerIter::new(&seq, 31).is_ok());
    }

    #[test]
    fn k_exceeds_len_error() {
        let result = KmerIter::new(b"AC", 3);
        assert!(result.is_err());
    }

    /// An empty sequence is shorter than any valid `k`, so it is rejected.
    #[test]
    fn empty_sequence_error() {
        let result = KmerIter::new(b"", 1);
        assert!(result.is_err());
    }

    /// When `k` equals the sequence length the whole sequence is the only k-mer.
    #[test]
    fn k_equals_len_yields_once() {
        let kmers: Vec<&[u8]> = KmerIter::new(b"ACGT", 4).unwrap().collect();
        assert_eq!(kmers, vec![b"ACGT"]);
    }

    /// Front and back cursors consume from the same pool of windows.
    #[test]
    fn double_ended() {
        let seq = b"ACGT";
        let mut iter = KmerIter::new(seq, 2).unwrap();
        assert_eq!(iter.next_back(), Some(b"GT".as_slice()));
        assert_eq!(iter.next(), Some(b"AC".as_slice()));
        assert_eq!(iter.next(), Some(b"CG".as_slice()));
        assert_eq!(iter.next(), None);
    }
}