rust_seq2kminmers/
lib.rs

1use nthash::NtHashIterator;
2mod kminmer;
3use kminmer::Kminmer;
4mod nthash_hpc;
5use nthash_hpc::NtHashHPCIterator;
6use std::io::Result;
7
8// An iterator for getting k-min-mers out of a DNA sequence
9///
10/// Parameters:
11/// l: minimizer length
12/// k: k-min-mer length
13/// density: density of minimizer scheme
14/// hpc: true if minimizers are computed as if the sequence was homopolymer compressed but all positions reported
15///      in original sequence space, false if everything is done in original sequence space
16///
17/// Hashing is performed by NtHash1
18///
19/// Example usage:
20/// ```
21///     use rust_seq2kminmers::KminmersIterator;
22///     
23///     fn main() {
24///        let seq = b"AACTGCACTGCACTGCACTGCACACTGCACTGCACTGCACTGCACACTGCACTGCACTGACTGCACTGCACTGCACTGCACTGCCTGC";
25///        let iter = KminmersIterator::new(seq, 10, 5, 0.1, false).unwrap();
26///        for kminmer in iter
27///        {
28///            println!("kminmer: {:?}",kminmer);
29///        }
30///     }
31/// ```
32
33pub struct KminmersIterator<'a> {
34    seq_pos : usize, 
35    k: usize,
36    hash_bound: u64,
37    hpc: bool,
38    nthash_hpc_iterator: Option<NtHashHPCIterator<'a>>,
39    nthash_iterator: Option<NtHashIterator<'a>>,
40    curr_sk : Vec::<u64>,
41    curr_pos : Vec::<usize>,
42}
43
44impl<'a> KminmersIterator<'a> {
45    pub fn new(seq: &'a [u8], l: usize, k: usize, density: f64, hpc: bool) -> Result<KminmersIterator<'a>> {
46
47        let hash_bound = ((density as f64) * (u64::max_value() as f64)) as u64;
48
49        let mut nthash_hpc_iterator = None;
50        let mut nthash_iterator = None;
51        if seq.len() > l {
52            if hpc
53            {
54                nthash_hpc_iterator = Some(NtHashHPCIterator::new(seq, l, hash_bound).unwrap());
55            }
56            else
57            { 
58                nthash_iterator = Some(NtHashIterator::new(seq, l).unwrap());
59            }
60        }
61
62        let curr_sk = Vec::<u64>::new();
63        let curr_pos = Vec::<usize>::new();
64
65        Ok(KminmersIterator {
66            seq_pos: 0,
67            k,
68            hash_bound,
69            hpc,
70            nthash_hpc_iterator: nthash_hpc_iterator,
71            nthash_iterator: nthash_iterator,
72            curr_pos,
73            curr_sk,
74        })
75    }
76}
77
78impl<'a> Iterator for KminmersIterator<'a> {
79    type Item = Kminmer;
80
81    fn next(&mut self) -> Option<Kminmer> {
82        let kminmer;
83        loop
84        {
85            let mut j;
86            let mut hash;
87            if self.hpc
88            {
89                match self.nthash_hpc_iterator.as_mut().unwrap().next()
90                {
91                    Some(n) => { (j,hash) = n; } 
92                    None => return None
93                };
94            }
95            else
96            {
97                loop
98                {
99                    match self.nthash_iterator.as_mut().unwrap().next()
100                    {
101                        Some(x) => { hash = x;}
102                        None => return None
103                    };
104                    self.seq_pos += 1;
105                    j = self.seq_pos;
106                    if hash < self.hash_bound { break; }
107                }
108            }
109
110            self.curr_pos.push(j); // raw sequence position
111            self.curr_sk.push(hash);
112            if self.curr_sk.len() == self.k { 
113                kminmer = Kminmer::new(&self.curr_sk, self.curr_pos[0], self.curr_pos[self.k - 1], j);
114                self.curr_sk = self.curr_sk[1..self.k].to_vec();
115                self.curr_pos = self.curr_pos[1..self.k].to_vec();
116                break; 
117            }
118        }
119        Some(kminmer)
120    }
121}