jseqio/
seq_db.rs

1use crate::record::{MutRefRecord, RefRecord};
2
/// In-memory collection of sequence records stored in three contiguous byte buffers.
///
/// Headers, sequences and quality values each live in their own buffer. Record `i`
/// occupies `buf[starts[i]..starts[i + 1]]` of the corresponding buffer, so every
/// `*_starts` vector holds one offset per record plus a trailing end sentinel.
///
/// A mix of records with and without quality values is allowed: a record without
/// quality values simply has a zero-length slice in `qualbuf`.
///
/// `Default` is intentionally not derived: the `*_starts` vectors must always contain
/// at least the sentinel, which is what `SeqDB::new` sets up.
#[derive(Debug, Clone)]
pub struct SeqDB {
    /// Concatenated header bytes of all records.
    headbuf: Vec<u8>,
    /// Concatenated sequence bytes of all records.
    seqbuf: Vec<u8>,
    /// Concatenated quality bytes of all records.
    qualbuf: Vec<u8>,
    /// Start offset of each header in `headbuf`. Contains an end sentinel at the end.
    head_starts: Vec<usize>,
    /// Start offset of each sequence in `seqbuf`. Contains an end sentinel at the end.
    seq_starts: Vec<usize>,
    /// Start offset of each quality string in `qualbuf`. Contains an end sentinel at the end.
    qual_starts: Vec<usize>,
}
14
15impl SeqDB{
16
17    pub fn iter(&self) -> SeqDBIterator {
18        SeqDBIterator{seq_db: self, pos: 0}
19    }
20
21    pub fn sequence_count(&self) -> usize{
22        self.head_starts.len() - 1
23        // ^ The -1 is because we have an end sentinel at the end of the head_starts vector
24    }
25
26    pub fn total_seq_len(&self) -> usize{
27        self.seqbuf.len()
28    }
29
30    pub fn get_mut(&mut self, seq_index: usize) -> MutRefRecord {
31        if seq_index >= self.head_starts.len(){
32            panic!("SeqDB: Sequence index {} not found in database containing {} sequences", seq_index, self.sequence_count());
33        }
34
35        let head = &mut self.headbuf[self.head_starts[seq_index]..self.head_starts[seq_index+1]];
36        let seq = &mut self.seqbuf[self.seq_starts[seq_index]..self.seq_starts[seq_index+1]];
37        let qual = {
38            let start = self.qual_starts[seq_index];
39            let end = self.qual_starts[seq_index+1];
40            if start == end {
41                None
42            }
43            else {
44                Some(&mut self.qualbuf[start..end])
45            }
46        };
47        MutRefRecord{head, seq, qual}
48
49    }
50
51    pub fn get(&self, seq_index: usize) -> RefRecord{
52        if seq_index >= self.head_starts.len(){
53            panic!("SeqDB: Sequence index {} not found in database containing {} sequences", seq_index, self.sequence_count());
54        }
55
56        let head = &self.headbuf[self.head_starts[seq_index]..self.head_starts[seq_index+1]];
57        let seq = &self.seqbuf[self.seq_starts[seq_index]..self.seq_starts[seq_index+1]];
58        let qual = {
59            let start = self.qual_starts[seq_index];   
60            let end = self.qual_starts[seq_index+1];
61            if start == end {
62                None
63            }
64            else {
65                Some(&self.qualbuf[start..end])
66            }
67        };
68        RefRecord{head, seq, qual}
69    }
70
71    pub fn new() -> SeqDB{
72        let headbuf: Vec<u8> = Vec::new();
73        let seqbuf: Vec<u8> = Vec::new();
74        let qualbuf: Vec<u8> = Vec::new();
75
76        let head_starts: Vec<usize> = vec![0];
77        let seq_starts: Vec<usize> = vec![0];
78        let qual_starts: Vec<usize> = vec![0];
79        
80        SeqDB{headbuf, seqbuf, qualbuf, head_starts, seq_starts, qual_starts}
81    }
82
83    pub fn push_record<R: crate::record::Record>(&mut self, rec: R){
84        self.headbuf.extend_from_slice(rec.head());
85        self.seqbuf.extend_from_slice(rec.seq());
86        self.head_starts.push(self.headbuf.len());
87        self.seq_starts.push(self.seqbuf.len());
88
89        if let Some(qual) = rec.qual(){
90            // Record has quality values
91            self.qualbuf.extend_from_slice(qual);
92        }
93        self.qual_starts.push(self.qualbuf.len());
94    }
95
96    // Push a sequence with no quality values or header
97    pub fn push_seq(&mut self, seq: &[u8]){
98        self.seqbuf.extend_from_slice(seq);
99        self.seq_starts.push(self.seqbuf.len());
100
101        self.head_starts.push(self.headbuf.len()); // Empty header
102        self.qual_starts.push(self.qualbuf.len()); // Empty quality values
103    }
104
105    pub fn shrink_to_fit(&mut self){
106        self.headbuf.shrink_to_fit();
107        self.seqbuf.shrink_to_fit();
108        self.qualbuf.shrink_to_fit();
109    }
110}
111
112pub struct SeqDBIterator<'a>{
113    seq_db: &'a SeqDB,
114    pos: usize,
115}
116
117impl<'a> Iterator for SeqDBIterator<'a> {
118    type Item = RefRecord<'a>;
119
120    fn next(&mut self) -> Option<RefRecord<'a>> {
121        match self.pos{
122            i if i < self.seq_db.head_starts.len() - 1 => { // Iteration is not finished yet
123                self.pos += 1; // Advance pointer to next element for the next round
124                Some(self.seq_db.get(i)) // Should never be out of bounds so we unwrap the error.
125            }
126            _ => None, // End of iteration
127        }
128    }
129}
130
131impl ExactSizeIterator for SeqDBIterator<'_> {
132    fn len(&self) -> usize {
133        self.seq_db.sequence_count()
134    }
135}