ncbi_vdb_sys/
reader.rs

1use crate::safe::vdb::{FastqCursor, SafeVTable};
2use crate::{Error, Result};
3
/// Default capacity passed to the FASTQ cursor by [`SraReader::new`]: `1024 * 1024` (1 MiB).
pub const BUFFER_SIZE: usize = 1 << 20;
5
6pub struct SraReader {
7    pub cursor: FastqCursor,
8    start: i64,
9    stop: u64,
10    pos: i64,
11}
12impl SraReader {
13    pub fn new(path: &str) -> Result<Self> {
14        let table = SafeVTable::new(path)?;
15        let cursor = table.new_fastq_cursor(BUFFER_SIZE)?;
16        let (start, stop) = cursor.get_range()?;
17        Ok(Self {
18            cursor,
19            start,
20            stop,
21            pos: start,
22        })
23    }
24    pub fn with_capacity(path: &str, capacity: usize) -> Result<Self> {
25        let table = SafeVTable::new(path)?;
26        let cursor = table.new_fastq_cursor(capacity)?;
27        let (start, stop) = cursor.get_range()?;
28        Ok(Self {
29            cursor,
30            start,
31            stop,
32            pos: start,
33        })
34    }
35    pub fn start(&self) -> i64 {
36        self.start
37    }
38    pub fn stop(&self) -> u64 {
39        self.stop
40    }
41    pub fn pos(&self) -> i64 {
42        self.pos
43    }
44    pub fn get_record(&self, row_id: i64) -> Result<RefRecord> {
45        let rid = row_id as usize;
46        let seq = self.cursor.get_read(row_id)?;
47        let qual = self.cursor.get_qual(row_id)?;
48        let read_starts = self.cursor.get_read_starts(row_id)?;
49        let read_lens = self.cursor.get_read_lens(row_id)?;
50        let read_types = self.cursor.get_read_types(row_id)?;
51        Ok(RefRecord {
52            rid,
53            seq,
54            qual,
55            read_starts,
56            read_lens,
57            read_types,
58        })
59    }
60    pub fn into_iter(&self) -> RecordIter {
61        RecordIter::new(self)
62    }
63    pub fn into_range_iter(&self, start: i64, stop: u64) -> Result<RecordIter> {
64        if start < self.start() || stop > self.stop() || start > stop as i64 {
65            return Err(Error::CursorRangeError(
66                start as usize,
67                stop as usize,
68                self.start() as usize,
69                self.stop() as usize,
70            ));
71        }
72        Ok(RecordIter::new_range(self, start, stop))
73    }
74}
75
/// Classification of a read segment within a spot.
///
/// The enum is `#[repr(u8)]`; the discriminants below are the byte values
/// used by the `From<SegmentType> for u8` conversion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum SegmentType {
    /// Technical read, encoded as `0`.
    Technical = 0,
    /// Biological read, encoded as `1`.
    Biological = 1,
}
84impl From<u8> for SegmentType {
85    fn from(ty: u8) -> Self {
86        match ty {
87            0 => SegmentType::Technical,
88            _ => SegmentType::Biological,
89        }
90    }
91}
92impl From<SegmentType> for u8 {
93    fn from(ty: SegmentType) -> Self {
94        match ty {
95            SegmentType::Technical => 0,
96            SegmentType::Biological => 1,
97        }
98    }
99}
100
/// Borrowed view of one row (spot): the full sequence and quality strings
/// plus the per-segment layout arrays.
///
/// Every field is a shared slice (or a plain integer), so the view is cheap
/// to copy; `Debug` is derived so records can be inspected in logs and tests.
#[derive(Debug, Clone, Copy)]
pub struct RefRecord<'a> {
    /// Row ID (Spot ID)
    pub rid: usize,
    /// Spot sequence
    pub seq: &'a [u8],
    /// Spot quality
    pub qual: &'a [u8],
    /// Read segment start positions (offsets into `seq` / `qual`)
    pub read_starts: &'a [u32],
    /// Read segment lengths
    pub read_lens: &'a [u32],
    /// Read segment types (raw bytes, convertible to `SegmentType`)
    pub read_types: &'a [u8],
}
115impl<'a> RefRecord<'a> {
116    #[allow(clippy::should_implement_trait)]
117    pub fn into_iter(self) -> SegmentIter<'a> {
118        SegmentIter::new(self)
119    }
120
121    pub fn get_segment(&self, index: usize) -> Option<Segment<'a>> {
122        if index >= self.read_starts.len() {
123            return None;
124        }
125        let start = self.read_starts[index] as usize;
126        let len = self.read_lens[index] as usize;
127        Some(Segment {
128            rid: self.rid,
129            sid: index,
130            seq: &self.seq[start..start + len],
131            qual: &self.qual[start..start + len],
132            ty: self.read_types[index].into(),
133        })
134    }
135}
136
137pub struct Segment<'a> {
138    /// Row ID (Spot ID)
139    rid: usize,
140    /// Segment ID
141    sid: usize,
142    /// Segment sequence
143    seq: &'a [u8],
144    /// Segment quality
145    qual: &'a [u8],
146    /// Segment type
147    ty: SegmentType,
148}
149impl Segment<'_> {
150    pub fn rid(&self) -> usize {
151        self.rid
152    }
153    pub fn sid(&self) -> usize {
154        self.sid
155    }
156    pub fn seq(&self) -> &[u8] {
157        self.seq
158    }
159    pub fn qual(&self) -> &[u8] {
160        self.qual
161    }
162    pub fn ty(&self) -> SegmentType {
163        self.ty
164    }
165    pub fn len(&self) -> usize {
166        self.seq.len()
167    }
168    pub fn is_technical(&self) -> bool {
169        self.ty == SegmentType::Technical
170    }
171    pub fn is_empty(&self) -> bool {
172        self.len() == 0
173    }
174}
175
176pub struct RecordIter<'a> {
177    reader: &'a SraReader,
178    pos: i64,
179    end: u64,
180}
181impl<'a> RecordIter<'a> {
182    pub fn new(reader: &'a SraReader) -> Self {
183        Self {
184            reader,
185            pos: reader.start(),
186            end: reader.stop(),
187        }
188    }
189    pub fn new_range(reader: &'a SraReader, start: i64, stop: u64) -> Self {
190        Self {
191            reader,
192            pos: start,
193            end: stop,
194        }
195    }
196}
197impl<'a> Iterator for RecordIter<'a> {
198    type Item = Result<RefRecord<'a>>;
199    fn next(&mut self) -> Option<Self::Item> {
200        if self.pos > self.end as i64 {
201            return None;
202        }
203        match self.reader.get_record(self.pos) {
204            Ok(record) => {
205                self.pos += 1;
206                Some(Ok(record))
207            }
208            Err(rc) => {
209                self.pos += 1;
210                Some(Err(rc))
211            }
212        }
213    }
214}
215
216pub struct SegmentIter<'a> {
217    record: RefRecord<'a>,
218    pos: usize,
219}
220impl<'a> SegmentIter<'a> {
221    pub fn new(record: RefRecord<'a>) -> Self {
222        Self { pos: 0, record }
223    }
224}
225impl<'a> Iterator for SegmentIter<'a> {
226    type Item = Segment<'a>;
227    fn next(&mut self) -> Option<Self::Item> {
228        let segment = self.record.get_segment(self.pos)?;
229        self.pos += 1;
230        Some(segment)
231    }
232}