gfa/
mmap.rs

1use crate::{
2    gfa::{Line, Link, Path, Segment},
3    parser::GFAParser,
4};
5
6use anyhow::{bail, Result};
7
8use memmap::Mmap;
9
10use std::fs::File;
11use std::io::prelude::*;
12
13use bstr::ByteSlice;
14
15#[derive(Debug)]
16pub struct MmapGFA {
17    pub cursor: std::io::Cursor<Mmap>,
18    pub line_buf: Vec<u8>,
19    pub current_line_len: usize,
20    pub last_buf_offset: usize,
21    pub parser: GFAParser<usize, ()>,
22}
23
/// Tag naming one of three kinds of GFA line.
///
/// NOTE(review): presumably these correspond to lines starting with `S`,
/// `L` and `P` respectively; nothing in this file's visible code uses the
/// enum, so confirm against callers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineType {
    /// A segment line.
    Segment,
    /// A link line.
    Link,
    /// A path line.
    Path,
}
30
/// Byte offsets of the segment, link and path lines of a GFA file, as
/// produced by [`MmapGFA::build_index`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineIndices {
    /// For each line starting with `S`: `(byte offset of the line start,
    /// line length in bytes)`. The length counts the trailing newline when
    /// the line has one.
    pub segments: Vec<(usize, usize)>,
    /// Byte offset of the start of each line starting with `L`.
    pub links: Vec<usize>,
    /// Byte offset of the start of each line starting with `P`.
    pub paths: Vec<usize>,
}
37
38#[derive(Debug)]
39pub struct SegmentIter<'a> {
40    mmap: &'a mut MmapGFA,
41    parser: GFAParser<usize, ()>,
42}
43
44impl<'a> Iterator for SegmentIter<'a> {
45    type Item = Segment<usize, ()>;
46
47    #[inline]
48    fn next(&mut self) -> Option<Self::Item> {
49        while let Ok(line) = self.mmap.next_line() {
50            if let Some(b'S') = line.first() {
51                if let Some(Line::Segment(s)) =
52                    self.parser.parse_gfa_line(line).ok()
53                {
54                    return Some(s);
55                }
56            }
57        }
58        None
59    }
60}
61
62#[derive(Debug)]
63pub struct LinkIter<'a> {
64    mmap: &'a mut MmapGFA,
65    parser: GFAParser<usize, ()>,
66}
67
68impl<'a> Iterator for LinkIter<'a> {
69    type Item = Link<usize, ()>;
70
71    #[inline]
72    fn next(&mut self) -> Option<Self::Item> {
73        while let Ok(line) = self.mmap.next_line() {
74            if let Some(b'S') = line.first() {
75                if let Some(Line::Link(s)) =
76                    self.parser.parse_gfa_line(line).ok()
77                {
78                    return Some(s);
79                }
80            }
81        }
82        None
83    }
84}
85
86#[derive(Debug)]
87pub struct PathIter<'a> {
88    mmap: &'a mut MmapGFA,
89    parser: GFAParser<usize, ()>,
90}
91
92impl<'a> Iterator for PathIter<'a> {
93    type Item = Path<usize, ()>;
94
95    #[inline]
96    fn next(&mut self) -> Option<Self::Item> {
97        while let Ok(line) = self.mmap.next_line() {
98            if let Some(b'S') = line.first() {
99                if let Some(Line::Path(s)) =
100                    self.parser.parse_gfa_line(line).ok()
101                {
102                    return Some(s);
103                }
104            }
105        }
106        None
107    }
108}
109
110impl MmapGFA {
111    pub fn new(path: &str) -> Result<Self> {
112        let file = File::open(path)?;
113        let mmap = unsafe { Mmap::map(&file)? };
114
115        let cursor = std::io::Cursor::new(mmap);
116        let line_buf = Vec::with_capacity(1024);
117        let current_line_len = 0;
118        let last_buf_offset = 0;
119
120        let parser = GFAParser::new();
121
122        Ok(Self {
123            cursor,
124            line_buf,
125            current_line_len,
126            last_buf_offset,
127            parser,
128        })
129    }
130
131    pub fn reset_position(&mut self) -> u64 {
132        let cur_pos = self.cursor.position();
133        self.cursor.set_position(0);
134        cur_pos
135    }
136
137    pub fn set_position(&mut self, new_pos: u64) -> u64 {
138        let cur_pos = self.cursor.position();
139        self.cursor.set_position(new_pos);
140        cur_pos
141    }
142    pub fn get_ref(&self) -> &[u8] {
143        self.cursor.get_ref().as_ref()
144    }
145
146    pub fn get_parser(&self) -> &GFAParser<usize, ()> {
147        &self.parser
148    }
149
150    pub fn next_line(&mut self) -> Result<&[u8]> {
151        self.line_buf.clear();
152
153        self.last_buf_offset = self.cursor.position() as usize;
154
155        let n_read = self.cursor.read_until(b'\n', &mut self.line_buf)?;
156
157        self.current_line_len = n_read;
158
159        Ok(&self.line_buf[..n_read])
160    }
161
162    pub fn read_line_at(&mut self, offset: usize) -> Result<&[u8]> {
163        self.cursor.set_position(offset as u64);
164        self.next_line()
165    }
166
167    pub fn build_index(&mut self) -> Result<LineIndices> {
168        let start_position = self.cursor.position();
169        let current_line_len = self.current_line_len;
170        let last_buf_offset = self.last_buf_offset;
171
172        let mut segments = Vec::new();
173        let mut links = Vec::new();
174        let mut paths = Vec::new();
175
176        self.cursor.set_position(0);
177
178        let mut line_start = 0;
179
180        loop {
181            let line = self.next_line()?;
182            let length = line.len();
183
184            if let Some(ref byte) = line.first() {
185                match byte {
186                    b'S' => {
187                        segments.push((line_start, length));
188                    }
189                    b'L' => {
190                        links.push(line_start);
191                    }
192                    b'P' => {
193                        paths.push(line_start);
194                    }
195                    _ => (),
196                };
197
198                line_start += line.len();
199            } else {
200                break;
201            }
202        }
203
204        self.cursor.set_position(start_position);
205        self.current_line_len = current_line_len;
206        self.last_buf_offset = last_buf_offset;
207
208        let res = LineIndices {
209            segments,
210            links,
211            paths,
212        };
213
214        Ok(res)
215    }
216
217    pub fn current_line(&self) -> &[u8] {
218        &self.line_buf[..self.current_line_len]
219    }
220
221    pub fn current_line_name(&self) -> Option<&[u8]> {
222        let mut iter = self.line_buf.split_str("\t");
223        let _lt = iter.next()?;
224        let name = iter.next()?;
225        Some(name)
226    }
227
228    pub fn parse_current_line(&self) -> Result<Line<usize, ()>> {
229        let line = self.current_line();
230        if line.is_empty() {
231            bail!("Line at offset {} is empty", self.last_buf_offset);
232        }
233
234        let gfa_line = self.parser.parse_gfa_line(line)?;
235        Ok(gfa_line)
236    }
237
238    pub fn iter_segments(&mut self, from_start: bool) -> SegmentIter<'_> {
239        if from_start {
240            self.cursor.set_position(0);
241        }
242        let parser = self.parser.clone();
243        SegmentIter { mmap: self, parser }
244    }
245
246    pub fn iter_links(&mut self, from_start: bool) -> LinkIter<'_> {
247        if from_start {
248            self.cursor.set_position(0);
249        }
250        let parser = self.parser.clone();
251        LinkIter { mmap: self, parser }
252    }
253
254    pub fn iter_paths(&mut self, from_start: bool) -> PathIter<'_> {
255        if from_start {
256            self.cursor.set_position(0);
257        }
258        let parser = self.parser.clone();
259        PathIter { mmap: self, parser }
260    }
261}