1use crate::{
2 gfa::{Line, Link, Path, Segment},
3 parser::GFAParser,
4};
5
6use anyhow::{bail, Result};
7
8use memmap::Mmap;
9
10use std::fs::File;
11use std::io::prelude::*;
12
13use bstr::ByteSlice;
14
/// Line-oriented reader over a memory-mapped GFA file.
///
/// Lines are read one at a time through [`MmapGFA::next_line`], which copies
/// the bytes of the line into `line_buf` and records where the line started.
#[derive(Debug)]
pub struct MmapGFA {
    /// Cursor over the memory-mapped file; its position is the byte offset
    /// at which the next line will be read.
    pub cursor: std::io::Cursor<Mmap>,
    /// Buffer holding the bytes of the most recently read line.
    pub line_buf: Vec<u8>,
    /// Number of bytes read by the most recent call to `next_line`.
    pub current_line_len: usize,
    /// Cursor position (byte offset) at which the most recently read line
    /// started.
    pub last_buf_offset: usize,
    /// Parser used by `parse_current_line` and cloned into the iterators.
    pub parser: GFAParser<usize, ()>,
}
23
/// Tag naming one of the three kinds of GFA line this module tracks.
///
/// NOTE(review): not referenced anywhere in the visible part of this file;
/// presumably used by callers to select one of the `LineIndices` vectors.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineType {
    Segment,
    Link,
    Path,
}
30
/// Byte offsets of the segment, link and path lines of a GFA file, as
/// produced by `MmapGFA::build_index`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineIndices {
    /// `(offset, length)` of every line whose first byte is `S`; the length
    /// is the number of bytes read for the line, including its terminator.
    pub segments: Vec<(usize, usize)>,
    /// Start offset of every line whose first byte is `L`.
    pub links: Vec<usize>,
    /// Start offset of every line whose first byte is `P`.
    pub paths: Vec<usize>,
}
37
38#[derive(Debug)]
39pub struct SegmentIter<'a> {
40 mmap: &'a mut MmapGFA,
41 parser: GFAParser<usize, ()>,
42}
43
44impl<'a> Iterator for SegmentIter<'a> {
45 type Item = Segment<usize, ()>;
46
47 #[inline]
48 fn next(&mut self) -> Option<Self::Item> {
49 while let Ok(line) = self.mmap.next_line() {
50 if let Some(b'S') = line.first() {
51 if let Some(Line::Segment(s)) =
52 self.parser.parse_gfa_line(line).ok()
53 {
54 return Some(s);
55 }
56 }
57 }
58 None
59 }
60}
61
62#[derive(Debug)]
63pub struct LinkIter<'a> {
64 mmap: &'a mut MmapGFA,
65 parser: GFAParser<usize, ()>,
66}
67
68impl<'a> Iterator for LinkIter<'a> {
69 type Item = Link<usize, ()>;
70
71 #[inline]
72 fn next(&mut self) -> Option<Self::Item> {
73 while let Ok(line) = self.mmap.next_line() {
74 if let Some(b'S') = line.first() {
75 if let Some(Line::Link(s)) =
76 self.parser.parse_gfa_line(line).ok()
77 {
78 return Some(s);
79 }
80 }
81 }
82 None
83 }
84}
85
86#[derive(Debug)]
87pub struct PathIter<'a> {
88 mmap: &'a mut MmapGFA,
89 parser: GFAParser<usize, ()>,
90}
91
92impl<'a> Iterator for PathIter<'a> {
93 type Item = Path<usize, ()>;
94
95 #[inline]
96 fn next(&mut self) -> Option<Self::Item> {
97 while let Ok(line) = self.mmap.next_line() {
98 if let Some(b'S') = line.first() {
99 if let Some(Line::Path(s)) =
100 self.parser.parse_gfa_line(line).ok()
101 {
102 return Some(s);
103 }
104 }
105 }
106 None
107 }
108}
109
110impl MmapGFA {
111 pub fn new(path: &str) -> Result<Self> {
112 let file = File::open(path)?;
113 let mmap = unsafe { Mmap::map(&file)? };
114
115 let cursor = std::io::Cursor::new(mmap);
116 let line_buf = Vec::with_capacity(1024);
117 let current_line_len = 0;
118 let last_buf_offset = 0;
119
120 let parser = GFAParser::new();
121
122 Ok(Self {
123 cursor,
124 line_buf,
125 current_line_len,
126 last_buf_offset,
127 parser,
128 })
129 }
130
131 pub fn reset_position(&mut self) -> u64 {
132 let cur_pos = self.cursor.position();
133 self.cursor.set_position(0);
134 cur_pos
135 }
136
137 pub fn set_position(&mut self, new_pos: u64) -> u64 {
138 let cur_pos = self.cursor.position();
139 self.cursor.set_position(new_pos);
140 cur_pos
141 }
142 pub fn get_ref(&self) -> &[u8] {
143 self.cursor.get_ref().as_ref()
144 }
145
146 pub fn get_parser(&self) -> &GFAParser<usize, ()> {
147 &self.parser
148 }
149
150 pub fn next_line(&mut self) -> Result<&[u8]> {
151 self.line_buf.clear();
152
153 self.last_buf_offset = self.cursor.position() as usize;
154
155 let n_read = self.cursor.read_until(b'\n', &mut self.line_buf)?;
156
157 self.current_line_len = n_read;
158
159 Ok(&self.line_buf[..n_read])
160 }
161
162 pub fn read_line_at(&mut self, offset: usize) -> Result<&[u8]> {
163 self.cursor.set_position(offset as u64);
164 self.next_line()
165 }
166
167 pub fn build_index(&mut self) -> Result<LineIndices> {
168 let start_position = self.cursor.position();
169 let current_line_len = self.current_line_len;
170 let last_buf_offset = self.last_buf_offset;
171
172 let mut segments = Vec::new();
173 let mut links = Vec::new();
174 let mut paths = Vec::new();
175
176 self.cursor.set_position(0);
177
178 let mut line_start = 0;
179
180 loop {
181 let line = self.next_line()?;
182 let length = line.len();
183
184 if let Some(ref byte) = line.first() {
185 match byte {
186 b'S' => {
187 segments.push((line_start, length));
188 }
189 b'L' => {
190 links.push(line_start);
191 }
192 b'P' => {
193 paths.push(line_start);
194 }
195 _ => (),
196 };
197
198 line_start += line.len();
199 } else {
200 break;
201 }
202 }
203
204 self.cursor.set_position(start_position);
205 self.current_line_len = current_line_len;
206 self.last_buf_offset = last_buf_offset;
207
208 let res = LineIndices {
209 segments,
210 links,
211 paths,
212 };
213
214 Ok(res)
215 }
216
217 pub fn current_line(&self) -> &[u8] {
218 &self.line_buf[..self.current_line_len]
219 }
220
221 pub fn current_line_name(&self) -> Option<&[u8]> {
222 let mut iter = self.line_buf.split_str("\t");
223 let _lt = iter.next()?;
224 let name = iter.next()?;
225 Some(name)
226 }
227
228 pub fn parse_current_line(&self) -> Result<Line<usize, ()>> {
229 let line = self.current_line();
230 if line.is_empty() {
231 bail!("Line at offset {} is empty", self.last_buf_offset);
232 }
233
234 let gfa_line = self.parser.parse_gfa_line(line)?;
235 Ok(gfa_line)
236 }
237
238 pub fn iter_segments(&mut self, from_start: bool) -> SegmentIter<'_> {
239 if from_start {
240 self.cursor.set_position(0);
241 }
242 let parser = self.parser.clone();
243 SegmentIter { mmap: self, parser }
244 }
245
246 pub fn iter_links(&mut self, from_start: bool) -> LinkIter<'_> {
247 if from_start {
248 self.cursor.set_position(0);
249 }
250 let parser = self.parser.clone();
251 LinkIter { mmap: self, parser }
252 }
253
254 pub fn iter_paths(&mut self, from_start: bool) -> PathIter<'_> {
255 if from_start {
256 self.cursor.set_position(0);
257 }
258 let parser = self.parser.clone();
259 PathIter { mmap: self, parser }
260 }
261}