noodles_gff/io/reader.rs
1//! GFF reader and iterators.
2
3pub(crate) mod line;
4mod line_bufs;
5mod lines;
6mod record_bufs;
7
8use std::io::{self, BufRead, Read, Seek};
9
10use noodles_bgzf as bgzf;
11use noodles_core::Region;
12use noodles_csi::{self as csi, BinningIndex};
13
14pub use self::{line_bufs::LineBufs, lines::Lines, record_bufs::RecordBufs};
15use crate::{Line, feature::RecordBuf};
16
/// A GFF reader.
///
/// This is a thin wrapper around an underlying reader `R`. The line- and record-oriented
/// methods are available when `R` implements [`BufRead`].
#[derive(Debug)]
pub struct Reader<R> {
    /// The wrapped reader that all data is read from.
    inner: R,
}
21
22impl<R> Reader<R> {
23 /// Returns a reference to the underlying reader.
24 ///
25 /// # Examples
26 ///
27 /// ```
28 /// # use std::io;
29 /// use noodles_gff as gff;
30 /// let reader = gff::io::Reader::new(io::empty());
31 /// let _ = reader.get_ref();
32 /// ```
33 pub fn get_ref(&self) -> &R {
34 &self.inner
35 }
36
37 /// Returns a mutable reference to the underlying reader.
38 ///
39 /// # Examples
40 ///
41 /// ```
42 /// # use std::io;
43 /// use noodles_gff as gff;
44 /// let mut reader = gff::io::Reader::new(io::empty());
45 /// let _ = reader.get_mut();
46 /// ```
47 pub fn get_mut(&mut self) -> &mut R {
48 &mut self.inner
49 }
50
51 /// Unwraps and returns the underlying reader.
52 ///
53 /// # Examples
54 ///
55 /// ```
56 /// # use std::io;
57 /// use noodles_gff as gff;
58 /// let reader = gff::io::Reader::new(io::empty());
59 /// let _ = reader.into_inner();
60 /// ```
61 pub fn into_inner(self) -> R {
62 self.inner
63 }
64}
65
66impl<R> Reader<R>
67where
68 R: BufRead,
69{
70 /// Creates a GFF reader.
71 ///
72 /// # Examples
73 ///
74 /// ```
75 /// # use std::io;
76 /// use noodles_gff as gff;
77 /// let reader = gff::io::Reader::new(io::empty());
78 /// ```
79 pub fn new(inner: R) -> Self {
80 Self { inner }
81 }
82
83 /// Returns an iterator over line buffers starting from the current stream position.
84 ///
85 /// When using this, the caller is responsible to stop reading at either EOF or when the
86 /// `FASTA` directive is read, whichever comes first.
87 ///
88 /// Unlike [`Self::read_line`], each line is parsed as a [`crate::Line`].
89 ///
90 /// # Examples
91 ///
92 /// ```
93 /// use noodles_gff::{self as gff, LineBuf};
94 ///
95 /// let data = b"##gff-version 3
96 /// sq0\tNOODLES\tgene\t8\t13\t.\t+\t.\tgene_id=ndls0;gene_name=gene0
97 /// ";
98 /// let mut reader = gff::io::Reader::new(&data[..]);
99 /// let mut lines = reader.line_bufs();
100 ///
101 /// let line = lines.next().transpose()?;
102 /// assert!(matches!(line, Some(LineBuf::Directive(_))));
103 ///
104 /// let line = lines.next().transpose()?;
105 /// assert!(matches!(line, Some(LineBuf::Record(_))));
106 ///
107 /// assert!(lines.next().is_none());
108 /// # Ok::<_, std::io::Error>(())
109 /// ```
110 pub fn line_bufs(&mut self) -> LineBufs<'_, R> {
111 LineBufs::new(self)
112 }
113
114 /// Reads a single line without eagerly decoding it.
115 ///
116 /// # Examples
117 ///
118 /// ```
119 /// use noodles_gff as gff;
120 ///
121 /// let data = b"##gff-version 3\n";
122 /// let mut reader = gff::io::Reader::new(&data[..]);
123 ///
124 /// let mut line = gff::Line::default();
125 ///
126 /// reader.read_line(&mut line)?;
127 /// assert_eq!(line.kind(), gff::line::Kind::Directive);
128 ///
129 /// assert_eq!(reader.read_line(&mut line)?, 0);
130 /// # Ok::<_, std::io::Error>(())
131 /// ```
132 pub fn read_line(&mut self, line: &mut Line) -> io::Result<usize> {
133 line::read_line(&mut self.inner, line)
134 }
135
136 /// Returns an iterator over lines starting from the current stream position.
137 ///
138 /// When using this, the caller is responsible to stop reading at either EOF or when the
139 /// `FASTA` directive is read, whichever comes first.
140 ///
141 /// # Examples
142 ///
143 /// ```
144 /// # use std::io;
145 /// use noodles_gff::{self as gff, directive_buf::key};
146 ///
147 /// let mut reader = gff::io::Reader::new(io::empty());
148 ///
149 /// for result in reader.lines() {
150 /// let line = result?;
151 ///
152 /// if let Some(key::FASTA) = line.as_directive().map(|directive| directive.key().as_ref()) {
153 /// break;
154 /// }
155 ///
156 /// // ...
157 /// }
158 /// # Ok::<_, io::Error>(())
159 /// ```
160 pub fn lines(&mut self) -> Lines<'_, R> {
161 Lines::new(self)
162 }
163
164 /// Returns an iterator over records starting from the current stream position.
165 ///
166 /// This filters lines for only records. It stops at either EOF or when the `FASTA` directive
167 /// is read, whichever comes first.
168 ///
169 /// # Examples
170 ///
171 /// ```
172 /// use noodles_gff as gff;
173 ///
174 /// let data = b"##gff-version 3
175 /// sq0\tNOODLES\tgene\t8\t13\t.\t+\t.\tgene_id=ndls0;gene_name=gene0
176 /// ";
177 /// let mut reader = gff::io::Reader::new(&data[..]);
178 /// let mut records = reader.record_bufs();
179 ///
180 /// assert!(records.next().transpose()?.is_some());
181 /// assert!(records.next().is_none());
182 /// # Ok::<_, std::io::Error>(())
183 /// ```
184 pub fn record_bufs(&mut self) -> RecordBufs<'_, R> {
185 RecordBufs::new(self.line_bufs())
186 }
187}
188
189impl<R> Reader<bgzf::io::Reader<R>>
190where
191 R: Read + Seek,
192{
193 /// Returns an iterator over records that intersects the given region.
194 ///
195 /// # Examples
196 ///
197 /// ```no_run
198 /// # use std::fs::File;
199 /// use noodles_bgzf as bgzf;
200 /// use noodles_csi as csi;
201 /// use noodles_gff as gff;
202 ///
203 /// let mut reader = File::open("annotations.gff3.gz")
204 /// .map(bgzf::io::Reader::new)
205 /// .map(gff::io::Reader::new)?;
206 ///
207 /// let index = csi::fs::read("annotations.gff3.gz.csi")?;
208 /// let region = "sq0:8-13".parse()?;
209 /// let query = reader.query(&index, ®ion)?;
210 ///
211 /// for result in query {
212 /// let record = result?;
213 /// // ...
214 /// }
215 ///
216 /// # Ok::<(), Box<dyn std::error::Error>>(())
217 /// ```
218 pub fn query<'r, I>(
219 &'r mut self,
220 index: &I,
221 region: &'r Region,
222 ) -> io::Result<impl Iterator<Item = io::Result<RecordBuf>> + use<'r, I, R>>
223 where
224 I: BinningIndex,
225 {
226 let header = index
227 .header()
228 .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "missing index header"))?;
229
230 let reference_sequence_id = header
231 .reference_sequence_names()
232 .get_index_of(region.name())
233 .ok_or_else(|| {
234 io::Error::new(
235 io::ErrorKind::InvalidInput,
236 "missing reference sequence name",
237 )
238 })?;
239
240 let chunks = index.query(reference_sequence_id, region.interval())?;
241
242 let records = csi::io::Query::new(&mut self.inner, chunks)
243 .indexed_records(header)
244 .filter_by_region(region)
245 .map(|result| {
246 result.and_then(|r| {
247 let line = Line(r.as_ref().into());
248
249 line.as_record()
250 .ok_or_else(|| {
251 io::Error::new(io::ErrorKind::InvalidData, "line is not a record")
252 })?
253 .and_then(|record| RecordBuf::try_from_feature_record(&record))
254 })
255 });
256
257 Ok(records)
258 }
259}
260
/// Reads bytes from `reader` up to and including the next line feed, appending them to `buf`.
///
/// If the appended data ends with a line feed (`\n`), it is removed from `buf`, along with a
/// carriage return (`\r`) directly preceding it.
///
/// Returns the number of bytes read from `reader`, which includes any removed line terminator.
/// When nothing is read, `0` is returned and `buf` is left exactly as it was.
fn read_line<R>(reader: &mut R, buf: &mut Vec<u8>) -> io::Result<usize>
where
    R: BufRead,
{
    let n = reader.read_until(b'\n', buf)?;

    // Nothing was read, so whatever `buf` already held must not be trimmed.
    if n == 0 {
        return Ok(0);
    }

    if buf.last() == Some(&b'\n') {
        buf.pop();

        if buf.last() == Some(&b'\r') {
            buf.pop();
        }
    }

    Ok(n)
}
283
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads every record from `src` and returns how many were yielded.
    fn count_records(src: &[u8]) -> io::Result<usize> {
        let mut reader = Reader::new(src);
        let records = reader.record_bufs().collect::<io::Result<Vec<_>>>()?;
        Ok(records.len())
    }

    #[test]
    fn test_records() -> io::Result<()> {
        let data = b"\
##gff-version 3
sq0\tNOODLES\tgene\t8\t13\t.\t+\t.\tgene_id=ndls0;gene_name=gene0
";

        assert_eq!(count_records(&data[..])?, 1);

        Ok(())
    }

    #[test]
    fn test_records_with_fasta_directive() -> io::Result<()> {
        // Everything from the `FASTA` directive onwards must not be counted as a record.
        let data = b"\
##gff-version 3
sq0\tNOODLES\tgene\t8\t13\t.\t+\t.\tgene_id=ndls0;gene_name=gene0
##FASTA
>sq0
ACGT
";

        assert_eq!(count_records(&data[..])?, 1);

        Ok(())
    }

    #[test]
    fn test_read_line() -> io::Result<()> {
        // (input, expected buffer contents after one read)
        let cases: [(&[u8], &[u8]); 3] = [
            (b"noodles\n", b"noodles"),
            (b"noodles\r\n", b"noodles"),
            (b"noodles", b"noodles"),
        ];

        let mut buf = Vec::new();

        for (mut src, expected) in cases {
            buf.clear();
            read_line(&mut src, &mut buf)?;
            assert_eq!(buf, expected);
        }

        Ok(())
    }
}