noodles_bcf/io/
reader.rs

//! BCF reader.
2
3mod builder;
4pub mod header;
5pub(crate) mod num;
6pub(crate) mod query;
7pub(crate) mod record;
8pub(crate) mod record_buf;
9mod record_bufs;
10
11pub use self::{builder::Builder, query::Query, record_bufs::RecordBufs};
12
13use std::{
14    io::{self, BufRead, Read},
15    iter, str,
16};
17
18use noodles_bgzf as bgzf;
19use noodles_core::Region;
20use noodles_csi::BinningIndex;
21use noodles_vcf::{self as vcf, header::string_maps::ContigStringMap, variant::RecordBuf};
22
23use self::{header::read_header, record::read_record, record_buf::read_record_buf};
24use crate::Record;
25
/// A reader for the BCF format.
///
/// A BCF stream is made up of two parts: 1) a VCF header, followed by 2) a list of records.
pub struct Reader<R> {
    // The underlying source that the header and records are read from.
    inner: R,
    // Scratch buffer lent to the record decoder on each `read_record_buf` call.
    buf: Vec<u8>,
}
33
34impl<R> Reader<R> {
35    /// Returns a reference to the underlying reader.
36    ///
37    /// # Examples
38    ///
39    /// ```
40    /// # use std::io;
41    /// use noodles_bcf as bcf;
42    /// let reader = bcf::io::Reader::from(io::empty());
43    /// let _inner = reader.get_ref();
44    /// ```
45    pub fn get_ref(&self) -> &R {
46        &self.inner
47    }
48
49    /// Returns a mutable reference to the underlying reader.
50    ///
51    /// # Examples
52    ///
53    /// ```
54    /// # use std::io;
55    /// use noodles_bcf as bcf;
56    /// let mut reader = bcf::io::Reader::from(io::empty());
57    /// let _inner = reader.get_mut();
58    /// ```
59    pub fn get_mut(&mut self) -> &mut R {
60        &mut self.inner
61    }
62
63    /// Returns the underlying reader.
64    ///
65    /// # Examples
66    ///
67    /// ```
68    /// # use std::io;
69    /// use noodles_bcf as bcf;
70    /// let reader = bcf::io::Reader::from(io::empty());
71    /// let _inner = reader.into_inner();
72    /// ```
73    pub fn into_inner(self) -> R {
74        self.inner
75    }
76}
77
78impl<R> Reader<R>
79where
80    R: Read,
81{
82    /// Returns a BCF header reader.
83    ///
84    /// This creates an adapter that reads at most the length of the header, i.e., the BCF magic
85    /// number, the format version, and VCF header.
86    ///
87    /// It is more ergonomic to read the BCF header as a VCF header using [`Self::read_header`],
88    /// but this adapter allows for control of how the header is read, e.g., to read the raw VCF
89    /// header.
90    ///
91    /// The position of the stream is expected to be at the start.
92    ///
93    /// # Examples
94    ///
95    /// ```no_run
96    /// # use std::{fs::File, io::Read};
97    /// use noodles_bcf as bcf;
98    ///
99    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
100    ///
101    /// let mut header_reader = reader.header_reader();
102    /// header_reader.read_magic_number()?;
103    /// header_reader.read_format_version()?;
104    ///
105    /// let mut raw_vcf_header_reader = header_reader.raw_vcf_header_reader()?;
106    /// let mut raw_header = String::new();
107    /// raw_vcf_header_reader.read_to_string(&mut raw_header)?;
108    /// raw_vcf_header_reader.discard_to_end()?;
109    /// # Ok::<_, std::io::Error>(())
110    /// ```
111    pub fn header_reader(&mut self) -> header::Reader<&mut R> {
112        header::Reader::new(&mut self.inner)
113    }
114
115    /// Reads the VCF header.
116    ///
117    /// This verifies the BCF magic number, discards the file format version, and reads and parses
118    /// the raw VCF header. Associated string maps are also built from the raw header.
119    ///
120    /// The position of the stream is expected to be at the start.
121    ///
122    /// # Examples
123    ///
124    /// ```no_run
125    /// # use std::{fs::File, io};
126    /// use noodles_bcf as bcf;
127    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
128    /// let header = reader.read_header()?;
129    /// # Ok::<(), io::Error>(())
130    /// ```
131    pub fn read_header(&mut self) -> io::Result<vcf::Header> {
132        read_header(&mut self.inner)
133    }
134
135    /// Reads a single record.
136    ///
137    /// The stream is expected to be directly after the header or at the start of another record.
138    ///
139    /// It is more ergonomic to read records using an iterator (see [`Self::records`]), but using
140    /// this method directly allows the reuse of a single [`vcf::Record`] buffer.
141    ///
142    /// If successful, the record size is returned. If a record size of 0 is returned, the stream
143    /// reached EOF.
144    ///
145    /// # Examples
146    ///
147    /// ```no_run
148    /// # use std::{fs::File, io};
149    /// use noodles_bcf as bcf;
150    /// use noodles_vcf as vcf;
151    ///
152    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
153    /// let header = reader.read_header()?;
154    ///
155    /// let mut record = vcf::variant::RecordBuf::default();
156    /// reader.read_record_buf(&header, &mut record)?;
157    /// # Ok::<(), io::Error>(())
158    /// ```
159    pub fn read_record_buf(
160        &mut self,
161        header: &vcf::Header,
162        record: &mut RecordBuf,
163    ) -> io::Result<usize> {
164        read_record_buf(&mut self.inner, header, &mut self.buf, record)
165    }
166
167    /// Reads a single record without eagerly decoding (most of) its fields.
168    ///
169    /// The stream is expected to be directly after the header or at the start of another record.
170    ///
171    /// It is more ergnomic to read records using an iterator (see [`Self::records`]), but using
172    /// this method directly allows the reuse of a single [`Record`] buffer.
173    ///
174    /// If successful, the record size is returned. If a record size of 0 is returned, the stream
175    /// reached EOF.
176    ///
177    /// # Examples
178    ///
179    /// ```no_run
180    /// # use std::{fs::File, io};
181    /// use noodles_bcf as bcf;
182    ///
183    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
184    /// reader.read_header()?;
185    ///
186    /// let mut record = bcf::Record::default();
187    /// reader.read_record(&mut record)?;
188    /// # Ok::<(), io::Error>(())
189    /// ```
190    pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
191        read_record(&mut self.inner, record)
192    }
193
194    /// Returns an iterator over records starting from the current stream position.
195    ///
196    /// The stream is expected to be directly after the reference sequences or at the start of
197    /// another record.
198    ///
199    /// # Examples
200    ///
201    /// ```no_run
202    /// # use std::{fs::File, io};
203    /// use noodles_bcf as bcf;
204    ///
205    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
206    /// let header = reader.read_header()?;
207    ///
208    /// for result in reader.record_bufs(&header) {
209    ///     let record = result?;
210    ///     // ...
211    /// }
212    /// # Ok::<(), io::Error>(())
213    pub fn record_bufs<'r, 'h>(&'r mut self, header: &'h vcf::Header) -> RecordBufs<'r, 'h, R> {
214        RecordBufs::new(self, header)
215    }
216
217    /// Returns an iterator over lazy records starting from the current stream position.
218    ///
219    /// The stream is expected to be directly after the header or at the start of another record.
220    ///
221    /// # Examples
222    ///
223    /// ```no_run
224    /// # use std::{fs::File, io};
225    /// use noodles_bcf as bcf;
226    ///
227    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
228    /// reader.read_header()?;
229    ///
230    /// for result in reader.records() {
231    ///     let record = result?;
232    ///     println!("{:?}", record);
233    /// }
234    /// # Ok::<(), io::Error>(())
235    /// ```
236    pub fn records(&mut self) -> impl Iterator<Item = io::Result<Record>> {
237        let mut record = Record::default();
238
239        iter::from_fn(move || match self.read_record(&mut record) {
240            Ok(0) => None,
241            Ok(_) => Some(Ok(record.clone())),
242            Err(e) => Some(Err(e)),
243        })
244    }
245}
246
247impl<R> Reader<bgzf::io::Reader<R>>
248where
249    R: Read,
250{
251    /// Creates a BCF reader.
252    ///
253    /// # Examples
254    ///
255    /// ```
256    /// # use std::io;
257    /// use noodles_bcf as bcf;
258    /// let reader = bcf::io::Reader::new(io::empty());
259    /// ```
260    pub fn new(reader: R) -> Self {
261        Self::from(bgzf::io::Reader::new(reader))
262    }
263}
264
265impl<R> Reader<R>
266where
267    R: bgzf::io::BufRead + bgzf::io::Seek,
268{
269    /// Returns a reader over records that intersects the given region.
270    ///
271    /// # Examples
272    ///
273    /// ```no_run
274    /// # use std::fs::File;
275    /// use noodles_bcf as bcf;
276    /// use noodles_core::Region;
277    /// use noodles_csi as csi;
278    ///
279    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
280    /// let header = reader.read_header()?;
281    ///
282    /// let index = csi::fs::read("sample.bcf.csi")?;
283    /// let region = "sq0:8-13".parse()?;
284    /// let query = reader.query(&header, &index, &region)?;
285    ///
286    /// for result in query.records() {
287    ///     let record = result?;
288    ///     // ...
289    /// }
290    /// # Ok::<(), Box<dyn std::error::Error>>(())
291    /// ```
292    pub fn query<'r, 'h, I>(
293        &'r mut self,
294        header: &'h vcf::Header,
295        index: &I,
296        region: &Region,
297    ) -> io::Result<Query<'r, 'h, R>>
298    where
299        I: BinningIndex,
300    {
301        let reference_sequence_id = resolve_region(header.string_maps().contigs(), region)?;
302        let chunks = index.query(reference_sequence_id, region.interval())?;
303
304        Ok(Query::new(
305            &mut self.inner,
306            header,
307            chunks,
308            reference_sequence_id,
309            region.interval(),
310        ))
311    }
312}
313
314impl<R> From<R> for Reader<R> {
315    fn from(inner: R) -> Self {
316        Self {
317            inner,
318            buf: Vec::new(),
319        }
320    }
321}
322
323impl<R> vcf::variant::io::Read<R> for Reader<R>
324where
325    R: BufRead,
326{
327    fn read_variant_header(&mut self) -> io::Result<vcf::Header> {
328        self.read_header()
329    }
330
331    fn variant_records<'r, 'h: 'r>(
332        &'r mut self,
333        _: &'h vcf::Header,
334    ) -> Box<dyn Iterator<Item = io::Result<Box<dyn vcf::variant::Record>>> + 'r> {
335        Box::new(
336            self.records().map(|result| {
337                result.map(|record| Box::new(record) as Box<dyn vcf::variant::Record>)
338            }),
339        )
340    }
341}
342
343pub(crate) fn resolve_region(
344    contig_string_map: &ContigStringMap,
345    region: &Region,
346) -> io::Result<usize> {
347    let region_name = str::from_utf8(region.name())
348        .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
349
350    contig_string_map.get_index_of(region_name).ok_or_else(|| {
351        io::Error::new(
352            io::ErrorKind::InvalidInput,
353            format!("region does not exist in contigs: {region:?}"),
354        )
355    })
356}