noodles_bcf/io/reader.rs
1//! BCF reader.
2
3mod builder;
4pub mod header;
5pub(crate) mod num;
6pub(crate) mod query;
7pub(crate) mod record;
8pub(crate) mod record_buf;
9mod record_bufs;
10
11pub use self::{builder::Builder, query::Query, record_bufs::RecordBufs};
12
13use std::{
14 io::{self, BufRead, Read},
15 iter, str,
16};
17
18use noodles_bgzf as bgzf;
19use noodles_core::Region;
20use noodles_csi::BinningIndex;
21use noodles_vcf::{self as vcf, header::string_maps::ContigStringMap, variant::RecordBuf};
22
23use self::{header::read_header, record::read_record, record_buf::read_record_buf};
24use crate::Record;
25
/// A reader for the BCF format.
///
/// A BCF stream is made up of two parts: 1) a VCF header and 2) a list of records.
pub struct Reader<R> {
    // The underlying stream that the header and records are read from.
    inner: R,
    // Buffer handed to the record decoder by `read_record_buf`.
    buf: Vec<u8>,
}
33
34impl<R> Reader<R> {
35 /// Returns a reference to the underlying reader.
36 ///
37 /// # Examples
38 ///
39 /// ```
40 /// # use std::io;
41 /// use noodles_bcf as bcf;
42 /// let reader = bcf::io::Reader::from(io::empty());
43 /// let _inner = reader.get_ref();
44 /// ```
45 pub fn get_ref(&self) -> &R {
46 &self.inner
47 }
48
49 /// Returns a mutable reference to the underlying reader.
50 ///
51 /// # Examples
52 ///
53 /// ```
54 /// # use std::io;
55 /// use noodles_bcf as bcf;
56 /// let mut reader = bcf::io::Reader::from(io::empty());
57 /// let _inner = reader.get_mut();
58 /// ```
59 pub fn get_mut(&mut self) -> &mut R {
60 &mut self.inner
61 }
62
63 /// Returns the underlying reader.
64 ///
65 /// # Examples
66 ///
67 /// ```
68 /// # use std::io;
69 /// use noodles_bcf as bcf;
70 /// let reader = bcf::io::Reader::from(io::empty());
71 /// let _inner = reader.into_inner();
72 /// ```
73 pub fn into_inner(self) -> R {
74 self.inner
75 }
76}
77
78impl<R> Reader<R>
79where
80 R: Read,
81{
82 /// Returns a BCF header reader.
83 ///
84 /// This creates an adapter that reads at most the length of the header, i.e., the BCF magic
85 /// number, the format version, and VCF header.
86 ///
87 /// It is more ergonomic to read the BCF header as a VCF header using [`Self::read_header`],
88 /// but this adapter allows for control of how the header is read, e.g., to read the raw VCF
89 /// header.
90 ///
91 /// The position of the stream is expected to be at the start.
92 ///
93 /// # Examples
94 ///
95 /// ```no_run
96 /// # use std::{fs::File, io::Read};
97 /// use noodles_bcf as bcf;
98 ///
99 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
100 ///
101 /// let mut header_reader = reader.header_reader();
102 /// header_reader.read_magic_number()?;
103 /// header_reader.read_format_version()?;
104 ///
105 /// let mut raw_vcf_header_reader = header_reader.raw_vcf_header_reader()?;
106 /// let mut raw_header = String::new();
107 /// raw_vcf_header_reader.read_to_string(&mut raw_header)?;
108 /// raw_vcf_header_reader.discard_to_end()?;
109 /// # Ok::<_, std::io::Error>(())
110 /// ```
111 pub fn header_reader(&mut self) -> header::Reader<&mut R> {
112 header::Reader::new(&mut self.inner)
113 }
114
115 /// Reads the VCF header.
116 ///
117 /// This verifies the BCF magic number, discards the file format version, and reads and parses
118 /// the raw VCF header. Associated string maps are also built from the raw header.
119 ///
120 /// The position of the stream is expected to be at the start.
121 ///
122 /// # Examples
123 ///
124 /// ```no_run
125 /// # use std::{fs::File, io};
126 /// use noodles_bcf as bcf;
127 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
128 /// let header = reader.read_header()?;
129 /// # Ok::<(), io::Error>(())
130 /// ```
131 pub fn read_header(&mut self) -> io::Result<vcf::Header> {
132 read_header(&mut self.inner)
133 }
134
135 /// Reads a single record.
136 ///
137 /// The stream is expected to be directly after the header or at the start of another record.
138 ///
139 /// It is more ergonomic to read records using an iterator (see [`Self::records`]), but using
140 /// this method directly allows the reuse of a single [`vcf::Record`] buffer.
141 ///
142 /// If successful, the record size is returned. If a record size of 0 is returned, the stream
143 /// reached EOF.
144 ///
145 /// # Examples
146 ///
147 /// ```no_run
148 /// # use std::{fs::File, io};
149 /// use noodles_bcf as bcf;
150 /// use noodles_vcf as vcf;
151 ///
152 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
153 /// let header = reader.read_header()?;
154 ///
155 /// let mut record = vcf::variant::RecordBuf::default();
156 /// reader.read_record_buf(&header, &mut record)?;
157 /// # Ok::<(), io::Error>(())
158 /// ```
159 pub fn read_record_buf(
160 &mut self,
161 header: &vcf::Header,
162 record: &mut RecordBuf,
163 ) -> io::Result<usize> {
164 read_record_buf(&mut self.inner, header, &mut self.buf, record)
165 }
166
167 /// Reads a single record without eagerly decoding (most of) its fields.
168 ///
169 /// The stream is expected to be directly after the header or at the start of another record.
170 ///
171 /// It is more ergnomic to read records using an iterator (see [`Self::records`]), but using
172 /// this method directly allows the reuse of a single [`Record`] buffer.
173 ///
174 /// If successful, the record size is returned. If a record size of 0 is returned, the stream
175 /// reached EOF.
176 ///
177 /// # Examples
178 ///
179 /// ```no_run
180 /// # use std::{fs::File, io};
181 /// use noodles_bcf as bcf;
182 ///
183 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
184 /// reader.read_header()?;
185 ///
186 /// let mut record = bcf::Record::default();
187 /// reader.read_record(&mut record)?;
188 /// # Ok::<(), io::Error>(())
189 /// ```
190 pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
191 read_record(&mut self.inner, record)
192 }
193
194 /// Returns an iterator over records starting from the current stream position.
195 ///
196 /// The stream is expected to be directly after the reference sequences or at the start of
197 /// another record.
198 ///
199 /// # Examples
200 ///
201 /// ```no_run
202 /// # use std::{fs::File, io};
203 /// use noodles_bcf as bcf;
204 ///
205 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
206 /// let header = reader.read_header()?;
207 ///
208 /// for result in reader.record_bufs(&header) {
209 /// let record = result?;
210 /// // ...
211 /// }
212 /// # Ok::<(), io::Error>(())
213 pub fn record_bufs<'r, 'h>(&'r mut self, header: &'h vcf::Header) -> RecordBufs<'r, 'h, R> {
214 RecordBufs::new(self, header)
215 }
216
217 /// Returns an iterator over lazy records starting from the current stream position.
218 ///
219 /// The stream is expected to be directly after the header or at the start of another record.
220 ///
221 /// # Examples
222 ///
223 /// ```no_run
224 /// # use std::{fs::File, io};
225 /// use noodles_bcf as bcf;
226 ///
227 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
228 /// reader.read_header()?;
229 ///
230 /// for result in reader.records() {
231 /// let record = result?;
232 /// println!("{:?}", record);
233 /// }
234 /// # Ok::<(), io::Error>(())
235 /// ```
236 pub fn records(&mut self) -> impl Iterator<Item = io::Result<Record>> {
237 let mut record = Record::default();
238
239 iter::from_fn(move || match self.read_record(&mut record) {
240 Ok(0) => None,
241 Ok(_) => Some(Ok(record.clone())),
242 Err(e) => Some(Err(e)),
243 })
244 }
245}
246
247impl<R> Reader<bgzf::io::Reader<R>>
248where
249 R: Read,
250{
251 /// Creates a BCF reader.
252 ///
253 /// # Examples
254 ///
255 /// ```
256 /// # use std::io;
257 /// use noodles_bcf as bcf;
258 /// let reader = bcf::io::Reader::new(io::empty());
259 /// ```
260 pub fn new(reader: R) -> Self {
261 Self::from(bgzf::io::Reader::new(reader))
262 }
263}
264
265impl<R> Reader<R>
266where
267 R: bgzf::io::BufRead + bgzf::io::Seek,
268{
269 /// Returns a reader over records that intersects the given region.
270 ///
271 /// # Examples
272 ///
273 /// ```no_run
274 /// # use std::fs::File;
275 /// use noodles_bcf as bcf;
276 /// use noodles_core::Region;
277 /// use noodles_csi as csi;
278 ///
279 /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
280 /// let header = reader.read_header()?;
281 ///
282 /// let index = csi::fs::read("sample.bcf.csi")?;
283 /// let region = "sq0:8-13".parse()?;
284 /// let query = reader.query(&header, &index, ®ion)?;
285 ///
286 /// for result in query.records() {
287 /// let record = result?;
288 /// // ...
289 /// }
290 /// # Ok::<(), Box<dyn std::error::Error>>(())
291 /// ```
292 pub fn query<'r, 'h, I>(
293 &'r mut self,
294 header: &'h vcf::Header,
295 index: &I,
296 region: &Region,
297 ) -> io::Result<Query<'r, 'h, R>>
298 where
299 I: BinningIndex,
300 {
301 let reference_sequence_id = resolve_region(header.string_maps().contigs(), region)?;
302 let chunks = index.query(reference_sequence_id, region.interval())?;
303
304 Ok(Query::new(
305 &mut self.inner,
306 header,
307 chunks,
308 reference_sequence_id,
309 region.interval(),
310 ))
311 }
312}
313
314impl<R> From<R> for Reader<R> {
315 fn from(inner: R) -> Self {
316 Self {
317 inner,
318 buf: Vec::new(),
319 }
320 }
321}
322
323impl<R> vcf::variant::io::Read<R> for Reader<R>
324where
325 R: BufRead,
326{
327 fn read_variant_header(&mut self) -> io::Result<vcf::Header> {
328 self.read_header()
329 }
330
331 fn variant_records<'r, 'h: 'r>(
332 &'r mut self,
333 _: &'h vcf::Header,
334 ) -> Box<dyn Iterator<Item = io::Result<Box<dyn vcf::variant::Record>>> + 'r> {
335 Box::new(
336 self.records().map(|result| {
337 result.map(|record| Box::new(record) as Box<dyn vcf::variant::Record>)
338 }),
339 )
340 }
341}
342
343pub(crate) fn resolve_region(
344 contig_string_map: &ContigStringMap,
345 region: &Region,
346) -> io::Result<usize> {
347 let region_name = str::from_utf8(region.name())
348 .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
349
350 contig_string_map.get_index_of(region_name).ok_or_else(|| {
351 io::Error::new(
352 io::ErrorKind::InvalidInput,
353 format!("region does not exist in contigs: {region:?}"),
354 )
355 })
356}