noodles_cram/io/
reader.rs

1//! CRAM reader and record iterator.
2
3mod builder;
4pub(crate) mod collections;
5pub(crate) mod container;
6pub mod header;
7pub(crate) mod num;
8mod query;
9mod records;
10
11use std::io::{self, Read, Seek, SeekFrom};
12
13use noodles_core::Region;
14use noodles_fasta as fasta;
15use noodles_sam as sam;
16
17pub use self::{builder::Builder, container::Container, query::Query, records::Records};
18use self::{container::read_container, header::read_header};
19use crate::{FileDefinition, crai};
20
21/// A CRAM reader.
22///
23/// The CRAM format is composed of four main parts: 1) a file definition, 2) a file header, 3) a
24/// list of containers, and 4) an end-of-file (EOF) container.
25///
26/// # Examples
27///
28/// ```no_run
29/// # use std::{fs::File, io};
30/// use noodles_cram as cram;
31/// use noodles_fasta as fasta;
32///
33/// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
34/// let header = reader.read_header()?;
35///
36/// for result in reader.records(&header) {
37///     let record = result?;
38///     // ...
39/// }
40///
41/// # Ok::<_, io::Error>(())
42/// ```
43pub struct Reader<R> {
    // The wrapped reader; exposed through `get_ref`, `get_mut`, and `into_inner`.
44    inner: R,
    // Exposed crate-internally through `reference_sequence_repository()`.
    // NOTE(review): presumably supplies reference sequences while decoding records — confirm.
45    reference_sequence_repository: fasta::Repository,
46}
47
48impl<R> Reader<R> {
49    /// Returns a reference to the underlying reader.
50    ///
51    /// # Examples
52    ///
53    /// ```
54    /// # use std::io;
55    /// use noodles_cram as cram;
56    /// let reader = cram::io::Reader::new(io::empty());
57    /// let _inner = reader.get_ref();
58    /// ```
59    pub fn get_ref(&self) -> &R {
60        &self.inner
61    }
62
63    /// Returns a mutable reference to the underlying reader.
64    ///
65    /// # Examples
66    ///
67    /// ```
68    /// # use std::io;
69    /// use noodles_cram as cram;
70    /// let mut reader = cram::io::Reader::new(io::empty());
71    /// let _inner = reader.get_mut();
72    /// ```
73    pub fn get_mut(&mut self) -> &mut R {
74        &mut self.inner
75    }
76
77    /// Unwraps and returns the underlying reader.
78    ///
79    /// # Examples
80    ///
81    /// ```
82    /// # use std::io;
83    /// use noodles_cram as cram;
84    /// let reader = cram::io::Reader::new(io::empty());
85    /// let _inner = reader.into_inner();
86    /// ```
87    pub fn into_inner(self) -> R {
88        self.inner
89    }
90}
91
92impl<R> Reader<R>
93where
94    R: Read,
95{
96    /// Creates a CRAM reader.
97    ///
98    /// # Examples
99    ///
100    /// ```no_run
101    /// # use std::{fs::File, io};
102    /// use noodles_cram as cram;
103    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
104    /// # Ok::<(), io::Error>(())
105    /// ```
106    pub fn new(inner: R) -> Self {
107        Builder::default().build_from_reader(inner)
108    }
109
110    pub(crate) fn reference_sequence_repository(&self) -> &fasta::Repository {
111        &self.reference_sequence_repository
112    }
113
114    /// Returns a CRAM header reader.
115    ///
116    /// # Examples
117    ///
118    /// ```no_run
119    /// # use std::{fs::File, io::Read};
120    /// use noodles_cram as cram;
121    ///
122    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
123    ///
124    /// let mut header_reader = reader.header_reader();
125    /// header_reader.read_magic_number()?;
126    /// header_reader.read_format_version()?;
127    /// header_reader.read_file_id()?;
128    ///
129    /// let mut container_reader = header_reader.container_reader()?;
130    ///
131    /// let _raw_header = {
132    ///     let mut raw_sam_header_reader = container_reader.raw_sam_header_reader()?;
133    ///     let mut raw_header = String::new();
134    ///     raw_sam_header_reader.read_to_string(&mut raw_header)?;
135    ///     raw_sam_header_reader.discard_to_end()?;
136    ///     raw_header
137    /// };
138    ///
139    /// container_reader.discard_to_end()?;
140    /// Ok::<_, std::io::Error>(())
141    /// ```
142    pub fn header_reader(&mut self) -> header::Reader<&mut R> {
143        header::Reader::new(&mut self.inner)
144    }
145
146    /// Reads the CRAM file definition.
147    ///
148    /// The CRAM magic number is also checked.
149    ///
150    /// The position of the stream is expected to be at the start.
151    ///
152    /// # Examples
153    ///
154    /// ```no_run
155    /// # use std::{fs::File, io};
156    /// use noodles_cram as cram;
157    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
158    /// let file_definition = reader.read_file_definition()?;
159    /// # Ok::<(), io::Error>(())
160    /// ```
161    pub fn read_file_definition(&mut self) -> io::Result<FileDefinition> {
162        header::read_file_definition(&mut self.inner)
163    }
164
165    /// Reads the SAM header.
166    ///
167    /// The position of the stream is expected to be at the CRAM header container, i.e., directly
168    /// after the file definition.
169    ///
170    /// # Examples
171    ///
172    /// ```no_run
173    /// # use std::{fs::File, io};
174    /// use noodles_cram as cram;
175    ///
176    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
177    /// reader.read_file_definition()?;
178    ///
179    /// let header = reader.read_file_header()?;
180    /// # Ok::<(), io::Error>(())
181    /// ```
182    pub fn read_file_header(&mut self) -> io::Result<sam::Header> {
183        header::read_file_header(&mut self.inner)
184    }
185
186    /// Reads the SAM header.
187    ///
188    /// This verifies the CRAM magic number, discards the file definition, and reads and parses the
189    /// file header as a SAM header.
190    ///
191    /// The position of the stream is expected to be at the start.
192    ///
193    /// # Examples
194    ///
195    /// ```no_run
196    /// # use std::{fs::File, io};
197    /// use noodles_cram as cram;
198    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
199    /// let header = reader.read_header()?;
200    /// # Ok::<(), io::Error>(())
201    /// ```
202    pub fn read_header(&mut self) -> io::Result<sam::Header> {
203        read_header(&mut self.inner)
204    }
205
206    /// Reads a container.
207    ///
208    /// This returns `None` if the container header is the EOF container header, which signals the
209    /// end of the stream.
210    ///
211    /// # Examples
212    ///
213    /// ```no_run
214    /// # use std::{fs::File, io};
215    /// use noodles_cram::{self as cram, io::reader::Container};
216    ///
217    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
218    /// reader.read_header()?;
219    ///
220    /// let mut container = Container::default();
221    ///
222    /// while reader.read_container(&mut container)? != 0 {
223    ///     // ...
224    /// }
225    /// # Ok::<(), io::Error>(())
226    /// ```
227    pub fn read_container(&mut self, container: &mut Container) -> io::Result<usize> {
228        read_container(&mut self.inner, container)
229    }
230
231    /// Returns a iterator over records starting from the current stream position.
232    ///
233    /// The stream is expected to be at the start of a container.
234    ///
235    /// # Examples
236    ///
237    /// ```no_run
238    /// # use std::{fs::File, io};
239    /// use noodles_cram as cram;
240    /// use noodles_fasta as fasta;
241    ///
242    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
243    /// let header = reader.read_header()?;
244    ///
245    /// for result in reader.records(&header) {
246    ///     let record = result?;
247    ///     // ...
248    /// }
249    /// # Ok::<_, io::Error>(())
250    /// ```
251    pub fn records<'r, 'h: 'r>(&'r mut self, header: &'h sam::Header) -> Records<'r, 'h, R> {
252        Records::new(self, header)
253    }
254}
255
256impl<R> Reader<R>
257where
258    R: Read + Seek,
259{
260    /// Seeks the underlying reader to the given position.
261    ///
262    /// Positions typically come from the associated CRAM index file.
263    ///
264    /// # Examples
265    ///
266    /// ```no_run
267    /// # use std::io::{self, SeekFrom};
268    /// use noodles_cram as cram;
269    /// let mut reader = cram::io::Reader::new(io::empty());
270    /// reader.seek(SeekFrom::Start(0))?;
271    /// # Ok::<(), io::Error>(())
272    /// ```
273    pub fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
274        self.inner.seek(pos)
275    }
276
277    /// Returns the current position of the underlying reader.
278    ///
279    /// # Examples
280    ///
281    /// ```
282    /// # use std::io;
283    /// use noodles_cram as cram;
284    /// let mut reader = cram::io::Reader::new(io::empty());
285    /// let position = reader.position()?;
286    /// assert_eq!(position, 0);
287    /// # Ok::<(), io::Error>(())
288    /// ```
289    pub fn position(&mut self) -> io::Result<u64> {
290        self.inner.stream_position()
291    }
292
293    /// Returns an iterator over records that intersects the given region.
294    ///
295    /// # Examples
296    ///
297    /// ```no_run
298    /// # use std::{fs::File, io};
299    /// use noodles_cram::{self as cram, crai};
300    /// use noodles_fasta as fasta;
301    ///
302    /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
303    ///
304    /// let header = reader.read_header()?;
305    /// let index = crai::fs::read("sample.cram.crai")?;
306    /// let region = "sq0:8-13".parse()?;
307    /// let query = reader.query(&header, &index, &region)?;
308    ///
309    /// for result in query {
310    ///     let record = result?;
311    ///     // ...
312    /// }
313    /// # Ok::<_, Box<dyn std::error::Error>>(())
314    /// ```
315    pub fn query<'r, 'h: 'r, 'i: 'r>(
316        &'r mut self,
317        header: &'h sam::Header,
318        index: &'i crai::Index,
319        region: &Region,
320    ) -> io::Result<Query<'r, 'h, 'i, R>> {
321        let reference_sequence_id = header
322            .reference_sequences()
323            .get_index_of(region.name())
324            .ok_or_else(|| {
325                io::Error::new(
326                    io::ErrorKind::InvalidInput,
327                    "invalid reference sequence name",
328                )
329            })?;
330
331        Ok(Query::new(
332            self,
333            header,
334            index,
335            reference_sequence_id,
336            region.interval(),
337        ))
338    }
339}
340
341impl<R> sam::alignment::io::Read<R> for Reader<R>
342where
343    R: Read,
344{
345    fn read_alignment_header(&mut self) -> io::Result<sam::Header> {
346        self.read_header()
347    }
348
349    fn alignment_records<'a>(
350        &'a mut self,
351        header: &'a sam::Header,
352    ) -> Box<dyn Iterator<Item = io::Result<Box<dyn sam::alignment::Record>>> + 'a> {
353        Box::new(
354            self.records(header).map(|result| {
355                result.map(|record| Box::new(record) as Box<dyn sam::alignment::Record>)
356            }),
357        )
358    }
359}