noodles_cram/io/reader.rs
1//! CRAM reader and record iterator.
2
3mod builder;
4pub(crate) mod collections;
5pub(crate) mod container;
6pub mod header;
7pub(crate) mod num;
8mod query;
9mod records;
10
11use std::io::{self, Read, Seek, SeekFrom};
12
13use noodles_core::Region;
14use noodles_fasta as fasta;
15use noodles_sam as sam;
16
17pub use self::{builder::Builder, container::Container, query::Query, records::Records};
18use self::{container::read_container, header::read_header};
19use crate::{FileDefinition, crai};
20
21/// A CRAM reader.
22///
23/// The CRAM format is comprised of four main parts: 1) a file definition, 2) a file header, 3) a
24/// list of containers, and 4) an end-of-file (EOF) container.
25///
26/// # Examples
27///
28/// ```no_run
29/// # use std::{fs::File, io};
30/// use noodles_cram as cram;
31/// use noodles_fasta as fasta;
32///
33/// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
34/// let header = reader.read_header()?;
35///
36/// for result in reader.records(&header) {
37/// let record = result?;
38/// // ...
39/// }
40///
41/// # Ok::<_, io::Error>(())
42/// ```
43pub struct Reader<R> {
44 inner: R,
45 reference_sequence_repository: fasta::Repository,
46}
47
48impl<R> Reader<R> {
49 /// Returns a reference to the underlying reader.
50 ///
51 /// # Examples
52 ///
53 /// ```
54 /// # use std::io;
55 /// use noodles_cram as cram;
56 /// let reader = cram::io::Reader::new(io::empty());
57 /// let _inner = reader.get_ref();
58 /// ```
59 pub fn get_ref(&self) -> &R {
60 &self.inner
61 }
62
63 /// Returns a mutable reference to the underlying reader.
64 ///
65 /// # Examples
66 ///
67 /// ```
68 /// # use std::io;
69 /// use noodles_cram as cram;
70 /// let mut reader = cram::io::Reader::new(io::empty());
71 /// let _inner = reader.get_mut();
72 /// ```
73 pub fn get_mut(&mut self) -> &mut R {
74 &mut self.inner
75 }
76
77 /// Unwraps and returns the underlying reader.
78 ///
79 /// # Examples
80 ///
81 /// ```
82 /// # use std::io;
83 /// use noodles_cram as cram;
84 /// let reader = cram::io::Reader::new(io::empty());
85 /// let _inner = reader.into_inner();
86 /// ```
87 pub fn into_inner(self) -> R {
88 self.inner
89 }
90}
91
92impl<R> Reader<R>
93where
94 R: Read,
95{
96 /// Creates a CRAM reader.
97 ///
98 /// # Examples
99 ///
100 /// ```no_run
101 /// # use std::{fs::File, io};
102 /// use noodles_cram as cram;
103 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
104 /// # Ok::<(), io::Error>(())
105 /// ```
106 pub fn new(inner: R) -> Self {
107 Builder::default().build_from_reader(inner)
108 }
109
110 pub(crate) fn reference_sequence_repository(&self) -> &fasta::Repository {
111 &self.reference_sequence_repository
112 }
113
114 /// Returns a CRAM header reader.
115 ///
116 /// # Examples
117 ///
118 /// ```no_run
119 /// # use std::{fs::File, io::Read};
120 /// use noodles_cram as cram;
121 ///
122 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
123 ///
124 /// let mut header_reader = reader.header_reader();
125 /// header_reader.read_magic_number()?;
126 /// header_reader.read_format_version()?;
127 /// header_reader.read_file_id()?;
128 ///
129 /// let mut container_reader = header_reader.container_reader()?;
130 ///
131 /// let _raw_header = {
132 /// let mut raw_sam_header_reader = container_reader.raw_sam_header_reader()?;
133 /// let mut raw_header = String::new();
134 /// raw_sam_header_reader.read_to_string(&mut raw_header)?;
135 /// raw_sam_header_reader.discard_to_end()?;
136 /// raw_header
137 /// };
138 ///
139 /// container_reader.discard_to_end()?;
140 /// Ok::<_, std::io::Error>(())
141 /// ```
142 pub fn header_reader(&mut self) -> header::Reader<&mut R> {
143 header::Reader::new(&mut self.inner)
144 }
145
146 /// Reads the CRAM file definition.
147 ///
148 /// The CRAM magic number is also checked.
149 ///
150 /// The position of the stream is expected to be at the start.
151 ///
152 /// # Examples
153 ///
154 /// ```no_run
155 /// # use std::{fs::File, io};
156 /// use noodles_cram as cram;
157 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
158 /// let file_definition = reader.read_file_definition()?;
159 /// # Ok::<(), io::Error>(())
160 /// ```
161 pub fn read_file_definition(&mut self) -> io::Result<FileDefinition> {
162 header::read_file_definition(&mut self.inner)
163 }
164
165 /// Reads the SAM header.
166 ///
167 /// The position of the stream is expected to be at the CRAM header container, i.e., directly
168 /// after the file definition.
169 ///
170 /// # Examples
171 ///
172 /// ```no_run
173 /// # use std::{fs::File, io};
174 /// use noodles_cram as cram;
175 ///
176 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
177 /// reader.read_file_definition()?;
178 ///
179 /// let header = reader.read_file_header()?;
180 /// # Ok::<(), io::Error>(())
181 /// ```
182 pub fn read_file_header(&mut self) -> io::Result<sam::Header> {
183 header::read_file_header(&mut self.inner)
184 }
185
186 /// Reads the SAM header.
187 ///
188 /// This verifies the CRAM magic number, discards the file definition, and reads and parses the
189 /// file header as a SAM header.
190 ///
191 /// The position of the stream is expected to be at the start.
192 ///
193 /// # Examples
194 ///
195 /// ```no_run
196 /// # use std::{fs::File, io};
197 /// use noodles_cram as cram;
198 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
199 /// let header = reader.read_header()?;
200 /// # Ok::<(), io::Error>(())
201 /// ```
202 pub fn read_header(&mut self) -> io::Result<sam::Header> {
203 read_header(&mut self.inner)
204 }
205
206 /// Reads a container.
207 ///
208 /// This returns `None` if the container header is the EOF container header, which signals the
209 /// end of the stream.
210 ///
211 /// # Examples
212 ///
213 /// ```no_run
214 /// # use std::{fs::File, io};
215 /// use noodles_cram::{self as cram, io::reader::Container};
216 ///
217 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
218 /// reader.read_header()?;
219 ///
220 /// let mut container = Container::default();
221 ///
222 /// while reader.read_container(&mut container)? != 0 {
223 /// // ...
224 /// }
225 /// # Ok::<(), io::Error>(())
226 /// ```
227 pub fn read_container(&mut self, container: &mut Container) -> io::Result<usize> {
228 read_container(&mut self.inner, container)
229 }
230
231 /// Returns a iterator over records starting from the current stream position.
232 ///
233 /// The stream is expected to be at the start of a container.
234 ///
235 /// # Examples
236 ///
237 /// ```no_run
238 /// # use std::{fs::File, io};
239 /// use noodles_cram as cram;
240 /// use noodles_fasta as fasta;
241 ///
242 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
243 /// let header = reader.read_header()?;
244 ///
245 /// for result in reader.records(&header) {
246 /// let record = result?;
247 /// // ...
248 /// }
249 /// # Ok::<_, io::Error>(())
250 /// ```
251 pub fn records<'r, 'h: 'r>(&'r mut self, header: &'h sam::Header) -> Records<'r, 'h, R> {
252 Records::new(self, header)
253 }
254}
255
256impl<R> Reader<R>
257where
258 R: Read + Seek,
259{
260 /// Seeks the underlying reader to the given position.
261 ///
262 /// Positions typically come from the associated CRAM index file.
263 ///
264 /// # Examples
265 ///
266 /// ```no_run
267 /// # use std::io::{self, SeekFrom};
268 /// use noodles_cram as cram;
269 /// let mut reader = cram::io::Reader::new(io::empty());
270 /// reader.seek(SeekFrom::Start(0))?;
271 /// # Ok::<(), io::Error>(())
272 /// ```
273 pub fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
274 self.inner.seek(pos)
275 }
276
277 /// Returns the current position of the underlying reader.
278 ///
279 /// # Examples
280 ///
281 /// ```
282 /// # use std::io;
283 /// use noodles_cram as cram;
284 /// let mut reader = cram::io::Reader::new(io::empty());
285 /// let position = reader.position()?;
286 /// assert_eq!(position, 0);
287 /// # Ok::<(), io::Error>(())
288 /// ```
289 pub fn position(&mut self) -> io::Result<u64> {
290 self.inner.stream_position()
291 }
292
293 /// Returns an iterator over records that intersects the given region.
294 ///
295 /// # Examples
296 ///
297 /// ```no_run
298 /// # use std::{fs::File, io};
299 /// use noodles_cram::{self as cram, crai};
300 /// use noodles_fasta as fasta;
301 ///
302 /// let mut reader = File::open("sample.cram").map(cram::io::Reader::new)?;
303 ///
304 /// let header = reader.read_header()?;
305 /// let index = crai::fs::read("sample.cram.crai")?;
306 /// let region = "sq0:8-13".parse()?;
307 /// let query = reader.query(&header, &index, ®ion)?;
308 ///
309 /// for result in query {
310 /// let record = result?;
311 /// // ...
312 /// }
313 /// # Ok::<_, Box<dyn std::error::Error>>(())
314 /// ```
315 pub fn query<'r, 'h: 'r, 'i: 'r>(
316 &'r mut self,
317 header: &'h sam::Header,
318 index: &'i crai::Index,
319 region: &Region,
320 ) -> io::Result<Query<'r, 'h, 'i, R>> {
321 let reference_sequence_id = header
322 .reference_sequences()
323 .get_index_of(region.name())
324 .ok_or_else(|| {
325 io::Error::new(
326 io::ErrorKind::InvalidInput,
327 "invalid reference sequence name",
328 )
329 })?;
330
331 Ok(Query::new(
332 self,
333 header,
334 index,
335 reference_sequence_id,
336 region.interval(),
337 ))
338 }
339}
340
341impl<R> sam::alignment::io::Read<R> for Reader<R>
342where
343 R: Read,
344{
345 fn read_alignment_header(&mut self) -> io::Result<sam::Header> {
346 self.read_header()
347 }
348
349 fn alignment_records<'a>(
350 &'a mut self,
351 header: &'a sam::Header,
352 ) -> Box<dyn Iterator<Item = io::Result<Box<dyn sam::alignment::Record>>> + 'a> {
353 Box::new(
354 self.records(header).map(|result| {
355 result.map(|record| Box::new(record) as Box<dyn sam::alignment::Record>)
356 }),
357 )
358 }
359}