Skip to main content

noodles_bam/io/reader/
query.rs

1mod records;
2
3use std::io;
4
5use noodles_bgzf as bgzf;
6use noodles_core::region::Interval;
7use noodles_csi::{self as csi, binning_index::index::reference_sequence::bin::Chunk};
8use noodles_sam::alignment::Record as _;
9
10pub use self::records::Records;
11
12use crate::Record;
13
14/// A reader over records of a BAM reader that intersects a given region.
15///
16/// This is created by calling [`super::Reader::query`].
17pub struct Query<'r, R> {
18    inner: super::Reader<csi::io::Query<'r, R>>,
19    reference_sequence_id: usize,
20    interval: Interval,
21}
22
23impl<'r, R> Query<'r, R>
24where
25    R: bgzf::io::BufRead + bgzf::io::Seek,
26{
27    pub(super) fn new(
28        reader: &'r mut R,
29        chunks: Vec<Chunk>,
30        reference_sequence_id: usize,
31        interval: Interval,
32    ) -> Self {
33        Self {
34            inner: super::Reader::from(csi::io::Query::new(reader, chunks)),
35            reference_sequence_id,
36            interval,
37        }
38    }
39
40    /// Reads a record.
41    ///
42    /// # Examples
43    ///
44    /// ```no_run
45    /// # use std::fs::File;
46    /// use noodles_bam::{self as bam, bai};
47    ///
48    /// let mut reader = File::open("sample.bam").map(bam::io::Reader::new)?;
49    /// let header = reader.read_header()?;
50    ///
51    /// let index = bai::fs::read("sample.bam.bai")?;
52    /// let region = "sq0:8-13".parse()?;
53    /// let mut query = reader.query(&header, &index, &region)?;
54    ///
55    /// let mut record = bam::Record::default();
56    ///
57    /// while query.read_record(&mut record)? != 0 {
58    ///     // ...
59    /// }
60    /// # Ok::<_, Box<dyn std::error::Error>>(())
61    /// ```
62    pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
63        next_record(
64            &mut self.inner,
65            record,
66            self.reference_sequence_id,
67            self.interval,
68        )
69    }
70
71    /// Returns an iterator over records.
72    ///
73    /// ```no_run
74    /// # use std::fs::File;
75    /// use noodles_bam::{self as bam, bai};
76    ///
77    /// let mut reader = File::open("sample.bam").map(bam::io::Reader::new)?;
78    /// let header = reader.read_header()?;
79    ///
80    /// let index = bai::fs::read("sample.bam.bai")?;
81    /// let region = "sq0:8-13".parse()?;
82    /// let query = reader.query(&header, &index, &region)?;
83    ///
84    /// let mut record = bam::Record::default();
85    ///
86    /// for result in query.records() {
87    ///     let record = result?;
88    ///     // ...
89    /// }
90    /// # Ok::<_, Box<dyn std::error::Error>>(())
91    /// ```
92    pub fn records(self) -> Records<'r, R> {
93        Records::new(self)
94    }
95}
96
97pub(crate) fn intersects(
98    record: &Record,
99    reference_sequence_id: usize,
100    region_interval: Interval,
101) -> io::Result<bool> {
102    let Some(id) = record.reference_sequence_id().transpose()? else {
103        return Ok(false);
104    };
105
106    if id != reference_sequence_id {
107        return Ok(false);
108    }
109
110    if interval_is_unbounded(region_interval) {
111        Ok(true)
112    } else {
113        match (
114            record.alignment_start().transpose()?,
115            record.alignment_end().transpose()?,
116        ) {
117            (Some(start), Some(end)) => {
118                let alignment_interval = (start..=end).into();
119                Ok(region_interval.intersects(alignment_interval))
120            }
121            _ => Ok(false),
122        }
123    }
124}
125
126fn interval_is_unbounded(interval: Interval) -> bool {
127    interval.start().is_none() && interval.end().is_none()
128}
129
130fn next_record<R>(
131    reader: &mut super::Reader<csi::io::Query<'_, R>>,
132    record: &mut Record,
133    reference_sequence_id: usize,
134    interval: Interval,
135) -> io::Result<usize>
136where
137    R: bgzf::io::BufRead + bgzf::io::Seek,
138{
139    loop {
140        match reader.read_record(record)? {
141            0 => return Ok(0),
142            n => {
143                if intersects(record, reference_sequence_id, interval)? {
144                    return Ok(n);
145                }
146            }
147        }
148    }
149}
150
151#[cfg(test)]
152mod tests {
153    use std::{io::Cursor, num::NonZero};
154
155    use noodles_core::Position;
156    use noodles_csi::binning_index::Indexer;
157    use noodles_sam::{
158        self as sam,
159        alignment::{
160            RecordBuf,
161            io::Write,
162            record::{
163                Flags,
164                cigar::{Op, op::Kind},
165            },
166        },
167        header::record::value::{Map, map::ReferenceSequence},
168    };
169
170    use super::*;
171    use crate::{bai, io::Writer};
172
173    fn write(header: &sam::Header, records: &[RecordBuf]) -> io::Result<Vec<u8>> {
174        let mut writer = Writer::new(Vec::new());
175        writer.write_header(header)?;
176
177        for record in records {
178            writer.write_alignment_record(header, record)?;
179        }
180
181        writer.into_inner().finish()
182    }
183
184    fn index(src: &[u8]) -> io::Result<bai::Index> {
185        let mut reader = crate::io::Reader::new(src);
186        let header = reader.read_header()?;
187
188        let mut indexer = Indexer::default();
189        let mut chunk_start = reader.get_ref().virtual_position();
190
191        let mut record = Record::default();
192
193        while reader.read_record(&mut record)? != 0 {
194            let chunk_end = reader.get_ref().virtual_position();
195
196            let alignment_context = match (
197                record.reference_sequence_id().transpose()?,
198                record.alignment_start().transpose()?,
199                record.alignment_end().transpose()?,
200            ) {
201                (Some(id), Some(start), Some(end)) => {
202                    let is_mapped = !record.flags().is_unmapped();
203                    Some((id, start, end, is_mapped))
204                }
205                _ => None,
206            };
207
208            let chunk = Chunk::new(chunk_start, chunk_end);
209            indexer.add_record(alignment_context, chunk)?;
210
211            chunk_start = chunk_end;
212        }
213
214        Ok(indexer.build(header.reference_sequences().len()))
215    }
216
217    #[test]
218    fn test_next() -> Result<(), Box<dyn std::error::Error>> {
219        let header = sam::Header::builder()
220            .add_reference_sequence(
221                "sq0",
222                Map::<ReferenceSequence>::new(const { NonZero::new(8).unwrap() }),
223            )
224            .add_reference_sequence(
225                "sq1",
226                Map::<ReferenceSequence>::new(const { NonZero::new(13).unwrap() }),
227            )
228            .build();
229
230        let records = [
231            RecordBuf::builder()
232                .set_reference_sequence_id(0)
233                .set_flags(Flags::default())
234                .set_alignment_start(Position::MIN)
235                .set_cigar([Op::new(Kind::Match, 4)].into_iter().collect())
236                .build(),
237            RecordBuf::builder()
238                .set_reference_sequence_id(1)
239                .set_flags(Flags::default())
240                .set_alignment_start(Position::MIN)
241                .set_cigar([Op::new(Kind::Match, 4)].into_iter().collect())
242                .build(),
243            RecordBuf::builder()
244                .set_reference_sequence_id(1)
245                .set_flags(Flags::default())
246                .set_alignment_start(Position::try_from(8)?)
247                .set_cigar([Op::new(Kind::Match, 4)].into_iter().collect())
248                .build(),
249        ];
250
251        let src = write(&header, &records)?;
252        let index = index(&src)?;
253
254        let mut reader = crate::io::Reader::new(Cursor::new(src));
255
256        let region = "sq1:2-5".parse()?;
257        let query = reader.query(&header, &index, &region)?;
258
259        let actual: Vec<_> = query
260            .records()
261            .map(|result| {
262                result.and_then(|record| RecordBuf::try_from_alignment_record(&header, &record))
263            })
264            .collect::<Result<_, _>>()?;
265
266        let expected = [records[1].clone()];
267
268        assert_eq!(actual, expected);
269
270        Ok(())
271    }
272}