noodles_bam/io/reader/
query.rs1mod records;
2
3use std::io;
4
5use noodles_bgzf as bgzf;
6use noodles_core::region::Interval;
7use noodles_csi::{self as csi, binning_index::index::reference_sequence::bin::Chunk};
8use noodles_sam::alignment::Record as _;
9
10pub use self::records::Records;
11
12use crate::Record;
13
14pub struct Query<'r, R> {
18 inner: super::Reader<csi::io::Query<'r, R>>,
19 reference_sequence_id: usize,
20 interval: Interval,
21}
22
23impl<'r, R> Query<'r, R>
24where
25 R: bgzf::io::BufRead + bgzf::io::Seek,
26{
27 pub(super) fn new(
28 reader: &'r mut R,
29 chunks: Vec<Chunk>,
30 reference_sequence_id: usize,
31 interval: Interval,
32 ) -> Self {
33 Self {
34 inner: super::Reader::from(csi::io::Query::new(reader, chunks)),
35 reference_sequence_id,
36 interval,
37 }
38 }
39
40 pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
63 next_record(
64 &mut self.inner,
65 record,
66 self.reference_sequence_id,
67 self.interval,
68 )
69 }
70
71 pub fn records(self) -> Records<'r, R> {
93 Records::new(self)
94 }
95}
96
97pub(crate) fn intersects(
98 record: &Record,
99 reference_sequence_id: usize,
100 region_interval: Interval,
101) -> io::Result<bool> {
102 let Some(id) = record.reference_sequence_id().transpose()? else {
103 return Ok(false);
104 };
105
106 if id != reference_sequence_id {
107 return Ok(false);
108 }
109
110 if interval_is_unbounded(region_interval) {
111 Ok(true)
112 } else {
113 match (
114 record.alignment_start().transpose()?,
115 record.alignment_end().transpose()?,
116 ) {
117 (Some(start), Some(end)) => {
118 let alignment_interval = (start..=end).into();
119 Ok(region_interval.intersects(alignment_interval))
120 }
121 _ => Ok(false),
122 }
123 }
124}
125
126fn interval_is_unbounded(interval: Interval) -> bool {
127 interval.start().is_none() && interval.end().is_none()
128}
129
130fn next_record<R>(
131 reader: &mut super::Reader<csi::io::Query<'_, R>>,
132 record: &mut Record,
133 reference_sequence_id: usize,
134 interval: Interval,
135) -> io::Result<usize>
136where
137 R: bgzf::io::BufRead + bgzf::io::Seek,
138{
139 loop {
140 match reader.read_record(record)? {
141 0 => return Ok(0),
142 n => {
143 if intersects(record, reference_sequence_id, interval)? {
144 return Ok(n);
145 }
146 }
147 }
148 }
149}
150
151#[cfg(test)]
152mod tests {
153 use std::{io::Cursor, num::NonZero};
154
155 use noodles_core::Position;
156 use noodles_csi::binning_index::Indexer;
157 use noodles_sam::{
158 self as sam,
159 alignment::{
160 RecordBuf,
161 io::Write,
162 record::{
163 Flags,
164 cigar::{Op, op::Kind},
165 },
166 },
167 header::record::value::{Map, map::ReferenceSequence},
168 };
169
170 use super::*;
171 use crate::{bai, io::Writer};
172
173 fn write(header: &sam::Header, records: &[RecordBuf]) -> io::Result<Vec<u8>> {
174 let mut writer = Writer::new(Vec::new());
175 writer.write_header(header)?;
176
177 for record in records {
178 writer.write_alignment_record(header, record)?;
179 }
180
181 writer.into_inner().finish()
182 }
183
184 fn index(src: &[u8]) -> io::Result<bai::Index> {
185 let mut reader = crate::io::Reader::new(src);
186 let header = reader.read_header()?;
187
188 let mut indexer = Indexer::default();
189 let mut chunk_start = reader.get_ref().virtual_position();
190
191 let mut record = Record::default();
192
193 while reader.read_record(&mut record)? != 0 {
194 let chunk_end = reader.get_ref().virtual_position();
195
196 let alignment_context = match (
197 record.reference_sequence_id().transpose()?,
198 record.alignment_start().transpose()?,
199 record.alignment_end().transpose()?,
200 ) {
201 (Some(id), Some(start), Some(end)) => {
202 let is_mapped = !record.flags().is_unmapped();
203 Some((id, start, end, is_mapped))
204 }
205 _ => None,
206 };
207
208 let chunk = Chunk::new(chunk_start, chunk_end);
209 indexer.add_record(alignment_context, chunk)?;
210
211 chunk_start = chunk_end;
212 }
213
214 Ok(indexer.build(header.reference_sequences().len()))
215 }
216
217 #[test]
218 fn test_next() -> Result<(), Box<dyn std::error::Error>> {
219 let header = sam::Header::builder()
220 .add_reference_sequence(
221 "sq0",
222 Map::<ReferenceSequence>::new(const { NonZero::new(8).unwrap() }),
223 )
224 .add_reference_sequence(
225 "sq1",
226 Map::<ReferenceSequence>::new(const { NonZero::new(13).unwrap() }),
227 )
228 .build();
229
230 let records = [
231 RecordBuf::builder()
232 .set_reference_sequence_id(0)
233 .set_flags(Flags::default())
234 .set_alignment_start(Position::MIN)
235 .set_cigar([Op::new(Kind::Match, 4)].into_iter().collect())
236 .build(),
237 RecordBuf::builder()
238 .set_reference_sequence_id(1)
239 .set_flags(Flags::default())
240 .set_alignment_start(Position::MIN)
241 .set_cigar([Op::new(Kind::Match, 4)].into_iter().collect())
242 .build(),
243 RecordBuf::builder()
244 .set_reference_sequence_id(1)
245 .set_flags(Flags::default())
246 .set_alignment_start(Position::try_from(8)?)
247 .set_cigar([Op::new(Kind::Match, 4)].into_iter().collect())
248 .build(),
249 ];
250
251 let src = write(&header, &records)?;
252 let index = index(&src)?;
253
254 let mut reader = crate::io::Reader::new(Cursor::new(src));
255
256 let region = "sq1:2-5".parse()?;
257 let query = reader.query(&header, &index, ®ion)?;
258
259 let actual: Vec<_> = query
260 .records()
261 .map(|result| {
262 result.and_then(|record| RecordBuf::try_from_alignment_record(&header, &record))
263 })
264 .collect::<Result<_, _>>()?;
265
266 let expected = [records[1].clone()];
267
268 assert_eq!(actual, expected);
269
270 Ok(())
271 }
272}