noodles_bcf/io/reader/
query.rs1mod records;
2
3use std::io;
4
5use noodles_bgzf as bgzf;
6use noodles_core::region::Interval;
7use noodles_csi::{self as csi, binning_index::index::reference_sequence::bin::Chunk};
8use noodles_vcf::{self as vcf, variant::Record as _};
9
10use self::records::Records;
11use super::Reader;
12use crate::Record;
13
14pub struct Query<'r, 'h, R> {
18 reader: Reader<csi::io::Query<'r, R>>,
19 header: &'h vcf::Header,
20 reference_sequence_id: usize,
21 interval: Interval,
22}
23
24impl<'r, 'h, R> Query<'r, 'h, R>
25where
26 R: bgzf::io::BufRead + bgzf::io::Seek,
27{
28 pub(super) fn new(
29 reader: &'r mut R,
30 header: &'h vcf::Header,
31 chunks: Vec<Chunk>,
32 reference_sequence_id: usize,
33 interval: Interval,
34 ) -> Self {
35 Self {
36 reader: Reader::from(csi::io::Query::new(reader, chunks)),
37 header,
38 reference_sequence_id,
39 interval,
40 }
41 }
42
43 pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
67 next_record(
68 &mut self.reader,
69 record,
70 self.header,
71 self.reference_sequence_id,
72 self.interval,
73 )
74 }
75
76 pub fn records(self) -> Records<'r, 'h, R> {
99 Records::new(self)
100 }
101}
102
103fn intersects(
104 header: &vcf::Header,
105 record: &Record,
106 reference_sequence_id: usize,
107 region_interval: Interval,
108) -> io::Result<bool> {
109 let reference_sequence_name = record.reference_sequence_name(header.string_maps())?;
110
111 let id = header
112 .string_maps()
113 .contigs()
114 .get_index_of(reference_sequence_name)
115 .ok_or_else(|| {
116 io::Error::new(
117 io::ErrorKind::InvalidInput,
118 format!(
119 "reference sequence name does not exist in contigs: {reference_sequence_name}"
120 ),
121 )
122 })?;
123
124 if reference_sequence_id != id {
125 return Ok(false);
126 }
127
128 if interval_is_unbounded(region_interval) {
129 Ok(true)
130 } else {
131 let Some(start) = record.variant_start().transpose()? else {
132 return Ok(false);
133 };
134
135 let end = record.variant_end(header)?;
136 let record_interval = Interval::from(start..=end);
137
138 Ok(record_interval.intersects(region_interval))
139 }
140}
141
142fn interval_is_unbounded(interval: Interval) -> bool {
143 interval.start().is_none() && interval.end().is_none()
144}
145
146fn next_record<R>(
147 reader: &mut Reader<csi::io::Query<'_, R>>,
148 record: &mut Record,
149 header: &vcf::Header,
150 reference_sequence_id: usize,
151 interval: Interval,
152) -> io::Result<usize>
153where
154 R: bgzf::io::BufRead + bgzf::io::Seek,
155{
156 loop {
157 match reader.read_record(record)? {
158 0 => return Ok(0),
159 n => {
160 if intersects(header, record, reference_sequence_id, interval)? {
161 return Ok(n);
162 }
163 }
164 }
165 }
166}