Skip to main content

noodles_bcf/io/reader/
query.rs

1mod records;
2
3use std::io;
4
5use noodles_bgzf as bgzf;
6use noodles_core::region::Interval;
7use noodles_csi::{self as csi, binning_index::index::reference_sequence::bin::Chunk};
8use noodles_vcf::{self as vcf, variant::Record as _};
9
10use self::records::Records;
11use super::Reader;
12use crate::Record;
13
14/// A reader over records of a BCF reader that intersects a given region.
15///
16/// This is created by calling [`Reader::query`].
17pub struct Query<'r, 'h, R> {
18    reader: Reader<csi::io::Query<'r, R>>,
19    header: &'h vcf::Header,
20    reference_sequence_id: usize,
21    interval: Interval,
22}
23
24impl<'r, 'h, R> Query<'r, 'h, R>
25where
26    R: bgzf::io::BufRead + bgzf::io::Seek,
27{
28    pub(super) fn new(
29        reader: &'r mut R,
30        header: &'h vcf::Header,
31        chunks: Vec<Chunk>,
32        reference_sequence_id: usize,
33        interval: Interval,
34    ) -> Self {
35        Self {
36            reader: Reader::from(csi::io::Query::new(reader, chunks)),
37            header,
38            reference_sequence_id,
39            interval,
40        }
41    }
42
43    /// Reads a record.
44    ///
45    /// # Examples
46    ///
47    /// ```no_run
48    /// # use std::fs::File;
49    /// use noodles_bcf as bcf;
50    /// use noodles_csi as csi;
51    ///
52    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
53    /// let header = reader.read_header()?;
54    ///
55    /// let index = csi::fs::read("sample.bcf.csi")?;
56    /// let region = "sq0:8-13".parse()?;
57    /// let query = reader.query(&header, &index, &region)?;
58    ///
59    /// let mut record = bcf::Record::default();
60    ///
61    /// while reader.read_record(&mut record)? != 0 {
62    ///     // ...
63    /// }
64    /// # Ok::<(), Box<dyn std::error::Error>>(())
65    /// ```
66    pub fn read_record(&mut self, record: &mut Record) -> io::Result<usize> {
67        next_record(
68            &mut self.reader,
69            record,
70            self.header,
71            self.reference_sequence_id,
72            self.interval,
73        )
74    }
75
76    /// Returns an iterator over records.
77    ///
78    /// # Examples
79    ///
80    /// ```no_run
81    /// # use std::fs::File;
82    /// use noodles_bcf as bcf;
83    /// use noodles_csi as csi;
84    ///
85    /// let mut reader = File::open("sample.bcf").map(bcf::io::Reader::new)?;
86    /// let header = reader.read_header()?;
87    ///
88    /// let index = csi::fs::read("sample.bcf.csi")?;
89    /// let region = "sq0:8-13".parse()?;
90    /// let query = reader.query(&header, &index, &region)?;
91    ///
92    /// for result in query.records() {
93    ///     let record = result?;
94    ///     // ...
95    /// }
96    /// # Ok::<(), Box<dyn std::error::Error>>(())
97    /// ```
98    pub fn records(self) -> Records<'r, 'h, R> {
99        Records::new(self)
100    }
101}
102
103fn intersects(
104    header: &vcf::Header,
105    record: &Record,
106    reference_sequence_id: usize,
107    region_interval: Interval,
108) -> io::Result<bool> {
109    let reference_sequence_name = record.reference_sequence_name(header.string_maps())?;
110
111    let id = header
112        .string_maps()
113        .contigs()
114        .get_index_of(reference_sequence_name)
115        .ok_or_else(|| {
116            io::Error::new(
117                io::ErrorKind::InvalidInput,
118                format!(
119                    "reference sequence name does not exist in contigs: {reference_sequence_name}"
120                ),
121            )
122        })?;
123
124    if reference_sequence_id != id {
125        return Ok(false);
126    }
127
128    if interval_is_unbounded(region_interval) {
129        Ok(true)
130    } else {
131        let Some(start) = record.variant_start().transpose()? else {
132            return Ok(false);
133        };
134
135        let end = record.variant_end(header)?;
136        let record_interval = Interval::from(start..=end);
137
138        Ok(record_interval.intersects(region_interval))
139    }
140}
141
142fn interval_is_unbounded(interval: Interval) -> bool {
143    interval.start().is_none() && interval.end().is_none()
144}
145
146fn next_record<R>(
147    reader: &mut Reader<csi::io::Query<'_, R>>,
148    record: &mut Record,
149    header: &vcf::Header,
150    reference_sequence_id: usize,
151    interval: Interval,
152) -> io::Result<usize>
153where
154    R: bgzf::io::BufRead + bgzf::io::Seek,
155{
156    loop {
157        match reader.read_record(record)? {
158            0 => return Ok(0),
159            n => {
160                if intersects(header, record, reference_sequence_id, interval)? {
161                    return Ok(n);
162                }
163            }
164        }
165    }
166}