Skip to main content

rsomics_tabix/
lib.rs

1use std::io;
2
3mod build;
4mod compress;
5mod config;
6mod query;
7
8pub use build::index_file;
9pub use config::{Config, IndexKind, Preset};
10pub use query::{list_chromosomes, query_regions};
11
/// One data line's genomic span together with its reference-sequence name.
///
/// Coordinates are 1-based and inclusive on both ends. htslib works in 0-based
/// half-open `[beg, end)` internally; the index stores the 1-based inclusive
/// `[beg+1, end]` span, which is what noodles' `Position`-based indexers want.
///
/// The struct only borrows the contig name and holds two integers, so it is
/// `Copy` and can be compared or debug-printed cheaply.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct Interval<'a> {
    /// Raw contig-name bytes. Left unvalidated on the hot path; the build path
    /// only validates UTF-8 once per contig, when a new name is first seen.
    pub ref_name: &'a [u8],
    /// 1-based inclusive start.
    pub start: usize,
    /// 1-based inclusive end.
    pub end: usize,
}
25
26/// Parse a non-negative ASCII-decimal coordinate directly off the byte slice,
27/// skipping surrounding whitespace. Avoids the UTF-8 validation and generic
28/// `str::parse` machinery on the per-record hot path; coordinates are always
29/// plain digits.
30pub(crate) fn parse_coord(bytes: &[u8]) -> io::Result<i64> {
31    let mut i = 0;
32    let n = bytes.len();
33    while i < n && bytes[i].is_ascii_whitespace() {
34        i += 1;
35    }
36    let start = i;
37    let mut value: i64 = 0;
38    while i < n {
39        let b = bytes[i];
40        if b.is_ascii_whitespace() {
41            break;
42        }
43        let d = b.wrapping_sub(b'0');
44        if d > 9 {
45            return Err(invalid(format!(
46                "invalid coordinate: {:?}",
47                String::from_utf8_lossy(bytes)
48            )));
49        }
50        value = value * 10 + i64::from(d);
51        i += 1;
52    }
53    if i == start {
54        return Err(invalid("empty coordinate"));
55    }
56    Ok(value)
57}
58
/// Build an [`io::Error`] of kind [`io::ErrorKind::InvalidData`] whose payload
/// is the given message.
pub(crate) fn invalid(msg: impl Into<String>) -> io::Error {
    let message: String = msg.into();
    io::Error::new(io::ErrorKind::InvalidData, message)
}