use std::ffi;
use std::path::Path;
use std::ptr;
use url::Url;
use crate::errors::{Error, Result};
use crate::htslib;
use crate::utils::path_as_bytes;
pub trait Read: Sized {
fn read(&mut self, record: &mut Vec<u8>) -> Result<bool>;
fn records(&mut self) -> Records<'_, Self>;
fn header(&self) -> &Vec<String>;
}
#[derive(Debug)]
pub struct Reader {
header: Vec<String>,
hts_file: *mut htslib::htsFile,
hts_format: htslib::htsExactFormat,
tbx: *mut htslib::tbx_t,
buf: htslib::kstring_t,
itr: Option<*mut htslib::hts_itr_t>,
tid: i64,
start: i64,
end: i64,
}
unsafe impl Send for Reader {}
const KS_SEP_LINE: i32 = 2;
impl Reader {
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
Self::new(&path_as_bytes(path, true)?)
}
pub fn from_url(url: &Url) -> Result<Self> {
Self::new(url.as_str().as_bytes())
}
fn new(path: &[u8]) -> Result<Self> {
let path = ffi::CString::new(path).unwrap();
let c_str = ffi::CString::new("r").unwrap();
let hts_file = unsafe { htslib::hts_open(path.as_ptr(), c_str.as_ptr()) };
let hts_format: u32 = unsafe {
let file_format: *const hts_sys::htsFormat = htslib::hts_get_format(hts_file);
(*file_format).format
};
let tbx = unsafe { htslib::tbx_index_load(path.as_ptr()) };
if tbx.is_null() {
return Err(Error::TabixInvalidIndex);
}
let mut header = Vec::new();
let mut buf = htslib::kstring_t {
l: 0,
m: 0,
s: ptr::null_mut(),
};
unsafe {
while htslib::hts_getline(hts_file, KS_SEP_LINE, &mut buf) >= 0 {
if buf.l > 0 && i32::from(*buf.s) == (*tbx).conf.meta_char {
header.push(String::from(ffi::CStr::from_ptr(buf.s).to_str().unwrap()));
} else {
break;
}
}
}
Ok(Reader {
header,
hts_file,
hts_format,
tbx,
buf,
itr: None,
tid: -1,
start: -1,
end: -1,
})
}
pub fn tid(&self, name: &str) -> Result<u64> {
let name_cstr = ffi::CString::new(name.as_bytes()).unwrap();
let res = unsafe { htslib::tbx_name2id(self.tbx, name_cstr.as_ptr()) };
if res < 0 {
Err(Error::UnknownSequence {
sequence: name.to_owned(),
})
} else {
Ok(res as u64)
}
}
pub fn fetch(&mut self, tid: u64, start: u64, end: u64) -> Result<()> {
self.tid = tid as i64;
self.start = start as i64;
self.end = end as i64;
if let Some(itr) = self.itr {
unsafe {
htslib::hts_itr_destroy(itr);
}
}
let itr = unsafe {
htslib::hts_itr_query(
(*self.tbx).idx,
tid as i32,
start as i64,
end as i64,
Some(htslib::tbx_readrec),
)
};
if itr.is_null() {
self.itr = None;
Err(Error::Fetch)
} else {
self.itr = Some(itr);
Ok(())
}
}
pub fn seqnames(&self) -> Vec<String> {
let mut result = Vec::new();
let mut nseq: i32 = 0;
let seqs = unsafe { htslib::tbx_seqnames(self.tbx, &mut nseq) };
for i in 0..nseq {
unsafe {
result.push(String::from(
ffi::CStr::from_ptr(*seqs.offset(i as isize))
.to_str()
.unwrap(),
));
}
}
unsafe {
libc::free(seqs as *mut libc::c_void);
};
result
}
pub fn set_threads(&mut self, n_threads: usize) -> Result<()> {
assert!(n_threads > 0, "n_threads must be > 0");
let r = unsafe { htslib::hts_set_threads(self.hts_file, n_threads as i32) };
if r != 0 {
Err(Error::SetThreads)
} else {
Ok(())
}
}
pub fn hts_format(&self) -> htslib::htsExactFormat {
self.hts_format
}
}
fn overlap(tid1: i64, begin1: i64, end1: i64, tid2: i64, begin2: i64, end2: i64) -> bool {
(tid1 == tid2) && (begin1 < end2) && (begin2 < end1)
}
impl Read for Reader {
fn read(&mut self, record: &mut Vec<u8>) -> Result<bool> {
match self.itr {
Some(itr) => {
loop {
let ret = unsafe {
htslib::hts_itr_next(
htslib::hts_get_bgzfp(self.hts_file),
itr,
&mut self.buf as *mut htslib::kstring_t as *mut libc::c_void,
self.tbx as *mut libc::c_void,
)
};
if ret == -1 {
return Ok(false);
} else if ret == -2 {
return Err(Error::TabixTruncatedRecord);
} else if ret < 0 {
panic!("Return value should not be <0 but was: {}", ret);
}
let (tid, start, end) =
unsafe { ((*itr).curr_tid, (*itr).curr_beg, (*itr).curr_end) };
if overlap(self.tid, self.start, self.end, tid as i64, start, end) {
*record =
unsafe { Vec::from(ffi::CStr::from_ptr(self.buf.s).to_str().unwrap()) };
return Ok(true);
}
}
}
_ => Err(Error::TabixNoIter),
}
}
fn records(&mut self) -> Records<'_, Self> {
Records { reader: self }
}
fn header(&self) -> &Vec<String> {
&self.header
}
}
impl Drop for Reader {
fn drop(&mut self) {
unsafe {
if self.itr.is_some() {
htslib::hts_itr_destroy(self.itr.unwrap());
}
htslib::tbx_destroy(self.tbx);
htslib::hts_close(self.hts_file);
}
}
}
#[derive(Debug)]
pub struct Records<'a, R: Read> {
reader: &'a mut R,
}
impl<R: Read> Iterator for Records<'_, R> {
type Item = Result<Vec<u8>>;
#[allow(clippy::read_zero_byte_vec)]
fn next(&mut self) -> Option<Result<Vec<u8>>> {
let mut record = Vec::new();
match self.reader.read(&mut record) {
Ok(false) => None,
Ok(true) => Some(Ok(record)),
Err(err) => Some(Err(err)),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn bed_basic() {
let reader =
Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file.");
assert_eq!(
reader.seqnames(),
vec![String::from("chr1"), String::from("chr2")]
);
assert_eq!(reader.tid("chr1").unwrap(), 0);
assert_eq!(reader.tid("chr2").unwrap(), 1);
assert!(reader.tid("chr3").is_err());
}
#[test]
fn bed_fetch_from_chr1_read_api() {
let mut reader =
Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file.");
let chr1_id = reader.tid("chr1").unwrap();
assert!(reader.fetch(chr1_id, 1000, 1003).is_ok());
let mut record = Vec::new();
assert!(reader.read(&mut record).is_ok());
assert_eq!(record, Vec::from("chr1\t1001\t1002"));
assert_eq!(reader.read(&mut record), Ok(false)); }
#[test]
fn bed_fetch_from_chr1_iterator_api() {
let mut reader =
Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file.");
let chr1_id = reader.tid("chr1").unwrap();
assert!(reader.fetch(chr1_id, 1000, 1003).is_ok());
let records: Vec<Vec<u8>> = reader.records().map(|r| r.unwrap()).collect();
assert_eq!(records, vec![Vec::from("chr1\t1001\t1002")]);
}
#[test]
fn test_fails_on_bam() {
let reader = Reader::from_path("test/test.bam");
assert!(reader.is_err());
}
#[test]
fn test_fails_on_non_existiant() {
let reader = Reader::from_path("test/no_such_file");
assert!(reader.is_err());
}
#[test]
fn test_fails_on_vcf() {
let reader = Reader::from_path("test/test_left.vcf");
assert!(reader.is_err());
}
#[test]
fn test_text_header_regions() {
Reader::from_path("test/tabix_reader/genomic_regions_header.txt.gz")
.expect("Error opening file.");
}
#[test]
fn test_text_header_positions() {
Reader::from_path("test/tabix_reader/genomic_positions_header.txt.gz")
.expect("Error opening file.");
}
#[test]
fn test_text_bad_header() {
Reader::from_path("test/tabix_reader/bad_header.txt.gz")
.expect_err("Invalid index file should fail.");
}
}