use libc;
use std::ffi;
use std::mem;
use std::path::Path;
use std::ptr;
use url::Url;
use htslib;
/// Common interface of readers that hand out records as raw byte lines.
pub trait Read: Sized {
/// Reads the next record into `record`.
///
/// Returns `Ok(())` when a record was stored and a `ReadError` otherwise.
fn read(&mut self, record: &mut Vec<u8>) -> Result<(), ReadError>;
/// Returns an iterator over the records of this reader, built on top of `read`.
fn records(&mut self) -> Records<Self>;
/// Returns the header lines of the underlying file.
fn header(&self) -> &Vec<String>;
}
/// Reader for a tabix-indexed file, backed by raw htslib handles.
#[derive(Debug)]
pub struct Reader {
// Header lines collected when the file is opened (lines whose first byte equals the
// index's `meta_char`).
header: Vec<String>,
// File handle returned by `hts_open`; closed in `Drop`.
hts_file: *mut htslib::htsFile,
// Exact format reported by `hts_get_format` at open time.
hts_format: htslib::htsExactFormat,
// Index returned by `tbx_index_load`; destroyed in `Drop`.
tbx: *mut htslib::tbx_t,
// Line buffer that htslib fills (`hts_getline` while opening, `hts_itr_next` while reading).
buf: htslib::kstring_t,
// Iterator created by the most recent successful `fetch`; `None` before that.
itr: Option<*mut htslib::hts_itr_t>,
// Target id of the region passed to the last `fetch`; -1 until `fetch` is called.
tid: i32,
// Start of the region passed to the last `fetch`; -1 until `fetch` is called.
start: i32,
// End of the region passed to the last `fetch`; -1 until `fetch` is called.
end: i32,
}
// `Reader` stores raw pointers, which are not `Send` automatically; this impl opts in manually.
// NOTE(review): soundness relies on the htslib handles being usable from whichever thread
// currently owns the `Reader` - TODO confirm against htslib's threading guarantees.
unsafe impl Send for Reader {}
/// Delimiter code handed to `hts_getline` while the header is read in `Reader::new`.
/// Presumably mirrors htslib's `KS_SEP_LINE` macro (line separator) - verify against `kseq.h`.
const KS_SEP_LINE: i32 = 2;
impl Reader {
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, TabixReaderPathError> {
match path.as_ref().to_str() {
Some(p) if path.as_ref().exists() => Ok(try!(Self::new(p.as_bytes()))),
_ => Err(TabixReaderPathError::InvalidPath),
}
}
pub fn from_url(url: &Url) -> Result<Self, TabixReaderError> {
Self::new(url.as_str().as_bytes())
}
fn new(path: &[u8]) -> Result<Self, TabixReaderError> {
let path = ffi::CString::new(path).unwrap();
let hts_file =
unsafe { htslib::hts_open(path.as_ptr(), ffi::CString::new("r").unwrap().as_ptr()) };
unsafe {
if (*hts_file).format.category != htslib::htsFormatCategory_region_list
&& (*hts_file).format.format != htslib::htsExactFormat_sam
{
return Err(TabixReaderError::InvalidIndex);
}
}
let hts_format = unsafe { (*htslib::hts_get_format(hts_file)).format };
let tbx = unsafe { htslib::tbx_index_load(path.as_ptr()) };
let mut header = Vec::new();
let mut buf = htslib::kstring_t {
l: 0,
m: 0,
s: ptr::null_mut(),
};
unsafe {
while htslib::hts_getline(hts_file, KS_SEP_LINE, &mut buf) >= 0 {
if buf.l > 0 && (*buf.s) as i32 == (*tbx).conf.meta_char {
header.push(String::from(ffi::CStr::from_ptr(buf.s).to_str().unwrap()));
} else {
break;
}
}
}
if tbx.is_null() {
Err(TabixReaderError::InvalidIndex)
} else {
Ok(Reader {
header,
hts_file,
hts_format,
tbx,
buf,
itr: None,
tid: -1,
start: -1,
end: -1,
})
}
}
pub fn tid(&self, name: &str) -> Result<u32, SequenceLookupError> {
let res = unsafe {
htslib::tbx_name2id(
self.tbx,
ffi::CString::new(name.as_bytes()).unwrap().as_ptr(),
)
};
if res < 0 {
Err(SequenceLookupError::Some)
} else {
Ok(res as u32)
}
}
pub fn fetch(&mut self, tid: u32, start: u32, end: u32) -> Result<(), FetchError> {
self.tid = tid as i32;
self.start = start as i32;
self.end = end as i32;
if let Some(itr) = self.itr {
unsafe {
htslib::hts_itr_destroy(itr);
}
}
let itr = unsafe {
htslib::hts_itr_query(
(*self.tbx).idx,
tid as i32,
start as i32,
end as i32,
Some(htslib::tbx_readrec),
)
};
if itr.is_null() {
self.itr = None;
Err(FetchError::Some)
} else {
self.itr = Some(itr);
Ok(())
}
}
pub fn seqnames(&self) -> Vec<String> {
let mut result = Vec::new();
let mut nseq: i32 = 0;
let seqs = unsafe { htslib::tbx_seqnames(self.tbx, &mut nseq) };
for i in 0..nseq {
unsafe {
result.push(String::from(
ffi::CStr::from_ptr(*seqs.offset(i as isize))
.to_str()
.unwrap(),
));
}
}
unsafe {
libc::free(seqs as (*mut libc::c_void));
};
result
}
pub fn set_threads(&mut self, n_threads: usize) -> Result<(), ThreadingError> {
assert!(n_threads > 0, "n_threads must be > 0");
let r = unsafe { htslib::hts_set_threads(self.hts_file, n_threads as i32) };
if r != 0 {
Err(ThreadingError::Some)
} else {
Ok(())
}
}
}
/// Tells whether two intervals lie on the same target and share at least one position.
///
/// Both comparisons are strict, so intervals that merely touch (`end1 == begin2` or
/// `end2 == begin1`) do not count as overlapping.
fn overlap(tid1: i32, begin1: i32, end1: i32, tid2: i32, begin2: i32, end2: i32) -> bool {
    if tid1 != tid2 {
        return false;
    }
    begin1 < end2 && begin2 < end1
}
impl Read for Reader {
    /// Reads the next record overlapping the fetched region into `record`.
    ///
    /// Records returned by the htslib iterator that do not overlap the region given to
    /// `fetch` are skipped. The line is copied as raw bytes, so it does not have to be
    /// valid UTF-8, and the allocation already held by `record` is reused.
    ///
    /// # Errors
    ///
    /// * `ReadError::NoIter` if there is no active iterator (no successful `fetch` yet).
    /// * `ReadError::NoMoreRecord` if htslib returns -1.
    /// * `ReadError::Truncated` if htslib returns -2.
    ///
    /// # Panics
    ///
    /// Panics if htslib returns any other negative value.
    fn read(&mut self, record: &mut Vec<u8>) -> Result<(), ReadError> {
        let itr = match self.itr {
            Some(itr) => itr,
            None => return Err(ReadError::NoIter),
        };

        loop {
            // `hts_itr_next` takes the line buffer and the index as untyped pointers.
            let ret = unsafe {
                htslib::hts_itr_next(
                    htslib::hts_get_bgzfp(self.hts_file),
                    itr,
                    mem::transmute(&mut self.buf),
                    mem::transmute(self.tbx),
                )
            };
            match ret {
                -1 => return Err(ReadError::NoMoreRecord),
                -2 => return Err(ReadError::Truncated),
                code if code < 0 => panic!("Return value should not be <0 but was: {}", code),
                _ => {}
            }

            let (tid, start, end) =
                unsafe { ((*itr).curr_tid, (*itr).curr_beg, (*itr).curr_end) };
            if overlap(self.tid, self.start, self.end, tid, start, end) {
                let line = unsafe { ffi::CStr::from_ptr(self.buf.s) };
                record.clear();
                record.extend_from_slice(line.to_bytes());
                return Ok(());
            }
        }
    }

    /// Returns an iterator that yields the remaining records of the fetched region.
    fn records(&mut self) -> Records<Self> {
        Records { reader: self }
    }

    /// Returns the header lines collected when the file was opened.
    fn header(&self) -> &Vec<String> {
        &self.header
    }
}
impl Drop for Reader {
fn drop(&mut self) {
unsafe {
if self.itr.is_some() {
htslib::hts_itr_destroy(self.itr.unwrap());
}
htslib::tbx_destroy(self.tbx);
htslib::hts_close(self.hts_file);
}
}
}
/// Iterator over the records of a reader; it mutably borrows the reader for its lifetime.
#[derive(Debug)]
pub struct Records<'a, R: 'a + Read> {
// Reader whose `read` method is called for every item.
reader: &'a mut R,
}
impl<'a, R: Read> Iterator for Records<'a, R> {
    type Item = Result<Vec<u8>, ReadError>;

    /// Reads one record from the underlying reader.
    ///
    /// `ReadError::NoMoreRecord` ends the iteration; every other error is yielded as an item.
    fn next(&mut self) -> Option<Self::Item> {
        let mut line = Vec::new();
        let outcome = self.reader.read(&mut line);
        match outcome {
            Ok(()) => Some(Ok(line)),
            Err(ReadError::NoMoreRecord) => None,
            Err(other) => Some(Err(other)),
        }
    }
}
quick_error! {
/// Errors reported while reading records.
#[derive(Debug, Clone)]
pub enum ReadError {
/// `Reader::read` was called while the reader has no active iterator.
NoIter {
description("previous iterator generation failed")
}
/// The underlying iterator reported a truncated record (`Reader::read` maps htslib's -2 to this).
Truncated {
description("truncated record")
}
/// An invalid record was encountered.
Invalid {
description("invalid record")
}
/// The iterator is exhausted (`Reader::read` maps htslib's -1 to this); `Records` ends on it.
NoMoreRecord {
description("no more record")
}
}
}
impl ReadError {
    /// Returns `true` exactly when this error is `ReadError::NoMoreRecord`, i.e. the end of
    /// the records was reached.
    pub fn is_eof(&self) -> bool {
        if let ReadError::NoMoreRecord = *self {
            true
        } else {
            false
        }
    }
}
quick_error! {
/// Errors reported while constructing a `Reader`.
#[derive(Debug, Clone)]
pub enum TabixReaderError {
/// The file's format is not accepted or its tabix index could not be loaded.
InvalidIndex {
description("invalid index")
}
/// Wraps a `BGZFError`; `from()` lets one be converted into this type.
BGZFError(err: BGZFError) {
from()
}
}
}
quick_error! {
/// Errors reported by `Reader::from_path`.
#[derive(Debug, Clone)]
pub enum TabixReaderPathError {
/// The path is not valid UTF-8 or does not exist.
InvalidPath {
description("invalid path")
}
/// Wraps a `TabixReaderError` raised while opening the file; `from()` provides the conversion.
TabixReaderError(err: TabixReaderError) {
from()
}
}
}
quick_error! {
/// Error related to reading a BGZF file.
#[derive(Debug, Clone)]
pub enum BGZFError {
/// Unspecified failure; no further detail is carried.
Some {
description("error reading BGZF file")
}
}
}
quick_error! {
/// Error returned by `Reader::set_threads`.
#[derive(Debug, Clone)]
pub enum ThreadingError {
/// htslib reported a non-zero status when the thread count was set.
Some {
description("error setting threads for multi-threaded I/O")
}
}
}
quick_error! {
/// Error returned by `Reader::fetch`.
#[derive(Debug, Clone)]
pub enum FetchError {
/// htslib did not return an iterator for the requested region.
Some {
description("error fetching a locus")
}
}
}
quick_error! {
/// Error returned by `Reader::tid`.
#[derive(Debug, Clone)]
pub enum SequenceLookupError {
/// The sequence name could not be resolved to an id.
Some {
description("error looking up a sequence name")
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Opens the tabix-indexed BED fixture shared by the positive tests.
    fn open_bed() -> Reader {
        Reader::from_path("test/test_bed3.bed.gz")
            .ok()
            .expect("Error opening file.")
    }

    /// Header lines, sequence names and name-to-id lookup of the BED fixture.
    #[test]
    fn bed_header() {
        let reader = open_bed();

        let expected_header = vec!["#foo".to_string(), "#bar".to_string()];
        assert_eq!(reader.header, expected_header);

        let expected_names = vec!["chr1".to_string(), "chr2".to_string()];
        assert_eq!(reader.seqnames(), expected_names);

        assert_eq!(reader.tid("chr1").unwrap(), 0);
        assert_eq!(reader.tid("chr2").unwrap(), 1);
        assert!(reader.tid("chr3").is_err());
    }

    /// Fetching a region on chr1 yields exactly one record through `read`.
    #[test]
    fn bed_fetch_from_chr1_read_api() {
        let mut reader = open_bed();
        let chr1_id = reader.tid("chr1").unwrap();
        assert!(reader.fetch(chr1_id, 1000, 1003).is_ok());

        let mut record = Vec::new();
        assert!(reader.read(&mut record).is_ok());
        assert_eq!(record, b"chr1\t1001\t1002".to_vec());
        assert!(reader.read(&mut record).is_err());
    }

    /// Fetching a region on chr1 yields exactly one record through `records`.
    #[test]
    fn bed_fetch_from_chr1_iterator_api() {
        let mut reader = open_bed();
        let chr1_id = reader.tid("chr1").unwrap();
        assert!(reader.fetch(chr1_id, 1000, 1003).is_ok());

        let records: Vec<Vec<u8>> = reader.records().map(Result::unwrap).collect();
        assert_eq!(records, vec![b"chr1\t1001\t1002".to_vec()]);
    }

    /// A BAM file is rejected.
    #[test]
    fn test_fails_on_bam() {
        assert!(Reader::from_path("test/test.bam").is_err());
    }

    /// A path that does not exist is rejected.
    #[test]
    fn test_fails_on_non_existiant() {
        assert!(Reader::from_path("test/no_such_file").is_err());
    }

    /// A VCF file is rejected.
    #[test]
    fn test_fails_on_vcf() {
        assert!(Reader::from_path("test/test_left.vcf").is_err());
    }
}