1use std::ffi;
45use std::path::Path;
46use std::ptr;
47use url::Url;
48
49use crate::errors::{Error, Result};
50use crate::htslib;
51use crate::utils::path_as_bytes;
52
53pub trait Read: Sized {
55 fn read(&mut self, record: &mut Vec<u8>) -> Result<bool>;
67
68 fn records(&mut self) -> Records<'_, Self>;
74
75 fn header(&self) -> &Vec<String>;
77}
78
79#[derive(Debug)]
87pub struct Reader {
88 header: Vec<String>,
90
91 hts_file: *mut htslib::htsFile,
93 hts_format: htslib::htsExactFormat,
95 tbx: *mut htslib::tbx_t,
97 buf: htslib::kstring_t,
99 itr: Option<*mut htslib::hts_itr_t>,
101
102 tid: i64,
104 start: i64,
106 end: i64,
108}
109
110unsafe impl Send for Reader {}
111
112const KS_SEP_LINE: i32 = 2;
114
115impl Reader {
116 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
122 Self::new(&path_as_bytes(path, true)?)
123 }
124
125 pub fn from_url(url: &Url) -> Result<Self> {
126 Self::new(url.as_str().as_bytes())
127 }
128
129 fn new(path: &[u8]) -> Result<Self> {
135 let path = ffi::CString::new(path).unwrap();
136 let c_str = ffi::CString::new("r").unwrap();
137 let hts_file = unsafe { htslib::hts_open(path.as_ptr(), c_str.as_ptr()) };
138 let hts_format: u32 = unsafe {
139 let file_format: *const hts_sys::htsFormat = htslib::hts_get_format(hts_file);
140 (*file_format).format
141 };
142
143 let tbx = unsafe { htslib::tbx_index_load(path.as_ptr()) };
144 if tbx.is_null() {
145 return Err(Error::TabixInvalidIndex);
146 }
147 let mut header = Vec::new();
148 let mut buf = htslib::kstring_t {
149 l: 0,
150 m: 0,
151 s: ptr::null_mut(),
152 };
153 unsafe {
154 while htslib::hts_getline(hts_file, KS_SEP_LINE, &mut buf) >= 0 {
155 if buf.l > 0 && i32::from(*buf.s) == (*tbx).conf.meta_char {
156 header.push(String::from(ffi::CStr::from_ptr(buf.s).to_str().unwrap()));
157 } else {
158 break;
159 }
160 }
161 }
162
163 Ok(Reader {
164 header,
165 hts_file,
166 hts_format,
167 tbx,
168 buf,
169 itr: None,
170 tid: -1,
171 start: -1,
172 end: -1,
173 })
174 }
175
176 pub fn tid(&self, name: &str) -> Result<u64> {
178 let name_cstr = ffi::CString::new(name.as_bytes()).unwrap();
179 let res = unsafe { htslib::tbx_name2id(self.tbx, name_cstr.as_ptr()) };
180 if res < 0 {
181 Err(Error::UnknownSequence {
182 sequence: name.to_owned(),
183 })
184 } else {
185 Ok(res as u64)
186 }
187 }
188
189 pub fn fetch(&mut self, tid: u64, start: u64, end: u64) -> Result<()> {
191 self.tid = tid as i64;
192 self.start = start as i64;
193 self.end = end as i64;
194
195 if let Some(itr) = self.itr {
196 unsafe {
197 htslib::hts_itr_destroy(itr);
198 }
199 }
200 let itr = unsafe {
201 htslib::hts_itr_query(
202 (*self.tbx).idx,
203 tid as i32,
204 start as i64,
205 end as i64,
206 Some(htslib::tbx_readrec),
207 )
208 };
209 if itr.is_null() {
210 self.itr = None;
211 Err(Error::Fetch)
212 } else {
213 self.itr = Some(itr);
214 Ok(())
215 }
216 }
217
218 pub fn seqnames(&self) -> Vec<String> {
220 let mut result = Vec::new();
221
222 let mut nseq: i32 = 0;
223 let seqs = unsafe { htslib::tbx_seqnames(self.tbx, &mut nseq) };
224 for i in 0..nseq {
225 unsafe {
226 result.push(String::from(
227 ffi::CStr::from_ptr(*seqs.offset(i as isize))
228 .to_str()
229 .unwrap(),
230 ));
231 }
232 }
233 unsafe {
234 libc::free(seqs as *mut libc::c_void);
235 };
236
237 result
238 }
239
240 pub fn set_threads(&mut self, n_threads: usize) -> Result<()> {
247 assert!(n_threads > 0, "n_threads must be > 0");
248
249 let r = unsafe { htslib::hts_set_threads(self.hts_file, n_threads as i32) };
250 if r != 0 {
251 Err(Error::SetThreads)
252 } else {
253 Ok(())
254 }
255 }
256
257 pub fn hts_format(&self) -> htslib::htsExactFormat {
258 self.hts_format
259 }
260}
261
262fn overlap(tid1: i64, begin1: i64, end1: i64, tid2: i64, begin2: i64, end2: i64) -> bool {
264 (tid1 == tid2) && (begin1 < end2) && (begin2 < end1)
265}
266
267impl Read for Reader {
268 fn read(&mut self, record: &mut Vec<u8>) -> Result<bool> {
269 match self.itr {
270 Some(itr) => {
271 loop {
272 let ret = unsafe {
274 htslib::hts_itr_next(
275 htslib::hts_get_bgzfp(self.hts_file),
276 itr,
277 &mut self.buf as *mut htslib::kstring_t as *mut libc::c_void,
279 self.tbx as *mut libc::c_void,
281 )
282 };
283 if ret == -1 {
285 return Ok(false);
286 } else if ret == -2 {
287 return Err(Error::TabixTruncatedRecord);
288 } else if ret < 0 {
289 panic!("Return value should not be <0 but was: {}", ret);
290 }
291 let (tid, start, end) =
294 unsafe { ((*itr).curr_tid, (*itr).curr_beg, (*itr).curr_end) };
295 if overlap(self.tid, self.start, self.end, tid as i64, start, end) {
297 *record =
298 unsafe { Vec::from(ffi::CStr::from_ptr(self.buf.s).to_str().unwrap()) };
299 return Ok(true);
300 }
301 }
302 }
303 _ => Err(Error::TabixNoIter),
304 }
305 }
306
307 fn records(&mut self) -> Records<'_, Self> {
308 Records { reader: self }
309 }
310
311 fn header(&self) -> &Vec<String> {
312 &self.header
313 }
314}
315
316impl Drop for Reader {
317 fn drop(&mut self) {
318 unsafe {
319 if self.itr.is_some() {
320 htslib::hts_itr_destroy(self.itr.unwrap());
321 }
322 htslib::tbx_destroy(self.tbx);
323 htslib::hts_close(self.hts_file);
324 }
325 }
326}
327
328#[derive(Debug)]
330pub struct Records<'a, R: Read> {
331 reader: &'a mut R,
332}
333
334impl<R: Read> Iterator for Records<'_, R> {
335 type Item = Result<Vec<u8>>;
336
337 #[allow(clippy::read_zero_byte_vec)]
338 fn next(&mut self) -> Option<Result<Vec<u8>>> {
339 let mut record = Vec::new();
340 match self.reader.read(&mut record) {
341 Ok(false) => None,
342 Ok(true) => Some(Ok(record)),
343 Err(err) => Some(Err(err)),
344 }
345 }
346}
347
348#[cfg(test)]
349mod tests {
350 use super::*;
351
352 #[test]
353 fn bed_basic() {
354 let reader =
355 Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file.");
356
357 assert_eq!(
359 reader.seqnames(),
360 vec![String::from("chr1"), String::from("chr2")]
361 );
362
363 assert_eq!(reader.tid("chr1").unwrap(), 0);
365 assert_eq!(reader.tid("chr2").unwrap(), 1);
366 assert!(reader.tid("chr3").is_err());
367 }
368
369 #[test]
370 fn bed_fetch_from_chr1_read_api() {
371 let mut reader =
372 Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file.");
373
374 let chr1_id = reader.tid("chr1").unwrap();
375 assert!(reader.fetch(chr1_id, 1000, 1003).is_ok());
376
377 let mut record = Vec::new();
378 assert!(reader.read(&mut record).is_ok());
379 assert_eq!(record, Vec::from("chr1\t1001\t1002"));
380 assert_eq!(reader.read(&mut record), Ok(false)); }
382
383 #[test]
384 fn bed_fetch_from_chr1_iterator_api() {
385 let mut reader =
386 Reader::from_path("test/tabix_reader/test_bed3.bed.gz").expect("Error opening file.");
387
388 let chr1_id = reader.tid("chr1").unwrap();
389 assert!(reader.fetch(chr1_id, 1000, 1003).is_ok());
390
391 let records: Vec<Vec<u8>> = reader.records().map(|r| r.unwrap()).collect();
392 assert_eq!(records, vec![Vec::from("chr1\t1001\t1002")]);
393 }
394
395 #[test]
396 fn test_fails_on_bam() {
397 let reader = Reader::from_path("test/test.bam");
398 assert!(reader.is_err());
399 }
400
401 #[test]
402 fn test_fails_on_non_existiant() {
403 let reader = Reader::from_path("test/no_such_file");
404 assert!(reader.is_err());
405 }
406
407 #[test]
408 fn test_fails_on_vcf() {
409 let reader = Reader::from_path("test/test_left.vcf");
410 assert!(reader.is_err());
411 }
412
413 #[test]
414 fn test_text_header_regions() {
415 Reader::from_path("test/tabix_reader/genomic_regions_header.txt.gz")
417 .expect("Error opening file.");
418 }
419
420 #[test]
421 fn test_text_header_positions() {
422 Reader::from_path("test/tabix_reader/genomic_positions_header.txt.gz")
425 .expect("Error opening file.");
426 }
427
428 #[test]
429 fn test_text_bad_header() {
430 Reader::from_path("test/tabix_reader/bad_header.txt.gz")
432 .expect_err("Invalid index file should fail.");
433 }
434}