d4_hts/alignment/
bamfile.rs

1use std::cell::Cell;
2use std::ffi::{c_void, c_char};
3use std::ffi::{CStr, CString};
4use std::os::unix::ffi::OsStrExt;
5use std::path::{Path, PathBuf};
6use std::ptr::null_mut;
7
8use super::error::AlignmentError;
9use super::htslib::*;
10use super::{Alignment, AlignmentReader};
11
/// A BAM/CRAM/SAM File
///
/// Owns the raw htslib handles for one opened alignment file and releases
/// them in its `Drop` implementation.
pub struct BamFile {
    /// `(name, length)` of every target listed in the file header, in header order.
    chrom_list: Vec<(String, usize)>,
    /// File handle returned by `hts_open`; closed with `hts_close` on drop.
    fp: *mut htsFile,
    /// Header returned by `sam_hdr_read`; destroyed with `bam_hdr_destroy` on drop.
    hdr: *mut bam_hdr_t,
    /// Index handle; stays null until `range` loads it with `sam_index_load`.
    idx: *mut hts_idx_t,
    /// Maximum number of recycled records kept in `mp_free` (set to 20 by `open`).
    mp_size_limit: usize,
    /// Pool of `bam1_t` records handed back through `free_inner_obj` and
    /// reused by `alloc_inner_obj`.
    mp_free: Cell<Vec<*mut bam1_t>>,
    /// Path the file was opened from; passed to `sam_index_load` by `range`.
    path: Box<PathBuf>,
}
22
23impl Drop for BamFile {
24    fn drop(&mut self) {
25        if !self.idx.is_null() {
26            unsafe {
27                hts_idx_destroy(self.idx);
28            }
29            self.idx = null_mut();
30        }
31
32        if !self.hdr.is_null() {
33            unsafe {
34                bam_hdr_destroy(self.hdr);
35            }
36            self.hdr = null_mut();
37        }
38
39        if !self.fp.is_null() {
40            unsafe { hts_close(self.fp) };
41            self.fp = null_mut();
42        }
43
44        let bams = self.mp_free.replace(vec![]);
45
46        for bam in bams {
47            unsafe { bam_destroy1(bam) };
48        }
49    }
50}
51
52impl BamFile {
53    pub fn set_required_fields(&mut self, flag: u32) {
54        unsafe {
55            hts_set_opt(self.fp, hts_fmt_option_CRAM_OPT_REQUIRED_FIELDS, flag);
56        }
57    }
58    /// Set the path to the reference FAI file. Only used for CRAM
59    pub fn reference_path<P: AsRef<Path>>(&self, path: P) {
60        unsafe {
61            let path_buf = CString::new(path.as_ref().as_os_str().as_bytes()).unwrap();
62            hts_set_fai_filename(self.fp, path_buf.as_ptr());
63        }
64    }
65    /// Open a BAM/CRAM/SAM file on the disk
66    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, AlignmentError> {
67        let mut ret = BamFile {
68            path: Box::new(path.as_ref().to_path_buf()),
69            chrom_list: vec![],
70            fp: null_mut(),
71            hdr: null_mut(),
72            idx: null_mut(),
73            mp_free: Cell::new(vec![]),
74            mp_size_limit: 20,
75        };
76
77        ret.fp = unsafe {
78            let path_buf = CString::new(path.as_ref().as_os_str().as_bytes()).unwrap();
79            let mod_buf = CString::new("rb").unwrap();
80            let ptr = hts_open(path_buf.as_ptr(), mod_buf.as_ptr());
81            if ptr.is_null() {
82                return Err((-1).into());
83            }
84            ptr
85        };
86
87        ret.hdr = unsafe { sam_hdr_read(ret.fp) };
88
89        if ret.hdr.is_null() {
90            return Err((-1).into());
91        }
92
93        let raw_names = unsafe {
94            std::slice::from_raw_parts((*ret.hdr).target_name, (*ret.hdr).n_targets as usize)
95        };
96
97        let sizes = unsafe {
98            std::slice::from_raw_parts((*ret.hdr).target_len, (*ret.hdr).n_targets as usize)
99        };
100
101        for (size, raw_name) in sizes.iter().zip(raw_names) {
102            let raw_name = unsafe { CStr::from_ptr(*raw_name as *const c_char) };
103
104            ret.chrom_list
105                .push((raw_name.to_string_lossy().to_string(), *size as usize));
106        }
107
108        Ok(ret)
109    }
110
111    pub fn chroms(&self) -> &[(String, usize)] {
112        &self.chrom_list[..]
113    }
114
115    pub(super) fn alloc_inner_obj(&self) -> Result<*mut bam1_t, AlignmentError> {
116        let ret;
117
118        let mut cur_list = self.mp_free.replace(vec![]);
119
120        if cur_list.is_empty() {
121            ret = unsafe { bam_init1() };
122            if ret.is_null() {
123                self.mp_free.replace(cur_list);
124                return Err((-1).into());
125            }
126        } else {
127            ret = cur_list.pop().unwrap();
128        }
129
130        self.mp_free.replace(cur_list);
131        Ok(ret)
132    }
133
134    pub(super) fn free_inner_obj(&self, obj: *mut bam1_t) {
135        if obj.is_null() {
136            return;
137        }
138
139        let mut cur_list = self.mp_free.replace(vec![]);
140
141        if cur_list.len() >= self.mp_size_limit {
142            unsafe {
143                bam_destroy1(obj);
144            }
145            return;
146        }
147
148        cur_list.push(obj);
149
150        self.mp_free.replace(cur_list);
151    }
152
153    pub fn range(
154        &mut self,
155        chrom: &str,
156        from: usize,
157        to: usize,
158    ) -> Result<Ranged<'_>, AlignmentError> {
159        if self.idx.is_null() {
160            self.idx = unsafe {
161                let path_buf = CString::new(self.path.as_path().as_os_str().as_bytes()).unwrap();
162                sam_index_load(self.fp, path_buf.as_ptr())
163            };
164            if self.idx.is_null() {
165                return Err((-1).into());
166            }
167        }
168
169        let mut chrom_iter = self.chrom_list[..].iter().zip(0..);
170        let chrom = loop {
171            if let Some(((name, _), idx)) = chrom_iter.next() {
172                if name == chrom {
173                    break idx;
174                }
175            } else {
176                return Err(AlignmentError::BadPosition);
177            }
178        };
179
180        let iter = unsafe {
181            sam_itr_queryi(
182                self.idx,
183                chrom as i32,
184                (from as i32).into(),
185                (to as i32).into(),
186            )
187        };
188
189        if iter.is_null() {
190            return Err((-1).into());
191        }
192
193        Ok(Ranged {
194            file: self,
195            iter,
196            start: from as u32,
197            chrom,
198        })
199    }
200}
201
/// A reader over a single region of a [`BamFile`], created by `BamFile::range`.
pub struct Ranged<'a> {
    /// Position of the queried chromosome in the file's header target list.
    chrom: u32,
    /// Start coordinate of the query (the `from` argument of `range`, cast to `u32`).
    start: u32,
    /// The file this region reader borrows from.
    file: &'a BamFile,
    /// Iterator returned by `sam_itr_queryi`; destroyed with `hts_itr_destroy` on drop.
    iter: *mut hts_itr_t,
}
208
209impl<'a> Drop for Ranged<'a> {
210    fn drop(&mut self) {
211        if !self.iter.is_null() {
212            unsafe { hts_itr_destroy(self.iter) };
213            self.iter = null_mut();
214        }
215    }
216}
217
218impl<'a> AlignmentReader<'a> for &'a BamFile {
219    fn start(&self) -> (u32, u32) {
220        (0, 0)
221    }
222    fn get_file(&self) -> &'a BamFile {
223        *self
224    }
225
226    #[allow(clippy::not_unsafe_ptr_arg_deref)]
227    fn next(&self, buf: *mut bam1_t) -> Result<Option<Alignment<'a>>, AlignmentError> {
228        let rc = unsafe { sam_read1(self.fp, self.hdr, buf) };
229
230        if rc > 0 {
231            return Ok(Some(Alignment::new(buf, self)));
232        }
233
234        if rc == 0 {
235            return Ok(None);
236        }
237
238        Err(rc.into())
239    }
240}
241
242impl<'a> AlignmentReader<'a> for Ranged<'a> {
243    fn start(&self) -> (u32, u32) {
244        (self.chrom, self.start)
245    }
246    fn get_file(&self) -> &'a BamFile {
247        self.file
248    }
249
250    fn next(&self, buf: *mut bam1_t) -> Result<Option<Alignment<'a>>, AlignmentError> {
251        let rc = unsafe {
252            hts_itr_next(
253                (*self.file.fp).fp.bgzf,
254                self.iter,
255                buf as *mut c_void,
256                self.file.fp as *mut c_void,
257            )
258        };
259
260        if rc > 0 {
261            return Ok(Some(Alignment::new(buf, self.file)));
262        }
263
264        if rc == 0 {
265            return Ok(None);
266        }
267
268        Err(rc.into())
269    }
270}