1use std::ffi;
11use std::path::Path;
12use url::Url;
13
14use crate::htslib;
15
16use crate::errors::{Error, Result};
17use crate::utils::path_as_bytes;
18
19#[derive(Debug)]
21pub struct Reader {
22 inner: *mut htslib::faidx_t,
23}
24
25pub fn build(
38 path: impl Into<std::path::PathBuf>,
39) -> Result<(), std::boxed::Box<dyn std::error::Error>> {
40 let path = path.into();
41 let os_path = std::ffi::CString::new(path.display().to_string())?;
42 let rc = unsafe { htslib::fai_build(os_path.as_ptr()) };
43 if rc < 0 {
44 Err(Error::FaidxBuildFailed { path })?
45 } else {
46 Ok(())
47 }
48}
49
50impl Reader {
51 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
57 Self::new(&path_as_bytes(path, true)?)
58 }
59
60 pub fn from_url(url: &Url) -> Result<Self, Error> {
66 Self::new(url.as_str().as_bytes())
67 }
68
69 fn new(path: &[u8]) -> Result<Self, Error> {
76 let cpath = ffi::CString::new(path).unwrap();
77 let inner = unsafe { htslib::fai_load(cpath.as_ptr()) };
78 Ok(Self { inner })
79 }
80
81 pub fn fetch_seq<N: AsRef<str>>(&self, name: N, begin: usize, end: usize) -> Result<Vec<u8>> {
89 if begin > i64::MAX as usize {
90 return Err(Error::FaidxPositionTooLarge);
91 }
92 if end > i64::MAX as usize {
93 return Err(Error::FaidxPositionTooLarge);
94 }
95 let cname = ffi::CString::new(name.as_ref().as_bytes()).unwrap();
96 let mut len_out: htslib::hts_pos_t = 0;
97 let ptr = unsafe {
98 htslib::faidx_fetch_seq64(
99 self.inner, cname.as_ptr(), begin as htslib::hts_pos_t, end as htslib::hts_pos_t, &mut len_out, )
105 };
106 let vec =
107 unsafe { Vec::from_raw_parts(ptr as *mut u8, len_out as usize, len_out as usize) };
108 Ok(vec)
109 }
110
111 pub fn fetch_seq_string<N: AsRef<str>>(
119 &self,
120 name: N,
121 begin: usize,
122 end: usize,
123 ) -> Result<String> {
124 let bytes = self.fetch_seq(name, begin, end)?;
125 Ok(std::str::from_utf8(&bytes).unwrap().to_owned())
126 }
127
128 pub fn n_seqs(&self) -> u64 {
130 let n = unsafe { htslib::faidx_nseq(self.inner) };
131 n as u64
132 }
133
134 pub fn seq_name(&self, i: i32) -> Result<String> {
140 let cname = unsafe {
141 let ptr = htslib::faidx_iseq(self.inner, i);
142 ffi::CStr::from_ptr(ptr)
143 };
144
145 let out = match cname.to_str() {
146 Ok(s) => s.to_string(),
147 Err(_) => {
148 return Err(Error::FaidxBadSeqName);
149 }
150 };
151
152 Ok(out)
153 }
154
155 pub fn fetch_seq_len<N: AsRef<str>>(&self, name: N) -> u64 {
161 let cname = ffi::CString::new(name.as_ref().as_bytes()).unwrap();
162 let seq_len = unsafe { htslib::faidx_seq_len(self.inner, cname.as_ptr()) };
163 seq_len as u64
164 }
165
166 pub fn seq_names(&self) -> Result<Vec<String>> {
181 let num_seq = self.n_seqs();
182 let mut ret = Vec::with_capacity(num_seq as usize);
183 for seq_id in 0..num_seq {
184 ret.push(self.seq_name(seq_id as i32)?);
185 }
186 Ok(ret)
187 }
188}
189
190impl Drop for Reader {
191 fn drop(&mut self) {
192 unsafe {
193 htslib::fai_destroy(self.inner);
194 }
195 }
196}
197
198unsafe impl Send for Reader {}
199
#[cfg(test)]
mod tests {
    use super::*;

    /// Opens the FASTA fixture stored under `test/` in the crate root.
    fn open_reader() -> Reader {
        let fixture = format!("{}/test/test_cram.fa", env!("CARGO_MANIFEST_DIR"));
        Reader::from_path(fixture).ok().unwrap()
    }

    /// Fetches `begin..=end` of `chr1` as bytes and as a string, and checks
    /// the length and content of each against `expected`.
    fn assert_chr1_region(reader: &Reader, begin: usize, end: usize, expected: &str) {
        let bytes = reader.fetch_seq("chr1", begin, end).unwrap();
        assert_eq!(bytes.len(), expected.len());
        assert_eq!(bytes, expected.as_bytes());

        let text = reader.fetch_seq_string("chr1", begin, end).unwrap();
        assert_eq!(text.len(), expected.len());
        assert_eq!(text, expected);
    }

    #[test]
    fn faidx_open() {
        open_reader();
    }

    #[test]
    fn faidx_read_chr_first_base() {
        let reader = open_reader();
        assert_chr1_region(&reader, 0, 0, "G");
    }

    #[test]
    fn faidx_read_chr_start() {
        let reader = open_reader();
        assert_chr1_region(&reader, 0, 9, "GGGCACAGCC");
    }

    #[test]
    fn faidx_read_chr_between() {
        let reader = open_reader();
        assert_chr1_region(&reader, 4, 14, "ACAGCCTCACC");
    }

    #[test]
    fn faidx_read_chr_end() {
        // The requested end lies one past the last base; ten bases come back.
        let reader = open_reader();
        assert_chr1_region(&reader, 110, 120, "CCCCTCCGTG");
    }

    #[test]
    fn faidx_read_twice_string() {
        let reader = open_reader();

        let tail = reader.fetch_seq_string("chr1", 110, 120).unwrap();
        assert_eq!(tail.len(), 10);
        assert_eq!(tail, "CCCCTCCGTG");

        let head = reader.fetch_seq_string("chr1", 5, 9).unwrap();
        assert_eq!(head.len(), 5);
        assert_eq!(head, "CAGCC");
    }

    #[test]
    fn faidx_read_twice_bytes() {
        let reader = open_reader();

        let tail = reader.fetch_seq("chr1", 110, 120).unwrap();
        assert_eq!(tail.len(), 10);
        assert_eq!(tail, b"CCCCTCCGTG");

        let head = reader.fetch_seq("chr1", 5, 9).unwrap();
        assert_eq!(head.len(), 5);
        assert_eq!(head, b"CAGCC");
    }

    #[test]
    fn faidx_position_too_large() {
        let reader = open_reader();
        let limit = i64::MAX as usize;
        // `limit` itself is accepted; `limit + 1` as the end triggers the error.
        let outcome = reader.fetch_seq("chr1", limit, limit + 1);
        assert_eq!(outcome, Err(Error::FaidxPositionTooLarge));
    }

    #[test]
    fn faidx_n_seqs() {
        assert_eq!(open_reader().n_seqs(), 3);
    }

    #[test]
    fn faidx_seq_name() {
        let reader = open_reader();
        let second = reader.seq_name(1).unwrap();
        assert_eq!(second, "chr2");
    }

    #[test]
    fn faidx_get_seq_len() {
        let reader = open_reader();
        let lengths = (reader.fetch_seq_len("chr1"), reader.fetch_seq_len("chr2"));
        assert_eq!(lengths.0, 120u64);
        assert_eq!(lengths.1, 120u64);
    }

    #[test]
    fn open_many_readers() {
        // Repeatedly opens and drops a reader.
        for _ in 0..500_000 {
            drop(open_reader());
        }
    }
}