1use std::convert::TryFrom;
7use std::convert::TryInto;
8use std::ffi;
9use std::fmt;
10use std::marker::PhantomData;
11use std::mem::{size_of, MaybeUninit};
12use std::ops;
13use std::os::raw::c_char;
14use std::rc::Rc;
15use std::slice;
16use std::str;
17
18use byteorder::{LittleEndian, ReadBytesExt};
19
20use crate::bam::Error;
21use crate::bam::HeaderView;
22use crate::errors::Result;
23use crate::htslib;
24use crate::utils;
25#[cfg(feature = "serde_feature")]
26use serde::{self, Deserialize, Serialize};
27
28use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
29use bio_types::genome;
30use bio_types::sequence::SequenceRead;
31use bio_types::sequence::SequenceReadPairOrientation;
32use bio_types::strand::ReqStrand;
33
34macro_rules! flag {
36 ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
37 pub fn $get(&self) -> bool {
38 self.inner().core.flag & $bit != 0
39 }
40
41 pub fn $set(&mut self) {
42 self.inner_mut().core.flag |= $bit;
43 }
44
45 pub fn $unset(&mut self) {
46 self.inner_mut().core.flag &= !$bit;
47 }
48 };
49}
50
51pub struct Record {
53 pub inner: htslib::bam1_t,
54 own: bool,
55 cigar: Option<CigarStringView>,
56 header: Option<Rc<HeaderView>>,
57}
58
59unsafe impl Send for Record {}
60unsafe impl Sync for Record {}
61
62impl Clone for Record {
63 fn clone(&self) -> Self {
64 let mut copy = Record::new();
65 unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
66 copy
67 }
68}
69
70impl PartialEq for Record {
71 fn eq(&self, other: &Record) -> bool {
72 self.tid() == other.tid()
73 && self.pos() == other.pos()
74 && self.bin() == other.bin()
75 && self.mapq() == other.mapq()
76 && self.flags() == other.flags()
77 && self.mtid() == other.mtid()
78 && self.mpos() == other.mpos()
79 && self.insert_size() == other.insert_size()
80 && self.data() == other.data()
81 && self.inner().core.l_extranul == other.inner().core.l_extranul
82 }
83}
84
85impl Eq for Record {}
86
87impl fmt::Debug for Record {
88 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
89 fmt.write_fmt(format_args!(
90 "Record(tid: {}, pos: {})",
91 self.tid(),
92 self.pos()
93 ))
94 }
95}
96
97impl Default for Record {
98 fn default() -> Self {
99 Self::new()
100 }
101}
102
103#[inline]
104fn extranul_from_qname(qname: &[u8]) -> usize {
105 let qlen = qname.len() + 1;
106 if !qlen.is_multiple_of(4) {
107 4 - qlen % 4
108 } else {
109 0
110 }
111}
112
113impl Record {
114 pub fn new() -> Self {
116 let mut record = Record {
117 inner: unsafe { MaybeUninit::zeroed().assume_init() },
118 own: true,
119 cigar: None,
120 header: None,
121 };
122 record.set_qname(b"");
125 record.set_unmapped();
128 record.set_tid(-1);
129 record.set_pos(-1);
130 record.set_mpos(-1);
131 record.set_mtid(-1);
132 record
133 }
134
135 pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
136 Record {
137 inner: {
138 #[allow(clippy::uninit_assumed_init, invalid_value)]
139 let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
140 unsafe {
141 ::libc::memcpy(
142 &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
143 from as *const ::libc::c_void,
144 size_of::<htslib::bam1_t>(),
145 );
146 }
147 inner
148 },
149 own: false,
150 cigar: None,
151 header: None,
152 }
153 }
154
155 pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
157 let mut record = Self::new();
158
159 let mut sam_copy = Vec::with_capacity(sam.len() + 1);
160 sam_copy.extend(sam);
161 sam_copy.push(0);
162
163 let mut sam_string = htslib::kstring_t {
164 s: sam_copy.as_ptr() as *mut c_char,
165 l: sam_copy.len(),
166 m: sam_copy.len(),
167 };
168
169 let succ = unsafe {
170 htslib::sam_parse1(
171 &mut sam_string,
172 header_view.inner_ptr() as *mut htslib::bam_hdr_t,
173 record.inner_ptr_mut(),
174 )
175 };
176
177 if succ == 0 {
178 Ok(record)
179 } else {
180 Err(Error::BamParseSAM {
181 rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
182 })
183 }
184 }
185
186 pub fn set_header(&mut self, header: Rc<HeaderView>) {
187 self.header = Some(header);
188 }
189
190 pub(super) fn data(&self) -> &[u8] {
191 unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
192 }
193
194 #[inline]
195 pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
196 &mut self.inner
197 }
198
199 #[inline]
200 pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
201 &mut self.inner as *mut htslib::bam1_t
202 }
203
204 #[inline]
205 pub fn inner(&self) -> &htslib::bam1_t {
206 &self.inner
207 }
208
209 #[inline]
210 pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
211 &self.inner as *const htslib::bam1_t
212 }
213
214 pub fn tid(&self) -> i32 {
216 self.inner().core.tid
217 }
218
219 pub fn set_tid(&mut self, tid: i32) {
221 self.inner_mut().core.tid = tid;
222 }
223
224 pub fn pos(&self) -> i64 {
226 self.inner().core.pos
227 }
228
229 pub fn set_pos(&mut self, pos: i64) {
231 self.inner_mut().core.pos = pos;
232 }
233
234 pub fn bin(&self) -> u16 {
235 self.inner().core.bin
236 }
237
238 pub fn set_bin(&mut self, bin: u16) {
239 self.inner_mut().core.bin = bin;
240 }
241
242 pub fn mapq(&self) -> u8 {
244 self.inner().core.qual
245 }
246
247 pub fn set_mapq(&mut self, mapq: u8) {
249 self.inner_mut().core.qual = mapq;
250 }
251
252 pub fn strand(&self) -> ReqStrand {
254 let reverse = self.flags() & 0x10 != 0;
255 if reverse {
256 ReqStrand::Reverse
257 } else {
258 ReqStrand::Forward
259 }
260 }
261
262 pub fn flags(&self) -> u16 {
264 self.inner().core.flag
265 }
266
267 pub fn set_flags(&mut self, flags: u16) {
269 self.inner_mut().core.flag = flags;
270 }
271
272 pub fn unset_flags(&mut self) {
274 self.inner_mut().core.flag = 0;
275 }
276
277 pub fn mtid(&self) -> i32 {
279 self.inner().core.mtid
280 }
281
282 pub fn set_mtid(&mut self, mtid: i32) {
284 self.inner_mut().core.mtid = mtid;
285 }
286
287 pub fn mpos(&self) -> i64 {
289 self.inner().core.mpos
290 }
291
292 pub fn set_mpos(&mut self, mpos: i64) {
294 self.inner_mut().core.mpos = mpos;
295 }
296
297 pub fn insert_size(&self) -> i64 {
299 self.inner().core.isize_
300 }
301
302 pub fn set_insert_size(&mut self, insert_size: i64) {
304 self.inner_mut().core.isize_ = insert_size;
305 }
306
307 fn qname_capacity(&self) -> usize {
308 self.inner().core.l_qname as usize
309 }
310
311 fn qname_len(&self) -> usize {
312 self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
314 }
315
316 pub fn qname(&self) -> &[u8] {
318 &self.data()[..self.qname_len()]
319 }
320
321 pub fn set_data(&mut self, new_data: &[u8]) {
323 self.cigar = None;
324
325 self.inner_mut().l_data = new_data.len() as i32;
326 if (self.inner().m_data as i32) < self.inner().l_data {
327 let l_data = self.inner().l_data;
329 self.realloc_var_data(l_data as usize);
330 }
331
332 let data =
334 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
335 utils::copy_memory(new_data, data);
336 }
337
338 pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
346 assert!(qname.len() < 255);
347 assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
348
349 self.cigar = None;
350
351 let cigar_width = if let Some(cigar_string) = cigar {
352 cigar_string.len()
353 } else {
354 0
355 } * 4;
356 let q_len = qname.len() + 1;
357 let extranul = extranul_from_qname(qname);
358
359 let orig_aux_offset = self.qname_capacity()
360 + 4 * self.cigar_len()
361 + self.seq_len().div_ceil(2)
362 + self.seq_len();
363 let new_aux_offset = q_len + extranul + cigar_width + seq.len().div_ceil(2) + qual.len();
364 assert!(orig_aux_offset <= self.inner.l_data as usize);
365 let aux_len = self.inner.l_data as usize - orig_aux_offset;
366 self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
367 if (self.inner().m_data as i32) < self.inner().l_data {
368 let l_data = self.inner().l_data;
370 self.realloc_var_data(l_data as usize);
371 }
372
373 if aux_len > 0 && orig_aux_offset != new_aux_offset {
375 let data =
376 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
377 data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
378 }
379
380 let data =
381 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
382
383 utils::copy_memory(qname, data);
385 for i in 0..=extranul {
386 data[qname.len() + i] = b'\0';
387 }
388 let mut i = q_len + extranul;
389 self.inner_mut().core.l_qname = i as u16;
390 self.inner_mut().core.l_extranul = extranul as u8;
391
392 if let Some(cigar_string) = cigar {
394 let cigar_data = unsafe {
395 #[allow(clippy::cast_ptr_alignment)]
397 slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
398 };
399 for (i, c) in cigar_string.iter().enumerate() {
400 cigar_data[i] = c.encode();
401 }
402 self.inner_mut().core.n_cigar = cigar_string.len() as u32;
403 i += cigar_string.len() * 4;
404 } else {
405 self.inner_mut().core.n_cigar = 0;
406 };
407
408 {
410 for j in (0..seq.len()).step_by(2) {
411 data[i + j / 2] = (ENCODE_BASE[seq[j] as usize] << 4)
412 | (if j + 1 < seq.len() {
413 ENCODE_BASE[seq[j + 1] as usize]
414 } else {
415 0
416 });
417 }
418 self.inner_mut().core.l_qseq = seq.len() as i32;
419 i += seq.len().div_ceil(2);
420 }
421
422 utils::copy_memory(qual, &mut data[i..]);
424 }
425
426 pub fn set_qname(&mut self, new_qname: &[u8]) {
428 assert!(new_qname.len() < 252);
430
431 let old_q_len = self.qname_capacity();
432 let extranul = extranul_from_qname(new_qname);
434 let new_q_len = new_qname.len() + 1 + extranul;
435
436 let other_len = self.inner_mut().l_data - old_q_len as i32;
438
439 if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
440 self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
441 } else if new_q_len > old_q_len {
442 self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
443
444 if (self.inner().m_data as i32) < self.inner().l_data {
446 let l_data = self.inner().l_data;
448 self.realloc_var_data(l_data as usize);
449 }
450 }
451
452 if new_q_len != old_q_len {
453 unsafe {
455 let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
456
457 ::libc::memmove(
458 data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
459 data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
460 other_len as usize,
461 );
462 }
463 }
464
465 let data =
467 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
468 utils::copy_memory(new_qname, data);
469 for i in 0..=extranul {
470 data[new_q_len - i - 1] = b'\0';
471 }
472 self.inner_mut().core.l_qname = new_q_len as u16;
473 self.inner_mut().core.l_extranul = extranul as u8;
474 }
475
476 pub fn set_cigar(&mut self, new_cigar: Option<&CigarString>) {
478 self.cigar = None;
479
480 let qname_data_len = self.qname_capacity();
481 let old_cigar_data_len = self.cigar_len() * 4;
482
483 let other_data_len = self.inner_mut().l_data - (qname_data_len + old_cigar_data_len) as i32;
485
486 let new_cigar_len = match new_cigar {
487 Some(x) => x.len(),
488 None => 0,
489 };
490 let new_cigar_data_len = new_cigar_len * 4;
491
492 if new_cigar_data_len < old_cigar_data_len {
493 self.inner_mut().l_data -= (old_cigar_data_len - new_cigar_data_len) as i32;
494 } else if new_cigar_data_len > old_cigar_data_len {
495 self.inner_mut().l_data += (new_cigar_data_len - old_cigar_data_len) as i32;
496
497 if (self.inner().m_data as i32) < self.inner().l_data {
499 let l_data = self.inner().l_data;
501 self.realloc_var_data(l_data as usize);
502 }
503 }
504
505 if new_cigar_data_len != old_cigar_data_len {
506 unsafe {
508 ::libc::memmove(
509 self.inner.data.add(qname_data_len + new_cigar_data_len) as *mut ::libc::c_void,
510 self.inner.data.add(qname_data_len + old_cigar_data_len) as *mut ::libc::c_void,
511 other_data_len as usize,
512 );
513 }
514 }
515
516 if let Some(cigar_string) = new_cigar {
518 let cigar_data = unsafe {
519 #[allow(clippy::cast_ptr_alignment)]
520 slice::from_raw_parts_mut(
521 self.inner.data.add(qname_data_len) as *mut u32,
522 cigar_string.len(),
523 )
524 };
525 for (i, c) in cigar_string.iter().enumerate() {
526 cigar_data[i] = c.encode();
527 }
528 }
529 self.inner_mut().core.n_cigar = new_cigar_len as u32;
530 }
531
532 fn realloc_var_data(&mut self, new_len: usize) {
533 let new_len = new_len as u32;
535 let new_request = new_len + 32 - (new_len % 32);
536
537 let ptr = unsafe {
538 ::libc::realloc(
539 self.inner().data as *mut ::libc::c_void,
540 new_request as usize,
541 ) as *mut u8
542 };
543
544 if ptr.is_null() {
545 panic!("ran out of memory in rust_htslib trying to realloc");
546 }
547
548 self.inner_mut().m_data = new_request;
551 self.inner_mut().data = ptr;
552
553 self.own = true;
555 }
556
557 pub fn cigar_len(&self) -> usize {
558 self.inner().core.n_cigar as usize
559 }
560
561 pub fn raw_cigar(&self) -> &[u32] {
564 #[allow(clippy::cast_ptr_alignment)]
566 unsafe {
567 slice::from_raw_parts(
568 self.data()[self.qname_capacity()..].as_ptr() as *const u32,
569 self.cigar_len(),
570 )
571 }
572 }
573
574 pub fn cigar(&self) -> CigarStringView {
576 match self.cigar {
577 Some(ref c) => c.clone(),
578 None => self.unpack_cigar(),
579 }
580 }
581
582 pub fn cigar_cached(&self) -> Option<&CigarStringView> {
584 self.cigar.as_ref()
585 }
586
587 pub fn cache_cigar(&mut self) {
589 self.cigar = Some(self.unpack_cigar())
590 }
591
592 fn unpack_cigar(&self) -> CigarStringView {
594 CigarString(
595 self.raw_cigar()
596 .iter()
597 .map(|&c| {
598 let len = c >> 4;
599 match c & 0b1111 {
600 0 => Cigar::Match(len),
601 1 => Cigar::Ins(len),
602 2 => Cigar::Del(len),
603 3 => Cigar::RefSkip(len),
604 4 => Cigar::SoftClip(len),
605 5 => Cigar::HardClip(len),
606 6 => Cigar::Pad(len),
607 7 => Cigar::Equal(len),
608 8 => Cigar::Diff(len),
609 _ => panic!("Unexpected cigar operation"),
610 }
611 })
612 .collect(),
613 )
614 .into_view(self.pos())
615 }
616
617 pub fn seq_len(&self) -> usize {
618 self.inner().core.l_qseq as usize
619 }
620
621 fn seq_data(&self) -> &[u8] {
622 let offset = self.qname_capacity() + self.cigar_len() * 4;
623 &self.data()[offset..][..self.seq_len().div_ceil(2)]
624 }
625
626 pub fn seq(&self) -> Seq<'_> {
628 Seq {
629 encoded: self.seq_data(),
630 len: self.seq_len(),
631 }
632 }
633
634 pub fn qual(&self) -> &[u8] {
638 &self.data()[self.qname_capacity() + self.cigar_len() * 4 + self.seq_len().div_ceil(2)..]
639 [..self.seq_len()]
640 }
641
642 pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
647 let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
648 let aux = unsafe {
649 htslib::bam_aux_get(
650 &self.inner as *const htslib::bam1_t,
651 c_str.as_ptr() as *mut c_char,
652 )
653 };
654 unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
655 }
656
657 unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
658 const TAG_LEN: isize = 2;
659 const TYPE_ID_LEN: isize = 1;
661
662 if aux.is_null() {
663 return Err(Error::BamAuxTagNotFound);
664 }
665
666 let (data, type_size) = match *aux {
667 b'A' => {
668 let type_size = size_of::<u8>();
669 (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
670 }
671 b'c' => {
672 let type_size = size_of::<i8>();
673 (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
674 }
675 b'C' => {
676 let type_size = size_of::<u8>();
677 (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
678 }
679 b's' => {
680 let type_size = size_of::<i16>();
681 (
682 Aux::I16(
683 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
684 .read_i16::<LittleEndian>()
685 .map_err(|_| Error::BamAuxParsingError)?,
686 ),
687 type_size,
688 )
689 }
690 b'S' => {
691 let type_size = size_of::<u16>();
692 (
693 Aux::U16(
694 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
695 .read_u16::<LittleEndian>()
696 .map_err(|_| Error::BamAuxParsingError)?,
697 ),
698 type_size,
699 )
700 }
701 b'i' => {
702 let type_size = size_of::<i32>();
703 (
704 Aux::I32(
705 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
706 .read_i32::<LittleEndian>()
707 .map_err(|_| Error::BamAuxParsingError)?,
708 ),
709 type_size,
710 )
711 }
712 b'I' => {
713 let type_size = size_of::<u32>();
714 (
715 Aux::U32(
716 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
717 .read_u32::<LittleEndian>()
718 .map_err(|_| Error::BamAuxParsingError)?,
719 ),
720 type_size,
721 )
722 }
723 b'f' => {
724 let type_size = size_of::<f32>();
725 (
726 Aux::Float(
727 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
728 .read_f32::<LittleEndian>()
729 .map_err(|_| Error::BamAuxParsingError)?,
730 ),
731 type_size,
732 )
733 }
734 b'd' => {
735 let type_size = size_of::<f64>();
736 (
737 Aux::Double(
738 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
739 .read_f64::<LittleEndian>()
740 .map_err(|_| Error::BamAuxParsingError)?,
741 ),
742 type_size,
743 )
744 }
745 b'Z' | b'H' => {
746 let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
747 let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
748 (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
749 }
750 b'B' => {
751 const ARRAY_INNER_TYPE_LEN: isize = 1;
752 const ARRAY_COUNT_LEN: isize = 4;
753
754 let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
756
757 let length =
758 slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
759 .read_u32::<LittleEndian>()
760 .map_err(|_| Error::BamAuxParsingError)? as usize;
761
762 let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
764 b'c' => (
765 Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
766 aux.offset(array_data_offset),
767 length,
768 ))),
769 length,
770 ),
771 b'C' => (
772 Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
773 aux.offset(array_data_offset),
774 length,
775 ))),
776 length,
777 ),
778 b's' => (
779 Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
780 aux.offset(array_data_offset),
781 length * size_of::<i16>(),
782 ))),
783 length * std::mem::size_of::<i16>(),
784 ),
785 b'S' => (
786 Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
787 aux.offset(array_data_offset),
788 length * size_of::<u16>(),
789 ))),
790 length * std::mem::size_of::<u16>(),
791 ),
792 b'i' => (
793 Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
794 aux.offset(array_data_offset),
795 length * size_of::<i32>(),
796 ))),
797 length * std::mem::size_of::<i32>(),
798 ),
799 b'I' => (
800 Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
801 aux.offset(array_data_offset),
802 length * size_of::<u32>(),
803 ))),
804 length * std::mem::size_of::<u32>(),
805 ),
806 b'f' => (
807 Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
808 aux.offset(array_data_offset),
809 length * size_of::<f32>(),
810 ))),
811 length * std::mem::size_of::<f32>(),
812 ),
813 _ => {
814 return Err(Error::BamAuxUnknownType);
815 }
816 };
817 (
818 array_data,
819 ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
821 )
822 }
823 _ => {
824 return Err(Error::BamAuxUnknownType);
825 }
826 };
827
828 Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
830 }
831
832 pub fn aux_iter(&self) -> AuxIter<'_> {
837 AuxIter {
838 aux: &self.data()[
841 self.qname_capacity()
843 + self.cigar_len() * std::mem::size_of::<u32>()
845 + self.seq_len().div_ceil(2)
847 + self.seq_len()..],
849 }
850 }
851
852 pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
854 if self.aux(tag).is_ok() {
858 return Err(Error::BamAuxTagAlreadyPresent);
859 }
860
861 let ctag = tag.as_ptr() as *mut c_char;
862 let ret = unsafe {
863 match value {
864 Aux::Char(v) => htslib::bam_aux_append(
865 self.inner_ptr_mut(),
866 ctag,
867 b'A' as c_char,
868 size_of::<u8>() as i32,
869 [v].as_mut_ptr(),
870 ),
871 Aux::I8(v) => htslib::bam_aux_append(
872 self.inner_ptr_mut(),
873 ctag,
874 b'c' as c_char,
875 size_of::<i8>() as i32,
876 [v].as_mut_ptr() as *mut u8,
877 ),
878 Aux::U8(v) => htslib::bam_aux_append(
879 self.inner_ptr_mut(),
880 ctag,
881 b'C' as c_char,
882 size_of::<u8>() as i32,
883 [v].as_mut_ptr(),
884 ),
885 Aux::I16(v) => htslib::bam_aux_append(
886 self.inner_ptr_mut(),
887 ctag,
888 b's' as c_char,
889 size_of::<i16>() as i32,
890 [v].as_mut_ptr() as *mut u8,
891 ),
892 Aux::U16(v) => htslib::bam_aux_append(
893 self.inner_ptr_mut(),
894 ctag,
895 b'S' as c_char,
896 size_of::<u16>() as i32,
897 [v].as_mut_ptr() as *mut u8,
898 ),
899 Aux::I32(v) => htslib::bam_aux_append(
900 self.inner_ptr_mut(),
901 ctag,
902 b'i' as c_char,
903 size_of::<i32>() as i32,
904 [v].as_mut_ptr() as *mut u8,
905 ),
906 Aux::U32(v) => htslib::bam_aux_append(
907 self.inner_ptr_mut(),
908 ctag,
909 b'I' as c_char,
910 size_of::<u32>() as i32,
911 [v].as_mut_ptr() as *mut u8,
912 ),
913 Aux::Float(v) => htslib::bam_aux_append(
914 self.inner_ptr_mut(),
915 ctag,
916 b'f' as c_char,
917 size_of::<f32>() as i32,
918 [v].as_mut_ptr() as *mut u8,
919 ),
920 Aux::Double(v) => htslib::bam_aux_append(
922 self.inner_ptr_mut(),
923 ctag,
924 b'd' as c_char,
925 size_of::<f64>() as i32,
926 [v].as_mut_ptr() as *mut u8,
927 ),
928 Aux::String(v) => {
929 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
930 htslib::bam_aux_append(
931 self.inner_ptr_mut(),
932 ctag,
933 b'Z' as c_char,
934 (v.len() + 1) as i32,
935 c_str.as_ptr() as *mut u8,
936 )
937 }
938 Aux::HexByteArray(v) => {
939 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
940 htslib::bam_aux_append(
941 self.inner_ptr_mut(),
942 ctag,
943 b'H' as c_char,
944 (v.len() + 1) as i32,
945 c_str.as_ptr() as *mut u8,
946 )
947 }
948 Aux::ArrayI8(aux_array) => match aux_array {
950 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
951 self.inner_ptr_mut(),
952 ctag,
953 b'c',
954 inner.len() as u32,
955 inner.slice.as_ptr() as *mut ::libc::c_void,
956 ),
957 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
958 self.inner_ptr_mut(),
959 ctag,
960 b'c',
961 inner.len() as u32,
962 inner.slice.as_ptr() as *mut ::libc::c_void,
963 ),
964 },
965 Aux::ArrayU8(aux_array) => match aux_array {
966 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
967 self.inner_ptr_mut(),
968 ctag,
969 b'C',
970 inner.len() as u32,
971 inner.slice.as_ptr() as *mut ::libc::c_void,
972 ),
973 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
974 self.inner_ptr_mut(),
975 ctag,
976 b'C',
977 inner.len() as u32,
978 inner.slice.as_ptr() as *mut ::libc::c_void,
979 ),
980 },
981 Aux::ArrayI16(aux_array) => match aux_array {
982 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
983 self.inner_ptr_mut(),
984 ctag,
985 b's',
986 inner.len() as u32,
987 inner.slice.as_ptr() as *mut ::libc::c_void,
988 ),
989 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
990 self.inner_ptr_mut(),
991 ctag,
992 b's',
993 inner.len() as u32,
994 inner.slice.as_ptr() as *mut ::libc::c_void,
995 ),
996 },
997 Aux::ArrayU16(aux_array) => match aux_array {
998 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
999 self.inner_ptr_mut(),
1000 ctag,
1001 b'S',
1002 inner.len() as u32,
1003 inner.slice.as_ptr() as *mut ::libc::c_void,
1004 ),
1005 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1006 self.inner_ptr_mut(),
1007 ctag,
1008 b'S',
1009 inner.len() as u32,
1010 inner.slice.as_ptr() as *mut ::libc::c_void,
1011 ),
1012 },
1013 Aux::ArrayI32(aux_array) => match aux_array {
1014 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1015 self.inner_ptr_mut(),
1016 ctag,
1017 b'i',
1018 inner.len() as u32,
1019 inner.slice.as_ptr() as *mut ::libc::c_void,
1020 ),
1021 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1022 self.inner_ptr_mut(),
1023 ctag,
1024 b'i',
1025 inner.len() as u32,
1026 inner.slice.as_ptr() as *mut ::libc::c_void,
1027 ),
1028 },
1029 Aux::ArrayU32(aux_array) => match aux_array {
1030 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1031 self.inner_ptr_mut(),
1032 ctag,
1033 b'I',
1034 inner.len() as u32,
1035 inner.slice.as_ptr() as *mut ::libc::c_void,
1036 ),
1037 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1038 self.inner_ptr_mut(),
1039 ctag,
1040 b'I',
1041 inner.len() as u32,
1042 inner.slice.as_ptr() as *mut ::libc::c_void,
1043 ),
1044 },
1045 Aux::ArrayFloat(aux_array) => match aux_array {
1046 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1047 self.inner_ptr_mut(),
1048 ctag,
1049 b'f',
1050 inner.len() as u32,
1051 inner.slice.as_ptr() as *mut ::libc::c_void,
1052 ),
1053 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1054 self.inner_ptr_mut(),
1055 ctag,
1056 b'f',
1057 inner.len() as u32,
1058 inner.slice.as_ptr() as *mut ::libc::c_void,
1059 ),
1060 },
1061 }
1062 };
1063
1064 if ret < 0 {
1065 Err(Error::BamAux)
1066 } else {
1067 Ok(())
1068 }
1069 }
1070
1071 pub fn update_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
1073 let ctag = tag.as_ptr() as *mut c_char;
1078 let ret = unsafe {
1079 match value {
1080 Aux::Char(_v) => return Err(Error::BamAuxTagUpdatingNotSupported),
1081 Aux::I8(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1082 Aux::U8(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1083 Aux::I16(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1084 Aux::U16(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1085 Aux::I32(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1086 Aux::U32(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1087 Aux::Float(v) => htslib::bam_aux_update_float(self.inner_ptr_mut(), ctag, v),
1088 Aux::Double(v) => {
1090 htslib::bam_aux_update_float(self.inner_ptr_mut(), ctag, v as f32)
1091 }
1092 Aux::String(v) => {
1093 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
1094 htslib::bam_aux_update_str(
1095 self.inner_ptr_mut(),
1096 ctag,
1097 (v.len() + 1) as i32,
1098 c_str.as_ptr() as *const c_char,
1099 )
1100 }
1101 Aux::HexByteArray(_v) => return Err(Error::BamAuxTagUpdatingNotSupported),
1102 Aux::ArrayI8(aux_array) => match aux_array {
1104 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1105 self.inner_ptr_mut(),
1106 ctag,
1107 b'c',
1108 inner.len() as u32,
1109 inner.slice.as_ptr() as *mut ::libc::c_void,
1110 ),
1111 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1112 self.inner_ptr_mut(),
1113 ctag,
1114 b'c',
1115 inner.len() as u32,
1116 inner.slice.as_ptr() as *mut ::libc::c_void,
1117 ),
1118 },
1119 Aux::ArrayU8(aux_array) => match aux_array {
1120 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1121 self.inner_ptr_mut(),
1122 ctag,
1123 b'C',
1124 inner.len() as u32,
1125 inner.slice.as_ptr() as *mut ::libc::c_void,
1126 ),
1127 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1128 self.inner_ptr_mut(),
1129 ctag,
1130 b'C',
1131 inner.len() as u32,
1132 inner.slice.as_ptr() as *mut ::libc::c_void,
1133 ),
1134 },
1135 Aux::ArrayI16(aux_array) => match aux_array {
1136 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1137 self.inner_ptr_mut(),
1138 ctag,
1139 b's',
1140 inner.len() as u32,
1141 inner.slice.as_ptr() as *mut ::libc::c_void,
1142 ),
1143 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1144 self.inner_ptr_mut(),
1145 ctag,
1146 b's',
1147 inner.len() as u32,
1148 inner.slice.as_ptr() as *mut ::libc::c_void,
1149 ),
1150 },
1151 Aux::ArrayU16(aux_array) => match aux_array {
1152 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1153 self.inner_ptr_mut(),
1154 ctag,
1155 b'S',
1156 inner.len() as u32,
1157 inner.slice.as_ptr() as *mut ::libc::c_void,
1158 ),
1159 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1160 self.inner_ptr_mut(),
1161 ctag,
1162 b'S',
1163 inner.len() as u32,
1164 inner.slice.as_ptr() as *mut ::libc::c_void,
1165 ),
1166 },
1167 Aux::ArrayI32(aux_array) => match aux_array {
1168 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1169 self.inner_ptr_mut(),
1170 ctag,
1171 b'i',
1172 inner.len() as u32,
1173 inner.slice.as_ptr() as *mut ::libc::c_void,
1174 ),
1175 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1176 self.inner_ptr_mut(),
1177 ctag,
1178 b'i',
1179 inner.len() as u32,
1180 inner.slice.as_ptr() as *mut ::libc::c_void,
1181 ),
1182 },
1183 Aux::ArrayU32(aux_array) => match aux_array {
1184 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1185 self.inner_ptr_mut(),
1186 ctag,
1187 b'I',
1188 inner.len() as u32,
1189 inner.slice.as_ptr() as *mut ::libc::c_void,
1190 ),
1191 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1192 self.inner_ptr_mut(),
1193 ctag,
1194 b'I',
1195 inner.len() as u32,
1196 inner.slice.as_ptr() as *mut ::libc::c_void,
1197 ),
1198 },
1199 Aux::ArrayFloat(aux_array) => match aux_array {
1200 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1201 self.inner_ptr_mut(),
1202 ctag,
1203 b'f',
1204 inner.len() as u32,
1205 inner.slice.as_ptr() as *mut ::libc::c_void,
1206 ),
1207 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1208 self.inner_ptr_mut(),
1209 ctag,
1210 b'f',
1211 inner.len() as u32,
1212 inner.slice.as_ptr() as *mut ::libc::c_void,
1213 ),
1214 },
1215 }
1216 };
1217
1218 if ret < 0 {
1219 Err(Error::BamAux)
1220 } else {
1221 Ok(())
1222 }
1223 }
1224
1225 pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1227 let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
1228 let aux = unsafe {
1229 htslib::bam_aux_get(
1230 &self.inner as *const htslib::bam1_t,
1231 c_str.as_ptr() as *mut c_char,
1232 )
1233 };
1234 unsafe {
1235 if aux.is_null() {
1236 Err(Error::BamAuxTagNotFound)
1237 } else {
1238 htslib::bam_aux_del(self.inner_ptr_mut(), aux);
1239 Ok(())
1240 }
1241 }
1242 }
1243
1244 pub fn basemods_iter(&self) -> Result<BaseModificationsIter<'_>> {
1276 BaseModificationsIter::new(self)
1277 }
1278
1279 pub fn basemods_position_iter(&self) -> Result<BaseModificationsPositionIter<'_>> {
1283 BaseModificationsPositionIter::new(self)
1284 }
1285
1286 pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1290 if self.is_paired()
1291 && !self.is_unmapped()
1292 && !self.is_mate_unmapped()
1293 && self.tid() == self.mtid()
1294 {
1295 if self.pos() == self.mpos() {
1296 return SequenceReadPairOrientation::None;
1298 }
1299
1300 let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() {
1301 (
1302 self.pos(),
1303 self.mpos(),
1304 !self.is_reverse(),
1305 !self.is_mate_reverse(),
1306 )
1307 } else {
1308 (
1309 self.mpos(),
1310 self.pos(),
1311 !self.is_mate_reverse(),
1312 !self.is_reverse(),
1313 )
1314 };
1315
1316 if pos_1 < pos_2 {
1317 match (fwd_1, fwd_2) {
1318 (true, true) => SequenceReadPairOrientation::F1F2,
1319 (true, false) => SequenceReadPairOrientation::F1R2,
1320 (false, true) => SequenceReadPairOrientation::R1F2,
1321 (false, false) => SequenceReadPairOrientation::R1R2,
1322 }
1323 } else {
1324 match (fwd_2, fwd_1) {
1325 (true, true) => SequenceReadPairOrientation::F2F1,
1326 (true, false) => SequenceReadPairOrientation::F2R1,
1327 (false, true) => SequenceReadPairOrientation::R2F1,
1328 (false, false) => SequenceReadPairOrientation::R2R1,
1329 }
1330 }
1331 } else {
1332 SequenceReadPairOrientation::None
1333 }
1334 }
1335
1336 flag!(is_paired, set_paired, unset_paired, 1u16);
1337 flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1338 flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1339 flag!(
1340 is_mate_unmapped,
1341 set_mate_unmapped,
1342 unset_mate_unmapped,
1343 8u16
1344 );
1345 flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1346 flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1347 flag!(
1348 is_first_in_template,
1349 set_first_in_template,
1350 unset_first_in_template,
1351 64u16
1352 );
1353 flag!(
1354 is_last_in_template,
1355 set_last_in_template,
1356 unset_last_in_template,
1357 128u16
1358 );
1359 flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1360 flag!(
1361 is_quality_check_failed,
1362 set_quality_check_failed,
1363 unset_quality_check_failed,
1364 512u16
1365 );
1366 flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1367 flag!(
1368 is_supplementary,
1369 set_supplementary,
1370 unset_supplementary,
1371 2048u16
1372 );
1373}
1374
1375impl Drop for Record {
1376 fn drop(&mut self) {
1377 if self.own {
1378 unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
1379 }
1380 }
1381}
1382
1383impl SequenceRead for Record {
1384 fn name(&self) -> &[u8] {
1385 self.qname()
1386 }
1387
1388 fn base(&self, i: usize) -> u8 {
1389 *decode_base_unchecked(encoded_base(self.seq_data(), i))
1390 }
1391
1392 fn base_qual(&self, i: usize) -> u8 {
1393 self.qual()[i]
1394 }
1395
1396 fn len(&self) -> usize {
1397 self.seq_len()
1398 }
1399
1400 fn is_empty(&self) -> bool {
1401 self.len() == 0
1402 }
1403}
1404
1405impl genome::AbstractInterval for Record {
1406 fn contig(&self) -> &str {
1408 let tid = self.tid();
1409 if tid < 0 {
1410 panic!("invalid tid, must be at least zero");
1411 }
1412 str::from_utf8(
1413 self.header
1414 .as_ref()
1415 .expect(
1416 "header must be set (this is the case if the record has been read from a file)",
1417 )
1418 .tid2name(tid as u32),
1419 )
1420 .expect("unable to interpret contig name as UTF-8")
1421 }
1422
1423 fn range(&self) -> ops::Range<genome::Position> {
1425 let end_pos = self
1426 .cigar_cached()
1427 .expect("cigar has not been cached yet, call cache_cigar() first")
1428 .end_pos() as u64;
1429
1430 if self.pos() < 0 {
1431 panic!("invalid position, must be positive")
1432 }
1433
1434 self.pos() as u64..end_pos
1435 }
1436}
1437
1438#[derive(Debug, PartialEq)]
1493pub enum Aux<'a> {
1494 Char(u8),
1495 I8(i8),
1496 U8(u8),
1497 I16(i16),
1498 U16(u16),
1499 I32(i32),
1500 U32(u32),
1501 Float(f32),
1502 Double(f64), String(&'a str),
1504 HexByteArray(&'a str),
1505 ArrayI8(AuxArray<'a, i8>),
1506 ArrayU8(AuxArray<'a, u8>),
1507 ArrayI16(AuxArray<'a, i16>),
1508 ArrayU16(AuxArray<'a, u16>),
1509 ArrayI32(AuxArray<'a, i32>),
1510 ArrayU32(AuxArray<'a, u32>),
1511 ArrayFloat(AuxArray<'a, f32>),
1512}
1513
1514unsafe impl Send for Aux<'_> {}
1515unsafe impl Sync for Aux<'_> {}
1516
1517pub trait AuxArrayElement: Copy {
1519 fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1520}
1521
1522impl AuxArrayElement for i8 {
1523 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1524 std::io::Cursor::new(bytes).read_i8().ok()
1525 }
1526}
1527impl AuxArrayElement for u8 {
1528 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1529 std::io::Cursor::new(bytes).read_u8().ok()
1530 }
1531}
1532impl AuxArrayElement for i16 {
1533 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1534 std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1535 }
1536}
1537impl AuxArrayElement for u16 {
1538 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1539 std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1540 }
1541}
1542impl AuxArrayElement for i32 {
1543 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1544 std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1545 }
1546}
1547impl AuxArrayElement for u32 {
1548 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1549 std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1550 }
1551}
1552impl AuxArrayElement for f32 {
1553 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1554 std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
1555 }
1556}
1557
1558#[derive(Debug)]
1602pub enum AuxArray<'a, T> {
1603 TargetType(AuxArrayTargetType<'a, T>),
1604 RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1605}
1606
1607impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1608where
1609 T: AuxArrayElement + PartialEq,
1610{
1611 fn eq(&self, other: &AuxArray<'_, T>) -> bool {
1612 use AuxArray::*;
1613 match (self, other) {
1614 (TargetType(v), TargetType(v_other)) => v == v_other,
1615 (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1616 (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
1617 (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
1618 }
1619 }
1620}
1621
1622impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1624where
1625 I: AuxArrayElement,
1626 T: AsRef<[I]> + ?Sized,
1627{
1628 fn from(src: &'a T) -> Self {
1629 AuxArray::TargetType(AuxArrayTargetType {
1630 slice: src.as_ref(),
1631 })
1632 }
1633}
1634
1635impl<'a, T> AuxArray<'a, T>
1636where
1637 T: AuxArrayElement,
1638{
1639 pub fn get(&self, index: usize) -> Option<T> {
1641 match self {
1642 AuxArray::TargetType(v) => v.get(index),
1643 AuxArray::RawLeBytes(v) => v.get(index),
1644 }
1645 }
1646
1647 pub fn len(&self) -> usize {
1649 match self {
1650 AuxArray::TargetType(a) => a.len(),
1651 AuxArray::RawLeBytes(a) => a.len(),
1652 }
1653 }
1654
1655 pub fn is_empty(&self) -> bool {
1657 self.len() == 0
1658 }
1659
1660 pub fn iter(&self) -> AuxArrayIter<'_, T> {
1662 AuxArrayIter {
1663 index: 0,
1664 array: self,
1665 }
1666 }
1667
1668 fn from_bytes(bytes: &'a [u8]) -> Self {
1670 Self::RawLeBytes(AuxArrayRawLeBytes {
1671 slice: bytes,
1672 phantom_data: PhantomData,
1673 })
1674 }
1675}
1676
1677#[doc(hidden)]
1679#[derive(Debug, PartialEq)]
1680pub struct AuxArrayTargetType<'a, T> {
1681 slice: &'a [T],
1682}
1683
1684impl<T> AuxArrayTargetType<'_, T>
1685where
1686 T: AuxArrayElement,
1687{
1688 fn get(&self, index: usize) -> Option<T> {
1689 self.slice.get(index).copied()
1690 }
1691
1692 fn len(&self) -> usize {
1693 self.slice.len()
1694 }
1695}
1696
1697#[doc(hidden)]
1699#[derive(Debug, PartialEq)]
1700pub struct AuxArrayRawLeBytes<'a, T> {
1701 slice: &'a [u8],
1702 phantom_data: PhantomData<T>,
1703}
1704
1705impl<T> AuxArrayRawLeBytes<'_, T>
1706where
1707 T: AuxArrayElement,
1708{
1709 fn get(&self, index: usize) -> Option<T> {
1710 let type_size = std::mem::size_of::<T>();
1711 if index * type_size + type_size > self.slice.len() {
1712 return None;
1713 }
1714 T::from_le_bytes(&self.slice[index * type_size..][..type_size])
1715 }
1716
1717 fn len(&self) -> usize {
1718 self.slice.len() / std::mem::size_of::<T>()
1719 }
1720}
1721
1722pub struct AuxArrayIter<'a, T> {
1726 index: usize,
1727 array: &'a AuxArray<'a, T>,
1728}
1729
1730impl<T> Iterator for AuxArrayIter<'_, T>
1731where
1732 T: AuxArrayElement,
1733{
1734 type Item = T;
1735 fn next(&mut self) -> Option<Self::Item> {
1736 let value = self.array.get(self.index);
1737 self.index += 1;
1738 value
1739 }
1740
1741 fn size_hint(&self) -> (usize, Option<usize>) {
1742 let array_length = self.array.len() - self.index;
1743 (array_length, Some(array_length))
1744 }
1745}
1746
1747pub struct AuxIter<'a> {
1758 aux: &'a [u8],
1759}
1760
1761impl<'a> Iterator for AuxIter<'a> {
1762 type Item = Result<(&'a [u8], Aux<'a>)>;
1763
1764 fn next(&mut self) -> Option<Self::Item> {
1765 if self.aux.is_empty() {
1767 return None;
1768 }
1769 if (1..=3).contains(&self.aux.len()) {
1771 self.aux = &[];
1773 return Some(Err(Error::BamAuxParsingError));
1774 }
1775 let tag = &self.aux[..2];
1776 Some(unsafe {
1777 let data_ptr = self.aux[2..].as_ptr();
1778 Record::read_aux_field(data_ptr)
1779 .map(|(aux, offset)| {
1780 self.aux = &self.aux[offset..];
1781 (tag, aux)
1782 })
1783 .inspect_err(|_e| {
1784 self.aux = &[];
1786 })
1787 })
1788 }
1789}
1790
1791static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1792static ENCODE_BASE: [u8; 256] = [
1793 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1794 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1795 1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1796 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1797 13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1798 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1799 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1800 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1801 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1802 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1803 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1804];
1805
1806#[inline]
1807fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
1808 (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
1809}
1810
1811#[inline]
1812unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
1813 (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
1814}
1815
1816#[inline]
1817fn decode_base_unchecked(base: u8) -> &'static u8 {
1818 unsafe { DECODE_BASE.get_unchecked(base as usize) }
1819}
1820
1821#[derive(Debug, Copy, Clone)]
1823pub struct Seq<'a> {
1824 pub encoded: &'a [u8],
1825 len: usize,
1826}
1827
1828impl Seq<'_> {
1829 #[inline]
1831 pub fn encoded_base(&self, i: usize) -> u8 {
1832 encoded_base(self.encoded, i)
1833 }
1834
1835 #[inline]
1841 pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
1842 encoded_base_unchecked(self.encoded, i)
1843 }
1844
1845 #[inline]
1853 pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
1854 *decode_base_unchecked(self.encoded_base_unchecked(i))
1855 }
1856
1857 pub fn as_bytes(&self) -> Vec<u8> {
1859 (0..self.len()).map(|i| self[i]).collect()
1860 }
1861
1862 pub fn len(&self) -> usize {
1864 self.len
1865 }
1866
1867 pub fn is_empty(&self) -> bool {
1868 self.len() == 0
1869 }
1870}
1871
1872impl ops::Index<usize> for Seq<'_> {
1873 type Output = u8;
1874
1875 fn index(&self, index: usize) -> &u8 {
1877 decode_base_unchecked(self.encoded_base(index))
1878 }
1879}
1880
1881unsafe impl Send for Seq<'_> {}
1882unsafe impl Sync for Seq<'_> {}
1883
1884#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1885#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1886pub enum Cigar {
1887 Match(u32), Ins(u32), Del(u32), RefSkip(u32), SoftClip(u32), HardClip(u32), Pad(u32), Equal(u32), Diff(u32), }
1897
1898impl Cigar {
1899 fn encode(self) -> u32 {
1900 match self {
1901 Cigar::Match(len) => len << 4, Cigar::Ins(len) => (len << 4) | 1,
1903 Cigar::Del(len) => (len << 4) | 2,
1904 Cigar::RefSkip(len) => (len << 4) | 3,
1905 Cigar::SoftClip(len) => (len << 4) | 4,
1906 Cigar::HardClip(len) => (len << 4) | 5,
1907 Cigar::Pad(len) => (len << 4) | 6,
1908 Cigar::Equal(len) => (len << 4) | 7,
1909 Cigar::Diff(len) => (len << 4) | 8,
1910 }
1911 }
1912
1913 pub fn len(self) -> u32 {
1915 match self {
1916 Cigar::Match(len) => len,
1917 Cigar::Ins(len) => len,
1918 Cigar::Del(len) => len,
1919 Cigar::RefSkip(len) => len,
1920 Cigar::SoftClip(len) => len,
1921 Cigar::HardClip(len) => len,
1922 Cigar::Pad(len) => len,
1923 Cigar::Equal(len) => len,
1924 Cigar::Diff(len) => len,
1925 }
1926 }
1927
1928 pub fn is_empty(self) -> bool {
1929 self.len() == 0
1930 }
1931
1932 pub fn char(self) -> char {
1934 match self {
1935 Cigar::Match(_) => 'M',
1936 Cigar::Ins(_) => 'I',
1937 Cigar::Del(_) => 'D',
1938 Cigar::RefSkip(_) => 'N',
1939 Cigar::SoftClip(_) => 'S',
1940 Cigar::HardClip(_) => 'H',
1941 Cigar::Pad(_) => 'P',
1942 Cigar::Equal(_) => '=',
1943 Cigar::Diff(_) => 'X',
1944 }
1945 }
1946}
1947
1948impl fmt::Display for Cigar {
1949 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1950 fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
1951 }
1952}
1953
1954unsafe impl Send for Cigar {}
1955unsafe impl Sync for Cigar {}
1956
1957custom_derive! {
1958 #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1977 #[derive(NewtypeDeref,
1978 NewtypeDerefMut,
1979 NewtypeIndex(usize),
1980 NewtypeIndexMut(usize),
1981 NewtypeFrom,
1982 PartialEq,
1983 PartialOrd,
1984 Eq,
1985 NewtypeDebug,
1986 Clone,
1987 Hash
1988 )]
1989 pub struct CigarString(pub Vec<Cigar>);
1990}
1991
1992impl CigarString {
1993 pub fn into_view(self, pos: i64) -> CigarStringView {
1995 CigarStringView::new(self, pos)
1996 }
1997
1998 pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
2003 match alignment.mode {
2004 AlignmentMode::Global => {
2005 panic!(" Bam cigar fn not supported for Global Alignment mode")
2006 }
2007 AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
2008 _ => {}
2009 }
2010
2011 let mut cigar = Vec::new();
2012 if alignment.operations.is_empty() {
2013 return CigarString(cigar);
2014 }
2015
2016 let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
2017 AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
2018 AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
2019 AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
2020 AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
2021 _ => {}
2022 };
2023
2024 if alignment.xstart > 0 {
2025 cigar.push(if hard_clip {
2026 Cigar::HardClip(alignment.xstart as u32)
2027 } else {
2028 Cigar::SoftClip(alignment.xstart as u32)
2029 });
2030 }
2031
2032 let mut last = alignment.operations[0];
2033 let mut k = 1u32;
2034 for &op in alignment.operations[1..].iter() {
2035 if op == last {
2036 k += 1;
2037 } else {
2038 add_op(last, k, &mut cigar);
2039 k = 1;
2040 }
2041 last = op;
2042 }
2043 add_op(last, k, &mut cigar);
2044 if alignment.xlen > alignment.xend {
2045 cigar.push(if hard_clip {
2046 Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
2047 } else {
2048 Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
2049 });
2050 }
2051
2052 CigarString(cigar)
2053 }
2054}
2055
2056impl TryFrom<&[u8]> for CigarString {
2057 type Error = Error;
2058
2059 fn try_from(bytes: &[u8]) -> Result<Self> {
2080 let mut inner = Vec::new();
2081 let mut i = 0;
2082 let text_len = bytes.len();
2083 while i < text_len {
2084 let mut j = i;
2085 while j < text_len && bytes[j].is_ascii_digit() {
2086 j += 1;
2087 }
2088 if i == j {
2090 return Err(Error::BamParseCigar {
2091 msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
2092 });
2093 }
2094 let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
2096 msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
2097 })?;
2098 let n = s.parse().map_err(|_| Error::BamParseCigar {
2099 msg: format!("Unable to parse &str '{:?}' to u32.", s),
2100 })?;
2101 let op = &bytes[j];
2103 inner.push(match op {
2104 b'M' => Cigar::Match(n),
2105 b'I' => Cigar::Ins(n),
2106 b'D' => Cigar::Del(n),
2107 b'N' => Cigar::RefSkip(n),
2108 b'H' => {
2109 if i == 0 || j + 1 == text_len {
2110 Cigar::HardClip(n)
2111 } else {
2112 return Err(Error::BamParseCigar {
2113 msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
2114 .to_owned(),
2115 });
2116 }
2117 }
2118 b'S' => {
2119 if i == 0
2120 || j + 1 == text_len
2121 || bytes[i-1] == b'H'
2122 || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
2123 Cigar::SoftClip(n)
2124 } else {
2125 return Err(Error::BamParseCigar {
2126 msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
2127 .to_owned(),
2128 });
2129 }
2130 },
2131 b'P' => Cigar::Pad(n),
2132 b'=' => Cigar::Equal(n),
2133 b'X' => Cigar::Diff(n),
2134 op => {
2135 return Err(Error::BamParseCigar {
2136 msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
2137 })
2138 }
2139 });
2140 i = j + 1;
2141 }
2142 Ok(CigarString(inner))
2143 }
2144}
2145
2146impl TryFrom<&str> for CigarString {
2147 type Error = Error;
2148
2149 fn try_from(text: &str) -> Result<Self> {
2170 let bytes = text.as_bytes();
2171 if text.chars().count() != bytes.len() {
2172 return Err(Error::BamParseCigar {
2173 msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2174 });
2175 }
2176 CigarString::try_from(bytes)
2177 }
2178}
2179
2180impl<'a> CigarString {
2181 pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2182 self.into_iter()
2183 }
2184}
2185
2186impl<'a> IntoIterator for &'a CigarString {
2187 type Item = &'a Cigar;
2188 type IntoIter = ::std::slice::Iter<'a, Cigar>;
2189
2190 fn into_iter(self) -> Self::IntoIter {
2191 self.0.iter()
2192 }
2193}
2194
2195impl fmt::Display for CigarString {
2196 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2197 for op in self {
2198 fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
2199 }
2200 Ok(())
2201 }
2202}
2203
2204fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
2206 match (cigar.next(), cigar.next()) {
2207 (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
2208 *s as i64
2209 }
2210 _ => 0,
2211 }
2212}
2213
2214#[derive(Eq, PartialEq, Clone, Debug)]
2215pub struct CigarStringView {
2216 inner: CigarString,
2217 pos: i64,
2218}
2219
2220impl CigarStringView {
2221 pub fn new(c: CigarString, pos: i64) -> CigarStringView {
2223 CigarStringView { inner: c, pos }
2224 }
2225
2226 pub fn end_pos(&self) -> i64 {
2228 let mut pos = self.pos;
2229 for c in self {
2230 match c {
2231 Cigar::Match(l)
2232 | Cigar::RefSkip(l)
2233 | Cigar::Del(l)
2234 | Cigar::Equal(l)
2235 | Cigar::Diff(l) => pos += *l as i64,
2236 Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2238 }
2239 }
2240 pos
2241 }
2242
2243 pub fn pos(&self) -> i64 {
2245 self.pos
2246 }
2247
2248 pub fn leading_softclips(&self) -> i64 {
2250 calc_softclips(self.iter())
2251 }
2252
2253 pub fn trailing_softclips(&self) -> i64 {
2255 calc_softclips(self.iter().rev())
2256 }
2257
2258 pub fn leading_hardclips(&self) -> i64 {
2260 self.first().map_or(0, |cigar| {
2261 if let Cigar::HardClip(s) = cigar {
2262 *s as i64
2263 } else {
2264 0
2265 }
2266 })
2267 }
2268
2269 pub fn trailing_hardclips(&self) -> i64 {
2271 self.last().map_or(0, |cigar| {
2272 if let Cigar::HardClip(s) = cigar {
2273 *s as i64
2274 } else {
2275 0
2276 }
2277 })
2278 }
2279
2280 pub fn read_pos(
2290 &self,
2291 ref_pos: u32,
2292 include_softclips: bool,
2293 include_dels: bool,
2294 ) -> Result<Option<u32>> {
2295 let mut rpos = self.pos as u32; let mut qpos = 0u32; let mut j = 0; for (i, c) in self.iter().enumerate() {
2302 match c {
2303 Cigar::Match(_) |
2304 Cigar::Diff(_) |
2305 Cigar::Equal(_) |
2306 Cigar::Ins(_) => {
2309 j = i;
2310 break;
2311 },
2312 Cigar::SoftClip(l) => {
2313 j = i;
2314 if include_softclips {
2315 rpos = rpos.saturating_sub(*l);
2319 }
2320 break;
2321 },
2322 Cigar::Del(l) => {
2323 rpos += l;
2331 },
2332 Cigar::RefSkip(_) => {
2333 return Err(Error::BamUnexpectedCigarOperation {
2334 msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
2335 });
2336 },
2337 Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2338 return Err(Error::BamUnexpectedCigarOperation{
2339 msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2340 });
2341 },
2342 Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
2344 Cigar::Pad(_) | Cigar::HardClip(_) => ()
2346 }
2347 }
2348
2349 let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2350 cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2351 };
2352
2353 while rpos <= ref_pos && j < self.len() {
2354 match self[j] {
2355 Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
2357 qpos += ref_pos - rpos;
2360 return Ok(Some(qpos));
2361 }
2362 Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2363 qpos += ref_pos - rpos;
2364 return Ok(Some(qpos));
2365 }
2366 Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2367 return Ok(Some(qpos));
2369 }
2370 Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
2372 rpos += l;
2373 qpos += l;
2374 j += 1;
2375 }
2376 Cigar::SoftClip(l) => {
2377 qpos += l;
2378 j += 1;
2379 if include_softclips {
2380 rpos += l;
2381 }
2382 }
2383 Cigar::Ins(l) => {
2384 qpos += l;
2385 j += 1;
2386 }
2387 Cigar::RefSkip(l) | Cigar::Del(l) => {
2388 rpos += l;
2389 j += 1;
2390 }
2391 Cigar::Pad(_) => {
2392 j += 1;
2393 }
2394 Cigar::HardClip(_) if j < self.len() - 1 => {
2395 return Err(Error::BamUnexpectedCigarOperation{
2396 msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2397 });
2398 }
2399 Cigar::HardClip(_) => return Ok(None),
2400 }
2401 }
2402
2403 Ok(None)
2404 }
2405
2406 pub fn take(self) -> CigarString {
2408 self.inner
2409 }
2410}
2411
2412impl ops::Deref for CigarStringView {
2413 type Target = CigarString;
2414
2415 fn deref(&self) -> &CigarString {
2416 &self.inner
2417 }
2418}
2419
2420impl ops::Index<usize> for CigarStringView {
2421 type Output = Cigar;
2422
2423 fn index(&self, index: usize) -> &Cigar {
2424 self.inner.index(index)
2425 }
2426}
2427
2428impl ops::IndexMut<usize> for CigarStringView {
2429 fn index_mut(&mut self, index: usize) -> &mut Cigar {
2430 self.inner.index_mut(index)
2431 }
2432}
2433
2434impl<'a> CigarStringView {
2435 pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2436 self.inner.into_iter()
2437 }
2438}
2439
2440impl<'a> IntoIterator for &'a CigarStringView {
2441 type Item = &'a Cigar;
2442 type IntoIter = ::std::slice::Iter<'a, Cigar>;
2443
2444 fn into_iter(self) -> Self::IntoIter {
2445 self.inner.into_iter()
2446 }
2447}
2448
2449impl fmt::Display for CigarStringView {
2450 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2451 self.inner.fmt(fmt)
2452 }
2453}
2454
2455pub struct BaseModificationMetadata {
2456 pub strand: i32,
2457 pub implicit: i32,
2458 pub canonical: u8,
2459}
2460
2461pub struct BaseModificationState<'a> {
2464 record: &'a Record,
2465 state: *mut htslib::hts_base_mod_state,
2466 buffer: Vec<htslib::hts_base_mod>,
2467 buffer_pos: i32,
2468}
2469
2470impl BaseModificationState<'_> {
2471 fn new(r: &Record) -> Result<BaseModificationState<'_>> {
2476 let mut bm = unsafe {
2477 BaseModificationState {
2478 record: r,
2479 state: hts_sys::hts_base_mod_state_alloc(),
2480 buffer: Vec::new(),
2481 buffer_pos: -1,
2482 }
2483 };
2484
2485 if bm.state.is_null() {
2486 panic!("Unable to allocate memory for hts_base_mod_state");
2487 }
2488
2489 unsafe {
2491 let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
2492 if ret != 0 {
2493 return Err(Error::BamBaseModificationTagNotFound);
2494 }
2495 }
2496
2497 let types = bm.recorded();
2498 bm.buffer.reserve(types.len());
2499 Ok(bm)
2500 }
2501
2502 pub fn buffer_next_mods(&mut self) -> Result<usize> {
2503 unsafe {
2504 let ret = hts_sys::bam_next_basemod(
2505 self.record.inner_ptr(),
2506 self.state,
2507 self.buffer.as_mut_ptr(),
2508 self.buffer.capacity() as i32,
2509 &mut self.buffer_pos,
2510 );
2511
2512 if ret < 0 {
2513 return Err(Error::BamBaseModificationIterationFailed);
2514 }
2515
2516 if ret as usize > self.buffer.capacity() {
2520 return Err(Error::BamBaseModificationTooManyMods);
2521 }
2522
2523 self.buffer.set_len(ret as usize);
2526
2527 Ok(ret as usize)
2528 }
2529 }
2530
2531 pub fn recorded<'a>(&self) -> &'a [i32] {
2534 unsafe {
2535 let mut n: i32 = 0;
2536 let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2537
2538 if data_ptr.is_null() {
2540 panic!("Unable to obtain pointer to base modifications");
2541 }
2542 assert!(n >= 0);
2543 slice::from_raw_parts(data_ptr, n as usize)
2544 }
2545 }
2546
2547 pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2553 unsafe {
2554 let mut strand: i32 = 0;
2555 let mut implicit: i32 = 0;
2556 let mut canonical: c_char = 0;
2558
2559 let ret = hts_sys::bam_mods_query_type(
2560 self.state,
2561 code,
2562 &mut strand,
2563 &mut implicit,
2564 &mut canonical,
2565 );
2566 if ret == -1 {
2567 Err(Error::BamBaseModificationTypeNotFound)
2568 } else {
2569 Ok(BaseModificationMetadata {
2570 strand,
2571 implicit,
2572 canonical: canonical.try_into().unwrap(),
2573 })
2574 }
2575 }
2576 }
2577}
2578
2579impl Drop for BaseModificationState<'_> {
2580 fn drop<'a>(&mut self) {
2581 unsafe {
2582 hts_sys::hts_base_mod_state_free(self.state);
2583 }
2584 }
2585}
2586
2587pub struct BaseModificationsPositionIter<'a> {
2590 mod_state: BaseModificationState<'a>,
2591}
2592
2593impl BaseModificationsPositionIter<'_> {
2594 fn new(r: &Record) -> Result<BaseModificationsPositionIter<'_>> {
2595 let state = BaseModificationState::new(r)?;
2596 Ok(BaseModificationsPositionIter { mod_state: state })
2597 }
2598
2599 pub fn recorded<'a>(&self) -> &'a [i32] {
2600 self.mod_state.recorded()
2601 }
2602
2603 pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2604 self.mod_state.query_type(code)
2605 }
2606}
2607
2608impl Iterator for BaseModificationsPositionIter<'_> {
2609 type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2610
2611 fn next(&mut self) -> Option<Self::Item> {
2612 let ret = self.mod_state.buffer_next_mods();
2613
2614 match ret {
2619 Ok(num_mods) => {
2620 if num_mods == 0 {
2621 None
2622 } else {
2623 let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
2624 Some(Ok(data))
2625 }
2626 }
2627 Err(e) => Some(Err(e)),
2628 }
2629 }
2630}
2631
2632pub struct BaseModificationsIter<'a> {
2635 mod_state: BaseModificationState<'a>,
2636 buffer_idx: usize,
2637}
2638
2639impl BaseModificationsIter<'_> {
2640 fn new(r: &Record) -> Result<BaseModificationsIter<'_>> {
2641 let state = BaseModificationState::new(r)?;
2642 Ok(BaseModificationsIter {
2643 mod_state: state,
2644 buffer_idx: 0,
2645 })
2646 }
2647
2648 pub fn recorded<'a>(&self) -> &'a [i32] {
2649 self.mod_state.recorded()
2650 }
2651
2652 pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2653 self.mod_state.query_type(code)
2654 }
2655}
2656
2657impl Iterator for BaseModificationsIter<'_> {
2658 type Item = Result<(i32, hts_sys::hts_base_mod)>;
2659
2660 fn next(&mut self) -> Option<Self::Item> {
2661 if self.buffer_idx == self.mod_state.buffer.len() {
2662 let ret = self.mod_state.buffer_next_mods();
2665
2666 match ret {
2667 Ok(num_mods) => {
2668 if num_mods == 0 {
2669 return None;
2671 } else {
2672 self.buffer_idx = 0;
2674 }
2675 }
2676 Err(e) => return Some(Err(e)),
2677 }
2678 }
2679
2680 assert!(self.buffer_idx < self.mod_state.buffer.len());
2682 let data = (
2683 self.mod_state.buffer_pos,
2684 self.mod_state.buffer[self.buffer_idx],
2685 );
2686 self.buffer_idx += 1;
2687 Some(Ok(data))
2688 }
2689}
2690
2691#[cfg(test)]
2692mod tests {
2693 use super::*;
2694
2695 #[test]
2696 fn test_cigar_string() {
2697 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2698
2699 assert_eq!(cigar[0], Cigar::Match(100));
2700 assert_eq!(format!("{}", cigar), "100M10S");
2701 for op in &cigar {
2702 println!("{}", op);
2703 }
2704 }
2705
2706 #[test]
2707 fn test_cigar_string_view_pos() {
2708 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2709 assert_eq!(cigar.pos(), 5);
2710 }
2711
2712 #[test]
2713 fn test_cigar_string_leading_softclips() {
2714 let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2715 assert_eq!(cigar.leading_softclips(), 10);
2716 let cigar2 = CigarString(vec![
2717 Cigar::HardClip(5),
2718 Cigar::SoftClip(10),
2719 Cigar::Match(100),
2720 ])
2721 .into_view(0);
2722 assert_eq!(cigar2.leading_softclips(), 10);
2723 }
2724
2725 #[test]
2726 fn test_cigar_string_trailing_softclips() {
2727 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2728 assert_eq!(cigar.trailing_softclips(), 10);
2729 let cigar2 = CigarString(vec![
2730 Cigar::Match(100),
2731 Cigar::SoftClip(10),
2732 Cigar::HardClip(5),
2733 ])
2734 .into_view(0);
2735 assert_eq!(cigar2.trailing_softclips(), 10);
2736 }
2737
2738 #[test]
2739 fn test_cigar_read_pos() {
2740 let vpos = 5; let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2748 assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2749
2750 let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2758 assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2759 assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2760
2761 let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2770 assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2771 assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2772
2773 let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2779 assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2780
2781 let c05 = CigarString(vec![
2787 Cigar::Equal(2),
2788 Cigar::Del(2),
2789 Cigar::Diff(1),
2790 Cigar::Equal(2),
2791 ])
2792 .into_view(0);
2793 assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2794
2795 let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2801 assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2802 assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2803
2804 let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2810 assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2811 assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2812
2813 let c08 = CigarString(vec![
2819 Cigar::Equal(1),
2820 Cigar::Diff(1),
2821 Cigar::RefSkip(3),
2822 Cigar::Match(2),
2823 ])
2824 .into_view(2);
2825 assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2826 assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2827
2828 let c09 = CigarString(vec![
2834 Cigar::HardClip(3),
2835 Cigar::Equal(2),
2836 Cigar::HardClip(3),
2837 Cigar::Equal(2),
2838 ])
2839 .into_view(2);
2840 assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2841
2842 let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2848 assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2849
2850 let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2856 assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2857
2858 let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2864 assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2865
2866 let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2872 assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2873
2874 let vpos2 = 15;
2876 let c14 = CigarString(vec![
2881 Cigar::HardClip(5),
2882 Cigar::SoftClip(3),
2883 Cigar::Equal(1),
2884 Cigar::Pad(2),
2885 Cigar::Match(1),
2886 Cigar::Diff(1),
2887 Cigar::Ins(3),
2888 Cigar::Match(2),
2889 Cigar::Del(1),
2890 Cigar::Ins(2),
2891 Cigar::Equal(2),
2892 Cigar::RefSkip(3),
2893 Cigar::Match(3),
2894 Cigar::Equal(2),
2895 Cigar::SoftClip(5),
2896 Cigar::HardClip(2),
2897 ])
2898 .into_view(0);
2899 assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2900
2901 let c15 =
2907 CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2908 assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2909
2910 let c16 =
2913 CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2914 assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2915 }
2916
2917 #[test]
2918 fn test_clone() {
2919 let mut rec = Record::new();
2920 rec.set_pos(300);
2921 rec.set_qname(b"read1");
2922 let clone = rec.clone();
2923 assert_eq!(rec, clone);
2924 }
2925
2926 #[test]
2927 fn test_flags() {
2928 let mut rec = Record::new();
2929
2930 rec.set_paired();
2931 assert_eq!(rec.is_paired(), true);
2932
2933 rec.set_supplementary();
2934 assert_eq!(rec.is_supplementary(), true);
2935 assert_eq!(rec.is_supplementary(), true);
2936
2937 rec.unset_paired();
2938 assert_eq!(rec.is_paired(), false);
2939 assert_eq!(rec.is_supplementary(), true);
2940
2941 rec.unset_supplementary();
2942 assert_eq!(rec.is_paired(), false);
2943 assert_eq!(rec.is_supplementary(), false);
2944 }
2945
2946 #[test]
2947 fn test_cigar_parse() {
2948 let cigar = "1S20M1D2I3X1=2H";
2949 let parsed = CigarString::try_from(cigar).unwrap();
2950 assert_eq!(parsed.to_string(), cigar);
2951 }
2952}
2953
2954#[cfg(test)]
2955mod alignment_cigar_tests {
2956 use super::*;
2957 use crate::bam::{Read, Reader};
2958 use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2959 use bio_types::alignment::{Alignment, AlignmentMode};
2960
2961 #[test]
2962 fn test_cigar() {
2963 let alignment = Alignment {
2964 score: 5,
2965 xstart: 3,
2966 ystart: 0,
2967 xend: 9,
2968 yend: 10,
2969 ylen: 10,
2970 xlen: 10,
2971 operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2972 mode: AlignmentMode::Semiglobal,
2973 };
2974 assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2975 assert_eq!(
2976 CigarString::from_alignment(&alignment, false).0,
2977 vec![
2978 Cigar::SoftClip(3),
2979 Cigar::Equal(3),
2980 Cigar::Diff(1),
2981 Cigar::Ins(2),
2982 Cigar::Del(2),
2983 Cigar::SoftClip(1),
2984 ]
2985 );
2986
2987 let alignment = Alignment {
2988 score: 5,
2989 xstart: 0,
2990 ystart: 5,
2991 xend: 4,
2992 yend: 10,
2993 ylen: 10,
2994 xlen: 5,
2995 operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
2996 mode: AlignmentMode::Custom,
2997 };
2998 assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
2999 assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
3000 assert_eq!(
3001 CigarString::from_alignment(&alignment, false).0,
3002 vec![
3003 Cigar::Equal(1),
3004 Cigar::Diff(2),
3005 Cigar::Ins(1),
3006 Cigar::Del(2),
3007 Cigar::SoftClip(1),
3008 ]
3009 );
3010 assert_eq!(
3011 CigarString::from_alignment(&alignment, true).0,
3012 vec![
3013 Cigar::Equal(1),
3014 Cigar::Diff(2),
3015 Cigar::Ins(1),
3016 Cigar::Del(2),
3017 Cigar::HardClip(1),
3018 ]
3019 );
3020
3021 let alignment = Alignment {
3022 score: 5,
3023 xstart: 0,
3024 ystart: 5,
3025 xend: 3,
3026 yend: 8,
3027 ylen: 10,
3028 xlen: 3,
3029 operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
3030 mode: AlignmentMode::Custom,
3031 };
3032 assert_eq!(alignment.cigar(false), "1X1=1X");
3033 assert_eq!(
3034 CigarString::from_alignment(&alignment, false).0,
3035 vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
3036 );
3037
3038 let alignment = Alignment {
3039 score: 5,
3040 xstart: 0,
3041 ystart: 5,
3042 xend: 3,
3043 yend: 8,
3044 ylen: 10,
3045 xlen: 3,
3046 operations: vec![Subst, Match, Subst],
3047 mode: AlignmentMode::Semiglobal,
3048 };
3049 assert_eq!(alignment.cigar(false), "1X1=1X");
3050 assert_eq!(
3051 CigarString::from_alignment(&alignment, false).0,
3052 vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
3053 );
3054 }
3055
3056 #[test]
3057 fn test_read_orientation_f1r2() {
3058 let mut bam = Reader::from_path("test/test_paired.sam").unwrap();
3059
3060 for res in bam.records() {
3061 let record = res.unwrap();
3062 assert_eq!(
3063 record.read_pair_orientation(),
3064 SequenceReadPairOrientation::F1R2
3065 );
3066 }
3067 }
3068
3069 #[test]
3070 fn test_read_orientation_f2r1() {
3071 let mut bam = Reader::from_path("test/test_nonstandard_orientation.sam").unwrap();
3072
3073 for res in bam.records() {
3074 let record = res.unwrap();
3075 assert_eq!(
3076 record.read_pair_orientation(),
3077 SequenceReadPairOrientation::F2R1
3078 );
3079 }
3080 }
3081
3082 #[test]
3083 fn test_read_orientation_supplementary() {
3084 let mut bam = Reader::from_path("test/test_orientation_supplementary.sam").unwrap();
3085
3086 for res in bam.records() {
3087 let record = res.unwrap();
3088 assert_eq!(
3089 record.read_pair_orientation(),
3090 SequenceReadPairOrientation::F2R1
3091 );
3092 }
3093 }
3094
3095 #[test]
3096 pub fn test_cigar_parsing_non_ascii_error() {
3097 let cigar_str = "43ጷ";
3098 let expected_error = Err(Error::BamParseCigar {
3099 msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
3100 });
3101
3102 let result = CigarString::try_from(cigar_str);
3103 assert_eq!(expected_error, result);
3104 }
3105
3106 #[test]
3107 pub fn test_cigar_parsing() {
3108 let cigar_strs = [
3110 "1H10M4D100I300N1102=10P25X11S", "100M", "", "1H1=1H", "1S1=1S", "11H11S11=11S11H", "10H",
3117 "10S",
3118 ];
3119 let cigars = [
3121 CigarString(vec![
3122 Cigar::HardClip(1),
3123 Cigar::Match(10),
3124 Cigar::Del(4),
3125 Cigar::Ins(100),
3126 Cigar::RefSkip(300),
3127 Cigar::Equal(1102),
3128 Cigar::Pad(10),
3129 Cigar::Diff(25),
3130 Cigar::SoftClip(11),
3131 ]),
3132 CigarString(vec![Cigar::Match(100)]),
3133 CigarString(vec![]),
3134 CigarString(vec![
3135 Cigar::HardClip(1),
3136 Cigar::Equal(1),
3137 Cigar::HardClip(1),
3138 ]),
3139 CigarString(vec![
3140 Cigar::SoftClip(1),
3141 Cigar::Equal(1),
3142 Cigar::SoftClip(1),
3143 ]),
3144 CigarString(vec![
3145 Cigar::HardClip(11),
3146 Cigar::SoftClip(11),
3147 Cigar::Equal(11),
3148 Cigar::SoftClip(11),
3149 Cigar::HardClip(11),
3150 ]),
3151 CigarString(vec![Cigar::HardClip(10)]),
3152 CigarString(vec![Cigar::SoftClip(10)]),
3153 ];
3154 for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
3156 let cigar_parse = CigarString::try_from(cigar_str)
3157 .unwrap_or_else(|_| panic!("Unable to parse cigar: {}", cigar_str));
3158 assert_eq!(&cigar_parse, truth);
3159 }
3160 }
3161}
3162
3163#[cfg(test)]
3164mod basemod_tests {
3165 use crate::bam::{Read, Reader};
3166
3167 #[test]
3168 pub fn test_count_recorded() {
3169 let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3170
3171 for r in bam.records() {
3172 let record = r.unwrap();
3173 if let Ok(mods) = record.basemods_iter() {
3174 let n = mods.recorded().len();
3175 assert_eq!(n, 3);
3176 };
3177 }
3178 }
3179
3180 #[test]
3181 pub fn test_query_type() {
3182 let mut bam = Reader::from_path("test/base_mods/MM-orient.sam").unwrap();
3183
3184 let mut n_fwd = 0;
3185 let mut n_rev = 0;
3186
3187 for r in bam.records() {
3188 let record = r.unwrap();
3189 if let Ok(mods) = record.basemods_iter() {
3190 for mod_code in mods.recorded() {
3191 if let Ok(mod_metadata) = mods.query_type(*mod_code) {
3192 if mod_metadata.strand == 0 {
3193 n_fwd += 1;
3194 }
3195 if mod_metadata.strand == 1 {
3196 n_rev += 1;
3197 }
3198 }
3199 }
3200 };
3201 }
3202 assert_eq!(n_fwd, 2);
3203 assert_eq!(n_rev, 2);
3204 }
3205
3206 #[test]
3207 pub fn test_mod_iter() {
3208 let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3209 let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
3210 let mut i = 0;
3211
3212 for r in bam.records() {
3213 let record = r.unwrap();
3214 for res in record.basemods_iter().unwrap().flatten() {
3215 let (position, _m) = res;
3216 assert_eq!(position, expected_positions[i]);
3217 i += 1;
3218 }
3219 }
3220 }
3221
3222 #[test]
3223 pub fn test_position_iter() {
3224 let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3225 let expected_positions = [1, 7, 12, 13, 22, 30, 31];
3226 let expected_counts = [1, 1, 1, 2, 1, 1, 1];
3227 let mut i = 0;
3228
3229 for r in bam.records() {
3230 let record = r.unwrap();
3231 for res in record.basemods_position_iter().unwrap().flatten() {
3232 let (position, elements) = res;
3233 assert_eq!(position, expected_positions[i]);
3234 assert_eq!(elements.len(), expected_counts[i]);
3235 i += 1;
3236 }
3237 }
3238 }
3239}