1use std::convert::TryFrom;
7use std::convert::TryInto;
8use std::ffi;
9use std::fmt;
10use std::marker::PhantomData;
11use std::mem::{size_of, MaybeUninit};
12use std::ops;
13use std::os::raw::c_char;
14use std::slice;
15use std::str;
16use std::sync::Arc;
17
18use byteorder::{LittleEndian, ReadBytesExt};
19
20use crate::bam::Error;
21use crate::bam::HeaderView;
22use crate::errors::Result;
23use crate::htslib;
24use crate::utils;
25#[cfg(feature = "serde_feature")]
26use serde::{self, Deserialize, Serialize};
27
28use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
29use bio_types::genome;
30use bio_types::sequence::SequenceRead;
31use bio_types::sequence::SequenceReadPairOrientation;
32use bio_types::strand::ReqStrand;
33
34macro_rules! flag {
36 ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
37 pub fn $get(&self) -> bool {
38 self.inner().core.flag & $bit != 0
39 }
40
41 pub fn $set(&mut self) {
42 self.inner_mut().core.flag |= $bit;
43 }
44
45 pub fn $unset(&mut self) {
46 self.inner_mut().core.flag &= !$bit;
47 }
48 };
49}
50
51pub struct Record {
53 pub inner: htslib::bam1_t,
54 own: bool,
55 cigar: Option<CigarStringView>,
56 header: Option<Arc<HeaderView>>,
57}
58
59unsafe impl Send for Record {}
60unsafe impl Sync for Record {}
61
62impl Clone for Record {
63 fn clone(&self) -> Self {
64 let mut copy = Record::new();
65 unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
66 copy
67 }
68}
69
70impl PartialEq for Record {
71 fn eq(&self, other: &Record) -> bool {
72 self.tid() == other.tid()
73 && self.pos() == other.pos()
74 && self.bin() == other.bin()
75 && self.mapq() == other.mapq()
76 && self.flags() == other.flags()
77 && self.mtid() == other.mtid()
78 && self.mpos() == other.mpos()
79 && self.insert_size() == other.insert_size()
80 && self.data() == other.data()
81 && self.inner().core.l_extranul == other.inner().core.l_extranul
82 }
83}
84
85impl Eq for Record {}
86
87impl fmt::Debug for Record {
88 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
89 fmt.write_fmt(format_args!(
90 "Record(tid: {}, pos: {})",
91 self.tid(),
92 self.pos()
93 ))
94 }
95}
96
97impl Default for Record {
98 fn default() -> Self {
99 Self::new()
100 }
101}
102
103#[inline]
104fn extranul_from_qname(qname: &[u8]) -> usize {
105 let qlen = qname.len() + 1;
106 if !qlen.is_multiple_of(4) {
107 4 - qlen % 4
108 } else {
109 0
110 }
111}
112
113impl Record {
114 pub fn new() -> Self {
116 let mut record = Record {
117 inner: unsafe { MaybeUninit::zeroed().assume_init() },
118 own: true,
119 cigar: None,
120 header: None,
121 };
122 record.set_qname(b"");
125 record.set_unmapped();
128 record.set_tid(-1);
129 record.set_pos(-1);
130 record.set_mpos(-1);
131 record.set_mtid(-1);
132 record
133 }
134
135 pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
136 Record {
137 inner: {
138 #[allow(clippy::uninit_assumed_init, invalid_value)]
139 let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
140 unsafe {
141 ::libc::memcpy(
142 &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
143 from as *const ::libc::c_void,
144 size_of::<htslib::bam1_t>(),
145 );
146 }
147 inner
148 },
149 own: false,
150 cigar: None,
151 header: None,
152 }
153 }
154
155 pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
157 let mut record = Self::new();
158
159 let mut sam_copy = Vec::with_capacity(sam.len() + 1);
160 sam_copy.extend(sam);
161 sam_copy.push(0);
162
163 let mut sam_string = htslib::kstring_t {
164 s: sam_copy.as_ptr() as *mut c_char,
165 l: sam_copy.len(),
166 m: sam_copy.len(),
167 };
168
169 let succ = unsafe {
170 htslib::sam_parse1(
171 &mut sam_string,
172 header_view.inner_ptr() as *mut htslib::bam_hdr_t,
173 record.inner_ptr_mut(),
174 )
175 };
176
177 if succ == 0 {
178 Ok(record)
179 } else {
180 Err(Error::BamParseSAM {
181 rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
182 })
183 }
184 }
185
186 pub fn set_header(&mut self, header: Arc<HeaderView>) {
187 self.header = Some(header);
188 }
189
190 pub(super) fn data(&self) -> &[u8] {
191 unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
192 }
193
194 #[inline]
195 pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
196 &mut self.inner
197 }
198
199 #[inline]
200 pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
201 &mut self.inner as *mut htslib::bam1_t
202 }
203
204 #[inline]
205 pub fn inner(&self) -> &htslib::bam1_t {
206 &self.inner
207 }
208
209 #[inline]
210 pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
211 &self.inner as *const htslib::bam1_t
212 }
213
214 pub fn tid(&self) -> i32 {
216 self.inner().core.tid
217 }
218
219 pub fn set_tid(&mut self, tid: i32) {
221 self.inner_mut().core.tid = tid;
222 }
223
224 pub fn pos(&self) -> i64 {
226 self.inner().core.pos
227 }
228
229 pub fn set_pos(&mut self, pos: i64) {
231 self.inner_mut().core.pos = pos;
232 }
233
234 pub fn bin(&self) -> u16 {
235 self.inner().core.bin
236 }
237
238 pub fn set_bin(&mut self, bin: u16) {
239 self.inner_mut().core.bin = bin;
240 }
241
242 pub fn mapq(&self) -> u8 {
244 self.inner().core.qual
245 }
246
247 pub fn set_mapq(&mut self, mapq: u8) {
249 self.inner_mut().core.qual = mapq;
250 }
251
252 pub fn strand(&self) -> ReqStrand {
254 let reverse = self.flags() & 0x10 != 0;
255 if reverse {
256 ReqStrand::Reverse
257 } else {
258 ReqStrand::Forward
259 }
260 }
261
262 pub fn flags(&self) -> u16 {
264 self.inner().core.flag
265 }
266
267 pub fn set_flags(&mut self, flags: u16) {
269 self.inner_mut().core.flag = flags;
270 }
271
272 pub fn unset_flags(&mut self) {
274 self.inner_mut().core.flag = 0;
275 }
276
277 pub fn mtid(&self) -> i32 {
279 self.inner().core.mtid
280 }
281
282 pub fn set_mtid(&mut self, mtid: i32) {
284 self.inner_mut().core.mtid = mtid;
285 }
286
287 pub fn mpos(&self) -> i64 {
289 self.inner().core.mpos
290 }
291
292 pub fn set_mpos(&mut self, mpos: i64) {
294 self.inner_mut().core.mpos = mpos;
295 }
296
297 pub fn insert_size(&self) -> i64 {
299 self.inner().core.isize_
300 }
301
302 pub fn set_insert_size(&mut self, insert_size: i64) {
304 self.inner_mut().core.isize_ = insert_size;
305 }
306
307 fn qname_capacity(&self) -> usize {
308 self.inner().core.l_qname as usize
309 }
310
311 fn qname_len(&self) -> usize {
312 self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
314 }
315
316 pub fn qname(&self) -> &[u8] {
318 &self.data()[..self.qname_len()]
319 }
320
321 pub fn set_data(&mut self, new_data: &[u8]) {
323 self.cigar = None;
324
325 self.inner_mut().l_data = new_data.len() as i32;
326 if (self.inner().m_data as i32) < self.inner().l_data {
327 let l_data = self.inner().l_data;
329 self.realloc_var_data(l_data as usize);
330 }
331
332 let data =
334 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
335 utils::copy_memory(new_data, data);
336 }
337
338 pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
346 assert!(qname.len() < 255);
347 assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
348
349 self.cigar = None;
350
351 let cigar_width = if let Some(cigar_string) = cigar {
352 cigar_string.len()
353 } else {
354 0
355 } * 4;
356 let q_len = qname.len() + 1;
357 let extranul = extranul_from_qname(qname);
358
359 let orig_aux_offset = self.qname_capacity()
360 + 4 * self.cigar_len()
361 + self.seq_len().div_ceil(2)
362 + self.seq_len();
363 let new_aux_offset = q_len + extranul + cigar_width + seq.len().div_ceil(2) + qual.len();
364 assert!(orig_aux_offset <= self.inner.l_data as usize);
365 let aux_len = self.inner.l_data as usize - orig_aux_offset;
366 self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
367 if (self.inner().m_data as i32) < self.inner().l_data {
368 let l_data = self.inner().l_data;
370 self.realloc_var_data(l_data as usize);
371 }
372
373 if aux_len > 0 && orig_aux_offset != new_aux_offset {
375 let data =
376 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
377 data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
378 }
379
380 let data =
381 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
382
383 utils::copy_memory(qname, data);
385 for i in 0..=extranul {
386 data[qname.len() + i] = b'\0';
387 }
388 let mut i = q_len + extranul;
389 self.inner_mut().core.l_qname = i as u16;
390 self.inner_mut().core.l_extranul = extranul as u8;
391
392 if let Some(cigar_string) = cigar {
394 let cigar_data = unsafe {
395 #[allow(clippy::cast_ptr_alignment)]
397 slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
398 };
399 for (i, c) in cigar_string.iter().enumerate() {
400 cigar_data[i] = c.encode();
401 }
402 self.inner_mut().core.n_cigar = cigar_string.len() as u32;
403 i += cigar_string.len() * 4;
404 } else {
405 self.inner_mut().core.n_cigar = 0;
406 };
407
408 {
410 for j in (0..seq.len()).step_by(2) {
411 data[i + j / 2] = (ENCODE_BASE[seq[j] as usize] << 4)
412 | (if j + 1 < seq.len() {
413 ENCODE_BASE[seq[j + 1] as usize]
414 } else {
415 0
416 });
417 }
418 self.inner_mut().core.l_qseq = seq.len() as i32;
419 i += seq.len().div_ceil(2);
420 }
421
422 utils::copy_memory(qual, &mut data[i..]);
424 }
425
426 pub fn set_qname(&mut self, new_qname: &[u8]) {
428 assert!(new_qname.len() < 252);
430
431 let old_q_len = self.qname_capacity();
432 let extranul = extranul_from_qname(new_qname);
434 let new_q_len = new_qname.len() + 1 + extranul;
435
436 let other_len = self.inner_mut().l_data - old_q_len as i32;
438
439 if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
440 self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
441 } else if new_q_len > old_q_len {
442 self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
443
444 if (self.inner().m_data as i32) < self.inner().l_data {
446 let l_data = self.inner().l_data;
448 self.realloc_var_data(l_data as usize);
449 }
450 }
451
452 if new_q_len != old_q_len {
453 unsafe {
455 let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
456
457 ::libc::memmove(
458 data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
459 data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
460 other_len as usize,
461 );
462 }
463 }
464
465 let data =
467 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
468 utils::copy_memory(new_qname, data);
469 for i in 0..=extranul {
470 data[new_q_len - i - 1] = b'\0';
471 }
472 self.inner_mut().core.l_qname = new_q_len as u16;
473 self.inner_mut().core.l_extranul = extranul as u8;
474 }
475
476 pub fn set_cigar(&mut self, new_cigar: Option<&CigarString>) {
478 self.cigar = None;
479
480 let qname_data_len = self.qname_capacity();
481 let old_cigar_data_len = self.cigar_len() * 4;
482
483 let other_data_len = self.inner_mut().l_data - (qname_data_len + old_cigar_data_len) as i32;
485
486 let new_cigar_len = match new_cigar {
487 Some(x) => x.len(),
488 None => 0,
489 };
490 let new_cigar_data_len = new_cigar_len * 4;
491
492 if new_cigar_data_len < old_cigar_data_len {
493 self.inner_mut().l_data -= (old_cigar_data_len - new_cigar_data_len) as i32;
494 } else if new_cigar_data_len > old_cigar_data_len {
495 self.inner_mut().l_data += (new_cigar_data_len - old_cigar_data_len) as i32;
496
497 if (self.inner().m_data as i32) < self.inner().l_data {
499 let l_data = self.inner().l_data;
501 self.realloc_var_data(l_data as usize);
502 }
503 }
504
505 if new_cigar_data_len != old_cigar_data_len {
506 unsafe {
508 ::libc::memmove(
509 self.inner.data.add(qname_data_len + new_cigar_data_len) as *mut ::libc::c_void,
510 self.inner.data.add(qname_data_len + old_cigar_data_len) as *mut ::libc::c_void,
511 other_data_len as usize,
512 );
513 }
514 }
515
516 if let Some(cigar_string) = new_cigar {
518 let cigar_data = unsafe {
519 #[allow(clippy::cast_ptr_alignment)]
520 slice::from_raw_parts_mut(
521 self.inner.data.add(qname_data_len) as *mut u32,
522 cigar_string.len(),
523 )
524 };
525 for (i, c) in cigar_string.iter().enumerate() {
526 cigar_data[i] = c.encode();
527 }
528 }
529 self.inner_mut().core.n_cigar = new_cigar_len as u32;
530 }
531
532 fn realloc_var_data(&mut self, new_len: usize) {
533 let new_len = new_len as u32;
535 let new_request = new_len + 32 - (new_len % 32);
536
537 let ptr = unsafe {
538 ::libc::realloc(
539 self.inner().data as *mut ::libc::c_void,
540 new_request as usize,
541 ) as *mut u8
542 };
543
544 if ptr.is_null() {
545 panic!("ran out of memory in rust_htslib trying to realloc");
546 }
547
548 self.inner_mut().m_data = new_request;
551 self.inner_mut().data = ptr;
552
553 self.own = true;
555 }
556
557 pub fn cigar_len(&self) -> usize {
558 self.inner().core.n_cigar as usize
559 }
560
561 pub fn raw_cigar(&self) -> &[u32] {
564 #[allow(clippy::cast_ptr_alignment)]
566 unsafe {
567 slice::from_raw_parts(
568 self.data()[self.qname_capacity()..].as_ptr() as *const u32,
569 self.cigar_len(),
570 )
571 }
572 }
573
574 pub fn cigar(&self) -> CigarStringView {
576 match self.cigar {
577 Some(ref c) => c.clone(),
578 None => self.unpack_cigar(),
579 }
580 }
581
582 pub fn cigar_cached(&self) -> Option<&CigarStringView> {
584 self.cigar.as_ref()
585 }
586
587 pub fn cache_cigar(&mut self) {
589 self.cigar = Some(self.unpack_cigar())
590 }
591
592 fn unpack_cigar(&self) -> CigarStringView {
594 CigarString(
595 self.raw_cigar()
596 .iter()
597 .map(|&c| {
598 let len = c >> 4;
599 match c & 0b1111 {
600 0 => Cigar::Match(len),
601 1 => Cigar::Ins(len),
602 2 => Cigar::Del(len),
603 3 => Cigar::RefSkip(len),
604 4 => Cigar::SoftClip(len),
605 5 => Cigar::HardClip(len),
606 6 => Cigar::Pad(len),
607 7 => Cigar::Equal(len),
608 8 => Cigar::Diff(len),
609 _ => panic!("Unexpected cigar operation"),
610 }
611 })
612 .collect(),
613 )
614 .into_view(self.pos())
615 }
616
617 pub fn seq_len(&self) -> usize {
618 self.inner().core.l_qseq as usize
619 }
620
621 fn seq_data(&self) -> &[u8] {
622 let offset = self.qname_capacity() + self.cigar_len() * 4;
623 &self.data()[offset..][..self.seq_len().div_ceil(2)]
624 }
625
626 pub fn seq(&self) -> Seq<'_> {
628 Seq {
629 encoded: self.seq_data(),
630 len: self.seq_len(),
631 }
632 }
633
634 pub fn qual(&self) -> &[u8] {
638 &self.data()[self.qname_capacity() + self.cigar_len() * 4 + self.seq_len().div_ceil(2)..]
639 [..self.seq_len()]
640 }
641
642 pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
647 if tag.len() < 2 {
648 return Err(Error::BamAuxStringError);
649 }
650 let aux = unsafe {
651 htslib::bam_aux_get(
652 &self.inner as *const htslib::bam1_t,
653 tag.as_ptr() as *const c_char,
654 )
655 };
656 unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
657 }
658
659 unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
660 const TAG_LEN: isize = 2;
661 const TYPE_ID_LEN: isize = 1;
663
664 if aux.is_null() {
665 return Err(Error::BamAuxTagNotFound);
666 }
667
668 let (data, type_size) = match *aux {
669 b'A' => {
670 let type_size = size_of::<u8>();
671 (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
672 }
673 b'c' => {
674 let type_size = size_of::<i8>();
675 (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
676 }
677 b'C' => {
678 let type_size = size_of::<u8>();
679 (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
680 }
681 b's' => {
682 let type_size = size_of::<i16>();
683 (
684 Aux::I16(
685 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
686 .read_i16::<LittleEndian>()
687 .map_err(|_| Error::BamAuxParsingError)?,
688 ),
689 type_size,
690 )
691 }
692 b'S' => {
693 let type_size = size_of::<u16>();
694 (
695 Aux::U16(
696 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
697 .read_u16::<LittleEndian>()
698 .map_err(|_| Error::BamAuxParsingError)?,
699 ),
700 type_size,
701 )
702 }
703 b'i' => {
704 let type_size = size_of::<i32>();
705 (
706 Aux::I32(
707 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
708 .read_i32::<LittleEndian>()
709 .map_err(|_| Error::BamAuxParsingError)?,
710 ),
711 type_size,
712 )
713 }
714 b'I' => {
715 let type_size = size_of::<u32>();
716 (
717 Aux::U32(
718 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
719 .read_u32::<LittleEndian>()
720 .map_err(|_| Error::BamAuxParsingError)?,
721 ),
722 type_size,
723 )
724 }
725 b'f' => {
726 let type_size = size_of::<f32>();
727 (
728 Aux::Float(
729 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
730 .read_f32::<LittleEndian>()
731 .map_err(|_| Error::BamAuxParsingError)?,
732 ),
733 type_size,
734 )
735 }
736 b'd' => {
737 let type_size = size_of::<f64>();
738 (
739 Aux::Double(
740 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
741 .read_f64::<LittleEndian>()
742 .map_err(|_| Error::BamAuxParsingError)?,
743 ),
744 type_size,
745 )
746 }
747 b'Z' | b'H' => {
748 let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
749 let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
750 (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
751 }
752 b'B' => {
753 const ARRAY_INNER_TYPE_LEN: isize = 1;
754 const ARRAY_COUNT_LEN: isize = 4;
755
756 let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
758
759 let length =
760 slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
761 .read_u32::<LittleEndian>()
762 .map_err(|_| Error::BamAuxParsingError)? as usize;
763
764 let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
766 b'c' => (
767 Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
768 aux.offset(array_data_offset),
769 length,
770 ))),
771 length,
772 ),
773 b'C' => (
774 Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
775 aux.offset(array_data_offset),
776 length,
777 ))),
778 length,
779 ),
780 b's' => (
781 Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
782 aux.offset(array_data_offset),
783 length * size_of::<i16>(),
784 ))),
785 length * std::mem::size_of::<i16>(),
786 ),
787 b'S' => (
788 Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
789 aux.offset(array_data_offset),
790 length * size_of::<u16>(),
791 ))),
792 length * std::mem::size_of::<u16>(),
793 ),
794 b'i' => (
795 Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
796 aux.offset(array_data_offset),
797 length * size_of::<i32>(),
798 ))),
799 length * std::mem::size_of::<i32>(),
800 ),
801 b'I' => (
802 Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
803 aux.offset(array_data_offset),
804 length * size_of::<u32>(),
805 ))),
806 length * std::mem::size_of::<u32>(),
807 ),
808 b'f' => (
809 Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
810 aux.offset(array_data_offset),
811 length * size_of::<f32>(),
812 ))),
813 length * std::mem::size_of::<f32>(),
814 ),
815 _ => {
816 return Err(Error::BamAuxUnknownType);
817 }
818 };
819 (
820 array_data,
821 ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
823 )
824 }
825 _ => {
826 return Err(Error::BamAuxUnknownType);
827 }
828 };
829
830 Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
832 }
833
834 pub fn aux_iter(&'_ self) -> AuxIter<'_> {
839 AuxIter {
840 aux: &self.data()[
843 self.qname_capacity()
845 + self.cigar_len() * std::mem::size_of::<u32>()
847 + self.seq_len().div_ceil(2)
849 + self.seq_len()..],
851 }
852 }
853
854 pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
856 if self.aux(tag).is_ok() {
860 return Err(Error::BamAuxTagAlreadyPresent);
861 }
862 self.push_aux_unchecked(tag, value)
863 }
864
865 pub fn push_aux_unchecked(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
870 let ctag = tag.as_ptr() as *mut c_char;
871 let ret = unsafe {
872 match value {
873 Aux::Char(v) => htslib::bam_aux_append(
874 self.inner_ptr_mut(),
875 ctag,
876 b'A' as c_char,
877 size_of::<u8>() as i32,
878 [v].as_mut_ptr(),
879 ),
880 Aux::I8(v) => htslib::bam_aux_append(
881 self.inner_ptr_mut(),
882 ctag,
883 b'c' as c_char,
884 size_of::<i8>() as i32,
885 [v].as_mut_ptr() as *mut u8,
886 ),
887 Aux::U8(v) => htslib::bam_aux_append(
888 self.inner_ptr_mut(),
889 ctag,
890 b'C' as c_char,
891 size_of::<u8>() as i32,
892 [v].as_mut_ptr(),
893 ),
894 Aux::I16(v) => htslib::bam_aux_append(
895 self.inner_ptr_mut(),
896 ctag,
897 b's' as c_char,
898 size_of::<i16>() as i32,
899 [v].as_mut_ptr() as *mut u8,
900 ),
901 Aux::U16(v) => htslib::bam_aux_append(
902 self.inner_ptr_mut(),
903 ctag,
904 b'S' as c_char,
905 size_of::<u16>() as i32,
906 [v].as_mut_ptr() as *mut u8,
907 ),
908 Aux::I32(v) => htslib::bam_aux_append(
909 self.inner_ptr_mut(),
910 ctag,
911 b'i' as c_char,
912 size_of::<i32>() as i32,
913 [v].as_mut_ptr() as *mut u8,
914 ),
915 Aux::U32(v) => htslib::bam_aux_append(
916 self.inner_ptr_mut(),
917 ctag,
918 b'I' as c_char,
919 size_of::<u32>() as i32,
920 [v].as_mut_ptr() as *mut u8,
921 ),
922 Aux::Float(v) => htslib::bam_aux_append(
923 self.inner_ptr_mut(),
924 ctag,
925 b'f' as c_char,
926 size_of::<f32>() as i32,
927 [v].as_mut_ptr() as *mut u8,
928 ),
929 Aux::Double(v) => htslib::bam_aux_append(
931 self.inner_ptr_mut(),
932 ctag,
933 b'd' as c_char,
934 size_of::<f64>() as i32,
935 [v].as_mut_ptr() as *mut u8,
936 ),
937 Aux::String(v) => {
938 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
939 htslib::bam_aux_append(
940 self.inner_ptr_mut(),
941 ctag,
942 b'Z' as c_char,
943 (v.len() + 1) as i32,
944 c_str.as_ptr() as *mut u8,
945 )
946 }
947 Aux::HexByteArray(v) => {
948 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
949 htslib::bam_aux_append(
950 self.inner_ptr_mut(),
951 ctag,
952 b'H' as c_char,
953 (v.len() + 1) as i32,
954 c_str.as_ptr() as *mut u8,
955 )
956 }
957 Aux::ArrayI8(aux_array) => match aux_array {
959 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
960 self.inner_ptr_mut(),
961 ctag,
962 b'c',
963 inner.len() as u32,
964 inner.slice.as_ptr() as *mut ::libc::c_void,
965 ),
966 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
967 self.inner_ptr_mut(),
968 ctag,
969 b'c',
970 inner.len() as u32,
971 inner.slice.as_ptr() as *mut ::libc::c_void,
972 ),
973 },
974 Aux::ArrayU8(aux_array) => match aux_array {
975 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
976 self.inner_ptr_mut(),
977 ctag,
978 b'C',
979 inner.len() as u32,
980 inner.slice.as_ptr() as *mut ::libc::c_void,
981 ),
982 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
983 self.inner_ptr_mut(),
984 ctag,
985 b'C',
986 inner.len() as u32,
987 inner.slice.as_ptr() as *mut ::libc::c_void,
988 ),
989 },
990 Aux::ArrayI16(aux_array) => match aux_array {
991 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
992 self.inner_ptr_mut(),
993 ctag,
994 b's',
995 inner.len() as u32,
996 inner.slice.as_ptr() as *mut ::libc::c_void,
997 ),
998 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
999 self.inner_ptr_mut(),
1000 ctag,
1001 b's',
1002 inner.len() as u32,
1003 inner.slice.as_ptr() as *mut ::libc::c_void,
1004 ),
1005 },
1006 Aux::ArrayU16(aux_array) => match aux_array {
1007 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1008 self.inner_ptr_mut(),
1009 ctag,
1010 b'S',
1011 inner.len() as u32,
1012 inner.slice.as_ptr() as *mut ::libc::c_void,
1013 ),
1014 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1015 self.inner_ptr_mut(),
1016 ctag,
1017 b'S',
1018 inner.len() as u32,
1019 inner.slice.as_ptr() as *mut ::libc::c_void,
1020 ),
1021 },
1022 Aux::ArrayI32(aux_array) => match aux_array {
1023 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1024 self.inner_ptr_mut(),
1025 ctag,
1026 b'i',
1027 inner.len() as u32,
1028 inner.slice.as_ptr() as *mut ::libc::c_void,
1029 ),
1030 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1031 self.inner_ptr_mut(),
1032 ctag,
1033 b'i',
1034 inner.len() as u32,
1035 inner.slice.as_ptr() as *mut ::libc::c_void,
1036 ),
1037 },
1038 Aux::ArrayU32(aux_array) => match aux_array {
1039 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1040 self.inner_ptr_mut(),
1041 ctag,
1042 b'I',
1043 inner.len() as u32,
1044 inner.slice.as_ptr() as *mut ::libc::c_void,
1045 ),
1046 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1047 self.inner_ptr_mut(),
1048 ctag,
1049 b'I',
1050 inner.len() as u32,
1051 inner.slice.as_ptr() as *mut ::libc::c_void,
1052 ),
1053 },
1054 Aux::ArrayFloat(aux_array) => match aux_array {
1055 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1056 self.inner_ptr_mut(),
1057 ctag,
1058 b'f',
1059 inner.len() as u32,
1060 inner.slice.as_ptr() as *mut ::libc::c_void,
1061 ),
1062 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1063 self.inner_ptr_mut(),
1064 ctag,
1065 b'f',
1066 inner.len() as u32,
1067 inner.slice.as_ptr() as *mut ::libc::c_void,
1068 ),
1069 },
1070 }
1071 };
1072
1073 if ret < 0 {
1074 Err(Error::BamAux)
1075 } else {
1076 Ok(())
1077 }
1078 }
1079
1080 pub fn update_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
1082 let ctag = tag.as_ptr() as *mut c_char;
1087 let ret = unsafe {
1088 match value {
1089 Aux::Char(_v) => return Err(Error::BamAuxTagUpdatingNotSupported),
1090 Aux::I8(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1091 Aux::U8(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1092 Aux::I16(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1093 Aux::U16(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1094 Aux::I32(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1095 Aux::U32(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1096 Aux::Float(v) => htslib::bam_aux_update_float(self.inner_ptr_mut(), ctag, v),
1097 Aux::Double(v) => {
1099 htslib::bam_aux_update_float(self.inner_ptr_mut(), ctag, v as f32)
1100 }
1101 Aux::String(v) => {
1102 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
1103 htslib::bam_aux_update_str(
1104 self.inner_ptr_mut(),
1105 ctag,
1106 (v.len() + 1) as i32,
1107 c_str.as_ptr() as *const c_char,
1108 )
1109 }
1110 Aux::HexByteArray(_v) => return Err(Error::BamAuxTagUpdatingNotSupported),
1111 Aux::ArrayI8(aux_array) => match aux_array {
1113 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1114 self.inner_ptr_mut(),
1115 ctag,
1116 b'c',
1117 inner.len() as u32,
1118 inner.slice.as_ptr() as *mut ::libc::c_void,
1119 ),
1120 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1121 self.inner_ptr_mut(),
1122 ctag,
1123 b'c',
1124 inner.len() as u32,
1125 inner.slice.as_ptr() as *mut ::libc::c_void,
1126 ),
1127 },
1128 Aux::ArrayU8(aux_array) => match aux_array {
1129 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1130 self.inner_ptr_mut(),
1131 ctag,
1132 b'C',
1133 inner.len() as u32,
1134 inner.slice.as_ptr() as *mut ::libc::c_void,
1135 ),
1136 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1137 self.inner_ptr_mut(),
1138 ctag,
1139 b'C',
1140 inner.len() as u32,
1141 inner.slice.as_ptr() as *mut ::libc::c_void,
1142 ),
1143 },
1144 Aux::ArrayI16(aux_array) => match aux_array {
1145 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1146 self.inner_ptr_mut(),
1147 ctag,
1148 b's',
1149 inner.len() as u32,
1150 inner.slice.as_ptr() as *mut ::libc::c_void,
1151 ),
1152 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1153 self.inner_ptr_mut(),
1154 ctag,
1155 b's',
1156 inner.len() as u32,
1157 inner.slice.as_ptr() as *mut ::libc::c_void,
1158 ),
1159 },
1160 Aux::ArrayU16(aux_array) => match aux_array {
1161 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1162 self.inner_ptr_mut(),
1163 ctag,
1164 b'S',
1165 inner.len() as u32,
1166 inner.slice.as_ptr() as *mut ::libc::c_void,
1167 ),
1168 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1169 self.inner_ptr_mut(),
1170 ctag,
1171 b'S',
1172 inner.len() as u32,
1173 inner.slice.as_ptr() as *mut ::libc::c_void,
1174 ),
1175 },
1176 Aux::ArrayI32(aux_array) => match aux_array {
1177 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1178 self.inner_ptr_mut(),
1179 ctag,
1180 b'i',
1181 inner.len() as u32,
1182 inner.slice.as_ptr() as *mut ::libc::c_void,
1183 ),
1184 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1185 self.inner_ptr_mut(),
1186 ctag,
1187 b'i',
1188 inner.len() as u32,
1189 inner.slice.as_ptr() as *mut ::libc::c_void,
1190 ),
1191 },
1192 Aux::ArrayU32(aux_array) => match aux_array {
1193 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1194 self.inner_ptr_mut(),
1195 ctag,
1196 b'I',
1197 inner.len() as u32,
1198 inner.slice.as_ptr() as *mut ::libc::c_void,
1199 ),
1200 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1201 self.inner_ptr_mut(),
1202 ctag,
1203 b'I',
1204 inner.len() as u32,
1205 inner.slice.as_ptr() as *mut ::libc::c_void,
1206 ),
1207 },
1208 Aux::ArrayFloat(aux_array) => match aux_array {
1209 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1210 self.inner_ptr_mut(),
1211 ctag,
1212 b'f',
1213 inner.len() as u32,
1214 inner.slice.as_ptr() as *mut ::libc::c_void,
1215 ),
1216 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1217 self.inner_ptr_mut(),
1218 ctag,
1219 b'f',
1220 inner.len() as u32,
1221 inner.slice.as_ptr() as *mut ::libc::c_void,
1222 ),
1223 },
1224 }
1225 };
1226
1227 if ret < 0 {
1228 Err(Error::BamAux)
1229 } else {
1230 Ok(())
1231 }
1232 }
1233
1234 pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1236 if tag.len() < 2 {
1237 return Err(Error::BamAuxStringError);
1238 }
1239 let aux = unsafe {
1240 htslib::bam_aux_get(
1241 &self.inner as *const htslib::bam1_t,
1242 tag.as_ptr() as *const c_char,
1243 )
1244 };
1245 unsafe {
1246 if aux.is_null() {
1247 Err(Error::BamAuxTagNotFound)
1248 } else {
1249 htslib::bam_aux_del(self.inner_ptr_mut(), aux);
1250 Ok(())
1251 }
1252 }
1253 }
1254
1255 pub fn basemods_iter(&'_ self) -> Result<BaseModificationsIter<'_>> {
1287 BaseModificationsIter::new(self)
1288 }
1289
1290 pub fn basemods_position_iter(&'_ self) -> Result<BaseModificationsPositionIter<'_>> {
1294 BaseModificationsPositionIter::new(self)
1295 }
1296
1297 pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1301 if self.is_paired()
1302 && !self.is_unmapped()
1303 && !self.is_mate_unmapped()
1304 && self.tid() == self.mtid()
1305 {
1306 if self.pos() == self.mpos() {
1307 return SequenceReadPairOrientation::None;
1309 }
1310
1311 let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() {
1312 (
1313 self.pos(),
1314 self.mpos(),
1315 !self.is_reverse(),
1316 !self.is_mate_reverse(),
1317 )
1318 } else {
1319 (
1320 self.mpos(),
1321 self.pos(),
1322 !self.is_mate_reverse(),
1323 !self.is_reverse(),
1324 )
1325 };
1326
1327 if pos_1 < pos_2 {
1328 match (fwd_1, fwd_2) {
1329 (true, true) => SequenceReadPairOrientation::F1F2,
1330 (true, false) => SequenceReadPairOrientation::F1R2,
1331 (false, true) => SequenceReadPairOrientation::R1F2,
1332 (false, false) => SequenceReadPairOrientation::R1R2,
1333 }
1334 } else {
1335 match (fwd_2, fwd_1) {
1336 (true, true) => SequenceReadPairOrientation::F2F1,
1337 (true, false) => SequenceReadPairOrientation::F2R1,
1338 (false, true) => SequenceReadPairOrientation::R2F1,
1339 (false, false) => SequenceReadPairOrientation::R2R1,
1340 }
1341 }
1342 } else {
1343 SequenceReadPairOrientation::None
1344 }
1345 }
1346
1347 flag!(is_paired, set_paired, unset_paired, 1u16);
1348 flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1349 flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1350 flag!(
1351 is_mate_unmapped,
1352 set_mate_unmapped,
1353 unset_mate_unmapped,
1354 8u16
1355 );
1356 flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1357 flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1358 flag!(
1359 is_first_in_template,
1360 set_first_in_template,
1361 unset_first_in_template,
1362 64u16
1363 );
1364 flag!(
1365 is_last_in_template,
1366 set_last_in_template,
1367 unset_last_in_template,
1368 128u16
1369 );
1370 flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1371 flag!(
1372 is_quality_check_failed,
1373 set_quality_check_failed,
1374 unset_quality_check_failed,
1375 512u16
1376 );
1377 flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1378 flag!(
1379 is_supplementary,
1380 set_supplementary,
1381 unset_supplementary,
1382 2048u16
1383 );
1384}
1385
1386impl Drop for Record {
1387 fn drop(&mut self) {
1388 if self.own {
1389 unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
1390 }
1391 }
1392}
1393
1394impl SequenceRead for Record {
1395 fn name(&self) -> &[u8] {
1396 self.qname()
1397 }
1398
1399 fn base(&self, i: usize) -> u8 {
1400 *decode_base_unchecked(encoded_base(self.seq_data(), i))
1401 }
1402
1403 fn base_qual(&self, i: usize) -> u8 {
1404 self.qual()[i]
1405 }
1406
1407 fn len(&self) -> usize {
1408 self.seq_len()
1409 }
1410
1411 fn is_empty(&self) -> bool {
1412 self.len() == 0
1413 }
1414}
1415
1416impl genome::AbstractInterval for Record {
1417 fn contig(&self) -> &str {
1419 let tid = self.tid();
1420 if tid < 0 {
1421 panic!("invalid tid, must be at least zero");
1422 }
1423 str::from_utf8(
1424 self.header
1425 .as_ref()
1426 .expect(
1427 "header must be set (this is the case if the record has been read from a file)",
1428 )
1429 .tid2name(tid as u32),
1430 )
1431 .expect("unable to interpret contig name as UTF-8")
1432 }
1433
1434 fn range(&self) -> ops::Range<genome::Position> {
1436 let end_pos = self
1437 .cigar_cached()
1438 .expect("cigar has not been cached yet, call cache_cigar() first")
1439 .end_pos() as u64;
1440
1441 if self.pos() < 0 {
1442 panic!("invalid position, must be positive")
1443 }
1444
1445 self.pos() as u64..end_pos
1446 }
1447}
1448
1449#[derive(Debug, PartialEq)]
1504pub enum Aux<'a> {
1505 Char(u8),
1506 I8(i8),
1507 U8(u8),
1508 I16(i16),
1509 U16(u16),
1510 I32(i32),
1511 U32(u32),
1512 Float(f32),
1513 Double(f64), String(&'a str),
1515 HexByteArray(&'a str),
1516 ArrayI8(AuxArray<'a, i8>),
1517 ArrayU8(AuxArray<'a, u8>),
1518 ArrayI16(AuxArray<'a, i16>),
1519 ArrayU16(AuxArray<'a, u16>),
1520 ArrayI32(AuxArray<'a, i32>),
1521 ArrayU32(AuxArray<'a, u32>),
1522 ArrayFloat(AuxArray<'a, f32>),
1523}
1524
1525unsafe impl Send for Aux<'_> {}
1526unsafe impl Sync for Aux<'_> {}
1527
1528pub trait AuxArrayElement: Copy {
1530 fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1531}
1532
1533impl AuxArrayElement for i8 {
1534 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1535 std::io::Cursor::new(bytes).read_i8().ok()
1536 }
1537}
1538impl AuxArrayElement for u8 {
1539 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1540 std::io::Cursor::new(bytes).read_u8().ok()
1541 }
1542}
1543impl AuxArrayElement for i16 {
1544 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1545 std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1546 }
1547}
1548impl AuxArrayElement for u16 {
1549 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1550 std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1551 }
1552}
1553impl AuxArrayElement for i32 {
1554 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1555 std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1556 }
1557}
1558impl AuxArrayElement for u32 {
1559 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1560 std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1561 }
1562}
1563impl AuxArrayElement for f32 {
1564 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1565 std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
1566 }
1567}
1568
1569#[derive(Debug)]
1613pub enum AuxArray<'a, T> {
1614 TargetType(AuxArrayTargetType<'a, T>),
1615 RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1616}
1617
1618impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1619where
1620 T: AuxArrayElement + PartialEq,
1621{
1622 fn eq(&self, other: &AuxArray<'_, T>) -> bool {
1623 use AuxArray::*;
1624 match (self, other) {
1625 (TargetType(v), TargetType(v_other)) => v == v_other,
1626 (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1627 (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
1628 (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
1629 }
1630 }
1631}
1632
1633impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1635where
1636 I: AuxArrayElement,
1637 T: AsRef<[I]> + ?Sized,
1638{
1639 fn from(src: &'a T) -> Self {
1640 AuxArray::TargetType(AuxArrayTargetType {
1641 slice: src.as_ref(),
1642 })
1643 }
1644}
1645
1646impl<'a, T> AuxArray<'a, T>
1647where
1648 T: AuxArrayElement,
1649{
1650 pub fn get(&self, index: usize) -> Option<T> {
1652 match self {
1653 AuxArray::TargetType(v) => v.get(index),
1654 AuxArray::RawLeBytes(v) => v.get(index),
1655 }
1656 }
1657
1658 pub fn len(&self) -> usize {
1660 match self {
1661 AuxArray::TargetType(a) => a.len(),
1662 AuxArray::RawLeBytes(a) => a.len(),
1663 }
1664 }
1665
1666 pub fn is_empty(&self) -> bool {
1668 self.len() == 0
1669 }
1670
1671 pub fn iter(&'_ self) -> AuxArrayIter<'_, T> {
1673 AuxArrayIter {
1674 index: 0,
1675 array: self,
1676 }
1677 }
1678
1679 fn from_bytes(bytes: &'a [u8]) -> Self {
1681 Self::RawLeBytes(AuxArrayRawLeBytes {
1682 slice: bytes,
1683 phantom_data: PhantomData,
1684 })
1685 }
1686}
1687
1688#[doc(hidden)]
1690#[derive(Debug, PartialEq)]
1691pub struct AuxArrayTargetType<'a, T> {
1692 slice: &'a [T],
1693}
1694
1695impl<T> AuxArrayTargetType<'_, T>
1696where
1697 T: AuxArrayElement,
1698{
1699 fn get(&self, index: usize) -> Option<T> {
1700 self.slice.get(index).copied()
1701 }
1702
1703 fn len(&self) -> usize {
1704 self.slice.len()
1705 }
1706}
1707
1708#[doc(hidden)]
1710#[derive(Debug, PartialEq)]
1711pub struct AuxArrayRawLeBytes<'a, T> {
1712 slice: &'a [u8],
1713 phantom_data: PhantomData<T>,
1714}
1715
1716impl<T> AuxArrayRawLeBytes<'_, T>
1717where
1718 T: AuxArrayElement,
1719{
1720 fn get(&self, index: usize) -> Option<T> {
1721 let type_size = std::mem::size_of::<T>();
1722 if index * type_size + type_size > self.slice.len() {
1723 return None;
1724 }
1725 T::from_le_bytes(&self.slice[index * type_size..][..type_size])
1726 }
1727
1728 fn len(&self) -> usize {
1729 self.slice.len() / std::mem::size_of::<T>()
1730 }
1731}
1732
1733pub struct AuxArrayIter<'a, T> {
1737 index: usize,
1738 array: &'a AuxArray<'a, T>,
1739}
1740
1741impl<T> Iterator for AuxArrayIter<'_, T>
1742where
1743 T: AuxArrayElement,
1744{
1745 type Item = T;
1746 fn next(&mut self) -> Option<Self::Item> {
1747 let value = self.array.get(self.index);
1748 self.index += 1;
1749 value
1750 }
1751
1752 fn size_hint(&self) -> (usize, Option<usize>) {
1753 let array_length = self.array.len() - self.index;
1754 (array_length, Some(array_length))
1755 }
1756}
1757
1758pub struct AuxIter<'a> {
1769 aux: &'a [u8],
1770}
1771
1772impl<'a> Iterator for AuxIter<'a> {
1773 type Item = Result<(&'a [u8], Aux<'a>)>;
1774
1775 fn next(&mut self) -> Option<Self::Item> {
1776 if self.aux.is_empty() {
1778 return None;
1779 }
1780 if (1..=3).contains(&self.aux.len()) {
1782 self.aux = &[];
1784 return Some(Err(Error::BamAuxParsingError));
1785 }
1786 let tag = &self.aux[..2];
1787 Some(unsafe {
1788 let data_ptr = self.aux[2..].as_ptr();
1789 Record::read_aux_field(data_ptr)
1790 .map(|(aux, offset)| {
1791 self.aux = &self.aux[offset..];
1792 (tag, aux)
1793 })
1794 .inspect_err(|_e| {
1795 self.aux = &[];
1797 })
1798 })
1799 }
1800}
1801
1802static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1803static ENCODE_BASE: [u8; 256] = [
1804 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1805 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1806 1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1807 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1808 13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1809 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1810 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1811 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1812 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1813 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1814 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1815];
1816
1817#[inline]
1818fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
1819 (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
1820}
1821
1822#[inline]
1823unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
1824 (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
1825}
1826
1827#[inline]
1828fn decode_base_unchecked(base: u8) -> &'static u8 {
1829 unsafe { DECODE_BASE.get_unchecked(base as usize) }
1830}
1831
1832#[derive(Debug, Copy, Clone)]
1834pub struct Seq<'a> {
1835 pub encoded: &'a [u8],
1836 len: usize,
1837}
1838
1839impl Seq<'_> {
1840 #[inline]
1842 pub fn encoded_base(&self, i: usize) -> u8 {
1843 encoded_base(self.encoded, i)
1844 }
1845
1846 #[inline]
1852 pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
1853 encoded_base_unchecked(self.encoded, i)
1854 }
1855
1856 #[inline]
1864 pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
1865 *decode_base_unchecked(self.encoded_base_unchecked(i))
1866 }
1867
1868 pub fn as_bytes(&self) -> Vec<u8> {
1870 (0..self.len()).map(|i| self[i]).collect()
1871 }
1872
1873 pub fn len(&self) -> usize {
1875 self.len
1876 }
1877
1878 pub fn is_empty(&self) -> bool {
1879 self.len() == 0
1880 }
1881}
1882
1883impl ops::Index<usize> for Seq<'_> {
1884 type Output = u8;
1885
1886 fn index(&self, index: usize) -> &u8 {
1888 decode_base_unchecked(self.encoded_base(index))
1889 }
1890}
1891
1892unsafe impl Send for Seq<'_> {}
1893unsafe impl Sync for Seq<'_> {}
1894
1895#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1896#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1897pub enum Cigar {
1898 Match(u32), Ins(u32), Del(u32), RefSkip(u32), SoftClip(u32), HardClip(u32), Pad(u32), Equal(u32), Diff(u32), }
1908
1909impl Cigar {
1910 fn encode(self) -> u32 {
1911 match self {
1912 Cigar::Match(len) => len << 4, Cigar::Ins(len) => (len << 4) | 1,
1914 Cigar::Del(len) => (len << 4) | 2,
1915 Cigar::RefSkip(len) => (len << 4) | 3,
1916 Cigar::SoftClip(len) => (len << 4) | 4,
1917 Cigar::HardClip(len) => (len << 4) | 5,
1918 Cigar::Pad(len) => (len << 4) | 6,
1919 Cigar::Equal(len) => (len << 4) | 7,
1920 Cigar::Diff(len) => (len << 4) | 8,
1921 }
1922 }
1923
1924 pub fn len(self) -> u32 {
1926 match self {
1927 Cigar::Match(len) => len,
1928 Cigar::Ins(len) => len,
1929 Cigar::Del(len) => len,
1930 Cigar::RefSkip(len) => len,
1931 Cigar::SoftClip(len) => len,
1932 Cigar::HardClip(len) => len,
1933 Cigar::Pad(len) => len,
1934 Cigar::Equal(len) => len,
1935 Cigar::Diff(len) => len,
1936 }
1937 }
1938
1939 pub fn is_empty(self) -> bool {
1940 self.len() == 0
1941 }
1942
1943 pub fn char(self) -> char {
1945 match self {
1946 Cigar::Match(_) => 'M',
1947 Cigar::Ins(_) => 'I',
1948 Cigar::Del(_) => 'D',
1949 Cigar::RefSkip(_) => 'N',
1950 Cigar::SoftClip(_) => 'S',
1951 Cigar::HardClip(_) => 'H',
1952 Cigar::Pad(_) => 'P',
1953 Cigar::Equal(_) => '=',
1954 Cigar::Diff(_) => 'X',
1955 }
1956 }
1957}
1958
1959impl fmt::Display for Cigar {
1960 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1961 fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
1962 }
1963}
1964
1965unsafe impl Send for Cigar {}
1966unsafe impl Sync for Cigar {}
1967
1968custom_derive! {
1969 #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1988 #[derive(NewtypeDeref,
1989 NewtypeDerefMut,
1990 NewtypeIndex(usize),
1991 NewtypeIndexMut(usize),
1992 NewtypeFrom,
1993 PartialEq,
1994 PartialOrd,
1995 Eq,
1996 NewtypeDebug,
1997 Clone,
1998 Hash
1999 )]
2000 pub struct CigarString(pub Vec<Cigar>);
2001}
2002
2003impl CigarString {
2004 pub fn into_view(self, pos: i64) -> CigarStringView {
2006 CigarStringView::new(self, pos)
2007 }
2008
2009 pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
2014 match alignment.mode {
2015 AlignmentMode::Global => {
2016 panic!(" Bam cigar fn not supported for Global Alignment mode")
2017 }
2018 AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
2019 _ => {}
2020 }
2021
2022 let mut cigar = Vec::new();
2023 if alignment.operations.is_empty() {
2024 return CigarString(cigar);
2025 }
2026
2027 let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
2028 AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
2029 AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
2030 AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
2031 AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
2032 _ => {}
2033 };
2034
2035 if alignment.xstart > 0 {
2036 cigar.push(if hard_clip {
2037 Cigar::HardClip(alignment.xstart as u32)
2038 } else {
2039 Cigar::SoftClip(alignment.xstart as u32)
2040 });
2041 }
2042
2043 let mut last = alignment.operations[0];
2044 let mut k = 1u32;
2045 for &op in alignment.operations[1..].iter() {
2046 if op == last {
2047 k += 1;
2048 } else {
2049 add_op(last, k, &mut cigar);
2050 k = 1;
2051 }
2052 last = op;
2053 }
2054 add_op(last, k, &mut cigar);
2055 if alignment.xlen > alignment.xend {
2056 cigar.push(if hard_clip {
2057 Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
2058 } else {
2059 Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
2060 });
2061 }
2062
2063 CigarString(cigar)
2064 }
2065}
2066
2067impl TryFrom<&[u8]> for CigarString {
2068 type Error = Error;
2069
2070 fn try_from(bytes: &[u8]) -> Result<Self> {
2091 let mut inner = Vec::new();
2092 let mut i = 0;
2093 let text_len = bytes.len();
2094 while i < text_len {
2095 let mut j = i;
2096 while j < text_len && bytes[j].is_ascii_digit() {
2097 j += 1;
2098 }
2099 if i == j {
2101 return Err(Error::BamParseCigar {
2102 msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
2103 });
2104 }
2105 let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
2107 msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
2108 })?;
2109 let n = s.parse().map_err(|_| Error::BamParseCigar {
2110 msg: format!("Unable to parse &str '{:?}' to u32.", s),
2111 })?;
2112 let op = &bytes[j];
2114 inner.push(match op {
2115 b'M' => Cigar::Match(n),
2116 b'I' => Cigar::Ins(n),
2117 b'D' => Cigar::Del(n),
2118 b'N' => Cigar::RefSkip(n),
2119 b'H' => {
2120 if i == 0 || j + 1 == text_len {
2121 Cigar::HardClip(n)
2122 } else {
2123 return Err(Error::BamParseCigar {
2124 msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
2125 .to_owned(),
2126 });
2127 }
2128 }
2129 b'S' => {
2130 if i == 0
2131 || j + 1 == text_len
2132 || bytes[i-1] == b'H'
2133 || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
2134 Cigar::SoftClip(n)
2135 } else {
2136 return Err(Error::BamParseCigar {
2137 msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
2138 .to_owned(),
2139 });
2140 }
2141 },
2142 b'P' => Cigar::Pad(n),
2143 b'=' => Cigar::Equal(n),
2144 b'X' => Cigar::Diff(n),
2145 op => {
2146 return Err(Error::BamParseCigar {
2147 msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
2148 })
2149 }
2150 });
2151 i = j + 1;
2152 }
2153 Ok(CigarString(inner))
2154 }
2155}
2156
2157impl TryFrom<&str> for CigarString {
2158 type Error = Error;
2159
2160 fn try_from(text: &str) -> Result<Self> {
2181 let bytes = text.as_bytes();
2182 if text.chars().count() != bytes.len() {
2183 return Err(Error::BamParseCigar {
2184 msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2185 });
2186 }
2187 CigarString::try_from(bytes)
2188 }
2189}
2190
2191impl<'a> CigarString {
2192 pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2193 self.into_iter()
2194 }
2195}
2196
2197impl<'a> IntoIterator for &'a CigarString {
2198 type Item = &'a Cigar;
2199 type IntoIter = ::std::slice::Iter<'a, Cigar>;
2200
2201 fn into_iter(self) -> Self::IntoIter {
2202 self.0.iter()
2203 }
2204}
2205
2206impl fmt::Display for CigarString {
2207 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2208 for op in self {
2209 fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
2210 }
2211 Ok(())
2212 }
2213}
2214
2215fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
2217 match (cigar.next(), cigar.next()) {
2218 (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
2219 *s as i64
2220 }
2221 _ => 0,
2222 }
2223}
2224
2225#[derive(Eq, PartialEq, Clone, Debug)]
2226pub struct CigarStringView {
2227 inner: CigarString,
2228 pos: i64,
2229}
2230
2231impl CigarStringView {
2232 pub fn new(c: CigarString, pos: i64) -> CigarStringView {
2234 CigarStringView { inner: c, pos }
2235 }
2236
2237 pub fn end_pos(&self) -> i64 {
2239 let mut pos = self.pos;
2240 for c in self {
2241 match c {
2242 Cigar::Match(l)
2243 | Cigar::RefSkip(l)
2244 | Cigar::Del(l)
2245 | Cigar::Equal(l)
2246 | Cigar::Diff(l) => pos += *l as i64,
2247 Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2249 }
2250 }
2251 pos
2252 }
2253
2254 pub fn pos(&self) -> i64 {
2256 self.pos
2257 }
2258
2259 pub fn leading_softclips(&self) -> i64 {
2261 calc_softclips(self.iter())
2262 }
2263
2264 pub fn trailing_softclips(&self) -> i64 {
2266 calc_softclips(self.iter().rev())
2267 }
2268
2269 pub fn leading_hardclips(&self) -> i64 {
2271 self.first().map_or(0, |cigar| {
2272 if let Cigar::HardClip(s) = cigar {
2273 *s as i64
2274 } else {
2275 0
2276 }
2277 })
2278 }
2279
2280 pub fn trailing_hardclips(&self) -> i64 {
2282 self.last().map_or(0, |cigar| {
2283 if let Cigar::HardClip(s) = cigar {
2284 *s as i64
2285 } else {
2286 0
2287 }
2288 })
2289 }
2290
2291 pub fn read_pos(
2301 &self,
2302 ref_pos: u32,
2303 include_softclips: bool,
2304 include_dels: bool,
2305 ) -> Result<Option<u32>> {
2306 let mut rpos = self.pos as u32; let mut qpos = 0u32; let mut j = 0; for (i, c) in self.iter().enumerate() {
2313 match c {
2314 Cigar::Match(_) |
2315 Cigar::Diff(_) |
2316 Cigar::Equal(_) |
2317 Cigar::Ins(_) => {
2320 j = i;
2321 break;
2322 },
2323 Cigar::SoftClip(l) => {
2324 j = i;
2325 if include_softclips {
2326 rpos = rpos.saturating_sub(*l);
2330 }
2331 break;
2332 },
2333 Cigar::Del(l) => {
2334 rpos += l;
2342 },
2343 Cigar::RefSkip(_) => {
2344 return Err(Error::BamUnexpectedCigarOperation {
2345 msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
2346 });
2347 },
2348 Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2349 return Err(Error::BamUnexpectedCigarOperation{
2350 msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2351 });
2352 },
2353 Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
2355 Cigar::Pad(_) | Cigar::HardClip(_) => ()
2357 }
2358 }
2359
2360 let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2361 cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2362 };
2363
2364 while rpos <= ref_pos && j < self.len() {
2365 match self[j] {
2366 Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
2368 qpos += ref_pos - rpos;
2371 return Ok(Some(qpos));
2372 }
2373 Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2374 qpos += ref_pos - rpos;
2375 return Ok(Some(qpos));
2376 }
2377 Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2378 return Ok(Some(qpos));
2380 }
2381 Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
2383 rpos += l;
2384 qpos += l;
2385 j += 1;
2386 }
2387 Cigar::SoftClip(l) => {
2388 qpos += l;
2389 j += 1;
2390 if include_softclips {
2391 rpos += l;
2392 }
2393 }
2394 Cigar::Ins(l) => {
2395 qpos += l;
2396 j += 1;
2397 }
2398 Cigar::RefSkip(l) | Cigar::Del(l) => {
2399 rpos += l;
2400 j += 1;
2401 }
2402 Cigar::Pad(_) => {
2403 j += 1;
2404 }
2405 Cigar::HardClip(_) if j < self.len() - 1 => {
2406 return Err(Error::BamUnexpectedCigarOperation{
2407 msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2408 });
2409 }
2410 Cigar::HardClip(_) => return Ok(None),
2411 }
2412 }
2413
2414 Ok(None)
2415 }
2416
2417 pub fn take(self) -> CigarString {
2419 self.inner
2420 }
2421}
2422
2423impl ops::Deref for CigarStringView {
2424 type Target = CigarString;
2425
2426 fn deref(&self) -> &CigarString {
2427 &self.inner
2428 }
2429}
2430
2431impl ops::Index<usize> for CigarStringView {
2432 type Output = Cigar;
2433
2434 fn index(&self, index: usize) -> &Cigar {
2435 self.inner.index(index)
2436 }
2437}
2438
2439impl ops::IndexMut<usize> for CigarStringView {
2440 fn index_mut(&mut self, index: usize) -> &mut Cigar {
2441 self.inner.index_mut(index)
2442 }
2443}
2444
2445impl<'a> CigarStringView {
2446 pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2447 self.inner.into_iter()
2448 }
2449}
2450
2451impl<'a> IntoIterator for &'a CigarStringView {
2452 type Item = &'a Cigar;
2453 type IntoIter = ::std::slice::Iter<'a, Cigar>;
2454
2455 fn into_iter(self) -> Self::IntoIter {
2456 self.inner.into_iter()
2457 }
2458}
2459
2460impl fmt::Display for CigarStringView {
2461 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2462 self.inner.fmt(fmt)
2463 }
2464}
2465
2466pub struct BaseModificationMetadata {
2467 pub strand: i32,
2468 pub implicit: i32,
2469 pub canonical: u8,
2470}
2471
2472pub struct BaseModificationState<'a> {
2475 record: &'a Record,
2476 state: *mut htslib::hts_base_mod_state,
2477 buffer: Vec<htslib::hts_base_mod>,
2478 buffer_pos: i32,
2479}
2480
2481impl BaseModificationState<'_> {
2482 fn new(r: &Record) -> Result<BaseModificationState<'_>> {
2487 let mut bm = unsafe {
2488 BaseModificationState {
2489 record: r,
2490 state: hts_sys::hts_base_mod_state_alloc(),
2491 buffer: Vec::new(),
2492 buffer_pos: -1,
2493 }
2494 };
2495
2496 if bm.state.is_null() {
2497 panic!("Unable to allocate memory for hts_base_mod_state");
2498 }
2499
2500 unsafe {
2502 let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
2503 if ret != 0 {
2504 return Err(Error::BamBaseModificationTagNotFound);
2505 }
2506 }
2507
2508 let types = bm.recorded();
2509 bm.buffer.reserve(types.len());
2510 Ok(bm)
2511 }
2512
2513 pub fn buffer_next_mods(&mut self) -> Result<usize> {
2514 unsafe {
2515 let ret = hts_sys::bam_next_basemod(
2516 self.record.inner_ptr(),
2517 self.state,
2518 self.buffer.as_mut_ptr(),
2519 self.buffer.capacity() as i32,
2520 &mut self.buffer_pos,
2521 );
2522
2523 if ret < 0 {
2524 return Err(Error::BamBaseModificationIterationFailed);
2525 }
2526
2527 if ret as usize > self.buffer.capacity() {
2531 return Err(Error::BamBaseModificationTooManyMods);
2532 }
2533
2534 self.buffer.set_len(ret as usize);
2537
2538 Ok(ret as usize)
2539 }
2540 }
2541
2542 pub fn recorded<'a>(&self) -> &'a [i32] {
2545 unsafe {
2546 let mut n: i32 = 0;
2547 let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2548
2549 if data_ptr.is_null() {
2551 panic!("Unable to obtain pointer to base modifications");
2552 }
2553 assert!(n >= 0);
2554 slice::from_raw_parts(data_ptr, n as usize)
2555 }
2556 }
2557
2558 pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2564 unsafe {
2565 let mut strand: i32 = 0;
2566 let mut implicit: i32 = 0;
2567 let mut canonical: c_char = 0;
2569
2570 let ret = hts_sys::bam_mods_query_type(
2571 self.state,
2572 code,
2573 &mut strand,
2574 &mut implicit,
2575 &mut canonical,
2576 );
2577 if ret == -1 {
2578 Err(Error::BamBaseModificationTypeNotFound)
2579 } else {
2580 Ok(BaseModificationMetadata {
2581 strand,
2582 implicit,
2583 canonical: canonical.try_into().unwrap(),
2584 })
2585 }
2586 }
2587 }
2588}
2589
2590impl Drop for BaseModificationState<'_> {
2591 fn drop<'a>(&mut self) {
2592 unsafe {
2593 hts_sys::hts_base_mod_state_free(self.state);
2594 }
2595 }
2596}
2597
2598pub struct BaseModificationsPositionIter<'a> {
2601 mod_state: BaseModificationState<'a>,
2602}
2603
2604impl BaseModificationsPositionIter<'_> {
2605 fn new(r: &Record) -> Result<BaseModificationsPositionIter<'_>> {
2606 let state = BaseModificationState::new(r)?;
2607 Ok(BaseModificationsPositionIter { mod_state: state })
2608 }
2609
2610 pub fn recorded<'a>(&self) -> &'a [i32] {
2611 self.mod_state.recorded()
2612 }
2613
2614 pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2615 self.mod_state.query_type(code)
2616 }
2617}
2618
2619impl Iterator for BaseModificationsPositionIter<'_> {
2620 type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2621
2622 fn next(&mut self) -> Option<Self::Item> {
2623 let ret = self.mod_state.buffer_next_mods();
2624
2625 match ret {
2630 Ok(num_mods) => {
2631 if num_mods == 0 {
2632 None
2633 } else {
2634 let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
2635 Some(Ok(data))
2636 }
2637 }
2638 Err(e) => Some(Err(e)),
2639 }
2640 }
2641}
2642
2643pub struct BaseModificationsIter<'a> {
2646 mod_state: BaseModificationState<'a>,
2647 buffer_idx: usize,
2648}
2649
2650impl BaseModificationsIter<'_> {
2651 fn new(r: &Record) -> Result<BaseModificationsIter<'_>> {
2652 let state = BaseModificationState::new(r)?;
2653 Ok(BaseModificationsIter {
2654 mod_state: state,
2655 buffer_idx: 0,
2656 })
2657 }
2658
2659 pub fn recorded<'a>(&self) -> &'a [i32] {
2660 self.mod_state.recorded()
2661 }
2662
2663 pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2664 self.mod_state.query_type(code)
2665 }
2666}
2667
2668impl Iterator for BaseModificationsIter<'_> {
2669 type Item = Result<(i32, hts_sys::hts_base_mod)>;
2670
2671 fn next(&mut self) -> Option<Self::Item> {
2672 if self.buffer_idx == self.mod_state.buffer.len() {
2673 let ret = self.mod_state.buffer_next_mods();
2676
2677 match ret {
2678 Ok(num_mods) => {
2679 if num_mods == 0 {
2680 return None;
2682 } else {
2683 self.buffer_idx = 0;
2685 }
2686 }
2687 Err(e) => return Some(Err(e)),
2688 }
2689 }
2690
2691 assert!(self.buffer_idx < self.mod_state.buffer.len());
2693 let data = (
2694 self.mod_state.buffer_pos,
2695 self.mod_state.buffer[self.buffer_idx],
2696 );
2697 self.buffer_idx += 1;
2698 Some(Ok(data))
2699 }
2700}
2701
2702#[cfg(test)]
2703mod tests {
2704 use super::*;
2705
2706 #[test]
2707 fn test_cigar_string() {
2708 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2709
2710 assert_eq!(cigar[0], Cigar::Match(100));
2711 assert_eq!(format!("{}", cigar), "100M10S");
2712 for op in &cigar {
2713 println!("{}", op);
2714 }
2715 }
2716
2717 #[test]
2718 fn test_cigar_string_view_pos() {
2719 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2720 assert_eq!(cigar.pos(), 5);
2721 }
2722
2723 #[test]
2724 fn test_cigar_string_leading_softclips() {
2725 let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2726 assert_eq!(cigar.leading_softclips(), 10);
2727 let cigar2 = CigarString(vec![
2728 Cigar::HardClip(5),
2729 Cigar::SoftClip(10),
2730 Cigar::Match(100),
2731 ])
2732 .into_view(0);
2733 assert_eq!(cigar2.leading_softclips(), 10);
2734 }
2735
2736 #[test]
2737 fn test_cigar_string_trailing_softclips() {
2738 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2739 assert_eq!(cigar.trailing_softclips(), 10);
2740 let cigar2 = CigarString(vec![
2741 Cigar::Match(100),
2742 Cigar::SoftClip(10),
2743 Cigar::HardClip(5),
2744 ])
2745 .into_view(0);
2746 assert_eq!(cigar2.trailing_softclips(), 10);
2747 }
2748
2749 #[test]
2750 fn test_cigar_read_pos() {
2751 let vpos = 5; let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2759 assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2760
2761 let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2769 assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2770 assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2771
2772 let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2781 assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2782 assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2783
2784 let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2790 assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2791
2792 let c05 = CigarString(vec![
2798 Cigar::Equal(2),
2799 Cigar::Del(2),
2800 Cigar::Diff(1),
2801 Cigar::Equal(2),
2802 ])
2803 .into_view(0);
2804 assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2805
2806 let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2812 assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2813 assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2814
2815 let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2821 assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2822 assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2823
2824 let c08 = CigarString(vec![
2830 Cigar::Equal(1),
2831 Cigar::Diff(1),
2832 Cigar::RefSkip(3),
2833 Cigar::Match(2),
2834 ])
2835 .into_view(2);
2836 assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2837 assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2838
2839 let c09 = CigarString(vec![
2845 Cigar::HardClip(3),
2846 Cigar::Equal(2),
2847 Cigar::HardClip(3),
2848 Cigar::Equal(2),
2849 ])
2850 .into_view(2);
2851 assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2852
2853 let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2859 assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2860
2861 let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2867 assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2868
2869 let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2875 assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2876
2877 let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2883 assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2884
2885 let vpos2 = 15;
2887 let c14 = CigarString(vec![
2892 Cigar::HardClip(5),
2893 Cigar::SoftClip(3),
2894 Cigar::Equal(1),
2895 Cigar::Pad(2),
2896 Cigar::Match(1),
2897 Cigar::Diff(1),
2898 Cigar::Ins(3),
2899 Cigar::Match(2),
2900 Cigar::Del(1),
2901 Cigar::Ins(2),
2902 Cigar::Equal(2),
2903 Cigar::RefSkip(3),
2904 Cigar::Match(3),
2905 Cigar::Equal(2),
2906 Cigar::SoftClip(5),
2907 Cigar::HardClip(2),
2908 ])
2909 .into_view(0);
2910 assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2911
2912 let c15 =
2918 CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2919 assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2920
2921 let c16 =
2924 CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2925 assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2926 }
2927
2928 #[test]
2929 fn test_clone() {
2930 let mut rec = Record::new();
2931 rec.set_pos(300);
2932 rec.set_qname(b"read1");
2933 let clone = rec.clone();
2934 assert_eq!(rec, clone);
2935 }
2936
2937 #[test]
2938 fn test_flags() {
2939 let mut rec = Record::new();
2940
2941 rec.set_paired();
2942 assert_eq!(rec.is_paired(), true);
2943
2944 rec.set_supplementary();
2945 assert_eq!(rec.is_supplementary(), true);
2946 assert_eq!(rec.is_supplementary(), true);
2947
2948 rec.unset_paired();
2949 assert_eq!(rec.is_paired(), false);
2950 assert_eq!(rec.is_supplementary(), true);
2951
2952 rec.unset_supplementary();
2953 assert_eq!(rec.is_paired(), false);
2954 assert_eq!(rec.is_supplementary(), false);
2955 }
2956
2957 #[test]
2958 fn test_cigar_parse() {
2959 let cigar = "1S20M1D2I3X1=2H";
2960 let parsed = CigarString::try_from(cigar).unwrap();
2961 assert_eq!(parsed.to_string(), cigar);
2962 }
2963}
2964
2965#[cfg(test)]
2966mod alignment_cigar_tests {
2967 use super::*;
2968 use crate::bam::{Read, Reader};
2969 use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2970 use bio_types::alignment::{Alignment, AlignmentMode};
2971
2972 #[test]
2973 fn test_cigar() {
2974 let alignment = Alignment {
2975 score: 5,
2976 xstart: 3,
2977 ystart: 0,
2978 xend: 9,
2979 yend: 10,
2980 ylen: 10,
2981 xlen: 10,
2982 operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2983 mode: AlignmentMode::Semiglobal,
2984 };
2985 assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2986 assert_eq!(
2987 CigarString::from_alignment(&alignment, false).0,
2988 vec![
2989 Cigar::SoftClip(3),
2990 Cigar::Equal(3),
2991 Cigar::Diff(1),
2992 Cigar::Ins(2),
2993 Cigar::Del(2),
2994 Cigar::SoftClip(1),
2995 ]
2996 );
2997
2998 let alignment = Alignment {
2999 score: 5,
3000 xstart: 0,
3001 ystart: 5,
3002 xend: 4,
3003 yend: 10,
3004 ylen: 10,
3005 xlen: 5,
3006 operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
3007 mode: AlignmentMode::Custom,
3008 };
3009 assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
3010 assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
3011 assert_eq!(
3012 CigarString::from_alignment(&alignment, false).0,
3013 vec![
3014 Cigar::Equal(1),
3015 Cigar::Diff(2),
3016 Cigar::Ins(1),
3017 Cigar::Del(2),
3018 Cigar::SoftClip(1),
3019 ]
3020 );
3021 assert_eq!(
3022 CigarString::from_alignment(&alignment, true).0,
3023 vec![
3024 Cigar::Equal(1),
3025 Cigar::Diff(2),
3026 Cigar::Ins(1),
3027 Cigar::Del(2),
3028 Cigar::HardClip(1),
3029 ]
3030 );
3031
3032 let alignment = Alignment {
3033 score: 5,
3034 xstart: 0,
3035 ystart: 5,
3036 xend: 3,
3037 yend: 8,
3038 ylen: 10,
3039 xlen: 3,
3040 operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
3041 mode: AlignmentMode::Custom,
3042 };
3043 assert_eq!(alignment.cigar(false), "1X1=1X");
3044 assert_eq!(
3045 CigarString::from_alignment(&alignment, false).0,
3046 vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
3047 );
3048
3049 let alignment = Alignment {
3050 score: 5,
3051 xstart: 0,
3052 ystart: 5,
3053 xend: 3,
3054 yend: 8,
3055 ylen: 10,
3056 xlen: 3,
3057 operations: vec![Subst, Match, Subst],
3058 mode: AlignmentMode::Semiglobal,
3059 };
3060 assert_eq!(alignment.cigar(false), "1X1=1X");
3061 assert_eq!(
3062 CigarString::from_alignment(&alignment, false).0,
3063 vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
3064 );
3065 }
3066
3067 #[test]
3068 fn test_read_orientation_f1r2() {
3069 let mut bam = Reader::from_path("test/test_paired.sam").unwrap();
3070
3071 for res in bam.records() {
3072 let record = res.unwrap();
3073 assert_eq!(
3074 record.read_pair_orientation(),
3075 SequenceReadPairOrientation::F1R2
3076 );
3077 }
3078 }
3079
3080 #[test]
3081 fn test_read_orientation_f2r1() {
3082 let mut bam = Reader::from_path("test/test_nonstandard_orientation.sam").unwrap();
3083
3084 for res in bam.records() {
3085 let record = res.unwrap();
3086 assert_eq!(
3087 record.read_pair_orientation(),
3088 SequenceReadPairOrientation::F2R1
3089 );
3090 }
3091 }
3092
3093 #[test]
3094 fn test_read_orientation_supplementary() {
3095 let mut bam = Reader::from_path("test/test_orientation_supplementary.sam").unwrap();
3096
3097 for res in bam.records() {
3098 let record = res.unwrap();
3099 assert_eq!(
3100 record.read_pair_orientation(),
3101 SequenceReadPairOrientation::F2R1
3102 );
3103 }
3104 }
3105
3106 #[test]
3107 pub fn test_cigar_parsing_non_ascii_error() {
3108 let cigar_str = "43ጷ";
3109 let expected_error = Err(Error::BamParseCigar {
3110 msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
3111 });
3112
3113 let result = CigarString::try_from(cigar_str);
3114 assert_eq!(expected_error, result);
3115 }
3116
3117 #[test]
3118 pub fn test_cigar_parsing() {
3119 let cigar_strs = [
3121 "1H10M4D100I300N1102=10P25X11S", "100M", "", "1H1=1H", "1S1=1S", "11H11S11=11S11H", "10H",
3128 "10S",
3129 ];
3130 let cigars = [
3132 CigarString(vec![
3133 Cigar::HardClip(1),
3134 Cigar::Match(10),
3135 Cigar::Del(4),
3136 Cigar::Ins(100),
3137 Cigar::RefSkip(300),
3138 Cigar::Equal(1102),
3139 Cigar::Pad(10),
3140 Cigar::Diff(25),
3141 Cigar::SoftClip(11),
3142 ]),
3143 CigarString(vec![Cigar::Match(100)]),
3144 CigarString(vec![]),
3145 CigarString(vec![
3146 Cigar::HardClip(1),
3147 Cigar::Equal(1),
3148 Cigar::HardClip(1),
3149 ]),
3150 CigarString(vec![
3151 Cigar::SoftClip(1),
3152 Cigar::Equal(1),
3153 Cigar::SoftClip(1),
3154 ]),
3155 CigarString(vec![
3156 Cigar::HardClip(11),
3157 Cigar::SoftClip(11),
3158 Cigar::Equal(11),
3159 Cigar::SoftClip(11),
3160 Cigar::HardClip(11),
3161 ]),
3162 CigarString(vec![Cigar::HardClip(10)]),
3163 CigarString(vec![Cigar::SoftClip(10)]),
3164 ];
3165 for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
3167 let cigar_parse = CigarString::try_from(cigar_str)
3168 .unwrap_or_else(|_| panic!("Unable to parse cigar: {}", cigar_str));
3169 assert_eq!(&cigar_parse, truth);
3170 }
3171 }
3172}
3173
3174#[cfg(test)]
3175mod basemod_tests {
3176 use crate::bam::{Read, Reader};
3177
3178 #[test]
3179 pub fn test_count_recorded() {
3180 let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3181
3182 for r in bam.records() {
3183 let record = r.unwrap();
3184 if let Ok(mods) = record.basemods_iter() {
3185 let n = mods.recorded().len();
3186 assert_eq!(n, 3);
3187 };
3188 }
3189 }
3190
3191 #[test]
3192 pub fn test_query_type() {
3193 let mut bam = Reader::from_path("test/base_mods/MM-orient.sam").unwrap();
3194
3195 let mut n_fwd = 0;
3196 let mut n_rev = 0;
3197
3198 for r in bam.records() {
3199 let record = r.unwrap();
3200 if let Ok(mods) = record.basemods_iter() {
3201 for mod_code in mods.recorded() {
3202 if let Ok(mod_metadata) = mods.query_type(*mod_code) {
3203 if mod_metadata.strand == 0 {
3204 n_fwd += 1;
3205 }
3206 if mod_metadata.strand == 1 {
3207 n_rev += 1;
3208 }
3209 }
3210 }
3211 };
3212 }
3213 assert_eq!(n_fwd, 2);
3214 assert_eq!(n_rev, 2);
3215 }
3216
3217 #[test]
3218 pub fn test_mod_iter() {
3219 let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3220 let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
3221 let mut i = 0;
3222
3223 for r in bam.records() {
3224 let record = r.unwrap();
3225 for res in record.basemods_iter().unwrap().flatten() {
3226 let (position, _m) = res;
3227 assert_eq!(position, expected_positions[i]);
3228 i += 1;
3229 }
3230 }
3231 }
3232
3233 #[test]
3234 pub fn test_position_iter() {
3235 let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3236 let expected_positions = [1, 7, 12, 13, 22, 30, 31];
3237 let expected_counts = [1, 1, 1, 2, 1, 1, 1];
3238 let mut i = 0;
3239
3240 for r in bam.records() {
3241 let record = r.unwrap();
3242 for res in record.basemods_position_iter().unwrap().flatten() {
3243 let (position, elements) = res;
3244 assert_eq!(position, expected_positions[i]);
3245 assert_eq!(elements.len(), expected_counts[i]);
3246 i += 1;
3247 }
3248 }
3249 }
3250}