1use std::convert::TryFrom;
7use std::convert::TryInto;
8use std::ffi;
9use std::fmt;
10use std::marker::PhantomData;
11use std::mem::{size_of, MaybeUninit};
12use std::ops;
13use std::os::raw::c_char;
14use std::rc::Rc;
15use std::slice;
16use std::str;
17
18use byteorder::{LittleEndian, ReadBytesExt};
19
20use crate::bam::Error;
21use crate::bam::HeaderView;
22use crate::errors::Result;
23use crate::htslib;
24use crate::utils;
25#[cfg(feature = "serde_feature")]
26use serde::{self, Deserialize, Serialize};
27
28use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
29use bio_types::genome;
30use bio_types::sequence::SequenceRead;
31use bio_types::sequence::SequenceReadPairOrientation;
32use bio_types::strand::ReqStrand;
33
34macro_rules! flag {
36 ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
37 pub fn $get(&self) -> bool {
38 self.inner().core.flag & $bit != 0
39 }
40
41 pub fn $set(&mut self) {
42 self.inner_mut().core.flag |= $bit;
43 }
44
45 pub fn $unset(&mut self) {
46 self.inner_mut().core.flag &= !$bit;
47 }
48 };
49}
50
51pub struct Record {
53 pub inner: htslib::bam1_t,
54 own: bool,
55 cigar: Option<CigarStringView>,
56 header: Option<Rc<HeaderView>>,
57}
58
59unsafe impl Send for Record {}
60unsafe impl Sync for Record {}
61
62impl Clone for Record {
63 fn clone(&self) -> Self {
64 let mut copy = Record::new();
65 unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
66 copy
67 }
68}
69
70impl PartialEq for Record {
71 fn eq(&self, other: &Record) -> bool {
72 self.tid() == other.tid()
73 && self.pos() == other.pos()
74 && self.bin() == other.bin()
75 && self.mapq() == other.mapq()
76 && self.flags() == other.flags()
77 && self.mtid() == other.mtid()
78 && self.mpos() == other.mpos()
79 && self.insert_size() == other.insert_size()
80 && self.data() == other.data()
81 && self.inner().core.l_extranul == other.inner().core.l_extranul
82 }
83}
84
85impl Eq for Record {}
86
87impl fmt::Debug for Record {
88 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
89 fmt.write_fmt(format_args!(
90 "Record(tid: {}, pos: {})",
91 self.tid(),
92 self.pos()
93 ))
94 }
95}
96
97impl Default for Record {
98 fn default() -> Self {
99 Self::new()
100 }
101}
102
103#[inline]
104fn extranul_from_qname(qname: &[u8]) -> usize {
105 let qlen = qname.len() + 1;
106 if qlen % 4 != 0 {
107 4 - qlen % 4
108 } else {
109 0
110 }
111}
112
113impl Record {
114 pub fn new() -> Self {
116 let mut record = Record {
117 inner: unsafe { MaybeUninit::zeroed().assume_init() },
118 own: true,
119 cigar: None,
120 header: None,
121 };
122 record.set_qname(b"");
125 record.set_unmapped();
128 record.set_tid(-1);
129 record.set_pos(-1);
130 record.set_mpos(-1);
131 record.set_mtid(-1);
132 record
133 }
134
135 pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
136 Record {
137 inner: {
138 #[allow(clippy::uninit_assumed_init, invalid_value)]
139 let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
140 unsafe {
141 ::libc::memcpy(
142 &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
143 from as *const ::libc::c_void,
144 size_of::<htslib::bam1_t>(),
145 );
146 }
147 inner
148 },
149 own: false,
150 cigar: None,
151 header: None,
152 }
153 }
154
155 pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
157 let mut record = Self::new();
158
159 let mut sam_copy = Vec::with_capacity(sam.len() + 1);
160 sam_copy.extend(sam);
161 sam_copy.push(0);
162
163 let mut sam_string = htslib::kstring_t {
164 s: sam_copy.as_ptr() as *mut c_char,
165 l: sam_copy.len(),
166 m: sam_copy.len(),
167 };
168
169 let succ = unsafe {
170 htslib::sam_parse1(
171 &mut sam_string,
172 header_view.inner_ptr() as *mut htslib::bam_hdr_t,
173 record.inner_ptr_mut(),
174 )
175 };
176
177 if succ == 0 {
178 Ok(record)
179 } else {
180 Err(Error::BamParseSAM {
181 rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
182 })
183 }
184 }
185
186 pub fn set_header(&mut self, header: Rc<HeaderView>) {
187 self.header = Some(header);
188 }
189
190 pub(super) fn data(&self) -> &[u8] {
191 unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
192 }
193
194 #[inline]
195 pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
196 &mut self.inner
197 }
198
199 #[inline]
200 pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
201 &mut self.inner as *mut htslib::bam1_t
202 }
203
204 #[inline]
205 pub fn inner(&self) -> &htslib::bam1_t {
206 &self.inner
207 }
208
209 #[inline]
210 pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
211 &self.inner as *const htslib::bam1_t
212 }
213
214 pub fn tid(&self) -> i32 {
216 self.inner().core.tid
217 }
218
219 pub fn set_tid(&mut self, tid: i32) {
221 self.inner_mut().core.tid = tid;
222 }
223
224 pub fn pos(&self) -> i64 {
226 self.inner().core.pos
227 }
228
229 pub fn set_pos(&mut self, pos: i64) {
231 self.inner_mut().core.pos = pos;
232 }
233
234 pub fn bin(&self) -> u16 {
235 self.inner().core.bin
236 }
237
238 pub fn set_bin(&mut self, bin: u16) {
239 self.inner_mut().core.bin = bin;
240 }
241
242 pub fn mapq(&self) -> u8 {
244 self.inner().core.qual
245 }
246
247 pub fn set_mapq(&mut self, mapq: u8) {
249 self.inner_mut().core.qual = mapq;
250 }
251
252 pub fn strand(&self) -> ReqStrand {
254 let reverse = self.flags() & 0x10 != 0;
255 if reverse {
256 ReqStrand::Reverse
257 } else {
258 ReqStrand::Forward
259 }
260 }
261
262 pub fn flags(&self) -> u16 {
264 self.inner().core.flag
265 }
266
267 pub fn set_flags(&mut self, flags: u16) {
269 self.inner_mut().core.flag = flags;
270 }
271
272 pub fn unset_flags(&mut self) {
274 self.inner_mut().core.flag = 0;
275 }
276
277 pub fn mtid(&self) -> i32 {
279 self.inner().core.mtid
280 }
281
282 pub fn set_mtid(&mut self, mtid: i32) {
284 self.inner_mut().core.mtid = mtid;
285 }
286
287 pub fn mpos(&self) -> i64 {
289 self.inner().core.mpos
290 }
291
292 pub fn set_mpos(&mut self, mpos: i64) {
294 self.inner_mut().core.mpos = mpos;
295 }
296
297 pub fn insert_size(&self) -> i64 {
299 self.inner().core.isize_
300 }
301
302 pub fn set_insert_size(&mut self, insert_size: i64) {
304 self.inner_mut().core.isize_ = insert_size;
305 }
306
307 fn qname_capacity(&self) -> usize {
308 self.inner().core.l_qname as usize
309 }
310
311 fn qname_len(&self) -> usize {
312 self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
314 }
315
316 pub fn qname(&self) -> &[u8] {
318 &self.data()[..self.qname_len()]
319 }
320
321 pub fn set_data(&mut self, new_data: &[u8]) {
323 self.cigar = None;
324
325 self.inner_mut().l_data = new_data.len() as i32;
326 if (self.inner().m_data as i32) < self.inner().l_data {
327 let l_data = self.inner().l_data;
329 self.realloc_var_data(l_data as usize);
330 }
331
332 let data =
334 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
335 utils::copy_memory(new_data, data);
336 }
337
338 pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
346 assert!(qname.len() < 255);
347 assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
348
349 self.cigar = None;
350
351 let cigar_width = if let Some(cigar_string) = cigar {
352 cigar_string.len()
353 } else {
354 0
355 } * 4;
356 let q_len = qname.len() + 1;
357 let extranul = extranul_from_qname(qname);
358
359 let orig_aux_offset = self.qname_capacity()
360 + 4 * self.cigar_len()
361 + self.seq_len().div_ceil(2)
362 + self.seq_len();
363 let new_aux_offset = q_len + extranul + cigar_width + seq.len().div_ceil(2) + qual.len();
364 assert!(orig_aux_offset <= self.inner.l_data as usize);
365 let aux_len = self.inner.l_data as usize - orig_aux_offset;
366 self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
367 if (self.inner().m_data as i32) < self.inner().l_data {
368 let l_data = self.inner().l_data;
370 self.realloc_var_data(l_data as usize);
371 }
372
373 if aux_len > 0 && orig_aux_offset != new_aux_offset {
375 let data =
376 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
377 data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
378 }
379
380 let data =
381 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
382
383 utils::copy_memory(qname, data);
385 for i in 0..=extranul {
386 data[qname.len() + i] = b'\0';
387 }
388 let mut i = q_len + extranul;
389 self.inner_mut().core.l_qname = i as u16;
390 self.inner_mut().core.l_extranul = extranul as u8;
391
392 if let Some(cigar_string) = cigar {
394 let cigar_data = unsafe {
395 #[allow(clippy::cast_ptr_alignment)]
397 slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
398 };
399 for (i, c) in cigar_string.iter().enumerate() {
400 cigar_data[i] = c.encode();
401 }
402 self.inner_mut().core.n_cigar = cigar_string.len() as u32;
403 i += cigar_string.len() * 4;
404 } else {
405 self.inner_mut().core.n_cigar = 0;
406 };
407
408 {
410 for j in (0..seq.len()).step_by(2) {
411 data[i + j / 2] = (ENCODE_BASE[seq[j] as usize] << 4)
412 | (if j + 1 < seq.len() {
413 ENCODE_BASE[seq[j + 1] as usize]
414 } else {
415 0
416 });
417 }
418 self.inner_mut().core.l_qseq = seq.len() as i32;
419 i += seq.len().div_ceil(2);
420 }
421
422 utils::copy_memory(qual, &mut data[i..]);
424 }
425
426 pub fn set_qname(&mut self, new_qname: &[u8]) {
428 assert!(new_qname.len() < 252);
430
431 let old_q_len = self.qname_capacity();
432 let extranul = extranul_from_qname(new_qname);
434 let new_q_len = new_qname.len() + 1 + extranul;
435
436 let other_len = self.inner_mut().l_data - old_q_len as i32;
438
439 if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
440 self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
441 } else if new_q_len > old_q_len {
442 self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
443
444 if (self.inner().m_data as i32) < self.inner().l_data {
446 let l_data = self.inner().l_data;
448 self.realloc_var_data(l_data as usize);
449 }
450 }
451
452 if new_q_len != old_q_len {
453 unsafe {
455 let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
456
457 ::libc::memmove(
458 data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
459 data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
460 other_len as usize,
461 );
462 }
463 }
464
465 let data =
467 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
468 utils::copy_memory(new_qname, data);
469 for i in 0..=extranul {
470 data[new_q_len - i - 1] = b'\0';
471 }
472 self.inner_mut().core.l_qname = new_q_len as u16;
473 self.inner_mut().core.l_extranul = extranul as u8;
474 }
475
476 pub fn set_cigar(&mut self, new_cigar: Option<&CigarString>) {
478 self.cigar = None;
479
480 let qname_data_len = self.qname_capacity();
481 let old_cigar_data_len = self.cigar_len() * 4;
482
483 let other_data_len = self.inner_mut().l_data - (qname_data_len + old_cigar_data_len) as i32;
485
486 let new_cigar_len = match new_cigar {
487 Some(x) => x.len(),
488 None => 0,
489 };
490 let new_cigar_data_len = new_cigar_len * 4;
491
492 if new_cigar_data_len < old_cigar_data_len {
493 self.inner_mut().l_data -= (old_cigar_data_len - new_cigar_data_len) as i32;
494 } else if new_cigar_data_len > old_cigar_data_len {
495 self.inner_mut().l_data += (new_cigar_data_len - old_cigar_data_len) as i32;
496
497 if (self.inner().m_data as i32) < self.inner().l_data {
499 let l_data = self.inner().l_data;
501 self.realloc_var_data(l_data as usize);
502 }
503 }
504
505 if new_cigar_data_len != old_cigar_data_len {
506 unsafe {
508 ::libc::memmove(
509 self.inner.data.add(qname_data_len + new_cigar_data_len) as *mut ::libc::c_void,
510 self.inner.data.add(qname_data_len + old_cigar_data_len) as *mut ::libc::c_void,
511 other_data_len as usize,
512 );
513 }
514 }
515
516 if let Some(cigar_string) = new_cigar {
518 let cigar_data = unsafe {
519 #[allow(clippy::cast_ptr_alignment)]
520 slice::from_raw_parts_mut(
521 self.inner.data.add(qname_data_len) as *mut u32,
522 cigar_string.len(),
523 )
524 };
525 for (i, c) in cigar_string.iter().enumerate() {
526 cigar_data[i] = c.encode();
527 }
528 }
529 self.inner_mut().core.n_cigar = new_cigar_len as u32;
530 }
531
532 fn realloc_var_data(&mut self, new_len: usize) {
533 let new_len = new_len as u32;
535 let new_request = new_len + 32 - (new_len % 32);
536
537 let ptr = unsafe {
538 ::libc::realloc(
539 self.inner().data as *mut ::libc::c_void,
540 new_request as usize,
541 ) as *mut u8
542 };
543
544 if ptr.is_null() {
545 panic!("ran out of memory in rust_htslib trying to realloc");
546 }
547
548 self.inner_mut().m_data = new_request;
551 self.inner_mut().data = ptr;
552
553 self.own = true;
555 }
556
557 pub fn cigar_len(&self) -> usize {
558 self.inner().core.n_cigar as usize
559 }
560
561 pub fn raw_cigar(&self) -> &[u32] {
564 #[allow(clippy::cast_ptr_alignment)]
566 unsafe {
567 slice::from_raw_parts(
568 self.data()[self.qname_capacity()..].as_ptr() as *const u32,
569 self.cigar_len(),
570 )
571 }
572 }
573
574 pub fn cigar(&self) -> CigarStringView {
576 match self.cigar {
577 Some(ref c) => c.clone(),
578 None => self.unpack_cigar(),
579 }
580 }
581
582 pub fn cigar_cached(&self) -> Option<&CigarStringView> {
584 self.cigar.as_ref()
585 }
586
587 pub fn cache_cigar(&mut self) {
589 self.cigar = Some(self.unpack_cigar())
590 }
591
592 fn unpack_cigar(&self) -> CigarStringView {
594 CigarString(
595 self.raw_cigar()
596 .iter()
597 .map(|&c| {
598 let len = c >> 4;
599 match c & 0b1111 {
600 0 => Cigar::Match(len),
601 1 => Cigar::Ins(len),
602 2 => Cigar::Del(len),
603 3 => Cigar::RefSkip(len),
604 4 => Cigar::SoftClip(len),
605 5 => Cigar::HardClip(len),
606 6 => Cigar::Pad(len),
607 7 => Cigar::Equal(len),
608 8 => Cigar::Diff(len),
609 _ => panic!("Unexpected cigar operation"),
610 }
611 })
612 .collect(),
613 )
614 .into_view(self.pos())
615 }
616
617 pub fn seq_len(&self) -> usize {
618 self.inner().core.l_qseq as usize
619 }
620
621 fn seq_data(&self) -> &[u8] {
622 let offset = self.qname_capacity() + self.cigar_len() * 4;
623 &self.data()[offset..][..self.seq_len().div_ceil(2)]
624 }
625
626 pub fn seq(&self) -> Seq<'_> {
628 Seq {
629 encoded: self.seq_data(),
630 len: self.seq_len(),
631 }
632 }
633
634 pub fn qual(&self) -> &[u8] {
638 &self.data()[self.qname_capacity() + self.cigar_len() * 4 + self.seq_len().div_ceil(2)..]
639 [..self.seq_len()]
640 }
641
642 pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
647 let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
648 let aux = unsafe {
649 htslib::bam_aux_get(
650 &self.inner as *const htslib::bam1_t,
651 c_str.as_ptr() as *mut c_char,
652 )
653 };
654 unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
655 }
656
657 unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
658 const TAG_LEN: isize = 2;
659 const TYPE_ID_LEN: isize = 1;
661
662 if aux.is_null() {
663 return Err(Error::BamAuxTagNotFound);
664 }
665
666 let (data, type_size) = match *aux {
667 b'A' => {
668 let type_size = size_of::<u8>();
669 (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
670 }
671 b'c' => {
672 let type_size = size_of::<i8>();
673 (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
674 }
675 b'C' => {
676 let type_size = size_of::<u8>();
677 (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
678 }
679 b's' => {
680 let type_size = size_of::<i16>();
681 (
682 Aux::I16(
683 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
684 .read_i16::<LittleEndian>()
685 .map_err(|_| Error::BamAuxParsingError)?,
686 ),
687 type_size,
688 )
689 }
690 b'S' => {
691 let type_size = size_of::<u16>();
692 (
693 Aux::U16(
694 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
695 .read_u16::<LittleEndian>()
696 .map_err(|_| Error::BamAuxParsingError)?,
697 ),
698 type_size,
699 )
700 }
701 b'i' => {
702 let type_size = size_of::<i32>();
703 (
704 Aux::I32(
705 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
706 .read_i32::<LittleEndian>()
707 .map_err(|_| Error::BamAuxParsingError)?,
708 ),
709 type_size,
710 )
711 }
712 b'I' => {
713 let type_size = size_of::<u32>();
714 (
715 Aux::U32(
716 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
717 .read_u32::<LittleEndian>()
718 .map_err(|_| Error::BamAuxParsingError)?,
719 ),
720 type_size,
721 )
722 }
723 b'f' => {
724 let type_size = size_of::<f32>();
725 (
726 Aux::Float(
727 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
728 .read_f32::<LittleEndian>()
729 .map_err(|_| Error::BamAuxParsingError)?,
730 ),
731 type_size,
732 )
733 }
734 b'd' => {
735 let type_size = size_of::<f64>();
736 (
737 Aux::Double(
738 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
739 .read_f64::<LittleEndian>()
740 .map_err(|_| Error::BamAuxParsingError)?,
741 ),
742 type_size,
743 )
744 }
745 b'Z' | b'H' => {
746 let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
747 let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
748 (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
749 }
750 b'B' => {
751 const ARRAY_INNER_TYPE_LEN: isize = 1;
752 const ARRAY_COUNT_LEN: isize = 4;
753
754 let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
756
757 let length =
758 slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
759 .read_u32::<LittleEndian>()
760 .map_err(|_| Error::BamAuxParsingError)? as usize;
761
762 let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
764 b'c' => (
765 Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
766 aux.offset(array_data_offset),
767 length,
768 ))),
769 length,
770 ),
771 b'C' => (
772 Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
773 aux.offset(array_data_offset),
774 length,
775 ))),
776 length,
777 ),
778 b's' => (
779 Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
780 aux.offset(array_data_offset),
781 length * size_of::<i16>(),
782 ))),
783 length * std::mem::size_of::<i16>(),
784 ),
785 b'S' => (
786 Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
787 aux.offset(array_data_offset),
788 length * size_of::<u16>(),
789 ))),
790 length * std::mem::size_of::<u16>(),
791 ),
792 b'i' => (
793 Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
794 aux.offset(array_data_offset),
795 length * size_of::<i32>(),
796 ))),
797 length * std::mem::size_of::<i32>(),
798 ),
799 b'I' => (
800 Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
801 aux.offset(array_data_offset),
802 length * size_of::<u32>(),
803 ))),
804 length * std::mem::size_of::<u32>(),
805 ),
806 b'f' => (
807 Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
808 aux.offset(array_data_offset),
809 length * size_of::<f32>(),
810 ))),
811 length * std::mem::size_of::<f32>(),
812 ),
813 _ => {
814 return Err(Error::BamAuxUnknownType);
815 }
816 };
817 (
818 array_data,
819 ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
821 )
822 }
823 _ => {
824 return Err(Error::BamAuxUnknownType);
825 }
826 };
827
828 Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
830 }
831
832 pub fn aux_iter(&self) -> AuxIter {
837 AuxIter {
838 aux: &self.data()[
841 self.qname_capacity()
843 + self.cigar_len() * std::mem::size_of::<u32>()
845 + self.seq_len().div_ceil(2)
847 + self.seq_len()..],
849 }
850 }
851
852 pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
854 if self.aux(tag).is_ok() {
858 return Err(Error::BamAuxTagAlreadyPresent);
859 }
860
861 let ctag = tag.as_ptr() as *mut c_char;
862 let ret = unsafe {
863 match value {
864 Aux::Char(v) => htslib::bam_aux_append(
865 self.inner_ptr_mut(),
866 ctag,
867 b'A' as c_char,
868 size_of::<u8>() as i32,
869 [v].as_mut_ptr(),
870 ),
871 Aux::I8(v) => htslib::bam_aux_append(
872 self.inner_ptr_mut(),
873 ctag,
874 b'c' as c_char,
875 size_of::<i8>() as i32,
876 [v].as_mut_ptr() as *mut u8,
877 ),
878 Aux::U8(v) => htslib::bam_aux_append(
879 self.inner_ptr_mut(),
880 ctag,
881 b'C' as c_char,
882 size_of::<u8>() as i32,
883 [v].as_mut_ptr(),
884 ),
885 Aux::I16(v) => htslib::bam_aux_append(
886 self.inner_ptr_mut(),
887 ctag,
888 b's' as c_char,
889 size_of::<i16>() as i32,
890 [v].as_mut_ptr() as *mut u8,
891 ),
892 Aux::U16(v) => htslib::bam_aux_append(
893 self.inner_ptr_mut(),
894 ctag,
895 b'S' as c_char,
896 size_of::<u16>() as i32,
897 [v].as_mut_ptr() as *mut u8,
898 ),
899 Aux::I32(v) => htslib::bam_aux_append(
900 self.inner_ptr_mut(),
901 ctag,
902 b'i' as c_char,
903 size_of::<i32>() as i32,
904 [v].as_mut_ptr() as *mut u8,
905 ),
906 Aux::U32(v) => htslib::bam_aux_append(
907 self.inner_ptr_mut(),
908 ctag,
909 b'I' as c_char,
910 size_of::<u32>() as i32,
911 [v].as_mut_ptr() as *mut u8,
912 ),
913 Aux::Float(v) => htslib::bam_aux_append(
914 self.inner_ptr_mut(),
915 ctag,
916 b'f' as c_char,
917 size_of::<f32>() as i32,
918 [v].as_mut_ptr() as *mut u8,
919 ),
920 Aux::Double(v) => htslib::bam_aux_append(
922 self.inner_ptr_mut(),
923 ctag,
924 b'd' as c_char,
925 size_of::<f64>() as i32,
926 [v].as_mut_ptr() as *mut u8,
927 ),
928 Aux::String(v) => {
929 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
930 htslib::bam_aux_append(
931 self.inner_ptr_mut(),
932 ctag,
933 b'Z' as c_char,
934 (v.len() + 1) as i32,
935 c_str.as_ptr() as *mut u8,
936 )
937 }
938 Aux::HexByteArray(v) => {
939 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
940 htslib::bam_aux_append(
941 self.inner_ptr_mut(),
942 ctag,
943 b'H' as c_char,
944 (v.len() + 1) as i32,
945 c_str.as_ptr() as *mut u8,
946 )
947 }
948 Aux::ArrayI8(aux_array) => match aux_array {
950 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
951 self.inner_ptr_mut(),
952 ctag,
953 b'c',
954 inner.len() as u32,
955 inner.slice.as_ptr() as *mut ::libc::c_void,
956 ),
957 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
958 self.inner_ptr_mut(),
959 ctag,
960 b'c',
961 inner.len() as u32,
962 inner.slice.as_ptr() as *mut ::libc::c_void,
963 ),
964 },
965 Aux::ArrayU8(aux_array) => match aux_array {
966 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
967 self.inner_ptr_mut(),
968 ctag,
969 b'C',
970 inner.len() as u32,
971 inner.slice.as_ptr() as *mut ::libc::c_void,
972 ),
973 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
974 self.inner_ptr_mut(),
975 ctag,
976 b'C',
977 inner.len() as u32,
978 inner.slice.as_ptr() as *mut ::libc::c_void,
979 ),
980 },
981 Aux::ArrayI16(aux_array) => match aux_array {
982 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
983 self.inner_ptr_mut(),
984 ctag,
985 b's',
986 inner.len() as u32,
987 inner.slice.as_ptr() as *mut ::libc::c_void,
988 ),
989 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
990 self.inner_ptr_mut(),
991 ctag,
992 b's',
993 inner.len() as u32,
994 inner.slice.as_ptr() as *mut ::libc::c_void,
995 ),
996 },
997 Aux::ArrayU16(aux_array) => match aux_array {
998 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
999 self.inner_ptr_mut(),
1000 ctag,
1001 b'S',
1002 inner.len() as u32,
1003 inner.slice.as_ptr() as *mut ::libc::c_void,
1004 ),
1005 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1006 self.inner_ptr_mut(),
1007 ctag,
1008 b'S',
1009 inner.len() as u32,
1010 inner.slice.as_ptr() as *mut ::libc::c_void,
1011 ),
1012 },
1013 Aux::ArrayI32(aux_array) => match aux_array {
1014 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1015 self.inner_ptr_mut(),
1016 ctag,
1017 b'i',
1018 inner.len() as u32,
1019 inner.slice.as_ptr() as *mut ::libc::c_void,
1020 ),
1021 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1022 self.inner_ptr_mut(),
1023 ctag,
1024 b'i',
1025 inner.len() as u32,
1026 inner.slice.as_ptr() as *mut ::libc::c_void,
1027 ),
1028 },
1029 Aux::ArrayU32(aux_array) => match aux_array {
1030 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1031 self.inner_ptr_mut(),
1032 ctag,
1033 b'I',
1034 inner.len() as u32,
1035 inner.slice.as_ptr() as *mut ::libc::c_void,
1036 ),
1037 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1038 self.inner_ptr_mut(),
1039 ctag,
1040 b'I',
1041 inner.len() as u32,
1042 inner.slice.as_ptr() as *mut ::libc::c_void,
1043 ),
1044 },
1045 Aux::ArrayFloat(aux_array) => match aux_array {
1046 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1047 self.inner_ptr_mut(),
1048 ctag,
1049 b'f',
1050 inner.len() as u32,
1051 inner.slice.as_ptr() as *mut ::libc::c_void,
1052 ),
1053 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1054 self.inner_ptr_mut(),
1055 ctag,
1056 b'f',
1057 inner.len() as u32,
1058 inner.slice.as_ptr() as *mut ::libc::c_void,
1059 ),
1060 },
1061 }
1062 };
1063
1064 if ret < 0 {
1065 Err(Error::BamAux)
1066 } else {
1067 Ok(())
1068 }
1069 }
1070
1071 pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1073 let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
1074 let aux = unsafe {
1075 htslib::bam_aux_get(
1076 &self.inner as *const htslib::bam1_t,
1077 c_str.as_ptr() as *mut c_char,
1078 )
1079 };
1080 unsafe {
1081 if aux.is_null() {
1082 Err(Error::BamAuxTagNotFound)
1083 } else {
1084 htslib::bam_aux_del(self.inner_ptr_mut(), aux);
1085 Ok(())
1086 }
1087 }
1088 }
1089
1090 pub fn basemods_iter(&self) -> Result<BaseModificationsIter> {
1122 BaseModificationsIter::new(self)
1123 }
1124
1125 pub fn basemods_position_iter(&self) -> Result<BaseModificationsPositionIter> {
1129 BaseModificationsPositionIter::new(self)
1130 }
1131
1132 pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1136 if self.is_paired()
1137 && !self.is_unmapped()
1138 && !self.is_mate_unmapped()
1139 && self.tid() == self.mtid()
1140 {
1141 if self.pos() == self.mpos() {
1142 return SequenceReadPairOrientation::None;
1144 }
1145
1146 let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() {
1147 (
1148 self.pos(),
1149 self.mpos(),
1150 !self.is_reverse(),
1151 !self.is_mate_reverse(),
1152 )
1153 } else {
1154 (
1155 self.mpos(),
1156 self.pos(),
1157 !self.is_mate_reverse(),
1158 !self.is_reverse(),
1159 )
1160 };
1161
1162 if pos_1 < pos_2 {
1163 match (fwd_1, fwd_2) {
1164 (true, true) => SequenceReadPairOrientation::F1F2,
1165 (true, false) => SequenceReadPairOrientation::F1R2,
1166 (false, true) => SequenceReadPairOrientation::R1F2,
1167 (false, false) => SequenceReadPairOrientation::R1R2,
1168 }
1169 } else {
1170 match (fwd_2, fwd_1) {
1171 (true, true) => SequenceReadPairOrientation::F2F1,
1172 (true, false) => SequenceReadPairOrientation::F2R1,
1173 (false, true) => SequenceReadPairOrientation::R2F1,
1174 (false, false) => SequenceReadPairOrientation::R2R1,
1175 }
1176 }
1177 } else {
1178 SequenceReadPairOrientation::None
1179 }
1180 }
1181
1182 flag!(is_paired, set_paired, unset_paired, 1u16);
1183 flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1184 flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1185 flag!(
1186 is_mate_unmapped,
1187 set_mate_unmapped,
1188 unset_mate_unmapped,
1189 8u16
1190 );
1191 flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1192 flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1193 flag!(
1194 is_first_in_template,
1195 set_first_in_template,
1196 unset_first_in_template,
1197 64u16
1198 );
1199 flag!(
1200 is_last_in_template,
1201 set_last_in_template,
1202 unset_last_in_template,
1203 128u16
1204 );
1205 flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1206 flag!(
1207 is_quality_check_failed,
1208 set_quality_check_failed,
1209 unset_quality_check_failed,
1210 512u16
1211 );
1212 flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1213 flag!(
1214 is_supplementary,
1215 set_supplementary,
1216 unset_supplementary,
1217 2048u16
1218 );
1219}
1220
1221impl Drop for Record {
1222 fn drop(&mut self) {
1223 if self.own {
1224 unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
1225 }
1226 }
1227}
1228
1229impl SequenceRead for Record {
1230 fn name(&self) -> &[u8] {
1231 self.qname()
1232 }
1233
1234 fn base(&self, i: usize) -> u8 {
1235 *decode_base_unchecked(encoded_base(self.seq_data(), i))
1236 }
1237
1238 fn base_qual(&self, i: usize) -> u8 {
1239 self.qual()[i]
1240 }
1241
1242 fn len(&self) -> usize {
1243 self.seq_len()
1244 }
1245
1246 fn is_empty(&self) -> bool {
1247 self.len() == 0
1248 }
1249}
1250
1251impl genome::AbstractInterval for Record {
1252 fn contig(&self) -> &str {
1254 let tid = self.tid();
1255 if tid < 0 {
1256 panic!("invalid tid, must be at least zero");
1257 }
1258 str::from_utf8(
1259 self.header
1260 .as_ref()
1261 .expect(
1262 "header must be set (this is the case if the record has been read from a file)",
1263 )
1264 .tid2name(tid as u32),
1265 )
1266 .expect("unable to interpret contig name as UTF-8")
1267 }
1268
1269 fn range(&self) -> ops::Range<genome::Position> {
1271 let end_pos = self
1272 .cigar_cached()
1273 .expect("cigar has not been cached yet, call cache_cigar() first")
1274 .end_pos() as u64;
1275
1276 if self.pos() < 0 {
1277 panic!("invalid position, must be positive")
1278 }
1279
1280 self.pos() as u64..end_pos
1281 }
1282}
1283
1284#[derive(Debug, PartialEq)]
1339pub enum Aux<'a> {
1340 Char(u8),
1341 I8(i8),
1342 U8(u8),
1343 I16(i16),
1344 U16(u16),
1345 I32(i32),
1346 U32(u32),
1347 Float(f32),
1348 Double(f64), String(&'a str),
1350 HexByteArray(&'a str),
1351 ArrayI8(AuxArray<'a, i8>),
1352 ArrayU8(AuxArray<'a, u8>),
1353 ArrayI16(AuxArray<'a, i16>),
1354 ArrayU16(AuxArray<'a, u16>),
1355 ArrayI32(AuxArray<'a, i32>),
1356 ArrayU32(AuxArray<'a, u32>),
1357 ArrayFloat(AuxArray<'a, f32>),
1358}
1359
1360unsafe impl Send for Aux<'_> {}
1361unsafe impl Sync for Aux<'_> {}
1362
1363pub trait AuxArrayElement: Copy {
1365 fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1366}
1367
1368impl AuxArrayElement for i8 {
1369 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1370 std::io::Cursor::new(bytes).read_i8().ok()
1371 }
1372}
1373impl AuxArrayElement for u8 {
1374 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1375 std::io::Cursor::new(bytes).read_u8().ok()
1376 }
1377}
1378impl AuxArrayElement for i16 {
1379 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1380 std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1381 }
1382}
1383impl AuxArrayElement for u16 {
1384 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1385 std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1386 }
1387}
1388impl AuxArrayElement for i32 {
1389 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1390 std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1391 }
1392}
1393impl AuxArrayElement for u32 {
1394 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1395 std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1396 }
1397}
1398impl AuxArrayElement for f32 {
1399 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1400 std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
1401 }
1402}
1403
1404#[derive(Debug)]
1448pub enum AuxArray<'a, T> {
1449 TargetType(AuxArrayTargetType<'a, T>),
1450 RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1451}
1452
1453impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1454where
1455 T: AuxArrayElement + PartialEq,
1456{
1457 fn eq(&self, other: &AuxArray<'_, T>) -> bool {
1458 use AuxArray::*;
1459 match (self, other) {
1460 (TargetType(v), TargetType(v_other)) => v == v_other,
1461 (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1462 (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
1463 (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
1464 }
1465 }
1466}
1467
1468impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1470where
1471 I: AuxArrayElement,
1472 T: AsRef<[I]> + ?Sized,
1473{
1474 fn from(src: &'a T) -> Self {
1475 AuxArray::TargetType(AuxArrayTargetType {
1476 slice: src.as_ref(),
1477 })
1478 }
1479}
1480
1481impl<'a, T> AuxArray<'a, T>
1482where
1483 T: AuxArrayElement,
1484{
1485 pub fn get(&self, index: usize) -> Option<T> {
1487 match self {
1488 AuxArray::TargetType(v) => v.get(index),
1489 AuxArray::RawLeBytes(v) => v.get(index),
1490 }
1491 }
1492
1493 pub fn len(&self) -> usize {
1495 match self {
1496 AuxArray::TargetType(a) => a.len(),
1497 AuxArray::RawLeBytes(a) => a.len(),
1498 }
1499 }
1500
1501 pub fn is_empty(&self) -> bool {
1503 self.len() == 0
1504 }
1505
1506 pub fn iter(&self) -> AuxArrayIter<T> {
1508 AuxArrayIter {
1509 index: 0,
1510 array: self,
1511 }
1512 }
1513
1514 fn from_bytes(bytes: &'a [u8]) -> Self {
1516 Self::RawLeBytes(AuxArrayRawLeBytes {
1517 slice: bytes,
1518 phantom_data: PhantomData,
1519 })
1520 }
1521}
1522
1523#[doc(hidden)]
1525#[derive(Debug, PartialEq)]
1526pub struct AuxArrayTargetType<'a, T> {
1527 slice: &'a [T],
1528}
1529
1530impl<T> AuxArrayTargetType<'_, T>
1531where
1532 T: AuxArrayElement,
1533{
1534 fn get(&self, index: usize) -> Option<T> {
1535 self.slice.get(index).copied()
1536 }
1537
1538 fn len(&self) -> usize {
1539 self.slice.len()
1540 }
1541}
1542
1543#[doc(hidden)]
1545#[derive(Debug, PartialEq)]
1546pub struct AuxArrayRawLeBytes<'a, T> {
1547 slice: &'a [u8],
1548 phantom_data: PhantomData<T>,
1549}
1550
1551impl<T> AuxArrayRawLeBytes<'_, T>
1552where
1553 T: AuxArrayElement,
1554{
1555 fn get(&self, index: usize) -> Option<T> {
1556 let type_size = std::mem::size_of::<T>();
1557 if index * type_size + type_size > self.slice.len() {
1558 return None;
1559 }
1560 T::from_le_bytes(&self.slice[index * type_size..][..type_size])
1561 }
1562
1563 fn len(&self) -> usize {
1564 self.slice.len() / std::mem::size_of::<T>()
1565 }
1566}
1567
1568pub struct AuxArrayIter<'a, T> {
1572 index: usize,
1573 array: &'a AuxArray<'a, T>,
1574}
1575
1576impl<T> Iterator for AuxArrayIter<'_, T>
1577where
1578 T: AuxArrayElement,
1579{
1580 type Item = T;
1581 fn next(&mut self) -> Option<Self::Item> {
1582 let value = self.array.get(self.index);
1583 self.index += 1;
1584 value
1585 }
1586
1587 fn size_hint(&self) -> (usize, Option<usize>) {
1588 let array_length = self.array.len() - self.index;
1589 (array_length, Some(array_length))
1590 }
1591}
1592
1593pub struct AuxIter<'a> {
1604 aux: &'a [u8],
1605}
1606
1607impl<'a> Iterator for AuxIter<'a> {
1608 type Item = Result<(&'a [u8], Aux<'a>)>;
1609
1610 fn next(&mut self) -> Option<Self::Item> {
1611 if self.aux.is_empty() {
1613 return None;
1614 }
1615 if (1..=3).contains(&self.aux.len()) {
1617 self.aux = &[];
1619 return Some(Err(Error::BamAuxParsingError));
1620 }
1621 let tag = &self.aux[..2];
1622 Some(unsafe {
1623 let data_ptr = self.aux[2..].as_ptr();
1624 Record::read_aux_field(data_ptr)
1625 .map(|(aux, offset)| {
1626 self.aux = &self.aux[offset..];
1627 (tag, aux)
1628 })
1629 .inspect_err(|_e| {
1630 self.aux = &[];
1632 })
1633 })
1634 }
1635}
1636
1637static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1638static ENCODE_BASE: [u8; 256] = [
1639 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1640 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1641 1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1642 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1643 13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1644 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1645 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1646 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1647 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1648 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1649 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1650];
1651
1652#[inline]
1653fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
1654 (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
1655}
1656
1657#[inline]
1658unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
1659 (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
1660}
1661
1662#[inline]
1663fn decode_base_unchecked(base: u8) -> &'static u8 {
1664 unsafe { DECODE_BASE.get_unchecked(base as usize) }
1665}
1666
1667#[derive(Debug, Copy, Clone)]
1669pub struct Seq<'a> {
1670 pub encoded: &'a [u8],
1671 len: usize,
1672}
1673
1674impl Seq<'_> {
1675 #[inline]
1677 pub fn encoded_base(&self, i: usize) -> u8 {
1678 encoded_base(self.encoded, i)
1679 }
1680
1681 #[inline]
1687 pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
1688 encoded_base_unchecked(self.encoded, i)
1689 }
1690
1691 #[inline]
1699 pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
1700 *decode_base_unchecked(self.encoded_base_unchecked(i))
1701 }
1702
1703 pub fn as_bytes(&self) -> Vec<u8> {
1705 (0..self.len()).map(|i| self[i]).collect()
1706 }
1707
1708 pub fn len(&self) -> usize {
1710 self.len
1711 }
1712
1713 pub fn is_empty(&self) -> bool {
1714 self.len() == 0
1715 }
1716}
1717
1718impl ops::Index<usize> for Seq<'_> {
1719 type Output = u8;
1720
1721 fn index(&self, index: usize) -> &u8 {
1723 decode_base_unchecked(self.encoded_base(index))
1724 }
1725}
1726
1727unsafe impl Send for Seq<'_> {}
1728unsafe impl Sync for Seq<'_> {}
1729
1730#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1731#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1732pub enum Cigar {
1733 Match(u32), Ins(u32), Del(u32), RefSkip(u32), SoftClip(u32), HardClip(u32), Pad(u32), Equal(u32), Diff(u32), }
1743
1744impl Cigar {
1745 fn encode(self) -> u32 {
1746 match self {
1747 Cigar::Match(len) => len << 4, Cigar::Ins(len) => (len << 4) | 1,
1749 Cigar::Del(len) => (len << 4) | 2,
1750 Cigar::RefSkip(len) => (len << 4) | 3,
1751 Cigar::SoftClip(len) => (len << 4) | 4,
1752 Cigar::HardClip(len) => (len << 4) | 5,
1753 Cigar::Pad(len) => (len << 4) | 6,
1754 Cigar::Equal(len) => (len << 4) | 7,
1755 Cigar::Diff(len) => (len << 4) | 8,
1756 }
1757 }
1758
1759 pub fn len(self) -> u32 {
1761 match self {
1762 Cigar::Match(len) => len,
1763 Cigar::Ins(len) => len,
1764 Cigar::Del(len) => len,
1765 Cigar::RefSkip(len) => len,
1766 Cigar::SoftClip(len) => len,
1767 Cigar::HardClip(len) => len,
1768 Cigar::Pad(len) => len,
1769 Cigar::Equal(len) => len,
1770 Cigar::Diff(len) => len,
1771 }
1772 }
1773
1774 pub fn is_empty(self) -> bool {
1775 self.len() == 0
1776 }
1777
1778 pub fn char(self) -> char {
1780 match self {
1781 Cigar::Match(_) => 'M',
1782 Cigar::Ins(_) => 'I',
1783 Cigar::Del(_) => 'D',
1784 Cigar::RefSkip(_) => 'N',
1785 Cigar::SoftClip(_) => 'S',
1786 Cigar::HardClip(_) => 'H',
1787 Cigar::Pad(_) => 'P',
1788 Cigar::Equal(_) => '=',
1789 Cigar::Diff(_) => 'X',
1790 }
1791 }
1792}
1793
1794impl fmt::Display for Cigar {
1795 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1796 fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
1797 }
1798}
1799
1800unsafe impl Send for Cigar {}
1801unsafe impl Sync for Cigar {}
1802
1803custom_derive! {
1804 #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1823 #[derive(NewtypeDeref,
1824 NewtypeDerefMut,
1825 NewtypeIndex(usize),
1826 NewtypeIndexMut(usize),
1827 NewtypeFrom,
1828 PartialEq,
1829 PartialOrd,
1830 Eq,
1831 NewtypeDebug,
1832 Clone,
1833 Hash
1834 )]
1835 pub struct CigarString(pub Vec<Cigar>);
1836}
1837
1838impl CigarString {
1839 pub fn into_view(self, pos: i64) -> CigarStringView {
1841 CigarStringView::new(self, pos)
1842 }
1843
1844 pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
1849 match alignment.mode {
1850 AlignmentMode::Global => {
1851 panic!(" Bam cigar fn not supported for Global Alignment mode")
1852 }
1853 AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
1854 _ => {}
1855 }
1856
1857 let mut cigar = Vec::new();
1858 if alignment.operations.is_empty() {
1859 return CigarString(cigar);
1860 }
1861
1862 let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
1863 AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
1864 AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
1865 AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
1866 AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
1867 _ => {}
1868 };
1869
1870 if alignment.xstart > 0 {
1871 cigar.push(if hard_clip {
1872 Cigar::HardClip(alignment.xstart as u32)
1873 } else {
1874 Cigar::SoftClip(alignment.xstart as u32)
1875 });
1876 }
1877
1878 let mut last = alignment.operations[0];
1879 let mut k = 1u32;
1880 for &op in alignment.operations[1..].iter() {
1881 if op == last {
1882 k += 1;
1883 } else {
1884 add_op(last, k, &mut cigar);
1885 k = 1;
1886 }
1887 last = op;
1888 }
1889 add_op(last, k, &mut cigar);
1890 if alignment.xlen > alignment.xend {
1891 cigar.push(if hard_clip {
1892 Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
1893 } else {
1894 Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
1895 });
1896 }
1897
1898 CigarString(cigar)
1899 }
1900}
1901
1902impl TryFrom<&[u8]> for CigarString {
1903 type Error = Error;
1904
1905 fn try_from(bytes: &[u8]) -> Result<Self> {
1926 let mut inner = Vec::new();
1927 let mut i = 0;
1928 let text_len = bytes.len();
1929 while i < text_len {
1930 let mut j = i;
1931 while j < text_len && bytes[j].is_ascii_digit() {
1932 j += 1;
1933 }
1934 if i == j {
1936 return Err(Error::BamParseCigar {
1937 msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
1938 });
1939 }
1940 let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
1942 msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
1943 })?;
1944 let n = s.parse().map_err(|_| Error::BamParseCigar {
1945 msg: format!("Unable to parse &str '{:?}' to u32.", s),
1946 })?;
1947 let op = &bytes[j];
1949 inner.push(match op {
1950 b'M' => Cigar::Match(n),
1951 b'I' => Cigar::Ins(n),
1952 b'D' => Cigar::Del(n),
1953 b'N' => Cigar::RefSkip(n),
1954 b'H' => {
1955 if i == 0 || j + 1 == text_len {
1956 Cigar::HardClip(n)
1957 } else {
1958 return Err(Error::BamParseCigar {
1959 msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
1960 .to_owned(),
1961 });
1962 }
1963 }
1964 b'S' => {
1965 if i == 0
1966 || j + 1 == text_len
1967 || bytes[i-1] == b'H'
1968 || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
1969 Cigar::SoftClip(n)
1970 } else {
1971 return Err(Error::BamParseCigar {
1972 msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
1973 .to_owned(),
1974 });
1975 }
1976 },
1977 b'P' => Cigar::Pad(n),
1978 b'=' => Cigar::Equal(n),
1979 b'X' => Cigar::Diff(n),
1980 op => {
1981 return Err(Error::BamParseCigar {
1982 msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
1983 })
1984 }
1985 });
1986 i = j + 1;
1987 }
1988 Ok(CigarString(inner))
1989 }
1990}
1991
1992impl TryFrom<&str> for CigarString {
1993 type Error = Error;
1994
1995 fn try_from(text: &str) -> Result<Self> {
2016 let bytes = text.as_bytes();
2017 if text.chars().count() != bytes.len() {
2018 return Err(Error::BamParseCigar {
2019 msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2020 });
2021 }
2022 CigarString::try_from(bytes)
2023 }
2024}
2025
2026impl<'a> CigarString {
2027 pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2028 self.into_iter()
2029 }
2030}
2031
2032impl<'a> IntoIterator for &'a CigarString {
2033 type Item = &'a Cigar;
2034 type IntoIter = ::std::slice::Iter<'a, Cigar>;
2035
2036 fn into_iter(self) -> Self::IntoIter {
2037 self.0.iter()
2038 }
2039}
2040
2041impl fmt::Display for CigarString {
2042 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2043 for op in self {
2044 fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
2045 }
2046 Ok(())
2047 }
2048}
2049
2050fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
2052 match (cigar.next(), cigar.next()) {
2053 (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
2054 *s as i64
2055 }
2056 _ => 0,
2057 }
2058}
2059
2060#[derive(Eq, PartialEq, Clone, Debug)]
2061pub struct CigarStringView {
2062 inner: CigarString,
2063 pos: i64,
2064}
2065
2066impl CigarStringView {
2067 pub fn new(c: CigarString, pos: i64) -> CigarStringView {
2069 CigarStringView { inner: c, pos }
2070 }
2071
2072 pub fn end_pos(&self) -> i64 {
2074 let mut pos = self.pos;
2075 for c in self {
2076 match c {
2077 Cigar::Match(l)
2078 | Cigar::RefSkip(l)
2079 | Cigar::Del(l)
2080 | Cigar::Equal(l)
2081 | Cigar::Diff(l) => pos += *l as i64,
2082 Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2084 }
2085 }
2086 pos
2087 }
2088
2089 pub fn pos(&self) -> i64 {
2091 self.pos
2092 }
2093
2094 pub fn leading_softclips(&self) -> i64 {
2096 calc_softclips(self.iter())
2097 }
2098
2099 pub fn trailing_softclips(&self) -> i64 {
2101 calc_softclips(self.iter().rev())
2102 }
2103
2104 pub fn leading_hardclips(&self) -> i64 {
2106 self.first().map_or(0, |cigar| {
2107 if let Cigar::HardClip(s) = cigar {
2108 *s as i64
2109 } else {
2110 0
2111 }
2112 })
2113 }
2114
2115 pub fn trailing_hardclips(&self) -> i64 {
2117 self.last().map_or(0, |cigar| {
2118 if let Cigar::HardClip(s) = cigar {
2119 *s as i64
2120 } else {
2121 0
2122 }
2123 })
2124 }
2125
2126 pub fn read_pos(
2136 &self,
2137 ref_pos: u32,
2138 include_softclips: bool,
2139 include_dels: bool,
2140 ) -> Result<Option<u32>> {
2141 let mut rpos = self.pos as u32; let mut qpos = 0u32; let mut j = 0; for (i, c) in self.iter().enumerate() {
2148 match c {
2149 Cigar::Match(_) |
2150 Cigar::Diff(_) |
2151 Cigar::Equal(_) |
2152 Cigar::Ins(_) => {
2155 j = i;
2156 break;
2157 },
2158 Cigar::SoftClip(l) => {
2159 j = i;
2160 if include_softclips {
2161 rpos = rpos.saturating_sub(*l);
2165 }
2166 break;
2167 },
2168 Cigar::Del(l) => {
2169 rpos += l;
2177 },
2178 Cigar::RefSkip(_) => {
2179 return Err(Error::BamUnexpectedCigarOperation {
2180 msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
2181 });
2182 },
2183 Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2184 return Err(Error::BamUnexpectedCigarOperation{
2185 msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2186 });
2187 },
2188 Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
2190 Cigar::Pad(_) | Cigar::HardClip(_) => ()
2192 }
2193 }
2194
2195 let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2196 cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2197 };
2198
2199 while rpos <= ref_pos && j < self.len() {
2200 match self[j] {
2201 Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
2203 qpos += ref_pos - rpos;
2206 return Ok(Some(qpos));
2207 }
2208 Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2209 qpos += ref_pos - rpos;
2210 return Ok(Some(qpos));
2211 }
2212 Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2213 return Ok(Some(qpos));
2215 }
2216 Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
2218 rpos += l;
2219 qpos += l;
2220 j += 1;
2221 }
2222 Cigar::SoftClip(l) => {
2223 qpos += l;
2224 j += 1;
2225 if include_softclips {
2226 rpos += l;
2227 }
2228 }
2229 Cigar::Ins(l) => {
2230 qpos += l;
2231 j += 1;
2232 }
2233 Cigar::RefSkip(l) | Cigar::Del(l) => {
2234 rpos += l;
2235 j += 1;
2236 }
2237 Cigar::Pad(_) => {
2238 j += 1;
2239 }
2240 Cigar::HardClip(_) if j < self.len() - 1 => {
2241 return Err(Error::BamUnexpectedCigarOperation{
2242 msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2243 });
2244 }
2245 Cigar::HardClip(_) => return Ok(None),
2246 }
2247 }
2248
2249 Ok(None)
2250 }
2251
2252 pub fn take(self) -> CigarString {
2254 self.inner
2255 }
2256}
2257
2258impl ops::Deref for CigarStringView {
2259 type Target = CigarString;
2260
2261 fn deref(&self) -> &CigarString {
2262 &self.inner
2263 }
2264}
2265
2266impl ops::Index<usize> for CigarStringView {
2267 type Output = Cigar;
2268
2269 fn index(&self, index: usize) -> &Cigar {
2270 self.inner.index(index)
2271 }
2272}
2273
2274impl ops::IndexMut<usize> for CigarStringView {
2275 fn index_mut(&mut self, index: usize) -> &mut Cigar {
2276 self.inner.index_mut(index)
2277 }
2278}
2279
2280impl<'a> CigarStringView {
2281 pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2282 self.inner.into_iter()
2283 }
2284}
2285
2286impl<'a> IntoIterator for &'a CigarStringView {
2287 type Item = &'a Cigar;
2288 type IntoIter = ::std::slice::Iter<'a, Cigar>;
2289
2290 fn into_iter(self) -> Self::IntoIter {
2291 self.inner.into_iter()
2292 }
2293}
2294
2295impl fmt::Display for CigarStringView {
2296 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2297 self.inner.fmt(fmt)
2298 }
2299}
2300
2301pub struct BaseModificationMetadata {
2302 pub strand: i32,
2303 pub implicit: i32,
2304 pub canonical: u8,
2305}
2306
2307pub struct BaseModificationState<'a> {
2310 record: &'a Record,
2311 state: *mut htslib::hts_base_mod_state,
2312 buffer: Vec<htslib::hts_base_mod>,
2313 buffer_pos: i32,
2314}
2315
2316impl BaseModificationState<'_> {
2317 fn new(r: &Record) -> Result<BaseModificationState<'_>> {
2322 let mut bm = unsafe {
2323 BaseModificationState {
2324 record: r,
2325 state: hts_sys::hts_base_mod_state_alloc(),
2326 buffer: Vec::new(),
2327 buffer_pos: -1,
2328 }
2329 };
2330
2331 if bm.state.is_null() {
2332 panic!("Unable to allocate memory for hts_base_mod_state");
2333 }
2334
2335 unsafe {
2337 let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
2338 if ret != 0 {
2339 return Err(Error::BamBaseModificationTagNotFound);
2340 }
2341 }
2342
2343 let types = bm.recorded();
2344 bm.buffer.reserve(types.len());
2345 Ok(bm)
2346 }
2347
2348 pub fn buffer_next_mods(&mut self) -> Result<usize> {
2349 unsafe {
2350 let ret = hts_sys::bam_next_basemod(
2351 self.record.inner_ptr(),
2352 self.state,
2353 self.buffer.as_mut_ptr(),
2354 self.buffer.capacity() as i32,
2355 &mut self.buffer_pos,
2356 );
2357
2358 if ret < 0 {
2359 return Err(Error::BamBaseModificationIterationFailed);
2360 }
2361
2362 if ret as usize > self.buffer.capacity() {
2366 return Err(Error::BamBaseModificationTooManyMods);
2367 }
2368
2369 self.buffer.set_len(ret as usize);
2372
2373 Ok(ret as usize)
2374 }
2375 }
2376
2377 pub fn recorded<'a>(&self) -> &'a [i32] {
2380 unsafe {
2381 let mut n: i32 = 0;
2382 let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2383
2384 if data_ptr.is_null() {
2386 panic!("Unable to obtain pointer to base modifications");
2387 }
2388 assert!(n >= 0);
2389 slice::from_raw_parts(data_ptr, n as usize)
2390 }
2391 }
2392
2393 pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2399 unsafe {
2400 let mut strand: i32 = 0;
2401 let mut implicit: i32 = 0;
2402 let mut canonical: c_char = 0;
2404
2405 let ret = hts_sys::bam_mods_query_type(
2406 self.state,
2407 code,
2408 &mut strand,
2409 &mut implicit,
2410 &mut canonical,
2411 );
2412 if ret == -1 {
2413 Err(Error::BamBaseModificationTypeNotFound)
2414 } else {
2415 Ok(BaseModificationMetadata {
2416 strand,
2417 implicit,
2418 canonical: canonical.try_into().unwrap(),
2419 })
2420 }
2421 }
2422 }
2423}
2424
2425impl Drop for BaseModificationState<'_> {
2426 fn drop<'a>(&mut self) {
2427 unsafe {
2428 hts_sys::hts_base_mod_state_free(self.state);
2429 }
2430 }
2431}
2432
2433pub struct BaseModificationsPositionIter<'a> {
2436 mod_state: BaseModificationState<'a>,
2437}
2438
2439impl BaseModificationsPositionIter<'_> {
2440 fn new(r: &Record) -> Result<BaseModificationsPositionIter<'_>> {
2441 let state = BaseModificationState::new(r)?;
2442 Ok(BaseModificationsPositionIter { mod_state: state })
2443 }
2444
2445 pub fn recorded<'a>(&self) -> &'a [i32] {
2446 self.mod_state.recorded()
2447 }
2448
2449 pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2450 self.mod_state.query_type(code)
2451 }
2452}
2453
2454impl Iterator for BaseModificationsPositionIter<'_> {
2455 type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2456
2457 fn next(&mut self) -> Option<Self::Item> {
2458 let ret = self.mod_state.buffer_next_mods();
2459
2460 match ret {
2465 Ok(num_mods) => {
2466 if num_mods == 0 {
2467 None
2468 } else {
2469 let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
2470 Some(Ok(data))
2471 }
2472 }
2473 Err(e) => Some(Err(e)),
2474 }
2475 }
2476}
2477
2478pub struct BaseModificationsIter<'a> {
2481 mod_state: BaseModificationState<'a>,
2482 buffer_idx: usize,
2483}
2484
2485impl BaseModificationsIter<'_> {
2486 fn new(r: &Record) -> Result<BaseModificationsIter<'_>> {
2487 let state = BaseModificationState::new(r)?;
2488 Ok(BaseModificationsIter {
2489 mod_state: state,
2490 buffer_idx: 0,
2491 })
2492 }
2493
2494 pub fn recorded<'a>(&self) -> &'a [i32] {
2495 self.mod_state.recorded()
2496 }
2497
2498 pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2499 self.mod_state.query_type(code)
2500 }
2501}
2502
2503impl Iterator for BaseModificationsIter<'_> {
2504 type Item = Result<(i32, hts_sys::hts_base_mod)>;
2505
2506 fn next(&mut self) -> Option<Self::Item> {
2507 if self.buffer_idx == self.mod_state.buffer.len() {
2508 let ret = self.mod_state.buffer_next_mods();
2511
2512 match ret {
2513 Ok(num_mods) => {
2514 if num_mods == 0 {
2515 return None;
2517 } else {
2518 self.buffer_idx = 0;
2520 }
2521 }
2522 Err(e) => return Some(Err(e)),
2523 }
2524 }
2525
2526 assert!(self.buffer_idx < self.mod_state.buffer.len());
2528 let data = (
2529 self.mod_state.buffer_pos,
2530 self.mod_state.buffer[self.buffer_idx],
2531 );
2532 self.buffer_idx += 1;
2533 Some(Ok(data))
2534 }
2535}
2536
2537#[cfg(test)]
2538mod tests {
2539 use super::*;
2540
2541 #[test]
2542 fn test_cigar_string() {
2543 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2544
2545 assert_eq!(cigar[0], Cigar::Match(100));
2546 assert_eq!(format!("{}", cigar), "100M10S");
2547 for op in &cigar {
2548 println!("{}", op);
2549 }
2550 }
2551
2552 #[test]
2553 fn test_cigar_string_view_pos() {
2554 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2555 assert_eq!(cigar.pos(), 5);
2556 }
2557
2558 #[test]
2559 fn test_cigar_string_leading_softclips() {
2560 let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2561 assert_eq!(cigar.leading_softclips(), 10);
2562 let cigar2 = CigarString(vec![
2563 Cigar::HardClip(5),
2564 Cigar::SoftClip(10),
2565 Cigar::Match(100),
2566 ])
2567 .into_view(0);
2568 assert_eq!(cigar2.leading_softclips(), 10);
2569 }
2570
2571 #[test]
2572 fn test_cigar_string_trailing_softclips() {
2573 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2574 assert_eq!(cigar.trailing_softclips(), 10);
2575 let cigar2 = CigarString(vec![
2576 Cigar::Match(100),
2577 Cigar::SoftClip(10),
2578 Cigar::HardClip(5),
2579 ])
2580 .into_view(0);
2581 assert_eq!(cigar2.trailing_softclips(), 10);
2582 }
2583
2584 #[test]
2585 fn test_cigar_read_pos() {
2586 let vpos = 5; let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2594 assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2595
2596 let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2604 assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2605 assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2606
2607 let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2616 assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2617 assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2618
2619 let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2625 assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2626
2627 let c05 = CigarString(vec![
2633 Cigar::Equal(2),
2634 Cigar::Del(2),
2635 Cigar::Diff(1),
2636 Cigar::Equal(2),
2637 ])
2638 .into_view(0);
2639 assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2640
2641 let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2647 assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2648 assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2649
2650 let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2656 assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2657 assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2658
2659 let c08 = CigarString(vec![
2665 Cigar::Equal(1),
2666 Cigar::Diff(1),
2667 Cigar::RefSkip(3),
2668 Cigar::Match(2),
2669 ])
2670 .into_view(2);
2671 assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2672 assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2673
2674 let c09 = CigarString(vec![
2680 Cigar::HardClip(3),
2681 Cigar::Equal(2),
2682 Cigar::HardClip(3),
2683 Cigar::Equal(2),
2684 ])
2685 .into_view(2);
2686 assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2687
2688 let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2694 assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2695
2696 let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2702 assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2703
2704 let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2710 assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2711
2712 let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2718 assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2719
2720 let vpos2 = 15;
2722 let c14 = CigarString(vec![
2727 Cigar::HardClip(5),
2728 Cigar::SoftClip(3),
2729 Cigar::Equal(1),
2730 Cigar::Pad(2),
2731 Cigar::Match(1),
2732 Cigar::Diff(1),
2733 Cigar::Ins(3),
2734 Cigar::Match(2),
2735 Cigar::Del(1),
2736 Cigar::Ins(2),
2737 Cigar::Equal(2),
2738 Cigar::RefSkip(3),
2739 Cigar::Match(3),
2740 Cigar::Equal(2),
2741 Cigar::SoftClip(5),
2742 Cigar::HardClip(2),
2743 ])
2744 .into_view(0);
2745 assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2746
2747 let c15 =
2753 CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2754 assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2755
2756 let c16 =
2759 CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2760 assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2761 }
2762
2763 #[test]
2764 fn test_clone() {
2765 let mut rec = Record::new();
2766 rec.set_pos(300);
2767 rec.set_qname(b"read1");
2768 let clone = rec.clone();
2769 assert_eq!(rec, clone);
2770 }
2771
2772 #[test]
2773 fn test_flags() {
2774 let mut rec = Record::new();
2775
2776 rec.set_paired();
2777 assert_eq!(rec.is_paired(), true);
2778
2779 rec.set_supplementary();
2780 assert_eq!(rec.is_supplementary(), true);
2781 assert_eq!(rec.is_supplementary(), true);
2782
2783 rec.unset_paired();
2784 assert_eq!(rec.is_paired(), false);
2785 assert_eq!(rec.is_supplementary(), true);
2786
2787 rec.unset_supplementary();
2788 assert_eq!(rec.is_paired(), false);
2789 assert_eq!(rec.is_supplementary(), false);
2790 }
2791
2792 #[test]
2793 fn test_cigar_parse() {
2794 let cigar = "1S20M1D2I3X1=2H";
2795 let parsed = CigarString::try_from(cigar).unwrap();
2796 assert_eq!(parsed.to_string(), cigar);
2797 }
2798}
2799
2800#[cfg(test)]
2801mod alignment_cigar_tests {
2802 use super::*;
2803 use crate::bam::{Read, Reader};
2804 use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2805 use bio_types::alignment::{Alignment, AlignmentMode};
2806
2807 #[test]
2808 fn test_cigar() {
2809 let alignment = Alignment {
2810 score: 5,
2811 xstart: 3,
2812 ystart: 0,
2813 xend: 9,
2814 yend: 10,
2815 ylen: 10,
2816 xlen: 10,
2817 operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2818 mode: AlignmentMode::Semiglobal,
2819 };
2820 assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2821 assert_eq!(
2822 CigarString::from_alignment(&alignment, false).0,
2823 vec![
2824 Cigar::SoftClip(3),
2825 Cigar::Equal(3),
2826 Cigar::Diff(1),
2827 Cigar::Ins(2),
2828 Cigar::Del(2),
2829 Cigar::SoftClip(1),
2830 ]
2831 );
2832
2833 let alignment = Alignment {
2834 score: 5,
2835 xstart: 0,
2836 ystart: 5,
2837 xend: 4,
2838 yend: 10,
2839 ylen: 10,
2840 xlen: 5,
2841 operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
2842 mode: AlignmentMode::Custom,
2843 };
2844 assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
2845 assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
2846 assert_eq!(
2847 CigarString::from_alignment(&alignment, false).0,
2848 vec![
2849 Cigar::Equal(1),
2850 Cigar::Diff(2),
2851 Cigar::Ins(1),
2852 Cigar::Del(2),
2853 Cigar::SoftClip(1),
2854 ]
2855 );
2856 assert_eq!(
2857 CigarString::from_alignment(&alignment, true).0,
2858 vec![
2859 Cigar::Equal(1),
2860 Cigar::Diff(2),
2861 Cigar::Ins(1),
2862 Cigar::Del(2),
2863 Cigar::HardClip(1),
2864 ]
2865 );
2866
2867 let alignment = Alignment {
2868 score: 5,
2869 xstart: 0,
2870 ystart: 5,
2871 xend: 3,
2872 yend: 8,
2873 ylen: 10,
2874 xlen: 3,
2875 operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
2876 mode: AlignmentMode::Custom,
2877 };
2878 assert_eq!(alignment.cigar(false), "1X1=1X");
2879 assert_eq!(
2880 CigarString::from_alignment(&alignment, false).0,
2881 vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2882 );
2883
2884 let alignment = Alignment {
2885 score: 5,
2886 xstart: 0,
2887 ystart: 5,
2888 xend: 3,
2889 yend: 8,
2890 ylen: 10,
2891 xlen: 3,
2892 operations: vec![Subst, Match, Subst],
2893 mode: AlignmentMode::Semiglobal,
2894 };
2895 assert_eq!(alignment.cigar(false), "1X1=1X");
2896 assert_eq!(
2897 CigarString::from_alignment(&alignment, false).0,
2898 vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2899 );
2900 }
2901
2902 #[test]
2903 fn test_read_orientation_f1r2() {
2904 let mut bam = Reader::from_path("test/test_paired.sam").unwrap();
2905
2906 for res in bam.records() {
2907 let record = res.unwrap();
2908 assert_eq!(
2909 record.read_pair_orientation(),
2910 SequenceReadPairOrientation::F1R2
2911 );
2912 }
2913 }
2914
2915 #[test]
2916 fn test_read_orientation_f2r1() {
2917 let mut bam = Reader::from_path("test/test_nonstandard_orientation.sam").unwrap();
2918
2919 for res in bam.records() {
2920 let record = res.unwrap();
2921 assert_eq!(
2922 record.read_pair_orientation(),
2923 SequenceReadPairOrientation::F2R1
2924 );
2925 }
2926 }
2927
2928 #[test]
2929 fn test_read_orientation_supplementary() {
2930 let mut bam = Reader::from_path("test/test_orientation_supplementary.sam").unwrap();
2931
2932 for res in bam.records() {
2933 let record = res.unwrap();
2934 assert_eq!(
2935 record.read_pair_orientation(),
2936 SequenceReadPairOrientation::F2R1
2937 );
2938 }
2939 }
2940
2941 #[test]
2942 pub fn test_cigar_parsing_non_ascii_error() {
2943 let cigar_str = "43ጷ";
2944 let expected_error = Err(Error::BamParseCigar {
2945 msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2946 });
2947
2948 let result = CigarString::try_from(cigar_str);
2949 assert_eq!(expected_error, result);
2950 }
2951
2952 #[test]
2953 pub fn test_cigar_parsing() {
2954 let cigar_strs = [
2956 "1H10M4D100I300N1102=10P25X11S", "100M", "", "1H1=1H", "1S1=1S", "11H11S11=11S11H", "10H",
2963 "10S",
2964 ];
2965 let cigars = [
2967 CigarString(vec![
2968 Cigar::HardClip(1),
2969 Cigar::Match(10),
2970 Cigar::Del(4),
2971 Cigar::Ins(100),
2972 Cigar::RefSkip(300),
2973 Cigar::Equal(1102),
2974 Cigar::Pad(10),
2975 Cigar::Diff(25),
2976 Cigar::SoftClip(11),
2977 ]),
2978 CigarString(vec![Cigar::Match(100)]),
2979 CigarString(vec![]),
2980 CigarString(vec![
2981 Cigar::HardClip(1),
2982 Cigar::Equal(1),
2983 Cigar::HardClip(1),
2984 ]),
2985 CigarString(vec![
2986 Cigar::SoftClip(1),
2987 Cigar::Equal(1),
2988 Cigar::SoftClip(1),
2989 ]),
2990 CigarString(vec![
2991 Cigar::HardClip(11),
2992 Cigar::SoftClip(11),
2993 Cigar::Equal(11),
2994 Cigar::SoftClip(11),
2995 Cigar::HardClip(11),
2996 ]),
2997 CigarString(vec![Cigar::HardClip(10)]),
2998 CigarString(vec![Cigar::SoftClip(10)]),
2999 ];
3000 for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
3002 let cigar_parse = CigarString::try_from(cigar_str)
3003 .unwrap_or_else(|_| panic!("Unable to parse cigar: {}", cigar_str));
3004 assert_eq!(&cigar_parse, truth);
3005 }
3006 }
3007}
3008
3009#[cfg(test)]
3010mod basemod_tests {
3011 use crate::bam::{Read, Reader};
3012
3013 #[test]
3014 pub fn test_count_recorded() {
3015 let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3016
3017 for r in bam.records() {
3018 let record = r.unwrap();
3019 if let Ok(mods) = record.basemods_iter() {
3020 let n = mods.recorded().len();
3021 assert_eq!(n, 3);
3022 };
3023 }
3024 }
3025
3026 #[test]
3027 pub fn test_query_type() {
3028 let mut bam = Reader::from_path("test/base_mods/MM-orient.sam").unwrap();
3029
3030 let mut n_fwd = 0;
3031 let mut n_rev = 0;
3032
3033 for r in bam.records() {
3034 let record = r.unwrap();
3035 if let Ok(mods) = record.basemods_iter() {
3036 for mod_code in mods.recorded() {
3037 if let Ok(mod_metadata) = mods.query_type(*mod_code) {
3038 if mod_metadata.strand == 0 {
3039 n_fwd += 1;
3040 }
3041 if mod_metadata.strand == 1 {
3042 n_rev += 1;
3043 }
3044 }
3045 }
3046 };
3047 }
3048 assert_eq!(n_fwd, 2);
3049 assert_eq!(n_rev, 2);
3050 }
3051
3052 #[test]
3053 pub fn test_mod_iter() {
3054 let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3055 let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
3056 let mut i = 0;
3057
3058 for r in bam.records() {
3059 let record = r.unwrap();
3060 for res in record.basemods_iter().unwrap().flatten() {
3061 let (position, _m) = res;
3062 assert_eq!(position, expected_positions[i]);
3063 i += 1;
3064 }
3065 }
3066 }
3067
3068 #[test]
3069 pub fn test_position_iter() {
3070 let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3071 let expected_positions = [1, 7, 12, 13, 22, 30, 31];
3072 let expected_counts = [1, 1, 1, 2, 1, 1, 1];
3073 let mut i = 0;
3074
3075 for r in bam.records() {
3076 let record = r.unwrap();
3077 for res in record.basemods_position_iter().unwrap().flatten() {
3078 let (position, elements) = res;
3079 assert_eq!(position, expected_positions[i]);
3080 assert_eq!(elements.len(), expected_counts[i]);
3081 i += 1;
3082 }
3083 }
3084 }
3085}