1use std::borrow::Cow;
7use std::convert::TryFrom;
8use std::convert::TryInto;
9use std::ffi;
10use std::fmt;
11use std::marker::PhantomData;
12use std::mem::{size_of, MaybeUninit};
13use std::ops;
14use std::ops::RangeBounds;
15use std::os::raw::c_char;
16use std::rc::Rc;
17use std::slice;
18use std::slice::SliceIndex;
19use std::str;
20use std::u32;
21
22use byteorder::{LittleEndian, ReadBytesExt};
23
24use crate::bam::Error;
25use crate::bam::HeaderView;
26use crate::errors::Result;
27use crate::htslib;
28use crate::utils;
29#[cfg(feature = "serde_feature")]
30use serde::{self, Deserialize, Serialize};
31
32use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
33use bio_types::genome;
34use bio_types::sequence::SequenceRead;
35use bio_types::sequence::SequenceReadPairOrientation;
36use bio_types::strand::ReqStrand;
37
38macro_rules! flag {
40 ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
41 pub fn $get(&self) -> bool {
42 self.inner().core.flag & $bit != 0
43 }
44
45 pub fn $set(&mut self) {
46 self.inner_mut().core.flag |= $bit;
47 }
48
49 pub fn $unset(&mut self) {
50 self.inner_mut().core.flag &= !$bit;
51 }
52 };
53}
54
55pub struct Record {
57 pub inner: htslib::bam1_t,
58 own: bool,
59 cigar: Option<CigarStringView>,
60 header: Option<Rc<HeaderView>>,
61}
62
63unsafe impl Send for Record {}
64unsafe impl Sync for Record {}
65
66impl Clone for Record {
67 fn clone(&self) -> Self {
68 let mut copy = Record::new();
69 unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
70 copy
71 }
72}
73
74impl PartialEq for Record {
75 fn eq(&self, other: &Record) -> bool {
76 self.tid() == other.tid()
77 && self.pos() == other.pos()
78 && self.bin() == other.bin()
79 && self.mapq() == other.mapq()
80 && self.flags() == other.flags()
81 && self.mtid() == other.mtid()
82 && self.mpos() == other.mpos()
83 && self.insert_size() == other.insert_size()
84 && self.data() == other.data()
85 && self.inner().core.l_extranul == other.inner().core.l_extranul
86 }
87}
88
89impl Eq for Record {}
90
91impl fmt::Debug for Record {
92 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
93 fmt.write_fmt(format_args!(
94 "Record(tid: {}, pos: {})",
95 self.tid(),
96 self.pos()
97 ))
98 }
99}
100
101impl Default for Record {
102 fn default() -> Self {
103 Self::new()
104 }
105}
106
107#[inline]
108fn extranul_from_qname(qname: &[u8]) -> usize {
109 let qlen = qname.len() + 1;
110 if qlen % 4 != 0 {
111 4 - qlen % 4
112 } else {
113 0
114 }
115}
116
117impl Record {
118 pub fn new() -> Self {
120 let mut record = Record {
121 inner: unsafe { MaybeUninit::zeroed().assume_init() },
122 own: true,
123 cigar: None,
124 header: None,
125 };
126 record.set_qname(b"");
129 record.set_unmapped();
132 record.set_tid(-1);
133 record.set_pos(-1);
134 record.set_mpos(-1);
135 record.set_mtid(-1);
136 record
137 }
138
139 pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
140 Record {
141 inner: {
142 #[allow(clippy::uninit_assumed_init)]
143 let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
144 unsafe {
145 ::libc::memcpy(
146 &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
147 from as *const ::libc::c_void,
148 size_of::<htslib::bam1_t>(),
149 );
150 }
151 inner
152 },
153 own: false,
154 cigar: None,
155 header: None,
156 }
157 }
158
159 pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
161 let mut record = Self::new();
162
163 let mut sam_copy = Vec::with_capacity(sam.len() + 1);
164 sam_copy.extend(sam);
165 sam_copy.push(0);
166
167 let mut sam_string = htslib::kstring_t {
168 s: sam_copy.as_ptr() as *mut c_char,
169 l: sam_copy.len(),
170 m: sam_copy.len(),
171 };
172
173 let succ = unsafe {
174 htslib::sam_parse1(
175 &mut sam_string,
176 header_view.inner_ptr() as *mut htslib::bam_hdr_t,
177 record.inner_ptr_mut(),
178 )
179 };
180
181 if succ == 0 {
182 Ok(record)
183 } else {
184 Err(Error::BamParseSAM {
185 rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
186 })
187 }
188 }
189
190 pub fn set_header(&mut self, header: Rc<HeaderView>) {
191 self.header = Some(header);
192 }
193
194 pub(super) fn data(&self) -> &[u8] {
195 unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
196 }
197
198 #[inline]
199 pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
200 &mut self.inner
201 }
202
203 #[inline]
204 pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
205 &mut self.inner as *mut htslib::bam1_t
206 }
207
208 #[inline]
209 pub fn inner(&self) -> &htslib::bam1_t {
210 &self.inner
211 }
212
213 #[inline]
214 pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
215 &self.inner as *const htslib::bam1_t
216 }
217
218 pub fn tid(&self) -> i32 {
220 self.inner().core.tid
221 }
222
223 pub fn set_tid(&mut self, tid: i32) {
225 self.inner_mut().core.tid = tid;
226 }
227
228 pub fn pos(&self) -> i64 {
230 self.inner().core.pos
231 }
232
233 pub fn set_pos(&mut self, pos: i64) {
235 self.inner_mut().core.pos = pos;
236 }
237
238 pub fn bin(&self) -> u16 {
239 self.inner().core.bin
240 }
241
242 pub fn set_bin(&mut self, bin: u16) {
243 self.inner_mut().core.bin = bin;
244 }
245
246 pub fn mapq(&self) -> u8 {
248 self.inner().core.qual
249 }
250
251 pub fn set_mapq(&mut self, mapq: u8) {
253 self.inner_mut().core.qual = mapq;
254 }
255
256 pub fn strand(&self) -> ReqStrand {
258 let reverse = self.flags() & 0x10 != 0;
259 if reverse {
260 ReqStrand::Reverse
261 } else {
262 ReqStrand::Forward
263 }
264 }
265
266 pub fn flags(&self) -> u16 {
268 self.inner().core.flag
269 }
270
271 pub fn set_flags(&mut self, flags: u16) {
273 self.inner_mut().core.flag = flags;
274 }
275
276 pub fn unset_flags(&mut self) {
278 self.inner_mut().core.flag = 0;
279 }
280
281 pub fn mtid(&self) -> i32 {
283 self.inner().core.mtid
284 }
285
286 pub fn set_mtid(&mut self, mtid: i32) {
288 self.inner_mut().core.mtid = mtid;
289 }
290
291 pub fn mpos(&self) -> i64 {
293 self.inner().core.mpos
294 }
295
296 pub fn set_mpos(&mut self, mpos: i64) {
298 self.inner_mut().core.mpos = mpos;
299 }
300
301 pub fn insert_size(&self) -> i64 {
303 self.inner().core.isize_
304 }
305
306 pub fn set_insert_size(&mut self, insert_size: i64) {
308 self.inner_mut().core.isize_ = insert_size;
309 }
310
311 fn qname_capacity(&self) -> usize {
312 self.inner().core.l_qname as usize
313 }
314
315 fn qname_len(&self) -> usize {
316 self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
318 }
319
320 pub fn qname(&self) -> &[u8] {
322 &self.data()[..self.qname_len()]
323 }
324
325 pub fn set_data(&mut self, new_data: &[u8]) {
327 self.cigar = None;
328
329 self.inner_mut().l_data = new_data.len() as i32;
330 if (self.inner().m_data as i32) < self.inner().l_data {
331 let l_data = self.inner().l_data;
333 self.realloc_var_data(l_data as usize);
334 }
335
336 let data =
338 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
339 utils::copy_memory(new_data, data);
340 }
341
342 pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
350 assert!(qname.len() < 255);
351 assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
352
353 self.cigar = None;
354
355 let cigar_width = if let Some(cigar_string) = cigar {
356 cigar_string.len()
357 } else {
358 0
359 } * 4;
360 let q_len = qname.len() + 1;
361 let extranul = extranul_from_qname(qname);
362
363 let orig_aux_offset = self.qname_capacity()
364 + 4 * self.cigar_len()
365 + (self.seq_len() + 1) / 2
366 + self.seq_len();
367 let new_aux_offset = q_len + extranul + cigar_width + (seq.len() + 1) / 2 + qual.len();
368 assert!(orig_aux_offset <= self.inner.l_data as usize);
369 let aux_len = self.inner.l_data as usize - orig_aux_offset;
370 self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
371 if (self.inner().m_data as i32) < self.inner().l_data {
372 let l_data = self.inner().l_data;
374 self.realloc_var_data(l_data as usize);
375 }
376
377 if aux_len > 0 && orig_aux_offset != new_aux_offset {
379 let data =
380 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
381 data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
382 }
383
384 let data =
385 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
386
387 utils::copy_memory(qname, data);
389 for i in 0..=extranul {
390 data[qname.len() + i] = b'\0';
391 }
392 let mut i = q_len + extranul;
393 self.inner_mut().core.l_qname = i as u16;
394 self.inner_mut().core.l_extranul = extranul as u8;
395
396 if let Some(cigar_string) = cigar {
398 let cigar_data = unsafe {
399 #[allow(clippy::cast_ptr_alignment)]
401 slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
402 };
403 for (i, c) in cigar_string.iter().enumerate() {
404 cigar_data[i] = c.encode();
405 }
406 self.inner_mut().core.n_cigar = cigar_string.len() as u32;
407 i += cigar_string.len() * 4;
408 } else {
409 self.inner_mut().core.n_cigar = 0;
410 };
411
412 {
414 for j in (0..seq.len()).step_by(2) {
415 data[i + j / 2] = ENCODE_BASE[seq[j] as usize] << 4
416 | (if j + 1 < seq.len() {
417 ENCODE_BASE[seq[j + 1] as usize]
418 } else {
419 0
420 });
421 }
422 self.inner_mut().core.l_qseq = seq.len() as i32;
423 i += (seq.len() + 1) / 2;
424 }
425
426 utils::copy_memory(qual, &mut data[i..]);
428 }
429
430 pub fn set_qname(&mut self, new_qname: &[u8]) {
432 assert!(new_qname.len() < 252);
434
435 let old_q_len = self.qname_capacity();
436 let extranul = extranul_from_qname(new_qname);
438 let new_q_len = new_qname.len() + 1 + extranul;
439
440 let other_len = self.inner_mut().l_data - old_q_len as i32;
442
443 if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
444 self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
445 } else if new_q_len > old_q_len {
446 self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
447
448 if (self.inner().m_data as i32) < self.inner().l_data {
450 let l_data = self.inner().l_data;
452 self.realloc_var_data(l_data as usize);
453 }
454 }
455
456 if new_q_len != old_q_len {
457 unsafe {
459 let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
460
461 ::libc::memmove(
462 data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
463 data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
464 other_len as usize,
465 );
466 }
467 }
468
469 let data =
471 unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
472 utils::copy_memory(new_qname, data);
473 for i in 0..=extranul {
474 data[new_q_len - i - 1] = b'\0';
475 }
476 self.inner_mut().core.l_qname = new_q_len as u16;
477 self.inner_mut().core.l_extranul = extranul as u8;
478 }
479
480 fn realloc_var_data(&mut self, new_len: usize) {
481 let new_len = new_len as u32;
483 let new_request = new_len + 32 - (new_len % 32);
484
485 let ptr = unsafe {
486 ::libc::realloc(
487 self.inner().data as *mut ::libc::c_void,
488 new_request as usize,
489 ) as *mut u8
490 };
491
492 if ptr.is_null() {
493 panic!("ran out of memory in rust_htslib trying to realloc");
494 }
495
496 self.inner_mut().m_data = new_request;
499 self.inner_mut().data = ptr;
500
501 self.own = true;
503 }
504
505 pub fn cigar_len(&self) -> usize {
506 self.inner().core.n_cigar as usize
507 }
508
509 pub fn raw_cigar(&self) -> &[u32] {
512 #[allow(clippy::cast_ptr_alignment)]
514 unsafe {
515 slice::from_raw_parts(
516 self.data()[self.qname_capacity()..].as_ptr() as *const u32,
517 self.cigar_len(),
518 )
519 }
520 }
521
522 pub fn cigar(&self) -> CigarStringView {
524 match self.cigar {
525 Some(ref c) => c.clone(),
526 None => self.unpack_cigar(),
527 }
528 }
529
530 pub fn cigar_cached(&self) -> Option<&CigarStringView> {
532 self.cigar.as_ref()
533 }
534
535 pub fn cache_cigar(&mut self) {
537 self.cigar = Some(self.unpack_cigar())
538 }
539
540 fn unpack_cigar(&self) -> CigarStringView {
542 CigarString(
543 self.raw_cigar()
544 .iter()
545 .map(|&c| {
546 let len = c >> 4;
547 match c & 0b1111 {
548 0 => Cigar::Match(len),
549 1 => Cigar::Ins(len),
550 2 => Cigar::Del(len),
551 3 => Cigar::RefSkip(len),
552 4 => Cigar::SoftClip(len),
553 5 => Cigar::HardClip(len),
554 6 => Cigar::Pad(len),
555 7 => Cigar::Equal(len),
556 8 => Cigar::Diff(len),
557 _ => panic!("Unexpected cigar operation"),
558 }
559 })
560 .collect(),
561 )
562 .into_view(self.pos())
563 }
564
565 pub fn seq_len(&self) -> usize {
566 self.inner().core.l_qseq as usize
567 }
568
569 fn seq_data(&self) -> &[u8] {
570 let offset = self.qname_capacity() + self.cigar_len() * 4;
571 &self.data()[offset..][..(self.seq_len() + 1) / 2]
572 }
573
574 pub fn seq(&self) -> Seq<'_> {
576 Seq {
577 encoded: self.seq_data(),
578 len: self.seq_len(),
579 }
580 }
581
582 pub fn qual(&self) -> &[u8] {
586 &self.data()[self.qname_capacity() + self.cigar_len() * 4 + (self.seq_len() + 1) / 2..]
587 [..self.seq_len()]
588 }
589
590 pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
595 let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
596 let aux = unsafe {
597 htslib::bam_aux_get(
598 &self.inner as *const htslib::bam1_t,
599 c_str.as_ptr() as *mut c_char,
600 )
601 };
602 unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
603 }
604
605 unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
606 const TAG_LEN: isize = 2;
607 const TYPE_ID_LEN: isize = 1;
609
610 if aux.is_null() {
611 return Err(Error::BamAuxTagNotFound);
612 }
613
614 let (data, type_size) = match *aux {
615 b'A' => {
616 let type_size = size_of::<u8>();
617 (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
618 }
619 b'c' => {
620 let type_size = size_of::<i8>();
621 (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
622 }
623 b'C' => {
624 let type_size = size_of::<u8>();
625 (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
626 }
627 b's' => {
628 let type_size = size_of::<i16>();
629 (
630 Aux::I16(
631 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
632 .read_i16::<LittleEndian>()
633 .map_err(|_| Error::BamAuxParsingError)?,
634 ),
635 type_size,
636 )
637 }
638 b'S' => {
639 let type_size = size_of::<u16>();
640 (
641 Aux::U16(
642 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
643 .read_u16::<LittleEndian>()
644 .map_err(|_| Error::BamAuxParsingError)?,
645 ),
646 type_size,
647 )
648 }
649 b'i' => {
650 let type_size = size_of::<i32>();
651 (
652 Aux::I32(
653 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
654 .read_i32::<LittleEndian>()
655 .map_err(|_| Error::BamAuxParsingError)?,
656 ),
657 type_size,
658 )
659 }
660 b'I' => {
661 let type_size = size_of::<u32>();
662 (
663 Aux::U32(
664 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
665 .read_u32::<LittleEndian>()
666 .map_err(|_| Error::BamAuxParsingError)?,
667 ),
668 type_size,
669 )
670 }
671 b'f' => {
672 let type_size = size_of::<f32>();
673 (
674 Aux::Float(
675 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
676 .read_f32::<LittleEndian>()
677 .map_err(|_| Error::BamAuxParsingError)?,
678 ),
679 type_size,
680 )
681 }
682 b'd' => {
683 let type_size = size_of::<f64>();
684 (
685 Aux::Double(
686 slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
687 .read_f64::<LittleEndian>()
688 .map_err(|_| Error::BamAuxParsingError)?,
689 ),
690 type_size,
691 )
692 }
693 b'Z' | b'H' => {
694 let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
695 let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
696 (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
697 }
698 b'B' => {
699 const ARRAY_INNER_TYPE_LEN: isize = 1;
700 const ARRAY_COUNT_LEN: isize = 4;
701
702 let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
704
705 let length =
706 slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
707 .read_u32::<LittleEndian>()
708 .map_err(|_| Error::BamAuxParsingError)? as usize;
709
710 let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
712 b'c' => (
713 Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
714 aux.offset(array_data_offset),
715 length,
716 ))),
717 length,
718 ),
719 b'C' => (
720 Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
721 aux.offset(array_data_offset),
722 length,
723 ))),
724 length,
725 ),
726 b's' => (
727 Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
728 aux.offset(array_data_offset),
729 length * size_of::<i16>(),
730 ))),
731 length * std::mem::size_of::<i16>(),
732 ),
733 b'S' => (
734 Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
735 aux.offset(array_data_offset),
736 length * size_of::<u16>(),
737 ))),
738 length * std::mem::size_of::<u16>(),
739 ),
740 b'i' => (
741 Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
742 aux.offset(array_data_offset),
743 length * size_of::<i32>(),
744 ))),
745 length * std::mem::size_of::<i32>(),
746 ),
747 b'I' => (
748 Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
749 aux.offset(array_data_offset),
750 length * size_of::<u32>(),
751 ))),
752 length * std::mem::size_of::<u32>(),
753 ),
754 b'f' => (
755 Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
756 aux.offset(array_data_offset),
757 length * size_of::<f32>(),
758 ))),
759 length * std::mem::size_of::<f32>(),
760 ),
761 _ => {
762 return Err(Error::BamAuxUnknownType);
763 }
764 };
765 (
766 array_data,
767 ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
769 )
770 }
771 _ => {
772 return Err(Error::BamAuxUnknownType);
773 }
774 };
775
776 Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
778 }
779
780 pub fn aux_iter(&self) -> AuxIter {
785 AuxIter {
786 aux: &self.data()[
789 self.qname_capacity()
791 + self.cigar_len() * std::mem::size_of::<u32>()
793 + (self.seq_len() + 1) / 2
795 + self.seq_len()..],
797 }
798 }
799
800 pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
802 if self.aux(tag).is_ok() {
806 return Err(Error::BamAuxTagAlreadyPresent);
807 }
808
809 let ctag = tag.as_ptr() as *mut c_char;
810 let ret = unsafe {
811 match value {
812 Aux::Char(v) => htslib::bam_aux_append(
813 self.inner_ptr_mut(),
814 ctag,
815 b'A' as c_char,
816 size_of::<u8>() as i32,
817 [v].as_mut_ptr() as *mut u8,
818 ),
819 Aux::I8(v) => htslib::bam_aux_append(
820 self.inner_ptr_mut(),
821 ctag,
822 b'c' as c_char,
823 size_of::<i8>() as i32,
824 [v].as_mut_ptr() as *mut u8,
825 ),
826 Aux::U8(v) => htslib::bam_aux_append(
827 self.inner_ptr_mut(),
828 ctag,
829 b'C' as c_char,
830 size_of::<u8>() as i32,
831 [v].as_mut_ptr() as *mut u8,
832 ),
833 Aux::I16(v) => htslib::bam_aux_append(
834 self.inner_ptr_mut(),
835 ctag,
836 b's' as c_char,
837 size_of::<i16>() as i32,
838 [v].as_mut_ptr() as *mut u8,
839 ),
840 Aux::U16(v) => htslib::bam_aux_append(
841 self.inner_ptr_mut(),
842 ctag,
843 b'S' as c_char,
844 size_of::<u16>() as i32,
845 [v].as_mut_ptr() as *mut u8,
846 ),
847 Aux::I32(v) => htslib::bam_aux_append(
848 self.inner_ptr_mut(),
849 ctag,
850 b'i' as c_char,
851 size_of::<i32>() as i32,
852 [v].as_mut_ptr() as *mut u8,
853 ),
854 Aux::U32(v) => htslib::bam_aux_append(
855 self.inner_ptr_mut(),
856 ctag,
857 b'I' as c_char,
858 size_of::<u32>() as i32,
859 [v].as_mut_ptr() as *mut u8,
860 ),
861 Aux::Float(v) => htslib::bam_aux_append(
862 self.inner_ptr_mut(),
863 ctag,
864 b'f' as c_char,
865 size_of::<f32>() as i32,
866 [v].as_mut_ptr() as *mut u8,
867 ),
868 Aux::Double(v) => htslib::bam_aux_append(
870 self.inner_ptr_mut(),
871 ctag,
872 b'd' as c_char,
873 size_of::<f64>() as i32,
874 [v].as_mut_ptr() as *mut u8,
875 ),
876 Aux::String(v) => {
877 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
878 htslib::bam_aux_append(
879 self.inner_ptr_mut(),
880 ctag,
881 b'Z' as c_char,
882 (v.len() + 1) as i32,
883 c_str.as_ptr() as *mut u8,
884 )
885 }
886 Aux::HexByteArray(v) => {
887 let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
888 htslib::bam_aux_append(
889 self.inner_ptr_mut(),
890 ctag,
891 b'H' as c_char,
892 (v.len() + 1) as i32,
893 c_str.as_ptr() as *mut u8,
894 )
895 }
896 Aux::ArrayI8(aux_array) => match aux_array {
898 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
899 self.inner_ptr_mut(),
900 ctag,
901 b'c',
902 inner.len() as u32,
903 inner.slice.as_ptr() as *mut ::libc::c_void,
904 ),
905 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
906 self.inner_ptr_mut(),
907 ctag,
908 b'c',
909 inner.len() as u32,
910 inner.slice.as_ptr() as *mut ::libc::c_void,
911 ),
912 },
913 Aux::ArrayU8(aux_array) => match aux_array {
914 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
915 self.inner_ptr_mut(),
916 ctag,
917 b'C',
918 inner.len() as u32,
919 inner.slice.as_ptr() as *mut ::libc::c_void,
920 ),
921 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
922 self.inner_ptr_mut(),
923 ctag,
924 b'C',
925 inner.len() as u32,
926 inner.slice.as_ptr() as *mut ::libc::c_void,
927 ),
928 },
929 Aux::ArrayI16(aux_array) => match aux_array {
930 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
931 self.inner_ptr_mut(),
932 ctag,
933 b's',
934 inner.len() as u32,
935 inner.slice.as_ptr() as *mut ::libc::c_void,
936 ),
937 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
938 self.inner_ptr_mut(),
939 ctag,
940 b's',
941 inner.len() as u32,
942 inner.slice.as_ptr() as *mut ::libc::c_void,
943 ),
944 },
945 Aux::ArrayU16(aux_array) => match aux_array {
946 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
947 self.inner_ptr_mut(),
948 ctag,
949 b'S',
950 inner.len() as u32,
951 inner.slice.as_ptr() as *mut ::libc::c_void,
952 ),
953 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
954 self.inner_ptr_mut(),
955 ctag,
956 b'S',
957 inner.len() as u32,
958 inner.slice.as_ptr() as *mut ::libc::c_void,
959 ),
960 },
961 Aux::ArrayI32(aux_array) => match aux_array {
962 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
963 self.inner_ptr_mut(),
964 ctag,
965 b'i',
966 inner.len() as u32,
967 inner.slice.as_ptr() as *mut ::libc::c_void,
968 ),
969 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
970 self.inner_ptr_mut(),
971 ctag,
972 b'i',
973 inner.len() as u32,
974 inner.slice.as_ptr() as *mut ::libc::c_void,
975 ),
976 },
977 Aux::ArrayU32(aux_array) => match aux_array {
978 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
979 self.inner_ptr_mut(),
980 ctag,
981 b'I',
982 inner.len() as u32,
983 inner.slice.as_ptr() as *mut ::libc::c_void,
984 ),
985 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
986 self.inner_ptr_mut(),
987 ctag,
988 b'I',
989 inner.len() as u32,
990 inner.slice.as_ptr() as *mut ::libc::c_void,
991 ),
992 },
993 Aux::ArrayFloat(aux_array) => match aux_array {
994 AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
995 self.inner_ptr_mut(),
996 ctag,
997 b'f',
998 inner.len() as u32,
999 inner.slice.as_ptr() as *mut ::libc::c_void,
1000 ),
1001 AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1002 self.inner_ptr_mut(),
1003 ctag,
1004 b'f',
1005 inner.len() as u32,
1006 inner.slice.as_ptr() as *mut ::libc::c_void,
1007 ),
1008 },
1009 }
1010 };
1011
1012 if ret < 0 {
1013 Err(Error::BamAux)
1014 } else {
1015 Ok(())
1016 }
1017 }
1018
1019 pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1021 let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
1022 let aux = unsafe {
1023 htslib::bam_aux_get(
1024 &self.inner as *const htslib::bam1_t,
1025 c_str.as_ptr() as *mut c_char,
1026 )
1027 };
1028 unsafe {
1029 if aux.is_null() {
1030 Err(Error::BamAuxTagNotFound)
1031 } else {
1032 htslib::bam_aux_del(self.inner_ptr_mut(), aux);
1033 Ok(())
1034 }
1035 }
1036 }
1037
1038 pub fn basemods_iter(&self) -> Result<BaseModificationsIter> {
1070 BaseModificationsIter::new(self)
1071 }
1072
1073 pub fn basemods_position_iter(&self) -> Result<BaseModificationsPositionIter> {
1077 BaseModificationsPositionIter::new(self)
1078 }
1079
1080 pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1084 if self.is_paired()
1085 && !self.is_unmapped()
1086 && !self.is_mate_unmapped()
1087 && self.tid() == self.mtid()
1088 {
1089 if self.pos() == self.mpos() {
1090 return SequenceReadPairOrientation::None;
1092 }
1093
1094 let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() {
1095 (
1096 self.pos(),
1097 self.mpos(),
1098 !self.is_reverse(),
1099 !self.is_mate_reverse(),
1100 )
1101 } else {
1102 (
1103 self.mpos(),
1104 self.pos(),
1105 !self.is_mate_reverse(),
1106 !self.is_reverse(),
1107 )
1108 };
1109
1110 if pos_1 < pos_2 {
1111 match (fwd_1, fwd_2) {
1112 (true, true) => SequenceReadPairOrientation::F1F2,
1113 (true, false) => SequenceReadPairOrientation::F1R2,
1114 (false, true) => SequenceReadPairOrientation::R1F2,
1115 (false, false) => SequenceReadPairOrientation::R1R2,
1116 }
1117 } else {
1118 match (fwd_2, fwd_1) {
1119 (true, true) => SequenceReadPairOrientation::F2F1,
1120 (true, false) => SequenceReadPairOrientation::F2R1,
1121 (false, true) => SequenceReadPairOrientation::R2F1,
1122 (false, false) => SequenceReadPairOrientation::R2R1,
1123 }
1124 }
1125 } else {
1126 SequenceReadPairOrientation::None
1127 }
1128 }
1129
1130 flag!(is_paired, set_paired, unset_paired, 1u16);
1131 flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1132 flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1133 flag!(
1134 is_mate_unmapped,
1135 set_mate_unmapped,
1136 unset_mate_unmapped,
1137 8u16
1138 );
1139 flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1140 flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1141 flag!(
1142 is_first_in_template,
1143 set_first_in_template,
1144 unset_first_in_template,
1145 64u16
1146 );
1147 flag!(
1148 is_last_in_template,
1149 set_last_in_template,
1150 unset_last_in_template,
1151 128u16
1152 );
1153 flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1154 flag!(
1155 is_quality_check_failed,
1156 set_quality_check_failed,
1157 unset_quality_check_failed,
1158 512u16
1159 );
1160 flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1161 flag!(
1162 is_supplementary,
1163 set_supplementary,
1164 unset_supplementary,
1165 2048u16
1166 );
1167}
1168
1169impl Drop for Record {
1170 fn drop(&mut self) {
1171 if self.own {
1172 unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
1173 }
1174 }
1175}
1176
1177impl SequenceRead for Record {
1178 fn name(&self) -> &[u8] {
1179 self.qname()
1180 }
1181
1182 fn base(&self, i: usize) -> u8 {
1183 *decode_base_unchecked(encoded_base(self.seq_data(), i))
1184 }
1185
1186 fn base_qual(&self, i: usize) -> u8 {
1187 self.qual()[i]
1188 }
1189
1190 fn len(&self) -> usize {
1191 self.seq_len()
1192 }
1193
1194 fn is_empty(&self) -> bool {
1195 self.len() == 0
1196 }
1197}
1198
1199impl genome::AbstractInterval for Record {
1200 fn contig(&self) -> &str {
1202 let tid = self.tid();
1203 if tid < 0 {
1204 panic!("invalid tid, must be at least zero");
1205 }
1206 str::from_utf8(
1207 self.header
1208 .as_ref()
1209 .expect(
1210 "header must be set (this is the case if the record has been read from a file)",
1211 )
1212 .tid2name(tid as u32),
1213 )
1214 .expect("unable to interpret contig name as UTF-8")
1215 }
1216
1217 fn range(&self) -> ops::Range<genome::Position> {
1219 let end_pos = self
1220 .cigar_cached()
1221 .expect("cigar has not been cached yet, call cache_cigar() first")
1222 .end_pos() as u64;
1223
1224 if self.pos() < 0 {
1225 panic!("invalid position, must be positive")
1226 }
1227
1228 self.pos() as u64..end_pos
1229 }
1230}
1231
1232#[derive(Debug, PartialEq)]
1287pub enum Aux<'a> {
1288 Char(u8),
1289 I8(i8),
1290 U8(u8),
1291 I16(i16),
1292 U16(u16),
1293 I32(i32),
1294 U32(u32),
1295 Float(f32),
1296 Double(f64), String(&'a str),
1298 HexByteArray(&'a str),
1299 ArrayI8(AuxArray<'a, i8>),
1300 ArrayU8(AuxArray<'a, u8>),
1301 ArrayI16(AuxArray<'a, i16>),
1302 ArrayU16(AuxArray<'a, u16>),
1303 ArrayI32(AuxArray<'a, i32>),
1304 ArrayU32(AuxArray<'a, u32>),
1305 ArrayFloat(AuxArray<'a, f32>),
1306}
1307
1308unsafe impl<'a> Send for Aux<'a> {}
1309unsafe impl<'a> Sync for Aux<'a> {}
1310
1311pub trait AuxArrayElement: Copy {
1313 fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1314}
1315
1316impl AuxArrayElement for i8 {
1317 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1318 std::io::Cursor::new(bytes).read_i8().ok()
1319 }
1320}
1321impl AuxArrayElement for u8 {
1322 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1323 std::io::Cursor::new(bytes).read_u8().ok()
1324 }
1325}
1326impl AuxArrayElement for i16 {
1327 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1328 std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1329 }
1330}
1331impl AuxArrayElement for u16 {
1332 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1333 std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1334 }
1335}
1336impl AuxArrayElement for i32 {
1337 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1338 std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1339 }
1340}
1341impl AuxArrayElement for u32 {
1342 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1343 std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1344 }
1345}
1346impl AuxArrayElement for f32 {
1347 fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1348 std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
1349 }
1350}
1351
1352#[derive(Debug)]
1396pub enum AuxArray<'a, T> {
1397 TargetType(AuxArrayTargetType<'a, T>),
1398 RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1399}
1400
1401impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1402where
1403 T: AuxArrayElement + PartialEq,
1404{
1405 fn eq(&self, other: &AuxArray<'_, T>) -> bool {
1406 use AuxArray::*;
1407 match (self, other) {
1408 (TargetType(v), TargetType(v_other)) => v == v_other,
1409 (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1410 (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
1411 (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
1412 }
1413 }
1414}
1415
1416impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1418where
1419 I: AuxArrayElement,
1420 T: AsRef<[I]> + ?Sized,
1421{
1422 fn from(src: &'a T) -> Self {
1423 AuxArray::TargetType(AuxArrayTargetType {
1424 slice: src.as_ref(),
1425 })
1426 }
1427}
1428
1429impl<'a, T> AuxArray<'a, T>
1430where
1431 T: AuxArrayElement,
1432{
1433 pub fn get(&self, index: usize) -> Option<T> {
1435 match self {
1436 AuxArray::TargetType(v) => v.get(index),
1437 AuxArray::RawLeBytes(v) => v.get(index),
1438 }
1439 }
1440
1441 pub fn len(&self) -> usize {
1443 match self {
1444 AuxArray::TargetType(a) => a.len(),
1445 AuxArray::RawLeBytes(a) => a.len(),
1446 }
1447 }
1448
1449 pub fn is_empty(&self) -> bool {
1451 self.len() == 0
1452 }
1453
1454 pub fn iter(&self) -> AuxArrayIter<T> {
1456 AuxArrayIter {
1457 index: 0,
1458 array: self,
1459 }
1460 }
1461
1462 fn from_bytes(bytes: &'a [u8]) -> Self {
1464 Self::RawLeBytes(AuxArrayRawLeBytes {
1465 slice: bytes,
1466 phantom_data: PhantomData,
1467 })
1468 }
1469}
1470
1471#[doc(hidden)]
1473#[derive(Debug, PartialEq)]
1474pub struct AuxArrayTargetType<'a, T> {
1475 slice: &'a [T],
1476}
1477
1478impl<'a, T> AuxArrayTargetType<'a, T>
1479where
1480 T: AuxArrayElement,
1481{
1482 fn get(&self, index: usize) -> Option<T> {
1483 self.slice.get(index).copied()
1484 }
1485
1486 fn len(&self) -> usize {
1487 self.slice.len()
1488 }
1489}
1490
1491#[doc(hidden)]
1493#[derive(Debug, PartialEq)]
1494pub struct AuxArrayRawLeBytes<'a, T> {
1495 slice: &'a [u8],
1496 phantom_data: PhantomData<T>,
1497}
1498
1499impl<'a, T> AuxArrayRawLeBytes<'a, T>
1500where
1501 T: AuxArrayElement,
1502{
1503 fn get(&self, index: usize) -> Option<T> {
1504 let type_size = std::mem::size_of::<T>();
1505 if index * type_size + type_size > self.slice.len() {
1506 return None;
1507 }
1508 T::from_le_bytes(&self.slice[index * type_size..][..type_size])
1509 }
1510
1511 fn len(&self) -> usize {
1512 self.slice.len() / std::mem::size_of::<T>()
1513 }
1514}
1515
1516pub struct AuxArrayIter<'a, T> {
1520 index: usize,
1521 array: &'a AuxArray<'a, T>,
1522}
1523
1524impl<T> Iterator for AuxArrayIter<'_, T>
1525where
1526 T: AuxArrayElement,
1527{
1528 type Item = T;
1529 fn next(&mut self) -> Option<Self::Item> {
1530 let value = self.array.get(self.index);
1531 self.index += 1;
1532 value
1533 }
1534
1535 fn size_hint(&self) -> (usize, Option<usize>) {
1536 let array_length = self.array.len() - self.index;
1537 (array_length, Some(array_length))
1538 }
1539}
1540
1541pub struct AuxIter<'a> {
1552 aux: &'a [u8],
1553}
1554
1555impl<'a> Iterator for AuxIter<'a> {
1556 type Item = Result<(&'a [u8], Aux<'a>)>;
1557
1558 fn next(&mut self) -> Option<Self::Item> {
1559 if self.aux.is_empty() {
1561 return None;
1562 }
1563 if (1..=3).contains(&self.aux.len()) {
1565 self.aux = &[];
1567 return Some(Err(Error::BamAuxParsingError));
1568 }
1569 let tag = &self.aux[..2];
1570 Some(unsafe {
1571 let data_ptr = self.aux[2..].as_ptr();
1572 Record::read_aux_field(data_ptr)
1573 .map(|(aux, offset)| {
1574 self.aux = &self.aux[offset..];
1575 (tag, aux)
1576 })
1577 .map_err(|e| {
1578 self.aux = &[];
1580 e
1581 })
1582 })
1583 }
1584}
1585
1586static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1587static ENCODE_BASE: [u8; 256] = [
1588 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1589 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1590 1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1591 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1592 13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1593 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1594 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1595 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1596 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1597 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1598 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1599];
1600
1601#[inline]
1602fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
1603 (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
1604}
1605
1606#[inline]
1607unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
1608 (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
1609}
1610
1611#[inline]
1612fn decode_base_unchecked(base: u8) -> &'static u8 {
1613 unsafe { DECODE_BASE.get_unchecked(base as usize) }
1614}
1615
1616#[derive(Debug, Copy, Clone)]
1618pub struct Seq<'a> {
1619 pub encoded: &'a [u8],
1620 len: usize,
1621}
1622
1623impl<'a> Seq<'a> {
1624 #[inline]
1626 pub fn encoded_base(&self, i: usize) -> u8 {
1627 encoded_base(self.encoded, i)
1628 }
1629
1630 #[inline]
1632 pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
1633 encoded_base_unchecked(self.encoded, i)
1634 }
1635
1636 #[inline]
1640 pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
1641 *decode_base_unchecked(self.encoded_base_unchecked(i))
1642 }
1643
1644 pub fn as_bytes(&self) -> Vec<u8> {
1646 (0..self.len()).map(|i| self[i]).collect()
1647 }
1648 pub fn rangeextract<T>(&self, range: T) -> std::io::Result<Cow<str>>
1650 where
1651 T: RangeBounds<usize> + SliceIndex<str>,
1652 <T as SliceIndex<str>>::Output: std::string::ToString,
1653 {
1654 if self.is_empty() {
1655 return Err(std::io::Error::from(std::io::ErrorKind::InvalidInput))
1656 }
1657 let seq = match String::from_utf8(self.as_bytes().to_vec()) {
1658 Ok(d) if d.is_ascii() => d,
1659 _ => return Err(std::io::Error::from(std::io::ErrorKind::InvalidData)),
1660 };
1661 let substring = match seq.get(range) {
1662 Some(d) => d,
1663 None => return Err(std::io::Error::from(std::io::ErrorKind::InvalidInput)),
1664 };
1665
1666 Ok(Cow::Owned(substring.to_string()))
1667 }
1668 pub fn len(&self) -> usize {
1670 self.len
1671 }
1672
1673 pub fn is_empty(&self) -> bool {
1674 self.len() == 0
1675 }
1676}
1677
1678impl<'a> ops::Index<usize> for Seq<'a> {
1679 type Output = u8;
1680
1681 fn index(&self, index: usize) -> &u8 {
1683 decode_base_unchecked(self.encoded_base(index))
1684 }
1685}
1686
1687unsafe impl<'a> Send for Seq<'a> {}
1688unsafe impl<'a> Sync for Seq<'a> {}
1689
1690#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1691#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1692pub enum Cigar {
1693 Match(u32), Ins(u32), Del(u32), RefSkip(u32), SoftClip(u32), HardClip(u32), Pad(u32), Equal(u32), Diff(u32), }
1703
1704impl Cigar {
1705 fn encode(self) -> u32 {
1706 match self {
1707 Cigar::Match(len) => len << 4, Cigar::Ins(len) => len << 4 | 1,
1709 Cigar::Del(len) => len << 4 | 2,
1710 Cigar::RefSkip(len) => len << 4 | 3,
1711 Cigar::SoftClip(len) => len << 4 | 4,
1712 Cigar::HardClip(len) => len << 4 | 5,
1713 Cigar::Pad(len) => len << 4 | 6,
1714 Cigar::Equal(len) => len << 4 | 7,
1715 Cigar::Diff(len) => len << 4 | 8,
1716 }
1717 }
1718
1719 pub fn len(self) -> u32 {
1721 match self {
1722 Cigar::Match(len) => len,
1723 Cigar::Ins(len) => len,
1724 Cigar::Del(len) => len,
1725 Cigar::RefSkip(len) => len,
1726 Cigar::SoftClip(len) => len,
1727 Cigar::HardClip(len) => len,
1728 Cigar::Pad(len) => len,
1729 Cigar::Equal(len) => len,
1730 Cigar::Diff(len) => len,
1731 }
1732 }
1733
1734 pub fn is_empty(self) -> bool {
1735 self.len() == 0
1736 }
1737
1738 pub fn char(self) -> char {
1740 match self {
1741 Cigar::Match(_) => 'M',
1742 Cigar::Ins(_) => 'I',
1743 Cigar::Del(_) => 'D',
1744 Cigar::RefSkip(_) => 'N',
1745 Cigar::SoftClip(_) => 'S',
1746 Cigar::HardClip(_) => 'H',
1747 Cigar::Pad(_) => 'P',
1748 Cigar::Equal(_) => '=',
1749 Cigar::Diff(_) => 'X',
1750 }
1751 }
1752}
1753
1754impl fmt::Display for Cigar {
1755 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1756 fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
1757 }
1758}
1759
1760unsafe impl Send for Cigar {}
1761unsafe impl Sync for Cigar {}
1762
1763custom_derive! {
1764 #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1783 #[derive(NewtypeDeref,
1784 NewtypeDerefMut,
1785 NewtypeIndex(usize),
1786 NewtypeIndexMut(usize),
1787 NewtypeFrom,
1788 PartialEq,
1789 PartialOrd,
1790 Eq,
1791 NewtypeDebug,
1792 Clone,
1793 Hash
1794 )]
1795 pub struct CigarString(pub Vec<Cigar>);
1796}
1797
1798impl CigarString {
1799 pub fn into_view(self, pos: i64) -> CigarStringView {
1801 CigarStringView::new(self, pos)
1802 }
1803
1804 pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
1809 match alignment.mode {
1810 AlignmentMode::Global => {
1811 panic!(" Bam cigar fn not supported for Global Alignment mode")
1812 }
1813 AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
1814 _ => {}
1815 }
1816
1817 let mut cigar = Vec::new();
1818 if alignment.operations.is_empty() {
1819 return CigarString(cigar);
1820 }
1821
1822 let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
1823 AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
1824 AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
1825 AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
1826 AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
1827 _ => {}
1828 };
1829
1830 if alignment.xstart > 0 {
1831 cigar.push(if hard_clip {
1832 Cigar::HardClip(alignment.xstart as u32)
1833 } else {
1834 Cigar::SoftClip(alignment.xstart as u32)
1835 });
1836 }
1837
1838 let mut last = alignment.operations[0];
1839 let mut k = 1u32;
1840 for &op in alignment.operations[1..].iter() {
1841 if op == last {
1842 k += 1;
1843 } else {
1844 add_op(last, k, &mut cigar);
1845 k = 1;
1846 }
1847 last = op;
1848 }
1849 add_op(last, k, &mut cigar);
1850 if alignment.xlen > alignment.xend {
1851 cigar.push(if hard_clip {
1852 Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
1853 } else {
1854 Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
1855 });
1856 }
1857
1858 CigarString(cigar)
1859 }
1860}
1861
1862impl TryFrom<&[u8]> for CigarString {
1863 type Error = Error;
1864
1865 fn try_from(bytes: &[u8]) -> Result<Self> {
1886 let mut inner = Vec::new();
1887 let mut i = 0;
1888 let text_len = bytes.len();
1889 while i < text_len {
1890 let mut j = i;
1891 while j < text_len && bytes[j].is_ascii_digit() {
1892 j += 1;
1893 }
1894 if i == j {
1896 return Err(Error::BamParseCigar {
1897 msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
1898 });
1899 }
1900 let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
1902 msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
1903 })?;
1904 let n = s.parse().map_err(|_| Error::BamParseCigar {
1905 msg: format!("Unable to parse &str '{:?}' to u32.", s),
1906 })?;
1907 let op = &bytes[j];
1909 inner.push(match op {
1910 b'M' => Cigar::Match(n),
1911 b'I' => Cigar::Ins(n),
1912 b'D' => Cigar::Del(n),
1913 b'N' => Cigar::RefSkip(n),
1914 b'H' => {
1915 if i == 0 || j + 1 == text_len {
1916 Cigar::HardClip(n)
1917 } else {
1918 return Err(Error::BamParseCigar {
1919 msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
1920 .to_owned(),
1921 });
1922 }
1923 }
1924 b'S' => {
1925 if i == 0
1926 || j + 1 == text_len
1927 || bytes[i-1] == b'H'
1928 || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
1929 Cigar::SoftClip(n)
1930 } else {
1931 return Err(Error::BamParseCigar {
1932 msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
1933 .to_owned(),
1934 });
1935 }
1936 },
1937 b'P' => Cigar::Pad(n),
1938 b'=' => Cigar::Equal(n),
1939 b'X' => Cigar::Diff(n),
1940 op => {
1941 return Err(Error::BamParseCigar {
1942 msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
1943 })
1944 }
1945 });
1946 i = j + 1;
1947 }
1948 Ok(CigarString(inner))
1949 }
1950}
1951
1952impl TryFrom<&str> for CigarString {
1953 type Error = Error;
1954
1955 fn try_from(text: &str) -> Result<Self> {
1976 let bytes = text.as_bytes();
1977 if text.chars().count() != bytes.len() {
1978 return Err(Error::BamParseCigar {
1979 msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
1980 });
1981 }
1982 CigarString::try_from(bytes)
1983 }
1984}
1985
1986impl<'a> CigarString {
1987 pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
1988 self.into_iter()
1989 }
1990}
1991
1992impl<'a> IntoIterator for &'a CigarString {
1993 type Item = &'a Cigar;
1994 type IntoIter = ::std::slice::Iter<'a, Cigar>;
1995
1996 fn into_iter(self) -> Self::IntoIter {
1997 self.0.iter()
1998 }
1999}
2000
2001impl fmt::Display for CigarString {
2002 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2003 for op in self {
2004 fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
2005 }
2006 Ok(())
2007 }
2008}
2009
2010fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
2012 match (cigar.next(), cigar.next()) {
2013 (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
2014 *s as i64
2015 }
2016 _ => 0,
2017 }
2018}
2019
2020#[derive(Eq, PartialEq, Clone, Debug)]
2021pub struct CigarStringView {
2022 inner: CigarString,
2023 pos: i64,
2024}
2025
2026impl CigarStringView {
2027 pub fn new(c: CigarString, pos: i64) -> CigarStringView {
2029 CigarStringView { inner: c, pos }
2030 }
2031
2032 pub fn end_pos(&self) -> i64 {
2034 let mut pos = self.pos;
2035 for c in self {
2036 match c {
2037 Cigar::Match(l)
2038 | Cigar::RefSkip(l)
2039 | Cigar::Del(l)
2040 | Cigar::Equal(l)
2041 | Cigar::Diff(l) => pos += *l as i64,
2042 Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2044 }
2045 }
2046 pos
2047 }
2048
2049 pub fn pos(&self) -> i64 {
2051 self.pos
2052 }
2053
2054 pub fn leading_softclips(&self) -> i64 {
2056 calc_softclips(self.iter())
2057 }
2058
2059 pub fn trailing_softclips(&self) -> i64 {
2061 calc_softclips(self.iter().rev())
2062 }
2063
2064 pub fn leading_hardclips(&self) -> i64 {
2066 self.first().map_or(0, |cigar| {
2067 if let Cigar::HardClip(s) = cigar {
2068 *s as i64
2069 } else {
2070 0
2071 }
2072 })
2073 }
2074
2075 pub fn trailing_hardclips(&self) -> i64 {
2077 self.last().map_or(0, |cigar| {
2078 if let Cigar::HardClip(s) = cigar {
2079 *s as i64
2080 } else {
2081 0
2082 }
2083 })
2084 }
2085
2086 pub fn read_pos(
2096 &self,
2097 ref_pos: u32,
2098 include_softclips: bool,
2099 include_dels: bool,
2100 ) -> Result<Option<u32>> {
2101 let mut rpos = self.pos as u32; let mut qpos = 0u32; let mut j = 0; for (i, c) in self.iter().enumerate() {
2108 match c {
2109 Cigar::Match(_) |
2110 Cigar::Diff(_) |
2111 Cigar::Equal(_) |
2112 Cigar::Ins(_) => {
2115 j = i;
2116 break;
2117 },
2118 Cigar::SoftClip(l) => {
2119 j = i;
2120 if include_softclips {
2121 rpos = rpos.saturating_sub(*l);
2125 }
2126 break;
2127 },
2128 Cigar::Del(l) => {
2129 rpos += l;
2137 },
2138 Cigar::RefSkip(_) => {
2139 return Err(Error::BamUnexpectedCigarOperation {
2140 msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
2141 });
2142 },
2143 Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2144 return Err(Error::BamUnexpectedCigarOperation{
2145 msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2146 });
2147 },
2148 Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
2150 Cigar::Pad(_) | Cigar::HardClip(_) => ()
2152 }
2153 }
2154
2155 let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2156 cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2157 };
2158
2159 while rpos <= ref_pos && j < self.len() {
2160 match self[j] {
2161 Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
2163 qpos += ref_pos - rpos;
2166 return Ok(Some(qpos));
2167 }
2168 Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2169 qpos += ref_pos - rpos;
2170 return Ok(Some(qpos));
2171 }
2172 Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2173 return Ok(Some(qpos));
2175 }
2176 Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
2178 rpos += l;
2179 qpos += l;
2180 j += 1;
2181 }
2182 Cigar::SoftClip(l) => {
2183 qpos += l;
2184 j += 1;
2185 if include_softclips {
2186 rpos += l;
2187 }
2188 }
2189 Cigar::Ins(l) => {
2190 qpos += l;
2191 j += 1;
2192 }
2193 Cigar::RefSkip(l) | Cigar::Del(l) => {
2194 rpos += l;
2195 j += 1;
2196 }
2197 Cigar::Pad(_) => {
2198 j += 1;
2199 }
2200 Cigar::HardClip(_) if j < self.len() - 1 => {
2201 return Err(Error::BamUnexpectedCigarOperation{
2202 msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2203 });
2204 }
2205 Cigar::HardClip(_) => return Ok(None),
2206 }
2207 }
2208
2209 Ok(None)
2210 }
2211
2212 pub fn take(self) -> CigarString {
2214 self.inner
2215 }
2216}
2217
2218impl ops::Deref for CigarStringView {
2219 type Target = CigarString;
2220
2221 fn deref(&self) -> &CigarString {
2222 &self.inner
2223 }
2224}
2225
2226impl ops::Index<usize> for CigarStringView {
2227 type Output = Cigar;
2228
2229 fn index(&self, index: usize) -> &Cigar {
2230 self.inner.index(index)
2231 }
2232}
2233
2234impl ops::IndexMut<usize> for CigarStringView {
2235 fn index_mut(&mut self, index: usize) -> &mut Cigar {
2236 self.inner.index_mut(index)
2237 }
2238}
2239
2240impl<'a> CigarStringView {
2241 pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2242 self.inner.into_iter()
2243 }
2244}
2245
2246impl<'a> IntoIterator for &'a CigarStringView {
2247 type Item = &'a Cigar;
2248 type IntoIter = ::std::slice::Iter<'a, Cigar>;
2249
2250 fn into_iter(self) -> Self::IntoIter {
2251 self.inner.into_iter()
2252 }
2253}
2254
2255impl fmt::Display for CigarStringView {
2256 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2257 self.inner.fmt(fmt)
2258 }
2259}
2260
2261pub struct BaseModificationMetadata {
2262 pub strand: i32,
2263 pub implicit: i32,
2264 pub canonical: u8,
2265}
2266
2267pub struct BaseModificationState<'a> {
2270 record: &'a Record,
2271 state: *mut htslib::hts_base_mod_state,
2272 buffer: Vec<htslib::hts_base_mod>,
2273 buffer_pos: i32,
2274}
2275
2276impl BaseModificationState<'_> {
2277 fn new<'a>(r: &'a Record) -> Result<BaseModificationState<'a>> {
2282 let mut bm = unsafe {
2283 BaseModificationState {
2284 record: r,
2285 state: hts_sys::hts_base_mod_state_alloc(),
2286 buffer: Vec::new(),
2287 buffer_pos: -1,
2288 }
2289 };
2290
2291 if bm.state.is_null() {
2292 panic!("Unable to allocate memory for hts_base_mod_state");
2293 }
2294
2295 unsafe {
2297 let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
2298 if ret != 0 {
2299 return Err(Error::BamBaseModificationTagNotFound);
2300 }
2301 }
2302
2303 let types = bm.recorded();
2304 bm.buffer.reserve(types.len());
2305 return Ok(bm);
2306 }
2307
2308 pub fn buffer_next_mods(&mut self) -> Result<usize> {
2309 unsafe {
2310 let ret = hts_sys::bam_next_basemod(
2311 self.record.inner_ptr(),
2312 self.state,
2313 self.buffer.as_mut_ptr(),
2314 self.buffer.capacity() as i32,
2315 &mut self.buffer_pos,
2316 );
2317
2318 if ret < 0 {
2319 return Err(Error::BamBaseModificationIterationFailed);
2320 }
2321
2322 if ret as usize > self.buffer.capacity() {
2326 return Err(Error::BamBaseModificationTooManyMods);
2327 }
2328
2329 self.buffer.set_len(ret as usize);
2332
2333 return Ok(ret as usize);
2334 }
2335 }
2336
2337 pub fn recorded<'a>(&self) -> &'a [i32] {
2340 unsafe {
2341 let mut n: i32 = 0;
2342 let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2343
2344 if data_ptr.is_null() {
2346 panic!("Unable to obtain pointer to base modifications");
2347 }
2348 assert!(n >= 0);
2349 return slice::from_raw_parts(data_ptr, n as usize);
2350 }
2351 }
2352
2353 pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2359 unsafe {
2360 let mut strand: i32 = 0;
2361 let mut implicit: i32 = 0;
2362 let mut canonical: c_char = 0;
2364
2365 let ret = hts_sys::bam_mods_query_type(
2366 self.state,
2367 code,
2368 &mut strand,
2369 &mut implicit,
2370 &mut canonical,
2371 );
2372 if ret == -1 {
2373 return Err(Error::BamBaseModificationTypeNotFound);
2374 } else {
2375 return Ok(BaseModificationMetadata {
2376 strand,
2377 implicit,
2378 canonical: canonical.try_into().unwrap(),
2379 });
2380 }
2381 }
2382 }
2383}
2384
2385impl Drop for BaseModificationState<'_> {
2386 fn drop<'a>(&mut self) {
2387 unsafe {
2388 hts_sys::hts_base_mod_state_free(self.state);
2389 }
2390 }
2391}
2392
2393pub struct BaseModificationsPositionIter<'a> {
2396 mod_state: BaseModificationState<'a>,
2397}
2398
2399impl BaseModificationsPositionIter<'_> {
2400 fn new<'a>(r: &'a Record) -> Result<BaseModificationsPositionIter<'a>> {
2401 let state = BaseModificationState::new(r)?;
2402 Ok(BaseModificationsPositionIter { mod_state: state })
2403 }
2404
2405 pub fn recorded<'a>(&self) -> &'a [i32] {
2406 return self.mod_state.recorded();
2407 }
2408
2409 pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2410 return self.mod_state.query_type(code);
2411 }
2412}
2413
2414impl<'a> Iterator for BaseModificationsPositionIter<'a> {
2415 type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2416
2417 fn next(&mut self) -> Option<Self::Item> {
2418 let ret = self.mod_state.buffer_next_mods();
2419
2420 match ret {
2425 Ok(num_mods) => {
2426 if num_mods == 0 {
2427 return None;
2428 } else {
2429 let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
2430 return Some(Ok(data));
2431 }
2432 }
2433 Err(e) => return Some(Err(e)),
2434 }
2435 }
2436}
2437
2438pub struct BaseModificationsIter<'a> {
2441 mod_state: BaseModificationState<'a>,
2442 buffer_idx: usize,
2443}
2444
2445impl BaseModificationsIter<'_> {
2446 fn new<'a>(r: &'a Record) -> Result<BaseModificationsIter<'a>> {
2447 let state = BaseModificationState::new(r)?;
2448 Ok(BaseModificationsIter {
2449 mod_state: state,
2450 buffer_idx: 0,
2451 })
2452 }
2453
2454 pub fn recorded<'a>(&self) -> &'a [i32] {
2455 return self.mod_state.recorded();
2456 }
2457
2458 pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2459 return self.mod_state.query_type(code);
2460 }
2461}
2462
2463impl<'a> Iterator for BaseModificationsIter<'a> {
2464 type Item = Result<(i32, hts_sys::hts_base_mod)>;
2465
2466 fn next(&mut self) -> Option<Self::Item> {
2467 if self.buffer_idx == self.mod_state.buffer.len() {
2468 let ret = self.mod_state.buffer_next_mods();
2471
2472 match ret {
2473 Ok(num_mods) => {
2474 if num_mods == 0 {
2475 return None;
2477 } else {
2478 self.buffer_idx = 0;
2480 }
2481 }
2482 Err(e) => return Some(Err(e)),
2483 }
2484 }
2485
2486 assert!(self.buffer_idx < self.mod_state.buffer.len());
2488 let data = (
2489 self.mod_state.buffer_pos,
2490 self.mod_state.buffer[self.buffer_idx],
2491 );
2492 self.buffer_idx += 1;
2493 return Some(Ok(data));
2494 }
2495}
2496
2497#[cfg(test)]
2498mod tests {
2499 use super::*;
2500
2501 #[test]
2502 fn test_cigar_string() {
2503 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2504
2505 assert_eq!(cigar[0], Cigar::Match(100));
2506 assert_eq!(format!("{}", cigar), "100M10S");
2507 for op in &cigar {
2508 println!("{}", op);
2509 }
2510 }
2511
2512 #[test]
2513 fn test_cigar_string_view_pos() {
2514 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2515 assert_eq!(cigar.pos(), 5);
2516 }
2517
2518 #[test]
2519 fn test_cigar_string_leading_softclips() {
2520 let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2521 assert_eq!(cigar.leading_softclips(), 10);
2522 let cigar2 = CigarString(vec![
2523 Cigar::HardClip(5),
2524 Cigar::SoftClip(10),
2525 Cigar::Match(100),
2526 ])
2527 .into_view(0);
2528 assert_eq!(cigar2.leading_softclips(), 10);
2529 }
2530
2531 #[test]
2532 fn test_cigar_string_trailing_softclips() {
2533 let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2534 assert_eq!(cigar.trailing_softclips(), 10);
2535 let cigar2 = CigarString(vec![
2536 Cigar::Match(100),
2537 Cigar::SoftClip(10),
2538 Cigar::HardClip(5),
2539 ])
2540 .into_view(0);
2541 assert_eq!(cigar2.trailing_softclips(), 10);
2542 }
2543
2544 #[test]
2545 fn test_cigar_read_pos() {
2546 let vpos = 5; let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2554 assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2555
2556 let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2564 assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2565 assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2566
2567 let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2576 assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2577 assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2578
2579 let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2585 assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2586
2587 let c05 = CigarString(vec![
2593 Cigar::Equal(2),
2594 Cigar::Del(2),
2595 Cigar::Diff(1),
2596 Cigar::Equal(2),
2597 ])
2598 .into_view(0);
2599 assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2600
2601 let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2607 assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2608 assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2609
2610 let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2616 assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2617 assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2618
2619 let c08 = CigarString(vec![
2625 Cigar::Equal(1),
2626 Cigar::Diff(1),
2627 Cigar::RefSkip(3),
2628 Cigar::Match(2),
2629 ])
2630 .into_view(2);
2631 assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2632 assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2633
2634 let c09 = CigarString(vec![
2640 Cigar::HardClip(3),
2641 Cigar::Equal(2),
2642 Cigar::HardClip(3),
2643 Cigar::Equal(2),
2644 ])
2645 .into_view(2);
2646 assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2647
2648 let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2654 assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2655
2656 let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2662 assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2663
2664 let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2670 assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2671
2672 let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2678 assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2679
2680 let vpos2 = 15;
2682 let c14 = CigarString(vec![
2687 Cigar::HardClip(5),
2688 Cigar::SoftClip(3),
2689 Cigar::Equal(1),
2690 Cigar::Pad(2),
2691 Cigar::Match(1),
2692 Cigar::Diff(1),
2693 Cigar::Ins(3),
2694 Cigar::Match(2),
2695 Cigar::Del(1),
2696 Cigar::Ins(2),
2697 Cigar::Equal(2),
2698 Cigar::RefSkip(3),
2699 Cigar::Match(3),
2700 Cigar::Equal(2),
2701 Cigar::SoftClip(5),
2702 Cigar::HardClip(2),
2703 ])
2704 .into_view(0);
2705 assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2706
2707 let c15 =
2713 CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2714 assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2715
2716 let c16 =
2719 CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2720 assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2721 }
2722
2723 #[test]
2724 fn test_clone() {
2725 let mut rec = Record::new();
2726 rec.set_pos(300);
2727 rec.set_qname(b"read1");
2728 let clone = rec.clone();
2729 assert_eq!(rec, clone);
2730 }
2731
2732 #[test]
2733 fn test_flags() {
2734 let mut rec = Record::new();
2735
2736 rec.set_paired();
2737 assert_eq!(rec.is_paired(), true);
2738
2739 rec.set_supplementary();
2740 assert_eq!(rec.is_supplementary(), true);
2741 assert_eq!(rec.is_supplementary(), true);
2742
2743 rec.unset_paired();
2744 assert_eq!(rec.is_paired(), false);
2745 assert_eq!(rec.is_supplementary(), true);
2746
2747 rec.unset_supplementary();
2748 assert_eq!(rec.is_paired(), false);
2749 assert_eq!(rec.is_supplementary(), false);
2750 }
2751
2752 #[test]
2753 fn test_cigar_parse() {
2754 let cigar = "1S20M1D2I3X1=2H";
2755 let parsed = CigarString::try_from(cigar).unwrap();
2756 assert_eq!(parsed.to_string(), cigar);
2757 }
2758}
2759
2760#[cfg(test)]
2761mod alignment_cigar_tests {
2762 use super::*;
2763 use crate::bam::{Read, Reader};
2764 use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2765 use bio_types::alignment::{Alignment, AlignmentMode};
2766
2767 #[test]
2768 fn test_cigar() {
2769 let alignment = Alignment {
2770 score: 5,
2771 xstart: 3,
2772 ystart: 0,
2773 xend: 9,
2774 yend: 10,
2775 ylen: 10,
2776 xlen: 10,
2777 operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2778 mode: AlignmentMode::Semiglobal,
2779 };
2780 assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2781 assert_eq!(
2782 CigarString::from_alignment(&alignment, false).0,
2783 vec![
2784 Cigar::SoftClip(3),
2785 Cigar::Equal(3),
2786 Cigar::Diff(1),
2787 Cigar::Ins(2),
2788 Cigar::Del(2),
2789 Cigar::SoftClip(1),
2790 ]
2791 );
2792
2793 let alignment = Alignment {
2794 score: 5,
2795 xstart: 0,
2796 ystart: 5,
2797 xend: 4,
2798 yend: 10,
2799 ylen: 10,
2800 xlen: 5,
2801 operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
2802 mode: AlignmentMode::Custom,
2803 };
2804 assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
2805 assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
2806 assert_eq!(
2807 CigarString::from_alignment(&alignment, false).0,
2808 vec![
2809 Cigar::Equal(1),
2810 Cigar::Diff(2),
2811 Cigar::Ins(1),
2812 Cigar::Del(2),
2813 Cigar::SoftClip(1),
2814 ]
2815 );
2816 assert_eq!(
2817 CigarString::from_alignment(&alignment, true).0,
2818 vec![
2819 Cigar::Equal(1),
2820 Cigar::Diff(2),
2821 Cigar::Ins(1),
2822 Cigar::Del(2),
2823 Cigar::HardClip(1),
2824 ]
2825 );
2826
2827 let alignment = Alignment {
2828 score: 5,
2829 xstart: 0,
2830 ystart: 5,
2831 xend: 3,
2832 yend: 8,
2833 ylen: 10,
2834 xlen: 3,
2835 operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
2836 mode: AlignmentMode::Custom,
2837 };
2838 assert_eq!(alignment.cigar(false), "1X1=1X");
2839 assert_eq!(
2840 CigarString::from_alignment(&alignment, false).0,
2841 vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2842 );
2843
2844 let alignment = Alignment {
2845 score: 5,
2846 xstart: 0,
2847 ystart: 5,
2848 xend: 3,
2849 yend: 8,
2850 ylen: 10,
2851 xlen: 3,
2852 operations: vec![Subst, Match, Subst],
2853 mode: AlignmentMode::Semiglobal,
2854 };
2855 assert_eq!(alignment.cigar(false), "1X1=1X");
2856 assert_eq!(
2857 CigarString::from_alignment(&alignment, false).0,
2858 vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2859 );
2860 }
2861
2862 #[test]
2863 fn test_read_orientation_f1r2() {
2864 let mut bam = Reader::from_path(&"test/test_paired.sam").unwrap();
2865
2866 for res in bam.records() {
2867 let record = res.unwrap();
2868 assert_eq!(
2869 record.read_pair_orientation(),
2870 SequenceReadPairOrientation::F1R2
2871 );
2872 }
2873 }
2874
2875 #[test]
2876 fn test_read_orientation_f2r1() {
2877 let mut bam = Reader::from_path(&"test/test_nonstandard_orientation.sam").unwrap();
2878
2879 for res in bam.records() {
2880 let record = res.unwrap();
2881 assert_eq!(
2882 record.read_pair_orientation(),
2883 SequenceReadPairOrientation::F2R1
2884 );
2885 }
2886 }
2887
2888 #[test]
2889 fn test_read_orientation_supplementary() {
2890 let mut bam = Reader::from_path(&"test/test_orientation_supplementary.sam").unwrap();
2891
2892 for res in bam.records() {
2893 let record = res.unwrap();
2894 assert_eq!(
2895 record.read_pair_orientation(),
2896 SequenceReadPairOrientation::F2R1
2897 );
2898 }
2899 }
2900
2901 #[test]
2902 pub fn test_cigar_parsing_non_ascii_error() {
2903 let cigar_str = "43ጷ";
2904 let expected_error = Err(Error::BamParseCigar {
2905 msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2906 });
2907
2908 let result = CigarString::try_from(cigar_str);
2909 assert_eq!(expected_error, result);
2910 }
2911
2912 #[test]
2913 pub fn test_cigar_parsing() {
2914 let cigar_strs = vec![
2916 "1H10M4D100I300N1102=10P25X11S", "100M", "", "1H1=1H", "1S1=1S", "11H11S11=11S11H", "10H",
2923 "10S",
2924 ];
2925 let cigars = vec![
2927 CigarString(vec![
2928 Cigar::HardClip(1),
2929 Cigar::Match(10),
2930 Cigar::Del(4),
2931 Cigar::Ins(100),
2932 Cigar::RefSkip(300),
2933 Cigar::Equal(1102),
2934 Cigar::Pad(10),
2935 Cigar::Diff(25),
2936 Cigar::SoftClip(11),
2937 ]),
2938 CigarString(vec![Cigar::Match(100)]),
2939 CigarString(vec![]),
2940 CigarString(vec![
2941 Cigar::HardClip(1),
2942 Cigar::Equal(1),
2943 Cigar::HardClip(1),
2944 ]),
2945 CigarString(vec![
2946 Cigar::SoftClip(1),
2947 Cigar::Equal(1),
2948 Cigar::SoftClip(1),
2949 ]),
2950 CigarString(vec![
2951 Cigar::HardClip(11),
2952 Cigar::SoftClip(11),
2953 Cigar::Equal(11),
2954 Cigar::SoftClip(11),
2955 Cigar::HardClip(11),
2956 ]),
2957 CigarString(vec![Cigar::HardClip(10)]),
2958 CigarString(vec![Cigar::SoftClip(10)]),
2959 ];
2960 for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
2962 let cigar_parse = CigarString::try_from(cigar_str)
2963 .expect(&format!("Unable to parse cigar: {}", cigar_str));
2964 assert_eq!(&cigar_parse, truth);
2965 }
2966 }
2967}
2968
2969#[cfg(test)]
2970mod basemod_tests {
2971 use crate::bam::{Read, Reader};
2972
2973 #[test]
2974 pub fn test_count_recorded() {
2975 let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2976
2977 for r in bam.records() {
2978 let record = r.unwrap();
2979 if let Ok(mods) = record.basemods_iter() {
2980 let n = mods.recorded().len();
2981 assert_eq!(n, 3);
2982 };
2983 }
2984 }
2985
2986 #[test]
2987 pub fn test_query_type() {
2988 let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap();
2989
2990 let mut n_fwd = 0;
2991 let mut n_rev = 0;
2992
2993 for r in bam.records() {
2994 let record = r.unwrap();
2995 if let Ok(mods) = record.basemods_iter() {
2996 for mod_code in mods.recorded() {
2997 if let Ok(mod_metadata) = mods.query_type(*mod_code) {
2998 if mod_metadata.strand == 0 {
2999 n_fwd += 1;
3000 }
3001 if mod_metadata.strand == 1 {
3002 n_rev += 1;
3003 }
3004 }
3005 }
3006 };
3007 }
3008 assert_eq!(n_fwd, 2);
3009 assert_eq!(n_rev, 2);
3010 }
3011
3012 #[test]
3013 pub fn test_mod_iter() {
3014 let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
3015 let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
3016 let mut i = 0;
3017
3018 for r in bam.records() {
3019 let record = r.unwrap();
3020 for res in record.basemods_iter().unwrap() {
3021 if let Ok((position, _m)) = res {
3022 assert_eq!(position, expected_positions[i]);
3023 i += 1;
3024 }
3025 }
3026 }
3027 }
3028
3029 #[test]
3030 pub fn test_position_iter() {
3031 let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
3032 let expected_positions = [1, 7, 12, 13, 22, 30, 31];
3033 let expected_counts = [1, 1, 1, 2, 1, 1, 1];
3034 let mut i = 0;
3035
3036 for r in bam.records() {
3037 let record = r.unwrap();
3038 for res in record.basemods_position_iter().unwrap() {
3039 if let Ok((position, elements)) = res {
3040 assert_eq!(position, expected_positions[i]);
3041 assert_eq!(elements.len(), expected_counts[i]);
3042 i += 1;
3043 }
3044 }
3045 }
3046 }
3047}