Skip to main content

rust_htslib/bam/
record.rs

1// Copyright 2014 Christopher Schröder, Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use std::convert::TryFrom;
7use std::convert::TryInto;
8use std::ffi;
9use std::fmt;
10use std::marker::PhantomData;
11use std::mem::{size_of, MaybeUninit};
12use std::ops;
13use std::os::raw::c_char;
14use std::slice;
15use std::str;
16use std::sync::Arc;
17
18use byteorder::{LittleEndian, ReadBytesExt};
19
20use crate::bam::Error;
21use crate::bam::HeaderView;
22use crate::errors::Result;
23use crate::htslib;
24use crate::utils;
25#[cfg(feature = "serde_feature")]
26use serde::{self, Deserialize, Serialize};
27
28use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
29use bio_types::genome;
30use bio_types::sequence::SequenceRead;
31use bio_types::sequence::SequenceReadPairOrientation;
32use bio_types::strand::ReqStrand;
33
34/// A macro creating methods for flag access.
35macro_rules! flag {
36    ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
37        pub fn $get(&self) -> bool {
38            self.inner().core.flag & $bit != 0
39        }
40
41        pub fn $set(&mut self) {
42            self.inner_mut().core.flag |= $bit;
43        }
44
45        pub fn $unset(&mut self) {
46            self.inner_mut().core.flag &= !$bit;
47        }
48    };
49}
50
51/// A BAM record.
52pub struct Record {
53    pub inner: htslib::bam1_t,
54    own: bool,
55    cigar: Option<CigarStringView>,
56    header: Option<Arc<HeaderView>>,
57}
58
59unsafe impl Send for Record {}
60unsafe impl Sync for Record {}
61
62impl Clone for Record {
63    fn clone(&self) -> Self {
64        let mut copy = Record::new();
65        unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
66        copy
67    }
68}
69
70impl PartialEq for Record {
71    fn eq(&self, other: &Record) -> bool {
72        self.tid() == other.tid()
73            && self.pos() == other.pos()
74            && self.bin() == other.bin()
75            && self.mapq() == other.mapq()
76            && self.flags() == other.flags()
77            && self.mtid() == other.mtid()
78            && self.mpos() == other.mpos()
79            && self.insert_size() == other.insert_size()
80            && self.data() == other.data()
81            && self.inner().core.l_extranul == other.inner().core.l_extranul
82    }
83}
84
85impl Eq for Record {}
86
87impl fmt::Debug for Record {
88    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
89        fmt.write_fmt(format_args!(
90            "Record(tid: {}, pos: {})",
91            self.tid(),
92            self.pos()
93        ))
94    }
95}
96
97impl Default for Record {
98    fn default() -> Self {
99        Self::new()
100    }
101}
102
103#[inline]
104fn extranul_from_qname(qname: &[u8]) -> usize {
105    let qlen = qname.len() + 1;
106    if !qlen.is_multiple_of(4) {
107        4 - qlen % 4
108    } else {
109        0
110    }
111}
112
113impl Record {
114    /// Create an empty BAM record.
115    pub fn new() -> Self {
116        let mut record = Record {
117            inner: unsafe { MaybeUninit::zeroed().assume_init() },
118            own: true,
119            cigar: None,
120            header: None,
121        };
122        // The read/query name needs to be set as empty to properly initialize
123        // the record
124        record.set_qname(b"");
125        // Developer note: these are needed so the returned record is properly
126        // initialized as unmapped.
127        record.set_unmapped();
128        record.set_tid(-1);
129        record.set_pos(-1);
130        record.set_mpos(-1);
131        record.set_mtid(-1);
132        record
133    }
134
135    pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
136        Record {
137            inner: {
138                #[allow(clippy::uninit_assumed_init, invalid_value)]
139                let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
140                unsafe {
141                    ::libc::memcpy(
142                        &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
143                        from as *const ::libc::c_void,
144                        size_of::<htslib::bam1_t>(),
145                    );
146                }
147                inner
148            },
149            own: false,
150            cigar: None,
151            header: None,
152        }
153    }
154
155    // Create a BAM record from a line SAM text. SAM slice need not be 0-terminated.
156    pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
157        let mut record = Self::new();
158
159        let mut sam_copy = Vec::with_capacity(sam.len() + 1);
160        sam_copy.extend(sam);
161        sam_copy.push(0);
162
163        let mut sam_string = htslib::kstring_t {
164            s: sam_copy.as_ptr() as *mut c_char,
165            l: sam_copy.len(),
166            m: sam_copy.len(),
167        };
168
169        let succ = unsafe {
170            htslib::sam_parse1(
171                &mut sam_string,
172                header_view.inner_ptr() as *mut htslib::bam_hdr_t,
173                record.inner_ptr_mut(),
174            )
175        };
176
177        if succ == 0 {
178            Ok(record)
179        } else {
180            Err(Error::BamParseSAM {
181                rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
182            })
183        }
184    }
185
186    pub fn set_header(&mut self, header: Arc<HeaderView>) {
187        self.header = Some(header);
188    }
189
190    pub(super) fn data(&self) -> &[u8] {
191        unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
192    }
193
194    #[inline]
195    pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
196        &mut self.inner
197    }
198
199    #[inline]
200    pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
201        &mut self.inner as *mut htslib::bam1_t
202    }
203
204    #[inline]
205    pub fn inner(&self) -> &htslib::bam1_t {
206        &self.inner
207    }
208
209    #[inline]
210    pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
211        &self.inner as *const htslib::bam1_t
212    }
213
214    /// Get target id.
215    pub fn tid(&self) -> i32 {
216        self.inner().core.tid
217    }
218
219    /// Set target id.
220    pub fn set_tid(&mut self, tid: i32) {
221        self.inner_mut().core.tid = tid;
222    }
223
224    /// Get position (0-based).
225    pub fn pos(&self) -> i64 {
226        self.inner().core.pos
227    }
228
229    /// Set position (0-based).
230    pub fn set_pos(&mut self, pos: i64) {
231        self.inner_mut().core.pos = pos;
232    }
233
234    pub fn bin(&self) -> u16 {
235        self.inner().core.bin
236    }
237
238    pub fn set_bin(&mut self, bin: u16) {
239        self.inner_mut().core.bin = bin;
240    }
241
242    /// Get MAPQ.
243    pub fn mapq(&self) -> u8 {
244        self.inner().core.qual
245    }
246
247    /// Set MAPQ.
248    pub fn set_mapq(&mut self, mapq: u8) {
249        self.inner_mut().core.qual = mapq;
250    }
251
252    /// Get strand information from record flags.
253    pub fn strand(&self) -> ReqStrand {
254        let reverse = self.flags() & 0x10 != 0;
255        if reverse {
256            ReqStrand::Reverse
257        } else {
258            ReqStrand::Forward
259        }
260    }
261
262    /// Get raw flags.
263    pub fn flags(&self) -> u16 {
264        self.inner().core.flag
265    }
266
267    /// Set raw flags.
268    pub fn set_flags(&mut self, flags: u16) {
269        self.inner_mut().core.flag = flags;
270    }
271
272    /// Unset all flags.
273    pub fn unset_flags(&mut self) {
274        self.inner_mut().core.flag = 0;
275    }
276
277    /// Get target id of mate.
278    pub fn mtid(&self) -> i32 {
279        self.inner().core.mtid
280    }
281
282    /// Set target id of mate.
283    pub fn set_mtid(&mut self, mtid: i32) {
284        self.inner_mut().core.mtid = mtid;
285    }
286
287    /// Get mate position.
288    pub fn mpos(&self) -> i64 {
289        self.inner().core.mpos
290    }
291
292    /// Set mate position.
293    pub fn set_mpos(&mut self, mpos: i64) {
294        self.inner_mut().core.mpos = mpos;
295    }
296
297    /// Get insert size.
298    pub fn insert_size(&self) -> i64 {
299        self.inner().core.isize_
300    }
301
302    /// Set insert size.
303    pub fn set_insert_size(&mut self, insert_size: i64) {
304        self.inner_mut().core.isize_ = insert_size;
305    }
306
307    fn qname_capacity(&self) -> usize {
308        self.inner().core.l_qname as usize
309    }
310
311    fn qname_len(&self) -> usize {
312        // discount all trailing zeros (the default one and extra nulls)
313        self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
314    }
315
316    /// Get qname (read name). Complexity: O(1).
317    pub fn qname(&self) -> &[u8] {
318        &self.data()[..self.qname_len()]
319    }
320
321    /// Set the variable length data buffer
322    pub fn set_data(&mut self, new_data: &[u8]) {
323        self.cigar = None;
324
325        self.inner_mut().l_data = new_data.len() as i32;
326        if (self.inner().m_data as i32) < self.inner().l_data {
327            // Verbosity due to lexical borrowing
328            let l_data = self.inner().l_data;
329            self.realloc_var_data(l_data as usize);
330        }
331
332        // Copy new data into buffer
333        let data =
334            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
335        utils::copy_memory(new_data, data);
336    }
337
338    /// Set variable length data (qname, cigar, seq, qual).
339    /// The aux data is left unchanged.
340    /// `qual` is Phred-scaled quality values, without any offset.
341    /// NOTE: seq.len() must equal qual.len() or this method
342    /// will panic. If you don't have quality values use
343    /// `let quals = vec![ 255 as u8; seq.len()];` as a placeholder that will
344    /// be recognized as missing QVs by `samtools`.
345    pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
346        assert!(qname.len() < 255);
347        assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
348
349        self.cigar = None;
350
351        let cigar_width = if let Some(cigar_string) = cigar {
352            cigar_string.len()
353        } else {
354            0
355        } * 4;
356        let q_len = qname.len() + 1;
357        let extranul = extranul_from_qname(qname);
358
359        let orig_aux_offset = self.qname_capacity()
360            + 4 * self.cigar_len()
361            + self.seq_len().div_ceil(2)
362            + self.seq_len();
363        let new_aux_offset = q_len + extranul + cigar_width + seq.len().div_ceil(2) + qual.len();
364        assert!(orig_aux_offset <= self.inner.l_data as usize);
365        let aux_len = self.inner.l_data as usize - orig_aux_offset;
366        self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
367        if (self.inner().m_data as i32) < self.inner().l_data {
368            // Verbosity due to lexical borrowing
369            let l_data = self.inner().l_data;
370            self.realloc_var_data(l_data as usize);
371        }
372
373        // Copy the aux data.
374        if aux_len > 0 && orig_aux_offset != new_aux_offset {
375            let data =
376                unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
377            data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
378        }
379
380        let data =
381            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
382
383        // qname
384        utils::copy_memory(qname, data);
385        for i in 0..=extranul {
386            data[qname.len() + i] = b'\0';
387        }
388        let mut i = q_len + extranul;
389        self.inner_mut().core.l_qname = i as u16;
390        self.inner_mut().core.l_extranul = extranul as u8;
391
392        // cigar
393        if let Some(cigar_string) = cigar {
394            let cigar_data = unsafe {
395                //cigar is always aligned to 4 bytes (see extranul above) - so this is safe
396                #[allow(clippy::cast_ptr_alignment)]
397                slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
398            };
399            for (i, c) in cigar_string.iter().enumerate() {
400                cigar_data[i] = c.encode();
401            }
402            self.inner_mut().core.n_cigar = cigar_string.len() as u32;
403            i += cigar_string.len() * 4;
404        } else {
405            self.inner_mut().core.n_cigar = 0;
406        };
407
408        // seq
409        {
410            for j in (0..seq.len()).step_by(2) {
411                data[i + j / 2] = (ENCODE_BASE[seq[j] as usize] << 4)
412                    | (if j + 1 < seq.len() {
413                        ENCODE_BASE[seq[j + 1] as usize]
414                    } else {
415                        0
416                    });
417            }
418            self.inner_mut().core.l_qseq = seq.len() as i32;
419            i += seq.len().div_ceil(2);
420        }
421
422        // qual
423        utils::copy_memory(qual, &mut data[i..]);
424    }
425
426    /// Replace current qname with a new one.
427    pub fn set_qname(&mut self, new_qname: &[u8]) {
428        // 251 + 1NUL is the max 32-bit aligned value that fits in u8
429        assert!(new_qname.len() < 252);
430
431        let old_q_len = self.qname_capacity();
432        // We're going to add a terminal NUL
433        let extranul = extranul_from_qname(new_qname);
434        let new_q_len = new_qname.len() + 1 + extranul;
435
436        // Length of data after qname
437        let other_len = self.inner_mut().l_data - old_q_len as i32;
438
439        if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
440            self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
441        } else if new_q_len > old_q_len {
442            self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
443
444            // Reallocate if necessary
445            if (self.inner().m_data as i32) < self.inner().l_data {
446                // Verbosity due to lexical borrowing
447                let l_data = self.inner().l_data;
448                self.realloc_var_data(l_data as usize);
449            }
450        }
451
452        if new_q_len != old_q_len {
453            // Move other data to new location
454            unsafe {
455                let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
456
457                ::libc::memmove(
458                    data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
459                    data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
460                    other_len as usize,
461                );
462            }
463        }
464
465        // Copy qname data
466        let data =
467            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
468        utils::copy_memory(new_qname, data);
469        for i in 0..=extranul {
470            data[new_q_len - i - 1] = b'\0';
471        }
472        self.inner_mut().core.l_qname = new_q_len as u16;
473        self.inner_mut().core.l_extranul = extranul as u8;
474    }
475
476    /// Replace current cigar with a new one.
477    pub fn set_cigar(&mut self, new_cigar: Option<&CigarString>) {
478        self.cigar = None;
479
480        let qname_data_len = self.qname_capacity();
481        let old_cigar_data_len = self.cigar_len() * 4;
482
483        // Length of data after cigar
484        let other_data_len = self.inner_mut().l_data - (qname_data_len + old_cigar_data_len) as i32;
485
486        let new_cigar_len = match new_cigar {
487            Some(x) => x.len(),
488            None => 0,
489        };
490        let new_cigar_data_len = new_cigar_len * 4;
491
492        if new_cigar_data_len < old_cigar_data_len {
493            self.inner_mut().l_data -= (old_cigar_data_len - new_cigar_data_len) as i32;
494        } else if new_cigar_data_len > old_cigar_data_len {
495            self.inner_mut().l_data += (new_cigar_data_len - old_cigar_data_len) as i32;
496
497            // Reallocate if necessary
498            if (self.inner().m_data as i32) < self.inner().l_data {
499                // Verbosity due to lexical borrowing
500                let l_data = self.inner().l_data;
501                self.realloc_var_data(l_data as usize);
502            }
503        }
504
505        if new_cigar_data_len != old_cigar_data_len {
506            // Move other data to new location
507            unsafe {
508                ::libc::memmove(
509                    self.inner.data.add(qname_data_len + new_cigar_data_len) as *mut ::libc::c_void,
510                    self.inner.data.add(qname_data_len + old_cigar_data_len) as *mut ::libc::c_void,
511                    other_data_len as usize,
512                );
513            }
514        }
515
516        // Copy cigar data
517        if let Some(cigar_string) = new_cigar {
518            let cigar_data = unsafe {
519                #[allow(clippy::cast_ptr_alignment)]
520                slice::from_raw_parts_mut(
521                    self.inner.data.add(qname_data_len) as *mut u32,
522                    cigar_string.len(),
523                )
524            };
525            for (i, c) in cigar_string.iter().enumerate() {
526                cigar_data[i] = c.encode();
527            }
528        }
529        self.inner_mut().core.n_cigar = new_cigar_len as u32;
530    }
531
532    fn realloc_var_data(&mut self, new_len: usize) {
533        // pad request
534        let new_len = new_len as u32;
535        let new_request = new_len + 32 - (new_len % 32);
536
537        let ptr = unsafe {
538            ::libc::realloc(
539                self.inner().data as *mut ::libc::c_void,
540                new_request as usize,
541            ) as *mut u8
542        };
543
544        if ptr.is_null() {
545            panic!("ran out of memory in rust_htslib trying to realloc");
546        }
547
548        // don't update m_data until we know we have
549        // a successful allocation.
550        self.inner_mut().m_data = new_request;
551        self.inner_mut().data = ptr;
552
553        // we now own inner.data
554        self.own = true;
555    }
556
557    pub fn cigar_len(&self) -> usize {
558        self.inner().core.n_cigar as usize
559    }
560
561    /// Get reference to raw cigar string representation (as stored in BAM file).
562    /// Usually, the method `Record::cigar` should be used instead.
563    pub fn raw_cigar(&self) -> &[u32] {
564        //cigar is always aligned to 4 bytes - so this is safe
565        #[allow(clippy::cast_ptr_alignment)]
566        unsafe {
567            slice::from_raw_parts(
568                self.data()[self.qname_capacity()..].as_ptr() as *const u32,
569                self.cigar_len(),
570            )
571        }
572    }
573
574    /// Return unpacked cigar string. This will create a fresh copy the Cigar data.
575    pub fn cigar(&self) -> CigarStringView {
576        match self.cigar {
577            Some(ref c) => c.clone(),
578            None => self.unpack_cigar(),
579        }
580    }
581
582    // Return unpacked cigar string. This returns None unless you have first called `bam::Record::cache_cigar`.
583    pub fn cigar_cached(&self) -> Option<&CigarStringView> {
584        self.cigar.as_ref()
585    }
586
587    /// Decode the cigar string and cache it inside the `Record`
588    pub fn cache_cigar(&mut self) {
589        self.cigar = Some(self.unpack_cigar())
590    }
591
592    /// Unpack cigar string. Complexity: O(k) with k being the length of the cigar string.
593    fn unpack_cigar(&self) -> CigarStringView {
594        CigarString(
595            self.raw_cigar()
596                .iter()
597                .map(|&c| {
598                    let len = c >> 4;
599                    match c & 0b1111 {
600                        0 => Cigar::Match(len),
601                        1 => Cigar::Ins(len),
602                        2 => Cigar::Del(len),
603                        3 => Cigar::RefSkip(len),
604                        4 => Cigar::SoftClip(len),
605                        5 => Cigar::HardClip(len),
606                        6 => Cigar::Pad(len),
607                        7 => Cigar::Equal(len),
608                        8 => Cigar::Diff(len),
609                        _ => panic!("Unexpected cigar operation"),
610                    }
611                })
612                .collect(),
613        )
614        .into_view(self.pos())
615    }
616
617    pub fn seq_len(&self) -> usize {
618        self.inner().core.l_qseq as usize
619    }
620
621    fn seq_data(&self) -> &[u8] {
622        let offset = self.qname_capacity() + self.cigar_len() * 4;
623        &self.data()[offset..][..self.seq_len().div_ceil(2)]
624    }
625
626    /// Get read sequence. Complexity: O(1).
627    pub fn seq(&self) -> Seq<'_> {
628        Seq {
629            encoded: self.seq_data(),
630            len: self.seq_len(),
631        }
632    }
633
634    /// Get base qualities (PHRED-scaled probability that base is wrong).
635    /// This does not entail any offsets, hence the qualities can be used directly without
636    /// e.g. subtracting 33. Complexity: O(1).
637    pub fn qual(&self) -> &[u8] {
638        &self.data()[self.qname_capacity() + self.cigar_len() * 4 + self.seq_len().div_ceil(2)..]
639            [..self.seq_len()]
640    }
641
642    /// Look up an auxiliary field by its tag.
643    ///
644    /// Only the first two bytes of a given tag are used for the look-up of a field.
645    /// See [`Aux`] for more details.
646    pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
647        if tag.len() < 2 {
648            return Err(Error::BamAuxStringError);
649        }
650        let aux = unsafe {
651            htslib::bam_aux_get(
652                &self.inner as *const htslib::bam1_t,
653                tag.as_ptr() as *const c_char,
654            )
655        };
656        unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
657    }
658
659    unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
660        const TAG_LEN: isize = 2;
661        // Used for skipping type identifier
662        const TYPE_ID_LEN: isize = 1;
663
664        if aux.is_null() {
665            return Err(Error::BamAuxTagNotFound);
666        }
667
668        let (data, type_size) = match *aux {
669            b'A' => {
670                let type_size = size_of::<u8>();
671                (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
672            }
673            b'c' => {
674                let type_size = size_of::<i8>();
675                (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
676            }
677            b'C' => {
678                let type_size = size_of::<u8>();
679                (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
680            }
681            b's' => {
682                let type_size = size_of::<i16>();
683                (
684                    Aux::I16(
685                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
686                            .read_i16::<LittleEndian>()
687                            .map_err(|_| Error::BamAuxParsingError)?,
688                    ),
689                    type_size,
690                )
691            }
692            b'S' => {
693                let type_size = size_of::<u16>();
694                (
695                    Aux::U16(
696                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
697                            .read_u16::<LittleEndian>()
698                            .map_err(|_| Error::BamAuxParsingError)?,
699                    ),
700                    type_size,
701                )
702            }
703            b'i' => {
704                let type_size = size_of::<i32>();
705                (
706                    Aux::I32(
707                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
708                            .read_i32::<LittleEndian>()
709                            .map_err(|_| Error::BamAuxParsingError)?,
710                    ),
711                    type_size,
712                )
713            }
714            b'I' => {
715                let type_size = size_of::<u32>();
716                (
717                    Aux::U32(
718                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
719                            .read_u32::<LittleEndian>()
720                            .map_err(|_| Error::BamAuxParsingError)?,
721                    ),
722                    type_size,
723                )
724            }
725            b'f' => {
726                let type_size = size_of::<f32>();
727                (
728                    Aux::Float(
729                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
730                            .read_f32::<LittleEndian>()
731                            .map_err(|_| Error::BamAuxParsingError)?,
732                    ),
733                    type_size,
734                )
735            }
736            b'd' => {
737                let type_size = size_of::<f64>();
738                (
739                    Aux::Double(
740                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
741                            .read_f64::<LittleEndian>()
742                            .map_err(|_| Error::BamAuxParsingError)?,
743                    ),
744                    type_size,
745                )
746            }
747            b'Z' | b'H' => {
748                let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
749                let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
750                (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
751            }
752            b'B' => {
753                const ARRAY_INNER_TYPE_LEN: isize = 1;
754                const ARRAY_COUNT_LEN: isize = 4;
755
756                // Used for skipping metadata
757                let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
758
759                let length =
760                    slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
761                        .read_u32::<LittleEndian>()
762                        .map_err(|_| Error::BamAuxParsingError)? as usize;
763
764                // Return tuples of an `Aux` enum and the length of data + metadata in bytes
765                let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
766                    b'c' => (
767                        Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
768                            aux.offset(array_data_offset),
769                            length,
770                        ))),
771                        length,
772                    ),
773                    b'C' => (
774                        Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
775                            aux.offset(array_data_offset),
776                            length,
777                        ))),
778                        length,
779                    ),
780                    b's' => (
781                        Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
782                            aux.offset(array_data_offset),
783                            length * size_of::<i16>(),
784                        ))),
785                        length * std::mem::size_of::<i16>(),
786                    ),
787                    b'S' => (
788                        Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
789                            aux.offset(array_data_offset),
790                            length * size_of::<u16>(),
791                        ))),
792                        length * std::mem::size_of::<u16>(),
793                    ),
794                    b'i' => (
795                        Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
796                            aux.offset(array_data_offset),
797                            length * size_of::<i32>(),
798                        ))),
799                        length * std::mem::size_of::<i32>(),
800                    ),
801                    b'I' => (
802                        Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
803                            aux.offset(array_data_offset),
804                            length * size_of::<u32>(),
805                        ))),
806                        length * std::mem::size_of::<u32>(),
807                    ),
808                    b'f' => (
809                        Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
810                            aux.offset(array_data_offset),
811                            length * size_of::<f32>(),
812                        ))),
813                        length * std::mem::size_of::<f32>(),
814                    ),
815                    _ => {
816                        return Err(Error::BamAuxUnknownType);
817                    }
818                };
819                (
820                    array_data,
821                    // Offset: array-specific metadata + array size
822                    ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
823                )
824            }
825            _ => {
826                return Err(Error::BamAuxUnknownType);
827            }
828        };
829
830        // Offset: metadata + type size
831        Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
832    }
833
834    /// Returns an iterator over the auxiliary fields of the record.
835    ///
836    /// When an error occurs, the `Err` variant will be returned
837    /// and the iterator will not be able to advance anymore.
838    pub fn aux_iter(&'_ self) -> AuxIter<'_> {
839        AuxIter {
840            // In order to get to the aux data section of a `bam::Record`
841            // we need to skip fields in front of it
842            aux: &self.data()[
843                // NUL terminated read name:
844                self.qname_capacity()
845                // CIGAR (uint32_t):
846                + self.cigar_len() * std::mem::size_of::<u32>()
847                // Read sequence (4-bit encoded):
848                + self.seq_len().div_ceil(2)
849                // Base qualities (char):
850                + self.seq_len()..],
851        }
852    }
853
854    /// Add auxiliary data.
855    pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
856        // Don't allow pushing aux data when the given tag is already present in the record.
857        // `htslib` seems to allow this (for non-array values), which can lead to problems
858        // since retrieving aux fields consumes &[u8; 2] and yields one field only.
859        if self.aux(tag).is_ok() {
860            return Err(Error::BamAuxTagAlreadyPresent);
861        }
862        self.push_aux_unchecked(tag, value)
863    }
864
865    /// Add auxiliary data, without checking if the tag is present.
866    ///
867    /// The caller should ensure that the same tag is not pushed more than once.
868    /// This is provided as a performance optimization.
869    pub fn push_aux_unchecked(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
870        let ctag = tag.as_ptr() as *mut c_char;
871        let ret = unsafe {
872            match value {
873                Aux::Char(v) => htslib::bam_aux_append(
874                    self.inner_ptr_mut(),
875                    ctag,
876                    b'A' as c_char,
877                    size_of::<u8>() as i32,
878                    [v].as_mut_ptr(),
879                ),
880                Aux::I8(v) => htslib::bam_aux_append(
881                    self.inner_ptr_mut(),
882                    ctag,
883                    b'c' as c_char,
884                    size_of::<i8>() as i32,
885                    [v].as_mut_ptr() as *mut u8,
886                ),
887                Aux::U8(v) => htslib::bam_aux_append(
888                    self.inner_ptr_mut(),
889                    ctag,
890                    b'C' as c_char,
891                    size_of::<u8>() as i32,
892                    [v].as_mut_ptr(),
893                ),
894                Aux::I16(v) => htslib::bam_aux_append(
895                    self.inner_ptr_mut(),
896                    ctag,
897                    b's' as c_char,
898                    size_of::<i16>() as i32,
899                    [v].as_mut_ptr() as *mut u8,
900                ),
901                Aux::U16(v) => htslib::bam_aux_append(
902                    self.inner_ptr_mut(),
903                    ctag,
904                    b'S' as c_char,
905                    size_of::<u16>() as i32,
906                    [v].as_mut_ptr() as *mut u8,
907                ),
908                Aux::I32(v) => htslib::bam_aux_append(
909                    self.inner_ptr_mut(),
910                    ctag,
911                    b'i' as c_char,
912                    size_of::<i32>() as i32,
913                    [v].as_mut_ptr() as *mut u8,
914                ),
915                Aux::U32(v) => htslib::bam_aux_append(
916                    self.inner_ptr_mut(),
917                    ctag,
918                    b'I' as c_char,
919                    size_of::<u32>() as i32,
920                    [v].as_mut_ptr() as *mut u8,
921                ),
922                Aux::Float(v) => htslib::bam_aux_append(
923                    self.inner_ptr_mut(),
924                    ctag,
925                    b'f' as c_char,
926                    size_of::<f32>() as i32,
927                    [v].as_mut_ptr() as *mut u8,
928                ),
929                // Not part of specs but implemented in `htslib`:
930                Aux::Double(v) => htslib::bam_aux_append(
931                    self.inner_ptr_mut(),
932                    ctag,
933                    b'd' as c_char,
934                    size_of::<f64>() as i32,
935                    [v].as_mut_ptr() as *mut u8,
936                ),
937                Aux::String(v) => {
938                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
939                    htslib::bam_aux_append(
940                        self.inner_ptr_mut(),
941                        ctag,
942                        b'Z' as c_char,
943                        (v.len() + 1) as i32,
944                        c_str.as_ptr() as *mut u8,
945                    )
946                }
947                Aux::HexByteArray(v) => {
948                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
949                    htslib::bam_aux_append(
950                        self.inner_ptr_mut(),
951                        ctag,
952                        b'H' as c_char,
953                        (v.len() + 1) as i32,
954                        c_str.as_ptr() as *mut u8,
955                    )
956                }
957                // Not sure it's safe to cast an immutable slice to a mutable pointer in the following branches
958                Aux::ArrayI8(aux_array) => match aux_array {
959                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
960                        self.inner_ptr_mut(),
961                        ctag,
962                        b'c',
963                        inner.len() as u32,
964                        inner.slice.as_ptr() as *mut ::libc::c_void,
965                    ),
966                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
967                        self.inner_ptr_mut(),
968                        ctag,
969                        b'c',
970                        inner.len() as u32,
971                        inner.slice.as_ptr() as *mut ::libc::c_void,
972                    ),
973                },
974                Aux::ArrayU8(aux_array) => match aux_array {
975                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
976                        self.inner_ptr_mut(),
977                        ctag,
978                        b'C',
979                        inner.len() as u32,
980                        inner.slice.as_ptr() as *mut ::libc::c_void,
981                    ),
982                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
983                        self.inner_ptr_mut(),
984                        ctag,
985                        b'C',
986                        inner.len() as u32,
987                        inner.slice.as_ptr() as *mut ::libc::c_void,
988                    ),
989                },
990                Aux::ArrayI16(aux_array) => match aux_array {
991                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
992                        self.inner_ptr_mut(),
993                        ctag,
994                        b's',
995                        inner.len() as u32,
996                        inner.slice.as_ptr() as *mut ::libc::c_void,
997                    ),
998                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
999                        self.inner_ptr_mut(),
1000                        ctag,
1001                        b's',
1002                        inner.len() as u32,
1003                        inner.slice.as_ptr() as *mut ::libc::c_void,
1004                    ),
1005                },
1006                Aux::ArrayU16(aux_array) => match aux_array {
1007                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1008                        self.inner_ptr_mut(),
1009                        ctag,
1010                        b'S',
1011                        inner.len() as u32,
1012                        inner.slice.as_ptr() as *mut ::libc::c_void,
1013                    ),
1014                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1015                        self.inner_ptr_mut(),
1016                        ctag,
1017                        b'S',
1018                        inner.len() as u32,
1019                        inner.slice.as_ptr() as *mut ::libc::c_void,
1020                    ),
1021                },
1022                Aux::ArrayI32(aux_array) => match aux_array {
1023                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1024                        self.inner_ptr_mut(),
1025                        ctag,
1026                        b'i',
1027                        inner.len() as u32,
1028                        inner.slice.as_ptr() as *mut ::libc::c_void,
1029                    ),
1030                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1031                        self.inner_ptr_mut(),
1032                        ctag,
1033                        b'i',
1034                        inner.len() as u32,
1035                        inner.slice.as_ptr() as *mut ::libc::c_void,
1036                    ),
1037                },
1038                Aux::ArrayU32(aux_array) => match aux_array {
1039                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1040                        self.inner_ptr_mut(),
1041                        ctag,
1042                        b'I',
1043                        inner.len() as u32,
1044                        inner.slice.as_ptr() as *mut ::libc::c_void,
1045                    ),
1046                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1047                        self.inner_ptr_mut(),
1048                        ctag,
1049                        b'I',
1050                        inner.len() as u32,
1051                        inner.slice.as_ptr() as *mut ::libc::c_void,
1052                    ),
1053                },
1054                Aux::ArrayFloat(aux_array) => match aux_array {
1055                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1056                        self.inner_ptr_mut(),
1057                        ctag,
1058                        b'f',
1059                        inner.len() as u32,
1060                        inner.slice.as_ptr() as *mut ::libc::c_void,
1061                    ),
1062                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1063                        self.inner_ptr_mut(),
1064                        ctag,
1065                        b'f',
1066                        inner.len() as u32,
1067                        inner.slice.as_ptr() as *mut ::libc::c_void,
1068                    ),
1069                },
1070            }
1071        };
1072
1073        if ret < 0 {
1074            Err(Error::BamAux)
1075        } else {
1076            Ok(())
1077        }
1078    }
1079
1080    /// Update or add auxiliary data.
1081    pub fn update_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
1082        // Update existing aux data for the given tag if already present in the record
1083        // without changing the ordering of tags in the record or append aux data at
1084        // the end of the existing aux records if it is a new tag.
1085
1086        let ctag = tag.as_ptr() as *mut c_char;
1087        let ret = unsafe {
1088            match value {
1089                Aux::Char(_v) => return Err(Error::BamAuxTagUpdatingNotSupported),
1090                Aux::I8(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1091                Aux::U8(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1092                Aux::I16(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1093                Aux::U16(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1094                Aux::I32(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1095                Aux::U32(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1096                Aux::Float(v) => htslib::bam_aux_update_float(self.inner_ptr_mut(), ctag, v),
1097                // Not part of specs but implemented in `htslib`:
1098                Aux::Double(v) => {
1099                    htslib::bam_aux_update_float(self.inner_ptr_mut(), ctag, v as f32)
1100                }
1101                Aux::String(v) => {
1102                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
1103                    htslib::bam_aux_update_str(
1104                        self.inner_ptr_mut(),
1105                        ctag,
1106                        (v.len() + 1) as i32,
1107                        c_str.as_ptr() as *const c_char,
1108                    )
1109                }
1110                Aux::HexByteArray(_v) => return Err(Error::BamAuxTagUpdatingNotSupported),
1111                // Not sure it's safe to cast an immutable slice to a mutable pointer in the following branches
1112                Aux::ArrayI8(aux_array) => match aux_array {
1113                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1114                        self.inner_ptr_mut(),
1115                        ctag,
1116                        b'c',
1117                        inner.len() as u32,
1118                        inner.slice.as_ptr() as *mut ::libc::c_void,
1119                    ),
1120                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1121                        self.inner_ptr_mut(),
1122                        ctag,
1123                        b'c',
1124                        inner.len() as u32,
1125                        inner.slice.as_ptr() as *mut ::libc::c_void,
1126                    ),
1127                },
1128                Aux::ArrayU8(aux_array) => match aux_array {
1129                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1130                        self.inner_ptr_mut(),
1131                        ctag,
1132                        b'C',
1133                        inner.len() as u32,
1134                        inner.slice.as_ptr() as *mut ::libc::c_void,
1135                    ),
1136                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1137                        self.inner_ptr_mut(),
1138                        ctag,
1139                        b'C',
1140                        inner.len() as u32,
1141                        inner.slice.as_ptr() as *mut ::libc::c_void,
1142                    ),
1143                },
1144                Aux::ArrayI16(aux_array) => match aux_array {
1145                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1146                        self.inner_ptr_mut(),
1147                        ctag,
1148                        b's',
1149                        inner.len() as u32,
1150                        inner.slice.as_ptr() as *mut ::libc::c_void,
1151                    ),
1152                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1153                        self.inner_ptr_mut(),
1154                        ctag,
1155                        b's',
1156                        inner.len() as u32,
1157                        inner.slice.as_ptr() as *mut ::libc::c_void,
1158                    ),
1159                },
1160                Aux::ArrayU16(aux_array) => match aux_array {
1161                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1162                        self.inner_ptr_mut(),
1163                        ctag,
1164                        b'S',
1165                        inner.len() as u32,
1166                        inner.slice.as_ptr() as *mut ::libc::c_void,
1167                    ),
1168                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1169                        self.inner_ptr_mut(),
1170                        ctag,
1171                        b'S',
1172                        inner.len() as u32,
1173                        inner.slice.as_ptr() as *mut ::libc::c_void,
1174                    ),
1175                },
1176                Aux::ArrayI32(aux_array) => match aux_array {
1177                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1178                        self.inner_ptr_mut(),
1179                        ctag,
1180                        b'i',
1181                        inner.len() as u32,
1182                        inner.slice.as_ptr() as *mut ::libc::c_void,
1183                    ),
1184                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1185                        self.inner_ptr_mut(),
1186                        ctag,
1187                        b'i',
1188                        inner.len() as u32,
1189                        inner.slice.as_ptr() as *mut ::libc::c_void,
1190                    ),
1191                },
1192                Aux::ArrayU32(aux_array) => match aux_array {
1193                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1194                        self.inner_ptr_mut(),
1195                        ctag,
1196                        b'I',
1197                        inner.len() as u32,
1198                        inner.slice.as_ptr() as *mut ::libc::c_void,
1199                    ),
1200                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1201                        self.inner_ptr_mut(),
1202                        ctag,
1203                        b'I',
1204                        inner.len() as u32,
1205                        inner.slice.as_ptr() as *mut ::libc::c_void,
1206                    ),
1207                },
1208                Aux::ArrayFloat(aux_array) => match aux_array {
1209                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1210                        self.inner_ptr_mut(),
1211                        ctag,
1212                        b'f',
1213                        inner.len() as u32,
1214                        inner.slice.as_ptr() as *mut ::libc::c_void,
1215                    ),
1216                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1217                        self.inner_ptr_mut(),
1218                        ctag,
1219                        b'f',
1220                        inner.len() as u32,
1221                        inner.slice.as_ptr() as *mut ::libc::c_void,
1222                    ),
1223                },
1224            }
1225        };
1226
1227        if ret < 0 {
1228            Err(Error::BamAux)
1229        } else {
1230            Ok(())
1231        }
1232    }
1233
1234    // Delete auxiliary tag.
1235    pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1236        if tag.len() < 2 {
1237            return Err(Error::BamAuxStringError);
1238        }
1239        let aux = unsafe {
1240            htslib::bam_aux_get(
1241                &self.inner as *const htslib::bam1_t,
1242                tag.as_ptr() as *const c_char,
1243            )
1244        };
1245        unsafe {
1246            if aux.is_null() {
1247                Err(Error::BamAuxTagNotFound)
1248            } else {
1249                htslib::bam_aux_del(self.inner_ptr_mut(), aux);
1250                Ok(())
1251            }
1252        }
1253    }
1254
1255    /// Access the base modifications associated with this Record through the MM tag.
1256    /// Example:
1257    /// ```
1258    ///    use rust_htslib::bam::{Read, Reader, Record};
1259    ///    let mut bam = Reader::from_path("test/base_mods/MM-orient.sam").unwrap();
1260    ///    let mut mod_count = 0;
1261    ///    for r in bam.records() {
1262    ///        let record = r.unwrap();
1263    ///        if let Ok(mods) = record.basemods_iter() {
1264    ///            // print metadata for the modifications present in this record
1265    ///            for mod_code in mods.recorded() {
1266    ///                if let Ok(mod_metadata) = mods.query_type(*mod_code) {
1267    ///                    println!("mod found with code {}/{} flags: [{} {} {}]",
1268    ///                              mod_code, *mod_code as u8 as char,
1269    ///                              mod_metadata.strand, mod_metadata.implicit, mod_metadata.canonical as u8 as char);
1270    ///                }
1271    ///            }
1272    ///
1273    ///            // iterate over the modifications in this record
1274    ///            // the modifications are returned as a tuple with the
1275    ///            // position within SEQ and an hts_base_mod struct
1276    ///            for res in mods {
1277    ///                if let Ok( (position, m) ) = res {
1278    ///                    println!("{} {},{}", position, m.modified_base as u8 as char, m.qual);
1279    ///                    mod_count += 1;
1280    ///                }
1281    ///            }
1282    ///        };
1283    ///    }
1284    ///    assert_eq!(mod_count, 14);
1285    /// ```
1286    pub fn basemods_iter(&'_ self) -> Result<BaseModificationsIter<'_>> {
1287        BaseModificationsIter::new(self)
1288    }
1289
1290    /// An iterator that returns all of the modifications for each position as a vector.
1291    /// This is useful for the case where multiple possible modifications can be annotated
1292    /// at a single position (for example a C could be 5-mC or 5-hmC)
1293    pub fn basemods_position_iter(&'_ self) -> Result<BaseModificationsPositionIter<'_>> {
1294        BaseModificationsPositionIter::new(self)
1295    }
1296
1297    /// Infer read pair orientation from record. Returns `SequenceReadPairOrientation::None` if record
1298    /// is not paired, mates are not mapping to the same contig, or mates start at the
1299    /// same position.
1300    pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1301        if self.is_paired()
1302            && !self.is_unmapped()
1303            && !self.is_mate_unmapped()
1304            && self.tid() == self.mtid()
1305        {
1306            if self.pos() == self.mpos() {
1307                // both reads start at the same position, we cannot decide on the orientation.
1308                return SequenceReadPairOrientation::None;
1309            }
1310
1311            let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() {
1312                (
1313                    self.pos(),
1314                    self.mpos(),
1315                    !self.is_reverse(),
1316                    !self.is_mate_reverse(),
1317                )
1318            } else {
1319                (
1320                    self.mpos(),
1321                    self.pos(),
1322                    !self.is_mate_reverse(),
1323                    !self.is_reverse(),
1324                )
1325            };
1326
1327            if pos_1 < pos_2 {
1328                match (fwd_1, fwd_2) {
1329                    (true, true) => SequenceReadPairOrientation::F1F2,
1330                    (true, false) => SequenceReadPairOrientation::F1R2,
1331                    (false, true) => SequenceReadPairOrientation::R1F2,
1332                    (false, false) => SequenceReadPairOrientation::R1R2,
1333                }
1334            } else {
1335                match (fwd_2, fwd_1) {
1336                    (true, true) => SequenceReadPairOrientation::F2F1,
1337                    (true, false) => SequenceReadPairOrientation::F2R1,
1338                    (false, true) => SequenceReadPairOrientation::R2F1,
1339                    (false, false) => SequenceReadPairOrientation::R2R1,
1340                }
1341            }
1342        } else {
1343            SequenceReadPairOrientation::None
1344        }
1345    }
1346
1347    flag!(is_paired, set_paired, unset_paired, 1u16);
1348    flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1349    flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1350    flag!(
1351        is_mate_unmapped,
1352        set_mate_unmapped,
1353        unset_mate_unmapped,
1354        8u16
1355    );
1356    flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1357    flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1358    flag!(
1359        is_first_in_template,
1360        set_first_in_template,
1361        unset_first_in_template,
1362        64u16
1363    );
1364    flag!(
1365        is_last_in_template,
1366        set_last_in_template,
1367        unset_last_in_template,
1368        128u16
1369    );
1370    flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1371    flag!(
1372        is_quality_check_failed,
1373        set_quality_check_failed,
1374        unset_quality_check_failed,
1375        512u16
1376    );
1377    flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1378    flag!(
1379        is_supplementary,
1380        set_supplementary,
1381        unset_supplementary,
1382        2048u16
1383    );
1384}
1385
1386impl Drop for Record {
1387    fn drop(&mut self) {
1388        if self.own {
1389            unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
1390        }
1391    }
1392}
1393
1394impl SequenceRead for Record {
1395    fn name(&self) -> &[u8] {
1396        self.qname()
1397    }
1398
1399    fn base(&self, i: usize) -> u8 {
1400        *decode_base_unchecked(encoded_base(self.seq_data(), i))
1401    }
1402
1403    fn base_qual(&self, i: usize) -> u8 {
1404        self.qual()[i]
1405    }
1406
1407    fn len(&self) -> usize {
1408        self.seq_len()
1409    }
1410
1411    fn is_empty(&self) -> bool {
1412        self.len() == 0
1413    }
1414}
1415
1416impl genome::AbstractInterval for Record {
1417    /// Return contig name. Panics if record does not know its header (which happens if it has not been read from a file).
1418    fn contig(&self) -> &str {
1419        let tid = self.tid();
1420        if tid < 0 {
1421            panic!("invalid tid, must be at least zero");
1422        }
1423        str::from_utf8(
1424            self.header
1425                .as_ref()
1426                .expect(
1427                    "header must be set (this is the case if the record has been read from a file)",
1428                )
1429                .tid2name(tid as u32),
1430        )
1431        .expect("unable to interpret contig name as UTF-8")
1432    }
1433
1434    /// Return genomic range covered by alignment. Panics if `Record::cache_cigar()` has not been called first or `Record::pos()` is less than zero.
1435    fn range(&self) -> ops::Range<genome::Position> {
1436        let end_pos = self
1437            .cigar_cached()
1438            .expect("cigar has not been cached yet, call cache_cigar() first")
1439            .end_pos() as u64;
1440
1441        if self.pos() < 0 {
1442            panic!("invalid position, must be positive")
1443        }
1444
1445        self.pos() as u64..end_pos
1446    }
1447}
1448
1449/// Auxiliary record data
1450///
1451/// The specification allows a wide range of types to be stored as an auxiliary data field of a BAM record.
1452///
1453/// Please note that the [`Aux::Double`] variant is _not_ part of the specification, but it is supported by `htslib`.
1454///
1455/// # Examples
1456///
1457/// ```
1458/// use rust_htslib::{
1459///     bam,
1460///     bam::record::{Aux, AuxArray},
1461///     errors::Error,
1462/// };
1463///
1464/// //Set up BAM record
1465/// let bam_header = bam::Header::new();
1466/// let mut record = bam::Record::from_sam(
1467///     &mut bam::HeaderView::from_header(&bam_header),
1468///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1469/// )
1470/// .unwrap();
1471///
1472/// // Add an integer field
1473/// let aux_integer_field = Aux::I32(1234);
1474/// record.push_aux(b"XI", aux_integer_field).unwrap();
1475///
1476/// match record.aux(b"XI") {
1477///     Ok(value) => {
1478///         // Typically, callers expect an aux field to be of a certain type.
1479///         // If that's not the case, the value can be `match`ed exhaustively.
1480///         if let Aux::I32(v) = value {
1481///             assert_eq!(v, 1234);
1482///         }
1483///     }
1484///     Err(e) => {
1485///         panic!("Error reading aux field: {}", e);
1486///     }
1487/// }
1488///
1489/// // Add an array field
1490/// let array_like_data = vec![0.4, 0.3, 0.2, 0.1];
1491/// let slice_of_data = &array_like_data;
1492/// let aux_array: AuxArray<f32> = slice_of_data.into();
1493/// let aux_array_field = Aux::ArrayFloat(aux_array);
1494/// record.push_aux(b"XA", aux_array_field).unwrap();
1495///
1496/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1497///     let read_array = array.iter().collect::<Vec<_>>();
1498///     assert_eq!(read_array, array_like_data);
1499/// } else {
1500///     panic!("Could not read array data");
1501/// }
1502/// ```
1503#[derive(Debug, PartialEq)]
1504pub enum Aux<'a> {
1505    Char(u8),
1506    I8(i8),
1507    U8(u8),
1508    I16(i16),
1509    U16(u16),
1510    I32(i32),
1511    U32(u32),
1512    Float(f32),
1513    Double(f64), // Not part of specs but implemented in `htslib`
1514    String(&'a str),
1515    HexByteArray(&'a str),
1516    ArrayI8(AuxArray<'a, i8>),
1517    ArrayU8(AuxArray<'a, u8>),
1518    ArrayI16(AuxArray<'a, i16>),
1519    ArrayU16(AuxArray<'a, u16>),
1520    ArrayI32(AuxArray<'a, i32>),
1521    ArrayU32(AuxArray<'a, u32>),
1522    ArrayFloat(AuxArray<'a, f32>),
1523}
1524
1525unsafe impl Send for Aux<'_> {}
1526unsafe impl Sync for Aux<'_> {}
1527
1528/// Types that can be used in aux arrays.
1529pub trait AuxArrayElement: Copy {
1530    fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1531}
1532
1533impl AuxArrayElement for i8 {
1534    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1535        std::io::Cursor::new(bytes).read_i8().ok()
1536    }
1537}
1538impl AuxArrayElement for u8 {
1539    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1540        std::io::Cursor::new(bytes).read_u8().ok()
1541    }
1542}
1543impl AuxArrayElement for i16 {
1544    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1545        std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1546    }
1547}
1548impl AuxArrayElement for u16 {
1549    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1550        std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1551    }
1552}
1553impl AuxArrayElement for i32 {
1554    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1555        std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1556    }
1557}
1558impl AuxArrayElement for u32 {
1559    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1560        std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1561    }
1562}
1563impl AuxArrayElement for f32 {
1564    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1565        std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
1566    }
1567}
1568
1569/// Provides access to aux arrays.
1570///
1571/// Provides methods to either retrieve single elements or an iterator over the
1572/// array.
1573///
1574/// This type is used for wrapping both, array data that was read from a
1575/// BAM record and slices of data that are going to be stored in one.
1576///
1577/// In order to be able to add an `AuxArray` field to a BAM record, `AuxArray`s
1578/// can be constructed via the `From` trait which is implemented for all
1579/// supported types (see [`AuxArrayElement`] for a list).
1580///
1581/// # Examples
1582///
1583/// ```
1584/// use rust_htslib::{
1585///     bam,
1586///     bam::record::{Aux, AuxArray},
1587/// };
1588///
1589/// //Set up BAM record
1590/// let bam_header = bam::Header::new();
1591/// let mut record = bam::Record::from_sam(
1592///     &mut bam::HeaderView::from_header(&bam_header),
1593///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1594/// ).unwrap();
1595///
1596/// let data = vec![0.4, 0.3, 0.2, 0.1];
1597/// let slice_of_data = &data;
1598/// let aux_array: AuxArray<f32> = slice_of_data.into();
1599/// let aux_field = Aux::ArrayFloat(aux_array);
1600/// record.push_aux(b"XA", aux_field);
1601///
1602/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1603///     // Retrieve the second element from the array
1604///     assert_eq!(array.get(1).unwrap(), 0.3);
1605///     // Iterate over the array and collect it into a `Vec`
1606///     let read_array = array.iter().collect::<Vec<_>>();
1607///     assert_eq!(read_array, data);
1608/// } else {
1609///     panic!("Could not read array data");
1610/// }
1611/// ```
1612#[derive(Debug)]
1613pub enum AuxArray<'a, T> {
1614    TargetType(AuxArrayTargetType<'a, T>),
1615    RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1616}
1617
1618impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1619where
1620    T: AuxArrayElement + PartialEq,
1621{
1622    fn eq(&self, other: &AuxArray<'_, T>) -> bool {
1623        use AuxArray::*;
1624        match (self, other) {
1625            (TargetType(v), TargetType(v_other)) => v == v_other,
1626            (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1627            (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
1628            (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
1629        }
1630    }
1631}
1632
1633/// Create AuxArrays from slices of allowed target types.
1634impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1635where
1636    I: AuxArrayElement,
1637    T: AsRef<[I]> + ?Sized,
1638{
1639    fn from(src: &'a T) -> Self {
1640        AuxArray::TargetType(AuxArrayTargetType {
1641            slice: src.as_ref(),
1642        })
1643    }
1644}
1645
1646impl<'a, T> AuxArray<'a, T>
1647where
1648    T: AuxArrayElement,
1649{
1650    /// Returns the element at a position or None if out of bounds.
1651    pub fn get(&self, index: usize) -> Option<T> {
1652        match self {
1653            AuxArray::TargetType(v) => v.get(index),
1654            AuxArray::RawLeBytes(v) => v.get(index),
1655        }
1656    }
1657
1658    /// Returns the number of elements in the array.
1659    pub fn len(&self) -> usize {
1660        match self {
1661            AuxArray::TargetType(a) => a.len(),
1662            AuxArray::RawLeBytes(a) => a.len(),
1663        }
1664    }
1665
1666    /// Returns true if the array contains no elements.
1667    pub fn is_empty(&self) -> bool {
1668        self.len() == 0
1669    }
1670
1671    /// Returns an iterator over the array.
1672    pub fn iter(&'_ self) -> AuxArrayIter<'_, T> {
1673        AuxArrayIter {
1674            index: 0,
1675            array: self,
1676        }
1677    }
1678
1679    /// Create AuxArrays from raw byte slices borrowed from `bam::Record`.
1680    fn from_bytes(bytes: &'a [u8]) -> Self {
1681        Self::RawLeBytes(AuxArrayRawLeBytes {
1682            slice: bytes,
1683            phantom_data: PhantomData,
1684        })
1685    }
1686}
1687
1688/// Encapsulates slice of target type.
1689#[doc(hidden)]
1690#[derive(Debug, PartialEq)]
1691pub struct AuxArrayTargetType<'a, T> {
1692    slice: &'a [T],
1693}
1694
1695impl<T> AuxArrayTargetType<'_, T>
1696where
1697    T: AuxArrayElement,
1698{
1699    fn get(&self, index: usize) -> Option<T> {
1700        self.slice.get(index).copied()
1701    }
1702
1703    fn len(&self) -> usize {
1704        self.slice.len()
1705    }
1706}
1707
1708/// Encapsulates slice of raw bytes to prevent it from being accidentally accessed.
1709#[doc(hidden)]
1710#[derive(Debug, PartialEq)]
1711pub struct AuxArrayRawLeBytes<'a, T> {
1712    slice: &'a [u8],
1713    phantom_data: PhantomData<T>,
1714}
1715
1716impl<T> AuxArrayRawLeBytes<'_, T>
1717where
1718    T: AuxArrayElement,
1719{
1720    fn get(&self, index: usize) -> Option<T> {
1721        let type_size = std::mem::size_of::<T>();
1722        if index * type_size + type_size > self.slice.len() {
1723            return None;
1724        }
1725        T::from_le_bytes(&self.slice[index * type_size..][..type_size])
1726    }
1727
1728    fn len(&self) -> usize {
1729        self.slice.len() / std::mem::size_of::<T>()
1730    }
1731}
1732
1733/// Aux array iterator
1734///
1735/// This struct is created by the [`AuxArray::iter`] method.
1736pub struct AuxArrayIter<'a, T> {
1737    index: usize,
1738    array: &'a AuxArray<'a, T>,
1739}
1740
1741impl<T> Iterator for AuxArrayIter<'_, T>
1742where
1743    T: AuxArrayElement,
1744{
1745    type Item = T;
1746    fn next(&mut self) -> Option<Self::Item> {
1747        let value = self.array.get(self.index);
1748        self.index += 1;
1749        value
1750    }
1751
1752    fn size_hint(&self) -> (usize, Option<usize>) {
1753        let array_length = self.array.len() - self.index;
1754        (array_length, Some(array_length))
1755    }
1756}
1757
1758/// Auxiliary data iterator
1759///
1760/// This struct is created by the [`Record::aux_iter`] method.
1761///
1762/// This iterator returns `Result`s that wrap tuples containing
1763/// a slice which represents the two-byte tag (`&[u8; 2]`) as
1764/// well as an `Aux` enum that wraps the associated value.
1765///
1766/// When an error occurs, the `Err` variant will be returned
1767/// and the iterator will not be able to advance anymore.
1768pub struct AuxIter<'a> {
1769    aux: &'a [u8],
1770}
1771
1772impl<'a> Iterator for AuxIter<'a> {
1773    type Item = Result<(&'a [u8], Aux<'a>)>;
1774
1775    fn next(&mut self) -> Option<Self::Item> {
1776        // We're finished
1777        if self.aux.is_empty() {
1778            return None;
1779        }
1780        // Incomplete aux data
1781        if (1..=3).contains(&self.aux.len()) {
1782            // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1783            self.aux = &[];
1784            return Some(Err(Error::BamAuxParsingError));
1785        }
1786        let tag = &self.aux[..2];
1787        Some(unsafe {
1788            let data_ptr = self.aux[2..].as_ptr();
1789            Record::read_aux_field(data_ptr)
1790                .map(|(aux, offset)| {
1791                    self.aux = &self.aux[offset..];
1792                    (tag, aux)
1793                })
1794                .inspect_err(|_e| {
1795                    // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1796                    self.aux = &[];
1797                })
1798        })
1799    }
1800}
1801
1802static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1803static ENCODE_BASE: [u8; 256] = [
1804    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1805    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1806    1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1807    15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1808    13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1809    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1810    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1811    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1812    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1813    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1814    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1815];
1816
1817#[inline]
1818fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
1819    (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
1820}
1821
1822#[inline]
1823unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
1824    (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
1825}
1826
1827#[inline]
1828fn decode_base_unchecked(base: u8) -> &'static u8 {
1829    unsafe { DECODE_BASE.get_unchecked(base as usize) }
1830}
1831
1832/// The sequence of a record.
1833#[derive(Debug, Copy, Clone)]
1834pub struct Seq<'a> {
1835    pub encoded: &'a [u8],
1836    len: usize,
1837}
1838
1839impl Seq<'_> {
1840    /// Return encoded base. Complexity: O(1).
1841    #[inline]
1842    pub fn encoded_base(&self, i: usize) -> u8 {
1843        encoded_base(self.encoded, i)
1844    }
1845
1846    /// Return encoded base. Complexity: O(1).
1847    ///
1848    /// # Safety
1849    ///
1850    /// TODO
1851    #[inline]
1852    pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
1853        encoded_base_unchecked(self.encoded, i)
1854    }
1855
1856    /// Obtain decoded base without performing bounds checking.
1857    /// Use index based access seq()[i], for checked, safe access.
1858    /// Complexity: O(1).
1859    ///
1860    /// # Safety
1861    ///
1862    /// TODO
1863    #[inline]
1864    pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
1865        *decode_base_unchecked(self.encoded_base_unchecked(i))
1866    }
1867
1868    /// Return decoded sequence. Complexity: O(m) with m being the read length.
1869    pub fn as_bytes(&self) -> Vec<u8> {
1870        (0..self.len()).map(|i| self[i]).collect()
1871    }
1872
1873    /// Return length (in bases) of the sequence.
1874    pub fn len(&self) -> usize {
1875        self.len
1876    }
1877
1878    pub fn is_empty(&self) -> bool {
1879        self.len() == 0
1880    }
1881}
1882
1883impl ops::Index<usize> for Seq<'_> {
1884    type Output = u8;
1885
1886    /// Return decoded base at given position within read. Complexity: O(1).
1887    fn index(&self, index: usize) -> &u8 {
1888        decode_base_unchecked(self.encoded_base(index))
1889    }
1890}
1891
1892unsafe impl Send for Seq<'_> {}
1893unsafe impl Sync for Seq<'_> {}
1894
1895#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1896#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1897pub enum Cigar {
1898    Match(u32),    // M
1899    Ins(u32),      // I
1900    Del(u32),      // D
1901    RefSkip(u32),  // N
1902    SoftClip(u32), // S
1903    HardClip(u32), // H
1904    Pad(u32),      // P
1905    Equal(u32),    // =
1906    Diff(u32),     // X
1907}
1908
1909impl Cigar {
1910    fn encode(self) -> u32 {
1911        match self {
1912            Cigar::Match(len) => len << 4, // | 0,
1913            Cigar::Ins(len) => (len << 4) | 1,
1914            Cigar::Del(len) => (len << 4) | 2,
1915            Cigar::RefSkip(len) => (len << 4) | 3,
1916            Cigar::SoftClip(len) => (len << 4) | 4,
1917            Cigar::HardClip(len) => (len << 4) | 5,
1918            Cigar::Pad(len) => (len << 4) | 6,
1919            Cigar::Equal(len) => (len << 4) | 7,
1920            Cigar::Diff(len) => (len << 4) | 8,
1921        }
1922    }
1923
1924    /// Return the length of the CIGAR.
1925    pub fn len(self) -> u32 {
1926        match self {
1927            Cigar::Match(len) => len,
1928            Cigar::Ins(len) => len,
1929            Cigar::Del(len) => len,
1930            Cigar::RefSkip(len) => len,
1931            Cigar::SoftClip(len) => len,
1932            Cigar::HardClip(len) => len,
1933            Cigar::Pad(len) => len,
1934            Cigar::Equal(len) => len,
1935            Cigar::Diff(len) => len,
1936        }
1937    }
1938
1939    pub fn is_empty(self) -> bool {
1940        self.len() == 0
1941    }
1942
1943    /// Return the character representing the CIGAR.
1944    pub fn char(self) -> char {
1945        match self {
1946            Cigar::Match(_) => 'M',
1947            Cigar::Ins(_) => 'I',
1948            Cigar::Del(_) => 'D',
1949            Cigar::RefSkip(_) => 'N',
1950            Cigar::SoftClip(_) => 'S',
1951            Cigar::HardClip(_) => 'H',
1952            Cigar::Pad(_) => 'P',
1953            Cigar::Equal(_) => '=',
1954            Cigar::Diff(_) => 'X',
1955        }
1956    }
1957}
1958
1959impl fmt::Display for Cigar {
1960    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1961        fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
1962    }
1963}
1964
1965unsafe impl Send for Cigar {}
1966unsafe impl Sync for Cigar {}
1967
1968custom_derive! {
1969    /// A CIGAR string. This type wraps around a `Vec<Cigar>`.
1970    ///
1971    /// # Example
1972    ///
1973    /// ```
1974    /// use rust_htslib::bam::record::{Cigar, CigarString};
1975    ///
1976    /// let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
1977    ///
1978    /// // access by index
1979    /// assert_eq!(cigar[0], Cigar::Match(100));
1980    /// // format into classical string representation
1981    /// assert_eq!(format!("{}", cigar), "100M10S");
1982    /// // iterate
1983    /// for op in &cigar {
1984    ///    println!("{}", op);
1985    /// }
1986    /// ```
1987    #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1988    #[derive(NewtypeDeref,
1989            NewtypeDerefMut,
1990             NewtypeIndex(usize),
1991             NewtypeIndexMut(usize),
1992             NewtypeFrom,
1993             PartialEq,
1994             PartialOrd,
1995             Eq,
1996             NewtypeDebug,
1997             Clone,
1998             Hash
1999    )]
2000    pub struct CigarString(pub Vec<Cigar>);
2001}
2002
2003impl CigarString {
2004    /// Create a `CigarStringView` from this CigarString at position `pos`
2005    pub fn into_view(self, pos: i64) -> CigarStringView {
2006        CigarStringView::new(self, pos)
2007    }
2008
2009    /// Calculate the bam cigar from the alignment struct. x is the target string
2010    /// and y is the reference. `hard_clip` controls how unaligned read bases are encoded in the
2011    /// cigar string. Set to true to use the hard clip (`H`) code, or false to use soft clip
2012    /// (`S`) code. See the [SAM spec](https://samtools.github.io/hts-specs/SAMv1.pdf) for more details.
2013    pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
2014        match alignment.mode {
2015            AlignmentMode::Global => {
2016                panic!(" Bam cigar fn not supported for Global Alignment mode")
2017            }
2018            AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
2019            _ => {}
2020        }
2021
2022        let mut cigar = Vec::new();
2023        if alignment.operations.is_empty() {
2024            return CigarString(cigar);
2025        }
2026
2027        let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
2028            AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
2029            AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
2030            AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
2031            AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
2032            _ => {}
2033        };
2034
2035        if alignment.xstart > 0 {
2036            cigar.push(if hard_clip {
2037                Cigar::HardClip(alignment.xstart as u32)
2038            } else {
2039                Cigar::SoftClip(alignment.xstart as u32)
2040            });
2041        }
2042
2043        let mut last = alignment.operations[0];
2044        let mut k = 1u32;
2045        for &op in alignment.operations[1..].iter() {
2046            if op == last {
2047                k += 1;
2048            } else {
2049                add_op(last, k, &mut cigar);
2050                k = 1;
2051            }
2052            last = op;
2053        }
2054        add_op(last, k, &mut cigar);
2055        if alignment.xlen > alignment.xend {
2056            cigar.push(if hard_clip {
2057                Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
2058            } else {
2059                Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
2060            });
2061        }
2062
2063        CigarString(cigar)
2064    }
2065}
2066
2067impl TryFrom<&[u8]> for CigarString {
2068    type Error = Error;
2069
2070    /// Create a CigarString from given &[u8].
2071    /// # Example
2072    /// ```
2073    /// use rust_htslib::bam::record::*;
2074    /// use rust_htslib::bam::record::CigarString;
2075    /// use rust_htslib::bam::record::Cigar::*;
2076    /// use std::convert::TryFrom;
2077    ///
2078    /// let cigar_str = "2H10M5X3=2H".as_bytes();
2079    /// let cigar = CigarString::try_from(cigar_str)
2080    ///     .expect("Unable to parse cigar string.");
2081    /// let expected_cigar = CigarString(vec![
2082    ///     HardClip(2),
2083    ///     Match(10),
2084    ///     Diff(5),
2085    ///     Equal(3),
2086    ///     HardClip(2),
2087    /// ]);
2088    /// assert_eq!(cigar, expected_cigar);
2089    /// ```
2090    fn try_from(bytes: &[u8]) -> Result<Self> {
2091        let mut inner = Vec::new();
2092        let mut i = 0;
2093        let text_len = bytes.len();
2094        while i < text_len {
2095            let mut j = i;
2096            while j < text_len && bytes[j].is_ascii_digit() {
2097                j += 1;
2098            }
2099            // check that length is provided
2100            if i == j {
2101                return Err(Error::BamParseCigar {
2102                    msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
2103                });
2104            }
2105            // get the length of the operation
2106            let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
2107                msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
2108            })?;
2109            let n = s.parse().map_err(|_| Error::BamParseCigar {
2110                msg: format!("Unable to parse &str '{:?}' to u32.", s),
2111            })?;
2112            // get the operation
2113            let op = &bytes[j];
2114            inner.push(match op {
2115                b'M' => Cigar::Match(n),
2116                b'I' => Cigar::Ins(n),
2117                b'D' => Cigar::Del(n),
2118                b'N' => Cigar::RefSkip(n),
2119                b'H' => {
2120                    if i == 0 || j + 1 == text_len {
2121                        Cigar::HardClip(n)
2122                    } else {
2123                        return Err(Error::BamParseCigar {
2124                            msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
2125                                .to_owned(),
2126                        });
2127                    }
2128                }
2129                b'S' => {
2130                    if i == 0
2131                        || j + 1 == text_len
2132                        || bytes[i-1] == b'H'
2133                        || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
2134                        Cigar::SoftClip(n)
2135                    } else {
2136                        return Err(Error::BamParseCigar {
2137                        msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
2138                            .to_owned(),
2139                        });
2140                    }
2141                },
2142                b'P' => Cigar::Pad(n),
2143                b'=' => Cigar::Equal(n),
2144                b'X' => Cigar::Diff(n),
2145                op => {
2146                    return Err(Error::BamParseCigar {
2147                        msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
2148                    })
2149                }
2150            });
2151            i = j + 1;
2152        }
2153        Ok(CigarString(inner))
2154    }
2155}
2156
2157impl TryFrom<&str> for CigarString {
2158    type Error = Error;
2159
2160    /// Create a CigarString from given &str.
2161    /// # Example
2162    /// ```
2163    /// use rust_htslib::bam::record::*;
2164    /// use rust_htslib::bam::record::CigarString;
2165    /// use rust_htslib::bam::record::Cigar::*;
2166    /// use std::convert::TryFrom;
2167    ///
2168    /// let cigar_str = "2H10M5X3=2H";
2169    /// let cigar = CigarString::try_from(cigar_str)
2170    ///     .expect("Unable to parse cigar string.");
2171    /// let expected_cigar = CigarString(vec![
2172    ///     HardClip(2),
2173    ///     Match(10),
2174    ///     Diff(5),
2175    ///     Equal(3),
2176    ///     HardClip(2),
2177    /// ]);
2178    /// assert_eq!(cigar, expected_cigar);
2179    /// ```
2180    fn try_from(text: &str) -> Result<Self> {
2181        let bytes = text.as_bytes();
2182        if text.chars().count() != bytes.len() {
2183            return Err(Error::BamParseCigar {
2184                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2185            });
2186        }
2187        CigarString::try_from(bytes)
2188    }
2189}
2190
2191impl<'a> CigarString {
2192    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2193        self.into_iter()
2194    }
2195}
2196
2197impl<'a> IntoIterator for &'a CigarString {
2198    type Item = &'a Cigar;
2199    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2200
2201    fn into_iter(self) -> Self::IntoIter {
2202        self.0.iter()
2203    }
2204}
2205
2206impl fmt::Display for CigarString {
2207    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2208        for op in self {
2209            fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
2210        }
2211        Ok(())
2212    }
2213}
2214
2215// Get number of leading/trailing softclips if a CigarString taking hardclips into account
2216fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
2217    match (cigar.next(), cigar.next()) {
2218        (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
2219            *s as i64
2220        }
2221        _ => 0,
2222    }
2223}
2224
2225#[derive(Eq, PartialEq, Clone, Debug)]
2226pub struct CigarStringView {
2227    inner: CigarString,
2228    pos: i64,
2229}
2230
2231impl CigarStringView {
2232    /// Construct a new CigarStringView from a CigarString at a position
2233    pub fn new(c: CigarString, pos: i64) -> CigarStringView {
2234        CigarStringView { inner: c, pos }
2235    }
2236
2237    /// Get (exclusive) end position of alignment.
2238    pub fn end_pos(&self) -> i64 {
2239        let mut pos = self.pos;
2240        for c in self {
2241            match c {
2242                Cigar::Match(l)
2243                | Cigar::RefSkip(l)
2244                | Cigar::Del(l)
2245                | Cigar::Equal(l)
2246                | Cigar::Diff(l) => pos += *l as i64,
2247                // these don't add to end_pos on reference
2248                Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2249            }
2250        }
2251        pos
2252    }
2253
2254    /// Get the start position of the alignment (0-based).
2255    pub fn pos(&self) -> i64 {
2256        self.pos
2257    }
2258
2259    /// Get number of bases softclipped at the beginning of the alignment.
2260    pub fn leading_softclips(&self) -> i64 {
2261        calc_softclips(self.iter())
2262    }
2263
2264    /// Get number of bases softclipped at the end of the alignment.
2265    pub fn trailing_softclips(&self) -> i64 {
2266        calc_softclips(self.iter().rev())
2267    }
2268
2269    /// Get number of bases hardclipped at the beginning of the alignment.
2270    pub fn leading_hardclips(&self) -> i64 {
2271        self.first().map_or(0, |cigar| {
2272            if let Cigar::HardClip(s) = cigar {
2273                *s as i64
2274            } else {
2275                0
2276            }
2277        })
2278    }
2279
2280    /// Get number of bases hardclipped at the end of the alignment.
2281    pub fn trailing_hardclips(&self) -> i64 {
2282        self.last().map_or(0, |cigar| {
2283            if let Cigar::HardClip(s) = cigar {
2284                *s as i64
2285            } else {
2286                0
2287            }
2288        })
2289    }
2290
2291    /// For a given position in the reference, get corresponding position within read.
2292    /// If reference position is outside of the read alignment, return None.
2293    ///
2294    /// # Arguments
2295    ///
2296    /// * `ref_pos` - the reference position
2297    /// * `include_softclips` - if true, softclips will be considered as matches or mismatches
2298    /// * `include_dels` - if true, positions within deletions will be considered (first reference matching read position after deletion will be returned)
2299    ///
2300    pub fn read_pos(
2301        &self,
2302        ref_pos: u32,
2303        include_softclips: bool,
2304        include_dels: bool,
2305    ) -> Result<Option<u32>> {
2306        let mut rpos = self.pos as u32; // reference position
2307        let mut qpos = 0u32; // position within read
2308        let mut j = 0; // index into cigar operation vector
2309
2310        // find first cigar operation referring to qpos = 0 (and thus bases in record.seq()),
2311        // because all augmentations of qpos and rpos before that are invalid
2312        for (i, c) in self.iter().enumerate() {
2313            match c {
2314                Cigar::Match(_) |
2315                Cigar::Diff(_)  |
2316                Cigar::Equal(_) |
2317                // this is unexpected, but bwa + GATK indel realignment can produce insertions
2318                // before matching positions
2319                Cigar::Ins(_) => {
2320                    j = i;
2321                    break;
2322                },
2323                Cigar::SoftClip(l) => {
2324                    j = i;
2325                    if include_softclips {
2326                        // Alignment starts with softclip and we want to include it in the
2327                        // projection of the reference position. However, the POS field does not
2328                        // include the softclip. Hence we have to subtract its length.
2329                        rpos = rpos.saturating_sub(*l);
2330                    }
2331                    break;
2332                },
2333                Cigar::Del(l) => {
2334                    // METHOD: leading deletions can happen in case of trimmed reads where
2335                    // a primer has been removed AFTER read mapping.
2336                    // Example: 24M8I8D18M9S before trimming, 32H8D18M9S after trimming
2337                    // with fgbio. While leading deletions should be impossible with
2338                    // normal read mapping, they make perfect sense with primer trimming
2339                    // because the mapper still had the evidence to decide in favor of
2340                    // the deletion via the primer sequence.
2341                    rpos += l;
2342                },
2343                Cigar::RefSkip(_) => {
2344                    return Err(Error::BamUnexpectedCigarOperation {
2345                        msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
2346                    });
2347                },
2348                Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2349                    return Err(Error::BamUnexpectedCigarOperation{
2350                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2351                    });
2352                },
2353                // if we have reached the end of the CigarString with only pads and hard clips, we have no read position matching the variant
2354                Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
2355                // skip leading HardClips and Pads, as they consume neither read sequence nor reference sequence
2356                Cigar::Pad(_) | Cigar::HardClip(_) => ()
2357            }
2358        }
2359
2360        let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2361            cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2362        };
2363
2364        while rpos <= ref_pos && j < self.len() {
2365            match self[j] {
2366                // potential SNV evidence
2367                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
2368                    // difference between desired position and first position of current cigar
2369                    // operation
2370                    qpos += ref_pos - rpos;
2371                    return Ok(Some(qpos));
2372                }
2373                Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2374                    qpos += ref_pos - rpos;
2375                    return Ok(Some(qpos));
2376                }
2377                Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2378                    // qpos shall resemble the start of the deletion
2379                    return Ok(Some(qpos));
2380                }
2381                // for others, just increase pos and qpos as needed
2382                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
2383                    rpos += l;
2384                    qpos += l;
2385                    j += 1;
2386                }
2387                Cigar::SoftClip(l) => {
2388                    qpos += l;
2389                    j += 1;
2390                    if include_softclips {
2391                        rpos += l;
2392                    }
2393                }
2394                Cigar::Ins(l) => {
2395                    qpos += l;
2396                    j += 1;
2397                }
2398                Cigar::RefSkip(l) | Cigar::Del(l) => {
2399                    rpos += l;
2400                    j += 1;
2401                }
2402                Cigar::Pad(_) => {
2403                    j += 1;
2404                }
2405                Cigar::HardClip(_) if j < self.len() - 1 => {
2406                    return Err(Error::BamUnexpectedCigarOperation{
2407                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2408                    });
2409                }
2410                Cigar::HardClip(_) => return Ok(None),
2411            }
2412        }
2413
2414        Ok(None)
2415    }
2416
2417    /// transfer ownership of the Cigar out of the CigarView
2418    pub fn take(self) -> CigarString {
2419        self.inner
2420    }
2421}
2422
2423impl ops::Deref for CigarStringView {
2424    type Target = CigarString;
2425
2426    fn deref(&self) -> &CigarString {
2427        &self.inner
2428    }
2429}
2430
2431impl ops::Index<usize> for CigarStringView {
2432    type Output = Cigar;
2433
2434    fn index(&self, index: usize) -> &Cigar {
2435        self.inner.index(index)
2436    }
2437}
2438
2439impl ops::IndexMut<usize> for CigarStringView {
2440    fn index_mut(&mut self, index: usize) -> &mut Cigar {
2441        self.inner.index_mut(index)
2442    }
2443}
2444
2445impl<'a> CigarStringView {
2446    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2447        self.inner.into_iter()
2448    }
2449}
2450
2451impl<'a> IntoIterator for &'a CigarStringView {
2452    type Item = &'a Cigar;
2453    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2454
2455    fn into_iter(self) -> Self::IntoIter {
2456        self.inner.into_iter()
2457    }
2458}
2459
2460impl fmt::Display for CigarStringView {
2461    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2462        self.inner.fmt(fmt)
2463    }
2464}
2465
2466pub struct BaseModificationMetadata {
2467    pub strand: i32,
2468    pub implicit: i32,
2469    pub canonical: u8,
2470}
2471
2472/// struct containing the internal state required to access
2473/// the base modifications for a bam::Record
2474pub struct BaseModificationState<'a> {
2475    record: &'a Record,
2476    state: *mut htslib::hts_base_mod_state,
2477    buffer: Vec<htslib::hts_base_mod>,
2478    buffer_pos: i32,
2479}
2480
2481impl BaseModificationState<'_> {
2482    /// Initialize a new BaseModification struct from a bam::Record
2483    /// This function allocates memory for the state structure
2484    /// and initializes the iterator to the start of the modification
2485    /// records.
2486    fn new(r: &Record) -> Result<BaseModificationState<'_>> {
2487        let mut bm = unsafe {
2488            BaseModificationState {
2489                record: r,
2490                state: hts_sys::hts_base_mod_state_alloc(),
2491                buffer: Vec::new(),
2492                buffer_pos: -1,
2493            }
2494        };
2495
2496        if bm.state.is_null() {
2497            panic!("Unable to allocate memory for hts_base_mod_state");
2498        }
2499
2500        // parse the MM tag to initialize the state
2501        unsafe {
2502            let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
2503            if ret != 0 {
2504                return Err(Error::BamBaseModificationTagNotFound);
2505            }
2506        }
2507
2508        let types = bm.recorded();
2509        bm.buffer.reserve(types.len());
2510        Ok(bm)
2511    }
2512
2513    pub fn buffer_next_mods(&mut self) -> Result<usize> {
2514        unsafe {
2515            let ret = hts_sys::bam_next_basemod(
2516                self.record.inner_ptr(),
2517                self.state,
2518                self.buffer.as_mut_ptr(),
2519                self.buffer.capacity() as i32,
2520                &mut self.buffer_pos,
2521            );
2522
2523            if ret < 0 {
2524                return Err(Error::BamBaseModificationIterationFailed);
2525            }
2526
2527            // the htslib API won't write more than buffer.capacity() mods to the output array but it will
2528            // return the actual number of modifications found. We return an error to the caller
2529            // in the case where there was insufficient storage to return all mods.
2530            if ret as usize > self.buffer.capacity() {
2531                return Err(Error::BamBaseModificationTooManyMods);
2532            }
2533
2534            // we read the modifications directly into the vector, which does
2535            // not update the length so needs to be manually set
2536            self.buffer.set_len(ret as usize);
2537
2538            Ok(ret as usize)
2539        }
2540    }
2541
2542    /// Return an array containing the modification codes listed for this record.
2543    /// Positive values are ascii character codes (eg m), negative values are chEBI codes.
2544    pub fn recorded<'a>(&self) -> &'a [i32] {
2545        unsafe {
2546            let mut n: i32 = 0;
2547            let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2548
2549            // htslib should not return a null pointer, even when there are no base mods
2550            if data_ptr.is_null() {
2551                panic!("Unable to obtain pointer to base modifications");
2552            }
2553            assert!(n >= 0);
2554            slice::from_raw_parts(data_ptr, n as usize)
2555        }
2556    }
2557
2558    /// Return metadata for the specified character code indicating the strand
2559    /// the base modification was called on, whether the tag uses implicit mode
2560    /// and the ascii code for the canonical base.
2561    /// If there are multiple modifications with the same code this will return the data
2562    /// for the first mod.  See https://github.com/samtools/htslib/issues/1635
2563    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2564        unsafe {
2565            let mut strand: i32 = 0;
2566            let mut implicit: i32 = 0;
2567            // This may be i8 or u8 in hts_sys.
2568            let mut canonical: c_char = 0;
2569
2570            let ret = hts_sys::bam_mods_query_type(
2571                self.state,
2572                code,
2573                &mut strand,
2574                &mut implicit,
2575                &mut canonical,
2576            );
2577            if ret == -1 {
2578                Err(Error::BamBaseModificationTypeNotFound)
2579            } else {
2580                Ok(BaseModificationMetadata {
2581                    strand,
2582                    implicit,
2583                    canonical: canonical.try_into().unwrap(),
2584                })
2585            }
2586        }
2587    }
2588}
2589
2590impl Drop for BaseModificationState<'_> {
2591    fn drop<'a>(&mut self) {
2592        unsafe {
2593            hts_sys::hts_base_mod_state_free(self.state);
2594        }
2595    }
2596}
2597
2598/// Iterator over the base modifications that returns
2599/// a vector for all of the mods at each position
2600pub struct BaseModificationsPositionIter<'a> {
2601    mod_state: BaseModificationState<'a>,
2602}
2603
2604impl BaseModificationsPositionIter<'_> {
2605    fn new(r: &Record) -> Result<BaseModificationsPositionIter<'_>> {
2606        let state = BaseModificationState::new(r)?;
2607        Ok(BaseModificationsPositionIter { mod_state: state })
2608    }
2609
2610    pub fn recorded<'a>(&self) -> &'a [i32] {
2611        self.mod_state.recorded()
2612    }
2613
2614    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2615        self.mod_state.query_type(code)
2616    }
2617}
2618
2619impl Iterator for BaseModificationsPositionIter<'_> {
2620    type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2621
2622    fn next(&mut self) -> Option<Self::Item> {
2623        let ret = self.mod_state.buffer_next_mods();
2624
2625        // Three possible things happened in buffer_next_mods:
2626        // 1. the htslib API call was successful but there are no more mods
2627        // 2. ths htslib API call was successful and we read some mods
2628        // 3. the htslib API call failed, we propogate the error wrapped in an option
2629        match ret {
2630            Ok(num_mods) => {
2631                if num_mods == 0 {
2632                    None
2633                } else {
2634                    let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
2635                    Some(Ok(data))
2636                }
2637            }
2638            Err(e) => Some(Err(e)),
2639        }
2640    }
2641}
2642
2643/// Iterator over the base modifications that returns
2644/// the next modification found, one by one
2645pub struct BaseModificationsIter<'a> {
2646    mod_state: BaseModificationState<'a>,
2647    buffer_idx: usize,
2648}
2649
2650impl BaseModificationsIter<'_> {
2651    fn new(r: &Record) -> Result<BaseModificationsIter<'_>> {
2652        let state = BaseModificationState::new(r)?;
2653        Ok(BaseModificationsIter {
2654            mod_state: state,
2655            buffer_idx: 0,
2656        })
2657    }
2658
2659    pub fn recorded<'a>(&self) -> &'a [i32] {
2660        self.mod_state.recorded()
2661    }
2662
2663    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2664        self.mod_state.query_type(code)
2665    }
2666}
2667
2668impl Iterator for BaseModificationsIter<'_> {
2669    type Item = Result<(i32, hts_sys::hts_base_mod)>;
2670
2671    fn next(&mut self) -> Option<Self::Item> {
2672        if self.buffer_idx == self.mod_state.buffer.len() {
2673            // need to use the internal state to read the next
2674            // set of modifications into the buffer
2675            let ret = self.mod_state.buffer_next_mods();
2676
2677            match ret {
2678                Ok(num_mods) => {
2679                    if num_mods == 0 {
2680                        // done iterating
2681                        return None;
2682                    } else {
2683                        // we read some mods, reset the position in the buffer then fall through
2684                        self.buffer_idx = 0;
2685                    }
2686                }
2687                Err(e) => return Some(Err(e)),
2688            }
2689        }
2690
2691        // if we got here when there are mods buffered that we haven't emitted yet
2692        assert!(self.buffer_idx < self.mod_state.buffer.len());
2693        let data = (
2694            self.mod_state.buffer_pos,
2695            self.mod_state.buffer[self.buffer_idx],
2696        );
2697        self.buffer_idx += 1;
2698        Some(Ok(data))
2699    }
2700}
2701
2702#[cfg(test)]
2703mod tests {
2704    use super::*;
2705
2706    #[test]
2707    fn test_cigar_string() {
2708        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2709
2710        assert_eq!(cigar[0], Cigar::Match(100));
2711        assert_eq!(format!("{}", cigar), "100M10S");
2712        for op in &cigar {
2713            println!("{}", op);
2714        }
2715    }
2716
2717    #[test]
2718    fn test_cigar_string_view_pos() {
2719        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2720        assert_eq!(cigar.pos(), 5);
2721    }
2722
2723    #[test]
2724    fn test_cigar_string_leading_softclips() {
2725        let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2726        assert_eq!(cigar.leading_softclips(), 10);
2727        let cigar2 = CigarString(vec![
2728            Cigar::HardClip(5),
2729            Cigar::SoftClip(10),
2730            Cigar::Match(100),
2731        ])
2732        .into_view(0);
2733        assert_eq!(cigar2.leading_softclips(), 10);
2734    }
2735
2736    #[test]
2737    fn test_cigar_string_trailing_softclips() {
2738        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2739        assert_eq!(cigar.trailing_softclips(), 10);
2740        let cigar2 = CigarString(vec![
2741            Cigar::Match(100),
2742            Cigar::SoftClip(10),
2743            Cigar::HardClip(5),
2744        ])
2745        .into_view(0);
2746        assert_eq!(cigar2.trailing_softclips(), 10);
2747    }
2748
2749    #[test]
2750    fn test_cigar_read_pos() {
2751        let vpos = 5; // variant position
2752
2753        // Ignore leading HardClip
2754        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2755        // var:                       V
2756        // c01: 7H                 M  M
2757        // qpos:                  00 01
2758        let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2759        assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2760
2761        // Skip leading SoftClip or use as pre-POS matches
2762        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2763        // var:                       V
2764        // c02: 5H2S         M  M  M  M  M  M
2765        // qpos:  00        02 03 04 05 06 07
2766        // c02: 5H     S  S  M  M  M  M  M  M
2767        // qpos:      00 01 02 03 04 05 06 07
2768        let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2769        assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2770        assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2771
2772        // Skip leading SoftClip returning None for unmatched reference positiong or use as
2773        // pre-POS matches
2774        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2775        // var:                       V
2776        // c03:  3S                      M  M
2777        // qpos: 00                     03 04
2778        // c03:                 S  S  S  M  M
2779        // qpos:               00 01 02 03 04
2780        let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2781        assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2782        assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2783
2784        // Skip leading Insertion before variant position
2785        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2786        // var:                       V
2787        // c04:  3I                X  X  X
2788        // qpos: 00               03 04 05
2789        let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2790        assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2791
2792        // Matches and deletion before variant position
2793        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2794        // var:                       V
2795        // c05:        =  =  D  D  X  =  =
2796        // qpos:      00 01       02 03 04 05
2797        let c05 = CigarString(vec![
2798            Cigar::Equal(2),
2799            Cigar::Del(2),
2800            Cigar::Diff(1),
2801            Cigar::Equal(2),
2802        ])
2803        .into_view(0);
2804        assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2805
2806        // single nucleotide Deletion covering variant position
2807        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2808        // var:                       V
2809        // c06:                 =  =  D  X  X
2810        // qpos:               00 01    02 03
2811        let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2812        assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2813        assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2814
2815        // three nucleotide Deletion covering variant position
2816        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2817        // var:                       V
2818        // c07:              =  =  D  D  D  M  M
2819        // qpos:            00 01          02 03
2820        let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2821        assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2822        assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2823
2824        // three nucleotide RefSkip covering variant position
2825        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2826        // var:                       V
2827        // c08:              =  X  N  N  N  M  M
2828        // qpos:            00 01          02 03
2829        let c08 = CigarString(vec![
2830            Cigar::Equal(1),
2831            Cigar::Diff(1),
2832            Cigar::RefSkip(3),
2833            Cigar::Match(2),
2834        ])
2835        .into_view(2);
2836        assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2837        assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2838
2839        // internal hard clip before variant pos
2840        // ref:       00 01 02 03    04 05 06 07 08 09 10 11 12 13 14 15
2841        // var:                          V
2842        // c09: 3H           =  = 3H  =  =
2843        // qpos:            00 01    02 03
2844        let c09 = CigarString(vec![
2845            Cigar::HardClip(3),
2846            Cigar::Equal(2),
2847            Cigar::HardClip(3),
2848            Cigar::Equal(2),
2849        ])
2850        .into_view(2);
2851        assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2852
2853        // Deletion right before variant position
2854        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2855        // var:                       V
2856        // c10:           M  M  D  D  M  M
2857        // qpos:         00 01       02 03
2858        let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2859        assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2860
2861        // Insertion right before variant position
2862        // ref:       00 01 02 03 04    05 06 07 08 09 10 11 12 13 14 15
2863        // var:                          V
2864        // c11:                 M  M 3I  M
2865        // qpos:               00 01 02 05 06
2866        let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2867        assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2868
2869        // Insertion right after variant position
2870        // ref:       00 01 02 03 04 05    06 07 08 09 10 11 12 13 14 15
2871        // var:                       V
2872        // c12:                 M  M  M 2I  =
2873        // qpos:               00 01 02 03 05
2874        let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2875        assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2876
2877        // Deletion right after variant position
2878        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2879        // var:                       V
2880        // c13:                 M  M  M  D  =
2881        // qpos:               00 01 02    03
2882        let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2883        assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2884
2885        // A messy and complicated example, including a Pad operation
2886        let vpos2 = 15;
2887        // ref:       00    01 02    03 04 05    06 07 08 09 10 11 12 13 14 15
2888        // var:                                                           V
2889        // c14: 5H3S   = 2P  M  X 3I  M  M  D 2I  =  =  N  N  N  M  M  M  =  =  5S2H
2890        // qpos:  00  03    04 05 06 09 10    11 13 14          15 16 17 18 19
2891        let c14 = CigarString(vec![
2892            Cigar::HardClip(5),
2893            Cigar::SoftClip(3),
2894            Cigar::Equal(1),
2895            Cigar::Pad(2),
2896            Cigar::Match(1),
2897            Cigar::Diff(1),
2898            Cigar::Ins(3),
2899            Cigar::Match(2),
2900            Cigar::Del(1),
2901            Cigar::Ins(2),
2902            Cigar::Equal(2),
2903            Cigar::RefSkip(3),
2904            Cigar::Match(3),
2905            Cigar::Equal(2),
2906            Cigar::SoftClip(5),
2907            Cigar::HardClip(2),
2908        ])
2909        .into_view(0);
2910        assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2911
2912        // HardClip after Pad
2913        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2914        // var:                       V
2915        // c15: 5P1H            =  =  =
2916        // qpos:               00 01 02
2917        let c15 =
2918            CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2919        assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2920
2921        // only HardClip and Pad operations
2922        // c16: 7H5P2H
2923        let c16 =
2924            CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2925        assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2926    }
2927
2928    #[test]
2929    fn test_clone() {
2930        let mut rec = Record::new();
2931        rec.set_pos(300);
2932        rec.set_qname(b"read1");
2933        let clone = rec.clone();
2934        assert_eq!(rec, clone);
2935    }
2936
2937    #[test]
2938    fn test_flags() {
2939        let mut rec = Record::new();
2940
2941        rec.set_paired();
2942        assert_eq!(rec.is_paired(), true);
2943
2944        rec.set_supplementary();
2945        assert_eq!(rec.is_supplementary(), true);
2946        assert_eq!(rec.is_supplementary(), true);
2947
2948        rec.unset_paired();
2949        assert_eq!(rec.is_paired(), false);
2950        assert_eq!(rec.is_supplementary(), true);
2951
2952        rec.unset_supplementary();
2953        assert_eq!(rec.is_paired(), false);
2954        assert_eq!(rec.is_supplementary(), false);
2955    }
2956
2957    #[test]
2958    fn test_cigar_parse() {
2959        let cigar = "1S20M1D2I3X1=2H";
2960        let parsed = CigarString::try_from(cigar).unwrap();
2961        assert_eq!(parsed.to_string(), cigar);
2962    }
2963}
2964
2965#[cfg(test)]
2966mod alignment_cigar_tests {
2967    use super::*;
2968    use crate::bam::{Read, Reader};
2969    use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2970    use bio_types::alignment::{Alignment, AlignmentMode};
2971
2972    #[test]
2973    fn test_cigar() {
2974        let alignment = Alignment {
2975            score: 5,
2976            xstart: 3,
2977            ystart: 0,
2978            xend: 9,
2979            yend: 10,
2980            ylen: 10,
2981            xlen: 10,
2982            operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2983            mode: AlignmentMode::Semiglobal,
2984        };
2985        assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2986        assert_eq!(
2987            CigarString::from_alignment(&alignment, false).0,
2988            vec![
2989                Cigar::SoftClip(3),
2990                Cigar::Equal(3),
2991                Cigar::Diff(1),
2992                Cigar::Ins(2),
2993                Cigar::Del(2),
2994                Cigar::SoftClip(1),
2995            ]
2996        );
2997
2998        let alignment = Alignment {
2999            score: 5,
3000            xstart: 0,
3001            ystart: 5,
3002            xend: 4,
3003            yend: 10,
3004            ylen: 10,
3005            xlen: 5,
3006            operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
3007            mode: AlignmentMode::Custom,
3008        };
3009        assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
3010        assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
3011        assert_eq!(
3012            CigarString::from_alignment(&alignment, false).0,
3013            vec![
3014                Cigar::Equal(1),
3015                Cigar::Diff(2),
3016                Cigar::Ins(1),
3017                Cigar::Del(2),
3018                Cigar::SoftClip(1),
3019            ]
3020        );
3021        assert_eq!(
3022            CigarString::from_alignment(&alignment, true).0,
3023            vec![
3024                Cigar::Equal(1),
3025                Cigar::Diff(2),
3026                Cigar::Ins(1),
3027                Cigar::Del(2),
3028                Cigar::HardClip(1),
3029            ]
3030        );
3031
3032        let alignment = Alignment {
3033            score: 5,
3034            xstart: 0,
3035            ystart: 5,
3036            xend: 3,
3037            yend: 8,
3038            ylen: 10,
3039            xlen: 3,
3040            operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
3041            mode: AlignmentMode::Custom,
3042        };
3043        assert_eq!(alignment.cigar(false), "1X1=1X");
3044        assert_eq!(
3045            CigarString::from_alignment(&alignment, false).0,
3046            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
3047        );
3048
3049        let alignment = Alignment {
3050            score: 5,
3051            xstart: 0,
3052            ystart: 5,
3053            xend: 3,
3054            yend: 8,
3055            ylen: 10,
3056            xlen: 3,
3057            operations: vec![Subst, Match, Subst],
3058            mode: AlignmentMode::Semiglobal,
3059        };
3060        assert_eq!(alignment.cigar(false), "1X1=1X");
3061        assert_eq!(
3062            CigarString::from_alignment(&alignment, false).0,
3063            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
3064        );
3065    }
3066
3067    #[test]
3068    fn test_read_orientation_f1r2() {
3069        let mut bam = Reader::from_path("test/test_paired.sam").unwrap();
3070
3071        for res in bam.records() {
3072            let record = res.unwrap();
3073            assert_eq!(
3074                record.read_pair_orientation(),
3075                SequenceReadPairOrientation::F1R2
3076            );
3077        }
3078    }
3079
3080    #[test]
3081    fn test_read_orientation_f2r1() {
3082        let mut bam = Reader::from_path("test/test_nonstandard_orientation.sam").unwrap();
3083
3084        for res in bam.records() {
3085            let record = res.unwrap();
3086            assert_eq!(
3087                record.read_pair_orientation(),
3088                SequenceReadPairOrientation::F2R1
3089            );
3090        }
3091    }
3092
3093    #[test]
3094    fn test_read_orientation_supplementary() {
3095        let mut bam = Reader::from_path("test/test_orientation_supplementary.sam").unwrap();
3096
3097        for res in bam.records() {
3098            let record = res.unwrap();
3099            assert_eq!(
3100                record.read_pair_orientation(),
3101                SequenceReadPairOrientation::F2R1
3102            );
3103        }
3104    }
3105
3106    #[test]
3107    pub fn test_cigar_parsing_non_ascii_error() {
3108        let cigar_str = "43ጷ";
3109        let expected_error = Err(Error::BamParseCigar {
3110                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
3111            });
3112
3113        let result = CigarString::try_from(cigar_str);
3114        assert_eq!(expected_error, result);
3115    }
3116
3117    #[test]
3118    pub fn test_cigar_parsing() {
3119        // parsing test cases
3120        let cigar_strs = [
3121            "1H10M4D100I300N1102=10P25X11S", // test every cigar opt
3122            "100M",                          // test a single op
3123            "",                              // test empty input
3124            "1H1=1H",                        // test simple hardclip
3125            "1S1=1S",                        // test simple softclip
3126            "11H11S11=11S11H",               // test complex softclip
3127            "10H",
3128            "10S",
3129        ];
3130        // expected results
3131        let cigars = [
3132            CigarString(vec![
3133                Cigar::HardClip(1),
3134                Cigar::Match(10),
3135                Cigar::Del(4),
3136                Cigar::Ins(100),
3137                Cigar::RefSkip(300),
3138                Cigar::Equal(1102),
3139                Cigar::Pad(10),
3140                Cigar::Diff(25),
3141                Cigar::SoftClip(11),
3142            ]),
3143            CigarString(vec![Cigar::Match(100)]),
3144            CigarString(vec![]),
3145            CigarString(vec![
3146                Cigar::HardClip(1),
3147                Cigar::Equal(1),
3148                Cigar::HardClip(1),
3149            ]),
3150            CigarString(vec![
3151                Cigar::SoftClip(1),
3152                Cigar::Equal(1),
3153                Cigar::SoftClip(1),
3154            ]),
3155            CigarString(vec![
3156                Cigar::HardClip(11),
3157                Cigar::SoftClip(11),
3158                Cigar::Equal(11),
3159                Cigar::SoftClip(11),
3160                Cigar::HardClip(11),
3161            ]),
3162            CigarString(vec![Cigar::HardClip(10)]),
3163            CigarString(vec![Cigar::SoftClip(10)]),
3164        ];
3165        // compare
3166        for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
3167            let cigar_parse = CigarString::try_from(cigar_str)
3168                .unwrap_or_else(|_| panic!("Unable to parse cigar: {}", cigar_str));
3169            assert_eq!(&cigar_parse, truth);
3170        }
3171    }
3172}
3173
3174#[cfg(test)]
3175mod basemod_tests {
3176    use crate::bam::{Read, Reader};
3177
3178    #[test]
3179    pub fn test_count_recorded() {
3180        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3181
3182        for r in bam.records() {
3183            let record = r.unwrap();
3184            if let Ok(mods) = record.basemods_iter() {
3185                let n = mods.recorded().len();
3186                assert_eq!(n, 3);
3187            };
3188        }
3189    }
3190
3191    #[test]
3192    pub fn test_query_type() {
3193        let mut bam = Reader::from_path("test/base_mods/MM-orient.sam").unwrap();
3194
3195        let mut n_fwd = 0;
3196        let mut n_rev = 0;
3197
3198        for r in bam.records() {
3199            let record = r.unwrap();
3200            if let Ok(mods) = record.basemods_iter() {
3201                for mod_code in mods.recorded() {
3202                    if let Ok(mod_metadata) = mods.query_type(*mod_code) {
3203                        if mod_metadata.strand == 0 {
3204                            n_fwd += 1;
3205                        }
3206                        if mod_metadata.strand == 1 {
3207                            n_rev += 1;
3208                        }
3209                    }
3210                }
3211            };
3212        }
3213        assert_eq!(n_fwd, 2);
3214        assert_eq!(n_rev, 2);
3215    }
3216
3217    #[test]
3218    pub fn test_mod_iter() {
3219        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3220        let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
3221        let mut i = 0;
3222
3223        for r in bam.records() {
3224            let record = r.unwrap();
3225            for res in record.basemods_iter().unwrap().flatten() {
3226                let (position, _m) = res;
3227                assert_eq!(position, expected_positions[i]);
3228                i += 1;
3229            }
3230        }
3231    }
3232
3233    #[test]
3234    pub fn test_position_iter() {
3235        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3236        let expected_positions = [1, 7, 12, 13, 22, 30, 31];
3237        let expected_counts = [1, 1, 1, 2, 1, 1, 1];
3238        let mut i = 0;
3239
3240        for r in bam.records() {
3241            let record = r.unwrap();
3242            for res in record.basemods_position_iter().unwrap().flatten() {
3243                let (position, elements) = res;
3244                assert_eq!(position, expected_positions[i]);
3245                assert_eq!(elements.len(), expected_counts[i]);
3246                i += 1;
3247            }
3248        }
3249    }
3250}