rust_htslib/bam/
record.rs

1// Copyright 2014 Christopher Schröder, Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use std::convert::TryFrom;
7use std::convert::TryInto;
8use std::ffi;
9use std::fmt;
10use std::marker::PhantomData;
11use std::mem::{size_of, MaybeUninit};
12use std::ops;
13use std::os::raw::c_char;
14use std::rc::Rc;
15use std::slice;
16use std::str;
17
18use byteorder::{LittleEndian, ReadBytesExt};
19
20use crate::bam::Error;
21use crate::bam::HeaderView;
22use crate::errors::Result;
23use crate::htslib;
24use crate::utils;
25#[cfg(feature = "serde_feature")]
26use serde::{self, Deserialize, Serialize};
27
28use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
29use bio_types::genome;
30use bio_types::sequence::SequenceRead;
31use bio_types::sequence::SequenceReadPairOrientation;
32use bio_types::strand::ReqStrand;
33
34/// A macro creating methods for flag access.
35macro_rules! flag {
36    ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
37        pub fn $get(&self) -> bool {
38            self.inner().core.flag & $bit != 0
39        }
40
41        pub fn $set(&mut self) {
42            self.inner_mut().core.flag |= $bit;
43        }
44
45        pub fn $unset(&mut self) {
46            self.inner_mut().core.flag &= !$bit;
47        }
48    };
49}
50
51/// A BAM record.
52pub struct Record {
53    pub inner: htslib::bam1_t,
54    own: bool,
55    cigar: Option<CigarStringView>,
56    header: Option<Rc<HeaderView>>,
57}
58
59unsafe impl Send for Record {}
60unsafe impl Sync for Record {}
61
62impl Clone for Record {
63    fn clone(&self) -> Self {
64        let mut copy = Record::new();
65        unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
66        copy
67    }
68}
69
70impl PartialEq for Record {
71    fn eq(&self, other: &Record) -> bool {
72        self.tid() == other.tid()
73            && self.pos() == other.pos()
74            && self.bin() == other.bin()
75            && self.mapq() == other.mapq()
76            && self.flags() == other.flags()
77            && self.mtid() == other.mtid()
78            && self.mpos() == other.mpos()
79            && self.insert_size() == other.insert_size()
80            && self.data() == other.data()
81            && self.inner().core.l_extranul == other.inner().core.l_extranul
82    }
83}
84
85impl Eq for Record {}
86
87impl fmt::Debug for Record {
88    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
89        fmt.write_fmt(format_args!(
90            "Record(tid: {}, pos: {})",
91            self.tid(),
92            self.pos()
93        ))
94    }
95}
96
97impl Default for Record {
98    fn default() -> Self {
99        Self::new()
100    }
101}
102
103#[inline]
104fn extranul_from_qname(qname: &[u8]) -> usize {
105    let qlen = qname.len() + 1;
106    if !qlen.is_multiple_of(4) {
107        4 - qlen % 4
108    } else {
109        0
110    }
111}
112
113impl Record {
114    /// Create an empty BAM record.
115    pub fn new() -> Self {
116        let mut record = Record {
117            inner: unsafe { MaybeUninit::zeroed().assume_init() },
118            own: true,
119            cigar: None,
120            header: None,
121        };
122        // The read/query name needs to be set as empty to properly initialize
123        // the record
124        record.set_qname(b"");
125        // Developer note: these are needed so the returned record is properly
126        // initialized as unmapped.
127        record.set_unmapped();
128        record.set_tid(-1);
129        record.set_pos(-1);
130        record.set_mpos(-1);
131        record.set_mtid(-1);
132        record
133    }
134
135    pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
136        Record {
137            inner: {
138                #[allow(clippy::uninit_assumed_init, invalid_value)]
139                let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
140                unsafe {
141                    ::libc::memcpy(
142                        &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
143                        from as *const ::libc::c_void,
144                        size_of::<htslib::bam1_t>(),
145                    );
146                }
147                inner
148            },
149            own: false,
150            cigar: None,
151            header: None,
152        }
153    }
154
155    // Create a BAM record from a line SAM text. SAM slice need not be 0-terminated.
156    pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
157        let mut record = Self::new();
158
159        let mut sam_copy = Vec::with_capacity(sam.len() + 1);
160        sam_copy.extend(sam);
161        sam_copy.push(0);
162
163        let mut sam_string = htslib::kstring_t {
164            s: sam_copy.as_ptr() as *mut c_char,
165            l: sam_copy.len(),
166            m: sam_copy.len(),
167        };
168
169        let succ = unsafe {
170            htslib::sam_parse1(
171                &mut sam_string,
172                header_view.inner_ptr() as *mut htslib::bam_hdr_t,
173                record.inner_ptr_mut(),
174            )
175        };
176
177        if succ == 0 {
178            Ok(record)
179        } else {
180            Err(Error::BamParseSAM {
181                rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
182            })
183        }
184    }
185
186    pub fn set_header(&mut self, header: Rc<HeaderView>) {
187        self.header = Some(header);
188    }
189
190    pub(super) fn data(&self) -> &[u8] {
191        unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
192    }
193
194    #[inline]
195    pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
196        &mut self.inner
197    }
198
199    #[inline]
200    pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
201        &mut self.inner as *mut htslib::bam1_t
202    }
203
204    #[inline]
205    pub fn inner(&self) -> &htslib::bam1_t {
206        &self.inner
207    }
208
209    #[inline]
210    pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
211        &self.inner as *const htslib::bam1_t
212    }
213
214    /// Get target id.
215    pub fn tid(&self) -> i32 {
216        self.inner().core.tid
217    }
218
219    /// Set target id.
220    pub fn set_tid(&mut self, tid: i32) {
221        self.inner_mut().core.tid = tid;
222    }
223
224    /// Get position (0-based).
225    pub fn pos(&self) -> i64 {
226        self.inner().core.pos
227    }
228
229    /// Set position (0-based).
230    pub fn set_pos(&mut self, pos: i64) {
231        self.inner_mut().core.pos = pos;
232    }
233
234    pub fn bin(&self) -> u16 {
235        self.inner().core.bin
236    }
237
238    pub fn set_bin(&mut self, bin: u16) {
239        self.inner_mut().core.bin = bin;
240    }
241
242    /// Get MAPQ.
243    pub fn mapq(&self) -> u8 {
244        self.inner().core.qual
245    }
246
247    /// Set MAPQ.
248    pub fn set_mapq(&mut self, mapq: u8) {
249        self.inner_mut().core.qual = mapq;
250    }
251
252    /// Get strand information from record flags.
253    pub fn strand(&self) -> ReqStrand {
254        let reverse = self.flags() & 0x10 != 0;
255        if reverse {
256            ReqStrand::Reverse
257        } else {
258            ReqStrand::Forward
259        }
260    }
261
262    /// Get raw flags.
263    pub fn flags(&self) -> u16 {
264        self.inner().core.flag
265    }
266
267    /// Set raw flags.
268    pub fn set_flags(&mut self, flags: u16) {
269        self.inner_mut().core.flag = flags;
270    }
271
272    /// Unset all flags.
273    pub fn unset_flags(&mut self) {
274        self.inner_mut().core.flag = 0;
275    }
276
277    /// Get target id of mate.
278    pub fn mtid(&self) -> i32 {
279        self.inner().core.mtid
280    }
281
282    /// Set target id of mate.
283    pub fn set_mtid(&mut self, mtid: i32) {
284        self.inner_mut().core.mtid = mtid;
285    }
286
287    /// Get mate position.
288    pub fn mpos(&self) -> i64 {
289        self.inner().core.mpos
290    }
291
292    /// Set mate position.
293    pub fn set_mpos(&mut self, mpos: i64) {
294        self.inner_mut().core.mpos = mpos;
295    }
296
297    /// Get insert size.
298    pub fn insert_size(&self) -> i64 {
299        self.inner().core.isize_
300    }
301
302    /// Set insert size.
303    pub fn set_insert_size(&mut self, insert_size: i64) {
304        self.inner_mut().core.isize_ = insert_size;
305    }
306
307    fn qname_capacity(&self) -> usize {
308        self.inner().core.l_qname as usize
309    }
310
311    fn qname_len(&self) -> usize {
312        // discount all trailing zeros (the default one and extra nulls)
313        self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
314    }
315
316    /// Get qname (read name). Complexity: O(1).
317    pub fn qname(&self) -> &[u8] {
318        &self.data()[..self.qname_len()]
319    }
320
321    /// Set the variable length data buffer
322    pub fn set_data(&mut self, new_data: &[u8]) {
323        self.cigar = None;
324
325        self.inner_mut().l_data = new_data.len() as i32;
326        if (self.inner().m_data as i32) < self.inner().l_data {
327            // Verbosity due to lexical borrowing
328            let l_data = self.inner().l_data;
329            self.realloc_var_data(l_data as usize);
330        }
331
332        // Copy new data into buffer
333        let data =
334            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
335        utils::copy_memory(new_data, data);
336    }
337
338    /// Set variable length data (qname, cigar, seq, qual).
339    /// The aux data is left unchanged.
340    /// `qual` is Phred-scaled quality values, without any offset.
341    /// NOTE: seq.len() must equal qual.len() or this method
342    /// will panic. If you don't have quality values use
343    /// `let quals = vec![ 255 as u8; seq.len()];` as a placeholder that will
344    /// be recognized as missing QVs by `samtools`.
345    pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
346        assert!(qname.len() < 255);
347        assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
348
349        self.cigar = None;
350
351        let cigar_width = if let Some(cigar_string) = cigar {
352            cigar_string.len()
353        } else {
354            0
355        } * 4;
356        let q_len = qname.len() + 1;
357        let extranul = extranul_from_qname(qname);
358
359        let orig_aux_offset = self.qname_capacity()
360            + 4 * self.cigar_len()
361            + self.seq_len().div_ceil(2)
362            + self.seq_len();
363        let new_aux_offset = q_len + extranul + cigar_width + seq.len().div_ceil(2) + qual.len();
364        assert!(orig_aux_offset <= self.inner.l_data as usize);
365        let aux_len = self.inner.l_data as usize - orig_aux_offset;
366        self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
367        if (self.inner().m_data as i32) < self.inner().l_data {
368            // Verbosity due to lexical borrowing
369            let l_data = self.inner().l_data;
370            self.realloc_var_data(l_data as usize);
371        }
372
373        // Copy the aux data.
374        if aux_len > 0 && orig_aux_offset != new_aux_offset {
375            let data =
376                unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
377            data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
378        }
379
380        let data =
381            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
382
383        // qname
384        utils::copy_memory(qname, data);
385        for i in 0..=extranul {
386            data[qname.len() + i] = b'\0';
387        }
388        let mut i = q_len + extranul;
389        self.inner_mut().core.l_qname = i as u16;
390        self.inner_mut().core.l_extranul = extranul as u8;
391
392        // cigar
393        if let Some(cigar_string) = cigar {
394            let cigar_data = unsafe {
395                //cigar is always aligned to 4 bytes (see extranul above) - so this is safe
396                #[allow(clippy::cast_ptr_alignment)]
397                slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
398            };
399            for (i, c) in cigar_string.iter().enumerate() {
400                cigar_data[i] = c.encode();
401            }
402            self.inner_mut().core.n_cigar = cigar_string.len() as u32;
403            i += cigar_string.len() * 4;
404        } else {
405            self.inner_mut().core.n_cigar = 0;
406        };
407
408        // seq
409        {
410            for j in (0..seq.len()).step_by(2) {
411                data[i + j / 2] = (ENCODE_BASE[seq[j] as usize] << 4)
412                    | (if j + 1 < seq.len() {
413                        ENCODE_BASE[seq[j + 1] as usize]
414                    } else {
415                        0
416                    });
417            }
418            self.inner_mut().core.l_qseq = seq.len() as i32;
419            i += seq.len().div_ceil(2);
420        }
421
422        // qual
423        utils::copy_memory(qual, &mut data[i..]);
424    }
425
426    /// Replace current qname with a new one.
427    pub fn set_qname(&mut self, new_qname: &[u8]) {
428        // 251 + 1NUL is the max 32-bit aligned value that fits in u8
429        assert!(new_qname.len() < 252);
430
431        let old_q_len = self.qname_capacity();
432        // We're going to add a terminal NUL
433        let extranul = extranul_from_qname(new_qname);
434        let new_q_len = new_qname.len() + 1 + extranul;
435
436        // Length of data after qname
437        let other_len = self.inner_mut().l_data - old_q_len as i32;
438
439        if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
440            self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
441        } else if new_q_len > old_q_len {
442            self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
443
444            // Reallocate if necessary
445            if (self.inner().m_data as i32) < self.inner().l_data {
446                // Verbosity due to lexical borrowing
447                let l_data = self.inner().l_data;
448                self.realloc_var_data(l_data as usize);
449            }
450        }
451
452        if new_q_len != old_q_len {
453            // Move other data to new location
454            unsafe {
455                let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
456
457                ::libc::memmove(
458                    data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
459                    data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
460                    other_len as usize,
461                );
462            }
463        }
464
465        // Copy qname data
466        let data =
467            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
468        utils::copy_memory(new_qname, data);
469        for i in 0..=extranul {
470            data[new_q_len - i - 1] = b'\0';
471        }
472        self.inner_mut().core.l_qname = new_q_len as u16;
473        self.inner_mut().core.l_extranul = extranul as u8;
474    }
475
476    /// Replace current cigar with a new one.
477    pub fn set_cigar(&mut self, new_cigar: Option<&CigarString>) {
478        self.cigar = None;
479
480        let qname_data_len = self.qname_capacity();
481        let old_cigar_data_len = self.cigar_len() * 4;
482
483        // Length of data after cigar
484        let other_data_len = self.inner_mut().l_data - (qname_data_len + old_cigar_data_len) as i32;
485
486        let new_cigar_len = match new_cigar {
487            Some(x) => x.len(),
488            None => 0,
489        };
490        let new_cigar_data_len = new_cigar_len * 4;
491
492        if new_cigar_data_len < old_cigar_data_len {
493            self.inner_mut().l_data -= (old_cigar_data_len - new_cigar_data_len) as i32;
494        } else if new_cigar_data_len > old_cigar_data_len {
495            self.inner_mut().l_data += (new_cigar_data_len - old_cigar_data_len) as i32;
496
497            // Reallocate if necessary
498            if (self.inner().m_data as i32) < self.inner().l_data {
499                // Verbosity due to lexical borrowing
500                let l_data = self.inner().l_data;
501                self.realloc_var_data(l_data as usize);
502            }
503        }
504
505        if new_cigar_data_len != old_cigar_data_len {
506            // Move other data to new location
507            unsafe {
508                ::libc::memmove(
509                    self.inner.data.add(qname_data_len + new_cigar_data_len) as *mut ::libc::c_void,
510                    self.inner.data.add(qname_data_len + old_cigar_data_len) as *mut ::libc::c_void,
511                    other_data_len as usize,
512                );
513            }
514        }
515
516        // Copy cigar data
517        if let Some(cigar_string) = new_cigar {
518            let cigar_data = unsafe {
519                #[allow(clippy::cast_ptr_alignment)]
520                slice::from_raw_parts_mut(
521                    self.inner.data.add(qname_data_len) as *mut u32,
522                    cigar_string.len(),
523                )
524            };
525            for (i, c) in cigar_string.iter().enumerate() {
526                cigar_data[i] = c.encode();
527            }
528        }
529        self.inner_mut().core.n_cigar = new_cigar_len as u32;
530    }
531
532    fn realloc_var_data(&mut self, new_len: usize) {
533        // pad request
534        let new_len = new_len as u32;
535        let new_request = new_len + 32 - (new_len % 32);
536
537        let ptr = unsafe {
538            ::libc::realloc(
539                self.inner().data as *mut ::libc::c_void,
540                new_request as usize,
541            ) as *mut u8
542        };
543
544        if ptr.is_null() {
545            panic!("ran out of memory in rust_htslib trying to realloc");
546        }
547
548        // don't update m_data until we know we have
549        // a successful allocation.
550        self.inner_mut().m_data = new_request;
551        self.inner_mut().data = ptr;
552
553        // we now own inner.data
554        self.own = true;
555    }
556
557    pub fn cigar_len(&self) -> usize {
558        self.inner().core.n_cigar as usize
559    }
560
561    /// Get reference to raw cigar string representation (as stored in BAM file).
562    /// Usually, the method `Record::cigar` should be used instead.
563    pub fn raw_cigar(&self) -> &[u32] {
564        //cigar is always aligned to 4 bytes - so this is safe
565        #[allow(clippy::cast_ptr_alignment)]
566        unsafe {
567            slice::from_raw_parts(
568                self.data()[self.qname_capacity()..].as_ptr() as *const u32,
569                self.cigar_len(),
570            )
571        }
572    }
573
574    /// Return unpacked cigar string. This will create a fresh copy the Cigar data.
575    pub fn cigar(&self) -> CigarStringView {
576        match self.cigar {
577            Some(ref c) => c.clone(),
578            None => self.unpack_cigar(),
579        }
580    }
581
582    // Return unpacked cigar string. This returns None unless you have first called `bam::Record::cache_cigar`.
583    pub fn cigar_cached(&self) -> Option<&CigarStringView> {
584        self.cigar.as_ref()
585    }
586
587    /// Decode the cigar string and cache it inside the `Record`
588    pub fn cache_cigar(&mut self) {
589        self.cigar = Some(self.unpack_cigar())
590    }
591
592    /// Unpack cigar string. Complexity: O(k) with k being the length of the cigar string.
593    fn unpack_cigar(&self) -> CigarStringView {
594        CigarString(
595            self.raw_cigar()
596                .iter()
597                .map(|&c| {
598                    let len = c >> 4;
599                    match c & 0b1111 {
600                        0 => Cigar::Match(len),
601                        1 => Cigar::Ins(len),
602                        2 => Cigar::Del(len),
603                        3 => Cigar::RefSkip(len),
604                        4 => Cigar::SoftClip(len),
605                        5 => Cigar::HardClip(len),
606                        6 => Cigar::Pad(len),
607                        7 => Cigar::Equal(len),
608                        8 => Cigar::Diff(len),
609                        _ => panic!("Unexpected cigar operation"),
610                    }
611                })
612                .collect(),
613        )
614        .into_view(self.pos())
615    }
616
617    pub fn seq_len(&self) -> usize {
618        self.inner().core.l_qseq as usize
619    }
620
621    fn seq_data(&self) -> &[u8] {
622        let offset = self.qname_capacity() + self.cigar_len() * 4;
623        &self.data()[offset..][..self.seq_len().div_ceil(2)]
624    }
625
626    /// Get read sequence. Complexity: O(1).
627    pub fn seq(&self) -> Seq<'_> {
628        Seq {
629            encoded: self.seq_data(),
630            len: self.seq_len(),
631        }
632    }
633
634    /// Get base qualities (PHRED-scaled probability that base is wrong).
635    /// This does not entail any offsets, hence the qualities can be used directly without
636    /// e.g. subtracting 33. Complexity: O(1).
637    pub fn qual(&self) -> &[u8] {
638        &self.data()[self.qname_capacity() + self.cigar_len() * 4 + self.seq_len().div_ceil(2)..]
639            [..self.seq_len()]
640    }
641
642    /// Look up an auxiliary field by its tag.
643    ///
644    /// Only the first two bytes of a given tag are used for the look-up of a field.
645    /// See [`Aux`] for more details.
646    pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
647        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
648        let aux = unsafe {
649            htslib::bam_aux_get(
650                &self.inner as *const htslib::bam1_t,
651                c_str.as_ptr() as *mut c_char,
652            )
653        };
654        unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
655    }
656
657    unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
658        const TAG_LEN: isize = 2;
659        // Used for skipping type identifier
660        const TYPE_ID_LEN: isize = 1;
661
662        if aux.is_null() {
663            return Err(Error::BamAuxTagNotFound);
664        }
665
666        let (data, type_size) = match *aux {
667            b'A' => {
668                let type_size = size_of::<u8>();
669                (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
670            }
671            b'c' => {
672                let type_size = size_of::<i8>();
673                (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
674            }
675            b'C' => {
676                let type_size = size_of::<u8>();
677                (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
678            }
679            b's' => {
680                let type_size = size_of::<i16>();
681                (
682                    Aux::I16(
683                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
684                            .read_i16::<LittleEndian>()
685                            .map_err(|_| Error::BamAuxParsingError)?,
686                    ),
687                    type_size,
688                )
689            }
690            b'S' => {
691                let type_size = size_of::<u16>();
692                (
693                    Aux::U16(
694                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
695                            .read_u16::<LittleEndian>()
696                            .map_err(|_| Error::BamAuxParsingError)?,
697                    ),
698                    type_size,
699                )
700            }
701            b'i' => {
702                let type_size = size_of::<i32>();
703                (
704                    Aux::I32(
705                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
706                            .read_i32::<LittleEndian>()
707                            .map_err(|_| Error::BamAuxParsingError)?,
708                    ),
709                    type_size,
710                )
711            }
712            b'I' => {
713                let type_size = size_of::<u32>();
714                (
715                    Aux::U32(
716                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
717                            .read_u32::<LittleEndian>()
718                            .map_err(|_| Error::BamAuxParsingError)?,
719                    ),
720                    type_size,
721                )
722            }
723            b'f' => {
724                let type_size = size_of::<f32>();
725                (
726                    Aux::Float(
727                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
728                            .read_f32::<LittleEndian>()
729                            .map_err(|_| Error::BamAuxParsingError)?,
730                    ),
731                    type_size,
732                )
733            }
734            b'd' => {
735                let type_size = size_of::<f64>();
736                (
737                    Aux::Double(
738                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
739                            .read_f64::<LittleEndian>()
740                            .map_err(|_| Error::BamAuxParsingError)?,
741                    ),
742                    type_size,
743                )
744            }
745            b'Z' | b'H' => {
746                let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
747                let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
748                (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
749            }
750            b'B' => {
751                const ARRAY_INNER_TYPE_LEN: isize = 1;
752                const ARRAY_COUNT_LEN: isize = 4;
753
754                // Used for skipping metadata
755                let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
756
757                let length =
758                    slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
759                        .read_u32::<LittleEndian>()
760                        .map_err(|_| Error::BamAuxParsingError)? as usize;
761
762                // Return tuples of an `Aux` enum and the length of data + metadata in bytes
763                let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
764                    b'c' => (
765                        Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
766                            aux.offset(array_data_offset),
767                            length,
768                        ))),
769                        length,
770                    ),
771                    b'C' => (
772                        Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
773                            aux.offset(array_data_offset),
774                            length,
775                        ))),
776                        length,
777                    ),
778                    b's' => (
779                        Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
780                            aux.offset(array_data_offset),
781                            length * size_of::<i16>(),
782                        ))),
783                        length * std::mem::size_of::<i16>(),
784                    ),
785                    b'S' => (
786                        Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
787                            aux.offset(array_data_offset),
788                            length * size_of::<u16>(),
789                        ))),
790                        length * std::mem::size_of::<u16>(),
791                    ),
792                    b'i' => (
793                        Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
794                            aux.offset(array_data_offset),
795                            length * size_of::<i32>(),
796                        ))),
797                        length * std::mem::size_of::<i32>(),
798                    ),
799                    b'I' => (
800                        Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
801                            aux.offset(array_data_offset),
802                            length * size_of::<u32>(),
803                        ))),
804                        length * std::mem::size_of::<u32>(),
805                    ),
806                    b'f' => (
807                        Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
808                            aux.offset(array_data_offset),
809                            length * size_of::<f32>(),
810                        ))),
811                        length * std::mem::size_of::<f32>(),
812                    ),
813                    _ => {
814                        return Err(Error::BamAuxUnknownType);
815                    }
816                };
817                (
818                    array_data,
819                    // Offset: array-specific metadata + array size
820                    ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
821                )
822            }
823            _ => {
824                return Err(Error::BamAuxUnknownType);
825            }
826        };
827
828        // Offset: metadata + type size
829        Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
830    }
831
832    /// Returns an iterator over the auxiliary fields of the record.
833    ///
834    /// When an error occurs, the `Err` variant will be returned
835    /// and the iterator will not be able to advance anymore.
836    pub fn aux_iter(&self) -> AuxIter<'_> {
837        AuxIter {
838            // In order to get to the aux data section of a `bam::Record`
839            // we need to skip fields in front of it
840            aux: &self.data()[
841                // NUL terminated read name:
842                self.qname_capacity()
843                // CIGAR (uint32_t):
844                + self.cigar_len() * std::mem::size_of::<u32>()
845                // Read sequence (4-bit encoded):
846                + self.seq_len().div_ceil(2)
847                // Base qualities (char):
848                + self.seq_len()..],
849        }
850    }
851
852    /// Add auxiliary data.
853    pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
854        // Don't allow pushing aux data when the given tag is already present in the record.
855        // `htslib` seems to allow this (for non-array values), which can lead to problems
856        // since retrieving aux fields consumes &[u8; 2] and yields one field only.
857        if self.aux(tag).is_ok() {
858            return Err(Error::BamAuxTagAlreadyPresent);
859        }
860
861        let ctag = tag.as_ptr() as *mut c_char;
862        let ret = unsafe {
863            match value {
864                Aux::Char(v) => htslib::bam_aux_append(
865                    self.inner_ptr_mut(),
866                    ctag,
867                    b'A' as c_char,
868                    size_of::<u8>() as i32,
869                    [v].as_mut_ptr(),
870                ),
871                Aux::I8(v) => htslib::bam_aux_append(
872                    self.inner_ptr_mut(),
873                    ctag,
874                    b'c' as c_char,
875                    size_of::<i8>() as i32,
876                    [v].as_mut_ptr() as *mut u8,
877                ),
878                Aux::U8(v) => htslib::bam_aux_append(
879                    self.inner_ptr_mut(),
880                    ctag,
881                    b'C' as c_char,
882                    size_of::<u8>() as i32,
883                    [v].as_mut_ptr(),
884                ),
885                Aux::I16(v) => htslib::bam_aux_append(
886                    self.inner_ptr_mut(),
887                    ctag,
888                    b's' as c_char,
889                    size_of::<i16>() as i32,
890                    [v].as_mut_ptr() as *mut u8,
891                ),
892                Aux::U16(v) => htslib::bam_aux_append(
893                    self.inner_ptr_mut(),
894                    ctag,
895                    b'S' as c_char,
896                    size_of::<u16>() as i32,
897                    [v].as_mut_ptr() as *mut u8,
898                ),
899                Aux::I32(v) => htslib::bam_aux_append(
900                    self.inner_ptr_mut(),
901                    ctag,
902                    b'i' as c_char,
903                    size_of::<i32>() as i32,
904                    [v].as_mut_ptr() as *mut u8,
905                ),
906                Aux::U32(v) => htslib::bam_aux_append(
907                    self.inner_ptr_mut(),
908                    ctag,
909                    b'I' as c_char,
910                    size_of::<u32>() as i32,
911                    [v].as_mut_ptr() as *mut u8,
912                ),
913                Aux::Float(v) => htslib::bam_aux_append(
914                    self.inner_ptr_mut(),
915                    ctag,
916                    b'f' as c_char,
917                    size_of::<f32>() as i32,
918                    [v].as_mut_ptr() as *mut u8,
919                ),
920                // Not part of specs but implemented in `htslib`:
921                Aux::Double(v) => htslib::bam_aux_append(
922                    self.inner_ptr_mut(),
923                    ctag,
924                    b'd' as c_char,
925                    size_of::<f64>() as i32,
926                    [v].as_mut_ptr() as *mut u8,
927                ),
928                Aux::String(v) => {
929                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
930                    htslib::bam_aux_append(
931                        self.inner_ptr_mut(),
932                        ctag,
933                        b'Z' as c_char,
934                        (v.len() + 1) as i32,
935                        c_str.as_ptr() as *mut u8,
936                    )
937                }
938                Aux::HexByteArray(v) => {
939                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
940                    htslib::bam_aux_append(
941                        self.inner_ptr_mut(),
942                        ctag,
943                        b'H' as c_char,
944                        (v.len() + 1) as i32,
945                        c_str.as_ptr() as *mut u8,
946                    )
947                }
948                // Not sure it's safe to cast an immutable slice to a mutable pointer in the following branches
949                Aux::ArrayI8(aux_array) => match aux_array {
950                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
951                        self.inner_ptr_mut(),
952                        ctag,
953                        b'c',
954                        inner.len() as u32,
955                        inner.slice.as_ptr() as *mut ::libc::c_void,
956                    ),
957                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
958                        self.inner_ptr_mut(),
959                        ctag,
960                        b'c',
961                        inner.len() as u32,
962                        inner.slice.as_ptr() as *mut ::libc::c_void,
963                    ),
964                },
965                Aux::ArrayU8(aux_array) => match aux_array {
966                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
967                        self.inner_ptr_mut(),
968                        ctag,
969                        b'C',
970                        inner.len() as u32,
971                        inner.slice.as_ptr() as *mut ::libc::c_void,
972                    ),
973                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
974                        self.inner_ptr_mut(),
975                        ctag,
976                        b'C',
977                        inner.len() as u32,
978                        inner.slice.as_ptr() as *mut ::libc::c_void,
979                    ),
980                },
981                Aux::ArrayI16(aux_array) => match aux_array {
982                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
983                        self.inner_ptr_mut(),
984                        ctag,
985                        b's',
986                        inner.len() as u32,
987                        inner.slice.as_ptr() as *mut ::libc::c_void,
988                    ),
989                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
990                        self.inner_ptr_mut(),
991                        ctag,
992                        b's',
993                        inner.len() as u32,
994                        inner.slice.as_ptr() as *mut ::libc::c_void,
995                    ),
996                },
997                Aux::ArrayU16(aux_array) => match aux_array {
998                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
999                        self.inner_ptr_mut(),
1000                        ctag,
1001                        b'S',
1002                        inner.len() as u32,
1003                        inner.slice.as_ptr() as *mut ::libc::c_void,
1004                    ),
1005                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1006                        self.inner_ptr_mut(),
1007                        ctag,
1008                        b'S',
1009                        inner.len() as u32,
1010                        inner.slice.as_ptr() as *mut ::libc::c_void,
1011                    ),
1012                },
1013                Aux::ArrayI32(aux_array) => match aux_array {
1014                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1015                        self.inner_ptr_mut(),
1016                        ctag,
1017                        b'i',
1018                        inner.len() as u32,
1019                        inner.slice.as_ptr() as *mut ::libc::c_void,
1020                    ),
1021                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1022                        self.inner_ptr_mut(),
1023                        ctag,
1024                        b'i',
1025                        inner.len() as u32,
1026                        inner.slice.as_ptr() as *mut ::libc::c_void,
1027                    ),
1028                },
1029                Aux::ArrayU32(aux_array) => match aux_array {
1030                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1031                        self.inner_ptr_mut(),
1032                        ctag,
1033                        b'I',
1034                        inner.len() as u32,
1035                        inner.slice.as_ptr() as *mut ::libc::c_void,
1036                    ),
1037                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1038                        self.inner_ptr_mut(),
1039                        ctag,
1040                        b'I',
1041                        inner.len() as u32,
1042                        inner.slice.as_ptr() as *mut ::libc::c_void,
1043                    ),
1044                },
1045                Aux::ArrayFloat(aux_array) => match aux_array {
1046                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1047                        self.inner_ptr_mut(),
1048                        ctag,
1049                        b'f',
1050                        inner.len() as u32,
1051                        inner.slice.as_ptr() as *mut ::libc::c_void,
1052                    ),
1053                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1054                        self.inner_ptr_mut(),
1055                        ctag,
1056                        b'f',
1057                        inner.len() as u32,
1058                        inner.slice.as_ptr() as *mut ::libc::c_void,
1059                    ),
1060                },
1061            }
1062        };
1063
1064        if ret < 0 {
1065            Err(Error::BamAux)
1066        } else {
1067            Ok(())
1068        }
1069    }
1070
1071    /// Update or add auxiliary data.
1072    pub fn update_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
1073        // Update existing aux data for the given tag if already present in the record
1074        // without changing the ordering of tags in the record or append aux data at
1075        // the end of the existing aux records if it is a new tag.
1076
1077        let ctag = tag.as_ptr() as *mut c_char;
1078        let ret = unsafe {
1079            match value {
1080                Aux::Char(_v) => return Err(Error::BamAuxTagUpdatingNotSupported),
1081                Aux::I8(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1082                Aux::U8(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1083                Aux::I16(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1084                Aux::U16(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1085                Aux::I32(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1086                Aux::U32(v) => htslib::bam_aux_update_int(self.inner_ptr_mut(), ctag, v as i64),
1087                Aux::Float(v) => htslib::bam_aux_update_float(self.inner_ptr_mut(), ctag, v),
1088                // Not part of specs but implemented in `htslib`:
1089                Aux::Double(v) => {
1090                    htslib::bam_aux_update_float(self.inner_ptr_mut(), ctag, v as f32)
1091                }
1092                Aux::String(v) => {
1093                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
1094                    htslib::bam_aux_update_str(
1095                        self.inner_ptr_mut(),
1096                        ctag,
1097                        (v.len() + 1) as i32,
1098                        c_str.as_ptr() as *const c_char,
1099                    )
1100                }
1101                Aux::HexByteArray(_v) => return Err(Error::BamAuxTagUpdatingNotSupported),
1102                // Not sure it's safe to cast an immutable slice to a mutable pointer in the following branches
1103                Aux::ArrayI8(aux_array) => match aux_array {
1104                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1105                        self.inner_ptr_mut(),
1106                        ctag,
1107                        b'c',
1108                        inner.len() as u32,
1109                        inner.slice.as_ptr() as *mut ::libc::c_void,
1110                    ),
1111                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1112                        self.inner_ptr_mut(),
1113                        ctag,
1114                        b'c',
1115                        inner.len() as u32,
1116                        inner.slice.as_ptr() as *mut ::libc::c_void,
1117                    ),
1118                },
1119                Aux::ArrayU8(aux_array) => match aux_array {
1120                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1121                        self.inner_ptr_mut(),
1122                        ctag,
1123                        b'C',
1124                        inner.len() as u32,
1125                        inner.slice.as_ptr() as *mut ::libc::c_void,
1126                    ),
1127                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1128                        self.inner_ptr_mut(),
1129                        ctag,
1130                        b'C',
1131                        inner.len() as u32,
1132                        inner.slice.as_ptr() as *mut ::libc::c_void,
1133                    ),
1134                },
1135                Aux::ArrayI16(aux_array) => match aux_array {
1136                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1137                        self.inner_ptr_mut(),
1138                        ctag,
1139                        b's',
1140                        inner.len() as u32,
1141                        inner.slice.as_ptr() as *mut ::libc::c_void,
1142                    ),
1143                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1144                        self.inner_ptr_mut(),
1145                        ctag,
1146                        b's',
1147                        inner.len() as u32,
1148                        inner.slice.as_ptr() as *mut ::libc::c_void,
1149                    ),
1150                },
1151                Aux::ArrayU16(aux_array) => match aux_array {
1152                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1153                        self.inner_ptr_mut(),
1154                        ctag,
1155                        b'S',
1156                        inner.len() as u32,
1157                        inner.slice.as_ptr() as *mut ::libc::c_void,
1158                    ),
1159                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1160                        self.inner_ptr_mut(),
1161                        ctag,
1162                        b'S',
1163                        inner.len() as u32,
1164                        inner.slice.as_ptr() as *mut ::libc::c_void,
1165                    ),
1166                },
1167                Aux::ArrayI32(aux_array) => match aux_array {
1168                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1169                        self.inner_ptr_mut(),
1170                        ctag,
1171                        b'i',
1172                        inner.len() as u32,
1173                        inner.slice.as_ptr() as *mut ::libc::c_void,
1174                    ),
1175                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1176                        self.inner_ptr_mut(),
1177                        ctag,
1178                        b'i',
1179                        inner.len() as u32,
1180                        inner.slice.as_ptr() as *mut ::libc::c_void,
1181                    ),
1182                },
1183                Aux::ArrayU32(aux_array) => match aux_array {
1184                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1185                        self.inner_ptr_mut(),
1186                        ctag,
1187                        b'I',
1188                        inner.len() as u32,
1189                        inner.slice.as_ptr() as *mut ::libc::c_void,
1190                    ),
1191                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1192                        self.inner_ptr_mut(),
1193                        ctag,
1194                        b'I',
1195                        inner.len() as u32,
1196                        inner.slice.as_ptr() as *mut ::libc::c_void,
1197                    ),
1198                },
1199                Aux::ArrayFloat(aux_array) => match aux_array {
1200                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1201                        self.inner_ptr_mut(),
1202                        ctag,
1203                        b'f',
1204                        inner.len() as u32,
1205                        inner.slice.as_ptr() as *mut ::libc::c_void,
1206                    ),
1207                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1208                        self.inner_ptr_mut(),
1209                        ctag,
1210                        b'f',
1211                        inner.len() as u32,
1212                        inner.slice.as_ptr() as *mut ::libc::c_void,
1213                    ),
1214                },
1215            }
1216        };
1217
1218        if ret < 0 {
1219            Err(Error::BamAux)
1220        } else {
1221            Ok(())
1222        }
1223    }
1224
1225    // Delete auxiliary tag.
1226    pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1227        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
1228        let aux = unsafe {
1229            htslib::bam_aux_get(
1230                &self.inner as *const htslib::bam1_t,
1231                c_str.as_ptr() as *mut c_char,
1232            )
1233        };
1234        unsafe {
1235            if aux.is_null() {
1236                Err(Error::BamAuxTagNotFound)
1237            } else {
1238                htslib::bam_aux_del(self.inner_ptr_mut(), aux);
1239                Ok(())
1240            }
1241        }
1242    }
1243
1244    /// Access the base modifications associated with this Record through the MM tag.
1245    /// Example:
1246    /// ```
1247    ///    use rust_htslib::bam::{Read, Reader, Record};
1248    ///    let mut bam = Reader::from_path("test/base_mods/MM-orient.sam").unwrap();
1249    ///    let mut mod_count = 0;
1250    ///    for r in bam.records() {
1251    ///        let record = r.unwrap();
1252    ///        if let Ok(mods) = record.basemods_iter() {
1253    ///            // print metadata for the modifications present in this record
1254    ///            for mod_code in mods.recorded() {
1255    ///                if let Ok(mod_metadata) = mods.query_type(*mod_code) {
1256    ///                    println!("mod found with code {}/{} flags: [{} {} {}]",
1257    ///                              mod_code, *mod_code as u8 as char,
1258    ///                              mod_metadata.strand, mod_metadata.implicit, mod_metadata.canonical as u8 as char);
1259    ///                }
1260    ///            }
1261    ///
1262    ///            // iterate over the modifications in this record
1263    ///            // the modifications are returned as a tuple with the
1264    ///            // position within SEQ and an hts_base_mod struct
1265    ///            for res in mods {
1266    ///                if let Ok( (position, m) ) = res {
1267    ///                    println!("{} {},{}", position, m.modified_base as u8 as char, m.qual);
1268    ///                    mod_count += 1;
1269    ///                }
1270    ///            }
1271    ///        };
1272    ///    }
1273    ///    assert_eq!(mod_count, 14);
1274    /// ```
1275    pub fn basemods_iter(&self) -> Result<BaseModificationsIter<'_>> {
1276        BaseModificationsIter::new(self)
1277    }
1278
1279    /// An iterator that returns all of the modifications for each position as a vector.
1280    /// This is useful for the case where multiple possible modifications can be annotated
1281    /// at a single position (for example a C could be 5-mC or 5-hmC)
1282    pub fn basemods_position_iter(&self) -> Result<BaseModificationsPositionIter<'_>> {
1283        BaseModificationsPositionIter::new(self)
1284    }
1285
1286    /// Infer read pair orientation from record. Returns `SequenceReadPairOrientation::None` if record
1287    /// is not paired, mates are not mapping to the same contig, or mates start at the
1288    /// same position.
1289    pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1290        if self.is_paired()
1291            && !self.is_unmapped()
1292            && !self.is_mate_unmapped()
1293            && self.tid() == self.mtid()
1294        {
1295            if self.pos() == self.mpos() {
1296                // both reads start at the same position, we cannot decide on the orientation.
1297                return SequenceReadPairOrientation::None;
1298            }
1299
1300            let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() {
1301                (
1302                    self.pos(),
1303                    self.mpos(),
1304                    !self.is_reverse(),
1305                    !self.is_mate_reverse(),
1306                )
1307            } else {
1308                (
1309                    self.mpos(),
1310                    self.pos(),
1311                    !self.is_mate_reverse(),
1312                    !self.is_reverse(),
1313                )
1314            };
1315
1316            if pos_1 < pos_2 {
1317                match (fwd_1, fwd_2) {
1318                    (true, true) => SequenceReadPairOrientation::F1F2,
1319                    (true, false) => SequenceReadPairOrientation::F1R2,
1320                    (false, true) => SequenceReadPairOrientation::R1F2,
1321                    (false, false) => SequenceReadPairOrientation::R1R2,
1322                }
1323            } else {
1324                match (fwd_2, fwd_1) {
1325                    (true, true) => SequenceReadPairOrientation::F2F1,
1326                    (true, false) => SequenceReadPairOrientation::F2R1,
1327                    (false, true) => SequenceReadPairOrientation::R2F1,
1328                    (false, false) => SequenceReadPairOrientation::R2R1,
1329                }
1330            }
1331        } else {
1332            SequenceReadPairOrientation::None
1333        }
1334    }
1335
1336    flag!(is_paired, set_paired, unset_paired, 1u16);
1337    flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1338    flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1339    flag!(
1340        is_mate_unmapped,
1341        set_mate_unmapped,
1342        unset_mate_unmapped,
1343        8u16
1344    );
1345    flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1346    flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1347    flag!(
1348        is_first_in_template,
1349        set_first_in_template,
1350        unset_first_in_template,
1351        64u16
1352    );
1353    flag!(
1354        is_last_in_template,
1355        set_last_in_template,
1356        unset_last_in_template,
1357        128u16
1358    );
1359    flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1360    flag!(
1361        is_quality_check_failed,
1362        set_quality_check_failed,
1363        unset_quality_check_failed,
1364        512u16
1365    );
1366    flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1367    flag!(
1368        is_supplementary,
1369        set_supplementary,
1370        unset_supplementary,
1371        2048u16
1372    );
1373}
1374
1375impl Drop for Record {
1376    fn drop(&mut self) {
1377        if self.own {
1378            unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
1379        }
1380    }
1381}
1382
1383impl SequenceRead for Record {
1384    fn name(&self) -> &[u8] {
1385        self.qname()
1386    }
1387
1388    fn base(&self, i: usize) -> u8 {
1389        *decode_base_unchecked(encoded_base(self.seq_data(), i))
1390    }
1391
1392    fn base_qual(&self, i: usize) -> u8 {
1393        self.qual()[i]
1394    }
1395
1396    fn len(&self) -> usize {
1397        self.seq_len()
1398    }
1399
1400    fn is_empty(&self) -> bool {
1401        self.len() == 0
1402    }
1403}
1404
1405impl genome::AbstractInterval for Record {
1406    /// Return contig name. Panics if record does not know its header (which happens if it has not been read from a file).
1407    fn contig(&self) -> &str {
1408        let tid = self.tid();
1409        if tid < 0 {
1410            panic!("invalid tid, must be at least zero");
1411        }
1412        str::from_utf8(
1413            self.header
1414                .as_ref()
1415                .expect(
1416                    "header must be set (this is the case if the record has been read from a file)",
1417                )
1418                .tid2name(tid as u32),
1419        )
1420        .expect("unable to interpret contig name as UTF-8")
1421    }
1422
1423    /// Return genomic range covered by alignment. Panics if `Record::cache_cigar()` has not been called first or `Record::pos()` is less than zero.
1424    fn range(&self) -> ops::Range<genome::Position> {
1425        let end_pos = self
1426            .cigar_cached()
1427            .expect("cigar has not been cached yet, call cache_cigar() first")
1428            .end_pos() as u64;
1429
1430        if self.pos() < 0 {
1431            panic!("invalid position, must be positive")
1432        }
1433
1434        self.pos() as u64..end_pos
1435    }
1436}
1437
1438/// Auxiliary record data
1439///
1440/// The specification allows a wide range of types to be stored as an auxiliary data field of a BAM record.
1441///
1442/// Please note that the [`Aux::Double`] variant is _not_ part of the specification, but it is supported by `htslib`.
1443///
1444/// # Examples
1445///
1446/// ```
1447/// use rust_htslib::{
1448///     bam,
1449///     bam::record::{Aux, AuxArray},
1450///     errors::Error,
1451/// };
1452///
1453/// //Set up BAM record
1454/// let bam_header = bam::Header::new();
1455/// let mut record = bam::Record::from_sam(
1456///     &mut bam::HeaderView::from_header(&bam_header),
1457///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1458/// )
1459/// .unwrap();
1460///
1461/// // Add an integer field
1462/// let aux_integer_field = Aux::I32(1234);
1463/// record.push_aux(b"XI", aux_integer_field).unwrap();
1464///
1465/// match record.aux(b"XI") {
1466///     Ok(value) => {
1467///         // Typically, callers expect an aux field to be of a certain type.
1468///         // If that's not the case, the value can be `match`ed exhaustively.
1469///         if let Aux::I32(v) = value {
1470///             assert_eq!(v, 1234);
1471///         }
1472///     }
1473///     Err(e) => {
1474///         panic!("Error reading aux field: {}", e);
1475///     }
1476/// }
1477///
1478/// // Add an array field
1479/// let array_like_data = vec![0.4, 0.3, 0.2, 0.1];
1480/// let slice_of_data = &array_like_data;
1481/// let aux_array: AuxArray<f32> = slice_of_data.into();
1482/// let aux_array_field = Aux::ArrayFloat(aux_array);
1483/// record.push_aux(b"XA", aux_array_field).unwrap();
1484///
1485/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1486///     let read_array = array.iter().collect::<Vec<_>>();
1487///     assert_eq!(read_array, array_like_data);
1488/// } else {
1489///     panic!("Could not read array data");
1490/// }
1491/// ```
1492#[derive(Debug, PartialEq)]
1493pub enum Aux<'a> {
1494    Char(u8),
1495    I8(i8),
1496    U8(u8),
1497    I16(i16),
1498    U16(u16),
1499    I32(i32),
1500    U32(u32),
1501    Float(f32),
1502    Double(f64), // Not part of specs but implemented in `htslib`
1503    String(&'a str),
1504    HexByteArray(&'a str),
1505    ArrayI8(AuxArray<'a, i8>),
1506    ArrayU8(AuxArray<'a, u8>),
1507    ArrayI16(AuxArray<'a, i16>),
1508    ArrayU16(AuxArray<'a, u16>),
1509    ArrayI32(AuxArray<'a, i32>),
1510    ArrayU32(AuxArray<'a, u32>),
1511    ArrayFloat(AuxArray<'a, f32>),
1512}
1513
1514unsafe impl Send for Aux<'_> {}
1515unsafe impl Sync for Aux<'_> {}
1516
1517/// Types that can be used in aux arrays.
1518pub trait AuxArrayElement: Copy {
1519    fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1520}
1521
1522impl AuxArrayElement for i8 {
1523    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1524        std::io::Cursor::new(bytes).read_i8().ok()
1525    }
1526}
1527impl AuxArrayElement for u8 {
1528    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1529        std::io::Cursor::new(bytes).read_u8().ok()
1530    }
1531}
1532impl AuxArrayElement for i16 {
1533    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1534        std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1535    }
1536}
1537impl AuxArrayElement for u16 {
1538    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1539        std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1540    }
1541}
1542impl AuxArrayElement for i32 {
1543    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1544        std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1545    }
1546}
1547impl AuxArrayElement for u32 {
1548    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1549        std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1550    }
1551}
1552impl AuxArrayElement for f32 {
1553    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1554        std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
1555    }
1556}
1557
1558/// Provides access to aux arrays.
1559///
1560/// Provides methods to either retrieve single elements or an iterator over the
1561/// array.
1562///
1563/// This type is used for wrapping both, array data that was read from a
1564/// BAM record and slices of data that are going to be stored in one.
1565///
1566/// In order to be able to add an `AuxArray` field to a BAM record, `AuxArray`s
1567/// can be constructed via the `From` trait which is implemented for all
1568/// supported types (see [`AuxArrayElement`] for a list).
1569///
1570/// # Examples
1571///
1572/// ```
1573/// use rust_htslib::{
1574///     bam,
1575///     bam::record::{Aux, AuxArray},
1576/// };
1577///
1578/// //Set up BAM record
1579/// let bam_header = bam::Header::new();
1580/// let mut record = bam::Record::from_sam(
1581///     &mut bam::HeaderView::from_header(&bam_header),
1582///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1583/// ).unwrap();
1584///
1585/// let data = vec![0.4, 0.3, 0.2, 0.1];
1586/// let slice_of_data = &data;
1587/// let aux_array: AuxArray<f32> = slice_of_data.into();
1588/// let aux_field = Aux::ArrayFloat(aux_array);
1589/// record.push_aux(b"XA", aux_field);
1590///
1591/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1592///     // Retrieve the second element from the array
1593///     assert_eq!(array.get(1).unwrap(), 0.3);
1594///     // Iterate over the array and collect it into a `Vec`
1595///     let read_array = array.iter().collect::<Vec<_>>();
1596///     assert_eq!(read_array, data);
1597/// } else {
1598///     panic!("Could not read array data");
1599/// }
1600/// ```
1601#[derive(Debug)]
1602pub enum AuxArray<'a, T> {
1603    TargetType(AuxArrayTargetType<'a, T>),
1604    RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1605}
1606
1607impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1608where
1609    T: AuxArrayElement + PartialEq,
1610{
1611    fn eq(&self, other: &AuxArray<'_, T>) -> bool {
1612        use AuxArray::*;
1613        match (self, other) {
1614            (TargetType(v), TargetType(v_other)) => v == v_other,
1615            (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1616            (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
1617            (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
1618        }
1619    }
1620}
1621
1622/// Create AuxArrays from slices of allowed target types.
1623impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1624where
1625    I: AuxArrayElement,
1626    T: AsRef<[I]> + ?Sized,
1627{
1628    fn from(src: &'a T) -> Self {
1629        AuxArray::TargetType(AuxArrayTargetType {
1630            slice: src.as_ref(),
1631        })
1632    }
1633}
1634
1635impl<'a, T> AuxArray<'a, T>
1636where
1637    T: AuxArrayElement,
1638{
1639    /// Returns the element at a position or None if out of bounds.
1640    pub fn get(&self, index: usize) -> Option<T> {
1641        match self {
1642            AuxArray::TargetType(v) => v.get(index),
1643            AuxArray::RawLeBytes(v) => v.get(index),
1644        }
1645    }
1646
1647    /// Returns the number of elements in the array.
1648    pub fn len(&self) -> usize {
1649        match self {
1650            AuxArray::TargetType(a) => a.len(),
1651            AuxArray::RawLeBytes(a) => a.len(),
1652        }
1653    }
1654
1655    /// Returns true if the array contains no elements.
1656    pub fn is_empty(&self) -> bool {
1657        self.len() == 0
1658    }
1659
1660    /// Returns an iterator over the array.
1661    pub fn iter(&self) -> AuxArrayIter<'_, T> {
1662        AuxArrayIter {
1663            index: 0,
1664            array: self,
1665        }
1666    }
1667
1668    /// Create AuxArrays from raw byte slices borrowed from `bam::Record`.
1669    fn from_bytes(bytes: &'a [u8]) -> Self {
1670        Self::RawLeBytes(AuxArrayRawLeBytes {
1671            slice: bytes,
1672            phantom_data: PhantomData,
1673        })
1674    }
1675}
1676
1677/// Encapsulates slice of target type.
1678#[doc(hidden)]
1679#[derive(Debug, PartialEq)]
1680pub struct AuxArrayTargetType<'a, T> {
1681    slice: &'a [T],
1682}
1683
1684impl<T> AuxArrayTargetType<'_, T>
1685where
1686    T: AuxArrayElement,
1687{
1688    fn get(&self, index: usize) -> Option<T> {
1689        self.slice.get(index).copied()
1690    }
1691
1692    fn len(&self) -> usize {
1693        self.slice.len()
1694    }
1695}
1696
1697/// Encapsulates slice of raw bytes to prevent it from being accidentally accessed.
1698#[doc(hidden)]
1699#[derive(Debug, PartialEq)]
1700pub struct AuxArrayRawLeBytes<'a, T> {
1701    slice: &'a [u8],
1702    phantom_data: PhantomData<T>,
1703}
1704
1705impl<T> AuxArrayRawLeBytes<'_, T>
1706where
1707    T: AuxArrayElement,
1708{
1709    fn get(&self, index: usize) -> Option<T> {
1710        let type_size = std::mem::size_of::<T>();
1711        if index * type_size + type_size > self.slice.len() {
1712            return None;
1713        }
1714        T::from_le_bytes(&self.slice[index * type_size..][..type_size])
1715    }
1716
1717    fn len(&self) -> usize {
1718        self.slice.len() / std::mem::size_of::<T>()
1719    }
1720}
1721
1722/// Aux array iterator
1723///
1724/// This struct is created by the [`AuxArray::iter`] method.
1725pub struct AuxArrayIter<'a, T> {
1726    index: usize,
1727    array: &'a AuxArray<'a, T>,
1728}
1729
1730impl<T> Iterator for AuxArrayIter<'_, T>
1731where
1732    T: AuxArrayElement,
1733{
1734    type Item = T;
1735    fn next(&mut self) -> Option<Self::Item> {
1736        let value = self.array.get(self.index);
1737        self.index += 1;
1738        value
1739    }
1740
1741    fn size_hint(&self) -> (usize, Option<usize>) {
1742        let array_length = self.array.len() - self.index;
1743        (array_length, Some(array_length))
1744    }
1745}
1746
1747/// Auxiliary data iterator
1748///
1749/// This struct is created by the [`Record::aux_iter`] method.
1750///
1751/// This iterator returns `Result`s that wrap tuples containing
1752/// a slice which represents the two-byte tag (`&[u8; 2]`) as
1753/// well as an `Aux` enum that wraps the associated value.
1754///
1755/// When an error occurs, the `Err` variant will be returned
1756/// and the iterator will not be able to advance anymore.
1757pub struct AuxIter<'a> {
1758    aux: &'a [u8],
1759}
1760
1761impl<'a> Iterator for AuxIter<'a> {
1762    type Item = Result<(&'a [u8], Aux<'a>)>;
1763
1764    fn next(&mut self) -> Option<Self::Item> {
1765        // We're finished
1766        if self.aux.is_empty() {
1767            return None;
1768        }
1769        // Incomplete aux data
1770        if (1..=3).contains(&self.aux.len()) {
1771            // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1772            self.aux = &[];
1773            return Some(Err(Error::BamAuxParsingError));
1774        }
1775        let tag = &self.aux[..2];
1776        Some(unsafe {
1777            let data_ptr = self.aux[2..].as_ptr();
1778            Record::read_aux_field(data_ptr)
1779                .map(|(aux, offset)| {
1780                    self.aux = &self.aux[offset..];
1781                    (tag, aux)
1782                })
1783                .inspect_err(|_e| {
1784                    // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1785                    self.aux = &[];
1786                })
1787        })
1788    }
1789}
1790
1791static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1792static ENCODE_BASE: [u8; 256] = [
1793    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1794    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1795    1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1796    15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1797    13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1798    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1799    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1800    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1801    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1802    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1803    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1804];
1805
1806#[inline]
1807fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
1808    (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
1809}
1810
1811#[inline]
1812unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
1813    (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
1814}
1815
1816#[inline]
1817fn decode_base_unchecked(base: u8) -> &'static u8 {
1818    unsafe { DECODE_BASE.get_unchecked(base as usize) }
1819}
1820
1821/// The sequence of a record.
1822#[derive(Debug, Copy, Clone)]
1823pub struct Seq<'a> {
1824    pub encoded: &'a [u8],
1825    len: usize,
1826}
1827
1828impl Seq<'_> {
1829    /// Return encoded base. Complexity: O(1).
1830    #[inline]
1831    pub fn encoded_base(&self, i: usize) -> u8 {
1832        encoded_base(self.encoded, i)
1833    }
1834
1835    /// Return encoded base. Complexity: O(1).
1836    ///
1837    /// # Safety
1838    ///
1839    /// TODO
1840    #[inline]
1841    pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
1842        encoded_base_unchecked(self.encoded, i)
1843    }
1844
1845    /// Obtain decoded base without performing bounds checking.
1846    /// Use index based access seq()[i], for checked, safe access.
1847    /// Complexity: O(1).
1848    ///
1849    /// # Safety
1850    ///
1851    /// TODO
1852    #[inline]
1853    pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
1854        *decode_base_unchecked(self.encoded_base_unchecked(i))
1855    }
1856
1857    /// Return decoded sequence. Complexity: O(m) with m being the read length.
1858    pub fn as_bytes(&self) -> Vec<u8> {
1859        (0..self.len()).map(|i| self[i]).collect()
1860    }
1861
1862    /// Return length (in bases) of the sequence.
1863    pub fn len(&self) -> usize {
1864        self.len
1865    }
1866
1867    pub fn is_empty(&self) -> bool {
1868        self.len() == 0
1869    }
1870}
1871
1872impl ops::Index<usize> for Seq<'_> {
1873    type Output = u8;
1874
1875    /// Return decoded base at given position within read. Complexity: O(1).
1876    fn index(&self, index: usize) -> &u8 {
1877        decode_base_unchecked(self.encoded_base(index))
1878    }
1879}
1880
1881unsafe impl Send for Seq<'_> {}
1882unsafe impl Sync for Seq<'_> {}
1883
1884#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1885#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1886pub enum Cigar {
1887    Match(u32),    // M
1888    Ins(u32),      // I
1889    Del(u32),      // D
1890    RefSkip(u32),  // N
1891    SoftClip(u32), // S
1892    HardClip(u32), // H
1893    Pad(u32),      // P
1894    Equal(u32),    // =
1895    Diff(u32),     // X
1896}
1897
1898impl Cigar {
1899    fn encode(self) -> u32 {
1900        match self {
1901            Cigar::Match(len) => len << 4, // | 0,
1902            Cigar::Ins(len) => (len << 4) | 1,
1903            Cigar::Del(len) => (len << 4) | 2,
1904            Cigar::RefSkip(len) => (len << 4) | 3,
1905            Cigar::SoftClip(len) => (len << 4) | 4,
1906            Cigar::HardClip(len) => (len << 4) | 5,
1907            Cigar::Pad(len) => (len << 4) | 6,
1908            Cigar::Equal(len) => (len << 4) | 7,
1909            Cigar::Diff(len) => (len << 4) | 8,
1910        }
1911    }
1912
1913    /// Return the length of the CIGAR.
1914    pub fn len(self) -> u32 {
1915        match self {
1916            Cigar::Match(len) => len,
1917            Cigar::Ins(len) => len,
1918            Cigar::Del(len) => len,
1919            Cigar::RefSkip(len) => len,
1920            Cigar::SoftClip(len) => len,
1921            Cigar::HardClip(len) => len,
1922            Cigar::Pad(len) => len,
1923            Cigar::Equal(len) => len,
1924            Cigar::Diff(len) => len,
1925        }
1926    }
1927
1928    pub fn is_empty(self) -> bool {
1929        self.len() == 0
1930    }
1931
1932    /// Return the character representing the CIGAR.
1933    pub fn char(self) -> char {
1934        match self {
1935            Cigar::Match(_) => 'M',
1936            Cigar::Ins(_) => 'I',
1937            Cigar::Del(_) => 'D',
1938            Cigar::RefSkip(_) => 'N',
1939            Cigar::SoftClip(_) => 'S',
1940            Cigar::HardClip(_) => 'H',
1941            Cigar::Pad(_) => 'P',
1942            Cigar::Equal(_) => '=',
1943            Cigar::Diff(_) => 'X',
1944        }
1945    }
1946}
1947
1948impl fmt::Display for Cigar {
1949    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1950        fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
1951    }
1952}
1953
1954unsafe impl Send for Cigar {}
1955unsafe impl Sync for Cigar {}
1956
1957custom_derive! {
1958    /// A CIGAR string. This type wraps around a `Vec<Cigar>`.
1959    ///
1960    /// # Example
1961    ///
1962    /// ```
1963    /// use rust_htslib::bam::record::{Cigar, CigarString};
1964    ///
1965    /// let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
1966    ///
1967    /// // access by index
1968    /// assert_eq!(cigar[0], Cigar::Match(100));
1969    /// // format into classical string representation
1970    /// assert_eq!(format!("{}", cigar), "100M10S");
1971    /// // iterate
1972    /// for op in &cigar {
1973    ///    println!("{}", op);
1974    /// }
1975    /// ```
1976    #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1977    #[derive(NewtypeDeref,
1978            NewtypeDerefMut,
1979             NewtypeIndex(usize),
1980             NewtypeIndexMut(usize),
1981             NewtypeFrom,
1982             PartialEq,
1983             PartialOrd,
1984             Eq,
1985             NewtypeDebug,
1986             Clone,
1987             Hash
1988    )]
1989    pub struct CigarString(pub Vec<Cigar>);
1990}
1991
1992impl CigarString {
1993    /// Create a `CigarStringView` from this CigarString at position `pos`
1994    pub fn into_view(self, pos: i64) -> CigarStringView {
1995        CigarStringView::new(self, pos)
1996    }
1997
1998    /// Calculate the bam cigar from the alignment struct. x is the target string
1999    /// and y is the reference. `hard_clip` controls how unaligned read bases are encoded in the
2000    /// cigar string. Set to true to use the hard clip (`H`) code, or false to use soft clip
2001    /// (`S`) code. See the [SAM spec](https://samtools.github.io/hts-specs/SAMv1.pdf) for more details.
2002    pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
2003        match alignment.mode {
2004            AlignmentMode::Global => {
2005                panic!(" Bam cigar fn not supported for Global Alignment mode")
2006            }
2007            AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
2008            _ => {}
2009        }
2010
2011        let mut cigar = Vec::new();
2012        if alignment.operations.is_empty() {
2013            return CigarString(cigar);
2014        }
2015
2016        let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
2017            AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
2018            AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
2019            AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
2020            AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
2021            _ => {}
2022        };
2023
2024        if alignment.xstart > 0 {
2025            cigar.push(if hard_clip {
2026                Cigar::HardClip(alignment.xstart as u32)
2027            } else {
2028                Cigar::SoftClip(alignment.xstart as u32)
2029            });
2030        }
2031
2032        let mut last = alignment.operations[0];
2033        let mut k = 1u32;
2034        for &op in alignment.operations[1..].iter() {
2035            if op == last {
2036                k += 1;
2037            } else {
2038                add_op(last, k, &mut cigar);
2039                k = 1;
2040            }
2041            last = op;
2042        }
2043        add_op(last, k, &mut cigar);
2044        if alignment.xlen > alignment.xend {
2045            cigar.push(if hard_clip {
2046                Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
2047            } else {
2048                Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
2049            });
2050        }
2051
2052        CigarString(cigar)
2053    }
2054}
2055
2056impl TryFrom<&[u8]> for CigarString {
2057    type Error = Error;
2058
2059    /// Create a CigarString from given &[u8].
2060    /// # Example
2061    /// ```
2062    /// use rust_htslib::bam::record::*;
2063    /// use rust_htslib::bam::record::CigarString;
2064    /// use rust_htslib::bam::record::Cigar::*;
2065    /// use std::convert::TryFrom;
2066    ///
2067    /// let cigar_str = "2H10M5X3=2H".as_bytes();
2068    /// let cigar = CigarString::try_from(cigar_str)
2069    ///     .expect("Unable to parse cigar string.");
2070    /// let expected_cigar = CigarString(vec![
2071    ///     HardClip(2),
2072    ///     Match(10),
2073    ///     Diff(5),
2074    ///     Equal(3),
2075    ///     HardClip(2),
2076    /// ]);
2077    /// assert_eq!(cigar, expected_cigar);
2078    /// ```
2079    fn try_from(bytes: &[u8]) -> Result<Self> {
2080        let mut inner = Vec::new();
2081        let mut i = 0;
2082        let text_len = bytes.len();
2083        while i < text_len {
2084            let mut j = i;
2085            while j < text_len && bytes[j].is_ascii_digit() {
2086                j += 1;
2087            }
2088            // check that length is provided
2089            if i == j {
2090                return Err(Error::BamParseCigar {
2091                    msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
2092                });
2093            }
2094            // get the length of the operation
2095            let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
2096                msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
2097            })?;
2098            let n = s.parse().map_err(|_| Error::BamParseCigar {
2099                msg: format!("Unable to parse &str '{:?}' to u32.", s),
2100            })?;
2101            // get the operation
2102            let op = &bytes[j];
2103            inner.push(match op {
2104                b'M' => Cigar::Match(n),
2105                b'I' => Cigar::Ins(n),
2106                b'D' => Cigar::Del(n),
2107                b'N' => Cigar::RefSkip(n),
2108                b'H' => {
2109                    if i == 0 || j + 1 == text_len {
2110                        Cigar::HardClip(n)
2111                    } else {
2112                        return Err(Error::BamParseCigar {
2113                            msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
2114                                .to_owned(),
2115                        });
2116                    }
2117                }
2118                b'S' => {
2119                    if i == 0
2120                        || j + 1 == text_len
2121                        || bytes[i-1] == b'H'
2122                        || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
2123                        Cigar::SoftClip(n)
2124                    } else {
2125                        return Err(Error::BamParseCigar {
2126                        msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
2127                            .to_owned(),
2128                        });
2129                    }
2130                },
2131                b'P' => Cigar::Pad(n),
2132                b'=' => Cigar::Equal(n),
2133                b'X' => Cigar::Diff(n),
2134                op => {
2135                    return Err(Error::BamParseCigar {
2136                        msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
2137                    })
2138                }
2139            });
2140            i = j + 1;
2141        }
2142        Ok(CigarString(inner))
2143    }
2144}
2145
2146impl TryFrom<&str> for CigarString {
2147    type Error = Error;
2148
2149    /// Create a CigarString from given &str.
2150    /// # Example
2151    /// ```
2152    /// use rust_htslib::bam::record::*;
2153    /// use rust_htslib::bam::record::CigarString;
2154    /// use rust_htslib::bam::record::Cigar::*;
2155    /// use std::convert::TryFrom;
2156    ///
2157    /// let cigar_str = "2H10M5X3=2H";
2158    /// let cigar = CigarString::try_from(cigar_str)
2159    ///     .expect("Unable to parse cigar string.");
2160    /// let expected_cigar = CigarString(vec![
2161    ///     HardClip(2),
2162    ///     Match(10),
2163    ///     Diff(5),
2164    ///     Equal(3),
2165    ///     HardClip(2),
2166    /// ]);
2167    /// assert_eq!(cigar, expected_cigar);
2168    /// ```
2169    fn try_from(text: &str) -> Result<Self> {
2170        let bytes = text.as_bytes();
2171        if text.chars().count() != bytes.len() {
2172            return Err(Error::BamParseCigar {
2173                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2174            });
2175        }
2176        CigarString::try_from(bytes)
2177    }
2178}
2179
2180impl<'a> CigarString {
2181    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2182        self.into_iter()
2183    }
2184}
2185
2186impl<'a> IntoIterator for &'a CigarString {
2187    type Item = &'a Cigar;
2188    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2189
2190    fn into_iter(self) -> Self::IntoIter {
2191        self.0.iter()
2192    }
2193}
2194
2195impl fmt::Display for CigarString {
2196    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2197        for op in self {
2198            fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
2199        }
2200        Ok(())
2201    }
2202}
2203
2204// Get number of leading/trailing softclips if a CigarString taking hardclips into account
2205fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
2206    match (cigar.next(), cigar.next()) {
2207        (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
2208            *s as i64
2209        }
2210        _ => 0,
2211    }
2212}
2213
2214#[derive(Eq, PartialEq, Clone, Debug)]
2215pub struct CigarStringView {
2216    inner: CigarString,
2217    pos: i64,
2218}
2219
2220impl CigarStringView {
2221    /// Construct a new CigarStringView from a CigarString at a position
2222    pub fn new(c: CigarString, pos: i64) -> CigarStringView {
2223        CigarStringView { inner: c, pos }
2224    }
2225
2226    /// Get (exclusive) end position of alignment.
2227    pub fn end_pos(&self) -> i64 {
2228        let mut pos = self.pos;
2229        for c in self {
2230            match c {
2231                Cigar::Match(l)
2232                | Cigar::RefSkip(l)
2233                | Cigar::Del(l)
2234                | Cigar::Equal(l)
2235                | Cigar::Diff(l) => pos += *l as i64,
2236                // these don't add to end_pos on reference
2237                Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2238            }
2239        }
2240        pos
2241    }
2242
2243    /// Get the start position of the alignment (0-based).
2244    pub fn pos(&self) -> i64 {
2245        self.pos
2246    }
2247
2248    /// Get number of bases softclipped at the beginning of the alignment.
2249    pub fn leading_softclips(&self) -> i64 {
2250        calc_softclips(self.iter())
2251    }
2252
2253    /// Get number of bases softclipped at the end of the alignment.
2254    pub fn trailing_softclips(&self) -> i64 {
2255        calc_softclips(self.iter().rev())
2256    }
2257
2258    /// Get number of bases hardclipped at the beginning of the alignment.
2259    pub fn leading_hardclips(&self) -> i64 {
2260        self.first().map_or(0, |cigar| {
2261            if let Cigar::HardClip(s) = cigar {
2262                *s as i64
2263            } else {
2264                0
2265            }
2266        })
2267    }
2268
2269    /// Get number of bases hardclipped at the end of the alignment.
2270    pub fn trailing_hardclips(&self) -> i64 {
2271        self.last().map_or(0, |cigar| {
2272            if let Cigar::HardClip(s) = cigar {
2273                *s as i64
2274            } else {
2275                0
2276            }
2277        })
2278    }
2279
2280    /// For a given position in the reference, get corresponding position within read.
2281    /// If reference position is outside of the read alignment, return None.
2282    ///
2283    /// # Arguments
2284    ///
2285    /// * `ref_pos` - the reference position
2286    /// * `include_softclips` - if true, softclips will be considered as matches or mismatches
2287    /// * `include_dels` - if true, positions within deletions will be considered (first reference matching read position after deletion will be returned)
2288    ///
2289    pub fn read_pos(
2290        &self,
2291        ref_pos: u32,
2292        include_softclips: bool,
2293        include_dels: bool,
2294    ) -> Result<Option<u32>> {
2295        let mut rpos = self.pos as u32; // reference position
2296        let mut qpos = 0u32; // position within read
2297        let mut j = 0; // index into cigar operation vector
2298
2299        // find first cigar operation referring to qpos = 0 (and thus bases in record.seq()),
2300        // because all augmentations of qpos and rpos before that are invalid
2301        for (i, c) in self.iter().enumerate() {
2302            match c {
2303                Cigar::Match(_) |
2304                Cigar::Diff(_)  |
2305                Cigar::Equal(_) |
2306                // this is unexpected, but bwa + GATK indel realignment can produce insertions
2307                // before matching positions
2308                Cigar::Ins(_) => {
2309                    j = i;
2310                    break;
2311                },
2312                Cigar::SoftClip(l) => {
2313                    j = i;
2314                    if include_softclips {
2315                        // Alignment starts with softclip and we want to include it in the
2316                        // projection of the reference position. However, the POS field does not
2317                        // include the softclip. Hence we have to subtract its length.
2318                        rpos = rpos.saturating_sub(*l);
2319                    }
2320                    break;
2321                },
2322                Cigar::Del(l) => {
2323                    // METHOD: leading deletions can happen in case of trimmed reads where
2324                    // a primer has been removed AFTER read mapping.
2325                    // Example: 24M8I8D18M9S before trimming, 32H8D18M9S after trimming
2326                    // with fgbio. While leading deletions should be impossible with
2327                    // normal read mapping, they make perfect sense with primer trimming
2328                    // because the mapper still had the evidence to decide in favor of
2329                    // the deletion via the primer sequence.
2330                    rpos += l;
2331                },
2332                Cigar::RefSkip(_) => {
2333                    return Err(Error::BamUnexpectedCigarOperation {
2334                        msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
2335                    });
2336                },
2337                Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2338                    return Err(Error::BamUnexpectedCigarOperation{
2339                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2340                    });
2341                },
2342                // if we have reached the end of the CigarString with only pads and hard clips, we have no read position matching the variant
2343                Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
2344                // skip leading HardClips and Pads, as they consume neither read sequence nor reference sequence
2345                Cigar::Pad(_) | Cigar::HardClip(_) => ()
2346            }
2347        }
2348
2349        let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2350            cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2351        };
2352
2353        while rpos <= ref_pos && j < self.len() {
2354            match self[j] {
2355                // potential SNV evidence
2356                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
2357                    // difference between desired position and first position of current cigar
2358                    // operation
2359                    qpos += ref_pos - rpos;
2360                    return Ok(Some(qpos));
2361                }
2362                Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2363                    qpos += ref_pos - rpos;
2364                    return Ok(Some(qpos));
2365                }
2366                Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2367                    // qpos shall resemble the start of the deletion
2368                    return Ok(Some(qpos));
2369                }
2370                // for others, just increase pos and qpos as needed
2371                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
2372                    rpos += l;
2373                    qpos += l;
2374                    j += 1;
2375                }
2376                Cigar::SoftClip(l) => {
2377                    qpos += l;
2378                    j += 1;
2379                    if include_softclips {
2380                        rpos += l;
2381                    }
2382                }
2383                Cigar::Ins(l) => {
2384                    qpos += l;
2385                    j += 1;
2386                }
2387                Cigar::RefSkip(l) | Cigar::Del(l) => {
2388                    rpos += l;
2389                    j += 1;
2390                }
2391                Cigar::Pad(_) => {
2392                    j += 1;
2393                }
2394                Cigar::HardClip(_) if j < self.len() - 1 => {
2395                    return Err(Error::BamUnexpectedCigarOperation{
2396                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2397                    });
2398                }
2399                Cigar::HardClip(_) => return Ok(None),
2400            }
2401        }
2402
2403        Ok(None)
2404    }
2405
2406    /// transfer ownership of the Cigar out of the CigarView
2407    pub fn take(self) -> CigarString {
2408        self.inner
2409    }
2410}
2411
2412impl ops::Deref for CigarStringView {
2413    type Target = CigarString;
2414
2415    fn deref(&self) -> &CigarString {
2416        &self.inner
2417    }
2418}
2419
2420impl ops::Index<usize> for CigarStringView {
2421    type Output = Cigar;
2422
2423    fn index(&self, index: usize) -> &Cigar {
2424        self.inner.index(index)
2425    }
2426}
2427
2428impl ops::IndexMut<usize> for CigarStringView {
2429    fn index_mut(&mut self, index: usize) -> &mut Cigar {
2430        self.inner.index_mut(index)
2431    }
2432}
2433
2434impl<'a> CigarStringView {
2435    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2436        self.inner.into_iter()
2437    }
2438}
2439
2440impl<'a> IntoIterator for &'a CigarStringView {
2441    type Item = &'a Cigar;
2442    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2443
2444    fn into_iter(self) -> Self::IntoIter {
2445        self.inner.into_iter()
2446    }
2447}
2448
2449impl fmt::Display for CigarStringView {
2450    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2451        self.inner.fmt(fmt)
2452    }
2453}
2454
2455pub struct BaseModificationMetadata {
2456    pub strand: i32,
2457    pub implicit: i32,
2458    pub canonical: u8,
2459}
2460
2461/// struct containing the internal state required to access
2462/// the base modifications for a bam::Record
2463pub struct BaseModificationState<'a> {
2464    record: &'a Record,
2465    state: *mut htslib::hts_base_mod_state,
2466    buffer: Vec<htslib::hts_base_mod>,
2467    buffer_pos: i32,
2468}
2469
2470impl BaseModificationState<'_> {
2471    /// Initialize a new BaseModification struct from a bam::Record
2472    /// This function allocates memory for the state structure
2473    /// and initializes the iterator to the start of the modification
2474    /// records.
2475    fn new(r: &Record) -> Result<BaseModificationState<'_>> {
2476        let mut bm = unsafe {
2477            BaseModificationState {
2478                record: r,
2479                state: hts_sys::hts_base_mod_state_alloc(),
2480                buffer: Vec::new(),
2481                buffer_pos: -1,
2482            }
2483        };
2484
2485        if bm.state.is_null() {
2486            panic!("Unable to allocate memory for hts_base_mod_state");
2487        }
2488
2489        // parse the MM tag to initialize the state
2490        unsafe {
2491            let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
2492            if ret != 0 {
2493                return Err(Error::BamBaseModificationTagNotFound);
2494            }
2495        }
2496
2497        let types = bm.recorded();
2498        bm.buffer.reserve(types.len());
2499        Ok(bm)
2500    }
2501
2502    pub fn buffer_next_mods(&mut self) -> Result<usize> {
2503        unsafe {
2504            let ret = hts_sys::bam_next_basemod(
2505                self.record.inner_ptr(),
2506                self.state,
2507                self.buffer.as_mut_ptr(),
2508                self.buffer.capacity() as i32,
2509                &mut self.buffer_pos,
2510            );
2511
2512            if ret < 0 {
2513                return Err(Error::BamBaseModificationIterationFailed);
2514            }
2515
2516            // the htslib API won't write more than buffer.capacity() mods to the output array but it will
2517            // return the actual number of modifications found. We return an error to the caller
2518            // in the case where there was insufficient storage to return all mods.
2519            if ret as usize > self.buffer.capacity() {
2520                return Err(Error::BamBaseModificationTooManyMods);
2521            }
2522
2523            // we read the modifications directly into the vector, which does
2524            // not update the length so needs to be manually set
2525            self.buffer.set_len(ret as usize);
2526
2527            Ok(ret as usize)
2528        }
2529    }
2530
2531    /// Return an array containing the modification codes listed for this record.
2532    /// Positive values are ascii character codes (eg m), negative values are chEBI codes.
2533    pub fn recorded<'a>(&self) -> &'a [i32] {
2534        unsafe {
2535            let mut n: i32 = 0;
2536            let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2537
2538            // htslib should not return a null pointer, even when there are no base mods
2539            if data_ptr.is_null() {
2540                panic!("Unable to obtain pointer to base modifications");
2541            }
2542            assert!(n >= 0);
2543            slice::from_raw_parts(data_ptr, n as usize)
2544        }
2545    }
2546
2547    /// Return metadata for the specified character code indicating the strand
2548    /// the base modification was called on, whether the tag uses implicit mode
2549    /// and the ascii code for the canonical base.
2550    /// If there are multiple modifications with the same code this will return the data
2551    /// for the first mod.  See https://github.com/samtools/htslib/issues/1635
2552    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2553        unsafe {
2554            let mut strand: i32 = 0;
2555            let mut implicit: i32 = 0;
2556            // This may be i8 or u8 in hts_sys.
2557            let mut canonical: c_char = 0;
2558
2559            let ret = hts_sys::bam_mods_query_type(
2560                self.state,
2561                code,
2562                &mut strand,
2563                &mut implicit,
2564                &mut canonical,
2565            );
2566            if ret == -1 {
2567                Err(Error::BamBaseModificationTypeNotFound)
2568            } else {
2569                Ok(BaseModificationMetadata {
2570                    strand,
2571                    implicit,
2572                    canonical: canonical.try_into().unwrap(),
2573                })
2574            }
2575        }
2576    }
2577}
2578
2579impl Drop for BaseModificationState<'_> {
2580    fn drop<'a>(&mut self) {
2581        unsafe {
2582            hts_sys::hts_base_mod_state_free(self.state);
2583        }
2584    }
2585}
2586
2587/// Iterator over the base modifications that returns
2588/// a vector for all of the mods at each position
2589pub struct BaseModificationsPositionIter<'a> {
2590    mod_state: BaseModificationState<'a>,
2591}
2592
2593impl BaseModificationsPositionIter<'_> {
2594    fn new(r: &Record) -> Result<BaseModificationsPositionIter<'_>> {
2595        let state = BaseModificationState::new(r)?;
2596        Ok(BaseModificationsPositionIter { mod_state: state })
2597    }
2598
2599    pub fn recorded<'a>(&self) -> &'a [i32] {
2600        self.mod_state.recorded()
2601    }
2602
2603    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2604        self.mod_state.query_type(code)
2605    }
2606}
2607
2608impl Iterator for BaseModificationsPositionIter<'_> {
2609    type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2610
2611    fn next(&mut self) -> Option<Self::Item> {
2612        let ret = self.mod_state.buffer_next_mods();
2613
2614        // Three possible things happened in buffer_next_mods:
2615        // 1. the htslib API call was successful but there are no more mods
2616        // 2. ths htslib API call was successful and we read some mods
2617        // 3. the htslib API call failed, we propogate the error wrapped in an option
2618        match ret {
2619            Ok(num_mods) => {
2620                if num_mods == 0 {
2621                    None
2622                } else {
2623                    let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
2624                    Some(Ok(data))
2625                }
2626            }
2627            Err(e) => Some(Err(e)),
2628        }
2629    }
2630}
2631
2632/// Iterator over the base modifications that returns
2633/// the next modification found, one by one
2634pub struct BaseModificationsIter<'a> {
2635    mod_state: BaseModificationState<'a>,
2636    buffer_idx: usize,
2637}
2638
2639impl BaseModificationsIter<'_> {
2640    fn new(r: &Record) -> Result<BaseModificationsIter<'_>> {
2641        let state = BaseModificationState::new(r)?;
2642        Ok(BaseModificationsIter {
2643            mod_state: state,
2644            buffer_idx: 0,
2645        })
2646    }
2647
2648    pub fn recorded<'a>(&self) -> &'a [i32] {
2649        self.mod_state.recorded()
2650    }
2651
2652    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2653        self.mod_state.query_type(code)
2654    }
2655}
2656
2657impl Iterator for BaseModificationsIter<'_> {
2658    type Item = Result<(i32, hts_sys::hts_base_mod)>;
2659
2660    fn next(&mut self) -> Option<Self::Item> {
2661        if self.buffer_idx == self.mod_state.buffer.len() {
2662            // need to use the internal state to read the next
2663            // set of modifications into the buffer
2664            let ret = self.mod_state.buffer_next_mods();
2665
2666            match ret {
2667                Ok(num_mods) => {
2668                    if num_mods == 0 {
2669                        // done iterating
2670                        return None;
2671                    } else {
2672                        // we read some mods, reset the position in the buffer then fall through
2673                        self.buffer_idx = 0;
2674                    }
2675                }
2676                Err(e) => return Some(Err(e)),
2677            }
2678        }
2679
2680        // if we got here when there are mods buffered that we haven't emitted yet
2681        assert!(self.buffer_idx < self.mod_state.buffer.len());
2682        let data = (
2683            self.mod_state.buffer_pos,
2684            self.mod_state.buffer[self.buffer_idx],
2685        );
2686        self.buffer_idx += 1;
2687        Some(Ok(data))
2688    }
2689}
2690
2691#[cfg(test)]
2692mod tests {
2693    use super::*;
2694
2695    #[test]
2696    fn test_cigar_string() {
2697        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2698
2699        assert_eq!(cigar[0], Cigar::Match(100));
2700        assert_eq!(format!("{}", cigar), "100M10S");
2701        for op in &cigar {
2702            println!("{}", op);
2703        }
2704    }
2705
2706    #[test]
2707    fn test_cigar_string_view_pos() {
2708        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2709        assert_eq!(cigar.pos(), 5);
2710    }
2711
2712    #[test]
2713    fn test_cigar_string_leading_softclips() {
2714        let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2715        assert_eq!(cigar.leading_softclips(), 10);
2716        let cigar2 = CigarString(vec![
2717            Cigar::HardClip(5),
2718            Cigar::SoftClip(10),
2719            Cigar::Match(100),
2720        ])
2721        .into_view(0);
2722        assert_eq!(cigar2.leading_softclips(), 10);
2723    }
2724
2725    #[test]
2726    fn test_cigar_string_trailing_softclips() {
2727        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2728        assert_eq!(cigar.trailing_softclips(), 10);
2729        let cigar2 = CigarString(vec![
2730            Cigar::Match(100),
2731            Cigar::SoftClip(10),
2732            Cigar::HardClip(5),
2733        ])
2734        .into_view(0);
2735        assert_eq!(cigar2.trailing_softclips(), 10);
2736    }
2737
2738    #[test]
2739    fn test_cigar_read_pos() {
2740        let vpos = 5; // variant position
2741
2742        // Ignore leading HardClip
2743        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2744        // var:                       V
2745        // c01: 7H                 M  M
2746        // qpos:                  00 01
2747        let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2748        assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2749
2750        // Skip leading SoftClip or use as pre-POS matches
2751        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2752        // var:                       V
2753        // c02: 5H2S         M  M  M  M  M  M
2754        // qpos:  00        02 03 04 05 06 07
2755        // c02: 5H     S  S  M  M  M  M  M  M
2756        // qpos:      00 01 02 03 04 05 06 07
2757        let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2758        assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2759        assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2760
2761        // Skip leading SoftClip returning None for unmatched reference positiong or use as
2762        // pre-POS matches
2763        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2764        // var:                       V
2765        // c03:  3S                      M  M
2766        // qpos: 00                     03 04
2767        // c03:                 S  S  S  M  M
2768        // qpos:               00 01 02 03 04
2769        let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2770        assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2771        assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2772
2773        // Skip leading Insertion before variant position
2774        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2775        // var:                       V
2776        // c04:  3I                X  X  X
2777        // qpos: 00               03 04 05
2778        let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2779        assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2780
2781        // Matches and deletion before variant position
2782        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2783        // var:                       V
2784        // c05:        =  =  D  D  X  =  =
2785        // qpos:      00 01       02 03 04 05
2786        let c05 = CigarString(vec![
2787            Cigar::Equal(2),
2788            Cigar::Del(2),
2789            Cigar::Diff(1),
2790            Cigar::Equal(2),
2791        ])
2792        .into_view(0);
2793        assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2794
2795        // single nucleotide Deletion covering variant position
2796        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2797        // var:                       V
2798        // c06:                 =  =  D  X  X
2799        // qpos:               00 01    02 03
2800        let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2801        assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2802        assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2803
2804        // three nucleotide Deletion covering variant position
2805        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2806        // var:                       V
2807        // c07:              =  =  D  D  D  M  M
2808        // qpos:            00 01          02 03
2809        let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2810        assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2811        assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2812
2813        // three nucleotide RefSkip covering variant position
2814        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2815        // var:                       V
2816        // c08:              =  X  N  N  N  M  M
2817        // qpos:            00 01          02 03
2818        let c08 = CigarString(vec![
2819            Cigar::Equal(1),
2820            Cigar::Diff(1),
2821            Cigar::RefSkip(3),
2822            Cigar::Match(2),
2823        ])
2824        .into_view(2);
2825        assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2826        assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2827
2828        // internal hard clip before variant pos
2829        // ref:       00 01 02 03    04 05 06 07 08 09 10 11 12 13 14 15
2830        // var:                          V
2831        // c09: 3H           =  = 3H  =  =
2832        // qpos:            00 01    02 03
2833        let c09 = CigarString(vec![
2834            Cigar::HardClip(3),
2835            Cigar::Equal(2),
2836            Cigar::HardClip(3),
2837            Cigar::Equal(2),
2838        ])
2839        .into_view(2);
2840        assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2841
2842        // Deletion right before variant position
2843        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2844        // var:                       V
2845        // c10:           M  M  D  D  M  M
2846        // qpos:         00 01       02 03
2847        let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2848        assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2849
2850        // Insertion right before variant position
2851        // ref:       00 01 02 03 04    05 06 07 08 09 10 11 12 13 14 15
2852        // var:                          V
2853        // c11:                 M  M 3I  M
2854        // qpos:               00 01 02 05 06
2855        let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2856        assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2857
2858        // Insertion right after variant position
2859        // ref:       00 01 02 03 04 05    06 07 08 09 10 11 12 13 14 15
2860        // var:                       V
2861        // c12:                 M  M  M 2I  =
2862        // qpos:               00 01 02 03 05
2863        let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2864        assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2865
2866        // Deletion right after variant position
2867        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2868        // var:                       V
2869        // c13:                 M  M  M  D  =
2870        // qpos:               00 01 02    03
2871        let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2872        assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2873
2874        // A messy and complicated example, including a Pad operation
2875        let vpos2 = 15;
2876        // ref:       00    01 02    03 04 05    06 07 08 09 10 11 12 13 14 15
2877        // var:                                                           V
2878        // c14: 5H3S   = 2P  M  X 3I  M  M  D 2I  =  =  N  N  N  M  M  M  =  =  5S2H
2879        // qpos:  00  03    04 05 06 09 10    11 13 14          15 16 17 18 19
2880        let c14 = CigarString(vec![
2881            Cigar::HardClip(5),
2882            Cigar::SoftClip(3),
2883            Cigar::Equal(1),
2884            Cigar::Pad(2),
2885            Cigar::Match(1),
2886            Cigar::Diff(1),
2887            Cigar::Ins(3),
2888            Cigar::Match(2),
2889            Cigar::Del(1),
2890            Cigar::Ins(2),
2891            Cigar::Equal(2),
2892            Cigar::RefSkip(3),
2893            Cigar::Match(3),
2894            Cigar::Equal(2),
2895            Cigar::SoftClip(5),
2896            Cigar::HardClip(2),
2897        ])
2898        .into_view(0);
2899        assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2900
2901        // HardClip after Pad
2902        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2903        // var:                       V
2904        // c15: 5P1H            =  =  =
2905        // qpos:               00 01 02
2906        let c15 =
2907            CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2908        assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2909
2910        // only HardClip and Pad operations
2911        // c16: 7H5P2H
2912        let c16 =
2913            CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2914        assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2915    }
2916
2917    #[test]
2918    fn test_clone() {
2919        let mut rec = Record::new();
2920        rec.set_pos(300);
2921        rec.set_qname(b"read1");
2922        let clone = rec.clone();
2923        assert_eq!(rec, clone);
2924    }
2925
2926    #[test]
2927    fn test_flags() {
2928        let mut rec = Record::new();
2929
2930        rec.set_paired();
2931        assert_eq!(rec.is_paired(), true);
2932
2933        rec.set_supplementary();
2934        assert_eq!(rec.is_supplementary(), true);
2935        assert_eq!(rec.is_supplementary(), true);
2936
2937        rec.unset_paired();
2938        assert_eq!(rec.is_paired(), false);
2939        assert_eq!(rec.is_supplementary(), true);
2940
2941        rec.unset_supplementary();
2942        assert_eq!(rec.is_paired(), false);
2943        assert_eq!(rec.is_supplementary(), false);
2944    }
2945
2946    #[test]
2947    fn test_cigar_parse() {
2948        let cigar = "1S20M1D2I3X1=2H";
2949        let parsed = CigarString::try_from(cigar).unwrap();
2950        assert_eq!(parsed.to_string(), cigar);
2951    }
2952}
2953
2954#[cfg(test)]
2955mod alignment_cigar_tests {
2956    use super::*;
2957    use crate::bam::{Read, Reader};
2958    use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2959    use bio_types::alignment::{Alignment, AlignmentMode};
2960
2961    #[test]
2962    fn test_cigar() {
2963        let alignment = Alignment {
2964            score: 5,
2965            xstart: 3,
2966            ystart: 0,
2967            xend: 9,
2968            yend: 10,
2969            ylen: 10,
2970            xlen: 10,
2971            operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2972            mode: AlignmentMode::Semiglobal,
2973        };
2974        assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2975        assert_eq!(
2976            CigarString::from_alignment(&alignment, false).0,
2977            vec![
2978                Cigar::SoftClip(3),
2979                Cigar::Equal(3),
2980                Cigar::Diff(1),
2981                Cigar::Ins(2),
2982                Cigar::Del(2),
2983                Cigar::SoftClip(1),
2984            ]
2985        );
2986
2987        let alignment = Alignment {
2988            score: 5,
2989            xstart: 0,
2990            ystart: 5,
2991            xend: 4,
2992            yend: 10,
2993            ylen: 10,
2994            xlen: 5,
2995            operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
2996            mode: AlignmentMode::Custom,
2997        };
2998        assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
2999        assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
3000        assert_eq!(
3001            CigarString::from_alignment(&alignment, false).0,
3002            vec![
3003                Cigar::Equal(1),
3004                Cigar::Diff(2),
3005                Cigar::Ins(1),
3006                Cigar::Del(2),
3007                Cigar::SoftClip(1),
3008            ]
3009        );
3010        assert_eq!(
3011            CigarString::from_alignment(&alignment, true).0,
3012            vec![
3013                Cigar::Equal(1),
3014                Cigar::Diff(2),
3015                Cigar::Ins(1),
3016                Cigar::Del(2),
3017                Cigar::HardClip(1),
3018            ]
3019        );
3020
3021        let alignment = Alignment {
3022            score: 5,
3023            xstart: 0,
3024            ystart: 5,
3025            xend: 3,
3026            yend: 8,
3027            ylen: 10,
3028            xlen: 3,
3029            operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
3030            mode: AlignmentMode::Custom,
3031        };
3032        assert_eq!(alignment.cigar(false), "1X1=1X");
3033        assert_eq!(
3034            CigarString::from_alignment(&alignment, false).0,
3035            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
3036        );
3037
3038        let alignment = Alignment {
3039            score: 5,
3040            xstart: 0,
3041            ystart: 5,
3042            xend: 3,
3043            yend: 8,
3044            ylen: 10,
3045            xlen: 3,
3046            operations: vec![Subst, Match, Subst],
3047            mode: AlignmentMode::Semiglobal,
3048        };
3049        assert_eq!(alignment.cigar(false), "1X1=1X");
3050        assert_eq!(
3051            CigarString::from_alignment(&alignment, false).0,
3052            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
3053        );
3054    }
3055
3056    #[test]
3057    fn test_read_orientation_f1r2() {
3058        let mut bam = Reader::from_path("test/test_paired.sam").unwrap();
3059
3060        for res in bam.records() {
3061            let record = res.unwrap();
3062            assert_eq!(
3063                record.read_pair_orientation(),
3064                SequenceReadPairOrientation::F1R2
3065            );
3066        }
3067    }
3068
3069    #[test]
3070    fn test_read_orientation_f2r1() {
3071        let mut bam = Reader::from_path("test/test_nonstandard_orientation.sam").unwrap();
3072
3073        for res in bam.records() {
3074            let record = res.unwrap();
3075            assert_eq!(
3076                record.read_pair_orientation(),
3077                SequenceReadPairOrientation::F2R1
3078            );
3079        }
3080    }
3081
3082    #[test]
3083    fn test_read_orientation_supplementary() {
3084        let mut bam = Reader::from_path("test/test_orientation_supplementary.sam").unwrap();
3085
3086        for res in bam.records() {
3087            let record = res.unwrap();
3088            assert_eq!(
3089                record.read_pair_orientation(),
3090                SequenceReadPairOrientation::F2R1
3091            );
3092        }
3093    }
3094
3095    #[test]
3096    pub fn test_cigar_parsing_non_ascii_error() {
3097        let cigar_str = "43ጷ";
3098        let expected_error = Err(Error::BamParseCigar {
3099                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
3100            });
3101
3102        let result = CigarString::try_from(cigar_str);
3103        assert_eq!(expected_error, result);
3104    }
3105
3106    #[test]
3107    pub fn test_cigar_parsing() {
3108        // parsing test cases
3109        let cigar_strs = [
3110            "1H10M4D100I300N1102=10P25X11S", // test every cigar opt
3111            "100M",                          // test a single op
3112            "",                              // test empty input
3113            "1H1=1H",                        // test simple hardclip
3114            "1S1=1S",                        // test simple softclip
3115            "11H11S11=11S11H",               // test complex softclip
3116            "10H",
3117            "10S",
3118        ];
3119        // expected results
3120        let cigars = [
3121            CigarString(vec![
3122                Cigar::HardClip(1),
3123                Cigar::Match(10),
3124                Cigar::Del(4),
3125                Cigar::Ins(100),
3126                Cigar::RefSkip(300),
3127                Cigar::Equal(1102),
3128                Cigar::Pad(10),
3129                Cigar::Diff(25),
3130                Cigar::SoftClip(11),
3131            ]),
3132            CigarString(vec![Cigar::Match(100)]),
3133            CigarString(vec![]),
3134            CigarString(vec![
3135                Cigar::HardClip(1),
3136                Cigar::Equal(1),
3137                Cigar::HardClip(1),
3138            ]),
3139            CigarString(vec![
3140                Cigar::SoftClip(1),
3141                Cigar::Equal(1),
3142                Cigar::SoftClip(1),
3143            ]),
3144            CigarString(vec![
3145                Cigar::HardClip(11),
3146                Cigar::SoftClip(11),
3147                Cigar::Equal(11),
3148                Cigar::SoftClip(11),
3149                Cigar::HardClip(11),
3150            ]),
3151            CigarString(vec![Cigar::HardClip(10)]),
3152            CigarString(vec![Cigar::SoftClip(10)]),
3153        ];
3154        // compare
3155        for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
3156            let cigar_parse = CigarString::try_from(cigar_str)
3157                .unwrap_or_else(|_| panic!("Unable to parse cigar: {}", cigar_str));
3158            assert_eq!(&cigar_parse, truth);
3159        }
3160    }
3161}
3162
3163#[cfg(test)]
3164mod basemod_tests {
3165    use crate::bam::{Read, Reader};
3166
3167    #[test]
3168    pub fn test_count_recorded() {
3169        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3170
3171        for r in bam.records() {
3172            let record = r.unwrap();
3173            if let Ok(mods) = record.basemods_iter() {
3174                let n = mods.recorded().len();
3175                assert_eq!(n, 3);
3176            };
3177        }
3178    }
3179
3180    #[test]
3181    pub fn test_query_type() {
3182        let mut bam = Reader::from_path("test/base_mods/MM-orient.sam").unwrap();
3183
3184        let mut n_fwd = 0;
3185        let mut n_rev = 0;
3186
3187        for r in bam.records() {
3188            let record = r.unwrap();
3189            if let Ok(mods) = record.basemods_iter() {
3190                for mod_code in mods.recorded() {
3191                    if let Ok(mod_metadata) = mods.query_type(*mod_code) {
3192                        if mod_metadata.strand == 0 {
3193                            n_fwd += 1;
3194                        }
3195                        if mod_metadata.strand == 1 {
3196                            n_rev += 1;
3197                        }
3198                    }
3199                }
3200            };
3201        }
3202        assert_eq!(n_fwd, 2);
3203        assert_eq!(n_rev, 2);
3204    }
3205
3206    #[test]
3207    pub fn test_mod_iter() {
3208        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3209        let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
3210        let mut i = 0;
3211
3212        for r in bam.records() {
3213            let record = r.unwrap();
3214            for res in record.basemods_iter().unwrap().flatten() {
3215                let (position, _m) = res;
3216                assert_eq!(position, expected_positions[i]);
3217                i += 1;
3218            }
3219        }
3220    }
3221
3222    #[test]
3223    pub fn test_position_iter() {
3224        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3225        let expected_positions = [1, 7, 12, 13, 22, 30, 31];
3226        let expected_counts = [1, 1, 1, 2, 1, 1, 1];
3227        let mut i = 0;
3228
3229        for r in bam.records() {
3230            let record = r.unwrap();
3231            for res in record.basemods_position_iter().unwrap().flatten() {
3232                let (position, elements) = res;
3233                assert_eq!(position, expected_positions[i]);
3234                assert_eq!(elements.len(), expected_counts[i]);
3235                i += 1;
3236            }
3237        }
3238    }
3239}