rust_htslib/bam/
record.rs

1// Copyright 2014 Christopher Schröder, Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use std::convert::TryFrom;
7use std::convert::TryInto;
8use std::ffi;
9use std::fmt;
10use std::marker::PhantomData;
11use std::mem::{size_of, MaybeUninit};
12use std::ops;
13use std::os::raw::c_char;
14use std::rc::Rc;
15use std::slice;
16use std::str;
17
18use byteorder::{LittleEndian, ReadBytesExt};
19
20use crate::bam::Error;
21use crate::bam::HeaderView;
22use crate::errors::Result;
23use crate::htslib;
24use crate::utils;
25#[cfg(feature = "serde_feature")]
26use serde::{self, Deserialize, Serialize};
27
28use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
29use bio_types::genome;
30use bio_types::sequence::SequenceRead;
31use bio_types::sequence::SequenceReadPairOrientation;
32use bio_types::strand::ReqStrand;
33
34/// A macro creating methods for flag access.
35macro_rules! flag {
36    ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
37        pub fn $get(&self) -> bool {
38            self.inner().core.flag & $bit != 0
39        }
40
41        pub fn $set(&mut self) {
42            self.inner_mut().core.flag |= $bit;
43        }
44
45        pub fn $unset(&mut self) {
46            self.inner_mut().core.flag &= !$bit;
47        }
48    };
49}
50
51/// A BAM record.
52pub struct Record {
53    pub inner: htslib::bam1_t,
54    own: bool,
55    cigar: Option<CigarStringView>,
56    header: Option<Rc<HeaderView>>,
57}
58
59unsafe impl Send for Record {}
60unsafe impl Sync for Record {}
61
62impl Clone for Record {
63    fn clone(&self) -> Self {
64        let mut copy = Record::new();
65        unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
66        copy
67    }
68}
69
70impl PartialEq for Record {
71    fn eq(&self, other: &Record) -> bool {
72        self.tid() == other.tid()
73            && self.pos() == other.pos()
74            && self.bin() == other.bin()
75            && self.mapq() == other.mapq()
76            && self.flags() == other.flags()
77            && self.mtid() == other.mtid()
78            && self.mpos() == other.mpos()
79            && self.insert_size() == other.insert_size()
80            && self.data() == other.data()
81            && self.inner().core.l_extranul == other.inner().core.l_extranul
82    }
83}
84
85impl Eq for Record {}
86
87impl fmt::Debug for Record {
88    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
89        fmt.write_fmt(format_args!(
90            "Record(tid: {}, pos: {})",
91            self.tid(),
92            self.pos()
93        ))
94    }
95}
96
97impl Default for Record {
98    fn default() -> Self {
99        Self::new()
100    }
101}
102
103#[inline]
104fn extranul_from_qname(qname: &[u8]) -> usize {
105    let qlen = qname.len() + 1;
106    if qlen % 4 != 0 {
107        4 - qlen % 4
108    } else {
109        0
110    }
111}
112
113impl Record {
114    /// Create an empty BAM record.
115    pub fn new() -> Self {
116        let mut record = Record {
117            inner: unsafe { MaybeUninit::zeroed().assume_init() },
118            own: true,
119            cigar: None,
120            header: None,
121        };
122        // The read/query name needs to be set as empty to properly initialize
123        // the record
124        record.set_qname(b"");
125        // Developer note: these are needed so the returned record is properly
126        // initialized as unmapped.
127        record.set_unmapped();
128        record.set_tid(-1);
129        record.set_pos(-1);
130        record.set_mpos(-1);
131        record.set_mtid(-1);
132        record
133    }
134
135    pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
136        Record {
137            inner: {
138                #[allow(clippy::uninit_assumed_init, invalid_value)]
139                let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
140                unsafe {
141                    ::libc::memcpy(
142                        &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
143                        from as *const ::libc::c_void,
144                        size_of::<htslib::bam1_t>(),
145                    );
146                }
147                inner
148            },
149            own: false,
150            cigar: None,
151            header: None,
152        }
153    }
154
155    // Create a BAM record from a line SAM text. SAM slice need not be 0-terminated.
156    pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
157        let mut record = Self::new();
158
159        let mut sam_copy = Vec::with_capacity(sam.len() + 1);
160        sam_copy.extend(sam);
161        sam_copy.push(0);
162
163        let mut sam_string = htslib::kstring_t {
164            s: sam_copy.as_ptr() as *mut c_char,
165            l: sam_copy.len(),
166            m: sam_copy.len(),
167        };
168
169        let succ = unsafe {
170            htslib::sam_parse1(
171                &mut sam_string,
172                header_view.inner_ptr() as *mut htslib::bam_hdr_t,
173                record.inner_ptr_mut(),
174            )
175        };
176
177        if succ == 0 {
178            Ok(record)
179        } else {
180            Err(Error::BamParseSAM {
181                rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
182            })
183        }
184    }
185
186    pub fn set_header(&mut self, header: Rc<HeaderView>) {
187        self.header = Some(header);
188    }
189
190    pub(super) fn data(&self) -> &[u8] {
191        unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
192    }
193
194    #[inline]
195    pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
196        &mut self.inner
197    }
198
199    #[inline]
200    pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
201        &mut self.inner as *mut htslib::bam1_t
202    }
203
204    #[inline]
205    pub fn inner(&self) -> &htslib::bam1_t {
206        &self.inner
207    }
208
209    #[inline]
210    pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
211        &self.inner as *const htslib::bam1_t
212    }
213
214    /// Get target id.
215    pub fn tid(&self) -> i32 {
216        self.inner().core.tid
217    }
218
219    /// Set target id.
220    pub fn set_tid(&mut self, tid: i32) {
221        self.inner_mut().core.tid = tid;
222    }
223
224    /// Get position (0-based).
225    pub fn pos(&self) -> i64 {
226        self.inner().core.pos
227    }
228
229    /// Set position (0-based).
230    pub fn set_pos(&mut self, pos: i64) {
231        self.inner_mut().core.pos = pos;
232    }
233
234    pub fn bin(&self) -> u16 {
235        self.inner().core.bin
236    }
237
238    pub fn set_bin(&mut self, bin: u16) {
239        self.inner_mut().core.bin = bin;
240    }
241
242    /// Get MAPQ.
243    pub fn mapq(&self) -> u8 {
244        self.inner().core.qual
245    }
246
247    /// Set MAPQ.
248    pub fn set_mapq(&mut self, mapq: u8) {
249        self.inner_mut().core.qual = mapq;
250    }
251
252    /// Get strand information from record flags.
253    pub fn strand(&self) -> ReqStrand {
254        let reverse = self.flags() & 0x10 != 0;
255        if reverse {
256            ReqStrand::Reverse
257        } else {
258            ReqStrand::Forward
259        }
260    }
261
262    /// Get raw flags.
263    pub fn flags(&self) -> u16 {
264        self.inner().core.flag
265    }
266
267    /// Set raw flags.
268    pub fn set_flags(&mut self, flags: u16) {
269        self.inner_mut().core.flag = flags;
270    }
271
272    /// Unset all flags.
273    pub fn unset_flags(&mut self) {
274        self.inner_mut().core.flag = 0;
275    }
276
277    /// Get target id of mate.
278    pub fn mtid(&self) -> i32 {
279        self.inner().core.mtid
280    }
281
282    /// Set target id of mate.
283    pub fn set_mtid(&mut self, mtid: i32) {
284        self.inner_mut().core.mtid = mtid;
285    }
286
287    /// Get mate position.
288    pub fn mpos(&self) -> i64 {
289        self.inner().core.mpos
290    }
291
292    /// Set mate position.
293    pub fn set_mpos(&mut self, mpos: i64) {
294        self.inner_mut().core.mpos = mpos;
295    }
296
297    /// Get insert size.
298    pub fn insert_size(&self) -> i64 {
299        self.inner().core.isize_
300    }
301
302    /// Set insert size.
303    pub fn set_insert_size(&mut self, insert_size: i64) {
304        self.inner_mut().core.isize_ = insert_size;
305    }
306
307    fn qname_capacity(&self) -> usize {
308        self.inner().core.l_qname as usize
309    }
310
311    fn qname_len(&self) -> usize {
312        // discount all trailing zeros (the default one and extra nulls)
313        self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
314    }
315
316    /// Get qname (read name). Complexity: O(1).
317    pub fn qname(&self) -> &[u8] {
318        &self.data()[..self.qname_len()]
319    }
320
321    /// Set the variable length data buffer
322    pub fn set_data(&mut self, new_data: &[u8]) {
323        self.cigar = None;
324
325        self.inner_mut().l_data = new_data.len() as i32;
326        if (self.inner().m_data as i32) < self.inner().l_data {
327            // Verbosity due to lexical borrowing
328            let l_data = self.inner().l_data;
329            self.realloc_var_data(l_data as usize);
330        }
331
332        // Copy new data into buffer
333        let data =
334            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
335        utils::copy_memory(new_data, data);
336    }
337
338    /// Set variable length data (qname, cigar, seq, qual).
339    /// The aux data is left unchanged.
340    /// `qual` is Phred-scaled quality values, without any offset.
341    /// NOTE: seq.len() must equal qual.len() or this method
342    /// will panic. If you don't have quality values use
343    /// `let quals = vec![ 255 as u8; seq.len()];` as a placeholder that will
344    /// be recognized as missing QVs by `samtools`.
345    pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
346        assert!(qname.len() < 255);
347        assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
348
349        self.cigar = None;
350
351        let cigar_width = if let Some(cigar_string) = cigar {
352            cigar_string.len()
353        } else {
354            0
355        } * 4;
356        let q_len = qname.len() + 1;
357        let extranul = extranul_from_qname(qname);
358
359        let orig_aux_offset = self.qname_capacity()
360            + 4 * self.cigar_len()
361            + self.seq_len().div_ceil(2)
362            + self.seq_len();
363        let new_aux_offset = q_len + extranul + cigar_width + seq.len().div_ceil(2) + qual.len();
364        assert!(orig_aux_offset <= self.inner.l_data as usize);
365        let aux_len = self.inner.l_data as usize - orig_aux_offset;
366        self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
367        if (self.inner().m_data as i32) < self.inner().l_data {
368            // Verbosity due to lexical borrowing
369            let l_data = self.inner().l_data;
370            self.realloc_var_data(l_data as usize);
371        }
372
373        // Copy the aux data.
374        if aux_len > 0 && orig_aux_offset != new_aux_offset {
375            let data =
376                unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
377            data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
378        }
379
380        let data =
381            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
382
383        // qname
384        utils::copy_memory(qname, data);
385        for i in 0..=extranul {
386            data[qname.len() + i] = b'\0';
387        }
388        let mut i = q_len + extranul;
389        self.inner_mut().core.l_qname = i as u16;
390        self.inner_mut().core.l_extranul = extranul as u8;
391
392        // cigar
393        if let Some(cigar_string) = cigar {
394            let cigar_data = unsafe {
395                //cigar is always aligned to 4 bytes (see extranul above) - so this is safe
396                #[allow(clippy::cast_ptr_alignment)]
397                slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
398            };
399            for (i, c) in cigar_string.iter().enumerate() {
400                cigar_data[i] = c.encode();
401            }
402            self.inner_mut().core.n_cigar = cigar_string.len() as u32;
403            i += cigar_string.len() * 4;
404        } else {
405            self.inner_mut().core.n_cigar = 0;
406        };
407
408        // seq
409        {
410            for j in (0..seq.len()).step_by(2) {
411                data[i + j / 2] = (ENCODE_BASE[seq[j] as usize] << 4)
412                    | (if j + 1 < seq.len() {
413                        ENCODE_BASE[seq[j + 1] as usize]
414                    } else {
415                        0
416                    });
417            }
418            self.inner_mut().core.l_qseq = seq.len() as i32;
419            i += seq.len().div_ceil(2);
420        }
421
422        // qual
423        utils::copy_memory(qual, &mut data[i..]);
424    }
425
426    /// Replace current qname with a new one.
427    pub fn set_qname(&mut self, new_qname: &[u8]) {
428        // 251 + 1NUL is the max 32-bit aligned value that fits in u8
429        assert!(new_qname.len() < 252);
430
431        let old_q_len = self.qname_capacity();
432        // We're going to add a terminal NUL
433        let extranul = extranul_from_qname(new_qname);
434        let new_q_len = new_qname.len() + 1 + extranul;
435
436        // Length of data after qname
437        let other_len = self.inner_mut().l_data - old_q_len as i32;
438
439        if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
440            self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
441        } else if new_q_len > old_q_len {
442            self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
443
444            // Reallocate if necessary
445            if (self.inner().m_data as i32) < self.inner().l_data {
446                // Verbosity due to lexical borrowing
447                let l_data = self.inner().l_data;
448                self.realloc_var_data(l_data as usize);
449            }
450        }
451
452        if new_q_len != old_q_len {
453            // Move other data to new location
454            unsafe {
455                let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
456
457                ::libc::memmove(
458                    data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
459                    data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
460                    other_len as usize,
461                );
462            }
463        }
464
465        // Copy qname data
466        let data =
467            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
468        utils::copy_memory(new_qname, data);
469        for i in 0..=extranul {
470            data[new_q_len - i - 1] = b'\0';
471        }
472        self.inner_mut().core.l_qname = new_q_len as u16;
473        self.inner_mut().core.l_extranul = extranul as u8;
474    }
475
476    /// Replace current cigar with a new one.
477    pub fn set_cigar(&mut self, new_cigar: Option<&CigarString>) {
478        self.cigar = None;
479
480        let qname_data_len = self.qname_capacity();
481        let old_cigar_data_len = self.cigar_len() * 4;
482
483        // Length of data after cigar
484        let other_data_len = self.inner_mut().l_data - (qname_data_len + old_cigar_data_len) as i32;
485
486        let new_cigar_len = match new_cigar {
487            Some(x) => x.len(),
488            None => 0,
489        };
490        let new_cigar_data_len = new_cigar_len * 4;
491
492        if new_cigar_data_len < old_cigar_data_len {
493            self.inner_mut().l_data -= (old_cigar_data_len - new_cigar_data_len) as i32;
494        } else if new_cigar_data_len > old_cigar_data_len {
495            self.inner_mut().l_data += (new_cigar_data_len - old_cigar_data_len) as i32;
496
497            // Reallocate if necessary
498            if (self.inner().m_data as i32) < self.inner().l_data {
499                // Verbosity due to lexical borrowing
500                let l_data = self.inner().l_data;
501                self.realloc_var_data(l_data as usize);
502            }
503        }
504
505        if new_cigar_data_len != old_cigar_data_len {
506            // Move other data to new location
507            unsafe {
508                ::libc::memmove(
509                    self.inner.data.add(qname_data_len + new_cigar_data_len) as *mut ::libc::c_void,
510                    self.inner.data.add(qname_data_len + old_cigar_data_len) as *mut ::libc::c_void,
511                    other_data_len as usize,
512                );
513            }
514        }
515
516        // Copy cigar data
517        if let Some(cigar_string) = new_cigar {
518            let cigar_data = unsafe {
519                #[allow(clippy::cast_ptr_alignment)]
520                slice::from_raw_parts_mut(
521                    self.inner.data.add(qname_data_len) as *mut u32,
522                    cigar_string.len(),
523                )
524            };
525            for (i, c) in cigar_string.iter().enumerate() {
526                cigar_data[i] = c.encode();
527            }
528        }
529        self.inner_mut().core.n_cigar = new_cigar_len as u32;
530    }
531
532    fn realloc_var_data(&mut self, new_len: usize) {
533        // pad request
534        let new_len = new_len as u32;
535        let new_request = new_len + 32 - (new_len % 32);
536
537        let ptr = unsafe {
538            ::libc::realloc(
539                self.inner().data as *mut ::libc::c_void,
540                new_request as usize,
541            ) as *mut u8
542        };
543
544        if ptr.is_null() {
545            panic!("ran out of memory in rust_htslib trying to realloc");
546        }
547
548        // don't update m_data until we know we have
549        // a successful allocation.
550        self.inner_mut().m_data = new_request;
551        self.inner_mut().data = ptr;
552
553        // we now own inner.data
554        self.own = true;
555    }
556
557    pub fn cigar_len(&self) -> usize {
558        self.inner().core.n_cigar as usize
559    }
560
561    /// Get reference to raw cigar string representation (as stored in BAM file).
562    /// Usually, the method `Record::cigar` should be used instead.
563    pub fn raw_cigar(&self) -> &[u32] {
564        //cigar is always aligned to 4 bytes - so this is safe
565        #[allow(clippy::cast_ptr_alignment)]
566        unsafe {
567            slice::from_raw_parts(
568                self.data()[self.qname_capacity()..].as_ptr() as *const u32,
569                self.cigar_len(),
570            )
571        }
572    }
573
574    /// Return unpacked cigar string. This will create a fresh copy the Cigar data.
575    pub fn cigar(&self) -> CigarStringView {
576        match self.cigar {
577            Some(ref c) => c.clone(),
578            None => self.unpack_cigar(),
579        }
580    }
581
582    // Return unpacked cigar string. This returns None unless you have first called `bam::Record::cache_cigar`.
583    pub fn cigar_cached(&self) -> Option<&CigarStringView> {
584        self.cigar.as_ref()
585    }
586
587    /// Decode the cigar string and cache it inside the `Record`
588    pub fn cache_cigar(&mut self) {
589        self.cigar = Some(self.unpack_cigar())
590    }
591
592    /// Unpack cigar string. Complexity: O(k) with k being the length of the cigar string.
593    fn unpack_cigar(&self) -> CigarStringView {
594        CigarString(
595            self.raw_cigar()
596                .iter()
597                .map(|&c| {
598                    let len = c >> 4;
599                    match c & 0b1111 {
600                        0 => Cigar::Match(len),
601                        1 => Cigar::Ins(len),
602                        2 => Cigar::Del(len),
603                        3 => Cigar::RefSkip(len),
604                        4 => Cigar::SoftClip(len),
605                        5 => Cigar::HardClip(len),
606                        6 => Cigar::Pad(len),
607                        7 => Cigar::Equal(len),
608                        8 => Cigar::Diff(len),
609                        _ => panic!("Unexpected cigar operation"),
610                    }
611                })
612                .collect(),
613        )
614        .into_view(self.pos())
615    }
616
617    pub fn seq_len(&self) -> usize {
618        self.inner().core.l_qseq as usize
619    }
620
621    fn seq_data(&self) -> &[u8] {
622        let offset = self.qname_capacity() + self.cigar_len() * 4;
623        &self.data()[offset..][..self.seq_len().div_ceil(2)]
624    }
625
626    /// Get read sequence. Complexity: O(1).
627    pub fn seq(&self) -> Seq<'_> {
628        Seq {
629            encoded: self.seq_data(),
630            len: self.seq_len(),
631        }
632    }
633
634    /// Get base qualities (PHRED-scaled probability that base is wrong).
635    /// This does not entail any offsets, hence the qualities can be used directly without
636    /// e.g. subtracting 33. Complexity: O(1).
637    pub fn qual(&self) -> &[u8] {
638        &self.data()[self.qname_capacity() + self.cigar_len() * 4 + self.seq_len().div_ceil(2)..]
639            [..self.seq_len()]
640    }
641
642    /// Look up an auxiliary field by its tag.
643    ///
644    /// Only the first two bytes of a given tag are used for the look-up of a field.
645    /// See [`Aux`] for more details.
646    pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
647        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
648        let aux = unsafe {
649            htslib::bam_aux_get(
650                &self.inner as *const htslib::bam1_t,
651                c_str.as_ptr() as *mut c_char,
652            )
653        };
654        unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
655    }
656
657    unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
658        const TAG_LEN: isize = 2;
659        // Used for skipping type identifier
660        const TYPE_ID_LEN: isize = 1;
661
662        if aux.is_null() {
663            return Err(Error::BamAuxTagNotFound);
664        }
665
666        let (data, type_size) = match *aux {
667            b'A' => {
668                let type_size = size_of::<u8>();
669                (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
670            }
671            b'c' => {
672                let type_size = size_of::<i8>();
673                (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
674            }
675            b'C' => {
676                let type_size = size_of::<u8>();
677                (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
678            }
679            b's' => {
680                let type_size = size_of::<i16>();
681                (
682                    Aux::I16(
683                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
684                            .read_i16::<LittleEndian>()
685                            .map_err(|_| Error::BamAuxParsingError)?,
686                    ),
687                    type_size,
688                )
689            }
690            b'S' => {
691                let type_size = size_of::<u16>();
692                (
693                    Aux::U16(
694                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
695                            .read_u16::<LittleEndian>()
696                            .map_err(|_| Error::BamAuxParsingError)?,
697                    ),
698                    type_size,
699                )
700            }
701            b'i' => {
702                let type_size = size_of::<i32>();
703                (
704                    Aux::I32(
705                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
706                            .read_i32::<LittleEndian>()
707                            .map_err(|_| Error::BamAuxParsingError)?,
708                    ),
709                    type_size,
710                )
711            }
712            b'I' => {
713                let type_size = size_of::<u32>();
714                (
715                    Aux::U32(
716                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
717                            .read_u32::<LittleEndian>()
718                            .map_err(|_| Error::BamAuxParsingError)?,
719                    ),
720                    type_size,
721                )
722            }
723            b'f' => {
724                let type_size = size_of::<f32>();
725                (
726                    Aux::Float(
727                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
728                            .read_f32::<LittleEndian>()
729                            .map_err(|_| Error::BamAuxParsingError)?,
730                    ),
731                    type_size,
732                )
733            }
734            b'd' => {
735                let type_size = size_of::<f64>();
736                (
737                    Aux::Double(
738                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
739                            .read_f64::<LittleEndian>()
740                            .map_err(|_| Error::BamAuxParsingError)?,
741                    ),
742                    type_size,
743                )
744            }
745            b'Z' | b'H' => {
746                let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
747                let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
748                (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
749            }
750            b'B' => {
751                const ARRAY_INNER_TYPE_LEN: isize = 1;
752                const ARRAY_COUNT_LEN: isize = 4;
753
754                // Used for skipping metadata
755                let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
756
757                let length =
758                    slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
759                        .read_u32::<LittleEndian>()
760                        .map_err(|_| Error::BamAuxParsingError)? as usize;
761
762                // Return tuples of an `Aux` enum and the length of data + metadata in bytes
763                let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
764                    b'c' => (
765                        Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
766                            aux.offset(array_data_offset),
767                            length,
768                        ))),
769                        length,
770                    ),
771                    b'C' => (
772                        Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
773                            aux.offset(array_data_offset),
774                            length,
775                        ))),
776                        length,
777                    ),
778                    b's' => (
779                        Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
780                            aux.offset(array_data_offset),
781                            length * size_of::<i16>(),
782                        ))),
783                        length * std::mem::size_of::<i16>(),
784                    ),
785                    b'S' => (
786                        Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
787                            aux.offset(array_data_offset),
788                            length * size_of::<u16>(),
789                        ))),
790                        length * std::mem::size_of::<u16>(),
791                    ),
792                    b'i' => (
793                        Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
794                            aux.offset(array_data_offset),
795                            length * size_of::<i32>(),
796                        ))),
797                        length * std::mem::size_of::<i32>(),
798                    ),
799                    b'I' => (
800                        Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
801                            aux.offset(array_data_offset),
802                            length * size_of::<u32>(),
803                        ))),
804                        length * std::mem::size_of::<u32>(),
805                    ),
806                    b'f' => (
807                        Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
808                            aux.offset(array_data_offset),
809                            length * size_of::<f32>(),
810                        ))),
811                        length * std::mem::size_of::<f32>(),
812                    ),
813                    _ => {
814                        return Err(Error::BamAuxUnknownType);
815                    }
816                };
817                (
818                    array_data,
819                    // Offset: array-specific metadata + array size
820                    ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
821                )
822            }
823            _ => {
824                return Err(Error::BamAuxUnknownType);
825            }
826        };
827
828        // Offset: metadata + type size
829        Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
830    }
831
832    /// Returns an iterator over the auxiliary fields of the record.
833    ///
834    /// When an error occurs, the `Err` variant will be returned
835    /// and the iterator will not be able to advance anymore.
836    pub fn aux_iter(&self) -> AuxIter {
837        AuxIter {
838            // In order to get to the aux data section of a `bam::Record`
839            // we need to skip fields in front of it
840            aux: &self.data()[
841                // NUL terminated read name:
842                self.qname_capacity()
843                // CIGAR (uint32_t):
844                + self.cigar_len() * std::mem::size_of::<u32>()
845                // Read sequence (4-bit encoded):
846                + self.seq_len().div_ceil(2)
847                // Base qualities (char):
848                + self.seq_len()..],
849        }
850    }
851
852    /// Add auxiliary data.
853    pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
854        // Don't allow pushing aux data when the given tag is already present in the record.
855        // `htslib` seems to allow this (for non-array values), which can lead to problems
856        // since retrieving aux fields consumes &[u8; 2] and yields one field only.
857        if self.aux(tag).is_ok() {
858            return Err(Error::BamAuxTagAlreadyPresent);
859        }
860
861        let ctag = tag.as_ptr() as *mut c_char;
862        let ret = unsafe {
863            match value {
864                Aux::Char(v) => htslib::bam_aux_append(
865                    self.inner_ptr_mut(),
866                    ctag,
867                    b'A' as c_char,
868                    size_of::<u8>() as i32,
869                    [v].as_mut_ptr(),
870                ),
871                Aux::I8(v) => htslib::bam_aux_append(
872                    self.inner_ptr_mut(),
873                    ctag,
874                    b'c' as c_char,
875                    size_of::<i8>() as i32,
876                    [v].as_mut_ptr() as *mut u8,
877                ),
878                Aux::U8(v) => htslib::bam_aux_append(
879                    self.inner_ptr_mut(),
880                    ctag,
881                    b'C' as c_char,
882                    size_of::<u8>() as i32,
883                    [v].as_mut_ptr(),
884                ),
885                Aux::I16(v) => htslib::bam_aux_append(
886                    self.inner_ptr_mut(),
887                    ctag,
888                    b's' as c_char,
889                    size_of::<i16>() as i32,
890                    [v].as_mut_ptr() as *mut u8,
891                ),
892                Aux::U16(v) => htslib::bam_aux_append(
893                    self.inner_ptr_mut(),
894                    ctag,
895                    b'S' as c_char,
896                    size_of::<u16>() as i32,
897                    [v].as_mut_ptr() as *mut u8,
898                ),
899                Aux::I32(v) => htslib::bam_aux_append(
900                    self.inner_ptr_mut(),
901                    ctag,
902                    b'i' as c_char,
903                    size_of::<i32>() as i32,
904                    [v].as_mut_ptr() as *mut u8,
905                ),
906                Aux::U32(v) => htslib::bam_aux_append(
907                    self.inner_ptr_mut(),
908                    ctag,
909                    b'I' as c_char,
910                    size_of::<u32>() as i32,
911                    [v].as_mut_ptr() as *mut u8,
912                ),
913                Aux::Float(v) => htslib::bam_aux_append(
914                    self.inner_ptr_mut(),
915                    ctag,
916                    b'f' as c_char,
917                    size_of::<f32>() as i32,
918                    [v].as_mut_ptr() as *mut u8,
919                ),
920                // Not part of specs but implemented in `htslib`:
921                Aux::Double(v) => htslib::bam_aux_append(
922                    self.inner_ptr_mut(),
923                    ctag,
924                    b'd' as c_char,
925                    size_of::<f64>() as i32,
926                    [v].as_mut_ptr() as *mut u8,
927                ),
928                Aux::String(v) => {
929                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
930                    htslib::bam_aux_append(
931                        self.inner_ptr_mut(),
932                        ctag,
933                        b'Z' as c_char,
934                        (v.len() + 1) as i32,
935                        c_str.as_ptr() as *mut u8,
936                    )
937                }
938                Aux::HexByteArray(v) => {
939                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
940                    htslib::bam_aux_append(
941                        self.inner_ptr_mut(),
942                        ctag,
943                        b'H' as c_char,
944                        (v.len() + 1) as i32,
945                        c_str.as_ptr() as *mut u8,
946                    )
947                }
948                // Not sure it's safe to cast an immutable slice to a mutable pointer in the following branches
949                Aux::ArrayI8(aux_array) => match aux_array {
950                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
951                        self.inner_ptr_mut(),
952                        ctag,
953                        b'c',
954                        inner.len() as u32,
955                        inner.slice.as_ptr() as *mut ::libc::c_void,
956                    ),
957                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
958                        self.inner_ptr_mut(),
959                        ctag,
960                        b'c',
961                        inner.len() as u32,
962                        inner.slice.as_ptr() as *mut ::libc::c_void,
963                    ),
964                },
965                Aux::ArrayU8(aux_array) => match aux_array {
966                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
967                        self.inner_ptr_mut(),
968                        ctag,
969                        b'C',
970                        inner.len() as u32,
971                        inner.slice.as_ptr() as *mut ::libc::c_void,
972                    ),
973                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
974                        self.inner_ptr_mut(),
975                        ctag,
976                        b'C',
977                        inner.len() as u32,
978                        inner.slice.as_ptr() as *mut ::libc::c_void,
979                    ),
980                },
981                Aux::ArrayI16(aux_array) => match aux_array {
982                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
983                        self.inner_ptr_mut(),
984                        ctag,
985                        b's',
986                        inner.len() as u32,
987                        inner.slice.as_ptr() as *mut ::libc::c_void,
988                    ),
989                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
990                        self.inner_ptr_mut(),
991                        ctag,
992                        b's',
993                        inner.len() as u32,
994                        inner.slice.as_ptr() as *mut ::libc::c_void,
995                    ),
996                },
997                Aux::ArrayU16(aux_array) => match aux_array {
998                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
999                        self.inner_ptr_mut(),
1000                        ctag,
1001                        b'S',
1002                        inner.len() as u32,
1003                        inner.slice.as_ptr() as *mut ::libc::c_void,
1004                    ),
1005                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1006                        self.inner_ptr_mut(),
1007                        ctag,
1008                        b'S',
1009                        inner.len() as u32,
1010                        inner.slice.as_ptr() as *mut ::libc::c_void,
1011                    ),
1012                },
1013                Aux::ArrayI32(aux_array) => match aux_array {
1014                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1015                        self.inner_ptr_mut(),
1016                        ctag,
1017                        b'i',
1018                        inner.len() as u32,
1019                        inner.slice.as_ptr() as *mut ::libc::c_void,
1020                    ),
1021                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1022                        self.inner_ptr_mut(),
1023                        ctag,
1024                        b'i',
1025                        inner.len() as u32,
1026                        inner.slice.as_ptr() as *mut ::libc::c_void,
1027                    ),
1028                },
1029                Aux::ArrayU32(aux_array) => match aux_array {
1030                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1031                        self.inner_ptr_mut(),
1032                        ctag,
1033                        b'I',
1034                        inner.len() as u32,
1035                        inner.slice.as_ptr() as *mut ::libc::c_void,
1036                    ),
1037                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1038                        self.inner_ptr_mut(),
1039                        ctag,
1040                        b'I',
1041                        inner.len() as u32,
1042                        inner.slice.as_ptr() as *mut ::libc::c_void,
1043                    ),
1044                },
1045                Aux::ArrayFloat(aux_array) => match aux_array {
1046                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
1047                        self.inner_ptr_mut(),
1048                        ctag,
1049                        b'f',
1050                        inner.len() as u32,
1051                        inner.slice.as_ptr() as *mut ::libc::c_void,
1052                    ),
1053                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1054                        self.inner_ptr_mut(),
1055                        ctag,
1056                        b'f',
1057                        inner.len() as u32,
1058                        inner.slice.as_ptr() as *mut ::libc::c_void,
1059                    ),
1060                },
1061            }
1062        };
1063
1064        if ret < 0 {
1065            Err(Error::BamAux)
1066        } else {
1067            Ok(())
1068        }
1069    }
1070
1071    // Delete auxiliary tag.
1072    pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1073        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
1074        let aux = unsafe {
1075            htslib::bam_aux_get(
1076                &self.inner as *const htslib::bam1_t,
1077                c_str.as_ptr() as *mut c_char,
1078            )
1079        };
1080        unsafe {
1081            if aux.is_null() {
1082                Err(Error::BamAuxTagNotFound)
1083            } else {
1084                htslib::bam_aux_del(self.inner_ptr_mut(), aux);
1085                Ok(())
1086            }
1087        }
1088    }
1089
1090    /// Access the base modifications associated with this Record through the MM tag.
1091    /// Example:
1092    /// ```
1093    ///    use rust_htslib::bam::{Read, Reader, Record};
1094    ///    let mut bam = Reader::from_path("test/base_mods/MM-orient.sam").unwrap();
1095    ///    let mut mod_count = 0;
1096    ///    for r in bam.records() {
1097    ///        let record = r.unwrap();
1098    ///        if let Ok(mods) = record.basemods_iter() {
1099    ///            // print metadata for the modifications present in this record
1100    ///            for mod_code in mods.recorded() {
1101    ///                if let Ok(mod_metadata) = mods.query_type(*mod_code) {
1102    ///                    println!("mod found with code {}/{} flags: [{} {} {}]",
1103    ///                              mod_code, *mod_code as u8 as char,
1104    ///                              mod_metadata.strand, mod_metadata.implicit, mod_metadata.canonical as u8 as char);
1105    ///                }
1106    ///            }
1107    ///
1108    ///            // iterate over the modifications in this record
1109    ///            // the modifications are returned as a tuple with the
1110    ///            // position within SEQ and an hts_base_mod struct
1111    ///            for res in mods {
1112    ///                if let Ok( (position, m) ) = res {
1113    ///                    println!("{} {},{}", position, m.modified_base as u8 as char, m.qual);
1114    ///                    mod_count += 1;
1115    ///                }
1116    ///            }
1117    ///        };
1118    ///    }
1119    ///    assert_eq!(mod_count, 14);
1120    /// ```
1121    pub fn basemods_iter(&self) -> Result<BaseModificationsIter> {
1122        BaseModificationsIter::new(self)
1123    }
1124
1125    /// An iterator that returns all of the modifications for each position as a vector.
1126    /// This is useful for the case where multiple possible modifications can be annotated
1127    /// at a single position (for example a C could be 5-mC or 5-hmC)
1128    pub fn basemods_position_iter(&self) -> Result<BaseModificationsPositionIter> {
1129        BaseModificationsPositionIter::new(self)
1130    }
1131
1132    /// Infer read pair orientation from record. Returns `SequenceReadPairOrientation::None` if record
1133    /// is not paired, mates are not mapping to the same contig, or mates start at the
1134    /// same position.
1135    pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1136        if self.is_paired()
1137            && !self.is_unmapped()
1138            && !self.is_mate_unmapped()
1139            && self.tid() == self.mtid()
1140        {
1141            if self.pos() == self.mpos() {
1142                // both reads start at the same position, we cannot decide on the orientation.
1143                return SequenceReadPairOrientation::None;
1144            }
1145
1146            let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() {
1147                (
1148                    self.pos(),
1149                    self.mpos(),
1150                    !self.is_reverse(),
1151                    !self.is_mate_reverse(),
1152                )
1153            } else {
1154                (
1155                    self.mpos(),
1156                    self.pos(),
1157                    !self.is_mate_reverse(),
1158                    !self.is_reverse(),
1159                )
1160            };
1161
1162            if pos_1 < pos_2 {
1163                match (fwd_1, fwd_2) {
1164                    (true, true) => SequenceReadPairOrientation::F1F2,
1165                    (true, false) => SequenceReadPairOrientation::F1R2,
1166                    (false, true) => SequenceReadPairOrientation::R1F2,
1167                    (false, false) => SequenceReadPairOrientation::R1R2,
1168                }
1169            } else {
1170                match (fwd_2, fwd_1) {
1171                    (true, true) => SequenceReadPairOrientation::F2F1,
1172                    (true, false) => SequenceReadPairOrientation::F2R1,
1173                    (false, true) => SequenceReadPairOrientation::R2F1,
1174                    (false, false) => SequenceReadPairOrientation::R2R1,
1175                }
1176            }
1177        } else {
1178            SequenceReadPairOrientation::None
1179        }
1180    }
1181
1182    flag!(is_paired, set_paired, unset_paired, 1u16);
1183    flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1184    flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1185    flag!(
1186        is_mate_unmapped,
1187        set_mate_unmapped,
1188        unset_mate_unmapped,
1189        8u16
1190    );
1191    flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1192    flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1193    flag!(
1194        is_first_in_template,
1195        set_first_in_template,
1196        unset_first_in_template,
1197        64u16
1198    );
1199    flag!(
1200        is_last_in_template,
1201        set_last_in_template,
1202        unset_last_in_template,
1203        128u16
1204    );
1205    flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1206    flag!(
1207        is_quality_check_failed,
1208        set_quality_check_failed,
1209        unset_quality_check_failed,
1210        512u16
1211    );
1212    flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1213    flag!(
1214        is_supplementary,
1215        set_supplementary,
1216        unset_supplementary,
1217        2048u16
1218    );
1219}
1220
1221impl Drop for Record {
1222    fn drop(&mut self) {
1223        if self.own {
1224            unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
1225        }
1226    }
1227}
1228
1229impl SequenceRead for Record {
1230    fn name(&self) -> &[u8] {
1231        self.qname()
1232    }
1233
1234    fn base(&self, i: usize) -> u8 {
1235        *decode_base_unchecked(encoded_base(self.seq_data(), i))
1236    }
1237
1238    fn base_qual(&self, i: usize) -> u8 {
1239        self.qual()[i]
1240    }
1241
1242    fn len(&self) -> usize {
1243        self.seq_len()
1244    }
1245
1246    fn is_empty(&self) -> bool {
1247        self.len() == 0
1248    }
1249}
1250
1251impl genome::AbstractInterval for Record {
1252    /// Return contig name. Panics if record does not know its header (which happens if it has not been read from a file).
1253    fn contig(&self) -> &str {
1254        let tid = self.tid();
1255        if tid < 0 {
1256            panic!("invalid tid, must be at least zero");
1257        }
1258        str::from_utf8(
1259            self.header
1260                .as_ref()
1261                .expect(
1262                    "header must be set (this is the case if the record has been read from a file)",
1263                )
1264                .tid2name(tid as u32),
1265        )
1266        .expect("unable to interpret contig name as UTF-8")
1267    }
1268
1269    /// Return genomic range covered by alignment. Panics if `Record::cache_cigar()` has not been called first or `Record::pos()` is less than zero.
1270    fn range(&self) -> ops::Range<genome::Position> {
1271        let end_pos = self
1272            .cigar_cached()
1273            .expect("cigar has not been cached yet, call cache_cigar() first")
1274            .end_pos() as u64;
1275
1276        if self.pos() < 0 {
1277            panic!("invalid position, must be positive")
1278        }
1279
1280        self.pos() as u64..end_pos
1281    }
1282}
1283
1284/// Auxiliary record data
1285///
1286/// The specification allows a wide range of types to be stored as an auxiliary data field of a BAM record.
1287///
1288/// Please note that the [`Aux::Double`] variant is _not_ part of the specification, but it is supported by `htslib`.
1289///
1290/// # Examples
1291///
1292/// ```
1293/// use rust_htslib::{
1294///     bam,
1295///     bam::record::{Aux, AuxArray},
1296///     errors::Error,
1297/// };
1298///
1299/// //Set up BAM record
1300/// let bam_header = bam::Header::new();
1301/// let mut record = bam::Record::from_sam(
1302///     &mut bam::HeaderView::from_header(&bam_header),
1303///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1304/// )
1305/// .unwrap();
1306///
1307/// // Add an integer field
1308/// let aux_integer_field = Aux::I32(1234);
1309/// record.push_aux(b"XI", aux_integer_field).unwrap();
1310///
1311/// match record.aux(b"XI") {
1312///     Ok(value) => {
1313///         // Typically, callers expect an aux field to be of a certain type.
1314///         // If that's not the case, the value can be `match`ed exhaustively.
1315///         if let Aux::I32(v) = value {
1316///             assert_eq!(v, 1234);
1317///         }
1318///     }
1319///     Err(e) => {
1320///         panic!("Error reading aux field: {}", e);
1321///     }
1322/// }
1323///
1324/// // Add an array field
1325/// let array_like_data = vec![0.4, 0.3, 0.2, 0.1];
1326/// let slice_of_data = &array_like_data;
1327/// let aux_array: AuxArray<f32> = slice_of_data.into();
1328/// let aux_array_field = Aux::ArrayFloat(aux_array);
1329/// record.push_aux(b"XA", aux_array_field).unwrap();
1330///
1331/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1332///     let read_array = array.iter().collect::<Vec<_>>();
1333///     assert_eq!(read_array, array_like_data);
1334/// } else {
1335///     panic!("Could not read array data");
1336/// }
1337/// ```
1338#[derive(Debug, PartialEq)]
1339pub enum Aux<'a> {
1340    Char(u8),
1341    I8(i8),
1342    U8(u8),
1343    I16(i16),
1344    U16(u16),
1345    I32(i32),
1346    U32(u32),
1347    Float(f32),
1348    Double(f64), // Not part of specs but implemented in `htslib`
1349    String(&'a str),
1350    HexByteArray(&'a str),
1351    ArrayI8(AuxArray<'a, i8>),
1352    ArrayU8(AuxArray<'a, u8>),
1353    ArrayI16(AuxArray<'a, i16>),
1354    ArrayU16(AuxArray<'a, u16>),
1355    ArrayI32(AuxArray<'a, i32>),
1356    ArrayU32(AuxArray<'a, u32>),
1357    ArrayFloat(AuxArray<'a, f32>),
1358}
1359
1360unsafe impl Send for Aux<'_> {}
1361unsafe impl Sync for Aux<'_> {}
1362
1363/// Types that can be used in aux arrays.
1364pub trait AuxArrayElement: Copy {
1365    fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1366}
1367
1368impl AuxArrayElement for i8 {
1369    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1370        std::io::Cursor::new(bytes).read_i8().ok()
1371    }
1372}
1373impl AuxArrayElement for u8 {
1374    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1375        std::io::Cursor::new(bytes).read_u8().ok()
1376    }
1377}
1378impl AuxArrayElement for i16 {
1379    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1380        std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1381    }
1382}
1383impl AuxArrayElement for u16 {
1384    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1385        std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1386    }
1387}
1388impl AuxArrayElement for i32 {
1389    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1390        std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1391    }
1392}
1393impl AuxArrayElement for u32 {
1394    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1395        std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1396    }
1397}
1398impl AuxArrayElement for f32 {
1399    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1400        std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
1401    }
1402}
1403
1404/// Provides access to aux arrays.
1405///
1406/// Provides methods to either retrieve single elements or an iterator over the
1407/// array.
1408///
1409/// This type is used for wrapping both, array data that was read from a
1410/// BAM record and slices of data that are going to be stored in one.
1411///
1412/// In order to be able to add an `AuxArray` field to a BAM record, `AuxArray`s
1413/// can be constructed via the `From` trait which is implemented for all
1414/// supported types (see [`AuxArrayElement`] for a list).
1415///
1416/// # Examples
1417///
1418/// ```
1419/// use rust_htslib::{
1420///     bam,
1421///     bam::record::{Aux, AuxArray},
1422/// };
1423///
1424/// //Set up BAM record
1425/// let bam_header = bam::Header::new();
1426/// let mut record = bam::Record::from_sam(
1427///     &mut bam::HeaderView::from_header(&bam_header),
1428///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1429/// ).unwrap();
1430///
1431/// let data = vec![0.4, 0.3, 0.2, 0.1];
1432/// let slice_of_data = &data;
1433/// let aux_array: AuxArray<f32> = slice_of_data.into();
1434/// let aux_field = Aux::ArrayFloat(aux_array);
1435/// record.push_aux(b"XA", aux_field);
1436///
1437/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1438///     // Retrieve the second element from the array
1439///     assert_eq!(array.get(1).unwrap(), 0.3);
1440///     // Iterate over the array and collect it into a `Vec`
1441///     let read_array = array.iter().collect::<Vec<_>>();
1442///     assert_eq!(read_array, data);
1443/// } else {
1444///     panic!("Could not read array data");
1445/// }
1446/// ```
1447#[derive(Debug)]
1448pub enum AuxArray<'a, T> {
1449    TargetType(AuxArrayTargetType<'a, T>),
1450    RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1451}
1452
1453impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1454where
1455    T: AuxArrayElement + PartialEq,
1456{
1457    fn eq(&self, other: &AuxArray<'_, T>) -> bool {
1458        use AuxArray::*;
1459        match (self, other) {
1460            (TargetType(v), TargetType(v_other)) => v == v_other,
1461            (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1462            (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
1463            (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
1464        }
1465    }
1466}
1467
1468/// Create AuxArrays from slices of allowed target types.
1469impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1470where
1471    I: AuxArrayElement,
1472    T: AsRef<[I]> + ?Sized,
1473{
1474    fn from(src: &'a T) -> Self {
1475        AuxArray::TargetType(AuxArrayTargetType {
1476            slice: src.as_ref(),
1477        })
1478    }
1479}
1480
1481impl<'a, T> AuxArray<'a, T>
1482where
1483    T: AuxArrayElement,
1484{
1485    /// Returns the element at a position or None if out of bounds.
1486    pub fn get(&self, index: usize) -> Option<T> {
1487        match self {
1488            AuxArray::TargetType(v) => v.get(index),
1489            AuxArray::RawLeBytes(v) => v.get(index),
1490        }
1491    }
1492
1493    /// Returns the number of elements in the array.
1494    pub fn len(&self) -> usize {
1495        match self {
1496            AuxArray::TargetType(a) => a.len(),
1497            AuxArray::RawLeBytes(a) => a.len(),
1498        }
1499    }
1500
1501    /// Returns true if the array contains no elements.
1502    pub fn is_empty(&self) -> bool {
1503        self.len() == 0
1504    }
1505
1506    /// Returns an iterator over the array.
1507    pub fn iter(&self) -> AuxArrayIter<T> {
1508        AuxArrayIter {
1509            index: 0,
1510            array: self,
1511        }
1512    }
1513
1514    /// Create AuxArrays from raw byte slices borrowed from `bam::Record`.
1515    fn from_bytes(bytes: &'a [u8]) -> Self {
1516        Self::RawLeBytes(AuxArrayRawLeBytes {
1517            slice: bytes,
1518            phantom_data: PhantomData,
1519        })
1520    }
1521}
1522
1523/// Encapsulates slice of target type.
1524#[doc(hidden)]
1525#[derive(Debug, PartialEq)]
1526pub struct AuxArrayTargetType<'a, T> {
1527    slice: &'a [T],
1528}
1529
1530impl<T> AuxArrayTargetType<'_, T>
1531where
1532    T: AuxArrayElement,
1533{
1534    fn get(&self, index: usize) -> Option<T> {
1535        self.slice.get(index).copied()
1536    }
1537
1538    fn len(&self) -> usize {
1539        self.slice.len()
1540    }
1541}
1542
1543/// Encapsulates slice of raw bytes to prevent it from being accidentally accessed.
1544#[doc(hidden)]
1545#[derive(Debug, PartialEq)]
1546pub struct AuxArrayRawLeBytes<'a, T> {
1547    slice: &'a [u8],
1548    phantom_data: PhantomData<T>,
1549}
1550
1551impl<T> AuxArrayRawLeBytes<'_, T>
1552where
1553    T: AuxArrayElement,
1554{
1555    fn get(&self, index: usize) -> Option<T> {
1556        let type_size = std::mem::size_of::<T>();
1557        if index * type_size + type_size > self.slice.len() {
1558            return None;
1559        }
1560        T::from_le_bytes(&self.slice[index * type_size..][..type_size])
1561    }
1562
1563    fn len(&self) -> usize {
1564        self.slice.len() / std::mem::size_of::<T>()
1565    }
1566}
1567
1568/// Aux array iterator
1569///
1570/// This struct is created by the [`AuxArray::iter`] method.
1571pub struct AuxArrayIter<'a, T> {
1572    index: usize,
1573    array: &'a AuxArray<'a, T>,
1574}
1575
1576impl<T> Iterator for AuxArrayIter<'_, T>
1577where
1578    T: AuxArrayElement,
1579{
1580    type Item = T;
1581    fn next(&mut self) -> Option<Self::Item> {
1582        let value = self.array.get(self.index);
1583        self.index += 1;
1584        value
1585    }
1586
1587    fn size_hint(&self) -> (usize, Option<usize>) {
1588        let array_length = self.array.len() - self.index;
1589        (array_length, Some(array_length))
1590    }
1591}
1592
1593/// Auxiliary data iterator
1594///
1595/// This struct is created by the [`Record::aux_iter`] method.
1596///
1597/// This iterator returns `Result`s that wrap tuples containing
1598/// a slice which represents the two-byte tag (`&[u8; 2]`) as
1599/// well as an `Aux` enum that wraps the associated value.
1600///
1601/// When an error occurs, the `Err` variant will be returned
1602/// and the iterator will not be able to advance anymore.
1603pub struct AuxIter<'a> {
1604    aux: &'a [u8],
1605}
1606
1607impl<'a> Iterator for AuxIter<'a> {
1608    type Item = Result<(&'a [u8], Aux<'a>)>;
1609
1610    fn next(&mut self) -> Option<Self::Item> {
1611        // We're finished
1612        if self.aux.is_empty() {
1613            return None;
1614        }
1615        // Incomplete aux data
1616        if (1..=3).contains(&self.aux.len()) {
1617            // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1618            self.aux = &[];
1619            return Some(Err(Error::BamAuxParsingError));
1620        }
1621        let tag = &self.aux[..2];
1622        Some(unsafe {
1623            let data_ptr = self.aux[2..].as_ptr();
1624            Record::read_aux_field(data_ptr)
1625                .map(|(aux, offset)| {
1626                    self.aux = &self.aux[offset..];
1627                    (tag, aux)
1628                })
1629                .inspect_err(|_e| {
1630                    // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1631                    self.aux = &[];
1632                })
1633        })
1634    }
1635}
1636
1637static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1638static ENCODE_BASE: [u8; 256] = [
1639    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1640    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1641    1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1642    15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1643    13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1644    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1645    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1646    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1647    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1648    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1649    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1650];
1651
1652#[inline]
1653fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
1654    (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
1655}
1656
1657#[inline]
1658unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
1659    (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
1660}
1661
1662#[inline]
1663fn decode_base_unchecked(base: u8) -> &'static u8 {
1664    unsafe { DECODE_BASE.get_unchecked(base as usize) }
1665}
1666
1667/// The sequence of a record.
1668#[derive(Debug, Copy, Clone)]
1669pub struct Seq<'a> {
1670    pub encoded: &'a [u8],
1671    len: usize,
1672}
1673
1674impl Seq<'_> {
1675    /// Return encoded base. Complexity: O(1).
1676    #[inline]
1677    pub fn encoded_base(&self, i: usize) -> u8 {
1678        encoded_base(self.encoded, i)
1679    }
1680
1681    /// Return encoded base. Complexity: O(1).
1682    ///
1683    /// # Safety
1684    ///
1685    /// TODO
1686    #[inline]
1687    pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
1688        encoded_base_unchecked(self.encoded, i)
1689    }
1690
1691    /// Obtain decoded base without performing bounds checking.
1692    /// Use index based access seq()[i], for checked, safe access.
1693    /// Complexity: O(1).
1694    ///
1695    /// # Safety
1696    ///
1697    /// TODO
1698    #[inline]
1699    pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
1700        *decode_base_unchecked(self.encoded_base_unchecked(i))
1701    }
1702
1703    /// Return decoded sequence. Complexity: O(m) with m being the read length.
1704    pub fn as_bytes(&self) -> Vec<u8> {
1705        (0..self.len()).map(|i| self[i]).collect()
1706    }
1707
1708    /// Return length (in bases) of the sequence.
1709    pub fn len(&self) -> usize {
1710        self.len
1711    }
1712
1713    pub fn is_empty(&self) -> bool {
1714        self.len() == 0
1715    }
1716}
1717
1718impl ops::Index<usize> for Seq<'_> {
1719    type Output = u8;
1720
1721    /// Return decoded base at given position within read. Complexity: O(1).
1722    fn index(&self, index: usize) -> &u8 {
1723        decode_base_unchecked(self.encoded_base(index))
1724    }
1725}
1726
1727unsafe impl Send for Seq<'_> {}
1728unsafe impl Sync for Seq<'_> {}
1729
1730#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1731#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1732pub enum Cigar {
1733    Match(u32),    // M
1734    Ins(u32),      // I
1735    Del(u32),      // D
1736    RefSkip(u32),  // N
1737    SoftClip(u32), // S
1738    HardClip(u32), // H
1739    Pad(u32),      // P
1740    Equal(u32),    // =
1741    Diff(u32),     // X
1742}
1743
1744impl Cigar {
1745    fn encode(self) -> u32 {
1746        match self {
1747            Cigar::Match(len) => len << 4, // | 0,
1748            Cigar::Ins(len) => (len << 4) | 1,
1749            Cigar::Del(len) => (len << 4) | 2,
1750            Cigar::RefSkip(len) => (len << 4) | 3,
1751            Cigar::SoftClip(len) => (len << 4) | 4,
1752            Cigar::HardClip(len) => (len << 4) | 5,
1753            Cigar::Pad(len) => (len << 4) | 6,
1754            Cigar::Equal(len) => (len << 4) | 7,
1755            Cigar::Diff(len) => (len << 4) | 8,
1756        }
1757    }
1758
1759    /// Return the length of the CIGAR.
1760    pub fn len(self) -> u32 {
1761        match self {
1762            Cigar::Match(len) => len,
1763            Cigar::Ins(len) => len,
1764            Cigar::Del(len) => len,
1765            Cigar::RefSkip(len) => len,
1766            Cigar::SoftClip(len) => len,
1767            Cigar::HardClip(len) => len,
1768            Cigar::Pad(len) => len,
1769            Cigar::Equal(len) => len,
1770            Cigar::Diff(len) => len,
1771        }
1772    }
1773
1774    pub fn is_empty(self) -> bool {
1775        self.len() == 0
1776    }
1777
1778    /// Return the character representing the CIGAR.
1779    pub fn char(self) -> char {
1780        match self {
1781            Cigar::Match(_) => 'M',
1782            Cigar::Ins(_) => 'I',
1783            Cigar::Del(_) => 'D',
1784            Cigar::RefSkip(_) => 'N',
1785            Cigar::SoftClip(_) => 'S',
1786            Cigar::HardClip(_) => 'H',
1787            Cigar::Pad(_) => 'P',
1788            Cigar::Equal(_) => '=',
1789            Cigar::Diff(_) => 'X',
1790        }
1791    }
1792}
1793
1794impl fmt::Display for Cigar {
1795    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1796        fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
1797    }
1798}
1799
1800unsafe impl Send for Cigar {}
1801unsafe impl Sync for Cigar {}
1802
1803custom_derive! {
1804    /// A CIGAR string. This type wraps around a `Vec<Cigar>`.
1805    ///
1806    /// # Example
1807    ///
1808    /// ```
1809    /// use rust_htslib::bam::record::{Cigar, CigarString};
1810    ///
1811    /// let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
1812    ///
1813    /// // access by index
1814    /// assert_eq!(cigar[0], Cigar::Match(100));
1815    /// // format into classical string representation
1816    /// assert_eq!(format!("{}", cigar), "100M10S");
1817    /// // iterate
1818    /// for op in &cigar {
1819    ///    println!("{}", op);
1820    /// }
1821    /// ```
1822    #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1823    #[derive(NewtypeDeref,
1824            NewtypeDerefMut,
1825             NewtypeIndex(usize),
1826             NewtypeIndexMut(usize),
1827             NewtypeFrom,
1828             PartialEq,
1829             PartialOrd,
1830             Eq,
1831             NewtypeDebug,
1832             Clone,
1833             Hash
1834    )]
1835    pub struct CigarString(pub Vec<Cigar>);
1836}
1837
1838impl CigarString {
1839    /// Create a `CigarStringView` from this CigarString at position `pos`
1840    pub fn into_view(self, pos: i64) -> CigarStringView {
1841        CigarStringView::new(self, pos)
1842    }
1843
1844    /// Calculate the bam cigar from the alignment struct. x is the target string
1845    /// and y is the reference. `hard_clip` controls how unaligned read bases are encoded in the
1846    /// cigar string. Set to true to use the hard clip (`H`) code, or false to use soft clip
1847    /// (`S`) code. See the [SAM spec](https://samtools.github.io/hts-specs/SAMv1.pdf) for more details.
1848    pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
1849        match alignment.mode {
1850            AlignmentMode::Global => {
1851                panic!(" Bam cigar fn not supported for Global Alignment mode")
1852            }
1853            AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
1854            _ => {}
1855        }
1856
1857        let mut cigar = Vec::new();
1858        if alignment.operations.is_empty() {
1859            return CigarString(cigar);
1860        }
1861
1862        let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
1863            AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
1864            AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
1865            AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
1866            AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
1867            _ => {}
1868        };
1869
1870        if alignment.xstart > 0 {
1871            cigar.push(if hard_clip {
1872                Cigar::HardClip(alignment.xstart as u32)
1873            } else {
1874                Cigar::SoftClip(alignment.xstart as u32)
1875            });
1876        }
1877
1878        let mut last = alignment.operations[0];
1879        let mut k = 1u32;
1880        for &op in alignment.operations[1..].iter() {
1881            if op == last {
1882                k += 1;
1883            } else {
1884                add_op(last, k, &mut cigar);
1885                k = 1;
1886            }
1887            last = op;
1888        }
1889        add_op(last, k, &mut cigar);
1890        if alignment.xlen > alignment.xend {
1891            cigar.push(if hard_clip {
1892                Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
1893            } else {
1894                Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
1895            });
1896        }
1897
1898        CigarString(cigar)
1899    }
1900}
1901
1902impl TryFrom<&[u8]> for CigarString {
1903    type Error = Error;
1904
1905    /// Create a CigarString from given &[u8].
1906    /// # Example
1907    /// ```
1908    /// use rust_htslib::bam::record::*;
1909    /// use rust_htslib::bam::record::CigarString;
1910    /// use rust_htslib::bam::record::Cigar::*;
1911    /// use std::convert::TryFrom;
1912    ///
1913    /// let cigar_str = "2H10M5X3=2H".as_bytes();
1914    /// let cigar = CigarString::try_from(cigar_str)
1915    ///     .expect("Unable to parse cigar string.");
1916    /// let expected_cigar = CigarString(vec![
1917    ///     HardClip(2),
1918    ///     Match(10),
1919    ///     Diff(5),
1920    ///     Equal(3),
1921    ///     HardClip(2),
1922    /// ]);
1923    /// assert_eq!(cigar, expected_cigar);
1924    /// ```
1925    fn try_from(bytes: &[u8]) -> Result<Self> {
1926        let mut inner = Vec::new();
1927        let mut i = 0;
1928        let text_len = bytes.len();
1929        while i < text_len {
1930            let mut j = i;
1931            while j < text_len && bytes[j].is_ascii_digit() {
1932                j += 1;
1933            }
1934            // check that length is provided
1935            if i == j {
1936                return Err(Error::BamParseCigar {
1937                    msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
1938                });
1939            }
1940            // get the length of the operation
1941            let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
1942                msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
1943            })?;
1944            let n = s.parse().map_err(|_| Error::BamParseCigar {
1945                msg: format!("Unable to parse &str '{:?}' to u32.", s),
1946            })?;
1947            // get the operation
1948            let op = &bytes[j];
1949            inner.push(match op {
1950                b'M' => Cigar::Match(n),
1951                b'I' => Cigar::Ins(n),
1952                b'D' => Cigar::Del(n),
1953                b'N' => Cigar::RefSkip(n),
1954                b'H' => {
1955                    if i == 0 || j + 1 == text_len {
1956                        Cigar::HardClip(n)
1957                    } else {
1958                        return Err(Error::BamParseCigar {
1959                            msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
1960                                .to_owned(),
1961                        });
1962                    }
1963                }
1964                b'S' => {
1965                    if i == 0
1966                        || j + 1 == text_len
1967                        || bytes[i-1] == b'H'
1968                        || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
1969                        Cigar::SoftClip(n)
1970                    } else {
1971                        return Err(Error::BamParseCigar {
1972                        msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
1973                            .to_owned(),
1974                        });
1975                    }
1976                },
1977                b'P' => Cigar::Pad(n),
1978                b'=' => Cigar::Equal(n),
1979                b'X' => Cigar::Diff(n),
1980                op => {
1981                    return Err(Error::BamParseCigar {
1982                        msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
1983                    })
1984                }
1985            });
1986            i = j + 1;
1987        }
1988        Ok(CigarString(inner))
1989    }
1990}
1991
1992impl TryFrom<&str> for CigarString {
1993    type Error = Error;
1994
1995    /// Create a CigarString from given &str.
1996    /// # Example
1997    /// ```
1998    /// use rust_htslib::bam::record::*;
1999    /// use rust_htslib::bam::record::CigarString;
2000    /// use rust_htslib::bam::record::Cigar::*;
2001    /// use std::convert::TryFrom;
2002    ///
2003    /// let cigar_str = "2H10M5X3=2H";
2004    /// let cigar = CigarString::try_from(cigar_str)
2005    ///     .expect("Unable to parse cigar string.");
2006    /// let expected_cigar = CigarString(vec![
2007    ///     HardClip(2),
2008    ///     Match(10),
2009    ///     Diff(5),
2010    ///     Equal(3),
2011    ///     HardClip(2),
2012    /// ]);
2013    /// assert_eq!(cigar, expected_cigar);
2014    /// ```
2015    fn try_from(text: &str) -> Result<Self> {
2016        let bytes = text.as_bytes();
2017        if text.chars().count() != bytes.len() {
2018            return Err(Error::BamParseCigar {
2019                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2020            });
2021        }
2022        CigarString::try_from(bytes)
2023    }
2024}
2025
2026impl<'a> CigarString {
2027    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2028        self.into_iter()
2029    }
2030}
2031
2032impl<'a> IntoIterator for &'a CigarString {
2033    type Item = &'a Cigar;
2034    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2035
2036    fn into_iter(self) -> Self::IntoIter {
2037        self.0.iter()
2038    }
2039}
2040
2041impl fmt::Display for CigarString {
2042    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2043        for op in self {
2044            fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
2045        }
2046        Ok(())
2047    }
2048}
2049
2050// Get number of leading/trailing softclips if a CigarString taking hardclips into account
2051fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
2052    match (cigar.next(), cigar.next()) {
2053        (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
2054            *s as i64
2055        }
2056        _ => 0,
2057    }
2058}
2059
2060#[derive(Eq, PartialEq, Clone, Debug)]
2061pub struct CigarStringView {
2062    inner: CigarString,
2063    pos: i64,
2064}
2065
2066impl CigarStringView {
2067    /// Construct a new CigarStringView from a CigarString at a position
2068    pub fn new(c: CigarString, pos: i64) -> CigarStringView {
2069        CigarStringView { inner: c, pos }
2070    }
2071
2072    /// Get (exclusive) end position of alignment.
2073    pub fn end_pos(&self) -> i64 {
2074        let mut pos = self.pos;
2075        for c in self {
2076            match c {
2077                Cigar::Match(l)
2078                | Cigar::RefSkip(l)
2079                | Cigar::Del(l)
2080                | Cigar::Equal(l)
2081                | Cigar::Diff(l) => pos += *l as i64,
2082                // these don't add to end_pos on reference
2083                Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2084            }
2085        }
2086        pos
2087    }
2088
2089    /// Get the start position of the alignment (0-based).
2090    pub fn pos(&self) -> i64 {
2091        self.pos
2092    }
2093
2094    /// Get number of bases softclipped at the beginning of the alignment.
2095    pub fn leading_softclips(&self) -> i64 {
2096        calc_softclips(self.iter())
2097    }
2098
2099    /// Get number of bases softclipped at the end of the alignment.
2100    pub fn trailing_softclips(&self) -> i64 {
2101        calc_softclips(self.iter().rev())
2102    }
2103
2104    /// Get number of bases hardclipped at the beginning of the alignment.
2105    pub fn leading_hardclips(&self) -> i64 {
2106        self.first().map_or(0, |cigar| {
2107            if let Cigar::HardClip(s) = cigar {
2108                *s as i64
2109            } else {
2110                0
2111            }
2112        })
2113    }
2114
2115    /// Get number of bases hardclipped at the end of the alignment.
2116    pub fn trailing_hardclips(&self) -> i64 {
2117        self.last().map_or(0, |cigar| {
2118            if let Cigar::HardClip(s) = cigar {
2119                *s as i64
2120            } else {
2121                0
2122            }
2123        })
2124    }
2125
2126    /// For a given position in the reference, get corresponding position within read.
2127    /// If reference position is outside of the read alignment, return None.
2128    ///
2129    /// # Arguments
2130    ///
2131    /// * `ref_pos` - the reference position
2132    /// * `include_softclips` - if true, softclips will be considered as matches or mismatches
2133    /// * `include_dels` - if true, positions within deletions will be considered (first reference matching read position after deletion will be returned)
2134    ///
2135    pub fn read_pos(
2136        &self,
2137        ref_pos: u32,
2138        include_softclips: bool,
2139        include_dels: bool,
2140    ) -> Result<Option<u32>> {
2141        let mut rpos = self.pos as u32; // reference position
2142        let mut qpos = 0u32; // position within read
2143        let mut j = 0; // index into cigar operation vector
2144
2145        // find first cigar operation referring to qpos = 0 (and thus bases in record.seq()),
2146        // because all augmentations of qpos and rpos before that are invalid
2147        for (i, c) in self.iter().enumerate() {
2148            match c {
2149                Cigar::Match(_) |
2150                Cigar::Diff(_)  |
2151                Cigar::Equal(_) |
2152                // this is unexpected, but bwa + GATK indel realignment can produce insertions
2153                // before matching positions
2154                Cigar::Ins(_) => {
2155                    j = i;
2156                    break;
2157                },
2158                Cigar::SoftClip(l) => {
2159                    j = i;
2160                    if include_softclips {
2161                        // Alignment starts with softclip and we want to include it in the
2162                        // projection of the reference position. However, the POS field does not
2163                        // include the softclip. Hence we have to subtract its length.
2164                        rpos = rpos.saturating_sub(*l);
2165                    }
2166                    break;
2167                },
2168                Cigar::Del(l) => {
2169                    // METHOD: leading deletions can happen in case of trimmed reads where
2170                    // a primer has been removed AFTER read mapping.
2171                    // Example: 24M8I8D18M9S before trimming, 32H8D18M9S after trimming
2172                    // with fgbio. While leading deletions should be impossible with
2173                    // normal read mapping, they make perfect sense with primer trimming
2174                    // because the mapper still had the evidence to decide in favor of
2175                    // the deletion via the primer sequence.
2176                    rpos += l;
2177                },
2178                Cigar::RefSkip(_) => {
2179                    return Err(Error::BamUnexpectedCigarOperation {
2180                        msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
2181                    });
2182                },
2183                Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2184                    return Err(Error::BamUnexpectedCigarOperation{
2185                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2186                    });
2187                },
2188                // if we have reached the end of the CigarString with only pads and hard clips, we have no read position matching the variant
2189                Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
2190                // skip leading HardClips and Pads, as they consume neither read sequence nor reference sequence
2191                Cigar::Pad(_) | Cigar::HardClip(_) => ()
2192            }
2193        }
2194
2195        let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2196            cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2197        };
2198
2199        while rpos <= ref_pos && j < self.len() {
2200            match self[j] {
2201                // potential SNV evidence
2202                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
2203                    // difference between desired position and first position of current cigar
2204                    // operation
2205                    qpos += ref_pos - rpos;
2206                    return Ok(Some(qpos));
2207                }
2208                Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2209                    qpos += ref_pos - rpos;
2210                    return Ok(Some(qpos));
2211                }
2212                Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2213                    // qpos shall resemble the start of the deletion
2214                    return Ok(Some(qpos));
2215                }
2216                // for others, just increase pos and qpos as needed
2217                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
2218                    rpos += l;
2219                    qpos += l;
2220                    j += 1;
2221                }
2222                Cigar::SoftClip(l) => {
2223                    qpos += l;
2224                    j += 1;
2225                    if include_softclips {
2226                        rpos += l;
2227                    }
2228                }
2229                Cigar::Ins(l) => {
2230                    qpos += l;
2231                    j += 1;
2232                }
2233                Cigar::RefSkip(l) | Cigar::Del(l) => {
2234                    rpos += l;
2235                    j += 1;
2236                }
2237                Cigar::Pad(_) => {
2238                    j += 1;
2239                }
2240                Cigar::HardClip(_) if j < self.len() - 1 => {
2241                    return Err(Error::BamUnexpectedCigarOperation{
2242                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2243                    });
2244                }
2245                Cigar::HardClip(_) => return Ok(None),
2246            }
2247        }
2248
2249        Ok(None)
2250    }
2251
2252    /// transfer ownership of the Cigar out of the CigarView
2253    pub fn take(self) -> CigarString {
2254        self.inner
2255    }
2256}
2257
2258impl ops::Deref for CigarStringView {
2259    type Target = CigarString;
2260
2261    fn deref(&self) -> &CigarString {
2262        &self.inner
2263    }
2264}
2265
2266impl ops::Index<usize> for CigarStringView {
2267    type Output = Cigar;
2268
2269    fn index(&self, index: usize) -> &Cigar {
2270        self.inner.index(index)
2271    }
2272}
2273
2274impl ops::IndexMut<usize> for CigarStringView {
2275    fn index_mut(&mut self, index: usize) -> &mut Cigar {
2276        self.inner.index_mut(index)
2277    }
2278}
2279
2280impl<'a> CigarStringView {
2281    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2282        self.inner.into_iter()
2283    }
2284}
2285
2286impl<'a> IntoIterator for &'a CigarStringView {
2287    type Item = &'a Cigar;
2288    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2289
2290    fn into_iter(self) -> Self::IntoIter {
2291        self.inner.into_iter()
2292    }
2293}
2294
2295impl fmt::Display for CigarStringView {
2296    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2297        self.inner.fmt(fmt)
2298    }
2299}
2300
2301pub struct BaseModificationMetadata {
2302    pub strand: i32,
2303    pub implicit: i32,
2304    pub canonical: u8,
2305}
2306
2307/// struct containing the internal state required to access
2308/// the base modifications for a bam::Record
2309pub struct BaseModificationState<'a> {
2310    record: &'a Record,
2311    state: *mut htslib::hts_base_mod_state,
2312    buffer: Vec<htslib::hts_base_mod>,
2313    buffer_pos: i32,
2314}
2315
2316impl BaseModificationState<'_> {
2317    /// Initialize a new BaseModification struct from a bam::Record
2318    /// This function allocates memory for the state structure
2319    /// and initializes the iterator to the start of the modification
2320    /// records.
2321    fn new(r: &Record) -> Result<BaseModificationState<'_>> {
2322        let mut bm = unsafe {
2323            BaseModificationState {
2324                record: r,
2325                state: hts_sys::hts_base_mod_state_alloc(),
2326                buffer: Vec::new(),
2327                buffer_pos: -1,
2328            }
2329        };
2330
2331        if bm.state.is_null() {
2332            panic!("Unable to allocate memory for hts_base_mod_state");
2333        }
2334
2335        // parse the MM tag to initialize the state
2336        unsafe {
2337            let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
2338            if ret != 0 {
2339                return Err(Error::BamBaseModificationTagNotFound);
2340            }
2341        }
2342
2343        let types = bm.recorded();
2344        bm.buffer.reserve(types.len());
2345        Ok(bm)
2346    }
2347
2348    pub fn buffer_next_mods(&mut self) -> Result<usize> {
2349        unsafe {
2350            let ret = hts_sys::bam_next_basemod(
2351                self.record.inner_ptr(),
2352                self.state,
2353                self.buffer.as_mut_ptr(),
2354                self.buffer.capacity() as i32,
2355                &mut self.buffer_pos,
2356            );
2357
2358            if ret < 0 {
2359                return Err(Error::BamBaseModificationIterationFailed);
2360            }
2361
2362            // the htslib API won't write more than buffer.capacity() mods to the output array but it will
2363            // return the actual number of modifications found. We return an error to the caller
2364            // in the case where there was insufficient storage to return all mods.
2365            if ret as usize > self.buffer.capacity() {
2366                return Err(Error::BamBaseModificationTooManyMods);
2367            }
2368
2369            // we read the modifications directly into the vector, which does
2370            // not update the length so needs to be manually set
2371            self.buffer.set_len(ret as usize);
2372
2373            Ok(ret as usize)
2374        }
2375    }
2376
2377    /// Return an array containing the modification codes listed for this record.
2378    /// Positive values are ascii character codes (eg m), negative values are chEBI codes.
2379    pub fn recorded<'a>(&self) -> &'a [i32] {
2380        unsafe {
2381            let mut n: i32 = 0;
2382            let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2383
2384            // htslib should not return a null pointer, even when there are no base mods
2385            if data_ptr.is_null() {
2386                panic!("Unable to obtain pointer to base modifications");
2387            }
2388            assert!(n >= 0);
2389            slice::from_raw_parts(data_ptr, n as usize)
2390        }
2391    }
2392
2393    /// Return metadata for the specified character code indicating the strand
2394    /// the base modification was called on, whether the tag uses implicit mode
2395    /// and the ascii code for the canonical base.
2396    /// If there are multiple modifications with the same code this will return the data
2397    /// for the first mod.  See https://github.com/samtools/htslib/issues/1635
2398    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2399        unsafe {
2400            let mut strand: i32 = 0;
2401            let mut implicit: i32 = 0;
2402            // This may be i8 or u8 in hts_sys.
2403            let mut canonical: c_char = 0;
2404
2405            let ret = hts_sys::bam_mods_query_type(
2406                self.state,
2407                code,
2408                &mut strand,
2409                &mut implicit,
2410                &mut canonical,
2411            );
2412            if ret == -1 {
2413                Err(Error::BamBaseModificationTypeNotFound)
2414            } else {
2415                Ok(BaseModificationMetadata {
2416                    strand,
2417                    implicit,
2418                    canonical: canonical.try_into().unwrap(),
2419                })
2420            }
2421        }
2422    }
2423}
2424
2425impl Drop for BaseModificationState<'_> {
2426    fn drop<'a>(&mut self) {
2427        unsafe {
2428            hts_sys::hts_base_mod_state_free(self.state);
2429        }
2430    }
2431}
2432
2433/// Iterator over the base modifications that returns
2434/// a vector for all of the mods at each position
2435pub struct BaseModificationsPositionIter<'a> {
2436    mod_state: BaseModificationState<'a>,
2437}
2438
2439impl BaseModificationsPositionIter<'_> {
2440    fn new(r: &Record) -> Result<BaseModificationsPositionIter<'_>> {
2441        let state = BaseModificationState::new(r)?;
2442        Ok(BaseModificationsPositionIter { mod_state: state })
2443    }
2444
2445    pub fn recorded<'a>(&self) -> &'a [i32] {
2446        self.mod_state.recorded()
2447    }
2448
2449    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2450        self.mod_state.query_type(code)
2451    }
2452}
2453
2454impl Iterator for BaseModificationsPositionIter<'_> {
2455    type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2456
2457    fn next(&mut self) -> Option<Self::Item> {
2458        let ret = self.mod_state.buffer_next_mods();
2459
2460        // Three possible things happened in buffer_next_mods:
2461        // 1. the htslib API call was successful but there are no more mods
2462        // 2. ths htslib API call was successful and we read some mods
2463        // 3. the htslib API call failed, we propogate the error wrapped in an option
2464        match ret {
2465            Ok(num_mods) => {
2466                if num_mods == 0 {
2467                    None
2468                } else {
2469                    let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
2470                    Some(Ok(data))
2471                }
2472            }
2473            Err(e) => Some(Err(e)),
2474        }
2475    }
2476}
2477
2478/// Iterator over the base modifications that returns
2479/// the next modification found, one by one
2480pub struct BaseModificationsIter<'a> {
2481    mod_state: BaseModificationState<'a>,
2482    buffer_idx: usize,
2483}
2484
2485impl BaseModificationsIter<'_> {
2486    fn new(r: &Record) -> Result<BaseModificationsIter<'_>> {
2487        let state = BaseModificationState::new(r)?;
2488        Ok(BaseModificationsIter {
2489            mod_state: state,
2490            buffer_idx: 0,
2491        })
2492    }
2493
2494    pub fn recorded<'a>(&self) -> &'a [i32] {
2495        self.mod_state.recorded()
2496    }
2497
2498    pub fn query_type(&self, code: i32) -> Result<BaseModificationMetadata> {
2499        self.mod_state.query_type(code)
2500    }
2501}
2502
2503impl Iterator for BaseModificationsIter<'_> {
2504    type Item = Result<(i32, hts_sys::hts_base_mod)>;
2505
2506    fn next(&mut self) -> Option<Self::Item> {
2507        if self.buffer_idx == self.mod_state.buffer.len() {
2508            // need to use the internal state to read the next
2509            // set of modifications into the buffer
2510            let ret = self.mod_state.buffer_next_mods();
2511
2512            match ret {
2513                Ok(num_mods) => {
2514                    if num_mods == 0 {
2515                        // done iterating
2516                        return None;
2517                    } else {
2518                        // we read some mods, reset the position in the buffer then fall through
2519                        self.buffer_idx = 0;
2520                    }
2521                }
2522                Err(e) => return Some(Err(e)),
2523            }
2524        }
2525
2526        // if we got here when there are mods buffered that we haven't emitted yet
2527        assert!(self.buffer_idx < self.mod_state.buffer.len());
2528        let data = (
2529            self.mod_state.buffer_pos,
2530            self.mod_state.buffer[self.buffer_idx],
2531        );
2532        self.buffer_idx += 1;
2533        Some(Ok(data))
2534    }
2535}
2536
2537#[cfg(test)]
2538mod tests {
2539    use super::*;
2540
2541    #[test]
2542    fn test_cigar_string() {
2543        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2544
2545        assert_eq!(cigar[0], Cigar::Match(100));
2546        assert_eq!(format!("{}", cigar), "100M10S");
2547        for op in &cigar {
2548            println!("{}", op);
2549        }
2550    }
2551
2552    #[test]
2553    fn test_cigar_string_view_pos() {
2554        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2555        assert_eq!(cigar.pos(), 5);
2556    }
2557
2558    #[test]
2559    fn test_cigar_string_leading_softclips() {
2560        let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2561        assert_eq!(cigar.leading_softclips(), 10);
2562        let cigar2 = CigarString(vec![
2563            Cigar::HardClip(5),
2564            Cigar::SoftClip(10),
2565            Cigar::Match(100),
2566        ])
2567        .into_view(0);
2568        assert_eq!(cigar2.leading_softclips(), 10);
2569    }
2570
2571    #[test]
2572    fn test_cigar_string_trailing_softclips() {
2573        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2574        assert_eq!(cigar.trailing_softclips(), 10);
2575        let cigar2 = CigarString(vec![
2576            Cigar::Match(100),
2577            Cigar::SoftClip(10),
2578            Cigar::HardClip(5),
2579        ])
2580        .into_view(0);
2581        assert_eq!(cigar2.trailing_softclips(), 10);
2582    }
2583
2584    #[test]
2585    fn test_cigar_read_pos() {
2586        let vpos = 5; // variant position
2587
2588        // Ignore leading HardClip
2589        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2590        // var:                       V
2591        // c01: 7H                 M  M
2592        // qpos:                  00 01
2593        let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2594        assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2595
2596        // Skip leading SoftClip or use as pre-POS matches
2597        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2598        // var:                       V
2599        // c02: 5H2S         M  M  M  M  M  M
2600        // qpos:  00        02 03 04 05 06 07
2601        // c02: 5H     S  S  M  M  M  M  M  M
2602        // qpos:      00 01 02 03 04 05 06 07
2603        let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2604        assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2605        assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2606
2607        // Skip leading SoftClip returning None for unmatched reference positiong or use as
2608        // pre-POS matches
2609        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2610        // var:                       V
2611        // c03:  3S                      M  M
2612        // qpos: 00                     03 04
2613        // c03:                 S  S  S  M  M
2614        // qpos:               00 01 02 03 04
2615        let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2616        assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2617        assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2618
2619        // Skip leading Insertion before variant position
2620        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2621        // var:                       V
2622        // c04:  3I                X  X  X
2623        // qpos: 00               03 04 05
2624        let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2625        assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2626
2627        // Matches and deletion before variant position
2628        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2629        // var:                       V
2630        // c05:        =  =  D  D  X  =  =
2631        // qpos:      00 01       02 03 04 05
2632        let c05 = CigarString(vec![
2633            Cigar::Equal(2),
2634            Cigar::Del(2),
2635            Cigar::Diff(1),
2636            Cigar::Equal(2),
2637        ])
2638        .into_view(0);
2639        assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2640
2641        // single nucleotide Deletion covering variant position
2642        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2643        // var:                       V
2644        // c06:                 =  =  D  X  X
2645        // qpos:               00 01    02 03
2646        let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2647        assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2648        assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2649
2650        // three nucleotide Deletion covering variant position
2651        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2652        // var:                       V
2653        // c07:              =  =  D  D  D  M  M
2654        // qpos:            00 01          02 03
2655        let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2656        assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2657        assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2658
2659        // three nucleotide RefSkip covering variant position
2660        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2661        // var:                       V
2662        // c08:              =  X  N  N  N  M  M
2663        // qpos:            00 01          02 03
2664        let c08 = CigarString(vec![
2665            Cigar::Equal(1),
2666            Cigar::Diff(1),
2667            Cigar::RefSkip(3),
2668            Cigar::Match(2),
2669        ])
2670        .into_view(2);
2671        assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2672        assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2673
2674        // internal hard clip before variant pos
2675        // ref:       00 01 02 03    04 05 06 07 08 09 10 11 12 13 14 15
2676        // var:                          V
2677        // c09: 3H           =  = 3H  =  =
2678        // qpos:            00 01    02 03
2679        let c09 = CigarString(vec![
2680            Cigar::HardClip(3),
2681            Cigar::Equal(2),
2682            Cigar::HardClip(3),
2683            Cigar::Equal(2),
2684        ])
2685        .into_view(2);
2686        assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2687
2688        // Deletion right before variant position
2689        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2690        // var:                       V
2691        // c10:           M  M  D  D  M  M
2692        // qpos:         00 01       02 03
2693        let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2694        assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2695
2696        // Insertion right before variant position
2697        // ref:       00 01 02 03 04    05 06 07 08 09 10 11 12 13 14 15
2698        // var:                          V
2699        // c11:                 M  M 3I  M
2700        // qpos:               00 01 02 05 06
2701        let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2702        assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2703
2704        // Insertion right after variant position
2705        // ref:       00 01 02 03 04 05    06 07 08 09 10 11 12 13 14 15
2706        // var:                       V
2707        // c12:                 M  M  M 2I  =
2708        // qpos:               00 01 02 03 05
2709        let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2710        assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2711
2712        // Deletion right after variant position
2713        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2714        // var:                       V
2715        // c13:                 M  M  M  D  =
2716        // qpos:               00 01 02    03
2717        let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2718        assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2719
2720        // A messy and complicated example, including a Pad operation
2721        let vpos2 = 15;
2722        // ref:       00    01 02    03 04 05    06 07 08 09 10 11 12 13 14 15
2723        // var:                                                           V
2724        // c14: 5H3S   = 2P  M  X 3I  M  M  D 2I  =  =  N  N  N  M  M  M  =  =  5S2H
2725        // qpos:  00  03    04 05 06 09 10    11 13 14          15 16 17 18 19
2726        let c14 = CigarString(vec![
2727            Cigar::HardClip(5),
2728            Cigar::SoftClip(3),
2729            Cigar::Equal(1),
2730            Cigar::Pad(2),
2731            Cigar::Match(1),
2732            Cigar::Diff(1),
2733            Cigar::Ins(3),
2734            Cigar::Match(2),
2735            Cigar::Del(1),
2736            Cigar::Ins(2),
2737            Cigar::Equal(2),
2738            Cigar::RefSkip(3),
2739            Cigar::Match(3),
2740            Cigar::Equal(2),
2741            Cigar::SoftClip(5),
2742            Cigar::HardClip(2),
2743        ])
2744        .into_view(0);
2745        assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2746
2747        // HardClip after Pad
2748        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2749        // var:                       V
2750        // c15: 5P1H            =  =  =
2751        // qpos:               00 01 02
2752        let c15 =
2753            CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2754        assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2755
2756        // only HardClip and Pad operations
2757        // c16: 7H5P2H
2758        let c16 =
2759            CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2760        assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2761    }
2762
2763    #[test]
2764    fn test_clone() {
2765        let mut rec = Record::new();
2766        rec.set_pos(300);
2767        rec.set_qname(b"read1");
2768        let clone = rec.clone();
2769        assert_eq!(rec, clone);
2770    }
2771
2772    #[test]
2773    fn test_flags() {
2774        let mut rec = Record::new();
2775
2776        rec.set_paired();
2777        assert_eq!(rec.is_paired(), true);
2778
2779        rec.set_supplementary();
2780        assert_eq!(rec.is_supplementary(), true);
2781        assert_eq!(rec.is_supplementary(), true);
2782
2783        rec.unset_paired();
2784        assert_eq!(rec.is_paired(), false);
2785        assert_eq!(rec.is_supplementary(), true);
2786
2787        rec.unset_supplementary();
2788        assert_eq!(rec.is_paired(), false);
2789        assert_eq!(rec.is_supplementary(), false);
2790    }
2791
2792    #[test]
2793    fn test_cigar_parse() {
2794        let cigar = "1S20M1D2I3X1=2H";
2795        let parsed = CigarString::try_from(cigar).unwrap();
2796        assert_eq!(parsed.to_string(), cigar);
2797    }
2798}
2799
2800#[cfg(test)]
2801mod alignment_cigar_tests {
2802    use super::*;
2803    use crate::bam::{Read, Reader};
2804    use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2805    use bio_types::alignment::{Alignment, AlignmentMode};
2806
2807    #[test]
2808    fn test_cigar() {
2809        let alignment = Alignment {
2810            score: 5,
2811            xstart: 3,
2812            ystart: 0,
2813            xend: 9,
2814            yend: 10,
2815            ylen: 10,
2816            xlen: 10,
2817            operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2818            mode: AlignmentMode::Semiglobal,
2819        };
2820        assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2821        assert_eq!(
2822            CigarString::from_alignment(&alignment, false).0,
2823            vec![
2824                Cigar::SoftClip(3),
2825                Cigar::Equal(3),
2826                Cigar::Diff(1),
2827                Cigar::Ins(2),
2828                Cigar::Del(2),
2829                Cigar::SoftClip(1),
2830            ]
2831        );
2832
2833        let alignment = Alignment {
2834            score: 5,
2835            xstart: 0,
2836            ystart: 5,
2837            xend: 4,
2838            yend: 10,
2839            ylen: 10,
2840            xlen: 5,
2841            operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
2842            mode: AlignmentMode::Custom,
2843        };
2844        assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
2845        assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
2846        assert_eq!(
2847            CigarString::from_alignment(&alignment, false).0,
2848            vec![
2849                Cigar::Equal(1),
2850                Cigar::Diff(2),
2851                Cigar::Ins(1),
2852                Cigar::Del(2),
2853                Cigar::SoftClip(1),
2854            ]
2855        );
2856        assert_eq!(
2857            CigarString::from_alignment(&alignment, true).0,
2858            vec![
2859                Cigar::Equal(1),
2860                Cigar::Diff(2),
2861                Cigar::Ins(1),
2862                Cigar::Del(2),
2863                Cigar::HardClip(1),
2864            ]
2865        );
2866
2867        let alignment = Alignment {
2868            score: 5,
2869            xstart: 0,
2870            ystart: 5,
2871            xend: 3,
2872            yend: 8,
2873            ylen: 10,
2874            xlen: 3,
2875            operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
2876            mode: AlignmentMode::Custom,
2877        };
2878        assert_eq!(alignment.cigar(false), "1X1=1X");
2879        assert_eq!(
2880            CigarString::from_alignment(&alignment, false).0,
2881            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2882        );
2883
2884        let alignment = Alignment {
2885            score: 5,
2886            xstart: 0,
2887            ystart: 5,
2888            xend: 3,
2889            yend: 8,
2890            ylen: 10,
2891            xlen: 3,
2892            operations: vec![Subst, Match, Subst],
2893            mode: AlignmentMode::Semiglobal,
2894        };
2895        assert_eq!(alignment.cigar(false), "1X1=1X");
2896        assert_eq!(
2897            CigarString::from_alignment(&alignment, false).0,
2898            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2899        );
2900    }
2901
2902    #[test]
2903    fn test_read_orientation_f1r2() {
2904        let mut bam = Reader::from_path("test/test_paired.sam").unwrap();
2905
2906        for res in bam.records() {
2907            let record = res.unwrap();
2908            assert_eq!(
2909                record.read_pair_orientation(),
2910                SequenceReadPairOrientation::F1R2
2911            );
2912        }
2913    }
2914
2915    #[test]
2916    fn test_read_orientation_f2r1() {
2917        let mut bam = Reader::from_path("test/test_nonstandard_orientation.sam").unwrap();
2918
2919        for res in bam.records() {
2920            let record = res.unwrap();
2921            assert_eq!(
2922                record.read_pair_orientation(),
2923                SequenceReadPairOrientation::F2R1
2924            );
2925        }
2926    }
2927
2928    #[test]
2929    fn test_read_orientation_supplementary() {
2930        let mut bam = Reader::from_path("test/test_orientation_supplementary.sam").unwrap();
2931
2932        for res in bam.records() {
2933            let record = res.unwrap();
2934            assert_eq!(
2935                record.read_pair_orientation(),
2936                SequenceReadPairOrientation::F2R1
2937            );
2938        }
2939    }
2940
2941    #[test]
2942    pub fn test_cigar_parsing_non_ascii_error() {
2943        let cigar_str = "43ጷ";
2944        let expected_error = Err(Error::BamParseCigar {
2945                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2946            });
2947
2948        let result = CigarString::try_from(cigar_str);
2949        assert_eq!(expected_error, result);
2950    }
2951
2952    #[test]
2953    pub fn test_cigar_parsing() {
2954        // parsing test cases
2955        let cigar_strs = [
2956            "1H10M4D100I300N1102=10P25X11S", // test every cigar opt
2957            "100M",                          // test a single op
2958            "",                              // test empty input
2959            "1H1=1H",                        // test simple hardclip
2960            "1S1=1S",                        // test simple softclip
2961            "11H11S11=11S11H",               // test complex softclip
2962            "10H",
2963            "10S",
2964        ];
2965        // expected results
2966        let cigars = [
2967            CigarString(vec![
2968                Cigar::HardClip(1),
2969                Cigar::Match(10),
2970                Cigar::Del(4),
2971                Cigar::Ins(100),
2972                Cigar::RefSkip(300),
2973                Cigar::Equal(1102),
2974                Cigar::Pad(10),
2975                Cigar::Diff(25),
2976                Cigar::SoftClip(11),
2977            ]),
2978            CigarString(vec![Cigar::Match(100)]),
2979            CigarString(vec![]),
2980            CigarString(vec![
2981                Cigar::HardClip(1),
2982                Cigar::Equal(1),
2983                Cigar::HardClip(1),
2984            ]),
2985            CigarString(vec![
2986                Cigar::SoftClip(1),
2987                Cigar::Equal(1),
2988                Cigar::SoftClip(1),
2989            ]),
2990            CigarString(vec![
2991                Cigar::HardClip(11),
2992                Cigar::SoftClip(11),
2993                Cigar::Equal(11),
2994                Cigar::SoftClip(11),
2995                Cigar::HardClip(11),
2996            ]),
2997            CigarString(vec![Cigar::HardClip(10)]),
2998            CigarString(vec![Cigar::SoftClip(10)]),
2999        ];
3000        // compare
3001        for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
3002            let cigar_parse = CigarString::try_from(cigar_str)
3003                .unwrap_or_else(|_| panic!("Unable to parse cigar: {}", cigar_str));
3004            assert_eq!(&cigar_parse, truth);
3005        }
3006    }
3007}
3008
3009#[cfg(test)]
3010mod basemod_tests {
3011    use crate::bam::{Read, Reader};
3012
3013    #[test]
3014    pub fn test_count_recorded() {
3015        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3016
3017        for r in bam.records() {
3018            let record = r.unwrap();
3019            if let Ok(mods) = record.basemods_iter() {
3020                let n = mods.recorded().len();
3021                assert_eq!(n, 3);
3022            };
3023        }
3024    }
3025
3026    #[test]
3027    pub fn test_query_type() {
3028        let mut bam = Reader::from_path("test/base_mods/MM-orient.sam").unwrap();
3029
3030        let mut n_fwd = 0;
3031        let mut n_rev = 0;
3032
3033        for r in bam.records() {
3034            let record = r.unwrap();
3035            if let Ok(mods) = record.basemods_iter() {
3036                for mod_code in mods.recorded() {
3037                    if let Ok(mod_metadata) = mods.query_type(*mod_code) {
3038                        if mod_metadata.strand == 0 {
3039                            n_fwd += 1;
3040                        }
3041                        if mod_metadata.strand == 1 {
3042                            n_rev += 1;
3043                        }
3044                    }
3045                }
3046            };
3047        }
3048        assert_eq!(n_fwd, 2);
3049        assert_eq!(n_rev, 2);
3050    }
3051
3052    #[test]
3053    pub fn test_mod_iter() {
3054        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3055        let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
3056        let mut i = 0;
3057
3058        for r in bam.records() {
3059            let record = r.unwrap();
3060            for res in record.basemods_iter().unwrap().flatten() {
3061                let (position, _m) = res;
3062                assert_eq!(position, expected_positions[i]);
3063                i += 1;
3064            }
3065        }
3066    }
3067
3068    #[test]
3069    pub fn test_position_iter() {
3070        let mut bam = Reader::from_path("test/base_mods/MM-double.sam").unwrap();
3071        let expected_positions = [1, 7, 12, 13, 22, 30, 31];
3072        let expected_counts = [1, 1, 1, 2, 1, 1, 1];
3073        let mut i = 0;
3074
3075        for r in bam.records() {
3076            let record = r.unwrap();
3077            for res in record.basemods_position_iter().unwrap().flatten() {
3078                let (position, elements) = res;
3079                assert_eq!(position, expected_positions[i]);
3080                assert_eq!(elements.len(), expected_counts[i]);
3081                i += 1;
3082            }
3083        }
3084    }
3085}