extended_htslib/bam/
record.rs

1// Copyright 2014 Christopher Schröder, Johannes Köster.
2// Licensed under the MIT license (http://opensource.org/licenses/MIT)
3// This file may not be copied, modified, or distributed
4// except according to those terms.
5
6use std::borrow::Cow;
7use std::convert::TryFrom;
8use std::convert::TryInto;
9use std::ffi;
10use std::fmt;
11use std::marker::PhantomData;
12use std::mem::{size_of, MaybeUninit};
13use std::ops;
14use std::ops::RangeBounds;
15use std::os::raw::c_char;
16use std::rc::Rc;
17use std::slice;
18use std::slice::SliceIndex;
19use std::str;
20use std::u32;
21
22use byteorder::{LittleEndian, ReadBytesExt};
23
24use crate::bam::Error;
25use crate::bam::HeaderView;
26use crate::errors::Result;
27use crate::htslib;
28use crate::utils;
29#[cfg(feature = "serde_feature")]
30use serde::{self, Deserialize, Serialize};
31
32use bio_types::alignment::{Alignment, AlignmentMode, AlignmentOperation};
33use bio_types::genome;
34use bio_types::sequence::SequenceRead;
35use bio_types::sequence::SequenceReadPairOrientation;
36use bio_types::strand::ReqStrand;
37
38/// A macro creating methods for flag access.
39macro_rules! flag {
40    ($get:ident, $set:ident, $unset:ident, $bit:expr) => {
41        pub fn $get(&self) -> bool {
42            self.inner().core.flag & $bit != 0
43        }
44
45        pub fn $set(&mut self) {
46            self.inner_mut().core.flag |= $bit;
47        }
48
49        pub fn $unset(&mut self) {
50            self.inner_mut().core.flag &= !$bit;
51        }
52    };
53}
54
55/// A BAM record.
56pub struct Record {
57    pub inner: htslib::bam1_t,
58    own: bool,
59    cigar: Option<CigarStringView>,
60    header: Option<Rc<HeaderView>>,
61}
62
63unsafe impl Send for Record {}
64unsafe impl Sync for Record {}
65
66impl Clone for Record {
67    fn clone(&self) -> Self {
68        let mut copy = Record::new();
69        unsafe { htslib::bam_copy1(copy.inner_ptr_mut(), self.inner_ptr()) };
70        copy
71    }
72}
73
74impl PartialEq for Record {
75    fn eq(&self, other: &Record) -> bool {
76        self.tid() == other.tid()
77            && self.pos() == other.pos()
78            && self.bin() == other.bin()
79            && self.mapq() == other.mapq()
80            && self.flags() == other.flags()
81            && self.mtid() == other.mtid()
82            && self.mpos() == other.mpos()
83            && self.insert_size() == other.insert_size()
84            && self.data() == other.data()
85            && self.inner().core.l_extranul == other.inner().core.l_extranul
86    }
87}
88
89impl Eq for Record {}
90
91impl fmt::Debug for Record {
92    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
93        fmt.write_fmt(format_args!(
94            "Record(tid: {}, pos: {})",
95            self.tid(),
96            self.pos()
97        ))
98    }
99}
100
101impl Default for Record {
102    fn default() -> Self {
103        Self::new()
104    }
105}
106
107#[inline]
108fn extranul_from_qname(qname: &[u8]) -> usize {
109    let qlen = qname.len() + 1;
110    if qlen % 4 != 0 {
111        4 - qlen % 4
112    } else {
113        0
114    }
115}
116
117impl Record {
118    /// Create an empty BAM record.
119    pub fn new() -> Self {
120        let mut record = Record {
121            inner: unsafe { MaybeUninit::zeroed().assume_init() },
122            own: true,
123            cigar: None,
124            header: None,
125        };
126        // The read/query name needs to be set as empty to properly initialize
127        // the record
128        record.set_qname(b"");
129        // Developer note: these are needed so the returned record is properly
130        // initialized as unmapped.
131        record.set_unmapped();
132        record.set_tid(-1);
133        record.set_pos(-1);
134        record.set_mpos(-1);
135        record.set_mtid(-1);
136        record
137    }
138
139    pub fn from_inner(from: *mut htslib::bam1_t) -> Self {
140        Record {
141            inner: {
142                #[allow(clippy::uninit_assumed_init)]
143                let mut inner = unsafe { MaybeUninit::uninit().assume_init() };
144                unsafe {
145                    ::libc::memcpy(
146                        &mut inner as *mut htslib::bam1_t as *mut ::libc::c_void,
147                        from as *const ::libc::c_void,
148                        size_of::<htslib::bam1_t>(),
149                    );
150                }
151                inner
152            },
153            own: false,
154            cigar: None,
155            header: None,
156        }
157    }
158
159    // Create a BAM record from a line SAM text. SAM slice need not be 0-terminated.
160    pub fn from_sam(header_view: &HeaderView, sam: &[u8]) -> Result<Record> {
161        let mut record = Self::new();
162
163        let mut sam_copy = Vec::with_capacity(sam.len() + 1);
164        sam_copy.extend(sam);
165        sam_copy.push(0);
166
167        let mut sam_string = htslib::kstring_t {
168            s: sam_copy.as_ptr() as *mut c_char,
169            l: sam_copy.len(),
170            m: sam_copy.len(),
171        };
172
173        let succ = unsafe {
174            htslib::sam_parse1(
175                &mut sam_string,
176                header_view.inner_ptr() as *mut htslib::bam_hdr_t,
177                record.inner_ptr_mut(),
178            )
179        };
180
181        if succ == 0 {
182            Ok(record)
183        } else {
184            Err(Error::BamParseSAM {
185                rec: str::from_utf8(&sam_copy).unwrap().to_owned(),
186            })
187        }
188    }
189
190    pub fn set_header(&mut self, header: Rc<HeaderView>) {
191        self.header = Some(header);
192    }
193
194    pub(super) fn data(&self) -> &[u8] {
195        unsafe { slice::from_raw_parts(self.inner().data, self.inner().l_data as usize) }
196    }
197
198    #[inline]
199    pub fn inner_mut(&mut self) -> &mut htslib::bam1_t {
200        &mut self.inner
201    }
202
203    #[inline]
204    pub(super) fn inner_ptr_mut(&mut self) -> *mut htslib::bam1_t {
205        &mut self.inner as *mut htslib::bam1_t
206    }
207
208    #[inline]
209    pub fn inner(&self) -> &htslib::bam1_t {
210        &self.inner
211    }
212
213    #[inline]
214    pub(super) fn inner_ptr(&self) -> *const htslib::bam1_t {
215        &self.inner as *const htslib::bam1_t
216    }
217
218    /// Get target id.
219    pub fn tid(&self) -> i32 {
220        self.inner().core.tid
221    }
222
223    /// Set target id.
224    pub fn set_tid(&mut self, tid: i32) {
225        self.inner_mut().core.tid = tid;
226    }
227
228    /// Get position (0-based).
229    pub fn pos(&self) -> i64 {
230        self.inner().core.pos
231    }
232
233    /// Set position (0-based).
234    pub fn set_pos(&mut self, pos: i64) {
235        self.inner_mut().core.pos = pos;
236    }
237
238    pub fn bin(&self) -> u16 {
239        self.inner().core.bin
240    }
241
242    pub fn set_bin(&mut self, bin: u16) {
243        self.inner_mut().core.bin = bin;
244    }
245
246    /// Get MAPQ.
247    pub fn mapq(&self) -> u8 {
248        self.inner().core.qual
249    }
250
251    /// Set MAPQ.
252    pub fn set_mapq(&mut self, mapq: u8) {
253        self.inner_mut().core.qual = mapq;
254    }
255
256    /// Get strand information from record flags.
257    pub fn strand(&self) -> ReqStrand {
258        let reverse = self.flags() & 0x10 != 0;
259        if reverse {
260            ReqStrand::Reverse
261        } else {
262            ReqStrand::Forward
263        }
264    }
265
266    /// Get raw flags.
267    pub fn flags(&self) -> u16 {
268        self.inner().core.flag
269    }
270
271    /// Set raw flags.
272    pub fn set_flags(&mut self, flags: u16) {
273        self.inner_mut().core.flag = flags;
274    }
275
276    /// Unset all flags.
277    pub fn unset_flags(&mut self) {
278        self.inner_mut().core.flag = 0;
279    }
280
281    /// Get target id of mate.
282    pub fn mtid(&self) -> i32 {
283        self.inner().core.mtid
284    }
285
286    /// Set target id of mate.
287    pub fn set_mtid(&mut self, mtid: i32) {
288        self.inner_mut().core.mtid = mtid;
289    }
290
291    /// Get mate position.
292    pub fn mpos(&self) -> i64 {
293        self.inner().core.mpos
294    }
295
296    /// Set mate position.
297    pub fn set_mpos(&mut self, mpos: i64) {
298        self.inner_mut().core.mpos = mpos;
299    }
300
301    /// Get insert size.
302    pub fn insert_size(&self) -> i64 {
303        self.inner().core.isize_
304    }
305
306    /// Set insert size.
307    pub fn set_insert_size(&mut self, insert_size: i64) {
308        self.inner_mut().core.isize_ = insert_size;
309    }
310
311    fn qname_capacity(&self) -> usize {
312        self.inner().core.l_qname as usize
313    }
314
315    fn qname_len(&self) -> usize {
316        // discount all trailing zeros (the default one and extra nulls)
317        self.qname_capacity() - 1 - self.inner().core.l_extranul as usize
318    }
319
320    /// Get qname (read name). Complexity: O(1).
321    pub fn qname(&self) -> &[u8] {
322        &self.data()[..self.qname_len()]
323    }
324
325    /// Set the variable length data buffer
326    pub fn set_data(&mut self, new_data: &[u8]) {
327        self.cigar = None;
328
329        self.inner_mut().l_data = new_data.len() as i32;
330        if (self.inner().m_data as i32) < self.inner().l_data {
331            // Verbosity due to lexical borrowing
332            let l_data = self.inner().l_data;
333            self.realloc_var_data(l_data as usize);
334        }
335
336        // Copy new data into buffer
337        let data =
338            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
339        utils::copy_memory(new_data, data);
340    }
341
342    /// Set variable length data (qname, cigar, seq, qual).
343    /// The aux data is left unchanged.
344    /// `qual` is Phred-scaled quality values, without any offset.
345    /// NOTE: seq.len() must equal qual.len() or this method
346    /// will panic. If you don't have quality values use
347    /// `let quals = vec![ 255 as u8; seq.len()];` as a placeholder that will
348    /// be recognized as missing QVs by `samtools`.
349    pub fn set(&mut self, qname: &[u8], cigar: Option<&CigarString>, seq: &[u8], qual: &[u8]) {
350        assert!(qname.len() < 255);
351        assert_eq!(seq.len(), qual.len(), "seq.len() must equal qual.len()");
352
353        self.cigar = None;
354
355        let cigar_width = if let Some(cigar_string) = cigar {
356            cigar_string.len()
357        } else {
358            0
359        } * 4;
360        let q_len = qname.len() + 1;
361        let extranul = extranul_from_qname(qname);
362
363        let orig_aux_offset = self.qname_capacity()
364            + 4 * self.cigar_len()
365            + (self.seq_len() + 1) / 2
366            + self.seq_len();
367        let new_aux_offset = q_len + extranul + cigar_width + (seq.len() + 1) / 2 + qual.len();
368        assert!(orig_aux_offset <= self.inner.l_data as usize);
369        let aux_len = self.inner.l_data as usize - orig_aux_offset;
370        self.inner_mut().l_data = (new_aux_offset + aux_len) as i32;
371        if (self.inner().m_data as i32) < self.inner().l_data {
372            // Verbosity due to lexical borrowing
373            let l_data = self.inner().l_data;
374            self.realloc_var_data(l_data as usize);
375        }
376
377        // Copy the aux data.
378        if aux_len > 0 && orig_aux_offset != new_aux_offset {
379            let data =
380                unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().m_data as usize) };
381            data.copy_within(orig_aux_offset..orig_aux_offset + aux_len, new_aux_offset);
382        }
383
384        let data =
385            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
386
387        // qname
388        utils::copy_memory(qname, data);
389        for i in 0..=extranul {
390            data[qname.len() + i] = b'\0';
391        }
392        let mut i = q_len + extranul;
393        self.inner_mut().core.l_qname = i as u16;
394        self.inner_mut().core.l_extranul = extranul as u8;
395
396        // cigar
397        if let Some(cigar_string) = cigar {
398            let cigar_data = unsafe {
399                //cigar is always aligned to 4 bytes (see extranul above) - so this is safe
400                #[allow(clippy::cast_ptr_alignment)]
401                slice::from_raw_parts_mut(data[i..].as_ptr() as *mut u32, cigar_string.len())
402            };
403            for (i, c) in cigar_string.iter().enumerate() {
404                cigar_data[i] = c.encode();
405            }
406            self.inner_mut().core.n_cigar = cigar_string.len() as u32;
407            i += cigar_string.len() * 4;
408        } else {
409            self.inner_mut().core.n_cigar = 0;
410        };
411
412        // seq
413        {
414            for j in (0..seq.len()).step_by(2) {
415                data[i + j / 2] = ENCODE_BASE[seq[j] as usize] << 4
416                    | (if j + 1 < seq.len() {
417                        ENCODE_BASE[seq[j + 1] as usize]
418                    } else {
419                        0
420                    });
421            }
422            self.inner_mut().core.l_qseq = seq.len() as i32;
423            i += (seq.len() + 1) / 2;
424        }
425
426        // qual
427        utils::copy_memory(qual, &mut data[i..]);
428    }
429
430    /// Replace current qname with a new one.
431    pub fn set_qname(&mut self, new_qname: &[u8]) {
432        // 251 + 1NUL is the max 32-bit aligned value that fits in u8
433        assert!(new_qname.len() < 252);
434
435        let old_q_len = self.qname_capacity();
436        // We're going to add a terminal NUL
437        let extranul = extranul_from_qname(new_qname);
438        let new_q_len = new_qname.len() + 1 + extranul;
439
440        // Length of data after qname
441        let other_len = self.inner_mut().l_data - old_q_len as i32;
442
443        if new_q_len < old_q_len && self.inner().l_data > (old_q_len as i32) {
444            self.inner_mut().l_data -= (old_q_len - new_q_len) as i32;
445        } else if new_q_len > old_q_len {
446            self.inner_mut().l_data += (new_q_len - old_q_len) as i32;
447
448            // Reallocate if necessary
449            if (self.inner().m_data as i32) < self.inner().l_data {
450                // Verbosity due to lexical borrowing
451                let l_data = self.inner().l_data;
452                self.realloc_var_data(l_data as usize);
453            }
454        }
455
456        if new_q_len != old_q_len {
457            // Move other data to new location
458            unsafe {
459                let data = slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize);
460
461                ::libc::memmove(
462                    data.as_mut_ptr().add(new_q_len) as *mut ::libc::c_void,
463                    data.as_mut_ptr().add(old_q_len) as *mut ::libc::c_void,
464                    other_len as usize,
465                );
466            }
467        }
468
469        // Copy qname data
470        let data =
471            unsafe { slice::from_raw_parts_mut(self.inner.data, self.inner().l_data as usize) };
472        utils::copy_memory(new_qname, data);
473        for i in 0..=extranul {
474            data[new_q_len - i - 1] = b'\0';
475        }
476        self.inner_mut().core.l_qname = new_q_len as u16;
477        self.inner_mut().core.l_extranul = extranul as u8;
478    }
479
480    fn realloc_var_data(&mut self, new_len: usize) {
481        // pad request
482        let new_len = new_len as u32;
483        let new_request = new_len + 32 - (new_len % 32);
484
485        let ptr = unsafe {
486            ::libc::realloc(
487                self.inner().data as *mut ::libc::c_void,
488                new_request as usize,
489            ) as *mut u8
490        };
491
492        if ptr.is_null() {
493            panic!("ran out of memory in rust_htslib trying to realloc");
494        }
495
496        // don't update m_data until we know we have
497        // a successful allocation.
498        self.inner_mut().m_data = new_request;
499        self.inner_mut().data = ptr;
500
501        // we now own inner.data
502        self.own = true;
503    }
504
505    pub fn cigar_len(&self) -> usize {
506        self.inner().core.n_cigar as usize
507    }
508
509    /// Get reference to raw cigar string representation (as stored in BAM file).
510    /// Usually, the method `Record::cigar` should be used instead.
511    pub fn raw_cigar(&self) -> &[u32] {
512        //cigar is always aligned to 4 bytes - so this is safe
513        #[allow(clippy::cast_ptr_alignment)]
514        unsafe {
515            slice::from_raw_parts(
516                self.data()[self.qname_capacity()..].as_ptr() as *const u32,
517                self.cigar_len(),
518            )
519        }
520    }
521
522    /// Return unpacked cigar string. This will create a fresh copy the Cigar data.
523    pub fn cigar(&self) -> CigarStringView {
524        match self.cigar {
525            Some(ref c) => c.clone(),
526            None => self.unpack_cigar(),
527        }
528    }
529
530    // Return unpacked cigar string. This returns None unless you have first called `bam::Record::cache_cigar`.
531    pub fn cigar_cached(&self) -> Option<&CigarStringView> {
532        self.cigar.as_ref()
533    }
534
535    /// Decode the cigar string and cache it inside the `Record`
536    pub fn cache_cigar(&mut self) {
537        self.cigar = Some(self.unpack_cigar())
538    }
539
540    /// Unpack cigar string. Complexity: O(k) with k being the length of the cigar string.
541    fn unpack_cigar(&self) -> CigarStringView {
542        CigarString(
543            self.raw_cigar()
544                .iter()
545                .map(|&c| {
546                    let len = c >> 4;
547                    match c & 0b1111 {
548                        0 => Cigar::Match(len),
549                        1 => Cigar::Ins(len),
550                        2 => Cigar::Del(len),
551                        3 => Cigar::RefSkip(len),
552                        4 => Cigar::SoftClip(len),
553                        5 => Cigar::HardClip(len),
554                        6 => Cigar::Pad(len),
555                        7 => Cigar::Equal(len),
556                        8 => Cigar::Diff(len),
557                        _ => panic!("Unexpected cigar operation"),
558                    }
559                })
560                .collect(),
561        )
562        .into_view(self.pos())
563    }
564
565    pub fn seq_len(&self) -> usize {
566        self.inner().core.l_qseq as usize
567    }
568
569    fn seq_data(&self) -> &[u8] {
570        let offset = self.qname_capacity() + self.cigar_len() * 4;
571        &self.data()[offset..][..(self.seq_len() + 1) / 2]
572    }
573
574    /// Get read sequence. Complexity: O(1).
575    pub fn seq(&self) -> Seq<'_> {
576        Seq {
577            encoded: self.seq_data(),
578            len: self.seq_len(),
579        }
580    }
581
582    /// Get base qualities (PHRED-scaled probability that base is wrong).
583    /// This does not entail any offsets, hence the qualities can be used directly without
584    /// e.g. subtracting 33. Complexity: O(1).
585    pub fn qual(&self) -> &[u8] {
586        &self.data()[self.qname_capacity() + self.cigar_len() * 4 + (self.seq_len() + 1) / 2..]
587            [..self.seq_len()]
588    }
589
590    /// Look up an auxiliary field by its tag.
591    ///
592    /// Only the first two bytes of a given tag are used for the look-up of a field.
593    /// See [`Aux`] for more details.
594    pub fn aux(&self, tag: &[u8]) -> Result<Aux<'_>> {
595        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
596        let aux = unsafe {
597            htslib::bam_aux_get(
598                &self.inner as *const htslib::bam1_t,
599                c_str.as_ptr() as *mut c_char,
600            )
601        };
602        unsafe { Self::read_aux_field(aux).map(|(aux_field, _length)| aux_field) }
603    }
604
605    unsafe fn read_aux_field<'a>(aux: *const u8) -> Result<(Aux<'a>, usize)> {
606        const TAG_LEN: isize = 2;
607        // Used for skipping type identifier
608        const TYPE_ID_LEN: isize = 1;
609
610        if aux.is_null() {
611            return Err(Error::BamAuxTagNotFound);
612        }
613
614        let (data, type_size) = match *aux {
615            b'A' => {
616                let type_size = size_of::<u8>();
617                (Aux::Char(*aux.offset(TYPE_ID_LEN)), type_size)
618            }
619            b'c' => {
620                let type_size = size_of::<i8>();
621                (Aux::I8(*aux.offset(TYPE_ID_LEN).cast::<i8>()), type_size)
622            }
623            b'C' => {
624                let type_size = size_of::<u8>();
625                (Aux::U8(*aux.offset(TYPE_ID_LEN)), type_size)
626            }
627            b's' => {
628                let type_size = size_of::<i16>();
629                (
630                    Aux::I16(
631                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
632                            .read_i16::<LittleEndian>()
633                            .map_err(|_| Error::BamAuxParsingError)?,
634                    ),
635                    type_size,
636                )
637            }
638            b'S' => {
639                let type_size = size_of::<u16>();
640                (
641                    Aux::U16(
642                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
643                            .read_u16::<LittleEndian>()
644                            .map_err(|_| Error::BamAuxParsingError)?,
645                    ),
646                    type_size,
647                )
648            }
649            b'i' => {
650                let type_size = size_of::<i32>();
651                (
652                    Aux::I32(
653                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
654                            .read_i32::<LittleEndian>()
655                            .map_err(|_| Error::BamAuxParsingError)?,
656                    ),
657                    type_size,
658                )
659            }
660            b'I' => {
661                let type_size = size_of::<u32>();
662                (
663                    Aux::U32(
664                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
665                            .read_u32::<LittleEndian>()
666                            .map_err(|_| Error::BamAuxParsingError)?,
667                    ),
668                    type_size,
669                )
670            }
671            b'f' => {
672                let type_size = size_of::<f32>();
673                (
674                    Aux::Float(
675                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
676                            .read_f32::<LittleEndian>()
677                            .map_err(|_| Error::BamAuxParsingError)?,
678                    ),
679                    type_size,
680                )
681            }
682            b'd' => {
683                let type_size = size_of::<f64>();
684                (
685                    Aux::Double(
686                        slice::from_raw_parts(aux.offset(TYPE_ID_LEN), type_size)
687                            .read_f64::<LittleEndian>()
688                            .map_err(|_| Error::BamAuxParsingError)?,
689                    ),
690                    type_size,
691                )
692            }
693            b'Z' | b'H' => {
694                let c_str = ffi::CStr::from_ptr(aux.offset(TYPE_ID_LEN).cast::<c_char>());
695                let rust_str = c_str.to_str().map_err(|_| Error::BamAuxParsingError)?;
696                (Aux::String(rust_str), c_str.to_bytes_with_nul().len())
697            }
698            b'B' => {
699                const ARRAY_INNER_TYPE_LEN: isize = 1;
700                const ARRAY_COUNT_LEN: isize = 4;
701
702                // Used for skipping metadata
703                let array_data_offset = TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN + ARRAY_COUNT_LEN;
704
705                let length =
706                    slice::from_raw_parts(aux.offset(TYPE_ID_LEN + ARRAY_INNER_TYPE_LEN), 4)
707                        .read_u32::<LittleEndian>()
708                        .map_err(|_| Error::BamAuxParsingError)? as usize;
709
710                // Return tuples of an `Aux` enum and the length of data + metadata in bytes
711                let (array_data, array_size) = match *aux.offset(TYPE_ID_LEN) {
712                    b'c' => (
713                        Aux::ArrayI8(AuxArray::<'a, i8>::from_bytes(slice::from_raw_parts(
714                            aux.offset(array_data_offset),
715                            length,
716                        ))),
717                        length,
718                    ),
719                    b'C' => (
720                        Aux::ArrayU8(AuxArray::<'a, u8>::from_bytes(slice::from_raw_parts(
721                            aux.offset(array_data_offset),
722                            length,
723                        ))),
724                        length,
725                    ),
726                    b's' => (
727                        Aux::ArrayI16(AuxArray::<'a, i16>::from_bytes(slice::from_raw_parts(
728                            aux.offset(array_data_offset),
729                            length * size_of::<i16>(),
730                        ))),
731                        length * std::mem::size_of::<i16>(),
732                    ),
733                    b'S' => (
734                        Aux::ArrayU16(AuxArray::<'a, u16>::from_bytes(slice::from_raw_parts(
735                            aux.offset(array_data_offset),
736                            length * size_of::<u16>(),
737                        ))),
738                        length * std::mem::size_of::<u16>(),
739                    ),
740                    b'i' => (
741                        Aux::ArrayI32(AuxArray::<'a, i32>::from_bytes(slice::from_raw_parts(
742                            aux.offset(array_data_offset),
743                            length * size_of::<i32>(),
744                        ))),
745                        length * std::mem::size_of::<i32>(),
746                    ),
747                    b'I' => (
748                        Aux::ArrayU32(AuxArray::<'a, u32>::from_bytes(slice::from_raw_parts(
749                            aux.offset(array_data_offset),
750                            length * size_of::<u32>(),
751                        ))),
752                        length * std::mem::size_of::<u32>(),
753                    ),
754                    b'f' => (
755                        Aux::ArrayFloat(AuxArray::<f32>::from_bytes(slice::from_raw_parts(
756                            aux.offset(array_data_offset),
757                            length * size_of::<f32>(),
758                        ))),
759                        length * std::mem::size_of::<f32>(),
760                    ),
761                    _ => {
762                        return Err(Error::BamAuxUnknownType);
763                    }
764                };
765                (
766                    array_data,
767                    // Offset: array-specific metadata + array size
768                    ARRAY_INNER_TYPE_LEN as usize + ARRAY_COUNT_LEN as usize + array_size,
769                )
770            }
771            _ => {
772                return Err(Error::BamAuxUnknownType);
773            }
774        };
775
776        // Offset: metadata + type size
777        Ok((data, TAG_LEN as usize + TYPE_ID_LEN as usize + type_size))
778    }
779
780    /// Returns an iterator over the auxiliary fields of the record.
781    ///
782    /// When an error occurs, the `Err` variant will be returned
783    /// and the iterator will not be able to advance anymore.
784    pub fn aux_iter(&self) -> AuxIter {
785        AuxIter {
786            // In order to get to the aux data section of a `bam::Record`
787            // we need to skip fields in front of it
788            aux: &self.data()[
789                // NUL terminated read name:
790                self.qname_capacity()
791                // CIGAR (uint32_t):
792                + self.cigar_len() * std::mem::size_of::<u32>()
793                // Read sequence (4-bit encoded):
794                + (self.seq_len() + 1) / 2
795                // Base qualities (char):
796                + self.seq_len()..],
797        }
798    }
799
800    /// Add auxiliary data.
801    pub fn push_aux(&mut self, tag: &[u8], value: Aux<'_>) -> Result<()> {
802        // Don't allow pushing aux data when the given tag is already present in the record.
803        // `htslib` seems to allow this (for non-array values), which can lead to problems
804        // since retrieving aux fields consumes &[u8; 2] and yields one field only.
805        if self.aux(tag).is_ok() {
806            return Err(Error::BamAuxTagAlreadyPresent);
807        }
808
809        let ctag = tag.as_ptr() as *mut c_char;
810        let ret = unsafe {
811            match value {
812                Aux::Char(v) => htslib::bam_aux_append(
813                    self.inner_ptr_mut(),
814                    ctag,
815                    b'A' as c_char,
816                    size_of::<u8>() as i32,
817                    [v].as_mut_ptr() as *mut u8,
818                ),
819                Aux::I8(v) => htslib::bam_aux_append(
820                    self.inner_ptr_mut(),
821                    ctag,
822                    b'c' as c_char,
823                    size_of::<i8>() as i32,
824                    [v].as_mut_ptr() as *mut u8,
825                ),
826                Aux::U8(v) => htslib::bam_aux_append(
827                    self.inner_ptr_mut(),
828                    ctag,
829                    b'C' as c_char,
830                    size_of::<u8>() as i32,
831                    [v].as_mut_ptr() as *mut u8,
832                ),
833                Aux::I16(v) => htslib::bam_aux_append(
834                    self.inner_ptr_mut(),
835                    ctag,
836                    b's' as c_char,
837                    size_of::<i16>() as i32,
838                    [v].as_mut_ptr() as *mut u8,
839                ),
840                Aux::U16(v) => htslib::bam_aux_append(
841                    self.inner_ptr_mut(),
842                    ctag,
843                    b'S' as c_char,
844                    size_of::<u16>() as i32,
845                    [v].as_mut_ptr() as *mut u8,
846                ),
847                Aux::I32(v) => htslib::bam_aux_append(
848                    self.inner_ptr_mut(),
849                    ctag,
850                    b'i' as c_char,
851                    size_of::<i32>() as i32,
852                    [v].as_mut_ptr() as *mut u8,
853                ),
854                Aux::U32(v) => htslib::bam_aux_append(
855                    self.inner_ptr_mut(),
856                    ctag,
857                    b'I' as c_char,
858                    size_of::<u32>() as i32,
859                    [v].as_mut_ptr() as *mut u8,
860                ),
861                Aux::Float(v) => htslib::bam_aux_append(
862                    self.inner_ptr_mut(),
863                    ctag,
864                    b'f' as c_char,
865                    size_of::<f32>() as i32,
866                    [v].as_mut_ptr() as *mut u8,
867                ),
868                // Not part of specs but implemented in `htslib`:
869                Aux::Double(v) => htslib::bam_aux_append(
870                    self.inner_ptr_mut(),
871                    ctag,
872                    b'd' as c_char,
873                    size_of::<f64>() as i32,
874                    [v].as_mut_ptr() as *mut u8,
875                ),
876                Aux::String(v) => {
877                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
878                    htslib::bam_aux_append(
879                        self.inner_ptr_mut(),
880                        ctag,
881                        b'Z' as c_char,
882                        (v.len() + 1) as i32,
883                        c_str.as_ptr() as *mut u8,
884                    )
885                }
886                Aux::HexByteArray(v) => {
887                    let c_str = ffi::CString::new(v).map_err(|_| Error::BamAuxStringError)?;
888                    htslib::bam_aux_append(
889                        self.inner_ptr_mut(),
890                        ctag,
891                        b'H' as c_char,
892                        (v.len() + 1) as i32,
893                        c_str.as_ptr() as *mut u8,
894                    )
895                }
896                // Not sure it's safe to cast an immutable slice to a mutable pointer in the following branches
897                Aux::ArrayI8(aux_array) => match aux_array {
898                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
899                        self.inner_ptr_mut(),
900                        ctag,
901                        b'c',
902                        inner.len() as u32,
903                        inner.slice.as_ptr() as *mut ::libc::c_void,
904                    ),
905                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
906                        self.inner_ptr_mut(),
907                        ctag,
908                        b'c',
909                        inner.len() as u32,
910                        inner.slice.as_ptr() as *mut ::libc::c_void,
911                    ),
912                },
913                Aux::ArrayU8(aux_array) => match aux_array {
914                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
915                        self.inner_ptr_mut(),
916                        ctag,
917                        b'C',
918                        inner.len() as u32,
919                        inner.slice.as_ptr() as *mut ::libc::c_void,
920                    ),
921                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
922                        self.inner_ptr_mut(),
923                        ctag,
924                        b'C',
925                        inner.len() as u32,
926                        inner.slice.as_ptr() as *mut ::libc::c_void,
927                    ),
928                },
929                Aux::ArrayI16(aux_array) => match aux_array {
930                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
931                        self.inner_ptr_mut(),
932                        ctag,
933                        b's',
934                        inner.len() as u32,
935                        inner.slice.as_ptr() as *mut ::libc::c_void,
936                    ),
937                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
938                        self.inner_ptr_mut(),
939                        ctag,
940                        b's',
941                        inner.len() as u32,
942                        inner.slice.as_ptr() as *mut ::libc::c_void,
943                    ),
944                },
945                Aux::ArrayU16(aux_array) => match aux_array {
946                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
947                        self.inner_ptr_mut(),
948                        ctag,
949                        b'S',
950                        inner.len() as u32,
951                        inner.slice.as_ptr() as *mut ::libc::c_void,
952                    ),
953                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
954                        self.inner_ptr_mut(),
955                        ctag,
956                        b'S',
957                        inner.len() as u32,
958                        inner.slice.as_ptr() as *mut ::libc::c_void,
959                    ),
960                },
961                Aux::ArrayI32(aux_array) => match aux_array {
962                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
963                        self.inner_ptr_mut(),
964                        ctag,
965                        b'i',
966                        inner.len() as u32,
967                        inner.slice.as_ptr() as *mut ::libc::c_void,
968                    ),
969                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
970                        self.inner_ptr_mut(),
971                        ctag,
972                        b'i',
973                        inner.len() as u32,
974                        inner.slice.as_ptr() as *mut ::libc::c_void,
975                    ),
976                },
977                Aux::ArrayU32(aux_array) => match aux_array {
978                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
979                        self.inner_ptr_mut(),
980                        ctag,
981                        b'I',
982                        inner.len() as u32,
983                        inner.slice.as_ptr() as *mut ::libc::c_void,
984                    ),
985                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
986                        self.inner_ptr_mut(),
987                        ctag,
988                        b'I',
989                        inner.len() as u32,
990                        inner.slice.as_ptr() as *mut ::libc::c_void,
991                    ),
992                },
993                Aux::ArrayFloat(aux_array) => match aux_array {
994                    AuxArray::TargetType(inner) => htslib::bam_aux_update_array(
995                        self.inner_ptr_mut(),
996                        ctag,
997                        b'f',
998                        inner.len() as u32,
999                        inner.slice.as_ptr() as *mut ::libc::c_void,
1000                    ),
1001                    AuxArray::RawLeBytes(inner) => htslib::bam_aux_update_array(
1002                        self.inner_ptr_mut(),
1003                        ctag,
1004                        b'f',
1005                        inner.len() as u32,
1006                        inner.slice.as_ptr() as *mut ::libc::c_void,
1007                    ),
1008                },
1009            }
1010        };
1011
1012        if ret < 0 {
1013            Err(Error::BamAux)
1014        } else {
1015            Ok(())
1016        }
1017    }
1018
1019    // Delete auxiliary tag.
1020    pub fn remove_aux(&mut self, tag: &[u8]) -> Result<()> {
1021        let c_str = ffi::CString::new(tag).map_err(|_| Error::BamAuxStringError)?;
1022        let aux = unsafe {
1023            htslib::bam_aux_get(
1024                &self.inner as *const htslib::bam1_t,
1025                c_str.as_ptr() as *mut c_char,
1026            )
1027        };
1028        unsafe {
1029            if aux.is_null() {
1030                Err(Error::BamAuxTagNotFound)
1031            } else {
1032                htslib::bam_aux_del(self.inner_ptr_mut(), aux);
1033                Ok(())
1034            }
1035        }
1036    }
1037
1038    /// Access the base modifications associated with this Record through the MM tag.
1039    /// Example:
1040    /// ```
1041    ///    use extended_htslib::bam::{Read, Reader, Record};
1042    ///    let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap();
1043    ///    let mut mod_count = 0;
1044    ///    for r in bam.records() {
1045    ///        let record = r.unwrap();
1046    ///        if let Ok(mods) = record.basemods_iter() {
1047    ///            // print metadata for the modifications present in this record
1048    ///            for mod_code in mods.recorded() {
1049    ///                if let Ok(mod_metadata) = mods.query_type(*mod_code) {
1050    ///                    println!("mod found with code {}/{} flags: [{} {} {}]",
1051    ///                              mod_code, *mod_code as u8 as char,
1052    ///                              mod_metadata.strand, mod_metadata.implicit, mod_metadata.canonical as u8 as char);
1053    ///                }
1054    ///            }
1055    ///
1056    ///            // iterate over the modifications in this record
1057    ///            // the modifications are returned as a tuple with the
1058    ///            // position within SEQ and an hts_base_mod struct
1059    ///            for res in mods {
1060    ///                if let Ok( (position, m) ) = res {
1061    ///                    println!("{} {},{}", position, m.modified_base as u8 as char, m.qual);
1062    ///                    mod_count += 1;
1063    ///                }
1064    ///            }
1065    ///        };
1066    ///    }
1067    ///    assert_eq!(mod_count, 14);
1068    /// ```
1069    pub fn basemods_iter(&self) -> Result<BaseModificationsIter> {
1070        BaseModificationsIter::new(self)
1071    }
1072
1073    /// An iterator that returns all of the modifications for each position as a vector.
1074    /// This is useful for the case where multiple possible modifications can be annotated
1075    /// at a single position (for example a C could be 5-mC or 5-hmC)
1076    pub fn basemods_position_iter(&self) -> Result<BaseModificationsPositionIter> {
1077        BaseModificationsPositionIter::new(self)
1078    }
1079
1080    /// Infer read pair orientation from record. Returns `SequenceReadPairOrientation::None` if record
1081    /// is not paired, mates are not mapping to the same contig, or mates start at the
1082    /// same position.
1083    pub fn read_pair_orientation(&self) -> SequenceReadPairOrientation {
1084        if self.is_paired()
1085            && !self.is_unmapped()
1086            && !self.is_mate_unmapped()
1087            && self.tid() == self.mtid()
1088        {
1089            if self.pos() == self.mpos() {
1090                // both reads start at the same position, we cannot decide on the orientation.
1091                return SequenceReadPairOrientation::None;
1092            }
1093
1094            let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() {
1095                (
1096                    self.pos(),
1097                    self.mpos(),
1098                    !self.is_reverse(),
1099                    !self.is_mate_reverse(),
1100                )
1101            } else {
1102                (
1103                    self.mpos(),
1104                    self.pos(),
1105                    !self.is_mate_reverse(),
1106                    !self.is_reverse(),
1107                )
1108            };
1109
1110            if pos_1 < pos_2 {
1111                match (fwd_1, fwd_2) {
1112                    (true, true) => SequenceReadPairOrientation::F1F2,
1113                    (true, false) => SequenceReadPairOrientation::F1R2,
1114                    (false, true) => SequenceReadPairOrientation::R1F2,
1115                    (false, false) => SequenceReadPairOrientation::R1R2,
1116                }
1117            } else {
1118                match (fwd_2, fwd_1) {
1119                    (true, true) => SequenceReadPairOrientation::F2F1,
1120                    (true, false) => SequenceReadPairOrientation::F2R1,
1121                    (false, true) => SequenceReadPairOrientation::R2F1,
1122                    (false, false) => SequenceReadPairOrientation::R2R1,
1123                }
1124            }
1125        } else {
1126            SequenceReadPairOrientation::None
1127        }
1128    }
1129
1130    flag!(is_paired, set_paired, unset_paired, 1u16);
1131    flag!(is_proper_pair, set_proper_pair, unset_proper_pair, 2u16);
1132    flag!(is_unmapped, set_unmapped, unset_unmapped, 4u16);
1133    flag!(
1134        is_mate_unmapped,
1135        set_mate_unmapped,
1136        unset_mate_unmapped,
1137        8u16
1138    );
1139    flag!(is_reverse, set_reverse, unset_reverse, 16u16);
1140    flag!(is_mate_reverse, set_mate_reverse, unset_mate_reverse, 32u16);
1141    flag!(
1142        is_first_in_template,
1143        set_first_in_template,
1144        unset_first_in_template,
1145        64u16
1146    );
1147    flag!(
1148        is_last_in_template,
1149        set_last_in_template,
1150        unset_last_in_template,
1151        128u16
1152    );
1153    flag!(is_secondary, set_secondary, unset_secondary, 256u16);
1154    flag!(
1155        is_quality_check_failed,
1156        set_quality_check_failed,
1157        unset_quality_check_failed,
1158        512u16
1159    );
1160    flag!(is_duplicate, set_duplicate, unset_duplicate, 1024u16);
1161    flag!(
1162        is_supplementary,
1163        set_supplementary,
1164        unset_supplementary,
1165        2048u16
1166    );
1167}
1168
1169impl Drop for Record {
1170    fn drop(&mut self) {
1171        if self.own {
1172            unsafe { ::libc::free(self.inner.data as *mut ::libc::c_void) }
1173        }
1174    }
1175}
1176
1177impl SequenceRead for Record {
1178    fn name(&self) -> &[u8] {
1179        self.qname()
1180    }
1181
1182    fn base(&self, i: usize) -> u8 {
1183        *decode_base_unchecked(encoded_base(self.seq_data(), i))
1184    }
1185
1186    fn base_qual(&self, i: usize) -> u8 {
1187        self.qual()[i]
1188    }
1189
1190    fn len(&self) -> usize {
1191        self.seq_len()
1192    }
1193
1194    fn is_empty(&self) -> bool {
1195        self.len() == 0
1196    }
1197}
1198
1199impl genome::AbstractInterval for Record {
1200    /// Return contig name. Panics if record does not know its header (which happens if it has not been read from a file).
1201    fn contig(&self) -> &str {
1202        let tid = self.tid();
1203        if tid < 0 {
1204            panic!("invalid tid, must be at least zero");
1205        }
1206        str::from_utf8(
1207            self.header
1208                .as_ref()
1209                .expect(
1210                    "header must be set (this is the case if the record has been read from a file)",
1211                )
1212                .tid2name(tid as u32),
1213        )
1214        .expect("unable to interpret contig name as UTF-8")
1215    }
1216
1217    /// Return genomic range covered by alignment. Panics if `Record::cache_cigar()` has not been called first or `Record::pos()` is less than zero.
1218    fn range(&self) -> ops::Range<genome::Position> {
1219        let end_pos = self
1220            .cigar_cached()
1221            .expect("cigar has not been cached yet, call cache_cigar() first")
1222            .end_pos() as u64;
1223
1224        if self.pos() < 0 {
1225            panic!("invalid position, must be positive")
1226        }
1227
1228        self.pos() as u64..end_pos
1229    }
1230}
1231
1232/// Auxiliary record data
1233///
1234/// The specification allows a wide range of types to be stored as an auxiliary data field of a BAM record.
1235///
1236/// Please note that the [`Aux::Double`] variant is _not_ part of the specification, but it is supported by `htslib`.
1237///
1238/// # Examples
1239///
1240/// ```
1241/// use extended_htslib::{
1242///     bam,
1243///     bam::record::{Aux, AuxArray},
1244///     errors::Error,
1245/// };
1246///
1247/// //Set up BAM record
1248/// let bam_header = bam::Header::new();
1249/// let mut record = bam::Record::from_sam(
1250///     &mut bam::HeaderView::from_header(&bam_header),
1251///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1252/// )
1253/// .unwrap();
1254///
1255/// // Add an integer field
1256/// let aux_integer_field = Aux::I32(1234);
1257/// record.push_aux(b"XI", aux_integer_field).unwrap();
1258///
1259/// match record.aux(b"XI") {
1260///     Ok(value) => {
1261///         // Typically, callers expect an aux field to be of a certain type.
1262///         // If that's not the case, the value can be `match`ed exhaustively.
1263///         if let Aux::I32(v) = value {
1264///             assert_eq!(v, 1234);
1265///         }
1266///     }
1267///     Err(e) => {
1268///         panic!("Error reading aux field: {}", e);
1269///     }
1270/// }
1271///
1272/// // Add an array field
1273/// let array_like_data = vec![0.4, 0.3, 0.2, 0.1];
1274/// let slice_of_data = &array_like_data;
1275/// let aux_array: AuxArray<f32> = slice_of_data.into();
1276/// let aux_array_field = Aux::ArrayFloat(aux_array);
1277/// record.push_aux(b"XA", aux_array_field).unwrap();
1278///
1279/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1280///     let read_array = array.iter().collect::<Vec<_>>();
1281///     assert_eq!(read_array, array_like_data);
1282/// } else {
1283///     panic!("Could not read array data");
1284/// }
1285/// ```
1286#[derive(Debug, PartialEq)]
1287pub enum Aux<'a> {
1288    Char(u8),
1289    I8(i8),
1290    U8(u8),
1291    I16(i16),
1292    U16(u16),
1293    I32(i32),
1294    U32(u32),
1295    Float(f32),
1296    Double(f64), // Not part of specs but implemented in `htslib`
1297    String(&'a str),
1298    HexByteArray(&'a str),
1299    ArrayI8(AuxArray<'a, i8>),
1300    ArrayU8(AuxArray<'a, u8>),
1301    ArrayI16(AuxArray<'a, i16>),
1302    ArrayU16(AuxArray<'a, u16>),
1303    ArrayI32(AuxArray<'a, i32>),
1304    ArrayU32(AuxArray<'a, u32>),
1305    ArrayFloat(AuxArray<'a, f32>),
1306}
1307
1308unsafe impl<'a> Send for Aux<'a> {}
1309unsafe impl<'a> Sync for Aux<'a> {}
1310
1311/// Types that can be used in aux arrays.
1312pub trait AuxArrayElement: Copy {
1313    fn from_le_bytes(bytes: &[u8]) -> Option<Self>;
1314}
1315
1316impl AuxArrayElement for i8 {
1317    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1318        std::io::Cursor::new(bytes).read_i8().ok()
1319    }
1320}
1321impl AuxArrayElement for u8 {
1322    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1323        std::io::Cursor::new(bytes).read_u8().ok()
1324    }
1325}
1326impl AuxArrayElement for i16 {
1327    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1328        std::io::Cursor::new(bytes).read_i16::<LittleEndian>().ok()
1329    }
1330}
1331impl AuxArrayElement for u16 {
1332    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1333        std::io::Cursor::new(bytes).read_u16::<LittleEndian>().ok()
1334    }
1335}
1336impl AuxArrayElement for i32 {
1337    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1338        std::io::Cursor::new(bytes).read_i32::<LittleEndian>().ok()
1339    }
1340}
1341impl AuxArrayElement for u32 {
1342    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1343        std::io::Cursor::new(bytes).read_u32::<LittleEndian>().ok()
1344    }
1345}
1346impl AuxArrayElement for f32 {
1347    fn from_le_bytes(bytes: &[u8]) -> Option<Self> {
1348        std::io::Cursor::new(bytes).read_f32::<LittleEndian>().ok()
1349    }
1350}
1351
1352/// Provides access to aux arrays.
1353///
1354/// Provides methods to either retrieve single elements or an iterator over the
1355/// array.
1356///
1357/// This type is used for wrapping both, array data that was read from a
1358/// BAM record and slices of data that are going to be stored in one.
1359///
1360/// In order to be able to add an `AuxArray` field to a BAM record, `AuxArray`s
1361/// can be constructed via the `From` trait which is implemented for all
1362/// supported types (see [`AuxArrayElement`] for a list).
1363///
1364/// # Examples
1365///
1366/// ```
1367/// use extended_htslib::{
1368///     bam,
1369///     bam::record::{Aux, AuxArray},
1370/// };
1371///
1372/// //Set up BAM record
1373/// let bam_header = bam::Header::new();
1374/// let mut record = bam::Record::from_sam(
1375///     &mut bam::HeaderView::from_header(&bam_header),
1376///     "ali1\t4\t*\t0\t0\t*\t*\t0\t0\tACGT\tFFFF".as_bytes(),
1377/// ).unwrap();
1378///
1379/// let data = vec![0.4, 0.3, 0.2, 0.1];
1380/// let slice_of_data = &data;
1381/// let aux_array: AuxArray<f32> = slice_of_data.into();
1382/// let aux_field = Aux::ArrayFloat(aux_array);
1383/// record.push_aux(b"XA", aux_field);
1384///
1385/// if let Ok(Aux::ArrayFloat(array)) = record.aux(b"XA") {
1386///     // Retrieve the second element from the array
1387///     assert_eq!(array.get(1).unwrap(), 0.3);
1388///     // Iterate over the array and collect it into a `Vec`
1389///     let read_array = array.iter().collect::<Vec<_>>();
1390///     assert_eq!(read_array, data);
1391/// } else {
1392///     panic!("Could not read array data");
1393/// }
1394/// ```
1395#[derive(Debug)]
1396pub enum AuxArray<'a, T> {
1397    TargetType(AuxArrayTargetType<'a, T>),
1398    RawLeBytes(AuxArrayRawLeBytes<'a, T>),
1399}
1400
1401impl<T> PartialEq<AuxArray<'_, T>> for AuxArray<'_, T>
1402where
1403    T: AuxArrayElement + PartialEq,
1404{
1405    fn eq(&self, other: &AuxArray<'_, T>) -> bool {
1406        use AuxArray::*;
1407        match (self, other) {
1408            (TargetType(v), TargetType(v_other)) => v == v_other,
1409            (RawLeBytes(v), RawLeBytes(v_other)) => v == v_other,
1410            (TargetType(_), RawLeBytes(_)) => self.iter().eq(other.iter()),
1411            (RawLeBytes(_), TargetType(_)) => self.iter().eq(other.iter()),
1412        }
1413    }
1414}
1415
1416/// Create AuxArrays from slices of allowed target types.
1417impl<'a, I, T> From<&'a T> for AuxArray<'a, I>
1418where
1419    I: AuxArrayElement,
1420    T: AsRef<[I]> + ?Sized,
1421{
1422    fn from(src: &'a T) -> Self {
1423        AuxArray::TargetType(AuxArrayTargetType {
1424            slice: src.as_ref(),
1425        })
1426    }
1427}
1428
1429impl<'a, T> AuxArray<'a, T>
1430where
1431    T: AuxArrayElement,
1432{
1433    /// Returns the element at a position or None if out of bounds.
1434    pub fn get(&self, index: usize) -> Option<T> {
1435        match self {
1436            AuxArray::TargetType(v) => v.get(index),
1437            AuxArray::RawLeBytes(v) => v.get(index),
1438        }
1439    }
1440
1441    /// Returns the number of elements in the array.
1442    pub fn len(&self) -> usize {
1443        match self {
1444            AuxArray::TargetType(a) => a.len(),
1445            AuxArray::RawLeBytes(a) => a.len(),
1446        }
1447    }
1448
1449    /// Returns true if the array contains no elements.
1450    pub fn is_empty(&self) -> bool {
1451        self.len() == 0
1452    }
1453
1454    /// Returns an iterator over the array.
1455    pub fn iter(&self) -> AuxArrayIter<T> {
1456        AuxArrayIter {
1457            index: 0,
1458            array: self,
1459        }
1460    }
1461
1462    /// Create AuxArrays from raw byte slices borrowed from `bam::Record`.
1463    fn from_bytes(bytes: &'a [u8]) -> Self {
1464        Self::RawLeBytes(AuxArrayRawLeBytes {
1465            slice: bytes,
1466            phantom_data: PhantomData,
1467        })
1468    }
1469}
1470
1471/// Encapsulates slice of target type.
1472#[doc(hidden)]
1473#[derive(Debug, PartialEq)]
1474pub struct AuxArrayTargetType<'a, T> {
1475    slice: &'a [T],
1476}
1477
1478impl<'a, T> AuxArrayTargetType<'a, T>
1479where
1480    T: AuxArrayElement,
1481{
1482    fn get(&self, index: usize) -> Option<T> {
1483        self.slice.get(index).copied()
1484    }
1485
1486    fn len(&self) -> usize {
1487        self.slice.len()
1488    }
1489}
1490
1491/// Encapsulates slice of raw bytes to prevent it from being accidentally accessed.
1492#[doc(hidden)]
1493#[derive(Debug, PartialEq)]
1494pub struct AuxArrayRawLeBytes<'a, T> {
1495    slice: &'a [u8],
1496    phantom_data: PhantomData<T>,
1497}
1498
1499impl<'a, T> AuxArrayRawLeBytes<'a, T>
1500where
1501    T: AuxArrayElement,
1502{
1503    fn get(&self, index: usize) -> Option<T> {
1504        let type_size = std::mem::size_of::<T>();
1505        if index * type_size + type_size > self.slice.len() {
1506            return None;
1507        }
1508        T::from_le_bytes(&self.slice[index * type_size..][..type_size])
1509    }
1510
1511    fn len(&self) -> usize {
1512        self.slice.len() / std::mem::size_of::<T>()
1513    }
1514}
1515
1516/// Aux array iterator
1517///
1518/// This struct is created by the [`AuxArray::iter`] method.
1519pub struct AuxArrayIter<'a, T> {
1520    index: usize,
1521    array: &'a AuxArray<'a, T>,
1522}
1523
1524impl<T> Iterator for AuxArrayIter<'_, T>
1525where
1526    T: AuxArrayElement,
1527{
1528    type Item = T;
1529    fn next(&mut self) -> Option<Self::Item> {
1530        let value = self.array.get(self.index);
1531        self.index += 1;
1532        value
1533    }
1534
1535    fn size_hint(&self) -> (usize, Option<usize>) {
1536        let array_length = self.array.len() - self.index;
1537        (array_length, Some(array_length))
1538    }
1539}
1540
1541/// Auxiliary data iterator
1542///
1543/// This struct is created by the [`Record::aux_iter`] method.
1544///
1545/// This iterator returns `Result`s that wrap tuples containing
1546/// a slice which represents the two-byte tag (`&[u8; 2]`) as
1547/// well as an `Aux` enum that wraps the associated value.
1548///
1549/// When an error occurs, the `Err` variant will be returned
1550/// and the iterator will not be able to advance anymore.
1551pub struct AuxIter<'a> {
1552    aux: &'a [u8],
1553}
1554
1555impl<'a> Iterator for AuxIter<'a> {
1556    type Item = Result<(&'a [u8], Aux<'a>)>;
1557
1558    fn next(&mut self) -> Option<Self::Item> {
1559        // We're finished
1560        if self.aux.is_empty() {
1561            return None;
1562        }
1563        // Incomplete aux data
1564        if (1..=3).contains(&self.aux.len()) {
1565            // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1566            self.aux = &[];
1567            return Some(Err(Error::BamAuxParsingError));
1568        }
1569        let tag = &self.aux[..2];
1570        Some(unsafe {
1571            let data_ptr = self.aux[2..].as_ptr();
1572            Record::read_aux_field(data_ptr)
1573                .map(|(aux, offset)| {
1574                    self.aux = &self.aux[offset..];
1575                    (tag, aux)
1576                })
1577                .map_err(|e| {
1578                    // In the case of an error, we can not safely advance in the aux data, so we terminate the Iteration
1579                    self.aux = &[];
1580                    e
1581                })
1582        })
1583    }
1584}
1585
1586static DECODE_BASE: &[u8] = b"=ACMGRSVTWYHKDBN";
1587static ENCODE_BASE: [u8; 256] = [
1588    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1589    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1590    1, 2, 4, 8, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, 15, 1, 14, 2, 13, 15, 15, 4, 11, 15,
1591    15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, 15, 1, 14, 2,
1592    13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15,
1593    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1594    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1595    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1596    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1597    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1598    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
1599];
1600
1601#[inline]
1602fn encoded_base(encoded_seq: &[u8], i: usize) -> u8 {
1603    (encoded_seq[i / 2] >> ((!i & 1) << 2)) & 0b1111
1604}
1605
1606#[inline]
1607unsafe fn encoded_base_unchecked(encoded_seq: &[u8], i: usize) -> u8 {
1608    (encoded_seq.get_unchecked(i / 2) >> ((!i & 1) << 2)) & 0b1111
1609}
1610
1611#[inline]
1612fn decode_base_unchecked(base: u8) -> &'static u8 {
1613    unsafe { DECODE_BASE.get_unchecked(base as usize) }
1614}
1615
1616/// The sequence of a record.
1617#[derive(Debug, Copy, Clone)]
1618pub struct Seq<'a> {
1619    pub encoded: &'a [u8],
1620    len: usize,
1621}
1622
1623impl<'a> Seq<'a> {
1624    /// Return encoded base. Complexity: O(1).
1625    #[inline]
1626    pub fn encoded_base(&self, i: usize) -> u8 {
1627        encoded_base(self.encoded, i)
1628    }
1629
1630    /// Return encoded base. Complexity: O(1).
1631    #[inline]
1632    pub unsafe fn encoded_base_unchecked(&self, i: usize) -> u8 {
1633        encoded_base_unchecked(self.encoded, i)
1634    }
1635
1636    /// Obtain decoded base without performing bounds checking.
1637    /// Use index based access seq()[i], for checked, safe access.
1638    /// Complexity: O(1).
1639    #[inline]
1640    pub unsafe fn decoded_base_unchecked(&self, i: usize) -> u8 {
1641        *decode_base_unchecked(self.encoded_base_unchecked(i))
1642    }
1643
1644    /// Return decoded sequence. Complexity: O(m) with m being the read length.
1645    pub fn as_bytes(&self) -> Vec<u8> {
1646        (0..self.len()).map(|i| self[i]).collect()
1647    }
1648    /// Return a specific range of nucleotides (sequence-based 0) as Cow<str>, return Err if out of bounds
1649    pub fn rangeextract<T>(&self, range: T) -> std::io::Result<Cow<str>>
1650    where
1651        T: RangeBounds<usize> + SliceIndex<str>,
1652        <T as SliceIndex<str>>::Output: std::string::ToString,
1653    {
1654        if self.is_empty() {
1655            return Err(std::io::Error::from(std::io::ErrorKind::InvalidInput))
1656        }
1657        let seq = match String::from_utf8(self.as_bytes().to_vec()) {
1658            Ok(d) if d.is_ascii() => d,
1659            _ => return Err(std::io::Error::from(std::io::ErrorKind::InvalidData)),
1660        };
1661        let substring = match seq.get(range) {
1662            Some(d) => d,
1663            None => return Err(std::io::Error::from(std::io::ErrorKind::InvalidInput)),
1664        };
1665
1666        Ok(Cow::Owned(substring.to_string()))
1667    }
1668    /// Return length (in bases) of the sequence.
1669    pub fn len(&self) -> usize {
1670        self.len
1671    }
1672
1673    pub fn is_empty(&self) -> bool {
1674        self.len() == 0
1675    }
1676}
1677
1678impl<'a> ops::Index<usize> for Seq<'a> {
1679    type Output = u8;
1680
1681    /// Return decoded base at given position within read. Complexity: O(1).
1682    fn index(&self, index: usize) -> &u8 {
1683        decode_base_unchecked(self.encoded_base(index))
1684    }
1685}
1686
1687unsafe impl<'a> Send for Seq<'a> {}
1688unsafe impl<'a> Sync for Seq<'a> {}
1689
1690#[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1691#[derive(PartialEq, PartialOrd, Eq, Debug, Clone, Copy, Hash)]
1692pub enum Cigar {
1693    Match(u32),    // M
1694    Ins(u32),      // I
1695    Del(u32),      // D
1696    RefSkip(u32),  // N
1697    SoftClip(u32), // S
1698    HardClip(u32), // H
1699    Pad(u32),      // P
1700    Equal(u32),    // =
1701    Diff(u32),     // X
1702}
1703
1704impl Cigar {
1705    fn encode(self) -> u32 {
1706        match self {
1707            Cigar::Match(len) => len << 4, // | 0,
1708            Cigar::Ins(len) => len << 4 | 1,
1709            Cigar::Del(len) => len << 4 | 2,
1710            Cigar::RefSkip(len) => len << 4 | 3,
1711            Cigar::SoftClip(len) => len << 4 | 4,
1712            Cigar::HardClip(len) => len << 4 | 5,
1713            Cigar::Pad(len) => len << 4 | 6,
1714            Cigar::Equal(len) => len << 4 | 7,
1715            Cigar::Diff(len) => len << 4 | 8,
1716        }
1717    }
1718
1719    /// Return the length of the CIGAR.
1720    pub fn len(self) -> u32 {
1721        match self {
1722            Cigar::Match(len) => len,
1723            Cigar::Ins(len) => len,
1724            Cigar::Del(len) => len,
1725            Cigar::RefSkip(len) => len,
1726            Cigar::SoftClip(len) => len,
1727            Cigar::HardClip(len) => len,
1728            Cigar::Pad(len) => len,
1729            Cigar::Equal(len) => len,
1730            Cigar::Diff(len) => len,
1731        }
1732    }
1733
1734    pub fn is_empty(self) -> bool {
1735        self.len() == 0
1736    }
1737
1738    /// Return the character representing the CIGAR.
1739    pub fn char(self) -> char {
1740        match self {
1741            Cigar::Match(_) => 'M',
1742            Cigar::Ins(_) => 'I',
1743            Cigar::Del(_) => 'D',
1744            Cigar::RefSkip(_) => 'N',
1745            Cigar::SoftClip(_) => 'S',
1746            Cigar::HardClip(_) => 'H',
1747            Cigar::Pad(_) => 'P',
1748            Cigar::Equal(_) => '=',
1749            Cigar::Diff(_) => 'X',
1750        }
1751    }
1752}
1753
1754impl fmt::Display for Cigar {
1755    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
1756        fmt.write_fmt(format_args!("{}{}", self.len(), self.char()))
1757    }
1758}
1759
1760unsafe impl Send for Cigar {}
1761unsafe impl Sync for Cigar {}
1762
1763custom_derive! {
1764    /// A CIGAR string. This type wraps around a `Vec<Cigar>`.
1765    ///
1766    /// # Example
1767    ///
1768    /// ```
1769    /// use extended_htslib::bam::record::{Cigar, CigarString};
1770    ///
1771    /// let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
1772    ///
1773    /// // access by index
1774    /// assert_eq!(cigar[0], Cigar::Match(100));
1775    /// // format into classical string representation
1776    /// assert_eq!(format!("{}", cigar), "100M10S");
1777    /// // iterate
1778    /// for op in &cigar {
1779    ///    println!("{}", op);
1780    /// }
1781    /// ```
1782    #[cfg_attr(feature = "serde_feature", derive(Serialize, Deserialize))]
1783    #[derive(NewtypeDeref,
1784            NewtypeDerefMut,
1785             NewtypeIndex(usize),
1786             NewtypeIndexMut(usize),
1787             NewtypeFrom,
1788             PartialEq,
1789             PartialOrd,
1790             Eq,
1791             NewtypeDebug,
1792             Clone,
1793             Hash
1794    )]
1795    pub struct CigarString(pub Vec<Cigar>);
1796}
1797
1798impl CigarString {
1799    /// Create a `CigarStringView` from this CigarString at position `pos`
1800    pub fn into_view(self, pos: i64) -> CigarStringView {
1801        CigarStringView::new(self, pos)
1802    }
1803
1804    /// Calculate the bam cigar from the alignment struct. x is the target string
1805    /// and y is the reference. `hard_clip` controls how unaligned read bases are encoded in the
1806    /// cigar string. Set to true to use the hard clip (`H`) code, or false to use soft clip
1807    /// (`S`) code. See the [SAM spec](https://samtools.github.io/hts-specs/SAMv1.pdf) for more details.
1808    pub fn from_alignment(alignment: &Alignment, hard_clip: bool) -> Self {
1809        match alignment.mode {
1810            AlignmentMode::Global => {
1811                panic!(" Bam cigar fn not supported for Global Alignment mode")
1812            }
1813            AlignmentMode::Local => panic!(" Bam cigar fn not supported for Local Alignment mode"),
1814            _ => {}
1815        }
1816
1817        let mut cigar = Vec::new();
1818        if alignment.operations.is_empty() {
1819            return CigarString(cigar);
1820        }
1821
1822        let add_op = |op: AlignmentOperation, length: u32, cigar: &mut Vec<Cigar>| match op {
1823            AlignmentOperation::Del => cigar.push(Cigar::Del(length)),
1824            AlignmentOperation::Ins => cigar.push(Cigar::Ins(length)),
1825            AlignmentOperation::Subst => cigar.push(Cigar::Diff(length)),
1826            AlignmentOperation::Match => cigar.push(Cigar::Equal(length)),
1827            _ => {}
1828        };
1829
1830        if alignment.xstart > 0 {
1831            cigar.push(if hard_clip {
1832                Cigar::HardClip(alignment.xstart as u32)
1833            } else {
1834                Cigar::SoftClip(alignment.xstart as u32)
1835            });
1836        }
1837
1838        let mut last = alignment.operations[0];
1839        let mut k = 1u32;
1840        for &op in alignment.operations[1..].iter() {
1841            if op == last {
1842                k += 1;
1843            } else {
1844                add_op(last, k, &mut cigar);
1845                k = 1;
1846            }
1847            last = op;
1848        }
1849        add_op(last, k, &mut cigar);
1850        if alignment.xlen > alignment.xend {
1851            cigar.push(if hard_clip {
1852                Cigar::HardClip((alignment.xlen - alignment.xend) as u32)
1853            } else {
1854                Cigar::SoftClip((alignment.xlen - alignment.xend) as u32)
1855            });
1856        }
1857
1858        CigarString(cigar)
1859    }
1860}
1861
1862impl TryFrom<&[u8]> for CigarString {
1863    type Error = Error;
1864
1865    /// Create a CigarString from given &[u8].
1866    /// # Example
1867    /// ```
1868    /// use extended_htslib::bam::record::*;
1869    /// use extended_htslib::bam::record::CigarString;
1870    /// use extended_htslib::bam::record::Cigar::*;
1871    /// use std::convert::TryFrom;
1872    ///
1873    /// let cigar_str = "2H10M5X3=2H".as_bytes();
1874    /// let cigar = CigarString::try_from(cigar_str)
1875    ///     .expect("Unable to parse cigar string.");
1876    /// let expected_cigar = CigarString(vec![
1877    ///     HardClip(2),
1878    ///     Match(10),
1879    ///     Diff(5),
1880    ///     Equal(3),
1881    ///     HardClip(2),
1882    /// ]);
1883    /// assert_eq!(cigar, expected_cigar);
1884    /// ```
1885    fn try_from(bytes: &[u8]) -> Result<Self> {
1886        let mut inner = Vec::new();
1887        let mut i = 0;
1888        let text_len = bytes.len();
1889        while i < text_len {
1890            let mut j = i;
1891            while j < text_len && bytes[j].is_ascii_digit() {
1892                j += 1;
1893            }
1894            // check that length is provided
1895            if i == j {
1896                return Err(Error::BamParseCigar {
1897                    msg: "Expected length before cigar operation [0-9]+[MIDNSHP=X]".to_owned(),
1898                });
1899            }
1900            // get the length of the operation
1901            let s = str::from_utf8(&bytes[i..j]).map_err(|_| Error::BamParseCigar {
1902                msg: format!("Invalid utf-8 bytes '{:?}'.", &bytes[i..j]),
1903            })?;
1904            let n = s.parse().map_err(|_| Error::BamParseCigar {
1905                msg: format!("Unable to parse &str '{:?}' to u32.", s),
1906            })?;
1907            // get the operation
1908            let op = &bytes[j];
1909            inner.push(match op {
1910                b'M' => Cigar::Match(n),
1911                b'I' => Cigar::Ins(n),
1912                b'D' => Cigar::Del(n),
1913                b'N' => Cigar::RefSkip(n),
1914                b'H' => {
1915                    if i == 0 || j + 1 == text_len {
1916                        Cigar::HardClip(n)
1917                    } else {
1918                        return Err(Error::BamParseCigar {
1919                            msg: "Hard clipping ('H') is only valid at the start or end of a cigar."
1920                                .to_owned(),
1921                        });
1922                    }
1923                }
1924                b'S' => {
1925                    if i == 0
1926                        || j + 1 == text_len
1927                        || bytes[i-1] == b'H'
1928                        || bytes[j+1..].iter().all(|c| c.is_ascii_digit() || *c == b'H') {
1929                        Cigar::SoftClip(n)
1930                    } else {
1931                        return Err(Error::BamParseCigar {
1932                        msg: "Soft clips ('S') can only have hard clips ('H') between them and the end of the CIGAR string."
1933                            .to_owned(),
1934                        });
1935                    }
1936                },
1937                b'P' => Cigar::Pad(n),
1938                b'=' => Cigar::Equal(n),
1939                b'X' => Cigar::Diff(n),
1940                op => {
1941                    return Err(Error::BamParseCigar {
1942                        msg: format!("Expected cigar operation [MIDNSHP=X] but got [{}]", op),
1943                    })
1944                }
1945            });
1946            i = j + 1;
1947        }
1948        Ok(CigarString(inner))
1949    }
1950}
1951
1952impl TryFrom<&str> for CigarString {
1953    type Error = Error;
1954
1955    /// Create a CigarString from given &str.
1956    /// # Example
1957    /// ```
1958    /// use extended_htslib::bam::record::*;
1959    /// use extended_htslib::bam::record::CigarString;
1960    /// use extended_htslib::bam::record::Cigar::*;
1961    /// use std::convert::TryFrom;
1962    ///
1963    /// let cigar_str = "2H10M5X3=2H";
1964    /// let cigar = CigarString::try_from(cigar_str)
1965    ///     .expect("Unable to parse cigar string.");
1966    /// let expected_cigar = CigarString(vec![
1967    ///     HardClip(2),
1968    ///     Match(10),
1969    ///     Diff(5),
1970    ///     Equal(3),
1971    ///     HardClip(2),
1972    /// ]);
1973    /// assert_eq!(cigar, expected_cigar);
1974    /// ```
1975    fn try_from(text: &str) -> Result<Self> {
1976        let bytes = text.as_bytes();
1977        if text.chars().count() != bytes.len() {
1978            return Err(Error::BamParseCigar {
1979                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
1980            });
1981        }
1982        CigarString::try_from(bytes)
1983    }
1984}
1985
1986impl<'a> CigarString {
1987    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
1988        self.into_iter()
1989    }
1990}
1991
1992impl<'a> IntoIterator for &'a CigarString {
1993    type Item = &'a Cigar;
1994    type IntoIter = ::std::slice::Iter<'a, Cigar>;
1995
1996    fn into_iter(self) -> Self::IntoIter {
1997        self.0.iter()
1998    }
1999}
2000
2001impl fmt::Display for CigarString {
2002    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2003        for op in self {
2004            fmt.write_fmt(format_args!("{}{}", op.len(), op.char()))?;
2005        }
2006        Ok(())
2007    }
2008}
2009
2010// Get number of leading/trailing softclips if a CigarString taking hardclips into account
2011fn calc_softclips<'a>(mut cigar: impl DoubleEndedIterator<Item = &'a Cigar>) -> i64 {
2012    match (cigar.next(), cigar.next()) {
2013        (Some(Cigar::HardClip(_)), Some(Cigar::SoftClip(s))) | (Some(Cigar::SoftClip(s)), _) => {
2014            *s as i64
2015        }
2016        _ => 0,
2017    }
2018}
2019
2020#[derive(Eq, PartialEq, Clone, Debug)]
2021pub struct CigarStringView {
2022    inner: CigarString,
2023    pos: i64,
2024}
2025
2026impl CigarStringView {
2027    /// Construct a new CigarStringView from a CigarString at a position
2028    pub fn new(c: CigarString, pos: i64) -> CigarStringView {
2029        CigarStringView { inner: c, pos }
2030    }
2031
2032    /// Get (exclusive) end position of alignment.
2033    pub fn end_pos(&self) -> i64 {
2034        let mut pos = self.pos;
2035        for c in self {
2036            match c {
2037                Cigar::Match(l)
2038                | Cigar::RefSkip(l)
2039                | Cigar::Del(l)
2040                | Cigar::Equal(l)
2041                | Cigar::Diff(l) => pos += *l as i64,
2042                // these don't add to end_pos on reference
2043                Cigar::Ins(_) | Cigar::SoftClip(_) | Cigar::HardClip(_) | Cigar::Pad(_) => (),
2044            }
2045        }
2046        pos
2047    }
2048
2049    /// Get the start position of the alignment (0-based).
2050    pub fn pos(&self) -> i64 {
2051        self.pos
2052    }
2053
2054    /// Get number of bases softclipped at the beginning of the alignment.
2055    pub fn leading_softclips(&self) -> i64 {
2056        calc_softclips(self.iter())
2057    }
2058
2059    /// Get number of bases softclipped at the end of the alignment.
2060    pub fn trailing_softclips(&self) -> i64 {
2061        calc_softclips(self.iter().rev())
2062    }
2063
2064    /// Get number of bases hardclipped at the beginning of the alignment.
2065    pub fn leading_hardclips(&self) -> i64 {
2066        self.first().map_or(0, |cigar| {
2067            if let Cigar::HardClip(s) = cigar {
2068                *s as i64
2069            } else {
2070                0
2071            }
2072        })
2073    }
2074
2075    /// Get number of bases hardclipped at the end of the alignment.
2076    pub fn trailing_hardclips(&self) -> i64 {
2077        self.last().map_or(0, |cigar| {
2078            if let Cigar::HardClip(s) = cigar {
2079                *s as i64
2080            } else {
2081                0
2082            }
2083        })
2084    }
2085
2086    /// For a given position in the reference, get corresponding position within read.
2087    /// If reference position is outside of the read alignment, return None.
2088    ///
2089    /// # Arguments
2090    ///
2091    /// * `ref_pos` - the reference position
2092    /// * `include_softclips` - if true, softclips will be considered as matches or mismatches
2093    /// * `include_dels` - if true, positions within deletions will be considered (first reference matching read position after deletion will be returned)
2094    ///
2095    pub fn read_pos(
2096        &self,
2097        ref_pos: u32,
2098        include_softclips: bool,
2099        include_dels: bool,
2100    ) -> Result<Option<u32>> {
2101        let mut rpos = self.pos as u32; // reference position
2102        let mut qpos = 0u32; // position within read
2103        let mut j = 0; // index into cigar operation vector
2104
2105        // find first cigar operation referring to qpos = 0 (and thus bases in record.seq()),
2106        // because all augmentations of qpos and rpos before that are invalid
2107        for (i, c) in self.iter().enumerate() {
2108            match c {
2109                Cigar::Match(_) |
2110                Cigar::Diff(_)  |
2111                Cigar::Equal(_) |
2112                // this is unexpected, but bwa + GATK indel realignment can produce insertions
2113                // before matching positions
2114                Cigar::Ins(_) => {
2115                    j = i;
2116                    break;
2117                },
2118                Cigar::SoftClip(l) => {
2119                    j = i;
2120                    if include_softclips {
2121                        // Alignment starts with softclip and we want to include it in the
2122                        // projection of the reference position. However, the POS field does not
2123                        // include the softclip. Hence we have to subtract its length.
2124                        rpos = rpos.saturating_sub(*l);
2125                    }
2126                    break;
2127                },
2128                Cigar::Del(l) => {
2129                    // METHOD: leading deletions can happen in case of trimmed reads where
2130                    // a primer has been removed AFTER read mapping.
2131                    // Example: 24M8I8D18M9S before trimming, 32H8D18M9S after trimming
2132                    // with fgbio. While leading deletions should be impossible with
2133                    // normal read mapping, they make perfect sense with primer trimming
2134                    // because the mapper still had the evidence to decide in favor of
2135                    // the deletion via the primer sequence.
2136                    rpos += l;
2137                },
2138                Cigar::RefSkip(_) => {
2139                    return Err(Error::BamUnexpectedCigarOperation {
2140                        msg: "'reference skip' (N) found before any operation describing read sequence".to_owned()
2141                    });
2142                },
2143                Cigar::HardClip(_) if i > 0 && i < self.len()-1 => {
2144                    return Err(Error::BamUnexpectedCigarOperation{
2145                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2146                    });
2147                },
2148                // if we have reached the end of the CigarString with only pads and hard clips, we have no read position matching the variant
2149                Cigar::Pad(_) | Cigar::HardClip(_) if i == self.len()-1 => return Ok(None),
2150                // skip leading HardClips and Pads, as they consume neither read sequence nor reference sequence
2151                Cigar::Pad(_) | Cigar::HardClip(_) => ()
2152            }
2153        }
2154
2155        let contains_ref_pos = |cigar_op_start: u32, cigar_op_length: u32| {
2156            cigar_op_start <= ref_pos && cigar_op_start + cigar_op_length > ref_pos
2157        };
2158
2159        while rpos <= ref_pos && j < self.len() {
2160            match self[j] {
2161                // potential SNV evidence
2162                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) if contains_ref_pos(rpos, l) => {
2163                    // difference between desired position and first position of current cigar
2164                    // operation
2165                    qpos += ref_pos - rpos;
2166                    return Ok(Some(qpos));
2167                }
2168                Cigar::SoftClip(l) if include_softclips && contains_ref_pos(rpos, l) => {
2169                    qpos += ref_pos - rpos;
2170                    return Ok(Some(qpos));
2171                }
2172                Cigar::Del(l) if include_dels && contains_ref_pos(rpos, l) => {
2173                    // qpos shall resemble the start of the deletion
2174                    return Ok(Some(qpos));
2175                }
2176                // for others, just increase pos and qpos as needed
2177                Cigar::Match(l) | Cigar::Diff(l) | Cigar::Equal(l) => {
2178                    rpos += l;
2179                    qpos += l;
2180                    j += 1;
2181                }
2182                Cigar::SoftClip(l) => {
2183                    qpos += l;
2184                    j += 1;
2185                    if include_softclips {
2186                        rpos += l;
2187                    }
2188                }
2189                Cigar::Ins(l) => {
2190                    qpos += l;
2191                    j += 1;
2192                }
2193                Cigar::RefSkip(l) | Cigar::Del(l) => {
2194                    rpos += l;
2195                    j += 1;
2196                }
2197                Cigar::Pad(_) => {
2198                    j += 1;
2199                }
2200                Cigar::HardClip(_) if j < self.len() - 1 => {
2201                    return Err(Error::BamUnexpectedCigarOperation{
2202                        msg: "'hard clip' (H) found in between operations, contradicting SAMv1 spec that hard clips can only be at the ends of reads".to_owned()
2203                    });
2204                }
2205                Cigar::HardClip(_) => return Ok(None),
2206            }
2207        }
2208
2209        Ok(None)
2210    }
2211
2212    /// transfer ownership of the Cigar out of the CigarView
2213    pub fn take(self) -> CigarString {
2214        self.inner
2215    }
2216}
2217
2218impl ops::Deref for CigarStringView {
2219    type Target = CigarString;
2220
2221    fn deref(&self) -> &CigarString {
2222        &self.inner
2223    }
2224}
2225
2226impl ops::Index<usize> for CigarStringView {
2227    type Output = Cigar;
2228
2229    fn index(&self, index: usize) -> &Cigar {
2230        self.inner.index(index)
2231    }
2232}
2233
2234impl ops::IndexMut<usize> for CigarStringView {
2235    fn index_mut(&mut self, index: usize) -> &mut Cigar {
2236        self.inner.index_mut(index)
2237    }
2238}
2239
2240impl<'a> CigarStringView {
2241    pub fn iter(&'a self) -> ::std::slice::Iter<'a, Cigar> {
2242        self.inner.into_iter()
2243    }
2244}
2245
2246impl<'a> IntoIterator for &'a CigarStringView {
2247    type Item = &'a Cigar;
2248    type IntoIter = ::std::slice::Iter<'a, Cigar>;
2249
2250    fn into_iter(self) -> Self::IntoIter {
2251        self.inner.into_iter()
2252    }
2253}
2254
2255impl fmt::Display for CigarStringView {
2256    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
2257        self.inner.fmt(fmt)
2258    }
2259}
2260
2261pub struct BaseModificationMetadata {
2262    pub strand: i32,
2263    pub implicit: i32,
2264    pub canonical: u8,
2265}
2266
2267/// struct containing the internal state required to access
2268/// the base modifications for a bam::Record
2269pub struct BaseModificationState<'a> {
2270    record: &'a Record,
2271    state: *mut htslib::hts_base_mod_state,
2272    buffer: Vec<htslib::hts_base_mod>,
2273    buffer_pos: i32,
2274}
2275
2276impl BaseModificationState<'_> {
2277    /// Initialize a new BaseModification struct from a bam::Record
2278    /// This function allocates memory for the state structure
2279    /// and initializes the iterator to the start of the modification
2280    /// records.
2281    fn new<'a>(r: &'a Record) -> Result<BaseModificationState<'a>> {
2282        let mut bm = unsafe {
2283            BaseModificationState {
2284                record: r,
2285                state: hts_sys::hts_base_mod_state_alloc(),
2286                buffer: Vec::new(),
2287                buffer_pos: -1,
2288            }
2289        };
2290
2291        if bm.state.is_null() {
2292            panic!("Unable to allocate memory for hts_base_mod_state");
2293        }
2294
2295        // parse the MM tag to initialize the state
2296        unsafe {
2297            let ret = hts_sys::bam_parse_basemod(bm.record.inner_ptr(), bm.state);
2298            if ret != 0 {
2299                return Err(Error::BamBaseModificationTagNotFound);
2300            }
2301        }
2302
2303        let types = bm.recorded();
2304        bm.buffer.reserve(types.len());
2305        return Ok(bm);
2306    }
2307
2308    pub fn buffer_next_mods(&mut self) -> Result<usize> {
2309        unsafe {
2310            let ret = hts_sys::bam_next_basemod(
2311                self.record.inner_ptr(),
2312                self.state,
2313                self.buffer.as_mut_ptr(),
2314                self.buffer.capacity() as i32,
2315                &mut self.buffer_pos,
2316            );
2317
2318            if ret < 0 {
2319                return Err(Error::BamBaseModificationIterationFailed);
2320            }
2321
2322            // the htslib API won't write more than buffer.capacity() mods to the output array but it will
2323            // return the actual number of modifications found. We return an error to the caller
2324            // in the case where there was insufficient storage to return all mods.
2325            if ret as usize > self.buffer.capacity() {
2326                return Err(Error::BamBaseModificationTooManyMods);
2327            }
2328
2329            // we read the modifications directly into the vector, which does
2330            // not update the length so needs to be manually set
2331            self.buffer.set_len(ret as usize);
2332
2333            return Ok(ret as usize);
2334        }
2335    }
2336
2337    /// Return an array containing the modification codes listed for this record.
2338    /// Positive values are ascii character codes (eg m), negative values are chEBI codes.
2339    pub fn recorded<'a>(&self) -> &'a [i32] {
2340        unsafe {
2341            let mut n: i32 = 0;
2342            let data_ptr: *const i32 = hts_sys::bam_mods_recorded(self.state, &mut n);
2343
2344            // htslib should not return a null pointer, even when there are no base mods
2345            if data_ptr.is_null() {
2346                panic!("Unable to obtain pointer to base modifications");
2347            }
2348            assert!(n >= 0);
2349            return slice::from_raw_parts(data_ptr, n as usize);
2350        }
2351    }
2352
2353    /// Return metadata for the specified character code indicating the strand
2354    /// the base modification was called on, whether the tag uses implicit mode
2355    /// and the ascii code for the canonical base.
2356    /// If there are multiple modifications with the same code this will return the data
2357    /// for the first mod.  See https://github.com/samtools/htslib/issues/1635
2358    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2359        unsafe {
2360            let mut strand: i32 = 0;
2361            let mut implicit: i32 = 0;
2362            // This may be i8 or u8 in hts_sys.
2363            let mut canonical: c_char = 0;
2364
2365            let ret = hts_sys::bam_mods_query_type(
2366                self.state,
2367                code,
2368                &mut strand,
2369                &mut implicit,
2370                &mut canonical,
2371            );
2372            if ret == -1 {
2373                return Err(Error::BamBaseModificationTypeNotFound);
2374            } else {
2375                return Ok(BaseModificationMetadata {
2376                    strand,
2377                    implicit,
2378                    canonical: canonical.try_into().unwrap(),
2379                });
2380            }
2381        }
2382    }
2383}
2384
2385impl Drop for BaseModificationState<'_> {
2386    fn drop<'a>(&mut self) {
2387        unsafe {
2388            hts_sys::hts_base_mod_state_free(self.state);
2389        }
2390    }
2391}
2392
2393/// Iterator over the base modifications that returns
2394/// a vector for all of the mods at each position
2395pub struct BaseModificationsPositionIter<'a> {
2396    mod_state: BaseModificationState<'a>,
2397}
2398
2399impl BaseModificationsPositionIter<'_> {
2400    fn new<'a>(r: &'a Record) -> Result<BaseModificationsPositionIter<'a>> {
2401        let state = BaseModificationState::new(r)?;
2402        Ok(BaseModificationsPositionIter { mod_state: state })
2403    }
2404
2405    pub fn recorded<'a>(&self) -> &'a [i32] {
2406        return self.mod_state.recorded();
2407    }
2408
2409    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2410        return self.mod_state.query_type(code);
2411    }
2412}
2413
2414impl<'a> Iterator for BaseModificationsPositionIter<'a> {
2415    type Item = Result<(i32, Vec<hts_sys::hts_base_mod>)>;
2416
2417    fn next(&mut self) -> Option<Self::Item> {
2418        let ret = self.mod_state.buffer_next_mods();
2419
2420        // Three possible things happened in buffer_next_mods:
2421        // 1. the htslib API call was successful but there are no more mods
2422        // 2. ths htslib API call was successful and we read some mods
2423        // 3. the htslib API call failed, we propogate the error wrapped in an option
2424        match ret {
2425            Ok(num_mods) => {
2426                if num_mods == 0 {
2427                    return None;
2428                } else {
2429                    let data = (self.mod_state.buffer_pos, self.mod_state.buffer.clone());
2430                    return Some(Ok(data));
2431                }
2432            }
2433            Err(e) => return Some(Err(e)),
2434        }
2435    }
2436}
2437
2438/// Iterator over the base modifications that returns
2439/// the next modification found, one by one
2440pub struct BaseModificationsIter<'a> {
2441    mod_state: BaseModificationState<'a>,
2442    buffer_idx: usize,
2443}
2444
2445impl BaseModificationsIter<'_> {
2446    fn new<'a>(r: &'a Record) -> Result<BaseModificationsIter<'a>> {
2447        let state = BaseModificationState::new(r)?;
2448        Ok(BaseModificationsIter {
2449            mod_state: state,
2450            buffer_idx: 0,
2451        })
2452    }
2453
2454    pub fn recorded<'a>(&self) -> &'a [i32] {
2455        return self.mod_state.recorded();
2456    }
2457
2458    pub fn query_type<'a>(&self, code: i32) -> Result<BaseModificationMetadata> {
2459        return self.mod_state.query_type(code);
2460    }
2461}
2462
2463impl<'a> Iterator for BaseModificationsIter<'a> {
2464    type Item = Result<(i32, hts_sys::hts_base_mod)>;
2465
2466    fn next(&mut self) -> Option<Self::Item> {
2467        if self.buffer_idx == self.mod_state.buffer.len() {
2468            // need to use the internal state to read the next
2469            // set of modifications into the buffer
2470            let ret = self.mod_state.buffer_next_mods();
2471
2472            match ret {
2473                Ok(num_mods) => {
2474                    if num_mods == 0 {
2475                        // done iterating
2476                        return None;
2477                    } else {
2478                        // we read some mods, reset the position in the buffer then fall through
2479                        self.buffer_idx = 0;
2480                    }
2481                }
2482                Err(e) => return Some(Err(e)),
2483            }
2484        }
2485
2486        // if we got here when there are mods buffered that we haven't emitted yet
2487        assert!(self.buffer_idx < self.mod_state.buffer.len());
2488        let data = (
2489            self.mod_state.buffer_pos,
2490            self.mod_state.buffer[self.buffer_idx],
2491        );
2492        self.buffer_idx += 1;
2493        return Some(Ok(data));
2494    }
2495}
2496
2497#[cfg(test)]
2498mod tests {
2499    use super::*;
2500
2501    #[test]
2502    fn test_cigar_string() {
2503        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]);
2504
2505        assert_eq!(cigar[0], Cigar::Match(100));
2506        assert_eq!(format!("{}", cigar), "100M10S");
2507        for op in &cigar {
2508            println!("{}", op);
2509        }
2510    }
2511
2512    #[test]
2513    fn test_cigar_string_view_pos() {
2514        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(5);
2515        assert_eq!(cigar.pos(), 5);
2516    }
2517
2518    #[test]
2519    fn test_cigar_string_leading_softclips() {
2520        let cigar = CigarString(vec![Cigar::SoftClip(10), Cigar::Match(100)]).into_view(0);
2521        assert_eq!(cigar.leading_softclips(), 10);
2522        let cigar2 = CigarString(vec![
2523            Cigar::HardClip(5),
2524            Cigar::SoftClip(10),
2525            Cigar::Match(100),
2526        ])
2527        .into_view(0);
2528        assert_eq!(cigar2.leading_softclips(), 10);
2529    }
2530
2531    #[test]
2532    fn test_cigar_string_trailing_softclips() {
2533        let cigar = CigarString(vec![Cigar::Match(100), Cigar::SoftClip(10)]).into_view(0);
2534        assert_eq!(cigar.trailing_softclips(), 10);
2535        let cigar2 = CigarString(vec![
2536            Cigar::Match(100),
2537            Cigar::SoftClip(10),
2538            Cigar::HardClip(5),
2539        ])
2540        .into_view(0);
2541        assert_eq!(cigar2.trailing_softclips(), 10);
2542    }
2543
2544    #[test]
2545    fn test_cigar_read_pos() {
2546        let vpos = 5; // variant position
2547
2548        // Ignore leading HardClip
2549        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2550        // var:                       V
2551        // c01: 7H                 M  M
2552        // qpos:                  00 01
2553        let c01 = CigarString(vec![Cigar::HardClip(7), Cigar::Match(2)]).into_view(4);
2554        assert_eq!(c01.read_pos(vpos, false, false).unwrap(), Some(1));
2555
2556        // Skip leading SoftClip or use as pre-POS matches
2557        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2558        // var:                       V
2559        // c02: 5H2S         M  M  M  M  M  M
2560        // qpos:  00        02 03 04 05 06 07
2561        // c02: 5H     S  S  M  M  M  M  M  M
2562        // qpos:      00 01 02 03 04 05 06 07
2563        let c02 = CigarString(vec![Cigar::SoftClip(2), Cigar::Match(6)]).into_view(2);
2564        assert_eq!(c02.read_pos(vpos, false, false).unwrap(), Some(5));
2565        assert_eq!(c02.read_pos(vpos, true, false).unwrap(), Some(5));
2566
2567        // Skip leading SoftClip returning None for unmatched reference positiong or use as
2568        // pre-POS matches
2569        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2570        // var:                       V
2571        // c03:  3S                      M  M
2572        // qpos: 00                     03 04
2573        // c03:                 S  S  S  M  M
2574        // qpos:               00 01 02 03 04
2575        let c03 = CigarString(vec![Cigar::SoftClip(3), Cigar::Match(6)]).into_view(6);
2576        assert_eq!(c03.read_pos(vpos, false, false).unwrap(), None);
2577        assert_eq!(c03.read_pos(vpos, true, false).unwrap(), Some(2));
2578
2579        // Skip leading Insertion before variant position
2580        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2581        // var:                       V
2582        // c04:  3I                X  X  X
2583        // qpos: 00               03 04 05
2584        let c04 = CigarString(vec![Cigar::Ins(3), Cigar::Diff(3)]).into_view(4);
2585        assert_eq!(c04.read_pos(vpos, true, false).unwrap(), Some(4));
2586
2587        // Matches and deletion before variant position
2588        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2589        // var:                       V
2590        // c05:        =  =  D  D  X  =  =
2591        // qpos:      00 01       02 03 04 05
2592        let c05 = CigarString(vec![
2593            Cigar::Equal(2),
2594            Cigar::Del(2),
2595            Cigar::Diff(1),
2596            Cigar::Equal(2),
2597        ])
2598        .into_view(0);
2599        assert_eq!(c05.read_pos(vpos, true, false).unwrap(), Some(3));
2600
2601        // single nucleotide Deletion covering variant position
2602        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2603        // var:                       V
2604        // c06:                 =  =  D  X  X
2605        // qpos:               00 01    02 03
2606        let c06 = CigarString(vec![Cigar::Equal(2), Cigar::Del(1), Cigar::Diff(2)]).into_view(3);
2607        assert_eq!(c06.read_pos(vpos, false, true).unwrap(), Some(2));
2608        assert_eq!(c06.read_pos(vpos, false, false).unwrap(), None);
2609
2610        // three nucleotide Deletion covering variant position
2611        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2612        // var:                       V
2613        // c07:              =  =  D  D  D  M  M
2614        // qpos:            00 01          02 03
2615        let c07 = CigarString(vec![Cigar::Equal(2), Cigar::Del(3), Cigar::Match(2)]).into_view(2);
2616        assert_eq!(c07.read_pos(vpos, false, true).unwrap(), Some(2));
2617        assert_eq!(c07.read_pos(vpos, false, false).unwrap(), None);
2618
2619        // three nucleotide RefSkip covering variant position
2620        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2621        // var:                       V
2622        // c08:              =  X  N  N  N  M  M
2623        // qpos:            00 01          02 03
2624        let c08 = CigarString(vec![
2625            Cigar::Equal(1),
2626            Cigar::Diff(1),
2627            Cigar::RefSkip(3),
2628            Cigar::Match(2),
2629        ])
2630        .into_view(2);
2631        assert_eq!(c08.read_pos(vpos, false, true).unwrap(), None);
2632        assert_eq!(c08.read_pos(vpos, false, false).unwrap(), None);
2633
2634        // internal hard clip before variant pos
2635        // ref:       00 01 02 03    04 05 06 07 08 09 10 11 12 13 14 15
2636        // var:                          V
2637        // c09: 3H           =  = 3H  =  =
2638        // qpos:            00 01    02 03
2639        let c09 = CigarString(vec![
2640            Cigar::HardClip(3),
2641            Cigar::Equal(2),
2642            Cigar::HardClip(3),
2643            Cigar::Equal(2),
2644        ])
2645        .into_view(2);
2646        assert_eq!(c09.read_pos(vpos, false, true).is_err(), true);
2647
2648        // Deletion right before variant position
2649        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2650        // var:                       V
2651        // c10:           M  M  D  D  M  M
2652        // qpos:         00 01       02 03
2653        let c10 = CigarString(vec![Cigar::Match(2), Cigar::Del(2), Cigar::Match(2)]).into_view(1);
2654        assert_eq!(c10.read_pos(vpos, false, false).unwrap(), Some(2));
2655
2656        // Insertion right before variant position
2657        // ref:       00 01 02 03 04    05 06 07 08 09 10 11 12 13 14 15
2658        // var:                          V
2659        // c11:                 M  M 3I  M
2660        // qpos:               00 01 02 05 06
2661        let c11 = CigarString(vec![Cigar::Match(2), Cigar::Ins(3), Cigar::Match(2)]).into_view(3);
2662        assert_eq!(c11.read_pos(vpos, false, false).unwrap(), Some(5));
2663
2664        // Insertion right after variant position
2665        // ref:       00 01 02 03 04 05    06 07 08 09 10 11 12 13 14 15
2666        // var:                       V
2667        // c12:                 M  M  M 2I  =
2668        // qpos:               00 01 02 03 05
2669        let c12 = CigarString(vec![Cigar::Match(3), Cigar::Ins(2), Cigar::Equal(1)]).into_view(3);
2670        assert_eq!(c12.read_pos(vpos, false, false).unwrap(), Some(2));
2671
2672        // Deletion right after variant position
2673        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2674        // var:                       V
2675        // c13:                 M  M  M  D  =
2676        // qpos:               00 01 02    03
2677        let c13 = CigarString(vec![Cigar::Match(3), Cigar::Del(1), Cigar::Equal(1)]).into_view(3);
2678        assert_eq!(c13.read_pos(vpos, false, false).unwrap(), Some(2));
2679
2680        // A messy and complicated example, including a Pad operation
2681        let vpos2 = 15;
2682        // ref:       00    01 02    03 04 05    06 07 08 09 10 11 12 13 14 15
2683        // var:                                                           V
2684        // c14: 5H3S   = 2P  M  X 3I  M  M  D 2I  =  =  N  N  N  M  M  M  =  =  5S2H
2685        // qpos:  00  03    04 05 06 09 10    11 13 14          15 16 17 18 19
2686        let c14 = CigarString(vec![
2687            Cigar::HardClip(5),
2688            Cigar::SoftClip(3),
2689            Cigar::Equal(1),
2690            Cigar::Pad(2),
2691            Cigar::Match(1),
2692            Cigar::Diff(1),
2693            Cigar::Ins(3),
2694            Cigar::Match(2),
2695            Cigar::Del(1),
2696            Cigar::Ins(2),
2697            Cigar::Equal(2),
2698            Cigar::RefSkip(3),
2699            Cigar::Match(3),
2700            Cigar::Equal(2),
2701            Cigar::SoftClip(5),
2702            Cigar::HardClip(2),
2703        ])
2704        .into_view(0);
2705        assert_eq!(c14.read_pos(vpos2, false, false).unwrap(), Some(19));
2706
2707        // HardClip after Pad
2708        // ref:       00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
2709        // var:                       V
2710        // c15: 5P1H            =  =  =
2711        // qpos:               00 01 02
2712        let c15 =
2713            CigarString(vec![Cigar::Pad(5), Cigar::HardClip(1), Cigar::Equal(3)]).into_view(3);
2714        assert_eq!(c15.read_pos(vpos, false, false).is_err(), true);
2715
2716        // only HardClip and Pad operations
2717        // c16: 7H5P2H
2718        let c16 =
2719            CigarString(vec![Cigar::HardClip(7), Cigar::Pad(5), Cigar::HardClip(2)]).into_view(3);
2720        assert_eq!(c16.read_pos(vpos, false, false).unwrap(), None);
2721    }
2722
2723    #[test]
2724    fn test_clone() {
2725        let mut rec = Record::new();
2726        rec.set_pos(300);
2727        rec.set_qname(b"read1");
2728        let clone = rec.clone();
2729        assert_eq!(rec, clone);
2730    }
2731
2732    #[test]
2733    fn test_flags() {
2734        let mut rec = Record::new();
2735
2736        rec.set_paired();
2737        assert_eq!(rec.is_paired(), true);
2738
2739        rec.set_supplementary();
2740        assert_eq!(rec.is_supplementary(), true);
2741        assert_eq!(rec.is_supplementary(), true);
2742
2743        rec.unset_paired();
2744        assert_eq!(rec.is_paired(), false);
2745        assert_eq!(rec.is_supplementary(), true);
2746
2747        rec.unset_supplementary();
2748        assert_eq!(rec.is_paired(), false);
2749        assert_eq!(rec.is_supplementary(), false);
2750    }
2751
2752    #[test]
2753    fn test_cigar_parse() {
2754        let cigar = "1S20M1D2I3X1=2H";
2755        let parsed = CigarString::try_from(cigar).unwrap();
2756        assert_eq!(parsed.to_string(), cigar);
2757    }
2758}
2759
2760#[cfg(test)]
2761mod alignment_cigar_tests {
2762    use super::*;
2763    use crate::bam::{Read, Reader};
2764    use bio_types::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
2765    use bio_types::alignment::{Alignment, AlignmentMode};
2766
2767    #[test]
2768    fn test_cigar() {
2769        let alignment = Alignment {
2770            score: 5,
2771            xstart: 3,
2772            ystart: 0,
2773            xend: 9,
2774            yend: 10,
2775            ylen: 10,
2776            xlen: 10,
2777            operations: vec![Match, Match, Match, Subst, Ins, Ins, Del, Del],
2778            mode: AlignmentMode::Semiglobal,
2779        };
2780        assert_eq!(alignment.cigar(false), "3S3=1X2I2D1S");
2781        assert_eq!(
2782            CigarString::from_alignment(&alignment, false).0,
2783            vec![
2784                Cigar::SoftClip(3),
2785                Cigar::Equal(3),
2786                Cigar::Diff(1),
2787                Cigar::Ins(2),
2788                Cigar::Del(2),
2789                Cigar::SoftClip(1),
2790            ]
2791        );
2792
2793        let alignment = Alignment {
2794            score: 5,
2795            xstart: 0,
2796            ystart: 5,
2797            xend: 4,
2798            yend: 10,
2799            ylen: 10,
2800            xlen: 5,
2801            operations: vec![Yclip(5), Match, Subst, Subst, Ins, Del, Del, Xclip(1)],
2802            mode: AlignmentMode::Custom,
2803        };
2804        assert_eq!(alignment.cigar(false), "1=2X1I2D1S");
2805        assert_eq!(alignment.cigar(true), "1=2X1I2D1H");
2806        assert_eq!(
2807            CigarString::from_alignment(&alignment, false).0,
2808            vec![
2809                Cigar::Equal(1),
2810                Cigar::Diff(2),
2811                Cigar::Ins(1),
2812                Cigar::Del(2),
2813                Cigar::SoftClip(1),
2814            ]
2815        );
2816        assert_eq!(
2817            CigarString::from_alignment(&alignment, true).0,
2818            vec![
2819                Cigar::Equal(1),
2820                Cigar::Diff(2),
2821                Cigar::Ins(1),
2822                Cigar::Del(2),
2823                Cigar::HardClip(1),
2824            ]
2825        );
2826
2827        let alignment = Alignment {
2828            score: 5,
2829            xstart: 0,
2830            ystart: 5,
2831            xend: 3,
2832            yend: 8,
2833            ylen: 10,
2834            xlen: 3,
2835            operations: vec![Yclip(5), Subst, Match, Subst, Yclip(2)],
2836            mode: AlignmentMode::Custom,
2837        };
2838        assert_eq!(alignment.cigar(false), "1X1=1X");
2839        assert_eq!(
2840            CigarString::from_alignment(&alignment, false).0,
2841            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2842        );
2843
2844        let alignment = Alignment {
2845            score: 5,
2846            xstart: 0,
2847            ystart: 5,
2848            xend: 3,
2849            yend: 8,
2850            ylen: 10,
2851            xlen: 3,
2852            operations: vec![Subst, Match, Subst],
2853            mode: AlignmentMode::Semiglobal,
2854        };
2855        assert_eq!(alignment.cigar(false), "1X1=1X");
2856        assert_eq!(
2857            CigarString::from_alignment(&alignment, false).0,
2858            vec![Cigar::Diff(1), Cigar::Equal(1), Cigar::Diff(1)]
2859        );
2860    }
2861
2862    #[test]
2863    fn test_read_orientation_f1r2() {
2864        let mut bam = Reader::from_path(&"test/test_paired.sam").unwrap();
2865
2866        for res in bam.records() {
2867            let record = res.unwrap();
2868            assert_eq!(
2869                record.read_pair_orientation(),
2870                SequenceReadPairOrientation::F1R2
2871            );
2872        }
2873    }
2874
2875    #[test]
2876    fn test_read_orientation_f2r1() {
2877        let mut bam = Reader::from_path(&"test/test_nonstandard_orientation.sam").unwrap();
2878
2879        for res in bam.records() {
2880            let record = res.unwrap();
2881            assert_eq!(
2882                record.read_pair_orientation(),
2883                SequenceReadPairOrientation::F2R1
2884            );
2885        }
2886    }
2887
2888    #[test]
2889    fn test_read_orientation_supplementary() {
2890        let mut bam = Reader::from_path(&"test/test_orientation_supplementary.sam").unwrap();
2891
2892        for res in bam.records() {
2893            let record = res.unwrap();
2894            assert_eq!(
2895                record.read_pair_orientation(),
2896                SequenceReadPairOrientation::F2R1
2897            );
2898        }
2899    }
2900
2901    #[test]
2902    pub fn test_cigar_parsing_non_ascii_error() {
2903        let cigar_str = "43ጷ";
2904        let expected_error = Err(Error::BamParseCigar {
2905                msg: "CIGAR string contained non-ASCII characters, which are not valid. Valid are [0-9MIDNSHP=X].".to_owned(),
2906            });
2907
2908        let result = CigarString::try_from(cigar_str);
2909        assert_eq!(expected_error, result);
2910    }
2911
2912    #[test]
2913    pub fn test_cigar_parsing() {
2914        // parsing test cases
2915        let cigar_strs = vec![
2916            "1H10M4D100I300N1102=10P25X11S", // test every cigar opt
2917            "100M",                          // test a single op
2918            "",                              // test empty input
2919            "1H1=1H",                        // test simple hardclip
2920            "1S1=1S",                        // test simple softclip
2921            "11H11S11=11S11H",               // test complex softclip
2922            "10H",
2923            "10S",
2924        ];
2925        // expected results
2926        let cigars = vec![
2927            CigarString(vec![
2928                Cigar::HardClip(1),
2929                Cigar::Match(10),
2930                Cigar::Del(4),
2931                Cigar::Ins(100),
2932                Cigar::RefSkip(300),
2933                Cigar::Equal(1102),
2934                Cigar::Pad(10),
2935                Cigar::Diff(25),
2936                Cigar::SoftClip(11),
2937            ]),
2938            CigarString(vec![Cigar::Match(100)]),
2939            CigarString(vec![]),
2940            CigarString(vec![
2941                Cigar::HardClip(1),
2942                Cigar::Equal(1),
2943                Cigar::HardClip(1),
2944            ]),
2945            CigarString(vec![
2946                Cigar::SoftClip(1),
2947                Cigar::Equal(1),
2948                Cigar::SoftClip(1),
2949            ]),
2950            CigarString(vec![
2951                Cigar::HardClip(11),
2952                Cigar::SoftClip(11),
2953                Cigar::Equal(11),
2954                Cigar::SoftClip(11),
2955                Cigar::HardClip(11),
2956            ]),
2957            CigarString(vec![Cigar::HardClip(10)]),
2958            CigarString(vec![Cigar::SoftClip(10)]),
2959        ];
2960        // compare
2961        for (&cigar_str, truth) in cigar_strs.iter().zip(cigars.iter()) {
2962            let cigar_parse = CigarString::try_from(cigar_str)
2963                .expect(&format!("Unable to parse cigar: {}", cigar_str));
2964            assert_eq!(&cigar_parse, truth);
2965        }
2966    }
2967}
2968
2969#[cfg(test)]
2970mod basemod_tests {
2971    use crate::bam::{Read, Reader};
2972
2973    #[test]
2974    pub fn test_count_recorded() {
2975        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
2976
2977        for r in bam.records() {
2978            let record = r.unwrap();
2979            if let Ok(mods) = record.basemods_iter() {
2980                let n = mods.recorded().len();
2981                assert_eq!(n, 3);
2982            };
2983        }
2984    }
2985
2986    #[test]
2987    pub fn test_query_type() {
2988        let mut bam = Reader::from_path(&"test/base_mods/MM-orient.sam").unwrap();
2989
2990        let mut n_fwd = 0;
2991        let mut n_rev = 0;
2992
2993        for r in bam.records() {
2994            let record = r.unwrap();
2995            if let Ok(mods) = record.basemods_iter() {
2996                for mod_code in mods.recorded() {
2997                    if let Ok(mod_metadata) = mods.query_type(*mod_code) {
2998                        if mod_metadata.strand == 0 {
2999                            n_fwd += 1;
3000                        }
3001                        if mod_metadata.strand == 1 {
3002                            n_rev += 1;
3003                        }
3004                    }
3005                }
3006            };
3007        }
3008        assert_eq!(n_fwd, 2);
3009        assert_eq!(n_rev, 2);
3010    }
3011
3012    #[test]
3013    pub fn test_mod_iter() {
3014        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
3015        let expected_positions = [1, 7, 12, 13, 13, 22, 30, 31];
3016        let mut i = 0;
3017
3018        for r in bam.records() {
3019            let record = r.unwrap();
3020            for res in record.basemods_iter().unwrap() {
3021                if let Ok((position, _m)) = res {
3022                    assert_eq!(position, expected_positions[i]);
3023                    i += 1;
3024                }
3025            }
3026        }
3027    }
3028
3029    #[test]
3030    pub fn test_position_iter() {
3031        let mut bam = Reader::from_path(&"test/base_mods/MM-double.sam").unwrap();
3032        let expected_positions = [1, 7, 12, 13, 22, 30, 31];
3033        let expected_counts = [1, 1, 1, 2, 1, 1, 1];
3034        let mut i = 0;
3035
3036        for r in bam.records() {
3037            let record = r.unwrap();
3038            for res in record.basemods_position_iter().unwrap() {
3039                if let Ok((position, elements)) = res {
3040                    assert_eq!(position, expected_positions[i]);
3041                    assert_eq!(elements.len(), expected_counts[i]);
3042                    i += 1;
3043                }
3044            }
3045        }
3046    }
3047}