1use crate::core::{GenomicReader, GenomicRecordIterator};
39use crate::error::{Error, Result};
40use flate2::{Decompress, FlushDecompress};
41use std::collections::HashMap;
42use std::fs::File;
43use std::io::{BufReader, Read};
44use std::path::Path;
45
/// Size in bytes of the buffer each compressed block is inflated into (64 KiB).
const MAX_BLOCK_SIZE: usize = 1 << 16;

/// Magic bytes expected at the very start of the decompressed stream: `BAM\x01`.
const BAM_MAGIC: [u8; 4] = *b"BAM\x01";

/// ASCII base letter for each 4-bit packed sequence code; the code is the index.
const SEQ_LOOKUP: [u8; 16] = *b"=ACMGRSVTWYHKDBN";
57
58#[derive(Debug, Clone, Copy, PartialEq, Eq)]
60pub enum CigarOp {
61 Match(u32),
63 Ins(u32),
65 Del(u32),
67 RefSkip(u32),
69 SoftClip(u32),
71 HardClip(u32),
73 Pad(u32),
75 Equal(u32),
77 Diff(u32),
79}
80
81impl CigarOp {
82 pub fn from_encoded(encoded: u32) -> Result<Self> {
84 let len = encoded >> 4;
85 let op = (encoded & 0xF) as u8;
86
87 match op {
88 0 => Ok(CigarOp::Match(len)),
89 1 => Ok(CigarOp::Ins(len)),
90 2 => Ok(CigarOp::Del(len)),
91 3 => Ok(CigarOp::RefSkip(len)),
92 4 => Ok(CigarOp::SoftClip(len)),
93 5 => Ok(CigarOp::HardClip(len)),
94 6 => Ok(CigarOp::Pad(len)),
95 7 => Ok(CigarOp::Equal(len)),
96 8 => Ok(CigarOp::Diff(len)),
97 _ => Err(Error::InvalidInput(format!(
98 "Invalid CIGAR operation: {}",
99 op
100 ))),
101 }
102 }
103
104 pub fn len(&self) -> u32 {
106 match self {
107 CigarOp::Match(l)
108 | CigarOp::Ins(l)
109 | CigarOp::Del(l)
110 | CigarOp::RefSkip(l)
111 | CigarOp::SoftClip(l)
112 | CigarOp::HardClip(l)
113 | CigarOp::Pad(l)
114 | CigarOp::Equal(l)
115 | CigarOp::Diff(l) => *l,
116 }
117 }
118
119 pub fn consumes_ref(&self) -> bool {
121 matches!(
122 self,
123 CigarOp::Match(_)
124 | CigarOp::Del(_)
125 | CigarOp::RefSkip(_)
126 | CigarOp::Equal(_)
127 | CigarOp::Diff(_)
128 )
129 }
130
131 pub fn consumes_query(&self) -> bool {
133 matches!(
134 self,
135 CigarOp::Match(_)
136 | CigarOp::Ins(_)
137 | CigarOp::SoftClip(_)
138 | CigarOp::Equal(_)
139 | CigarOp::Diff(_)
140 )
141 }
142
143 pub fn to_char(&self) -> char {
145 match self {
146 CigarOp::Match(_) => 'M',
147 CigarOp::Ins(_) => 'I',
148 CigarOp::Del(_) => 'D',
149 CigarOp::RefSkip(_) => 'N',
150 CigarOp::SoftClip(_) => 'S',
151 CigarOp::HardClip(_) => 'H',
152 CigarOp::Pad(_) => 'P',
153 CigarOp::Equal(_) => '=',
154 CigarOp::Diff(_) => 'X',
155 }
156 }
157
158 pub fn parse_cigar_string(cigar_str: &str) -> Result<Vec<Self>> {
162 if cigar_str == "*" {
164 return Ok(Vec::new());
165 }
166
167 let mut operations = Vec::new();
168 let mut num_str = String::new();
169
170 for ch in cigar_str.chars() {
171 if ch.is_ascii_digit() {
172 num_str.push(ch);
173 } else {
174 if num_str.is_empty() {
175 return Err(Error::Parse(format!(
176 "Invalid CIGAR string: operation '{}' without length",
177 ch
178 )));
179 }
180
181 let len = num_str
182 .parse::<u32>()
183 .map_err(|_| Error::Parse(format!("Invalid CIGAR length: {}", num_str)))?;
184
185 let op = match ch {
186 'M' => CigarOp::Match(len),
187 'I' => CigarOp::Ins(len),
188 'D' => CigarOp::Del(len),
189 'N' => CigarOp::RefSkip(len),
190 'S' => CigarOp::SoftClip(len),
191 'H' => CigarOp::HardClip(len),
192 'P' => CigarOp::Pad(len),
193 '=' => CigarOp::Equal(len),
194 'X' => CigarOp::Diff(len),
195 _ => return Err(Error::Parse(format!("Invalid CIGAR operation: {}", ch))),
196 };
197
198 operations.push(op);
199 num_str.clear();
200 }
201 }
202
203 if !num_str.is_empty() {
204 return Err(Error::Parse(format!(
205 "Invalid CIGAR string: trailing digits '{}'",
206 num_str
207 )));
208 }
209
210 Ok(operations)
211 }
212}
213
214impl std::fmt::Display for CigarOp {
215 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
216 write!(f, "{}{}", self.len(), self.to_char())
217 }
218}
219
/// One reference sequence entry from the BAM header.
#[derive(Debug, Clone)]
pub struct RefSeq {
    /// Reference name.
    pub name: String,
    /// Reference length as stored in the header.
    pub length: u32,
}

/// Parsed BAM header: the header text plus the reference sequence list.
#[derive(Debug, Clone)]
pub struct BamHeader {
    /// Header text, decoded lossily as UTF-8.
    pub text: String,
    /// Reference sequences in file order; a record's `refid` indexes into this list.
    pub references: Vec<RefSeq>,
}

impl BamHeader {
    /// Looks up the name of the reference at index `idx`.
    ///
    /// Returns `None` when `idx` is negative or past the end of `references`.
    pub fn ref_name(&self, idx: i32) -> Option<&str> {
        let slot = (idx >= 0).then(|| idx as usize)?;
        let entry = self.references.get(slot)?;
        Some(entry.name.as_str())
    }
}
245
246#[derive(Debug, Clone)]
248pub struct BamRecord {
249 pub qname: String,
251 pub flag: u16,
253 pub refid: i32,
255 pub rname: String,
258 pub pos: i32,
260 pub mapq: u8,
262 pub cigar: Vec<CigarOp>,
264 pub next_refid: i32,
266 pub next_pos: i32,
268 pub tlen: i32,
270 pub seq: Vec<u8>,
272 pub qual: Vec<u8>,
274 pub tags: HashMap<String, TagValue>,
276}
277
278impl BamRecord {
279 pub fn is_paired(&self) -> bool {
281 (self.flag & 0x1) != 0
282 }
283
284 pub fn is_proper_pair(&self) -> bool {
286 (self.flag & 0x2) != 0
287 }
288
289 pub fn is_unmapped(&self) -> bool {
291 (self.flag & 0x4) != 0
292 }
293
294 pub fn is_mate_unmapped(&self) -> bool {
296 (self.flag & 0x8) != 0
297 }
298
299 pub fn is_reverse(&self) -> bool {
301 (self.flag & 0x10) != 0
302 }
303
304 pub fn is_mate_reverse(&self) -> bool {
306 (self.flag & 0x20) != 0
307 }
308
309 pub fn is_first_in_pair(&self) -> bool {
311 (self.flag & 0x40) != 0
312 }
313
314 pub fn is_second_in_pair(&self) -> bool {
316 (self.flag & 0x80) != 0
317 }
318
319 pub fn is_secondary(&self) -> bool {
321 (self.flag & 0x100) != 0
322 }
323
324 pub fn is_qc_fail(&self) -> bool {
326 (self.flag & 0x200) != 0
327 }
328
329 pub fn is_duplicate(&self) -> bool {
331 (self.flag & 0x400) != 0
332 }
333
334 pub fn is_supplementary(&self) -> bool {
336 (self.flag & 0x800) != 0
337 }
338
339 pub fn cigar_string(&self) -> String {
341 self.cigar.iter().map(|op| op.to_string()).collect()
342 }
343
344 pub fn seq_string(&self) -> String {
346 String::from_utf8_lossy(&self.seq).to_string()
347 }
348}
349
/// Decoded value of an optional record field, as produced by `parse_record`.
#[derive(Clone, Debug)]
pub enum TagValue {
    /// Single byte from an `A` field.
    Char(u8),
    /// Integer from a `c`, `C`, `s`, `S`, `i` or `I` field, converted to `i32`.
    ///
    /// `I` values above `i32::MAX` wrap to negative numbers because the
    /// conversion is a plain `as` cast.
    Int(i32),
    /// 32-bit float from an `f` field.
    Float(f32),
    /// Text from a `Z` or `H` field, stored verbatim without its NUL terminator.
    String(String),
    /// Raw bytes of a `B` array field: the elements' little-endian bytes
    /// concatenated in order. The element type is not retained.
    ByteArray(Vec<u8>),
}
359
360pub struct BamReader<R: Read> {
367 inner: BufReader<R>,
368 current_block: Vec<u8>,
369 block_pos: usize,
370 eof: bool,
371 header: Option<BamHeader>,
372 decompressor: Decompress,
374 compressed_buf: Vec<u8>,
376 blocks_decompressed: usize,
378 file_path: Option<std::path::PathBuf>,
380}
381
382impl BamReader<File> {
383 pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
393 let path_buf = path.as_ref().to_path_buf();
394 let file = File::open(&path_buf)?;
395 let mut reader = Self::new(file);
396 reader.file_path = Some(path_buf);
397 Ok(reader)
398 }
399
400 pub fn compute_stats(
401 self,
402 config: Option<crate::parallel::ParallelConfig>,
403 ) -> Result<super::stats::BamStats> {
404 super::stats::BamStats::par_compute(self, config)
406 }
407}
408
409impl<R: Read> BamReader<R> {
410 pub fn new(reader: R) -> Self {
412 Self {
413 inner: BufReader::new(reader),
414 current_block: Vec::new(),
415 block_pos: 0,
416 eof: false,
417 header: None,
418 decompressor: Decompress::new(false), compressed_buf: Vec::with_capacity(MAX_BLOCK_SIZE),
420 blocks_decompressed: 0,
421 file_path: None,
422 }
423 }
424
425 pub fn read_header(&mut self) -> Result<&BamHeader> {
429 if self.header.is_some() {
430 return Ok(self.header.as_ref().unwrap());
431 }
432
433 let mut magic = [0u8; 4];
435 self.read_exact(&mut magic)?;
436
437 if magic != BAM_MAGIC {
438 return Err(Error::InvalidInput(format!(
439 "Invalid BAM magic number: expected {:?}, got {:?}",
440 BAM_MAGIC, magic
441 )));
442 }
443
444 let mut l_text_buf = [0u8; 4];
446 self.read_exact(&mut l_text_buf)?;
447 let l_text = u32::from_le_bytes(l_text_buf) as usize;
448
449 let mut text_buf = vec![0u8; l_text];
451 self.read_exact(&mut text_buf)?;
452 let text = String::from_utf8_lossy(&text_buf).to_string();
453
454 let mut n_ref_buf = [0u8; 4];
456 self.read_exact(&mut n_ref_buf)?;
457 let n_ref = u32::from_le_bytes(n_ref_buf) as usize;
458
459 let mut references = Vec::with_capacity(n_ref);
461 for _ in 0..n_ref {
462 let mut l_name_buf = [0u8; 4];
464 self.read_exact(&mut l_name_buf)?;
465 let l_name = u32::from_le_bytes(l_name_buf) as usize;
466
467 let mut name_buf = vec![0u8; l_name];
469 self.read_exact(&mut name_buf)?;
470 if name_buf.last() == Some(&0) {
472 name_buf.pop();
473 }
474 let name = String::from_utf8_lossy(&name_buf).to_string();
475
476 let mut l_ref_buf = [0u8; 4];
478 self.read_exact(&mut l_ref_buf)?;
479 let length = u32::from_le_bytes(l_ref_buf);
480
481 references.push(RefSeq { name, length });
482 }
483
484 self.header = Some(BamHeader { text, references });
485 Ok(self.header.as_ref().unwrap())
486 }
487
488 pub fn header(&self) -> Option<&BamHeader> {
490 self.header.as_ref()
491 }
492
493 pub fn set_header(&mut self, header: BamHeader) {
506 self.header = Some(header);
507 }
508
509 pub fn blocks_decompressed(&self) -> usize {
514 self.blocks_decompressed
515 }
516
517 pub fn file_path(&self) -> Option<&std::path::Path> {
522 self.file_path.as_deref()
523 }
524
525 pub fn parse_record(&self, data: &[u8]) -> Result<BamRecord> {
527 let mut pos = 0;
529
530 macro_rules! read_bytes {
532 ($n:expr) => {{
533 if pos + $n > data.len() {
534 return Err(Error::InvalidInput(
535 "Unexpected end of BAM record".to_string(),
536 ));
537 }
538 let bytes = &data[pos..pos + $n];
539 pos += $n;
540 bytes
541 }};
542 }
543
544 let _block_size = u32::from_le_bytes(read_bytes!(4).try_into().unwrap());
546
547 let refid = i32::from_le_bytes(read_bytes!(4).try_into().unwrap());
549
550 let rname = if let Some(header) = &self.header {
552 header
553 .ref_name(refid)
554 .unwrap_or("*") .to_string()
556 } else {
557 if refid >= 0 {
559 format!("refid_{}", refid)
560 } else {
561 "*".to_string()
562 }
563 };
564
565 let record_pos = i32::from_le_bytes(read_bytes!(4).try_into().unwrap());
566 let l_read_name = read_bytes!(1)[0];
567 let mapq = read_bytes!(1)[0];
568 let _bin = u16::from_le_bytes(read_bytes!(2).try_into().unwrap()); let n_cigar_op = u16::from_le_bytes(read_bytes!(2).try_into().unwrap());
570 let flag = u16::from_le_bytes(read_bytes!(2).try_into().unwrap());
571 let l_seq = u32::from_le_bytes(read_bytes!(4).try_into().unwrap());
572 let next_refid = i32::from_le_bytes(read_bytes!(4).try_into().unwrap());
573 let next_pos = i32::from_le_bytes(read_bytes!(4).try_into().unwrap());
574 let tlen = i32::from_le_bytes(read_bytes!(4).try_into().unwrap());
575
576 let qname_bytes = read_bytes!(l_read_name as usize);
578 let qname_len = if qname_bytes.last() == Some(&0) {
580 qname_bytes.len() - 1
581 } else {
582 qname_bytes.len()
583 };
584 let qname = String::from_utf8_lossy(&qname_bytes[..qname_len]).to_string();
585
586 let mut cigar = Vec::with_capacity(n_cigar_op as usize);
588 for _ in 0..n_cigar_op {
589 let encoded = u32::from_le_bytes(read_bytes!(4).try_into().unwrap());
590 cigar.push(CigarOp::from_encoded(encoded)?);
591 }
592
593 let seq_bytes_len = ((l_seq + 1) / 2) as usize;
595 let seq_buf = read_bytes!(seq_bytes_len);
596
597 let mut seq = Vec::with_capacity(l_seq as usize);
599 for byte in seq_buf {
600 seq.push(SEQ_LOOKUP[(byte >> 4) as usize]);
601 seq.push(SEQ_LOOKUP[(byte & 0x0F) as usize]);
602 }
603 seq.truncate(l_seq as usize);
605
606 let qual_bytes = read_bytes!(l_seq as usize);
608 let qual = if l_seq > 0 && qual_bytes[0] == 0xFF && qual_bytes.iter().all(|&q| q == 0xFF) {
610 Vec::new()
611 } else {
612 qual_bytes.to_vec()
613 };
614
615 let mut tags = HashMap::new();
617 while pos < data.len() {
618 if pos + 3 > data.len() {
620 break;
621 }
622
623 let tag_bytes = read_bytes!(2);
625 let tag = String::from_utf8_lossy(tag_bytes).to_string();
626
627 let val_type = read_bytes!(1)[0];
629
630 let value = match val_type {
631 b'A' => TagValue::Char(read_bytes!(1)[0]),
632 b'c' => TagValue::Int(i8::from_le_bytes(read_bytes!(1).try_into().unwrap()) as i32),
633 b'C' => TagValue::Int(read_bytes!(1)[0] as i32),
634 b's' => {
635 TagValue::Int(i16::from_le_bytes(read_bytes!(2).try_into().unwrap()) as i32)
636 }
637 b'S' => {
638 TagValue::Int(u16::from_le_bytes(read_bytes!(2).try_into().unwrap()) as i32)
639 }
640 b'i' => TagValue::Int(i32::from_le_bytes(read_bytes!(4).try_into().unwrap())),
641 b'I' => {
642 TagValue::Int(u32::from_le_bytes(read_bytes!(4).try_into().unwrap()) as i32)
643 }
644 b'f' => TagValue::Float(f32::from_le_bytes(read_bytes!(4).try_into().unwrap())),
645 b'Z' => {
646 let start = pos;
648 while pos < data.len() && data[pos] != 0 {
649 pos += 1;
650 }
651 let s = &data[start..pos];
652 if pos < data.len() {
653 pos += 1; }
655 TagValue::String(String::from_utf8_lossy(s).to_string())
656 }
657 b'H' => {
658 let start = pos;
660 while pos < data.len() && data[pos] != 0 {
661 pos += 1;
662 }
663 let s = &data[start..pos];
664 if pos < data.len() {
665 pos += 1; }
667 TagValue::String(String::from_utf8_lossy(s).to_string())
668 }
669 b'B' => {
670 let array_type = read_bytes!(1)[0];
672 let count = u32::from_le_bytes(read_bytes!(4).try_into().unwrap()) as usize;
673 let mut arr = Vec::new();
674 for _ in 0..count {
675 match array_type {
676 b'c' => arr.push(read_bytes!(1)[0]),
677 b'C' => arr.push(read_bytes!(1)[0]),
678 b's' => arr.extend_from_slice(read_bytes!(2)),
679 b'S' => arr.extend_from_slice(read_bytes!(2)),
680 b'i' => arr.extend_from_slice(read_bytes!(4)),
681 b'I' => arr.extend_from_slice(read_bytes!(4)),
682 b'f' => arr.extend_from_slice(read_bytes!(4)),
683 _ => {
684 return Err(Error::InvalidInput(format!(
685 "Unknown array type: {}",
686 array_type
687 )))
688 }
689 }
690 }
691 TagValue::ByteArray(arr)
692 }
693 _ => {
694 return Err(Error::InvalidInput(format!(
695 "Unknown tag type: {}",
696 val_type as char
697 )))
698 }
699 };
700
701 tags.insert(tag, value);
702 }
703
704 Ok(BamRecord {
705 qname,
706 flag,
707 refid,
708 rname,
709 pos: record_pos,
710 mapq,
711 cigar,
712 next_refid,
713 next_pos,
714 tlen,
715 seq,
716 qual,
717 tags,
718 })
719 }
720
721 pub fn records(&mut self) -> BamRecordIterator<'_, R> {
725 BamRecordIterator { reader: self }
726 }
727
728 fn read_exact(&mut self, buf: &mut [u8]) -> std::io::Result<()> {
730 let mut total_read = 0;
731 while total_read < buf.len() {
732 let n = self.read(&mut buf[total_read..])?;
733 if n == 0 {
734 return Err(std::io::Error::new(
735 std::io::ErrorKind::UnexpectedEof,
736 "unexpected end of file",
737 ));
738 }
739 total_read += n;
740 }
741 Ok(())
742 }
743
744 fn read_next_block(&mut self) -> Result<bool> {
748 if self.eof {
749 return Ok(false);
750 }
751
752 let mut header = [0u8; 10];
754 match self.inner.read_exact(&mut header) {
755 Ok(_) => {}
756 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
757 self.eof = true;
758 return Ok(false);
759 }
760 Err(e) => return Err(Error::Io(e)),
761 }
762
763 if header[0] != 31 || header[1] != 139 {
765 return Err(Error::InvalidInput(
766 "Invalid bam block: bad gzip magic number".to_string(),
767 ));
768 }
769
770 let flags = header[3];
772 let has_extra = (flags & 0x04) != 0;
773
774 if !has_extra {
775 return Err(Error::InvalidInput(
776 "Invalid bam block: missing extra field".to_string(),
777 ));
778 }
779
780 let mut xlen_buf = [0u8; 2];
782 self.inner.read_exact(&mut xlen_buf)?;
783 let xlen = u16::from_le_bytes(xlen_buf) as usize;
784
785 let mut extra = vec![0u8; xlen];
787 self.inner.read_exact(&mut extra)?;
788
789 let bsize = self.parse_bam_extra(&extra)?;
791
792 let compressed_size = bsize + 1 - 10 - 2 - xlen - 8;
796
797 self.compressed_buf.clear();
799 self.compressed_buf.resize(compressed_size, 0);
800 self.inner.read_exact(&mut self.compressed_buf)?;
801
802 let mut footer = [0u8; 8];
804 self.inner.read_exact(&mut footer)?;
805
806 self.decompressor.reset(false);
808
809 self.current_block.clear();
812 self.current_block.resize(MAX_BLOCK_SIZE, 0);
813
814 let before_out = self.decompressor.total_out();
816
817 self.decompressor
818 .decompress(
819 &self.compressed_buf,
820 &mut self.current_block,
821 FlushDecompress::Finish,
822 )
823 .map_err(|e| Error::InvalidInput(format!("Decompression failed: {}", e)))?;
824
825 let bytes_out = (self.decompressor.total_out() - before_out) as usize;
827
828 self.current_block.truncate(bytes_out);
830
831 if self.current_block.len() > MAX_BLOCK_SIZE {
833 return Err(Error::InvalidInput(format!(
834 "BGZF block too large: {} bytes (max {})",
835 self.current_block.len(),
836 MAX_BLOCK_SIZE
837 )));
838 }
839
840 self.block_pos = 0;
841 self.blocks_decompressed += 1;
842 Ok(true)
843 }
844
845 fn parse_bam_extra(&self, extra: &[u8]) -> Result<usize> {
847 let mut pos = 0;
848
849 while pos + 4 <= extra.len() {
850 let si1 = extra[pos];
851 let si2 = extra[pos + 1];
852 let slen = u16::from_le_bytes([extra[pos + 2], extra[pos + 3]]) as usize;
853
854 if si1 == 66 && si2 == 67 {
856 if pos + 4 + slen > extra.len() {
857 return Err(Error::InvalidInput("bam extra field truncated".to_string()));
858 }
859
860 if slen != 2 {
862 return Err(Error::InvalidInput(format!(
863 "Invalid bam subfield length: {}",
864 slen
865 )));
866 }
867
868 let bsize = u16::from_le_bytes([extra[pos + 4], extra[pos + 5]]) as usize;
869 return Ok(bsize);
870 }
871
872 pos += 4 + slen;
873 }
874
875 Err(Error::InvalidInput(
876 "bam subfield not found in extra field".to_string(),
877 ))
878 }
879}
880
881impl<R: Read> Read for BamReader<R> {
882 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
883 if buf.is_empty() {
884 return Ok(0);
885 }
886
887 if self.block_pos >= self.current_block.len() {
889 match self.read_next_block() {
890 Ok(true) => {}
891 Ok(false) => return Ok(0), Err(e) => return Err(std::io::Error::new(std::io::ErrorKind::Other, e)),
893 }
894 }
895
896 let available = self.current_block.len() - self.block_pos;
898 let to_copy = buf.len().min(available);
899
900 buf[..to_copy]
901 .copy_from_slice(&self.current_block[self.block_pos..self.block_pos + to_copy]);
902 self.block_pos += to_copy;
903
904 Ok(to_copy)
905 }
906}
907
908impl<R: Read> GenomicRecordIterator for BamReader<R> {
910 type Record = BamRecord;
911
912 fn next_raw(&mut self) -> Result<Option<Vec<u8>>> {
913 let mut size_buf = [0u8; 4];
915 match self.read_exact(&mut size_buf) {
916 Ok(_) => {}
917 Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
918 return Ok(None);
919 }
920 Err(e) => return Err(e.into()),
921 }
922
923 let block_size = u32::from_le_bytes(size_buf);
924
925 let mut record_data = vec![0u8; block_size as usize + 4]; record_data[0..4].copy_from_slice(&size_buf);
928 self.read_exact(&mut record_data[4..])?;
929 Ok(Some(record_data))
930 }
931
932 fn next_record(&mut self) -> Result<Option<Self::Record>> {
933 let record_data = self.next_raw()?;
935 if record_data.is_none() {
936 return Ok(None);
937 }
938 let record_data = record_data.unwrap();
939 self.parse_record(&record_data).map(Some)
941 }
942}
943
944impl<R: Read> GenomicReader for BamReader<R> {
946 type Metadata = BamHeader;
947
948 fn metadata(&self) -> &Self::Metadata {
949 self.header
950 .as_ref()
951 .expect("Header not read yet. Call read_header() first.")
952 }
953}
954
955pub struct BamRecordIterator<'a, R: Read> {
960 reader: &'a mut BamReader<R>,
961}
962
963impl<'a, R: Read> Iterator for BamRecordIterator<'a, R> {
964 type Item = Result<BamRecord>;
965
966 fn next(&mut self) -> Option<Self::Item> {
967 match self.reader.next_record() {
968 Ok(Some(record)) => Some(Ok(record)),
969 Ok(None) => None,
970 Err(e) => Some(Err(e)),
971 }
972 }
973}
974
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Builds a reader over an in-memory byte buffer.
    fn reader_over(bytes: Vec<u8>) -> BamReader<Cursor<Vec<u8>>> {
        BamReader::new(Cursor::new(bytes))
    }

    #[test]
    fn test_cigar_op_encoding() {
        // Length 10 in the upper bits, op code 0 in the low nibble.
        let encoded = (10 << 4) | 0;
        let op = CigarOp::from_encoded(encoded).unwrap();
        assert_eq!(op, CigarOp::Match(10));
        assert_eq!(op.len(), 10);
        assert!(op.consumes_ref());
        assert!(op.consumes_query());
        assert_eq!(op.to_char(), 'M');
    }

    #[test]
    fn test_cigar_op_display() {
        let cases = [
            (CigarOp::Match(10), "10M"),
            (CigarOp::Ins(5), "5I"),
            (CigarOp::Del(3), "3D"),
        ];
        for (op, expected) in cases {
            assert_eq!(op.to_string(), expected);
        }
    }

    #[test]
    fn test_bam_reader_creation() {
        // Construction alone must not touch the underlying data.
        let _reader = reader_over(vec![0u8; 1024]);
    }

    #[test]
    fn test_bam_reader_invalid_magic() {
        // All-zero input does not start with a valid block header.
        let mut reader = reader_over(vec![0u8; 100]);

        let mut buf = vec![0u8; 10];
        match reader.read(&mut buf) {
            Err(_) => {}
            Ok(count) => assert!(count == 0),
        }
    }

    #[test]
    fn test_parse_bgzf_extra_valid() {
        let reader = reader_over(vec![]);

        // 'B', 'C', SLEN = 2, value = 100.
        let outcome = reader.parse_bam_extra(&[66, 67, 2, 0, 100, 0]);

        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap(), 100);
    }

    #[test]
    fn test_parse_bgzf_extra_invalid() {
        let reader = reader_over(vec![]);

        // Subfield identifiers other than 'B', 'C'.
        let outcome = reader.parse_bam_extra(&[1, 2, 2, 0, 100, 0]);

        assert!(outcome.is_err());
    }

    #[test]
    fn test_parse_bgzf_extra_truncated() {
        let reader = reader_over(vec![]);

        // Announces two payload bytes but provides none.
        let outcome = reader.parse_bam_extra(&[66, 67, 2, 0]);

        assert!(outcome.is_err());
    }

    #[test]
    fn test_max_block_size() {
        assert_eq!(MAX_BLOCK_SIZE, 65536);
    }

    #[test]
    fn test_bam_reader_from_empty() {
        let mut reader = reader_over(vec![]);

        let mut buf = vec![0u8; 10];
        let outcome = reader.read(&mut buf);

        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap(), 0);
    }
}