use crate::core::read_filter::ReadFilter;
use crate::engine::position::Position;
use rust_htslib::bam::{
self,
pileup::{Alignment, Pileup},
record::Record,
HeaderView,
};
use serde::Serialize;
use smartstring::{alias::String, LazyCompact, SmartString};
use std::default;
#[derive(Debug, Serialize, Default)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub struct PileupPosition {
#[serde(rename = "CHR")]
pub ref_seq: SmartString<LazyCompact>,
pub pos: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub ref_base: Option<char>,
pub depth: u32,
pub a: u32,
pub c: u32,
pub g: u32,
pub t: u32,
pub n: u32,
pub ins: u32,
pub del: u32,
pub ref_skip: u32,
pub fail: u32,
pub near_max_depth: bool,
}
impl Position for PileupPosition {
fn new(ref_seq: String, pos: u32) -> Self {
PileupPosition {
ref_seq: SmartString::from(ref_seq.as_str()),
pos,
..default::Default::default()
}
}
}
impl PileupPosition {
#[inline(always)]
fn update<F: ReadFilter>(
&mut self,
alignment: &Alignment,
record: &Record,
read_filter: &F,
base_filter: Option<u8>,
) {
if !read_filter.filter_read(record, Some(alignment)) {
self.depth -= 1;
self.fail += 1;
return;
}
if alignment.is_refskip() {
self.ref_skip += 1;
self.depth -= 1; } else if alignment.is_del() {
self.del += 1;
} else {
let qpos = alignment
.qpos()
.expect("Pileup alignment should have a query position");
let is_low_qual = base_filter.is_some_and(|cutoff| record.qual()[qpos] < cutoff);
if is_low_qual {
self.n += 1;
} else {
match record.seq()[qpos].to_ascii_uppercase() {
b'A' => self.a += 1,
b'C' => self.c += 1,
b'G' => self.g += 1,
b'T' => self.t += 1,
_ => self.n += 1,
}
}
if let bam::pileup::Indel::Ins(_len) = alignment.indel() {
self.ins += 1;
}
}
}
#[inline]
pub fn mark_near_max_depth(&mut self, max_depth: u32) {
self.near_max_depth = false;
if max_depth == 0 {
return;
}
let combined_depth = self
.depth
.saturating_add(self.ref_skip)
.saturating_add(self.fail);
let effective_depth = combined_depth.min(self.depth);
let lhs = (effective_depth as u128).saturating_mul(10_000);
let rhs = (max_depth as u128).saturating_mul(9_999);
self.near_max_depth = lhs >= rhs;
}
#[inline]
pub fn from_pileup<F: ReadFilter>(
pileup: Pileup,
header: &bam::HeaderView,
read_filter: &F,
base_filter: Option<u8>,
) -> Self {
let name = Self::compact_refseq(header, pileup.tid());
let mut pos = Self::new(name, pileup.pos());
pos.depth = pileup.depth();
for alignment in pileup.alignments() {
let record = alignment.record();
Self::update(&mut pos, &alignment, &record, read_filter, base_filter);
}
pos
}
#[inline]
pub fn compact_refseq(header: &HeaderView, tid: u32) -> SmartString<LazyCompact> {
let name = std::str::from_utf8(header.tid2name(tid)).unwrap();
String::from(name)
}
}
#[cfg(test)]
mod tests {
use crate::core::read_filter::DefaultReadFilter;
use super::*;
use rust_htslib::bam::{IndexedReader, Read};
use std::path::Path;
#[test]
fn mark_near_max_depth_uses_observed_depth() {
let mut position = PileupPosition {
depth: 99_989,
fail: 900,
..Default::default()
};
position.mark_near_max_depth(100_000);
assert!(!position.near_max_depth);
position.depth = 99_990;
position.mark_near_max_depth(100_000);
assert!(position.near_max_depth);
position.depth = 100_000;
position.mark_near_max_depth(100_000);
assert!(position.near_max_depth);
position.depth = 98_500;
position.mark_near_max_depth(100_000);
assert!(!position.near_max_depth);
position.depth = 10;
position.mark_near_max_depth(0);
assert!(!position.near_max_depth);
}
#[test]
fn test_pileup_position_from_bam() {
let bam_path = Path::new("test/chr22.bam");
let site_pos = 50783283;
if bam_path.exists() {
let mut bam = IndexedReader::from_path(bam_path).expect("Failed to open BAM file");
bam.fetch(("chr22", site_pos - 1, site_pos))
.expect("Failed to fetch region");
let header = bam.header().to_owned();
let read_filter = DefaultReadFilter::new(255);
for pileup_result in bam.pileup() {
let pileup = pileup_result.expect("Failed to get pileup");
let position = PileupPosition::from_pileup(
pileup,
&header,
&read_filter,
Some(30), );
if !(position.pos == site_pos - 1) {
continue;
}
eprintln!("Reference: {}", position.ref_seq);
eprintln!("Position: {}", position.pos + 1);
eprintln!("Reference base: {:?}", position.ref_base);
eprintln!("Depth: {}", position.depth);
eprintln!(
"A: {}, C: {}, G: {}, T: {}, N: {} Insertions: {}, Deletions: {}, RefSkips: {}, Fail: {}",
position.a, position.c, position.g, position.t, position.n, position.ins, position.del, position.ref_skip, position.fail
);
assert_eq!(position.c, 2291);
assert_eq!(position.t, 3);
eprintln!("Near max depth: {}", position.near_max_depth);
}
} else {
eprintln!("Test BAM file not found, skipping test");
panic!("Test BAM file not found");
}
}
}