use std::collections::HashMap;
use cyanea_core::{CyaneaError, Result};
use crate::sam::SamRecord;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum CigarOp {
Align(usize),
Ins(usize),
Del(usize),
SoftClip(usize),
Skip(usize),
}
fn parse_cigar_ops(cigar: &str) -> Result<Vec<CigarOp>> {
if cigar == "*" {
return Ok(Vec::new());
}
let mut ops = Vec::new();
let mut num_start = 0;
for (i, c) in cigar.char_indices() {
if c.is_ascii_digit() {
continue;
}
let len: usize = cigar[num_start..i]
.parse()
.map_err(|_| CyaneaError::Parse(format!("invalid CIGAR length in '{cigar}'")))?;
let op = match c {
'M' | '=' | 'X' => CigarOp::Align(len),
'I' => CigarOp::Ins(len),
'D' | 'N' => CigarOp::Del(len),
'S' => CigarOp::SoftClip(len),
'H' | 'P' => CigarOp::Skip(len),
_ => {
return Err(CyaneaError::Parse(format!(
"unknown CIGAR operation '{c}' in '{cigar}'"
)));
}
};
ops.push(op);
num_start = i + 1;
}
Ok(ops)
}
pub fn base_index(base: u8) -> usize {
match base {
b'A' | b'a' => 0,
b'C' | b'c' => 1,
b'G' | b'g' => 2,
b'T' | b't' => 3,
b'N' | b'n' => 4,
_ => 5, }
}
fn is_reverse(flag: u16) -> bool {
flag & 0x10 != 0
}
#[derive(Debug, Clone)]
pub struct InsertionEvidence {
pub bases: Vec<u8>,
pub mean_quality: u8,
}
#[derive(Debug, Clone)]
pub struct PileupColumn {
pub rname: String,
pub pos: u64,
pub ref_base: u8,
pub depth: u32,
pub bases: Vec<u8>,
pub qualities: Vec<u8>,
pub base_counts: [u32; 6],
pub quality_sums: [u64; 6],
pub mapping_qualities: Vec<u8>,
pub insertions: Vec<InsertionEvidence>,
}
#[derive(Debug, Clone)]
pub struct Pileup {
pub rname: String,
pub columns: Vec<PileupColumn>,
}
#[derive(Debug, Clone)]
pub struct DepthStats {
pub rname: String,
pub length: u64,
pub covered: u64,
pub breadth: f64,
pub min_depth: u32,
pub max_depth: u32,
pub mean_depth: f64,
pub median_depth: f64,
}
struct ColumnBuilder {
bases: Vec<u8>,
qualities: Vec<u8>,
base_counts: [u32; 6],
quality_sums: [u64; 6],
mapping_qualities: Vec<u8>,
insertions: Vec<InsertionEvidence>,
}
impl ColumnBuilder {
fn new() -> Self {
Self {
bases: Vec::new(),
qualities: Vec::new(),
base_counts: [0; 6],
quality_sums: [0; 6],
mapping_qualities: Vec::new(),
insertions: Vec::new(),
}
}
fn add(&mut self, base: u8, qual: u8, mapq: u8) {
self.bases.push(base);
self.qualities.push(qual);
self.mapping_qualities.push(mapq);
let idx = base_index(base);
self.base_counts[idx] += 1;
self.quality_sums[idx] += qual as u64;
}
fn add_insertion(&mut self, evidence: InsertionEvidence) {
self.insertions.push(evidence);
}
fn into_column(self, rname: &str, pos: u64, ref_base: u8) -> PileupColumn {
PileupColumn {
rname: rname.to_string(),
pos,
ref_base,
depth: self.bases.len() as u32,
bases: self.bases,
qualities: self.qualities,
base_counts: self.base_counts,
quality_sums: self.quality_sums,
mapping_qualities: self.mapping_qualities,
insertions: self.insertions,
}
}
}
fn walk_read(
record: &SamRecord,
columns: &mut HashMap<u64, ColumnBuilder>,
) -> Result<()> {
let ops = parse_cigar_ops(&record.cigar)?;
if ops.is_empty() {
return Ok(());
}
let mut ref_pos = record.pos.saturating_sub(1);
let mut query_pos: usize = 0;
let seq = record.sequence.as_bytes();
let qual = record.quality.as_bytes();
let reverse = is_reverse(record.flag);
let mapq = record.mapq;
for op in &ops {
match *op {
CigarOp::Align(len) => {
for _ in 0..len {
let base = if query_pos < seq.len() {
let b = seq[query_pos];
if reverse {
b.to_ascii_lowercase()
} else {
b.to_ascii_uppercase()
}
} else if reverse {
b'n'
} else {
b'N'
};
let q = if query_pos < qual.len() && qual != b"*" {
qual[query_pos].saturating_sub(33)
} else {
0
};
columns.entry(ref_pos).or_insert_with(ColumnBuilder::new).add(base, q, mapq);
ref_pos += 1;
query_pos += 1;
}
}
CigarOp::Ins(len) => {
if len > 0 && query_pos + len <= seq.len() {
let ins_bases: Vec<u8> = seq[query_pos..query_pos + len]
.iter()
.map(|&b| {
if reverse {
b.to_ascii_lowercase()
} else {
b.to_ascii_uppercase()
}
})
.collect();
let mean_q = if qual != b"*" && query_pos + len <= qual.len() {
let sum: u32 = qual[query_pos..query_pos + len]
.iter()
.map(|&q| q.saturating_sub(33) as u32)
.sum();
(sum / len as u32) as u8
} else {
0
};
let evidence = InsertionEvidence {
bases: ins_bases,
mean_quality: mean_q,
};
let anchor = if ref_pos > 0 { ref_pos - 1 } else { ref_pos };
columns
.entry(anchor)
.or_insert_with(ColumnBuilder::new)
.add_insertion(evidence);
}
query_pos += len;
}
CigarOp::Del(len) => {
for _ in 0..len {
let del_base = if reverse { b'#' } else { b'*' };
columns.entry(ref_pos).or_insert_with(ColumnBuilder::new).add(del_base, 0, mapq);
ref_pos += 1;
}
}
CigarOp::SoftClip(len) => {
query_pos += len;
}
CigarOp::Skip(len) => {
let _ = len;
}
}
}
Ok(())
}
pub fn pileup(
records: &[SamRecord],
reference: Option<&HashMap<String, Vec<u8>>>,
) -> Result<Vec<Pileup>> {
let mut groups: HashMap<String, Vec<&SamRecord>> = HashMap::new();
for rec in records {
if rec.is_unmapped() || rec.cigar == "*" {
continue;
}
groups.entry(rec.rname.clone()).or_default().push(rec);
}
let mut pileups: Vec<Pileup> = Vec::new();
let mut rnames: Vec<String> = groups.keys().cloned().collect();
rnames.sort();
for rname in rnames {
let recs = &groups[&rname];
let ref_seq = reference.and_then(|r| r.get(&rname));
let p = build_pileup(&rname, recs, ref_seq.map(|v| v.as_slice()))?;
pileups.push(p);
}
Ok(pileups)
}
pub fn pileup_region(
records: &[SamRecord],
rname: &str,
reference: Option<&[u8]>,
) -> Result<Pileup> {
let filtered: Vec<&SamRecord> = records
.iter()
.filter(|r| r.rname == rname && r.is_mapped() && r.cigar != "*")
.collect();
build_pileup(rname, &filtered, reference)
}
fn build_pileup(
rname: &str,
records: &[&SamRecord],
ref_seq: Option<&[u8]>,
) -> Result<Pileup> {
let mut columns: HashMap<u64, ColumnBuilder> = HashMap::new();
for rec in records {
walk_read(rec, &mut columns)?;
}
let mut positions: Vec<u64> = columns.keys().copied().collect();
positions.sort_unstable();
let cols: Vec<PileupColumn> = positions
.into_iter()
.map(|pos| {
let builder = columns.remove(&pos).unwrap();
let ref_base = ref_seq
.and_then(|s| s.get(pos as usize).copied())
.unwrap_or(b'N');
builder.into_column(rname, pos, ref_base)
})
.collect();
Ok(Pileup {
rname: rname.to_string(),
columns: cols,
})
}
pub fn pileup_to_mpileup(pileup: &Pileup) -> String {
let mut lines = Vec::with_capacity(pileup.columns.len());
for col in &pileup.columns {
let ref_upper = col.ref_base.to_ascii_uppercase();
let mut bases_str = String::with_capacity(col.bases.len());
let mut quals_str = String::with_capacity(col.qualities.len());
for (i, &base) in col.bases.iter().enumerate() {
if base == b'*' || base == b'#' {
bases_str.push('*');
} else {
let base_upper = base.to_ascii_uppercase();
if base_upper == ref_upper && ref_upper != b'N' {
if base.is_ascii_uppercase() {
bases_str.push('.');
} else {
bases_str.push(',');
}
} else {
bases_str.push(base as char);
}
}
let q = if i < col.qualities.len() {
col.qualities[i]
} else {
0
};
quals_str.push((q + 33) as char);
}
lines.push(format!(
"{}\t{}\t{}\t{}\t{}\t{}",
col.rname,
col.pos + 1,
ref_upper as char,
col.depth,
bases_str,
quals_str,
));
}
lines.join("\n")
}
pub fn depth_stats(pileup: &Pileup) -> DepthStats {
if pileup.columns.is_empty() {
return DepthStats {
rname: pileup.rname.clone(),
length: 0,
covered: 0,
breadth: 0.0,
min_depth: 0,
max_depth: 0,
mean_depth: 0.0,
median_depth: 0.0,
};
}
let min_pos = pileup.columns.first().unwrap().pos;
let max_pos = pileup.columns.last().unwrap().pos;
let length = max_pos - min_pos + 1;
let mut depths: Vec<u32> = vec![0; length as usize];
for col in &pileup.columns {
let idx = (col.pos - min_pos) as usize;
depths[idx] = col.depth;
}
let covered = depths.iter().filter(|&&d| d > 0).count() as u64;
let min_depth = *depths.iter().min().unwrap();
let max_depth = *depths.iter().max().unwrap();
let sum: u64 = depths.iter().map(|&d| d as u64).sum();
let mean_depth = sum as f64 / length as f64;
let mut sorted = depths.clone();
sorted.sort_unstable();
let median_depth = if sorted.len().is_multiple_of(2) {
let mid = sorted.len() / 2;
(sorted[mid - 1] as f64 + sorted[mid] as f64) / 2.0
} else {
sorted[sorted.len() / 2] as f64
};
let breadth = covered as f64 / length as f64;
DepthStats {
rname: pileup.rname.clone(),
length,
covered,
breadth,
min_depth,
max_depth,
mean_depth,
median_depth,
}
}
#[cfg(feature = "vcf")]
#[deprecated(note = "Use call_variants from the variant-calling feature")]
pub fn call_snps(
pileup: &Pileup,
min_depth: u32,
min_alt_freq: f64,
min_alt_count: u32,
) -> Vec<cyanea_omics::variant::Variant> {
use cyanea_omics::variant::Variant;
let mut variants = Vec::new();
for col in &pileup.columns {
if col.depth < min_depth {
continue;
}
let ref_upper = col.ref_base.to_ascii_uppercase();
let ref_idx = base_index(ref_upper);
let mut best_idx = usize::MAX;
let mut best_count: u32 = 0;
for i in 0..5 {
if i == ref_idx {
continue;
}
if col.base_counts[i] > best_count {
best_count = col.base_counts[i];
best_idx = i;
}
}
if best_idx == usize::MAX || best_count < min_alt_count {
continue;
}
let alt_freq = best_count as f64 / col.depth as f64;
if alt_freq < min_alt_freq {
continue;
}
let alt_base = match best_idx {
0 => b'A',
1 => b'C',
2 => b'G',
3 => b'T',
_ => b'N',
};
let qual = if alt_freq >= 1.0 {
99.0
} else {
let raw = -10.0 * (1.0 - alt_freq).log10();
raw.min(99.0)
};
if let Ok(mut v) = Variant::new(
&col.rname,
col.pos + 1,
vec![ref_upper],
vec![vec![alt_base]],
) {
v.quality = Some(qual);
variants.push(v);
}
}
variants
}
#[cfg(test)]
mod tests {
use super::*;
fn make_record(
qname: &str,
flag: u16,
rname: &str,
pos: u64,
mapq: u8,
cigar: &str,
seq: &str,
qual: &str,
) -> SamRecord {
SamRecord {
qname: qname.to_string(),
flag,
rname: rname.to_string(),
pos,
mapq,
cigar: cigar.to_string(),
rnext: "*".to_string(),
pnext: 0,
tlen: 0,
sequence: seq.to_string(),
quality: qual.to_string(),
}
}
#[test]
fn test_cigar_simple_match() {
let ops = parse_cigar_ops("50M").unwrap();
assert_eq!(ops, vec![CigarOp::Align(50)]);
}
#[test]
fn test_cigar_complex() {
let ops = parse_cigar_ops("10M2I3D5M").unwrap();
assert_eq!(
ops,
vec![
CigarOp::Align(10),
CigarOp::Ins(2),
CigarOp::Del(3),
CigarOp::Align(5),
]
);
}
#[test]
fn test_cigar_star() {
let ops = parse_cigar_ops("*").unwrap();
assert!(ops.is_empty());
}
#[test]
fn test_cigar_eq_x_mapped_to_align() {
let ops = parse_cigar_ops("5=3X2M").unwrap();
assert_eq!(
ops,
vec![CigarOp::Align(5), CigarOp::Align(3), CigarOp::Align(2)]
);
}
#[test]
fn test_cigar_invalid_char() {
let result = parse_cigar_ops("10Z");
assert!(result.is_err());
}
#[test]
fn test_pileup_single_read() {
let records = vec![make_record("r1", 0, "chr1", 1, 60, "4M", "ACGT", "IIII")];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups.len(), 1);
assert_eq!(pileups[0].rname, "chr1");
assert_eq!(pileups[0].columns.len(), 4);
assert_eq!(pileups[0].columns[0].pos, 0);
assert_eq!(pileups[0].columns[0].depth, 1);
assert_eq!(pileups[0].columns[0].bases, vec![b'A']);
assert_eq!(pileups[0].columns[0].base_counts[0], 1); }
#[test]
fn test_pileup_two_overlapping_reads() {
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "4M", "ACGT", "IIII"),
make_record("r2", 0, "chr1", 3, 60, "4M", "TTAA", "IIII"),
];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups.len(), 1);
assert_eq!(pileups[0].columns.len(), 6);
let col2 = &pileups[0].columns[2];
assert_eq!(col2.pos, 2);
assert_eq!(col2.depth, 2);
assert_eq!(col2.bases, vec![b'G', b'T']);
}
#[test]
fn test_pileup_deletion() {
let records = vec![make_record("r1", 0, "chr1", 1, 60, "2M2D2M", "ACGT", "IIII")];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups[0].columns.len(), 6);
assert_eq!(pileups[0].columns[2].bases, vec![b'*']);
assert_eq!(pileups[0].columns[2].base_counts[5], 1); assert_eq!(pileups[0].columns[3].bases, vec![b'*']);
}
#[test]
fn test_pileup_insertion() {
let records = vec![make_record(
"r1", 0, "chr1", 1, 60, "3M2I3M", "ACGTTAAA", "IIIIIIII",
)];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups[0].columns.len(), 6);
}
#[test]
fn test_pileup_soft_clip() {
let records = vec![make_record(
"r1", 0, "chr1", 1, 60, "2S3M2S", "NNACGNN", "IIIIIII",
)];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups[0].columns.len(), 3);
assert_eq!(pileups[0].columns[0].bases, vec![b'A']);
assert_eq!(pileups[0].columns[1].bases, vec![b'C']);
assert_eq!(pileups[0].columns[2].bases, vec![b'G']);
}
#[test]
fn test_pileup_unmapped_skipped() {
let records = vec![
make_record("r1", 4, "*", 0, 0, "*", "ACGT", "*"), make_record("r2", 0, "chr1", 1, 60, "4M", "TTTT", "IIII"),
];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups.len(), 1);
assert_eq!(pileups[0].columns[0].depth, 1); }
#[test]
fn test_pileup_star_cigar_skipped() {
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "*", "ACGT", "IIII"),
make_record("r2", 0, "chr1", 1, 60, "4M", "TTTT", "IIII"),
];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups.len(), 1);
assert_eq!(pileups[0].columns[0].depth, 1);
}
#[test]
fn test_pileup_with_reference() {
let mut reference = HashMap::new();
reference.insert("chr1".to_string(), b"ACGTACGT".to_vec());
let records = vec![make_record("r1", 0, "chr1", 1, 60, "4M", "ACGT", "IIII")];
let pileups = pileup(&records, Some(&reference)).unwrap();
assert_eq!(pileups[0].columns[0].ref_base, b'A');
assert_eq!(pileups[0].columns[1].ref_base, b'C');
assert_eq!(pileups[0].columns[2].ref_base, b'G');
assert_eq!(pileups[0].columns[3].ref_base, b'T');
}
#[test]
fn test_pileup_multi_reference() {
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "4M", "AAAA", "IIII"),
make_record("r2", 0, "chr2", 1, 60, "4M", "CCCC", "IIII"),
];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups.len(), 2);
assert_eq!(pileups[0].rname, "chr1");
assert_eq!(pileups[1].rname, "chr2");
}
#[test]
fn test_pileup_region_filters() {
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "4M", "AAAA", "IIII"),
make_record("r2", 0, "chr2", 1, 60, "4M", "CCCC", "IIII"),
make_record("r3", 0, "chr1", 5, 60, "4M", "GGGG", "IIII"),
];
let p = pileup_region(&records, "chr1", None).unwrap();
assert_eq!(p.rname, "chr1");
assert_eq!(p.columns.len(), 8);
}
#[test]
fn test_mpileup_basic_format() {
let mut reference = HashMap::new();
reference.insert("chr1".to_string(), b"ACGT".to_vec());
let records = vec![make_record("r1", 0, "chr1", 1, 60, "4M", "ACGT", "IIII")];
let pileups = pileup(&records, Some(&reference)).unwrap();
let output = pileup_to_mpileup(&pileups[0]);
let lines: Vec<&str> = output.lines().collect();
assert_eq!(lines.len(), 4);
assert!(lines[0].starts_with("chr1\t1\tA\t1\t"));
}
#[test]
fn test_mpileup_match_dot_comma() {
let mut reference = HashMap::new();
reference.insert("chr1".to_string(), b"AAAA".to_vec());
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "4M", "AAAA", "IIII"), make_record("r2", 16, "chr1", 1, 60, "4M", "AAAA", "IIII"), ];
let pileups = pileup(&records, Some(&reference)).unwrap();
let output = pileup_to_mpileup(&pileups[0]);
let lines: Vec<&str> = output.lines().collect();
let fields: Vec<&str> = lines[0].split('\t').collect();
assert_eq!(fields[4], ".,");
}
#[test]
fn test_mpileup_mismatch_case() {
let mut reference = HashMap::new();
reference.insert("chr1".to_string(), b"AAAA".to_vec());
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "4M", "CCCC", "IIII"), make_record("r2", 16, "chr1", 1, 60, "4M", "CCCC", "IIII"), ];
let pileups = pileup(&records, Some(&reference)).unwrap();
let output = pileup_to_mpileup(&pileups[0]);
let lines: Vec<&str> = output.lines().collect();
let fields: Vec<&str> = lines[0].split('\t').collect();
assert_eq!(fields[4], "Cc");
}
#[test]
fn test_depth_stats_basic() {
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "4M", "ACGT", "IIII"),
make_record("r2", 0, "chr1", 3, 60, "4M", "TTAA", "IIII"),
];
let pileups = pileup(&records, None).unwrap();
let stats = depth_stats(&pileups[0]);
assert_eq!(stats.rname, "chr1");
assert_eq!(stats.length, 6); assert_eq!(stats.covered, 6);
assert_eq!(stats.min_depth, 1);
assert_eq!(stats.max_depth, 2);
assert!((stats.breadth - 1.0).abs() < f64::EPSILON);
}
#[test]
fn test_depth_stats_gaps() {
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "2M", "AC", "II"),
make_record("r2", 0, "chr1", 5, 60, "2M", "GT", "II"),
];
let pileups = pileup(&records, None).unwrap();
let stats = depth_stats(&pileups[0]);
assert_eq!(stats.length, 6);
assert_eq!(stats.covered, 4);
assert_eq!(stats.min_depth, 0);
assert_eq!(stats.max_depth, 1);
assert!((stats.breadth - 4.0 / 6.0).abs() < 1e-10);
}
#[test]
fn test_depth_stats_empty() {
let p = Pileup {
rname: "chr1".to_string(),
columns: vec![],
};
let stats = depth_stats(&p);
assert_eq!(stats.length, 0);
assert_eq!(stats.covered, 0);
assert_eq!(stats.min_depth, 0);
assert_eq!(stats.max_depth, 0);
assert_eq!(stats.mean_depth, 0.0);
}
#[cfg(feature = "vcf")]
#[allow(deprecated)]
mod snp_tests {
use super::*;
#[test]
fn test_call_snps_basic() {
let mut reference = HashMap::new();
reference.insert("chr1".to_string(), b"AAAA".to_vec());
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "1M", "T", "I"),
make_record("r2", 0, "chr1", 1, 60, "1M", "T", "I"),
make_record("r3", 0, "chr1", 1, 60, "1M", "T", "I"),
make_record("r4", 0, "chr1", 1, 60, "1M", "A", "I"),
];
let pileups = pileup(&records, Some(&reference)).unwrap();
let variants = call_snps(&pileups[0], 1, 0.2, 1);
assert_eq!(variants.len(), 1);
assert_eq!(variants[0].chrom, "chr1");
assert_eq!(variants[0].position, 1); assert_eq!(variants[0].ref_allele, vec![b'A']);
assert_eq!(variants[0].alt_alleles, vec![vec![b'T']]);
}
#[test]
fn test_call_snps_below_threshold() {
let mut reference = HashMap::new();
reference.insert("chr1".to_string(), b"AAAA".to_vec());
let mut records: Vec<SamRecord> = (0..9)
.map(|i| make_record(&format!("r{i}"), 0, "chr1", 1, 60, "1M", "A", "I"))
.collect();
records.push(make_record("r9", 0, "chr1", 1, 60, "1M", "T", "I"));
let pileups = pileup(&records, Some(&reference)).unwrap();
let variants = call_snps(&pileups[0], 1, 0.2, 1);
assert!(variants.is_empty());
}
#[test]
fn test_call_snps_min_depth() {
let mut reference = HashMap::new();
reference.insert("chr1".to_string(), b"AAAA".to_vec());
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "1M", "T", "I"),
make_record("r2", 0, "chr1", 1, 60, "1M", "T", "I"),
];
let pileups = pileup(&records, Some(&reference)).unwrap();
let variants = call_snps(&pileups[0], 5, 0.1, 1);
assert!(variants.is_empty());
}
#[test]
fn test_call_snps_variant_quality() {
let mut reference = HashMap::new();
reference.insert("chr1".to_string(), b"AAAA".to_vec());
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "1M", "G", "I"),
make_record("r2", 0, "chr1", 1, 60, "1M", "G", "I"),
];
let pileups = pileup(&records, Some(&reference)).unwrap();
let variants = call_snps(&pileups[0], 1, 0.1, 1);
assert_eq!(variants.len(), 1);
assert!(variants[0].quality.unwrap() >= 90.0);
}
}
#[test]
fn test_pileup_empty_records() {
let records: Vec<SamRecord> = vec![];
let pileups = pileup(&records, None).unwrap();
assert!(pileups.is_empty());
}
#[test]
fn test_pileup_quality_star() {
let records = vec![make_record("r1", 0, "chr1", 1, 60, "4M", "ACGT", "*")];
let pileups = pileup(&records, None).unwrap();
for col in &pileups[0].columns {
for &q in &col.qualities {
assert_eq!(q, 0);
}
}
}
#[test]
fn test_pileup_reverse_strand_bases() {
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "4M", "ACGT", "IIII"), make_record("r2", 16, "chr1", 1, 60, "4M", "ACGT", "IIII"), ];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups[0].columns[0].bases, vec![b'A', b'a']);
assert_eq!(pileups[0].columns[1].bases, vec![b'C', b'c']);
assert_eq!(pileups[0].columns[2].bases, vec![b'G', b'g']);
assert_eq!(pileups[0].columns[3].bases, vec![b'T', b't']);
}
#[test]
fn test_pileup_hard_clip() {
let records = vec![make_record("r1", 0, "chr1", 1, 60, "5H3M5H", "ACG", "III")];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups[0].columns.len(), 3);
assert_eq!(pileups[0].columns[0].bases, vec![b'A']);
}
#[test]
fn test_pileup_n_splice() {
let records = vec![make_record(
"r1", 0, "chr1", 1, 60, "2M3N2M", "ACGT", "IIII",
)];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups[0].columns.len(), 7);
assert_eq!(pileups[0].columns[2].bases, vec![b'*']);
}
#[test]
fn test_cigar_hard_soft_clip_combo() {
let records = vec![make_record(
"r1", 0, "chr1", 1, 60, "3H2S4M", "NNACGT", "IIIIII",
)];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups[0].columns.len(), 4);
assert_eq!(pileups[0].columns[0].bases, vec![b'A']);
assert_eq!(pileups[0].columns[1].bases, vec![b'C']);
assert_eq!(pileups[0].columns[2].bases, vec![b'G']);
assert_eq!(pileups[0].columns[3].bases, vec![b'T']);
}
#[test]
fn test_depth_stats_median() {
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "4M", "AAAA", "IIII"),
make_record("r2", 0, "chr1", 1, 60, "4M", "AAAA", "IIII"),
make_record("r3", 0, "chr1", 1, 60, "6M", "AAAAAA", "IIIIII"),
];
let pileups = pileup(&records, None).unwrap();
let stats = depth_stats(&pileups[0]);
assert_eq!(stats.min_depth, 1);
assert_eq!(stats.max_depth, 3);
assert!((stats.median_depth - 3.0).abs() < f64::EPSILON);
}
#[test]
fn test_insertion_evidence_from_cigar() {
let records = vec![make_record(
"r1", 0, "chr1", 1, 60, "3M2I3M", "ACGTTAAA", "IIIIIIII",
)];
let pileups = pileup(&records, None).unwrap();
assert_eq!(pileups[0].columns.len(), 6);
let anchor_col = &pileups[0].columns[2];
assert_eq!(anchor_col.pos, 2);
assert_eq!(anchor_col.insertions.len(), 1);
assert_eq!(anchor_col.insertions[0].bases, b"TT");
assert!(anchor_col.insertions[0].mean_quality > 0);
}
#[test]
fn test_insertion_multiple_reads() {
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "2M2I2M", "ACTTGG", "IIIIII"),
make_record("r2", 0, "chr1", 1, 60, "2M2I2M", "ACTTGG", "IIIIII"),
make_record("r3", 0, "chr1", 1, 60, "2M3I2M", "ACAAATT", "IIIIIII"),
];
let pileups = pileup(&records, None).unwrap();
let anchor_col = &pileups[0].columns[1];
assert_eq!(anchor_col.insertions.len(), 3);
}
#[test]
fn test_mapping_qualities_recorded() {
let records = vec![
make_record("r1", 0, "chr1", 1, 60, "3M", "ACG", "III"),
make_record("r2", 0, "chr1", 1, 30, "3M", "ACG", "III"),
];
let pileups = pileup(&records, None).unwrap();
for col in &pileups[0].columns {
assert_eq!(col.mapping_qualities.len(), 2);
assert_eq!(col.mapping_qualities[0], 60);
assert_eq!(col.mapping_qualities[1], 30);
}
}
#[test]
fn test_deletion_has_mapping_quality() {
let records = vec![make_record("r1", 0, "chr1", 1, 42, "2M1D2M", "ACGT", "IIII")];
let pileups = pileup(&records, None).unwrap();
let del_col = &pileups[0].columns[2];
assert_eq!(del_col.bases, vec![b'*']);
assert_eq!(del_col.mapping_qualities, vec![42]);
}
}