mod helpers;
use helpers::{SamBuilder, read_metrics_tsv};
use noodles::core::Position;
use noodles::sam::alignment::RecordBuf;
use noodles::sam::alignment::record::Flags;
use noodles::sam::alignment::record::MappingQuality;
use noodles::sam::alignment::record::cigar::{Op, op::Kind};
use noodles::sam::alignment::record_buf::{Cigar, QualityScores, Sequence};
use riker_lib::collector::Collector;
use riker_lib::commands::basic::{
BASE_DIST_PLOT_SUFFIX, BASE_DIST_SUFFIX, BaseDistributionByCycleMetric, BasicCollector,
MEAN_QUAL_PLOT_SUFFIX, MEAN_QUAL_SUFFIX, MeanQualityByCycleMetric, QUAL_DIST_PLOT_SUFFIX,
QUAL_DIST_SUFFIX, QualityScoreDistributionMetric,
};
use tempfile::TempDir;
fn run_basic(builder: &SamBuilder) -> (TempDir, std::path::PathBuf) {
let bam = builder.to_temp_bam().unwrap();
let dir = TempDir::new().unwrap();
let prefix = dir.path().join("out");
let header = builder.header().clone();
let mut collector = BasicCollector::new(bam.path(), &prefix);
collector.initialize(&header).unwrap();
let mut reader =
riker_lib::sam::alignment_reader::AlignmentReader::open(bam.path(), None).unwrap();
let hdr = reader.header().clone();
let requirements = collector.field_needs();
for result in reader.riker_records(&requirements) {
let record = result.unwrap();
collector.accept(&record, &hdr).unwrap();
}
collector.finish().unwrap();
(dir, prefix)
}
fn make_record(name: &str, flags: Flags, seq: &[u8], quals: &[u8]) -> RecordBuf {
let cigar: Cigar = [Op::new(Kind::Match, seq.len())].into_iter().collect();
RecordBuf::builder()
.set_name(name)
.set_flags(flags)
.set_reference_sequence_id(0)
.set_alignment_start(Position::new(1).unwrap())
.set_mapping_quality(MappingQuality::new(60).unwrap())
.set_cigar(cigar)
.set_sequence(Sequence::from(seq.to_vec()))
.set_quality_scores(QualityScores::from(quals.to_vec()))
.build()
}
fn make_paired_record(
name: &str,
is_r2: bool,
is_reverse: bool,
seq: &[u8],
quals: &[u8],
) -> RecordBuf {
let mut flags = Flags::SEGMENTED | Flags::PROPERLY_SEGMENTED;
if is_r2 {
flags |= Flags::LAST_SEGMENT;
} else {
flags |= Flags::FIRST_SEGMENT;
}
if is_reverse {
flags |= Flags::REVERSE_COMPLEMENTED;
}
let cigar: Cigar = [Op::new(Kind::Match, seq.len())].into_iter().collect();
RecordBuf::builder()
.set_name(name)
.set_flags(flags)
.set_reference_sequence_id(0)
.set_alignment_start(Position::new(1).unwrap())
.set_mapping_quality(MappingQuality::new(60).unwrap())
.set_cigar(cigar)
.set_mate_reference_sequence_id(0)
.set_mate_alignment_start(Position::new(100).unwrap())
.set_template_length(if is_r2 { -200 } else { 200 })
.set_sequence(Sequence::from(seq.to_vec()))
.set_quality_scores(QualityScores::from(quals.to_vec()))
.build()
}
#[test]
fn test_paired_reads() {
let mut builder = SamBuilder::new();
builder.add_record(make_paired_record("r1", false, false, b"ACGT", &[10, 20, 30, 40]));
builder.add_record(make_paired_record("r1", true, true, b"TTTT", &[20, 20, 20, 20]));
let (_dir, prefix) = run_basic(&builder);
let bd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), BASE_DIST_SUFFIX));
let base_dist: Vec<BaseDistributionByCycleMetric> = read_metrics_tsv(&bd_path).unwrap();
let r1: Vec<_> = base_dist.iter().filter(|m| m.read_end == 1).collect();
assert_eq!(r1.len(), 4);
assert_float_eq!(r1[0].frac_a, 1.0, 1e-5);
assert_float_eq!(r1[0].frac_c, 0.0, 1e-5);
assert_float_eq!(r1[1].frac_c, 1.0, 1e-5);
assert_float_eq!(r1[2].frac_g, 1.0, 1e-5);
assert_float_eq!(r1[3].frac_t, 1.0, 1e-5);
let r2: Vec<_> = base_dist.iter().filter(|m| m.read_end == 2).collect();
assert_eq!(r2.len(), 4);
for m in &r2 {
assert_float_eq!(m.frac_t, 1.0, 1e-5);
}
let mq_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), MEAN_QUAL_SUFFIX));
let mean_qual: Vec<MeanQualityByCycleMetric> = read_metrics_tsv(&mq_path).unwrap();
assert_eq!(mean_qual.len(), 8);
assert_float_eq!(mean_qual[0].mean_quality, 10.0, 0.01);
assert_float_eq!(mean_qual[1].mean_quality, 20.0, 0.01);
assert_float_eq!(mean_qual[2].mean_quality, 30.0, 0.01);
assert_float_eq!(mean_qual[3].mean_quality, 40.0, 0.01);
assert_eq!(mean_qual[4].cycle, 5);
for m in &mean_qual[4..8] {
assert_float_eq!(m.mean_quality, 20.0, 0.01);
}
let qd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), QUAL_DIST_SUFFIX));
let qual_dist: Vec<QualityScoreDistributionMetric> = read_metrics_tsv(&qd_path).unwrap();
let q10 = qual_dist.iter().find(|m| m.quality == 10).unwrap();
assert_eq!(q10.count, 1);
let q20 = qual_dist.iter().find(|m| m.quality == 20).unwrap();
assert_eq!(q20.count, 5);
let q30 = qual_dist.iter().find(|m| m.quality == 30).unwrap();
assert_eq!(q30.count, 1);
let q40 = qual_dist.iter().find(|m| m.quality == 40).unwrap();
assert_eq!(q40.count, 1);
}
#[test]
fn test_unpaired_forward_read() {
let mut builder = SamBuilder::new();
let flags = Flags::empty();
builder.add_record(make_record("r1", flags, b"ACG", &[10, 20, 30]));
let (_dir, prefix) = run_basic(&builder);
let bd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), BASE_DIST_SUFFIX));
let base_dist: Vec<BaseDistributionByCycleMetric> = read_metrics_tsv(&bd_path).unwrap();
assert_eq!(base_dist.len(), 3);
assert!(base_dist.iter().all(|m| m.read_end == 1));
assert_eq!(base_dist[0].cycle, 1);
assert_float_eq!(base_dist[0].frac_a, 1.0, 1e-5);
assert_eq!(base_dist[1].cycle, 2);
assert_float_eq!(base_dist[1].frac_c, 1.0, 1e-5);
assert_eq!(base_dist[2].cycle, 3);
assert_float_eq!(base_dist[2].frac_g, 1.0, 1e-5);
}
#[test]
fn test_unpaired_reverse_read() {
let mut builder = SamBuilder::new();
let flags = Flags::REVERSE_COMPLEMENTED;
builder.add_record(make_record("r1", flags, b"ACG", &[10, 20, 30]));
let (_dir, prefix) = run_basic(&builder);
let bd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), BASE_DIST_SUFFIX));
let base_dist: Vec<BaseDistributionByCycleMetric> = read_metrics_tsv(&bd_path).unwrap();
assert_eq!(base_dist.len(), 3);
assert_eq!(base_dist[0].cycle, 1);
assert_float_eq!(base_dist[0].frac_g, 1.0, 1e-5);
assert_eq!(base_dist[1].cycle, 2);
assert_float_eq!(base_dist[1].frac_c, 1.0, 1e-5);
assert_eq!(base_dist[2].cycle, 3);
assert_float_eq!(base_dist[2].frac_a, 1.0, 1e-5);
let mq_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), MEAN_QUAL_SUFFIX));
let mean_qual: Vec<MeanQualityByCycleMetric> = read_metrics_tsv(&mq_path).unwrap();
assert_eq!(mean_qual.len(), 3);
assert_float_eq!(mean_qual[0].mean_quality, 30.0, 0.01); assert_float_eq!(mean_qual[1].mean_quality, 20.0, 0.01); assert_float_eq!(mean_qual[2].mean_quality, 10.0, 0.01); }
#[test]
fn test_n_bases_in_base_dist_excluded_from_qual_dist() {
let mut builder = SamBuilder::new();
let flags = Flags::empty();
builder.add_record(make_record("r1", flags, b"ANT", &[10, 20, 30]));
let (_dir, prefix) = run_basic(&builder);
let bd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), BASE_DIST_SUFFIX));
let base_dist: Vec<BaseDistributionByCycleMetric> = read_metrics_tsv(&bd_path).unwrap();
assert_eq!(base_dist.len(), 3);
assert_float_eq!(base_dist[1].frac_n, 1.0, 1e-5);
let qd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), QUAL_DIST_SUFFIX));
let qual_dist: Vec<QualityScoreDistributionMetric> = read_metrics_tsv(&qd_path).unwrap();
assert_eq!(qual_dist.len(), 2);
let q10 = qual_dist.iter().find(|m| m.quality == 10).unwrap();
assert_eq!(q10.count, 1);
let q30 = qual_dist.iter().find(|m| m.quality == 30).unwrap();
assert_eq!(q30.count, 1);
assert!(qual_dist.iter().find(|m| m.quality == 20).is_none());
}
#[test]
fn test_secondary_supplementary_skipped() {
let mut builder = SamBuilder::new();
builder.add_record(make_record("r1", Flags::empty(), b"ACGT", &[30, 30, 30, 30]));
builder.add_record(make_record("r2", Flags::SECONDARY, b"TTTT", &[30, 30, 30, 30]));
builder.add_record(make_record("r3", Flags::SUPPLEMENTARY, b"GGGG", &[30, 30, 30, 30]));
let (_dir, prefix) = run_basic(&builder);
let qd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), QUAL_DIST_SUFFIX));
let qual_dist: Vec<QualityScoreDistributionMetric> = read_metrics_tsv(&qd_path).unwrap();
let total: u64 = qual_dist.iter().map(|m| m.count).sum();
assert_eq!(total, 4);
}
#[test]
fn test_qc_fail_skipped() {
let mut builder = SamBuilder::new();
builder.add_record(make_record("r1", Flags::empty(), b"AC", &[30, 30]));
builder.add_record(make_record("r2", Flags::QC_FAIL, b"GT", &[30, 30]));
let (_dir, prefix) = run_basic(&builder);
let qd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), QUAL_DIST_SUFFIX));
let qual_dist: Vec<QualityScoreDistributionMetric> = read_metrics_tsv(&qd_path).unwrap();
let total: u64 = qual_dist.iter().map(|m| m.count).sum();
assert_eq!(total, 2); }
#[test]
fn test_duplicates_included() {
let mut builder = SamBuilder::new();
builder.add_record(make_record("r1", Flags::empty(), b"AC", &[30, 30]));
builder.add_record(make_record("r2", Flags::DUPLICATE, b"GT", &[30, 30]));
let (_dir, prefix) = run_basic(&builder);
let qd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), QUAL_DIST_SUFFIX));
let qual_dist: Vec<QualityScoreDistributionMetric> = read_metrics_tsv(&qd_path).unwrap();
let total: u64 = qual_dist.iter().map(|m| m.count).sum();
assert_eq!(total, 4); }
#[test]
fn test_mixed_read_lengths() {
let mut builder = SamBuilder::new();
builder.add_record(make_record("r1", Flags::empty(), b"ACG", &[10, 20, 30]));
builder.add_record(make_record("r2", Flags::empty(), b"TTTTT", &[40, 40, 40, 40, 40]));
let (_dir, prefix) = run_basic(&builder);
let bd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), BASE_DIST_SUFFIX));
let base_dist: Vec<BaseDistributionByCycleMetric> = read_metrics_tsv(&bd_path).unwrap();
assert_eq!(base_dist.len(), 5);
assert_float_eq!(base_dist[0].frac_a, 0.5, 1e-5);
assert_float_eq!(base_dist[0].frac_t, 0.5, 1e-5);
assert_float_eq!(base_dist[3].frac_t, 1.0, 1e-5);
assert_float_eq!(base_dist[4].frac_t, 1.0, 1e-5);
}
#[test]
fn test_empty_bam() {
let builder = SamBuilder::new();
let (_dir, prefix) = run_basic(&builder);
let bd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), BASE_DIST_SUFFIX));
let base_dist: Vec<BaseDistributionByCycleMetric> = read_metrics_tsv(&bd_path).unwrap();
assert!(base_dist.is_empty());
let mq_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), MEAN_QUAL_SUFFIX));
let mean_qual: Vec<MeanQualityByCycleMetric> = read_metrics_tsv(&mq_path).unwrap();
assert!(mean_qual.is_empty());
let qd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), QUAL_DIST_SUFFIX));
let qual_dist: Vec<QualityScoreDistributionMetric> = read_metrics_tsv(&qd_path).unwrap();
assert!(qual_dist.is_empty());
}
#[test]
fn test_multiple_reads_accumulation() {
let mut builder = SamBuilder::new();
builder.add_record(make_record("r1", Flags::empty(), b"AA", &[10, 20]));
builder.add_record(make_record("r2", Flags::empty(), b"CC", &[30, 40]));
let (_dir, prefix) = run_basic(&builder);
let bd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), BASE_DIST_SUFFIX));
let base_dist: Vec<BaseDistributionByCycleMetric> = read_metrics_tsv(&bd_path).unwrap();
assert_eq!(base_dist.len(), 2);
assert_float_eq!(base_dist[0].frac_a, 0.5, 1e-5);
assert_float_eq!(base_dist[0].frac_c, 0.5, 1e-5);
assert_float_eq!(base_dist[1].frac_a, 0.5, 1e-5);
assert_float_eq!(base_dist[1].frac_c, 0.5, 1e-5);
let mq_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), MEAN_QUAL_SUFFIX));
let mean_qual: Vec<MeanQualityByCycleMetric> = read_metrics_tsv(&mq_path).unwrap();
assert_eq!(mean_qual.len(), 2);
assert_float_eq!(mean_qual[0].mean_quality, 20.0, 0.01);
assert_float_eq!(mean_qual[1].mean_quality, 30.0, 0.01);
}
#[test]
fn test_plot_files_created() {
let mut builder = SamBuilder::new();
builder.add_record(make_record("r1", Flags::empty(), b"ACGT", &[30, 30, 30, 30]));
let (_dir, prefix) = run_basic(&builder);
for suffix in [BASE_DIST_PLOT_SUFFIX, MEAN_QUAL_PLOT_SUFFIX, QUAL_DIST_PLOT_SUFFIX] {
let path = std::path::PathBuf::from(format!("{}{suffix}", prefix.to_str().unwrap()));
assert!(path.exists(), "Missing plot file: {}", path.display());
assert!(std::fs::metadata(&path).unwrap().len() > 0, "Empty plot file: {}", path.display());
}
}
#[test]
fn test_truncated_quality_scores_do_not_panic() {
use noodles::sam::Header;
use riker_lib::sam::riker_record::RikerRecord;
let buf = make_record("truncated", Flags::empty(), b"ACGT", &[30, 30]);
let header = Header::default();
let record = RikerRecord::from_alignment_record(&header, &buf).unwrap();
let dir = TempDir::new().unwrap();
let prefix = dir.path().join("out");
let mut collector = BasicCollector::new(std::path::Path::new("none"), &prefix);
collector.initialize(&header).unwrap();
collector.accept(&record, &header).unwrap();
collector.finish().unwrap();
let bd_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), BASE_DIST_SUFFIX));
let base_dist: Vec<BaseDistributionByCycleMetric> = read_metrics_tsv(&bd_path).unwrap();
assert_eq!(base_dist.len(), 2, "expected 2 cycles in base distribution");
let mq_path =
std::path::PathBuf::from(format!("{}{}", prefix.to_str().unwrap(), MEAN_QUAL_SUFFIX));
let mean_qual: Vec<MeanQualityByCycleMetric> = read_metrics_tsv(&mq_path).unwrap();
assert_eq!(mean_qual.len(), 2, "expected 2 cycles with quality data");
}
#[test]
fn test_lowercase_bases_handled_case_insensitively() {
let mut upper = SamBuilder::new();
upper.add_record(make_record("u", Flags::empty(), b"ACGT", &[30, 30, 30, 30]));
let (_dir_u, prefix_u) = run_basic(&upper);
let mut lower = SamBuilder::new();
lower.add_record(make_record("l", Flags::empty(), b"acgt", &[30, 30, 30, 30]));
let (_dir_l, prefix_l) = run_basic(&lower);
let bd_u: Vec<BaseDistributionByCycleMetric> = read_metrics_tsv(&std::path::PathBuf::from(
format!("{}{}", prefix_u.to_str().unwrap(), BASE_DIST_SUFFIX),
))
.unwrap();
let bd_l: Vec<BaseDistributionByCycleMetric> = read_metrics_tsv(&std::path::PathBuf::from(
format!("{}{}", prefix_l.to_str().unwrap(), BASE_DIST_SUFFIX),
))
.unwrap();
assert_eq!(bd_u.len(), bd_l.len());
for (mu, ml) in bd_u.iter().zip(bd_l.iter()) {
assert_float_eq!(mu.frac_a, ml.frac_a, 1e-9);
assert_float_eq!(mu.frac_c, ml.frac_c, 1e-9);
assert_float_eq!(mu.frac_g, ml.frac_g, 1e-9);
assert_float_eq!(mu.frac_t, ml.frac_t, 1e-9);
assert_float_eq!(mu.frac_n, ml.frac_n, 1e-9);
}
}
#[test]
fn test_iupac_ambiguity_codes_excluded_from_qual_dist() {
let mut builder = SamBuilder::new();
builder.add_record(make_record("r1", Flags::empty(), b"AWC", &[10, 20, 30]));
let (_dir, prefix) = run_basic(&builder);
let bd: Vec<BaseDistributionByCycleMetric> = read_metrics_tsv(&std::path::PathBuf::from(
format!("{}{}", prefix.to_str().unwrap(), BASE_DIST_SUFFIX),
))
.unwrap();
assert_eq!(bd.len(), 3);
assert_float_eq!(bd[1].frac_n, 1.0, 1e-9);
assert_float_eq!(bd[1].frac_a, 0.0, 1e-9);
let qd: Vec<QualityScoreDistributionMetric> = read_metrics_tsv(&std::path::PathBuf::from(
format!("{}{}", prefix.to_str().unwrap(), QUAL_DIST_SUFFIX),
))
.unwrap();
assert_eq!(qd.len(), 2, "expected only A's and C's qualities to appear");
assert!(qd.iter().any(|m| m.quality == 10 && m.count == 1));
assert!(qd.iter().any(|m| m.quality == 30 && m.count == 1));
assert!(qd.iter().all(|m| m.quality != 20), "W's Q20 should be excluded");
}
#[test]
fn test_qual_histogram_bank_merge() {
let mut builder = SamBuilder::new();
builder.add_record(make_record("r1", Flags::empty(), b"ACGTACGT", &[35; 8]));
let (_dir, prefix) = run_basic(&builder);
let qd: Vec<QualityScoreDistributionMetric> = read_metrics_tsv(&std::path::PathBuf::from(
format!("{}{}", prefix.to_str().unwrap(), QUAL_DIST_SUFFIX),
))
.unwrap();
let q35 = qd.iter().find(|m| m.quality == 35).expect("expected Q35 entry");
assert_eq!(q35.count, 8, "all 8 bases should be counted across the 4 banks");
}
#[test]
fn test_reverse_strand_with_heterogeneous_qualities() {
let mut builder = SamBuilder::new();
builder.add_record(make_record("rev", Flags::REVERSE_COMPLEMENTED, b"ACGT", &[10, 20, 30, 40]));
let (_dir, prefix) = run_basic(&builder);
let mq: Vec<MeanQualityByCycleMetric> = read_metrics_tsv(&std::path::PathBuf::from(format!(
"{}{}",
prefix.to_str().unwrap(),
MEAN_QUAL_SUFFIX
)))
.unwrap();
assert_eq!(mq.len(), 4);
assert_float_eq!(mq[0].mean_quality, 40.0, 0.01);
assert_float_eq!(mq[1].mean_quality, 30.0, 0.01);
assert_float_eq!(mq[2].mean_quality, 20.0, 0.01);
assert_float_eq!(mq[3].mean_quality, 10.0, 0.01);
}