use std::collections::HashSet;
use std::path::{Path, PathBuf};
use anyhow::Result;
use clap::Args;
use kuva::plot::LinePlot;
use kuva::plot::legend::LegendPosition;
use kuva::plot::scatter::ScatterPlot;
use kuva::render::annotations::ReferenceLine;
use kuva::render::layout::{Layout, TickFormat};
use kuva::render::plots::Plot;
use noodles::sam::Header;
use noodles::sam::alignment::RecordBuf;
use riker_derive::MetricDocs;
use serde::{Deserialize, Serialize};
use crate::collector::Collector;
use crate::commands::command::Command;
use crate::commands::common::{InputOptions, OutputOptions, ReferenceOptions};
use crate::fasta::Fasta;
use crate::metrics::{serialize_f64_2dp, serialize_f64_5dp, write_tsv};
use crate::plotting::{
FG_BLUE, FG_GRAY, FG_GREEN, FG_SKY, FG_TEAL, PLOT_HEIGHT, PLOT_WIDTH, write_twin_y_plot_pdf,
};
use crate::progress::ProgressLogger;
use crate::sam::alignment_reader::AlignmentReader;
use crate::sam::record_utils::{derive_sample, get_integer_tag};
use crate::sequence_dict::SequenceDictionary;
pub const DETAIL_SUFFIX: &str = ".gcbias-detail.txt";
pub const SUMMARY_SUFFIX: &str = ".gcbias-summary.txt";
pub const PLOT_SUFFIX: &str = ".gcbias-chart.pdf";
const NUM_GC_BINS: usize = 101;
const MAX_N_IN_WINDOW: u32 = 4;
const GC_FLAG: u8 = 0x01;
const N_FLAG: u8 = 0x02;
static BASE_FLAGS: [u8; 256] = {
let mut table = [0u8; 256];
table[b'G' as usize] = GC_FLAG;
table[b'g' as usize] = GC_FLAG;
table[b'C' as usize] = GC_FLAG;
table[b'c' as usize] = GC_FLAG;
table[b'N' as usize] = N_FLAG;
table[b'n' as usize] = N_FLAG;
table
};
#[riker_derive::multi_options("gcbias", "GC Bias Options")]
#[derive(Args, Debug, Clone)]
#[command()]
pub struct GcBiasOptions {
#[arg(long, default_value_t = false)]
pub exclude_duplicates: bool,
#[arg(long, default_value_t = GcBiasOptions::DEFAULT_WINDOW_SIZE)]
pub window_size: u32,
#[arg(long, default_value_t = GcBiasOptions::DEFAULT_MIN_MAPQ)]
pub min_mapq: u8,
#[arg(long, default_value_t = false)]
pub exclude_supplementary: bool,
}
impl GcBiasOptions {
const DEFAULT_WINDOW_SIZE: u32 = 100;
const DEFAULT_MIN_MAPQ: u8 = 0;
}
impl Default for GcBiasOptions {
fn default() -> Self {
Self {
exclude_duplicates: false,
window_size: Self::DEFAULT_WINDOW_SIZE,
min_mapq: Self::DEFAULT_MIN_MAPQ,
exclude_supplementary: false,
}
}
}
#[derive(Args, Debug, Clone)]
#[command(
long_about,
after_long_help = "\
Examples:
riker gcbias -i input.bam -o out_prefix -R ref.fa
riker gcbias -i input.bam -o out_prefix -R ref.fa --window-size 150"
)]
pub struct GcBias {
#[command(flatten)]
pub input: InputOptions,
#[command(flatten)]
pub output: OutputOptions,
#[command(flatten)]
pub reference: ReferenceOptions,
#[command(flatten)]
pub options: GcBiasOptions,
}
impl Command for GcBias {
fn execute(&self) -> Result<()> {
let (mut reader, header) =
AlignmentReader::new(&self.input.input, Some(&self.reference.reference))?;
let reference = Fasta::from_path(&self.reference.reference)?;
let mut collector =
GcBiasCollector::new(&self.input.input, &self.output.output, reference, &self.options);
collector.initialize(&header)?;
let mut progress = ProgressLogger::new("gcbias", "reads", 5_000_000);
for result in reader.record_bufs(&header) {
let record = result?;
progress.record_with(&record, &header);
collector.accept(&record, &header)?;
}
progress.finish();
collector.finish()
}
}
pub struct GcBiasCollector {
detail_path: PathBuf,
summary_path: PathBuf,
plot_path: PathBuf,
plot_title: String,
input_path: PathBuf,
reference: Fasta,
window_size: u32,
exclude_duplicates: bool,
min_mapq: u8,
exclude_supplementary: bool,
dict: Option<SequenceDictionary>,
current_contig_id: Option<usize>,
current_gc_at_pos: Vec<u8>, visited_contigs: HashSet<usize>,
windows_by_gc: [u64; NUM_GC_BINS],
reads_by_gc: [u64; NUM_GC_BINS],
bases_by_gc: [u64; NUM_GC_BINS],
errors_by_gc: [u64; NUM_GC_BINS],
quality_sum_by_gc: [u64; NUM_GC_BINS],
quality_bases_by_gc: [u64; NUM_GC_BINS],
total_clusters: u64,
aligned_reads: u64,
sample: String,
}
impl GcBiasCollector {
#[must_use]
pub fn new(input: &Path, prefix: &Path, reference: Fasta, options: &GcBiasOptions) -> Self {
let detail_path = super::command::output_path(prefix, DETAIL_SUFFIX);
let summary_path = super::command::output_path(prefix, SUMMARY_SUFFIX);
let plot_path = super::command::output_path(prefix, PLOT_SUFFIX);
Self {
detail_path,
summary_path,
plot_path,
plot_title: String::new(),
input_path: input.to_path_buf(),
reference,
window_size: options.window_size,
exclude_duplicates: options.exclude_duplicates,
min_mapq: options.min_mapq,
exclude_supplementary: options.exclude_supplementary,
dict: None,
current_contig_id: None,
current_gc_at_pos: Vec::new(),
visited_contigs: HashSet::new(),
windows_by_gc: [0u64; NUM_GC_BINS],
reads_by_gc: [0u64; NUM_GC_BINS],
bases_by_gc: [0u64; NUM_GC_BINS],
errors_by_gc: [0u64; NUM_GC_BINS],
quality_sum_by_gc: [0u64; NUM_GC_BINS],
quality_bases_by_gc: [0u64; NUM_GC_BINS],
total_clusters: 0,
aligned_reads: 0,
sample: String::new(),
}
}
fn process_record(&mut self, record: &RecordBuf) -> Result<()> {
let flags = record.flags();
if flags.is_unmapped()
|| flags.is_secondary()
|| flags.is_qc_fail()
|| (self.exclude_supplementary && flags.is_supplementary())
{
return Ok(());
}
if self.exclude_duplicates && flags.is_duplicate() {
return Ok(());
}
let mapq = record.mapping_quality().map_or(255u8, u8::from);
if mapq < self.min_mapq {
return Ok(());
}
if !flags.is_segmented() || flags.is_first_segment() {
self.total_clusters += 1;
}
self.aligned_reads += 1;
let Some(ref_id) = record.reference_sequence_id() else {
return Ok(());
};
if Some(ref_id) != self.current_contig_id {
let name = self.dict.as_ref().unwrap().get_by_index(ref_id).map_or("", |m| m.name());
let seq = self.reference.load_contig(name, false)?;
let (gc_at_pos, window_counts) = scan_contig_gc(&seq, self.window_size);
self.current_gc_at_pos = gc_at_pos;
for (bin, count) in window_counts.iter().enumerate() {
self.windows_by_gc[bin] += count;
}
self.visited_contigs.insert(ref_id);
self.current_contig_id = Some(ref_id);
}
let alignment_start = record.alignment_start().map_or(0, |p| usize::from(p) - 1); let pos = if flags.is_reverse_complemented() {
let ref_span = record.cigar().alignment_span();
let alignment_end = alignment_start + ref_span;
alignment_end.saturating_sub(self.window_size as usize)
} else {
alignment_start
};
if pos >= self.current_gc_at_pos.len() {
return Ok(());
}
let gc = self.current_gc_at_pos[pos];
if gc > 100 {
return Ok(());
}
let gc = gc as usize;
self.reads_by_gc[gc] += 1;
let read_len = record.sequence().len() as u64;
self.bases_by_gc[gc] += read_len;
let nm = get_integer_tag(record, *b"NM").unwrap_or(0);
self.errors_by_gc[gc] += u64::from(nm);
let qual_bytes: &[u8] = record.quality_scores().as_ref();
if !qual_bytes.is_empty() {
self.quality_sum_by_gc[gc] += qual_bytes.iter().map(|&q| u64::from(q)).sum::<u64>();
self.quality_bases_by_gc[gc] += qual_bytes.len() as u64;
}
Ok(())
}
fn finish_metrics(&self) -> Result<()> {
let total_reads: u64 = self.reads_by_gc.iter().sum();
let total_windows: u64 = self.windows_by_gc.iter().sum();
let mean_reads_per_window =
if total_windows > 0 { total_reads as f64 / total_windows as f64 } else { 0.0 };
let detail_rows: Vec<GcBiasDetailMetric> = (0..NUM_GC_BINS)
.map(|gc| {
let windows = self.windows_by_gc[gc];
let reads = self.reads_by_gc[gc];
let bases = self.bases_by_gc[gc];
let errors = self.errors_by_gc[gc];
let (normalized_coverage, error_bar_width) =
normalized_coverage_and_error(reads, windows, mean_reads_per_window);
let reported_base_quality = {
let qbases = self.quality_bases_by_gc[gc];
if qbases > 0 { self.quality_sum_by_gc[gc] as f64 / qbases as f64 } else { 0.0 }
};
let empirical_base_quality = if bases > 0 && errors > 0 {
-10.0 * (errors as f64 / bases as f64).log10()
} else {
0.0
};
GcBiasDetailMetric {
sample: self.sample.clone(),
gc: gc as u64,
windows,
read_starts: reads,
reported_base_quality,
empirical_base_quality,
normalized_coverage,
error_bar_width,
}
})
.collect();
let (at_dropout, gc_dropout) = compute_dropout(&self.windows_by_gc, &self.reads_by_gc);
let quintile_nc = |start: usize, end: usize| -> f64 {
if mean_reads_per_window == 0.0 {
return 0.0;
}
let sum_reads: u64 = self.reads_by_gc[start..=end].iter().sum();
let sum_windows: u64 = self.windows_by_gc[start..=end].iter().sum();
if sum_windows == 0 {
0.0
} else {
sum_reads as f64 / (sum_windows as f64 * mean_reads_per_window)
}
};
let summary = GcBiasSummaryMetric {
sample: self.sample.clone(),
window_size: u64::from(self.window_size),
total_clusters: self.total_clusters,
aligned_reads: self.aligned_reads,
at_dropout,
gc_dropout,
gc_0_19_normcov: quintile_nc(0, 19),
gc_20_39_normcov: quintile_nc(20, 39),
gc_40_59_normcov: quintile_nc(40, 59),
gc_60_79_normcov: quintile_nc(60, 79),
gc_80_100_normcov: quintile_nc(80, 100),
};
write_tsv(&self.detail_path, &detail_rows)?;
write_tsv(&self.summary_path, &[summary])?;
self.plot_chart(&detail_rows)?;
log::info!(
"gcbias: total_clusters={}, aligned_reads={}, at_dropout={at_dropout:.3}, \
gc_dropout={gc_dropout:.3}, detail={}, summary={}, plot={}",
self.total_clusters,
self.aligned_reads,
self.detail_path.display(),
self.summary_path.display(),
self.plot_path.display(),
);
Ok(())
}
fn plot_chart(&self, detail_rows: &[GcBiasDetailMetric]) -> Result<()> {
let y_max = 2.0_f64;
let max_windows = detail_rows.iter().map(|r| r.windows).max().unwrap_or(1).max(1);
let window_scale = (y_max * 0.25) / max_windows as f64;
let window_xy: Vec<(f64, f64)> =
detail_rows.iter().map(|r| (r.gc as f64, r.windows as f64 * window_scale)).collect();
let windows_line = LinePlot::new()
.with_data(window_xy)
.with_color(FG_SKY)
.with_stroke_width(1.0)
.with_step()
.with_fill()
.with_fill_opacity(0.3)
.with_legend("Genome GC");
let nc_xy: Vec<(f64, f64)> = detail_rows
.iter()
.filter(|r| r.windows > 0)
.map(|r| (r.gc as f64, r.normalized_coverage.min(y_max)))
.collect();
let nc_scatter = ScatterPlot::new()
.with_data(nc_xy)
.with_color(FG_BLUE)
.with_size(4.0)
.with_legend("Coverage");
let primary: Vec<Plot> = vec![windows_line.into(), nc_scatter.into()];
let mut secondary: Vec<Plot> = Vec::new();
let reported_bq: Vec<(f64, f64)> = detail_rows
.iter()
.filter(|r| r.windows > 0 && r.reported_base_quality > 0.0)
.map(|r| (r.gc as f64, r.reported_base_quality))
.collect();
if !reported_bq.is_empty() {
secondary.push(
LinePlot::new()
.with_data(reported_bq)
.with_color(FG_GREEN)
.with_stroke_width(1.0)
.with_legend("Reported BQ")
.into(),
);
}
let empirical_bq: Vec<(f64, f64)> = detail_rows
.iter()
.filter(|r| r.windows > 0 && r.empirical_base_quality > 0.0)
.map(|r| (r.gc as f64, r.empirical_base_quality))
.collect();
if !empirical_bq.is_empty() {
secondary.push(
LinePlot::new()
.with_data(empirical_bq)
.with_color(FG_TEAL)
.with_stroke_width(1.0)
.with_legend("Empirical BQ")
.into(),
);
}
let layout = Layout::auto_from_plots(&primary)
.with_width(PLOT_WIDTH)
.with_height(PLOT_HEIGHT)
.with_x_axis_min(0.0)
.with_x_axis_max(100.0)
.with_y_axis_min(0.0)
.with_y_axis_max(y_max)
.with_y2_range(0.0, 40.0)
.with_x_tick_format(TickFormat::Integer)
.with_y_tick_format(TickFormat::Fixed(1))
.with_y2_tick_format(TickFormat::Integer)
.with_title(&self.plot_title)
.with_x_label("GC%")
.with_y_label("Normalized Coverage")
.with_y2_label("Base Quality")
.with_reference_line(
ReferenceLine::horizontal(1.0).with_color(FG_GRAY).with_dasharray(""),
)
.with_legend_position(LegendPosition::InsideBottomRight)
.with_legend_box(false);
write_twin_y_plot_pdf(primary, secondary, layout, &self.plot_path)
}
}
impl Collector for GcBiasCollector {
fn initialize(&mut self, header: &Header) -> Result<()> {
self.reference.validate_bam_header(header)?;
self.dict = Some(SequenceDictionary::from(header));
self.sample = derive_sample(&self.input_path, header);
self.plot_title = format!("GC Bias of {}", self.sample);
Ok(())
}
fn accept(&mut self, record: &RecordBuf, _header: &Header) -> Result<()> {
self.process_record(record)
}
fn finish(&mut self) -> Result<()> {
let dict = self.dict.as_ref().unwrap();
for ref_id in 0..dict.len() {
if !self.visited_contigs.contains(&ref_id) {
let name = dict[ref_id].name();
let seq = self.reference.load_contig(name, false)?;
let (_, window_counts) = scan_contig_gc(&seq, self.window_size);
for (bin, count) in window_counts.iter().enumerate() {
self.windows_by_gc[bin] += count;
}
}
}
self.finish_metrics()
}
fn name(&self) -> &'static str {
"gcbias"
}
}
#[derive(Debug, Serialize, Deserialize, MetricDocs)]
pub struct GcBiasDetailMetric {
pub sample: String,
pub gc: u64,
pub windows: u64,
pub read_starts: u64,
#[serde(serialize_with = "serialize_f64_2dp")]
pub reported_base_quality: f64,
#[serde(serialize_with = "serialize_f64_2dp")]
pub empirical_base_quality: f64,
#[serde(serialize_with = "serialize_f64_5dp")]
pub normalized_coverage: f64,
#[serde(serialize_with = "serialize_f64_5dp")]
pub error_bar_width: f64,
}
#[derive(Debug, Serialize, Deserialize, MetricDocs)]
pub struct GcBiasSummaryMetric {
pub sample: String,
pub window_size: u64,
pub total_clusters: u64,
pub aligned_reads: u64,
#[serde(serialize_with = "serialize_f64_5dp")]
pub at_dropout: f64,
#[serde(serialize_with = "serialize_f64_5dp")]
pub gc_dropout: f64,
#[serde(serialize_with = "serialize_f64_5dp")]
pub gc_0_19_normcov: f64,
#[serde(serialize_with = "serialize_f64_5dp")]
pub gc_20_39_normcov: f64,
#[serde(serialize_with = "serialize_f64_5dp")]
pub gc_40_59_normcov: f64,
#[serde(serialize_with = "serialize_f64_5dp")]
pub gc_60_79_normcov: f64,
#[serde(serialize_with = "serialize_f64_5dp")]
pub gc_80_100_normcov: f64,
}
fn normalized_coverage_and_error(reads: u64, windows: u64, mean_rpw: f64) -> (f64, f64) {
if windows == 0 || mean_rpw == 0.0 {
return (0.0, 0.0);
}
let nc = (reads as f64 / windows as f64) / mean_rpw;
let eb = ((reads as f64).sqrt() / windows as f64) / mean_rpw;
(nc, eb)
}
fn compute_dropout(
windows_by_gc: &[u64; NUM_GC_BINS],
reads_by_gc: &[u64; NUM_GC_BINS],
) -> (f64, f64) {
let total_windows: u64 = windows_by_gc.iter().sum();
let total_reads: u64 = reads_by_gc.iter().sum();
if total_windows == 0 || total_reads == 0 {
return (0.0, 0.0);
}
let mut at_drop = 0.0_f64;
let mut gc_drop = 0.0_f64;
for gc in 0..NUM_GC_BINS {
let relative_windows = windows_by_gc[gc] as f64 / total_windows as f64;
let relative_reads = reads_by_gc[gc] as f64 / total_reads as f64;
let dropout = (relative_windows - relative_reads) * 100.0;
if dropout > 0.0 {
if gc <= 50 {
at_drop += dropout;
} else {
gc_drop += dropout;
}
}
}
(at_drop, gc_drop)
}
fn gc_percentage(gc_count: u32, window_size: u32) -> u8 {
#[allow(clippy::cast_possible_truncation)]
let pct = ((u64::from(gc_count) * 100) / u64::from(window_size)) as u8;
pct
}
fn scan_contig_gc(seq: &[u8], window_size: u32) -> (Vec<u8>, [u64; NUM_GC_BINS]) {
let ws = window_size as usize;
let mut gc_at_pos = vec![u8::MAX; seq.len()];
let mut window_counts = [0u64; NUM_GC_BINS];
if seq.len() < ws {
return (gc_at_pos, window_counts);
}
let mut gc_count: u32 = 0;
let mut n_count: u32 = 0;
for &b in &seq[..ws] {
let flags = BASE_FLAGS[b as usize];
gc_count += u32::from(flags & GC_FLAG);
n_count += u32::from((flags & N_FLAG) >> 1);
}
if n_count <= MAX_N_IN_WINDOW {
let gc_pct = gc_percentage(gc_count, window_size);
gc_at_pos[0] = gc_pct;
window_counts[gc_pct as usize] += 1;
}
for i in 1..=(seq.len() - ws) {
let leaving_flags = BASE_FLAGS[seq[i - 1] as usize];
let entering_flags = BASE_FLAGS[seq[i + ws - 1] as usize];
gc_count -= u32::from(leaving_flags & GC_FLAG);
gc_count += u32::from(entering_flags & GC_FLAG);
n_count -= u32::from((leaving_flags & N_FLAG) >> 1);
n_count += u32::from((entering_flags & N_FLAG) >> 1);
if n_count <= MAX_N_IN_WINDOW {
let gc_pct = gc_percentage(gc_count, window_size);
gc_at_pos[i] = gc_pct;
window_counts[gc_pct as usize] += 1;
}
}
(gc_at_pos, window_counts)
}
#[cfg(test)]
#[allow(clippy::float_cmp)]
mod tests {
use super::*;
#[test]
fn test_base_flags_gc() {
assert_eq!(BASE_FLAGS[b'G' as usize] & GC_FLAG, GC_FLAG);
assert_eq!(BASE_FLAGS[b'g' as usize] & GC_FLAG, GC_FLAG);
assert_eq!(BASE_FLAGS[b'C' as usize] & GC_FLAG, GC_FLAG);
assert_eq!(BASE_FLAGS[b'c' as usize] & GC_FLAG, GC_FLAG);
assert_eq!(BASE_FLAGS[b'A' as usize] & GC_FLAG, 0);
assert_eq!(BASE_FLAGS[b'T' as usize] & GC_FLAG, 0);
assert_eq!(BASE_FLAGS[b'N' as usize] & GC_FLAG, 0);
}
#[test]
fn test_base_flags_n() {
assert_eq!(BASE_FLAGS[b'N' as usize] & N_FLAG, N_FLAG);
assert_eq!(BASE_FLAGS[b'n' as usize] & N_FLAG, N_FLAG);
assert_eq!(BASE_FLAGS[b'A' as usize] & N_FLAG, 0);
assert_eq!(BASE_FLAGS[b'G' as usize] & N_FLAG, 0);
}
#[test]
fn test_scan_contig_gc_case_insensitive() {
let upper = b"GGGGAAAA";
let lower = b"ggggaaaa";
let mixed = b"GgGgAaAa";
let (gc_u, counts_u) = scan_contig_gc(upper, 4);
let (gc_l, counts_l) = scan_contig_gc(lower, 4);
let (gc_m, counts_m) = scan_contig_gc(mixed, 4);
assert_eq!(gc_u, gc_l);
assert_eq!(gc_u, gc_m);
assert_eq!(counts_u, counts_l);
assert_eq!(counts_u, counts_m);
}
#[test]
fn test_scan_contig_gc_simple() {
let seq = b"AAAAAAAA";
let (gc, counts) = scan_contig_gc(seq, 4);
assert_eq!(gc.len(), 8);
assert_eq!(gc[0], 0); assert_eq!(gc[4], 0); assert_eq!(gc[5], u8::MAX); assert_eq!(counts[0], 5); }
#[test]
fn test_scan_contig_gc_mixed() {
let seq = b"GGGGAAAA";
let (gc, counts) = scan_contig_gc(seq, 4);
assert_eq!(gc[0], 100);
assert_eq!(gc[1], 75);
assert_eq!(gc[2], 50);
assert_eq!(gc[3], 25);
assert_eq!(gc[4], 0);
assert_eq!(counts[0], 1);
assert_eq!(counts[25], 1);
assert_eq!(counts[50], 1);
assert_eq!(counts[75], 1);
assert_eq!(counts[100], 1);
}
#[test]
fn test_scan_contig_gc_with_ns() {
let seq = b"NNNNNAAAA";
let (gc, _) = scan_contig_gc(seq, 4);
assert_eq!(gc[0], 0); assert_eq!(gc[1], 0);
assert_eq!(gc[2], 0);
}
#[test]
fn test_scan_contig_gc_too_many_ns() {
let seq = b"NNNNNNNNN";
let (gc, counts) = scan_contig_gc(seq, 5);
for &v in &gc[..5] {
assert_eq!(v, u8::MAX, "window with >4 Ns should be invalid");
}
assert_eq!(counts.iter().sum::<u64>(), 0);
}
#[test]
fn test_scan_contig_gc_short_seq() {
let seq = b"GC";
let (gc, counts) = scan_contig_gc(seq, 4);
assert_eq!(gc.len(), 2);
assert_eq!(gc[0], u8::MAX);
assert_eq!(gc[1], u8::MAX);
assert_eq!(counts.iter().sum::<u64>(), 0);
}
#[test]
fn test_dropout_calculation() {
let total_windows = 100u64;
let total_reads = 100u64;
let rel_w = 40.0 / total_windows as f64;
let rel_r = 20.0 / total_reads as f64;
let dropout = (rel_w - rel_r) * 100.0;
assert!((dropout - 20.0).abs() < f64::EPSILON);
}
#[test]
fn test_phred_quality_calculation() {
let errors = 1.0_f64;
let bases = 1000.0_f64;
let phred = -10.0 * (errors / bases).log10();
assert!((phred - 30.0).abs() < 0.001);
}
#[test]
fn test_scan_contig_gc_window_size_1() {
let seq = b"GCAT";
let (gc, counts) = scan_contig_gc(seq, 1);
assert_eq!(gc[0], 100); assert_eq!(gc[1], 100); assert_eq!(gc[2], 0); assert_eq!(gc[3], 0); assert_eq!(counts[0], 2);
assert_eq!(counts[100], 2);
}
#[test]
fn test_scan_contig_gc_all_gc() {
let seq = b"GCGCGCGC";
let (gc, counts) = scan_contig_gc(seq, 4);
for (pos, &val) in gc.iter().enumerate().take(5) {
assert_eq!(val, 100, "pos {pos} should be 100% GC");
}
assert_eq!(counts[100], 5);
}
#[test]
fn test_scan_contig_gc_boundary_n_count() {
let seq = b"NNNNANNNN";
let (gc, _) = scan_contig_gc(seq, 5);
assert_ne!(gc[0], u8::MAX);
assert_eq!(gc[0], 0); assert_ne!(gc[4], u8::MAX);
assert_eq!(gc[4], 0);
let seq2 = b"NNNNN";
let (gc2, _) = scan_contig_gc(seq2, 5);
assert_eq!(gc2[0], u8::MAX);
}
#[test]
fn test_nc_normal() {
let (nc, eb) = normalized_coverage_and_error(100, 50, 1.0);
assert!((nc - 2.0).abs() < 1e-9);
assert!((eb - 0.2).abs() < 1e-9); }
#[test]
fn test_nc_zero_windows() {
let (nc, eb) = normalized_coverage_and_error(100, 0, 1.0);
assert_eq!(nc, 0.0);
assert_eq!(eb, 0.0);
}
#[test]
fn test_nc_zero_mean() {
let (nc, eb) = normalized_coverage_and_error(100, 50, 0.0);
assert_eq!(nc, 0.0);
assert_eq!(eb, 0.0);
}
#[test]
fn test_compute_dropout_uniform() {
let mut windows = [0u64; NUM_GC_BINS];
let mut reads = [0u64; NUM_GC_BINS];
windows[25] = 50;
windows[75] = 50;
reads[25] = 50;
reads[75] = 50;
let (at, gc) = compute_dropout(&windows, &reads);
assert_eq!(at, 0.0);
assert_eq!(gc, 0.0);
}
#[test]
fn test_compute_dropout_at_deficit() {
let mut windows = [0u64; NUM_GC_BINS];
let mut reads = [0u64; NUM_GC_BINS];
windows[25] = 60; windows[75] = 40;
reads[25] = 40; reads[75] = 60;
let (at, gc) = compute_dropout(&windows, &reads);
assert!(at > 0.0, "AT dropout should be > 0, got {at}");
assert!(gc == 0.0, "GC dropout should be 0, got {gc}");
}
#[test]
fn test_compute_dropout_gc_deficit() {
let mut windows = [0u64; NUM_GC_BINS];
let mut reads = [0u64; NUM_GC_BINS];
windows[25] = 40;
windows[75] = 60; reads[25] = 60;
reads[75] = 40; let (at, gc) = compute_dropout(&windows, &reads);
assert!(at == 0.0, "AT dropout should be 0, got {at}");
assert!(gc > 0.0, "GC dropout should be > 0, got {gc}");
}
#[test]
fn test_compute_dropout_empty() {
let windows = [0u64; NUM_GC_BINS];
let reads = [0u64; NUM_GC_BINS];
let (at, gc) = compute_dropout(&windows, &reads);
assert_eq!(at, 0.0);
assert_eq!(gc, 0.0);
}
}