use std::fmt::{Display, Formatter, Result as FmtResult};
use std::path::PathBuf;
use std::sync::OnceLock;
use crate::bam_utils::{BamStats, Iv, get_bam_header, progress_bar};
use crate::genomic_intervals::{IntervalMaker, Shift, Truncate};
use crate::read_filter::BamReadFilter;
use crate::signal_normalization::NormalizationMethod;
use ahash::HashMap;
use anyhow::{Context, Result};
#[cfg(test)]
use bigtools::BigWigRead;
use bigtools::beddata::BedParserStreamingIterator;
use bigtools::{BigWigWrite, Value};
use comfy_table::{
Cell, CellAlignment, ContentArrangement, Table, modifiers::UTF8_ROUND_CORNERS,
presets::UTF8_FULL,
};
use indicatif::ParallelProgressIterator;
use log::{debug, info};
use noodles::bam;
use polars::lazy::dsl::col;
use polars::prelude::*;
use rayon::prelude::*;
use regex::Regex;
use rust_lapper::Lapper;
/// Reports whether a chromosome name refers to a scaffold/alternate/random
/// contig rather than a primary assembly sequence.
///
/// Equivalent to matching the unanchored pattern `(chrUn.*|.*alt|.*random)`:
/// a name counts as a scaffold when it contains "chrUn", "alt" or "random"
/// anywhere.
fn is_scaffold(name: &str) -> bool {
    name.contains("chrUn") || name.contains("alt") || name.contains("random")
}
/// True when every component of `shift` is zero, i.e. applying it would not
/// move any read end.
fn shift_is_noop(shift: Shift) -> bool {
    matches!(
        (
            shift.five_prime,
            shift.three_prime,
            shift.five_prime_reverse,
            shift.three_prime_reverse,
        ),
        (0, 0, 0, 0)
    )
}
/// Writes `df` to `outfile` as a headerless, tab-separated bedGraph.
///
/// Rows are sorted by chromosome and start position before writing; when
/// `ignore_scaffold_chromosomes` is set, rows whose chromosome name looks
/// like a scaffold/alt/random contig are dropped first.
fn write_bedgraph(
    df: DataFrame,
    outfile: PathBuf,
    ignore_scaffold_chromosomes: bool,
) -> Result<()> {
    let mut df = if ignore_scaffold_chromosomes {
        // Keep only rows on primary-assembly chromosomes.
        let keep = df
            .column("chrom")?
            .str()?
            .iter()
            .flatten()
            .map(|name| !is_scaffold(name))
            .collect::<BooleanChunked>();
        df.filter(&keep)?
    } else {
        df
    };
    df.sort_in_place(["chrom", "start"], SortMultipleOptions::default())?;
    // bedGraph column order: chrom, start, end, score.
    let mut out_df = df.select(["chrom", "start", "end", "score"])?;
    let mut handle = std::fs::File::create(outfile)?;
    CsvWriter::new(&mut handle)
        .include_header(false)
        .with_separator(b'\t')
        .finish(&mut out_df)?;
    Ok(())
}
/// Merges consecutive rows that sit on the same chromosome and carry equal
/// score values, producing one row per run (start = min, end = max).
///
/// `score_columns` defaults to `["score"]`; when several columns are given,
/// a row only matches its predecessor when *all* of them are equal.
fn collapse_equal_bins(df: DataFrame, score_columns: Option<Vec<PlSmallStr>>) -> Result<DataFrame> {
let original_rows = df.height();
// Sort so that mergeable rows are adjacent.
let mut df = df.sort(["chrom", "start"], Default::default())?;
// Compare each row against its predecessor via shift(1); the first row's
// comparison involves null (polars null-propagating equality).
let shifted_chromosome = df.column("chrom")?.shift(1);
let same_chrom = df.column("chrom")?.equal(&shifted_chromosome)?;
let score_columns = score_columns.unwrap_or_else(|| vec!["score".into()]);
// NOTE(review): `same_scores[0]` below panics if `score_columns` is empty —
// confirm callers never pass `Some(vec![])`.
let scores = df.select(score_columns.clone())?;
let scores_shifted = df.select(score_columns.clone())?.shift(1);
let mut same_scores = Vec::new();
for (score, score_shifted) in scores.iter().zip(scores_shifted.iter()) {
let same_score = score.equal(score_shifted)?;
same_scores.push(same_score);
}
// AND together the per-column "same as previous" masks. (The fold seeds with
// same_scores[0] and also iterates over it, so the first mask is ANDed
// twice — harmless for booleans.)
let same_chrom_and_score = same_chrom
& same_scores
.iter()
.fold(same_scores[0].clone(), |acc, x| acc & x.clone());
// NOTE(review): the group id is the cumulative sum of the "same as previous
// row" flag itself, not of its negation. With e.g. scores [10,10,10] this
// assigns each row a distinct group id, so runs are not fully merged —
// verify this grouping is the intended behavior.
let group = cum_sum(&same_chrom_and_score.into_series(), false)?
.with_name("groups".into())
.into_column();
// NOTE(review): the aggregation reads the literal "score" column even when
// `score_columns` names other columns (as MultiBamPileup::pileup does) —
// confirm such frames still contain a "score" column. Also note the merged
// score is the SUM over the run, not the (equal) per-bin value.
let df = df
.with_column(group)?
.clone()
.lazy()
.group_by(["groups"])
.agg(&[
col("chrom").first(),
col("start").min(),
col("end").max(),
col("score").sum(),
])
.collect()?;
let collapsed_rows = df.height();
// reduction_pct is NaN when the input frame is empty (0 / 0).
let reduction_pct = ((original_rows - collapsed_rows) as f64 / original_rows as f64) * 100.0;
debug!(
"Collapsed bins: {} rows → {} rows ({:.1}% reduction)",
original_rows, collapsed_rows, reduction_pct
);
Ok(df)
}
/// Configuration and driver for computing binned coverage ("pileup") from a
/// single indexed BAM file.
pub struct BamPileup {
/// Path to the indexed BAM file.
file_path: PathBuf,
/// Width (bp) of each coverage bin.
bin_size: u64,
/// How raw counts are scaled (e.g. Raw/CPM/RPKM).
norm_method: NormalizationMethod,
/// Additional user-supplied multiplier passed to the normalization method.
scale_factor: f32,
/// When true, count whole fragments instead of individual reads.
use_fragment: bool,
/// Read-level filter applied to every record; also accumulates statistics.
filter: BamReadFilter,
/// Merge adjacent bins with identical counts during the pileup.
collapse: bool,
/// Drop scaffold/alt/random contigs from outputs.
ignore_scaffold_chromosomes: bool,
/// Optional per-end read shifting applied when building intervals.
shift: Option<Shift>,
/// Optional read truncation applied when building intervals.
truncate: Option<Truncate>,
/// Worker threads used by the BigWig writer runtime.
nthreads: u32,
}
impl Display for BamPileup {
    /// Renders the run configuration as a bordered settings table titled
    /// "Coverage Run Details".
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        let stats = self.filter.stats();
        // Whether individual reads or whole fragments are counted per bin.
        let counting_mode = if self.use_fragment { "fragments" } else { "reads" };
        // The effective factor is only known once reads have been counted,
        // except for raw output where it does not depend on read counts.
        let normalization_factor = if matches!(self.norm_method, NormalizationMethod::Raw)
            || stats.n_reads_after_filtering() > 0
        {
            format!("{:.6}", self.normalization_factor())
        } else {
            "pending".to_string()
        };
        let scaffold_note = if self.ignore_scaffold_chromosomes {
            "ignored"
        } else {
            "included"
        };
        let mut rows: Vec<(&str, String)> = Vec::new();
        rows.push(("input", self.file_path.display().to_string()));
        rows.push(("bin size", format!("{} bp", self.bin_size)));
        rows.push(("normalization", format!("{:?}", self.norm_method)));
        rows.push(("normalization factor", normalization_factor));
        rows.push(("scale factor", format!("{:.3}", self.scale_factor)));
        rows.push(("counting", counting_mode.to_string()));
        rows.push(("output threads", self.nthreads.to_string()));
        rows.push(("scaffolds", scaffold_note.to_string()));
        // Shift is only reported when it would actually move a read end.
        if let Some(s) = self.shift.filter(|s| !shift_is_noop(*s)) {
            rows.push((
                "shift",
                format!(
                    "{},{},{},{}",
                    s.five_prime, s.three_prime, s.five_prime_reverse, s.three_prime_reverse
                ),
            ));
        }
        if let Some(truncate) = self.truncate {
            rows.push(("truncate", format!("{truncate:?}")));
        }
        let mut table = Table::new();
        table
            .load_preset(UTF8_FULL)
            .apply_modifier(UTF8_ROUND_CORNERS)
            .set_content_arrangement(ContentArrangement::Dynamic)
            .set_header(vec!["setting", "value"]);
        for (label, value) in rows {
            table.add_row(vec![
                Cell::new(label),
                Cell::new(value).set_alignment(CellAlignment::Right),
            ]);
        }
        write!(f, "Coverage Run Details\n{table}")
    }
}
impl BamPileup {
fn log_filter_stats(&self, prefix: &str) {
let stats = self.filter.stats();
debug!(
"{} filter stats: total_reads={}, reads_passing={}, filtered={}, pass_rate={:.2}%",
prefix,
stats.n_total(),
stats.n_reads_after_filtering(),
stats
.n_total()
.saturating_sub(stats.n_reads_after_filtering()),
if stats.n_total() > 0 {
(stats.n_reads_after_filtering() as f64 / stats.n_total() as f64) * 100.0
} else {
0.0
}
);
if stats.n_failed_mapq() > 0 {
debug!(" - Filtered by MAPQ: {}", stats.n_failed_mapq());
}
if stats.n_failed_length() > 0 {
debug!(" - Filtered by length: {}", stats.n_failed_length());
}
if stats.n_incorrect_strand() > 0 {
debug!(" - Filtered by strand: {}", stats.n_incorrect_strand());
}
if stats.n_failed_proper_pair() > 0 {
debug!(
" - Filtered by proper pair: {}",
stats.n_failed_proper_pair()
);
}
if stats.n_failed_blacklist() > 0 {
debug!(" - Filtered by blacklist: {}", stats.n_failed_blacklist());
}
if stats.n_failed_barcode() > 0 {
debug!(" - Filtered by barcode: {}", stats.n_failed_barcode());
}
if stats.n_not_in_read_group() > 0 {
debug!(
" - Filtered by read group: {}",
stats.n_not_in_read_group()
);
}
if stats.n_failed_tag_filter() > 0 {
debug!(" - Filtered by tag: {}", stats.n_failed_tag_filter());
}
if stats.n_failed_fragment_length() > 0 {
debug!(
" - Filtered by fragment length: {}",
stats.n_failed_fragment_length()
);
}
}
/// Constructs a `BamPileup`.
///
/// * `collapse_intervals` — merge adjacent bins with identical counts.
/// * `ignore_scaffold_chromosomes` — drop scaffold/alt/random contigs from outputs.
/// * `shift` / `truncate` — optional adjustments forwarded to interval construction.
/// * `nthreads` — BigWig writer threads; defaults to 6 when `None`.
#[allow(clippy::too_many_arguments)]
pub fn new(
file_path: PathBuf,
bin_size: u64,
norm_method: NormalizationMethod,
scale_factor: f32,
use_fragment: bool,
filter: BamReadFilter,
collapse_intervals: bool,
ignore_scaffold_chromosomes: bool,
shift: Option<Shift>,
truncate: Option<Truncate>,
nthreads: Option<u32>,
) -> Self {
Self {
file_path,
bin_size,
norm_method,
scale_factor,
use_fragment,
filter,
collapse: collapse_intervals,
ignore_scaffold_chromosomes,
shift,
truncate,
nthreads: nthreads.unwrap_or(6),
}
}
/// Scale factor applied to raw bin counts, derived from the configured
/// normalization method, the user scale factor, the bin size, and the
/// number of reads that survived filtering so far.
pub fn normalization_factor(&self) -> f64 {
    let reads_passing = self.filter.stats().n_reads_after_filtering();
    self.norm_method
        .scale_factor(self.scale_factor, self.bin_size, reads_passing)
}
/// Computes binned coverage for a single chromosome.
///
/// The chromosome is split into chunks that are processed in parallel; each
/// chunk opens its own BAM reader, converts filter-passing reads (or
/// fragments) into unit intervals, and counts overlaps per `bin_size`
/// window. When `collapse` is set, adjacent bins with identical counts are
/// merged within a chunk.
///
/// # Errors
/// Any chunk failure (BAM open, query, malformed region bounds) now aborts
/// the whole pileup. Previously the fold step silently discarded `Err`
/// results, so an I/O failure produced a partial pileup with no warning.
pub fn pileup_chromosome(&self, chrom: &str) -> Result<Vec<Iv>> {
    let bam_stats = BamStats::new(self.file_path.clone())?;
    let genomic_chunks = bam_stats.chromosome_chunks(chrom, self.bin_size)?;
    let chromsizes_refid = bam_stats.chromsizes_ref_id()?;
    let n_total_chunks = genomic_chunks.len();
    let header = get_bam_header(self.file_path.clone())?;
    let chunks: Vec<Vec<Iv>> = genomic_chunks
        .into_par_iter()
        .progress_with(progress_bar(
            n_total_chunks as u64,
            format!("Performing pileup for chromosome {chrom}"),
        ))
        .map(|region| {
            // Each worker opens its own reader; readers are not shareable
            // across threads.
            let mut reader = bam::io::indexed_reader::Builder::default()
                .build_from_path(self.file_path.clone())
                .context("Failed to open BAM file")?;
            let records = reader.query(&header, &region)?;
            // Reads (or fragments) that pass the filter become unit intervals.
            let intervals: Vec<Iv> = records
                .records()
                .filter_map(|record| record.ok())
                .filter_map(|record| {
                    IntervalMaker::new(
                        record,
                        &header,
                        &chromsizes_refid,
                        &self.filter,
                        self.use_fragment,
                        self.shift,
                        self.truncate,
                    )
                    .coords()
                    .map(|(s, e, _dtlen)| Iv {
                        start: s,
                        stop: e,
                        val: 1,
                    })
                })
                .collect();
            let lapper = Lapper::new(intervals);
            let region_interval = region.interval();
            let region_start = region_interval
                .start()
                .context("Failed to get region start")?
                .get();
            let region_end = region_interval
                .end()
                .context("Failed to get region end")?
                .get();
            // Count overlaps per bin_size window; optionally merge runs of
            // equal counts into a single interval.
            let mut bin_counts: Vec<Iv> = Vec::new();
            let mut start = region_start;
            while start < region_end {
                let end = (start + self.bin_size as usize).min(region_end);
                let count = lapper.count(start, end) as u32;
                let merged = self.collapse
                    && bin_counts
                        .last_mut()
                        .map(|last| {
                            if last.val == count {
                                last.stop = end;
                                true
                            } else {
                                false
                            }
                        })
                        .unwrap_or(false);
                if !merged {
                    bin_counts.push(Iv {
                        start,
                        stop: end,
                        val: count,
                    });
                }
                start = end;
            }
            Ok(bin_counts)
        })
        // Propagate the first chunk error instead of discarding failures.
        .collect::<Result<Vec<Vec<Iv>>>>()?;
    let pileup: Vec<Iv> = chunks.into_iter().flatten().collect();
    let interval_count = pileup.len();
    let total_coverage: u64 = pileup.iter().map(|iv| (iv.stop - iv.start) as u64).sum();
    info!(
        "Chromosome {} pileup complete: {} intervals, {} bp coverage",
        chrom, interval_count, total_coverage
    );
    self.log_filter_stats(&format!("Chromosome {}", chrom));
    Ok(pileup)
}
/// Computes binned coverage for the whole genome, keyed by chromosome name.
///
/// Genomic chunks are processed in parallel; each chunk opens its own BAM
/// reader, converts filter-passing reads (or fragments) into unit intervals
/// and counts overlaps per `bin_size` window. When `collapse` is set,
/// adjacent bins with identical counts are merged within a chunk.
///
/// # Errors
/// Any chunk failure (BAM open, query, malformed region bounds) now aborts
/// the whole pileup. Previously the fold step silently discarded `Err`
/// results, so failures produced a partial pileup with no warning.
fn pileup_genome(&self) -> Result<HashMap<String, Vec<Iv>>> {
    let bam_stats = BamStats::new(self.file_path.clone())?;
    let genomic_chunks = bam_stats.genome_chunks(self.bin_size)?;
    let chromsizes_refid = bam_stats.chromsizes_ref_id()?;
    let n_total_chunks = genomic_chunks.len();
    info!("{self}");
    info!("Processing {n_total_chunks} genomic chunks");
    let header = get_bam_header(self.file_path.clone())?;
    let chunk_results: Vec<(String, Vec<Iv>)> = genomic_chunks
        .into_par_iter()
        .progress_with(progress_bar(
            n_total_chunks as u64,
            "Performing pileup".to_string(),
        ))
        .map(|region| {
            // Each worker opens its own reader; readers are not shareable
            // across threads.
            let mut reader = bam::io::indexed_reader::Builder::default()
                .build_from_path(self.file_path.clone())
                .context("Failed to open BAM file")?;
            let records = reader.query(&header, &region)?;
            let intervals: Vec<Iv> = records
                .records()
                .filter_map(|record| record.ok())
                .filter_map(|record| {
                    IntervalMaker::new(
                        record,
                        &header,
                        &chromsizes_refid,
                        &self.filter,
                        self.use_fragment,
                        self.shift,
                        self.truncate,
                    )
                    .coords()
                    .map(|(s, e, _dtlen)| Iv {
                        start: s,
                        stop: e,
                        val: 1,
                    })
                })
                .collect();
            let lapper = Lapper::new(intervals);
            let region_interval = region.interval();
            let region_start = region_interval
                .start()
                .context("Failed to get region start")?
                .get();
            let region_end = region_interval
                .end()
                .context("Failed to get region end")?
                .get();
            // Count overlaps per bin_size window; optionally merge runs of
            // equal counts into a single interval. Uses the `Iv` alias for
            // consistency with `pileup_chromosome`.
            let mut bin_counts: Vec<Iv> = Vec::new();
            let mut start = region_start;
            while start < region_end {
                let end = (start + self.bin_size as usize).min(region_end);
                let count = lapper.count(start, end) as u32;
                let merged = self.collapse
                    && bin_counts
                        .last_mut()
                        .map(|last| {
                            if last.val == count {
                                last.stop = end;
                                true
                            } else {
                                false
                            }
                        })
                        .unwrap_or(false);
                if !merged {
                    bin_counts.push(Iv {
                        start,
                        stop: end,
                        val: count,
                    });
                }
                start = end;
            }
            Ok((region.name().to_owned().to_string(), bin_counts))
        })
        // Propagate the first chunk error instead of discarding failures.
        .collect::<Result<Vec<_>>>()?;
    // Group per-chunk results by chromosome.
    let mut pileup: HashMap<String, Vec<Iv>> = HashMap::default();
    for (chrom, intervals) in chunk_results {
        pileup.entry(chrom).or_default().extend(intervals);
    }
    let n_chromosomes = pileup.len();
    let total_intervals: usize = pileup.values().map(|ivs| ivs.len()).sum();
    let total_bp_covered: u64 = pileup
        .values()
        .flat_map(|ivs| ivs.iter())
        .map(|iv| (iv.stop - iv.start) as u64)
        .sum();
    info!(
        "Pileup complete: {} chromosomes, {} intervals, {} bp spanned",
        n_chromosomes, total_intervals, total_bp_covered
    );
    info!("Read filtering: {}", self.filter.stats());
    self.log_filter_stats("Overall");
    Ok(pileup)
}
/// Runs the genome-wide pileup and flattens it into a DataFrame with
/// columns `chrom` (str), `start`/`end` (u64) and `score` (raw bin counts).
///
/// Row order is unspecified (parallel reduction over a hash map); callers
/// that need ordered output sort afterwards (see `write_bedgraph` /
/// `to_bigwig`).
fn pileup_to_polars(&self) -> Result<DataFrame> {
let pileup = self.pileup_genome()?;
// Convert each chromosome's intervals into parallel column vectors, then
// concatenate the per-chromosome vectors pairwise.
let (chroms, starts, ends, scores) = pileup
.into_par_iter()
.map(|(chrom, intervals)| {
let n = intervals.len();
let mut start_vec = Vec::with_capacity(n);
let mut end_vec = Vec::with_capacity(n);
let mut score_vec = Vec::with_capacity(n);
for iv in intervals {
start_vec.push(iv.start as u64);
end_vec.push(iv.stop as u64);
score_vec.push(iv.val);
}
// One chromosome-name entry per interval row.
let chrom_vec = vec![chrom; n];
(chrom_vec, start_vec, end_vec, score_vec)
})
.reduce(
|| (Vec::new(), Vec::new(), Vec::new(), Vec::new()),
|(mut chrom_a, mut start_a, mut end_a, mut score_a),
(chrom_b, start_b, end_b, score_b)| {
chrom_a.extend(chrom_b);
start_a.extend(start_b);
end_a.extend(end_b);
score_a.extend(score_b);
(chrom_a, start_a, end_a, score_a)
},
);
let df = DataFrame::new(vec![
Column::new("chrom".into(), chroms),
Column::new("start".into(), starts),
Column::new("end".into(), ends),
Column::new("score".into(), scores),
])?;
let row_count = df.height();
debug!(
"Pileup DataFrame created: {} intervals across all chromosomes",
row_count
);
Ok(df)
}
/// Runs the genome-wide pileup and scales the `score` column by the
/// normalization factor, leaving scores as Float64.
fn pileup_normalised(&self) -> Result<DataFrame> {
    // The pileup must run before the factor is read: the factor is derived
    // from filter statistics accumulated while records are processed (see
    // the "pending" handling in the Display impl).
    let mut df = self.pileup_to_polars()?;
    let factor = self.normalization_factor();
    debug!(
        "Applying normalization (method: {:?}, factor: {:.6})",
        self.norm_method, factor
    );
    let scaled = df.column("score")?.cast(&DataType::Float64)? * factor;
    df.with_column(scaled)?;
    Ok(df)
}
/// Runs the pileup and writes the normalised coverage to `outfile` in
/// bedGraph format, logging the resulting file size.
pub fn to_bedgraph(&self, outfile: PathBuf) -> Result<()> {
    info!("Writing bedGraph to {}", outfile.display());
    let normalised = self.pileup_normalised()?;
    write_bedgraph(normalised, outfile.clone(), self.ignore_scaffold_chromosomes)?;
    // Report the final size for user feedback.
    if outfile.exists() {
        let n_bytes = std::fs::metadata(&outfile)?.len();
        info!(
            "Wrote bedGraph: {} ({:.2} MB)",
            outfile.display(),
            n_bytes as f64 / 1_000_000.0
        );
    }
    Ok(())
}
/// Runs the pileup and writes the normalised coverage to `outfile` as a
/// BigWig file, using a dedicated tokio runtime with `nthreads` workers.
pub fn to_bigwig(&self, outfile: PathBuf) -> Result<()> {
let bam_stats = BamStats::new(self.file_path.clone())?;
let header = bam_stats.header();
let all_chromsizes = bam_stats.chromsizes_ref_name()?;
// Build the BigWig chromosome-size header; scaffolds are skipped when
// configured so they never appear in the output file.
let mut chrom_sizes_vec = Vec::new();
for (name, len) in all_chromsizes {
if self.ignore_scaffold_chromosomes && is_scaffold(&name) {
continue;
}
// BigWig stores chromosome lengths as u32; fail rather than truncate.
let len_u32 = u32::try_from(len).context(format!(
"Chromosome {} length {} exceeds u32::MAX",
name, len
))?;
chrom_sizes_vec.push((name, len_u32));
}
let chrom_sizes: std::collections::HashMap<String, u32> =
chrom_sizes_vec.into_iter().collect();
let mut df = self.pileup_normalised()?;
// Filter the data rows with the same scaffold rule as the header above.
if self.ignore_scaffold_chromosomes {
let mask = df
.column("chrom")?
.str()?
.iter()
.flatten()
.map(|s| !is_scaffold(s))
.collect::<BooleanChunked>();
df = df.filter(&mask)?;
}
// Map chromosome names to their BAM-header order so rows can be sorted
// into a consistent chromosome order for writing.
let chrom_idx: HashMap<String, u32> = header
.reference_sequences()
.iter()
.enumerate()
.map(|(i, (name, _))| (String::from_utf8_lossy(name).to_string(), i as u32))
.collect();
let chrom_idx_col = df
.column("chrom")?
.str()?
.iter()
.flatten()
.map(|s| {
chrom_idx
.get(s)
.copied()
.ok_or_else(|| anyhow::anyhow!("Chromosome {} not in BAM header", s))
})
.collect::<Result<Vec<u32>>>()?;
df.with_column(Column::new("chrom_idx".into(), chrom_idx_col))?;
// Fail fast if any coordinate cannot be represented as u32 before the
// lossy casts below.
let start_max = df.column("start")?.u64()?.max().unwrap_or(0);
let end_max = df.column("end")?.u64()?.max().unwrap_or(0);
let _start_max_u32 = u32::try_from(start_max).context("start position exceeds u32::MAX")?;
let _end_max_u32 = u32::try_from(end_max).context("end position exceeds u32::MAX")?;
df = df
.lazy()
.with_column(col("start").cast(DataType::UInt32))
.with_column(col("end").cast(DataType::UInt32))
.with_column(col("score").cast(DataType::Float32))
.collect()?;
df = df.sort(["chrom_idx", "start"], SortMultipleOptions::default())?;
// `cont_slice` below requires each column to be a single contiguous chunk.
df.rechunk_mut();
let idx_slice = df.column("chrom_idx")?.u32()?.cont_slice()?;
let start_slice = df.column("start")?.u32()?.cont_slice()?;
let end_slice = df.column("end")?.u32()?.cont_slice()?;
let score_slice = df.column("score")?.f32()?.cont_slice()?;
// Reverse lookup: header index -> chromosome name, for emitting values.
let mut chrom_names = vec!["".to_string(); chrom_idx.len()];
for (name, idx) in &chrom_idx {
chrom_names[*idx as usize] = name.clone();
}
// Stream (chrom name, interval value) pairs into the BigWig writer.
let n_rows = df.height();
let iter = (0..n_rows).map(|i| {
let name: &str = chrom_names[idx_slice[i] as usize].as_str();
(
name,
Value {
start: start_slice[i],
end: end_slice[i],
value: score_slice[i],
},
)
});
let bed_iter = BedParserStreamingIterator::wrap_infallible_iter(iter, true);
let bw_writer = BigWigWrite::create_file(&outfile, chrom_sizes)?;
// The bigtools writer drives its own async work on this runtime.
let runtime = tokio::runtime::Builder::new_multi_thread()
.worker_threads(self.nthreads as usize)
.build()?;
bw_writer.write(bed_iter, runtime)?;
info!("Wrote BigWig: {}", outfile.display());
Ok(())
}
}
/// Joint pileup over several BAM files, producing one score column per file.
pub struct MultiBamPileup {
/// One configured pileup per input BAM; `_validate_bam_pileups` requires
/// matching bin sizes and `collapse == false` for all of them.
bam_pileups: Vec<BamPileup>,
}
impl MultiBamPileup {
/// Wraps a set of per-file pileups; configuration validation happens
/// lazily when `pileup` is called.
pub fn new(pileups: Vec<BamPileup>) -> Self {
Self {
bam_pileups: pileups,
}
}
/// Checks that the configured pileups can be combined: all must share the
/// same bin size and all must have bin collapsing disabled (collapsed bins
/// would no longer align across samples).
///
/// # Errors
/// Returns an error when no pileups are configured (previously this case
/// panicked on an out-of-bounds index), when bin sizes differ, or when any
/// pileup has `collapse` enabled.
fn _validate_bam_pileups(&self) -> Result<()> {
    let first = self
        .bam_pileups
        .first()
        .context("No BAM pileups provided")?;
    let bin_size = first.bin_size;
    if self.bam_pileups.iter().all(|p| p.bin_size == bin_size) {
        debug!("All BAM pileups have the same bin size: {}", bin_size);
    } else {
        anyhow::bail!("All BAM pileups must have the same bin size");
    }
    if self.bam_pileups.iter().all(|p| !p.collapse) {
        debug!("All BAM pileups have collapse set to false");
    } else {
        anyhow::bail!("All BAM pileups must have collapse set to false");
    }
    Ok(())
}
/// Runs every configured pileup and joins the results into one frame with
/// `chrom`/`start`/`end` keys and one score column per input BAM, named
/// after the file path.
fn pileup(&self) -> Result<DataFrame> {
self._validate_bam_pileups()?;
let mut dfs = Vec::new();
// Column names are the BAM file paths.
// NOTE(review): `to_str().unwrap()` panics on non-UTF8 paths — consider
// `to_string_lossy` here.
let names: Vec<PlSmallStr> = self
.bam_pileups
.iter()
.map(|p| p.file_path.clone().to_str().unwrap().into())
.collect::<Vec<_>>();
for pileup in &self.bam_pileups {
let df = pileup.pileup_to_polars()?;
dfs.push(df);
}
// First frame keeps its coordinate columns; its score column is renamed
// to the first sample's name.
let df = dfs[0]
.clone()
.lazy()
.rename(["score"], [names[0].clone()], true);
// Remaining frames contribute only their (renamed) score columns.
// NOTE(review): `scores_df` therefore has no chrom/start/end columns, yet
// the join below references those keys on the right side — this relies on
// every frame having identical row order, and the key resolution should
// be confirmed against the polars version in use.
let scores = dfs
.iter()
.skip(1)
.zip(names.iter().skip(1))
.map(|(df, n)| df.column("score").unwrap().clone().with_name(n.clone()))
.collect::<Vec<Column>>();
let scores_df = DataFrame::new(scores)?.lazy();
let df = df
.join(
scores_df,
[col("chrom"), col("start"), col("end")],
[col("chrom"), col("start"), col("end")],
JoinArgs::new(JoinType::Full),
)
.collect()?;
// NOTE(review): `collapse_equal_bins` aggregates a literal "score" column,
// but after the renames above this frame holds per-file columns instead —
// confirm this call path works as intended.
let df = collapse_equal_bins(df, Some(names))?;
Ok(df)
}
/// Writes the joined multi-sample pileup to `outfile` as tab-separated
/// text including a header row.
pub fn to_tsv(&self, outfile: &PathBuf) -> Result<()> {
    let mut table = self.pileup()?;
    let mut handle = std::fs::File::create(outfile)?;
    CsvWriter::new(&mut handle)
        .include_header(true)
        .with_separator(b'\t')
        .finish(&mut table)?;
    Ok(())
}
}
#[cfg(test)]
mod tests {
//! Unit tests for bedGraph writing and bin collapsing, plus integration
//! tests that build BigWig files from the checked-in `test/data/test.bam`
//! fixture and pin exact summary statistics.
use super::*;
use crate::read_filter::BamReadFilter;
use crate::signal_normalization::NormalizationMethod;
use std::path::PathBuf;
use tempfile::TempDir;
// Pileup over a dummy path; usable by tests that never open the file.
fn create_test_bam_pileup() -> BamPileup {
BamPileup::new(
PathBuf::from("test.bam"),
1000,
NormalizationMethod::Raw,
1.0,
false,
BamReadFilter::default(),
false,
false,
None,
None,
None,
)
}
// Fixtures live in `<workspace root>/test/data`.
fn get_test_data_dir() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.parent()
.expect("Failed to get workspace root")
.join("test/data")
}
// Constructor stores each field as configured.
#[test]
fn test_bam_pileup_new() {
let file_path = PathBuf::from("test.bam");
let pileup = BamPileup::new(
file_path.clone(),
1000,
NormalizationMethod::RPKM,
2.0,
true,
BamReadFilter::default(),
true,
false,
None,
None,
None,
);
assert_eq!(pileup.file_path, file_path);
assert_eq!(pileup.bin_size, 1000);
assert_eq!(pileup.scale_factor, 2.0);
assert!(pileup.use_fragment);
assert!(pileup.collapse);
}
// bedGraph output keeps one tab-separated line per input row (no
// scaffolds are present in this frame, so the filter drops nothing).
#[test]
fn test_write_bedgraph() {
let temp_dir = TempDir::new().unwrap();
let output_path = temp_dir.path().join("test.bedgraph");
let df = DataFrame::new(vec![
Column::new("chrom".into(), vec!["chr1", "chr1", "chr2"]),
Column::new("start".into(), vec![0u64, 1000u64, 0u64]),
Column::new("end".into(), vec![1000u64, 2000u64, 1000u64]),
Column::new("score".into(), vec![10u32, 20u32, 15u32]),
])
.unwrap();
let result = write_bedgraph(df, output_path.clone(), true);
assert!(result.is_ok());
assert!(output_path.exists());
let contents = std::fs::read_to_string(output_path).unwrap();
let lines: Vec<&str> = contents.trim().split('\n').collect();
assert_eq!(lines.len(), 3);
}
// Collapsing reduces the row count while the summed score is preserved.
#[test]
fn test_collapse_equal_bins() {
let df = DataFrame::new(vec![
Column::new("chrom".into(), vec!["chr1", "chr1", "chr1", "chr1"]),
Column::new("start".into(), vec![0u64, 1000u64, 2000u64, 3000u64]),
Column::new("end".into(), vec![1000u64, 2000u64, 3000u64, 4000u64]),
Column::new("score".into(), vec![10u32, 10u32, 20u32, 20u32]),
])
.unwrap();
let result = collapse_equal_bins(df, None).unwrap();
assert!(result.height() < 4);
let score_col = result.column("score").unwrap();
let score = score_col.sum_reduce().expect("Failed to sum scores");
assert_eq!(score.as_any_value().try_extract::<u32>().unwrap(), 60);
}
// Display output contains every configured setting in the table.
#[test]
fn test_bam_pileup_display() {
let pileup = create_test_bam_pileup();
let display_str = format!("{pileup}");
assert!(display_str.contains("Coverage Run Details"));
assert!(display_str.contains("input"));
assert!(display_str.contains("test.bam"));
assert!(display_str.contains("bin size"));
assert!(display_str.contains("1000 bp"));
assert!(display_str.contains("normalization"));
assert!(display_str.contains("Raw"));
assert!(display_str.contains("normalization factor"));
assert!(display_str.contains("1.000000"));
assert!(display_str.contains("scale factor"));
assert!(display_str.contains("counting"));
assert!(display_str.contains("reads"));
}
// End-to-end: raw counts; expected min/max come from the test.bam fixture.
#[test]
fn test_bam_to_bigwig_raw_scale() {
let test_data_dir = get_test_data_dir();
let temp_dir = TempDir::new().unwrap();
let bam_file = test_data_dir.join("test.bam");
let output_path = temp_dir.path().join("test.bw");
let pileup = BamPileup::new(
bam_file,
1000,
NormalizationMethod::Raw,
1.0,
false,
BamReadFilter::default(),
true,
false,
None,
None,
None,
);
let result = pileup.to_bigwig(output_path.clone());
result.expect("Failed to create BigWig file");
assert!(output_path.exists(), "BigWig file was not created");
info!("BigWig file created at: {}", output_path.display());
let bw = BigWigRead::open_file(output_path.clone());
assert!(bw.is_ok(), "Failed to open BigWig file");
let mut bw = bw.unwrap();
let stats = bw.get_summary().expect("Failed to get BigWig summary");
assert_eq!(stats.min_val, 0.0);
assert_eq!(stats.max_val, 117.0);
}
// End-to-end: CPM normalization; expected summary pinned from the fixture.
#[test]
fn test_bam_to_bigwig_cpm_scale() {
let test_data_dir = get_test_data_dir();
let temp_dir = TempDir::new().unwrap();
let bam_file = test_data_dir.join("test.bam");
let output_path = temp_dir.path().join("test_cpm.bw");
let pileup = BamPileup::new(
bam_file,
1000,
NormalizationMethod::CPM,
1.0,
false,
BamReadFilter::default(),
true,
false,
None,
None,
None,
);
let result = pileup.to_bigwig(output_path.clone());
result.expect("Failed to create BigWig file");
assert!(output_path.exists(), "BigWig file was not created");
info!("BigWig file created at: {}", output_path.display());
let bw = BigWigRead::open_file(output_path.clone());
assert!(bw.is_ok(), "Failed to open BigWig file");
let mut bw = bw.unwrap();
let stats = bw.get_summary().expect("Failed to get BigWig summary");
println!("BigWig stats: {stats:?}");
assert_eq!(stats.min_val, 0.0);
assert_eq!(stats.max_val, 10134.2568359375);
}
// End-to-end: RPKM normalization (collapse disabled here); expected
// summary pinned from the fixture.
#[test]
fn test_bam_to_bigwig_rpkm_scale() {
let test_data_dir = get_test_data_dir();
let temp_dir = TempDir::new().unwrap();
let bam_file = test_data_dir.join("test.bam");
let output_path = temp_dir.path().join("test_rpkm.bw");
let pileup = BamPileup::new(
bam_file,
1000,
NormalizationMethod::RPKM,
1.0,
false,
BamReadFilter::default(),
false,
false,
None,
None,
None,
);
let result = pileup.to_bigwig(output_path.clone());
result.expect("Failed to create BigWig file");
assert!(output_path.exists(), "BigWig file was not created");
info!("BigWig file created at: {}", output_path.display());
let bw = BigWigRead::open_file(output_path.clone());
assert!(bw.is_ok(), "Failed to open BigWig file");
let mut bw = bw.unwrap();
let stats = bw.get_summary().expect("Failed to get BigWig summary");
println!("BigWig stats: {stats:?}");
assert_eq!(stats.min_val, 0.0);
assert_eq!(stats.max_val, 10134.2568359375);
}
}