use crate::core::GenomicRecordIterator;
use crate::error::Result;
use crate::parallel::Mergeable;
use crate::stats::{CategoryCounter, RunningStats};
use super::reader::{BedReader, BedRecord};
#[derive(Debug, Clone)]
pub struct BedStats {
pub total_intervals: usize,
pub bed3_count: usize,
pub bed6_count: usize,
pub bed12_count: usize,
pub length_stats: RunningStats,
pub length_distribution: CategoryCounter<u64>,
pub intervals_per_chrom: CategoryCounter<String>,
pub score_stats: RunningStats,
pub plus_strand: usize,
pub minus_strand: usize,
pub unstranded: usize,
pub block_count_stats: RunningStats,
pub total_blocks: usize,
pub total_span: u64,
pub min_length: u64,
pub max_length: u64,
}
impl Default for BedStats {
fn default() -> Self {
Self::new()
}
}
impl BedStats {
pub fn new() -> Self {
Self {
total_intervals: 0,
bed3_count: 0,
bed6_count: 0,
bed12_count: 0,
length_stats: RunningStats::new(),
length_distribution: CategoryCounter::new(),
intervals_per_chrom: CategoryCounter::new(),
score_stats: RunningStats::new(),
plus_strand: 0,
minus_strand: 0,
unstranded: 0,
block_count_stats: RunningStats::new(),
total_blocks: 0,
total_span: 0,
min_length: u64::MAX,
max_length: 0,
}
}
pub fn update(&mut self, record: &BedRecord) {
self.total_intervals += 1;
if record.is_bed12() {
self.bed12_count += 1;
} else if record.is_bed6() {
self.bed6_count += 1;
} else {
self.bed3_count += 1;
}
let length = record.len();
self.length_stats.push(length as f64);
self.length_distribution.increment(length);
self.total_span += length;
self.min_length = self.min_length.min(length);
self.max_length = self.max_length.max(length);
self.intervals_per_chrom.increment_str(&record.chrom);
if let Some(score) = record.score {
self.score_stats.push(score);
}
match record.strand {
Some('+') => self.plus_strand += 1,
Some('-') => self.minus_strand += 1,
_ => self.unstranded += 1,
}
if let Some(block_count) = record.block_count {
self.block_count_stats.push(block_count as f64);
self.total_blocks += block_count as usize;
}
}
pub fn compute(reader: &mut BedReader) -> Result<Self> {
let mut stats = Self::new();
while let Some(record) = reader.next_record()? {
stats.update(&record);
}
Ok(stats)
}
pub fn mean_length(&self) -> Option<f64> {
self.length_stats.mean()
}
pub fn std_length(&self) -> Option<f64> {
self.length_stats.std_dev()
}
pub fn mean_score(&self) -> Option<f64> {
self.score_stats.mean()
}
pub fn std_score(&self) -> Option<f64> {
self.score_stats.std_dev()
}
pub fn mean_blocks(&self) -> Option<f64> {
self.block_count_stats.mean()
}
pub fn plus_strand_percent(&self) -> f64 {
if self.total_intervals == 0 {
0.0
} else {
(self.plus_strand as f64 / self.total_intervals as f64) * 100.0
}
}
pub fn minus_strand_percent(&self) -> f64 {
if self.total_intervals == 0 {
0.0
} else {
(self.minus_strand as f64 / self.total_intervals as f64) * 100.0
}
}
pub fn print_summary(&self) {
println!("=== BED Statistics ===\n");
println!("Basic Counts:");
println!(" Total intervals: {:>12}", self.total_intervals);
println!(" BED3 (minimal): {:>12}", self.bed3_count);
println!(" BED6 (standard): {:>12}", self.bed6_count);
println!(" BED12 (full): {:>12}", self.bed12_count);
println!();
println!("Interval Lengths:");
println!(
" Mean: {:>12.2}",
self.mean_length().unwrap_or(0.0)
);
println!(
" Std Dev: {:>12.2}",
self.std_length().unwrap_or(0.0)
);
println!(" Min: {:>12}", self.min_length);
println!(" Max: {:>12}", self.max_length);
println!(" Total span: {:>12}", self.total_span);
println!();
if self.bed6_count > 0 || self.bed12_count > 0 {
println!("Strand Distribution:");
println!(
" Plus (+): {:>11.1}%",
self.plus_strand_percent()
);
println!(
" Minus (-): {:>11.1}%",
self.minus_strand_percent()
);
println!(" Unstranded (.): {:>12}", self.unstranded);
println!();
if self.score_stats.count() > 0 {
println!("Score Statistics:");
println!(
" Mean: {:>12.2}",
self.mean_score().unwrap_or(0.0)
);
println!(
" Std Dev: {:>12.2}",
self.std_score().unwrap_or(0.0)
);
println!();
}
}
if self.bed12_count > 0 {
println!("Block/Exon Structure (BED12):");
println!(" Total blocks: {:>12}", self.total_blocks);
println!(
" Mean blocks/int: {:>12.2}",
self.mean_blocks().unwrap_or(0.0)
);
println!();
}
println!("Top Chromosomes:");
let mut chrom_vec: Vec<_> = self.intervals_per_chrom.iter().collect();
chrom_vec.sort_by(|a, b| b.1.cmp(a.1)); for (chrom, count) in chrom_vec.iter().take(10) {
let percent = (**count as f64 / self.total_intervals as f64) * 100.0;
println!(" {:>12}: {:>10} ({:>5.1}%)", chrom, count, percent);
}
println!();
if self.total_intervals > 0 {
println!("Length Distribution (most common):");
let mut length_vec: Vec<_> = self.length_distribution.iter().collect();
length_vec.sort_by(|a, b| b.1.cmp(a.1)); for (length, count) in length_vec.iter().take(10) {
let percent = (**count as f64 / self.total_intervals as f64) * 100.0;
println!(" {:>12} bp: {:>8} ({:>5.1}%)", length, count, percent);
}
}
}
}
impl Mergeable for BedStats {
fn merge(&mut self, other: Self) {
self.total_intervals += other.total_intervals;
self.bed3_count += other.bed3_count;
self.bed6_count += other.bed6_count;
self.bed12_count += other.bed12_count;
self.plus_strand += other.plus_strand;
self.minus_strand += other.minus_strand;
self.unstranded += other.unstranded;
self.total_blocks += other.total_blocks;
self.total_span += other.total_span;
self.min_length = self.min_length.min(other.min_length);
self.max_length = self.max_length.max(other.max_length);
self.length_stats.merge(other.length_stats);
self.score_stats.merge(other.score_stats);
self.block_count_stats.merge(other.block_count_stats);
self.length_distribution.merge(other.length_distribution);
self.intervals_per_chrom.merge(other.intervals_per_chrom);
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_bed_stats_new() {
let stats = BedStats::new();
assert_eq!(stats.total_intervals, 0);
assert_eq!(stats.bed3_count, 0);
assert_eq!(stats.bed6_count, 0);
}
#[test]
fn test_bed_stats_merge() {
let mut stats1 = BedStats::new();
stats1.total_intervals = 100;
stats1.bed3_count = 50;
stats1.bed6_count = 30;
stats1.total_span = 10000;
let mut stats2 = BedStats::new();
stats2.total_intervals = 50;
stats2.bed3_count = 25;
stats2.bed6_count = 15;
stats2.total_span = 5000;
stats1.merge(stats2);
assert_eq!(stats1.total_intervals, 150);
assert_eq!(stats1.bed3_count, 75);
assert_eq!(stats1.bed6_count, 45);
assert_eq!(stats1.total_span, 15000);
}
#[test]
fn test_bed_stats_percentages() {
let mut stats = BedStats::new();
stats.total_intervals = 100;
stats.plus_strand = 60;
stats.minus_strand = 40;
assert!((stats.plus_strand_percent() - 60.0).abs() < 0.1);
assert!((stats.minus_strand_percent() - 40.0).abs() < 0.1);
}
#[test]
fn test_bed_stats_empty() {
let stats = BedStats::new();
assert_eq!(stats.plus_strand_percent(), 0.0);
assert_eq!(stats.minus_strand_percent(), 0.0);
}
}