rustynetics 0.1.4

A high-performance genomics libary specialized in handling BAM and BigWig files
Documentation
use std::collections::BTreeMap;
use std::process;

use clap::{Arg, ArgAction, Command};

mod common;

fn main() {
    let matches = Command::new("bigwig-histogram")
        .about("Print a value histogram for a BigWig track")
        .arg(
            Arg::new("bins")
                .short('b')
                .long("bins")
                .default_value("100"),
        )
        .arg(Arg::new("bin-size").long("bin-size").default_value("0"))
        .arg(
            Arg::new("bin-summary")
                .long("bin-summary")
                .default_value("mean")
                .value_parser([
                    "mean",
                    "max",
                    "min",
                    "discrete mean",
                    "discrete max",
                    "discrete min",
                    "variance",
                ]),
        )
        .arg(
            Arg::new("cumulative")
                .short('c')
                .long("cumulative")
                .action(ArgAction::SetTrue),
        )
        .arg(
            Arg::new("verbose")
                .short('v')
                .long("verbose")
                .action(ArgAction::SetTrue),
        )
        .arg(Arg::new("input").required(true).index(1))
        .get_matches();

    let input = matches.get_one::<String>("input").unwrap();
    let bins: usize = matches
        .get_one::<String>("bins")
        .unwrap()
        .parse()
        .unwrap_or_else(|error| {
            eprintln!("invalid histogram bin count: {error}");
            process::exit(1);
        });
    let bin_size: usize = matches
        .get_one::<String>("bin-size")
        .unwrap()
        .parse()
        .unwrap_or_else(|error| {
            eprintln!("invalid track bin size: {error}");
            process::exit(1);
        });
    let summary = common::parse_bin_summary(matches.get_one::<String>("bin-summary").unwrap())
        .unwrap_or_else(|error| {
            eprintln!("{error}");
            process::exit(1);
        });
    let cumulative = matches.get_flag("cumulative");

    let track = common::import_simple_track(input, "", summary, bin_size, 0, f64::NAN)
        .unwrap_or_else(|error| {
            eprintln!("importing BigWig failed: {error}");
            process::exit(1);
        });
    let values = common::collect_track_values(&track).unwrap_or_else(|error| {
        eprintln!("collecting values failed: {error}");
        process::exit(1);
    });

    println!("{:>15}\t{:>15}", "x", "y");
    if values.is_empty() {
        return;
    }

    if bins == 0 {
        let mut counts: BTreeMap<u64, usize> = BTreeMap::new();
        for value in &values {
            *counts.entry(value.to_bits()).or_insert(0) += 1;
        }
        let mut running = 0usize;
        for (bits, count) in counts {
            running += count;
            let y = if cumulative {
                running as f64
            } else {
                count as f64
            };
            println!("{:15e}\t{:15.6}", f64::from_bits(bits), y);
        }
        return;
    }

    let min = values.iter().copied().fold(f64::INFINITY, f64::min);
    let max = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);
    let width = if max == min {
        1.0
    } else {
        (max - min) / bins as f64
    };
    let mut counts = vec![0usize; bins];
    for value in &values {
        let mut idx = if max == min {
            0
        } else {
            ((value - min) / width).floor() as usize
        };
        if idx >= bins {
            idx = bins - 1;
        }
        counts[idx] += 1;
    }

    let mut running = 0usize;
    for (i, count) in counts.into_iter().enumerate() {
        running += count;
        let x = min + i as f64 * width;
        let y = if cumulative {
            running as f64
        } else {
            count as f64
        };
        println!("{:15e}\t{:15.6}", x, y);
    }
}