rustynetics 0.1.4

A high-performance genomics libary specialized in handling BAM and BigWig files
Documentation
use std::cmp::Ordering;
use std::collections::HashMap;
use std::process;

use clap::{Arg, ArgAction, Command};

use rustynetics::track_generic::{GenericMutableTrack, GenericTrack};

mod common;

fn main() {
    let matches = Command::new("bigwig-counts-to-quantiles")
        .about("Convert BigWig counts to empirical quantiles")
        .arg(Arg::new("bin-size").long("bin-size").default_value("0"))
        .arg(
            Arg::new("bin-summary")
                .long("bin-summary")
                .default_value("mean")
                .value_parser([
                    "mean",
                    "max",
                    "min",
                    "discrete mean",
                    "discrete max",
                    "discrete min",
                    "variance",
                ]),
        )
        .arg(Arg::new("initial-value").long("initial-value"))
        .arg(
            Arg::new("verbose")
                .short('v')
                .long("verbose")
                .action(ArgAction::Count),
        )
        .arg(Arg::new("input").required(true).index(1))
        .arg(Arg::new("output").required(true).index(2))
        .get_matches();

    let input = matches.get_one::<String>("input").unwrap();
    let output = matches.get_one::<String>("output").unwrap();
    let bin_size: usize = matches
        .get_one::<String>("bin-size")
        .unwrap()
        .parse()
        .unwrap_or_else(|error| {
            eprintln!("invalid bin size: {error}");
            process::exit(1);
        });
    let summary = common::parse_bin_summary(matches.get_one::<String>("bin-summary").unwrap())
        .unwrap_or_else(|error| {
            eprintln!("{error}");
            process::exit(1);
        });
    let init = common::parse_initial_value(
        matches
            .get_one::<String>("initial-value")
            .map(String::as_str),
        f64::NAN,
    )
    .unwrap_or_else(|error| {
        eprintln!("invalid initial value: {error}");
        process::exit(1);
    });
    let verbose = matches.get_count("verbose") > 0;

    if verbose {
        eprintln!("Importing BigWig `{input}`...");
    }
    let mut track = common::import_simple_track(input, "", summary, bin_size, 0, init)
        .unwrap_or_else(|error| {
            eprintln!("importing BigWig failed: {error}");
            process::exit(1);
        });

    let mut counts = HashMap::new();
    GenericTrack::wrap(&track)
        .map(|_, _, value| {
            if !value.is_nan() {
                *counts.entry(value.to_bits()).or_insert(0usize) += 1;
            }
        })
        .unwrap_or_else(|error| {
            eprintln!("counting track values failed: {error}");
            process::exit(1);
        });

    let mut dist: Vec<(f64, usize)> = counts
        .into_iter()
        .map(|(bits, count)| (f64::from_bits(bits), count))
        .collect();
    dist.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(Ordering::Equal));
    let mut quantiles = HashMap::new();
    let total: usize = dist.iter().map(|(_, count)| *count).sum();
    if total > 0 {
        let mut rank = 0usize;
        for (value, count) in dist {
            rank += count;
            quantiles.insert(value.to_bits(), rank as f64 / total as f64);
        }
    }

    if verbose {
        eprintln!("Converting counts to quantiles...");
    }
    GenericMutableTrack::wrap(&mut track)
        .map(|_, _, value| {
            if value.is_nan() {
                value
            } else {
                *quantiles.get(&value.to_bits()).unwrap_or(&value)
            }
        })
        .unwrap_or_else(|error| {
            eprintln!("mapping quantiles failed: {error}");
            process::exit(1);
        });

    if verbose {
        eprintln!("Writing BigWig `{output}`...");
    }
    if let Err(error) = common::export_simple_track(&track, output) {
        eprintln!("writing output failed: {error}");
        process::exit(1);
    }
}