rustynetics 0.1.4

A high-performance genomics libary specialized in handling BAM and BigWig files
Documentation
use std::io::BufRead;
use std::process;

use clap::{Arg, ArgAction, Command};

use rustynetics::genome::Genome;
use rustynetics::track::MutableTrack;
use rustynetics::track_simple::SimpleTrack;

mod common;

fn import_table(
    reader: &mut dyn BufRead,
    track: &mut SimpleTrack,
    column_name: &str,
) -> Result<(), Box<dyn std::error::Error>> {
    let mut line = String::new();
    if reader.read_line(&mut line)? == 0 {
        return Err("invalid table: missing sequence information".into());
    }
    let fields: Vec<_> = line.split_whitespace().collect();
    if fields.len() != 2 {
        return Err("invalid table: first line must have two columns".into());
    }
    let seqname = fields[1];
    let mut sequence = track.get_sequence_mut(seqname)?;

    line.clear();
    if reader.read_line(&mut line)? == 0 {
        return Err("invalid table: missing header".into());
    }
    let header: Vec<_> = line.split_whitespace().collect();
    let column_idx = header
        .iter()
        .position(|name| *name == column_name)
        .ok_or_else(|| format!("column `{column_name}` not found"))?;

    for i in 0..sequence.n_bins() {
        line.clear();
        if reader.read_line(&mut line)? == 0 {
            break;
        }
        let fields: Vec<_> = line.split_whitespace().collect();
        if column_idx >= fields.len() {
            return Err(format!("invalid table row at bin {i}").into());
        }
        sequence.set_bin(i, fields[column_idx].parse()?);
    }

    Ok(())
}

fn main() {
    let matches = Command::new("chromhmm-tables-to-bigwig")
        .about("Convert ChromHMM per-chromosome tables to a BigWig track")
        .arg(Arg::new("initial-value").long("initial-value"))
        .arg(
            Arg::new("verbose")
                .short('v')
                .long("verbose")
                .action(ArgAction::Count),
        )
        .arg(Arg::new("genome").required(true).index(1))
        .arg(Arg::new("bin-size").required(true).index(2))
        .arg(Arg::new("column").required(true).index(3))
        .arg(Arg::new("output").required(true).index(4))
        .arg(Arg::new("tables").required(true).index(5).num_args(1..))
        .get_matches();

    let genome_path = matches.get_one::<String>("genome").unwrap();
    let bin_size: usize = matches
        .get_one::<String>("bin-size")
        .unwrap()
        .parse()
        .unwrap_or_else(|error| {
            eprintln!("invalid bin size: {error}");
            process::exit(1);
        });
    let column = matches.get_one::<String>("column").unwrap();
    let output = matches.get_one::<String>("output").unwrap();
    let tables: Vec<_> = matches
        .get_many::<String>("tables")
        .unwrap()
        .map(String::as_str)
        .collect();
    let init = common::parse_initial_value(
        matches
            .get_one::<String>("initial-value")
            .map(String::as_str),
        0.0,
    )
    .unwrap_or_else(|error| {
        eprintln!("invalid initial value: {error}");
        process::exit(1);
    });

    let mut genome = Genome::default();
    genome.import(genome_path).unwrap_or_else(|error| {
        eprintln!("reading genome failed: {error}");
        process::exit(1);
    });

    let mut track = SimpleTrack::alloc(String::new(), genome, init, bin_size);
    for table in tables {
        let mut reader = common::open_reader(Some(table)).unwrap_or_else(|error| {
            eprintln!("opening table `{table}` failed: {error}");
            process::exit(1);
        });
        import_table(&mut reader, &mut track, column).unwrap_or_else(|error| {
            eprintln!("importing table `{table}` failed: {error}");
            process::exit(1);
        });
    }

    if let Err(error) = common::export_simple_track(&track, output) {
        eprintln!("writing BigWig failed: {error}");
        process::exit(1);
    }
}