predictosaurus 0.11.3

Uncertainty aware haplotype based genomic variant effect prediction
use crate::cli::{Command, Format, Predictosaurus};
use crate::graph::duck::{
    create_paths, create_scores, feature_graph, read_scores, write_graphs, write_scores,
};
use crate::graph::paths::Cds;
use crate::graph::peptide::write_peptides;
use crate::graph::transcript::transcripts;
use crate::graph::VariantGraph;
use crate::show::{render_html_paths, render_scores, render_tsv_paths, render_vl_paths};
use crate::utils::bcf::get_targets;
use crate::utils::create_output_dir;
use anyhow::{Context, Result};
use bio::bio_types::strand::Strand;
use bio::io::gff;
use bio::io::gff::GffType;
use bio::stats::{LogProb, Prob};
use clap::Parser;
use env_logger::Env;
use itertools::Itertools;
use log::{debug, info};
use rayon::prelude::*;
use rayon::ThreadPoolBuilder;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};

mod annotation;
mod cli;
mod graph;
mod impact;
mod show;
mod transcription;
mod translation;
mod utils;

fn main() -> Result<()> {
    let args = Predictosaurus::parse();
    let log_level = if args.verbose { "info" } else { "off" };
    env_logger::Builder::from_env(Env::default().default_filter_or(log_level)).init();
    if let Some(n) = args.threads {
        ThreadPoolBuilder::new()
            .num_threads(n)
            .build_global()
            .expect("Failed to build thread pool");
    }
    args.command.run()
}

impl Command {
    fn run(&self) -> Result<()> {
        match self {
            Command::Build {
                calls,
                observations,
                min_prob_present,
                min_vaf,
                output,
            } => {
                let targets = get_targets(calls)?;
                let mut graphs = targets
                    .into_par_iter()
                    .filter_map(|target| {
                        info!("Building graph for target {target}");
                        let variant_graph = VariantGraph::build(
                            calls,
                            observations,
                            &target,
                            LogProb::from(Prob(*min_prob_present)),
                            *min_vaf,
                        )
                        .ok()?;
                        info!(
                            "Finished building graph for target {} with {} nodes",
                            target,
                            variant_graph.graph.node_count()
                        );
                        if !variant_graph.is_empty() {
                            Some((target, variant_graph))
                        } else {
                            None
                        }
                    })
                    .collect();
                write_graphs(graphs, output)?;
            }
            Command::Process {
                features,
                reference,
                graph,
                distance_metric,
                haplotype_metric,
                output,
                max_cds_length,
                genome_build,
            } => {
                create_scores(output)?;
                info!("Reading reference genome from {reference:?}");
                let reference_genome = utils::fasta::read_reference(reference);
                let write_lock = Arc::new(Mutex::new(()));
                let transcripts = transcripts(features, graph, *max_cds_length)?;
                let total = transcripts.len();
                transcripts.into_par_iter().enumerate().try_for_each(
                    |(i, transcript)| -> anyhow::Result<()> {
                        info!(
                            "Processing transcript {} ({}/{})",
                            transcript.name(),
                            i + 1,
                            total
                        );
                        let scores = transcript.scores(
                            graph,
                            &reference_genome,
                            *haplotype_metric,
                            *distance_metric,
                            *genome_build,
                        )?;
                        info!(
                            "Writing scores for {} different haplotypes for transcript {}",
                            scores.len(),
                            transcript.name()
                        );
                        let _lock = write_lock.lock().unwrap();
                        write_scores(output, scores, transcript)?;
                        Ok(())
                    },
                )?;
            }
            Command::Peptides {
                features,
                reference,
                graph,
                interval,
                sample,
                output,
                events,
                min_event_prob,
                background_events,
                max_background_event_prob,
                max_cds_length,
            } => {
                info!("Reading reference genome from {reference:?}");
                let reference_genome = utils::fasta::read_reference(reference);
                let mut peptides = Vec::new();
                for transcript in transcripts(features, graph, *max_cds_length)? {
                    info!("Processing transcript {}", transcript.name());
                    let transcript_peptides = transcript.peptides(
                        graph,
                        &reference_genome,
                        interval.clone(),
                        sample,
                        events,
                        LogProb::from(Prob(*min_event_prob)),
                        background_events,
                        LogProb::from(Prob(*max_background_event_prob)),
                    )?;
                    peptides.extend(transcript_peptides);
                }
                info!("Writing peptides to {output:?}");
                write_peptides(peptides, output)?;
            }
            Command::Plot { input, output } => {
                if let Some(parent) = output.parent() {
                    create_output_dir(parent)?;
                }
                let scores = read_scores(input)?;
                render_scores(output, &scores)?;
            }
        }
        Ok(())
    }
}