genome-sh 0.1.0

The jq of genomics. Fast, local, human-readable variant analysis.
use std::path::PathBuf;

use anyhow::{Context, Result};
use clap::Args;

use crate::db::Database;
use crate::io::vcf::VcfReader;
use crate::output::Format;

// NOTE: clap's `ValueEnum` derive uses the `///` line on each variant as the
// help text shown for that value, so rewording a variant comment changes the
// `--help` output.
/// Filters for annotation output.
///
/// Selected through the `filter` field of `AnnotateArgs` (default: `all`).
#[derive(Clone, Debug, clap::ValueEnum)]
pub enum AnnotateFilter {
    /// All variants.
    All,
    /// Only clinically significant variants (Pathogenic, Likely Pathogenic).
    Clinical,
    /// Only rare variants (AF < 0.01).
    Rare,
}

// Command-line arguments consumed by `AnnotateArgs::run`.
//
// Review notes in this struct are plain `//` comments on purpose: clap's
// derive macros turn `///` text into CLI help, so the existing `///` lines are
// user-facing strings and are left untouched.
#[derive(Args)]
pub struct AnnotateArgs {
    /// Input VCF file (use "-" for stdin).
    // Handed unchanged to `VcfReader::open`; whether "-" is mapped to stdin is
    // decided there and is not visible in this file — TODO confirm.
    pub input: PathBuf,

    /// Output file. Defaults to stdout.
    // NOTE(review): `run` does not read this field yet.
    #[arg(short, long)]
    pub output: Option<PathBuf>,

    /// Output format.
    // The default is given as the string "human" and parsed into `Format` by
    // clap; presumably `Format` accepts that spelling — verify in
    // `crate::output`.
    // NOTE(review): `run` does not read this field yet.
    #[arg(short, long, default_value = "human")]
    pub format: Format,

    /// Filter variants.
    // NOTE(review): `run` does not read this field yet, so no filtering
    // is applied at the moment.
    #[arg(long, default_value = "all")]
    pub filter: AnnotateFilter,

    /// Generate an HTML report.
    // NOTE(review): `run` does not read this field yet.
    #[arg(long)]
    pub report: Option<PathBuf>,
}

impl AnnotateArgs {
    /// Streams every record of the input VCF through the variant database and
    /// reports how many records were seen and how many received an annotation.
    ///
    /// Only `self.input` is consulted so far: `output`, `format`, `filter` and
    /// `report` are accepted but not yet acted on, and annotated records are
    /// not written anywhere (see the TODO inside the loop).
    ///
    /// # Errors
    ///
    /// Returns an error if the variant database or the input file cannot be
    /// opened, or if reading or annotating any record fails. Per-record
    /// failures carry the 1-based index of the offending record so a problem
    /// deep inside a large file can be located.
    pub async fn run(self) -> Result<()> {
        let db = Database::open()
            .context("Failed to open variant database. Run `genome db install` first.")?;

        let mut reader = VcfReader::open(&self.input)
            .await
            .context("Failed to open input VCF file")?;

        let mut count = 0u64;
        let mut annotated = 0u64;

        // `with_context` is lazy: the message is only formatted on failure, so
        // the success path pays nothing per record.
        while let Some(record) = reader
            .next_record()
            .await
            // `count` still holds the number of records read so far, hence +1.
            .with_context(|| format!("Failed to read VCF record #{}", count + 1))?
        {
            count += 1;

            let annotation = db
                .annotate_vcf_record(&record)
                .with_context(|| format!("Failed to annotate VCF record #{count}"))?;

            if annotation.is_some() {
                annotated += 1;
            }

            // TODO: Write annotated record to output
        }

        // The summary goes to stderr, keeping stdout free for record output.
        eprintln!("Processed {count} variants, {annotated} annotated.");

        Ok(())
    }
}