use crate::common::Args as CommonArgs;
use crate::db::keys::Var;
use crate::db::{ContigIdMap, PipelineConfig, write_contig_dictionary};
use crate::pbs::seqvars::CaddRecord;
use anyhow::Error;
use clap::Parser;
use prost::Message;
use std::collections::HashSet;
use std::fs::File;
use std::sync::Arc;
/// Command-line arguments for constructing the CADD score RocksDB database.
///
/// The help text comes from the explicit `about` attribute below; `long_about` is
/// explicitly set to `None`.
#[derive(Parser, Debug, Clone)]
#[command(about = "Construct CADD score RocksDB database", long_about = None)]
pub struct Args {
// Pipeline options declared by `crate::db::CommonPipelineArgs`, flattened into this command.
// `run` reads assembly, input, output, batch_size, no_progress and threads from it.
#[command(flatten)]
pub common: crate::db::CommonPipelineArgs,
}
/// Re-exports [`Args`] so it is also reachable through the `cli` module path.
// NOTE(review): presumably kept so callers can write `cli::Args` — confirm against call sites.
pub mod cli {
pub use super::Args;
}
/// One data row of the tab-separated CADD input, as deserialized by `run`.
///
/// `run` reads the file without a header row and deserializes each record with no header
/// record, so columns are matched to these fields by position. Do not reorder the fields
/// without also changing the input column order.
#[derive(serde::Deserialize, Clone)]
struct CaddRecordRow {
/// Contig name as written in the input (column 1); `run` passes it to
/// `get_or_intern_contig` to obtain the contig name and id used for the key.
chrom: String,
/// Variant position (column 2).
pos: i32,
/// Reference allele (column 3). `ref` is a Rust keyword, hence the raw identifier.
r#ref: String,
/// Alternate allele (column 4).
alt: String,
/// Raw score (column 5), stored unchanged in `CaddRecord::raw_score`.
// NOTE(review): presumably the unscaled CADD score — confirm against the input spec.
raw_score: f32,
/// Phred value (column 6), stored unchanged in `CaddRecord::phred`.
// NOTE(review): presumably the PHRED-scaled CADD score — confirm against the input spec.
phred: f32,
}
/// Builds the CADD score database by feeding a tab-separated input file through
/// `crate::db::run_tsv_pipeline` and then writing the contig dictionary.
///
/// Each input row is deserialized into a [`CaddRecordRow`], turned into a variant key via
/// [`Var`], and paired with a protobuf-encoded [`CaddRecord`] value.
///
/// # Arguments
///
/// * `_common` - Global command-line arguments; currently unused.
/// * `args` - Arguments of this command; only `args.common` is read.
///
/// # Errors
///
/// Returns an error if the pipeline fails (this includes errors raised while opening the
/// input or while converting a row) or if writing the contig dictionary fails.
pub fn run(_common: &CommonArgs, args: &Args) -> Result<(), Error> {
    let config = PipelineConfig {
        assembly: &args.common.assembly,
        input: &args.common.input,
        output: &args.common.output,
        batch_size: args.common.batch_size,
        no_progress: args.common.no_progress,
        threads: args.common.threads,
        db_type: "cadd",
        schema_version: "1.0",
        extra_meta: std::collections::HashMap::new(),
    };

    // Opens one input file as a TSV reader. The second value returned by
    // `niffler::get_reader` (the detected format) is not needed and is discarded.
    let open_reader = |path: &std::path::Path| {
        let file = File::open(path)?;
        let (reader, _) = niffler::get_reader(Box::new(file))?;
        // No header row is expected; lines starting with `#` are skipped as comments.
        let rdr = csv::ReaderBuilder::new()
            .delimiter(b'\t')
            .has_headers(false)
            .comment(Some(b'#'))
            .from_reader(reader);
        Ok((rdr, csv::StringRecord::new()))
    };

    // One handle to the contig id map moves into the row closure; the other is kept so
    // the dictionary can be written once the pipeline has finished.
    let chrom_to_id: ContigIdMap = ContigIdMap::default();
    let chrom_to_id_closure = Arc::clone(&chrom_to_id);

    crate::db::run_tsv_pipeline(config, open_reader, move |record, _, contig_manager| {
        // No header record is supplied, so fields are matched by column position.
        let row: CaddRecordRow = record.deserialize(None)?;
        let (chrom_std, chrom_id) =
            crate::db::get_or_intern_contig(&row.chrom, contig_manager, &chrom_to_id_closure);

        // Format the label while the allele strings are still borrowable, so they can be
        // moved into `Var::new` below instead of being cloned for every row.
        let var_label = format!("{}:{}{}>{}", chrom_std, row.pos, row.r#ref, row.alt);

        let var = Var::new(chrom_std.clone(), row.pos, row.r#ref, row.alt);
        let key = var.encode_with_id(chrom_id);

        let record_pb = CaddRecord {
            raw_score: row.raw_score,
            phred: row.phred,
        };
        let mut value = Vec::new();
        record_pb.encode(&mut value)?;

        // One (key, value, label) entry per row, plus an empty set.
        // NOTE(review): the meaning of the set is defined by `run_tsv_pipeline`, which is
        // not visible here — confirm it is intentionally always empty.
        Ok((vec![(key, value, var_label)], HashSet::new()))
    })?;

    tracing::info!("Writing contig index metadata mapping into the meta CF...");
    write_contig_dictionary(&args.common.output, "cadd", &chrom_to_id)?;
    Ok(())
}