use crate::common::Args as CommonArgs;
use crate::db::keys::Var;
use crate::db::{PipelineConfig, write_contig_dictionary};
use crate::pbs::seqvars::DbsnpRecord;
use anyhow::Error;
use clap::Parser;
use itertools::Itertools;
use noodles::vcf::variant::record::Ids;
use prost::Message;
use rustc_hash::FxHashMap;
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, RwLock};
// Command-line arguments of the dbSNP database construction subcommand.
//
// NOTE: plain `//` comments are used on purpose; clap's derive turns `///` doc
// comments into help text, and the help text is already set explicitly below.
#[derive(Parser, Debug, Clone)]
#[command(about = "Construct dbSNP RocksDB database", long_about = None)]
pub struct Args {
// Shared pipeline options, flattened into this command. `run` reads
// `assembly`, `input`, `output`, `batch_size`, `no_progress` and `threads`
// from it.
#[command(flatten)]
pub common: crate::db::CommonPipelineArgs,
}
/// Re-exports [`Args`] under the `cli` path, so the argument type is reachable
/// as `cli::Args` as well as directly from this module.
pub mod cli {
pub use super::Args;
}
pub fn run(_common: &CommonArgs, args: &Args) -> Result<(), Error> {
let config = PipelineConfig {
assembly: &args.common.assembly,
input: &args.common.input,
output: &args.common.output,
batch_size: args.common.batch_size,
no_progress: args.common.no_progress,
threads: args.common.threads,
db_type: "dbsnp",
schema_version: "3.0",
extra_meta: HashMap::new(),
};
let header_modifier = |header: &mut noodles::vcf::Header| {
if let Some(info) = header.infos_mut().get_mut("RS") {
*info.type_mut() = noodles::vcf::header::record::value::map::info::Type::String;
}
};
let chrom_to_id = Arc::new(RwLock::new(FxHashMap::default()));
let chrom_to_id_closure = Arc::clone(&chrom_to_id);
crate::db::run_vcf_pipeline(
config,
Some(header_modifier),
move |record, contig_manager| {
let mut kvs = Vec::new();
let chrom = record.reference_sequence_name();
let pos = match record.variant_start() {
Some(start) => start.get() as i32,
None => return Ok((kvs, HashSet::new())),
};
let rs_id = if record.ids().is_empty() {
"".to_string()
} else {
record.ids().iter().join(";")
};
if rs_id.is_empty() {
return Ok((kvs, HashSet::new()));
}
let (chrom_std, chrom_id) =
crate::db::get_or_intern_contig(chrom, contig_manager, &chrom_to_id_closure);
let reference = record.reference_bases();
for alt in record.alternate_bases().as_ref() {
let alt_str = alt.to_string();
let var = Var::new(
chrom_std.clone(),
pos,
reference.to_string(),
alt_str.clone(),
);
let key = var.encode_with_id(chrom_id);
let record_pb = DbsnpRecord {
allele: alt_str,
rs_id: rs_id.clone(),
};
let mut value = Vec::new();
record_pb.encode(&mut value)?;
let var_label = format!(
"{}:{}{}>{}",
var.chrom, var.pos, var.reference, var.alternative
);
kvs.push((key, value, var_label));
}
Ok((kvs, HashSet::new()))
},
)?;
tracing::info!("Writing dbSNP contig index metadata mapping into the meta CF...");
write_contig_dictionary(&args.common.output, "dbsnp", &chrom_to_id)?;
Ok(())
}