use std::{
fs::File,
io::{BufRead as _, BufReader},
str::FromStr,
sync::Arc,
};
use byteorder::ByteOrder as _;
use prost::Message as _;
pub use crate::gnomad_pbs::exac_cnv::CnvType;
use crate::gnomad_pbs::exac_cnv::{Population, Record};
/// Which block of the input file the importer is currently reading.
///
/// The importer switches variants when it meets a `track name=...` header
/// line and uses the active variant to choose the CNV type of each record.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
enum CurrentSection {
    /// Deletion block; this is also the value produced by `Default`.
    #[default]
    Dels,
    /// Duplication block.
    Dups,
}
/// Parse the population labels used in the ExAC CNV input (e.g. `ExAC-AFR`).
impl FromStr for Population {
    type Err = anyhow::Error;

    /// Map an `ExAC-*` label to its `Population` variant.
    ///
    /// The comparison is exact (case-sensitive, no trimming); any other
    /// string yields an `unknown population` error.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let population = match s {
            "ExAC-AFR" => Self::Afr,
            "ExAC-AMR" => Self::Amr,
            "ExAC-EAS" => Self::Eas,
            "ExAC-FIN" => Self::Fin,
            "ExAC-NFE" => Self::Nfe,
            "ExAC-SAS" => Self::Sas,
            "ExAC-OTH" => Self::Other,
            unknown => return Err(anyhow::anyhow!("unknown population: {}", unknown)),
        };
        Ok(population)
    }
}
pub fn import(
db: &Arc<rocksdb::DBWithThreadMode<rocksdb::MultiThreaded>>,
cf_data: &Arc<rocksdb::BoundColumnFamily>,
path_in_tsv: &str,
) -> Result<(), anyhow::Error> {
tracing::info!("- selected ExAC CNV import for GRCh37");
let reader = File::open(path_in_tsv)
.map(BufReader::new)
.map_err(|e| anyhow::anyhow!("could not open file: {}", e))?;
let mut section = CurrentSection::Dels;
let mut idx: u32 = 0;
for line in reader.lines() {
let line = line.map_err(|e| anyhow::anyhow!("could not read line: {}", e))?;
if line.starts_with("track name=delControls") {
section = CurrentSection::Dels;
} else if line.starts_with("track name=dupControls") {
section = CurrentSection::Dups;
} else {
let arr = line.trim().split(' ').collect::<Vec<_>>();
let record = Record {
chrom: arr[0]
.strip_prefix("chr")
.ok_or_else(|| anyhow::anyhow!("invalid chromosome: {}", arr[0]))?
.to_string(),
start: arr[1]
.parse()
.map_err(|e| anyhow::anyhow!("invalid start: {}", e))?,
stop: arr[2]
.parse()
.map_err(|e| anyhow::anyhow!("invalid stop: {}", e))?,
sv_type: match section {
CurrentSection::Dels => CnvType::Del,
CurrentSection::Dups => CnvType::Dup,
} as i32,
population: arr[3].parse::<Population>()? as i32,
};
let buf = record.encode_to_vec();
let mut key = [0; 4];
byteorder::LittleEndian::write_u32(&mut key[0..4], idx);
db.put_cf(cf_data, key, &buf)?;
idx += 1;
}
}
tracing::info!(" - imported {} records", idx);
Ok(())
}