use std::path::Path;
use anyhow::{Context, Result};
use tokio::io::BufReader;
/// One VCF data line, reduced to its first eight tab-separated columns.
///
/// Values are filled in by `parse_vcf_line`; the per-field notes below
/// describe what that parser stores.
#[derive(Debug, Clone)]
pub struct VcfRecord {
/// Chromosome name from column 1. The parser passes it through
/// `normalize_chrom`, which adds a `chr` prefix when one is missing.
pub chrom: String,
/// Position from column 2, parsed as an unsigned integer with no offset applied.
pub pos: u64,
/// Identifier from column 3; `None` when the column is exactly `.`.
pub id: Option<String>,
/// Reference allele text from column 4, stored verbatim.
pub reference: String,
/// Alternate allele text from column 5, stored verbatim (not split on commas).
pub alt: String,
/// Quality from column 6; `None` when the column is absent or does not
/// parse as a floating-point number (which includes `.`).
pub qual: Option<f64>,
/// Filter text from column 7, stored verbatim when present (a literal `.`
/// is kept as `Some(".")`); `None` only when the column is absent.
pub filter: Option<String>,
/// Raw, unparsed INFO text from column 8; empty when the column is absent.
pub info: String,
}
/// Asynchronous, line-oriented reader that yields [`VcfRecord`]s.
///
/// Built by `VcfReader::open`, which selects the underlying byte source.
pub struct VcfReader {
// Line stream over a buffered, type-erased async reader; boxing lets one
// field type cover every source that `open` can produce.
lines: tokio::io::Lines<BufReader<Box<dyn tokio::io::AsyncRead + Unpin + Send>>>,
}
impl VcfReader {
    /// Opens a VCF source for line-by-line reading.
    ///
    /// * A path of exactly `-` reads from standard input.
    /// * A path whose extension is `gz` is opened and wrapped in the noodles
    ///   async bgzf reader.
    /// * Any other path is opened and read as-is.
    ///
    /// # Errors
    ///
    /// Returns an error with the context `Cannot open <path>` when the file
    /// cannot be opened.
    pub async fn open(path: &Path) -> Result<Self> {
        let wants_stdin = path.to_string_lossy() == "-";

        let source: Box<dyn tokio::io::AsyncRead + Unpin + Send> = if wants_stdin {
            Box::new(tokio::io::stdin())
        } else {
            let handle = tokio::fs::File::open(path)
                .await
                .with_context(|| format!("Cannot open {}", path.display()))?;

            // Compression is decided purely from the file extension.
            let is_gz = matches!(path.extension(), Some(ext) if ext == "gz");
            if is_gz {
                Box::new(noodles::bgzf::r#async::reader::Reader::new(handle))
            } else {
                Box::new(handle)
            }
        };

        Ok(Self {
            lines: tokio::io::AsyncBufReadExt::lines(BufReader::new(source)),
        })
    }

    /// Reads lines until one yields a record.
    ///
    /// Lines starting with `#` (headers) are skipped. Returns `Ok(None)` once
    /// the underlying stream is exhausted.
    ///
    /// # Errors
    ///
    /// Propagates I/O errors from the line stream and any error produced by
    /// `parse_vcf_line` for the first non-header line encountered.
    pub async fn next_record(&mut self) -> Result<Option<VcfRecord>> {
        while let Some(text) = self.lines.next_line().await? {
            if !text.starts_with('#') {
                return parse_vcf_line(&text).map(Some);
            }
        }
        Ok(None)
    }
}
/// Parses one tab-separated VCF data line into a [`VcfRecord`].
///
/// Only the first eight columns (CHROM, POS, ID, REF, ALT, QUAL, FILTER,
/// INFO) are used. Any further columns (FORMAT and per-sample data) are
/// ignored, so `info` holds the INFO column alone and not the rest of the line.
///
/// Field handling:
/// * `chrom` is passed through `normalize_chrom`.
/// * `id` becomes `None` when the column is `.`.
/// * `qual` becomes `None` when the column is absent or is not a valid
///   floating-point number (this includes `.`); the parse failure is
///   deliberately not reported.
/// * `filter` is kept verbatim when present.
/// * `info` is the empty string when the column is absent.
///
/// # Errors
///
/// Fails with `Invalid VCF line: too few fields` when fewer than five columns
/// are present, and with the context `Invalid position` when column 2 is not
/// an unsigned integer.
fn parse_vcf_line(line: &str) -> Result<VcfRecord> {
    // Split on every tab and keep only the eight fixed columns. Limiting the
    // split itself (`splitn(8, ..)`) would leave every trailing column glued
    // onto the INFO field.
    let fields: Vec<&str> = line.split('\t').take(8).collect();
    if fields.len() < 5 {
        anyhow::bail!("Invalid VCF line: too few fields");
    }

    let chrom = normalize_chrom(fields[0]);
    let pos: u64 = fields[1].parse().context("Invalid position")?;
    let id = (fields[2] != ".").then(|| fields[2].to_string());
    let reference = fields[3].to_string();
    let alt = fields[4].to_string();

    // Columns 6-8 are optional here; only the first five are required.
    let qual = fields.get(5).and_then(|q| q.parse().ok());
    let filter = fields.get(6).copied().map(str::to_owned);
    let info = fields.get(7).copied().unwrap_or_default().to_string();

    Ok(VcfRecord {
        chrom,
        pos,
        id,
        reference,
        alt,
        qual,
        filter,
        info,
    })
}
/// Returns `chrom` with a leading `chr`, adding the prefix only when the
/// input does not already start with it (the check is case-sensitive).
fn normalize_chrom(chrom: &str) -> String {
    const PREFIX: &str = "chr";
    match chrom.strip_prefix(PREFIX) {
        // Already prefixed: hand back an owned copy unchanged.
        Some(_) => chrom.to_owned(),
        None => [PREFIX, chrom].concat(),
    }
}