use std::collections::HashMap;
use cyanea_core::{CyaneaError, Result};
use cyanea_omics::variant::{Variant, VariantFilter};
/// Expands a multi-allelic record into one record per alt allele.
///
/// Records with zero or one alt allele are returned unchanged (as a clone).
/// Chromosome, position, id, ref allele, quality, and filter are carried
/// onto every split record.
pub fn split_multiallelic(variant: &Variant) -> Vec<Variant> {
    if variant.alt_alleles.len() < 2 {
        return vec![variant.clone()];
    }
    let mut split = Vec::with_capacity(variant.alt_alleles.len());
    for alt in &variant.alt_alleles {
        split.push(Variant {
            chrom: variant.chrom.clone(),
            position: variant.position,
            id: variant.id.clone(),
            ref_allele: variant.ref_allele.clone(),
            alt_alleles: vec![alt.clone()],
            quality: variant.quality,
            filter: variant.filter.clone(),
        });
    }
    split
}
/// Joins several records at the same site into a single multi-allelic record.
///
/// Returns `None` for an empty slice or when any record disagrees with the
/// first on chromosome, position, or ref allele. Alt alleles are combined in
/// first-seen order with duplicates dropped; quality is the first `Some`
/// encountered; id and filter come from the first record.
pub fn join_biallelic(variants: &[Variant]) -> Option<Variant> {
    let (head, tail) = variants.split_first()?;
    let same_site = tail.iter().all(|v| {
        v.chrom == head.chrom && v.position == head.position && v.ref_allele == head.ref_allele
    });
    if !same_site {
        return None;
    }
    // Deduplicate while preserving first-seen order (alt counts are tiny,
    // so a linear `contains` scan is fine).
    let mut alts: Vec<Vec<u8>> = Vec::new();
    for alt in variants.iter().flat_map(|v| v.alt_alleles.iter()) {
        if !alts.contains(alt) {
            alts.push(alt.clone());
        }
    }
    Some(Variant {
        chrom: head.chrom.clone(),
        position: head.position,
        id: head.id.clone(),
        ref_allele: head.ref_allele.clone(),
        alt_alleles: alts,
        quality: variants.iter().find_map(|v| v.quality),
        filter: head.filter.clone(),
    })
}
/// Left-aligns and minimizes a variant's allele representation.
///
/// First trims bases shared at the right end of the ref and all alts, then
/// trims bases shared at the left end (advancing `position` by one per
/// trimmed base). Trimming stops as soon as the ref or any alt would shrink
/// below one base. Variants with no alt alleles are returned unchanged.
pub fn normalize_variant(variant: &Variant) -> Variant {
    if variant.alt_alleles.is_empty() {
        return variant.clone();
    }
    let mut ref_seq = variant.ref_allele.clone();
    let mut alts = variant.alt_alleles.clone();
    let mut pos = variant.position;

    // Right-trim: drop a shared trailing base per iteration while every
    // allele still has at least two bases.
    while ref_seq.len() > 1 && alts.iter().all(|a| a.len() > 1) {
        let tail = *ref_seq.last().unwrap();
        if !alts.iter().all(|a| *a.last().unwrap() == tail) {
            break;
        }
        ref_seq.pop();
        for alt in alts.iter_mut() {
            alt.pop();
        }
    }

    // Left-trim: drop a shared leading base per iteration, moving the
    // reported position right by one each time.
    while ref_seq.len() > 1 && alts.iter().all(|a| a.len() > 1) {
        let head = ref_seq[0];
        if !alts.iter().all(|a| a[0] == head) {
            break;
        }
        ref_seq.remove(0);
        for alt in alts.iter_mut() {
            alt.remove(0);
        }
        pos += 1;
    }

    Variant {
        chrom: variant.chrom.clone(),
        position: pos,
        id: variant.id.clone(),
        ref_allele: ref_seq,
        alt_alleles: alts,
        quality: variant.quality,
        filter: variant.filter.clone(),
    }
}
/// Merges several variant call sets into one, combining co-located records.
///
/// Records are grouped by (chrom, position, ref allele); each group is
/// collapsed via [`join_biallelic`] when possible, otherwise its members are
/// emitted individually. Output is ordered by the grouping key (BTreeMap
/// iteration order).
pub fn merge_variants(inputs: &[&[Variant]]) -> Vec<Variant> {
    use std::collections::BTreeMap;
    let mut by_site: BTreeMap<(String, u64, Vec<u8>), Vec<Variant>> = BTreeMap::new();
    for input in inputs {
        for v in input.iter() {
            by_site
                .entry((v.chrom.clone(), v.position, v.ref_allele.clone()))
                .or_default()
                .push(v.clone());
        }
    }
    by_site
        .into_values()
        .flat_map(|group| match join_biallelic(&group) {
            Some(merged) => vec![merged],
            None => group,
        })
        .collect()
}
/// AST for the variant filter mini-language produced by [`parse_filter`].
///
/// Leaves test a single field of a variant; `And`/`Or`/`Not` combine them.
#[derive(Debug, Clone)]
pub enum FilterExpr {
    /// Compare the variant's quality to a constant (a missing quality is
    /// evaluated as 0.0 by `eval_filter`).
    QualCmp {
        op: CmpOp,
        value: f64,
    },
    /// `TYPE==<t>` where `<t>` is "SNV"/"SNP" or "INDEL" (case-insensitive).
    TypeEq(String),
    /// `CHROM==<name>`, exact string match against the variant's chromosome.
    ChromEq(String),
    /// `FILTER==PASS`.
    FilterPass,
    /// Logical conjunction of two sub-expressions.
    And(Box<FilterExpr>, Box<FilterExpr>),
    /// Logical disjunction of two sub-expressions.
    Or(Box<FilterExpr>, Box<FilterExpr>),
    /// Logical negation of a sub-expression.
    Not(Box<FilterExpr>),
}
/// Comparison operator used in [`FilterExpr::QualCmp`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CmpOp {
    Gt, // >
    Ge, // >=
    Lt, // <
    Le, // <=
    Eq, // == (evaluated approximately, within f64::EPSILON)
    Ne, // != (complement of Eq)
}
/// Parses a filter expression string (e.g. `"QUAL>30 && CHROM==chr1"`) into
/// a [`FilterExpr`] AST.
///
/// # Errors
/// Returns `CyaneaError::Parse` if tokenization fails or if tokens remain
/// after a complete expression has been parsed.
pub fn parse_filter(expr: &str) -> Result<FilterExpr> {
    let tokens = tokenize(expr)?;
    match parse_or(&tokens)? {
        // The whole token stream must be consumed.
        (node, []) => Ok(node),
        (_, rest) => Err(CyaneaError::Parse(format!(
            "unexpected tokens in filter expression: {:?}",
            rest
        ))),
    }
}
/// Evaluates a parsed filter expression against a single variant.
///
/// A variant with no quality value is treated as having quality 0.0 for
/// `QUAL` comparisons; `==`/`!=` on quality use an epsilon tolerance.
pub fn eval_filter(variant: &Variant, expr: &FilterExpr) -> bool {
    match expr {
        FilterExpr::And(lhs, rhs) => eval_filter(variant, lhs) && eval_filter(variant, rhs),
        FilterExpr::Or(lhs, rhs) => eval_filter(variant, lhs) || eval_filter(variant, rhs),
        FilterExpr::Not(inner) => !eval_filter(variant, inner),
        FilterExpr::ChromEq(chrom) => &variant.chrom == chrom,
        FilterExpr::FilterPass => variant.filter == VariantFilter::Pass,
        FilterExpr::TypeEq(ty) => match ty.to_uppercase().as_str() {
            "SNV" | "SNP" => variant.is_snv(),
            "INDEL" => variant.is_indel(),
            // Unknown type names never match.
            _ => false,
        },
        FilterExpr::QualCmp { op, value } => {
            let qual = variant.quality.unwrap_or(0.0);
            let target = *value;
            match op {
                CmpOp::Gt => qual > target,
                CmpOp::Ge => qual >= target,
                CmpOp::Lt => qual < target,
                CmpOp::Le => qual <= target,
                CmpOp::Eq => (qual - target).abs() < f64::EPSILON,
                CmpOp::Ne => (qual - target).abs() >= f64::EPSILON,
            }
        }
    }
}
/// Returns clones of the variants that satisfy the filter expression,
/// preserving input order.
pub fn filter_variants(variants: &[Variant], expr: &FilterExpr) -> Vec<Variant> {
    let mut kept = Vec::new();
    for v in variants {
        if eval_filter(v, expr) {
            kept.push(v.clone());
        }
    }
    kept
}
/// Lexical tokens produced by `tokenize` for the filter mini-language.
#[derive(Debug, Clone, PartialEq)]
enum Token {
    /// Bare identifier: a field name (QUAL, TYPE, ...) or a string value.
    Ident(String),
    /// Numeric literal.
    Number(f64),
    /// Comparison operator: one of ">", ">=", "<", "<=", "==", "!=".
    Op(String),
    /// `&&`
    And,
    /// `||`
    Or,
    /// `!`
    Not,
    /// `(`
    LParen,
    /// `)`
    RParen,
}
/// Splits a filter expression string into a token stream.
///
/// Recognized tokens: parentheses, `&&`, `||`, `!`, comparison operators
/// (`>`, `>=`, `<`, `<=`, `==`, `!=`), unsigned numeric literals (digits and
/// `.`), and identifiers (starting with a letter or `_`, continuing with
/// alphanumerics, `_`, or `.`). Spaces and tabs are skipped.
///
/// # Errors
/// Returns `CyaneaError::Parse` on a malformed number (e.g. `1.2.3`) or any
/// character outside the grammar (including a lone `=` or `&`).
fn tokenize(expr: &str) -> Result<Vec<Token>> {
    let mut tokens = Vec::new();
    let bytes = expr.as_bytes();
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            // Whitespace separates tokens but produces none.
            b' ' | b'\t' => i += 1,
            b'(' => {
                tokens.push(Token::LParen);
                i += 1;
            }
            b')' => {
                tokens.push(Token::RParen);
                i += 1;
            }
            // Two-character tokens are matched before their one-character
            // prefixes, so each arm peeks one byte ahead.
            b'&' if i + 1 < bytes.len() && bytes[i + 1] == b'&' => {
                tokens.push(Token::And);
                i += 2;
            }
            b'|' if i + 1 < bytes.len() && bytes[i + 1] == b'|' => {
                tokens.push(Token::Or);
                i += 2;
            }
            b'!' if i + 1 < bytes.len() && bytes[i + 1] == b'=' => {
                tokens.push(Token::Op("!=".to_string()));
                i += 2;
            }
            b'!' => {
                tokens.push(Token::Not);
                i += 1;
            }
            b'=' if i + 1 < bytes.len() && bytes[i + 1] == b'=' => {
                tokens.push(Token::Op("==".to_string()));
                i += 2;
            }
            b'>' if i + 1 < bytes.len() && bytes[i + 1] == b'=' => {
                tokens.push(Token::Op(">=".to_string()));
                i += 2;
            }
            b'<' if i + 1 < bytes.len() && bytes[i + 1] == b'=' => {
                tokens.push(Token::Op("<=".to_string()));
                i += 2;
            }
            b'>' => {
                tokens.push(Token::Op(">".to_string()));
                i += 1;
            }
            b'<' => {
                tokens.push(Token::Op("<".to_string()));
                i += 1;
            }
            // Number literal. The guard rejects a bare `.` that is not
            // followed by a digit, so `.5` lexes as a number while a stray
            // `.` falls through to the error arm.
            b'0'..=b'9' | b'.' if {
                bytes[i] != b'.' || (i + 1 < bytes.len() && bytes[i + 1].is_ascii_digit())
            } =>
            {
                let start = i;
                while i < bytes.len() && (bytes[i].is_ascii_digit() || bytes[i] == b'.') {
                    i += 1;
                }
                // The greedy scan can capture invalid forms like `1.2.3`;
                // f64::parse rejects them here.
                let num_str = &expr[start..i];
                let num: f64 = num_str.parse().map_err(|_| {
                    CyaneaError::Parse(format!("invalid number in filter: '{}'", num_str))
                })?;
                tokens.push(Token::Number(num));
            }
            // Identifier: letter or `_` start, then alphanumerics/`_`/`.`
            // (the `.` allows dotted names inside identifiers).
            _ if bytes[i].is_ascii_alphabetic() || bytes[i] == b'_' => {
                let start = i;
                while i < bytes.len()
                    && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_' || bytes[i] == b'.')
                {
                    i += 1;
                }
                tokens.push(Token::Ident(expr[start..i].to_string()));
            }
            _ => {
                return Err(CyaneaError::Parse(format!(
                    "unexpected character '{}' in filter expression",
                    bytes[i] as char
                )));
            }
        }
    }
    Ok(tokens)
}
/// Parses a left-associative chain of `||`-joined terms (lowest precedence).
/// Returns the parsed node plus the unconsumed tail of `tokens`.
fn parse_or<'a>(tokens: &'a [Token]) -> Result<(FilterExpr, &'a [Token])> {
    let (mut node, mut remaining) = parse_and(tokens)?;
    while remaining.first() == Some(&Token::Or) {
        let (rhs, after) = parse_and(&remaining[1..])?;
        node = FilterExpr::Or(Box::new(node), Box::new(rhs));
        remaining = after;
    }
    Ok((node, remaining))
}
/// Parses a left-associative chain of `&&`-joined terms (binds tighter than
/// `||`). Returns the parsed node plus the unconsumed tail of `tokens`.
fn parse_and<'a>(tokens: &'a [Token]) -> Result<(FilterExpr, &'a [Token])> {
    let (mut node, mut remaining) = parse_not(tokens)?;
    while remaining.first() == Some(&Token::And) {
        let (rhs, after) = parse_not(&remaining[1..])?;
        node = FilterExpr::And(Box::new(node), Box::new(rhs));
        remaining = after;
    }
    Ok((node, remaining))
}
/// Parses zero or more leading `!` negations (highest prefix precedence)
/// followed by a primary expression.
fn parse_not<'a>(tokens: &'a [Token]) -> Result<(FilterExpr, &'a [Token])> {
    match tokens.first() {
        Some(Token::Not) => {
            // Recurse so `!!x` nests naturally.
            let (inner, remaining) = parse_not(&tokens[1..])?;
            Ok((FilterExpr::Not(Box::new(inner)), remaining))
        }
        _ => parse_primary(tokens),
    }
}
/// Parses a primary expression: a parenthesized sub-expression or a single
/// `FIELD <op> <value>` comparison.
///
/// Supported comparisons:
/// - `QUAL <cmp> <number>` with any of `>`, `>=`, `<`, `<=`, `==`, `!=`
/// - `TYPE == <ident>`, `CHROM == <ident>`, `FILTER == PASS` (only `==`)
///
/// Returns the parsed node plus the unconsumed tail of `tokens`. Any
/// identifier expression that does not match one of the supported shapes
/// falls through to the "unsupported filter field" error.
fn parse_primary<'a>(tokens: &'a [Token]) -> Result<(FilterExpr, &'a [Token])> {
    if tokens.is_empty() {
        return Err(CyaneaError::Parse(
            "unexpected end of filter expression".into(),
        ));
    }
    // Parenthesized group: parse a full expression and require a `)`.
    if tokens[0] == Token::LParen {
        let (inner, rest) = parse_or(&tokens[1..])?;
        if rest.is_empty() || rest[0] != Token::RParen {
            return Err(CyaneaError::Parse(
                "missing closing parenthesis in filter".into(),
            ));
        }
        return Ok((inner, &rest[1..]));
    }
    if let Token::Ident(name) = &tokens[0] {
        // Every comparison is exactly three tokens: field, operator, value.
        if tokens.len() < 3 {
            return Err(CyaneaError::Parse(format!(
                "incomplete filter expression after '{}'",
                name
            )));
        }
        if let Token::Op(op) = &tokens[1] {
            match name.as_str() {
                "QUAL" => {
                    // QUAL accepts every comparison operator, numeric RHS.
                    if let Token::Number(val) = &tokens[2] {
                        let cmp = match op.as_str() {
                            ">" => CmpOp::Gt,
                            ">=" => CmpOp::Ge,
                            "<" => CmpOp::Lt,
                            "<=" => CmpOp::Le,
                            "==" => CmpOp::Eq,
                            "!=" => CmpOp::Ne,
                            _ => {
                                return Err(CyaneaError::Parse(format!(
                                    "unsupported operator '{}'",
                                    op
                                )))
                            }
                        };
                        return Ok((FilterExpr::QualCmp { op: cmp, value: *val }, &tokens[3..]));
                    }
                }
                // The remaining fields only support `==` with an identifier
                // RHS; anything else falls through to the error below.
                "TYPE" => {
                    if op == "==" {
                        if let Token::Ident(val) = &tokens[2] {
                            return Ok((FilterExpr::TypeEq(val.clone()), &tokens[3..]));
                        }
                    }
                }
                "CHROM" => {
                    if op == "==" {
                        if let Token::Ident(val) = &tokens[2] {
                            return Ok((FilterExpr::ChromEq(val.clone()), &tokens[3..]));
                        }
                    }
                }
                "FILTER" => {
                    // Only the literal value PASS is supported.
                    if op == "==" {
                        if let Token::Ident(val) = &tokens[2] {
                            if val == "PASS" {
                                return Ok((FilterExpr::FilterPass, &tokens[3..]));
                            }
                        }
                    }
                }
                _ => {}
            }
        }
        return Err(CyaneaError::Parse(format!(
            "unsupported filter field or expression: '{}'",
            name
        )));
    }
    Err(CyaneaError::Parse(format!(
        "unexpected token in filter: {:?}",
        tokens[0]
    )))
}
/// Builds an identity key for set operations: (chrom, position, ref allele,
/// sorted alt alleles). Alts are sorted so allele order does not affect
/// variant identity.
fn variant_key(v: &Variant) -> (String, u64, Vec<u8>, Vec<Vec<u8>>) {
    let mut sorted_alts = v.alt_alleles.to_vec();
    sorted_alts.sort();
    (v.chrom.clone(), v.position, v.ref_allele.clone(), sorted_alts)
}
pub fn variant_intersection(a: &[Variant], b: &[Variant]) -> Vec<Variant> {
let b_keys: std::collections::HashSet<_> = b.iter().map(|v| variant_key(v)).collect();
a.iter()
.filter(|v| b_keys.contains(&variant_key(v)))
.cloned()
.collect()
}
pub fn variant_complement(a: &[Variant], b: &[Variant]) -> Vec<Variant> {
let b_keys: std::collections::HashSet<_> = b.iter().map(|v| variant_key(v)).collect();
a.iter()
.filter(|v| !b_keys.contains(&variant_key(v)))
.cloned()
.collect()
}
/// Counts of shared and set-exclusive variant keys between two call sets,
/// as produced by [`variant_concordance`].
#[derive(Debug, Clone, Default)]
pub struct ConcordanceStats {
    /// Distinct keys present in both sets.
    pub shared: usize,
    /// Distinct keys present only in the first set.
    pub a_only: usize,
    /// Distinct keys present only in the second set.
    pub b_only: usize,
}
pub fn variant_concordance(a: &[Variant], b: &[Variant]) -> ConcordanceStats {
let a_keys: std::collections::HashSet<_> = a.iter().map(|v| variant_key(v)).collect();
let b_keys: std::collections::HashSet<_> = b.iter().map(|v| variant_key(v)).collect();
let shared = a_keys.intersection(&b_keys).count();
let a_only = a_keys.difference(&b_keys).count();
let b_only = b_keys.difference(&a_keys).count();
ConcordanceStats {
shared,
a_only,
b_only,
}
}
/// Aggregate statistics over a collection of variants, computed by
/// [`detailed_vcf_stats`].
#[derive(Debug, Clone)]
pub struct DetailedVcfStats {
    /// Total number of variant records.
    pub total: u64,
    /// Records classified as SNVs.
    pub snv_count: u64,
    /// Alt alleles longer than the ref (counted per alt, not per record).
    pub insertion_count: u64,
    /// Alt alleles shorter than the ref (counted per alt, not per record).
    pub deletion_count: u64,
    /// Transitions / transversions among SNVs; INFINITY when there are
    /// transitions but no transversions, 0.0 when there are neither.
    pub ti_tv_ratio: f64,
    /// Records whose filter status is PASS.
    pub pass_count: u64,
    /// Records carrying more than one alt allele.
    pub multiallelic_count: u64,
    /// Non-empty 10-wide quality bins as (bin lower bound, count); the last
    /// bin (lower bound 100.0) absorbs all higher qualities.
    pub quality_histogram: Vec<(f64, u64)>,
    /// Per-chromosome record counts, sorted by chromosome name.
    pub per_chrom_counts: Vec<(String, u64)>,
    /// (alt len − ref len, count) pairs sorted by size; negative sizes are
    /// deletions, positive sizes insertions.
    pub indel_size_distribution: Vec<(i64, u64)>,
}
/// Computes descriptive statistics over a set of variants in a single pass.
///
/// Indel counts are per alt allele (so one multi-allelic record can
/// contribute several insertions/deletions), while `pass_count` and
/// `multiallelic_count` are per record. Ti/Tv is computed only over records
/// classified as SNVs.
pub fn detailed_vcf_stats(variants: &[Variant]) -> DetailedVcfStats {
    let mut total: u64 = 0;
    let mut snv_count: u64 = 0;
    let mut insertion_count: u64 = 0;
    let mut deletion_count: u64 = 0;
    let mut transitions: u64 = 0;
    let mut transversions: u64 = 0;
    let mut pass_count: u64 = 0;
    let mut multiallelic_count: u64 = 0;
    // 10-wide quality bins: [0,10), [10,20), ... plus a final catch-all bin.
    let bin_count = 11;
    let mut quality_bins = vec![0u64; bin_count];
    let mut chrom_counts: HashMap<String, u64> = HashMap::new();
    let mut indel_sizes: HashMap<i64, u64> = HashMap::new();
    for v in variants {
        total += 1;
        *chrom_counts.entry(v.chrom.clone()).or_default() += 1;
        if v.is_snv() {
            snv_count += 1;
            if v.is_transition() {
                transitions += 1;
            } else if v.is_transversion() {
                transversions += 1;
            }
        }
        // Classify each alt against the ref length.
        for alt in &v.alt_alleles {
            let ref_len = v.ref_allele.len() as i64;
            let alt_len = alt.len() as i64;
            if alt_len > ref_len {
                insertion_count += 1;
                let size = alt_len - ref_len;
                *indel_sizes.entry(size).or_default() += 1;
            } else if alt_len < ref_len {
                deletion_count += 1;
                // Negative size encodes a deletion.
                let size = alt_len - ref_len;
                *indel_sizes.entry(size).or_default() += 1;
            }
        }
        if v.filter == VariantFilter::Pass {
            pass_count += 1;
        }
        if v.alt_alleles.len() > 1 {
            multiallelic_count += 1;
        }
        if let Some(q) = v.quality {
            let bin = (q / 10.0).floor() as usize;
            // Clamp so qualities >= 100 land in the last bin.
            let bin = bin.min(bin_count - 1);
            quality_bins[bin] += 1;
        }
    }
    // Ti/Tv: INFINITY when there are transitions but no transversions,
    // 0.0 when there are neither.
    let ti_tv_ratio = if transversions > 0 {
        transitions as f64 / transversions as f64
    } else if transitions > 0 {
        f64::INFINITY
    } else {
        0.0
    };
    // Keep only non-empty bins, labeled by their lower bound.
    let quality_histogram: Vec<(f64, u64)> = quality_bins
        .into_iter()
        .enumerate()
        .filter(|(_, count)| *count > 0)
        .map(|(i, count)| (i as f64 * 10.0, count))
        .collect();
    let mut per_chrom_counts: Vec<(String, u64)> = chrom_counts.into_iter().collect();
    per_chrom_counts.sort_by(|a, b| a.0.cmp(&b.0));
    let mut indel_size_distribution: Vec<(i64, u64)> = indel_sizes.into_iter().collect();
    indel_size_distribution.sort_by_key(|&(size, _)| size);
    DetailedVcfStats {
        total,
        snv_count,
        insertion_count,
        deletion_count,
        ti_tv_ratio,
        pass_count,
        multiallelic_count,
        quality_histogram,
        per_chrom_counts,
        indel_size_distribution,
    }
}
/// A lookup table of annotations keyed by exact allele
/// (chrom, position, ref, alt).
#[derive(Debug, Clone)]
pub struct AnnotationSource {
    /// Label attached to every annotation emitted from this source
    /// (e.g. "dbSNP_ID").
    pub key: String,
    /// Annotation value per allele key.
    pub entries: HashMap<(String, u64, Vec<u8>, Vec<u8>), String>,
}
/// Pairs each variant with the annotations matching any of its alt alleles.
///
/// Sources are consulted in order, and within a source alts are checked in
/// order, so the output annotation list preserves (source, alt) order. A
/// variant with no matches gets an empty list.
pub fn annotate_variants(
    variants: &[Variant],
    sources: &[AnnotationSource],
) -> Vec<(Variant, Vec<(String, String)>)> {
    let mut annotated = Vec::with_capacity(variants.len());
    for v in variants {
        let mut hits: Vec<(String, String)> = Vec::new();
        for source in sources {
            for alt in &v.alt_alleles {
                let lookup = (
                    v.chrom.clone(),
                    v.position,
                    v.ref_allele.clone(),
                    alt.clone(),
                );
                if let Some(value) = source.entries.get(&lookup) {
                    hits.push((source.key.clone(), value.clone()));
                }
            }
        }
        annotated.push((v.clone(), hits));
    }
    annotated
}
/// Sets each variant's `id` from `id_map` using the first alt allele that
/// matches on (chrom, position, ref, alt). Variants with no matching alt
/// keep their existing id.
pub fn annotate_variant_ids(
    variants: &mut [Variant],
    id_map: &HashMap<(String, u64, Vec<u8>, Vec<u8>), String>,
) {
    for v in variants.iter_mut() {
        let matched = v.alt_alleles.iter().find_map(|alt| {
            id_map.get(&(v.chrom.clone(), v.position, v.ref_allele.clone(), alt.clone()))
        });
        if let Some(id) = matched {
            v.id = Some(id.clone());
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Builds a single-alt PASS variant (convenient SNV-shaped fixture).
    fn snv(chrom: &str, pos: u64, ref_a: &str, alt: &str, qual: Option<f64>) -> Variant {
        Variant {
            chrom: chrom.to_string(),
            position: pos,
            id: None,
            ref_allele: ref_a.as_bytes().to_vec(),
            alt_alleles: vec![alt.as_bytes().to_vec()],
            quality: qual,
            filter: VariantFilter::Pass,
        }
    }

    // Builds a variant with arbitrary alt alleles and filter status.
    fn variant(
        chrom: &str,
        pos: u64,
        ref_a: &str,
        alts: &[&str],
        qual: Option<f64>,
        filter: VariantFilter,
    ) -> Variant {
        Variant {
            chrom: chrom.to_string(),
            position: pos,
            id: None,
            ref_allele: ref_a.as_bytes().to_vec(),
            alt_alleles: alts.iter().map(|a| a.as_bytes().to_vec()).collect(),
            quality: qual,
            filter,
        }
    }

    // --- split/join -----------------------------------------------------

    #[test]
    fn split_multiallelic_single() {
        let v = snv("chr1", 100, "A", "G", Some(30.0));
        let result = split_multiallelic(&v);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].alt_alleles.len(), 1);
    }

    #[test]
    fn split_multiallelic_multi() {
        let v = variant("chr1", 100, "A", &["G", "T", "C"], Some(30.0), VariantFilter::Pass);
        let result = split_multiallelic(&v);
        assert_eq!(result.len(), 3);
        assert_eq!(result[0].alt_alleles, vec![b"G".to_vec()]);
        assert_eq!(result[1].alt_alleles, vec![b"T".to_vec()]);
        assert_eq!(result[2].alt_alleles, vec![b"C".to_vec()]);
        for r in &result {
            assert_eq!(r.position, 100);
            assert_eq!(r.ref_allele, b"A");
        }
    }

    #[test]
    fn join_biallelic_basic() {
        let v1 = snv("chr1", 100, "A", "G", Some(30.0));
        let v2 = snv("chr1", 100, "A", "T", Some(40.0));
        let joined = join_biallelic(&[v1, v2]).unwrap();
        assert_eq!(joined.alt_alleles.len(), 2);
        assert!(joined.alt_alleles.contains(&b"G".to_vec()));
        assert!(joined.alt_alleles.contains(&b"T".to_vec()));
    }

    #[test]
    fn join_biallelic_different_pos() {
        let v1 = snv("chr1", 100, "A", "G", None);
        let v2 = snv("chr1", 200, "A", "T", None);
        assert!(join_biallelic(&[v1, v2]).is_none());
    }

    #[test]
    fn join_biallelic_empty() {
        assert!(join_biallelic(&[]).is_none());
    }

    #[test]
    fn split_join_roundtrip() {
        let v = variant("chr1", 100, "A", &["G", "T"], Some(30.0), VariantFilter::Pass);
        let split = split_multiallelic(&v);
        let joined = join_biallelic(&split).unwrap();
        assert_eq!(joined.alt_alleles.len(), 2);
    }

    // --- normalization --------------------------------------------------

    #[test]
    fn normalize_trim_suffix() {
        let v = Variant {
            chrom: "chr1".to_string(),
            position: 100,
            id: None,
            ref_allele: b"ACGT".to_vec(),
            alt_alleles: vec![b"AGT".to_vec()],
            quality: None,
            filter: VariantFilter::Pass,
        };
        let n = normalize_variant(&v);
        assert_eq!(n.ref_allele, b"AC");
        assert_eq!(n.alt_alleles, vec![b"A".to_vec()]);
        assert_eq!(n.position, 100);
    }

    #[test]
    fn normalize_trim_prefix() {
        let v = Variant {
            chrom: "chr1".to_string(),
            position: 100,
            id: None,
            ref_allele: b"TAC".to_vec(),
            alt_alleles: vec![b"TA".to_vec()],
            quality: None,
            filter: VariantFilter::Pass,
        };
        let n = normalize_variant(&v);
        assert_eq!(n.ref_allele, b"AC");
        assert_eq!(n.alt_alleles, vec![b"A".to_vec()]);
        // Left-trimming one base advances the position by one.
        assert_eq!(n.position, 101);
    }

    #[test]
    fn normalize_no_change_for_snv() {
        let v = snv("chr1", 100, "A", "G", None);
        let n = normalize_variant(&v);
        assert_eq!(n.ref_allele, b"A");
        assert_eq!(n.alt_alleles, vec![b"G".to_vec()]);
        assert_eq!(n.position, 100);
    }

    // --- merging --------------------------------------------------------

    #[test]
    fn merge_overlapping() {
        let set1 = vec![snv("chr1", 100, "A", "G", Some(30.0))];
        let set2 = vec![snv("chr1", 100, "A", "T", Some(40.0))];
        let merged = merge_variants(&[&set1, &set2]);
        assert_eq!(merged.len(), 1);
        assert_eq!(merged[0].alt_alleles.len(), 2);
    }

    #[test]
    fn merge_non_overlapping() {
        let set1 = vec![snv("chr1", 100, "A", "G", Some(30.0))];
        let set2 = vec![snv("chr1", 200, "C", "T", Some(40.0))];
        let merged = merge_variants(&[&set1, &set2]);
        assert_eq!(merged.len(), 2);
    }

    // --- filter language ------------------------------------------------

    #[test]
    fn filter_qual_gt() {
        let expr = parse_filter("QUAL>30").unwrap();
        let v1 = snv("chr1", 100, "A", "G", Some(50.0));
        let v2 = snv("chr1", 200, "C", "T", Some(20.0));
        assert!(eval_filter(&v1, &expr));
        assert!(!eval_filter(&v2, &expr));
    }

    #[test]
    fn filter_qual_ge() {
        let expr = parse_filter("QUAL>=30").unwrap();
        let v = snv("chr1", 100, "A", "G", Some(30.0));
        assert!(eval_filter(&v, &expr));
    }

    #[test]
    fn filter_type_snv() {
        let expr = parse_filter("TYPE==SNV").unwrap();
        let snv_v = snv("chr1", 100, "A", "G", None);
        let indel = variant("chr1", 200, "AC", &["A"], None, VariantFilter::Pass);
        assert!(eval_filter(&snv_v, &expr));
        assert!(!eval_filter(&indel, &expr));
    }

    #[test]
    fn filter_type_indel() {
        let expr = parse_filter("TYPE==INDEL").unwrap();
        let indel = variant("chr1", 200, "AC", &["A"], None, VariantFilter::Pass);
        assert!(eval_filter(&indel, &expr));
    }

    #[test]
    fn filter_chrom() {
        let expr = parse_filter("CHROM==chr1").unwrap();
        let v1 = snv("chr1", 100, "A", "G", None);
        let v2 = snv("chr2", 100, "A", "G", None);
        assert!(eval_filter(&v1, &expr));
        assert!(!eval_filter(&v2, &expr));
    }

    #[test]
    fn filter_pass() {
        let expr = parse_filter("FILTER==PASS").unwrap();
        let pass_v = snv("chr1", 100, "A", "G", None);
        let fail_v = variant("chr1", 100, "A", &["G"], None, VariantFilter::Fail(vec!["LQ".into()]));
        assert!(eval_filter(&pass_v, &expr));
        assert!(!eval_filter(&fail_v, &expr));
    }

    #[test]
    fn filter_and() {
        let expr = parse_filter("QUAL>30 && CHROM==chr1").unwrap();
        let v1 = snv("chr1", 100, "A", "G", Some(50.0));
        let v2 = snv("chr2", 100, "A", "G", Some(50.0));
        let v3 = snv("chr1", 100, "A", "G", Some(10.0));
        assert!(eval_filter(&v1, &expr));
        assert!(!eval_filter(&v2, &expr));
        assert!(!eval_filter(&v3, &expr));
    }

    #[test]
    fn filter_or() {
        let expr = parse_filter("CHROM==chr1 || CHROM==chr2").unwrap();
        let v1 = snv("chr1", 100, "A", "G", None);
        let v2 = snv("chr2", 100, "A", "G", None);
        let v3 = snv("chr3", 100, "A", "G", None);
        assert!(eval_filter(&v1, &expr));
        assert!(eval_filter(&v2, &expr));
        assert!(!eval_filter(&v3, &expr));
    }

    #[test]
    fn filter_not() {
        let expr = parse_filter("!CHROM==chr1").unwrap();
        let v1 = snv("chr1", 100, "A", "G", None);
        let v2 = snv("chr2", 100, "A", "G", None);
        assert!(!eval_filter(&v1, &expr));
        assert!(eval_filter(&v2, &expr));
    }

    #[test]
    fn filter_parentheses() {
        let expr = parse_filter("(QUAL>30 || QUAL<10) && CHROM==chr1").unwrap();
        let v1 = snv("chr1", 100, "A", "G", Some(50.0));
        let v2 = snv("chr1", 100, "A", "G", Some(5.0));
        let v3 = snv("chr1", 100, "A", "G", Some(20.0));
        assert!(eval_filter(&v1, &expr));
        assert!(eval_filter(&v2, &expr));
        assert!(!eval_filter(&v3, &expr));
    }

    #[test]
    fn filter_variants_vec() {
        let expr = parse_filter("QUAL>30").unwrap();
        let variants = vec![
            snv("chr1", 100, "A", "G", Some(50.0)),
            snv("chr1", 200, "C", "T", Some(20.0)),
            snv("chr1", 300, "G", "A", Some(40.0)),
        ];
        let filtered = filter_variants(&variants, &expr);
        assert_eq!(filtered.len(), 2);
    }

    #[test]
    fn filter_invalid_expr() {
        assert!(parse_filter("").is_err());
        assert!(parse_filter("UNKNOWN>5").is_err());
    }

    // --- set operations -------------------------------------------------

    #[test]
    fn intersection_basic() {
        let a = vec![
            snv("chr1", 100, "A", "G", None),
            snv("chr1", 200, "C", "T", None),
        ];
        let b = vec![
            snv("chr1", 100, "A", "G", None),
            snv("chr1", 300, "G", "A", None),
        ];
        let result = variant_intersection(&a, &b);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].position, 100);
    }

    #[test]
    fn complement_basic() {
        let a = vec![
            snv("chr1", 100, "A", "G", None),
            snv("chr1", 200, "C", "T", None),
        ];
        let b = vec![snv("chr1", 100, "A", "G", None)];
        let result = variant_complement(&a, &b);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0].position, 200);
    }

    #[test]
    fn concordance_basic() {
        let a = vec![
            snv("chr1", 100, "A", "G", None),
            snv("chr1", 200, "C", "T", None),
        ];
        let b = vec![
            snv("chr1", 100, "A", "G", None),
            snv("chr1", 300, "G", "A", None),
        ];
        let stats = variant_concordance(&a, &b);
        assert_eq!(stats.shared, 1);
        assert_eq!(stats.a_only, 1);
        assert_eq!(stats.b_only, 1);
    }

    #[test]
    fn concordance_identical() {
        let a = vec![snv("chr1", 100, "A", "G", None)];
        let b = vec![snv("chr1", 100, "A", "G", None)];
        let stats = variant_concordance(&a, &b);
        assert_eq!(stats.shared, 1);
        assert_eq!(stats.a_only, 0);
        assert_eq!(stats.b_only, 0);
    }

    #[test]
    fn concordance_disjoint() {
        let a = vec![snv("chr1", 100, "A", "G", None)];
        let b = vec![snv("chr1", 200, "C", "T", None)];
        let stats = variant_concordance(&a, &b);
        assert_eq!(stats.shared, 0);
        assert_eq!(stats.a_only, 1);
        assert_eq!(stats.b_only, 1);
    }

    // --- statistics -----------------------------------------------------

    #[test]
    fn detailed_stats_basic() {
        let variants = vec![
            snv("chr1", 100, "A", "G", Some(30.0)),
            snv("chr1", 200, "C", "T", Some(40.0)),
            snv("chr1", 300, "A", "C", Some(50.0)),
            variant("chr1", 400, "AC", &["A"], Some(20.0), VariantFilter::Pass),
            variant("chr1", 500, "A", &["G", "T"], Some(60.0), VariantFilter::Pass),
        ];
        let stats = detailed_vcf_stats(&variants);
        assert_eq!(stats.total, 5);
        assert_eq!(stats.snv_count, 4);
        assert_eq!(stats.deletion_count, 1);
        assert_eq!(stats.pass_count, 5);
        assert_eq!(stats.multiallelic_count, 1);
        assert!((stats.ti_tv_ratio - 3.0).abs() < 0.01);
    }

    #[test]
    fn detailed_stats_empty() {
        let stats = detailed_vcf_stats(&[]);
        assert_eq!(stats.total, 0);
        assert_eq!(stats.ti_tv_ratio, 0.0);
    }

    #[test]
    fn detailed_stats_quality_histogram() {
        // Qualities 5, 15, 15, 105 => three non-empty bins (0, 10, 100).
        let variants = vec![
            snv("chr1", 100, "A", "G", Some(5.0)),
            snv("chr1", 200, "C", "T", Some(15.0)),
            snv("chr1", 300, "G", "A", Some(15.0)),
            snv("chr1", 400, "T", "C", Some(105.0)),
        ];
        let stats = detailed_vcf_stats(&variants);
        assert_eq!(stats.quality_histogram.len(), 3);
    }

    #[test]
    fn detailed_stats_insertions() {
        let variants = vec![
            variant("chr1", 100, "A", &["AT"], None, VariantFilter::Pass),
            variant("chr1", 200, "A", &["ATG"], None, VariantFilter::Pass),
        ];
        let stats = detailed_vcf_stats(&variants);
        assert_eq!(stats.insertion_count, 2);
        assert_eq!(stats.deletion_count, 0);
    }

    #[test]
    fn detailed_stats_per_chrom() {
        let variants = vec![
            snv("chr1", 100, "A", "G", Some(30.0)),
            snv("chr1", 200, "C", "T", Some(40.0)),
            snv("chr2", 100, "A", "C", Some(50.0)),
            snv("chr3", 300, "G", "A", Some(20.0)),
        ];
        let stats = detailed_vcf_stats(&variants);
        assert_eq!(stats.per_chrom_counts.len(), 3);
        assert_eq!(stats.per_chrom_counts[0], ("chr1".to_string(), 2));
        assert_eq!(stats.per_chrom_counts[1], ("chr2".to_string(), 1));
        assert_eq!(stats.per_chrom_counts[2], ("chr3".to_string(), 1));
    }

    #[test]
    fn detailed_stats_indel_sizes() {
        let variants = vec![
            variant("chr1", 100, "A", &["AT"], None, VariantFilter::Pass),
            variant("chr1", 200, "A", &["ATG"], None, VariantFilter::Pass),
            variant("chr1", 300, "AC", &["A"], None, VariantFilter::Pass),
            variant("chr1", 400, "ACGT", &["A"], None, VariantFilter::Pass),
            variant("chr1", 500, "G", &["GT"], None, VariantFilter::Pass),
        ];
        let stats = detailed_vcf_stats(&variants);
        // Sizes are sorted ascending: deletions (negative) first.
        assert_eq!(stats.indel_size_distribution.len(), 4);
        assert_eq!(stats.indel_size_distribution[0], (-3, 1));
        assert_eq!(stats.indel_size_distribution[1], (-1, 1));
        assert_eq!(stats.indel_size_distribution[2], (1, 2));
        assert_eq!(stats.indel_size_distribution[3], (2, 1));
    }

    // --- annotation -----------------------------------------------------

    #[test]
    fn annotate_with_rsid() {
        let variants = vec![
            snv("chr1", 100, "A", "G", Some(30.0)),
            snv("chr1", 200, "C", "T", Some(40.0)),
        ];
        let mut entries = HashMap::new();
        entries.insert(
            ("chr1".to_string(), 100, b"A".to_vec(), b"G".to_vec()),
            "rs12345".to_string(),
        );
        let source = AnnotationSource {
            key: "dbSNP_ID".to_string(),
            entries,
        };
        let annotated = annotate_variants(&variants, &[source]);
        assert_eq!(annotated.len(), 2);
        assert_eq!(annotated[0].1.len(), 1);
        assert_eq!(annotated[0].1[0].0, "dbSNP_ID");
        assert_eq!(annotated[0].1[0].1, "rs12345");
        assert!(annotated[1].1.is_empty());
    }

    #[test]
    fn annotate_no_match() {
        let variants = vec![snv("chr1", 100, "A", "G", Some(30.0))];
        let source = AnnotationSource {
            key: "gnomAD_AF".to_string(),
            entries: HashMap::new(),
        };
        let annotated = annotate_variants(&variants, &[source]);
        assert_eq!(annotated.len(), 1);
        assert!(annotated[0].1.is_empty());
    }

    #[test]
    fn annotate_multiple_sources() {
        let variants = vec![snv("chr1", 100, "A", "G", Some(30.0))];
        let mut entries1 = HashMap::new();
        entries1.insert(
            ("chr1".to_string(), 100, b"A".to_vec(), b"G".to_vec()),
            "rs12345".to_string(),
        );
        let mut entries2 = HashMap::new();
        entries2.insert(
            ("chr1".to_string(), 100, b"A".to_vec(), b"G".to_vec()),
            "0.05".to_string(),
        );
        let sources = vec![
            AnnotationSource {
                key: "dbSNP_ID".to_string(),
                entries: entries1,
            },
            AnnotationSource {
                key: "gnomAD_AF".to_string(),
                entries: entries2,
            },
        ];
        let annotated = annotate_variants(&variants, &sources);
        // Annotations come back in source order.
        assert_eq!(annotated[0].1.len(), 2);
        assert_eq!(annotated[0].1[0].0, "dbSNP_ID");
        assert_eq!(annotated[0].1[1].0, "gnomAD_AF");
    }

    #[test]
    fn annotate_variant_ids_basic() {
        let mut variants = vec![
            snv("chr1", 100, "A", "G", Some(30.0)),
            snv("chr1", 200, "C", "T", Some(40.0)),
        ];
        let mut id_map = HashMap::new();
        id_map.insert(
            ("chr1".to_string(), 100, b"A".to_vec(), b"G".to_vec()),
            "rs99999".to_string(),
        );
        annotate_variant_ids(&mut variants, &id_map);
        assert_eq!(variants[0].id, Some("rs99999".to_string()));
        assert_eq!(variants[1].id, None);
    }
}