rsomics-vcf-stats 0.1.0

Basic VCF variant statistics — SNP/indel counts, Ti/Tv ratio
Documentation
#![allow(clippy::cast_precision_loss)]

use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;

use rsomics_common::{Result, RsomicsError};
use serde::Serialize;

#[derive(Debug, Default, Serialize)]
pub struct VcfStats {
    pub total: u64,
    pub snps: u64,
    pub insertions: u64,
    pub deletions: u64,
    pub mnps: u64,
    pub transitions: u64,
    pub transversions: u64,
    pub ti_tv: f64,
}

fn is_transition(r: u8, a: u8) -> bool {
    matches!(
        (r, a),
        (b'A', b'G') | (b'G', b'A') | (b'C', b'T') | (b'T', b'C')
    )
}

pub fn stats(input: &Path) -> Result<VcfStats> {
    let file = File::open(input)
        .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", input.display())))?;
    let reader = BufReader::new(file);
    let mut s = VcfStats::default();

    for line in reader.lines() {
        let line = line.map_err(RsomicsError::Io)?;
        if line.starts_with('#') {
            continue;
        }
        let fields: Vec<&str> = line.splitn(6, '\t').collect();
        if fields.len() < 5 {
            continue;
        }
        let ref_allele = fields[3];
        let alt_field = fields[4];

        for alt in alt_field.split(',') {
            if alt == "." || alt == "*" {
                continue;
            }
            s.total += 1;
            let rlen = ref_allele.len();
            let alen = alt.len();

            if rlen == 1 && alen == 1 {
                s.snps += 1;
                let rb = ref_allele.as_bytes()[0].to_ascii_uppercase();
                let ab = alt.as_bytes()[0].to_ascii_uppercase();
                if is_transition(rb, ab) {
                    s.transitions += 1;
                } else {
                    s.transversions += 1;
                }
            } else if rlen > alen {
                s.deletions += 1;
            } else if alen > rlen {
                s.insertions += 1;
            } else {
                s.mnps += 1;
            }
        }
    }

    s.ti_tv = if s.transversions > 0 {
        s.transitions as f64 / s.transversions as f64
    } else {
        0.0
    };

    Ok(s)
}