Skip to main content

rsomics_vcf_stats/
lib.rs

1#![allow(clippy::cast_precision_loss)]
2
3use std::fs::File;
4use std::io::{BufRead, BufReader};
5use std::path::Path;
6
7use rsomics_common::{Result, RsomicsError};
8use serde::Serialize;
9
10#[derive(Debug, Default, Serialize)]
11pub struct VcfStats {
12    pub total: u64,
13    pub snps: u64,
14    pub insertions: u64,
15    pub deletions: u64,
16    pub mnps: u64,
17    pub transitions: u64,
18    pub transversions: u64,
19    pub ti_tv: f64,
20}
21
22fn is_transition(r: u8, a: u8) -> bool {
23    matches!(
24        (r, a),
25        (b'A', b'G') | (b'G', b'A') | (b'C', b'T') | (b'T', b'C')
26    )
27}
28
29pub fn stats(input: &Path) -> Result<VcfStats> {
30    let file = File::open(input)
31        .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", input.display())))?;
32    let reader = BufReader::new(file);
33    let mut s = VcfStats::default();
34
35    for line in reader.lines() {
36        let line = line.map_err(RsomicsError::Io)?;
37        if line.starts_with('#') {
38            continue;
39        }
40        let fields: Vec<&str> = line.splitn(6, '\t').collect();
41        if fields.len() < 5 {
42            continue;
43        }
44        let ref_allele = fields[3];
45        let alt_field = fields[4];
46
47        for alt in alt_field.split(',') {
48            if alt == "." || alt == "*" {
49                continue;
50            }
51            s.total += 1;
52            let rlen = ref_allele.len();
53            let alen = alt.len();
54
55            if rlen == 1 && alen == 1 {
56                s.snps += 1;
57                let rb = ref_allele.as_bytes()[0].to_ascii_uppercase();
58                let ab = alt.as_bytes()[0].to_ascii_uppercase();
59                if is_transition(rb, ab) {
60                    s.transitions += 1;
61                } else {
62                    s.transversions += 1;
63                }
64            } else if rlen > alen {
65                s.deletions += 1;
66            } else if alen > rlen {
67                s.insertions += 1;
68            } else {
69                s.mnps += 1;
70            }
71        }
72    }
73
74    s.ti_tv = if s.transversions > 0 {
75        s.transitions as f64 / s.transversions as f64
76    } else {
77        0.0
78    };
79
80    Ok(s)
81}