tsg_core/io/
vcf.rs

1use crate::graph::TSGraph;
2use anyhow::Result;
3use std::io::Write;
4
/// Fixed header lines emitted at the top of every VCF output, in order:
/// file-level metadata, `INFO` definitions, the `FORMAT` definition, the
/// symbolic `ALT` alleles and finally the tab-separated column header.
///
/// Entries carry no trailing newline; the writer adds one per entry.
static VCF_HEADER: &[&str] = &[
    // File-level metadata.
    "##fileformat=VCFv4.3",
    "##source=tsg",
    // INFO field definitions.
    r#"##INFO=<ID=CANONICAL,Number=0,Type=Flag,Description="Canonical splice site">"#,
    r#"##INFO=<ID=NONCANONICAL,Number=0,Type=Flag,Description="Noncanonical splice site">"#,
    r#"##INFO=<ID=BOUNDARY,Number=1,Type=String,Description="The coding exon boundary type of event, BOTH, LEFT, RIGHT, NEITHER.">"#,
    r#"##INFO=<ID=DP1,Number=1,Type=Integer,Description="Total read depth at the breakpoint1">"#,
    r#"##INFO=<ID=DP2,Number=1,Type=Integer,Description="Total read depth at the breakpoint2">"#,
    r#"##INFO=<ID=SR,Number=1,Type=Integer,Description="The number of support reads for the breakpoints">"#,
    r#"##INFO=<ID=OSR,Number=1,Type=Integer,Description="The number of support reads for the breakpoints before rescuer">"#,
    r#"##INFO=<ID=PSI,Number=1,Type=Float,Description="Estimated Percent splice-in in the range (0,1], representing the percentage of NLS transcripts">"#,
    r#"##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">"#,
    r#"##INFO=<ID=SVTYPE,Number=1,Type=String,Description="The type of event, DEL, TDUP, IDUP, INV, TRA.">"#,
    r#"##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">"#,
    r#"##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">"#,
    r#"##INFO=<ID=SVEND,Number=1,Type=Integer,Description="2nd position of the structural variant">"#,
    r#"##INFO=<ID=END,Number=1,Type=Integer,Description="A placeholder for END coordinate in case of a translocation">"#,
    r#"##INFO=<ID=STRAND1,Number=1,Type=String,Description="Strand for breakpoint1">"#,
    r#"##INFO=<ID=STRAND2,Number=1,Type=String,Description="Strand for breakpoint2">"#,
    r#"##INFO=<ID=MODE1,Number=1,Type=String,Description="Mode for softclipped reads at breakpoint1">"#,
    r#"##INFO=<ID=MODE2,Number=1,Type=String,Description="Mode for softclipped reads at breakpoint2">"#,
    r#"##INFO=<ID=GENE1,Number=1,Type=String,Description="Overlapped coding gene for breakpoint1">"#,
    r#"##INFO=<ID=GENE2,Number=1,Type=String,Description="Overlapped coding gene for breakpoint2">"#,
    r#"##INFO=<ID=MEGAEXON1,Number=.,Type=String,Description="ID for source mega exon">"#,
    r#"##INFO=<ID=MEGAEXON2,Number=.,Type=String,Description="ID for target mega exon">"#,
    r#"##INFO=<ID=HOMSEQ,Number=1,Type=String,Description="MicroHomology sequence">"#,
    r#"##INFO=<ID=INSSEQ,Number=1,Type=String,Description="MicroInsertion sequence">"#,
    r#"##INFO=<ID=TRANSCRIPT_ID,Number=.,Type=String,Description="Transcript ID">"#,
    r#"##INFO=<ID=GENE_ID,Number=1,Type=String,Description="Gene ID">"#,
    r#"##INFO=<ID=SR_ID,Number=.,Type=String,Description="Support read ID">"#,
    // FORMAT field definition.
    r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#,
    // Symbolic ALT alleles.
    r#"##ALT=<ID=DEL,Description="Deletion">"#,
    r#"##ALT=<ID=TDUP,Description="Tandem duplication">"#,
    r#"##ALT=<ID=IDUP,Description="Inverted duplication">"#,
    r#"##ALT=<ID=INV,Description="Inversion">"#,
    r#"##ALT=<ID=TRA,Description="Translocation">"#,
    // Column header; kept as a regular literal so the `\t` escapes are processed.
    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT",
];
43
44pub fn to_vcf<W: Write>(tsg_graph: &TSGraph, writer: &mut W) -> Result<()> {
45    let paths = tsg_graph.traverse_all_graphs()?;
46
47    let header = VCF_HEADER;
48    for line in header {
49        writeln!(writer, "{}", line)?;
50    }
51
52    for path in paths {
53        let seq = path.to_vcf()?;
54        writeln!(writer, "{}", seq)?;
55    }
56    Ok(())
57}
58
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts the fixture graph to VCF and checks that the output starts
    /// with the full header, then stores the result next to the fixture.
    #[test]
    fn test_to_vcf() {
        let tsg_graph = TSGraph::from_file("tests/data/test.tsg").unwrap();

        // Render into memory so the output can be inspected and no write
        // error can be hidden by a buffered writer being dropped.
        let mut buffer: Vec<u8> = Vec::new();
        to_vcf(&tsg_graph, &mut buffer).unwrap();

        let text = String::from_utf8(buffer).unwrap();
        let mut lines = text.lines();
        for expected in VCF_HEADER {
            assert_eq!(lines.next(), Some(*expected));
        }

        // Keep producing the on-disk artifact; `fs::write` reports I/O errors.
        let output = "tests/data/test.vcf";
        std::fs::write(output, text.as_bytes()).unwrap();
    }
}