1use crate::graph::TSGraph;
2use anyhow::Result;
3use std::io::Write;
4
/// Header lines that `to_vcf` writes, one per output line and in this order,
/// before any record.
///
/// The table holds the `##fileformat` and `##source` meta-lines, the `##INFO`,
/// `##FORMAT` and `##ALT` declarations, and finally the tab-separated column
/// header row. The strings are emitted verbatim; no trailing newline is stored
/// here because the writer appends one per entry.
///
/// NOTE(review): the column header row ends at `FORMAT` with no sample column
/// after it; confirm this matches the records produced by the paths' `to_vcf`.
static VCF_HEADER: &[&str] = &[
    // File-level meta-lines.
    "##fileformat=VCFv4.3",
    "##source=tsg",
    // INFO field declarations.
    "##INFO=<ID=CANONICAL,Number=0,Type=Flag,Description=\"Canonical splice site\">",
    "##INFO=<ID=NONCANONICAL,Number=0,Type=Flag,Description=\"Noncanonical splice site\">",
    "##INFO=<ID=BOUNDARY,Number=1,Type=String,Description=\"The coding exon boundary type of event, BOTH, LEFT, RIGHT, NEITHER.\">",
    "##INFO=<ID=DP1,Number=1,Type=Integer,Description=\"Total read depth at the breakpoint1\">",
    "##INFO=<ID=DP2,Number=1,Type=Integer,Description=\"Total read depth at the breakpoint2\">",
    "##INFO=<ID=SR,Number=1,Type=Integer,Description=\"The number of support reads for the breakpoints\">",
    "##INFO=<ID=OSR,Number=1,Type=Integer,Description=\"The number of support reads for the breakpoints before rescuer\">",
    "##INFO=<ID=PSI,Number=1,Type=Float,Description=\"Estimated Percent splice-in in the range (0,1], representing the percentage of NLS transcripts\">",
    "##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"Type of approach used to detect SV\">",
    "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"The type of event, DEL, TDUP, IDUP, INV, TRA.\">",
    "##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Difference in length between REF and ALT alleles\">",
    "##INFO=<ID=CHR2,Number=1,Type=String,Description=\"Chromosome for END coordinate in case of a translocation\">",
    "##INFO=<ID=SVEND,Number=1,Type=Integer,Description=\"2nd position of the structural variant\">",
    "##INFO=<ID=END,Number=1,Type=Integer,Description=\"A placeholder for END coordinate in case of a translocation\">",
    "##INFO=<ID=STRAND1,Number=1,Type=String,Description=\"Strand for breakpoint1\">",
    "##INFO=<ID=STRAND2,Number=1,Type=String,Description=\"Strand for breakpoint2\">",
    "##INFO=<ID=MODE1,Number=1,Type=String,Description=\"Mode for softclipped reads at breakpoint1\">",
    "##INFO=<ID=MODE2,Number=1,Type=String,Description=\"Mode for softclipped reads at breakpoint2\">",
    "##INFO=<ID=GENE1,Number=1,Type=String,Description=\"Overlapped coding gene for breakpoint1\">",
    "##INFO=<ID=GENE2,Number=1,Type=String,Description=\"Overlapped coding gene for breakpoint2\">",
    "##INFO=<ID=MEGAEXON1,Number=.,Type=String,Description=\"ID for source mega exon\">",
    "##INFO=<ID=MEGAEXON2,Number=.,Type=String,Description=\"ID for target mega exon\">",
    "##INFO=<ID=HOMSEQ,Number=1,Type=String,Description=\"MicroHomology sequence\">",
    "##INFO=<ID=INSSEQ,Number=1,Type=String,Description=\"MicroInsertion sequence\">",
    "##INFO=<ID=TRANSCRIPT_ID,Number=.,Type=String,Description=\"Transcript ID\">",
    "##INFO=<ID=GENE_ID,Number=1,Type=String,Description=\"Gene ID\">",
    "##INFO=<ID=SR_ID,Number=.,Type=String,Description=\"Support read ID\">",
    // FORMAT field declaration.
    "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">",
    // Symbolic ALT allele declarations.
    "##ALT=<ID=DEL,Description=\"Deletion\">",
    "##ALT=<ID=TDUP,Description=\"Tandem duplication\">",
    "##ALT=<ID=IDUP,Description=\"Inverted duplication\">",
    "##ALT=<ID=INV,Description=\"Inversion\">",
    "##ALT=<ID=TRA,Description=\"Translocation\">",
    // Tab-separated column header row; must stay the last entry.
    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT",
];
43
44pub fn to_vcf<W: Write>(tsg_graph: &TSGraph, writer: &mut W) -> Result<()> {
45 let paths = tsg_graph.traverse_all_graphs()?;
46
47 let header = VCF_HEADER;
48 for line in header {
49 writeln!(writer, "{}", line)?;
50 }
51
52 for path in paths {
53 let seq = path.to_vcf()?;
54 writeln!(writer, "{}", seq)?;
55 }
56 Ok(())
57}
58
#[cfg(test)]
mod tests {
    use super::*;

    /// Converts the `tests/data/test.tsg` fixture and checks that the VCF
    /// header is emitted at the start of the output.
    #[test]
    fn test_to_vcf() {
        let tsg_graph = TSGraph::from_file("tests/data/test.tsg").unwrap();

        // Write into memory rather than a file under `tests/data`, so running
        // the test leaves no artifact in the source tree and the output can be
        // inspected directly.
        let mut buffer: Vec<u8> = Vec::new();
        to_vcf(&tsg_graph, &mut buffer).unwrap();

        let output = String::from_utf8(buffer).expect("VCF output should be valid UTF-8");
        assert!(
            output.starts_with("##fileformat=VCFv4.3\n"),
            "output must begin with the fileformat meta-line"
        );
        assert!(
            output.contains("\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\n"),
            "output must contain the column header row on its own line"
        );
    }
}