tsg 0.1.1

A tool to analyze and manipulate transcript segment graphs (TSG)
use std::path::Path;

use crate::graph::TSGraph;
use anyhow::Result;
use std::io::Write;

/// Header lines emitted verbatim, in this order, at the top of every file
/// written by `to_vcf`.
///
/// The entries are the meta-information lines (`##...`) followed by the
/// tab-separated column header line (`#CHROM ...`). Raw string literals are
/// used wherever a line embeds double quotes so the text reads exactly as it
/// appears in the output file.
static VCF_HEADER: &[&str] = &[
    // File-level metadata.
    "##fileformat=VCFv4.3",
    "##source=tsg",
    // INFO field declarations.
    r#"##INFO=<ID=CANONICAL,Number=0,Type=Flag,Description="Canonical splice site">"#,
    r#"##INFO=<ID=NONCANONICAL,Number=0,Type=Flag,Description="Noncanonical splice site">"#,
    r#"##INFO=<ID=BOUNDARY,Number=1,Type=String,Description="The coding exon boundary type of event, BOTH, LEFT, RIGHT, NEITHER.">"#,
    r#"##INFO=<ID=DP1,Number=1,Type=Integer,Description="Total read depth at the breakpoint1">"#,
    r#"##INFO=<ID=DP2,Number=1,Type=Integer,Description="Total read depth at the breakpoint2">"#,
    r#"##INFO=<ID=SR,Number=1,Type=Integer,Description="The number of support reads for the breakpoints">"#,
    r#"##INFO=<ID=OSR,Number=1,Type=Integer,Description="The number of support reads for the breakpoints before rescuer">"#,
    r#"##INFO=<ID=PSI,Number=1,Type=Float,Description="Estimated Percent splice-in in the range (0,1], representing the percentage of NLS transcripts">"#,
    r#"##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">"#,
    r#"##INFO=<ID=SVTYPE,Number=1,Type=String,Description="The type of event, DEL, TDUP, IDUP, INV, TRA.">"#,
    r#"##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">"#,
    r#"##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">"#,
    r#"##INFO=<ID=SVEND,Number=1,Type=Integer,Description="2nd position of the structural variant">"#,
    r#"##INFO=<ID=END,Number=1,Type=Integer,Description="A placeholder for END coordinate in case of a translocation">"#,
    r#"##INFO=<ID=STRAND1,Number=1,Type=String,Description="Strand for breakpoint1">"#,
    r#"##INFO=<ID=STRAND2,Number=1,Type=String,Description="Strand for breakpoint2">"#,
    r#"##INFO=<ID=MODE1,Number=1,Type=String,Description="Mode for softclipped reads at breakpoint1">"#,
    r#"##INFO=<ID=MODE2,Number=1,Type=String,Description="Mode for softclipped reads at breakpoint2">"#,
    r#"##INFO=<ID=GENE1,Number=1,Type=String,Description="Overlapped coding gene for breakpoint1">"#,
    r#"##INFO=<ID=GENE2,Number=1,Type=String,Description="Overlapped coding gene for breakpoint2">"#,
    r#"##INFO=<ID=MEGAEXON1,Number=.,Type=String,Description="ID for source mega exon">"#,
    r#"##INFO=<ID=MEGAEXON2,Number=.,Type=String,Description="ID for target mega exon">"#,
    r#"##INFO=<ID=HOMSEQ,Number=1,Type=String,Description="MicroHomology sequence">"#,
    r#"##INFO=<ID=INSSEQ,Number=1,Type=String,Description="MicroInsertion sequence">"#,
    r#"##INFO=<ID=TRANSCRIPT_ID,Number=.,Type=String,Description="Transcript ID">"#,
    r#"##INFO=<ID=GENE_ID,Number=1,Type=String,Description="Gene ID">"#,
    r#"##INFO=<ID=SR_ID,Number=.,Type=String,Description="Support read ID">"#,
    // FORMAT field declarations.
    r#"##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">"#,
    // Symbolic ALT allele declarations.
    r#"##ALT=<ID=DEL,Description="Deletion">"#,
    r#"##ALT=<ID=TDUP,Description="Tandem duplication">"#,
    r#"##ALT=<ID=IDUP,Description="Inverted duplication">"#,
    r#"##ALT=<ID=INV,Description="Inversion">"#,
    r#"##ALT=<ID=TRA,Description="Translocation">"#,
    // Column header line; fields are separated by tab characters.
    "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT",
];

/// Writes the traversed paths of `tsg_graph` to `output` as a VCF file.
///
/// The graph is traversed before the output file is created, so a traversal
/// failure returns without touching `output`. The file is then created, every
/// entry of `VCF_HEADER` is written on its own line, and the VCF rendering of
/// each path follows, one `writeln!` per path.
///
/// # Errors
///
/// Returns an error if traversing the graph fails, if `output` cannot be
/// created, if a path cannot be rendered to VCF, or if writing or flushing the
/// file fails.
pub fn to_vcf<P: AsRef<Path>>(tsg_graph: &TSGraph, output: P) -> Result<()> {
    let paths = tsg_graph.traverse()?;
    let mut writer = std::io::BufWriter::new(std::fs::File::create(output)?);

    for line in VCF_HEADER {
        writeln!(writer, "{}", line)?;
    }

    for path in paths {
        writeln!(writer, "{}", path.to_vcf()?)?;
    }

    // Flush explicitly: `BufWriter`'s `Drop` also flushes, but it discards any
    // I/O error, which would let a truncated file be reported as success.
    writer.flush()?;
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Converts the fixture graph to VCF and checks that the written file
    /// begins with the `VCF_HEADER` lines, in order.
    #[test]
    fn test_to_vcf() {
        let tsg_graph = TSGraph::from_file("tests/data/test.tsg").unwrap();
        let output = "tests/data/test.vcf";
        to_vcf(&tsg_graph, output).unwrap();

        // Read the file back so the test verifies what was written, not only
        // that the call returned without an error.
        let written = std::fs::read_to_string(output).unwrap();
        let mut lines = written.lines();
        for expected in VCF_HEADER {
            assert_eq!(lines.next(), Some(*expected));
        }
    }
}