bed2gff/
lines.rs

1use crate::bed::BedRecord;
2use crate::codon::*;
3
4use std::cmp::{max, min};
5
6pub fn build_gff_line(
7    record: &BedRecord,
8    gene: &String,
9    gene_type: &str,
10    exon_start: u32,
11    exon_end: u32,
12    frame: u32,
13    exon: i16,
14    result: &mut Vec<(String, String, u32, u32, String, String, String)>,
15) {
16    assert!(record.tx_start < record.tx_end);
17
18    let phase = match frame {
19        0 => "0",
20        1 => "2",
21        2 => "1",
22        _ => ".",
23    };
24
25    let mut attr = String::new();
26
27    if gene_type == "transcript" {
28        attr.push_str(&format!(
29            "ID={};Parent={};gene_id={};transcript_id={}",
30            record.name, gene, gene, record.name
31        ));
32    } else {
33        if exon >= 0 {
34            let (exon_id, nexon) = if record.strand == "+" {
35                let exon_id = exon + 1;
36                (exon_id as u16, exon + 1)
37            } else {
38                let exon_id = record.exon_count - exon as u16;
39                (exon_id, exon_id as i16)
40            };
41
42            attr.push_str(&format!(
43                "ID={}:{}.{};Parent={};gene_id={};transcript_id={};exon_number={}",
44                gene_type, record.name, exon_id, record.name, gene, record.name, nexon
45            ));
46        } else {
47            let prefix = match gene_type {
48                "five_prime_utr" => "5UTR",
49                "three_prime_utr" => "3UTR",
50                _ => panic!("Invalid gene type"),
51            };
52
53            attr.push_str(&format!(
54                "ID={}:{};Parent={};gene_id={};transcript_id={}",
55                prefix, record.name, record.name, gene, record.name
56            ));
57        }
58    }
59
60    result.push((
61        record.chrom.clone(),
62        gene_type.to_string(),
63        exon_start + 1,
64        exon_end,
65        record.strand.clone(),
66        phase.to_string(),
67        attr,
68    ));
69}
70
71pub fn write_features(
72    i: usize,
73    record: &BedRecord,
74    gene: &String,
75    // first_utr_end: u32,
76    cds_start: u32,
77    cds_end: u32,
78    // last_utr_start: u32,
79    frame: u32,
80    result: &mut Vec<(String, String, u32, u32, String, String, String)>,
81) {
82    let exon_start = record.exon_start[i];
83    let exon_end = record.exon_end[i];
84
85    // if exon_start < first_utr_end {
86    //     let end = min(exon_end, first_utr_end);
87    //     let utr_type = if record.strand == "+" {
88    //         "five_prime_utr"
89    //     } else {
90    //         "three_prime_utr"
91    //     };
92    //     build_gff_line(record, gene, utr_type, exon_start, end, frame, -1, result);
93    // }
94
95    if record.cds_start < exon_end && exon_start < record.cds_end {
96        let start = max(exon_start, cds_start);
97        let end = min(exon_end, cds_end);
98
99        if start < end {
100            build_gff_line(record, gene, "CDS", start, end, frame, i as i16, result);
101        }
102    }
103
104    // if exon_end > last_utr_start {
105    //     let start = max(exon_start, last_utr_start);
106    //     let utr_type = if record.strand == "+" {
107    //         "three_prime_utr"
108    //     } else {
109    //         "five_prime_utr"
110    //     };
111    //     build_gff_line(record, gene, utr_type, start, exon_end, frame, -1, result);
112    // }
113}
114
115pub fn write_codon(
116    record: &BedRecord,
117    gene: &String,
118    gene_type: &str,
119    codon: Codon,
120    result: &mut Vec<(String, String, u32, u32, String, String, String)>,
121) {
122    build_gff_line(
123        record,
124        gene,
125        gene_type,
126        codon.start,
127        codon.end,
128        0,
129        codon.index as i16,
130        result,
131    );
132
133    if codon.start2 < codon.end2 {
134        build_gff_line(
135            record,
136            gene,
137            gene_type,
138            codon.start,
139            codon.end,
140            codon.start2,
141            (codon.end - codon.start) as i16,
142            result,
143        );
144    }
145}