atglib/gtf/
transcript.rs

1use core::convert::TryFrom;
2use std::fmt;
3
4use crate::gtf::{GtfFeature, GtfRecord};
5use crate::models::{CdsStat, Exon, Strand, Transcript, TranscriptBuilder};
6use crate::utils::errors::ParseGtfError;
7
8/// Groups all [`GtfRecord`] object that belong to one Transcript
9///
10/// Most transcripts are composed of several [`GtfRecord`]s.
11/// [`GtfRecordsGroup`] handles the grouping and allows
12/// conversions into [`Transcript`](Transcript).
13pub struct GtfRecordsGroup {
14    transcript: String,
15    exons: Vec<GtfRecord>,
16    sorted: bool,
17}
18
19impl GtfRecordsGroup {
20    pub fn new(transcript_id: &str) -> Self {
21        Self {
22            transcript: transcript_id.to_string(),
23            exons: vec![],
24            sorted: false,
25        }
26    }
27
28    pub fn add_exon(&mut self, exon: GtfRecord) {
29        self.exons.push(exon)
30    }
31
32    pub fn transcript(&self) -> &str {
33        &self.transcript
34    }
35
36    pub fn gene(&self) -> &str {
37        self.exons[0].gene()
38    }
39
40    pub fn chrom(&self) -> &str {
41        self.exons[0].chrom()
42    }
43
44    pub fn strand(&self) -> &Strand {
45        self.exons[0].strand()
46    }
47
48    pub fn score(&self) -> &Option<f32> {
49        self.exons[0].score()
50    }
51
52    fn prepare(&mut self) {
53        self.exons.sort_unstable_by_key(|x| x.start());
54        self.exons.reverse();
55        self.sorted = true;
56    }
57
58    fn next_exon(&mut self) -> Option<Exon> {
59        if self.exons.is_empty() {
60            return None;
61        }
62        let mut exon = Exon::from(self.exons.pop().unwrap()); // cannot fail, we test for emptyness of the exon vec
63
64        loop {
65            let line = self.exons.pop();
66            if let Some(x) = line {
67                // TODO: Change this to only merge book-ended
68                // features if the two features are CDS + StopCodon.
69                // All other features should be kept separate, even if they
70                // are right next to each other.
71                // Genetically speaking, they would be considered as one exon
72                // and not two separate exons. But to keep the input data intact,
73                // this should be changed.
74                if x.start() <= (exon.end() + 1) {
75                    exon = x.add_to_exon(exon);
76                } else {
77                    self.exons.push(x);
78                    break;
79                }
80            } else {
81                break;
82            }
83        }
84        Some(exon)
85    }
86
87    /// Returns all exons of the transcript as `Vector`
88    ///
89    /// All rows of the GFT file are grouped by genomic location
90    pub fn exons(&mut self) -> Vec<Exon> {
91        if !self.sorted {
92            self.prepare();
93        }
94        let mut exons: Vec<Exon> = vec![];
95        while let Some(x) = self.next_exon() {
96            exons.push(x)
97        }
98
99        exons
100    }
101
102    fn cds_start_stat(&self) -> CdsStat {
103        self.cds_stat(GtfFeature::StartCodon)
104    }
105
106    fn cds_end_stat(&self) -> CdsStat {
107        self.cds_stat(GtfFeature::StopCodon)
108    }
109
110    fn cds_stat(&self, start_stop: GtfFeature) -> CdsStat {
111        let mut cds_present = false;
112        for exon in &self.exons {
113            if exon.feature() == &start_stop {
114                return CdsStat::Complete;
115            }
116            if exon.feature() == &GtfFeature::CDS {
117                cds_present = true;
118            }
119        }
120        if cds_present {
121            CdsStat::Incomplete
122        } else {
123            CdsStat::Unknown
124        }
125    }
126}
127
128impl Default for GtfRecordsGroup {
129    fn default() -> Self {
130        Self::new("Unknown")
131    }
132}
133
134impl fmt::Display for GtfRecordsGroup {
135    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
136        write!(
137            f,
138            "GTFT {} with {} exons",
139            self.transcript,
140            self.exons.len()
141        )
142    }
143}
144
145impl TryFrom<GtfRecordsGroup> for Transcript {
146    type Error = ParseGtfError;
147    /// Returns a `Transcript` based on features of the GTF file,
148    /// belonging to one transcript
149    fn try_from(mut gtf_transcript: GtfRecordsGroup) -> Result<Self, ParseGtfError> {
150        if gtf_transcript.exons.is_empty() {
151            return Err(ParseGtfError {
152                message: format!("No exons in {}", gtf_transcript),
153            });
154        }
155        let transcript = TranscriptBuilder::new()
156            .name(gtf_transcript.transcript())
157            .gene(gtf_transcript.gene())
158            .chrom(gtf_transcript.chrom())
159            .strand(*gtf_transcript.strand())
160            .cds_start_codon_stat(gtf_transcript.cds_start_stat())?
161            .cds_stop_codon_stat(gtf_transcript.cds_end_stat())?
162            .score(*gtf_transcript.score())
163            .build();
164        match transcript {
165            Ok(mut x) => {
166                x.append_exons(&mut gtf_transcript.exons());
167                Ok(x)
168            }
169            _ => Err(ParseGtfError {
170                message: "Unable to build Transcript".to_string(),
171            }),
172        }
173    }
174}