use std::cmp::PartialEq;
use std::fmt;
use serde::{Deserialize, Serialize};
use crate::models::codon::Codon;
use crate::models::utils::{CdsStat, Strand};
use crate::models::{Exon, Frame};
use crate::utils::errors::BuildTranscriptError;
/// A single genomic interval as `(chromosome, start, end)`.
///
/// The chromosome is borrowed from the transcript that produced the
/// coordinate. Within this file, `start` and `end` are both treated as
/// belonging to the interval (see `Transcript::utr_coordinates`).
pub type Coordinate<'a> = (&'a str, u32, u32);

/// An ordered list of [`Coordinate`]s.
pub type CoordinateVector<'a> = Vec<Coordinate<'a>>;
/// A transcript: a named, stranded set of exons on one chromosome.
///
/// Instances are created through [`TranscriptBuilder`], which yields a
/// transcript without exons; exons are added afterwards via
/// `push_exon` / `append_exons`.
///
/// NOTE: the field order is kept as-is on purpose. The struct derives
/// `Serialize`/`Deserialize`, and positional serialization formats depend on
/// the declaration order.
#[derive(Debug, Serialize, Deserialize)]
pub struct Transcript {
    /// Optional bin value.
    /// NOTE(review): presumably the UCSC genome-browser bin index — confirm.
    bin: Option<u16>,
    /// Name of the transcript.
    name: String,
    /// Name of the chromosome the transcript is located on.
    chrom: String,
    /// Strand of the transcript.
    strand: Strand,
    /// Status of the CDS start (the genomic left-most CDS boundary; on the
    /// minus strand this is where the stop codon is located).
    cds_start_stat: CdsStat,
    /// Status of the CDS end (the genomic right-most CDS boundary; on the
    /// minus strand this is where the start codon is located).
    cds_end_stat: CdsStat,
    /// Exons of the transcript.
    /// NOTE(review): assumed to be sorted by ascending genomic position;
    /// `tx_start`/`tx_end` rely on that, but nothing here enforces it.
    exons: Vec<Exon>,
    /// Symbol of the gene this transcript belongs to.
    gene_symbol: String,
    /// Optional score.
    score: Option<f32>,
}
impl Transcript {
    /// Returns the optional `bin` value stored with this transcript.
    pub fn bin(&self) -> &Option<u16> {
        &self.bin
    }

    /// Returns the name of the transcript.
    pub fn name(&self) -> &str {
        &self.name
    }

    /// Returns the gene symbol of the transcript.
    pub fn gene(&self) -> &str {
        &self.gene_symbol
    }

    /// Returns the chromosome name of the transcript.
    pub fn chrom(&self) -> &str {
        &self.chrom
    }

    /// Returns the strand of the transcript.
    pub fn strand(&self) -> Strand {
        self.strand
    }

    /// Returns a mutable reference to the strand so it can be changed in place.
    ///
    /// Only the strand itself is modified; exons and CDS stats are untouched.
    pub fn strand_mut(&mut self) -> &mut Strand {
        &mut self.strand
    }

    /// Returns the status of the CDS start (genomic left-most CDS boundary).
    pub fn cds_start_stat(&self) -> CdsStat {
        self.cds_start_stat
    }

    /// Returns the status of the CDS end (genomic right-most CDS boundary).
    pub fn cds_end_stat(&self) -> CdsStat {
        self.cds_end_stat
    }

    /// Returns the CDS status at the start codon.
    ///
    /// On the minus strand this is the CDS *end* stat; for every other strand
    /// (including `Strand::Unknown`) it is the CDS *start* stat.
    pub fn cds_start_codon_stat(&self) -> CdsStat {
        match self.strand {
            Strand::Minus => self.cds_end_stat,
            Strand::Plus | Strand::Unknown => self.cds_start_stat,
        }
    }

    /// Returns the CDS status at the stop codon.
    ///
    /// On the minus strand this is the CDS *start* stat; for every other
    /// strand (including `Strand::Unknown`) it is the CDS *end* stat.
    pub fn cds_stop_codon_stat(&self) -> CdsStat {
        match self.strand {
            Strand::Minus => self.cds_start_stat,
            Strand::Plus | Strand::Unknown => self.cds_end_stat,
        }
    }

    /// Returns the exons of the transcript.
    pub fn exons(&self) -> &Vec<Exon> {
        &self.exons
    }

    /// Returns a mutable reference to the exon list.
    pub fn exons_mut(&mut self) -> &mut Vec<Exon> {
        &mut self.exons
    }

    /// Returns the optional score of the transcript.
    pub fn score(&self) -> Option<f32> {
        self.score
    }

    /// Appends a single exon to the end of the exon list.
    ///
    /// No sorting is performed; the caller is responsible for the order.
    pub fn push_exon(&mut self, exon: Exon) {
        self.exons.push(exon)
    }

    /// Moves all exons out of `exons` and appends them to the end of the
    /// exon list, leaving `exons` empty.
    ///
    /// No sorting is performed; the caller is responsible for the order.
    pub fn append_exons(&mut self, exons: &mut Vec<Exon>) {
        self.exons.append(exons)
    }

    /// Sets the status of the CDS start.
    pub fn set_cds_start_stat(&mut self, stat: CdsStat) {
        self.cds_start_stat = stat;
    }

    /// Sets the status of the CDS end.
    pub fn set_cds_end_stat(&mut self, stat: CdsStat) {
        self.cds_end_stat = stat;
    }

    /// Returns `true` unless the transcript is on the minus strand.
    ///
    /// `Strand::Unknown` is treated like the plus strand.
    pub fn forward(&self) -> bool {
        match self.strand {
            Strand::Plus | Strand::Unknown => true,
            Strand::Minus => false,
        }
    }

    /// Returns the number of exons.
    pub fn exon_count(&self) -> usize {
        self.exons.len()
    }

    /// Returns the start position of the first exon in the exon list.
    ///
    /// NOTE(review): this equals the transcript start only if the exons are
    /// stored in ascending genomic order — nothing in this type enforces it.
    ///
    /// # Panics
    ///
    /// Panics if the transcript has no exons (e.g. directly after
    /// `TranscriptBuilder::build`).
    pub fn tx_start(&self) -> u32 {
        self.exons
            .first()
            .expect("transcript has no exons")
            .start()
    }

    /// Returns the end position of the last exon in the exon list.
    ///
    /// NOTE(review): this equals the transcript end only if the exons are
    /// stored in ascending genomic order — nothing in this type enforces it.
    ///
    /// # Panics
    ///
    /// Panics if the transcript has no exons (e.g. directly after
    /// `TranscriptBuilder::build`).
    pub fn tx_end(&self) -> u32 {
        self.exons.last().expect("transcript has no exons").end()
    }

    /// Returns the CDS start of the first exon (in list order) that has one,
    /// or `None` if no exon has a CDS start.
    pub fn cds_start(&self) -> Option<u32> {
        self.exons.iter().find_map(|exon| *exon.cds_start())
    }

    /// Returns the CDS end of the last exon (in list order) that has one,
    /// or `None` if no exon has a CDS end.
    pub fn cds_end(&self) -> Option<u32> {
        self.exons.iter().rev().find_map(|exon| *exon.cds_end())
    }

    /// Returns `true` if at least one exon is coding.
    pub fn is_coding(&self) -> bool {
        self.exons.iter().any(|exon| exon.is_coding())
    }

    /// Returns the start codon in the tuple form produced by
    /// `Codon::to_tuple`.
    ///
    /// The codon is looked up upstream of the CDS end on the minus strand and
    /// downstream of the CDS start on the plus strand.
    ///
    /// An empty vector is returned if the transcript is non-coding, if the
    /// strand is `Strand::Unknown`, or if the codon lookup fails (the error
    /// is discarded).
    ///
    /// # Panics
    ///
    /// Panics if the transcript is coding but no exon provides the required
    /// CDS boundary.
    pub fn start_codon(&self) -> Vec<(u32, u32, Frame)> {
        if !self.is_coding() {
            return Vec::new();
        }
        let codon = match self.strand {
            Strand::Minus => Codon::upstream(
                self,
                &self.cds_end().expect("coding transcript has no CDS end"),
            ),
            Strand::Plus => Codon::downstream(
                self,
                &self
                    .cds_start()
                    .expect("coding transcript has no CDS start"),
            ),
            Strand::Unknown => return Vec::new(),
        };
        // A failed lookup is deliberately reported as "no codon".
        codon.map(|found| found.to_tuple()).unwrap_or_default()
    }

    /// Returns the stop codon in the tuple form produced by
    /// `Codon::to_tuple`.
    ///
    /// The codon is looked up downstream of the CDS start on the minus strand
    /// and upstream of the CDS end on the plus strand.
    ///
    /// An empty vector is returned if the transcript is non-coding, if the
    /// strand is `Strand::Unknown`, or if the codon lookup fails (the error
    /// is discarded).
    ///
    /// # Panics
    ///
    /// Panics if the transcript is coding but no exon provides the required
    /// CDS boundary.
    pub fn stop_codon(&self) -> Vec<(u32, u32, Frame)> {
        if !self.is_coding() {
            return Vec::new();
        }
        let codon = match self.strand {
            Strand::Minus => Codon::downstream(
                self,
                &self
                    .cds_start()
                    .expect("coding transcript has no CDS start"),
            ),
            Strand::Plus => Codon::upstream(
                self,
                &self.cds_end().expect("coding transcript has no CDS end"),
            ),
            Strand::Unknown => return Vec::new(),
        };
        // A failed lookup is deliberately reported as "no codon".
        codon.map(|found| found.to_tuple()).unwrap_or_default()
    }

    /// Returns `(chrom, start, end)` for every exon, in list order.
    pub fn exon_coordinates(&self) -> CoordinateVector<'_> {
        let chrom = self.chrom();
        self.exons
            .iter()
            .map(|exon| (chrom, exon.start(), exon.end()))
            .collect()
    }

    /// Returns `(chrom, cds_start, cds_end)` for every coding exon, in list
    /// order. Non-coding exons are skipped.
    ///
    /// # Panics
    ///
    /// Panics if an exon reports itself as coding but lacks a CDS start or
    /// CDS end.
    pub fn cds_coordinates(&self) -> CoordinateVector<'_> {
        let chrom = self.chrom();
        self.exons
            .iter()
            .filter(|exon| exon.is_coding())
            .map(|exon| {
                (
                    chrom,
                    exon.cds_start().expect("coding exon has no CDS start"),
                    exon.cds_end().expect("coding exon has no CDS end"),
                )
            })
            .collect()
    }

    /// Returns the coordinates of all untranslated regions, in exon order.
    ///
    /// * A non-coding exon contributes its full range.
    /// * A coding exon contributes the part before its CDS start (if any)
    ///   followed by the part after its CDS end (if any).
    ///
    /// # Panics
    ///
    /// Panics if an exon reports itself as coding but lacks a CDS start or
    /// CDS end.
    pub fn utr_coordinates(&self) -> CoordinateVector<'_> {
        let chrom = self.chrom();
        let mut coords: CoordinateVector<'_> = Vec::new();
        for exon in &self.exons {
            if !exon.is_coding() {
                coords.push((chrom, exon.start(), exon.end()));
                continue;
            }
            let cds_start = exon.cds_start().expect("coding exon has no CDS start");
            let cds_end = exon.cds_end().expect("coding exon has no CDS end");
            // `cds_start > exon.start()` guarantees `cds_start >= 1`, so the
            // subtraction cannot underflow.
            if cds_start > exon.start() {
                coords.push((chrom, exon.start(), cds_start - 1));
            }
            // `cds_end < exon.end()` guarantees `cds_end < u32::MAX`, so the
            // addition cannot overflow.
            if cds_end < exon.end() {
                coords.push((chrom, cds_end + 1, exon.end()));
            }
        }
        coords
    }

    /// Returns the coordinates of the 5' UTR.
    ///
    /// For forward transcripts (see [`Transcript::forward`]) these are the UTR
    /// segments ending before the CDS start; for minus-strand transcripts the
    /// UTR segments starting after the CDS end. Non-coding transcripts yield
    /// an empty vector.
    ///
    /// # Panics
    ///
    /// Panics if the transcript is coding but no exon provides the required
    /// CDS boundary.
    pub fn utr5_coordinates(&self) -> CoordinateVector<'_> {
        if !self.is_coding() {
            return Vec::new();
        }
        let mut utr = self.utr_coordinates();
        if self.forward() {
            let start = self
                .cds_start()
                .expect("coding transcript has no CDS start");
            utr.retain(|coord| coord.2 < start);
        } else {
            let end = self.cds_end().expect("coding transcript has no CDS end");
            utr.retain(|coord| coord.1 > end);
        }
        utr
    }

    /// Returns the coordinates of the 3' UTR.
    ///
    /// For forward transcripts (see [`Transcript::forward`]) these are the UTR
    /// segments starting after the CDS end; for minus-strand transcripts the
    /// UTR segments ending before the CDS start. Non-coding transcripts yield
    /// an empty vector.
    ///
    /// # Panics
    ///
    /// Panics if the transcript is coding but no exon provides the required
    /// CDS boundary.
    pub fn utr3_coordinates(&self) -> CoordinateVector<'_> {
        if !self.is_coding() {
            return Vec::new();
        }
        let mut utr = self.utr_coordinates();
        if self.forward() {
            let end = self.cds_end().expect("coding transcript has no CDS end");
            utr.retain(|coord| coord.1 > end);
        } else {
            let start = self
                .cds_start()
                .expect("coding transcript has no CDS start");
            utr.retain(|coord| coord.2 < start);
        }
        utr
    }
}
impl fmt::Display for Transcript {
    /// Formats the transcript as `[gene] name (chrom:start-end)`.
    ///
    /// `start` is the start of the first exon and `end` the end of the last
    /// exon. A transcript without exons (which is what
    /// `TranscriptBuilder::build` returns) has no range to print, so it is
    /// formatted as `[gene] name (chrom)` instead of panicking.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match (self.exons.first(), self.exons.last()) {
            (Some(first), Some(last)) => write!(
                f,
                "[{}] {} ({}:{}-{})",
                self.gene_symbol,
                self.name,
                self.chrom,
                first.start(),
                last.end()
            ),
            // No exons: formatting must not panic, so the range is omitted.
            _ => write!(f, "[{}] {} ({})", self.gene_symbol, self.name, self.chrom),
        }
    }
}
impl PartialEq for Transcript {
    /// Two transcripts are equal when name, chromosome, strand, gene symbol,
    /// both CDS stats and all exons (pairwise, in order) are equal.
    ///
    /// `bin` and `score` do not take part in the comparison.
    fn eq(&self, other: &Self) -> bool {
        // Cheap scalar comparisons first; `&&` short-circuits exactly like
        // a ladder of early returns.
        self.name() == other.name()
            && self.chrom() == other.chrom()
            && self.strand() == other.strand()
            && self.gene() == other.gene()
            && self.cds_start_stat() == other.cds_start_stat()
            && self.cds_end_stat() == other.cds_end_stat()
            && self.exon_count() == other.exon_count()
            && self
                .exons()
                .iter()
                .zip(other.exons())
                .all(|(mine, theirs)| mine == theirs)
    }
}
/// Builder for [`Transcript`].
///
/// `name`, `chrom` and `gene_symbol` are mandatory and only borrowed while
/// building; `build` copies them into owned `String`s. All other values have
/// defaults (see `TranscriptBuilder::new`).
#[derive(Debug, Clone)]
pub struct TranscriptBuilder<'a> {
    /// Optional bin value, passed through to the transcript.
    bin: Option<u16>,
    /// Transcript name; must be set before `build`.
    name: Option<&'a str>,
    /// Chromosome name; must be set before `build`.
    chrom: Option<&'a str>,
    /// Strand of the transcript.
    strand: Strand,
    /// Status of the CDS start.
    cds_start_stat: CdsStat,
    /// Status of the CDS end.
    cds_end_stat: CdsStat,
    /// Gene symbol; must be set before `build`.
    gene_symbol: Option<&'a str>,
    /// Optional score, passed through to the transcript.
    score: Option<f32>,
}
impl<'a> Default for TranscriptBuilder<'a> {
fn default() -> Self {
Self::new()
}
}
impl<'a> TranscriptBuilder<'a> {
    /// Creates an empty builder.
    ///
    /// Defaults: no bin, no name, no chromosome, no gene symbol, no score,
    /// `Strand::Unknown` and `CdsStat::None` for both CDS stats.
    pub fn new() -> Self {
        Self {
            bin: None,
            name: None,
            chrom: None,
            strand: Strand::Unknown,
            cds_start_stat: CdsStat::None,
            cds_end_stat: CdsStat::None,
            gene_symbol: None,
            score: None,
        }
    }

    /// Sets the optional bin value.
    pub fn bin(&mut self, bin: Option<u16>) -> &mut Self {
        self.bin = bin;
        self
    }

    /// Sets the transcript name (mandatory).
    pub fn name(&mut self, name: &'a str) -> &mut Self {
        self.name = Some(name);
        self
    }

    /// Sets the chromosome name (mandatory).
    pub fn chrom(&mut self, chrom: &'a str) -> &mut Self {
        self.chrom = Some(chrom);
        self
    }

    /// Sets the gene symbol (mandatory).
    pub fn gene(&mut self, gene_symbol: &'a str) -> &mut Self {
        self.gene_symbol = Some(gene_symbol);
        self
    }

    /// Sets the strand.
    ///
    /// The strand must be set *before* calling `cds_start_codon_stat` or
    /// `cds_stop_codon_stat`, because those methods read the strand at call
    /// time.
    pub fn strand(&mut self, strand: Strand) -> &mut Self {
        self.strand = strand;
        self
    }

    /// Sets the status of the CDS start (genomic left-most CDS boundary).
    pub fn cds_start_stat(&mut self, cds_start_stat: CdsStat) -> &mut Self {
        self.cds_start_stat = cds_start_stat;
        self
    }

    /// Sets the CDS status at the start codon, taking the strand into account.
    ///
    /// On the plus strand this sets the CDS start stat, on the minus strand
    /// the CDS end stat.
    ///
    /// # Errors
    ///
    /// Returns a `BuildTranscriptError` if the strand is `Strand::Unknown`.
    pub fn cds_start_codon_stat(
        &mut self,
        stat: CdsStat,
    ) -> Result<&mut Self, BuildTranscriptError> {
        match self.strand {
            Strand::Plus => Ok(self.cds_start_stat(stat)),
            Strand::Minus => Ok(self.cds_end_stat(stat)),
            Strand::Unknown => Err(BuildTranscriptError::new(
                "Cannot set CDS-Startcodon-Stat without defined strand",
            )),
        }
    }

    /// Sets the status of the CDS end (genomic right-most CDS boundary).
    pub fn cds_end_stat(&mut self, cds_end_stat: CdsStat) -> &mut Self {
        self.cds_end_stat = cds_end_stat;
        self
    }

    /// Sets the CDS status at the stop codon, taking the strand into account.
    ///
    /// On the plus strand this sets the CDS end stat, on the minus strand
    /// the CDS start stat.
    ///
    /// # Errors
    ///
    /// Returns a `BuildTranscriptError` if the strand is `Strand::Unknown`.
    pub fn cds_stop_codon_stat(
        &mut self,
        stat: CdsStat,
    ) -> Result<&mut Self, BuildTranscriptError> {
        match self.strand {
            Strand::Plus => Ok(self.cds_end_stat(stat)),
            Strand::Minus => Ok(self.cds_start_stat(stat)),
            // The message names the stop codon; it previously repeated the
            // start-codon text.
            Strand::Unknown => Err(BuildTranscriptError::new(
                "Cannot set CDS-Stopcodon-Stat without defined strand",
            )),
        }
    }

    /// Sets the optional score.
    pub fn score(&mut self, score: Option<f32>) -> &mut Self {
        self.score = score;
        self
    }

    /// Builds the [`Transcript`].
    ///
    /// The returned transcript has **no exons**; add them afterwards with
    /// `Transcript::push_exon` or `Transcript::append_exons`.
    ///
    /// # Errors
    ///
    /// Returns a `BuildTranscriptError` if the name, the chromosome or the
    /// gene symbol was not set (checked in that order).
    pub fn build(&self) -> Result<Transcript, BuildTranscriptError> {
        let name = self
            .name
            .ok_or_else(|| BuildTranscriptError::new("No name specified"))?;
        let chrom = self
            .chrom
            .ok_or_else(|| BuildTranscriptError::new("No chromosome specified"))?;
        let gene_symbol = self
            .gene_symbol
            .ok_or_else(|| BuildTranscriptError::new("No gene symbol specified"))?;

        Ok(Transcript {
            bin: self.bin,
            name: name.to_string(),
            chrom: chrom.to_string(),
            strand: self.strand,
            cds_start_stat: self.cds_start_stat,
            cds_end_stat: self.cds_end_stat,
            exons: vec![],
            gene_symbol: gene_symbol.to_string(),
            score: self.score,
        })
    }
}
#[cfg(test)]
mod tests {
    use crate::models::utils::Strand;
    use crate::tests::transcripts::standard_transcript;

    /// Asserts that `coords` consists of exactly the intervals described by
    /// `starts` and `ends` (same length, same order).
    fn assert_spans(coords: &[(&str, u32, u32)], starts: &[u32], ends: &[u32]) {
        assert_eq!(coords.len(), starts.len());
        let found_starts: Vec<u32> = coords.iter().map(|coord| coord.1).collect();
        let found_ends: Vec<u32> = coords.iter().map(|coord| coord.2).collect();
        assert_eq!(found_starts, starts);
        assert_eq!(found_ends, ends);
    }

    #[test]
    fn test_utr_coordinates() {
        let transcript = standard_transcript();
        assert_spans(
            &transcript.utr_coordinates(),
            &[11, 21, 45, 51],
            &[15, 23, 45, 55],
        );
    }

    #[test]
    fn test_utr5_coordinates() {
        let mut transcript = standard_transcript();
        assert_spans(&transcript.utr5_coordinates(), &[11, 21], &[15, 23]);

        // Flipping the strand swaps which side of the CDS counts as 5'.
        *transcript.strand_mut() = Strand::Minus;
        assert_spans(&transcript.utr5_coordinates(), &[45, 51], &[45, 55]);
    }

    #[test]
    fn test_utr3_coordinates() {
        let mut transcript = standard_transcript();
        assert_spans(&transcript.utr3_coordinates(), &[45, 51], &[45, 55]);

        // Flipping the strand swaps which side of the CDS counts as 3'.
        *transcript.strand_mut() = Strand::Minus;
        assert_spans(&transcript.utr3_coordinates(), &[11, 21], &[15, 23]);
    }

    #[test]
    fn test_cds_coordinates() {
        let transcript = standard_transcript();
        assert_spans(&transcript.cds_coordinates(), &[24, 31, 41], &[25, 35, 44]);
    }

    #[test]
    fn test_exon_coordinates() {
        let transcript = standard_transcript();
        assert_spans(
            &transcript.exon_coordinates(),
            &[11, 21, 31, 41, 51],
            &[15, 25, 35, 45, 55],
        );
    }
}