use crate::models::Frame;
use serde::{Deserialize, Serialize};
use std::fmt;
/// Represents a genomic exon.
///
/// Exons can be coding and non-coding.
/// Coding exons have CDS start and end position and a [frame-offset](crate::models::Frame).
///
/// ```rust
/// use atglib::models::{Exon, Frame};
///
/// let start = 1;
/// let end = 10;
/// let non_coding_exon = Exon::new(start, end, None, None, Frame::None);
///
/// assert_eq!(non_coding_exon.is_coding(), false);
///
/// let coding_exon = Exon::new(start, end, Some(start), Some(end), Frame::Zero);
///
/// assert_eq!(coding_exon.is_coding(), true);
/// ```
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct Exon {
// Positions are stored as u32. The u32 max value is 4,294,967,295, which is
// sufficient for every human chromosome.
// If you are working with species with chromosomes of more than ~4.29 Gb
// per chromosome, this library will not work.
//
// Genomic start (leftmost) position of the exon.
// `len()` counts both `start` and `end` as part of the exon.
start: u32,
// Genomic end (rightmost) position of the exon.
end: u32,
// Leftmost genomic coding nucleotide position, `None` for non-coding exons.
// `is_coding()` is decided by this field alone.
cds_start: Option<u32>,
// Rightmost genomic coding nucleotide position, `None` for non-coding exons.
cds_end: Option<u32>,
// Offset of the reading frame; only meaningful for coding exons.
frame_offset: Frame,
}
impl Exon {
    /// Creates a new `Exon` from its genomic and (optional) coding coordinates.
    ///
    /// The values are stored as given; no validation is performed.
    ///
    /// ```rust
    /// use atglib::models::{Exon, Frame};
    ///
    /// let start = 1;
    /// let end = 10;
    /// let non_coding_exon = Exon::new(start, end, None, None, Frame::None);
    ///
    /// assert_eq!(non_coding_exon.is_coding(), false);
    ///
    /// let coding_exon = Exon::new(start, end, Some(start), Some(end), Frame::Zero);
    ///
    /// assert_eq!(coding_exon.is_coding(), true);
    /// ```
    pub fn new(
        start: u32,
        end: u32,
        cds_start: Option<u32>,
        cds_end: Option<u32>,
        frame_offset: Frame,
    ) -> Exon {
        Exon {
            start,
            end,
            cds_start,
            cds_end,
            frame_offset,
        }
    }

    /// Genomic start (leftmost) position of the exon
    pub fn start(&self) -> u32 {
        self.start
    }

    /// modify the [`start`](Exon::start)
    pub fn start_mut(&mut self) -> &mut u32 {
        &mut self.start
    }

    /// Genomic end (rightmost) position of the exon
    pub fn end(&self) -> u32 {
        self.end
    }

    /// modify the [`end`](Exon::end)
    pub fn end_mut(&mut self) -> &mut u32 {
        &mut self.end
    }

    /// If the exon is coding, it contains the leftmost genomic
    /// coding nucleotide position
    pub fn cds_start(&self) -> &Option<u32> {
        &self.cds_start
    }

    /// modify the [`cds_start`](Exon::cds_start)
    pub fn cds_start_mut(&mut self) -> &mut Option<u32> {
        &mut self.cds_start
    }

    /// If the exon is coding, it contains the rightmost genomic
    /// coding nucleotide position
    pub fn cds_end(&self) -> &Option<u32> {
        &self.cds_end
    }

    /// modify the [`cds_end`](Exon::cds_end)
    pub fn cds_end_mut(&mut self) -> &mut Option<u32> {
        &mut self.cds_end
    }

    /// If the exon is coding, the [Frame](crate::models::Frame)
    /// specifies the offset of the reading frame
    pub fn frame_offset(&self) -> &Frame {
        &self.frame_offset
    }

    /// modify the [frame offset](Exon::frame_offset)
    pub fn frame_offset_mut(&mut self) -> &mut Frame {
        &mut self.frame_offset
    }

    /// Returns true if the exon contains a coding sequence (CDS)
    ///
    /// Only the CDS start is inspected: the exon counts as coding as soon as
    /// [`cds_start`](Exon::cds_start) is set, even if
    /// [`cds_end`](Exon::cds_end) has not been set yet.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use atglib::models::{Exon, Frame};
    ///
    /// let start = 1;
    /// let end = 2;
    /// let mut a = Exon::new(start, end, None, None, Frame::None);
    /// assert_eq!(a.is_coding(), false);
    /// *a.cds_start_mut() = Some(1);
    /// *a.cds_end_mut() = Some(2);
    /// assert_eq!(a.is_coding(), true);
    /// ```
    pub fn is_coding(&self) -> bool {
        self.cds_start.is_some()
    }

    /// Returns the number of bp of the exon
    ///
    /// Both the start and the end position are counted.
    ///
    /// # Panics
    ///
    /// The computation is `end - start + 1`. If `end` is smaller than `start`
    /// the subtraction underflows, which panics when overflow checks are
    /// enabled and wraps around otherwise.
    pub fn len(&self) -> u32 {
        // counting the first base as part of the exon
        self.end - self.start + 1
    }

    /// Only implemented to satisfy clippy... Exons cannot be empty.
    pub fn is_empty(&self) -> bool {
        false
    }

    /// Returns the number of bp of the exon's coding sequence
    ///
    /// Non-coding exons have 0 bp coding sequence. An exon where only one of
    /// the two CDS boundaries is set has no measurable coding sequence and
    /// also yields 0 instead of panicking.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use atglib::models::{Exon, Frame};
    ///
    /// let coding = Exon::new(1, 10, Some(3), Some(8), Frame::Zero);
    /// assert_eq!(coding.coding_len(), 6);
    ///
    /// let non_coding = Exon::new(1, 10, None, None, Frame::None);
    /// assert_eq!(non_coding.coding_len(), 0);
    ///
    /// let incomplete = Exon::new(1, 10, Some(3), None, Frame::Zero);
    /// assert_eq!(incomplete.coding_len(), 0);
    /// ```
    pub fn coding_len(&self) -> u32 {
        // Both boundaries are required. `is_coding` only looks at `cds_start`,
        // and the setters allow the two fields to be changed independently,
        // so `cds_end` must be checked here as well.
        match (self.cds_start, self.cds_end) {
            // counting the first base as part of the ORF
            (Some(cds_start), Some(cds_end)) => cds_end - cds_start + 1,
            _ => 0,
        }
    }

    /// Returns the coding frame of the next coding exon
    ///
    /// Returns `None` if the exon is non-coding, if one of the CDS boundaries
    /// is missing, or if adding the frames fails.
    pub fn downstream_frame(&self) -> Option<Frame> {
        // Without both CDS boundaries there is no coding length to derive
        // a downstream frame from.
        if self.cds_start.is_none() || self.cds_end.is_none() {
            return None;
        }
        // Number of bases needed to complete the last codon of this exon.
        let frame = (3 - (self.coding_len() % 3)) % 3;
        // using unwrap here is safe, because we ensure to only have 0,1,2 frame
        let shift = Frame::from_int(frame).unwrap(); // cannot fail, frame is %3
        (self.frame_offset + shift).ok()
    }

    /// Replaces the frame offset of the exon with `frame`
    pub fn set_frame(&mut self, frame: Frame) {
        self.frame_offset = frame;
    }
}
impl fmt::Display for Exon {
    /// Formats the exon as `Exon (start-end) [cds_start-cds_end] ^frame^`.
    ///
    /// A missing CDS boundary is printed as `0`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Resolve the optional CDS coordinates up front; 0 is the placeholder
        // for "not set".
        let shown_cds_start = self.cds_start.unwrap_or(0);
        let shown_cds_end = self.cds_end.unwrap_or(0);

        write!(
            f,
            "Exon ({}-{}) [{}-{}] ^{}^",
            self.start, self.end, shown_cds_start, shown_cds_end, self.frame_offset
        )
    }
}