use serde::{Deserialize, Serialize};
/// Strand designation, as stored in [`TranscriptModel::strand`].
///
/// The enum is `#[non_exhaustive]`, so code in other crates must keep a
/// wildcard arm when matching on it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum Strand {
    /// The plus strand.
    Plus,
    /// The minus strand.
    Minus,
}
/// Tier assigned to a transcript, as stored in [`TranscriptModel::tier`].
///
/// NOTE(review): the variant names presumably refer to the MANE Select,
/// MANE Plus Clinical and RefSeq Select transcript sets — confirm against the
/// code that populates this field.
///
/// The enum is `#[non_exhaustive]`, so code in other crates must keep a
/// wildcard arm when matching on it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[non_exhaustive]
pub enum TranscriptTier {
    /// Presumably a MANE Select transcript.
    ManeSelect,
    /// Presumably a MANE Plus Clinical transcript.
    ManePlusClinical,
    /// Presumably a RefSeq Select transcript.
    RefSeqSelect,
}
/// One exon of a transcript: a number plus a genomic start/end pair.
///
/// NOTE(review): the coordinate convention (0- vs 1-based, inclusive vs
/// half-open) is not established by this type — confirm with the producer.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct Exon {
    /// Number of this exon; the numbering scheme is defined by the producer.
    pub exon_number: u16,
    /// Genomic start coordinate of the exon.
    pub genomic_start: u64,
    /// Genomic end coordinate of the exon.
    pub genomic_end: u64,
}
/// One coding-sequence segment: an exon reference, a genomic start/end pair
/// and a phase value.
///
/// NOTE(review): the coordinate convention (0- vs 1-based, inclusive vs
/// half-open) is not established by this type — confirm with the producer.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct CdsSegment {
    /// Exon this segment belongs to.
    ///
    /// NOTE(review): unclear from here whether this is a position in
    /// [`TranscriptModel::exons`] or matches [`Exon::exon_number`] — confirm.
    pub exon_index: u16,
    /// Genomic start coordinate of the segment.
    pub genomic_start: u64,
    /// Genomic end coordinate of the segment.
    pub genomic_end: u64,
    /// Phase of the segment; presumably a reading-frame offset in `0..=2`,
    /// but no range is enforced here — TODO confirm.
    pub phase: u8,
}
/// Transcript biotype, identified by a textual label.
///
/// Every fieldless variant corresponds to exactly one fixed label (see
/// [`Biotype::as_str`]). Labels that [`Biotype::from_label`] does not
/// recognise are preserved verbatim in [`Biotype::Other`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Biotype {
    /// Label `protein_coding`.
    ProteinCoding,
    /// Label `non_coding_rna`.
    NonCodingRna,
    /// Label `lncRNA`.
    LncRna,
    /// Label `antisense_RNA`.
    AntisenseRna,
    /// Label `snoRNA`.
    SnoRna,
    /// Label `snRNA`.
    SnRna,
    /// Label `RNase_MRP_RNA`.
    RnaseMrpRna,
    /// Label `telomerase_RNA`.
    TelomeraseRna,
    /// Label `vault_RNA`.
    VaultRna,
    /// A label with no dedicated variant, stored exactly as given.
    Other(String),
    /// Label `unknown`; [`Biotype::from_label`] also maps the empty string here.
    Unknown,
}

impl Biotype {
    /// Returns the textual label of this biotype.
    ///
    /// Fixed variants yield their literal label; [`Biotype::Other`] yields the
    /// text it carries. An `Other` built by hand with an empty string or with
    /// one of the fixed labels will not map back to `Other` through
    /// [`Biotype::from_label`].
    pub fn as_str(&self) -> &str {
        match self {
            Self::Other(custom) => custom.as_str(),
            Self::Unknown => "unknown",
            Self::ProteinCoding => "protein_coding",
            Self::NonCodingRna => "non_coding_rna",
            Self::LncRna => "lncRNA",
            Self::AntisenseRna => "antisense_RNA",
            Self::SnoRna => "snoRNA",
            Self::SnRna => "snRNA",
            Self::RnaseMrpRna => "RNase_MRP_RNA",
            Self::TelomeraseRna => "telomerase_RNA",
            Self::VaultRna => "vault_RNA",
        }
    }

    /// Parses a label into a biotype.
    ///
    /// Matching is exact and case-sensitive. The empty string and `unknown`
    /// both give [`Biotype::Unknown`]; any other unrecognised label is copied
    /// into [`Biotype::Other`], so this function never fails.
    pub fn from_label(label: &str) -> Self {
        // An absent label is treated the same as an explicit "unknown".
        if label.is_empty() {
            return Self::Unknown;
        }
        match label {
            "unknown" => Self::Unknown,
            "protein_coding" => Self::ProteinCoding,
            "non_coding_rna" => Self::NonCodingRna,
            "lncRNA" => Self::LncRna,
            "antisense_RNA" => Self::AntisenseRna,
            "snoRNA" => Self::SnoRna,
            "snRNA" => Self::SnRna,
            "RNase_MRP_RNA" => Self::RnaseMrpRna,
            "telomerase_RNA" => Self::TelomeraseRna,
            "vault_RNA" => Self::VaultRna,
            unrecognised => Self::Other(String::from(unrecognised)),
        }
    }

    /// Returns `true` only for [`Biotype::ProteinCoding`].
    pub fn is_protein_coding(&self) -> bool {
        *self == Self::ProteinCoding
    }
}
/// Serializes a [`Biotype`] as a plain string: the label returned by
/// [`Biotype::as_str`].
impl Serialize for Biotype {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let label = self.as_str();
        serializer.serialize_str(label)
    }
}
/// Deserializes a [`Biotype`] from a string by passing it through
/// [`Biotype::from_label`].
///
/// Only a failure to read a string produces an error; unrecognised labels
/// become [`Biotype::Other`] rather than being rejected.
impl<'de> Deserialize<'de> for Biotype {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        String::deserialize(deserializer).map(|label| Self::from_label(&label))
    }
}
/// Serializable record describing one transcript: identifiers, genomic
/// placement, exon/CDS structure and classification.
///
/// NOTE(review): the coordinate convention (0- vs 1-based, inclusive vs
/// half-open) is not established by this type — confirm with the producer.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct TranscriptModel {
    /// Accession identifying this transcript.
    pub accession: String,
    /// Accession of the associated protein, when there is one.
    pub protein_accession: Option<String>,
    /// Symbol of the gene the transcript belongs to.
    pub gene_symbol: String,
    /// HGNC identifier, when known.
    pub hgnc_id: Option<String>,
    /// Ensembl accession, when known; presumably the matching Ensembl
    /// transcript — confirm.
    pub ensembl_accession: Option<String>,
    /// Chromosome / sequence name the coordinates refer to.
    pub chrom: String,
    /// Strand of the transcript.
    pub strand: Strand,
    /// Genomic start coordinate of the transcript.
    pub tx_start: u64,
    /// Genomic end coordinate of the transcript.
    pub tx_end: u64,
    /// Genomic start of the coding sequence, if present.
    pub cds_genomic_start: Option<u64>,
    /// Genomic end of the coding sequence, if present.
    pub cds_genomic_end: Option<u64>,
    /// Exons of the transcript; ordering is whatever the producer supplies.
    pub exons: Vec<Exon>,
    /// Coding-sequence segments of the transcript.
    pub cds_segments: Vec<CdsSegment>,
    /// Tier assigned to the transcript.
    pub tier: TranscriptTier,
    /// Biotype of the transcript.
    pub biotype: Biotype,
    /// Stored exon count; presumably equal to `exons.len()`, but nothing in
    /// this type enforces that — verify against the producer.
    pub exon_count: u16,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `value` with `rmp_serde::to_vec_named` and decodes the bytes
    /// back into a `Biotype`.
    fn msgpack_roundtrip(value: &Biotype) -> Biotype {
        let bytes = rmp_serde::to_vec_named(value).unwrap();
        rmp_serde::from_slice(&bytes).unwrap()
    }

    /// Every fixed variant must come back unchanged from a serde round trip.
    #[test]
    fn biotype_roundtrips_known_variants() {
        let fixed_variants = [
            Biotype::ProteinCoding,
            Biotype::NonCodingRna,
            Biotype::LncRna,
            Biotype::AntisenseRna,
            Biotype::SnoRna,
            Biotype::SnRna,
            Biotype::RnaseMrpRna,
            Biotype::TelomeraseRna,
            Biotype::VaultRna,
            Biotype::Unknown,
        ];
        for original in &fixed_variants {
            assert_eq!(msgpack_roundtrip(original), *original);
        }
    }

    /// A label without a dedicated variant is kept verbatim, both when parsed
    /// and across a serde round trip.
    #[test]
    fn biotype_preserves_unknown_upstream_label() {
        let custom = Biotype::from_label("misc_RNA");
        assert_eq!(custom, Biotype::Other("misc_RNA".to_string()));

        let restored = msgpack_roundtrip(&custom);
        assert_eq!(restored, custom);
        assert_eq!(restored.as_str(), "misc_RNA");
    }

    /// Both the empty string and the literal `unknown` parse to `Unknown`.
    #[test]
    fn biotype_from_label_normalizes_empty_to_unknown() {
        for label in ["", "unknown"] {
            assert_eq!(Biotype::from_label(label), Biotype::Unknown);
        }
    }
}