1use cyanea_core::{Annotated, Summarizable};
7
8use crate::genomic::{GenomicInterval, Strand};
9
/// Biotype label carried by a [`Gene`] in its `gene_type` field.
///
/// Each fixed variant has a canonical text form produced by the
/// [`Display`](core::fmt::Display) implementation; any label without a
/// dedicated variant is stored verbatim in [`GeneType::Other`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum GeneType {
    /// Displayed as `protein_coding`.
    ProteinCoding,
    /// Displayed as `lncRNA`.
    LncRNA,
    /// Displayed as `miRNA`.
    MiRNA,
    /// Displayed as `rRNA`.
    RRNA,
    /// Displayed as `tRNA`.
    TRNA,
    /// Displayed as `pseudogene`.
    Pseudogene,
    /// Any other label; displayed exactly as the contained string.
    Other(String),
}

impl core::fmt::Display for GeneType {
    /// Writes the canonical label of the variant.
    ///
    /// The label is emitted as-is: width, fill and alignment flags of the
    /// formatter are not applied.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Resolve the label first so there is a single write site.
        let label: &str = match self {
            Self::ProteinCoding => "protein_coding",
            Self::LncRNA => "lncRNA",
            Self::MiRNA => "miRNA",
            Self::RRNA => "rRNA",
            Self::TRNA => "tRNA",
            Self::Pseudogene => "pseudogene",
            Self::Other(custom) => custom.as_str(),
        };
        f.write_str(label)
    }
}
36
/// One exon of a [`Transcript`], described by its number and a coordinate pair.
///
/// The span is measured as `end - start`, i.e. like a half-open
/// `[start, end)` range.
/// NOTE(review): whether coordinates are 0- or 1-based is not visible here —
/// confirm against the code that builds these values.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Exon {
    /// Number of this exon (the tests number exons starting at 1).
    pub exon_number: u32,
    /// Start coordinate of the exon.
    pub start: u64,
    /// End coordinate of the exon; expected to be `>= start`.
    pub end: u64,
}

impl Exon {
    /// Returns the exon length, `end - start`.
    ///
    /// The fields are public, so an inverted span (`end < start`) can be
    /// constructed. A plain subtraction would then panic in debug builds and
    /// wrap to a huge value in release builds; the subtraction saturates
    /// instead, so an inverted span reports a length of `0`.
    pub fn len(&self) -> u64 {
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` when the exon covers no positions (`len() == 0`).
    ///
    /// This includes inverted spans, keeping `is_empty` consistent with
    /// [`Exon::len`].
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
59
60#[derive(Debug, Clone, PartialEq, Eq)]
62#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
63pub struct Transcript {
64 pub transcript_id: String,
65 pub start: u64,
67 pub end: u64,
69 pub exons: Vec<Exon>,
70 pub cds_start: Option<u64>,
72 pub cds_end: Option<u64>,
74}
75
76impl Transcript {
77 pub fn len(&self) -> u64 {
79 self.end - self.start
80 }
81
82 pub fn is_empty(&self) -> bool {
84 self.start == self.end
85 }
86
87 pub fn n_exons(&self) -> usize {
89 self.exons.len()
90 }
91
92 pub fn exonic_length(&self) -> u64 {
94 self.exons.iter().map(|e| e.len()).sum()
95 }
96
97 pub fn to_genomic_interval(&self, chrom: &str, strand: Strand) -> GenomicInterval {
99 GenomicInterval {
100 chrom: chrom.into(),
101 start: self.start,
102 end: self.end,
103 strand,
104 }
105 }
106}
107
108#[derive(Debug, Clone)]
110#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
111pub struct Gene {
112 pub gene_id: String,
113 pub gene_name: String,
114 pub chrom: String,
115 pub start: u64,
117 pub end: u64,
119 pub strand: Strand,
120 pub gene_type: GeneType,
121 pub transcripts: Vec<Transcript>,
122}
123
124impl Gene {
125 pub fn len(&self) -> u64 {
127 self.end - self.start
128 }
129
130 pub fn is_empty(&self) -> bool {
132 self.start == self.end
133 }
134
135 pub fn n_transcripts(&self) -> usize {
137 self.transcripts.len()
138 }
139
140 pub fn to_genomic_interval(&self) -> GenomicInterval {
142 GenomicInterval {
143 chrom: self.chrom.clone(),
144 start: self.start,
145 end: self.end,
146 strand: self.strand,
147 }
148 }
149
150 pub fn is_protein_coding(&self) -> bool {
152 self.gene_type == GeneType::ProteinCoding
153 }
154}
155
156impl Annotated for Gene {
157 fn name(&self) -> &str {
158 &self.gene_name
159 }
160}
161
162impl Summarizable for Gene {
163 fn summary(&self) -> String {
164 format!(
165 "Gene: {} ({}:{}-{}, {}, {}, {} transcripts)",
166 self.gene_name,
167 self.chrom,
168 self.start,
169 self.end,
170 self.strand,
171 self.gene_type,
172 self.n_transcripts()
173 )
174 }
175}
176
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: a protein-coding gene on the reverse strand of `chr17` with
    /// two transcripts — one with three exons and CDS bounds, one with a
    /// single exon and no CDS bounds.
    fn sample_gene() -> Gene {
        Gene {
            gene_id: "ENSG00000141510".into(),
            gene_name: "TP53".into(),
            chrom: "chr17".into(),
            start: 7668421,
            end: 7687490,
            strand: Strand::Reverse,
            gene_type: GeneType::ProteinCoding,
            transcripts: vec![
                Transcript {
                    transcript_id: "ENST00000269305".into(),
                    start: 7668421,
                    end: 7687490,
                    exons: vec![
                        Exon { exon_number: 1, start: 7668421, end: 7668586 },
                        Exon { exon_number: 2, start: 7670609, end: 7670715 },
                        Exon { exon_number: 3, start: 7673534, end: 7673608 },
                    ],
                    cds_start: Some(7668421),
                    cds_end: Some(7687490),
                },
                Transcript {
                    transcript_id: "ENST00000413465".into(),
                    start: 7669608,
                    end: 7687490,
                    exons: vec![
                        Exon { exon_number: 1, start: 7669608, end: 7669690 },
                    ],
                    cds_start: None,
                    cds_end: None,
                },
            ],
        }
    }

    #[test]
    fn test_exon_len() {
        let exon = Exon { exon_number: 1, start: 100, end: 300 };
        assert_eq!(exon.len(), 200);
    }

    #[test]
    fn test_exon_is_empty() {
        let empty = Exon { exon_number: 1, start: 5, end: 5 };
        assert!(empty.is_empty());
        assert_eq!(empty.len(), 0);

        let exon = Exon { exon_number: 1, start: 100, end: 300 };
        assert!(!exon.is_empty());
    }

    #[test]
    fn test_transcript_len() {
        let gene = sample_gene();
        assert_eq!(gene.transcripts[0].len(), 7687490 - 7668421);
        assert_eq!(gene.transcripts[1].len(), 7687490 - 7669608);
        assert!(!gene.transcripts[0].is_empty());
    }

    #[test]
    fn test_transcript_exonic_length() {
        let gene = sample_gene();
        let tx = &gene.transcripts[0];
        // 165 + 106 + 74 across the three exons.
        assert_eq!(tx.exonic_length(), 345);
        // Single-exon transcript: 7669690 - 7669608.
        assert_eq!(gene.transcripts[1].exonic_length(), 82);
    }

    #[test]
    fn test_transcript_without_exons() {
        let tx = Transcript {
            transcript_id: "tx_no_exons".into(),
            start: 10,
            end: 10,
            exons: vec![],
            cds_start: None,
            cds_end: None,
        };
        assert_eq!(tx.n_exons(), 0);
        assert_eq!(tx.exonic_length(), 0);
        assert!(tx.is_empty());
    }

    #[test]
    fn test_transcript_n_exons() {
        let gene = sample_gene();
        assert_eq!(gene.transcripts[0].n_exons(), 3);
        assert_eq!(gene.transcripts[1].n_exons(), 1);
    }

    #[test]
    fn test_transcript_to_interval() {
        let gene = sample_gene();
        let tx = &gene.transcripts[0];
        let iv = tx.to_genomic_interval("chr17", Strand::Reverse);
        assert_eq!(iv.chrom, "chr17");
        assert_eq!(iv.start, 7668421);
        assert_eq!(iv.end, 7687490);
        assert_eq!(iv.strand, Strand::Reverse);
    }

    #[test]
    fn test_gene_len() {
        let gene = sample_gene();
        assert_eq!(gene.len(), 7687490 - 7668421);
        assert!(!gene.is_empty());
    }

    #[test]
    fn test_gene_is_empty() {
        let mut gene = sample_gene();
        gene.end = gene.start;
        assert!(gene.is_empty());
        assert_eq!(gene.len(), 0);
    }

    #[test]
    fn test_gene_n_transcripts() {
        let gene = sample_gene();
        assert_eq!(gene.n_transcripts(), 2);
    }

    #[test]
    fn test_gene_to_interval() {
        let gene = sample_gene();
        let iv = gene.to_genomic_interval();
        assert_eq!(iv.chrom, "chr17");
        assert_eq!(iv.start, 7668421);
        assert_eq!(iv.end, 7687490);
        assert_eq!(iv.strand, Strand::Reverse);
    }

    #[test]
    fn test_gene_is_protein_coding() {
        let gene = sample_gene();
        assert!(gene.is_protein_coding());

        let mut non_coding = sample_gene();
        non_coding.gene_type = GeneType::LncRNA;
        assert!(!non_coding.is_protein_coding());
    }

    #[test]
    fn test_annotated() {
        let gene = sample_gene();
        assert_eq!(gene.name(), "TP53");
    }

    #[test]
    fn test_summary() {
        let gene = sample_gene();
        assert_eq!(
            gene.summary(),
            "Gene: TP53 (chr17:7668421-7687490, -, protein_coding, 2 transcripts)"
        );
    }

    #[test]
    fn test_gene_type_display() {
        assert_eq!(GeneType::ProteinCoding.to_string(), "protein_coding");
        assert_eq!(GeneType::LncRNA.to_string(), "lncRNA");
        assert_eq!(GeneType::MiRNA.to_string(), "miRNA");
        assert_eq!(GeneType::RRNA.to_string(), "rRNA");
        assert_eq!(GeneType::TRNA.to_string(), "tRNA");
        assert_eq!(GeneType::Pseudogene.to_string(), "pseudogene");
        assert_eq!(GeneType::Other("snRNA".into()).to_string(), "snRNA");
    }
}