1use serde::{Deserialize, Serialize};
17use std::cmp::Ordering;
18use std::sync::OnceLock;
19
20#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
22pub enum GenomeBuild {
23 GRCh37,
25 #[default]
27 GRCh38,
28 Unknown,
30}
31
32impl std::fmt::Display for GenomeBuild {
33 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
34 match self {
35 GenomeBuild::GRCh37 => write!(f, "GRCh37"),
36 GenomeBuild::GRCh38 => write!(f, "GRCh38"),
37 GenomeBuild::Unknown => write!(f, "Unknown"),
38 }
39 }
40}
41
42#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
44pub enum Strand {
45 #[serde(rename = "+")]
46 #[default]
47 Plus,
48 #[serde(rename = "-")]
49 Minus,
50}
51
52impl std::fmt::Display for Strand {
53 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54 match self {
55 Strand::Plus => write!(f, "+"),
56 Strand::Minus => write!(f, "-"),
57 }
58 }
59}
60
61#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
63pub struct Exon {
64 pub number: u32,
66 pub start: u64,
68 pub end: u64,
70 #[serde(skip_serializing_if = "Option::is_none")]
72 pub genomic_start: Option<u64>,
73 #[serde(skip_serializing_if = "Option::is_none")]
75 pub genomic_end: Option<u64>,
76}
77
78#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
80pub struct Intron {
81 pub number: u32,
83 pub upstream_exon: u32,
85 pub downstream_exon: u32,
87 #[serde(skip_serializing_if = "Option::is_none")]
89 pub genomic_start: Option<u64>,
90 #[serde(skip_serializing_if = "Option::is_none")]
92 pub genomic_end: Option<u64>,
93 pub tx_5prime_boundary: u64,
95 pub tx_3prime_boundary: u64,
97}
98
99impl Intron {
100 pub fn new(
102 number: u32,
103 upstream_exon: u32,
104 downstream_exon: u32,
105 tx_5prime_boundary: u64,
106 tx_3prime_boundary: u64,
107 ) -> Self {
108 Self {
109 number,
110 upstream_exon,
111 downstream_exon,
112 genomic_start: None,
113 genomic_end: None,
114 tx_5prime_boundary,
115 tx_3prime_boundary,
116 }
117 }
118
119 pub fn with_genomic(
121 number: u32,
122 upstream_exon: u32,
123 downstream_exon: u32,
124 tx_5prime_boundary: u64,
125 tx_3prime_boundary: u64,
126 genomic_start: u64,
127 genomic_end: u64,
128 ) -> Self {
129 Self {
130 number,
131 upstream_exon,
132 downstream_exon,
133 genomic_start: Some(genomic_start),
134 genomic_end: Some(genomic_end),
135 tx_5prime_boundary,
136 tx_3prime_boundary,
137 }
138 }
139
140 pub fn genomic_length(&self) -> Option<u64> {
144 match (self.genomic_start, self.genomic_end) {
145 (Some(start), Some(end)) if end >= start => Some((end - start).saturating_add(1)),
146 _ => None,
147 }
148 }
149
150 pub fn contains_genomic(&self, pos: u64) -> bool {
152 match (self.genomic_start, self.genomic_end) {
153 (Some(start), Some(end)) => pos >= start && pos <= end,
154 _ => false,
155 }
156 }
157}
158
159impl Exon {
160 pub fn new(number: u32, start: u64, end: u64) -> Self {
162 Self {
163 number,
164 start,
165 end,
166 genomic_start: None,
167 genomic_end: None,
168 }
169 }
170
171 pub fn with_genomic(
173 number: u32,
174 start: u64,
175 end: u64,
176 genomic_start: u64,
177 genomic_end: u64,
178 ) -> Self {
179 Self {
180 number,
181 start,
182 end,
183 genomic_start: Some(genomic_start),
184 genomic_end: Some(genomic_end),
185 }
186 }
187
188 pub fn len(&self) -> u64 {
190 if self.end >= self.start {
191 self.end - self.start + 1
192 } else {
193 0
194 }
195 }
196
197 pub fn is_empty(&self) -> bool {
198 self.len() == 0
199 }
200
201 pub fn contains(&self, pos: u64) -> bool {
203 pos >= self.start && pos <= self.end
204 }
205}
206
207#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
209pub enum ManeStatus {
210 #[default]
212 None,
213 Select,
215 PlusClinical,
217}
218
219impl ManeStatus {
220 pub fn is_mane(&self) -> bool {
222 !matches!(self, ManeStatus::None)
223 }
224
225 pub fn is_select(&self) -> bool {
227 matches!(self, ManeStatus::Select)
228 }
229
230 pub fn is_plus_clinical(&self) -> bool {
232 matches!(self, ManeStatus::PlusClinical)
233 }
234
235 pub fn priority(&self) -> u8 {
237 match self {
238 ManeStatus::Select => 0,
239 ManeStatus::PlusClinical => 1,
240 ManeStatus::None => 2,
241 }
242 }
243}
244
245impl std::fmt::Display for ManeStatus {
246 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
247 match self {
248 ManeStatus::None => write!(f, ""),
249 ManeStatus::Select => write!(f, "MANE Select"),
250 ManeStatus::PlusClinical => write!(f, "MANE Plus Clinical"),
251 }
252 }
253}
254
255#[derive(Debug, Serialize, Deserialize)]
257pub struct Transcript {
258 pub id: String,
260
261 #[serde(skip_serializing_if = "Option::is_none")]
263 pub gene_symbol: Option<String>,
264
265 pub strand: Strand,
267
268 pub sequence: String,
270
271 #[serde(skip_serializing_if = "Option::is_none")]
273 pub cds_start: Option<u64>,
274
275 #[serde(skip_serializing_if = "Option::is_none")]
277 pub cds_end: Option<u64>,
278
279 pub exons: Vec<Exon>,
281
282 #[serde(skip_serializing_if = "Option::is_none")]
284 pub chromosome: Option<String>,
285
286 #[serde(skip_serializing_if = "Option::is_none")]
288 pub genomic_start: Option<u64>,
289
290 #[serde(skip_serializing_if = "Option::is_none")]
292 pub genomic_end: Option<u64>,
293
294 #[serde(default)]
296 pub genome_build: GenomeBuild,
297
298 #[serde(default)]
300 pub mane_status: ManeStatus,
301
302 #[serde(skip_serializing_if = "Option::is_none")]
304 pub refseq_match: Option<String>,
305
306 #[serde(skip_serializing_if = "Option::is_none")]
308 pub ensembl_match: Option<String>,
309
310 #[serde(skip)]
313 pub(crate) cached_introns: OnceLock<Vec<Intron>>,
314}
315
316impl Clone for Transcript {
317 fn clone(&self) -> Self {
318 Self {
319 id: self.id.clone(),
320 gene_symbol: self.gene_symbol.clone(),
321 strand: self.strand,
322 sequence: self.sequence.clone(),
323 cds_start: self.cds_start,
324 cds_end: self.cds_end,
325 exons: self.exons.clone(),
326 chromosome: self.chromosome.clone(),
327 genomic_start: self.genomic_start,
328 genomic_end: self.genomic_end,
329 genome_build: self.genome_build,
330 mane_status: self.mane_status,
331 refseq_match: self.refseq_match.clone(),
332 ensembl_match: self.ensembl_match.clone(),
333 cached_introns: OnceLock::new(),
335 }
336 }
337}
338
339impl PartialEq for Transcript {
340 fn eq(&self, other: &Self) -> bool {
341 self.id == other.id
343 && self.gene_symbol == other.gene_symbol
344 && self.strand == other.strand
345 && self.sequence == other.sequence
346 && self.cds_start == other.cds_start
347 && self.cds_end == other.cds_end
348 && self.exons == other.exons
349 && self.chromosome == other.chromosome
350 && self.genomic_start == other.genomic_start
351 && self.genomic_end == other.genomic_end
352 && self.genome_build == other.genome_build
353 && self.mane_status == other.mane_status
354 && self.refseq_match == other.refseq_match
355 && self.ensembl_match == other.ensembl_match
356 }
357}
358
359impl Eq for Transcript {}
360
361impl Transcript {
362 #[allow(clippy::too_many_arguments)]
364 pub fn new(
365 id: String,
366 gene_symbol: Option<String>,
367 strand: Strand,
368 sequence: String,
369 cds_start: Option<u64>,
370 cds_end: Option<u64>,
371 exons: Vec<Exon>,
372 chromosome: Option<String>,
373 genomic_start: Option<u64>,
374 genomic_end: Option<u64>,
375 genome_build: GenomeBuild,
376 mane_status: ManeStatus,
377 refseq_match: Option<String>,
378 ensembl_match: Option<String>,
379 ) -> Self {
380 Self {
381 id,
382 gene_symbol,
383 strand,
384 sequence,
385 cds_start,
386 cds_end,
387 exons,
388 chromosome,
389 genomic_start,
390 genomic_end,
391 genome_build,
392 mane_status,
393 refseq_match,
394 ensembl_match,
395 cached_introns: OnceLock::new(),
396 }
397 }
398
399 pub fn sequence_length(&self) -> u64 {
401 self.sequence.len() as u64
402 }
403
404 pub fn is_coding(&self) -> bool {
406 self.cds_start.is_some() && self.cds_end.is_some()
407 }
408
409 pub fn cds_length(&self) -> Option<u64> {
411 match (self.cds_start, self.cds_end) {
412 (Some(start), Some(end)) if end >= start => Some(end - start + 1),
413 _ => None,
414 }
415 }
416
417 pub fn get_sequence(&self, start: u64, end: u64) -> Option<&str> {
419 let start = start as usize;
420 let end = end as usize;
421 if end <= self.sequence.len() && start < end {
422 Some(&self.sequence[start..end])
423 } else {
424 None
425 }
426 }
427
428 pub fn exon_at(&self, pos: u64) -> Option<&Exon> {
433 self.exons
435 .binary_search_by(|e| {
436 if pos < e.start {
437 Ordering::Greater
438 } else if pos > e.end {
439 Ordering::Less
440 } else {
441 Ordering::Equal
442 }
443 })
444 .ok()
445 .map(|i| &self.exons[i])
446 }
447
448 pub fn utr5_length(&self) -> Option<u64> {
450 self.cds_start.map(|s| s.saturating_sub(1))
451 }
452
453 pub fn utr3_length(&self) -> Option<u64> {
455 self.cds_end
456 .map(|e| self.sequence_length().saturating_sub(e))
457 }
458
459 pub fn has_genomic_coords(&self) -> bool {
461 self.chromosome.is_some() && self.genomic_start.is_some() && self.genomic_end.is_some()
462 }
463
464 pub fn genomic_length(&self) -> Option<u64> {
466 match (self.genomic_start, self.genomic_end) {
467 (Some(start), Some(end)) if end >= start => Some(end - start + 1),
468 _ => None,
469 }
470 }
471
472 pub fn contains_genomic_pos(&self, pos: u64) -> bool {
474 match (self.genomic_start, self.genomic_end) {
475 (Some(start), Some(end)) => pos >= start && pos <= end,
476 _ => false,
477 }
478 }
479
480 pub fn is_mane_select(&self) -> bool {
482 self.mane_status.is_select()
483 }
484
485 pub fn is_mane_plus_clinical(&self) -> bool {
487 self.mane_status.is_plus_clinical()
488 }
489
490 pub fn is_mane(&self) -> bool {
492 self.mane_status.is_mane()
493 }
494
495 pub fn introns(&self) -> &[Intron] {
500 self.cached_introns.get_or_init(|| self.compute_introns())
501 }
502
503 pub fn calculate_introns(&self) -> Vec<Intron> {
511 self.introns().to_vec()
512 }
513
514 fn compute_introns(&self) -> Vec<Intron> {
516 let mut introns = Vec::new();
517
518 let mut sorted_exons: Vec<_> = self.exons.iter().collect();
520 sorted_exons.sort_by_key(|e| e.start);
521
522 for (i, window) in sorted_exons.windows(2).enumerate() {
523 let upstream = window[0];
524 let downstream = window[1];
525
526 let intron_number = (i + 1) as u32;
527
528 let (genomic_start, genomic_end) = match self.strand {
530 Strand::Plus => {
531 let g_start = upstream.genomic_end.map(|e| e + 1);
533 let g_end = downstream.genomic_start.map(|s| s - 1);
534 (g_start, g_end)
535 }
536 Strand::Minus => {
537 let g_start = downstream.genomic_end.map(|e| e + 1);
540 let g_end = upstream.genomic_start.map(|s| s - 1);
541 (g_start, g_end)
542 }
543 };
544
545 let mut intron = Intron::new(
546 intron_number,
547 upstream.number,
548 downstream.number,
549 upstream.end, downstream.start, );
552
553 if let (Some(gs), Some(ge)) = (genomic_start, genomic_end) {
554 if ge >= gs {
555 intron.genomic_start = Some(gs);
556 intron.genomic_end = Some(ge);
557 }
558 }
559
560 introns.push(intron);
561 }
562
563 introns
564 }
565
566 pub fn find_intron_at_genomic(&self, genomic_pos: u64) -> Option<(Intron, IntronPosition)> {
572 for intron in self.introns() {
574 if intron.contains_genomic(genomic_pos) {
575 let (g_start, g_end) = (intron.genomic_start?, intron.genomic_end?);
576 let intron_length = (g_end - g_start).saturating_add(1);
578
579 let (dist_to_5prime, dist_to_3prime) = match self.strand {
582 Strand::Plus => {
583 let from_5prime = (genomic_pos - g_start).saturating_add(1); let from_3prime = (g_end - genomic_pos).saturating_add(1);
586 (from_5prime, from_3prime)
587 }
588 Strand::Minus => {
589 let from_5prime = (g_end - genomic_pos).saturating_add(1);
591 let from_3prime = (genomic_pos - g_start).saturating_add(1);
592 (from_5prime, from_3prime)
593 }
594 };
595
596 let position = if dist_to_5prime <= dist_to_3prime {
598 IntronPosition {
600 intron_number: intron.number,
601 boundary: IntronBoundary::FivePrime,
602 offset: dist_to_5prime as i64,
603 tx_boundary_pos: intron.tx_5prime_boundary,
604 intron_length,
605 }
606 } else {
607 IntronPosition {
609 intron_number: intron.number,
610 boundary: IntronBoundary::ThreePrime,
611 offset: -(dist_to_3prime as i64),
612 tx_boundary_pos: intron.tx_3prime_boundary,
613 intron_length,
614 }
615 };
616
617 return Some((intron.clone(), position));
618 }
619 }
620
621 None
622 }
623
624 pub fn intron_count(&self) -> usize {
626 if self.exons.len() > 1 {
627 self.exons.len() - 1
628 } else {
629 0
630 }
631 }
632
633 pub fn find_intron_at_tx_boundary(&self, tx_boundary: u64, offset: i64) -> Option<&Intron> {
645 for intron in self.introns() {
646 if offset > 0 && intron.tx_5prime_boundary == tx_boundary {
647 return Some(intron);
649 } else if offset < 0 && intron.tx_3prime_boundary == tx_boundary {
650 return Some(intron);
652 }
653 }
654 None
655 }
656
657 pub fn intronic_to_genomic(&self, tx_boundary: u64, offset: i64) -> Option<u64> {
666 let intron = self.find_intron_at_tx_boundary(tx_boundary, offset)?;
667
668 let (g_start, g_end) = (intron.genomic_start?, intron.genomic_end?);
670
671 match self.strand {
672 Strand::Plus => {
673 if offset > 0 {
674 Some(g_start + offset as u64 - 1)
677 } else {
678 Some(g_end - (-offset) as u64 + 1)
681 }
682 }
683 Strand::Minus => {
684 if offset > 0 {
686 Some(g_end - offset as u64 + 1)
689 } else {
690 Some(g_start + (-offset) as u64 - 1)
693 }
694 }
695 }
696 }
697
698 pub fn genomic_to_intronic(&self, genomic_pos: u64) -> Option<(u64, i64)> {
708 let (intron, intron_pos) = self.find_intron_at_genomic(genomic_pos)?;
709
710 match intron_pos.boundary {
711 IntronBoundary::FivePrime => {
712 Some((intron.tx_5prime_boundary, intron_pos.offset))
714 }
715 IntronBoundary::ThreePrime => {
716 Some((intron.tx_3prime_boundary, intron_pos.offset))
718 }
719 }
720 }
721}
722
723#[derive(Debug, Clone, Copy, PartialEq, Eq)]
725pub struct IntronPosition {
726 pub intron_number: u32,
728 pub boundary: IntronBoundary,
730 pub offset: i64,
732 pub tx_boundary_pos: u64,
734 pub intron_length: u64,
736}
737
738impl IntronPosition {
739 pub fn is_deep_intronic(&self) -> bool {
741 self.offset.abs() > 50
742 }
743
744 pub fn is_canonical_splice_site(&self) -> bool {
746 self.offset.abs() <= 2
747 }
748
749 pub fn is_near_splice_site(&self) -> bool {
751 self.offset.abs() <= 10
752 }
753
754 pub fn is_extended_splice_region(&self) -> bool {
756 self.offset.abs() <= 20
757 }
758
759 pub fn splice_site_type(&self) -> SpliceSiteType {
761 let abs_offset = self.offset.abs();
762 match self.boundary {
763 IntronBoundary::FivePrime => {
764 if abs_offset <= 2 {
766 SpliceSiteType::DonorCanonical
767 } else if abs_offset <= 6 {
768 SpliceSiteType::DonorExtended
769 } else if abs_offset <= 20 {
770 SpliceSiteType::DonorRegion
771 } else if abs_offset <= 50 {
772 SpliceSiteType::NearSplice
773 } else {
774 SpliceSiteType::DeepIntronic
775 }
776 }
777 IntronBoundary::ThreePrime => {
778 if abs_offset <= 2 {
780 SpliceSiteType::AcceptorCanonical
781 } else if abs_offset <= 12 {
782 SpliceSiteType::AcceptorExtended
784 } else if abs_offset <= 20 {
785 SpliceSiteType::AcceptorRegion
786 } else if abs_offset <= 50 {
787 SpliceSiteType::NearSplice
788 } else {
789 SpliceSiteType::DeepIntronic
790 }
791 }
792 }
793 }
794
795 pub fn distance_from_donor(&self) -> Option<u64> {
797 match self.boundary {
798 IntronBoundary::FivePrime => Some(self.offset.unsigned_abs()),
799 IntronBoundary::ThreePrime => {
800 Some(self.intron_length - self.offset.unsigned_abs())
802 }
803 }
804 }
805
806 pub fn distance_from_acceptor(&self) -> Option<u64> {
808 match self.boundary {
809 IntronBoundary::ThreePrime => Some(self.offset.unsigned_abs()),
810 IntronBoundary::FivePrime => {
811 Some(self.intron_length - self.offset.unsigned_abs())
813 }
814 }
815 }
816}
817
818#[derive(Debug, Clone, Copy, PartialEq, Eq)]
820pub enum IntronBoundary {
821 FivePrime,
823 ThreePrime,
825}
826
827#[derive(Debug, Clone, Copy, PartialEq, Eq)]
829pub enum SpliceSiteType {
830 DonorCanonical,
832 DonorExtended,
834 DonorRegion,
836 AcceptorCanonical,
838 AcceptorExtended,
840 AcceptorRegion,
842 NearSplice,
844 DeepIntronic,
846}
847
848#[cfg(test)]
849mod tests {
850 use super::*;
851
852 fn make_test_transcript() -> Transcript {
853 Transcript {
854 id: "NM_000088.3".to_string(),
855 gene_symbol: Some("COL1A1".to_string()),
856 strand: Strand::Plus,
857 sequence: "ATGCATGCATGCATGCATGC".to_string(), cds_start: Some(5),
859 cds_end: Some(15),
860 exons: vec![
861 Exon::new(1, 1, 7),
862 Exon::new(2, 8, 14),
863 Exon::new(3, 15, 20),
864 ],
865 chromosome: None,
866 genomic_start: None,
867 genomic_end: None,
868 genome_build: GenomeBuild::default(),
869 mane_status: ManeStatus::default(),
870 refseq_match: None,
871 ensembl_match: None,
872 cached_introns: OnceLock::new(),
873 }
874 }
875
876 fn make_test_transcript_with_genomic() -> Transcript {
877 Transcript {
878 id: "NM_000088.4".to_string(),
879 gene_symbol: Some("COL1A1".to_string()),
880 strand: Strand::Plus,
881 sequence: "ATGCATGCATGCATGCATGC".to_string(),
882 cds_start: Some(5),
883 cds_end: Some(15),
884 exons: vec![
885 Exon::with_genomic(1, 1, 7, 50189542, 50189548),
886 Exon::with_genomic(2, 8, 14, 50190100, 50190106),
887 Exon::with_genomic(3, 15, 20, 50190500, 50190505),
888 ],
889 chromosome: Some("chr17".to_string()),
890 genomic_start: Some(50189542),
891 genomic_end: Some(50190505),
892 genome_build: GenomeBuild::GRCh38,
893 mane_status: ManeStatus::Select,
894 refseq_match: None,
895 ensembl_match: Some("ENST00000123456.5".to_string()),
896 cached_introns: OnceLock::new(),
897 }
898 }
899
900 #[test]
901 fn test_transcript_sequence_length() {
902 let tx = make_test_transcript();
903 assert_eq!(tx.sequence_length(), 20);
904 }
905
906 #[test]
907 fn test_transcript_is_coding() {
908 let tx = make_test_transcript();
909 assert!(tx.is_coding());
910
911 let noncoding = Transcript {
912 cds_start: None,
913 cds_end: None,
914 ..tx
915 };
916 assert!(!noncoding.is_coding());
917 }
918
919 #[test]
920 fn test_transcript_cds_length() {
921 let tx = make_test_transcript();
922 assert_eq!(tx.cds_length(), Some(11));
923 }
924
925 #[test]
926 fn test_exon_contains() {
927 let exon = Exon::new(1, 10, 20);
928 assert!(exon.contains(10));
929 assert!(exon.contains(15));
930 assert!(exon.contains(20));
931 assert!(!exon.contains(9));
932 assert!(!exon.contains(21));
933 }
934
935 #[test]
936 fn test_exon_at() {
937 let tx = make_test_transcript();
938 let exon = tx.exon_at(10).unwrap();
939 assert_eq!(exon.number, 2);
940 }
941
942 #[test]
943 fn test_get_sequence() {
944 let tx = make_test_transcript();
945 assert_eq!(tx.get_sequence(0, 3), Some("ATG"));
946 }
947
948 #[test]
949 fn test_strand_display() {
950 assert_eq!(format!("{}", Strand::Plus), "+");
951 assert_eq!(format!("{}", Strand::Minus), "-");
952 }
953
954 #[test]
955 fn test_genome_build_display() {
956 assert_eq!(format!("{}", GenomeBuild::GRCh37), "GRCh37");
957 assert_eq!(format!("{}", GenomeBuild::GRCh38), "GRCh38");
958 assert_eq!(format!("{}", GenomeBuild::Unknown), "Unknown");
959 }
960
961 #[test]
962 fn test_genome_build_default() {
963 assert_eq!(GenomeBuild::default(), GenomeBuild::GRCh38);
964 }
965
966 #[test]
967 fn test_exon_with_genomic() {
968 let exon = Exon::with_genomic(1, 1, 100, 50000, 50099);
969 assert_eq!(exon.number, 1);
970 assert_eq!(exon.start, 1);
971 assert_eq!(exon.end, 100);
972 assert_eq!(exon.genomic_start, Some(50000));
973 assert_eq!(exon.genomic_end, Some(50099));
974 }
975
976 #[test]
977 fn test_transcript_has_genomic_coords() {
978 let tx = make_test_transcript();
979 assert!(!tx.has_genomic_coords());
980
981 let tx_genomic = make_test_transcript_with_genomic();
982 assert!(tx_genomic.has_genomic_coords());
983 }
984
985 #[test]
986 fn test_transcript_genomic_length() {
987 let tx = make_test_transcript();
988 assert_eq!(tx.genomic_length(), None);
989
990 let tx_genomic = make_test_transcript_with_genomic();
991 assert_eq!(tx_genomic.genomic_length(), Some(964));
993 }
994
995 #[test]
996 fn test_transcript_contains_genomic_pos() {
997 let tx = make_test_transcript();
998 assert!(!tx.contains_genomic_pos(50189542));
999
1000 let tx_genomic = make_test_transcript_with_genomic();
1001 assert!(tx_genomic.contains_genomic_pos(50189542)); assert!(tx_genomic.contains_genomic_pos(50190000)); assert!(tx_genomic.contains_genomic_pos(50190505)); assert!(!tx_genomic.contains_genomic_pos(50189541)); assert!(!tx_genomic.contains_genomic_pos(50190506)); }
1007
1008 #[test]
1009 fn test_mane_status_default() {
1010 assert_eq!(ManeStatus::default(), ManeStatus::None);
1011 }
1012
1013 #[test]
1014 fn test_mane_status_methods() {
1015 assert!(!ManeStatus::None.is_mane());
1016 assert!(!ManeStatus::None.is_select());
1017 assert!(!ManeStatus::None.is_plus_clinical());
1018
1019 assert!(ManeStatus::Select.is_mane());
1020 assert!(ManeStatus::Select.is_select());
1021 assert!(!ManeStatus::Select.is_plus_clinical());
1022
1023 assert!(ManeStatus::PlusClinical.is_mane());
1024 assert!(!ManeStatus::PlusClinical.is_select());
1025 assert!(ManeStatus::PlusClinical.is_plus_clinical());
1026 }
1027
1028 #[test]
1029 fn test_mane_status_priority() {
1030 assert!(ManeStatus::Select.priority() < ManeStatus::PlusClinical.priority());
1031 assert!(ManeStatus::PlusClinical.priority() < ManeStatus::None.priority());
1032 }
1033
1034 #[test]
1035 fn test_mane_status_display() {
1036 assert_eq!(format!("{}", ManeStatus::None), "");
1037 assert_eq!(format!("{}", ManeStatus::Select), "MANE Select");
1038 assert_eq!(
1039 format!("{}", ManeStatus::PlusClinical),
1040 "MANE Plus Clinical"
1041 );
1042 }
1043
1044 #[test]
1045 fn test_transcript_mane_methods() {
1046 let tx = make_test_transcript();
1047 assert!(!tx.is_mane());
1048 assert!(!tx.is_mane_select());
1049 assert!(!tx.is_mane_plus_clinical());
1050
1051 let tx_mane = make_test_transcript_with_genomic();
1052 assert!(tx_mane.is_mane());
1053 assert!(tx_mane.is_mane_select());
1054 assert!(!tx_mane.is_mane_plus_clinical());
1055 }
1056
1057 #[test]
1058 fn test_transcript_matched_accessions() {
1059 let tx = make_test_transcript_with_genomic();
1060 assert_eq!(tx.refseq_match, None);
1061 assert_eq!(tx.ensembl_match, Some("ENST00000123456.5".to_string()));
1062 }
1063
1064 #[test]
1065 fn test_calculate_introns() {
1066 let tx = make_test_transcript_with_genomic();
1067 let introns = tx.calculate_introns();
1068
1069 assert_eq!(introns.len(), 2);
1071
1072 let intron1 = &introns[0];
1074 assert_eq!(intron1.number, 1);
1075 assert_eq!(intron1.upstream_exon, 1);
1076 assert_eq!(intron1.downstream_exon, 2);
1077 assert_eq!(intron1.tx_5prime_boundary, 7); assert_eq!(intron1.tx_3prime_boundary, 8); let intron2 = &introns[1];
1082 assert_eq!(intron2.number, 2);
1083 assert_eq!(intron2.upstream_exon, 2);
1084 assert_eq!(intron2.downstream_exon, 3);
1085 }
1086
1087 #[test]
1088 fn test_intron_genomic_coords() {
1089 let tx = make_test_transcript_with_genomic();
1090 let introns = tx.calculate_introns();
1091
1092 let intron1 = &introns[0];
1094 assert_eq!(intron1.genomic_start, Some(50189549));
1095 assert_eq!(intron1.genomic_end, Some(50190099));
1096 assert!(intron1.genomic_length().is_some());
1097 assert_eq!(intron1.genomic_length().unwrap(), 551);
1098 }
1099
1100 #[test]
1101 fn test_intron_contains_genomic() {
1102 let tx = make_test_transcript_with_genomic();
1103 let introns = tx.calculate_introns();
1104 let intron1 = &introns[0];
1105
1106 assert!(intron1.contains_genomic(50189600));
1108 assert!(intron1.contains_genomic(50190000));
1109
1110 assert!(!intron1.contains_genomic(50189548)); assert!(!intron1.contains_genomic(50190100)); }
1114
1115 #[test]
1116 fn test_find_intron_at_genomic() {
1117 let tx = make_test_transcript_with_genomic();
1118
1119 let result = tx.find_intron_at_genomic(50189550);
1121 assert!(result.is_some());
1122 let (intron, pos) = result.unwrap();
1123 assert_eq!(intron.number, 1);
1124 assert_eq!(pos.intron_number, 1);
1125 assert_eq!(pos.boundary, IntronBoundary::FivePrime);
1126 assert_eq!(pos.offset, 2); let result = tx.find_intron_at_genomic(50190098);
1130 assert!(result.is_some());
1131 let (_, pos) = result.unwrap();
1132 assert_eq!(pos.boundary, IntronBoundary::ThreePrime);
1133 assert!(pos.offset < 0);
1134 }
1135
1136 #[test]
1137 fn test_intron_position_splice_site_type() {
1138 let pos = IntronPosition {
1140 intron_number: 1,
1141 boundary: IntronBoundary::FivePrime,
1142 offset: 1,
1143 tx_boundary_pos: 100,
1144 intron_length: 500,
1145 };
1146 assert_eq!(pos.splice_site_type(), SpliceSiteType::DonorCanonical);
1147 assert!(pos.is_canonical_splice_site());
1148 assert!(!pos.is_deep_intronic());
1149
1150 let pos = IntronPosition {
1152 intron_number: 1,
1153 boundary: IntronBoundary::ThreePrime,
1154 offset: -2,
1155 tx_boundary_pos: 200,
1156 intron_length: 500,
1157 };
1158 assert_eq!(pos.splice_site_type(), SpliceSiteType::AcceptorCanonical);
1159
1160 let pos = IntronPosition {
1162 intron_number: 1,
1163 boundary: IntronBoundary::FivePrime,
1164 offset: 100,
1165 tx_boundary_pos: 100,
1166 intron_length: 500,
1167 };
1168 assert_eq!(pos.splice_site_type(), SpliceSiteType::DeepIntronic);
1169 assert!(pos.is_deep_intronic());
1170 }
1171
1172 #[test]
1173 fn test_intron_position_distances() {
1174 let pos = IntronPosition {
1175 intron_number: 1,
1176 boundary: IntronBoundary::FivePrime,
1177 offset: 10,
1178 tx_boundary_pos: 100,
1179 intron_length: 500,
1180 };
1181
1182 assert_eq!(pos.distance_from_donor(), Some(10));
1184 assert_eq!(pos.distance_from_acceptor(), Some(490));
1186 }
1187
1188 #[test]
1189 fn test_intron_count() {
1190 let tx = make_test_transcript_with_genomic();
1191 assert_eq!(tx.intron_count(), 2);
1192
1193 let single_exon = Transcript {
1195 id: "NR_TEST.1".to_string(),
1196 gene_symbol: None,
1197 strand: Strand::Plus,
1198 sequence: "A".repeat(100),
1199 cds_start: None,
1200 cds_end: None,
1201 exons: vec![Exon::new(1, 1, 100)],
1202 chromosome: None,
1203 genomic_start: None,
1204 genomic_end: None,
1205 genome_build: GenomeBuild::default(),
1206 mane_status: ManeStatus::default(),
1207 refseq_match: None,
1208 ensembl_match: None,
1209 cached_introns: std::sync::OnceLock::new(),
1210 };
1211 assert_eq!(single_exon.intron_count(), 0);
1212 }
1213}