ferro_hgvs/reference/provider.rs
1//! Reference provider trait
2//!
3//! Defines the interface for accessing reference sequence data.
4
5use crate::error::FerroError;
6use crate::reference::transcript::Transcript;
7
8/// Trait for providing reference sequence data
9///
10/// Implementations might include:
11/// - MockProvider for testing
12/// - SeqRepoProvider for local sequence databases
13/// - NCBIProvider for remote NCBI E-utilities
14pub trait ReferenceProvider {
15 /// Get a transcript by its accession
16 fn get_transcript(&self, id: &str) -> Result<Transcript, FerroError>;
17
18 /// Get a sequence region
19 ///
20 /// # Arguments
21 ///
22 /// * `id` - Sequence accession
23 /// * `start` - 0-based start position
24 /// * `end` - 0-based end position (exclusive)
25 fn get_sequence(&self, id: &str, start: u64, end: u64) -> Result<String, FerroError>;
26
27 /// Check if a transcript exists
28 fn has_transcript(&self, id: &str) -> bool {
29 self.get_transcript(id).is_ok()
30 }
31
32 /// Get genomic sequence for a contig/chromosome
33 ///
34 /// This is used for normalizing intronic variants which require access
35 /// to genomic (not transcript) sequence data.
36 ///
37 /// # Arguments
38 ///
39 /// * `contig` - Chromosome/contig accession (e.g., "NC_000001.11", "chr1")
40 /// * `start` - 0-based start position
41 /// * `end` - 0-based end position (exclusive)
42 ///
43 /// # Returns
44 ///
45 /// The genomic sequence, or an error if genomic data is not available.
46 /// The default implementation returns an error indicating genomic data
47 /// is not available.
48 fn get_genomic_sequence(
49 &self,
50 contig: &str,
51 start: u64,
52 end: u64,
53 ) -> Result<String, FerroError> {
54 Err(FerroError::GenomicReferenceNotAvailable {
55 contig: contig.to_string(),
56 start,
57 end,
58 })
59 }
60
61 /// Check if this provider has genomic sequence data
62 ///
63 /// Returns true if `get_genomic_sequence` can return data.
64 fn has_genomic_data(&self) -> bool {
65 false
66 }
67
68 /// Get protein sequence for a protein accession
69 ///
70 /// This is used for validating protein variants against the reference
71 /// protein sequence.
72 ///
73 /// # Arguments
74 ///
75 /// * `accession` - Protein accession (e.g., "NP_000079.2", "ENSP00000256509")
76 /// * `start` - 0-based start position (amino acid)
77 /// * `end` - 0-based end position (exclusive)
78 ///
79 /// # Returns
80 ///
81 /// The protein sequence (amino acid letters), or an error if not available.
82 fn get_protein_sequence(
83 &self,
84 accession: &str,
85 start: u64,
86 end: u64,
87 ) -> Result<String, FerroError> {
88 Err(FerroError::ProteinReferenceNotAvailable {
89 accession: accession.to_string(),
90 start,
91 end,
92 })
93 }
94
95 /// Check if this provider has protein sequence data
96 fn has_protein_data(&self) -> bool {
97 false
98 }
99}
100
101/// Blanket implementation for boxed trait objects
102impl ReferenceProvider for Box<dyn ReferenceProvider> {
103 fn get_transcript(&self, id: &str) -> Result<Transcript, FerroError> {
104 (**self).get_transcript(id)
105 }
106
107 fn get_sequence(&self, id: &str, start: u64, end: u64) -> Result<String, FerroError> {
108 (**self).get_sequence(id, start, end)
109 }
110
111 fn has_transcript(&self, id: &str) -> bool {
112 (**self).has_transcript(id)
113 }
114
115 fn get_genomic_sequence(
116 &self,
117 contig: &str,
118 start: u64,
119 end: u64,
120 ) -> Result<String, FerroError> {
121 (**self).get_genomic_sequence(contig, start, end)
122 }
123
124 fn has_genomic_data(&self) -> bool {
125 (**self).has_genomic_data()
126 }
127
128 fn get_protein_sequence(
129 &self,
130 accession: &str,
131 start: u64,
132 end: u64,
133 ) -> Result<String, FerroError> {
134 (**self).get_protein_sequence(accession, start, end)
135 }
136
137 fn has_protein_data(&self) -> bool {
138 (**self).has_protein_data()
139 }
140}