Skip to main content

ferro_hgvs/reference/
provider.rs

1//! Reference provider trait
2//!
3//! Defines the interface for accessing reference sequence data.
4
5use crate::error::FerroError;
6use crate::reference::transcript::Transcript;
7
8/// Trait for providing reference sequence data
9///
10/// Implementations might include:
11/// - MockProvider for testing
12/// - SeqRepoProvider for local sequence databases
13/// - NCBIProvider for remote NCBI E-utilities
14pub trait ReferenceProvider {
15    /// Get a transcript by its accession
16    fn get_transcript(&self, id: &str) -> Result<Transcript, FerroError>;
17
18    /// Get a sequence region
19    ///
20    /// # Arguments
21    ///
22    /// * `id` - Sequence accession
23    /// * `start` - 0-based start position
24    /// * `end` - 0-based end position (exclusive)
25    fn get_sequence(&self, id: &str, start: u64, end: u64) -> Result<String, FerroError>;
26
27    /// Check if a transcript exists
28    fn has_transcript(&self, id: &str) -> bool {
29        self.get_transcript(id).is_ok()
30    }
31
32    /// Get genomic sequence for a contig/chromosome
33    ///
34    /// This is used for normalizing intronic variants which require access
35    /// to genomic (not transcript) sequence data.
36    ///
37    /// # Arguments
38    ///
39    /// * `contig` - Chromosome/contig accession (e.g., "NC_000001.11", "chr1")
40    /// * `start` - 0-based start position
41    /// * `end` - 0-based end position (exclusive)
42    ///
43    /// # Returns
44    ///
45    /// The genomic sequence, or an error if genomic data is not available.
46    /// The default implementation returns an error indicating genomic data
47    /// is not available.
48    fn get_genomic_sequence(
49        &self,
50        contig: &str,
51        start: u64,
52        end: u64,
53    ) -> Result<String, FerroError> {
54        Err(FerroError::GenomicReferenceNotAvailable {
55            contig: contig.to_string(),
56            start,
57            end,
58        })
59    }
60
61    /// Check if this provider has genomic sequence data
62    ///
63    /// Returns true if `get_genomic_sequence` can return data.
64    fn has_genomic_data(&self) -> bool {
65        false
66    }
67
68    /// Get protein sequence for a protein accession
69    ///
70    /// This is used for validating protein variants against the reference
71    /// protein sequence.
72    ///
73    /// # Arguments
74    ///
75    /// * `accession` - Protein accession (e.g., "NP_000079.2", "ENSP00000256509")
76    /// * `start` - 0-based start position (amino acid)
77    /// * `end` - 0-based end position (exclusive)
78    ///
79    /// # Returns
80    ///
81    /// The protein sequence (amino acid letters), or an error if not available.
82    fn get_protein_sequence(
83        &self,
84        accession: &str,
85        start: u64,
86        end: u64,
87    ) -> Result<String, FerroError> {
88        Err(FerroError::ProteinReferenceNotAvailable {
89            accession: accession.to_string(),
90            start,
91            end,
92        })
93    }
94
95    /// Check if this provider has protein sequence data
96    fn has_protein_data(&self) -> bool {
97        false
98    }
99}
100
101/// Blanket implementation for boxed trait objects
102impl ReferenceProvider for Box<dyn ReferenceProvider> {
103    fn get_transcript(&self, id: &str) -> Result<Transcript, FerroError> {
104        (**self).get_transcript(id)
105    }
106
107    fn get_sequence(&self, id: &str, start: u64, end: u64) -> Result<String, FerroError> {
108        (**self).get_sequence(id, start, end)
109    }
110
111    fn has_transcript(&self, id: &str) -> bool {
112        (**self).has_transcript(id)
113    }
114
115    fn get_genomic_sequence(
116        &self,
117        contig: &str,
118        start: u64,
119        end: u64,
120    ) -> Result<String, FerroError> {
121        (**self).get_genomic_sequence(contig, start, end)
122    }
123
124    fn has_genomic_data(&self) -> bool {
125        (**self).has_genomic_data()
126    }
127
128    fn get_protein_sequence(
129        &self,
130        accession: &str,
131        start: u64,
132        end: u64,
133    ) -> Result<String, FerroError> {
134        (**self).get_protein_sequence(accession, start, end)
135    }
136
137    fn has_protein_data(&self) -> bool {
138        (**self).has_protein_data()
139    }
140}