1use super::helpers::{
4 build_alt_codon, build_base_result, compute_cdna_position_exonic,
5 compute_cdna_position_for_cds, fetch_ref_codon, finalize_consequences,
6 is_incomplete_terminal_codon,
7};
8use super::{Consequence, ConsequenceResult};
9use crate::codon::{complement, format_amino_acids, format_codons, translate_codon_for_transcript};
10use crate::error::VarEffectError;
11use crate::fasta::FastaReader;
12use crate::hgvs_c;
13use crate::locate::{
14 LocateIndex, VariantLocation, format_exon_number, format_intron_number, locate_variant,
15};
16use crate::types::{Strand, TranscriptModel, TranscriptTier};
17
18pub fn annotate_snv(
49 chrom: &str,
50 pos: u64,
51 ref_base: u8,
52 alt_base: u8,
53 transcript: &TranscriptModel,
54 locate_index: &LocateIndex,
55 fasta: &FastaReader,
56) -> Result<ConsequenceResult, VarEffectError> {
57 if !fasta.verify_ref(chrom, pos, &[ref_base])? {
58 let expected = fasta.fetch_base(chrom, pos)?;
59 return Err(VarEffectError::RefMismatch {
60 chrom: chrom.to_string(),
61 pos,
62 expected: String::from(expected as char),
63 got: String::from(ref_base as char),
64 });
65 }
66 annotate_snv_verified(
67 chrom,
68 pos,
69 ref_base,
70 alt_base,
71 transcript,
72 locate_index,
73 fasta,
74 )
75}
76
77pub(super) fn annotate_snv_verified(
81 chrom: &str,
82 pos: u64,
83 ref_base: u8,
84 alt_base: u8,
85 transcript: &TranscriptModel,
86 locate_index: &LocateIndex,
87 fasta: &FastaReader,
88) -> Result<ConsequenceResult, VarEffectError> {
89 let location = locate_variant(chrom, pos, pos + 1, transcript, locate_index)?;
90 let hgvs_c =
91 hgvs_c::format_snv_hgvs(pos, ref_base, alt_base, &location, transcript, locate_index)?;
92
93 let mut result = match location {
94 VariantLocation::CdsExon {
95 exon_index,
96 cds_offset,
97 codon_number,
98 codon_position,
99 is_splice_region,
100 ..
101 } => annotate_cds_snv(
102 chrom,
103 alt_base,
104 transcript,
105 locate_index,
106 fasta,
107 exon_index,
108 cds_offset,
109 codon_number,
110 codon_position,
111 is_splice_region,
112 ),
113
114 VariantLocation::SpliceDonor { intron_index, .. } => {
115 let mut result = build_base_result(transcript, vec![Consequence::SpliceDonorVariant]);
116 result.intron = Some(format_intron_number(intron_index, transcript.exon_count));
117 Ok(result)
118 }
119
120 VariantLocation::SpliceAcceptor { intron_index, .. } => {
121 let mut result =
122 build_base_result(transcript, vec![Consequence::SpliceAcceptorVariant]);
123 result.intron = Some(format_intron_number(intron_index, transcript.exon_count));
124 Ok(result)
125 }
126
127 VariantLocation::SpliceRegion { intron_index, .. } => {
128 let mut result = build_base_result(
129 transcript,
130 vec![Consequence::SpliceRegionVariant, Consequence::IntronVariant],
131 );
132 result.intron = Some(format_intron_number(intron_index, transcript.exon_count));
133 Ok(result)
134 }
135
136 VariantLocation::Intron { intron_index, .. } => {
137 let mut result = build_base_result(transcript, vec![Consequence::IntronVariant]);
138 result.intron = Some(format_intron_number(intron_index, transcript.exon_count));
139 Ok(result)
140 }
141
142 VariantLocation::FivePrimeUtr {
143 exon_index,
144 is_splice_region,
145 ..
146 } => {
147 let mut consequences = vec![Consequence::FivePrimeUtrVariant];
148 if is_splice_region {
149 consequences.push(Consequence::SpliceRegionVariant);
150 }
151 let mut result = build_base_result(transcript, consequences);
152 result.exon = Some(format_exon_number(exon_index, transcript.exon_count));
153 result.cdna_position = compute_cdna_position_exonic(pos, transcript);
154 result.cdna_position_end = result.cdna_position;
155 Ok(result)
156 }
157
158 VariantLocation::ThreePrimeUtr {
159 exon_index,
160 is_splice_region,
161 ..
162 } => {
163 let mut consequences = vec![Consequence::ThreePrimeUtrVariant];
164 if is_splice_region {
165 consequences.push(Consequence::SpliceRegionVariant);
166 }
167 let mut result = build_base_result(transcript, consequences);
168 result.exon = Some(format_exon_number(exon_index, transcript.exon_count));
169 result.cdna_position = compute_cdna_position_exonic(pos, transcript);
170 result.cdna_position_end = result.cdna_position;
171 Ok(result)
172 }
173
174 VariantLocation::Upstream { .. } => Ok(build_base_result(
175 transcript,
176 vec![Consequence::UpstreamGeneVariant],
177 )),
178
179 VariantLocation::Downstream { .. } => Ok(build_base_result(
180 transcript,
181 vec![Consequence::DownstreamGeneVariant],
182 )),
183
184 VariantLocation::NonCodingExon {
185 exon_index,
186 is_splice_region,
187 } => {
188 let mut consequences = vec![Consequence::NonCodingTranscriptExonVariant];
189 if is_splice_region {
190 consequences.push(Consequence::SpliceRegionVariant);
191 }
192 let mut result = build_base_result(transcript, consequences);
193 result.exon = Some(format_exon_number(exon_index, transcript.exon_count));
194 result.cdna_position = compute_cdna_position_exonic(pos, transcript);
195 result.cdna_position_end = result.cdna_position;
196 Ok(result)
197 }
198
199 VariantLocation::NonCodingIntron { intron_index, .. } => {
200 let mut result = build_base_result(transcript, vec![Consequence::IntronVariant]);
201 result.intron = Some(format_intron_number(intron_index, transcript.exon_count));
202 Ok(result)
203 }
204
205 VariantLocation::Distal => Err(VarEffectError::Malformed(
206 "variant is distal to transcript -- callers should filter by overlap first".to_string(),
207 )),
208 }?;
209
210 result.hgvs_c = hgvs_c;
211
212 if result.consequences.contains(&Consequence::StopLost) {
214 if let Some(ref aa_str) = result.amino_acids
215 && let Some((_, alt_str)) = aa_str.split_once('/')
216 && let Some(&alt_byte) = alt_str.as_bytes().first()
217 {
218 result.hgvs_p = Some(crate::hgvs_p::format_hgvs_p_extension(
219 alt_byte,
220 result
221 .protein_start
222 .expect("protein_start always set for coding SNV"),
223 chrom,
224 transcript,
225 locate_index,
226 fasta,
227 )?);
228 }
229 } else {
230 result.hgvs_p = crate::hgvs_p::format_hgvs_p_snv(
231 &result.consequences,
232 result.amino_acids.as_deref(),
233 result.protein_start,
234 );
235 }
236 Ok(result)
237}
238
239#[allow(clippy::too_many_arguments)]
241fn annotate_cds_snv(
242 chrom: &str,
243 alt_base: u8,
244 transcript: &TranscriptModel,
245 index: &LocateIndex,
246 fasta: &FastaReader,
247 exon_index: u16,
248 cds_offset: u32,
249 codon_number: u32,
250 codon_position: u8,
251 is_splice_region: bool,
252) -> Result<ConsequenceResult, VarEffectError> {
253 let codon_start_offset = cds_offset - codon_position as u32;
254
255 if is_incomplete_terminal_codon(cds_offset, index) {
256 let consequences = vec![Consequence::IncompleteTerminalCodonVariant];
257 let mut result = build_base_result(transcript, consequences);
258 result.exon = Some(format_exon_number(exon_index, transcript.exon_count));
259 result.cds_position = Some(cds_offset + 1);
260 result.cds_position_end = Some(cds_offset + 1);
261 result.cdna_position = Some(compute_cdna_position_for_cds(cds_offset, index));
262 result.cdna_position_end = result.cdna_position;
263 return Ok(result);
264 }
265
266 let ref_codon = fetch_ref_codon(codon_start_offset, chrom, transcript, index, fasta)?;
267
268 let coding_alt = match transcript.strand {
270 Strand::Plus => alt_base,
271 Strand::Minus => complement(alt_base),
272 };
273 let alt_codon = build_alt_codon(&ref_codon, codon_position, coding_alt);
274
275 let is_mito = transcript.chrom == "chrM";
276 let ref_aa = translate_codon_for_transcript(&ref_codon, is_mito);
277 let alt_aa = translate_codon_for_transcript(&alt_codon, is_mito);
278
279 if ref_aa == b'X' || alt_aa == b'X' {
281 let mut consequences = vec![Consequence::CodingSequenceVariant];
282 if is_splice_region {
283 consequences.push(Consequence::SpliceRegionVariant);
284 }
285 let mut result = build_base_result(transcript, consequences);
286 result.exon = Some(format_exon_number(exon_index, transcript.exon_count));
287 result.cds_position = Some(cds_offset + 1);
288 result.cds_position_end = Some(cds_offset + 1);
289 result.cdna_position = Some(compute_cdna_position_for_cds(cds_offset, index));
290 result.cdna_position_end = Some(compute_cdna_position_for_cds(cds_offset, index));
291 return Ok(result);
292 }
293
294 let coding_consequence = if ref_aa == alt_aa {
295 if codon_number == 1 && ref_aa == b'M' {
296 Consequence::StartRetainedVariant
297 } else if ref_aa == b'*' {
298 Consequence::StopRetainedVariant
299 } else {
300 Consequence::SynonymousVariant
301 }
302 } else if alt_aa == b'*' {
303 Consequence::StopGained
304 } else if ref_aa == b'*' {
305 Consequence::StopLost
306 } else if codon_number == 1 && ref_aa == b'M' {
307 Consequence::StartLost
308 } else {
309 Consequence::MissenseVariant
310 };
311
312 let mut consequences = vec![coding_consequence];
313 if is_splice_region {
314 consequences.push(Consequence::SpliceRegionVariant);
315 }
316 let impact = finalize_consequences(&mut consequences);
317 let predicts_nmd = coding_consequence == Consequence::StopGained
318 && super::nmd::predicts_nmd(cds_offset + 1, index);
319
320 Ok(ConsequenceResult {
321 transcript: transcript.accession.clone(),
322 gene_symbol: transcript.gene_symbol.clone(),
323 protein_accession: transcript.protein_accession.clone(),
324 consequences,
325 impact,
326 protein_start: Some(codon_number),
327 protein_end: Some(codon_number),
328 codons: Some(format_codons(&ref_codon, &alt_codon, codon_position)),
329 amino_acids: Some(format_amino_acids(ref_aa, alt_aa)),
330 exon: Some(format_exon_number(exon_index, transcript.exon_count)),
331 intron: None,
332 cds_position: Some(cds_offset + 1),
333 cds_position_end: Some(cds_offset + 1),
334 cdna_position: Some(compute_cdna_position_for_cds(cds_offset, index)),
335 cdna_position_end: Some(compute_cdna_position_for_cds(cds_offset, index)),
336 strand: transcript.strand,
337 biotype: transcript.biotype.clone(),
338 is_mane_select: transcript.tier == TranscriptTier::ManeSelect,
339 is_mane_plus_clinical: transcript.tier == TranscriptTier::ManePlusClinical,
340 is_refseq_select: transcript.tier == TranscriptTier::RefSeqSelect,
341 hgvs_c: None,
342 hgvs_p: None,
343 predicts_nmd,
344 })
345}