1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
use crate::annotate::seqvars::csq::SequenceReporting;
use clap::Args as ClapArgs;
use strum::{Display, VariantArray};
// Command-line options naming the annotation databases to use.
//
// The `#[group(required = true, multiple = true)]` attribute makes clap insist
// on at least one of the options below while allowing several of them to be
// combined. Every field is an `Option<Vec<String>>`, so it stays `None` when
// its flag is not passed at all.
//
// Plain `//` comments are used for these notes on purpose: clap's derive turns
// `///` doc comments into user-visible help text.
#[derive(Debug, ClapArgs)]
#[group(required = true, multiple = true)]
pub struct Sources {
// Values are free-form strings here; presumably database locations — confirm
// against the code that opens them.
/// Transcript database containing the transcript information.
///
/// Pre-built databases are available at https://github.com/varfish-org/mehari-data-tx/releases
#[arg(long)]
pub transcripts: Option<Vec<String>>,
// TODO(review): the help text below still ends in a literal "TODO" instead of
// a download location.
/// Frequency database.
///
/// The frequency database contains gnomAD frequencies for the variants.
/// Pre-built databases are available at TODO
#[arg(long)]
pub frequencies: Option<Vec<String>>,
/// ClinVar database.
///
/// The ClinVar database contains clinical significance information for the variants.
/// Pre-built databases are available at https://github.com/varfish-org/annonars-data-clinvar/releases
#[arg(long)]
pub clinvar: Option<Vec<String>>,
}
// Top-level settings bundle: one hidden VEP-compatibility switch plus four
// flattened option groups (transcript, reporting, normalization, compound).
//
// Plain `//` comments are used for these notes on purpose: clap's derive turns
// `///` doc comments into user-visible help text.
#[derive(Debug, ClapArgs, Default, Clone)]
pub struct PredictorSettings {
// `hide = true` keeps the flag out of the generated help output.
// Read by `PredictorSettings::vep_consequence_terms`, where it forces the
// result to `true`.
/// Enable vep compatibility mode.
/// This enables specific normalization flags and less fine-grained vep consequence terms.
#[arg(long, default_value_t = false, hide = true)]
pub vep_compatibility_mode: bool,
// `flatten` merges the nested struct's options into this struct's argument list.
#[clap(flatten)]
pub transcript_settings: TranscriptSettings,
#[clap(flatten)]
pub reporting_settings: ReportingSettings,
// The fields of `NormalizationSettings` are private; two of them are exposed
// through accessor methods on `PredictorSettings`.
#[clap(flatten)]
pub normalization_settings: NormalizationSettings,
#[clap(flatten)]
pub compound_settings: CompoundSettings,
}
impl PredictorSettings {
    /// Returns the `do_not_normalize_variants` flag stored in the nested
    /// normalization settings.
    pub fn do_not_normalize_variants(&self) -> bool {
        let normalization = &self.normalization_settings;
        normalization.do_not_normalize_variants
    }

    /// Returns the `do_not_renormalize_g` flag stored in the nested
    /// normalization settings.
    pub fn do_not_renormalize_g(&self) -> bool {
        let normalization = &self.normalization_settings;
        normalization.do_not_renormalize_g
    }

    /// Returns `true` when VEP-style consequence terms are requested, either
    /// through VEP compatibility mode or through the dedicated reporting flag.
    pub fn vep_consequence_terms(&self) -> bool {
        // Compatibility mode wins outright; otherwise defer to the explicit flag.
        if self.vep_compatibility_mode {
            return true;
        }
        self.reporting_settings.use_vep_consequence_terms
    }
}
// Options that narrow down which transcripts and consequences are reported.
//
// Plain `//` comments are used for these notes on purpose: clap's derive turns
// `///` doc comments into user-visible help text.
#[derive(Debug, ClapArgs, Default, Clone)]
pub struct TranscriptSettings {
// Optional: stays `None` when the flag is not passed.
/// Whether to report only the most severe consequence, grouped by gene, transcript, or allele.
#[arg(long)]
pub report_most_severe_consequence_by: Option<ConsequenceBy>,
// A `Vec`, so the flag can be given more than once; the empty vector
// corresponds to the documented "do not pick" default.
/// Which kind of transcript to pick / restrict to. Default is not to pick at all.
///
/// Depending on `--pick-transcript-mode`, if multiple transcripts match the selection,
/// either the first one is kept or all are kept.
#[arg(long)]
pub pick_transcript: Vec<TranscriptPickType>,
// The textual default "all" names `TranscriptPickMode::All`, which is also
// that enum's `#[default]` variant, so the CLI default and `Default::default()`
// agree.
/// Determines how to handle multiple transcripts. Default is to keep all.
///
/// When transcript picking is enabled via `--pick-transcript`,
/// either keep the first one found or keep all that match.
#[arg(long, default_value = "all")]
pub pick_transcript_mode: TranscriptPickMode,
}
// Options controlling which annotations are reported and in what detail.
//
// Plain `//` comments are used for these notes on purpose: clap's derive turns
// `///` doc comments into user-visible help text.
#[derive(Debug, ClapArgs, Default, Clone)]
pub struct ReportingSettings {
    /// Whether to keep intergenic variants.
    #[arg(long, default_value_t = false)]
    pub keep_intergenic: bool,

    // The help text previously said "report", which contradicted the flag
    // name: passing `--discard-utr-splice-variants` asks for them to be dropped.
    /// Whether to discard splice variants in UTRs.
    #[arg(long, default_value_t = false)]
    pub discard_utr_splice_variants: bool,

    // Private and hidden from help; read through
    // `PredictorSettings::vep_consequence_terms`.
    /// Whether to use less fine-grained VEP consequence terms.
    #[arg(long, default_value_t = false, hide = true)]
    use_vep_consequence_terms: bool,

    /// Whether to report cDNA sequence.
    #[arg(long, value_enum, default_value_t = SequenceReporting::None)]
    pub report_cdna_sequence: SequenceReporting,

    /// Whether to report protein sequence.
    #[arg(long, value_enum, default_value_t = SequenceReporting::None)]
    pub report_protein_sequence: SequenceReporting,
}
// Hidden switches that tune variant normalization.
//
// All three fields are private to this module and carry `hide = true`, so they
// do not appear in the generated help. `PredictorSettings` exposes
// `do_not_normalize_variants` and `do_not_renormalize_g` through accessor
// methods of the same name.
//
// Plain `//` comments are used for these notes on purpose: clap's derive turns
// `///` doc comments into user-visible help text.
#[derive(Debug, ClapArgs, Default, Clone)]
pub struct NormalizationSettings {
// NOTE(review): no accessor for this field is visible in this part of the
// file — confirm where it is read.
/// Whether to do hgvs shifting for hgvs.g like vep does
#[arg(long, default_value_t = false, hide = true)]
vep_hgvs_shift: bool,
/// Whether to skip HGVS normalization.
#[arg(long, default_value_t = false, hide = true)]
do_not_normalize_variants: bool,
/// Whether to skip re-normalizing genomic variants.
#[arg(long, default_value_t = false, hide = true)]
do_not_renormalize_g: bool,
}
// Grouping level accepted by `--report-most-severe-consequence-by`
// (`TranscriptSettings::report_most_severe_consequence_by`): the most severe
// consequence is reported per gene, per transcript, or per allele.
//
// Plain `//` comments are used here so that no derive macro picks the notes up
// as help text.
#[derive(
Debug,
Copy,
Clone,
PartialEq,
Eq,
PartialOrd,
Ord,
Display,
clap::ValueEnum,
VariantArray,
parse_display::FromStr,
)]
pub enum ConsequenceBy {
Gene,
Transcript,
// or "Variant"?
Allele,
}
// Selection criteria accepted by `--pick-transcript`
// (`TranscriptSettings::pick_transcript`).
//
// NOTE(review): the derived `PartialOrd`/`Ord` compare variants by declaration
// order, so reordering the list below changes comparison results — check
// whether any caller relies on that order (e.g. as a pick priority).
//
// Plain `//` comments are used here so that no derive macro picks the notes up
// as help text.
#[derive(
Debug,
Copy,
Clone,
PartialEq,
Eq,
PartialOrd,
Ord,
Display,
clap::ValueEnum,
VariantArray,
parse_display::FromStr,
)]
pub enum TranscriptPickType {
ManeSelect,
ManeSelectBackport,
ManePlusClinical,
ManePlusClinicalBackport,
Length,
EnsemblCanonical,
EnsemblCanonicalBackport,
RefSeqSelect,
RefSeqSelectBackport,
GencodePrimary,
GencodePrimaryBackport,
Basic,
BasicBackport,
}
// How many matching transcripts are kept when picking is enabled
// (`--pick-transcript-mode`, `TranscriptSettings::pick_transcript_mode`).
//
// Plain `//` comments are used here so that clap does not pick the notes up
// as help text for the possible values.
#[derive(Debug, Copy, Clone, Display, clap::ValueEnum, Default)]
pub enum TranscriptPickMode {
// Keep only the first matching transcript.
First,
// Keep every matching transcript; this is the `Default` value.
#[default]
All,
}
#[derive(Debug, clap::Args, Default, Clone)]
pub struct CompoundSettings {
/// Experimental: Enable variant grouping to evaluate the compound effect of multiple variants on the same transcript.
/// When disabled, Mehari evaluates each variant independently.
#[arg(long, default_value_t = false)]
pub enable_compound_variants: bool,
/// Experimental: The strategy used to evaluate grouped variants for compound effects.
#[arg(long, value_enum, default_value_t = PhasingStrategy::Strict)]
pub phasing_strategy: PhasingStrategy,
}
// Strategy selected by `--phasing-strategy`
// (`CompoundSettings::phasing_strategy`) for deciding which variants are
// grouped when compound evaluation is enabled.
//
// The `///` comments on the variants are kept as doc comments because clap's
// `ValueEnum` derive shows them as help for the possible values; the notes
// here use `//` so they stay out of that help text.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, clap::ValueEnum)]
pub enum PhasingStrategy {
    /// Variants are only grouped if explicitly phased ('|') and sharing a Phase Set (PS).
    /// Unphased variants are evaluated independently.
    #[default]
    Strict,

    // A period was added after "universally phased": the two doc lines are
    // joined into one paragraph and previously read as a run-on sentence.
    /// Respects explicit phasing, but treats homozygous variants as universally phased.
    /// Unphased heterozygous variants remain independent.
    Relaxed,

    /// Completely ignores phasing metadata and _assumes_ all variants on the transcript are on the same haplotype.
    Ignore,
}