use crate::io::config::{ArtifactConfig, MutationConfig, RandomMutationConfig};
use super::presets::PresetOverlay;
/// Looks up the cancer-type preset overlay registered under `name`.
///
/// `name` must be one of the identifiers returned by [`all_names`]; matching is
/// exact (case-sensitive, no trimming).
///
/// # Errors
///
/// Returns an error that quotes the unknown `name` and lists every valid
/// choice when `name` does not match a known preset.
pub fn get(name: &str) -> anyhow::Result<PresetOverlay> {
    let overlay = match name {
        "lung_adeno" => preset_lung_adeno(),
        "colorectal" => preset_colorectal(),
        "breast_tnbc" => preset_breast_tnbc(),
        "melanoma" => preset_melanoma(),
        "aml" => preset_aml(),
        "prostate" => preset_prostate(),
        "pancreatic" => preset_pancreatic(),
        "glioblastoma" => preset_glioblastoma(),
        // The list of valid choices is taken from `all_names()` instead of being
        // spelled out a second time, so the message cannot drift from the
        // advertised preset list when a preset is added or renamed.
        other => anyhow::bail!(
            "unknown cancer preset '{}'; valid choices: {}",
            other,
            all_names().join(", ")
        ),
    };
    Ok(overlay)
}
/// Returns the identifier of every cancer preset, in a fixed order.
///
/// Each returned name is accepted by `get` and has a `drivers_for` arm.
pub fn all_names() -> &'static [&'static str] {
    const PRESET_NAMES: &[&str] = &[
        "lung_adeno",
        "colorectal",
        "breast_tnbc",
        "melanoma",
        "aml",
        "prostate",
        "pancreatic",
        "glioblastoma",
    ];
    PRESET_NAMES
}
/// Converts a mutations-per-megabase rate into an absolute mutation count.
///
/// The rate is multiplied by 3,000 (presumably the genome size in megabases —
/// TODO confirm). Usable in `const` contexts.
const fn muts_from_per_mb(per_mb: usize) -> usize {
    const SCALE: usize = 3_000;
    SCALE * per_mb
}
/// Builds the overlay for the `lung_adeno` preset.
///
/// Coverage is 30.0 and purity 0.60. Only random mutations are requested
/// (`vcf` is `None`, `sv_count` is 0, driver mutations are not included):
/// `muts_from_per_mb(8)` variants with `vaf_min` 0.01 and `vaf_max` 0.60, split
/// 0.85 SNV / 0.10 indel / 0.05 MNV. All remaining overlay fields keep their
/// `Default` values.
fn preset_lung_adeno() -> PresetOverlay {
    let random = RandomMutationConfig {
        count: muts_from_per_mb(8),
        vaf_min: 0.01,
        vaf_max: 0.60,
        snv_fraction: 0.85,
        indel_fraction: 0.10,
        mnv_fraction: 0.05,
        signature: None,
    };
    let mutations = MutationConfig {
        vcf: None,
        random: Some(random),
        sv_signature: None,
        sv_count: 0,
        include_driver_mutations: false,
    };

    PresetOverlay {
        coverage: Some(30.0),
        purity: Some(0.60),
        mutations: Some(mutations),
        ..Default::default()
    }
}
/// Builds the overlay for the `colorectal` preset.
///
/// Coverage is 30.0 and purity 0.65. Only random mutations are requested
/// (`vcf` is `None`, `sv_count` is 0, driver mutations are not included):
/// `muts_from_per_mb(5)` variants with `vaf_min` 0.01 and `vaf_max` 0.65, split
/// 0.82 SNV / 0.13 indel / 0.05 MNV. All remaining overlay fields keep their
/// `Default` values.
fn preset_colorectal() -> PresetOverlay {
    let random = RandomMutationConfig {
        count: muts_from_per_mb(5),
        vaf_min: 0.01,
        vaf_max: 0.65,
        snv_fraction: 0.82,
        indel_fraction: 0.13,
        mnv_fraction: 0.05,
        signature: None,
    };
    let mutations = MutationConfig {
        vcf: None,
        random: Some(random),
        sv_signature: None,
        sv_count: 0,
        include_driver_mutations: false,
    };

    PresetOverlay {
        coverage: Some(30.0),
        purity: Some(0.65),
        mutations: Some(mutations),
        ..Default::default()
    }
}
/// Builds the overlay for the `breast_tnbc` preset.
///
/// Coverage is 30.0 and purity 0.55. Only random mutations are requested
/// (`vcf` is `None`, `sv_count` is 0, driver mutations are not included):
/// `muts_from_per_mb(5)` variants with `vaf_min` 0.01 and `vaf_max` 0.55, split
/// 0.75 SNV / 0.20 indel / 0.05 MNV. This is the only preset in this file that
/// also sets `artifacts`: a duplicate rate of 0.05, with every other artifact
/// rate left as `None`. All remaining overlay fields keep their `Default`
/// values.
fn preset_breast_tnbc() -> PresetOverlay {
    let random = RandomMutationConfig {
        count: muts_from_per_mb(5),
        vaf_min: 0.01,
        vaf_max: 0.55,
        snv_fraction: 0.75,
        indel_fraction: 0.20,
        mnv_fraction: 0.05,
        signature: None,
    };
    let mutations = MutationConfig {
        vcf: None,
        random: Some(random),
        sv_signature: None,
        sv_count: 0,
        include_driver_mutations: false,
    };
    let artifacts = ArtifactConfig {
        ffpe_damage_rate: None,
        oxog_rate: None,
        duplicate_rate: Some(0.05),
        pcr_error_rate: None,
    };

    PresetOverlay {
        coverage: Some(30.0),
        purity: Some(0.55),
        mutations: Some(mutations),
        artifacts: Some(artifacts),
        ..Default::default()
    }
}
/// Builds the overlay for the `melanoma` preset.
///
/// Coverage is 30.0 and purity 0.70. Only random mutations are requested
/// (`vcf` is `None`, `sv_count` is 0, driver mutations are not included):
/// `muts_from_per_mb(30)` variants — the largest count of any preset in this
/// file — with `vaf_min` 0.01 and `vaf_max` 0.70, split 0.88 SNV / 0.05 indel /
/// 0.07 MNV. All remaining overlay fields keep their `Default` values.
fn preset_melanoma() -> PresetOverlay {
    let random = RandomMutationConfig {
        count: muts_from_per_mb(30),
        vaf_min: 0.01,
        vaf_max: 0.70,
        snv_fraction: 0.88,
        indel_fraction: 0.05,
        mnv_fraction: 0.07,
        signature: None,
    };
    let mutations = MutationConfig {
        vcf: None,
        random: Some(random),
        sv_signature: None,
        sv_count: 0,
        include_driver_mutations: false,
    };

    PresetOverlay {
        coverage: Some(30.0),
        purity: Some(0.70),
        mutations: Some(mutations),
        ..Default::default()
    }
}
/// Builds the overlay for the `aml` preset.
///
/// Coverage is 30.0 and purity 0.80. Only random mutations are requested
/// (`vcf` is `None`, `sv_count` is 0, driver mutations are not included):
/// `muts_from_per_mb(1)` variants — the smallest count of any preset in this
/// file — with `vaf_min` 0.05 and `vaf_max` 0.80, split 0.80 SNV / 0.15 indel /
/// 0.05 MNV. All remaining overlay fields keep their `Default` values.
fn preset_aml() -> PresetOverlay {
    let random = RandomMutationConfig {
        count: muts_from_per_mb(1),
        vaf_min: 0.05,
        vaf_max: 0.80,
        snv_fraction: 0.80,
        indel_fraction: 0.15,
        mnv_fraction: 0.05,
        signature: None,
    };
    let mutations = MutationConfig {
        vcf: None,
        random: Some(random),
        sv_signature: None,
        sv_count: 0,
        include_driver_mutations: false,
    };

    PresetOverlay {
        coverage: Some(30.0),
        purity: Some(0.80),
        mutations: Some(mutations),
        ..Default::default()
    }
}
/// Builds the overlay for the `prostate` preset.
///
/// Coverage is 30.0 and purity 0.50. Only random mutations are requested
/// (`vcf` is `None`, `sv_count` is 0, driver mutations are not included):
/// `muts_from_per_mb(2)` variants with `vaf_min` 0.01 and `vaf_max` 0.50, split
/// 0.82 SNV / 0.13 indel / 0.05 MNV. All remaining overlay fields keep their
/// `Default` values.
fn preset_prostate() -> PresetOverlay {
    let random = RandomMutationConfig {
        count: muts_from_per_mb(2),
        vaf_min: 0.01,
        vaf_max: 0.50,
        snv_fraction: 0.82,
        indel_fraction: 0.13,
        mnv_fraction: 0.05,
        signature: None,
    };
    let mutations = MutationConfig {
        vcf: None,
        random: Some(random),
        sv_signature: None,
        sv_count: 0,
        include_driver_mutations: false,
    };

    PresetOverlay {
        coverage: Some(30.0),
        purity: Some(0.50),
        mutations: Some(mutations),
        ..Default::default()
    }
}
/// Builds the overlay for the `pancreatic` preset.
///
/// Coverage is 30.0 and purity 0.25 — the lowest purity of any preset in this
/// file. Only random mutations are requested (`vcf` is `None`, `sv_count` is
/// 0, driver mutations are not included): `muts_from_per_mb(3)` variants with
/// `vaf_min` 0.005 and `vaf_max` 0.25, split 0.82 SNV / 0.13 indel / 0.05 MNV.
/// All remaining overlay fields keep their `Default` values.
fn preset_pancreatic() -> PresetOverlay {
    let random = RandomMutationConfig {
        count: muts_from_per_mb(3),
        vaf_min: 0.005,
        vaf_max: 0.25,
        snv_fraction: 0.82,
        indel_fraction: 0.13,
        mnv_fraction: 0.05,
        signature: None,
    };
    let mutations = MutationConfig {
        vcf: None,
        random: Some(random),
        sv_signature: None,
        sv_count: 0,
        include_driver_mutations: false,
    };

    PresetOverlay {
        coverage: Some(30.0),
        purity: Some(0.25),
        mutations: Some(mutations),
        ..Default::default()
    }
}
/// Builds the overlay for the `glioblastoma` preset.
///
/// Coverage is 30.0 and purity 0.65. Only random mutations are requested
/// (`vcf` is `None`, `sv_count` is 0, driver mutations are not included):
/// `muts_from_per_mb(4)` variants with `vaf_min` 0.01 and `vaf_max` 0.65, split
/// 0.82 SNV / 0.13 indel / 0.05 MNV. All remaining overlay fields keep their
/// `Default` values.
fn preset_glioblastoma() -> PresetOverlay {
    let random = RandomMutationConfig {
        count: muts_from_per_mb(4),
        vaf_min: 0.01,
        vaf_max: 0.65,
        snv_fraction: 0.82,
        indel_fraction: 0.13,
        mnv_fraction: 0.05,
        signature: None,
    };
    let mutations = MutationConfig {
        vcf: None,
        random: Some(random),
        sv_signature: None,
        sv_count: 0,
        include_driver_mutations: false,
    };

    PresetOverlay {
        coverage: Some(30.0),
        purity: Some(0.65),
        mutations: Some(mutations),
        ..Default::default()
    }
}
/// A driver alteration listed for one cancer preset.
///
/// In every table in this file an entry either carries a concrete locus
/// (`chrom`, `pos`, `ref_allele` and `alt_allele` all `Some`) or leaves all
/// four as `None`.
///
/// NOTE(review): nothing here states the genome build, whether `pos` is 0- or
/// 1-based, or which strand the alleles are written on — confirm before
/// relying on the loci.
#[derive(Debug, Clone, PartialEq)]
pub struct DriverMutation {
/// Gene label, e.g. `"KRAS"`; fusions use a combined label such as
/// `"TMPRSS2-ERG"`.
pub gene: &'static str,
/// Free-text description of the alteration, e.g. `"G12C"`, `"loss"`,
/// `"various"`.
pub alteration: &'static str,
/// Value between 0 and 1 in every table here; presumably the fraction of
/// tumours of this type carrying the alteration — TODO confirm. Values within
/// one table do not sum to 1.
pub prevalence: f32,
/// Chromosome name with a `chr` prefix (e.g. `"chr12"`), or `None` when no
/// specific locus is given.
pub chrom: Option<&'static str>,
/// Position on `chrom`, or `None` when no specific locus is given.
pub pos: Option<u64>,
/// Reference allele as ASCII bytes, or `None` when no specific locus is given.
pub ref_allele: Option<&'static [u8]>,
/// Alternate allele as ASCII bytes, or `None` when no specific locus is given.
pub alt_allele: Option<&'static [u8]>,
}
/// Returns the driver-mutation table registered for `preset_name`.
///
/// Matching is exact. A name without a table yields an empty slice rather
/// than an error.
pub fn drivers_for(preset_name: &str) -> &'static [DriverMutation] {
    // Preset name -> static driver table, in the same order as `all_names()`.
    let table: [(&str, &'static [DriverMutation]); 8] = [
        ("lung_adeno", LUNG_ADENO_DRIVERS),
        ("colorectal", COLORECTAL_DRIVERS),
        ("breast_tnbc", BREAST_TNBC_DRIVERS),
        ("melanoma", MELANOMA_DRIVERS),
        ("aml", AML_DRIVERS),
        ("prostate", PROSTATE_DRIVERS),
        ("pancreatic", PANCREATIC_DRIVERS),
        ("glioblastoma", GLIOBLASTOMA_DRIVERS),
    ];

    table
        .iter()
        .find(|(key, _)| *key == preset_name)
        .map(|&(_, drivers)| drivers)
        .unwrap_or(&[])
}
/// Driver alterations returned by `drivers_for("lung_adeno")`.
///
/// The first three entries carry a concrete locus; the remaining three
/// (`alteration: "various"`) have no coordinates.
///
/// NOTE(review): the genome build and coordinate base of the loci are not
/// stated anywhere in this file — verify before use.
static LUNG_ADENO_DRIVERS: &[DriverMutation] = &[
DriverMutation {
gene: "KRAS",
alteration: "G12C",
prevalence: 0.13,
chrom: Some("chr12"),
pos: Some(25_227_342),
ref_allele: Some(b"G"),
alt_allele: Some(b"T"),
},
DriverMutation {
gene: "EGFR",
alteration: "L858R",
prevalence: 0.10,
chrom: Some("chr7"),
pos: Some(55_191_822),
ref_allele: Some(b"T"),
alt_allele: Some(b"G"),
},
// NOTE(review): ref is 15 bases and alt is 1 base, so this record removes
// 14 bases; confirm that is the intended exon 19 deletion (an in-frame
// deletion would remove a multiple of three bases).
DriverMutation {
gene: "EGFR",
alteration: "exon19del",
prevalence: 0.10,
chrom: Some("chr7"),
pos: Some(55_174_772),
ref_allele: Some(b"AATTAAGAGAAGCAA"),
alt_allele: Some(b"A"),
},
DriverMutation {
gene: "TP53",
alteration: "various",
prevalence: 0.46,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "STK11",
alteration: "various",
prevalence: 0.17,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "KEAP1",
alteration: "various",
prevalence: 0.12,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
];
/// Driver alterations returned by `drivers_for("colorectal")`.
///
/// Only the KRAS entry carries a concrete locus; the other four have no
/// coordinates.
///
/// NOTE(review): the genome build and coordinate base of the locus are not
/// stated anywhere in this file — verify before use.
static COLORECTAL_DRIVERS: &[DriverMutation] = &[
// NOTE(review): the label covers two variants ("G12D/V") but only one alt
// allele (G>A) is encoded — confirm which variant the locus is meant to be.
DriverMutation {
gene: "KRAS",
alteration: "G12D/V",
prevalence: 0.45,
chrom: Some("chr12"),
pos: Some(25_227_343),
ref_allele: Some(b"G"),
alt_allele: Some(b"A"),
},
DriverMutation {
gene: "APC",
alteration: "truncating",
prevalence: 0.80,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "TP53",
alteration: "various",
prevalence: 0.60,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "SMAD4",
alteration: "various",
prevalence: 0.15,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "PIK3CA",
alteration: "various",
prevalence: 0.20,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
];
/// Driver alterations returned by `drivers_for("breast_tnbc")`.
///
/// Only the PIK3CA entry carries a concrete locus; the other three have no
/// coordinates.
///
/// NOTE(review): the genome build and coordinate base of the locus are not
/// stated anywhere in this file — verify before use.
static BREAST_TNBC_DRIVERS: &[DriverMutation] = &[
DriverMutation {
gene: "TP53",
alteration: "various",
prevalence: 0.80,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "BRCA1",
alteration: "germline/somatic",
prevalence: 0.20,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "RB1",
alteration: "loss",
prevalence: 0.20,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
// NOTE(review): the alteration is labelled "various" yet a single A>G locus
// is encoded — confirm which specific variant this is and whether the label
// should name it.
DriverMutation {
gene: "PIK3CA",
alteration: "various",
prevalence: 0.10,
chrom: Some("chr3"),
pos: Some(179_234_296),
ref_allele: Some(b"A"),
alt_allele: Some(b"G"),
},
];
/// Driver alterations returned by `drivers_for("melanoma")`.
///
/// The BRAF, NRAS and TERT entries carry a concrete locus; CDKN2A and PTEN
/// have no coordinates.
///
/// NOTE(review): the genome build and coordinate base of the loci are not
/// stated anywhere in this file — verify before use.
static MELANOMA_DRIVERS: &[DriverMutation] = &[
DriverMutation {
gene: "BRAF",
alteration: "V600E",
prevalence: 0.50,
chrom: Some("chr7"),
pos: Some(140_753_335),
ref_allele: Some(b"A"),
alt_allele: Some(b"T"),
},
// NOTE(review): the label covers two variants ("Q61R/K") but only one alt
// allele (A>G) is encoded — confirm which variant the locus is meant to be.
DriverMutation {
gene: "NRAS",
alteration: "Q61R/K",
prevalence: 0.20,
chrom: Some("chr1"),
pos: Some(114_716_126),
ref_allele: Some(b"A"),
alt_allele: Some(b"G"),
},
// NOTE(review): the label names two promoter sites ("C228T/C250T") but one
// locus is encoded. The position's last three digits (228) match the
// "C228T" label, which may mean this coordinate comes from a different
// genome build than the other loci in this file — TODO confirm.
DriverMutation {
gene: "TERT",
alteration: "promoter C228T/C250T",
prevalence: 0.74,
chrom: Some("chr5"),
pos: Some(1_295_228),
ref_allele: Some(b"C"),
alt_allele: Some(b"T"),
},
DriverMutation {
gene: "CDKN2A",
alteration: "various",
prevalence: 0.40,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "PTEN",
alteration: "loss",
prevalence: 0.20,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
];
/// Driver alterations returned by `drivers_for("aml")`.
///
/// None of the six entries carries a concrete locus, including those whose
/// label names a specific amino-acid change (e.g. `"R882H"`, `"R132H"`).
static AML_DRIVERS: &[DriverMutation] = &[
DriverMutation {
gene: "FLT3",
alteration: "ITD",
prevalence: 0.25,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "NPM1",
alteration: "W288fs",
prevalence: 0.30,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "DNMT3A",
alteration: "R882H",
prevalence: 0.22,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "IDH1",
alteration: "R132H",
prevalence: 0.08,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "IDH2",
alteration: "R140Q",
prevalence: 0.12,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "TET2",
alteration: "various",
prevalence: 0.10,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
];
/// Driver alterations returned by `drivers_for("prostate")`.
///
/// None of the five entries carries a concrete locus. The first entry is a
/// fusion and uses the combined label `"TMPRSS2-ERG"` in the `gene` field.
static PROSTATE_DRIVERS: &[DriverMutation] = &[
DriverMutation {
gene: "TMPRSS2-ERG",
alteration: "fusion",
prevalence: 0.50,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "AR",
alteration: "amplification",
prevalence: 0.30,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "PTEN",
alteration: "loss",
prevalence: 0.25,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "TP53",
alteration: "various",
prevalence: 0.25,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "BRCA2",
alteration: "germline/somatic",
prevalence: 0.12,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
];
/// Driver alterations returned by `drivers_for("pancreatic")`.
///
/// None of the four entries carries a concrete locus.
static PANCREATIC_DRIVERS: &[DriverMutation] = &[
// NOTE(review): no locus is given for KRAS G12D here, although
// COLORECTAL_DRIVERS encodes one for its "G12D/V" entry — confirm whether
// this entry should carry coordinates too.
DriverMutation {
gene: "KRAS",
alteration: "G12D",
prevalence: 0.92,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "TP53",
alteration: "various",
prevalence: 0.72,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "SMAD4",
alteration: "various",
prevalence: 0.32,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "CDKN2A",
alteration: "various",
prevalence: 0.29,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
];
/// Driver alterations returned by `drivers_for("glioblastoma")`.
///
/// None of the five entries carries a concrete locus — including the TERT
/// promoter entry, which does have coordinates in MELANOMA_DRIVERS.
static GLIOBLASTOMA_DRIVERS: &[DriverMutation] = &[
DriverMutation {
gene: "EGFR",
alteration: "amplification/EGFRvIII",
prevalence: 0.57,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "PTEN",
alteration: "loss",
prevalence: 0.40,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "TP53",
alteration: "various",
prevalence: 0.28,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "TERT",
alteration: "promoter C228T/C250T",
prevalence: 0.72,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
DriverMutation {
gene: "IDH1",
alteration: "R132H (IDH-mutant subset)",
prevalence: 0.10,
chrom: None,
pos: None,
ref_allele: None,
alt_allele: None,
},
];
// Unit tests for the cancer presets: every preset must yield a sane config
// once applied, selected presets must satisfy coarse mutation-count and
// SNV-fraction bounds, and every preset must have a driver table.
#[cfg(test)]
mod tests {
use super::*;
use crate::cli::presets::apply_preset_to_config;
use crate::io::config::{Config, FragmentConfig, OutputConfig, QualityConfig, SampleConfig};
use std::path::PathBuf;
/// Builds a minimal `Config` to apply overlays onto: reference `/dev/null`,
/// output directory `/tmp` with only FASTQ output enabled, default
/// sample/fragment/quality sections, and every optional section `None`.
fn base_config() -> Config {
Config {
reference: PathBuf::from("/dev/null"),
output: OutputConfig {
directory: PathBuf::from("/tmp"),
fastq: true,
bam: false,
truth_vcf: false,
manifest: false,
germline_vcf: false,
single_read_bam: false,
mapq: 60,
annotate_reads: false,
},
sample: SampleConfig::default(),
fragment: FragmentConfig::default(),
quality: QualityConfig::default(),
tumour: None,
mutations: None,
umi: None,
artifacts: None,
seed: None,
threads: None,
chromosomes: None,
regions_bed: None,
copy_number: None,
gc_bias: None,
samples: None,
capture: None,
performance: Default::default(),
preset: None,
vafs: None,
germline: None,
paired: None,
}
}
/// Mutation count a "high TMB" preset must exceed: `muts_from_per_mb(10)`.
const HIGH_TMB_THRESHOLD_MUTS: usize = muts_from_per_mb(10);
/// Mutation count a "low TMB" preset must not exceed: `muts_from_per_mb(2)`.
const LOW_TMB_THRESHOLD_MUTS: usize = muts_from_per_mb(2);
/// For every name in `all_names()`: the preset resolves, and after applying
/// it to `base_config()` the config has positive coverage, random mutations
/// with `count > 0`, `0 < vaf_min < vaf_max`, type fractions summing to 1.0
/// (within 1e-6), and a tumour section whose purity lies in [0, 1].
#[test]
fn test_all_cancer_presets_valid() {
for name in all_names() {
let overlay = get(name).unwrap_or_else(|e| panic!("preset '{}' failed: {}", name, e));
let mut cfg = base_config();
apply_preset_to_config(&mut cfg, &overlay);
assert!(
cfg.sample.coverage > 0.0,
"preset 'cancer:{name}' must produce positive coverage"
);
let muts = cfg
.mutations
.unwrap_or_else(|| panic!("preset 'cancer:{name}' must set mutations"));
let rand = muts
.random
.unwrap_or_else(|| panic!("preset 'cancer:{name}' must set random mutations"));
assert!(rand.count > 0, "preset 'cancer:{name}' must have count > 0");
assert!(
rand.vaf_min > 0.0,
"preset 'cancer:{name}' vaf_min must be > 0"
);
assert!(
rand.vaf_max > rand.vaf_min,
"preset 'cancer:{name}' vaf_max must exceed vaf_min"
);
let frac_sum = rand.snv_fraction + rand.indel_fraction + rand.mnv_fraction;
assert!(
(frac_sum - 1.0).abs() < 1e-6,
"preset 'cancer:{name}' type fractions must sum to 1.0, got {frac_sum}"
);
// `base_config()` starts with `tumour: None`, so this also checks that
// applying the preset populated the tumour section.
let tumour = cfg
.tumour
.unwrap_or_else(|| panic!("preset 'cancer:{name}' must set tumour"));
assert!(
(0.0..=1.0).contains(&tumour.purity),
"preset 'cancer:{name}' purity {} must be in [0, 1]",
tumour.purity
);
}
}
/// Checks the raw `lung_adeno` overlay (not applied to a config): SNV
/// fraction at least 0.80, count at least `muts_from_per_mb(5)`, and purity
/// within [0.30, 0.90].
#[test]
fn test_lung_adeno_signatures() {
let overlay = get("lung_adeno").unwrap();
let rand = overlay.mutations.as_ref().unwrap().random.as_ref().unwrap();
assert!(
rand.snv_fraction >= 0.80,
"lung_adeno SNV fraction {} should be ≥ 0.80 (SBS4 is SNV-dominant)",
rand.snv_fraction
);
assert!(
rand.count >= muts_from_per_mb(5),
"lung_adeno mutation count {} should be at least {} (SBS4 elevated TMB)",
rand.count,
muts_from_per_mb(5),
);
let purity = overlay.purity.unwrap();
assert!(
(0.30..=0.90).contains(&purity),
"lung_adeno purity {purity} out of expected range [0.30, 0.90]"
);
}
/// Checks the raw `melanoma` overlay: count strictly above
/// `HIGH_TMB_THRESHOLD_MUTS` and SNV fraction at least 0.82.
#[test]
fn test_melanoma_high_tmb() {
let overlay = get("melanoma").unwrap();
let rand = overlay.mutations.as_ref().unwrap().random.as_ref().unwrap();
assert!(
rand.count > HIGH_TMB_THRESHOLD_MUTS,
"melanoma mutation count {} should exceed high-TMB threshold {}",
rand.count,
HIGH_TMB_THRESHOLD_MUTS,
);
assert!(
rand.snv_fraction >= 0.82,
"melanoma SNV fraction {} should be ≥ 0.82 (SBS7 is C>T dominant)",
rand.snv_fraction
);
}
/// Checks the raw `aml` overlay: count at most `LOW_TMB_THRESHOLD_MUTS`.
#[test]
fn test_aml_low_tmb() {
let overlay = get("aml").unwrap();
let rand = overlay.mutations.as_ref().unwrap().random.as_ref().unwrap();
assert!(
rand.count <= LOW_TMB_THRESHOLD_MUTS,
"aml mutation count {} should be ≤ low-TMB threshold {}",
rand.count,
LOW_TMB_THRESHOLD_MUTS,
);
}
/// Spot-checks that specific genes appear in the lung, melanoma, AML and
/// pancreatic driver tables, then checks that every preset in `all_names()`
/// has a non-empty driver table.
#[test]
fn test_driver_mutations_included() {
let lung_drivers = drivers_for("lung_adeno");
assert!(
lung_drivers.iter().any(|d| d.gene == "KRAS"),
"lung_adeno drivers must include KRAS"
);
assert!(
lung_drivers.iter().any(|d| d.gene == "EGFR"),
"lung_adeno drivers must include EGFR"
);
let mel_drivers = drivers_for("melanoma");
assert!(
mel_drivers
.iter()
.any(|d| d.gene == "BRAF" && d.alteration.contains("V600E")),
"melanoma drivers must include BRAF V600E"
);
let aml_drivers = drivers_for("aml");
assert!(
aml_drivers.iter().any(|d| d.gene == "FLT3"),
"aml drivers must include FLT3"
);
assert!(
aml_drivers.iter().any(|d| d.gene == "NPM1"),
"aml drivers must include NPM1"
);
assert!(
aml_drivers.iter().any(|d| d.gene == "DNMT3A"),
"aml drivers must include DNMT3A"
);
let pdac_drivers = drivers_for("pancreatic");
assert!(
pdac_drivers
.iter()
.any(|d| d.gene == "KRAS" && d.alteration.contains("G12D")),
"pancreatic drivers must include KRAS G12D"
);
// Guards against a preset being added to `all_names()` without a matching
// `drivers_for` arm, which would silently return an empty slice.
for name in all_names() {
assert!(
!drivers_for(name).is_empty(),
"cancer preset '{name}' must have at least one driver mutation defined"
);
}
}
/// Applies the `melanoma` preset, then overwrites coverage and purity by
/// hand and reads them back.
///
/// NOTE(review): the final two assertions only read back values assigned a
/// few lines earlier, so they cannot fail because of preset logic. If the
/// intent is "user-supplied values win over the preset", the user values
/// would need to be set before (or merged with) `apply_preset_to_config` —
/// confirm the intended precedence against that function.
#[test]
fn test_preset_override() {
let overlay = get("melanoma").unwrap();
let mut cfg = base_config();
apply_preset_to_config(&mut cfg, &overlay);
assert!(
cfg.sample.coverage > 0.0,
"melanoma preset should set coverage"
);
let user_coverage = 100.0_f64;
let user_purity = 0.3_f64;
cfg.sample.coverage = user_coverage;
// If the preset left `cfg.tumour` as `None`, this assignment is skipped and
// the `unwrap()` in the purity assertion below panics instead.
if let Some(ref mut t) = cfg.tumour {
t.purity = user_purity;
}
assert!(
(cfg.sample.coverage - user_coverage).abs() < 1e-9,
"user coverage {user_coverage} must survive after override"
);
assert!(
(cfg.tumour.as_ref().unwrap().purity - user_purity).abs() < 1e-9,
"user purity {user_purity} must survive after override"
);
}
}