---
# tumor_normal.yaml — Matched tumour/normal pair
#
# Uses multi-sample mode (samples: key) to generate two samples from a
# shared clonal architecture: one tumour and one germline normal.
#
# The normal sample sets tumour_fraction: 0.0, so it receives no somatic
# mutations. The tumour sample sets tumour_fraction: 1.0 to apply the full
# configured purity.
#
# Both samples are written to the same output directory, each in its own
# sub-directory. The manifest.tsv lists both samples with their roles.
# This layout is the standard input for paired somatic variant callers such
# as Mutect2, Strelka2, and VarDict.
#
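# Run (the reference path must be supplied, since `reference` is a placeholder):
#   varforge simulate --config examples/tumor_normal.yaml \
#     --set reference=/path/to/hg38.fa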
# Placeholder for the reference path; it is not a usable value as written.
# Supply the real path with: --set reference=/path/to/hg38.fa
reference: '${reference}'
# Output settings. Per the file header, both samples are written under this
# directory, each in its own sub-directory, and manifest.tsv lists the samples
# with their roles.
# NOTE(review): the boolean keys presumably toggle which artefacts are
# written (FASTQ, BAM, truth VCF, manifest) — confirm against the tool's
# config schema.
output:
  directory: out/tumor_normal
  fastq: true
  bam: true
  truth_vcf: true
  manifest: true
# Fragment settings: a `normal` model parameterised by mean and sd.
# NOTE(review): presumably the fragment-length distribution in base pairs —
# confirm units against the tool's config schema.
fragment:
  model: normal
  mean: 300.0
  sd: 50.0
# Quality model parameters.
# NOTE(review): mean_quality is presumably a Phred-scale base quality and
# tail_decay a per-position decay rate — confirm against the config schema.
quality:
  mean_quality: 36
  tail_decay: 0.003
# Clonal architecture defined at the top level and shared by both samples.
# A single clone (`trunk`) with ccf 1.0 is declared.
# NOTE(review): `ccf` is nested under the `trunk` list item because it follows
# `- id: trunk`; confirm against the config schema.
tumour:
  purity: 0.65
  ploidy: 2
  clones:
    - id: trunk
      ccf: 1.0
# Randomly generated mutations: how many, the VAF range, and the type mix.
# The three *_fraction values sum to 1.0 (0.80 + 0.15 + 0.05).
# NOTE(review): `mutations` is placed at the top level rather than under
# `tumour` — confirm against the config schema.
mutations:
  random:
    count: 1000
    vaf_min: 0.05
    vaf_max: 0.65
    snv_fraction: 0.80
    indel_fraction: 0.15
    mnv_fraction: 0.05
# Multi-sample series: tumour and matched normal.
# Each list item is one sample; the per-sample keys must be nested under the
# item's `- name:` entry so they are not parsed as top-level keys.
samples:
  - name: TUMOUR
    coverage: 60.0
    tumour_fraction: 1.0  # all somatic mutations present at full purity
    fragment_model: normal
  - name: NORMAL
    coverage: 30.0
    tumour_fraction: 0.0  # germline only — no somatic variants
    fragment_model: normal
# GC-bias settings: enabled, using the `default` model at severity 1.0.
# NOTE(review): placed at the top level (applies to the whole run) rather than
# inside the NORMAL sample entry that precedes it — confirm against the
# config schema.
gc_bias:
  enabled: true
  model: default
  severity: 1.0
# Run-level settings.
# NOTE(review): `threads` is presumably the worker-thread count and `seed` the
# random seed for reproducible output — confirm against the config schema.
threads: 8
seed: 2024