---
# twist_duplex_benchmark.yaml — Twist Biosciences duplex benchmarking scenario
#
# Reference benchmark configuration for evaluating duplex consensus pipelines
# (fgbio, HUMID) and low-VAF variant callers (Mutect2, VarDict, Strelka2) on
# Twist Biosciences hybrid-capture duplex UMI sequencing data.
#
# Sequencing model (Twist Comprehensive Exome Panel parameters):
# - 2 000× raw coverage → ~500× duplex consensus depth
# - 5 bp inline dual UMIs with 2 bp AT spacer (Twist layout: UMI + AT + template)
# - 90 % duplex conversion rate, 0.1 % UMI sequencing error rate
# - Fragment sizes centred at 170 bp, SD 30 bp (plasma-compatible)
# - PCR amplification: 10 cycles, mean family size 3.5×
#
# Variant load:
# - 50 SNVs at VAF 0.001–0.05 (ctDNA-relevant ultra-low frequency)
# - 10 indels at VAF 0.001–0.05 (same random block and VAF range as the SNVs)
# - 5 HRD-signature SVs at VAF 0.01–0.1
#
# Success criteria (see docs/features/twist-duplex-benchmarking.md):
# - Duplex conversion rate >= 90 %
# - AB/BA strand concordance >= 95 % per variant
# - VAF accuracy: |observed - expected| < 20 % of expected VAF
# - On-target fraction >= 95 %
# - Coverage CV <= 0.25 across targets
#
# Run:
#   varforge simulate --config examples/twist_duplex_benchmark.yaml \
#     --set reference=/path/to/hg38.fa \
#     --set output_dir=/path/to/output \
#     --set targets_bed=examples/twist_duplex_benchmark_targets.bed
#
# Downstream consensus calling (fgbio example):
#   fgbio GroupReadsByUmi \
#     --input=output/TWIST_DUPLEX.bam \
#     --strategy=adjacency --edits=1 \
#     --output=grouped.bam
#   fgbio CallDuplexConsensusReads \
#     --input=grouped.bam --output=consensus.bam \
#     --min-reads=1 1 1
# Reference FASTA. The ${...} placeholders in this file are filled in at run
# time with `--set key=value` (see the "Run" section of the header).
reference: ${reference}

# Output location and artefact toggles; all four outputs are enabled.
output:
  directory: ${output_dir}
  fastq: true
  bam: true
  truth_vcf: true
  manifest: true

# Sample: 150 bp reads at 2000x raw coverage (header: ~500x duplex depth).
sample:
  name: TWIST_DUPLEX
  read_length: 150
  coverage: 2000.0
  platform: illumina

# Fragment sizes: normal model centred at 170 bp, SD 30 bp (plasma-compatible).
fragment:
  model: normal
  mean: 170.0
  sd: 30.0

# Base-quality model.
# NOTE(review): the scale of mean_quality and the exact meaning of tail_decay
# are defined by the simulator — confirm against its documentation.
quality:
  mean_quality: 37
  tail_decay: 0.002

# Tumour model: purity 1.0, ploidy 2.
tumour:
  purity: 1.0
  ploidy: 2

mutations:
  random:
    # 60 small mutations: ~50 SNVs (83 %) and ~10 indels (17 %).
    # VAF range 0.001–0.05 covers the ultra-low ctDNA detection window
    # targeted by Twist duplex sequencing (LOD ~0.1 % at 2000x raw coverage).
    count: 60
    vaf_min: 0.001
    vaf_max: 0.05
    snv_fraction: 0.833
    indel_fraction: 0.167
    mnv_fraction: 0.0
  # HRD-signature SVs: large deletions characteristic of BRCA1/2 and
  # HR-deficient tumours. VAF 0.01–0.1 tests SV caller sensitivity in duplex
  # data.
  # NOTE(review): sv_signature/sv_count are placed directly under `mutations`
  # (siblings of `random`) — confirm against the config schema. No key in this
  # file sets the 0.01–0.1 SV VAF range quoted above; confirm where it is
  # defined.
  sv_signature: HRD
  sv_count: 5

# UMI model: 5 bp inline duplex UMIs followed by a 2 bp "AT" spacer, 10 PCR
# cycles, mean family size 3.5 (SD 1.5), 90 % duplex conversion and a 0.1 %
# UMI error rate.
umi:
  length: 5
  duplex: true
  pcr_cycles: 10
  family_size_mean: 3.5
  family_size_sd: 1.5
  inline: true
  spacer: "AT"
  duplex_conversion_rate: 0.90
  error_rate: 0.001

capture:
  enabled: true
  # Provide a BED file of capture targets. The example file contains 100
  # synthetic targets on chr1. Replace with the actual panel BED for
  # production benchmarking.
  targets_bed: ${targets_bed}
  off_target_fraction: 0.03
  coverage_uniformity: 0.15
  edge_dropoff_bases: 50
  # The two *_target values match the header's success criteria:
  # coverage CV <= 0.25 and on-target fraction >= 95 %.
  coverage_cv_target: 0.25
  on_target_fraction_target: 0.95

# Fixed seed and worker thread count.
seed: 2024
threads: 8