varforge 0.2.0

Synthetic cancer sequencing test data generator
Documentation
# tumor_normal.yaml — Matched tumour/normal pair
#
# Uses multi-sample mode (samples: key) to generate two samples from a
# shared clonal architecture: one tumour and one germline normal.
#
# The normal sample sets tumour_fraction: 0.0, so it receives no somatic
# mutations. The tumour sample sets tumour_fraction: 1.0 to apply the full
# configured purity.
#
# Both samples are written to the same output directory, each in its own
# sub-directory. The manifest.tsv lists both samples with their roles.
# This layout is the standard input for paired somatic variant callers such
# as Mutect2, Strelka2, and VarDict.
#
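# Run (the ${reference} placeholder below must be supplied with --set):
#   varforge simulate --config examples/tumor_normal.yaml \
#     --set reference=/path/to/hg38.fa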

# Reference FASTA path. The placeholder is filled in on the command line,
# e.g. --set reference=/path/to/hg38.fa.
# Quoted so the value stays a string even if the path contains ` #` or `: `.
reference: '${reference}'

# Output artefacts. Both samples are written under `directory`, each in its
# own sub-directory, and the manifest lists both samples with their roles.
# NOTE(review): truth_vcf presumably emits a VCF of the simulated variants —
# confirm against the varforge docs.
output:
  directory: out/tumor_normal
  fastq: true
  bam: true
  truth_vcf: true
  manifest: true

# Fragment-length settings: a `normal` model with the given mean and sd.
# NOTE(review): units are presumably base pairs — confirm.
fragment:
  model: normal
  mean: 300.0
  sd: 50.0

# Base-quality settings.
# NOTE(review): mean_quality is presumably a Phred score and tail_decay a
# per-position quality drop along the read — confirm; neither is defined here.
quality:
  mean_quality: 36
  tail_decay: 0.003

# Clonal architecture defined at the top level and shared by both samples.
# Only one clone (`trunk`, ccf 1.0) is listed, so no subclones are configured.
# NOTE(review): ccf presumably stands for cancer cell fraction — confirm.
tumour:
  purity: 0.65
  ploidy: 2
  clones:
    - id: trunk
      ccf: 1.0

# `random` mutation settings: a count of 1000 with VAF bounds of 0.05-0.65.
# The three *_fraction values sum to 1.0 (0.80 + 0.15 + 0.05).
# NOTE(review): vaf_max equals tumour.purity (0.65); presumably intentional,
# but how these bounds interact with purity/ccf is not shown here — confirm.
mutations:
  random:
    count: 1000
    vaf_min: 0.05
    vaf_max: 0.65
    snv_fraction: 0.80
    indel_fraction: 0.15
    mnv_fraction: 0.05

# Multi-sample series: tumour and matched normal.
# Each entry sets its own coverage and tumour_fraction; the NORMAL entry's
# tumour_fraction of 0.0 is what keeps it free of somatic mutations.
# NOTE(review): fragment_model repeats the top-level fragment.model value;
# whether the per-sample key overrides it is not shown here — confirm.
samples:
  - name: TUMOUR
    coverage: 60.0
    tumour_fraction: 1.0   # all somatic mutations present at full purity
    fragment_model: normal

  - name: NORMAL
    coverage: 30.0
    tumour_fraction: 0.0   # germline only — no somatic variants
    fragment_model: normal

# GC-bias settings: enabled, using the `default` model.
# NOTE(review): the severity scale is not defined here; 1.0 presumably means
# nominal strength — confirm.
gc_bias:
  enabled: true
  model: default
  severity: 1.0

# Fixed seed and thread count for the run.
# NOTE(review): seed presumably initialises the random generator so output is
# reproducible; whether results are independent of `threads` is not shown.
seed: 2024
threads: 8