# varforge 0.2.0
#
# Synthetic cancer sequencing test data generator
# Documentation
# subclonal.yaml — Complex clonal architecture with subclones
#
# Models a tumour with a four-clone hierarchy: one founding clone, two
# parallel early subclones (branching evolution), and one late subclone
# that arose within branch B.
#
# Clone tree:
#
#   founding (CCF 1.0)
#   ├── branch_a (CCF 0.45)  — e.g. acquired KRAS mutation
#   └── branch_b (CCF 0.55)  — e.g. acquired TP53 mutation
#       └── late_b (CCF 0.20) — e.g. acquired drug resistance allele
#
# Effective VAF for a heterozygous mutation private to branch_b, in a
# copy-neutral diploid region, at 60% purity:
#   VAF ≈ purity × CCF_b / 2  =  0.60 × 0.55 / 2  =  0.165
#
# Suitable for benchmarking subclonal deconvolution tools (PyClone-VI,
# MOBSTER, Canopy) and clonal evolution analysis pipelines.
#
# Run:
#   varforge simulate --config examples/subclonal.yaml

# Reference FASTA path. `${reference}` is a placeholder that must be supplied
# on the command line; it has no default in this file.
reference: ${reference}  # set with --set reference=/path/to/hg38.fa

# Where results are written and which artefacts are requested.
# All four artefact switches below are turned on.
output:
  manifest: true
  truth_vcf: true
  bam: true
  fastq: true
  directory: out/subclonal

# Sample identity and sequencing parameters.
# NOTE(review): read_length is presumably in bases and coverage a mean
# depth — confirm against the varforge schema.
sample:
  platform: illumina
  name: SUBCLONAL_TUMOUR
  coverage: 60.0
  read_length: 150

# Fragment-size distribution: a normal model parameterised by mean and sd.
# NOTE(review): units are presumably base pairs — confirm.
fragment:
  mean: 290.0
  sd: 45.0
  model: normal

# Base-quality model parameters.
# NOTE(review): mean_quality is presumably a Phred score and tail_decay a
# per-position decline towards the read end — confirm.
quality:
  tail_decay: 0.003
  mean_quality: 36

# Tumour composition: purity, ploidy and the clone tree.
# Each non-root clone names its parent by id. The two children of `founding`
# have CCFs 0.45 + 0.55 = 1.0, and `late_b` (0.20) sits below its parent
# `branch_b` (0.55).
tumour:
  ploidy: 2
  purity: 0.60
  clones:
    # Root clone; it has no parent.
    - id: founding
      ccf: 1.0
    # First of two parallel branches under the root.
    - id: branch_a
      parent: founding
      ccf: 0.45
    # Second branch under the root.
    - id: branch_b
      parent: founding
      ccf: 0.55
    # Late subclone nested inside branch_b.
    - id: late_b
      parent: branch_b
      ccf: 0.20

# Randomly generated mutations.
mutations:
  random:
    count: 2000
    # Variant-type mix; the three fractions sum to 1.0.
    mnv_fraction: 0.05
    indel_fraction: 0.15
    snv_fraction: 0.80
    # Lower and upper VAF bounds.
    vaf_max: 0.60
    vaf_min: 0.01

# Region-level copy number alterations.
# In every entry, major_cn + minor_cn equals tumor_cn.
copy_number:
  # 127-145 Mb covers MYC on both hg38 (~127.7 Mb) and hg19 (~128.7 Mb) and
  # ends before the end of chr8 on either build. The previous range ended at
  # 146 Mb, which is past the end of chr8 on hg38.
  - region: "chr8:127000000-145000000"   # MYC amplification (8q24)
    tumor_cn: 6
    normal_cn: 2
    major_cn: 5
    minor_cn: 1
  # Single-copy loss: one allele retained, the other lost.
  - region: "chr17:7500000-8000000"      # TP53 locus deletion (17p13)
    tumor_cn: 1
    normal_cn: 2
    major_cn: 1
    minor_cn: 0
  # Both alleles lost in the tumour.
  - region: "chr9:21900000-22100000"     # CDKN2A homozygous deletion (9p21)
    tumor_cn: 0
    normal_cn: 2
    major_cn: 0
    minor_cn: 0

# GC-bias simulation: switched on, using the `default` model.
# NOTE(review): severity 1.0 is presumably a scaling factor — confirm.
gc_bias:
  model: default
  severity: 1.0
  enabled: true

# Run-level settings.
# NOTE(review): seed presumably fixes the random number generator so output
# is reproducible; threads presumably sets the worker count — confirm.
threads: 8
seed: 3141