# varforge 0.2.0
#
# Synthetic cancer sequencing test data generator
# Documentation
# ffpe_artifacts.yaml — FFPE-damaged tumour sample
#
# Formalin-fixed paraffin-embedded (FFPE) tissue introduces characteristic
# sequencing artefacts that confound somatic variant calling:
#
#   - C>T / G>A transitions from cytosine deamination (FFPE damage)
#   - 8-oxoG lesions causing C>A / G>T transversions (oxidative damage)
#   - Elevated PCR duplicate rate from degraded, short input DNA
#
# This configuration models a poorly preserved FFPE block with moderate
# deamination and oxidative damage.  Suitable for benchmarking FFPE-aware
# variant callers and artefact-filtering pipelines.
#
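# Run (supply the reference path for the ${reference} placeholder below):
#   varforge simulate --config examples/ffpe_artifacts.yaml \
#     --set reference=/path/to/hg38.fa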

# Reference genome FASTA. ${reference} is a placeholder; set it with
#   --set reference=/path/to/hg38.fa
# Single-quoted so the placeholder is always parsed as a plain string.
reference: '${reference}'

# Output settings: the target directory plus one boolean toggle per output
# type. All four toggles are enabled in this example.
output:
  directory: out/ffpe_artifacts
  fastq: true
  bam: true
  truth_vcf: true
  manifest: true

# Sample-level sequencing settings for the simulated FFPE tumour.
# NOTE(review): read_length is presumably in bases and coverage presumably
# mean depth (100x) — confirm against the varforge config schema.
sample:
  name: FFPE_TUMOUR
  read_length: 150
  coverage: 100.0
  platform: illumina

# FFPE-derived DNA tends to be fragmented, so the fragment-length distribution
# used here is shorter than would be typical for fresh-frozen tissue.
# NOTE(review): with mean 180 and sd 60, a sizeable share of fragments would be
# shorter than sample.read_length (150), assuming both use the same unit —
# confirm this is intended.
fragment:
  model: normal
  mean: 180.0
  sd: 60.0

# Base-quality settings.
# NOTE(review): tail_decay is not explained in this file — presumably it
# controls how quality falls off along the read; confirm against the varforge
# documentation.
quality:
  mean_quality: 33   # FFPE quality slightly lower than fresh-frozen
  tail_decay: 0.005

# Tumour content of the simulated sample: purity 0.40 with ploidy 2.
tumour:
  purity: 0.40   # FFPE purity estimates are often lower due to necrosis
  ploidy: 2

# Randomly generated mutations: 500 variants with VAF bounds of 0.05-0.40.
# NOTE(review): vaf_max (0.40) equals tumour.purity; a clonal heterozygous
# variant in a diploid tumour at 40% purity would sit near VAF 0.20 — confirm
# how varforge interprets the VAF range relative to purity.
mutations:
  random:
    count: 500
    vaf_min: 0.05
    vaf_max: 0.40
    # The three variant-type fractions below sum to 1.0 (0.80 + 0.15 + 0.05).
    snv_fraction: 0.80
    indel_fraction: 0.15
    mnv_fraction: 0.05

# Artefact rates for the FFPE damage modes described in the file header
# (deamination, 8-oxoG oxidation, elevated duplication). Values are written
# as fractions, e.g. 0.02 = 2%, as the inline notes indicate.
artifacts:
  ffpe_damage_rate: 0.02    # 2% C>T deamination rate
  oxog_rate: 0.01           # 1% 8-oxoG oxidative damage rate
  duplicate_rate: 0.25      # elevated duplication from fragmented input
  pcr_error_rate: 0.001     # slight PCR error elevation

# Run-level settings: fixed seed (presumably so repeated runs produce the same
# data — confirm) and the number of threads to use.
seed: 9999
threads: 4