#[cfg(test)]
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use crate::io::config::Config;
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceInfo {
pub path: PathBuf,
pub genome_size: u64,
pub chromosomes: usize,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct SimulationStatistics {
pub total_read_pairs: u64,
pub total_bases: u64,
pub variants_spiked: u64,
pub variants_by_type: HashMap<String, u64>,
pub mean_coverage_achieved: f64,
pub regions_simulated: usize,
pub wall_time_seconds: f64,
pub reads_per_second: f64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Manifest {
pub varforge_version: String,
pub timestamp: String,
pub seed: Option<u64>,
pub config: Config,
pub reference: ReferenceInfo,
pub output_files: HashMap<String, PathBuf>,
pub statistics: SimulationStatistics,
}
impl Manifest {
pub fn new(config: Config, reference_info: ReferenceInfo) -> Self {
let seed = config.seed;
let timestamp = current_timestamp_utc();
Self {
varforge_version: env!("CARGO_PKG_VERSION").to_string(),
timestamp,
seed,
config,
reference: reference_info,
output_files: HashMap::new(),
statistics: SimulationStatistics::default(),
}
}
pub fn add_output_file(&mut self, key: &str, path: &Path) {
self.output_files
.insert(key.to_string(), path.to_path_buf());
}
pub fn set_statistics(&mut self, stats: SimulationStatistics) {
self.statistics = stats;
}
#[cfg(test)]
pub fn write(&self, path: &Path) -> Result<()> {
let json =
serde_json::to_string_pretty(self).context("failed to serialise manifest to JSON")?;
std::fs::write(path, json)
.with_context(|| format!("failed to write manifest to {}", path.display()))?;
Ok(())
}
}
fn current_timestamp_utc() -> String {
let secs = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0);
let mut days = secs / 86400;
let time_of_day = secs % 86400;
let hh = time_of_day / 3600;
let mm = (time_of_day % 3600) / 60;
let ss = time_of_day % 60;
let mut year = 1970u32;
loop {
let days_in_year = if is_leap(year) { 366 } else { 365 };
if days < days_in_year {
break;
}
days -= days_in_year;
year += 1;
}
let months = [
31u64,
if is_leap(year) { 29 } else { 28 },
31,
30,
31,
30,
31,
31,
30,
31,
30,
31,
];
let mut month = 1u32;
for &m in &months {
if days < m {
break;
}
days -= m;
month += 1;
}
let day = days + 1;
format!(
"{:04}-{:02}-{:02}T{:02}:{:02}:{:02}Z",
year, month, day, hh, mm, ss
)
}
fn is_leap(year: u32) -> bool {
(year % 4 == 0 && year % 100 != 0) || year % 400 == 0
}
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
use tempfile::TempDir;
use crate::io::config::{
Config, FragmentConfig, FragmentModel, OutputConfig, QualityConfig, SampleConfig,
};
fn make_config() -> Config {
Config {
reference: PathBuf::from("/tmp/ref.fa"),
output: OutputConfig {
directory: PathBuf::from("/tmp/out"),
fastq: true,
bam: false,
truth_vcf: true,
manifest: true,
germline_vcf: false,
single_read_bam: false,
mapq: 60,
annotate_reads: false,
},
sample: SampleConfig {
name: "SAMPLE".to_string(),
read_length: 150,
coverage: 30.0,
platform: None,
},
fragment: FragmentConfig {
model: FragmentModel::Normal,
mean: 300.0,
sd: 50.0,
long_read: None,
end_motif_model: None,
ctdna_fraction: None,
mono_sd: None,
di_sd: None,
},
quality: QualityConfig {
mean_quality: 36,
tail_decay: 0.003,
profile_path: None,
},
tumour: None,
mutations: None,
umi: None,
artifacts: None,
copy_number: None,
gc_bias: None,
samples: None,
capture: None,
seed: Some(42),
threads: None,
chromosomes: None,
regions_bed: None,
performance: Default::default(),
preset: None,
vafs: None,
germline: None,
paired: None,
}
}
fn make_reference_info() -> ReferenceInfo {
ReferenceInfo {
path: PathBuf::from("/tmp/ref.fa"),
genome_size: 3_088_286_401,
chromosomes: 25,
}
}
#[test]
fn test_manifest_creation() {
let cfg = make_config();
let ref_info = make_reference_info();
let mut manifest = Manifest::new(cfg, ref_info.clone());
assert_eq!(manifest.varforge_version, env!("CARGO_PKG_VERSION"));
assert_eq!(manifest.seed, Some(42));
assert_eq!(manifest.reference.genome_size, 3_088_286_401);
assert_eq!(manifest.reference.chromosomes, 25);
manifest.add_output_file("fastq_r1", Path::new("output/sample_R1.fastq.gz"));
assert!(manifest.output_files.contains_key("fastq_r1"));
let stats = SimulationStatistics {
total_read_pairs: 450_000_000,
variants_spiked: 5_000,
wall_time_seconds: 2_400.0,
..Default::default()
};
manifest.set_statistics(stats);
assert_eq!(manifest.statistics.total_read_pairs, 450_000_000);
assert_eq!(manifest.statistics.variants_spiked, 5_000);
}
#[test]
fn test_manifest_json_valid() {
let dir = TempDir::new().unwrap();
let path = dir.path().join("manifest.json");
let cfg = make_config();
let ref_info = make_reference_info();
let manifest = Manifest::new(cfg, ref_info);
manifest.write(&path).expect("write should succeed");
let raw = std::fs::read_to_string(&path).expect("file should exist");
let parsed: serde_json::Value =
serde_json::from_str(&raw).expect("output must be valid JSON");
assert!(parsed.get("varforge_version").is_some());
assert!(parsed.get("timestamp").is_some());
assert!(parsed.get("config").is_some());
assert!(parsed.get("reference").is_some());
assert!(parsed.get("output_files").is_some());
assert!(parsed.get("statistics").is_some());
}
#[test]
fn test_statistics_populated() {
let cfg = make_config();
let ref_info = make_reference_info();
let mut manifest = Manifest::new(cfg, ref_info);
let mut by_type = HashMap::new();
by_type.insert("SNV".to_string(), 4_000u64);
by_type.insert("Indel".to_string(), 750u64);
by_type.insert("MNV".to_string(), 250u64);
let stats = SimulationStatistics {
total_read_pairs: 450_000_000,
total_bases: 135_000_000_000,
variants_spiked: 5_000,
variants_by_type: by_type,
mean_coverage_achieved: 30.2,
regions_simulated: 1_500,
wall_time_seconds: 2_400.0,
reads_per_second: 187_500.0,
};
manifest.set_statistics(stats);
let s = &manifest.statistics;
assert_eq!(s.total_read_pairs, 450_000_000);
assert_eq!(s.total_bases, 135_000_000_000);
assert_eq!(s.variants_spiked, 5_000);
assert_eq!(s.variants_by_type["SNV"], 4_000);
assert_eq!(s.variants_by_type["Indel"], 750);
assert_eq!(s.variants_by_type["MNV"], 250);
assert!((s.mean_coverage_achieved - 30.2).abs() < 1e-9);
assert_eq!(s.regions_simulated, 1_500);
assert!((s.wall_time_seconds - 2_400.0).abs() < 1e-9);
assert!((s.reads_per_second - 187_500.0).abs() < 1e-9);
}
#[test]
fn test_config_included() {
let dir = TempDir::new().unwrap();
let path = dir.path().join("manifest.json");
let cfg = make_config();
let ref_info = make_reference_info();
let manifest = Manifest::new(cfg, ref_info);
manifest.write(&path).unwrap();
let raw = std::fs::read_to_string(&path).unwrap();
let parsed: serde_json::Value = serde_json::from_str(&raw).unwrap();
let config_section = parsed.get("config").expect("config key must exist");
assert_eq!(config_section["sample"]["name"].as_str().unwrap(), "SAMPLE");
assert_eq!(
config_section["sample"]["read_length"].as_u64().unwrap(),
150
);
assert!((config_section["sample"]["coverage"].as_f64().unwrap() - 30.0).abs() < 1e-9);
assert_eq!(config_section["seed"].as_u64().unwrap(), 42);
}
#[test]
fn test_file_paths_relative() {
let cfg = make_config();
let ref_info = make_reference_info();
let mut manifest = Manifest::new(cfg, ref_info);
manifest.add_output_file("fastq_r1", Path::new("output/sample_R1.fastq.gz"));
manifest.add_output_file("fastq_r2", Path::new("output/sample_R2.fastq.gz"));
manifest.add_output_file("bam", Path::new("output/sample.bam"));
manifest.add_output_file("truth_vcf", Path::new("output/truth.vcf.gz"));
assert_eq!(manifest.output_files.len(), 4);
for (key, p) in &manifest.output_files {
assert!(
p.is_relative(),
"output file '{}' has an absolute path: {}",
key,
p.display()
);
}
let json = serde_json::to_string(&manifest).unwrap();
let back: Manifest = serde_json::from_str(&json).unwrap();
assert_eq!(
back.output_files["fastq_r1"],
PathBuf::from("output/sample_R1.fastq.gz")
);
}
}