// Machine Learning Hyperparameter Sweep Workflow with Shared Parameters
// Demonstrates using workflow-level shared parameters to avoid repeating parameter definitions
// This creates 3 * 3 * 2 = 18 training jobs with different hyperparameter combinations
//
// Compare this to hyperparameter_sweep.json5 - same result, but parameters defined only once!
{
  name: "hyperparameter_sweep_shared_params",
  description: "Grid search over learning rate, batch size, and optimizer (using shared parameters)",

  // Workflow-level shared parameters - defined once here, then referenced by name
  // in each job's/file's `use_parameters` list instead of being repeated inline.
  // Each value is a string holding a literal list of sweep values:
  // 3 learning rates * 3 batch sizes * 2 optimizers = 18 combinations.
  parameters: {
    lr: "[0.0001,0.001,0.01]",
    batch_size: "[16,32,64]",
    optimizer: "['adam','sgd']"
  },

  jobs: [
    // Prepare datasets (one-time setup jobs - no parameters needed, so they are
    // never expanded; each runs exactly once)
    {
      name: "prepare_train_data",
      command: "python scripts/prepare_data.py --split=train --output=/data/train.pkl",
      resource_requirements: "data_prep",
      output_files: ["train_data"]
    },
    {
      name: "prepare_val_data",
      command: "python scripts/prepare_data.py --split=validation --output=/data/validation.pkl",
      resource_requirements: "data_prep",
      output_files: ["val_data"]
    },
    // Training jobs - use the shared parameters declared above.
    // This single job spec expands to 18 jobs, one per (lr, batch_size, optimizer) combo.
    // The {lr:.4f} placeholder renders lr with 4 decimal places (0.0001 / 0.0010 / 0.0100)
    // so job/file names stay fixed-width and unambiguous.
    // NOTE(review): format specs look like Python str.format syntax - confirm against
    // the workflow engine's templating rules.
    {
      name: "train_lr{lr:.4f}_bs{batch_size}_opt{optimizer}",
      command: "python train.py --learning-rate={lr} --batch-size={batch_size} --optimizer={optimizer} --train-data=/data/train.pkl --val-data=/data/validation.pkl --model-output=/models/model_lr{lr:.4f}_bs{batch_size}_opt{optimizer}.pt --metrics-output=/results/metrics_lr{lr:.4f}_bs{batch_size}_opt{optimizer}.json",
      resource_requirements: "gpu_training",
      // Every expanded training job waits for both (unparameterized) prep jobs.
      depends_on: ["prepare_train_data", "prepare_val_data"],
      input_files: ["train_data", "val_data"],
      // Output file names must match the parameterized entries in `files` below.
      output_files: [
        "model_lr{lr:.4f}_bs{batch_size}_opt{optimizer}",
        "metrics_lr{lr:.4f}_bs{batch_size}_opt{optimizer}"
      ],
      // Opt in to the workflow-level parameters; drives the 18-way expansion.
      use_parameters: ["lr", "batch_size", "optimizer"]
    },
    // Aggregate results from all training runs
    {
      name: "aggregate_results",
      command: "python scripts/aggregate_metrics.py --input-dir=/results --output=/results/summary.csv",
      resource_requirements: "minimal",
      // This will be expanded to wait for all 18 training jobs.
      // NOTE(review): this assumes the engine expands parameterized depends_on/input_files
      // into a fan-in on a single aggregate job (note: the job `name` itself contains no
      // parameter placeholders) rather than cloning this job 18 times - verify.
      depends_on: ["train_lr{lr:.4f}_bs{batch_size}_opt{optimizer}"],
      // Wait for all metrics files
      input_files: ["metrics_lr{lr:.4f}_bs{batch_size}_opt{optimizer}"],
      use_parameters: ["lr", "batch_size", "optimizer"]
    }
  ],

  // File declarations referenced by name from jobs' input_files/output_files.
  files: [
    { name: "train_data", path: "/data/train.pkl" },
    { name: "val_data", path: "/data/validation.pkl" },
    // Model files - one per hyperparameter combination (18 total)
    {
      name: "model_lr{lr:.4f}_bs{batch_size}_opt{optimizer}",
      path: "/models/model_lr{lr:.4f}_bs{batch_size}_opt{optimizer}.pt",
      use_parameters: ["lr", "batch_size", "optimizer"]
    },
    // Metrics files - one per hyperparameter combination (18 total)
    {
      name: "metrics_lr{lr:.4f}_bs{batch_size}_opt{optimizer}",
      path: "/results/metrics_lr{lr:.4f}_bs{batch_size}_opt{optimizer}.json",
      use_parameters: ["lr", "batch_size", "optimizer"]
    }
  ],

  // Named resource tiers referenced by jobs' resource_requirements.
  // `runtime` values are ISO 8601 durations (PT5M = 5 min, PT2H = 2 h).
  resource_requirements: [
    { name: "minimal", num_cpus: 1, num_gpus: 0, num_nodes: 1, memory: "2g", runtime: "PT5M" },
    { name: "data_prep", num_cpus: 4, num_gpus: 0, num_nodes: 1, memory: "8g", runtime: "PT30M" },
    { name: "gpu_training", num_cpus: 8, num_gpus: 1, num_nodes: 1, memory: "32g", runtime: "PT2H" }
  ]
}
