{
  // Workflow metadata: the workflow's name plus a one-line description of
  // what it demonstrates.
  "name": "sample_data_processing_workflow",

  "description": "A sample workflow that demonstrates data processing with multiple jobs",

  // Job definitions.
  // The three jobs form a linear chain through "depends_on":
  //   data_download -> data_validation -> data_analysis
  // Each job refers to entries of the other top-level sections ("files",
  // "user_data", "resource_requirements", "slurm_schedulers") by their "name".
  // Commands use the absolute paths declared in "files" so that what a job
  // actually reads and writes matches the files it lists as inputs/outputs.
  "jobs": [
    {
      // Downloads the source CSV straight to the path declared for "raw_data".
      "name": "data_download",
      "command": "wget https://example.com/data.csv -O /data/input/raw_data.csv",
      "invocation_script": null,
      "cancel_on_blocking_job_failure": false,
      "resource_requirements": "small_job",
      "depends_on": null,
      "input_files": null,
      "output_files": [
        "raw_data"
      ],
      "input_user_data": null,
      "output_user_data": [
        "download_metadata"
      ],
      "scheduler": "default_scheduler"
    },
    {
      // Runs the "validation_script" file on the "raw_data" file.
      // NOTE(review): the command does not state where the script writes its
      // result; it has to produce /data/processed/validated_data.csv for the
      // declared "validated_data" output to exist - confirm against
      // validate_data.py.
      "name": "data_validation",
      "command": "python /scripts/validate_data.py /data/input/raw_data.csv",
      // Bash preamble: exit on first error and prepend /opt/validation to
      // PYTHONPATH.
      "invocation_script": "#!/bin/bash\nset -e\nexport PYTHONPATH=/opt/validation:$PYTHONPATH\n",
      "cancel_on_blocking_job_failure": true,
      "resource_requirements": "small_job",
      "depends_on": [
        "data_download"
      ],
      "input_files": [
        "raw_data",
        "validation_script"
      ],
      "output_files": [
        "validated_data"
      ],
      "input_user_data": [
        "download_metadata"
      ],
      "output_user_data": [
        "validation_results"
      ],
      "scheduler": "default_scheduler"
    },
    {
      // Runs the "analysis_script" file on the "validated_data" file and
      // writes to the path declared for "analysis_results". This is the only
      // job that uses the GPU scheduler and the "large_job" resource profile.
      "name": "data_analysis",
      "command": "python /scripts/analyze_data.py /data/processed/validated_data.csv --output /data/output/results.json",
      "invocation_script": null,
      "cancel_on_blocking_job_failure": true,
      "resource_requirements": "large_job",
      "depends_on": [
        "data_validation"
      ],
      "input_files": [
        "validated_data",
        "analysis_script"
      ],
      "output_files": [
        "analysis_results"
      ],
      "input_user_data": [
        "validation_results"
      ],
      "output_user_data": [
        "final_analysis"
      ],
      "scheduler": "gpu_scheduler"
    }
  ],
  
  // File definitions.
  // Each entry gives a logical name to a path; jobs refer to these names in
  // their "input_files" / "output_files" lists.
  //   raw_data          - output of data_download, input of data_validation
  //   validated_data    - output of data_validation, input of data_analysis
  //   analysis_results  - output of data_analysis
  //   validation_script - input of data_validation (no job lists it as output)
  //   analysis_script   - input of data_analysis (no job lists it as output)
  "files": [
    {
      "name": "raw_data",
      "path": "/data/input/raw_data.csv"
    },
    {
      "name": "validated_data",
      "path": "/data/processed/validated_data.csv"
    },
    {
      "name": "analysis_results",
      "path": "/data/output/results.json"
    },
    {
      "name": "validation_script",
      "path": "/scripts/validate_data.py"
    },
    {
      "name": "analysis_script",
      "path": "/scripts/analyze_data.py"
    }
  ],
  
  // User data.
  // Named records that jobs refer to in their "input_user_data" /
  // "output_user_data" lists. The shape of "data" differs per record.
  // NOTE(review): "is_ephemeral" presumably controls whether a record is kept
  // once it is no longer needed - confirm against the workflow tool's schema.
  "user_data": [
    {
      // Listed as output of data_download and input of data_validation.
      "name": "download_metadata",
      "data": {
        "source_url": "https://example.com/data.csv",
        "download_timestamp": "2024-01-15T10:30:00Z",
        "file_size_bytes": 1048576
      },
      "is_ephemeral": true
    },
    {
      // Listed as output of data_validation and input of data_analysis.
      "name": "validation_results",
      "data": {
        "validation_rules": [
          "no_nulls",
          "valid_dates",
          "numeric_ranges"
        ],
        "passed": true,
        "row_count": 10000
      },
      "is_ephemeral": false
    },
    {
      // Listed as output of data_analysis; no job lists it as input.
      "name": "final_analysis",
      "data": {
        "analysis_type": "statistical_summary",
        "confidence_level": 0.95
      },
      "is_ephemeral": false
    }
  ],
  
  // Resource requirements.
  // Named resource profiles that jobs select through their
  // "resource_requirements" field: "small_job" is used by data_download and
  // data_validation, "large_job" by data_analysis.
  // NOTE(review): "runtime" values look like ISO 8601 durations (PT30M, PT2H)
  // and "memory" like a size with a unit suffix - confirm the accepted formats.
  "resource_requirements": [
    {
      "name": "small_job",
      "num_cpus": 1,
      "num_gpus": 0,
      "num_nodes": 1,
      "memory": "2g",
      "runtime": "PT30M"
    },
    {
      "name": "large_job",
      "num_cpus": 4,
      "num_gpus": 1,
      "num_nodes": 1,
      "memory": "16g",
      "runtime": "PT2H"
    }
  ],
  
  // Slurm schedulers.
  // Named scheduler configurations that jobs select through their "scheduler"
  // field: "default_scheduler" is used by data_download and data_validation,
  // "gpu_scheduler" by data_analysis.
  // This is the last member of the top-level object, so its closing bracket
  // carries no trailing comma; that keeps the file loadable by parsers that
  // accept comments but not trailing commas.
  "slurm_schedulers": [
    {
      "name": "default_scheduler",
      "account": "project_account",
      "gres": null,
      "mem": "8G",
      "nodes": 1,
      "ntasks_per_node": 1,
      "partition": "general",
      "qos": "normal",
      "tmp": "10G",
      "walltime": "01:00:00",
      "extra": "--constraint=haswell"
    },
    {
      // The only configuration that requests a generic resource ("gpu:1").
      "name": "gpu_scheduler",
      "account": "gpu_project",
      "gres": "gpu:1",
      "mem": "32G",
      "nodes": 1,
      "ntasks_per_node": 1,
      "partition": "gpu",
      "qos": "high",
      "tmp": "50G",
      "walltime": "04:00:00",
      "extra": "--constraint=v100"
    }
  ]

}
