{
  // Workflow metadata
  "name": "Data Processing Pipeline with Actions",

  "description": "Demonstrates multiple action types for different job groups",

  // Job definitions
  "jobs": [
    {
      "name": "download_dataset_{dataset}",
      "command": "wget https://data.example.com/{dataset}.tar.gz -O raw/{dataset}.tar.gz && tar -xzf raw/{dataset}.tar.gz -C raw/",
      "resource_requirements": "download",
      "parameters": {
        "dataset": "['users', 'transactions', 'products', 'reviews']"
      }
    },
    {
      "name": "validate_{dataset}",
      "command": "python scripts/validate.py --input raw/{dataset} --output validated/{dataset}",
      "resource_requirements": "cpu",
      "depends_on": [
        "download_dataset_{dataset}"
      ],
      "parameters": {
        "dataset": "['users', 'transactions', 'products', 'reviews']"
      }
    },
    {
      "name": "transform_{dataset}",
      "command": "python scripts/transform.py --input validated/{dataset} --output transformed/{dataset}",
      "resource_requirements": "cpu",
      "depends_on": [
        "validate_{dataset}"
      ],
      "parameters": {
        "dataset": "['users', 'transactions', 'products', 'reviews']"
      }
    },
    {
      "name": "aggregate_data",
      "command": "python scripts/aggregate.py --input transformed/ --output aggregated/final.parquet",
      "resource_requirements": "large",
      "depends_on_regexes": [
        "transform_.*"
      ]
    },
    {
      "name": "generate_report",
      "command": "python scripts/report.py --data aggregated/final.parquet --output reports/summary.html",
      "resource_requirements": "cpu",
      "depends_on": [
        "aggregate_data"
      ]
    }
  ],
  
  // Resource requirements
  "resource_requirements": [
    {
      "name": "download",
      "num_cpus": 1,
      "num_gpus": 0,
      "num_nodes": 1,
      "memory": "2g",
      "runtime": "PT30M"
    },
    {
      "name": "cpu",
      "num_cpus": 4,
      "num_gpus": 0,
      "num_nodes": 1,
      "memory": "8g",
      "runtime": "PT1H"
    },
    {
      "name": "large",
      "num_cpus": 16,
      "num_gpus": 0,
      "num_nodes": 1,
      "memory": "64g",
      "runtime": "PT2H"
    }
  ],
  
  // Workflow actions
  "actions": [
    {
      "trigger_type": "on_workflow_start",
      "action_type": "run_commands",
      "commands": [
        "echo '=== Data Pipeline Started at $(date) ===' | tee pipeline.log",
        "mkdir -p raw validated transformed aggregated reports archives",
        "echo 'Workspace initialized'"
      ]
    },
    {
      "trigger_type": "on_jobs_ready",
      "action_type": "run_commands",
      "job_name_regexes": [
        "download_dataset_.*"
      ],
      "commands": [
        "echo 'Download phase starting...' | tee -a pipeline.log",
        "echo 'Monitoring network bandwidth...'"
      ]
    },
    {
      "trigger_type": "on_jobs_complete",
      "action_type": "run_commands",
      "job_name_regexes": [
        "validate_.*"
      ],
      "commands": [
        "echo 'Validation complete. Cleaning up raw data...' | tee -a pipeline.log",
        "du -sh raw/",
        "rm -rf raw/*.tar.gz",
        "echo 'Raw archives removed to save space'"
      ]
    },
    {
      "trigger_type": "on_jobs_complete",
      "action_type": "run_commands",
      "job_name_regexes": [
        "transform_.*"
      ],
      "commands": [
        "echo 'Transformation complete. Archiving validated data...' | tee -a pipeline.log",
        "tar -czf archives/validated_$(date +%Y%m%d).tar.gz validated/",
        "rm -rf validated/",
        "echo 'Validated data archived and removed'"
      ]
    },
    {
      "trigger_type": "on_workflow_complete",
      "action_type": "run_commands",
      "commands": [
        "echo '=== Pipeline Completed at $(date) ===' | tee -a pipeline.log",
        "echo 'Generating pipeline metrics...'",
        "python scripts/metrics.py --log pipeline.log --output reports/metrics.json",
        "echo 'Final data size:'",
        "du -sh aggregated/ transformed/ archives/",
        "echo 'Sending notification...'",
        "curl -X POST https://hooks.slack.com/services/YOUR/WEBHOOK -d '{\"text\":\"Data pipeline completed successfully\"}'",
        "echo 'Pipeline complete!'"
      ]
    }
  ]

}
