{
"name": "sample_data_processing_workflow",
"description": "A sample workflow that demonstrates data processing with multiple jobs",
"jobs": [
{
"name": "data_download",
"command": "wget https://example.com/data.csv -O /data/input/raw_data.csv",
"invocation_script": null,
"cancel_on_blocking_job_failure": false,
"resource_requirements": "small_job",
"depends_on": null,
"input_files": null,
"output_files": ["raw_data"],
"input_user_data": null,
"output_user_data": ["download_metadata"],
"scheduler": "default_scheduler"
},
{
"name": "data_validation",
"command": "python /scripts/validate_data.py /data/input/raw_data.csv",
"invocation_script": "#!/bin/bash\nset -e\nexport PYTHONPATH=/opt/validation:$PYTHONPATH\n",
"cancel_on_blocking_job_failure": true,
"resource_requirements": "small_job",
"depends_on": ["data_download"],
"input_files": ["raw_data", "validation_script"],
"output_files": ["validated_data"],
"input_user_data": ["download_metadata"],
"output_user_data": ["validation_results"],
"scheduler": "default_scheduler"
},
{
"name": "data_analysis",
"command": "python /scripts/analyze_data.py /data/processed/validated_data.csv --output /data/output/results.json",
"invocation_script": null,
"cancel_on_blocking_job_failure": true,
"resource_requirements": "large_job",
"depends_on": ["data_validation"],
"input_files": ["validated_data", "analysis_script"],
"output_files": ["analysis_results"],
"input_user_data": ["validation_results"],
"output_user_data": ["final_analysis"],
"scheduler": "gpu_scheduler"
}
],
"files": [
{
"name": "raw_data",
"path": "/data/input/raw_data.csv"
},
{
"name": "validated_data",
"path": "/data/processed/validated_data.csv"
},
{
"name": "analysis_results",
"path": "/data/output/results.json"
},
{
"name": "validation_script",
"path": "/scripts/validate_data.py"
},
{
"name": "analysis_script",
"path": "/scripts/analyze_data.py"
}
],
"user_data": [
{
"name": "download_metadata",
"data": {
"source_url": "https://example.com/data.csv",
"download_timestamp": "2024-01-15T10:30:00Z",
"file_size_bytes": 1048576
},
"is_ephemeral": true
},
{
"name": "validation_results",
"data": {
"validation_rules": ["no_nulls", "valid_dates", "numeric_ranges"],
"passed": true,
"row_count": 10000
},
"is_ephemeral": false
},
{
"name": "final_analysis",
"data": {
"analysis_type": "statistical_summary",
"confidence_level": 0.95
},
"is_ephemeral": false
}
],
"resource_requirements": [
{
"name": "small_job",
"num_cpus": 1,
"num_gpus": 0,
"num_nodes": 1,
"memory": "2g",
"runtime": "PT30M"
},
{
"name": "large_job",
"num_cpus": 4,
"num_gpus": 1,
"num_nodes": 1,
"memory": "16g",
"runtime": "PT2H"
}
],
"slurm_schedulers": [
{
"name": "default_scheduler",
"account": "project_account",
"gres": null,
"nodes": 1,
"partition": "general",
"tmp": "10G",
"walltime": "01:00:00",
"extra": "--constraint=haswell"
},
{
"name": "gpu_scheduler",
"account": "gpu_project",
"gres": "gpu:1",
"nodes": 1,
"partition": "gpu",
"tmp": "50G",
"walltime": "04:00:00",
"extra": "--constraint=v100"
}
]
}