Skip to main content

gha_container_proof/
workflow.rs

1//! Workflow YAML scanner for `check-workflow`.
2//!
3//! Walks `.github/workflows/*.yml`, finds:
4//!
5//! - `jobs.<job_id>.container` (string or object),
6//! - `steps[*].uses: docker://...`,
7//! - `steps[*].uses: ./local-action` whose `action.yml` declares
8//!   `runs.using: docker`.
9//!
10//! Emits one [`Subject`] per surface plus receipt-level rollup checks.
11
12use anyhow::{Context, Result};
13use camino::{Utf8Path, Utf8PathBuf};
14use serde_yaml::{Mapping, Value as YamlValue};
15use std::fs;
16
17use crate::model::{Check, Subject};
18use crate::plan::{ActionPlanInput, JobPlanInput, plan_action, plan_job};
19
20#[derive(Debug, Clone)]
21pub struct CheckWorkflowOptions {
22    pub repo_root: Utf8PathBuf,
23    pub workspace: Utf8PathBuf,
24    pub workflows: Vec<Utf8PathBuf>,
25    pub runner_os: crate::model::RunnerOs,
26}
27
28#[derive(Debug, Clone, Default)]
29pub struct ScanResult {
30    pub subjects: Vec<Subject>,
31    pub checks: Vec<Check>,
32}
33
34pub fn scan_workflows(options: &CheckWorkflowOptions) -> Result<ScanResult> {
35    let workflows = discover_workflows(options)?;
36    let mut result = ScanResult::default();
37
38    if workflows.is_empty() {
39        result.checks.push(Check::skip(
40            "container.workflow.read",
41            format!(
42                "no workflows discovered under {} or via --workflow",
43                options.repo_root.join(".github/workflows")
44            ),
45        ));
46        return Ok(result);
47    }
48
49    for workflow in workflows {
50        scan_workflow(options, &workflow, &mut result)?;
51    }
52
53    Ok(result)
54}
55
56fn discover_workflows(options: &CheckWorkflowOptions) -> Result<Vec<Utf8PathBuf>> {
57    if !options.workflows.is_empty() {
58        return Ok(options.workflows.clone());
59    }
60
61    let dir = options.repo_root.join(".github").join("workflows");
62    if !dir.exists() {
63        return Ok(Vec::new());
64    }
65
66    let mut workflows = Vec::new();
67    for item in fs::read_dir(&dir).with_context(|| format!("reading workflows dir {dir}"))? {
68        let entry = item?;
69        let path = Utf8PathBuf::from_path_buf(entry.path())
70            .map_err(|_| anyhow::anyhow!("non-UTF-8 workflow path"))?;
71        if matches!(path.extension(), Some("yml" | "yaml")) {
72            workflows.push(path);
73        }
74    }
75    workflows.sort();
76    Ok(workflows)
77}
78
79fn scan_workflow(
80    options: &CheckWorkflowOptions,
81    workflow: &Utf8Path,
82    result: &mut ScanResult,
83) -> Result<()> {
84    let raw =
85        fs::read_to_string(workflow).with_context(|| format!("reading workflow {workflow}"))?;
86    let yaml: YamlValue =
87        serde_yaml::from_str(&raw).with_context(|| format!("parsing workflow {workflow}"))?;
88
89    result.checks.push(
90        Check::pass(
91            "container.workflow.read",
92            format!("workflow `{workflow}` parsed"),
93        )
94        .at(workflow.to_string()),
95    );
96
97    let Some(jobs) = yaml.get("jobs").and_then(YamlValue::as_mapping) else {
98        result.checks.push(
99            Check::skip(
100                "container.workflow.jobs_missing",
101                format!("workflow `{workflow}` has no jobs mapping"),
102            )
103            .at(workflow.to_string()),
104        );
105        return Ok(());
106    };
107
108    let mut produced_any = false;
109
110    for (job_key, job_value) in jobs {
111        let job_id = yaml_string(job_key).unwrap_or_else(|| "<unknown>".to_owned());
112        let Some(job_mapping) = job_value.as_mapping() else {
113            result.checks.push(
114                Check::fail("container.workflow.job", "job value must be a mapping")
115                    .at(format!("{workflow}:jobs.{job_id}")),
116            );
117            continue;
118        };
119
120        // Job container.
121        if let Some(container_value) = get_in(job_mapping, "container") {
122            let job_input = job_input_from_container(
123                options,
124                &job_id,
125                container_value,
126                runs_on(job_mapping),
127                format!("{workflow}:jobs.{job_id}.container"),
128            );
129            let mut subject = plan_job(&job_input);
130            subject.finalize();
131            result.subjects.push(subject);
132            produced_any = true;
133        }
134
135        // Service-container delegation hint.
136        if job_mapping
137            .get(YamlValue::String("services".to_owned()))
138            .and_then(YamlValue::as_mapping)
139            .is_some_and(|map| !map.is_empty())
140        {
141            result.checks.push(
142                Check::pass(
143                    "container.services.delegated",
144                    format!(
145                        "job `{job_id}` also declares `services`; delegate to gha-service-proof"
146                    ),
147                )
148                .at(format!("{workflow}:jobs.{job_id}.services")),
149            );
150        }
151
152        // Steps: docker:// and local action manifests.
153        if let Some(steps) = job_mapping
154            .get(YamlValue::String("steps".to_owned()))
155            .and_then(YamlValue::as_sequence)
156        {
157            for (index, step) in steps.iter().enumerate() {
158                let Some(step_map) = step.as_mapping() else {
159                    continue;
160                };
161                let Some(uses) = string_in(step_map, "uses") else {
162                    continue;
163                };
164                let step_id =
165                    string_in(step_map, "id").unwrap_or_else(|| format!("step-{}", index + 1));
166                let location = format!("{workflow}:jobs.{job_id}.steps.{step_id}.uses");
167
168                if let Some(image) = uses.strip_prefix("docker://") {
169                    let action_input = ActionPlanInput {
170                        action_ref: uses.clone(),
171                        step_id: Some(step_id),
172                        action_path: None,
173                        using: Some("docker".to_owned()),
174                        image: Some(format!("docker://{image}")),
175                        entrypoint: None,
176                        pre_entrypoint: None,
177                        post_entrypoint: None,
178                        args: Vec::new(),
179                        env: Vec::new(),
180                        location: Some(location),
181                    };
182                    let mut subject = plan_action(&action_input);
183                    subject.finalize();
184                    result.subjects.push(subject);
185                    produced_any = true;
186                } else if uses.starts_with("./") {
187                    let action_dir = options.workspace.join(uses.trim_start_matches("./"));
188                    let manifest_path = pick_action_file(&action_dir);
189                    let action_input = ActionPlanInput {
190                        action_ref: uses.clone(),
191                        step_id: Some(step_id),
192                        action_path: manifest_path.clone(),
193                        using: None,
194                        image: None,
195                        entrypoint: None,
196                        pre_entrypoint: None,
197                        post_entrypoint: None,
198                        args: Vec::new(),
199                        env: Vec::new(),
200                        location: Some(location),
201                    };
202                    let mut subject = plan_action(&action_input);
203                    // Only count this as a container subject if the action is
204                    // actually Docker-flavored. Composite/Node actions are
205                    // surfaced as "unsupported" (info only) and dropped here so
206                    // the rollup stays focused.
207                    let is_docker_action = subject.checks.iter().any(|check| {
208                        check.id == "action.using.docker"
209                            || check.id == "action.image.docker_uri"
210                            || check.id == "action.image.dockerfile"
211                            || check.id == "action.image.dockerfile_missing"
212                    });
213                    if is_docker_action || manifest_path.is_none() {
214                        subject.finalize();
215                        result.subjects.push(subject);
216                        produced_any = true;
217                    }
218                }
219                // Remote `owner/repo@ref` action references are intentionally
220                // skipped here; check-workflow is local-first. Use
221                // `plan-action` with --action-path on a mirrored manifest.
222            }
223        }
224    }
225
226    if !produced_any {
227        result.checks.push(
228            Check::skip(
229                "container.workflow.no_container",
230                format!(
231                    "workflow `{workflow}` defines no job containers, docker:// uses, or local Docker actions"
232                ),
233            )
234            .at(workflow.to_string()),
235        );
236    } else {
237        result.checks.push(
238            Check::pass(
239                "container.workflow.subjects",
240                format!(
241                    "workflow `{workflow}` produced {} container subject(s)",
242                    result
243                        .subjects
244                        .iter()
245                        .filter(|subject| subject.checks.iter().any(|c| c
246                            .location
247                            .as_deref()
248                            .is_some_and(|loc| loc.starts_with(workflow.as_str()))))
249                        .count()
250                ),
251            )
252            .at(workflow.to_string()),
253        );
254    }
255
256    Ok(())
257}
258
259fn job_input_from_container(
260    options: &CheckWorkflowOptions,
261    job_id: &str,
262    container: &YamlValue,
263    runs_on: Vec<String>,
264    location: String,
265) -> JobPlanInput {
266    let mut input = JobPlanInput {
267        job_id: job_id.to_owned(),
268        runner_os: options.runner_os,
269        runs_on,
270        container_image: None,
271        env: Vec::new(),
272        ports: Vec::new(),
273        volumes: Vec::new(),
274        options: String::new(),
275        credentials_username_present: false,
276        credentials_password_present: false,
277        location: Some(location),
278    };
279
280    if let Some(image) = yaml_string(container) {
281        input.container_image = Some(image);
282        return input;
283    }
284
285    let Some(mapping) = container.as_mapping() else {
286        return input;
287    };
288
289    input.container_image = string_in(mapping, "image");
290    input.env = string_map(mapping, "env");
291    input.ports = string_list(mapping, "ports");
292    input.volumes = string_list(mapping, "volumes");
293    input.options = string_in(mapping, "options").unwrap_or_default();
294
295    if let Some(credentials) = mapping
296        .get(YamlValue::String("credentials".to_owned()))
297        .and_then(YamlValue::as_mapping)
298    {
299        input.credentials_username_present = string_in(credentials, "username")
300            .map(|value| !value.trim().is_empty())
301            .unwrap_or(false);
302        input.credentials_password_present = string_in(credentials, "password")
303            .map(|value| !value.trim().is_empty())
304            .unwrap_or(false);
305    }
306
307    input
308}
309
310fn runs_on(job_mapping: &Mapping) -> Vec<String> {
311    let Some(value) = job_mapping.get(YamlValue::String("runs-on".to_owned())) else {
312        return Vec::new();
313    };
314    list_from_value(value)
315}
316
317fn pick_action_file(dir: &Utf8Path) -> Option<Utf8PathBuf> {
318    let yml = dir.join("action.yml");
319    if yml.exists() {
320        return Some(yml);
321    }
322    let yaml = dir.join("action.yaml");
323    if yaml.exists() {
324        return Some(yaml);
325    }
326    None
327}
328
329fn get_in<'a>(mapping: &'a Mapping, key: &str) -> Option<&'a YamlValue> {
330    mapping.get(YamlValue::String(key.to_owned()))
331}
332
333fn string_in(mapping: &Mapping, key: &str) -> Option<String> {
334    get_in(mapping, key).and_then(yaml_string)
335}
336
337fn string_list(mapping: &Mapping, key: &str) -> Vec<String> {
338    get_in(mapping, key)
339        .map(list_from_value)
340        .unwrap_or_default()
341}
342
343fn list_from_value(value: &YamlValue) -> Vec<String> {
344    match value {
345        YamlValue::Sequence(items) => items.iter().filter_map(yaml_string).collect(),
346        _ => yaml_string(value).into_iter().collect(),
347    }
348}
349
350fn string_map(mapping: &Mapping, key: &str) -> Vec<(String, String)> {
351    let Some(inner) = get_in(mapping, key).and_then(YamlValue::as_mapping) else {
352        return Vec::new();
353    };
354    inner
355        .iter()
356        .filter_map(|(k, v)| match (yaml_string(k), yaml_string(v)) {
357            (Some(k), Some(v)) => Some((k, v)),
358            _ => None,
359        })
360        .collect()
361}
362
363fn yaml_string(value: &YamlValue) -> Option<String> {
364    match value {
365        YamlValue::String(value) => Some(value.clone()),
366        YamlValue::Number(value) => Some(value.to_string()),
367        YamlValue::Bool(value) => Some(value.to_string()),
368        _ => None,
369    }
370}
371
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{Compatibility, RunnerOs};
    use tempfile::tempdir;

    /// Writes `yaml` to `<dir>/.github/workflows/ci.yml` and returns its path.
    fn write_workflow(dir: &Utf8Path, yaml: &str) -> Utf8PathBuf {
        let workflows_dir = dir.join(".github").join("workflows");
        std::fs::create_dir_all(&workflows_dir).unwrap();
        let path = workflows_dir.join("ci.yml");
        std::fs::write(&path, yaml).unwrap();
        path
    }

    /// Options that use `dir` as both repo root and workspace, with workflow
    /// discovery enabled and a Linux runner.
    fn options_for(dir: &Utf8Path) -> CheckWorkflowOptions {
        CheckWorkflowOptions {
            repo_root: dir.to_owned(),
            workspace: dir.to_owned(),
            workflows: Vec::new(),
            runner_os: RunnerOs::Linux,
        }
    }

    /// Scans `yaml` as the only workflow of a throwaway repository.
    fn scan_yaml(yaml: &str) -> ScanResult {
        let tmp = tempdir().unwrap();
        let root = Utf8PathBuf::from_path_buf(tmp.path().to_path_buf()).unwrap();
        write_workflow(&root, yaml);
        scan_workflows(&options_for(&root)).unwrap()
    }

    #[test]
    fn finds_simple_string_container() {
        let result = scan_yaml(
            "name: x\non: push\njobs:\n  build:\n    runs-on: ubuntu-22.04\n    container: node:22\n    steps:\n      - run: echo\n",
        );
        assert_eq!(result.subjects.len(), 1);
        assert_eq!(result.subjects[0].image.as_deref(), Some("node:22"));
    }

    #[test]
    fn finds_object_container_with_options_and_credentials() {
        let result = scan_yaml(
            "name: x\non: push\njobs:\n  build:\n    runs-on: ubuntu-22.04\n    container:\n      image: node:22\n      credentials:\n        username: u\n        password: p\n      ports:\n        - 3000\n      volumes:\n        - /host:/cache\n      options: --cpus 2\n",
        );
        let subject = &result.subjects[0];
        assert_eq!(subject.image.as_deref(), Some("node:22"));
        assert!(
            subject
                .credentials_redacted
                .contains(&"password".to_owned())
        );
        let has_port_check = subject
            .checks
            .iter()
            .any(|c| c.id == "container.port.parse");
        assert!(has_port_check);
    }

    #[test]
    fn flags_windows_runs_on() {
        let result = scan_yaml(
            "name: x\non: push\njobs:\n  build:\n    runs-on: windows-latest\n    container: node:22\n",
        );
        let subject = &result.subjects[0];
        assert_eq!(subject.classification, Compatibility::Unsupported);
        let linux_check_failed = subject.checks.iter().any(|c| {
            c.id == "container.runs_on.linux" && c.status == crate::model::CheckStatus::Fail
        });
        assert!(linux_check_failed);
    }

    #[test]
    fn flags_network_in_options() {
        let result = scan_yaml(
            "name: x\non: push\njobs:\n  build:\n    runs-on: ubuntu-22.04\n    container:\n      image: node:22\n      options: --network host\n",
        );
        let subject = &result.subjects[0];
        let has_network_check = subject
            .checks
            .iter()
            .any(|c| c.id == "container.options.network");
        assert!(has_network_check);
    }

    #[test]
    fn finds_docker_uses() {
        let result = scan_yaml(
            "name: x\non: push\njobs:\n  scan:\n    runs-on: ubuntu-22.04\n    steps:\n      - uses: docker://alpine:3\n",
        );
        assert_eq!(result.subjects.len(), 1);
        assert_eq!(
            result.subjects[0].action_ref.as_deref(),
            Some("docker://alpine:3")
        );
    }

    #[test]
    fn finds_services_and_emits_delegation_check() {
        let result = scan_yaml(
            "name: x\non: push\njobs:\n  build:\n    runs-on: ubuntu-22.04\n    container: node:22\n    services:\n      postgres:\n        image: postgres:16\n",
        );
        let delegated = result
            .checks
            .iter()
            .any(|c| c.id == "container.services.delegated");
        assert!(delegated);
    }
}
501}