Skip to main content

taudit_parse_gha/
lib.rs

1use std::collections::{BTreeMap, HashMap};
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
8/// Metadata value for marking inferred (not precisely mapped) secret references.
9const META_INFERRED_VAL: &str = "true";
10
11/// GitHub Actions workflow parser.
///
/// Field-less unit struct. Its `PipelineParser` implementation reports the
/// platform as `"github-actions"` and turns workflow YAML text into an
/// `AuthorityGraph`, returning a `TauditError` when the YAML cannot be parsed.
12pub struct GhaParser;
13
14impl PipelineParser for GhaParser {
15    fn platform(&self) -> &str {
        // Fixed platform identifier for this parser. `parse` writes the same
        // literal into the graph metadata under `META_PLATFORM`.
16        "github-actions"
17    }
18
19    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
20        let mut de = serde_yaml::Deserializer::from_str(content);
21        let doc = de
22            .next()
23            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
24        let workflow: GhaWorkflow = GhaWorkflow::deserialize(doc)
25            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
26        let extra_docs = de.next().is_some();
27
28        let mut graph = AuthorityGraph::new(source.clone());
29        graph
30            .metadata
31            .insert(META_PLATFORM.into(), "github-actions".into());
32        if workflow.permissions.is_none() {
33            // Negative-space marker: lets the
34            // `no_workflow_level_permissions_block` rule detect the absence
35            // of any top-level `permissions:` declaration without re-reading
36            // the source YAML. The same rule will additionally check for the
37            // absence of any per-job permissions block.
38            graph
39                .metadata
40                .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
41        }
42        if extra_docs {
43            graph.mark_partial(
44                GapKind::Expression,
45                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
46            );
47        }
48        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
49        let mut artifact_ids: HashMap<String, NodeId> = HashMap::new();
50
51        // Workflow-level `env:` may be a template expression (e.g. `env: ${{ matrix }}`)
52        // whose shape is unknown statically. Mark Partial once and skip env processing
53        // for that scope; static rules cannot reason about runtime-resolved env shapes.
54        if let Some(EnvSpec::Template(_)) = workflow.env {
55            graph.mark_partial(
56                GapKind::Expression,
57                "workflow-level env: uses template expression — environment variable shape unknown"
58                    .to_string(),
59            );
60        }
61
62        let is_pull_request_target = workflow
63            .triggers
64            .as_ref()
65            .map(trigger_has_pull_request_target)
66            .unwrap_or(false);
67
68        // Record every recognised trigger as a comma-separated list so rules
69        // can reason about combinations (e.g. `pull_request_target`,
70        // `pull_request`, `workflow_run`, `issue_comment`). Backwards-compatible:
71        // existing single-value consumers that match exact strings on
72        // `pull_request_target` are preserved by writing that token first when
73        // present.
74        let trigger_list = collect_trigger_names(workflow.triggers.as_ref());
75        if !trigger_list.is_empty() {
76            // Place pull_request_target first so consumers that use string
77            // equality (older rules) still match the canonical legacy value.
78            let mut ordered: Vec<&str> = Vec::new();
79            if trigger_list.iter().any(|t| t == "pull_request_target") {
80                ordered.push("pull_request_target");
81            }
82            for t in &trigger_list {
83                if t != "pull_request_target" {
84                    ordered.push(t);
85                }
86            }
87            // If we only have `pull_request_target`, write it bare so the
88            // legacy `== "pull_request_target"` predicate keeps working.
89            let value = if ordered.len() == 1 {
90                ordered[0].to_string()
91            } else {
92                ordered.join(",")
93            };
94            graph.metadata.insert(META_TRIGGER.into(), value);
95        } else if is_pull_request_target {
96            graph
97                .metadata
98                .insert(META_TRIGGER.into(), "pull_request_target".into());
99        }
100
101        // Stamp the full trigger list so non-PRT-only rules can fire on
102        // issue_comment, pull_request_review*, workflow_run, etc. Use a
103        // separate key from META_TRIGGER so the existing
104        // trigger_context_mismatch contract is preserved.
105        if let Some(triggers) = workflow.triggers.as_ref() {
106            let names = collect_trigger_names(Some(triggers));
107            if !names.is_empty() {
108                graph.metadata.insert(META_TRIGGERS.into(), names.join(","));
109            }
110            let inputs = collect_dispatch_inputs(triggers);
111            if !inputs.is_empty() {
112                graph
113                    .metadata
114                    .insert(META_DISPATCH_INPUTS.into(), inputs.join(","));
115            }
116            let call_inputs = collect_workflow_call_inputs(triggers);
117            if !call_inputs.is_empty() {
118                graph
119                    .metadata
120                    .insert(META_GHA_WORKFLOW_CALL_INPUTS.into(), call_inputs.join(","));
121            }
122        }
123
124        // Workflow-level permissions -> GITHUB_TOKEN identity node. When the
125        // workflow omits `permissions:`, the token still exists; its actual
126        // scope is inherited from enterprise/org/repo defaults, which are
127        // outside the YAML. Model it as unknown authority instead of absent.
128        let token_id = if let Some(ref perms) = workflow.permissions {
129            let perm_string = perms.to_string();
130            let scope = IdentityScope::from_permissions(&perm_string);
131            let mut meta = HashMap::new();
132            meta.insert(META_PERMISSIONS.into(), perm_string.clone());
133            meta.insert(
134                META_IDENTITY_SCOPE.into(),
135                format!("{scope:?}").to_lowercase(),
136            );
137            // OIDC: id-token: write → token is OIDC-capable (federated scope).
138            // Check the formatted substring directly — Permissions::Map fmt produces
139            // "id-token: write" so this won't false-positive on "contents: write".
140            if perm_string.contains("id-token: write") || perm_string == "write-all" {
141                meta.insert(META_OIDC.into(), "true".into());
142            }
143            Some(graph.add_node_with_metadata(
144                NodeKind::Identity,
145                "GITHUB_TOKEN",
146                TrustZone::FirstParty,
147                meta,
148            ))
149        } else {
150            let mut meta = HashMap::new();
151            meta.insert(META_IDENTITY_SCOPE.into(), "unknown".into());
152            meta.insert(META_IMPLICIT.into(), "true".into());
153            Some(graph.add_node_with_metadata(
154                NodeKind::Identity,
155                "GITHUB_TOKEN",
156                TrustZone::FirstParty,
157                meta,
158            ))
159        };
160
161        // Accumulator for `jobs.<id>.outputs.*` records across every job.
162        // Format described on `META_JOB_OUTPUTS`. Built bottom-up here, then
163        // serialized into graph metadata once after the job loop finishes.
164        let mut job_output_records: Vec<String> = Vec::new();
165
166        // Iterate jobs in sorted order so node IDs (and therefore every
167        // edge `from`/`to`, every finding `nodes_involved`, every JSON
168        // emit) are byte-deterministic across runs.
169        let mut sorted_jobs: Vec<(&String, &GhaJob)> = workflow.jobs.iter().collect();
170        sorted_jobs.sort_by(|a, b| a.0.cmp(b.0));
171        for (job_name, job) in sorted_jobs {
172            // YAML `steps[].id` -> bool tracking whether that step holds an
173            // OIDC identity. Used when classifying job outputs that read
174            // `${{ steps.<id>.outputs.X }}` so R4 can distinguish OIDC-derived
175            // values from plain step outputs.
176            let mut step_oidc_by_yaml_id: HashMap<String, bool> = HashMap::new();
177            // Job-level `env:` may be a template expression (e.g. `env: ${{ matrix }}`)
178            // whose shape is unknown statically. Mark Partial once per job and skip
179            // env processing for that scope.
180            if let Some(EnvSpec::Template(_)) = job.env {
181                graph.mark_partial(
182                    GapKind::Expression,
183                    format!(
184                        "job '{job_name}' env: uses template expression — environment variable shape unknown"
185                    ),
186                );
187            }
188
189            // Job-level permissions override workflow-level
190            let job_token_id = if let Some(ref perms) = job.permissions {
191                let perm_string = perms.to_string();
192                let scope = IdentityScope::from_permissions(&perm_string);
193                let mut meta = HashMap::new();
194                meta.insert(META_PERMISSIONS.into(), perm_string.clone());
195                meta.insert(
196                    META_IDENTITY_SCOPE.into(),
197                    format!("{scope:?}").to_lowercase(),
198                );
199                if perm_string.contains("id-token: write") {
200                    meta.insert(META_OIDC.into(), "true".into());
201                }
202                Some(graph.add_node_with_metadata(
203                    NodeKind::Identity,
204                    format!("GITHUB_TOKEN ({job_name})"),
205                    TrustZone::FirstParty,
206                    meta,
207                ))
208            } else {
209                token_id
210            };
211
212            // Reusable workflow: job.uses= means this job delegates to another workflow.
213            // We cannot resolve it inline — mark the graph partial and skip steps.
214            if let Some(ref uses) = job.uses {
215                let trust_zone = if is_pin_semantically_valid(uses) {
216                    TrustZone::ThirdParty
217                } else {
218                    TrustZone::Untrusted
219                };
220                let rw_id = graph.add_node(NodeKind::Image, uses, trust_zone);
221                // Synthetic step represents this job delegating to the called workflow
222                let job_step_id = graph.add_node(NodeKind::Step, job_name, TrustZone::FirstParty);
223                if let Some(node) = graph.nodes.get_mut(job_step_id) {
224                    node.metadata.insert(META_JOB_NAME.into(), job_name.clone());
225                    node.metadata.insert(
226                        META_GHA_ACTION.into(),
227                        uses.split('@').next().unwrap_or(uses).into(),
228                    );
229                    if let Some(runs_on) = job.runs_on.as_ref().and_then(yaml_value_compact) {
230                        node.metadata.insert(META_GHA_RUNS_ON.into(), runs_on);
231                    }
232                    let condition = combined_condition(job.if_cond.as_deref(), None);
233                    if let Some(condition) = condition {
234                        node.metadata.insert(META_CONDITION.into(), condition);
235                    }
236                    if let Some(with) = job.with.as_ref() {
237                        let mut entries: Vec<(&String, &serde_yaml::Value)> = with.iter().collect();
238                        entries.sort_by(|a, b| a.0.cmp(b.0));
239                        let rendered: Vec<String> = entries
240                            .into_iter()
241                            .filter_map(|(key, value)| {
242                                yaml_scalar_to_string(value).map(|scalar| format!("{key}={scalar}"))
243                            })
244                            .collect();
245                        if !rendered.is_empty() {
246                            node.metadata
247                                .insert(META_GHA_WITH_INPUTS.into(), rendered.join("\n"));
248                        }
249                    }
250                    // Stamp `secrets: inherit` so downstream rules can flag wide-open
251                    // secret forwarding. The `secrets:` block on a reusable-workflow
252                    // call is either the literal string "inherit" or a mapping —
253                    // only the string form forwards every caller secret.
254                    if let Some(serde_yaml::Value::String(s)) = job.secrets.as_ref() {
255                        if s == "inherit" {
256                            node.metadata
257                                .insert(META_SECRETS_INHERIT.into(), "true".into());
258                        }
259                    }
260                }
261                graph.add_edge(job_step_id, rw_id, EdgeKind::DelegatesTo);
262                if let Some(tok_id) = job_token_id {
263                    graph.add_edge(job_step_id, tok_id, EdgeKind::HasAccessTo);
264                }
265
266                // F13: workflow-level `env:` is in scope for the caller's
267                // evaluation of `secrets:` mapping values and `with:` inputs
268                // even when delegating to a reusable workflow. Job-level
269                // `env:` does NOT propagate into reusable-workflow callees per
270                // GHA semantics, so we merge ONLY workflow.env. (The synthetic
271                // step represents the caller-side evaluation context, not the
272                // callee's execution environment.)
273                if let Some(env_map) = workflow.env.as_ref().and_then(EnvSpec::as_map) {
274                    let mut entries: Vec<(&String, &String)> = env_map.iter().collect();
275                    entries.sort_by(|a, b| a.0.cmp(b.0));
276                    for (_k, env_val) in entries {
277                        for secret_name in iter_secret_refs(env_val) {
278                            let secret_id =
279                                find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
280                            graph.add_edge(job_step_id, secret_id, EdgeKind::HasAccessTo);
281                        }
282                    }
283                }
284
285                // F6: `secrets:` mapping form on a reusable-workflow call —
286                // `secrets: { CHILD: ${{ secrets.PARENT }} }`. Each value is a
287                // template expression evaluated in the caller context, so any
288                // `secrets.X` reference produces a HasAccessTo edge to the
289                // caller-side secret. (The literal string `inherit` form is
290                // already handled above.) Sorted by key for determinism —
291                // mirrors the v1.1.0-beta.1 sort pattern used elsewhere.
292                if let Some(serde_yaml::Value::Mapping(map)) = job.secrets.as_ref() {
293                    let mut entries: Vec<(&str, &str)> = map
294                        .iter()
295                        .filter_map(|(k, v)| Some((k.as_str()?, v.as_str()?)))
296                        .collect();
297                    entries.sort_by(|a, b| a.0.cmp(b.0));
298                    for (_child_name, val) in entries {
299                        for secret_name in iter_secret_refs(val) {
300                            let secret_id =
301                                find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
302                            graph.add_edge(job_step_id, secret_id, EdgeKind::HasAccessTo);
303                        }
304                    }
305                }
306
307                graph.mark_partial(
308                    GapKind::Structural,
309                    format!(
310                        "reusable workflow '{uses}' in job '{job_name}' cannot be resolved inline — authority within the called workflow is unknown"
311                    ),
312                );
313                continue;
314            }
315
316            // Matrix strategy: authority shape may differ per matrix entry — mark Partial
317            if job
318                .strategy
319                .as_ref()
320                .and_then(|s| s.get("matrix"))
321                .is_some()
322            {
323                graph.mark_partial(
324                    GapKind::Expression,
325                    format!(
326                        "job '{job_name}' uses matrix strategy — authority shape may differ per matrix entry"
327                    ),
328                );
329            }
330
331            // Self-hosted runner detection: `runs-on: self-hosted` or a sequence
332            // that includes `self-hosted`. Creates an Image node tagged with
333            // META_SELF_HOSTED so downstream rules can flag the job. Hosted
334            // runners (ubuntu-latest, etc.) are not represented as Image nodes —
335            // this keeps the graph focused on non-default attack surface.
336            if is_self_hosted_runner(job.runs_on.as_ref()) {
337                let runner_name = runner_label(job.runs_on.as_ref()).unwrap_or("self-hosted");
338                let mut meta = HashMap::new();
339                meta.insert(META_SELF_HOSTED.into(), "true".into());
340                graph.add_node_with_metadata(
341                    NodeKind::Image,
342                    runner_name,
343                    TrustZone::FirstParty,
344                    meta,
345                );
346            }
347
348            // Container: job-level container image — add as Image node and capture ID
349            // so each step in this job can be linked to it via UsesImage.
350            let container_image_id: Option<NodeId> = if let Some(ref container) = job.container {
351                let image_str = container.image();
352                let pinned = is_docker_digest_pinned(image_str);
353                let trust_zone = if pinned {
354                    TrustZone::ThirdParty
355                } else {
356                    TrustZone::Untrusted
357                };
358                let mut meta = HashMap::new();
359                meta.insert(META_CONTAINER.into(), "true".into());
360                if let Some(options) = container.options() {
361                    if !options.is_empty() {
362                        meta.insert(META_GHA_CONTAINER_OPTIONS.into(), options.to_string());
363                    }
364                }
365                if pinned {
366                    if let Some(digest) = image_str.split("@sha256:").nth(1) {
367                        meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
368                    }
369                }
370                Some(graph.add_node_with_metadata(NodeKind::Image, image_str, trust_zone, meta))
371            } else {
372                None
373            };
374
375            for (step_idx, step) in job.steps.iter().enumerate() {
376                let default_name = format!("{job_name}[{step_idx}]");
377                let step_name = step.name.as_deref().unwrap_or(&default_name);
378
379                // Determine trust zone and create image node if `uses:` present
380                let (trust_zone, image_node_id) = if let Some(ref uses) = step.uses {
381                    let (zone, image_id) = classify_action(uses, &mut graph);
382                    (zone, Some(image_id))
383                } else if is_pull_request_target {
384                    // run: step in a pull_request_target workflow — may execute fork code
385                    (TrustZone::Untrusted, None)
386                } else {
387                    // Inline `run:` step — first party
388                    (TrustZone::FirstParty, None)
389                };
390
391                let step_id = graph.add_node(NodeKind::Step, step_name, trust_zone);
392
393                // Stamp parent job name so consumers (e.g. `taudit map --job`)
394                // can attribute steps back to their containing job. Also
395                // stamp the raw `run:` script body so script-aware rules
396                // (runtime_script_fetched_from_floating_url,
397                // untrusted_api_response_to_env_sink) can pattern-match on
398                // the actual command text the runner will execute.
399                if let Some(node) = graph.nodes.get_mut(step_id) {
400                    node.metadata.insert(META_JOB_NAME.into(), job_name.clone());
401                    if let Some(runs_on) = job.runs_on.as_ref().and_then(yaml_value_compact) {
402                        node.metadata.insert(META_GHA_RUNS_ON.into(), runs_on);
403                    }
404                    let condition =
405                        combined_condition(job.if_cond.as_deref(), step.if_cond.as_deref());
406                    if let Some(condition) = condition {
407                        node.metadata.insert(META_CONDITION.into(), condition);
408                    }
409                    if let Some(ref uses) = step.uses {
410                        let action = uses.split('@').next().unwrap_or(uses);
411                        node.metadata.insert(META_GHA_ACTION.into(), action.into());
412                        if let Some(with) = step.with.as_ref() {
413                            let mut entries: Vec<(&String, &serde_yaml::Value)> =
414                                with.iter().collect();
415                            entries.sort_by(|a, b| a.0.cmp(b.0));
416                            let mut rendered = Vec::new();
417                            for (key, value) in entries {
418                                if let Some(scalar) = yaml_scalar_to_string(value) {
419                                    rendered.push(format!("{key}={scalar}"));
420                                }
421                            }
422                            if !rendered.is_empty() {
423                                node.metadata
424                                    .insert(META_GHA_WITH_INPUTS.into(), rendered.join("\n"));
425                            }
426                        }
427                    }
428                    if let Some(ref body) = step.run {
429                        if !body.is_empty() {
430                            node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
431                        }
432                    }
433                    // Fork-check stamping. A step inherits its job-level
434                    // `if:` (if any) plus its own `if:`. Either one carrying
435                    // the standard fork-check pattern is sufficient — both
436                    // forms guard the step from running on fork-PR contexts.
437                    let job_check = job
438                        .if_cond
439                        .as_deref()
440                        .map(is_fork_check_expression)
441                        .unwrap_or(false);
442                    let step_check = step
443                        .if_cond
444                        .as_deref()
445                        .map(is_fork_check_expression)
446                        .unwrap_or(false);
447                    if job_check || step_check {
448                        node.metadata.insert(META_FORK_CHECK.into(), "true".into());
449                    }
450                }
451
452                // Link step to action image
453                if let Some(img_id) = image_node_id {
454                    graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
455                }
456
457                // Composite action references (`uses: ./path`) are NOT inlined.
458                //
459                // Earlier versions walked the filesystem from `pipeline_file`'s
460                // parent looking for an `action.yml` to inline. That made the
461                // graph dependent on (a) whether `pipeline_file` was absolute
462                // or relative, (b) the binary's CWD, and (c) whether the
463                // consumer copied the YAML to a sandbox without the surrounding
464                // repo. Same input bytes, different graphs — a parser-purity
465                // violation.
466                //
467                // We now treat all `./local-action` references as Partial and
468                // record a Structural completeness gap. This matches the
469                // schema's additive-only semver discipline (findings only get
470                // MORE conservative). Downstream rules that care about the
471                // inlined sub-steps will simply not fire — preferred over
472                // CWD-dependent false confidence.
473                if let Some(ref uses) = step.uses {
474                    if uses.starts_with("./") {
475                        graph.mark_partial(
476                            GapKind::Structural,
477                            format!(
478                                "composite action not resolved (local action '{uses}' — taudit does not read filesystem)"
479                            ),
480                        );
481                    }
482                }
483
484                // Link step to job container — steps run inside the container's execution
485                // environment, so a floating container is a supply chain risk for every step.
486                if let Some(img_id) = container_image_id {
487                    graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
488                }
489
490                // Link step to GITHUB_TOKEN if it exists
491                if let Some(tok_id) = job_token_id {
492                    graph.add_edge(step_id, tok_id, EdgeKind::HasAccessTo);
493                }
494
495                // Cloud identity inference: detect known OIDC cloud auth actions and
496                // create an Identity node representing the assumed cloud identity.
497                let mut step_holds_oidc = false;
498                if let Some(ref uses) = step.uses {
499                    if let Some(cloud_id) =
500                        classify_cloud_auth(uses, step.with.as_ref(), &mut graph)
501                    {
502                        graph.add_edge(step_id, cloud_id, EdgeKind::HasAccessTo);
503                        step_holds_oidc = true;
504                    }
505                }
506                // The job's GITHUB_TOKEN itself can be OIDC-capable
507                // (`permissions: id-token: write`). When that's the case every
508                // step in the job inherits the OIDC scope.
509                if let Some(tok_id) = job_token_id {
510                    if let Some(tok_node) = graph.nodes.get(tok_id) {
511                        if tok_node.metadata.contains_key(META_OIDC) {
512                            step_holds_oidc = true;
513                        }
514                    }
515                }
516                if let Some(ref yaml_id) = step.id {
517                    step_oidc_by_yaml_id.insert(yaml_id.clone(), step_holds_oidc);
518                }
519
520                // Attestation action detection
521                if let Some(ref uses) = step.uses {
522                    let action = uses.split('@').next().unwrap_or(uses);
523                    if matches!(
524                        action,
525                        "actions/attest-build-provenance" | "sigstore/cosign-installer"
526                    ) {
527                        if let Some(node) = graph.nodes.get_mut(step_id) {
528                            node.metadata.insert(META_ATTESTS.into(), "true".into());
529                        }
530                    }
531                }
532
533                // actions/checkout detection. Tag unconditionally — downstream rules
534                // gate on trigger context (pull_request / pull_request_target) to
535                // decide whether the checkout is pulling untrusted fork code. Tagging
536                // here avoids trigger-ordering dependencies across jobs.
537                if let Some(ref uses) = step.uses {
538                    let action = uses.split('@').next().unwrap_or(uses);
539                    if action == "actions/checkout" {
540                        if let Some(node) = graph.nodes.get_mut(step_id) {
541                            node.metadata
542                                .insert(META_CHECKOUT_SELF.into(), "true".into());
543                            // Stamp the verbatim `with.ref` value (if any) so
544                            // taint rules (R6) can see whether dispatch input
545                            // flows into a checkout ref.
546                            if let Some(with) = step.with.as_ref() {
547                                if let Some(r) = with.get("ref").and_then(yaml_scalar_to_string) {
548                                    node.metadata.insert(META_CHECKOUT_REF.into(), r);
549                                }
550                            }
551                        }
552                    }
553                }
554
555                // Stamp the raw `run:` body so script-body rules (R6
556                // manual_dispatch_input_to_url_or_command) can pattern-match
557                // without needing a parser hook of their own. Mirrors the
558                // META_SCRIPT_BODY contract used by the ADO inline-script rules.
559                if let Some(ref run) = step.run {
560                    if !run.is_empty() {
561                        if let Some(node) = graph.nodes.get_mut(step_id) {
562                            node.metadata.insert(META_SCRIPT_BODY.into(), run.clone());
563                        }
564                    }
565                }
566
567                // Artifact-download detection. The known artifact-download
568                // actions are flagged structurally so downstream rules can
569                // correlate "download → interpret" pairs in the same job
570                // without re-walking the YAML.
571                if let Some(ref uses) = step.uses {
572                    let action = uses.split('@').next().unwrap_or(uses);
573                    if matches!(
574                        action,
575                        "actions/download-artifact" | "dawidd6/action-download-artifact"
576                    ) {
577                        if let Some(node) = graph.nodes.get_mut(step_id) {
578                            node.metadata
579                                .insert(META_DOWNLOADS_ARTIFACT.into(), "true".into());
580                        }
581                    }
582                }
583
584                // Artifact graph edges: upload → Produces, download → Consumes.
585                // These let artifact_boundary_crossing fire when an untrusted
586                // producer step hands off to a privileged consumer step.
587                if let Some(ref uses) = step.uses {
588                    let action = uses.split('@').next().unwrap_or(uses);
589                    if action == "actions/upload-artifact" {
590                        // Only create an artifact edge when `name:` is explicitly
591                        // set. Anonymous uploads (no name) can't be correlated with
592                        // a specific download and would silently merge unrelated
593                        // jobs — skip them to avoid false positives.
594                        if let Some(artifact_name) = step
595                            .with
596                            .as_ref()
597                            .and_then(|w| w.get("name"))
598                            .and_then(yaml_scalar_to_string)
599                        {
600                            // Artifact inherits the producer step's trust zone so
601                            // future rules checking the artifact node see the right
602                            // provenance (BUG-3 fix).
603                            let art_id = find_or_create_artifact(
604                                &mut graph,
605                                &mut artifact_ids,
606                                &artifact_name,
607                                trust_zone,
608                            );
609                            graph.add_edge(step_id, art_id, EdgeKind::Produces);
610                        }
611                    } else if matches!(
612                        action,
613                        "actions/download-artifact" | "dawidd6/action-download-artifact"
614                    ) {
                        // Same rationale: omitting `name:` means "download all
                        // artifacts" (wildcard), which we can't correlate to a
                        // specific producer — skip to avoid incorrect Consumes
                        // edges.
619                        if let Some(artifact_name) = step
620                            .with
621                            .as_ref()
622                            .and_then(|w| w.get("name"))
623                            .and_then(yaml_scalar_to_string)
624                        {
625                            // If the upload step hasn't been seen yet, use Untrusted
626                            // as a conservative default. The zone will be correct when
627                            // the upload is processed first (the common cross-job flow).
628                            let art_id = find_or_create_artifact(
629                                &mut graph,
630                                &mut artifact_ids,
631                                &artifact_name,
632                                TrustZone::Untrusted,
633                            );
634                            graph.add_edge(art_id, step_id, EdgeKind::Consumes);
635                        }
636                    }
637                }
638
639                // Artifact-interpretation detection. A step that pipes a file
640                // into a privileged sink (`>> $GITHUB_ENV`/`>> $GITHUB_OUTPUT`,
641                // `eval`, `unzip`/`tar -x`, or `cat`/`jq`-with-redirect) is
642                // treated as an interpreter of any artifact downloaded earlier
643                // in the same job. Mirrors the existing GITHUB_ENV gate logic
644                // — broad substring match keeps the rule deterministic.
645                if let Some(ref run) = step.run {
646                    let interprets = run.contains("unzip ")
647                        || run.contains("unzip\n")
648                        || run.contains("tar -x")
649                        || run.contains("tar x")
650                        || run.contains(" eval ")
651                        || run.contains("\neval ")
652                        || run.starts_with("eval ")
653                        || run.contains(" cat ")
654                        || run.contains("\ncat ")
655                        || run.starts_with("cat ")
656                        || run.contains("jq ");
657                    if interprets {
658                        if let Some(node) = graph.nodes.get_mut(step_id) {
659                            node.metadata
660                                .insert(META_INTERPRETS_ARTIFACT.into(), "true".into());
661                        }
662                    }
663                }
664                // actions/github-script bodies that post comments back to PRs
665                // are also considered interpretation sinks — the `script:` body
666                // typically reads a downloaded file and posts its content.
667                if let Some(ref uses) = step.uses {
668                    let action = uses.split('@').next().unwrap_or(uses);
669                    if action == "actions/github-script" {
670                        if let Some(with) = step.with.as_ref() {
671                            if let Some(script) = with.get("script").and_then(yaml_scalar_to_string)
672                            {
673                                let posts_comment = script.contains("createComment")
674                                    || script.contains("updateComment")
675                                    || script.contains("createCommitComment")
676                                    || script.contains("createReview");
677                                let reads_file = script.contains("readFileSync")
678                                    || script.contains("readFile(")
679                                    || script.contains("require('fs')")
680                                    || script.contains("require(\"fs\")");
681                                if posts_comment && reads_file {
682                                    if let Some(node) = graph.nodes.get_mut(step_id) {
683                                        node.metadata
684                                            .insert(META_INTERPRETS_ARTIFACT.into(), "true".into());
685                                    }
686                                }
687                            }
688                        }
689                    }
690                }
691
692                // Build the EFFECTIVE per-step env map by merging workflow ⊕
693                // job ⊕ step (step wins, then job, then workflow). GHA semantics:
694                // a step-level `env: { K: literal }` SHADOWS the workflow- or
695                // job-level value of `K` for that step at runtime. If we add
696                // HasAccessTo edges from each scope independently, a literal
697                // shadow at the step level still leaves a phantom edge to the
698                // outer secret — a false positive. Merge first, then emit edges
699                // only for the effective values.
700                //
701                // If step.env is a template expression, we cannot statically
702                // know which keys it covers — mark Partial once and fall back
703                // to the workflow⊕job effective map (best-effort, but at least
704                // we record the gap).
705                //
706                // Iterate keys in sorted order so secret-node creation order
707                // is deterministic across runs (HashMap iteration is randomised
708                // per process; secret IDs leak that randomness into the JSON
709                // output otherwise).
710                let step_env_template = matches!(step.env.as_ref(), Some(EnvSpec::Template(_)));
711                if step_env_template {
712                    graph.mark_partial(
713                        GapKind::Expression,
714                        format!(
715                            "step '{step_name}' in job '{job_name}' env: uses template expression — environment variable shape unknown"
716                        ),
717                    );
718                }
719
720                let mut effective_env: HashMap<String, String> = HashMap::new();
721                if let Some(env_map) = workflow.env.as_ref().and_then(EnvSpec::as_map) {
722                    for (k, v) in env_map {
723                        effective_env.insert(k.clone(), v.clone());
724                    }
725                }
726                if let Some(env_map) = job.env.as_ref().and_then(EnvSpec::as_map) {
727                    for (k, v) in env_map {
728                        effective_env.insert(k.clone(), v.clone());
729                    }
730                }
731                if let Some(EnvSpec::Map(env_map)) = step.env.as_ref() {
732                    for (k, v) in env_map {
733                        effective_env.insert(k.clone(), v.clone());
734                    }
735                }
736
737                let mut effective_entries: Vec<(&String, &String)> = effective_env.iter().collect();
738                effective_entries.sort_by(|a, b| a.0.cmp(b.0));
739                if !effective_entries.is_empty() {
740                    let rendered_env: Vec<String> = effective_entries
741                        .iter()
742                        .map(|(k, v)| format!("{k}={v}"))
743                        .collect();
744                    if let Some(node) = graph.nodes.get_mut(step_id) {
745                        node.metadata
746                            .insert(META_GHA_ENV_ASSIGNMENTS.into(), rendered_env.join("\n"));
747                    }
748                }
749                for (_k, env_val) in effective_entries {
750                    // Walk every `secrets.X` reference inside the value's
751                    // template spans — concatenated multi-secret values
752                    // (`${{ secrets.A }}-${{ secrets.B }}`) yield BOTH names.
753                    for secret_name in iter_secret_refs(env_val) {
754                        let secret_id =
755                            find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
756                        graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
757                    }
758                }
759
760                // Process secrets from `with:` block, plus detect any
761                // `${{ env.X }}` reference. `env.X` does NOT produce a
762                // HasAccessTo edge (the value is sourced from the ambient
763                // runner environment, not directly from the secrets store)
764                // but it IS the consumer half of the env-gate laundering
765                // pattern that `secret_via_env_gate_to_untrusted_consumer`
766                // detects. Stamping META_READS_ENV here lets the rule run
767                // without re-walking the YAML.
768                //
769                // Sort keys so secret node creation order is deterministic
770                // across runs.
771                if let Some(ref with) = step.with {
772                    let mut reads_env = false;
773                    let mut entries: Vec<(&String, &serde_yaml::Value)> = with.iter().collect();
774                    entries.sort_by(|a, b| a.0.cmp(b.0));
775                    for (_k, val) in entries {
776                        // Multi-secret-aware: a single `with:` value may
777                        // concatenate several secrets (`${{ secrets.A }}-${{ secrets.B }}`).
778                        for scalar in yaml_scalar_strings(val) {
779                            for secret_name in iter_secret_refs(&scalar) {
780                                let secret_id =
781                                    find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
782                                graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
783                            }
784                            if is_env_reference(&scalar) {
785                                reads_env = true;
786                            }
787                        }
788                    }
789                    if reads_env {
790                        if let Some(node) = graph.nodes.get_mut(step_id) {
791                            node.metadata.insert(META_READS_ENV.into(), "true".into());
792                        }
793                    }
794                }
795
796                // Stamp the raw `run:` body as META_SCRIPT_BODY so script-aware
797                // rules (script_injection_via_untrusted_context, gh_cli_with_default_token_escalating, …)
798                // can pattern-match against it without re-parsing the YAML.
799                if let Some(ref run) = step.run {
800                    if !run.is_empty() {
801                        if let Some(node) = graph.nodes.get_mut(step_id) {
802                            node.metadata.insert(META_SCRIPT_BODY.into(), run.clone());
803                        }
804                    }
805                }
806
807                // For `actions/github-script`, the JS body lives in `with.script:`.
808                // Stamp it as META_SCRIPT_BODY too — the same script-injection
809                // patterns apply (interpolation of github.event.* into JS code).
810                if let Some(ref uses) = step.uses {
811                    let action = uses.split('@').next().unwrap_or(uses);
812                    if action == "actions/github-script" {
813                        if let Some(with) = step.with.as_ref() {
814                            if let Some(script) = with.get("script").and_then(yaml_scalar_to_string)
815                            {
816                                if !script.is_empty() {
817                                    if let Some(node) = graph.nodes.get_mut(step_id) {
818                                        node.metadata.insert(META_SCRIPT_BODY.into(), script);
819                                    }
820                                }
821                            }
822                        }
823                    }
824                }
825
826                // Interactive debug actions (tmate / upterm) — stamp the action ref so
827                // `interactive_debug_action_in_authority_workflow` can flag it without
828                // re-walking the steps. Match by action prefix (any version).
829                if let Some(ref uses) = step.uses {
830                    let action = uses.split('@').next().unwrap_or(uses);
831                    let is_debug = matches!(
832                        action,
833                        "mxschmitt/action-tmate"
834                            | "lhotari/action-upterm"
835                            | "actions/tmate"
836                            | "owenthereal/action-upterm"
837                            | "csexton/debugger-action"
838                    );
839                    if is_debug {
840                        if let Some(node) = graph.nodes.get_mut(step_id) {
841                            node.metadata
842                                .insert(META_INTERACTIVE_DEBUG.into(), uses.clone());
843                        }
844                    }
845                }
846
847                // `actions/cache` — stamp the `key:` input so the cache-poisoning
848                // rule can pattern-match against PR-derived expressions
849                // (github.head_ref / event.pull_request.head.ref / actor).
850                // Covers the top-level action and the save/restore variants.
851                if let Some(ref uses) = step.uses {
852                    let action = uses.split('@').next().unwrap_or(uses);
853                    let is_cache = matches!(
854                        action,
855                        "actions/cache" | "actions/cache/save" | "actions/cache/restore"
856                    );
857                    if is_cache {
858                        if let Some(with) = step.with.as_ref() {
859                            if let Some(key) = with.get("key").and_then(yaml_scalar_to_string) {
860                                if !key.is_empty() {
861                                    if let Some(node) = graph.nodes.get_mut(step_id) {
862                                        node.metadata.insert(META_CACHE_KEY.into(), key);
863                                    }
864                                }
865                            }
866                        }
867                    }
868                }
869
870                // Detect inferred secrets in `run:` script blocks. Only counts
871                // `secrets.X` references that appear INSIDE a `${{ … }}` template
872                // span — literal substrings in shell paths or comments
873                // (`# loads /etc/secrets.conf`, `cp $SECRETS_DIR/secrets.json`)
874                // do not produce phantom Secret nodes.
875                if let Some(ref run) = step.run {
876                    // Collect names first to avoid borrowing `run` while we
877                    // mutate `graph`, and to dedupe per-step (a single run
878                    // body that mentions `secrets.X` 5× still needs only one
879                    // HasAccessTo edge).
880                    let mut seen: std::collections::BTreeSet<&str> =
881                        std::collections::BTreeSet::new();
882                    for name in iter_secret_refs(run) {
883                        seen.insert(name);
884                    }
885                    for secret_name in seen {
886                        let secret_id =
887                            find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
888                        // Mark as inferred — not precisely mapped.
889                        if let Some(node) = graph.nodes.get_mut(secret_id) {
890                            node.metadata
891                                .insert(META_INFERRED.into(), META_INFERRED_VAL.into());
892                        }
893                        graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
894                        graph.mark_partial(
895                            GapKind::Expression,
896                            format!(
897                                "secret '{secret_name}' referenced in run: script — inferred, not precisely mapped"
898                            ),
899                        );
900                    }
901                }
902
903                // Detect writes to the GHA environment gate.
904                // Broad detection: presence of GITHUB_ENV or GITHUB_PATH in a run script
905                // covers every redirect form (`>> $GITHUB_ENV`, `>> "$GITHUB_ENV"`,
906                // `>> ${GITHUB_ENV}`, `tee -a $GITHUB_PATH`, etc.) without brittle
907                // multi-variant string matching. Reading these vars without writing is
908                // extremely rare in practice, making this an acceptable tradeoff for
909                // completeness.
910                if let Some(ref run) = step.run {
911                    let writes_gate = run.contains("GITHUB_ENV") || run.contains("GITHUB_PATH");
912                    if writes_gate {
913                        if let Some(node) = graph.nodes.get_mut(step_id) {
914                            node.metadata
915                                .insert(META_WRITES_ENV_GATE.into(), "true".into());
916                        }
917                    }
918                    // `${{ env.X }}` references inside a run: body — same
919                    // consumer signal as the with: detection above. A run
920                    // step that interpolates env via the template engine
921                    // is reading from the runner-managed env table just
922                    // like a uses: action would.
923                    if is_env_reference(run) {
924                        if let Some(node) = graph.nodes.get_mut(step_id) {
925                            node.metadata.insert(META_READS_ENV.into(), "true".into());
926                        }
927                    }
928                }
929            }
930
931            // ── Job outputs (`jobs.<id>.outputs.<name>: <expression>`) ──────
932            // Classify each output value by source so R4
933            // (sensitive_value_in_job_output) can fire on credentials whose
934            // values land in the unmasked `needs.<job>.outputs.*` channel.
935            if let Some(outputs) = job.outputs.as_ref() {
936                // Sort by output name so META_JOB_OUTPUTS is byte-deterministic
937                // across runs. `outputs` is a HashMap (randomised iteration);
938                // mirror the v1.1.0-beta.1 pattern used elsewhere.
939                let mut output_entries: Vec<(&String, &String)> = outputs.iter().collect();
940                output_entries.sort_by(|a, b| a.0.cmp(b.0));
941                for (out_name, out_value) in output_entries {
942                    let source = classify_job_output_source(out_value, &step_oidc_by_yaml_id);
943                    job_output_records.push(format!("{job_name}\t{out_name}\t{source}"));
944                }
945            }
946        }
947
948        if !job_output_records.is_empty() {
949            graph
950                .metadata
951                .insert(META_JOB_OUTPUTS.into(), job_output_records.join("|"));
952        }
953
954        // Cross-platform misclassification trap (red-team R2 #5): a YAML file
955        // wrapping ADO/GitLab content in a `jobs:` mapping deserializes here
956        // without errors but yields no recognisable Step nodes. Marking
957        // Partial surfaces the gap rather than silently returning a clean
958        // graph with completeness=complete (which a CI gate would treat as
959        // "passed").
960        let step_count = graph
961            .nodes
962            .iter()
963            .filter(|n| n.kind == NodeKind::Step)
964            .count();
965        if step_count == 0 && !workflow.jobs.is_empty() {
966            graph.mark_partial(
967                GapKind::Structural,
968                "jobs: parsed but produced 0 step nodes — possible non-GHA YAML wrong-platform-classified".to_string(),
969            );
970        }
971
972        graph.stamp_edge_authority_summaries();
973        Ok(graph)
974    }
975}
976
/// Label a `jobs.<id>.outputs.<name>` value with its highest-risk source.
///
/// Precedence is `secret` > `oidc` > `step_output` > `literal`:
///
/// * `"secret"` — the value contains the substring `secrets.` anywhere.
/// * `"oidc"` — the value references `steps.<id>.outputs.` for a step whose
///   entry in `step_oidc_by_yaml_id` is `true`.
/// * `"step_output"` — the value references at least one
///   `steps.<id>.outputs.` but none of the referenced steps is flagged.
/// * `"literal"` — none of the above.
///
/// Matching is plain substring scanning, so it does not depend on the
/// whitespace used inside `${{ … }}`. A step id is the run of alphanumeric,
/// `_` and `-` characters that follows `steps.`; ids missing from the map are
/// treated as not OIDC-capable.
fn classify_job_output_source(
    value: &str,
    step_oidc_by_yaml_id: &HashMap<String, bool>,
) -> &'static str {
    const STEP_PREFIX: &str = "steps.";
    const OUTPUTS_INFIX: &str = ".outputs.";

    if value.contains("secrets.") {
        return "secret";
    }

    let mut references_step_output = false;
    // Unscanned remainder of the value; shrinks past each `steps.<id>` hit.
    let mut remaining = value;
    while let Some(hit) = remaining.find(STEP_PREFIX) {
        let after_prefix = &remaining[hit + STEP_PREFIX.len()..];
        // Byte length of the identifier run directly after `steps.`.
        let id_len = after_prefix
            .char_indices()
            .find(|&(_, ch)| !(ch.is_alphanumeric() || ch == '_' || ch == '-'))
            .map_or(after_prefix.len(), |(offset, _)| offset);
        let (candidate_id, tail) = after_prefix.split_at(id_len);

        if !candidate_id.is_empty() && tail.starts_with(OUTPUTS_INFIX) {
            if step_oidc_by_yaml_id.get(candidate_id) == Some(&true) {
                return "oidc";
            }
            references_step_output = true;
        }
        // Resume right after the identifier so overlapping text is rescanned.
        remaining = tail;
    }

    match references_step_output {
        true => "step_output",
        false => "literal",
    }
}
1018
/// Returns true when a GHA `if:` expression contains one of the standard
/// fork-check predicates:
///
/// * `github.event.pull_request.head.repo.fork == false`
/// * `github.event.pull_request.head.repo.fork != true`
/// * `github.event.pull_request.head.repo.full_name == github.repository`
///   (the Grafana canonical form), with the operands in either order.
///
/// Matching is case-insensitive and ignores ALL whitespace, so the spaced
/// form (`fork == false`), the compact form (`fork==false`) and expressions
/// folded across several lines are all recognised.
///
/// The check is a substring match on the canonical predicate. Wrapping the
/// predicate inside a larger boolean expression that ANDs additional clauses
/// (e.g. `&& github.actor != ...`) is still detected. ORing it away
/// (`|| true`) would defeat the check, but that pattern is not seen in
/// practice and would itself be a code-review red flag.
pub fn is_fork_check_expression(expr: &str) -> bool {
    // Canonical predicates with every whitespace character removed; the
    // input is compacted the same way before comparison.
    const CANONICAL_FORMS: [&str; 4] = [
        // `repo.fork == false` and the negated `!= true`.
        "github.event.pull_request.head.repo.fork==false",
        "github.event.pull_request.head.repo.fork!=true",
        // `head.repo.full_name == github.repository`, either operand order.
        "github.event.pull_request.head.repo.full_name==github.repository",
        "github.repository==github.event.pull_request.head.repo.full_name",
    ];

    // Dropping whitespace entirely (rather than collapsing runs to a single
    // space) makes the spacing around `==` / `!=` irrelevant.
    let compact = expr
        .chars()
        .filter(|ch| !ch.is_whitespace())
        .collect::<String>()
        .to_lowercase();

    CANONICAL_FORMS.iter().any(|form| compact.contains(form))
}
1054
1055fn trigger_has_pull_request_target(triggers: &serde_yaml::Value) -> bool {
1056    collect_trigger_names(Some(triggers))
1057        .iter()
1058        .any(|t| t == "pull_request_target")
1059}
1060
1061/// Collects every trigger name from a workflow's `on:` field. Returns the
1062/// canonical event tokens (`pull_request`, `pull_request_target`,
1063/// `workflow_run`, `issue_comment`, `push`, etc.) in source order, deduped.
1064fn collect_trigger_names(triggers: Option<&serde_yaml::Value>) -> Vec<String> {
1065    let mut out: Vec<String> = Vec::new();
1066    let mut push_unique = |s: &str| {
1067        if !s.is_empty() && !out.iter().any(|e| e == s) {
1068            out.push(s.to_string());
1069        }
1070    };
1071    let Some(val) = triggers else {
1072        return out;
1073    };
1074    match val {
1075        serde_yaml::Value::String(s) => push_unique(s),
1076        serde_yaml::Value::Sequence(seq) => {
1077            for v in seq {
1078                if let Some(s) = v.as_str() {
1079                    push_unique(s);
1080                }
1081            }
1082        }
1083        serde_yaml::Value::Mapping(map) => {
1084            for (k, _) in map {
1085                if let Some(s) = k.as_str() {
1086                    push_unique(s);
1087                }
1088            }
1089        }
1090        _ => {}
1091    }
1092    out
1093}
1094
1095/// Extract the list of `workflow_dispatch.inputs.<name>` keys declared by a
1096/// workflow. Returns an empty Vec if `on:` is not a mapping, has no
1097/// `workflow_dispatch` entry, or the entry has no `inputs:` mapping.
1098fn collect_dispatch_inputs(triggers: &serde_yaml::Value) -> Vec<String> {
1099    let map = match triggers {
1100        serde_yaml::Value::Mapping(m) => m,
1101        _ => return Vec::new(),
1102    };
1103    let dispatch = match map
1104        .iter()
1105        .find(|(k, _)| k.as_str() == Some("workflow_dispatch"))
1106    {
1107        Some((_, v)) => v,
1108        None => return Vec::new(),
1109    };
1110    let inputs = match dispatch.get("inputs").and_then(|v| v.as_mapping()) {
1111        Some(m) => m,
1112        None => return Vec::new(),
1113    };
1114    inputs
1115        .iter()
1116        .filter_map(|(k, _)| k.as_str().map(str::to_string))
1117        .collect()
1118}
1119
1120/// Extract the list of `workflow_call.inputs.<name>` keys declared by a
1121/// reusable workflow. Returns an empty Vec if `on:` is not a mapping, has no
1122/// `workflow_call` trigger, or the trigger has no `inputs:` mapping.
1123fn collect_workflow_call_inputs(triggers: &serde_yaml::Value) -> Vec<String> {
1124    let map = match triggers {
1125        serde_yaml::Value::Mapping(m) => m,
1126        _ => return Vec::new(),
1127    };
1128    let call = match map
1129        .iter()
1130        .find(|(k, _)| k.as_str() == Some("workflow_call"))
1131    {
1132        Some((_, v)) => v,
1133        None => return Vec::new(),
1134    };
1135    let inputs = match call.get("inputs").and_then(|v| v.as_mapping()) {
1136        Some(m) => m,
1137        None => return Vec::new(),
1138    };
1139    inputs
1140        .iter()
1141        .filter_map(|(k, _)| k.as_str().map(str::to_string))
1142        .collect()
1143}
1144
1145/// Returns true if `runs-on` names a self-hosted runner.
1146///
1147/// GHA `runs-on` is polymorphic: a string (`ubuntu-latest`, `self-hosted`), a
1148/// sequence (`[self-hosted, linux, x64]`), or — for group selection — a mapping
1149/// (`{ group: my-group, labels: [...] }`). Any form that contains `self-hosted`
1150/// (as a string, sequence entry, or label entry) is considered self-hosted.
1151/// Explicit `group:` without `self-hosted` is also self-hosted by construction.
1152fn is_self_hosted_runner(runs_on: Option<&serde_yaml::Value>) -> bool {
1153    const SH: &str = "self-hosted";
1154    let Some(val) = runs_on else {
1155        return false;
1156    };
1157    match val {
1158        serde_yaml::Value::String(s) => s == SH,
1159        serde_yaml::Value::Sequence(seq) => seq
1160            .iter()
1161            .any(|v| v.as_str().map(|s| s == SH).unwrap_or(false)),
1162        serde_yaml::Value::Mapping(map) => {
1163            if map.contains_key("group") {
1164                return true;
1165            }
1166            if let Some(labels) = map.get("labels") {
1167                match labels {
1168                    serde_yaml::Value::String(s) => s == SH,
1169                    serde_yaml::Value::Sequence(seq) => seq
1170                        .iter()
1171                        .any(|v| v.as_str().map(|s| s == SH).unwrap_or(false)),
1172                    _ => false,
1173                }
1174            } else {
1175                false
1176            }
1177        }
1178        _ => false,
1179    }
1180}
1181
1182/// Extract a human-readable label from a `runs-on` value for naming the Image
1183/// node. Prefers the first non-`self-hosted` label in a sequence (more specific),
1184/// falls back to the string value or "self-hosted".
1185fn runner_label(runs_on: Option<&serde_yaml::Value>) -> Option<&str> {
1186    let val = runs_on?;
1187    match val {
1188        serde_yaml::Value::String(s) => Some(s.as_str()),
1189        serde_yaml::Value::Sequence(seq) => {
1190            for v in seq {
1191                if let Some(s) = v.as_str() {
1192                    if s != "self-hosted" {
1193                        return Some(s);
1194                    }
1195                }
1196            }
1197            seq.first().and_then(|v| v.as_str())
1198        }
1199        serde_yaml::Value::Mapping(map) => map.get("group").and_then(|v| v.as_str()),
1200        _ => None,
1201    }
1202}
1203
1204/// Classify a `uses:` reference into trust zone and create image node.
1205fn classify_action(uses: &str, graph: &mut AuthorityGraph) -> (TrustZone, NodeId) {
1206    let semantically_pinned = is_pin_semantically_valid(uses);
1207    let is_local = uses.starts_with("./");
1208
1209    let zone = if is_local {
1210        TrustZone::FirstParty
1211    } else if semantically_pinned {
1212        TrustZone::ThirdParty
1213    } else {
1214        TrustZone::Untrusted
1215    };
1216
1217    let mut meta = HashMap::new();
1218    // Record digest metadata if structurally pinned (even if semantically
1219    // invalid — the SHA is still useful for diagnostics/display).
1220    if is_sha_pinned(uses) {
1221        if let Some(sha) = uses.split('@').next_back() {
1222            meta.insert(META_DIGEST.into(), sha.into());
1223        }
1224    }
1225
1226    let id = graph.add_node_with_metadata(NodeKind::Image, uses, zone, meta);
1227    (zone, id)
1228}
1229
/// Yields every `secrets.<name>` reference found INSIDE any `${{ … }}` template
/// span in the input. Whitespace-tolerant (handles `${{secrets.X}}`,
/// `${{ secrets.X }}`, `${{   secrets.X   }}`, tabs, newlines). Handles
/// concatenated multi-secret values (`${{ secrets.A }}-${{ secrets.B }}` yields
/// both `A` and `B`). Does NOT match literal `secrets.X` substrings outside
/// template spans (shell paths, comments, JSON file names like `secrets.json`).
///
/// Only the `secrets` context itself is matched: a `secrets.` that is merely
/// the tail of a longer property chain or identifier
/// (`steps.secrets.outputs.x`, `inputs.my_secrets.value`,
/// `needs.build-secrets.outputs.x`) is skipped, because the segment after it
/// is not a secret name.
///
/// UTF-8-aware: every slice boundary sits next to an ASCII delimiter, so no
/// multi-byte sequence is ever split. Zero regex — keeps the parser
/// ReDoS-free.
///
/// Implementation: scan for `${{` opens, find the matching `}}` close (or end
/// of string if unterminated), then scan only the inner span for `secrets.`
/// followed by an identifier (`[A-Za-z0-9_]+`). The identifier terminates at
/// the first non-identifier char, which catches `secrets.A }}-${{ secrets.B`,
/// `secrets.A || secrets.B`, etc.
fn iter_secret_refs(s: &str) -> impl Iterator<Item = &str> {
    SecretRefIter {
        src: s,
        cursor: 0,
        span_end: None,
    }
}

/// Cursor-based scanner behind `iter_secret_refs`.
struct SecretRefIter<'a> {
    /// The full input being scanned.
    src: &'a str,
    /// Byte offset at which the next search resumes.
    cursor: usize,
    /// `Some(offset of the closing "}}")` (or of the end of input when the
    /// span is unterminated) while inside a template span; `None` outside.
    span_end: Option<usize>,
}

impl<'a> Iterator for SecretRefIter<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<&'a str> {
        const OPEN: &str = "${{";
        const CLOSE: &str = "}}";
        const PREFIX: &str = "secrets.";

        loop {
            // Enter the next template span if we are not already inside one.
            // GHA does not nest `${{`, so a flat search for `}}` is correct.
            let span_end = match self.span_end {
                Some(end) => end,
                None => {
                    let rel = self.src.get(self.cursor..)?.find(OPEN)?;
                    let span_start = self.cursor + rel + OPEN.len();
                    let inner = &self.src[span_start..];
                    let end = span_start + inner.find(CLOSE).unwrap_or(inner.len());
                    self.cursor = span_start;
                    self.span_end = Some(end);
                    end
                }
            };

            // Look for the next `secrets.` inside the remainder of the span.
            let hit = self
                .src
                .get(self.cursor..span_end)
                .and_then(|window| window.find(PREFIX));
            let Some(rel) = hit else {
                // Span exhausted — step over `}}` and look for the next one.
                self.cursor = span_end.saturating_add(CLOSE.len()).min(self.src.len());
                self.span_end = None;
                continue;
            };

            let match_start = self.cursor + rel;
            let name_start = match_start + PREFIX.len();

            // Token-boundary check: when the preceding character is part of
            // an identifier or a `.` dereference, this `secrets.` is an inner
            // segment of some other expression, not the secrets context.
            let glued_to_previous = matches!(
                self.src[..match_start].chars().next_back(),
                Some(prev) if prev.is_ascii_alphanumeric() || matches!(prev, '_' | '-' | '.')
            );
            if glued_to_previous {
                self.cursor = name_start;
                continue;
            }

            // Identifier terminates at first non-[A-Za-z0-9_] char (or span end).
            let tail = &self.src[name_start..span_end];
            let name_len = tail
                .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
                .unwrap_or(tail.len());

            // Resume after this identifier on the next call so a second
            // secret in the same span is still found.
            self.cursor = name_start + name_len;
            if name_len == 0 {
                // `secrets.` followed by no identifier — keep scanning.
                continue;
            }
            return Some(&tail[..name_len]);
        }
    }
}
1310
/// Reports whether `val` contains a template expression that opens directly
/// on the `env` context, i.e. `${{`, optional whitespace, then `env.`.
///
/// This is the consumer side of the `$GITHUB_ENV` laundering pattern: a step
/// reads `env.CLOUD_KEY` after an earlier step wrote `CLOUD_KEY=$secret` to
/// `$GITHUB_ENV`. Anchoring on the template opener keeps `${{ steps.x.env.foo }}`
/// and a bare `env.X` outside any template from matching, while the lenient
/// spacing GHA accepts (`${{env.X}}`, `${{   env.X   }}`) still does.
fn is_env_reference(val: &str) -> bool {
    // Every piece after the first one is the text that immediately follows a
    // `${{` opener (up to the next opener, which is enough to inspect the
    // leading token). A value without any opener has a single piece and
    // therefore yields `false`.
    val.split("${{")
        .skip(1)
        .any(|after_open| after_open.trim_start().starts_with("env."))
}
1339
1340fn find_or_create_secret(
1341    graph: &mut AuthorityGraph,
1342    cache: &mut HashMap<String, NodeId>,
1343    name: &str,
1344) -> NodeId {
1345    if let Some(&id) = cache.get(name) {
1346        return id;
1347    }
1348    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
1349    cache.insert(name.to_string(), id);
1350    id
1351}
1352
1353fn find_or_create_artifact(
1354    graph: &mut AuthorityGraph,
1355    cache: &mut HashMap<String, NodeId>,
1356    name: &str,
1357    zone: TrustZone,
1358) -> NodeId {
1359    if let Some(&id) = cache.get(name) {
1360        return id;
1361    }
1362    let id = graph.add_node(NodeKind::Artifact, name, zone);
1363    cache.insert(name.to_string(), id);
1364    id
1365}
1366
1367/// Detect known OIDC cloud authentication actions and create an Identity node
1368/// representing the cloud identity that will be assumed.
1369///
1370/// Only handles the OIDC/federated path — static credential inputs (e.g.
1371/// `aws-secret-access-key: ${{ secrets.X }}`) are already captured by the
1372/// regular `with:` secret scanning and don't need a separate Identity node.
1373///
1374/// Returns `Some(NodeId)` of the created Identity, or `None` if not recognized.
1375fn classify_cloud_auth(
1376    uses: &str,
1377    with: Option<&HashMap<String, serde_yaml::Value>>,
1378    graph: &mut AuthorityGraph,
1379) -> Option<NodeId> {
1380    // Strip `@version` — match any version of the action
1381    let action = uses.split('@').next().unwrap_or(uses);
1382
1383    match action {
1384        "aws-actions/configure-aws-credentials" => {
1385            // OIDC path: role-to-assume present (no static access key needed)
1386            let w = with?;
1387            let role = w.get("role-to-assume").and_then(yaml_scalar_to_string)?;
1388            // ARN format: arn:aws:iam::123456789012:role/my-role
1389            // Split on '/' to get the role name; fall back to the full value.
1390            let short = role.split('/').next_back().unwrap_or(role.as_str());
1391            let mut meta = HashMap::new();
1392            meta.insert(META_OIDC.into(), "true".into());
1393            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
1394            meta.insert(META_PERMISSIONS.into(), "AWS role assumption (OIDC)".into());
1395            Some(graph.add_node_with_metadata(
1396                NodeKind::Identity,
1397                format!("AWS/{short}"),
1398                TrustZone::FirstParty,
1399                meta,
1400            ))
1401        }
1402        "google-github-actions/auth" => {
1403            // OIDC path: workload_identity_provider present
1404            let w = with?;
1405            let provider = w
1406                .get("workload_identity_provider")
1407                .and_then(yaml_scalar_to_string)?;
1408            let short = provider.split('/').next_back().unwrap_or(provider.as_str());
1409            let mut meta = HashMap::new();
1410            meta.insert(META_OIDC.into(), "true".into());
1411            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
1412            meta.insert(
1413                META_PERMISSIONS.into(),
1414                "GCP workload identity federation".into(),
1415            );
1416            Some(graph.add_node_with_metadata(
1417                NodeKind::Identity,
1418                format!("GCP/{short}"),
1419                TrustZone::FirstParty,
1420                meta,
1421            ))
1422        }
1423        "azure/login" => {
1424            // OIDC path: client-id present without client-secret
1425            let w = with?;
1426            let client_id = w.get("client-id").and_then(yaml_scalar_to_string)?;
1427            // Only treat as OIDC if no static client-secret is provided
1428            if w.contains_key("client-secret") {
1429                return None; // static SP creds captured by with: secret scanning
1430            }
1431            let mut meta = HashMap::new();
1432            meta.insert(META_OIDC.into(), "true".into());
1433            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
1434            meta.insert(
1435                META_PERMISSIONS.into(),
1436                "Azure federated credential (OIDC)".into(),
1437            );
1438            Some(graph.add_node_with_metadata(
1439                NodeKind::Identity,
1440                format!("Azure/{client_id}"),
1441                TrustZone::FirstParty,
1442                meta,
1443            ))
1444        }
1445        _ => None,
1446    }
1447}
1448
1449// ── Serde models for GHA YAML ──────────────────────────
1450
/// Flexible permissions: can be a string ("write-all") or a map.
///
/// Deserialized as an untagged enum, so serde tries the variants in
/// declaration order: plain string first, then map.
///
/// The map variant uses `BTreeMap` (not `HashMap`) so the rendered
/// `Display` output (`{ contents: read, id-token: write }`) is sorted by
/// scope name and byte-deterministic across runs. `META_PERMISSIONS` is
/// emitted into JSON / SARIF / `taudit map` text directly, and a HashMap's
/// randomised iteration order otherwise leaks into every artifact. The
/// substring check at the workflow- and job-permissions emission sites
/// (`perm_string.contains("id-token: write")`) still works — BTreeMap
/// produces the same `key: value` shape, just sorted.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum Permissions {
    /// Shorthand string form, e.g. `write-all`.
    String(String),
    /// Per-scope form: scope name → access level, e.g. `contents: read`.
    Map(BTreeMap<String, String>),
}
1467
/// Polymorphic `env:` block. Normally a map of name → value, but in some
/// real-world workflows the entire `env:` value is a template expression
/// (e.g. `env: ${{ matrix }}`), where the shape resolves at runtime.
///
/// When the value is a template string, downstream code must mark the graph
/// Partial — environment variable shape is unknown to static analysis.
///
/// The map variant uses a custom deserializer (`deserialize_env_map`) that
/// stringifies scalar values. GHA accepts non-string scalars in env values
/// (`COVERAGE: false`, `RUST_BACKTRACE: 1`, `TARGET_FLAGS:` (null)); a strict
/// `HashMap<String, String>` rejects them and breaks 200+ real-world workflows.
///
/// Untagged: serde tries `Map` first and falls back to `Template` when the
/// map deserializer returns an error.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum EnvSpec {
    /// Statically known variables: name → value, with every scalar value
    /// already converted to its string form.
    #[serde(deserialize_with = "deserialize_env_map")]
    Map(HashMap<String, String>),
    /// The whole `env:` value was a single string (e.g. `${{ matrix }}`);
    /// kept verbatim.
    Template(String),
}
1486
1487/// Deserialize a GHA `env:` map, stringifying scalar values so that
1488/// non-string scalars (booleans, numbers, null, YAML anchors resolving
1489/// to scalars) round-trip into `HashMap<String, String>`.
1490///
1491/// Rejects nested mappings/sequences — those would indicate the value
1492/// is not a real env value and we should fall through to the `Template`
1493/// variant or fail loudly. Null values become the empty string, matching
1494/// how GHA itself surfaces an unset env var.
1495fn deserialize_env_map<'de, D>(deserializer: D) -> Result<HashMap<String, String>, D::Error>
1496where
1497    D: serde::Deserializer<'de>,
1498{
1499    use serde::de::Error;
1500    let raw: HashMap<String, serde_yaml::Value> = HashMap::deserialize(deserializer)?;
1501    let mut out = HashMap::with_capacity(raw.len());
1502    for (k, v) in raw {
1503        let s = match v {
1504            serde_yaml::Value::String(s) => s,
1505            serde_yaml::Value::Bool(b) => b.to_string(),
1506            serde_yaml::Value::Number(n) => n.to_string(),
1507            serde_yaml::Value::Null => String::new(),
1508            // Mappings / sequences in env values are not legal GHA — but
1509            // rather than crash the whole workflow, fail this variant so
1510            // the untagged enum can try `Template` next.
1511            other => {
1512                return Err(D::Error::custom(format!(
1513                    "env value for `{k}` is not a scalar: {other:?}"
1514                )))
1515            }
1516        };
1517        out.insert(k, s);
1518    }
1519    Ok(out)
1520}
1521
1522fn yaml_scalar_to_string(value: &serde_yaml::Value) -> Option<String> {
1523    match value {
1524        serde_yaml::Value::String(s) => Some(s.clone()),
1525        serde_yaml::Value::Bool(b) => Some(b.to_string()),
1526        serde_yaml::Value::Number(n) => Some(n.to_string()),
1527        serde_yaml::Value::Null => Some(String::new()),
1528        _ => None,
1529    }
1530}
1531
1532fn yaml_value_compact(value: &serde_yaml::Value) -> Option<String> {
1533    match value {
1534        serde_yaml::Value::Sequence(seq) => {
1535            let parts: Vec<String> = seq.iter().filter_map(yaml_scalar_to_string).collect();
1536            if parts.is_empty() {
1537                None
1538            } else {
1539                Some(parts.join(","))
1540            }
1541        }
1542        serde_yaml::Value::Mapping(map) => {
1543            let mut parts: Vec<String> = map
1544                .iter()
1545                .filter_map(|(k, v)| {
1546                    Some(format!(
1547                        "{}={}",
1548                        yaml_scalar_to_string(k)?,
1549                        yaml_value_compact(v)?
1550                    ))
1551                })
1552                .collect();
1553            parts.sort();
1554            if parts.is_empty() {
1555                None
1556            } else {
1557                Some(parts.join(","))
1558            }
1559        }
1560        scalar => yaml_scalar_to_string(scalar),
1561    }
1562}
1563
/// Merge a job-level and a step-level `if:` condition into one string.
///
/// Empty conditions count as absent. When both are present the result is
/// `"<job> AND <step>"`; when only one is present it is returned unchanged;
/// when neither is present the result is `None`.
fn combined_condition(job_if: Option<&str>, step_if: Option<&str>) -> Option<String> {
    let job_cond = job_if.filter(|cond| !cond.is_empty());
    let step_cond = step_if.filter(|cond| !cond.is_empty());

    match (job_cond, step_cond) {
        (Some(job), Some(step)) => Some(format!("{job} AND {step}")),
        (Some(only), None) | (None, Some(only)) => Some(only.to_string()),
        (None, None) => None,
    }
}
1574
1575fn yaml_scalar_strings(value: &serde_yaml::Value) -> Vec<String> {
1576    match value {
1577        serde_yaml::Value::Sequence(seq) => seq.iter().filter_map(yaml_scalar_to_string).collect(),
1578        serde_yaml::Value::Mapping(map) => map.values().filter_map(yaml_scalar_to_string).collect(),
1579        scalar => yaml_scalar_to_string(scalar).into_iter().collect(),
1580    }
1581}
1582
1583impl EnvSpec {
1584    /// Returns the env map if statically known, or `None` if it is a template
1585    /// expression whose shape resolves at runtime.
1586    pub fn as_map(&self) -> Option<&HashMap<String, String>> {
1587        match self {
1588            EnvSpec::Map(m) => Some(m),
1589            EnvSpec::Template(_) => None,
1590        }
1591    }
1592
1593    /// Returns the raw template expression, if this `env:` is a template.
1594    pub fn as_template(&self) -> Option<&str> {
1595        match self {
1596            EnvSpec::Template(s) => Some(s.as_str()),
1597            EnvSpec::Map(_) => None,
1598        }
1599    }
1600}
1601
1602impl std::fmt::Display for Permissions {
1603    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1604        match self {
1605            Permissions::String(s) => write!(f, "{s}"),
1606            Permissions::Map(m) => {
1607                let parts: Vec<String> = m.iter().map(|(k, v)| format!("{k}: {v}")).collect();
1608                write!(f, "{{ {} }}", parts.join(", "))
1609            }
1610        }
1611    }
1612}
1613
/// Serde model of the top level of a workflow file.
///
/// Every field carries `#[serde(default)]`, so a key that is missing from the
/// YAML deserializes to `None` (or an empty map for `jobs`) instead of
/// failing the parse.
#[derive(Debug, Deserialize)]
pub struct GhaWorkflow {
    /// Workflow trigger(s). Polymorphic: string, sequence, or mapping.
    #[serde(rename = "on", default)]
    pub triggers: Option<serde_yaml::Value>,
    /// Workflow-level `permissions:` block; `None` when the key is absent.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Workflow-level env vars, inherited by all jobs and steps.
    /// Polymorphic: typically a map, but can be a template expression
    /// (e.g. `env: ${{ matrix }}`) whose shape is unknown statically.
    #[serde(default)]
    pub env: Option<EnvSpec>,
    /// Jobs keyed by their id under `jobs:`. A `HashMap`, so iteration order
    /// is not the declaration order.
    #[serde(default)]
    pub jobs: HashMap<String, GhaJob>,
}
1629
/// Job-level container config. Polymorphic: string image or map with `image:` key.
///
/// Untagged: serde tries the plain-string form first, then the map form.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
pub enum ContainerConfig {
    /// Shorthand form: the value is just the image reference.
    Image(String),
    /// Map form. Only `image` and `options` are read here.
    Full {
        /// Image reference (required in this form).
        image: String,
        /// Raw `options:` string (e.g. `--privileged`); `None` when absent.
        #[serde(default)]
        options: Option<String>,
    },
}
1641
1642impl ContainerConfig {
1643    pub fn image(&self) -> &str {
1644        match self {
1645            ContainerConfig::Image(s) => s,
1646            ContainerConfig::Full { image, .. } => image,
1647        }
1648    }
1649
1650    pub fn options(&self) -> Option<&str> {
1651        match self {
1652            ContainerConfig::Image(_) => None,
1653            ContainerConfig::Full { options, .. } => options.as_deref(),
1654        }
1655    }
1656}
1657
/// Serde model of one entry under `jobs:`.
///
/// All fields are optional in the YAML: each one has `#[serde(default)]`, so
/// a missing key becomes `None` (or an empty `Vec` for `steps`).
#[derive(Debug, Deserialize)]
pub struct GhaJob {
    /// Job-level `permissions:` block; `None` when the key is absent.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Job-level env vars. Polymorphic: typically a map, but can be a
    /// template expression (e.g. `env: ${{ matrix }}`) whose shape is unknown
    /// statically.
    #[serde(default)]
    pub env: Option<EnvSpec>,
    /// The job's steps in declaration order; empty when `steps:` is absent.
    #[serde(default)]
    pub steps: Vec<GhaStep>,
    /// Reusable workflow reference — `uses: owner/repo/.github/workflows/foo.yml@ref`
    #[serde(default)]
    pub uses: Option<String>,
    /// `with:` inputs passed to a reusable workflow call.
    #[serde(rename = "with", default)]
    pub with: Option<HashMap<String, serde_yaml::Value>>,
    /// `secrets:` block on a reusable-workflow `uses:` call. Polymorphic:
    /// the literal string `inherit` (`secrets: inherit`) or a mapping of
    /// secret-name → expression (`secrets: { TOKEN: ${{ secrets.X }} }`).
    /// We accept it as opaque YAML and inspect the variant in the parser.
    #[serde(default)]
    pub secrets: Option<serde_yaml::Value>,
    /// Job container image.
    #[serde(default)]
    pub container: Option<ContainerConfig>,
    /// Matrix/strategy configuration. When a matrix is present, the authority
    /// shape may differ per matrix entry — graph is marked Partial.
    #[serde(default)]
    pub strategy: Option<serde_yaml::Value>,
    /// Runner label(s). Can be a string (`ubuntu-latest`), a sequence
    /// (`[self-hosted, linux]`), or absent for reusable workflows.
    #[serde(rename = "runs-on", default)]
    pub runs_on: Option<serde_yaml::Value>,
    /// `jobs.<id>.outputs:` map (output name → expression). Captured for the
    /// `sensitive_value_in_job_output` rule which inspects each value for
    /// `secrets.*` / `steps.*.outputs.*` references and credential-shaped
    /// names. Empty / absent for jobs that declare no outputs.
    #[serde(default)]
    pub outputs: Option<HashMap<String, String>>,
    /// Job-level `if:` condition. Captured verbatim so rules can scan for
    /// the standard fork-check pattern
    /// (`github.event.pull_request.head.repo.fork == false` or the
    /// equivalent `head.repo.full_name == github.repository`). Job-level
    /// `if:` applies to every step the job contains.
    #[serde(rename = "if", default)]
    pub if_cond: Option<String>,
}
1706
/// Serde model of one entry in a job's `steps:` list. Every field is
/// optional; a step normally carries either `uses:` or `run:`.
#[derive(Debug, Deserialize)]
pub struct GhaStep {
    /// Optional `name:` of the step.
    pub name: Option<String>,
    /// Optional YAML `id:` — the symbolic name used by `steps.<id>.outputs.*`
    /// references in expressions. Captured so output-flow rules can resolve
    /// which step produced a referenced output.
    pub id: Option<String>,
    /// Action reference from `uses:` (e.g. `actions/checkout@v4`).
    pub uses: Option<String>,
    /// Shell script text from `run:`.
    pub run: Option<String>,
    /// Step-level env vars. Polymorphic: typically a map, but can be a
    /// template expression (e.g. `env: ${{ matrix }}`) whose shape is unknown
    /// statically.
    #[serde(default)]
    pub env: Option<EnvSpec>,
    /// `with:` inputs passed to the action; values are kept as raw YAML
    /// because they are not always scalars.
    #[serde(rename = "with", default)]
    pub with: Option<HashMap<String, serde_yaml::Value>>,
    /// Step-level `if:` condition. Captured verbatim so rules can detect
    /// the standard fork-check pattern.
    #[serde(rename = "if", default)]
    pub if_cond: Option<String>,
}
1728
1729#[cfg(test)]
1730mod tests {
1731    use super::*;
1732
1733    fn parse(yaml: &str) -> AuthorityGraph {
1734        let parser = GhaParser;
1735        let source = PipelineSource {
1736            file: "test.yml".into(),
1737            repo: None,
1738            git_ref: None,
1739            commit_sha: None,
1740        };
1741        parser.parse(yaml, &source).unwrap()
1742    }
1743
    /// Smoke test: a two-step workflow with `permissions: write-all` parses
    /// and produces a non-trivial graph.
    #[test]
    fn parses_simple_workflow() {
        let yaml = r#"
permissions: write-all
jobs:
  build:
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Build
        run: make build
"#;
        let graph = parse(yaml);
        // Expected nodes: GITHUB_TOKEN + 2 steps + 1 image. Note that the
        // assertion itself only enforces a lower bound of 3.
        assert!(graph.nodes.len() >= 3);
    }
1759
    /// A `${{ secrets.X }}` reference in a step-level `env:` value produces
    /// exactly one Secret node named after the secret.
    #[test]
    fn detects_secret_in_env() {
        let yaml = r#"
jobs:
  deploy:
    steps:
      - name: Deploy
        run: ./deploy.sh
        env:
          AWS_KEY: "${{ secrets.AWS_ACCESS_KEY_ID }}"
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secrets.len(), 1);
        assert_eq!(secrets[0].name, "AWS_ACCESS_KEY_ID");
    }
1776
    /// An action referenced by tag (`@v4`) rather than by commit SHA gets an
    /// Image node in the `Untrusted` zone.
    #[test]
    fn classifies_unpinned_action_as_untrusted() {
        let yaml = r#"
jobs:
  ci:
    steps:
      - uses: actions/checkout@v4
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
    }
1790
    /// An action pinned to a full 40-hex commit SHA gets an Image node in the
    /// `ThirdParty` zone.
    #[test]
    fn classifies_sha_pinned_action_as_third_party() {
        let yaml = r#"
jobs:
  ci:
    steps:
      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
    }
1804
    /// A repository-local action (`./…` path) gets an Image node in the
    /// `FirstParty` zone.
    #[test]
    fn classifies_local_action_as_first_party() {
        let yaml = r#"
jobs:
  ci:
    steps:
      - uses: ./.github/actions/my-action
"#;
        let graph = parse(yaml);
        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].trust_zone, TrustZone::FirstParty);
    }
1818
    /// A `${{ secrets.X }}` reference in a `with:` input produces exactly one
    /// Secret node named after the secret.
    #[test]
    fn detects_secret_in_with() {
        let yaml = r#"
jobs:
  deploy:
    steps:
      - name: Publish
        uses: some-org/publish@v1
        with:
          token: "${{ secrets.NPM_TOKEN }}"
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secrets.len(), 1);
        assert_eq!(secrets[0].name, "NPM_TOKEN");
    }
1835
    /// A `uses:` step is stamped with the action name (version stripped) and
    /// with its scalar `with:` inputs rendered as `key=value`, including a
    /// boolean input.
    #[test]
    fn uses_step_records_action_and_scalar_with_inputs() {
        let yaml = r#"
jobs:
  deploy:
    steps:
      - uses: aws-actions/amazon-ecr-login@v2
        with:
          mask-password: false
          registries: "123456789012"
"#;
        let graph = parse(yaml);
        // The step has no `name:`, so it is looked up as `deploy[0]`.
        let step = graph
            .nodes_of_kind(NodeKind::Step)
            .find(|n| n.name == "deploy[0]")
            .expect("uses step");
        assert_eq!(
            step.metadata.get(META_GHA_ACTION).map(String::as_str),
            Some("aws-actions/amazon-ecr-login")
        );
        let inputs = step
            .metadata
            .get(META_GHA_WITH_INPUTS)
            .expect("with inputs");
        assert!(inputs.contains("mask-password=false"));
        assert!(inputs.contains("registries=123456789012"));
    }
1863
    /// End-to-end check of the metadata keys stamped by the parser:
    /// workflow_call input names on the graph, `secrets: inherit` / `with:`
    /// inputs / `runs-on` on the synthetic reusable-call step, inherited env
    /// assignments and the combined job+step condition on a regular step, and
    /// container options on the container Image node.
    #[test]
    fn parser_stamps_new_exploit_rule_metadata() {
        let yaml = r#"
on:
  workflow_call:
    inputs:
      image:
        type: string
jobs:
  call:
    uses: org/repo/.github/workflows/reuse.yml@main
    runs-on: ${{ inputs.runner }}
    secrets: inherit
    with:
      image: ${{ inputs.image }}
  deploy:
    runs-on: [ubuntu-latest]
    if: ${{ needs.plan.outputs.pr_run_mode == 'upload' }}
    env:
      NODE_OPTIONS: --require=./hook.js
    container:
      image: ${{ inputs.image }}
      options: --privileged
    steps:
      - name: Publish
        if: ${{ github.event_name == 'push' }}
        run: npm publish
"#;
        let graph = parse(yaml);
        // Graph-level: declared workflow_call input names.
        assert_eq!(
            graph
                .metadata
                .get(META_GHA_WORKFLOW_CALL_INPUTS)
                .map(String::as_str),
            Some("image")
        );

        // Reusable-workflow call job: represented by a step named after the job.
        let call = graph
            .nodes_of_kind(NodeKind::Step)
            .find(|n| n.name == "call")
            .expect("synthetic reusable call step");
        assert_eq!(
            call.metadata.get(META_SECRETS_INHERIT).map(String::as_str),
            Some("true")
        );
        assert!(
            call.metadata
                .get(META_GHA_WITH_INPUTS)
                .map(|v| v.contains("image=${{ inputs.image }}"))
                .unwrap_or(false),
            "reusable-call with inputs should be stamped"
        );
        assert_eq!(
            call.metadata.get(META_GHA_RUNS_ON).map(String::as_str),
            Some("${{ inputs.runner }}")
        );

        // Regular step: job-level env and job-level `if:` must both reach it.
        let publish = graph
            .nodes_of_kind(NodeKind::Step)
            .find(|n| n.name == "Publish")
            .expect("publish step");
        assert!(
            publish
                .metadata
                .get(META_GHA_ENV_ASSIGNMENTS)
                .map(|v| v.contains("NODE_OPTIONS=--require=./hook.js"))
                .unwrap_or(false),
            "effective env assignments should be stamped on steps"
        );
        assert_eq!(
            publish.metadata.get(META_CONDITION).map(String::as_str),
            Some("${{ needs.plan.outputs.pr_run_mode == 'upload' }} AND ${{ github.event_name == 'push' }}")
        );

        // Container image node carries the raw `options:` string.
        let container = graph
            .nodes_of_kind(NodeKind::Image)
            .find(|n| n.metadata.get(META_CONTAINER).map(String::as_str) == Some("true"))
            .expect("container image node");
        assert_eq!(
            container
                .metadata
                .get(META_GHA_CONTAINER_OPTIONS)
                .map(String::as_str),
            Some("--privileged")
        );
    }
1950
    /// A `with:` block mixing a multi-line string, a sequence and a secret
    /// reference still parses, and the secret in the scalar input is found.
    #[test]
    fn with_non_scalar_values_do_not_fail_parse() {
        let yaml = r#"
jobs:
  check:
    steps:
      - name: Label
        uses: actions/github-script@v7
        with:
          script: |
            core.info("ok")
          labels:
            - bug
            - ci
          token: "${{ secrets.GITHUB_TOKEN }}"
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert!(
            secrets.iter().any(|s| s.name == "GITHUB_TOKEN"),
            "scalar values inside with: must still be scanned for secrets"
        );
    }
1974
    /// A secret referenced directly inside a `run:` script is recorded as an
    /// inferred Secret node, and the graph is downgraded to Partial with an
    /// Expression-kind gap.
    #[test]
    fn inferred_secret_in_run_block_detected() {
        let yaml = r#"
jobs:
  deploy:
    steps:
      - name: Deploy
        run: |
          curl -H "Authorization: ${{ secrets.API_TOKEN }}" https://api.example.com
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secrets.len(), 1);
        assert_eq!(secrets[0].name, "API_TOKEN");
        assert_eq!(
            secrets[0].metadata.get(META_INFERRED),
            Some(&"true".to_string())
        );
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert!(!graph.completeness_gaps.is_empty());
        // Inferred secret in a `run:` shell script — the structure is intact,
        // a value-shaped reference hides behind a shell-script expression.
        assert!(
            graph.completeness_gap_kinds.contains(&GapKind::Expression),
            "inferred secret in run: must record an Expression-kind gap, got: {:?}",
            graph.completeness_gap_kinds
        );
    }
2003
    /// A secret declared in job-level `env:` yields a single (deduplicated)
    /// Secret node that every step of the job has a `HasAccessTo` edge to.
    #[test]
    fn job_level_env_inherited_by_steps() {
        let yaml = r#"
jobs:
  build:
    env:
      DB_PASSWORD: "${{ secrets.DB_PASSWORD }}"
    steps:
      - name: Step A
        run: echo "a"
      - name: Step B
        run: echo "b"
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");

        // Both steps should have access to the secret
        let secret_id = secrets[0].id;
        let accessing_steps = graph
            .edges_to(secret_id)
            .filter(|e| e.kind == EdgeKind::HasAccessTo)
            .count();
        assert_eq!(accessing_steps, 2, "both steps inherit job-level env");
    }
2029
    /// With `permissions: write-all` the single Identity node is stamped with
    /// identity scope `broad`.
    #[test]
    fn identity_scope_set_on_token() {
        let yaml = r#"
permissions: write-all
jobs:
  ci:
    steps:
      - run: echo hi
"#;
        let graph = parse(yaml);
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identities.len(), 1);
        assert_eq!(
            identities[0].metadata.get(META_IDENTITY_SCOPE),
            Some(&"broad".to_string())
        );
    }
2047
2048    #[test]
2049    fn constrained_identity_scope() {
2050        let yaml = r#"
2051permissions:
2052  contents: read
2053jobs:
2054  ci:
2055    steps:
2056      - run: echo hi
2057"#;
2058        let graph = parse(yaml);
2059        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2060        assert_eq!(identities.len(), 1);
2061        assert_eq!(
2062            identities[0].metadata.get(META_IDENTITY_SCOPE),
2063            Some(&"constrained".to_string())
2064        );
2065    }
2066
2067    #[test]
2068    fn pull_request_target_string_trigger_marks_run_steps_untrusted() {
2069        let yaml = r#"
2070on: pull_request_target
2071jobs:
2072  check:
2073    steps:
2074      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
2075        with:
2076          ref: ${{ github.event.pull_request.head.sha }}
2077      - run: npm test
2078"#;
2079        let graph = parse(yaml);
2080        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2081        assert_eq!(steps.len(), 2);
2082
2083        // run: step should be Untrusted (might execute fork code)
2084        let run_step = steps.iter().find(|s| s.name.contains("check[1]")).unwrap();
2085        assert_eq!(
2086            run_step.trust_zone,
2087            TrustZone::Untrusted,
2088            "run: step in pull_request_target workflow should be Untrusted"
2089        );
2090
2091        // uses: step keeps its own trust zone (SHA-pinned = ThirdParty)
2092        let checkout_step = steps.iter().find(|s| s.name.contains("check[0]")).unwrap();
2093        assert_eq!(checkout_step.trust_zone, TrustZone::ThirdParty);
2094    }
2095
2096    #[test]
2097    fn pull_request_target_sequence_trigger_marks_run_steps_untrusted() {
2098        let yaml = r#"
2099on: [push, pull_request_target]
2100jobs:
2101  ci:
2102    steps:
2103      - run: echo hi
2104"#;
2105        let graph = parse(yaml);
2106        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2107        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
2108    }
2109
2110    #[test]
2111    fn pull_request_target_mapping_trigger_marks_run_steps_untrusted() {
2112        let yaml = r#"
2113on:
2114  pull_request_target:
2115    types: [opened, synchronize]
2116jobs:
2117  ci:
2118    steps:
2119      - run: echo hi
2120"#;
2121        let graph = parse(yaml);
2122        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2123        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
2124    }
2125
2126    #[test]
2127    fn push_trigger_does_not_mark_run_steps_untrusted() {
2128        let yaml = r#"
2129on: push
2130jobs:
2131  ci:
2132    steps:
2133      - run: echo hi
2134"#;
2135        let graph = parse(yaml);
2136        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2137        assert_eq!(
2138            steps[0].trust_zone,
2139            TrustZone::FirstParty,
2140            "push-triggered run: steps should remain FirstParty"
2141        );
2142    }
2143
2144    #[test]
2145    fn workflow_level_env_inherited_by_all_steps() {
2146        let yaml = r#"
2147env:
2148  DB_URL: "${{ secrets.DATABASE_URL }}"
2149jobs:
2150  build:
2151    steps:
2152      - name: Step A
2153        run: echo "a"
2154  test:
2155    steps:
2156      - name: Step B
2157        run: echo "b"
2158"#;
2159        let graph = parse(yaml);
2160        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2161        assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
2162
2163        // Both steps in both jobs should inherit the workflow-level secret
2164        let secret_id = secrets[0].id;
2165        let accessing_steps = graph
2166            .edges_to(secret_id)
2167            .filter(|e| e.kind == EdgeKind::HasAccessTo)
2168            .count();
2169        assert_eq!(accessing_steps, 2, "both steps inherit workflow-level env");
2170    }
2171
2172    #[test]
2173    fn matrix_strategy_marks_graph_partial() {
2174        let yaml = r#"
2175jobs:
2176  test:
2177    strategy:
2178      matrix:
2179        os: [ubuntu-latest, windows-latest, macos-latest]
2180    steps:
2181      - run: echo hi
2182"#;
2183        let graph = parse(yaml);
2184        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2185        assert!(
2186            graph.completeness_gaps.iter().any(|g| g.contains("matrix")),
2187            "matrix strategy should be recorded as a completeness gap"
2188        );
2189        // Matrix is a runtime expression hiding values across job instances —
2190        // the graph structure for one matrix entry is intact.
2191        assert!(
2192            graph.completeness_gap_kinds.contains(&GapKind::Expression),
2193            "matrix strategy must record an Expression-kind gap, got: {:?}",
2194            graph.completeness_gap_kinds
2195        );
2196    }
2197
2198    #[test]
2199    fn job_without_matrix_does_not_mark_partial() {
2200        let yaml = r#"
2201jobs:
2202  build:
2203    steps:
2204      - run: cargo build
2205"#;
2206        let graph = parse(yaml);
2207        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
2208    }
2209
2210    #[test]
2211    fn reusable_workflow_creates_image_and_marks_partial() {
2212        let yaml = r#"
2213jobs:
2214  call:
2215    uses: org/repo/.github/workflows/deploy.yml@main
2216"#;
2217        let graph = parse(yaml);
2218        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2219        assert_eq!(images.len(), 1);
2220        assert_eq!(images[0].name, "org/repo/.github/workflows/deploy.yml@main");
2221        assert_eq!(images[0].trust_zone, TrustZone::Untrusted); // not SHA-pinned
2222
2223        // Step node representing the job delegation
2224        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2225        assert_eq!(steps.len(), 1);
2226        assert_eq!(steps[0].name, "call");
2227
2228        // DelegatesTo edge from step to reusable workflow image
2229        let delegates: Vec<_> = graph
2230            .edges_from(steps[0].id)
2231            .filter(|e| e.kind == EdgeKind::DelegatesTo)
2232            .collect();
2233        assert_eq!(delegates.len(), 1);
2234
2235        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2236        // Reusable workflow `uses:` is unresolvable here — the called workflow's
2237        // authority chain is invisible, which is a Structural gap, not an
2238        // expression substitution.
2239        assert!(
2240            graph.completeness_gap_kinds.contains(&GapKind::Structural),
2241            "reusable workflow must record a Structural-kind gap, got: {:?}",
2242            graph.completeness_gap_kinds
2243        );
2244    }
2245
2246    #[test]
2247    fn reusable_workflow_sha_pinned_is_third_party() {
2248        let yaml = r#"
2249jobs:
2250  call:
2251    uses: org/repo/.github/workflows/deploy.yml@a5ac7e51b41094c92402da3b24376905380afc29
2252"#;
2253        let graph = parse(yaml);
2254        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2255        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
2256    }
2257
2258    #[test]
2259    fn container_unpinned_creates_image_node_untrusted() {
2260        let yaml = r#"
2261jobs:
2262  build:
2263    container: ubuntu:22.04
2264    steps:
2265      - run: echo hi
2266"#;
2267        let graph = parse(yaml);
2268        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2269        assert_eq!(images.len(), 1);
2270        assert_eq!(images[0].name, "ubuntu:22.04");
2271        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
2272        assert_eq!(
2273            images[0].metadata.get(META_CONTAINER),
2274            Some(&"true".to_string())
2275        );
2276    }
2277
2278    #[test]
2279    fn container_digest_pinned_creates_image_node_third_party() {
2280        let yaml = r#"
2281jobs:
2282  build:
2283    container:
2284      image: "ubuntu@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
2285    steps:
2286      - run: echo hi
2287"#;
2288        let graph = parse(yaml);
2289        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2290        assert_eq!(images.len(), 1);
2291        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
2292        assert_eq!(
2293            images[0].metadata.get(META_CONTAINER),
2294            Some(&"true".to_string())
2295        );
2296    }
2297
2298    #[test]
2299    fn oidc_permission_tags_identity_with_meta_oidc() {
2300        let yaml = r#"
2301permissions:
2302  id-token: write
2303  contents: read
2304jobs:
2305  ci:
2306    steps:
2307      - run: echo hi
2308"#;
2309        let graph = parse(yaml);
2310        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2311        assert_eq!(identities.len(), 1);
2312        assert_eq!(
2313            identities[0].metadata.get(META_OIDC),
2314            Some(&"true".to_string()),
2315            "id-token: write should mark identity as OIDC-capable"
2316        );
2317    }
2318
2319    #[test]
2320    fn non_oidc_permission_does_not_tag_meta_oidc() {
2321        let yaml = r#"
2322permissions:
2323  contents: read
2324jobs:
2325  ci:
2326    steps:
2327      - run: echo hi
2328"#;
2329        let graph = parse(yaml);
2330        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2331        assert_eq!(identities.len(), 1);
2332        assert!(
2333            !identities[0].metadata.contains_key(META_OIDC),
2334            "contents:read should not tag as OIDC"
2335        );
2336    }
2337
2338    #[test]
2339    fn contents_write_without_id_token_does_not_tag_oidc() {
2340        // Regression: "contents: write" contains "write" but not "id-token: write".
2341        // Should NOT be tagged as OIDC-capable.
2342        let yaml = r#"
2343permissions:
2344  contents: write
2345jobs:
2346  ci:
2347    steps:
2348      - run: echo hi
2349"#;
2350        let graph = parse(yaml);
2351        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2352        assert_eq!(identities.len(), 1);
2353        assert!(
2354            !identities[0].metadata.contains_key(META_OIDC),
2355            "contents:write without id-token must not be tagged OIDC"
2356        );
2357    }
2358
2359    #[test]
2360    fn write_all_permission_tags_identity_as_oidc() {
2361        // `permissions: write-all` grants every permission including id-token: write.
2362        let yaml = r#"
2363permissions: write-all
2364jobs:
2365  ci:
2366    steps:
2367      - run: echo hi
2368"#;
2369        let graph = parse(yaml);
2370        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2371        assert_eq!(identities.len(), 1);
2372        assert_eq!(
2373            identities[0].metadata.get(META_OIDC),
2374            Some(&"true".to_string()),
2375            "write-all grants all permissions including id-token: write"
2376        );
2377    }
2378
2379    #[test]
2380    fn container_steps_linked_to_container_image() {
2381        let yaml = r#"
2382jobs:
2383  build:
2384    container: ubuntu:22.04
2385    steps:
2386      - name: Step A
2387        run: echo "a"
2388      - name: Step B
2389        run: echo "b"
2390"#;
2391        let graph = parse(yaml);
2392        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2393        assert_eq!(images.len(), 1);
2394        let container_id = images[0].id;
2395
2396        // Both steps must have UsesImage edges to the container
2397        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2398        assert_eq!(steps.len(), 2);
2399        for step in &steps {
2400            let links: Vec<_> = graph
2401                .edges_from(step.id)
2402                .filter(|e| e.kind == EdgeKind::UsesImage && e.to == container_id)
2403                .collect();
2404            assert_eq!(
2405                links.len(),
2406                1,
2407                "step '{}' must link to container",
2408                step.name
2409            );
2410        }
2411    }
2412
2413    #[test]
2414    fn container_authority_propagates_to_floating_image() {
2415        // Integration: authority from a step running in a floating container should
2416        // propagate to the container Image node (Untrusted), generating a finding.
2417        let yaml = r#"
2418permissions: write-all
2419jobs:
2420  build:
2421    container: ubuntu:22.04
2422    steps:
2423      - run: echo hi
2424"#;
2425        use taudit_core::propagation::DEFAULT_MAX_HOPS;
2426        use taudit_core::rules;
2427        let graph = parse(yaml);
2428        let findings = rules::run_all_rules(&graph, DEFAULT_MAX_HOPS);
2429        // Should detect: GITHUB_TOKEN (broad) propagates to ubuntu:22.04 (Untrusted) via step
2430        assert!(
2431            findings
2432                .iter()
2433                .any(|f| f.category == taudit_core::finding::FindingCategory::AuthorityPropagation),
2434            "authority should propagate from step to floating container"
2435        );
2436    }
2437
2438    #[test]
2439    fn aws_oidc_creates_identity_node() {
2440        let yaml = r#"
2441jobs:
2442  deploy:
2443    steps:
2444      - name: Configure AWS credentials
2445        uses: aws-actions/configure-aws-credentials@v4
2446        with:
2447          role-to-assume: arn:aws:iam::123456789012:role/my-deploy-role
2448          aws-region: us-east-1
2449"#;
2450        let graph = parse(yaml);
2451        let identities: Vec<_> = graph
2452            .nodes_of_kind(NodeKind::Identity)
2453            .filter(|n| n.name != "GITHUB_TOKEN")
2454            .collect();
2455        assert_eq!(identities.len(), 1);
2456        // ARN arn:aws:iam::123456789012:role/my-deploy-role → last '/' segment
2457        assert_eq!(identities[0].name, "AWS/my-deploy-role");
2458        assert_eq!(
2459            identities[0].metadata.get(META_OIDC),
2460            Some(&"true".to_string())
2461        );
2462        assert_eq!(
2463            identities[0].metadata.get(META_IDENTITY_SCOPE),
2464            Some(&"broad".to_string())
2465        );
2466    }
2467
2468    #[test]
2469    fn gcp_oidc_creates_identity_node() {
2470        let yaml = r#"
2471jobs:
2472  deploy:
2473    steps:
2474      - name: Authenticate to GCP
2475        uses: google-github-actions/auth@v2
2476        with:
2477          workload_identity_provider: projects/123/locations/global/workloadIdentityPools/my-pool/providers/my-provider
2478          service_account: my-sa@my-project.iam.gserviceaccount.com
2479"#;
2480        let graph = parse(yaml);
2481        let identities: Vec<_> = graph
2482            .nodes_of_kind(NodeKind::Identity)
2483            .filter(|n| n.name != "GITHUB_TOKEN")
2484            .collect();
2485        assert_eq!(identities.len(), 1);
2486        assert!(identities[0].name.starts_with("GCP/"));
2487        assert_eq!(
2488            identities[0].metadata.get(META_OIDC),
2489            Some(&"true".to_string())
2490        );
2491    }
2492
2493    #[test]
2494    fn azure_oidc_creates_identity_node() {
2495        let yaml = r#"
2496jobs:
2497  deploy:
2498    steps:
2499      - name: Azure login
2500        uses: azure/login@v2
2501        with:
2502          client-id: ${{ vars.AZURE_CLIENT_ID }}
2503          tenant-id: ${{ vars.AZURE_TENANT_ID }}
2504          subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
2505"#;
2506        let graph = parse(yaml);
2507        let identities: Vec<_> = graph
2508            .nodes_of_kind(NodeKind::Identity)
2509            .filter(|n| n.name != "GITHUB_TOKEN")
2510            .collect();
2511        assert_eq!(identities.len(), 1);
2512        assert!(identities[0].name.starts_with("Azure/"));
2513        assert_eq!(
2514            identities[0].metadata.get(META_OIDC),
2515            Some(&"true".to_string())
2516        );
2517    }
2518
2519    #[test]
2520    fn azure_static_sp_does_not_create_identity_node() {
2521        // When client-secret is present, it's a static service principal — not OIDC.
2522        // The secret scanning in with: handles this; classify_cloud_auth returns None.
2523        let yaml = r#"
2524jobs:
2525  deploy:
2526    steps:
2527      - name: Azure login
2528        uses: azure/login@v2
2529        with:
2530          client-id: my-client-id
2531          client-secret: ${{ secrets.AZURE_CLIENT_SECRET }}
2532          tenant-id: my-tenant
2533"#;
2534        let graph = parse(yaml);
2535        // Identity node should NOT be created by cloud auth inference
2536        let identities: Vec<_> = graph
2537            .nodes_of_kind(NodeKind::Identity)
2538            .filter(|n| n.name != "GITHUB_TOKEN")
2539            .collect();
2540        assert!(
2541            identities.is_empty(),
2542            "static SP should not create an OIDC Identity node"
2543        );
2544        // But the secret SHOULD be captured by existing with: scanning
2545        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2546        assert_eq!(secrets.len(), 1);
2547        assert_eq!(secrets[0].name, "AZURE_CLIENT_SECRET");
2548    }
2549
2550    #[test]
2551    fn aws_static_creds_do_not_create_identity_node() {
2552        // Static access key path — no role-to-assume, so classify_cloud_auth returns None.
2553        // The access key secret is captured by with: scanning.
2554        let yaml = r#"
2555jobs:
2556  deploy:
2557    steps:
2558      - uses: aws-actions/configure-aws-credentials@v4
2559        with:
2560          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
2561          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
2562          aws-region: us-east-1
2563"#;
2564        let graph = parse(yaml);
2565        let identities: Vec<_> = graph
2566            .nodes_of_kind(NodeKind::Identity)
2567            .filter(|n| n.name != "GITHUB_TOKEN")
2568            .collect();
2569        assert!(
2570            identities.is_empty(),
2571            "static AWS creds must not create Identity node"
2572        );
2573        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2574        assert_eq!(secrets.len(), 2, "both static secrets captured");
2575    }
2576
2577    #[test]
2578    fn pull_request_target_sets_meta_trigger_on_graph() {
2579        let yaml = r#"
2580on: pull_request_target
2581jobs:
2582  ci:
2583    steps:
2584      - run: echo hi
2585"#;
2586        let graph = parse(yaml);
2587        assert_eq!(
2588            graph.metadata.get(META_TRIGGER),
2589            Some(&"pull_request_target".to_string())
2590        );
2591    }
2592
2593    #[test]
2594    fn github_env_write_in_run_sets_meta_writes_env_gate() {
2595        let yaml = r#"
2596jobs:
2597  build:
2598    steps:
2599      - name: Set version
2600        run: echo "VERSION=1.0" >> $GITHUB_ENV
2601"#;
2602        let graph = parse(yaml);
2603        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2604        assert_eq!(steps.len(), 1);
2605        assert_eq!(
2606            steps[0].metadata.get(META_WRITES_ENV_GATE),
2607            Some(&"true".to_string()),
2608            "run: with >> $GITHUB_ENV must mark META_WRITES_ENV_GATE"
2609        );
2610    }
2611
2612    #[test]
2613    fn attest_action_sets_meta_attests() {
2614        let yaml = r#"
2615jobs:
2616  release:
2617    steps:
2618      - name: Attest
2619        uses: actions/attest-build-provenance@v1
2620        with:
2621          subject-path: dist/*
2622"#;
2623        let graph = parse(yaml);
2624        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2625        assert_eq!(steps.len(), 1);
2626        assert_eq!(
2627            steps[0].metadata.get(META_ATTESTS),
2628            Some(&"true".to_string())
2629        );
2630    }
2631
2632    #[test]
2633    fn self_hosted_string_runs_on_creates_image_with_self_hosted_metadata() {
2634        let yaml = r#"
2635jobs:
2636  build:
2637    runs-on: self-hosted
2638    steps:
2639      - run: echo hi
2640"#;
2641        let graph = parse(yaml);
2642        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2643        let runner = images
2644            .iter()
2645            .find(|i| i.metadata.contains_key(META_SELF_HOSTED))
2646            .expect("self-hosted runner Image node must be created");
2647        assert_eq!(
2648            runner.metadata.get(META_SELF_HOSTED),
2649            Some(&"true".to_string())
2650        );
2651    }
2652
2653    #[test]
2654    fn self_hosted_in_sequence_runs_on_creates_image_with_self_hosted_metadata() {
2655        let yaml = r#"
2656jobs:
2657  build:
2658    runs-on: [self-hosted, linux, x64]
2659    steps:
2660      - run: echo hi
2661"#;
2662        let graph = parse(yaml);
2663        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2664        let runner = images
2665            .iter()
2666            .find(|i| i.metadata.contains_key(META_SELF_HOSTED))
2667            .expect("self-hosted runner Image node must be created");
2668        assert_eq!(
2669            runner.metadata.get(META_SELF_HOSTED),
2670            Some(&"true".to_string())
2671        );
2672    }
2673
2674    #[test]
2675    fn hosted_runner_does_not_create_self_hosted_image() {
2676        let yaml = r#"
2677jobs:
2678  build:
2679    runs-on: ubuntu-latest
2680    steps:
2681      - run: echo hi
2682"#;
2683        let graph = parse(yaml);
2684        let self_hosted_images: Vec<_> = graph
2685            .nodes_of_kind(NodeKind::Image)
2686            .filter(|i| i.metadata.contains_key(META_SELF_HOSTED))
2687            .collect();
2688        assert!(
2689            self_hosted_images.is_empty(),
2690            "hosted runner must not produce a self-hosted Image node"
2691        );
2692    }
2693
2694    #[test]
2695    fn actions_checkout_step_tagged_with_meta_checkout_self() {
2696        let yaml = r#"
2697jobs:
2698  ci:
2699    steps:
2700      - uses: actions/checkout@v4
2701      - run: echo hi
2702"#;
2703        let graph = parse(yaml);
2704        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2705        let checkout_step = steps
2706            .iter()
2707            .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
2708            .expect("actions/checkout step must be tagged META_CHECKOUT_SELF");
2709        assert_eq!(
2710            checkout_step.metadata.get(META_CHECKOUT_SELF),
2711            Some(&"true".to_string())
2712        );
2713    }
2714
2715    #[test]
2716    fn actions_checkout_sha_pinned_also_tagged() {
2717        let yaml = r#"
2718jobs:
2719  ci:
2720    steps:
2721      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
2722"#;
2723        let graph = parse(yaml);
2724        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2725        assert_eq!(steps.len(), 1);
2726        assert_eq!(
2727            steps[0].metadata.get(META_CHECKOUT_SELF),
2728            Some(&"true".to_string()),
2729            "SHA-pinned checkout must still be tagged — rule gates on trigger context"
2730        );
2731    }
2732
2733    #[test]
2734    fn non_checkout_uses_not_tagged_checkout_self() {
2735        let yaml = r#"
2736jobs:
2737  ci:
2738    steps:
2739      - uses: some-org/other-action@v1
2740"#;
2741        let graph = parse(yaml);
2742        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2743        assert_eq!(steps.len(), 1);
2744        assert!(
2745            !steps[0].metadata.contains_key(META_CHECKOUT_SELF),
2746            "non-checkout uses: must not be tagged"
2747        );
2748    }
2749
2750    /// Build a unique temp directory under the OS temp root. We avoid pulling
2751    /// in the `tempfile` crate (no new deps allowed) — uniqueness comes from
2752    /// PID + a per-call atomic counter, which is sufficient for serial tests.
2753    fn make_temp_dir(label: &str) -> std::path::PathBuf {
2754        use std::sync::atomic::{AtomicU64, Ordering};
2755        static COUNTER: AtomicU64 = AtomicU64::new(0);
2756        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
2757        let dir = std::env::temp_dir().join(format!(
2758            "taudit-gha-test-{}-{}-{}",
2759            std::process::id(),
2760            n,
2761            label
2762        ));
2763        let _ = std::fs::remove_dir_all(&dir);
2764        std::fs::create_dir_all(&dir).expect("create temp dir");
2765        dir
2766    }
2767
2768    fn parse_at(yaml: &str, file: &str) -> AuthorityGraph {
2769        let parser = GhaParser;
2770        let source = PipelineSource {
2771            file: file.into(),
2772            repo: None,
2773            git_ref: None,
2774            commit_sha: None,
2775        };
2776        parser.parse(yaml, &source).unwrap()
2777    }
2778
2779    #[test]
2780    fn composite_action_reference_marks_graph_partial_without_inlining() {
2781        // Post-fix behaviour: composite-action references are NOT inlined.
2782        // Earlier versions walked the filesystem from the workflow's directory
2783        // looking for `action.yml`; that made the graph CWD-dependent. We now
2784        // mark the graph Partial with a Structural gap and never read disk.
2785        let dir = make_temp_dir("composite-no-inline");
2786        let workflows_dir = dir.join(".github/workflows");
2787        let action_dir = dir.join(".github/actions/my-action");
2788        std::fs::create_dir_all(&workflows_dir).unwrap();
2789        std::fs::create_dir_all(&action_dir).unwrap();
2790
2791        // Real action.yml on disk — must be ignored.
2792        let action_yml = r#"
2793name: My Action
2794runs:
2795  using: composite
2796  steps:
2797    - name: Install deps
2798      run: npm install
2799      shell: bash
2800"#;
2801        std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
2802
2803        let workflow = r#"
2804jobs:
2805  ci:
2806    steps:
2807      - name: Run my action
2808        uses: ./.github/actions/my-action
2809"#;
2810        let workflow_path = workflows_dir.join("ci.yml");
2811        std::fs::write(&workflow_path, workflow).unwrap();
2812
2813        let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2814
2815        // Only the calling step — no inlining.
2816        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2817        assert_eq!(steps.len(), 1, "no composite-action step inlining");
2818
2819        // Graph is Partial with a Structural gap mentioning the local action.
2820        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2821        assert!(
2822            graph.completeness_gap_kinds.contains(&GapKind::Structural),
2823            "local action reference must record a Structural-kind gap, got: {:?}",
2824            graph.completeness_gap_kinds
2825        );
2826        assert!(
2827            graph
2828                .completeness_gaps
2829                .iter()
2830                .any(|g| g.contains("composite action not resolved")
2831                    && g.contains("./.github/actions/my-action")),
2832            "gap reason must name the action and explain non-resolution, got: {:?}",
2833            graph.completeness_gaps
2834        );
2835
2836        let _ = std::fs::remove_dir_all(&dir);
2837    }
2838
2839    #[test]
2840    fn missing_action_yml_marks_graph_partial() {
2841        // Whether `action.yml` exists on disk is irrelevant after the fix —
2842        // any `./local-action` reference is treated as Partial without
2843        // touching the filesystem.
2844        let dir = make_temp_dir("missing-action");
2845        let workflows_dir = dir.join(".github/workflows");
2846        std::fs::create_dir_all(&workflows_dir).unwrap();
2847
2848        let workflow = r#"
2849jobs:
2850  ci:
2851    steps:
2852      - uses: ./.github/actions/missing-action
2853"#;
2854        let workflow_path = workflows_dir.join("ci.yml");
2855        std::fs::write(&workflow_path, workflow).unwrap();
2856
2857        let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2858
2859        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2860        assert!(
2861            graph.completeness_gaps.iter().any(
2862                |g| g.contains("composite action not resolved") && g.contains("missing-action")
2863            ),
2864            "missing local action must be recorded as a completeness gap, got: {:?}",
2865            graph.completeness_gaps
2866        );
2867        assert!(
2868            graph.completeness_gap_kinds.contains(&GapKind::Structural),
2869            "unresolved composite action must record a Structural-kind gap, got: {:?}",
2870            graph.completeness_gap_kinds
2871        );
2872
2873        let _ = std::fs::remove_dir_all(&dir);
2874    }
2875
2876    #[test]
2877    fn non_composite_local_action_marks_graph_partial() {
2878        // Post-fix: we don't read action.yml, so we cannot distinguish
2879        // composite from docker locally. Either way the answer is the same:
2880        // mark Partial and don't pretend to know what's inside.
2881        let dir = make_temp_dir("non-composite");
2882        let workflows_dir = dir.join(".github/workflows");
2883        let action_dir = dir.join(".github/actions/docker-action");
2884        std::fs::create_dir_all(&workflows_dir).unwrap();
2885        std::fs::create_dir_all(&action_dir).unwrap();
2886
2887        let action_yml = r#"
2888name: Docker Action
2889runs:
2890  using: docker
2891  image: Dockerfile
2892"#;
2893        std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
2894
2895        let workflow = r#"
2896jobs:
2897  ci:
2898    steps:
2899      - uses: ./.github/actions/docker-action
2900"#;
2901        let workflow_path = workflows_dir.join("ci.yml");
2902        std::fs::write(&workflow_path, workflow).unwrap();
2903
2904        let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2905
2906        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2907        assert!(
2908            graph.completeness_gap_kinds.contains(&GapKind::Structural),
2909            "local action reference must record a Structural-kind gap, got: {:?}",
2910            graph.completeness_gap_kinds
2911        );
2912
2913        // Only the calling step exists — no inlined sub-steps.
2914        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2915        assert_eq!(steps.len(), 1, "must not inline any sub-steps");
2916
2917        let _ = std::fs::remove_dir_all(&dir);
2918    }
2919
2920    #[test]
2921    fn composite_action_secrets_not_captured_after_partial_marking() {
2922        // Post-fix: secrets that live INSIDE a composite action's `action.yml`
2923        // are NOT visible to the parser (we don't read the file). The graph
2924        // is marked Partial so downstream rules know there's hidden authority.
2925        // This is the deliberate trade-off vs CWD-dependent inlining.
2926        let dir = make_temp_dir("composite-secrets-hidden");
2927        let workflows_dir = dir.join(".github/workflows");
2928        let action_dir = dir.join(".github/actions/deploy");
2929        std::fs::create_dir_all(&workflows_dir).unwrap();
2930        std::fs::create_dir_all(&action_dir).unwrap();
2931
2932        let action_yml = r#"
2933name: Deploy
2934runs:
2935  using: composite
2936  steps:
2937    - name: Push
2938      run: |
2939        curl -H "Authorization: ${{ secrets.DEPLOY_TOKEN }}" https://example.com
2940      shell: bash
2941"#;
2942        std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
2943
2944        let workflow = r#"
2945jobs:
2946  release:
2947    steps:
2948      - uses: ./.github/actions/deploy
2949"#;
2950        let workflow_path = workflows_dir.join("release.yml");
2951        std::fs::write(&workflow_path, workflow).unwrap();
2952
2953        let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2954
2955        let secret_names: Vec<_> = graph
2956            .nodes_of_kind(NodeKind::Secret)
2957            .map(|s| s.name.as_str())
2958            .collect();
2959        assert!(
2960            !secret_names.contains(&"DEPLOY_TOKEN"),
2961            "secret hidden inside composite action must NOT leak into the graph, got: {secret_names:?}"
2962        );
2963        assert_eq!(
2964            graph.completeness,
2965            AuthorityCompleteness::Partial,
2966            "composite action reference must mark graph Partial"
2967        );
2968
2969        let _ = std::fs::remove_dir_all(&dir);
2970    }
2971
2972    #[test]
2973    fn step_env_literal_shadows_workflow_level_secret() {
2974        // regression: step.env shadowing must drop workflow/job-level secret
2975        // edges for the shadowed key
2976        //
2977        // GHA semantics: a step-level `env: { K: literal }` shadows the
2978        // workflow- or job-level value of `K` for that step. Earlier versions
2979        // emitted HasAccessTo edges from EACH scope independently, so a
2980        // literal shadow at the step level still left a phantom edge from the
2981        // step to the outer secret. After the fix, edges are emitted only for
2982        // the merged effective env map.
2983        let yaml = r#"
2984on: pull_request_target
2985env:
2986  TOKEN: ${{ secrets.PROD_TOKEN }}
2987jobs:
2988  build:
2989    steps:
2990      - run: ./scan.sh
2991        env:
2992          TOKEN: literal-non-secret
2993"#;
2994        let graph = parse(yaml);
2995
2996        // The PROD_TOKEN secret node may or may not exist (deduplication
2997        // could keep it from being created at all). What MUST hold is: no
2998        // step has a HasAccessTo edge to a Secret node named PROD_TOKEN.
2999        let prod_token_id = graph
3000            .nodes_of_kind(NodeKind::Secret)
3001            .find(|n| n.name == "PROD_TOKEN")
3002            .map(|n| n.id);
3003
3004        if let Some(secret_id) = prod_token_id {
3005            let leaks = graph
3006                .edges_to(secret_id)
3007                .filter(|e| e.kind == EdgeKind::HasAccessTo)
3008                .count();
3009            assert_eq!(
3010                leaks, 0,
3011                "step-level env literal must shadow workflow-level secret — \
3012                 expected 0 HasAccessTo edges to PROD_TOKEN, found {leaks}"
3013            );
3014        }
3015    }
3016
3017    #[test]
3018    fn step_env_secret_shadows_workflow_level_secret() {
3019        // Variant of the shadowing test where step.env replaces the same key
3020        // with a DIFFERENT secret. The step must have access to the new
3021        // secret only — not the shadowed one.
3022        let yaml = r#"
3023on: pull_request_target
3024env:
3025  TOKEN: ${{ secrets.PROD_TOKEN }}
3026jobs:
3027  build:
3028    steps:
3029      - run: ./scan.sh
3030        env:
3031          TOKEN: ${{ secrets.STAGING_TOKEN }}
3032"#;
3033        let graph = parse(yaml);
3034
3035        let secret_names: Vec<_> = graph
3036            .nodes_of_kind(NodeKind::Secret)
3037            .map(|s| s.name.clone())
3038            .collect();
3039
3040        // STAGING_TOKEN must be present; PROD_TOKEN must not be reachable.
3041        assert!(
3042            secret_names.contains(&"STAGING_TOKEN".to_string()),
3043            "shadowing secret must be in the graph, got: {secret_names:?}"
3044        );
3045
3046        let prod_id = graph
3047            .nodes_of_kind(NodeKind::Secret)
3048            .find(|n| n.name == "PROD_TOKEN")
3049            .map(|n| n.id);
3050        if let Some(prod_id) = prod_id {
3051            let leaks = graph
3052                .edges_to(prod_id)
3053                .filter(|e| e.kind == EdgeKind::HasAccessTo)
3054                .count();
3055            assert_eq!(
3056                leaks, 0,
3057                "step-level env secret must shadow workflow-level secret \
3058                 (no HasAccessTo edge to PROD_TOKEN), found {leaks}"
3059            );
3060        }
3061    }
3062
3063    #[test]
3064    fn composite_action_resolution_does_not_depend_on_cwd() {
3065        // regression: composite-action resolution must not depend on CWD
3066        //
3067        // Before the fix, `resolve_local_action_path` walked up from
3068        // `pipeline_file`'s parent calling `Path::exists()` on disk; the same
3069        // YAML produced different graphs depending on (a) whether
3070        // `pipeline_file` was absolute or relative, (b) the binary's CWD,
3071        // (c) whether the consumer copied the YAML to a sandbox without the
3072        // surrounding repo. After the B1 fix, the parser never reads the
3073        // filesystem for composite actions — `./local-action` references are
3074        // unconditionally Partial.
3075        let dir = make_temp_dir("cwd-independence");
3076        let workflows_dir = dir.join(".github/workflows");
3077        let action_dir = dir.join(".github/actions/x");
3078        std::fs::create_dir_all(&workflows_dir).unwrap();
3079        std::fs::create_dir_all(&action_dir).unwrap();
3080
3081        let action_yml = r#"
3082name: X
3083runs:
3084  using: composite
3085  steps:
3086    - run: echo hi
3087      shell: bash
3088"#;
3089        std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
3090
3091        let workflow = r#"
3092jobs:
3093  ci:
3094    steps:
3095      - uses: ./.github/actions/x
3096"#;
3097        let workflow_path = workflows_dir.join("ci.yml");
3098        std::fs::write(&workflow_path, workflow).unwrap();
3099
3100        // Parse 1: from CWD inside the temp dir, with a relative pipeline_file.
3101        let prev_cwd = std::env::current_dir().ok();
3102        std::env::set_current_dir(&dir).unwrap();
3103        let graph_inside = parse_at(workflow, ".github/workflows/ci.yml");
3104        if let Some(p) = prev_cwd {
3105            std::env::set_current_dir(p).unwrap();
3106        }
3107
3108        // Parse 2: from outside the temp dir, with an absolute pipeline_file.
3109        let abs_workflow_path = workflow_path.to_str().unwrap().to_string();
3110        let graph_outside = parse_at(workflow, &abs_workflow_path);
3111
3112        // B1: both must be Partial — composite action filesystem walking is
3113        // gone, so neither inlines.
3114        assert_eq!(
3115            graph_inside.completeness,
3116            AuthorityCompleteness::Partial,
3117            "graph parsed from inside the worktree must be Partial"
3118        );
3119        assert_eq!(
3120            graph_outside.completeness,
3121            AuthorityCompleteness::Partial,
3122            "graph parsed from outside the worktree must be Partial"
3123        );
3124        // CWD-independence: completeness values must match exactly.
3125        assert_eq!(
3126            graph_inside.completeness, graph_outside.completeness,
3127            "CWD-relative vs absolute pipeline_file must produce identical completeness"
3128        );
3129        // And neither path may inline composite-action sub-steps.
3130        assert_eq!(
3131            graph_inside.nodes_of_kind(NodeKind::Step).count(),
3132            1,
3133            "inside parse must not inline composite sub-steps"
3134        );
3135        assert_eq!(
3136            graph_outside.nodes_of_kind(NodeKind::Step).count(),
3137            1,
3138            "outside parse must not inline composite sub-steps"
3139        );
3140
3141        let _ = std::fs::remove_dir_all(&dir);
3142    }
3143
3144    #[test]
3145    fn workflow_level_permissions_create_identity() {
3146        let yaml = r#"
3147permissions: write-all
3148jobs:
3149  ci:
3150    steps:
3151      - run: echo hi
3152"#;
3153        let graph = parse(yaml);
3154        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
3155        assert_eq!(identities.len(), 1);
3156        assert_eq!(identities[0].name, "GITHUB_TOKEN");
3157        assert_eq!(
3158            identities[0].metadata.get(META_PERMISSIONS).unwrap(),
3159            "write-all"
3160        );
3161    }
3162
3163    #[test]
3164    fn omitted_workflow_permissions_create_unknown_implicit_identity() {
3165        let yaml = r#"
3166jobs:
3167  ci:
3168    steps:
3169      - run: echo hi
3170"#;
3171        let graph = parse(yaml);
3172        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
3173        assert_eq!(identities.len(), 1);
3174        assert_eq!(identities[0].name, "GITHUB_TOKEN");
3175        assert_eq!(
3176            identities[0].metadata.get(META_IDENTITY_SCOPE).unwrap(),
3177            "unknown"
3178        );
3179        assert_eq!(identities[0].metadata.get(META_IMPLICIT).unwrap(), "true");
3180    }
3181
3182    #[test]
3183    fn job_env_template_expression_does_not_crash_and_marks_partial() {
3184        // Real-world repro from scikit-learn unit-tests.yml: job-level `env:`
3185        // is a bare template expression (`${{ matrix }}`) instead of a map.
3186        // Historically the GHA parser deserialized env: as `HashMap<String,String>`
3187        // and crashed with "invalid type: string ..., expected a map". The parser
3188        // must now tolerate this gracefully: parse succeeds, graph is marked Partial
3189        // with a reason that mentions the template-shaped env.
3190        let yaml = r#"
3191jobs:
3192  unit-tests:
3193    env: ${{ matrix }}
3194    steps:
3195      - run: pytest
3196"#;
3197        let graph = parse(yaml);
3198        // No crash — parse returned a graph.
3199        assert!(
3200            matches!(graph.completeness, AuthorityCompleteness::Partial),
3201            "graph must be marked Partial when env: is a template expression"
3202        );
3203        let saw_template_gap = graph
3204            .completeness_gaps
3205            .iter()
3206            .any(|g| g.contains("env:") && g.contains("template"));
3207        assert!(
3208            saw_template_gap,
3209            "completeness_gaps must mention env: template, got: {:?}",
3210            graph.completeness_gaps
3211        );
3212        // Job-level `env:` as a template expression is the canonical
3213        // Expression-kind gap — env shape is hidden, structure is intact.
3214        assert!(
3215            graph.completeness_gap_kinds.contains(&GapKind::Expression),
3216            "job-level env: template must record an Expression-kind gap, got: {:?}",
3217            graph.completeness_gap_kinds
3218        );
3219        // Steps still parsed normally.
3220        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3221        assert_eq!(steps.len(), 1, "the single step must still be parsed");
3222    }
3223
3224    #[test]
3225    fn env_with_non_string_scalar_values_parses() {
3226        // Real-world repro from BurntSushi/ripgrep ci.yml and many others:
3227        // GHA env values can be booleans (`COVERAGE: false`), integers
3228        // (`RUST_BACKTRACE: 1`), or null (`TARGET_FLAGS:`). A naive
3229        // HashMap<String, String> deserializer rejects these. After the fix,
3230        // they round-trip — booleans/numbers as their textual form,
3231        // null as the empty string.
3232        let yaml = r#"
3233jobs:
3234  test:
3235    env:
3236      RUST_BACKTRACE: 1
3237      COVERAGE: false
3238      TARGET_FLAGS:
3239      CARGO: cargo
3240    steps:
3241      - run: cargo test
3242"#;
3243        let graph = parse(yaml);
3244        // Parse must succeed and produce the step node.
3245        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3246        assert_eq!(steps.len(), 1, "expected the single step to parse");
3247        // Graph stays Complete — env: is a real map, not a template.
3248        assert!(
3249            !matches!(graph.completeness, AuthorityCompleteness::Partial)
3250                || !graph
3251                    .completeness_gaps
3252                    .iter()
3253                    .any(|g| g.contains("env:") && g.contains("template")),
3254            "non-string env values must not mark the graph Partial via the env-template path"
3255        );
3256    }
3257
3258    #[test]
3259    fn step_env_with_boolean_and_integer_values_parses() {
3260        // Same regression class but at step-level env: instead of job-level.
3261        let yaml = r#"
3262jobs:
3263  build:
3264    steps:
3265      - name: build
3266        run: make
3267        env:
3268          DEBUG: true
3269          RETRIES: 3
3270          OPTIONAL_FLAG:
3271"#;
3272        let graph = parse(yaml);
3273        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3274        assert_eq!(steps.len(), 1);
3275    }
3276
3277    #[test]
3278    fn meta_job_name_set_on_step_nodes() {
3279        let yaml = r#"
3280jobs:
3281  build:
3282    steps:
3283      - name: Checkout
3284        uses: actions/checkout@v4
3285      - name: Compile
3286        run: make build
3287"#;
3288        let graph = parse(yaml);
3289        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3290        assert!(!steps.is_empty(), "expected at least one Step node");
3291        for step in &steps {
3292            assert_eq!(
3293                step.metadata.get(META_JOB_NAME).map(String::as_str),
3294                Some("build"),
3295                "Step {:?} missing META_JOB_NAME=build",
3296                step.name
3297            );
3298        }
3299    }
3300
3301    // ── Cross-platform misclassification trap (red-team R2 #5) ─────
3302
3303    #[test]
3304    fn jobs_without_steps_marks_partial() {
3305        // `jobs:` is non-empty (parser deserializes them happily) but every
3306        // job has no `steps:` block — the GHA parser produces 0 Step nodes.
3307        // This is the canonical "wrong-platform smuggle" shape: an attacker
3308        // gets a misclassified file past auto-detect, no recognisable steps
3309        // get materialised, and the previous behaviour was completeness =
3310        // complete + 0 findings = "passed". Now Partial.
3311        let yaml = r#"
3312on:
3313  push:
3314jobs:
3315  build:
3316    runs-on: ubuntu-latest
3317"#;
3318        let graph = parse(yaml);
3319        let step_count = graph
3320            .nodes
3321            .iter()
3322            .filter(|n| n.kind == NodeKind::Step)
3323            .count();
3324        assert_eq!(step_count, 0, "no steps: present means 0 Step nodes");
3325        assert_eq!(
3326            graph.completeness,
3327            AuthorityCompleteness::Partial,
3328            "0-step-nodes despite non-empty jobs: must mark Partial"
3329        );
3330        assert!(
3331            graph
3332                .completeness_gaps
3333                .iter()
3334                .any(|g| g.contains("0 step nodes")),
3335            "completeness_gaps must mention 0 step nodes: {:?}",
3336            graph.completeness_gaps
3337        );
3338        // `jobs:` parsed but produced 0 step nodes — the graph carrier is
3339        // missing entirely. Structural, because there are no recognisable
3340        // steps for the authority chain to attach to.
3341        assert!(
3342            graph.completeness_gap_kinds.contains(&GapKind::Structural),
3343            "0-step-nodes gap must be Structural, got: {:?}",
3344            graph.completeness_gap_kinds
3345        );
3346    }
3347
3348    #[test]
3349    fn empty_workflow_no_jobs_does_not_mark_partial_for_zero_steps() {
3350        // An entirely empty workflow (no `jobs:` at all) has nothing to
3351        // classify — completeness should not flip to Partial just because
3352        // there are zero step nodes (the source had no carrier).
3353        let yaml = "name: empty\non:\n  push:\n";
3354        let graph = parse(yaml);
3355        let zero_step_gap = graph
3356            .completeness_gaps
3357            .iter()
3358            .any(|g| g.contains("0 step nodes"));
3359        assert!(
3360            !zero_step_gap,
3361            "no jobs: in source means no 0-step gap reason; got: {:?}",
3362            graph.completeness_gaps
3363        );
3364    }
3365
3366    // -- B3 regression: all-zero SHA must not be treated as pinned --
3367
3368    #[test]
3369    fn all_zero_sha_action_is_untrusted() {
3370        let yaml = r#"
3371jobs:
3372  ci:
3373    steps:
3374      - uses: actions/setup-python@0000000000000000000000000000000000000000
3375"#;
3376        let graph = parse(yaml);
3377        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
3378        assert_eq!(images.len(), 1);
3379        assert_eq!(
3380            images[0].trust_zone,
3381            TrustZone::Untrusted,
3382            "all-zero SHA must be classified as Untrusted, not ThirdParty"
3383        );
3384    }
3385
3386    #[test]
3387    fn real_sha_pinned_action_is_third_party() {
3388        // Non-zero 40-char hex SHA -- the normal legitimate case.
3389        let yaml = r#"
3390jobs:
3391  ci:
3392    steps:
3393      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
3394"#;
3395        let graph = parse(yaml);
3396        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
3397        assert_eq!(images.len(), 1);
3398        assert_eq!(
3399            images[0].trust_zone,
3400            TrustZone::ThirdParty,
3401            "legitimate SHA-pinned action must be classified as ThirdParty"
3402        );
3403    }
3404
3405    #[test]
3406    fn upload_artifact_creates_produces_edge() {
3407        let yaml = r#"
3408permissions:
3409  contents: read
3410jobs:
3411  build:
3412    steps:
3413      - uses: actions/upload-artifact@v4
3414        with:
3415          name: my-dist
3416          path: ./dist
3417"#;
3418        let graph = parse(yaml);
3419        let artifacts: Vec<_> = graph.nodes_of_kind(NodeKind::Artifact).collect();
3420        assert_eq!(
3421            artifacts.len(),
3422            1,
3423            "upload-artifact should create one Artifact node"
3424        );
3425        assert_eq!(artifacts[0].name, "my-dist");
3426        let produces_edges: Vec<_> = graph
3427            .edges
3428            .iter()
3429            .filter(|e| e.kind == EdgeKind::Produces && e.to == artifacts[0].id)
3430            .collect();
3431        assert_eq!(
3432            produces_edges.len(),
3433            1,
3434            "upload step must have Produces edge to artifact"
3435        );
3436    }
3437
3438    #[test]
3439    fn download_artifact_creates_consumes_edge() {
3440        let yaml = r#"
3441jobs:
3442  deploy:
3443    steps:
3444      - uses: actions/download-artifact@v4
3445        with:
3446          name: my-dist
3447"#;
3448        let graph = parse(yaml);
3449        let artifacts: Vec<_> = graph.nodes_of_kind(NodeKind::Artifact).collect();
3450        assert_eq!(
3451            artifacts.len(),
3452            1,
3453            "download-artifact should create one Artifact node"
3454        );
3455        let consumes_edges: Vec<_> = graph
3456            .edges
3457            .iter()
3458            .filter(|e| e.kind == EdgeKind::Consumes && e.from == artifacts[0].id)
3459            .collect();
3460        assert_eq!(
3461            consumes_edges.len(),
3462            1,
3463            "download step must have Consumes edge from artifact"
3464        );
3465    }
3466
3467    #[test]
3468    fn upload_download_same_name_share_artifact_node() {
3469        let yaml = r#"
3470permissions:
3471  contents: read
3472jobs:
3473  build:
3474    steps:
3475      - uses: actions/upload-artifact@v4
3476        with:
3477          name: shared-dist
3478          path: ./dist
3479  deploy:
3480    steps:
3481      - uses: actions/download-artifact@v4
3482        with:
3483          name: shared-dist
3484"#;
3485        let graph = parse(yaml);
3486        let artifacts: Vec<_> = graph.nodes_of_kind(NodeKind::Artifact).collect();
3487        assert_eq!(
3488            artifacts.len(),
3489            1,
3490            "same artifact name must reuse the same Artifact node"
3491        );
3492        let produces: Vec<_> = graph
3493            .edges
3494            .iter()
3495            .filter(|e| e.kind == EdgeKind::Produces)
3496            .collect();
3497        let consumes: Vec<_> = graph
3498            .edges
3499            .iter()
3500            .filter(|e| e.kind == EdgeKind::Consumes)
3501            .collect();
3502        assert_eq!(produces.len(), 1, "one Produces edge");
3503        assert_eq!(consumes.len(), 1, "one Consumes edge");
3504        assert_eq!(produces[0].to, artifacts[0].id);
3505        assert_eq!(consumes[0].from, artifacts[0].id);
3506    }
3507
3508    #[test]
3509    fn upload_artifact_without_name_creates_no_edge() {
3510        // upload-artifact with no `name:` must not create an Artifact node or
3511        // Produces edge (anonymous uploads can't be correlated and would silently
3512        // merge unrelated jobs).
3513        let yaml = r#"
3514jobs:
3515  build:
3516    steps:
3517      - uses: actions/upload-artifact@v4
3518        with:
3519          path: ./dist
3520"#;
3521        let graph = parse(yaml);
3522        let artifacts: Vec<_> = graph.nodes_of_kind(NodeKind::Artifact).collect();
3523        assert!(
3524            artifacts.is_empty(),
3525            "upload-artifact without name: must not create an Artifact node; got: {artifacts:#?}"
3526        );
3527        let produces: Vec<_> = graph
3528            .edges
3529            .iter()
3530            .filter(|e| e.kind == EdgeKind::Produces)
3531            .collect();
3532        assert!(
3533            produces.is_empty(),
3534            "upload-artifact without name: must not create a Produces edge"
3535        );
3536    }
3537
3538    #[test]
3539    fn download_artifact_without_name_creates_no_edge() {
3540        // download-artifact with no `name:` means "download all" (wildcard) —
3541        // we can't correlate it to a specific producer, so no Consumes edge
3542        // should be created.
3543        let yaml = r#"
3544jobs:
3545  deploy:
3546    steps:
3547      - uses: actions/download-artifact@v4
3548"#;
3549        let graph = parse(yaml);
3550        let consumes: Vec<_> = graph
3551            .edges
3552            .iter()
3553            .filter(|e| e.kind == EdgeKind::Consumes)
3554            .collect();
3555        assert!(
3556            consumes.is_empty(),
3557            "download-artifact without name: must not create a Consumes edge"
3558        );
3559    }
3560
3561    // ── Regression: F1 (P1) ────────────────────────────────────────────────
3562    // The legacy run-script extractor matched every literal `secrets.X`
3563    // substring — comments and shell paths produced phantom Secret nodes
3564    // (`json`, `conf`). The fix walks only INSIDE `${{ … }}` template spans.
3565    #[test]
3566    fn secret_extractor_ignores_literal_substrings_outside_template_spans() {
3567        let yaml = r#"
3568jobs:
3569  deploy:
3570    steps:
3571      - name: Mixed shell + template
3572        run: |
3573          # loads /etc/secrets.conf
3574          cp $SECRETS_DIR/secrets.json /tmp/
3575          curl -H "Authorization: ${{ secrets.REAL_TOKEN }}" https://api.example.com
3576"#;
3577        let graph = parse(yaml);
3578        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3579        assert_eq!(
3580            secrets.len(),
3581            1,
3582            "only `REAL_TOKEN` should be a Secret node — phantoms `conf`/`json` must not appear; got: {:?}",
3583            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
3584        );
3585        assert_eq!(secrets[0].name, "REAL_TOKEN");
3586    }
3587
3588    // ── Regression: F2 (P1) ────────────────────────────────────────────────
3589    // The previous `is_secret_reference` required the literal `${{ secrets.`
3590    // (one canonical space). GHA accepts every whitespace variant. This test
3591    // pins the tight, no-space form.
3592    #[test]
3593    fn secret_extractor_handles_tight_template_spacing() {
3594        let yaml = r#"
3595jobs:
3596  deploy:
3597    steps:
3598      - name: Tight template
3599        run: echo "x"
3600        env:
3601          TOK: "${{secrets.TIGHT}}"
3602"#;
3603        let graph = parse(yaml);
3604        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3605        assert_eq!(secrets.len(), 1);
3606        assert_eq!(secrets[0].name, "TIGHT");
3607        let secret_id = secrets[0].id;
3608        let edges = graph
3609            .edges_to(secret_id)
3610            .filter(|e| e.kind == EdgeKind::HasAccessTo)
3611            .count();
3612        assert_eq!(
3613            edges, 1,
3614            "tight `${{{{secrets.X}}}}` must produce HasAccessTo edge"
3615        );
3616    }
3617
3618    // ── Regression: F3 (P1) ────────────────────────────────────────────────
3619    // The previous `extract_secret_name` returned ONLY the first secret per
3620    // value. Concatenated multi-secret values silently dropped the rest.
3621    #[test]
3622    fn secret_extractor_finds_all_secrets_in_concatenated_value() {
3623        let yaml = r#"
3624jobs:
3625  deploy:
3626    steps:
3627      - name: Concatenated
3628        run: echo "x"
3629        env:
3630          COMBINED: "${{ secrets.A }}-${{ secrets.B }}"
3631"#;
3632        let graph = parse(yaml);
3633        let secret_names: std::collections::BTreeSet<&str> = graph
3634            .nodes_of_kind(NodeKind::Secret)
3635            .map(|n| n.name.as_str())
3636            .collect();
3637        assert!(secret_names.contains("A"), "secret A must be detected");
3638        assert!(secret_names.contains("B"), "secret B must be detected");
3639        assert_eq!(
3640            secret_names.len(),
3641            2,
3642            "exactly two secrets, got: {secret_names:?}"
3643        );
3644        // Both edges from the step.
3645        for name in ["A", "B"] {
3646            let id = graph
3647                .nodes_of_kind(NodeKind::Secret)
3648                .find(|n| n.name == name)
3649                .expect("secret node")
3650                .id;
3651            let edges = graph
3652                .edges_to(id)
3653                .filter(|e| e.kind == EdgeKind::HasAccessTo)
3654                .count();
3655            assert!(edges >= 1, "missing HasAccessTo edge for secret {name}");
3656        }
3657    }
3658
3659    // ── Regression: F6 (P1) ────────────────────────────────────────────────
3660    // Reusable workflow `secrets:` mapping form was silently dropped — only
3661    // the literal `inherit` string was honoured.
3662    #[test]
3663    fn reusable_workflow_secrets_mapping_form_propagates_edges() {
3664        let yaml = r#"
3665jobs:
3666  call:
3667    uses: ./.github/workflows/reusable.yml
3668    secrets:
3669      CHILD: ${{ secrets.PARENT }}
3670      OTHER: ${{ secrets.SECONDARY }}
3671"#;
3672        let graph = parse(yaml);
3673        let secret_names: std::collections::BTreeSet<&str> = graph
3674            .nodes_of_kind(NodeKind::Secret)
3675            .map(|n| n.name.as_str())
3676            .collect();
3677        assert!(
3678            secret_names.contains("PARENT"),
3679            "secrets: mapping value `${{{{ secrets.PARENT }}}}` must produce a Secret node; got: {secret_names:?}"
3680        );
3681        assert!(
3682            secret_names.contains("SECONDARY"),
3683            "secrets: mapping must iterate ALL keys, not just the first; got: {secret_names:?}"
3684        );
3685        // The synthetic step (named after the job) holds the HasAccessTo edges.
3686        let parent_id = graph
3687            .nodes_of_kind(NodeKind::Secret)
3688            .find(|n| n.name == "PARENT")
3689            .unwrap()
3690            .id;
3691        let edges = graph
3692            .edges_to(parent_id)
3693            .filter(|e| e.kind == EdgeKind::HasAccessTo)
3694            .count();
3695        assert!(edges >= 1, "synthetic step must HasAccessTo PARENT");
3696    }
3697
3698    // ── Regression: F13 (P2) ───────────────────────────────────────────────
3699    // The synthetic step created for `job.uses:` skipped workflow-level env
3700    // secret edges. workflow.env IS in scope for the caller's evaluation of
3701    // a reusable-workflow call (the caller resolves `${{ secrets.X }}` /
3702    // `${{ env.X }}` BEFORE handing values to the callee).
3703    #[test]
3704    fn reusable_workflow_synthetic_step_inherits_workflow_env_secrets() {
3705        let yaml = r#"
3706env:
3707  GLOBAL_TOKEN: "${{ secrets.GLOBAL }}"
3708jobs:
3709  call:
3710    uses: ./.github/workflows/reusable.yml
3711"#;
3712        let graph = parse(yaml);
3713        let global = graph
3714            .nodes_of_kind(NodeKind::Secret)
3715            .find(|n| n.name == "GLOBAL");
3716        assert!(
3717            global.is_some(),
3718            "workflow.env secret `GLOBAL` must produce a Secret node visible to the synthetic step"
3719        );
3720        let global_id = global.unwrap().id;
3721        let edges = graph
3722            .edges_to(global_id)
3723            .filter(|e| e.kind == EdgeKind::HasAccessTo)
3724            .count();
3725        assert!(
3726            edges >= 1,
3727            "synthetic step for reusable workflow must inherit workflow.env HasAccessTo edge"
3728        );
3729    }
3730
3731    // ── Regression: F4 (P1) ────────────────────────────────────────────────
3732    // `META_JOB_OUTPUTS` was built by iterating a HashMap — randomised order
3733    // leaked into JSON / SARIF output. Multiple runs must produce a
3734    // byte-identical string.
3735    #[test]
3736    fn gha_meta_job_outputs_is_deterministic_across_runs() {
3737        let yaml = r#"
3738jobs:
3739  emit:
3740    runs-on: ubuntu-latest
3741    outputs:
3742      zebra: literal-z
3743      apple: literal-a
3744      mango: literal-m
3745      kilo: literal-k
3746      foxtrot: literal-f
3747    steps:
3748      - run: echo hi
3749"#;
3750        let mut prev: Option<String> = None;
3751        for i in 0..9 {
3752            let graph = parse(yaml);
3753            let cur = graph
3754                .metadata
3755                .get(META_JOB_OUTPUTS)
3756                .cloned()
3757                .unwrap_or_default();
3758            assert!(
3759                !cur.is_empty(),
3760                "META_JOB_OUTPUTS must be populated on a workflow with outputs"
3761            );
3762            if let Some(p) = &prev {
3763                assert_eq!(
3764                    p, &cur,
3765                    "META_JOB_OUTPUTS drifted on run {i}: {p:?} vs {cur:?}"
3766                );
3767            }
3768            prev = Some(cur);
3769        }
3770    }
3771
3772    // ── Regression: F5 (P1) ────────────────────────────────────────────────
3773    // `Permissions::Map` rendered through HashMap iteration — order leaked
3774    // into META_PERMISSIONS in JSON / SARIF / `taudit map`. With a BTreeMap
3775    // backing the map variant, the rendered string is now sorted by key.
3776    #[test]
3777    fn gha_meta_permissions_is_deterministic_across_runs() {
3778        let yaml = r#"
3779permissions:
3780  contents: read
3781  id-token: write
3782  packages: write
3783  actions: read
3784  pull-requests: write
3785jobs:
3786  ci:
3787    steps:
3788      - run: echo hi
3789"#;
3790        let mut prev: Option<String> = None;
3791        for i in 0..9 {
3792            let graph = parse(yaml);
3793            let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
3794            assert_eq!(identities.len(), 1, "one GITHUB_TOKEN identity");
3795            let cur = identities[0]
3796                .metadata
3797                .get(META_PERMISSIONS)
3798                .cloned()
3799                .expect("META_PERMISSIONS must be stamped");
3800            if let Some(p) = &prev {
3801                assert_eq!(
3802                    p, &cur,
3803                    "META_PERMISSIONS drifted on run {i}: {p:?} vs {cur:?}"
3804                );
3805            }
3806            prev = Some(cur);
3807        }
3808    }
3809}