Skip to main content

taudit_parse_gha/
lib.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use serde::Deserialize;
5use taudit_core::error::TauditError;
6use taudit_core::graph::*;
7use taudit_core::ports::PipelineParser;
8
/// Metadata value stored under `META_INFERRED` to mark inferred (not precisely mapped) secret references.
10const META_INFERRED_VAL: &str = "true";
11
12/// Local metadata key marking a Step node that was inlined from a composite
13/// action's `runs.steps`. Lets downstream consumers (and tests) distinguish
14/// inlined sub-steps from steps written directly in the workflow.
15const META_COMPOSITE_STEP: &str = "composite_step";
16
17/// Local metadata key on the inlined Step recording the source action path
18/// (e.g. `./.github/actions/my-action`) so consumers can attribute findings.
19const META_COMPOSITE_SOURCE: &str = "composite_source";
20
/// GitHub Actions workflow parser.
///
/// Unit struct with no fields. It implements `PipelineParser`, reporting the
/// platform as `github-actions` and turning workflow YAML into an
/// `AuthorityGraph`.
pub struct GhaParser;
23
24impl PipelineParser for GhaParser {
25    fn platform(&self) -> &str {
26        "github-actions"
27    }
28
29    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
30        let mut de = serde_yaml::Deserializer::from_str(content);
31        let doc = de
32            .next()
33            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
34        let workflow: GhaWorkflow = GhaWorkflow::deserialize(doc)
35            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
36        let extra_docs = de.next().is_some();
37
38        let mut graph = AuthorityGraph::new(source.clone());
39        graph
40            .metadata
41            .insert(META_PLATFORM.into(), "github-actions".into());
42        if workflow.permissions.is_none() {
43            // Negative-space marker: lets the
44            // `no_workflow_level_permissions_block` rule detect the absence
45            // of any top-level `permissions:` declaration without re-reading
46            // the source YAML. The same rule will additionally check for the
47            // absence of any per-job permissions block.
48            graph
49                .metadata
50                .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
51        }
52        if extra_docs {
53            graph.mark_partial(
54                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
55            );
56        }
57        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
58
59        // Workflow-level `env:` may be a template expression (e.g. `env: ${{ matrix }}`)
60        // whose shape is unknown statically. Mark Partial once and skip env processing
61        // for that scope; static rules cannot reason about runtime-resolved env shapes.
62        if let Some(EnvSpec::Template(_)) = workflow.env {
63            graph.mark_partial(
64                "workflow-level env: uses template expression — environment variable shape unknown"
65                    .to_string(),
66            );
67        }
68
69        let is_pull_request_target = workflow
70            .triggers
71            .as_ref()
72            .map(trigger_has_pull_request_target)
73            .unwrap_or(false);
74
75        // Record every recognised trigger as a comma-separated list so rules
76        // can reason about combinations (e.g. `pull_request_target`,
77        // `pull_request`, `workflow_run`, `issue_comment`). Backwards-compatible:
78        // existing single-value consumers that match exact strings on
79        // `pull_request_target` are preserved by writing that token first when
80        // present.
81        let trigger_list = collect_trigger_names(workflow.triggers.as_ref());
82        if !trigger_list.is_empty() {
83            // Place pull_request_target first so consumers that use string
84            // equality (older rules) still match the canonical legacy value.
85            let mut ordered: Vec<&str> = Vec::new();
86            if trigger_list.iter().any(|t| t == "pull_request_target") {
87                ordered.push("pull_request_target");
88            }
89            for t in &trigger_list {
90                if t != "pull_request_target" {
91                    ordered.push(t);
92                }
93            }
94            // If we only have `pull_request_target`, write it bare so the
95            // legacy `== "pull_request_target"` predicate keeps working.
96            let value = if ordered.len() == 1 {
97                ordered[0].to_string()
98            } else {
99                ordered.join(",")
100            };
101            graph.metadata.insert(META_TRIGGER.into(), value);
102        } else if is_pull_request_target {
103            graph
104                .metadata
105                .insert(META_TRIGGER.into(), "pull_request_target".into());
106        }
107
108        // Workflow-level permissions -> GITHUB_TOKEN identity node
109        let token_id = if let Some(ref perms) = workflow.permissions {
110            let perm_string = perms.to_string();
111            let scope = IdentityScope::from_permissions(&perm_string);
112            let mut meta = HashMap::new();
113            meta.insert(META_PERMISSIONS.into(), perm_string.clone());
114            meta.insert(
115                META_IDENTITY_SCOPE.into(),
116                format!("{scope:?}").to_lowercase(),
117            );
118            // OIDC: id-token: write → token is OIDC-capable (federated scope).
119            // Check the formatted substring directly — Permissions::Map fmt produces
120            // "id-token: write" so this won't false-positive on "contents: write".
121            if perm_string.contains("id-token: write") || perm_string == "write-all" {
122                meta.insert(META_OIDC.into(), "true".into());
123            }
124            Some(graph.add_node_with_metadata(
125                NodeKind::Identity,
126                "GITHUB_TOKEN",
127                TrustZone::FirstParty,
128                meta,
129            ))
130        } else {
131            None
132        };
133
134        // Iterate jobs in sorted order so node IDs (and therefore every
135        // edge `from`/`to`, every finding `nodes_involved`, every JSON
136        // emit) are byte-deterministic across runs. `workflow.jobs` is a
137        // HashMap whose iteration order is randomised per process — without
138        // sorting here, two runs of the same file produce different node
139        // IDs, which silently breaks `taudit diff`, cache keys, and any
140        // downstream SIEM that hashes the JSON.
141        let mut sorted_jobs: Vec<(&String, &GhaJob)> = workflow.jobs.iter().collect();
142        sorted_jobs.sort_by(|a, b| a.0.cmp(b.0));
143        for (job_name, job) in sorted_jobs {
144            // Job-level `env:` may be a template expression (e.g. `env: ${{ matrix }}`)
145            // whose shape is unknown statically. Mark Partial once per job and skip
146            // env processing for that scope.
147            if let Some(EnvSpec::Template(_)) = job.env {
148                graph.mark_partial(format!(
149                    "job '{job_name}' env: uses template expression — environment variable shape unknown"
150                ));
151            }
152
153            // Job-level permissions override workflow-level
154            let job_token_id = if let Some(ref perms) = job.permissions {
155                let perm_string = perms.to_string();
156                let scope = IdentityScope::from_permissions(&perm_string);
157                let mut meta = HashMap::new();
158                meta.insert(META_PERMISSIONS.into(), perm_string.clone());
159                meta.insert(
160                    META_IDENTITY_SCOPE.into(),
161                    format!("{scope:?}").to_lowercase(),
162                );
163                if perm_string.contains("id-token: write") {
164                    meta.insert(META_OIDC.into(), "true".into());
165                }
166                Some(graph.add_node_with_metadata(
167                    NodeKind::Identity,
168                    format!("GITHUB_TOKEN ({job_name})"),
169                    TrustZone::FirstParty,
170                    meta,
171                ))
172            } else {
173                token_id
174            };
175
176            // Reusable workflow: job.uses= means this job delegates to another workflow.
177            // We cannot resolve it inline — mark the graph partial and skip steps.
178            if let Some(ref uses) = job.uses {
179                let trust_zone = if is_sha_pinned(uses) {
180                    TrustZone::ThirdParty
181                } else {
182                    TrustZone::Untrusted
183                };
184                let rw_id = graph.add_node(NodeKind::Image, uses, trust_zone);
185                // Synthetic step represents this job delegating to the called workflow
186                let job_step_id = graph.add_node(NodeKind::Step, job_name, TrustZone::FirstParty);
187                if let Some(node) = graph.nodes.get_mut(job_step_id) {
188                    node.metadata.insert(META_JOB_NAME.into(), job_name.clone());
189                }
190                graph.add_edge(job_step_id, rw_id, EdgeKind::DelegatesTo);
191                if let Some(tok_id) = job_token_id {
192                    graph.add_edge(job_step_id, tok_id, EdgeKind::HasAccessTo);
193                }
194                graph.mark_partial(format!(
195                    "reusable workflow '{uses}' in job '{job_name}' cannot be resolved inline — authority within the called workflow is unknown"
196                ));
197                continue;
198            }
199
200            // Matrix strategy: authority shape may differ per matrix entry — mark Partial
201            if job
202                .strategy
203                .as_ref()
204                .and_then(|s| s.get("matrix"))
205                .is_some()
206            {
207                graph.mark_partial(format!(
208                    "job '{job_name}' uses matrix strategy — authority shape may differ per matrix entry"
209                ));
210            }
211
212            // Self-hosted runner detection: `runs-on: self-hosted` or a sequence
213            // that includes `self-hosted`. Creates an Image node tagged with
214            // META_SELF_HOSTED so downstream rules can flag the job. Hosted
215            // runners (ubuntu-latest, etc.) are not represented as Image nodes —
216            // this keeps the graph focused on non-default attack surface.
217            if is_self_hosted_runner(job.runs_on.as_ref()) {
218                let runner_name = runner_label(job.runs_on.as_ref()).unwrap_or("self-hosted");
219                let mut meta = HashMap::new();
220                meta.insert(META_SELF_HOSTED.into(), "true".into());
221                graph.add_node_with_metadata(
222                    NodeKind::Image,
223                    runner_name,
224                    TrustZone::FirstParty,
225                    meta,
226                );
227            }
228
229            // Container: job-level container image — add as Image node and capture ID
230            // so each step in this job can be linked to it via UsesImage.
231            let container_image_id: Option<NodeId> = if let Some(ref container) = job.container {
232                let image_str = container.image();
233                let pinned = is_docker_digest_pinned(image_str);
234                let trust_zone = if pinned {
235                    TrustZone::ThirdParty
236                } else {
237                    TrustZone::Untrusted
238                };
239                let mut meta = HashMap::new();
240                meta.insert(META_CONTAINER.into(), "true".into());
241                if pinned {
242                    if let Some(digest) = image_str.split("@sha256:").nth(1) {
243                        meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
244                    }
245                }
246                Some(graph.add_node_with_metadata(NodeKind::Image, image_str, trust_zone, meta))
247            } else {
248                None
249            };
250
251            for (step_idx, step) in job.steps.iter().enumerate() {
252                let default_name = format!("{job_name}[{step_idx}]");
253                let step_name = step.name.as_deref().unwrap_or(&default_name);
254
255                // Determine trust zone and create image node if `uses:` present
256                let (trust_zone, image_node_id) = if let Some(ref uses) = step.uses {
257                    let (zone, image_id) = classify_action(uses, &mut graph);
258                    (zone, Some(image_id))
259                } else if is_pull_request_target {
260                    // run: step in a pull_request_target workflow — may execute fork code
261                    (TrustZone::Untrusted, None)
262                } else {
263                    // Inline `run:` step — first party
264                    (TrustZone::FirstParty, None)
265                };
266
267                let step_id = graph.add_node(NodeKind::Step, step_name, trust_zone);
268
269                // Stamp parent job name so consumers (e.g. `taudit map --job`)
270                // can attribute steps back to their containing job. Also
271                // stamp the raw `run:` script body so script-aware rules
272                // (runtime_script_fetched_from_floating_url,
273                // untrusted_api_response_to_env_sink) can pattern-match on
274                // the actual command text the runner will execute.
275                if let Some(node) = graph.nodes.get_mut(step_id) {
276                    node.metadata.insert(META_JOB_NAME.into(), job_name.clone());
277                    if let Some(ref body) = step.run {
278                        if !body.is_empty() {
279                            node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
280                        }
281                    }
282                    // Fork-check stamping. A step inherits its job-level
283                    // `if:` (if any) plus its own `if:`. Either one carrying
284                    // the standard fork-check pattern is sufficient — both
285                    // forms guard the step from running on fork-PR contexts.
286                    let job_check = job
287                        .if_cond
288                        .as_deref()
289                        .map(is_fork_check_expression)
290                        .unwrap_or(false);
291                    let step_check = step
292                        .if_cond
293                        .as_deref()
294                        .map(is_fork_check_expression)
295                        .unwrap_or(false);
296                    if job_check || step_check {
297                        node.metadata.insert(META_FORK_CHECK.into(), "true".into());
298                    }
299                }
300
301                // Link step to action image
302                if let Some(img_id) = image_node_id {
303                    graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
304                }
305
306                // Composite action inlining: if this step uses a local action
307                // (`./path`), try to load its `action.yml` and inline `runs.steps`
308                // as Step nodes with DelegatesTo edges from the calling step.
309                // On any failure (missing file, non-composite, parse error) the
310                // helper marks the graph Partial and returns without inlining.
311                if let Some(ref uses) = step.uses {
312                    if uses.starts_with("./") {
313                        try_inline_composite_action(
314                            uses,
315                            &source.file,
316                            step_id,
317                            job_name,
318                            job_token_id,
319                            container_image_id,
320                            is_pull_request_target,
321                            &mut graph,
322                            &mut secret_ids,
323                        );
324                    }
325                }
326
327                // Link step to job container — steps run inside the container's execution
328                // environment, so a floating container is a supply chain risk for every step.
329                if let Some(img_id) = container_image_id {
330                    graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
331                }
332
333                // Link step to GITHUB_TOKEN if it exists
334                if let Some(tok_id) = job_token_id {
335                    graph.add_edge(step_id, tok_id, EdgeKind::HasAccessTo);
336                }
337
338                // Cloud identity inference: detect known OIDC cloud auth actions and
339                // create an Identity node representing the assumed cloud identity.
340                if let Some(ref uses) = step.uses {
341                    if let Some(cloud_id) =
342                        classify_cloud_auth(uses, step.with.as_ref(), &mut graph)
343                    {
344                        graph.add_edge(step_id, cloud_id, EdgeKind::HasAccessTo);
345                    }
346                }
347
348                // Attestation action detection
349                if let Some(ref uses) = step.uses {
350                    let action = uses.split('@').next().unwrap_or(uses);
351                    if matches!(
352                        action,
353                        "actions/attest-build-provenance" | "sigstore/cosign-installer"
354                    ) {
355                        if let Some(node) = graph.nodes.get_mut(step_id) {
356                            node.metadata.insert(META_ATTESTS.into(), "true".into());
357                        }
358                    }
359                }
360
361                // actions/checkout detection. Tag unconditionally — downstream rules
362                // gate on trigger context (pull_request / pull_request_target) to
363                // decide whether the checkout is pulling untrusted fork code. Tagging
364                // here avoids trigger-ordering dependencies across jobs.
365                if let Some(ref uses) = step.uses {
366                    let action = uses.split('@').next().unwrap_or(uses);
367                    if action == "actions/checkout" {
368                        if let Some(node) = graph.nodes.get_mut(step_id) {
369                            node.metadata
370                                .insert(META_CHECKOUT_SELF.into(), "true".into());
371                        }
372                    }
373                }
374
375                // Process secrets from workflow-level `env:` (inherited by all jobs/steps).
376                // Template-shaped envs are skipped here — graph already marked Partial above.
377                // Iterate env keys in sorted order so secret-node creation
378                // order is deterministic across runs (HashMap iteration is
379                // randomised per process; secret IDs leak that randomness
380                // into the JSON output otherwise).
381                if let Some(env_map) = workflow.env.as_ref().and_then(EnvSpec::as_map) {
382                    let mut entries: Vec<(&String, &String)> = env_map.iter().collect();
383                    entries.sort_by(|a, b| a.0.cmp(b.0));
384                    for (_k, env_val) in entries {
385                        if is_secret_reference(env_val) {
386                            let secret_name = extract_secret_name(env_val);
387                            let secret_id =
388                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
389                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
390                        }
391                    }
392                }
393
394                // Process secrets from job-level `env:` (inherited by all steps).
395                // Template-shaped envs are skipped here — graph already marked Partial above.
396                if let Some(env_map) = job.env.as_ref().and_then(EnvSpec::as_map) {
397                    let mut entries: Vec<(&String, &String)> = env_map.iter().collect();
398                    entries.sort_by(|a, b| a.0.cmp(b.0));
399                    for (_k, env_val) in entries {
400                        if is_secret_reference(env_val) {
401                            let secret_name = extract_secret_name(env_val);
402                            let secret_id =
403                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
404                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
405                        }
406                    }
407                }
408
409                // Process secrets from step-level `env:` block.
410                // If this step's env: is a template expression, mark Partial once for
411                // this step and skip env processing.
412                match step.env.as_ref() {
413                    Some(EnvSpec::Map(env_map)) => {
414                        let mut entries: Vec<(&String, &String)> = env_map.iter().collect();
415                        entries.sort_by(|a, b| a.0.cmp(b.0));
416                        for (_k, env_val) in entries {
417                            if is_secret_reference(env_val) {
418                                let secret_name = extract_secret_name(env_val);
419                                let secret_id = find_or_create_secret(
420                                    &mut graph,
421                                    &mut secret_ids,
422                                    &secret_name,
423                                );
424                                graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
425                            }
426                        }
427                    }
428                    Some(EnvSpec::Template(_)) => {
429                        graph.mark_partial(format!(
430                            "step '{step_name}' in job '{job_name}' env: uses template expression — environment variable shape unknown"
431                        ));
432                    }
433                    None => {}
434                }
435
436                // Process secrets from `with:` block, plus detect any
437                // `${{ env.X }}` reference. `env.X` does NOT produce a
438                // HasAccessTo edge (the value is sourced from the ambient
439                // runner environment, not directly from the secrets store)
440                // but it IS the consumer half of the env-gate laundering
441                // pattern that `secret_via_env_gate_to_untrusted_consumer`
442                // detects. Stamping META_READS_ENV here lets the rule run
443                // without re-walking the YAML.
444                //
445                // Sort keys so secret node creation order is deterministic
446                // across runs.
447                if let Some(ref with) = step.with {
448                    let mut reads_env = false;
449                    let mut entries: Vec<(&String, &String)> = with.iter().collect();
450                    entries.sort_by(|a, b| a.0.cmp(b.0));
451                    for (_k, val) in entries {
452                        if is_secret_reference(val) {
453                            let secret_name = extract_secret_name(val);
454                            let secret_id =
455                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
456                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
457                        }
458                        if is_env_reference(val) {
459                            reads_env = true;
460                        }
461                    }
462                    if reads_env {
463                        if let Some(node) = graph.nodes.get_mut(step_id) {
464                            node.metadata.insert(META_READS_ENV.into(), "true".into());
465                        }
466                    }
467                }
468
469                // Detect inferred secrets in `run:` script blocks
470                if let Some(ref run) = step.run {
471                    if run.contains("${{ secrets.") {
472                        // Extract secret names from the shell script
473                        let mut pos = 0;
474                        while let Some(start) = run[pos..].find("secrets.") {
475                            let abs_start = pos + start + 8;
476                            let remaining = &run[abs_start..];
477                            let end = remaining
478                                .find(|c: char| !c.is_alphanumeric() && c != '_')
479                                .unwrap_or(remaining.len());
480                            let secret_name = &remaining[..end];
481                            if !secret_name.is_empty() {
482                                let secret_id =
483                                    find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
484                                // Mark as inferred — not precisely mapped
485                                if let Some(node) = graph.nodes.get_mut(secret_id) {
486                                    node.metadata
487                                        .insert(META_INFERRED.into(), META_INFERRED_VAL.into());
488                                }
489                                graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
490                                graph.mark_partial(format!(
491                                    "secret '{secret_name}' referenced in run: script — inferred, not precisely mapped"
492                                ));
493                            }
494                            pos = abs_start + end;
495                        }
496                    }
497                }
498
499                // Detect writes to the GHA environment gate.
500                // Broad detection: presence of GITHUB_ENV or GITHUB_PATH in a run script
501                // covers every redirect form (`>> $GITHUB_ENV`, `>> "$GITHUB_ENV"`,
502                // `>> ${GITHUB_ENV}`, `tee -a $GITHUB_PATH`, etc.) without brittle
503                // multi-variant string matching. Reading these vars without writing is
504                // extremely rare in practice, making this an acceptable tradeoff for
505                // completeness.
506                if let Some(ref run) = step.run {
507                    let writes_gate = run.contains("GITHUB_ENV") || run.contains("GITHUB_PATH");
508                    if writes_gate {
509                        if let Some(node) = graph.nodes.get_mut(step_id) {
510                            node.metadata
511                                .insert(META_WRITES_ENV_GATE.into(), "true".into());
512                        }
513                    }
514                    // `${{ env.X }}` references inside a run: body — same
515                    // consumer signal as the with: detection above. A run
516                    // step that interpolates env via the template engine
517                    // is reading from the runner-managed env table just
518                    // like a uses: action would.
519                    if is_env_reference(run) {
520                        if let Some(node) = graph.nodes.get_mut(step_id) {
521                            node.metadata.insert(META_READS_ENV.into(), "true".into());
522                        }
523                    }
524                }
525            }
526        }
527
528        // Cross-platform misclassification trap (red-team R2 #5): a YAML file
529        // wrapping ADO/GitLab content in a `jobs:` mapping deserializes here
530        // without errors but yields no recognisable Step nodes. Marking
531        // Partial surfaces the gap rather than silently returning a clean
532        // graph with completeness=complete (which a CI gate would treat as
533        // "passed").
534        let step_count = graph
535            .nodes
536            .iter()
537            .filter(|n| n.kind == NodeKind::Step)
538            .count();
539        if step_count == 0 && !workflow.jobs.is_empty() {
540            graph.mark_partial(
541                "jobs: parsed but produced 0 step nodes — possible non-GHA YAML wrong-platform-classified".to_string(),
542            );
543        }
544
545        Ok(graph)
546    }
547}
548
/// Returns true when a GHA `if:` expression matches the standard fork-check
/// pattern: `github.event.pull_request.head.repo.fork == false` (or the
/// negated `!= true`), or the equivalent
/// `github.event.pull_request.head.repo.full_name == github.repository`
/// with the operands in either order.
///
/// Matching is case-insensitive and ignores all whitespace, so multi-line
/// expressions and operators written without surrounding spaces
/// (`repo.fork==false`) are detected alongside the canonical Grafana form
/// (`if: github.event.pull_request.head.repo.full_name == github.repository`).
///
/// The check is a substring match on the canonical predicate. Wrapping the
/// predicate inside a larger boolean expression that ANDs additional clauses
/// (e.g. `&& github.actor != ...`) is still detected. ORing it away
/// (`|| true`) would defeat the check, but that pattern is not seen in
/// practice and would itself be a code-review red flag.
pub fn is_fork_check_expression(expr: &str) -> bool {
    // Canonical predicates with whitespace removed, lower-cased.
    const FORK_CHECKS: [&str; 4] = [
        // `repo.fork == false` (and the negated `!= true`)
        "github.event.pull_request.head.repo.fork==false",
        "github.event.pull_request.head.repo.fork!=true",
        // `head.repo.full_name == github.repository`, either operand order
        "github.event.pull_request.head.repo.full_name==github.repository",
        "github.repository==github.event.pull_request.head.repo.full_name",
    ];

    // Drop every whitespace run so spacing around operators cannot hide the
    // predicate, then fold case before comparing.
    let compact = expr.split_whitespace().collect::<String>().to_lowercase();
    FORK_CHECKS.iter().any(|&check| compact.contains(check))
}
584
585fn trigger_has_pull_request_target(triggers: &serde_yaml::Value) -> bool {
586    const PRT: &str = "pull_request_target";
587    match triggers {
588        serde_yaml::Value::String(s) => s == PRT,
589        serde_yaml::Value::Sequence(seq) => seq
590            .iter()
591            .any(|v| v.as_str().map(|s| s == PRT).unwrap_or(false)),
592        serde_yaml::Value::Mapping(map) => map
593            .iter()
594            .any(|(k, _)| k.as_str().map(|s| s == PRT).unwrap_or(false)),
595        _ => false,
596    }
597}
598
599/// Collects every trigger name from a workflow's `on:` field. Returns the
600/// canonical event tokens (`pull_request`, `pull_request_target`,
601/// `workflow_run`, `issue_comment`, `push`, etc.) in source order, deduped.
602fn collect_trigger_names(triggers: Option<&serde_yaml::Value>) -> Vec<String> {
603    let mut out: Vec<String> = Vec::new();
604    let mut push_unique = |s: &str| {
605        if !s.is_empty() && !out.iter().any(|e| e == s) {
606            out.push(s.to_string());
607        }
608    };
609    let Some(val) = triggers else {
610        return out;
611    };
612    match val {
613        serde_yaml::Value::String(s) => push_unique(s),
614        serde_yaml::Value::Sequence(seq) => {
615            for v in seq {
616                if let Some(s) = v.as_str() {
617                    push_unique(s);
618                }
619            }
620        }
621        serde_yaml::Value::Mapping(map) => {
622            for (k, _) in map {
623                if let Some(s) = k.as_str() {
624                    push_unique(s);
625                }
626            }
627        }
628        _ => {}
629    }
630    out
631}
632
633/// Returns true if `runs-on` names a self-hosted runner.
634///
635/// GHA `runs-on` is polymorphic: a string (`ubuntu-latest`, `self-hosted`), a
636/// sequence (`[self-hosted, linux, x64]`), or — for group selection — a mapping
637/// (`{ group: my-group, labels: [...] }`). Any form that contains `self-hosted`
638/// (as a string, sequence entry, or label entry) is considered self-hosted.
639/// Explicit `group:` without `self-hosted` is also self-hosted by construction.
640fn is_self_hosted_runner(runs_on: Option<&serde_yaml::Value>) -> bool {
641    const SH: &str = "self-hosted";
642    let Some(val) = runs_on else {
643        return false;
644    };
645    match val {
646        serde_yaml::Value::String(s) => s == SH,
647        serde_yaml::Value::Sequence(seq) => seq
648            .iter()
649            .any(|v| v.as_str().map(|s| s == SH).unwrap_or(false)),
650        serde_yaml::Value::Mapping(map) => {
651            if map.contains_key("group") {
652                return true;
653            }
654            if let Some(labels) = map.get("labels") {
655                match labels {
656                    serde_yaml::Value::String(s) => s == SH,
657                    serde_yaml::Value::Sequence(seq) => seq
658                        .iter()
659                        .any(|v| v.as_str().map(|s| s == SH).unwrap_or(false)),
660                    _ => false,
661                }
662            } else {
663                false
664            }
665        }
666        _ => false,
667    }
668}
669
670/// Extract a human-readable label from a `runs-on` value for naming the Image
671/// node. Prefers the first non-`self-hosted` label in a sequence (more specific),
672/// falls back to the string value or "self-hosted".
673fn runner_label(runs_on: Option<&serde_yaml::Value>) -> Option<&str> {
674    let val = runs_on?;
675    match val {
676        serde_yaml::Value::String(s) => Some(s.as_str()),
677        serde_yaml::Value::Sequence(seq) => {
678            for v in seq {
679                if let Some(s) = v.as_str() {
680                    if s != "self-hosted" {
681                        return Some(s);
682                    }
683                }
684            }
685            seq.first().and_then(|v| v.as_str())
686        }
687        serde_yaml::Value::Mapping(map) => map.get("group").and_then(|v| v.as_str()),
688        _ => None,
689    }
690}
691
692/// Classify a `uses:` reference into trust zone and create image node.
693fn classify_action(uses: &str, graph: &mut AuthorityGraph) -> (TrustZone, NodeId) {
694    let pinned = is_sha_pinned(uses);
695    let is_local = uses.starts_with("./");
696
697    let zone = if is_local {
698        TrustZone::FirstParty
699    } else if pinned {
700        TrustZone::ThirdParty
701    } else {
702        TrustZone::Untrusted
703    };
704
705    let mut meta = HashMap::new();
706    if pinned {
707        if let Some(sha) = uses.split('@').next_back() {
708            meta.insert(META_DIGEST.into(), sha.into());
709        }
710    }
711
712    let id = graph.add_node_with_metadata(NodeKind::Image, uses, zone, meta);
713    (zone, id)
714}
715
/// Resolve a local action path (e.g. `./.github/actions/my-action`) to the
/// action manifest on disk.
///
/// Starting at the directory containing `pipeline_file` and walking up through
/// its ancestors, joins `uses_path` onto each directory and returns the first
/// `action.yml` (preferred) or `action.yaml` that is a regular file. At most
/// six directory levels are probed; `None` is returned when none of them holds
/// a manifest.
///
/// The cap exists because (a) GHA repos rarely nest workflows that deep and
/// (b) without it we'd `stat` the entire path-to-root for every local action.
///
/// Candidates are tested with `is_file()` rather than `exists()`: a directory
/// that merely happens to be called `action.yml` cannot be read as a manifest,
/// so accepting it would hide a usable `action.yaml` next to it (or a real
/// manifest further up the tree).
fn resolve_local_action_path(pipeline_file: &str, uses_path: &str) -> Option<std::path::PathBuf> {
    /// Number of directory levels probed, counting the workflow's own directory.
    const MAX_LEVELS: usize = 6;
    /// Manifest file names, in lookup order.
    const MANIFESTS: [&str; 2] = ["action.yml", "action.yaml"];

    let start = Path::new(pipeline_file).parent().unwrap_or(Path::new("."));
    for dir in start.ancestors().take(MAX_LEVELS) {
        let action_dir = dir.join(uses_path);
        for manifest in MANIFESTS {
            let candidate = action_dir.join(manifest);
            if candidate.is_file() {
                return Some(candidate);
            }
        }
    }
    None
}
741
742/// Try to load and inline a local composite action's steps into the graph.
743///
744/// Resolves `uses_path` (a `./...` reference) relative to the workflow file's
745/// directory, reads `action.yml`, and — only if `runs.using == "composite"` —
746/// creates a Step node per `runs.steps` entry with a DelegatesTo edge from
747/// `calling_step_id`. Inlined steps inherit the calling job's GITHUB_TOKEN and
748/// container image links, run the same secret-detection logic over their
749/// `env:` / `with:` / `run:` fields, and adopt the parent's trust zone rules
750/// (Untrusted for `run:` steps under `pull_request_target`).
751///
752/// On any unresolvable case (file not found, parse error, non-composite
753/// `using`, missing `runs.steps`) the graph is marked Partial with a reason
754/// and the function returns without inlining. We never propagate errors — a
755/// missing action.yml is a completeness gap, not a fatal parse failure.
756#[allow(clippy::too_many_arguments)]
757fn try_inline_composite_action(
758    uses_path: &str,
759    pipeline_file: &str,
760    calling_step_id: NodeId,
761    job_name: &str,
762    job_token_id: Option<NodeId>,
763    container_image_id: Option<NodeId>,
764    is_pull_request_target: bool,
765    graph: &mut AuthorityGraph,
766    secret_cache: &mut HashMap<String, NodeId>,
767) {
768    // GHA semantics: `./...` paths in `uses:` are resolved relative to the
769    // **repository root**, not the workflow file's directory. We don't know
770    // where the repo root is from a single file path, so we walk up from the
771    // workflow's directory probing for the action at each level. This handles
772    // both the common case (`.github/workflows/x.yml` → repo root two levels
773    // up) and edge cases (workflow at repo root, nested mono-repos).
774    let action_path = match resolve_local_action_path(pipeline_file, uses_path) {
775        Some(p) => p,
776        None => {
777            graph.mark_partial(format!("composite action not found: {uses_path}"));
778            return;
779        }
780    };
781
782    let content = match std::fs::read_to_string(&action_path) {
783        Ok(c) => c,
784        Err(e) => {
785            graph.mark_partial(format!(
786                "failed to read composite action '{uses_path}': {e}"
787            ));
788            return;
789        }
790    };
791
792    let action: serde_yaml::Value = match serde_yaml::from_str(&content) {
793        Ok(v) => v,
794        Err(e) => {
795            graph.mark_partial(format!(
796                "failed to parse composite action '{uses_path}': {e}"
797            ));
798            return;
799        }
800    };
801
802    // Only `using: composite` is supported. docker/node20/etc. hide steps
803    // behind a runtime we cannot introspect — mark Partial.
804    let using = action
805        .get("runs")
806        .and_then(|r| r.get("using"))
807        .and_then(|u| u.as_str())
808        .unwrap_or("");
809    if using != "composite" {
810        graph.mark_partial(format!(
811            "non-composite local action: {uses_path} (using: {using})"
812        ));
813        return;
814    }
815
816    let steps = match action
817        .get("runs")
818        .and_then(|r| r.get("steps"))
819        .and_then(|s| s.as_sequence())
820    {
821        Some(s) => s,
822        None => {
823            graph.mark_partial(format!("composite action '{uses_path}' has no runs.steps"));
824            return;
825        }
826    };
827
828    for (idx, step) in steps.iter().enumerate() {
829        let step_map = match step.as_mapping() {
830            Some(m) => m,
831            None => continue,
832        };
833
834        let name = step_map
835            .get("name")
836            .and_then(|v| v.as_str())
837            .map(|s| s.to_string())
838            .unwrap_or_else(|| format!("{uses_path}[{idx}]"));
839
840        let uses = step_map.get("uses").and_then(|v| v.as_str());
841        let run = step_map.get("run").and_then(|v| v.as_str());
842
843        // Trust zone mirrors the workflow-level rule: `run:` steps under a
844        // pull_request_target trigger may execute fork code.
845        let (trust_zone, image_node_id) = if let Some(u) = uses {
846            let (zone, image_id) = classify_action(u, graph);
847            (zone, Some(image_id))
848        } else if is_pull_request_target {
849            (TrustZone::Untrusted, None)
850        } else {
851            (TrustZone::FirstParty, None)
852        };
853
854        let inlined_id = graph.add_node(NodeKind::Step, &name, trust_zone);
855        // Tag so downstream consumers can identify inlined sub-steps.
856        if let Some(node) = graph.nodes.get_mut(inlined_id) {
857            node.metadata
858                .insert(META_COMPOSITE_STEP.into(), "true".into());
859            node.metadata
860                .insert(META_COMPOSITE_SOURCE.into(), uses_path.into());
861            // Inlined sub-steps belong to the calling job — propagate parent
862            // job name so per-job filtering captures composite-action steps too.
863            node.metadata.insert(META_JOB_NAME.into(), job_name.into());
864            // Stamp the script body for inlined `run:` steps so script-aware
865            // rules see them too.
866            if let Some(body) = run {
867                if !body.is_empty() {
868                    node.metadata
869                        .insert(META_SCRIPT_BODY.into(), body.to_string());
870                }
871            }
872        }
873
874        // DelegatesTo edge: calling step → inlined sub-step.
875        graph.add_edge(calling_step_id, inlined_id, EdgeKind::DelegatesTo);
876
877        if let Some(img_id) = image_node_id {
878            graph.add_edge(inlined_id, img_id, EdgeKind::UsesImage);
879        }
880        if let Some(img_id) = container_image_id {
881            graph.add_edge(inlined_id, img_id, EdgeKind::UsesImage);
882        }
883        if let Some(tok_id) = job_token_id {
884            graph.add_edge(inlined_id, tok_id, EdgeKind::HasAccessTo);
885        }
886
887        // Secret detection on `env:` block.
888        if let Some(env_val) = step_map.get("env").and_then(|v| v.as_mapping()) {
889            for v in env_val.values() {
890                if let Some(s) = v.as_str() {
891                    if is_secret_reference(s) {
892                        let secret_name = extract_secret_name(s);
893                        let secret_id = find_or_create_secret(graph, secret_cache, &secret_name);
894                        graph.add_edge(inlined_id, secret_id, EdgeKind::HasAccessTo);
895                    }
896                }
897            }
898        }
899
900        // Secret detection on `with:` block.
901        if let Some(with_val) = step_map.get("with").and_then(|v| v.as_mapping()) {
902            for v in with_val.values() {
903                if let Some(s) = v.as_str() {
904                    if is_secret_reference(s) {
905                        let secret_name = extract_secret_name(s);
906                        let secret_id = find_or_create_secret(graph, secret_cache, &secret_name);
907                        graph.add_edge(inlined_id, secret_id, EdgeKind::HasAccessTo);
908                    }
909                }
910            }
911        }
912
913        // Inferred secrets in `run:` script blocks (mirrors workflow-level logic).
914        if let Some(run_str) = run {
915            if run_str.contains("${{ secrets.") {
916                let mut pos = 0;
917                while let Some(start) = run_str[pos..].find("secrets.") {
918                    let abs_start = pos + start + 8;
919                    let remaining = &run_str[abs_start..];
920                    let end = remaining
921                        .find(|c: char| !c.is_alphanumeric() && c != '_')
922                        .unwrap_or(remaining.len());
923                    let secret_name = &remaining[..end];
924                    if !secret_name.is_empty() {
925                        let secret_id = find_or_create_secret(graph, secret_cache, secret_name);
926                        if let Some(node) = graph.nodes.get_mut(secret_id) {
927                            node.metadata
928                                .insert(META_INFERRED.into(), META_INFERRED_VAL.into());
929                        }
930                        graph.add_edge(inlined_id, secret_id, EdgeKind::HasAccessTo);
931                        graph.mark_partial(format!(
932                            "secret '{secret_name}' referenced in composite action run: script — inferred, not precisely mapped"
933                        ));
934                    }
935                    pos = abs_start + end;
936                }
937            }
938
939            // GHA env-gate write detection (mirrors workflow-level logic).
940            let writes_gate = run_str.contains("GITHUB_ENV") || run_str.contains("GITHUB_PATH");
941            if writes_gate {
942                if let Some(node) = graph.nodes.get_mut(inlined_id) {
943                    node.metadata
944                        .insert(META_WRITES_ENV_GATE.into(), "true".into());
945                }
946            }
947        }
948    }
949}
950
/// True when `val` contains a template expression that opens with the
/// `secrets.` context, e.g. `${{ secrets.MY_SECRET }}`.
///
/// Whitespace after the opening `${{` is optional, so `${{secrets.X}}` and
/// `${{   secrets.X }}` are accepted alongside the canonical single-space
/// form — the same tolerance `is_env_reference` applies to `env.`. Requiring
/// exactly one space would silently drop those references from the graph.
///
/// A bare `secrets.X` outside a template, or `secrets.` appearing deeper
/// inside another context path (`${{ steps.a.outputs.secrets.b }}`), does not
/// count.
fn is_secret_reference(val: &str) -> bool {
    val.match_indices("${{").any(|(at, open)| {
        val[at + open.len()..]
            .trim_start()
            .starts_with("secrets.")
    })
}
954
/// Detects a `${{ env.<NAME> }}` template expression anywhere in `val`.
///
/// This is the consumer side of the `$GITHUB_ENV` laundering pattern: one
/// step writes `CLOUD_KEY=$secret` to `$GITHUB_ENV`, a later step reads
/// `env.CLOUD_KEY`. Ordinary `env:` declarations on the consuming step are a
/// different thing and are not what this looks for.
///
/// Matching is anchored on the template opener: after each `${{`, leading
/// whitespace is skipped and the expression must begin with `env.`. That
/// accepts `${{env.X}}`, `${{ env.X }}` and `${{    env.X    }}` while
/// rejecting `${{ steps.x.env.foo }}` — a bare `env.` substring search would
/// be far too noisy.
fn is_env_reference(val: &str) -> bool {
    val.match_indices("${{").any(|(at, opener)| {
        let expression = &val[at + opener.len()..];
        expression.trim_start().starts_with("env.")
    })
}
983
/// Pulls the secret name out of a value such as `${{ secrets.MY_SECRET }}`.
///
/// The name is whatever follows the first `secrets.` up to (not including)
/// the first character that is neither alphanumeric nor `_`. When `val` has
/// no `secrets.` at all, the whole value is returned unchanged.
fn extract_secret_name(val: &str) -> String {
    match val.split_once("secrets.") {
        Some((_prefix, tail)) => tail
            .chars()
            .take_while(|c| c.is_alphanumeric() || *c == '_')
            .collect(),
        None => val.to_string(),
    }
}
996
997fn find_or_create_secret(
998    graph: &mut AuthorityGraph,
999    cache: &mut HashMap<String, NodeId>,
1000    name: &str,
1001) -> NodeId {
1002    if let Some(&id) = cache.get(name) {
1003        return id;
1004    }
1005    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
1006    cache.insert(name.to_string(), id);
1007    id
1008}
1009
1010/// Detect known OIDC cloud authentication actions and create an Identity node
1011/// representing the cloud identity that will be assumed.
1012///
1013/// Only handles the OIDC/federated path — static credential inputs (e.g.
1014/// `aws-secret-access-key: ${{ secrets.X }}`) are already captured by the
1015/// regular `with:` secret scanning and don't need a separate Identity node.
1016///
1017/// Returns `Some(NodeId)` of the created Identity, or `None` if not recognized.
1018fn classify_cloud_auth(
1019    uses: &str,
1020    with: Option<&HashMap<String, String>>,
1021    graph: &mut AuthorityGraph,
1022) -> Option<NodeId> {
1023    // Strip `@version` — match any version of the action
1024    let action = uses.split('@').next().unwrap_or(uses);
1025
1026    match action {
1027        "aws-actions/configure-aws-credentials" => {
1028            // OIDC path: role-to-assume present (no static access key needed)
1029            let w = with?;
1030            let role = w.get("role-to-assume")?;
1031            // ARN format: arn:aws:iam::123456789012:role/my-role
1032            // Split on '/' to get the role name; fall back to the full value.
1033            let short = role.split('/').next_back().unwrap_or(role.as_str());
1034            let mut meta = HashMap::new();
1035            meta.insert(META_OIDC.into(), "true".into());
1036            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
1037            meta.insert(META_PERMISSIONS.into(), "AWS role assumption (OIDC)".into());
1038            Some(graph.add_node_with_metadata(
1039                NodeKind::Identity,
1040                format!("AWS/{short}"),
1041                TrustZone::FirstParty,
1042                meta,
1043            ))
1044        }
1045        "google-github-actions/auth" => {
1046            // OIDC path: workload_identity_provider present
1047            let w = with?;
1048            let provider = w.get("workload_identity_provider")?;
1049            let short = provider.split('/').next_back().unwrap_or(provider.as_str());
1050            let mut meta = HashMap::new();
1051            meta.insert(META_OIDC.into(), "true".into());
1052            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
1053            meta.insert(
1054                META_PERMISSIONS.into(),
1055                "GCP workload identity federation".into(),
1056            );
1057            Some(graph.add_node_with_metadata(
1058                NodeKind::Identity,
1059                format!("GCP/{short}"),
1060                TrustZone::FirstParty,
1061                meta,
1062            ))
1063        }
1064        "azure/login" => {
1065            // OIDC path: client-id present without client-secret
1066            let w = with?;
1067            let client_id = w.get("client-id")?;
1068            // Only treat as OIDC if no static client-secret is provided
1069            if w.contains_key("client-secret") {
1070                return None; // static SP creds captured by with: secret scanning
1071            }
1072            let mut meta = HashMap::new();
1073            meta.insert(META_OIDC.into(), "true".into());
1074            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
1075            meta.insert(
1076                META_PERMISSIONS.into(),
1077                "Azure federated credential (OIDC)".into(),
1078            );
1079            Some(graph.add_node_with_metadata(
1080                NodeKind::Identity,
1081                format!("Azure/{client_id}"),
1082                TrustZone::FirstParty,
1083                meta,
1084            ))
1085        }
1086        _ => None,
1087    }
1088}
1089
1090// ── Serde models for GHA YAML ──────────────────────────
1091
/// Flexible permissions: can be a string ("write-all") or a map.
///
/// Deserialized as an untagged enum, so the YAML shape alone selects the
/// variant.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum Permissions {
    /// Scalar shorthand, e.g. `permissions: write-all`.
    String(String),
    /// Per-scope form mapping a scope name to its access level,
    /// e.g. `contents: read`.
    Map(HashMap<String, String>),
}
1099
/// Polymorphic `env:` block. Normally a map of name → value, but in some
/// real-world workflows the entire `env:` value is a template expression
/// (e.g. `env: ${{ matrix }}`), where the shape resolves at runtime.
///
/// When the value is a template string, downstream code must mark the graph
/// Partial — environment variable shape is unknown to static analysis.
///
/// The map variant uses a custom deserializer (`deserialize_env_map`) that
/// stringifies scalar values. GHA accepts non-string scalars in env values
/// (`COVERAGE: false`, `RUST_BACKTRACE: 1`, `TARGET_FLAGS:` (null)); a strict
/// `HashMap<String, String>` rejects them and breaks 200+ real-world workflows.
///
/// The enum is untagged: `Map` is attempted first and `Template` is the
/// fallback when the value is not a map of scalars.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum EnvSpec {
    /// Statically known variables, with every scalar value rendered as a
    /// string.
    #[serde(deserialize_with = "deserialize_env_map")]
    Map(HashMap<String, String>),
    /// The raw string given in place of a map, e.g. `${{ matrix }}`.
    Template(String),
}
1118
1119/// Deserialize a GHA `env:` map, stringifying scalar values so that
1120/// non-string scalars (booleans, numbers, null, YAML anchors resolving
1121/// to scalars) round-trip into `HashMap<String, String>`.
1122///
1123/// Rejects nested mappings/sequences — those would indicate the value
1124/// is not a real env value and we should fall through to the `Template`
1125/// variant or fail loudly. Null values become the empty string, matching
1126/// how GHA itself surfaces an unset env var.
1127fn deserialize_env_map<'de, D>(deserializer: D) -> Result<HashMap<String, String>, D::Error>
1128where
1129    D: serde::Deserializer<'de>,
1130{
1131    use serde::de::Error;
1132    let raw: HashMap<String, serde_yaml::Value> = HashMap::deserialize(deserializer)?;
1133    let mut out = HashMap::with_capacity(raw.len());
1134    for (k, v) in raw {
1135        let s = match v {
1136            serde_yaml::Value::String(s) => s,
1137            serde_yaml::Value::Bool(b) => b.to_string(),
1138            serde_yaml::Value::Number(n) => n.to_string(),
1139            serde_yaml::Value::Null => String::new(),
1140            // Mappings / sequences in env values are not legal GHA — but
1141            // rather than crash the whole workflow, fail this variant so
1142            // the untagged enum can try `Template` next.
1143            other => {
1144                return Err(D::Error::custom(format!(
1145                    "env value for `{k}` is not a scalar: {other:?}"
1146                )))
1147            }
1148        };
1149        out.insert(k, s);
1150    }
1151    Ok(out)
1152}
1153
1154impl EnvSpec {
1155    /// Returns the env map if statically known, or `None` if it is a template
1156    /// expression whose shape resolves at runtime.
1157    pub fn as_map(&self) -> Option<&HashMap<String, String>> {
1158        match self {
1159            EnvSpec::Map(m) => Some(m),
1160            EnvSpec::Template(_) => None,
1161        }
1162    }
1163
1164    /// Returns the raw template expression, if this `env:` is a template.
1165    pub fn as_template(&self) -> Option<&str> {
1166        match self {
1167            EnvSpec::Template(s) => Some(s.as_str()),
1168            EnvSpec::Map(_) => None,
1169        }
1170    }
1171}
1172
1173impl std::fmt::Display for Permissions {
1174    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1175        match self {
1176            Permissions::String(s) => write!(f, "{s}"),
1177            Permissions::Map(m) => {
1178                let parts: Vec<String> = m.iter().map(|(k, v)| format!("{k}: {v}")).collect();
1179                write!(f, "{{ {} }}", parts.join(", "))
1180            }
1181        }
1182    }
1183}
1184
/// Top-level shape of a GitHub Actions workflow file as read by the parser.
///
/// Every field is optional in the YAML: missing keys fall back to `None` or
/// an empty map.
#[derive(Debug, Deserialize)]
pub struct GhaWorkflow {
    /// Workflow trigger(s). Polymorphic: string, sequence, or mapping.
    #[serde(rename = "on", default)]
    pub triggers: Option<serde_yaml::Value>,
    /// Workflow-level `permissions:` declaration, if any.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Workflow-level env vars, inherited by all jobs and steps.
    /// Polymorphic: typically a map, but can be a template expression
    /// (e.g. `env: ${{ matrix }}`) whose shape is unknown statically.
    #[serde(default)]
    pub env: Option<EnvSpec>,
    /// Jobs keyed by the name they are declared under in `jobs:`.
    #[serde(default)]
    pub jobs: HashMap<String, GhaJob>,
}
1200
/// Job-level container config. Polymorphic: string image or map with `image:` key.
///
/// Untagged: a plain string selects `Image`, a mapping selects `Full`.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
pub enum ContainerConfig {
    /// Shorthand form: the image reference given directly as a string.
    Image(String),
    /// Mapping form; only the `image` key is captured here.
    Full { image: String },
}
1208
1209impl ContainerConfig {
1210    pub fn image(&self) -> &str {
1211        match self {
1212            ContainerConfig::Image(s) => s,
1213            ContainerConfig::Full { image } => image,
1214        }
1215    }
1216}
1217
/// One entry of the workflow's `jobs:` map.
///
/// Every field is optional in the YAML: missing keys fall back to `None` or
/// an empty list.
#[derive(Debug, Deserialize)]
pub struct GhaJob {
    /// Job-level `permissions:` declaration, if any.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Job-level env vars. Polymorphic: typically a map, but can be a
    /// template expression (e.g. `env: ${{ matrix }}`) whose shape is unknown
    /// statically.
    #[serde(default)]
    pub env: Option<EnvSpec>,
    /// Steps of the job, in declaration order. Empty when `steps:` is absent.
    #[serde(default)]
    pub steps: Vec<GhaStep>,
    /// Reusable workflow reference — `uses: owner/repo/.github/workflows/foo.yml@ref`
    #[serde(default)]
    pub uses: Option<String>,
    /// Job container image.
    #[serde(default)]
    pub container: Option<ContainerConfig>,
    /// Matrix/strategy configuration. When a matrix is present, the authority
    /// shape may differ per matrix entry — graph is marked Partial.
    #[serde(default)]
    pub strategy: Option<serde_yaml::Value>,
    /// Runner label(s). Can be a string (`ubuntu-latest`), a sequence
    /// (`[self-hosted, linux]`), a mapping for runner-group selection
    /// (`{ group: my-group, labels: [...] }`), or absent for reusable
    /// workflows.
    #[serde(rename = "runs-on", default)]
    pub runs_on: Option<serde_yaml::Value>,
    /// Job-level `if:` condition. Captured verbatim so rules can scan for
    /// the standard fork-check pattern
    /// (`github.event.pull_request.head.repo.fork == false` or the
    /// equivalent `head.repo.full_name == github.repository`). Job-level
    /// `if:` applies to every step the job contains.
    #[serde(rename = "if", default)]
    pub if_cond: Option<String>,
}
1251
/// One entry of a job's `steps:` list.
#[derive(Debug, Deserialize)]
pub struct GhaStep {
    /// Optional display name given by `name:`.
    pub name: Option<String>,
    /// Action reference given by `uses:` (e.g. `actions/checkout@v4` or a
    /// local `./...` path), if the step runs an action.
    pub uses: Option<String>,
    /// Script given by `run:`, if the step runs a script.
    pub run: Option<String>,
    /// Step-level env vars. Polymorphic: typically a map, but can be a
    /// template expression (e.g. `env: ${{ matrix }}`) whose shape is unknown
    /// statically.
    #[serde(default)]
    pub env: Option<EnvSpec>,
    /// Inputs given by `with:`, as a name → string value map.
    #[serde(rename = "with", default)]
    pub with: Option<HashMap<String, String>>,
    /// Step-level `if:` condition. Captured verbatim so rules can detect
    /// the standard fork-check pattern.
    #[serde(rename = "if", default)]
    pub if_cond: Option<String>,
}
1269
1270#[cfg(test)]
1271mod tests {
1272    use super::*;
1273
1274    fn parse(yaml: &str) -> AuthorityGraph {
1275        let parser = GhaParser;
1276        let source = PipelineSource {
1277            file: "test.yml".into(),
1278            repo: None,
1279            git_ref: None,
1280            commit_sha: None,
1281        };
1282        parser.parse(yaml, &source).unwrap()
1283    }
1284
1285    #[test]
1286    fn parses_simple_workflow() {
1287        let yaml = r#"
1288permissions: write-all
1289jobs:
1290  build:
1291    steps:
1292      - name: Checkout
1293        uses: actions/checkout@v4
1294      - name: Build
1295        run: make build
1296"#;
1297        let graph = parse(yaml);
1298        assert!(graph.nodes.len() >= 3); // GITHUB_TOKEN + 2 steps + 1 image
1299    }
1300
1301    #[test]
1302    fn detects_secret_in_env() {
1303        let yaml = r#"
1304jobs:
1305  deploy:
1306    steps:
1307      - name: Deploy
1308        run: ./deploy.sh
1309        env:
1310          AWS_KEY: "${{ secrets.AWS_ACCESS_KEY_ID }}"
1311"#;
1312        let graph = parse(yaml);
1313        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1314        assert_eq!(secrets.len(), 1);
1315        assert_eq!(secrets[0].name, "AWS_ACCESS_KEY_ID");
1316    }
1317
1318    #[test]
1319    fn classifies_unpinned_action_as_untrusted() {
1320        let yaml = r#"
1321jobs:
1322  ci:
1323    steps:
1324      - uses: actions/checkout@v4
1325"#;
1326        let graph = parse(yaml);
1327        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1328        assert_eq!(images.len(), 1);
1329        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
1330    }
1331
1332    #[test]
1333    fn classifies_sha_pinned_action_as_third_party() {
1334        let yaml = r#"
1335jobs:
1336  ci:
1337    steps:
1338      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
1339"#;
1340        let graph = parse(yaml);
1341        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1342        assert_eq!(images.len(), 1);
1343        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
1344    }
1345
1346    #[test]
1347    fn classifies_local_action_as_first_party() {
1348        let yaml = r#"
1349jobs:
1350  ci:
1351    steps:
1352      - uses: ./.github/actions/my-action
1353"#;
1354        let graph = parse(yaml);
1355        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1356        assert_eq!(images.len(), 1);
1357        assert_eq!(images[0].trust_zone, TrustZone::FirstParty);
1358    }
1359
1360    #[test]
1361    fn detects_secret_in_with() {
1362        let yaml = r#"
1363jobs:
1364  deploy:
1365    steps:
1366      - name: Publish
1367        uses: some-org/publish@v1
1368        with:
1369          token: "${{ secrets.NPM_TOKEN }}"
1370"#;
1371        let graph = parse(yaml);
1372        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1373        assert_eq!(secrets.len(), 1);
1374        assert_eq!(secrets[0].name, "NPM_TOKEN");
1375    }
1376
1377    #[test]
1378    fn inferred_secret_in_run_block_detected() {
1379        let yaml = r#"
1380jobs:
1381  deploy:
1382    steps:
1383      - name: Deploy
1384        run: |
1385          curl -H "Authorization: ${{ secrets.API_TOKEN }}" https://api.example.com
1386"#;
1387        let graph = parse(yaml);
1388        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1389        assert_eq!(secrets.len(), 1);
1390        assert_eq!(secrets[0].name, "API_TOKEN");
1391        assert_eq!(
1392            secrets[0].metadata.get(META_INFERRED),
1393            Some(&"true".to_string())
1394        );
1395        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1396        assert!(!graph.completeness_gaps.is_empty());
1397    }
1398
1399    #[test]
1400    fn job_level_env_inherited_by_steps() {
1401        let yaml = r#"
1402jobs:
1403  build:
1404    env:
1405      DB_PASSWORD: "${{ secrets.DB_PASSWORD }}"
1406    steps:
1407      - name: Step A
1408        run: echo "a"
1409      - name: Step B
1410        run: echo "b"
1411"#;
1412        let graph = parse(yaml);
1413        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1414        assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
1415
1416        // Both steps should have access to the secret
1417        let secret_id = secrets[0].id;
1418        let accessing_steps = graph
1419            .edges_to(secret_id)
1420            .filter(|e| e.kind == EdgeKind::HasAccessTo)
1421            .count();
1422        assert_eq!(accessing_steps, 2, "both steps inherit job-level env");
1423    }
1424
1425    #[test]
1426    fn identity_scope_set_on_token() {
1427        let yaml = r#"
1428permissions: write-all
1429jobs:
1430  ci:
1431    steps:
1432      - run: echo hi
1433"#;
1434        let graph = parse(yaml);
1435        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1436        assert_eq!(identities.len(), 1);
1437        assert_eq!(
1438            identities[0].metadata.get(META_IDENTITY_SCOPE),
1439            Some(&"broad".to_string())
1440        );
1441    }
1442
1443    #[test]
1444    fn constrained_identity_scope() {
1445        let yaml = r#"
1446permissions:
1447  contents: read
1448jobs:
1449  ci:
1450    steps:
1451      - run: echo hi
1452"#;
1453        let graph = parse(yaml);
1454        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1455        assert_eq!(identities.len(), 1);
1456        assert_eq!(
1457            identities[0].metadata.get(META_IDENTITY_SCOPE),
1458            Some(&"constrained".to_string())
1459        );
1460    }
1461
1462    #[test]
1463    fn pull_request_target_string_trigger_marks_run_steps_untrusted() {
1464        let yaml = r#"
1465on: pull_request_target
1466jobs:
1467  check:
1468    steps:
1469      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
1470        with:
1471          ref: ${{ github.event.pull_request.head.sha }}
1472      - run: npm test
1473"#;
1474        let graph = parse(yaml);
1475        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1476        assert_eq!(steps.len(), 2);
1477
1478        // run: step should be Untrusted (might execute fork code)
1479        let run_step = steps.iter().find(|s| s.name.contains("check[1]")).unwrap();
1480        assert_eq!(
1481            run_step.trust_zone,
1482            TrustZone::Untrusted,
1483            "run: step in pull_request_target workflow should be Untrusted"
1484        );
1485
1486        // uses: step keeps its own trust zone (SHA-pinned = ThirdParty)
1487        let checkout_step = steps.iter().find(|s| s.name.contains("check[0]")).unwrap();
1488        assert_eq!(checkout_step.trust_zone, TrustZone::ThirdParty);
1489    }
1490
1491    #[test]
1492    fn pull_request_target_sequence_trigger_marks_run_steps_untrusted() {
1493        let yaml = r#"
1494on: [push, pull_request_target]
1495jobs:
1496  ci:
1497    steps:
1498      - run: echo hi
1499"#;
1500        let graph = parse(yaml);
1501        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1502        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
1503    }
1504
1505    #[test]
1506    fn pull_request_target_mapping_trigger_marks_run_steps_untrusted() {
1507        let yaml = r#"
1508on:
1509  pull_request_target:
1510    types: [opened, synchronize]
1511jobs:
1512  ci:
1513    steps:
1514      - run: echo hi
1515"#;
1516        let graph = parse(yaml);
1517        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1518        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
1519    }
1520
1521    #[test]
1522    fn push_trigger_does_not_mark_run_steps_untrusted() {
1523        let yaml = r#"
1524on: push
1525jobs:
1526  ci:
1527    steps:
1528      - run: echo hi
1529"#;
1530        let graph = parse(yaml);
1531        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1532        assert_eq!(
1533            steps[0].trust_zone,
1534            TrustZone::FirstParty,
1535            "push-triggered run: steps should remain FirstParty"
1536        );
1537    }
1538
1539    #[test]
1540    fn workflow_level_env_inherited_by_all_steps() {
1541        let yaml = r#"
1542env:
1543  DB_URL: "${{ secrets.DATABASE_URL }}"
1544jobs:
1545  build:
1546    steps:
1547      - name: Step A
1548        run: echo "a"
1549  test:
1550    steps:
1551      - name: Step B
1552        run: echo "b"
1553"#;
1554        let graph = parse(yaml);
1555        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1556        assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
1557
1558        // Both steps in both jobs should inherit the workflow-level secret
1559        let secret_id = secrets[0].id;
1560        let accessing_steps = graph
1561            .edges_to(secret_id)
1562            .filter(|e| e.kind == EdgeKind::HasAccessTo)
1563            .count();
1564        assert_eq!(accessing_steps, 2, "both steps inherit workflow-level env");
1565    }
1566
1567    #[test]
1568    fn matrix_strategy_marks_graph_partial() {
1569        let yaml = r#"
1570jobs:
1571  test:
1572    strategy:
1573      matrix:
1574        os: [ubuntu-latest, windows-latest, macos-latest]
1575    steps:
1576      - run: echo hi
1577"#;
1578        let graph = parse(yaml);
1579        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1580        assert!(
1581            graph.completeness_gaps.iter().any(|g| g.contains("matrix")),
1582            "matrix strategy should be recorded as a completeness gap"
1583        );
1584    }
1585
1586    #[test]
1587    fn job_without_matrix_does_not_mark_partial() {
1588        let yaml = r#"
1589jobs:
1590  build:
1591    steps:
1592      - run: cargo build
1593"#;
1594        let graph = parse(yaml);
1595        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
1596    }
1597
1598    #[test]
1599    fn reusable_workflow_creates_image_and_marks_partial() {
1600        let yaml = r#"
1601jobs:
1602  call:
1603    uses: org/repo/.github/workflows/deploy.yml@main
1604"#;
1605        let graph = parse(yaml);
1606        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1607        assert_eq!(images.len(), 1);
1608        assert_eq!(images[0].name, "org/repo/.github/workflows/deploy.yml@main");
1609        assert_eq!(images[0].trust_zone, TrustZone::Untrusted); // not SHA-pinned
1610
1611        // Step node representing the job delegation
1612        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1613        assert_eq!(steps.len(), 1);
1614        assert_eq!(steps[0].name, "call");
1615
1616        // DelegatesTo edge from step to reusable workflow image
1617        let delegates: Vec<_> = graph
1618            .edges_from(steps[0].id)
1619            .filter(|e| e.kind == EdgeKind::DelegatesTo)
1620            .collect();
1621        assert_eq!(delegates.len(), 1);
1622
1623        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1624    }
1625
1626    #[test]
1627    fn reusable_workflow_sha_pinned_is_third_party() {
1628        let yaml = r#"
1629jobs:
1630  call:
1631    uses: org/repo/.github/workflows/deploy.yml@a5ac7e51b41094c92402da3b24376905380afc29
1632"#;
1633        let graph = parse(yaml);
1634        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1635        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
1636    }
1637
1638    #[test]
1639    fn container_unpinned_creates_image_node_untrusted() {
1640        let yaml = r#"
1641jobs:
1642  build:
1643    container: ubuntu:22.04
1644    steps:
1645      - run: echo hi
1646"#;
1647        let graph = parse(yaml);
1648        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1649        assert_eq!(images.len(), 1);
1650        assert_eq!(images[0].name, "ubuntu:22.04");
1651        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
1652        assert_eq!(
1653            images[0].metadata.get(META_CONTAINER),
1654            Some(&"true".to_string())
1655        );
1656    }
1657
1658    #[test]
1659    fn container_digest_pinned_creates_image_node_third_party() {
1660        let yaml = r#"
1661jobs:
1662  build:
1663    container:
1664      image: "ubuntu@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
1665    steps:
1666      - run: echo hi
1667"#;
1668        let graph = parse(yaml);
1669        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1670        assert_eq!(images.len(), 1);
1671        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
1672        assert_eq!(
1673            images[0].metadata.get(META_CONTAINER),
1674            Some(&"true".to_string())
1675        );
1676    }
1677
1678    #[test]
1679    fn oidc_permission_tags_identity_with_meta_oidc() {
1680        let yaml = r#"
1681permissions:
1682  id-token: write
1683  contents: read
1684jobs:
1685  ci:
1686    steps:
1687      - run: echo hi
1688"#;
1689        let graph = parse(yaml);
1690        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1691        assert_eq!(identities.len(), 1);
1692        assert_eq!(
1693            identities[0].metadata.get(META_OIDC),
1694            Some(&"true".to_string()),
1695            "id-token: write should mark identity as OIDC-capable"
1696        );
1697    }
1698
1699    #[test]
1700    fn non_oidc_permission_does_not_tag_meta_oidc() {
1701        let yaml = r#"
1702permissions:
1703  contents: read
1704jobs:
1705  ci:
1706    steps:
1707      - run: echo hi
1708"#;
1709        let graph = parse(yaml);
1710        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1711        assert_eq!(identities.len(), 1);
1712        assert!(
1713            !identities[0].metadata.contains_key(META_OIDC),
1714            "contents:read should not tag as OIDC"
1715        );
1716    }
1717
1718    #[test]
1719    fn contents_write_without_id_token_does_not_tag_oidc() {
1720        // Regression: "contents: write" contains "write" but not "id-token: write".
1721        // Should NOT be tagged as OIDC-capable.
1722        let yaml = r#"
1723permissions:
1724  contents: write
1725jobs:
1726  ci:
1727    steps:
1728      - run: echo hi
1729"#;
1730        let graph = parse(yaml);
1731        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1732        assert_eq!(identities.len(), 1);
1733        assert!(
1734            !identities[0].metadata.contains_key(META_OIDC),
1735            "contents:write without id-token must not be tagged OIDC"
1736        );
1737    }
1738
1739    #[test]
1740    fn write_all_permission_tags_identity_as_oidc() {
1741        // `permissions: write-all` grants every permission including id-token: write.
1742        let yaml = r#"
1743permissions: write-all
1744jobs:
1745  ci:
1746    steps:
1747      - run: echo hi
1748"#;
1749        let graph = parse(yaml);
1750        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1751        assert_eq!(identities.len(), 1);
1752        assert_eq!(
1753            identities[0].metadata.get(META_OIDC),
1754            Some(&"true".to_string()),
1755            "write-all grants all permissions including id-token: write"
1756        );
1757    }
1758
1759    #[test]
1760    fn container_steps_linked_to_container_image() {
1761        let yaml = r#"
1762jobs:
1763  build:
1764    container: ubuntu:22.04
1765    steps:
1766      - name: Step A
1767        run: echo "a"
1768      - name: Step B
1769        run: echo "b"
1770"#;
1771        let graph = parse(yaml);
1772        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1773        assert_eq!(images.len(), 1);
1774        let container_id = images[0].id;
1775
1776        // Both steps must have UsesImage edges to the container
1777        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1778        assert_eq!(steps.len(), 2);
1779        for step in &steps {
1780            let links: Vec<_> = graph
1781                .edges_from(step.id)
1782                .filter(|e| e.kind == EdgeKind::UsesImage && e.to == container_id)
1783                .collect();
1784            assert_eq!(
1785                links.len(),
1786                1,
1787                "step '{}' must link to container",
1788                step.name
1789            );
1790        }
1791    }
1792
1793    #[test]
1794    fn container_authority_propagates_to_floating_image() {
1795        // Integration: authority from a step running in a floating container should
1796        // propagate to the container Image node (Untrusted), generating a finding.
1797        let yaml = r#"
1798permissions: write-all
1799jobs:
1800  build:
1801    container: ubuntu:22.04
1802    steps:
1803      - run: echo hi
1804"#;
1805        use taudit_core::propagation::DEFAULT_MAX_HOPS;
1806        use taudit_core::rules;
1807        let graph = parse(yaml);
1808        let findings = rules::run_all_rules(&graph, DEFAULT_MAX_HOPS);
1809        // Should detect: GITHUB_TOKEN (broad) propagates to ubuntu:22.04 (Untrusted) via step
1810        assert!(
1811            findings
1812                .iter()
1813                .any(|f| f.category == taudit_core::finding::FindingCategory::AuthorityPropagation),
1814            "authority should propagate from step to floating container"
1815        );
1816    }
1817
1818    #[test]
1819    fn aws_oidc_creates_identity_node() {
1820        let yaml = r#"
1821jobs:
1822  deploy:
1823    steps:
1824      - name: Configure AWS credentials
1825        uses: aws-actions/configure-aws-credentials@v4
1826        with:
1827          role-to-assume: arn:aws:iam::123456789012:role/my-deploy-role
1828          aws-region: us-east-1
1829"#;
1830        let graph = parse(yaml);
1831        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1832        assert_eq!(identities.len(), 1);
1833        // ARN arn:aws:iam::123456789012:role/my-deploy-role → last '/' segment
1834        assert_eq!(identities[0].name, "AWS/my-deploy-role");
1835        assert_eq!(
1836            identities[0].metadata.get(META_OIDC),
1837            Some(&"true".to_string())
1838        );
1839        assert_eq!(
1840            identities[0].metadata.get(META_IDENTITY_SCOPE),
1841            Some(&"broad".to_string())
1842        );
1843    }
1844
1845    #[test]
1846    fn gcp_oidc_creates_identity_node() {
1847        let yaml = r#"
1848jobs:
1849  deploy:
1850    steps:
1851      - name: Authenticate to GCP
1852        uses: google-github-actions/auth@v2
1853        with:
1854          workload_identity_provider: projects/123/locations/global/workloadIdentityPools/my-pool/providers/my-provider
1855          service_account: my-sa@my-project.iam.gserviceaccount.com
1856"#;
1857        let graph = parse(yaml);
1858        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1859        assert_eq!(identities.len(), 1);
1860        assert!(identities[0].name.starts_with("GCP/"));
1861        assert_eq!(
1862            identities[0].metadata.get(META_OIDC),
1863            Some(&"true".to_string())
1864        );
1865    }
1866
1867    #[test]
1868    fn azure_oidc_creates_identity_node() {
1869        let yaml = r#"
1870jobs:
1871  deploy:
1872    steps:
1873      - name: Azure login
1874        uses: azure/login@v2
1875        with:
1876          client-id: ${{ vars.AZURE_CLIENT_ID }}
1877          tenant-id: ${{ vars.AZURE_TENANT_ID }}
1878          subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
1879"#;
1880        let graph = parse(yaml);
1881        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1882        assert_eq!(identities.len(), 1);
1883        assert!(identities[0].name.starts_with("Azure/"));
1884        assert_eq!(
1885            identities[0].metadata.get(META_OIDC),
1886            Some(&"true".to_string())
1887        );
1888    }
1889
1890    #[test]
1891    fn azure_static_sp_does_not_create_identity_node() {
1892        // When client-secret is present, it's a static service principal — not OIDC.
1893        // The secret scanning in with: handles this; classify_cloud_auth returns None.
1894        let yaml = r#"
1895jobs:
1896  deploy:
1897    steps:
1898      - name: Azure login
1899        uses: azure/login@v2
1900        with:
1901          client-id: my-client-id
1902          client-secret: ${{ secrets.AZURE_CLIENT_SECRET }}
1903          tenant-id: my-tenant
1904"#;
1905        let graph = parse(yaml);
1906        // Identity node should NOT be created by cloud auth inference
1907        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1908        assert!(
1909            identities.is_empty(),
1910            "static SP should not create an OIDC Identity node"
1911        );
1912        // But the secret SHOULD be captured by existing with: scanning
1913        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1914        assert_eq!(secrets.len(), 1);
1915        assert_eq!(secrets[0].name, "AZURE_CLIENT_SECRET");
1916    }
1917
1918    #[test]
1919    fn aws_static_creds_do_not_create_identity_node() {
1920        // Static access key path — no role-to-assume, so classify_cloud_auth returns None.
1921        // The access key secret is captured by with: scanning.
1922        let yaml = r#"
1923jobs:
1924  deploy:
1925    steps:
1926      - uses: aws-actions/configure-aws-credentials@v4
1927        with:
1928          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
1929          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
1930          aws-region: us-east-1
1931"#;
1932        let graph = parse(yaml);
1933        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1934        assert!(
1935            identities.is_empty(),
1936            "static AWS creds must not create Identity node"
1937        );
1938        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1939        assert_eq!(secrets.len(), 2, "both static secrets captured");
1940    }
1941
1942    #[test]
1943    fn pull_request_target_sets_meta_trigger_on_graph() {
1944        let yaml = r#"
1945on: pull_request_target
1946jobs:
1947  ci:
1948    steps:
1949      - run: echo hi
1950"#;
1951        let graph = parse(yaml);
1952        assert_eq!(
1953            graph.metadata.get(META_TRIGGER),
1954            Some(&"pull_request_target".to_string())
1955        );
1956    }
1957
1958    #[test]
1959    fn github_env_write_in_run_sets_meta_writes_env_gate() {
1960        let yaml = r#"
1961jobs:
1962  build:
1963    steps:
1964      - name: Set version
1965        run: echo "VERSION=1.0" >> $GITHUB_ENV
1966"#;
1967        let graph = parse(yaml);
1968        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1969        assert_eq!(steps.len(), 1);
1970        assert_eq!(
1971            steps[0].metadata.get(META_WRITES_ENV_GATE),
1972            Some(&"true".to_string()),
1973            "run: with >> $GITHUB_ENV must mark META_WRITES_ENV_GATE"
1974        );
1975    }
1976
1977    #[test]
1978    fn attest_action_sets_meta_attests() {
1979        let yaml = r#"
1980jobs:
1981  release:
1982    steps:
1983      - name: Attest
1984        uses: actions/attest-build-provenance@v1
1985        with:
1986          subject-path: dist/*
1987"#;
1988        let graph = parse(yaml);
1989        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1990        assert_eq!(steps.len(), 1);
1991        assert_eq!(
1992            steps[0].metadata.get(META_ATTESTS),
1993            Some(&"true".to_string())
1994        );
1995    }
1996
1997    #[test]
1998    fn self_hosted_string_runs_on_creates_image_with_self_hosted_metadata() {
1999        let yaml = r#"
2000jobs:
2001  build:
2002    runs-on: self-hosted
2003    steps:
2004      - run: echo hi
2005"#;
2006        let graph = parse(yaml);
2007        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2008        let runner = images
2009            .iter()
2010            .find(|i| i.metadata.contains_key(META_SELF_HOSTED))
2011            .expect("self-hosted runner Image node must be created");
2012        assert_eq!(
2013            runner.metadata.get(META_SELF_HOSTED),
2014            Some(&"true".to_string())
2015        );
2016    }
2017
2018    #[test]
2019    fn self_hosted_in_sequence_runs_on_creates_image_with_self_hosted_metadata() {
2020        let yaml = r#"
2021jobs:
2022  build:
2023    runs-on: [self-hosted, linux, x64]
2024    steps:
2025      - run: echo hi
2026"#;
2027        let graph = parse(yaml);
2028        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2029        let runner = images
2030            .iter()
2031            .find(|i| i.metadata.contains_key(META_SELF_HOSTED))
2032            .expect("self-hosted runner Image node must be created");
2033        assert_eq!(
2034            runner.metadata.get(META_SELF_HOSTED),
2035            Some(&"true".to_string())
2036        );
2037    }
2038
2039    #[test]
2040    fn hosted_runner_does_not_create_self_hosted_image() {
2041        let yaml = r#"
2042jobs:
2043  build:
2044    runs-on: ubuntu-latest
2045    steps:
2046      - run: echo hi
2047"#;
2048        let graph = parse(yaml);
2049        let self_hosted_images: Vec<_> = graph
2050            .nodes_of_kind(NodeKind::Image)
2051            .filter(|i| i.metadata.contains_key(META_SELF_HOSTED))
2052            .collect();
2053        assert!(
2054            self_hosted_images.is_empty(),
2055            "hosted runner must not produce a self-hosted Image node"
2056        );
2057    }
2058
2059    #[test]
2060    fn actions_checkout_step_tagged_with_meta_checkout_self() {
2061        let yaml = r#"
2062jobs:
2063  ci:
2064    steps:
2065      - uses: actions/checkout@v4
2066      - run: echo hi
2067"#;
2068        let graph = parse(yaml);
2069        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2070        let checkout_step = steps
2071            .iter()
2072            .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
2073            .expect("actions/checkout step must be tagged META_CHECKOUT_SELF");
2074        assert_eq!(
2075            checkout_step.metadata.get(META_CHECKOUT_SELF),
2076            Some(&"true".to_string())
2077        );
2078    }
2079
2080    #[test]
2081    fn actions_checkout_sha_pinned_also_tagged() {
2082        let yaml = r#"
2083jobs:
2084  ci:
2085    steps:
2086      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
2087"#;
2088        let graph = parse(yaml);
2089        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2090        assert_eq!(steps.len(), 1);
2091        assert_eq!(
2092            steps[0].metadata.get(META_CHECKOUT_SELF),
2093            Some(&"true".to_string()),
2094            "SHA-pinned checkout must still be tagged — rule gates on trigger context"
2095        );
2096    }
2097
2098    #[test]
2099    fn non_checkout_uses_not_tagged_checkout_self() {
2100        let yaml = r#"
2101jobs:
2102  ci:
2103    steps:
2104      - uses: some-org/other-action@v1
2105"#;
2106        let graph = parse(yaml);
2107        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2108        assert_eq!(steps.len(), 1);
2109        assert!(
2110            !steps[0].metadata.contains_key(META_CHECKOUT_SELF),
2111            "non-checkout uses: must not be tagged"
2112        );
2113    }
2114
2115    /// Build a unique temp directory under the OS temp root. We avoid pulling
2116    /// in the `tempfile` crate (no new deps allowed) — uniqueness comes from
2117    /// PID + a per-call atomic counter, which is sufficient for serial tests.
2118    fn make_temp_dir(label: &str) -> std::path::PathBuf {
2119        use std::sync::atomic::{AtomicU64, Ordering};
2120        static COUNTER: AtomicU64 = AtomicU64::new(0);
2121        let n = COUNTER.fetch_add(1, Ordering::Relaxed);
2122        let dir = std::env::temp_dir().join(format!(
2123            "taudit-gha-test-{}-{}-{}",
2124            std::process::id(),
2125            n,
2126            label
2127        ));
2128        let _ = std::fs::remove_dir_all(&dir);
2129        std::fs::create_dir_all(&dir).expect("create temp dir");
2130        dir
2131    }
2132
2133    fn parse_at(yaml: &str, file: &str) -> AuthorityGraph {
2134        let parser = GhaParser;
2135        let source = PipelineSource {
2136            file: file.into(),
2137            repo: None,
2138            git_ref: None,
2139            commit_sha: None,
2140        };
2141        parser.parse(yaml, &source).unwrap()
2142    }
2143
2144    #[test]
2145    fn composite_action_steps_inlined_into_graph() {
2146        let dir = make_temp_dir("composite-inline");
2147        let workflows_dir = dir.join(".github/workflows");
2148        let action_dir = dir.join(".github/actions/my-action");
2149        std::fs::create_dir_all(&workflows_dir).unwrap();
2150        std::fs::create_dir_all(&action_dir).unwrap();
2151
2152        let action_yml = r#"
2153name: My Action
2154runs:
2155  using: composite
2156  steps:
2157    - name: Install deps
2158      run: npm install
2159      shell: bash
2160    - name: Build
2161      uses: actions/setup-node@v4
2162      with:
2163        node-version: '18'
2164"#;
2165        std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
2166
2167        let workflow = r#"
2168jobs:
2169  ci:
2170    steps:
2171      - name: Run my action
2172        uses: ./.github/actions/my-action
2173"#;
2174        let workflow_path = workflows_dir.join("ci.yml");
2175        std::fs::write(&workflow_path, workflow).unwrap();
2176
2177        let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2178
2179        // Calling step + 2 inlined steps = 3 Step nodes.
2180        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2181        assert_eq!(steps.len(), 3, "calling step + 2 inlined sub-steps");
2182
2183        let inlined: Vec<_> = steps
2184            .iter()
2185            .filter(|s| s.metadata.contains_key(META_COMPOSITE_STEP))
2186            .collect();
2187        assert_eq!(inlined.len(), 2, "two inlined composite steps");
2188        assert!(inlined.iter().any(|s| s.name == "Install deps"));
2189        assert!(inlined.iter().any(|s| s.name == "Build"));
2190
2191        // Calling step has DelegatesTo edges to both inlined steps.
2192        let calling = steps
2193            .iter()
2194            .find(|s| !s.metadata.contains_key(META_COMPOSITE_STEP))
2195            .expect("calling step present");
2196        let delegates: Vec<_> = graph
2197            .edges_from(calling.id)
2198            .filter(|e| e.kind == EdgeKind::DelegatesTo)
2199            .collect();
2200        assert_eq!(delegates.len(), 2, "two DelegatesTo edges to inlined steps");
2201
2202        // The inlined `uses: actions/setup-node@v4` step must produce an Image node.
2203        assert!(
2204            graph
2205                .nodes_of_kind(NodeKind::Image)
2206                .any(|n| n.name == "actions/setup-node@v4"),
2207            "inlined uses: must create Image node"
2208        );
2209
2210        let _ = std::fs::remove_dir_all(&dir);
2211    }
2212
2213    #[test]
2214    fn missing_action_yml_marks_graph_partial() {
2215        let dir = make_temp_dir("missing-action");
2216        let workflows_dir = dir.join(".github/workflows");
2217        std::fs::create_dir_all(&workflows_dir).unwrap();
2218
2219        // Note: no action.yml created — the path doesn't exist.
2220        let workflow = r#"
2221jobs:
2222  ci:
2223    steps:
2224      - uses: ./.github/actions/missing-action
2225"#;
2226        let workflow_path = workflows_dir.join("ci.yml");
2227        std::fs::write(&workflow_path, workflow).unwrap();
2228
2229        let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2230
2231        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2232        assert!(
2233            graph
2234                .completeness_gaps
2235                .iter()
2236                .any(|g| g.contains("composite action not found") && g.contains("missing-action")),
2237            "missing action.yml must be recorded as a completeness gap, got: {:?}",
2238            graph.completeness_gaps
2239        );
2240
2241        let _ = std::fs::remove_dir_all(&dir);
2242    }
2243
2244    #[test]
2245    fn non_composite_local_action_marks_graph_partial() {
2246        let dir = make_temp_dir("non-composite");
2247        let workflows_dir = dir.join(".github/workflows");
2248        let action_dir = dir.join(".github/actions/docker-action");
2249        std::fs::create_dir_all(&workflows_dir).unwrap();
2250        std::fs::create_dir_all(&action_dir).unwrap();
2251
2252        // Docker-based local action: steps are inside the image, not visible.
2253        let action_yml = r#"
2254name: Docker Action
2255runs:
2256  using: docker
2257  image: Dockerfile
2258"#;
2259        std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
2260
2261        let workflow = r#"
2262jobs:
2263  ci:
2264    steps:
2265      - uses: ./.github/actions/docker-action
2266"#;
2267        let workflow_path = workflows_dir.join("ci.yml");
2268        std::fs::write(&workflow_path, workflow).unwrap();
2269
2270        let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2271
2272        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2273        assert!(
2274            graph
2275                .completeness_gaps
2276                .iter()
2277                .any(|g| g.contains("non-composite local action")),
2278            "docker action must mark graph Partial, got: {:?}",
2279            graph.completeness_gaps
2280        );
2281
2282        // No inlined steps — only the calling step.
2283        let inlined: Vec<_> = graph
2284            .nodes_of_kind(NodeKind::Step)
2285            .filter(|s| s.metadata.contains_key(META_COMPOSITE_STEP))
2286            .collect();
2287        assert!(inlined.is_empty(), "non-composite must not inline steps");
2288
2289        let _ = std::fs::remove_dir_all(&dir);
2290    }
2291
2292    #[test]
2293    fn composite_action_inlined_step_secrets_captured() {
2294        let dir = make_temp_dir("composite-secrets");
2295        let workflows_dir = dir.join(".github/workflows");
2296        let action_dir = dir.join(".github/actions/deploy");
2297        std::fs::create_dir_all(&workflows_dir).unwrap();
2298        std::fs::create_dir_all(&action_dir).unwrap();
2299
2300        let action_yml = r#"
2301name: Deploy
2302runs:
2303  using: composite
2304  steps:
2305    - name: Push
2306      run: |
2307        curl -H "Authorization: ${{ secrets.DEPLOY_TOKEN }}" https://example.com
2308      shell: bash
2309    - name: Notify
2310      uses: some-org/notify@v1
2311      with:
2312        api-key: "${{ secrets.NOTIFY_KEY }}"
2313"#;
2314        std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
2315
2316        let workflow = r#"
2317jobs:
2318  release:
2319    steps:
2320      - uses: ./.github/actions/deploy
2321"#;
2322        let workflow_path = workflows_dir.join("release.yml");
2323        std::fs::write(&workflow_path, workflow).unwrap();
2324
2325        let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2326
2327        let secret_names: Vec<_> = graph
2328            .nodes_of_kind(NodeKind::Secret)
2329            .map(|s| s.name.as_str())
2330            .collect();
2331        assert!(
2332            secret_names.contains(&"DEPLOY_TOKEN"),
2333            "run: secret in composite step must be captured, got: {secret_names:?}"
2334        );
2335        assert!(
2336            secret_names.contains(&"NOTIFY_KEY"),
2337            "with: secret in composite step must be captured, got: {secret_names:?}"
2338        );
2339
2340        let _ = std::fs::remove_dir_all(&dir);
2341    }
2342
2343    #[test]
2344    fn workflow_level_permissions_create_identity() {
2345        let yaml = r#"
2346permissions: write-all
2347jobs:
2348  ci:
2349    steps:
2350      - run: echo hi
2351"#;
2352        let graph = parse(yaml);
2353        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2354        assert_eq!(identities.len(), 1);
2355        assert_eq!(identities[0].name, "GITHUB_TOKEN");
2356        assert_eq!(
2357            identities[0].metadata.get(META_PERMISSIONS).unwrap(),
2358            "write-all"
2359        );
2360    }
2361
2362    #[test]
2363    fn job_env_template_expression_does_not_crash_and_marks_partial() {
2364        // Real-world repro from scikit-learn unit-tests.yml: job-level `env:`
2365        // is a bare template expression (`${{ matrix }}`) instead of a map.
2366        // Historically the GHA parser deserialized env: as `HashMap<String,String>`
2367        // and crashed with "invalid type: string ..., expected a map". The parser
2368        // must now tolerate this gracefully: parse succeeds, graph is marked Partial
2369        // with a reason that mentions the template-shaped env.
2370        let yaml = r#"
2371jobs:
2372  unit-tests:
2373    env: ${{ matrix }}
2374    steps:
2375      - run: pytest
2376"#;
2377        let graph = parse(yaml);
2378        // No crash — parse returned a graph.
2379        assert!(
2380            matches!(graph.completeness, AuthorityCompleteness::Partial),
2381            "graph must be marked Partial when env: is a template expression"
2382        );
2383        let saw_template_gap = graph
2384            .completeness_gaps
2385            .iter()
2386            .any(|g| g.contains("env:") && g.contains("template"));
2387        assert!(
2388            saw_template_gap,
2389            "completeness_gaps must mention env: template, got: {:?}",
2390            graph.completeness_gaps
2391        );
2392        // Steps still parsed normally.
2393        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2394        assert_eq!(steps.len(), 1, "the single step must still be parsed");
2395    }
2396
2397    #[test]
2398    fn env_with_non_string_scalar_values_parses() {
2399        // Real-world repro from BurntSushi/ripgrep ci.yml and many others:
2400        // GHA env values can be booleans (`COVERAGE: false`), integers
2401        // (`RUST_BACKTRACE: 1`), or null (`TARGET_FLAGS:`). A naive
2402        // HashMap<String, String> deserializer rejects these. After the fix,
2403        // they round-trip — booleans/numbers as their textual form,
2404        // null as the empty string.
2405        let yaml = r#"
2406jobs:
2407  test:
2408    env:
2409      RUST_BACKTRACE: 1
2410      COVERAGE: false
2411      TARGET_FLAGS:
2412      CARGO: cargo
2413    steps:
2414      - run: cargo test
2415"#;
2416        let graph = parse(yaml);
2417        // Parse must succeed and produce the step node.
2418        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2419        assert_eq!(steps.len(), 1, "expected the single step to parse");
2420        // Graph stays Complete — env: is a real map, not a template.
2421        assert!(
2422            !matches!(graph.completeness, AuthorityCompleteness::Partial)
2423                || !graph
2424                    .completeness_gaps
2425                    .iter()
2426                    .any(|g| g.contains("env:") && g.contains("template")),
2427            "non-string env values must not mark the graph Partial via the env-template path"
2428        );
2429    }
2430
2431    #[test]
2432    fn step_env_with_boolean_and_integer_values_parses() {
2433        // Same regression class but at step-level env: instead of job-level.
2434        let yaml = r#"
2435jobs:
2436  build:
2437    steps:
2438      - name: build
2439        run: make
2440        env:
2441          DEBUG: true
2442          RETRIES: 3
2443          OPTIONAL_FLAG:
2444"#;
2445        let graph = parse(yaml);
2446        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2447        assert_eq!(steps.len(), 1);
2448    }
2449
2450    #[test]
2451    fn meta_job_name_set_on_step_nodes() {
2452        let yaml = r#"
2453jobs:
2454  build:
2455    steps:
2456      - name: Checkout
2457        uses: actions/checkout@v4
2458      - name: Compile
2459        run: make build
2460"#;
2461        let graph = parse(yaml);
2462        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2463        assert!(!steps.is_empty(), "expected at least one Step node");
2464        for step in &steps {
2465            assert_eq!(
2466                step.metadata.get(META_JOB_NAME).map(String::as_str),
2467                Some("build"),
2468                "Step {:?} missing META_JOB_NAME=build",
2469                step.name
2470            );
2471        }
2472    }
2473
2474    // ── Cross-platform misclassification trap (red-team R2 #5) ─────
2475
2476    #[test]
2477    fn jobs_without_steps_marks_partial() {
2478        // `jobs:` is non-empty (parser deserializes them happily) but every
2479        // job has no `steps:` block — the GHA parser produces 0 Step nodes.
2480        // This is the canonical "wrong-platform smuggle" shape: an attacker
2481        // gets a misclassified file past auto-detect, no recognisable steps
2482        // get materialised, and the previous behaviour was completeness =
2483        // complete + 0 findings = "passed". Now Partial.
2484        let yaml = r#"
2485on:
2486  push:
2487jobs:
2488  build:
2489    runs-on: ubuntu-latest
2490"#;
2491        let graph = parse(yaml);
2492        let step_count = graph
2493            .nodes
2494            .iter()
2495            .filter(|n| n.kind == NodeKind::Step)
2496            .count();
2497        assert_eq!(step_count, 0, "no steps: present means 0 Step nodes");
2498        assert_eq!(
2499            graph.completeness,
2500            AuthorityCompleteness::Partial,
2501            "0-step-nodes despite non-empty jobs: must mark Partial"
2502        );
2503        assert!(
2504            graph
2505                .completeness_gaps
2506                .iter()
2507                .any(|g| g.contains("0 step nodes")),
2508            "completeness_gaps must mention 0 step nodes: {:?}",
2509            graph.completeness_gaps
2510        );
2511    }
2512
2513    #[test]
2514    fn empty_workflow_no_jobs_does_not_mark_partial_for_zero_steps() {
2515        // An entirely empty workflow (no `jobs:` at all) has nothing to
2516        // classify — completeness should not flip to Partial just because
2517        // there are zero step nodes (the source had no carrier).
2518        let yaml = "name: empty\non:\n  push:\n";
2519        let graph = parse(yaml);
2520        let zero_step_gap = graph
2521            .completeness_gaps
2522            .iter()
2523            .any(|g| g.contains("0 step nodes"));
2524        assert!(
2525            !zero_step_gap,
2526            "no jobs: in source means no 0-step gap reason; got: {:?}",
2527            graph.completeness_gaps
2528        );
2529    }
2530}