Skip to main content

taudit_parse_gha/
lib.rs

1use std::collections::HashMap;
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
/// Metadata value stored under `META_INFERRED` on a secret node whose name was
/// inferred from a `run:` script rather than precisely mapped.
const META_INFERRED_VAL: &str = "true";
10
11/// GitHub Actions workflow parser.
12pub struct GhaParser;
13
14impl PipelineParser for GhaParser {
15    fn platform(&self) -> &str {
16        "github-actions"
17    }
18
19    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
20        let mut de = serde_yaml::Deserializer::from_str(content);
21        let doc = de
22            .next()
23            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
24        let workflow: GhaWorkflow = GhaWorkflow::deserialize(doc)
25            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
26        let extra_docs = de.next().is_some();
27
28        let mut graph = AuthorityGraph::new(source.clone());
29        if extra_docs {
30            graph.mark_partial(
31                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
32            );
33        }
34        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
35
36        let is_pull_request_target = workflow
37            .triggers
38            .as_ref()
39            .map(trigger_has_pull_request_target)
40            .unwrap_or(false);
41
42        if is_pull_request_target {
43            graph
44                .metadata
45                .insert(META_TRIGGER.into(), "pull_request_target".into());
46        }
47
48        // Workflow-level permissions -> GITHUB_TOKEN identity node
49        let token_id = if let Some(ref perms) = workflow.permissions {
50            let perm_string = perms.to_string();
51            let scope = IdentityScope::from_permissions(&perm_string);
52            let mut meta = HashMap::new();
53            meta.insert(META_PERMISSIONS.into(), perm_string.clone());
54            meta.insert(
55                META_IDENTITY_SCOPE.into(),
56                format!("{scope:?}").to_lowercase(),
57            );
58            // OIDC: id-token: write → token is OIDC-capable (federated scope).
59            // Check the formatted substring directly — Permissions::Map fmt produces
60            // "id-token: write" so this won't false-positive on "contents: write".
61            if perm_string.contains("id-token: write") || perm_string == "write-all" {
62                meta.insert(META_OIDC.into(), "true".into());
63            }
64            Some(graph.add_node_with_metadata(
65                NodeKind::Identity,
66                "GITHUB_TOKEN",
67                TrustZone::FirstParty,
68                meta,
69            ))
70        } else {
71            None
72        };
73
74        for (job_name, job) in &workflow.jobs {
75            // Job-level permissions override workflow-level
76            let job_token_id = if let Some(ref perms) = job.permissions {
77                let perm_string = perms.to_string();
78                let scope = IdentityScope::from_permissions(&perm_string);
79                let mut meta = HashMap::new();
80                meta.insert(META_PERMISSIONS.into(), perm_string.clone());
81                meta.insert(
82                    META_IDENTITY_SCOPE.into(),
83                    format!("{scope:?}").to_lowercase(),
84                );
85                if perm_string.contains("id-token: write") {
86                    meta.insert(META_OIDC.into(), "true".into());
87                }
88                Some(graph.add_node_with_metadata(
89                    NodeKind::Identity,
90                    format!("GITHUB_TOKEN ({job_name})"),
91                    TrustZone::FirstParty,
92                    meta,
93                ))
94            } else {
95                token_id
96            };
97
98            // Reusable workflow: job.uses= means this job delegates to another workflow.
99            // We cannot resolve it inline — mark the graph partial and skip steps.
100            if let Some(ref uses) = job.uses {
101                let trust_zone = if is_sha_pinned(uses) {
102                    TrustZone::ThirdParty
103                } else {
104                    TrustZone::Untrusted
105                };
106                let rw_id = graph.add_node(NodeKind::Image, uses, trust_zone);
107                // Synthetic step represents this job delegating to the called workflow
108                let job_step_id = graph.add_node(NodeKind::Step, job_name, TrustZone::FirstParty);
109                graph.add_edge(job_step_id, rw_id, EdgeKind::DelegatesTo);
110                if let Some(tok_id) = job_token_id {
111                    graph.add_edge(job_step_id, tok_id, EdgeKind::HasAccessTo);
112                }
113                graph.mark_partial(format!(
114                    "reusable workflow '{uses}' in job '{job_name}' cannot be resolved inline — authority within the called workflow is unknown"
115                ));
116                continue;
117            }
118
119            // Matrix strategy: authority shape may differ per matrix entry — mark Partial
120            if job
121                .strategy
122                .as_ref()
123                .and_then(|s| s.get("matrix"))
124                .is_some()
125            {
126                graph.mark_partial(format!(
127                    "job '{job_name}' uses matrix strategy — authority shape may differ per matrix entry"
128                ));
129            }
130
131            // Self-hosted runner detection: `runs-on: self-hosted` or a sequence
132            // that includes `self-hosted`. Creates an Image node tagged with
133            // META_SELF_HOSTED so downstream rules can flag the job. Hosted
134            // runners (ubuntu-latest, etc.) are not represented as Image nodes —
135            // this keeps the graph focused on non-default attack surface.
136            if is_self_hosted_runner(job.runs_on.as_ref()) {
137                let runner_name = runner_label(job.runs_on.as_ref()).unwrap_or("self-hosted");
138                let mut meta = HashMap::new();
139                meta.insert(META_SELF_HOSTED.into(), "true".into());
140                graph.add_node_with_metadata(
141                    NodeKind::Image,
142                    runner_name,
143                    TrustZone::FirstParty,
144                    meta,
145                );
146            }
147
148            // Container: job-level container image — add as Image node and capture ID
149            // so each step in this job can be linked to it via UsesImage.
150            let container_image_id: Option<NodeId> = if let Some(ref container) = job.container {
151                let image_str = container.image();
152                let pinned = is_docker_digest_pinned(image_str);
153                let trust_zone = if pinned {
154                    TrustZone::ThirdParty
155                } else {
156                    TrustZone::Untrusted
157                };
158                let mut meta = HashMap::new();
159                meta.insert(META_CONTAINER.into(), "true".into());
160                if pinned {
161                    if let Some(digest) = image_str.split("@sha256:").nth(1) {
162                        meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
163                    }
164                }
165                Some(graph.add_node_with_metadata(NodeKind::Image, image_str, trust_zone, meta))
166            } else {
167                None
168            };
169
170            for (step_idx, step) in job.steps.iter().enumerate() {
171                let default_name = format!("{job_name}[{step_idx}]");
172                let step_name = step.name.as_deref().unwrap_or(&default_name);
173
174                // Determine trust zone and create image node if `uses:` present
175                let (trust_zone, image_node_id) = if let Some(ref uses) = step.uses {
176                    let (zone, image_id) = classify_action(uses, &mut graph);
177                    (zone, Some(image_id))
178                } else if is_pull_request_target {
179                    // run: step in a pull_request_target workflow — may execute fork code
180                    (TrustZone::Untrusted, None)
181                } else {
182                    // Inline `run:` step — first party
183                    (TrustZone::FirstParty, None)
184                };
185
186                let step_id = graph.add_node(NodeKind::Step, step_name, trust_zone);
187
188                // Link step to action image
189                if let Some(img_id) = image_node_id {
190                    graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
191                }
192
193                // Link step to job container — steps run inside the container's execution
194                // environment, so a floating container is a supply chain risk for every step.
195                if let Some(img_id) = container_image_id {
196                    graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
197                }
198
199                // Link step to GITHUB_TOKEN if it exists
200                if let Some(tok_id) = job_token_id {
201                    graph.add_edge(step_id, tok_id, EdgeKind::HasAccessTo);
202                }
203
204                // Cloud identity inference: detect known OIDC cloud auth actions and
205                // create an Identity node representing the assumed cloud identity.
206                if let Some(ref uses) = step.uses {
207                    if let Some(cloud_id) =
208                        classify_cloud_auth(uses, step.with.as_ref(), &mut graph)
209                    {
210                        graph.add_edge(step_id, cloud_id, EdgeKind::HasAccessTo);
211                    }
212                }
213
214                // Attestation action detection
215                if let Some(ref uses) = step.uses {
216                    let action = uses.split('@').next().unwrap_or(uses);
217                    if matches!(
218                        action,
219                        "actions/attest-build-provenance" | "sigstore/cosign-installer"
220                    ) {
221                        if let Some(node) = graph.nodes.get_mut(step_id) {
222                            node.metadata.insert(META_ATTESTS.into(), "true".into());
223                        }
224                    }
225                }
226
227                // actions/checkout detection. Tag unconditionally — downstream rules
228                // gate on trigger context (pull_request / pull_request_target) to
229                // decide whether the checkout is pulling untrusted fork code. Tagging
230                // here avoids trigger-ordering dependencies across jobs.
231                if let Some(ref uses) = step.uses {
232                    let action = uses.split('@').next().unwrap_or(uses);
233                    if action == "actions/checkout" {
234                        if let Some(node) = graph.nodes.get_mut(step_id) {
235                            node.metadata
236                                .insert(META_CHECKOUT_SELF.into(), "true".into());
237                        }
238                    }
239                }
240
241                // Process secrets from workflow-level `env:` (inherited by all jobs/steps)
242                if let Some(ref env) = workflow.env {
243                    for env_val in env.values() {
244                        if is_secret_reference(env_val) {
245                            let secret_name = extract_secret_name(env_val);
246                            let secret_id =
247                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
248                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
249                        }
250                    }
251                }
252
253                // Process secrets from job-level `env:` (inherited by all steps)
254                if let Some(ref env) = job.env {
255                    for env_val in env.values() {
256                        if is_secret_reference(env_val) {
257                            let secret_name = extract_secret_name(env_val);
258                            let secret_id =
259                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
260                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
261                        }
262                    }
263                }
264
265                // Process secrets from step-level `env:` block
266                if let Some(ref env) = step.env {
267                    for env_val in env.values() {
268                        if is_secret_reference(env_val) {
269                            let secret_name = extract_secret_name(env_val);
270                            let secret_id =
271                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
272                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
273                        }
274                    }
275                }
276
277                // Process secrets from `with:` block
278                if let Some(ref with) = step.with {
279                    for val in with.values() {
280                        if is_secret_reference(val) {
281                            let secret_name = extract_secret_name(val);
282                            let secret_id =
283                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
284                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
285                        }
286                    }
287                }
288
289                // Detect inferred secrets in `run:` script blocks
290                if let Some(ref run) = step.run {
291                    if run.contains("${{ secrets.") {
292                        // Extract secret names from the shell script
293                        let mut pos = 0;
294                        while let Some(start) = run[pos..].find("secrets.") {
295                            let abs_start = pos + start + 8;
296                            let remaining = &run[abs_start..];
297                            let end = remaining
298                                .find(|c: char| !c.is_alphanumeric() && c != '_')
299                                .unwrap_or(remaining.len());
300                            let secret_name = &remaining[..end];
301                            if !secret_name.is_empty() {
302                                let secret_id =
303                                    find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
304                                // Mark as inferred — not precisely mapped
305                                if let Some(node) = graph.nodes.get_mut(secret_id) {
306                                    node.metadata
307                                        .insert(META_INFERRED.into(), META_INFERRED_VAL.into());
308                                }
309                                graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
310                                graph.mark_partial(format!(
311                                    "secret '{secret_name}' referenced in run: script — inferred, not precisely mapped"
312                                ));
313                            }
314                            pos = abs_start + end;
315                        }
316                    }
317                }
318
319                // Detect writes to the GHA environment gate.
320                // Broad detection: presence of GITHUB_ENV or GITHUB_PATH in a run script
321                // covers every redirect form (`>> $GITHUB_ENV`, `>> "$GITHUB_ENV"`,
322                // `>> ${GITHUB_ENV}`, `tee -a $GITHUB_PATH`, etc.) without brittle
323                // multi-variant string matching. Reading these vars without writing is
324                // extremely rare in practice, making this an acceptable tradeoff for
325                // completeness.
326                if let Some(ref run) = step.run {
327                    let writes_gate = run.contains("GITHUB_ENV") || run.contains("GITHUB_PATH");
328                    if writes_gate {
329                        if let Some(node) = graph.nodes.get_mut(step_id) {
330                            node.metadata
331                                .insert(META_WRITES_ENV_GATE.into(), "true".into());
332                        }
333                    }
334                }
335            }
336        }
337
338        Ok(graph)
339    }
340}
341
342/// Returns true if the workflow's `on:` triggers include `pull_request_target`.
343/// GHA `on:` is polymorphic: string, sequence, or mapping.
344fn trigger_has_pull_request_target(triggers: &serde_yaml::Value) -> bool {
345    const PRT: &str = "pull_request_target";
346    match triggers {
347        serde_yaml::Value::String(s) => s == PRT,
348        serde_yaml::Value::Sequence(seq) => seq
349            .iter()
350            .any(|v| v.as_str().map(|s| s == PRT).unwrap_or(false)),
351        serde_yaml::Value::Mapping(map) => map
352            .iter()
353            .any(|(k, _)| k.as_str().map(|s| s == PRT).unwrap_or(false)),
354        _ => false,
355    }
356}
357
358/// Returns true if `runs-on` names a self-hosted runner.
359///
360/// GHA `runs-on` is polymorphic: a string (`ubuntu-latest`, `self-hosted`), a
361/// sequence (`[self-hosted, linux, x64]`), or — for group selection — a mapping
362/// (`{ group: my-group, labels: [...] }`). Any form that contains `self-hosted`
363/// (as a string, sequence entry, or label entry) is considered self-hosted.
364/// Explicit `group:` without `self-hosted` is also self-hosted by construction.
365fn is_self_hosted_runner(runs_on: Option<&serde_yaml::Value>) -> bool {
366    const SH: &str = "self-hosted";
367    let Some(val) = runs_on else {
368        return false;
369    };
370    match val {
371        serde_yaml::Value::String(s) => s == SH,
372        serde_yaml::Value::Sequence(seq) => seq
373            .iter()
374            .any(|v| v.as_str().map(|s| s == SH).unwrap_or(false)),
375        serde_yaml::Value::Mapping(map) => {
376            if map.contains_key("group") {
377                return true;
378            }
379            if let Some(labels) = map.get("labels") {
380                match labels {
381                    serde_yaml::Value::String(s) => s == SH,
382                    serde_yaml::Value::Sequence(seq) => seq
383                        .iter()
384                        .any(|v| v.as_str().map(|s| s == SH).unwrap_or(false)),
385                    _ => false,
386                }
387            } else {
388                false
389            }
390        }
391        _ => false,
392    }
393}
394
395/// Extract a human-readable label from a `runs-on` value for naming the Image
396/// node. Prefers the first non-`self-hosted` label in a sequence (more specific),
397/// falls back to the string value or "self-hosted".
398fn runner_label(runs_on: Option<&serde_yaml::Value>) -> Option<&str> {
399    let val = runs_on?;
400    match val {
401        serde_yaml::Value::String(s) => Some(s.as_str()),
402        serde_yaml::Value::Sequence(seq) => {
403            for v in seq {
404                if let Some(s) = v.as_str() {
405                    if s != "self-hosted" {
406                        return Some(s);
407                    }
408                }
409            }
410            seq.first().and_then(|v| v.as_str())
411        }
412        serde_yaml::Value::Mapping(map) => map.get("group").and_then(|v| v.as_str()),
413        _ => None,
414    }
415}
416
417/// Classify a `uses:` reference into trust zone and create image node.
418fn classify_action(uses: &str, graph: &mut AuthorityGraph) -> (TrustZone, NodeId) {
419    let pinned = is_sha_pinned(uses);
420    let is_local = uses.starts_with("./");
421
422    let zone = if is_local {
423        TrustZone::FirstParty
424    } else if pinned {
425        TrustZone::ThirdParty
426    } else {
427        TrustZone::Untrusted
428    };
429
430    let mut meta = HashMap::new();
431    if pinned {
432        if let Some(sha) = uses.split('@').next_back() {
433            meta.insert(META_DIGEST.into(), sha.into());
434        }
435    }
436
437    let id = graph.add_node_with_metadata(NodeKind::Image, uses, zone, meta);
438    (zone, id)
439}
440
/// Returns true if `val` contains a GitHub Actions expression that starts
/// with the `secrets.` context, e.g. `${{ secrets.TOKEN }}`.
///
/// Any amount of whitespace (including none) between `${{` and `secrets.` is
/// accepted, so `${{secrets.TOKEN}}` is recognized as well. A bare `secrets.X`
/// outside an expression is not a reference.
fn is_secret_reference(val: &str) -> bool {
    val.match_indices("${{").any(|(open_at, opener)| {
        val[open_at + opener.len()..]
            .trim_start()
            .starts_with("secrets.")
    })
}
444
/// Pulls the secret name out of a value such as `"${{ secrets.MY_SECRET }}"`.
///
/// The name is the run of alphanumeric or `_` characters that follows the
/// first `secrets.` in `val`. When `val` contains no `secrets.` at all, the
/// whole value is returned unchanged.
fn extract_secret_name(val: &str) -> String {
    match val.split_once("secrets.") {
        None => val.to_string(),
        Some((_, tail)) => tail
            .chars()
            .take_while(|ch| ch.is_alphanumeric() || *ch == '_')
            .collect(),
    }
}
457
458fn find_or_create_secret(
459    graph: &mut AuthorityGraph,
460    cache: &mut HashMap<String, NodeId>,
461    name: &str,
462) -> NodeId {
463    if let Some(&id) = cache.get(name) {
464        return id;
465    }
466    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
467    cache.insert(name.to_string(), id);
468    id
469}
470
471/// Detect known OIDC cloud authentication actions and create an Identity node
472/// representing the cloud identity that will be assumed.
473///
474/// Only handles the OIDC/federated path — static credential inputs (e.g.
475/// `aws-secret-access-key: ${{ secrets.X }}`) are already captured by the
476/// regular `with:` secret scanning and don't need a separate Identity node.
477///
478/// Returns `Some(NodeId)` of the created Identity, or `None` if not recognized.
479fn classify_cloud_auth(
480    uses: &str,
481    with: Option<&HashMap<String, String>>,
482    graph: &mut AuthorityGraph,
483) -> Option<NodeId> {
484    // Strip `@version` — match any version of the action
485    let action = uses.split('@').next().unwrap_or(uses);
486
487    match action {
488        "aws-actions/configure-aws-credentials" => {
489            // OIDC path: role-to-assume present (no static access key needed)
490            let w = with?;
491            let role = w.get("role-to-assume")?;
492            // ARN format: arn:aws:iam::123456789012:role/my-role
493            // Split on '/' to get the role name; fall back to the full value.
494            let short = role.split('/').next_back().unwrap_or(role.as_str());
495            let mut meta = HashMap::new();
496            meta.insert(META_OIDC.into(), "true".into());
497            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
498            meta.insert(META_PERMISSIONS.into(), "AWS role assumption (OIDC)".into());
499            Some(graph.add_node_with_metadata(
500                NodeKind::Identity,
501                format!("AWS/{short}"),
502                TrustZone::FirstParty,
503                meta,
504            ))
505        }
506        "google-github-actions/auth" => {
507            // OIDC path: workload_identity_provider present
508            let w = with?;
509            let provider = w.get("workload_identity_provider")?;
510            let short = provider.split('/').next_back().unwrap_or(provider.as_str());
511            let mut meta = HashMap::new();
512            meta.insert(META_OIDC.into(), "true".into());
513            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
514            meta.insert(
515                META_PERMISSIONS.into(),
516                "GCP workload identity federation".into(),
517            );
518            Some(graph.add_node_with_metadata(
519                NodeKind::Identity,
520                format!("GCP/{short}"),
521                TrustZone::FirstParty,
522                meta,
523            ))
524        }
525        "azure/login" => {
526            // OIDC path: client-id present without client-secret
527            let w = with?;
528            let client_id = w.get("client-id")?;
529            // Only treat as OIDC if no static client-secret is provided
530            if w.contains_key("client-secret") {
531                return None; // static SP creds captured by with: secret scanning
532            }
533            let mut meta = HashMap::new();
534            meta.insert(META_OIDC.into(), "true".into());
535            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
536            meta.insert(
537                META_PERMISSIONS.into(),
538                "Azure federated credential (OIDC)".into(),
539            );
540            Some(graph.add_node_with_metadata(
541                NodeKind::Identity,
542                format!("Azure/{client_id}"),
543                TrustZone::FirstParty,
544                meta,
545            ))
546        }
547        _ => None,
548    }
549}
550
551// ── Serde models for GHA YAML ──────────────────────────
552
/// Flexible `permissions:` value: either a shorthand string (e.g. "write-all")
/// or a map of scope name to access level (e.g. `contents: read`).
///
/// `untagged` lets serde pick the variant from the YAML shape alone.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum Permissions {
    /// Shorthand form, kept verbatim.
    String(String),
    /// Per-scope form: scope name -> access level.
    Map(HashMap<String, String>),
}
560
561impl std::fmt::Display for Permissions {
562    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
563        match self {
564            Permissions::String(s) => write!(f, "{s}"),
565            Permissions::Map(m) => {
566                let parts: Vec<String> = m.iter().map(|(k, v)| format!("{k}: {v}")).collect();
567                write!(f, "{{ {} }}", parts.join(", "))
568            }
569        }
570    }
571}
572
/// The subset of a workflow file that the parser deserializes.
#[derive(Debug, Deserialize)]
pub struct GhaWorkflow {
    /// Workflow trigger(s). Polymorphic: string, sequence, or mapping.
    #[serde(rename = "on", default)]
    pub triggers: Option<serde_yaml::Value>,
    /// Workflow-level `permissions:`; `None` when the key is absent.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Workflow-level env vars, inherited by all jobs and steps.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Jobs keyed by job id; empty when the `jobs:` key is absent.
    #[serde(default)]
    pub jobs: HashMap<String, GhaJob>,
}
586
/// Job-level container config. Polymorphic: string image or map with `image:` key.
///
/// `untagged` lets serde pick the variant from the YAML shape alone.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
pub enum ContainerConfig {
    /// Short form: `container: <image>`.
    Image(String),
    /// Map form; only the `image` key is captured.
    Full { image: String },
}
594
595impl ContainerConfig {
596    pub fn image(&self) -> &str {
597        match self {
598            ContainerConfig::Image(s) => s,
599            ContainerConfig::Full { image } => image,
600        }
601    }
602}
603
/// The subset of a job definition that the parser deserializes.
#[derive(Debug, Deserialize)]
pub struct GhaJob {
    /// Job-level `permissions:`; when present the parser uses it instead of
    /// the workflow-level permissions for this job's token.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Job-level env vars, inherited by all steps of the job.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Steps in declaration order; empty when the `steps:` key is absent.
    #[serde(default)]
    pub steps: Vec<GhaStep>,
    /// Reusable workflow reference — `uses: owner/repo/.github/workflows/foo.yml@ref`
    #[serde(default)]
    pub uses: Option<String>,
    /// Job container image.
    #[serde(default)]
    pub container: Option<ContainerConfig>,
    /// Matrix/strategy configuration. When a matrix is present, the authority
    /// shape may differ per matrix entry — graph is marked Partial.
    #[serde(default)]
    pub strategy: Option<serde_yaml::Value>,
    /// Runner selection. Can be a string (`ubuntu-latest`), a sequence
    /// (`[self-hosted, linux]`), a mapping (`{ group: ..., labels: [...] }`),
    /// or absent for reusable workflows.
    #[serde(rename = "runs-on", default)]
    pub runs_on: Option<serde_yaml::Value>,
}
627
/// The subset of a step definition that the parser deserializes.
#[derive(Debug, Deserialize)]
pub struct GhaStep {
    /// Display name; when absent the parser labels the step `job[index]`.
    pub name: Option<String>,
    /// Action reference (`owner/repo@ref` or a local `./path`).
    pub uses: Option<String>,
    /// Inline script for a `run:` step.
    pub run: Option<String>,
    /// Step-level env vars.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Inputs passed to the action through `with:`.
    #[serde(rename = "with", default)]
    pub with: Option<HashMap<String, String>>,
}
638
639#[cfg(test)]
640mod tests {
641    use super::*;
642
643    fn parse(yaml: &str) -> AuthorityGraph {
644        let parser = GhaParser;
645        let source = PipelineSource {
646            file: "test.yml".into(),
647            repo: None,
648            git_ref: None,
649        };
650        parser.parse(yaml, &source).unwrap()
651    }
652
653    #[test]
654    fn parses_simple_workflow() {
655        let yaml = r#"
656permissions: write-all
657jobs:
658  build:
659    steps:
660      - name: Checkout
661        uses: actions/checkout@v4
662      - name: Build
663        run: make build
664"#;
665        let graph = parse(yaml);
666        assert!(graph.nodes.len() >= 3); // GITHUB_TOKEN + 2 steps + 1 image
667    }
668
669    #[test]
670    fn detects_secret_in_env() {
671        let yaml = r#"
672jobs:
673  deploy:
674    steps:
675      - name: Deploy
676        run: ./deploy.sh
677        env:
678          AWS_KEY: "${{ secrets.AWS_ACCESS_KEY_ID }}"
679"#;
680        let graph = parse(yaml);
681        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
682        assert_eq!(secrets.len(), 1);
683        assert_eq!(secrets[0].name, "AWS_ACCESS_KEY_ID");
684    }
685
686    #[test]
687    fn classifies_unpinned_action_as_untrusted() {
688        let yaml = r#"
689jobs:
690  ci:
691    steps:
692      - uses: actions/checkout@v4
693"#;
694        let graph = parse(yaml);
695        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
696        assert_eq!(images.len(), 1);
697        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
698    }
699
700    #[test]
701    fn classifies_sha_pinned_action_as_third_party() {
702        let yaml = r#"
703jobs:
704  ci:
705    steps:
706      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
707"#;
708        let graph = parse(yaml);
709        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
710        assert_eq!(images.len(), 1);
711        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
712    }
713
714    #[test]
715    fn classifies_local_action_as_first_party() {
716        let yaml = r#"
717jobs:
718  ci:
719    steps:
720      - uses: ./.github/actions/my-action
721"#;
722        let graph = parse(yaml);
723        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
724        assert_eq!(images.len(), 1);
725        assert_eq!(images[0].trust_zone, TrustZone::FirstParty);
726    }
727
728    #[test]
729    fn detects_secret_in_with() {
730        let yaml = r#"
731jobs:
732  deploy:
733    steps:
734      - name: Publish
735        uses: some-org/publish@v1
736        with:
737          token: "${{ secrets.NPM_TOKEN }}"
738"#;
739        let graph = parse(yaml);
740        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
741        assert_eq!(secrets.len(), 1);
742        assert_eq!(secrets[0].name, "NPM_TOKEN");
743    }
744
745    #[test]
746    fn inferred_secret_in_run_block_detected() {
747        let yaml = r#"
748jobs:
749  deploy:
750    steps:
751      - name: Deploy
752        run: |
753          curl -H "Authorization: ${{ secrets.API_TOKEN }}" https://api.example.com
754"#;
755        let graph = parse(yaml);
756        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
757        assert_eq!(secrets.len(), 1);
758        assert_eq!(secrets[0].name, "API_TOKEN");
759        assert_eq!(
760            secrets[0].metadata.get(META_INFERRED),
761            Some(&"true".to_string())
762        );
763        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
764        assert!(!graph.completeness_gaps.is_empty());
765    }
766
767    #[test]
768    fn job_level_env_inherited_by_steps() {
769        let yaml = r#"
770jobs:
771  build:
772    env:
773      DB_PASSWORD: "${{ secrets.DB_PASSWORD }}"
774    steps:
775      - name: Step A
776        run: echo "a"
777      - name: Step B
778        run: echo "b"
779"#;
780        let graph = parse(yaml);
781        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
782        assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
783
784        // Both steps should have access to the secret
785        let secret_id = secrets[0].id;
786        let accessing_steps = graph
787            .edges_to(secret_id)
788            .filter(|e| e.kind == EdgeKind::HasAccessTo)
789            .count();
790        assert_eq!(accessing_steps, 2, "both steps inherit job-level env");
791    }
792
793    #[test]
794    fn identity_scope_set_on_token() {
795        let yaml = r#"
796permissions: write-all
797jobs:
798  ci:
799    steps:
800      - run: echo hi
801"#;
802        let graph = parse(yaml);
803        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
804        assert_eq!(identities.len(), 1);
805        assert_eq!(
806            identities[0].metadata.get(META_IDENTITY_SCOPE),
807            Some(&"broad".to_string())
808        );
809    }
810
811    #[test]
812    fn constrained_identity_scope() {
813        let yaml = r#"
814permissions:
815  contents: read
816jobs:
817  ci:
818    steps:
819      - run: echo hi
820"#;
821        let graph = parse(yaml);
822        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
823        assert_eq!(identities.len(), 1);
824        assert_eq!(
825            identities[0].metadata.get(META_IDENTITY_SCOPE),
826            Some(&"constrained".to_string())
827        );
828    }
829
830    #[test]
831    fn pull_request_target_string_trigger_marks_run_steps_untrusted() {
832        let yaml = r#"
833on: pull_request_target
834jobs:
835  check:
836    steps:
837      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
838        with:
839          ref: ${{ github.event.pull_request.head.sha }}
840      - run: npm test
841"#;
842        let graph = parse(yaml);
843        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
844        assert_eq!(steps.len(), 2);
845
846        // run: step should be Untrusted (might execute fork code)
847        let run_step = steps.iter().find(|s| s.name.contains("check[1]")).unwrap();
848        assert_eq!(
849            run_step.trust_zone,
850            TrustZone::Untrusted,
851            "run: step in pull_request_target workflow should be Untrusted"
852        );
853
854        // uses: step keeps its own trust zone (SHA-pinned = ThirdParty)
855        let checkout_step = steps.iter().find(|s| s.name.contains("check[0]")).unwrap();
856        assert_eq!(checkout_step.trust_zone, TrustZone::ThirdParty);
857    }
858
859    #[test]
860    fn pull_request_target_sequence_trigger_marks_run_steps_untrusted() {
861        let yaml = r#"
862on: [push, pull_request_target]
863jobs:
864  ci:
865    steps:
866      - run: echo hi
867"#;
868        let graph = parse(yaml);
869        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
870        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
871    }
872
873    #[test]
874    fn pull_request_target_mapping_trigger_marks_run_steps_untrusted() {
875        let yaml = r#"
876on:
877  pull_request_target:
878    types: [opened, synchronize]
879jobs:
880  ci:
881    steps:
882      - run: echo hi
883"#;
884        let graph = parse(yaml);
885        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
886        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
887    }
888
889    #[test]
890    fn push_trigger_does_not_mark_run_steps_untrusted() {
891        let yaml = r#"
892on: push
893jobs:
894  ci:
895    steps:
896      - run: echo hi
897"#;
898        let graph = parse(yaml);
899        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
900        assert_eq!(
901            steps[0].trust_zone,
902            TrustZone::FirstParty,
903            "push-triggered run: steps should remain FirstParty"
904        );
905    }
906
907    #[test]
908    fn workflow_level_env_inherited_by_all_steps() {
909        let yaml = r#"
910env:
911  DB_URL: "${{ secrets.DATABASE_URL }}"
912jobs:
913  build:
914    steps:
915      - name: Step A
916        run: echo "a"
917  test:
918    steps:
919      - name: Step B
920        run: echo "b"
921"#;
922        let graph = parse(yaml);
923        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
924        assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
925
926        // Both steps in both jobs should inherit the workflow-level secret
927        let secret_id = secrets[0].id;
928        let accessing_steps = graph
929            .edges_to(secret_id)
930            .filter(|e| e.kind == EdgeKind::HasAccessTo)
931            .count();
932        assert_eq!(accessing_steps, 2, "both steps inherit workflow-level env");
933    }
934
935    #[test]
936    fn matrix_strategy_marks_graph_partial() {
937        let yaml = r#"
938jobs:
939  test:
940    strategy:
941      matrix:
942        os: [ubuntu-latest, windows-latest, macos-latest]
943    steps:
944      - run: echo hi
945"#;
946        let graph = parse(yaml);
947        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
948        assert!(
949            graph.completeness_gaps.iter().any(|g| g.contains("matrix")),
950            "matrix strategy should be recorded as a completeness gap"
951        );
952    }
953
954    #[test]
955    fn job_without_matrix_does_not_mark_partial() {
956        let yaml = r#"
957jobs:
958  build:
959    steps:
960      - run: cargo build
961"#;
962        let graph = parse(yaml);
963        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
964    }
965
966    #[test]
967    fn reusable_workflow_creates_image_and_marks_partial() {
968        let yaml = r#"
969jobs:
970  call:
971    uses: org/repo/.github/workflows/deploy.yml@main
972"#;
973        let graph = parse(yaml);
974        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
975        assert_eq!(images.len(), 1);
976        assert_eq!(images[0].name, "org/repo/.github/workflows/deploy.yml@main");
977        assert_eq!(images[0].trust_zone, TrustZone::Untrusted); // not SHA-pinned
978
979        // Step node representing the job delegation
980        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
981        assert_eq!(steps.len(), 1);
982        assert_eq!(steps[0].name, "call");
983
984        // DelegatesTo edge from step to reusable workflow image
985        let delegates: Vec<_> = graph
986            .edges_from(steps[0].id)
987            .filter(|e| e.kind == EdgeKind::DelegatesTo)
988            .collect();
989        assert_eq!(delegates.len(), 1);
990
991        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
992    }
993
994    #[test]
995    fn reusable_workflow_sha_pinned_is_third_party() {
996        let yaml = r#"
997jobs:
998  call:
999    uses: org/repo/.github/workflows/deploy.yml@a5ac7e51b41094c92402da3b24376905380afc29
1000"#;
1001        let graph = parse(yaml);
1002        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1003        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
1004    }
1005
1006    #[test]
1007    fn container_unpinned_creates_image_node_untrusted() {
1008        let yaml = r#"
1009jobs:
1010  build:
1011    container: ubuntu:22.04
1012    steps:
1013      - run: echo hi
1014"#;
1015        let graph = parse(yaml);
1016        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1017        assert_eq!(images.len(), 1);
1018        assert_eq!(images[0].name, "ubuntu:22.04");
1019        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
1020        assert_eq!(
1021            images[0].metadata.get(META_CONTAINER),
1022            Some(&"true".to_string())
1023        );
1024    }
1025
1026    #[test]
1027    fn container_digest_pinned_creates_image_node_third_party() {
1028        let yaml = r#"
1029jobs:
1030  build:
1031    container:
1032      image: "ubuntu@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
1033    steps:
1034      - run: echo hi
1035"#;
1036        let graph = parse(yaml);
1037        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1038        assert_eq!(images.len(), 1);
1039        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
1040        assert_eq!(
1041            images[0].metadata.get(META_CONTAINER),
1042            Some(&"true".to_string())
1043        );
1044    }
1045
1046    #[test]
1047    fn oidc_permission_tags_identity_with_meta_oidc() {
1048        let yaml = r#"
1049permissions:
1050  id-token: write
1051  contents: read
1052jobs:
1053  ci:
1054    steps:
1055      - run: echo hi
1056"#;
1057        let graph = parse(yaml);
1058        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1059        assert_eq!(identities.len(), 1);
1060        assert_eq!(
1061            identities[0].metadata.get(META_OIDC),
1062            Some(&"true".to_string()),
1063            "id-token: write should mark identity as OIDC-capable"
1064        );
1065    }
1066
1067    #[test]
1068    fn non_oidc_permission_does_not_tag_meta_oidc() {
1069        let yaml = r#"
1070permissions:
1071  contents: read
1072jobs:
1073  ci:
1074    steps:
1075      - run: echo hi
1076"#;
1077        let graph = parse(yaml);
1078        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1079        assert_eq!(identities.len(), 1);
1080        assert!(
1081            !identities[0].metadata.contains_key(META_OIDC),
1082            "contents:read should not tag as OIDC"
1083        );
1084    }
1085
1086    #[test]
1087    fn contents_write_without_id_token_does_not_tag_oidc() {
1088        // Regression: "contents: write" contains "write" but not "id-token: write".
1089        // Should NOT be tagged as OIDC-capable.
1090        let yaml = r#"
1091permissions:
1092  contents: write
1093jobs:
1094  ci:
1095    steps:
1096      - run: echo hi
1097"#;
1098        let graph = parse(yaml);
1099        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1100        assert_eq!(identities.len(), 1);
1101        assert!(
1102            !identities[0].metadata.contains_key(META_OIDC),
1103            "contents:write without id-token must not be tagged OIDC"
1104        );
1105    }
1106
1107    #[test]
1108    fn write_all_permission_tags_identity_as_oidc() {
1109        // `permissions: write-all` grants every permission including id-token: write.
1110        let yaml = r#"
1111permissions: write-all
1112jobs:
1113  ci:
1114    steps:
1115      - run: echo hi
1116"#;
1117        let graph = parse(yaml);
1118        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1119        assert_eq!(identities.len(), 1);
1120        assert_eq!(
1121            identities[0].metadata.get(META_OIDC),
1122            Some(&"true".to_string()),
1123            "write-all grants all permissions including id-token: write"
1124        );
1125    }
1126
1127    #[test]
1128    fn container_steps_linked_to_container_image() {
1129        let yaml = r#"
1130jobs:
1131  build:
1132    container: ubuntu:22.04
1133    steps:
1134      - name: Step A
1135        run: echo "a"
1136      - name: Step B
1137        run: echo "b"
1138"#;
1139        let graph = parse(yaml);
1140        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1141        assert_eq!(images.len(), 1);
1142        let container_id = images[0].id;
1143
1144        // Both steps must have UsesImage edges to the container
1145        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1146        assert_eq!(steps.len(), 2);
1147        for step in &steps {
1148            let links: Vec<_> = graph
1149                .edges_from(step.id)
1150                .filter(|e| e.kind == EdgeKind::UsesImage && e.to == container_id)
1151                .collect();
1152            assert_eq!(
1153                links.len(),
1154                1,
1155                "step '{}' must link to container",
1156                step.name
1157            );
1158        }
1159    }
1160
1161    #[test]
1162    fn container_authority_propagates_to_floating_image() {
1163        // Integration: authority from a step running in a floating container should
1164        // propagate to the container Image node (Untrusted), generating a finding.
1165        let yaml = r#"
1166permissions: write-all
1167jobs:
1168  build:
1169    container: ubuntu:22.04
1170    steps:
1171      - run: echo hi
1172"#;
1173        use taudit_core::propagation::DEFAULT_MAX_HOPS;
1174        use taudit_core::rules;
1175        let graph = parse(yaml);
1176        let findings = rules::run_all_rules(&graph, DEFAULT_MAX_HOPS);
1177        // Should detect: GITHUB_TOKEN (broad) propagates to ubuntu:22.04 (Untrusted) via step
1178        assert!(
1179            findings
1180                .iter()
1181                .any(|f| f.category == taudit_core::finding::FindingCategory::AuthorityPropagation),
1182            "authority should propagate from step to floating container"
1183        );
1184    }
1185
1186    #[test]
1187    fn aws_oidc_creates_identity_node() {
1188        let yaml = r#"
1189jobs:
1190  deploy:
1191    steps:
1192      - name: Configure AWS credentials
1193        uses: aws-actions/configure-aws-credentials@v4
1194        with:
1195          role-to-assume: arn:aws:iam::123456789012:role/my-deploy-role
1196          aws-region: us-east-1
1197"#;
1198        let graph = parse(yaml);
1199        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1200        assert_eq!(identities.len(), 1);
1201        // ARN arn:aws:iam::123456789012:role/my-deploy-role → last '/' segment
1202        assert_eq!(identities[0].name, "AWS/my-deploy-role");
1203        assert_eq!(
1204            identities[0].metadata.get(META_OIDC),
1205            Some(&"true".to_string())
1206        );
1207        assert_eq!(
1208            identities[0].metadata.get(META_IDENTITY_SCOPE),
1209            Some(&"broad".to_string())
1210        );
1211    }
1212
1213    #[test]
1214    fn gcp_oidc_creates_identity_node() {
1215        let yaml = r#"
1216jobs:
1217  deploy:
1218    steps:
1219      - name: Authenticate to GCP
1220        uses: google-github-actions/auth@v2
1221        with:
1222          workload_identity_provider: projects/123/locations/global/workloadIdentityPools/my-pool/providers/my-provider
1223          service_account: my-sa@my-project.iam.gserviceaccount.com
1224"#;
1225        let graph = parse(yaml);
1226        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1227        assert_eq!(identities.len(), 1);
1228        assert!(identities[0].name.starts_with("GCP/"));
1229        assert_eq!(
1230            identities[0].metadata.get(META_OIDC),
1231            Some(&"true".to_string())
1232        );
1233    }
1234
1235    #[test]
1236    fn azure_oidc_creates_identity_node() {
1237        let yaml = r#"
1238jobs:
1239  deploy:
1240    steps:
1241      - name: Azure login
1242        uses: azure/login@v2
1243        with:
1244          client-id: ${{ vars.AZURE_CLIENT_ID }}
1245          tenant-id: ${{ vars.AZURE_TENANT_ID }}
1246          subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
1247"#;
1248        let graph = parse(yaml);
1249        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1250        assert_eq!(identities.len(), 1);
1251        assert!(identities[0].name.starts_with("Azure/"));
1252        assert_eq!(
1253            identities[0].metadata.get(META_OIDC),
1254            Some(&"true".to_string())
1255        );
1256    }
1257
1258    #[test]
1259    fn azure_static_sp_does_not_create_identity_node() {
1260        // When client-secret is present, it's a static service principal — not OIDC.
1261        // The secret scanning in with: handles this; classify_cloud_auth returns None.
1262        let yaml = r#"
1263jobs:
1264  deploy:
1265    steps:
1266      - name: Azure login
1267        uses: azure/login@v2
1268        with:
1269          client-id: my-client-id
1270          client-secret: ${{ secrets.AZURE_CLIENT_SECRET }}
1271          tenant-id: my-tenant
1272"#;
1273        let graph = parse(yaml);
1274        // Identity node should NOT be created by cloud auth inference
1275        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1276        assert!(
1277            identities.is_empty(),
1278            "static SP should not create an OIDC Identity node"
1279        );
1280        // But the secret SHOULD be captured by existing with: scanning
1281        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1282        assert_eq!(secrets.len(), 1);
1283        assert_eq!(secrets[0].name, "AZURE_CLIENT_SECRET");
1284    }
1285
1286    #[test]
1287    fn aws_static_creds_do_not_create_identity_node() {
1288        // Static access key path — no role-to-assume, so classify_cloud_auth returns None.
1289        // The access key secret is captured by with: scanning.
1290        let yaml = r#"
1291jobs:
1292  deploy:
1293    steps:
1294      - uses: aws-actions/configure-aws-credentials@v4
1295        with:
1296          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
1297          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
1298          aws-region: us-east-1
1299"#;
1300        let graph = parse(yaml);
1301        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1302        assert!(
1303            identities.is_empty(),
1304            "static AWS creds must not create Identity node"
1305        );
1306        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1307        assert_eq!(secrets.len(), 2, "both static secrets captured");
1308    }
1309
1310    #[test]
1311    fn pull_request_target_sets_meta_trigger_on_graph() {
1312        let yaml = r#"
1313on: pull_request_target
1314jobs:
1315  ci:
1316    steps:
1317      - run: echo hi
1318"#;
1319        let graph = parse(yaml);
1320        assert_eq!(
1321            graph.metadata.get(META_TRIGGER),
1322            Some(&"pull_request_target".to_string())
1323        );
1324    }
1325
1326    #[test]
1327    fn github_env_write_in_run_sets_meta_writes_env_gate() {
1328        let yaml = r#"
1329jobs:
1330  build:
1331    steps:
1332      - name: Set version
1333        run: echo "VERSION=1.0" >> $GITHUB_ENV
1334"#;
1335        let graph = parse(yaml);
1336        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1337        assert_eq!(steps.len(), 1);
1338        assert_eq!(
1339            steps[0].metadata.get(META_WRITES_ENV_GATE),
1340            Some(&"true".to_string()),
1341            "run: with >> $GITHUB_ENV must mark META_WRITES_ENV_GATE"
1342        );
1343    }
1344
1345    #[test]
1346    fn attest_action_sets_meta_attests() {
1347        let yaml = r#"
1348jobs:
1349  release:
1350    steps:
1351      - name: Attest
1352        uses: actions/attest-build-provenance@v1
1353        with:
1354          subject-path: dist/*
1355"#;
1356        let graph = parse(yaml);
1357        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1358        assert_eq!(steps.len(), 1);
1359        assert_eq!(
1360            steps[0].metadata.get(META_ATTESTS),
1361            Some(&"true".to_string())
1362        );
1363    }
1364
1365    #[test]
1366    fn self_hosted_string_runs_on_creates_image_with_self_hosted_metadata() {
1367        let yaml = r#"
1368jobs:
1369  build:
1370    runs-on: self-hosted
1371    steps:
1372      - run: echo hi
1373"#;
1374        let graph = parse(yaml);
1375        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1376        let runner = images
1377            .iter()
1378            .find(|i| i.metadata.contains_key(META_SELF_HOSTED))
1379            .expect("self-hosted runner Image node must be created");
1380        assert_eq!(
1381            runner.metadata.get(META_SELF_HOSTED),
1382            Some(&"true".to_string())
1383        );
1384    }
1385
1386    #[test]
1387    fn self_hosted_in_sequence_runs_on_creates_image_with_self_hosted_metadata() {
1388        let yaml = r#"
1389jobs:
1390  build:
1391    runs-on: [self-hosted, linux, x64]
1392    steps:
1393      - run: echo hi
1394"#;
1395        let graph = parse(yaml);
1396        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1397        let runner = images
1398            .iter()
1399            .find(|i| i.metadata.contains_key(META_SELF_HOSTED))
1400            .expect("self-hosted runner Image node must be created");
1401        assert_eq!(
1402            runner.metadata.get(META_SELF_HOSTED),
1403            Some(&"true".to_string())
1404        );
1405    }
1406
1407    #[test]
1408    fn hosted_runner_does_not_create_self_hosted_image() {
1409        let yaml = r#"
1410jobs:
1411  build:
1412    runs-on: ubuntu-latest
1413    steps:
1414      - run: echo hi
1415"#;
1416        let graph = parse(yaml);
1417        let self_hosted_images: Vec<_> = graph
1418            .nodes_of_kind(NodeKind::Image)
1419            .filter(|i| i.metadata.contains_key(META_SELF_HOSTED))
1420            .collect();
1421        assert!(
1422            self_hosted_images.is_empty(),
1423            "hosted runner must not produce a self-hosted Image node"
1424        );
1425    }
1426
1427    #[test]
1428    fn actions_checkout_step_tagged_with_meta_checkout_self() {
1429        let yaml = r#"
1430jobs:
1431  ci:
1432    steps:
1433      - uses: actions/checkout@v4
1434      - run: echo hi
1435"#;
1436        let graph = parse(yaml);
1437        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1438        let checkout_step = steps
1439            .iter()
1440            .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
1441            .expect("actions/checkout step must be tagged META_CHECKOUT_SELF");
1442        assert_eq!(
1443            checkout_step.metadata.get(META_CHECKOUT_SELF),
1444            Some(&"true".to_string())
1445        );
1446    }
1447
1448    #[test]
1449    fn actions_checkout_sha_pinned_also_tagged() {
1450        let yaml = r#"
1451jobs:
1452  ci:
1453    steps:
1454      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
1455"#;
1456        let graph = parse(yaml);
1457        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1458        assert_eq!(steps.len(), 1);
1459        assert_eq!(
1460            steps[0].metadata.get(META_CHECKOUT_SELF),
1461            Some(&"true".to_string()),
1462            "SHA-pinned checkout must still be tagged — rule gates on trigger context"
1463        );
1464    }
1465
1466    #[test]
1467    fn non_checkout_uses_not_tagged_checkout_self() {
1468        let yaml = r#"
1469jobs:
1470  ci:
1471    steps:
1472      - uses: some-org/other-action@v1
1473"#;
1474        let graph = parse(yaml);
1475        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1476        assert_eq!(steps.len(), 1);
1477        assert!(
1478            !steps[0].metadata.contains_key(META_CHECKOUT_SELF),
1479            "non-checkout uses: must not be tagged"
1480        );
1481    }
1482
1483    #[test]
1484    fn workflow_level_permissions_create_identity() {
1485        let yaml = r#"
1486permissions: write-all
1487jobs:
1488  ci:
1489    steps:
1490      - run: echo hi
1491"#;
1492        let graph = parse(yaml);
1493        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1494        assert_eq!(identities.len(), 1);
1495        assert_eq!(identities[0].name, "GITHUB_TOKEN");
1496        assert_eq!(
1497            identities[0].metadata.get(META_PERMISSIONS).unwrap(),
1498            "write-all"
1499        );
1500    }
1501}