Skip to main content

taudit_parse_gha/
lib.rs

1use std::collections::HashMap;
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
/// Metadata value stored under `META_INFERRED` to mark a secret reference as
/// inferred (found by scanning a `run:` script) rather than precisely mapped.
const META_INFERRED_VAL: &str = "true";
10
/// GitHub Actions workflow parser.
///
/// Stateless unit type; all behavior lives in its `PipelineParser` impl, which
/// turns workflow YAML into an `AuthorityGraph`.
pub struct GhaParser;
13
14impl PipelineParser for GhaParser {
15    fn platform(&self) -> &str {
16        "github-actions"
17    }
18
19    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
20        let mut de = serde_yaml::Deserializer::from_str(content);
21        let doc = de
22            .next()
23            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
24        let workflow: GhaWorkflow = GhaWorkflow::deserialize(doc)
25            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
26        let extra_docs = de.next().is_some();
27
28        let mut graph = AuthorityGraph::new(source.clone());
29        if extra_docs {
30            graph.mark_partial(
31                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
32            );
33        }
34        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
35
36        let is_pull_request_target = workflow
37            .triggers
38            .as_ref()
39            .map(trigger_has_pull_request_target)
40            .unwrap_or(false);
41
42        // Workflow-level permissions -> GITHUB_TOKEN identity node
43        let token_id = if let Some(ref perms) = workflow.permissions {
44            let perm_string = perms.to_string();
45            let scope = IdentityScope::from_permissions(&perm_string);
46            let mut meta = HashMap::new();
47            meta.insert(META_PERMISSIONS.into(), perm_string.clone());
48            meta.insert(
49                META_IDENTITY_SCOPE.into(),
50                format!("{scope:?}").to_lowercase(),
51            );
52            // OIDC: id-token: write → token is OIDC-capable (federated scope).
53            // Check the formatted substring directly — Permissions::Map fmt produces
54            // "id-token: write" so this won't false-positive on "contents: write".
55            if perm_string.contains("id-token: write") || perm_string == "write-all" {
56                meta.insert(META_OIDC.into(), "true".into());
57            }
58            Some(graph.add_node_with_metadata(
59                NodeKind::Identity,
60                "GITHUB_TOKEN",
61                TrustZone::FirstParty,
62                meta,
63            ))
64        } else {
65            None
66        };
67
68        for (job_name, job) in &workflow.jobs {
69            // Job-level permissions override workflow-level
70            let job_token_id = if let Some(ref perms) = job.permissions {
71                let perm_string = perms.to_string();
72                let scope = IdentityScope::from_permissions(&perm_string);
73                let mut meta = HashMap::new();
74                meta.insert(META_PERMISSIONS.into(), perm_string.clone());
75                meta.insert(
76                    META_IDENTITY_SCOPE.into(),
77                    format!("{scope:?}").to_lowercase(),
78                );
79                if perm_string.contains("id-token: write") {
80                    meta.insert(META_OIDC.into(), "true".into());
81                }
82                Some(graph.add_node_with_metadata(
83                    NodeKind::Identity,
84                    format!("GITHUB_TOKEN ({job_name})"),
85                    TrustZone::FirstParty,
86                    meta,
87                ))
88            } else {
89                token_id
90            };
91
92            // Reusable workflow: job.uses= means this job delegates to another workflow.
93            // We cannot resolve it inline — mark the graph partial and skip steps.
94            if let Some(ref uses) = job.uses {
95                let trust_zone = if is_sha_pinned(uses) {
96                    TrustZone::ThirdParty
97                } else {
98                    TrustZone::Untrusted
99                };
100                let rw_id = graph.add_node(NodeKind::Image, uses, trust_zone);
101                // Synthetic step represents this job delegating to the called workflow
102                let job_step_id = graph.add_node(NodeKind::Step, job_name, TrustZone::FirstParty);
103                graph.add_edge(job_step_id, rw_id, EdgeKind::DelegatesTo);
104                if let Some(tok_id) = job_token_id {
105                    graph.add_edge(job_step_id, tok_id, EdgeKind::HasAccessTo);
106                }
107                graph.mark_partial(format!(
108                    "reusable workflow '{uses}' in job '{job_name}' cannot be resolved inline — authority within the called workflow is unknown"
109                ));
110                continue;
111            }
112
113            // Matrix strategy: authority shape may differ per matrix entry — mark Partial
114            if job
115                .strategy
116                .as_ref()
117                .and_then(|s| s.get("matrix"))
118                .is_some()
119            {
120                graph.mark_partial(format!(
121                    "job '{job_name}' uses matrix strategy — authority shape may differ per matrix entry"
122                ));
123            }
124
125            // Container: job-level container image — add as Image node and capture ID
126            // so each step in this job can be linked to it via UsesImage.
127            let container_image_id: Option<NodeId> = if let Some(ref container) = job.container {
128                let image_str = container.image();
129                let pinned = is_docker_digest_pinned(image_str);
130                let trust_zone = if pinned {
131                    TrustZone::ThirdParty
132                } else {
133                    TrustZone::Untrusted
134                };
135                let mut meta = HashMap::new();
136                meta.insert(META_CONTAINER.into(), "true".into());
137                if pinned {
138                    if let Some(digest) = image_str.split("@sha256:").nth(1) {
139                        meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
140                    }
141                }
142                Some(graph.add_node_with_metadata(NodeKind::Image, image_str, trust_zone, meta))
143            } else {
144                None
145            };
146
147            for (step_idx, step) in job.steps.iter().enumerate() {
148                let default_name = format!("{job_name}[{step_idx}]");
149                let step_name = step.name.as_deref().unwrap_or(&default_name);
150
151                // Determine trust zone and create image node if `uses:` present
152                let (trust_zone, image_node_id) = if let Some(ref uses) = step.uses {
153                    let (zone, image_id) = classify_action(uses, &mut graph);
154                    (zone, Some(image_id))
155                } else if is_pull_request_target {
156                    // run: step in a pull_request_target workflow — may execute fork code
157                    (TrustZone::Untrusted, None)
158                } else {
159                    // Inline `run:` step — first party
160                    (TrustZone::FirstParty, None)
161                };
162
163                let step_id = graph.add_node(NodeKind::Step, step_name, trust_zone);
164
165                // Link step to action image
166                if let Some(img_id) = image_node_id {
167                    graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
168                }
169
170                // Link step to job container — steps run inside the container's execution
171                // environment, so a floating container is a supply chain risk for every step.
172                if let Some(img_id) = container_image_id {
173                    graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
174                }
175
176                // Link step to GITHUB_TOKEN if it exists
177                if let Some(tok_id) = job_token_id {
178                    graph.add_edge(step_id, tok_id, EdgeKind::HasAccessTo);
179                }
180
181                // Cloud identity inference: detect known OIDC cloud auth actions and
182                // create an Identity node representing the assumed cloud identity.
183                if let Some(ref uses) = step.uses {
184                    if let Some(cloud_id) =
185                        classify_cloud_auth(uses, step.with.as_ref(), &mut graph)
186                    {
187                        graph.add_edge(step_id, cloud_id, EdgeKind::HasAccessTo);
188                    }
189                }
190
191                // Process secrets from workflow-level `env:` (inherited by all jobs/steps)
192                if let Some(ref env) = workflow.env {
193                    for env_val in env.values() {
194                        if is_secret_reference(env_val) {
195                            let secret_name = extract_secret_name(env_val);
196                            let secret_id =
197                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
198                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
199                        }
200                    }
201                }
202
203                // Process secrets from job-level `env:` (inherited by all steps)
204                if let Some(ref env) = job.env {
205                    for env_val in env.values() {
206                        if is_secret_reference(env_val) {
207                            let secret_name = extract_secret_name(env_val);
208                            let secret_id =
209                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
210                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
211                        }
212                    }
213                }
214
215                // Process secrets from step-level `env:` block
216                if let Some(ref env) = step.env {
217                    for env_val in env.values() {
218                        if is_secret_reference(env_val) {
219                            let secret_name = extract_secret_name(env_val);
220                            let secret_id =
221                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
222                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
223                        }
224                    }
225                }
226
227                // Process secrets from `with:` block
228                if let Some(ref with) = step.with {
229                    for val in with.values() {
230                        if is_secret_reference(val) {
231                            let secret_name = extract_secret_name(val);
232                            let secret_id =
233                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
234                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
235                        }
236                    }
237                }
238
239                // Detect inferred secrets in `run:` script blocks
240                if let Some(ref run) = step.run {
241                    if run.contains("${{ secrets.") {
242                        // Extract secret names from the shell script
243                        let mut pos = 0;
244                        while let Some(start) = run[pos..].find("secrets.") {
245                            let abs_start = pos + start + 8;
246                            let remaining = &run[abs_start..];
247                            let end = remaining
248                                .find(|c: char| !c.is_alphanumeric() && c != '_')
249                                .unwrap_or(remaining.len());
250                            let secret_name = &remaining[..end];
251                            if !secret_name.is_empty() {
252                                let secret_id =
253                                    find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
254                                // Mark as inferred — not precisely mapped
255                                if let Some(node) = graph.nodes.get_mut(secret_id) {
256                                    node.metadata
257                                        .insert(META_INFERRED.into(), META_INFERRED_VAL.into());
258                                }
259                                graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
260                                graph.mark_partial(format!(
261                                    "secret '{secret_name}' referenced in run: script — inferred, not precisely mapped"
262                                ));
263                            }
264                            pos = abs_start + end;
265                        }
266                    }
267                }
268            }
269        }
270
271        Ok(graph)
272    }
273}
274
275/// Returns true if the workflow's `on:` triggers include `pull_request_target`.
276/// GHA `on:` is polymorphic: string, sequence, or mapping.
277fn trigger_has_pull_request_target(triggers: &serde_yaml::Value) -> bool {
278    const PRT: &str = "pull_request_target";
279    match triggers {
280        serde_yaml::Value::String(s) => s == PRT,
281        serde_yaml::Value::Sequence(seq) => seq
282            .iter()
283            .any(|v| v.as_str().map(|s| s == PRT).unwrap_or(false)),
284        serde_yaml::Value::Mapping(map) => map
285            .iter()
286            .any(|(k, _)| k.as_str().map(|s| s == PRT).unwrap_or(false)),
287        _ => false,
288    }
289}
290
291/// Classify a `uses:` reference into trust zone and create image node.
292fn classify_action(uses: &str, graph: &mut AuthorityGraph) -> (TrustZone, NodeId) {
293    let pinned = is_sha_pinned(uses);
294    let is_local = uses.starts_with("./");
295
296    let zone = if is_local {
297        TrustZone::FirstParty
298    } else if pinned {
299        TrustZone::ThirdParty
300    } else {
301        TrustZone::Untrusted
302    };
303
304    let mut meta = HashMap::new();
305    if pinned {
306        if let Some(sha) = uses.split('@').next_back() {
307            meta.insert(META_DIGEST.into(), sha.into());
308        }
309    }
310
311    let id = graph.add_node_with_metadata(NodeKind::Image, uses, zone, meta);
312    (zone, id)
313}
314
/// Returns true when `val` contains a `${{ … }}` expression that reads from the
/// `secrets` context.
///
/// Whitespace inside the expression is irrelevant, so `${{secrets.X}}` and
/// `${{ format('{0}', secrets.X) }}` are detected as well as the usual
/// `${{ secrets.X }}`. A `secrets.` that is merely the tail of a longer
/// property path (e.g. `inputs.secrets.x`) or that sits outside any expression
/// does not count. An unterminated `${{` is treated as running to the end of
/// the string.
fn is_secret_reference(val: &str) -> bool {
    const OPEN: &str = "${{";
    const CLOSE: &str = "}}";
    const CONTEXT: &str = "secrets.";

    let mut rest = val;
    while let Some(open_at) = rest.find(OPEN) {
        let after_open = &rest[open_at + OPEN.len()..];
        let (expr, tail) = match after_open.find(CLOSE) {
            Some(close_at) => (
                &after_open[..close_at],
                &after_open[close_at + CLOSE.len()..],
            ),
            None => (after_open, ""),
        };

        // `secrets.` must start a context access: the preceding character may
        // not be part of an identifier or property path.
        let reads_secret = expr.match_indices(CONTEXT).any(|(at, _)| {
            match expr[..at].chars().next_back() {
                Some(prev) => !(prev.is_alphanumeric() || matches!(prev, '_' | '-' | '.')),
                None => true,
            }
        });
        if reads_secret {
            return true;
        }
        rest = tail;
    }
    false
}
318
/// Pulls the secret name out of a value such as `"${{ secrets.MY_SECRET }}"`.
///
/// The name is the run of alphanumeric/underscore characters following the
/// first `secrets.` in `val`. When `val` has no `secrets.` at all, the whole
/// value is returned unchanged.
fn extract_secret_name(val: &str) -> String {
    const PREFIX: &str = "secrets.";

    match val.split_once(PREFIX) {
        Some((_before, tail)) => tail
            .chars()
            .take_while(|c| c.is_alphanumeric() || *c == '_')
            .collect(),
        None => val.to_owned(),
    }
}
331
332fn find_or_create_secret(
333    graph: &mut AuthorityGraph,
334    cache: &mut HashMap<String, NodeId>,
335    name: &str,
336) -> NodeId {
337    if let Some(&id) = cache.get(name) {
338        return id;
339    }
340    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
341    cache.insert(name.to_string(), id);
342    id
343}
344
345/// Detect known OIDC cloud authentication actions and create an Identity node
346/// representing the cloud identity that will be assumed.
347///
348/// Only handles the OIDC/federated path — static credential inputs (e.g.
349/// `aws-secret-access-key: ${{ secrets.X }}`) are already captured by the
350/// regular `with:` secret scanning and don't need a separate Identity node.
351///
352/// Returns `Some(NodeId)` of the created Identity, or `None` if not recognized.
353fn classify_cloud_auth(
354    uses: &str,
355    with: Option<&HashMap<String, String>>,
356    graph: &mut AuthorityGraph,
357) -> Option<NodeId> {
358    // Strip `@version` — match any version of the action
359    let action = uses.split('@').next().unwrap_or(uses);
360
361    match action {
362        "aws-actions/configure-aws-credentials" => {
363            // OIDC path: role-to-assume present (no static access key needed)
364            let w = with?;
365            let role = w.get("role-to-assume")?;
366            // ARN format: arn:aws:iam::123456789012:role/my-role
367            // Split on '/' to get the role name; fall back to the full value.
368            let short = role.split('/').next_back().unwrap_or(role.as_str());
369            let mut meta = HashMap::new();
370            meta.insert(META_OIDC.into(), "true".into());
371            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
372            meta.insert(META_PERMISSIONS.into(), "AWS role assumption (OIDC)".into());
373            Some(graph.add_node_with_metadata(
374                NodeKind::Identity,
375                format!("AWS/{short}"),
376                TrustZone::FirstParty,
377                meta,
378            ))
379        }
380        "google-github-actions/auth" => {
381            // OIDC path: workload_identity_provider present
382            let w = with?;
383            let provider = w.get("workload_identity_provider")?;
384            let short = provider.split('/').next_back().unwrap_or(provider.as_str());
385            let mut meta = HashMap::new();
386            meta.insert(META_OIDC.into(), "true".into());
387            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
388            meta.insert(
389                META_PERMISSIONS.into(),
390                "GCP workload identity federation".into(),
391            );
392            Some(graph.add_node_with_metadata(
393                NodeKind::Identity,
394                format!("GCP/{short}"),
395                TrustZone::FirstParty,
396                meta,
397            ))
398        }
399        "azure/login" => {
400            // OIDC path: client-id present without client-secret
401            let w = with?;
402            let client_id = w.get("client-id")?;
403            // Only treat as OIDC if no static client-secret is provided
404            if w.contains_key("client-secret") {
405                return None; // static SP creds captured by with: secret scanning
406            }
407            let mut meta = HashMap::new();
408            meta.insert(META_OIDC.into(), "true".into());
409            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
410            meta.insert(
411                META_PERMISSIONS.into(),
412                "Azure federated credential (OIDC)".into(),
413            );
414            Some(graph.add_node_with_metadata(
415                NodeKind::Identity,
416                format!("Azure/{client_id}"),
417                TrustZone::FirstParty,
418                meta,
419            ))
420        }
421        _ => None,
422    }
423}
424
425// ── Serde models for GHA YAML ──────────────────────────
426
/// Flexible permissions: can be a string ("write-all") or a map.
///
/// Deserialized with `#[serde(untagged)]`, so the variant is picked from the
/// YAML shape rather than from an explicit tag.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum Permissions {
    /// Scalar shorthand, e.g. `write-all`.
    String(String),
    /// Per-scope form: scope name mapped to access level, e.g. `contents: read`.
    Map(HashMap<String, String>),
}
434
435impl std::fmt::Display for Permissions {
436    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
437        match self {
438            Permissions::String(s) => write!(f, "{s}"),
439            Permissions::Map(m) => {
440                let parts: Vec<String> = m.iter().map(|(k, v)| format!("{k}: {v}")).collect();
441                write!(f, "{{ {} }}", parts.join(", "))
442            }
443        }
444    }
445}
446
/// Top-level shape of a GitHub Actions workflow file, limited to the keys this
/// parser reads. Every field falls back to its default when the key is absent.
#[derive(Debug, Deserialize)]
pub struct GhaWorkflow {
    /// Workflow trigger(s). Polymorphic: string, sequence, or mapping.
    #[serde(rename = "on", default)]
    pub triggers: Option<serde_yaml::Value>,
    /// Workflow-level `permissions:`; the parser turns this into the shared
    /// `GITHUB_TOKEN` identity node.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Workflow-level env vars, inherited by all jobs and steps.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Jobs keyed by job id. Stored in a `HashMap`, so iteration order is not
    /// the order in which jobs appear in the file.
    #[serde(default)]
    pub jobs: HashMap<String, GhaJob>,
}
460
/// Job-level container config. Polymorphic: string image or map with `image:` key.
///
/// Deserialized with `#[serde(untagged)]`; only the image reference is kept
/// from the map form.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
pub enum ContainerConfig {
    /// Scalar form, e.g. `container: ubuntu:22.04`.
    Image(String),
    /// Mapping form with an `image:` key.
    Full { image: String },
}
468
469impl ContainerConfig {
470    pub fn image(&self) -> &str {
471        match self {
472            ContainerConfig::Image(s) => s,
473            ContainerConfig::Full { image } => image,
474        }
475    }
476}
477
/// One entry of the workflow's `jobs:` map, limited to the keys this parser
/// reads. Every field falls back to its default when the key is absent.
#[derive(Debug, Deserialize)]
pub struct GhaJob {
    /// Job-level `permissions:`; when present they override the workflow-level
    /// ones for this job's token.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Job-level env vars, inherited by every step of the job.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Steps of the job, in file order. Empty for reusable-workflow jobs.
    #[serde(default)]
    pub steps: Vec<GhaStep>,
    /// Reusable workflow reference — `uses: owner/repo/.github/workflows/foo.yml@ref`
    #[serde(default)]
    pub uses: Option<String>,
    /// Job container image.
    #[serde(default)]
    pub container: Option<ContainerConfig>,
    /// Matrix/strategy configuration. When a matrix is present, the authority
    /// shape may differ per matrix entry — graph is marked Partial.
    #[serde(default)]
    pub strategy: Option<serde_yaml::Value>,
}
497
/// One step of a job, limited to the keys this parser reads.
///
/// NOTE(review): `env` and `with` values are typed as `String`; non-string YAML
/// scalars such as `fetch-depth: 0` may fail to deserialize — confirm against
/// serde_yaml's behavior.
#[derive(Debug, Deserialize)]
pub struct GhaStep {
    /// Optional display name; the parser falls back to `job[index]` when absent.
    pub name: Option<String>,
    /// Action reference (`uses:`), if the step runs an action.
    pub uses: Option<String>,
    /// Inline shell script (`run:`), if the step runs a script.
    pub run: Option<String>,
    /// Step-level env vars.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Action inputs (`with:`).
    #[serde(rename = "with", default)]
    pub with: Option<HashMap<String, String>>,
}
508
509#[cfg(test)]
510mod tests {
511    use super::*;
512
513    fn parse(yaml: &str) -> AuthorityGraph {
514        let parser = GhaParser;
515        let source = PipelineSource {
516            file: "test.yml".into(),
517            repo: None,
518            git_ref: None,
519        };
520        parser.parse(yaml, &source).unwrap()
521    }
522
523    #[test]
524    fn parses_simple_workflow() {
525        let yaml = r#"
526permissions: write-all
527jobs:
528  build:
529    steps:
530      - name: Checkout
531        uses: actions/checkout@v4
532      - name: Build
533        run: make build
534"#;
535        let graph = parse(yaml);
536        assert!(graph.nodes.len() >= 3); // GITHUB_TOKEN + 2 steps + 1 image
537    }
538
539    #[test]
540    fn detects_secret_in_env() {
541        let yaml = r#"
542jobs:
543  deploy:
544    steps:
545      - name: Deploy
546        run: ./deploy.sh
547        env:
548          AWS_KEY: "${{ secrets.AWS_ACCESS_KEY_ID }}"
549"#;
550        let graph = parse(yaml);
551        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
552        assert_eq!(secrets.len(), 1);
553        assert_eq!(secrets[0].name, "AWS_ACCESS_KEY_ID");
554    }
555
556    #[test]
557    fn classifies_unpinned_action_as_untrusted() {
558        let yaml = r#"
559jobs:
560  ci:
561    steps:
562      - uses: actions/checkout@v4
563"#;
564        let graph = parse(yaml);
565        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
566        assert_eq!(images.len(), 1);
567        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
568    }
569
570    #[test]
571    fn classifies_sha_pinned_action_as_third_party() {
572        let yaml = r#"
573jobs:
574  ci:
575    steps:
576      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
577"#;
578        let graph = parse(yaml);
579        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
580        assert_eq!(images.len(), 1);
581        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
582    }
583
584    #[test]
585    fn classifies_local_action_as_first_party() {
586        let yaml = r#"
587jobs:
588  ci:
589    steps:
590      - uses: ./.github/actions/my-action
591"#;
592        let graph = parse(yaml);
593        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
594        assert_eq!(images.len(), 1);
595        assert_eq!(images[0].trust_zone, TrustZone::FirstParty);
596    }
597
598    #[test]
599    fn detects_secret_in_with() {
600        let yaml = r#"
601jobs:
602  deploy:
603    steps:
604      - name: Publish
605        uses: some-org/publish@v1
606        with:
607          token: "${{ secrets.NPM_TOKEN }}"
608"#;
609        let graph = parse(yaml);
610        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
611        assert_eq!(secrets.len(), 1);
612        assert_eq!(secrets[0].name, "NPM_TOKEN");
613    }
614
615    #[test]
616    fn inferred_secret_in_run_block_detected() {
617        let yaml = r#"
618jobs:
619  deploy:
620    steps:
621      - name: Deploy
622        run: |
623          curl -H "Authorization: ${{ secrets.API_TOKEN }}" https://api.example.com
624"#;
625        let graph = parse(yaml);
626        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
627        assert_eq!(secrets.len(), 1);
628        assert_eq!(secrets[0].name, "API_TOKEN");
629        assert_eq!(
630            secrets[0].metadata.get(META_INFERRED),
631            Some(&"true".to_string())
632        );
633        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
634        assert!(!graph.completeness_gaps.is_empty());
635    }
636
637    #[test]
638    fn job_level_env_inherited_by_steps() {
639        let yaml = r#"
640jobs:
641  build:
642    env:
643      DB_PASSWORD: "${{ secrets.DB_PASSWORD }}"
644    steps:
645      - name: Step A
646        run: echo "a"
647      - name: Step B
648        run: echo "b"
649"#;
650        let graph = parse(yaml);
651        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
652        assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
653
654        // Both steps should have access to the secret
655        let secret_id = secrets[0].id;
656        let accessing_steps = graph
657            .edges_to(secret_id)
658            .filter(|e| e.kind == EdgeKind::HasAccessTo)
659            .count();
660        assert_eq!(accessing_steps, 2, "both steps inherit job-level env");
661    }
662
663    #[test]
664    fn identity_scope_set_on_token() {
665        let yaml = r#"
666permissions: write-all
667jobs:
668  ci:
669    steps:
670      - run: echo hi
671"#;
672        let graph = parse(yaml);
673        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
674        assert_eq!(identities.len(), 1);
675        assert_eq!(
676            identities[0].metadata.get(META_IDENTITY_SCOPE),
677            Some(&"broad".to_string())
678        );
679    }
680
681    #[test]
682    fn constrained_identity_scope() {
683        let yaml = r#"
684permissions:
685  contents: read
686jobs:
687  ci:
688    steps:
689      - run: echo hi
690"#;
691        let graph = parse(yaml);
692        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
693        assert_eq!(identities.len(), 1);
694        assert_eq!(
695            identities[0].metadata.get(META_IDENTITY_SCOPE),
696            Some(&"constrained".to_string())
697        );
698    }
699
700    #[test]
701    fn pull_request_target_string_trigger_marks_run_steps_untrusted() {
702        let yaml = r#"
703on: pull_request_target
704jobs:
705  check:
706    steps:
707      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
708        with:
709          ref: ${{ github.event.pull_request.head.sha }}
710      - run: npm test
711"#;
712        let graph = parse(yaml);
713        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
714        assert_eq!(steps.len(), 2);
715
716        // run: step should be Untrusted (might execute fork code)
717        let run_step = steps.iter().find(|s| s.name.contains("check[1]")).unwrap();
718        assert_eq!(
719            run_step.trust_zone,
720            TrustZone::Untrusted,
721            "run: step in pull_request_target workflow should be Untrusted"
722        );
723
724        // uses: step keeps its own trust zone (SHA-pinned = ThirdParty)
725        let checkout_step = steps.iter().find(|s| s.name.contains("check[0]")).unwrap();
726        assert_eq!(checkout_step.trust_zone, TrustZone::ThirdParty);
727    }
728
729    #[test]
730    fn pull_request_target_sequence_trigger_marks_run_steps_untrusted() {
731        let yaml = r#"
732on: [push, pull_request_target]
733jobs:
734  ci:
735    steps:
736      - run: echo hi
737"#;
738        let graph = parse(yaml);
739        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
740        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
741    }
742
743    #[test]
744    fn pull_request_target_mapping_trigger_marks_run_steps_untrusted() {
745        let yaml = r#"
746on:
747  pull_request_target:
748    types: [opened, synchronize]
749jobs:
750  ci:
751    steps:
752      - run: echo hi
753"#;
754        let graph = parse(yaml);
755        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
756        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
757    }
758
759    #[test]
760    fn push_trigger_does_not_mark_run_steps_untrusted() {
761        let yaml = r#"
762on: push
763jobs:
764  ci:
765    steps:
766      - run: echo hi
767"#;
768        let graph = parse(yaml);
769        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
770        assert_eq!(
771            steps[0].trust_zone,
772            TrustZone::FirstParty,
773            "push-triggered run: steps should remain FirstParty"
774        );
775    }
776
777    #[test]
778    fn workflow_level_env_inherited_by_all_steps() {
779        let yaml = r#"
780env:
781  DB_URL: "${{ secrets.DATABASE_URL }}"
782jobs:
783  build:
784    steps:
785      - name: Step A
786        run: echo "a"
787  test:
788    steps:
789      - name: Step B
790        run: echo "b"
791"#;
792        let graph = parse(yaml);
793        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
794        assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
795
796        // Both steps in both jobs should inherit the workflow-level secret
797        let secret_id = secrets[0].id;
798        let accessing_steps = graph
799            .edges_to(secret_id)
800            .filter(|e| e.kind == EdgeKind::HasAccessTo)
801            .count();
802        assert_eq!(accessing_steps, 2, "both steps inherit workflow-level env");
803    }
804
805    #[test]
806    fn matrix_strategy_marks_graph_partial() {
807        let yaml = r#"
808jobs:
809  test:
810    strategy:
811      matrix:
812        os: [ubuntu-latest, windows-latest, macos-latest]
813    steps:
814      - run: echo hi
815"#;
816        let graph = parse(yaml);
817        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
818        assert!(
819            graph.completeness_gaps.iter().any(|g| g.contains("matrix")),
820            "matrix strategy should be recorded as a completeness gap"
821        );
822    }
823
824    #[test]
825    fn job_without_matrix_does_not_mark_partial() {
826        let yaml = r#"
827jobs:
828  build:
829    steps:
830      - run: cargo build
831"#;
832        let graph = parse(yaml);
833        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
834    }
835
836    #[test]
837    fn reusable_workflow_creates_image_and_marks_partial() {
838        let yaml = r#"
839jobs:
840  call:
841    uses: org/repo/.github/workflows/deploy.yml@main
842"#;
843        let graph = parse(yaml);
844        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
845        assert_eq!(images.len(), 1);
846        assert_eq!(images[0].name, "org/repo/.github/workflows/deploy.yml@main");
847        assert_eq!(images[0].trust_zone, TrustZone::Untrusted); // not SHA-pinned
848
849        // Step node representing the job delegation
850        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
851        assert_eq!(steps.len(), 1);
852        assert_eq!(steps[0].name, "call");
853
854        // DelegatesTo edge from step to reusable workflow image
855        let delegates: Vec<_> = graph
856            .edges_from(steps[0].id)
857            .filter(|e| e.kind == EdgeKind::DelegatesTo)
858            .collect();
859        assert_eq!(delegates.len(), 1);
860
861        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
862    }
863
864    #[test]
865    fn reusable_workflow_sha_pinned_is_third_party() {
866        let yaml = r#"
867jobs:
868  call:
869    uses: org/repo/.github/workflows/deploy.yml@a5ac7e51b41094c92402da3b24376905380afc29
870"#;
871        let graph = parse(yaml);
872        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
873        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
874    }
875
876    #[test]
877    fn container_unpinned_creates_image_node_untrusted() {
878        let yaml = r#"
879jobs:
880  build:
881    container: ubuntu:22.04
882    steps:
883      - run: echo hi
884"#;
885        let graph = parse(yaml);
886        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
887        assert_eq!(images.len(), 1);
888        assert_eq!(images[0].name, "ubuntu:22.04");
889        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
890        assert_eq!(
891            images[0].metadata.get(META_CONTAINER),
892            Some(&"true".to_string())
893        );
894    }
895
896    #[test]
897    fn container_digest_pinned_creates_image_node_third_party() {
898        let yaml = r#"
899jobs:
900  build:
901    container:
902      image: "ubuntu@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
903    steps:
904      - run: echo hi
905"#;
906        let graph = parse(yaml);
907        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
908        assert_eq!(images.len(), 1);
909        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
910        assert_eq!(
911            images[0].metadata.get(META_CONTAINER),
912            Some(&"true".to_string())
913        );
914    }
915
916    #[test]
917    fn oidc_permission_tags_identity_with_meta_oidc() {
918        let yaml = r#"
919permissions:
920  id-token: write
921  contents: read
922jobs:
923  ci:
924    steps:
925      - run: echo hi
926"#;
927        let graph = parse(yaml);
928        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
929        assert_eq!(identities.len(), 1);
930        assert_eq!(
931            identities[0].metadata.get(META_OIDC),
932            Some(&"true".to_string()),
933            "id-token: write should mark identity as OIDC-capable"
934        );
935    }
936
937    #[test]
938    fn non_oidc_permission_does_not_tag_meta_oidc() {
939        let yaml = r#"
940permissions:
941  contents: read
942jobs:
943  ci:
944    steps:
945      - run: echo hi
946"#;
947        let graph = parse(yaml);
948        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
949        assert_eq!(identities.len(), 1);
950        assert!(
951            !identities[0].metadata.contains_key(META_OIDC),
952            "contents:read should not tag as OIDC"
953        );
954    }
955
956    #[test]
957    fn contents_write_without_id_token_does_not_tag_oidc() {
958        // Regression: "contents: write" contains "write" but not "id-token: write".
959        // Should NOT be tagged as OIDC-capable.
960        let yaml = r#"
961permissions:
962  contents: write
963jobs:
964  ci:
965    steps:
966      - run: echo hi
967"#;
968        let graph = parse(yaml);
969        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
970        assert_eq!(identities.len(), 1);
971        assert!(
972            !identities[0].metadata.contains_key(META_OIDC),
973            "contents:write without id-token must not be tagged OIDC"
974        );
975    }
976
977    #[test]
978    fn write_all_permission_tags_identity_as_oidc() {
979        // `permissions: write-all` grants every permission including id-token: write.
980        let yaml = r#"
981permissions: write-all
982jobs:
983  ci:
984    steps:
985      - run: echo hi
986"#;
987        let graph = parse(yaml);
988        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
989        assert_eq!(identities.len(), 1);
990        assert_eq!(
991            identities[0].metadata.get(META_OIDC),
992            Some(&"true".to_string()),
993            "write-all grants all permissions including id-token: write"
994        );
995    }
996
997    #[test]
998    fn container_steps_linked_to_container_image() {
999        let yaml = r#"
1000jobs:
1001  build:
1002    container: ubuntu:22.04
1003    steps:
1004      - name: Step A
1005        run: echo "a"
1006      - name: Step B
1007        run: echo "b"
1008"#;
1009        let graph = parse(yaml);
1010        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1011        assert_eq!(images.len(), 1);
1012        let container_id = images[0].id;
1013
1014        // Both steps must have UsesImage edges to the container
1015        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1016        assert_eq!(steps.len(), 2);
1017        for step in &steps {
1018            let links: Vec<_> = graph
1019                .edges_from(step.id)
1020                .filter(|e| e.kind == EdgeKind::UsesImage && e.to == container_id)
1021                .collect();
1022            assert_eq!(
1023                links.len(),
1024                1,
1025                "step '{}' must link to container",
1026                step.name
1027            );
1028        }
1029    }
1030
1031    #[test]
1032    fn container_authority_propagates_to_floating_image() {
1033        // Integration: authority from a step running in a floating container should
1034        // propagate to the container Image node (Untrusted), generating a finding.
1035        let yaml = r#"
1036permissions: write-all
1037jobs:
1038  build:
1039    container: ubuntu:22.04
1040    steps:
1041      - run: echo hi
1042"#;
1043        use taudit_core::propagation::DEFAULT_MAX_HOPS;
1044        use taudit_core::rules;
1045        let graph = parse(yaml);
1046        let findings = rules::run_all_rules(&graph, DEFAULT_MAX_HOPS);
1047        // Should detect: GITHUB_TOKEN (broad) propagates to ubuntu:22.04 (Untrusted) via step
1048        assert!(
1049            findings
1050                .iter()
1051                .any(|f| f.category == taudit_core::finding::FindingCategory::AuthorityPropagation),
1052            "authority should propagate from step to floating container"
1053        );
1054    }
1055
1056    #[test]
1057    fn aws_oidc_creates_identity_node() {
1058        let yaml = r#"
1059jobs:
1060  deploy:
1061    steps:
1062      - name: Configure AWS credentials
1063        uses: aws-actions/configure-aws-credentials@v4
1064        with:
1065          role-to-assume: arn:aws:iam::123456789012:role/my-deploy-role
1066          aws-region: us-east-1
1067"#;
1068        let graph = parse(yaml);
1069        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1070        assert_eq!(identities.len(), 1);
1071        // ARN arn:aws:iam::123456789012:role/my-deploy-role → last '/' segment
1072        assert_eq!(identities[0].name, "AWS/my-deploy-role");
1073        assert_eq!(
1074            identities[0].metadata.get(META_OIDC),
1075            Some(&"true".to_string())
1076        );
1077        assert_eq!(
1078            identities[0].metadata.get(META_IDENTITY_SCOPE),
1079            Some(&"broad".to_string())
1080        );
1081    }
1082
1083    #[test]
1084    fn gcp_oidc_creates_identity_node() {
1085        let yaml = r#"
1086jobs:
1087  deploy:
1088    steps:
1089      - name: Authenticate to GCP
1090        uses: google-github-actions/auth@v2
1091        with:
1092          workload_identity_provider: projects/123/locations/global/workloadIdentityPools/my-pool/providers/my-provider
1093          service_account: my-sa@my-project.iam.gserviceaccount.com
1094"#;
1095        let graph = parse(yaml);
1096        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1097        assert_eq!(identities.len(), 1);
1098        assert!(identities[0].name.starts_with("GCP/"));
1099        assert_eq!(
1100            identities[0].metadata.get(META_OIDC),
1101            Some(&"true".to_string())
1102        );
1103    }
1104
1105    #[test]
1106    fn azure_oidc_creates_identity_node() {
1107        let yaml = r#"
1108jobs:
1109  deploy:
1110    steps:
1111      - name: Azure login
1112        uses: azure/login@v2
1113        with:
1114          client-id: ${{ vars.AZURE_CLIENT_ID }}
1115          tenant-id: ${{ vars.AZURE_TENANT_ID }}
1116          subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
1117"#;
1118        let graph = parse(yaml);
1119        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1120        assert_eq!(identities.len(), 1);
1121        assert!(identities[0].name.starts_with("Azure/"));
1122        assert_eq!(
1123            identities[0].metadata.get(META_OIDC),
1124            Some(&"true".to_string())
1125        );
1126    }
1127
1128    #[test]
1129    fn azure_static_sp_does_not_create_identity_node() {
1130        // When client-secret is present, it's a static service principal — not OIDC.
1131        // The secret scanning in with: handles this; classify_cloud_auth returns None.
1132        let yaml = r#"
1133jobs:
1134  deploy:
1135    steps:
1136      - name: Azure login
1137        uses: azure/login@v2
1138        with:
1139          client-id: my-client-id
1140          client-secret: ${{ secrets.AZURE_CLIENT_SECRET }}
1141          tenant-id: my-tenant
1142"#;
1143        let graph = parse(yaml);
1144        // Identity node should NOT be created by cloud auth inference
1145        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1146        assert!(
1147            identities.is_empty(),
1148            "static SP should not create an OIDC Identity node"
1149        );
1150        // But the secret SHOULD be captured by existing with: scanning
1151        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1152        assert_eq!(secrets.len(), 1);
1153        assert_eq!(secrets[0].name, "AZURE_CLIENT_SECRET");
1154    }
1155
1156    #[test]
1157    fn aws_static_creds_do_not_create_identity_node() {
1158        // Static access key path — no role-to-assume, so classify_cloud_auth returns None.
1159        // The access key secret is captured by with: scanning.
1160        let yaml = r#"
1161jobs:
1162  deploy:
1163    steps:
1164      - uses: aws-actions/configure-aws-credentials@v4
1165        with:
1166          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
1167          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
1168          aws-region: us-east-1
1169"#;
1170        let graph = parse(yaml);
1171        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1172        assert!(
1173            identities.is_empty(),
1174            "static AWS creds must not create Identity node"
1175        );
1176        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1177        assert_eq!(secrets.len(), 2, "both static secrets captured");
1178    }
1179
1180    #[test]
1181    fn workflow_level_permissions_create_identity() {
1182        let yaml = r#"
1183permissions: write-all
1184jobs:
1185  ci:
1186    steps:
1187      - run: echo hi
1188"#;
1189        let graph = parse(yaml);
1190        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1191        assert_eq!(identities.len(), 1);
1192        assert_eq!(identities[0].name, "GITHUB_TOKEN");
1193        assert_eq!(
1194            identities[0].metadata.get(META_PERMISSIONS).unwrap(),
1195            "write-all"
1196        );
1197    }
1198}