Skip to main content

taudit_parse_gha/
lib.rs

1use std::collections::HashMap;
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
/// Metadata value stored under `META_INFERRED` to mark inferred (not precisely mapped) secret references.
9const META_INFERRED_VAL: &str = "true";
10
11/// GitHub Actions workflow parser.
12pub struct GhaParser;
13
14impl PipelineParser for GhaParser {
15    fn platform(&self) -> &str {
16        "github-actions"
17    }
18
19    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
20        let workflow: GhaWorkflow = serde_yaml::from_str(content)
21            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
22
23        let mut graph = AuthorityGraph::new(source.clone());
24        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
25
26        let is_pull_request_target = workflow
27            .triggers
28            .as_ref()
29            .map(trigger_has_pull_request_target)
30            .unwrap_or(false);
31
32        // Workflow-level permissions -> GITHUB_TOKEN identity node
33        let token_id = if let Some(ref perms) = workflow.permissions {
34            let perm_string = perms.to_string();
35            let scope = IdentityScope::from_permissions(&perm_string);
36            let mut meta = HashMap::new();
37            meta.insert(META_PERMISSIONS.into(), perm_string.clone());
38            meta.insert(META_IDENTITY_SCOPE.into(), format!("{scope:?}").to_lowercase());
39            // OIDC: id-token: write → token is OIDC-capable (federated scope).
40            // Check the formatted substring directly — Permissions::Map fmt produces
41            // "id-token: write" so this won't false-positive on "contents: write".
42            if perm_string.contains("id-token: write") || perm_string == "write-all" {
43                meta.insert(META_OIDC.into(), "true".into());
44            }
45            Some(graph.add_node_with_metadata(
46                NodeKind::Identity,
47                "GITHUB_TOKEN",
48                TrustZone::FirstParty,
49                meta,
50            ))
51        } else {
52            None
53        };
54
55        for (job_name, job) in &workflow.jobs {
56            // Job-level permissions override workflow-level
57            let job_token_id = if let Some(ref perms) = job.permissions {
58                let perm_string = perms.to_string();
59                let scope = IdentityScope::from_permissions(&perm_string);
60                let mut meta = HashMap::new();
61                meta.insert(META_PERMISSIONS.into(), perm_string.clone());
62                meta.insert(META_IDENTITY_SCOPE.into(), format!("{scope:?}").to_lowercase());
63                if perm_string.contains("id-token: write") {
64                    meta.insert(META_OIDC.into(), "true".into());
65                }
66                Some(graph.add_node_with_metadata(
67                    NodeKind::Identity,
68                    format!("GITHUB_TOKEN ({})", job_name),
69                    TrustZone::FirstParty,
70                    meta,
71                ))
72            } else {
73                token_id
74            };
75
76            // Reusable workflow: job.uses= means this job delegates to another workflow.
77            // We cannot resolve it inline — mark the graph partial and skip steps.
78            if let Some(ref uses) = job.uses {
79                let trust_zone = if is_sha_pinned(uses) {
80                    TrustZone::ThirdParty
81                } else {
82                    TrustZone::Untrusted
83                };
84                let rw_id = graph.add_node(NodeKind::Image, uses, trust_zone);
85                // Synthetic step represents this job delegating to the called workflow
86                let job_step_id =
87                    graph.add_node(NodeKind::Step, job_name, TrustZone::FirstParty);
88                graph.add_edge(job_step_id, rw_id, EdgeKind::DelegatesTo);
89                if let Some(tok_id) = job_token_id {
90                    graph.add_edge(job_step_id, tok_id, EdgeKind::HasAccessTo);
91                }
92                graph.mark_partial(format!(
93                    "reusable workflow '{}' in job '{}' cannot be resolved inline — authority within the called workflow is unknown",
94                    uses, job_name
95                ));
96                continue;
97            }
98
99            // Matrix strategy: authority shape may differ per matrix entry — mark Partial
100            if job
101                .strategy
102                .as_ref()
103                .and_then(|s| s.get("matrix"))
104                .is_some()
105            {
106                graph.mark_partial(format!(
107                    "job '{}' uses matrix strategy — authority shape may differ per matrix entry",
108                    job_name
109                ));
110            }
111
112            // Container: job-level container image — add as Image node and capture ID
113            // so each step in this job can be linked to it via UsesImage.
114            let container_image_id: Option<NodeId> = if let Some(ref container) = job.container {
115                let image_str = container.image();
116                let pinned = is_docker_digest_pinned(image_str);
117                let trust_zone = if pinned {
118                    TrustZone::ThirdParty
119                } else {
120                    TrustZone::Untrusted
121                };
122                let mut meta = HashMap::new();
123                meta.insert(META_CONTAINER.into(), "true".into());
124                if pinned {
125                    if let Some(digest) = image_str.split("@sha256:").nth(1) {
126                        meta.insert(META_DIGEST.into(), format!("sha256:{}", digest));
127                    }
128                }
129                Some(graph.add_node_with_metadata(NodeKind::Image, image_str, trust_zone, meta))
130            } else {
131                None
132            };
133
134            for (step_idx, step) in job.steps.iter().enumerate() {
135                let default_name = format!("{}[{}]", job_name, step_idx);
136                let step_name = step.name.as_deref().unwrap_or(&default_name);
137
138                // Determine trust zone and create image node if `uses:` present
139                let (trust_zone, image_node_id) = if let Some(ref uses) = step.uses {
140                    let (zone, image_id) = classify_action(uses, &mut graph);
141                    (zone, Some(image_id))
142                } else if is_pull_request_target {
143                    // run: step in a pull_request_target workflow — may execute fork code
144                    (TrustZone::Untrusted, None)
145                } else {
146                    // Inline `run:` step — first party
147                    (TrustZone::FirstParty, None)
148                };
149
150                let step_id = graph.add_node(NodeKind::Step, step_name, trust_zone);
151
152                // Link step to action image
153                if let Some(img_id) = image_node_id {
154                    graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
155                }
156
157                // Link step to job container — steps run inside the container's execution
158                // environment, so a floating container is a supply chain risk for every step.
159                if let Some(img_id) = container_image_id {
160                    graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
161                }
162
163                // Link step to GITHUB_TOKEN if it exists
164                if let Some(tok_id) = job_token_id {
165                    graph.add_edge(step_id, tok_id, EdgeKind::HasAccessTo);
166                }
167
168                // Cloud identity inference: detect known OIDC cloud auth actions and
169                // create an Identity node representing the assumed cloud identity.
170                if let Some(ref uses) = step.uses {
171                    if let Some(cloud_id) =
172                        classify_cloud_auth(uses, step.with.as_ref(), &mut graph)
173                    {
174                        graph.add_edge(step_id, cloud_id, EdgeKind::HasAccessTo);
175                    }
176                }
177
178                // Process secrets from workflow-level `env:` (inherited by all jobs/steps)
179                if let Some(ref env) = workflow.env {
180                    for env_val in env.values() {
181                        if is_secret_reference(env_val) {
182                            let secret_name = extract_secret_name(env_val);
183                            let secret_id =
184                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
185                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
186                        }
187                    }
188                }
189
190                // Process secrets from job-level `env:` (inherited by all steps)
191                if let Some(ref env) = job.env {
192                    for env_val in env.values() {
193                        if is_secret_reference(env_val) {
194                            let secret_name = extract_secret_name(env_val);
195                            let secret_id =
196                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
197                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
198                        }
199                    }
200                }
201
202                // Process secrets from step-level `env:` block
203                if let Some(ref env) = step.env {
204                    for env_val in env.values() {
205                        if is_secret_reference(env_val) {
206                            let secret_name = extract_secret_name(env_val);
207                            let secret_id =
208                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
209                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
210                        }
211                    }
212                }
213
214                // Process secrets from `with:` block
215                if let Some(ref with) = step.with {
216                    for val in with.values() {
217                        if is_secret_reference(val) {
218                            let secret_name = extract_secret_name(val);
219                            let secret_id =
220                                find_or_create_secret(&mut graph, &mut secret_ids, &secret_name);
221                            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
222                        }
223                    }
224                }
225
226                // Detect inferred secrets in `run:` script blocks
227                if let Some(ref run) = step.run {
228                    if run.contains("${{ secrets.") {
229                        // Extract secret names from the shell script
230                        let mut pos = 0;
231                        while let Some(start) = run[pos..].find("secrets.") {
232                            let abs_start = pos + start + 8;
233                            let remaining = &run[abs_start..];
234                            let end = remaining
235                                .find(|c: char| !c.is_alphanumeric() && c != '_')
236                                .unwrap_or(remaining.len());
237                            let secret_name = &remaining[..end];
238                            if !secret_name.is_empty() {
239                                let secret_id = find_or_create_secret(
240                                    &mut graph,
241                                    &mut secret_ids,
242                                    secret_name,
243                                );
244                                // Mark as inferred — not precisely mapped
245                                if let Some(node) = graph.nodes.get_mut(secret_id) {
246                                    node.metadata
247                                        .insert(META_INFERRED.into(), META_INFERRED_VAL.into());
248                                }
249                                graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
250                                graph.mark_partial(format!(
251                                    "secret '{}' referenced in run: script — inferred, not precisely mapped",
252                                    secret_name
253                                ));
254                            }
255                            pos = abs_start + end;
256                        }
257                    }
258                }
259            }
260        }
261
262        Ok(graph)
263    }
264}
265
266/// Returns true if the workflow's `on:` triggers include `pull_request_target`.
267/// GHA `on:` is polymorphic: string, sequence, or mapping.
268fn trigger_has_pull_request_target(triggers: &serde_yaml::Value) -> bool {
269    const PRT: &str = "pull_request_target";
270    match triggers {
271        serde_yaml::Value::String(s) => s == PRT,
272        serde_yaml::Value::Sequence(seq) => seq
273            .iter()
274            .any(|v| v.as_str().map(|s| s == PRT).unwrap_or(false)),
275        serde_yaml::Value::Mapping(map) => map
276            .iter()
277            .any(|(k, _)| k.as_str().map(|s| s == PRT).unwrap_or(false)),
278        _ => false,
279    }
280}
281
282/// Classify a `uses:` reference into trust zone and create image node.
283fn classify_action(uses: &str, graph: &mut AuthorityGraph) -> (TrustZone, NodeId) {
284    let pinned = is_sha_pinned(uses);
285    let is_local = uses.starts_with("./");
286
287    let zone = if is_local {
288        TrustZone::FirstParty
289    } else if pinned {
290        TrustZone::ThirdParty
291    } else {
292        TrustZone::Untrusted
293    };
294
295    let mut meta = HashMap::new();
296    if pinned {
297        if let Some(sha) = uses.split('@').next_back() {
298            meta.insert(META_DIGEST.into(), sha.into());
299        }
300    }
301
302    let id = graph.add_node_with_metadata(NodeKind::Image, uses, zone, meta);
303    (zone, id)
304}
305
/// Returns true if `val` contains a GitHub Actions expression (`${{ … }}`)
/// that reads from the `secrets` context.
///
/// Whitespace inside an expression is not significant, so `${{secrets.X}}`,
/// `${{  secrets.X  }}` and compound forms such as `${{ a || secrets.X }}`
/// are all recognised. A `secrets.` that is merely the tail of a longer path
/// or identifier (e.g. `outputs.secrets.x`, `my_secrets.x`) is not counted.
/// An expression missing its closing `}}` is treated as running to the end
/// of the string, so everything containing the literal `${{ secrets.` still
/// matches.
fn is_secret_reference(val: &str) -> bool {
    /// True when `expr` dereferences the top-level `secrets` context.
    fn reads_secrets_context(expr: &str) -> bool {
        expr.match_indices("secrets.").any(|(idx, _)| {
            match expr[..idx].chars().next_back() {
                // Preceded by an identifier or path character: part of another name.
                Some(prev) => !(prev.is_alphanumeric() || prev == '_' || prev == '.'),
                None => true,
            }
        })
    }

    let mut rest = val;
    while let Some((_, after_open)) = rest.split_once("${{") {
        let (expr, tail) = after_open.split_once("}}").unwrap_or((after_open, ""));
        if reads_secrets_context(expr) {
            return true;
        }
        rest = tail;
    }
    false
}
309
/// Pulls the secret name out of a value such as `"${{ secrets.MY_SECRET }}"`.
///
/// The name is the run of letters, digits and underscores that follows the
/// first `secrets.` in `val`. When `val` has no `secrets.` at all, the whole
/// value is returned unchanged.
fn extract_secret_name(val: &str) -> String {
    match val.split_once("secrets.") {
        Some((_, tail)) => tail
            .chars()
            .take_while(|c| c.is_alphanumeric() || *c == '_')
            .collect(),
        None => val.to_owned(),
    }
}
322
323fn find_or_create_secret(
324    graph: &mut AuthorityGraph,
325    cache: &mut HashMap<String, NodeId>,
326    name: &str,
327) -> NodeId {
328    if let Some(&id) = cache.get(name) {
329        return id;
330    }
331    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
332    cache.insert(name.to_string(), id);
333    id
334}
335
336/// Detect known OIDC cloud authentication actions and create an Identity node
337/// representing the cloud identity that will be assumed.
338///
339/// Only handles the OIDC/federated path — static credential inputs (e.g.
340/// `aws-secret-access-key: ${{ secrets.X }}`) are already captured by the
341/// regular `with:` secret scanning and don't need a separate Identity node.
342///
343/// Returns `Some(NodeId)` of the created Identity, or `None` if not recognized.
344fn classify_cloud_auth(
345    uses: &str,
346    with: Option<&HashMap<String, String>>,
347    graph: &mut AuthorityGraph,
348) -> Option<NodeId> {
349    // Strip `@version` — match any version of the action
350    let action = uses.split('@').next().unwrap_or(uses);
351
352    match action {
353        "aws-actions/configure-aws-credentials" => {
354            // OIDC path: role-to-assume present (no static access key needed)
355            let w = with?;
356            let role = w.get("role-to-assume")?;
357            // ARN format: arn:aws:iam::123456789012:role/my-role
358            // Split on '/' to get the role name; fall back to the full value.
359            let short = role.split('/').next_back().unwrap_or(role.as_str());
360            let mut meta = HashMap::new();
361            meta.insert(META_OIDC.into(), "true".into());
362            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
363            meta.insert(META_PERMISSIONS.into(), "AWS role assumption (OIDC)".into());
364            Some(graph.add_node_with_metadata(
365                NodeKind::Identity,
366                format!("AWS/{short}"),
367                TrustZone::FirstParty,
368                meta,
369            ))
370        }
371        "google-github-actions/auth" => {
372            // OIDC path: workload_identity_provider present
373            let w = with?;
374            let provider = w.get("workload_identity_provider")?;
375            let short = provider.split('/').next_back().unwrap_or(provider.as_str());
376            let mut meta = HashMap::new();
377            meta.insert(META_OIDC.into(), "true".into());
378            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
379            meta.insert(META_PERMISSIONS.into(), "GCP workload identity federation".into());
380            Some(graph.add_node_with_metadata(
381                NodeKind::Identity,
382                format!("GCP/{short}"),
383                TrustZone::FirstParty,
384                meta,
385            ))
386        }
387        "azure/login" => {
388            // OIDC path: client-id present without client-secret
389            let w = with?;
390            let client_id = w.get("client-id")?;
391            // Only treat as OIDC if no static client-secret is provided
392            if w.contains_key("client-secret") {
393                return None; // static SP creds captured by with: secret scanning
394            }
395            let mut meta = HashMap::new();
396            meta.insert(META_OIDC.into(), "true".into());
397            meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
398            meta.insert(META_PERMISSIONS.into(), "Azure federated credential (OIDC)".into());
399            Some(graph.add_node_with_metadata(
400                NodeKind::Identity,
401                format!("Azure/{client_id}"),
402                TrustZone::FirstParty,
403                meta,
404            ))
405        }
406        _ => None,
407    }
408}
409
410// ── Serde models for GHA YAML ──────────────────────────
411
412/// Flexible permissions: can be a string ("write-all") or a map.
413#[derive(Debug, Clone, Deserialize)]
414#[serde(untagged)]
415pub enum Permissions {
416    String(String),
417    Map(HashMap<String, String>),
418}
419
420impl std::fmt::Display for Permissions {
421    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
422        match self {
423            Permissions::String(s) => write!(f, "{s}"),
424            Permissions::Map(m) => {
425                let parts: Vec<String> = m.iter().map(|(k, v)| format!("{k}: {v}")).collect();
426                write!(f, "{{ {} }}", parts.join(", "))
427            }
428        }
429    }
430}
431
/// Top-level shape of a GitHub Actions workflow file as read from YAML.
///
/// Every field falls back to its default when the corresponding key is
/// missing from the document.
#[derive(Debug, Deserialize)]
pub struct GhaWorkflow {
    /// Workflow trigger(s). Polymorphic: string, sequence, or mapping.
    #[serde(rename = "on", default)]
    pub triggers: Option<serde_yaml::Value>,
    /// Workflow-level `permissions:` block, if declared.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Workflow-level env vars, inherited by all jobs and steps.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Jobs keyed by job id; empty when the `jobs:` key is absent.
    #[serde(default)]
    pub jobs: HashMap<String, GhaJob>,
}
445
446/// Job-level container config. Polymorphic: string image or map with `image:` key.
447#[derive(Debug, Deserialize)]
448#[serde(untagged)]
449pub enum ContainerConfig {
450    Image(String),
451    Full { image: String },
452}
453
454impl ContainerConfig {
455    pub fn image(&self) -> &str {
456        match self {
457            ContainerConfig::Image(s) => s,
458            ContainerConfig::Full { image } => image,
459        }
460    }
461}
462
/// One entry of the workflow's `jobs:` mapping.
///
/// Every field falls back to its default when the corresponding key is
/// missing from the job.
#[derive(Debug, Deserialize)]
pub struct GhaJob {
    /// Job-level `permissions:` block; when present the parser uses it
    /// instead of the workflow-level one for this job's token.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Job-level env vars; the parser scans them for secrets on every step of the job.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Steps in declaration order; empty when the job has no `steps:` key.
    #[serde(default)]
    pub steps: Vec<GhaStep>,
    /// Reusable workflow reference — `uses: owner/repo/.github/workflows/foo.yml@ref`
    #[serde(default)]
    pub uses: Option<String>,
    /// Job container image.
    #[serde(default)]
    pub container: Option<ContainerConfig>,
    /// Matrix/strategy configuration. When a matrix is present, the authority
    /// shape may differ per matrix entry — graph is marked Partial.
    #[serde(default)]
    pub strategy: Option<serde_yaml::Value>,
}
482
/// One entry of a job's `steps:` list.
#[derive(Debug, Deserialize)]
pub struct GhaStep {
    /// Display name; the parser falls back to `<job>[<index>]` when absent.
    pub name: Option<String>,
    /// Action reference (`uses:`); the parser adds an Image node for it.
    pub uses: Option<String>,
    /// Inline script (`run:`); the parser scans it for `secrets.<NAME>` references.
    pub run: Option<String>,
    /// Step-level env vars.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Inputs passed to the action via `with:`.
    #[serde(rename = "with", default)]
    pub with: Option<HashMap<String, String>>,
}
493
494#[cfg(test)]
495mod tests {
496    use super::*;
497
498    fn parse(yaml: &str) -> AuthorityGraph {
499        let parser = GhaParser;
500        let source = PipelineSource {
501            file: "test.yml".into(),
502            repo: None,
503            git_ref: None,
504        };
505        parser.parse(yaml, &source).unwrap()
506    }
507
508    #[test]
509    fn parses_simple_workflow() {
510        let yaml = r#"
511permissions: write-all
512jobs:
513  build:
514    steps:
515      - name: Checkout
516        uses: actions/checkout@v4
517      - name: Build
518        run: make build
519"#;
520        let graph = parse(yaml);
521        assert!(graph.nodes.len() >= 3); // GITHUB_TOKEN + 2 steps + 1 image
522    }
523
524    #[test]
525    fn detects_secret_in_env() {
526        let yaml = r#"
527jobs:
528  deploy:
529    steps:
530      - name: Deploy
531        run: ./deploy.sh
532        env:
533          AWS_KEY: "${{ secrets.AWS_ACCESS_KEY_ID }}"
534"#;
535        let graph = parse(yaml);
536        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
537        assert_eq!(secrets.len(), 1);
538        assert_eq!(secrets[0].name, "AWS_ACCESS_KEY_ID");
539    }
540
541    #[test]
542    fn classifies_unpinned_action_as_untrusted() {
543        let yaml = r#"
544jobs:
545  ci:
546    steps:
547      - uses: actions/checkout@v4
548"#;
549        let graph = parse(yaml);
550        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
551        assert_eq!(images.len(), 1);
552        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
553    }
554
555    #[test]
556    fn classifies_sha_pinned_action_as_third_party() {
557        let yaml = r#"
558jobs:
559  ci:
560    steps:
561      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
562"#;
563        let graph = parse(yaml);
564        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
565        assert_eq!(images.len(), 1);
566        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
567    }
568
569    #[test]
570    fn classifies_local_action_as_first_party() {
571        let yaml = r#"
572jobs:
573  ci:
574    steps:
575      - uses: ./.github/actions/my-action
576"#;
577        let graph = parse(yaml);
578        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
579        assert_eq!(images.len(), 1);
580        assert_eq!(images[0].trust_zone, TrustZone::FirstParty);
581    }
582
583    #[test]
584    fn detects_secret_in_with() {
585        let yaml = r#"
586jobs:
587  deploy:
588    steps:
589      - name: Publish
590        uses: some-org/publish@v1
591        with:
592          token: "${{ secrets.NPM_TOKEN }}"
593"#;
594        let graph = parse(yaml);
595        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
596        assert_eq!(secrets.len(), 1);
597        assert_eq!(secrets[0].name, "NPM_TOKEN");
598    }
599
600    #[test]
601    fn inferred_secret_in_run_block_detected() {
602        let yaml = r#"
603jobs:
604  deploy:
605    steps:
606      - name: Deploy
607        run: |
608          curl -H "Authorization: ${{ secrets.API_TOKEN }}" https://api.example.com
609"#;
610        let graph = parse(yaml);
611        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
612        assert_eq!(secrets.len(), 1);
613        assert_eq!(secrets[0].name, "API_TOKEN");
614        assert_eq!(
615            secrets[0].metadata.get(META_INFERRED),
616            Some(&"true".to_string())
617        );
618        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
619        assert!(!graph.completeness_gaps.is_empty());
620    }
621
622    #[test]
623    fn job_level_env_inherited_by_steps() {
624        let yaml = r#"
625jobs:
626  build:
627    env:
628      DB_PASSWORD: "${{ secrets.DB_PASSWORD }}"
629    steps:
630      - name: Step A
631        run: echo "a"
632      - name: Step B
633        run: echo "b"
634"#;
635        let graph = parse(yaml);
636        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
637        assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
638
639        // Both steps should have access to the secret
640        let secret_id = secrets[0].id;
641        let accessing_steps = graph
642            .edges_to(secret_id)
643            .filter(|e| e.kind == EdgeKind::HasAccessTo)
644            .count();
645        assert_eq!(accessing_steps, 2, "both steps inherit job-level env");
646    }
647
648    #[test]
649    fn identity_scope_set_on_token() {
650        let yaml = r#"
651permissions: write-all
652jobs:
653  ci:
654    steps:
655      - run: echo hi
656"#;
657        let graph = parse(yaml);
658        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
659        assert_eq!(identities.len(), 1);
660        assert_eq!(
661            identities[0].metadata.get(META_IDENTITY_SCOPE),
662            Some(&"broad".to_string())
663        );
664    }
665
666    #[test]
667    fn constrained_identity_scope() {
668        let yaml = r#"
669permissions:
670  contents: read
671jobs:
672  ci:
673    steps:
674      - run: echo hi
675"#;
676        let graph = parse(yaml);
677        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
678        assert_eq!(identities.len(), 1);
679        assert_eq!(
680            identities[0].metadata.get(META_IDENTITY_SCOPE),
681            Some(&"constrained".to_string())
682        );
683    }
684
685    #[test]
686    fn pull_request_target_string_trigger_marks_run_steps_untrusted() {
687        let yaml = r#"
688on: pull_request_target
689jobs:
690  check:
691    steps:
692      - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
693        with:
694          ref: ${{ github.event.pull_request.head.sha }}
695      - run: npm test
696"#;
697        let graph = parse(yaml);
698        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
699        assert_eq!(steps.len(), 2);
700
701        // run: step should be Untrusted (might execute fork code)
702        let run_step = steps.iter().find(|s| s.name.contains("check[1]")).unwrap();
703        assert_eq!(
704            run_step.trust_zone,
705            TrustZone::Untrusted,
706            "run: step in pull_request_target workflow should be Untrusted"
707        );
708
709        // uses: step keeps its own trust zone (SHA-pinned = ThirdParty)
710        let checkout_step = steps.iter().find(|s| s.name.contains("check[0]")).unwrap();
711        assert_eq!(checkout_step.trust_zone, TrustZone::ThirdParty);
712    }
713
714    #[test]
715    fn pull_request_target_sequence_trigger_marks_run_steps_untrusted() {
716        let yaml = r#"
717on: [push, pull_request_target]
718jobs:
719  ci:
720    steps:
721      - run: echo hi
722"#;
723        let graph = parse(yaml);
724        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
725        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
726    }
727
728    #[test]
729    fn pull_request_target_mapping_trigger_marks_run_steps_untrusted() {
730        let yaml = r#"
731on:
732  pull_request_target:
733    types: [opened, synchronize]
734jobs:
735  ci:
736    steps:
737      - run: echo hi
738"#;
739        let graph = parse(yaml);
740        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
741        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
742    }
743
744    #[test]
745    fn push_trigger_does_not_mark_run_steps_untrusted() {
746        let yaml = r#"
747on: push
748jobs:
749  ci:
750    steps:
751      - run: echo hi
752"#;
753        let graph = parse(yaml);
754        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
755        assert_eq!(
756            steps[0].trust_zone,
757            TrustZone::FirstParty,
758            "push-triggered run: steps should remain FirstParty"
759        );
760    }
761
762    #[test]
763    fn workflow_level_env_inherited_by_all_steps() {
764        let yaml = r#"
765env:
766  DB_URL: "${{ secrets.DATABASE_URL }}"
767jobs:
768  build:
769    steps:
770      - name: Step A
771        run: echo "a"
772  test:
773    steps:
774      - name: Step B
775        run: echo "b"
776"#;
777        let graph = parse(yaml);
778        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
779        assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
780
781        // Both steps in both jobs should inherit the workflow-level secret
782        let secret_id = secrets[0].id;
783        let accessing_steps = graph
784            .edges_to(secret_id)
785            .filter(|e| e.kind == EdgeKind::HasAccessTo)
786            .count();
787        assert_eq!(accessing_steps, 2, "both steps inherit workflow-level env");
788    }
789
790    #[test]
791    fn matrix_strategy_marks_graph_partial() {
792        let yaml = r#"
793jobs:
794  test:
795    strategy:
796      matrix:
797        os: [ubuntu-latest, windows-latest, macos-latest]
798    steps:
799      - run: echo hi
800"#;
801        let graph = parse(yaml);
802        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
803        assert!(
804            graph
805                .completeness_gaps
806                .iter()
807                .any(|g| g.contains("matrix")),
808            "matrix strategy should be recorded as a completeness gap"
809        );
810    }
811
812    #[test]
813    fn job_without_matrix_does_not_mark_partial() {
814        let yaml = r#"
815jobs:
816  build:
817    steps:
818      - run: cargo build
819"#;
820        let graph = parse(yaml);
821        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
822    }
823
824    #[test]
825    fn reusable_workflow_creates_image_and_marks_partial() {
826        let yaml = r#"
827jobs:
828  call:
829    uses: org/repo/.github/workflows/deploy.yml@main
830"#;
831        let graph = parse(yaml);
832        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
833        assert_eq!(images.len(), 1);
834        assert_eq!(images[0].name, "org/repo/.github/workflows/deploy.yml@main");
835        assert_eq!(images[0].trust_zone, TrustZone::Untrusted); // not SHA-pinned
836
837        // Step node representing the job delegation
838        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
839        assert_eq!(steps.len(), 1);
840        assert_eq!(steps[0].name, "call");
841
842        // DelegatesTo edge from step to reusable workflow image
843        let delegates: Vec<_> = graph
844            .edges_from(steps[0].id)
845            .filter(|e| e.kind == EdgeKind::DelegatesTo)
846            .collect();
847        assert_eq!(delegates.len(), 1);
848
849        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
850    }
851
852    #[test]
853    fn reusable_workflow_sha_pinned_is_third_party() {
854        let yaml = r#"
855jobs:
856  call:
857    uses: org/repo/.github/workflows/deploy.yml@a5ac7e51b41094c92402da3b24376905380afc29
858"#;
859        let graph = parse(yaml);
860        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
861        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
862    }
863
864    #[test]
865    fn container_unpinned_creates_image_node_untrusted() {
866        let yaml = r#"
867jobs:
868  build:
869    container: ubuntu:22.04
870    steps:
871      - run: echo hi
872"#;
873        let graph = parse(yaml);
874        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
875        assert_eq!(images.len(), 1);
876        assert_eq!(images[0].name, "ubuntu:22.04");
877        assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
878        assert_eq!(
879            images[0].metadata.get(META_CONTAINER),
880            Some(&"true".to_string())
881        );
882    }
883
884    #[test]
885    fn container_digest_pinned_creates_image_node_third_party() {
886        let yaml = r#"
887jobs:
888  build:
889    container:
890      image: "ubuntu@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
891    steps:
892      - run: echo hi
893"#;
894        let graph = parse(yaml);
895        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
896        assert_eq!(images.len(), 1);
897        assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
898        assert_eq!(
899            images[0].metadata.get(META_CONTAINER),
900            Some(&"true".to_string())
901        );
902    }
903
904    #[test]
905    fn oidc_permission_tags_identity_with_meta_oidc() {
906        let yaml = r#"
907permissions:
908  id-token: write
909  contents: read
910jobs:
911  ci:
912    steps:
913      - run: echo hi
914"#;
915        let graph = parse(yaml);
916        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
917        assert_eq!(identities.len(), 1);
918        assert_eq!(
919            identities[0].metadata.get(META_OIDC),
920            Some(&"true".to_string()),
921            "id-token: write should mark identity as OIDC-capable"
922        );
923    }
924
925    #[test]
926    fn non_oidc_permission_does_not_tag_meta_oidc() {
927        let yaml = r#"
928permissions:
929  contents: read
930jobs:
931  ci:
932    steps:
933      - run: echo hi
934"#;
935        let graph = parse(yaml);
936        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
937        assert_eq!(identities.len(), 1);
938        assert!(
939            identities[0].metadata.get(META_OIDC).is_none(),
940            "contents:read should not tag as OIDC"
941        );
942    }
943
944    #[test]
945    fn contents_write_without_id_token_does_not_tag_oidc() {
946        // Regression: "contents: write" contains "write" but not "id-token: write".
947        // Should NOT be tagged as OIDC-capable.
948        let yaml = r#"
949permissions:
950  contents: write
951jobs:
952  ci:
953    steps:
954      - run: echo hi
955"#;
956        let graph = parse(yaml);
957        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
958        assert_eq!(identities.len(), 1);
959        assert!(
960            identities[0].metadata.get(META_OIDC).is_none(),
961            "contents:write without id-token must not be tagged OIDC"
962        );
963    }
964
965    #[test]
966    fn write_all_permission_tags_identity_as_oidc() {
967        // `permissions: write-all` grants every permission including id-token: write.
968        let yaml = r#"
969permissions: write-all
970jobs:
971  ci:
972    steps:
973      - run: echo hi
974"#;
975        let graph = parse(yaml);
976        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
977        assert_eq!(identities.len(), 1);
978        assert_eq!(
979            identities[0].metadata.get(META_OIDC),
980            Some(&"true".to_string()),
981            "write-all grants all permissions including id-token: write"
982        );
983    }
984
985    #[test]
986    fn container_steps_linked_to_container_image() {
987        let yaml = r#"
988jobs:
989  build:
990    container: ubuntu:22.04
991    steps:
992      - name: Step A
993        run: echo "a"
994      - name: Step B
995        run: echo "b"
996"#;
997        let graph = parse(yaml);
998        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
999        assert_eq!(images.len(), 1);
1000        let container_id = images[0].id;
1001
1002        // Both steps must have UsesImage edges to the container
1003        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1004        assert_eq!(steps.len(), 2);
1005        for step in &steps {
1006            let links: Vec<_> = graph
1007                .edges_from(step.id)
1008                .filter(|e| e.kind == EdgeKind::UsesImage && e.to == container_id)
1009                .collect();
1010            assert_eq!(links.len(), 1, "step '{}' must link to container", step.name);
1011        }
1012    }
1013
1014    #[test]
1015    fn container_authority_propagates_to_floating_image() {
1016        // Integration: authority from a step running in a floating container should
1017        // propagate to the container Image node (Untrusted), generating a finding.
1018        let yaml = r#"
1019permissions: write-all
1020jobs:
1021  build:
1022    container: ubuntu:22.04
1023    steps:
1024      - run: echo hi
1025"#;
1026        use taudit_core::rules;
1027        use taudit_core::propagation::DEFAULT_MAX_HOPS;
1028        let graph = parse(yaml);
1029        let findings = rules::run_all_rules(&graph, DEFAULT_MAX_HOPS);
1030        // Should detect: GITHUB_TOKEN (broad) propagates to ubuntu:22.04 (Untrusted) via step
1031        assert!(
1032            findings.iter().any(|f| f.category == taudit_core::finding::FindingCategory::AuthorityPropagation),
1033            "authority should propagate from step to floating container"
1034        );
1035    }
1036
1037    #[test]
1038    fn aws_oidc_creates_identity_node() {
1039        let yaml = r#"
1040jobs:
1041  deploy:
1042    steps:
1043      - name: Configure AWS credentials
1044        uses: aws-actions/configure-aws-credentials@v4
1045        with:
1046          role-to-assume: arn:aws:iam::123456789012:role/my-deploy-role
1047          aws-region: us-east-1
1048"#;
1049        let graph = parse(yaml);
1050        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1051        assert_eq!(identities.len(), 1);
1052        // ARN arn:aws:iam::123456789012:role/my-deploy-role → last '/' segment
1053        assert_eq!(identities[0].name, "AWS/my-deploy-role");
1054        assert_eq!(
1055            identities[0].metadata.get(META_OIDC),
1056            Some(&"true".to_string())
1057        );
1058        assert_eq!(
1059            identities[0].metadata.get(META_IDENTITY_SCOPE),
1060            Some(&"broad".to_string())
1061        );
1062    }
1063
1064    #[test]
1065    fn gcp_oidc_creates_identity_node() {
1066        let yaml = r#"
1067jobs:
1068  deploy:
1069    steps:
1070      - name: Authenticate to GCP
1071        uses: google-github-actions/auth@v2
1072        with:
1073          workload_identity_provider: projects/123/locations/global/workloadIdentityPools/my-pool/providers/my-provider
1074          service_account: my-sa@my-project.iam.gserviceaccount.com
1075"#;
1076        let graph = parse(yaml);
1077        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1078        assert_eq!(identities.len(), 1);
1079        assert!(identities[0].name.starts_with("GCP/"));
1080        assert_eq!(
1081            identities[0].metadata.get(META_OIDC),
1082            Some(&"true".to_string())
1083        );
1084    }
1085
1086    #[test]
1087    fn azure_oidc_creates_identity_node() {
1088        let yaml = r#"
1089jobs:
1090  deploy:
1091    steps:
1092      - name: Azure login
1093        uses: azure/login@v2
1094        with:
1095          client-id: ${{ vars.AZURE_CLIENT_ID }}
1096          tenant-id: ${{ vars.AZURE_TENANT_ID }}
1097          subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
1098"#;
1099        let graph = parse(yaml);
1100        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1101        assert_eq!(identities.len(), 1);
1102        assert!(identities[0].name.starts_with("Azure/"));
1103        assert_eq!(
1104            identities[0].metadata.get(META_OIDC),
1105            Some(&"true".to_string())
1106        );
1107    }
1108
1109    #[test]
1110    fn azure_static_sp_does_not_create_identity_node() {
1111        // When client-secret is present, it's a static service principal — not OIDC.
1112        // The secret scanning in with: handles this; classify_cloud_auth returns None.
1113        let yaml = r#"
1114jobs:
1115  deploy:
1116    steps:
1117      - name: Azure login
1118        uses: azure/login@v2
1119        with:
1120          client-id: my-client-id
1121          client-secret: ${{ secrets.AZURE_CLIENT_SECRET }}
1122          tenant-id: my-tenant
1123"#;
1124        let graph = parse(yaml);
1125        // Identity node should NOT be created by cloud auth inference
1126        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1127        assert!(
1128            identities.is_empty(),
1129            "static SP should not create an OIDC Identity node"
1130        );
1131        // But the secret SHOULD be captured by existing with: scanning
1132        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1133        assert_eq!(secrets.len(), 1);
1134        assert_eq!(secrets[0].name, "AZURE_CLIENT_SECRET");
1135    }
1136
1137    #[test]
1138    fn aws_static_creds_do_not_create_identity_node() {
1139        // Static access key path — no role-to-assume, so classify_cloud_auth returns None.
1140        // The access key secret is captured by with: scanning.
1141        let yaml = r#"
1142jobs:
1143  deploy:
1144    steps:
1145      - uses: aws-actions/configure-aws-credentials@v4
1146        with:
1147          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
1148          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
1149          aws-region: us-east-1
1150"#;
1151        let graph = parse(yaml);
1152        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1153        assert!(identities.is_empty(), "static AWS creds must not create Identity node");
1154        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1155        assert_eq!(secrets.len(), 2, "both static secrets captured");
1156    }
1157
1158    #[test]
1159    fn workflow_level_permissions_create_identity() {
1160        let yaml = r#"
1161permissions: write-all
1162jobs:
1163  ci:
1164    steps:
1165      - run: echo hi
1166"#;
1167        let graph = parse(yaml);
1168        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1169        assert_eq!(identities.len(), 1);
1170        assert_eq!(identities[0].name, "GITHUB_TOKEN");
1171        assert_eq!(
1172            identities[0].metadata.get(META_PERMISSIONS).unwrap(),
1173            "write-all"
1174        );
1175    }
1176}