1use std::collections::{BTreeMap, HashMap};
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
/// Value stored under `META_INFERRED` on secret nodes that were discovered by
/// scanning a `run:` script body for `secrets.*` references.
const META_INFERRED_VAL: &str = "true";
10
/// Stateless parser for GitHub Actions workflow files.
///
/// Implements [`PipelineParser`] and reports its platform as `"github-actions"`.
pub struct GhaParser;
13
14impl PipelineParser for GhaParser {
15 fn platform(&self) -> &str {
16 "github-actions"
17 }
18
19 fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
20 let mut de = serde_yaml::Deserializer::from_str(content);
21 let doc = de
22 .next()
23 .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
24 let workflow: GhaWorkflow = GhaWorkflow::deserialize(doc)
25 .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
26 let extra_docs = de.next().is_some();
27
28 let mut graph = AuthorityGraph::new(source.clone());
29 graph
30 .metadata
31 .insert(META_PLATFORM.into(), "github-actions".into());
32 if workflow.permissions.is_none() {
33 graph
39 .metadata
40 .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
41 }
42 if extra_docs {
43 graph.mark_partial(
44 GapKind::Expression,
45 "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
46 );
47 }
48 let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
49 let mut artifact_ids: HashMap<String, NodeId> = HashMap::new();
50
51 if let Some(EnvSpec::Template(_)) = workflow.env {
55 graph.mark_partial(
56 GapKind::Expression,
57 "workflow-level env: uses template expression — environment variable shape unknown"
58 .to_string(),
59 );
60 }
61
62 let is_pull_request_target = workflow
63 .triggers
64 .as_ref()
65 .map(trigger_has_pull_request_target)
66 .unwrap_or(false);
67
68 let trigger_list = collect_trigger_names(workflow.triggers.as_ref());
75 if !trigger_list.is_empty() {
76 let mut ordered: Vec<&str> = Vec::new();
79 if trigger_list.iter().any(|t| t == "pull_request_target") {
80 ordered.push("pull_request_target");
81 }
82 for t in &trigger_list {
83 if t != "pull_request_target" {
84 ordered.push(t);
85 }
86 }
87 let value = if ordered.len() == 1 {
90 ordered[0].to_string()
91 } else {
92 ordered.join(",")
93 };
94 graph.metadata.insert(META_TRIGGER.into(), value);
95 } else if is_pull_request_target {
96 graph
97 .metadata
98 .insert(META_TRIGGER.into(), "pull_request_target".into());
99 }
100
101 if let Some(triggers) = workflow.triggers.as_ref() {
106 let names = collect_trigger_names(Some(triggers));
107 if !names.is_empty() {
108 graph.metadata.insert(META_TRIGGERS.into(), names.join(","));
109 }
110 let inputs = collect_dispatch_inputs(triggers);
111 if !inputs.is_empty() {
112 graph
113 .metadata
114 .insert(META_DISPATCH_INPUTS.into(), inputs.join(","));
115 }
116 let call_inputs = collect_workflow_call_inputs(triggers);
117 if !call_inputs.is_empty() {
118 graph
119 .metadata
120 .insert(META_GHA_WORKFLOW_CALL_INPUTS.into(), call_inputs.join(","));
121 }
122 }
123
124 let token_id = if let Some(ref perms) = workflow.permissions {
129 let perm_string = perms.to_string();
130 let scope = IdentityScope::from_permissions(&perm_string);
131 let mut meta = HashMap::new();
132 meta.insert(META_PERMISSIONS.into(), perm_string.clone());
133 meta.insert(
134 META_IDENTITY_SCOPE.into(),
135 format!("{scope:?}").to_lowercase(),
136 );
137 if perm_string.contains("id-token: write") || perm_string == "write-all" {
141 meta.insert(META_OIDC.into(), "true".into());
142 }
143 Some(graph.add_node_with_metadata(
144 NodeKind::Identity,
145 "GITHUB_TOKEN",
146 TrustZone::FirstParty,
147 meta,
148 ))
149 } else {
150 let mut meta = HashMap::new();
151 meta.insert(META_IDENTITY_SCOPE.into(), "unknown".into());
152 meta.insert(META_IMPLICIT.into(), "true".into());
153 Some(graph.add_node_with_metadata(
154 NodeKind::Identity,
155 "GITHUB_TOKEN",
156 TrustZone::FirstParty,
157 meta,
158 ))
159 };
160
161 let mut job_output_records: Vec<String> = Vec::new();
165
166 let mut sorted_jobs: Vec<(&String, &GhaJob)> = workflow.jobs.iter().collect();
170 sorted_jobs.sort_by(|a, b| a.0.cmp(b.0));
171 for (job_name, job) in sorted_jobs {
172 let mut step_oidc_by_yaml_id: HashMap<String, bool> = HashMap::new();
177 if let Some(EnvSpec::Template(_)) = job.env {
181 graph.mark_partial(
182 GapKind::Expression,
183 format!(
184 "job '{job_name}' env: uses template expression — environment variable shape unknown"
185 ),
186 );
187 }
188
189 let job_token_id = if let Some(ref perms) = job.permissions {
191 let perm_string = perms.to_string();
192 let scope = IdentityScope::from_permissions(&perm_string);
193 let mut meta = HashMap::new();
194 meta.insert(META_PERMISSIONS.into(), perm_string.clone());
195 meta.insert(
196 META_IDENTITY_SCOPE.into(),
197 format!("{scope:?}").to_lowercase(),
198 );
199 if perm_string.contains("id-token: write") {
200 meta.insert(META_OIDC.into(), "true".into());
201 }
202 Some(graph.add_node_with_metadata(
203 NodeKind::Identity,
204 format!("GITHUB_TOKEN ({job_name})"),
205 TrustZone::FirstParty,
206 meta,
207 ))
208 } else {
209 token_id
210 };
211
212 if let Some(ref uses) = job.uses {
215 let trust_zone = if is_pin_semantically_valid(uses) {
216 TrustZone::ThirdParty
217 } else {
218 TrustZone::Untrusted
219 };
220 let rw_id = graph.add_node(NodeKind::Image, uses, trust_zone);
221 let job_step_id = graph.add_node(NodeKind::Step, job_name, TrustZone::FirstParty);
223 if let Some(node) = graph.nodes.get_mut(job_step_id) {
224 node.metadata.insert(META_JOB_NAME.into(), job_name.clone());
225 node.metadata.insert(
226 META_GHA_ACTION.into(),
227 uses.split('@').next().unwrap_or(uses).into(),
228 );
229 if let Some(runs_on) = job.runs_on.as_ref().and_then(yaml_value_compact) {
230 node.metadata.insert(META_GHA_RUNS_ON.into(), runs_on);
231 }
232 let condition = combined_condition(job.if_cond.as_deref(), None);
233 if let Some(condition) = condition {
234 node.metadata.insert(META_CONDITION.into(), condition);
235 }
236 if let Some(with) = job.with.as_ref() {
237 let mut entries: Vec<(&String, &serde_yaml::Value)> = with.iter().collect();
238 entries.sort_by(|a, b| a.0.cmp(b.0));
239 let rendered: Vec<String> = entries
240 .into_iter()
241 .filter_map(|(key, value)| {
242 yaml_scalar_to_string(value).map(|scalar| format!("{key}={scalar}"))
243 })
244 .collect();
245 if !rendered.is_empty() {
246 node.metadata
247 .insert(META_GHA_WITH_INPUTS.into(), rendered.join("\n"));
248 }
249 }
250 if let Some(serde_yaml::Value::String(s)) = job.secrets.as_ref() {
255 if s == "inherit" {
256 node.metadata
257 .insert(META_SECRETS_INHERIT.into(), "true".into());
258 }
259 }
260 }
261 graph.add_edge(job_step_id, rw_id, EdgeKind::DelegatesTo);
262 if let Some(tok_id) = job_token_id {
263 graph.add_edge(job_step_id, tok_id, EdgeKind::HasAccessTo);
264 }
265
266 if let Some(env_map) = workflow.env.as_ref().and_then(EnvSpec::as_map) {
274 let mut entries: Vec<(&String, &String)> = env_map.iter().collect();
275 entries.sort_by(|a, b| a.0.cmp(b.0));
276 for (_k, env_val) in entries {
277 for secret_name in iter_secret_refs(env_val) {
278 let secret_id =
279 find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
280 graph.add_edge(job_step_id, secret_id, EdgeKind::HasAccessTo);
281 }
282 }
283 }
284
285 if let Some(serde_yaml::Value::Mapping(map)) = job.secrets.as_ref() {
293 let mut entries: Vec<(&str, &str)> = map
294 .iter()
295 .filter_map(|(k, v)| Some((k.as_str()?, v.as_str()?)))
296 .collect();
297 entries.sort_by(|a, b| a.0.cmp(b.0));
298 for (_child_name, val) in entries {
299 for secret_name in iter_secret_refs(val) {
300 let secret_id =
301 find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
302 graph.add_edge(job_step_id, secret_id, EdgeKind::HasAccessTo);
303 }
304 }
305 }
306
307 graph.mark_partial(
308 GapKind::Structural,
309 format!(
310 "reusable workflow '{uses}' in job '{job_name}' cannot be resolved inline — authority within the called workflow is unknown"
311 ),
312 );
313 continue;
314 }
315
316 if job
318 .strategy
319 .as_ref()
320 .and_then(|s| s.get("matrix"))
321 .is_some()
322 {
323 graph.mark_partial(
324 GapKind::Expression,
325 format!(
326 "job '{job_name}' uses matrix strategy — authority shape may differ per matrix entry"
327 ),
328 );
329 }
330
331 if is_self_hosted_runner(job.runs_on.as_ref()) {
337 let runner_name = runner_label(job.runs_on.as_ref()).unwrap_or("self-hosted");
338 let mut meta = HashMap::new();
339 meta.insert(META_SELF_HOSTED.into(), "true".into());
340 graph.add_node_with_metadata(
341 NodeKind::Image,
342 runner_name,
343 TrustZone::FirstParty,
344 meta,
345 );
346 }
347
348 let container_image_id: Option<NodeId> = if let Some(ref container) = job.container {
351 let image_str = container.image();
352 let pinned = is_docker_digest_pinned(image_str);
353 let trust_zone = if pinned {
354 TrustZone::ThirdParty
355 } else {
356 TrustZone::Untrusted
357 };
358 let mut meta = HashMap::new();
359 meta.insert(META_CONTAINER.into(), "true".into());
360 if let Some(options) = container.options() {
361 if !options.is_empty() {
362 meta.insert(META_GHA_CONTAINER_OPTIONS.into(), options.to_string());
363 }
364 }
365 if pinned {
366 if let Some(digest) = image_str.split("@sha256:").nth(1) {
367 meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
368 }
369 }
370 Some(graph.add_node_with_metadata(NodeKind::Image, image_str, trust_zone, meta))
371 } else {
372 None
373 };
374
375 for (step_idx, step) in job.steps.iter().enumerate() {
376 let default_name = format!("{job_name}[{step_idx}]");
377 let step_name = step.name.as_deref().unwrap_or(&default_name);
378
379 let (trust_zone, image_node_id) = if let Some(ref uses) = step.uses {
381 let (zone, image_id) = classify_action(uses, &mut graph);
382 (zone, Some(image_id))
383 } else if is_pull_request_target {
384 (TrustZone::Untrusted, None)
386 } else {
387 (TrustZone::FirstParty, None)
389 };
390
391 let step_id = graph.add_node(NodeKind::Step, step_name, trust_zone);
392
393 if let Some(node) = graph.nodes.get_mut(step_id) {
400 node.metadata.insert(META_JOB_NAME.into(), job_name.clone());
401 if let Some(runs_on) = job.runs_on.as_ref().and_then(yaml_value_compact) {
402 node.metadata.insert(META_GHA_RUNS_ON.into(), runs_on);
403 }
404 let condition =
405 combined_condition(job.if_cond.as_deref(), step.if_cond.as_deref());
406 if let Some(condition) = condition {
407 node.metadata.insert(META_CONDITION.into(), condition);
408 }
409 if let Some(ref uses) = step.uses {
410 let action = uses.split('@').next().unwrap_or(uses);
411 node.metadata.insert(META_GHA_ACTION.into(), action.into());
412 if let Some(with) = step.with.as_ref() {
413 let mut entries: Vec<(&String, &serde_yaml::Value)> =
414 with.iter().collect();
415 entries.sort_by(|a, b| a.0.cmp(b.0));
416 let mut rendered = Vec::new();
417 for (key, value) in entries {
418 if let Some(scalar) = yaml_scalar_to_string(value) {
419 rendered.push(format!("{key}={scalar}"));
420 }
421 }
422 if !rendered.is_empty() {
423 node.metadata
424 .insert(META_GHA_WITH_INPUTS.into(), rendered.join("\n"));
425 }
426 }
427 }
428 if let Some(ref body) = step.run {
429 if !body.is_empty() {
430 node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
431 }
432 }
433 let job_check = job
438 .if_cond
439 .as_deref()
440 .map(is_fork_check_expression)
441 .unwrap_or(false);
442 let step_check = step
443 .if_cond
444 .as_deref()
445 .map(is_fork_check_expression)
446 .unwrap_or(false);
447 if job_check || step_check {
448 node.metadata.insert(META_FORK_CHECK.into(), "true".into());
449 }
450 }
451
452 if let Some(img_id) = image_node_id {
454 graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
455 }
456
457 if let Some(ref uses) = step.uses {
474 if uses.starts_with("./") {
475 graph.mark_partial(
476 GapKind::Structural,
477 format!(
478 "composite action not resolved (local action '{uses}' — taudit does not read filesystem)"
479 ),
480 );
481 }
482 }
483
484 if let Some(img_id) = container_image_id {
487 graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
488 }
489
490 if let Some(tok_id) = job_token_id {
492 graph.add_edge(step_id, tok_id, EdgeKind::HasAccessTo);
493 }
494
495 let mut step_holds_oidc = false;
498 if let Some(ref uses) = step.uses {
499 if let Some(cloud_id) =
500 classify_cloud_auth(uses, step.with.as_ref(), &mut graph)
501 {
502 graph.add_edge(step_id, cloud_id, EdgeKind::HasAccessTo);
503 step_holds_oidc = true;
504 }
505 }
506 if let Some(tok_id) = job_token_id {
510 if let Some(tok_node) = graph.nodes.get(tok_id) {
511 if tok_node.metadata.contains_key(META_OIDC) {
512 step_holds_oidc = true;
513 }
514 }
515 }
516 if let Some(ref yaml_id) = step.id {
517 step_oidc_by_yaml_id.insert(yaml_id.clone(), step_holds_oidc);
518 }
519
520 if let Some(ref uses) = step.uses {
522 let action = uses.split('@').next().unwrap_or(uses);
523 if matches!(
524 action,
525 "actions/attest-build-provenance" | "sigstore/cosign-installer"
526 ) {
527 if let Some(node) = graph.nodes.get_mut(step_id) {
528 node.metadata.insert(META_ATTESTS.into(), "true".into());
529 }
530 }
531 }
532
533 if let Some(ref uses) = step.uses {
538 let action = uses.split('@').next().unwrap_or(uses);
539 if action == "actions/checkout" {
540 if let Some(node) = graph.nodes.get_mut(step_id) {
541 node.metadata
542 .insert(META_CHECKOUT_SELF.into(), "true".into());
543 if let Some(with) = step.with.as_ref() {
547 if let Some(r) = with.get("ref").and_then(yaml_scalar_to_string) {
548 node.metadata.insert(META_CHECKOUT_REF.into(), r);
549 }
550 }
551 }
552 }
553 }
554
555 if let Some(ref run) = step.run {
560 if !run.is_empty() {
561 if let Some(node) = graph.nodes.get_mut(step_id) {
562 node.metadata.insert(META_SCRIPT_BODY.into(), run.clone());
563 }
564 }
565 }
566
567 if let Some(ref uses) = step.uses {
572 let action = uses.split('@').next().unwrap_or(uses);
573 if matches!(
574 action,
575 "actions/download-artifact" | "dawidd6/action-download-artifact"
576 ) {
577 if let Some(node) = graph.nodes.get_mut(step_id) {
578 node.metadata
579 .insert(META_DOWNLOADS_ARTIFACT.into(), "true".into());
580 }
581 }
582 }
583
584 if let Some(ref uses) = step.uses {
588 let action = uses.split('@').next().unwrap_or(uses);
589 if action == "actions/upload-artifact" {
590 if let Some(artifact_name) = step
595 .with
596 .as_ref()
597 .and_then(|w| w.get("name"))
598 .and_then(yaml_scalar_to_string)
599 {
600 let art_id = find_or_create_artifact(
604 &mut graph,
605 &mut artifact_ids,
606 &artifact_name,
607 trust_zone,
608 );
609 graph.add_edge(step_id, art_id, EdgeKind::Produces);
610 }
611 } else if matches!(
612 action,
613 "actions/download-artifact" | "dawidd6/action-download-artifact"
614 ) {
615 if let Some(artifact_name) = step
620 .with
621 .as_ref()
622 .and_then(|w| w.get("name"))
623 .and_then(yaml_scalar_to_string)
624 {
625 let art_id = find_or_create_artifact(
629 &mut graph,
630 &mut artifact_ids,
631 &artifact_name,
632 TrustZone::Untrusted,
633 );
634 graph.add_edge(art_id, step_id, EdgeKind::Consumes);
635 }
636 }
637 }
638
639 if let Some(ref run) = step.run {
646 let interprets = run.contains("unzip ")
647 || run.contains("unzip\n")
648 || run.contains("tar -x")
649 || run.contains("tar x")
650 || run.contains(" eval ")
651 || run.contains("\neval ")
652 || run.starts_with("eval ")
653 || run.contains(" cat ")
654 || run.contains("\ncat ")
655 || run.starts_with("cat ")
656 || run.contains("jq ");
657 if interprets {
658 if let Some(node) = graph.nodes.get_mut(step_id) {
659 node.metadata
660 .insert(META_INTERPRETS_ARTIFACT.into(), "true".into());
661 }
662 }
663 }
664 if let Some(ref uses) = step.uses {
668 let action = uses.split('@').next().unwrap_or(uses);
669 if action == "actions/github-script" {
670 if let Some(with) = step.with.as_ref() {
671 if let Some(script) = with.get("script").and_then(yaml_scalar_to_string)
672 {
673 let posts_comment = script.contains("createComment")
674 || script.contains("updateComment")
675 || script.contains("createCommitComment")
676 || script.contains("createReview");
677 let reads_file = script.contains("readFileSync")
678 || script.contains("readFile(")
679 || script.contains("require('fs')")
680 || script.contains("require(\"fs\")");
681 if posts_comment && reads_file {
682 if let Some(node) = graph.nodes.get_mut(step_id) {
683 node.metadata
684 .insert(META_INTERPRETS_ARTIFACT.into(), "true".into());
685 }
686 }
687 }
688 }
689 }
690 }
691
692 let step_env_template = matches!(step.env.as_ref(), Some(EnvSpec::Template(_)));
711 if step_env_template {
712 graph.mark_partial(
713 GapKind::Expression,
714 format!(
715 "step '{step_name}' in job '{job_name}' env: uses template expression — environment variable shape unknown"
716 ),
717 );
718 }
719
720 let mut effective_env: HashMap<String, String> = HashMap::new();
721 if let Some(env_map) = workflow.env.as_ref().and_then(EnvSpec::as_map) {
722 for (k, v) in env_map {
723 effective_env.insert(k.clone(), v.clone());
724 }
725 }
726 if let Some(env_map) = job.env.as_ref().and_then(EnvSpec::as_map) {
727 for (k, v) in env_map {
728 effective_env.insert(k.clone(), v.clone());
729 }
730 }
731 if let Some(EnvSpec::Map(env_map)) = step.env.as_ref() {
732 for (k, v) in env_map {
733 effective_env.insert(k.clone(), v.clone());
734 }
735 }
736
737 let mut effective_entries: Vec<(&String, &String)> = effective_env.iter().collect();
738 effective_entries.sort_by(|a, b| a.0.cmp(b.0));
739 if !effective_entries.is_empty() {
740 let rendered_env: Vec<String> = effective_entries
741 .iter()
742 .map(|(k, v)| format!("{k}={v}"))
743 .collect();
744 if let Some(node) = graph.nodes.get_mut(step_id) {
745 node.metadata
746 .insert(META_GHA_ENV_ASSIGNMENTS.into(), rendered_env.join("\n"));
747 }
748 }
749 for (_k, env_val) in effective_entries {
750 for secret_name in iter_secret_refs(env_val) {
754 let secret_id =
755 find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
756 graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
757 }
758 }
759
760 if let Some(ref with) = step.with {
772 let mut reads_env = false;
773 let mut entries: Vec<(&String, &serde_yaml::Value)> = with.iter().collect();
774 entries.sort_by(|a, b| a.0.cmp(b.0));
775 for (_k, val) in entries {
776 for scalar in yaml_scalar_strings(val) {
779 for secret_name in iter_secret_refs(&scalar) {
780 let secret_id =
781 find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
782 graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
783 }
784 if is_env_reference(&scalar) {
785 reads_env = true;
786 }
787 }
788 }
789 if reads_env {
790 if let Some(node) = graph.nodes.get_mut(step_id) {
791 node.metadata.insert(META_READS_ENV.into(), "true".into());
792 }
793 }
794 }
795
796 if let Some(ref run) = step.run {
800 if !run.is_empty() {
801 if let Some(node) = graph.nodes.get_mut(step_id) {
802 node.metadata.insert(META_SCRIPT_BODY.into(), run.clone());
803 }
804 }
805 }
806
807 if let Some(ref uses) = step.uses {
811 let action = uses.split('@').next().unwrap_or(uses);
812 if action == "actions/github-script" {
813 if let Some(with) = step.with.as_ref() {
814 if let Some(script) = with.get("script").and_then(yaml_scalar_to_string)
815 {
816 if !script.is_empty() {
817 if let Some(node) = graph.nodes.get_mut(step_id) {
818 node.metadata.insert(META_SCRIPT_BODY.into(), script);
819 }
820 }
821 }
822 }
823 }
824 }
825
826 if let Some(ref uses) = step.uses {
830 let action = uses.split('@').next().unwrap_or(uses);
831 let is_debug = matches!(
832 action,
833 "mxschmitt/action-tmate"
834 | "lhotari/action-upterm"
835 | "actions/tmate"
836 | "owenthereal/action-upterm"
837 | "csexton/debugger-action"
838 );
839 if is_debug {
840 if let Some(node) = graph.nodes.get_mut(step_id) {
841 node.metadata
842 .insert(META_INTERACTIVE_DEBUG.into(), uses.clone());
843 }
844 }
845 }
846
847 if let Some(ref uses) = step.uses {
852 let action = uses.split('@').next().unwrap_or(uses);
853 let is_cache = matches!(
854 action,
855 "actions/cache" | "actions/cache/save" | "actions/cache/restore"
856 );
857 if is_cache {
858 if let Some(with) = step.with.as_ref() {
859 if let Some(key) = with.get("key").and_then(yaml_scalar_to_string) {
860 if !key.is_empty() {
861 if let Some(node) = graph.nodes.get_mut(step_id) {
862 node.metadata.insert(META_CACHE_KEY.into(), key);
863 }
864 }
865 }
866 }
867 }
868 }
869
870 if let Some(ref run) = step.run {
876 let mut seen: std::collections::BTreeSet<&str> =
881 std::collections::BTreeSet::new();
882 for name in iter_secret_refs(run) {
883 seen.insert(name);
884 }
885 for secret_name in seen {
886 let secret_id =
887 find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
888 if let Some(node) = graph.nodes.get_mut(secret_id) {
890 node.metadata
891 .insert(META_INFERRED.into(), META_INFERRED_VAL.into());
892 }
893 graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
894 graph.mark_partial(
895 GapKind::Expression,
896 format!(
897 "secret '{secret_name}' referenced in run: script — inferred, not precisely mapped"
898 ),
899 );
900 }
901 }
902
903 if let Some(ref run) = step.run {
911 let writes_gate = run.contains("GITHUB_ENV") || run.contains("GITHUB_PATH");
912 if writes_gate {
913 if let Some(node) = graph.nodes.get_mut(step_id) {
914 node.metadata
915 .insert(META_WRITES_ENV_GATE.into(), "true".into());
916 }
917 }
918 if is_env_reference(run) {
924 if let Some(node) = graph.nodes.get_mut(step_id) {
925 node.metadata.insert(META_READS_ENV.into(), "true".into());
926 }
927 }
928 }
929 }
930
931 if let Some(outputs) = job.outputs.as_ref() {
936 let mut output_entries: Vec<(&String, &String)> = outputs.iter().collect();
940 output_entries.sort_by(|a, b| a.0.cmp(b.0));
941 for (out_name, out_value) in output_entries {
942 let source = classify_job_output_source(out_value, &step_oidc_by_yaml_id);
943 job_output_records.push(format!("{job_name}\t{out_name}\t{source}"));
944 }
945 }
946 }
947
948 if !job_output_records.is_empty() {
949 graph
950 .metadata
951 .insert(META_JOB_OUTPUTS.into(), job_output_records.join("|"));
952 }
953
954 let step_count = graph
961 .nodes
962 .iter()
963 .filter(|n| n.kind == NodeKind::Step)
964 .count();
965 if step_count == 0 && !workflow.jobs.is_empty() {
966 graph.mark_partial(
967 GapKind::Structural,
968 "jobs: parsed but produced 0 step nodes — possible non-GHA YAML wrong-platform-classified".to_string(),
969 );
970 }
971
972 graph.stamp_edge_authority_summaries();
973 Ok(graph)
974 }
975}
976
/// Classifies where a job output expression gets its value from.
///
/// Returns:
/// * `"secret"` when the value mentions `secrets.` anywhere;
/// * `"oidc"` when it reads `steps.<id>.outputs.…` from a step that
///   `step_oidc_by_yaml_id` maps to `true`;
/// * `"step_output"` when it reads some other step's outputs;
/// * `"literal"` otherwise.
fn classify_job_output_source(
    value: &str,
    step_oidc_by_yaml_id: &HashMap<String, bool>,
) -> &'static str {
    const STEP_PREFIX: &str = "steps.";

    if value.contains("secrets.") {
        return "secret";
    }

    let mut references_step_output = false;
    let mut remaining = value;
    while let Some(at) = remaining.find(STEP_PREFIX) {
        let after_prefix = &remaining[at + STEP_PREFIX.len()..];
        // The step id runs until the first character that is not
        // alphanumeric, `_` or `-`.
        let id_len = after_prefix
            .find(|c: char| !(c.is_alphanumeric() || c == '_' || c == '-'))
            .unwrap_or(after_prefix.len());
        let (step_yaml_id, tail) = after_prefix.split_at(id_len);

        if !step_yaml_id.is_empty() && tail.starts_with(".outputs.") {
            references_step_output = true;
            if step_oidc_by_yaml_id.get(step_yaml_id) == Some(&true) {
                return "oidc";
            }
        }
        // Scanning resumes right after the id that was just examined.
        remaining = tail;
    }

    if references_step_output {
        "step_output"
    } else {
        "literal"
    }
}
1018
/// Reports whether an `if:` expression contains one of the recognised
/// "pull request does not come from a fork" comparisons.
///
/// Whitespace runs are collapsed to a single space and the text is lowercased
/// before matching, so spacing and letter case do not matter. Operators must
/// still be surrounded by whitespace in the original expression.
pub fn is_fork_check_expression(expr: &str) -> bool {
    const FORK_GUARDS: [&str; 4] = [
        "github.event.pull_request.head.repo.fork == false",
        "github.event.pull_request.head.repo.fork != true",
        "github.event.pull_request.head.repo.full_name == github.repository",
        "github.repository == github.event.pull_request.head.repo.full_name",
    ];

    let words: Vec<&str> = expr.split_whitespace().collect();
    let canonical = words.join(" ").to_lowercase();
    FORK_GUARDS.iter().any(|guard| canonical.contains(guard))
}
1054
1055fn trigger_has_pull_request_target(triggers: &serde_yaml::Value) -> bool {
1056 collect_trigger_names(Some(triggers))
1057 .iter()
1058 .any(|t| t == "pull_request_target")
1059}
1060
1061fn collect_trigger_names(triggers: Option<&serde_yaml::Value>) -> Vec<String> {
1065 let mut out: Vec<String> = Vec::new();
1066 let mut push_unique = |s: &str| {
1067 if !s.is_empty() && !out.iter().any(|e| e == s) {
1068 out.push(s.to_string());
1069 }
1070 };
1071 let Some(val) = triggers else {
1072 return out;
1073 };
1074 match val {
1075 serde_yaml::Value::String(s) => push_unique(s),
1076 serde_yaml::Value::Sequence(seq) => {
1077 for v in seq {
1078 if let Some(s) = v.as_str() {
1079 push_unique(s);
1080 }
1081 }
1082 }
1083 serde_yaml::Value::Mapping(map) => {
1084 for (k, _) in map {
1085 if let Some(s) = k.as_str() {
1086 push_unique(s);
1087 }
1088 }
1089 }
1090 _ => {}
1091 }
1092 out
1093}
1094
1095fn collect_dispatch_inputs(triggers: &serde_yaml::Value) -> Vec<String> {
1099 let map = match triggers {
1100 serde_yaml::Value::Mapping(m) => m,
1101 _ => return Vec::new(),
1102 };
1103 let dispatch = match map
1104 .iter()
1105 .find(|(k, _)| k.as_str() == Some("workflow_dispatch"))
1106 {
1107 Some((_, v)) => v,
1108 None => return Vec::new(),
1109 };
1110 let inputs = match dispatch.get("inputs").and_then(|v| v.as_mapping()) {
1111 Some(m) => m,
1112 None => return Vec::new(),
1113 };
1114 inputs
1115 .iter()
1116 .filter_map(|(k, _)| k.as_str().map(str::to_string))
1117 .collect()
1118}
1119
1120fn collect_workflow_call_inputs(triggers: &serde_yaml::Value) -> Vec<String> {
1124 let map = match triggers {
1125 serde_yaml::Value::Mapping(m) => m,
1126 _ => return Vec::new(),
1127 };
1128 let call = match map
1129 .iter()
1130 .find(|(k, _)| k.as_str() == Some("workflow_call"))
1131 {
1132 Some((_, v)) => v,
1133 None => return Vec::new(),
1134 };
1135 let inputs = match call.get("inputs").and_then(|v| v.as_mapping()) {
1136 Some(m) => m,
1137 None => return Vec::new(),
1138 };
1139 inputs
1140 .iter()
1141 .filter_map(|(k, _)| k.as_str().map(str::to_string))
1142 .collect()
1143}
1144
1145fn is_self_hosted_runner(runs_on: Option<&serde_yaml::Value>) -> bool {
1153 const SH: &str = "self-hosted";
1154 let Some(val) = runs_on else {
1155 return false;
1156 };
1157 match val {
1158 serde_yaml::Value::String(s) => s == SH,
1159 serde_yaml::Value::Sequence(seq) => seq
1160 .iter()
1161 .any(|v| v.as_str().map(|s| s == SH).unwrap_or(false)),
1162 serde_yaml::Value::Mapping(map) => {
1163 if map.contains_key("group") {
1164 return true;
1165 }
1166 if let Some(labels) = map.get("labels") {
1167 match labels {
1168 serde_yaml::Value::String(s) => s == SH,
1169 serde_yaml::Value::Sequence(seq) => seq
1170 .iter()
1171 .any(|v| v.as_str().map(|s| s == SH).unwrap_or(false)),
1172 _ => false,
1173 }
1174 } else {
1175 false
1176 }
1177 }
1178 _ => false,
1179 }
1180}
1181
1182fn runner_label(runs_on: Option<&serde_yaml::Value>) -> Option<&str> {
1186 let val = runs_on?;
1187 match val {
1188 serde_yaml::Value::String(s) => Some(s.as_str()),
1189 serde_yaml::Value::Sequence(seq) => {
1190 for v in seq {
1191 if let Some(s) = v.as_str() {
1192 if s != "self-hosted" {
1193 return Some(s);
1194 }
1195 }
1196 }
1197 seq.first().and_then(|v| v.as_str())
1198 }
1199 serde_yaml::Value::Mapping(map) => map.get("group").and_then(|v| v.as_str()),
1200 _ => None,
1201 }
1202}
1203
1204fn classify_action(uses: &str, graph: &mut AuthorityGraph) -> (TrustZone, NodeId) {
1206 let semantically_pinned = is_pin_semantically_valid(uses);
1207 let is_local = uses.starts_with("./");
1208
1209 let zone = if is_local {
1210 TrustZone::FirstParty
1211 } else if semantically_pinned {
1212 TrustZone::ThirdParty
1213 } else {
1214 TrustZone::Untrusted
1215 };
1216
1217 let mut meta = HashMap::new();
1218 if is_sha_pinned(uses) {
1221 if let Some(sha) = uses.split('@').next_back() {
1222 meta.insert(META_DIGEST.into(), sha.into());
1223 }
1224 }
1225
1226 let id = graph.add_node_with_metadata(NodeKind::Image, uses, zone, meta);
1227 (zone, id)
1228}
1229
/// Iterates over the secret names referenced as `secrets.NAME` inside
/// `${{ … }}` expression spans of `s`.
///
/// Text outside expression spans is ignored. An unterminated `${{` extends to
/// the end of the string. A name consists of ASCII alphanumerics and `_`;
/// empty names are skipped. Names are yielded in order of appearance, with
/// duplicates kept.
fn iter_secret_refs(s: &str) -> impl Iterator<Item = &str> {
    SecretRefIter {
        src: s,
        cursor: 0,
        span_end: None,
    }
}

/// Scanner state behind [`iter_secret_refs`].
struct SecretRefIter<'a> {
    /// Full text being scanned.
    src: &'a str,
    /// Byte offset where scanning resumes.
    cursor: usize,
    /// Byte offset at which the current `${{ … }}` body ends, or `None` when
    /// the scanner is between spans.
    span_end: Option<usize>,
}

impl<'a> Iterator for SecretRefIter<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<&'a str> {
        const OPEN: &str = "${{";
        const CLOSE: &str = "}}";
        const PREFIX: &str = "secrets.";

        loop {
            // Enter the next expression span if we are not inside one.
            let span_end = match self.span_end {
                Some(end) => end,
                None => {
                    let open_at = self.cursor + self.src.get(self.cursor..)?.find(OPEN)?;
                    let body_start = open_at + OPEN.len();
                    let end = self.src[body_start..]
                        .find(CLOSE)
                        .map_or(self.src.len(), |len| body_start + len);
                    self.cursor = body_start;
                    self.span_end = Some(end);
                    end
                }
            };

            // Look for the next `secrets.` within what is left of the span.
            let hit = if self.cursor < span_end {
                self.src[self.cursor..span_end].find(PREFIX)
            } else {
                None
            };
            let Some(rel) = hit else {
                // Span exhausted: step past the closing braces and leave it.
                self.cursor = span_end.saturating_add(CLOSE.len()).min(self.src.len());
                self.span_end = None;
                continue;
            };

            let name_start = self.cursor + rel + PREFIX.len();
            let name_end = self.src[name_start..span_end]
                .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
                .map_or(span_end, |len| name_start + len);
            self.cursor = name_end;

            if name_end > name_start {
                return Some(&self.src[name_start..name_end]);
            }
        }
    }
}
1310
/// Returns `true` when `val` contains a `${{ … }}` expression whose body
/// begins with `env.` (leading whitespace after `${{` is ignored).
fn is_env_reference(val: &str) -> bool {
    const OPEN: &str = "${{";

    val.match_indices(OPEN).any(|(at, _)| {
        let body = &val[at + OPEN.len()..];
        body.trim_start().starts_with("env.")
    })
}
1339
1340fn find_or_create_secret(
1341 graph: &mut AuthorityGraph,
1342 cache: &mut HashMap<String, NodeId>,
1343 name: &str,
1344) -> NodeId {
1345 if let Some(&id) = cache.get(name) {
1346 return id;
1347 }
1348 let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
1349 cache.insert(name.to_string(), id);
1350 id
1351}
1352
1353fn find_or_create_artifact(
1354 graph: &mut AuthorityGraph,
1355 cache: &mut HashMap<String, NodeId>,
1356 name: &str,
1357 zone: TrustZone,
1358) -> NodeId {
1359 if let Some(&id) = cache.get(name) {
1360 return id;
1361 }
1362 let id = graph.add_node(NodeKind::Artifact, name, zone);
1363 cache.insert(name.to_string(), id);
1364 id
1365}
1366
1367fn classify_cloud_auth(
1376 uses: &str,
1377 with: Option<&HashMap<String, serde_yaml::Value>>,
1378 graph: &mut AuthorityGraph,
1379) -> Option<NodeId> {
1380 let action = uses.split('@').next().unwrap_or(uses);
1382
1383 match action {
1384 "aws-actions/configure-aws-credentials" => {
1385 let w = with?;
1387 let role = w.get("role-to-assume").and_then(yaml_scalar_to_string)?;
1388 let short = role.split('/').next_back().unwrap_or(role.as_str());
1391 let mut meta = HashMap::new();
1392 meta.insert(META_OIDC.into(), "true".into());
1393 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
1394 meta.insert(META_PERMISSIONS.into(), "AWS role assumption (OIDC)".into());
1395 Some(graph.add_node_with_metadata(
1396 NodeKind::Identity,
1397 format!("AWS/{short}"),
1398 TrustZone::FirstParty,
1399 meta,
1400 ))
1401 }
1402 "google-github-actions/auth" => {
1403 let w = with?;
1405 let provider = w
1406 .get("workload_identity_provider")
1407 .and_then(yaml_scalar_to_string)?;
1408 let short = provider.split('/').next_back().unwrap_or(provider.as_str());
1409 let mut meta = HashMap::new();
1410 meta.insert(META_OIDC.into(), "true".into());
1411 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
1412 meta.insert(
1413 META_PERMISSIONS.into(),
1414 "GCP workload identity federation".into(),
1415 );
1416 Some(graph.add_node_with_metadata(
1417 NodeKind::Identity,
1418 format!("GCP/{short}"),
1419 TrustZone::FirstParty,
1420 meta,
1421 ))
1422 }
1423 "azure/login" => {
1424 let w = with?;
1426 let client_id = w.get("client-id").and_then(yaml_scalar_to_string)?;
1427 if w.contains_key("client-secret") {
1429 return None; }
1431 let mut meta = HashMap::new();
1432 meta.insert(META_OIDC.into(), "true".into());
1433 meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
1434 meta.insert(
1435 META_PERMISSIONS.into(),
1436 "Azure federated credential (OIDC)".into(),
1437 );
1438 Some(graph.add_node_with_metadata(
1439 NodeKind::Identity,
1440 format!("Azure/{client_id}"),
1441 TrustZone::FirstParty,
1442 meta,
1443 ))
1444 }
1445 _ => None,
1446 }
1447}
1448
/// A `permissions:` value as written in a workflow or job.
///
/// The enum is untagged, so serde selects the variant from the YAML shape:
/// a plain string or a mapping of string keys to string values.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum Permissions {
    /// Single-string form, e.g. `permissions: write-all`.
    String(String),
    /// Mapping form, e.g. `contents: read`; stored sorted by key.
    Map(BTreeMap<String, String>),
}
1467
/// An `env:` value: either a mapping of variables or a single string.
///
/// The enum is untagged; the mapping form is tried first and goes through
/// `deserialize_env_map`, which stringifies scalar values. A bare string is
/// kept as `Template`; the parser treats that as a template expression whose
/// variable shape is unknown.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum EnvSpec {
    /// Variable name to (stringified) value.
    #[serde(deserialize_with = "deserialize_env_map")]
    Map(HashMap<String, String>),
    /// The whole `env:` value given as one string.
    Template(String),
}
1486
1487fn deserialize_env_map<'de, D>(deserializer: D) -> Result<HashMap<String, String>, D::Error>
1496where
1497 D: serde::Deserializer<'de>,
1498{
1499 use serde::de::Error;
1500 let raw: HashMap<String, serde_yaml::Value> = HashMap::deserialize(deserializer)?;
1501 let mut out = HashMap::with_capacity(raw.len());
1502 for (k, v) in raw {
1503 let s = match v {
1504 serde_yaml::Value::String(s) => s,
1505 serde_yaml::Value::Bool(b) => b.to_string(),
1506 serde_yaml::Value::Number(n) => n.to_string(),
1507 serde_yaml::Value::Null => String::new(),
1508 other => {
1512 return Err(D::Error::custom(format!(
1513 "env value for `{k}` is not a scalar: {other:?}"
1514 )))
1515 }
1516 };
1517 out.insert(k, s);
1518 }
1519 Ok(out)
1520}
1521
1522fn yaml_scalar_to_string(value: &serde_yaml::Value) -> Option<String> {
1523 match value {
1524 serde_yaml::Value::String(s) => Some(s.clone()),
1525 serde_yaml::Value::Bool(b) => Some(b.to_string()),
1526 serde_yaml::Value::Number(n) => Some(n.to_string()),
1527 serde_yaml::Value::Null => Some(String::new()),
1528 _ => None,
1529 }
1530}
1531
1532fn yaml_value_compact(value: &serde_yaml::Value) -> Option<String> {
1533 match value {
1534 serde_yaml::Value::Sequence(seq) => {
1535 let parts: Vec<String> = seq.iter().filter_map(yaml_scalar_to_string).collect();
1536 if parts.is_empty() {
1537 None
1538 } else {
1539 Some(parts.join(","))
1540 }
1541 }
1542 serde_yaml::Value::Mapping(map) => {
1543 let mut parts: Vec<String> = map
1544 .iter()
1545 .filter_map(|(k, v)| {
1546 Some(format!(
1547 "{}={}",
1548 yaml_scalar_to_string(k)?,
1549 yaml_value_compact(v)?
1550 ))
1551 })
1552 .collect();
1553 parts.sort();
1554 if parts.is_empty() {
1555 None
1556 } else {
1557 Some(parts.join(","))
1558 }
1559 }
1560 scalar => yaml_scalar_to_string(scalar),
1561 }
1562}
1563
/// Merges a job-level and a step-level `if:` condition into one string.
///
/// Empty conditions count as absent. When both are present the result is
/// `"<job> AND <step>"`; when only one is present it is returned unchanged;
/// when neither is present the result is `None`.
fn combined_condition(job_if: Option<&str>, step_if: Option<&str>) -> Option<String> {
    let job = job_if.filter(|cond| !cond.is_empty());
    let step = step_if.filter(|cond| !cond.is_empty());

    match (job, step) {
        (Some(job), Some(step)) => Some(format!("{job} AND {step}")),
        (Some(only), None) | (None, Some(only)) => Some(only.to_owned()),
        (None, None) => None,
    }
}
1574
1575fn yaml_scalar_strings(value: &serde_yaml::Value) -> Vec<String> {
1576 match value {
1577 serde_yaml::Value::Sequence(seq) => seq.iter().filter_map(yaml_scalar_to_string).collect(),
1578 serde_yaml::Value::Mapping(map) => map.values().filter_map(yaml_scalar_to_string).collect(),
1579 scalar => yaml_scalar_to_string(scalar).into_iter().collect(),
1580 }
1581}
1582
1583impl EnvSpec {
1584 pub fn as_map(&self) -> Option<&HashMap<String, String>> {
1587 match self {
1588 EnvSpec::Map(m) => Some(m),
1589 EnvSpec::Template(_) => None,
1590 }
1591 }
1592
1593 pub fn as_template(&self) -> Option<&str> {
1595 match self {
1596 EnvSpec::Template(s) => Some(s.as_str()),
1597 EnvSpec::Map(_) => None,
1598 }
1599 }
1600}
1601
1602impl std::fmt::Display for Permissions {
1603 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1604 match self {
1605 Permissions::String(s) => write!(f, "{s}"),
1606 Permissions::Map(m) => {
1607 let parts: Vec<String> = m.iter().map(|(k, v)| format!("{k}: {v}")).collect();
1608 write!(f, "{{ {} }}", parts.join(", "))
1609 }
1610 }
1611 }
1612}
1613
/// Top-level shape of a workflow file as read by the parser.
///
/// Only the keys listed here are captured; every field falls back to its
/// default when the key is absent.
#[derive(Debug, Deserialize)]
pub struct GhaWorkflow {
    /// The `on:` key, kept as raw YAML (the tests exercise string, sequence
    /// and mapping forms).
    #[serde(rename = "on", default)]
    pub triggers: Option<serde_yaml::Value>,
    /// Workflow-level `permissions:`, if declared.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Workflow-level `env:`, if declared.
    #[serde(default)]
    pub env: Option<EnvSpec>,
    /// Jobs keyed by job id; empty when `jobs:` is absent.
    #[serde(default)]
    pub jobs: HashMap<String, GhaJob>,
}
1629
/// A job's `container:` value.
///
/// Untagged: either a bare image string or a mapping with an `image` key and
/// an optional `options` string.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
pub enum ContainerConfig {
    /// Short form, e.g. `container: ubuntu:22.04`.
    Image(String),
    /// Mapping form.
    Full {
        /// The container image reference.
        image: String,
        /// The `options:` string, if given (e.g. `--privileged`).
        #[serde(default)]
        options: Option<String>,
    },
}
1641
1642impl ContainerConfig {
1643 pub fn image(&self) -> &str {
1644 match self {
1645 ContainerConfig::Image(s) => s,
1646 ContainerConfig::Full { image, .. } => image,
1647 }
1648 }
1649
1650 pub fn options(&self) -> Option<&str> {
1651 match self {
1652 ContainerConfig::Image(_) => None,
1653 ContainerConfig::Full { options, .. } => options.as_deref(),
1654 }
1655 }
1656}
1657
/// One entry under `jobs:`.
///
/// Every field is optional in the YAML and falls back to its default when
/// the key is absent.
#[derive(Debug, Deserialize)]
pub struct GhaJob {
    /// Job-level `permissions:`, if declared.
    #[serde(default)]
    pub permissions: Option<Permissions>,
    /// Job-level `env:`, if declared.
    #[serde(default)]
    pub env: Option<EnvSpec>,
    /// The job's steps; empty when `steps:` is absent.
    #[serde(default)]
    pub steps: Vec<GhaStep>,
    /// Job-level `uses:`; the tests use it for reusable-workflow calls.
    #[serde(default)]
    pub uses: Option<String>,
    /// Job-level `with:` inputs, values kept as raw YAML.
    #[serde(rename = "with", default)]
    pub with: Option<HashMap<String, serde_yaml::Value>>,
    /// Job-level `secrets:`, kept as raw YAML (e.g. the string `inherit`).
    #[serde(default)]
    pub secrets: Option<serde_yaml::Value>,
    /// The job's `container:`, if any.
    #[serde(default)]
    pub container: Option<ContainerConfig>,
    /// The job's `strategy:` (e.g. a matrix), kept as raw YAML.
    #[serde(default)]
    pub strategy: Option<serde_yaml::Value>,
    /// The job's `runs-on:`, kept as raw YAML (string or sequence in the tests).
    #[serde(rename = "runs-on", default)]
    pub runs_on: Option<serde_yaml::Value>,
    /// The job's `outputs:` mapping, if declared.
    #[serde(default)]
    pub outputs: Option<HashMap<String, String>>,
    /// The job's `if:` condition, if any.
    #[serde(rename = "if", default)]
    pub if_cond: Option<String>,
}
1706
/// One entry under a job's `steps:`.
#[derive(Debug, Deserialize)]
pub struct GhaStep {
    /// The step's `name:`, if given.
    pub name: Option<String>,
    /// The step's `id:`, if given.
    pub id: Option<String>,
    /// The action reference for a `uses:` step.
    pub uses: Option<String>,
    /// The script for a `run:` step.
    pub run: Option<String>,
    /// Step-level `env:`, if declared.
    #[serde(default)]
    pub env: Option<EnvSpec>,
    /// Step-level `with:` inputs, values kept as raw YAML.
    #[serde(rename = "with", default)]
    pub with: Option<HashMap<String, serde_yaml::Value>>,
    /// The step's `if:` condition, if any.
    #[serde(rename = "if", default)]
    pub if_cond: Option<String>,
}
1728
1729#[cfg(test)]
1730mod tests {
1731 use super::*;
1732
1733 fn parse(yaml: &str) -> AuthorityGraph {
1734 let parser = GhaParser;
1735 let source = PipelineSource {
1736 file: "test.yml".into(),
1737 repo: None,
1738 git_ref: None,
1739 commit_sha: None,
1740 };
1741 parser.parse(yaml, &source).unwrap()
1742 }
1743
1744 #[test]
1745 fn parses_simple_workflow() {
1746 let yaml = r#"
1747permissions: write-all
1748jobs:
1749 build:
1750 steps:
1751 - name: Checkout
1752 uses: actions/checkout@v4
1753 - name: Build
1754 run: make build
1755"#;
1756 let graph = parse(yaml);
1757 assert!(graph.nodes.len() >= 3); }
1759
1760 #[test]
1761 fn detects_secret_in_env() {
1762 let yaml = r#"
1763jobs:
1764 deploy:
1765 steps:
1766 - name: Deploy
1767 run: ./deploy.sh
1768 env:
1769 AWS_KEY: "${{ secrets.AWS_ACCESS_KEY_ID }}"
1770"#;
1771 let graph = parse(yaml);
1772 let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1773 assert_eq!(secrets.len(), 1);
1774 assert_eq!(secrets[0].name, "AWS_ACCESS_KEY_ID");
1775 }
1776
1777 #[test]
1778 fn classifies_unpinned_action_as_untrusted() {
1779 let yaml = r#"
1780jobs:
1781 ci:
1782 steps:
1783 - uses: actions/checkout@v4
1784"#;
1785 let graph = parse(yaml);
1786 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1787 assert_eq!(images.len(), 1);
1788 assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
1789 }
1790
1791 #[test]
1792 fn classifies_sha_pinned_action_as_third_party() {
1793 let yaml = r#"
1794jobs:
1795 ci:
1796 steps:
1797 - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
1798"#;
1799 let graph = parse(yaml);
1800 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1801 assert_eq!(images.len(), 1);
1802 assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
1803 }
1804
1805 #[test]
1806 fn classifies_local_action_as_first_party() {
1807 let yaml = r#"
1808jobs:
1809 ci:
1810 steps:
1811 - uses: ./.github/actions/my-action
1812"#;
1813 let graph = parse(yaml);
1814 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1815 assert_eq!(images.len(), 1);
1816 assert_eq!(images[0].trust_zone, TrustZone::FirstParty);
1817 }
1818
1819 #[test]
1820 fn detects_secret_in_with() {
1821 let yaml = r#"
1822jobs:
1823 deploy:
1824 steps:
1825 - name: Publish
1826 uses: some-org/publish@v1
1827 with:
1828 token: "${{ secrets.NPM_TOKEN }}"
1829"#;
1830 let graph = parse(yaml);
1831 let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1832 assert_eq!(secrets.len(), 1);
1833 assert_eq!(secrets[0].name, "NPM_TOKEN");
1834 }
1835
1836 #[test]
1837 fn uses_step_records_action_and_scalar_with_inputs() {
1838 let yaml = r#"
1839jobs:
1840 deploy:
1841 steps:
1842 - uses: aws-actions/amazon-ecr-login@v2
1843 with:
1844 mask-password: false
1845 registries: "123456789012"
1846"#;
1847 let graph = parse(yaml);
1848 let step = graph
1849 .nodes_of_kind(NodeKind::Step)
1850 .find(|n| n.name == "deploy[0]")
1851 .expect("uses step");
1852 assert_eq!(
1853 step.metadata.get(META_GHA_ACTION).map(String::as_str),
1854 Some("aws-actions/amazon-ecr-login")
1855 );
1856 let inputs = step
1857 .metadata
1858 .get(META_GHA_WITH_INPUTS)
1859 .expect("with inputs");
1860 assert!(inputs.contains("mask-password=false"));
1861 assert!(inputs.contains("registries=123456789012"));
1862 }
1863
1864 #[test]
1865 fn parser_stamps_new_exploit_rule_metadata() {
1866 let yaml = r#"
1867on:
1868 workflow_call:
1869 inputs:
1870 image:
1871 type: string
1872jobs:
1873 call:
1874 uses: org/repo/.github/workflows/reuse.yml@main
1875 runs-on: ${{ inputs.runner }}
1876 secrets: inherit
1877 with:
1878 image: ${{ inputs.image }}
1879 deploy:
1880 runs-on: [ubuntu-latest]
1881 if: ${{ needs.plan.outputs.pr_run_mode == 'upload' }}
1882 env:
1883 NODE_OPTIONS: --require=./hook.js
1884 container:
1885 image: ${{ inputs.image }}
1886 options: --privileged
1887 steps:
1888 - name: Publish
1889 if: ${{ github.event_name == 'push' }}
1890 run: npm publish
1891"#;
1892 let graph = parse(yaml);
1893 assert_eq!(
1894 graph
1895 .metadata
1896 .get(META_GHA_WORKFLOW_CALL_INPUTS)
1897 .map(String::as_str),
1898 Some("image")
1899 );
1900
1901 let call = graph
1902 .nodes_of_kind(NodeKind::Step)
1903 .find(|n| n.name == "call")
1904 .expect("synthetic reusable call step");
1905 assert_eq!(
1906 call.metadata.get(META_SECRETS_INHERIT).map(String::as_str),
1907 Some("true")
1908 );
1909 assert!(
1910 call.metadata
1911 .get(META_GHA_WITH_INPUTS)
1912 .map(|v| v.contains("image=${{ inputs.image }}"))
1913 .unwrap_or(false),
1914 "reusable-call with inputs should be stamped"
1915 );
1916 assert_eq!(
1917 call.metadata.get(META_GHA_RUNS_ON).map(String::as_str),
1918 Some("${{ inputs.runner }}")
1919 );
1920
1921 let publish = graph
1922 .nodes_of_kind(NodeKind::Step)
1923 .find(|n| n.name == "Publish")
1924 .expect("publish step");
1925 assert!(
1926 publish
1927 .metadata
1928 .get(META_GHA_ENV_ASSIGNMENTS)
1929 .map(|v| v.contains("NODE_OPTIONS=--require=./hook.js"))
1930 .unwrap_or(false),
1931 "effective env assignments should be stamped on steps"
1932 );
1933 assert_eq!(
1934 publish.metadata.get(META_CONDITION).map(String::as_str),
1935 Some("${{ needs.plan.outputs.pr_run_mode == 'upload' }} AND ${{ github.event_name == 'push' }}")
1936 );
1937
1938 let container = graph
1939 .nodes_of_kind(NodeKind::Image)
1940 .find(|n| n.metadata.get(META_CONTAINER).map(String::as_str) == Some("true"))
1941 .expect("container image node");
1942 assert_eq!(
1943 container
1944 .metadata
1945 .get(META_GHA_CONTAINER_OPTIONS)
1946 .map(String::as_str),
1947 Some("--privileged")
1948 );
1949 }
1950
1951 #[test]
1952 fn with_non_scalar_values_do_not_fail_parse() {
1953 let yaml = r#"
1954jobs:
1955 check:
1956 steps:
1957 - name: Label
1958 uses: actions/github-script@v7
1959 with:
1960 script: |
1961 core.info("ok")
1962 labels:
1963 - bug
1964 - ci
1965 token: "${{ secrets.GITHUB_TOKEN }}"
1966"#;
1967 let graph = parse(yaml);
1968 let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1969 assert!(
1970 secrets.iter().any(|s| s.name == "GITHUB_TOKEN"),
1971 "scalar values inside with: must still be scanned for secrets"
1972 );
1973 }
1974
1975 #[test]
1976 fn inferred_secret_in_run_block_detected() {
1977 let yaml = r#"
1978jobs:
1979 deploy:
1980 steps:
1981 - name: Deploy
1982 run: |
1983 curl -H "Authorization: ${{ secrets.API_TOKEN }}" https://api.example.com
1984"#;
1985 let graph = parse(yaml);
1986 let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1987 assert_eq!(secrets.len(), 1);
1988 assert_eq!(secrets[0].name, "API_TOKEN");
1989 assert_eq!(
1990 secrets[0].metadata.get(META_INFERRED),
1991 Some(&"true".to_string())
1992 );
1993 assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1994 assert!(!graph.completeness_gaps.is_empty());
1995 assert!(
1998 graph.completeness_gap_kinds.contains(&GapKind::Expression),
1999 "inferred secret in run: must record an Expression-kind gap, got: {:?}",
2000 graph.completeness_gap_kinds
2001 );
2002 }
2003
2004 #[test]
2005 fn job_level_env_inherited_by_steps() {
2006 let yaml = r#"
2007jobs:
2008 build:
2009 env:
2010 DB_PASSWORD: "${{ secrets.DB_PASSWORD }}"
2011 steps:
2012 - name: Step A
2013 run: echo "a"
2014 - name: Step B
2015 run: echo "b"
2016"#;
2017 let graph = parse(yaml);
2018 let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2019 assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
2020
2021 let secret_id = secrets[0].id;
2023 let accessing_steps = graph
2024 .edges_to(secret_id)
2025 .filter(|e| e.kind == EdgeKind::HasAccessTo)
2026 .count();
2027 assert_eq!(accessing_steps, 2, "both steps inherit job-level env");
2028 }
2029
2030 #[test]
2031 fn identity_scope_set_on_token() {
2032 let yaml = r#"
2033permissions: write-all
2034jobs:
2035 ci:
2036 steps:
2037 - run: echo hi
2038"#;
2039 let graph = parse(yaml);
2040 let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2041 assert_eq!(identities.len(), 1);
2042 assert_eq!(
2043 identities[0].metadata.get(META_IDENTITY_SCOPE),
2044 Some(&"broad".to_string())
2045 );
2046 }
2047
2048 #[test]
2049 fn constrained_identity_scope() {
2050 let yaml = r#"
2051permissions:
2052 contents: read
2053jobs:
2054 ci:
2055 steps:
2056 - run: echo hi
2057"#;
2058 let graph = parse(yaml);
2059 let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2060 assert_eq!(identities.len(), 1);
2061 assert_eq!(
2062 identities[0].metadata.get(META_IDENTITY_SCOPE),
2063 Some(&"constrained".to_string())
2064 );
2065 }
2066
2067 #[test]
2068 fn pull_request_target_string_trigger_marks_run_steps_untrusted() {
2069 let yaml = r#"
2070on: pull_request_target
2071jobs:
2072 check:
2073 steps:
2074 - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
2075 with:
2076 ref: ${{ github.event.pull_request.head.sha }}
2077 - run: npm test
2078"#;
2079 let graph = parse(yaml);
2080 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2081 assert_eq!(steps.len(), 2);
2082
2083 let run_step = steps.iter().find(|s| s.name.contains("check[1]")).unwrap();
2085 assert_eq!(
2086 run_step.trust_zone,
2087 TrustZone::Untrusted,
2088 "run: step in pull_request_target workflow should be Untrusted"
2089 );
2090
2091 let checkout_step = steps.iter().find(|s| s.name.contains("check[0]")).unwrap();
2093 assert_eq!(checkout_step.trust_zone, TrustZone::ThirdParty);
2094 }
2095
2096 #[test]
2097 fn pull_request_target_sequence_trigger_marks_run_steps_untrusted() {
2098 let yaml = r#"
2099on: [push, pull_request_target]
2100jobs:
2101 ci:
2102 steps:
2103 - run: echo hi
2104"#;
2105 let graph = parse(yaml);
2106 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2107 assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
2108 }
2109
2110 #[test]
2111 fn pull_request_target_mapping_trigger_marks_run_steps_untrusted() {
2112 let yaml = r#"
2113on:
2114 pull_request_target:
2115 types: [opened, synchronize]
2116jobs:
2117 ci:
2118 steps:
2119 - run: echo hi
2120"#;
2121 let graph = parse(yaml);
2122 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2123 assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
2124 }
2125
2126 #[test]
2127 fn push_trigger_does_not_mark_run_steps_untrusted() {
2128 let yaml = r#"
2129on: push
2130jobs:
2131 ci:
2132 steps:
2133 - run: echo hi
2134"#;
2135 let graph = parse(yaml);
2136 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2137 assert_eq!(
2138 steps[0].trust_zone,
2139 TrustZone::FirstParty,
2140 "push-triggered run: steps should remain FirstParty"
2141 );
2142 }
2143
2144 #[test]
2145 fn workflow_level_env_inherited_by_all_steps() {
2146 let yaml = r#"
2147env:
2148 DB_URL: "${{ secrets.DATABASE_URL }}"
2149jobs:
2150 build:
2151 steps:
2152 - name: Step A
2153 run: echo "a"
2154 test:
2155 steps:
2156 - name: Step B
2157 run: echo "b"
2158"#;
2159 let graph = parse(yaml);
2160 let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2161 assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
2162
2163 let secret_id = secrets[0].id;
2165 let accessing_steps = graph
2166 .edges_to(secret_id)
2167 .filter(|e| e.kind == EdgeKind::HasAccessTo)
2168 .count();
2169 assert_eq!(accessing_steps, 2, "both steps inherit workflow-level env");
2170 }
2171
2172 #[test]
2173 fn matrix_strategy_marks_graph_partial() {
2174 let yaml = r#"
2175jobs:
2176 test:
2177 strategy:
2178 matrix:
2179 os: [ubuntu-latest, windows-latest, macos-latest]
2180 steps:
2181 - run: echo hi
2182"#;
2183 let graph = parse(yaml);
2184 assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2185 assert!(
2186 graph.completeness_gaps.iter().any(|g| g.contains("matrix")),
2187 "matrix strategy should be recorded as a completeness gap"
2188 );
2189 assert!(
2192 graph.completeness_gap_kinds.contains(&GapKind::Expression),
2193 "matrix strategy must record an Expression-kind gap, got: {:?}",
2194 graph.completeness_gap_kinds
2195 );
2196 }
2197
2198 #[test]
2199 fn job_without_matrix_does_not_mark_partial() {
2200 let yaml = r#"
2201jobs:
2202 build:
2203 steps:
2204 - run: cargo build
2205"#;
2206 let graph = parse(yaml);
2207 assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
2208 }
2209
2210 #[test]
2211 fn reusable_workflow_creates_image_and_marks_partial() {
2212 let yaml = r#"
2213jobs:
2214 call:
2215 uses: org/repo/.github/workflows/deploy.yml@main
2216"#;
2217 let graph = parse(yaml);
2218 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2219 assert_eq!(images.len(), 1);
2220 assert_eq!(images[0].name, "org/repo/.github/workflows/deploy.yml@main");
2221 assert_eq!(images[0].trust_zone, TrustZone::Untrusted); let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2225 assert_eq!(steps.len(), 1);
2226 assert_eq!(steps[0].name, "call");
2227
2228 let delegates: Vec<_> = graph
2230 .edges_from(steps[0].id)
2231 .filter(|e| e.kind == EdgeKind::DelegatesTo)
2232 .collect();
2233 assert_eq!(delegates.len(), 1);
2234
2235 assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2236 assert!(
2240 graph.completeness_gap_kinds.contains(&GapKind::Structural),
2241 "reusable workflow must record a Structural-kind gap, got: {:?}",
2242 graph.completeness_gap_kinds
2243 );
2244 }
2245
2246 #[test]
2247 fn reusable_workflow_sha_pinned_is_third_party() {
2248 let yaml = r#"
2249jobs:
2250 call:
2251 uses: org/repo/.github/workflows/deploy.yml@a5ac7e51b41094c92402da3b24376905380afc29
2252"#;
2253 let graph = parse(yaml);
2254 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2255 assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
2256 }
2257
2258 #[test]
2259 fn container_unpinned_creates_image_node_untrusted() {
2260 let yaml = r#"
2261jobs:
2262 build:
2263 container: ubuntu:22.04
2264 steps:
2265 - run: echo hi
2266"#;
2267 let graph = parse(yaml);
2268 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2269 assert_eq!(images.len(), 1);
2270 assert_eq!(images[0].name, "ubuntu:22.04");
2271 assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
2272 assert_eq!(
2273 images[0].metadata.get(META_CONTAINER),
2274 Some(&"true".to_string())
2275 );
2276 }
2277
2278 #[test]
2279 fn container_digest_pinned_creates_image_node_third_party() {
2280 let yaml = r#"
2281jobs:
2282 build:
2283 container:
2284 image: "ubuntu@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
2285 steps:
2286 - run: echo hi
2287"#;
2288 let graph = parse(yaml);
2289 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2290 assert_eq!(images.len(), 1);
2291 assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
2292 assert_eq!(
2293 images[0].metadata.get(META_CONTAINER),
2294 Some(&"true".to_string())
2295 );
2296 }
2297
2298 #[test]
2299 fn oidc_permission_tags_identity_with_meta_oidc() {
2300 let yaml = r#"
2301permissions:
2302 id-token: write
2303 contents: read
2304jobs:
2305 ci:
2306 steps:
2307 - run: echo hi
2308"#;
2309 let graph = parse(yaml);
2310 let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2311 assert_eq!(identities.len(), 1);
2312 assert_eq!(
2313 identities[0].metadata.get(META_OIDC),
2314 Some(&"true".to_string()),
2315 "id-token: write should mark identity as OIDC-capable"
2316 );
2317 }
2318
2319 #[test]
2320 fn non_oidc_permission_does_not_tag_meta_oidc() {
2321 let yaml = r#"
2322permissions:
2323 contents: read
2324jobs:
2325 ci:
2326 steps:
2327 - run: echo hi
2328"#;
2329 let graph = parse(yaml);
2330 let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2331 assert_eq!(identities.len(), 1);
2332 assert!(
2333 !identities[0].metadata.contains_key(META_OIDC),
2334 "contents:read should not tag as OIDC"
2335 );
2336 }
2337
2338 #[test]
2339 fn contents_write_without_id_token_does_not_tag_oidc() {
2340 let yaml = r#"
2343permissions:
2344 contents: write
2345jobs:
2346 ci:
2347 steps:
2348 - run: echo hi
2349"#;
2350 let graph = parse(yaml);
2351 let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2352 assert_eq!(identities.len(), 1);
2353 assert!(
2354 !identities[0].metadata.contains_key(META_OIDC),
2355 "contents:write without id-token must not be tagged OIDC"
2356 );
2357 }
2358
2359 #[test]
2360 fn write_all_permission_tags_identity_as_oidc() {
2361 let yaml = r#"
2363permissions: write-all
2364jobs:
2365 ci:
2366 steps:
2367 - run: echo hi
2368"#;
2369 let graph = parse(yaml);
2370 let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2371 assert_eq!(identities.len(), 1);
2372 assert_eq!(
2373 identities[0].metadata.get(META_OIDC),
2374 Some(&"true".to_string()),
2375 "write-all grants all permissions including id-token: write"
2376 );
2377 }
2378
2379 #[test]
2380 fn container_steps_linked_to_container_image() {
2381 let yaml = r#"
2382jobs:
2383 build:
2384 container: ubuntu:22.04
2385 steps:
2386 - name: Step A
2387 run: echo "a"
2388 - name: Step B
2389 run: echo "b"
2390"#;
2391 let graph = parse(yaml);
2392 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2393 assert_eq!(images.len(), 1);
2394 let container_id = images[0].id;
2395
2396 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2398 assert_eq!(steps.len(), 2);
2399 for step in &steps {
2400 let links: Vec<_> = graph
2401 .edges_from(step.id)
2402 .filter(|e| e.kind == EdgeKind::UsesImage && e.to == container_id)
2403 .collect();
2404 assert_eq!(
2405 links.len(),
2406 1,
2407 "step '{}' must link to container",
2408 step.name
2409 );
2410 }
2411 }
2412
2413 #[test]
2414 fn container_authority_propagates_to_floating_image() {
2415 let yaml = r#"
2418permissions: write-all
2419jobs:
2420 build:
2421 container: ubuntu:22.04
2422 steps:
2423 - run: echo hi
2424"#;
2425 use taudit_core::propagation::DEFAULT_MAX_HOPS;
2426 use taudit_core::rules;
2427 let graph = parse(yaml);
2428 let findings = rules::run_all_rules(&graph, DEFAULT_MAX_HOPS);
2429 assert!(
2431 findings
2432 .iter()
2433 .any(|f| f.category == taudit_core::finding::FindingCategory::AuthorityPropagation),
2434 "authority should propagate from step to floating container"
2435 );
2436 }
2437
2438 #[test]
2439 fn aws_oidc_creates_identity_node() {
2440 let yaml = r#"
2441jobs:
2442 deploy:
2443 steps:
2444 - name: Configure AWS credentials
2445 uses: aws-actions/configure-aws-credentials@v4
2446 with:
2447 role-to-assume: arn:aws:iam::123456789012:role/my-deploy-role
2448 aws-region: us-east-1
2449"#;
2450 let graph = parse(yaml);
2451 let identities: Vec<_> = graph
2452 .nodes_of_kind(NodeKind::Identity)
2453 .filter(|n| n.name != "GITHUB_TOKEN")
2454 .collect();
2455 assert_eq!(identities.len(), 1);
2456 assert_eq!(identities[0].name, "AWS/my-deploy-role");
2458 assert_eq!(
2459 identities[0].metadata.get(META_OIDC),
2460 Some(&"true".to_string())
2461 );
2462 assert_eq!(
2463 identities[0].metadata.get(META_IDENTITY_SCOPE),
2464 Some(&"broad".to_string())
2465 );
2466 }
2467
2468 #[test]
2469 fn gcp_oidc_creates_identity_node() {
2470 let yaml = r#"
2471jobs:
2472 deploy:
2473 steps:
2474 - name: Authenticate to GCP
2475 uses: google-github-actions/auth@v2
2476 with:
2477 workload_identity_provider: projects/123/locations/global/workloadIdentityPools/my-pool/providers/my-provider
2478 service_account: my-sa@my-project.iam.gserviceaccount.com
2479"#;
2480 let graph = parse(yaml);
2481 let identities: Vec<_> = graph
2482 .nodes_of_kind(NodeKind::Identity)
2483 .filter(|n| n.name != "GITHUB_TOKEN")
2484 .collect();
2485 assert_eq!(identities.len(), 1);
2486 assert!(identities[0].name.starts_with("GCP/"));
2487 assert_eq!(
2488 identities[0].metadata.get(META_OIDC),
2489 Some(&"true".to_string())
2490 );
2491 }
2492
2493 #[test]
2494 fn azure_oidc_creates_identity_node() {
2495 let yaml = r#"
2496jobs:
2497 deploy:
2498 steps:
2499 - name: Azure login
2500 uses: azure/login@v2
2501 with:
2502 client-id: ${{ vars.AZURE_CLIENT_ID }}
2503 tenant-id: ${{ vars.AZURE_TENANT_ID }}
2504 subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
2505"#;
2506 let graph = parse(yaml);
2507 let identities: Vec<_> = graph
2508 .nodes_of_kind(NodeKind::Identity)
2509 .filter(|n| n.name != "GITHUB_TOKEN")
2510 .collect();
2511 assert_eq!(identities.len(), 1);
2512 assert!(identities[0].name.starts_with("Azure/"));
2513 assert_eq!(
2514 identities[0].metadata.get(META_OIDC),
2515 Some(&"true".to_string())
2516 );
2517 }
2518
2519 #[test]
2520 fn azure_static_sp_does_not_create_identity_node() {
2521 let yaml = r#"
2524jobs:
2525 deploy:
2526 steps:
2527 - name: Azure login
2528 uses: azure/login@v2
2529 with:
2530 client-id: my-client-id
2531 client-secret: ${{ secrets.AZURE_CLIENT_SECRET }}
2532 tenant-id: my-tenant
2533"#;
2534 let graph = parse(yaml);
2535 let identities: Vec<_> = graph
2537 .nodes_of_kind(NodeKind::Identity)
2538 .filter(|n| n.name != "GITHUB_TOKEN")
2539 .collect();
2540 assert!(
2541 identities.is_empty(),
2542 "static SP should not create an OIDC Identity node"
2543 );
2544 let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2546 assert_eq!(secrets.len(), 1);
2547 assert_eq!(secrets[0].name, "AZURE_CLIENT_SECRET");
2548 }
2549
2550 #[test]
2551 fn aws_static_creds_do_not_create_identity_node() {
2552 let yaml = r#"
2555jobs:
2556 deploy:
2557 steps:
2558 - uses: aws-actions/configure-aws-credentials@v4
2559 with:
2560 aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
2561 aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
2562 aws-region: us-east-1
2563"#;
2564 let graph = parse(yaml);
2565 let identities: Vec<_> = graph
2566 .nodes_of_kind(NodeKind::Identity)
2567 .filter(|n| n.name != "GITHUB_TOKEN")
2568 .collect();
2569 assert!(
2570 identities.is_empty(),
2571 "static AWS creds must not create Identity node"
2572 );
2573 let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2574 assert_eq!(secrets.len(), 2, "both static secrets captured");
2575 }
2576
2577 #[test]
2578 fn pull_request_target_sets_meta_trigger_on_graph() {
2579 let yaml = r#"
2580on: pull_request_target
2581jobs:
2582 ci:
2583 steps:
2584 - run: echo hi
2585"#;
2586 let graph = parse(yaml);
2587 assert_eq!(
2588 graph.metadata.get(META_TRIGGER),
2589 Some(&"pull_request_target".to_string())
2590 );
2591 }
2592
2593 #[test]
2594 fn github_env_write_in_run_sets_meta_writes_env_gate() {
2595 let yaml = r#"
2596jobs:
2597 build:
2598 steps:
2599 - name: Set version
2600 run: echo "VERSION=1.0" >> $GITHUB_ENV
2601"#;
2602 let graph = parse(yaml);
2603 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2604 assert_eq!(steps.len(), 1);
2605 assert_eq!(
2606 steps[0].metadata.get(META_WRITES_ENV_GATE),
2607 Some(&"true".to_string()),
2608 "run: with >> $GITHUB_ENV must mark META_WRITES_ENV_GATE"
2609 );
2610 }
2611
2612 #[test]
2613 fn attest_action_sets_meta_attests() {
2614 let yaml = r#"
2615jobs:
2616 release:
2617 steps:
2618 - name: Attest
2619 uses: actions/attest-build-provenance@v1
2620 with:
2621 subject-path: dist/*
2622"#;
2623 let graph = parse(yaml);
2624 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2625 assert_eq!(steps.len(), 1);
2626 assert_eq!(
2627 steps[0].metadata.get(META_ATTESTS),
2628 Some(&"true".to_string())
2629 );
2630 }
2631
2632 #[test]
2633 fn self_hosted_string_runs_on_creates_image_with_self_hosted_metadata() {
2634 let yaml = r#"
2635jobs:
2636 build:
2637 runs-on: self-hosted
2638 steps:
2639 - run: echo hi
2640"#;
2641 let graph = parse(yaml);
2642 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2643 let runner = images
2644 .iter()
2645 .find(|i| i.metadata.contains_key(META_SELF_HOSTED))
2646 .expect("self-hosted runner Image node must be created");
2647 assert_eq!(
2648 runner.metadata.get(META_SELF_HOSTED),
2649 Some(&"true".to_string())
2650 );
2651 }
2652
2653 #[test]
2654 fn self_hosted_in_sequence_runs_on_creates_image_with_self_hosted_metadata() {
2655 let yaml = r#"
2656jobs:
2657 build:
2658 runs-on: [self-hosted, linux, x64]
2659 steps:
2660 - run: echo hi
2661"#;
2662 let graph = parse(yaml);
2663 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2664 let runner = images
2665 .iter()
2666 .find(|i| i.metadata.contains_key(META_SELF_HOSTED))
2667 .expect("self-hosted runner Image node must be created");
2668 assert_eq!(
2669 runner.metadata.get(META_SELF_HOSTED),
2670 Some(&"true".to_string())
2671 );
2672 }
2673
2674 #[test]
2675 fn hosted_runner_does_not_create_self_hosted_image() {
2676 let yaml = r#"
2677jobs:
2678 build:
2679 runs-on: ubuntu-latest
2680 steps:
2681 - run: echo hi
2682"#;
2683 let graph = parse(yaml);
2684 let self_hosted_images: Vec<_> = graph
2685 .nodes_of_kind(NodeKind::Image)
2686 .filter(|i| i.metadata.contains_key(META_SELF_HOSTED))
2687 .collect();
2688 assert!(
2689 self_hosted_images.is_empty(),
2690 "hosted runner must not produce a self-hosted Image node"
2691 );
2692 }
2693
2694 #[test]
2695 fn actions_checkout_step_tagged_with_meta_checkout_self() {
2696 let yaml = r#"
2697jobs:
2698 ci:
2699 steps:
2700 - uses: actions/checkout@v4
2701 - run: echo hi
2702"#;
2703 let graph = parse(yaml);
2704 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2705 let checkout_step = steps
2706 .iter()
2707 .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
2708 .expect("actions/checkout step must be tagged META_CHECKOUT_SELF");
2709 assert_eq!(
2710 checkout_step.metadata.get(META_CHECKOUT_SELF),
2711 Some(&"true".to_string())
2712 );
2713 }
2714
2715 #[test]
2716 fn actions_checkout_sha_pinned_also_tagged() {
2717 let yaml = r#"
2718jobs:
2719 ci:
2720 steps:
2721 - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
2722"#;
2723 let graph = parse(yaml);
2724 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2725 assert_eq!(steps.len(), 1);
2726 assert_eq!(
2727 steps[0].metadata.get(META_CHECKOUT_SELF),
2728 Some(&"true".to_string()),
2729 "SHA-pinned checkout must still be tagged — rule gates on trigger context"
2730 );
2731 }
2732
2733 #[test]
2734 fn non_checkout_uses_not_tagged_checkout_self() {
2735 let yaml = r#"
2736jobs:
2737 ci:
2738 steps:
2739 - uses: some-org/other-action@v1
2740"#;
2741 let graph = parse(yaml);
2742 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2743 assert_eq!(steps.len(), 1);
2744 assert!(
2745 !steps[0].metadata.contains_key(META_CHECKOUT_SELF),
2746 "non-checkout uses: must not be tagged"
2747 );
2748 }
2749
2750 fn make_temp_dir(label: &str) -> std::path::PathBuf {
2754 use std::sync::atomic::{AtomicU64, Ordering};
2755 static COUNTER: AtomicU64 = AtomicU64::new(0);
2756 let n = COUNTER.fetch_add(1, Ordering::Relaxed);
2757 let dir = std::env::temp_dir().join(format!(
2758 "taudit-gha-test-{}-{}-{}",
2759 std::process::id(),
2760 n,
2761 label
2762 ));
2763 let _ = std::fs::remove_dir_all(&dir);
2764 std::fs::create_dir_all(&dir).expect("create temp dir");
2765 dir
2766 }
2767
2768 fn parse_at(yaml: &str, file: &str) -> AuthorityGraph {
2769 let parser = GhaParser;
2770 let source = PipelineSource {
2771 file: file.into(),
2772 repo: None,
2773 git_ref: None,
2774 commit_sha: None,
2775 };
2776 parser.parse(yaml, &source).unwrap()
2777 }
2778
2779 #[test]
2780 fn composite_action_reference_marks_graph_partial_without_inlining() {
2781 let dir = make_temp_dir("composite-no-inline");
2786 let workflows_dir = dir.join(".github/workflows");
2787 let action_dir = dir.join(".github/actions/my-action");
2788 std::fs::create_dir_all(&workflows_dir).unwrap();
2789 std::fs::create_dir_all(&action_dir).unwrap();
2790
2791 let action_yml = r#"
2793name: My Action
2794runs:
2795 using: composite
2796 steps:
2797 - name: Install deps
2798 run: npm install
2799 shell: bash
2800"#;
2801 std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
2802
2803 let workflow = r#"
2804jobs:
2805 ci:
2806 steps:
2807 - name: Run my action
2808 uses: ./.github/actions/my-action
2809"#;
2810 let workflow_path = workflows_dir.join("ci.yml");
2811 std::fs::write(&workflow_path, workflow).unwrap();
2812
2813 let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2814
2815 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2817 assert_eq!(steps.len(), 1, "no composite-action step inlining");
2818
2819 assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2821 assert!(
2822 graph.completeness_gap_kinds.contains(&GapKind::Structural),
2823 "local action reference must record a Structural-kind gap, got: {:?}",
2824 graph.completeness_gap_kinds
2825 );
2826 assert!(
2827 graph
2828 .completeness_gaps
2829 .iter()
2830 .any(|g| g.contains("composite action not resolved")
2831 && g.contains("./.github/actions/my-action")),
2832 "gap reason must name the action and explain non-resolution, got: {:?}",
2833 graph.completeness_gaps
2834 );
2835
2836 let _ = std::fs::remove_dir_all(&dir);
2837 }
2838
2839 #[test]
2840 fn missing_action_yml_marks_graph_partial() {
2841 let dir = make_temp_dir("missing-action");
2845 let workflows_dir = dir.join(".github/workflows");
2846 std::fs::create_dir_all(&workflows_dir).unwrap();
2847
2848 let workflow = r#"
2849jobs:
2850 ci:
2851 steps:
2852 - uses: ./.github/actions/missing-action
2853"#;
2854 let workflow_path = workflows_dir.join("ci.yml");
2855 std::fs::write(&workflow_path, workflow).unwrap();
2856
2857 let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2858
2859 assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2860 assert!(
2861 graph.completeness_gaps.iter().any(
2862 |g| g.contains("composite action not resolved") && g.contains("missing-action")
2863 ),
2864 "missing local action must be recorded as a completeness gap, got: {:?}",
2865 graph.completeness_gaps
2866 );
2867 assert!(
2868 graph.completeness_gap_kinds.contains(&GapKind::Structural),
2869 "unresolved composite action must record a Structural-kind gap, got: {:?}",
2870 graph.completeness_gap_kinds
2871 );
2872
2873 let _ = std::fs::remove_dir_all(&dir);
2874 }
2875
2876 #[test]
2877 fn non_composite_local_action_marks_graph_partial() {
2878 let dir = make_temp_dir("non-composite");
2882 let workflows_dir = dir.join(".github/workflows");
2883 let action_dir = dir.join(".github/actions/docker-action");
2884 std::fs::create_dir_all(&workflows_dir).unwrap();
2885 std::fs::create_dir_all(&action_dir).unwrap();
2886
2887 let action_yml = r#"
2888name: Docker Action
2889runs:
2890 using: docker
2891 image: Dockerfile
2892"#;
2893 std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
2894
2895 let workflow = r#"
2896jobs:
2897 ci:
2898 steps:
2899 - uses: ./.github/actions/docker-action
2900"#;
2901 let workflow_path = workflows_dir.join("ci.yml");
2902 std::fs::write(&workflow_path, workflow).unwrap();
2903
2904 let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2905
2906 assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2907 assert!(
2908 graph.completeness_gap_kinds.contains(&GapKind::Structural),
2909 "local action reference must record a Structural-kind gap, got: {:?}",
2910 graph.completeness_gap_kinds
2911 );
2912
2913 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2915 assert_eq!(steps.len(), 1, "must not inline any sub-steps");
2916
2917 let _ = std::fs::remove_dir_all(&dir);
2918 }
2919
2920 #[test]
2921 fn composite_action_secrets_not_captured_after_partial_marking() {
2922 let dir = make_temp_dir("composite-secrets-hidden");
2927 let workflows_dir = dir.join(".github/workflows");
2928 let action_dir = dir.join(".github/actions/deploy");
2929 std::fs::create_dir_all(&workflows_dir).unwrap();
2930 std::fs::create_dir_all(&action_dir).unwrap();
2931
2932 let action_yml = r#"
2933name: Deploy
2934runs:
2935 using: composite
2936 steps:
2937 - name: Push
2938 run: |
2939 curl -H "Authorization: ${{ secrets.DEPLOY_TOKEN }}" https://example.com
2940 shell: bash
2941"#;
2942 std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
2943
2944 let workflow = r#"
2945jobs:
2946 release:
2947 steps:
2948 - uses: ./.github/actions/deploy
2949"#;
2950 let workflow_path = workflows_dir.join("release.yml");
2951 std::fs::write(&workflow_path, workflow).unwrap();
2952
2953 let graph = parse_at(workflow, workflow_path.to_str().unwrap());
2954
2955 let secret_names: Vec<_> = graph
2956 .nodes_of_kind(NodeKind::Secret)
2957 .map(|s| s.name.as_str())
2958 .collect();
2959 assert!(
2960 !secret_names.contains(&"DEPLOY_TOKEN"),
2961 "secret hidden inside composite action must NOT leak into the graph, got: {secret_names:?}"
2962 );
2963 assert_eq!(
2964 graph.completeness,
2965 AuthorityCompleteness::Partial,
2966 "composite action reference must mark graph Partial"
2967 );
2968
2969 let _ = std::fs::remove_dir_all(&dir);
2970 }
2971
2972 #[test]
2973 fn step_env_literal_shadows_workflow_level_secret() {
2974 let yaml = r#"
2984on: pull_request_target
2985env:
2986 TOKEN: ${{ secrets.PROD_TOKEN }}
2987jobs:
2988 build:
2989 steps:
2990 - run: ./scan.sh
2991 env:
2992 TOKEN: literal-non-secret
2993"#;
2994 let graph = parse(yaml);
2995
2996 let prod_token_id = graph
3000 .nodes_of_kind(NodeKind::Secret)
3001 .find(|n| n.name == "PROD_TOKEN")
3002 .map(|n| n.id);
3003
3004 if let Some(secret_id) = prod_token_id {
3005 let leaks = graph
3006 .edges_to(secret_id)
3007 .filter(|e| e.kind == EdgeKind::HasAccessTo)
3008 .count();
3009 assert_eq!(
3010 leaks, 0,
3011 "step-level env literal must shadow workflow-level secret — \
3012 expected 0 HasAccessTo edges to PROD_TOKEN, found {leaks}"
3013 );
3014 }
3015 }
3016
3017 #[test]
3018 fn step_env_secret_shadows_workflow_level_secret() {
3019 let yaml = r#"
3023on: pull_request_target
3024env:
3025 TOKEN: ${{ secrets.PROD_TOKEN }}
3026jobs:
3027 build:
3028 steps:
3029 - run: ./scan.sh
3030 env:
3031 TOKEN: ${{ secrets.STAGING_TOKEN }}
3032"#;
3033 let graph = parse(yaml);
3034
3035 let secret_names: Vec<_> = graph
3036 .nodes_of_kind(NodeKind::Secret)
3037 .map(|s| s.name.clone())
3038 .collect();
3039
3040 assert!(
3042 secret_names.contains(&"STAGING_TOKEN".to_string()),
3043 "shadowing secret must be in the graph, got: {secret_names:?}"
3044 );
3045
3046 let prod_id = graph
3047 .nodes_of_kind(NodeKind::Secret)
3048 .find(|n| n.name == "PROD_TOKEN")
3049 .map(|n| n.id);
3050 if let Some(prod_id) = prod_id {
3051 let leaks = graph
3052 .edges_to(prod_id)
3053 .filter(|e| e.kind == EdgeKind::HasAccessTo)
3054 .count();
3055 assert_eq!(
3056 leaks, 0,
3057 "step-level env secret must shadow workflow-level secret \
3058 (no HasAccessTo edge to PROD_TOKEN), found {leaks}"
3059 );
3060 }
3061 }
3062
3063 #[test]
3064 fn composite_action_resolution_does_not_depend_on_cwd() {
3065 let dir = make_temp_dir("cwd-independence");
3076 let workflows_dir = dir.join(".github/workflows");
3077 let action_dir = dir.join(".github/actions/x");
3078 std::fs::create_dir_all(&workflows_dir).unwrap();
3079 std::fs::create_dir_all(&action_dir).unwrap();
3080
3081 let action_yml = r#"
3082name: X
3083runs:
3084 using: composite
3085 steps:
3086 - run: echo hi
3087 shell: bash
3088"#;
3089 std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
3090
3091 let workflow = r#"
3092jobs:
3093 ci:
3094 steps:
3095 - uses: ./.github/actions/x
3096"#;
3097 let workflow_path = workflows_dir.join("ci.yml");
3098 std::fs::write(&workflow_path, workflow).unwrap();
3099
3100 let prev_cwd = std::env::current_dir().ok();
3102 std::env::set_current_dir(&dir).unwrap();
3103 let graph_inside = parse_at(workflow, ".github/workflows/ci.yml");
3104 if let Some(p) = prev_cwd {
3105 std::env::set_current_dir(p).unwrap();
3106 }
3107
3108 let abs_workflow_path = workflow_path.to_str().unwrap().to_string();
3110 let graph_outside = parse_at(workflow, &abs_workflow_path);
3111
3112 assert_eq!(
3115 graph_inside.completeness,
3116 AuthorityCompleteness::Partial,
3117 "graph parsed from inside the worktree must be Partial"
3118 );
3119 assert_eq!(
3120 graph_outside.completeness,
3121 AuthorityCompleteness::Partial,
3122 "graph parsed from outside the worktree must be Partial"
3123 );
3124 assert_eq!(
3126 graph_inside.completeness, graph_outside.completeness,
3127 "CWD-relative vs absolute pipeline_file must produce identical completeness"
3128 );
3129 assert_eq!(
3131 graph_inside.nodes_of_kind(NodeKind::Step).count(),
3132 1,
3133 "inside parse must not inline composite sub-steps"
3134 );
3135 assert_eq!(
3136 graph_outside.nodes_of_kind(NodeKind::Step).count(),
3137 1,
3138 "outside parse must not inline composite sub-steps"
3139 );
3140
3141 let _ = std::fs::remove_dir_all(&dir);
3142 }
3143
3144 #[test]
3145 fn workflow_level_permissions_create_identity() {
3146 let yaml = r#"
3147permissions: write-all
3148jobs:
3149 ci:
3150 steps:
3151 - run: echo hi
3152"#;
3153 let graph = parse(yaml);
3154 let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
3155 assert_eq!(identities.len(), 1);
3156 assert_eq!(identities[0].name, "GITHUB_TOKEN");
3157 assert_eq!(
3158 identities[0].metadata.get(META_PERMISSIONS).unwrap(),
3159 "write-all"
3160 );
3161 }
3162
3163 #[test]
3164 fn omitted_workflow_permissions_create_unknown_implicit_identity() {
3165 let yaml = r#"
3166jobs:
3167 ci:
3168 steps:
3169 - run: echo hi
3170"#;
3171 let graph = parse(yaml);
3172 let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
3173 assert_eq!(identities.len(), 1);
3174 assert_eq!(identities[0].name, "GITHUB_TOKEN");
3175 assert_eq!(
3176 identities[0].metadata.get(META_IDENTITY_SCOPE).unwrap(),
3177 "unknown"
3178 );
3179 assert_eq!(identities[0].metadata.get(META_IMPLICIT).unwrap(), "true");
3180 }
3181
3182 #[test]
3183 fn job_env_template_expression_does_not_crash_and_marks_partial() {
3184 let yaml = r#"
3191jobs:
3192 unit-tests:
3193 env: ${{ matrix }}
3194 steps:
3195 - run: pytest
3196"#;
3197 let graph = parse(yaml);
3198 assert!(
3200 matches!(graph.completeness, AuthorityCompleteness::Partial),
3201 "graph must be marked Partial when env: is a template expression"
3202 );
3203 let saw_template_gap = graph
3204 .completeness_gaps
3205 .iter()
3206 .any(|g| g.contains("env:") && g.contains("template"));
3207 assert!(
3208 saw_template_gap,
3209 "completeness_gaps must mention env: template, got: {:?}",
3210 graph.completeness_gaps
3211 );
3212 assert!(
3215 graph.completeness_gap_kinds.contains(&GapKind::Expression),
3216 "job-level env: template must record an Expression-kind gap, got: {:?}",
3217 graph.completeness_gap_kinds
3218 );
3219 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3221 assert_eq!(steps.len(), 1, "the single step must still be parsed");
3222 }
3223
3224 #[test]
3225 fn env_with_non_string_scalar_values_parses() {
3226 let yaml = r#"
3233jobs:
3234 test:
3235 env:
3236 RUST_BACKTRACE: 1
3237 COVERAGE: false
3238 TARGET_FLAGS:
3239 CARGO: cargo
3240 steps:
3241 - run: cargo test
3242"#;
3243 let graph = parse(yaml);
3244 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3246 assert_eq!(steps.len(), 1, "expected the single step to parse");
3247 assert!(
3249 !matches!(graph.completeness, AuthorityCompleteness::Partial)
3250 || !graph
3251 .completeness_gaps
3252 .iter()
3253 .any(|g| g.contains("env:") && g.contains("template")),
3254 "non-string env values must not mark the graph Partial via the env-template path"
3255 );
3256 }
3257
3258 #[test]
3259 fn step_env_with_boolean_and_integer_values_parses() {
3260 let yaml = r#"
3262jobs:
3263 build:
3264 steps:
3265 - name: build
3266 run: make
3267 env:
3268 DEBUG: true
3269 RETRIES: 3
3270 OPTIONAL_FLAG:
3271"#;
3272 let graph = parse(yaml);
3273 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3274 assert_eq!(steps.len(), 1);
3275 }
3276
3277 #[test]
3278 fn meta_job_name_set_on_step_nodes() {
3279 let yaml = r#"
3280jobs:
3281 build:
3282 steps:
3283 - name: Checkout
3284 uses: actions/checkout@v4
3285 - name: Compile
3286 run: make build
3287"#;
3288 let graph = parse(yaml);
3289 let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3290 assert!(!steps.is_empty(), "expected at least one Step node");
3291 for step in &steps {
3292 assert_eq!(
3293 step.metadata.get(META_JOB_NAME).map(String::as_str),
3294 Some("build"),
3295 "Step {:?} missing META_JOB_NAME=build",
3296 step.name
3297 );
3298 }
3299 }
3300
3301 #[test]
3304 fn jobs_without_steps_marks_partial() {
3305 let yaml = r#"
3312on:
3313 push:
3314jobs:
3315 build:
3316 runs-on: ubuntu-latest
3317"#;
3318 let graph = parse(yaml);
3319 let step_count = graph
3320 .nodes
3321 .iter()
3322 .filter(|n| n.kind == NodeKind::Step)
3323 .count();
3324 assert_eq!(step_count, 0, "no steps: present means 0 Step nodes");
3325 assert_eq!(
3326 graph.completeness,
3327 AuthorityCompleteness::Partial,
3328 "0-step-nodes despite non-empty jobs: must mark Partial"
3329 );
3330 assert!(
3331 graph
3332 .completeness_gaps
3333 .iter()
3334 .any(|g| g.contains("0 step nodes")),
3335 "completeness_gaps must mention 0 step nodes: {:?}",
3336 graph.completeness_gaps
3337 );
3338 assert!(
3342 graph.completeness_gap_kinds.contains(&GapKind::Structural),
3343 "0-step-nodes gap must be Structural, got: {:?}",
3344 graph.completeness_gap_kinds
3345 );
3346 }
3347
3348 #[test]
3349 fn empty_workflow_no_jobs_does_not_mark_partial_for_zero_steps() {
3350 let yaml = "name: empty\non:\n push:\n";
3354 let graph = parse(yaml);
3355 let zero_step_gap = graph
3356 .completeness_gaps
3357 .iter()
3358 .any(|g| g.contains("0 step nodes"));
3359 assert!(
3360 !zero_step_gap,
3361 "no jobs: in source means no 0-step gap reason; got: {:?}",
3362 graph.completeness_gaps
3363 );
3364 }
3365
3366 #[test]
3369 fn all_zero_sha_action_is_untrusted() {
3370 let yaml = r#"
3371jobs:
3372 ci:
3373 steps:
3374 - uses: actions/setup-python@0000000000000000000000000000000000000000
3375"#;
3376 let graph = parse(yaml);
3377 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
3378 assert_eq!(images.len(), 1);
3379 assert_eq!(
3380 images[0].trust_zone,
3381 TrustZone::Untrusted,
3382 "all-zero SHA must be classified as Untrusted, not ThirdParty"
3383 );
3384 }
3385
3386 #[test]
3387 fn real_sha_pinned_action_is_third_party() {
3388 let yaml = r#"
3390jobs:
3391 ci:
3392 steps:
3393 - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
3394"#;
3395 let graph = parse(yaml);
3396 let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
3397 assert_eq!(images.len(), 1);
3398 assert_eq!(
3399 images[0].trust_zone,
3400 TrustZone::ThirdParty,
3401 "legitimate SHA-pinned action must be classified as ThirdParty"
3402 );
3403 }
3404
3405 #[test]
3406 fn upload_artifact_creates_produces_edge() {
3407 let yaml = r#"
3408permissions:
3409 contents: read
3410jobs:
3411 build:
3412 steps:
3413 - uses: actions/upload-artifact@v4
3414 with:
3415 name: my-dist
3416 path: ./dist
3417"#;
3418 let graph = parse(yaml);
3419 let artifacts: Vec<_> = graph.nodes_of_kind(NodeKind::Artifact).collect();
3420 assert_eq!(
3421 artifacts.len(),
3422 1,
3423 "upload-artifact should create one Artifact node"
3424 );
3425 assert_eq!(artifacts[0].name, "my-dist");
3426 let produces_edges: Vec<_> = graph
3427 .edges
3428 .iter()
3429 .filter(|e| e.kind == EdgeKind::Produces && e.to == artifacts[0].id)
3430 .collect();
3431 assert_eq!(
3432 produces_edges.len(),
3433 1,
3434 "upload step must have Produces edge to artifact"
3435 );
3436 }
3437
3438 #[test]
3439 fn download_artifact_creates_consumes_edge() {
3440 let yaml = r#"
3441jobs:
3442 deploy:
3443 steps:
3444 - uses: actions/download-artifact@v4
3445 with:
3446 name: my-dist
3447"#;
3448 let graph = parse(yaml);
3449 let artifacts: Vec<_> = graph.nodes_of_kind(NodeKind::Artifact).collect();
3450 assert_eq!(
3451 artifacts.len(),
3452 1,
3453 "download-artifact should create one Artifact node"
3454 );
3455 let consumes_edges: Vec<_> = graph
3456 .edges
3457 .iter()
3458 .filter(|e| e.kind == EdgeKind::Consumes && e.from == artifacts[0].id)
3459 .collect();
3460 assert_eq!(
3461 consumes_edges.len(),
3462 1,
3463 "download step must have Consumes edge from artifact"
3464 );
3465 }
3466
3467 #[test]
3468 fn upload_download_same_name_share_artifact_node() {
3469 let yaml = r#"
3470permissions:
3471 contents: read
3472jobs:
3473 build:
3474 steps:
3475 - uses: actions/upload-artifact@v4
3476 with:
3477 name: shared-dist
3478 path: ./dist
3479 deploy:
3480 steps:
3481 - uses: actions/download-artifact@v4
3482 with:
3483 name: shared-dist
3484"#;
3485 let graph = parse(yaml);
3486 let artifacts: Vec<_> = graph.nodes_of_kind(NodeKind::Artifact).collect();
3487 assert_eq!(
3488 artifacts.len(),
3489 1,
3490 "same artifact name must reuse the same Artifact node"
3491 );
3492 let produces: Vec<_> = graph
3493 .edges
3494 .iter()
3495 .filter(|e| e.kind == EdgeKind::Produces)
3496 .collect();
3497 let consumes: Vec<_> = graph
3498 .edges
3499 .iter()
3500 .filter(|e| e.kind == EdgeKind::Consumes)
3501 .collect();
3502 assert_eq!(produces.len(), 1, "one Produces edge");
3503 assert_eq!(consumes.len(), 1, "one Consumes edge");
3504 assert_eq!(produces[0].to, artifacts[0].id);
3505 assert_eq!(consumes[0].from, artifacts[0].id);
3506 }
3507
3508 #[test]
3509 fn upload_artifact_without_name_creates_no_edge() {
3510 let yaml = r#"
3514jobs:
3515 build:
3516 steps:
3517 - uses: actions/upload-artifact@v4
3518 with:
3519 path: ./dist
3520"#;
3521 let graph = parse(yaml);
3522 let artifacts: Vec<_> = graph.nodes_of_kind(NodeKind::Artifact).collect();
3523 assert!(
3524 artifacts.is_empty(),
3525 "upload-artifact without name: must not create an Artifact node; got: {artifacts:#?}"
3526 );
3527 let produces: Vec<_> = graph
3528 .edges
3529 .iter()
3530 .filter(|e| e.kind == EdgeKind::Produces)
3531 .collect();
3532 assert!(
3533 produces.is_empty(),
3534 "upload-artifact without name: must not create a Produces edge"
3535 );
3536 }
3537
3538 #[test]
3539 fn download_artifact_without_name_creates_no_edge() {
3540 let yaml = r#"
3544jobs:
3545 deploy:
3546 steps:
3547 - uses: actions/download-artifact@v4
3548"#;
3549 let graph = parse(yaml);
3550 let consumes: Vec<_> = graph
3551 .edges
3552 .iter()
3553 .filter(|e| e.kind == EdgeKind::Consumes)
3554 .collect();
3555 assert!(
3556 consumes.is_empty(),
3557 "download-artifact without name: must not create a Consumes edge"
3558 );
3559 }
3560
3561 #[test]
3566 fn secret_extractor_ignores_literal_substrings_outside_template_spans() {
3567 let yaml = r#"
3568jobs:
3569 deploy:
3570 steps:
3571 - name: Mixed shell + template
3572 run: |
3573 # loads /etc/secrets.conf
3574 cp $SECRETS_DIR/secrets.json /tmp/
3575 curl -H "Authorization: ${{ secrets.REAL_TOKEN }}" https://api.example.com
3576"#;
3577 let graph = parse(yaml);
3578 let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3579 assert_eq!(
3580 secrets.len(),
3581 1,
3582 "only `REAL_TOKEN` should be a Secret node — phantoms `conf`/`json` must not appear; got: {:?}",
3583 secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
3584 );
3585 assert_eq!(secrets[0].name, "REAL_TOKEN");
3586 }
3587
3588 #[test]
3593 fn secret_extractor_handles_tight_template_spacing() {
3594 let yaml = r#"
3595jobs:
3596 deploy:
3597 steps:
3598 - name: Tight template
3599 run: echo "x"
3600 env:
3601 TOK: "${{secrets.TIGHT}}"
3602"#;
3603 let graph = parse(yaml);
3604 let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3605 assert_eq!(secrets.len(), 1);
3606 assert_eq!(secrets[0].name, "TIGHT");
3607 let secret_id = secrets[0].id;
3608 let edges = graph
3609 .edges_to(secret_id)
3610 .filter(|e| e.kind == EdgeKind::HasAccessTo)
3611 .count();
3612 assert_eq!(
3613 edges, 1,
3614 "tight `${{{{secrets.X}}}}` must produce HasAccessTo edge"
3615 );
3616 }
3617
3618 #[test]
3622 fn secret_extractor_finds_all_secrets_in_concatenated_value() {
3623 let yaml = r#"
3624jobs:
3625 deploy:
3626 steps:
3627 - name: Concatenated
3628 run: echo "x"
3629 env:
3630 COMBINED: "${{ secrets.A }}-${{ secrets.B }}"
3631"#;
3632 let graph = parse(yaml);
3633 let secret_names: std::collections::BTreeSet<&str> = graph
3634 .nodes_of_kind(NodeKind::Secret)
3635 .map(|n| n.name.as_str())
3636 .collect();
3637 assert!(secret_names.contains("A"), "secret A must be detected");
3638 assert!(secret_names.contains("B"), "secret B must be detected");
3639 assert_eq!(
3640 secret_names.len(),
3641 2,
3642 "exactly two secrets, got: {secret_names:?}"
3643 );
3644 for name in ["A", "B"] {
3646 let id = graph
3647 .nodes_of_kind(NodeKind::Secret)
3648 .find(|n| n.name == name)
3649 .expect("secret node")
3650 .id;
3651 let edges = graph
3652 .edges_to(id)
3653 .filter(|e| e.kind == EdgeKind::HasAccessTo)
3654 .count();
3655 assert!(edges >= 1, "missing HasAccessTo edge for secret {name}");
3656 }
3657 }
3658
3659 #[test]
3663 fn reusable_workflow_secrets_mapping_form_propagates_edges() {
3664 let yaml = r#"
3665jobs:
3666 call:
3667 uses: ./.github/workflows/reusable.yml
3668 secrets:
3669 CHILD: ${{ secrets.PARENT }}
3670 OTHER: ${{ secrets.SECONDARY }}
3671"#;
3672 let graph = parse(yaml);
3673 let secret_names: std::collections::BTreeSet<&str> = graph
3674 .nodes_of_kind(NodeKind::Secret)
3675 .map(|n| n.name.as_str())
3676 .collect();
3677 assert!(
3678 secret_names.contains("PARENT"),
3679 "secrets: mapping value `${{{{ secrets.PARENT }}}}` must produce a Secret node; got: {secret_names:?}"
3680 );
3681 assert!(
3682 secret_names.contains("SECONDARY"),
3683 "secrets: mapping must iterate ALL keys, not just the first; got: {secret_names:?}"
3684 );
3685 let parent_id = graph
3687 .nodes_of_kind(NodeKind::Secret)
3688 .find(|n| n.name == "PARENT")
3689 .unwrap()
3690 .id;
3691 let edges = graph
3692 .edges_to(parent_id)
3693 .filter(|e| e.kind == EdgeKind::HasAccessTo)
3694 .count();
3695 assert!(edges >= 1, "synthetic step must HasAccessTo PARENT");
3696 }
3697
3698 #[test]
3704 fn reusable_workflow_synthetic_step_inherits_workflow_env_secrets() {
3705 let yaml = r#"
3706env:
3707 GLOBAL_TOKEN: "${{ secrets.GLOBAL }}"
3708jobs:
3709 call:
3710 uses: ./.github/workflows/reusable.yml
3711"#;
3712 let graph = parse(yaml);
3713 let global = graph
3714 .nodes_of_kind(NodeKind::Secret)
3715 .find(|n| n.name == "GLOBAL");
3716 assert!(
3717 global.is_some(),
3718 "workflow.env secret `GLOBAL` must produce a Secret node visible to the synthetic step"
3719 );
3720 let global_id = global.unwrap().id;
3721 let edges = graph
3722 .edges_to(global_id)
3723 .filter(|e| e.kind == EdgeKind::HasAccessTo)
3724 .count();
3725 assert!(
3726 edges >= 1,
3727 "synthetic step for reusable workflow must inherit workflow.env HasAccessTo edge"
3728 );
3729 }
3730
3731 #[test]
3736 fn gha_meta_job_outputs_is_deterministic_across_runs() {
3737 let yaml = r#"
3738jobs:
3739 emit:
3740 runs-on: ubuntu-latest
3741 outputs:
3742 zebra: literal-z
3743 apple: literal-a
3744 mango: literal-m
3745 kilo: literal-k
3746 foxtrot: literal-f
3747 steps:
3748 - run: echo hi
3749"#;
3750 let mut prev: Option<String> = None;
3751 for i in 0..9 {
3752 let graph = parse(yaml);
3753 let cur = graph
3754 .metadata
3755 .get(META_JOB_OUTPUTS)
3756 .cloned()
3757 .unwrap_or_default();
3758 assert!(
3759 !cur.is_empty(),
3760 "META_JOB_OUTPUTS must be populated on a workflow with outputs"
3761 );
3762 if let Some(p) = &prev {
3763 assert_eq!(
3764 p, &cur,
3765 "META_JOB_OUTPUTS drifted on run {i}: {p:?} vs {cur:?}"
3766 );
3767 }
3768 prev = Some(cur);
3769 }
3770 }
3771
3772 #[test]
3777 fn gha_meta_permissions_is_deterministic_across_runs() {
3778 let yaml = r#"
3779permissions:
3780 contents: read
3781 id-token: write
3782 packages: write
3783 actions: read
3784 pull-requests: write
3785jobs:
3786 ci:
3787 steps:
3788 - run: echo hi
3789"#;
3790 let mut prev: Option<String> = None;
3791 for i in 0..9 {
3792 let graph = parse(yaml);
3793 let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
3794 assert_eq!(identities.len(), 1, "one GITHUB_TOKEN identity");
3795 let cur = identities[0]
3796 .metadata
3797 .get(META_PERMISSIONS)
3798 .cloned()
3799 .expect("META_PERMISSIONS must be stamped");
3800 if let Some(p) = &prev {
3801 assert_eq!(
3802 p, &cur,
3803 "META_PERMISSIONS drifted on run {i}: {p:?} vs {cur:?}"
3804 );
3805 }
3806 prev = Some(cur);
3807 }
3808 }
3809}