use std::collections::{BTreeMap, HashMap};
use serde::Deserialize;
use taudit_core::error::TauditError;
use taudit_core::graph::*;
use taudit_core::ports::PipelineParser;
/// Value stored under the `META_INFERRED` metadata key on secret nodes that
/// were discovered by scanning a `run:` script body rather than an explicit
/// `env:` / `with:` mapping.
const META_INFERRED_VAL: &str = "true";
/// Stateless parser that turns a GitHub Actions workflow file into an
/// `AuthorityGraph` via its `PipelineParser` implementation.
pub struct GhaParser;
impl PipelineParser for GhaParser {
fn platform(&self) -> &str {
"github-actions"
}
fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
let mut de = serde_yaml::Deserializer::from_str(content);
let doc = de
.next()
.ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
let workflow: GhaWorkflow = GhaWorkflow::deserialize(doc)
.map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
let extra_docs = de.next().is_some();
let mut graph = AuthorityGraph::new(source.clone());
graph
.metadata
.insert(META_PLATFORM.into(), "github-actions".into());
if workflow.permissions.is_none() {
graph
.metadata
.insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
}
if extra_docs {
graph.mark_partial(
GapKind::Expression,
"file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
);
}
let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
let mut artifact_ids: HashMap<String, NodeId> = HashMap::new();
if let Some(EnvSpec::Template(_)) = workflow.env {
graph.mark_partial(
GapKind::Expression,
"workflow-level env: uses template expression — environment variable shape unknown"
.to_string(),
);
}
let is_pull_request_target = workflow
.triggers
.as_ref()
.map(trigger_has_pull_request_target)
.unwrap_or(false);
let trigger_list = collect_trigger_names(workflow.triggers.as_ref());
if !trigger_list.is_empty() {
let mut ordered: Vec<&str> = Vec::new();
if trigger_list.iter().any(|t| t == "pull_request_target") {
ordered.push("pull_request_target");
}
for t in &trigger_list {
if t != "pull_request_target" {
ordered.push(t);
}
}
let value = if ordered.len() == 1 {
ordered[0].to_string()
} else {
ordered.join(",")
};
graph.metadata.insert(META_TRIGGER.into(), value);
} else if is_pull_request_target {
graph
.metadata
.insert(META_TRIGGER.into(), "pull_request_target".into());
}
if let Some(triggers) = workflow.triggers.as_ref() {
let names = collect_trigger_names(Some(triggers));
if !names.is_empty() {
graph.metadata.insert(META_TRIGGERS.into(), names.join(","));
}
let inputs = collect_dispatch_inputs(triggers);
if !inputs.is_empty() {
graph
.metadata
.insert(META_DISPATCH_INPUTS.into(), inputs.join(","));
}
let call_inputs = collect_workflow_call_inputs(triggers);
if !call_inputs.is_empty() {
graph
.metadata
.insert(META_GHA_WORKFLOW_CALL_INPUTS.into(), call_inputs.join(","));
}
}
let token_id = if let Some(ref perms) = workflow.permissions {
let perm_string = perms.to_string();
let scope = IdentityScope::from_permissions(&perm_string);
let mut meta = HashMap::new();
meta.insert(META_PERMISSIONS.into(), perm_string.clone());
meta.insert(
META_IDENTITY_SCOPE.into(),
format!("{scope:?}").to_lowercase(),
);
if perm_string.contains("id-token: write") || perm_string == "write-all" {
meta.insert(META_OIDC.into(), "true".into());
}
Some(graph.add_node_with_metadata(
NodeKind::Identity,
"GITHUB_TOKEN",
TrustZone::FirstParty,
meta,
))
} else {
let mut meta = HashMap::new();
meta.insert(META_IDENTITY_SCOPE.into(), "unknown".into());
meta.insert(META_IMPLICIT.into(), "true".into());
Some(graph.add_node_with_metadata(
NodeKind::Identity,
"GITHUB_TOKEN",
TrustZone::FirstParty,
meta,
))
};
let mut job_output_records: Vec<String> = Vec::new();
let mut sorted_jobs: Vec<(&String, &GhaJob)> = workflow.jobs.iter().collect();
sorted_jobs.sort_by(|a, b| a.0.cmp(b.0));
for (job_name, job) in sorted_jobs {
let mut step_oidc_by_yaml_id: HashMap<String, bool> = HashMap::new();
if let Some(EnvSpec::Template(_)) = job.env {
graph.mark_partial(
GapKind::Expression,
format!(
"job '{job_name}' env: uses template expression — environment variable shape unknown"
),
);
}
let job_token_id = if let Some(ref perms) = job.permissions {
let perm_string = perms.to_string();
let scope = IdentityScope::from_permissions(&perm_string);
let mut meta = HashMap::new();
meta.insert(META_PERMISSIONS.into(), perm_string.clone());
meta.insert(
META_IDENTITY_SCOPE.into(),
format!("{scope:?}").to_lowercase(),
);
if perm_string.contains("id-token: write") {
meta.insert(META_OIDC.into(), "true".into());
}
Some(graph.add_node_with_metadata(
NodeKind::Identity,
format!("GITHUB_TOKEN ({job_name})"),
TrustZone::FirstParty,
meta,
))
} else {
token_id
};
if let Some(ref uses) = job.uses {
let trust_zone = if is_pin_semantically_valid(uses) {
TrustZone::ThirdParty
} else {
TrustZone::Untrusted
};
let rw_id = graph.add_node(NodeKind::Image, uses, trust_zone);
let job_step_id = graph.add_node(NodeKind::Step, job_name, TrustZone::FirstParty);
if let Some(node) = graph.nodes.get_mut(job_step_id) {
node.metadata.insert(META_JOB_NAME.into(), job_name.clone());
node.metadata.insert(
META_GHA_ACTION.into(),
uses.split('@').next().unwrap_or(uses).into(),
);
if let Some(runs_on) = job.runs_on.as_ref().and_then(yaml_value_compact) {
node.metadata.insert(META_GHA_RUNS_ON.into(), runs_on);
}
let condition = combined_condition(job.if_cond.as_deref(), None);
if let Some(condition) = condition {
node.metadata.insert(META_CONDITION.into(), condition);
}
if let Some(with) = job.with.as_ref() {
let mut entries: Vec<(&String, &serde_yaml::Value)> = with.iter().collect();
entries.sort_by(|a, b| a.0.cmp(b.0));
let rendered: Vec<String> = entries
.into_iter()
.filter_map(|(key, value)| {
yaml_scalar_to_string(value).map(|scalar| format!("{key}={scalar}"))
})
.collect();
if !rendered.is_empty() {
node.metadata
.insert(META_GHA_WITH_INPUTS.into(), rendered.join("\n"));
}
}
if let Some(serde_yaml::Value::String(s)) = job.secrets.as_ref() {
if s == "inherit" {
node.metadata
.insert(META_SECRETS_INHERIT.into(), "true".into());
}
}
}
graph.add_edge(job_step_id, rw_id, EdgeKind::DelegatesTo);
if let Some(tok_id) = job_token_id {
graph.add_edge(job_step_id, tok_id, EdgeKind::HasAccessTo);
}
if let Some(env_map) = workflow.env.as_ref().and_then(EnvSpec::as_map) {
let mut entries: Vec<(&String, &String)> = env_map.iter().collect();
entries.sort_by(|a, b| a.0.cmp(b.0));
for (_k, env_val) in entries {
for secret_name in iter_secret_refs(env_val) {
let secret_id =
find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
graph.add_edge(job_step_id, secret_id, EdgeKind::HasAccessTo);
}
}
}
if let Some(serde_yaml::Value::Mapping(map)) = job.secrets.as_ref() {
let mut entries: Vec<(&str, &str)> = map
.iter()
.filter_map(|(k, v)| Some((k.as_str()?, v.as_str()?)))
.collect();
entries.sort_by(|a, b| a.0.cmp(b.0));
for (_child_name, val) in entries {
for secret_name in iter_secret_refs(val) {
let secret_id =
find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
graph.add_edge(job_step_id, secret_id, EdgeKind::HasAccessTo);
}
}
}
graph.mark_partial(
GapKind::Structural,
format!(
"reusable workflow '{uses}' in job '{job_name}' cannot be resolved inline — authority within the called workflow is unknown"
),
);
continue;
}
if job
.strategy
.as_ref()
.and_then(|s| s.get("matrix"))
.is_some()
{
graph.mark_partial(
GapKind::Expression,
format!(
"job '{job_name}' uses matrix strategy — authority shape may differ per matrix entry"
),
);
}
if is_self_hosted_runner(job.runs_on.as_ref()) {
let runner_name = runner_label(job.runs_on.as_ref()).unwrap_or("self-hosted");
let mut meta = HashMap::new();
meta.insert(META_SELF_HOSTED.into(), "true".into());
graph.add_node_with_metadata(
NodeKind::Image,
runner_name,
TrustZone::FirstParty,
meta,
);
}
let container_image_id: Option<NodeId> = if let Some(ref container) = job.container {
let image_str = container.image();
let pinned = is_docker_digest_pinned(image_str);
let trust_zone = if pinned {
TrustZone::ThirdParty
} else {
TrustZone::Untrusted
};
let mut meta = HashMap::new();
meta.insert(META_CONTAINER.into(), "true".into());
if let Some(options) = container.options() {
if !options.is_empty() {
meta.insert(META_GHA_CONTAINER_OPTIONS.into(), options.to_string());
}
}
if pinned {
if let Some(digest) = image_str.split("@sha256:").nth(1) {
meta.insert(META_DIGEST.into(), format!("sha256:{digest}"));
}
}
Some(graph.add_node_with_metadata(NodeKind::Image, image_str, trust_zone, meta))
} else {
None
};
for (step_idx, step) in job.steps.iter().enumerate() {
let default_name = format!("{job_name}[{step_idx}]");
let step_name = step.name.as_deref().unwrap_or(&default_name);
let (trust_zone, image_node_id) = if let Some(ref uses) = step.uses {
let (zone, image_id) = classify_action(uses, &mut graph);
(zone, Some(image_id))
} else if is_pull_request_target {
(TrustZone::Untrusted, None)
} else {
(TrustZone::FirstParty, None)
};
let step_id = graph.add_node(NodeKind::Step, step_name, trust_zone);
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata.insert(META_JOB_NAME.into(), job_name.clone());
if let Some(runs_on) = job.runs_on.as_ref().and_then(yaml_value_compact) {
node.metadata.insert(META_GHA_RUNS_ON.into(), runs_on);
}
let condition =
combined_condition(job.if_cond.as_deref(), step.if_cond.as_deref());
if let Some(condition) = condition {
node.metadata.insert(META_CONDITION.into(), condition);
}
if let Some(ref uses) = step.uses {
let action = uses.split('@').next().unwrap_or(uses);
node.metadata.insert(META_GHA_ACTION.into(), action.into());
if let Some(with) = step.with.as_ref() {
let mut entries: Vec<(&String, &serde_yaml::Value)> =
with.iter().collect();
entries.sort_by(|a, b| a.0.cmp(b.0));
let mut rendered = Vec::new();
for (key, value) in entries {
if let Some(scalar) = yaml_scalar_to_string(value) {
rendered.push(format!("{key}={scalar}"));
}
}
if !rendered.is_empty() {
node.metadata
.insert(META_GHA_WITH_INPUTS.into(), rendered.join("\n"));
}
}
}
if let Some(ref body) = step.run {
if !body.is_empty() {
node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
}
}
let job_check = job
.if_cond
.as_deref()
.map(is_fork_check_expression)
.unwrap_or(false);
let step_check = step
.if_cond
.as_deref()
.map(is_fork_check_expression)
.unwrap_or(false);
if job_check || step_check {
node.metadata.insert(META_FORK_CHECK.into(), "true".into());
}
}
if let Some(img_id) = image_node_id {
graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
}
if let Some(ref uses) = step.uses {
if uses.starts_with("./") {
graph.mark_partial(
GapKind::Structural,
format!(
"composite action not resolved (local action '{uses}' — taudit does not read filesystem)"
),
);
}
}
if let Some(img_id) = container_image_id {
graph.add_edge(step_id, img_id, EdgeKind::UsesImage);
}
if let Some(tok_id) = job_token_id {
graph.add_edge(step_id, tok_id, EdgeKind::HasAccessTo);
}
let mut step_holds_oidc = false;
if let Some(ref uses) = step.uses {
if let Some(cloud_id) =
classify_cloud_auth(uses, step.with.as_ref(), &mut graph)
{
graph.add_edge(step_id, cloud_id, EdgeKind::HasAccessTo);
step_holds_oidc = true;
}
}
if let Some(tok_id) = job_token_id {
if let Some(tok_node) = graph.nodes.get(tok_id) {
if tok_node.metadata.contains_key(META_OIDC) {
step_holds_oidc = true;
}
}
}
if let Some(ref yaml_id) = step.id {
step_oidc_by_yaml_id.insert(yaml_id.clone(), step_holds_oidc);
}
if let Some(ref uses) = step.uses {
let action = uses.split('@').next().unwrap_or(uses);
if matches!(
action,
"actions/attest-build-provenance" | "sigstore/cosign-installer"
) {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata.insert(META_ATTESTS.into(), "true".into());
}
}
}
if let Some(ref uses) = step.uses {
let action = uses.split('@').next().unwrap_or(uses);
if action == "actions/checkout" {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_CHECKOUT_SELF.into(), "true".into());
if let Some(with) = step.with.as_ref() {
if let Some(r) = with.get("ref").and_then(yaml_scalar_to_string) {
node.metadata.insert(META_CHECKOUT_REF.into(), r);
}
}
}
}
}
if let Some(ref run) = step.run {
if !run.is_empty() {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata.insert(META_SCRIPT_BODY.into(), run.clone());
}
}
}
if let Some(ref uses) = step.uses {
let action = uses.split('@').next().unwrap_or(uses);
if matches!(
action,
"actions/download-artifact" | "dawidd6/action-download-artifact"
) {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_DOWNLOADS_ARTIFACT.into(), "true".into());
}
}
}
if let Some(ref uses) = step.uses {
let action = uses.split('@').next().unwrap_or(uses);
if action == "actions/upload-artifact" {
if let Some(artifact_name) = step
.with
.as_ref()
.and_then(|w| w.get("name"))
.and_then(yaml_scalar_to_string)
{
let art_id = find_or_create_artifact(
&mut graph,
&mut artifact_ids,
&artifact_name,
trust_zone,
);
graph.add_edge(step_id, art_id, EdgeKind::Produces);
}
} else if matches!(
action,
"actions/download-artifact" | "dawidd6/action-download-artifact"
) {
if let Some(artifact_name) = step
.with
.as_ref()
.and_then(|w| w.get("name"))
.and_then(yaml_scalar_to_string)
{
let art_id = find_or_create_artifact(
&mut graph,
&mut artifact_ids,
&artifact_name,
TrustZone::Untrusted,
);
graph.add_edge(art_id, step_id, EdgeKind::Consumes);
}
}
}
if let Some(ref run) = step.run {
let interprets = run.contains("unzip ")
|| run.contains("unzip\n")
|| run.contains("tar -x")
|| run.contains("tar x")
|| run.contains(" eval ")
|| run.contains("\neval ")
|| run.starts_with("eval ")
|| run.contains(" cat ")
|| run.contains("\ncat ")
|| run.starts_with("cat ")
|| run.contains("jq ");
if interprets {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_INTERPRETS_ARTIFACT.into(), "true".into());
}
}
}
if let Some(ref uses) = step.uses {
let action = uses.split('@').next().unwrap_or(uses);
if action == "actions/github-script" {
if let Some(with) = step.with.as_ref() {
if let Some(script) = with.get("script").and_then(yaml_scalar_to_string)
{
let posts_comment = script.contains("createComment")
|| script.contains("updateComment")
|| script.contains("createCommitComment")
|| script.contains("createReview");
let reads_file = script.contains("readFileSync")
|| script.contains("readFile(")
|| script.contains("require('fs')")
|| script.contains("require(\"fs\")");
if posts_comment && reads_file {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_INTERPRETS_ARTIFACT.into(), "true".into());
}
}
}
}
}
}
let step_env_template = matches!(step.env.as_ref(), Some(EnvSpec::Template(_)));
if step_env_template {
graph.mark_partial(
GapKind::Expression,
format!(
"step '{step_name}' in job '{job_name}' env: uses template expression — environment variable shape unknown"
),
);
}
let mut effective_env: HashMap<String, String> = HashMap::new();
if let Some(env_map) = workflow.env.as_ref().and_then(EnvSpec::as_map) {
for (k, v) in env_map {
effective_env.insert(k.clone(), v.clone());
}
}
if let Some(env_map) = job.env.as_ref().and_then(EnvSpec::as_map) {
for (k, v) in env_map {
effective_env.insert(k.clone(), v.clone());
}
}
if let Some(EnvSpec::Map(env_map)) = step.env.as_ref() {
for (k, v) in env_map {
effective_env.insert(k.clone(), v.clone());
}
}
let mut effective_entries: Vec<(&String, &String)> = effective_env.iter().collect();
effective_entries.sort_by(|a, b| a.0.cmp(b.0));
if !effective_entries.is_empty() {
let rendered_env: Vec<String> = effective_entries
.iter()
.map(|(k, v)| format!("{k}={v}"))
.collect();
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_GHA_ENV_ASSIGNMENTS.into(), rendered_env.join("\n"));
}
}
for (_k, env_val) in effective_entries {
for secret_name in iter_secret_refs(env_val) {
let secret_id =
find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
}
}
if let Some(ref with) = step.with {
let mut reads_env = false;
let mut entries: Vec<(&String, &serde_yaml::Value)> = with.iter().collect();
entries.sort_by(|a, b| a.0.cmp(b.0));
for (_k, val) in entries {
for scalar in yaml_scalar_strings(val) {
for secret_name in iter_secret_refs(&scalar) {
let secret_id =
find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
}
if is_env_reference(&scalar) {
reads_env = true;
}
}
}
if reads_env {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata.insert(META_READS_ENV.into(), "true".into());
}
}
}
if let Some(ref run) = step.run {
if !run.is_empty() {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata.insert(META_SCRIPT_BODY.into(), run.clone());
}
}
}
if let Some(ref uses) = step.uses {
let action = uses.split('@').next().unwrap_or(uses);
if action == "actions/github-script" {
if let Some(with) = step.with.as_ref() {
if let Some(script) = with.get("script").and_then(yaml_scalar_to_string)
{
if !script.is_empty() {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata.insert(META_SCRIPT_BODY.into(), script);
}
}
}
}
}
}
if let Some(ref uses) = step.uses {
let action = uses.split('@').next().unwrap_or(uses);
let is_debug = matches!(
action,
"mxschmitt/action-tmate"
| "lhotari/action-upterm"
| "actions/tmate"
| "owenthereal/action-upterm"
| "csexton/debugger-action"
);
if is_debug {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_INTERACTIVE_DEBUG.into(), uses.clone());
}
}
}
if let Some(ref uses) = step.uses {
let action = uses.split('@').next().unwrap_or(uses);
let is_cache = matches!(
action,
"actions/cache" | "actions/cache/save" | "actions/cache/restore"
);
if is_cache {
if let Some(with) = step.with.as_ref() {
if let Some(key) = with.get("key").and_then(yaml_scalar_to_string) {
if !key.is_empty() {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata.insert(META_CACHE_KEY.into(), key);
}
}
}
}
}
}
if let Some(ref run) = step.run {
let mut seen: std::collections::BTreeSet<&str> =
std::collections::BTreeSet::new();
for name in iter_secret_refs(run) {
seen.insert(name);
}
for secret_name in seen {
let secret_id =
find_or_create_secret(&mut graph, &mut secret_ids, secret_name);
if let Some(node) = graph.nodes.get_mut(secret_id) {
node.metadata
.insert(META_INFERRED.into(), META_INFERRED_VAL.into());
}
graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
graph.mark_partial(
GapKind::Expression,
format!(
"secret '{secret_name}' referenced in run: script — inferred, not precisely mapped"
),
);
}
}
if let Some(ref run) = step.run {
let writes_gate = run.contains("GITHUB_ENV") || run.contains("GITHUB_PATH");
if writes_gate {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_WRITES_ENV_GATE.into(), "true".into());
}
}
if is_env_reference(run) {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata.insert(META_READS_ENV.into(), "true".into());
}
}
}
}
if let Some(outputs) = job.outputs.as_ref() {
let mut output_entries: Vec<(&String, &String)> = outputs.iter().collect();
output_entries.sort_by(|a, b| a.0.cmp(b.0));
for (out_name, out_value) in output_entries {
let source = classify_job_output_source(out_value, &step_oidc_by_yaml_id);
job_output_records.push(format!("{job_name}\t{out_name}\t{source}"));
}
}
}
if !job_output_records.is_empty() {
graph
.metadata
.insert(META_JOB_OUTPUTS.into(), job_output_records.join("|"));
}
let step_count = graph
.nodes
.iter()
.filter(|n| n.kind == NodeKind::Step)
.count();
if step_count == 0 && !workflow.jobs.is_empty() {
graph.mark_partial(
GapKind::Structural,
"jobs: parsed but produced 0 step nodes — possible non-GHA YAML wrong-platform-classified".to_string(),
);
}
graph.stamp_edge_authority_summaries();
Ok(graph)
}
}
/// Classifies where a job output value comes from.
///
/// Returns, in priority order:
/// * `"secret"` when the value mentions `secrets.` anywhere,
/// * `"oidc"` when it references `steps.<id>.outputs.` for a step id that
///   `step_oidc_by_yaml_id` maps to `true`,
/// * `"step_output"` when it references any other step output,
/// * `"literal"` otherwise.
fn classify_job_output_source(
    value: &str,
    step_oidc_by_yaml_id: &HashMap<String, bool>,
) -> &'static str {
    const STEPS_PREFIX: &str = "steps.";
    const OUTPUTS_INFIX: &str = ".outputs.";

    if value.contains("secrets.") {
        return "secret";
    }

    let mut references_step_output = false;
    let mut remaining = value;
    loop {
        let Some(found_at) = remaining.find(STEPS_PREFIX) else {
            break;
        };
        let after_prefix = &remaining[found_at + STEPS_PREFIX.len()..];
        // The step id runs until the first character that cannot be part of
        // an identifier (anything but alphanumerics, `_` and `-`).
        let id_len = after_prefix
            .find(|c: char| !(c.is_alphanumeric() || c == '_' || c == '-'))
            .unwrap_or(after_prefix.len());
        let (step_id, after_id) = after_prefix.split_at(id_len);
        if !step_id.is_empty() && after_id.starts_with(OUTPUTS_INFIX) {
            references_step_output = true;
            if let Some(true) = step_oidc_by_yaml_id.get(step_id) {
                return "oidc";
            }
        }
        // Resume scanning right after the identifier that was just examined.
        remaining = after_id;
    }

    if references_step_output {
        "step_output"
    } else {
        "literal"
    }
}
/// Reports whether an `if:` expression contains one of the recognised guards
/// that restrict execution to pull requests from the same repository.
///
/// Matching is case-insensitive and treats any run of whitespace as a single
/// space. Recognised forms are `head.repo.fork == false`,
/// `head.repo.fork != true`, and equality (in either order) between
/// `head.repo.full_name` and `github.repository`.
pub fn is_fork_check_expression(expr: &str) -> bool {
    const FORK_GUARDS: [&str; 4] = [
        "github.event.pull_request.head.repo.fork == false",
        "github.event.pull_request.head.repo.fork != true",
        "github.event.pull_request.head.repo.full_name == github.repository",
        "github.repository == github.event.pull_request.head.repo.full_name",
    ];

    // Collapse whitespace runs to single spaces before lowercasing.
    let mut collapsed = String::with_capacity(expr.len());
    for word in expr.split_whitespace() {
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    let canonical = collapsed.to_lowercase();

    FORK_GUARDS.iter().any(|guard| canonical.contains(guard))
}
/// Returns `true` when the workflow's `on:` value lists the
/// `pull_request_target` event.
fn trigger_has_pull_request_target(triggers: &serde_yaml::Value) -> bool {
    let names = collect_trigger_names(Some(triggers));
    names
        .iter()
        .map(String::as_str)
        .any(|name| name == "pull_request_target")
}
/// Extracts the event names from a workflow's `on:` value.
///
/// Accepts the three shapes handled here: a single string, a sequence of
/// strings, or a mapping keyed by event name. Non-string entries and empty
/// names are skipped; duplicates are dropped while keeping first-seen order.
/// Any other shape (or `None`) yields an empty list.
fn collect_trigger_names(triggers: Option<&serde_yaml::Value>) -> Vec<String> {
    let candidates: Vec<&str> = match triggers {
        Some(serde_yaml::Value::String(single)) => vec![single.as_str()],
        Some(serde_yaml::Value::Sequence(items)) => {
            items.iter().filter_map(|item| item.as_str()).collect()
        }
        Some(serde_yaml::Value::Mapping(events)) => {
            events.iter().filter_map(|(key, _)| key.as_str()).collect()
        }
        _ => Vec::new(),
    };

    let mut names: Vec<String> = Vec::with_capacity(candidates.len());
    for candidate in candidates {
        let already_listed = names.iter().any(|existing| existing == candidate);
        if !candidate.is_empty() && !already_listed {
            names.push(candidate.to_string());
        }
    }
    names
}
/// Names of the inputs declared under `on.workflow_dispatch.inputs`, in
/// declaration order. Empty when `triggers` is not a mapping or declares no
/// such inputs.
fn collect_dispatch_inputs(triggers: &serde_yaml::Value) -> Vec<String> {
    collect_event_input_names(triggers, "workflow_dispatch")
}

/// Names of the inputs declared under `on.workflow_call.inputs`, in
/// declaration order. Empty when `triggers` is not a mapping or declares no
/// such inputs.
fn collect_workflow_call_inputs(triggers: &serde_yaml::Value) -> Vec<String> {
    collect_event_input_names(triggers, "workflow_call")
}

/// Shared lookup behind the two collectors above: finds `event` in the `on:`
/// mapping and returns the string keys of its `inputs` mapping.
///
/// Every "not present / wrong shape" case collapses to an empty list, and
/// non-string input keys are skipped.
fn collect_event_input_names(triggers: &serde_yaml::Value, event: &str) -> Vec<String> {
    let serde_yaml::Value::Mapping(events) = triggers else {
        return Vec::new();
    };
    let Some((_, event_config)) = events.iter().find(|(key, _)| key.as_str() == Some(event))
    else {
        return Vec::new();
    };
    let Some(inputs) = event_config.get("inputs").and_then(|value| value.as_mapping()) else {
        return Vec::new();
    };
    inputs
        .iter()
        .filter_map(|(name, _)| name.as_str().map(str::to_string))
        .collect()
}
/// Decides whether a job's `runs-on:` value designates a self-hosted runner.
///
/// * string: must equal `self-hosted`;
/// * sequence: must contain the string `self-hosted`;
/// * mapping: any `group` key counts as self-hosted, otherwise its `labels`
///   value is checked with the string/sequence rules above.
///
/// Everything else (including a missing `runs-on`) is treated as not
/// self-hosted.
fn is_self_hosted_runner(runs_on: Option<&serde_yaml::Value>) -> bool {
    /// String/sequence check shared by the top-level value and `labels`.
    fn names_self_hosted(labels: &serde_yaml::Value) -> bool {
        const SH: &str = "self-hosted";
        match labels {
            serde_yaml::Value::String(label) => label == SH,
            serde_yaml::Value::Sequence(items) => {
                items.iter().any(|item| item.as_str() == Some(SH))
            }
            _ => false,
        }
    }

    match runs_on {
        None => false,
        Some(serde_yaml::Value::Mapping(spec)) => {
            if spec.contains_key("group") {
                true
            } else {
                spec.get("labels").map(names_self_hosted).unwrap_or(false)
            }
        }
        Some(other) => names_self_hosted(other),
    }
}
/// Picks a display label for the runner described by `runs-on:`.
///
/// * string: the string itself;
/// * sequence: the first string entry other than `self-hosted`, falling back
///   to the first element if it is a string;
/// * mapping: the `group` value when it is a string.
///
/// Returns `None` for a missing value or any other shape.
fn runner_label(runs_on: Option<&serde_yaml::Value>) -> Option<&str> {
    match runs_on? {
        serde_yaml::Value::String(label) => Some(label.as_str()),
        serde_yaml::Value::Sequence(labels) => labels
            .iter()
            .filter_map(|entry| entry.as_str())
            .find(|label| *label != "self-hosted")
            .or_else(|| labels.first().and_then(|entry| entry.as_str())),
        serde_yaml::Value::Mapping(spec) => spec.get("group").and_then(|group| group.as_str()),
        _ => None,
    }
}
/// Adds an image node for the action referenced by a step's `uses:` value
/// and returns the trust zone chosen for it together with the node id.
///
/// Local actions (`./...`) are first-party; other actions are third-party
/// when `is_pin_semantically_valid` accepts the reference and untrusted
/// otherwise. When `is_sha_pinned` holds, the text after the last `@` is
/// stored as the node's digest.
fn classify_action(uses: &str, graph: &mut AuthorityGraph) -> (TrustZone, NodeId) {
    let pin_is_valid = is_pin_semantically_valid(uses);
    let zone = match (uses.starts_with("./"), pin_is_valid) {
        (true, _) => TrustZone::FirstParty,
        (false, true) => TrustZone::ThirdParty,
        (false, false) => TrustZone::Untrusted,
    };

    let mut meta = HashMap::new();
    if is_sha_pinned(uses) {
        if let Some(sha) = uses.rsplit('@').next() {
            meta.insert(META_DIGEST.into(), sha.into());
        }
    }

    let node_id = graph.add_node_with_metadata(NodeKind::Image, uses, zone, meta);
    (zone, node_id)
}
/// Iterates over the secret names referenced as `secrets.NAME` inside
/// `${{ ... }}` expressions of `s`, in order of appearance.
///
/// Text outside an expression is ignored. An expression that is never
/// closed with `}}` extends to the end of the string. Names consist of
/// ASCII alphanumerics and `_`; a `secrets.` with no name after it is
/// skipped.
fn iter_secret_refs(s: &str) -> impl Iterator<Item = &str> {
    SecretRefIter {
        src: s,
        cursor: 0,
        span_end: None,
    }
}

/// Scanner state behind [`iter_secret_refs`].
struct SecretRefIter<'a> {
    /// Text being scanned.
    src: &'a str,
    /// Byte offset at which scanning resumes.
    cursor: usize,
    /// End (exclusive) of the expression body currently being scanned, or
    /// `None` while looking for the next `${{`.
    span_end: Option<usize>,
}

impl<'a> SecretRefIter<'a> {
    /// Leaves the current expression: skips past its `}}` (clamped to the
    /// end of the text) and goes back to searching for the next `${{`.
    fn leave_span(&mut self, end: usize) {
        self.cursor = end.saturating_add(2).min(self.src.len());
        self.span_end = None;
    }
}

impl<'a> Iterator for SecretRefIter<'a> {
    type Item = &'a str;

    fn next(&mut self) -> Option<&'a str> {
        const OPEN: &str = "${{";
        const CLOSE: &str = "}}";
        const PREFIX: &str = "secrets.";

        let src: &'a str = self.src;
        loop {
            // Make sure we are inside an expression body, opening the next
            // one if needed. No further `${{` means the iteration is over.
            let end = match self.span_end {
                Some(end) => end,
                None => {
                    let open_at = src.get(self.cursor..)?.find(OPEN)?;
                    let body_start = self.cursor + open_at + OPEN.len();
                    let body = &src[body_start..];
                    let end = body_start + body.find(CLOSE).unwrap_or(body.len());
                    self.cursor = body_start;
                    self.span_end = Some(end);
                    end
                }
            };

            // Look for the next `secrets.` within the rest of this body.
            let hit = src
                .get(self.cursor..end)
                .and_then(|window| window.find(PREFIX));
            let Some(offset) = hit else {
                self.leave_span(end);
                continue;
            };

            let name_start = self.cursor + offset + PREFIX.len();
            let candidate = &src[name_start..end];
            let name_len = candidate
                .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_'))
                .unwrap_or(candidate.len());
            self.cursor = name_start + name_len;
            if name_len > 0 {
                return Some(&candidate[..name_len]);
            }
            // Empty name: keep scanning the same body after `secrets.`.
        }
    }
}
/// Returns `true` when some `${{` in `val` is followed (after optional
/// whitespace) by `env.`.
fn is_env_reference(val: &str) -> bool {
    const OPEN: &str = "${{";
    val.match_indices(OPEN).any(|(at, _)| {
        val[at + OPEN.len()..]
            .trim_start()
            .starts_with("env.")
    })
}
/// Returns the node id of the secret called `name`, adding a first-party
/// secret node to `graph` (and remembering it in `cache`) on first use.
fn find_or_create_secret(
    graph: &mut AuthorityGraph,
    cache: &mut HashMap<String, NodeId>,
    name: &str,
) -> NodeId {
    match cache.get(name).copied() {
        Some(existing) => existing,
        None => {
            let created = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
            cache.insert(name.to_string(), created);
            created
        }
    }
}
/// Returns the node id of the artifact called `name`, adding an artifact
/// node in `zone` (and remembering it in `cache`) on first use.
///
/// `zone` only applies when the node is created; an already cached artifact
/// keeps the zone it was created with.
fn find_or_create_artifact(
    graph: &mut AuthorityGraph,
    cache: &mut HashMap<String, NodeId>,
    name: &str,
    zone: TrustZone,
) -> NodeId {
    match cache.get(name).copied() {
        Some(existing) => existing,
        None => {
            let created = graph.add_node(NodeKind::Artifact, name, zone);
            cache.insert(name.to_string(), created);
            created
        }
    }
}
/// Recognises cloud-login actions and, when the step's `with:` inputs carry
/// the expected input, adds an OIDC identity node and returns its id.
///
/// * `aws-actions/configure-aws-credentials` — requires `role-to-assume`;
///   the node is named after the last `/`-separated segment of the role.
/// * `google-github-actions/auth` — requires `workload_identity_provider`;
///   named after its last `/`-separated segment.
/// * `azure/login` — requires `client-id` and no `client-secret` input;
///   named after the client id.
///
/// Returns `None` for any other action or when the required input is
/// missing or not a scalar.
fn classify_cloud_auth(
    uses: &str,
    with: Option<&HashMap<String, serde_yaml::Value>>,
    graph: &mut AuthorityGraph,
) -> Option<NodeId> {
    /// Adds a first-party identity node flagged as OIDC with broad scope.
    fn add_oidc_identity(graph: &mut AuthorityGraph, label: String, permissions: &str) -> NodeId {
        let mut meta = HashMap::new();
        meta.insert(META_OIDC.into(), "true".into());
        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
        meta.insert(META_PERMISSIONS.into(), permissions.into());
        graph.add_node_with_metadata(NodeKind::Identity, label, TrustZone::FirstParty, meta)
    }

    let action = uses.split('@').next().unwrap_or(uses);
    let node_id = match action {
        "aws-actions/configure-aws-credentials" => {
            let role = with?
                .get("role-to-assume")
                .and_then(yaml_scalar_to_string)?;
            let short = role.rsplit('/').next().unwrap_or(role.as_str());
            add_oidc_identity(graph, format!("AWS/{short}"), "AWS role assumption (OIDC)")
        }
        "google-github-actions/auth" => {
            let provider = with?
                .get("workload_identity_provider")
                .and_then(yaml_scalar_to_string)?;
            let short = provider.rsplit('/').next().unwrap_or(provider.as_str());
            add_oidc_identity(
                graph,
                format!("GCP/{short}"),
                "GCP workload identity federation",
            )
        }
        "azure/login" => {
            let inputs = with?;
            let client_id = inputs.get("client-id").and_then(yaml_scalar_to_string)?;
            // A client secret input means no OIDC identity is recorded.
            if inputs.contains_key("client-secret") {
                return None;
            }
            add_oidc_identity(
                graph,
                format!("Azure/{client_id}"),
                "Azure federated credential (OIDC)",
            )
        }
        _ => return None,
    };
    Some(node_id)
}
/// A `permissions:` value as written in the workflow.
///
/// Deserialized untagged: serde tries the variants in order, so a plain
/// string becomes `String` and a mapping of string keys to string values
/// becomes `Map`.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum Permissions {
/// Single-string form of the value.
String(String),
/// Mapping form; the `BTreeMap` keeps entries ordered by key.
Map(BTreeMap<String, String>),
}
/// An `env:` value: either a concrete mapping of variables or a single
/// string (treated by the parser as a template expression whose resulting
/// variables are unknown).
///
/// Deserialized untagged, trying `Map` first and then `Template`.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
pub enum EnvSpec {
/// Mapping of variable name to value, read through `deserialize_env_map`
/// so scalar values are normalised to strings.
#[serde(deserialize_with = "deserialize_env_map")]
Map(HashMap<String, String>),
/// The whole `env:` value given as one string.
Template(String),
}
/// Deserializes an `env:` mapping, converting every scalar value to a string.
///
/// Strings are kept as-is, booleans and numbers use their `to_string()`
/// form, and null becomes the empty string. Any non-scalar value makes the
/// whole mapping fail with a custom error naming the offending key.
fn deserialize_env_map<'de, D>(deserializer: D) -> Result<HashMap<String, String>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::Error;

    let raw: HashMap<String, serde_yaml::Value> = HashMap::deserialize(deserializer)?;
    raw.into_iter()
        .map(|(name, value)| {
            let text = match value {
                serde_yaml::Value::String(text) => text,
                serde_yaml::Value::Bool(flag) => flag.to_string(),
                serde_yaml::Value::Number(number) => number.to_string(),
                serde_yaml::Value::Null => String::new(),
                other => {
                    return Err(D::Error::custom(format!(
                        "env value for `{name}` is not a scalar: {other:?}"
                    )))
                }
            };
            Ok((name, text))
        })
        .collect()
}
fn yaml_scalar_to_string(value: &serde_yaml::Value) -> Option<String> {
match value {
serde_yaml::Value::String(s) => Some(s.clone()),
serde_yaml::Value::Bool(b) => Some(b.to_string()),
serde_yaml::Value::Number(n) => Some(n.to_string()),
serde_yaml::Value::Null => Some(String::new()),
_ => None,
}
}
/// Flattens a YAML value into a compact, comma-separated string.
///
/// * scalar: its string form;
/// * sequence: scalar elements joined with `,` (non-scalars are dropped);
/// * mapping: `key=value` pairs, sorted, joined with `,`, where each value is
///   compacted recursively and pairs that cannot be rendered are dropped.
///
/// Returns `None` when a sequence or mapping contributes nothing, or when
/// the value is neither scalar, sequence nor mapping.
fn yaml_value_compact(value: &serde_yaml::Value) -> Option<String> {
    let parts: Vec<String> = match value {
        serde_yaml::Value::Sequence(items) => {
            items.iter().filter_map(yaml_scalar_to_string).collect()
        }
        serde_yaml::Value::Mapping(entries) => {
            let mut pairs: Vec<String> = Vec::new();
            for (key, inner) in entries {
                let Some(key_text) = yaml_scalar_to_string(key) else {
                    continue;
                };
                let Some(inner_text) = yaml_value_compact(inner) else {
                    continue;
                };
                pairs.push(format!("{}={}", key_text, inner_text));
            }
            pairs.sort();
            pairs
        }
        scalar => return yaml_scalar_to_string(scalar),
    };

    if parts.is_empty() {
        return None;
    }
    Some(parts.join(","))
}
/// Merges a job-level and a step-level `if:` condition into one string.
///
/// Empty conditions count as absent. When both are present they are joined
/// as `"<job> AND <step>"`; when only one is present it is returned alone;
/// otherwise the result is `None`.
fn combined_condition(job_if: Option<&str>, step_if: Option<&str>) -> Option<String> {
    let job_part = job_if.filter(|cond| !cond.is_empty());
    let step_part = step_if.filter(|cond| !cond.is_empty());
    match (job_part, step_part) {
        (Some(job), Some(step)) => Some(format!("{job} AND {step}")),
        (Some(only), None) | (None, Some(only)) => Some(only.to_string()),
        (None, None) => None,
    }
}
/// Collects the string forms of the scalars directly contained in `value`:
/// the elements of a sequence, the values of a mapping, or the value itself
/// when it is a scalar. Nested non-scalar items are skipped.
fn yaml_scalar_strings(value: &serde_yaml::Value) -> Vec<String> {
    let mut scalars: Vec<String> = Vec::new();
    match value {
        serde_yaml::Value::Sequence(items) => {
            scalars.extend(items.iter().filter_map(yaml_scalar_to_string));
        }
        serde_yaml::Value::Mapping(entries) => {
            scalars.extend(entries.values().filter_map(yaml_scalar_to_string));
        }
        single => scalars.extend(yaml_scalar_to_string(single)),
    }
    scalars
}
impl EnvSpec {
    /// Borrows the variable mapping, or `None` for the template form.
    pub fn as_map(&self) -> Option<&HashMap<String, String>> {
        if let EnvSpec::Map(vars) = self {
            Some(vars)
        } else {
            None
        }
    }

    /// Borrows the template string, or `None` for the mapping form.
    pub fn as_template(&self) -> Option<&str> {
        if let EnvSpec::Template(expr) = self {
            Some(expr.as_str())
        } else {
            None
        }
    }
}
/// Textual form of a permissions value: the string variant is written
/// verbatim, the map variant as `{ key: value, key: value }` in key order.
impl std::fmt::Display for Permissions {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let entries = match self {
            Permissions::String(text) => return f.write_str(text),
            Permissions::Map(entries) => entries,
        };
        let joined = entries
            .iter()
            .map(|(scope, level)| format!("{scope}: {level}"))
            .collect::<Vec<String>>()
            .join(", ");
        write!(f, "{{ {joined} }}")
    }
}
/// Top-level shape of a workflow file as far as this parser reads it.
/// Every field is optional in the YAML and falls back to its default.
#[derive(Debug, Deserialize)]
pub struct GhaWorkflow {
/// Raw value of the `on:` key (string, sequence or mapping).
#[serde(rename = "on", default)]
pub triggers: Option<serde_yaml::Value>,
/// Workflow-level `permissions:` block, if any.
#[serde(default)]
pub permissions: Option<Permissions>,
/// Workflow-level `env:` block, if any.
#[serde(default)]
pub env: Option<EnvSpec>,
/// Jobs keyed by job id; empty when `jobs:` is absent.
#[serde(default)]
pub jobs: HashMap<String, GhaJob>,
}
/// A job's `container:` value, accepted either as a bare image string or as
/// a mapping with an `image` and optional `options`.
///
/// Deserialized untagged, trying `Image` first and then `Full`.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
pub enum ContainerConfig {
/// Short form: just the image reference.
Image(String),
/// Mapping form.
Full {
/// Image reference.
image: String,
/// Value of the `options` key, when present.
#[serde(default)]
options: Option<String>,
},
}
impl ContainerConfig {
pub fn image(&self) -> &str {
match self {
ContainerConfig::Image(s) => s,
ContainerConfig::Full { image, .. } => image,
}
}
pub fn options(&self) -> Option<&str> {
match self {
ContainerConfig::Image(_) => None,
ContainerConfig::Full { options, .. } => options.as_deref(),
}
}
}
/// One entry of the workflow's `jobs:` mapping as deserialized from YAML.
///
/// Every field is marked `#[serde(default)]`, so each key is optional.
/// Several fields are kept as raw `serde_yaml::Value` rather than a typed
/// shape; the tests in this file feed `runs-on` as both a string and a
/// sequence, for example.
#[derive(Debug, Deserialize)]
pub struct GhaJob {
/// Job-level `permissions:`.
#[serde(default)]
pub permissions: Option<Permissions>,
/// Job-level `env:`.
#[serde(default)]
pub env: Option<EnvSpec>,
/// The job's `steps:` list; empty when the key is absent.
#[serde(default)]
pub steps: Vec<GhaStep>,
/// Job-level `uses:` (in this file's tests, a reusable-workflow reference
/// such as `org/repo/.github/workflows/deploy.yml@main`).
#[serde(default)]
pub uses: Option<String>,
/// Job-level `with:` inputs; values are kept as raw YAML.
#[serde(rename = "with", default)]
pub with: Option<HashMap<String, serde_yaml::Value>>,
/// Raw value of `secrets:` (the tests use the scalar `inherit`).
#[serde(default)]
pub secrets: Option<serde_yaml::Value>,
/// The job's `container:`, in either its string or mapping form.
#[serde(default)]
pub container: Option<ContainerConfig>,
/// Raw value of `strategy:` (the tests use a `matrix:` mapping here).
#[serde(default)]
pub strategy: Option<serde_yaml::Value>,
/// Raw value of `runs-on:`.
#[serde(rename = "runs-on", default)]
pub runs_on: Option<serde_yaml::Value>,
/// The job's `outputs:` mapping.
#[serde(default)]
pub outputs: Option<HashMap<String, String>>,
/// The job's `if:` condition text.
#[serde(rename = "if", default)]
pub if_cond: Option<String>,
}
/// One entry of a job's `steps:` list as deserialized from YAML.
///
/// All fields are `Option`s, so every key is optional.
#[derive(Debug, Deserialize)]
pub struct GhaStep {
/// The step's `name:`.
pub name: Option<String>,
/// The step's `id:`.
pub id: Option<String>,
/// The step's `uses:` reference (e.g. `actions/checkout@v4` or a local
/// `./.github/actions/...` path, as exercised by the tests in this file).
pub uses: Option<String>,
/// The step's `run:` script text.
pub run: Option<String>,
/// Step-level `env:`.
#[serde(default)]
pub env: Option<EnvSpec>,
/// Step-level `with:` inputs; values are kept as raw YAML, so sequences
/// and other non-string values are accepted.
#[serde(rename = "with", default)]
pub with: Option<HashMap<String, serde_yaml::Value>>,
/// The step's `if:` condition text.
#[serde(rename = "if", default)]
pub if_cond: Option<String>,
}
#[cfg(test)]
mod tests {
use super::*;
fn parse(yaml: &str) -> AuthorityGraph {
let parser = GhaParser;
let source = PipelineSource {
file: "test.yml".into(),
repo: None,
git_ref: None,
commit_sha: None,
};
parser.parse(yaml, &source).unwrap()
}
#[test]
fn parses_simple_workflow() {
let yaml = r#"
permissions: write-all
jobs:
build:
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Build
run: make build
"#;
let graph = parse(yaml);
assert!(graph.nodes.len() >= 3); }
#[test]
fn detects_secret_in_env() {
let yaml = r#"
jobs:
deploy:
steps:
- name: Deploy
run: ./deploy.sh
env:
AWS_KEY: "${{ secrets.AWS_ACCESS_KEY_ID }}"
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert_eq!(secrets.len(), 1);
assert_eq!(secrets[0].name, "AWS_ACCESS_KEY_ID");
}
#[test]
fn classifies_unpinned_action_as_untrusted() {
let yaml = r#"
jobs:
ci:
steps:
- uses: actions/checkout@v4
"#;
let graph = parse(yaml);
let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
assert_eq!(images.len(), 1);
assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
}
#[test]
fn classifies_sha_pinned_action_as_third_party() {
let yaml = r#"
jobs:
ci:
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
"#;
let graph = parse(yaml);
let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
assert_eq!(images.len(), 1);
assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
}
#[test]
fn classifies_local_action_as_first_party() {
let yaml = r#"
jobs:
ci:
steps:
- uses: ./.github/actions/my-action
"#;
let graph = parse(yaml);
let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
assert_eq!(images.len(), 1);
assert_eq!(images[0].trust_zone, TrustZone::FirstParty);
}
#[test]
fn detects_secret_in_with() {
let yaml = r#"
jobs:
deploy:
steps:
- name: Publish
uses: some-org/publish@v1
with:
token: "${{ secrets.NPM_TOKEN }}"
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert_eq!(secrets.len(), 1);
assert_eq!(secrets[0].name, "NPM_TOKEN");
}
#[test]
fn uses_step_records_action_and_scalar_with_inputs() {
let yaml = r#"
jobs:
deploy:
steps:
- uses: aws-actions/amazon-ecr-login@v2
with:
mask-password: false
registries: "123456789012"
"#;
let graph = parse(yaml);
let step = graph
.nodes_of_kind(NodeKind::Step)
.find(|n| n.name == "deploy[0]")
.expect("uses step");
assert_eq!(
step.metadata.get(META_GHA_ACTION).map(String::as_str),
Some("aws-actions/amazon-ecr-login")
);
let inputs = step
.metadata
.get(META_GHA_WITH_INPUTS)
.expect("with inputs");
assert!(inputs.contains("mask-password=false"));
assert!(inputs.contains("registries=123456789012"));
}
#[test]
fn parser_stamps_new_exploit_rule_metadata() {
let yaml = r#"
on:
workflow_call:
inputs:
image:
type: string
jobs:
call:
uses: org/repo/.github/workflows/reuse.yml@main
runs-on: ${{ inputs.runner }}
secrets: inherit
with:
image: ${{ inputs.image }}
deploy:
runs-on: [ubuntu-latest]
if: ${{ needs.plan.outputs.pr_run_mode == 'upload' }}
env:
NODE_OPTIONS: --require=./hook.js
container:
image: ${{ inputs.image }}
options: --privileged
steps:
- name: Publish
if: ${{ github.event_name == 'push' }}
run: npm publish
"#;
let graph = parse(yaml);
assert_eq!(
graph
.metadata
.get(META_GHA_WORKFLOW_CALL_INPUTS)
.map(String::as_str),
Some("image")
);
let call = graph
.nodes_of_kind(NodeKind::Step)
.find(|n| n.name == "call")
.expect("synthetic reusable call step");
assert_eq!(
call.metadata.get(META_SECRETS_INHERIT).map(String::as_str),
Some("true")
);
assert!(
call.metadata
.get(META_GHA_WITH_INPUTS)
.map(|v| v.contains("image=${{ inputs.image }}"))
.unwrap_or(false),
"reusable-call with inputs should be stamped"
);
assert_eq!(
call.metadata.get(META_GHA_RUNS_ON).map(String::as_str),
Some("${{ inputs.runner }}")
);
let publish = graph
.nodes_of_kind(NodeKind::Step)
.find(|n| n.name == "Publish")
.expect("publish step");
assert!(
publish
.metadata
.get(META_GHA_ENV_ASSIGNMENTS)
.map(|v| v.contains("NODE_OPTIONS=--require=./hook.js"))
.unwrap_or(false),
"effective env assignments should be stamped on steps"
);
assert_eq!(
publish.metadata.get(META_CONDITION).map(String::as_str),
Some("${{ needs.plan.outputs.pr_run_mode == 'upload' }} AND ${{ github.event_name == 'push' }}")
);
let container = graph
.nodes_of_kind(NodeKind::Image)
.find(|n| n.metadata.get(META_CONTAINER).map(String::as_str) == Some("true"))
.expect("container image node");
assert_eq!(
container
.metadata
.get(META_GHA_CONTAINER_OPTIONS)
.map(String::as_str),
Some("--privileged")
);
}
#[test]
fn with_non_scalar_values_do_not_fail_parse() {
let yaml = r#"
jobs:
check:
steps:
- name: Label
uses: actions/github-script@v7
with:
script: |
core.info("ok")
labels:
- bug
- ci
token: "${{ secrets.GITHUB_TOKEN }}"
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert!(
secrets.iter().any(|s| s.name == "GITHUB_TOKEN"),
"scalar values inside with: must still be scanned for secrets"
);
}
#[test]
fn inferred_secret_in_run_block_detected() {
let yaml = r#"
jobs:
deploy:
steps:
- name: Deploy
run: |
curl -H "Authorization: ${{ secrets.API_TOKEN }}" https://api.example.com
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert_eq!(secrets.len(), 1);
assert_eq!(secrets[0].name, "API_TOKEN");
assert_eq!(
secrets[0].metadata.get(META_INFERRED),
Some(&"true".to_string())
);
assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
assert!(!graph.completeness_gaps.is_empty());
assert!(
graph.completeness_gap_kinds.contains(&GapKind::Expression),
"inferred secret in run: must record an Expression-kind gap, got: {:?}",
graph.completeness_gap_kinds
);
}
#[test]
fn job_level_env_inherited_by_steps() {
let yaml = r#"
jobs:
build:
env:
DB_PASSWORD: "${{ secrets.DB_PASSWORD }}"
steps:
- name: Step A
run: echo "a"
- name: Step B
run: echo "b"
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
let secret_id = secrets[0].id;
let accessing_steps = graph
.edges_to(secret_id)
.filter(|e| e.kind == EdgeKind::HasAccessTo)
.count();
assert_eq!(accessing_steps, 2, "both steps inherit job-level env");
}
#[test]
fn identity_scope_set_on_token() {
let yaml = r#"
permissions: write-all
jobs:
ci:
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
assert_eq!(identities.len(), 1);
assert_eq!(
identities[0].metadata.get(META_IDENTITY_SCOPE),
Some(&"broad".to_string())
);
}
#[test]
fn constrained_identity_scope() {
let yaml = r#"
permissions:
contents: read
jobs:
ci:
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
assert_eq!(identities.len(), 1);
assert_eq!(
identities[0].metadata.get(META_IDENTITY_SCOPE),
Some(&"constrained".to_string())
);
}
#[test]
fn pull_request_target_string_trigger_marks_run_steps_untrusted() {
let yaml = r#"
on: pull_request_target
jobs:
check:
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
with:
ref: ${{ github.event.pull_request.head.sha }}
- run: npm test
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 2);
let run_step = steps.iter().find(|s| s.name.contains("check[1]")).unwrap();
assert_eq!(
run_step.trust_zone,
TrustZone::Untrusted,
"run: step in pull_request_target workflow should be Untrusted"
);
let checkout_step = steps.iter().find(|s| s.name.contains("check[0]")).unwrap();
assert_eq!(checkout_step.trust_zone, TrustZone::ThirdParty);
}
#[test]
fn pull_request_target_sequence_trigger_marks_run_steps_untrusted() {
let yaml = r#"
on: [push, pull_request_target]
jobs:
ci:
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
}
#[test]
fn pull_request_target_mapping_trigger_marks_run_steps_untrusted() {
let yaml = r#"
on:
pull_request_target:
types: [opened, synchronize]
jobs:
ci:
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
}
#[test]
fn push_trigger_does_not_mark_run_steps_untrusted() {
let yaml = r#"
on: push
jobs:
ci:
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(
steps[0].trust_zone,
TrustZone::FirstParty,
"push-triggered run: steps should remain FirstParty"
);
}
#[test]
fn workflow_level_env_inherited_by_all_steps() {
let yaml = r#"
env:
DB_URL: "${{ secrets.DATABASE_URL }}"
jobs:
build:
steps:
- name: Step A
run: echo "a"
test:
steps:
- name: Step B
run: echo "b"
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert_eq!(secrets.len(), 1, "one secret node (deduplicated)");
let secret_id = secrets[0].id;
let accessing_steps = graph
.edges_to(secret_id)
.filter(|e| e.kind == EdgeKind::HasAccessTo)
.count();
assert_eq!(accessing_steps, 2, "both steps inherit workflow-level env");
}
#[test]
fn matrix_strategy_marks_graph_partial() {
let yaml = r#"
jobs:
test:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
steps:
- run: echo hi
"#;
let graph = parse(yaml);
assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
assert!(
graph.completeness_gaps.iter().any(|g| g.contains("matrix")),
"matrix strategy should be recorded as a completeness gap"
);
assert!(
graph.completeness_gap_kinds.contains(&GapKind::Expression),
"matrix strategy must record an Expression-kind gap, got: {:?}",
graph.completeness_gap_kinds
);
}
#[test]
fn job_without_matrix_does_not_mark_partial() {
let yaml = r#"
jobs:
build:
steps:
- run: cargo build
"#;
let graph = parse(yaml);
assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
}
#[test]
fn reusable_workflow_creates_image_and_marks_partial() {
let yaml = r#"
jobs:
call:
uses: org/repo/.github/workflows/deploy.yml@main
"#;
let graph = parse(yaml);
let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
assert_eq!(images.len(), 1);
assert_eq!(images[0].name, "org/repo/.github/workflows/deploy.yml@main");
assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 1);
assert_eq!(steps[0].name, "call");
let delegates: Vec<_> = graph
.edges_from(steps[0].id)
.filter(|e| e.kind == EdgeKind::DelegatesTo)
.collect();
assert_eq!(delegates.len(), 1);
assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
assert!(
graph.completeness_gap_kinds.contains(&GapKind::Structural),
"reusable workflow must record a Structural-kind gap, got: {:?}",
graph.completeness_gap_kinds
);
}
#[test]
fn reusable_workflow_sha_pinned_is_third_party() {
let yaml = r#"
jobs:
call:
uses: org/repo/.github/workflows/deploy.yml@a5ac7e51b41094c92402da3b24376905380afc29
"#;
let graph = parse(yaml);
let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
}
#[test]
fn container_unpinned_creates_image_node_untrusted() {
let yaml = r#"
jobs:
build:
container: ubuntu:22.04
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
assert_eq!(images.len(), 1);
assert_eq!(images[0].name, "ubuntu:22.04");
assert_eq!(images[0].trust_zone, TrustZone::Untrusted);
assert_eq!(
images[0].metadata.get(META_CONTAINER),
Some(&"true".to_string())
);
}
#[test]
fn container_digest_pinned_creates_image_node_third_party() {
let yaml = r#"
jobs:
build:
container:
image: "ubuntu@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b"
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
assert_eq!(images.len(), 1);
assert_eq!(images[0].trust_zone, TrustZone::ThirdParty);
assert_eq!(
images[0].metadata.get(META_CONTAINER),
Some(&"true".to_string())
);
}
#[test]
fn oidc_permission_tags_identity_with_meta_oidc() {
let yaml = r#"
permissions:
id-token: write
contents: read
jobs:
ci:
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
assert_eq!(identities.len(), 1);
assert_eq!(
identities[0].metadata.get(META_OIDC),
Some(&"true".to_string()),
"id-token: write should mark identity as OIDC-capable"
);
}
#[test]
fn non_oidc_permission_does_not_tag_meta_oidc() {
let yaml = r#"
permissions:
contents: read
jobs:
ci:
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
assert_eq!(identities.len(), 1);
assert!(
!identities[0].metadata.contains_key(META_OIDC),
"contents:read should not tag as OIDC"
);
}
#[test]
fn contents_write_without_id_token_does_not_tag_oidc() {
let yaml = r#"
permissions:
contents: write
jobs:
ci:
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
assert_eq!(identities.len(), 1);
assert!(
!identities[0].metadata.contains_key(META_OIDC),
"contents:write without id-token must not be tagged OIDC"
);
}
#[test]
fn write_all_permission_tags_identity_as_oidc() {
let yaml = r#"
permissions: write-all
jobs:
ci:
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
assert_eq!(identities.len(), 1);
assert_eq!(
identities[0].metadata.get(META_OIDC),
Some(&"true".to_string()),
"write-all grants all permissions including id-token: write"
);
}
#[test]
fn container_steps_linked_to_container_image() {
let yaml = r#"
jobs:
build:
container: ubuntu:22.04
steps:
- name: Step A
run: echo "a"
- name: Step B
run: echo "b"
"#;
let graph = parse(yaml);
let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
assert_eq!(images.len(), 1);
let container_id = images[0].id;
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 2);
for step in &steps {
let links: Vec<_> = graph
.edges_from(step.id)
.filter(|e| e.kind == EdgeKind::UsesImage && e.to == container_id)
.collect();
assert_eq!(
links.len(),
1,
"step '{}' must link to container",
step.name
);
}
}
#[test]
fn container_authority_propagates_to_floating_image() {
let yaml = r#"
permissions: write-all
jobs:
build:
container: ubuntu:22.04
steps:
- run: echo hi
"#;
use taudit_core::propagation::DEFAULT_MAX_HOPS;
use taudit_core::rules;
let graph = parse(yaml);
let findings = rules::run_all_rules(&graph, DEFAULT_MAX_HOPS);
assert!(
findings
.iter()
.any(|f| f.category == taudit_core::finding::FindingCategory::AuthorityPropagation),
"authority should propagate from step to floating container"
);
}
#[test]
fn aws_oidc_creates_identity_node() {
let yaml = r#"
jobs:
deploy:
steps:
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::123456789012:role/my-deploy-role
aws-region: us-east-1
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph
.nodes_of_kind(NodeKind::Identity)
.filter(|n| n.name != "GITHUB_TOKEN")
.collect();
assert_eq!(identities.len(), 1);
assert_eq!(identities[0].name, "AWS/my-deploy-role");
assert_eq!(
identities[0].metadata.get(META_OIDC),
Some(&"true".to_string())
);
assert_eq!(
identities[0].metadata.get(META_IDENTITY_SCOPE),
Some(&"broad".to_string())
);
}
#[test]
fn gcp_oidc_creates_identity_node() {
let yaml = r#"
jobs:
deploy:
steps:
- name: Authenticate to GCP
uses: google-github-actions/auth@v2
with:
workload_identity_provider: projects/123/locations/global/workloadIdentityPools/my-pool/providers/my-provider
service_account: my-sa@my-project.iam.gserviceaccount.com
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph
.nodes_of_kind(NodeKind::Identity)
.filter(|n| n.name != "GITHUB_TOKEN")
.collect();
assert_eq!(identities.len(), 1);
assert!(identities[0].name.starts_with("GCP/"));
assert_eq!(
identities[0].metadata.get(META_OIDC),
Some(&"true".to_string())
);
}
#[test]
fn azure_oidc_creates_identity_node() {
let yaml = r#"
jobs:
deploy:
steps:
- name: Azure login
uses: azure/login@v2
with:
client-id: ${{ vars.AZURE_CLIENT_ID }}
tenant-id: ${{ vars.AZURE_TENANT_ID }}
subscription-id: ${{ vars.AZURE_SUBSCRIPTION_ID }}
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph
.nodes_of_kind(NodeKind::Identity)
.filter(|n| n.name != "GITHUB_TOKEN")
.collect();
assert_eq!(identities.len(), 1);
assert!(identities[0].name.starts_with("Azure/"));
assert_eq!(
identities[0].metadata.get(META_OIDC),
Some(&"true".to_string())
);
}
#[test]
fn azure_static_sp_does_not_create_identity_node() {
let yaml = r#"
jobs:
deploy:
steps:
- name: Azure login
uses: azure/login@v2
with:
client-id: my-client-id
client-secret: ${{ secrets.AZURE_CLIENT_SECRET }}
tenant-id: my-tenant
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph
.nodes_of_kind(NodeKind::Identity)
.filter(|n| n.name != "GITHUB_TOKEN")
.collect();
assert!(
identities.is_empty(),
"static SP should not create an OIDC Identity node"
);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert_eq!(secrets.len(), 1);
assert_eq!(secrets[0].name, "AZURE_CLIENT_SECRET");
}
#[test]
fn aws_static_creds_do_not_create_identity_node() {
let yaml = r#"
jobs:
deploy:
steps:
- uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-1
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph
.nodes_of_kind(NodeKind::Identity)
.filter(|n| n.name != "GITHUB_TOKEN")
.collect();
assert!(
identities.is_empty(),
"static AWS creds must not create Identity node"
);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert_eq!(secrets.len(), 2, "both static secrets captured");
}
#[test]
fn pull_request_target_sets_meta_trigger_on_graph() {
let yaml = r#"
on: pull_request_target
jobs:
ci:
steps:
- run: echo hi
"#;
let graph = parse(yaml);
assert_eq!(
graph.metadata.get(META_TRIGGER),
Some(&"pull_request_target".to_string())
);
}
#[test]
fn github_env_write_in_run_sets_meta_writes_env_gate() {
let yaml = r#"
jobs:
build:
steps:
- name: Set version
run: echo "VERSION=1.0" >> $GITHUB_ENV
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 1);
assert_eq!(
steps[0].metadata.get(META_WRITES_ENV_GATE),
Some(&"true".to_string()),
"run: with >> $GITHUB_ENV must mark META_WRITES_ENV_GATE"
);
}
#[test]
fn attest_action_sets_meta_attests() {
let yaml = r#"
jobs:
release:
steps:
- name: Attest
uses: actions/attest-build-provenance@v1
with:
subject-path: dist/*
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 1);
assert_eq!(
steps[0].metadata.get(META_ATTESTS),
Some(&"true".to_string())
);
}
#[test]
fn self_hosted_string_runs_on_creates_image_with_self_hosted_metadata() {
let yaml = r#"
jobs:
build:
runs-on: self-hosted
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
let runner = images
.iter()
.find(|i| i.metadata.contains_key(META_SELF_HOSTED))
.expect("self-hosted runner Image node must be created");
assert_eq!(
runner.metadata.get(META_SELF_HOSTED),
Some(&"true".to_string())
);
}
#[test]
fn self_hosted_in_sequence_runs_on_creates_image_with_self_hosted_metadata() {
let yaml = r#"
jobs:
build:
runs-on: [self-hosted, linux, x64]
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
let runner = images
.iter()
.find(|i| i.metadata.contains_key(META_SELF_HOSTED))
.expect("self-hosted runner Image node must be created");
assert_eq!(
runner.metadata.get(META_SELF_HOSTED),
Some(&"true".to_string())
);
}
#[test]
fn hosted_runner_does_not_create_self_hosted_image() {
let yaml = r#"
jobs:
build:
runs-on: ubuntu-latest
steps:
- run: echo hi
"#;
let graph = parse(yaml);
let self_hosted_images: Vec<_> = graph
.nodes_of_kind(NodeKind::Image)
.filter(|i| i.metadata.contains_key(META_SELF_HOSTED))
.collect();
assert!(
self_hosted_images.is_empty(),
"hosted runner must not produce a self-hosted Image node"
);
}
#[test]
fn actions_checkout_step_tagged_with_meta_checkout_self() {
let yaml = r#"
jobs:
ci:
steps:
- uses: actions/checkout@v4
- run: echo hi
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
let checkout_step = steps
.iter()
.find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
.expect("actions/checkout step must be tagged META_CHECKOUT_SELF");
assert_eq!(
checkout_step.metadata.get(META_CHECKOUT_SELF),
Some(&"true".to_string())
);
}
#[test]
fn actions_checkout_sha_pinned_also_tagged() {
let yaml = r#"
jobs:
ci:
steps:
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 1);
assert_eq!(
steps[0].metadata.get(META_CHECKOUT_SELF),
Some(&"true".to_string()),
"SHA-pinned checkout must still be tagged — rule gates on trigger context"
);
}
#[test]
fn non_checkout_uses_not_tagged_checkout_self() {
let yaml = r#"
jobs:
ci:
steps:
- uses: some-org/other-action@v1
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 1);
assert!(
!steps[0].metadata.contains_key(META_CHECKOUT_SELF),
"non-checkout uses: must not be tagged"
);
}
fn make_temp_dir(label: &str) -> std::path::PathBuf {
use std::sync::atomic::{AtomicU64, Ordering};
static COUNTER: AtomicU64 = AtomicU64::new(0);
let n = COUNTER.fetch_add(1, Ordering::Relaxed);
let dir = std::env::temp_dir().join(format!(
"taudit-gha-test-{}-{}-{}",
std::process::id(),
n,
label
));
let _ = std::fs::remove_dir_all(&dir);
std::fs::create_dir_all(&dir).expect("create temp dir");
dir
}
fn parse_at(yaml: &str, file: &str) -> AuthorityGraph {
let parser = GhaParser;
let source = PipelineSource {
file: file.into(),
repo: None,
git_ref: None,
commit_sha: None,
};
parser.parse(yaml, &source).unwrap()
}
#[test]
fn composite_action_reference_marks_graph_partial_without_inlining() {
let dir = make_temp_dir("composite-no-inline");
let workflows_dir = dir.join(".github/workflows");
let action_dir = dir.join(".github/actions/my-action");
std::fs::create_dir_all(&workflows_dir).unwrap();
std::fs::create_dir_all(&action_dir).unwrap();
let action_yml = r#"
name: My Action
runs:
using: composite
steps:
- name: Install deps
run: npm install
shell: bash
"#;
std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
let workflow = r#"
jobs:
ci:
steps:
- name: Run my action
uses: ./.github/actions/my-action
"#;
let workflow_path = workflows_dir.join("ci.yml");
std::fs::write(&workflow_path, workflow).unwrap();
let graph = parse_at(workflow, workflow_path.to_str().unwrap());
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 1, "no composite-action step inlining");
assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
assert!(
graph.completeness_gap_kinds.contains(&GapKind::Structural),
"local action reference must record a Structural-kind gap, got: {:?}",
graph.completeness_gap_kinds
);
assert!(
graph
.completeness_gaps
.iter()
.any(|g| g.contains("composite action not resolved")
&& g.contains("./.github/actions/my-action")),
"gap reason must name the action and explain non-resolution, got: {:?}",
graph.completeness_gaps
);
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn missing_action_yml_marks_graph_partial() {
let dir = make_temp_dir("missing-action");
let workflows_dir = dir.join(".github/workflows");
std::fs::create_dir_all(&workflows_dir).unwrap();
let workflow = r#"
jobs:
ci:
steps:
- uses: ./.github/actions/missing-action
"#;
let workflow_path = workflows_dir.join("ci.yml");
std::fs::write(&workflow_path, workflow).unwrap();
let graph = parse_at(workflow, workflow_path.to_str().unwrap());
assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
assert!(
graph.completeness_gaps.iter().any(
|g| g.contains("composite action not resolved") && g.contains("missing-action")
),
"missing local action must be recorded as a completeness gap, got: {:?}",
graph.completeness_gaps
);
assert!(
graph.completeness_gap_kinds.contains(&GapKind::Structural),
"unresolved composite action must record a Structural-kind gap, got: {:?}",
graph.completeness_gap_kinds
);
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn non_composite_local_action_marks_graph_partial() {
let dir = make_temp_dir("non-composite");
let workflows_dir = dir.join(".github/workflows");
let action_dir = dir.join(".github/actions/docker-action");
std::fs::create_dir_all(&workflows_dir).unwrap();
std::fs::create_dir_all(&action_dir).unwrap();
let action_yml = r#"
name: Docker Action
runs:
using: docker
image: Dockerfile
"#;
std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
let workflow = r#"
jobs:
ci:
steps:
- uses: ./.github/actions/docker-action
"#;
let workflow_path = workflows_dir.join("ci.yml");
std::fs::write(&workflow_path, workflow).unwrap();
let graph = parse_at(workflow, workflow_path.to_str().unwrap());
assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
assert!(
graph.completeness_gap_kinds.contains(&GapKind::Structural),
"local action reference must record a Structural-kind gap, got: {:?}",
graph.completeness_gap_kinds
);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 1, "must not inline any sub-steps");
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn composite_action_secrets_not_captured_after_partial_marking() {
let dir = make_temp_dir("composite-secrets-hidden");
let workflows_dir = dir.join(".github/workflows");
let action_dir = dir.join(".github/actions/deploy");
std::fs::create_dir_all(&workflows_dir).unwrap();
std::fs::create_dir_all(&action_dir).unwrap();
let action_yml = r#"
name: Deploy
runs:
using: composite
steps:
- name: Push
run: |
curl -H "Authorization: ${{ secrets.DEPLOY_TOKEN }}" https://example.com
shell: bash
"#;
std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
let workflow = r#"
jobs:
release:
steps:
- uses: ./.github/actions/deploy
"#;
let workflow_path = workflows_dir.join("release.yml");
std::fs::write(&workflow_path, workflow).unwrap();
let graph = parse_at(workflow, workflow_path.to_str().unwrap());
let secret_names: Vec<_> = graph
.nodes_of_kind(NodeKind::Secret)
.map(|s| s.name.as_str())
.collect();
assert!(
!secret_names.contains(&"DEPLOY_TOKEN"),
"secret hidden inside composite action must NOT leak into the graph, got: {secret_names:?}"
);
assert_eq!(
graph.completeness,
AuthorityCompleteness::Partial,
"composite action reference must mark graph Partial"
);
let _ = std::fs::remove_dir_all(&dir);
}
#[test]
fn step_env_literal_shadows_workflow_level_secret() {
let yaml = r#"
on: pull_request_target
env:
TOKEN: ${{ secrets.PROD_TOKEN }}
jobs:
build:
steps:
- run: ./scan.sh
env:
TOKEN: literal-non-secret
"#;
let graph = parse(yaml);
let prod_token_id = graph
.nodes_of_kind(NodeKind::Secret)
.find(|n| n.name == "PROD_TOKEN")
.map(|n| n.id);
if let Some(secret_id) = prod_token_id {
let leaks = graph
.edges_to(secret_id)
.filter(|e| e.kind == EdgeKind::HasAccessTo)
.count();
assert_eq!(
leaks, 0,
"step-level env literal must shadow workflow-level secret — \
expected 0 HasAccessTo edges to PROD_TOKEN, found {leaks}"
);
}
}
#[test]
fn step_env_secret_shadows_workflow_level_secret() {
let yaml = r#"
on: pull_request_target
env:
TOKEN: ${{ secrets.PROD_TOKEN }}
jobs:
build:
steps:
- run: ./scan.sh
env:
TOKEN: ${{ secrets.STAGING_TOKEN }}
"#;
let graph = parse(yaml);
let secret_names: Vec<_> = graph
.nodes_of_kind(NodeKind::Secret)
.map(|s| s.name.clone())
.collect();
assert!(
secret_names.contains(&"STAGING_TOKEN".to_string()),
"shadowing secret must be in the graph, got: {secret_names:?}"
);
let prod_id = graph
.nodes_of_kind(NodeKind::Secret)
.find(|n| n.name == "PROD_TOKEN")
.map(|n| n.id);
if let Some(prod_id) = prod_id {
let leaks = graph
.edges_to(prod_id)
.filter(|e| e.kind == EdgeKind::HasAccessTo)
.count();
assert_eq!(
leaks, 0,
"step-level env secret must shadow workflow-level secret \
(no HasAccessTo edge to PROD_TOKEN), found {leaks}"
);
}
}
#[test]
fn composite_action_resolution_does_not_depend_on_cwd() {
let dir = make_temp_dir("cwd-independence");
let workflows_dir = dir.join(".github/workflows");
let action_dir = dir.join(".github/actions/x");
std::fs::create_dir_all(&workflows_dir).unwrap();
std::fs::create_dir_all(&action_dir).unwrap();
let action_yml = r#"
name: X
runs:
using: composite
steps:
- run: echo hi
shell: bash
"#;
std::fs::write(action_dir.join("action.yml"), action_yml).unwrap();
let workflow = r#"
jobs:
ci:
steps:
- uses: ./.github/actions/x
"#;
let workflow_path = workflows_dir.join("ci.yml");
std::fs::write(&workflow_path, workflow).unwrap();
let prev_cwd = std::env::current_dir().ok();
std::env::set_current_dir(&dir).unwrap();
let graph_inside = parse_at(workflow, ".github/workflows/ci.yml");
if let Some(p) = prev_cwd {
std::env::set_current_dir(p).unwrap();
}
let abs_workflow_path = workflow_path.to_str().unwrap().to_string();
let graph_outside = parse_at(workflow, &abs_workflow_path);
assert_eq!(
graph_inside.completeness,
AuthorityCompleteness::Partial,
"graph parsed from inside the worktree must be Partial"
);
assert_eq!(
graph_outside.completeness,
AuthorityCompleteness::Partial,
"graph parsed from outside the worktree must be Partial"
);
assert_eq!(
graph_inside.completeness, graph_outside.completeness,
"CWD-relative vs absolute pipeline_file must produce identical completeness"
);
assert_eq!(
graph_inside.nodes_of_kind(NodeKind::Step).count(),
1,
"inside parse must not inline composite sub-steps"
);
assert_eq!(
graph_outside.nodes_of_kind(NodeKind::Step).count(),
1,
"outside parse must not inline composite sub-steps"
);
let _ = std::fs::remove_dir_all(&dir);
}
/// A workflow declaring `permissions: write-all` must yield exactly one
/// Identity node named `GITHUB_TOKEN` whose `META_PERMISSIONS` metadata is
/// `write-all`.
#[test]
fn workflow_level_permissions_create_identity() {
    let workflow_src = r#"
permissions: write-all
jobs:
ci:
steps:
- run: echo hi
"#;
    let parsed = parse(workflow_src);
    let tokens = parsed
        .nodes_of_kind(NodeKind::Identity)
        .collect::<Vec<_>>();
    assert_eq!(tokens.len(), 1);

    let token = &tokens[0];
    assert_eq!(token.name, "GITHUB_TOKEN");

    let stamped = token.metadata.get(META_PERMISSIONS).unwrap();
    assert_eq!(stamped, "write-all");
}
/// With no `permissions:` key in the workflow, the graph must still contain a
/// single `GITHUB_TOKEN` Identity node, tagged with scope `unknown` and
/// `META_IMPLICIT` set to `true`.
#[test]
fn omitted_workflow_permissions_create_unknown_implicit_identity() {
    let workflow_src = r#"
jobs:
ci:
steps:
- run: echo hi
"#;
    let parsed = parse(workflow_src);
    let tokens = parsed
        .nodes_of_kind(NodeKind::Identity)
        .collect::<Vec<_>>();
    assert_eq!(tokens.len(), 1);

    let token = &tokens[0];
    assert_eq!(token.name, "GITHUB_TOKEN");

    let scope = token.metadata.get(META_IDENTITY_SCOPE).unwrap();
    assert_eq!(scope, "unknown");

    let implicit = token.metadata.get(META_IMPLICIT).unwrap();
    assert_eq!(implicit, "true");
}
/// A job whose `env:` is a bare template expression must parse without
/// panicking, mark the graph `Partial`, record an Expression-kind gap whose
/// text mentions both `env:` and `template`, and still produce the job's
/// single Step node.
#[test]
fn job_env_template_expression_does_not_crash_and_marks_partial() {
    let workflow_src = r#"
jobs:
unit-tests:
env: ${{ matrix }}
steps:
- run: pytest
"#;
    let parsed = parse(workflow_src);

    let is_partial = matches!(parsed.completeness, AuthorityCompleteness::Partial);
    assert!(
        is_partial,
        "graph must be marked Partial when env: is a template expression"
    );

    let gap_reasons = &parsed.completeness_gaps;
    let template_gap_recorded = gap_reasons
        .iter()
        .any(|reason| reason.contains("env:") && reason.contains("template"));
    assert!(
        template_gap_recorded,
        "completeness_gaps must mention env: template, got: {:?}",
        parsed.completeness_gaps
    );

    let has_expression_kind = parsed
        .completeness_gap_kinds
        .contains(&GapKind::Expression);
    assert!(
        has_expression_kind,
        "job-level env: template must record an Expression-kind gap, got: {:?}",
        parsed.completeness_gap_kinds
    );

    let step_total = parsed.nodes_of_kind(NodeKind::Step).count();
    assert_eq!(step_total, 1, "the single step must still be parsed");
}
/// Job-level `env:` entries with integer, boolean and empty values must
/// parse: the step is still emitted, and the graph must not be flagged
/// `Partial` with an `env:`/`template` gap reason.
#[test]
fn env_with_non_string_scalar_values_parses() {
    let workflow_src = r#"
jobs:
test:
env:
RUST_BACKTRACE: 1
COVERAGE: false
TARGET_FLAGS:
CARGO: cargo
steps:
- run: cargo test
"#;
    let parsed = parse(workflow_src);

    let step_total = parsed.nodes_of_kind(NodeKind::Step).count();
    assert_eq!(step_total, 1, "expected the single step to parse");

    // Only the combination "Partial AND an env-template gap reason" is a failure;
    // a Partial graph for some unrelated reason is acceptable here.
    let partial_via_env_template = matches!(parsed.completeness, AuthorityCompleteness::Partial)
        && parsed
            .completeness_gaps
            .iter()
            .any(|reason| reason.contains("env:") && reason.contains("template"));
    assert!(
        !partial_via_env_template,
        "non-string env values must not mark the graph Partial via the env-template path"
    );
}
/// Step-level `env:` entries with boolean, integer and empty values must not
/// prevent the step from being parsed into a Step node.
#[test]
fn step_env_with_boolean_and_integer_values_parses() {
    let workflow_src = r#"
jobs:
build:
steps:
- name: build
run: make
env:
DEBUG: true
RETRIES: 3
OPTIONAL_FLAG:
"#;
    let parsed = parse(workflow_src);
    let step_total = parsed.nodes_of_kind(NodeKind::Step).count();
    assert_eq!(step_total, 1);
}
/// Every Step node produced for job `build` must carry `META_JOB_NAME` set to
/// `build`; the test also requires that at least one Step node exists.
#[test]
fn meta_job_name_set_on_step_nodes() {
    let workflow_src = r#"
jobs:
build:
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Compile
run: make build
"#;
    let parsed = parse(workflow_src);
    let step_nodes = parsed.nodes_of_kind(NodeKind::Step).collect::<Vec<_>>();
    assert!(!step_nodes.is_empty(), "expected at least one Step node");

    step_nodes.iter().for_each(|node| {
        let job_name = node.metadata.get(META_JOB_NAME).map(String::as_str);
        assert_eq!(
            job_name,
            Some("build"),
            "Step {:?} missing META_JOB_NAME=build",
            node.name
        );
    });
}
/// A workflow with a job but no `steps:` must produce zero Step nodes and be
/// marked `Partial`, with a gap reason containing `0 step nodes` and a
/// Structural gap kind.
#[test]
fn jobs_without_steps_marks_partial() {
    let workflow_src = r#"
on:
push:
jobs:
build:
runs-on: ubuntu-latest
"#;
    let parsed = parse(workflow_src);

    // Tally Step nodes directly from the node list.
    let mut step_total = 0usize;
    for node in parsed.nodes.iter() {
        if node.kind == NodeKind::Step {
            step_total += 1;
        }
    }
    assert_eq!(step_total, 0, "no steps: present means 0 Step nodes");

    assert_eq!(
        parsed.completeness,
        AuthorityCompleteness::Partial,
        "0-step-nodes despite non-empty jobs: must mark Partial"
    );

    let zero_step_reason_present = parsed
        .completeness_gaps
        .iter()
        .any(|reason| reason.contains("0 step nodes"));
    assert!(
        zero_step_reason_present,
        "completeness_gaps must mention 0 step nodes: {:?}",
        parsed.completeness_gaps
    );

    let has_structural_kind = parsed
        .completeness_gap_kinds
        .contains(&GapKind::Structural);
    assert!(
        has_structural_kind,
        "0-step-nodes gap must be Structural, got: {:?}",
        parsed.completeness_gap_kinds
    );
}
/// A workflow with no `jobs:` key at all must not record the `0 step nodes`
/// gap reason.
#[test]
fn empty_workflow_no_jobs_does_not_mark_partial_for_zero_steps() {
    let workflow_src = "name: empty\non:\n push:\n";
    let parsed = parse(workflow_src);

    let gap_reasons = &parsed.completeness_gaps;
    let zero_step_reason_present = gap_reasons
        .iter()
        .any(|reason| reason.contains("0 step nodes"));
    assert!(
        !zero_step_reason_present,
        "no jobs: in source means no 0-step gap reason; got: {:?}",
        parsed.completeness_gaps
    );
}
/// An action pinned to a 40-character all-zero SHA must produce a single
/// Image node in the `Untrusted` trust zone.
#[test]
fn all_zero_sha_action_is_untrusted() {
    let workflow_src = r#"
jobs:
ci:
steps:
- uses: actions/setup-python@0000000000000000000000000000000000000000
"#;
    let parsed = parse(workflow_src);
    let image_nodes = parsed.nodes_of_kind(NodeKind::Image).collect::<Vec<_>>();
    assert_eq!(image_nodes.len(), 1);

    let image = &image_nodes[0];
    assert_eq!(
        image.trust_zone,
        TrustZone::Untrusted,
        "all-zero SHA must be classified as Untrusted, not ThirdParty"
    );
}
/// An action pinned to a non-zero 40-character SHA must produce a single
/// Image node in the `ThirdParty` trust zone.
#[test]
fn real_sha_pinned_action_is_third_party() {
    let workflow_src = r#"
jobs:
ci:
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
"#;
    let parsed = parse(workflow_src);
    let image_nodes = parsed.nodes_of_kind(NodeKind::Image).collect::<Vec<_>>();
    assert_eq!(image_nodes.len(), 1);

    let image = &image_nodes[0];
    assert_eq!(
        image.trust_zone,
        TrustZone::ThirdParty,
        "legitimate SHA-pinned action must be classified as ThirdParty"
    );
}
/// An `actions/upload-artifact` step with `name: my-dist` must create one
/// Artifact node with that name and exactly one `Produces` edge pointing to it.
#[test]
fn upload_artifact_creates_produces_edge() {
    let workflow_src = r#"
permissions:
contents: read
jobs:
build:
steps:
- uses: actions/upload-artifact@v4
with:
name: my-dist
path: ./dist
"#;
    let parsed = parse(workflow_src);
    let artifact_nodes = parsed
        .nodes_of_kind(NodeKind::Artifact)
        .collect::<Vec<_>>();
    assert_eq!(
        artifact_nodes.len(),
        1,
        "upload-artifact should create one Artifact node"
    );

    let artifact = &artifact_nodes[0];
    assert_eq!(artifact.name, "my-dist");

    let artifact_id = artifact.id;
    let produces_into_artifact = parsed
        .edges
        .iter()
        .filter(|edge| edge.kind == EdgeKind::Produces && edge.to == artifact_id)
        .count();
    assert_eq!(
        produces_into_artifact,
        1,
        "upload step must have Produces edge to artifact"
    );
}
/// An `actions/download-artifact` step with a `name:` must create one
/// Artifact node and exactly one `Consumes` edge originating from it.
#[test]
fn download_artifact_creates_consumes_edge() {
    let workflow_src = r#"
jobs:
deploy:
steps:
- uses: actions/download-artifact@v4
with:
name: my-dist
"#;
    let parsed = parse(workflow_src);
    let artifact_nodes = parsed
        .nodes_of_kind(NodeKind::Artifact)
        .collect::<Vec<_>>();
    assert_eq!(
        artifact_nodes.len(),
        1,
        "download-artifact should create one Artifact node"
    );

    let artifact_id = artifact_nodes[0].id;
    let consumes_from_artifact = parsed
        .edges
        .iter()
        .filter(|edge| edge.kind == EdgeKind::Consumes && edge.from == artifact_id)
        .count();
    assert_eq!(
        consumes_from_artifact,
        1,
        "download step must have Consumes edge from artifact"
    );
}
/// An upload in one job and a download in another that use the same artifact
/// name must share a single Artifact node: one `Produces` edge into it and one
/// `Consumes` edge out of it.
#[test]
fn upload_download_same_name_share_artifact_node() {
    let workflow_src = r#"
permissions:
contents: read
jobs:
build:
steps:
- uses: actions/upload-artifact@v4
with:
name: shared-dist
path: ./dist
deploy:
steps:
- uses: actions/download-artifact@v4
with:
name: shared-dist
"#;
    let parsed = parse(workflow_src);
    let artifact_nodes = parsed
        .nodes_of_kind(NodeKind::Artifact)
        .collect::<Vec<_>>();
    assert_eq!(
        artifact_nodes.len(),
        1,
        "same artifact name must reuse the same Artifact node"
    );

    // Gather both edge kinds before asserting on either.
    let mut producing = Vec::new();
    let mut consuming = Vec::new();
    for edge in parsed.edges.iter() {
        if edge.kind == EdgeKind::Produces {
            producing.push(edge);
        }
        if edge.kind == EdgeKind::Consumes {
            consuming.push(edge);
        }
    }
    assert_eq!(producing.len(), 1, "one Produces edge");
    assert_eq!(consuming.len(), 1, "one Consumes edge");

    let shared_artifact = &artifact_nodes[0];
    assert_eq!(producing[0].to, shared_artifact.id);
    assert_eq!(consuming[0].from, shared_artifact.id);
}
/// An `actions/upload-artifact` step with no `name:` input must create neither
/// an Artifact node nor a `Produces` edge.
#[test]
fn upload_artifact_without_name_creates_no_edge() {
    let workflow_src = r#"
jobs:
build:
steps:
- uses: actions/upload-artifact@v4
with:
path: ./dist
"#;
    let parsed = parse(workflow_src);

    // `artifacts` keeps its name: the failure message captures it by identifier.
    let artifacts = parsed
        .nodes_of_kind(NodeKind::Artifact)
        .collect::<Vec<_>>();
    assert!(
        artifacts.is_empty(),
        "upload-artifact without name: must not create an Artifact node; got: {artifacts:#?}"
    );

    let any_produces_edge = parsed
        .edges
        .iter()
        .any(|edge| edge.kind == EdgeKind::Produces);
    assert!(
        !any_produces_edge,
        "upload-artifact without name: must not create a Produces edge"
    );
}
/// An `actions/download-artifact` step with no `name:` input must not create
/// any `Consumes` edge.
#[test]
fn download_artifact_without_name_creates_no_edge() {
    let workflow_src = r#"
jobs:
deploy:
steps:
- uses: actions/download-artifact@v4
"#;
    let parsed = parse(workflow_src);

    let any_consumes_edge = parsed
        .edges
        .iter()
        .any(|edge| edge.kind == EdgeKind::Consumes);
    assert!(
        !any_consumes_edge,
        "download-artifact without name: must not create a Consumes edge"
    );
}
/// A `run:` script that mentions `secrets.conf` and `secrets.json` as plain
/// text but references `secrets.REAL_TOKEN` inside a `${{ }}` template must
/// yield exactly one Secret node, named `REAL_TOKEN`.
#[test]
fn secret_extractor_ignores_literal_substrings_outside_template_spans() {
    let workflow_src = r#"
jobs:
deploy:
steps:
- name: Mixed shell + template
run: |
# loads /etc/secrets.conf
cp $SECRETS_DIR/secrets.json /tmp/
curl -H "Authorization: ${{ secrets.REAL_TOKEN }}" https://api.example.com
"#;
    let parsed = parse(workflow_src);
    let secret_nodes = parsed.nodes_of_kind(NodeKind::Secret).collect::<Vec<_>>();
    assert_eq!(
        secret_nodes.len(),
        1,
        "only `REAL_TOKEN` should be a Secret node — phantoms `conf`/`json` must not appear; got: {:?}",
        secret_nodes.iter().map(|node| &node.name).collect::<Vec<_>>()
    );

    let only_secret = &secret_nodes[0];
    assert_eq!(only_secret.name, "REAL_TOKEN");
}
/// A template written without inner spaces (`${{secrets.TIGHT}}`) in a step
/// `env:` value must still yield the Secret node `TIGHT` with exactly one
/// `HasAccessTo` edge pointing at it.
#[test]
fn secret_extractor_handles_tight_template_spacing() {
    let workflow_src = r#"
jobs:
deploy:
steps:
- name: Tight template
run: echo "x"
env:
TOK: "${{secrets.TIGHT}}"
"#;
    let parsed = parse(workflow_src);
    let secret_nodes = parsed.nodes_of_kind(NodeKind::Secret).collect::<Vec<_>>();
    assert_eq!(secret_nodes.len(), 1);

    let tight = &secret_nodes[0];
    assert_eq!(tight.name, "TIGHT");

    let access_edges = parsed
        .edges_to(tight.id)
        .filter(|edge| edge.kind == EdgeKind::HasAccessTo)
        .count();
    assert_eq!(
        access_edges, 1,
        "tight `${{{{secrets.X}}}}` must produce HasAccessTo edge"
    );
}
/// A single `env:` value that concatenates two `${{ secrets.* }}` templates
/// must yield both Secret nodes (`A` and `B`, and no others), each with at
/// least one `HasAccessTo` edge pointing at it.
#[test]
fn secret_extractor_finds_all_secrets_in_concatenated_value() {
    let workflow_src = r#"
jobs:
deploy:
steps:
- name: Concatenated
run: echo "x"
env:
COMBINED: "${{ secrets.A }}-${{ secrets.B }}"
"#;
    let parsed = parse(workflow_src);

    // `secret_names` keeps its name: a failure message captures it by identifier.
    let mut secret_names: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();
    for node in parsed.nodes_of_kind(NodeKind::Secret) {
        secret_names.insert(node.name.as_str());
    }
    assert!(secret_names.contains("A"), "secret A must be detected");
    assert!(secret_names.contains("B"), "secret B must be detected");
    assert_eq!(
        secret_names.len(),
        2,
        "exactly two secrets, got: {secret_names:?}"
    );

    // `name` is likewise captured by the message below.
    for name in ["A", "B"] {
        let secret_node = parsed
            .nodes_of_kind(NodeKind::Secret)
            .find(|node| node.name == name)
            .expect("secret node");
        let reachable = parsed
            .edges_to(secret_node.id)
            .any(|edge| edge.kind == EdgeKind::HasAccessTo);
        assert!(reachable, "missing HasAccessTo edge for secret {name}");
    }
}
/// A reusable-workflow call job with a `secrets:` mapping must produce Secret
/// nodes for every referenced parent secret (`PARENT` and `SECONDARY`), and
/// `PARENT` must have at least one incoming `HasAccessTo` edge.
#[test]
fn reusable_workflow_secrets_mapping_form_propagates_edges() {
    let workflow_src = r#"
jobs:
call:
uses: ./.github/workflows/reusable.yml
secrets:
CHILD: ${{ secrets.PARENT }}
OTHER: ${{ secrets.SECONDARY }}
"#;
    let parsed = parse(workflow_src);

    // `secret_names` keeps its name: the failure messages capture it by identifier.
    let mut secret_names: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();
    for node in parsed.nodes_of_kind(NodeKind::Secret) {
        secret_names.insert(node.name.as_str());
    }
    assert!(
        secret_names.contains("PARENT"),
        "secrets: mapping value `${{{{ secrets.PARENT }}}}` must produce a Secret node; got: {secret_names:?}"
    );
    assert!(
        secret_names.contains("SECONDARY"),
        "secrets: mapping must iterate ALL keys, not just the first; got: {secret_names:?}"
    );

    let parent_node = parsed
        .nodes_of_kind(NodeKind::Secret)
        .find(|node| node.name == "PARENT")
        .unwrap();
    let parent_reachable = parsed
        .edges_to(parent_node.id)
        .any(|edge| edge.kind == EdgeKind::HasAccessTo);
    assert!(parent_reachable, "synthetic step must HasAccessTo PARENT");
}
/// A secret referenced from workflow-level `env:` must appear as a Secret node
/// with at least one incoming `HasAccessTo` edge, even when the only job is a
/// reusable-workflow call with no steps of its own.
#[test]
fn reusable_workflow_synthetic_step_inherits_workflow_env_secrets() {
    let workflow_src = r#"
env:
GLOBAL_TOKEN: "${{ secrets.GLOBAL }}"
jobs:
call:
uses: ./.github/workflows/reusable.yml
"#;
    let parsed = parse(workflow_src);

    let global_lookup = parsed
        .nodes_of_kind(NodeKind::Secret)
        .find(|node| node.name == "GLOBAL")
        .map(|node| node.id);
    assert!(
        global_lookup.is_some(),
        "workflow.env secret `GLOBAL` must produce a Secret node visible to the synthetic step"
    );
    let global_id = global_lookup.unwrap();

    let inherited = parsed
        .edges_to(global_id)
        .any(|edge| edge.kind == EdgeKind::HasAccessTo);
    assert!(
        inherited,
        "synthetic step for reusable workflow must inherit workflow.env HasAccessTo edge"
    );
}
/// Parses the same workflow nine times and requires the graph-level
/// `META_JOB_OUTPUTS` metadata to be non-empty and identical on every run.
#[test]
fn gha_meta_job_outputs_is_deterministic_across_runs() {
    let workflow_src = r#"
jobs:
emit:
runs-on: ubuntu-latest
outputs:
zebra: literal-z
apple: literal-a
mango: literal-m
kilo: literal-k
foxtrot: literal-f
steps:
- run: echo hi
"#;
    // `i`, `p` and `cur` keep their names: the drift message captures them.
    let mut previous: Option<String> = None;
    for i in 0..9 {
        let parsed = parse(workflow_src);
        let cur = match parsed.metadata.get(META_JOB_OUTPUTS) {
            Some(stamped) => stamped.clone(),
            None => String::new(),
        };
        assert!(
            !cur.is_empty(),
            "META_JOB_OUTPUTS must be populated on a workflow with outputs"
        );
        // Compare against the immediately preceding run (nothing to compare on run 0).
        if let Some(p) = previous.as_ref() {
            assert_eq!(
                p, &cur,
                "META_JOB_OUTPUTS drifted on run {i}: {p:?} vs {cur:?}"
            );
        }
        previous = Some(cur);
    }
}
/// Parses the same workflow nine times and requires the `META_PERMISSIONS`
/// metadata on the single `GITHUB_TOKEN` Identity node to be identical on
/// every run.
#[test]
fn gha_meta_permissions_is_deterministic_across_runs() {
    let workflow_src = r#"
permissions:
contents: read
id-token: write
packages: write
actions: read
pull-requests: write
jobs:
ci:
steps:
- run: echo hi
"#;
    // `i`, `p` and `cur` keep their names: the drift message captures them.
    let mut previous: Option<String> = None;
    for i in 0..9 {
        let parsed = parse(workflow_src);
        let tokens = parsed
            .nodes_of_kind(NodeKind::Identity)
            .collect::<Vec<_>>();
        assert_eq!(tokens.len(), 1, "one GITHUB_TOKEN identity");

        let cur = tokens[0]
            .metadata
            .get(META_PERMISSIONS)
            .expect("META_PERMISSIONS must be stamped")
            .clone();
        // Compare against the immediately preceding run (nothing to compare on run 0).
        if let Some(p) = previous.as_ref() {
            assert_eq!(
                p, &cur,
                "META_PERMISSIONS drifted on run {i}: {p:?} vs {cur:?}"
            );
        }
        previous = Some(cur);
    }
}
}