taudit_parse_ado/
lib.rs

1use std::collections::{HashMap, HashSet};
2
3use base64::Engine;
4use serde::Deserialize;
5use taudit_core::error::TauditError;
6use taudit_core::graph::*;
7use taudit_core::ports::PipelineParser;
8
/// Optional Azure DevOps enrichment inputs plumbed from CLI flags.
///
/// The parser stamps these values into graph metadata and, once `org`,
/// `project` and `pat` are all present and non-blank, uses them to query the
/// Azure DevOps variable-groups REST API for variable-group enrichment.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AdoParserContext {
    /// Azure DevOps organization (optional). Either a bare organization name
    /// or a full `http(s)://` base URL.
    pub org: Option<String>,
    /// Azure DevOps project name (optional).
    pub project: Option<String>,
    /// Azure DevOps PAT (optional). Never persisted into graph metadata —
    /// only its presence is recorded.
    pub pat: Option<String>,
}

impl AdoParserContext {
    /// Returns `true` when none of the three inputs was supplied at all.
    ///
    /// A field holding `Some("")` still counts as supplied here; blank values
    /// are filtered later, when the metadata is written.
    fn is_empty(&self) -> bool {
        let Self { org, project, pat } = self;
        [org, project, pat].iter().all(|field| field.is_none())
    }
}
28
/// Graph-metadata key under which the trimmed Azure DevOps organization value is stored.
const META_ADO_ORG: &str = "ado_org";
/// Graph-metadata key under which the trimmed Azure DevOps project value is stored.
const META_ADO_PROJECT: &str = "ado_project";
/// Graph-metadata key recording (as `"true"` / `"false"`) whether a non-blank
/// PAT was supplied. The PAT itself is never written to metadata.
const META_ADO_PAT_PRESENT: &str = "ado_pat_present";
/// Graph-metadata key recording whether org, project and PAT are all available,
/// i.e. whether variable-group enrichment can be attempted.
const META_ADO_VG_ENRICHMENT_READY: &str = "ado_variable_group_enrichment_ready";
/// Graph-metadata key recording whether the variable-group fetch succeeded.
const META_ADO_VG_ENRICHED: &str = "ado_variable_group_enriched";

/// Variable-group index: group name → (variable name → `isSecret` flag).
type AdoVariableGroupIndex = HashMap<String, HashMap<String, bool>>;
36
/// Regex-free check: does `s` contain `terraform apply` followed by
/// `-auto-approve` or `--auto-approve`?
///
/// The flag must appear *after* the `terraform apply` command, either later
/// on the same line or on one of the next few lines when each preceding line
/// ends in a shell continuation `\` or a PowerShell continuation `` ` ``.
/// `terraform` and `apply` may be separated by any run of spaces or tabs.
///
/// Everything from the first `#` on a line is treated as a comment and
/// ignored, so a flag that only appears in a comment does not count.
///
/// Case-sensitive on purpose — Terraform's CLI is case-sensitive and these
/// tokens never appear capitalised in real-world pipelines.
///
/// Returns `true` when an auto-approved apply is found, `false` otherwise
/// (including for empty input).
fn script_does_terraform_auto_apply(s: &str) -> bool {
    // Matches both `-auto-approve` and `--auto-approve`.
    const FLAG: &str = "auto-approve";
    // How many continuation lines after the command line are inspected.
    const MAX_CONTINUATION_LINES: usize = 3;

    // Drop a trailing `# ...` comment (best effort: does not understand quoting).
    fn strip_comment(line: &str) -> &str {
        line.split('#').next().unwrap_or("")
    }

    // True when the line ends in a shell (`\`) or PowerShell (`` ` ``) continuation.
    fn ends_with_continuation(line: &str) -> bool {
        let trimmed = line.trim_end();
        trimmed.ends_with('\\') || trimmed.ends_with('`')
    }

    // Locate `terraform<spaces/tabs>apply` and return the text that follows it.
    fn after_terraform_apply(line: &str) -> Option<&str> {
        const COMMAND: &str = "terraform";
        const SUBCOMMAND: &str = "apply";
        let mut search_from = 0;
        while let Some(offset) = line[search_from..].find(COMMAND) {
            let command_end = search_from + offset + COMMAND.len();
            let rest = &line[command_end..];
            let args = rest.trim_start_matches(|c: char| c == ' ' || c == '\t');
            // Require at least one separator so `terraformapply` does not match.
            if args.len() < rest.len() {
                if let Some(tail) = args.strip_prefix(SUBCOMMAND) {
                    return Some(tail);
                }
            }
            search_from = command_end;
        }
        None
    }

    let lines: Vec<&str> = s.lines().map(strip_comment).collect();
    for (i, line) in lines.iter().enumerate() {
        let Some(tail) = after_terraform_apply(line) else {
            continue;
        };
        // Only text *after* the command counts; a flag earlier on the line
        // belongs to some other command.
        if tail.contains(FLAG) {
            return true;
        }
        // Continuation: peek a few lines forward for the flag, stopping as
        // soon as a line does not continue onto the next one.
        let mut continuing = ends_with_continuation(line);
        for next in lines.iter().skip(i + 1).take(MAX_CONTINUATION_LINES) {
            if !continuing {
                break;
            }
            if next.contains(FLAG) {
                return true;
            }
            continuing = ends_with_continuation(next);
        }
    }
    false
}
69
70/// Azure DevOps YAML pipeline parser.
71pub struct AdoParser;
72
73impl AdoParser {
74    /// Parse an ADO pipeline with optional CLI-provided context for future
75    /// variable-group enrichment.
76    pub fn parse_with_context(
77        &self,
78        content: &str,
79        source: &PipelineSource,
80        ctx: Option<&AdoParserContext>,
81    ) -> Result<AuthorityGraph, TauditError> {
82        let mut de = serde_yaml::Deserializer::from_str(content);
83        let doc = de
84            .next()
85            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
86        let pipeline: AdoPipeline = match AdoPipeline::deserialize(doc) {
87            Ok(p) => p,
88            Err(e) => {
89                // Real-world ADO template fragments often wrap their root content in
90                // a parameter conditional like `- ${{ if eq(parameters.X, true) }}:`
91                // followed by a list of jobs. That is not a standard YAML mapping at
92                // the root, so serde_yaml fails with a "did not find expected key"
93                // error. These files are intended to be `template:`-included from a
94                // parent pipeline; analyzing them in isolation is not meaningful.
95                // Return a near-empty graph marked Partial instead of crashing the scan.
96                let msg = e.to_string();
97                if msg.contains("invalid type: sequence, expected struct AdoPipeline") {
98                    if let Some(recovered) = recover_after_leading_root_sequence(content) {
99                        let pipeline: AdoPipeline = serde_yaml::from_str(recovered)
100                            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
101                        let mut graph = build_ado_graph(pipeline, false, source, content, ctx);
102                        graph.mark_partial(
103                            GapKind::Structural,
104                            "ADO file starts with a root-level sequence before the pipeline mapping — recovered by analyzing the later pipeline mapping only".to_string(),
105                        );
106                        graph.stamp_edge_authority_summaries();
107                        return Ok(graph);
108                    }
109                }
110
111                let looks_like_template_fragment = (msg.contains("did not find expected key")
112                    || (msg.contains("parameters")
113                        && msg.contains("invalid type: map")
114                        && msg.contains("expected a sequence")))
115                    && has_root_parameter_conditional(content);
116                if looks_like_template_fragment {
117                    let mut graph = AuthorityGraph::new(source.clone());
118                    graph
119                        .metadata
120                        .insert(META_PLATFORM.into(), "azure-devops".into());
121                    apply_parser_context_metadata(&mut graph, ctx);
122                    graph.mark_partial(
123                        GapKind::Structural,
124                        "ADO template fragment with top-level parameter conditional — root structure depends on parent pipeline context".to_string(),
125                    );
126                    graph.stamp_edge_authority_summaries();
127                    return Ok(graph);
128                }
129                return Err(TauditError::Parse(format!("YAML parse error: {e}")));
130            }
131        };
132        let extra_docs = de.next().is_some();
133
134        let mut graph = build_ado_graph(pipeline, extra_docs, source, content, ctx);
135        graph.stamp_edge_authority_summaries();
136        Ok(graph)
137    }
138}
139
140impl PipelineParser for AdoParser {
141    fn platform(&self) -> &str {
142        "azure-devops"
143    }
144
145    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
146        self.parse_with_context(content, source, None)
147    }
148}
149
/// Build the authority graph for an already-deserialized ADO pipeline.
///
/// In order, this:
/// 1. stamps platform and parser-context metadata, and marks the graph
///    Partial when extra YAML documents or templated top-level carriers exist;
/// 2. records the PR trigger, repository resources and declared parameters;
/// 3. adds the implicit `System.AccessToken` identity node, downgrading it to
///    "constrained" when a non-broad `permissions:` block is present;
/// 4. processes the pipeline-level pool and variables;
/// 5. walks the first carrier present among `stages:`, `jobs:` and `steps:`
///    (in that priority), processing variables, pools, conditions,
///    `dependsOn`, steps and template delegations per scope;
/// 6. marks the graph Partial when a non-empty carrier yielded no Step nodes;
/// 7. stamps edge authority summaries and returns the graph.
///
/// * `pipeline` — the deserialized pipeline model.
/// * `extra_docs` — `true` when the file held more than one YAML document.
/// * `source` — cloned into the new graph.
/// * `content` — raw file text, passed to helpers that inspect the source.
/// * `ctx` — optional CLI context used for metadata and variable-group enrichment.
fn build_ado_graph(
    pipeline: AdoPipeline,
    extra_docs: bool,
    source: &PipelineSource,
    content: &str,
    ctx: Option<&AdoParserContext>,
) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source.clone());
    graph
        .metadata
        .insert(META_PLATFORM.into(), "azure-devops".into());
    apply_parser_context_metadata(&mut graph, ctx);
    if extra_docs {
        graph.mark_partial(
            GapKind::Expression,
            "file contains multiple YAML documents (--- separator) — only the first was analyzed"
                .to_string(),
        );
    }
    mark_unresolved_top_level_carriers(content, &mut graph);

    // Detect PR trigger — sets graph-level META_TRIGGER for trigger_context_mismatch.
    // A genuine ADO PR trigger is always a mapping (`pr:\n  branches:...`) or a
    // sequence (`pr:\n  - main`). Scalar opt-out forms — `pr: none`, `pr: ~`,
    // `pr: false`, `pr: ""` — must NOT be treated as active triggers.
    // Checking is_mapping()||is_sequence() is more robust than enumerating every
    // scalar opt-out value (serde_yaml 0.9 parses "none" as a string, "~" as a
    // string, and `null` as null — the shape test handles all forms uniformly).
    let has_pr_trigger = pipeline
        .pr
        .as_ref()
        .map(|v| v.is_mapping() || v.is_sequence())
        .unwrap_or(false);
    if has_pr_trigger {
        graph.metadata.insert(META_TRIGGER.into(), "pr".into());
    }

    // Capture resources.repositories[] declarations and detect aliases that
    // are actually referenced by an `extends:`, `template: x@alias`, or
    // `checkout: alias`. The result is JSON-encoded into graph metadata
    // for the `template_extends_unpinned_branch` rule to consume.
    process_repositories(&pipeline, content, &mut graph);

    // Capture top-level `parameters:` declarations (used by
    // parameter_interpolation_into_shell). ADO defaults missing `type:`
    // to string, so a missing/empty type is treated as a string.
    if let Some(ref params) = pipeline.parameters {
        for p in params {
            // Parameters without a usable name cannot be referenced; skip them.
            let name = match p.name.as_ref() {
                Some(n) if !n.is_empty() => n.clone(),
                _ => continue,
            };
            let param_type = p.param_type.clone().unwrap_or_default();
            let has_values_allowlist = p.values.as_ref().map(|v| !v.is_empty()).unwrap_or(false);
            graph.parameters.insert(
                name,
                ParamSpec {
                    param_type,
                    has_values_allowlist,
                },
            );
        }
    }

    // Shared across every scope and passed mutably to process_variables and
    // process_steps.
    let mut secret_ids: HashMap<String, NodeId> = HashMap::new();

    // System.AccessToken is always present — equivalent to GITHUB_TOKEN.
    // Tagged implicit: ADO injects this token into every task by platform design;
    // its exposure to marketplace tasks is structural, not a fixable misconfiguration.
    let mut meta = HashMap::new();
    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
    meta.insert(META_IMPLICIT.into(), "true".into());
    let token_id = graph.add_node_with_metadata(
        NodeKind::Identity,
        "System.AccessToken",
        TrustZone::FirstParty,
        meta,
    );

    // Pipeline-level permissions block — when present and non-broad (no write
    // permissions), downgrade System.AccessToken from broad → constrained so
    // over_privileged_identity does not fire on already-restricted pipelines.
    if let Some(ref perms_val) = pipeline.permissions {
        if !ado_permissions_are_broad(perms_val) {
            let perms_str = ado_permissions_display(perms_val);
            graph.nodes[token_id]
                .metadata
                .insert(META_IDENTITY_SCOPE.into(), "constrained".into());
            graph.nodes[token_id]
                .metadata
                .insert(META_PERMISSIONS.into(), perms_str);
        }
    }

    // Pipeline-level pool: adds Image node, tagged self-hosted when applicable.
    process_pool(&pipeline.pool, &pipeline.workspace, &mut graph);

    // Pipeline-level variable groups and named secrets.
    // pipeline_plain_vars tracks non-secret named variables so $(VAR) refs
    // in scripts don't generate false-positive Secret nodes for plain
    // config values. Stage/job scopes clone and extend this set so plain
    // variables do not leak sideways into unrelated stages or jobs.
    // pipeline_has_variable_groups is set when any pipeline-scope group is encountered so
    // extract_dollar_paren_secrets can avoid creating per-variable Secret
    // nodes from opaque groups (BUG-3).
    let mut pipeline_plain_vars: HashSet<String> = HashSet::new();
    let mut pipeline_has_variable_groups = false;
    // `None` unless the context is enrichment-ready and the fetch succeeded.
    let variable_group_index = maybe_fetch_variable_group_index(ctx, &mut graph);
    let pipeline_secret_ids = process_variables(
        &pipeline.variables,
        &mut graph,
        &mut secret_ids,
        "pipeline",
        &mut pipeline_plain_vars,
        &mut pipeline_has_variable_groups,
        variable_group_index.as_ref(),
    );

    // Determine pipeline structure: stages → jobs → steps, or jobs → steps, or steps only
    if let Some(ref stages) = pipeline.stages {
        for stage in stages {
            // Stage-level template reference — delegate and mark Partial
            if let Some(ref tpl) = stage.template {
                let stage_name = stage.stage.as_deref().unwrap_or("stage");
                add_template_delegation(stage_name, tpl, token_id, None, &mut graph);
                continue;
            }

            let stage_name = stage.stage.as_deref().unwrap_or("stage").to_string();
            let mut stage_plain_vars = pipeline_plain_vars.clone();
            let mut stage_has_variable_groups = false;
            let stage_secret_ids = process_variables(
                &stage.variables,
                &mut graph,
                &mut secret_ids,
                &stage_name,
                &mut stage_plain_vars,
                &mut stage_has_variable_groups,
                variable_group_index.as_ref(),
            );
            // Variable groups seen at an outer scope remain in effect here.
            let stage_scope_has_variable_groups =
                pipeline_has_variable_groups || stage_has_variable_groups;

            let stage_condition = non_empty_condition(&stage.condition);
            if let Some(c) = stage_condition {
                mark_condition_partial(&mut graph, "stage", &stage_name, c);
            }
            let stage_depends_on =
                explicit_depends_on_csv(&stage.depends_on, &mut graph, "stage", &stage_name);

            for job in &stage.jobs {
                let job_name = job.effective_name();
                let mut job_plain_vars = stage_plain_vars.clone();
                let mut job_has_variable_groups = false;
                let job_secret_ids = process_variables(
                    &job.variables,
                    &mut graph,
                    &mut secret_ids,
                    &job_name,
                    &mut job_plain_vars,
                    &mut job_has_variable_groups,
                    variable_group_index.as_ref(),
                );
                let step_scope_has_variable_groups =
                    stage_scope_has_variable_groups || job_has_variable_groups;

                // Job-level workspace wins; otherwise fall back to the pipeline-level one.
                let effective_workspace = job.workspace.as_ref().or(pipeline.workspace.as_ref());
                process_pool(&job.pool, &effective_workspace.cloned(), &mut graph);

                // Secrets visible to this job's steps: pipeline + stage + job scope.
                let all_secrets: Vec<NodeId> = pipeline_secret_ids
                    .iter()
                    .chain(&stage_secret_ids)
                    .chain(&job_secret_ids)
                    .copied()
                    .collect();

                // Node count before this job's steps are added; passed to
                // tag_job_steps_env_approval below.
                let steps_start = graph.nodes.len();

                let job_condition = non_empty_condition(&job.condition);
                if let Some(c) = job_condition {
                    mark_condition_partial(&mut graph, "job", &job_name, c);
                }
                // Job's `dependsOn:` overrides any stage-level value when both
                // are present (job-level wins for the job's own ordering); fall
                // back to the stage-level value otherwise so the chain still
                // surfaces on the steps.
                let job_depends_on =
                    explicit_depends_on_csv(&job.depends_on, &mut graph, "job", &job_name)
                        .or_else(|| stage_depends_on.clone());

                let outer_condition = join_conditions(stage_condition, job_condition);

                let job_steps = job.all_steps();
                process_steps(
                    &job_steps,
                    &job_name,
                    token_id,
                    &all_secrets,
                    &job_plain_vars,
                    step_scope_has_variable_groups,
                    outer_condition.as_deref(),
                    job_depends_on.as_deref(),
                    &mut graph,
                    &mut secret_ids,
                );

                if let Some(ref tpl) = job.template {
                    add_template_delegation(&job_name, tpl, token_id, Some(&job_name), &mut graph);
                }

                if job.has_environment_binding() {
                    tag_job_steps_env_approval(&mut graph, steps_start);
                }
            }
        }
    } else if let Some(ref jobs) = pipeline.jobs {
        // No `stages:` key: jobs sit directly under the pipeline, so only
        // pipeline and job scopes contribute variables and conditions.
        for job in jobs {
            let job_name = job.effective_name();
            let mut job_plain_vars = pipeline_plain_vars.clone();
            let mut job_has_variable_groups = false;
            let job_secret_ids = process_variables(
                &job.variables,
                &mut graph,
                &mut secret_ids,
                &job_name,
                &mut job_plain_vars,
                &mut job_has_variable_groups,
                variable_group_index.as_ref(),
            );
            let step_scope_has_variable_groups =
                pipeline_has_variable_groups || job_has_variable_groups;

            let effective_workspace = job.workspace.as_ref().or(pipeline.workspace.as_ref());
            process_pool(&job.pool, &effective_workspace.cloned(), &mut graph);

            let all_secrets: Vec<NodeId> = pipeline_secret_ids
                .iter()
                .chain(&job_secret_ids)
                .copied()
                .collect();

            let steps_start = graph.nodes.len();

            let job_condition = non_empty_condition(&job.condition);
            if let Some(c) = job_condition {
                mark_condition_partial(&mut graph, "job", &job_name, c);
            }
            let job_depends_on =
                explicit_depends_on_csv(&job.depends_on, &mut graph, "job", &job_name);

            let job_steps = job.all_steps();
            process_steps(
                &job_steps,
                &job_name,
                token_id,
                &all_secrets,
                &job_plain_vars,
                step_scope_has_variable_groups,
                job_condition,
                job_depends_on.as_deref(),
                &mut graph,
                &mut secret_ids,
            );

            if let Some(ref tpl) = job.template {
                add_template_delegation(&job_name, tpl, token_id, Some(&job_name), &mut graph);
            }

            if job.has_environment_binding() {
                tag_job_steps_env_approval(&mut graph, steps_start);
            }
        }
    } else if let Some(ref steps) = pipeline.steps {
        // Steps-only pipeline: everything runs in the implicit "pipeline" scope.
        process_steps(
            steps,
            "pipeline",
            token_id,
            &pipeline_secret_ids,
            &pipeline_plain_vars,
            pipeline_has_variable_groups,
            None,
            None,
            &mut graph,
            &mut secret_ids,
        );
    }

    // Cross-platform misclassification trap (red-team R2 #5): a YAML file
    // shaped like ADO at the top level (stages/jobs/steps present) but whose
    // body uses constructs the ADO parser doesn't recognise will deserialize
    // without errors and yield no Step nodes. Marking Partial surfaces the
    // gap instead of returning completeness=complete on a clean-but-empty
    // graph (which a CI gate would treat as "passed").
    let step_count = graph
        .nodes
        .iter()
        .filter(|n| n.kind == NodeKind::Step)
        .count();
    let had_step_carrier = pipeline.stages.as_ref().is_some_and(|s| !s.is_empty())
        || pipeline.jobs.as_ref().is_some_and(|j| !j.is_empty())
        || pipeline.steps.as_ref().is_some_and(|s| !s.is_empty());
    if step_count == 0 && had_step_carrier {
        graph.mark_partial(
                GapKind::Structural,
                "stages/jobs/steps parsed but produced 0 step nodes — possible non-ADO YAML wrong-platform-classified".to_string(),
            );
    }

    graph.stamp_edge_authority_summaries();
    graph
}
461
462fn apply_parser_context_metadata(graph: &mut AuthorityGraph, ctx: Option<&AdoParserContext>) {
463    let Some(ctx) = ctx.filter(|c| !c.is_empty()) else {
464        return;
465    };
466
467    if let Some(org) = ctx.org.as_ref().filter(|v| !v.trim().is_empty()) {
468        graph
469            .metadata
470            .insert(META_ADO_ORG.into(), org.trim().to_string());
471    }
472    if let Some(project) = ctx.project.as_ref().filter(|v| !v.trim().is_empty()) {
473        graph
474            .metadata
475            .insert(META_ADO_PROJECT.into(), project.trim().to_string());
476    }
477
478    let pat_present = ctx.pat.as_ref().is_some_and(|v| !v.trim().is_empty());
479    graph
480        .metadata
481        .insert(META_ADO_PAT_PRESENT.into(), pat_present.to_string());
482
483    let enrichment_ready = graph.metadata.contains_key(META_ADO_ORG)
484        && graph.metadata.contains_key(META_ADO_PROJECT)
485        && pat_present;
486    graph.metadata.insert(
487        META_ADO_VG_ENRICHMENT_READY.into(),
488        enrichment_ready.to_string(),
489    );
490}
491
492fn maybe_fetch_variable_group_index(
493    ctx: Option<&AdoParserContext>,
494    graph: &mut AuthorityGraph,
495) -> Option<AdoVariableGroupIndex> {
496    let ctx = ctx?;
497    if graph
498        .metadata
499        .get(META_ADO_VG_ENRICHMENT_READY)
500        .is_none_or(|v| v != "true")
501    {
502        return None;
503    }
504
505    match fetch_variable_group_index(ctx) {
506        Ok(index) => {
507            graph
508                .metadata
509                .insert(META_ADO_VG_ENRICHED.into(), "true".into());
510            Some(index)
511        }
512        Err(err) => {
513            graph
514                .metadata
515                .insert(META_ADO_VG_ENRICHED.into(), "false".into());
516            graph.mark_partial(
517                GapKind::Structural,
518                format!(
519                    "warning: ADO variable-group enrichment failed ({err}) — falling back to static variable-group modelling"
520                ),
521            );
522            None
523        }
524    }
525}
526
527fn fetch_variable_group_index(ctx: &AdoParserContext) -> Result<AdoVariableGroupIndex, String> {
528    let org = ctx
529        .org
530        .as_deref()
531        .map(str::trim)
532        .filter(|v| !v.is_empty())
533        .ok_or_else(|| "missing org".to_string())?;
534    let project = ctx
535        .project
536        .as_deref()
537        .map(str::trim)
538        .filter(|v| !v.is_empty())
539        .ok_or_else(|| "missing project".to_string())?;
540    let pat = ctx
541        .pat
542        .as_deref()
543        .map(str::trim)
544        .filter(|v| !v.is_empty())
545        .ok_or_else(|| "missing PAT".to_string())?;
546
547    let org_base = if org.starts_with("http://") || org.starts_with("https://") {
548        org.trim_end_matches('/').to_string()
549    } else {
550        format!("https://dev.azure.com/{}", org.trim_matches('/'))
551    };
552    let project_segment = project.replace(' ', "%20");
553    let url = format!(
554        "{org_base}/{project_segment}/_apis/distributedtask/variablegroups?api-version=7.1"
555    );
556    let auth = format!(
557        "Basic {}",
558        base64::engine::general_purpose::STANDARD.encode(format!(":{pat}"))
559    );
560
561    let response = ureq::get(&url)
562        .set("Accept", "application/json")
563        .set("Authorization", &auth)
564        .call()
565        .map_err(map_ureq_error)?;
566
567    let body: serde_json::Value = response
568        .into_json()
569        .map_err(|e| format!("invalid JSON response: {e}"))?;
570    parse_variable_group_index_from_json(&body)
571}
572
573fn map_ureq_error(err: ureq::Error) -> String {
574    match err {
575        ureq::Error::Status(code, _) => format!("HTTP {code} from variablegroups API"),
576        ureq::Error::Transport(t) => t.to_string(),
577    }
578}
579
580fn parse_variable_group_index_from_json(
581    body: &serde_json::Value,
582) -> Result<AdoVariableGroupIndex, String> {
583    let mut index: AdoVariableGroupIndex = HashMap::new();
584    let values = body
585        .get("value")
586        .and_then(|v| v.as_array())
587        .ok_or_else(|| "response missing 'value' array".to_string())?;
588
589    for item in values {
590        let Some(group_name) = item.get("name").and_then(|v| v.as_str()) else {
591            continue;
592        };
593        let mut group_vars: HashMap<String, bool> = HashMap::new();
594        if let Some(vars_obj) = item.get("variables").and_then(|v| v.as_object()) {
595            for (var_name, meta) in vars_obj {
596                let is_secret = meta
597                    .get("isSecret")
598                    .and_then(|v| v.as_bool())
599                    .unwrap_or(false);
600                group_vars.insert(var_name.clone(), is_secret);
601            }
602        }
603        index.insert(group_name.to_string(), group_vars);
604    }
605
606    Ok(index)
607}
608
/// Yields the trimmed text of an ADO `condition:` value, or `None` when the
/// value is absent, empty, or whitespace-only. ADO treats a blank condition
/// the same as an omitted one, so blank values must not trigger a
/// Partial-Expression gap.
fn non_empty_condition(c: &Option<String>) -> Option<&str> {
    c.as_deref()
        .map(str::trim)
        .filter(|trimmed| !trimmed.is_empty())
}
621
/// Combine the outer condition chain (already ` AND `-joined for enclosing
/// scopes) with this scope's own condition. Whichever sides are present are
/// joined with ` AND `, outer first; `None` is returned when both are absent.
/// The result is what gets stamped on Step nodes via `META_CONDITION`.
fn join_conditions(outer: Option<&str>, inner: Option<&str>) -> Option<String> {
    let present: Vec<&str> = outer.into_iter().chain(inner).collect();
    if present.is_empty() {
        None
    } else {
        Some(present.join(" AND "))
    }
}
633
634/// Top-level `stages:` and `jobs:` carriers may be supplied as template
635/// expressions (for example `stages: ${{ parameters.stages }}`). The serde
636/// model accepts those shapes so parsing can continue, but they hide the
637/// authority-carrying job/step graph until runtime. Mark them explicitly
638/// Partial instead of returning a clean Complete graph with no steps.
639fn mark_unresolved_top_level_carriers(content: &str, graph: &mut AuthorityGraph) {
640    let mut de = serde_yaml::Deserializer::from_str(content);
641    let Some(doc) = de.next() else {
642        return;
643    };
644    let Ok(value) = serde_yaml::Value::deserialize(doc) else {
645        return;
646    };
647    let Some(map) = value.as_mapping() else {
648        return;
649    };
650
651    for key in ["stages", "jobs"] {
652        let Some(value) = map.get(key) else {
653            continue;
654        };
655        if is_ado_template_expression_scalar(value) {
656            graph.mark_partial(
657                GapKind::Expression,
658                format!(
659                    "ADO top-level `{key}:` uses a template expression — {key} cannot be enumerated statically"
660                ),
661            );
662        }
663    }
664}
665
666fn is_ado_template_expression_scalar(value: &serde_yaml::Value) -> bool {
667    value
668        .as_str()
669        .map(|s| {
670            let trimmed = s.trim();
671            trimmed.starts_with("${{") && trimmed.ends_with("}}")
672        })
673        .unwrap_or(false)
674}
675
676/// Mark the graph Partial with `GapKind::Expression` and a reason that names
677/// the scope kind ("stage" / "job" / "step"), the entity's display name, and
678/// the literal condition text — enough for an operator to grep findings
679/// against `condition:` clauses in the source pipeline.
680fn mark_condition_partial(
681    graph: &mut AuthorityGraph,
682    scope_kind: &str,
683    name: &str,
684    condition: &str,
685) {
686    graph.mark_partial(
687        GapKind::Expression,
688        format!(
689            "ADO {scope_kind} '{name}' condition: '{condition}' — runtime evaluation not modelled"
690        ),
691    );
692}
693
694/// Normalize explicit `dependsOn:` to a comma-joined predecessor list.
695///
696/// ADO accepts string and list-of-strings forms, both of which are statically
697/// representable and returned here. Any other YAML shape is usually a template
698/// expression or conditional object that resolves at runtime; in that case we
699/// return `None` and mark the graph Partial-Expression so completeness is not
700/// overstated.
701fn explicit_depends_on_csv(
702    depends_on: &Option<DependsOn>,
703    graph: &mut AuthorityGraph,
704    scope_kind: &str,
705    name: &str,
706) -> Option<String> {
707    let d = depends_on.as_ref()?;
708    match d {
709        DependsOn::Single(s) => {
710            let trimmed = s.trim();
711            if trimmed.is_empty() {
712                None
713            } else {
714                Some(trimmed.to_string())
715            }
716        }
717        DependsOn::Multiple(v) => {
718            let csv = v
719                .iter()
720                .map(|s| s.trim())
721                .filter(|s| !s.is_empty())
722                .collect::<Vec<_>>()
723                .join(",");
724            if csv.is_empty() {
725                None
726            } else {
727                Some(csv)
728            }
729        }
730        DependsOn::Other(raw) => {
731            mark_depends_on_partial(graph, scope_kind, name, raw);
732            None
733        }
734    }
735}
736
737fn mark_depends_on_partial(
738    graph: &mut AuthorityGraph,
739    scope_kind: &str,
740    name: &str,
741    raw: &serde_yaml::Value,
742) {
743    let shape = match raw {
744        serde_yaml::Value::Null => "null",
745        serde_yaml::Value::Bool(_) => "bool",
746        serde_yaml::Value::Number(_) => "number",
747        serde_yaml::Value::String(_) => "string",
748        serde_yaml::Value::Sequence(_) => "sequence",
749        serde_yaml::Value::Mapping(_) => "mapping",
750        serde_yaml::Value::Tagged(_) => "tagged",
751    };
752    graph.mark_partial(
753        GapKind::Expression,
754        format!(
755            "ADO {scope_kind} '{name}' dependsOn uses unsupported {shape} form — runtime expansion not modelled"
756        ),
757    );
758}
759
760/// Process an ADO `pool:` block. ADO pools come in two shapes:
761///   - `pool: my-self-hosted-pool` (string shorthand — always self-hosted)
762///   - `pool: { name: my-pool }` (named pool — self-hosted)
763///   - `pool: { vmImage: ubuntu-latest }` (Microsoft-hosted)
764///   - `pool: { name: my-pool, vmImage: ubuntu-latest }` (hosted; vmImage wins)
765///
766/// Creates an Image node representing the agent environment. Self-hosted pools
767/// Returns `true` when an ADO pipeline-level `permissions:` value implies a
768/// broad (write-capable) token scope, `false` when every scope is `none` or
769/// `read` (i.e. the token has been explicitly restricted).
770///
771/// ADO permission values are the strings `"read"`, `"write"`, and `"none"`.
772/// Any unrecognised shape is conservatively treated as broad.
773fn ado_permissions_are_broad(perms: &serde_yaml::Value) -> bool {
774    if let Some(map) = perms.as_mapping() {
775        map.values().any(|v| v.as_str() == Some("write"))
776    } else {
777        // Scalar form: ADO accepts "read", "write", "none" as pipeline-level
778        // permission values. "read" and "none" are constrained; "write" is
779        // broad. Anything else (null, tilde, empty, unrecognised string) is
780        // conservatively treated as broad (unknown = risky).
781        matches!(perms.as_str(), Some("write"))
782    }
783}
784
785/// Format an ADO `permissions:` YAML value into a compact human-readable
786/// string for the finding message (e.g. `"contents: none, idToken: none"`).
787fn ado_permissions_display(perms: &serde_yaml::Value) -> String {
788    if let Some(map) = perms.as_mapping() {
789        map.iter()
790            .filter_map(|(k, v)| {
791                let key = k.as_str()?;
792                let val = v.as_str().unwrap_or("?");
793                Some(format!("{key}: {val}"))
794            })
795            .collect::<Vec<_>>()
796            .join(", ")
797    } else {
798        perms.as_str().unwrap_or("none").to_string()
799    }
800}
801
802/// are tagged with META_SELF_HOSTED so downstream rules can flag them.
803///
804/// When `workspace` is provided and contains `clean:` with a truthy value
805/// (`true`, `all`, `outputs`, `resources`), the Image node is also tagged
806/// with META_WORKSPACE_CLEAN.
807fn process_pool(
808    pool: &Option<serde_yaml::Value>,
809    workspace: &Option<serde_yaml::Value>,
810    graph: &mut AuthorityGraph,
811) {
812    let Some(pool_val) = pool else {
813        return;
814    };
815
816    let (image_name, is_self_hosted) = match pool_val {
817        serde_yaml::Value::String(s) => (s.clone(), true),
818        serde_yaml::Value::Mapping(map) => {
819            let name = map.get("name").and_then(|v| v.as_str());
820            let vm_image = map.get("vmImage").and_then(|v| v.as_str());
821            match (name, vm_image) {
822                (_, Some(vm)) => (vm.to_string(), false),
823                (Some(n), None) => (n.to_string(), true),
824                (None, None) => return,
825            }
826        }
827        _ => return,
828    };
829
830    let mut meta = HashMap::new();
831    if is_self_hosted {
832        meta.insert(META_SELF_HOSTED.into(), "true".into());
833    }
834    if has_workspace_clean(workspace) {
835        meta.insert(META_WORKSPACE_CLEAN.into(), "true".into());
836    }
837    graph.add_node_with_metadata(NodeKind::Image, image_name, TrustZone::FirstParty, meta);
838}
839
840/// Returns `true` when the ADO `workspace:` value specifies a `clean:` setting
841/// that wipes the workspace between runs. Recognised truthy forms:
842///   - `workspace: { clean: all }`
843///   - `workspace: { clean: outputs }`
844///   - `workspace: { clean: resources }`
845///   - `workspace: { clean: true }`
846fn has_workspace_clean(workspace: &Option<serde_yaml::Value>) -> bool {
847    let Some(ws) = workspace else {
848        return false;
849    };
850    let Some(map) = ws.as_mapping() else {
851        return false;
852    };
853    let Some(clean) = map.get("clean") else {
854        return false;
855    };
856    match clean {
857        serde_yaml::Value::Bool(b) => *b,
858        serde_yaml::Value::String(s) => {
859            let lower = s.to_ascii_lowercase();
860            matches!(lower.as_str(), "all" | "outputs" | "resources" | "true")
861        }
862        _ => false,
863    }
864}
865
866/// Scan the parsed pipeline for `resources.repositories[]` declarations and
867/// determine which aliases are referenced inside the same file. Stores the
868/// result as a JSON-encoded array in `graph.metadata[META_REPOSITORIES]`.
869///
870/// Usage signal — an alias is "used" when it appears in any of:
871///   - `template: <path>@<alias>` (anywhere — top-level extends, stage, job, step)
872///   - `extends:` referencing `template: <path>@<alias>`
873///   - `checkout: <alias>` (steps consume an external repo into the workspace)
874///
875/// The `extends:` and per-step `template:` references are resolved by walking
876/// the parsed Value tree; the raw text is only used for the `checkout:` case
877/// (cheap substring scan, robust to YAML shape variation).
878fn process_repositories(pipeline: &AdoPipeline, raw_content: &str, graph: &mut AuthorityGraph) {
879    let resources = match pipeline.resources.as_ref() {
880        Some(r) if !r.repositories.is_empty() => r,
881        _ => return,
882    };
883
884    // Collect all aliases referenced as `template: x@alias`. We walk every
885    // `template:` field appearing in the parsed pipeline (extends and steps
886    // already deserialize to their own paths; stages/jobs use the per-job
887    // template field). The raw YAML walk via serde_yaml::Value covers all
888    // shapes uniformly without re-deriving structure-specific models.
889    let mut used_aliases: HashSet<String> = HashSet::new();
890
891    if let Some(ref ext) = pipeline.extends {
892        collect_template_alias_refs(ext, &mut used_aliases);
893    }
894    if let Ok(value) = serde_yaml::from_str::<serde_yaml::Value>(raw_content) {
895        collect_template_alias_refs(&value, &mut used_aliases);
896        collect_checkout_alias_refs(&value, &mut used_aliases);
897    }
898
899    // Build the JSON-encoded repository descriptor list.
900    let mut entries: Vec<serde_json::Value> = Vec::with_capacity(resources.repositories.len());
901    for repo in &resources.repositories {
902        let Some(alias) = repo.repository.as_ref().filter(|s| !s.is_empty()) else {
903            continue;
904        };
905        let used = used_aliases.contains(alias);
906        let mut obj = serde_json::Map::new();
907        obj.insert("alias".into(), serde_json::Value::String(alias.clone()));
908        if let Some(ref t) = repo.repo_type {
909            obj.insert("repo_type".into(), serde_json::Value::String(t.clone()));
910        }
911        if let Some(ref n) = repo.name {
912            obj.insert("name".into(), serde_json::Value::String(n.clone()));
913        }
914        if let Some(ref r) = repo.git_ref {
915            obj.insert("ref".into(), serde_json::Value::String(r.clone()));
916        }
917        obj.insert("used".into(), serde_json::Value::Bool(used));
918        entries.push(serde_json::Value::Object(obj));
919    }
920
921    if let Ok(json) = serde_json::to_string(&serde_json::Value::Array(entries)) {
922        graph.metadata.insert(META_REPOSITORIES.into(), json);
923    }
924}
925
926/// Walk a YAML value and record every `template: <ref>@<alias>` alias seen.
927/// Recurses into mappings and sequences so it catches references in extends,
928/// stages, jobs, steps, and conditional blocks indiscriminately.
929fn collect_template_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
930    match value {
931        serde_yaml::Value::Mapping(map) => {
932            for (k, v) in map {
933                if k.as_str() == Some("template") {
934                    if let Some(s) = v.as_str() {
935                        if let Some(alias) = parse_template_alias(s) {
936                            sink.insert(alias);
937                        }
938                    }
939                }
940                collect_template_alias_refs(v, sink);
941            }
942        }
943        serde_yaml::Value::Sequence(seq) => {
944            for v in seq {
945                collect_template_alias_refs(v, sink);
946            }
947        }
948        _ => {}
949    }
950}
951
952/// Walk a YAML value and record every `checkout: <alias>` value seen, except
953/// `self` and `none` which are platform keywords (not external repo aliases).
954fn collect_checkout_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
955    match value {
956        serde_yaml::Value::Mapping(map) => {
957            for (k, v) in map {
958                if k.as_str() == Some("checkout") {
959                    if let Some(s) = v.as_str() {
960                        if s != "self" && s != "none" && !s.is_empty() {
961                            sink.insert(s.to_string());
962                        }
963                    }
964                }
965                collect_checkout_alias_refs(v, sink);
966            }
967        }
968        serde_yaml::Value::Sequence(seq) => {
969            for v in seq {
970                collect_checkout_alias_refs(v, sink);
971            }
972        }
973        _ => {}
974    }
975}
976
/// Pull the `<alias>` out of a `template: <path>@<alias>` reference.
///
/// The alias is everything after the last `@`. Returns `None` when there is
/// no `@` at all (a plain in-repo path such as `templates/deploy.yml`, which
/// targets the current pipeline's repo rather than a
/// `resources.repositories[]` entry) or when nothing follows the last `@`.
fn parse_template_alias(template_ref: &str) -> Option<String> {
    template_ref
        .rsplit_once('@')
        .map(|(_, alias)| alias)
        .filter(|alias| !alias.is_empty())
        .map(str::to_string)
}
989
990/// Tag every Step node added since `start_idx` with META_ENV_APPROVAL.
991/// Used after `process_steps` for a job whose `environment:` is configured —
992/// the environment binding indicates the job sits behind a manual approval
993/// gate, which is an isolation boundary that breaks automatic propagation.
994fn tag_job_steps_env_approval(graph: &mut AuthorityGraph, start_idx: usize) {
995    for node in graph.nodes.iter_mut().skip(start_idx) {
996        if node.kind == NodeKind::Step {
997            node.metadata
998                .insert(META_ENV_APPROVAL.into(), "true".into());
999        }
1000    }
1001}
1002
1003/// Process a variable list, creating Secret nodes and returning their IDs.
1004/// Returns IDs for secrets only (not variable groups, which are opaque).
1005/// Populates `plain_vars` with the names of non-secret named variables so
1006/// downstream `$(VAR)` scanning can skip them.
1007fn process_variables(
1008    variables: &Option<AdoVariables>,
1009    graph: &mut AuthorityGraph,
1010    cache: &mut HashMap<String, NodeId>,
1011    scope: &str,
1012    plain_vars: &mut HashSet<String>,
1013    has_variable_groups: &mut bool,
1014    variable_group_index: Option<&AdoVariableGroupIndex>,
1015) -> Vec<NodeId> {
1016    let mut ids = Vec::new();
1017
1018    let vars = match variables.as_ref() {
1019        Some(v) => v,
1020        None => return ids,
1021    };
1022
1023    for var in &vars.0 {
1024        match var {
1025            AdoVariable::Group { group } => {
1026                // Skip template-expression group names like `${{ parameters.env }}`.
1027                // We can't resolve them statically — mark Partial but don't create
1028                // a misleading Secret node with the expression as its name.
1029                if group.contains("${{") {
1030                    graph.mark_partial(
1031                        GapKind::Expression,
1032                        format!(
1033                            "variable group in {scope} uses template expression — group name unresolvable at parse time"
1034                        ),
1035                    );
1036                    continue;
1037                }
1038
1039                if let Some(group_vars) = variable_group_index.and_then(|idx| idx.get(group)) {
1040                    for (var_name, is_secret) in group_vars {
1041                        if *is_secret {
1042                            let id = find_or_create_secret(graph, cache, var_name);
1043                            ids.push(id);
1044                        } else {
1045                            plain_vars.insert(var_name.clone());
1046                        }
1047                    }
1048                    continue;
1049                }
1050
1051                *has_variable_groups = true;
1052                let mut meta = HashMap::new();
1053                meta.insert(META_VARIABLE_GROUP.into(), "true".into());
1054                let id = graph.add_node_with_metadata(
1055                    NodeKind::Secret,
1056                    group.as_str(),
1057                    TrustZone::FirstParty,
1058                    meta,
1059                );
1060                cache.insert(group.clone(), id);
1061                ids.push(id);
1062                graph.mark_partial(
1063                    GapKind::Structural,
1064                    format!(
1065                        "variable group '{group}' in {scope} — contents unresolvable without ADO API access"
1066                    ),
1067                );
1068            }
1069            AdoVariable::Named {
1070                name, is_secret, ..
1071            } => {
1072                if *is_secret {
1073                    let id = find_or_create_secret(graph, cache, name);
1074                    ids.push(id);
1075                } else {
1076                    plain_vars.insert(name.clone());
1077                }
1078            }
1079        }
1080    }
1081
1082    ids
1083}
1084
/// Process a list of ADO steps, adding nodes and edges to the graph.
///
/// Template steps are handed to `add_template_delegation` and otherwise
/// skipped. Every other step yields one Step node that is linked to
/// `token_id` and to each of `inherited_secrets` with a `HasAccessTo` edge,
/// then enriched with metadata and further edges derived from its inputs,
/// env values, and inline script.
///
/// Parameters:
/// - `steps`: the steps to process, in declaration order.
/// - `job_name`: stamped onto each Step node as META_JOB_NAME and used to
///   build fallback step names of the form `{job_name}[{idx}]`.
/// - `token_id`: node every step receives a `HasAccessTo` edge to (and a
///   `PersistsTo` edge for checkout steps with `persistCredentials: true`).
/// - `inherited_secrets`: secret nodes every step receives a `HasAccessTo`
///   edge to.
/// - `plain_vars`, `has_variable_groups`, `cache`: forwarded unchanged to
///   `extract_dollar_paren_secrets` for `$(varName)` scanning.
///
/// `outer_condition` is the AND-joined chain of stage- and job-level
/// `condition:` expressions that gate this step's containing job at runtime.
/// When present, it (combined with any per-step `condition:`) is stamped onto
/// every emitted Step node via `META_CONDITION` so downstream rules can see
/// that the step is conditionally reachable.
///
/// `outer_depends_on` is the comma-joined `dependsOn:` predecessor list
/// inherited from the job (or stage). Stamped onto Step nodes via
/// `META_DEPENDS_ON` only when non-default (the parser does not synthesise
/// the implicit "depends on previous job/stage" link).
#[allow(clippy::too_many_arguments)]
fn process_steps(
    steps: &[AdoStep],
    job_name: &str,
    token_id: NodeId,
    inherited_secrets: &[NodeId],
    plain_vars: &HashSet<String>,
    has_variable_groups: bool,
    outer_condition: Option<&str>,
    outer_depends_on: Option<&str>,
    graph: &mut AuthorityGraph,
    cache: &mut HashMap<String, NodeId>,
) {
    for (idx, step) in steps.iter().enumerate() {
        // Template step — delegation, mark partial
        if let Some(ref tpl) = step.template {
            // Name preference: displayName, then name, then `{job_name}[{idx}]`.
            let step_name = step
                .display_name
                .as_deref()
                .or(step.name.as_deref())
                .map(|s| s.to_string())
                .unwrap_or_else(|| format!("{job_name}[{idx}]"));
            add_template_delegation(&step_name, tpl, token_id, Some(job_name), graph);
            continue;
        }

        // Determine step kind and trust zone
        let (step_name, trust_zone, inline_script) = classify_step(step, job_name, idx);

        // Step-level condition: mark Partial-Expression and join with the
        // outer (stage + job) chain so the step's META_CONDITION reflects the
        // full ` AND `-joined gate it actually sits behind at runtime.
        let step_condition = non_empty_condition(&step.condition);
        if let Some(c) = step_condition {
            mark_condition_partial(graph, "step", &step_name, c);
        }
        let effective_condition = join_conditions(outer_condition, step_condition);

        // Step-level `dependsOn:` overrides the inherited (job-level) value
        // when present. Default behaviour (no key) inherits from the job —
        // and at the job level we already only stamped non-default values,
        // so absence at both layers means we stamp nothing.
        let effective_depends_on =
            explicit_depends_on_csv(&step.depends_on, graph, "step", &step_name)
                .or_else(|| outer_depends_on.map(|s| s.to_string()));

        let step_id = graph.add_node(NodeKind::Step, &step_name, trust_zone);

        // Stamp parent job name so consumers (e.g. `taudit map --job`) can
        // attribute steps back to their containing job.
        if let Some(node) = graph.nodes.get_mut(step_id) {
            node.metadata.insert(META_JOB_NAME.into(), job_name.into());
            // Stamp the raw inline script body so script-aware rules
            // (env-export of secrets, secret materialisation to files,
            // Key Vault → plaintext) can pattern-match on the actual
            // command text the agent will run.
            if let Some(ref body) = inline_script {
                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
            }
            // Stamp the AND-joined chain of stage/job/step `condition:`
            // expressions that gate this step at runtime. Consumed by
            // `apply_compensating_controls` to downgrade severity on
            // findings whose firing step is gated behind a conditional.
            if let Some(ref c) = effective_condition {
                node.metadata.insert(META_CONDITION.into(), c.clone());
            }
            // Stamp the comma-joined non-default `dependsOn:` predecessor
            // list. No consumer rule yet — parser-side hook for future
            // cross-job taint analysis.
            if let Some(ref d) = effective_depends_on {
                if !d.is_empty() {
                    node.metadata.insert(META_DEPENDS_ON.into(), d.clone());
                }
            }
        }

        // Every step has access to System.AccessToken
        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);

        // checkout step with persistCredentials: true writes the token to .git/config on disk,
        // making it accessible to all subsequent steps and filesystem-level attackers.
        if step.checkout.is_some() && step.persist_credentials == Some(true) {
            graph.add_edge(step_id, token_id, EdgeKind::PersistsTo);
        }

        // `checkout: self` pulls the repo being built. In a PR trigger context this
        // is the untrusted fork head — tag the step so downstream rules can gate on
        // trigger context. Default ADO checkout (`checkout: self`) is the common case.
        if let Some(ref ck) = step.checkout {
            if ck == "self" {
                if let Some(node) = graph.nodes.get_mut(step_id) {
                    node.metadata
                        .insert(META_CHECKOUT_SELF.into(), "true".into());
                }
            }
        }

        // Inherited pipeline/stage/job secrets
        for &secret_id in inherited_secrets {
            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
        }

        // Service connection detection from task inputs (case-insensitive key match)
        if let Some(ref inputs) = step.inputs {
            // Lower-cased input names treated as service-connection references.
            let service_conn_keys = [
                "azuresubscription",
                "connectedservicename",
                "connectedservicenamearm",
                "kubernetesserviceconnection",
                "environmentservicename",
                "backendservicearm",
            ];
            // determinism: sort by key — same YAML must produce same NodeId order
            let mut input_entries: Vec<(&String, &serde_yaml::Value)> = inputs.iter().collect();
            input_entries.sort_by(|a, b| a.0.cmp(b.0));
            for (raw_key, val) in input_entries {
                let lower = raw_key.to_lowercase();
                if !service_conn_keys.contains(&lower.as_str()) {
                    continue;
                }
                // Falls back to the input key itself when `yaml_value_as_str`
                // yields nothing for the value.
                let conn_name = yaml_value_as_str(val).unwrap_or(raw_key.as_str());
                // Values starting with `$(` get no Identity node — presumably
                // because the real connection name is only known at runtime.
                if !conn_name.starts_with("$(") {
                    // Stamp the connection name onto the step itself so rules
                    // that need the name (e.g. terraform_auto_approve_in_prod)
                    // don't have to traverse edges.
                    if let Some(node) = graph.nodes.get_mut(step_id) {
                        node.metadata
                            .insert(META_SERVICE_CONNECTION_NAME.into(), conn_name.to_string());
                    }

                    let mut meta = HashMap::new();
                    meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
                    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
                    // ADO pipeline YAML does not embed the authentication scheme
                    // of the service endpoint (WorkloadIdentityFederation vs.
                    // ServicePrincipal), so we cannot reliably determine whether a
                    // connection uses OIDC.  Leave META_OIDC unset -- the safe
                    // default -- so that rules like service_connection_scope_mismatch
                    // can fire on classic SPN connections.
                    let conn_id = graph.add_node_with_metadata(
                        NodeKind::Identity,
                        conn_name,
                        TrustZone::FirstParty,
                        meta,
                    );
                    graph.add_edge(step_id, conn_id, EdgeKind::HasAccessTo);
                }
            }

            // addSpnToEnvironment: true exposes federated SPN material
            // (idToken, servicePrincipalKey, servicePrincipalId, tenantId)
            // to the step's inline script via env vars. Stamp the step so
            // addspn_with_inline_script can pattern-match without traversal.
            if let Some(val) = input_value(inputs, "addSpnToEnvironment") {
                // Accepts a YAML boolean or the string "true" (ASCII case-insensitive).
                let truthy = match val {
                    serde_yaml::Value::Bool(b) => *b,
                    serde_yaml::Value::String(s) => s.eq_ignore_ascii_case("true"),
                    _ => false,
                };
                if truthy {
                    if let Some(node) = graph.nodes.get_mut(step_id) {
                        node.metadata
                            .insert(META_ADD_SPN_TO_ENV.into(), "true".into());
                    }
                }
            }

            // TerraformCLI@N / TerraformTaskV1..V4 with command: apply +
            // commandOptions containing auto-approve = same as inline
            // `terraform apply --auto-approve`. Detect once here so the rule
            // can read a single META_TERRAFORM_AUTO_APPROVE marker.
            let task_lower = step
                .task
                .as_deref()
                .map(|t| t.to_lowercase())
                .unwrap_or_default();
            let is_terraform_task = task_lower.starts_with("terraformcli@")
                || task_lower.starts_with("terraformtask@")
                || task_lower.starts_with("terraformtaskv");
            if is_terraform_task {
                let cmd_lower = input_str(inputs, "command")
                    .map(|s| s.to_lowercase())
                    .unwrap_or_default();
                let opts = input_str(inputs, "commandOptions").unwrap_or("");
                // The substring test covers both `-auto-approve` and `--auto-approve`.
                if cmd_lower == "apply" && opts.contains("auto-approve") {
                    if let Some(node) = graph.nodes.get_mut(step_id) {
                        node.metadata
                            .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
                    }
                }
            }

            // Detect $(varName) references in task input values
            // determinism: sort by key — same YAML must produce same NodeId order
            let mut paren_entries: Vec<(&String, &serde_yaml::Value)> = inputs.iter().collect();
            paren_entries.sort_by(|a, b| a.0.cmp(b.0));
            for (_k, val) in paren_entries {
                if let Some(s) = yaml_value_as_str(val) {
                    extract_dollar_paren_secrets(
                        s,
                        step_id,
                        plain_vars,
                        has_variable_groups,
                        graph,
                        cache,
                    );
                }
            }
        }

        // Inline-script detection of `terraform apply --auto-approve`.
        // Done after inputs processing so we can OR the two signals into a
        // single META_TERRAFORM_AUTO_APPROVE marker on the step.
        if let Some(ref body) = inline_script {
            if script_does_terraform_auto_apply(body) {
                if let Some(node) = graph.nodes.get_mut(step_id) {
                    node.metadata
                        .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
                }
            }
        }

        // Detect $(varName) in step env values
        if let Some(ref env) = step.env {
            // determinism: sort by key — same YAML must produce same NodeId order
            let mut env_entries: Vec<(&String, &serde_yaml::Value)> = env.iter().collect();
            env_entries.sort_by(|a, b| a.0.cmp(b.0));
            for (_k, val) in env_entries {
                if let Some(s) = yaml_scalar_to_string(val) {
                    extract_dollar_paren_secrets(
                        &s,
                        step_id,
                        plain_vars,
                        has_variable_groups,
                        graph,
                        cache,
                    );
                }
            }
        }

        // Detect $(varName) in inline script text
        if let Some(ref script) = inline_script {
            extract_dollar_paren_secrets(
                script,
                step_id,
                plain_vars,
                has_variable_groups,
                graph,
                cache,
            );
        }

        // Detect ##vso[task.setvariable] — environment gate mutation in ADO pipelines.
        // META_WRITES_ENV_GATE marks the step as writing to the env gate (always).
        // META_ENV_GATE_WRITES_SECRET_VALUE marks when the written value contains a
        // $(secretRef) expression — i.e., a secret is being propagated (BUG-4: plain
        // integer writes like `##vso[task.setvariable variable=Count]3` should not
        // fire as secret-exfiltration findings).
        if let Some(ref script) = inline_script {
            // The logging command is matched case-insensitively via a lower-cased copy.
            let lower = script.to_lowercase();
            if lower.contains("##vso[task.setvariable") {
                if let Some(node) = graph.nodes.get_mut(step_id) {
                    node.metadata
                        .insert(META_WRITES_ENV_GATE.into(), "true".into());
                    node.metadata
                        .insert(META_SETVARIABLE_ADO.into(), "true".into());
                    if setvariable_value_contains_secret_ref(script) {
                        node.metadata
                            .insert(META_ENV_GATE_WRITES_SECRET_VALUE.into(), "true".into());
                    }
                }
            }
        }
    }
}
1373
1374/// Classify an ADO step, returning (name, trust_zone, inline_script_text).
1375///
1376/// `inline_script_text` is populated whenever the step has script content —
1377/// either as a top-level `script:`/`bash:`/`powershell:`/`pwsh:` key, or as a
1378/// task input (`Bash@3.inputs.script`, `PowerShell@2.inputs.script`,
1379/// `AzureCLI@2.inputs.inlineScript`, `AzurePowerShell@5.inputs.Inline`, …).
1380/// Task-input keys are matched case-insensitively because the ADO YAML schema
1381/// is itself case-insensitive on input names.
1382fn classify_step(
1383    step: &AdoStep,
1384    job_name: &str,
1385    idx: usize,
1386) -> (String, TrustZone, Option<String>) {
1387    let default_name = || format!("{job_name}[{idx}]");
1388
1389    let name = step
1390        .display_name
1391        .as_deref()
1392        .or(step.name.as_deref())
1393        .map(|s| s.to_string())
1394        .unwrap_or_else(default_name);
1395
1396    if step.task.is_some() {
1397        // Task step — script body may live in inputs.{script,inlineScript,Inline}.
1398        let inline = extract_task_inline_script(step.inputs.as_ref());
1399        (name, TrustZone::Untrusted, inline)
1400    } else if let Some(ref s) = step.script {
1401        (name, TrustZone::FirstParty, Some(s.clone()))
1402    } else if let Some(ref s) = step.bash {
1403        (name, TrustZone::FirstParty, Some(s.clone()))
1404    } else if let Some(ref s) = step.powershell {
1405        (name, TrustZone::FirstParty, Some(s.clone()))
1406    } else if let Some(ref s) = step.pwsh {
1407        (name, TrustZone::FirstParty, Some(s.clone()))
1408    } else {
1409        (name, TrustZone::FirstParty, None)
1410    }
1411}
1412
1413/// Pull an inline script body out of a task step's `inputs:` mapping.
1414/// Recognises the three common conventions:
1415///   - `inputs.script` (Bash@3, PowerShell@2 — when targetType: inline)
1416///   - `inputs.inlineScript` (AzureCLI@2)
1417///   - `inputs.Inline` (AzurePowerShell@5 — note the capital I)
1418///
1419/// Match is case-insensitive so a hand-written pipeline using `Script:` or
1420/// `INLINESCRIPT:` is still picked up.
1421fn extract_task_inline_script(
1422    inputs: Option<&HashMap<String, serde_yaml::Value>>,
1423) -> Option<String> {
1424    let inputs = inputs?;
1425    const KEYS: &[&str] = &["script", "inlinescript", "inline"];
1426    // determinism: sort by key — same YAML must produce same NodeId order
1427    // (first-match semantics: ensure the same key wins across runs when more
1428    // than one of `script`/`inlineScript`/`Inline` is present in the same task)
1429    let mut entries: Vec<(&String, &serde_yaml::Value)> = inputs.iter().collect();
1430    entries.sort_by(|a, b| a.0.cmp(b.0));
1431    for (raw_key, val) in entries {
1432        let lower = raw_key.to_lowercase();
1433        if KEYS.contains(&lower.as_str()) {
1434            if let Some(s) = val.as_str() {
1435                if !s.is_empty() {
1436                    return Some(s.to_string());
1437                }
1438            }
1439        }
1440    }
1441    None
1442}
1443
1444fn input_value<'a>(
1445    inputs: &'a HashMap<String, serde_yaml::Value>,
1446    wanted: &str,
1447) -> Option<&'a serde_yaml::Value> {
1448    let mut entries: Vec<(&String, &serde_yaml::Value)> = inputs.iter().collect();
1449    entries.sort_by(|a, b| a.0.cmp(b.0));
1450    entries
1451        .into_iter()
1452        .find(|(key, _)| key.eq_ignore_ascii_case(wanted))
1453        .map(|(_, value)| value)
1454}
1455
1456fn input_str<'a>(inputs: &'a HashMap<String, serde_yaml::Value>, wanted: &str) -> Option<&'a str> {
1457    input_value(inputs, wanted).and_then(yaml_value_as_str)
1458}
1459
1460/// Add a DelegatesTo edge from a synthetic step node to a template image node.
1461///
1462/// Trust zone heuristic: templates referenced with `@repository` (e.g. `steps/deploy.yml@templates`)
1463/// pull code from an external repository and are Untrusted. Plain relative paths like
1464/// `steps/deploy.yml` resolve within the same repo and are FirstParty — mirroring how GHA
1465/// treats `./local-action`.
1466///
1467/// `job_name` is `Some` when the delegation is created inside a job's scope
1468/// (job-level template, or template step inside `process_steps`); it is `None`
1469/// for stage-level template delegations that don't belong to a specific job.
1470fn add_template_delegation(
1471    step_name: &str,
1472    template_path: &str,
1473    token_id: NodeId,
1474    job_name: Option<&str>,
1475    graph: &mut AuthorityGraph,
1476) {
1477    let tpl_trust_zone = if template_path.contains('@') {
1478        TrustZone::Untrusted
1479    } else {
1480        TrustZone::FirstParty
1481    };
1482    let step_id = graph.add_node(NodeKind::Step, step_name, TrustZone::FirstParty);
1483    if let Some(jn) = job_name {
1484        if let Some(node) = graph.nodes.get_mut(step_id) {
1485            node.metadata.insert(META_JOB_NAME.into(), jn.into());
1486        }
1487    }
1488    let tpl_id = graph.add_node(NodeKind::Image, template_path, tpl_trust_zone);
1489    graph.add_edge(step_id, tpl_id, EdgeKind::DelegatesTo);
1490    graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
1491    graph.mark_partial(
1492        GapKind::Structural,
1493        format!(
1494            "template '{template_path}' cannot be resolved inline — authority within the template is unknown"
1495        ),
1496    );
1497}
1498
1499/// Returns true if a `##vso[task.setvariable ...]VALUE` call's VALUE contains
1500/// an ADO `$(secretRef)` expression — i.e., the step is writing a secret-derived
1501/// value into the environment gate (BUG-4: plain integers and PowerShell vars
1502/// like `$psVar` should not fire the secret-exfiltration rule).
1503///
1504/// `$$(VAR)` is the documented ADO escape (literal output, not substitution)
1505/// and is intentionally NOT treated as a secret reference.
1506fn setvariable_value_contains_secret_ref(script: &str) -> bool {
1507    for line in script.lines() {
1508        let lower = line.to_lowercase();
1509        if !lower.contains("##vso[task.setvariable") {
1510            continue;
1511        }
1512        // The value starts after the closing `]` of the ##vso directive.
1513        if let Some(close_bracket) = line.find(']') {
1514            let value_part = &line[close_bracket + 1..];
1515            if contains_unescaped_dollar_paren(value_part) {
1516                return true;
1517            }
1518        }
1519    }
1520    false
1521}
1522
/// Reports whether `s` holds at least one `$(` that is a real substitution,
/// i.e. one whose preceding character is not another `$`.
///
/// The `$$(VAR)` escape form does not count. After an escaped opener the scan
/// resumes past the next `)` (or just past the opener when no `)` follows), so
/// anything inside the escaped group is ignored too. Shared by the
/// setvariable secret-ref detector and any caller needing the same semantics
/// without going through Secret-node creation.
fn contains_unescaped_dollar_paren(s: &str) -> bool {
    // Byte offset before which further `$(` hits belong to an escaped group.
    let mut resume_at = 0;
    for (opener, _) in s.match_indices("$(") {
        if opener < resume_at {
            continue;
        }
        let escaped = opener > 0 && s.as_bytes()[opener - 1] == b'$';
        if !escaped {
            return true;
        }
        // Escaped — jump to the end of the (...) group when it is closed.
        let group_start = opener + 2;
        resume_at = match s[group_start..].find(')') {
            Some(close_offset) => group_start + close_offset + 1,
            None => group_start,
        };
    }
    false
}
1548
1549/// Extract `$(varName)` references from a string, creating Secret nodes for
1550/// non-predefined and non-plain ADO variables.
1551/// Only content that is a valid ADO variable identifier (`[A-Za-z][A-Za-z0-9_]*`)
1552/// is treated as a variable reference. This rejects PowerShell sub-expressions
1553/// (`$($var)`), ADO template expressions (`${{ ... }}`), shell commands (`$(date)`),
1554/// and anything with spaces or special characters.
1555///
1556/// `$$(VAR)` is the documented ADO escape — it renders as a literal `$(VAR)`
1557/// in output and is **not** a substitution. We skip these without creating a
1558/// Secret node so that documentation strings like `echo "use $$(BUILD_BUILDID)"`
1559/// don't manufacture phantom HasAccessTo edges (BUG-4).
1560fn extract_dollar_paren_secrets(
1561    text: &str,
1562    step_id: NodeId,
1563    plain_vars: &HashSet<String>,
1564    has_variable_groups: bool,
1565    graph: &mut AuthorityGraph,
1566    cache: &mut HashMap<String, NodeId>,
1567) {
1568    let mut pos = 0;
1569    let bytes = text.as_bytes();
1570    while pos < bytes.len() {
1571        if pos + 2 < bytes.len() && bytes[pos] == b'$' && bytes[pos + 1] == b'(' {
1572            // Honour the `$$(VAR)` escape — second `$` makes the whole token a
1573            // literal in ADO's output, not a substitution. Skip past the
1574            // closing `)` without creating a Secret node.
1575            if pos > 0 && bytes[pos - 1] == b'$' {
1576                let start = pos + 2;
1577                if let Some(end_offset) = text[start..].find(')') {
1578                    pos = start + end_offset + 1;
1579                    continue;
1580                }
1581                pos += 1;
1582                continue;
1583            }
1584            let start = pos + 2;
1585            if let Some(end_offset) = text[start..].find(')') {
1586                let var_name = &text[start..start + end_offset];
1587                // BUG-3: when variable groups are present in this scope (or an
1588                // ancestor scope) the group is opaque — any $(VAR) could be a
1589                // plain config value from the group. Only create a Secret node
1590                // if the var was explicitly declared as a secret (is already
1591                // in cache) or there are no groups *along the inheritance chain*.
1592                let already_declared_secret = cache.contains_key(var_name);
1593                if is_valid_ado_identifier(var_name)
1594                    && !is_predefined_ado_var(var_name)
1595                    && !plain_vars.contains(var_name)
1596                    && (!has_variable_groups || already_declared_secret)
1597                {
1598                    let id = find_or_create_secret(graph, cache, var_name);
1599                    // Mark secrets embedded in -var flag arguments: their values appear in
1600                    // pipeline logs (command string is logged before masking, and Terraform
1601                    // itself logs -var values in plan output and debug traces).
1602                    if is_in_terraform_var_flag(text, pos) {
1603                        if let Some(node) = graph.nodes.get_mut(id) {
1604                            node.metadata
1605                                .insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
1606                        }
1607                    }
1608                    graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
1609                }
1610                pos = start + end_offset + 1;
1611                continue;
1612            }
1613        }
1614        pos += 1;
1615    }
1616}
1617
/// Decide whether the `$(VAR)` starting at byte offset `var_pos` of `text`
/// sits inside a Terraform `-var` flag argument.
///
/// Both conditions must hold (BUG-3 — an earlier heuristic that only looked
/// for `-var` and `=` somewhere on the line also matched `--var-file=`,
/// `extra-vars=`, `-vargs=`, and similar):
///
/// 1. The part of the current line before `var_pos` contains the literal
///    `-var ` (trailing space) or `-var=`. Spellings such as `-var-file=` or
///    `-vargs=` fail because the character after `-var` is neither a space
///    nor `=`.
///
/// 2. The case-insensitive token `terraform` appears earlier on the same
///    line, OR on a preceding line reached by walking upwards through an
///    unbroken continuation chain (each line ending in `\` for POSIX shells or
///    `` ` `` for PowerShell, trailing whitespace ignored). This admits
///    `terraform.exe`, wrapper invocations, and the multi-line shape:
///    `terraform apply \`
///    `  -var "db=$(secret)"`
fn is_in_terraform_var_flag(text: &str, var_pos: usize) -> bool {
    let line_start = match text[..var_pos].rfind('\n') {
        Some(newline_at) => newline_at + 1,
        None => 0,
    };
    let same_line_prefix = &text[line_start..var_pos];

    // Condition 1: a `-var ` / `-var=` flag on the current line.
    if !(same_line_prefix.contains("-var ") || same_line_prefix.contains("-var=")) {
        return false;
    }

    // Condition 2, fast path: `terraform` on the current line.
    if same_line_prefix.to_lowercase().contains("terraform") {
        return true;
    }

    // Condition 2, fallback: walk upwards line by line. Each earlier line
    // must end in a continuation character to be joined with ours; the first
    // line that does not continue ends the search.
    for earlier_line in text[..line_start].lines().rev() {
        let without_trailing_ws = earlier_line.trim_end();
        let joins_next_line =
            without_trailing_ws.ends_with('\\') || without_trailing_ws.ends_with('`');
        if !joins_next_line {
            return false;
        }
        if earlier_line.to_lowercase().contains("terraform") {
            return true;
        }
    }
    false
}
1673
/// Returns true if `name` has the shape of an ADO variable identifier.
///
/// Accepted names start with an ASCII letter and continue with ASCII letters,
/// digits, underscores, or dots (dots are needed for names such as
/// `Build.BuildId`). The empty string is rejected, as is anything containing
/// other characters — PowerShell vars (`$name`), template expressions
/// (`{{ ... }}`), or complex expressions (`name -join ','`). A bare word such
/// as `date` is accepted; it cannot be told apart from a variable name.
fn is_valid_ado_identifier(name: &str) -> bool {
    let Some((lead, tail)) = name.as_bytes().split_first() else {
        return false;
    };
    // Non-ASCII characters encode to bytes >= 0x80, which none of the
    // ASCII-class checks below accept.
    lead.is_ascii_alphabetic()
        && tail
            .iter()
            .all(|&byte| byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'.')
}
1688
/// Returns true if `name` starts with one of the well-known prefixes of ADO
/// predefined variables. These are system-provided and never represent
/// secrets.
///
/// Matching is case-sensitive and prefix-based: `TF_BUILD` itself matches, and
/// so does any longer name beginning with `TF_BUILD`.
fn is_predefined_ado_var(name: &str) -> bool {
    const PREDEFINED_PREFIXES: [&str; 10] = [
        "Build.",
        "Agent.",
        "System.",
        "Pipeline.",
        "Release.",
        "Environment.",
        "Strategy.",
        "Deployment.",
        "Resources.",
        "TF_BUILD",
    ];
    PREDEFINED_PREFIXES
        .iter()
        .any(|prefix| name.starts_with(*prefix))
}
1706
1707fn find_or_create_secret(
1708    graph: &mut AuthorityGraph,
1709    cache: &mut HashMap<String, NodeId>,
1710    name: &str,
1711) -> NodeId {
1712    if let Some(&id) = cache.get(name) {
1713        return id;
1714    }
1715    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
1716    cache.insert(name.to_string(), id);
1717    id
1718}
1719
1720fn yaml_value_as_str(val: &serde_yaml::Value) -> Option<&str> {
1721    val.as_str()
1722}
1723
1724fn yaml_scalar_to_string(value: &serde_yaml::Value) -> Option<String> {
1725    match value {
1726        serde_yaml::Value::String(s) => Some(s.clone()),
1727        serde_yaml::Value::Bool(b) => Some(b.to_string()),
1728        serde_yaml::Value::Number(n) => Some(n.to_string()),
1729        serde_yaml::Value::Null => Some(String::new()),
1730        _ => None,
1731    }
1732}
1733
1734// ── Serde models for ADO YAML ─────────────────────────────
1735
/// Top-level ADO pipeline definition.
/// ADO pipelines come in three shapes:
///   (a) stages → jobs → steps
///   (b) jobs → steps (no stages key)
///   (c) steps only (no stages or jobs key)
///
/// Every field is optional at parse time (`#[serde(default)]`), so a document
/// that omits a key deserializes with that field set to `None`.
#[derive(Debug, Deserialize)]
pub struct AdoPipeline {
    /// Raw `trigger:` value, kept as untyped YAML.
    #[serde(default)]
    pub trigger: Option<serde_yaml::Value>,
    /// Raw `pr:` value, kept as untyped YAML.
    #[serde(default)]
    pub pr: Option<serde_yaml::Value>,
    /// Pipeline-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// `stages:` is normally a sequence of stage objects, but real-world
    /// pipelines also use `stages: ${{ parameters.stages }}` (a template
    /// expression that resolves at runtime to a list). The custom
    /// deserializer accepts both shapes; a bare string or null resolves to
    /// `None`, and a single stage mapping is wrapped into a one-element list.
    #[serde(default, deserialize_with = "deserialize_optional_stages")]
    pub stages: Option<Vec<AdoStage>>,
    /// Top-level `jobs:` (shape (b)). Parsed by `deserialize_optional_jobs`,
    /// which accepts the same shapes as `deserialize_jobs`.
    #[serde(default, deserialize_with = "deserialize_optional_jobs")]
    pub jobs: Option<Vec<AdoJob>>,
    /// Top-level `steps:` (shape (c)).
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Raw pipeline-level `pool:` value, kept as untyped YAML.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
    /// Pipeline-level `workspace:` block. The only security-relevant field is
    /// `clean:` (`outputs`, `resources`, `all`, or `true`), which causes the
    /// agent to wipe the workspace between runs. Used to tag self-hosted Image
    /// nodes with `META_WORKSPACE_CLEAN`.
    #[serde(default)]
    pub workspace: Option<serde_yaml::Value>,
    /// `resources:` block — repository declarations, container declarations,
    /// pipeline declarations. We only consume `repositories[]` today.
    /// Pre-2019 ADO accepts a sequence form (`resources: [- repo: self]`)
    /// which has no `repositories:` key — the custom deserializer accepts
    /// both shapes and treats the sequence form as an empty resources block.
    #[serde(default, deserialize_with = "deserialize_optional_resources")]
    pub resources: Option<AdoResources>,
    /// Top-level `extends:` directive — `extends: { template: x@alias, ... }`.
    /// Captured raw so we can scan for `template: x@alias` references that
    /// consume a `resources.repositories[]` entry.
    #[serde(default)]
    pub extends: Option<serde_yaml::Value>,
    /// Top-level `parameters:` declarations. Each entry has at minimum a
    /// `name`; `type` defaults to `string` when omitted. `values:` is an
    /// optional allowlist that constrains caller input.
    /// ADO accepts two shapes: the typed sequence form
    /// (`- name: foo \n type: string \n default: bar`) and the legacy
    /// untyped map form (`parameters: { foo: bar, baz: '' }`) used in
    /// older template fragments. The custom deserializer normalizes both.
    #[serde(default, deserialize_with = "deserialize_optional_parameters")]
    pub parameters: Option<Vec<AdoParameter>>,
    /// Pipeline-level `permissions:` block. Controls the scope of
    /// `System.AccessToken` for all jobs in the pipeline unless overridden
    /// at the job level. Parsed to detect explicit scope restriction (e.g.
    /// `contents: none`) so `over_privileged_identity` doesn't fire on
    /// pipelines that have already locked down their token.
    #[serde(default)]
    pub permissions: Option<serde_yaml::Value>,
}
1797
1798/// Accept either a sequence of `AdoParameter` (modern typed form) or a
1799/// mapping of parameter name → default value (legacy untyped form used in
1800/// many template fragments). For the map form, each key becomes an
1801/// `AdoParameter` with the key as `name` and no type/values. Returns `None`
1802/// for any other shape (e.g. a bare template expression).
1803///
1804/// Implemented as a serde Visitor (rather than going through
1805/// `serde_yaml::Value`) so that downstream struct deserialization uses
1806/// serde's native lazy iteration — this avoids serde_yaml's strict
1807/// duplicate-key detection on `${{ else }}`-style template-conditional
1808/// keys that appear in stage/job `parameters:` blocks of unrelated entries.
1809fn deserialize_optional_parameters<'de, D>(
1810    deserializer: D,
1811) -> Result<Option<Vec<AdoParameter>>, D::Error>
1812where
1813    D: serde::Deserializer<'de>,
1814{
1815    use serde::de::{MapAccess, SeqAccess, Visitor};
1816    use std::fmt;
1817
1818    struct ParamsVisitor;
1819
1820    impl<'de> Visitor<'de> for ParamsVisitor {
1821        type Value = Option<Vec<AdoParameter>>;
1822
1823        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1824            f.write_str("a sequence of parameter declarations, a mapping of name→default, null, or a template expression")
1825        }
1826
1827        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1828            Ok(None)
1829        }
1830
1831        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1832            Ok(None)
1833        }
1834
1835        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1836            d.deserialize_any(self)
1837        }
1838
1839        // Bare scalar (template expression like `${{ parameters.X }}`) —
1840        // can't statically enumerate; treat as absent.
1841        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
1842            Ok(None)
1843        }
1844        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
1845            Ok(None)
1846        }
1847        fn visit_bool<E: serde::de::Error>(self, _v: bool) -> Result<Self::Value, E> {
1848            Ok(None)
1849        }
1850        fn visit_i64<E: serde::de::Error>(self, _v: i64) -> Result<Self::Value, E> {
1851            Ok(None)
1852        }
1853        fn visit_u64<E: serde::de::Error>(self, _v: u64) -> Result<Self::Value, E> {
1854            Ok(None)
1855        }
1856        fn visit_f64<E: serde::de::Error>(self, _v: f64) -> Result<Self::Value, E> {
1857            Ok(None)
1858        }
1859
1860        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
1861            let mut out = Vec::new();
1862            while let Some(item) = seq.next_element::<serde_yaml::Value>()? {
1863                if let Ok(p) = serde_yaml::from_value::<AdoParameter>(item) {
1864                    out.push(p);
1865                }
1866            }
1867            Ok(Some(out))
1868        }
1869
1870        fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
1871            // Legacy untyped map form: name → default-value. We collect
1872            // names; defaults are intentionally discarded (matches typed-
1873            // form semantics where `default:` is also ignored).
1874            let mut out = Vec::new();
1875            while let Some(key) = map.next_key::<serde_yaml::Value>()? {
1876                let _ignore = map.next_value::<serde::de::IgnoredAny>()?;
1877                let name = match key {
1878                    serde_yaml::Value::String(s) if !s.is_empty() => s,
1879                    _ => continue,
1880                };
1881                out.push(AdoParameter {
1882                    name: Some(name),
1883                    param_type: None,
1884                    values: None,
1885                });
1886            }
1887            Ok(Some(out))
1888        }
1889    }
1890
1891    deserializer.deserialize_any(ParamsVisitor)
1892}
1893
1894/// Accept either an `AdoResources` mapping (modern form with `repositories:`,
1895/// `containers:`, `pipelines:`) or the legacy sequence form (`resources: [-
1896/// repo: self]`, pre-2019 ADO syntax). The legacy form has no
1897/// `repositories:` key, so we return an empty `AdoResources` for it — the
1898/// repository-tracking rules then see no aliases to track, which is correct
1899/// (legacy `repo: self` declares no external repositories).
1900fn deserialize_optional_resources<'de, D>(deserializer: D) -> Result<Option<AdoResources>, D::Error>
1901where
1902    D: serde::Deserializer<'de>,
1903{
1904    use serde::de::{MapAccess, SeqAccess, Visitor};
1905    use std::fmt;
1906
1907    struct ResourcesVisitor;
1908
1909    impl<'de> Visitor<'de> for ResourcesVisitor {
1910        type Value = Option<AdoResources>;
1911
1912        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1913            f.write_str("an AdoResources mapping or a legacy `- repo:` sequence")
1914        }
1915
1916        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1917            Ok(None)
1918        }
1919        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1920            Ok(None)
1921        }
1922        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1923            d.deserialize_any(self)
1924        }
1925
1926        // Legacy sequence form — drain it without producing any
1927        // repository entries. Modern rules track aliases via the
1928        // `AdoResources.repositories[]` shape, which the legacy form
1929        // does not produce.
1930        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
1931            while seq.next_element::<serde::de::IgnoredAny>()?.is_some() {}
1932            Ok(Some(AdoResources::default()))
1933        }
1934
1935        fn visit_map<A: MapAccess<'de>>(self, map: A) -> Result<Self::Value, A::Error> {
1936            let r = AdoResources::deserialize(serde::de::value::MapAccessDeserializer::new(map))?;
1937            Ok(Some(r))
1938        }
1939    }
1940
1941    deserializer.deserialize_any(ResourcesVisitor)
1942}
1943
1944/// Accept either a sequence of `AdoStage` (the normal form) or a bare
1945/// template expression (`stages: ${{ parameters.stages }}`) which resolves
1946/// at runtime. For the template-expression case, return `None` so the
1947/// pipeline still parses; the graph will simply contain no stages from this
1948/// scope (downstream code already handles empty stage lists).
1949fn deserialize_optional_stages<'de, D>(deserializer: D) -> Result<Option<Vec<AdoStage>>, D::Error>
1950where
1951    D: serde::Deserializer<'de>,
1952{
1953    use serde::de::{MapAccess, SeqAccess, Visitor};
1954    use std::fmt;
1955
1956    struct StagesVisitor;
1957
1958    impl<'de> Visitor<'de> for StagesVisitor {
1959        type Value = Option<Vec<AdoStage>>;
1960
1961        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1962            f.write_str("a sequence of stages or a template expression")
1963        }
1964
1965        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1966            Ok(None)
1967        }
1968        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1969            Ok(None)
1970        }
1971        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1972            d.deserialize_any(self)
1973        }
1974        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
1975            Ok(None)
1976        }
1977        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
1978            Ok(None)
1979        }
1980
1981        fn visit_seq<A: SeqAccess<'de>>(self, seq: A) -> Result<Self::Value, A::Error> {
1982            let stages =
1983                Vec::<AdoStage>::deserialize(serde::de::value::SeqAccessDeserializer::new(seq))?;
1984            Ok(Some(stages))
1985        }
1986
1987        fn visit_map<A: MapAccess<'de>>(self, map: A) -> Result<Self::Value, A::Error> {
1988            let stage = AdoStage::deserialize(serde::de::value::MapAccessDeserializer::new(map))?;
1989            Ok(Some(vec![stage]))
1990        }
1991    }
1992
1993    deserializer.deserialize_any(StagesVisitor)
1994}
1995
1996fn deserialize_optional_jobs<'de, D>(deserializer: D) -> Result<Option<Vec<AdoJob>>, D::Error>
1997where
1998    D: serde::Deserializer<'de>,
1999{
2000    deserialize_jobs(deserializer).map(Some)
2001}
2002
2003fn deserialize_jobs<'de, D>(deserializer: D) -> Result<Vec<AdoJob>, D::Error>
2004where
2005    D: serde::Deserializer<'de>,
2006{
2007    use serde::de::{MapAccess, SeqAccess, Visitor};
2008    use std::fmt;
2009
2010    struct JobsVisitor;
2011
2012    impl<'de> Visitor<'de> for JobsVisitor {
2013        type Value = Vec<AdoJob>;
2014
2015        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2016            f.write_str("a sequence of ADO jobs, a map of job-name to job body, null, or a template expression")
2017        }
2018
2019        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
2020            Ok(Vec::new())
2021        }
2022        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
2023            Ok(Vec::new())
2024        }
2025        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
2026            d.deserialize_any(self)
2027        }
2028        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
2029            Ok(Vec::new())
2030        }
2031        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
2032            Ok(Vec::new())
2033        }
2034
2035        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
2036            let mut out = Vec::new();
2037            while let Some(item) = seq.next_element::<serde_yaml::Value>()? {
2038                if let Ok(job) = serde_yaml::from_value::<AdoJob>(item) {
2039                    out.push(job);
2040                }
2041            }
2042            Ok(out)
2043        }
2044
2045        fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
2046            let mut out = Vec::new();
2047            while let Some(key) = map.next_key::<serde_yaml::Value>()? {
2048                let value = map.next_value::<serde_yaml::Value>()?;
2049                let name = match key {
2050                    serde_yaml::Value::String(s) if !s.is_empty() => s,
2051                    _ => continue,
2052                };
2053                let Ok(mut job) = serde_yaml::from_value::<AdoJob>(value) else {
2054                    continue;
2055                };
2056                if job.job.is_none() && job.deployment.is_none() {
2057                    job.job = Some(name);
2058                }
2059                out.push(job);
2060            }
2061            Ok(out)
2062        }
2063    }
2064
2065    deserializer.deserialize_any(JobsVisitor)
2066}
2067
2068fn deserialize_optional_bool<'de, D>(deserializer: D) -> Result<Option<bool>, D::Error>
2069where
2070    D: serde::Deserializer<'de>,
2071{
2072    let value = Option::<serde_yaml::Value>::deserialize(deserializer)?;
2073    let Some(value) = value else {
2074        return Ok(None);
2075    };
2076    let parsed = match value {
2077        serde_yaml::Value::Bool(b) => Some(b),
2078        serde_yaml::Value::String(s) => match s.trim().to_ascii_lowercase().as_str() {
2079            "true" | "yes" | "y" | "on" | "1" => Some(true),
2080            "false" | "no" | "n" | "off" | "0" => Some(false),
2081            _ => None,
2082        },
2083        serde_yaml::Value::Number(n) => n.as_i64().map(|v| v != 0),
2084        serde_yaml::Value::Null => None,
2085        _ => None,
2086    };
2087    Ok(parsed)
2088}
2089
/// `resources:` block. Only `repositories[]` is modelled today.
///
/// Derives `Default`, which yields an empty block with no repositories.
#[derive(Debug, Default, Deserialize)]
pub struct AdoResources {
    /// Entries of `repositories:`. Empty when the key is absent
    /// (`#[serde(default)]`).
    #[serde(default)]
    pub repositories: Vec<AdoRepository>,
}
2096
/// A single `resources.repositories[]` entry — declares an external repo
/// alias the pipeline can consume via `template: x@alias`, `extends:`, or
/// `checkout: alias`.
///
/// Every field is optional at parse time (`#[serde(default)]`); a missing key
/// deserializes to `None`.
#[derive(Debug, Deserialize)]
pub struct AdoRepository {
    /// The alias used by consumers (`template: file@<repository>`).
    #[serde(default)]
    pub repository: Option<String>,
    /// `git`, `github`, `bitbucket`, or `azureGit`. Read from the YAML key
    /// `type` (renamed because `type` is a Rust keyword).
    #[serde(default, rename = "type")]
    pub repo_type: Option<String>,
    /// Full repo path (e.g. `org/repo`).
    #[serde(default)]
    pub name: Option<String>,
    /// Optional ref, read from the YAML key `ref`. Absent = default branch
    /// (mutable). Present forms: `refs/tags/v1.2.3`, `refs/heads/main`, bare
    /// branch `main`, or a SHA.
    #[serde(default, rename = "ref")]
    pub git_ref: Option<String>,
}
2116
/// Pipeline / template `parameters:` entry. We deliberately ignore `default:`
/// — only the name, type, and `values:` allowlist matter for our rules.
///
/// Every field is optional at parse time (`#[serde(default)]`).
#[derive(Debug, Deserialize)]
pub struct AdoParameter {
    /// Parameter name.
    #[serde(default)]
    pub name: Option<String>,
    /// Declared parameter type, read from the YAML key `type`. `None` when
    /// the declaration omits it.
    #[serde(rename = "type", default)]
    pub param_type: Option<String>,
    /// Optional `values:` allowlist, kept as raw YAML values.
    #[serde(default)]
    pub values: Option<Vec<serde_yaml::Value>>,
}
2128
/// ADO `dependsOn:` is normally written in one of two YAML shapes — a single
/// string (`dependsOn: my_job`) or a sequence of strings
/// (`dependsOn: [a, b, c]`). The untagged enum normalises both at
/// deserialization time so callers can iterate uniformly.
///
/// Untagged variants are tried in declaration order, so `Other` acts as the
/// catch-all: any value that is neither a string nor a sequence of strings is
/// kept as raw YAML instead of failing deserialization. `as_csv` renders
/// `Other` as an empty string.
#[derive(Debug, Deserialize, Clone)]
#[serde(untagged)]
pub enum DependsOn {
    /// `dependsOn: my_job`
    Single(String),
    /// `dependsOn: [a, b, c]`
    Multiple(Vec<String>),
    /// Any other YAML shape, preserved verbatim.
    Other(serde_yaml::Value),
}
2140
2141impl DependsOn {
2142    /// Comma-joined predecessor list suitable for stamping into
2143    /// `META_DEPENDS_ON` on a Step node. Empty entries are dropped.
2144    pub fn as_csv(&self) -> String {
2145        match self {
2146            DependsOn::Single(s) => s.trim().to_string(),
2147            DependsOn::Multiple(v) => v
2148                .iter()
2149                .map(|s| s.trim())
2150                .filter(|s| !s.is_empty())
2151                .collect::<Vec<_>>()
2152                .join(","),
2153            DependsOn::Other(_) => String::new(),
2154        }
2155    }
2156}
2157
/// One entry of a pipeline's `stages:` list: either a real stage (`stage:`
/// set) or a stage-level template reference (`template:` set). Every key is
/// optional at parse time (`#[serde(default)]`).
#[derive(Debug, Deserialize)]
pub struct AdoStage {
    /// Stage identifier. Absent when the stage entry is a template reference.
    #[serde(default)]
    pub stage: Option<String>,
    /// Stage-level template reference (`- template: path/to/stage.yml`).
    #[serde(default)]
    pub template: Option<String>,
    /// Stage-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Jobs of this stage, parsed by `deserialize_jobs`. Empty when the key
    /// is absent.
    #[serde(default, deserialize_with = "deserialize_jobs")]
    pub jobs: Vec<AdoJob>,
    /// Stage-level runtime gate. ADO evaluates this expression at queue time;
    /// when false, every job (and therefore every step) inside the stage is
    /// skipped. The parser cannot evaluate the expression statically, so its
    /// presence is recorded as a Partial-Expression gap and its text is stamped
    /// onto child Step nodes via `META_CONDITION`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub condition: Option<String>,
    /// Stage-level explicit `dependsOn:`. Default behaviour is "depends on the
    /// previous stage" — only the explicit form is captured.
    #[serde(rename = "dependsOn", default, skip_serializing_if = "Option::is_none")]
    pub depends_on: Option<DependsOn>,
}
2182
/// One job entry: a regular job (`job:`), a deployment job (`deployment:`),
/// or a job-level template reference (`template:`). All fields are optional
/// so any of the three shapes deserializes into the same struct.
#[derive(Debug, Deserialize)]
pub struct AdoJob {
    /// Regular job identifier
    #[serde(default)]
    pub job: Option<String>,
    /// Deployment job identifier
    #[serde(default)]
    pub deployment: Option<String>,
    /// Job-scoped `variables:` block, if present.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Steps declared directly under the job's `steps:` key.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Deployment-job nested strategy: runOnce/rolling/canary all share the
    /// shape `strategy.{runOnce,rolling,canary}.deploy.steps`. We only need
    /// the steps — the strategy choice itself doesn't change authority flow.
    #[serde(default)]
    pub strategy: Option<AdoStrategy>,
    /// Raw `pool:` value, kept as untyped YAML.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
    /// Job-level `workspace:` block. The only security-relevant field is
    /// `clean:` which causes the agent to wipe the workspace between runs.
    #[serde(default)]
    pub workspace: Option<serde_yaml::Value>,
    /// Job-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// Deployment-job environment binding. Two YAML shapes:
    ///
    ///   - `environment: production` (string shorthand)
    ///   - `environment: { name: staging, resourceType: VirtualMachine }` (mapping)
    ///
    /// When present, the environment may have approvals/checks attached in ADO's
    /// environment configuration. Approvals are a manual gate — authority cannot
    /// propagate past one without human intervention. We treat any `environment:`
    /// binding as an approval candidate and tag the job's steps so propagation
    /// rules can downgrade severity. (We can't see the approval config from YAML
    /// alone; the binding is the strongest signal available at parse time.)
    #[serde(default)]
    pub environment: Option<serde_yaml::Value>,
    /// Job-level runtime gate. Evaluated at job-queue time; controls whether
    /// the job's steps run. Cannot be statically evaluated — recorded as a
    /// Partial-Expression gap and stamped onto the job's Step nodes via
    /// `META_CONDITION` (joined with any stage-level condition).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub condition: Option<String>,
    /// Job-level explicit `dependsOn:`. Default behaviour is "depends on the
    /// previous job" — only the explicit form is captured.
    #[serde(rename = "dependsOn", default, skip_serializing_if = "Option::is_none")]
    pub depends_on: Option<DependsOn>,
}
2233
2234impl AdoJob {
2235    pub fn effective_name(&self) -> String {
2236        self.job
2237            .as_deref()
2238            .or(self.deployment.as_deref())
2239            .unwrap_or("job")
2240            .to_string()
2241    }
2242
2243    /// Returns the effective step list for this job.
2244    ///
2245    /// Regular jobs put steps under `steps:` directly. Deployment jobs nest
2246    /// them under `strategy.{runOnce,rolling,canary}.{deploy,preDeploy,
2247    /// postDeploy,routeTraffic,onSuccess,onFailure}.steps`. We merge all
2248    /// strategy-nested step lists into a single sequence so downstream rules
2249    /// see them as part of the job. Order: regular `steps:` first, then any
2250    /// strategy-nested steps in deterministic phase order.
2251    pub fn all_steps(&self) -> Vec<AdoStep> {
2252        let mut out: Vec<AdoStep> = Vec::new();
2253        if let Some(ref s) = self.steps {
2254            out.extend(s.iter().cloned());
2255        }
2256        if let Some(ref strat) = self.strategy {
2257            for phase in strat.phases() {
2258                if let Some(ref s) = phase.steps {
2259                    out.extend(s.iter().cloned());
2260                }
2261            }
2262        }
2263        out
2264    }
2265
2266    /// Returns true when the job is bound to an `environment:` — either the
2267    /// string form (`environment: production`) or the mapping form with a
2268    /// non-empty `name:` field. An empty mapping or empty string is ignored.
2269    pub fn has_environment_binding(&self) -> bool {
2270        match self.environment.as_ref() {
2271            None => false,
2272            Some(serde_yaml::Value::String(s)) => !s.trim().is_empty(),
2273            Some(serde_yaml::Value::Mapping(m)) => m
2274                .get("name")
2275                .and_then(|v| v.as_str())
2276                .map(|s| !s.trim().is_empty())
2277                .unwrap_or(false),
2278            _ => false,
2279        }
2280    }
2281}
2282
/// Deployment-job `strategy:` block. ADO ships three strategies — runOnce,
/// rolling, canary — each with multiple lifecycle phases that may carry
/// their own step list. We capture all of them; the AdoJob::all_steps
/// helper flattens them into one sequence.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategy {
    /// `runOnce:` strategy body, if present.
    #[serde(default, rename = "runOnce")]
    pub run_once: Option<AdoStrategyRunOnce>,
    /// `rolling:` strategy body; deserialized with the same phase layout as
    /// `runOnce`.
    #[serde(default)]
    pub rolling: Option<AdoStrategyRunOnce>,
    /// `canary:` strategy body; deserialized with the same phase layout as
    /// `runOnce`.
    #[serde(default)]
    pub canary: Option<AdoStrategyRunOnce>,
}
2296
2297impl AdoStrategy {
2298    /// Iterate over every populated lifecycle phase across all strategies.
2299    pub fn phases(&self) -> Vec<&AdoStrategyPhase> {
2300        let mut out: Vec<&AdoStrategyPhase> = Vec::new();
2301        for runner in [&self.run_once, &self.rolling, &self.canary]
2302            .iter()
2303            .copied()
2304            .flatten()
2305        {
2306            for phase in [
2307                &runner.deploy,
2308                &runner.pre_deploy,
2309                &runner.post_deploy,
2310                &runner.route_traffic,
2311            ]
2312            .into_iter()
2313            .flatten()
2314            {
2315                out.push(phase);
2316            }
2317            if let Some(ref on) = runner.on {
2318                if let Some(ref s) = on.success {
2319                    out.push(s);
2320                }
2321                if let Some(ref f) = on.failure {
2322                    out.push(f);
2323                }
2324            }
2325        }
2326        out
2327    }
2328}
2329
/// Lifecycle phases carried by every deployment strategy. Each phase may
/// have its own `steps:`. Covering all six avoids silently dropping
/// privileged setup/teardown steps from the authority graph.
///
/// Despite the name, `AdoStrategy` uses this same struct for its `rolling`
/// and `canary` fields as well.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyRunOnce {
    /// `deploy:` phase.
    #[serde(default)]
    pub deploy: Option<AdoStrategyPhase>,
    /// `preDeploy:` phase.
    #[serde(default, rename = "preDeploy")]
    pub pre_deploy: Option<AdoStrategyPhase>,
    /// `postDeploy:` phase.
    #[serde(default, rename = "postDeploy")]
    pub post_deploy: Option<AdoStrategyPhase>,
    /// `routeTraffic:` phase.
    #[serde(default, rename = "routeTraffic")]
    pub route_traffic: Option<AdoStrategyPhase>,
    /// `on:` block holding the success/failure hooks.
    #[serde(default)]
    pub on: Option<AdoStrategyOn>,
}
2346
/// `on:` block of a deployment strategy: optional hook phases keyed by
/// outcome, each of which may carry its own `steps:`.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyOn {
    /// `on.success` hook phase.
    #[serde(default)]
    pub success: Option<AdoStrategyPhase>,
    /// `on.failure` hook phase.
    #[serde(default)]
    pub failure: Option<AdoStrategyPhase>,
}
2354
/// A single deployment lifecycle phase. Only its `steps:` list is modelled;
/// the list is `None` when the phase has no `steps:` key.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyPhase {
    /// Steps declared under this phase.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
}
2360
/// One entry of a `steps:` list. The same struct covers every step flavour
/// (task, inline script, checkout, template reference); which optional field
/// is populated tells them apart. All fields default to `None` when absent.
#[derive(Debug, Deserialize, Clone)]
pub struct AdoStep {
    /// Task reference e.g. `AzureCLI@2`
    #[serde(default)]
    pub task: Option<String>,
    /// Inline script (cmd/sh)
    #[serde(default)]
    pub script: Option<String>,
    /// Inline bash script
    #[serde(default)]
    pub bash: Option<String>,
    /// Inline PowerShell script
    #[serde(default)]
    pub powershell: Option<String>,
    /// Cross-platform PowerShell
    #[serde(default)]
    pub pwsh: Option<String>,
    /// Step-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// Human-readable step label (`displayName:`).
    #[serde(rename = "displayName", default)]
    pub display_name: Option<String>,
    /// Legacy name alias
    #[serde(default)]
    pub name: Option<String>,
    /// Step `env:` mapping. Values are kept as raw YAML, so non-string
    /// scalars (booleans, numbers, null) do not fail deserialization.
    #[serde(default)]
    pub env: Option<HashMap<String, serde_yaml::Value>>,
    /// Task inputs (key → value, but values may be nested)
    #[serde(default)]
    pub inputs: Option<HashMap<String, serde_yaml::Value>>,
    /// Checkout step target (e.g. `self`, a repo alias, or `none`)
    #[serde(default)]
    pub checkout: Option<String>,
    /// When true on a checkout step, writes credentials to .git/config for subsequent steps.
    #[serde(
        rename = "persistCredentials",
        default,
        deserialize_with = "deserialize_optional_bool"
    )]
    pub persist_credentials: Option<bool>,
    /// Step-level runtime gate. Evaluated by the agent before it dispatches
    /// the step; when false the step is skipped (status: Skipped). Cannot be
    /// statically evaluated — recorded as a Partial-Expression gap and stamped
    /// onto the Step node via `META_CONDITION`, joined with any
    /// stage/job-level conditions stacked above.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub condition: Option<String>,
    /// Step-level explicit `dependsOn:`. Rare on individual steps (more common
    /// at job/stage level) but accepted by ADO; captured for symmetry.
    #[serde(rename = "dependsOn", default, skip_serializing_if = "Option::is_none")]
    pub depends_on: Option<DependsOn>,
}
2413
2414/// ADO `variables:` block. Can be a sequence (list of group/name-value entries)
2415/// or a mapping (variableName: value). We normalise both into a Vec<AdoVariable>.
2416#[derive(Debug, Default)]
2417pub struct AdoVariables(pub Vec<AdoVariable>);
2418
2419impl<'de> serde::Deserialize<'de> for AdoVariables {
2420    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
2421    where
2422        D: serde::Deserializer<'de>,
2423    {
2424        let raw = serde_yaml::Value::deserialize(deserializer)?;
2425        let mut vars = Vec::new();
2426
2427        match raw {
2428            serde_yaml::Value::Sequence(seq) => {
2429                for item in seq {
2430                    if let Some(map) = item.as_mapping() {
2431                        if let Some(group_val) = map.get("group") {
2432                            if let Some(group) = group_val.as_str() {
2433                                vars.push(AdoVariable::Group {
2434                                    group: group.to_string(),
2435                                });
2436                                continue;
2437                            }
2438                        }
2439                        let name = map
2440                            .get("name")
2441                            .and_then(|v| v.as_str())
2442                            .unwrap_or("")
2443                            .to_string();
2444                        let value = map
2445                            .get("value")
2446                            .and_then(|v| v.as_str())
2447                            .unwrap_or("")
2448                            .to_string();
2449                        let is_secret = map
2450                            .get("isSecret")
2451                            .and_then(|v| v.as_bool())
2452                            .unwrap_or(false);
2453                        vars.push(AdoVariable::Named {
2454                            name,
2455                            value,
2456                            is_secret,
2457                        });
2458                    }
2459                }
2460            }
2461            serde_yaml::Value::Mapping(map) => {
2462                for (k, v) in map {
2463                    let name = k.as_str().unwrap_or("").to_string();
2464                    let value = v.as_str().unwrap_or("").to_string();
2465                    vars.push(AdoVariable::Named {
2466                        name,
2467                        value,
2468                        is_secret: false,
2469                    });
2470                }
2471            }
2472            _ => {}
2473        }
2474
2475        Ok(AdoVariables(vars))
2476    }
2477}
2478
/// A single normalised entry of a `variables:` block, as produced by the
/// `AdoVariables` deserializer.
#[derive(Debug)]
pub enum AdoVariable {
    /// Reference to a variable group (`- group: <name>`).
    Group {
        /// Name of the referenced variable group.
        group: String,
    },
    /// An individually declared variable.
    Named {
        /// Variable name; empty when the YAML entry carried no string name.
        name: String,
        /// Literal value text taken from the YAML entry.
        value: String,
        /// Secrecy flag read from `isSecret:` on list-form entries; the
        /// mapping shorthand always sets it to `false`.
        is_secret: bool,
    },
}
2490
/// Heuristic detector for template-expression conditional wrappers such as
/// `- ${{ if eq(parameters.X, true) }}:`.
///
/// A line counts as a match when, after dropping leading whitespace and one
/// optional `- ` list marker, it starts with `${{`, contains `if ` or `if(`,
/// and ends with `:` (trailing whitespace ignored). Such a construct breaks
/// root-level mapping parses but is valid in an ADO template fragment that a
/// parent pipeline includes. Returns `true` as soon as one line matches.
fn has_root_parameter_conditional(content: &str) -> bool {
    content.lines().any(|line| {
        let unindented = line.trim_start();
        // Accept both the list-item form and the bare mapping-key form.
        let body = unindented.strip_prefix("- ").unwrap_or(unindented);

        let opens_expression = body.starts_with("${{");
        let mentions_if = body.contains("if ") || body.contains("if(");
        let is_mapping_key = body.trim_end().ends_with(':');

        opens_expression && mentions_if && is_mapping_key
    })
}
2510
2511fn recover_after_leading_root_sequence(content: &str) -> Option<&str> {
2512    for (idx, _) in content.char_indices() {
2513        if idx == 0 {
2514            continue;
2515        }
2516        if !is_root_pipeline_key_line(content[idx..].lines().next().unwrap_or_default()) {
2517            continue;
2518        }
2519        let recovered = &content[idx..];
2520        if serde_yaml::from_str::<AdoPipeline>(recovered).is_ok() {
2521            return Some(recovered);
2522        }
2523    }
2524    None
2525}
2526
/// Returns `true` when `line` is a bare, unindented pipeline root key such as
/// `steps:` or `stages:`.
///
/// The line must start at column 0 and consist of one of the recognised key
/// names followed by a colon with no inline value. Whitespace after the colon
/// is insignificant in YAML and is tolerated, so `steps:  ` is accepted the
/// same as `steps:`.
fn is_root_pipeline_key_line(line: &str) -> bool {
    // Leading indentation means the key is nested, not a root key.
    if line.starts_with(char::is_whitespace) {
        return false;
    }
    let line = line.trim_end();
    if !line.ends_with(':') {
        return false;
    }
    let key = line.trim_end_matches(':').trim();
    matches!(
        key,
        "trigger"
            | "pr"
            | "pool"
            | "variables"
            | "resources"
            | "stages"
            | "jobs"
            | "steps"
            | "extends"
            | "parameters"
            | "permissions"
    )
}
2547
2548#[cfg(test)]
2549mod tests {
2550    use super::*;
2551    use std::io::{Read, Write};
2552    use std::net::TcpListener;
2553    use std::thread;
2554
2555    fn parse(yaml: &str) -> AuthorityGraph {
2556        let parser = AdoParser;
2557        let source = PipelineSource {
2558            file: "azure-pipelines.yml".into(),
2559            repo: None,
2560            git_ref: None,
2561            commit_sha: None,
2562        };
2563        parser.parse(yaml, &source).unwrap()
2564    }
2565
2566    fn parse_with_ctx(yaml: &str, ctx: &AdoParserContext) -> AuthorityGraph {
2567        let parser = AdoParser;
2568        let source = PipelineSource {
2569            file: "azure-pipelines.yml".into(),
2570            repo: None,
2571            git_ref: None,
2572            commit_sha: None,
2573        };
2574        parser.parse_with_context(yaml, &source, Some(ctx)).unwrap()
2575    }
2576
    /// Starts a one-shot mock HTTP server on an ephemeral localhost port and
    /// returns its base URL (`http://127.0.0.1:<port>`).
    ///
    /// A background thread accepts a single connection, reads (and discards)
    /// up to 2048 bytes of the request, then answers `200 OK` with
    /// `response_json` as an `application/json` body. Write errors are
    /// ignored; the thread ends after that one exchange.
    fn spawn_variable_groups_server(response_json: &'static str) -> String {
        // Port 0 lets the OS pick a free port.
        let listener = TcpListener::bind("127.0.0.1:0").expect("bind mock server");
        let addr = listener.local_addr().expect("local addr");
        thread::spawn(move || {
            if let Ok((mut stream, _)) = listener.accept() {
                let mut buf = [0_u8; 2048];
                // The request content is irrelevant; one read is issued before replying.
                let _ = stream.read(&mut buf);
                let body = response_json.as_bytes();
                let header = format!(
                    "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
                    body.len()
                );
                let _ = stream.write_all(header.as_bytes());
                let _ = stream.write_all(body);
            }
        });
        format!("http://{addr}")
    }
2595
    /// Smoke test: a one-job pipeline with a single script step parses and
    /// yields at least two graph nodes.
    #[test]
    fn parses_simple_pipeline() {
        let yaml = r#"
trigger:
  - main

jobs:
  - job: Build
    steps:
      - script: echo hello
        displayName: Say hello
"#;
        let graph = parse(yaml);
        assert!(graph.nodes.len() >= 2); // System.AccessToken + step
    }
2611
    /// A pipeline with one plain script step yields exactly one Identity
    /// node, named `System.AccessToken`, whose identity scope is `broad`.
    #[test]
    fn system_access_token_created() {
        let yaml = r#"
steps:
  - script: echo hi
"#;
        let graph = parse(yaml);
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identities.len(), 1);
        assert_eq!(identities[0].name, "System.AccessToken");
        assert_eq!(
            identities[0].metadata.get(META_IDENTITY_SCOPE),
            Some(&"broad".to_string())
        );
    }
2627
    /// Without enrichment context, a `- group:` reference becomes a single
    /// Secret node tagged as a variable group, and the graph is marked
    /// Partial with a Structural gap that names the group.
    #[test]
    fn variable_group_creates_secret_and_marks_partial() {
        let yaml = r#"
variables:
  - group: MySecretGroup

steps:
  - script: echo hi
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secrets.len(), 1);
        assert_eq!(secrets[0].name, "MySecretGroup");
        assert_eq!(
            secrets[0].metadata.get(META_VARIABLE_GROUP),
            Some(&"true".to_string())
        );
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert!(
            graph
                .completeness_gaps
                .iter()
                .any(|g| g.contains("MySecretGroup")),
            "completeness gap should name the variable group"
        );
        // External variable group is unresolvable without ADO API access —
        // that's a Structural break in the authority chain, not an expression
        // substitution.
        assert!(
            graph.completeness_gap_kinds.contains(&GapKind::Structural),
            "variable group gap must be Structural, got: {:?}",
            graph.completeness_gap_kinds
        );
    }
2662
    /// With a context pointing at the local mock server, the group's secret
    /// variable is modelled as a Secret node, the group itself is not, no
    /// "unresolvable" gap names the group, and the graph metadata records
    /// that enrichment happened.
    #[test]
    fn variable_group_enrichment_resolves_plain_and_secret_vars() {
        let yaml = r#"
variables:
  - group: MySecretGroup

steps:
  - script: |
      echo $(PUBLIC_FLAG)
      echo $(DB_PASSWORD)
"#;
        // The mock serves one group with one plain and one secret variable.
        let org_url = spawn_variable_groups_server(
            r#"{"value":[{"name":"MySecretGroup","variables":{"PUBLIC_FLAG":{"value":"1","isSecret":false},"DB_PASSWORD":{"isSecret":true}}}]}"#,
        );
        let ctx = AdoParserContext {
            org: Some(org_url),
            project: Some("DemoProject".to_string()),
            pat: Some("dummy-pat".to_string()),
        };

        let graph = parse_with_ctx(yaml, &ctx);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert!(
            secrets.iter().any(|n| n.name == "DB_PASSWORD"),
            "secret variable from enriched group must be modelled as Secret"
        );
        assert!(
            !secrets.iter().any(|n| n.name == "MySecretGroup"),
            "resolved group should not be represented as an opaque group-secret node"
        );
        assert!(
            !graph
                .completeness_gaps
                .iter()
                .any(|g| g.contains("MySecretGroup") && g.contains("unresolvable")),
            "resolved group must not emit unresolvable-group partial gap"
        );
        assert_eq!(
            graph.metadata.get(META_ADO_VG_ENRICHED),
            Some(&"true".to_string())
        );
    }
2705
    /// When the enrichment endpoint cannot be reached, the parser keeps the
    /// static model: the graph is Partial with an "enrichment failed" gap,
    /// the group stays an opaque Secret node, and the enriched flag is
    /// `false`.
    #[test]
    fn variable_group_enrichment_failure_falls_back_to_static_model() {
        let yaml = r#"
variables:
  - group: MySecretGroup
steps:
  - script: echo hi
"#;
        // Bind to an ephemeral port, note its number, then release it so the
        // context points at an address with no listener.
        let unused_port = {
            let probe = TcpListener::bind("127.0.0.1:0").expect("bind probe listener");
            let p = probe.local_addr().expect("probe addr").port();
            drop(probe);
            p
        };
        let ctx = AdoParserContext {
            org: Some(format!("http://127.0.0.1:{unused_port}")),
            project: Some("DemoProject".to_string()),
            pat: Some("dummy-pat".to_string()),
        };

        let graph = parse_with_ctx(yaml, &ctx);
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert!(
            graph
                .completeness_gaps
                .iter()
                .any(|g| g.contains("enrichment failed")),
            "failed enrichment should produce warning partial gap"
        );
        assert!(
            graph
                .nodes_of_kind(NodeKind::Secret)
                .any(|n| n.name == "MySecretGroup"),
            "on failure parser must fall back to opaque group-secret behaviour"
        );
        assert_eq!(
            graph.metadata.get(META_ADO_VG_ENRICHED),
            Some(&"false".to_string())
        );
    }
2746
    /// A task whose inputs name an `azureSubscription` adds a second Identity
    /// node for that service connection, tagged as a service connection with
    /// `broad` scope.
    #[test]
    fn task_with_azure_subscription_creates_service_connection_identity() {
        let yaml = r#"
steps:
  - task: AzureCLI@2
    displayName: Deploy to Azure
    inputs:
      azureSubscription: MyServiceConnection
      scriptType: bash
      inlineScript: az group list
"#;
        let graph = parse(yaml);
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        // System.AccessToken + service connection
        assert_eq!(identities.len(), 2);
        let conn = identities
            .iter()
            .find(|i| i.name == "MyServiceConnection")
            .unwrap();
        assert_eq!(
            conn.metadata.get(META_SERVICE_CONNECTION),
            Some(&"true".to_string())
        );
        assert_eq!(
            conn.metadata.get(META_IDENTITY_SCOPE),
            Some(&"broad".to_string())
        );
    }
2775
    /// A service-connection identity created from `azureSubscription` must
    /// not carry the `META_OIDC` tag when the YAML gives no OIDC signal.
    #[test]
    fn service_connection_does_not_get_unconditional_oidc_tag() {
        let yaml = r#"
steps:
  - task: AzureCLI@2
    displayName: Deploy to Azure
    inputs:
      azureSubscription: MyClassicSpnConnection
      scriptType: bash
      inlineScript: az group list
"#;
        let graph = parse(yaml);
        let conn = graph
            .nodes_of_kind(NodeKind::Identity)
            .find(|i| i.name == "MyClassicSpnConnection")
            .expect("service connection identity should exist");
        assert_eq!(
            conn.metadata.get(META_OIDC),
            None,
            "service connections must not be tagged META_OIDC without a clear OIDC signal"
        );
    }
2798
    /// The `ConnectedServiceNameARM` task input is recognised as a service
    /// connection and produces an Identity node with that name.
    #[test]
    fn task_with_connected_service_name_creates_identity() {
        let yaml = r#"
steps:
  - task: SqlAzureDacpacDeployment@1
    inputs:
      ConnectedServiceNameARM: MySqlConnection
"#;
        let graph = parse(yaml);
        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert!(
            identities.iter().any(|i| i.name == "MySqlConnection"),
            "connectedServiceNameARM should create identity"
        );
    }
2814
    /// An inline `script:` step yields exactly one Step node in the
    /// FirstParty trust zone.
    #[test]
    fn script_step_classified_as_first_party() {
        let yaml = r#"
steps:
  - script: echo hi
    displayName: Say hi
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
    }
2827
2828    #[test]
2829    fn bash_step_classified_as_first_party() {
2830        let yaml = r#"
2831steps:
2832  - bash: echo hi
2833"#;
2834        let graph = parse(yaml);
2835        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2836        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
2837    }
2838
    /// A `task:` step yields exactly one Step node in the Untrusted trust
    /// zone.
    #[test]
    fn task_step_classified_as_untrusted() {
        let yaml = r#"
steps:
  - task: DotNetCoreCLI@2
    inputs:
      command: build
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 1);
        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
    }
2852
    /// A `$(NAME)` macro reference inside an inline script yields exactly one
    /// Secret node carrying the referenced variable name.
    #[test]
    fn dollar_paren_var_in_script_creates_secret() {
        let yaml = r#"
steps:
  - script: |
      curl -H "Authorization: $(MY_API_TOKEN)" https://api.example.com
    displayName: Call API
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secrets.len(), 1);
        assert_eq!(secrets[0].name, "MY_API_TOKEN");
    }
2866
    /// References to predefined variables (`Build.*`, `Agent.*`, `System.*`)
    /// in a script must not produce any Secret node.
    #[test]
    fn predefined_ado_var_not_treated_as_secret() {
        let yaml = r#"
steps:
  - script: |
      echo $(Build.BuildId)
      echo $(Agent.WorkFolder)
      echo $(System.DefaultWorkingDirectory)
    displayName: Print vars
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert!(
            secrets.is_empty(),
            "predefined ADO vars should not be treated as secrets, got: {:?}",
            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
        );
    }
2885
    /// A step-level `template:` reference yields one Step node, one Image
    /// node named after the template path, a single DelegatesTo edge from the
    /// step, and a Partial graph.
    #[test]
    fn template_reference_creates_delegates_to_and_marks_partial() {
        let yaml = r#"
steps:
  - template: steps/deploy.yml
    parameters:
      env: production
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 1);

        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(images.len(), 1);
        assert_eq!(images[0].name, "steps/deploy.yml");

        let delegates: Vec<_> = graph
            .edges_from(steps[0].id)
            .filter(|e| e.kind == EdgeKind::DelegatesTo)
            .collect();
        assert_eq!(delegates.len(), 1);

        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }
2910
    /// Root-level `steps:` with no `jobs:` or `stages:` wrapper still yields
    /// one Step node per step.
    #[test]
    fn top_level_steps_no_jobs() {
        let yaml = r#"
steps:
  - script: echo a
  - script: echo b
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 2);
    }
2922
    /// Root-level `jobs:` with no `stages:` wrapper yields the steps of every
    /// job (two jobs with one step each give two Step nodes).
    #[test]
    fn top_level_jobs_no_stages() {
        let yaml = r#"
jobs:
  - job: JobA
    steps:
      - script: echo a
  - job: JobB
    steps:
      - script: echo b
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 2);
    }
2938
    /// Steps nested as stages → jobs → steps are all discovered (two stages
    /// with one single-step job each give two Step nodes).
    #[test]
    fn stages_with_nested_jobs_parsed() {
        let yaml = r#"
stages:
  - stage: Build
    jobs:
      - job: Compile
        steps:
          - script: cargo build
  - stage: Test
    jobs:
      - job: UnitTest
        steps:
          - script: cargo test
"#;
        let graph = parse(yaml);
        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(steps.len(), 2);
    }
2958
    /// Every Step node, script or task alike, has exactly one HasAccessTo
    /// edge to the sole Identity node of the graph.
    #[test]
    fn all_steps_linked_to_system_access_token() {
        let yaml = r#"
steps:
  - script: echo a
  - task: SomeTask@1
    inputs: {}
"#;
        let graph = parse(yaml);
        let token: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(token.len(), 1);
        let token_id = token[0].id;

        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        for step in &steps {
            let links: Vec<_> = graph
                .edges_from(step.id)
                .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == token_id)
                .collect();
            assert_eq!(
                links.len(),
                1,
                "step '{}' must link to System.AccessToken",
                step.name
            );
        }
    }
2986
    /// A list-form variable flagged `isSecret: true` yields exactly one
    /// Secret node carrying the variable's name.
    #[test]
    fn named_secret_variable_creates_secret_node() {
        let yaml = r#"
variables:
  - name: MY_PASSWORD
    value: dummy
    isSecret: true

steps:
  - script: echo hi
"#;
        let graph = parse(yaml);
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secrets.len(), 1);
        assert_eq!(secrets[0].name, "MY_PASSWORD");
    }
3003
    /// The mapping shorthand for `variables:` parses, and plain values from
    /// it do not produce Secret nodes.
    #[test]
    fn variables_as_mapping_parsed() {
        let yaml = r#"
variables:
  MY_VAR: hello
  ANOTHER_VAR: world

steps:
  - script: echo hi
"#;
        let graph = parse(yaml);
        // Mapping-style variables without isSecret — no secret nodes created
        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert!(
            secrets.is_empty(),
            "plain mapping vars should not create secret nodes"
        );
    }
3022
    /// A checkout step with `persistCredentials: true` yields exactly one
    /// PersistsTo edge that targets the `System.AccessToken` identity.
    #[test]
    fn persist_credentials_creates_persists_to_edge() {
        let yaml = r#"
steps:
  - checkout: self
    persistCredentials: true
  - script: git push
"#;
        let graph = parse(yaml);
        let token_id = graph
            .nodes_of_kind(NodeKind::Identity)
            .find(|n| n.name == "System.AccessToken")
            .expect("System.AccessToken must exist")
            .id;

        let persists_edges: Vec<_> = graph
            .edges
            .iter()
            .filter(|e| e.kind == EdgeKind::PersistsTo && e.to == token_id)
            .collect();
        assert_eq!(
            persists_edges.len(),
            1,
            "checkout with persistCredentials: true must produce exactly one PersistsTo edge"
        );
    }
3049
    /// The quoted string form `persistCredentials: "true"` is treated like
    /// the boolean and still produces a PersistsTo edge.
    #[test]
    fn persist_credentials_string_true_creates_persists_to_edge() {
        let yaml = r#"
steps:
  - checkout: self
    persistCredentials: "true"
"#;
        let graph = parse(yaml);
        assert!(
            graph.edges.iter().any(|e| e.kind == EdgeKind::PersistsTo),
            "string true is accepted by ADO and must be treated as true"
        );
    }
3063
    /// `jobs:` written as a mapping (job name → body) rather than a list is
    /// accepted and its steps become Step nodes.
    #[test]
    fn jobs_mapping_form_parses() {
        let yaml = r#"
jobs:
  build:
    steps:
      - script: build.sh
        displayName: Build
"#;
        let graph = parse(yaml);
        assert!(
            graph
                .nodes_of_kind(NodeKind::Step)
                .any(|s| s.name == "Build"),
            "jobs: map form must produce step nodes"
        );
    }
3081
3082    #[test]
3083    fn step_env_non_string_scalar_values_parse() {
3084        let yaml = r#"
3085steps:
3086  - script: echo hi
3087    env:
3088      FEATURE_ENABLED: true
3089      RETRIES: 3
3090      EMPTY:
3091"#;
3092        let graph = parse(yaml);
3093        assert!(
3094            graph.nodes_of_kind(NodeKind::Step).next().is_some(),
3095            "scalar env values should not reject the whole ADO file"
3096        );
3097    }
3098
3099    #[test]
3100    fn checkout_without_persist_credentials_no_persists_to_edge() {
3101        let yaml = r#"
3102steps:
3103  - checkout: self
3104  - script: echo hi
3105"#;
3106        let graph = parse(yaml);
3107        let persists_edges: Vec<_> = graph
3108            .edges
3109            .iter()
3110            .filter(|e| e.kind == EdgeKind::PersistsTo)
3111            .collect();
3112        assert!(
3113            persists_edges.is_empty(),
3114            "checkout without persistCredentials should not produce PersistsTo edge"
3115        );
3116    }
3117
3118    #[test]
3119    fn var_flag_secret_marked_as_cli_flag_exposed() {
3120        let yaml = r#"
3121steps:
3122  - script: |
3123      terraform apply \
3124        -var "db_password=$(db_password)" \
3125        -var "api_key=$(api_key)"
3126    displayName: Terraform apply
3127"#;
3128        let graph = parse(yaml);
3129        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3130        assert!(!secrets.is_empty(), "should detect secrets from -var flags");
3131        for secret in &secrets {
3132            assert_eq!(
3133                secret.metadata.get(META_CLI_FLAG_EXPOSED),
3134                Some(&"true".to_string()),
3135                "secret '{}' passed via -var flag should be marked cli_flag_exposed",
3136                secret.name
3137            );
3138        }
3139    }
3140
3141    #[test]
3142    fn non_var_flag_secret_not_marked_as_cli_flag_exposed() {
3143        let yaml = r#"
3144steps:
3145  - script: |
3146      curl -H "Authorization: $(MY_TOKEN)" https://api.example.com
3147"#;
3148        let graph = parse(yaml);
3149        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3150        assert_eq!(secrets.len(), 1);
3151        assert!(
3152            !secrets[0].metadata.contains_key(META_CLI_FLAG_EXPOSED),
3153            "non -var secret should not be marked as cli_flag_exposed"
3154        );
3155    }
3156
3157    #[test]
3158    fn step_linked_to_variable_group_secret() {
3159        let yaml = r#"
3160variables:
3161  - group: ProdSecrets
3162
3163steps:
3164  - script: deploy.sh
3165"#;
3166        let graph = parse(yaml);
3167        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3168        assert_eq!(secrets.len(), 1);
3169        let secret_id = secrets[0].id;
3170
3171        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3172        let links: Vec<_> = graph
3173            .edges_from(steps[0].id)
3174            .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == secret_id)
3175            .collect();
3176        assert_eq!(
3177            links.len(),
3178            1,
3179            "step should be linked to variable group secret"
3180        );
3181    }
3182
3183    #[test]
3184    fn pr_trigger_sets_meta_trigger_on_graph() {
3185        let yaml = r#"
3186pr:
3187  - '*'
3188
3189steps:
3190  - script: echo hi
3191"#;
3192        let graph = parse(yaml);
3193        assert_eq!(
3194            graph.metadata.get(META_TRIGGER),
3195            Some(&"pr".to_string()),
3196            "ADO pr: trigger should set graph META_TRIGGER"
3197        );
3198    }
3199
3200    #[test]
3201    fn self_hosted_pool_by_name_creates_image_with_self_hosted_metadata() {
3202        let yaml = r#"
3203pool:
3204  name: my-self-hosted-pool
3205
3206steps:
3207  - script: echo hi
3208"#;
3209        let graph = parse(yaml);
3210        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
3211        assert_eq!(images.len(), 1);
3212        assert_eq!(images[0].name, "my-self-hosted-pool");
3213        assert_eq!(
3214            images[0].metadata.get(META_SELF_HOSTED),
3215            Some(&"true".to_string()),
3216            "pool.name without vmImage must be tagged self-hosted"
3217        );
3218    }
3219
3220    #[test]
3221    fn vm_image_pool_is_not_tagged_self_hosted() {
3222        let yaml = r#"
3223pool:
3224  vmImage: ubuntu-latest
3225
3226steps:
3227  - script: echo hi
3228"#;
3229        let graph = parse(yaml);
3230        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
3231        assert_eq!(images.len(), 1);
3232        assert_eq!(images[0].name, "ubuntu-latest");
3233        assert!(
3234            !images[0].metadata.contains_key(META_SELF_HOSTED),
3235            "pool.vmImage is Microsoft-hosted — must not be tagged self-hosted"
3236        );
3237    }
3238
3239    #[test]
3240    fn checkout_self_step_tagged_with_meta_checkout_self() {
3241        let yaml = r#"
3242steps:
3243  - checkout: self
3244  - script: echo hi
3245"#;
3246        let graph = parse(yaml);
3247        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3248        assert_eq!(steps.len(), 2);
3249        let checkout_step = steps
3250            .iter()
3251            .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
3252            .expect("one step must be tagged META_CHECKOUT_SELF");
3253        assert_eq!(
3254            checkout_step.metadata.get(META_CHECKOUT_SELF),
3255            Some(&"true".to_string())
3256        );
3257    }
3258
3259    #[test]
3260    fn vso_setvariable_sets_meta_writes_env_gate() {
3261        let yaml = r###"
3262steps:
3263  - script: |
3264      echo "##vso[task.setvariable variable=FOO]bar"
3265    displayName: Set variable
3266"###;
3267        let graph = parse(yaml);
3268        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3269        assert_eq!(steps.len(), 1);
3270        assert_eq!(
3271            steps[0].metadata.get(META_WRITES_ENV_GATE),
3272            Some(&"true".to_string()),
3273            "##vso[task.setvariable] must mark META_WRITES_ENV_GATE"
3274        );
3275    }
3276
3277    #[test]
3278    fn environment_key_tags_job_with_env_approval() {
3279        // String form: `environment: production`
3280        let yaml_string_form = r#"
3281jobs:
3282  - deployment: DeployWeb
3283    environment: production
3284    steps:
3285      - script: echo deploying
3286        displayName: Deploy
3287"#;
3288        let g1 = parse(yaml_string_form);
3289        let tagged: Vec<_> = g1
3290            .nodes_of_kind(NodeKind::Step)
3291            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
3292            .collect();
3293        assert!(
3294            !tagged.is_empty(),
3295            "string-form `environment:` must tag job's step nodes with META_ENV_APPROVAL"
3296        );
3297
3298        // Mapping form: `environment: { name: staging }`
3299        let yaml_mapping_form = r#"
3300jobs:
3301  - deployment: DeployAPI
3302    environment:
3303      name: staging
3304      resourceType: VirtualMachine
3305    steps:
3306      - script: echo deploying
3307        displayName: Deploy
3308"#;
3309        let g2 = parse(yaml_mapping_form);
3310        let tagged2: Vec<_> = g2
3311            .nodes_of_kind(NodeKind::Step)
3312            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
3313            .collect();
3314        assert!(
3315            !tagged2.is_empty(),
3316            "mapping-form `environment: {{ name: ... }}` must tag job's step nodes"
3317        );
3318
3319        // Negative: a job with no `environment:` must not be tagged
3320        let yaml_no_env = r#"
3321jobs:
3322  - job: Build
3323    steps:
3324      - script: echo building
3325"#;
3326        let g3 = parse(yaml_no_env);
3327        let any_tagged = g3
3328            .nodes_of_kind(NodeKind::Step)
3329            .any(|s| s.metadata.contains_key(META_ENV_APPROVAL));
3330        assert!(
3331            !any_tagged,
3332            "jobs without `environment:` must not carry META_ENV_APPROVAL"
3333        );
3334    }
3335
3336    #[test]
3337    fn root_parameter_conditional_template_fragment_does_not_crash_and_marks_partial() {
3338        // Real-world repro: an ADO template fragment whose root content is wrapped
3339        // in a parameter conditional (`- ${{ if eq(parameters.X, true) }}:`) followed
3340        // by a list of jobs. This is valid when `template:`-included from a parent
3341        // pipeline, but parsing it standalone fails with "did not find expected key".
3342        // The parser must now return a Partial graph instead of a fatal error.
3343        let yaml = r#"
3344parameters:
3345  msabs_ws2022: false
3346
3347- ${{ if eq(parameters.msabs_ws2022, true) }}:
3348  - job: packer_ws2022
3349    displayName: Build WS2022 Gold Image
3350    steps:
3351      - task: PackerTool@0
3352"#;
3353        let parser = AdoParser;
3354        let source = PipelineSource {
3355            file: "fragment.yml".into(),
3356            repo: None,
3357            git_ref: None,
3358            commit_sha: None,
3359        };
3360        let result = parser.parse(yaml, &source);
3361        let graph = result.expect("template fragment must not crash the parser");
3362        assert!(
3363            matches!(graph.completeness, AuthorityCompleteness::Partial),
3364            "template-fragment graph must be marked Partial"
3365        );
3366        let saw_fragment_gap = graph
3367            .completeness_gaps
3368            .iter()
3369            .any(|g| g.contains("template fragment") && g.contains("parent pipeline"));
3370        assert!(
3371            saw_fragment_gap,
3372            "completeness_gaps must mention the template-fragment reason, got: {:?}",
3373            graph.completeness_gaps
3374        );
3375        // A template fragment's root structure depends on the parent pipeline
3376        // — this is a Structural break, not a missing expression value.
3377        assert_eq!(
3378            graph.completeness_gap_kinds.len(),
3379            1,
3380            "template-fragment graph should record exactly one gap kind"
3381        );
3382        assert_eq!(graph.completeness_gap_kinds[0], GapKind::Structural);
3383    }
3384
3385    #[test]
3386    fn environment_tag_isolated_to_gated_job_only() {
3387        // Two jobs side by side: only the deployment job has environment.
3388        // Steps from the non-gated job must NOT be tagged.
3389        let yaml = r#"
3390jobs:
3391  - job: Build
3392    steps:
3393      - script: echo build
3394        displayName: build-step
3395  - deployment: DeployProd
3396    environment: production
3397    steps:
3398      - script: echo deploy
3399        displayName: deploy-step
3400"#;
3401        let g = parse(yaml);
3402        let build_step = g
3403            .nodes_of_kind(NodeKind::Step)
3404            .find(|s| s.name == "build-step")
3405            .expect("build-step must exist");
3406        let deploy_step = g
3407            .nodes_of_kind(NodeKind::Step)
3408            .find(|s| s.name == "deploy-step")
3409            .expect("deploy-step must exist");
3410        assert!(
3411            !build_step.metadata.contains_key(META_ENV_APPROVAL),
3412            "non-gated job's step must not be tagged"
3413        );
3414        assert_eq!(
3415            deploy_step.metadata.get(META_ENV_APPROVAL),
3416            Some(&"true".to_string()),
3417            "gated deployment job's step must be tagged"
3418        );
3419    }
3420
3421    // ── resources.repositories[] capture ──────────────────────
3422
3423    fn repos_meta(graph: &AuthorityGraph) -> Vec<serde_json::Value> {
3424        let raw = graph
3425            .metadata
3426            .get(META_REPOSITORIES)
3427            .expect("META_REPOSITORIES must be set");
3428        serde_json::from_str(raw).expect("META_REPOSITORIES must be valid JSON")
3429    }
3430
3431    #[test]
3432    fn resources_repositories_captured_with_used_flag_when_referenced_by_extends() {
3433        let yaml = r#"
3434resources:
3435  repositories:
3436    - repository: shared-templates
3437      type: git
3438      name: Platform/shared-templates
3439      ref: refs/heads/main
3440
3441extends:
3442  template: pipeline.yml@shared-templates
3443"#;
3444        let graph = parse(yaml);
3445        let entries = repos_meta(&graph);
3446        assert_eq!(entries.len(), 1);
3447        let e = &entries[0];
3448        assert_eq!(e["alias"], "shared-templates");
3449        assert_eq!(e["repo_type"], "git");
3450        assert_eq!(e["name"], "Platform/shared-templates");
3451        assert_eq!(e["ref"], "refs/heads/main");
3452        assert_eq!(e["used"], true);
3453    }
3454
3455    #[test]
3456    fn resources_repositories_used_via_checkout_alias() {
3457        // Mirrors the msigeurope-adf-finance-reporting corpus shape.
3458        let yaml = r#"
3459resources:
3460  repositories:
3461    - repository: adf_publish
3462      type: git
3463      name: org/adf-finance-reporting
3464      ref: refs/heads/adf_publish
3465
3466jobs:
3467  - job: deploy
3468    steps:
3469      - checkout: adf_publish
3470"#;
3471        let graph = parse(yaml);
3472        let entries = repos_meta(&graph);
3473        assert_eq!(entries.len(), 1);
3474        assert_eq!(entries[0]["alias"], "adf_publish");
3475        assert_eq!(entries[0]["used"], true);
3476    }
3477
3478    #[test]
3479    fn resources_repositories_unreferenced_alias_is_marked_not_used() {
3480        // Declared but no `template: x@alias`, no `checkout: alias`, no extends.
3481        let yaml = r#"
3482resources:
3483  repositories:
3484    - repository: orphan-templates
3485      type: git
3486      name: Platform/orphan
3487      ref: main
3488
3489jobs:
3490  - job: build
3491    steps:
3492      - script: echo hi
3493"#;
3494        let graph = parse(yaml);
3495        let entries = repos_meta(&graph);
3496        assert_eq!(entries.len(), 1);
3497        assert_eq!(entries[0]["alias"], "orphan-templates");
3498        assert_eq!(entries[0]["used"], false);
3499    }
3500
3501    #[test]
3502    fn resources_repositories_absent_when_no_resources_block() {
3503        let yaml = r#"
3504jobs:
3505  - job: build
3506    steps:
3507      - script: echo hi
3508"#;
3509        let graph = parse(yaml);
3510        assert!(!graph.metadata.contains_key(META_REPOSITORIES));
3511    }
3512
3513    #[test]
3514    fn parse_template_alias_extracts_segment_after_at() {
3515        assert_eq!(
3516            parse_template_alias("steps/deploy.yml@templates"),
3517            Some("templates".to_string())
3518        );
3519        assert_eq!(parse_template_alias("local/path.yml"), None);
3520        assert_eq!(parse_template_alias("path@"), None);
3521    }
3522
3523    #[test]
3524    fn parameters_as_map_form_parses_as_named_parameters() {
3525        // Real-world repro from Azure/aks-engine, PowerShell/PowerShell, dotnet/maui:
3526        // legacy template fragments declare `parameters:` as a mapping of
3527        // name → default-value rather than the modern typed sequence form.
3528        // Both shapes must parse; the map form yields parameters with names
3529        // but no type/values allowlist (so they default to "string" downstream).
3530        let yaml = r#"
3531parameters:
3532  name: ''
3533  k8sRelease: ''
3534  apimodel: 'examples/e2e-tests/kubernetes/release/default/definition.json'
3535  createVNET: false
3536
3537jobs:
3538  - job: build
3539    steps:
3540      - script: echo $(name)
3541"#;
3542        let graph = parse(yaml);
3543        // Parse must succeed and capture the four parameter names.
3544        assert!(graph.parameters.contains_key("name"));
3545        assert!(graph.parameters.contains_key("k8sRelease"));
3546        assert!(graph.parameters.contains_key("apimodel"));
3547        assert!(graph.parameters.contains_key("createVNET"));
3548        assert_eq!(graph.parameters.len(), 4);
3549    }
3550
3551    #[test]
3552    fn parameters_as_typed_sequence_form_still_parses() {
3553        // Make sure the modern form still works after the polymorphic
3554        // deserializer change.
3555        let yaml = r#"
3556parameters:
3557  - name: env
3558    type: string
3559    default: prod
3560    values:
3561      - prod
3562      - staging
3563  - name: skipTests
3564    type: boolean
3565    default: false
3566
3567jobs:
3568  - job: build
3569    steps:
3570      - script: echo hi
3571"#;
3572        let graph = parse(yaml);
3573        let env_param = graph.parameters.get("env").expect("env captured");
3574        assert_eq!(env_param.param_type, "string");
3575        assert!(env_param.has_values_allowlist);
3576        let skip_param = graph
3577            .parameters
3578            .get("skipTests")
3579            .expect("skipTests captured");
3580        assert_eq!(skip_param.param_type, "boolean");
3581        assert!(!skip_param.has_values_allowlist);
3582    }
3583
3584    #[test]
3585    fn resources_as_legacy_sequence_form_parses_to_empty_resources() {
3586        // Real-world repro from Azure/azure-cli, Chinachu/Mirakurun: pre-2019
3587        // ADO syntax allows `resources:` as a list of `- repo: self` entries,
3588        // not the modern `resources: { repositories: [...] }` mapping. Modern
3589        // ADO still tolerates the legacy form. We must accept both shapes
3590        // without crashing the parse.
3591        let yaml = r#"
3592resources:
3593- repo: self
3594
3595trigger:
3596  - main
3597
3598jobs:
3599  - job: build
3600    steps:
3601      - script: echo hi
3602"#;
3603        let graph = parse(yaml);
3604        // No external repositories declared (legacy form has none) — so the
3605        // META_REPOSITORIES metadata key is absent.
3606        assert!(!graph.metadata.contains_key(META_REPOSITORIES));
3607        // But the job still parses.
3608        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3609        assert_eq!(steps.len(), 1);
3610    }
3611
3612    #[test]
3613    fn stages_as_template_expression_marks_partial_expression_gap() {
3614        // Real-world repro from dotnet/diagnostics templatePublic.yml:
3615        // `stages: ${{ parameters.stages }}` resolves at runtime. The static
3616        // parser cannot enumerate stages from a template expression. Accept
3617        // the file without crashing, but expose the under-modelled authority
3618        // carrier as a typed Partial-Expression gap.
3619        let yaml = r#"
3620parameters:
3621  - name: stages
3622    type: stageList
3623
3624stages: ${{ parameters.stages }}
3625"#;
3626        let graph = parse(yaml);
3627        // Graph must exist (no crash).
3628        assert!(graph.parameters.contains_key("stages"));
3629        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
3630        assert!(
3631            graph.completeness_gap_kinds.contains(&GapKind::Expression),
3632            "dynamic stages carrier must be an Expression gap, got: {:?}",
3633            graph.completeness_gap_kinds
3634        );
3635        assert!(
3636            graph
3637                .completeness_gaps
3638                .iter()
3639                .any(|g| g.contains("top-level `stages:`") && g.contains("template expression")),
3640            "gap must identify the dynamic stages carrier, got: {:?}",
3641            graph.completeness_gaps
3642        );
3643    }
3644
3645    #[test]
3646    fn jobs_as_template_expression_marks_partial_expression_gap() {
3647        let yaml = r#"
3648parameters:
3649  - name: jobs
3650    type: jobList
3651
3652jobs: ${{ parameters.jobs }}
3653"#;
3654        let graph = parse(yaml);
3655        assert!(graph.parameters.contains_key("jobs"));
3656        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
3657        assert!(
3658            graph.completeness_gap_kinds.contains(&GapKind::Expression),
3659            "dynamic jobs carrier must be an Expression gap, got: {:?}",
3660            graph.completeness_gap_kinds
3661        );
3662        assert!(
3663            graph
3664                .completeness_gaps
3665                .iter()
3666                .any(|g| g.contains("top-level `jobs:`") && g.contains("template expression")),
3667            "gap must identify the dynamic jobs carrier, got: {:?}",
3668            graph.completeness_gaps
3669        );
3670    }
3671
3672    // ── Cross-platform misclassification trap (red-team R2 #5) ─────
3673
3674    #[test]
3675    fn jobs_carrier_without_steps_marks_partial() {
3676        // ADO `jobs:` carrier present but each job has no `steps:` and no
3677        // `template:`. process_steps([]) adds nothing. Result: 0 Step nodes
3678        // despite a non-empty job carrier — must mark Partial so a CI gate
3679        // doesn't treat completeness=complete + 0 findings as "passed".
3680        let yaml = r#"
3681jobs:
3682  - job: build
3683    pool:
3684      vmImage: ubuntu-latest
3685"#;
3686        let graph = parse(yaml);
3687        let step_count = graph
3688            .nodes
3689            .iter()
3690            .filter(|n| n.kind == NodeKind::Step)
3691            .count();
3692        assert_eq!(step_count, 0);
3693        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
3694        assert!(
3695            graph
3696                .completeness_gaps
3697                .iter()
3698                .any(|g| g.contains("0 step nodes")),
3699            "completeness_gaps must mention 0 step nodes: {:?}",
3700            graph.completeness_gaps
3701        );
3702        // A jobs/steps carrier that yields zero step nodes is a Structural
3703        // break — the authority chain stops mid-graph rather than hiding a
3704        // value behind an expression.
3705        assert!(
3706            graph.completeness_gap_kinds.contains(&GapKind::Structural),
3707            "0-step-nodes gap must be Structural, got: {:?}",
3708            graph.completeness_gap_kinds
3709        );
3710    }
3711
3712    #[test]
3713    fn jobs_carrier_with_empty_jobs_list_does_not_mark_partial() {
3714        // Defensive: an empty `jobs:` list is NOT a carrier — there is no
3715        // job content to be confused about. Stays Complete.
3716        let yaml = r#"
3717jobs: []
3718"#;
3719        let graph = parse(yaml);
3720        let zero_step_gap = graph
3721            .completeness_gaps
3722            .iter()
3723            .any(|g| g.contains("0 step nodes"));
3724        assert!(
3725            !zero_step_gap,
3726            "empty jobs: list is not a carrier; got: {:?}",
3727            graph.completeness_gaps
3728        );
3729    }
3730
3731    // ── Bug regression: pr: none not suppressing PR-specific rules ──────────
3732
3733    #[test]
3734    fn pr_none_does_not_set_meta_trigger() {
3735        // `pr: none` is an explicit opt-out. Parser must require a mapping or
3736        // sequence for a real PR trigger; scalars are all opt-outs.
3737        let yaml = r#"
3738schedules:
3739  - cron: "0 5 * * 1"
3740pr: none
3741trigger: none
3742steps:
3743  - script: echo hello
3744"#;
3745        let graph = parse(yaml);
3746        assert!(
3747            !graph.metadata.contains_key(META_TRIGGER),
3748            "pr: none must not set META_TRIGGER; got: {:?}",
3749            graph.metadata.get(META_TRIGGER)
3750        );
3751    }
3752
3753    #[test]
3754    fn pr_tilde_does_not_set_meta_trigger() {
3755        // `pr: ~` is YAML null written as tilde — also an opt-out.
3756        let yaml = "pr: ~\nsteps:\n  - script: echo hello\n";
3757        let graph = parse(yaml);
3758        assert!(
3759            !graph.metadata.contains_key(META_TRIGGER),
3760            "pr: ~ must not set META_TRIGGER; got: {:?}",
3761            graph.metadata.get(META_TRIGGER)
3762        );
3763    }
3764
3765    #[test]
3766    fn pr_false_does_not_set_meta_trigger() {
3767        // `pr: false` — boolean false means disabled.
3768        let yaml = "pr: false\nsteps:\n  - script: echo hello\n";
3769        let graph = parse(yaml);
3770        assert!(
3771            !graph.metadata.contains_key(META_TRIGGER),
3772            "pr: false must not set META_TRIGGER; got: {:?}",
3773            graph.metadata.get(META_TRIGGER)
3774        );
3775    }
3776
3777    #[test]
3778    fn pr_sequence_sets_meta_trigger() {
3779        // Shorthand sequence form: `pr:\n  - main` is also a real PR trigger.
3780        let yaml = "pr:\n  - main\nsteps:\n  - script: echo hello\n";
3781        let graph = parse(yaml);
3782        assert_eq!(
3783            graph.metadata.get(META_TRIGGER).map(|s| s.as_str()),
3784            Some("pr"),
3785            "pr: [main] must set META_TRIGGER=pr"
3786        );
3787    }
3788
3789    #[test]
3790    fn pr_with_branches_sets_meta_trigger() {
3791        // Positive guard: a real PR trigger mapping must still set META_TRIGGER.
3792        let yaml = r#"
3793pr:
3794  branches:
3795    include:
3796      - main
3797steps:
3798  - script: echo hello
3799"#;
3800        let graph = parse(yaml);
3801        assert_eq!(
3802            graph.metadata.get(META_TRIGGER).map(|s| s.as_str()),
3803            Some("pr"),
3804            "real pr: block must set META_TRIGGER=pr"
3805        );
3806    }
3807
3808    // ── Bug regression: permissions: contents: none parsed as empty string ──
3809    // E2E test: parser → rule — the only test that catches the full chain.
3810
3811    #[test]
3812    fn over_privileged_identity_does_not_fire_when_permissions_contents_none() {
3813        // Full chain: ADO parser + over_privileged_identity rule.
3814        // Previously the parser ignored `permissions:`, leaving the token at
3815        // broad scope and firing the rule on every restricted pipeline.
3816        use taudit_core::rules::over_privileged_identity;
3817        let yaml = r#"
3818trigger: none
3819permissions:
3820  contents: none
3821steps:
3822  - script: echo hello
3823"#;
3824        let graph = parse(yaml);
3825        let findings = over_privileged_identity(&graph);
3826        let token_findings: Vec<_> = findings
3827            .iter()
3828            .filter(|f| {
3829                f.nodes_involved.iter().any(|&id| {
3830                    graph
3831                        .node(id)
3832                        .map(|n| n.name == "System.AccessToken")
3833                        .unwrap_or(false)
3834                })
3835            })
3836            .collect();
3837        assert!(
3838            token_findings.is_empty(),
3839            "over_privileged_identity must not fire on System.AccessToken when \
3840             permissions: contents: none is set; got: {token_findings:#?}"
3841        );
3842    }
3843
3844    #[test]
3845    fn pipeline_level_permissions_none_constrains_token() {
3846        // `permissions: contents: none` at pipeline level must downgrade
3847        // System.AccessToken from broad → constrained so over_privileged_identity
3848        // does not fire on an already-locked-down pipeline.
3849        let yaml = r#"
3850trigger: none
3851permissions:
3852  contents: none
3853steps:
3854  - script: echo hello
3855"#;
3856        let graph = parse(yaml);
3857        let token = graph
3858            .nodes_of_kind(NodeKind::Identity)
3859            .find(|n| n.name == "System.AccessToken")
3860            .expect("System.AccessToken must always be present");
3861        assert_eq!(
3862            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
3863            Some("constrained"),
3864            "permissions: contents: none must constrain the token; got: {:?}",
3865            token.metadata.get(META_IDENTITY_SCOPE)
3866        );
3867    }
3868
3869    #[test]
3870    fn pipeline_level_permissions_write_keeps_token_broad() {
3871        // A pipeline with write permissions must keep System.AccessToken broad.
3872        let yaml = r#"
3873trigger: none
3874permissions:
3875  contents: write
3876steps:
3877  - script: echo hello
3878"#;
3879        let graph = parse(yaml);
3880        let token = graph
3881            .nodes_of_kind(NodeKind::Identity)
3882            .find(|n| n.name == "System.AccessToken")
3883            .expect("System.AccessToken must always be present");
3884        assert_eq!(
3885            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
3886            Some("broad"),
3887            "permissions: contents: write must keep the token broad; got: {:?}",
3888            token.metadata.get(META_IDENTITY_SCOPE)
3889        );
3890    }
3891
3892    #[test]
3893    fn pipeline_level_permissions_read_scalar_constrains_token() {
3894        // `permissions: read` (scalar, not a map) must also downgrade the token.
3895        // Previously the scalar branch treated "read" as broad (incorrect).
3896        let yaml = "trigger: none\npermissions: read\nsteps:\n  - script: echo hello\n";
3897        let graph = parse(yaml);
3898        let token = graph
3899            .nodes_of_kind(NodeKind::Identity)
3900            .find(|n| n.name == "System.AccessToken")
3901            .expect("System.AccessToken must always be present");
3902        assert_eq!(
3903            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
3904            Some("constrained"),
3905            "permissions: read must constrain the token; got: {:?}",
3906            token.metadata.get(META_IDENTITY_SCOPE)
3907        );
3908    }
3909
3910    #[test]
3911    fn pipeline_level_permissions_write_scalar_keeps_token_broad() {
3912        // `permissions: write` (scalar) keeps the token broad.
3913        let yaml = "trigger: none\npermissions: write\nsteps:\n  - script: echo hello\n";
3914        let graph = parse(yaml);
3915        let token = graph
3916            .nodes_of_kind(NodeKind::Identity)
3917            .find(|n| n.name == "System.AccessToken")
3918            .expect("System.AccessToken must always be present");
3919        assert_eq!(
3920            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
3921            Some("broad"),
3922            "permissions: write scalar must keep token broad; got: {:?}",
3923            token.metadata.get(META_IDENTITY_SCOPE)
3924        );
3925    }
3926
3927    #[test]
3928    fn pipeline_level_permissions_contents_read_constrains_token() {
3929        // Map form with contents: read — should constrain.
3930        let yaml =
3931            "trigger: none\npermissions:\n  contents: read\nsteps:\n  - script: echo hello\n";
3932        let graph = parse(yaml);
3933        let token = graph
3934            .nodes_of_kind(NodeKind::Identity)
3935            .find(|n| n.name == "System.AccessToken")
3936            .expect("System.AccessToken must always be present");
3937        assert_eq!(
3938            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
3939            Some("constrained"),
3940            "permissions: contents: read must constrain; got: {:?}",
3941            token.metadata.get(META_IDENTITY_SCOPE)
3942        );
3943    }
3944
3945    #[test]
3946    fn empty_pipeline_does_not_mark_partial_for_zero_steps() {
3947        // No top-level stages/jobs/steps at all — there's no carrier, so the
3948        // 0-step-nodes guard must NOT fire. A genuinely empty pipeline stays
3949        // Complete.
3950        let yaml = r#"
3951trigger:
3952  - main
3953"#;
3954        let graph = parse(yaml);
3955        let zero_step_gap = graph
3956            .completeness_gaps
3957            .iter()
3958            .any(|g| g.contains("0 step nodes"));
3959        assert!(
3960            !zero_step_gap,
3961            "no carrier means no 0-step gap reason; got: {:?}",
3962            graph.completeness_gaps
3963        );
3964    }
3965
3966    /// regression: ADO HashMap iteration must be deterministic across runs.
3967    ///
3968    /// Before the fix, `step.env` and `step.inputs` (both `HashMap`s populated
3969    /// by serde_yaml) were iterated in HashMap-random order at four call sites
3970    /// in `taudit-parse-ado`. That randomness leaked into `NodeId` allocation
3971    /// (Secret/Identity nodes get IDs in the order they're added) and edge
3972    /// append order, which then leaked into `pipeline_identity_material_hash`
3973    /// and silently broke baseline suppression — same YAML, different hash on
3974    /// each run.
3975    ///
3976    /// Fixture uses non-alphabetic-insertion-order keys (`Z_VAR/A_VAR/M_VAR/...`)
3977    /// so the pre-fix HashMap bucket ordering is overwhelmingly unlikely to
3978    /// align with the now-enforced sorted iteration. We parse the same YAML
3979    /// nine times in sequence and assert that
3980    /// `compute_pipeline_identity_material_hash` is byte-identical across all
3981    /// runs. Mirrors `taudit-report-json`'s
3982    /// `json_output_is_byte_deterministic_across_runs` test pattern.
3983    #[test]
3984    fn ado_hashmap_iteration_is_deterministic_across_runs() {
3985        // Multiple `$(VAR)` references in both `env:` and task `inputs:` so
3986        // every secret-creating HashMap-iteration site in the parser is
3987        // exercised. Names chosen so HashMap hash bucket order has near-zero
3988        // chance of accidentally aligning with the enforced sorted order.
3989        let yaml = r#"
3990trigger:
3991  - main
3992
3993pool:
3994  vmImage: ubuntu-latest
3995
3996steps:
3997  - task: AzureCLI@2
3998    displayName: Deploy
3999    inputs:
4000      azureSubscription: $(SUB_CONN)
4001      scriptType: bash
4002      inlineScript: |
4003        echo $(MIDDLE_INPUT_VAR)
4004        echo $(ALPHA_INPUT_VAR)
4005        echo $(ZULU_INPUT_VAR)
4006    env:
4007      Z_VAR: $(Z_SECRET)
4008      A_VAR: $(A_SECRET)
4009      M_VAR: $(M_SECRET)
4010      Q_VAR: $(Q_SECRET)
4011      B_VAR: $(B_SECRET)
4012"#;
4013
4014        // Capture the structural shape of the graph that the bug report
4015        // identified as drifting: NodeId allocation order (id, kind, name,
4016        // trust_zone) and edge append order ((from, to, kind)). We
4017        // intentionally exclude `node.metadata` from the comparison — that
4018        // map's serialisation is a separate concern handled by the JSON sink
4019        // (see `taudit-report-json::json_output_is_byte_deterministic_across_runs`).
4020        fn structural_fingerprint(graph: &taudit_core::graph::AuthorityGraph) -> String {
4021            let mut out = String::new();
4022            for n in &graph.nodes {
4023                out.push_str(&format!(
4024                    "N {} {:?} {} {:?}\n",
4025                    n.id, n.kind, n.name, n.trust_zone
4026                ));
4027            }
4028            for e in &graph.edges {
4029                out.push_str(&format!("E {} {} {:?}\n", e.from, e.to, e.kind));
4030            }
4031            out
4032        }
4033
4034        let mut hashes: Vec<String> = Vec::with_capacity(9);
4035        let mut fingerprints: Vec<String> = Vec::with_capacity(9);
4036        for _ in 0..9 {
4037            let graph = parse(yaml);
4038            hashes.push(taudit_core::baselines::compute_pipeline_identity_material_hash(&graph));
4039            fingerprints.push(structural_fingerprint(&graph));
4040        }
4041
4042        let first_hash = &hashes[0];
4043        for (i, h) in hashes.iter().enumerate().skip(1) {
4044            assert_eq!(
4045                first_hash, h,
4046                "run 0 and run {i} produced different pipeline_identity_material_hash \
4047                 — ADO parser HashMap iteration is non-deterministic"
4048            );
4049        }
4050
4051        let first_fp = &fingerprints[0];
4052        for (i, fp) in fingerprints.iter().enumerate().skip(1) {
4053            assert_eq!(
4054                first_fp, fp,
4055                "run 0 and run {i} produced different graph node-id / edge ordering \
4056                 — ADO parser HashMap iteration is non-deterministic"
4057            );
4058        }
4059    }
4060
4061    // ── condition: / dependsOn: modelling (RC blocker A) ─────────────────────
4062    //
4063    // The ADO parser previously ignored stage / job / step `condition:` and
4064    // `dependsOn:` keys entirely, which made `apply_compensating_controls`
4065    // unable to credit conditional runtime gates and caused
4066    // `trigger_context_mismatch`-class rules to fire at full severity on
4067    // jobs the runtime would never execute on a PR build (deep audit
4068    // 02-ado-parser.md, finding 10).
4069
4070    #[test]
4071    fn step_condition_marks_partial_with_expression_gap() {
4072        let yaml = r#"
4073steps:
4074  - script: deploy.sh
4075    displayName: Deploy
4076    condition: eq(variables['Build.SourceBranch'], 'refs/heads/main')
4077"#;
4078        let graph = parse(yaml);
4079        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
4080        assert!(
4081            graph.completeness_gap_kinds.contains(&GapKind::Expression),
4082            "step condition must produce an Expression gap, got: {:?}",
4083            graph.completeness_gap_kinds
4084        );
4085        // Reason text must cite the conditional so an operator can grep
4086        // findings against the source pipeline's `condition:` clauses.
4087        assert!(
4088            graph.completeness_gaps.iter().any(|g| g.contains("step")
4089                && g.contains("Deploy")
4090                && g.contains("eq(variables['Build.SourceBranch']")),
4091            "gap reason must name scope, step, and condition: {:?}",
4092            graph.completeness_gaps
4093        );
4094    }
4095
4096    #[test]
4097    fn job_condition_propagates_to_step_metadata() {
4098        let yaml = r#"
4099jobs:
4100  - job: DeployProd
4101    condition: eq(variables['Build.SourceBranch'], 'refs/heads/main')
4102    steps:
4103      - script: deploy.sh
4104        displayName: Run deploy
4105"#;
4106        let graph = parse(yaml);
4107        let step = graph
4108            .nodes_of_kind(NodeKind::Step)
4109            .find(|n| n.name == "Run deploy")
4110            .expect("step node must exist");
4111        // No step-level condition was declared, so META_CONDITION carries
4112        // ONLY the job-level expression — verbatim, no ` AND ` joiner.
4113        assert_eq!(
4114            step.metadata.get(META_CONDITION),
4115            Some(&"eq(variables['Build.SourceBranch'], 'refs/heads/main')".to_string()),
4116            "job-level condition must propagate to step META_CONDITION"
4117        );
4118        // Job-level condition also marks the graph Partial-Expression so
4119        // downstream consumers know the runtime gate is opaque.
4120        assert!(graph.completeness_gap_kinds.contains(&GapKind::Expression));
4121    }
4122
4123    #[test]
4124    fn stacked_conditions_join_with_and() {
4125        let yaml = r#"
4126stages:
4127  - stage: Deploy
4128    condition: succeeded()
4129    jobs:
4130      - job: Prod
4131        condition: eq(variables['env'], 'prod')
4132        steps:
4133          - script: deploy.sh
4134            displayName: Deploy step
4135            condition: ne(variables['Build.Reason'], 'PullRequest')
4136"#;
4137        let graph = parse(yaml);
4138        let step = graph
4139            .nodes_of_kind(NodeKind::Step)
4140            .find(|n| n.name == "Deploy step")
4141            .expect("step node must exist");
4142        let chain = step
4143            .metadata
4144            .get(META_CONDITION)
4145            .expect("step must carry META_CONDITION");
4146        // Stage → Job → Step joined with ` AND ` in declaration order.
4147        assert_eq!(
4148            chain,
4149            "succeeded() AND eq(variables['env'], 'prod') AND ne(variables['Build.Reason'], 'PullRequest')",
4150            "stacked conditions must AND-join in stage→job→step order"
4151        );
4152        // Each scope's condition contributed a separate gap reason.
4153        let expression_gap_count = graph
4154            .completeness_gap_kinds
4155            .iter()
4156            .filter(|k| **k == GapKind::Expression)
4157            .count();
4158        assert!(
4159            expression_gap_count >= 3,
4160            "stage + job + step conditions must each mark Partial-Expression, got {expression_gap_count}"
4161        );
4162    }
4163
4164    #[test]
4165    fn depends_on_string_form_parses() {
4166        let yaml = r#"
4167jobs:
4168  - job: Build
4169    steps:
4170      - script: build.sh
4171  - job: Deploy
4172    dependsOn: Build
4173    steps:
4174      - script: deploy.sh
4175        displayName: Deploy
4176"#;
4177        let graph = parse(yaml);
4178        let step = graph
4179            .nodes_of_kind(NodeKind::Step)
4180            .find(|n| n.name == "Deploy")
4181            .expect("Deploy step must exist");
4182        assert_eq!(
4183            step.metadata.get(META_DEPENDS_ON),
4184            Some(&"Build".to_string()),
4185            "single-string dependsOn must stamp the predecessor name verbatim"
4186        );
4187    }
4188
4189    #[test]
4190    fn depends_on_sequence_form_parses() {
4191        let yaml = r#"
4192jobs:
4193  - job: A
4194    steps: [{ script: a.sh }]
4195  - job: B
4196    steps: [{ script: b.sh }]
4197  - job: C
4198    steps: [{ script: c.sh }]
4199  - job: Final
4200    dependsOn:
4201      - A
4202      - B
4203      - C
4204    steps:
4205      - script: final.sh
4206        displayName: Final step
4207"#;
4208        let graph = parse(yaml);
4209        let step = graph
4210            .nodes_of_kind(NodeKind::Step)
4211            .find(|n| n.name == "Final step")
4212            .expect("Final step must exist");
4213        assert_eq!(
4214            step.metadata.get(META_DEPENDS_ON),
4215            Some(&"A,B,C".to_string()),
4216            "sequence-form dependsOn must comma-join predecessors in declaration order"
4217        );
4218    }
4219
4220    #[test]
4221    fn step_depends_on_mapping_marks_partial_expression() {
4222        let yaml = "steps:\n  - script: echo hi\n    displayName: Mixed depends\n    dependsOn:\n      \"${{ if eq(parameters.extra, true) }}\":\n        - Prep\n";
4223        let graph = parse(yaml);
4224        let step = graph
4225            .nodes_of_kind(NodeKind::Step)
4226            .find(|n| n.name == "Mixed depends")
4227            .expect("step exists");
4228        assert!(
4229            !step.metadata.contains_key(META_DEPENDS_ON),
4230            "unresolved mapping dependsOn must not stamp META_DEPENDS_ON"
4231        );
4232        assert!(
4233            graph.completeness_gap_kinds.contains(&GapKind::Expression),
4234            "mapping dependsOn must mark Partial-Expression"
4235        );
4236        assert!(
4237            graph.completeness_gaps.iter().any(|g| g.contains("step")
4238                && g.contains("Mixed depends")
4239                && g.contains("dependsOn")),
4240            "gap reason must name scope, step, and dependsOn"
4241        );
4242    }
4243
4244    #[test]
4245    fn stage_depends_on_mapping_does_not_fake_inherited_dependency() {
4246        let yaml = "stages:\n  - stage: Build\n    jobs:\n      - job: BuildJob\n        steps:\n          - script: echo build\n  - stage: Deploy\n    dependsOn:\n      \"${{ if eq(parameters.release, true) }}\":\n        - Build\n    jobs:\n      - job: DeployJob\n        steps:\n          - script: echo deploy\n            displayName: Deploy step\n";
4247        let graph = parse(yaml);
4248        let step = graph
4249            .nodes_of_kind(NodeKind::Step)
4250            .find(|n| n.name == "Deploy step")
4251            .expect("deploy step exists");
4252        assert!(
4253            !step.metadata.contains_key(META_DEPENDS_ON),
4254            "unresolved stage dependsOn must not flow into child step metadata"
4255        );
4256        assert!(
4257            graph
4258                .completeness_gaps
4259                .iter()
4260                .any(|g| g.contains("stage") && g.contains("Deploy") && g.contains("dependsOn")),
4261            "gap reason must cite stage-level dependsOn expression"
4262        );
4263    }
4264
4265    #[test]
4266    fn conditional_step_finding_is_downgraded_via_compensating_control() {
4267        // Untrusted task step (TrustZone::Untrusted) with access to a
4268        // pipeline secret would normally fire `untrusted_with_authority`
4269        // at Critical. With a `condition:` gate present on the job, the
4270        // Suppression-5 ADO conditional-gate CC must downgrade to High,
4271        // record the original severity, and credit the gate as a CC.
4272        let yaml = r#"
4273variables:
4274  - name: DEPLOY_KEY
4275    value: $(MySecret)
4276    isSecret: true
4277jobs:
4278  - job: ProdDeploy
4279    condition: eq(variables['Build.SourceBranch'], 'refs/heads/main')
4280    steps:
4281      - task: AzureCLI@2
4282        displayName: Deploy to prod
4283        inputs:
4284          azureSubscription: ProdConnection
4285          scriptType: bash
4286          inlineScript: |
4287            echo "$(DEPLOY_KEY)" > /tmp/key
4288            az login --service-principal -u $SP -p $(DEPLOY_KEY)
4289"#;
4290        let graph = parse(yaml);
4291        let mut findings =
4292            taudit_core::rules::run_all_rules(&graph, taudit_core::propagation::DEFAULT_MAX_HOPS);
4293        // Find the Critical finding the rule would have emitted absent the
4294        // compensating-control pass — note `run_all_rules` already applies
4295        // the CC pass, so post-pass severity is what we read here.
4296        let f = findings
4297            .iter_mut()
4298            .find(|f| {
4299                f.category == taudit_core::finding::FindingCategory::UntrustedWithAuthority
4300                    && f.message.contains("DEPLOY_KEY")
4301            })
4302            .expect(
4303                "untrusted_with_authority must fire on the AzureCLI@2 step accessing DEPLOY_KEY",
4304            );
4305        assert_eq!(
4306            f.severity,
4307            taudit_core::finding::Severity::High,
4308            "Critical must be downgraded one tier to High by the ADO conditional-gate CC"
4309        );
4310        assert_eq!(
4311            f.extras.original_severity,
4312            Some(taudit_core::finding::Severity::Critical),
4313            "original_severity must record Critical so the audit trail survives"
4314        );
4315        assert!(
4316            f.extras
4317                .compensating_controls
4318                .iter()
4319                .any(|c| c.starts_with("ADO conditional gate")),
4320            "compensating_controls must include the ADO conditional-gate entry, got: {:?}",
4321            f.extras.compensating_controls
4322        );
4323    }
4324
4325    #[test]
4326    fn variable_groups_are_scoped_to_their_stage_or_job() {
4327        let yaml = r#"
4328stages:
4329  - stage: UsesGroup
4330    variables:
4331      - group: OpaqueGroup
4332    jobs:
4333      - job: A
4334        steps:
4335          - script: echo $(OPAQUE_VALUE)
4336  - stage: NoGroup
4337    jobs:
4338      - job: B
4339        steps:
4340          - script: echo $(STAGE_TWO_SECRET)
4341"#;
4342        let graph = parse(yaml);
4343        assert!(
4344            graph
4345                .nodes_of_kind(NodeKind::Secret)
4346                .any(|n| n.name == "STAGE_TWO_SECRET"),
4347            "variable group in first stage must not suppress secret refs in unrelated stages"
4348        );
4349    }
4350
4351    #[test]
4352    fn plain_variables_are_scoped_to_their_stage_or_job() {
4353        let yaml = r#"
4354stages:
4355  - stage: PlainStage
4356    variables:
4357      - name: SHARED_NAME
4358        value: plain
4359    jobs:
4360      - job: A
4361        steps:
4362          - script: echo $(SHARED_NAME)
4363  - stage: SecretRefStage
4364    jobs:
4365      - job: B
4366        steps:
4367          - script: echo $(SHARED_NAME)
4368"#;
4369        let graph = parse(yaml);
4370        assert!(
4371            graph
4372                .nodes_of_kind(NodeKind::Secret)
4373                .any(|n| n.name == "SHARED_NAME"),
4374            "plain variable in one stage must not suppress same-name secret refs in another stage"
4375        );
4376    }
4377
4378    #[test]
4379    fn parser_context_stamps_only_safe_metadata() {
4380        let yaml = "steps:\n  - script: echo hi\n";
4381        let parser = AdoParser;
4382        let source = PipelineSource {
4383            file: "ctx.yml".to_string(),
4384            repo: None,
4385            git_ref: None,
4386            commit_sha: None,
4387        };
4388        let ctx = AdoParserContext {
4389            org: Some("org-a".to_string()),
4390            project: Some("project-a".to_string()),
4391            pat: Some("very-secret-pat".to_string()),
4392        };
4393
4394        let graph = parser
4395            .parse_with_context(yaml, &source, Some(&ctx))
4396            .expect("parse succeeds");
4397
4398        assert_eq!(graph.metadata.get("ado_org"), Some(&"org-a".to_string()));
4399        assert_eq!(
4400            graph.metadata.get("ado_project"),
4401            Some(&"project-a".to_string())
4402        );
4403        assert_eq!(
4404            graph.metadata.get("ado_pat_present"),
4405            Some(&"true".to_string())
4406        );
4407        assert_eq!(
4408            graph.metadata.get("ado_variable_group_enrichment_ready"),
4409            Some(&"true".to_string())
4410        );
4411        assert!(
4412            !graph
4413                .metadata
4414                .values()
4415                .any(|v| v.contains("very-secret-pat")),
4416            "PAT must never be persisted into graph metadata"
4417        );
4418    }
4419
4420    #[test]
4421    fn parser_context_absent_preserves_existing_metadata_shape() {
4422        let yaml = "steps:\n  - script: echo hi\n";
4423        let graph = parse(yaml);
4424
4425        assert!(!graph.metadata.contains_key("ado_org"));
4426        assert!(!graph.metadata.contains_key("ado_project"));
4427        assert!(!graph.metadata.contains_key("ado_pat_present"));
4428        assert!(!graph
4429            .metadata
4430            .contains_key("ado_variable_group_enrichment_ready"));
4431    }
4432
4433    #[test]
4434    fn escaped_ado_variable_refs_are_not_secret_refs() {
4435        let yaml = r###"
4436steps:
4437  - script: |
4438      echo $$(NOT_A_SECRET)
4439      echo "##vso[task.setvariable variable=Count]$$(NOT_A_SECRET)"
4440    displayName: Escaped
4441"###;
4442        let graph = parse(yaml);
4443        assert!(
4444            !graph
4445                .nodes_of_kind(NodeKind::Secret)
4446                .any(|n| n.name == "NOT_A_SECRET"),
4447            "$$(VAR) is an escaped literal and must not create a Secret node"
4448        );
4449        let step = graph
4450            .nodes_of_kind(NodeKind::Step)
4451            .find(|n| n.name == "Escaped")
4452            .expect("step exists");
4453        assert!(
4454            !step
4455                .metadata
4456                .contains_key(META_ENV_GATE_WRITES_SECRET_VALUE),
4457            "escaped setvariable value must not be treated as secret-derived"
4458        );
4459    }
4460
4461    #[test]
4462    fn terraform_var_flag_detection_ignores_var_file() {
4463        let yaml = r#"
4464steps:
4465  - script: terraform apply -var-file=$(TFVARS_FILE)
4466    displayName: Var file
4467  - script: terraform apply -var "password=$(TF_PASSWORD)"
4468    displayName: Var value
4469"#;
4470        let graph = parse(yaml);
4471        let tfvars = graph
4472            .nodes_of_kind(NodeKind::Secret)
4473            .find(|n| n.name == "TFVARS_FILE")
4474            .expect("TFVARS_FILE secret exists");
4475        assert!(
4476            !tfvars.metadata.contains_key(META_CLI_FLAG_EXPOSED),
4477            "-var-file path should not be classified as an exposed -var value"
4478        );
4479        let password = graph
4480            .nodes_of_kind(NodeKind::Secret)
4481            .find(|n| n.name == "TF_PASSWORD")
4482            .expect("TF_PASSWORD secret exists");
4483        assert_eq!(
4484            password
4485                .metadata
4486                .get(META_CLI_FLAG_EXPOSED)
4487                .map(String::as_str),
4488            Some("true"),
4489            "-var key=$(SECRET) should still be marked as command-line exposed"
4490        );
4491    }
4492
4493    #[test]
4494    fn task_input_lookup_is_case_insensitive() {
4495        let yaml = r#"
4496steps:
4497  - task: TerraformTaskV4@4
4498    displayName: Terraform
4499    inputs:
4500      Command: apply
4501      CommandOptions: -auto-approve
4502  - task: AzureCLI@2
4503    displayName: SPN
4504    inputs:
4505      AddSpnToEnvironment: TRUE
4506      InLineScRiPt: echo hi
4507"#;
4508        let graph = parse(yaml);
4509        let terraform = graph
4510            .nodes_of_kind(NodeKind::Step)
4511            .find(|n| n.name == "Terraform")
4512            .expect("terraform step");
4513        assert_eq!(
4514            terraform
4515                .metadata
4516                .get(META_TERRAFORM_AUTO_APPROVE)
4517                .map(String::as_str),
4518            Some("true")
4519        );
4520        let spn = graph
4521            .nodes_of_kind(NodeKind::Step)
4522            .find(|n| n.name == "SPN")
4523            .expect("spn step");
4524        assert_eq!(
4525            spn.metadata.get(META_ADD_SPN_TO_ENV).map(String::as_str),
4526            Some("true")
4527        );
4528        assert_eq!(
4529            spn.metadata.get(META_SCRIPT_BODY).map(String::as_str),
4530            Some("echo hi"),
4531            "mixed-case inline script input key should be detected"
4532        );
4533    }
4534}
taudit_parse_ado/lib.rs

taudit_parse_ado/
lib.rs