taudit_parse_ado/
lib.rs

1use std::collections::{HashMap, HashSet};
2
3use base64::Engine;
4use serde::Deserialize;
5use taudit_core::error::TauditError;
6use taudit_core::graph::*;
7use taudit_core::ports::PipelineParser;
8
/// Optional Azure DevOps enrichment inputs plumbed from CLI flags.
///
/// When `org`, `project` and `pat` are all present and non-blank, the parser
/// uses them to query the Azure DevOps variable-groups REST endpoint so that
/// variable-group members can be classified as secret / non-secret. When any
/// of them is missing no network call is made and variable groups are
/// modelled statically.
///
/// `Debug` is implemented by hand (not derived) so that the PAT value can
/// never end up in logs, panic messages or test output: only its presence is
/// rendered, as `Some("<redacted>")`.
#[derive(Clone, Default, PartialEq, Eq)]
pub struct AdoParserContext {
    /// Azure DevOps organization name (optional).
    pub org: Option<String>,
    /// Azure DevOps project name (optional).
    pub project: Option<String>,
    /// Azure DevOps PAT (optional). Never persisted into graph metadata.
    pub pat: Option<String>,
}

impl std::fmt::Debug for AdoParserContext {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the derived layout for the non-sensitive fields, but replace
        // the secret with a fixed placeholder.
        f.debug_struct("AdoParserContext")
            .field("org", &self.org)
            .field("project", &self.project)
            .field("pat", &self.pat.as_ref().map(|_| "<redacted>"))
            .finish()
    }
}

impl AdoParserContext {
    /// Returns `true` when none of the three fields was supplied at all
    /// (blank-but-present values still count as supplied).
    fn is_empty(&self) -> bool {
        [&self.org, &self.project, &self.pat]
            .iter()
            .all(|field| field.is_none())
    }
}
28
/// Graph-metadata key under which the trimmed organization from the parser
/// context is stored (only written when the value is non-blank).
const META_ADO_ORG: &str = "ado_org";
/// Graph-metadata key under which the trimmed project from the parser context
/// is stored (only written when the value is non-blank).
const META_ADO_PROJECT: &str = "ado_project";
/// Graph-metadata key recording, as `"true"` / `"false"`, whether a non-blank
/// PAT was supplied. The PAT itself is never written to metadata.
const META_ADO_PAT_PRESENT: &str = "ado_pat_present";
/// Graph-metadata key recording, as `"true"` / `"false"`, whether org,
/// project and PAT are all available, i.e. whether enrichment may be tried.
const META_ADO_VG_ENRICHMENT_READY: &str = "ado_variable_group_enrichment_ready";
/// Graph-metadata key recording the outcome of the enrichment attempt:
/// `"true"` when the variable-group index was fetched, `"false"` on failure.
const META_ADO_VG_ENRICHED: &str = "ado_variable_group_enriched";

/// Variable-group lookup table: group name → (variable name → `isSecret`).
type AdoVariableGroupIndex = HashMap<String, HashMap<String, bool>>;
36
/// Regex-free check: does `s` invoke `terraform apply` together with
/// `-auto-approve` / `--auto-approve` (anywhere on the same line, or on one of
/// the next few lines when each preceding line ends in a shell continuation
/// `\` / PowerShell continuation `` ` ``)?
///
/// A line counts as a `terraform apply` invocation when it contains the
/// literal text `terraform apply` (space or tab separated), or when a
/// `terraform` token is followed — after any run of whitespace and any number
/// of dash-prefixed global options such as `-chdir=infra` — by an `apply`
/// token. The second form catches `terraform -chdir=infra apply -auto-approve`
/// and `terraform   apply -auto-approve`, which a plain substring test misses.
///
/// Everything from the first `#` on a line is treated as a comment and
/// ignored.
///
/// Case-sensitive on purpose — Terraform's CLI is case-sensitive and these
/// tokens never appear capitalised in real-world pipelines.
fn script_does_terraform_auto_apply(s: &str) -> bool {
    /// How many continuation lines after the `apply` line are inspected.
    const MAX_CONTINUATION_LINES: usize = 3;

    // Drop a trailing `#` comment (the text before the first `#` is kept).
    fn strip_comment(line: &str) -> &str {
        line.split('#').next().unwrap_or("")
    }

    // Does the line end in a shell (`\`) or PowerShell (`` ` ``) continuation?
    fn continues(line: &str) -> bool {
        let trimmed = line.trim_end();
        trimmed.ends_with('\\') || trimmed.ends_with('`')
    }

    // Does the line run the `apply` subcommand of `terraform`?
    fn invokes_terraform_apply(line: &str) -> bool {
        if line.contains("terraform apply") || line.contains("terraform\tapply") {
            return true;
        }
        // Token walk: `terraform`, then optional `-global-options`, then `apply`.
        let mut after_terraform = false;
        for token in line.split_whitespace() {
            if after_terraform {
                if token.starts_with('-') {
                    continue;
                }
                if token == "apply" {
                    return true;
                }
                after_terraform = false;
            }
            if token == "terraform" {
                after_terraform = true;
            }
        }
        false
    }

    let lines: Vec<&str> = s.lines().map(strip_comment).collect();
    for (i, line) in lines.iter().enumerate() {
        if !invokes_terraform_apply(line) {
            continue;
        }
        if line.contains("auto-approve") {
            return true;
        }
        // Continuation: peek a few lines forward for the flag, stopping as
        // soon as a line does not end in a continuation character.
        let mut continuing = continues(line);
        for next in lines.iter().skip(i + 1).take(MAX_CONTINUATION_LINES) {
            if !continuing {
                break;
            }
            if next.contains("auto-approve") {
                return true;
            }
            continuing = continues(next);
        }
    }
    false
}
69
70/// Azure DevOps YAML pipeline parser.
71pub struct AdoParser;
72
73impl AdoParser {
74    /// Parse an ADO pipeline with optional CLI-provided context for future
75    /// variable-group enrichment.
76    pub fn parse_with_context(
77        &self,
78        content: &str,
79        source: &PipelineSource,
80        ctx: Option<&AdoParserContext>,
81    ) -> Result<AuthorityGraph, TauditError> {
82        let mut de = serde_yaml::Deserializer::from_str(content);
83        let doc = de
84            .next()
85            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
86        let pipeline: AdoPipeline = match AdoPipeline::deserialize(doc) {
87            Ok(p) => p,
88            Err(e) => {
89                // Real-world ADO template fragments often wrap their root content in
90                // a parameter conditional like `- ${{ if eq(parameters.X, true) }}:`
91                // followed by a list of jobs. That is not a standard YAML mapping at
92                // the root, so serde_yaml fails with a "did not find expected key"
93                // error. These files are intended to be `template:`-included from a
94                // parent pipeline; analyzing them in isolation is not meaningful.
95                // Return a near-empty graph marked Partial instead of crashing the scan.
96                let msg = e.to_string();
97                if msg.contains("invalid type: sequence, expected struct AdoPipeline") {
98                    if let Some(recovered) = recover_after_leading_root_sequence(content) {
99                        let pipeline: AdoPipeline = serde_yaml::from_str(recovered)
100                            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
101                        let mut graph = build_ado_graph(pipeline, false, source, content, ctx);
102                        graph.mark_partial(
103                            GapKind::Structural,
104                            "ADO file starts with a root-level sequence before the pipeline mapping — recovered by analyzing the later pipeline mapping only".to_string(),
105                        );
106                        graph.stamp_edge_authority_summaries();
107                        return Ok(graph);
108                    }
109                }
110
111                let looks_like_template_fragment = (msg.contains("did not find expected key")
112                    || (msg.contains("parameters")
113                        && msg.contains("invalid type: map")
114                        && msg.contains("expected a sequence")))
115                    && has_root_parameter_conditional(content);
116                if looks_like_template_fragment {
117                    let mut graph = AuthorityGraph::new(source.clone());
118                    graph
119                        .metadata
120                        .insert(META_PLATFORM.into(), "azure-devops".into());
121                    apply_parser_context_metadata(&mut graph, ctx);
122                    graph.mark_partial(
123                        GapKind::Structural,
124                        "ADO template fragment with top-level parameter conditional — root structure depends on parent pipeline context".to_string(),
125                    );
126                    graph.stamp_edge_authority_summaries();
127                    return Ok(graph);
128                }
129                return Err(TauditError::Parse(format!("YAML parse error: {e}")));
130            }
131        };
132        let extra_docs = de.next().is_some();
133
134        let mut graph = build_ado_graph(pipeline, extra_docs, source, content, ctx);
135        graph.stamp_edge_authority_summaries();
136        Ok(graph)
137    }
138}
139
/// `PipelineParser` port implementation: context-free entry point used when
/// no Azure DevOps CLI context is available.
impl PipelineParser for AdoParser {
    /// Platform identifier; the same string is stamped into graph metadata
    /// under `META_PLATFORM` by the graph builder.
    fn platform(&self) -> &str {
        "azure-devops"
    }

    /// Parse `content` without any enrichment context (equivalent to
    /// `parse_with_context(content, source, None)`).
    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
        self.parse_with_context(content, source, None)
    }
}
149
/// Build the authority graph for one deserialized ADO pipeline.
///
/// * `pipeline` — the deserialized first YAML document.
/// * `extra_docs` — `true` when the file contained further YAML documents;
///   the graph is then marked Partial because only the first was analyzed.
/// * `source` — cloned into the new graph.
/// * `content` — the raw YAML text; passed to the helpers that inspect
///   top-level `stages:`/`jobs:` carriers and repository references.
/// * `ctx` — optional CLI context used for metadata stamping and
///   variable-group enrichment.
///
/// The pipeline body is walked in exactly one of three shapes, checked in
/// this order: `stages` → jobs → steps, `jobs` → steps, or bare `steps`.
/// The returned graph has already had `stamp_edge_authority_summaries()`
/// applied.
fn build_ado_graph(
    pipeline: AdoPipeline,
    extra_docs: bool,
    source: &PipelineSource,
    content: &str,
    ctx: Option<&AdoParserContext>,
) -> AuthorityGraph {
    let mut graph = AuthorityGraph::new(source.clone());
    graph
        .metadata
        .insert(META_PLATFORM.into(), "azure-devops".into());
    apply_parser_context_metadata(&mut graph, ctx);
    if extra_docs {
        graph.mark_partial(
            GapKind::Expression,
            "file contains multiple YAML documents (--- separator) — only the first was analyzed"
                .to_string(),
        );
    }
    mark_unresolved_top_level_carriers(content, &mut graph);

    // Detect PR trigger — sets graph-level META_TRIGGER for trigger_context_mismatch.
    // A genuine ADO PR trigger is always a mapping (`pr:\n  branches:...`) or a
    // sequence (`pr:\n  - main`). Scalar opt-out forms — `pr: none`, `pr: ~`,
    // `pr: false`, `pr: ""` — must NOT be treated as active triggers.
    // Checking is_mapping()||is_sequence() is more robust than enumerating every
    // scalar opt-out value (serde_yaml 0.9 parses "none" as a string, "~" as a
    // string, and `null` as null — the shape test handles all forms uniformly).
    let has_pr_trigger = pipeline
        .pr
        .as_ref()
        .map(|v| v.is_mapping() || v.is_sequence())
        .unwrap_or(false);
    if has_pr_trigger {
        graph.metadata.insert(META_TRIGGER.into(), "pr".into());
    }

    // Capture resources.repositories[] declarations and detect aliases that
    // are actually referenced by an `extends:`, `template: x@alias`, or
    // `checkout: alias`. The result is JSON-encoded into graph metadata
    // for the `template_extends_unpinned_branch` rule to consume.
    process_repositories(&pipeline, content, &mut graph);

    // Capture top-level `parameters:` declarations (used by
    // parameter_interpolation_into_shell). ADO defaults missing `type:`
    // to string, so a missing/empty type is treated as a string.
    if let Some(ref params) = pipeline.parameters {
        for p in params {
            // Parameters without a usable name cannot be referenced; skip them.
            let name = match p.name.as_ref() {
                Some(n) if !n.is_empty() => n.clone(),
                _ => continue,
            };
            let param_type = p.param_type.clone().unwrap_or_default();
            let has_values_allowlist = p.values.as_ref().map(|v| !v.is_empty()).unwrap_or(false);
            graph.parameters.insert(
                name,
                ParamSpec {
                    param_type,
                    has_values_allowlist,
                },
            );
        }
    }

    // Shared across all scopes and handed to every process_variables /
    // process_steps call below.
    let mut secret_ids: HashMap<String, NodeId> = HashMap::new();

    // System.AccessToken is always present — equivalent to GITHUB_TOKEN.
    // Tagged implicit: ADO injects this token into every task by platform design;
    // its exposure to marketplace tasks is structural, not a fixable misconfiguration.
    let mut meta = HashMap::new();
    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
    meta.insert(META_IMPLICIT.into(), "true".into());
    let token_id = graph.add_node_with_metadata(
        NodeKind::Identity,
        "System.AccessToken",
        TrustZone::FirstParty,
        meta,
    );

    // Pipeline-level permissions block — when present and non-broad (no write
    // permissions), downgrade System.AccessToken from broad → constrained so
    // over_privileged_identity does not fire on already-restricted pipelines.
    if let Some(ref perms_val) = pipeline.permissions {
        if !ado_permissions_are_broad(perms_val) {
            let perms_str = ado_permissions_display(perms_val);
            graph.nodes[token_id]
                .metadata
                .insert(META_IDENTITY_SCOPE.into(), "constrained".into());
            graph.nodes[token_id]
                .metadata
                .insert(META_PERMISSIONS.into(), perms_str);
        }
    }

    // Pipeline-level pool: adds Image node, tagged self-hosted when applicable.
    process_pool(&pipeline.pool, &pipeline.workspace, &mut graph);

    // Pipeline-level variable groups and named secrets.
    // pipeline_plain_vars tracks non-secret named variables so $(VAR) refs
    // in scripts don't generate false-positive Secret nodes for plain
    // config values. Stage/job scopes clone and extend this set so plain
    // variables do not leak sideways into unrelated stages or jobs.
    // pipeline_has_variable_groups is set when any pipeline-scope group is encountered so
    // extract_dollar_paren_secrets can avoid creating per-variable Secret
    // nodes from opaque groups (BUG-3).
    let mut pipeline_plain_vars: HashSet<String> = HashSet::new();
    let mut pipeline_has_variable_groups = false;
    // `None` unless org/project/PAT were all supplied and the fetch succeeded.
    let variable_group_index = maybe_fetch_variable_group_index(ctx, &mut graph);
    let pipeline_secret_ids = process_variables(
        &pipeline.variables,
        &mut graph,
        &mut secret_ids,
        "pipeline",
        &mut pipeline_plain_vars,
        &mut pipeline_has_variable_groups,
        variable_group_index.as_ref(),
    );

    // Determine pipeline structure: stages → jobs → steps, or jobs → steps, or steps only
    if let Some(ref stages) = pipeline.stages {
        for stage in stages {
            // Stage-level template reference — delegate and mark Partial
            if let Some(ref tpl) = stage.template {
                let stage_name = stage.stage.as_deref().unwrap_or("stage");
                add_template_delegation(stage_name, tpl, token_id, None, &mut graph);
                continue;
            }

            let stage_name = stage.stage.as_deref().unwrap_or("stage").to_string();
            let mut stage_plain_vars = pipeline_plain_vars.clone();
            let mut stage_has_variable_groups = false;
            let stage_secret_ids = process_variables(
                &stage.variables,
                &mut graph,
                &mut secret_ids,
                &stage_name,
                &mut stage_plain_vars,
                &mut stage_has_variable_groups,
                variable_group_index.as_ref(),
            );
            // Variable groups declared at pipeline scope also apply here.
            let stage_scope_has_variable_groups =
                pipeline_has_variable_groups || stage_has_variable_groups;

            let stage_condition = non_empty_condition(&stage.condition);
            if let Some(c) = stage_condition {
                mark_condition_partial(&mut graph, "stage", &stage_name, c);
            }
            let stage_depends_on =
                explicit_depends_on_csv(&stage.depends_on, &mut graph, "stage", &stage_name);

            for job in &stage.jobs {
                let job_name = job.effective_name();
                let mut job_plain_vars = stage_plain_vars.clone();
                let mut job_has_variable_groups = false;
                let job_secret_ids = process_variables(
                    &job.variables,
                    &mut graph,
                    &mut secret_ids,
                    &job_name,
                    &mut job_plain_vars,
                    &mut job_has_variable_groups,
                    variable_group_index.as_ref(),
                );
                let step_scope_has_variable_groups =
                    stage_scope_has_variable_groups || job_has_variable_groups;

                // Job-level workspace wins; otherwise inherit the pipeline's.
                let effective_workspace = job.workspace.as_ref().or(pipeline.workspace.as_ref());
                process_pool(&job.pool, &effective_workspace.cloned(), &mut graph);

                // Secrets visible to this job's steps: pipeline + stage + job scope.
                let all_secrets: Vec<NodeId> = pipeline_secret_ids
                    .iter()
                    .chain(&stage_secret_ids)
                    .chain(&job_secret_ids)
                    .copied()
                    .collect();

                // Node count before this job's steps are added; passed to
                // tag_job_steps_env_approval below.
                let steps_start = graph.nodes.len();

                let job_condition = non_empty_condition(&job.condition);
                if let Some(c) = job_condition {
                    mark_condition_partial(&mut graph, "job", &job_name, c);
                }
                // Job's `dependsOn:` overrides any stage-level value when both
                // are present (job-level wins for the job's own ordering); fall
                // back to the stage-level value otherwise so the chain still
                // surfaces on the steps.
                let job_depends_on =
                    explicit_depends_on_csv(&job.depends_on, &mut graph, "job", &job_name)
                        .or_else(|| stage_depends_on.clone());

                let outer_condition = join_conditions(stage_condition, job_condition);

                let job_steps = job.all_steps();
                process_steps(
                    &job_steps,
                    &job_name,
                    token_id,
                    &all_secrets,
                    &job_plain_vars,
                    step_scope_has_variable_groups,
                    outer_condition.as_deref(),
                    job_depends_on.as_deref(),
                    &mut graph,
                    &mut secret_ids,
                );

                if let Some(ref tpl) = job.template {
                    add_template_delegation(&job_name, tpl, token_id, Some(&job_name), &mut graph);
                }

                if job.has_environment_binding() {
                    tag_job_steps_env_approval(&mut graph, steps_start);
                }
            }
        }
    } else if let Some(ref jobs) = pipeline.jobs {
        // No stages: jobs hang directly off the pipeline, so there is no
        // stage-level variable scope, condition or dependsOn to inherit.
        for job in jobs {
            let job_name = job.effective_name();
            let mut job_plain_vars = pipeline_plain_vars.clone();
            let mut job_has_variable_groups = false;
            let job_secret_ids = process_variables(
                &job.variables,
                &mut graph,
                &mut secret_ids,
                &job_name,
                &mut job_plain_vars,
                &mut job_has_variable_groups,
                variable_group_index.as_ref(),
            );
            let step_scope_has_variable_groups =
                pipeline_has_variable_groups || job_has_variable_groups;

            let effective_workspace = job.workspace.as_ref().or(pipeline.workspace.as_ref());
            process_pool(&job.pool, &effective_workspace.cloned(), &mut graph);

            let all_secrets: Vec<NodeId> = pipeline_secret_ids
                .iter()
                .chain(&job_secret_ids)
                .copied()
                .collect();

            let steps_start = graph.nodes.len();

            let job_condition = non_empty_condition(&job.condition);
            if let Some(c) = job_condition {
                mark_condition_partial(&mut graph, "job", &job_name, c);
            }
            let job_depends_on =
                explicit_depends_on_csv(&job.depends_on, &mut graph, "job", &job_name);

            let job_steps = job.all_steps();
            process_steps(
                &job_steps,
                &job_name,
                token_id,
                &all_secrets,
                &job_plain_vars,
                step_scope_has_variable_groups,
                job_condition,
                job_depends_on.as_deref(),
                &mut graph,
                &mut secret_ids,
            );

            if let Some(ref tpl) = job.template {
                add_template_delegation(&job_name, tpl, token_id, Some(&job_name), &mut graph);
            }

            if job.has_environment_binding() {
                tag_job_steps_env_approval(&mut graph, steps_start);
            }
        }
    } else if let Some(ref steps) = pipeline.steps {
        // Bare `steps:` pipeline: everything runs in the implicit "pipeline"
        // scope with no outer condition and no dependsOn.
        process_steps(
            steps,
            "pipeline",
            token_id,
            &pipeline_secret_ids,
            &pipeline_plain_vars,
            pipeline_has_variable_groups,
            None,
            None,
            &mut graph,
            &mut secret_ids,
        );
    }

    // Cross-platform misclassification trap (red-team R2 #5): a YAML file
    // shaped like ADO at the top level (stages/jobs/steps present) but whose
    // body uses constructs the ADO parser doesn't recognise will deserialize
    // without errors and yield no Step nodes. Marking Partial surfaces the
    // gap instead of returning completeness=complete on a clean-but-empty
    // graph (which a CI gate would treat as "passed").
    let step_count = graph
        .nodes
        .iter()
        .filter(|n| n.kind == NodeKind::Step)
        .count();
    let had_step_carrier = pipeline.stages.as_ref().is_some_and(|s| !s.is_empty())
        || pipeline.jobs.as_ref().is_some_and(|j| !j.is_empty())
        || pipeline.steps.as_ref().is_some_and(|s| !s.is_empty());
    if step_count == 0 && had_step_carrier {
        graph.mark_partial(
                GapKind::Structural,
                "stages/jobs/steps parsed but produced 0 step nodes — possible non-ADO YAML wrong-platform-classified".to_string(),
            );
    }

    graph.stamp_edge_authority_summaries();
    graph
}
461
462fn apply_parser_context_metadata(graph: &mut AuthorityGraph, ctx: Option<&AdoParserContext>) {
463    let Some(ctx) = ctx.filter(|c| !c.is_empty()) else {
464        return;
465    };
466
467    if let Some(org) = ctx.org.as_ref().filter(|v| !v.trim().is_empty()) {
468        graph
469            .metadata
470            .insert(META_ADO_ORG.into(), org.trim().to_string());
471    }
472    if let Some(project) = ctx.project.as_ref().filter(|v| !v.trim().is_empty()) {
473        graph
474            .metadata
475            .insert(META_ADO_PROJECT.into(), project.trim().to_string());
476    }
477
478    let pat_present = ctx.pat.as_ref().is_some_and(|v| !v.trim().is_empty());
479    graph
480        .metadata
481        .insert(META_ADO_PAT_PRESENT.into(), pat_present.to_string());
482
483    let enrichment_ready = graph.metadata.contains_key(META_ADO_ORG)
484        && graph.metadata.contains_key(META_ADO_PROJECT)
485        && pat_present;
486    graph.metadata.insert(
487        META_ADO_VG_ENRICHMENT_READY.into(),
488        enrichment_ready.to_string(),
489    );
490}
491
492fn maybe_fetch_variable_group_index(
493    ctx: Option<&AdoParserContext>,
494    graph: &mut AuthorityGraph,
495) -> Option<AdoVariableGroupIndex> {
496    let ctx = ctx?;
497    if graph
498        .metadata
499        .get(META_ADO_VG_ENRICHMENT_READY)
500        .is_none_or(|v| v != "true")
501    {
502        return None;
503    }
504
505    match fetch_variable_group_index(ctx) {
506        Ok(index) => {
507            graph
508                .metadata
509                .insert(META_ADO_VG_ENRICHED.into(), "true".into());
510            Some(index)
511        }
512        Err(err) => {
513            graph
514                .metadata
515                .insert(META_ADO_VG_ENRICHED.into(), "false".into());
516            graph.mark_partial(
517                GapKind::Structural,
518                format!(
519                    "warning: ADO variable-group enrichment failed ({err}) — falling back to static variable-group modelling"
520                ),
521            );
522            None
523        }
524    }
525}
526
527fn fetch_variable_group_index(ctx: &AdoParserContext) -> Result<AdoVariableGroupIndex, String> {
528    let org = ctx
529        .org
530        .as_deref()
531        .map(str::trim)
532        .filter(|v| !v.is_empty())
533        .ok_or_else(|| "missing org".to_string())?;
534    let project = ctx
535        .project
536        .as_deref()
537        .map(str::trim)
538        .filter(|v| !v.is_empty())
539        .ok_or_else(|| "missing project".to_string())?;
540    let pat = ctx
541        .pat
542        .as_deref()
543        .map(str::trim)
544        .filter(|v| !v.is_empty())
545        .ok_or_else(|| "missing PAT".to_string())?;
546
547    let org_base = if org.starts_with("http://") || org.starts_with("https://") {
548        org.trim_end_matches('/').to_string()
549    } else {
550        format!("https://dev.azure.com/{}", org.trim_matches('/'))
551    };
552    let project_segment = project.replace(' ', "%20");
553    let url = format!(
554        "{org_base}/{project_segment}/_apis/distributedtask/variablegroups?api-version=7.1"
555    );
556    let auth = format!(
557        "Basic {}",
558        base64::engine::general_purpose::STANDARD.encode(format!(":{pat}"))
559    );
560
561    let mut response = ureq::get(&url)
562        .header("Accept", "application/json")
563        .header("Authorization", &auth)
564        .call()
565        .map_err(map_ureq_error)?;
566
567    let body: serde_json::Value = response
568        .body_mut()
569        .read_json()
570        .map_err(|e| format!("invalid JSON response: {e}"))?;
571    parse_variable_group_index_from_json(&body)
572}
573
574fn map_ureq_error(err: ureq::Error) -> String {
575    match err {
576        ureq::Error::StatusCode(code) => format!("HTTP {code} from variablegroups API"),
577        other => other.to_string(),
578    }
579}
580
581fn parse_variable_group_index_from_json(
582    body: &serde_json::Value,
583) -> Result<AdoVariableGroupIndex, String> {
584    let mut index: AdoVariableGroupIndex = HashMap::new();
585    let values = body
586        .get("value")
587        .and_then(|v| v.as_array())
588        .ok_or_else(|| "response missing 'value' array".to_string())?;
589
590    for item in values {
591        let Some(group_name) = item.get("name").and_then(|v| v.as_str()) else {
592            continue;
593        };
594        let mut group_vars: HashMap<String, bool> = HashMap::new();
595        if let Some(vars_obj) = item.get("variables").and_then(|v| v.as_object()) {
596            for (var_name, meta) in vars_obj {
597                let is_secret = meta
598                    .get("isSecret")
599                    .and_then(|v| v.as_bool())
600                    .unwrap_or(false);
601                group_vars.insert(var_name.clone(), is_secret);
602            }
603        }
604        index.insert(group_name.to_string(), group_vars);
605    }
606
607    Ok(index)
608}
609
/// Yields the trimmed text of an ADO `condition:` value, or `None` when the
/// value is absent, empty, or whitespace-only (ADO treats those the same as
/// omitting the key), so callers do not record a Partial-Expression gap for
/// noise.
fn non_empty_condition(c: &Option<String>) -> Option<&str> {
    c.as_deref()
        .map(str::trim)
        .filter(|trimmed| !trimmed.is_empty())
}
622
/// Combine the outer condition chain (stage and/or job, already AND-joined)
/// with this scope's own condition into the ` AND `-joined text stamped on
/// Step nodes via `META_CONDITION`. Either side may be absent; `None` is
/// returned only when both are.
fn join_conditions(outer: Option<&str>, inner: Option<&str>) -> Option<String> {
    let present: Vec<&str> = outer.into_iter().chain(inner).collect();
    if present.is_empty() {
        None
    } else {
        Some(present.join(" AND "))
    }
}
634
635/// Top-level `stages:` and `jobs:` carriers may be supplied as template
636/// expressions (for example `stages: ${{ parameters.stages }}`). The serde
637/// model accepts those shapes so parsing can continue, but they hide the
638/// authority-carrying job/step graph until runtime. Mark them explicitly
639/// Partial instead of returning a clean Complete graph with no steps.
640fn mark_unresolved_top_level_carriers(content: &str, graph: &mut AuthorityGraph) {
641    let mut de = serde_yaml::Deserializer::from_str(content);
642    let Some(doc) = de.next() else {
643        return;
644    };
645    let Ok(value) = serde_yaml::Value::deserialize(doc) else {
646        return;
647    };
648    let Some(map) = value.as_mapping() else {
649        return;
650    };
651
652    for key in ["stages", "jobs"] {
653        let Some(value) = map.get(key) else {
654            continue;
655        };
656        if is_ado_template_expression_scalar(value) {
657            graph.mark_partial(
658                GapKind::Expression,
659                format!(
660                    "ADO top-level `{key}:` uses a template expression — {key} cannot be enumerated statically"
661                ),
662            );
663        }
664    }
665}
666
667fn is_ado_template_expression_scalar(value: &serde_yaml::Value) -> bool {
668    value
669        .as_str()
670        .map(|s| {
671            let trimmed = s.trim();
672            trimmed.starts_with("${{") && trimmed.ends_with("}}")
673        })
674        .unwrap_or(false)
675}
676
677/// Mark the graph Partial with `GapKind::Expression` and a reason that names
678/// the scope kind ("stage" / "job" / "step"), the entity's display name, and
679/// the literal condition text — enough for an operator to grep findings
680/// against `condition:` clauses in the source pipeline.
681fn mark_condition_partial(
682    graph: &mut AuthorityGraph,
683    scope_kind: &str,
684    name: &str,
685    condition: &str,
686) {
687    graph.mark_partial(
688        GapKind::Expression,
689        format!(
690            "ADO {scope_kind} '{name}' condition: '{condition}' — runtime evaluation not modelled"
691        ),
692    );
693}
694
695/// Normalize explicit `dependsOn:` to a comma-joined predecessor list.
696///
697/// ADO accepts string and list-of-strings forms, both of which are statically
698/// representable and returned here. Any other YAML shape is usually a template
699/// expression or conditional object that resolves at runtime; in that case we
700/// return `None` and mark the graph Partial-Expression so completeness is not
701/// overstated.
702fn explicit_depends_on_csv(
703    depends_on: &Option<DependsOn>,
704    graph: &mut AuthorityGraph,
705    scope_kind: &str,
706    name: &str,
707) -> Option<String> {
708    let d = depends_on.as_ref()?;
709    match d {
710        DependsOn::Single(s) => {
711            let trimmed = s.trim();
712            if trimmed.is_empty() {
713                None
714            } else {
715                Some(trimmed.to_string())
716            }
717        }
718        DependsOn::Multiple(v) => {
719            let csv = v
720                .iter()
721                .map(|s| s.trim())
722                .filter(|s| !s.is_empty())
723                .collect::<Vec<_>>()
724                .join(",");
725            if csv.is_empty() {
726                None
727            } else {
728                Some(csv)
729            }
730        }
731        DependsOn::Other(raw) => {
732            mark_depends_on_partial(graph, scope_kind, name, raw);
733            None
734        }
735    }
736}
737
738fn mark_depends_on_partial(
739    graph: &mut AuthorityGraph,
740    scope_kind: &str,
741    name: &str,
742    raw: &serde_yaml::Value,
743) {
744    let shape = match raw {
745        serde_yaml::Value::Null => "null",
746        serde_yaml::Value::Bool(_) => "bool",
747        serde_yaml::Value::Number(_) => "number",
748        serde_yaml::Value::String(_) => "string",
749        serde_yaml::Value::Sequence(_) => "sequence",
750        serde_yaml::Value::Mapping(_) => "mapping",
751        serde_yaml::Value::Tagged(_) => "tagged",
752    };
753    graph.mark_partial(
754        GapKind::Expression,
755        format!(
756            "ADO {scope_kind} '{name}' dependsOn uses unsupported {shape} form — runtime expansion not modelled"
757        ),
758    );
759}
760
761/// Process an ADO `pool:` block. ADO pools come in two shapes:
762///   - `pool: my-self-hosted-pool` (string shorthand — always self-hosted)
763///   - `pool: { name: my-pool }` (named pool — self-hosted)
764///   - `pool: { vmImage: ubuntu-latest }` (Microsoft-hosted)
765///   - `pool: { name: my-pool, vmImage: ubuntu-latest }` (hosted; vmImage wins)
766///
767/// Creates an Image node representing the agent environment. Self-hosted pools
768/// Returns `true` when an ADO pipeline-level `permissions:` value implies a
769/// broad (write-capable) token scope, `false` when every scope is `none` or
770/// `read` (i.e. the token has been explicitly restricted).
771///
772/// ADO permission values are the strings `"read"`, `"write"`, and `"none"`.
773/// Any unrecognised shape is conservatively treated as broad.
774fn ado_permissions_are_broad(perms: &serde_yaml::Value) -> bool {
775    if let Some(map) = perms.as_mapping() {
776        map.values().any(|v| v.as_str() == Some("write"))
777    } else {
778        // Scalar form: ADO accepts "read", "write", "none" as pipeline-level
779        // permission values. "read" and "none" are constrained; "write" is
780        // broad. Anything else (null, tilde, empty, unrecognised string) is
781        // conservatively treated as broad (unknown = risky).
782        matches!(perms.as_str(), Some("write"))
783    }
784}
785
786/// Format an ADO `permissions:` YAML value into a compact human-readable
787/// string for the finding message (e.g. `"contents: none, idToken: none"`).
788fn ado_permissions_display(perms: &serde_yaml::Value) -> String {
789    if let Some(map) = perms.as_mapping() {
790        map.iter()
791            .filter_map(|(k, v)| {
792                let key = k.as_str()?;
793                let val = v.as_str().unwrap_or("?");
794                Some(format!("{key}: {val}"))
795            })
796            .collect::<Vec<_>>()
797            .join(", ")
798    } else {
799        perms.as_str().unwrap_or("none").to_string()
800    }
801}
802
803/// are tagged with META_SELF_HOSTED so downstream rules can flag them.
804///
805/// When `workspace` is provided and contains `clean:` with a truthy value
806/// (`true`, `all`, `outputs`, `resources`), the Image node is also tagged
807/// with META_WORKSPACE_CLEAN.
808fn process_pool(
809    pool: &Option<serde_yaml::Value>,
810    workspace: &Option<serde_yaml::Value>,
811    graph: &mut AuthorityGraph,
812) {
813    let Some(pool_val) = pool else {
814        return;
815    };
816
817    let (image_name, is_self_hosted) = match pool_val {
818        serde_yaml::Value::String(s) => (s.clone(), true),
819        serde_yaml::Value::Mapping(map) => {
820            let name = map.get("name").and_then(|v| v.as_str());
821            let vm_image = map.get("vmImage").and_then(|v| v.as_str());
822            match (name, vm_image) {
823                (_, Some(vm)) => (vm.to_string(), false),
824                (Some(n), None) => (n.to_string(), true),
825                (None, None) => return,
826            }
827        }
828        _ => return,
829    };
830
831    let mut meta = HashMap::new();
832    if is_self_hosted {
833        meta.insert(META_SELF_HOSTED.into(), "true".into());
834    }
835    if has_workspace_clean(workspace) {
836        meta.insert(META_WORKSPACE_CLEAN.into(), "true".into());
837    }
838    graph.add_node_with_metadata(NodeKind::Image, image_name, TrustZone::FirstParty, meta);
839}
840
841/// Returns `true` when the ADO `workspace:` value specifies a `clean:` setting
842/// that wipes the workspace between runs. Recognised truthy forms:
843///   - `workspace: { clean: all }`
844///   - `workspace: { clean: outputs }`
845///   - `workspace: { clean: resources }`
846///   - `workspace: { clean: true }`
847fn has_workspace_clean(workspace: &Option<serde_yaml::Value>) -> bool {
848    let Some(ws) = workspace else {
849        return false;
850    };
851    let Some(map) = ws.as_mapping() else {
852        return false;
853    };
854    let Some(clean) = map.get("clean") else {
855        return false;
856    };
857    match clean {
858        serde_yaml::Value::Bool(b) => *b,
859        serde_yaml::Value::String(s) => {
860            let lower = s.to_ascii_lowercase();
861            matches!(lower.as_str(), "all" | "outputs" | "resources" | "true")
862        }
863        _ => false,
864    }
865}
866
867/// Scan the parsed pipeline for `resources.repositories[]` declarations and
868/// determine which aliases are referenced inside the same file. Stores the
869/// result as a JSON-encoded array in `graph.metadata[META_REPOSITORIES]`.
870///
871/// Usage signal — an alias is "used" when it appears in any of:
872///   - `template: <path>@<alias>` (anywhere — top-level extends, stage, job, step)
873///   - `extends:` referencing `template: <path>@<alias>`
874///   - `checkout: <alias>` (steps consume an external repo into the workspace)
875///
876/// The `extends:` and per-step `template:` references are resolved by walking
877/// the parsed Value tree; the raw text is only used for the `checkout:` case
878/// (cheap substring scan, robust to YAML shape variation).
879fn process_repositories(pipeline: &AdoPipeline, raw_content: &str, graph: &mut AuthorityGraph) {
880    let resources = match pipeline.resources.as_ref() {
881        Some(r) if !r.repositories.is_empty() => r,
882        _ => return,
883    };
884
885    // Collect all aliases referenced as `template: x@alias`. We walk every
886    // `template:` field appearing in the parsed pipeline (extends and steps
887    // already deserialize to their own paths; stages/jobs use the per-job
888    // template field). The raw YAML walk via serde_yaml::Value covers all
889    // shapes uniformly without re-deriving structure-specific models.
890    let mut used_aliases: HashSet<String> = HashSet::new();
891
892    if let Some(ref ext) = pipeline.extends {
893        collect_template_alias_refs(ext, &mut used_aliases);
894    }
895    if let Ok(value) = serde_yaml::from_str::<serde_yaml::Value>(raw_content) {
896        collect_template_alias_refs(&value, &mut used_aliases);
897        collect_checkout_alias_refs(&value, &mut used_aliases);
898    }
899
900    // Build the JSON-encoded repository descriptor list.
901    let mut entries: Vec<serde_json::Value> = Vec::with_capacity(resources.repositories.len());
902    for repo in &resources.repositories {
903        let Some(alias) = repo.repository.as_ref().filter(|s| !s.is_empty()) else {
904            continue;
905        };
906        let used = used_aliases.contains(alias);
907        let mut obj = serde_json::Map::new();
908        obj.insert("alias".into(), serde_json::Value::String(alias.clone()));
909        if let Some(ref t) = repo.repo_type {
910            obj.insert("repo_type".into(), serde_json::Value::String(t.clone()));
911        }
912        if let Some(ref n) = repo.name {
913            obj.insert("name".into(), serde_json::Value::String(n.clone()));
914        }
915        if let Some(ref r) = repo.git_ref {
916            obj.insert("ref".into(), serde_json::Value::String(r.clone()));
917        }
918        obj.insert("used".into(), serde_json::Value::Bool(used));
919        entries.push(serde_json::Value::Object(obj));
920    }
921
922    if let Ok(json) = serde_json::to_string(&serde_json::Value::Array(entries)) {
923        graph.metadata.insert(META_REPOSITORIES.into(), json);
924    }
925}
926
927/// Walk a YAML value and record every `template: <ref>@<alias>` alias seen.
928/// Recurses into mappings and sequences so it catches references in extends,
929/// stages, jobs, steps, and conditional blocks indiscriminately.
930fn collect_template_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
931    match value {
932        serde_yaml::Value::Mapping(map) => {
933            for (k, v) in map {
934                if k.as_str() == Some("template") {
935                    if let Some(s) = v.as_str() {
936                        if let Some(alias) = parse_template_alias(s) {
937                            sink.insert(alias);
938                        }
939                    }
940                }
941                collect_template_alias_refs(v, sink);
942            }
943        }
944        serde_yaml::Value::Sequence(seq) => {
945            for v in seq {
946                collect_template_alias_refs(v, sink);
947            }
948        }
949        _ => {}
950    }
951}
952
953/// Walk a YAML value and record every `checkout: <alias>` value seen, except
954/// `self` and `none` which are platform keywords (not external repo aliases).
955fn collect_checkout_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
956    match value {
957        serde_yaml::Value::Mapping(map) => {
958            for (k, v) in map {
959                if k.as_str() == Some("checkout") {
960                    if let Some(s) = v.as_str() {
961                        if s != "self" && s != "none" && !s.is_empty() {
962                            sink.insert(s.to_string());
963                        }
964                    }
965                }
966                collect_checkout_alias_refs(v, sink);
967            }
968        }
969        serde_yaml::Value::Sequence(seq) => {
970            for v in seq {
971                collect_checkout_alias_refs(v, sink);
972            }
973        }
974        _ => {}
975    }
976}
977
/// Pull the `<alias>` part out of a `template: <path>@<alias>` reference.
///
/// The text after the last `@` is the alias. Yields `None` when there is no
/// `@` at all (a plain in-repo path such as `templates/deploy.yml`, which
/// targets the current pipeline's repo rather than a
/// `resources.repositories[]` entry) or when nothing follows the last `@`.
fn parse_template_alias(template_ref: &str) -> Option<String> {
    template_ref
        .rsplit_once('@')
        .map(|(_path, alias)| alias)
        .filter(|alias| !alias.is_empty())
        .map(str::to_owned)
}
990
991/// Tag every Step node added since `start_idx` with META_ENV_APPROVAL.
992/// Used after `process_steps` for a job whose `environment:` is configured —
993/// the environment binding indicates the job sits behind a manual approval
994/// gate, which is an isolation boundary that breaks automatic propagation.
995fn tag_job_steps_env_approval(graph: &mut AuthorityGraph, start_idx: usize) {
996    for node in graph.nodes.iter_mut().skip(start_idx) {
997        if node.kind == NodeKind::Step {
998            node.metadata
999                .insert(META_ENV_APPROVAL.into(), "true".into());
1000        }
1001    }
1002}
1003
1004/// Process a variable list, creating Secret nodes and returning their IDs.
1005/// Returns IDs for secrets only (not variable groups, which are opaque).
1006/// Populates `plain_vars` with the names of non-secret named variables so
1007/// downstream `$(VAR)` scanning can skip them.
1008fn process_variables(
1009    variables: &Option<AdoVariables>,
1010    graph: &mut AuthorityGraph,
1011    cache: &mut HashMap<String, NodeId>,
1012    scope: &str,
1013    plain_vars: &mut HashSet<String>,
1014    has_variable_groups: &mut bool,
1015    variable_group_index: Option<&AdoVariableGroupIndex>,
1016) -> Vec<NodeId> {
1017    let mut ids = Vec::new();
1018
1019    let vars = match variables.as_ref() {
1020        Some(v) => v,
1021        None => return ids,
1022    };
1023
1024    for var in &vars.0 {
1025        match var {
1026            AdoVariable::Group { group } => {
1027                // Skip template-expression group names like `${{ parameters.env }}`.
1028                // We can't resolve them statically — mark Partial but don't create
1029                // a misleading Secret node with the expression as its name.
1030                if group.contains("${{") {
1031                    graph.mark_partial(
1032                        GapKind::Expression,
1033                        format!(
1034                            "variable group in {scope} uses template expression — group name unresolvable at parse time"
1035                        ),
1036                    );
1037                    continue;
1038                }
1039
1040                if let Some(group_vars) = variable_group_index.and_then(|idx| idx.get(group)) {
1041                    for (var_name, is_secret) in group_vars {
1042                        if *is_secret {
1043                            let id = find_or_create_secret(graph, cache, var_name);
1044                            ids.push(id);
1045                        } else {
1046                            plain_vars.insert(var_name.clone());
1047                        }
1048                    }
1049                    continue;
1050                }
1051
1052                *has_variable_groups = true;
1053                let mut meta = HashMap::new();
1054                meta.insert(META_VARIABLE_GROUP.into(), "true".into());
1055                let id = graph.add_node_with_metadata(
1056                    NodeKind::Secret,
1057                    group.as_str(),
1058                    TrustZone::FirstParty,
1059                    meta,
1060                );
1061                cache.insert(group.clone(), id);
1062                ids.push(id);
1063                graph.mark_partial(
1064                    GapKind::Structural,
1065                    format!(
1066                        "variable group '{group}' in {scope} — contents unresolvable without ADO API access"
1067                    ),
1068                );
1069            }
1070            AdoVariable::Named {
1071                name, is_secret, ..
1072            } => {
1073                if *is_secret {
1074                    let id = find_or_create_secret(graph, cache, name);
1075                    ids.push(id);
1076                } else {
1077                    plain_vars.insert(name.clone());
1078                }
1079            }
1080        }
1081    }
1082
1083    ids
1084}
1085
/// Process a list of ADO steps, adding nodes and edges to the graph.
///
/// Steps that carry a `template:` are handed to `add_template_delegation`
/// and receive none of the per-step processing described below. Every other
/// step becomes a Step node with a `HasAccessTo` edge to `token_id` and to
/// each ID in `inherited_secrets`, and is stamped with `job_name` via
/// `META_JOB_NAME`.
///
/// `job_name` also seeds the fallback step name (`{job_name}[{idx}]`) used
/// for template steps that have neither `displayName` nor `name`.
///
/// `plain_vars`, `has_variable_groups` and `cache` are forwarded unchanged to
/// `extract_dollar_paren_secrets` whenever task inputs, `env:` values or the
/// inline script are scanned for `$(varName)` references.
///
/// `outer_condition` is the AND-joined chain of stage- and job-level
/// `condition:` expressions that gate this step's containing job at runtime.
/// When present, it (combined with any per-step `condition:`) is stamped onto
/// every emitted Step node via `META_CONDITION` so downstream rules can see
/// that the step is conditionally reachable.
///
/// `outer_depends_on` is the comma-joined `dependsOn:` predecessor list
/// inherited from the job (or stage). Stamped onto Step nodes via
/// `META_DEPENDS_ON` only when non-default (the parser does not synthesise
/// the implicit "depends on previous job/stage" link).
// Each parameter is a separate piece of caller-held context, hence the allow.
#[allow(clippy::too_many_arguments)]
fn process_steps(
    steps: &[AdoStep],
    job_name: &str,
    token_id: NodeId,
    inherited_secrets: &[NodeId],
    plain_vars: &HashSet<String>,
    has_variable_groups: bool,
    outer_condition: Option<&str>,
    outer_depends_on: Option<&str>,
    graph: &mut AuthorityGraph,
    cache: &mut HashMap<String, NodeId>,
) {
    for (idx, step) in steps.iter().enumerate() {
        // Template step — delegation, mark partial
        if let Some(ref tpl) = step.template {
            let step_name = step
                .display_name
                .as_deref()
                .or(step.name.as_deref())
                .map(|s| s.to_string())
                .unwrap_or_else(|| format!("{job_name}[{idx}]"));
            add_template_delegation(&step_name, tpl, token_id, Some(job_name), graph);
            // Nothing below applies to a template step: no condition,
            // dependsOn, inputs, env or script handling is performed for it.
            continue;
        }

        // Determine step kind and trust zone
        let (step_name, trust_zone, inline_script) = classify_step(step, job_name, idx);

        // Step-level condition: mark Partial-Expression and join with the
        // outer (stage + job) chain so the step's META_CONDITION reflects the
        // full ` AND `-joined gate it actually sits behind at runtime.
        let step_condition = non_empty_condition(&step.condition);
        if let Some(c) = step_condition {
            mark_condition_partial(graph, "step", &step_name, c);
        }
        let effective_condition = join_conditions(outer_condition, step_condition);

        // Step-level `dependsOn:` overrides the inherited (job-level) value
        // when present. Default behaviour (no key) inherits from the job —
        // and at the job level we already only stamped non-default values,
        // so absence at both layers means we stamp nothing.
        let effective_depends_on =
            explicit_depends_on_csv(&step.depends_on, graph, "step", &step_name)
                .or_else(|| outer_depends_on.map(|s| s.to_string()));

        let step_id = graph.add_node(NodeKind::Step, &step_name, trust_zone);

        // Stamp parent job name so consumers (e.g. `taudit map --job`) can
        // attribute steps back to their containing job.
        if let Some(node) = graph.nodes.get_mut(step_id) {
            node.metadata.insert(META_JOB_NAME.into(), job_name.into());
            // Stamp the raw inline script body so script-aware rules
            // (env-export of secrets, secret materialisation to files,
            // Key Vault → plaintext) can pattern-match on the actual
            // command text the agent will run.
            if let Some(ref body) = inline_script {
                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
            }
            // Stamp the AND-joined chain of stage/job/step `condition:`
            // expressions that gate this step at runtime. Consumed by
            // `apply_compensating_controls` to downgrade severity on
            // findings whose firing step is gated behind a conditional.
            if let Some(ref c) = effective_condition {
                node.metadata.insert(META_CONDITION.into(), c.clone());
            }
            // Stamp the comma-joined non-default `dependsOn:` predecessor
            // list. No consumer rule yet — parser-side hook for future
            // cross-job taint analysis.
            if let Some(ref d) = effective_depends_on {
                if !d.is_empty() {
                    node.metadata.insert(META_DEPENDS_ON.into(), d.clone());
                }
            }
        }

        // Every step has access to System.AccessToken
        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);

        // checkout step with persistCredentials: true writes the token to .git/config on disk,
        // making it accessible to all subsequent steps and filesystem-level attackers.
        if step.checkout.is_some() && step.persist_credentials == Some(true) {
            graph.add_edge(step_id, token_id, EdgeKind::PersistsTo);
        }

        // `checkout: self` pulls the repo being built. In a PR trigger context this
        // is the untrusted fork head — tag the step so downstream rules can gate on
        // trigger context. Default ADO checkout (`checkout: self`) is the common case.
        if let Some(ref ck) = step.checkout {
            if ck == "self" {
                if let Some(node) = graph.nodes.get_mut(step_id) {
                    node.metadata
                        .insert(META_CHECKOUT_SELF.into(), "true".into());
                }
            }
        }

        // Inherited pipeline/stage/job secrets
        for &secret_id in inherited_secrets {
            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
        }

        // Service connection detection from task inputs (case-insensitive key match)
        if let Some(ref inputs) = step.inputs {
            // Lower-cased input names that carry a service connection name.
            let service_conn_keys = [
                "azuresubscription",
                "connectedservicename",
                "connectedservicenamearm",
                "kubernetesserviceconnection",
                "environmentservicename",
                "backendservicearm",
            ];
            // determinism: sort by key — same YAML must produce same NodeId order
            let mut input_entries: Vec<(&String, &serde_yaml::Value)> = inputs.iter().collect();
            input_entries.sort_by(|a, b| a.0.cmp(b.0));
            for (raw_key, val) in input_entries {
                let lower = raw_key.to_lowercase();
                if !service_conn_keys.contains(&lower.as_str()) {
                    continue;
                }
                // Falls back to the input key itself when the value cannot be
                // read as a string.
                let conn_name = yaml_value_as_str(val).unwrap_or(raw_key.as_str());
                // Names starting with `$(` are skipped here: no Identity
                // node is created for them.
                if !conn_name.starts_with("$(") {
                    // Stamp the connection name onto the step itself so rules
                    // that need the name (e.g. terraform_auto_approve_in_prod)
                    // don't have to traverse edges.
                    if let Some(node) = graph.nodes.get_mut(step_id) {
                        node.metadata
                            .insert(META_SERVICE_CONNECTION_NAME.into(), conn_name.to_string());
                    }

                    let mut meta = HashMap::new();
                    meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
                    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
                    // ADO pipeline YAML does not embed the authentication scheme
                    // of the service endpoint (WorkloadIdentityFederation vs.
                    // ServicePrincipal), so we cannot reliably determine whether a
                    // connection uses OIDC.  Leave META_OIDC unset -- the safe
                    // default -- so that rules like service_connection_scope_mismatch
                    // can fire on classic SPN connections.
                    let conn_id = graph.add_node_with_metadata(
                        NodeKind::Identity,
                        conn_name,
                        TrustZone::FirstParty,
                        meta,
                    );
                    graph.add_edge(step_id, conn_id, EdgeKind::HasAccessTo);
                }
            }

            // addSpnToEnvironment: true exposes federated SPN material
            // (idToken, servicePrincipalKey, servicePrincipalId, tenantId)
            // to the step's inline script via env vars. Stamp the step so
            // addspn_with_inline_script can pattern-match without traversal.
            if let Some(val) = input_value(inputs, "addSpnToEnvironment") {
                // Accept both a YAML boolean and the string "true" (any case).
                let truthy = match val {
                    serde_yaml::Value::Bool(b) => *b,
                    serde_yaml::Value::String(s) => s.eq_ignore_ascii_case("true"),
                    _ => false,
                };
                if truthy {
                    if let Some(node) = graph.nodes.get_mut(step_id) {
                        node.metadata
                            .insert(META_ADD_SPN_TO_ENV.into(), "true".into());
                    }
                }
            }

            // TerraformCLI@N / TerraformTaskV1..V4 with command: apply +
            // commandOptions containing auto-approve = same as inline
            // `terraform apply --auto-approve`. Detect once here so the rule
            // can read a single META_TERRAFORM_AUTO_APPROVE marker.
            let task_lower = step
                .task
                .as_deref()
                .map(|t| t.to_lowercase())
                .unwrap_or_default();
            let is_terraform_task = task_lower.starts_with("terraformcli@")
                || task_lower.starts_with("terraformtask@")
                || task_lower.starts_with("terraformtaskv");
            if is_terraform_task {
                let cmd_lower = input_str(inputs, "command")
                    .map(|s| s.to_lowercase())
                    .unwrap_or_default();
                let opts = input_str(inputs, "commandOptions").unwrap_or("");
                // The substring test covers both `-auto-approve` and
                // `--auto-approve`; it is case-sensitive.
                if cmd_lower == "apply" && opts.contains("auto-approve") {
                    if let Some(node) = graph.nodes.get_mut(step_id) {
                        node.metadata
                            .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
                    }
                }
            }

            // Detect $(varName) references in task input values
            // determinism: sort by key — same YAML must produce same NodeId order
            let mut paren_entries: Vec<(&String, &serde_yaml::Value)> = inputs.iter().collect();
            paren_entries.sort_by(|a, b| a.0.cmp(b.0));
            for (_k, val) in paren_entries {
                if let Some(s) = yaml_value_as_str(val) {
                    extract_dollar_paren_secrets(
                        s,
                        step_id,
                        plain_vars,
                        has_variable_groups,
                        graph,
                        cache,
                    );
                }
            }
        }

        // Inline-script detection of `terraform apply --auto-approve`.
        // Done after inputs processing so we can OR the two signals into a
        // single META_TERRAFORM_AUTO_APPROVE marker on the step.
        if let Some(ref body) = inline_script {
            if script_does_terraform_auto_apply(body) {
                if let Some(node) = graph.nodes.get_mut(step_id) {
                    node.metadata
                        .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
                }
            }
        }

        // Detect $(varName) in step env values
        if let Some(ref env) = step.env {
            // determinism: sort by key — same YAML must produce same NodeId order
            let mut env_entries: Vec<(&String, &serde_yaml::Value)> = env.iter().collect();
            env_entries.sort_by(|a, b| a.0.cmp(b.0));
            for (_k, val) in env_entries {
                if let Some(s) = yaml_scalar_to_string(val) {
                    extract_dollar_paren_secrets(
                        &s,
                        step_id,
                        plain_vars,
                        has_variable_groups,
                        graph,
                        cache,
                    );
                }
            }
        }

        // Detect $(varName) in inline script text
        if let Some(ref script) = inline_script {
            extract_dollar_paren_secrets(
                script,
                step_id,
                plain_vars,
                has_variable_groups,
                graph,
                cache,
            );
        }

        // Detect ##vso[task.setvariable] — environment gate mutation in ADO pipelines.
        // META_WRITES_ENV_GATE marks the step as writing to the env gate (always).
        // META_ENV_GATE_WRITES_SECRET_VALUE marks when the written value contains a
        // $(secretRef) expression — i.e., a secret is being propagated (BUG-4: plain
        // integer writes like `##vso[task.setvariable variable=Count]3` should not
        // fire as secret-exfiltration findings).
        if let Some(ref script) = inline_script {
            // The marker is matched case-insensitively; the secret-reference
            // check below receives the script in its original case.
            let lower = script.to_lowercase();
            if lower.contains("##vso[task.setvariable") {
                if let Some(node) = graph.nodes.get_mut(step_id) {
                    node.metadata
                        .insert(META_WRITES_ENV_GATE.into(), "true".into());
                    node.metadata
                        .insert(META_SETVARIABLE_ADO.into(), "true".into());
                    if setvariable_value_contains_secret_ref(script) {
                        node.metadata
                            .insert(META_ENV_GATE_WRITES_SECRET_VALUE.into(), "true".into());
                    }
                }
            }
        }
    }
}
1374
1375/// Classify an ADO step, returning (name, trust_zone, inline_script_text).
1376///
1377/// `inline_script_text` is populated whenever the step has script content —
1378/// either as a top-level `script:`/`bash:`/`powershell:`/`pwsh:` key, or as a
1379/// task input (`Bash@3.inputs.script`, `PowerShell@2.inputs.script`,
1380/// `AzureCLI@2.inputs.inlineScript`, `AzurePowerShell@5.inputs.Inline`, …).
1381/// Task-input keys are matched case-insensitively because the ADO YAML schema
1382/// is itself case-insensitive on input names.
1383fn classify_step(
1384    step: &AdoStep,
1385    job_name: &str,
1386    idx: usize,
1387) -> (String, TrustZone, Option<String>) {
1388    let default_name = || format!("{job_name}[{idx}]");
1389
1390    let name = step
1391        .display_name
1392        .as_deref()
1393        .or(step.name.as_deref())
1394        .map(|s| s.to_string())
1395        .unwrap_or_else(default_name);
1396
1397    if step.task.is_some() {
1398        // Task step — script body may live in inputs.{script,inlineScript,Inline}.
1399        let inline = extract_task_inline_script(step.inputs.as_ref());
1400        (name, TrustZone::Untrusted, inline)
1401    } else if let Some(ref s) = step.script {
1402        (name, TrustZone::FirstParty, Some(s.clone()))
1403    } else if let Some(ref s) = step.bash {
1404        (name, TrustZone::FirstParty, Some(s.clone()))
1405    } else if let Some(ref s) = step.powershell {
1406        (name, TrustZone::FirstParty, Some(s.clone()))
1407    } else if let Some(ref s) = step.pwsh {
1408        (name, TrustZone::FirstParty, Some(s.clone()))
1409    } else {
1410        (name, TrustZone::FirstParty, None)
1411    }
1412}
1413
1414/// Pull an inline script body out of a task step's `inputs:` mapping.
1415/// Recognises the three common conventions:
1416///   - `inputs.script` (Bash@3, PowerShell@2 — when targetType: inline)
1417///   - `inputs.inlineScript` (AzureCLI@2)
1418///   - `inputs.Inline` (AzurePowerShell@5 — note the capital I)
1419///
1420/// Match is case-insensitive so a hand-written pipeline using `Script:` or
1421/// `INLINESCRIPT:` is still picked up.
1422fn extract_task_inline_script(
1423    inputs: Option<&HashMap<String, serde_yaml::Value>>,
1424) -> Option<String> {
1425    let inputs = inputs?;
1426    const KEYS: &[&str] = &["script", "inlinescript", "inline"];
1427    // determinism: sort by key — same YAML must produce same NodeId order
1428    // (first-match semantics: ensure the same key wins across runs when more
1429    // than one of `script`/`inlineScript`/`Inline` is present in the same task)
1430    let mut entries: Vec<(&String, &serde_yaml::Value)> = inputs.iter().collect();
1431    entries.sort_by(|a, b| a.0.cmp(b.0));
1432    for (raw_key, val) in entries {
1433        let lower = raw_key.to_lowercase();
1434        if KEYS.contains(&lower.as_str()) {
1435            if let Some(s) = val.as_str() {
1436                if !s.is_empty() {
1437                    return Some(s.to_string());
1438                }
1439            }
1440        }
1441    }
1442    None
1443}
1444
1445fn input_value<'a>(
1446    inputs: &'a HashMap<String, serde_yaml::Value>,
1447    wanted: &str,
1448) -> Option<&'a serde_yaml::Value> {
1449    let mut entries: Vec<(&String, &serde_yaml::Value)> = inputs.iter().collect();
1450    entries.sort_by(|a, b| a.0.cmp(b.0));
1451    entries
1452        .into_iter()
1453        .find(|(key, _)| key.eq_ignore_ascii_case(wanted))
1454        .map(|(_, value)| value)
1455}
1456
1457fn input_str<'a>(inputs: &'a HashMap<String, serde_yaml::Value>, wanted: &str) -> Option<&'a str> {
1458    input_value(inputs, wanted).and_then(yaml_value_as_str)
1459}
1460
1461/// Add a DelegatesTo edge from a synthetic step node to a template image node.
1462///
1463/// Trust zone heuristic: templates referenced with `@repository` (e.g. `steps/deploy.yml@templates`)
1464/// pull code from an external repository and are Untrusted. Plain relative paths like
1465/// `steps/deploy.yml` resolve within the same repo and are FirstParty — mirroring how GHA
1466/// treats `./local-action`.
1467///
1468/// `job_name` is `Some` when the delegation is created inside a job's scope
1469/// (job-level template, or template step inside `process_steps`); it is `None`
1470/// for stage-level template delegations that don't belong to a specific job.
1471fn add_template_delegation(
1472    step_name: &str,
1473    template_path: &str,
1474    token_id: NodeId,
1475    job_name: Option<&str>,
1476    graph: &mut AuthorityGraph,
1477) {
1478    let tpl_trust_zone = if template_path.contains('@') {
1479        TrustZone::Untrusted
1480    } else {
1481        TrustZone::FirstParty
1482    };
1483    let step_id = graph.add_node(NodeKind::Step, step_name, TrustZone::FirstParty);
1484    if let Some(jn) = job_name {
1485        if let Some(node) = graph.nodes.get_mut(step_id) {
1486            node.metadata.insert(META_JOB_NAME.into(), jn.into());
1487        }
1488    }
1489    let tpl_id = graph.add_node(NodeKind::Image, template_path, tpl_trust_zone);
1490    graph.add_edge(step_id, tpl_id, EdgeKind::DelegatesTo);
1491    graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
1492    graph.mark_partial(
1493        GapKind::Structural,
1494        format!(
1495            "template '{template_path}' cannot be resolved inline — authority within the template is unknown"
1496        ),
1497    );
1498}
1499
/// Returns true if a `##vso[task.setvariable ...]VALUE` call's VALUE contains
/// an ADO `$(secretRef)` expression — i.e., the step is writing a secret-derived
/// value into the environment gate (BUG-4: plain integers and PowerShell vars
/// like `$psVar` should not fire the secret-exfiltration rule).
///
/// The directive is matched ASCII-case-insensitively, line by line. The VALUE
/// is everything after the first `]` that follows the directive itself; a `]`
/// earlier on the line (array indexing, a test expression, ...) is ignored so
/// that text preceding the directive is never mistaken for the value.
///
/// `$$(VAR)` is the documented ADO escape (literal output, not substitution)
/// and is intentionally NOT treated as a secret reference.
fn setvariable_value_contains_secret_ref(script: &str) -> bool {
    const DIRECTIVE: &[u8] = b"##vso[task.setvariable";

    script.lines().any(|line| {
        // Byte-wise search keeps offsets valid for slicing `line` even when
        // it contains non-ASCII text (a lower-cased copy could shift them).
        let Some(directive_at) = line
            .as_bytes()
            .windows(DIRECTIVE.len())
            .position(|window| window.eq_ignore_ascii_case(DIRECTIVE))
        else {
            return false;
        };
        // The match consisted of ASCII bytes only, so this is a char boundary.
        let after_directive = &line[directive_at + DIRECTIVE.len()..];
        match after_directive.find(']') {
            Some(close_bracket) => {
                contains_unescaped_dollar_paren(&after_directive[close_bracket + 1..])
            }
            // Unterminated directive — there is no value to inspect.
            None => false,
        }
    })
}

/// True iff `s` contains a `$(` substitution that is NOT preceded by another
/// `$` (the `$$(VAR)` escape form is rejected). Used by both the setvariable
/// secret-ref detector and any future caller that needs the same semantics
/// without going through the full Secret-node creation path.
fn contains_unescaped_dollar_paren(s: &str) -> bool {
    let bytes = s.as_bytes();
    let mut i = 0;
    while i + 1 < bytes.len() {
        if bytes[i] != b'$' || bytes[i + 1] != b'(' {
            i += 1;
            continue;
        }
        if i == 0 || bytes[i - 1] != b'$' {
            return true;
        }
        // Escaped `$$(`: resume after the closing `)` of the group, or just
        // past the `$(` when the group is never closed.
        let after_open = i + 2;
        i = match s[after_open..].find(')') {
            Some(end_offset) => after_open + end_offset + 1,
            None => after_open,
        };
    }
    false
}
1549
1550/// Extract `$(varName)` references from a string, creating Secret nodes for
1551/// non-predefined and non-plain ADO variables.
1552/// Only content that is a valid ADO variable identifier (`[A-Za-z][A-Za-z0-9_]*`)
1553/// is treated as a variable reference. This rejects PowerShell sub-expressions
1554/// (`$($var)`), ADO template expressions (`${{ ... }}`), shell commands (`$(date)`),
1555/// and anything with spaces or special characters.
1556///
1557/// `$$(VAR)` is the documented ADO escape — it renders as a literal `$(VAR)`
1558/// in output and is **not** a substitution. We skip these without creating a
1559/// Secret node so that documentation strings like `echo "use $$(BUILD_BUILDID)"`
1560/// don't manufacture phantom HasAccessTo edges (BUG-4).
1561fn extract_dollar_paren_secrets(
1562    text: &str,
1563    step_id: NodeId,
1564    plain_vars: &HashSet<String>,
1565    has_variable_groups: bool,
1566    graph: &mut AuthorityGraph,
1567    cache: &mut HashMap<String, NodeId>,
1568) {
1569    let mut pos = 0;
1570    let bytes = text.as_bytes();
1571    while pos < bytes.len() {
1572        if pos + 2 < bytes.len() && bytes[pos] == b'$' && bytes[pos + 1] == b'(' {
1573            // Honour the `$$(VAR)` escape — second `$` makes the whole token a
1574            // literal in ADO's output, not a substitution. Skip past the
1575            // closing `)` without creating a Secret node.
1576            if pos > 0 && bytes[pos - 1] == b'$' {
1577                let start = pos + 2;
1578                if let Some(end_offset) = text[start..].find(')') {
1579                    pos = start + end_offset + 1;
1580                    continue;
1581                }
1582                pos += 1;
1583                continue;
1584            }
1585            let start = pos + 2;
1586            if let Some(end_offset) = text[start..].find(')') {
1587                let var_name = &text[start..start + end_offset];
1588                // BUG-3: when variable groups are present in this scope (or an
1589                // ancestor scope) the group is opaque — any $(VAR) could be a
1590                // plain config value from the group. Only create a Secret node
1591                // if the var was explicitly declared as a secret (is already
1592                // in cache) or there are no groups *along the inheritance chain*.
1593                let already_declared_secret = cache.contains_key(var_name);
1594                if is_valid_ado_identifier(var_name)
1595                    && !is_predefined_ado_var(var_name)
1596                    && !plain_vars.contains(var_name)
1597                    && (!has_variable_groups || already_declared_secret)
1598                {
1599                    let id = find_or_create_secret(graph, cache, var_name);
1600                    // Mark secrets embedded in -var flag arguments: their values appear in
1601                    // pipeline logs (command string is logged before masking, and Terraform
1602                    // itself logs -var values in plan output and debug traces).
1603                    if is_in_terraform_var_flag(text, pos) {
1604                        if let Some(node) = graph.nodes.get_mut(id) {
1605                            node.metadata
1606                                .insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
1607                        }
1608                    }
1609                    graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
1610                }
1611                pos = start + end_offset + 1;
1612                continue;
1613            }
1614        }
1615        pos += 1;
1616    }
1617}
1618
/// Returns true if the `$(VAR)` starting at byte offset `var_pos` of `text`
/// looks like part of a Terraform `-var` argument.
///
/// Two conditions must hold (BUG-3 — the earlier heuristic accepted any line
/// with `-var` and `=`, which also matched `--var-file=`, `extra-vars=`,
/// `-vargs=`, ...):
///
/// 1. The token `terraform` (compared case-insensitively) appears before the
///    reference on the same line, OR on an earlier line that is joined to the
///    current one through an unbroken chain of line continuations (trailing
///    `\` for POSIX shells, trailing `` ` `` for PowerShell). This admits
///    `terraform.exe`, `tfwrapper terraform`, `aws-vault exec ... terraform`,
///    and the common multi-line shape:
///    `terraform apply \`
///    `  -var "db=$(secret)"`
///
/// 2. The part of the current line that precedes the reference contains the
///    literal `-var ` (trailing space) or `-var=`. Because the character after
///    `-var` must be a space or `=`, `-var-file=`, `--var-file=`,
///    `extra-vars=` and `-vargs=` on their own do not qualify.
fn is_in_terraform_var_flag(text: &str, var_pos: usize) -> bool {
    let line_start = match text[..var_pos].rfind('\n') {
        Some(newline) => newline + 1,
        None => 0,
    };
    let prefix = &text[line_start..var_pos];

    // (2) the flag literal must be present on this line, before the reference.
    if !["-var ", "-var="].iter().any(|flag| prefix.contains(flag)) {
        return false;
    }

    let mentions_terraform = |segment: &str| segment.to_lowercase().contains("terraform");

    // (1) fast path: `terraform` earlier on the same line.
    if mentions_terraform(prefix) {
        return true;
    }

    // (1) fallback: visit the preceding lines from nearest to farthest. Each
    // one must end in a continuation character to be part of the same
    // command; the first line that does not ends the search.
    for earlier in text[..line_start].lines().rev() {
        let tail = earlier.trim_end();
        if !(tail.ends_with('\\') || tail.ends_with('`')) {
            return false;
        }
        if mentions_terraform(earlier) {
            return true;
        }
    }
    false
}
1674
/// Returns true if `name` has the shape of an ADO variable name: an ASCII
/// letter followed by any number of ASCII letters, digits, underscores, or
/// dots (dots allow namespaced names such as `Build.BuildId`).
///
/// The empty string and anything containing other characters — PowerShell
/// vars (`$name`), template expressions (`{{ ... }}`), or expressions with
/// spaces and punctuation (`name -join ','`) — are rejected. A bare word such
/// as `date` does satisfy the check; this function only validates shape.
fn is_valid_ado_identifier(name: &str) -> bool {
    // Every accepted character is ASCII, so a byte-wise check is sufficient:
    // any byte of a multi-byte character fails the ASCII predicates.
    let Some((first, rest)) = name.as_bytes().split_first() else {
        return false;
    };
    first.is_ascii_alphabetic()
        && rest
            .iter()
            .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'.'))
}
1689
/// Returns true if a variable name is a well-known ADO predefined variable.
/// These are system-provided and never represent secrets.
///
/// A name is considered predefined when it starts with one of the reserved
/// namespaces (`Build.`, `Agent.`, `System.`, ...) or with `TF_BUILD`.
/// The comparison ignores ASCII case because ADO resolves variable names
/// case-insensitively: `$(build.buildId)` and `$(Build.BuildId)` name the
/// same variable and must be classified the same way.
fn is_predefined_ado_var(name: &str) -> bool {
    const PREFIXES: [&str; 10] = [
        "Build.",
        "Agent.",
        "System.",
        "Pipeline.",
        "Release.",
        "Environment.",
        "Strategy.",
        "Deployment.",
        "Resources.",
        // Matched as a prefix, so the bare `TF_BUILD` name is covered too.
        "TF_BUILD",
    ];
    PREFIXES.iter().any(|prefix| {
        // Prefixes are pure ASCII, so comparing raw bytes is safe and avoids
        // slicing `name` at a non-character boundary.
        name.len() >= prefix.len()
            && name.as_bytes()[..prefix.len()].eq_ignore_ascii_case(prefix.as_bytes())
    })
}
1707
1708fn find_or_create_secret(
1709    graph: &mut AuthorityGraph,
1710    cache: &mut HashMap<String, NodeId>,
1711    name: &str,
1712) -> NodeId {
1713    if let Some(&id) = cache.get(name) {
1714        return id;
1715    }
1716    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
1717    cache.insert(name.to_string(), id);
1718    id
1719}
1720
1721fn yaml_value_as_str(val: &serde_yaml::Value) -> Option<&str> {
1722    val.as_str()
1723}
1724
1725fn yaml_scalar_to_string(value: &serde_yaml::Value) -> Option<String> {
1726    match value {
1727        serde_yaml::Value::String(s) => Some(s.clone()),
1728        serde_yaml::Value::Bool(b) => Some(b.to_string()),
1729        serde_yaml::Value::Number(n) => Some(n.to_string()),
1730        serde_yaml::Value::Null => Some(String::new()),
1731        _ => None,
1732    }
1733}
1734
1735// ── Serde models for ADO YAML ─────────────────────────────
1736
/// Top-level ADO pipeline definition.
/// ADO pipelines come in three shapes:
///   (a) stages → jobs → steps
///   (b) jobs → steps (no stages key)
///   (c) steps only (no stages or jobs key)
///
/// Every field is optional: a key that is missing from the YAML document
/// deserializes to `None` (`#[serde(default)]`).
#[derive(Debug, Deserialize)]
pub struct AdoPipeline {
    /// Raw `trigger:` value, kept as untyped YAML.
    #[serde(default)]
    pub trigger: Option<serde_yaml::Value>,
    /// Raw `pr:` value, kept as untyped YAML.
    #[serde(default)]
    pub pr: Option<serde_yaml::Value>,
    /// Pipeline-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// `stages:` is normally a sequence of stage objects, but real-world
    /// pipelines also use `stages: ${{ parameters.stages }}` (a template
    /// expression that resolves at runtime to a list). The custom
    /// deserializer accepts a sequence, a single stage mapping (wrapped into
    /// a one-element list), or a bare string / null, which resolve to `None`
    /// and the graph is marked Partial downstream.
    #[serde(default, deserialize_with = "deserialize_optional_stages")]
    pub stages: Option<Vec<AdoStage>>,
    /// Top-level `jobs:` (shape (b)). Parsed by `deserialize_optional_jobs`,
    /// which yields `Some` whenever the key is present — including an empty
    /// list for null or a bare template expression.
    #[serde(default, deserialize_with = "deserialize_optional_jobs")]
    pub jobs: Option<Vec<AdoJob>>,
    /// Top-level `steps:` (shape (c)).
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Raw pipeline-level `pool:` value, kept as untyped YAML.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
    /// Pipeline-level `workspace:` block. The only security-relevant field is
    /// `clean:` (`outputs`, `resources`, `all`, or `true`), which causes the
    /// agent to wipe the workspace between runs. Used to tag self-hosted Image
    /// nodes with `META_WORKSPACE_CLEAN`.
    #[serde(default)]
    pub workspace: Option<serde_yaml::Value>,
    /// `resources:` block — repository declarations, container declarations,
    /// pipeline declarations. We only consume `repositories[]` today.
    /// Pre-2019 ADO accepts a sequence form (`resources: [- repo: self]`)
    /// which has no `repositories:` key — the custom deserializer accepts
    /// both shapes and treats the sequence form as an empty resources block.
    #[serde(default, deserialize_with = "deserialize_optional_resources")]
    pub resources: Option<AdoResources>,
    /// Top-level `extends:` directive — `extends: { template: x@alias, ... }`.
    /// Captured raw so we can scan for `template: x@alias` references that
    /// consume a `resources.repositories[]` entry.
    #[serde(default)]
    pub extends: Option<serde_yaml::Value>,
    /// Top-level `parameters:` declarations. Each entry has at minimum a
    /// `name`; `type` defaults to `string` when omitted. `values:` is an
    /// optional allowlist that constrains caller input.
    /// ADO accepts two shapes: the typed sequence form
    /// (`- name: foo \n type: string \n default: bar`) and the legacy
    /// untyped map form (`parameters: { foo: bar, baz: '' }`) used in
    /// older template fragments. The custom deserializer normalizes both.
    #[serde(default, deserialize_with = "deserialize_optional_parameters")]
    pub parameters: Option<Vec<AdoParameter>>,
    /// Pipeline-level `permissions:` block. Controls the scope of
    /// `System.AccessToken` for all jobs in the pipeline unless overridden
    /// at the job level. Parsed to detect explicit scope restriction (e.g.
    /// `contents: none`) so `over_privileged_identity` doesn't fire on
    /// pipelines that have already locked down their token.
    #[serde(default)]
    pub permissions: Option<serde_yaml::Value>,
}
1798
1799/// Accept either a sequence of `AdoParameter` (modern typed form) or a
1800/// mapping of parameter name → default value (legacy untyped form used in
1801/// many template fragments). For the map form, each key becomes an
1802/// `AdoParameter` with the key as `name` and no type/values. Returns `None`
1803/// for any other shape (e.g. a bare template expression).
1804///
1805/// Implemented as a serde Visitor (rather than going through
1806/// `serde_yaml::Value`) so that downstream struct deserialization uses
1807/// serde's native lazy iteration — this avoids serde_yaml's strict
1808/// duplicate-key detection on `${{ else }}`-style template-conditional
1809/// keys that appear in stage/job `parameters:` blocks of unrelated entries.
1810fn deserialize_optional_parameters<'de, D>(
1811    deserializer: D,
1812) -> Result<Option<Vec<AdoParameter>>, D::Error>
1813where
1814    D: serde::Deserializer<'de>,
1815{
1816    use serde::de::{MapAccess, SeqAccess, Visitor};
1817    use std::fmt;
1818
1819    struct ParamsVisitor;
1820
1821    impl<'de> Visitor<'de> for ParamsVisitor {
1822        type Value = Option<Vec<AdoParameter>>;
1823
1824        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1825            f.write_str("a sequence of parameter declarations, a mapping of name→default, null, or a template expression")
1826        }
1827
1828        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1829            Ok(None)
1830        }
1831
1832        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1833            Ok(None)
1834        }
1835
1836        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1837            d.deserialize_any(self)
1838        }
1839
1840        // Bare scalar (template expression like `${{ parameters.X }}`) —
1841        // can't statically enumerate; treat as absent.
1842        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
1843            Ok(None)
1844        }
1845        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
1846            Ok(None)
1847        }
1848        fn visit_bool<E: serde::de::Error>(self, _v: bool) -> Result<Self::Value, E> {
1849            Ok(None)
1850        }
1851        fn visit_i64<E: serde::de::Error>(self, _v: i64) -> Result<Self::Value, E> {
1852            Ok(None)
1853        }
1854        fn visit_u64<E: serde::de::Error>(self, _v: u64) -> Result<Self::Value, E> {
1855            Ok(None)
1856        }
1857        fn visit_f64<E: serde::de::Error>(self, _v: f64) -> Result<Self::Value, E> {
1858            Ok(None)
1859        }
1860
1861        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
1862            let mut out = Vec::new();
1863            while let Some(item) = seq.next_element::<serde_yaml::Value>()? {
1864                if let Ok(p) = serde_yaml::from_value::<AdoParameter>(item) {
1865                    out.push(p);
1866                }
1867            }
1868            Ok(Some(out))
1869        }
1870
1871        fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
1872            // Legacy untyped map form: name → default-value. We collect
1873            // names; defaults are intentionally discarded (matches typed-
1874            // form semantics where `default:` is also ignored).
1875            let mut out = Vec::new();
1876            while let Some(key) = map.next_key::<serde_yaml::Value>()? {
1877                let _ignore = map.next_value::<serde::de::IgnoredAny>()?;
1878                let name = match key {
1879                    serde_yaml::Value::String(s) if !s.is_empty() => s,
1880                    _ => continue,
1881                };
1882                out.push(AdoParameter {
1883                    name: Some(name),
1884                    param_type: None,
1885                    values: None,
1886                });
1887            }
1888            Ok(Some(out))
1889        }
1890    }
1891
1892    deserializer.deserialize_any(ParamsVisitor)
1893}
1894
1895/// Accept either an `AdoResources` mapping (modern form with `repositories:`,
1896/// `containers:`, `pipelines:`) or the legacy sequence form (`resources: [-
1897/// repo: self]`, pre-2019 ADO syntax). The legacy form has no
1898/// `repositories:` key, so we return an empty `AdoResources` for it — the
1899/// repository-tracking rules then see no aliases to track, which is correct
1900/// (legacy `repo: self` declares no external repositories).
1901fn deserialize_optional_resources<'de, D>(deserializer: D) -> Result<Option<AdoResources>, D::Error>
1902where
1903    D: serde::Deserializer<'de>,
1904{
1905    use serde::de::{MapAccess, SeqAccess, Visitor};
1906    use std::fmt;
1907
1908    struct ResourcesVisitor;
1909
1910    impl<'de> Visitor<'de> for ResourcesVisitor {
1911        type Value = Option<AdoResources>;
1912
1913        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1914            f.write_str("an AdoResources mapping or a legacy `- repo:` sequence")
1915        }
1916
1917        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1918            Ok(None)
1919        }
1920        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1921            Ok(None)
1922        }
1923        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1924            d.deserialize_any(self)
1925        }
1926
1927        // Legacy sequence form — drain it without producing any
1928        // repository entries. Modern rules track aliases via the
1929        // `AdoResources.repositories[]` shape, which the legacy form
1930        // does not produce.
1931        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
1932            while seq.next_element::<serde::de::IgnoredAny>()?.is_some() {}
1933            Ok(Some(AdoResources::default()))
1934        }
1935
1936        fn visit_map<A: MapAccess<'de>>(self, map: A) -> Result<Self::Value, A::Error> {
1937            let r = AdoResources::deserialize(serde::de::value::MapAccessDeserializer::new(map))?;
1938            Ok(Some(r))
1939        }
1940    }
1941
1942    deserializer.deserialize_any(ResourcesVisitor)
1943}
1944
1945/// Accept either a sequence of `AdoStage` (the normal form) or a bare
1946/// template expression (`stages: ${{ parameters.stages }}`) which resolves
1947/// at runtime. For the template-expression case, return `None` so the
1948/// pipeline still parses; the graph will simply contain no stages from this
1949/// scope (downstream code already handles empty stage lists).
1950fn deserialize_optional_stages<'de, D>(deserializer: D) -> Result<Option<Vec<AdoStage>>, D::Error>
1951where
1952    D: serde::Deserializer<'de>,
1953{
1954    use serde::de::{MapAccess, SeqAccess, Visitor};
1955    use std::fmt;
1956
1957    struct StagesVisitor;
1958
1959    impl<'de> Visitor<'de> for StagesVisitor {
1960        type Value = Option<Vec<AdoStage>>;
1961
1962        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1963            f.write_str("a sequence of stages or a template expression")
1964        }
1965
1966        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1967            Ok(None)
1968        }
1969        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1970            Ok(None)
1971        }
1972        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1973            d.deserialize_any(self)
1974        }
1975        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
1976            Ok(None)
1977        }
1978        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
1979            Ok(None)
1980        }
1981
1982        fn visit_seq<A: SeqAccess<'de>>(self, seq: A) -> Result<Self::Value, A::Error> {
1983            let stages =
1984                Vec::<AdoStage>::deserialize(serde::de::value::SeqAccessDeserializer::new(seq))?;
1985            Ok(Some(stages))
1986        }
1987
1988        fn visit_map<A: MapAccess<'de>>(self, map: A) -> Result<Self::Value, A::Error> {
1989            let stage = AdoStage::deserialize(serde::de::value::MapAccessDeserializer::new(map))?;
1990            Ok(Some(vec![stage]))
1991        }
1992    }
1993
1994    deserializer.deserialize_any(StagesVisitor)
1995}
1996
1997fn deserialize_optional_jobs<'de, D>(deserializer: D) -> Result<Option<Vec<AdoJob>>, D::Error>
1998where
1999    D: serde::Deserializer<'de>,
2000{
2001    deserialize_jobs(deserializer).map(Some)
2002}
2003
2004fn deserialize_jobs<'de, D>(deserializer: D) -> Result<Vec<AdoJob>, D::Error>
2005where
2006    D: serde::Deserializer<'de>,
2007{
2008    use serde::de::{MapAccess, SeqAccess, Visitor};
2009    use std::fmt;
2010
2011    struct JobsVisitor;
2012
2013    impl<'de> Visitor<'de> for JobsVisitor {
2014        type Value = Vec<AdoJob>;
2015
2016        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2017            f.write_str("a sequence of ADO jobs, a map of job-name to job body, null, or a template expression")
2018        }
2019
2020        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
2021            Ok(Vec::new())
2022        }
2023        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
2024            Ok(Vec::new())
2025        }
2026        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
2027            d.deserialize_any(self)
2028        }
2029        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
2030            Ok(Vec::new())
2031        }
2032        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
2033            Ok(Vec::new())
2034        }
2035
2036        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
2037            let mut out = Vec::new();
2038            while let Some(item) = seq.next_element::<serde_yaml::Value>()? {
2039                if let Ok(job) = serde_yaml::from_value::<AdoJob>(item) {
2040                    out.push(job);
2041                }
2042            }
2043            Ok(out)
2044        }
2045
2046        fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
2047            let mut out = Vec::new();
2048            while let Some(key) = map.next_key::<serde_yaml::Value>()? {
2049                let value = map.next_value::<serde_yaml::Value>()?;
2050                let name = match key {
2051                    serde_yaml::Value::String(s) if !s.is_empty() => s,
2052                    _ => continue,
2053                };
2054                let Ok(mut job) = serde_yaml::from_value::<AdoJob>(value) else {
2055                    continue;
2056                };
2057                if job.job.is_none() && job.deployment.is_none() {
2058                    job.job = Some(name);
2059                }
2060                out.push(job);
2061            }
2062            Ok(out)
2063        }
2064    }
2065
2066    deserializer.deserialize_any(JobsVisitor)
2067}
2068
2069fn deserialize_optional_bool<'de, D>(deserializer: D) -> Result<Option<bool>, D::Error>
2070where
2071    D: serde::Deserializer<'de>,
2072{
2073    let value = Option::<serde_yaml::Value>::deserialize(deserializer)?;
2074    let Some(value) = value else {
2075        return Ok(None);
2076    };
2077    let parsed = match value {
2078        serde_yaml::Value::Bool(b) => Some(b),
2079        serde_yaml::Value::String(s) => match s.trim().to_ascii_lowercase().as_str() {
2080            "true" | "yes" | "y" | "on" | "1" => Some(true),
2081            "false" | "no" | "n" | "off" | "0" => Some(false),
2082            _ => None,
2083        },
2084        serde_yaml::Value::Number(n) => n.as_i64().map(|v| v != 0),
2085        serde_yaml::Value::Null => None,
2086        _ => None,
2087    };
2088    Ok(parsed)
2089}
2090
/// `resources:` block. Only `repositories[]` is modelled today.
#[derive(Debug, Default, Deserialize)]
pub struct AdoResources {
    /// Entries of `resources.repositories[]`. Empty when the key is absent
    /// (`#[serde(default)]`) or when the struct is built via `Default`.
    #[serde(default)]
    pub repositories: Vec<AdoRepository>,
}
2097
/// A single `resources.repositories[]` entry — declares an external repo
/// alias the pipeline can consume via `template: x@alias`, `extends:`, or
/// `checkout: alias`.
///
/// All fields are optional; a key missing from the YAML deserializes to `None`.
#[derive(Debug, Deserialize)]
pub struct AdoRepository {
    /// The alias used by consumers (`template: file@<repository>`).
    #[serde(default)]
    pub repository: Option<String>,
    /// `git`, `github`, `bitbucket`, or `azureGit`. Read from the YAML key
    /// `type` (renamed because `type` is a Rust keyword).
    #[serde(default, rename = "type")]
    pub repo_type: Option<String>,
    /// Full repo path (e.g. `org/repo`).
    #[serde(default)]
    pub name: Option<String>,
    /// Optional ref, read from the YAML key `ref`. Absent = default branch
    /// (mutable). Present forms: `refs/tags/v1.2.3`, `refs/heads/main`, bare
    /// branch `main`, or a SHA.
    #[serde(default, rename = "ref")]
    pub git_ref: Option<String>,
}
2117
/// Pipeline / template `parameters:` entry. We deliberately ignore `default:`
/// — only the name, type, and `values:` allowlist matter for our rules.
#[derive(Debug, Deserialize)]
pub struct AdoParameter {
    /// Parameter name; `None` when the entry carries no `name:` key.
    #[serde(default)]
    pub name: Option<String>,
    /// Declared `type:` of the parameter; `None` when omitted.
    #[serde(rename = "type", default)]
    pub param_type: Option<String>,
    /// Optional `values:` allowlist, kept as raw YAML values.
    #[serde(default)]
    pub values: Option<Vec<serde_yaml::Value>>,
}
2129
/// ADO `dependsOn:` is normally written in one of two YAML shapes — a single
/// string (`dependsOn: my_job`) or a sequence of strings
/// (`dependsOn: [a, b, c]`). The untagged enum normalises both at
/// deserialization time so callers can iterate uniformly.
///
/// Any value that fits neither shape is captured by `Other`, so an unusual
/// `dependsOn:` does not fail deserialization; `as_csv` renders it as an
/// empty string.
#[derive(Debug, Deserialize, Clone)]
#[serde(untagged)]
pub enum DependsOn {
    /// `dependsOn: my_job`
    Single(String),
    /// `dependsOn: [a, b, c]`
    Multiple(Vec<String>),
    /// Catch-all for any other YAML value; kept raw.
    Other(serde_yaml::Value),
}
2141
2142impl DependsOn {
2143    /// Comma-joined predecessor list suitable for stamping into
2144    /// `META_DEPENDS_ON` on a Step node. Empty entries are dropped.
2145    pub fn as_csv(&self) -> String {
2146        match self {
2147            DependsOn::Single(s) => s.trim().to_string(),
2148            DependsOn::Multiple(v) => v
2149                .iter()
2150                .map(|s| s.trim())
2151                .filter(|s| !s.is_empty())
2152                .collect::<Vec<_>>()
2153                .join(","),
2154            DependsOn::Other(_) => String::new(),
2155        }
2156    }
2157}
2158
/// One entry of a `stages:` list — either a named stage or a stage-level
/// template reference.
#[derive(Debug, Deserialize)]
pub struct AdoStage {
    /// Stage identifier. Absent when the stage entry is a template reference.
    #[serde(default)]
    pub stage: Option<String>,
    /// Stage-level template reference (`- template: path/to/stage.yml`).
    #[serde(default)]
    pub template: Option<String>,
    /// Stage-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Jobs of this stage, parsed leniently by `deserialize_jobs`. Empty when
    /// the `jobs:` key is absent.
    #[serde(default, deserialize_with = "deserialize_jobs")]
    pub jobs: Vec<AdoJob>,
    /// Stage-level runtime gate. ADO evaluates this expression at queue time;
    /// when false, every job (and therefore every step) inside the stage is
    /// skipped. The parser cannot evaluate the expression statically, so its
    /// presence is recorded as a Partial-Expression gap and its text is stamped
    /// onto child Step nodes via `META_CONDITION`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub condition: Option<String>,
    /// Stage-level explicit `dependsOn:`. Default behaviour is "depends on the
    /// previous stage" — only the explicit form is captured.
    #[serde(rename = "dependsOn", default, skip_serializing_if = "Option::is_none")]
    pub depends_on: Option<DependsOn>,
}
2183
/// One job entry as read from YAML: a regular job (`job:`), a deployment job
/// (`deployment:`), or a job-level template reference (`template:`).
///
/// Every key is optional on the wire; a missing key deserializes to `None`.
/// Use [`AdoJob::effective_name`] for a display name and
/// [`AdoJob::all_steps`] for the merged step list.
// NOTE(review): `skip_serializing_if` below only influences a derived
// `Serialize`, which this type does not derive.
#[derive(Debug, Deserialize)]
pub struct AdoJob {
    /// Regular job identifier
    #[serde(default)]
    pub job: Option<String>,
    /// Deployment job identifier
    #[serde(default)]
    pub deployment: Option<String>,
    /// Job-scoped `variables:` block, when one is declared.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Steps listed directly under the job's `steps:` key.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Deployment-job nested strategy: runOnce/rolling/canary all share the
    /// shape `strategy.{runOnce,rolling,canary}.deploy.steps`. We only need
    /// the steps — the strategy choice itself doesn't change authority flow.
    #[serde(default)]
    pub strategy: Option<AdoStrategy>,
    /// Job-level `pool:` value, kept as untyped YAML.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
    /// Job-level `workspace:` block. The only security-relevant field is
    /// `clean:` which causes the agent to wipe the workspace between runs.
    #[serde(default)]
    pub workspace: Option<serde_yaml::Value>,
    /// Job-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// Deployment-job environment binding. Two YAML shapes:
    ///
    ///   - `environment: production` (string shorthand)
    ///   - `environment: { name: staging, resourceType: VirtualMachine }` (mapping)
    ///
    /// When present, the environment may have approvals/checks attached in ADO's
    /// environment configuration. Approvals are a manual gate — authority cannot
    /// propagate past one without human intervention. We treat any `environment:`
    /// binding as an approval candidate and tag the job's steps so propagation
    /// rules can downgrade severity. (We can't see the approval config from YAML
    /// alone; the binding is the strongest signal available at parse time.)
    #[serde(default)]
    pub environment: Option<serde_yaml::Value>,
    /// Job-level runtime gate. Evaluated at job-queue time; controls whether
    /// the job's steps run. Cannot be statically evaluated — recorded as a
    /// Partial-Expression gap and stamped onto the job's Step nodes via
    /// `META_CONDITION` (joined with any stage-level condition).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub condition: Option<String>,
    /// Job-level explicit `dependsOn:`. Default behaviour is "depends on the
    /// previous job" — only the explicit form is captured.
    #[serde(rename = "dependsOn", default, skip_serializing_if = "Option::is_none")]
    pub depends_on: Option<DependsOn>,
}
2234
2235impl AdoJob {
2236    pub fn effective_name(&self) -> String {
2237        self.job
2238            .as_deref()
2239            .or(self.deployment.as_deref())
2240            .unwrap_or("job")
2241            .to_string()
2242    }
2243
2244    /// Returns the effective step list for this job.
2245    ///
2246    /// Regular jobs put steps under `steps:` directly. Deployment jobs nest
2247    /// them under `strategy.{runOnce,rolling,canary}.{deploy,preDeploy,
2248    /// postDeploy,routeTraffic,onSuccess,onFailure}.steps`. We merge all
2249    /// strategy-nested step lists into a single sequence so downstream rules
2250    /// see them as part of the job. Order: regular `steps:` first, then any
2251    /// strategy-nested steps in deterministic phase order.
2252    pub fn all_steps(&self) -> Vec<AdoStep> {
2253        let mut out: Vec<AdoStep> = Vec::new();
2254        if let Some(ref s) = self.steps {
2255            out.extend(s.iter().cloned());
2256        }
2257        if let Some(ref strat) = self.strategy {
2258            for phase in strat.phases() {
2259                if let Some(ref s) = phase.steps {
2260                    out.extend(s.iter().cloned());
2261                }
2262            }
2263        }
2264        out
2265    }
2266
2267    /// Returns true when the job is bound to an `environment:` — either the
2268    /// string form (`environment: production`) or the mapping form with a
2269    /// non-empty `name:` field. An empty mapping or empty string is ignored.
2270    pub fn has_environment_binding(&self) -> bool {
2271        match self.environment.as_ref() {
2272            None => false,
2273            Some(serde_yaml::Value::String(s)) => !s.trim().is_empty(),
2274            Some(serde_yaml::Value::Mapping(m)) => m
2275                .get("name")
2276                .and_then(|v| v.as_str())
2277                .map(|s| !s.trim().is_empty())
2278                .unwrap_or(false),
2279            _ => false,
2280        }
2281    }
2282}
2283
/// Deployment-job `strategy:` block. ADO ships three strategies — runOnce,
/// rolling, canary — each with multiple lifecycle phases that may carry
/// their own step list. We capture all of them; the AdoJob::all_steps
/// helper flattens them into one sequence.
///
/// All three keys deserialize into the same [`AdoStrategyRunOnce`] shape;
/// a key that is missing from the YAML is `None`.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategy {
    /// Contents of the `runOnce:` key.
    #[serde(default, rename = "runOnce")]
    pub run_once: Option<AdoStrategyRunOnce>,
    /// Contents of the `rolling:` key.
    #[serde(default)]
    pub rolling: Option<AdoStrategyRunOnce>,
    /// Contents of the `canary:` key.
    #[serde(default)]
    pub canary: Option<AdoStrategyRunOnce>,
}
2297
2298impl AdoStrategy {
2299    /// Iterate over every populated lifecycle phase across all strategies.
2300    pub fn phases(&self) -> Vec<&AdoStrategyPhase> {
2301        let mut out: Vec<&AdoStrategyPhase> = Vec::new();
2302        for runner in [&self.run_once, &self.rolling, &self.canary]
2303            .iter()
2304            .copied()
2305            .flatten()
2306        {
2307            for phase in [
2308                &runner.deploy,
2309                &runner.pre_deploy,
2310                &runner.post_deploy,
2311                &runner.route_traffic,
2312            ]
2313            .into_iter()
2314            .flatten()
2315            {
2316                out.push(phase);
2317            }
2318            if let Some(ref on) = runner.on {
2319                if let Some(ref s) = on.success {
2320                    out.push(s);
2321                }
2322                if let Some(ref f) = on.failure {
2323                    out.push(f);
2324                }
2325            }
2326        }
2327        out
2328    }
2329}
2330
/// Lifecycle phases carried by every deployment strategy. Each phase may
/// have its own `steps:`. Covering all six avoids silently dropping
/// privileged setup/teardown steps from the authority graph.
///
/// Despite the name, [`AdoStrategy`] uses this shape for `runOnce`,
/// `rolling` and `canary` alike. The six phases are the four fields below
/// plus `on.success` / `on.failure`.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyRunOnce {
    /// `deploy:` phase.
    #[serde(default)]
    pub deploy: Option<AdoStrategyPhase>,
    /// `preDeploy:` phase.
    #[serde(default, rename = "preDeploy")]
    pub pre_deploy: Option<AdoStrategyPhase>,
    /// `postDeploy:` phase.
    #[serde(default, rename = "postDeploy")]
    pub post_deploy: Option<AdoStrategyPhase>,
    /// `routeTraffic:` phase.
    #[serde(default, rename = "routeTraffic")]
    pub route_traffic: Option<AdoStrategyPhase>,
    /// `on:` block holding the success / failure hook phases.
    #[serde(default)]
    pub on: Option<AdoStrategyOn>,
}
2347
/// The `on:` block of a deployment strategy: optional hook phases keyed by
/// outcome, each of which may carry its own step list.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyOn {
    /// `on.success:` phase.
    #[serde(default)]
    pub success: Option<AdoStrategyPhase>,
    /// `on.failure:` phase.
    #[serde(default)]
    pub failure: Option<AdoStrategyPhase>,
}
2355
/// A single lifecycle phase of a deployment strategy. Only its `steps:` list
/// is modelled; any other keys in the YAML are not captured by this type.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyPhase {
    /// Steps declared under this phase; `None` when the key is missing.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
}
2361
/// One step entry as read from YAML.
///
/// A step is identified by whichever of its "kind" keys is present: `task`,
/// one of the inline-script keys (`script`, `bash`, `powershell`, `pwsh`),
/// `template`, or `checkout`. Every key is optional on the wire; a missing
/// key deserializes to `None`.
// NOTE(review): `skip_serializing_if` below only influences a derived
// `Serialize`, which this type does not derive.
#[derive(Debug, Deserialize, Clone)]
pub struct AdoStep {
    /// Task reference e.g. `AzureCLI@2`
    #[serde(default)]
    pub task: Option<String>,
    /// Inline script (cmd/sh)
    #[serde(default)]
    pub script: Option<String>,
    /// Inline bash script
    #[serde(default)]
    pub bash: Option<String>,
    /// Inline PowerShell script
    #[serde(default)]
    pub powershell: Option<String>,
    /// Cross-platform PowerShell
    #[serde(default)]
    pub pwsh: Option<String>,
    /// Step-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// Human-readable label from the `displayName:` key.
    #[serde(rename = "displayName", default)]
    pub display_name: Option<String>,
    /// Legacy name alias
    #[serde(default)]
    pub name: Option<String>,
    /// Step `env:` mapping. Values stay as untyped YAML so non-string
    /// scalars (booleans, numbers, empty values) do not fail the parse.
    #[serde(default)]
    pub env: Option<HashMap<String, serde_yaml::Value>>,
    /// Task inputs (key → value, but values may be nested)
    #[serde(default)]
    pub inputs: Option<HashMap<String, serde_yaml::Value>>,
    /// Checkout step target (e.g. `self`, a repo alias, or `none`)
    #[serde(default)]
    pub checkout: Option<String>,
    /// When true on a checkout step, writes credentials to .git/config for subsequent steps.
    /// Decoded by the custom `deserialize_optional_bool` helper.
    #[serde(
        rename = "persistCredentials",
        default,
        deserialize_with = "deserialize_optional_bool"
    )]
    pub persist_credentials: Option<bool>,
    /// Step-level runtime gate. Evaluated by the agent before it dispatches
    /// the step; when false the step is skipped (status: Skipped). Cannot be
    /// statically evaluated — recorded as a Partial-Expression gap and stamped
    /// onto the Step node via `META_CONDITION`, joined with any
    /// stage/job-level conditions stacked above.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub condition: Option<String>,
    /// Step-level explicit `dependsOn:`. Rare on individual steps (more common
    /// at job/stage level) but accepted by ADO; captured for symmetry.
    #[serde(rename = "dependsOn", default, skip_serializing_if = "Option::is_none")]
    pub depends_on: Option<DependsOn>,
}
2414
/// ADO `variables:` block. Can be a sequence (list of group/name-value entries)
/// or a mapping (variableName: value). We normalise both into a Vec<AdoVariable>.
///
/// `Deserialize` is implemented by hand for this type rather than derived,
/// because the two YAML shapes need different handling. `Default` yields an
/// empty list.
#[derive(Debug, Default)]
pub struct AdoVariables(pub Vec<AdoVariable>);
2419
2420impl<'de> serde::Deserialize<'de> for AdoVariables {
2421    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
2422    where
2423        D: serde::Deserializer<'de>,
2424    {
2425        let raw = serde_yaml::Value::deserialize(deserializer)?;
2426        let mut vars = Vec::new();
2427
2428        match raw {
2429            serde_yaml::Value::Sequence(seq) => {
2430                for item in seq {
2431                    if let Some(map) = item.as_mapping() {
2432                        if let Some(group_val) = map.get("group") {
2433                            if let Some(group) = group_val.as_str() {
2434                                vars.push(AdoVariable::Group {
2435                                    group: group.to_string(),
2436                                });
2437                                continue;
2438                            }
2439                        }
2440                        let name = map
2441                            .get("name")
2442                            .and_then(|v| v.as_str())
2443                            .unwrap_or("")
2444                            .to_string();
2445                        let value = map
2446                            .get("value")
2447                            .and_then(|v| v.as_str())
2448                            .unwrap_or("")
2449                            .to_string();
2450                        let is_secret = map
2451                            .get("isSecret")
2452                            .and_then(|v| v.as_bool())
2453                            .unwrap_or(false);
2454                        vars.push(AdoVariable::Named {
2455                            name,
2456                            value,
2457                            is_secret,
2458                        });
2459                    }
2460                }
2461            }
2462            serde_yaml::Value::Mapping(map) => {
2463                for (k, v) in map {
2464                    let name = k.as_str().unwrap_or("").to_string();
2465                    let value = v.as_str().unwrap_or("").to_string();
2466                    vars.push(AdoVariable::Named {
2467                        name,
2468                        value,
2469                        is_secret: false,
2470                    });
2471                }
2472            }
2473            _ => {}
2474        }
2475
2476        Ok(AdoVariables(vars))
2477    }
2478}
2479
/// A single normalised entry of a `variables:` block.
#[derive(Debug)]
pub enum AdoVariable {
    /// Reference to a variable group (`- group: <name>`).
    Group {
        /// Name of the referenced group.
        group: String,
    },
    /// An inline variable, from either the list form (`- name: ... value: ...`)
    /// or the mapping form (`NAME: value`).
    Named {
        /// Variable name; empty when the YAML had no string name.
        name: String,
        /// Variable value; empty when the YAML value was not a string.
        value: String,
        /// `isSecret:` flag from the list form; always `false` for the mapping form.
        is_secret: bool,
    },
}
2491
/// Heuristic: does this YAML contain a template-expression conditional key
/// such as `- ${{ if eq(parameters.X, true) }}:`?
///
/// This is the construct that breaks root-level mapping parses but is valid
/// in an ADO template fragment included by a parent pipeline.
///
/// A line matches when, after dropping leading whitespace and one optional
/// `- ` list marker, it starts with `${{`, contains `if ` or `if(`, and its
/// last non-whitespace character is `:`. Indentation is not inspected, so a
/// nested conditional matches as well as a root-level one.
fn has_root_parameter_conditional(content: &str) -> bool {
    content.lines().any(|raw| {
        let unindented = raw.trim_start();
        // Accept both `- ${{ if ... }}:` and bare `${{ if ... }}:`.
        let expr = match unindented.strip_prefix("- ") {
            Some(rest) => rest,
            None => unindented,
        };
        let opens_expression = expr.starts_with("${{");
        let mentions_if = ["if ", "if("].iter().any(|needle| expr.contains(*needle));
        opens_expression && mentions_if && expr.trim_end().ends_with(':')
    })
}
2511
2512fn recover_after_leading_root_sequence(content: &str) -> Option<&str> {
2513    for (idx, _) in content.char_indices() {
2514        if idx == 0 {
2515            continue;
2516        }
2517        if !is_root_pipeline_key_line(content[idx..].lines().next().unwrap_or_default()) {
2518            continue;
2519        }
2520        let recovered = &content[idx..];
2521        if serde_yaml::from_str::<AdoPipeline>(recovered).is_ok() {
2522            return Some(recovered);
2523        }
2524    }
2525    None
2526}
2527
/// Returns `true` when `line` is a bare, unindented root pipeline key such as
/// `steps:` or `trigger:`.
///
/// The line must not begin with whitespace (indentation means it is nested),
/// must end with `:` once trailing whitespace is ignored, and the text before
/// the colon must be one of the recognised root keys. Keys with an inline
/// value (`pool: ubuntu-latest`) are not matched.
fn is_root_pipeline_key_line(line: &str) -> bool {
    if line.starts_with(char::is_whitespace) {
        return false;
    }
    // Trailing blanks after the colon are valid YAML; leading ones are not
    // stripped because indentation is significant.
    let line = line.trim_end();
    if !line.ends_with(':') {
        return false;
    }
    let key = line.trim_end_matches(':').trim();
    matches!(
        key,
        "trigger"
            | "pr"
            | "pool"
            | "variables"
            | "resources"
            | "stages"
            | "jobs"
            | "steps"
            | "extends"
            | "parameters"
            | "permissions"
    )
}
2548
2549#[cfg(test)]
2550mod tests {
2551    use super::*;
2552    use std::io::{Read, Write};
2553    use std::net::TcpListener;
2554    use std::thread;
2555
2556    fn parse(yaml: &str) -> AuthorityGraph {
2557        let parser = AdoParser;
2558        let source = PipelineSource {
2559            file: "azure-pipelines.yml".into(),
2560            repo: None,
2561            git_ref: None,
2562            commit_sha: None,
2563        };
2564        parser.parse(yaml, &source).unwrap()
2565    }
2566
2567    fn parse_with_ctx(yaml: &str, ctx: &AdoParserContext) -> AuthorityGraph {
2568        let parser = AdoParser;
2569        let source = PipelineSource {
2570            file: "azure-pipelines.yml".into(),
2571            repo: None,
2572            git_ref: None,
2573            commit_sha: None,
2574        };
2575        parser.parse_with_context(yaml, &source, Some(ctx)).unwrap()
2576    }
2577
2578    fn spawn_variable_groups_server(response_json: &'static str) -> String {
2579        let listener = TcpListener::bind("127.0.0.1:0").expect("bind mock server");
2580        let addr = listener.local_addr().expect("local addr");
2581        thread::spawn(move || {
2582            if let Ok((mut stream, _)) = listener.accept() {
2583                let mut buf = [0_u8; 2048];
2584                let _ = stream.read(&mut buf);
2585                let body = response_json.as_bytes();
2586                let header = format!(
2587                    "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
2588                    body.len()
2589                );
2590                let _ = stream.write_all(header.as_bytes());
2591                let _ = stream.write_all(body);
2592            }
2593        });
2594        format!("http://{addr}")
2595    }
2596
2597    #[test]
2598    fn parses_simple_pipeline() {
2599        let yaml = r#"
2600trigger:
2601  - main
2602
2603jobs:
2604  - job: Build
2605    steps:
2606      - script: echo hello
2607        displayName: Say hello
2608"#;
2609        let graph = parse(yaml);
2610        assert!(graph.nodes.len() >= 2); // System.AccessToken + step
2611    }
2612
2613    #[test]
2614    fn system_access_token_created() {
2615        let yaml = r#"
2616steps:
2617  - script: echo hi
2618"#;
2619        let graph = parse(yaml);
2620        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2621        assert_eq!(identities.len(), 1);
2622        assert_eq!(identities[0].name, "System.AccessToken");
2623        assert_eq!(
2624            identities[0].metadata.get(META_IDENTITY_SCOPE),
2625            Some(&"broad".to_string())
2626        );
2627    }
2628
2629    #[test]
2630    fn variable_group_creates_secret_and_marks_partial() {
2631        let yaml = r#"
2632variables:
2633  - group: MySecretGroup
2634
2635steps:
2636  - script: echo hi
2637"#;
2638        let graph = parse(yaml);
2639        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2640        assert_eq!(secrets.len(), 1);
2641        assert_eq!(secrets[0].name, "MySecretGroup");
2642        assert_eq!(
2643            secrets[0].metadata.get(META_VARIABLE_GROUP),
2644            Some(&"true".to_string())
2645        );
2646        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2647        assert!(
2648            graph
2649                .completeness_gaps
2650                .iter()
2651                .any(|g| g.contains("MySecretGroup")),
2652            "completeness gap should name the variable group"
2653        );
2654        // External variable group is unresolvable without ADO API access —
2655        // that's a Structural break in the authority chain, not an expression
2656        // substitution.
2657        assert!(
2658            graph.completeness_gap_kinds.contains(&GapKind::Structural),
2659            "variable group gap must be Structural, got: {:?}",
2660            graph.completeness_gap_kinds
2661        );
2662    }
2663
2664    #[test]
2665    fn variable_group_enrichment_resolves_plain_and_secret_vars() {
2666        let yaml = r#"
2667variables:
2668  - group: MySecretGroup
2669
2670steps:
2671  - script: |
2672      echo $(PUBLIC_FLAG)
2673      echo $(DB_PASSWORD)
2674"#;
2675        let org_url = spawn_variable_groups_server(
2676            r#"{"value":[{"name":"MySecretGroup","variables":{"PUBLIC_FLAG":{"value":"1","isSecret":false},"DB_PASSWORD":{"isSecret":true}}}]}"#,
2677        );
2678        let ctx = AdoParserContext {
2679            org: Some(org_url),
2680            project: Some("DemoProject".to_string()),
2681            pat: Some("dummy-pat".to_string()),
2682        };
2683
2684        let graph = parse_with_ctx(yaml, &ctx);
2685        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2686        assert!(
2687            secrets.iter().any(|n| n.name == "DB_PASSWORD"),
2688            "secret variable from enriched group must be modelled as Secret"
2689        );
2690        assert!(
2691            !secrets.iter().any(|n| n.name == "MySecretGroup"),
2692            "resolved group should not be represented as an opaque group-secret node"
2693        );
2694        assert!(
2695            !graph
2696                .completeness_gaps
2697                .iter()
2698                .any(|g| g.contains("MySecretGroup") && g.contains("unresolvable")),
2699            "resolved group must not emit unresolvable-group partial gap"
2700        );
2701        assert_eq!(
2702            graph.metadata.get(META_ADO_VG_ENRICHED),
2703            Some(&"true".to_string())
2704        );
2705    }
2706
2707    #[test]
2708    fn variable_group_enrichment_failure_falls_back_to_static_model() {
2709        let yaml = r#"
2710variables:
2711  - group: MySecretGroup
2712steps:
2713  - script: echo hi
2714"#;
2715        let unused_port = {
2716            let probe = TcpListener::bind("127.0.0.1:0").expect("bind probe listener");
2717            let p = probe.local_addr().expect("probe addr").port();
2718            drop(probe);
2719            p
2720        };
2721        let ctx = AdoParserContext {
2722            org: Some(format!("http://127.0.0.1:{unused_port}")),
2723            project: Some("DemoProject".to_string()),
2724            pat: Some("dummy-pat".to_string()),
2725        };
2726
2727        let graph = parse_with_ctx(yaml, &ctx);
2728        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2729        assert!(
2730            graph
2731                .completeness_gaps
2732                .iter()
2733                .any(|g| g.contains("enrichment failed")),
2734            "failed enrichment should produce warning partial gap"
2735        );
2736        assert!(
2737            graph
2738                .nodes_of_kind(NodeKind::Secret)
2739                .any(|n| n.name == "MySecretGroup"),
2740            "on failure parser must fall back to opaque group-secret behaviour"
2741        );
2742        assert_eq!(
2743            graph.metadata.get(META_ADO_VG_ENRICHED),
2744            Some(&"false".to_string())
2745        );
2746    }
2747
2748    #[test]
2749    fn task_with_azure_subscription_creates_service_connection_identity() {
2750        let yaml = r#"
2751steps:
2752  - task: AzureCLI@2
2753    displayName: Deploy to Azure
2754    inputs:
2755      azureSubscription: MyServiceConnection
2756      scriptType: bash
2757      inlineScript: az group list
2758"#;
2759        let graph = parse(yaml);
2760        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2761        // System.AccessToken + service connection
2762        assert_eq!(identities.len(), 2);
2763        let conn = identities
2764            .iter()
2765            .find(|i| i.name == "MyServiceConnection")
2766            .unwrap();
2767        assert_eq!(
2768            conn.metadata.get(META_SERVICE_CONNECTION),
2769            Some(&"true".to_string())
2770        );
2771        assert_eq!(
2772            conn.metadata.get(META_IDENTITY_SCOPE),
2773            Some(&"broad".to_string())
2774        );
2775    }
2776
2777    #[test]
2778    fn service_connection_does_not_get_unconditional_oidc_tag() {
2779        let yaml = r#"
2780steps:
2781  - task: AzureCLI@2
2782    displayName: Deploy to Azure
2783    inputs:
2784      azureSubscription: MyClassicSpnConnection
2785      scriptType: bash
2786      inlineScript: az group list
2787"#;
2788        let graph = parse(yaml);
2789        let conn = graph
2790            .nodes_of_kind(NodeKind::Identity)
2791            .find(|i| i.name == "MyClassicSpnConnection")
2792            .expect("service connection identity should exist");
2793        assert_eq!(
2794            conn.metadata.get(META_OIDC),
2795            None,
2796            "service connections must not be tagged META_OIDC without a clear OIDC signal"
2797        );
2798    }
2799
2800    #[test]
2801    fn task_with_connected_service_name_creates_identity() {
2802        let yaml = r#"
2803steps:
2804  - task: SqlAzureDacpacDeployment@1
2805    inputs:
2806      ConnectedServiceNameARM: MySqlConnection
2807"#;
2808        let graph = parse(yaml);
2809        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2810        assert!(
2811            identities.iter().any(|i| i.name == "MySqlConnection"),
2812            "connectedServiceNameARM should create identity"
2813        );
2814    }
2815
2816    #[test]
2817    fn script_step_classified_as_first_party() {
2818        let yaml = r#"
2819steps:
2820  - script: echo hi
2821    displayName: Say hi
2822"#;
2823        let graph = parse(yaml);
2824        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2825        assert_eq!(steps.len(), 1);
2826        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
2827    }
2828
2829    #[test]
2830    fn bash_step_classified_as_first_party() {
2831        let yaml = r#"
2832steps:
2833  - bash: echo hi
2834"#;
2835        let graph = parse(yaml);
2836        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2837        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
2838    }
2839
2840    #[test]
2841    fn task_step_classified_as_untrusted() {
2842        let yaml = r#"
2843steps:
2844  - task: DotNetCoreCLI@2
2845    inputs:
2846      command: build
2847"#;
2848        let graph = parse(yaml);
2849        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2850        assert_eq!(steps.len(), 1);
2851        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
2852    }
2853
2854    #[test]
2855    fn dollar_paren_var_in_script_creates_secret() {
2856        let yaml = r#"
2857steps:
2858  - script: |
2859      curl -H "Authorization: $(MY_API_TOKEN)" https://api.example.com
2860    displayName: Call API
2861"#;
2862        let graph = parse(yaml);
2863        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2864        assert_eq!(secrets.len(), 1);
2865        assert_eq!(secrets[0].name, "MY_API_TOKEN");
2866    }
2867
2868    #[test]
2869    fn predefined_ado_var_not_treated_as_secret() {
2870        let yaml = r#"
2871steps:
2872  - script: |
2873      echo $(Build.BuildId)
2874      echo $(Agent.WorkFolder)
2875      echo $(System.DefaultWorkingDirectory)
2876    displayName: Print vars
2877"#;
2878        let graph = parse(yaml);
2879        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2880        assert!(
2881            secrets.is_empty(),
2882            "predefined ADO vars should not be treated as secrets, got: {:?}",
2883            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
2884        );
2885    }
2886
2887    #[test]
2888    fn template_reference_creates_delegates_to_and_marks_partial() {
2889        let yaml = r#"
2890steps:
2891  - template: steps/deploy.yml
2892    parameters:
2893      env: production
2894"#;
2895        let graph = parse(yaml);
2896        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2897        assert_eq!(steps.len(), 1);
2898
2899        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2900        assert_eq!(images.len(), 1);
2901        assert_eq!(images[0].name, "steps/deploy.yml");
2902
2903        let delegates: Vec<_> = graph
2904            .edges_from(steps[0].id)
2905            .filter(|e| e.kind == EdgeKind::DelegatesTo)
2906            .collect();
2907        assert_eq!(delegates.len(), 1);
2908
2909        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2910    }
2911
2912    #[test]
2913    fn top_level_steps_no_jobs() {
2914        let yaml = r#"
2915steps:
2916  - script: echo a
2917  - script: echo b
2918"#;
2919        let graph = parse(yaml);
2920        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2921        assert_eq!(steps.len(), 2);
2922    }
2923
2924    #[test]
2925    fn top_level_jobs_no_stages() {
2926        let yaml = r#"
2927jobs:
2928  - job: JobA
2929    steps:
2930      - script: echo a
2931  - job: JobB
2932    steps:
2933      - script: echo b
2934"#;
2935        let graph = parse(yaml);
2936        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2937        assert_eq!(steps.len(), 2);
2938    }
2939
2940    #[test]
2941    fn stages_with_nested_jobs_parsed() {
2942        let yaml = r#"
2943stages:
2944  - stage: Build
2945    jobs:
2946      - job: Compile
2947        steps:
2948          - script: cargo build
2949  - stage: Test
2950    jobs:
2951      - job: UnitTest
2952        steps:
2953          - script: cargo test
2954"#;
2955        let graph = parse(yaml);
2956        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2957        assert_eq!(steps.len(), 2);
2958    }
2959
2960    #[test]
2961    fn all_steps_linked_to_system_access_token() {
2962        let yaml = r#"
2963steps:
2964  - script: echo a
2965  - task: SomeTask@1
2966    inputs: {}
2967"#;
2968        let graph = parse(yaml);
2969        let token: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
2970        assert_eq!(token.len(), 1);
2971        let token_id = token[0].id;
2972
2973        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2974        for step in &steps {
2975            let links: Vec<_> = graph
2976                .edges_from(step.id)
2977                .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == token_id)
2978                .collect();
2979            assert_eq!(
2980                links.len(),
2981                1,
2982                "step '{}' must link to System.AccessToken",
2983                step.name
2984            );
2985        }
2986    }
2987
2988    #[test]
2989    fn named_secret_variable_creates_secret_node() {
2990        let yaml = r#"
2991variables:
2992  - name: MY_PASSWORD
2993    value: dummy
2994    isSecret: true
2995
2996steps:
2997  - script: echo hi
2998"#;
2999        let graph = parse(yaml);
3000        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3001        assert_eq!(secrets.len(), 1);
3002        assert_eq!(secrets[0].name, "MY_PASSWORD");
3003    }
3004
3005    #[test]
3006    fn variables_as_mapping_parsed() {
3007        let yaml = r#"
3008variables:
3009  MY_VAR: hello
3010  ANOTHER_VAR: world
3011
3012steps:
3013  - script: echo hi
3014"#;
3015        let graph = parse(yaml);
3016        // Mapping-style variables without isSecret — no secret nodes created
3017        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3018        assert!(
3019            secrets.is_empty(),
3020            "plain mapping vars should not create secret nodes"
3021        );
3022    }
3023
3024    #[test]
3025    fn persist_credentials_creates_persists_to_edge() {
3026        let yaml = r#"
3027steps:
3028  - checkout: self
3029    persistCredentials: true
3030  - script: git push
3031"#;
3032        let graph = parse(yaml);
3033        let token_id = graph
3034            .nodes_of_kind(NodeKind::Identity)
3035            .find(|n| n.name == "System.AccessToken")
3036            .expect("System.AccessToken must exist")
3037            .id;
3038
3039        let persists_edges: Vec<_> = graph
3040            .edges
3041            .iter()
3042            .filter(|e| e.kind == EdgeKind::PersistsTo && e.to == token_id)
3043            .collect();
3044        assert_eq!(
3045            persists_edges.len(),
3046            1,
3047            "checkout with persistCredentials: true must produce exactly one PersistsTo edge"
3048        );
3049    }
3050
3051    #[test]
3052    fn persist_credentials_string_true_creates_persists_to_edge() {
3053        let yaml = r#"
3054steps:
3055  - checkout: self
3056    persistCredentials: "true"
3057"#;
3058        let graph = parse(yaml);
3059        assert!(
3060            graph.edges.iter().any(|e| e.kind == EdgeKind::PersistsTo),
3061            "string true is accepted by ADO and must be treated as true"
3062        );
3063    }
3064
3065    #[test]
3066    fn jobs_mapping_form_parses() {
3067        let yaml = r#"
3068jobs:
3069  build:
3070    steps:
3071      - script: build.sh
3072        displayName: Build
3073"#;
3074        let graph = parse(yaml);
3075        assert!(
3076            graph
3077                .nodes_of_kind(NodeKind::Step)
3078                .any(|s| s.name == "Build"),
3079            "jobs: map form must produce step nodes"
3080        );
3081    }
3082
3083    #[test]
3084    fn step_env_non_string_scalar_values_parse() {
3085        let yaml = r#"
3086steps:
3087  - script: echo hi
3088    env:
3089      FEATURE_ENABLED: true
3090      RETRIES: 3
3091      EMPTY:
3092"#;
3093        let graph = parse(yaml);
3094        assert!(
3095            graph.nodes_of_kind(NodeKind::Step).next().is_some(),
3096            "scalar env values should not reject the whole ADO file"
3097        );
3098    }
3099
3100    #[test]
3101    fn checkout_without_persist_credentials_no_persists_to_edge() {
3102        let yaml = r#"
3103steps:
3104  - checkout: self
3105  - script: echo hi
3106"#;
3107        let graph = parse(yaml);
3108        let persists_edges: Vec<_> = graph
3109            .edges
3110            .iter()
3111            .filter(|e| e.kind == EdgeKind::PersistsTo)
3112            .collect();
3113        assert!(
3114            persists_edges.is_empty(),
3115            "checkout without persistCredentials should not produce PersistsTo edge"
3116        );
3117    }
3118
3119    #[test]
3120    fn var_flag_secret_marked_as_cli_flag_exposed() {
3121        let yaml = r#"
3122steps:
3123  - script: |
3124      terraform apply \
3125        -var "db_password=$(db_password)" \
3126        -var "api_key=$(api_key)"
3127    displayName: Terraform apply
3128"#;
3129        let graph = parse(yaml);
3130        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3131        assert!(!secrets.is_empty(), "should detect secrets from -var flags");
3132        for secret in &secrets {
3133            assert_eq!(
3134                secret.metadata.get(META_CLI_FLAG_EXPOSED),
3135                Some(&"true".to_string()),
3136                "secret '{}' passed via -var flag should be marked cli_flag_exposed",
3137                secret.name
3138            );
3139        }
3140    }
3141
3142    #[test]
3143    fn non_var_flag_secret_not_marked_as_cli_flag_exposed() {
3144        let yaml = r#"
3145steps:
3146  - script: |
3147      curl -H "Authorization: $(MY_TOKEN)" https://api.example.com
3148"#;
3149        let graph = parse(yaml);
3150        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3151        assert_eq!(secrets.len(), 1);
3152        assert!(
3153            !secrets[0].metadata.contains_key(META_CLI_FLAG_EXPOSED),
3154            "non -var secret should not be marked as cli_flag_exposed"
3155        );
3156    }
3157
3158    #[test]
3159    fn step_linked_to_variable_group_secret() {
3160        let yaml = r#"
3161variables:
3162  - group: ProdSecrets
3163
3164steps:
3165  - script: deploy.sh
3166"#;
3167        let graph = parse(yaml);
3168        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
3169        assert_eq!(secrets.len(), 1);
3170        let secret_id = secrets[0].id;
3171
3172        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3173        let links: Vec<_> = graph
3174            .edges_from(steps[0].id)
3175            .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == secret_id)
3176            .collect();
3177        assert_eq!(
3178            links.len(),
3179            1,
3180            "step should be linked to variable group secret"
3181        );
3182    }
3183
3184    #[test]
3185    fn pr_trigger_sets_meta_trigger_on_graph() {
3186        let yaml = r#"
3187pr:
3188  - '*'
3189
3190steps:
3191  - script: echo hi
3192"#;
3193        let graph = parse(yaml);
3194        assert_eq!(
3195            graph.metadata.get(META_TRIGGER),
3196            Some(&"pr".to_string()),
3197            "ADO pr: trigger should set graph META_TRIGGER"
3198        );
3199    }
3200
3201    #[test]
3202    fn self_hosted_pool_by_name_creates_image_with_self_hosted_metadata() {
3203        let yaml = r#"
3204pool:
3205  name: my-self-hosted-pool
3206
3207steps:
3208  - script: echo hi
3209"#;
3210        let graph = parse(yaml);
3211        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
3212        assert_eq!(images.len(), 1);
3213        assert_eq!(images[0].name, "my-self-hosted-pool");
3214        assert_eq!(
3215            images[0].metadata.get(META_SELF_HOSTED),
3216            Some(&"true".to_string()),
3217            "pool.name without vmImage must be tagged self-hosted"
3218        );
3219    }
3220
3221    #[test]
3222    fn vm_image_pool_is_not_tagged_self_hosted() {
3223        let yaml = r#"
3224pool:
3225  vmImage: ubuntu-latest
3226
3227steps:
3228  - script: echo hi
3229"#;
3230        let graph = parse(yaml);
3231        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
3232        assert_eq!(images.len(), 1);
3233        assert_eq!(images[0].name, "ubuntu-latest");
3234        assert!(
3235            !images[0].metadata.contains_key(META_SELF_HOSTED),
3236            "pool.vmImage is Microsoft-hosted — must not be tagged self-hosted"
3237        );
3238    }
3239
3240    #[test]
3241    fn checkout_self_step_tagged_with_meta_checkout_self() {
3242        let yaml = r#"
3243steps:
3244  - checkout: self
3245  - script: echo hi
3246"#;
3247        let graph = parse(yaml);
3248        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3249        assert_eq!(steps.len(), 2);
3250        let checkout_step = steps
3251            .iter()
3252            .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
3253            .expect("one step must be tagged META_CHECKOUT_SELF");
3254        assert_eq!(
3255            checkout_step.metadata.get(META_CHECKOUT_SELF),
3256            Some(&"true".to_string())
3257        );
3258    }
3259
3260    #[test]
3261    fn vso_setvariable_sets_meta_writes_env_gate() {
3262        let yaml = r###"
3263steps:
3264  - script: |
3265      echo "##vso[task.setvariable variable=FOO]bar"
3266    displayName: Set variable
3267"###;
3268        let graph = parse(yaml);
3269        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3270        assert_eq!(steps.len(), 1);
3271        assert_eq!(
3272            steps[0].metadata.get(META_WRITES_ENV_GATE),
3273            Some(&"true".to_string()),
3274            "##vso[task.setvariable] must mark META_WRITES_ENV_GATE"
3275        );
3276    }
3277
3278    #[test]
3279    fn environment_key_tags_job_with_env_approval() {
3280        // String form: `environment: production`
3281        let yaml_string_form = r#"
3282jobs:
3283  - deployment: DeployWeb
3284    environment: production
3285    steps:
3286      - script: echo deploying
3287        displayName: Deploy
3288"#;
3289        let g1 = parse(yaml_string_form);
3290        let tagged: Vec<_> = g1
3291            .nodes_of_kind(NodeKind::Step)
3292            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
3293            .collect();
3294        assert!(
3295            !tagged.is_empty(),
3296            "string-form `environment:` must tag job's step nodes with META_ENV_APPROVAL"
3297        );
3298
3299        // Mapping form: `environment: { name: staging }`
3300        let yaml_mapping_form = r#"
3301jobs:
3302  - deployment: DeployAPI
3303    environment:
3304      name: staging
3305      resourceType: VirtualMachine
3306    steps:
3307      - script: echo deploying
3308        displayName: Deploy
3309"#;
3310        let g2 = parse(yaml_mapping_form);
3311        let tagged2: Vec<_> = g2
3312            .nodes_of_kind(NodeKind::Step)
3313            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
3314            .collect();
3315        assert!(
3316            !tagged2.is_empty(),
3317            "mapping-form `environment: {{ name: ... }}` must tag job's step nodes"
3318        );
3319
3320        // Negative: a job with no `environment:` must not be tagged
3321        let yaml_no_env = r#"
3322jobs:
3323  - job: Build
3324    steps:
3325      - script: echo building
3326"#;
3327        let g3 = parse(yaml_no_env);
3328        let any_tagged = g3
3329            .nodes_of_kind(NodeKind::Step)
3330            .any(|s| s.metadata.contains_key(META_ENV_APPROVAL));
3331        assert!(
3332            !any_tagged,
3333            "jobs without `environment:` must not carry META_ENV_APPROVAL"
3334        );
3335    }
3336
3337    #[test]
3338    fn root_parameter_conditional_template_fragment_does_not_crash_and_marks_partial() {
3339        // Real-world repro: an ADO template fragment whose root content is wrapped
3340        // in a parameter conditional (`- ${{ if eq(parameters.X, true) }}:`) followed
3341        // by a list of jobs. This is valid when `template:`-included from a parent
3342        // pipeline, but parsing it standalone fails with "did not find expected key".
3343        // The parser must now return a Partial graph instead of a fatal error.
3344        let yaml = r#"
3345parameters:
3346  msabs_ws2022: false
3347
3348- ${{ if eq(parameters.msabs_ws2022, true) }}:
3349  - job: packer_ws2022
3350    displayName: Build WS2022 Gold Image
3351    steps:
3352      - task: PackerTool@0
3353"#;
3354        let parser = AdoParser;
3355        let source = PipelineSource {
3356            file: "fragment.yml".into(),
3357            repo: None,
3358            git_ref: None,
3359            commit_sha: None,
3360        };
3361        let result = parser.parse(yaml, &source);
3362        let graph = result.expect("template fragment must not crash the parser");
3363        assert!(
3364            matches!(graph.completeness, AuthorityCompleteness::Partial),
3365            "template-fragment graph must be marked Partial"
3366        );
3367        let saw_fragment_gap = graph
3368            .completeness_gaps
3369            .iter()
3370            .any(|g| g.contains("template fragment") && g.contains("parent pipeline"));
3371        assert!(
3372            saw_fragment_gap,
3373            "completeness_gaps must mention the template-fragment reason, got: {:?}",
3374            graph.completeness_gaps
3375        );
3376        // A template fragment's root structure depends on the parent pipeline
3377        // — this is a Structural break, not a missing expression value.
3378        assert_eq!(
3379            graph.completeness_gap_kinds.len(),
3380            1,
3381            "template-fragment graph should record exactly one gap kind"
3382        );
3383        assert_eq!(graph.completeness_gap_kinds[0], GapKind::Structural);
3384    }
3385
3386    #[test]
3387    fn environment_tag_isolated_to_gated_job_only() {
3388        // Two jobs side by side: only the deployment job has environment.
3389        // Steps from the non-gated job must NOT be tagged.
3390        let yaml = r#"
3391jobs:
3392  - job: Build
3393    steps:
3394      - script: echo build
3395        displayName: build-step
3396  - deployment: DeployProd
3397    environment: production
3398    steps:
3399      - script: echo deploy
3400        displayName: deploy-step
3401"#;
3402        let g = parse(yaml);
3403        let build_step = g
3404            .nodes_of_kind(NodeKind::Step)
3405            .find(|s| s.name == "build-step")
3406            .expect("build-step must exist");
3407        let deploy_step = g
3408            .nodes_of_kind(NodeKind::Step)
3409            .find(|s| s.name == "deploy-step")
3410            .expect("deploy-step must exist");
3411        assert!(
3412            !build_step.metadata.contains_key(META_ENV_APPROVAL),
3413            "non-gated job's step must not be tagged"
3414        );
3415        assert_eq!(
3416            deploy_step.metadata.get(META_ENV_APPROVAL),
3417            Some(&"true".to_string()),
3418            "gated deployment job's step must be tagged"
3419        );
3420    }
3421
3422    // ── resources.repositories[] capture ──────────────────────
3423
3424    fn repos_meta(graph: &AuthorityGraph) -> Vec<serde_json::Value> {
3425        let raw = graph
3426            .metadata
3427            .get(META_REPOSITORIES)
3428            .expect("META_REPOSITORIES must be set");
3429        serde_json::from_str(raw).expect("META_REPOSITORIES must be valid JSON")
3430    }
3431
3432    #[test]
3433    fn resources_repositories_captured_with_used_flag_when_referenced_by_extends() {
3434        let yaml = r#"
3435resources:
3436  repositories:
3437    - repository: shared-templates
3438      type: git
3439      name: Platform/shared-templates
3440      ref: refs/heads/main
3441
3442extends:
3443  template: pipeline.yml@shared-templates
3444"#;
3445        let graph = parse(yaml);
3446        let entries = repos_meta(&graph);
3447        assert_eq!(entries.len(), 1);
3448        let e = &entries[0];
3449        assert_eq!(e["alias"], "shared-templates");
3450        assert_eq!(e["repo_type"], "git");
3451        assert_eq!(e["name"], "Platform/shared-templates");
3452        assert_eq!(e["ref"], "refs/heads/main");
3453        assert_eq!(e["used"], true);
3454    }
3455
3456    #[test]
3457    fn resources_repositories_used_via_checkout_alias() {
3458        // Mirrors the msigeurope-adf-finance-reporting corpus shape.
3459        let yaml = r#"
3460resources:
3461  repositories:
3462    - repository: adf_publish
3463      type: git
3464      name: org/adf-finance-reporting
3465      ref: refs/heads/adf_publish
3466
3467jobs:
3468  - job: deploy
3469    steps:
3470      - checkout: adf_publish
3471"#;
3472        let graph = parse(yaml);
3473        let entries = repos_meta(&graph);
3474        assert_eq!(entries.len(), 1);
3475        assert_eq!(entries[0]["alias"], "adf_publish");
3476        assert_eq!(entries[0]["used"], true);
3477    }
3478
3479    #[test]
3480    fn resources_repositories_unreferenced_alias_is_marked_not_used() {
3481        // Declared but no `template: x@alias`, no `checkout: alias`, no extends.
3482        let yaml = r#"
3483resources:
3484  repositories:
3485    - repository: orphan-templates
3486      type: git
3487      name: Platform/orphan
3488      ref: main
3489
3490jobs:
3491  - job: build
3492    steps:
3493      - script: echo hi
3494"#;
3495        let graph = parse(yaml);
3496        let entries = repos_meta(&graph);
3497        assert_eq!(entries.len(), 1);
3498        assert_eq!(entries[0]["alias"], "orphan-templates");
3499        assert_eq!(entries[0]["used"], false);
3500    }
3501
3502    #[test]
3503    fn resources_repositories_absent_when_no_resources_block() {
3504        let yaml = r#"
3505jobs:
3506  - job: build
3507    steps:
3508      - script: echo hi
3509"#;
3510        let graph = parse(yaml);
3511        assert!(!graph.metadata.contains_key(META_REPOSITORIES));
3512    }
3513
3514    #[test]
3515    fn parse_template_alias_extracts_segment_after_at() {
3516        assert_eq!(
3517            parse_template_alias("steps/deploy.yml@templates"),
3518            Some("templates".to_string())
3519        );
3520        assert_eq!(parse_template_alias("local/path.yml"), None);
3521        assert_eq!(parse_template_alias("path@"), None);
3522    }
3523
3524    #[test]
3525    fn parameters_as_map_form_parses_as_named_parameters() {
3526        // Real-world repro from Azure/aks-engine, PowerShell/PowerShell, dotnet/maui:
3527        // legacy template fragments declare `parameters:` as a mapping of
3528        // name → default-value rather than the modern typed sequence form.
3529        // Both shapes must parse; the map form yields parameters with names
3530        // but no type/values allowlist (so they default to "string" downstream).
3531        let yaml = r#"
3532parameters:
3533  name: ''
3534  k8sRelease: ''
3535  apimodel: 'examples/e2e-tests/kubernetes/release/default/definition.json'
3536  createVNET: false
3537
3538jobs:
3539  - job: build
3540    steps:
3541      - script: echo $(name)
3542"#;
3543        let graph = parse(yaml);
3544        // Parse must succeed and capture the four parameter names.
3545        assert!(graph.parameters.contains_key("name"));
3546        assert!(graph.parameters.contains_key("k8sRelease"));
3547        assert!(graph.parameters.contains_key("apimodel"));
3548        assert!(graph.parameters.contains_key("createVNET"));
3549        assert_eq!(graph.parameters.len(), 4);
3550    }
3551
3552    #[test]
3553    fn parameters_as_typed_sequence_form_still_parses() {
3554        // Make sure the modern form still works after the polymorphic
3555        // deserializer change.
3556        let yaml = r#"
3557parameters:
3558  - name: env
3559    type: string
3560    default: prod
3561    values:
3562      - prod
3563      - staging
3564  - name: skipTests
3565    type: boolean
3566    default: false
3567
3568jobs:
3569  - job: build
3570    steps:
3571      - script: echo hi
3572"#;
3573        let graph = parse(yaml);
3574        let env_param = graph.parameters.get("env").expect("env captured");
3575        assert_eq!(env_param.param_type, "string");
3576        assert!(env_param.has_values_allowlist);
3577        let skip_param = graph
3578            .parameters
3579            .get("skipTests")
3580            .expect("skipTests captured");
3581        assert_eq!(skip_param.param_type, "boolean");
3582        assert!(!skip_param.has_values_allowlist);
3583    }
3584
3585    #[test]
3586    fn resources_as_legacy_sequence_form_parses_to_empty_resources() {
3587        // Real-world repro from Azure/azure-cli, Chinachu/Mirakurun: pre-2019
3588        // ADO syntax allows `resources:` as a list of `- repo: self` entries,
3589        // not the modern `resources: { repositories: [...] }` mapping. Modern
3590        // ADO still tolerates the legacy form. We must accept both shapes
3591        // without crashing the parse.
3592        let yaml = r#"
3593resources:
3594- repo: self
3595
3596trigger:
3597  - main
3598
3599jobs:
3600  - job: build
3601    steps:
3602      - script: echo hi
3603"#;
3604        let graph = parse(yaml);
3605        // No external repositories declared (legacy form has none) — so the
3606        // META_REPOSITORIES metadata key is absent.
3607        assert!(!graph.metadata.contains_key(META_REPOSITORIES));
3608        // But the job still parses.
3609        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
3610        assert_eq!(steps.len(), 1);
3611    }
3612
3613    #[test]
3614    fn stages_as_template_expression_marks_partial_expression_gap() {
3615        // Real-world repro from dotnet/diagnostics templatePublic.yml:
3616        // `stages: ${{ parameters.stages }}` resolves at runtime. The static
3617        // parser cannot enumerate stages from a template expression. Accept
3618        // the file without crashing, but expose the under-modelled authority
3619        // carrier as a typed Partial-Expression gap.
3620        let yaml = r#"
3621parameters:
3622  - name: stages
3623    type: stageList
3624
3625stages: ${{ parameters.stages }}
3626"#;
3627        let graph = parse(yaml);
3628        // Graph must exist (no crash).
3629        assert!(graph.parameters.contains_key("stages"));
3630        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
3631        assert!(
3632            graph.completeness_gap_kinds.contains(&GapKind::Expression),
3633            "dynamic stages carrier must be an Expression gap, got: {:?}",
3634            graph.completeness_gap_kinds
3635        );
3636        assert!(
3637            graph
3638                .completeness_gaps
3639                .iter()
3640                .any(|g| g.contains("top-level `stages:`") && g.contains("template expression")),
3641            "gap must identify the dynamic stages carrier, got: {:?}",
3642            graph.completeness_gaps
3643        );
3644    }
3645
3646    #[test]
3647    fn jobs_as_template_expression_marks_partial_expression_gap() {
3648        let yaml = r#"
3649parameters:
3650  - name: jobs
3651    type: jobList
3652
3653jobs: ${{ parameters.jobs }}
3654"#;
3655        let graph = parse(yaml);
3656        assert!(graph.parameters.contains_key("jobs"));
3657        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
3658        assert!(
3659            graph.completeness_gap_kinds.contains(&GapKind::Expression),
3660            "dynamic jobs carrier must be an Expression gap, got: {:?}",
3661            graph.completeness_gap_kinds
3662        );
3663        assert!(
3664            graph
3665                .completeness_gaps
3666                .iter()
3667                .any(|g| g.contains("top-level `jobs:`") && g.contains("template expression")),
3668            "gap must identify the dynamic jobs carrier, got: {:?}",
3669            graph.completeness_gaps
3670        );
3671    }
3672
3673    // ── Cross-platform misclassification trap (red-team R2 #5) ─────
3674
3675    #[test]
3676    fn jobs_carrier_without_steps_marks_partial() {
3677        // ADO `jobs:` carrier present but each job has no `steps:` and no
3678        // `template:`. process_steps([]) adds nothing. Result: 0 Step nodes
3679        // despite a non-empty job carrier — must mark Partial so a CI gate
3680        // doesn't treat completeness=complete + 0 findings as "passed".
3681        let yaml = r#"
3682jobs:
3683  - job: build
3684    pool:
3685      vmImage: ubuntu-latest
3686"#;
3687        let graph = parse(yaml);
3688        let step_count = graph
3689            .nodes
3690            .iter()
3691            .filter(|n| n.kind == NodeKind::Step)
3692            .count();
3693        assert_eq!(step_count, 0);
3694        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
3695        assert!(
3696            graph
3697                .completeness_gaps
3698                .iter()
3699                .any(|g| g.contains("0 step nodes")),
3700            "completeness_gaps must mention 0 step nodes: {:?}",
3701            graph.completeness_gaps
3702        );
3703        // A jobs/steps carrier that yields zero step nodes is a Structural
3704        // break — the authority chain stops mid-graph rather than hiding a
3705        // value behind an expression.
3706        assert!(
3707            graph.completeness_gap_kinds.contains(&GapKind::Structural),
3708            "0-step-nodes gap must be Structural, got: {:?}",
3709            graph.completeness_gap_kinds
3710        );
3711    }
3712
3713    #[test]
3714    fn jobs_carrier_with_empty_jobs_list_does_not_mark_partial() {
3715        // Defensive: an empty `jobs:` list is NOT a carrier — there is no
3716        // job content to be confused about. Stays Complete.
3717        let yaml = r#"
3718jobs: []
3719"#;
3720        let graph = parse(yaml);
3721        let zero_step_gap = graph
3722            .completeness_gaps
3723            .iter()
3724            .any(|g| g.contains("0 step nodes"));
3725        assert!(
3726            !zero_step_gap,
3727            "empty jobs: list is not a carrier; got: {:?}",
3728            graph.completeness_gaps
3729        );
3730    }
3731
3732    // ── Bug regression: pr: none not suppressing PR-specific rules ──────────
3733
3734    #[test]
3735    fn pr_none_does_not_set_meta_trigger() {
3736        // `pr: none` is an explicit opt-out. Parser must require a mapping or
3737        // sequence for a real PR trigger; scalars are all opt-outs.
3738        let yaml = r#"
3739schedules:
3740  - cron: "0 5 * * 1"
3741pr: none
3742trigger: none
3743steps:
3744  - script: echo hello
3745"#;
3746        let graph = parse(yaml);
3747        assert!(
3748            !graph.metadata.contains_key(META_TRIGGER),
3749            "pr: none must not set META_TRIGGER; got: {:?}",
3750            graph.metadata.get(META_TRIGGER)
3751        );
3752    }
3753
3754    #[test]
3755    fn pr_tilde_does_not_set_meta_trigger() {
3756        // `pr: ~` is YAML null written as tilde — also an opt-out.
3757        let yaml = "pr: ~\nsteps:\n  - script: echo hello\n";
3758        let graph = parse(yaml);
3759        assert!(
3760            !graph.metadata.contains_key(META_TRIGGER),
3761            "pr: ~ must not set META_TRIGGER; got: {:?}",
3762            graph.metadata.get(META_TRIGGER)
3763        );
3764    }
3765
3766    #[test]
3767    fn pr_false_does_not_set_meta_trigger() {
3768        // `pr: false` — boolean false means disabled.
3769        let yaml = "pr: false\nsteps:\n  - script: echo hello\n";
3770        let graph = parse(yaml);
3771        assert!(
3772            !graph.metadata.contains_key(META_TRIGGER),
3773            "pr: false must not set META_TRIGGER; got: {:?}",
3774            graph.metadata.get(META_TRIGGER)
3775        );
3776    }
3777
3778    #[test]
3779    fn pr_sequence_sets_meta_trigger() {
3780        // Shorthand sequence form: `pr:\n  - main` is also a real PR trigger.
3781        let yaml = "pr:\n  - main\nsteps:\n  - script: echo hello\n";
3782        let graph = parse(yaml);
3783        assert_eq!(
3784            graph.metadata.get(META_TRIGGER).map(|s| s.as_str()),
3785            Some("pr"),
3786            "pr: [main] must set META_TRIGGER=pr"
3787        );
3788    }
3789
3790    #[test]
3791    fn pr_with_branches_sets_meta_trigger() {
3792        // Positive guard: a real PR trigger mapping must still set META_TRIGGER.
3793        let yaml = r#"
3794pr:
3795  branches:
3796    include:
3797      - main
3798steps:
3799  - script: echo hello
3800"#;
3801        let graph = parse(yaml);
3802        assert_eq!(
3803            graph.metadata.get(META_TRIGGER).map(|s| s.as_str()),
3804            Some("pr"),
3805            "real pr: block must set META_TRIGGER=pr"
3806        );
3807    }
3808
3809    // ── Bug regression: permissions: contents: none parsed as empty string ──
3810    // E2E test: parser → rule — the only test that catches the full chain.
3811
3812    #[test]
3813    fn over_privileged_identity_does_not_fire_when_permissions_contents_none() {
3814        // Full chain: ADO parser + over_privileged_identity rule.
3815        // Previously the parser ignored `permissions:`, leaving the token at
3816        // broad scope and firing the rule on every restricted pipeline.
3817        use taudit_core::rules::over_privileged_identity;
3818        let yaml = r#"
3819trigger: none
3820permissions:
3821  contents: none
3822steps:
3823  - script: echo hello
3824"#;
3825        let graph = parse(yaml);
3826        let findings = over_privileged_identity(&graph);
3827        let token_findings: Vec<_> = findings
3828            .iter()
3829            .filter(|f| {
3830                f.nodes_involved.iter().any(|&id| {
3831                    graph
3832                        .node(id)
3833                        .map(|n| n.name == "System.AccessToken")
3834                        .unwrap_or(false)
3835                })
3836            })
3837            .collect();
3838        assert!(
3839            token_findings.is_empty(),
3840            "over_privileged_identity must not fire on System.AccessToken when \
3841             permissions: contents: none is set; got: {token_findings:#?}"
3842        );
3843    }
3844
3845    #[test]
3846    fn pipeline_level_permissions_none_constrains_token() {
3847        // `permissions: contents: none` at pipeline level must downgrade
3848        // System.AccessToken from broad → constrained so over_privileged_identity
3849        // does not fire on an already-locked-down pipeline.
3850        let yaml = r#"
3851trigger: none
3852permissions:
3853  contents: none
3854steps:
3855  - script: echo hello
3856"#;
3857        let graph = parse(yaml);
3858        let token = graph
3859            .nodes_of_kind(NodeKind::Identity)
3860            .find(|n| n.name == "System.AccessToken")
3861            .expect("System.AccessToken must always be present");
3862        assert_eq!(
3863            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
3864            Some("constrained"),
3865            "permissions: contents: none must constrain the token; got: {:?}",
3866            token.metadata.get(META_IDENTITY_SCOPE)
3867        );
3868    }
3869
3870    #[test]
3871    fn pipeline_level_permissions_write_keeps_token_broad() {
3872        // A pipeline with write permissions must keep System.AccessToken broad.
3873        let yaml = r#"
3874trigger: none
3875permissions:
3876  contents: write
3877steps:
3878  - script: echo hello
3879"#;
3880        let graph = parse(yaml);
3881        let token = graph
3882            .nodes_of_kind(NodeKind::Identity)
3883            .find(|n| n.name == "System.AccessToken")
3884            .expect("System.AccessToken must always be present");
3885        assert_eq!(
3886            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
3887            Some("broad"),
3888            "permissions: contents: write must keep the token broad; got: {:?}",
3889            token.metadata.get(META_IDENTITY_SCOPE)
3890        );
3891    }
3892
3893    #[test]
3894    fn pipeline_level_permissions_read_scalar_constrains_token() {
3895        // `permissions: read` (scalar, not a map) must also downgrade the token.
3896        // Previously the scalar branch treated "read" as broad (incorrect).
3897        let yaml = "trigger: none\npermissions: read\nsteps:\n  - script: echo hello\n";
3898        let graph = parse(yaml);
3899        let token = graph
3900            .nodes_of_kind(NodeKind::Identity)
3901            .find(|n| n.name == "System.AccessToken")
3902            .expect("System.AccessToken must always be present");
3903        assert_eq!(
3904            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
3905            Some("constrained"),
3906            "permissions: read must constrain the token; got: {:?}",
3907            token.metadata.get(META_IDENTITY_SCOPE)
3908        );
3909    }
3910
3911    #[test]
3912    fn pipeline_level_permissions_write_scalar_keeps_token_broad() {
3913        // `permissions: write` (scalar) keeps the token broad.
3914        let yaml = "trigger: none\npermissions: write\nsteps:\n  - script: echo hello\n";
3915        let graph = parse(yaml);
3916        let token = graph
3917            .nodes_of_kind(NodeKind::Identity)
3918            .find(|n| n.name == "System.AccessToken")
3919            .expect("System.AccessToken must always be present");
3920        assert_eq!(
3921            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
3922            Some("broad"),
3923            "permissions: write scalar must keep token broad; got: {:?}",
3924            token.metadata.get(META_IDENTITY_SCOPE)
3925        );
3926    }
3927
3928    #[test]
3929    fn pipeline_level_permissions_contents_read_constrains_token() {
3930        // Map form with contents: read — should constrain.
3931        let yaml =
3932            "trigger: none\npermissions:\n  contents: read\nsteps:\n  - script: echo hello\n";
3933        let graph = parse(yaml);
3934        let token = graph
3935            .nodes_of_kind(NodeKind::Identity)
3936            .find(|n| n.name == "System.AccessToken")
3937            .expect("System.AccessToken must always be present");
3938        assert_eq!(
3939            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
3940            Some("constrained"),
3941            "permissions: contents: read must constrain; got: {:?}",
3942            token.metadata.get(META_IDENTITY_SCOPE)
3943        );
3944    }
3945
3946    #[test]
3947    fn empty_pipeline_does_not_mark_partial_for_zero_steps() {
3948        // No top-level stages/jobs/steps at all — there's no carrier, so the
3949        // 0-step-nodes guard must NOT fire. A genuinely empty pipeline stays
3950        // Complete.
3951        let yaml = r#"
3952trigger:
3953  - main
3954"#;
3955        let graph = parse(yaml);
3956        let zero_step_gap = graph
3957            .completeness_gaps
3958            .iter()
3959            .any(|g| g.contains("0 step nodes"));
3960        assert!(
3961            !zero_step_gap,
3962            "no carrier means no 0-step gap reason; got: {:?}",
3963            graph.completeness_gaps
3964        );
3965    }
3966
3967    /// regression: ADO HashMap iteration must be deterministic across runs.
3968    ///
3969    /// Before the fix, `step.env` and `step.inputs` (both `HashMap`s populated
3970    /// by serde_yaml) were iterated in HashMap-random order at four call sites
3971    /// in `taudit-parse-ado`. That randomness leaked into `NodeId` allocation
3972    /// (Secret/Identity nodes get IDs in the order they're added) and edge
3973    /// append order, which then leaked into `pipeline_identity_material_hash`
3974    /// and silently broke baseline suppression — same YAML, different hash on
3975    /// each run.
3976    ///
3977    /// Fixture uses non-alphabetic-insertion-order keys (`Z_VAR/A_VAR/M_VAR/...`)
3978    /// so the pre-fix HashMap bucket ordering is overwhelmingly unlikely to
3979    /// align with the now-enforced sorted iteration. We parse the same YAML
3980    /// nine times in sequence and assert that
3981    /// `compute_pipeline_identity_material_hash` is byte-identical across all
3982    /// runs. Mirrors `taudit-report-json`'s
3983    /// `json_output_is_byte_deterministic_across_runs` test pattern.
3984    #[test]
3985    fn ado_hashmap_iteration_is_deterministic_across_runs() {
3986        // Multiple `$(VAR)` references in both `env:` and task `inputs:` so
3987        // every secret-creating HashMap-iteration site in the parser is
3988        // exercised. Names chosen so HashMap hash bucket order has near-zero
3989        // chance of accidentally aligning with the enforced sorted order.
3990        let yaml = r#"
3991trigger:
3992  - main
3993
3994pool:
3995  vmImage: ubuntu-latest
3996
3997steps:
3998  - task: AzureCLI@2
3999    displayName: Deploy
4000    inputs:
4001      azureSubscription: $(SUB_CONN)
4002      scriptType: bash
4003      inlineScript: |
4004        echo $(MIDDLE_INPUT_VAR)
4005        echo $(ALPHA_INPUT_VAR)
4006        echo $(ZULU_INPUT_VAR)
4007    env:
4008      Z_VAR: $(Z_SECRET)
4009      A_VAR: $(A_SECRET)
4010      M_VAR: $(M_SECRET)
4011      Q_VAR: $(Q_SECRET)
4012      B_VAR: $(B_SECRET)
4013"#;
4014
4015        // Capture the structural shape of the graph that the bug report
4016        // identified as drifting: NodeId allocation order (id, kind, name,
4017        // trust_zone) and edge append order ((from, to, kind)). We
4018        // intentionally exclude `node.metadata` from the comparison — that
4019        // map's serialisation is a separate concern handled by the JSON sink
4020        // (see `taudit-report-json::json_output_is_byte_deterministic_across_runs`).
4021        fn structural_fingerprint(graph: &taudit_core::graph::AuthorityGraph) -> String {
4022            let mut out = String::new();
4023            for n in &graph.nodes {
4024                out.push_str(&format!(
4025                    "N {} {:?} {} {:?}\n",
4026                    n.id, n.kind, n.name, n.trust_zone
4027                ));
4028            }
4029            for e in &graph.edges {
4030                out.push_str(&format!("E {} {} {:?}\n", e.from, e.to, e.kind));
4031            }
4032            out
4033        }
4034
4035        let mut hashes: Vec<String> = Vec::with_capacity(9);
4036        let mut fingerprints: Vec<String> = Vec::with_capacity(9);
4037        for _ in 0..9 {
4038            let graph = parse(yaml);
4039            hashes.push(taudit_core::baselines::compute_pipeline_identity_material_hash(&graph));
4040            fingerprints.push(structural_fingerprint(&graph));
4041        }
4042
4043        let first_hash = &hashes[0];
4044        for (i, h) in hashes.iter().enumerate().skip(1) {
4045            assert_eq!(
4046                first_hash, h,
4047                "run 0 and run {i} produced different pipeline_identity_material_hash \
4048                 — ADO parser HashMap iteration is non-deterministic"
4049            );
4050        }
4051
4052        let first_fp = &fingerprints[0];
4053        for (i, fp) in fingerprints.iter().enumerate().skip(1) {
4054            assert_eq!(
4055                first_fp, fp,
4056                "run 0 and run {i} produced different graph node-id / edge ordering \
4057                 — ADO parser HashMap iteration is non-deterministic"
4058            );
4059        }
4060    }
4061
4062    // ── condition: / dependsOn: modelling (RC blocker A) ─────────────────────
4063    //
4064    // The ADO parser previously ignored stage / job / step `condition:` and
4065    // `dependsOn:` keys entirely, which made `apply_compensating_controls`
4066    // unable to credit conditional runtime gates and caused
4067    // `trigger_context_mismatch`-class rules to fire at full severity on
4068    // jobs the runtime would never execute on a PR build (deep audit
4069    // 02-ado-parser.md, finding 10).
4070
4071    #[test]
4072    fn step_condition_marks_partial_with_expression_gap() {
4073        let yaml = r#"
4074steps:
4075  - script: deploy.sh
4076    displayName: Deploy
4077    condition: eq(variables['Build.SourceBranch'], 'refs/heads/main')
4078"#;
4079        let graph = parse(yaml);
4080        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
4081        assert!(
4082            graph.completeness_gap_kinds.contains(&GapKind::Expression),
4083            "step condition must produce an Expression gap, got: {:?}",
4084            graph.completeness_gap_kinds
4085        );
4086        // Reason text must cite the conditional so an operator can grep
4087        // findings against the source pipeline's `condition:` clauses.
4088        assert!(
4089            graph.completeness_gaps.iter().any(|g| g.contains("step")
4090                && g.contains("Deploy")
4091                && g.contains("eq(variables['Build.SourceBranch']")),
4092            "gap reason must name scope, step, and condition: {:?}",
4093            graph.completeness_gaps
4094        );
4095    }
4096
4097    #[test]
4098    fn job_condition_propagates_to_step_metadata() {
4099        let yaml = r#"
4100jobs:
4101  - job: DeployProd
4102    condition: eq(variables['Build.SourceBranch'], 'refs/heads/main')
4103    steps:
4104      - script: deploy.sh
4105        displayName: Run deploy
4106"#;
4107        let graph = parse(yaml);
4108        let step = graph
4109            .nodes_of_kind(NodeKind::Step)
4110            .find(|n| n.name == "Run deploy")
4111            .expect("step node must exist");
4112        // No step-level condition was declared, so META_CONDITION carries
4113        // ONLY the job-level expression — verbatim, no ` AND ` joiner.
4114        assert_eq!(
4115            step.metadata.get(META_CONDITION),
4116            Some(&"eq(variables['Build.SourceBranch'], 'refs/heads/main')".to_string()),
4117            "job-level condition must propagate to step META_CONDITION"
4118        );
4119        // Job-level condition also marks the graph Partial-Expression so
4120        // downstream consumers know the runtime gate is opaque.
4121        assert!(graph.completeness_gap_kinds.contains(&GapKind::Expression));
4122    }
4123
4124    #[test]
4125    fn stacked_conditions_join_with_and() {
4126        let yaml = r#"
4127stages:
4128  - stage: Deploy
4129    condition: succeeded()
4130    jobs:
4131      - job: Prod
4132        condition: eq(variables['env'], 'prod')
4133        steps:
4134          - script: deploy.sh
4135            displayName: Deploy step
4136            condition: ne(variables['Build.Reason'], 'PullRequest')
4137"#;
4138        let graph = parse(yaml);
4139        let step = graph
4140            .nodes_of_kind(NodeKind::Step)
4141            .find(|n| n.name == "Deploy step")
4142            .expect("step node must exist");
4143        let chain = step
4144            .metadata
4145            .get(META_CONDITION)
4146            .expect("step must carry META_CONDITION");
4147        // Stage → Job → Step joined with ` AND ` in declaration order.
4148        assert_eq!(
4149            chain,
4150            "succeeded() AND eq(variables['env'], 'prod') AND ne(variables['Build.Reason'], 'PullRequest')",
4151            "stacked conditions must AND-join in stage→job→step order"
4152        );
4153        // Each scope's condition contributed a separate gap reason.
4154        let expression_gap_count = graph
4155            .completeness_gap_kinds
4156            .iter()
4157            .filter(|k| **k == GapKind::Expression)
4158            .count();
4159        assert!(
4160            expression_gap_count >= 3,
4161            "stage + job + step conditions must each mark Partial-Expression, got {expression_gap_count}"
4162        );
4163    }
4164
4165    #[test]
4166    fn depends_on_string_form_parses() {
4167        let yaml = r#"
4168jobs:
4169  - job: Build
4170    steps:
4171      - script: build.sh
4172  - job: Deploy
4173    dependsOn: Build
4174    steps:
4175      - script: deploy.sh
4176        displayName: Deploy
4177"#;
4178        let graph = parse(yaml);
4179        let step = graph
4180            .nodes_of_kind(NodeKind::Step)
4181            .find(|n| n.name == "Deploy")
4182            .expect("Deploy step must exist");
4183        assert_eq!(
4184            step.metadata.get(META_DEPENDS_ON),
4185            Some(&"Build".to_string()),
4186            "single-string dependsOn must stamp the predecessor name verbatim"
4187        );
4188    }
4189
4190    #[test]
4191    fn depends_on_sequence_form_parses() {
4192        let yaml = r#"
4193jobs:
4194  - job: A
4195    steps: [{ script: a.sh }]
4196  - job: B
4197    steps: [{ script: b.sh }]
4198  - job: C
4199    steps: [{ script: c.sh }]
4200  - job: Final
4201    dependsOn:
4202      - A
4203      - B
4204      - C
4205    steps:
4206      - script: final.sh
4207        displayName: Final step
4208"#;
4209        let graph = parse(yaml);
4210        let step = graph
4211            .nodes_of_kind(NodeKind::Step)
4212            .find(|n| n.name == "Final step")
4213            .expect("Final step must exist");
4214        assert_eq!(
4215            step.metadata.get(META_DEPENDS_ON),
4216            Some(&"A,B,C".to_string()),
4217            "sequence-form dependsOn must comma-join predecessors in declaration order"
4218        );
4219    }
4220
4221    #[test]
4222    fn step_depends_on_mapping_marks_partial_expression() {
4223        let yaml = "steps:\n  - script: echo hi\n    displayName: Mixed depends\n    dependsOn:\n      \"${{ if eq(parameters.extra, true) }}\":\n        - Prep\n";
4224        let graph = parse(yaml);
4225        let step = graph
4226            .nodes_of_kind(NodeKind::Step)
4227            .find(|n| n.name == "Mixed depends")
4228            .expect("step exists");
4229        assert!(
4230            !step.metadata.contains_key(META_DEPENDS_ON),
4231            "unresolved mapping dependsOn must not stamp META_DEPENDS_ON"
4232        );
4233        assert!(
4234            graph.completeness_gap_kinds.contains(&GapKind::Expression),
4235            "mapping dependsOn must mark Partial-Expression"
4236        );
4237        assert!(
4238            graph.completeness_gaps.iter().any(|g| g.contains("step")
4239                && g.contains("Mixed depends")
4240                && g.contains("dependsOn")),
4241            "gap reason must name scope, step, and dependsOn"
4242        );
4243    }
4244
4245    #[test]
4246    fn stage_depends_on_mapping_does_not_fake_inherited_dependency() {
4247        let yaml = "stages:\n  - stage: Build\n    jobs:\n      - job: BuildJob\n        steps:\n          - script: echo build\n  - stage: Deploy\n    dependsOn:\n      \"${{ if eq(parameters.release, true) }}\":\n        - Build\n    jobs:\n      - job: DeployJob\n        steps:\n          - script: echo deploy\n            displayName: Deploy step\n";
4248        let graph = parse(yaml);
4249        let step = graph
4250            .nodes_of_kind(NodeKind::Step)
4251            .find(|n| n.name == "Deploy step")
4252            .expect("deploy step exists");
4253        assert!(
4254            !step.metadata.contains_key(META_DEPENDS_ON),
4255            "unresolved stage dependsOn must not flow into child step metadata"
4256        );
4257        assert!(
4258            graph
4259                .completeness_gaps
4260                .iter()
4261                .any(|g| g.contains("stage") && g.contains("Deploy") && g.contains("dependsOn")),
4262            "gap reason must cite stage-level dependsOn expression"
4263        );
4264    }
4265
4266    #[test]
4267    fn conditional_step_finding_is_downgraded_via_compensating_control() {
4268        // Untrusted task step (TrustZone::Untrusted) with access to a
4269        // pipeline secret would normally fire `untrusted_with_authority`
4270        // at Critical. With a `condition:` gate present on the job, the
4271        // Suppression-5 ADO conditional-gate CC must downgrade to High,
4272        // record the original severity, and credit the gate as a CC.
4273        let yaml = r#"
4274variables:
4275  - name: DEPLOY_KEY
4276    value: $(MySecret)
4277    isSecret: true
4278jobs:
4279  - job: ProdDeploy
4280    condition: eq(variables['Build.SourceBranch'], 'refs/heads/main')
4281    steps:
4282      - task: AzureCLI@2
4283        displayName: Deploy to prod
4284        inputs:
4285          azureSubscription: ProdConnection
4286          scriptType: bash
4287          inlineScript: |
4288            echo "$(DEPLOY_KEY)" > /tmp/key
4289            az login --service-principal -u $SP -p $(DEPLOY_KEY)
4290"#;
4291        let graph = parse(yaml);
4292        let mut findings =
4293            taudit_core::rules::run_all_rules(&graph, taudit_core::propagation::DEFAULT_MAX_HOPS);
4294        // Find the Critical finding the rule would have emitted absent the
4295        // compensating-control pass — note `run_all_rules` already applies
4296        // the CC pass, so post-pass severity is what we read here.
4297        let f = findings
4298            .iter_mut()
4299            .find(|f| {
4300                f.category == taudit_core::finding::FindingCategory::UntrustedWithAuthority
4301                    && f.message.contains("DEPLOY_KEY")
4302            })
4303            .expect(
4304                "untrusted_with_authority must fire on the AzureCLI@2 step accessing DEPLOY_KEY",
4305            );
4306        assert_eq!(
4307            f.severity,
4308            taudit_core::finding::Severity::High,
4309            "Critical must be downgraded one tier to High by the ADO conditional-gate CC"
4310        );
4311        assert_eq!(
4312            f.extras.original_severity,
4313            Some(taudit_core::finding::Severity::Critical),
4314            "original_severity must record Critical so the audit trail survives"
4315        );
4316        assert!(
4317            f.extras
4318                .compensating_controls
4319                .iter()
4320                .any(|c| c.starts_with("ADO conditional gate")),
4321            "compensating_controls must include the ADO conditional-gate entry, got: {:?}",
4322            f.extras.compensating_controls
4323        );
4324    }
4325
4326    #[test]
4327    fn variable_groups_are_scoped_to_their_stage_or_job() {
4328        let yaml = r#"
4329stages:
4330  - stage: UsesGroup
4331    variables:
4332      - group: OpaqueGroup
4333    jobs:
4334      - job: A
4335        steps:
4336          - script: echo $(OPAQUE_VALUE)
4337  - stage: NoGroup
4338    jobs:
4339      - job: B
4340        steps:
4341          - script: echo $(STAGE_TWO_SECRET)
4342"#;
4343        let graph = parse(yaml);
4344        assert!(
4345            graph
4346                .nodes_of_kind(NodeKind::Secret)
4347                .any(|n| n.name == "STAGE_TWO_SECRET"),
4348            "variable group in first stage must not suppress secret refs in unrelated stages"
4349        );
4350    }
4351
4352    #[test]
4353    fn plain_variables_are_scoped_to_their_stage_or_job() {
4354        let yaml = r#"
4355stages:
4356  - stage: PlainStage
4357    variables:
4358      - name: SHARED_NAME
4359        value: plain
4360    jobs:
4361      - job: A
4362        steps:
4363          - script: echo $(SHARED_NAME)
4364  - stage: SecretRefStage
4365    jobs:
4366      - job: B
4367        steps:
4368          - script: echo $(SHARED_NAME)
4369"#;
4370        let graph = parse(yaml);
4371        assert!(
4372            graph
4373                .nodes_of_kind(NodeKind::Secret)
4374                .any(|n| n.name == "SHARED_NAME"),
4375            "plain variable in one stage must not suppress same-name secret refs in another stage"
4376        );
4377    }
4378
4379    #[test]
4380    fn parser_context_stamps_only_safe_metadata() {
4381        let yaml = "steps:\n  - script: echo hi\n";
4382        let parser = AdoParser;
4383        let source = PipelineSource {
4384            file: "ctx.yml".to_string(),
4385            repo: None,
4386            git_ref: None,
4387            commit_sha: None,
4388        };
4389        let ctx = AdoParserContext {
4390            org: Some("org-a".to_string()),
4391            project: Some("project-a".to_string()),
4392            pat: Some("very-secret-pat".to_string()),
4393        };
4394
4395        let graph = parser
4396            .parse_with_context(yaml, &source, Some(&ctx))
4397            .expect("parse succeeds");
4398
4399        assert_eq!(graph.metadata.get("ado_org"), Some(&"org-a".to_string()));
4400        assert_eq!(
4401            graph.metadata.get("ado_project"),
4402            Some(&"project-a".to_string())
4403        );
4404        assert_eq!(
4405            graph.metadata.get("ado_pat_present"),
4406            Some(&"true".to_string())
4407        );
4408        assert_eq!(
4409            graph.metadata.get("ado_variable_group_enrichment_ready"),
4410            Some(&"true".to_string())
4411        );
4412        assert!(
4413            !graph
4414                .metadata
4415                .values()
4416                .any(|v| v.contains("very-secret-pat")),
4417            "PAT must never be persisted into graph metadata"
4418        );
4419    }
4420
4421    #[test]
4422    fn parser_context_absent_preserves_existing_metadata_shape() {
4423        let yaml = "steps:\n  - script: echo hi\n";
4424        let graph = parse(yaml);
4425
4426        assert!(!graph.metadata.contains_key("ado_org"));
4427        assert!(!graph.metadata.contains_key("ado_project"));
4428        assert!(!graph.metadata.contains_key("ado_pat_present"));
4429        assert!(!graph
4430            .metadata
4431            .contains_key("ado_variable_group_enrichment_ready"));
4432    }
4433
4434    #[test]
4435    fn escaped_ado_variable_refs_are_not_secret_refs() {
4436        let yaml = r###"
4437steps:
4438  - script: |
4439      echo $$(NOT_A_SECRET)
4440      echo "##vso[task.setvariable variable=Count]$$(NOT_A_SECRET)"
4441    displayName: Escaped
4442"###;
4443        let graph = parse(yaml);
4444        assert!(
4445            !graph
4446                .nodes_of_kind(NodeKind::Secret)
4447                .any(|n| n.name == "NOT_A_SECRET"),
4448            "$$(VAR) is an escaped literal and must not create a Secret node"
4449        );
4450        let step = graph
4451            .nodes_of_kind(NodeKind::Step)
4452            .find(|n| n.name == "Escaped")
4453            .expect("step exists");
4454        assert!(
4455            !step
4456                .metadata
4457                .contains_key(META_ENV_GATE_WRITES_SECRET_VALUE),
4458            "escaped setvariable value must not be treated as secret-derived"
4459        );
4460    }
4461
4462    #[test]
4463    fn terraform_var_flag_detection_ignores_var_file() {
4464        let yaml = r#"
4465steps:
4466  - script: terraform apply -var-file=$(TFVARS_FILE)
4467    displayName: Var file
4468  - script: terraform apply -var "password=$(TF_PASSWORD)"
4469    displayName: Var value
4470"#;
4471        let graph = parse(yaml);
4472        let tfvars = graph
4473            .nodes_of_kind(NodeKind::Secret)
4474            .find(|n| n.name == "TFVARS_FILE")
4475            .expect("TFVARS_FILE secret exists");
4476        assert!(
4477            !tfvars.metadata.contains_key(META_CLI_FLAG_EXPOSED),
4478            "-var-file path should not be classified as an exposed -var value"
4479        );
4480        let password = graph
4481            .nodes_of_kind(NodeKind::Secret)
4482            .find(|n| n.name == "TF_PASSWORD")
4483            .expect("TF_PASSWORD secret exists");
4484        assert_eq!(
4485            password
4486                .metadata
4487                .get(META_CLI_FLAG_EXPOSED)
4488                .map(String::as_str),
4489            Some("true"),
4490            "-var key=$(SECRET) should still be marked as command-line exposed"
4491        );
4492    }
4493
4494    #[test]
4495    fn task_input_lookup_is_case_insensitive() {
4496        let yaml = r#"
4497steps:
4498  - task: TerraformTaskV4@4
4499    displayName: Terraform
4500    inputs:
4501      Command: apply
4502      CommandOptions: -auto-approve
4503  - task: AzureCLI@2
4504    displayName: SPN
4505    inputs:
4506      AddSpnToEnvironment: TRUE
4507      InLineScRiPt: echo hi
4508"#;
4509        let graph = parse(yaml);
4510        let terraform = graph
4511            .nodes_of_kind(NodeKind::Step)
4512            .find(|n| n.name == "Terraform")
4513            .expect("terraform step");
4514        assert_eq!(
4515            terraform
4516                .metadata
4517                .get(META_TERRAFORM_AUTO_APPROVE)
4518                .map(String::as_str),
4519            Some("true")
4520        );
4521        let spn = graph
4522            .nodes_of_kind(NodeKind::Step)
4523            .find(|n| n.name == "SPN")
4524            .expect("spn step");
4525        assert_eq!(
4526            spn.metadata.get(META_ADD_SPN_TO_ENV).map(String::as_str),
4527            Some("true")
4528        );
4529        assert_eq!(
4530            spn.metadata.get(META_SCRIPT_BODY).map(String::as_str),
4531            Some("echo hi"),
4532            "mixed-case inline script input key should be detected"
4533        );
4534    }
4535}
taudit_parse_ado/lib.rs

taudit_parse_ado/
lib.rs