Skip to main content

taudit_parse_ado/
lib.rs

1use std::collections::{HashMap, HashSet};
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
/// Regex-free check: does `s` run `terraform apply` together with an
/// `-auto-approve` / `--auto-approve` flag?
///
/// A line counts as an apply invocation when it contains `terraform`,
/// followed by one or more spaces or tabs, followed by `apply`. The flag is
/// accepted anywhere on that same line, or on one of the next three lines as
/// long as every line in between ends in a shell continuation `\` or a
/// PowerShell continuation `` ` ``.
///
/// Everything from the first `#` on a line onwards is treated as a comment
/// and ignored.
///
/// Case-sensitive on purpose — Terraform's CLI is case-sensitive and these
/// tokens never appear capitalised in real-world pipelines.
fn script_does_terraform_auto_apply(s: &str) -> bool {
    /// Return the part of `line` before the first `#`.
    fn code_part(line: &str) -> &str {
        line.split('#').next().unwrap_or("")
    }

    /// True when `line` (ignoring trailing whitespace) ends in `\` or `` ` ``.
    fn ends_with_continuation(line: &str) -> bool {
        let trimmed = line.trim_end();
        trimmed.ends_with('\\') || trimmed.ends_with('`')
    }

    /// True when `line` contains `terraform`, at least one space/tab, then
    /// `apply`. Accepting a whitespace run (not just a single separator)
    /// keeps `terraform  apply` from slipping past the check.
    fn invokes_apply(line: &str) -> bool {
        line.match_indices("terraform").any(|(start, word)| {
            let rest = &line[start + word.len()..];
            let after_gap = rest.trim_start_matches(|c: char| c == ' ' || c == '\t');
            after_gap.len() < rest.len() && after_gap.starts_with("apply")
        })
    }

    let lines: Vec<&str> = s.lines().collect();
    for (i, raw_line) in lines.iter().enumerate() {
        let line = code_part(raw_line);
        if !invokes_apply(line) {
            continue;
        }
        if line.contains("auto-approve") {
            return true;
        }
        // Continuation: peek at most three lines forward for the flag, and
        // stop as soon as a line does not continue onto the next one.
        let mut continuing = ends_with_continuation(line);
        for next_raw in lines.iter().skip(i + 1).take(3) {
            if !continuing {
                break;
            }
            let next = code_part(next_raw);
            if next.contains("auto-approve") {
                return true;
            }
            continuing = ends_with_continuation(next);
        }
    }
    false
}
40
41/// Azure DevOps YAML pipeline parser.
42pub struct AdoParser;
43
44impl PipelineParser for AdoParser {
45    fn platform(&self) -> &str {
46        "azure-devops"
47    }
48
49    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
50        let mut de = serde_yaml::Deserializer::from_str(content);
51        let doc = de
52            .next()
53            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
54        let pipeline: AdoPipeline = match AdoPipeline::deserialize(doc) {
55            Ok(p) => p,
56            Err(e) => {
57                // Real-world ADO template fragments often wrap their root content in
58                // a parameter conditional like `- ${{ if eq(parameters.X, true) }}:`
59                // followed by a list of jobs. That is not a standard YAML mapping at
60                // the root, so serde_yaml fails with a "did not find expected key"
61                // error. These files are intended to be `template:`-included from a
62                // parent pipeline; analyzing them in isolation is not meaningful.
63                // Return a near-empty graph marked Partial instead of crashing the scan.
64                let msg = e.to_string();
65                let looks_like_template_fragment = (msg.contains("did not find expected key")
66                    || (msg.contains("parameters")
67                        && msg.contains("invalid type: map")
68                        && msg.contains("expected a sequence")))
69                    && has_root_parameter_conditional(content);
70                if looks_like_template_fragment {
71                    let mut graph = AuthorityGraph::new(source.clone());
72                    graph.mark_partial(
73                        "ADO template fragment with top-level parameter conditional — root structure depends on parent pipeline context".to_string(),
74                    );
75                    return Ok(graph);
76                }
77                return Err(TauditError::Parse(format!("YAML parse error: {e}")));
78            }
79        };
80        let extra_docs = de.next().is_some();
81
82        let mut graph = AuthorityGraph::new(source.clone());
83        if extra_docs {
84            graph.mark_partial(
85                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
86            );
87        }
88
89        // Detect PR trigger — sets graph-level META_TRIGGER for trigger_context_mismatch.
90        let has_pr_trigger = pipeline.pr.is_some();
91        if has_pr_trigger {
92            graph.metadata.insert(META_TRIGGER.into(), "pr".into());
93        }
94
95        // Capture resources.repositories[] declarations and detect aliases that
96        // are actually referenced by an `extends:`, `template: x@alias`, or
97        // `checkout: alias`. The result is JSON-encoded into graph metadata
98        // for the `template_extends_unpinned_branch` rule to consume.
99        process_repositories(&pipeline, content, &mut graph);
100
101        // Capture top-level `parameters:` declarations (used by
102        // parameter_interpolation_into_shell). ADO defaults missing `type:`
103        // to string, so a missing/empty type is treated as a string.
104        if let Some(ref params) = pipeline.parameters {
105            for p in params {
106                let name = match p.name.as_ref() {
107                    Some(n) if !n.is_empty() => n.clone(),
108                    _ => continue,
109                };
110                let param_type = p.param_type.clone().unwrap_or_default();
111                let has_values_allowlist =
112                    p.values.as_ref().map(|v| !v.is_empty()).unwrap_or(false);
113                graph.parameters.insert(
114                    name,
115                    ParamSpec {
116                        param_type,
117                        has_values_allowlist,
118                    },
119                );
120            }
121        }
122
123        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
124
125        // System.AccessToken is always present — equivalent to GITHUB_TOKEN.
126        // Tagged implicit: ADO injects this token into every task by platform design;
127        // its exposure to marketplace tasks is structural, not a fixable misconfiguration.
128        let mut meta = HashMap::new();
129        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
130        meta.insert(META_IMPLICIT.into(), "true".into());
131        let token_id = graph.add_node_with_metadata(
132            NodeKind::Identity,
133            "System.AccessToken",
134            TrustZone::FirstParty,
135            meta,
136        );
137
138        // Pipeline-level pool: adds Image node, tagged self-hosted when applicable.
139        process_pool(&pipeline.pool, &mut graph);
140
141        // Pipeline-level variable groups and named secrets.
142        // plain_vars tracks non-secret named variables so $(VAR) refs in scripts
143        // don't generate false-positive Secret nodes for plain config values.
144        let mut plain_vars: HashSet<String> = HashSet::new();
145        let pipeline_secret_ids = process_variables(
146            &pipeline.variables,
147            &mut graph,
148            &mut secret_ids,
149            "pipeline",
150            &mut plain_vars,
151        );
152
153        // Determine pipeline structure: stages → jobs → steps, or jobs → steps, or steps only
154        if let Some(ref stages) = pipeline.stages {
155            for stage in stages {
156                // Stage-level template reference — delegate and mark Partial
157                if let Some(ref tpl) = stage.template {
158                    let stage_name = stage.stage.as_deref().unwrap_or("stage");
159                    add_template_delegation(stage_name, tpl, token_id, None, &mut graph);
160                    continue;
161                }
162
163                let stage_name = stage.stage.as_deref().unwrap_or("stage").to_string();
164                let stage_secret_ids = process_variables(
165                    &stage.variables,
166                    &mut graph,
167                    &mut secret_ids,
168                    &stage_name,
169                    &mut plain_vars,
170                );
171
172                for job in &stage.jobs {
173                    let job_name = job.effective_name();
174                    let job_secret_ids = process_variables(
175                        &job.variables,
176                        &mut graph,
177                        &mut secret_ids,
178                        &job_name,
179                        &mut plain_vars,
180                    );
181
182                    process_pool(&job.pool, &mut graph);
183
184                    let all_secrets: Vec<NodeId> = pipeline_secret_ids
185                        .iter()
186                        .chain(&stage_secret_ids)
187                        .chain(&job_secret_ids)
188                        .copied()
189                        .collect();
190
191                    let steps_start = graph.nodes.len();
192
193                    let job_steps = job.all_steps();
194                    process_steps(
195                        &job_steps,
196                        &job_name,
197                        token_id,
198                        &all_secrets,
199                        &plain_vars,
200                        &mut graph,
201                        &mut secret_ids,
202                    );
203
204                    if let Some(ref tpl) = job.template {
205                        add_template_delegation(
206                            &job_name,
207                            tpl,
208                            token_id,
209                            Some(&job_name),
210                            &mut graph,
211                        );
212                    }
213
214                    if job.has_environment_binding() {
215                        tag_job_steps_env_approval(&mut graph, steps_start);
216                    }
217                }
218            }
219        } else if let Some(ref jobs) = pipeline.jobs {
220            for job in jobs {
221                let job_name = job.effective_name();
222                let job_secret_ids = process_variables(
223                    &job.variables,
224                    &mut graph,
225                    &mut secret_ids,
226                    &job_name,
227                    &mut plain_vars,
228                );
229
230                process_pool(&job.pool, &mut graph);
231
232                let all_secrets: Vec<NodeId> = pipeline_secret_ids
233                    .iter()
234                    .chain(&job_secret_ids)
235                    .copied()
236                    .collect();
237
238                let steps_start = graph.nodes.len();
239
240                let job_steps = job.all_steps();
241                process_steps(
242                    &job_steps,
243                    &job_name,
244                    token_id,
245                    &all_secrets,
246                    &plain_vars,
247                    &mut graph,
248                    &mut secret_ids,
249                );
250
251                if let Some(ref tpl) = job.template {
252                    add_template_delegation(&job_name, tpl, token_id, Some(&job_name), &mut graph);
253                }
254
255                if job.has_environment_binding() {
256                    tag_job_steps_env_approval(&mut graph, steps_start);
257                }
258            }
259        } else if let Some(ref steps) = pipeline.steps {
260            process_steps(
261                steps,
262                "pipeline",
263                token_id,
264                &pipeline_secret_ids,
265                &plain_vars,
266                &mut graph,
267                &mut secret_ids,
268            );
269        }
270
271        Ok(graph)
272    }
273}
274
275/// Process an ADO `pool:` block. ADO pools come in two shapes:
276///   - `pool: my-self-hosted-pool` (string shorthand — always self-hosted)
277///   - `pool: { name: my-pool }` (named pool — self-hosted)
278///   - `pool: { vmImage: ubuntu-latest }` (Microsoft-hosted)
279///   - `pool: { name: my-pool, vmImage: ubuntu-latest }` (hosted; vmImage wins)
280///
281/// Creates an Image node representing the agent environment. Self-hosted pools
282/// are tagged with META_SELF_HOSTED so downstream rules can flag them.
283fn process_pool(pool: &Option<serde_yaml::Value>, graph: &mut AuthorityGraph) {
284    let Some(pool_val) = pool else {
285        return;
286    };
287
288    let (image_name, is_self_hosted) = match pool_val {
289        serde_yaml::Value::String(s) => (s.clone(), true),
290        serde_yaml::Value::Mapping(map) => {
291            let name = map.get("name").and_then(|v| v.as_str());
292            let vm_image = map.get("vmImage").and_then(|v| v.as_str());
293            match (name, vm_image) {
294                (_, Some(vm)) => (vm.to_string(), false),
295                (Some(n), None) => (n.to_string(), true),
296                (None, None) => return,
297            }
298        }
299        _ => return,
300    };
301
302    let mut meta = HashMap::new();
303    if is_self_hosted {
304        meta.insert(META_SELF_HOSTED.into(), "true".into());
305    }
306    graph.add_node_with_metadata(NodeKind::Image, image_name, TrustZone::FirstParty, meta);
307}
308
309/// Scan the parsed pipeline for `resources.repositories[]` declarations and
310/// determine which aliases are referenced inside the same file. Stores the
311/// result as a JSON-encoded array in `graph.metadata[META_REPOSITORIES]`.
312///
313/// Usage signal — an alias is "used" when it appears in any of:
314///   - `template: <path>@<alias>` (anywhere — top-level extends, stage, job, step)
315///   - `extends:` referencing `template: <path>@<alias>`
316///   - `checkout: <alias>` (steps consume an external repo into the workspace)
317///
318/// The `extends:` and per-step `template:` references are resolved by walking
319/// the parsed Value tree; the raw text is only used for the `checkout:` case
320/// (cheap substring scan, robust to YAML shape variation).
321fn process_repositories(pipeline: &AdoPipeline, raw_content: &str, graph: &mut AuthorityGraph) {
322    let resources = match pipeline.resources.as_ref() {
323        Some(r) if !r.repositories.is_empty() => r,
324        _ => return,
325    };
326
327    // Collect all aliases referenced as `template: x@alias`. We walk every
328    // `template:` field appearing in the parsed pipeline (extends and steps
329    // already deserialize to their own paths; stages/jobs use the per-job
330    // template field). The raw YAML walk via serde_yaml::Value covers all
331    // shapes uniformly without re-deriving structure-specific models.
332    let mut used_aliases: HashSet<String> = HashSet::new();
333
334    if let Some(ref ext) = pipeline.extends {
335        collect_template_alias_refs(ext, &mut used_aliases);
336    }
337    if let Ok(value) = serde_yaml::from_str::<serde_yaml::Value>(raw_content) {
338        collect_template_alias_refs(&value, &mut used_aliases);
339        collect_checkout_alias_refs(&value, &mut used_aliases);
340    }
341
342    // Build the JSON-encoded repository descriptor list.
343    let mut entries: Vec<serde_json::Value> = Vec::with_capacity(resources.repositories.len());
344    for repo in &resources.repositories {
345        let used = used_aliases.contains(&repo.repository);
346        let mut obj = serde_json::Map::new();
347        obj.insert(
348            "alias".into(),
349            serde_json::Value::String(repo.repository.clone()),
350        );
351        if let Some(ref t) = repo.repo_type {
352            obj.insert("repo_type".into(), serde_json::Value::String(t.clone()));
353        }
354        if let Some(ref n) = repo.name {
355            obj.insert("name".into(), serde_json::Value::String(n.clone()));
356        }
357        if let Some(ref r) = repo.git_ref {
358            obj.insert("ref".into(), serde_json::Value::String(r.clone()));
359        }
360        obj.insert("used".into(), serde_json::Value::Bool(used));
361        entries.push(serde_json::Value::Object(obj));
362    }
363
364    if let Ok(json) = serde_json::to_string(&serde_json::Value::Array(entries)) {
365        graph.metadata.insert(META_REPOSITORIES.into(), json);
366    }
367}
368
369/// Walk a YAML value and record every `template: <ref>@<alias>` alias seen.
370/// Recurses into mappings and sequences so it catches references in extends,
371/// stages, jobs, steps, and conditional blocks indiscriminately.
372fn collect_template_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
373    match value {
374        serde_yaml::Value::Mapping(map) => {
375            for (k, v) in map {
376                if k.as_str() == Some("template") {
377                    if let Some(s) = v.as_str() {
378                        if let Some(alias) = parse_template_alias(s) {
379                            sink.insert(alias);
380                        }
381                    }
382                }
383                collect_template_alias_refs(v, sink);
384            }
385        }
386        serde_yaml::Value::Sequence(seq) => {
387            for v in seq {
388                collect_template_alias_refs(v, sink);
389            }
390        }
391        _ => {}
392    }
393}
394
395/// Walk a YAML value and record every `checkout: <alias>` value seen, except
396/// `self` and `none` which are platform keywords (not external repo aliases).
397fn collect_checkout_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
398    match value {
399        serde_yaml::Value::Mapping(map) => {
400            for (k, v) in map {
401                if k.as_str() == Some("checkout") {
402                    if let Some(s) = v.as_str() {
403                        if s != "self" && s != "none" && !s.is_empty() {
404                            sink.insert(s.to_string());
405                        }
406                    }
407                }
408                collect_checkout_alias_refs(v, sink);
409            }
410        }
411        serde_yaml::Value::Sequence(seq) => {
412            for v in seq {
413                collect_checkout_alias_refs(v, sink);
414            }
415        }
416        _ => {}
417    }
418}
419
/// Return the `<alias>` part of a `template: <path>@<alias>` reference.
///
/// The alias is whatever follows the last `@`. Yields `None` when there is
/// no `@` at all (a plain in-repo path such as `templates/deploy.yml`, which
/// targets the current pipeline's repo rather than a
/// `resources.repositories[]` entry) or when nothing follows the last `@`.
fn parse_template_alias(template_ref: &str) -> Option<String> {
    template_ref
        .rsplit_once('@')
        .map(|(_path, alias)| alias)
        .filter(|alias| !alias.is_empty())
        .map(str::to_string)
}
432
433/// Tag every Step node added since `start_idx` with META_ENV_APPROVAL.
434/// Used after `process_steps` for a job whose `environment:` is configured —
435/// the environment binding indicates the job sits behind a manual approval
436/// gate, which is an isolation boundary that breaks automatic propagation.
437fn tag_job_steps_env_approval(graph: &mut AuthorityGraph, start_idx: usize) {
438    for node in graph.nodes.iter_mut().skip(start_idx) {
439        if node.kind == NodeKind::Step {
440            node.metadata
441                .insert(META_ENV_APPROVAL.into(), "true".into());
442        }
443    }
444}
445
446/// Process a variable list, creating Secret nodes and returning their IDs.
447/// Returns IDs for secrets only (not variable groups, which are opaque).
448/// Populates `plain_vars` with the names of non-secret named variables so
449/// downstream `$(VAR)` scanning can skip them.
450fn process_variables(
451    variables: &Option<AdoVariables>,
452    graph: &mut AuthorityGraph,
453    cache: &mut HashMap<String, NodeId>,
454    scope: &str,
455    plain_vars: &mut HashSet<String>,
456) -> Vec<NodeId> {
457    let mut ids = Vec::new();
458
459    let vars = match variables.as_ref() {
460        Some(v) => v,
461        None => return ids,
462    };
463
464    for var in &vars.0 {
465        match var {
466            AdoVariable::Group { group } => {
467                // Skip template-expression group names like `${{ parameters.env }}`.
468                // We can't resolve them statically — mark Partial but don't create
469                // a misleading Secret node with the expression as its name.
470                if group.contains("${{") {
471                    graph.mark_partial(format!(
472                        "variable group in {scope} uses template expression — group name unresolvable at parse time"
473                    ));
474                    continue;
475                }
476                let mut meta = HashMap::new();
477                meta.insert(META_VARIABLE_GROUP.into(), "true".into());
478                let id = graph.add_node_with_metadata(
479                    NodeKind::Secret,
480                    group.as_str(),
481                    TrustZone::FirstParty,
482                    meta,
483                );
484                cache.insert(group.clone(), id);
485                ids.push(id);
486                graph.mark_partial(format!(
487                    "variable group '{group}' in {scope} — contents unresolvable without ADO API access"
488                ));
489            }
490            AdoVariable::Named {
491                name, is_secret, ..
492            } => {
493                if *is_secret {
494                    let id = find_or_create_secret(graph, cache, name);
495                    ids.push(id);
496                } else {
497                    plain_vars.insert(name.clone());
498                }
499            }
500        }
501    }
502
503    ids
504}
505
506/// Process a list of ADO steps, adding nodes and edges to the graph.
507fn process_steps(
508    steps: &[AdoStep],
509    job_name: &str,
510    token_id: NodeId,
511    inherited_secrets: &[NodeId],
512    plain_vars: &HashSet<String>,
513    graph: &mut AuthorityGraph,
514    cache: &mut HashMap<String, NodeId>,
515) {
516    for (idx, step) in steps.iter().enumerate() {
517        // Template step — delegation, mark partial
518        if let Some(ref tpl) = step.template {
519            let step_name = step
520                .display_name
521                .as_deref()
522                .or(step.name.as_deref())
523                .map(|s| s.to_string())
524                .unwrap_or_else(|| format!("{job_name}[{idx}]"));
525            add_template_delegation(&step_name, tpl, token_id, Some(job_name), graph);
526            continue;
527        }
528
529        // Determine step kind and trust zone
530        let (step_name, trust_zone, mut inline_script) = classify_step(step, job_name, idx);
531
532        // For task steps (where classify_step returns None), recover an inline
533        // script body from `inputs.inlineScript` / `inputs.script` — used by
534        // AzureCLI@2, AzurePowerShell@5, Bash@3, etc. Without this fallback,
535        // rules that pattern-match script content miss every typed task.
536        if inline_script.is_none() {
537            if let Some(ref inputs) = step.inputs {
538                let candidate_keys = ["inlineScript", "script", "InlineScript", "Inline"];
539                for key in candidate_keys {
540                    if let Some(v) = inputs.get(key).and_then(yaml_value_as_str) {
541                        if !v.is_empty() {
542                            inline_script = Some(v.to_string());
543                            break;
544                        }
545                    }
546                }
547            }
548        }
549
550        let step_id = graph.add_node(NodeKind::Step, &step_name, trust_zone);
551
552        // Stamp parent job name so consumers (e.g. `taudit map --job`) can
553        // attribute steps back to their containing job.
554        if let Some(node) = graph.nodes.get_mut(step_id) {
555            node.metadata.insert(META_JOB_NAME.into(), job_name.into());
556            // Stamp the raw inline script body so script-aware rules
557            // (env-export of secrets, secret materialisation to files,
558            // Key Vault → plaintext) can pattern-match on the actual
559            // command text the agent will run.
560            if let Some(ref body) = inline_script {
561                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
562            }
563        }
564
565        // Stamp inline script body so command-line-leakage rules can inspect
566        // what the step actually executes (vm_remote_exec_via_pipeline_secret,
567        // short_lived_sas_in_command_line).
568        if let Some(ref body) = inline_script {
569            if let Some(node) = graph.nodes.get_mut(step_id) {
570                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
571            }
572        }
573
574        // Stamp the inline script body when present so rules that need to
575        // pattern-match against shell content can do so without re-parsing
576        // YAML. Bodies can be large; rules should treat META_SCRIPT_BODY as
577        // an opaque string and not assume any framing.
578        if let Some(ref body) = inline_script {
579            if let Some(node) = graph.nodes.get_mut(step_id) {
580                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
581            }
582        }
583
584        // Every step has access to System.AccessToken
585        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
586
587        // checkout step with persistCredentials: true writes the token to .git/config on disk,
588        // making it accessible to all subsequent steps and filesystem-level attackers.
589        if step.checkout.is_some() && step.persist_credentials == Some(true) {
590            graph.add_edge(step_id, token_id, EdgeKind::PersistsTo);
591        }
592
593        // `checkout: self` pulls the repo being built. In a PR trigger context this
594        // is the untrusted fork head — tag the step so downstream rules can gate on
595        // trigger context. Default ADO checkout (`checkout: self`) is the common case.
596        if let Some(ref ck) = step.checkout {
597            if ck == "self" {
598                if let Some(node) = graph.nodes.get_mut(step_id) {
599                    node.metadata
600                        .insert(META_CHECKOUT_SELF.into(), "true".into());
601                }
602            }
603        }
604
605        // Inherited pipeline/stage/job secrets
606        for &secret_id in inherited_secrets {
607            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
608        }
609
610        // Service connection detection from task inputs (case-insensitive key match)
611        if let Some(ref inputs) = step.inputs {
612            let service_conn_keys = [
613                "azuresubscription",
614                "connectedservicename",
615                "connectedservicenamearm",
616                "kubernetesserviceconnection",
617                "environmentservicename",
618                "backendservicearm",
619            ];
620            for (raw_key, val) in inputs {
621                let lower = raw_key.to_lowercase();
622                if !service_conn_keys.contains(&lower.as_str()) {
623                    continue;
624                }
625                let conn_name = yaml_value_as_str(val).unwrap_or(raw_key.as_str());
626                if !conn_name.starts_with("$(") {
627                    // Stamp the connection name onto the step itself so rules
628                    // that need the name (e.g. terraform_auto_approve_in_prod)
629                    // don't have to traverse edges.
630                    if let Some(node) = graph.nodes.get_mut(step_id) {
631                        node.metadata
632                            .insert(META_SERVICE_CONNECTION_NAME.into(), conn_name.to_string());
633                    }
634
635                    let mut meta = HashMap::new();
636                    meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
637                    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
638                    // ADO service connections are the platform's federated-identity equivalent
639                    // (modern Azure service connections use workload identity federation /
640                    // OIDC). Tag them so uplift_without_attestation treats ADO pipelines with
641                    // the same OIDC-parity logic applied to GHA.
642                    meta.insert(META_OIDC.into(), "true".into());
643                    let conn_id = graph.add_node_with_metadata(
644                        NodeKind::Identity,
645                        conn_name,
646                        TrustZone::FirstParty,
647                        meta,
648                    );
649                    graph.add_edge(step_id, conn_id, EdgeKind::HasAccessTo);
650                }
651            }
652
653            // addSpnToEnvironment: true exposes federated SPN material
654            // (idToken, servicePrincipalKey, servicePrincipalId, tenantId)
655            // to the step's inline script via env vars. Stamp the step so
656            // addspn_with_inline_script can pattern-match without traversal.
657            if let Some(val) = inputs.get("addSpnToEnvironment") {
658                let truthy = match val {
659                    serde_yaml::Value::Bool(b) => *b,
660                    serde_yaml::Value::String(s) => s.eq_ignore_ascii_case("true"),
661                    _ => false,
662                };
663                if truthy {
664                    if let Some(node) = graph.nodes.get_mut(step_id) {
665                        node.metadata
666                            .insert(META_ADD_SPN_TO_ENV.into(), "true".into());
667                    }
668                }
669            }
670
671            // TerraformCLI@N / TerraformTaskV1..V4 with command: apply +
672            // commandOptions containing auto-approve = same as inline
673            // `terraform apply --auto-approve`. Detect once here so the rule
674            // can read a single META_TERRAFORM_AUTO_APPROVE marker.
675            let task_lower = step
676                .task
677                .as_deref()
678                .map(|t| t.to_lowercase())
679                .unwrap_or_default();
680            let is_terraform_task = task_lower.starts_with("terraformcli@")
681                || task_lower.starts_with("terraformtask@")
682                || task_lower.starts_with("terraformtaskv");
683            if is_terraform_task {
684                let cmd_lower = inputs
685                    .get("command")
686                    .and_then(yaml_value_as_str)
687                    .map(|s| s.to_lowercase())
688                    .unwrap_or_default();
689                let opts = inputs
690                    .get("commandOptions")
691                    .and_then(yaml_value_as_str)
692                    .unwrap_or("");
693                if cmd_lower == "apply" && opts.contains("auto-approve") {
694                    if let Some(node) = graph.nodes.get_mut(step_id) {
695                        node.metadata
696                            .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
697                    }
698                }
699            }
700
701            // Detect $(varName) references in task input values
702            for val in inputs.values() {
703                if let Some(s) = yaml_value_as_str(val) {
704                    extract_dollar_paren_secrets(s, step_id, plain_vars, graph, cache);
705                }
706            }
707        }
708
709        // Inline-script detection of `terraform apply --auto-approve`.
710        // Done after inputs processing so we can OR the two signals into a
711        // single META_TERRAFORM_AUTO_APPROVE marker on the step.
712        if let Some(ref body) = inline_script {
713            if script_does_terraform_auto_apply(body) {
714                if let Some(node) = graph.nodes.get_mut(step_id) {
715                    node.metadata
716                        .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
717                }
718            }
719        }
720
721        // Detect $(varName) in step env values
722        if let Some(ref env) = step.env {
723            for val in env.values() {
724                extract_dollar_paren_secrets(val, step_id, plain_vars, graph, cache);
725            }
726        }
727
728        // Detect $(varName) in inline script text
729        if let Some(ref script) = inline_script {
730            extract_dollar_paren_secrets(script, step_id, plain_vars, graph, cache);
731        }
732
733        // Detect ##vso[task.setvariable] — environment gate mutation in ADO pipelines
734        if let Some(ref script) = inline_script {
735            let lower = script.to_lowercase();
736            if lower.contains("##vso[task.setvariable") {
737                if let Some(node) = graph.nodes.get_mut(step_id) {
738                    node.metadata
739                        .insert(META_WRITES_ENV_GATE.into(), "true".into());
740                }
741            }
742        }
743    }
744}
745
746/// Classify an ADO step, returning (name, trust_zone, inline_script_text).
747///
748/// `inline_script_text` is populated whenever the step has script content —
749/// either as a top-level `script:`/`bash:`/`powershell:`/`pwsh:` key, or as a
750/// task input (`Bash@3.inputs.script`, `PowerShell@2.inputs.script`,
751/// `AzureCLI@2.inputs.inlineScript`, `AzurePowerShell@5.inputs.Inline`, …).
752/// Task-input keys are matched case-insensitively because the ADO YAML schema
753/// is itself case-insensitive on input names.
754fn classify_step(
755    step: &AdoStep,
756    job_name: &str,
757    idx: usize,
758) -> (String, TrustZone, Option<String>) {
759    let default_name = || format!("{job_name}[{idx}]");
760
761    let name = step
762        .display_name
763        .as_deref()
764        .or(step.name.as_deref())
765        .map(|s| s.to_string())
766        .unwrap_or_else(default_name);
767
768    if step.task.is_some() {
769        // Task step — script body may live in inputs.{script,inlineScript,Inline}.
770        let inline = extract_task_inline_script(step.inputs.as_ref());
771        (name, TrustZone::Untrusted, inline)
772    } else if let Some(ref s) = step.script {
773        (name, TrustZone::FirstParty, Some(s.clone()))
774    } else if let Some(ref s) = step.bash {
775        (name, TrustZone::FirstParty, Some(s.clone()))
776    } else if let Some(ref s) = step.powershell {
777        (name, TrustZone::FirstParty, Some(s.clone()))
778    } else if let Some(ref s) = step.pwsh {
779        (name, TrustZone::FirstParty, Some(s.clone()))
780    } else {
781        (name, TrustZone::FirstParty, None)
782    }
783}
784
785/// Pull an inline script body out of a task step's `inputs:` mapping.
786/// Recognises the three common conventions:
787///   - `inputs.script` (Bash@3, PowerShell@2 — when targetType: inline)
788///   - `inputs.inlineScript` (AzureCLI@2)
789///   - `inputs.Inline` (AzurePowerShell@5 — note the capital I)
790///
791/// Match is case-insensitive so a hand-written pipeline using `Script:` or
792/// `INLINESCRIPT:` is still picked up.
793fn extract_task_inline_script(
794    inputs: Option<&HashMap<String, serde_yaml::Value>>,
795) -> Option<String> {
796    let inputs = inputs?;
797    const KEYS: &[&str] = &["script", "inlinescript", "inline"];
798    for (raw_key, val) in inputs {
799        let lower = raw_key.to_lowercase();
800        if KEYS.contains(&lower.as_str()) {
801            if let Some(s) = val.as_str() {
802                if !s.is_empty() {
803                    return Some(s.to_string());
804                }
805            }
806        }
807    }
808    None
809}
810
811/// Add a DelegatesTo edge from a synthetic step node to a template image node.
812///
813/// Trust zone heuristic: templates referenced with `@repository` (e.g. `steps/deploy.yml@templates`)
814/// pull code from an external repository and are Untrusted. Plain relative paths like
815/// `steps/deploy.yml` resolve within the same repo and are FirstParty — mirroring how GHA
816/// treats `./local-action`.
817///
818/// `job_name` is `Some` when the delegation is created inside a job's scope
819/// (job-level template, or template step inside `process_steps`); it is `None`
820/// for stage-level template delegations that don't belong to a specific job.
821fn add_template_delegation(
822    step_name: &str,
823    template_path: &str,
824    token_id: NodeId,
825    job_name: Option<&str>,
826    graph: &mut AuthorityGraph,
827) {
828    let tpl_trust_zone = if template_path.contains('@') {
829        TrustZone::Untrusted
830    } else {
831        TrustZone::FirstParty
832    };
833    let step_id = graph.add_node(NodeKind::Step, step_name, TrustZone::FirstParty);
834    if let Some(jn) = job_name {
835        if let Some(node) = graph.nodes.get_mut(step_id) {
836            node.metadata.insert(META_JOB_NAME.into(), jn.into());
837        }
838    }
839    let tpl_id = graph.add_node(NodeKind::Image, template_path, tpl_trust_zone);
840    graph.add_edge(step_id, tpl_id, EdgeKind::DelegatesTo);
841    graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
842    graph.mark_partial(format!(
843        "template '{template_path}' cannot be resolved inline — authority within the template is unknown"
844    ));
845}
846
847/// Extract `$(varName)` references from a string, creating Secret nodes for
848/// non-predefined and non-plain ADO variables.
849/// Only content that is a valid ADO variable identifier (`[A-Za-z][A-Za-z0-9_]*`)
850/// is treated as a variable reference. This rejects PowerShell sub-expressions
851/// (`$($var)`), ADO template expressions (`${{ ... }}`), shell commands (`$(date)`),
852/// and anything with spaces or special characters.
853fn extract_dollar_paren_secrets(
854    text: &str,
855    step_id: NodeId,
856    plain_vars: &HashSet<String>,
857    graph: &mut AuthorityGraph,
858    cache: &mut HashMap<String, NodeId>,
859) {
860    let mut pos = 0;
861    let bytes = text.as_bytes();
862    while pos < bytes.len() {
863        if pos + 2 < bytes.len() && bytes[pos] == b'$' && bytes[pos + 1] == b'(' {
864            let start = pos + 2;
865            if let Some(end_offset) = text[start..].find(')') {
866                let var_name = &text[start..start + end_offset];
867                if is_valid_ado_identifier(var_name)
868                    && !is_predefined_ado_var(var_name)
869                    && !plain_vars.contains(var_name)
870                {
871                    let id = find_or_create_secret(graph, cache, var_name);
872                    // Mark secrets embedded in -var flag arguments: their values appear in
873                    // pipeline logs (command string is logged before masking, and Terraform
874                    // itself logs -var values in plan output and debug traces).
875                    if is_in_terraform_var_flag(text, pos) {
876                        if let Some(node) = graph.nodes.get_mut(id) {
877                            node.metadata
878                                .insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
879                        }
880                    }
881                    graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
882                }
883                pos = start + end_offset + 1;
884                continue;
885            }
886        }
887        pos += 1;
888    }
889}
890
/// Heuristic check that the `$(VAR)` starting at byte offset `var_pos` sits
/// inside a Terraform `-var` flag argument.
///
/// Only the part of the current line that precedes `var_pos` is inspected.
/// It must contain both `-var` (the flag) and `=` (the key=value
/// assignment), as in `-var "key=$(VAR)"`.
///
/// `var_pos` must be a char boundary no greater than `text.len()`.
fn is_in_terraform_var_flag(text: &str, var_pos: usize) -> bool {
    let head = &text[..var_pos];
    // Keep only what follows the last newline before the variable.
    let same_line_prefix = match head.rfind('\n') {
        Some(newline) => &head[newline + 1..],
        None => head,
    };
    same_line_prefix.contains('=') && same_line_prefix.contains("-var")
}
899
/// Returns true if `name` looks like an ADO variable identifier.
///
/// Accepted names start with an ASCII letter and continue with ASCII
/// letters, digits, underscores or dots (dots allow namespaced names such as
/// `Build.BuildId`). Anything else — the empty string, PowerShell vars
/// (`$name`), template expressions (`{{ ... }}`), or complex expressions
/// (`name -join ','`) — is rejected.
fn is_valid_ado_identifier(name: &str) -> bool {
    match name.as_bytes() {
        [] => false,
        // Non-ASCII characters encode as bytes >= 0x80, which fail both checks.
        [first, rest @ ..] => {
            first.is_ascii_alphabetic()
                && rest
                    .iter()
                    .all(|b| b.is_ascii_alphanumeric() || matches!(*b, b'_' | b'.'))
        }
    }
}
914
/// Returns true if a variable name is a well-known ADO predefined variable.
/// These are system-provided and never represent secrets.
///
/// A name is predefined when it lives in one of the system namespaces
/// (`Build.`, `Agent.`, `System.`, …) or is exactly `TF_BUILD`.
///
/// `TF_BUILD` is matched exactly rather than as a prefix: every other entry
/// ends in the `.` namespace separator, whereas a bare `TF_BUILD` prefix
/// would also swallow user-defined variables such as `TF_BUILD_TOKEN` and
/// hide them from secret detection.
fn is_predefined_ado_var(name: &str) -> bool {
    const NAMESPACES: [&str; 9] = [
        "Build.",
        "Agent.",
        "System.",
        "Pipeline.",
        "Release.",
        "Environment.",
        "Strategy.",
        "Deployment.",
        "Resources.",
    ];
    name == "TF_BUILD" || NAMESPACES.iter().any(|ns| name.starts_with(ns))
}
932
933fn find_or_create_secret(
934    graph: &mut AuthorityGraph,
935    cache: &mut HashMap<String, NodeId>,
936    name: &str,
937) -> NodeId {
938    if let Some(&id) = cache.get(name) {
939        return id;
940    }
941    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
942    cache.insert(name.to_string(), id);
943    id
944}
945
946fn yaml_value_as_str(val: &serde_yaml::Value) -> Option<&str> {
947    val.as_str()
948}
949
950// ── Serde models for ADO YAML ─────────────────────────────
951
952/// Top-level ADO pipeline definition.
953/// ADO pipelines come in three shapes:
954///   (a) stages → jobs → steps
955///   (b) jobs → steps (no stages key)
956///   (c) steps only (no stages or jobs key)
957#[derive(Debug, Deserialize)]
958pub struct AdoPipeline {
959    #[serde(default)]
960    pub trigger: Option<serde_yaml::Value>,
961    #[serde(default)]
962    pub pr: Option<serde_yaml::Value>,
963    #[serde(default)]
964    pub variables: Option<AdoVariables>,
965    #[serde(default)]
966    pub stages: Option<Vec<AdoStage>>,
967    #[serde(default)]
968    pub jobs: Option<Vec<AdoJob>>,
969    #[serde(default)]
970    pub steps: Option<Vec<AdoStep>>,
971    #[serde(default)]
972    pub pool: Option<serde_yaml::Value>,
973    /// `resources:` block — repository declarations, container declarations,
974    /// pipeline declarations. We only consume `repositories[]` today.
975    #[serde(default)]
976    pub resources: Option<AdoResources>,
977    /// Top-level `extends:` directive — `extends: { template: x@alias, ... }`.
978    /// Captured raw so we can scan for `template: x@alias` references that
979    /// consume a `resources.repositories[]` entry.
980    #[serde(default)]
981    pub extends: Option<serde_yaml::Value>,
982    /// Top-level `parameters:` declarations. Each entry has at minimum a
983    /// `name`; `type` defaults to `string` when omitted. `values:` is an
984    /// optional allowlist that constrains caller input.
985    #[serde(default)]
986    pub parameters: Option<Vec<AdoParameter>>,
987}
988
989/// `resources:` block. Only `repositories[]` is modelled today.
990#[derive(Debug, Default, Deserialize)]
991pub struct AdoResources {
992    #[serde(default)]
993    pub repositories: Vec<AdoRepository>,
994}
995
996/// A single `resources.repositories[]` entry — declares an external repo
997/// alias the pipeline can consume via `template: x@alias`, `extends:`, or
998/// `checkout: alias`.
999#[derive(Debug, Deserialize)]
1000pub struct AdoRepository {
1001    /// The alias used by consumers (`template: file@<repository>`).
1002    pub repository: String,
1003    /// `git`, `github`, `bitbucket`, or `azureGit`.
1004    #[serde(default, rename = "type")]
1005    pub repo_type: Option<String>,
1006    /// Full repo path (e.g. `org/repo`).
1007    #[serde(default)]
1008    pub name: Option<String>,
1009    /// Optional ref. Absent = default branch (mutable). Present forms:
1010    /// `refs/tags/v1.2.3`, `refs/heads/main`, bare branch `main`, or a SHA.
1011    #[serde(default, rename = "ref")]
1012    pub git_ref: Option<String>,
1013}
1014
1015/// Pipeline / template `parameters:` entry. We deliberately ignore `default:`
1016/// — only the name, type, and `values:` allowlist matter for our rules.
1017#[derive(Debug, Deserialize)]
1018pub struct AdoParameter {
1019    #[serde(default)]
1020    pub name: Option<String>,
1021    #[serde(rename = "type", default)]
1022    pub param_type: Option<String>,
1023    #[serde(default)]
1024    pub values: Option<Vec<serde_yaml::Value>>,
1025}
1026
1027#[derive(Debug, Deserialize)]
1028pub struct AdoStage {
1029    /// Stage identifier. Absent when the stage entry is a template reference.
1030    #[serde(default)]
1031    pub stage: Option<String>,
1032    /// Stage-level template reference (`- template: path/to/stage.yml`).
1033    #[serde(default)]
1034    pub template: Option<String>,
1035    #[serde(default)]
1036    pub variables: Option<AdoVariables>,
1037    #[serde(default)]
1038    pub jobs: Vec<AdoJob>,
1039}
1040
1041#[derive(Debug, Deserialize)]
1042pub struct AdoJob {
1043    /// Regular job identifier
1044    #[serde(default)]
1045    pub job: Option<String>,
1046    /// Deployment job identifier
1047    #[serde(default)]
1048    pub deployment: Option<String>,
1049    #[serde(default)]
1050    pub variables: Option<AdoVariables>,
1051    #[serde(default)]
1052    pub steps: Option<Vec<AdoStep>>,
1053    /// Deployment-job nested strategy: runOnce/rolling/canary all share the
1054    /// shape `strategy.{runOnce,rolling,canary}.deploy.steps`. We only need
1055    /// the steps — the strategy choice itself doesn't change authority flow.
1056    #[serde(default)]
1057    pub strategy: Option<AdoStrategy>,
1058    #[serde(default)]
1059    pub pool: Option<serde_yaml::Value>,
1060    /// Job-level template reference
1061    #[serde(default)]
1062    pub template: Option<String>,
1063    /// Deployment-job environment binding. Two YAML shapes:
1064    ///
1065    ///   - `environment: production` (string shorthand)
1066    ///   - `environment: { name: staging, resourceType: VirtualMachine }` (mapping)
1067    ///
1068    /// When present, the environment may have approvals/checks attached in ADO's
1069    /// environment configuration. Approvals are a manual gate — authority cannot
1070    /// propagate past one without human intervention. We treat any `environment:`
1071    /// binding as an approval candidate and tag the job's steps so propagation
1072    /// rules can downgrade severity. (We can't see the approval config from YAML
1073    /// alone; the binding is the strongest signal available at parse time.)
1074    #[serde(default)]
1075    pub environment: Option<serde_yaml::Value>,
1076}
1077
1078impl AdoJob {
1079    pub fn effective_name(&self) -> String {
1080        self.job
1081            .as_deref()
1082            .or(self.deployment.as_deref())
1083            .unwrap_or("job")
1084            .to_string()
1085    }
1086
1087    /// Returns the effective step list for this job.
1088    ///
1089    /// Regular jobs put steps under `steps:` directly. Deployment jobs nest
1090    /// them under `strategy.{runOnce,rolling,canary}.{deploy,preDeploy,
1091    /// postDeploy,routeTraffic,onSuccess,onFailure}.steps`. We merge all
1092    /// strategy-nested step lists into a single sequence so downstream rules
1093    /// see them as part of the job. Order: regular `steps:` first, then any
1094    /// strategy-nested steps in deterministic phase order.
1095    pub fn all_steps(&self) -> Vec<AdoStep> {
1096        let mut out: Vec<AdoStep> = Vec::new();
1097        if let Some(ref s) = self.steps {
1098            out.extend(s.iter().cloned());
1099        }
1100        if let Some(ref strat) = self.strategy {
1101            for phase in strat.phases() {
1102                if let Some(ref s) = phase.steps {
1103                    out.extend(s.iter().cloned());
1104                }
1105            }
1106        }
1107        out
1108    }
1109
1110    /// Returns true when the job is bound to an `environment:` — either the
1111    /// string form (`environment: production`) or the mapping form with a
1112    /// non-empty `name:` field. An empty mapping or empty string is ignored.
1113    pub fn has_environment_binding(&self) -> bool {
1114        match self.environment.as_ref() {
1115            None => false,
1116            Some(serde_yaml::Value::String(s)) => !s.trim().is_empty(),
1117            Some(serde_yaml::Value::Mapping(m)) => m
1118                .get("name")
1119                .and_then(|v| v.as_str())
1120                .map(|s| !s.trim().is_empty())
1121                .unwrap_or(false),
1122            _ => false,
1123        }
1124    }
1125}
1126
1127/// Deployment-job `strategy:` block. ADO ships three strategies — runOnce,
1128/// rolling, canary — each with multiple lifecycle phases that may carry
1129/// their own step list. We capture all of them; the AdoJob::all_steps
1130/// helper flattens them into one sequence.
1131#[derive(Debug, Default, Deserialize, Clone)]
1132pub struct AdoStrategy {
1133    #[serde(default, rename = "runOnce")]
1134    pub run_once: Option<AdoStrategyRunOnce>,
1135    #[serde(default)]
1136    pub rolling: Option<AdoStrategyRunOnce>,
1137    #[serde(default)]
1138    pub canary: Option<AdoStrategyRunOnce>,
1139}
1140
1141impl AdoStrategy {
1142    /// Iterate over every populated lifecycle phase across all strategies.
1143    pub fn phases(&self) -> Vec<&AdoStrategyPhase> {
1144        let mut out: Vec<&AdoStrategyPhase> = Vec::new();
1145        for runner in [&self.run_once, &self.rolling, &self.canary]
1146            .iter()
1147            .copied()
1148            .flatten()
1149        {
1150            for phase in [
1151                &runner.deploy,
1152                &runner.pre_deploy,
1153                &runner.post_deploy,
1154                &runner.route_traffic,
1155            ]
1156            .into_iter()
1157            .flatten()
1158            {
1159                out.push(phase);
1160            }
1161            if let Some(ref on) = runner.on {
1162                if let Some(ref s) = on.success {
1163                    out.push(s);
1164                }
1165                if let Some(ref f) = on.failure {
1166                    out.push(f);
1167                }
1168            }
1169        }
1170        out
1171    }
1172}
1173
1174/// Lifecycle phases carried by every deployment strategy. Each phase may
1175/// have its own `steps:`. Covering all six avoids silently dropping
1176/// privileged setup/teardown steps from the authority graph.
1177#[derive(Debug, Default, Deserialize, Clone)]
1178pub struct AdoStrategyRunOnce {
1179    #[serde(default)]
1180    pub deploy: Option<AdoStrategyPhase>,
1181    #[serde(default, rename = "preDeploy")]
1182    pub pre_deploy: Option<AdoStrategyPhase>,
1183    #[serde(default, rename = "postDeploy")]
1184    pub post_deploy: Option<AdoStrategyPhase>,
1185    #[serde(default, rename = "routeTraffic")]
1186    pub route_traffic: Option<AdoStrategyPhase>,
1187    #[serde(default)]
1188    pub on: Option<AdoStrategyOn>,
1189}
1190
1191#[derive(Debug, Default, Deserialize, Clone)]
1192pub struct AdoStrategyOn {
1193    #[serde(default)]
1194    pub success: Option<AdoStrategyPhase>,
1195    #[serde(default)]
1196    pub failure: Option<AdoStrategyPhase>,
1197}
1198
1199#[derive(Debug, Default, Deserialize, Clone)]
1200pub struct AdoStrategyPhase {
1201    #[serde(default)]
1202    pub steps: Option<Vec<AdoStep>>,
1203}
1204
1205#[derive(Debug, Deserialize, Clone)]
1206pub struct AdoStep {
1207    /// Task reference e.g. `AzureCLI@2`
1208    #[serde(default)]
1209    pub task: Option<String>,
1210    /// Inline script (cmd/sh)
1211    #[serde(default)]
1212    pub script: Option<String>,
1213    /// Inline bash script
1214    #[serde(default)]
1215    pub bash: Option<String>,
1216    /// Inline PowerShell script
1217    #[serde(default)]
1218    pub powershell: Option<String>,
1219    /// Cross-platform PowerShell
1220    #[serde(default)]
1221    pub pwsh: Option<String>,
1222    /// Step-level template reference
1223    #[serde(default)]
1224    pub template: Option<String>,
1225    #[serde(rename = "displayName", default)]
1226    pub display_name: Option<String>,
1227    /// Legacy name alias
1228    #[serde(default)]
1229    pub name: Option<String>,
1230    #[serde(default)]
1231    pub env: Option<HashMap<String, String>>,
1232    /// Task inputs (key → value, but values may be nested)
1233    #[serde(default)]
1234    pub inputs: Option<HashMap<String, serde_yaml::Value>>,
1235    /// Checkout step target (e.g. `self`, a repo alias, or `none`)
1236    #[serde(default)]
1237    pub checkout: Option<String>,
1238    /// When true on a checkout step, writes credentials to .git/config for subsequent steps.
1239    #[serde(rename = "persistCredentials", default)]
1240    pub persist_credentials: Option<bool>,
1241}
1242
1243/// ADO `variables:` block. Can be a sequence (list of group/name-value entries)
1244/// or a mapping (variableName: value). We normalise both into a Vec<AdoVariable>.
1245#[derive(Debug, Default)]
1246pub struct AdoVariables(pub Vec<AdoVariable>);
1247
1248impl<'de> serde::Deserialize<'de> for AdoVariables {
1249    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1250    where
1251        D: serde::Deserializer<'de>,
1252    {
1253        let raw = serde_yaml::Value::deserialize(deserializer)?;
1254        let mut vars = Vec::new();
1255
1256        match raw {
1257            serde_yaml::Value::Sequence(seq) => {
1258                for item in seq {
1259                    if let Some(map) = item.as_mapping() {
1260                        if let Some(group_val) = map.get("group") {
1261                            if let Some(group) = group_val.as_str() {
1262                                vars.push(AdoVariable::Group {
1263                                    group: group.to_string(),
1264                                });
1265                                continue;
1266                            }
1267                        }
1268                        let name = map
1269                            .get("name")
1270                            .and_then(|v| v.as_str())
1271                            .unwrap_or("")
1272                            .to_string();
1273                        let value = map
1274                            .get("value")
1275                            .and_then(|v| v.as_str())
1276                            .unwrap_or("")
1277                            .to_string();
1278                        let is_secret = map
1279                            .get("isSecret")
1280                            .and_then(|v| v.as_bool())
1281                            .unwrap_or(false);
1282                        vars.push(AdoVariable::Named {
1283                            name,
1284                            value,
1285                            is_secret,
1286                        });
1287                    }
1288                }
1289            }
1290            serde_yaml::Value::Mapping(map) => {
1291                for (k, v) in map {
1292                    let name = k.as_str().unwrap_or("").to_string();
1293                    let value = v.as_str().unwrap_or("").to_string();
1294                    vars.push(AdoVariable::Named {
1295                        name,
1296                        value,
1297                        is_secret: false,
1298                    });
1299                }
1300            }
1301            _ => {}
1302        }
1303
1304        Ok(AdoVariables(vars))
1305    }
1306}
1307
/// A single normalised entry of a `variables:` block.
#[derive(Debug)]
pub enum AdoVariable {
    /// A variable-group reference (`- group: Name`).
    Group {
        /// Name of the referenced variable group.
        group: String,
    },
    /// A named variable, from either the list form (`- name: … value: …`)
    /// or the mapping form (`name: value`).
    Named {
        /// Variable name (`""` when the source had no string name).
        name: String,
        /// Variable value (`""` when the source had no string value).
        value: String,
        /// Value of the list form's `isSecret` key; always `false` for the
        /// mapping form.
        is_secret: bool,
    },
}
1319
/// Heuristic: does this YAML contain a parameter conditional wrapper such as
/// `- ${{ if eq(parameters.X, true) }}:`?
///
/// This is the construct that breaks root-level mapping parses but is valid
/// in an ADO template fragment included by a parent pipeline.
///
/// A line matches when, after stripping leading whitespace and an optional
/// `- ` list marker, it starts with `${{`, contains `if ` or `if(`, and
/// (ignoring trailing whitespace) ends with `:`. The check is per line and
/// at any indentation level; one matching line is enough.
fn has_root_parameter_conditional(content: &str) -> bool {
    content.lines().any(|line| {
        let body = line.trim_start();
        // Accept both `- ${{ if ... }}:` and bare `${{ if ... }}:` forms.
        let body = body.strip_prefix("- ").unwrap_or(body);

        let is_expression = body.starts_with("${{");
        let mentions_if = body.contains("if ") || body.contains("if(");
        let opens_block = body.trim_end().ends_with(':');
        is_expression && mentions_if && opens_block
    })
}
1339
1340#[cfg(test)]
1341mod tests {
1342    use super::*;
1343
1344    fn parse(yaml: &str) -> AuthorityGraph {
1345        let parser = AdoParser;
1346        let source = PipelineSource {
1347            file: "azure-pipelines.yml".into(),
1348            repo: None,
1349            git_ref: None,
1350            commit_sha: None,
1351        };
1352        parser.parse(yaml, &source).unwrap()
1353    }
1354
1355    #[test]
1356    fn parses_simple_pipeline() {
1357        let yaml = r#"
1358trigger:
1359  - main
1360
1361jobs:
1362  - job: Build
1363    steps:
1364      - script: echo hello
1365        displayName: Say hello
1366"#;
1367        let graph = parse(yaml);
1368        assert!(graph.nodes.len() >= 2); // System.AccessToken + step
1369    }
1370
1371    #[test]
1372    fn system_access_token_created() {
1373        let yaml = r#"
1374steps:
1375  - script: echo hi
1376"#;
1377        let graph = parse(yaml);
1378        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1379        assert_eq!(identities.len(), 1);
1380        assert_eq!(identities[0].name, "System.AccessToken");
1381        assert_eq!(
1382            identities[0].metadata.get(META_IDENTITY_SCOPE),
1383            Some(&"broad".to_string())
1384        );
1385    }
1386
1387    #[test]
1388    fn variable_group_creates_secret_and_marks_partial() {
1389        let yaml = r#"
1390variables:
1391  - group: MySecretGroup
1392
1393steps:
1394  - script: echo hi
1395"#;
1396        let graph = parse(yaml);
1397        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1398        assert_eq!(secrets.len(), 1);
1399        assert_eq!(secrets[0].name, "MySecretGroup");
1400        assert_eq!(
1401            secrets[0].metadata.get(META_VARIABLE_GROUP),
1402            Some(&"true".to_string())
1403        );
1404        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1405        assert!(
1406            graph
1407                .completeness_gaps
1408                .iter()
1409                .any(|g| g.contains("MySecretGroup")),
1410            "completeness gap should name the variable group"
1411        );
1412    }
1413
1414    #[test]
1415    fn task_with_azure_subscription_creates_service_connection_identity() {
1416        let yaml = r#"
1417steps:
1418  - task: AzureCLI@2
1419    displayName: Deploy to Azure
1420    inputs:
1421      azureSubscription: MyServiceConnection
1422      scriptType: bash
1423      inlineScript: az group list
1424"#;
1425        let graph = parse(yaml);
1426        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1427        // System.AccessToken + service connection
1428        assert_eq!(identities.len(), 2);
1429        let conn = identities
1430            .iter()
1431            .find(|i| i.name == "MyServiceConnection")
1432            .unwrap();
1433        assert_eq!(
1434            conn.metadata.get(META_SERVICE_CONNECTION),
1435            Some(&"true".to_string())
1436        );
1437        assert_eq!(
1438            conn.metadata.get(META_IDENTITY_SCOPE),
1439            Some(&"broad".to_string())
1440        );
1441    }
1442
1443    #[test]
1444    fn task_with_connected_service_name_creates_identity() {
1445        let yaml = r#"
1446steps:
1447  - task: SqlAzureDacpacDeployment@1
1448    inputs:
1449      ConnectedServiceNameARM: MySqlConnection
1450"#;
1451        let graph = parse(yaml);
1452        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1453        assert!(
1454            identities.iter().any(|i| i.name == "MySqlConnection"),
1455            "connectedServiceNameARM should create identity"
1456        );
1457    }
1458
1459    #[test]
1460    fn script_step_classified_as_first_party() {
1461        let yaml = r#"
1462steps:
1463  - script: echo hi
1464    displayName: Say hi
1465"#;
1466        let graph = parse(yaml);
1467        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1468        assert_eq!(steps.len(), 1);
1469        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
1470    }
1471
1472    #[test]
1473    fn bash_step_classified_as_first_party() {
1474        let yaml = r#"
1475steps:
1476  - bash: echo hi
1477"#;
1478        let graph = parse(yaml);
1479        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1480        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
1481    }
1482
1483    #[test]
1484    fn task_step_classified_as_untrusted() {
1485        let yaml = r#"
1486steps:
1487  - task: DotNetCoreCLI@2
1488    inputs:
1489      command: build
1490"#;
1491        let graph = parse(yaml);
1492        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1493        assert_eq!(steps.len(), 1);
1494        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
1495    }
1496
1497    #[test]
1498    fn dollar_paren_var_in_script_creates_secret() {
1499        let yaml = r#"
1500steps:
1501  - script: |
1502      curl -H "Authorization: $(MY_API_TOKEN)" https://api.example.com
1503    displayName: Call API
1504"#;
1505        let graph = parse(yaml);
1506        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1507        assert_eq!(secrets.len(), 1);
1508        assert_eq!(secrets[0].name, "MY_API_TOKEN");
1509    }
1510
1511    #[test]
1512    fn predefined_ado_var_not_treated_as_secret() {
1513        let yaml = r#"
1514steps:
1515  - script: |
1516      echo $(Build.BuildId)
1517      echo $(Agent.WorkFolder)
1518      echo $(System.DefaultWorkingDirectory)
1519    displayName: Print vars
1520"#;
1521        let graph = parse(yaml);
1522        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1523        assert!(
1524            secrets.is_empty(),
1525            "predefined ADO vars should not be treated as secrets, got: {:?}",
1526            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
1527        );
1528    }
1529
1530    #[test]
1531    fn template_reference_creates_delegates_to_and_marks_partial() {
1532        let yaml = r#"
1533steps:
1534  - template: steps/deploy.yml
1535    parameters:
1536      env: production
1537"#;
1538        let graph = parse(yaml);
1539        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1540        assert_eq!(steps.len(), 1);
1541
1542        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1543        assert_eq!(images.len(), 1);
1544        assert_eq!(images[0].name, "steps/deploy.yml");
1545
1546        let delegates: Vec<_> = graph
1547            .edges_from(steps[0].id)
1548            .filter(|e| e.kind == EdgeKind::DelegatesTo)
1549            .collect();
1550        assert_eq!(delegates.len(), 1);
1551
1552        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1553    }
1554
1555    #[test]
1556    fn top_level_steps_no_jobs() {
1557        let yaml = r#"
1558steps:
1559  - script: echo a
1560  - script: echo b
1561"#;
1562        let graph = parse(yaml);
1563        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1564        assert_eq!(steps.len(), 2);
1565    }
1566
1567    #[test]
1568    fn top_level_jobs_no_stages() {
1569        let yaml = r#"
1570jobs:
1571  - job: JobA
1572    steps:
1573      - script: echo a
1574  - job: JobB
1575    steps:
1576      - script: echo b
1577"#;
1578        let graph = parse(yaml);
1579        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1580        assert_eq!(steps.len(), 2);
1581    }
1582
1583    #[test]
1584    fn stages_with_nested_jobs_parsed() {
1585        let yaml = r#"
1586stages:
1587  - stage: Build
1588    jobs:
1589      - job: Compile
1590        steps:
1591          - script: cargo build
1592  - stage: Test
1593    jobs:
1594      - job: UnitTest
1595        steps:
1596          - script: cargo test
1597"#;
1598        let graph = parse(yaml);
1599        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1600        assert_eq!(steps.len(), 2);
1601    }
1602
1603    #[test]
1604    fn all_steps_linked_to_system_access_token() {
1605        let yaml = r#"
1606steps:
1607  - script: echo a
1608  - task: SomeTask@1
1609    inputs: {}
1610"#;
1611        let graph = parse(yaml);
1612        let token: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1613        assert_eq!(token.len(), 1);
1614        let token_id = token[0].id;
1615
1616        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1617        for step in &steps {
1618            let links: Vec<_> = graph
1619                .edges_from(step.id)
1620                .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == token_id)
1621                .collect();
1622            assert_eq!(
1623                links.len(),
1624                1,
1625                "step '{}' must link to System.AccessToken",
1626                step.name
1627            );
1628        }
1629    }
1630
1631    #[test]
1632    fn named_secret_variable_creates_secret_node() {
1633        let yaml = r#"
1634variables:
1635  - name: MY_PASSWORD
1636    value: dummy
1637    isSecret: true
1638
1639steps:
1640  - script: echo hi
1641"#;
1642        let graph = parse(yaml);
1643        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1644        assert_eq!(secrets.len(), 1);
1645        assert_eq!(secrets[0].name, "MY_PASSWORD");
1646    }
1647
1648    #[test]
1649    fn variables_as_mapping_parsed() {
1650        let yaml = r#"
1651variables:
1652  MY_VAR: hello
1653  ANOTHER_VAR: world
1654
1655steps:
1656  - script: echo hi
1657"#;
1658        let graph = parse(yaml);
1659        // Mapping-style variables without isSecret — no secret nodes created
1660        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1661        assert!(
1662            secrets.is_empty(),
1663            "plain mapping vars should not create secret nodes"
1664        );
1665    }
1666
1667    #[test]
1668    fn persist_credentials_creates_persists_to_edge() {
1669        let yaml = r#"
1670steps:
1671  - checkout: self
1672    persistCredentials: true
1673  - script: git push
1674"#;
1675        let graph = parse(yaml);
1676        let token_id = graph
1677            .nodes_of_kind(NodeKind::Identity)
1678            .find(|n| n.name == "System.AccessToken")
1679            .expect("System.AccessToken must exist")
1680            .id;
1681
1682        let persists_edges: Vec<_> = graph
1683            .edges
1684            .iter()
1685            .filter(|e| e.kind == EdgeKind::PersistsTo && e.to == token_id)
1686            .collect();
1687        assert_eq!(
1688            persists_edges.len(),
1689            1,
1690            "checkout with persistCredentials: true must produce exactly one PersistsTo edge"
1691        );
1692    }
1693
1694    #[test]
1695    fn checkout_without_persist_credentials_no_persists_to_edge() {
1696        let yaml = r#"
1697steps:
1698  - checkout: self
1699  - script: echo hi
1700"#;
1701        let graph = parse(yaml);
1702        let persists_edges: Vec<_> = graph
1703            .edges
1704            .iter()
1705            .filter(|e| e.kind == EdgeKind::PersistsTo)
1706            .collect();
1707        assert!(
1708            persists_edges.is_empty(),
1709            "checkout without persistCredentials should not produce PersistsTo edge"
1710        );
1711    }
1712
1713    #[test]
1714    fn var_flag_secret_marked_as_cli_flag_exposed() {
1715        let yaml = r#"
1716steps:
1717  - script: |
1718      terraform apply \
1719        -var "db_password=$(db_password)" \
1720        -var "api_key=$(api_key)"
1721    displayName: Terraform apply
1722"#;
1723        let graph = parse(yaml);
1724        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1725        assert!(!secrets.is_empty(), "should detect secrets from -var flags");
1726        for secret in &secrets {
1727            assert_eq!(
1728                secret.metadata.get(META_CLI_FLAG_EXPOSED),
1729                Some(&"true".to_string()),
1730                "secret '{}' passed via -var flag should be marked cli_flag_exposed",
1731                secret.name
1732            );
1733        }
1734    }
1735
1736    #[test]
1737    fn non_var_flag_secret_not_marked_as_cli_flag_exposed() {
1738        let yaml = r#"
1739steps:
1740  - script: |
1741      curl -H "Authorization: $(MY_TOKEN)" https://api.example.com
1742"#;
1743        let graph = parse(yaml);
1744        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1745        assert_eq!(secrets.len(), 1);
1746        assert!(
1747            !secrets[0].metadata.contains_key(META_CLI_FLAG_EXPOSED),
1748            "non -var secret should not be marked as cli_flag_exposed"
1749        );
1750    }
1751
1752    #[test]
1753    fn step_linked_to_variable_group_secret() {
1754        let yaml = r#"
1755variables:
1756  - group: ProdSecrets
1757
1758steps:
1759  - script: deploy.sh
1760"#;
1761        let graph = parse(yaml);
1762        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1763        assert_eq!(secrets.len(), 1);
1764        let secret_id = secrets[0].id;
1765
1766        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1767        let links: Vec<_> = graph
1768            .edges_from(steps[0].id)
1769            .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == secret_id)
1770            .collect();
1771        assert_eq!(
1772            links.len(),
1773            1,
1774            "step should be linked to variable group secret"
1775        );
1776    }
1777
1778    #[test]
1779    fn pr_trigger_sets_meta_trigger_on_graph() {
1780        let yaml = r#"
1781pr:
1782  - '*'
1783
1784steps:
1785  - script: echo hi
1786"#;
1787        let graph = parse(yaml);
1788        assert_eq!(
1789            graph.metadata.get(META_TRIGGER),
1790            Some(&"pr".to_string()),
1791            "ADO pr: trigger should set graph META_TRIGGER"
1792        );
1793    }
1794
1795    #[test]
1796    fn self_hosted_pool_by_name_creates_image_with_self_hosted_metadata() {
1797        let yaml = r#"
1798pool:
1799  name: my-self-hosted-pool
1800
1801steps:
1802  - script: echo hi
1803"#;
1804        let graph = parse(yaml);
1805        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1806        assert_eq!(images.len(), 1);
1807        assert_eq!(images[0].name, "my-self-hosted-pool");
1808        assert_eq!(
1809            images[0].metadata.get(META_SELF_HOSTED),
1810            Some(&"true".to_string()),
1811            "pool.name without vmImage must be tagged self-hosted"
1812        );
1813    }
1814
1815    #[test]
1816    fn vm_image_pool_is_not_tagged_self_hosted() {
1817        let yaml = r#"
1818pool:
1819  vmImage: ubuntu-latest
1820
1821steps:
1822  - script: echo hi
1823"#;
1824        let graph = parse(yaml);
1825        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1826        assert_eq!(images.len(), 1);
1827        assert_eq!(images[0].name, "ubuntu-latest");
1828        assert!(
1829            !images[0].metadata.contains_key(META_SELF_HOSTED),
1830            "pool.vmImage is Microsoft-hosted — must not be tagged self-hosted"
1831        );
1832    }
1833
1834    #[test]
1835    fn checkout_self_step_tagged_with_meta_checkout_self() {
1836        let yaml = r#"
1837steps:
1838  - checkout: self
1839  - script: echo hi
1840"#;
1841        let graph = parse(yaml);
1842        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1843        assert_eq!(steps.len(), 2);
1844        let checkout_step = steps
1845            .iter()
1846            .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
1847            .expect("one step must be tagged META_CHECKOUT_SELF");
1848        assert_eq!(
1849            checkout_step.metadata.get(META_CHECKOUT_SELF),
1850            Some(&"true".to_string())
1851        );
1852    }
1853
1854    #[test]
1855    fn vso_setvariable_sets_meta_writes_env_gate() {
1856        let yaml = r###"
1857steps:
1858  - script: |
1859      echo "##vso[task.setvariable variable=FOO]bar"
1860    displayName: Set variable
1861"###;
1862        let graph = parse(yaml);
1863        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1864        assert_eq!(steps.len(), 1);
1865        assert_eq!(
1866            steps[0].metadata.get(META_WRITES_ENV_GATE),
1867            Some(&"true".to_string()),
1868            "##vso[task.setvariable] must mark META_WRITES_ENV_GATE"
1869        );
1870    }
1871
1872    #[test]
1873    fn environment_key_tags_job_with_env_approval() {
1874        // String form: `environment: production`
1875        let yaml_string_form = r#"
1876jobs:
1877  - deployment: DeployWeb
1878    environment: production
1879    steps:
1880      - script: echo deploying
1881        displayName: Deploy
1882"#;
1883        let g1 = parse(yaml_string_form);
1884        let tagged: Vec<_> = g1
1885            .nodes_of_kind(NodeKind::Step)
1886            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
1887            .collect();
1888        assert!(
1889            !tagged.is_empty(),
1890            "string-form `environment:` must tag job's step nodes with META_ENV_APPROVAL"
1891        );
1892
1893        // Mapping form: `environment: { name: staging }`
1894        let yaml_mapping_form = r#"
1895jobs:
1896  - deployment: DeployAPI
1897    environment:
1898      name: staging
1899      resourceType: VirtualMachine
1900    steps:
1901      - script: echo deploying
1902        displayName: Deploy
1903"#;
1904        let g2 = parse(yaml_mapping_form);
1905        let tagged2: Vec<_> = g2
1906            .nodes_of_kind(NodeKind::Step)
1907            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
1908            .collect();
1909        assert!(
1910            !tagged2.is_empty(),
1911            "mapping-form `environment: {{ name: ... }}` must tag job's step nodes"
1912        );
1913
1914        // Negative: a job with no `environment:` must not be tagged
1915        let yaml_no_env = r#"
1916jobs:
1917  - job: Build
1918    steps:
1919      - script: echo building
1920"#;
1921        let g3 = parse(yaml_no_env);
1922        let any_tagged = g3
1923            .nodes_of_kind(NodeKind::Step)
1924            .any(|s| s.metadata.contains_key(META_ENV_APPROVAL));
1925        assert!(
1926            !any_tagged,
1927            "jobs without `environment:` must not carry META_ENV_APPROVAL"
1928        );
1929    }
1930
1931    #[test]
1932    fn root_parameter_conditional_template_fragment_does_not_crash_and_marks_partial() {
1933        // Real-world repro: an ADO template fragment whose root content is wrapped
1934        // in a parameter conditional (`- ${{ if eq(parameters.X, true) }}:`) followed
1935        // by a list of jobs. This is valid when `template:`-included from a parent
1936        // pipeline, but parsing it standalone fails with "did not find expected key".
1937        // The parser must now return a Partial graph instead of a fatal error.
1938        let yaml = r#"
1939parameters:
1940  msabs_ws2022: false
1941
1942- ${{ if eq(parameters.msabs_ws2022, true) }}:
1943  - job: packer_ws2022
1944    displayName: Build WS2022 Gold Image
1945    steps:
1946      - task: PackerTool@0
1947"#;
1948        let parser = AdoParser;
1949        let source = PipelineSource {
1950            file: "fragment.yml".into(),
1951            repo: None,
1952            git_ref: None,
1953            commit_sha: None,
1954        };
1955        let result = parser.parse(yaml, &source);
1956        let graph = result.expect("template fragment must not crash the parser");
1957        assert!(
1958            matches!(graph.completeness, AuthorityCompleteness::Partial),
1959            "template-fragment graph must be marked Partial"
1960        );
1961        let saw_fragment_gap = graph
1962            .completeness_gaps
1963            .iter()
1964            .any(|g| g.contains("template fragment") && g.contains("parent pipeline"));
1965        assert!(
1966            saw_fragment_gap,
1967            "completeness_gaps must mention the template-fragment reason, got: {:?}",
1968            graph.completeness_gaps
1969        );
1970    }
1971
1972    #[test]
1973    fn environment_tag_isolated_to_gated_job_only() {
1974        // Two jobs side by side: only the deployment job has environment.
1975        // Steps from the non-gated job must NOT be tagged.
1976        let yaml = r#"
1977jobs:
1978  - job: Build
1979    steps:
1980      - script: echo build
1981        displayName: build-step
1982  - deployment: DeployProd
1983    environment: production
1984    steps:
1985      - script: echo deploy
1986        displayName: deploy-step
1987"#;
1988        let g = parse(yaml);
1989        let build_step = g
1990            .nodes_of_kind(NodeKind::Step)
1991            .find(|s| s.name == "build-step")
1992            .expect("build-step must exist");
1993        let deploy_step = g
1994            .nodes_of_kind(NodeKind::Step)
1995            .find(|s| s.name == "deploy-step")
1996            .expect("deploy-step must exist");
1997        assert!(
1998            !build_step.metadata.contains_key(META_ENV_APPROVAL),
1999            "non-gated job's step must not be tagged"
2000        );
2001        assert_eq!(
2002            deploy_step.metadata.get(META_ENV_APPROVAL),
2003            Some(&"true".to_string()),
2004            "gated deployment job's step must be tagged"
2005        );
2006    }
2007
2008    // ── resources.repositories[] capture ──────────────────────
2009
2010    fn repos_meta(graph: &AuthorityGraph) -> Vec<serde_json::Value> {
2011        let raw = graph
2012            .metadata
2013            .get(META_REPOSITORIES)
2014            .expect("META_REPOSITORIES must be set");
2015        serde_json::from_str(raw).expect("META_REPOSITORIES must be valid JSON")
2016    }
2017
2018    #[test]
2019    fn resources_repositories_captured_with_used_flag_when_referenced_by_extends() {
2020        let yaml = r#"
2021resources:
2022  repositories:
2023    - repository: shared-templates
2024      type: git
2025      name: Platform/shared-templates
2026      ref: refs/heads/main
2027
2028extends:
2029  template: pipeline.yml@shared-templates
2030"#;
2031        let graph = parse(yaml);
2032        let entries = repos_meta(&graph);
2033        assert_eq!(entries.len(), 1);
2034        let e = &entries[0];
2035        assert_eq!(e["alias"], "shared-templates");
2036        assert_eq!(e["repo_type"], "git");
2037        assert_eq!(e["name"], "Platform/shared-templates");
2038        assert_eq!(e["ref"], "refs/heads/main");
2039        assert_eq!(e["used"], true);
2040    }
2041
2042    #[test]
2043    fn resources_repositories_used_via_checkout_alias() {
2044        // Mirrors the msigeurope-adf-finance-reporting corpus shape.
2045        let yaml = r#"
2046resources:
2047  repositories:
2048    - repository: adf_publish
2049      type: git
2050      name: org/adf-finance-reporting
2051      ref: refs/heads/adf_publish
2052
2053jobs:
2054  - job: deploy
2055    steps:
2056      - checkout: adf_publish
2057"#;
2058        let graph = parse(yaml);
2059        let entries = repos_meta(&graph);
2060        assert_eq!(entries.len(), 1);
2061        assert_eq!(entries[0]["alias"], "adf_publish");
2062        assert_eq!(entries[0]["used"], true);
2063    }
2064
2065    #[test]
2066    fn resources_repositories_unreferenced_alias_is_marked_not_used() {
2067        // Declared but no `template: x@alias`, no `checkout: alias`, no extends.
2068        let yaml = r#"
2069resources:
2070  repositories:
2071    - repository: orphan-templates
2072      type: git
2073      name: Platform/orphan
2074      ref: main
2075
2076jobs:
2077  - job: build
2078    steps:
2079      - script: echo hi
2080"#;
2081        let graph = parse(yaml);
2082        let entries = repos_meta(&graph);
2083        assert_eq!(entries.len(), 1);
2084        assert_eq!(entries[0]["alias"], "orphan-templates");
2085        assert_eq!(entries[0]["used"], false);
2086    }
2087
2088    #[test]
2089    fn resources_repositories_absent_when_no_resources_block() {
2090        let yaml = r#"
2091jobs:
2092  - job: build
2093    steps:
2094      - script: echo hi
2095"#;
2096        let graph = parse(yaml);
2097        assert!(!graph.metadata.contains_key(META_REPOSITORIES));
2098    }
2099
2100    #[test]
2101    fn parse_template_alias_extracts_segment_after_at() {
2102        assert_eq!(
2103            parse_template_alias("steps/deploy.yml@templates"),
2104            Some("templates".to_string())
2105        );
2106        assert_eq!(parse_template_alias("local/path.yml"), None);
2107        assert_eq!(parse_template_alias("path@"), None);
2108    }
2109}