// taudit_parse_ado/lib.rs

1use std::collections::{HashMap, HashSet};
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
/// Regex-free check: does `s` contain a `terraform apply` invocation that is
/// accompanied by `-auto-approve` / `--auto-approve`?
///
/// A line counts as an invocation when the word `terraform` is followed by
/// one or more spaces/tabs and then `apply`. The flag is accepted either on
/// that same line, or on one of the next three lines as long as every line in
/// between ends in a shell continuation `\` or a PowerShell continuation
/// `` ` ``.
///
/// Everything from the first `#` on a line onwards is treated as a comment
/// and ignored before matching.
///
/// Case-sensitive on purpose — Terraform's CLI is case-sensitive and these
/// tokens never appear capitalised in real-world pipelines.
fn script_does_terraform_auto_apply(s: &str) -> bool {
    // How many continuation lines after the `terraform apply` line are searched.
    const MAX_CONTINUATION_LINES: usize = 3;

    // Drop a trailing `# comment` (everything from the first `#`).
    fn strip_comment(line: &str) -> &str {
        line.split('#').next().unwrap_or("")
    }

    // True when `terraform` is followed by at least one space/tab and `apply`.
    // Accepting a whitespace *run* (not just a single separator) keeps
    // `terraform   apply` and mixed space/tab separators from slipping through.
    fn invokes_terraform_apply(line: &str) -> bool {
        line.match_indices("terraform").any(|(pos, word)| {
            let rest = &line[pos + word.len()..];
            let after_gap = rest.trim_start_matches(|c: char| c == ' ' || c == '\t');
            after_gap.len() < rest.len() && after_gap.starts_with("apply")
        })
    }

    // True when the line ends in a shell (`\`) or PowerShell (`` ` ``) continuation.
    fn continues_on_next_line(line: &str) -> bool {
        let trimmed = line.trim_end();
        trimmed.ends_with('\\') || trimmed.ends_with('`')
    }

    let lines: Vec<&str> = s.lines().map(strip_comment).collect();
    for (i, line) in lines.iter().enumerate() {
        if !invokes_terraform_apply(line) {
            continue;
        }
        if line.contains("auto-approve") {
            return true;
        }
        // Continuation: peek a few lines forward for the flag, stopping as
        // soon as the command is no longer being continued.
        let mut continuing = continues_on_next_line(line);
        for next in lines.iter().skip(i + 1).take(MAX_CONTINUATION_LINES) {
            if !continuing {
                break;
            }
            if next.contains("auto-approve") {
                return true;
            }
            continuing = continues_on_next_line(next);
        }
    }
    false
}
40
41/// Azure DevOps YAML pipeline parser.
42pub struct AdoParser;
43
44impl PipelineParser for AdoParser {
45    fn platform(&self) -> &str {
46        "azure-devops"
47    }
48
49    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
50        let mut de = serde_yaml::Deserializer::from_str(content);
51        let doc = de
52            .next()
53            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
54        let pipeline: AdoPipeline = match AdoPipeline::deserialize(doc) {
55            Ok(p) => p,
56            Err(e) => {
57                // Real-world ADO template fragments often wrap their root content in
58                // a parameter conditional like `- ${{ if eq(parameters.X, true) }}:`
59                // followed by a list of jobs. That is not a standard YAML mapping at
60                // the root, so serde_yaml fails with a "did not find expected key"
61                // error. These files are intended to be `template:`-included from a
62                // parent pipeline; analyzing them in isolation is not meaningful.
63                // Return a near-empty graph marked Partial instead of crashing the scan.
64                let msg = e.to_string();
65                let looks_like_template_fragment = (msg.contains("did not find expected key")
66                    || (msg.contains("parameters")
67                        && msg.contains("invalid type: map")
68                        && msg.contains("expected a sequence")))
69                    && has_root_parameter_conditional(content);
70                if looks_like_template_fragment {
71                    let mut graph = AuthorityGraph::new(source.clone());
72                    graph
73                        .metadata
74                        .insert(META_PLATFORM.into(), "azure-devops".into());
75                    graph.mark_partial(
76                        "ADO template fragment with top-level parameter conditional — root structure depends on parent pipeline context".to_string(),
77                    );
78                    return Ok(graph);
79                }
80                return Err(TauditError::Parse(format!("YAML parse error: {e}")));
81            }
82        };
83        let extra_docs = de.next().is_some();
84
85        let mut graph = AuthorityGraph::new(source.clone());
86        graph
87            .metadata
88            .insert(META_PLATFORM.into(), "azure-devops".into());
89        if extra_docs {
90            graph.mark_partial(
91                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
92            );
93        }
94
95        // Detect PR trigger — sets graph-level META_TRIGGER for trigger_context_mismatch.
96        let has_pr_trigger = pipeline.pr.is_some();
97        if has_pr_trigger {
98            graph.metadata.insert(META_TRIGGER.into(), "pr".into());
99        }
100
101        // Capture resources.repositories[] declarations and detect aliases that
102        // are actually referenced by an `extends:`, `template: x@alias`, or
103        // `checkout: alias`. The result is JSON-encoded into graph metadata
104        // for the `template_extends_unpinned_branch` rule to consume.
105        process_repositories(&pipeline, content, &mut graph);
106
107        // Capture top-level `parameters:` declarations (used by
108        // parameter_interpolation_into_shell). ADO defaults missing `type:`
109        // to string, so a missing/empty type is treated as a string.
110        if let Some(ref params) = pipeline.parameters {
111            for p in params {
112                let name = match p.name.as_ref() {
113                    Some(n) if !n.is_empty() => n.clone(),
114                    _ => continue,
115                };
116                let param_type = p.param_type.clone().unwrap_or_default();
117                let has_values_allowlist =
118                    p.values.as_ref().map(|v| !v.is_empty()).unwrap_or(false);
119                graph.parameters.insert(
120                    name,
121                    ParamSpec {
122                        param_type,
123                        has_values_allowlist,
124                    },
125                );
126            }
127        }
128
129        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
130
131        // System.AccessToken is always present — equivalent to GITHUB_TOKEN.
132        // Tagged implicit: ADO injects this token into every task by platform design;
133        // its exposure to marketplace tasks is structural, not a fixable misconfiguration.
134        let mut meta = HashMap::new();
135        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
136        meta.insert(META_IMPLICIT.into(), "true".into());
137        let token_id = graph.add_node_with_metadata(
138            NodeKind::Identity,
139            "System.AccessToken",
140            TrustZone::FirstParty,
141            meta,
142        );
143
144        // Pipeline-level pool: adds Image node, tagged self-hosted when applicable.
145        process_pool(&pipeline.pool, &mut graph);
146
147        // Pipeline-level variable groups and named secrets.
148        // plain_vars tracks non-secret named variables so $(VAR) refs in scripts
149        // don't generate false-positive Secret nodes for plain config values.
150        let mut plain_vars: HashSet<String> = HashSet::new();
151        let pipeline_secret_ids = process_variables(
152            &pipeline.variables,
153            &mut graph,
154            &mut secret_ids,
155            "pipeline",
156            &mut plain_vars,
157        );
158
159        // Determine pipeline structure: stages → jobs → steps, or jobs → steps, or steps only
160        if let Some(ref stages) = pipeline.stages {
161            for stage in stages {
162                // Stage-level template reference — delegate and mark Partial
163                if let Some(ref tpl) = stage.template {
164                    let stage_name = stage.stage.as_deref().unwrap_or("stage");
165                    add_template_delegation(stage_name, tpl, token_id, None, &mut graph);
166                    continue;
167                }
168
169                let stage_name = stage.stage.as_deref().unwrap_or("stage").to_string();
170                let stage_secret_ids = process_variables(
171                    &stage.variables,
172                    &mut graph,
173                    &mut secret_ids,
174                    &stage_name,
175                    &mut plain_vars,
176                );
177
178                for job in &stage.jobs {
179                    let job_name = job.effective_name();
180                    let job_secret_ids = process_variables(
181                        &job.variables,
182                        &mut graph,
183                        &mut secret_ids,
184                        &job_name,
185                        &mut plain_vars,
186                    );
187
188                    process_pool(&job.pool, &mut graph);
189
190                    let all_secrets: Vec<NodeId> = pipeline_secret_ids
191                        .iter()
192                        .chain(&stage_secret_ids)
193                        .chain(&job_secret_ids)
194                        .copied()
195                        .collect();
196
197                    let steps_start = graph.nodes.len();
198
199                    let job_steps = job.all_steps();
200                    process_steps(
201                        &job_steps,
202                        &job_name,
203                        token_id,
204                        &all_secrets,
205                        &plain_vars,
206                        &mut graph,
207                        &mut secret_ids,
208                    );
209
210                    if let Some(ref tpl) = job.template {
211                        add_template_delegation(
212                            &job_name,
213                            tpl,
214                            token_id,
215                            Some(&job_name),
216                            &mut graph,
217                        );
218                    }
219
220                    if job.has_environment_binding() {
221                        tag_job_steps_env_approval(&mut graph, steps_start);
222                    }
223                }
224            }
225        } else if let Some(ref jobs) = pipeline.jobs {
226            for job in jobs {
227                let job_name = job.effective_name();
228                let job_secret_ids = process_variables(
229                    &job.variables,
230                    &mut graph,
231                    &mut secret_ids,
232                    &job_name,
233                    &mut plain_vars,
234                );
235
236                process_pool(&job.pool, &mut graph);
237
238                let all_secrets: Vec<NodeId> = pipeline_secret_ids
239                    .iter()
240                    .chain(&job_secret_ids)
241                    .copied()
242                    .collect();
243
244                let steps_start = graph.nodes.len();
245
246                let job_steps = job.all_steps();
247                process_steps(
248                    &job_steps,
249                    &job_name,
250                    token_id,
251                    &all_secrets,
252                    &plain_vars,
253                    &mut graph,
254                    &mut secret_ids,
255                );
256
257                if let Some(ref tpl) = job.template {
258                    add_template_delegation(&job_name, tpl, token_id, Some(&job_name), &mut graph);
259                }
260
261                if job.has_environment_binding() {
262                    tag_job_steps_env_approval(&mut graph, steps_start);
263                }
264            }
265        } else if let Some(ref steps) = pipeline.steps {
266            process_steps(
267                steps,
268                "pipeline",
269                token_id,
270                &pipeline_secret_ids,
271                &plain_vars,
272                &mut graph,
273                &mut secret_ids,
274            );
275        }
276
277        // Cross-platform misclassification trap (red-team R2 #5): a YAML file
278        // shaped like ADO at the top level (stages/jobs/steps present) but whose
279        // body uses constructs the ADO parser doesn't recognise will deserialize
280        // without errors and yield no Step nodes. Marking Partial surfaces the
281        // gap instead of returning completeness=complete on a clean-but-empty
282        // graph (which a CI gate would treat as "passed").
283        let step_count = graph
284            .nodes
285            .iter()
286            .filter(|n| n.kind == NodeKind::Step)
287            .count();
288        let had_step_carrier = pipeline.stages.as_ref().is_some_and(|s| !s.is_empty())
289            || pipeline.jobs.as_ref().is_some_and(|j| !j.is_empty())
290            || pipeline.steps.as_ref().is_some_and(|s| !s.is_empty());
291        if step_count == 0 && had_step_carrier {
292            graph.mark_partial(
293                "stages/jobs/steps parsed but produced 0 step nodes — possible non-ADO YAML wrong-platform-classified".to_string(),
294            );
295        }
296
297        Ok(graph)
298    }
299}
300
301/// Process an ADO `pool:` block. ADO pools come in two shapes:
302///   - `pool: my-self-hosted-pool` (string shorthand — always self-hosted)
303///   - `pool: { name: my-pool }` (named pool — self-hosted)
304///   - `pool: { vmImage: ubuntu-latest }` (Microsoft-hosted)
305///   - `pool: { name: my-pool, vmImage: ubuntu-latest }` (hosted; vmImage wins)
306///
307/// Creates an Image node representing the agent environment. Self-hosted pools
308/// are tagged with META_SELF_HOSTED so downstream rules can flag them.
309fn process_pool(pool: &Option<serde_yaml::Value>, graph: &mut AuthorityGraph) {
310    let Some(pool_val) = pool else {
311        return;
312    };
313
314    let (image_name, is_self_hosted) = match pool_val {
315        serde_yaml::Value::String(s) => (s.clone(), true),
316        serde_yaml::Value::Mapping(map) => {
317            let name = map.get("name").and_then(|v| v.as_str());
318            let vm_image = map.get("vmImage").and_then(|v| v.as_str());
319            match (name, vm_image) {
320                (_, Some(vm)) => (vm.to_string(), false),
321                (Some(n), None) => (n.to_string(), true),
322                (None, None) => return,
323            }
324        }
325        _ => return,
326    };
327
328    let mut meta = HashMap::new();
329    if is_self_hosted {
330        meta.insert(META_SELF_HOSTED.into(), "true".into());
331    }
332    graph.add_node_with_metadata(NodeKind::Image, image_name, TrustZone::FirstParty, meta);
333}
334
335/// Scan the parsed pipeline for `resources.repositories[]` declarations and
336/// determine which aliases are referenced inside the same file. Stores the
337/// result as a JSON-encoded array in `graph.metadata[META_REPOSITORIES]`.
338///
339/// Usage signal — an alias is "used" when it appears in any of:
340///   - `template: <path>@<alias>` (anywhere — top-level extends, stage, job, step)
341///   - `extends:` referencing `template: <path>@<alias>`
342///   - `checkout: <alias>` (steps consume an external repo into the workspace)
343///
344/// The `extends:` and per-step `template:` references are resolved by walking
345/// the parsed Value tree; the raw text is only used for the `checkout:` case
346/// (cheap substring scan, robust to YAML shape variation).
347fn process_repositories(pipeline: &AdoPipeline, raw_content: &str, graph: &mut AuthorityGraph) {
348    let resources = match pipeline.resources.as_ref() {
349        Some(r) if !r.repositories.is_empty() => r,
350        _ => return,
351    };
352
353    // Collect all aliases referenced as `template: x@alias`. We walk every
354    // `template:` field appearing in the parsed pipeline (extends and steps
355    // already deserialize to their own paths; stages/jobs use the per-job
356    // template field). The raw YAML walk via serde_yaml::Value covers all
357    // shapes uniformly without re-deriving structure-specific models.
358    let mut used_aliases: HashSet<String> = HashSet::new();
359
360    if let Some(ref ext) = pipeline.extends {
361        collect_template_alias_refs(ext, &mut used_aliases);
362    }
363    if let Ok(value) = serde_yaml::from_str::<serde_yaml::Value>(raw_content) {
364        collect_template_alias_refs(&value, &mut used_aliases);
365        collect_checkout_alias_refs(&value, &mut used_aliases);
366    }
367
368    // Build the JSON-encoded repository descriptor list.
369    let mut entries: Vec<serde_json::Value> = Vec::with_capacity(resources.repositories.len());
370    for repo in &resources.repositories {
371        let used = used_aliases.contains(&repo.repository);
372        let mut obj = serde_json::Map::new();
373        obj.insert(
374            "alias".into(),
375            serde_json::Value::String(repo.repository.clone()),
376        );
377        if let Some(ref t) = repo.repo_type {
378            obj.insert("repo_type".into(), serde_json::Value::String(t.clone()));
379        }
380        if let Some(ref n) = repo.name {
381            obj.insert("name".into(), serde_json::Value::String(n.clone()));
382        }
383        if let Some(ref r) = repo.git_ref {
384            obj.insert("ref".into(), serde_json::Value::String(r.clone()));
385        }
386        obj.insert("used".into(), serde_json::Value::Bool(used));
387        entries.push(serde_json::Value::Object(obj));
388    }
389
390    if let Ok(json) = serde_json::to_string(&serde_json::Value::Array(entries)) {
391        graph.metadata.insert(META_REPOSITORIES.into(), json);
392    }
393}
394
395/// Walk a YAML value and record every `template: <ref>@<alias>` alias seen.
396/// Recurses into mappings and sequences so it catches references in extends,
397/// stages, jobs, steps, and conditional blocks indiscriminately.
398fn collect_template_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
399    match value {
400        serde_yaml::Value::Mapping(map) => {
401            for (k, v) in map {
402                if k.as_str() == Some("template") {
403                    if let Some(s) = v.as_str() {
404                        if let Some(alias) = parse_template_alias(s) {
405                            sink.insert(alias);
406                        }
407                    }
408                }
409                collect_template_alias_refs(v, sink);
410            }
411        }
412        serde_yaml::Value::Sequence(seq) => {
413            for v in seq {
414                collect_template_alias_refs(v, sink);
415            }
416        }
417        _ => {}
418    }
419}
420
421/// Walk a YAML value and record every `checkout: <alias>` value seen, except
422/// `self` and `none` which are platform keywords (not external repo aliases).
423fn collect_checkout_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
424    match value {
425        serde_yaml::Value::Mapping(map) => {
426            for (k, v) in map {
427                if k.as_str() == Some("checkout") {
428                    if let Some(s) = v.as_str() {
429                        if s != "self" && s != "none" && !s.is_empty() {
430                            sink.insert(s.to_string());
431                        }
432                    }
433                }
434                collect_checkout_alias_refs(v, sink);
435            }
436        }
437        serde_yaml::Value::Sequence(seq) => {
438            for v in seq {
439                collect_checkout_alias_refs(v, sink);
440            }
441        }
442        _ => {}
443    }
444}
445
/// Return the `<alias>` part of a `template: <path>@<alias>` reference, i.e.
/// the text after the last `@`. Yields `None` when there is no `@` (a plain
/// in-repo path such as `templates/deploy.yml`, which targets the current
/// pipeline's repo rather than a `resources.repositories[]` entry) or when
/// nothing follows the last `@`.
fn parse_template_alias(template_ref: &str) -> Option<String> {
    template_ref
        .rsplit_once('@')
        .map(|(_path, alias)| alias)
        .filter(|alias| !alias.is_empty())
        .map(str::to_string)
}
458
459/// Tag every Step node added since `start_idx` with META_ENV_APPROVAL.
460/// Used after `process_steps` for a job whose `environment:` is configured —
461/// the environment binding indicates the job sits behind a manual approval
462/// gate, which is an isolation boundary that breaks automatic propagation.
463fn tag_job_steps_env_approval(graph: &mut AuthorityGraph, start_idx: usize) {
464    for node in graph.nodes.iter_mut().skip(start_idx) {
465        if node.kind == NodeKind::Step {
466            node.metadata
467                .insert(META_ENV_APPROVAL.into(), "true".into());
468        }
469    }
470}
471
472/// Process a variable list, creating Secret nodes and returning their IDs.
473/// Returns IDs for secrets only (not variable groups, which are opaque).
474/// Populates `plain_vars` with the names of non-secret named variables so
475/// downstream `$(VAR)` scanning can skip them.
476fn process_variables(
477    variables: &Option<AdoVariables>,
478    graph: &mut AuthorityGraph,
479    cache: &mut HashMap<String, NodeId>,
480    scope: &str,
481    plain_vars: &mut HashSet<String>,
482) -> Vec<NodeId> {
483    let mut ids = Vec::new();
484
485    let vars = match variables.as_ref() {
486        Some(v) => v,
487        None => return ids,
488    };
489
490    for var in &vars.0 {
491        match var {
492            AdoVariable::Group { group } => {
493                // Skip template-expression group names like `${{ parameters.env }}`.
494                // We can't resolve them statically — mark Partial but don't create
495                // a misleading Secret node with the expression as its name.
496                if group.contains("${{") {
497                    graph.mark_partial(format!(
498                        "variable group in {scope} uses template expression — group name unresolvable at parse time"
499                    ));
500                    continue;
501                }
502                let mut meta = HashMap::new();
503                meta.insert(META_VARIABLE_GROUP.into(), "true".into());
504                let id = graph.add_node_with_metadata(
505                    NodeKind::Secret,
506                    group.as_str(),
507                    TrustZone::FirstParty,
508                    meta,
509                );
510                cache.insert(group.clone(), id);
511                ids.push(id);
512                graph.mark_partial(format!(
513                    "variable group '{group}' in {scope} — contents unresolvable without ADO API access"
514                ));
515            }
516            AdoVariable::Named {
517                name, is_secret, ..
518            } => {
519                if *is_secret {
520                    let id = find_or_create_secret(graph, cache, name);
521                    ids.push(id);
522                } else {
523                    plain_vars.insert(name.clone());
524                }
525            }
526        }
527    }
528
529    ids
530}
531
532/// Process a list of ADO steps, adding nodes and edges to the graph.
533fn process_steps(
534    steps: &[AdoStep],
535    job_name: &str,
536    token_id: NodeId,
537    inherited_secrets: &[NodeId],
538    plain_vars: &HashSet<String>,
539    graph: &mut AuthorityGraph,
540    cache: &mut HashMap<String, NodeId>,
541) {
542    for (idx, step) in steps.iter().enumerate() {
543        // Template step — delegation, mark partial
544        if let Some(ref tpl) = step.template {
545            let step_name = step
546                .display_name
547                .as_deref()
548                .or(step.name.as_deref())
549                .map(|s| s.to_string())
550                .unwrap_or_else(|| format!("{job_name}[{idx}]"));
551            add_template_delegation(&step_name, tpl, token_id, Some(job_name), graph);
552            continue;
553        }
554
555        // Determine step kind and trust zone
556        let (step_name, trust_zone, mut inline_script) = classify_step(step, job_name, idx);
557
558        // For task steps (where classify_step returns None), recover an inline
559        // script body from `inputs.inlineScript` / `inputs.script` — used by
560        // AzureCLI@2, AzurePowerShell@5, Bash@3, etc. Without this fallback,
561        // rules that pattern-match script content miss every typed task.
562        if inline_script.is_none() {
563            if let Some(ref inputs) = step.inputs {
564                let candidate_keys = ["inlineScript", "script", "InlineScript", "Inline"];
565                for key in candidate_keys {
566                    if let Some(v) = inputs.get(key).and_then(yaml_value_as_str) {
567                        if !v.is_empty() {
568                            inline_script = Some(v.to_string());
569                            break;
570                        }
571                    }
572                }
573            }
574        }
575
576        let step_id = graph.add_node(NodeKind::Step, &step_name, trust_zone);
577
578        // Stamp parent job name so consumers (e.g. `taudit map --job`) can
579        // attribute steps back to their containing job.
580        if let Some(node) = graph.nodes.get_mut(step_id) {
581            node.metadata.insert(META_JOB_NAME.into(), job_name.into());
582            // Stamp the raw inline script body so script-aware rules
583            // (env-export of secrets, secret materialisation to files,
584            // Key Vault → plaintext) can pattern-match on the actual
585            // command text the agent will run.
586            if let Some(ref body) = inline_script {
587                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
588            }
589        }
590
591        // Stamp inline script body so command-line-leakage rules can inspect
592        // what the step actually executes (vm_remote_exec_via_pipeline_secret,
593        // short_lived_sas_in_command_line).
594        if let Some(ref body) = inline_script {
595            if let Some(node) = graph.nodes.get_mut(step_id) {
596                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
597            }
598        }
599
600        // Stamp the inline script body when present so rules that need to
601        // pattern-match against shell content can do so without re-parsing
602        // YAML. Bodies can be large; rules should treat META_SCRIPT_BODY as
603        // an opaque string and not assume any framing.
604        if let Some(ref body) = inline_script {
605            if let Some(node) = graph.nodes.get_mut(step_id) {
606                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
607            }
608        }
609
610        // Every step has access to System.AccessToken
611        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
612
613        // checkout step with persistCredentials: true writes the token to .git/config on disk,
614        // making it accessible to all subsequent steps and filesystem-level attackers.
615        if step.checkout.is_some() && step.persist_credentials == Some(true) {
616            graph.add_edge(step_id, token_id, EdgeKind::PersistsTo);
617        }
618
619        // `checkout: self` pulls the repo being built. In a PR trigger context this
620        // is the untrusted fork head — tag the step so downstream rules can gate on
621        // trigger context. Default ADO checkout (`checkout: self`) is the common case.
622        if let Some(ref ck) = step.checkout {
623            if ck == "self" {
624                if let Some(node) = graph.nodes.get_mut(step_id) {
625                    node.metadata
626                        .insert(META_CHECKOUT_SELF.into(), "true".into());
627                }
628            }
629        }
630
631        // Inherited pipeline/stage/job secrets
632        for &secret_id in inherited_secrets {
633            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
634        }
635
636        // Service connection detection from task inputs (case-insensitive key match)
637        if let Some(ref inputs) = step.inputs {
638            let service_conn_keys = [
639                "azuresubscription",
640                "connectedservicename",
641                "connectedservicenamearm",
642                "kubernetesserviceconnection",
643                "environmentservicename",
644                "backendservicearm",
645            ];
646            for (raw_key, val) in inputs {
647                let lower = raw_key.to_lowercase();
648                if !service_conn_keys.contains(&lower.as_str()) {
649                    continue;
650                }
651                let conn_name = yaml_value_as_str(val).unwrap_or(raw_key.as_str());
652                if !conn_name.starts_with("$(") {
653                    // Stamp the connection name onto the step itself so rules
654                    // that need the name (e.g. terraform_auto_approve_in_prod)
655                    // don't have to traverse edges.
656                    if let Some(node) = graph.nodes.get_mut(step_id) {
657                        node.metadata
658                            .insert(META_SERVICE_CONNECTION_NAME.into(), conn_name.to_string());
659                    }
660
661                    let mut meta = HashMap::new();
662                    meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
663                    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
664                    // ADO service connections are the platform's federated-identity equivalent
665                    // (modern Azure service connections use workload identity federation /
666                    // OIDC). Tag them so uplift_without_attestation treats ADO pipelines with
667                    // the same OIDC-parity logic applied to GHA.
668                    meta.insert(META_OIDC.into(), "true".into());
669                    let conn_id = graph.add_node_with_metadata(
670                        NodeKind::Identity,
671                        conn_name,
672                        TrustZone::FirstParty,
673                        meta,
674                    );
675                    graph.add_edge(step_id, conn_id, EdgeKind::HasAccessTo);
676                }
677            }
678
679            // addSpnToEnvironment: true exposes federated SPN material
680            // (idToken, servicePrincipalKey, servicePrincipalId, tenantId)
681            // to the step's inline script via env vars. Stamp the step so
682            // addspn_with_inline_script can pattern-match without traversal.
683            if let Some(val) = inputs.get("addSpnToEnvironment") {
684                let truthy = match val {
685                    serde_yaml::Value::Bool(b) => *b,
686                    serde_yaml::Value::String(s) => s.eq_ignore_ascii_case("true"),
687                    _ => false,
688                };
689                if truthy {
690                    if let Some(node) = graph.nodes.get_mut(step_id) {
691                        node.metadata
692                            .insert(META_ADD_SPN_TO_ENV.into(), "true".into());
693                    }
694                }
695            }
696
697            // TerraformCLI@N / TerraformTaskV1..V4 with command: apply +
698            // commandOptions containing auto-approve = same as inline
699            // `terraform apply --auto-approve`. Detect once here so the rule
700            // can read a single META_TERRAFORM_AUTO_APPROVE marker.
701            let task_lower = step
702                .task
703                .as_deref()
704                .map(|t| t.to_lowercase())
705                .unwrap_or_default();
706            let is_terraform_task = task_lower.starts_with("terraformcli@")
707                || task_lower.starts_with("terraformtask@")
708                || task_lower.starts_with("terraformtaskv");
709            if is_terraform_task {
710                let cmd_lower = inputs
711                    .get("command")
712                    .and_then(yaml_value_as_str)
713                    .map(|s| s.to_lowercase())
714                    .unwrap_or_default();
715                let opts = inputs
716                    .get("commandOptions")
717                    .and_then(yaml_value_as_str)
718                    .unwrap_or("");
719                if cmd_lower == "apply" && opts.contains("auto-approve") {
720                    if let Some(node) = graph.nodes.get_mut(step_id) {
721                        node.metadata
722                            .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
723                    }
724                }
725            }
726
727            // Detect $(varName) references in task input values
728            for val in inputs.values() {
729                if let Some(s) = yaml_value_as_str(val) {
730                    extract_dollar_paren_secrets(s, step_id, plain_vars, graph, cache);
731                }
732            }
733        }
734
735        // Inline-script detection of `terraform apply --auto-approve`.
736        // Done after inputs processing so we can OR the two signals into a
737        // single META_TERRAFORM_AUTO_APPROVE marker on the step.
738        if let Some(ref body) = inline_script {
739            if script_does_terraform_auto_apply(body) {
740                if let Some(node) = graph.nodes.get_mut(step_id) {
741                    node.metadata
742                        .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
743                }
744            }
745        }
746
747        // Detect $(varName) in step env values
748        if let Some(ref env) = step.env {
749            for val in env.values() {
750                extract_dollar_paren_secrets(val, step_id, plain_vars, graph, cache);
751            }
752        }
753
754        // Detect $(varName) in inline script text
755        if let Some(ref script) = inline_script {
756            extract_dollar_paren_secrets(script, step_id, plain_vars, graph, cache);
757        }
758
759        // Detect ##vso[task.setvariable] — environment gate mutation in ADO pipelines
760        if let Some(ref script) = inline_script {
761            let lower = script.to_lowercase();
762            if lower.contains("##vso[task.setvariable") {
763                if let Some(node) = graph.nodes.get_mut(step_id) {
764                    node.metadata
765                        .insert(META_WRITES_ENV_GATE.into(), "true".into());
766                }
767            }
768        }
769    }
770}
771
772/// Classify an ADO step, returning (name, trust_zone, inline_script_text).
773///
774/// `inline_script_text` is populated whenever the step has script content —
775/// either as a top-level `script:`/`bash:`/`powershell:`/`pwsh:` key, or as a
776/// task input (`Bash@3.inputs.script`, `PowerShell@2.inputs.script`,
777/// `AzureCLI@2.inputs.inlineScript`, `AzurePowerShell@5.inputs.Inline`, …).
778/// Task-input keys are matched case-insensitively because the ADO YAML schema
779/// is itself case-insensitive on input names.
780fn classify_step(
781    step: &AdoStep,
782    job_name: &str,
783    idx: usize,
784) -> (String, TrustZone, Option<String>) {
785    let default_name = || format!("{job_name}[{idx}]");
786
787    let name = step
788        .display_name
789        .as_deref()
790        .or(step.name.as_deref())
791        .map(|s| s.to_string())
792        .unwrap_or_else(default_name);
793
794    if step.task.is_some() {
795        // Task step — script body may live in inputs.{script,inlineScript,Inline}.
796        let inline = extract_task_inline_script(step.inputs.as_ref());
797        (name, TrustZone::Untrusted, inline)
798    } else if let Some(ref s) = step.script {
799        (name, TrustZone::FirstParty, Some(s.clone()))
800    } else if let Some(ref s) = step.bash {
801        (name, TrustZone::FirstParty, Some(s.clone()))
802    } else if let Some(ref s) = step.powershell {
803        (name, TrustZone::FirstParty, Some(s.clone()))
804    } else if let Some(ref s) = step.pwsh {
805        (name, TrustZone::FirstParty, Some(s.clone()))
806    } else {
807        (name, TrustZone::FirstParty, None)
808    }
809}
810
811/// Pull an inline script body out of a task step's `inputs:` mapping.
812/// Recognises the three common conventions:
813///   - `inputs.script` (Bash@3, PowerShell@2 — when targetType: inline)
814///   - `inputs.inlineScript` (AzureCLI@2)
815///   - `inputs.Inline` (AzurePowerShell@5 — note the capital I)
816///
817/// Match is case-insensitive so a hand-written pipeline using `Script:` or
818/// `INLINESCRIPT:` is still picked up.
819fn extract_task_inline_script(
820    inputs: Option<&HashMap<String, serde_yaml::Value>>,
821) -> Option<String> {
822    let inputs = inputs?;
823    const KEYS: &[&str] = &["script", "inlinescript", "inline"];
824    for (raw_key, val) in inputs {
825        let lower = raw_key.to_lowercase();
826        if KEYS.contains(&lower.as_str()) {
827            if let Some(s) = val.as_str() {
828                if !s.is_empty() {
829                    return Some(s.to_string());
830                }
831            }
832        }
833    }
834    None
835}
836
837/// Add a DelegatesTo edge from a synthetic step node to a template image node.
838///
839/// Trust zone heuristic: templates referenced with `@repository` (e.g. `steps/deploy.yml@templates`)
840/// pull code from an external repository and are Untrusted. Plain relative paths like
841/// `steps/deploy.yml` resolve within the same repo and are FirstParty — mirroring how GHA
842/// treats `./local-action`.
843///
844/// `job_name` is `Some` when the delegation is created inside a job's scope
845/// (job-level template, or template step inside `process_steps`); it is `None`
846/// for stage-level template delegations that don't belong to a specific job.
847fn add_template_delegation(
848    step_name: &str,
849    template_path: &str,
850    token_id: NodeId,
851    job_name: Option<&str>,
852    graph: &mut AuthorityGraph,
853) {
854    let tpl_trust_zone = if template_path.contains('@') {
855        TrustZone::Untrusted
856    } else {
857        TrustZone::FirstParty
858    };
859    let step_id = graph.add_node(NodeKind::Step, step_name, TrustZone::FirstParty);
860    if let Some(jn) = job_name {
861        if let Some(node) = graph.nodes.get_mut(step_id) {
862            node.metadata.insert(META_JOB_NAME.into(), jn.into());
863        }
864    }
865    let tpl_id = graph.add_node(NodeKind::Image, template_path, tpl_trust_zone);
866    graph.add_edge(step_id, tpl_id, EdgeKind::DelegatesTo);
867    graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
868    graph.mark_partial(format!(
869        "template '{template_path}' cannot be resolved inline — authority within the template is unknown"
870    ));
871}
872
873/// Extract `$(varName)` references from a string, creating Secret nodes for
874/// non-predefined and non-plain ADO variables.
875/// Only content that is a valid ADO variable identifier (`[A-Za-z][A-Za-z0-9_]*`)
876/// is treated as a variable reference. This rejects PowerShell sub-expressions
877/// (`$($var)`), ADO template expressions (`${{ ... }}`), shell commands (`$(date)`),
878/// and anything with spaces or special characters.
879fn extract_dollar_paren_secrets(
880    text: &str,
881    step_id: NodeId,
882    plain_vars: &HashSet<String>,
883    graph: &mut AuthorityGraph,
884    cache: &mut HashMap<String, NodeId>,
885) {
886    let mut pos = 0;
887    let bytes = text.as_bytes();
888    while pos < bytes.len() {
889        if pos + 2 < bytes.len() && bytes[pos] == b'$' && bytes[pos + 1] == b'(' {
890            let start = pos + 2;
891            if let Some(end_offset) = text[start..].find(')') {
892                let var_name = &text[start..start + end_offset];
893                if is_valid_ado_identifier(var_name)
894                    && !is_predefined_ado_var(var_name)
895                    && !plain_vars.contains(var_name)
896                {
897                    let id = find_or_create_secret(graph, cache, var_name);
898                    // Mark secrets embedded in -var flag arguments: their values appear in
899                    // pipeline logs (command string is logged before masking, and Terraform
900                    // itself logs -var values in plan output and debug traces).
901                    if is_in_terraform_var_flag(text, pos) {
902                        if let Some(node) = graph.nodes.get_mut(id) {
903                            node.metadata
904                                .insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
905                        }
906                    }
907                    graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
908                }
909                pos = start + end_offset + 1;
910                continue;
911            }
912        }
913        pos += 1;
914    }
915}
916
/// Heuristic: is the `$(VAR)` starting at byte offset `var_pos` part of a
/// Terraform `-var` flag argument?
///
/// Only the text on the same line, to the left of `var_pos`, is inspected.
/// The answer is `true` when that prefix contains both `-var` and `=`, which
/// is the shape of `-var "key=$(VAR)"`.
///
/// `var_pos` must be a char boundary within `text`; slicing panics otherwise.
fn is_in_terraform_var_flag(text: &str, var_pos: usize) -> bool {
    // Everything after the last newline that precedes the reference.
    let same_line_prefix = text[..var_pos].rsplit('\n').next().unwrap_or("");
    // Need the flag itself and the `=` of its key=value payload.
    ["-var", "="]
        .iter()
        .all(|needle| same_line_prefix.contains(*needle))
}
925
/// Returns true if `name` looks like an ADO variable identifier.
///
/// Accepted shape: one ASCII letter, followed by any number of ASCII
/// letters, digits, underscores, or dots (dots allow namespaced names such
/// as `Build.BuildId`). The empty string is rejected, as is anything else —
/// PowerShell vars (`$name`), template expressions (`{{ ... }}`), or
/// complex expressions (`name -join ','`).
fn is_valid_ado_identifier(name: &str) -> bool {
    // Working on bytes is safe here: every accepted character is ASCII, and
    // any byte of a multi-byte character fails the ASCII checks below.
    match name.as_bytes().split_first() {
        None => false,
        Some((head, tail)) => {
            head.is_ascii_alphabetic()
                && tail
                    .iter()
                    .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.'))
        }
    }
}
940
/// Returns true if a variable name is a well-known ADO predefined variable.
/// These are system-provided and are therefore not modelled as secrets.
///
/// A name is predefined when it is exactly `TF_BUILD` or starts with one of
/// the system namespaces (`Build.`, `Agent.`, `System.`, …). `TF_BUILD` is
/// matched exactly rather than as a prefix, so a user-defined variable such
/// as `TF_BUILD_TOKEN` is not mistaken for a system variable and dropped
/// from secret detection. Matching is case-sensitive.
fn is_predefined_ado_var(name: &str) -> bool {
    const NAMESPACES: [&str; 9] = [
        "Build.",
        "Agent.",
        "System.",
        "Pipeline.",
        "Release.",
        "Environment.",
        "Strategy.",
        "Deployment.",
        "Resources.",
    ];
    name == "TF_BUILD" || NAMESPACES.iter().any(|ns| name.starts_with(ns))
}
958
959fn find_or_create_secret(
960    graph: &mut AuthorityGraph,
961    cache: &mut HashMap<String, NodeId>,
962    name: &str,
963) -> NodeId {
964    if let Some(&id) = cache.get(name) {
965        return id;
966    }
967    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
968    cache.insert(name.to_string(), id);
969    id
970}
971
972fn yaml_value_as_str(val: &serde_yaml::Value) -> Option<&str> {
973    val.as_str()
974}
975
976// ── Serde models for ADO YAML ─────────────────────────────
977
/// Top-level ADO pipeline definition.
/// ADO pipelines come in three shapes:
///   (a) stages → jobs → steps
///   (b) jobs → steps (no stages key)
///   (c) steps only (no stages or jobs key)
///
/// Every field is marked `#[serde(default)]`, so any of these keys may be
/// absent from the YAML document.
#[derive(Debug, Deserialize)]
pub struct AdoPipeline {
    /// Raw `trigger:` value, kept as untyped YAML.
    #[serde(default)]
    pub trigger: Option<serde_yaml::Value>,
    /// Raw `pr:` value, kept as untyped YAML.
    #[serde(default)]
    pub pr: Option<serde_yaml::Value>,
    /// Pipeline-level `variables:` block (sequence or mapping form,
    /// normalised by `AdoVariables`).
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// `stages:` is normally a sequence of stage objects, but real-world
    /// pipelines also use `stages: ${{ parameters.stages }}` (a template
    /// expression that resolves at runtime to a list). The custom
    /// deserializer accepts both shapes; non-sequence shapes resolve to
    /// `None` and the graph is marked Partial downstream.
    #[serde(default, deserialize_with = "deserialize_optional_stages")]
    pub stages: Option<Vec<AdoStage>>,
    /// Top-level `jobs:` list — shape (b) above.
    #[serde(default)]
    pub jobs: Option<Vec<AdoJob>>,
    /// Top-level `steps:` list — shape (c) above.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Raw `pool:` value, kept as untyped YAML.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
    /// `resources:` block — repository declarations, container declarations,
    /// pipeline declarations. We only consume `repositories[]` today.
    /// Pre-2019 ADO accepts a sequence form (`resources: [- repo: self]`)
    /// which has no `repositories:` key — the custom deserializer accepts
    /// both shapes and treats the sequence form as an empty resources block.
    #[serde(default, deserialize_with = "deserialize_optional_resources")]
    pub resources: Option<AdoResources>,
    /// Top-level `extends:` directive — `extends: { template: x@alias, ... }`.
    /// Captured raw so we can scan for `template: x@alias` references that
    /// consume a `resources.repositories[]` entry.
    #[serde(default)]
    pub extends: Option<serde_yaml::Value>,
    /// Top-level `parameters:` declarations. Each entry has at minimum a
    /// `name`; `type` defaults to `string` when omitted. `values:` is an
    /// optional allowlist that constrains caller input.
    /// ADO accepts two shapes: the typed sequence form
    /// (`- name: foo \n type: string \n default: bar`) and the legacy
    /// untyped map form (`parameters: { foo: bar, baz: '' }`) used in
    /// older template fragments. The custom deserializer normalizes both.
    #[serde(default, deserialize_with = "deserialize_optional_parameters")]
    pub parameters: Option<Vec<AdoParameter>>,
}
1026
1027/// Accept either a sequence of `AdoParameter` (modern typed form) or a
1028/// mapping of parameter name → default value (legacy untyped form used in
1029/// many template fragments). For the map form, each key becomes an
1030/// `AdoParameter` with the key as `name` and no type/values. Returns `None`
1031/// for any other shape (e.g. a bare template expression).
1032///
1033/// Implemented as a serde Visitor (rather than going through
1034/// `serde_yaml::Value`) so that downstream struct deserialization uses
1035/// serde's native lazy iteration — this avoids serde_yaml's strict
1036/// duplicate-key detection on `${{ else }}`-style template-conditional
1037/// keys that appear in stage/job `parameters:` blocks of unrelated entries.
1038fn deserialize_optional_parameters<'de, D>(
1039    deserializer: D,
1040) -> Result<Option<Vec<AdoParameter>>, D::Error>
1041where
1042    D: serde::Deserializer<'de>,
1043{
1044    use serde::de::{MapAccess, SeqAccess, Visitor};
1045    use std::fmt;
1046
1047    struct ParamsVisitor;
1048
1049    impl<'de> Visitor<'de> for ParamsVisitor {
1050        type Value = Option<Vec<AdoParameter>>;
1051
1052        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1053            f.write_str("a sequence of parameter declarations, a mapping of name→default, null, or a template expression")
1054        }
1055
1056        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1057            Ok(None)
1058        }
1059
1060        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1061            Ok(None)
1062        }
1063
1064        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1065            d.deserialize_any(self)
1066        }
1067
1068        // Bare scalar (template expression like `${{ parameters.X }}`) —
1069        // can't statically enumerate; treat as absent.
1070        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
1071            Ok(None)
1072        }
1073        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
1074            Ok(None)
1075        }
1076        fn visit_bool<E: serde::de::Error>(self, _v: bool) -> Result<Self::Value, E> {
1077            Ok(None)
1078        }
1079        fn visit_i64<E: serde::de::Error>(self, _v: i64) -> Result<Self::Value, E> {
1080            Ok(None)
1081        }
1082        fn visit_u64<E: serde::de::Error>(self, _v: u64) -> Result<Self::Value, E> {
1083            Ok(None)
1084        }
1085        fn visit_f64<E: serde::de::Error>(self, _v: f64) -> Result<Self::Value, E> {
1086            Ok(None)
1087        }
1088
1089        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
1090            let mut out = Vec::new();
1091            while let Some(item) = seq.next_element::<serde_yaml::Value>()? {
1092                if let Ok(p) = serde_yaml::from_value::<AdoParameter>(item) {
1093                    out.push(p);
1094                }
1095            }
1096            Ok(Some(out))
1097        }
1098
1099        fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
1100            // Legacy untyped map form: name → default-value. We collect
1101            // names; defaults are intentionally discarded (matches typed-
1102            // form semantics where `default:` is also ignored).
1103            let mut out = Vec::new();
1104            while let Some(key) = map.next_key::<serde_yaml::Value>()? {
1105                let _ignore = map.next_value::<serde::de::IgnoredAny>()?;
1106                let name = match key {
1107                    serde_yaml::Value::String(s) if !s.is_empty() => s,
1108                    _ => continue,
1109                };
1110                out.push(AdoParameter {
1111                    name: Some(name),
1112                    param_type: None,
1113                    values: None,
1114                });
1115            }
1116            Ok(Some(out))
1117        }
1118    }
1119
1120    deserializer.deserialize_any(ParamsVisitor)
1121}
1122
1123/// Accept either an `AdoResources` mapping (modern form with `repositories:`,
1124/// `containers:`, `pipelines:`) or the legacy sequence form (`resources: [-
1125/// repo: self]`, pre-2019 ADO syntax). The legacy form has no
1126/// `repositories:` key, so we return an empty `AdoResources` for it — the
1127/// repository-tracking rules then see no aliases to track, which is correct
1128/// (legacy `repo: self` declares no external repositories).
1129fn deserialize_optional_resources<'de, D>(deserializer: D) -> Result<Option<AdoResources>, D::Error>
1130where
1131    D: serde::Deserializer<'de>,
1132{
1133    use serde::de::{MapAccess, SeqAccess, Visitor};
1134    use std::fmt;
1135
1136    struct ResourcesVisitor;
1137
1138    impl<'de> Visitor<'de> for ResourcesVisitor {
1139        type Value = Option<AdoResources>;
1140
1141        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1142            f.write_str("an AdoResources mapping or a legacy `- repo:` sequence")
1143        }
1144
1145        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1146            Ok(None)
1147        }
1148        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1149            Ok(None)
1150        }
1151        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1152            d.deserialize_any(self)
1153        }
1154
1155        // Legacy sequence form — drain it without producing any
1156        // repository entries. Modern rules track aliases via the
1157        // `AdoResources.repositories[]` shape, which the legacy form
1158        // does not produce.
1159        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
1160            while seq.next_element::<serde::de::IgnoredAny>()?.is_some() {}
1161            Ok(Some(AdoResources::default()))
1162        }
1163
1164        fn visit_map<A: MapAccess<'de>>(self, map: A) -> Result<Self::Value, A::Error> {
1165            let r = AdoResources::deserialize(serde::de::value::MapAccessDeserializer::new(map))?;
1166            Ok(Some(r))
1167        }
1168    }
1169
1170    deserializer.deserialize_any(ResourcesVisitor)
1171}
1172
1173/// Accept either a sequence of `AdoStage` (the normal form) or a bare
1174/// template expression (`stages: ${{ parameters.stages }}`) which resolves
1175/// at runtime. For the template-expression case, return `None` so the
1176/// pipeline still parses; the graph will simply contain no stages from this
1177/// scope (downstream code already handles empty stage lists).
1178fn deserialize_optional_stages<'de, D>(deserializer: D) -> Result<Option<Vec<AdoStage>>, D::Error>
1179where
1180    D: serde::Deserializer<'de>,
1181{
1182    use serde::de::{SeqAccess, Visitor};
1183    use std::fmt;
1184
1185    struct StagesVisitor;
1186
1187    impl<'de> Visitor<'de> for StagesVisitor {
1188        type Value = Option<Vec<AdoStage>>;
1189
1190        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1191            f.write_str("a sequence of stages or a template expression")
1192        }
1193
1194        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1195            Ok(None)
1196        }
1197        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1198            Ok(None)
1199        }
1200        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1201            d.deserialize_any(self)
1202        }
1203        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
1204            Ok(None)
1205        }
1206        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
1207            Ok(None)
1208        }
1209
1210        fn visit_seq<A: SeqAccess<'de>>(self, seq: A) -> Result<Self::Value, A::Error> {
1211            let stages =
1212                Vec::<AdoStage>::deserialize(serde::de::value::SeqAccessDeserializer::new(seq))?;
1213            Ok(Some(stages))
1214        }
1215    }
1216
1217    deserializer.deserialize_any(StagesVisitor)
1218}
1219
/// `resources:` block. Only `repositories[]` is modelled today.
#[derive(Debug, Default, Deserialize)]
pub struct AdoResources {
    /// Declared repository aliases; empty when the `repositories:` key is
    /// absent.
    #[serde(default)]
    pub repositories: Vec<AdoRepository>,
}
1226
/// A single `resources.repositories[]` entry — declares an external repo
/// alias the pipeline can consume via `template: x@alias`, `extends:`, or
/// `checkout: alias`.
///
/// `repository` is the only required key; the other fields default to
/// `None` when absent.
#[derive(Debug, Deserialize)]
pub struct AdoRepository {
    /// The alias used by consumers (`template: file@<repository>`).
    pub repository: String,
    /// `git`, `github`, `bitbucket`, or `azureGit`. Read from the YAML key
    /// `type`.
    #[serde(default, rename = "type")]
    pub repo_type: Option<String>,
    /// Full repo path (e.g. `org/repo`).
    #[serde(default)]
    pub name: Option<String>,
    /// Optional ref. Absent = default branch (mutable). Present forms:
    /// `refs/tags/v1.2.3`, `refs/heads/main`, bare branch `main`, or a SHA.
    /// Read from the YAML key `ref`.
    #[serde(default, rename = "ref")]
    pub git_ref: Option<String>,
}
1245
/// Pipeline / template `parameters:` entry. We deliberately ignore `default:`
/// — only the name, type, and `values:` allowlist matter for our rules.
#[derive(Debug, Deserialize)]
pub struct AdoParameter {
    /// Parameter name; `None` when the entry carries no `name:` key.
    #[serde(default)]
    pub name: Option<String>,
    /// Declared `type:`; `None` when omitted.
    #[serde(rename = "type", default)]
    pub param_type: Option<String>,
    /// Optional `values:` allowlist, kept as untyped YAML values.
    #[serde(default)]
    pub values: Option<Vec<serde_yaml::Value>>,
}
1257
/// One entry of a `stages:` list: either a concrete stage (`- stage: …`)
/// or a stage-level template reference (`- template: …`).
#[derive(Debug, Deserialize)]
pub struct AdoStage {
    /// Stage identifier. Absent when the stage entry is a template reference.
    #[serde(default)]
    pub stage: Option<String>,
    /// Stage-level template reference (`- template: path/to/stage.yml`).
    #[serde(default)]
    pub template: Option<String>,
    /// Stage-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Jobs in this stage; empty when the `jobs:` key is absent.
    #[serde(default)]
    pub jobs: Vec<AdoJob>,
}
1271
/// One entry of a `jobs:` list: a regular job (`- job: …`), a deployment
/// job (`- deployment: …`), or a job-level template reference
/// (`- template: …`). All fields are optional.
#[derive(Debug, Deserialize)]
pub struct AdoJob {
    /// Regular job identifier
    #[serde(default)]
    pub job: Option<String>,
    /// Deployment job identifier
    #[serde(default)]
    pub deployment: Option<String>,
    /// Job-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Steps declared directly under the job's `steps:` key.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Deployment-job nested strategy: runOnce/rolling/canary all share the
    /// shape `strategy.{runOnce,rolling,canary}.deploy.steps`. We only need
    /// the steps — the strategy choice itself doesn't change authority flow.
    #[serde(default)]
    pub strategy: Option<AdoStrategy>,
    /// Raw `pool:` value, kept as untyped YAML.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
    /// Job-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// Deployment-job environment binding. Two YAML shapes:
    ///
    ///   - `environment: production` (string shorthand)
    ///   - `environment: { name: staging, resourceType: VirtualMachine }` (mapping)
    ///
    /// When present, the environment may have approvals/checks attached in ADO's
    /// environment configuration. Approvals are a manual gate — authority cannot
    /// propagate past one without human intervention. We treat any `environment:`
    /// binding as an approval candidate and tag the job's steps so propagation
    /// rules can downgrade severity. (We can't see the approval config from YAML
    /// alone; the binding is the strongest signal available at parse time.)
    #[serde(default)]
    pub environment: Option<serde_yaml::Value>,
}
1308
1309impl AdoJob {
1310    pub fn effective_name(&self) -> String {
1311        self.job
1312            .as_deref()
1313            .or(self.deployment.as_deref())
1314            .unwrap_or("job")
1315            .to_string()
1316    }
1317
1318    /// Returns the effective step list for this job.
1319    ///
1320    /// Regular jobs put steps under `steps:` directly. Deployment jobs nest
1321    /// them under `strategy.{runOnce,rolling,canary}.{deploy,preDeploy,
1322    /// postDeploy,routeTraffic,onSuccess,onFailure}.steps`. We merge all
1323    /// strategy-nested step lists into a single sequence so downstream rules
1324    /// see them as part of the job. Order: regular `steps:` first, then any
1325    /// strategy-nested steps in deterministic phase order.
1326    pub fn all_steps(&self) -> Vec<AdoStep> {
1327        let mut out: Vec<AdoStep> = Vec::new();
1328        if let Some(ref s) = self.steps {
1329            out.extend(s.iter().cloned());
1330        }
1331        if let Some(ref strat) = self.strategy {
1332            for phase in strat.phases() {
1333                if let Some(ref s) = phase.steps {
1334                    out.extend(s.iter().cloned());
1335                }
1336            }
1337        }
1338        out
1339    }
1340
1341    /// Returns true when the job is bound to an `environment:` — either the
1342    /// string form (`environment: production`) or the mapping form with a
1343    /// non-empty `name:` field. An empty mapping or empty string is ignored.
1344    pub fn has_environment_binding(&self) -> bool {
1345        match self.environment.as_ref() {
1346            None => false,
1347            Some(serde_yaml::Value::String(s)) => !s.trim().is_empty(),
1348            Some(serde_yaml::Value::Mapping(m)) => m
1349                .get("name")
1350                .and_then(|v| v.as_str())
1351                .map(|s| !s.trim().is_empty())
1352                .unwrap_or(false),
1353            _ => false,
1354        }
1355    }
1356}
1357
/// Deployment-job `strategy:` block. ADO ships three strategies — runOnce,
/// rolling, canary — each with multiple lifecycle phases that may carry
/// their own step list. We capture all of them; the AdoJob::all_steps
/// helper flattens them into one sequence.
///
/// All three strategies are modelled with the same `AdoStrategyRunOnce`
/// type, since only their lifecycle phases are read.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategy {
    /// `strategy.runOnce` block.
    #[serde(default, rename = "runOnce")]
    pub run_once: Option<AdoStrategyRunOnce>,
    /// `strategy.rolling` block.
    #[serde(default)]
    pub rolling: Option<AdoStrategyRunOnce>,
    /// `strategy.canary` block.
    #[serde(default)]
    pub canary: Option<AdoStrategyRunOnce>,
}
1371
1372impl AdoStrategy {
1373    /// Iterate over every populated lifecycle phase across all strategies.
1374    pub fn phases(&self) -> Vec<&AdoStrategyPhase> {
1375        let mut out: Vec<&AdoStrategyPhase> = Vec::new();
1376        for runner in [&self.run_once, &self.rolling, &self.canary]
1377            .iter()
1378            .copied()
1379            .flatten()
1380        {
1381            for phase in [
1382                &runner.deploy,
1383                &runner.pre_deploy,
1384                &runner.post_deploy,
1385                &runner.route_traffic,
1386            ]
1387            .into_iter()
1388            .flatten()
1389            {
1390                out.push(phase);
1391            }
1392            if let Some(ref on) = runner.on {
1393                if let Some(ref s) = on.success {
1394                    out.push(s);
1395                }
1396                if let Some(ref f) = on.failure {
1397                    out.push(f);
1398                }
1399            }
1400        }
1401        out
1402    }
1403}
1404
/// Lifecycle phases carried by every deployment strategy. Each phase may
/// have its own `steps:`. Covering all six avoids silently dropping
/// privileged setup/teardown steps from the authority graph.
///
/// The six phases are the four direct fields below plus `on.success` and
/// `on.failure`.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyRunOnce {
    /// `deploy` phase.
    #[serde(default)]
    pub deploy: Option<AdoStrategyPhase>,
    /// `preDeploy` phase.
    #[serde(default, rename = "preDeploy")]
    pub pre_deploy: Option<AdoStrategyPhase>,
    /// `postDeploy` phase.
    #[serde(default, rename = "postDeploy")]
    pub post_deploy: Option<AdoStrategyPhase>,
    /// `routeTraffic` phase.
    #[serde(default, rename = "routeTraffic")]
    pub route_traffic: Option<AdoStrategyPhase>,
    /// `on:` block holding the `success` / `failure` phases.
    #[serde(default)]
    pub on: Option<AdoStrategyOn>,
}
1421
/// The `on:` block of a deployment strategy, holding the phases keyed by
/// `success` and `failure`.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyOn {
    /// `on.success` phase.
    #[serde(default)]
    pub success: Option<AdoStrategyPhase>,
    /// `on.failure` phase.
    #[serde(default)]
    pub failure: Option<AdoStrategyPhase>,
}
1429
/// A single lifecycle phase of a deployment strategy. Only its `steps:`
/// list is modelled.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyPhase {
    /// Steps declared under this phase; `None` when the key is absent.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
}
1435
/// One entry of a `steps:` list. A step is identified by whichever of the
/// keys below is present (`task`, `script`, `bash`, `powershell`, `pwsh`,
/// `template`, `checkout`); every field is optional.
#[derive(Debug, Deserialize, Clone)]
pub struct AdoStep {
    /// Task reference e.g. `AzureCLI@2`
    #[serde(default)]
    pub task: Option<String>,
    /// Inline script (cmd/sh)
    #[serde(default)]
    pub script: Option<String>,
    /// Inline bash script
    #[serde(default)]
    pub bash: Option<String>,
    /// Inline PowerShell script
    #[serde(default)]
    pub powershell: Option<String>,
    /// Cross-platform PowerShell
    #[serde(default)]
    pub pwsh: Option<String>,
    /// Step-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// Human-readable step label, read from the YAML key `displayName`.
    #[serde(rename = "displayName", default)]
    pub display_name: Option<String>,
    /// Legacy name alias
    #[serde(default)]
    pub name: Option<String>,
    /// Step-level `env:` mapping (variable name → string value).
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Task inputs (key → value, but values may be nested)
    #[serde(default)]
    pub inputs: Option<HashMap<String, serde_yaml::Value>>,
    /// Checkout step target (e.g. `self`, a repo alias, or `none`)
    #[serde(default)]
    pub checkout: Option<String>,
    /// When true on a checkout step, writes credentials to .git/config for subsequent steps.
    #[serde(rename = "persistCredentials", default)]
    pub persist_credentials: Option<bool>,
}
1473
1474/// ADO `variables:` block. Can be a sequence (list of group/name-value entries)
1475/// or a mapping (variableName: value). We normalise both into a Vec<AdoVariable>.
1476#[derive(Debug, Default)]
1477pub struct AdoVariables(pub Vec<AdoVariable>);
1478
1479impl<'de> serde::Deserialize<'de> for AdoVariables {
1480    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1481    where
1482        D: serde::Deserializer<'de>,
1483    {
1484        let raw = serde_yaml::Value::deserialize(deserializer)?;
1485        let mut vars = Vec::new();
1486
1487        match raw {
1488            serde_yaml::Value::Sequence(seq) => {
1489                for item in seq {
1490                    if let Some(map) = item.as_mapping() {
1491                        if let Some(group_val) = map.get("group") {
1492                            if let Some(group) = group_val.as_str() {
1493                                vars.push(AdoVariable::Group {
1494                                    group: group.to_string(),
1495                                });
1496                                continue;
1497                            }
1498                        }
1499                        let name = map
1500                            .get("name")
1501                            .and_then(|v| v.as_str())
1502                            .unwrap_or("")
1503                            .to_string();
1504                        let value = map
1505                            .get("value")
1506                            .and_then(|v| v.as_str())
1507                            .unwrap_or("")
1508                            .to_string();
1509                        let is_secret = map
1510                            .get("isSecret")
1511                            .and_then(|v| v.as_bool())
1512                            .unwrap_or(false);
1513                        vars.push(AdoVariable::Named {
1514                            name,
1515                            value,
1516                            is_secret,
1517                        });
1518                    }
1519                }
1520            }
1521            serde_yaml::Value::Mapping(map) => {
1522                for (k, v) in map {
1523                    let name = k.as_str().unwrap_or("").to_string();
1524                    let value = v.as_str().unwrap_or("").to_string();
1525                    vars.push(AdoVariable::Named {
1526                        name,
1527                        value,
1528                        is_secret: false,
1529                    });
1530                }
1531            }
1532            _ => {}
1533        }
1534
1535        Ok(AdoVariables(vars))
1536    }
1537}
1538
/// One normalised entry of a `variables:` block.
#[derive(Debug)]
pub enum AdoVariable {
    /// A `- group: <name>` reference to a variable group.
    Group {
        /// Name of the referenced variable group.
        group: String,
    },
    /// A variable given by name, from either the sequence or the mapping form.
    Named {
        /// Variable name (empty when the entry carried no usable name).
        name: String,
        /// Variable value (empty when the entry carried no usable value).
        value: String,
        /// Value of the entry's `isSecret` key; always false for mapping-form
        /// variables.
        is_secret: bool,
    },
}
1550
/// Heuristic: does this YAML contain a parameter conditional wrapper line
/// (e.g. `- ${{ if eq(parameters.X, true) }}:`)? This is the construct that
/// breaks root-level mapping parses but is valid in an ADO template fragment
/// included by a parent pipeline.
///
/// A line matches when, ignoring leading indentation and an optional list
/// marker (`-` followed by any amount of whitespace), it starts with `${{`,
/// contains `if ` or `if(`, and ends with `:`. A trailing YAML comment after
/// the colon (`...}}: # note`) is tolerated. Indentation is deliberately not
/// checked, so nested conditionals match as well as root-level ones.
fn has_root_parameter_conditional(content: &str) -> bool {
    /// Shape test applied to a line once indentation and list marker are gone.
    fn looks_like_conditional_key(text: &str) -> bool {
        text.starts_with("${{")
            && (text.contains("if ") || text.contains("if("))
            && text.trim_end().ends_with(':')
    }

    /// Cuts `text` at the first `#` that follows whitespace (a YAML comment
    /// start); returns `text` unchanged when there is none.
    fn without_trailing_comment(text: &str) -> &str {
        text.match_indices('#')
            .find(|(at, _)| text[..*at].ends_with(char::is_whitespace))
            .map_or(text, |(at, _)| &text[..at])
    }

    content.lines().any(|line| {
        let trimmed = line.trim_start();
        // Accept both `- ${{ if ... }}:` and bare `${{ if ... }}:`; the dash
        // only counts as a list marker when whitespace follows it.
        let candidate = match trimmed.strip_prefix('-') {
            Some(rest) if rest.starts_with(char::is_whitespace) => rest.trim_start(),
            _ => trimmed,
        };
        // Try the raw line first so every previously matching line still
        // matches, then retry with a trailing comment removed.
        looks_like_conditional_key(candidate)
            || looks_like_conditional_key(without_trailing_comment(candidate))
    })
}
1570
1571#[cfg(test)]
1572mod tests {
1573    use super::*;
1574
1575    fn parse(yaml: &str) -> AuthorityGraph {
1576        let parser = AdoParser;
1577        let source = PipelineSource {
1578            file: "azure-pipelines.yml".into(),
1579            repo: None,
1580            git_ref: None,
1581            commit_sha: None,
1582        };
1583        parser.parse(yaml, &source).unwrap()
1584    }
1585
1586    #[test]
1587    fn parses_simple_pipeline() {
1588        let yaml = r#"
1589trigger:
1590  - main
1591
1592jobs:
1593  - job: Build
1594    steps:
1595      - script: echo hello
1596        displayName: Say hello
1597"#;
1598        let graph = parse(yaml);
1599        assert!(graph.nodes.len() >= 2); // System.AccessToken + step
1600    }
1601
1602    #[test]
1603    fn system_access_token_created() {
1604        let yaml = r#"
1605steps:
1606  - script: echo hi
1607"#;
1608        let graph = parse(yaml);
1609        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1610        assert_eq!(identities.len(), 1);
1611        assert_eq!(identities[0].name, "System.AccessToken");
1612        assert_eq!(
1613            identities[0].metadata.get(META_IDENTITY_SCOPE),
1614            Some(&"broad".to_string())
1615        );
1616    }
1617
1618    #[test]
1619    fn variable_group_creates_secret_and_marks_partial() {
1620        let yaml = r#"
1621variables:
1622  - group: MySecretGroup
1623
1624steps:
1625  - script: echo hi
1626"#;
1627        let graph = parse(yaml);
1628        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1629        assert_eq!(secrets.len(), 1);
1630        assert_eq!(secrets[0].name, "MySecretGroup");
1631        assert_eq!(
1632            secrets[0].metadata.get(META_VARIABLE_GROUP),
1633            Some(&"true".to_string())
1634        );
1635        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1636        assert!(
1637            graph
1638                .completeness_gaps
1639                .iter()
1640                .any(|g| g.contains("MySecretGroup")),
1641            "completeness gap should name the variable group"
1642        );
1643    }
1644
1645    #[test]
1646    fn task_with_azure_subscription_creates_service_connection_identity() {
1647        let yaml = r#"
1648steps:
1649  - task: AzureCLI@2
1650    displayName: Deploy to Azure
1651    inputs:
1652      azureSubscription: MyServiceConnection
1653      scriptType: bash
1654      inlineScript: az group list
1655"#;
1656        let graph = parse(yaml);
1657        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1658        // System.AccessToken + service connection
1659        assert_eq!(identities.len(), 2);
1660        let conn = identities
1661            .iter()
1662            .find(|i| i.name == "MyServiceConnection")
1663            .unwrap();
1664        assert_eq!(
1665            conn.metadata.get(META_SERVICE_CONNECTION),
1666            Some(&"true".to_string())
1667        );
1668        assert_eq!(
1669            conn.metadata.get(META_IDENTITY_SCOPE),
1670            Some(&"broad".to_string())
1671        );
1672    }
1673
1674    #[test]
1675    fn task_with_connected_service_name_creates_identity() {
1676        let yaml = r#"
1677steps:
1678  - task: SqlAzureDacpacDeployment@1
1679    inputs:
1680      ConnectedServiceNameARM: MySqlConnection
1681"#;
1682        let graph = parse(yaml);
1683        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1684        assert!(
1685            identities.iter().any(|i| i.name == "MySqlConnection"),
1686            "connectedServiceNameARM should create identity"
1687        );
1688    }
1689
1690    #[test]
1691    fn script_step_classified_as_first_party() {
1692        let yaml = r#"
1693steps:
1694  - script: echo hi
1695    displayName: Say hi
1696"#;
1697        let graph = parse(yaml);
1698        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1699        assert_eq!(steps.len(), 1);
1700        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
1701    }
1702
1703    #[test]
1704    fn bash_step_classified_as_first_party() {
1705        let yaml = r#"
1706steps:
1707  - bash: echo hi
1708"#;
1709        let graph = parse(yaml);
1710        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1711        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
1712    }
1713
1714    #[test]
1715    fn task_step_classified_as_untrusted() {
1716        let yaml = r#"
1717steps:
1718  - task: DotNetCoreCLI@2
1719    inputs:
1720      command: build
1721"#;
1722        let graph = parse(yaml);
1723        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1724        assert_eq!(steps.len(), 1);
1725        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
1726    }
1727
1728    #[test]
1729    fn dollar_paren_var_in_script_creates_secret() {
1730        let yaml = r#"
1731steps:
1732  - script: |
1733      curl -H "Authorization: $(MY_API_TOKEN)" https://api.example.com
1734    displayName: Call API
1735"#;
1736        let graph = parse(yaml);
1737        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1738        assert_eq!(secrets.len(), 1);
1739        assert_eq!(secrets[0].name, "MY_API_TOKEN");
1740    }
1741
1742    #[test]
1743    fn predefined_ado_var_not_treated_as_secret() {
1744        let yaml = r#"
1745steps:
1746  - script: |
1747      echo $(Build.BuildId)
1748      echo $(Agent.WorkFolder)
1749      echo $(System.DefaultWorkingDirectory)
1750    displayName: Print vars
1751"#;
1752        let graph = parse(yaml);
1753        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1754        assert!(
1755            secrets.is_empty(),
1756            "predefined ADO vars should not be treated as secrets, got: {:?}",
1757            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
1758        );
1759    }
1760
1761    #[test]
1762    fn template_reference_creates_delegates_to_and_marks_partial() {
1763        let yaml = r#"
1764steps:
1765  - template: steps/deploy.yml
1766    parameters:
1767      env: production
1768"#;
1769        let graph = parse(yaml);
1770        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1771        assert_eq!(steps.len(), 1);
1772
1773        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1774        assert_eq!(images.len(), 1);
1775        assert_eq!(images[0].name, "steps/deploy.yml");
1776
1777        let delegates: Vec<_> = graph
1778            .edges_from(steps[0].id)
1779            .filter(|e| e.kind == EdgeKind::DelegatesTo)
1780            .collect();
1781        assert_eq!(delegates.len(), 1);
1782
1783        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1784    }
1785
1786    #[test]
1787    fn top_level_steps_no_jobs() {
1788        let yaml = r#"
1789steps:
1790  - script: echo a
1791  - script: echo b
1792"#;
1793        let graph = parse(yaml);
1794        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1795        assert_eq!(steps.len(), 2);
1796    }
1797
1798    #[test]
1799    fn top_level_jobs_no_stages() {
1800        let yaml = r#"
1801jobs:
1802  - job: JobA
1803    steps:
1804      - script: echo a
1805  - job: JobB
1806    steps:
1807      - script: echo b
1808"#;
1809        let graph = parse(yaml);
1810        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1811        assert_eq!(steps.len(), 2);
1812    }
1813
1814    #[test]
1815    fn stages_with_nested_jobs_parsed() {
1816        let yaml = r#"
1817stages:
1818  - stage: Build
1819    jobs:
1820      - job: Compile
1821        steps:
1822          - script: cargo build
1823  - stage: Test
1824    jobs:
1825      - job: UnitTest
1826        steps:
1827          - script: cargo test
1828"#;
1829        let graph = parse(yaml);
1830        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1831        assert_eq!(steps.len(), 2);
1832    }
1833
1834    #[test]
1835    fn all_steps_linked_to_system_access_token() {
1836        let yaml = r#"
1837steps:
1838  - script: echo a
1839  - task: SomeTask@1
1840    inputs: {}
1841"#;
1842        let graph = parse(yaml);
1843        let token: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1844        assert_eq!(token.len(), 1);
1845        let token_id = token[0].id;
1846
1847        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1848        for step in &steps {
1849            let links: Vec<_> = graph
1850                .edges_from(step.id)
1851                .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == token_id)
1852                .collect();
1853            assert_eq!(
1854                links.len(),
1855                1,
1856                "step '{}' must link to System.AccessToken",
1857                step.name
1858            );
1859        }
1860    }
1861
1862    #[test]
1863    fn named_secret_variable_creates_secret_node() {
1864        let yaml = r#"
1865variables:
1866  - name: MY_PASSWORD
1867    value: dummy
1868    isSecret: true
1869
1870steps:
1871  - script: echo hi
1872"#;
1873        let graph = parse(yaml);
1874        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1875        assert_eq!(secrets.len(), 1);
1876        assert_eq!(secrets[0].name, "MY_PASSWORD");
1877    }
1878
1879    #[test]
1880    fn variables_as_mapping_parsed() {
1881        let yaml = r#"
1882variables:
1883  MY_VAR: hello
1884  ANOTHER_VAR: world
1885
1886steps:
1887  - script: echo hi
1888"#;
1889        let graph = parse(yaml);
1890        // Mapping-style variables without isSecret — no secret nodes created
1891        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1892        assert!(
1893            secrets.is_empty(),
1894            "plain mapping vars should not create secret nodes"
1895        );
1896    }
1897
1898    #[test]
1899    fn persist_credentials_creates_persists_to_edge() {
1900        let yaml = r#"
1901steps:
1902  - checkout: self
1903    persistCredentials: true
1904  - script: git push
1905"#;
1906        let graph = parse(yaml);
1907        let token_id = graph
1908            .nodes_of_kind(NodeKind::Identity)
1909            .find(|n| n.name == "System.AccessToken")
1910            .expect("System.AccessToken must exist")
1911            .id;
1912
1913        let persists_edges: Vec<_> = graph
1914            .edges
1915            .iter()
1916            .filter(|e| e.kind == EdgeKind::PersistsTo && e.to == token_id)
1917            .collect();
1918        assert_eq!(
1919            persists_edges.len(),
1920            1,
1921            "checkout with persistCredentials: true must produce exactly one PersistsTo edge"
1922        );
1923    }
1924
1925    #[test]
1926    fn checkout_without_persist_credentials_no_persists_to_edge() {
1927        let yaml = r#"
1928steps:
1929  - checkout: self
1930  - script: echo hi
1931"#;
1932        let graph = parse(yaml);
1933        let persists_edges: Vec<_> = graph
1934            .edges
1935            .iter()
1936            .filter(|e| e.kind == EdgeKind::PersistsTo)
1937            .collect();
1938        assert!(
1939            persists_edges.is_empty(),
1940            "checkout without persistCredentials should not produce PersistsTo edge"
1941        );
1942    }
1943
1944    #[test]
1945    fn var_flag_secret_marked_as_cli_flag_exposed() {
1946        let yaml = r#"
1947steps:
1948  - script: |
1949      terraform apply \
1950        -var "db_password=$(db_password)" \
1951        -var "api_key=$(api_key)"
1952    displayName: Terraform apply
1953"#;
1954        let graph = parse(yaml);
1955        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1956        assert!(!secrets.is_empty(), "should detect secrets from -var flags");
1957        for secret in &secrets {
1958            assert_eq!(
1959                secret.metadata.get(META_CLI_FLAG_EXPOSED),
1960                Some(&"true".to_string()),
1961                "secret '{}' passed via -var flag should be marked cli_flag_exposed",
1962                secret.name
1963            );
1964        }
1965    }
1966
1967    #[test]
1968    fn non_var_flag_secret_not_marked_as_cli_flag_exposed() {
1969        let yaml = r#"
1970steps:
1971  - script: |
1972      curl -H "Authorization: $(MY_TOKEN)" https://api.example.com
1973"#;
1974        let graph = parse(yaml);
1975        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1976        assert_eq!(secrets.len(), 1);
1977        assert!(
1978            !secrets[0].metadata.contains_key(META_CLI_FLAG_EXPOSED),
1979            "non -var secret should not be marked as cli_flag_exposed"
1980        );
1981    }
1982
1983    #[test]
1984    fn step_linked_to_variable_group_secret() {
1985        let yaml = r#"
1986variables:
1987  - group: ProdSecrets
1988
1989steps:
1990  - script: deploy.sh
1991"#;
1992        let graph = parse(yaml);
1993        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1994        assert_eq!(secrets.len(), 1);
1995        let secret_id = secrets[0].id;
1996
1997        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1998        let links: Vec<_> = graph
1999            .edges_from(steps[0].id)
2000            .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == secret_id)
2001            .collect();
2002        assert_eq!(
2003            links.len(),
2004            1,
2005            "step should be linked to variable group secret"
2006        );
2007    }
2008
2009    #[test]
2010    fn pr_trigger_sets_meta_trigger_on_graph() {
2011        let yaml = r#"
2012pr:
2013  - '*'
2014
2015steps:
2016  - script: echo hi
2017"#;
2018        let graph = parse(yaml);
2019        assert_eq!(
2020            graph.metadata.get(META_TRIGGER),
2021            Some(&"pr".to_string()),
2022            "ADO pr: trigger should set graph META_TRIGGER"
2023        );
2024    }
2025
2026    #[test]
2027    fn self_hosted_pool_by_name_creates_image_with_self_hosted_metadata() {
2028        let yaml = r#"
2029pool:
2030  name: my-self-hosted-pool
2031
2032steps:
2033  - script: echo hi
2034"#;
2035        let graph = parse(yaml);
2036        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2037        assert_eq!(images.len(), 1);
2038        assert_eq!(images[0].name, "my-self-hosted-pool");
2039        assert_eq!(
2040            images[0].metadata.get(META_SELF_HOSTED),
2041            Some(&"true".to_string()),
2042            "pool.name without vmImage must be tagged self-hosted"
2043        );
2044    }
2045
2046    #[test]
2047    fn vm_image_pool_is_not_tagged_self_hosted() {
2048        let yaml = r#"
2049pool:
2050  vmImage: ubuntu-latest
2051
2052steps:
2053  - script: echo hi
2054"#;
2055        let graph = parse(yaml);
2056        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2057        assert_eq!(images.len(), 1);
2058        assert_eq!(images[0].name, "ubuntu-latest");
2059        assert!(
2060            !images[0].metadata.contains_key(META_SELF_HOSTED),
2061            "pool.vmImage is Microsoft-hosted — must not be tagged self-hosted"
2062        );
2063    }
2064
2065    #[test]
2066    fn checkout_self_step_tagged_with_meta_checkout_self() {
2067        let yaml = r#"
2068steps:
2069  - checkout: self
2070  - script: echo hi
2071"#;
2072        let graph = parse(yaml);
2073        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2074        assert_eq!(steps.len(), 2);
2075        let checkout_step = steps
2076            .iter()
2077            .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
2078            .expect("one step must be tagged META_CHECKOUT_SELF");
2079        assert_eq!(
2080            checkout_step.metadata.get(META_CHECKOUT_SELF),
2081            Some(&"true".to_string())
2082        );
2083    }
2084
2085    #[test]
2086    fn vso_setvariable_sets_meta_writes_env_gate() {
2087        let yaml = r###"
2088steps:
2089  - script: |
2090      echo "##vso[task.setvariable variable=FOO]bar"
2091    displayName: Set variable
2092"###;
2093        let graph = parse(yaml);
2094        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2095        assert_eq!(steps.len(), 1);
2096        assert_eq!(
2097            steps[0].metadata.get(META_WRITES_ENV_GATE),
2098            Some(&"true".to_string()),
2099            "##vso[task.setvariable] must mark META_WRITES_ENV_GATE"
2100        );
2101    }
2102
2103    #[test]
2104    fn environment_key_tags_job_with_env_approval() {
2105        // String form: `environment: production`
2106        let yaml_string_form = r#"
2107jobs:
2108  - deployment: DeployWeb
2109    environment: production
2110    steps:
2111      - script: echo deploying
2112        displayName: Deploy
2113"#;
2114        let g1 = parse(yaml_string_form);
2115        let tagged: Vec<_> = g1
2116            .nodes_of_kind(NodeKind::Step)
2117            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
2118            .collect();
2119        assert!(
2120            !tagged.is_empty(),
2121            "string-form `environment:` must tag job's step nodes with META_ENV_APPROVAL"
2122        );
2123
2124        // Mapping form: `environment: { name: staging }`
2125        let yaml_mapping_form = r#"
2126jobs:
2127  - deployment: DeployAPI
2128    environment:
2129      name: staging
2130      resourceType: VirtualMachine
2131    steps:
2132      - script: echo deploying
2133        displayName: Deploy
2134"#;
2135        let g2 = parse(yaml_mapping_form);
2136        let tagged2: Vec<_> = g2
2137            .nodes_of_kind(NodeKind::Step)
2138            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
2139            .collect();
2140        assert!(
2141            !tagged2.is_empty(),
2142            "mapping-form `environment: {{ name: ... }}` must tag job's step nodes"
2143        );
2144
2145        // Negative: a job with no `environment:` must not be tagged
2146        let yaml_no_env = r#"
2147jobs:
2148  - job: Build
2149    steps:
2150      - script: echo building
2151"#;
2152        let g3 = parse(yaml_no_env);
2153        let any_tagged = g3
2154            .nodes_of_kind(NodeKind::Step)
2155            .any(|s| s.metadata.contains_key(META_ENV_APPROVAL));
2156        assert!(
2157            !any_tagged,
2158            "jobs without `environment:` must not carry META_ENV_APPROVAL"
2159        );
2160    }
2161
2162    #[test]
2163    fn root_parameter_conditional_template_fragment_does_not_crash_and_marks_partial() {
2164        // Real-world repro: an ADO template fragment whose root content is wrapped
2165        // in a parameter conditional (`- ${{ if eq(parameters.X, true) }}:`) followed
2166        // by a list of jobs. This is valid when `template:`-included from a parent
2167        // pipeline, but parsing it standalone fails with "did not find expected key".
2168        // The parser must now return a Partial graph instead of a fatal error.
2169        let yaml = r#"
2170parameters:
2171  msabs_ws2022: false
2172
2173- ${{ if eq(parameters.msabs_ws2022, true) }}:
2174  - job: packer_ws2022
2175    displayName: Build WS2022 Gold Image
2176    steps:
2177      - task: PackerTool@0
2178"#;
2179        let parser = AdoParser;
2180        let source = PipelineSource {
2181            file: "fragment.yml".into(),
2182            repo: None,
2183            git_ref: None,
2184            commit_sha: None,
2185        };
2186        let result = parser.parse(yaml, &source);
2187        let graph = result.expect("template fragment must not crash the parser");
2188        assert!(
2189            matches!(graph.completeness, AuthorityCompleteness::Partial),
2190            "template-fragment graph must be marked Partial"
2191        );
2192        let saw_fragment_gap = graph
2193            .completeness_gaps
2194            .iter()
2195            .any(|g| g.contains("template fragment") && g.contains("parent pipeline"));
2196        assert!(
2197            saw_fragment_gap,
2198            "completeness_gaps must mention the template-fragment reason, got: {:?}",
2199            graph.completeness_gaps
2200        );
2201    }
2202
2203    #[test]
2204    fn environment_tag_isolated_to_gated_job_only() {
2205        // Two jobs side by side: only the deployment job has environment.
2206        // Steps from the non-gated job must NOT be tagged.
2207        let yaml = r#"
2208jobs:
2209  - job: Build
2210    steps:
2211      - script: echo build
2212        displayName: build-step
2213  - deployment: DeployProd
2214    environment: production
2215    steps:
2216      - script: echo deploy
2217        displayName: deploy-step
2218"#;
2219        let g = parse(yaml);
2220        let build_step = g
2221            .nodes_of_kind(NodeKind::Step)
2222            .find(|s| s.name == "build-step")
2223            .expect("build-step must exist");
2224        let deploy_step = g
2225            .nodes_of_kind(NodeKind::Step)
2226            .find(|s| s.name == "deploy-step")
2227            .expect("deploy-step must exist");
2228        assert!(
2229            !build_step.metadata.contains_key(META_ENV_APPROVAL),
2230            "non-gated job's step must not be tagged"
2231        );
2232        assert_eq!(
2233            deploy_step.metadata.get(META_ENV_APPROVAL),
2234            Some(&"true".to_string()),
2235            "gated deployment job's step must be tagged"
2236        );
2237    }
2238
2239    // ── resources.repositories[] capture ──────────────────────
2240
2241    fn repos_meta(graph: &AuthorityGraph) -> Vec<serde_json::Value> {
2242        let raw = graph
2243            .metadata
2244            .get(META_REPOSITORIES)
2245            .expect("META_REPOSITORIES must be set");
2246        serde_json::from_str(raw).expect("META_REPOSITORIES must be valid JSON")
2247    }
2248
2249    #[test]
2250    fn resources_repositories_captured_with_used_flag_when_referenced_by_extends() {
2251        let yaml = r#"
2252resources:
2253  repositories:
2254    - repository: shared-templates
2255      type: git
2256      name: Platform/shared-templates
2257      ref: refs/heads/main
2258
2259extends:
2260  template: pipeline.yml@shared-templates
2261"#;
2262        let graph = parse(yaml);
2263        let entries = repos_meta(&graph);
2264        assert_eq!(entries.len(), 1);
2265        let e = &entries[0];
2266        assert_eq!(e["alias"], "shared-templates");
2267        assert_eq!(e["repo_type"], "git");
2268        assert_eq!(e["name"], "Platform/shared-templates");
2269        assert_eq!(e["ref"], "refs/heads/main");
2270        assert_eq!(e["used"], true);
2271    }
2272
2273    #[test]
2274    fn resources_repositories_used_via_checkout_alias() {
2275        // Mirrors the msigeurope-adf-finance-reporting corpus shape.
2276        let yaml = r#"
2277resources:
2278  repositories:
2279    - repository: adf_publish
2280      type: git
2281      name: org/adf-finance-reporting
2282      ref: refs/heads/adf_publish
2283
2284jobs:
2285  - job: deploy
2286    steps:
2287      - checkout: adf_publish
2288"#;
2289        let graph = parse(yaml);
2290        let entries = repos_meta(&graph);
2291        assert_eq!(entries.len(), 1);
2292        assert_eq!(entries[0]["alias"], "adf_publish");
2293        assert_eq!(entries[0]["used"], true);
2294    }
2295
2296    #[test]
2297    fn resources_repositories_unreferenced_alias_is_marked_not_used() {
2298        // Declared but no `template: x@alias`, no `checkout: alias`, no extends.
2299        let yaml = r#"
2300resources:
2301  repositories:
2302    - repository: orphan-templates
2303      type: git
2304      name: Platform/orphan
2305      ref: main
2306
2307jobs:
2308  - job: build
2309    steps:
2310      - script: echo hi
2311"#;
2312        let graph = parse(yaml);
2313        let entries = repos_meta(&graph);
2314        assert_eq!(entries.len(), 1);
2315        assert_eq!(entries[0]["alias"], "orphan-templates");
2316        assert_eq!(entries[0]["used"], false);
2317    }
2318
2319    #[test]
2320    fn resources_repositories_absent_when_no_resources_block() {
2321        let yaml = r#"
2322jobs:
2323  - job: build
2324    steps:
2325      - script: echo hi
2326"#;
2327        let graph = parse(yaml);
2328        assert!(!graph.metadata.contains_key(META_REPOSITORIES));
2329    }
2330
2331    #[test]
2332    fn parse_template_alias_extracts_segment_after_at() {
2333        assert_eq!(
2334            parse_template_alias("steps/deploy.yml@templates"),
2335            Some("templates".to_string())
2336        );
2337        assert_eq!(parse_template_alias("local/path.yml"), None);
2338        assert_eq!(parse_template_alias("path@"), None);
2339    }
2340
2341    #[test]
2342    fn parameters_as_map_form_parses_as_named_parameters() {
2343        // Real-world repro from Azure/aks-engine, PowerShell/PowerShell, dotnet/maui:
2344        // legacy template fragments declare `parameters:` as a mapping of
2345        // name → default-value rather than the modern typed sequence form.
2346        // Both shapes must parse; the map form yields parameters with names
2347        // but no type/values allowlist (so they default to "string" downstream).
2348        let yaml = r#"
2349parameters:
2350  name: ''
2351  k8sRelease: ''
2352  apimodel: 'examples/e2e-tests/kubernetes/release/default/definition.json'
2353  createVNET: false
2354
2355jobs:
2356  - job: build
2357    steps:
2358      - script: echo $(name)
2359"#;
2360        let graph = parse(yaml);
2361        // Parse must succeed and capture the four parameter names.
2362        assert!(graph.parameters.contains_key("name"));
2363        assert!(graph.parameters.contains_key("k8sRelease"));
2364        assert!(graph.parameters.contains_key("apimodel"));
2365        assert!(graph.parameters.contains_key("createVNET"));
2366        assert_eq!(graph.parameters.len(), 4);
2367    }
2368
2369    #[test]
2370    fn parameters_as_typed_sequence_form_still_parses() {
2371        // Make sure the modern form still works after the polymorphic
2372        // deserializer change.
2373        let yaml = r#"
2374parameters:
2375  - name: env
2376    type: string
2377    default: prod
2378    values:
2379      - prod
2380      - staging
2381  - name: skipTests
2382    type: boolean
2383    default: false
2384
2385jobs:
2386  - job: build
2387    steps:
2388      - script: echo hi
2389"#;
2390        let graph = parse(yaml);
2391        let env_param = graph.parameters.get("env").expect("env captured");
2392        assert_eq!(env_param.param_type, "string");
2393        assert!(env_param.has_values_allowlist);
2394        let skip_param = graph
2395            .parameters
2396            .get("skipTests")
2397            .expect("skipTests captured");
2398        assert_eq!(skip_param.param_type, "boolean");
2399        assert!(!skip_param.has_values_allowlist);
2400    }
2401
#[test]
fn resources_as_legacy_sequence_form_parses_to_empty_resources() {
    // Pre-2019 ADO syntax lets `resources:` be a list of `- repo: self`
    // entries instead of the modern `resources: { repositories: [...] }`
    // mapping, and modern ADO still tolerates it (real-world repro:
    // Azure/azure-cli, Chinachu/Mirakurun). Both shapes must parse without
    // crashing.
    let yaml = r#"
resources:
- repo: self

trigger:
  - main

jobs:
  - job: build
    steps:
      - script: echo hi
"#;
    let graph = parse(yaml);

    // The legacy form declares no external repositories, so the
    // META_REPOSITORIES metadata key must not be present.
    assert!(!graph.metadata.contains_key(META_REPOSITORIES));

    // The job itself is still picked up: exactly one Step node.
    let step_total = graph.nodes_of_kind(NodeKind::Step).count();
    assert_eq!(step_total, 1);
}
2429
#[test]
fn stages_as_template_expression_parses_with_no_stages() {
    // `stages: ${{ parameters.stages }}` (real-world repro: dotnet/diagnostics
    // templatePublic.yml) is only resolved at runtime, so a static parser has
    // nothing to enumerate. The file must still be accepted without crashing;
    // this test only checks that the parse completes and the parameter
    // declaration is captured.
    let yaml = r#"
parameters:
  - name: stages
    type: stageList

stages: ${{ parameters.stages }}
"#;
    // Reaching the assertion at all proves the parse did not crash.
    let stages_param_captured = parse(yaml).parameters.contains_key("stages");
    assert!(stages_param_captured);
}
2448
2449    // ── Cross-platform misclassification trap (red-team R2 #5) ─────
2450
#[test]
fn jobs_carrier_without_steps_marks_partial() {
    // A `jobs:` carrier is present, but its only job has neither `steps:`
    // nor `template:`, so no Step node is produced. A non-empty carrier that
    // yields 0 Step nodes must be reported as Partial — otherwise a CI gate
    // could read completeness=complete + 0 findings as "passed".
    let yaml = r#"
jobs:
  - job: build
    pool:
      vmImage: ubuntu-latest
"#;
    let graph = parse(yaml);

    let steps_found = graph
        .nodes
        .iter()
        .filter(|node| node.kind == NodeKind::Step)
        .count();
    assert_eq!(steps_found, 0);

    assert_eq!(graph.completeness, AuthorityCompleteness::Partial);

    // The gap list must explain why the graph was downgraded.
    let mentions_zero_steps = graph
        .completeness_gaps
        .iter()
        .any(|gap| gap.contains("0 step nodes"));
    assert!(
        mentions_zero_steps,
        "completeness_gaps must mention 0 step nodes: {:?}",
        graph.completeness_gaps
    );
}
2480
#[test]
fn jobs_carrier_with_empty_jobs_list_does_not_mark_partial() {
    // Defensive case: `jobs: []` carries no job content, so it does not
    // count as a carrier and the "0 step nodes" gap must not be recorded.
    let yaml = r#"
jobs: []
"#;
    let graph = parse(yaml);
    let gaps = &graph.completeness_gaps;
    assert!(
        gaps.iter().all(|gap| !gap.contains("0 step nodes")),
        "empty jobs: list is not a carrier; got: {:?}",
        gaps
    );
}
2499
#[test]
fn empty_pipeline_does_not_mark_partial_for_zero_steps() {
    // With no top-level stages/jobs/steps there is no carrier at all, so the
    // 0-step-nodes guard has nothing to fire on: a genuinely empty pipeline
    // must not pick up the "0 step nodes" gap.
    let yaml = r#"
trigger:
  - main
"#;
    let graph = parse(yaml);
    let gaps = &graph.completeness_gaps;
    assert!(
        gaps.iter().all(|gap| !gap.contains("0 step nodes")),
        "no carrier means no 0-step gap reason; got: {:?}",
        gaps
    );
}
2520}