Skip to main content

taudit_parse_ado/
lib.rs

1use std::collections::{HashMap, HashSet};
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
/// Regex-free check: does `s` run `terraform apply` together with an
/// `auto-approve` flag (`-auto-approve` or `--auto-approve`)?
///
/// A line counts as a `terraform apply` invocation when a whitespace-separated
/// token ending in `terraform` is followed by a token starting with `apply`,
/// optionally with dash-prefixed global options in between. This covers
/// `terraform apply`, `terraform<TAB>apply`, runs of several spaces, and
/// `terraform -chdir=infra apply`.
///
/// The flag may sit anywhere on that same line, or on one of the next three
/// lines as long as every preceding line ends in a shell continuation `\` or
/// a PowerShell continuation `` ` ``.
///
/// Everything from the first `#` on a line is discarded before matching
/// (a crude trailing-comment strip; it does not understand quoting).
///
/// Case-sensitive on purpose — Terraform's CLI is case-sensitive and these
/// tokens never appear capitalised in real-world pipelines.
fn script_does_terraform_auto_apply(s: &str) -> bool {
    // How many lines after the `terraform apply` line are inspected while the
    // command keeps being continued.
    const MAX_CONTINUATION_LINES: usize = 3;

    // Crude comment strip: keep only the text before the first `#`.
    fn strip_comment(line: &str) -> &str {
        line.split('#').next().unwrap_or("")
    }

    // True when the line ends (ignoring trailing whitespace) in `\` or `` ` ``.
    fn ends_with_continuation(line: &str) -> bool {
        let trimmed = line.trim_end();
        trimmed.ends_with('\\') || trimmed.ends_with('`')
    }

    // True when some token ending in `terraform` is followed — after skipping
    // dash-prefixed global options such as `-chdir=DIR` — by a token starting
    // with `apply`.
    fn invokes_terraform_apply(line: &str) -> bool {
        let tokens: Vec<&str> = line.split_whitespace().collect();
        tokens.iter().enumerate().any(|(idx, tok)| {
            tok.ends_with("terraform")
                && tokens[idx + 1..]
                    .iter()
                    .find(|t| !t.starts_with('-'))
                    .is_some_and(|t| t.starts_with("apply"))
        })
    }

    let lines: Vec<&str> = s.lines().map(strip_comment).collect();
    for (i, line) in lines.iter().enumerate() {
        if !invokes_terraform_apply(line) {
            continue;
        }
        if line.contains("auto-approve") {
            return true;
        }
        // Continuation: peek a few lines forward for the flag, stopping as
        // soon as a line does not continue the command.
        let mut continuing = ends_with_continuation(line);
        for next in lines.iter().skip(i + 1).take(MAX_CONTINUATION_LINES) {
            if !continuing {
                break;
            }
            if next.contains("auto-approve") {
                return true;
            }
            continuing = ends_with_continuation(next);
        }
    }
    false
}
40
41/// Azure DevOps YAML pipeline parser.
42pub struct AdoParser;
43
44impl PipelineParser for AdoParser {
45    fn platform(&self) -> &str {
46        "azure-devops"
47    }
48
49    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
50        let mut de = serde_yaml::Deserializer::from_str(content);
51        let doc = de
52            .next()
53            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
54        let pipeline: AdoPipeline = match AdoPipeline::deserialize(doc) {
55            Ok(p) => p,
56            Err(e) => {
57                // Real-world ADO template fragments often wrap their root content in
58                // a parameter conditional like `- ${{ if eq(parameters.X, true) }}:`
59                // followed by a list of jobs. That is not a standard YAML mapping at
60                // the root, so serde_yaml fails with a "did not find expected key"
61                // error. These files are intended to be `template:`-included from a
62                // parent pipeline; analyzing them in isolation is not meaningful.
63                // Return a near-empty graph marked Partial instead of crashing the scan.
64                let msg = e.to_string();
65                let looks_like_template_fragment = (msg.contains("did not find expected key")
66                    || (msg.contains("parameters")
67                        && msg.contains("invalid type: map")
68                        && msg.contains("expected a sequence")))
69                    && has_root_parameter_conditional(content);
70                if looks_like_template_fragment {
71                    let mut graph = AuthorityGraph::new(source.clone());
72                    graph
73                        .metadata
74                        .insert(META_PLATFORM.into(), "azure-devops".into());
75                    graph.mark_partial(
76                        "ADO template fragment with top-level parameter conditional — root structure depends on parent pipeline context".to_string(),
77                    );
78                    return Ok(graph);
79                }
80                return Err(TauditError::Parse(format!("YAML parse error: {e}")));
81            }
82        };
83        let extra_docs = de.next().is_some();
84
85        let mut graph = AuthorityGraph::new(source.clone());
86        graph
87            .metadata
88            .insert(META_PLATFORM.into(), "azure-devops".into());
89        if extra_docs {
90            graph.mark_partial(
91                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
92            );
93        }
94
95        // Detect PR trigger — sets graph-level META_TRIGGER for trigger_context_mismatch.
96        let has_pr_trigger = pipeline.pr.is_some();
97        if has_pr_trigger {
98            graph.metadata.insert(META_TRIGGER.into(), "pr".into());
99        }
100
101        // Capture resources.repositories[] declarations and detect aliases that
102        // are actually referenced by an `extends:`, `template: x@alias`, or
103        // `checkout: alias`. The result is JSON-encoded into graph metadata
104        // for the `template_extends_unpinned_branch` rule to consume.
105        process_repositories(&pipeline, content, &mut graph);
106
107        // Capture top-level `parameters:` declarations (used by
108        // parameter_interpolation_into_shell). ADO defaults missing `type:`
109        // to string, so a missing/empty type is treated as a string.
110        if let Some(ref params) = pipeline.parameters {
111            for p in params {
112                let name = match p.name.as_ref() {
113                    Some(n) if !n.is_empty() => n.clone(),
114                    _ => continue,
115                };
116                let param_type = p.param_type.clone().unwrap_or_default();
117                let has_values_allowlist =
118                    p.values.as_ref().map(|v| !v.is_empty()).unwrap_or(false);
119                graph.parameters.insert(
120                    name,
121                    ParamSpec {
122                        param_type,
123                        has_values_allowlist,
124                    },
125                );
126            }
127        }
128
129        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
130
131        // System.AccessToken is always present — equivalent to GITHUB_TOKEN.
132        // Tagged implicit: ADO injects this token into every task by platform design;
133        // its exposure to marketplace tasks is structural, not a fixable misconfiguration.
134        let mut meta = HashMap::new();
135        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
136        meta.insert(META_IMPLICIT.into(), "true".into());
137        let token_id = graph.add_node_with_metadata(
138            NodeKind::Identity,
139            "System.AccessToken",
140            TrustZone::FirstParty,
141            meta,
142        );
143
144        // Pipeline-level pool: adds Image node, tagged self-hosted when applicable.
145        process_pool(&pipeline.pool, &pipeline.workspace, &mut graph);
146
147        // Pipeline-level variable groups and named secrets.
148        // plain_vars tracks non-secret named variables so $(VAR) refs in scripts
149        // don't generate false-positive Secret nodes for plain config values.
150        let mut plain_vars: HashSet<String> = HashSet::new();
151        let pipeline_secret_ids = process_variables(
152            &pipeline.variables,
153            &mut graph,
154            &mut secret_ids,
155            "pipeline",
156            &mut plain_vars,
157        );
158
159        // Determine pipeline structure: stages → jobs → steps, or jobs → steps, or steps only
160        if let Some(ref stages) = pipeline.stages {
161            for stage in stages {
162                // Stage-level template reference — delegate and mark Partial
163                if let Some(ref tpl) = stage.template {
164                    let stage_name = stage.stage.as_deref().unwrap_or("stage");
165                    add_template_delegation(stage_name, tpl, token_id, None, &mut graph);
166                    continue;
167                }
168
169                let stage_name = stage.stage.as_deref().unwrap_or("stage").to_string();
170                let stage_secret_ids = process_variables(
171                    &stage.variables,
172                    &mut graph,
173                    &mut secret_ids,
174                    &stage_name,
175                    &mut plain_vars,
176                );
177
178                for job in &stage.jobs {
179                    let job_name = job.effective_name();
180                    let job_secret_ids = process_variables(
181                        &job.variables,
182                        &mut graph,
183                        &mut secret_ids,
184                        &job_name,
185                        &mut plain_vars,
186                    );
187
188                    let effective_workspace =
189                        job.workspace.as_ref().or(pipeline.workspace.as_ref());
190                    process_pool(&job.pool, &effective_workspace.cloned(), &mut graph);
191
192                    let all_secrets: Vec<NodeId> = pipeline_secret_ids
193                        .iter()
194                        .chain(&stage_secret_ids)
195                        .chain(&job_secret_ids)
196                        .copied()
197                        .collect();
198
199                    let steps_start = graph.nodes.len();
200
201                    let job_steps = job.all_steps();
202                    process_steps(
203                        &job_steps,
204                        &job_name,
205                        token_id,
206                        &all_secrets,
207                        &plain_vars,
208                        &mut graph,
209                        &mut secret_ids,
210                    );
211
212                    if let Some(ref tpl) = job.template {
213                        add_template_delegation(
214                            &job_name,
215                            tpl,
216                            token_id,
217                            Some(&job_name),
218                            &mut graph,
219                        );
220                    }
221
222                    if job.has_environment_binding() {
223                        tag_job_steps_env_approval(&mut graph, steps_start);
224                    }
225                }
226            }
227        } else if let Some(ref jobs) = pipeline.jobs {
228            for job in jobs {
229                let job_name = job.effective_name();
230                let job_secret_ids = process_variables(
231                    &job.variables,
232                    &mut graph,
233                    &mut secret_ids,
234                    &job_name,
235                    &mut plain_vars,
236                );
237
238                let effective_workspace = job.workspace.as_ref().or(pipeline.workspace.as_ref());
239                process_pool(&job.pool, &effective_workspace.cloned(), &mut graph);
240
241                let all_secrets: Vec<NodeId> = pipeline_secret_ids
242                    .iter()
243                    .chain(&job_secret_ids)
244                    .copied()
245                    .collect();
246
247                let steps_start = graph.nodes.len();
248
249                let job_steps = job.all_steps();
250                process_steps(
251                    &job_steps,
252                    &job_name,
253                    token_id,
254                    &all_secrets,
255                    &plain_vars,
256                    &mut graph,
257                    &mut secret_ids,
258                );
259
260                if let Some(ref tpl) = job.template {
261                    add_template_delegation(&job_name, tpl, token_id, Some(&job_name), &mut graph);
262                }
263
264                if job.has_environment_binding() {
265                    tag_job_steps_env_approval(&mut graph, steps_start);
266                }
267            }
268        } else if let Some(ref steps) = pipeline.steps {
269            process_steps(
270                steps,
271                "pipeline",
272                token_id,
273                &pipeline_secret_ids,
274                &plain_vars,
275                &mut graph,
276                &mut secret_ids,
277            );
278        }
279
280        // Cross-platform misclassification trap (red-team R2 #5): a YAML file
281        // shaped like ADO at the top level (stages/jobs/steps present) but whose
282        // body uses constructs the ADO parser doesn't recognise will deserialize
283        // without errors and yield no Step nodes. Marking Partial surfaces the
284        // gap instead of returning completeness=complete on a clean-but-empty
285        // graph (which a CI gate would treat as "passed").
286        let step_count = graph
287            .nodes
288            .iter()
289            .filter(|n| n.kind == NodeKind::Step)
290            .count();
291        let had_step_carrier = pipeline.stages.as_ref().is_some_and(|s| !s.is_empty())
292            || pipeline.jobs.as_ref().is_some_and(|j| !j.is_empty())
293            || pipeline.steps.as_ref().is_some_and(|s| !s.is_empty());
294        if step_count == 0 && had_step_carrier {
295            graph.mark_partial(
296                "stages/jobs/steps parsed but produced 0 step nodes — possible non-ADO YAML wrong-platform-classified".to_string(),
297            );
298        }
299
300        Ok(graph)
301    }
302}
303
304/// Process an ADO `pool:` block. ADO pools come in two shapes:
305///   - `pool: my-self-hosted-pool` (string shorthand — always self-hosted)
306///   - `pool: { name: my-pool }` (named pool — self-hosted)
307///   - `pool: { vmImage: ubuntu-latest }` (Microsoft-hosted)
308///   - `pool: { name: my-pool, vmImage: ubuntu-latest }` (hosted; vmImage wins)
309///
310/// Creates an Image node representing the agent environment. Self-hosted pools
311/// are tagged with META_SELF_HOSTED so downstream rules can flag them.
312///
313/// When `workspace` is provided and contains `clean:` with a truthy value
314/// (`true`, `all`, `outputs`, `resources`), the Image node is also tagged
315/// with META_WORKSPACE_CLEAN.
316fn process_pool(
317    pool: &Option<serde_yaml::Value>,
318    workspace: &Option<serde_yaml::Value>,
319    graph: &mut AuthorityGraph,
320) {
321    let Some(pool_val) = pool else {
322        return;
323    };
324
325    let (image_name, is_self_hosted) = match pool_val {
326        serde_yaml::Value::String(s) => (s.clone(), true),
327        serde_yaml::Value::Mapping(map) => {
328            let name = map.get("name").and_then(|v| v.as_str());
329            let vm_image = map.get("vmImage").and_then(|v| v.as_str());
330            match (name, vm_image) {
331                (_, Some(vm)) => (vm.to_string(), false),
332                (Some(n), None) => (n.to_string(), true),
333                (None, None) => return,
334            }
335        }
336        _ => return,
337    };
338
339    let mut meta = HashMap::new();
340    if is_self_hosted {
341        meta.insert(META_SELF_HOSTED.into(), "true".into());
342    }
343    if has_workspace_clean(workspace) {
344        meta.insert(META_WORKSPACE_CLEAN.into(), "true".into());
345    }
346    graph.add_node_with_metadata(NodeKind::Image, image_name, TrustZone::FirstParty, meta);
347}
348
349/// Returns `true` when the ADO `workspace:` value specifies a `clean:` setting
350/// that wipes the workspace between runs. Recognised truthy forms:
351///   - `workspace: { clean: all }`
352///   - `workspace: { clean: outputs }`
353///   - `workspace: { clean: resources }`
354///   - `workspace: { clean: true }`
355fn has_workspace_clean(workspace: &Option<serde_yaml::Value>) -> bool {
356    let Some(ws) = workspace else {
357        return false;
358    };
359    let Some(map) = ws.as_mapping() else {
360        return false;
361    };
362    let Some(clean) = map.get("clean") else {
363        return false;
364    };
365    match clean {
366        serde_yaml::Value::Bool(b) => *b,
367        serde_yaml::Value::String(s) => {
368            let lower = s.to_ascii_lowercase();
369            matches!(lower.as_str(), "all" | "outputs" | "resources" | "true")
370        }
371        _ => false,
372    }
373}
374
375/// Scan the parsed pipeline for `resources.repositories[]` declarations and
376/// determine which aliases are referenced inside the same file. Stores the
377/// result as a JSON-encoded array in `graph.metadata[META_REPOSITORIES]`.
378///
379/// Usage signal — an alias is "used" when it appears in any of:
380///   - `template: <path>@<alias>` (anywhere — top-level extends, stage, job, step)
381///   - `extends:` referencing `template: <path>@<alias>`
382///   - `checkout: <alias>` (steps consume an external repo into the workspace)
383///
384/// The `extends:` and per-step `template:` references are resolved by walking
385/// the parsed Value tree; the raw text is only used for the `checkout:` case
386/// (cheap substring scan, robust to YAML shape variation).
387fn process_repositories(pipeline: &AdoPipeline, raw_content: &str, graph: &mut AuthorityGraph) {
388    let resources = match pipeline.resources.as_ref() {
389        Some(r) if !r.repositories.is_empty() => r,
390        _ => return,
391    };
392
393    // Collect all aliases referenced as `template: x@alias`. We walk every
394    // `template:` field appearing in the parsed pipeline (extends and steps
395    // already deserialize to their own paths; stages/jobs use the per-job
396    // template field). The raw YAML walk via serde_yaml::Value covers all
397    // shapes uniformly without re-deriving structure-specific models.
398    let mut used_aliases: HashSet<String> = HashSet::new();
399
400    if let Some(ref ext) = pipeline.extends {
401        collect_template_alias_refs(ext, &mut used_aliases);
402    }
403    if let Ok(value) = serde_yaml::from_str::<serde_yaml::Value>(raw_content) {
404        collect_template_alias_refs(&value, &mut used_aliases);
405        collect_checkout_alias_refs(&value, &mut used_aliases);
406    }
407
408    // Build the JSON-encoded repository descriptor list.
409    let mut entries: Vec<serde_json::Value> = Vec::with_capacity(resources.repositories.len());
410    for repo in &resources.repositories {
411        let used = used_aliases.contains(&repo.repository);
412        let mut obj = serde_json::Map::new();
413        obj.insert(
414            "alias".into(),
415            serde_json::Value::String(repo.repository.clone()),
416        );
417        if let Some(ref t) = repo.repo_type {
418            obj.insert("repo_type".into(), serde_json::Value::String(t.clone()));
419        }
420        if let Some(ref n) = repo.name {
421            obj.insert("name".into(), serde_json::Value::String(n.clone()));
422        }
423        if let Some(ref r) = repo.git_ref {
424            obj.insert("ref".into(), serde_json::Value::String(r.clone()));
425        }
426        obj.insert("used".into(), serde_json::Value::Bool(used));
427        entries.push(serde_json::Value::Object(obj));
428    }
429
430    if let Ok(json) = serde_json::to_string(&serde_json::Value::Array(entries)) {
431        graph.metadata.insert(META_REPOSITORIES.into(), json);
432    }
433}
434
435/// Walk a YAML value and record every `template: <ref>@<alias>` alias seen.
436/// Recurses into mappings and sequences so it catches references in extends,
437/// stages, jobs, steps, and conditional blocks indiscriminately.
438fn collect_template_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
439    match value {
440        serde_yaml::Value::Mapping(map) => {
441            for (k, v) in map {
442                if k.as_str() == Some("template") {
443                    if let Some(s) = v.as_str() {
444                        if let Some(alias) = parse_template_alias(s) {
445                            sink.insert(alias);
446                        }
447                    }
448                }
449                collect_template_alias_refs(v, sink);
450            }
451        }
452        serde_yaml::Value::Sequence(seq) => {
453            for v in seq {
454                collect_template_alias_refs(v, sink);
455            }
456        }
457        _ => {}
458    }
459}
460
461/// Walk a YAML value and record every `checkout: <alias>` value seen, except
462/// `self` and `none` which are platform keywords (not external repo aliases).
463fn collect_checkout_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
464    match value {
465        serde_yaml::Value::Mapping(map) => {
466            for (k, v) in map {
467                if k.as_str() == Some("checkout") {
468                    if let Some(s) = v.as_str() {
469                        if s != "self" && s != "none" && !s.is_empty() {
470                            sink.insert(s.to_string());
471                        }
472                    }
473                }
474                collect_checkout_alias_refs(v, sink);
475            }
476        }
477        serde_yaml::Value::Sequence(seq) => {
478            for v in seq {
479                collect_checkout_alias_refs(v, sink);
480            }
481        }
482        _ => {}
483    }
484}
485
/// Pull the `<alias>` part out of a `template: <path>@<alias>` reference.
///
/// The text after the last `@` is the alias. Yields `None` when there is no
/// `@` at all (a plain in-repo path such as `templates/deploy.yml`, which
/// targets the current pipeline's repo rather than a
/// `resources.repositories[]` entry) or when nothing follows the last `@`.
fn parse_template_alias(template_ref: &str) -> Option<String> {
    template_ref
        .rsplit_once('@')
        .map(|(_, alias)| alias)
        .filter(|alias| !alias.is_empty())
        .map(str::to_string)
}
498
499/// Tag every Step node added since `start_idx` with META_ENV_APPROVAL.
500/// Used after `process_steps` for a job whose `environment:` is configured —
501/// the environment binding indicates the job sits behind a manual approval
502/// gate, which is an isolation boundary that breaks automatic propagation.
503fn tag_job_steps_env_approval(graph: &mut AuthorityGraph, start_idx: usize) {
504    for node in graph.nodes.iter_mut().skip(start_idx) {
505        if node.kind == NodeKind::Step {
506            node.metadata
507                .insert(META_ENV_APPROVAL.into(), "true".into());
508        }
509    }
510}
511
512/// Process a variable list, creating Secret nodes and returning their IDs.
513/// Returns IDs for secrets only (not variable groups, which are opaque).
514/// Populates `plain_vars` with the names of non-secret named variables so
515/// downstream `$(VAR)` scanning can skip them.
516fn process_variables(
517    variables: &Option<AdoVariables>,
518    graph: &mut AuthorityGraph,
519    cache: &mut HashMap<String, NodeId>,
520    scope: &str,
521    plain_vars: &mut HashSet<String>,
522) -> Vec<NodeId> {
523    let mut ids = Vec::new();
524
525    let vars = match variables.as_ref() {
526        Some(v) => v,
527        None => return ids,
528    };
529
530    for var in &vars.0 {
531        match var {
532            AdoVariable::Group { group } => {
533                // Skip template-expression group names like `${{ parameters.env }}`.
534                // We can't resolve them statically — mark Partial but don't create
535                // a misleading Secret node with the expression as its name.
536                if group.contains("${{") {
537                    graph.mark_partial(format!(
538                        "variable group in {scope} uses template expression — group name unresolvable at parse time"
539                    ));
540                    continue;
541                }
542                let mut meta = HashMap::new();
543                meta.insert(META_VARIABLE_GROUP.into(), "true".into());
544                let id = graph.add_node_with_metadata(
545                    NodeKind::Secret,
546                    group.as_str(),
547                    TrustZone::FirstParty,
548                    meta,
549                );
550                cache.insert(group.clone(), id);
551                ids.push(id);
552                graph.mark_partial(format!(
553                    "variable group '{group}' in {scope} — contents unresolvable without ADO API access"
554                ));
555            }
556            AdoVariable::Named {
557                name, is_secret, ..
558            } => {
559                if *is_secret {
560                    let id = find_or_create_secret(graph, cache, name);
561                    ids.push(id);
562                } else {
563                    plain_vars.insert(name.clone());
564                }
565            }
566        }
567    }
568
569    ids
570}
571
572/// Process a list of ADO steps, adding nodes and edges to the graph.
573fn process_steps(
574    steps: &[AdoStep],
575    job_name: &str,
576    token_id: NodeId,
577    inherited_secrets: &[NodeId],
578    plain_vars: &HashSet<String>,
579    graph: &mut AuthorityGraph,
580    cache: &mut HashMap<String, NodeId>,
581) {
582    for (idx, step) in steps.iter().enumerate() {
583        // Template step — delegation, mark partial
584        if let Some(ref tpl) = step.template {
585            let step_name = step
586                .display_name
587                .as_deref()
588                .or(step.name.as_deref())
589                .map(|s| s.to_string())
590                .unwrap_or_else(|| format!("{job_name}[{idx}]"));
591            add_template_delegation(&step_name, tpl, token_id, Some(job_name), graph);
592            continue;
593        }
594
595        // Determine step kind and trust zone
596        let (step_name, trust_zone, mut inline_script) = classify_step(step, job_name, idx);
597
598        // For task steps (where classify_step returns None), recover an inline
599        // script body from `inputs.inlineScript` / `inputs.script` — used by
600        // AzureCLI@2, AzurePowerShell@5, Bash@3, etc. Without this fallback,
601        // rules that pattern-match script content miss every typed task.
602        if inline_script.is_none() {
603            if let Some(ref inputs) = step.inputs {
604                let candidate_keys = ["inlineScript", "script", "InlineScript", "Inline"];
605                for key in candidate_keys {
606                    if let Some(v) = inputs.get(key).and_then(yaml_value_as_str) {
607                        if !v.is_empty() {
608                            inline_script = Some(v.to_string());
609                            break;
610                        }
611                    }
612                }
613            }
614        }
615
616        let step_id = graph.add_node(NodeKind::Step, &step_name, trust_zone);
617
618        // Stamp parent job name so consumers (e.g. `taudit map --job`) can
619        // attribute steps back to their containing job.
620        if let Some(node) = graph.nodes.get_mut(step_id) {
621            node.metadata.insert(META_JOB_NAME.into(), job_name.into());
622            // Stamp the raw inline script body so script-aware rules
623            // (env-export of secrets, secret materialisation to files,
624            // Key Vault → plaintext) can pattern-match on the actual
625            // command text the agent will run.
626            if let Some(ref body) = inline_script {
627                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
628            }
629        }
630
631        // Stamp inline script body so command-line-leakage rules can inspect
632        // what the step actually executes (vm_remote_exec_via_pipeline_secret,
633        // short_lived_sas_in_command_line).
634        if let Some(ref body) = inline_script {
635            if let Some(node) = graph.nodes.get_mut(step_id) {
636                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
637            }
638        }
639
640        // Stamp the inline script body when present so rules that need to
641        // pattern-match against shell content can do so without re-parsing
642        // YAML. Bodies can be large; rules should treat META_SCRIPT_BODY as
643        // an opaque string and not assume any framing.
644        if let Some(ref body) = inline_script {
645            if let Some(node) = graph.nodes.get_mut(step_id) {
646                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
647            }
648        }
649
650        // Every step has access to System.AccessToken
651        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
652
653        // checkout step with persistCredentials: true writes the token to .git/config on disk,
654        // making it accessible to all subsequent steps and filesystem-level attackers.
655        if step.checkout.is_some() && step.persist_credentials == Some(true) {
656            graph.add_edge(step_id, token_id, EdgeKind::PersistsTo);
657        }
658
659        // `checkout: self` pulls the repo being built. In a PR trigger context this
660        // is the untrusted fork head — tag the step so downstream rules can gate on
661        // trigger context. Default ADO checkout (`checkout: self`) is the common case.
662        if let Some(ref ck) = step.checkout {
663            if ck == "self" {
664                if let Some(node) = graph.nodes.get_mut(step_id) {
665                    node.metadata
666                        .insert(META_CHECKOUT_SELF.into(), "true".into());
667                }
668            }
669        }
670
671        // Inherited pipeline/stage/job secrets
672        for &secret_id in inherited_secrets {
673            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
674        }
675
676        // Service connection detection from task inputs (case-insensitive key match)
677        if let Some(ref inputs) = step.inputs {
678            let service_conn_keys = [
679                "azuresubscription",
680                "connectedservicename",
681                "connectedservicenamearm",
682                "kubernetesserviceconnection",
683                "environmentservicename",
684                "backendservicearm",
685            ];
686            for (raw_key, val) in inputs {
687                let lower = raw_key.to_lowercase();
688                if !service_conn_keys.contains(&lower.as_str()) {
689                    continue;
690                }
691                let conn_name = yaml_value_as_str(val).unwrap_or(raw_key.as_str());
692                if !conn_name.starts_with("$(") {
693                    // Stamp the connection name onto the step itself so rules
694                    // that need the name (e.g. terraform_auto_approve_in_prod)
695                    // don't have to traverse edges.
696                    if let Some(node) = graph.nodes.get_mut(step_id) {
697                        node.metadata
698                            .insert(META_SERVICE_CONNECTION_NAME.into(), conn_name.to_string());
699                    }
700
701                    let mut meta = HashMap::new();
702                    meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
703                    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
704                    // ADO pipeline YAML does not embed the authentication scheme
705                    // of the service endpoint (WorkloadIdentityFederation vs.
706                    // ServicePrincipal), so we cannot reliably determine whether a
707                    // connection uses OIDC.  Leave META_OIDC unset -- the safe
708                    // default -- so that rules like service_connection_scope_mismatch
709                    // can fire on classic SPN connections.
710                    let conn_id = graph.add_node_with_metadata(
711                        NodeKind::Identity,
712                        conn_name,
713                        TrustZone::FirstParty,
714                        meta,
715                    );
716                    graph.add_edge(step_id, conn_id, EdgeKind::HasAccessTo);
717                }
718            }
719
720            // addSpnToEnvironment: true exposes federated SPN material
721            // (idToken, servicePrincipalKey, servicePrincipalId, tenantId)
722            // to the step's inline script via env vars. Stamp the step so
723            // addspn_with_inline_script can pattern-match without traversal.
724            if let Some(val) = inputs.get("addSpnToEnvironment") {
725                let truthy = match val {
726                    serde_yaml::Value::Bool(b) => *b,
727                    serde_yaml::Value::String(s) => s.eq_ignore_ascii_case("true"),
728                    _ => false,
729                };
730                if truthy {
731                    if let Some(node) = graph.nodes.get_mut(step_id) {
732                        node.metadata
733                            .insert(META_ADD_SPN_TO_ENV.into(), "true".into());
734                    }
735                }
736            }
737
738            // TerraformCLI@N / TerraformTaskV1..V4 with command: apply +
739            // commandOptions containing auto-approve = same as inline
740            // `terraform apply --auto-approve`. Detect once here so the rule
741            // can read a single META_TERRAFORM_AUTO_APPROVE marker.
742            let task_lower = step
743                .task
744                .as_deref()
745                .map(|t| t.to_lowercase())
746                .unwrap_or_default();
747            let is_terraform_task = task_lower.starts_with("terraformcli@")
748                || task_lower.starts_with("terraformtask@")
749                || task_lower.starts_with("terraformtaskv");
750            if is_terraform_task {
751                let cmd_lower = inputs
752                    .get("command")
753                    .and_then(yaml_value_as_str)
754                    .map(|s| s.to_lowercase())
755                    .unwrap_or_default();
756                let opts = inputs
757                    .get("commandOptions")
758                    .and_then(yaml_value_as_str)
759                    .unwrap_or("");
760                if cmd_lower == "apply" && opts.contains("auto-approve") {
761                    if let Some(node) = graph.nodes.get_mut(step_id) {
762                        node.metadata
763                            .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
764                    }
765                }
766            }
767
768            // Detect $(varName) references in task input values
769            for val in inputs.values() {
770                if let Some(s) = yaml_value_as_str(val) {
771                    extract_dollar_paren_secrets(s, step_id, plain_vars, graph, cache);
772                }
773            }
774        }
775
776        // Inline-script detection of `terraform apply --auto-approve`.
777        // Done after inputs processing so we can OR the two signals into a
778        // single META_TERRAFORM_AUTO_APPROVE marker on the step.
779        if let Some(ref body) = inline_script {
780            if script_does_terraform_auto_apply(body) {
781                if let Some(node) = graph.nodes.get_mut(step_id) {
782                    node.metadata
783                        .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
784                }
785            }
786        }
787
788        // Detect $(varName) in step env values
789        if let Some(ref env) = step.env {
790            for val in env.values() {
791                extract_dollar_paren_secrets(val, step_id, plain_vars, graph, cache);
792            }
793        }
794
795        // Detect $(varName) in inline script text
796        if let Some(ref script) = inline_script {
797            extract_dollar_paren_secrets(script, step_id, plain_vars, graph, cache);
798        }
799
800        // Detect ##vso[task.setvariable] — environment gate mutation in ADO pipelines
801        if let Some(ref script) = inline_script {
802            let lower = script.to_lowercase();
803            if lower.contains("##vso[task.setvariable") {
804                if let Some(node) = graph.nodes.get_mut(step_id) {
805                    node.metadata
806                        .insert(META_WRITES_ENV_GATE.into(), "true".into());
807                }
808            }
809        }
810    }
811}
812
813/// Classify an ADO step, returning (name, trust_zone, inline_script_text).
814///
815/// `inline_script_text` is populated whenever the step has script content —
816/// either as a top-level `script:`/`bash:`/`powershell:`/`pwsh:` key, or as a
817/// task input (`Bash@3.inputs.script`, `PowerShell@2.inputs.script`,
818/// `AzureCLI@2.inputs.inlineScript`, `AzurePowerShell@5.inputs.Inline`, …).
819/// Task-input keys are matched case-insensitively because the ADO YAML schema
820/// is itself case-insensitive on input names.
821fn classify_step(
822    step: &AdoStep,
823    job_name: &str,
824    idx: usize,
825) -> (String, TrustZone, Option<String>) {
826    let default_name = || format!("{job_name}[{idx}]");
827
828    let name = step
829        .display_name
830        .as_deref()
831        .or(step.name.as_deref())
832        .map(|s| s.to_string())
833        .unwrap_or_else(default_name);
834
835    if step.task.is_some() {
836        // Task step — script body may live in inputs.{script,inlineScript,Inline}.
837        let inline = extract_task_inline_script(step.inputs.as_ref());
838        (name, TrustZone::Untrusted, inline)
839    } else if let Some(ref s) = step.script {
840        (name, TrustZone::FirstParty, Some(s.clone()))
841    } else if let Some(ref s) = step.bash {
842        (name, TrustZone::FirstParty, Some(s.clone()))
843    } else if let Some(ref s) = step.powershell {
844        (name, TrustZone::FirstParty, Some(s.clone()))
845    } else if let Some(ref s) = step.pwsh {
846        (name, TrustZone::FirstParty, Some(s.clone()))
847    } else {
848        (name, TrustZone::FirstParty, None)
849    }
850}
851
852/// Pull an inline script body out of a task step's `inputs:` mapping.
853/// Recognises the three common conventions:
854///   - `inputs.script` (Bash@3, PowerShell@2 — when targetType: inline)
855///   - `inputs.inlineScript` (AzureCLI@2)
856///   - `inputs.Inline` (AzurePowerShell@5 — note the capital I)
857///
858/// Match is case-insensitive so a hand-written pipeline using `Script:` or
859/// `INLINESCRIPT:` is still picked up.
860fn extract_task_inline_script(
861    inputs: Option<&HashMap<String, serde_yaml::Value>>,
862) -> Option<String> {
863    let inputs = inputs?;
864    const KEYS: &[&str] = &["script", "inlinescript", "inline"];
865    for (raw_key, val) in inputs {
866        let lower = raw_key.to_lowercase();
867        if KEYS.contains(&lower.as_str()) {
868            if let Some(s) = val.as_str() {
869                if !s.is_empty() {
870                    return Some(s.to_string());
871                }
872            }
873        }
874    }
875    None
876}
877
878/// Add a DelegatesTo edge from a synthetic step node to a template image node.
879///
880/// Trust zone heuristic: templates referenced with `@repository` (e.g. `steps/deploy.yml@templates`)
881/// pull code from an external repository and are Untrusted. Plain relative paths like
882/// `steps/deploy.yml` resolve within the same repo and are FirstParty — mirroring how GHA
883/// treats `./local-action`.
884///
885/// `job_name` is `Some` when the delegation is created inside a job's scope
886/// (job-level template, or template step inside `process_steps`); it is `None`
887/// for stage-level template delegations that don't belong to a specific job.
888fn add_template_delegation(
889    step_name: &str,
890    template_path: &str,
891    token_id: NodeId,
892    job_name: Option<&str>,
893    graph: &mut AuthorityGraph,
894) {
895    let tpl_trust_zone = if template_path.contains('@') {
896        TrustZone::Untrusted
897    } else {
898        TrustZone::FirstParty
899    };
900    let step_id = graph.add_node(NodeKind::Step, step_name, TrustZone::FirstParty);
901    if let Some(jn) = job_name {
902        if let Some(node) = graph.nodes.get_mut(step_id) {
903            node.metadata.insert(META_JOB_NAME.into(), jn.into());
904        }
905    }
906    let tpl_id = graph.add_node(NodeKind::Image, template_path, tpl_trust_zone);
907    graph.add_edge(step_id, tpl_id, EdgeKind::DelegatesTo);
908    graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
909    graph.mark_partial(format!(
910        "template '{template_path}' cannot be resolved inline — authority within the template is unknown"
911    ));
912}
913
914/// Extract `$(varName)` references from a string, creating Secret nodes for
915/// non-predefined and non-plain ADO variables.
916/// Only content that is a valid ADO variable identifier (`[A-Za-z][A-Za-z0-9_]*`)
917/// is treated as a variable reference. This rejects PowerShell sub-expressions
918/// (`$($var)`), ADO template expressions (`${{ ... }}`), shell commands (`$(date)`),
919/// and anything with spaces or special characters.
920fn extract_dollar_paren_secrets(
921    text: &str,
922    step_id: NodeId,
923    plain_vars: &HashSet<String>,
924    graph: &mut AuthorityGraph,
925    cache: &mut HashMap<String, NodeId>,
926) {
927    let mut pos = 0;
928    let bytes = text.as_bytes();
929    while pos < bytes.len() {
930        if pos + 2 < bytes.len() && bytes[pos] == b'$' && bytes[pos + 1] == b'(' {
931            let start = pos + 2;
932            if let Some(end_offset) = text[start..].find(')') {
933                let var_name = &text[start..start + end_offset];
934                if is_valid_ado_identifier(var_name)
935                    && !is_predefined_ado_var(var_name)
936                    && !plain_vars.contains(var_name)
937                {
938                    let id = find_or_create_secret(graph, cache, var_name);
939                    // Mark secrets embedded in -var flag arguments: their values appear in
940                    // pipeline logs (command string is logged before masking, and Terraform
941                    // itself logs -var values in plan output and debug traces).
942                    if is_in_terraform_var_flag(text, pos) {
943                        if let Some(node) = graph.nodes.get_mut(id) {
944                            node.metadata
945                                .insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
946                        }
947                    }
948                    graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
949                }
950                pos = start + end_offset + 1;
951                continue;
952            }
953        }
954        pos += 1;
955    }
956}
957
/// Heuristic: is the `$(VAR)` starting at byte offset `var_pos` part of a
/// Terraform `-var` flag argument?
///
/// Looks only at the text between the start of the line containing `var_pos`
/// and `var_pos` itself, and returns true when that prefix contains both
/// `-var` and `=` (as in `-var "key=$(VAR)"`).
///
/// `var_pos` must be a char boundary within `text`; slicing panics otherwise.
fn is_in_terraform_var_flag(text: &str, var_pos: usize) -> bool {
    let head = &text[..var_pos];
    let same_line_prefix = match head.rfind('\n') {
        Some(newline) => &head[newline + 1..],
        None => head,
    };
    same_line_prefix.contains('=') && same_line_prefix.contains("-var")
}
966
/// Returns true if `name` looks like an ADO variable name.
///
/// Accepted: a first character that is an ASCII letter, followed by any
/// number of ASCII letters, digits, underscores, or dots (dots allow names
/// such as `Build.BuildId`). The empty string is rejected.
///
/// Rejected examples: PowerShell variables (`$name`), template expressions
/// (`{{ ... }}`), and anything with spaces or other punctuation
/// (`name -join ','`). A bare word such as `date` is accepted — it cannot be
/// told apart from a variable name.
fn is_valid_ado_identifier(name: &str) -> bool {
    let bytes = name.as_bytes();
    let starts_with_letter = bytes.first().map_or(false, u8::is_ascii_alphabetic);
    starts_with_letter
        && bytes
            .iter()
            .skip(1)
            .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.'))
}
981
/// Returns true if `name` starts with one of the well-known prefixes of ADO
/// predefined (system-provided) variables, e.g. `Build.`, `Agent.`, `System.`.
///
/// The comparison is ASCII-case-insensitive because ADO resolves variable
/// names without regard to case: `$(build.buildId)` refers to the same
/// predefined variable as `$(Build.BuildId)`.
///
/// `TF_BUILD` is matched as a prefix (so the bare name `TF_BUILD` matches
/// too), which preserves the previous behaviour of this function.
fn is_predefined_ado_var(name: &str) -> bool {
    const PREDEFINED_PREFIXES: [&str; 10] = [
        "Build.",
        "Agent.",
        "System.",
        "Pipeline.",
        "Release.",
        "Environment.",
        "Strategy.",
        "Deployment.",
        "Resources.",
        "TF_BUILD",
    ];
    // Compare on bytes so a multi-byte character straddling the prefix length
    // simply fails to match instead of requiring a char-boundary check.
    let name = name.as_bytes();
    PREDEFINED_PREFIXES.iter().any(|prefix| {
        name.get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    })
}
999
1000fn find_or_create_secret(
1001    graph: &mut AuthorityGraph,
1002    cache: &mut HashMap<String, NodeId>,
1003    name: &str,
1004) -> NodeId {
1005    if let Some(&id) = cache.get(name) {
1006        return id;
1007    }
1008    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
1009    cache.insert(name.to_string(), id);
1010    id
1011}
1012
1013fn yaml_value_as_str(val: &serde_yaml::Value) -> Option<&str> {
1014    val.as_str()
1015}
1016
1017// ── Serde models for ADO YAML ─────────────────────────────
1018
/// Top-level ADO pipeline definition.
/// ADO pipelines come in three shapes:
///   (a) stages → jobs → steps
///   (b) jobs → steps (no stages key)
///   (c) steps only (no stages or jobs key)
///
/// Every field is optional and defaults to `None` when its key is absent.
#[derive(Debug, Deserialize)]
pub struct AdoPipeline {
    /// Raw `trigger:` value, kept untyped.
    #[serde(default)]
    pub trigger: Option<serde_yaml::Value>,
    /// Raw `pr:` value, kept untyped.
    #[serde(default)]
    pub pr: Option<serde_yaml::Value>,
    /// Pipeline-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// `stages:` is normally a sequence of stage objects, but real-world
    /// pipelines also use `stages: ${{ parameters.stages }}` (a template
    /// expression that resolves at runtime to a list). The custom
    /// deserializer accepts a sequence, a string, or null; the string and
    /// null forms resolve to `None`.
    #[serde(default, deserialize_with = "deserialize_optional_stages")]
    pub stages: Option<Vec<AdoStage>>,
    /// Pipeline-level `jobs:` list (shape (b) above).
    #[serde(default)]
    pub jobs: Option<Vec<AdoJob>>,
    /// Pipeline-level `steps:` list (shape (c) above).
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Raw pipeline-level `pool:` value, kept untyped.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
    /// Pipeline-level `workspace:` block. The only security-relevant field is
    /// `clean:` (`outputs`, `resources`, `all`, or `true`), which causes the
    /// agent to wipe the workspace between runs. Used to tag self-hosted Image
    /// nodes with `META_WORKSPACE_CLEAN`.
    #[serde(default)]
    pub workspace: Option<serde_yaml::Value>,
    /// `resources:` block — repository declarations, container declarations,
    /// pipeline declarations. We only consume `repositories[]` today.
    /// Pre-2019 ADO accepts a sequence form (`resources: [- repo: self]`)
    /// which has no `repositories:` key — the custom deserializer accepts
    /// both shapes and treats the sequence form as an empty resources block.
    #[serde(default, deserialize_with = "deserialize_optional_resources")]
    pub resources: Option<AdoResources>,
    /// Top-level `extends:` directive — `extends: { template: x@alias, ... }`.
    /// Captured raw so we can scan for `template: x@alias` references that
    /// consume a `resources.repositories[]` entry.
    #[serde(default)]
    pub extends: Option<serde_yaml::Value>,
    /// Top-level `parameters:` declarations. Each entry has at minimum a
    /// `name`; `type` defaults to `string` when omitted. `values:` is an
    /// optional allowlist that constrains caller input.
    /// ADO accepts two shapes: the typed sequence form
    /// (`- name: foo \n type: string \n default: bar`) and the legacy
    /// untyped map form (`parameters: { foo: bar, baz: '' }`) used in
    /// older template fragments. The custom deserializer normalizes both.
    #[serde(default, deserialize_with = "deserialize_optional_parameters")]
    pub parameters: Option<Vec<AdoParameter>>,
}
1073
1074/// Accept either a sequence of `AdoParameter` (modern typed form) or a
1075/// mapping of parameter name → default value (legacy untyped form used in
1076/// many template fragments). For the map form, each key becomes an
1077/// `AdoParameter` with the key as `name` and no type/values. Returns `None`
1078/// for any other shape (e.g. a bare template expression).
1079///
1080/// Implemented as a serde Visitor (rather than going through
1081/// `serde_yaml::Value`) so that downstream struct deserialization uses
1082/// serde's native lazy iteration — this avoids serde_yaml's strict
1083/// duplicate-key detection on `${{ else }}`-style template-conditional
1084/// keys that appear in stage/job `parameters:` blocks of unrelated entries.
1085fn deserialize_optional_parameters<'de, D>(
1086    deserializer: D,
1087) -> Result<Option<Vec<AdoParameter>>, D::Error>
1088where
1089    D: serde::Deserializer<'de>,
1090{
1091    use serde::de::{MapAccess, SeqAccess, Visitor};
1092    use std::fmt;
1093
1094    struct ParamsVisitor;
1095
1096    impl<'de> Visitor<'de> for ParamsVisitor {
1097        type Value = Option<Vec<AdoParameter>>;
1098
1099        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1100            f.write_str("a sequence of parameter declarations, a mapping of name→default, null, or a template expression")
1101        }
1102
1103        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1104            Ok(None)
1105        }
1106
1107        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1108            Ok(None)
1109        }
1110
1111        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1112            d.deserialize_any(self)
1113        }
1114
1115        // Bare scalar (template expression like `${{ parameters.X }}`) —
1116        // can't statically enumerate; treat as absent.
1117        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
1118            Ok(None)
1119        }
1120        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
1121            Ok(None)
1122        }
1123        fn visit_bool<E: serde::de::Error>(self, _v: bool) -> Result<Self::Value, E> {
1124            Ok(None)
1125        }
1126        fn visit_i64<E: serde::de::Error>(self, _v: i64) -> Result<Self::Value, E> {
1127            Ok(None)
1128        }
1129        fn visit_u64<E: serde::de::Error>(self, _v: u64) -> Result<Self::Value, E> {
1130            Ok(None)
1131        }
1132        fn visit_f64<E: serde::de::Error>(self, _v: f64) -> Result<Self::Value, E> {
1133            Ok(None)
1134        }
1135
1136        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
1137            let mut out = Vec::new();
1138            while let Some(item) = seq.next_element::<serde_yaml::Value>()? {
1139                if let Ok(p) = serde_yaml::from_value::<AdoParameter>(item) {
1140                    out.push(p);
1141                }
1142            }
1143            Ok(Some(out))
1144        }
1145
1146        fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
1147            // Legacy untyped map form: name → default-value. We collect
1148            // names; defaults are intentionally discarded (matches typed-
1149            // form semantics where `default:` is also ignored).
1150            let mut out = Vec::new();
1151            while let Some(key) = map.next_key::<serde_yaml::Value>()? {
1152                let _ignore = map.next_value::<serde::de::IgnoredAny>()?;
1153                let name = match key {
1154                    serde_yaml::Value::String(s) if !s.is_empty() => s,
1155                    _ => continue,
1156                };
1157                out.push(AdoParameter {
1158                    name: Some(name),
1159                    param_type: None,
1160                    values: None,
1161                });
1162            }
1163            Ok(Some(out))
1164        }
1165    }
1166
1167    deserializer.deserialize_any(ParamsVisitor)
1168}
1169
1170/// Accept either an `AdoResources` mapping (modern form with `repositories:`,
1171/// `containers:`, `pipelines:`) or the legacy sequence form (`resources: [-
1172/// repo: self]`, pre-2019 ADO syntax). The legacy form has no
1173/// `repositories:` key, so we return an empty `AdoResources` for it — the
1174/// repository-tracking rules then see no aliases to track, which is correct
1175/// (legacy `repo: self` declares no external repositories).
1176fn deserialize_optional_resources<'de, D>(deserializer: D) -> Result<Option<AdoResources>, D::Error>
1177where
1178    D: serde::Deserializer<'de>,
1179{
1180    use serde::de::{MapAccess, SeqAccess, Visitor};
1181    use std::fmt;
1182
1183    struct ResourcesVisitor;
1184
1185    impl<'de> Visitor<'de> for ResourcesVisitor {
1186        type Value = Option<AdoResources>;
1187
1188        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1189            f.write_str("an AdoResources mapping or a legacy `- repo:` sequence")
1190        }
1191
1192        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1193            Ok(None)
1194        }
1195        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1196            Ok(None)
1197        }
1198        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1199            d.deserialize_any(self)
1200        }
1201
1202        // Legacy sequence form — drain it without producing any
1203        // repository entries. Modern rules track aliases via the
1204        // `AdoResources.repositories[]` shape, which the legacy form
1205        // does not produce.
1206        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
1207            while seq.next_element::<serde::de::IgnoredAny>()?.is_some() {}
1208            Ok(Some(AdoResources::default()))
1209        }
1210
1211        fn visit_map<A: MapAccess<'de>>(self, map: A) -> Result<Self::Value, A::Error> {
1212            let r = AdoResources::deserialize(serde::de::value::MapAccessDeserializer::new(map))?;
1213            Ok(Some(r))
1214        }
1215    }
1216
1217    deserializer.deserialize_any(ResourcesVisitor)
1218}
1219
1220/// Accept either a sequence of `AdoStage` (the normal form) or a bare
1221/// template expression (`stages: ${{ parameters.stages }}`) which resolves
1222/// at runtime. For the template-expression case, return `None` so the
1223/// pipeline still parses; the graph will simply contain no stages from this
1224/// scope (downstream code already handles empty stage lists).
1225fn deserialize_optional_stages<'de, D>(deserializer: D) -> Result<Option<Vec<AdoStage>>, D::Error>
1226where
1227    D: serde::Deserializer<'de>,
1228{
1229    use serde::de::{SeqAccess, Visitor};
1230    use std::fmt;
1231
1232    struct StagesVisitor;
1233
1234    impl<'de> Visitor<'de> for StagesVisitor {
1235        type Value = Option<Vec<AdoStage>>;
1236
1237        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1238            f.write_str("a sequence of stages or a template expression")
1239        }
1240
1241        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1242            Ok(None)
1243        }
1244        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1245            Ok(None)
1246        }
1247        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1248            d.deserialize_any(self)
1249        }
1250        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
1251            Ok(None)
1252        }
1253        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
1254            Ok(None)
1255        }
1256
1257        fn visit_seq<A: SeqAccess<'de>>(self, seq: A) -> Result<Self::Value, A::Error> {
1258            let stages =
1259                Vec::<AdoStage>::deserialize(serde::de::value::SeqAccessDeserializer::new(seq))?;
1260            Ok(Some(stages))
1261        }
1262    }
1263
1264    deserializer.deserialize_any(StagesVisitor)
1265}
1266
/// `resources:` block. Only `repositories[]` is modelled today; any other
/// keys in the block are not captured by this struct.
#[derive(Debug, Default, Deserialize)]
pub struct AdoResources {
    /// Declared repository resources. Empty when the `repositories:` key is
    /// absent (and for `AdoResources::default()`).
    #[serde(default)]
    pub repositories: Vec<AdoRepository>,
}
1273
/// A single `resources.repositories[]` entry — declares an external repo
/// alias the pipeline can consume via `template: x@alias`, `extends:`, or
/// `checkout: alias`.
#[derive(Debug, Deserialize)]
pub struct AdoRepository {
    /// The alias used by consumers (`template: file@<repository>`).
    /// This is the only required key; an entry without it fails to deserialize.
    pub repository: String,
    /// `git`, `github`, `bitbucket`, or `azureGit`. Read from the YAML key
    /// `type`; `None` when absent.
    #[serde(default, rename = "type")]
    pub repo_type: Option<String>,
    /// Full repo path (e.g. `org/repo`). `None` when absent.
    #[serde(default)]
    pub name: Option<String>,
    /// Optional ref, read from the YAML key `ref`. Absent = default branch
    /// (mutable). Present forms: `refs/tags/v1.2.3`, `refs/heads/main`, bare
    /// branch `main`, or a SHA.
    #[serde(default, rename = "ref")]
    pub git_ref: Option<String>,
}
1292
/// Pipeline / template `parameters:` entry. We deliberately ignore `default:`
/// — only the name, type, and `values:` allowlist matter for our rules.
///
/// All fields are optional so partially-specified entries still deserialize.
#[derive(Debug, Deserialize)]
pub struct AdoParameter {
    /// Parameter name; `None` when the entry has no `name:` key.
    #[serde(default)]
    pub name: Option<String>,
    /// Declared type, read from the YAML key `type`; `None` when omitted.
    #[serde(rename = "type", default)]
    pub param_type: Option<String>,
    /// Optional `values:` allowlist, kept as raw YAML values.
    #[serde(default)]
    pub values: Option<Vec<serde_yaml::Value>>,
}
1304
/// One entry of a `stages:` sequence — either a real stage (`- stage: X`) or
/// a stage-level template reference (`- template: path`).
#[derive(Debug, Deserialize)]
pub struct AdoStage {
    /// Stage identifier. Absent when the stage entry is a template reference.
    #[serde(default)]
    pub stage: Option<String>,
    /// Stage-level template reference (`- template: path/to/stage.yml`).
    #[serde(default)]
    pub template: Option<String>,
    /// Stage-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Jobs declared under this stage; empty when the `jobs:` key is absent.
    #[serde(default)]
    pub jobs: Vec<AdoJob>,
}
1318
/// One entry of a `jobs:` sequence: a regular job (`- job:`), a deployment
/// job (`- deployment:`), or a job-level template reference (`- template:`).
/// Every field is optional and defaults to `None` when its key is absent.
#[derive(Debug, Deserialize)]
pub struct AdoJob {
    /// Regular job identifier
    #[serde(default)]
    pub job: Option<String>,
    /// Deployment job identifier
    #[serde(default)]
    pub deployment: Option<String>,
    /// Job-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Steps declared directly under the job's `steps:` key.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Deployment-job nested strategy: runOnce/rolling/canary all share the
    /// same lifecycle shape (`strategy.{runOnce,rolling,canary}.<phase>.steps`).
    /// We only need the steps — the strategy choice itself doesn't change
    /// authority flow. See `AdoJob::all_steps` for how they are flattened.
    #[serde(default)]
    pub strategy: Option<AdoStrategy>,
    /// Raw job-level `pool:` value, kept untyped.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
    /// Job-level `workspace:` block. The only security-relevant field is
    /// `clean:` which causes the agent to wipe the workspace between runs.
    #[serde(default)]
    pub workspace: Option<serde_yaml::Value>,
    /// Job-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// Deployment-job environment binding. Two YAML shapes:
    ///
    ///   - `environment: production` (string shorthand)
    ///   - `environment: { name: staging, resourceType: VirtualMachine }` (mapping)
    ///
    /// When present, the environment may have approvals/checks attached in ADO's
    /// environment configuration. Approvals are a manual gate — authority cannot
    /// propagate past one without human intervention. We treat any `environment:`
    /// binding as an approval candidate and tag the job's steps so propagation
    /// rules can downgrade severity. (We can't see the approval config from YAML
    /// alone; the binding is the strongest signal available at parse time.)
    #[serde(default)]
    pub environment: Option<serde_yaml::Value>,
}
1359
1360impl AdoJob {
1361    pub fn effective_name(&self) -> String {
1362        self.job
1363            .as_deref()
1364            .or(self.deployment.as_deref())
1365            .unwrap_or("job")
1366            .to_string()
1367    }
1368
1369    /// Returns the effective step list for this job.
1370    ///
1371    /// Regular jobs put steps under `steps:` directly. Deployment jobs nest
1372    /// them under `strategy.{runOnce,rolling,canary}.{deploy,preDeploy,
1373    /// postDeploy,routeTraffic,onSuccess,onFailure}.steps`. We merge all
1374    /// strategy-nested step lists into a single sequence so downstream rules
1375    /// see them as part of the job. Order: regular `steps:` first, then any
1376    /// strategy-nested steps in deterministic phase order.
1377    pub fn all_steps(&self) -> Vec<AdoStep> {
1378        let mut out: Vec<AdoStep> = Vec::new();
1379        if let Some(ref s) = self.steps {
1380            out.extend(s.iter().cloned());
1381        }
1382        if let Some(ref strat) = self.strategy {
1383            for phase in strat.phases() {
1384                if let Some(ref s) = phase.steps {
1385                    out.extend(s.iter().cloned());
1386                }
1387            }
1388        }
1389        out
1390    }
1391
1392    /// Returns true when the job is bound to an `environment:` — either the
1393    /// string form (`environment: production`) or the mapping form with a
1394    /// non-empty `name:` field. An empty mapping or empty string is ignored.
1395    pub fn has_environment_binding(&self) -> bool {
1396        match self.environment.as_ref() {
1397            None => false,
1398            Some(serde_yaml::Value::String(s)) => !s.trim().is_empty(),
1399            Some(serde_yaml::Value::Mapping(m)) => m
1400                .get("name")
1401                .and_then(|v| v.as_str())
1402                .map(|s| !s.trim().is_empty())
1403                .unwrap_or(false),
1404            _ => false,
1405        }
1406    }
1407}
1408
/// Deployment-job `strategy:` block. ADO ships three strategies — runOnce,
/// rolling, canary — each with multiple lifecycle phases that may carry
/// their own step list. We capture all of them; the AdoJob::all_steps
/// helper flattens them into one sequence.
///
/// All three strategies are deserialized with the same `AdoStrategyRunOnce`
/// shape.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategy {
    /// `strategy.runOnce` block, if present.
    #[serde(default, rename = "runOnce")]
    pub run_once: Option<AdoStrategyRunOnce>,
    /// `strategy.rolling` block, if present.
    #[serde(default)]
    pub rolling: Option<AdoStrategyRunOnce>,
    /// `strategy.canary` block, if present.
    #[serde(default)]
    pub canary: Option<AdoStrategyRunOnce>,
}
1422
1423impl AdoStrategy {
1424    /// Iterate over every populated lifecycle phase across all strategies.
1425    pub fn phases(&self) -> Vec<&AdoStrategyPhase> {
1426        let mut out: Vec<&AdoStrategyPhase> = Vec::new();
1427        for runner in [&self.run_once, &self.rolling, &self.canary]
1428            .iter()
1429            .copied()
1430            .flatten()
1431        {
1432            for phase in [
1433                &runner.deploy,
1434                &runner.pre_deploy,
1435                &runner.post_deploy,
1436                &runner.route_traffic,
1437            ]
1438            .into_iter()
1439            .flatten()
1440            {
1441                out.push(phase);
1442            }
1443            if let Some(ref on) = runner.on {
1444                if let Some(ref s) = on.success {
1445                    out.push(s);
1446                }
1447                if let Some(ref f) = on.failure {
1448                    out.push(f);
1449                }
1450            }
1451        }
1452        out
1453    }
1454}
1455
1456/// Lifecycle phases carried by every deployment strategy. Each phase may
1457/// have its own `steps:`. Covering all six avoids silently dropping
1458/// privileged setup/teardown steps from the authority graph.
1459#[derive(Debug, Default, Deserialize, Clone)]
1460pub struct AdoStrategyRunOnce {
1461    #[serde(default)]
1462    pub deploy: Option<AdoStrategyPhase>,
1463    #[serde(default, rename = "preDeploy")]
1464    pub pre_deploy: Option<AdoStrategyPhase>,
1465    #[serde(default, rename = "postDeploy")]
1466    pub post_deploy: Option<AdoStrategyPhase>,
1467    #[serde(default, rename = "routeTraffic")]
1468    pub route_traffic: Option<AdoStrategyPhase>,
1469    #[serde(default)]
1470    pub on: Option<AdoStrategyOn>,
1471}
1472
1473#[derive(Debug, Default, Deserialize, Clone)]
1474pub struct AdoStrategyOn {
1475    #[serde(default)]
1476    pub success: Option<AdoStrategyPhase>,
1477    #[serde(default)]
1478    pub failure: Option<AdoStrategyPhase>,
1479}
1480
1481#[derive(Debug, Default, Deserialize, Clone)]
1482pub struct AdoStrategyPhase {
1483    #[serde(default)]
1484    pub steps: Option<Vec<AdoStep>>,
1485}
1486
1487#[derive(Debug, Deserialize, Clone)]
1488pub struct AdoStep {
1489    /// Task reference e.g. `AzureCLI@2`
1490    #[serde(default)]
1491    pub task: Option<String>,
1492    /// Inline script (cmd/sh)
1493    #[serde(default)]
1494    pub script: Option<String>,
1495    /// Inline bash script
1496    #[serde(default)]
1497    pub bash: Option<String>,
1498    /// Inline PowerShell script
1499    #[serde(default)]
1500    pub powershell: Option<String>,
1501    /// Cross-platform PowerShell
1502    #[serde(default)]
1503    pub pwsh: Option<String>,
1504    /// Step-level template reference
1505    #[serde(default)]
1506    pub template: Option<String>,
1507    #[serde(rename = "displayName", default)]
1508    pub display_name: Option<String>,
1509    /// Legacy name alias
1510    #[serde(default)]
1511    pub name: Option<String>,
1512    #[serde(default)]
1513    pub env: Option<HashMap<String, String>>,
1514    /// Task inputs (key → value, but values may be nested)
1515    #[serde(default)]
1516    pub inputs: Option<HashMap<String, serde_yaml::Value>>,
1517    /// Checkout step target (e.g. `self`, a repo alias, or `none`)
1518    #[serde(default)]
1519    pub checkout: Option<String>,
1520    /// When true on a checkout step, writes credentials to .git/config for subsequent steps.
1521    #[serde(rename = "persistCredentials", default)]
1522    pub persist_credentials: Option<bool>,
1523}
1524
1525/// ADO `variables:` block. Can be a sequence (list of group/name-value entries)
1526/// or a mapping (variableName: value). We normalise both into a Vec<AdoVariable>.
1527#[derive(Debug, Default)]
1528pub struct AdoVariables(pub Vec<AdoVariable>);
1529
1530impl<'de> serde::Deserialize<'de> for AdoVariables {
1531    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1532    where
1533        D: serde::Deserializer<'de>,
1534    {
1535        let raw = serde_yaml::Value::deserialize(deserializer)?;
1536        let mut vars = Vec::new();
1537
1538        match raw {
1539            serde_yaml::Value::Sequence(seq) => {
1540                for item in seq {
1541                    if let Some(map) = item.as_mapping() {
1542                        if let Some(group_val) = map.get("group") {
1543                            if let Some(group) = group_val.as_str() {
1544                                vars.push(AdoVariable::Group {
1545                                    group: group.to_string(),
1546                                });
1547                                continue;
1548                            }
1549                        }
1550                        let name = map
1551                            .get("name")
1552                            .and_then(|v| v.as_str())
1553                            .unwrap_or("")
1554                            .to_string();
1555                        let value = map
1556                            .get("value")
1557                            .and_then(|v| v.as_str())
1558                            .unwrap_or("")
1559                            .to_string();
1560                        let is_secret = map
1561                            .get("isSecret")
1562                            .and_then(|v| v.as_bool())
1563                            .unwrap_or(false);
1564                        vars.push(AdoVariable::Named {
1565                            name,
1566                            value,
1567                            is_secret,
1568                        });
1569                    }
1570                }
1571            }
1572            serde_yaml::Value::Mapping(map) => {
1573                for (k, v) in map {
1574                    let name = k.as_str().unwrap_or("").to_string();
1575                    let value = v.as_str().unwrap_or("").to_string();
1576                    vars.push(AdoVariable::Named {
1577                        name,
1578                        value,
1579                        is_secret: false,
1580                    });
1581                }
1582            }
1583            _ => {}
1584        }
1585
1586        Ok(AdoVariables(vars))
1587    }
1588}
1589
1590#[derive(Debug)]
1591pub enum AdoVariable {
1592    Group {
1593        group: String,
1594    },
1595    Named {
1596        name: String,
1597        value: String,
1598        is_secret: bool,
1599    },
1600}
1601
/// Heuristic: does this YAML contain a template-expression conditional used
/// as a mapping key, e.g. `- ${{ if eq(parameters.X, true) }}:`?
///
/// A line matches when, after dropping leading whitespace and one optional
/// `- ` list marker, it starts with `${{`, contains `if ` or `if(`, and ends
/// with `:` once trailing whitespace is ignored. Indentation is not
/// inspected, so the wrapper is detected at any nesting depth. This is the
/// construct that breaks root-level mapping parses but is valid in an ADO
/// template fragment included by a parent pipeline.
fn has_root_parameter_conditional(content: &str) -> bool {
    content.lines().any(|line| {
        let unindented = line.trim_start();
        // Accept both `- ${{ if ... }}:` and the bare `${{ if ... }}:` form.
        let candidate = match unindented.strip_prefix("- ") {
            Some(rest) => rest,
            None => unindented,
        };

        let opens_expression = candidate.starts_with("${{");
        let mentions_if = candidate.contains("if ") || candidate.contains("if(");
        let is_mapping_key = candidate.trim_end().ends_with(":");

        opens_expression && mentions_if && is_mapping_key
    })
}
1621
1622#[cfg(test)]
1623mod tests {
1624    use super::*;
1625
1626    fn parse(yaml: &str) -> AuthorityGraph {
1627        let parser = AdoParser;
1628        let source = PipelineSource {
1629            file: "azure-pipelines.yml".into(),
1630            repo: None,
1631            git_ref: None,
1632            commit_sha: None,
1633        };
1634        parser.parse(yaml, &source).unwrap()
1635    }
1636
1637    #[test]
1638    fn parses_simple_pipeline() {
1639        let yaml = r#"
1640trigger:
1641  - main
1642
1643jobs:
1644  - job: Build
1645    steps:
1646      - script: echo hello
1647        displayName: Say hello
1648"#;
1649        let graph = parse(yaml);
1650        assert!(graph.nodes.len() >= 2); // System.AccessToken + step
1651    }
1652
1653    #[test]
1654    fn system_access_token_created() {
1655        let yaml = r#"
1656steps:
1657  - script: echo hi
1658"#;
1659        let graph = parse(yaml);
1660        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1661        assert_eq!(identities.len(), 1);
1662        assert_eq!(identities[0].name, "System.AccessToken");
1663        assert_eq!(
1664            identities[0].metadata.get(META_IDENTITY_SCOPE),
1665            Some(&"broad".to_string())
1666        );
1667    }
1668
1669    #[test]
1670    fn variable_group_creates_secret_and_marks_partial() {
1671        let yaml = r#"
1672variables:
1673  - group: MySecretGroup
1674
1675steps:
1676  - script: echo hi
1677"#;
1678        let graph = parse(yaml);
1679        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1680        assert_eq!(secrets.len(), 1);
1681        assert_eq!(secrets[0].name, "MySecretGroup");
1682        assert_eq!(
1683            secrets[0].metadata.get(META_VARIABLE_GROUP),
1684            Some(&"true".to_string())
1685        );
1686        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1687        assert!(
1688            graph
1689                .completeness_gaps
1690                .iter()
1691                .any(|g| g.contains("MySecretGroup")),
1692            "completeness gap should name the variable group"
1693        );
1694    }
1695
1696    #[test]
1697    fn task_with_azure_subscription_creates_service_connection_identity() {
1698        let yaml = r#"
1699steps:
1700  - task: AzureCLI@2
1701    displayName: Deploy to Azure
1702    inputs:
1703      azureSubscription: MyServiceConnection
1704      scriptType: bash
1705      inlineScript: az group list
1706"#;
1707        let graph = parse(yaml);
1708        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1709        // System.AccessToken + service connection
1710        assert_eq!(identities.len(), 2);
1711        let conn = identities
1712            .iter()
1713            .find(|i| i.name == "MyServiceConnection")
1714            .unwrap();
1715        assert_eq!(
1716            conn.metadata.get(META_SERVICE_CONNECTION),
1717            Some(&"true".to_string())
1718        );
1719        assert_eq!(
1720            conn.metadata.get(META_IDENTITY_SCOPE),
1721            Some(&"broad".to_string())
1722        );
1723    }
1724
1725    #[test]
1726    fn service_connection_does_not_get_unconditional_oidc_tag() {
1727        let yaml = r#"
1728steps:
1729  - task: AzureCLI@2
1730    displayName: Deploy to Azure
1731    inputs:
1732      azureSubscription: MyClassicSpnConnection
1733      scriptType: bash
1734      inlineScript: az group list
1735"#;
1736        let graph = parse(yaml);
1737        let conn = graph
1738            .nodes_of_kind(NodeKind::Identity)
1739            .find(|i| i.name == "MyClassicSpnConnection")
1740            .expect("service connection identity should exist");
1741        assert_eq!(
1742            conn.metadata.get(META_OIDC),
1743            None,
1744            "service connections must not be tagged META_OIDC without a clear OIDC signal"
1745        );
1746    }
1747
1748    #[test]
1749    fn task_with_connected_service_name_creates_identity() {
1750        let yaml = r#"
1751steps:
1752  - task: SqlAzureDacpacDeployment@1
1753    inputs:
1754      ConnectedServiceNameARM: MySqlConnection
1755"#;
1756        let graph = parse(yaml);
1757        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1758        assert!(
1759            identities.iter().any(|i| i.name == "MySqlConnection"),
1760            "connectedServiceNameARM should create identity"
1761        );
1762    }
1763
1764    #[test]
1765    fn script_step_classified_as_first_party() {
1766        let yaml = r#"
1767steps:
1768  - script: echo hi
1769    displayName: Say hi
1770"#;
1771        let graph = parse(yaml);
1772        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1773        assert_eq!(steps.len(), 1);
1774        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
1775    }
1776
1777    #[test]
1778    fn bash_step_classified_as_first_party() {
1779        let yaml = r#"
1780steps:
1781  - bash: echo hi
1782"#;
1783        let graph = parse(yaml);
1784        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1785        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
1786    }
1787
1788    #[test]
1789    fn task_step_classified_as_untrusted() {
1790        let yaml = r#"
1791steps:
1792  - task: DotNetCoreCLI@2
1793    inputs:
1794      command: build
1795"#;
1796        let graph = parse(yaml);
1797        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1798        assert_eq!(steps.len(), 1);
1799        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
1800    }
1801
1802    #[test]
1803    fn dollar_paren_var_in_script_creates_secret() {
1804        let yaml = r#"
1805steps:
1806  - script: |
1807      curl -H "Authorization: $(MY_API_TOKEN)" https://api.example.com
1808    displayName: Call API
1809"#;
1810        let graph = parse(yaml);
1811        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1812        assert_eq!(secrets.len(), 1);
1813        assert_eq!(secrets[0].name, "MY_API_TOKEN");
1814    }
1815
1816    #[test]
1817    fn predefined_ado_var_not_treated_as_secret() {
1818        let yaml = r#"
1819steps:
1820  - script: |
1821      echo $(Build.BuildId)
1822      echo $(Agent.WorkFolder)
1823      echo $(System.DefaultWorkingDirectory)
1824    displayName: Print vars
1825"#;
1826        let graph = parse(yaml);
1827        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1828        assert!(
1829            secrets.is_empty(),
1830            "predefined ADO vars should not be treated as secrets, got: {:?}",
1831            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
1832        );
1833    }
1834
1835    #[test]
1836    fn template_reference_creates_delegates_to_and_marks_partial() {
1837        let yaml = r#"
1838steps:
1839  - template: steps/deploy.yml
1840    parameters:
1841      env: production
1842"#;
1843        let graph = parse(yaml);
1844        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1845        assert_eq!(steps.len(), 1);
1846
1847        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1848        assert_eq!(images.len(), 1);
1849        assert_eq!(images[0].name, "steps/deploy.yml");
1850
1851        let delegates: Vec<_> = graph
1852            .edges_from(steps[0].id)
1853            .filter(|e| e.kind == EdgeKind::DelegatesTo)
1854            .collect();
1855        assert_eq!(delegates.len(), 1);
1856
1857        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1858    }
1859
1860    #[test]
1861    fn top_level_steps_no_jobs() {
1862        let yaml = r#"
1863steps:
1864  - script: echo a
1865  - script: echo b
1866"#;
1867        let graph = parse(yaml);
1868        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1869        assert_eq!(steps.len(), 2);
1870    }
1871
1872    #[test]
1873    fn top_level_jobs_no_stages() {
1874        let yaml = r#"
1875jobs:
1876  - job: JobA
1877    steps:
1878      - script: echo a
1879  - job: JobB
1880    steps:
1881      - script: echo b
1882"#;
1883        let graph = parse(yaml);
1884        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1885        assert_eq!(steps.len(), 2);
1886    }
1887
1888    #[test]
1889    fn stages_with_nested_jobs_parsed() {
1890        let yaml = r#"
1891stages:
1892  - stage: Build
1893    jobs:
1894      - job: Compile
1895        steps:
1896          - script: cargo build
1897  - stage: Test
1898    jobs:
1899      - job: UnitTest
1900        steps:
1901          - script: cargo test
1902"#;
1903        let graph = parse(yaml);
1904        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1905        assert_eq!(steps.len(), 2);
1906    }
1907
1908    #[test]
1909    fn all_steps_linked_to_system_access_token() {
1910        let yaml = r#"
1911steps:
1912  - script: echo a
1913  - task: SomeTask@1
1914    inputs: {}
1915"#;
1916        let graph = parse(yaml);
1917        let token: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1918        assert_eq!(token.len(), 1);
1919        let token_id = token[0].id;
1920
1921        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1922        for step in &steps {
1923            let links: Vec<_> = graph
1924                .edges_from(step.id)
1925                .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == token_id)
1926                .collect();
1927            assert_eq!(
1928                links.len(),
1929                1,
1930                "step '{}' must link to System.AccessToken",
1931                step.name
1932            );
1933        }
1934    }
1935
1936    #[test]
1937    fn named_secret_variable_creates_secret_node() {
1938        let yaml = r#"
1939variables:
1940  - name: MY_PASSWORD
1941    value: dummy
1942    isSecret: true
1943
1944steps:
1945  - script: echo hi
1946"#;
1947        let graph = parse(yaml);
1948        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1949        assert_eq!(secrets.len(), 1);
1950        assert_eq!(secrets[0].name, "MY_PASSWORD");
1951    }
1952
1953    #[test]
1954    fn variables_as_mapping_parsed() {
1955        let yaml = r#"
1956variables:
1957  MY_VAR: hello
1958  ANOTHER_VAR: world
1959
1960steps:
1961  - script: echo hi
1962"#;
1963        let graph = parse(yaml);
1964        // Mapping-style variables without isSecret — no secret nodes created
1965        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1966        assert!(
1967            secrets.is_empty(),
1968            "plain mapping vars should not create secret nodes"
1969        );
1970    }
1971
1972    #[test]
1973    fn persist_credentials_creates_persists_to_edge() {
1974        let yaml = r#"
1975steps:
1976  - checkout: self
1977    persistCredentials: true
1978  - script: git push
1979"#;
1980        let graph = parse(yaml);
1981        let token_id = graph
1982            .nodes_of_kind(NodeKind::Identity)
1983            .find(|n| n.name == "System.AccessToken")
1984            .expect("System.AccessToken must exist")
1985            .id;
1986
1987        let persists_edges: Vec<_> = graph
1988            .edges
1989            .iter()
1990            .filter(|e| e.kind == EdgeKind::PersistsTo && e.to == token_id)
1991            .collect();
1992        assert_eq!(
1993            persists_edges.len(),
1994            1,
1995            "checkout with persistCredentials: true must produce exactly one PersistsTo edge"
1996        );
1997    }
1998
1999    #[test]
2000    fn checkout_without_persist_credentials_no_persists_to_edge() {
2001        let yaml = r#"
2002steps:
2003  - checkout: self
2004  - script: echo hi
2005"#;
2006        let graph = parse(yaml);
2007        let persists_edges: Vec<_> = graph
2008            .edges
2009            .iter()
2010            .filter(|e| e.kind == EdgeKind::PersistsTo)
2011            .collect();
2012        assert!(
2013            persists_edges.is_empty(),
2014            "checkout without persistCredentials should not produce PersistsTo edge"
2015        );
2016    }
2017
2018    #[test]
2019    fn var_flag_secret_marked_as_cli_flag_exposed() {
2020        let yaml = r#"
2021steps:
2022  - script: |
2023      terraform apply \
2024        -var "db_password=$(db_password)" \
2025        -var "api_key=$(api_key)"
2026    displayName: Terraform apply
2027"#;
2028        let graph = parse(yaml);
2029        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2030        assert!(!secrets.is_empty(), "should detect secrets from -var flags");
2031        for secret in &secrets {
2032            assert_eq!(
2033                secret.metadata.get(META_CLI_FLAG_EXPOSED),
2034                Some(&"true".to_string()),
2035                "secret '{}' passed via -var flag should be marked cli_flag_exposed",
2036                secret.name
2037            );
2038        }
2039    }
2040
2041    #[test]
2042    fn non_var_flag_secret_not_marked_as_cli_flag_exposed() {
2043        let yaml = r#"
2044steps:
2045  - script: |
2046      curl -H "Authorization: $(MY_TOKEN)" https://api.example.com
2047"#;
2048        let graph = parse(yaml);
2049        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2050        assert_eq!(secrets.len(), 1);
2051        assert!(
2052            !secrets[0].metadata.contains_key(META_CLI_FLAG_EXPOSED),
2053            "non -var secret should not be marked as cli_flag_exposed"
2054        );
2055    }
2056
2057    #[test]
2058    fn step_linked_to_variable_group_secret() {
2059        let yaml = r#"
2060variables:
2061  - group: ProdSecrets
2062
2063steps:
2064  - script: deploy.sh
2065"#;
2066        let graph = parse(yaml);
2067        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2068        assert_eq!(secrets.len(), 1);
2069        let secret_id = secrets[0].id;
2070
2071        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2072        let links: Vec<_> = graph
2073            .edges_from(steps[0].id)
2074            .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == secret_id)
2075            .collect();
2076        assert_eq!(
2077            links.len(),
2078            1,
2079            "step should be linked to variable group secret"
2080        );
2081    }
2082
2083    #[test]
2084    fn pr_trigger_sets_meta_trigger_on_graph() {
2085        let yaml = r#"
2086pr:
2087  - '*'
2088
2089steps:
2090  - script: echo hi
2091"#;
2092        let graph = parse(yaml);
2093        assert_eq!(
2094            graph.metadata.get(META_TRIGGER),
2095            Some(&"pr".to_string()),
2096            "ADO pr: trigger should set graph META_TRIGGER"
2097        );
2098    }
2099
2100    #[test]
2101    fn self_hosted_pool_by_name_creates_image_with_self_hosted_metadata() {
2102        let yaml = r#"
2103pool:
2104  name: my-self-hosted-pool
2105
2106steps:
2107  - script: echo hi
2108"#;
2109        let graph = parse(yaml);
2110        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2111        assert_eq!(images.len(), 1);
2112        assert_eq!(images[0].name, "my-self-hosted-pool");
2113        assert_eq!(
2114            images[0].metadata.get(META_SELF_HOSTED),
2115            Some(&"true".to_string()),
2116            "pool.name without vmImage must be tagged self-hosted"
2117        );
2118    }
2119
2120    #[test]
2121    fn vm_image_pool_is_not_tagged_self_hosted() {
2122        let yaml = r#"
2123pool:
2124  vmImage: ubuntu-latest
2125
2126steps:
2127  - script: echo hi
2128"#;
2129        let graph = parse(yaml);
2130        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2131        assert_eq!(images.len(), 1);
2132        assert_eq!(images[0].name, "ubuntu-latest");
2133        assert!(
2134            !images[0].metadata.contains_key(META_SELF_HOSTED),
2135            "pool.vmImage is Microsoft-hosted — must not be tagged self-hosted"
2136        );
2137    }
2138
2139    #[test]
2140    fn checkout_self_step_tagged_with_meta_checkout_self() {
2141        let yaml = r#"
2142steps:
2143  - checkout: self
2144  - script: echo hi
2145"#;
2146        let graph = parse(yaml);
2147        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2148        assert_eq!(steps.len(), 2);
2149        let checkout_step = steps
2150            .iter()
2151            .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
2152            .expect("one step must be tagged META_CHECKOUT_SELF");
2153        assert_eq!(
2154            checkout_step.metadata.get(META_CHECKOUT_SELF),
2155            Some(&"true".to_string())
2156        );
2157    }
2158
2159    #[test]
2160    fn vso_setvariable_sets_meta_writes_env_gate() {
2161        let yaml = r###"
2162steps:
2163  - script: |
2164      echo "##vso[task.setvariable variable=FOO]bar"
2165    displayName: Set variable
2166"###;
2167        let graph = parse(yaml);
2168        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2169        assert_eq!(steps.len(), 1);
2170        assert_eq!(
2171            steps[0].metadata.get(META_WRITES_ENV_GATE),
2172            Some(&"true".to_string()),
2173            "##vso[task.setvariable] must mark META_WRITES_ENV_GATE"
2174        );
2175    }
2176
2177    #[test]
2178    fn environment_key_tags_job_with_env_approval() {
2179        // String form: `environment: production`
2180        let yaml_string_form = r#"
2181jobs:
2182  - deployment: DeployWeb
2183    environment: production
2184    steps:
2185      - script: echo deploying
2186        displayName: Deploy
2187"#;
2188        let g1 = parse(yaml_string_form);
2189        let tagged: Vec<_> = g1
2190            .nodes_of_kind(NodeKind::Step)
2191            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
2192            .collect();
2193        assert!(
2194            !tagged.is_empty(),
2195            "string-form `environment:` must tag job's step nodes with META_ENV_APPROVAL"
2196        );
2197
2198        // Mapping form: `environment: { name: staging }`
2199        let yaml_mapping_form = r#"
2200jobs:
2201  - deployment: DeployAPI
2202    environment:
2203      name: staging
2204      resourceType: VirtualMachine
2205    steps:
2206      - script: echo deploying
2207        displayName: Deploy
2208"#;
2209        let g2 = parse(yaml_mapping_form);
2210        let tagged2: Vec<_> = g2
2211            .nodes_of_kind(NodeKind::Step)
2212            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
2213            .collect();
2214        assert!(
2215            !tagged2.is_empty(),
2216            "mapping-form `environment: {{ name: ... }}` must tag job's step nodes"
2217        );
2218
2219        // Negative: a job with no `environment:` must not be tagged
2220        let yaml_no_env = r#"
2221jobs:
2222  - job: Build
2223    steps:
2224      - script: echo building
2225"#;
2226        let g3 = parse(yaml_no_env);
2227        let any_tagged = g3
2228            .nodes_of_kind(NodeKind::Step)
2229            .any(|s| s.metadata.contains_key(META_ENV_APPROVAL));
2230        assert!(
2231            !any_tagged,
2232            "jobs without `environment:` must not carry META_ENV_APPROVAL"
2233        );
2234    }
2235
2236    #[test]
2237    fn root_parameter_conditional_template_fragment_does_not_crash_and_marks_partial() {
2238        // Real-world repro: an ADO template fragment whose root content is wrapped
2239        // in a parameter conditional (`- ${{ if eq(parameters.X, true) }}:`) followed
2240        // by a list of jobs. This is valid when `template:`-included from a parent
2241        // pipeline, but parsing it standalone fails with "did not find expected key".
2242        // The parser must now return a Partial graph instead of a fatal error.
2243        let yaml = r#"
2244parameters:
2245  msabs_ws2022: false
2246
2247- ${{ if eq(parameters.msabs_ws2022, true) }}:
2248  - job: packer_ws2022
2249    displayName: Build WS2022 Gold Image
2250    steps:
2251      - task: PackerTool@0
2252"#;
2253        let parser = AdoParser;
2254        let source = PipelineSource {
2255            file: "fragment.yml".into(),
2256            repo: None,
2257            git_ref: None,
2258            commit_sha: None,
2259        };
2260        let result = parser.parse(yaml, &source);
2261        let graph = result.expect("template fragment must not crash the parser");
2262        assert!(
2263            matches!(graph.completeness, AuthorityCompleteness::Partial),
2264            "template-fragment graph must be marked Partial"
2265        );
2266        let saw_fragment_gap = graph
2267            .completeness_gaps
2268            .iter()
2269            .any(|g| g.contains("template fragment") && g.contains("parent pipeline"));
2270        assert!(
2271            saw_fragment_gap,
2272            "completeness_gaps must mention the template-fragment reason, got: {:?}",
2273            graph.completeness_gaps
2274        );
2275    }
2276
2277    #[test]
2278    fn environment_tag_isolated_to_gated_job_only() {
2279        // Two jobs side by side: only the deployment job has environment.
2280        // Steps from the non-gated job must NOT be tagged.
2281        let yaml = r#"
2282jobs:
2283  - job: Build
2284    steps:
2285      - script: echo build
2286        displayName: build-step
2287  - deployment: DeployProd
2288    environment: production
2289    steps:
2290      - script: echo deploy
2291        displayName: deploy-step
2292"#;
2293        let g = parse(yaml);
2294        let build_step = g
2295            .nodes_of_kind(NodeKind::Step)
2296            .find(|s| s.name == "build-step")
2297            .expect("build-step must exist");
2298        let deploy_step = g
2299            .nodes_of_kind(NodeKind::Step)
2300            .find(|s| s.name == "deploy-step")
2301            .expect("deploy-step must exist");
2302        assert!(
2303            !build_step.metadata.contains_key(META_ENV_APPROVAL),
2304            "non-gated job's step must not be tagged"
2305        );
2306        assert_eq!(
2307            deploy_step.metadata.get(META_ENV_APPROVAL),
2308            Some(&"true".to_string()),
2309            "gated deployment job's step must be tagged"
2310        );
2311    }
2312
2313    // ── resources.repositories[] capture ──────────────────────
2314
2315    fn repos_meta(graph: &AuthorityGraph) -> Vec<serde_json::Value> {
2316        let raw = graph
2317            .metadata
2318            .get(META_REPOSITORIES)
2319            .expect("META_REPOSITORIES must be set");
2320        serde_json::from_str(raw).expect("META_REPOSITORIES must be valid JSON")
2321    }
2322
2323    #[test]
2324    fn resources_repositories_captured_with_used_flag_when_referenced_by_extends() {
2325        let yaml = r#"
2326resources:
2327  repositories:
2328    - repository: shared-templates
2329      type: git
2330      name: Platform/shared-templates
2331      ref: refs/heads/main
2332
2333extends:
2334  template: pipeline.yml@shared-templates
2335"#;
2336        let graph = parse(yaml);
2337        let entries = repos_meta(&graph);
2338        assert_eq!(entries.len(), 1);
2339        let e = &entries[0];
2340        assert_eq!(e["alias"], "shared-templates");
2341        assert_eq!(e["repo_type"], "git");
2342        assert_eq!(e["name"], "Platform/shared-templates");
2343        assert_eq!(e["ref"], "refs/heads/main");
2344        assert_eq!(e["used"], true);
2345    }
2346
2347    #[test]
2348    fn resources_repositories_used_via_checkout_alias() {
2349        // Mirrors the msigeurope-adf-finance-reporting corpus shape.
2350        let yaml = r#"
2351resources:
2352  repositories:
2353    - repository: adf_publish
2354      type: git
2355      name: org/adf-finance-reporting
2356      ref: refs/heads/adf_publish
2357
2358jobs:
2359  - job: deploy
2360    steps:
2361      - checkout: adf_publish
2362"#;
2363        let graph = parse(yaml);
2364        let entries = repos_meta(&graph);
2365        assert_eq!(entries.len(), 1);
2366        assert_eq!(entries[0]["alias"], "adf_publish");
2367        assert_eq!(entries[0]["used"], true);
2368    }
2369
2370    #[test]
2371    fn resources_repositories_unreferenced_alias_is_marked_not_used() {
2372        // Declared but no `template: x@alias`, no `checkout: alias`, no extends.
2373        let yaml = r#"
2374resources:
2375  repositories:
2376    - repository: orphan-templates
2377      type: git
2378      name: Platform/orphan
2379      ref: main
2380
2381jobs:
2382  - job: build
2383    steps:
2384      - script: echo hi
2385"#;
2386        let graph = parse(yaml);
2387        let entries = repos_meta(&graph);
2388        assert_eq!(entries.len(), 1);
2389        assert_eq!(entries[0]["alias"], "orphan-templates");
2390        assert_eq!(entries[0]["used"], false);
2391    }
2392
2393    #[test]
2394    fn resources_repositories_absent_when_no_resources_block() {
2395        let yaml = r#"
2396jobs:
2397  - job: build
2398    steps:
2399      - script: echo hi
2400"#;
2401        let graph = parse(yaml);
2402        assert!(!graph.metadata.contains_key(META_REPOSITORIES));
2403    }
2404
2405    #[test]
2406    fn parse_template_alias_extracts_segment_after_at() {
2407        assert_eq!(
2408            parse_template_alias("steps/deploy.yml@templates"),
2409            Some("templates".to_string())
2410        );
2411        assert_eq!(parse_template_alias("local/path.yml"), None);
2412        assert_eq!(parse_template_alias("path@"), None);
2413    }
2414
2415    #[test]
2416    fn parameters_as_map_form_parses_as_named_parameters() {
2417        // Real-world repro from Azure/aks-engine, PowerShell/PowerShell, dotnet/maui:
2418        // legacy template fragments declare `parameters:` as a mapping of
2419        // name → default-value rather than the modern typed sequence form.
2420        // Both shapes must parse; the map form yields parameters with names
2421        // but no type/values allowlist (so they default to "string" downstream).
2422        let yaml = r#"
2423parameters:
2424  name: ''
2425  k8sRelease: ''
2426  apimodel: 'examples/e2e-tests/kubernetes/release/default/definition.json'
2427  createVNET: false
2428
2429jobs:
2430  - job: build
2431    steps:
2432      - script: echo $(name)
2433"#;
2434        let graph = parse(yaml);
2435        // Parse must succeed and capture the four parameter names.
2436        assert!(graph.parameters.contains_key("name"));
2437        assert!(graph.parameters.contains_key("k8sRelease"));
2438        assert!(graph.parameters.contains_key("apimodel"));
2439        assert!(graph.parameters.contains_key("createVNET"));
2440        assert_eq!(graph.parameters.len(), 4);
2441    }
2442
2443    #[test]
2444    fn parameters_as_typed_sequence_form_still_parses() {
2445        // Make sure the modern form still works after the polymorphic
2446        // deserializer change.
2447        let yaml = r#"
2448parameters:
2449  - name: env
2450    type: string
2451    default: prod
2452    values:
2453      - prod
2454      - staging
2455  - name: skipTests
2456    type: boolean
2457    default: false
2458
2459jobs:
2460  - job: build
2461    steps:
2462      - script: echo hi
2463"#;
2464        let graph = parse(yaml);
2465        let env_param = graph.parameters.get("env").expect("env captured");
2466        assert_eq!(env_param.param_type, "string");
2467        assert!(env_param.has_values_allowlist);
2468        let skip_param = graph
2469            .parameters
2470            .get("skipTests")
2471            .expect("skipTests captured");
2472        assert_eq!(skip_param.param_type, "boolean");
2473        assert!(!skip_param.has_values_allowlist);
2474    }
2475
2476    #[test]
2477    fn resources_as_legacy_sequence_form_parses_to_empty_resources() {
2478        // Real-world repro from Azure/azure-cli, Chinachu/Mirakurun: pre-2019
2479        // ADO syntax allows `resources:` as a list of `- repo: self` entries,
2480        // not the modern `resources: { repositories: [...] }` mapping. Modern
2481        // ADO still tolerates the legacy form. We must accept both shapes
2482        // without crashing the parse.
2483        let yaml = r#"
2484resources:
2485- repo: self
2486
2487trigger:
2488  - main
2489
2490jobs:
2491  - job: build
2492    steps:
2493      - script: echo hi
2494"#;
2495        let graph = parse(yaml);
2496        // No external repositories declared (legacy form has none) — so the
2497        // META_REPOSITORIES metadata key is absent.
2498        assert!(!graph.metadata.contains_key(META_REPOSITORIES));
2499        // But the job still parses.
2500        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2501        assert_eq!(steps.len(), 1);
2502    }
2503
2504    #[test]
2505    fn stages_as_template_expression_parses_with_no_stages() {
2506        // Real-world repro from dotnet/diagnostics templatePublic.yml:
2507        // `stages: ${{ parameters.stages }}` resolves at runtime. The static
2508        // parser cannot enumerate stages from a template expression — we
2509        // accept the file without crashing and the resulting graph simply
2510        // contains no stages from the template-expression scope.
2511        let yaml = r#"
2512parameters:
2513  - name: stages
2514    type: stageList
2515
2516stages: ${{ parameters.stages }}
2517"#;
2518        let graph = parse(yaml);
2519        // Graph must exist (no crash).
2520        assert!(graph.parameters.contains_key("stages"));
2521    }
2522
2523    // ── Cross-platform misclassification trap (red-team R2 #5) ─────
2524
2525    #[test]
2526    fn jobs_carrier_without_steps_marks_partial() {
2527        // ADO `jobs:` carrier present but each job has no `steps:` and no
2528        // `template:`. process_steps([]) adds nothing. Result: 0 Step nodes
2529        // despite a non-empty job carrier — must mark Partial so a CI gate
2530        // doesn't treat completeness=complete + 0 findings as "passed".
2531        let yaml = r#"
2532jobs:
2533  - job: build
2534    pool:
2535      vmImage: ubuntu-latest
2536"#;
2537        let graph = parse(yaml);
2538        let step_count = graph
2539            .nodes
2540            .iter()
2541            .filter(|n| n.kind == NodeKind::Step)
2542            .count();
2543        assert_eq!(step_count, 0);
2544        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2545        assert!(
2546            graph
2547                .completeness_gaps
2548                .iter()
2549                .any(|g| g.contains("0 step nodes")),
2550            "completeness_gaps must mention 0 step nodes: {:?}",
2551            graph.completeness_gaps
2552        );
2553    }
2554
2555    #[test]
2556    fn jobs_carrier_with_empty_jobs_list_does_not_mark_partial() {
2557        // Defensive: an empty `jobs:` list is NOT a carrier — there is no
2558        // job content to be confused about. Stays Complete.
2559        let yaml = r#"
2560jobs: []
2561"#;
2562        let graph = parse(yaml);
2563        let zero_step_gap = graph
2564            .completeness_gaps
2565            .iter()
2566            .any(|g| g.contains("0 step nodes"));
2567        assert!(
2568            !zero_step_gap,
2569            "empty jobs: list is not a carrier; got: {:?}",
2570            graph.completeness_gaps
2571        );
2572    }
2573
2574    #[test]
2575    fn empty_pipeline_does_not_mark_partial_for_zero_steps() {
2576        // No top-level stages/jobs/steps at all — there's no carrier, so the
2577        // 0-step-nodes guard must NOT fire. A genuinely empty pipeline stays
2578        // Complete.
2579        let yaml = r#"
2580trigger:
2581  - main
2582"#;
2583        let graph = parse(yaml);
2584        let zero_step_gap = graph
2585            .completeness_gaps
2586            .iter()
2587            .any(|g| g.contains("0 step nodes"));
2588        assert!(
2589            !zero_step_gap,
2590            "no carrier means no 0-step gap reason; got: {:?}",
2591            graph.completeness_gaps
2592        );
2593    }
2594}