taudit_parse_ado/
lib.rs

1use std::collections::{HashMap, HashSet};
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
/// Regex-free check for an unattended `terraform apply` inside a script body.
///
/// Returns `true` when some line of `s` invokes `terraform apply` and the text
/// `auto-approve` (which covers both `-auto-approve` and `--auto-approve`)
/// appears either anywhere on that same line, or on one of the next three
/// lines reached through an unbroken chain of line continuations — a trailing
/// `\` (shell) or a trailing `` ` `` (PowerShell).
///
/// `terraform` and `apply` may be separated by any non-empty run of spaces
/// and tabs, so column-aligned or tab-indented invocations are recognised.
/// Everything from the first `#` on a line onwards is treated as a comment
/// and ignored.
///
/// Case-sensitive on purpose — Terraform's CLI is case-sensitive and these
/// tokens never appear capitalised in real-world pipelines.
fn script_does_terraform_auto_apply(s: &str) -> bool {
    const FLAG: &str = "auto-approve";
    // How many lines after the `terraform apply` line may carry the flag.
    const LOOKAHEAD_LINES: usize = 3;

    // Drops a trailing `#` comment. Deliberately crude: a `#` inside a quoted
    // string also truncates the line.
    fn strip_comment(raw: &str) -> &str {
        raw.split('#').next().unwrap_or("")
    }

    // True when the line ends in a shell (`\`) or PowerShell (`` ` ``)
    // continuation marker, ignoring trailing whitespace.
    fn continues(line: &str) -> bool {
        let trimmed = line.trim_end();
        trimmed.ends_with('\\') || trimmed.ends_with('`')
    }

    // True when `terraform` is followed by at least one space/tab and then
    // `apply`. Substring semantics are kept (no word-boundary requirement) so
    // every line the single-space / single-tab check accepted still matches.
    fn invokes_terraform_apply(line: &str) -> bool {
        line.match_indices("terraform").any(|(start, word)| {
            let rest = &line[start + word.len()..];
            let after_gap = rest.trim_start_matches(|c: char| c == ' ' || c == '\t');
            after_gap.len() < rest.len() && after_gap.starts_with("apply")
        })
    }

    let lines: Vec<&str> = s.lines().map(strip_comment).collect();
    for (i, line) in lines.iter().enumerate() {
        if !invokes_terraform_apply(line) {
            continue;
        }
        if line.contains(FLAG) {
            return true;
        }
        // Continuation: peek a few lines forward for the flag, stopping as
        // soon as a line does not itself end in a continuation marker.
        let mut continuing = continues(line);
        for next in lines.iter().skip(i + 1).take(LOOKAHEAD_LINES) {
            if !continuing {
                break;
            }
            if next.contains(FLAG) {
                return true;
            }
            continuing = continues(next);
        }
    }
    false
}
40
41/// Azure DevOps YAML pipeline parser.
42pub struct AdoParser;
43
44impl PipelineParser for AdoParser {
45    fn platform(&self) -> &str {
46        "azure-devops"
47    }
48
49    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
50        let mut de = serde_yaml::Deserializer::from_str(content);
51        let doc = de
52            .next()
53            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
54        let pipeline: AdoPipeline = match AdoPipeline::deserialize(doc) {
55            Ok(p) => p,
56            Err(e) => {
57                // Real-world ADO template fragments often wrap their root content in
58                // a parameter conditional like `- ${{ if eq(parameters.X, true) }}:`
59                // followed by a list of jobs. That is not a standard YAML mapping at
60                // the root, so serde_yaml fails with a "did not find expected key"
61                // error. These files are intended to be `template:`-included from a
62                // parent pipeline; analyzing them in isolation is not meaningful.
63                // Return a near-empty graph marked Partial instead of crashing the scan.
64                let msg = e.to_string();
65                let looks_like_template_fragment = (msg.contains("did not find expected key")
66                    || (msg.contains("parameters")
67                        && msg.contains("invalid type: map")
68                        && msg.contains("expected a sequence")))
69                    && has_root_parameter_conditional(content);
70                if looks_like_template_fragment {
71                    let mut graph = AuthorityGraph::new(source.clone());
72                    graph
73                        .metadata
74                        .insert(META_PLATFORM.into(), "azure-devops".into());
75                    graph.mark_partial(
76                        "ADO template fragment with top-level parameter conditional — root structure depends on parent pipeline context".to_string(),
77                    );
78                    return Ok(graph);
79                }
80                return Err(TauditError::Parse(format!("YAML parse error: {e}")));
81            }
82        };
83        let extra_docs = de.next().is_some();
84
85        let mut graph = AuthorityGraph::new(source.clone());
86        graph
87            .metadata
88            .insert(META_PLATFORM.into(), "azure-devops".into());
89        if extra_docs {
90            graph.mark_partial(
91                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
92            );
93        }
94
95        // Detect PR trigger — sets graph-level META_TRIGGER for trigger_context_mismatch.
96        // A genuine ADO PR trigger is always a mapping (`pr:\n  branches:...`) or a
97        // sequence (`pr:\n  - main`). Scalar opt-out forms — `pr: none`, `pr: ~`,
98        // `pr: false`, `pr: ""` — must NOT be treated as active triggers.
99        // Checking is_mapping()||is_sequence() is more robust than enumerating every
100        // scalar opt-out value (serde_yaml 0.9 parses "none" as a string, "~" as a
101        // string, and `null` as null — the shape test handles all forms uniformly).
102        let has_pr_trigger = pipeline
103            .pr
104            .as_ref()
105            .map(|v| v.is_mapping() || v.is_sequence())
106            .unwrap_or(false);
107        if has_pr_trigger {
108            graph.metadata.insert(META_TRIGGER.into(), "pr".into());
109        }
110
111        // Capture resources.repositories[] declarations and detect aliases that
112        // are actually referenced by an `extends:`, `template: x@alias`, or
113        // `checkout: alias`. The result is JSON-encoded into graph metadata
114        // for the `template_extends_unpinned_branch` rule to consume.
115        process_repositories(&pipeline, content, &mut graph);
116
117        // Capture top-level `parameters:` declarations (used by
118        // parameter_interpolation_into_shell). ADO defaults missing `type:`
119        // to string, so a missing/empty type is treated as a string.
120        if let Some(ref params) = pipeline.parameters {
121            for p in params {
122                let name = match p.name.as_ref() {
123                    Some(n) if !n.is_empty() => n.clone(),
124                    _ => continue,
125                };
126                let param_type = p.param_type.clone().unwrap_or_default();
127                let has_values_allowlist =
128                    p.values.as_ref().map(|v| !v.is_empty()).unwrap_or(false);
129                graph.parameters.insert(
130                    name,
131                    ParamSpec {
132                        param_type,
133                        has_values_allowlist,
134                    },
135                );
136            }
137        }
138
139        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
140
141        // System.AccessToken is always present — equivalent to GITHUB_TOKEN.
142        // Tagged implicit: ADO injects this token into every task by platform design;
143        // its exposure to marketplace tasks is structural, not a fixable misconfiguration.
144        let mut meta = HashMap::new();
145        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
146        meta.insert(META_IMPLICIT.into(), "true".into());
147        let token_id = graph.add_node_with_metadata(
148            NodeKind::Identity,
149            "System.AccessToken",
150            TrustZone::FirstParty,
151            meta,
152        );
153
154        // Pipeline-level permissions block — when present and non-broad (no write
155        // permissions), downgrade System.AccessToken from broad → constrained so
156        // over_privileged_identity does not fire on already-restricted pipelines.
157        if let Some(ref perms_val) = pipeline.permissions {
158            if !ado_permissions_are_broad(perms_val) {
159                let perms_str = ado_permissions_display(perms_val);
160                graph.nodes[token_id]
161                    .metadata
162                    .insert(META_IDENTITY_SCOPE.into(), "constrained".into());
163                graph.nodes[token_id]
164                    .metadata
165                    .insert(META_PERMISSIONS.into(), perms_str);
166            }
167        }
168
169        // Pipeline-level pool: adds Image node, tagged self-hosted when applicable.
170        process_pool(&pipeline.pool, &pipeline.workspace, &mut graph);
171
172        // Pipeline-level variable groups and named secrets.
173        // plain_vars tracks non-secret named variables so $(VAR) refs in scripts
174        // don't generate false-positive Secret nodes for plain config values.
175        let mut plain_vars: HashSet<String> = HashSet::new();
176        let pipeline_secret_ids = process_variables(
177            &pipeline.variables,
178            &mut graph,
179            &mut secret_ids,
180            "pipeline",
181            &mut plain_vars,
182        );
183
184        // Determine pipeline structure: stages → jobs → steps, or jobs → steps, or steps only
185        if let Some(ref stages) = pipeline.stages {
186            for stage in stages {
187                // Stage-level template reference — delegate and mark Partial
188                if let Some(ref tpl) = stage.template {
189                    let stage_name = stage.stage.as_deref().unwrap_or("stage");
190                    add_template_delegation(stage_name, tpl, token_id, None, &mut graph);
191                    continue;
192                }
193
194                let stage_name = stage.stage.as_deref().unwrap_or("stage").to_string();
195                let stage_secret_ids = process_variables(
196                    &stage.variables,
197                    &mut graph,
198                    &mut secret_ids,
199                    &stage_name,
200                    &mut plain_vars,
201                );
202
203                for job in &stage.jobs {
204                    let job_name = job.effective_name();
205                    let job_secret_ids = process_variables(
206                        &job.variables,
207                        &mut graph,
208                        &mut secret_ids,
209                        &job_name,
210                        &mut plain_vars,
211                    );
212
213                    let effective_workspace =
214                        job.workspace.as_ref().or(pipeline.workspace.as_ref());
215                    process_pool(&job.pool, &effective_workspace.cloned(), &mut graph);
216
217                    let all_secrets: Vec<NodeId> = pipeline_secret_ids
218                        .iter()
219                        .chain(&stage_secret_ids)
220                        .chain(&job_secret_ids)
221                        .copied()
222                        .collect();
223
224                    let steps_start = graph.nodes.len();
225
226                    let job_steps = job.all_steps();
227                    process_steps(
228                        &job_steps,
229                        &job_name,
230                        token_id,
231                        &all_secrets,
232                        &plain_vars,
233                        &mut graph,
234                        &mut secret_ids,
235                    );
236
237                    if let Some(ref tpl) = job.template {
238                        add_template_delegation(
239                            &job_name,
240                            tpl,
241                            token_id,
242                            Some(&job_name),
243                            &mut graph,
244                        );
245                    }
246
247                    if job.has_environment_binding() {
248                        tag_job_steps_env_approval(&mut graph, steps_start);
249                    }
250                }
251            }
252        } else if let Some(ref jobs) = pipeline.jobs {
253            for job in jobs {
254                let job_name = job.effective_name();
255                let job_secret_ids = process_variables(
256                    &job.variables,
257                    &mut graph,
258                    &mut secret_ids,
259                    &job_name,
260                    &mut plain_vars,
261                );
262
263                let effective_workspace = job.workspace.as_ref().or(pipeline.workspace.as_ref());
264                process_pool(&job.pool, &effective_workspace.cloned(), &mut graph);
265
266                let all_secrets: Vec<NodeId> = pipeline_secret_ids
267                    .iter()
268                    .chain(&job_secret_ids)
269                    .copied()
270                    .collect();
271
272                let steps_start = graph.nodes.len();
273
274                let job_steps = job.all_steps();
275                process_steps(
276                    &job_steps,
277                    &job_name,
278                    token_id,
279                    &all_secrets,
280                    &plain_vars,
281                    &mut graph,
282                    &mut secret_ids,
283                );
284
285                if let Some(ref tpl) = job.template {
286                    add_template_delegation(&job_name, tpl, token_id, Some(&job_name), &mut graph);
287                }
288
289                if job.has_environment_binding() {
290                    tag_job_steps_env_approval(&mut graph, steps_start);
291                }
292            }
293        } else if let Some(ref steps) = pipeline.steps {
294            process_steps(
295                steps,
296                "pipeline",
297                token_id,
298                &pipeline_secret_ids,
299                &plain_vars,
300                &mut graph,
301                &mut secret_ids,
302            );
303        }
304
305        // Cross-platform misclassification trap (red-team R2 #5): a YAML file
306        // shaped like ADO at the top level (stages/jobs/steps present) but whose
307        // body uses constructs the ADO parser doesn't recognise will deserialize
308        // without errors and yield no Step nodes. Marking Partial surfaces the
309        // gap instead of returning completeness=complete on a clean-but-empty
310        // graph (which a CI gate would treat as "passed").
311        let step_count = graph
312            .nodes
313            .iter()
314            .filter(|n| n.kind == NodeKind::Step)
315            .count();
316        let had_step_carrier = pipeline.stages.as_ref().is_some_and(|s| !s.is_empty())
317            || pipeline.jobs.as_ref().is_some_and(|j| !j.is_empty())
318            || pipeline.steps.as_ref().is_some_and(|s| !s.is_empty());
319        if step_count == 0 && had_step_carrier {
320            graph.mark_partial(
321                "stages/jobs/steps parsed but produced 0 step nodes — possible non-ADO YAML wrong-platform-classified".to_string(),
322            );
323        }
324
325        Ok(graph)
326    }
327}
328
329/// Process an ADO `pool:` block. ADO pools come in two shapes:
330///   - `pool: my-self-hosted-pool` (string shorthand — always self-hosted)
331///   - `pool: { name: my-pool }` (named pool — self-hosted)
332///   - `pool: { vmImage: ubuntu-latest }` (Microsoft-hosted)
333///   - `pool: { name: my-pool, vmImage: ubuntu-latest }` (hosted; vmImage wins)
334///
335/// Creates an Image node representing the agent environment. Self-hosted pools
336/// Returns `true` when an ADO pipeline-level `permissions:` value implies a
337/// broad (write-capable) token scope, `false` when every scope is `none` or
338/// `read` (i.e. the token has been explicitly restricted).
339///
340/// ADO permission values are the strings `"read"`, `"write"`, and `"none"`.
341/// Any unrecognised shape is conservatively treated as broad.
342fn ado_permissions_are_broad(perms: &serde_yaml::Value) -> bool {
343    if let Some(map) = perms.as_mapping() {
344        map.values().any(|v| v.as_str() == Some("write"))
345    } else {
346        // Scalar form: ADO accepts "read", "write", "none" as pipeline-level
347        // permission values. "read" and "none" are constrained; "write" is
348        // broad. Anything else (null, tilde, empty, unrecognised string) is
349        // conservatively treated as broad (unknown = risky).
350        matches!(perms.as_str(), Some("write"))
351    }
352}
353
354/// Format an ADO `permissions:` YAML value into a compact human-readable
355/// string for the finding message (e.g. `"contents: none, idToken: none"`).
356fn ado_permissions_display(perms: &serde_yaml::Value) -> String {
357    if let Some(map) = perms.as_mapping() {
358        map.iter()
359            .filter_map(|(k, v)| {
360                let key = k.as_str()?;
361                let val = v.as_str().unwrap_or("?");
362                Some(format!("{key}: {val}"))
363            })
364            .collect::<Vec<_>>()
365            .join(", ")
366    } else {
367        perms.as_str().unwrap_or("none").to_string()
368    }
369}
370
371/// are tagged with META_SELF_HOSTED so downstream rules can flag them.
372///
373/// When `workspace` is provided and contains `clean:` with a truthy value
374/// (`true`, `all`, `outputs`, `resources`), the Image node is also tagged
375/// with META_WORKSPACE_CLEAN.
376fn process_pool(
377    pool: &Option<serde_yaml::Value>,
378    workspace: &Option<serde_yaml::Value>,
379    graph: &mut AuthorityGraph,
380) {
381    let Some(pool_val) = pool else {
382        return;
383    };
384
385    let (image_name, is_self_hosted) = match pool_val {
386        serde_yaml::Value::String(s) => (s.clone(), true),
387        serde_yaml::Value::Mapping(map) => {
388            let name = map.get("name").and_then(|v| v.as_str());
389            let vm_image = map.get("vmImage").and_then(|v| v.as_str());
390            match (name, vm_image) {
391                (_, Some(vm)) => (vm.to_string(), false),
392                (Some(n), None) => (n.to_string(), true),
393                (None, None) => return,
394            }
395        }
396        _ => return,
397    };
398
399    let mut meta = HashMap::new();
400    if is_self_hosted {
401        meta.insert(META_SELF_HOSTED.into(), "true".into());
402    }
403    if has_workspace_clean(workspace) {
404        meta.insert(META_WORKSPACE_CLEAN.into(), "true".into());
405    }
406    graph.add_node_with_metadata(NodeKind::Image, image_name, TrustZone::FirstParty, meta);
407}
408
409/// Returns `true` when the ADO `workspace:` value specifies a `clean:` setting
410/// that wipes the workspace between runs. Recognised truthy forms:
411///   - `workspace: { clean: all }`
412///   - `workspace: { clean: outputs }`
413///   - `workspace: { clean: resources }`
414///   - `workspace: { clean: true }`
415fn has_workspace_clean(workspace: &Option<serde_yaml::Value>) -> bool {
416    let Some(ws) = workspace else {
417        return false;
418    };
419    let Some(map) = ws.as_mapping() else {
420        return false;
421    };
422    let Some(clean) = map.get("clean") else {
423        return false;
424    };
425    match clean {
426        serde_yaml::Value::Bool(b) => *b,
427        serde_yaml::Value::String(s) => {
428            let lower = s.to_ascii_lowercase();
429            matches!(lower.as_str(), "all" | "outputs" | "resources" | "true")
430        }
431        _ => false,
432    }
433}
434
435/// Scan the parsed pipeline for `resources.repositories[]` declarations and
436/// determine which aliases are referenced inside the same file. Stores the
437/// result as a JSON-encoded array in `graph.metadata[META_REPOSITORIES]`.
438///
439/// Usage signal — an alias is "used" when it appears in any of:
440///   - `template: <path>@<alias>` (anywhere — top-level extends, stage, job, step)
441///   - `extends:` referencing `template: <path>@<alias>`
442///   - `checkout: <alias>` (steps consume an external repo into the workspace)
443///
444/// The `extends:` and per-step `template:` references are resolved by walking
445/// the parsed Value tree; the raw text is only used for the `checkout:` case
446/// (cheap substring scan, robust to YAML shape variation).
447fn process_repositories(pipeline: &AdoPipeline, raw_content: &str, graph: &mut AuthorityGraph) {
448    let resources = match pipeline.resources.as_ref() {
449        Some(r) if !r.repositories.is_empty() => r,
450        _ => return,
451    };
452
453    // Collect all aliases referenced as `template: x@alias`. We walk every
454    // `template:` field appearing in the parsed pipeline (extends and steps
455    // already deserialize to their own paths; stages/jobs use the per-job
456    // template field). The raw YAML walk via serde_yaml::Value covers all
457    // shapes uniformly without re-deriving structure-specific models.
458    let mut used_aliases: HashSet<String> = HashSet::new();
459
460    if let Some(ref ext) = pipeline.extends {
461        collect_template_alias_refs(ext, &mut used_aliases);
462    }
463    if let Ok(value) = serde_yaml::from_str::<serde_yaml::Value>(raw_content) {
464        collect_template_alias_refs(&value, &mut used_aliases);
465        collect_checkout_alias_refs(&value, &mut used_aliases);
466    }
467
468    // Build the JSON-encoded repository descriptor list.
469    let mut entries: Vec<serde_json::Value> = Vec::with_capacity(resources.repositories.len());
470    for repo in &resources.repositories {
471        let used = used_aliases.contains(&repo.repository);
472        let mut obj = serde_json::Map::new();
473        obj.insert(
474            "alias".into(),
475            serde_json::Value::String(repo.repository.clone()),
476        );
477        if let Some(ref t) = repo.repo_type {
478            obj.insert("repo_type".into(), serde_json::Value::String(t.clone()));
479        }
480        if let Some(ref n) = repo.name {
481            obj.insert("name".into(), serde_json::Value::String(n.clone()));
482        }
483        if let Some(ref r) = repo.git_ref {
484            obj.insert("ref".into(), serde_json::Value::String(r.clone()));
485        }
486        obj.insert("used".into(), serde_json::Value::Bool(used));
487        entries.push(serde_json::Value::Object(obj));
488    }
489
490    if let Ok(json) = serde_json::to_string(&serde_json::Value::Array(entries)) {
491        graph.metadata.insert(META_REPOSITORIES.into(), json);
492    }
493}
494
495/// Walk a YAML value and record every `template: <ref>@<alias>` alias seen.
496/// Recurses into mappings and sequences so it catches references in extends,
497/// stages, jobs, steps, and conditional blocks indiscriminately.
498fn collect_template_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
499    match value {
500        serde_yaml::Value::Mapping(map) => {
501            for (k, v) in map {
502                if k.as_str() == Some("template") {
503                    if let Some(s) = v.as_str() {
504                        if let Some(alias) = parse_template_alias(s) {
505                            sink.insert(alias);
506                        }
507                    }
508                }
509                collect_template_alias_refs(v, sink);
510            }
511        }
512        serde_yaml::Value::Sequence(seq) => {
513            for v in seq {
514                collect_template_alias_refs(v, sink);
515            }
516        }
517        _ => {}
518    }
519}
520
521/// Walk a YAML value and record every `checkout: <alias>` value seen, except
522/// `self` and `none` which are platform keywords (not external repo aliases).
523fn collect_checkout_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
524    match value {
525        serde_yaml::Value::Mapping(map) => {
526            for (k, v) in map {
527                if k.as_str() == Some("checkout") {
528                    if let Some(s) = v.as_str() {
529                        if s != "self" && s != "none" && !s.is_empty() {
530                            sink.insert(s.to_string());
531                        }
532                    }
533                }
534                collect_checkout_alias_refs(v, sink);
535            }
536        }
537        serde_yaml::Value::Sequence(seq) => {
538            for v in seq {
539                collect_checkout_alias_refs(v, sink);
540            }
541        }
542        _ => {}
543    }
544}
545
/// Pulls `<alias>` out of a `template: <path>@<alias>` reference.
///
/// The alias is whatever follows the LAST `@` in `template_ref`. Yields `None`
/// when there is no `@` at all (a plain in-repo path such as
/// `templates/deploy.yml`) or when nothing follows the final `@`.
fn parse_template_alias(template_ref: &str) -> Option<String> {
    template_ref
        .rsplit_once('@')
        .map(|(_path, alias)| alias)
        .filter(|alias| !alias.is_empty())
        .map(str::to_string)
}
558
559/// Tag every Step node added since `start_idx` with META_ENV_APPROVAL.
560/// Used after `process_steps` for a job whose `environment:` is configured —
561/// the environment binding indicates the job sits behind a manual approval
562/// gate, which is an isolation boundary that breaks automatic propagation.
563fn tag_job_steps_env_approval(graph: &mut AuthorityGraph, start_idx: usize) {
564    for node in graph.nodes.iter_mut().skip(start_idx) {
565        if node.kind == NodeKind::Step {
566            node.metadata
567                .insert(META_ENV_APPROVAL.into(), "true".into());
568        }
569    }
570}
571
572/// Process a variable list, creating Secret nodes and returning their IDs.
573/// Returns IDs for secrets only (not variable groups, which are opaque).
574/// Populates `plain_vars` with the names of non-secret named variables so
575/// downstream `$(VAR)` scanning can skip them.
576fn process_variables(
577    variables: &Option<AdoVariables>,
578    graph: &mut AuthorityGraph,
579    cache: &mut HashMap<String, NodeId>,
580    scope: &str,
581    plain_vars: &mut HashSet<String>,
582) -> Vec<NodeId> {
583    let mut ids = Vec::new();
584
585    let vars = match variables.as_ref() {
586        Some(v) => v,
587        None => return ids,
588    };
589
590    for var in &vars.0 {
591        match var {
592            AdoVariable::Group { group } => {
593                // Skip template-expression group names like `${{ parameters.env }}`.
594                // We can't resolve them statically — mark Partial but don't create
595                // a misleading Secret node with the expression as its name.
596                if group.contains("${{") {
597                    graph.mark_partial(format!(
598                        "variable group in {scope} uses template expression — group name unresolvable at parse time"
599                    ));
600                    continue;
601                }
602                let mut meta = HashMap::new();
603                meta.insert(META_VARIABLE_GROUP.into(), "true".into());
604                let id = graph.add_node_with_metadata(
605                    NodeKind::Secret,
606                    group.as_str(),
607                    TrustZone::FirstParty,
608                    meta,
609                );
610                cache.insert(group.clone(), id);
611                ids.push(id);
612                graph.mark_partial(format!(
613                    "variable group '{group}' in {scope} — contents unresolvable without ADO API access"
614                ));
615            }
616            AdoVariable::Named {
617                name, is_secret, ..
618            } => {
619                if *is_secret {
620                    let id = find_or_create_secret(graph, cache, name);
621                    ids.push(id);
622                } else {
623                    plain_vars.insert(name.clone());
624                }
625            }
626        }
627    }
628
629    ids
630}
631
632/// Process a list of ADO steps, adding nodes and edges to the graph.
633fn process_steps(
634    steps: &[AdoStep],
635    job_name: &str,
636    token_id: NodeId,
637    inherited_secrets: &[NodeId],
638    plain_vars: &HashSet<String>,
639    graph: &mut AuthorityGraph,
640    cache: &mut HashMap<String, NodeId>,
641) {
642    for (idx, step) in steps.iter().enumerate() {
643        // Template step — delegation, mark partial
644        if let Some(ref tpl) = step.template {
645            let step_name = step
646                .display_name
647                .as_deref()
648                .or(step.name.as_deref())
649                .map(|s| s.to_string())
650                .unwrap_or_else(|| format!("{job_name}[{idx}]"));
651            add_template_delegation(&step_name, tpl, token_id, Some(job_name), graph);
652            continue;
653        }
654
655        // Determine step kind and trust zone
656        let (step_name, trust_zone, mut inline_script) = classify_step(step, job_name, idx);
657
658        // For task steps (where classify_step returns None), recover an inline
659        // script body from `inputs.inlineScript` / `inputs.script` — used by
660        // AzureCLI@2, AzurePowerShell@5, Bash@3, etc. Without this fallback,
661        // rules that pattern-match script content miss every typed task.
662        if inline_script.is_none() {
663            if let Some(ref inputs) = step.inputs {
664                let candidate_keys = ["inlineScript", "script", "InlineScript", "Inline"];
665                for key in candidate_keys {
666                    if let Some(v) = inputs.get(key).and_then(yaml_value_as_str) {
667                        if !v.is_empty() {
668                            inline_script = Some(v.to_string());
669                            break;
670                        }
671                    }
672                }
673            }
674        }
675
676        let step_id = graph.add_node(NodeKind::Step, &step_name, trust_zone);
677
678        // Stamp parent job name so consumers (e.g. `taudit map --job`) can
679        // attribute steps back to their containing job.
680        if let Some(node) = graph.nodes.get_mut(step_id) {
681            node.metadata.insert(META_JOB_NAME.into(), job_name.into());
682            // Stamp the raw inline script body so script-aware rules
683            // (env-export of secrets, secret materialisation to files,
684            // Key Vault → plaintext) can pattern-match on the actual
685            // command text the agent will run.
686            if let Some(ref body) = inline_script {
687                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
688            }
689        }
690
691        // Stamp inline script body so command-line-leakage rules can inspect
692        // what the step actually executes (vm_remote_exec_via_pipeline_secret,
693        // short_lived_sas_in_command_line).
694        if let Some(ref body) = inline_script {
695            if let Some(node) = graph.nodes.get_mut(step_id) {
696                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
697            }
698        }
699
700        // Stamp the inline script body when present so rules that need to
701        // pattern-match against shell content can do so without re-parsing
702        // YAML. Bodies can be large; rules should treat META_SCRIPT_BODY as
703        // an opaque string and not assume any framing.
704        if let Some(ref body) = inline_script {
705            if let Some(node) = graph.nodes.get_mut(step_id) {
706                node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
707            }
708        }
709
710        // Every step has access to System.AccessToken
711        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
712
713        // checkout step with persistCredentials: true writes the token to .git/config on disk,
714        // making it accessible to all subsequent steps and filesystem-level attackers.
715        if step.checkout.is_some() && step.persist_credentials == Some(true) {
716            graph.add_edge(step_id, token_id, EdgeKind::PersistsTo);
717        }
718
719        // `checkout: self` pulls the repo being built. In a PR trigger context this
720        // is the untrusted fork head — tag the step so downstream rules can gate on
721        // trigger context. Default ADO checkout (`checkout: self`) is the common case.
722        if let Some(ref ck) = step.checkout {
723            if ck == "self" {
724                if let Some(node) = graph.nodes.get_mut(step_id) {
725                    node.metadata
726                        .insert(META_CHECKOUT_SELF.into(), "true".into());
727                }
728            }
729        }
730
731        // Inherited pipeline/stage/job secrets
732        for &secret_id in inherited_secrets {
733            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
734        }
735
736        // Service connection detection from task inputs (case-insensitive key match)
737        if let Some(ref inputs) = step.inputs {
738            let service_conn_keys = [
739                "azuresubscription",
740                "connectedservicename",
741                "connectedservicenamearm",
742                "kubernetesserviceconnection",
743                "environmentservicename",
744                "backendservicearm",
745            ];
746            for (raw_key, val) in inputs {
747                let lower = raw_key.to_lowercase();
748                if !service_conn_keys.contains(&lower.as_str()) {
749                    continue;
750                }
751                let conn_name = yaml_value_as_str(val).unwrap_or(raw_key.as_str());
752                if !conn_name.starts_with("$(") {
753                    // Stamp the connection name onto the step itself so rules
754                    // that need the name (e.g. terraform_auto_approve_in_prod)
755                    // don't have to traverse edges.
756                    if let Some(node) = graph.nodes.get_mut(step_id) {
757                        node.metadata
758                            .insert(META_SERVICE_CONNECTION_NAME.into(), conn_name.to_string());
759                    }
760
761                    let mut meta = HashMap::new();
762                    meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
763                    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
764                    // ADO pipeline YAML does not embed the authentication scheme
765                    // of the service endpoint (WorkloadIdentityFederation vs.
766                    // ServicePrincipal), so we cannot reliably determine whether a
767                    // connection uses OIDC.  Leave META_OIDC unset -- the safe
768                    // default -- so that rules like service_connection_scope_mismatch
769                    // can fire on classic SPN connections.
770                    let conn_id = graph.add_node_with_metadata(
771                        NodeKind::Identity,
772                        conn_name,
773                        TrustZone::FirstParty,
774                        meta,
775                    );
776                    graph.add_edge(step_id, conn_id, EdgeKind::HasAccessTo);
777                }
778            }
779
780            // addSpnToEnvironment: true exposes federated SPN material
781            // (idToken, servicePrincipalKey, servicePrincipalId, tenantId)
782            // to the step's inline script via env vars. Stamp the step so
783            // addspn_with_inline_script can pattern-match without traversal.
784            if let Some(val) = inputs.get("addSpnToEnvironment") {
785                let truthy = match val {
786                    serde_yaml::Value::Bool(b) => *b,
787                    serde_yaml::Value::String(s) => s.eq_ignore_ascii_case("true"),
788                    _ => false,
789                };
790                if truthy {
791                    if let Some(node) = graph.nodes.get_mut(step_id) {
792                        node.metadata
793                            .insert(META_ADD_SPN_TO_ENV.into(), "true".into());
794                    }
795                }
796            }
797
798            // TerraformCLI@N / TerraformTaskV1..V4 with command: apply +
799            // commandOptions containing auto-approve = same as inline
800            // `terraform apply --auto-approve`. Detect once here so the rule
801            // can read a single META_TERRAFORM_AUTO_APPROVE marker.
802            let task_lower = step
803                .task
804                .as_deref()
805                .map(|t| t.to_lowercase())
806                .unwrap_or_default();
807            let is_terraform_task = task_lower.starts_with("terraformcli@")
808                || task_lower.starts_with("terraformtask@")
809                || task_lower.starts_with("terraformtaskv");
810            if is_terraform_task {
811                let cmd_lower = inputs
812                    .get("command")
813                    .and_then(yaml_value_as_str)
814                    .map(|s| s.to_lowercase())
815                    .unwrap_or_default();
816                let opts = inputs
817                    .get("commandOptions")
818                    .and_then(yaml_value_as_str)
819                    .unwrap_or("");
820                if cmd_lower == "apply" && opts.contains("auto-approve") {
821                    if let Some(node) = graph.nodes.get_mut(step_id) {
822                        node.metadata
823                            .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
824                    }
825                }
826            }
827
828            // Detect $(varName) references in task input values
829            for val in inputs.values() {
830                if let Some(s) = yaml_value_as_str(val) {
831                    extract_dollar_paren_secrets(s, step_id, plain_vars, graph, cache);
832                }
833            }
834        }
835
836        // Inline-script detection of `terraform apply --auto-approve`.
837        // Done after inputs processing so we can OR the two signals into a
838        // single META_TERRAFORM_AUTO_APPROVE marker on the step.
839        if let Some(ref body) = inline_script {
840            if script_does_terraform_auto_apply(body) {
841                if let Some(node) = graph.nodes.get_mut(step_id) {
842                    node.metadata
843                        .insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
844                }
845            }
846        }
847
848        // Detect $(varName) in step env values
849        if let Some(ref env) = step.env {
850            for val in env.values() {
851                extract_dollar_paren_secrets(val, step_id, plain_vars, graph, cache);
852            }
853        }
854
855        // Detect $(varName) in inline script text
856        if let Some(ref script) = inline_script {
857            extract_dollar_paren_secrets(script, step_id, plain_vars, graph, cache);
858        }
859
860        // Detect ##vso[task.setvariable] — environment gate mutation in ADO pipelines
861        if let Some(ref script) = inline_script {
862            let lower = script.to_lowercase();
863            if lower.contains("##vso[task.setvariable") {
864                if let Some(node) = graph.nodes.get_mut(step_id) {
865                    node.metadata
866                        .insert(META_WRITES_ENV_GATE.into(), "true".into());
867                }
868            }
869        }
870    }
871}
872
873/// Classify an ADO step, returning (name, trust_zone, inline_script_text).
874///
875/// `inline_script_text` is populated whenever the step has script content —
876/// either as a top-level `script:`/`bash:`/`powershell:`/`pwsh:` key, or as a
877/// task input (`Bash@3.inputs.script`, `PowerShell@2.inputs.script`,
878/// `AzureCLI@2.inputs.inlineScript`, `AzurePowerShell@5.inputs.Inline`, …).
879/// Task-input keys are matched case-insensitively because the ADO YAML schema
880/// is itself case-insensitive on input names.
881fn classify_step(
882    step: &AdoStep,
883    job_name: &str,
884    idx: usize,
885) -> (String, TrustZone, Option<String>) {
886    let default_name = || format!("{job_name}[{idx}]");
887
888    let name = step
889        .display_name
890        .as_deref()
891        .or(step.name.as_deref())
892        .map(|s| s.to_string())
893        .unwrap_or_else(default_name);
894
895    if step.task.is_some() {
896        // Task step — script body may live in inputs.{script,inlineScript,Inline}.
897        let inline = extract_task_inline_script(step.inputs.as_ref());
898        (name, TrustZone::Untrusted, inline)
899    } else if let Some(ref s) = step.script {
900        (name, TrustZone::FirstParty, Some(s.clone()))
901    } else if let Some(ref s) = step.bash {
902        (name, TrustZone::FirstParty, Some(s.clone()))
903    } else if let Some(ref s) = step.powershell {
904        (name, TrustZone::FirstParty, Some(s.clone()))
905    } else if let Some(ref s) = step.pwsh {
906        (name, TrustZone::FirstParty, Some(s.clone()))
907    } else {
908        (name, TrustZone::FirstParty, None)
909    }
910}
911
912/// Pull an inline script body out of a task step's `inputs:` mapping.
913/// Recognises the three common conventions:
914///   - `inputs.script` (Bash@3, PowerShell@2 — when targetType: inline)
915///   - `inputs.inlineScript` (AzureCLI@2)
916///   - `inputs.Inline` (AzurePowerShell@5 — note the capital I)
917///
918/// Match is case-insensitive so a hand-written pipeline using `Script:` or
919/// `INLINESCRIPT:` is still picked up.
920fn extract_task_inline_script(
921    inputs: Option<&HashMap<String, serde_yaml::Value>>,
922) -> Option<String> {
923    let inputs = inputs?;
924    const KEYS: &[&str] = &["script", "inlinescript", "inline"];
925    for (raw_key, val) in inputs {
926        let lower = raw_key.to_lowercase();
927        if KEYS.contains(&lower.as_str()) {
928            if let Some(s) = val.as_str() {
929                if !s.is_empty() {
930                    return Some(s.to_string());
931                }
932            }
933        }
934    }
935    None
936}
937
938/// Add a DelegatesTo edge from a synthetic step node to a template image node.
939///
940/// Trust zone heuristic: templates referenced with `@repository` (e.g. `steps/deploy.yml@templates`)
941/// pull code from an external repository and are Untrusted. Plain relative paths like
942/// `steps/deploy.yml` resolve within the same repo and are FirstParty — mirroring how GHA
943/// treats `./local-action`.
944///
945/// `job_name` is `Some` when the delegation is created inside a job's scope
946/// (job-level template, or template step inside `process_steps`); it is `None`
947/// for stage-level template delegations that don't belong to a specific job.
948fn add_template_delegation(
949    step_name: &str,
950    template_path: &str,
951    token_id: NodeId,
952    job_name: Option<&str>,
953    graph: &mut AuthorityGraph,
954) {
955    let tpl_trust_zone = if template_path.contains('@') {
956        TrustZone::Untrusted
957    } else {
958        TrustZone::FirstParty
959    };
960    let step_id = graph.add_node(NodeKind::Step, step_name, TrustZone::FirstParty);
961    if let Some(jn) = job_name {
962        if let Some(node) = graph.nodes.get_mut(step_id) {
963            node.metadata.insert(META_JOB_NAME.into(), jn.into());
964        }
965    }
966    let tpl_id = graph.add_node(NodeKind::Image, template_path, tpl_trust_zone);
967    graph.add_edge(step_id, tpl_id, EdgeKind::DelegatesTo);
968    graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
969    graph.mark_partial(format!(
970        "template '{template_path}' cannot be resolved inline — authority within the template is unknown"
971    ));
972}
973
974/// Extract `$(varName)` references from a string, creating Secret nodes for
975/// non-predefined and non-plain ADO variables.
976/// Only content that is a valid ADO variable identifier (`[A-Za-z][A-Za-z0-9_]*`)
977/// is treated as a variable reference. This rejects PowerShell sub-expressions
978/// (`$($var)`), ADO template expressions (`${{ ... }}`), shell commands (`$(date)`),
979/// and anything with spaces or special characters.
980fn extract_dollar_paren_secrets(
981    text: &str,
982    step_id: NodeId,
983    plain_vars: &HashSet<String>,
984    graph: &mut AuthorityGraph,
985    cache: &mut HashMap<String, NodeId>,
986) {
987    let mut pos = 0;
988    let bytes = text.as_bytes();
989    while pos < bytes.len() {
990        if pos + 2 < bytes.len() && bytes[pos] == b'$' && bytes[pos + 1] == b'(' {
991            let start = pos + 2;
992            if let Some(end_offset) = text[start..].find(')') {
993                let var_name = &text[start..start + end_offset];
994                if is_valid_ado_identifier(var_name)
995                    && !is_predefined_ado_var(var_name)
996                    && !plain_vars.contains(var_name)
997                {
998                    let id = find_or_create_secret(graph, cache, var_name);
999                    // Mark secrets embedded in -var flag arguments: their values appear in
1000                    // pipeline logs (command string is logged before masking, and Terraform
1001                    // itself logs -var values in plan output and debug traces).
1002                    if is_in_terraform_var_flag(text, pos) {
1003                        if let Some(node) = graph.nodes.get_mut(id) {
1004                            node.metadata
1005                                .insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
1006                        }
1007                    }
1008                    graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
1009                }
1010                pos = start + end_offset + 1;
1011                continue;
1012            }
1013        }
1014        pos += 1;
1015    }
1016}
1017
/// Reports whether the `$(VAR)` starting at byte offset `var_pos` looks like
/// the value of a Terraform `-var` flag, as in `-var "key=$(VAR)"`.
///
/// Only the part of the current line that precedes `var_pos` is inspected:
/// it must contain both the substring `-var` and an `=`. Earlier lines are
/// not considered.
///
/// `var_pos` must be a char boundary within `text`; slicing panics otherwise.
fn is_in_terraform_var_flag(text: &str, var_pos: usize) -> bool {
    let preceding = &text[..var_pos];
    // `rsplit` always yields at least one piece: the text after the last
    // newline, or the whole prefix when there is no newline.
    let same_line = preceding.rsplit('\n').next().unwrap_or(preceding);
    same_line.contains('=') && same_line.contains("-var")
}
1026
/// Reports whether `name` has the shape of an ADO variable name.
///
/// Accepted: an ASCII letter followed by any number of ASCII letters,
/// digits, underscores, or dots (so dotted names like `Build.SourceBranch`
/// pass). Everything else is rejected — the empty string, PowerShell
/// variables (`$name`), template expressions (`{{ ... }}`), commands with
/// arguments (`date +%s`), complex expressions (`name -join ','`), and any
/// non-ASCII text.
fn is_valid_ado_identifier(name: &str) -> bool {
    // Every accepted character is ASCII, so a byte-wise check is equivalent
    // to a char-wise one: bytes of multi-byte chars never pass the ASCII tests.
    match name.as_bytes() {
        [] => false,
        [head, tail @ ..] => {
            head.is_ascii_alphabetic()
                && tail
                    .iter()
                    .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.'))
        }
    }
}
1041
/// Returns true if a variable name is a well-known ADO predefined variable.
/// These are system-provided and never represent secrets.
///
/// A name is predefined when it lives in one of the system namespaces
/// (`Build.`, `Agent.`, `System.`, `Pipeline.`, `Release.`, `Environment.`,
/// `Strategy.`, `Deployment.`, `Resources.`) or is exactly `TF_BUILD`.
///
/// Matching is ASCII case-insensitive because ADO resolves variable names
/// without regard to case — `$(build.buildId)` and `$(Build.BuildId)` refer
/// to the same variable. `TF_BUILD` is matched as a whole name, not as a
/// prefix, so a user variable such as `TF_BUILD_TOKEN` is not mistaken for a
/// system one.
fn is_predefined_ado_var(name: &str) -> bool {
    const NAMESPACE_PREFIXES: [&str; 9] = [
        "Build.",
        "Agent.",
        "System.",
        "Pipeline.",
        "Release.",
        "Environment.",
        "Strategy.",
        "Deployment.",
        "Resources.",
    ];

    // Compare on bytes so a multi-byte char straddling the prefix length
    // cannot cause a slicing panic.
    let has_prefix = |prefix: &str| {
        name.as_bytes()
            .get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()))
    };

    name.eq_ignore_ascii_case("TF_BUILD") || NAMESPACE_PREFIXES.iter().any(|&p| has_prefix(p))
}
1059
1060fn find_or_create_secret(
1061    graph: &mut AuthorityGraph,
1062    cache: &mut HashMap<String, NodeId>,
1063    name: &str,
1064) -> NodeId {
1065    if let Some(&id) = cache.get(name) {
1066        return id;
1067    }
1068    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
1069    cache.insert(name.to_string(), id);
1070    id
1071}
1072
1073fn yaml_value_as_str(val: &serde_yaml::Value) -> Option<&str> {
1074    val.as_str()
1075}
1076
1077// ── Serde models for ADO YAML ─────────────────────────────
1078
1079/// Top-level ADO pipeline definition.
1080/// ADO pipelines come in three shapes:
1081///   (a) stages → jobs → steps
1082///   (b) jobs → steps (no stages key)
1083///   (c) steps only (no stages or jobs key)
1084#[derive(Debug, Deserialize)]
1085pub struct AdoPipeline {
1086    #[serde(default)]
1087    pub trigger: Option<serde_yaml::Value>,
1088    #[serde(default)]
1089    pub pr: Option<serde_yaml::Value>,
1090    #[serde(default)]
1091    pub variables: Option<AdoVariables>,
1092    /// `stages:` is normally a sequence of stage objects, but real-world
1093    /// pipelines also use `stages: ${{ parameters.stages }}` (a template
1094    /// expression that resolves at runtime to a list). The custom
1095    /// deserializer accepts both shapes; non-sequence shapes resolve to
1096    /// `None` and the graph is marked Partial downstream.
1097    #[serde(default, deserialize_with = "deserialize_optional_stages")]
1098    pub stages: Option<Vec<AdoStage>>,
1099    #[serde(default)]
1100    pub jobs: Option<Vec<AdoJob>>,
1101    #[serde(default)]
1102    pub steps: Option<Vec<AdoStep>>,
1103    #[serde(default)]
1104    pub pool: Option<serde_yaml::Value>,
1105    /// Pipeline-level `workspace:` block. The only security-relevant field is
1106    /// `clean:` (`outputs`, `resources`, `all`, or `true`), which causes the
1107    /// agent to wipe the workspace between runs. Used to tag self-hosted Image
1108    /// nodes with `META_WORKSPACE_CLEAN`.
1109    #[serde(default)]
1110    pub workspace: Option<serde_yaml::Value>,
1111    /// `resources:` block — repository declarations, container declarations,
1112    /// pipeline declarations. We only consume `repositories[]` today.
1113    /// Pre-2019 ADO accepts a sequence form (`resources: [- repo: self]`)
1114    /// which has no `repositories:` key — the custom deserializer accepts
1115    /// both shapes and treats the sequence form as an empty resources block.
1116    #[serde(default, deserialize_with = "deserialize_optional_resources")]
1117    pub resources: Option<AdoResources>,
1118    /// Top-level `extends:` directive — `extends: { template: x@alias, ... }`.
1119    /// Captured raw so we can scan for `template: x@alias` references that
1120    /// consume a `resources.repositories[]` entry.
1121    #[serde(default)]
1122    pub extends: Option<serde_yaml::Value>,
1123    /// Top-level `parameters:` declarations. Each entry has at minimum a
1124    /// `name`; `type` defaults to `string` when omitted. `values:` is an
1125    /// optional allowlist that constrains caller input.
1126    /// ADO accepts two shapes: the typed sequence form
1127    /// (`- name: foo \n type: string \n default: bar`) and the legacy
1128    /// untyped map form (`parameters: { foo: bar, baz: '' }`) used in
1129    /// older template fragments. The custom deserializer normalizes both.
1130    #[serde(default, deserialize_with = "deserialize_optional_parameters")]
1131    pub parameters: Option<Vec<AdoParameter>>,
1132    /// Pipeline-level `permissions:` block. Controls the scope of
1133    /// `System.AccessToken` for all jobs in the pipeline unless overridden
1134    /// at the job level. Parsed to detect explicit scope restriction (e.g.
1135    /// `contents: none`) so `over_privileged_identity` doesn't fire on
1136    /// pipelines that have already locked down their token.
1137    #[serde(default)]
1138    pub permissions: Option<serde_yaml::Value>,
1139}
1140
1141/// Accept either a sequence of `AdoParameter` (modern typed form) or a
1142/// mapping of parameter name → default value (legacy untyped form used in
1143/// many template fragments). For the map form, each key becomes an
1144/// `AdoParameter` with the key as `name` and no type/values. Returns `None`
1145/// for any other shape (e.g. a bare template expression).
1146///
1147/// Implemented as a serde Visitor (rather than going through
1148/// `serde_yaml::Value`) so that downstream struct deserialization uses
1149/// serde's native lazy iteration — this avoids serde_yaml's strict
1150/// duplicate-key detection on `${{ else }}`-style template-conditional
1151/// keys that appear in stage/job `parameters:` blocks of unrelated entries.
1152fn deserialize_optional_parameters<'de, D>(
1153    deserializer: D,
1154) -> Result<Option<Vec<AdoParameter>>, D::Error>
1155where
1156    D: serde::Deserializer<'de>,
1157{
1158    use serde::de::{MapAccess, SeqAccess, Visitor};
1159    use std::fmt;
1160
1161    struct ParamsVisitor;
1162
1163    impl<'de> Visitor<'de> for ParamsVisitor {
1164        type Value = Option<Vec<AdoParameter>>;
1165
1166        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1167            f.write_str("a sequence of parameter declarations, a mapping of name→default, null, or a template expression")
1168        }
1169
1170        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1171            Ok(None)
1172        }
1173
1174        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1175            Ok(None)
1176        }
1177
1178        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1179            d.deserialize_any(self)
1180        }
1181
1182        // Bare scalar (template expression like `${{ parameters.X }}`) —
1183        // can't statically enumerate; treat as absent.
1184        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
1185            Ok(None)
1186        }
1187        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
1188            Ok(None)
1189        }
1190        fn visit_bool<E: serde::de::Error>(self, _v: bool) -> Result<Self::Value, E> {
1191            Ok(None)
1192        }
1193        fn visit_i64<E: serde::de::Error>(self, _v: i64) -> Result<Self::Value, E> {
1194            Ok(None)
1195        }
1196        fn visit_u64<E: serde::de::Error>(self, _v: u64) -> Result<Self::Value, E> {
1197            Ok(None)
1198        }
1199        fn visit_f64<E: serde::de::Error>(self, _v: f64) -> Result<Self::Value, E> {
1200            Ok(None)
1201        }
1202
1203        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
1204            let mut out = Vec::new();
1205            while let Some(item) = seq.next_element::<serde_yaml::Value>()? {
1206                if let Ok(p) = serde_yaml::from_value::<AdoParameter>(item) {
1207                    out.push(p);
1208                }
1209            }
1210            Ok(Some(out))
1211        }
1212
1213        fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
1214            // Legacy untyped map form: name → default-value. We collect
1215            // names; defaults are intentionally discarded (matches typed-
1216            // form semantics where `default:` is also ignored).
1217            let mut out = Vec::new();
1218            while let Some(key) = map.next_key::<serde_yaml::Value>()? {
1219                let _ignore = map.next_value::<serde::de::IgnoredAny>()?;
1220                let name = match key {
1221                    serde_yaml::Value::String(s) if !s.is_empty() => s,
1222                    _ => continue,
1223                };
1224                out.push(AdoParameter {
1225                    name: Some(name),
1226                    param_type: None,
1227                    values: None,
1228                });
1229            }
1230            Ok(Some(out))
1231        }
1232    }
1233
1234    deserializer.deserialize_any(ParamsVisitor)
1235}
1236
1237/// Accept either an `AdoResources` mapping (modern form with `repositories:`,
1238/// `containers:`, `pipelines:`) or the legacy sequence form (`resources: [-
1239/// repo: self]`, pre-2019 ADO syntax). The legacy form has no
1240/// `repositories:` key, so we return an empty `AdoResources` for it — the
1241/// repository-tracking rules then see no aliases to track, which is correct
1242/// (legacy `repo: self` declares no external repositories).
1243fn deserialize_optional_resources<'de, D>(deserializer: D) -> Result<Option<AdoResources>, D::Error>
1244where
1245    D: serde::Deserializer<'de>,
1246{
1247    use serde::de::{MapAccess, SeqAccess, Visitor};
1248    use std::fmt;
1249
1250    struct ResourcesVisitor;
1251
1252    impl<'de> Visitor<'de> for ResourcesVisitor {
1253        type Value = Option<AdoResources>;
1254
1255        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1256            f.write_str("an AdoResources mapping or a legacy `- repo:` sequence")
1257        }
1258
1259        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1260            Ok(None)
1261        }
1262        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1263            Ok(None)
1264        }
1265        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1266            d.deserialize_any(self)
1267        }
1268
1269        // Legacy sequence form — drain it without producing any
1270        // repository entries. Modern rules track aliases via the
1271        // `AdoResources.repositories[]` shape, which the legacy form
1272        // does not produce.
1273        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
1274            while seq.next_element::<serde::de::IgnoredAny>()?.is_some() {}
1275            Ok(Some(AdoResources::default()))
1276        }
1277
1278        fn visit_map<A: MapAccess<'de>>(self, map: A) -> Result<Self::Value, A::Error> {
1279            let r = AdoResources::deserialize(serde::de::value::MapAccessDeserializer::new(map))?;
1280            Ok(Some(r))
1281        }
1282    }
1283
1284    deserializer.deserialize_any(ResourcesVisitor)
1285}
1286
1287/// Accept either a sequence of `AdoStage` (the normal form) or a bare
1288/// template expression (`stages: ${{ parameters.stages }}`) which resolves
1289/// at runtime. For the template-expression case, return `None` so the
1290/// pipeline still parses; the graph will simply contain no stages from this
1291/// scope (downstream code already handles empty stage lists).
1292fn deserialize_optional_stages<'de, D>(deserializer: D) -> Result<Option<Vec<AdoStage>>, D::Error>
1293where
1294    D: serde::Deserializer<'de>,
1295{
1296    use serde::de::{SeqAccess, Visitor};
1297    use std::fmt;
1298
1299    struct StagesVisitor;
1300
1301    impl<'de> Visitor<'de> for StagesVisitor {
1302        type Value = Option<Vec<AdoStage>>;
1303
1304        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1305            f.write_str("a sequence of stages or a template expression")
1306        }
1307
1308        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1309            Ok(None)
1310        }
1311        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
1312            Ok(None)
1313        }
1314        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
1315            d.deserialize_any(self)
1316        }
1317        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
1318            Ok(None)
1319        }
1320        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
1321            Ok(None)
1322        }
1323
1324        fn visit_seq<A: SeqAccess<'de>>(self, seq: A) -> Result<Self::Value, A::Error> {
1325            let stages =
1326                Vec::<AdoStage>::deserialize(serde::de::value::SeqAccessDeserializer::new(seq))?;
1327            Ok(Some(stages))
1328        }
1329    }
1330
1331    deserializer.deserialize_any(StagesVisitor)
1332}
1333
1334/// `resources:` block. Only `repositories[]` is modelled today.
1335#[derive(Debug, Default, Deserialize)]
1336pub struct AdoResources {
1337    #[serde(default)]
1338    pub repositories: Vec<AdoRepository>,
1339}
1340
1341/// A single `resources.repositories[]` entry — declares an external repo
1342/// alias the pipeline can consume via `template: x@alias`, `extends:`, or
1343/// `checkout: alias`.
1344#[derive(Debug, Deserialize)]
1345pub struct AdoRepository {
1346    /// The alias used by consumers (`template: file@<repository>`).
1347    pub repository: String,
1348    /// `git`, `github`, `bitbucket`, or `azureGit`.
1349    #[serde(default, rename = "type")]
1350    pub repo_type: Option<String>,
1351    /// Full repo path (e.g. `org/repo`).
1352    #[serde(default)]
1353    pub name: Option<String>,
1354    /// Optional ref. Absent = default branch (mutable). Present forms:
1355    /// `refs/tags/v1.2.3`, `refs/heads/main`, bare branch `main`, or a SHA.
1356    #[serde(default, rename = "ref")]
1357    pub git_ref: Option<String>,
1358}
1359
1360/// Pipeline / template `parameters:` entry. We deliberately ignore `default:`
1361/// — only the name, type, and `values:` allowlist matter for our rules.
1362#[derive(Debug, Deserialize)]
1363pub struct AdoParameter {
1364    #[serde(default)]
1365    pub name: Option<String>,
1366    #[serde(rename = "type", default)]
1367    pub param_type: Option<String>,
1368    #[serde(default)]
1369    pub values: Option<Vec<serde_yaml::Value>>,
1370}
1371
1372#[derive(Debug, Deserialize)]
1373pub struct AdoStage {
1374    /// Stage identifier. Absent when the stage entry is a template reference.
1375    #[serde(default)]
1376    pub stage: Option<String>,
1377    /// Stage-level template reference (`- template: path/to/stage.yml`).
1378    #[serde(default)]
1379    pub template: Option<String>,
1380    #[serde(default)]
1381    pub variables: Option<AdoVariables>,
1382    #[serde(default)]
1383    pub jobs: Vec<AdoJob>,
1384}
1385
1386#[derive(Debug, Deserialize)]
1387pub struct AdoJob {
1388    /// Regular job identifier
1389    #[serde(default)]
1390    pub job: Option<String>,
1391    /// Deployment job identifier
1392    #[serde(default)]
1393    pub deployment: Option<String>,
1394    #[serde(default)]
1395    pub variables: Option<AdoVariables>,
1396    #[serde(default)]
1397    pub steps: Option<Vec<AdoStep>>,
1398    /// Deployment-job nested strategy: runOnce/rolling/canary all share the
1399    /// shape `strategy.{runOnce,rolling,canary}.deploy.steps`. We only need
1400    /// the steps — the strategy choice itself doesn't change authority flow.
1401    #[serde(default)]
1402    pub strategy: Option<AdoStrategy>,
1403    #[serde(default)]
1404    pub pool: Option<serde_yaml::Value>,
1405    /// Job-level `workspace:` block. The only security-relevant field is
1406    /// `clean:` which causes the agent to wipe the workspace between runs.
1407    #[serde(default)]
1408    pub workspace: Option<serde_yaml::Value>,
1409    /// Job-level template reference
1410    #[serde(default)]
1411    pub template: Option<String>,
1412    /// Deployment-job environment binding. Two YAML shapes:
1413    ///
1414    ///   - `environment: production` (string shorthand)
1415    ///   - `environment: { name: staging, resourceType: VirtualMachine }` (mapping)
1416    ///
1417    /// When present, the environment may have approvals/checks attached in ADO's
1418    /// environment configuration. Approvals are a manual gate — authority cannot
1419    /// propagate past one without human intervention. We treat any `environment:`
1420    /// binding as an approval candidate and tag the job's steps so propagation
1421    /// rules can downgrade severity. (We can't see the approval config from YAML
1422    /// alone; the binding is the strongest signal available at parse time.)
1423    #[serde(default)]
1424    pub environment: Option<serde_yaml::Value>,
1425}
1426
1427impl AdoJob {
1428    pub fn effective_name(&self) -> String {
1429        self.job
1430            .as_deref()
1431            .or(self.deployment.as_deref())
1432            .unwrap_or("job")
1433            .to_string()
1434    }
1435
1436    /// Returns the effective step list for this job.
1437    ///
1438    /// Regular jobs put steps under `steps:` directly. Deployment jobs nest
1439    /// them under `strategy.{runOnce,rolling,canary}.{deploy,preDeploy,
1440    /// postDeploy,routeTraffic,onSuccess,onFailure}.steps`. We merge all
1441    /// strategy-nested step lists into a single sequence so downstream rules
1442    /// see them as part of the job. Order: regular `steps:` first, then any
1443    /// strategy-nested steps in deterministic phase order.
1444    pub fn all_steps(&self) -> Vec<AdoStep> {
1445        let mut out: Vec<AdoStep> = Vec::new();
1446        if let Some(ref s) = self.steps {
1447            out.extend(s.iter().cloned());
1448        }
1449        if let Some(ref strat) = self.strategy {
1450            for phase in strat.phases() {
1451                if let Some(ref s) = phase.steps {
1452                    out.extend(s.iter().cloned());
1453                }
1454            }
1455        }
1456        out
1457    }
1458
1459    /// Returns true when the job is bound to an `environment:` — either the
1460    /// string form (`environment: production`) or the mapping form with a
1461    /// non-empty `name:` field. An empty mapping or empty string is ignored.
1462    pub fn has_environment_binding(&self) -> bool {
1463        match self.environment.as_ref() {
1464            None => false,
1465            Some(serde_yaml::Value::String(s)) => !s.trim().is_empty(),
1466            Some(serde_yaml::Value::Mapping(m)) => m
1467                .get("name")
1468                .and_then(|v| v.as_str())
1469                .map(|s| !s.trim().is_empty())
1470                .unwrap_or(false),
1471            _ => false,
1472        }
1473    }
1474}
1475
1476/// Deployment-job `strategy:` block. ADO ships three strategies — runOnce,
1477/// rolling, canary — each with multiple lifecycle phases that may carry
1478/// their own step list. We capture all of them; the AdoJob::all_steps
1479/// helper flattens them into one sequence.
1480#[derive(Debug, Default, Deserialize, Clone)]
1481pub struct AdoStrategy {
1482    #[serde(default, rename = "runOnce")]
1483    pub run_once: Option<AdoStrategyRunOnce>,
1484    #[serde(default)]
1485    pub rolling: Option<AdoStrategyRunOnce>,
1486    #[serde(default)]
1487    pub canary: Option<AdoStrategyRunOnce>,
1488}
1489
1490impl AdoStrategy {
1491    /// Iterate over every populated lifecycle phase across all strategies.
1492    pub fn phases(&self) -> Vec<&AdoStrategyPhase> {
1493        let mut out: Vec<&AdoStrategyPhase> = Vec::new();
1494        for runner in [&self.run_once, &self.rolling, &self.canary]
1495            .iter()
1496            .copied()
1497            .flatten()
1498        {
1499            for phase in [
1500                &runner.deploy,
1501                &runner.pre_deploy,
1502                &runner.post_deploy,
1503                &runner.route_traffic,
1504            ]
1505            .into_iter()
1506            .flatten()
1507            {
1508                out.push(phase);
1509            }
1510            if let Some(ref on) = runner.on {
1511                if let Some(ref s) = on.success {
1512                    out.push(s);
1513                }
1514                if let Some(ref f) = on.failure {
1515                    out.push(f);
1516                }
1517            }
1518        }
1519        out
1520    }
1521}
1522
1523/// Lifecycle phases carried by every deployment strategy. Each phase may
1524/// have its own `steps:`. Covering all six avoids silently dropping
1525/// privileged setup/teardown steps from the authority graph.
1526#[derive(Debug, Default, Deserialize, Clone)]
1527pub struct AdoStrategyRunOnce {
1528    #[serde(default)]
1529    pub deploy: Option<AdoStrategyPhase>,
1530    #[serde(default, rename = "preDeploy")]
1531    pub pre_deploy: Option<AdoStrategyPhase>,
1532    #[serde(default, rename = "postDeploy")]
1533    pub post_deploy: Option<AdoStrategyPhase>,
1534    #[serde(default, rename = "routeTraffic")]
1535    pub route_traffic: Option<AdoStrategyPhase>,
1536    #[serde(default)]
1537    pub on: Option<AdoStrategyOn>,
1538}
1539
1540#[derive(Debug, Default, Deserialize, Clone)]
1541pub struct AdoStrategyOn {
1542    #[serde(default)]
1543    pub success: Option<AdoStrategyPhase>,
1544    #[serde(default)]
1545    pub failure: Option<AdoStrategyPhase>,
1546}
1547
1548#[derive(Debug, Default, Deserialize, Clone)]
1549pub struct AdoStrategyPhase {
1550    #[serde(default)]
1551    pub steps: Option<Vec<AdoStep>>,
1552}
1553
1554#[derive(Debug, Deserialize, Clone)]
1555pub struct AdoStep {
1556    /// Task reference e.g. `AzureCLI@2`
1557    #[serde(default)]
1558    pub task: Option<String>,
1559    /// Inline script (cmd/sh)
1560    #[serde(default)]
1561    pub script: Option<String>,
1562    /// Inline bash script
1563    #[serde(default)]
1564    pub bash: Option<String>,
1565    /// Inline PowerShell script
1566    #[serde(default)]
1567    pub powershell: Option<String>,
1568    /// Cross-platform PowerShell
1569    #[serde(default)]
1570    pub pwsh: Option<String>,
1571    /// Step-level template reference
1572    #[serde(default)]
1573    pub template: Option<String>,
1574    #[serde(rename = "displayName", default)]
1575    pub display_name: Option<String>,
1576    /// Legacy name alias
1577    #[serde(default)]
1578    pub name: Option<String>,
1579    #[serde(default)]
1580    pub env: Option<HashMap<String, String>>,
1581    /// Task inputs (key → value, but values may be nested)
1582    #[serde(default)]
1583    pub inputs: Option<HashMap<String, serde_yaml::Value>>,
1584    /// Checkout step target (e.g. `self`, a repo alias, or `none`)
1585    #[serde(default)]
1586    pub checkout: Option<String>,
1587    /// When true on a checkout step, writes credentials to .git/config for subsequent steps.
1588    #[serde(rename = "persistCredentials", default)]
1589    pub persist_credentials: Option<bool>,
1590}
1591
1592/// ADO `variables:` block. Can be a sequence (list of group/name-value entries)
1593/// or a mapping (variableName: value). We normalise both into a Vec<AdoVariable>.
1594#[derive(Debug, Default)]
1595pub struct AdoVariables(pub Vec<AdoVariable>);
1596
1597impl<'de> serde::Deserialize<'de> for AdoVariables {
1598    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1599    where
1600        D: serde::Deserializer<'de>,
1601    {
1602        let raw = serde_yaml::Value::deserialize(deserializer)?;
1603        let mut vars = Vec::new();
1604
1605        match raw {
1606            serde_yaml::Value::Sequence(seq) => {
1607                for item in seq {
1608                    if let Some(map) = item.as_mapping() {
1609                        if let Some(group_val) = map.get("group") {
1610                            if let Some(group) = group_val.as_str() {
1611                                vars.push(AdoVariable::Group {
1612                                    group: group.to_string(),
1613                                });
1614                                continue;
1615                            }
1616                        }
1617                        let name = map
1618                            .get("name")
1619                            .and_then(|v| v.as_str())
1620                            .unwrap_or("")
1621                            .to_string();
1622                        let value = map
1623                            .get("value")
1624                            .and_then(|v| v.as_str())
1625                            .unwrap_or("")
1626                            .to_string();
1627                        let is_secret = map
1628                            .get("isSecret")
1629                            .and_then(|v| v.as_bool())
1630                            .unwrap_or(false);
1631                        vars.push(AdoVariable::Named {
1632                            name,
1633                            value,
1634                            is_secret,
1635                        });
1636                    }
1637                }
1638            }
1639            serde_yaml::Value::Mapping(map) => {
1640                for (k, v) in map {
1641                    let name = k.as_str().unwrap_or("").to_string();
1642                    let value = v.as_str().unwrap_or("").to_string();
1643                    vars.push(AdoVariable::Named {
1644                        name,
1645                        value,
1646                        is_secret: false,
1647                    });
1648                }
1649            }
1650            _ => {}
1651        }
1652
1653        Ok(AdoVariables(vars))
1654    }
1655}
1656
/// One normalised entry of a `variables:` block.
#[derive(Debug)]
pub enum AdoVariable {
    /// Reference to a variable group (`- group: <name>`).
    Group {
        /// Name of the referenced group.
        group: String,
    },
    /// A variable declared by name, from either the sequence or the
    /// mapping form.
    Named {
        /// Variable name; empty when the YAML had no string name.
        name: String,
        /// Variable value; empty when the YAML had no string value.
        value: String,
        /// Whether the entry was flagged `isSecret: true`.
        is_secret: bool,
    },
}
1668
/// Heuristic: does this YAML contain a template-expression conditional
/// wrapper such as `- ${{ if eq(parameters.X, true) }}:`?
///
/// This is the construct that breaks root-level mapping parses but is valid
/// in an ADO template fragment included by a parent pipeline.
///
/// A line matches when, after removing leading indentation and an optional
/// list marker, it
///
/// * starts with `${{`,
/// * contains `if ` or `if(`, and
/// * ends with `:` (trailing whitespace ignored).
///
/// Leading indentation is ignored, so a nested conditional matches just like
/// one at column 0. The list marker is a `-` followed by any amount of
/// whitespace: YAML allows `-   value` as well as `- value`, and both forms
/// are recognised. A `-` glued directly to the expression (`-${{ ... }}:`)
/// is not a list item and does not match.
///
/// Returns `false` for empty input.
fn has_root_parameter_conditional(content: &str) -> bool {
    content.lines().any(|line| {
        let trimmed = line.trim_start();
        // Drop the list marker and all separating whitespace so that
        // `- ${{ if ... }}:`, `-   ${{ if ... }}:` and the bare
        // `${{ if ... }}:` form all reduce to the same candidate text.
        let candidate = match trimmed.strip_prefix('-') {
            Some(rest) if rest.starts_with(char::is_whitespace) => rest.trim_start(),
            _ => trimmed,
        };
        candidate.starts_with("${{")
            && (candidate.contains("if ") || candidate.contains("if("))
            && candidate.trim_end().ends_with(':')
    })
}
1688
1689#[cfg(test)]
1690mod tests {
1691    use super::*;
1692
1693    fn parse(yaml: &str) -> AuthorityGraph {
1694        let parser = AdoParser;
1695        let source = PipelineSource {
1696            file: "azure-pipelines.yml".into(),
1697            repo: None,
1698            git_ref: None,
1699            commit_sha: None,
1700        };
1701        parser.parse(yaml, &source).unwrap()
1702    }
1703
1704    #[test]
1705    fn parses_simple_pipeline() {
1706        let yaml = r#"
1707trigger:
1708  - main
1709
1710jobs:
1711  - job: Build
1712    steps:
1713      - script: echo hello
1714        displayName: Say hello
1715"#;
1716        let graph = parse(yaml);
1717        assert!(graph.nodes.len() >= 2); // System.AccessToken + step
1718    }
1719
1720    #[test]
1721    fn system_access_token_created() {
1722        let yaml = r#"
1723steps:
1724  - script: echo hi
1725"#;
1726        let graph = parse(yaml);
1727        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1728        assert_eq!(identities.len(), 1);
1729        assert_eq!(identities[0].name, "System.AccessToken");
1730        assert_eq!(
1731            identities[0].metadata.get(META_IDENTITY_SCOPE),
1732            Some(&"broad".to_string())
1733        );
1734    }
1735
1736    #[test]
1737    fn variable_group_creates_secret_and_marks_partial() {
1738        let yaml = r#"
1739variables:
1740  - group: MySecretGroup
1741
1742steps:
1743  - script: echo hi
1744"#;
1745        let graph = parse(yaml);
1746        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1747        assert_eq!(secrets.len(), 1);
1748        assert_eq!(secrets[0].name, "MySecretGroup");
1749        assert_eq!(
1750            secrets[0].metadata.get(META_VARIABLE_GROUP),
1751            Some(&"true".to_string())
1752        );
1753        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1754        assert!(
1755            graph
1756                .completeness_gaps
1757                .iter()
1758                .any(|g| g.contains("MySecretGroup")),
1759            "completeness gap should name the variable group"
1760        );
1761    }
1762
1763    #[test]
1764    fn task_with_azure_subscription_creates_service_connection_identity() {
1765        let yaml = r#"
1766steps:
1767  - task: AzureCLI@2
1768    displayName: Deploy to Azure
1769    inputs:
1770      azureSubscription: MyServiceConnection
1771      scriptType: bash
1772      inlineScript: az group list
1773"#;
1774        let graph = parse(yaml);
1775        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1776        // System.AccessToken + service connection
1777        assert_eq!(identities.len(), 2);
1778        let conn = identities
1779            .iter()
1780            .find(|i| i.name == "MyServiceConnection")
1781            .unwrap();
1782        assert_eq!(
1783            conn.metadata.get(META_SERVICE_CONNECTION),
1784            Some(&"true".to_string())
1785        );
1786        assert_eq!(
1787            conn.metadata.get(META_IDENTITY_SCOPE),
1788            Some(&"broad".to_string())
1789        );
1790    }
1791
1792    #[test]
1793    fn service_connection_does_not_get_unconditional_oidc_tag() {
1794        let yaml = r#"
1795steps:
1796  - task: AzureCLI@2
1797    displayName: Deploy to Azure
1798    inputs:
1799      azureSubscription: MyClassicSpnConnection
1800      scriptType: bash
1801      inlineScript: az group list
1802"#;
1803        let graph = parse(yaml);
1804        let conn = graph
1805            .nodes_of_kind(NodeKind::Identity)
1806            .find(|i| i.name == "MyClassicSpnConnection")
1807            .expect("service connection identity should exist");
1808        assert_eq!(
1809            conn.metadata.get(META_OIDC),
1810            None,
1811            "service connections must not be tagged META_OIDC without a clear OIDC signal"
1812        );
1813    }
1814
1815    #[test]
1816    fn task_with_connected_service_name_creates_identity() {
1817        let yaml = r#"
1818steps:
1819  - task: SqlAzureDacpacDeployment@1
1820    inputs:
1821      ConnectedServiceNameARM: MySqlConnection
1822"#;
1823        let graph = parse(yaml);
1824        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1825        assert!(
1826            identities.iter().any(|i| i.name == "MySqlConnection"),
1827            "connectedServiceNameARM should create identity"
1828        );
1829    }
1830
1831    #[test]
1832    fn script_step_classified_as_first_party() {
1833        let yaml = r#"
1834steps:
1835  - script: echo hi
1836    displayName: Say hi
1837"#;
1838        let graph = parse(yaml);
1839        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1840        assert_eq!(steps.len(), 1);
1841        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
1842    }
1843
1844    #[test]
1845    fn bash_step_classified_as_first_party() {
1846        let yaml = r#"
1847steps:
1848  - bash: echo hi
1849"#;
1850        let graph = parse(yaml);
1851        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1852        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
1853    }
1854
1855    #[test]
1856    fn task_step_classified_as_untrusted() {
1857        let yaml = r#"
1858steps:
1859  - task: DotNetCoreCLI@2
1860    inputs:
1861      command: build
1862"#;
1863        let graph = parse(yaml);
1864        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1865        assert_eq!(steps.len(), 1);
1866        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
1867    }
1868
1869    #[test]
1870    fn dollar_paren_var_in_script_creates_secret() {
1871        let yaml = r#"
1872steps:
1873  - script: |
1874      curl -H "Authorization: $(MY_API_TOKEN)" https://api.example.com
1875    displayName: Call API
1876"#;
1877        let graph = parse(yaml);
1878        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1879        assert_eq!(secrets.len(), 1);
1880        assert_eq!(secrets[0].name, "MY_API_TOKEN");
1881    }
1882
1883    #[test]
1884    fn predefined_ado_var_not_treated_as_secret() {
1885        let yaml = r#"
1886steps:
1887  - script: |
1888      echo $(Build.BuildId)
1889      echo $(Agent.WorkFolder)
1890      echo $(System.DefaultWorkingDirectory)
1891    displayName: Print vars
1892"#;
1893        let graph = parse(yaml);
1894        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1895        assert!(
1896            secrets.is_empty(),
1897            "predefined ADO vars should not be treated as secrets, got: {:?}",
1898            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
1899        );
1900    }
1901
1902    #[test]
1903    fn template_reference_creates_delegates_to_and_marks_partial() {
1904        let yaml = r#"
1905steps:
1906  - template: steps/deploy.yml
1907    parameters:
1908      env: production
1909"#;
1910        let graph = parse(yaml);
1911        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1912        assert_eq!(steps.len(), 1);
1913
1914        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1915        assert_eq!(images.len(), 1);
1916        assert_eq!(images[0].name, "steps/deploy.yml");
1917
1918        let delegates: Vec<_> = graph
1919            .edges_from(steps[0].id)
1920            .filter(|e| e.kind == EdgeKind::DelegatesTo)
1921            .collect();
1922        assert_eq!(delegates.len(), 1);
1923
1924        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
1925    }
1926
1927    #[test]
1928    fn top_level_steps_no_jobs() {
1929        let yaml = r#"
1930steps:
1931  - script: echo a
1932  - script: echo b
1933"#;
1934        let graph = parse(yaml);
1935        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1936        assert_eq!(steps.len(), 2);
1937    }
1938
1939    #[test]
1940    fn top_level_jobs_no_stages() {
1941        let yaml = r#"
1942jobs:
1943  - job: JobA
1944    steps:
1945      - script: echo a
1946  - job: JobB
1947    steps:
1948      - script: echo b
1949"#;
1950        let graph = parse(yaml);
1951        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1952        assert_eq!(steps.len(), 2);
1953    }
1954
1955    #[test]
1956    fn stages_with_nested_jobs_parsed() {
1957        let yaml = r#"
1958stages:
1959  - stage: Build
1960    jobs:
1961      - job: Compile
1962        steps:
1963          - script: cargo build
1964  - stage: Test
1965    jobs:
1966      - job: UnitTest
1967        steps:
1968          - script: cargo test
1969"#;
1970        let graph = parse(yaml);
1971        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1972        assert_eq!(steps.len(), 2);
1973    }
1974
1975    #[test]
1976    fn all_steps_linked_to_system_access_token() {
1977        let yaml = r#"
1978steps:
1979  - script: echo a
1980  - task: SomeTask@1
1981    inputs: {}
1982"#;
1983        let graph = parse(yaml);
1984        let token: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
1985        assert_eq!(token.len(), 1);
1986        let token_id = token[0].id;
1987
1988        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1989        for step in &steps {
1990            let links: Vec<_> = graph
1991                .edges_from(step.id)
1992                .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == token_id)
1993                .collect();
1994            assert_eq!(
1995                links.len(),
1996                1,
1997                "step '{}' must link to System.AccessToken",
1998                step.name
1999            );
2000        }
2001    }
2002
2003    #[test]
2004    fn named_secret_variable_creates_secret_node() {
2005        let yaml = r#"
2006variables:
2007  - name: MY_PASSWORD
2008    value: dummy
2009    isSecret: true
2010
2011steps:
2012  - script: echo hi
2013"#;
2014        let graph = parse(yaml);
2015        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2016        assert_eq!(secrets.len(), 1);
2017        assert_eq!(secrets[0].name, "MY_PASSWORD");
2018    }
2019
2020    #[test]
2021    fn variables_as_mapping_parsed() {
2022        let yaml = r#"
2023variables:
2024  MY_VAR: hello
2025  ANOTHER_VAR: world
2026
2027steps:
2028  - script: echo hi
2029"#;
2030        let graph = parse(yaml);
2031        // Mapping-style variables without isSecret — no secret nodes created
2032        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2033        assert!(
2034            secrets.is_empty(),
2035            "plain mapping vars should not create secret nodes"
2036        );
2037    }
2038
2039    #[test]
2040    fn persist_credentials_creates_persists_to_edge() {
2041        let yaml = r#"
2042steps:
2043  - checkout: self
2044    persistCredentials: true
2045  - script: git push
2046"#;
2047        let graph = parse(yaml);
2048        let token_id = graph
2049            .nodes_of_kind(NodeKind::Identity)
2050            .find(|n| n.name == "System.AccessToken")
2051            .expect("System.AccessToken must exist")
2052            .id;
2053
2054        let persists_edges: Vec<_> = graph
2055            .edges
2056            .iter()
2057            .filter(|e| e.kind == EdgeKind::PersistsTo && e.to == token_id)
2058            .collect();
2059        assert_eq!(
2060            persists_edges.len(),
2061            1,
2062            "checkout with persistCredentials: true must produce exactly one PersistsTo edge"
2063        );
2064    }
2065
2066    #[test]
2067    fn checkout_without_persist_credentials_no_persists_to_edge() {
2068        let yaml = r#"
2069steps:
2070  - checkout: self
2071  - script: echo hi
2072"#;
2073        let graph = parse(yaml);
2074        let persists_edges: Vec<_> = graph
2075            .edges
2076            .iter()
2077            .filter(|e| e.kind == EdgeKind::PersistsTo)
2078            .collect();
2079        assert!(
2080            persists_edges.is_empty(),
2081            "checkout without persistCredentials should not produce PersistsTo edge"
2082        );
2083    }
2084
2085    #[test]
2086    fn var_flag_secret_marked_as_cli_flag_exposed() {
2087        let yaml = r#"
2088steps:
2089  - script: |
2090      terraform apply \
2091        -var "db_password=$(db_password)" \
2092        -var "api_key=$(api_key)"
2093    displayName: Terraform apply
2094"#;
2095        let graph = parse(yaml);
2096        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2097        assert!(!secrets.is_empty(), "should detect secrets from -var flags");
2098        for secret in &secrets {
2099            assert_eq!(
2100                secret.metadata.get(META_CLI_FLAG_EXPOSED),
2101                Some(&"true".to_string()),
2102                "secret '{}' passed via -var flag should be marked cli_flag_exposed",
2103                secret.name
2104            );
2105        }
2106    }
2107
2108    #[test]
2109    fn non_var_flag_secret_not_marked_as_cli_flag_exposed() {
2110        let yaml = r#"
2111steps:
2112  - script: |
2113      curl -H "Authorization: $(MY_TOKEN)" https://api.example.com
2114"#;
2115        let graph = parse(yaml);
2116        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2117        assert_eq!(secrets.len(), 1);
2118        assert!(
2119            !secrets[0].metadata.contains_key(META_CLI_FLAG_EXPOSED),
2120            "non -var secret should not be marked as cli_flag_exposed"
2121        );
2122    }
2123
2124    #[test]
2125    fn step_linked_to_variable_group_secret() {
2126        let yaml = r#"
2127variables:
2128  - group: ProdSecrets
2129
2130steps:
2131  - script: deploy.sh
2132"#;
2133        let graph = parse(yaml);
2134        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
2135        assert_eq!(secrets.len(), 1);
2136        let secret_id = secrets[0].id;
2137
2138        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2139        let links: Vec<_> = graph
2140            .edges_from(steps[0].id)
2141            .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == secret_id)
2142            .collect();
2143        assert_eq!(
2144            links.len(),
2145            1,
2146            "step should be linked to variable group secret"
2147        );
2148    }
2149
2150    #[test]
2151    fn pr_trigger_sets_meta_trigger_on_graph() {
2152        let yaml = r#"
2153pr:
2154  - '*'
2155
2156steps:
2157  - script: echo hi
2158"#;
2159        let graph = parse(yaml);
2160        assert_eq!(
2161            graph.metadata.get(META_TRIGGER),
2162            Some(&"pr".to_string()),
2163            "ADO pr: trigger should set graph META_TRIGGER"
2164        );
2165    }
2166
2167    #[test]
2168    fn self_hosted_pool_by_name_creates_image_with_self_hosted_metadata() {
2169        let yaml = r#"
2170pool:
2171  name: my-self-hosted-pool
2172
2173steps:
2174  - script: echo hi
2175"#;
2176        let graph = parse(yaml);
2177        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2178        assert_eq!(images.len(), 1);
2179        assert_eq!(images[0].name, "my-self-hosted-pool");
2180        assert_eq!(
2181            images[0].metadata.get(META_SELF_HOSTED),
2182            Some(&"true".to_string()),
2183            "pool.name without vmImage must be tagged self-hosted"
2184        );
2185    }
2186
2187    #[test]
2188    fn vm_image_pool_is_not_tagged_self_hosted() {
2189        let yaml = r#"
2190pool:
2191  vmImage: ubuntu-latest
2192
2193steps:
2194  - script: echo hi
2195"#;
2196        let graph = parse(yaml);
2197        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
2198        assert_eq!(images.len(), 1);
2199        assert_eq!(images[0].name, "ubuntu-latest");
2200        assert!(
2201            !images[0].metadata.contains_key(META_SELF_HOSTED),
2202            "pool.vmImage is Microsoft-hosted — must not be tagged self-hosted"
2203        );
2204    }
2205
2206    #[test]
2207    fn checkout_self_step_tagged_with_meta_checkout_self() {
2208        let yaml = r#"
2209steps:
2210  - checkout: self
2211  - script: echo hi
2212"#;
2213        let graph = parse(yaml);
2214        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2215        assert_eq!(steps.len(), 2);
2216        let checkout_step = steps
2217            .iter()
2218            .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
2219            .expect("one step must be tagged META_CHECKOUT_SELF");
2220        assert_eq!(
2221            checkout_step.metadata.get(META_CHECKOUT_SELF),
2222            Some(&"true".to_string())
2223        );
2224    }
2225
2226    #[test]
2227    fn vso_setvariable_sets_meta_writes_env_gate() {
2228        let yaml = r###"
2229steps:
2230  - script: |
2231      echo "##vso[task.setvariable variable=FOO]bar"
2232    displayName: Set variable
2233"###;
2234        let graph = parse(yaml);
2235        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2236        assert_eq!(steps.len(), 1);
2237        assert_eq!(
2238            steps[0].metadata.get(META_WRITES_ENV_GATE),
2239            Some(&"true".to_string()),
2240            "##vso[task.setvariable] must mark META_WRITES_ENV_GATE"
2241        );
2242    }
2243
2244    #[test]
2245    fn environment_key_tags_job_with_env_approval() {
2246        // String form: `environment: production`
2247        let yaml_string_form = r#"
2248jobs:
2249  - deployment: DeployWeb
2250    environment: production
2251    steps:
2252      - script: echo deploying
2253        displayName: Deploy
2254"#;
2255        let g1 = parse(yaml_string_form);
2256        let tagged: Vec<_> = g1
2257            .nodes_of_kind(NodeKind::Step)
2258            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
2259            .collect();
2260        assert!(
2261            !tagged.is_empty(),
2262            "string-form `environment:` must tag job's step nodes with META_ENV_APPROVAL"
2263        );
2264
2265        // Mapping form: `environment: { name: staging }`
2266        let yaml_mapping_form = r#"
2267jobs:
2268  - deployment: DeployAPI
2269    environment:
2270      name: staging
2271      resourceType: VirtualMachine
2272    steps:
2273      - script: echo deploying
2274        displayName: Deploy
2275"#;
2276        let g2 = parse(yaml_mapping_form);
2277        let tagged2: Vec<_> = g2
2278            .nodes_of_kind(NodeKind::Step)
2279            .filter(|s| s.metadata.get(META_ENV_APPROVAL) == Some(&"true".to_string()))
2280            .collect();
2281        assert!(
2282            !tagged2.is_empty(),
2283            "mapping-form `environment: {{ name: ... }}` must tag job's step nodes"
2284        );
2285
2286        // Negative: a job with no `environment:` must not be tagged
2287        let yaml_no_env = r#"
2288jobs:
2289  - job: Build
2290    steps:
2291      - script: echo building
2292"#;
2293        let g3 = parse(yaml_no_env);
2294        let any_tagged = g3
2295            .nodes_of_kind(NodeKind::Step)
2296            .any(|s| s.metadata.contains_key(META_ENV_APPROVAL));
2297        assert!(
2298            !any_tagged,
2299            "jobs without `environment:` must not carry META_ENV_APPROVAL"
2300        );
2301    }
2302
2303    #[test]
2304    fn root_parameter_conditional_template_fragment_does_not_crash_and_marks_partial() {
2305        // Real-world repro: an ADO template fragment whose root content is wrapped
2306        // in a parameter conditional (`- ${{ if eq(parameters.X, true) }}:`) followed
2307        // by a list of jobs. This is valid when `template:`-included from a parent
2308        // pipeline, but parsing it standalone fails with "did not find expected key".
2309        // The parser must now return a Partial graph instead of a fatal error.
2310        let yaml = r#"
2311parameters:
2312  msabs_ws2022: false
2313
2314- ${{ if eq(parameters.msabs_ws2022, true) }}:
2315  - job: packer_ws2022
2316    displayName: Build WS2022 Gold Image
2317    steps:
2318      - task: PackerTool@0
2319"#;
2320        let parser = AdoParser;
2321        let source = PipelineSource {
2322            file: "fragment.yml".into(),
2323            repo: None,
2324            git_ref: None,
2325            commit_sha: None,
2326        };
2327        let result = parser.parse(yaml, &source);
2328        let graph = result.expect("template fragment must not crash the parser");
2329        assert!(
2330            matches!(graph.completeness, AuthorityCompleteness::Partial),
2331            "template-fragment graph must be marked Partial"
2332        );
2333        let saw_fragment_gap = graph
2334            .completeness_gaps
2335            .iter()
2336            .any(|g| g.contains("template fragment") && g.contains("parent pipeline"));
2337        assert!(
2338            saw_fragment_gap,
2339            "completeness_gaps must mention the template-fragment reason, got: {:?}",
2340            graph.completeness_gaps
2341        );
2342    }
2343
2344    #[test]
2345    fn environment_tag_isolated_to_gated_job_only() {
2346        // Two jobs side by side: only the deployment job has environment.
2347        // Steps from the non-gated job must NOT be tagged.
2348        let yaml = r#"
2349jobs:
2350  - job: Build
2351    steps:
2352      - script: echo build
2353        displayName: build-step
2354  - deployment: DeployProd
2355    environment: production
2356    steps:
2357      - script: echo deploy
2358        displayName: deploy-step
2359"#;
2360        let g = parse(yaml);
2361        let build_step = g
2362            .nodes_of_kind(NodeKind::Step)
2363            .find(|s| s.name == "build-step")
2364            .expect("build-step must exist");
2365        let deploy_step = g
2366            .nodes_of_kind(NodeKind::Step)
2367            .find(|s| s.name == "deploy-step")
2368            .expect("deploy-step must exist");
2369        assert!(
2370            !build_step.metadata.contains_key(META_ENV_APPROVAL),
2371            "non-gated job's step must not be tagged"
2372        );
2373        assert_eq!(
2374            deploy_step.metadata.get(META_ENV_APPROVAL),
2375            Some(&"true".to_string()),
2376            "gated deployment job's step must be tagged"
2377        );
2378    }
2379
2380    // ── resources.repositories[] capture ──────────────────────
2381
2382    fn repos_meta(graph: &AuthorityGraph) -> Vec<serde_json::Value> {
2383        let raw = graph
2384            .metadata
2385            .get(META_REPOSITORIES)
2386            .expect("META_REPOSITORIES must be set");
2387        serde_json::from_str(raw).expect("META_REPOSITORIES must be valid JSON")
2388    }
2389
2390    #[test]
2391    fn resources_repositories_captured_with_used_flag_when_referenced_by_extends() {
2392        let yaml = r#"
2393resources:
2394  repositories:
2395    - repository: shared-templates
2396      type: git
2397      name: Platform/shared-templates
2398      ref: refs/heads/main
2399
2400extends:
2401  template: pipeline.yml@shared-templates
2402"#;
2403        let graph = parse(yaml);
2404        let entries = repos_meta(&graph);
2405        assert_eq!(entries.len(), 1);
2406        let e = &entries[0];
2407        assert_eq!(e["alias"], "shared-templates");
2408        assert_eq!(e["repo_type"], "git");
2409        assert_eq!(e["name"], "Platform/shared-templates");
2410        assert_eq!(e["ref"], "refs/heads/main");
2411        assert_eq!(e["used"], true);
2412    }
2413
2414    #[test]
2415    fn resources_repositories_used_via_checkout_alias() {
2416        // Mirrors the msigeurope-adf-finance-reporting corpus shape.
2417        let yaml = r#"
2418resources:
2419  repositories:
2420    - repository: adf_publish
2421      type: git
2422      name: org/adf-finance-reporting
2423      ref: refs/heads/adf_publish
2424
2425jobs:
2426  - job: deploy
2427    steps:
2428      - checkout: adf_publish
2429"#;
2430        let graph = parse(yaml);
2431        let entries = repos_meta(&graph);
2432        assert_eq!(entries.len(), 1);
2433        assert_eq!(entries[0]["alias"], "adf_publish");
2434        assert_eq!(entries[0]["used"], true);
2435    }
2436
2437    #[test]
2438    fn resources_repositories_unreferenced_alias_is_marked_not_used() {
2439        // Declared but no `template: x@alias`, no `checkout: alias`, no extends.
2440        let yaml = r#"
2441resources:
2442  repositories:
2443    - repository: orphan-templates
2444      type: git
2445      name: Platform/orphan
2446      ref: main
2447
2448jobs:
2449  - job: build
2450    steps:
2451      - script: echo hi
2452"#;
2453        let graph = parse(yaml);
2454        let entries = repos_meta(&graph);
2455        assert_eq!(entries.len(), 1);
2456        assert_eq!(entries[0]["alias"], "orphan-templates");
2457        assert_eq!(entries[0]["used"], false);
2458    }
2459
2460    #[test]
2461    fn resources_repositories_absent_when_no_resources_block() {
2462        let yaml = r#"
2463jobs:
2464  - job: build
2465    steps:
2466      - script: echo hi
2467"#;
2468        let graph = parse(yaml);
2469        assert!(!graph.metadata.contains_key(META_REPOSITORIES));
2470    }
2471
2472    #[test]
2473    fn parse_template_alias_extracts_segment_after_at() {
2474        assert_eq!(
2475            parse_template_alias("steps/deploy.yml@templates"),
2476            Some("templates".to_string())
2477        );
2478        assert_eq!(parse_template_alias("local/path.yml"), None);
2479        assert_eq!(parse_template_alias("path@"), None);
2480    }
2481
2482    #[test]
2483    fn parameters_as_map_form_parses_as_named_parameters() {
2484        // Real-world repro from Azure/aks-engine, PowerShell/PowerShell, dotnet/maui:
2485        // legacy template fragments declare `parameters:` as a mapping of
2486        // name → default-value rather than the modern typed sequence form.
2487        // Both shapes must parse; the map form yields parameters with names
2488        // but no type/values allowlist (so they default to "string" downstream).
2489        let yaml = r#"
2490parameters:
2491  name: ''
2492  k8sRelease: ''
2493  apimodel: 'examples/e2e-tests/kubernetes/release/default/definition.json'
2494  createVNET: false
2495
2496jobs:
2497  - job: build
2498    steps:
2499      - script: echo $(name)
2500"#;
2501        let graph = parse(yaml);
2502        // Parse must succeed and capture the four parameter names.
2503        assert!(graph.parameters.contains_key("name"));
2504        assert!(graph.parameters.contains_key("k8sRelease"));
2505        assert!(graph.parameters.contains_key("apimodel"));
2506        assert!(graph.parameters.contains_key("createVNET"));
2507        assert_eq!(graph.parameters.len(), 4);
2508    }
2509
2510    #[test]
2511    fn parameters_as_typed_sequence_form_still_parses() {
2512        // Make sure the modern form still works after the polymorphic
2513        // deserializer change.
2514        let yaml = r#"
2515parameters:
2516  - name: env
2517    type: string
2518    default: prod
2519    values:
2520      - prod
2521      - staging
2522  - name: skipTests
2523    type: boolean
2524    default: false
2525
2526jobs:
2527  - job: build
2528    steps:
2529      - script: echo hi
2530"#;
2531        let graph = parse(yaml);
2532        let env_param = graph.parameters.get("env").expect("env captured");
2533        assert_eq!(env_param.param_type, "string");
2534        assert!(env_param.has_values_allowlist);
2535        let skip_param = graph
2536            .parameters
2537            .get("skipTests")
2538            .expect("skipTests captured");
2539        assert_eq!(skip_param.param_type, "boolean");
2540        assert!(!skip_param.has_values_allowlist);
2541    }
2542
2543    #[test]
2544    fn resources_as_legacy_sequence_form_parses_to_empty_resources() {
2545        // Real-world repro from Azure/azure-cli, Chinachu/Mirakurun: pre-2019
2546        // ADO syntax allows `resources:` as a list of `- repo: self` entries,
2547        // not the modern `resources: { repositories: [...] }` mapping. Modern
2548        // ADO still tolerates the legacy form. We must accept both shapes
2549        // without crashing the parse.
2550        let yaml = r#"
2551resources:
2552- repo: self
2553
2554trigger:
2555  - main
2556
2557jobs:
2558  - job: build
2559    steps:
2560      - script: echo hi
2561"#;
2562        let graph = parse(yaml);
2563        // No external repositories declared (legacy form has none) — so the
2564        // META_REPOSITORIES metadata key is absent.
2565        assert!(!graph.metadata.contains_key(META_REPOSITORIES));
2566        // But the job still parses.
2567        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
2568        assert_eq!(steps.len(), 1);
2569    }
2570
2571    #[test]
2572    fn stages_as_template_expression_parses_with_no_stages() {
2573        // Real-world repro from dotnet/diagnostics templatePublic.yml:
2574        // `stages: ${{ parameters.stages }}` resolves at runtime. The static
2575        // parser cannot enumerate stages from a template expression — we
2576        // accept the file without crashing and the resulting graph simply
2577        // contains no stages from the template-expression scope.
2578        let yaml = r#"
2579parameters:
2580  - name: stages
2581    type: stageList
2582
2583stages: ${{ parameters.stages }}
2584"#;
2585        let graph = parse(yaml);
2586        // Graph must exist (no crash).
2587        assert!(graph.parameters.contains_key("stages"));
2588    }
2589
2590    // ── Cross-platform misclassification trap (red-team R2 #5) ─────
2591
2592    #[test]
2593    fn jobs_carrier_without_steps_marks_partial() {
2594        // ADO `jobs:` carrier present but each job has no `steps:` and no
2595        // `template:`. process_steps([]) adds nothing. Result: 0 Step nodes
2596        // despite a non-empty job carrier — must mark Partial so a CI gate
2597        // doesn't treat completeness=complete + 0 findings as "passed".
2598        let yaml = r#"
2599jobs:
2600  - job: build
2601    pool:
2602      vmImage: ubuntu-latest
2603"#;
2604        let graph = parse(yaml);
2605        let step_count = graph
2606            .nodes
2607            .iter()
2608            .filter(|n| n.kind == NodeKind::Step)
2609            .count();
2610        assert_eq!(step_count, 0);
2611        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
2612        assert!(
2613            graph
2614                .completeness_gaps
2615                .iter()
2616                .any(|g| g.contains("0 step nodes")),
2617            "completeness_gaps must mention 0 step nodes: {:?}",
2618            graph.completeness_gaps
2619        );
2620    }
2621
2622    #[test]
2623    fn jobs_carrier_with_empty_jobs_list_does_not_mark_partial() {
2624        // Defensive: an empty `jobs:` list is NOT a carrier — there is no
2625        // job content to be confused about. Stays Complete.
2626        let yaml = r#"
2627jobs: []
2628"#;
2629        let graph = parse(yaml);
2630        let zero_step_gap = graph
2631            .completeness_gaps
2632            .iter()
2633            .any(|g| g.contains("0 step nodes"));
2634        assert!(
2635            !zero_step_gap,
2636            "empty jobs: list is not a carrier; got: {:?}",
2637            graph.completeness_gaps
2638        );
2639    }
2640
2641    // ── Bug regression: pr: none not suppressing PR-specific rules ──────────
2642
2643    #[test]
2644    fn pr_none_does_not_set_meta_trigger() {
2645        // `pr: none` is an explicit opt-out. Parser must require a mapping or
2646        // sequence for a real PR trigger; scalars are all opt-outs.
2647        let yaml = r#"
2648schedules:
2649  - cron: "0 5 * * 1"
2650pr: none
2651trigger: none
2652steps:
2653  - script: echo hello
2654"#;
2655        let graph = parse(yaml);
2656        assert!(
2657            !graph.metadata.contains_key(META_TRIGGER),
2658            "pr: none must not set META_TRIGGER; got: {:?}",
2659            graph.metadata.get(META_TRIGGER)
2660        );
2661    }
2662
2663    #[test]
2664    fn pr_tilde_does_not_set_meta_trigger() {
2665        // `pr: ~` is YAML null written as tilde — also an opt-out.
2666        let yaml = "pr: ~\nsteps:\n  - script: echo hello\n";
2667        let graph = parse(yaml);
2668        assert!(
2669            !graph.metadata.contains_key(META_TRIGGER),
2670            "pr: ~ must not set META_TRIGGER; got: {:?}",
2671            graph.metadata.get(META_TRIGGER)
2672        );
2673    }
2674
2675    #[test]
2676    fn pr_false_does_not_set_meta_trigger() {
2677        // `pr: false` — boolean false means disabled.
2678        let yaml = "pr: false\nsteps:\n  - script: echo hello\n";
2679        let graph = parse(yaml);
2680        assert!(
2681            !graph.metadata.contains_key(META_TRIGGER),
2682            "pr: false must not set META_TRIGGER; got: {:?}",
2683            graph.metadata.get(META_TRIGGER)
2684        );
2685    }
2686
2687    #[test]
2688    fn pr_sequence_sets_meta_trigger() {
2689        // Shorthand sequence form: `pr:\n  - main` is also a real PR trigger.
2690        let yaml = "pr:\n  - main\nsteps:\n  - script: echo hello\n";
2691        let graph = parse(yaml);
2692        assert_eq!(
2693            graph.metadata.get(META_TRIGGER).map(|s| s.as_str()),
2694            Some("pr"),
2695            "pr: [main] must set META_TRIGGER=pr"
2696        );
2697    }
2698
2699    #[test]
2700    fn pr_with_branches_sets_meta_trigger() {
2701        // Positive guard: a real PR trigger mapping must still set META_TRIGGER.
2702        let yaml = r#"
2703pr:
2704  branches:
2705    include:
2706      - main
2707steps:
2708  - script: echo hello
2709"#;
2710        let graph = parse(yaml);
2711        assert_eq!(
2712            graph.metadata.get(META_TRIGGER).map(|s| s.as_str()),
2713            Some("pr"),
2714            "real pr: block must set META_TRIGGER=pr"
2715        );
2716    }
2717
2718    // ── Bug regression: permissions: contents: none parsed as empty string ──
2719    // E2E test: parser → rule — the only test that catches the full chain.
2720
2721    #[test]
2722    fn over_privileged_identity_does_not_fire_when_permissions_contents_none() {
2723        // Full chain: ADO parser + over_privileged_identity rule.
2724        // Previously the parser ignored `permissions:`, leaving the token at
2725        // broad scope and firing the rule on every restricted pipeline.
2726        use taudit_core::rules::over_privileged_identity;
2727        let yaml = r#"
2728trigger: none
2729permissions:
2730  contents: none
2731steps:
2732  - script: echo hello
2733"#;
2734        let graph = parse(yaml);
2735        let findings = over_privileged_identity(&graph);
2736        let token_findings: Vec<_> = findings
2737            .iter()
2738            .filter(|f| {
2739                f.nodes_involved.iter().any(|&id| {
2740                    graph
2741                        .node(id)
2742                        .map(|n| n.name == "System.AccessToken")
2743                        .unwrap_or(false)
2744                })
2745            })
2746            .collect();
2747        assert!(
2748            token_findings.is_empty(),
2749            "over_privileged_identity must not fire on System.AccessToken when \
2750             permissions: contents: none is set; got: {token_findings:#?}"
2751        );
2752    }
2753
2754    #[test]
2755    fn pipeline_level_permissions_none_constrains_token() {
2756        // `permissions: contents: none` at pipeline level must downgrade
2757        // System.AccessToken from broad → constrained so over_privileged_identity
2758        // does not fire on an already-locked-down pipeline.
2759        let yaml = r#"
2760trigger: none
2761permissions:
2762  contents: none
2763steps:
2764  - script: echo hello
2765"#;
2766        let graph = parse(yaml);
2767        let token = graph
2768            .nodes_of_kind(NodeKind::Identity)
2769            .find(|n| n.name == "System.AccessToken")
2770            .expect("System.AccessToken must always be present");
2771        assert_eq!(
2772            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
2773            Some("constrained"),
2774            "permissions: contents: none must constrain the token; got: {:?}",
2775            token.metadata.get(META_IDENTITY_SCOPE)
2776        );
2777    }
2778
2779    #[test]
2780    fn pipeline_level_permissions_write_keeps_token_broad() {
2781        // A pipeline with write permissions must keep System.AccessToken broad.
2782        let yaml = r#"
2783trigger: none
2784permissions:
2785  contents: write
2786steps:
2787  - script: echo hello
2788"#;
2789        let graph = parse(yaml);
2790        let token = graph
2791            .nodes_of_kind(NodeKind::Identity)
2792            .find(|n| n.name == "System.AccessToken")
2793            .expect("System.AccessToken must always be present");
2794        assert_eq!(
2795            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
2796            Some("broad"),
2797            "permissions: contents: write must keep the token broad; got: {:?}",
2798            token.metadata.get(META_IDENTITY_SCOPE)
2799        );
2800    }
2801
2802    #[test]
2803    fn pipeline_level_permissions_read_scalar_constrains_token() {
2804        // `permissions: read` (scalar, not a map) must also downgrade the token.
2805        // Previously the scalar branch treated "read" as broad (incorrect).
2806        let yaml = "trigger: none\npermissions: read\nsteps:\n  - script: echo hello\n";
2807        let graph = parse(yaml);
2808        let token = graph
2809            .nodes_of_kind(NodeKind::Identity)
2810            .find(|n| n.name == "System.AccessToken")
2811            .expect("System.AccessToken must always be present");
2812        assert_eq!(
2813            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
2814            Some("constrained"),
2815            "permissions: read must constrain the token; got: {:?}",
2816            token.metadata.get(META_IDENTITY_SCOPE)
2817        );
2818    }
2819
2820    #[test]
2821    fn pipeline_level_permissions_write_scalar_keeps_token_broad() {
2822        // `permissions: write` (scalar) keeps the token broad.
2823        let yaml = "trigger: none\npermissions: write\nsteps:\n  - script: echo hello\n";
2824        let graph = parse(yaml);
2825        let token = graph
2826            .nodes_of_kind(NodeKind::Identity)
2827            .find(|n| n.name == "System.AccessToken")
2828            .expect("System.AccessToken must always be present");
2829        assert_eq!(
2830            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
2831            Some("broad"),
2832            "permissions: write scalar must keep token broad; got: {:?}",
2833            token.metadata.get(META_IDENTITY_SCOPE)
2834        );
2835    }
2836
2837    #[test]
2838    fn pipeline_level_permissions_contents_read_constrains_token() {
2839        // Map form with contents: read — should constrain.
2840        let yaml =
2841            "trigger: none\npermissions:\n  contents: read\nsteps:\n  - script: echo hello\n";
2842        let graph = parse(yaml);
2843        let token = graph
2844            .nodes_of_kind(NodeKind::Identity)
2845            .find(|n| n.name == "System.AccessToken")
2846            .expect("System.AccessToken must always be present");
2847        assert_eq!(
2848            token.metadata.get(META_IDENTITY_SCOPE).map(|s| s.as_str()),
2849            Some("constrained"),
2850            "permissions: contents: read must constrain; got: {:?}",
2851            token.metadata.get(META_IDENTITY_SCOPE)
2852        );
2853    }
2854
2855    #[test]
2856    fn empty_pipeline_does_not_mark_partial_for_zero_steps() {
2857        // No top-level stages/jobs/steps at all — there's no carrier, so the
2858        // 0-step-nodes guard must NOT fire. A genuinely empty pipeline stays
2859        // Complete.
2860        let yaml = r#"
2861trigger:
2862  - main
2863"#;
2864        let graph = parse(yaml);
2865        let zero_step_gap = graph
2866            .completeness_gaps
2867            .iter()
2868            .any(|g| g.contains("0 step nodes"));
2869        assert!(
2870            !zero_step_gap,
2871            "no carrier means no 0-step gap reason; got: {:?}",
2872            graph.completeness_gaps
2873        );
2874    }
2875}
taudit_parse_ado/lib.rs

taudit_parse_ado/
lib.rs