// taudit_parse_ado/lib.rs

1use std::collections::{HashMap, HashSet};
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
/// Azure DevOps YAML pipeline parser.
///
/// A stateless unit struct: all behaviour is provided through its
/// `PipelineParser` implementation.
pub struct AdoParser;
10
11impl PipelineParser for AdoParser {
12    fn platform(&self) -> &str {
13        "azure-devops"
14    }
15
16    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
17        let mut de = serde_yaml::Deserializer::from_str(content);
18        let doc = de
19            .next()
20            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
21        let pipeline: AdoPipeline = AdoPipeline::deserialize(doc)
22            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
23        let extra_docs = de.next().is_some();
24
25        let mut graph = AuthorityGraph::new(source.clone());
26        if extra_docs {
27            graph.mark_partial(
28                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
29            );
30        }
31
32        // Detect PR trigger — sets graph-level META_TRIGGER for trigger_context_mismatch.
33        let has_pr_trigger = pipeline.pr.is_some();
34        if has_pr_trigger {
35            graph.metadata.insert(META_TRIGGER.into(), "pr".into());
36        }
37
38        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
39
40        // System.AccessToken is always present — equivalent to GITHUB_TOKEN.
41        // Tagged implicit: ADO injects this token into every task by platform design;
42        // its exposure to marketplace tasks is structural, not a fixable misconfiguration.
43        let mut meta = HashMap::new();
44        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
45        meta.insert(META_IMPLICIT.into(), "true".into());
46        let token_id = graph.add_node_with_metadata(
47            NodeKind::Identity,
48            "System.AccessToken",
49            TrustZone::FirstParty,
50            meta,
51        );
52
53        // Pipeline-level pool: adds Image node, tagged self-hosted when applicable.
54        process_pool(&pipeline.pool, &mut graph);
55
56        // Pipeline-level variable groups and named secrets.
57        // plain_vars tracks non-secret named variables so $(VAR) refs in scripts
58        // don't generate false-positive Secret nodes for plain config values.
59        let mut plain_vars: HashSet<String> = HashSet::new();
60        let pipeline_secret_ids = process_variables(
61            &pipeline.variables,
62            &mut graph,
63            &mut secret_ids,
64            "pipeline",
65            &mut plain_vars,
66        );
67
68        // Determine pipeline structure: stages → jobs → steps, or jobs → steps, or steps only
69        if let Some(ref stages) = pipeline.stages {
70            for stage in stages {
71                // Stage-level template reference — delegate and mark Partial
72                if let Some(ref tpl) = stage.template {
73                    let stage_name = stage.stage.as_deref().unwrap_or("stage");
74                    add_template_delegation(stage_name, tpl, token_id, &mut graph);
75                    continue;
76                }
77
78                let stage_name = stage.stage.as_deref().unwrap_or("stage").to_string();
79                let stage_secret_ids = process_variables(
80                    &stage.variables,
81                    &mut graph,
82                    &mut secret_ids,
83                    &stage_name,
84                    &mut plain_vars,
85                );
86
87                for job in &stage.jobs {
88                    let job_name = job.effective_name();
89                    let job_secret_ids = process_variables(
90                        &job.variables,
91                        &mut graph,
92                        &mut secret_ids,
93                        &job_name,
94                        &mut plain_vars,
95                    );
96
97                    process_pool(&job.pool, &mut graph);
98
99                    let all_secrets: Vec<NodeId> = pipeline_secret_ids
100                        .iter()
101                        .chain(&stage_secret_ids)
102                        .chain(&job_secret_ids)
103                        .copied()
104                        .collect();
105
106                    let steps_start = graph.nodes.len();
107
108                    process_steps(
109                        job.steps.as_deref().unwrap_or(&[]),
110                        &job_name,
111                        token_id,
112                        &all_secrets,
113                        &plain_vars,
114                        &mut graph,
115                        &mut secret_ids,
116                    );
117
118                    if let Some(ref tpl) = job.template {
119                        add_template_delegation(&job_name, tpl, token_id, &mut graph);
120                    }
121
122                    if job.has_environment_binding() {
123                        tag_job_steps_env_approval(&mut graph, steps_start);
124                    }
125                }
126            }
127        } else if let Some(ref jobs) = pipeline.jobs {
128            for job in jobs {
129                let job_name = job.effective_name();
130                let job_secret_ids = process_variables(
131                    &job.variables,
132                    &mut graph,
133                    &mut secret_ids,
134                    &job_name,
135                    &mut plain_vars,
136                );
137
138                process_pool(&job.pool, &mut graph);
139
140                let all_secrets: Vec<NodeId> = pipeline_secret_ids
141                    .iter()
142                    .chain(&job_secret_ids)
143                    .copied()
144                    .collect();
145
146                let steps_start = graph.nodes.len();
147
148                process_steps(
149                    job.steps.as_deref().unwrap_or(&[]),
150                    &job_name,
151                    token_id,
152                    &all_secrets,
153                    &plain_vars,
154                    &mut graph,
155                    &mut secret_ids,
156                );
157
158                if let Some(ref tpl) = job.template {
159                    add_template_delegation(&job_name, tpl, token_id, &mut graph);
160                }
161
162                if job.has_environment_binding() {
163                    tag_job_steps_env_approval(&mut graph, steps_start);
164                }
165            }
166        } else if let Some(ref steps) = pipeline.steps {
167            process_steps(
168                steps,
169                "pipeline",
170                token_id,
171                &pipeline_secret_ids,
172                &plain_vars,
173                &mut graph,
174                &mut secret_ids,
175            );
176        }
177
178        Ok(graph)
179    }
180}
181
182/// Process an ADO `pool:` block. ADO pools come in two shapes:
183///   - `pool: my-self-hosted-pool` (string shorthand — always self-hosted)
184///   - `pool: { name: my-pool }` (named pool — self-hosted)
185///   - `pool: { vmImage: ubuntu-latest }` (Microsoft-hosted)
186///   - `pool: { name: my-pool, vmImage: ubuntu-latest }` (hosted; vmImage wins)
187///
188/// Creates an Image node representing the agent environment. Self-hosted pools
189/// are tagged with META_SELF_HOSTED so downstream rules can flag them.
190fn process_pool(pool: &Option<serde_yaml::Value>, graph: &mut AuthorityGraph) {
191    let Some(pool_val) = pool else {
192        return;
193    };
194
195    let (image_name, is_self_hosted) = match pool_val {
196        serde_yaml::Value::String(s) => (s.clone(), true),
197        serde_yaml::Value::Mapping(map) => {
198            let name = map.get("name").and_then(|v| v.as_str());
199            let vm_image = map.get("vmImage").and_then(|v| v.as_str());
200            match (name, vm_image) {
201                (_, Some(vm)) => (vm.to_string(), false),
202                (Some(n), None) => (n.to_string(), true),
203                (None, None) => return,
204            }
205        }
206        _ => return,
207    };
208
209    let mut meta = HashMap::new();
210    if is_self_hosted {
211        meta.insert(META_SELF_HOSTED.into(), "true".into());
212    }
213    graph.add_node_with_metadata(NodeKind::Image, image_name, TrustZone::FirstParty, meta);
214}
215
216/// Tag every Step node added since `start_idx` with META_ENV_APPROVAL.
217/// Used after `process_steps` for a job whose `environment:` is configured —
218/// the environment binding indicates the job sits behind a manual approval
219/// gate, which is an isolation boundary that breaks automatic propagation.
220fn tag_job_steps_env_approval(graph: &mut AuthorityGraph, start_idx: usize) {
221    for node in graph.nodes.iter_mut().skip(start_idx) {
222        if node.kind == NodeKind::Step {
223            node.metadata
224                .insert(META_ENV_APPROVAL.into(), "true".into());
225        }
226    }
227}
228
229/// Process a variable list, creating Secret nodes and returning their IDs.
230/// Returns IDs for secrets only (not variable groups, which are opaque).
231/// Populates `plain_vars` with the names of non-secret named variables so
232/// downstream `$(VAR)` scanning can skip them.
233fn process_variables(
234    variables: &Option<AdoVariables>,
235    graph: &mut AuthorityGraph,
236    cache: &mut HashMap<String, NodeId>,
237    scope: &str,
238    plain_vars: &mut HashSet<String>,
239) -> Vec<NodeId> {
240    let mut ids = Vec::new();
241
242    let vars = match variables.as_ref() {
243        Some(v) => v,
244        None => return ids,
245    };
246
247    for var in &vars.0 {
248        match var {
249            AdoVariable::Group { group } => {
250                // Skip template-expression group names like `${{ parameters.env }}`.
251                // We can't resolve them statically — mark Partial but don't create
252                // a misleading Secret node with the expression as its name.
253                if group.contains("${{") {
254                    graph.mark_partial(format!(
255                        "variable group in {scope} uses template expression — group name unresolvable at parse time"
256                    ));
257                    continue;
258                }
259                let mut meta = HashMap::new();
260                meta.insert(META_VARIABLE_GROUP.into(), "true".into());
261                let id = graph.add_node_with_metadata(
262                    NodeKind::Secret,
263                    group.as_str(),
264                    TrustZone::FirstParty,
265                    meta,
266                );
267                cache.insert(group.clone(), id);
268                ids.push(id);
269                graph.mark_partial(format!(
270                    "variable group '{group}' in {scope} — contents unresolvable without ADO API access"
271                ));
272            }
273            AdoVariable::Named {
274                name, is_secret, ..
275            } => {
276                if *is_secret {
277                    let id = find_or_create_secret(graph, cache, name);
278                    ids.push(id);
279                } else {
280                    plain_vars.insert(name.clone());
281                }
282            }
283        }
284    }
285
286    ids
287}
288
289/// Process a list of ADO steps, adding nodes and edges to the graph.
290fn process_steps(
291    steps: &[AdoStep],
292    job_name: &str,
293    token_id: NodeId,
294    inherited_secrets: &[NodeId],
295    plain_vars: &HashSet<String>,
296    graph: &mut AuthorityGraph,
297    cache: &mut HashMap<String, NodeId>,
298) {
299    for (idx, step) in steps.iter().enumerate() {
300        // Template step — delegation, mark partial
301        if let Some(ref tpl) = step.template {
302            let step_name = step
303                .display_name
304                .as_deref()
305                .or(step.name.as_deref())
306                .map(|s| s.to_string())
307                .unwrap_or_else(|| format!("{job_name}[{idx}]"));
308            add_template_delegation(&step_name, tpl, token_id, graph);
309            continue;
310        }
311
312        // Determine step kind and trust zone
313        let (step_name, trust_zone, inline_script) = classify_step(step, job_name, idx);
314
315        let step_id = graph.add_node(NodeKind::Step, &step_name, trust_zone);
316
317        // Every step has access to System.AccessToken
318        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
319
320        // checkout step with persistCredentials: true writes the token to .git/config on disk,
321        // making it accessible to all subsequent steps and filesystem-level attackers.
322        if step.checkout.is_some() && step.persist_credentials == Some(true) {
323            graph.add_edge(step_id, token_id, EdgeKind::PersistsTo);
324        }
325
326        // `checkout: self` pulls the repo being built. In a PR trigger context this
327        // is the untrusted fork head — tag the step so downstream rules can gate on
328        // trigger context. Default ADO checkout (`checkout: self`) is the common case.
329        if let Some(ref ck) = step.checkout {
330            if ck == "self" {
331                if let Some(node) = graph.nodes.get_mut(step_id) {
332                    node.metadata
333                        .insert(META_CHECKOUT_SELF.into(), "true".into());
334                }
335            }
336        }
337
338        // Inherited pipeline/stage/job secrets
339        for &secret_id in inherited_secrets {
340            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
341        }
342
343        // Service connection detection from task inputs (case-insensitive key match)
344        if let Some(ref inputs) = step.inputs {
345            let service_conn_keys = [
346                "azuresubscription",
347                "connectedservicename",
348                "connectedservicenamearm",
349                "kubernetesserviceconnection",
350            ];
351            for (raw_key, val) in inputs {
352                let lower = raw_key.to_lowercase();
353                if !service_conn_keys.contains(&lower.as_str()) {
354                    continue;
355                }
356                let conn_name = yaml_value_as_str(val).unwrap_or(raw_key.as_str());
357                if !conn_name.starts_with("$(") {
358                    let mut meta = HashMap::new();
359                    meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
360                    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
361                    // ADO service connections are the platform's federated-identity equivalent
362                    // (modern Azure service connections use workload identity federation /
363                    // OIDC). Tag them so uplift_without_attestation treats ADO pipelines with
364                    // the same OIDC-parity logic applied to GHA.
365                    meta.insert(META_OIDC.into(), "true".into());
366                    let conn_id = graph.add_node_with_metadata(
367                        NodeKind::Identity,
368                        conn_name,
369                        TrustZone::FirstParty,
370                        meta,
371                    );
372                    graph.add_edge(step_id, conn_id, EdgeKind::HasAccessTo);
373                }
374            }
375
376            // Detect $(varName) references in task input values
377            for val in inputs.values() {
378                if let Some(s) = yaml_value_as_str(val) {
379                    extract_dollar_paren_secrets(s, step_id, plain_vars, graph, cache);
380                }
381            }
382        }
383
384        // Detect $(varName) in step env values
385        if let Some(ref env) = step.env {
386            for val in env.values() {
387                extract_dollar_paren_secrets(val, step_id, plain_vars, graph, cache);
388            }
389        }
390
391        // Detect $(varName) in inline script text
392        if let Some(ref script) = inline_script {
393            extract_dollar_paren_secrets(script, step_id, plain_vars, graph, cache);
394        }
395
396        // Detect ##vso[task.setvariable] — environment gate mutation in ADO pipelines
397        if let Some(ref script) = inline_script {
398            let lower = script.to_lowercase();
399            if lower.contains("##vso[task.setvariable") {
400                if let Some(node) = graph.nodes.get_mut(step_id) {
401                    node.metadata
402                        .insert(META_WRITES_ENV_GATE.into(), "true".into());
403                }
404            }
405        }
406    }
407}
408
409/// Classify an ADO step, returning (name, trust_zone, inline_script_text).
410fn classify_step(
411    step: &AdoStep,
412    job_name: &str,
413    idx: usize,
414) -> (String, TrustZone, Option<String>) {
415    let default_name = || format!("{job_name}[{idx}]");
416
417    let name = step
418        .display_name
419        .as_deref()
420        .or(step.name.as_deref())
421        .map(|s| s.to_string())
422        .unwrap_or_else(default_name);
423
424    if step.task.is_some() {
425        (name, TrustZone::Untrusted, None)
426    } else if let Some(ref s) = step.script {
427        (name, TrustZone::FirstParty, Some(s.clone()))
428    } else if let Some(ref s) = step.bash {
429        (name, TrustZone::FirstParty, Some(s.clone()))
430    } else if let Some(ref s) = step.powershell {
431        (name, TrustZone::FirstParty, Some(s.clone()))
432    } else if let Some(ref s) = step.pwsh {
433        (name, TrustZone::FirstParty, Some(s.clone()))
434    } else {
435        (name, TrustZone::FirstParty, None)
436    }
437}
438
439/// Add a DelegatesTo edge from a synthetic step node to a template image node.
440///
441/// Trust zone heuristic: templates referenced with `@repository` (e.g. `steps/deploy.yml@templates`)
442/// pull code from an external repository and are Untrusted. Plain relative paths like
443/// `steps/deploy.yml` resolve within the same repo and are FirstParty — mirroring how GHA
444/// treats `./local-action`.
445fn add_template_delegation(
446    step_name: &str,
447    template_path: &str,
448    token_id: NodeId,
449    graph: &mut AuthorityGraph,
450) {
451    let tpl_trust_zone = if template_path.contains('@') {
452        TrustZone::Untrusted
453    } else {
454        TrustZone::FirstParty
455    };
456    let step_id = graph.add_node(NodeKind::Step, step_name, TrustZone::FirstParty);
457    let tpl_id = graph.add_node(NodeKind::Image, template_path, tpl_trust_zone);
458    graph.add_edge(step_id, tpl_id, EdgeKind::DelegatesTo);
459    graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
460    graph.mark_partial(format!(
461        "template '{template_path}' cannot be resolved inline — authority within the template is unknown"
462    ));
463}
464
465/// Extract `$(varName)` references from a string, creating Secret nodes for
466/// non-predefined and non-plain ADO variables.
467/// Only content that is a valid ADO variable identifier (`[A-Za-z][A-Za-z0-9_]*`)
468/// is treated as a variable reference. This rejects PowerShell sub-expressions
469/// (`$($var)`), ADO template expressions (`${{ ... }}`), shell commands (`$(date)`),
470/// and anything with spaces or special characters.
471fn extract_dollar_paren_secrets(
472    text: &str,
473    step_id: NodeId,
474    plain_vars: &HashSet<String>,
475    graph: &mut AuthorityGraph,
476    cache: &mut HashMap<String, NodeId>,
477) {
478    let mut pos = 0;
479    let bytes = text.as_bytes();
480    while pos < bytes.len() {
481        if pos + 2 < bytes.len() && bytes[pos] == b'$' && bytes[pos + 1] == b'(' {
482            let start = pos + 2;
483            if let Some(end_offset) = text[start..].find(')') {
484                let var_name = &text[start..start + end_offset];
485                if is_valid_ado_identifier(var_name)
486                    && !is_predefined_ado_var(var_name)
487                    && !plain_vars.contains(var_name)
488                {
489                    let id = find_or_create_secret(graph, cache, var_name);
490                    // Mark secrets embedded in -var flag arguments: their values appear in
491                    // pipeline logs (command string is logged before masking, and Terraform
492                    // itself logs -var values in plan output and debug traces).
493                    if is_in_terraform_var_flag(text, pos) {
494                        if let Some(node) = graph.nodes.get_mut(id) {
495                            node.metadata
496                                .insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
497                        }
498                    }
499                    graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
500                }
501                pos = start + end_offset + 1;
502                continue;
503            }
504        }
505        pos += 1;
506    }
507}
508
/// Returns true if the `$(VAR)` at `var_pos` looks like it sits inside a
/// Terraform `-var` flag argument.
///
/// Heuristic: the portion of the current line that precedes `var_pos` must
/// contain both `-var` (the flag) and `=` (the key=value assignment), as in
/// `-var "key=$(VAR)"`. Earlier lines are not considered.
fn is_in_terraform_var_flag(text: &str, var_pos: usize) -> bool {
    let preceding = &text[..var_pos];
    let current_line = match preceding.rfind('\n') {
        Some(newline) => &preceding[newline + 1..],
        None => preceding,
    };
    current_line.contains('=') && current_line.contains("-var")
}
517
/// Returns true if `name` is shaped like an ADO variable identifier.
///
/// The first character must be an ASCII letter; every following character
/// must be an ASCII letter, digit, underscore, or dot (dots allow names such
/// as `Build.BuildId`). Anything else — PowerShell vars (`$name`), template
/// expressions (`{{ ... }}`), or complex expressions (`name -join ','`) — is
/// rejected, as is the empty string.
fn is_valid_ado_identifier(name: &str) -> bool {
    let Some(first) = name.chars().next() else {
        return false;
    };
    if !first.is_ascii_alphabetic() {
        return false;
    }
    name.chars()
        .skip(1)
        .all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '.'))
}
532
/// Returns true if a variable name is a well-known ADO predefined variable.
/// These are system-provided and are not treated as secrets.
///
/// A name is predefined when it starts with one of the known namespace
/// prefixes (`Build.`, `Agent.`, `System.`, …) or with `TF_BUILD`. The
/// comparison ignores ASCII case because ADO variable names are
/// case-insensitive: `$(build.buildId)` refers to the same variable as
/// `$(Build.BuildId)` and must not be reported as a secret.
fn is_predefined_ado_var(name: &str) -> bool {
    const PREDEFINED_PREFIXES: [&str; 10] = [
        "Build.",
        "Agent.",
        "System.",
        "Pipeline.",
        "Release.",
        "Environment.",
        "Strategy.",
        "Deployment.",
        "Resources.",
        "TF_BUILD",
    ];

    PREDEFINED_PREFIXES.iter().any(|prefix| {
        // `get` yields None when `name` is shorter than the prefix or the cut
        // would split a multi-byte character, so this never panics.
        matches!(
            name.get(..prefix.len()),
            Some(head) if head.eq_ignore_ascii_case(prefix)
        )
    })
}
550
551fn find_or_create_secret(
552    graph: &mut AuthorityGraph,
553    cache: &mut HashMap<String, NodeId>,
554    name: &str,
555) -> NodeId {
556    if let Some(&id) = cache.get(name) {
557        return id;
558    }
559    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
560    cache.insert(name.to_string(), id);
561    id
562}
563
564fn yaml_value_as_str(val: &serde_yaml::Value) -> Option<&str> {
565    val.as_str()
566}
567
568// ── Serde models for ADO YAML ─────────────────────────────
569
/// Top-level ADO pipeline definition.
/// ADO pipelines come in three shapes:
///   (a) stages → jobs → steps
///   (b) jobs → steps (no stages key)
///   (c) steps only (no stages or jobs key)
///
/// Every field is optional; a key absent from the YAML deserializes to `None`.
#[derive(Debug, Deserialize)]
pub struct AdoPipeline {
    /// `trigger:` block, kept as raw YAML.
    #[serde(default)]
    pub trigger: Option<serde_yaml::Value>,
    /// `pr:` block, kept as raw YAML. `AdoParser::parse` consults it to decide
    /// whether to record a PR trigger on the graph.
    #[serde(default)]
    pub pr: Option<serde_yaml::Value>,
    /// Pipeline-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Shape (a): list of stages.
    #[serde(default)]
    pub stages: Option<Vec<AdoStage>>,
    /// Shape (b): list of jobs directly under the pipeline.
    #[serde(default)]
    pub jobs: Option<Vec<AdoJob>>,
    /// Shape (c): list of steps directly under the pipeline.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Pipeline-level `pool:` block, kept as raw YAML because it may be either
    /// a string or a mapping.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
}
592
/// One entry of the pipeline's `stages:` list — either a concrete stage or a
/// stage-level template reference.
#[derive(Debug, Deserialize)]
pub struct AdoStage {
    /// Stage identifier. Absent when the stage entry is a template reference.
    #[serde(default)]
    pub stage: Option<String>,
    /// Stage-level template reference (`- template: path/to/stage.yml`).
    #[serde(default)]
    pub template: Option<String>,
    /// Stage-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Jobs belonging to the stage; empty when the `jobs:` key is absent.
    #[serde(default)]
    pub jobs: Vec<AdoJob>,
}
606
/// One entry of a `jobs:` list — a regular job, a deployment job, or a
/// job-level template reference.
#[derive(Debug, Deserialize)]
pub struct AdoJob {
    /// Regular job identifier
    #[serde(default)]
    pub job: Option<String>,
    /// Deployment job identifier
    #[serde(default)]
    pub deployment: Option<String>,
    /// Job-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Steps of the job; `None` when the `steps:` key is absent.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Job-level `pool:` block, kept as raw YAML because it may be either a
    /// string or a mapping.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
    /// Job-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// Deployment-job environment binding. Two YAML shapes:
    ///
    ///   - `environment: production` (string shorthand)
    ///   - `environment: { name: staging, resourceType: VirtualMachine }` (mapping)
    ///
    /// When present, the environment may have approvals/checks attached in ADO's
    /// environment configuration. Approvals are a manual gate — authority cannot
    /// propagate past one without human intervention. We treat any `environment:`
    /// binding as an approval candidate and tag the job's steps so propagation
    /// rules can downgrade severity. (We can't see the approval config from YAML
    /// alone; the binding is the strongest signal available at parse time.)
    #[serde(default)]
    pub environment: Option<serde_yaml::Value>,
}
638
639impl AdoJob {
640    pub fn effective_name(&self) -> String {
641        self.job
642            .as_deref()
643            .or(self.deployment.as_deref())
644            .unwrap_or("job")
645            .to_string()
646    }
647
648    /// Returns true when the job is bound to an `environment:` — either the
649    /// string form (`environment: production`) or the mapping form with a
650    /// non-empty `name:` field. An empty mapping or empty string is ignored.
651    pub fn has_environment_binding(&self) -> bool {
652        match self.environment.as_ref() {
653            None => false,
654            Some(serde_yaml::Value::String(s)) => !s.trim().is_empty(),
655            Some(serde_yaml::Value::Mapping(m)) => m
656                .get("name")
657                .and_then(|v| v.as_str())
658                .map(|s| !s.trim().is_empty())
659                .unwrap_or(false),
660            _ => false,
661        }
662    }
663}
664
/// One entry of a `steps:` list. All fields are optional; which ones are set
/// determines how the step is classified (task, inline script, checkout, or
/// template reference).
#[derive(Debug, Deserialize)]
pub struct AdoStep {
    /// Task reference e.g. `AzureCLI@2`
    #[serde(default)]
    pub task: Option<String>,
    /// Inline script (cmd/sh)
    #[serde(default)]
    pub script: Option<String>,
    /// Inline bash script
    #[serde(default)]
    pub bash: Option<String>,
    /// Inline PowerShell script
    #[serde(default)]
    pub powershell: Option<String>,
    /// Cross-platform PowerShell
    #[serde(default)]
    pub pwsh: Option<String>,
    /// Step-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// Human-readable step label (`displayName:` in YAML); preferred over
    /// `name` when naming the step's graph node.
    #[serde(rename = "displayName", default)]
    pub display_name: Option<String>,
    /// Legacy name alias
    #[serde(default)]
    pub name: Option<String>,
    /// Step `env:` mapping (variable name → string value).
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Task inputs (key → value, but values may be nested)
    #[serde(default)]
    pub inputs: Option<HashMap<String, serde_yaml::Value>>,
    /// Checkout step target (e.g. `self`, a repo alias, or `none`)
    #[serde(default)]
    pub checkout: Option<String>,
    /// When true on a checkout step, writes credentials to .git/config for subsequent steps.
    #[serde(rename = "persistCredentials", default)]
    pub persist_credentials: Option<bool>,
}
702
/// ADO `variables:` block. Can be a sequence (list of group/name-value entries)
/// or a mapping (variableName: value). We normalise both into a Vec<AdoVariable>.
///
/// Deserialization is hand-written (see the `Deserialize` impl) because the
/// two YAML shapes cannot be expressed with a plain derive.
#[derive(Debug, Default)]
pub struct AdoVariables(pub Vec<AdoVariable>);
707
708impl<'de> serde::Deserialize<'de> for AdoVariables {
709    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
710    where
711        D: serde::Deserializer<'de>,
712    {
713        let raw = serde_yaml::Value::deserialize(deserializer)?;
714        let mut vars = Vec::new();
715
716        match raw {
717            serde_yaml::Value::Sequence(seq) => {
718                for item in seq {
719                    if let Some(map) = item.as_mapping() {
720                        if let Some(group_val) = map.get("group") {
721                            if let Some(group) = group_val.as_str() {
722                                vars.push(AdoVariable::Group {
723                                    group: group.to_string(),
724                                });
725                                continue;
726                            }
727                        }
728                        let name = map
729                            .get("name")
730                            .and_then(|v| v.as_str())
731                            .unwrap_or("")
732                            .to_string();
733                        let value = map
734                            .get("value")
735                            .and_then(|v| v.as_str())
736                            .unwrap_or("")
737                            .to_string();
738                        let is_secret = map
739                            .get("isSecret")
740                            .and_then(|v| v.as_bool())
741                            .unwrap_or(false);
742                        vars.push(AdoVariable::Named {
743                            name,
744                            value,
745                            is_secret,
746                        });
747                    }
748                }
749            }
750            serde_yaml::Value::Mapping(map) => {
751                for (k, v) in map {
752                    let name = k.as_str().unwrap_or("").to_string();
753                    let value = v.as_str().unwrap_or("").to_string();
754                    vars.push(AdoVariable::Named {
755                        name,
756                        value,
757                        is_secret: false,
758                    });
759                }
760            }
761            _ => {}
762        }
763
764        Ok(AdoVariables(vars))
765    }
766}
767
/// A single normalised entry of a `variables:` block.
#[derive(Debug)]
pub enum AdoVariable {
    /// Reference to a variable group (`- group: name`).
    Group {
        /// Group name exactly as written in the YAML.
        group: String,
    },
    /// An individually declared variable.
    Named {
        /// Variable name; empty when the YAML entry had no string `name`.
        name: String,
        /// Variable value; empty when the YAML value was missing or not a string.
        value: String,
        /// True when the sequence-form entry set `isSecret: true`; always
        /// false for mapping-form variables.
        is_secret: bool,
    },
}
779
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `yaml` as if it were stored at `azure-pipelines.yml` (no repo,
    /// no git ref) and returns the graph, panicking on any parse error.
    fn parse_yaml(yaml: &str) -> AuthorityGraph {
        let source = PipelineSource {
            file: "azure-pipelines.yml".into(),
            repo: None,
            git_ref: None,
        };
        AdoParser.parse(yaml, &source).unwrap()
    }

    /// A minimal jobs-based pipeline yields at least the token and one step.
    #[test]
    fn parses_simple_pipeline() {
        let pipeline = r#"
trigger:
  - main

jobs:
  - job: Build
    steps:
      - script: echo hello
        displayName: Say hello
"#;
        let graph = parse_yaml(pipeline);
        // Lower bound: the System.AccessToken identity plus the single step.
        assert!(graph.nodes.len() >= 2);
    }

    /// The only identity in a trivial pipeline is a broad System.AccessToken.
    #[test]
    fn system_access_token_created() {
        let pipeline = r#"
steps:
  - script: echo hi
"#;
        let graph = parse_yaml(pipeline);
        let identity_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identity_nodes.len(), 1);

        let token = &identity_nodes[0];
        assert_eq!(token.name, "System.AccessToken");
        let broad = String::from("broad");
        assert_eq!(token.metadata.get(META_IDENTITY_SCOPE), Some(&broad));
    }

    /// A `group:` variable entry becomes a secret node and leaves the graph
    /// partial, with a gap message that mentions the group name.
    #[test]
    fn variable_group_creates_secret_and_marks_partial() {
        let pipeline = r#"
variables:
  - group: MySecretGroup

steps:
  - script: echo hi
"#;
        let graph = parse_yaml(pipeline);
        let secret_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secret_nodes.len(), 1);

        let group_secret = &secret_nodes[0];
        assert_eq!(group_secret.name, "MySecretGroup");
        let flagged = String::from("true");
        assert_eq!(
            group_secret.metadata.get(META_VARIABLE_GROUP),
            Some(&flagged)
        );

        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        let gap_names_group = graph
            .completeness_gaps
            .iter()
            .any(|gap| gap.contains("MySecretGroup"));
        assert!(
            gap_names_group,
            "completeness gap should name the variable group"
        );
    }

    /// `azureSubscription` on a task adds a broad service-connection identity.
    #[test]
    fn task_with_azure_subscription_creates_service_connection_identity() {
        let pipeline = r#"
steps:
  - task: AzureCLI@2
    displayName: Deploy to Azure
    inputs:
      azureSubscription: MyServiceConnection
      scriptType: bash
      inlineScript: az group list
"#;
        let graph = parse_yaml(pipeline);
        // Expect exactly two identities: System.AccessToken and the connection.
        assert_eq!(graph.nodes_of_kind(NodeKind::Identity).count(), 2);

        let service_conn = graph
            .nodes_of_kind(NodeKind::Identity)
            .find(|node| node.name == "MyServiceConnection")
            .unwrap();
        let flagged = String::from("true");
        assert_eq!(
            service_conn.metadata.get(META_SERVICE_CONNECTION),
            Some(&flagged)
        );
        let broad = String::from("broad");
        assert_eq!(
            service_conn.metadata.get(META_IDENTITY_SCOPE),
            Some(&broad)
        );
    }

    /// `ConnectedServiceNameARM` is also recognised as a service connection.
    #[test]
    fn task_with_connected_service_name_creates_identity() {
        let pipeline = r#"
steps:
  - task: SqlAzureDacpacDeployment@1
    inputs:
      ConnectedServiceNameARM: MySqlConnection
"#;
        let graph = parse_yaml(pipeline);
        let has_sql_identity = graph
            .nodes_of_kind(NodeKind::Identity)
            .any(|node| node.name == "MySqlConnection");
        assert!(
            has_sql_identity,
            "connectedServiceNameARM should create identity"
        );
    }

    /// Inline `script:` steps live in the first-party trust zone.
    #[test]
    fn script_step_classified_as_first_party() {
        let pipeline = r#"
steps:
  - script: echo hi
    displayName: Say hi
"#;
        let graph = parse_yaml(pipeline);
        let step_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(step_nodes.len(), 1);
        assert_eq!(step_nodes[0].trust_zone, TrustZone::FirstParty);
    }

    /// Inline `bash:` steps live in the first-party trust zone.
    #[test]
    fn bash_step_classified_as_first_party() {
        let pipeline = r#"
steps:
  - bash: echo hi
"#;
        let graph = parse_yaml(pipeline);
        let step_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(step_nodes[0].trust_zone, TrustZone::FirstParty);
    }

    /// `task:` steps are classified as untrusted.
    #[test]
    fn task_step_classified_as_untrusted() {
        let pipeline = r#"
steps:
  - task: DotNetCoreCLI@2
    inputs:
      command: build
"#;
        let graph = parse_yaml(pipeline);
        let step_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(step_nodes.len(), 1);
        assert_eq!(step_nodes[0].trust_zone, TrustZone::Untrusted);
    }

    /// A `$(NAME)` reference inside a script produces a secret named `NAME`.
    #[test]
    fn dollar_paren_var_in_script_creates_secret() {
        let pipeline = r#"
steps:
  - script: |
      curl -H "Authorization: $(MY_API_TOKEN)" https://api.example.com
    displayName: Call API
"#;
        let graph = parse_yaml(pipeline);
        let secret_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secret_nodes.len(), 1);
        assert_eq!(secret_nodes[0].name, "MY_API_TOKEN");
    }

    /// Predefined `Build.*` / `Agent.*` / `System.*` variables are not secrets.
    #[test]
    fn predefined_ado_var_not_treated_as_secret() {
        let pipeline = r#"
steps:
  - script: |
      echo $(Build.BuildId)
      echo $(Agent.WorkFolder)
      echo $(System.DefaultWorkingDirectory)
    displayName: Print vars
"#;
        let graph = parse_yaml(pipeline);
        let secret_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        // Names are gathered up front purely for the failure message.
        let secret_names = secret_nodes.iter().map(|s| &s.name).collect::<Vec<_>>();
        assert!(
            secret_nodes.is_empty(),
            "predefined ADO vars should not be treated as secrets, got: {:?}",
            secret_names
        );
    }

    /// A `template:` step delegates to an image node named after the template
    /// path, and the graph is marked partial.
    #[test]
    fn template_reference_creates_delegates_to_and_marks_partial() {
        let pipeline = r#"
steps:
  - template: steps/deploy.yml
    parameters:
      env: production
"#;
        let graph = parse_yaml(pipeline);
        let step_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(step_nodes.len(), 1);

        let image_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(image_nodes.len(), 1);
        assert_eq!(image_nodes[0].name, "steps/deploy.yml");

        let delegate_count = graph
            .edges_from(step_nodes[0].id)
            .filter(|edge| edge.kind == EdgeKind::DelegatesTo)
            .count();
        assert_eq!(delegate_count, 1);

        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
    }

    /// Top-level `steps:` (no jobs) are all picked up.
    #[test]
    fn top_level_steps_no_jobs() {
        let pipeline = r#"
steps:
  - script: echo a
  - script: echo b
"#;
        let graph = parse_yaml(pipeline);
        assert_eq!(graph.nodes_of_kind(NodeKind::Step).count(), 2);
    }

    /// Top-level `jobs:` (no stages) contribute one step each here.
    #[test]
    fn top_level_jobs_no_stages() {
        let pipeline = r#"
jobs:
  - job: JobA
    steps:
      - script: echo a
  - job: JobB
    steps:
      - script: echo b
"#;
        let graph = parse_yaml(pipeline);
        assert_eq!(graph.nodes_of_kind(NodeKind::Step).count(), 2);
    }

    /// Steps nested under `stages:` → `jobs:` are all picked up.
    #[test]
    fn stages_with_nested_jobs_parsed() {
        let pipeline = r#"
stages:
  - stage: Build
    jobs:
      - job: Compile
        steps:
          - script: cargo build
  - stage: Test
    jobs:
      - job: UnitTest
        steps:
          - script: cargo test
"#;
        let graph = parse_yaml(pipeline);
        assert_eq!(graph.nodes_of_kind(NodeKind::Step).count(), 2);
    }

    /// Every step has exactly one `HasAccessTo` edge to System.AccessToken.
    #[test]
    fn all_steps_linked_to_system_access_token() {
        let pipeline = r#"
steps:
  - script: echo a
  - task: SomeTask@1
    inputs: {}
"#;
        let graph = parse_yaml(pipeline);
        let identity_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
        assert_eq!(identity_nodes.len(), 1);
        let token_id = identity_nodes[0].id;

        for step in graph.nodes_of_kind(NodeKind::Step) {
            let link_count = graph
                .edges_from(step.id)
                .filter(|edge| edge.kind == EdgeKind::HasAccessTo && edge.to == token_id)
                .count();
            assert_eq!(
                link_count, 1,
                "step '{}' must link to System.AccessToken",
                step.name
            );
        }
    }

    /// A list-style variable with `isSecret: true` becomes a secret node.
    #[test]
    fn named_secret_variable_creates_secret_node() {
        let pipeline = r#"
variables:
  - name: MY_PASSWORD
    value: dummy
    isSecret: true

steps:
  - script: echo hi
"#;
        let graph = parse_yaml(pipeline);
        let secret_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secret_nodes.len(), 1);
        assert_eq!(secret_nodes[0].name, "MY_PASSWORD");
    }

    /// Mapping-style variables carry no `isSecret`, so no secret nodes appear.
    #[test]
    fn variables_as_mapping_parsed() {
        let pipeline = r#"
variables:
  MY_VAR: hello
  ANOTHER_VAR: world

steps:
  - script: echo hi
"#;
        let graph = parse_yaml(pipeline);
        let has_secret = graph.nodes_of_kind(NodeKind::Secret).next().is_some();
        assert!(
            !has_secret,
            "plain mapping vars should not create secret nodes"
        );
    }

    /// `persistCredentials: true` on checkout yields one `PersistsTo` edge
    /// pointing at System.AccessToken.
    #[test]
    fn persist_credentials_creates_persists_to_edge() {
        let pipeline = r#"
steps:
  - checkout: self
    persistCredentials: true
  - script: git push
"#;
        let graph = parse_yaml(pipeline);
        let token_id = graph
            .nodes_of_kind(NodeKind::Identity)
            .find(|node| node.name == "System.AccessToken")
            .expect("System.AccessToken must exist")
            .id;

        let persist_count = graph
            .edges
            .iter()
            .filter(|edge| edge.kind == EdgeKind::PersistsTo && edge.to == token_id)
            .count();
        assert_eq!(
            persist_count, 1,
            "checkout with persistCredentials: true must produce exactly one PersistsTo edge"
        );
    }

    /// A plain checkout (no `persistCredentials`) yields no `PersistsTo` edge.
    #[test]
    fn checkout_without_persist_credentials_no_persists_to_edge() {
        let pipeline = r#"
steps:
  - checkout: self
  - script: echo hi
"#;
        let graph = parse_yaml(pipeline);
        let has_persist_edge = graph
            .edges
            .iter()
            .any(|edge| edge.kind == EdgeKind::PersistsTo);
        assert!(
            !has_persist_edge,
            "checkout without persistCredentials should not produce PersistsTo edge"
        );
    }

    /// Secrets interpolated into `-var "k=$(v)"` flags are tagged as exposed
    /// on the command line.
    #[test]
    fn var_flag_secret_marked_as_cli_flag_exposed() {
        let pipeline = r#"
steps:
  - script: |
      terraform apply \
        -var "db_password=$(db_password)" \
        -var "api_key=$(api_key)"
    displayName: Terraform apply
"#;
        let graph = parse_yaml(pipeline);
        let secret_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert!(
            !secret_nodes.is_empty(),
            "should detect secrets from -var flags"
        );

        let flagged = String::from("true");
        for secret in &secret_nodes {
            assert_eq!(
                secret.metadata.get(META_CLI_FLAG_EXPOSED),
                Some(&flagged),
                "secret '{}' passed via -var flag should be marked cli_flag_exposed",
                secret.name
            );
        }
    }

    /// A secret used outside a `-var` flag is not tagged as CLI-flag exposed.
    #[test]
    fn non_var_flag_secret_not_marked_as_cli_flag_exposed() {
        let pipeline = r#"
steps:
  - script: |
      curl -H "Authorization: $(MY_TOKEN)" https://api.example.com
"#;
        let graph = parse_yaml(pipeline);
        let secret_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secret_nodes.len(), 1);

        let is_exposed = secret_nodes[0].metadata.contains_key(META_CLI_FLAG_EXPOSED);
        assert!(
            !is_exposed,
            "non -var secret should not be marked as cli_flag_exposed"
        );
    }

    /// A step has exactly one `HasAccessTo` edge to a pipeline-level
    /// variable-group secret.
    #[test]
    fn step_linked_to_variable_group_secret() {
        let pipeline = r#"
variables:
  - group: ProdSecrets

steps:
  - script: deploy.sh
"#;
        let graph = parse_yaml(pipeline);
        let secret_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
        assert_eq!(secret_nodes.len(), 1);
        let secret_id = secret_nodes[0].id;

        let step_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        let link_count = graph
            .edges_from(step_nodes[0].id)
            .filter(|edge| edge.kind == EdgeKind::HasAccessTo && edge.to == secret_id)
            .count();
        assert_eq!(
            link_count, 1,
            "step should be linked to variable group secret"
        );
    }

    /// A `pr:` section records `"pr"` under the graph-level trigger key.
    #[test]
    fn pr_trigger_sets_meta_trigger_on_graph() {
        let pipeline = r#"
pr:
  - '*'

steps:
  - script: echo hi
"#;
        let graph = parse_yaml(pipeline);
        let pr = String::from("pr");
        assert_eq!(
            graph.metadata.get(META_TRIGGER),
            Some(&pr),
            "ADO pr: trigger should set graph META_TRIGGER"
        );
    }

    /// `pool.name` without `vmImage` becomes an image tagged self-hosted.
    #[test]
    fn self_hosted_pool_by_name_creates_image_with_self_hosted_metadata() {
        let pipeline = r#"
pool:
  name: my-self-hosted-pool

steps:
  - script: echo hi
"#;
        let graph = parse_yaml(pipeline);
        let image_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(image_nodes.len(), 1);

        let pool_image = &image_nodes[0];
        assert_eq!(pool_image.name, "my-self-hosted-pool");
        let flagged = String::from("true");
        assert_eq!(
            pool_image.metadata.get(META_SELF_HOSTED),
            Some(&flagged),
            "pool.name without vmImage must be tagged self-hosted"
        );
    }

    /// `pool.vmImage` becomes an image that is not tagged self-hosted.
    #[test]
    fn vm_image_pool_is_not_tagged_self_hosted() {
        let pipeline = r#"
pool:
  vmImage: ubuntu-latest

steps:
  - script: echo hi
"#;
        let graph = parse_yaml(pipeline);
        let image_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
        assert_eq!(image_nodes.len(), 1);

        let pool_image = &image_nodes[0];
        assert_eq!(pool_image.name, "ubuntu-latest");
        let is_self_hosted = pool_image.metadata.contains_key(META_SELF_HOSTED);
        assert!(
            !is_self_hosted,
            "pool.vmImage is Microsoft-hosted — must not be tagged self-hosted"
        );
    }

    /// The `checkout: self` step carries the checkout-self marker.
    #[test]
    fn checkout_self_step_tagged_with_meta_checkout_self() {
        let pipeline = r#"
steps:
  - checkout: self
  - script: echo hi
"#;
        let graph = parse_yaml(pipeline);
        assert_eq!(graph.nodes_of_kind(NodeKind::Step).count(), 2);

        let checkout_step = graph
            .nodes_of_kind(NodeKind::Step)
            .find(|step| step.metadata.contains_key(META_CHECKOUT_SELF))
            .expect("one step must be tagged META_CHECKOUT_SELF");
        let flagged = String::from("true");
        assert_eq!(
            checkout_step.metadata.get(META_CHECKOUT_SELF),
            Some(&flagged)
        );
    }

    /// A script emitting `##vso[task.setvariable ...]` is marked as writing
    /// the environment gate.
    #[test]
    fn vso_setvariable_sets_meta_writes_env_gate() {
        // Three-hash raw string: the YAML itself contains `"##`.
        let pipeline = r###"
steps:
  - script: |
      echo "##vso[task.setvariable variable=FOO]bar"
    displayName: Set variable
"###;
        let graph = parse_yaml(pipeline);
        let step_nodes: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
        assert_eq!(step_nodes.len(), 1);

        let flagged = String::from("true");
        assert_eq!(
            step_nodes[0].metadata.get(META_WRITES_ENV_GATE),
            Some(&flagged),
            "##vso[task.setvariable] must mark META_WRITES_ENV_GATE"
        );
    }

    /// Both spellings of `environment:` tag the job's steps with the
    /// approval marker; a job without it is left untagged.
    #[test]
    fn environment_key_tags_job_with_env_approval() {
        let approved = String::from("true");
        // True when at least one step carries META_ENV_APPROVAL == "true".
        let has_approved_step = |graph: &AuthorityGraph| {
            graph
                .nodes_of_kind(NodeKind::Step)
                .any(|step| step.metadata.get(META_ENV_APPROVAL) == Some(&approved))
        };

        // String form: `environment: production`
        let string_form = r#"
jobs:
  - deployment: DeployWeb
    environment: production
    steps:
      - script: echo deploying
        displayName: Deploy
"#;
        let string_graph = parse_yaml(string_form);
        assert!(
            has_approved_step(&string_graph),
            "string-form `environment:` must tag job's step nodes with META_ENV_APPROVAL"
        );

        // Mapping form: `environment: { name: staging }`
        let mapping_form = r#"
jobs:
  - deployment: DeployAPI
    environment:
      name: staging
      resourceType: VirtualMachine
    steps:
      - script: echo deploying
        displayName: Deploy
"#;
        let mapping_graph = parse_yaml(mapping_form);
        assert!(
            has_approved_step(&mapping_graph),
            "mapping-form `environment: {{ name: ... }}` must tag job's step nodes"
        );

        // Negative: a job with no `environment:` must not be tagged
        let no_environment = r#"
jobs:
  - job: Build
    steps:
      - script: echo building
"#;
        let plain_graph = parse_yaml(no_environment);
        let carries_marker = plain_graph
            .nodes_of_kind(NodeKind::Step)
            .any(|step| step.metadata.contains_key(META_ENV_APPROVAL));
        assert!(
            !carries_marker,
            "jobs without `environment:` must not carry META_ENV_APPROVAL"
        );
    }

    /// With a plain job next to a gated deployment job, only the deployment
    /// job's step receives the approval marker.
    #[test]
    fn environment_tag_isolated_to_gated_job_only() {
        let pipeline = r#"
jobs:
  - job: Build
    steps:
      - script: echo build
        displayName: build-step
  - deployment: DeployProd
    environment: production
    steps:
      - script: echo deploy
        displayName: deploy-step
"#;
        let graph = parse_yaml(pipeline);
        let build_step = graph
            .nodes_of_kind(NodeKind::Step)
            .find(|step| step.name == "build-step")
            .expect("build-step must exist");
        let deploy_step = graph
            .nodes_of_kind(NodeKind::Step)
            .find(|step| step.name == "deploy-step")
            .expect("deploy-step must exist");

        let build_is_tagged = build_step.metadata.contains_key(META_ENV_APPROVAL);
        assert!(!build_is_tagged, "non-gated job's step must not be tagged");

        let flagged = String::from("true");
        assert_eq!(
            deploy_step.metadata.get(META_ENV_APPROVAL),
            Some(&flagged),
            "gated deployment job's step must be tagged"
        );
    }
}