Skip to main content

taudit_parse_ado/
lib.rs

1use std::collections::{HashMap, HashSet};
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
8/// Azure DevOps YAML pipeline parser.
9pub struct AdoParser;
10
11impl PipelineParser for AdoParser {
12    fn platform(&self) -> &str {
13        "azure-devops"
14    }
15
16    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
17        let mut de = serde_yaml::Deserializer::from_str(content);
18        let doc = de
19            .next()
20            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
21        let pipeline: AdoPipeline = AdoPipeline::deserialize(doc)
22            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
23        let extra_docs = de.next().is_some();
24
25        let mut graph = AuthorityGraph::new(source.clone());
26        if extra_docs {
27            graph.mark_partial(
28                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
29            );
30        }
31
32        // Detect PR trigger — sets graph-level META_TRIGGER for trigger_context_mismatch.
33        let has_pr_trigger = pipeline.pr.is_some();
34        if has_pr_trigger {
35            graph.metadata.insert(META_TRIGGER.into(), "pr".into());
36        }
37
38        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
39
40        // System.AccessToken is always present — equivalent to GITHUB_TOKEN.
41        // Tagged implicit: ADO injects this token into every task by platform design;
42        // its exposure to marketplace tasks is structural, not a fixable misconfiguration.
43        let mut meta = HashMap::new();
44        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
45        meta.insert(META_IMPLICIT.into(), "true".into());
46        let token_id = graph.add_node_with_metadata(
47            NodeKind::Identity,
48            "System.AccessToken",
49            TrustZone::FirstParty,
50            meta,
51        );
52
53        // Pipeline-level pool: adds Image node, tagged self-hosted when applicable.
54        process_pool(&pipeline.pool, &mut graph);
55
56        // Pipeline-level variable groups and named secrets.
57        // plain_vars tracks non-secret named variables so $(VAR) refs in scripts
58        // don't generate false-positive Secret nodes for plain config values.
59        let mut plain_vars: HashSet<String> = HashSet::new();
60        let pipeline_secret_ids = process_variables(
61            &pipeline.variables,
62            &mut graph,
63            &mut secret_ids,
64            "pipeline",
65            &mut plain_vars,
66        );
67
68        // Determine pipeline structure: stages → jobs → steps, or jobs → steps, or steps only
69        if let Some(ref stages) = pipeline.stages {
70            for stage in stages {
71                // Stage-level template reference — delegate and mark Partial
72                if let Some(ref tpl) = stage.template {
73                    let stage_name = stage.stage.as_deref().unwrap_or("stage");
74                    add_template_delegation(stage_name, tpl, token_id, &mut graph);
75                    continue;
76                }
77
78                let stage_name = stage.stage.as_deref().unwrap_or("stage").to_string();
79                let stage_secret_ids = process_variables(
80                    &stage.variables,
81                    &mut graph,
82                    &mut secret_ids,
83                    &stage_name,
84                    &mut plain_vars,
85                );
86
87                for job in &stage.jobs {
88                    let job_name = job.effective_name();
89                    let job_secret_ids = process_variables(
90                        &job.variables,
91                        &mut graph,
92                        &mut secret_ids,
93                        &job_name,
94                        &mut plain_vars,
95                    );
96
97                    process_pool(&job.pool, &mut graph);
98
99                    let all_secrets: Vec<NodeId> = pipeline_secret_ids
100                        .iter()
101                        .chain(&stage_secret_ids)
102                        .chain(&job_secret_ids)
103                        .copied()
104                        .collect();
105
106                    process_steps(
107                        job.steps.as_deref().unwrap_or(&[]),
108                        &job_name,
109                        token_id,
110                        &all_secrets,
111                        &plain_vars,
112                        &mut graph,
113                        &mut secret_ids,
114                    );
115
116                    if let Some(ref tpl) = job.template {
117                        add_template_delegation(&job_name, tpl, token_id, &mut graph);
118                    }
119                }
120            }
121        } else if let Some(ref jobs) = pipeline.jobs {
122            for job in jobs {
123                let job_name = job.effective_name();
124                let job_secret_ids = process_variables(
125                    &job.variables,
126                    &mut graph,
127                    &mut secret_ids,
128                    &job_name,
129                    &mut plain_vars,
130                );
131
132                process_pool(&job.pool, &mut graph);
133
134                let all_secrets: Vec<NodeId> = pipeline_secret_ids
135                    .iter()
136                    .chain(&job_secret_ids)
137                    .copied()
138                    .collect();
139
140                process_steps(
141                    job.steps.as_deref().unwrap_or(&[]),
142                    &job_name,
143                    token_id,
144                    &all_secrets,
145                    &plain_vars,
146                    &mut graph,
147                    &mut secret_ids,
148                );
149
150                if let Some(ref tpl) = job.template {
151                    add_template_delegation(&job_name, tpl, token_id, &mut graph);
152                }
153            }
154        } else if let Some(ref steps) = pipeline.steps {
155            process_steps(
156                steps,
157                "pipeline",
158                token_id,
159                &pipeline_secret_ids,
160                &plain_vars,
161                &mut graph,
162                &mut secret_ids,
163            );
164        }
165
166        Ok(graph)
167    }
168}
169
170/// Process an ADO `pool:` block. ADO pools come in two shapes:
171///   - `pool: my-self-hosted-pool` (string shorthand — always self-hosted)
172///   - `pool: { name: my-pool }` (named pool — self-hosted)
173///   - `pool: { vmImage: ubuntu-latest }` (Microsoft-hosted)
174///   - `pool: { name: my-pool, vmImage: ubuntu-latest }` (hosted; vmImage wins)
175///
176/// Creates an Image node representing the agent environment. Self-hosted pools
177/// are tagged with META_SELF_HOSTED so downstream rules can flag them.
178fn process_pool(pool: &Option<serde_yaml::Value>, graph: &mut AuthorityGraph) {
179    let Some(pool_val) = pool else {
180        return;
181    };
182
183    let (image_name, is_self_hosted) = match pool_val {
184        serde_yaml::Value::String(s) => (s.clone(), true),
185        serde_yaml::Value::Mapping(map) => {
186            let name = map.get("name").and_then(|v| v.as_str());
187            let vm_image = map.get("vmImage").and_then(|v| v.as_str());
188            match (name, vm_image) {
189                (_, Some(vm)) => (vm.to_string(), false),
190                (Some(n), None) => (n.to_string(), true),
191                (None, None) => return,
192            }
193        }
194        _ => return,
195    };
196
197    let mut meta = HashMap::new();
198    if is_self_hosted {
199        meta.insert(META_SELF_HOSTED.into(), "true".into());
200    }
201    graph.add_node_with_metadata(NodeKind::Image, image_name, TrustZone::FirstParty, meta);
202}
203
204/// Process a variable list, creating Secret nodes and returning their IDs.
205/// Returns IDs for secrets only (not variable groups, which are opaque).
206/// Populates `plain_vars` with the names of non-secret named variables so
207/// downstream `$(VAR)` scanning can skip them.
208fn process_variables(
209    variables: &Option<AdoVariables>,
210    graph: &mut AuthorityGraph,
211    cache: &mut HashMap<String, NodeId>,
212    scope: &str,
213    plain_vars: &mut HashSet<String>,
214) -> Vec<NodeId> {
215    let mut ids = Vec::new();
216
217    let vars = match variables.as_ref() {
218        Some(v) => v,
219        None => return ids,
220    };
221
222    for var in &vars.0 {
223        match var {
224            AdoVariable::Group { group } => {
225                // Skip template-expression group names like `${{ parameters.env }}`.
226                // We can't resolve them statically — mark Partial but don't create
227                // a misleading Secret node with the expression as its name.
228                if group.contains("${{") {
229                    graph.mark_partial(format!(
230                        "variable group in {scope} uses template expression — group name unresolvable at parse time"
231                    ));
232                    continue;
233                }
234                let mut meta = HashMap::new();
235                meta.insert(META_VARIABLE_GROUP.into(), "true".into());
236                let id = graph.add_node_with_metadata(
237                    NodeKind::Secret,
238                    group.as_str(),
239                    TrustZone::FirstParty,
240                    meta,
241                );
242                cache.insert(group.clone(), id);
243                ids.push(id);
244                graph.mark_partial(format!(
245                    "variable group '{group}' in {scope} — contents unresolvable without ADO API access"
246                ));
247            }
248            AdoVariable::Named {
249                name, is_secret, ..
250            } => {
251                if *is_secret {
252                    let id = find_or_create_secret(graph, cache, name);
253                    ids.push(id);
254                } else {
255                    plain_vars.insert(name.clone());
256                }
257            }
258        }
259    }
260
261    ids
262}
263
264/// Process a list of ADO steps, adding nodes and edges to the graph.
265fn process_steps(
266    steps: &[AdoStep],
267    job_name: &str,
268    token_id: NodeId,
269    inherited_secrets: &[NodeId],
270    plain_vars: &HashSet<String>,
271    graph: &mut AuthorityGraph,
272    cache: &mut HashMap<String, NodeId>,
273) {
274    for (idx, step) in steps.iter().enumerate() {
275        // Template step — delegation, mark partial
276        if let Some(ref tpl) = step.template {
277            let step_name = step
278                .display_name
279                .as_deref()
280                .or(step.name.as_deref())
281                .map(|s| s.to_string())
282                .unwrap_or_else(|| format!("{job_name}[{idx}]"));
283            add_template_delegation(&step_name, tpl, token_id, graph);
284            continue;
285        }
286
287        // Determine step kind and trust zone
288        let (step_name, trust_zone, inline_script) = classify_step(step, job_name, idx);
289
290        let step_id = graph.add_node(NodeKind::Step, &step_name, trust_zone);
291
292        // Every step has access to System.AccessToken
293        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
294
295        // checkout step with persistCredentials: true writes the token to .git/config on disk,
296        // making it accessible to all subsequent steps and filesystem-level attackers.
297        if step.checkout.is_some() && step.persist_credentials == Some(true) {
298            graph.add_edge(step_id, token_id, EdgeKind::PersistsTo);
299        }
300
301        // `checkout: self` pulls the repo being built. In a PR trigger context this
302        // is the untrusted fork head — tag the step so downstream rules can gate on
303        // trigger context. Default ADO checkout (`checkout: self`) is the common case.
304        if let Some(ref ck) = step.checkout {
305            if ck == "self" {
306                if let Some(node) = graph.nodes.get_mut(step_id) {
307                    node.metadata
308                        .insert(META_CHECKOUT_SELF.into(), "true".into());
309                }
310            }
311        }
312
313        // Inherited pipeline/stage/job secrets
314        for &secret_id in inherited_secrets {
315            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
316        }
317
318        // Service connection detection from task inputs (case-insensitive key match)
319        if let Some(ref inputs) = step.inputs {
320            let service_conn_keys = [
321                "azuresubscription",
322                "connectedservicename",
323                "connectedservicenamearm",
324                "kubernetesserviceconnection",
325            ];
326            for (raw_key, val) in inputs {
327                let lower = raw_key.to_lowercase();
328                if !service_conn_keys.contains(&lower.as_str()) {
329                    continue;
330                }
331                let conn_name = yaml_value_as_str(val).unwrap_or(raw_key.as_str());
332                if !conn_name.starts_with("$(") {
333                    let mut meta = HashMap::new();
334                    meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
335                    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
336                    // ADO service connections are the platform's federated-identity equivalent
337                    // (modern Azure service connections use workload identity federation /
338                    // OIDC). Tag them so uplift_without_attestation treats ADO pipelines with
339                    // the same OIDC-parity logic applied to GHA.
340                    meta.insert(META_OIDC.into(), "true".into());
341                    let conn_id = graph.add_node_with_metadata(
342                        NodeKind::Identity,
343                        conn_name,
344                        TrustZone::FirstParty,
345                        meta,
346                    );
347                    graph.add_edge(step_id, conn_id, EdgeKind::HasAccessTo);
348                }
349            }
350
351            // Detect $(varName) references in task input values
352            for val in inputs.values() {
353                if let Some(s) = yaml_value_as_str(val) {
354                    extract_dollar_paren_secrets(s, step_id, plain_vars, graph, cache);
355                }
356            }
357        }
358
359        // Detect $(varName) in step env values
360        if let Some(ref env) = step.env {
361            for val in env.values() {
362                extract_dollar_paren_secrets(val, step_id, plain_vars, graph, cache);
363            }
364        }
365
366        // Detect $(varName) in inline script text
367        if let Some(ref script) = inline_script {
368            extract_dollar_paren_secrets(script, step_id, plain_vars, graph, cache);
369        }
370
371        // Detect ##vso[task.setvariable] — environment gate mutation in ADO pipelines
372        if let Some(ref script) = inline_script {
373            let lower = script.to_lowercase();
374            if lower.contains("##vso[task.setvariable") {
375                if let Some(node) = graph.nodes.get_mut(step_id) {
376                    node.metadata
377                        .insert(META_WRITES_ENV_GATE.into(), "true".into());
378                }
379            }
380        }
381    }
382}
383
384/// Classify an ADO step, returning (name, trust_zone, inline_script_text).
385fn classify_step(
386    step: &AdoStep,
387    job_name: &str,
388    idx: usize,
389) -> (String, TrustZone, Option<String>) {
390    let default_name = || format!("{job_name}[{idx}]");
391
392    let name = step
393        .display_name
394        .as_deref()
395        .or(step.name.as_deref())
396        .map(|s| s.to_string())
397        .unwrap_or_else(default_name);
398
399    if step.task.is_some() {
400        (name, TrustZone::Untrusted, None)
401    } else if let Some(ref s) = step.script {
402        (name, TrustZone::FirstParty, Some(s.clone()))
403    } else if let Some(ref s) = step.bash {
404        (name, TrustZone::FirstParty, Some(s.clone()))
405    } else if let Some(ref s) = step.powershell {
406        (name, TrustZone::FirstParty, Some(s.clone()))
407    } else if let Some(ref s) = step.pwsh {
408        (name, TrustZone::FirstParty, Some(s.clone()))
409    } else {
410        (name, TrustZone::FirstParty, None)
411    }
412}
413
414/// Add a DelegatesTo edge from a synthetic step node to a template image node.
415///
416/// Trust zone heuristic: templates referenced with `@repository` (e.g. `steps/deploy.yml@templates`)
417/// pull code from an external repository and are Untrusted. Plain relative paths like
418/// `steps/deploy.yml` resolve within the same repo and are FirstParty — mirroring how GHA
419/// treats `./local-action`.
420fn add_template_delegation(
421    step_name: &str,
422    template_path: &str,
423    token_id: NodeId,
424    graph: &mut AuthorityGraph,
425) {
426    let tpl_trust_zone = if template_path.contains('@') {
427        TrustZone::Untrusted
428    } else {
429        TrustZone::FirstParty
430    };
431    let step_id = graph.add_node(NodeKind::Step, step_name, TrustZone::FirstParty);
432    let tpl_id = graph.add_node(NodeKind::Image, template_path, tpl_trust_zone);
433    graph.add_edge(step_id, tpl_id, EdgeKind::DelegatesTo);
434    graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
435    graph.mark_partial(format!(
436        "template '{template_path}' cannot be resolved inline — authority within the template is unknown"
437    ));
438}
439
440/// Extract `$(varName)` references from a string, creating Secret nodes for
441/// non-predefined and non-plain ADO variables.
442/// Only content that is a valid ADO variable identifier (`[A-Za-z][A-Za-z0-9_]*`)
443/// is treated as a variable reference. This rejects PowerShell sub-expressions
444/// (`$($var)`), ADO template expressions (`${{ ... }}`), shell commands (`$(date)`),
445/// and anything with spaces or special characters.
446fn extract_dollar_paren_secrets(
447    text: &str,
448    step_id: NodeId,
449    plain_vars: &HashSet<String>,
450    graph: &mut AuthorityGraph,
451    cache: &mut HashMap<String, NodeId>,
452) {
453    let mut pos = 0;
454    let bytes = text.as_bytes();
455    while pos < bytes.len() {
456        if pos + 2 < bytes.len() && bytes[pos] == b'$' && bytes[pos + 1] == b'(' {
457            let start = pos + 2;
458            if let Some(end_offset) = text[start..].find(')') {
459                let var_name = &text[start..start + end_offset];
460                if is_valid_ado_identifier(var_name)
461                    && !is_predefined_ado_var(var_name)
462                    && !plain_vars.contains(var_name)
463                {
464                    let id = find_or_create_secret(graph, cache, var_name);
465                    // Mark secrets embedded in -var flag arguments: their values appear in
466                    // pipeline logs (command string is logged before masking, and Terraform
467                    // itself logs -var values in plan output and debug traces).
468                    if is_in_terraform_var_flag(text, pos) {
469                        if let Some(node) = graph.nodes.get_mut(id) {
470                            node.metadata
471                                .insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
472                        }
473                    }
474                    graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
475                }
476                pos = start + end_offset + 1;
477                continue;
478            }
479        }
480        pos += 1;
481    }
482}
483
/// Reports whether the `$(VAR)` starting at byte offset `var_pos` looks like it
/// sits inside a Terraform `-var` flag argument, e.g. `-var "key=$(VAR)"`.
///
/// Heuristic: the text between the start of the current line and `var_pos`
/// contains both `-var` and `=`. `var_pos` must be a char boundary within `text`.
fn is_in_terraform_var_flag(text: &str, var_pos: usize) -> bool {
    let preceding = &text[..var_pos];
    // Keep only the part of the current line that comes before the reference.
    let current_line = match preceding.rfind('\n') {
        Some(newline_at) => &preceding[newline_at + 1..],
        None => preceding,
    };
    current_line.contains('=') && current_line.contains("-var")
}
492
/// Returns true if `name` has the shape of an ADO variable name as recognised
/// by this parser: an ASCII letter followed by any number of ASCII letters,
/// digits, underscores, or dots (dots allow names like `Build.BuildId`).
///
/// The empty string, PowerShell vars (`$name`), template expressions
/// (`{{ ... }}`), and complex expressions (`name -join ','`) are rejected.
/// Single-word names such as `date` are accepted — they cannot be told apart
/// from variable names by shape alone.
fn is_valid_ado_identifier(name: &str) -> bool {
    // Every accepted character is ASCII, so a byte-wise check is equivalent to
    // a char-wise one: any non-ASCII char contributes bytes >= 0x80, which fail.
    let Some((&first, rest)) = name.as_bytes().split_first() else {
        return false;
    };
    first.is_ascii_alphabetic()
        && rest
            .iter()
            .all(|&b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.'))
}
507
/// Returns true if a variable name is a well-known ADO predefined variable.
/// These are system-provided and are not treated as secrets by the scanner.
///
/// A name is predefined when it is exactly `TF_BUILD` or starts with one of the
/// system namespaces (`Build.`, `Agent.`, `System.`, …). Matching ignores ASCII
/// case because ADO resolves variable names case-insensitively, so
/// `$(build.buildId)` refers to the same variable as `$(Build.BuildId)`.
///
/// `TF_BUILD` is matched exactly rather than as a prefix: a user variable such
/// as `TF_BUILD_TOKEN` is not system-provided and must stay visible to the
/// secret scanner.
fn is_predefined_ado_var(name: &str) -> bool {
    const NAMESPACE_PREFIXES: [&str; 9] = [
        "Build.",
        "Agent.",
        "System.",
        "Pipeline.",
        "Release.",
        "Environment.",
        "Strategy.",
        "Deployment.",
        "Resources.",
    ];

    if name.eq_ignore_ascii_case("TF_BUILD") {
        return true;
    }

    NAMESPACE_PREFIXES.iter().any(|prefix| {
        // `get` yields None when `name` is shorter than the prefix or the cut
        // would split a multi-byte character — neither can be a match.
        name.get(..prefix.len())
            .map_or(false, |head| head.eq_ignore_ascii_case(prefix))
    })
}
525
526fn find_or_create_secret(
527    graph: &mut AuthorityGraph,
528    cache: &mut HashMap<String, NodeId>,
529    name: &str,
530) -> NodeId {
531    if let Some(&id) = cache.get(name) {
532        return id;
533    }
534    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
535    cache.insert(name.to_string(), id);
536    id
537}
538
539fn yaml_value_as_str(val: &serde_yaml::Value) -> Option<&str> {
540    val.as_str()
541}
542
543// ── Serde models for ADO YAML ─────────────────────────────
544
/// Top-level ADO pipeline definition.
/// ADO pipelines come in three shapes:
///   (a) stages → jobs → steps
///   (b) jobs → steps (no stages key)
///   (c) steps only (no stages or jobs key)
///
/// Every key is optional; absent keys deserialize to `None`. The parser looks
/// at `stages` first, then `jobs`, then `steps`, and walks only the first one
/// that is present.
#[derive(Debug, Deserialize)]
pub struct AdoPipeline {
    /// Raw `trigger:` value, kept untyped.
    // NOTE(review): no visible code reads this field — confirm whether it is used elsewhere.
    #[serde(default)]
    pub trigger: Option<serde_yaml::Value>,
    /// Raw `pr:` value, kept untyped. Read by the parser to decide whether the
    /// graph is tagged as PR-triggered.
    #[serde(default)]
    pub pr: Option<serde_yaml::Value>,
    /// Pipeline-level `variables:` block (sequence or mapping form).
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Shape (a): list of stages, each holding its own jobs.
    #[serde(default)]
    pub stages: Option<Vec<AdoStage>>,
    /// Shape (b): top-level job list; only consulted when `stages` is absent.
    #[serde(default)]
    pub jobs: Option<Vec<AdoJob>>,
    /// Shape (c): top-level step list; only consulted when `stages` and `jobs` are absent.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Pipeline-level `pool:` value — either a string or a mapping; interpreted by `process_pool`.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
}
567
/// One entry of the pipeline's `stages:` list — either a concrete stage with
/// jobs or a reference to a stage template.
#[derive(Debug, Deserialize)]
pub struct AdoStage {
    /// Stage identifier. Absent when the stage entry is a template reference.
    /// The parser falls back to the label `"stage"` when this is missing.
    #[serde(default)]
    pub stage: Option<String>,
    /// Stage-level template reference (`- template: path/to/stage.yml`).
    /// When set, the parser records a delegation and skips the stage's jobs.
    #[serde(default)]
    pub template: Option<String>,
    /// Stage-level `variables:` block (sequence or mapping form).
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Jobs of this stage; empty when the key is absent.
    #[serde(default)]
    pub jobs: Vec<AdoJob>,
}
581
582#[derive(Debug, Deserialize)]
583pub struct AdoJob {
584    /// Regular job identifier
585    #[serde(default)]
586    pub job: Option<String>,
587    /// Deployment job identifier
588    #[serde(default)]
589    pub deployment: Option<String>,
590    #[serde(default)]
591    pub variables: Option<AdoVariables>,
592    #[serde(default)]
593    pub steps: Option<Vec<AdoStep>>,
594    #[serde(default)]
595    pub pool: Option<serde_yaml::Value>,
596    /// Job-level template reference
597    #[serde(default)]
598    pub template: Option<String>,
599}
600
601impl AdoJob {
602    pub fn effective_name(&self) -> String {
603        self.job
604            .as_deref()
605            .or(self.deployment.as_deref())
606            .unwrap_or("job")
607            .to_string()
608    }
609}
610
/// One entry of a `steps:` list. All keys are optional; which of `task`,
/// `script`, `bash`, `powershell`, `pwsh`, `template`, or `checkout` is set
/// determines how the parser treats the step.
#[derive(Debug, Deserialize)]
pub struct AdoStep {
    /// Task reference e.g. `AzureCLI@2`
    #[serde(default)]
    pub task: Option<String>,
    /// Inline script (cmd/sh)
    #[serde(default)]
    pub script: Option<String>,
    /// Inline bash script
    #[serde(default)]
    pub bash: Option<String>,
    /// Inline PowerShell script
    #[serde(default)]
    pub powershell: Option<String>,
    /// Cross-platform PowerShell
    #[serde(default)]
    pub pwsh: Option<String>,
    /// Step-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// `displayName:` of the step; preferred label for the step's graph node.
    #[serde(rename = "displayName", default)]
    pub display_name: Option<String>,
    /// Legacy name alias; used as the node label when `displayName` is absent.
    #[serde(default)]
    pub name: Option<String>,
    /// Step `env:` mapping. Values are scanned for `$(VAR)` references.
    // NOTE(review): values are typed as String — presumably a non-string env
    // value (number/bool) fails deserialization; confirm against serde_yaml.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Task inputs (key → value, but values may be nested)
    #[serde(default)]
    pub inputs: Option<HashMap<String, serde_yaml::Value>>,
    /// Checkout step target (e.g. `self`, a repo alias, or `none`)
    #[serde(default)]
    pub checkout: Option<String>,
    /// When true on a checkout step, writes credentials to .git/config for subsequent steps.
    #[serde(rename = "persistCredentials", default)]
    pub persist_credentials: Option<bool>,
}
648
649/// ADO `variables:` block. Can be a sequence (list of group/name-value entries)
650/// or a mapping (variableName: value). We normalise both into a Vec<AdoVariable>.
651#[derive(Debug, Default)]
652pub struct AdoVariables(pub Vec<AdoVariable>);
653
654impl<'de> serde::Deserialize<'de> for AdoVariables {
655    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
656    where
657        D: serde::Deserializer<'de>,
658    {
659        let raw = serde_yaml::Value::deserialize(deserializer)?;
660        let mut vars = Vec::new();
661
662        match raw {
663            serde_yaml::Value::Sequence(seq) => {
664                for item in seq {
665                    if let Some(map) = item.as_mapping() {
666                        if let Some(group_val) = map.get("group") {
667                            if let Some(group) = group_val.as_str() {
668                                vars.push(AdoVariable::Group {
669                                    group: group.to_string(),
670                                });
671                                continue;
672                            }
673                        }
674                        let name = map
675                            .get("name")
676                            .and_then(|v| v.as_str())
677                            .unwrap_or("")
678                            .to_string();
679                        let value = map
680                            .get("value")
681                            .and_then(|v| v.as_str())
682                            .unwrap_or("")
683                            .to_string();
684                        let is_secret = map
685                            .get("isSecret")
686                            .and_then(|v| v.as_bool())
687                            .unwrap_or(false);
688                        vars.push(AdoVariable::Named {
689                            name,
690                            value,
691                            is_secret,
692                        });
693                    }
694                }
695            }
696            serde_yaml::Value::Mapping(map) => {
697                for (k, v) in map {
698                    let name = k.as_str().unwrap_or("").to_string();
699                    let value = v.as_str().unwrap_or("").to_string();
700                    vars.push(AdoVariable::Named {
701                        name,
702                        value,
703                        is_secret: false,
704                    });
705                }
706            }
707            _ => {}
708        }
709
710        Ok(AdoVariables(vars))
711    }
712}
713
/// A single normalised entry of a `variables:` block.
#[derive(Debug)]
pub enum AdoVariable {
    /// Reference to a variable group (`- group: <name>`); its contents are not
    /// present in the YAML.
    Group {
        /// Group name exactly as written in the YAML.
        group: String,
    },
    /// An inline variable declared by name.
    Named {
        /// Variable name; empty when the entry had no string `name`.
        name: String,
        /// Variable value; empty when the value was absent or not a string.
        value: String,
        /// Whether the entry was flagged `isSecret: true`. Always false for
        /// variables declared in mapping form.
        is_secret: bool,
    },
}
725
726#[cfg(test)]
727mod tests {
728    use super::*;
729
730    fn parse(yaml: &str) -> AuthorityGraph {
731        let parser = AdoParser;
732        let source = PipelineSource {
733            file: "azure-pipelines.yml".into(),
734            repo: None,
735            git_ref: None,
736        };
737        parser.parse(yaml, &source).unwrap()
738    }
739
740    #[test]
741    fn parses_simple_pipeline() {
742        let yaml = r#"
743trigger:
744  - main
745
746jobs:
747  - job: Build
748    steps:
749      - script: echo hello
750        displayName: Say hello
751"#;
752        let graph = parse(yaml);
753        assert!(graph.nodes.len() >= 2); // System.AccessToken + step
754    }
755
756    #[test]
757    fn system_access_token_created() {
758        let yaml = r#"
759steps:
760  - script: echo hi
761"#;
762        let graph = parse(yaml);
763        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
764        assert_eq!(identities.len(), 1);
765        assert_eq!(identities[0].name, "System.AccessToken");
766        assert_eq!(
767            identities[0].metadata.get(META_IDENTITY_SCOPE),
768            Some(&"broad".to_string())
769        );
770    }
771
772    #[test]
773    fn variable_group_creates_secret_and_marks_partial() {
774        let yaml = r#"
775variables:
776  - group: MySecretGroup
777
778steps:
779  - script: echo hi
780"#;
781        let graph = parse(yaml);
782        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
783        assert_eq!(secrets.len(), 1);
784        assert_eq!(secrets[0].name, "MySecretGroup");
785        assert_eq!(
786            secrets[0].metadata.get(META_VARIABLE_GROUP),
787            Some(&"true".to_string())
788        );
789        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
790        assert!(
791            graph
792                .completeness_gaps
793                .iter()
794                .any(|g| g.contains("MySecretGroup")),
795            "completeness gap should name the variable group"
796        );
797    }
798
799    #[test]
800    fn task_with_azure_subscription_creates_service_connection_identity() {
801        let yaml = r#"
802steps:
803  - task: AzureCLI@2
804    displayName: Deploy to Azure
805    inputs:
806      azureSubscription: MyServiceConnection
807      scriptType: bash
808      inlineScript: az group list
809"#;
810        let graph = parse(yaml);
811        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
812        // System.AccessToken + service connection
813        assert_eq!(identities.len(), 2);
814        let conn = identities
815            .iter()
816            .find(|i| i.name == "MyServiceConnection")
817            .unwrap();
818        assert_eq!(
819            conn.metadata.get(META_SERVICE_CONNECTION),
820            Some(&"true".to_string())
821        );
822        assert_eq!(
823            conn.metadata.get(META_IDENTITY_SCOPE),
824            Some(&"broad".to_string())
825        );
826    }
827
828    #[test]
829    fn task_with_connected_service_name_creates_identity() {
830        let yaml = r#"
831steps:
832  - task: SqlAzureDacpacDeployment@1
833    inputs:
834      ConnectedServiceNameARM: MySqlConnection
835"#;
836        let graph = parse(yaml);
837        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
838        assert!(
839            identities.iter().any(|i| i.name == "MySqlConnection"),
840            "connectedServiceNameARM should create identity"
841        );
842    }
843
844    #[test]
845    fn script_step_classified_as_first_party() {
846        let yaml = r#"
847steps:
848  - script: echo hi
849    displayName: Say hi
850"#;
851        let graph = parse(yaml);
852        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
853        assert_eq!(steps.len(), 1);
854        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
855    }
856
857    #[test]
858    fn bash_step_classified_as_first_party() {
859        let yaml = r#"
860steps:
861  - bash: echo hi
862"#;
863        let graph = parse(yaml);
864        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
865        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
866    }
867
868    #[test]
869    fn task_step_classified_as_untrusted() {
870        let yaml = r#"
871steps:
872  - task: DotNetCoreCLI@2
873    inputs:
874      command: build
875"#;
876        let graph = parse(yaml);
877        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
878        assert_eq!(steps.len(), 1);
879        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
880    }
881
882    #[test]
883    fn dollar_paren_var_in_script_creates_secret() {
884        let yaml = r#"
885steps:
886  - script: |
887      curl -H "Authorization: $(MY_API_TOKEN)" https://api.example.com
888    displayName: Call API
889"#;
890        let graph = parse(yaml);
891        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
892        assert_eq!(secrets.len(), 1);
893        assert_eq!(secrets[0].name, "MY_API_TOKEN");
894    }
895
896    #[test]
897    fn predefined_ado_var_not_treated_as_secret() {
898        let yaml = r#"
899steps:
900  - script: |
901      echo $(Build.BuildId)
902      echo $(Agent.WorkFolder)
903      echo $(System.DefaultWorkingDirectory)
904    displayName: Print vars
905"#;
906        let graph = parse(yaml);
907        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
908        assert!(
909            secrets.is_empty(),
910            "predefined ADO vars should not be treated as secrets, got: {:?}",
911            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
912        );
913    }
914
915    #[test]
916    fn template_reference_creates_delegates_to_and_marks_partial() {
917        let yaml = r#"
918steps:
919  - template: steps/deploy.yml
920    parameters:
921      env: production
922"#;
923        let graph = parse(yaml);
924        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
925        assert_eq!(steps.len(), 1);
926
927        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
928        assert_eq!(images.len(), 1);
929        assert_eq!(images[0].name, "steps/deploy.yml");
930
931        let delegates: Vec<_> = graph
932            .edges_from(steps[0].id)
933            .filter(|e| e.kind == EdgeKind::DelegatesTo)
934            .collect();
935        assert_eq!(delegates.len(), 1);
936
937        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
938    }
939
940    #[test]
941    fn top_level_steps_no_jobs() {
942        let yaml = r#"
943steps:
944  - script: echo a
945  - script: echo b
946"#;
947        let graph = parse(yaml);
948        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
949        assert_eq!(steps.len(), 2);
950    }
951
952    #[test]
953    fn top_level_jobs_no_stages() {
954        let yaml = r#"
955jobs:
956  - job: JobA
957    steps:
958      - script: echo a
959  - job: JobB
960    steps:
961      - script: echo b
962"#;
963        let graph = parse(yaml);
964        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
965        assert_eq!(steps.len(), 2);
966    }
967
968    #[test]
969    fn stages_with_nested_jobs_parsed() {
970        let yaml = r#"
971stages:
972  - stage: Build
973    jobs:
974      - job: Compile
975        steps:
976          - script: cargo build
977  - stage: Test
978    jobs:
979      - job: UnitTest
980        steps:
981          - script: cargo test
982"#;
983        let graph = parse(yaml);
984        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
985        assert_eq!(steps.len(), 2);
986    }
987
988    #[test]
989    fn all_steps_linked_to_system_access_token() {
990        let yaml = r#"
991steps:
992  - script: echo a
993  - task: SomeTask@1
994    inputs: {}
995"#;
996        let graph = parse(yaml);
997        let token: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
998        assert_eq!(token.len(), 1);
999        let token_id = token[0].id;
1000
1001        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1002        for step in &steps {
1003            let links: Vec<_> = graph
1004                .edges_from(step.id)
1005                .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == token_id)
1006                .collect();
1007            assert_eq!(
1008                links.len(),
1009                1,
1010                "step '{}' must link to System.AccessToken",
1011                step.name
1012            );
1013        }
1014    }
1015
1016    #[test]
1017    fn named_secret_variable_creates_secret_node() {
1018        let yaml = r#"
1019variables:
1020  - name: MY_PASSWORD
1021    value: dummy
1022    isSecret: true
1023
1024steps:
1025  - script: echo hi
1026"#;
1027        let graph = parse(yaml);
1028        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1029        assert_eq!(secrets.len(), 1);
1030        assert_eq!(secrets[0].name, "MY_PASSWORD");
1031    }
1032
1033    #[test]
1034    fn variables_as_mapping_parsed() {
1035        let yaml = r#"
1036variables:
1037  MY_VAR: hello
1038  ANOTHER_VAR: world
1039
1040steps:
1041  - script: echo hi
1042"#;
1043        let graph = parse(yaml);
1044        // Mapping-style variables without isSecret — no secret nodes created
1045        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1046        assert!(
1047            secrets.is_empty(),
1048            "plain mapping vars should not create secret nodes"
1049        );
1050    }
1051
1052    #[test]
1053    fn persist_credentials_creates_persists_to_edge() {
1054        let yaml = r#"
1055steps:
1056  - checkout: self
1057    persistCredentials: true
1058  - script: git push
1059"#;
1060        let graph = parse(yaml);
1061        let token_id = graph
1062            .nodes_of_kind(NodeKind::Identity)
1063            .find(|n| n.name == "System.AccessToken")
1064            .expect("System.AccessToken must exist")
1065            .id;
1066
1067        let persists_edges: Vec<_> = graph
1068            .edges
1069            .iter()
1070            .filter(|e| e.kind == EdgeKind::PersistsTo && e.to == token_id)
1071            .collect();
1072        assert_eq!(
1073            persists_edges.len(),
1074            1,
1075            "checkout with persistCredentials: true must produce exactly one PersistsTo edge"
1076        );
1077    }
1078
1079    #[test]
1080    fn checkout_without_persist_credentials_no_persists_to_edge() {
1081        let yaml = r#"
1082steps:
1083  - checkout: self
1084  - script: echo hi
1085"#;
1086        let graph = parse(yaml);
1087        let persists_edges: Vec<_> = graph
1088            .edges
1089            .iter()
1090            .filter(|e| e.kind == EdgeKind::PersistsTo)
1091            .collect();
1092        assert!(
1093            persists_edges.is_empty(),
1094            "checkout without persistCredentials should not produce PersistsTo edge"
1095        );
1096    }
1097
1098    #[test]
1099    fn var_flag_secret_marked_as_cli_flag_exposed() {
1100        let yaml = r#"
1101steps:
1102  - script: |
1103      terraform apply \
1104        -var "db_password=$(db_password)" \
1105        -var "api_key=$(api_key)"
1106    displayName: Terraform apply
1107"#;
1108        let graph = parse(yaml);
1109        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1110        assert!(!secrets.is_empty(), "should detect secrets from -var flags");
1111        for secret in &secrets {
1112            assert_eq!(
1113                secret.metadata.get(META_CLI_FLAG_EXPOSED),
1114                Some(&"true".to_string()),
1115                "secret '{}' passed via -var flag should be marked cli_flag_exposed",
1116                secret.name
1117            );
1118        }
1119    }
1120
1121    #[test]
1122    fn non_var_flag_secret_not_marked_as_cli_flag_exposed() {
1123        let yaml = r#"
1124steps:
1125  - script: |
1126      curl -H "Authorization: $(MY_TOKEN)" https://api.example.com
1127"#;
1128        let graph = parse(yaml);
1129        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1130        assert_eq!(secrets.len(), 1);
1131        assert!(
1132            !secrets[0].metadata.contains_key(META_CLI_FLAG_EXPOSED),
1133            "non -var secret should not be marked as cli_flag_exposed"
1134        );
1135    }
1136
1137    #[test]
1138    fn step_linked_to_variable_group_secret() {
1139        let yaml = r#"
1140variables:
1141  - group: ProdSecrets
1142
1143steps:
1144  - script: deploy.sh
1145"#;
1146        let graph = parse(yaml);
1147        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1148        assert_eq!(secrets.len(), 1);
1149        let secret_id = secrets[0].id;
1150
1151        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1152        let links: Vec<_> = graph
1153            .edges_from(steps[0].id)
1154            .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == secret_id)
1155            .collect();
1156        assert_eq!(
1157            links.len(),
1158            1,
1159            "step should be linked to variable group secret"
1160        );
1161    }
1162
1163    #[test]
1164    fn pr_trigger_sets_meta_trigger_on_graph() {
1165        let yaml = r#"
1166pr:
1167  - '*'
1168
1169steps:
1170  - script: echo hi
1171"#;
1172        let graph = parse(yaml);
1173        assert_eq!(
1174            graph.metadata.get(META_TRIGGER),
1175            Some(&"pr".to_string()),
1176            "ADO pr: trigger should set graph META_TRIGGER"
1177        );
1178    }
1179
1180    #[test]
1181    fn self_hosted_pool_by_name_creates_image_with_self_hosted_metadata() {
1182        let yaml = r#"
1183pool:
1184  name: my-self-hosted-pool
1185
1186steps:
1187  - script: echo hi
1188"#;
1189        let graph = parse(yaml);
1190        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1191        assert_eq!(images.len(), 1);
1192        assert_eq!(images[0].name, "my-self-hosted-pool");
1193        assert_eq!(
1194            images[0].metadata.get(META_SELF_HOSTED),
1195            Some(&"true".to_string()),
1196            "pool.name without vmImage must be tagged self-hosted"
1197        );
1198    }
1199
1200    #[test]
1201    fn vm_image_pool_is_not_tagged_self_hosted() {
1202        let yaml = r#"
1203pool:
1204  vmImage: ubuntu-latest
1205
1206steps:
1207  - script: echo hi
1208"#;
1209        let graph = parse(yaml);
1210        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1211        assert_eq!(images.len(), 1);
1212        assert_eq!(images[0].name, "ubuntu-latest");
1213        assert!(
1214            !images[0].metadata.contains_key(META_SELF_HOSTED),
1215            "pool.vmImage is Microsoft-hosted — must not be tagged self-hosted"
1216        );
1217    }
1218
1219    #[test]
1220    fn checkout_self_step_tagged_with_meta_checkout_self() {
1221        let yaml = r#"
1222steps:
1223  - checkout: self
1224  - script: echo hi
1225"#;
1226        let graph = parse(yaml);
1227        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1228        assert_eq!(steps.len(), 2);
1229        let checkout_step = steps
1230            .iter()
1231            .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
1232            .expect("one step must be tagged META_CHECKOUT_SELF");
1233        assert_eq!(
1234            checkout_step.metadata.get(META_CHECKOUT_SELF),
1235            Some(&"true".to_string())
1236        );
1237    }
1238
1239    #[test]
1240    fn vso_setvariable_sets_meta_writes_env_gate() {
1241        let yaml = r###"
1242steps:
1243  - script: |
1244      echo "##vso[task.setvariable variable=FOO]bar"
1245    displayName: Set variable
1246"###;
1247        let graph = parse(yaml);
1248        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1249        assert_eq!(steps.len(), 1);
1250        assert_eq!(
1251            steps[0].metadata.get(META_WRITES_ENV_GATE),
1252            Some(&"true".to_string()),
1253            "##vso[task.setvariable] must mark META_WRITES_ENV_GATE"
1254        );
1255    }
1256}