Skip to main content

taudit_parse_ado/
lib.rs

1use std::collections::{HashMap, HashSet};
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
/// Azure DevOps YAML pipeline parser.
///
/// A stateless unit struct; all behavior lives in its `PipelineParser`
/// implementation, which reports the platform as `"azure-devops"`.
pub struct AdoParser;
10
11impl PipelineParser for AdoParser {
12    fn platform(&self) -> &str {
13        "azure-devops"
14    }
15
16    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
17        let mut de = serde_yaml::Deserializer::from_str(content);
18        let doc = de
19            .next()
20            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
21        let pipeline: AdoPipeline = AdoPipeline::deserialize(doc)
22            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
23        let extra_docs = de.next().is_some();
24
25        let mut graph = AuthorityGraph::new(source.clone());
26        if extra_docs {
27            graph.mark_partial(
28                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
29            );
30        }
31
32        // Detect PR trigger — sets graph-level META_TRIGGER for trigger_context_mismatch.
33        let has_pr_trigger = pipeline.pr.is_some();
34        if has_pr_trigger {
35            graph.metadata.insert(META_TRIGGER.into(), "pr".into());
36        }
37
38        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
39
40        // System.AccessToken is always present — equivalent to GITHUB_TOKEN.
41        let mut meta = HashMap::new();
42        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
43        let token_id = graph.add_node_with_metadata(
44            NodeKind::Identity,
45            "System.AccessToken",
46            TrustZone::FirstParty,
47            meta,
48        );
49
50        // Pipeline-level pool: adds Image node, tagged self-hosted when applicable.
51        process_pool(&pipeline.pool, &mut graph);
52
53        // Pipeline-level variable groups and named secrets.
54        // plain_vars tracks non-secret named variables so $(VAR) refs in scripts
55        // don't generate false-positive Secret nodes for plain config values.
56        let mut plain_vars: HashSet<String> = HashSet::new();
57        let pipeline_secret_ids = process_variables(
58            &pipeline.variables,
59            &mut graph,
60            &mut secret_ids,
61            "pipeline",
62            &mut plain_vars,
63        );
64
65        // Determine pipeline structure: stages → jobs → steps, or jobs → steps, or steps only
66        if let Some(ref stages) = pipeline.stages {
67            for stage in stages {
68                // Stage-level template reference — delegate and mark Partial
69                if let Some(ref tpl) = stage.template {
70                    let stage_name = stage.stage.as_deref().unwrap_or("stage");
71                    add_template_delegation(stage_name, tpl, token_id, &mut graph);
72                    continue;
73                }
74
75                let stage_name = stage.stage.as_deref().unwrap_or("stage").to_string();
76                let stage_secret_ids = process_variables(
77                    &stage.variables,
78                    &mut graph,
79                    &mut secret_ids,
80                    &stage_name,
81                    &mut plain_vars,
82                );
83
84                for job in &stage.jobs {
85                    let job_name = job.effective_name();
86                    let job_secret_ids = process_variables(
87                        &job.variables,
88                        &mut graph,
89                        &mut secret_ids,
90                        &job_name,
91                        &mut plain_vars,
92                    );
93
94                    process_pool(&job.pool, &mut graph);
95
96                    let all_secrets: Vec<NodeId> = pipeline_secret_ids
97                        .iter()
98                        .chain(&stage_secret_ids)
99                        .chain(&job_secret_ids)
100                        .copied()
101                        .collect();
102
103                    process_steps(
104                        job.steps.as_deref().unwrap_or(&[]),
105                        &job_name,
106                        token_id,
107                        &all_secrets,
108                        &plain_vars,
109                        &mut graph,
110                        &mut secret_ids,
111                    );
112
113                    if let Some(ref tpl) = job.template {
114                        add_template_delegation(&job_name, tpl, token_id, &mut graph);
115                    }
116                }
117            }
118        } else if let Some(ref jobs) = pipeline.jobs {
119            for job in jobs {
120                let job_name = job.effective_name();
121                let job_secret_ids = process_variables(
122                    &job.variables,
123                    &mut graph,
124                    &mut secret_ids,
125                    &job_name,
126                    &mut plain_vars,
127                );
128
129                process_pool(&job.pool, &mut graph);
130
131                let all_secrets: Vec<NodeId> = pipeline_secret_ids
132                    .iter()
133                    .chain(&job_secret_ids)
134                    .copied()
135                    .collect();
136
137                process_steps(
138                    job.steps.as_deref().unwrap_or(&[]),
139                    &job_name,
140                    token_id,
141                    &all_secrets,
142                    &plain_vars,
143                    &mut graph,
144                    &mut secret_ids,
145                );
146
147                if let Some(ref tpl) = job.template {
148                    add_template_delegation(&job_name, tpl, token_id, &mut graph);
149                }
150            }
151        } else if let Some(ref steps) = pipeline.steps {
152            process_steps(
153                steps,
154                "pipeline",
155                token_id,
156                &pipeline_secret_ids,
157                &plain_vars,
158                &mut graph,
159                &mut secret_ids,
160            );
161        }
162
163        Ok(graph)
164    }
165}
166
167/// Process an ADO `pool:` block. ADO pools come in two shapes:
168///   - `pool: my-self-hosted-pool` (string shorthand — always self-hosted)
169///   - `pool: { name: my-pool }` (named pool — self-hosted)
170///   - `pool: { vmImage: ubuntu-latest }` (Microsoft-hosted)
171///   - `pool: { name: my-pool, vmImage: ubuntu-latest }` (hosted; vmImage wins)
172///
173/// Creates an Image node representing the agent environment. Self-hosted pools
174/// are tagged with META_SELF_HOSTED so downstream rules can flag them.
175fn process_pool(pool: &Option<serde_yaml::Value>, graph: &mut AuthorityGraph) {
176    let Some(pool_val) = pool else {
177        return;
178    };
179
180    let (image_name, is_self_hosted) = match pool_val {
181        serde_yaml::Value::String(s) => (s.clone(), true),
182        serde_yaml::Value::Mapping(map) => {
183            let name = map.get("name").and_then(|v| v.as_str());
184            let vm_image = map.get("vmImage").and_then(|v| v.as_str());
185            match (name, vm_image) {
186                (_, Some(vm)) => (vm.to_string(), false),
187                (Some(n), None) => (n.to_string(), true),
188                (None, None) => return,
189            }
190        }
191        _ => return,
192    };
193
194    let mut meta = HashMap::new();
195    if is_self_hosted {
196        meta.insert(META_SELF_HOSTED.into(), "true".into());
197    }
198    graph.add_node_with_metadata(NodeKind::Image, image_name, TrustZone::FirstParty, meta);
199}
200
201/// Process a variable list, creating Secret nodes and returning their IDs.
202/// Returns IDs for secrets only (not variable groups, which are opaque).
203/// Populates `plain_vars` with the names of non-secret named variables so
204/// downstream `$(VAR)` scanning can skip them.
205fn process_variables(
206    variables: &Option<AdoVariables>,
207    graph: &mut AuthorityGraph,
208    cache: &mut HashMap<String, NodeId>,
209    scope: &str,
210    plain_vars: &mut HashSet<String>,
211) -> Vec<NodeId> {
212    let mut ids = Vec::new();
213
214    let vars = match variables.as_ref() {
215        Some(v) => v,
216        None => return ids,
217    };
218
219    for var in &vars.0 {
220        match var {
221            AdoVariable::Group { group } => {
222                // Skip template-expression group names like `${{ parameters.env }}`.
223                // We can't resolve them statically — mark Partial but don't create
224                // a misleading Secret node with the expression as its name.
225                if group.contains("${{") {
226                    graph.mark_partial(format!(
227                        "variable group in {scope} uses template expression — group name unresolvable at parse time"
228                    ));
229                    continue;
230                }
231                let mut meta = HashMap::new();
232                meta.insert(META_VARIABLE_GROUP.into(), "true".into());
233                let id = graph.add_node_with_metadata(
234                    NodeKind::Secret,
235                    group.as_str(),
236                    TrustZone::FirstParty,
237                    meta,
238                );
239                cache.insert(group.clone(), id);
240                ids.push(id);
241                graph.mark_partial(format!(
242                    "variable group '{group}' in {scope} — contents unresolvable without ADO API access"
243                ));
244            }
245            AdoVariable::Named {
246                name, is_secret, ..
247            } => {
248                if *is_secret {
249                    let id = find_or_create_secret(graph, cache, name);
250                    ids.push(id);
251                } else {
252                    plain_vars.insert(name.clone());
253                }
254            }
255        }
256    }
257
258    ids
259}
260
261/// Process a list of ADO steps, adding nodes and edges to the graph.
262fn process_steps(
263    steps: &[AdoStep],
264    job_name: &str,
265    token_id: NodeId,
266    inherited_secrets: &[NodeId],
267    plain_vars: &HashSet<String>,
268    graph: &mut AuthorityGraph,
269    cache: &mut HashMap<String, NodeId>,
270) {
271    for (idx, step) in steps.iter().enumerate() {
272        // Template step — delegation, mark partial
273        if let Some(ref tpl) = step.template {
274            let step_name = step
275                .display_name
276                .as_deref()
277                .or(step.name.as_deref())
278                .map(|s| s.to_string())
279                .unwrap_or_else(|| format!("{job_name}[{idx}]"));
280            add_template_delegation(&step_name, tpl, token_id, graph);
281            continue;
282        }
283
284        // Determine step kind and trust zone
285        let (step_name, trust_zone, inline_script) = classify_step(step, job_name, idx);
286
287        let step_id = graph.add_node(NodeKind::Step, &step_name, trust_zone);
288
289        // Every step has access to System.AccessToken
290        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
291
292        // checkout step with persistCredentials: true writes the token to .git/config on disk,
293        // making it accessible to all subsequent steps and filesystem-level attackers.
294        if step.checkout.is_some() && step.persist_credentials == Some(true) {
295            graph.add_edge(step_id, token_id, EdgeKind::PersistsTo);
296        }
297
298        // `checkout: self` pulls the repo being built. In a PR trigger context this
299        // is the untrusted fork head — tag the step so downstream rules can gate on
300        // trigger context. Default ADO checkout (`checkout: self`) is the common case.
301        if let Some(ref ck) = step.checkout {
302            if ck == "self" {
303                if let Some(node) = graph.nodes.get_mut(step_id) {
304                    node.metadata
305                        .insert(META_CHECKOUT_SELF.into(), "true".into());
306                }
307            }
308        }
309
310        // Inherited pipeline/stage/job secrets
311        for &secret_id in inherited_secrets {
312            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
313        }
314
315        // Service connection detection from task inputs (case-insensitive key match)
316        if let Some(ref inputs) = step.inputs {
317            let service_conn_keys = [
318                "azuresubscription",
319                "connectedservicename",
320                "connectedservicenamearm",
321                "kubernetesserviceconnection",
322            ];
323            for (raw_key, val) in inputs {
324                let lower = raw_key.to_lowercase();
325                if !service_conn_keys.contains(&lower.as_str()) {
326                    continue;
327                }
328                let conn_name = yaml_value_as_str(val).unwrap_or(raw_key.as_str());
329                if !conn_name.starts_with("$(") {
330                    let mut meta = HashMap::new();
331                    meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
332                    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
333                    // ADO service connections are the platform's federated-identity equivalent
334                    // (modern Azure service connections use workload identity federation /
335                    // OIDC). Tag them so uplift_without_attestation treats ADO pipelines with
336                    // the same OIDC-parity logic applied to GHA.
337                    meta.insert(META_OIDC.into(), "true".into());
338                    let conn_id = graph.add_node_with_metadata(
339                        NodeKind::Identity,
340                        conn_name,
341                        TrustZone::FirstParty,
342                        meta,
343                    );
344                    graph.add_edge(step_id, conn_id, EdgeKind::HasAccessTo);
345                }
346            }
347
348            // Detect $(varName) references in task input values
349            for val in inputs.values() {
350                if let Some(s) = yaml_value_as_str(val) {
351                    extract_dollar_paren_secrets(s, step_id, plain_vars, graph, cache);
352                }
353            }
354        }
355
356        // Detect $(varName) in step env values
357        if let Some(ref env) = step.env {
358            for val in env.values() {
359                extract_dollar_paren_secrets(val, step_id, plain_vars, graph, cache);
360            }
361        }
362
363        // Detect $(varName) in inline script text
364        if let Some(ref script) = inline_script {
365            extract_dollar_paren_secrets(script, step_id, plain_vars, graph, cache);
366        }
367
368        // Detect ##vso[task.setvariable] — environment gate mutation in ADO pipelines
369        if let Some(ref script) = inline_script {
370            let lower = script.to_lowercase();
371            if lower.contains("##vso[task.setvariable") {
372                if let Some(node) = graph.nodes.get_mut(step_id) {
373                    node.metadata
374                        .insert(META_WRITES_ENV_GATE.into(), "true".into());
375                }
376            }
377        }
378    }
379}
380
381/// Classify an ADO step, returning (name, trust_zone, inline_script_text).
382fn classify_step(
383    step: &AdoStep,
384    job_name: &str,
385    idx: usize,
386) -> (String, TrustZone, Option<String>) {
387    let default_name = || format!("{job_name}[{idx}]");
388
389    let name = step
390        .display_name
391        .as_deref()
392        .or(step.name.as_deref())
393        .map(|s| s.to_string())
394        .unwrap_or_else(default_name);
395
396    if step.task.is_some() {
397        (name, TrustZone::Untrusted, None)
398    } else if let Some(ref s) = step.script {
399        (name, TrustZone::FirstParty, Some(s.clone()))
400    } else if let Some(ref s) = step.bash {
401        (name, TrustZone::FirstParty, Some(s.clone()))
402    } else if let Some(ref s) = step.powershell {
403        (name, TrustZone::FirstParty, Some(s.clone()))
404    } else if let Some(ref s) = step.pwsh {
405        (name, TrustZone::FirstParty, Some(s.clone()))
406    } else {
407        (name, TrustZone::FirstParty, None)
408    }
409}
410
411/// Add a DelegatesTo edge from a synthetic step node to a template image node.
412///
413/// Trust zone heuristic: templates referenced with `@repository` (e.g. `steps/deploy.yml@templates`)
414/// pull code from an external repository and are Untrusted. Plain relative paths like
415/// `steps/deploy.yml` resolve within the same repo and are FirstParty — mirroring how GHA
416/// treats `./local-action`.
417fn add_template_delegation(
418    step_name: &str,
419    template_path: &str,
420    token_id: NodeId,
421    graph: &mut AuthorityGraph,
422) {
423    let tpl_trust_zone = if template_path.contains('@') {
424        TrustZone::Untrusted
425    } else {
426        TrustZone::FirstParty
427    };
428    let step_id = graph.add_node(NodeKind::Step, step_name, TrustZone::FirstParty);
429    let tpl_id = graph.add_node(NodeKind::Image, template_path, tpl_trust_zone);
430    graph.add_edge(step_id, tpl_id, EdgeKind::DelegatesTo);
431    graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
432    graph.mark_partial(format!(
433        "template '{template_path}' cannot be resolved inline — authority within the template is unknown"
434    ));
435}
436
437/// Extract `$(varName)` references from a string, creating Secret nodes for
438/// non-predefined and non-plain ADO variables.
439/// Only content that is a valid ADO variable identifier (`[A-Za-z][A-Za-z0-9_]*`)
440/// is treated as a variable reference. This rejects PowerShell sub-expressions
441/// (`$($var)`), ADO template expressions (`${{ ... }}`), shell commands (`$(date)`),
442/// and anything with spaces or special characters.
443fn extract_dollar_paren_secrets(
444    text: &str,
445    step_id: NodeId,
446    plain_vars: &HashSet<String>,
447    graph: &mut AuthorityGraph,
448    cache: &mut HashMap<String, NodeId>,
449) {
450    let mut pos = 0;
451    let bytes = text.as_bytes();
452    while pos < bytes.len() {
453        if pos + 2 < bytes.len() && bytes[pos] == b'$' && bytes[pos + 1] == b'(' {
454            let start = pos + 2;
455            if let Some(end_offset) = text[start..].find(')') {
456                let var_name = &text[start..start + end_offset];
457                if is_valid_ado_identifier(var_name)
458                    && !is_predefined_ado_var(var_name)
459                    && !plain_vars.contains(var_name)
460                {
461                    let id = find_or_create_secret(graph, cache, var_name);
462                    // Mark secrets embedded in -var flag arguments: their values appear in
463                    // pipeline logs (command string is logged before masking, and Terraform
464                    // itself logs -var values in plan output and debug traces).
465                    if is_in_terraform_var_flag(text, pos) {
466                        if let Some(node) = graph.nodes.get_mut(id) {
467                            node.metadata
468                                .insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
469                        }
470                    }
471                    graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
472                }
473                pos = start + end_offset + 1;
474                continue;
475            }
476        }
477        pos += 1;
478    }
479}
480
/// Heuristic: does the `$(VAR)` starting at byte offset `var_pos` sit inside a
/// Terraform `-var` argument such as `-var "key=$(VAR)"`?
///
/// Only the part of the current line that precedes `var_pos` is inspected; it
/// must contain both `-var` and `=`. `var_pos` must lie on a char boundary of
/// `text`.
fn is_in_terraform_var_flag(text: &str, var_pos: usize) -> bool {
    let preceding = &text[..var_pos];
    let same_line = match preceding.rfind('\n') {
        Some(newline) => &preceding[newline + 1..],
        None => preceding,
    };
    // Needs the key=value assignment as well as the flag itself.
    same_line.contains('=') && same_line.contains("-var")
}
489
/// Returns true if `name` looks like an ADO variable name.
///
/// The first character must be an ASCII letter; every following character
/// must be an ASCII letter, digit, underscore or dot (dots allow names such
/// as `Build.BuildId`). The empty string is rejected, as is anything else —
/// PowerShell vars (`$name`), template expressions (`{{ ... }}`), or complex
/// expressions (`name -join ','`).
fn is_valid_ado_identifier(name: &str) -> bool {
    let Some((&head, tail)) = name.as_bytes().split_first() else {
        return false;
    };
    // Working on bytes is safe here: any byte of a multi-byte UTF-8 sequence
    // is non-ASCII and therefore fails both checks.
    head.is_ascii_alphabetic()
        && tail
            .iter()
            .all(|&b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'.'))
}
504
/// Returns true if a variable name is a well-known ADO predefined variable.
/// These are system-provided and are not tracked as secrets.
///
/// A name is predefined when it starts with one of the system namespaces
/// (`Build.`, `Agent.`, `System.`, ...) or is exactly `TF_BUILD`. `TF_BUILD`
/// is matched exactly, not as a prefix, so user variables such as
/// `TF_BUILD_DEPLOY_KEY` are still tracked.
fn is_predefined_ado_var(name: &str) -> bool {
    const PREDEFINED_PREFIXES: [&str; 9] = [
        "Build.",
        "Agent.",
        "System.",
        "Pipeline.",
        "Release.",
        "Environment.",
        "Strategy.",
        "Deployment.",
        "Resources.",
    ];
    name == "TF_BUILD" || PREDEFINED_PREFIXES.iter().any(|prefix| name.starts_with(prefix))
}
522
523fn find_or_create_secret(
524    graph: &mut AuthorityGraph,
525    cache: &mut HashMap<String, NodeId>,
526    name: &str,
527) -> NodeId {
528    if let Some(&id) = cache.get(name) {
529        return id;
530    }
531    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
532    cache.insert(name.to_string(), id);
533    id
534}
535
536fn yaml_value_as_str(val: &serde_yaml::Value) -> Option<&str> {
537    val.as_str()
538}
539
540// ── Serde models for ADO YAML ─────────────────────────────
541
/// Top-level ADO pipeline definition.
/// ADO pipelines come in three shapes:
///   (a) stages → jobs → steps
///   (b) jobs → steps (no stages key)
///   (c) steps only (no stages or jobs key)
///
/// Every field is optional; keys not listed here are not captured.
#[derive(Debug, Deserialize)]
pub struct AdoPipeline {
    /// Raw `trigger:` value, kept untyped.
    #[serde(default)]
    pub trigger: Option<serde_yaml::Value>,
    /// Raw `pr:` value, kept untyped. The parser inspects it to decide whether
    /// the pipeline is PR-triggered.
    #[serde(default)]
    pub pr: Option<serde_yaml::Value>,
    /// Pipeline-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Shape (a): list of stages.
    #[serde(default)]
    pub stages: Option<Vec<AdoStage>>,
    /// Shape (b): list of jobs, consulted only when `stages` is absent.
    #[serde(default)]
    pub jobs: Option<Vec<AdoJob>>,
    /// Shape (c): list of steps, consulted only when `stages` and `jobs` are absent.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Pipeline-level `pool:` value; either a string or a mapping, so kept untyped.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
}
564
/// One entry of the pipeline's `stages:` list: either a regular stage or a
/// stage-level template reference.
#[derive(Debug, Deserialize)]
pub struct AdoStage {
    /// Stage identifier. Absent when the stage entry is a template reference.
    #[serde(default)]
    pub stage: Option<String>,
    /// Stage-level template reference (`- template: path/to/stage.yml`).
    /// When set, the parser records a delegation and skips the stage's
    /// variables and jobs.
    #[serde(default)]
    pub template: Option<String>,
    /// Stage-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Jobs in this stage; empty when the key is missing.
    #[serde(default)]
    pub jobs: Vec<AdoJob>,
}
578
/// One entry of a `jobs:` list: a regular job, a deployment job, or a
/// job-level template reference.
#[derive(Debug, Deserialize)]
pub struct AdoJob {
    /// Regular job identifier
    #[serde(default)]
    pub job: Option<String>,
    /// Deployment job identifier
    #[serde(default)]
    pub deployment: Option<String>,
    /// Job-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Steps of the job; `None` when the key is missing.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// Job-level `pool:` value; either a string or a mapping, so kept untyped.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
    /// Job-level template reference
    #[serde(default)]
    pub template: Option<String>,
}
597
598impl AdoJob {
599    pub fn effective_name(&self) -> String {
600        self.job
601            .as_deref()
602            .or(self.deployment.as_deref())
603            .unwrap_or("job")
604            .to_string()
605    }
606}
607
/// One entry of a `steps:` list. Exactly which keys are set determines the
/// step kind (task, inline script, checkout, or template reference); all
/// fields are optional and unknown keys are not captured.
#[derive(Debug, Deserialize)]
pub struct AdoStep {
    /// Task reference e.g. `AzureCLI@2`
    #[serde(default)]
    pub task: Option<String>,
    /// Inline script (cmd/sh)
    #[serde(default)]
    pub script: Option<String>,
    /// Inline bash script
    #[serde(default)]
    pub bash: Option<String>,
    /// Inline PowerShell script
    #[serde(default)]
    pub powershell: Option<String>,
    /// Cross-platform PowerShell
    #[serde(default)]
    pub pwsh: Option<String>,
    /// Step-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// Human-readable step label (`displayName`); preferred over `name` when
    /// naming the step node.
    #[serde(rename = "displayName", default)]
    pub display_name: Option<String>,
    /// Legacy name alias
    #[serde(default)]
    pub name: Option<String>,
    /// Step `env:` mapping, typed as string → string.
    /// NOTE(review): non-string values (e.g. a bare number) presumably fail
    /// deserialization of the whole pipeline — confirm.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Task inputs (key → value, but values may be nested)
    #[serde(default)]
    pub inputs: Option<HashMap<String, serde_yaml::Value>>,
    /// Checkout step target (e.g. `self`, a repo alias, or `none`)
    #[serde(default)]
    pub checkout: Option<String>,
    /// When true on a checkout step, writes credentials to .git/config for subsequent steps.
    #[serde(rename = "persistCredentials", default)]
    pub persist_credentials: Option<bool>,
}
645
/// ADO `variables:` block. Can be a sequence (list of group/name-value entries)
/// or a mapping (variableName: value). We normalise both into a Vec<AdoVariable>.
///
/// Deserialization is hand-written (see the `Deserialize` impl) rather than
/// derived, because the two YAML shapes differ.
#[derive(Debug, Default)]
pub struct AdoVariables(pub Vec<AdoVariable>);
650
651impl<'de> serde::Deserialize<'de> for AdoVariables {
652    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
653    where
654        D: serde::Deserializer<'de>,
655    {
656        let raw = serde_yaml::Value::deserialize(deserializer)?;
657        let mut vars = Vec::new();
658
659        match raw {
660            serde_yaml::Value::Sequence(seq) => {
661                for item in seq {
662                    if let Some(map) = item.as_mapping() {
663                        if let Some(group_val) = map.get("group") {
664                            if let Some(group) = group_val.as_str() {
665                                vars.push(AdoVariable::Group {
666                                    group: group.to_string(),
667                                });
668                                continue;
669                            }
670                        }
671                        let name = map
672                            .get("name")
673                            .and_then(|v| v.as_str())
674                            .unwrap_or("")
675                            .to_string();
676                        let value = map
677                            .get("value")
678                            .and_then(|v| v.as_str())
679                            .unwrap_or("")
680                            .to_string();
681                        let is_secret = map
682                            .get("isSecret")
683                            .and_then(|v| v.as_bool())
684                            .unwrap_or(false);
685                        vars.push(AdoVariable::Named {
686                            name,
687                            value,
688                            is_secret,
689                        });
690                    }
691                }
692            }
693            serde_yaml::Value::Mapping(map) => {
694                for (k, v) in map {
695                    let name = k.as_str().unwrap_or("").to_string();
696                    let value = v.as_str().unwrap_or("").to_string();
697                    vars.push(AdoVariable::Named {
698                        name,
699                        value,
700                        is_secret: false,
701                    });
702                }
703            }
704            _ => {}
705        }
706
707        Ok(AdoVariables(vars))
708    }
709}
710
/// A single normalised entry of a `variables:` block.
#[derive(Debug)]
pub enum AdoVariable {
    /// Reference to a variable group (`- group: name`).
    Group {
        /// Group name as written in the YAML; may be a `${{ ... }}` template
        /// expression.
        group: String,
    },
    /// A named variable, from either the sequence or the mapping form.
    Named {
        /// Variable name; `""` when the YAML did not supply a string name.
        name: String,
        /// Variable value; `""` when the YAML value was missing or not a string.
        value: String,
        /// True only when a sequence entry set `isSecret: true`; always false
        /// for the mapping form.
        is_secret: bool,
    },
}
722
723#[cfg(test)]
724mod tests {
725    use super::*;
726
727    fn parse(yaml: &str) -> AuthorityGraph {
728        let parser = AdoParser;
729        let source = PipelineSource {
730            file: "azure-pipelines.yml".into(),
731            repo: None,
732            git_ref: None,
733        };
734        parser.parse(yaml, &source).unwrap()
735    }
736
737    #[test]
738    fn parses_simple_pipeline() {
739        let yaml = r#"
740trigger:
741  - main
742
743jobs:
744  - job: Build
745    steps:
746      - script: echo hello
747        displayName: Say hello
748"#;
749        let graph = parse(yaml);
750        assert!(graph.nodes.len() >= 2); // System.AccessToken + step
751    }
752
753    #[test]
754    fn system_access_token_created() {
755        let yaml = r#"
756steps:
757  - script: echo hi
758"#;
759        let graph = parse(yaml);
760        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
761        assert_eq!(identities.len(), 1);
762        assert_eq!(identities[0].name, "System.AccessToken");
763        assert_eq!(
764            identities[0].metadata.get(META_IDENTITY_SCOPE),
765            Some(&"broad".to_string())
766        );
767    }
768
769    #[test]
770    fn variable_group_creates_secret_and_marks_partial() {
771        let yaml = r#"
772variables:
773  - group: MySecretGroup
774
775steps:
776  - script: echo hi
777"#;
778        let graph = parse(yaml);
779        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
780        assert_eq!(secrets.len(), 1);
781        assert_eq!(secrets[0].name, "MySecretGroup");
782        assert_eq!(
783            secrets[0].metadata.get(META_VARIABLE_GROUP),
784            Some(&"true".to_string())
785        );
786        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
787        assert!(
788            graph
789                .completeness_gaps
790                .iter()
791                .any(|g| g.contains("MySecretGroup")),
792            "completeness gap should name the variable group"
793        );
794    }
795
796    #[test]
797    fn task_with_azure_subscription_creates_service_connection_identity() {
798        let yaml = r#"
799steps:
800  - task: AzureCLI@2
801    displayName: Deploy to Azure
802    inputs:
803      azureSubscription: MyServiceConnection
804      scriptType: bash
805      inlineScript: az group list
806"#;
807        let graph = parse(yaml);
808        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
809        // System.AccessToken + service connection
810        assert_eq!(identities.len(), 2);
811        let conn = identities
812            .iter()
813            .find(|i| i.name == "MyServiceConnection")
814            .unwrap();
815        assert_eq!(
816            conn.metadata.get(META_SERVICE_CONNECTION),
817            Some(&"true".to_string())
818        );
819        assert_eq!(
820            conn.metadata.get(META_IDENTITY_SCOPE),
821            Some(&"broad".to_string())
822        );
823    }
824
825    #[test]
826    fn task_with_connected_service_name_creates_identity() {
827        let yaml = r#"
828steps:
829  - task: SqlAzureDacpacDeployment@1
830    inputs:
831      ConnectedServiceNameARM: MySqlConnection
832"#;
833        let graph = parse(yaml);
834        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
835        assert!(
836            identities.iter().any(|i| i.name == "MySqlConnection"),
837            "connectedServiceNameARM should create identity"
838        );
839    }
840
841    #[test]
842    fn script_step_classified_as_first_party() {
843        let yaml = r#"
844steps:
845  - script: echo hi
846    displayName: Say hi
847"#;
848        let graph = parse(yaml);
849        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
850        assert_eq!(steps.len(), 1);
851        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
852    }
853
854    #[test]
855    fn bash_step_classified_as_first_party() {
856        let yaml = r#"
857steps:
858  - bash: echo hi
859"#;
860        let graph = parse(yaml);
861        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
862        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
863    }
864
865    #[test]
866    fn task_step_classified_as_untrusted() {
867        let yaml = r#"
868steps:
869  - task: DotNetCoreCLI@2
870    inputs:
871      command: build
872"#;
873        let graph = parse(yaml);
874        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
875        assert_eq!(steps.len(), 1);
876        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
877    }
878
879    #[test]
880    fn dollar_paren_var_in_script_creates_secret() {
881        let yaml = r#"
882steps:
883  - script: |
884      curl -H "Authorization: $(MY_API_TOKEN)" https://api.example.com
885    displayName: Call API
886"#;
887        let graph = parse(yaml);
888        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
889        assert_eq!(secrets.len(), 1);
890        assert_eq!(secrets[0].name, "MY_API_TOKEN");
891    }
892
893    #[test]
894    fn predefined_ado_var_not_treated_as_secret() {
895        let yaml = r#"
896steps:
897  - script: |
898      echo $(Build.BuildId)
899      echo $(Agent.WorkFolder)
900      echo $(System.DefaultWorkingDirectory)
901    displayName: Print vars
902"#;
903        let graph = parse(yaml);
904        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
905        assert!(
906            secrets.is_empty(),
907            "predefined ADO vars should not be treated as secrets, got: {:?}",
908            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
909        );
910    }
911
912    #[test]
913    fn template_reference_creates_delegates_to_and_marks_partial() {
914        let yaml = r#"
915steps:
916  - template: steps/deploy.yml
917    parameters:
918      env: production
919"#;
920        let graph = parse(yaml);
921        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
922        assert_eq!(steps.len(), 1);
923
924        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
925        assert_eq!(images.len(), 1);
926        assert_eq!(images[0].name, "steps/deploy.yml");
927
928        let delegates: Vec<_> = graph
929            .edges_from(steps[0].id)
930            .filter(|e| e.kind == EdgeKind::DelegatesTo)
931            .collect();
932        assert_eq!(delegates.len(), 1);
933
934        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
935    }
936
937    #[test]
938    fn top_level_steps_no_jobs() {
939        let yaml = r#"
940steps:
941  - script: echo a
942  - script: echo b
943"#;
944        let graph = parse(yaml);
945        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
946        assert_eq!(steps.len(), 2);
947    }
948
949    #[test]
950    fn top_level_jobs_no_stages() {
951        let yaml = r#"
952jobs:
953  - job: JobA
954    steps:
955      - script: echo a
956  - job: JobB
957    steps:
958      - script: echo b
959"#;
960        let graph = parse(yaml);
961        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
962        assert_eq!(steps.len(), 2);
963    }
964
965    #[test]
966    fn stages_with_nested_jobs_parsed() {
967        let yaml = r#"
968stages:
969  - stage: Build
970    jobs:
971      - job: Compile
972        steps:
973          - script: cargo build
974  - stage: Test
975    jobs:
976      - job: UnitTest
977        steps:
978          - script: cargo test
979"#;
980        let graph = parse(yaml);
981        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
982        assert_eq!(steps.len(), 2);
983    }
984
985    #[test]
986    fn all_steps_linked_to_system_access_token() {
987        let yaml = r#"
988steps:
989  - script: echo a
990  - task: SomeTask@1
991    inputs: {}
992"#;
993        let graph = parse(yaml);
994        let token: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
995        assert_eq!(token.len(), 1);
996        let token_id = token[0].id;
997
998        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
999        for step in &steps {
1000            let links: Vec<_> = graph
1001                .edges_from(step.id)
1002                .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == token_id)
1003                .collect();
1004            assert_eq!(
1005                links.len(),
1006                1,
1007                "step '{}' must link to System.AccessToken",
1008                step.name
1009            );
1010        }
1011    }
1012
1013    #[test]
1014    fn named_secret_variable_creates_secret_node() {
1015        let yaml = r#"
1016variables:
1017  - name: MY_PASSWORD
1018    value: dummy
1019    isSecret: true
1020
1021steps:
1022  - script: echo hi
1023"#;
1024        let graph = parse(yaml);
1025        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1026        assert_eq!(secrets.len(), 1);
1027        assert_eq!(secrets[0].name, "MY_PASSWORD");
1028    }
1029
1030    #[test]
1031    fn variables_as_mapping_parsed() {
1032        let yaml = r#"
1033variables:
1034  MY_VAR: hello
1035  ANOTHER_VAR: world
1036
1037steps:
1038  - script: echo hi
1039"#;
1040        let graph = parse(yaml);
1041        // Mapping-style variables without isSecret — no secret nodes created
1042        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1043        assert!(
1044            secrets.is_empty(),
1045            "plain mapping vars should not create secret nodes"
1046        );
1047    }
1048
1049    #[test]
1050    fn persist_credentials_creates_persists_to_edge() {
1051        let yaml = r#"
1052steps:
1053  - checkout: self
1054    persistCredentials: true
1055  - script: git push
1056"#;
1057        let graph = parse(yaml);
1058        let token_id = graph
1059            .nodes_of_kind(NodeKind::Identity)
1060            .find(|n| n.name == "System.AccessToken")
1061            .expect("System.AccessToken must exist")
1062            .id;
1063
1064        let persists_edges: Vec<_> = graph
1065            .edges
1066            .iter()
1067            .filter(|e| e.kind == EdgeKind::PersistsTo && e.to == token_id)
1068            .collect();
1069        assert_eq!(
1070            persists_edges.len(),
1071            1,
1072            "checkout with persistCredentials: true must produce exactly one PersistsTo edge"
1073        );
1074    }
1075
1076    #[test]
1077    fn checkout_without_persist_credentials_no_persists_to_edge() {
1078        let yaml = r#"
1079steps:
1080  - checkout: self
1081  - script: echo hi
1082"#;
1083        let graph = parse(yaml);
1084        let persists_edges: Vec<_> = graph
1085            .edges
1086            .iter()
1087            .filter(|e| e.kind == EdgeKind::PersistsTo)
1088            .collect();
1089        assert!(
1090            persists_edges.is_empty(),
1091            "checkout without persistCredentials should not produce PersistsTo edge"
1092        );
1093    }
1094
1095    #[test]
1096    fn var_flag_secret_marked_as_cli_flag_exposed() {
1097        let yaml = r#"
1098steps:
1099  - script: |
1100      terraform apply \
1101        -var "db_password=$(db_password)" \
1102        -var "api_key=$(api_key)"
1103    displayName: Terraform apply
1104"#;
1105        let graph = parse(yaml);
1106        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1107        assert!(!secrets.is_empty(), "should detect secrets from -var flags");
1108        for secret in &secrets {
1109            assert_eq!(
1110                secret.metadata.get(META_CLI_FLAG_EXPOSED),
1111                Some(&"true".to_string()),
1112                "secret '{}' passed via -var flag should be marked cli_flag_exposed",
1113                secret.name
1114            );
1115        }
1116    }
1117
1118    #[test]
1119    fn non_var_flag_secret_not_marked_as_cli_flag_exposed() {
1120        let yaml = r#"
1121steps:
1122  - script: |
1123      curl -H "Authorization: $(MY_TOKEN)" https://api.example.com
1124"#;
1125        let graph = parse(yaml);
1126        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1127        assert_eq!(secrets.len(), 1);
1128        assert!(
1129            !secrets[0].metadata.contains_key(META_CLI_FLAG_EXPOSED),
1130            "non -var secret should not be marked as cli_flag_exposed"
1131        );
1132    }
1133
1134    #[test]
1135    fn step_linked_to_variable_group_secret() {
1136        let yaml = r#"
1137variables:
1138  - group: ProdSecrets
1139
1140steps:
1141  - script: deploy.sh
1142"#;
1143        let graph = parse(yaml);
1144        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1145        assert_eq!(secrets.len(), 1);
1146        let secret_id = secrets[0].id;
1147
1148        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1149        let links: Vec<_> = graph
1150            .edges_from(steps[0].id)
1151            .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == secret_id)
1152            .collect();
1153        assert_eq!(
1154            links.len(),
1155            1,
1156            "step should be linked to variable group secret"
1157        );
1158    }
1159
1160    #[test]
1161    fn pr_trigger_sets_meta_trigger_on_graph() {
1162        let yaml = r#"
1163pr:
1164  - '*'
1165
1166steps:
1167  - script: echo hi
1168"#;
1169        let graph = parse(yaml);
1170        assert_eq!(
1171            graph.metadata.get(META_TRIGGER),
1172            Some(&"pr".to_string()),
1173            "ADO pr: trigger should set graph META_TRIGGER"
1174        );
1175    }
1176
1177    #[test]
1178    fn self_hosted_pool_by_name_creates_image_with_self_hosted_metadata() {
1179        let yaml = r#"
1180pool:
1181  name: my-self-hosted-pool
1182
1183steps:
1184  - script: echo hi
1185"#;
1186        let graph = parse(yaml);
1187        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1188        assert_eq!(images.len(), 1);
1189        assert_eq!(images[0].name, "my-self-hosted-pool");
1190        assert_eq!(
1191            images[0].metadata.get(META_SELF_HOSTED),
1192            Some(&"true".to_string()),
1193            "pool.name without vmImage must be tagged self-hosted"
1194        );
1195    }
1196
1197    #[test]
1198    fn vm_image_pool_is_not_tagged_self_hosted() {
1199        let yaml = r#"
1200pool:
1201  vmImage: ubuntu-latest
1202
1203steps:
1204  - script: echo hi
1205"#;
1206        let graph = parse(yaml);
1207        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
1208        assert_eq!(images.len(), 1);
1209        assert_eq!(images[0].name, "ubuntu-latest");
1210        assert!(
1211            !images[0].metadata.contains_key(META_SELF_HOSTED),
1212            "pool.vmImage is Microsoft-hosted — must not be tagged self-hosted"
1213        );
1214    }
1215
1216    #[test]
1217    fn checkout_self_step_tagged_with_meta_checkout_self() {
1218        let yaml = r#"
1219steps:
1220  - checkout: self
1221  - script: echo hi
1222"#;
1223        let graph = parse(yaml);
1224        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1225        assert_eq!(steps.len(), 2);
1226        let checkout_step = steps
1227            .iter()
1228            .find(|s| s.metadata.contains_key(META_CHECKOUT_SELF))
1229            .expect("one step must be tagged META_CHECKOUT_SELF");
1230        assert_eq!(
1231            checkout_step.metadata.get(META_CHECKOUT_SELF),
1232            Some(&"true".to_string())
1233        );
1234    }
1235
1236    #[test]
1237    fn vso_setvariable_sets_meta_writes_env_gate() {
1238        let yaml = r###"
1239steps:
1240  - script: |
1241      echo "##vso[task.setvariable variable=FOO]bar"
1242    displayName: Set variable
1243"###;
1244        let graph = parse(yaml);
1245        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1246        assert_eq!(steps.len(), 1);
1247        assert_eq!(
1248            steps[0].metadata.get(META_WRITES_ENV_GATE),
1249            Some(&"true".to_string()),
1250            "##vso[task.setvariable] must mark META_WRITES_ENV_GATE"
1251        );
1252    }
1253}