Skip to main content

taudit_parse_ado/
lib.rs

1use std::collections::{HashMap, HashSet};
2
3use serde::Deserialize;
4use taudit_core::error::TauditError;
5use taudit_core::graph::*;
6use taudit_core::ports::PipelineParser;
7
8/// Azure DevOps YAML pipeline parser.
9pub struct AdoParser;
10
11impl PipelineParser for AdoParser {
12    fn platform(&self) -> &str {
13        "azure-devops"
14    }
15
16    fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
17        let mut de = serde_yaml::Deserializer::from_str(content);
18        let doc = de
19            .next()
20            .ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
21        let pipeline: AdoPipeline = AdoPipeline::deserialize(doc)
22            .map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
23        let extra_docs = de.next().is_some();
24
25        let mut graph = AuthorityGraph::new(source.clone());
26        if extra_docs {
27            graph.mark_partial(
28                "file contains multiple YAML documents (--- separator) — only the first was analyzed".to_string(),
29            );
30        }
31        let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
32
33        // System.AccessToken is always present — equivalent to GITHUB_TOKEN.
34        let mut meta = HashMap::new();
35        meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
36        let token_id = graph.add_node_with_metadata(
37            NodeKind::Identity,
38            "System.AccessToken",
39            TrustZone::FirstParty,
40            meta,
41        );
42
43        // Pipeline-level variable groups and named secrets.
44        // plain_vars tracks non-secret named variables so $(VAR) refs in scripts
45        // don't generate false-positive Secret nodes for plain config values.
46        let mut plain_vars: HashSet<String> = HashSet::new();
47        let pipeline_secret_ids = process_variables(
48            &pipeline.variables,
49            &mut graph,
50            &mut secret_ids,
51            "pipeline",
52            &mut plain_vars,
53        );
54
55        // Determine pipeline structure: stages → jobs → steps, or jobs → steps, or steps only
56        if let Some(ref stages) = pipeline.stages {
57            for stage in stages {
58                // Stage-level template reference — delegate and mark Partial
59                if let Some(ref tpl) = stage.template {
60                    let stage_name = stage.stage.as_deref().unwrap_or("stage");
61                    add_template_delegation(stage_name, tpl, token_id, &mut graph);
62                    continue;
63                }
64
65                let stage_name = stage.stage.as_deref().unwrap_or("stage").to_string();
66                let stage_secret_ids = process_variables(
67                    &stage.variables,
68                    &mut graph,
69                    &mut secret_ids,
70                    &stage_name,
71                    &mut plain_vars,
72                );
73
74                for job in &stage.jobs {
75                    let job_name = job.effective_name();
76                    let job_secret_ids = process_variables(
77                        &job.variables,
78                        &mut graph,
79                        &mut secret_ids,
80                        &job_name,
81                        &mut plain_vars,
82                    );
83
84                    let all_secrets: Vec<NodeId> = pipeline_secret_ids
85                        .iter()
86                        .chain(&stage_secret_ids)
87                        .chain(&job_secret_ids)
88                        .copied()
89                        .collect();
90
91                    process_steps(
92                        job.steps.as_deref().unwrap_or(&[]),
93                        &job_name,
94                        token_id,
95                        &all_secrets,
96                        &plain_vars,
97                        &mut graph,
98                        &mut secret_ids,
99                    );
100
101                    if let Some(ref tpl) = job.template {
102                        add_template_delegation(&job_name, tpl, token_id, &mut graph);
103                    }
104                }
105            }
106        } else if let Some(ref jobs) = pipeline.jobs {
107            for job in jobs {
108                let job_name = job.effective_name();
109                let job_secret_ids = process_variables(
110                    &job.variables,
111                    &mut graph,
112                    &mut secret_ids,
113                    &job_name,
114                    &mut plain_vars,
115                );
116
117                let all_secrets: Vec<NodeId> = pipeline_secret_ids
118                    .iter()
119                    .chain(&job_secret_ids)
120                    .copied()
121                    .collect();
122
123                process_steps(
124                    job.steps.as_deref().unwrap_or(&[]),
125                    &job_name,
126                    token_id,
127                    &all_secrets,
128                    &plain_vars,
129                    &mut graph,
130                    &mut secret_ids,
131                );
132
133                if let Some(ref tpl) = job.template {
134                    add_template_delegation(&job_name, tpl, token_id, &mut graph);
135                }
136            }
137        } else if let Some(ref steps) = pipeline.steps {
138            process_steps(
139                steps,
140                "pipeline",
141                token_id,
142                &pipeline_secret_ids,
143                &plain_vars,
144                &mut graph,
145                &mut secret_ids,
146            );
147        }
148
149        Ok(graph)
150    }
151}
152
153/// Process a variable list, creating Secret nodes and returning their IDs.
154/// Returns IDs for secrets only (not variable groups, which are opaque).
155/// Populates `plain_vars` with the names of non-secret named variables so
156/// downstream `$(VAR)` scanning can skip them.
157fn process_variables(
158    variables: &Option<AdoVariables>,
159    graph: &mut AuthorityGraph,
160    cache: &mut HashMap<String, NodeId>,
161    scope: &str,
162    plain_vars: &mut HashSet<String>,
163) -> Vec<NodeId> {
164    let mut ids = Vec::new();
165
166    let vars = match variables.as_ref() {
167        Some(v) => v,
168        None => return ids,
169    };
170
171    for var in &vars.0 {
172        match var {
173            AdoVariable::Group { group } => {
174                // Skip template-expression group names like `${{ parameters.env }}`.
175                // We can't resolve them statically — mark Partial but don't create
176                // a misleading Secret node with the expression as its name.
177                if group.contains("${{") {
178                    graph.mark_partial(format!(
179                        "variable group in {scope} uses template expression — group name unresolvable at parse time"
180                    ));
181                    continue;
182                }
183                let mut meta = HashMap::new();
184                meta.insert("variable_group".into(), "true".into());
185                let id = graph.add_node_with_metadata(
186                    NodeKind::Secret,
187                    group.as_str(),
188                    TrustZone::FirstParty,
189                    meta,
190                );
191                cache.insert(group.clone(), id);
192                ids.push(id);
193                graph.mark_partial(format!(
194                    "variable group '{group}' in {scope} — contents unresolvable without ADO API access"
195                ));
196            }
197            AdoVariable::Named {
198                name, is_secret, ..
199            } => {
200                if *is_secret {
201                    let id = find_or_create_secret(graph, cache, name);
202                    ids.push(id);
203                } else {
204                    plain_vars.insert(name.clone());
205                }
206            }
207        }
208    }
209
210    ids
211}
212
213/// Process a list of ADO steps, adding nodes and edges to the graph.
214fn process_steps(
215    steps: &[AdoStep],
216    job_name: &str,
217    token_id: NodeId,
218    inherited_secrets: &[NodeId],
219    plain_vars: &HashSet<String>,
220    graph: &mut AuthorityGraph,
221    cache: &mut HashMap<String, NodeId>,
222) {
223    for (idx, step) in steps.iter().enumerate() {
224        // Template step — delegation, mark partial
225        if let Some(ref tpl) = step.template {
226            let step_name = step
227                .display_name
228                .as_deref()
229                .or(step.name.as_deref())
230                .map(|s| s.to_string())
231                .unwrap_or_else(|| format!("{job_name}[{idx}]"));
232            add_template_delegation(&step_name, tpl, token_id, graph);
233            continue;
234        }
235
236        // Determine step kind and trust zone
237        let (step_name, trust_zone, inline_script) = classify_step(step, job_name, idx);
238
239        let step_id = graph.add_node(NodeKind::Step, &step_name, trust_zone);
240
241        // Every step has access to System.AccessToken
242        graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
243
244        // checkout step with persistCredentials: true writes the token to .git/config on disk,
245        // making it accessible to all subsequent steps and filesystem-level attackers.
246        if step.checkout.is_some() && step.persist_credentials == Some(true) {
247            graph.add_edge(step_id, token_id, EdgeKind::PersistsTo);
248        }
249
250        // Inherited pipeline/stage/job secrets
251        for &secret_id in inherited_secrets {
252            graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
253        }
254
255        // Service connection detection from task inputs (case-insensitive key match)
256        if let Some(ref inputs) = step.inputs {
257            let service_conn_keys = [
258                "azuresubscription",
259                "connectedservicename",
260                "connectedservicenamearm",
261                "kubernetesserviceconnection",
262            ];
263            for (raw_key, val) in inputs {
264                let lower = raw_key.to_lowercase();
265                if !service_conn_keys.contains(&lower.as_str()) {
266                    continue;
267                }
268                let conn_name = yaml_value_as_str(val).unwrap_or(raw_key.as_str());
269                if !conn_name.starts_with("$(") {
270                    let mut meta = HashMap::new();
271                    meta.insert("service_connection".into(), "true".into());
272                    meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
273                    let conn_id = graph.add_node_with_metadata(
274                        NodeKind::Identity,
275                        conn_name,
276                        TrustZone::FirstParty,
277                        meta,
278                    );
279                    graph.add_edge(step_id, conn_id, EdgeKind::HasAccessTo);
280                }
281            }
282
283            // Detect $(varName) references in task input values
284            for val in inputs.values() {
285                if let Some(s) = yaml_value_as_str(val) {
286                    extract_dollar_paren_secrets(s, step_id, plain_vars, graph, cache);
287                }
288            }
289        }
290
291        // Detect $(varName) in step env values
292        if let Some(ref env) = step.env {
293            for val in env.values() {
294                extract_dollar_paren_secrets(val, step_id, plain_vars, graph, cache);
295            }
296        }
297
298        // Detect $(varName) in inline script text
299        if let Some(ref script) = inline_script {
300            extract_dollar_paren_secrets(script, step_id, plain_vars, graph, cache);
301        }
302    }
303}
304
305/// Classify an ADO step, returning (name, trust_zone, inline_script_text).
306fn classify_step(
307    step: &AdoStep,
308    job_name: &str,
309    idx: usize,
310) -> (String, TrustZone, Option<String>) {
311    let default_name = || format!("{job_name}[{idx}]");
312
313    let name = step
314        .display_name
315        .as_deref()
316        .or(step.name.as_deref())
317        .map(|s| s.to_string())
318        .unwrap_or_else(default_name);
319
320    if step.task.is_some() {
321        (name, TrustZone::Untrusted, None)
322    } else if let Some(ref s) = step.script {
323        (name, TrustZone::FirstParty, Some(s.clone()))
324    } else if let Some(ref s) = step.bash {
325        (name, TrustZone::FirstParty, Some(s.clone()))
326    } else if let Some(ref s) = step.powershell {
327        (name, TrustZone::FirstParty, Some(s.clone()))
328    } else if let Some(ref s) = step.pwsh {
329        (name, TrustZone::FirstParty, Some(s.clone()))
330    } else {
331        (name, TrustZone::FirstParty, None)
332    }
333}
334
335/// Add a DelegatesTo edge from a synthetic step node to a template image node.
336fn add_template_delegation(
337    step_name: &str,
338    template_path: &str,
339    token_id: NodeId,
340    graph: &mut AuthorityGraph,
341) {
342    let step_id = graph.add_node(NodeKind::Step, step_name, TrustZone::FirstParty);
343    let tpl_id = graph.add_node(NodeKind::Image, template_path, TrustZone::Untrusted);
344    graph.add_edge(step_id, tpl_id, EdgeKind::DelegatesTo);
345    graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
346    graph.mark_partial(format!(
347        "template '{template_path}' cannot be resolved inline — authority within the template is unknown"
348    ));
349}
350
351/// Extract `$(varName)` references from a string, creating Secret nodes for
352/// non-predefined and non-plain ADO variables.
353/// Only content that is a valid ADO variable identifier (`[A-Za-z][A-Za-z0-9_]*`)
354/// is treated as a variable reference. This rejects PowerShell sub-expressions
355/// (`$($var)`), ADO template expressions (`${{ ... }}`), shell commands (`$(date)`),
356/// and anything with spaces or special characters.
357fn extract_dollar_paren_secrets(
358    text: &str,
359    step_id: NodeId,
360    plain_vars: &HashSet<String>,
361    graph: &mut AuthorityGraph,
362    cache: &mut HashMap<String, NodeId>,
363) {
364    let mut pos = 0;
365    let bytes = text.as_bytes();
366    while pos < bytes.len() {
367        if pos + 2 < bytes.len() && bytes[pos] == b'$' && bytes[pos + 1] == b'(' {
368            let start = pos + 2;
369            if let Some(end_offset) = text[start..].find(')') {
370                let var_name = &text[start..start + end_offset];
371                if is_valid_ado_identifier(var_name)
372                    && !is_predefined_ado_var(var_name)
373                    && !plain_vars.contains(var_name)
374                {
375                    let id = find_or_create_secret(graph, cache, var_name);
376                    // Mark secrets embedded in -var flag arguments: their values appear in
377                    // pipeline logs (command string is logged before masking, and Terraform
378                    // itself logs -var values in plan output and debug traces).
379                    if is_in_terraform_var_flag(text, pos) {
380                        if let Some(node) = graph.nodes.get_mut(id) {
381                            node.metadata
382                                .insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
383                        }
384                    }
385                    graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
386                }
387                pos = start + end_offset + 1;
388                continue;
389            }
390        }
391        pos += 1;
392    }
393}
394
/// Heuristic: is the `$(VAR)` starting at byte offset `var_pos` part of a
/// Terraform `-var "key=$(VAR)"` argument?
///
/// Looks only at the text on the same line that precedes `var_pos` and
/// reports true when it contains both the substring `-var` and an `=`.
/// `var_pos` must be a char boundary within `text`.
fn is_in_terraform_var_flag(text: &str, var_pos: usize) -> bool {
    let preceding = &text[..var_pos];
    let same_line = match preceding.rfind('\n') {
        Some(newline_at) => &preceding[newline_at + 1..],
        None => preceding,
    };
    // Both the flag and the key=value assignment must precede the reference.
    same_line.contains('=') && same_line.contains("-var")
}
403
/// Returns true if `name` looks like an ADO variable name.
///
/// Accepted names start with an ASCII letter and continue with ASCII letters,
/// digits, underscores or dots (so `Build.BuildId` passes). The empty string
/// and anything else — PowerShell vars (`$name`), template expressions
/// (`{{ ... }}`), or complex expressions (`name -join ','`) — is rejected.
fn is_valid_ado_identifier(name: &str) -> bool {
    let mut symbols = name.chars();
    let leads_with_letter = matches!(symbols.next(), Some(c) if c.is_ascii_alphabetic());
    leads_with_letter && symbols.all(|c| c == '_' || c == '.' || c.is_ascii_alphanumeric())
}
418
/// Returns true if a variable name is a well-known ADO predefined variable.
/// These are system-provided and are not modelled as Secret nodes.
///
/// A name is predefined when it starts with one of the known namespace
/// prefixes (`Build.`, `Agent.`, `System.`, …) or is exactly `TF_BUILD`.
/// Matching ignores ASCII case because ADO variable names are
/// case-insensitive: `$(build.buildId)` refers to `Build.BuildId`.
/// `TF_BUILD` is matched exactly so that user variables such as
/// `TF_BUILD_TOKEN` are not mistaken for predefined ones.
fn is_predefined_ado_var(name: &str) -> bool {
    const NAMESPACE_PREFIXES: [&str; 9] = [
        "Build.",
        "Agent.",
        "System.",
        "Pipeline.",
        "Release.",
        "Environment.",
        "Strategy.",
        "Deployment.",
        "Resources.",
    ];
    const STANDALONE_NAMES: [&str; 1] = ["TF_BUILD"];

    // Compare on bytes so a multi-byte character near the prefix boundary
    // cannot cause a slicing panic.
    let name_bytes = name.as_bytes();
    let in_namespace = NAMESPACE_PREFIXES.iter().any(|prefix| {
        matches!(
            name_bytes.get(..prefix.len()),
            Some(head) if head.eq_ignore_ascii_case(prefix.as_bytes())
        )
    });

    in_namespace
        || STANDALONE_NAMES
            .iter()
            .any(|exact| name.eq_ignore_ascii_case(exact))
}
436
437fn find_or_create_secret(
438    graph: &mut AuthorityGraph,
439    cache: &mut HashMap<String, NodeId>,
440    name: &str,
441) -> NodeId {
442    if let Some(&id) = cache.get(name) {
443        return id;
444    }
445    let id = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
446    cache.insert(name.to_string(), id);
447    id
448}
449
/// Borrow a YAML value as a string slice.
///
/// Thin wrapper over `serde_yaml::Value::as_str`: returns `None` when that
/// method does, so callers treat non-string inputs as "no text".
fn yaml_value_as_str(val: &serde_yaml::Value) -> Option<&str> {
    val.as_str()
}
453
454// ── Serde models for ADO YAML ─────────────────────────────
455
/// Top-level ADO pipeline definition.
/// ADO pipelines come in three shapes:
///   (a) stages → jobs → steps
///   (b) jobs → steps (no stages key)
///   (c) steps only (no stages or jobs key)
///
/// Every field is optional and defaults to `None` when its key is absent.
#[derive(Debug, Deserialize)]
pub struct AdoPipeline {
    /// `trigger:` block, kept as a raw YAML value.
    #[serde(default)]
    pub trigger: Option<serde_yaml::Value>,
    /// `pr:` block, kept as a raw YAML value.
    #[serde(default)]
    pub pr: Option<serde_yaml::Value>,
    /// Pipeline-level `variables:` block (sequence or mapping form).
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Shape (a): list of stages.
    #[serde(default)]
    pub stages: Option<Vec<AdoStage>>,
    /// Shape (b): top-level list of jobs.
    #[serde(default)]
    pub jobs: Option<Vec<AdoJob>>,
    /// Shape (c): top-level list of steps.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// `pool:` block, kept as a raw YAML value.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
}
478
/// One entry of a pipeline's `stages:` list — either a regular stage with
/// jobs or a stage-level template reference.
#[derive(Debug, Deserialize)]
pub struct AdoStage {
    /// Stage identifier. Absent when the stage entry is a template reference.
    #[serde(default)]
    pub stage: Option<String>,
    /// Stage-level template reference (`- template: path/to/stage.yml`).
    #[serde(default)]
    pub template: Option<String>,
    /// Stage-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Jobs in this stage; empty when the `jobs` key is absent.
    #[serde(default)]
    pub jobs: Vec<AdoJob>,
}
492
/// One entry of a `jobs:` list: a regular job, a deployment job, or a
/// job-level template reference.
#[derive(Debug, Deserialize)]
pub struct AdoJob {
    /// Regular job identifier
    #[serde(default)]
    pub job: Option<String>,
    /// Deployment job identifier
    #[serde(default)]
    pub deployment: Option<String>,
    /// Job-level `variables:` block.
    #[serde(default)]
    pub variables: Option<AdoVariables>,
    /// Steps of the job; `None` when the `steps` key is absent.
    #[serde(default)]
    pub steps: Option<Vec<AdoStep>>,
    /// `pool:` block, kept as a raw YAML value.
    #[serde(default)]
    pub pool: Option<serde_yaml::Value>,
    /// Job-level template reference
    #[serde(default)]
    pub template: Option<String>,
}
511
512impl AdoJob {
513    pub fn effective_name(&self) -> String {
514        self.job
515            .as_deref()
516            .or(self.deployment.as_deref())
517            .unwrap_or("job")
518            .to_string()
519    }
520}
521
/// One entry of a `steps:` list. Which of the optional keys is set determines
/// the kind of step (task, inline script, template reference, checkout).
#[derive(Debug, Deserialize)]
pub struct AdoStep {
    /// Task reference e.g. `AzureCLI@2`
    #[serde(default)]
    pub task: Option<String>,
    /// Inline script (cmd/sh)
    #[serde(default)]
    pub script: Option<String>,
    /// Inline bash script
    #[serde(default)]
    pub bash: Option<String>,
    /// Inline PowerShell script
    #[serde(default)]
    pub powershell: Option<String>,
    /// Cross-platform PowerShell
    #[serde(default)]
    pub pwsh: Option<String>,
    /// Step-level template reference
    #[serde(default)]
    pub template: Option<String>,
    /// Human-readable step label (`displayName:`); preferred over `name`
    /// when naming the step's graph node.
    #[serde(rename = "displayName", default)]
    pub display_name: Option<String>,
    /// Legacy name alias
    #[serde(default)]
    pub name: Option<String>,
    /// Step `env:` mapping. Values must be YAML strings to deserialize.
    #[serde(default)]
    pub env: Option<HashMap<String, String>>,
    /// Task inputs (key → value, but values may be nested)
    #[serde(default)]
    pub inputs: Option<HashMap<String, serde_yaml::Value>>,
    /// Checkout step target (e.g. `self`, a repo alias, or `none`)
    #[serde(default)]
    pub checkout: Option<String>,
    /// When true on a checkout step, writes credentials to .git/config for subsequent steps.
    #[serde(rename = "persistCredentials", default)]
    pub persist_credentials: Option<bool>,
}
559
560/// ADO `variables:` block. Can be a sequence (list of group/name-value entries)
561/// or a mapping (variableName: value). We normalise both into a Vec<AdoVariable>.
562#[derive(Debug, Default)]
563pub struct AdoVariables(pub Vec<AdoVariable>);
564
565impl<'de> serde::Deserialize<'de> for AdoVariables {
566    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
567    where
568        D: serde::Deserializer<'de>,
569    {
570        let raw = serde_yaml::Value::deserialize(deserializer)?;
571        let mut vars = Vec::new();
572
573        match raw {
574            serde_yaml::Value::Sequence(seq) => {
575                for item in seq {
576                    if let Some(map) = item.as_mapping() {
577                        if let Some(group_val) = map.get("group") {
578                            if let Some(group) = group_val.as_str() {
579                                vars.push(AdoVariable::Group {
580                                    group: group.to_string(),
581                                });
582                                continue;
583                            }
584                        }
585                        let name = map
586                            .get("name")
587                            .and_then(|v| v.as_str())
588                            .unwrap_or("")
589                            .to_string();
590                        let value = map
591                            .get("value")
592                            .and_then(|v| v.as_str())
593                            .unwrap_or("")
594                            .to_string();
595                        let is_secret = map
596                            .get("isSecret")
597                            .and_then(|v| v.as_bool())
598                            .unwrap_or(false);
599                        vars.push(AdoVariable::Named {
600                            name,
601                            value,
602                            is_secret,
603                        });
604                    }
605                }
606            }
607            serde_yaml::Value::Mapping(map) => {
608                for (k, v) in map {
609                    let name = k.as_str().unwrap_or("").to_string();
610                    let value = v.as_str().unwrap_or("").to_string();
611                    vars.push(AdoVariable::Named {
612                        name,
613                        value,
614                        is_secret: false,
615                    });
616                }
617            }
618            _ => {}
619        }
620
621        Ok(AdoVariables(vars))
622    }
623}
624
/// A single normalised entry from a `variables:` block.
#[derive(Debug)]
pub enum AdoVariable {
    /// Reference to a variable group (`- group: Name`).
    Group {
        /// Group name as written in the YAML.
        group: String,
    },
    /// A named variable, from either the sequence or the mapping form.
    Named {
        /// Variable name.
        name: String,
        /// Variable value as text.
        value: String,
        /// Value of the entry's `isSecret` key; always `false` for
        /// mapping-form variables.
        is_secret: bool,
    },
}
636
637#[cfg(test)]
638mod tests {
639    use super::*;
640
641    fn parse(yaml: &str) -> AuthorityGraph {
642        let parser = AdoParser;
643        let source = PipelineSource {
644            file: "azure-pipelines.yml".into(),
645            repo: None,
646            git_ref: None,
647        };
648        parser.parse(yaml, &source).unwrap()
649    }
650
651    #[test]
652    fn parses_simple_pipeline() {
653        let yaml = r#"
654trigger:
655  - main
656
657jobs:
658  - job: Build
659    steps:
660      - script: echo hello
661        displayName: Say hello
662"#;
663        let graph = parse(yaml);
664        assert!(graph.nodes.len() >= 2); // System.AccessToken + step
665    }
666
667    #[test]
668    fn system_access_token_created() {
669        let yaml = r#"
670steps:
671  - script: echo hi
672"#;
673        let graph = parse(yaml);
674        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
675        assert_eq!(identities.len(), 1);
676        assert_eq!(identities[0].name, "System.AccessToken");
677        assert_eq!(
678            identities[0].metadata.get(META_IDENTITY_SCOPE),
679            Some(&"broad".to_string())
680        );
681    }
682
683    #[test]
684    fn variable_group_creates_secret_and_marks_partial() {
685        let yaml = r#"
686variables:
687  - group: MySecretGroup
688
689steps:
690  - script: echo hi
691"#;
692        let graph = parse(yaml);
693        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
694        assert_eq!(secrets.len(), 1);
695        assert_eq!(secrets[0].name, "MySecretGroup");
696        assert_eq!(
697            secrets[0].metadata.get("variable_group"),
698            Some(&"true".to_string())
699        );
700        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
701        assert!(
702            graph
703                .completeness_gaps
704                .iter()
705                .any(|g| g.contains("MySecretGroup")),
706            "completeness gap should name the variable group"
707        );
708    }
709
710    #[test]
711    fn task_with_azure_subscription_creates_service_connection_identity() {
712        let yaml = r#"
713steps:
714  - task: AzureCLI@2
715    displayName: Deploy to Azure
716    inputs:
717      azureSubscription: MyServiceConnection
718      scriptType: bash
719      inlineScript: az group list
720"#;
721        let graph = parse(yaml);
722        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
723        // System.AccessToken + service connection
724        assert_eq!(identities.len(), 2);
725        let conn = identities
726            .iter()
727            .find(|i| i.name == "MyServiceConnection")
728            .unwrap();
729        assert_eq!(
730            conn.metadata.get("service_connection"),
731            Some(&"true".to_string())
732        );
733        assert_eq!(
734            conn.metadata.get(META_IDENTITY_SCOPE),
735            Some(&"broad".to_string())
736        );
737    }
738
739    #[test]
740    fn task_with_connected_service_name_creates_identity() {
741        let yaml = r#"
742steps:
743  - task: SqlAzureDacpacDeployment@1
744    inputs:
745      ConnectedServiceNameARM: MySqlConnection
746"#;
747        let graph = parse(yaml);
748        let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
749        assert!(
750            identities.iter().any(|i| i.name == "MySqlConnection"),
751            "connectedServiceNameARM should create identity"
752        );
753    }
754
755    #[test]
756    fn script_step_classified_as_first_party() {
757        let yaml = r#"
758steps:
759  - script: echo hi
760    displayName: Say hi
761"#;
762        let graph = parse(yaml);
763        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
764        assert_eq!(steps.len(), 1);
765        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
766    }
767
768    #[test]
769    fn bash_step_classified_as_first_party() {
770        let yaml = r#"
771steps:
772  - bash: echo hi
773"#;
774        let graph = parse(yaml);
775        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
776        assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
777    }
778
779    #[test]
780    fn task_step_classified_as_untrusted() {
781        let yaml = r#"
782steps:
783  - task: DotNetCoreCLI@2
784    inputs:
785      command: build
786"#;
787        let graph = parse(yaml);
788        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
789        assert_eq!(steps.len(), 1);
790        assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
791    }
792
793    #[test]
794    fn dollar_paren_var_in_script_creates_secret() {
795        let yaml = r#"
796steps:
797  - script: |
798      curl -H "Authorization: $(MY_API_TOKEN)" https://api.example.com
799    displayName: Call API
800"#;
801        let graph = parse(yaml);
802        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
803        assert_eq!(secrets.len(), 1);
804        assert_eq!(secrets[0].name, "MY_API_TOKEN");
805    }
806
807    #[test]
808    fn predefined_ado_var_not_treated_as_secret() {
809        let yaml = r#"
810steps:
811  - script: |
812      echo $(Build.BuildId)
813      echo $(Agent.WorkFolder)
814      echo $(System.DefaultWorkingDirectory)
815    displayName: Print vars
816"#;
817        let graph = parse(yaml);
818        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
819        assert!(
820            secrets.is_empty(),
821            "predefined ADO vars should not be treated as secrets, got: {:?}",
822            secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
823        );
824    }
825
826    #[test]
827    fn template_reference_creates_delegates_to_and_marks_partial() {
828        let yaml = r#"
829steps:
830  - template: steps/deploy.yml
831    parameters:
832      env: production
833"#;
834        let graph = parse(yaml);
835        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
836        assert_eq!(steps.len(), 1);
837
838        let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
839        assert_eq!(images.len(), 1);
840        assert_eq!(images[0].name, "steps/deploy.yml");
841
842        let delegates: Vec<_> = graph
843            .edges_from(steps[0].id)
844            .filter(|e| e.kind == EdgeKind::DelegatesTo)
845            .collect();
846        assert_eq!(delegates.len(), 1);
847
848        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
849    }
850
851    #[test]
852    fn top_level_steps_no_jobs() {
853        let yaml = r#"
854steps:
855  - script: echo a
856  - script: echo b
857"#;
858        let graph = parse(yaml);
859        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
860        assert_eq!(steps.len(), 2);
861    }
862
863    #[test]
864    fn top_level_jobs_no_stages() {
865        let yaml = r#"
866jobs:
867  - job: JobA
868    steps:
869      - script: echo a
870  - job: JobB
871    steps:
872      - script: echo b
873"#;
874        let graph = parse(yaml);
875        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
876        assert_eq!(steps.len(), 2);
877    }
878
879    #[test]
880    fn stages_with_nested_jobs_parsed() {
881        let yaml = r#"
882stages:
883  - stage: Build
884    jobs:
885      - job: Compile
886        steps:
887          - script: cargo build
888  - stage: Test
889    jobs:
890      - job: UnitTest
891        steps:
892          - script: cargo test
893"#;
894        let graph = parse(yaml);
895        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
896        assert_eq!(steps.len(), 2);
897    }
898
899    #[test]
900    fn all_steps_linked_to_system_access_token() {
901        let yaml = r#"
902steps:
903  - script: echo a
904  - task: SomeTask@1
905    inputs: {}
906"#;
907        let graph = parse(yaml);
908        let token: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
909        assert_eq!(token.len(), 1);
910        let token_id = token[0].id;
911
912        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
913        for step in &steps {
914            let links: Vec<_> = graph
915                .edges_from(step.id)
916                .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == token_id)
917                .collect();
918            assert_eq!(
919                links.len(),
920                1,
921                "step '{}' must link to System.AccessToken",
922                step.name
923            );
924        }
925    }
926
927    #[test]
928    fn named_secret_variable_creates_secret_node() {
929        let yaml = r#"
930variables:
931  - name: MY_PASSWORD
932    value: dummy
933    isSecret: true
934
935steps:
936  - script: echo hi
937"#;
938        let graph = parse(yaml);
939        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
940        assert_eq!(secrets.len(), 1);
941        assert_eq!(secrets[0].name, "MY_PASSWORD");
942    }
943
944    #[test]
945    fn variables_as_mapping_parsed() {
946        let yaml = r#"
947variables:
948  MY_VAR: hello
949  ANOTHER_VAR: world
950
951steps:
952  - script: echo hi
953"#;
954        let graph = parse(yaml);
955        // Mapping-style variables without isSecret — no secret nodes created
956        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
957        assert!(
958            secrets.is_empty(),
959            "plain mapping vars should not create secret nodes"
960        );
961    }
962
963    #[test]
964    fn persist_credentials_creates_persists_to_edge() {
965        let yaml = r#"
966steps:
967  - checkout: self
968    persistCredentials: true
969  - script: git push
970"#;
971        let graph = parse(yaml);
972        let token_id = graph
973            .nodes_of_kind(NodeKind::Identity)
974            .find(|n| n.name == "System.AccessToken")
975            .expect("System.AccessToken must exist")
976            .id;
977
978        let persists_edges: Vec<_> = graph
979            .edges
980            .iter()
981            .filter(|e| e.kind == EdgeKind::PersistsTo && e.to == token_id)
982            .collect();
983        assert_eq!(
984            persists_edges.len(),
985            1,
986            "checkout with persistCredentials: true must produce exactly one PersistsTo edge"
987        );
988    }
989
990    #[test]
991    fn checkout_without_persist_credentials_no_persists_to_edge() {
992        let yaml = r#"
993steps:
994  - checkout: self
995  - script: echo hi
996"#;
997        let graph = parse(yaml);
998        let persists_edges: Vec<_> = graph
999            .edges
1000            .iter()
1001            .filter(|e| e.kind == EdgeKind::PersistsTo)
1002            .collect();
1003        assert!(
1004            persists_edges.is_empty(),
1005            "checkout without persistCredentials should not produce PersistsTo edge"
1006        );
1007    }
1008
1009    #[test]
1010    fn var_flag_secret_marked_as_cli_flag_exposed() {
1011        let yaml = r#"
1012steps:
1013  - script: |
1014      terraform apply \
1015        -var "db_password=$(db_password)" \
1016        -var "api_key=$(api_key)"
1017    displayName: Terraform apply
1018"#;
1019        let graph = parse(yaml);
1020        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1021        assert!(!secrets.is_empty(), "should detect secrets from -var flags");
1022        for secret in &secrets {
1023            assert_eq!(
1024                secret.metadata.get(META_CLI_FLAG_EXPOSED),
1025                Some(&"true".to_string()),
1026                "secret '{}' passed via -var flag should be marked cli_flag_exposed",
1027                secret.name
1028            );
1029        }
1030    }
1031
1032    #[test]
1033    fn non_var_flag_secret_not_marked_as_cli_flag_exposed() {
1034        let yaml = r#"
1035steps:
1036  - script: |
1037      curl -H "Authorization: $(MY_TOKEN)" https://api.example.com
1038"#;
1039        let graph = parse(yaml);
1040        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1041        assert_eq!(secrets.len(), 1);
1042        assert!(
1043            !secrets[0].metadata.contains_key(META_CLI_FLAG_EXPOSED),
1044            "non -var secret should not be marked as cli_flag_exposed"
1045        );
1046    }
1047
1048    #[test]
1049    fn step_linked_to_variable_group_secret() {
1050        let yaml = r#"
1051variables:
1052  - group: ProdSecrets
1053
1054steps:
1055  - script: deploy.sh
1056"#;
1057        let graph = parse(yaml);
1058        let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
1059        assert_eq!(secrets.len(), 1);
1060        let secret_id = secrets[0].id;
1061
1062        let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
1063        let links: Vec<_> = graph
1064            .edges_from(steps[0].id)
1065            .filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == secret_id)
1066            .collect();
1067        assert_eq!(
1068            links.len(),
1069            1,
1070            "step should be linked to variable group secret"
1071        );
1072    }
1073}