use std::collections::{HashMap, HashSet};
use base64::Engine;
use serde::Deserialize;
use taudit_core::error::TauditError;
use taudit_core::graph::*;
use taudit_core::ports::PipelineParser;
/// Optional Azure DevOps connection details supplied by the caller.
///
/// When organisation, project and PAT are all present and non-blank, the
/// parser can query the variable-groups API to resolve group contents.
///
/// `Debug` is implemented by hand so the personal access token is never
/// written to logs or panic messages; only its presence is shown.
#[derive(Clone, Default, PartialEq, Eq)]
pub struct AdoParserContext {
    /// Organisation name, or a full `http(s)://` base URL.
    pub org: Option<String>,
    /// Project name within the organisation.
    pub project: Option<String>,
    /// Personal access token used for Basic authentication. Secret.
    pub pat: Option<String>,
}

impl std::fmt::Debug for AdoParserContext {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the derived layout but replace the token value with a marker.
        let redacted_pat = self.pat.as_ref().map(|_| "<redacted>");
        f.debug_struct("AdoParserContext")
            .field("org", &self.org)
            .field("project", &self.project)
            .field("pat", &redacted_pat)
            .finish()
    }
}

impl AdoParserContext {
    /// Returns `true` when no field at all has been supplied.
    fn is_empty(&self) -> bool {
        self.org.is_none() && self.project.is_none() && self.pat.is_none()
    }
}
/// Graph metadata key holding the trimmed ADO organisation from the parser context.
const META_ADO_ORG: &str = "ado_org";
/// Graph metadata key holding the trimmed ADO project from the parser context.
const META_ADO_PROJECT: &str = "ado_project";
/// Graph metadata key recording (as "true"/"false") whether a non-blank PAT was supplied.
const META_ADO_PAT_PRESENT: &str = "ado_pat_present";
/// Graph metadata key recording (as "true"/"false") whether org, project and PAT are all available.
const META_ADO_VG_ENRICHMENT_READY: &str = "ado_variable_group_enrichment_ready";
/// Graph metadata key recording (as "true"/"false") whether the variable-group fetch succeeded.
const META_ADO_VG_ENRICHED: &str = "ado_variable_group_enriched";
/// Variable-group name -> (variable name -> is-secret flag).
type AdoVariableGroupIndex = HashMap<String, HashMap<String, bool>>;
/// Reports whether an inline script runs `terraform apply` with `auto-approve`.
///
/// Everything after the first `#` on a line is ignored. The flag may sit on
/// the `terraform apply` line itself or on one of up to three following
/// lines, provided each preceding line ends with a continuation character
/// (`\` or a backtick).
fn script_does_terraform_auto_apply(s: &str) -> bool {
    const FLAG: &str = "auto-approve";
    // How many lines after the `terraform apply` line are inspected.
    const LOOKAHEAD: usize = 3;

    fn without_comment(line: &str) -> &str {
        match line.find('#') {
            Some(hash_at) => &line[..hash_at],
            None => line,
        }
    }

    fn is_continued(line: &str) -> bool {
        let trimmed = line.trim_end();
        trimmed.ends_with('\\') || trimmed.ends_with('`')
    }

    let code_lines: Vec<&str> = s.lines().map(without_comment).collect();
    for (idx, current) in code_lines.iter().enumerate() {
        let invokes_apply =
            current.contains("terraform apply") || current.contains("terraform\tapply");
        if !invokes_apply {
            continue;
        }
        if current.contains(FLAG) {
            return true;
        }
        let mut still_open = is_continued(current);
        for follower in code_lines.iter().skip(idx + 1).take(LOOKAHEAD) {
            if !still_open {
                break;
            }
            if follower.contains(FLAG) {
                return true;
            }
            still_open = is_continued(follower);
        }
    }
    false
}
pub struct AdoParser;
impl AdoParser {
pub fn parse_with_context(
&self,
content: &str,
source: &PipelineSource,
ctx: Option<&AdoParserContext>,
) -> Result<AuthorityGraph, TauditError> {
let mut de = serde_yaml::Deserializer::from_str(content);
let doc = de
.next()
.ok_or_else(|| TauditError::Parse("empty YAML document".into()))?;
let pipeline: AdoPipeline = match AdoPipeline::deserialize(doc) {
Ok(p) => p,
Err(e) => {
let msg = e.to_string();
if msg.contains("invalid type: sequence, expected struct AdoPipeline") {
if let Some(recovered) = recover_after_leading_root_sequence(content) {
let pipeline: AdoPipeline = serde_yaml::from_str(recovered)
.map_err(|e| TauditError::Parse(format!("YAML parse error: {e}")))?;
let mut graph = build_ado_graph(pipeline, false, source, content, ctx);
graph.mark_partial(
GapKind::Structural,
"ADO file starts with a root-level sequence before the pipeline mapping — recovered by analyzing the later pipeline mapping only".to_string(),
);
graph.stamp_edge_authority_summaries();
return Ok(graph);
}
}
let looks_like_template_fragment = (msg.contains("did not find expected key")
|| (msg.contains("parameters")
&& msg.contains("invalid type: map")
&& msg.contains("expected a sequence")))
&& has_root_parameter_conditional(content);
if looks_like_template_fragment {
let mut graph = AuthorityGraph::new(source.clone());
graph
.metadata
.insert(META_PLATFORM.into(), "azure-devops".into());
apply_parser_context_metadata(&mut graph, ctx);
graph.mark_partial(
GapKind::Structural,
"ADO template fragment with top-level parameter conditional — root structure depends on parent pipeline context".to_string(),
);
graph.stamp_edge_authority_summaries();
return Ok(graph);
}
return Err(TauditError::Parse(format!("YAML parse error: {e}")));
}
};
let extra_docs = de.next().is_some();
let mut graph = build_ado_graph(pipeline, extra_docs, source, content, ctx);
graph.stamp_edge_authority_summaries();
Ok(graph)
}
}
impl PipelineParser for AdoParser {
fn platform(&self) -> &str {
"azure-devops"
}
fn parse(&self, content: &str, source: &PipelineSource) -> Result<AuthorityGraph, TauditError> {
self.parse_with_context(content, source, None)
}
}
/// Builds the authority graph for an already-deserialized ADO pipeline.
///
/// Stamps platform/context metadata, records declared parameters, creates the
/// implicit `System.AccessToken` identity node, processes pool and variables,
/// then walks exactly one of `stages`, top-level `jobs`, or top-level `steps`
/// (checked in that order).
///
/// * `extra_docs` — when true, the graph is marked partial because only the
///   first YAML document was analysed.
/// * `content` — the raw YAML text, re-read for top-level template-expression
///   carriers and for repository alias usage.
/// * `ctx` — optional connection context used for metadata and variable-group
///   enrichment.
fn build_ado_graph(
pipeline: AdoPipeline,
extra_docs: bool,
source: &PipelineSource,
content: &str,
ctx: Option<&AdoParserContext>,
) -> AuthorityGraph {
let mut graph = AuthorityGraph::new(source.clone());
graph
.metadata
.insert(META_PLATFORM.into(), "azure-devops".into());
apply_parser_context_metadata(&mut graph, ctx);
if extra_docs {
graph.mark_partial(
GapKind::Expression,
"file contains multiple YAML documents (--- separator) — only the first was analyzed"
.to_string(),
);
}
mark_unresolved_top_level_carriers(content, &mut graph);
// A `pr:` key counts as a PR trigger only when it is a mapping or a sequence.
let has_pr_trigger = pipeline
.pr
.as_ref()
.map(|v| v.is_mapping() || v.is_sequence())
.unwrap_or(false);
if has_pr_trigger {
graph.metadata.insert(META_TRIGGER.into(), "pr".into());
}
process_repositories(&pipeline, content, &mut graph);
// Record each named parameter with its type and whether it has a non-empty `values` list.
if let Some(ref params) = pipeline.parameters {
for p in params {
let name = match p.name.as_ref() {
Some(n) if !n.is_empty() => n.clone(),
_ => continue,
};
let param_type = p.param_type.clone().unwrap_or_default();
let has_values_allowlist = p.values.as_ref().map(|v| !v.is_empty()).unwrap_or(false);
graph.parameters.insert(
name,
ParamSpec {
param_type,
has_values_allowlist,
},
);
}
}
// Secret-name -> node-id cache shared by every scope so a secret gets a single node.
let mut secret_ids: HashMap<String, NodeId> = HashMap::new();
// The implicit System.AccessToken identity starts as broad and is narrowed below
// only when `permissions` is present and not broad.
let mut meta = HashMap::new();
meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
meta.insert(META_IMPLICIT.into(), "true".into());
let token_id = graph.add_node_with_metadata(
NodeKind::Identity,
"System.AccessToken",
TrustZone::FirstParty,
meta,
);
if let Some(ref perms_val) = pipeline.permissions {
if !ado_permissions_are_broad(perms_val) {
let perms_str = ado_permissions_display(perms_val);
graph.nodes[token_id]
.metadata
.insert(META_IDENTITY_SCOPE.into(), "constrained".into());
graph.nodes[token_id]
.metadata
.insert(META_PERMISSIONS.into(), perms_str);
}
}
process_pool(&pipeline.pool, &pipeline.workspace, &mut graph);
let mut pipeline_plain_vars: HashSet<String> = HashSet::new();
let mut pipeline_has_variable_groups = false;
// `None` unless the context is enrichment-ready and the fetch succeeded.
let variable_group_index = maybe_fetch_variable_group_index(ctx, &mut graph);
let pipeline_secret_ids = process_variables(
&pipeline.variables,
&mut graph,
&mut secret_ids,
"pipeline",
&mut pipeline_plain_vars,
&mut pipeline_has_variable_groups,
variable_group_index.as_ref(),
);
if let Some(ref stages) = pipeline.stages {
for stage in stages {
// A stage that is a template reference is modelled as a delegation and not expanded.
if let Some(ref tpl) = stage.template {
let stage_name = stage.stage.as_deref().unwrap_or("stage");
add_template_delegation(stage_name, tpl, token_id, None, &mut graph);
continue;
}
let stage_name = stage.stage.as_deref().unwrap_or("stage").to_string();
// Plain variables are inherited downwards: pipeline -> stage -> job.
let mut stage_plain_vars = pipeline_plain_vars.clone();
let mut stage_has_variable_groups = false;
let stage_secret_ids = process_variables(
&stage.variables,
&mut graph,
&mut secret_ids,
&stage_name,
&mut stage_plain_vars,
&mut stage_has_variable_groups,
variable_group_index.as_ref(),
);
let stage_scope_has_variable_groups =
pipeline_has_variable_groups || stage_has_variable_groups;
let stage_condition = non_empty_condition(&stage.condition);
if let Some(c) = stage_condition {
mark_condition_partial(&mut graph, "stage", &stage_name, c);
}
let stage_depends_on =
explicit_depends_on_csv(&stage.depends_on, &mut graph, "stage", &stage_name);
for job in &stage.jobs {
let job_name = job.effective_name();
let mut job_plain_vars = stage_plain_vars.clone();
let mut job_has_variable_groups = false;
let job_secret_ids = process_variables(
&job.variables,
&mut graph,
&mut secret_ids,
&job_name,
&mut job_plain_vars,
&mut job_has_variable_groups,
variable_group_index.as_ref(),
);
let step_scope_has_variable_groups =
stage_scope_has_variable_groups || job_has_variable_groups;
// The job's own workspace wins; otherwise fall back to the pipeline-level one.
let effective_workspace = job.workspace.as_ref().or(pipeline.workspace.as_ref());
process_pool(&job.pool, &effective_workspace.cloned(), &mut graph);
// Steps see the secrets declared at pipeline, stage and job scope.
let all_secrets: Vec<NodeId> = pipeline_secret_ids
.iter()
.chain(&stage_secret_ids)
.chain(&job_secret_ids)
.copied()
.collect();
// Remember where this job's nodes begin so they can be tagged afterwards.
let steps_start = graph.nodes.len();
let job_condition = non_empty_condition(&job.condition);
if let Some(c) = job_condition {
mark_condition_partial(&mut graph, "job", &job_name, c);
}
// An explicit job-level dependsOn takes precedence over the stage-level one.
let job_depends_on =
explicit_depends_on_csv(&job.depends_on, &mut graph, "job", &job_name)
.or_else(|| stage_depends_on.clone());
let outer_condition = join_conditions(stage_condition, job_condition);
let job_steps = job.all_steps();
process_steps(
&job_steps,
&job_name,
token_id,
&all_secrets,
&job_plain_vars,
step_scope_has_variable_groups,
outer_condition.as_deref(),
job_depends_on.as_deref(),
&mut graph,
&mut secret_ids,
);
if let Some(ref tpl) = job.template {
add_template_delegation(&job_name, tpl, token_id, Some(&job_name), &mut graph);
}
if job.has_environment_binding() {
tag_job_steps_env_approval(&mut graph, steps_start);
}
}
}
} else if let Some(ref jobs) = pipeline.jobs {
// Top-level jobs: same handling as above without a stage scope.
for job in jobs {
let job_name = job.effective_name();
let mut job_plain_vars = pipeline_plain_vars.clone();
let mut job_has_variable_groups = false;
let job_secret_ids = process_variables(
&job.variables,
&mut graph,
&mut secret_ids,
&job_name,
&mut job_plain_vars,
&mut job_has_variable_groups,
variable_group_index.as_ref(),
);
let step_scope_has_variable_groups =
pipeline_has_variable_groups || job_has_variable_groups;
let effective_workspace = job.workspace.as_ref().or(pipeline.workspace.as_ref());
process_pool(&job.pool, &effective_workspace.cloned(), &mut graph);
let all_secrets: Vec<NodeId> = pipeline_secret_ids
.iter()
.chain(&job_secret_ids)
.copied()
.collect();
let steps_start = graph.nodes.len();
let job_condition = non_empty_condition(&job.condition);
if let Some(c) = job_condition {
mark_condition_partial(&mut graph, "job", &job_name, c);
}
let job_depends_on =
explicit_depends_on_csv(&job.depends_on, &mut graph, "job", &job_name);
let job_steps = job.all_steps();
process_steps(
&job_steps,
&job_name,
token_id,
&all_secrets,
&job_plain_vars,
step_scope_has_variable_groups,
job_condition,
job_depends_on.as_deref(),
&mut graph,
&mut secret_ids,
);
if let Some(ref tpl) = job.template {
add_template_delegation(&job_name, tpl, token_id, Some(&job_name), &mut graph);
}
if job.has_environment_binding() {
tag_job_steps_env_approval(&mut graph, steps_start);
}
}
} else if let Some(ref steps) = pipeline.steps {
// Top-level steps: processed under the synthetic job name "pipeline".
process_steps(
steps,
"pipeline",
token_id,
&pipeline_secret_ids,
&pipeline_plain_vars,
pipeline_has_variable_groups,
None,
None,
&mut graph,
&mut secret_ids,
);
}
// Sanity check: a non-empty stages/jobs/steps list that yields no Step node at all
// is flagged as a structural gap.
let step_count = graph
.nodes
.iter()
.filter(|n| n.kind == NodeKind::Step)
.count();
let had_step_carrier = pipeline.stages.as_ref().is_some_and(|s| !s.is_empty())
|| pipeline.jobs.as_ref().is_some_and(|j| !j.is_empty())
|| pipeline.steps.as_ref().is_some_and(|s| !s.is_empty());
if step_count == 0 && had_step_carrier {
graph.mark_partial(
GapKind::Structural,
"stages/jobs/steps parsed but produced 0 step nodes — possible non-ADO YAML wrong-platform-classified".to_string(),
);
}
graph.stamp_edge_authority_summaries();
graph
}
/// Copies the caller-supplied ADO context into graph metadata.
///
/// Does nothing when `ctx` is absent or has no field set. Otherwise records
/// the trimmed organisation and project (when non-blank), whether a
/// non-blank PAT is present, and whether all three are available for
/// variable-group enrichment. The PAT itself is never stored.
fn apply_parser_context_metadata(graph: &mut AuthorityGraph, ctx: Option<&AdoParserContext>) {
    let ctx = match ctx {
        Some(candidate) if !candidate.is_empty() => candidate,
        _ => return,
    };

    let org = ctx.org.as_deref().map(str::trim).filter(|t| !t.is_empty());
    if let Some(org) = org {
        graph.metadata.insert(META_ADO_ORG.into(), org.to_string());
    }

    let project = ctx
        .project
        .as_deref()
        .map(str::trim)
        .filter(|t| !t.is_empty());
    if let Some(project) = project {
        graph
            .metadata
            .insert(META_ADO_PROJECT.into(), project.to_string());
    }

    let pat_present = ctx
        .pat
        .as_deref()
        .map(str::trim)
        .is_some_and(|t| !t.is_empty());
    graph
        .metadata
        .insert(META_ADO_PAT_PRESENT.into(), pat_present.to_string());

    // Readiness is derived from what actually landed in the metadata map.
    let has_org = graph.metadata.contains_key(META_ADO_ORG);
    let has_project = graph.metadata.contains_key(META_ADO_PROJECT);
    let enrichment_ready = has_org && has_project && pat_present;
    graph.metadata.insert(
        META_ADO_VG_ENRICHMENT_READY.into(),
        enrichment_ready.to_string(),
    );
}
/// Fetches the variable-group index when the graph is marked enrichment-ready.
///
/// Returns `None` without touching the graph when there is no context or the
/// readiness flag is not `"true"`. Otherwise records the outcome under
/// `META_ADO_VG_ENRICHED`; on failure the graph is also marked partial and
/// `None` is returned.
fn maybe_fetch_variable_group_index(
    ctx: Option<&AdoParserContext>,
    graph: &mut AuthorityGraph,
) -> Option<AdoVariableGroupIndex> {
    let ctx = ctx?;
    let ready = graph
        .metadata
        .get(META_ADO_VG_ENRICHMENT_READY)
        .is_some_and(|flag| flag == "true");
    if !ready {
        return None;
    }

    let outcome = fetch_variable_group_index(ctx);
    let enriched_flag = if outcome.is_ok() { "true" } else { "false" };
    graph
        .metadata
        .insert(META_ADO_VG_ENRICHED.into(), enriched_flag.into());

    match outcome {
        Ok(index) => Some(index),
        Err(err) => {
            graph.mark_partial(
                GapKind::Structural,
                format!(
                    "warning: ADO variable-group enrichment failed ({err}) — falling back to static variable-group modelling"
                ),
            );
            None
        }
    }
}
/// Downloads the project's variable groups and indexes their variables.
///
/// `org` may be a bare organisation name (resolved under
/// `https://dev.azure.com/`) or a full `http(s)://` base URL. The PAT is sent
/// as HTTP Basic credentials with an empty user name.
///
/// # Errors
/// Returns a message when a required context field is missing or blank, when
/// the HTTP call fails, or when the response is not the expected JSON.
fn fetch_variable_group_index(ctx: &AdoParserContext) -> Result<AdoVariableGroupIndex, String> {
    /// Trimmed, non-empty view of a context field, or `missing` as the error.
    fn required<'a>(field: &'a Option<String>, missing: &str) -> Result<&'a str, String> {
        match field.as_deref().map(str::trim) {
            Some(text) if !text.is_empty() => Ok(text),
            _ => Err(missing.to_string()),
        }
    }

    let org = required(&ctx.org, "missing org")?;
    let project = required(&ctx.project, "missing project")?;
    let pat = required(&ctx.pat, "missing PAT")?;

    let is_full_url = org.starts_with("http://") || org.starts_with("https://");
    let org_base = if is_full_url {
        org.trim_end_matches('/').to_string()
    } else {
        format!("https://dev.azure.com/{}", org.trim_matches('/'))
    };
    let project_segment = project.replace(' ', "%20");
    let url = format!(
        "{org_base}/{project_segment}/_apis/distributedtask/variablegroups?api-version=7.1"
    );

    let credentials = base64::engine::general_purpose::STANDARD.encode(format!(":{pat}"));
    let auth = format!("Basic {}", credentials);

    let request = ureq::get(&url)
        .set("Accept", "application/json")
        .set("Authorization", &auth);
    let response = request.call().map_err(map_ureq_error)?;
    let body = response
        .into_json::<serde_json::Value>()
        .map_err(|e| format!("invalid JSON response: {e}"))?;
    parse_variable_group_index_from_json(&body)
}
fn map_ureq_error(err: ureq::Error) -> String {
match err {
ureq::Error::Status(code, _) => format!("HTTP {code} from variablegroups API"),
ureq::Error::Transport(t) => t.to_string(),
}
}
/// Converts a variable-groups API response body into an index.
///
/// Expects a top-level `value` array. Entries without a string `name` are
/// skipped; a group without a `variables` object yields an empty map; a
/// variable without a boolean `isSecret` is treated as non-secret.
///
/// # Errors
/// Returns a message when the `value` array is missing.
fn parse_variable_group_index_from_json(
    body: &serde_json::Value,
) -> Result<AdoVariableGroupIndex, String> {
    let Some(groups) = body.get("value").and_then(|v| v.as_array()) else {
        return Err("response missing 'value' array".to_string());
    };

    let index: AdoVariableGroupIndex = groups
        .iter()
        .filter_map(|group| {
            let group_name = group.get("name")?.as_str()?;
            let group_vars: HashMap<String, bool> = group
                .get("variables")
                .and_then(|v| v.as_object())
                .map(|vars| {
                    vars.iter()
                        .map(|(var_name, details)| {
                            let is_secret = details
                                .get("isSecret")
                                .and_then(|flag| flag.as_bool())
                                .unwrap_or(false);
                            (var_name.clone(), is_secret)
                        })
                        .collect()
                })
                .unwrap_or_default();
            Some((group_name.to_string(), group_vars))
        })
        .collect();
    Ok(index)
}
/// Returns the trimmed condition text, or `None` when it is absent or blank.
fn non_empty_condition(c: &Option<String>) -> Option<&str> {
    c.as_deref().map(str::trim).filter(|text| !text.is_empty())
}
/// Combines an enclosing condition with a nested one.
///
/// With both present the result is `"<outer> AND <inner>"`; with only one,
/// that one is returned; with neither, `None`.
fn join_conditions(outer: Option<&str>, inner: Option<&str>) -> Option<String> {
    let Some(enclosing) = outer else {
        return inner.map(String::from);
    };
    let combined = match inner {
        Some(nested) => format!("{enclosing} AND {nested}"),
        None => String::from(enclosing),
    };
    Some(combined)
}
/// Flags top-level `stages:` / `jobs:` keys whose value is a template expression.
///
/// Only the first YAML document is inspected. If it cannot be read as a
/// mapping, nothing is marked.
fn mark_unresolved_top_level_carriers(content: &str, graph: &mut AuthorityGraph) {
    let first_doc = serde_yaml::Deserializer::from_str(content).next();
    let root = match first_doc.map(|doc| serde_yaml::Value::deserialize(doc)) {
        Some(Ok(parsed)) => parsed,
        _ => return,
    };
    let Some(root_map) = root.as_mapping() else {
        return;
    };

    for key in ["stages", "jobs"] {
        match root_map.get(key) {
            Some(carrier) if is_ado_template_expression_scalar(carrier) => {
                let reason = format!(
                    "ADO top-level `{key}:` uses a template expression — {key} cannot be enumerated statically"
                );
                graph.mark_partial(GapKind::Expression, reason);
            }
            _ => {}
        }
    }
}
/// True when `value` is a string that, once trimmed, starts with `${{` and ends with `}}`.
fn is_ado_template_expression_scalar(value: &serde_yaml::Value) -> bool {
    match value.as_str().map(str::trim) {
        Some(text) => text.starts_with("${{") && text.ends_with("}}"),
        None => false,
    }
}
/// Marks the graph partial because a stage/job/step condition is not evaluated.
///
/// `scope_kind` and `name` identify the element; `condition` is quoted in the
/// recorded reason.
fn mark_condition_partial(
    graph: &mut AuthorityGraph,
    scope_kind: &str,
    name: &str,
    condition: &str,
) {
    let reason = format!(
        "ADO {scope_kind} '{name}' condition: '{condition}' — runtime evaluation not modelled"
    );
    graph.mark_partial(GapKind::Expression, reason);
}
/// Flattens an explicit `dependsOn` into a comma-separated list.
///
/// Names are trimmed and blank ones dropped. Returns `None` when `dependsOn`
/// is absent, resolves to no names, or has an unsupported shape (in which
/// case the graph is also marked partial).
fn explicit_depends_on_csv(
    depends_on: &Option<DependsOn>,
    graph: &mut AuthorityGraph,
    scope_kind: &str,
    name: &str,
) -> Option<String> {
    match depends_on.as_ref()? {
        DependsOn::Single(single) => Some(single.trim())
            .filter(|dep| !dep.is_empty())
            .map(|dep| dep.to_string()),
        DependsOn::Multiple(many) => {
            let deps: Vec<&str> = many
                .iter()
                .map(|dep| dep.trim())
                .filter(|dep| !dep.is_empty())
                .collect();
            if deps.is_empty() {
                None
            } else {
                Some(deps.join(","))
            }
        }
        DependsOn::Other(raw) => {
            mark_depends_on_partial(graph, scope_kind, name, raw);
            None
        }
    }
}
fn mark_depends_on_partial(
graph: &mut AuthorityGraph,
scope_kind: &str,
name: &str,
raw: &serde_yaml::Value,
) {
let shape = match raw {
serde_yaml::Value::Null => "null",
serde_yaml::Value::Bool(_) => "bool",
serde_yaml::Value::Number(_) => "number",
serde_yaml::Value::String(_) => "string",
serde_yaml::Value::Sequence(_) => "sequence",
serde_yaml::Value::Mapping(_) => "mapping",
serde_yaml::Value::Tagged(_) => "tagged",
};
graph.mark_partial(
GapKind::Expression,
format!(
"ADO {scope_kind} '{name}' dependsOn uses unsupported {shape} form — runtime expansion not modelled"
),
);
}
/// True when `permissions` grants `write`.
///
/// For a mapping, any value equal to the string `write` qualifies; otherwise
/// the value itself must be the string `write`.
fn ado_permissions_are_broad(perms: &serde_yaml::Value) -> bool {
    match perms.as_mapping() {
        Some(entries) => entries
            .values()
            .any(|level| matches!(level.as_str(), Some("write"))),
        None => perms.as_str() == Some("write"),
    }
}
/// Renders `permissions` as display text for graph metadata.
///
/// A mapping becomes `key: value` pairs joined by `, ` (entries with a
/// non-string key are skipped; non-string values show as `?`). A scalar is
/// shown as-is, or `none` when it is not a string.
fn ado_permissions_display(perms: &serde_yaml::Value) -> String {
    let Some(entries) = perms.as_mapping() else {
        return perms.as_str().unwrap_or("none").to_string();
    };

    let mut parts: Vec<String> = Vec::with_capacity(entries.len());
    for (raw_key, raw_val) in entries {
        if let Some(key) = raw_key.as_str() {
            let val = raw_val.as_str().unwrap_or("?");
            parts.push(format!("{key}: {val}"));
        }
    }
    parts.join(", ")
}
/// Adds an `Image` node for the agent pool, if one is declared.
///
/// A string pool, or a mapping with only `name`, is tagged self-hosted; a
/// mapping with `vmImage` uses that image name and is not. Any other shape
/// adds nothing. The node is also tagged when `workspace` requests cleaning.
fn process_pool(
    pool: &Option<serde_yaml::Value>,
    workspace: &Option<serde_yaml::Value>,
    graph: &mut AuthorityGraph,
) {
    let pool_val = match pool {
        Some(declared) => declared,
        None => return,
    };

    let (image_name, is_self_hosted) = match pool_val {
        serde_yaml::Value::Mapping(map) => {
            let vm_image = map.get("vmImage").and_then(|v| v.as_str());
            let pool_name = map.get("name").and_then(|v| v.as_str());
            // `vmImage` takes precedence over `name` when both are given.
            if let Some(vm) = vm_image {
                (vm.to_string(), false)
            } else if let Some(named) = pool_name {
                (named.to_string(), true)
            } else {
                return;
            }
        }
        serde_yaml::Value::String(pool_name) => (pool_name.clone(), true),
        _ => return,
    };

    let clean_workspace = has_workspace_clean(workspace);
    let mut meta = HashMap::new();
    if is_self_hosted {
        meta.insert(META_SELF_HOSTED.into(), "true".into());
    }
    if clean_workspace {
        meta.insert(META_WORKSPACE_CLEAN.into(), "true".into());
    }
    graph.add_node_with_metadata(NodeKind::Image, image_name, TrustZone::FirstParty, meta);
}
/// True when `workspace.clean` asks for cleaning.
///
/// Accepts boolean `true`, or (case-insensitively) one of the strings
/// `all`, `outputs`, `resources`, `true`.
fn has_workspace_clean(workspace: &Option<serde_yaml::Value>) -> bool {
    let clean_setting = workspace
        .as_ref()
        .and_then(|ws| ws.as_mapping())
        .and_then(|map| map.get("clean"));

    match clean_setting {
        Some(serde_yaml::Value::Bool(flag)) => *flag,
        Some(serde_yaml::Value::String(mode)) => {
            let mode = mode.to_ascii_lowercase();
            ["all", "outputs", "resources", "true"].contains(&mode.as_str())
        }
        _ => false,
    }
}
/// Records the declared `resources.repositories` in graph metadata as JSON.
///
/// Each entry carries the alias, optional type/name/ref, and a `used` flag
/// that is true when the alias is referenced by a `template: …@alias` or a
/// `checkout: alias` anywhere in the first YAML document (or in `extends`).
/// Nothing is recorded when no repositories are declared.
fn process_repositories(pipeline: &AdoPipeline, raw_content: &str, graph: &mut AuthorityGraph) {
    let resources = match pipeline.resources.as_ref() {
        Some(r) if !r.repositories.is_empty() => r,
        _ => return,
    };

    let mut used_aliases: HashSet<String> = HashSet::new();
    if let Some(ref ext) = pipeline.extends {
        collect_template_alias_refs(ext, &mut used_aliases);
    }

    // Read only the first document. `serde_yaml::from_str` rejects input that
    // contains several documents, which would silently leave every alias
    // marked as unused even though the first document is analysed elsewhere.
    let first_doc = serde_yaml::Deserializer::from_str(raw_content).next();
    if let Some(Ok(value)) = first_doc.map(|doc| serde_yaml::Value::deserialize(doc)) {
        collect_template_alias_refs(&value, &mut used_aliases);
        collect_checkout_alias_refs(&value, &mut used_aliases);
    }

    let mut entries: Vec<serde_json::Value> = Vec::with_capacity(resources.repositories.len());
    for repo in &resources.repositories {
        // Repositories without a usable alias cannot be referenced; skip them.
        let Some(alias) = repo.repository.as_ref().filter(|s| !s.is_empty()) else {
            continue;
        };
        let used = used_aliases.contains(alias);
        let mut obj = serde_json::Map::new();
        obj.insert("alias".into(), serde_json::Value::String(alias.clone()));
        if let Some(ref t) = repo.repo_type {
            obj.insert("repo_type".into(), serde_json::Value::String(t.clone()));
        }
        if let Some(ref n) = repo.name {
            obj.insert("name".into(), serde_json::Value::String(n.clone()));
        }
        if let Some(ref r) = repo.git_ref {
            obj.insert("ref".into(), serde_json::Value::String(r.clone()));
        }
        obj.insert("used".into(), serde_json::Value::Bool(used));
        entries.push(serde_json::Value::Object(obj));
    }

    if let Ok(json) = serde_json::to_string(&serde_json::Value::Array(entries)) {
        graph.metadata.insert(META_REPOSITORIES.into(), json);
    }
}
fn collect_template_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
match value {
serde_yaml::Value::Mapping(map) => {
for (k, v) in map {
if k.as_str() == Some("template") {
if let Some(s) = v.as_str() {
if let Some(alias) = parse_template_alias(s) {
sink.insert(alias);
}
}
}
collect_template_alias_refs(v, sink);
}
}
serde_yaml::Value::Sequence(seq) => {
for v in seq {
collect_template_alias_refs(v, sink);
}
}
_ => {}
}
}
/// Recursively collects repository aliases referenced by `checkout: <alias>`.
///
/// Walks every mapping and sequence under `value`. The values `self`, `none`
/// and the empty string are not treated as aliases.
fn collect_checkout_alias_refs(value: &serde_yaml::Value, sink: &mut HashSet<String>) {
    match value {
        serde_yaml::Value::Sequence(items) => {
            for item in items {
                collect_checkout_alias_refs(item, sink);
            }
        }
        serde_yaml::Value::Mapping(entries) => {
            for (key, child) in entries {
                if key.as_str() == Some("checkout") {
                    let target = child
                        .as_str()
                        .filter(|name| !matches!(*name, "self" | "none" | ""));
                    if let Some(alias) = target {
                        sink.insert(alias.to_string());
                    }
                }
                collect_checkout_alias_refs(child, sink);
            }
        }
        _ => {}
    }
}
/// Extracts the alias from a `path@alias` template reference.
///
/// Uses the text after the last `@`; returns `None` when there is no `@` or
/// nothing follows it.
fn parse_template_alias(template_ref: &str) -> Option<String> {
    template_ref
        .rsplit_once('@')
        .map(|(_path, alias)| alias)
        .filter(|alias| !alias.is_empty())
        .map(String::from)
}
/// Tags every `Step` node from index `start_idx` onwards as environment-approved.
fn tag_job_steps_env_approval(graph: &mut AuthorityGraph, start_idx: usize) {
    graph
        .nodes
        .iter_mut()
        .skip(start_idx)
        .filter(|candidate| candidate.kind == NodeKind::Step)
        .for_each(|step| {
            step.metadata
                .insert(META_ENV_APPROVAL.into(), "true".into());
        });
}
/// Registers the variables declared at one scope and returns their secret node ids.
///
/// * Named secret variables get (or reuse) a secret node; named plain
///   variables are added to `plain_vars`.
/// * A variable group found in `variable_group_index` is expanded the same
///   way, variable by variable.
/// * A group that cannot be resolved becomes a single opaque secret node,
///   sets `has_variable_groups`, and marks the graph partial.
/// * A group whose name contains `${{` is skipped with a partial marker.
fn process_variables(
    variables: &Option<AdoVariables>,
    graph: &mut AuthorityGraph,
    cache: &mut HashMap<String, NodeId>,
    scope: &str,
    plain_vars: &mut HashSet<String>,
    has_variable_groups: &mut bool,
    variable_group_index: Option<&AdoVariableGroupIndex>,
) -> Vec<NodeId> {
    let Some(vars) = variables.as_ref() else {
        return Vec::new();
    };

    let mut ids = Vec::new();
    for var in &vars.0 {
        match var {
            AdoVariable::Named {
                name, is_secret, ..
            } => {
                if !*is_secret {
                    plain_vars.insert(name.clone());
                    continue;
                }
                ids.push(find_or_create_secret(graph, cache, name));
            }
            AdoVariable::Group { group } => {
                if group.contains("${{") {
                    graph.mark_partial(
                        GapKind::Expression,
                        format!(
                            "variable group in {scope} uses template expression — group name unresolvable at parse time"
                        ),
                    );
                    continue;
                }

                // Enriched path: the group's contents are known from the API.
                let resolved = variable_group_index.and_then(|idx| idx.get(group));
                if let Some(group_vars) = resolved {
                    for (var_name, is_secret) in group_vars {
                        if *is_secret {
                            ids.push(find_or_create_secret(graph, cache, var_name));
                        } else {
                            plain_vars.insert(var_name.clone());
                        }
                    }
                    continue;
                }

                // Static fallback: model the whole group as one opaque secret.
                *has_variable_groups = true;
                let mut meta = HashMap::new();
                meta.insert(META_VARIABLE_GROUP.into(), "true".into());
                let group_id = graph.add_node_with_metadata(
                    NodeKind::Secret,
                    group.as_str(),
                    TrustZone::FirstParty,
                    meta,
                );
                cache.insert(group.clone(), group_id);
                ids.push(group_id);
                graph.mark_partial(
                    GapKind::Structural,
                    format!(
                        "variable group '{group}' in {scope} — contents unresolvable without ADO API access"
                    ),
                );
            }
        }
    }
    ids
}
/// Adds one `Step` node per step of a job and wires its authority edges.
///
/// For every non-template step this records job name, inline script body,
/// effective condition and dependsOn as metadata; links the step to the
/// access token and to all inherited secrets; models service connections
/// found in task inputs; flags Terraform auto-approve; and scans inputs, env
/// values and the inline script for `$(NAME)` references.
///
/// * `job_name` — stored on each step and used to build default step names.
/// * `inherited_secrets` — secret nodes every step of this job can access.
/// * `plain_vars` / `has_variable_groups` — forwarded to the `$(NAME)` scanner.
/// * `outer_condition` / `outer_depends_on` — values from the enclosing
///   scopes; a step-level condition is joined with the former, a step-level
///   dependsOn replaces the latter.
/// * `cache` — secret-name to node-id cache shared across scopes.
#[allow(clippy::too_many_arguments)]
fn process_steps(
steps: &[AdoStep],
job_name: &str,
token_id: NodeId,
inherited_secrets: &[NodeId],
plain_vars: &HashSet<String>,
has_variable_groups: bool,
outer_condition: Option<&str>,
outer_depends_on: Option<&str>,
graph: &mut AuthorityGraph,
cache: &mut HashMap<String, NodeId>,
) {
for (idx, step) in steps.iter().enumerate() {
// Template steps are modelled as a delegation and not analysed further.
if let Some(ref tpl) = step.template {
let step_name = step
.display_name
.as_deref()
.or(step.name.as_deref())
.map(|s| s.to_string())
.unwrap_or_else(|| format!("{job_name}[{idx}]"));
add_template_delegation(&step_name, tpl, token_id, Some(job_name), graph);
continue;
}
let (step_name, trust_zone, inline_script) = classify_step(step, job_name, idx);
let step_condition = non_empty_condition(&step.condition);
if let Some(c) = step_condition {
mark_condition_partial(graph, "step", &step_name, c);
}
let effective_condition = join_conditions(outer_condition, step_condition);
// A step-level dependsOn wins; otherwise inherit the enclosing one.
let effective_depends_on =
explicit_depends_on_csv(&step.depends_on, graph, "step", &step_name)
.or_else(|| outer_depends_on.map(|s| s.to_string()));
let step_id = graph.add_node(NodeKind::Step, &step_name, trust_zone);
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata.insert(META_JOB_NAME.into(), job_name.into());
if let Some(ref body) = inline_script {
node.metadata.insert(META_SCRIPT_BODY.into(), body.clone());
}
if let Some(ref c) = effective_condition {
node.metadata.insert(META_CONDITION.into(), c.clone());
}
if let Some(ref d) = effective_depends_on {
if !d.is_empty() {
node.metadata.insert(META_DEPENDS_ON.into(), d.clone());
}
}
}
// Every step is linked to the access token.
graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);
// A checkout step with persistCredentials set to true also gets a PersistsTo edge.
if step.checkout.is_some() && step.persist_credentials == Some(true) {
graph.add_edge(step_id, token_id, EdgeKind::PersistsTo);
}
if let Some(ref ck) = step.checkout {
if ck == "self" {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_CHECKOUT_SELF.into(), "true".into());
}
}
}
for &secret_id in inherited_secrets {
graph.add_edge(step_id, secret_id, EdgeKind::HasAccessTo);
}
if let Some(ref inputs) = step.inputs {
// Input keys (compared lowercased) whose value names a service connection.
let service_conn_keys = [
"azuresubscription",
"connectedservicename",
"connectedservicenamearm",
"kubernetesserviceconnection",
"environmentservicename",
"backendservicearm",
];
// Sort by key so nodes and edges are created in a deterministic order.
let mut input_entries: Vec<(&String, &serde_yaml::Value)> = inputs.iter().collect();
input_entries.sort_by(|a, b| a.0.cmp(b.0));
for (raw_key, val) in input_entries {
let lower = raw_key.to_lowercase();
if !service_conn_keys.contains(&lower.as_str()) {
continue;
}
// Falls back to the input key itself when the value is not a string.
let conn_name = yaml_value_as_str(val).unwrap_or(raw_key.as_str());
// Values starting with `$(` are skipped here; the `$(NAME)` scan below sees them.
if !conn_name.starts_with("$(") {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_SERVICE_CONNECTION_NAME.into(), conn_name.to_string());
}
let mut meta = HashMap::new();
meta.insert(META_SERVICE_CONNECTION.into(), "true".into());
meta.insert(META_IDENTITY_SCOPE.into(), "broad".into());
let conn_id = graph.add_node_with_metadata(
NodeKind::Identity,
conn_name,
TrustZone::FirstParty,
meta,
);
graph.add_edge(step_id, conn_id, EdgeKind::HasAccessTo);
}
}
// `addSpnToEnvironment` counts as set for boolean true or the string "true" (any case).
if let Some(val) = input_value(inputs, "addSpnToEnvironment") {
let truthy = match val {
serde_yaml::Value::Bool(b) => *b,
serde_yaml::Value::String(s) => s.eq_ignore_ascii_case("true"),
_ => false,
};
if truthy {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_ADD_SPN_TO_ENV.into(), "true".into());
}
}
}
let task_lower = step
.task
.as_deref()
.map(|t| t.to_lowercase())
.unwrap_or_default();
let is_terraform_task = task_lower.starts_with("terraformcli@")
|| task_lower.starts_with("terraformtask@")
|| task_lower.starts_with("terraformtaskv");
// Terraform task: `command: apply` with `auto-approve` in `commandOptions`.
if is_terraform_task {
let cmd_lower = input_str(inputs, "command")
.map(|s| s.to_lowercase())
.unwrap_or_default();
let opts = input_str(inputs, "commandOptions").unwrap_or("");
if cmd_lower == "apply" && opts.contains("auto-approve") {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
}
}
}
// Scan every string-valued input for `$(NAME)` references, again in key order.
let mut paren_entries: Vec<(&String, &serde_yaml::Value)> = inputs.iter().collect();
paren_entries.sort_by(|a, b| a.0.cmp(b.0));
for (_k, val) in paren_entries {
if let Some(s) = yaml_value_as_str(val) {
extract_dollar_paren_secrets(
s,
step_id,
plain_vars,
has_variable_groups,
graph,
cache,
);
}
}
}
// Inline scripts can also run `terraform apply` with auto-approve.
if let Some(ref body) = inline_script {
if script_does_terraform_auto_apply(body) {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_TERRAFORM_AUTO_APPROVE.into(), "true".into());
}
}
}
// Scan `env:` values for `$(NAME)` references, in key order.
if let Some(ref env) = step.env {
let mut env_entries: Vec<(&String, &serde_yaml::Value)> = env.iter().collect();
env_entries.sort_by(|a, b| a.0.cmp(b.0));
for (_k, val) in env_entries {
if let Some(s) = yaml_scalar_to_string(val) {
extract_dollar_paren_secrets(
&s,
step_id,
plain_vars,
has_variable_groups,
graph,
cache,
);
}
}
}
// Scan the inline script body itself for `$(NAME)` references.
if let Some(ref script) = inline_script {
extract_dollar_paren_secrets(
script,
step_id,
plain_vars,
has_variable_groups,
graph,
cache,
);
}
// Flag scripts that emit the `##vso[task.setvariable` logging command
// (matched case-insensitively), and whether its value has a `$(…)` reference.
if let Some(ref script) = inline_script {
let lower = script.to_lowercase();
if lower.contains("##vso[task.setvariable") {
if let Some(node) = graph.nodes.get_mut(step_id) {
node.metadata
.insert(META_WRITES_ENV_GATE.into(), "true".into());
node.metadata
.insert(META_SETVARIABLE_ADO.into(), "true".into());
if setvariable_value_contains_secret_ref(script) {
node.metadata
.insert(META_ENV_GATE_WRITES_SECRET_VALUE.into(), "true".into());
}
}
}
}
}
}
fn classify_step(
step: &AdoStep,
job_name: &str,
idx: usize,
) -> (String, TrustZone, Option<String>) {
let default_name = || format!("{job_name}[{idx}]");
let name = step
.display_name
.as_deref()
.or(step.name.as_deref())
.map(|s| s.to_string())
.unwrap_or_else(default_name);
if step.task.is_some() {
let inline = extract_task_inline_script(step.inputs.as_ref());
(name, TrustZone::Untrusted, inline)
} else if let Some(ref s) = step.script {
(name, TrustZone::FirstParty, Some(s.clone()))
} else if let Some(ref s) = step.bash {
(name, TrustZone::FirstParty, Some(s.clone()))
} else if let Some(ref s) = step.powershell {
(name, TrustZone::FirstParty, Some(s.clone()))
} else if let Some(ref s) = step.pwsh {
(name, TrustZone::FirstParty, Some(s.clone()))
} else {
(name, TrustZone::FirstParty, None)
}
}
fn extract_task_inline_script(
inputs: Option<&HashMap<String, serde_yaml::Value>>,
) -> Option<String> {
let inputs = inputs?;
const KEYS: &[&str] = &["script", "inlinescript", "inline"];
let mut entries: Vec<(&String, &serde_yaml::Value)> = inputs.iter().collect();
entries.sort_by(|a, b| a.0.cmp(b.0));
for (raw_key, val) in entries {
let lower = raw_key.to_lowercase();
if KEYS.contains(&lower.as_str()) {
if let Some(s) = val.as_str() {
if !s.is_empty() {
return Some(s.to_string());
}
}
}
}
None
}
/// Looks up a task input by name, ignoring ASCII case.
///
/// When several keys differ only by case, the smallest key (byte-wise) wins
/// so the result does not depend on map iteration order.
fn input_value<'a>(
    inputs: &'a HashMap<String, serde_yaml::Value>,
    wanted: &str,
) -> Option<&'a serde_yaml::Value> {
    inputs
        .iter()
        .filter(|(key, _)| key.eq_ignore_ascii_case(wanted))
        // Map keys are unique, so the minimum is the first match in sorted order.
        .min_by(|left, right| left.0.cmp(right.0))
        .map(|(_, value)| value)
}
fn input_str<'a>(inputs: &'a HashMap<String, serde_yaml::Value>, wanted: &str) -> Option<&'a str> {
input_value(inputs, wanted).and_then(yaml_value_as_str)
}
/// Models a template reference as a step that delegates to an `Image` node.
///
/// The template node is `Untrusted` when its path contains `@`, otherwise
/// `FirstParty`. The delegating step also gets access to the token, and the
/// graph is marked partial because the template body is not resolved.
fn add_template_delegation(
    step_name: &str,
    template_path: &str,
    token_id: NodeId,
    job_name: Option<&str>,
    graph: &mut AuthorityGraph,
) {
    // The step node is created first so node ids keep their original order.
    let step_id = graph.add_node(NodeKind::Step, step_name, TrustZone::FirstParty);
    if let (Some(jn), Some(node)) = (job_name, graph.nodes.get_mut(step_id)) {
        node.metadata.insert(META_JOB_NAME.into(), jn.into());
    }

    let tpl_trust_zone = match template_path.contains('@') {
        true => TrustZone::Untrusted,
        false => TrustZone::FirstParty,
    };
    let tpl_id = graph.add_node(NodeKind::Image, template_path, tpl_trust_zone);

    graph.add_edge(step_id, tpl_id, EdgeKind::DelegatesTo);
    graph.add_edge(step_id, token_id, EdgeKind::HasAccessTo);

    let reason = format!(
        "template '{template_path}' cannot be resolved inline — authority within the template is unknown"
    );
    graph.mark_partial(GapKind::Structural, reason);
}
/// Reports whether any `##vso[task.setvariable …]value` line in `script`
/// carries an unescaped `$(…)` reference in its value part.
///
/// The command marker is matched ASCII-case-insensitively. The value part is
/// the text after the first `]` that follows the marker, so brackets earlier
/// on the line (array indexing, `[ … ]` tests) cannot be mistaken for the end
/// of the command. Lines without a closing `]` after the marker are ignored.
fn setvariable_value_contains_secret_ref(script: &str) -> bool {
    const MARKER: &str = "##vso[task.setvariable";

    script.lines().any(|line| {
        // ASCII-only lowercasing keeps byte offsets identical to `line`.
        let lowered = line.to_ascii_lowercase();
        let Some(marker_at) = lowered.find(MARKER) else {
            return false;
        };
        // MARKER is pure ASCII, so this offset is always a char boundary.
        let after_marker = &line[marker_at + MARKER.len()..];
        match after_marker.find(']') {
            Some(close_bracket) => {
                contains_unescaped_dollar_paren(&after_marker[close_bracket + 1..])
            }
            None => false,
        }
    })
}

/// Returns `true` when `s` contains a `$(` that is not escaped as `$$(`.
///
/// An escaped `$$(…)` group is skipped up to and including its closing `)`;
/// when it has no closing `)`, only the `$(` itself is skipped.
fn contains_unescaped_dollar_paren(s: &str) -> bool {
    let bytes = s.as_bytes();
    let mut i = 0;
    while i + 1 < bytes.len() {
        if bytes[i] == b'$' && bytes[i + 1] == b'(' {
            if i > 0 && bytes[i - 1] == b'$' {
                // `(` is ASCII, so `after_open` is a valid slice boundary.
                let after_open = i + 2;
                if let Some(end_offset) = s[after_open..].find(')') {
                    i = after_open + end_offset + 1;
                    continue;
                }
                i += 2;
                continue;
            }
            return true;
        }
        i += 1;
    }
    false
}
/// Scans `text` for `$(NAME)` references and links the step to each one that
/// is treated as a secret.
///
/// A reference is treated as a secret when `NAME` is a valid identifier, is
/// not a predefined ADO variable, is not a known plain variable, and — when
/// variable groups are in scope — is already present in `cache`. Escaped
/// `$$(…)` groups are skipped. A secret used after a `-var` flag of a
/// Terraform command is additionally tagged as CLI-flag exposed.
fn extract_dollar_paren_secrets(
text: &str,
step_id: NodeId,
plain_vars: &HashSet<String>,
has_variable_groups: bool,
graph: &mut AuthorityGraph,
cache: &mut HashMap<String, NodeId>,
) {
let mut pos = 0;
let bytes = text.as_bytes();
while pos < bytes.len() {
// Requires at least one byte after `$(`.
if pos + 2 < bytes.len() && bytes[pos] == b'$' && bytes[pos + 1] == b'(' {
// `$$(` is an escape: skip past the closing `)` without recording anything.
if pos > 0 && bytes[pos - 1] == b'$' {
let start = pos + 2;
if let Some(end_offset) = text[start..].find(')') {
pos = start + end_offset + 1;
continue;
}
pos += 1;
continue;
}
let start = pos + 2;
if let Some(end_offset) = text[start..].find(')') {
let var_name = &text[start..start + end_offset];
let already_declared_secret = cache.contains_key(var_name);
// With variable groups in scope, only names already in the cache are linked.
if is_valid_ado_identifier(var_name)
&& !is_predefined_ado_var(var_name)
&& !plain_vars.contains(var_name)
&& (!has_variable_groups || already_declared_secret)
{
let id = find_or_create_secret(graph, cache, var_name);
if is_in_terraform_var_flag(text, pos) {
if let Some(node) = graph.nodes.get_mut(id) {
node.metadata
.insert(META_CLI_FLAG_EXPOSED.into(), "true".into());
}
}
graph.add_edge(step_id, id, EdgeKind::HasAccessTo);
}
// Resume scanning after the closing `)`.
pos = start + end_offset + 1;
continue;
}
}
pos += 1;
}
}
/// Decides whether the reference starting at byte offset `var_pos` of `text`
/// is being passed through a Terraform `-var` flag.
///
/// The part of the current line before `var_pos` must contain `-var ` or
/// `-var=`. If that same part mentions `terraform` (any case) the answer is
/// `true`. Otherwise earlier lines are walked upward for as long as each one
/// ends in a `\` or a backtick continuation; the first such line mentioning
/// `terraform` yields `true`, and the first line that does not continue yields
/// `false`.
fn is_in_terraform_var_flag(text: &str, var_pos: usize) -> bool {
    let head = &text[..var_pos];
    let line_start = head.rfind('\n').map_or(0, |newline| newline + 1);
    let prefix = &head[line_start..];

    if !(prefix.contains("-var ") || prefix.contains("-var=")) {
        return false;
    }
    if prefix.to_lowercase().contains("terraform") {
        return true;
    }

    // `boundary` is the start offset of the line examined last; the byte just
    // before it is the newline that terminates the previous line.
    let mut boundary = line_start;
    while boundary > 0 {
        let newline_at = boundary - 1;
        let earlier_start = text[..newline_at]
            .rfind('\n')
            .map_or(0, |newline| newline + 1);
        let earlier = &text[earlier_start..newline_at];

        let tail = earlier.trim_end();
        if !(tail.ends_with('\\') || tail.ends_with('`')) {
            return false;
        }
        if earlier.to_lowercase().contains("terraform") {
            return true;
        }
        boundary = earlier_start;
    }
    false
}
/// Accepts names that start with an ASCII letter and continue with ASCII
/// letters, digits, `_` or `.` only. The empty string is rejected.
fn is_valid_ado_identifier(name: &str) -> bool {
    let Some(first) = name.chars().next() else {
        return false;
    };
    if !first.is_ascii_alphabetic() {
        return false;
    }
    name.chars()
        .skip(1)
        .all(|c| matches!(c, '_' | '.') || c.is_ascii_alphanumeric())
}
/// Returns `true` when `name` starts with one of the prefixes this parser
/// treats as predefined variables.
///
/// The comparison is case-sensitive. `TF_BUILD` is matched as a prefix, which
/// also covers the bare name `TF_BUILD`.
fn is_predefined_ado_var(name: &str) -> bool {
    const PREDEFINED_PREFIXES: [&str; 10] = [
        "Build.",
        "Agent.",
        "System.",
        "Pipeline.",
        "Release.",
        "Environment.",
        "Strategy.",
        "Deployment.",
        "Resources.",
        "TF_BUILD",
    ];
    PREDEFINED_PREFIXES
        .iter()
        .any(|prefix| name.starts_with(*prefix))
}
/// Returns the secret node id cached under `name`, creating a first-party
/// `Secret` node and caching it when no entry exists yet.
fn find_or_create_secret(
    graph: &mut AuthorityGraph,
    cache: &mut HashMap<String, NodeId>,
    name: &str,
) -> NodeId {
    match cache.get(name).copied() {
        Some(existing) => existing,
        None => {
            let created = graph.add_node(NodeKind::Secret, name, TrustZone::FirstParty);
            cache.insert(name.to_string(), created);
            created
        }
    }
}
fn yaml_value_as_str(val: &serde_yaml::Value) -> Option<&str> {
val.as_str()
}
fn yaml_scalar_to_string(value: &serde_yaml::Value) -> Option<String> {
match value {
serde_yaml::Value::String(s) => Some(s.clone()),
serde_yaml::Value::Bool(b) => Some(b.to_string()),
serde_yaml::Value::Number(n) => Some(n.to_string()),
serde_yaml::Value::Null => Some(String::new()),
_ => None,
}
}
/// Root document of a pipeline YAML file as deserialized by this parser.
///
/// Every field is optional and takes its default when the key is absent.
#[derive(Debug, Deserialize)]
pub struct AdoPipeline {
/// Value of the `trigger` key, kept as untyped YAML.
#[serde(default)]
pub trigger: Option<serde_yaml::Value>,
/// Value of the `pr` key, kept as untyped YAML.
#[serde(default)]
pub pr: Option<serde_yaml::Value>,
/// Value of the `variables` key, decoded by `AdoVariables`' deserializer.
#[serde(default)]
pub variables: Option<AdoVariables>,
/// Value of the `stages` key, decoded by `deserialize_optional_stages`.
#[serde(default, deserialize_with = "deserialize_optional_stages")]
pub stages: Option<Vec<AdoStage>>,
/// Value of the `jobs` key, decoded by `deserialize_optional_jobs`.
#[serde(default, deserialize_with = "deserialize_optional_jobs")]
pub jobs: Option<Vec<AdoJob>>,
/// Value of the root-level `steps` key.
#[serde(default)]
pub steps: Option<Vec<AdoStep>>,
/// Value of the `pool` key, kept as untyped YAML.
#[serde(default)]
pub pool: Option<serde_yaml::Value>,
/// Value of the `workspace` key, kept as untyped YAML.
#[serde(default)]
pub workspace: Option<serde_yaml::Value>,
/// Value of the `resources` key, decoded by `deserialize_optional_resources`.
#[serde(default, deserialize_with = "deserialize_optional_resources")]
pub resources: Option<AdoResources>,
/// Value of the `extends` key, kept as untyped YAML.
#[serde(default)]
pub extends: Option<serde_yaml::Value>,
/// Value of the `parameters` key, decoded by `deserialize_optional_parameters`.
#[serde(default, deserialize_with = "deserialize_optional_parameters")]
pub parameters: Option<Vec<AdoParameter>>,
/// Value of the `permissions` key, kept as untyped YAML.
#[serde(default)]
pub permissions: Option<serde_yaml::Value>,
}
/// Lenient deserializer for a `parameters` value.
///
/// * null / unit -> `None`
/// * any scalar (string, bool, integer, float) -> `None`
/// * sequence -> `Some`, keeping the entries that deserialize as
///   [`AdoParameter`] and silently dropping the rest
/// * mapping -> `Some`, with one name-only [`AdoParameter`] per non-empty
///   string key; the mapped values are ignored
fn deserialize_optional_parameters<'de, D>(
    deserializer: D,
) -> Result<Option<Vec<AdoParameter>>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::{IgnoredAny, MapAccess, SeqAccess, Visitor};
    use std::fmt;

    struct ParamsVisitor;

    impl<'de> Visitor<'de> for ParamsVisitor {
        type Value = Option<Vec<AdoParameter>>;

        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("a sequence of parameter declarations, a mapping of name→default, null, or a template expression")
        }

        // Absent or null values carry no parameters.
        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
            d.deserialize_any(self)
        }

        // Scalars of every kind are accepted and discarded.
        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_bool<E: serde::de::Error>(self, _v: bool) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_i64<E: serde::de::Error>(self, _v: i64) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_u64<E: serde::de::Error>(self, _v: u64) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_f64<E: serde::de::Error>(self, _v: f64) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
            let mut declared = Vec::new();
            while let Some(raw) = seq.next_element::<serde_yaml::Value>()? {
                // Entries that do not fit `AdoParameter` are skipped, not fatal.
                declared.extend(serde_yaml::from_value::<AdoParameter>(raw).ok());
            }
            Ok(Some(declared))
        }

        fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
            let mut declared = Vec::new();
            while let Some(key) = map.next_key::<serde_yaml::Value>()? {
                // The value must be consumed before the next key is read.
                map.next_value::<IgnoredAny>()?;
                if let serde_yaml::Value::String(name) = key {
                    if !name.is_empty() {
                        declared.push(AdoParameter {
                            name: Some(name),
                            param_type: None,
                            values: None,
                        });
                    }
                }
            }
            Ok(Some(declared))
        }
    }

    deserializer.deserialize_any(ParamsVisitor)
}
/// Lenient deserializer for a `resources` value.
///
/// * null / unit -> `None`
/// * mapping -> `Some`, deserialized as [`AdoResources`] (errors propagate)
/// * sequence -> every element is consumed and discarded, and an empty
///   `AdoResources::default()` is returned
fn deserialize_optional_resources<'de, D>(deserializer: D) -> Result<Option<AdoResources>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::value::MapAccessDeserializer;
    use serde::de::{IgnoredAny, MapAccess, SeqAccess, Visitor};
    use std::fmt;

    struct ResourcesVisitor;

    impl<'de> Visitor<'de> for ResourcesVisitor {
        type Value = Option<AdoResources>;

        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("an AdoResources mapping or a legacy `- repo:` sequence")
        }

        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
            d.deserialize_any(self)
        }

        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
            // Drain the sequence so the deserializer stays in a consistent state.
            while let Some(_discarded) = seq.next_element::<IgnoredAny>()? {}
            Ok(Some(AdoResources::default()))
        }

        fn visit_map<A: MapAccess<'de>>(self, map: A) -> Result<Self::Value, A::Error> {
            AdoResources::deserialize(MapAccessDeserializer::new(map)).map(Some)
        }
    }

    deserializer.deserialize_any(ResourcesVisitor)
}
/// Lenient deserializer for a `stages` value.
///
/// * null / unit -> `None`
/// * string -> `None`
/// * sequence -> `Some`, deserialized as `Vec<AdoStage>` (errors propagate)
/// * mapping -> `Some`, holding that single mapping deserialized as one
///   [`AdoStage`]
fn deserialize_optional_stages<'de, D>(deserializer: D) -> Result<Option<Vec<AdoStage>>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::value::{MapAccessDeserializer, SeqAccessDeserializer};
    use serde::de::{MapAccess, SeqAccess, Visitor};
    use std::fmt;

    struct StagesVisitor;

    impl<'de> Visitor<'de> for StagesVisitor {
        type Value = Option<Vec<AdoStage>>;

        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("a sequence of stages or a template expression")
        }

        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
            d.deserialize_any(self)
        }

        // A bare string in place of the list is accepted and ignored.
        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
            Ok(None)
        }

        fn visit_seq<A: SeqAccess<'de>>(self, seq: A) -> Result<Self::Value, A::Error> {
            Vec::<AdoStage>::deserialize(SeqAccessDeserializer::new(seq)).map(Some)
        }

        fn visit_map<A: MapAccess<'de>>(self, map: A) -> Result<Self::Value, A::Error> {
            AdoStage::deserialize(MapAccessDeserializer::new(map)).map(|only| Some(vec![only]))
        }
    }

    deserializer.deserialize_any(StagesVisitor)
}
/// Adapter that runs [`deserialize_jobs`] and wraps the resulting list in
/// `Some`, for use on `Option<Vec<AdoJob>>` fields.
fn deserialize_optional_jobs<'de, D>(deserializer: D) -> Result<Option<Vec<AdoJob>>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    let jobs = deserialize_jobs(deserializer)?;
    Ok(Some(jobs))
}
/// Lenient deserializer for a `jobs` value; it always produces a list.
///
/// * null / unit / string -> empty list
/// * sequence -> the entries that deserialize as [`AdoJob`]; others are dropped
/// * mapping -> one job per non-empty string key whose value deserializes as
///   [`AdoJob`]; when the body names neither `job` nor `deployment`, the key
///   becomes the job name
fn deserialize_jobs<'de, D>(deserializer: D) -> Result<Vec<AdoJob>, D::Error>
where
    D: serde::Deserializer<'de>,
{
    use serde::de::{MapAccess, SeqAccess, Visitor};
    use std::fmt;

    struct JobsVisitor;

    impl<'de> Visitor<'de> for JobsVisitor {
        type Value = Vec<AdoJob>;

        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("a sequence of ADO jobs, a map of job-name to job body, null, or a template expression")
        }

        fn visit_unit<E: serde::de::Error>(self) -> Result<Self::Value, E> {
            Ok(Vec::new())
        }

        fn visit_none<E: serde::de::Error>(self) -> Result<Self::Value, E> {
            Ok(Vec::new())
        }

        fn visit_some<D: serde::Deserializer<'de>>(self, d: D) -> Result<Self::Value, D::Error> {
            d.deserialize_any(self)
        }

        // A bare string in place of the list is accepted and ignored.
        fn visit_str<E: serde::de::Error>(self, _v: &str) -> Result<Self::Value, E> {
            Ok(Vec::new())
        }

        fn visit_string<E: serde::de::Error>(self, _v: String) -> Result<Self::Value, E> {
            Ok(Vec::new())
        }

        fn visit_seq<A: SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
            let mut jobs = Vec::new();
            while let Some(raw) = seq.next_element::<serde_yaml::Value>()? {
                // Entries that do not fit `AdoJob` are skipped, not fatal.
                jobs.extend(serde_yaml::from_value::<AdoJob>(raw).ok());
            }
            Ok(jobs)
        }

        fn visit_map<A: MapAccess<'de>>(self, mut map: A) -> Result<Self::Value, A::Error> {
            let mut jobs = Vec::new();
            while let Some(key) = map.next_key::<serde_yaml::Value>()? {
                // The value is always consumed, even when the key is rejected.
                let body = map.next_value::<serde_yaml::Value>()?;
                let serde_yaml::Value::String(name) = key else {
                    continue;
                };
                if name.is_empty() {
                    continue;
                }
                if let Ok(mut job) = serde_yaml::from_value::<AdoJob>(body) {
                    if job.job.is_none() && job.deployment.is_none() {
                        job.job = Some(name);
                    }
                    jobs.push(job);
                }
            }
            Ok(jobs)
        }
    }

    deserializer.deserialize_any(JobsVisitor)
}
fn deserialize_optional_bool<'de, D>(deserializer: D) -> Result<Option<bool>, D::Error>
where
D: serde::Deserializer<'de>,
{
let value = Option::<serde_yaml::Value>::deserialize(deserializer)?;
let Some(value) = value else {
return Ok(None);
};
let parsed = match value {
serde_yaml::Value::Bool(b) => Some(b),
serde_yaml::Value::String(s) => match s.trim().to_ascii_lowercase().as_str() {
"true" | "yes" | "y" | "on" | "1" => Some(true),
"false" | "no" | "n" | "off" | "0" => Some(false),
_ => None,
},
serde_yaml::Value::Number(n) => n.as_i64().map(|v| v != 0),
serde_yaml::Value::Null => None,
_ => None,
};
Ok(parsed)
}
/// Deserialized `resources` mapping; only the `repositories` list is modelled.
#[derive(Debug, Default, Deserialize)]
pub struct AdoResources {
/// Entries of the `repositories` key; empty when the key is absent.
#[serde(default)]
pub repositories: Vec<AdoRepository>,
}
/// One entry of `resources.repositories`. All keys are optional.
#[derive(Debug, Deserialize)]
pub struct AdoRepository {
/// Value of the `repository` key.
#[serde(default)]
pub repository: Option<String>,
/// Value of the `type` key.
#[serde(default, rename = "type")]
pub repo_type: Option<String>,
/// Value of the `name` key.
#[serde(default)]
pub name: Option<String>,
/// Value of the `ref` key.
#[serde(default, rename = "ref")]
pub git_ref: Option<String>,
}
/// One parameter declaration. All keys are optional.
#[derive(Debug, Deserialize)]
pub struct AdoParameter {
/// Value of the `name` key.
#[serde(default)]
pub name: Option<String>,
/// Value of the `type` key.
#[serde(rename = "type", default)]
pub param_type: Option<String>,
/// Value of the `values` key, kept as untyped YAML items.
#[serde(default)]
pub values: Option<Vec<serde_yaml::Value>>,
}
/// A `dependsOn` value. Being `untagged`, serde tries the variants in
/// declaration order and keeps the first one that fits.
#[derive(Debug, Deserialize, Clone)]
#[serde(untagged)]
pub enum DependsOn {
/// A single string.
Single(String),
/// A list of strings.
Multiple(Vec<String>),
/// Any other YAML shape, kept as-is.
Other(serde_yaml::Value),
}
impl DependsOn {
pub fn as_csv(&self) -> String {
match self {
DependsOn::Single(s) => s.trim().to_string(),
DependsOn::Multiple(v) => v
.iter()
.map(|s| s.trim())
.filter(|s| !s.is_empty())
.collect::<Vec<_>>()
.join(","),
DependsOn::Other(_) => String::new(),
}
}
}
/// One stage entry. All keys are optional.
#[derive(Debug, Deserialize)]
pub struct AdoStage {
/// Value of the `stage` key.
#[serde(default)]
pub stage: Option<String>,
/// Value of the `template` key.
#[serde(default)]
pub template: Option<String>,
/// Value of the `variables` key.
#[serde(default)]
pub variables: Option<AdoVariables>,
/// Value of the `jobs` key, decoded by `deserialize_jobs`; empty when absent.
#[serde(default, deserialize_with = "deserialize_jobs")]
pub jobs: Vec<AdoJob>,
/// Value of the `condition` key.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub condition: Option<String>,
/// Value of the `dependsOn` key.
#[serde(rename = "dependsOn", default, skip_serializing_if = "Option::is_none")]
pub depends_on: Option<DependsOn>,
}
/// One job entry. All keys are optional.
#[derive(Debug, Deserialize)]
pub struct AdoJob {
/// Value of the `job` key.
#[serde(default)]
pub job: Option<String>,
/// Value of the `deployment` key.
#[serde(default)]
pub deployment: Option<String>,
/// Value of the `variables` key.
#[serde(default)]
pub variables: Option<AdoVariables>,
/// Value of the job's own `steps` key.
#[serde(default)]
pub steps: Option<Vec<AdoStep>>,
/// Value of the `strategy` key.
#[serde(default)]
pub strategy: Option<AdoStrategy>,
/// Value of the `pool` key, kept as untyped YAML.
#[serde(default)]
pub pool: Option<serde_yaml::Value>,
/// Value of the `workspace` key, kept as untyped YAML.
#[serde(default)]
pub workspace: Option<serde_yaml::Value>,
/// Value of the `template` key.
#[serde(default)]
pub template: Option<String>,
/// Value of the `environment` key, kept as untyped YAML.
#[serde(default)]
pub environment: Option<serde_yaml::Value>,
/// Value of the `condition` key.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub condition: Option<String>,
/// Value of the `dependsOn` key.
#[serde(rename = "dependsOn", default, skip_serializing_if = "Option::is_none")]
pub depends_on: Option<DependsOn>,
}
impl AdoJob {
pub fn effective_name(&self) -> String {
self.job
.as_deref()
.or(self.deployment.as_deref())
.unwrap_or("job")
.to_string()
}
pub fn all_steps(&self) -> Vec<AdoStep> {
let mut out: Vec<AdoStep> = Vec::new();
if let Some(ref s) = self.steps {
out.extend(s.iter().cloned());
}
if let Some(ref strat) = self.strategy {
for phase in strat.phases() {
if let Some(ref s) = phase.steps {
out.extend(s.iter().cloned());
}
}
}
out
}
pub fn has_environment_binding(&self) -> bool {
match self.environment.as_ref() {
None => false,
Some(serde_yaml::Value::String(s)) => !s.trim().is_empty(),
Some(serde_yaml::Value::Mapping(m)) => m
.get("name")
.and_then(|v| v.as_str())
.map(|s| !s.trim().is_empty())
.unwrap_or(false),
_ => false,
}
}
}
/// Deserialized `strategy` mapping. The three keys share one body type.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategy {
/// Value of the `runOnce` key.
#[serde(default, rename = "runOnce")]
pub run_once: Option<AdoStrategyRunOnce>,
/// Value of the `rolling` key.
#[serde(default)]
pub rolling: Option<AdoStrategyRunOnce>,
/// Value of the `canary` key.
#[serde(default)]
pub canary: Option<AdoStrategyRunOnce>,
}
impl AdoStrategy {
pub fn phases(&self) -> Vec<&AdoStrategyPhase> {
let mut out: Vec<&AdoStrategyPhase> = Vec::new();
for runner in [&self.run_once, &self.rolling, &self.canary]
.iter()
.copied()
.flatten()
{
for phase in [
&runner.deploy,
&runner.pre_deploy,
&runner.post_deploy,
&runner.route_traffic,
]
.into_iter()
.flatten()
{
out.push(phase);
}
if let Some(ref on) = runner.on {
if let Some(ref s) = on.success {
out.push(s);
}
if let Some(ref f) = on.failure {
out.push(f);
}
}
}
out
}
}
/// Body of one strategy runner (used for `runOnce`, `rolling` and `canary`).
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyRunOnce {
/// Value of the `deploy` key.
#[serde(default)]
pub deploy: Option<AdoStrategyPhase>,
/// Value of the `preDeploy` key.
#[serde(default, rename = "preDeploy")]
pub pre_deploy: Option<AdoStrategyPhase>,
/// Value of the `postDeploy` key.
#[serde(default, rename = "postDeploy")]
pub post_deploy: Option<AdoStrategyPhase>,
/// Value of the `routeTraffic` key.
#[serde(default, rename = "routeTraffic")]
pub route_traffic: Option<AdoStrategyPhase>,
/// Value of the `on` key.
#[serde(default)]
pub on: Option<AdoStrategyOn>,
}
/// Body of a runner's `on` key.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyOn {
/// Value of the `success` key.
#[serde(default)]
pub success: Option<AdoStrategyPhase>,
/// Value of the `failure` key.
#[serde(default)]
pub failure: Option<AdoStrategyPhase>,
}
/// A single strategy phase; only its `steps` list is modelled.
#[derive(Debug, Default, Deserialize, Clone)]
pub struct AdoStrategyPhase {
/// Value of the phase's `steps` key.
#[serde(default)]
pub steps: Option<Vec<AdoStep>>,
}
/// One step entry. All keys are optional, so any step form deserializes into
/// this one struct.
#[derive(Debug, Deserialize, Clone)]
pub struct AdoStep {
/// Value of the `task` key.
#[serde(default)]
pub task: Option<String>,
/// Value of the `script` key.
#[serde(default)]
pub script: Option<String>,
/// Value of the `bash` key.
#[serde(default)]
pub bash: Option<String>,
/// Value of the `powershell` key.
#[serde(default)]
pub powershell: Option<String>,
/// Value of the `pwsh` key.
#[serde(default)]
pub pwsh: Option<String>,
/// Value of the `template` key.
#[serde(default)]
pub template: Option<String>,
/// Value of the `displayName` key.
#[serde(rename = "displayName", default)]
pub display_name: Option<String>,
/// Value of the `name` key.
#[serde(default)]
pub name: Option<String>,
/// Value of the `env` key; values are kept as untyped YAML.
#[serde(default)]
pub env: Option<HashMap<String, serde_yaml::Value>>,
/// Value of the `inputs` key; values are kept as untyped YAML.
#[serde(default)]
pub inputs: Option<HashMap<String, serde_yaml::Value>>,
/// Value of the `checkout` key.
#[serde(default)]
pub checkout: Option<String>,
/// Value of the `persistCredentials` key, decoded by
/// `deserialize_optional_bool`.
#[serde(
rename = "persistCredentials",
default,
deserialize_with = "deserialize_optional_bool"
)]
pub persist_credentials: Option<bool>,
/// Value of the `condition` key.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub condition: Option<String>,
/// Value of the `dependsOn` key.
#[serde(rename = "dependsOn", default, skip_serializing_if = "Option::is_none")]
pub depends_on: Option<DependsOn>,
}
/// Flattened list of variable declarations, built by the type's hand-written
/// `Deserialize` implementation.
#[derive(Debug, Default)]
pub struct AdoVariables(pub Vec<AdoVariable>);
impl<'de> serde::Deserialize<'de> for AdoVariables {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let raw = serde_yaml::Value::deserialize(deserializer)?;
let mut vars = Vec::new();
match raw {
serde_yaml::Value::Sequence(seq) => {
for item in seq {
if let Some(map) = item.as_mapping() {
if let Some(group_val) = map.get("group") {
if let Some(group) = group_val.as_str() {
vars.push(AdoVariable::Group {
group: group.to_string(),
});
continue;
}
}
let name = map
.get("name")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let value = map
.get("value")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
let is_secret = map
.get("isSecret")
.and_then(|v| v.as_bool())
.unwrap_or(false);
vars.push(AdoVariable::Named {
name,
value,
is_secret,
});
}
}
}
serde_yaml::Value::Mapping(map) => {
for (k, v) in map {
let name = k.as_str().unwrap_or("").to_string();
let value = v.as_str().unwrap_or("").to_string();
vars.push(AdoVariable::Named {
name,
value,
is_secret: false,
});
}
}
_ => {}
}
Ok(AdoVariables(vars))
}
}
/// A single variable declaration produced by `AdoVariables`' deserializer.
#[derive(Debug)]
pub enum AdoVariable {
/// A `- group: <name>` entry.
Group {
/// The string given under `group`.
group: String,
},
/// A named variable, from either the list form or the mapping form.
Named {
/// Variable name; empty when it was missing or not a string.
name: String,
/// Variable value; empty when it was missing or not a string.
value: String,
/// `isSecret` flag from the list form; always `false` for the mapping form.
is_secret: bool,
},
}
/// Returns `true` when any line of `content` looks like a template
/// conditional key.
///
/// A line matches when, after removing leading whitespace and an optional
/// `- ` list marker, it starts with `${{`, contains `if ` or `if(`, and ends
/// with `:` once trailing whitespace is ignored.
fn has_root_parameter_conditional(content: &str) -> bool {
    content.lines().any(|line| {
        let unindented = line.trim_start();
        let expr = unindented.strip_prefix("- ").unwrap_or(unindented);
        let mentions_if = expr.contains("if ") || expr.contains("if(");
        expr.starts_with("${{") && mentions_if && expr.trim_end().ends_with(':')
    })
}
/// Finds the first suffix of `content` that starts on a root pipeline key
/// line and parses as an [`AdoPipeline`].
///
/// Only offsets that begin a line (the byte right after a `\n`) are
/// considered, and the very first line is never a candidate. Restricting the
/// search to line starts matters: `is_root_pipeline_key_line` rejects indented
/// keys, and testing offsets in the middle of a line would let the tail of an
/// indented or longer key (for example the `steps:` inside `  steps:`) pass as
/// a root key.
///
/// Returns `None` when no candidate suffix deserializes successfully.
fn recover_after_leading_root_sequence(content: &str) -> Option<&str> {
    content
        .match_indices('\n')
        .map(|(newline_at, _)| &content[newline_at + 1..])
        .find(|rest| {
            // Cheap textual check first; only then pay for a full YAML parse.
            is_root_pipeline_key_line(rest.lines().next().unwrap_or_default())
                && serde_yaml::from_str::<AdoPipeline>(rest).is_ok()
        })
}
/// Returns `true` when `line` is an unindented `key:` line whose key is one
/// of the root pipeline keys recognised here.
///
/// The line must end with `:` and must not start with whitespace. Trailing
/// colons and surrounding whitespace are stripped before the key is compared.
fn is_root_pipeline_key_line(line: &str) -> bool {
    const ROOT_KEYS: [&str; 11] = [
        "trigger",
        "pr",
        "pool",
        "variables",
        "resources",
        "stages",
        "jobs",
        "steps",
        "extends",
        "parameters",
        "permissions",
    ];

    if !line.ends_with(':') || line.starts_with(char::is_whitespace) {
        return false;
    }
    let key = line.trim_end_matches(':').trim();
    ROOT_KEYS.contains(&key)
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::{Read, Write};
use std::net::TcpListener;
use std::thread;
// Test helper: parses `yaml` with `AdoParser::parse` against a fixed
// `azure-pipelines.yml` source with no repo, ref or commit, and panics if
// parsing fails.
fn parse(yaml: &str) -> AuthorityGraph {
let parser = AdoParser;
let source = PipelineSource {
file: "azure-pipelines.yml".into(),
repo: None,
git_ref: None,
commit_sha: None,
};
parser.parse(yaml, &source).unwrap()
}
// Test helper: same fixed source as `parse`, but goes through
// `parse_with_context` with `ctx` supplied. Panics if parsing fails.
fn parse_with_ctx(yaml: &str, ctx: &AdoParserContext) -> AuthorityGraph {
let parser = AdoParser;
let source = PipelineSource {
file: "azure-pipelines.yml".into(),
repo: None,
git_ref: None,
commit_sha: None,
};
parser.parse_with_context(yaml, &source, Some(ctx)).unwrap()
}
// Test helper: starts a one-shot HTTP server on an OS-assigned localhost port
// and returns its base URL (`http://<addr>`).
//
// A background thread accepts a single connection, reads up to 2048 bytes of
// the request without inspecting them, and replies `200 OK` with
// `response_json` as the JSON body. I/O errors on the stream are ignored.
fn spawn_variable_groups_server(response_json: &'static str) -> String {
let listener = TcpListener::bind("127.0.0.1:0").expect("bind mock server");
let addr = listener.local_addr().expect("local addr");
thread::spawn(move || {
if let Ok((mut stream, _)) = listener.accept() {
let mut buf = [0_u8; 2048];
let _ = stream.read(&mut buf);
let body = response_json.as_bytes();
let header = format!(
"HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\nConnection: close\r\n\r\n",
body.len()
);
let _ = stream.write_all(header.as_bytes());
let _ = stream.write_all(body);
}
});
format!("http://{addr}")
}
// Smoke test: a one-job, one-step pipeline parses and yields at least two
// graph nodes.
#[test]
fn parses_simple_pipeline() {
let yaml = r#"
trigger:
- main
jobs:
- job: Build
steps:
- script: echo hello
displayName: Say hello
"#;
let graph = parse(yaml);
assert!(graph.nodes.len() >= 2); }
// A steps-only pipeline produces exactly one identity node, named
// `System.AccessToken`, whose identity scope metadata is "broad".
#[test]
fn system_access_token_created() {
let yaml = r#"
steps:
- script: echo hi
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
assert_eq!(identities.len(), 1);
assert_eq!(identities[0].name, "System.AccessToken");
assert_eq!(
identities[0].metadata.get(META_IDENTITY_SCOPE),
Some(&"broad".to_string())
);
}
// Without enrichment context, a `group:` variable entry becomes a single
// secret node named after the group and tagged as a variable group. The graph
// is marked partial with a Structural gap that names the group.
#[test]
fn variable_group_creates_secret_and_marks_partial() {
let yaml = r#"
variables:
- group: MySecretGroup
steps:
- script: echo hi
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert_eq!(secrets.len(), 1);
assert_eq!(secrets[0].name, "MySecretGroup");
assert_eq!(
secrets[0].metadata.get(META_VARIABLE_GROUP),
Some(&"true".to_string())
);
assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
assert!(
graph
.completeness_gaps
.iter()
.any(|g| g.contains("MySecretGroup")),
"completeness gap should name the variable group"
);
assert!(
graph.completeness_gap_kinds.contains(&GapKind::Structural),
"variable group gap must be Structural, got: {:?}",
graph.completeness_gap_kinds
);
}
// With a context pointing at the mock server, the group's contents are
// resolved: the secret member `DB_PASSWORD` becomes a secret node, the group
// itself does not, no "unresolvable" gap mentions the group, and the graph is
// flagged as enriched.
#[test]
fn variable_group_enrichment_resolves_plain_and_secret_vars() {
let yaml = r#"
variables:
- group: MySecretGroup
steps:
- script: |
echo $(PUBLIC_FLAG)
echo $(DB_PASSWORD)
"#;
// Mock response: one plain variable and one secret variable in the group.
let org_url = spawn_variable_groups_server(
r#"{"value":[{"name":"MySecretGroup","variables":{"PUBLIC_FLAG":{"value":"1","isSecret":false},"DB_PASSWORD":{"isSecret":true}}}]}"#,
);
let ctx = AdoParserContext {
org: Some(org_url),
project: Some("DemoProject".to_string()),
pat: Some("dummy-pat".to_string()),
};
let graph = parse_with_ctx(yaml, &ctx);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert!(
secrets.iter().any(|n| n.name == "DB_PASSWORD"),
"secret variable from enriched group must be modelled as Secret"
);
assert!(
!secrets.iter().any(|n| n.name == "MySecretGroup"),
"resolved group should not be represented as an opaque group-secret node"
);
assert!(
!graph
.completeness_gaps
.iter()
.any(|g| g.contains("MySecretGroup") && g.contains("unresolvable")),
"resolved group must not emit unresolvable-group partial gap"
);
assert_eq!(
graph.metadata.get(META_ADO_VG_ENRICHED),
Some(&"true".to_string())
);
}
// When the enrichment endpoint cannot be reached, parsing still succeeds: the
// graph is partial with an "enrichment failed" gap, the group is kept as an
// opaque secret node, and the enriched flag is "false".
#[test]
fn variable_group_enrichment_failure_falls_back_to_static_model() {
let yaml = r#"
variables:
- group: MySecretGroup
steps:
- script: echo hi
"#;
// Bind an ephemeral port and release it immediately so the URL below points
// at a port that was just closed.
let unused_port = {
let probe = TcpListener::bind("127.0.0.1:0").expect("bind probe listener");
let p = probe.local_addr().expect("probe addr").port();
drop(probe);
p
};
let ctx = AdoParserContext {
org: Some(format!("http://127.0.0.1:{unused_port}")),
project: Some("DemoProject".to_string()),
pat: Some("dummy-pat".to_string()),
};
let graph = parse_with_ctx(yaml, &ctx);
assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
assert!(
graph
.completeness_gaps
.iter()
.any(|g| g.contains("enrichment failed")),
"failed enrichment should produce warning partial gap"
);
assert!(
graph
.nodes_of_kind(NodeKind::Secret)
.any(|n| n.name == "MySecretGroup"),
"on failure parser must fall back to opaque group-secret behaviour"
);
assert_eq!(
graph.metadata.get(META_ADO_VG_ENRICHED),
Some(&"false".to_string())
);
}
// A task with an `azureSubscription` input yields two identities in total.
// The one named after the input is tagged as a service connection with
// "broad" scope.
#[test]
fn task_with_azure_subscription_creates_service_connection_identity() {
let yaml = r#"
steps:
- task: AzureCLI@2
displayName: Deploy to Azure
inputs:
azureSubscription: MyServiceConnection
scriptType: bash
inlineScript: az group list
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
assert_eq!(identities.len(), 2);
let conn = identities
.iter()
.find(|i| i.name == "MyServiceConnection")
.unwrap();
assert_eq!(
conn.metadata.get(META_SERVICE_CONNECTION),
Some(&"true".to_string())
);
assert_eq!(
conn.metadata.get(META_IDENTITY_SCOPE),
Some(&"broad".to_string())
);
}
// The identity created for an `azureSubscription` input carries no
// `META_OIDC` metadata entry.
#[test]
fn service_connection_does_not_get_unconditional_oidc_tag() {
let yaml = r#"
steps:
- task: AzureCLI@2
displayName: Deploy to Azure
inputs:
azureSubscription: MyClassicSpnConnection
scriptType: bash
inlineScript: az group list
"#;
let graph = parse(yaml);
let conn = graph
.nodes_of_kind(NodeKind::Identity)
.find(|i| i.name == "MyClassicSpnConnection")
.expect("service connection identity should exist");
assert_eq!(
conn.metadata.get(META_OIDC),
None,
"service connections must not be tagged META_OIDC without a clear OIDC signal"
);
}
// A `ConnectedServiceNameARM` task input produces an identity node named
// after the input's value.
#[test]
fn task_with_connected_service_name_creates_identity() {
let yaml = r#"
steps:
- task: SqlAzureDacpacDeployment@1
inputs:
ConnectedServiceNameARM: MySqlConnection
"#;
let graph = parse(yaml);
let identities: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
assert!(
identities.iter().any(|i| i.name == "MySqlConnection"),
"connectedServiceNameARM should create identity"
);
}
// A single `script:` step yields exactly one step node, in the first-party
// trust zone.
#[test]
fn script_step_classified_as_first_party() {
let yaml = r#"
steps:
- script: echo hi
displayName: Say hi
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 1);
assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
}
// A `bash:` step is placed in the first-party trust zone.
#[test]
fn bash_step_classified_as_first_party() {
let yaml = r#"
steps:
- bash: echo hi
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps[0].trust_zone, TrustZone::FirstParty);
}
// A single `task:` step yields exactly one step node, in the untrusted trust
// zone.
#[test]
fn task_step_classified_as_untrusted() {
let yaml = r#"
steps:
- task: DotNetCoreCLI@2
inputs:
command: build
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 1);
assert_eq!(steps[0].trust_zone, TrustZone::Untrusted);
}
// An undeclared `$(MY_API_TOKEN)` reference inside a script produces exactly
// one secret node with that name.
#[test]
fn dollar_paren_var_in_script_creates_secret() {
let yaml = r#"
steps:
- script: |
curl -H "Authorization: $(MY_API_TOKEN)" https://api.example.com
displayName: Call API
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert_eq!(secrets.len(), 1);
assert_eq!(secrets[0].name, "MY_API_TOKEN");
}
// References to `Build.*`, `Agent.*` and `System.*` variables in a script
// produce no secret nodes.
#[test]
fn predefined_ado_var_not_treated_as_secret() {
let yaml = r#"
steps:
- script: |
echo $(Build.BuildId)
echo $(Agent.WorkFolder)
echo $(System.DefaultWorkingDirectory)
displayName: Print vars
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert!(
secrets.is_empty(),
"predefined ADO vars should not be treated as secrets, got: {:?}",
secrets.iter().map(|s| &s.name).collect::<Vec<_>>()
);
}
// A `template:` step yields one step node and one image node named after the
// template path, joined by exactly one `DelegatesTo` edge. The graph is marked
// partial.
#[test]
fn template_reference_creates_delegates_to_and_marks_partial() {
let yaml = r#"
steps:
- template: steps/deploy.yml
parameters:
env: production
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 1);
let images: Vec<_> = graph.nodes_of_kind(NodeKind::Image).collect();
assert_eq!(images.len(), 1);
assert_eq!(images[0].name, "steps/deploy.yml");
let delegates: Vec<_> = graph
.edges_from(steps[0].id)
.filter(|e| e.kind == EdgeKind::DelegatesTo)
.collect();
assert_eq!(delegates.len(), 1);
assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
}
// Root-level `steps:` with no `jobs:` yields one step node per entry.
#[test]
fn top_level_steps_no_jobs() {
let yaml = r#"
steps:
- script: echo a
- script: echo b
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 2);
}
// Root-level `jobs:` with no `stages:` yields the steps of every job (two
// here).
#[test]
fn top_level_jobs_no_stages() {
let yaml = r#"
jobs:
- job: JobA
steps:
- script: echo a
- job: JobB
steps:
- script: echo b
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 2);
}
// Steps nested under `stages:` -> `jobs:` are all discovered (two here).
#[test]
fn stages_with_nested_jobs_parsed() {
let yaml = r#"
stages:
- stage: Build
jobs:
- job: Compile
steps:
- script: cargo build
- stage: Test
jobs:
- job: UnitTest
steps:
- script: cargo test
"#;
let graph = parse(yaml);
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
assert_eq!(steps.len(), 2);
}
// Every step node, script or task, has exactly one `HasAccessTo` edge to the
// single identity node in the graph.
#[test]
fn all_steps_linked_to_system_access_token() {
let yaml = r#"
steps:
- script: echo a
- task: SomeTask@1
inputs: {}
"#;
let graph = parse(yaml);
let token: Vec<_> = graph.nodes_of_kind(NodeKind::Identity).collect();
assert_eq!(token.len(), 1);
let token_id = token[0].id;
let steps: Vec<_> = graph.nodes_of_kind(NodeKind::Step).collect();
for step in &steps {
let links: Vec<_> = graph
.edges_from(step.id)
.filter(|e| e.kind == EdgeKind::HasAccessTo && e.to == token_id)
.collect();
assert_eq!(
links.len(),
1,
"step '{}' must link to System.AccessToken",
step.name
);
}
}
// A list-form variable with `isSecret: true` produces exactly one secret node
// carrying the variable's name.
#[test]
fn named_secret_variable_creates_secret_node() {
let yaml = r#"
variables:
- name: MY_PASSWORD
value: dummy
isSecret: true
steps:
- script: echo hi
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert_eq!(secrets.len(), 1);
assert_eq!(secrets[0].name, "MY_PASSWORD");
}
// Mapping-form `variables:` parses and produces no secret nodes.
#[test]
fn variables_as_mapping_parsed() {
let yaml = r#"
variables:
MY_VAR: hello
ANOTHER_VAR: world
steps:
- script: echo hi
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert!(
secrets.is_empty(),
"plain mapping vars should not create secret nodes"
);
}
// `checkout` with `persistCredentials: true` yields exactly one `PersistsTo`
// edge pointing at the `System.AccessToken` identity.
#[test]
fn persist_credentials_creates_persists_to_edge() {
let yaml = r#"
steps:
- checkout: self
persistCredentials: true
- script: git push
"#;
let graph = parse(yaml);
let token_id = graph
.nodes_of_kind(NodeKind::Identity)
.find(|n| n.name == "System.AccessToken")
.expect("System.AccessToken must exist")
.id;
let persists_edges: Vec<_> = graph
.edges
.iter()
.filter(|e| e.kind == EdgeKind::PersistsTo && e.to == token_id)
.collect();
assert_eq!(
persists_edges.len(),
1,
"checkout with persistCredentials: true must produce exactly one PersistsTo edge"
);
}
// The quoted string "true" for `persistCredentials` is treated like the
// boolean and still yields a `PersistsTo` edge.
#[test]
fn persist_credentials_string_true_creates_persists_to_edge() {
let yaml = r#"
steps:
- checkout: self
persistCredentials: "true"
"#;
let graph = parse(yaml);
assert!(
graph.edges.iter().any(|e| e.kind == EdgeKind::PersistsTo),
"string true is accepted by ADO and must be treated as true"
);
}
// `jobs:` written as a name -> body mapping is accepted, and its steps show up
// as step nodes (looked up here by display name).
#[test]
fn jobs_mapping_form_parses() {
let yaml = r#"
jobs:
build:
steps:
- script: build.sh
displayName: Build
"#;
let graph = parse(yaml);
assert!(
graph
.nodes_of_kind(NodeKind::Step)
.any(|s| s.name == "Build"),
"jobs: map form must produce step nodes"
);
}
// Boolean, integer and empty `env:` values do not make the file fail to
// parse; the step node is still produced.
#[test]
fn step_env_non_string_scalar_values_parse() {
let yaml = r#"
steps:
- script: echo hi
env:
FEATURE_ENABLED: true
RETRIES: 3
EMPTY:
"#;
let graph = parse(yaml);
assert!(
graph.nodes_of_kind(NodeKind::Step).next().is_some(),
"scalar env values should not reject the whole ADO file"
);
}
// A `checkout` step without `persistCredentials` yields no `PersistsTo` edges.
#[test]
fn checkout_without_persist_credentials_no_persists_to_edge() {
let yaml = r#"
steps:
- checkout: self
- script: echo hi
"#;
let graph = parse(yaml);
let persists_edges: Vec<_> = graph
.edges
.iter()
.filter(|e| e.kind == EdgeKind::PersistsTo)
.collect();
assert!(
persists_edges.is_empty(),
"checkout without persistCredentials should not produce PersistsTo edge"
);
}
// Secrets passed through `-var` on the continuation lines of a
// `terraform apply` command are each tagged `META_CLI_FLAG_EXPOSED = "true"`.
#[test]
fn var_flag_secret_marked_as_cli_flag_exposed() {
let yaml = r#"
steps:
- script: |
terraform apply \
-var "db_password=$(db_password)" \
-var "api_key=$(api_key)"
displayName: Terraform apply
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert!(!secrets.is_empty(), "should detect secrets from -var flags");
for secret in &secrets {
assert_eq!(
secret.metadata.get(META_CLI_FLAG_EXPOSED),
Some(&"true".to_string()),
"secret '{}' passed via -var flag should be marked cli_flag_exposed",
secret.name
);
}
}
// A secret referenced outside any `-var` flag is detected but carries no
// `META_CLI_FLAG_EXPOSED` entry.
#[test]
fn non_var_flag_secret_not_marked_as_cli_flag_exposed() {
let yaml = r#"
steps:
- script: |
curl -H "Authorization: $(MY_TOKEN)" https://api.example.com
"#;
let graph = parse(yaml);
let secrets: Vec<_> = graph.nodes_of_kind(NodeKind::Secret).collect();
assert_eq!(secrets.len(), 1);
assert!(
!secrets[0].metadata.contains_key(META_CLI_FLAG_EXPOSED),
"non -var secret should not be marked as cli_flag_exposed"
);
}
/// The first step must have exactly one `HasAccessTo` edge pointing at the
/// single secret created for the variable group.
#[test]
fn step_linked_to_variable_group_secret() {
    let pipeline = r#"
variables:
- group: ProdSecrets
steps:
- script: deploy.sh
"#;
    let parsed = parse(pipeline);
    let secret_nodes: Vec<_> = parsed.nodes_of_kind(NodeKind::Secret).collect();
    assert_eq!(secret_nodes.len(), 1);
    let group_secret = secret_nodes[0].id;
    let step_nodes: Vec<_> = parsed.nodes_of_kind(NodeKind::Step).collect();
    let link_count = parsed
        .edges_from(step_nodes[0].id)
        .filter(|edge| edge.kind == EdgeKind::HasAccessTo && edge.to == group_secret)
        .count();
    assert_eq!(
        link_count, 1,
        "step should be linked to variable group secret"
    );
}
/// A `pr:` block must set the graph-level `META_TRIGGER` to `"pr"`.
#[test]
fn pr_trigger_sets_meta_trigger_on_graph() {
    let pipeline = r#"
pr:
- '*'
steps:
- script: echo hi
"#;
    let parsed = parse(pipeline);
    let trigger = parsed.metadata.get(META_TRIGGER).map(|v| v.as_str());
    assert_eq!(
        trigger,
        Some("pr"),
        "ADO pr: trigger should set graph META_TRIGGER"
    );
}
/// A pool given only by `name` must produce one image node named after the
/// pool and tagged `META_SELF_HOSTED = "true"`.
#[test]
fn self_hosted_pool_by_name_creates_image_with_self_hosted_metadata() {
    let pipeline = r#"
pool:
name: my-self-hosted-pool
steps:
- script: echo hi
"#;
    let parsed = parse(pipeline);
    let image_nodes: Vec<_> = parsed.nodes_of_kind(NodeKind::Image).collect();
    assert_eq!(image_nodes.len(), 1);
    let image = &image_nodes[0];
    assert_eq!(image.name, "my-self-hosted-pool");
    let self_hosted = image.metadata.get(META_SELF_HOSTED).map(String::as_str);
    assert_eq!(
        self_hosted,
        Some("true"),
        "pool.name without vmImage must be tagged self-hosted"
    );
}
/// A `vmImage` pool must produce one image node that carries no
/// `META_SELF_HOSTED` entry.
#[test]
fn vm_image_pool_is_not_tagged_self_hosted() {
    let pipeline = r#"
pool:
vmImage: ubuntu-latest
steps:
- script: echo hi
"#;
    let parsed = parse(pipeline);
    let image_nodes: Vec<_> = parsed.nodes_of_kind(NodeKind::Image).collect();
    assert_eq!(image_nodes.len(), 1);
    let image = &image_nodes[0];
    assert_eq!(image.name, "ubuntu-latest");
    let self_hosted_tag = image.metadata.get(META_SELF_HOSTED);
    assert!(
        self_hosted_tag.is_none(),
        "pool.vmImage is Microsoft-hosted — must not be tagged self-hosted"
    );
}
/// Of the two parsed steps, one must carry `META_CHECKOUT_SELF = "true"`.
#[test]
fn checkout_self_step_tagged_with_meta_checkout_self() {
    let pipeline = r#"
steps:
- checkout: self
- script: echo hi
"#;
    let parsed = parse(pipeline);
    let step_nodes: Vec<_> = parsed.nodes_of_kind(NodeKind::Step).collect();
    assert_eq!(step_nodes.len(), 2);
    let tagged_step = step_nodes
        .iter()
        .find(|node| node.metadata.contains_key(META_CHECKOUT_SELF))
        .expect("one step must be tagged META_CHECKOUT_SELF");
    let tag_value = tagged_step
        .metadata
        .get(META_CHECKOUT_SELF)
        .map(String::as_str);
    assert_eq!(tag_value, Some("true"));
}
/// A script emitting `##vso[task.setvariable ...]` must mark its step with
/// `META_WRITES_ENV_GATE = "true"`.
#[test]
fn vso_setvariable_sets_meta_writes_env_gate() {
    let pipeline = r###"
steps:
- script: |
echo "##vso[task.setvariable variable=FOO]bar"
displayName: Set variable
"###;
    let parsed = parse(pipeline);
    let step_nodes: Vec<_> = parsed.nodes_of_kind(NodeKind::Step).collect();
    assert_eq!(step_nodes.len(), 1);
    let writes_gate = step_nodes[0]
        .metadata
        .get(META_WRITES_ENV_GATE)
        .map(String::as_str);
    assert_eq!(
        writes_gate,
        Some("true"),
        "##vso[task.setvariable] must mark META_WRITES_ENV_GATE"
    );
}
/// Covers three cases for `META_ENV_APPROVAL` on step nodes: string-form
/// `environment:`, mapping-form `environment:`, and a job with no
/// `environment:` at all (which must stay untagged).
#[test]
fn environment_key_tags_job_with_env_approval() {
    // Case 1: `environment` given as a plain string.
    let string_form = r#"
jobs:
- deployment: DeployWeb
environment: production
steps:
- script: echo deploying
displayName: Deploy
"#;
    let string_graph = parse(string_form);
    let string_form_tagged = string_graph
        .nodes_of_kind(NodeKind::Step)
        .any(|step| step.metadata.get(META_ENV_APPROVAL).map(String::as_str) == Some("true"));
    assert!(
        string_form_tagged,
        "string-form `environment:` must tag job's step nodes with META_ENV_APPROVAL"
    );

    // Case 2: `environment` given as a mapping.
    let mapping_form = r#"
jobs:
- deployment: DeployAPI
environment:
name: staging
resourceType: VirtualMachine
steps:
- script: echo deploying
displayName: Deploy
"#;
    let mapping_graph = parse(mapping_form);
    let mapping_form_tagged = mapping_graph
        .nodes_of_kind(NodeKind::Step)
        .any(|step| step.metadata.get(META_ENV_APPROVAL).map(String::as_str) == Some("true"));
    assert!(
        mapping_form_tagged,
        "mapping-form `environment: {{ name: ... }}` must tag job's step nodes"
    );

    // Case 3: no `environment` key, so the tag must be absent entirely.
    let without_env = r#"
jobs:
- job: Build
steps:
- script: echo building
"#;
    let plain_graph = parse(without_env);
    let any_tagged = plain_graph
        .nodes_of_kind(NodeKind::Step)
        .any(|step| step.metadata.get(META_ENV_APPROVAL).is_some());
    assert!(
        !any_tagged,
        "jobs without `environment:` must not carry META_ENV_APPROVAL"
    );
}
/// A template fragment mixing a `parameters:` map with a root-level
/// conditional must parse successfully, be marked `Partial`, and record
/// exactly one `Structural` gap that mentions the fragment reason.
#[test]
fn root_parameter_conditional_template_fragment_does_not_crash_and_marks_partial() {
    let fragment = r#"
parameters:
msabs_ws2022: false
- ${{ if eq(parameters.msabs_ws2022, true) }}:
- job: packer_ws2022
displayName: Build WS2022 Gold Image
steps:
- task: PackerTool@0
"#;
    let origin = PipelineSource {
        file: String::from("fragment.yml"),
        repo: None,
        git_ref: None,
        commit_sha: None,
    };
    let graph = AdoParser
        .parse(fragment, &origin)
        .expect("template fragment must not crash the parser");

    let is_partial = matches!(graph.completeness, AuthorityCompleteness::Partial);
    assert!(is_partial, "template-fragment graph must be marked Partial");

    let saw_fragment_gap = graph
        .completeness_gaps
        .iter()
        .any(|reason| reason.contains("template fragment") && reason.contains("parent pipeline"));
    assert!(
        saw_fragment_gap,
        "completeness_gaps must mention the template-fragment reason, got: {:?}",
        graph.completeness_gaps
    );

    let gap_kinds = &graph.completeness_gap_kinds;
    assert_eq!(
        gap_kinds.len(),
        1,
        "template-fragment graph should record exactly one gap kind"
    );
    assert_eq!(gap_kinds[0], GapKind::Structural);
}
/// `META_ENV_APPROVAL` must land on the deployment job's step only, not on
/// the step of the sibling job that has no `environment:`.
#[test]
fn environment_tag_isolated_to_gated_job_only() {
    let pipeline = r#"
jobs:
- job: Build
steps:
- script: echo build
displayName: build-step
- deployment: DeployProd
environment: production
steps:
- script: echo deploy
displayName: deploy-step
"#;
    let parsed = parse(pipeline);
    let step_nodes: Vec<_> = parsed.nodes_of_kind(NodeKind::Step).collect();
    let ungated = step_nodes
        .iter()
        .find(|node| node.name == "build-step")
        .expect("build-step must exist");
    let gated = step_nodes
        .iter()
        .find(|node| node.name == "deploy-step")
        .expect("deploy-step must exist");

    let ungated_tag = ungated.metadata.get(META_ENV_APPROVAL);
    assert!(
        ungated_tag.is_none(),
        "non-gated job's step must not be tagged"
    );
    let gated_tag = gated.metadata.get(META_ENV_APPROVAL).map(String::as_str);
    assert_eq!(
        gated_tag,
        Some("true"),
        "gated deployment job's step must be tagged"
    );
}
/// Decodes the value stored under `META_REPOSITORIES` in the graph metadata
/// into a list of JSON values.
///
/// # Panics
/// Panics when the key is absent, or when its value does not deserialize
/// into a `Vec<serde_json::Value>`.
fn repos_meta(graph: &AuthorityGraph) -> Vec<serde_json::Value> {
    let decoded = graph
        .metadata
        .get(META_REPOSITORIES)
        .map(|json| serde_json::from_str::<Vec<serde_json::Value>>(json));
    decoded
        .expect("META_REPOSITORIES must be set")
        .expect("META_REPOSITORIES must be valid JSON")
}
/// A repository resource referenced by `extends: template: ...@alias` must
/// be recorded with its alias, type, name and ref, and flagged as used.
#[test]
fn resources_repositories_captured_with_used_flag_when_referenced_by_extends() {
    let pipeline = r#"
resources:
repositories:
- repository: shared-templates
type: git
name: Platform/shared-templates
ref: refs/heads/main
extends:
template: pipeline.yml@shared-templates
"#;
    let parsed = parse(pipeline);
    let entries = repos_meta(&parsed);
    assert_eq!(entries.len(), 1);
    let entry = &entries[0];
    let expected_fields = [
        ("alias", "shared-templates"),
        ("repo_type", "git"),
        ("name", "Platform/shared-templates"),
        ("ref", "refs/heads/main"),
    ];
    for (field, want) in expected_fields {
        assert_eq!(entry[field], want);
    }
    assert_eq!(entry["used"], true);
}
/// A repository alias named by a `checkout:` step must be flagged as used.
#[test]
fn resources_repositories_used_via_checkout_alias() {
    let pipeline = r#"
resources:
repositories:
- repository: adf_publish
type: git
name: org/adf-finance-reporting
ref: refs/heads/adf_publish
jobs:
- job: deploy
steps:
- checkout: adf_publish
"#;
    let parsed = parse(pipeline);
    let entries = repos_meta(&parsed);
    assert_eq!(entries.len(), 1);
    let entry = &entries[0];
    assert_eq!(entry["alias"], "adf_publish");
    assert_eq!(entry["used"], true);
}
/// A declared repository alias that nothing references must be recorded
/// with `used == false`.
#[test]
fn resources_repositories_unreferenced_alias_is_marked_not_used() {
    let pipeline = r#"
resources:
repositories:
- repository: orphan-templates
type: git
name: Platform/orphan
ref: main
jobs:
- job: build
steps:
- script: echo hi
"#;
    let parsed = parse(pipeline);
    let entries = repos_meta(&parsed);
    assert_eq!(entries.len(), 1);
    let entry = &entries[0];
    assert_eq!(entry["alias"], "orphan-templates");
    assert_eq!(entry["used"], false);
}
/// Without a `resources:` block the graph must not carry
/// `META_REPOSITORIES` at all.
#[test]
fn resources_repositories_absent_when_no_resources_block() {
    let pipeline = r#"
jobs:
- job: build
steps:
- script: echo hi
"#;
    let parsed = parse(pipeline);
    let repositories = parsed.metadata.get(META_REPOSITORIES);
    assert!(repositories.is_none());
}
/// Table-driven check of `parse_template_alias`: the alias is the text
/// after `@`; no `@` or an empty alias yields `None`.
#[test]
fn parse_template_alias_extracts_segment_after_at() {
    let cases: [(&str, Option<&str>); 3] = [
        ("steps/deploy.yml@templates", Some("templates")),
        ("local/path.yml", None),
        ("path@", None),
    ];
    for (reference, expected_alias) in cases {
        assert_eq!(
            parse_template_alias(reference),
            expected_alias.map(str::to_string)
        );
    }
}
/// Map-form `parameters:` must register exactly the four declared names.
#[test]
fn parameters_as_map_form_parses_as_named_parameters() {
    let pipeline = r#"
parameters:
name: ''
k8sRelease: ''
apimodel: 'examples/e2e-tests/kubernetes/release/default/definition.json'
createVNET: false
jobs:
- job: build
steps:
- script: echo $(name)
"#;
    let parsed = parse(pipeline);
    let declared = ["name", "k8sRelease", "apimodel", "createVNET"];
    for param_name in declared {
        assert!(parsed.parameters.contains_key(param_name));
    }
    assert_eq!(parsed.parameters.len(), 4);
}
/// Typed sequence-form `parameters:` must keep each parameter's type and
/// whether it declared a `values:` allowlist.
#[test]
fn parameters_as_typed_sequence_form_still_parses() {
    let pipeline = r#"
parameters:
- name: env
type: string
default: prod
values:
- prod
- staging
- name: skipTests
type: boolean
default: false
jobs:
- job: build
steps:
- script: echo hi
"#;
    let parsed = parse(pipeline);
    let params = &parsed.parameters;

    let env = params.get("env").expect("env captured");
    assert_eq!(env.param_type, "string");
    assert!(env.has_values_allowlist);

    let skip_tests = params.get("skipTests").expect("skipTests captured");
    assert_eq!(skip_tests.param_type, "boolean");
    assert!(!skip_tests.has_values_allowlist);
}
/// Sequence-form `resources:` must parse without recording
/// `META_REPOSITORIES`, and the single step must still be produced.
#[test]
fn resources_as_legacy_sequence_form_parses_to_empty_resources() {
    let pipeline = r#"
resources:
- repo: self
trigger:
- main
jobs:
- job: build
steps:
- script: echo hi
"#;
    let parsed = parse(pipeline);
    let repositories = parsed.metadata.get(META_REPOSITORIES);
    assert!(repositories.is_none());
    let step_total = parsed.nodes_of_kind(NodeKind::Step).count();
    assert_eq!(step_total, 1);
}
/// `stages:` given as a template expression must mark the graph `Partial`
/// with an `Expression` gap whose reason names the top-level `stages:`.
#[test]
fn stages_as_template_expression_marks_partial_expression_gap() {
    let pipeline = r#"
parameters:
- name: stages
type: stageList
stages: ${{ parameters.stages }}
"#;
    let parsed = parse(pipeline);
    assert!(parsed.parameters.contains_key("stages"));
    assert_eq!(parsed.completeness, AuthorityCompleteness::Partial);

    let has_expression_gap = parsed
        .completeness_gap_kinds
        .contains(&GapKind::Expression);
    assert!(
        has_expression_gap,
        "dynamic stages carrier must be an Expression gap, got: {:?}",
        parsed.completeness_gap_kinds
    );

    let names_carrier = parsed.completeness_gaps.iter().any(|reason| {
        reason.contains("top-level `stages:`") && reason.contains("template expression")
    });
    assert!(
        names_carrier,
        "gap must identify the dynamic stages carrier, got: {:?}",
        parsed.completeness_gaps
    );
}
/// `jobs:` given as a template expression must mark the graph `Partial`
/// with an `Expression` gap whose reason names the top-level `jobs:`.
#[test]
fn jobs_as_template_expression_marks_partial_expression_gap() {
    let pipeline = r#"
parameters:
- name: jobs
type: jobList
jobs: ${{ parameters.jobs }}
"#;
    let parsed = parse(pipeline);
    assert!(parsed.parameters.contains_key("jobs"));
    assert_eq!(parsed.completeness, AuthorityCompleteness::Partial);

    let has_expression_gap = parsed
        .completeness_gap_kinds
        .contains(&GapKind::Expression);
    assert!(
        has_expression_gap,
        "dynamic jobs carrier must be an Expression gap, got: {:?}",
        parsed.completeness_gap_kinds
    );

    let names_carrier = parsed.completeness_gaps.iter().any(|reason| {
        reason.contains("top-level `jobs:`") && reason.contains("template expression")
    });
    assert!(
        names_carrier,
        "gap must identify the dynamic jobs carrier, got: {:?}",
        parsed.completeness_gaps
    );
}
/// A `jobs:` entry that yields no step nodes must mark the graph `Partial`
/// with a `Structural` gap mentioning "0 step nodes".
#[test]
fn jobs_carrier_without_steps_marks_partial() {
    let pipeline = r#"
jobs:
- job: build
pool:
vmImage: ubuntu-latest
"#;
    let parsed = parse(pipeline);
    let step_total = parsed
        .nodes
        .iter()
        .filter(|node| node.kind == NodeKind::Step)
        .count();
    assert_eq!(step_total, 0);
    assert_eq!(parsed.completeness, AuthorityCompleteness::Partial);

    let mentions_zero_steps = parsed
        .completeness_gaps
        .iter()
        .any(|reason| reason.contains("0 step nodes"));
    assert!(
        mentions_zero_steps,
        "completeness_gaps must mention 0 step nodes: {:?}",
        parsed.completeness_gaps
    );

    let is_structural = parsed
        .completeness_gap_kinds
        .contains(&GapKind::Structural);
    assert!(
        is_structural,
        "0-step-nodes gap must be Structural, got: {:?}",
        parsed.completeness_gap_kinds
    );
}
/// An explicitly empty `jobs: []` must not produce the "0 step nodes" gap.
#[test]
fn jobs_carrier_with_empty_jobs_list_does_not_mark_partial() {
    let pipeline = r#"
jobs: []
"#;
    let parsed = parse(pipeline);
    let mut gap_reasons = parsed.completeness_gaps.iter();
    let zero_step_gap = gap_reasons.any(|reason| reason.contains("0 step nodes"));
    assert!(
        !zero_step_gap,
        "empty jobs: list is not a carrier; got: {:?}",
        parsed.completeness_gaps
    );
}
/// `pr: none` must leave `META_TRIGGER` unset on the graph.
#[test]
fn pr_none_does_not_set_meta_trigger() {
    let pipeline = r#"
schedules:
- cron: "0 5 * * 1"
pr: none
trigger: none
steps:
- script: echo hello
"#;
    let parsed = parse(pipeline);
    let trigger = parsed.metadata.get(META_TRIGGER);
    assert!(
        trigger.is_none(),
        "pr: none must not set META_TRIGGER; got: {:?}",
        trigger
    );
}
/// `pr: ~` (YAML null) must leave `META_TRIGGER` unset on the graph.
#[test]
fn pr_tilde_does_not_set_meta_trigger() {
    let pipeline = "pr: ~\nsteps:\n - script: echo hello\n";
    let parsed = parse(pipeline);
    let trigger = parsed.metadata.get(META_TRIGGER);
    assert!(
        trigger.is_none(),
        "pr: ~ must not set META_TRIGGER; got: {:?}",
        trigger
    );
}
/// `pr: false` must leave `META_TRIGGER` unset on the graph.
#[test]
fn pr_false_does_not_set_meta_trigger() {
    let pipeline = "pr: false\nsteps:\n - script: echo hello\n";
    let parsed = parse(pipeline);
    let trigger = parsed.metadata.get(META_TRIGGER);
    assert!(
        trigger.is_none(),
        "pr: false must not set META_TRIGGER; got: {:?}",
        trigger
    );
}
/// A branch list under `pr:` must set `META_TRIGGER` to `"pr"`.
#[test]
fn pr_sequence_sets_meta_trigger() {
    let pipeline = "pr:\n - main\nsteps:\n - script: echo hello\n";
    let parsed = parse(pipeline);
    let trigger = parsed.metadata.get(META_TRIGGER).map(|v| v.as_str());
    assert_eq!(trigger, Some("pr"), "pr: [main] must set META_TRIGGER=pr");
}
/// A `pr:` block with `branches`/`include` must set `META_TRIGGER` to
/// `"pr"`.
#[test]
fn pr_with_branches_sets_meta_trigger() {
    let pipeline = r#"
pr:
branches:
include:
- main
steps:
- script: echo hello
"#;
    let parsed = parse(pipeline);
    let trigger = parsed.metadata.get(META_TRIGGER).map(|v| v.as_str());
    assert_eq!(
        trigger,
        Some("pr"),
        "real pr: block must set META_TRIGGER=pr"
    );
}
/// With `permissions: contents: none`, the `over_privileged_identity` rule
/// must return no finding that involves the `System.AccessToken` node.
#[test]
fn over_privileged_identity_does_not_fire_when_permissions_contents_none() {
    use taudit_core::rules::over_privileged_identity;
    let pipeline = r#"
trigger: none
permissions:
contents: none
steps:
- script: echo hello
"#;
    let graph = parse(pipeline);
    let findings = over_privileged_identity(&graph);
    // Keep only the findings that reference the System.AccessToken node.
    let token_findings: Vec<_> = findings
        .iter()
        .filter(|finding| {
            finding.nodes_involved.iter().copied().any(|id| {
                matches!(graph.node(id), Some(node) if node.name == "System.AccessToken")
            })
        })
        .collect();
    assert!(
        token_findings.is_empty(),
        "over_privileged_identity must not fire on System.AccessToken when \
         permissions: contents: none is set; got: {token_findings:#?}"
    );
}
/// `permissions: contents: none` must set the token's
/// `META_IDENTITY_SCOPE` to `"constrained"`.
#[test]
fn pipeline_level_permissions_none_constrains_token() {
    let pipeline = r#"
trigger: none
permissions:
contents: none
steps:
- script: echo hello
"#;
    let parsed = parse(pipeline);
    let access_token = parsed
        .nodes_of_kind(NodeKind::Identity)
        .find(|node| node.name == "System.AccessToken")
        .expect("System.AccessToken must always be present");
    let scope = access_token.metadata.get(META_IDENTITY_SCOPE);
    assert_eq!(
        scope.map(String::as_str),
        Some("constrained"),
        "permissions: contents: none must constrain the token; got: {:?}",
        scope
    );
}
/// `permissions: contents: write` must leave the token's
/// `META_IDENTITY_SCOPE` at `"broad"`.
#[test]
fn pipeline_level_permissions_write_keeps_token_broad() {
    let pipeline = r#"
trigger: none
permissions:
contents: write
steps:
- script: echo hello
"#;
    let parsed = parse(pipeline);
    let access_token = parsed
        .nodes_of_kind(NodeKind::Identity)
        .find(|node| node.name == "System.AccessToken")
        .expect("System.AccessToken must always be present");
    let scope = access_token.metadata.get(META_IDENTITY_SCOPE);
    assert_eq!(
        scope.map(String::as_str),
        Some("broad"),
        "permissions: contents: write must keep the token broad; got: {:?}",
        scope
    );
}
/// Scalar `permissions: read` must set the token's `META_IDENTITY_SCOPE`
/// to `"constrained"`.
#[test]
fn pipeline_level_permissions_read_scalar_constrains_token() {
    let pipeline = "trigger: none\npermissions: read\nsteps:\n - script: echo hello\n";
    let parsed = parse(pipeline);
    let access_token = parsed
        .nodes_of_kind(NodeKind::Identity)
        .find(|node| node.name == "System.AccessToken")
        .expect("System.AccessToken must always be present");
    let scope = access_token.metadata.get(META_IDENTITY_SCOPE);
    assert_eq!(
        scope.map(String::as_str),
        Some("constrained"),
        "permissions: read must constrain the token; got: {:?}",
        scope
    );
}
/// Scalar `permissions: write` must leave the token's
/// `META_IDENTITY_SCOPE` at `"broad"`.
#[test]
fn pipeline_level_permissions_write_scalar_keeps_token_broad() {
    let pipeline = "trigger: none\npermissions: write\nsteps:\n - script: echo hello\n";
    let parsed = parse(pipeline);
    let access_token = parsed
        .nodes_of_kind(NodeKind::Identity)
        .find(|node| node.name == "System.AccessToken")
        .expect("System.AccessToken must always be present");
    let scope = access_token.metadata.get(META_IDENTITY_SCOPE);
    assert_eq!(
        scope.map(String::as_str),
        Some("broad"),
        "permissions: write scalar must keep token broad; got: {:?}",
        scope
    );
}
/// `permissions: contents: read` must set the token's
/// `META_IDENTITY_SCOPE` to `"constrained"`.
#[test]
fn pipeline_level_permissions_contents_read_constrains_token() {
    let pipeline =
        "trigger: none\npermissions:\n contents: read\nsteps:\n - script: echo hello\n";
    let parsed = parse(pipeline);
    let access_token = parsed
        .nodes_of_kind(NodeKind::Identity)
        .find(|node| node.name == "System.AccessToken")
        .expect("System.AccessToken must always be present");
    let scope = access_token.metadata.get(META_IDENTITY_SCOPE);
    assert_eq!(
        scope.map(String::as_str),
        Some("constrained"),
        "permissions: contents: read must constrain; got: {:?}",
        scope
    );
}
/// A pipeline containing only a `trigger:` must not record the
/// "0 step nodes" gap reason.
#[test]
fn empty_pipeline_does_not_mark_partial_for_zero_steps() {
    let pipeline = r#"
trigger:
- main
"#;
    let parsed = parse(pipeline);
    let mut gap_reasons = parsed.completeness_gaps.iter();
    let zero_step_gap = gap_reasons.any(|reason| reason.contains("0 step nodes"));
    assert!(
        !zero_step_gap,
        "no carrier means no 0-step gap reason; got: {:?}",
        parsed.completeness_gaps
    );
}
/// Parses the same pipeline nine times and requires every run to produce
/// the same identity-material hash and the same node/edge listing.
#[test]
fn ado_hashmap_iteration_is_deterministic_across_runs() {
    /// Renders every node (id, kind, name, trust zone) and every edge
    /// (from, to, kind) of `graph`, in stored order, one per line.
    fn structural_fingerprint(graph: &taudit_core::graph::AuthorityGraph) -> String {
        let node_lines = graph.nodes.iter().map(|n| {
            format!(
                "N {} {:?} {} {:?}\n",
                n.id, n.kind, n.name, n.trust_zone
            )
        });
        let edge_lines = graph
            .edges
            .iter()
            .map(|e| format!("E {} {} {:?}\n", e.from, e.to, e.kind));
        node_lines.chain(edge_lines).collect()
    }

    let yaml = r#"
trigger:
- main
pool:
vmImage: ubuntu-latest
steps:
- task: AzureCLI@2
displayName: Deploy
inputs:
azureSubscription: $(SUB_CONN)
scriptType: bash
inlineScript: |
echo $(MIDDLE_INPUT_VAR)
echo $(ALPHA_INPUT_VAR)
echo $(ZULU_INPUT_VAR)
env:
Z_VAR: $(Z_SECRET)
A_VAR: $(A_SECRET)
M_VAR: $(M_SECRET)
Q_VAR: $(Q_SECRET)
B_VAR: $(B_SECRET)
"#;

    // One (hash, fingerprint) pair per independent parse of the same input.
    let (hashes, fingerprints): (Vec<String>, Vec<String>) = (0..9)
        .map(|_| {
            let graph = parse(yaml);
            let hash = taudit_core::baselines::compute_pipeline_identity_material_hash(&graph);
            let fingerprint = structural_fingerprint(&graph);
            (hash, fingerprint)
        })
        .unzip();

    // Compare every later run against run 0: hashes first, then fingerprints.
    let first_hash = &hashes[0];
    for (i, h) in hashes.iter().enumerate().skip(1) {
        assert_eq!(
            first_hash, h,
            "run 0 and run {i} produced different pipeline_identity_material_hash \
             — ADO parser HashMap iteration is non-deterministic"
        );
    }
    let first_fp = &fingerprints[0];
    for (i, fp) in fingerprints.iter().enumerate().skip(1) {
        assert_eq!(
            first_fp, fp,
            "run 0 and run {i} produced different graph node-id / edge ordering \
             — ADO parser HashMap iteration is non-deterministic"
        );
    }
}
/// A step-level `condition:` must mark the graph `Partial` with an
/// `Expression` gap whose reason names the scope, the step and the
/// condition text.
#[test]
fn step_condition_marks_partial_with_expression_gap() {
    let pipeline = r#"
steps:
- script: deploy.sh
displayName: Deploy
condition: eq(variables['Build.SourceBranch'], 'refs/heads/main')
"#;
    let parsed = parse(pipeline);
    assert_eq!(parsed.completeness, AuthorityCompleteness::Partial);

    let has_expression_gap = parsed
        .completeness_gap_kinds
        .contains(&GapKind::Expression);
    assert!(
        has_expression_gap,
        "step condition must produce an Expression gap, got: {:?}",
        parsed.completeness_gap_kinds
    );

    let required_fragments = ["step", "Deploy", "eq(variables['Build.SourceBranch']"];
    let reason_is_complete = parsed.completeness_gaps.iter().any(|reason| {
        required_fragments
            .iter()
            .all(|fragment| reason.contains(*fragment))
    });
    assert!(
        reason_is_complete,
        "gap reason must name scope, step, and condition: {:?}",
        parsed.completeness_gaps
    );
}
/// A job-level `condition:` must appear verbatim in the step's
/// `META_CONDITION` and must record an `Expression` gap.
#[test]
fn job_condition_propagates_to_step_metadata() {
    let pipeline = r#"
jobs:
- job: DeployProd
condition: eq(variables['Build.SourceBranch'], 'refs/heads/main')
steps:
- script: deploy.sh
displayName: Run deploy
"#;
    let parsed = parse(pipeline);
    let deploy_step = parsed
        .nodes_of_kind(NodeKind::Step)
        .find(|node| node.name == "Run deploy")
        .expect("step node must exist");
    let condition = deploy_step.metadata.get(META_CONDITION).map(String::as_str);
    assert_eq!(
        condition,
        Some("eq(variables['Build.SourceBranch'], 'refs/heads/main')"),
        "job-level condition must propagate to step META_CONDITION"
    );
    assert!(parsed.completeness_gap_kinds.contains(&GapKind::Expression));
}
/// Stage, job and step conditions must be joined with ` AND ` in that
/// order on the step, and at least three `Expression` gaps must be
/// recorded.
#[test]
fn stacked_conditions_join_with_and() {
    let pipeline = r#"
stages:
- stage: Deploy
condition: succeeded()
jobs:
- job: Prod
condition: eq(variables['env'], 'prod')
steps:
- script: deploy.sh
displayName: Deploy step
condition: ne(variables['Build.Reason'], 'PullRequest')
"#;
    let parsed = parse(pipeline);
    let deploy_step = parsed
        .nodes_of_kind(NodeKind::Step)
        .find(|node| node.name == "Deploy step")
        .expect("step node must exist");
    let joined = deploy_step
        .metadata
        .get(META_CONDITION)
        .expect("step must carry META_CONDITION");
    assert_eq!(
        joined,
        "succeeded() AND eq(variables['env'], 'prod') AND ne(variables['Build.Reason'], 'PullRequest')",
        "stacked conditions must AND-join in stage→job→step order"
    );

    let expression_gap_count = parsed
        .completeness_gap_kinds
        .iter()
        .filter(|kind| **kind == GapKind::Expression)
        .count();
    assert!(
        expression_gap_count >= 3,
        "stage + job + step conditions must each mark Partial-Expression, got {expression_gap_count}"
    );
}
/// A single-string `dependsOn` must be stamped verbatim into the step's
/// `META_DEPENDS_ON`.
#[test]
fn depends_on_string_form_parses() {
    let pipeline = r#"
jobs:
- job: Build
steps:
- script: build.sh
- job: Deploy
dependsOn: Build
steps:
- script: deploy.sh
displayName: Deploy
"#;
    let parsed = parse(pipeline);
    let deploy_step = parsed
        .nodes_of_kind(NodeKind::Step)
        .find(|node| node.name == "Deploy")
        .expect("Deploy step must exist");
    let predecessors = deploy_step
        .metadata
        .get(META_DEPENDS_ON)
        .map(String::as_str);
    assert_eq!(
        predecessors,
        Some("Build"),
        "single-string dependsOn must stamp the predecessor name verbatim"
    );
}
/// A list-form `dependsOn` must be comma-joined, in declaration order,
/// into the step's `META_DEPENDS_ON`.
#[test]
fn depends_on_sequence_form_parses() {
    let pipeline = r#"
jobs:
- job: A
steps: [{ script: a.sh }]
- job: B
steps: [{ script: b.sh }]
- job: C
steps: [{ script: c.sh }]
- job: Final
dependsOn:
- A
- B
- C
steps:
- script: final.sh
displayName: Final step
"#;
    let parsed = parse(pipeline);
    let final_step = parsed
        .nodes_of_kind(NodeKind::Step)
        .find(|node| node.name == "Final step")
        .expect("Final step must exist");
    let predecessors = final_step
        .metadata
        .get(META_DEPENDS_ON)
        .map(String::as_str);
    assert_eq!(
        predecessors,
        Some("A,B,C"),
        "sequence-form dependsOn must comma-join predecessors in declaration order"
    );
}
/// A mapping-valued step `dependsOn` (a template conditional) must not
/// stamp `META_DEPENDS_ON`, and must record an `Expression` gap whose
/// reason names the scope, the step and `dependsOn`.
#[test]
fn step_depends_on_mapping_marks_partial_expression() {
    let pipeline = "steps:\n - script: echo hi\n displayName: Mixed depends\n dependsOn:\n \"${{ if eq(parameters.extra, true) }}\":\n - Prep\n";
    let parsed = parse(pipeline);
    let mixed_step = parsed
        .nodes_of_kind(NodeKind::Step)
        .find(|node| node.name == "Mixed depends")
        .expect("step exists");

    let stamped = mixed_step.metadata.get(META_DEPENDS_ON);
    assert!(
        stamped.is_none(),
        "unresolved mapping dependsOn must not stamp META_DEPENDS_ON"
    );

    let has_expression_gap = parsed
        .completeness_gap_kinds
        .contains(&GapKind::Expression);
    assert!(
        has_expression_gap,
        "mapping dependsOn must mark Partial-Expression"
    );

    let required_fragments = ["step", "Mixed depends", "dependsOn"];
    let reason_is_complete = parsed.completeness_gaps.iter().any(|reason| {
        required_fragments
            .iter()
            .all(|fragment| reason.contains(*fragment))
    });
    assert!(
        reason_is_complete,
        "gap reason must name scope, step, and dependsOn"
    );
}
/// A mapping-valued stage `dependsOn` must not reach the child step's
/// `META_DEPENDS_ON`, and the gap reason must cite the stage-level
/// `dependsOn`.
#[test]
fn stage_depends_on_mapping_does_not_fake_inherited_dependency() {
    let pipeline = "stages:\n - stage: Build\n jobs:\n - job: BuildJob\n steps:\n - script: echo build\n - stage: Deploy\n dependsOn:\n \"${{ if eq(parameters.release, true) }}\":\n - Build\n jobs:\n - job: DeployJob\n steps:\n - script: echo deploy\n displayName: Deploy step\n";
    let parsed = parse(pipeline);
    let deploy_step = parsed
        .nodes_of_kind(NodeKind::Step)
        .find(|node| node.name == "Deploy step")
        .expect("deploy step exists");

    let inherited = deploy_step.metadata.get(META_DEPENDS_ON);
    assert!(
        inherited.is_none(),
        "unresolved stage dependsOn must not flow into child step metadata"
    );

    let required_fragments = ["stage", "Deploy", "dependsOn"];
    let cites_stage_depends_on = parsed.completeness_gaps.iter().any(|reason| {
        required_fragments
            .iter()
            .all(|fragment| reason.contains(*fragment))
    });
    assert!(
        cites_stage_depends_on,
        "gap reason must cite stage-level dependsOn expression"
    );
}
/// Runs the full rule set over a pipeline whose job carries a `condition:`
/// and checks the `UntrustedWithAuthority` finding that mentions
/// `DEPLOY_KEY`: its severity must be `High`, `original_severity` must
/// record `Critical`, and `compensating_controls` must contain an entry
/// starting with "ADO conditional gate".
#[test]
fn conditional_step_finding_is_downgraded_via_compensating_control() {
    let yaml = r#"
variables:
- name: DEPLOY_KEY
value: $(MySecret)
isSecret: true
jobs:
- job: ProdDeploy
condition: eq(variables['Build.SourceBranch'], 'refs/heads/main')
steps:
- task: AzureCLI@2
displayName: Deploy to prod
inputs:
azureSubscription: ProdConnection
scriptType: bash
inlineScript: |
echo "$(DEPLOY_KEY)" > /tmp/key
az login --service-principal -u $SP -p $(DEPLOY_KEY)
"#;
    let graph = parse(yaml);
    // The findings are only inspected, never modified, so an immutable
    // binding and a shared-borrow iterator are sufficient.
    let findings =
        taudit_core::rules::run_all_rules(&graph, taudit_core::propagation::DEFAULT_MAX_HOPS);
    let f = findings
        .iter()
        .find(|f| {
            f.category == taudit_core::finding::FindingCategory::UntrustedWithAuthority
                && f.message.contains("DEPLOY_KEY")
        })
        .expect(
            "untrusted_with_authority must fire on the AzureCLI@2 step accessing DEPLOY_KEY",
        );
    assert_eq!(
        f.severity,
        taudit_core::finding::Severity::High,
        "Critical must be downgraded one tier to High by the ADO conditional-gate CC"
    );
    assert_eq!(
        f.extras.original_severity,
        Some(taudit_core::finding::Severity::Critical),
        "original_severity must record Critical so the audit trail survives"
    );
    assert!(
        f.extras
            .compensating_controls
            .iter()
            .any(|c| c.starts_with("ADO conditional gate")),
        "compensating_controls must include the ADO conditional-gate entry, got: {:?}",
        f.extras.compensating_controls
    );
}
/// A variable group declared in one stage must not stop a `$(...)`
/// reference in another stage from becoming a secret node.
#[test]
fn variable_groups_are_scoped_to_their_stage_or_job() {
    let pipeline = r#"
stages:
- stage: UsesGroup
variables:
- group: OpaqueGroup
jobs:
- job: A
steps:
- script: echo $(OPAQUE_VALUE)
- stage: NoGroup
jobs:
- job: B
steps:
- script: echo $(STAGE_TWO_SECRET)
"#;
    let parsed = parse(pipeline);
    let second_stage_secret_found = parsed
        .nodes_of_kind(NodeKind::Secret)
        .any(|node| node.name == "STAGE_TWO_SECRET");
    assert!(
        second_stage_secret_found,
        "variable group in first stage must not suppress secret refs in unrelated stages"
    );
}
/// A plain variable declared in one stage must not stop a same-named
/// `$(...)` reference in another stage from becoming a secret node.
#[test]
fn plain_variables_are_scoped_to_their_stage_or_job() {
    let pipeline = r#"
stages:
- stage: PlainStage
variables:
- name: SHARED_NAME
value: plain
jobs:
- job: A
steps:
- script: echo $(SHARED_NAME)
- stage: SecretRefStage
jobs:
- job: B
steps:
- script: echo $(SHARED_NAME)
"#;
    let parsed = parse(pipeline);
    let shared_name_secret_found = parsed
        .nodes_of_kind(NodeKind::Secret)
        .any(|node| node.name == "SHARED_NAME");
    assert!(
        shared_name_secret_found,
        "plain variable in one stage must not suppress same-name secret refs in another stage"
    );
}
/// Parsing with a full `AdoParserContext` must stamp org, project,
/// PAT-presence and enrichment-readiness keys into graph metadata, while
/// no graph metadata value may contain the PAT text.
#[test]
fn parser_context_stamps_only_safe_metadata() {
    let pipeline = "steps:\n - script: echo hi\n";
    let origin = PipelineSource {
        file: String::from("ctx.yml"),
        repo: None,
        git_ref: None,
        commit_sha: None,
    };
    let context = AdoParserContext {
        org: Some(String::from("org-a")),
        project: Some(String::from("project-a")),
        pat: Some(String::from("very-secret-pat")),
    };
    let graph = AdoParser
        .parse_with_context(pipeline, &origin, Some(&context))
        .expect("parse succeeds");

    // Literal keys are used on purpose so the stored key names stay pinned.
    let expected_entries = [
        ("ado_org", "org-a"),
        ("ado_project", "project-a"),
        ("ado_pat_present", "true"),
        ("ado_variable_group_enrichment_ready", "true"),
    ];
    for (key, want) in expected_entries {
        assert_eq!(graph.metadata.get(key).map(|v| v.as_str()), Some(want));
    }

    let pat_leaked = graph
        .metadata
        .values()
        .any(|value| value.contains("very-secret-pat"));
    assert!(
        !pat_leaked,
        "PAT must never be persisted into graph metadata"
    );
}
/// Parsing without a context must add none of the `ado_*` context keys to
/// graph metadata.
#[test]
fn parser_context_absent_preserves_existing_metadata_shape() {
    let pipeline = "steps:\n - script: echo hi\n";
    let parsed = parse(pipeline);
    let context_keys = [
        "ado_org",
        "ado_project",
        "ado_pat_present",
        "ado_variable_group_enrichment_ready",
    ];
    for key in context_keys {
        assert!(!parsed.metadata.contains_key(key));
    }
}
/// `$$(VAR)` must not create a secret node, and a `setvariable` whose
/// value is `$$(VAR)` must not tag the step with
/// `META_ENV_GATE_WRITES_SECRET_VALUE`.
#[test]
fn escaped_ado_variable_refs_are_not_secret_refs() {
    let pipeline = r###"
steps:
- script: |
echo $$(NOT_A_SECRET)
echo "##vso[task.setvariable variable=Count]$$(NOT_A_SECRET)"
displayName: Escaped
"###;
    let parsed = parse(pipeline);

    let secret_created = parsed
        .nodes_of_kind(NodeKind::Secret)
        .any(|node| node.name == "NOT_A_SECRET");
    assert!(
        !secret_created,
        "$$(VAR) is an escaped literal and must not create a Secret node"
    );

    let escaped_step = parsed
        .nodes_of_kind(NodeKind::Step)
        .find(|node| node.name == "Escaped")
        .expect("step exists");
    let secret_write_tag = escaped_step
        .metadata
        .get(META_ENV_GATE_WRITES_SECRET_VALUE);
    assert!(
        secret_write_tag.is_none(),
        "escaped setvariable value must not be treated as secret-derived"
    );
}
/// A secret passed through `-var-file=` must not be tagged
/// `META_CLI_FLAG_EXPOSED`, while one passed through `-var "k=$(S)"` must
/// be.
#[test]
fn terraform_var_flag_detection_ignores_var_file() {
    let pipeline = r#"
steps:
- script: terraform apply -var-file=$(TFVARS_FILE)
displayName: Var file
- script: terraform apply -var "password=$(TF_PASSWORD)"
displayName: Var value
"#;
    let parsed = parse(pipeline);

    let var_file_secret = parsed
        .nodes_of_kind(NodeKind::Secret)
        .find(|node| node.name == "TFVARS_FILE")
        .expect("TFVARS_FILE secret exists");
    let var_file_tag = var_file_secret.metadata.get(META_CLI_FLAG_EXPOSED);
    assert!(
        var_file_tag.is_none(),
        "-var-file path should not be classified as an exposed -var value"
    );

    let var_value_secret = parsed
        .nodes_of_kind(NodeKind::Secret)
        .find(|node| node.name == "TF_PASSWORD")
        .expect("TF_PASSWORD secret exists");
    let var_value_tag = var_value_secret
        .metadata
        .get(META_CLI_FLAG_EXPOSED)
        .map(String::as_str);
    assert_eq!(
        var_value_tag,
        Some("true"),
        "-var key=$(SECRET) should still be marked as command-line exposed"
    );
}
/// Task input keys written in non-canonical casing must still drive the
/// terraform auto-approve, add-SPN-to-env and script-body metadata.
#[test]
fn task_input_lookup_is_case_insensitive() {
    let pipeline = r#"
steps:
- task: TerraformTaskV4@4
displayName: Terraform
inputs:
Command: apply
CommandOptions: -auto-approve
- task: AzureCLI@2
displayName: SPN
inputs:
AddSpnToEnvironment: TRUE
InLineScRiPt: echo hi
"#;
    let parsed = parse(pipeline);

    let terraform_step = parsed
        .nodes_of_kind(NodeKind::Step)
        .find(|node| node.name == "Terraform")
        .expect("terraform step");
    let auto_approve = terraform_step
        .metadata
        .get(META_TERRAFORM_AUTO_APPROVE)
        .map(String::as_str);
    assert_eq!(auto_approve, Some("true"));

    let spn_step = parsed
        .nodes_of_kind(NodeKind::Step)
        .find(|node| node.name == "SPN")
        .expect("spn step");
    let add_spn = spn_step
        .metadata
        .get(META_ADD_SPN_TO_ENV)
        .map(String::as_str);
    assert_eq!(add_spn, Some("true"));
    let script_body = spn_step.metadata.get(META_SCRIPT_BODY).map(String::as_str);
    assert_eq!(
        script_body,
        Some("echo hi"),
        "mixed-case inline script input key should be detected"
    );
}
}