taudit_core/graph.rs
1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
/// Unique identifier for a node in the authority graph.
///
/// `AuthorityGraph::add_node` and `AuthorityGraph::add_node_with_metadata`
/// hand these out as the node's position in `AuthorityGraph::nodes`, and
/// `AuthorityGraph::node` looks a node up by that position.
pub type NodeId = usize;

/// Unique identifier for an edge in the authority graph.
///
/// `AuthorityGraph::add_edge` hands these out as the edge's position in
/// `AuthorityGraph::edges`, and `AuthorityGraph::edge` looks an edge up by
/// that position.
pub type EdgeId = usize;
9
// ── Metadata key constants ─────────────────────────────
// Avoids stringly-typed bugs across crate boundaries.
// These are keys for the `HashMap<String, String>` metadata maps carried by
// `Node` and `AuthorityGraph`.

/// Metadata key `digest`.
/// NOTE(review): presumably the pinned digest recorded for an image/action
/// node — confirm against the parsers.
pub const META_DIGEST: &str = "digest";
/// Metadata key `permissions`.
/// NOTE(review): presumably the raw permissions string that
/// `IdentityScope::from_permissions` classifies — confirm against the parsers.
pub const META_PERMISSIONS: &str = "permissions";
/// Metadata key `identity_scope`.
/// NOTE(review): presumably the `IdentityScope` classification of an Identity
/// node — confirm against the parsers.
pub const META_IDENTITY_SCOPE: &str = "identity_scope";
/// Metadata key `inferred`.
/// NOTE(review): presumably marks a node that was inferred rather than
/// declared explicitly in the pipeline — confirm against the parsers.
pub const META_INFERRED: &str = "inferred";
/// Marks an Image node as a job container (not a `uses:` action).
pub const META_CONTAINER: &str = "container";
/// Marks an Identity node as OIDC-capable (`permissions: id-token: write`).
pub const META_OIDC: &str = "oidc";
/// Marks a Secret node whose value is interpolated into a CLI flag argument (e.g. `-var "key=$(SECRET)"`).
/// CLI flag values appear in pipeline log output even when ADO secret masking is active,
/// because the command string is logged before masking runs and Terraform itself logs `-var` values.
pub const META_CLI_FLAG_EXPOSED: &str = "cli_flag_exposed";
/// Graph-level metadata: identifies the trigger type (e.g. `pull_request_target`, `pr`).
pub const META_TRIGGER: &str = "trigger";
/// Marks a Step that writes to the environment gate (`$GITHUB_ENV`, ADO `##vso[task.setvariable]`).
pub const META_WRITES_ENV_GATE: &str = "writes_env_gate";
/// Marks a Step that performs cryptographic provenance attestation (e.g. `actions/attest-build-provenance`).
pub const META_ATTESTS: &str = "attests";
/// Marks a Secret node sourced from an ADO variable group (vs inline pipeline variable).
pub const META_VARIABLE_GROUP: &str = "variable_group";
/// Marks an Image node as a self-hosted agent pool (pool.name on ADO; runs-on: self-hosted on GHA).
pub const META_SELF_HOSTED: &str = "self_hosted";
/// Marks a Step that performs a `checkout: self` (ADO) or default `actions/checkout` on a PR context.
pub const META_CHECKOUT_SELF: &str = "checkout_self";
/// Marks an Identity node as an ADO service connection.
pub const META_SERVICE_CONNECTION: &str = "service_connection";
/// Marks an Identity node as implicitly injected by the platform (e.g. ADO System.AccessToken).
/// Implicit tokens are structurally accessible to all tasks by platform design — exposure
/// to untrusted steps is Info-level (structural) rather than Critical (misconfiguration).
pub const META_IMPLICIT: &str = "implicit";
/// Marks a Step that belongs to an ADO deployment job whose `environment:` is
/// configured with required approvals — a manual gate that breaks automatic
/// authority propagation. Findings whose path crosses such a node have their
/// severity reduced by one step (Critical → High → Medium → Low).
pub const META_ENV_APPROVAL: &str = "env_approval";
/// Records the parent job name on every Step node, enabling per-job subgraph
/// filtering (e.g. `taudit map --job build`) and downstream consumers that
/// need to attribute steps back to their containing job. Set by both the GHA
/// and ADO parsers on every Step they create within a job's scope.
pub const META_JOB_NAME: &str = "job_name";
/// Graph-level metadata: JSON-encoded array of `resources.repositories[]`
/// entries declared by the pipeline. Each entry is an object with fields
/// `alias`, `repo_type`, `name`, optional `ref`, and `used` (true when the
/// alias is referenced via `template: x@alias`, `extends: x@alias`, or
/// `checkout: alias` somewhere in the same pipeline file). Set by the ADO
/// parser; consumed by `template_extends_unpinned_branch`.
pub const META_REPOSITORIES: &str = "repositories";
/// Records the raw inline script body of a Step (the text from
/// `script:` / `bash:` / `powershell:` / `pwsh:` / `run:` / task
/// `inputs.script` / `inputs.Inline` / `inputs.inlineScript`). Stamped by
/// parsers when the step has an inline script. Consumed by script-aware
/// rules: `vm_remote_exec_via_pipeline_secret`,
/// `short_lived_sas_in_command_line`, `secret_to_inline_script_env_export`,
/// `secret_materialised_to_workspace_file`, `keyvault_secret_to_plaintext`,
/// `add_spn_with_inline_script`, `parameter_interpolation_into_shell`.
/// Stored verbatim — rules apply their own pattern matching.
pub const META_SCRIPT_BODY: &str = "script_body";
/// Records the name of the ADO service connection a step uses (the value of
/// `inputs.azureSubscription` / `inputs.connectedServiceName*`). Set on the
/// Step node itself (in addition to the Identity node it links to) so rules
/// can pattern-match on the connection name without traversing edges.
pub const META_SERVICE_CONNECTION_NAME: &str = "service_connection_name";
/// Marks a Step as performing `terraform apply ... -auto-approve` (either via
/// an inline script or via a `TerraformCLI` / `TerraformTask` task with
/// `command: apply` and `commandOptions` containing `auto-approve`).
pub const META_TERRAFORM_AUTO_APPROVE: &str = "terraform_auto_approve";
/// Marks a Step task that runs with `addSpnToEnvironment: true`, exposing
/// the federated SPN (idToken / servicePrincipalKey / servicePrincipalId /
/// tenantId) to the inline script body via environment variables.
pub const META_ADD_SPN_TO_ENV: &str = "add_spn_to_environment";
83
84// ── Shared helpers ─────────────────────────────────────
85
/// Returns true if `ref_str` is a SHA-pinned action reference.
///
/// A reference counts as pinned when the text after its last `@` is a
/// full-length Git object name: exactly 40 hex digits (SHA-1) or exactly 64
/// hex digits (SHA-256). Any other length — including hex-only strings that
/// are longer than 40 characters — cannot be a commit id, so it must name a
/// mutable branch or tag and is reported as unpinned. References without an
/// `@` are never pinned.
///
/// Single source of truth — used by both parser and rules.
pub fn is_sha_pinned(ref_str: &str) -> bool {
    // Hex lengths of the two Git object-name formats.
    const SHA1_HEX_LEN: usize = 40;
    const SHA256_HEX_LEN: usize = 64;

    match ref_str.rsplit_once('@') {
        Some((_, revision)) => {
            matches!(revision.len(), SHA1_HEX_LEN | SHA256_HEX_LEN)
                && revision.bytes().all(|b| b.is_ascii_hexdigit())
        }
        // No `@` at all: nothing to pin against.
        None => false,
    }
}
97
/// Reports whether `image` is pinned to a Docker content digest.
///
/// The expected shape is `image@sha256:<64-hex-chars>`: the text following
/// the first `@sha256:` marker (up to the next marker, if there is one) must
/// be exactly 64 ASCII hex digits. An image without the marker is unpinned.
pub fn is_docker_digest_pinned(image: &str) -> bool {
    let mut pieces = image.split("@sha256:");
    // The first piece is whatever precedes the marker (the image name).
    let _name = pieces.next();
    // The second piece only exists when the marker occurred at least once.
    match pieces.next() {
        Some(digest) => digest.len() == 64 && digest.bytes().all(|b| b.is_ascii_hexdigit()),
        None => false,
    }
}
108
109// ── Graph-level precision markers ───────────────────────
110
/// How complete is this authority graph? Parsers set this based on whether
/// they could fully resolve all authority relationships in the pipeline YAML.
///
/// A `Partial` graph is still useful — it just tells the consumer that some
/// authority paths may be missing. This is better than silent incompleteness.
///
/// Serialized in snake_case (`complete`, `partial`, `unknown`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuthorityCompleteness {
    /// Parser resolved all authority relationships. This is the value a graph
    /// created by `AuthorityGraph::new` starts with.
    Complete,
    /// Parser found constructs it couldn't fully resolve (e.g. secrets in
    /// shell strings, composite actions, reusable workflows). The graph
    /// captures what it can, but edges may be missing.
    /// Set by `AuthorityGraph::mark_partial`.
    Partial,
    /// Parser couldn't determine completeness.
    Unknown,
}
128
/// How broad is an identity's scope? Classifies the risk surface of tokens,
/// service principals, and OIDC identities.
///
/// Serialized in snake_case (`broad`, `constrained`, `unknown`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IdentityScope {
    /// Wide permissions: write-all, admin, or unscoped tokens.
    /// `from_permissions` also yields this for any permissions string that
    /// mentions `write`, and for an empty or `{}` string.
    Broad,
    /// Narrow permissions: contents:read, specific scopes.
    Constrained,
    /// Scope couldn't be determined — treat as risky.
    Unknown,
}
141
142impl IdentityScope {
143 /// Classify an identity scope from a permissions string.
144 pub fn from_permissions(perms: &str) -> Self {
145 let p = perms.to_lowercase();
146 if p.contains("write-all") || p.contains("admin") || p == "{}" || p.is_empty() {
147 IdentityScope::Broad
148 } else if p.contains("write") {
149 // Any write permission = broad (conservative)
150 IdentityScope::Broad
151 } else if p.contains("read") {
152 IdentityScope::Constrained
153 } else {
154 IdentityScope::Unknown
155 }
156 }
157}
158
159// ── Node types ──────────────────────────────────────────
160
/// Semantic kind of a graph node.
///
/// Serialized in snake_case (`step`, `secret`, `artifact`, `identity`, `image`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeKind {
    /// A pipeline step.
    Step,
    /// A secret. Counted as an authority source by
    /// `AuthorityGraph::authority_sources`.
    Secret,
    /// An artifact that steps produce or consume (see `EdgeKind::Produces`
    /// and `EdgeKind::Consumes`).
    Artifact,
    /// An identity. Counted as an authority source by
    /// `AuthorityGraph::authority_sources`.
    Identity,
    /// An image or action that a step executes through (the target of
    /// `EdgeKind::UsesImage`).
    Image,
}
171
/// Trust classification. Explicit on every node — not inferred from kind.
///
/// `TrustZone::is_lower_than` orders the zones as
/// `Untrusted` < `ThirdParty` < `FirstParty`.
/// Serialized in snake_case (`first_party`, `third_party`, `untrusted`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TrustZone {
    /// Code/config authored by the repo owner. Highest trust level.
    FirstParty,
    /// Marketplace actions, external images (pinned).
    ThirdParty,
    /// Unpinned actions, fork PRs, user input. Lowest trust level.
    Untrusted,
}
183
184impl TrustZone {
185 /// Returns true if `self` is a lower trust level than `other`.
186 pub fn is_lower_than(&self, other: &TrustZone) -> bool {
187 self.rank() < other.rank()
188 }
189
190 fn rank(&self) -> u8 {
191 match self {
192 TrustZone::FirstParty => 2,
193 TrustZone::ThirdParty => 1,
194 TrustZone::Untrusted => 0,
195 }
196 }
197}
198
/// A node in the authority graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Identifier of this node. `AuthorityGraph::add_node` sets it to the
    /// node's index in `AuthorityGraph::nodes`.
    pub id: NodeId,
    /// What kind of pipeline element this node represents.
    pub kind: NodeKind,
    /// Name of the element (e.g. a step, secret, or artifact name).
    pub name: String,
    /// Trust classification assigned when the node was added.
    pub trust_zone: TrustZone,
    /// Flexible metadata: pinning status, digest, scope, permissions, etc.
    /// Keys are expected to be the `META_*` constants defined in this module.
    pub metadata: HashMap<String, String>,
}
209
210// ── Edge types ──────────────────────────────────────────
211
/// Edge semantics model authority/data flow — not syntactic YAML relations.
/// Design test: "Can authority propagate along this edge?"
///
/// Serialized in snake_case (e.g. `has_access_to`, `uses_image`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeKind {
    /// Step -> Secret or Identity (authority granted at runtime).
    HasAccessTo,
    /// Step -> Artifact (data flows out).
    Produces,
    /// Artifact -> Step (authority flows from artifact to consuming step).
    Consumes,
    /// Step -> Image/Action (execution delegation).
    UsesImage,
    /// Step -> Step (cross-job or action boundary).
    DelegatesTo,
    /// Step -> Secret or Identity (credential written to disk, outliving the step's lifetime).
    /// Distinct from HasAccessTo: disk persistence is accessible to all subsequent steps
    /// and processes with filesystem access, not just the step that created it.
    PersistsTo,
}
232
/// A directed edge in the authority graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Identifier of this edge. `AuthorityGraph::add_edge` sets it to the
    /// edge's index in `AuthorityGraph::edges`.
    pub id: EdgeId,
    /// Node the edge starts at.
    pub from: NodeId,
    /// Node the edge points to.
    pub to: NodeId,
    /// Semantics of the edge; see `EdgeKind` for the direction each kind uses.
    pub kind: EdgeKind,
}
241
242// ── Pipeline source ─────────────────────────────────────
243
/// Where the pipeline definition came from.
///
/// The optional fields are left out of the serialized form when they are
/// `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineSource {
    /// Pipeline definition file (e.g. `deploy.yml`).
    pub file: String,
    /// NOTE(review): presumably the repository the file belongs to — the
    /// expected format is not visible in this module; confirm with callers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repo: Option<String>,
    /// NOTE(review): presumably the Git ref (branch/tag) being analyzed —
    /// confirm with callers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub git_ref: Option<String>,
    /// SHA of the commit being analyzed; reproducibility hint when set.
    /// Parsers leave None; CI integrations populate this from the build env.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub commit_sha: Option<String>,
}
257
258// ── The graph ───────────────────────────────────────────
259
/// Pipeline-level parameter declaration captured from a top-level
/// `parameters:` block. Used by rules that need to reason about whether
/// caller-supplied parameter values are constrained (`values:` allowlist)
/// or free-form (no allowlist on a string parameter — shell-injection risk).
///
/// Stored per parameter name in `AuthorityGraph::parameters`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParamSpec {
    /// Declared parameter type (`string`, `number`, `boolean`, `object`, etc.).
    /// Empty string when the YAML omitted `type:` (ADO defaults to string).
    pub param_type: String,
    /// True when the parameter declares a `values:` allowlist that constrains
    /// the set of acceptable inputs. When true, free-form shell injection is
    /// not possible because the runtime rejects any value outside the list.
    pub has_values_allowlist: bool,
}
274
/// Directed authority graph. Nodes are pipeline elements (steps, secrets,
/// artifacts, identities, images). Edges model authority/data flow.
///
/// `completeness_gaps`, `metadata` and `parameters` are omitted from the
/// serialized form when empty and default to empty when absent on input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorityGraph {
    /// Where the pipeline definition this graph was built from came from.
    pub source: PipelineSource,
    /// All nodes. When built through `add_node` / `add_node_with_metadata`,
    /// a node's `NodeId` is its index in this vector.
    pub nodes: Vec<Node>,
    /// All edges. When built through `add_edge`, an edge's `EdgeId` is its
    /// index in this vector.
    pub edges: Vec<Edge>,
    /// How complete is this graph? Set by the parser based on what it could resolve.
    pub completeness: AuthorityCompleteness,
    /// Human-readable reasons why the graph is Partial (if applicable).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub completeness_gaps: Vec<String>,
    /// Graph-level metadata set by parsers (e.g. trigger type, platform-specific flags).
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub metadata: HashMap<String, String>,
    /// Top-level pipeline `parameters:` declarations, keyed by parameter name.
    /// Populated by parsers that surface parameter metadata (currently ADO).
    /// Empty for platforms / pipelines that don't declare parameters.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub parameters: HashMap<String, ParamSpec>,
}
296
297impl AuthorityGraph {
298 pub fn new(source: PipelineSource) -> Self {
299 Self {
300 source,
301 nodes: Vec::new(),
302 edges: Vec::new(),
303 completeness: AuthorityCompleteness::Complete,
304 completeness_gaps: Vec::new(),
305 metadata: HashMap::new(),
306 parameters: HashMap::new(),
307 }
308 }
309
310 /// Mark the graph as partially complete with a reason.
311 pub fn mark_partial(&mut self, reason: impl Into<String>) {
312 self.completeness = AuthorityCompleteness::Partial;
313 self.completeness_gaps.push(reason.into());
314 }
315
316 /// Add a node, returns its ID.
317 pub fn add_node(
318 &mut self,
319 kind: NodeKind,
320 name: impl Into<String>,
321 trust_zone: TrustZone,
322 ) -> NodeId {
323 let id = self.nodes.len();
324 self.nodes.push(Node {
325 id,
326 kind,
327 name: name.into(),
328 trust_zone,
329 metadata: HashMap::new(),
330 });
331 id
332 }
333
334 /// Add a node with metadata, returns its ID.
335 pub fn add_node_with_metadata(
336 &mut self,
337 kind: NodeKind,
338 name: impl Into<String>,
339 trust_zone: TrustZone,
340 metadata: HashMap<String, String>,
341 ) -> NodeId {
342 let id = self.nodes.len();
343 self.nodes.push(Node {
344 id,
345 kind,
346 name: name.into(),
347 trust_zone,
348 metadata,
349 });
350 id
351 }
352
353 /// Add a directed edge, returns its ID.
354 pub fn add_edge(&mut self, from: NodeId, to: NodeId, kind: EdgeKind) -> EdgeId {
355 let id = self.edges.len();
356 self.edges.push(Edge { id, from, to, kind });
357 id
358 }
359
360 /// Outgoing edges from a node.
361 pub fn edges_from(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
362 self.edges.iter().filter(move |e| e.from == id)
363 }
364
365 /// Incoming edges to a node.
366 pub fn edges_to(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
367 self.edges.iter().filter(move |e| e.to == id)
368 }
369
370 /// All authority-bearing source nodes (Secret + Identity).
371 /// These are the BFS start set for propagation analysis.
372 pub fn authority_sources(&self) -> impl Iterator<Item = &Node> {
373 self.nodes
374 .iter()
375 .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
376 }
377
378 /// All nodes of a given kind.
379 pub fn nodes_of_kind(&self, kind: NodeKind) -> impl Iterator<Item = &Node> {
380 self.nodes.iter().filter(move |n| n.kind == kind)
381 }
382
383 /// All nodes in a given trust zone.
384 pub fn nodes_in_zone(&self, zone: TrustZone) -> impl Iterator<Item = &Node> {
385 self.nodes.iter().filter(move |n| n.trust_zone == zone)
386 }
387
388 /// Get a node by ID.
389 pub fn node(&self, id: NodeId) -> Option<&Node> {
390 self.nodes.get(id)
391 }
392
393 /// Get an edge by ID.
394 pub fn edge(&self, id: EdgeId) -> Option<&Edge> {
395 self.edges.get(id)
396 }
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `PipelineSource` for `file` with every optional field unset.
    fn source(file: &str) -> PipelineSource {
        PipelineSource {
            file: file.into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        }
    }

    #[test]
    fn build_simple_graph() {
        let mut g = AuthorityGraph::new(source("deploy.yml"));

        let secret = g.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let step_build = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let artifact = g.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
        let step_deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);

        g.add_edge(step_build, secret, EdgeKind::HasAccessTo);
        g.add_edge(step_build, artifact, EdgeKind::Produces);
        g.add_edge(artifact, step_deploy, EdgeKind::Consumes);

        assert_eq!(g.nodes.len(), 4);
        assert_eq!(g.edges.len(), 3);
        assert_eq!(g.authority_sources().count(), 1);
        assert_eq!(g.edges_from(step_build).count(), 2);
        assert_eq!(g.edges_from(artifact).count(), 1); // Consumes flows artifact -> step
    }

    #[test]
    fn incoming_edges_and_lookups() {
        let mut g = AuthorityGraph::new(source("deploy.yml"));

        let secret = g.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let step = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let edge = g.add_edge(step, secret, EdgeKind::HasAccessTo);

        assert_eq!(g.edges_to(secret).count(), 1);
        assert_eq!(g.edges_to(step).count(), 0);
        assert_eq!(g.node(step).map(|n| n.name.as_str()), Some("build"));
        assert_eq!(g.edge(edge).map(|e| (e.from, e.to)), Some((step, secret)));
        // Out-of-range IDs are reported as absent rather than panicking.
        assert!(g.node(g.nodes.len()).is_none());
        assert!(g.edge(g.edges.len()).is_none());
        assert_eq!(g.nodes_of_kind(NodeKind::Step).count(), 1);
        assert_eq!(g.nodes_in_zone(TrustZone::FirstParty).count(), 2);
    }

    #[test]
    fn add_node_with_metadata_keeps_metadata() {
        let mut g = AuthorityGraph::new(source("test.yml"));
        let mut metadata = HashMap::new();
        metadata.insert(META_OIDC.to_string(), "true".to_string());

        let plain = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let tagged = g.add_node_with_metadata(
            NodeKind::Identity,
            "token",
            TrustZone::FirstParty,
            metadata,
        );

        assert!(g.nodes[plain].metadata.is_empty());
        assert_eq!(
            g.nodes[tagged].metadata.get(META_OIDC).map(String::as_str),
            Some("true")
        );
    }

    #[test]
    fn completeness_default_is_complete() {
        let g = AuthorityGraph::new(source("test.yml"));
        assert_eq!(g.completeness, AuthorityCompleteness::Complete);
        assert!(g.completeness_gaps.is_empty());
    }

    #[test]
    fn mark_partial_records_reason() {
        let mut g = AuthorityGraph::new(source("test.yml"));
        g.mark_partial("secrets in run: block inferred, not precisely mapped");
        assert_eq!(g.completeness, AuthorityCompleteness::Partial);
        assert_eq!(g.completeness_gaps.len(), 1);
    }

    #[test]
    fn identity_scope_from_permissions() {
        assert_eq!(
            IdentityScope::from_permissions("write-all"),
            IdentityScope::Broad
        );
        assert_eq!(
            IdentityScope::from_permissions("{ contents: write }"),
            IdentityScope::Broad
        );
        assert_eq!(
            IdentityScope::from_permissions("{ contents: read }"),
            IdentityScope::Constrained
        );
        assert_eq!(
            IdentityScope::from_permissions("{ id-token: write }"),
            IdentityScope::Broad
        );
        assert_eq!(IdentityScope::from_permissions(""), IdentityScope::Broad);
        assert_eq!(
            IdentityScope::from_permissions("custom-scope"),
            IdentityScope::Unknown
        );
    }

    #[test]
    fn trust_zone_ordering() {
        assert!(TrustZone::Untrusted.is_lower_than(&TrustZone::FirstParty));
        assert!(TrustZone::ThirdParty.is_lower_than(&TrustZone::FirstParty));
        assert!(TrustZone::Untrusted.is_lower_than(&TrustZone::ThirdParty));
        assert!(!TrustZone::FirstParty.is_lower_than(&TrustZone::FirstParty));
    }

    #[test]
    fn sha_pinning_detection() {
        let sha = "a1".repeat(20);
        assert!(is_sha_pinned(&format!("actions/checkout@{}", sha)));
        assert!(!is_sha_pinned("actions/checkout@v4"));
        assert!(!is_sha_pinned("actions/checkout"));
        // One digit short of a full SHA-1.
        assert!(!is_sha_pinned(&format!(
            "actions/checkout@{}",
            "a".repeat(39)
        )));
        // Right length, but not hexadecimal.
        assert!(!is_sha_pinned(&format!(
            "actions/checkout@{}",
            "g".repeat(40)
        )));
    }

    #[test]
    fn docker_digest_pinning_detection() {
        let digest = "0123456789abcdef".repeat(4);
        assert!(is_docker_digest_pinned(&format!(
            "alpine@sha256:{}",
            digest
        )));
        assert!(!is_docker_digest_pinned("alpine:3.19"));
        assert!(!is_docker_digest_pinned("alpine@sha256:abc"));
        assert!(!is_docker_digest_pinned(&format!(
            "alpine@sha256:{}",
            "g".repeat(64)
        )));
    }
}
485}