Skip to main content

taudit_core/
graph.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
/// Unique identifier for a node in the authority graph.
///
/// `AuthorityGraph::add_node` and `AuthorityGraph::add_node_with_metadata`
/// assign it as the node's position in `AuthorityGraph::nodes`, and
/// `AuthorityGraph::node` looks nodes up by that position.
pub type NodeId = usize;

/// Unique identifier for an edge in the authority graph.
///
/// `AuthorityGraph::add_edge` assigns it as the edge's position in
/// `AuthorityGraph::edges`, and `AuthorityGraph::edge` looks edges up by
/// that position.
pub type EdgeId = usize;
9
10// ── Metadata key constants ─────────────────────────────
11// Avoids stringly-typed bugs across crate boundaries.
12
/// Metadata key `"digest"`. Presumably holds the pinned digest of an image or
/// action reference — NOTE(review): confirm against the parsers that set it.
pub const META_DIGEST: &str = "digest";
/// Metadata key `"permissions"`. Presumably holds the raw permissions string
/// that `IdentityScope::from_permissions` classifies — TODO confirm.
pub const META_PERMISSIONS: &str = "permissions";
/// Metadata key `"identity_scope"`. Presumably records an identity's
/// `IdentityScope` classification — TODO confirm.
pub const META_IDENTITY_SCOPE: &str = "identity_scope";
/// Metadata key `"inferred"`. NOTE(review): looks like it flags data the
/// parser inferred rather than resolved exactly — confirm with the parsers.
pub const META_INFERRED: &str = "inferred";
/// Marks an Image node as a job container (not a `uses:` action).
pub const META_CONTAINER: &str = "container";
/// Marks an Identity node as OIDC-capable (`permissions: id-token: write`).
pub const META_OIDC: &str = "oidc";
/// Marks a Secret node whose value is interpolated into a CLI flag argument (e.g. `-var "key=$(SECRET)"`).
/// CLI flag values appear in pipeline log output even when ADO secret masking is active,
/// because the command string is logged before masking runs and Terraform itself logs `-var` values.
pub const META_CLI_FLAG_EXPOSED: &str = "cli_flag_exposed";
/// Graph-level metadata: identifies the trigger type (e.g. `pull_request_target`, `pr`).
pub const META_TRIGGER: &str = "trigger";
/// Marks a Step that writes to the environment gate (`$GITHUB_ENV`, ADO `##vso[task.setvariable]`).
pub const META_WRITES_ENV_GATE: &str = "writes_env_gate";
/// Marks a Step that performs cryptographic provenance attestation (e.g. `actions/attest-build-provenance`).
pub const META_ATTESTS: &str = "attests";
/// Marks a Secret node sourced from an ADO variable group (vs inline pipeline variable).
pub const META_VARIABLE_GROUP: &str = "variable_group";
/// Marks an Image node as a self-hosted agent pool (pool.name on ADO; runs-on: self-hosted on GHA).
pub const META_SELF_HOSTED: &str = "self_hosted";
/// Marks a Step that performs a `checkout: self` (ADO) or default `actions/checkout` on a PR context.
pub const META_CHECKOUT_SELF: &str = "checkout_self";
/// Marks an Identity node as an ADO service connection.
pub const META_SERVICE_CONNECTION: &str = "service_connection";
/// Marks an Identity node as implicitly injected by the platform (e.g. ADO System.AccessToken).
/// Implicit tokens are structurally accessible to all tasks by platform design — exposure
/// to untrusted steps is Info-level (structural) rather than Critical (misconfiguration).
pub const META_IMPLICIT: &str = "implicit";
/// Marks a Step that belongs to an ADO deployment job whose `environment:` is
/// configured with required approvals — a manual gate that breaks automatic
/// authority propagation. Findings whose path crosses such a node have their
/// severity reduced by one step (Critical → High → Medium → Low).
pub const META_ENV_APPROVAL: &str = "env_approval";
/// Records the parent job name on every Step node, enabling per-job subgraph
/// filtering (e.g. `taudit map --job build`) and downstream consumers that
/// need to attribute steps back to their containing job. Set by both the GHA
/// and ADO parsers on every Step they create within a job's scope.
pub const META_JOB_NAME: &str = "job_name";
/// Graph-level metadata: JSON-encoded array of `resources.repositories[]`
/// entries declared by the pipeline. Each entry is an object with fields
/// `alias`, `repo_type`, `name`, optional `ref`, and `used` (true when the
/// alias is referenced via `template: x@alias`, `extends: x@alias`, or
/// `checkout: alias` somewhere in the same pipeline file). Set by the ADO
/// parser; consumed by `template_extends_unpinned_branch`.
pub const META_REPOSITORIES: &str = "repositories";
/// Records the raw inline script body of a Step (the text from
/// `script:` / `bash:` / `powershell:` / `pwsh:` / `run:` / task
/// `inputs.script` / `inputs.Inline` / `inputs.inlineScript`). Stamped by
/// parsers when the step has an inline script. Consumed by script-aware
/// rules: `vm_remote_exec_via_pipeline_secret`,
/// `short_lived_sas_in_command_line`, `secret_to_inline_script_env_export`,
/// `secret_materialised_to_workspace_file`, `keyvault_secret_to_plaintext`,
/// `add_spn_with_inline_script`, `parameter_interpolation_into_shell`.
/// Stored verbatim — rules apply their own pattern matching.
pub const META_SCRIPT_BODY: &str = "script_body";
/// Records the name of the ADO service connection a step uses (the value of
/// `inputs.azureSubscription` / `inputs.connectedServiceName*`). Set on the
/// Step node itself (in addition to the Identity node it links to) so rules
/// can pattern-match on the connection name without traversing edges.
pub const META_SERVICE_CONNECTION_NAME: &str = "service_connection_name";
/// Marks a Step as performing `terraform apply ... -auto-approve` (either via
/// an inline script or via a `TerraformCLI` / `TerraformTask` task with
/// `command: apply` and `commandOptions` containing `auto-approve`).
pub const META_TERRAFORM_AUTO_APPROVE: &str = "terraform_auto_approve";
/// Marks a Step task that runs with `addSpnToEnvironment: true`, exposing
/// the federated SPN (idToken / servicePrincipalKey / servicePrincipalId /
/// tenantId) to the inline script body via environment variables.
pub const META_ADD_SPN_TO_ENV: &str = "add_spn_to_environment";
83
84// ── Shared helpers ─────────────────────────────────────
85
/// Reports whether `ref_str` is an action reference pinned to a commit SHA.
///
/// A reference counts as pinned when it contains an `@` and the text after
/// the last `@` is at least 40 bytes long and consists solely of ASCII hex
/// digits (upper or lower case). A reference without any `@` is never pinned.
///
/// This is the single source of truth for SHA pinning, used by both the
/// parser and the rules.
pub fn is_sha_pinned(ref_str: &str) -> bool {
    match ref_str.rsplit_once('@') {
        // Only the part after the final `@` is treated as the revision.
        Some((_, revision)) => {
            revision.len() >= 40 && revision.bytes().all(|b| b.is_ascii_hexdigit())
        }
        None => false,
    }
}
97
/// Returns true if `image` is pinned to a Docker digest.
///
/// Docker digest format: `image@sha256:<64-hex-chars>`. Everything after the
/// first `@sha256:` marker must be exactly 64 ASCII hex digits, so a reference
/// with anything trailing the digest (for example a second `@sha256:` marker
/// followed by arbitrary text) is not treated as pinned.
pub fn is_docker_digest_pinned(image: &str) -> bool {
    match image.split_once("@sha256:") {
        // `digest` is the whole remainder of the string, not just the segment
        // up to the next marker, so trailing text makes the check fail.
        Some((_, digest)) => {
            digest.len() == 64 && digest.bytes().all(|b| b.is_ascii_hexdigit())
        }
        None => false,
    }
}
108
109// ── Graph-level precision markers ───────────────────────
110
/// How complete is this authority graph? Parsers set this based on whether
/// they could fully resolve all authority relationships in the pipeline YAML.
///
/// A `Partial` graph is still useful — it just tells the consumer that some
/// authority paths may be missing. This is better than silent incompleteness.
///
/// Variant names are serialized in snake_case (per the `rename_all` attribute).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuthorityCompleteness {
    /// Parser resolved all authority relationships. This is the value a graph
    /// created by `AuthorityGraph::new` starts with.
    Complete,
    /// Parser found constructs it couldn't fully resolve (e.g. secrets in
    /// shell strings, composite actions, reusable workflows). The graph
    /// captures what it can, but edges may be missing.
    /// `AuthorityGraph::mark_partial` sets this value.
    Partial,
    /// Parser couldn't determine completeness.
    Unknown,
}
128
/// How broad is an identity's scope? Classifies the risk surface of tokens,
/// service principals, and OIDC identities.
///
/// Variant names are serialized in snake_case (per the `rename_all` attribute).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IdentityScope {
    /// Wide permissions: write-all, admin, or unscoped tokens.
    /// `from_permissions` also classifies any permissions string containing
    /// `write`, the empty string, and `{}` as broad.
    Broad,
    /// Narrow permissions: contents:read, specific scopes.
    Constrained,
    /// Scope couldn't be determined — treat as risky.
    Unknown,
}
141
142impl IdentityScope {
143    /// Classify an identity scope from a permissions string.
144    pub fn from_permissions(perms: &str) -> Self {
145        let p = perms.to_lowercase();
146        if p.contains("write-all") || p.contains("admin") || p == "{}" || p.is_empty() {
147            IdentityScope::Broad
148        } else if p.contains("write") {
149            // Any write permission = broad (conservative)
150            IdentityScope::Broad
151        } else if p.contains("read") {
152            IdentityScope::Constrained
153        } else {
154            IdentityScope::Unknown
155        }
156    }
157}
158
159// ── Node types ──────────────────────────────────────────
160
/// Semantic kind of a graph node.
///
/// `Secret` and `Identity` nodes are the authority-bearing kinds returned by
/// `AuthorityGraph::authority_sources`. Variant names are serialized in
/// snake_case (per the `rename_all` attribute).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeKind {
    /// A pipeline step.
    Step,
    /// A secret; an authority source.
    Secret,
    /// An artifact that steps produce or consume.
    Artifact,
    /// An identity (e.g. token, service connection); an authority source.
    Identity,
    /// An image or action a step executes through; also used for job
    /// containers and self-hosted agent pools (see `META_CONTAINER`,
    /// `META_SELF_HOSTED`).
    Image,
}
171
/// Trust classification. Explicit on every node — not inferred from kind.
///
/// Ordered from most to least trusted: `FirstParty`, `ThirdParty`,
/// `Untrusted` (see `TrustZone::is_lower_than`). Variant names are serialized
/// in snake_case (per the `rename_all` attribute).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TrustZone {
    /// Code/config authored by the repo owner.
    FirstParty,
    /// Marketplace actions, external images (pinned).
    ThirdParty,
    /// Unpinned actions, fork PRs, user input.
    Untrusted,
}
183
184impl TrustZone {
185    /// Returns true if `self` is a lower trust level than `other`.
186    pub fn is_lower_than(&self, other: &TrustZone) -> bool {
187        self.rank() < other.rank()
188    }
189
190    fn rank(&self) -> u8 {
191        match self {
192            TrustZone::FirstParty => 2,
193            TrustZone::ThirdParty => 1,
194            TrustZone::Untrusted => 0,
195        }
196    }
197}
198
/// A node in the authority graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Identifier of this node; `AuthorityGraph::add_node` sets it to the
    /// node's position in `AuthorityGraph::nodes`.
    pub id: NodeId,
    /// What kind of pipeline element this node represents.
    pub kind: NodeKind,
    /// Name of the node (e.g. a step or secret name).
    pub name: String,
    /// Trust classification, set explicitly when the node is added.
    pub trust_zone: TrustZone,
    /// Flexible metadata: pinning status, digest, scope, permissions, etc.
    /// Keys are expected to be the `META_*` constants defined in this module.
    pub metadata: HashMap<String, String>,
}
209
210// ── Edge types ──────────────────────────────────────────
211
/// Edge semantics model authority/data flow — not syntactic YAML relations.
/// Design test: "Can authority propagate along this edge?"
///
/// Variant names are serialized in snake_case (per the `rename_all` attribute).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeKind {
    /// Step -> Secret or Identity (authority granted at runtime).
    HasAccessTo,
    /// Step -> Artifact (data flows out).
    Produces,
    /// Artifact -> Step (authority flows from artifact to consuming step).
    Consumes,
    /// Step -> Image/Action (execution delegation).
    UsesImage,
    /// Step -> Step (cross-job or action boundary).
    DelegatesTo,
    /// Step -> Secret or Identity (credential written to disk, outliving the step's lifetime).
    /// Distinct from HasAccessTo: disk persistence is accessible to all subsequent steps
    /// and processes with filesystem access, not just the step that created it.
    PersistsTo,
}
232
/// A directed edge in the authority graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Identifier of this edge; `AuthorityGraph::add_edge` sets it to the
    /// edge's position in `AuthorityGraph::edges`.
    pub id: EdgeId,
    /// Node the edge starts at (matched by `AuthorityGraph::edges_from`).
    pub from: NodeId,
    /// Node the edge points to (matched by `AuthorityGraph::edges_to`).
    pub to: NodeId,
    /// Authority/data-flow meaning of the edge.
    pub kind: EdgeKind,
}
241
242// ── Pipeline source ─────────────────────────────────────
243
/// Where the pipeline definition came from.
///
/// Optional fields are left out of the serialized form when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineSource {
    /// Pipeline definition file (e.g. `deploy.yml`).
    pub file: String,
    /// Repository the file belongs to, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repo: Option<String>,
    /// Git ref the file was read from, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub git_ref: Option<String>,
    /// SHA of the commit being analyzed; reproducibility hint when set.
    /// Parsers leave None; CI integrations populate this from the build env.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub commit_sha: Option<String>,
}
257
258// ── The graph ───────────────────────────────────────────
259
/// Pipeline-level parameter declaration captured from a top-level
/// `parameters:` block. Used by rules that need to reason about whether
/// caller-supplied parameter values are constrained (`values:` allowlist)
/// or free-form (no allowlist on a string parameter — shell-injection risk).
///
/// Stored in `AuthorityGraph::parameters`, keyed by parameter name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParamSpec {
    /// Declared parameter type (`string`, `number`, `boolean`, `object`, etc.).
    /// Empty string when the YAML omitted `type:` (ADO defaults to string).
    pub param_type: String,
    /// True when the parameter declares a `values:` allowlist that constrains
    /// the set of acceptable inputs. When true, free-form shell injection is
    /// not possible because the runtime rejects any value outside the list.
    pub has_values_allowlist: bool,
}
274
/// Directed authority graph. Nodes are pipeline elements (steps, secrets,
/// artifacts, identities, images). Edges model authority/data flow.
///
/// Nodes and edges are stored in insertion order; the `add_*` methods hand
/// out ids equal to the element's position in the corresponding vector.
/// Empty `completeness_gaps`, `metadata`, and `parameters` are omitted when
/// serializing and default to empty when missing on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorityGraph {
    /// Where the pipeline definition came from.
    pub source: PipelineSource,
    /// All nodes, in insertion order.
    pub nodes: Vec<Node>,
    /// All edges, in insertion order.
    pub edges: Vec<Edge>,
    /// How complete is this graph? Set by the parser based on what it could resolve.
    pub completeness: AuthorityCompleteness,
    /// Human-readable reasons why the graph is Partial (if applicable).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub completeness_gaps: Vec<String>,
    /// Graph-level metadata set by parsers (e.g. trigger type, platform-specific flags).
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub metadata: HashMap<String, String>,
    /// Top-level pipeline `parameters:` declarations, keyed by parameter name.
    /// Populated by parsers that surface parameter metadata (currently ADO).
    /// Empty for platforms / pipelines that don't declare parameters.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub parameters: HashMap<String, ParamSpec>,
}
296
297impl AuthorityGraph {
298    pub fn new(source: PipelineSource) -> Self {
299        Self {
300            source,
301            nodes: Vec::new(),
302            edges: Vec::new(),
303            completeness: AuthorityCompleteness::Complete,
304            completeness_gaps: Vec::new(),
305            metadata: HashMap::new(),
306            parameters: HashMap::new(),
307        }
308    }
309
310    /// Mark the graph as partially complete with a reason.
311    pub fn mark_partial(&mut self, reason: impl Into<String>) {
312        self.completeness = AuthorityCompleteness::Partial;
313        self.completeness_gaps.push(reason.into());
314    }
315
316    /// Add a node, returns its ID.
317    pub fn add_node(
318        &mut self,
319        kind: NodeKind,
320        name: impl Into<String>,
321        trust_zone: TrustZone,
322    ) -> NodeId {
323        let id = self.nodes.len();
324        self.nodes.push(Node {
325            id,
326            kind,
327            name: name.into(),
328            trust_zone,
329            metadata: HashMap::new(),
330        });
331        id
332    }
333
334    /// Add a node with metadata, returns its ID.
335    pub fn add_node_with_metadata(
336        &mut self,
337        kind: NodeKind,
338        name: impl Into<String>,
339        trust_zone: TrustZone,
340        metadata: HashMap<String, String>,
341    ) -> NodeId {
342        let id = self.nodes.len();
343        self.nodes.push(Node {
344            id,
345            kind,
346            name: name.into(),
347            trust_zone,
348            metadata,
349        });
350        id
351    }
352
353    /// Add a directed edge, returns its ID.
354    pub fn add_edge(&mut self, from: NodeId, to: NodeId, kind: EdgeKind) -> EdgeId {
355        let id = self.edges.len();
356        self.edges.push(Edge { id, from, to, kind });
357        id
358    }
359
360    /// Outgoing edges from a node.
361    pub fn edges_from(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
362        self.edges.iter().filter(move |e| e.from == id)
363    }
364
365    /// Incoming edges to a node.
366    pub fn edges_to(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
367        self.edges.iter().filter(move |e| e.to == id)
368    }
369
370    /// All authority-bearing source nodes (Secret + Identity).
371    /// These are the BFS start set for propagation analysis.
372    pub fn authority_sources(&self) -> impl Iterator<Item = &Node> {
373        self.nodes
374            .iter()
375            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
376    }
377
378    /// All nodes of a given kind.
379    pub fn nodes_of_kind(&self, kind: NodeKind) -> impl Iterator<Item = &Node> {
380        self.nodes.iter().filter(move |n| n.kind == kind)
381    }
382
383    /// All nodes in a given trust zone.
384    pub fn nodes_in_zone(&self, zone: TrustZone) -> impl Iterator<Item = &Node> {
385        self.nodes.iter().filter(move |n| n.trust_zone == zone)
386    }
387
388    /// Get a node by ID.
389    pub fn node(&self, id: NodeId) -> Option<&Node> {
390        self.nodes.get(id)
391    }
392
393    /// Get an edge by ID.
394    pub fn edge(&self, id: EdgeId) -> Option<&Edge> {
395        self.edges.get(id)
396    }
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh, empty graph whose source names `file` and leaves every optional
    /// source field unset.
    fn empty_graph(file: &str) -> AuthorityGraph {
        AuthorityGraph::new(PipelineSource {
            file: file.into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        })
    }

    /// Nodes and edges can be added and then queried by kind and direction.
    #[test]
    fn build_simple_graph() {
        let mut graph = empty_graph("deploy.yml");

        let secret = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let step_build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let artifact = graph.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
        let step_deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);

        graph.add_edge(step_build, secret, EdgeKind::HasAccessTo);
        graph.add_edge(step_build, artifact, EdgeKind::Produces);
        graph.add_edge(artifact, step_deploy, EdgeKind::Consumes);

        assert_eq!(graph.nodes.len(), 4);
        assert_eq!(graph.edges.len(), 3);
        assert_eq!(graph.authority_sources().count(), 1);
        assert_eq!(graph.edges_from(step_build).count(), 2);
        // Consumes flows artifact -> step
        assert_eq!(graph.edges_from(artifact).count(), 1);
    }

    /// A new graph starts out `Complete` with no recorded gaps.
    #[test]
    fn completeness_default_is_complete() {
        let graph = empty_graph("test.yml");

        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
        assert!(graph.completeness_gaps.is_empty());
    }

    /// `mark_partial` flips completeness and stores the reason.
    #[test]
    fn mark_partial_records_reason() {
        let mut graph = empty_graph("test.yml");

        graph.mark_partial("secrets in run: block inferred, not precisely mapped");

        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gaps.len(), 1);
    }

    /// Permission strings map onto the expected scope classification.
    #[test]
    fn identity_scope_from_permissions() {
        let cases = [
            ("write-all", IdentityScope::Broad),
            ("{ contents: write }", IdentityScope::Broad),
            ("{ contents: read }", IdentityScope::Constrained),
            ("{ id-token: write }", IdentityScope::Broad),
            ("", IdentityScope::Broad),
            ("custom-scope", IdentityScope::Unknown),
        ];

        for &(perms, expected) in cases.iter() {
            assert_eq!(IdentityScope::from_permissions(perms), expected);
        }
    }

    /// Trust decreases FirstParty -> ThirdParty -> Untrusted, and the
    /// comparison is strict.
    #[test]
    fn trust_zone_ordering() {
        let first_party = TrustZone::FirstParty;
        let third_party = TrustZone::ThirdParty;
        let untrusted = TrustZone::Untrusted;

        assert!(untrusted.is_lower_than(&first_party));
        assert!(third_party.is_lower_than(&first_party));
        assert!(untrusted.is_lower_than(&third_party));
        assert!(!first_party.is_lower_than(&first_party));
    }
}