Skip to main content

taudit_core/
graph.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
/// Identifier of a node within an authority graph.
///
/// `AuthorityGraph::add_node` hands these out as the node's position in
/// `AuthorityGraph::nodes`, so an id is only meaningful for the graph that issued it.
pub type NodeId = usize;

/// Identifier of an edge within an authority graph.
///
/// `AuthorityGraph::add_edge` hands these out as the edge's position in
/// `AuthorityGraph::edges`.
pub type EdgeId = usize;
9
10// ── Metadata key constants ─────────────────────────────
11// Avoids stringly-typed bugs across crate boundaries.
12
/// Metadata key under which a digest is recorded.
/// NOTE(review): presumably the pinned action/image digest — confirm against the parsers.
pub const META_DIGEST: &str = "digest";
/// Metadata key under which a permissions string is recorded.
/// NOTE(review): presumably the raw value fed to `IdentityScope::from_permissions` — confirm.
pub const META_PERMISSIONS: &str = "permissions";
/// Metadata key under which an identity scope is recorded.
/// NOTE(review): presumably a serialized `IdentityScope` — confirm against the parsers.
pub const META_IDENTITY_SCOPE: &str = "identity_scope";
/// Metadata key for the "inferred" flag.
/// NOTE(review): presumably set when a relationship was guessed rather than resolved — confirm.
pub const META_INFERRED: &str = "inferred";
/// Set on an Image node that is a job container rather than a `uses:` action.
pub const META_CONTAINER: &str = "container";
/// Set on an Identity node that can mint OIDC tokens (`permissions: id-token: write`).
pub const META_OIDC: &str = "oidc";
/// Set on a Secret node whose value is spliced into a CLI flag argument
/// (e.g. `-var "key=$(SECRET)"`).
///
/// Such values show up in pipeline logs even with ADO secret masking enabled: the command
/// string is logged before masking runs, and Terraform logs `-var` values itself.
pub const META_CLI_FLAG_EXPOSED: &str = "cli_flag_exposed";
/// Graph-level key naming the trigger type (e.g. `pull_request_target`, `pr`).
pub const META_TRIGGER: &str = "trigger";
/// Set on a Step that writes to the environment gate
/// (`$GITHUB_ENV`, ADO `##vso[task.setvariable]`).
pub const META_WRITES_ENV_GATE: &str = "writes_env_gate";
/// Set on a Step that produces a cryptographic provenance attestation
/// (e.g. `actions/attest-build-provenance`).
pub const META_ATTESTS: &str = "attests";
/// Set on a Secret node that comes from an ADO variable group rather than an
/// inline pipeline variable.
pub const META_VARIABLE_GROUP: &str = "variable_group";
/// Set on an Image node that is a self-hosted agent pool
/// (`pool.name` on ADO; `runs-on: self-hosted` on GHA).
pub const META_SELF_HOSTED: &str = "self_hosted";
/// Set on a Step that does `checkout: self` (ADO) or a default `actions/checkout`
/// in a PR context.
pub const META_CHECKOUT_SELF: &str = "checkout_self";
/// Set on an Identity node that is an ADO service connection.
pub const META_SERVICE_CONNECTION: &str = "service_connection";
/// Set on an Identity node that the platform injects implicitly
/// (e.g. ADO `System.AccessToken`).
///
/// Every task can reach an implicit token by platform design, so exposing one to an
/// untrusted step is reported as Info-level (structural), not Critical (misconfiguration).
pub const META_IMPLICIT: &str = "implicit";
/// Set on a Step inside an ADO deployment job whose `environment:` requires approvals.
///
/// The manual gate interrupts automatic authority propagation, so a finding whose path
/// crosses such a node is downgraded by one severity step
/// (Critical → High → Medium → Low).
pub const META_ENV_APPROVAL: &str = "env_approval";
48
49// ── Shared helpers ─────────────────────────────────────
50
/// Reports whether `ref_str` is an action reference pinned to a commit SHA.
///
/// The reference must contain `@`, and the text after the *last* `@` must be at
/// least 40 bytes long and consist solely of ASCII hex digits (either case).
/// Parser and rules both go through this function so they share one definition
/// of "pinned".
pub fn is_sha_pinned(ref_str: &str) -> bool {
    match ref_str.rsplit_once('@') {
        // Only the revision part matters; whatever precedes the last `@` is ignored.
        Some((_, revision)) => {
            revision.len() >= 40 && revision.bytes().all(|b| b.is_ascii_hexdigit())
        }
        // No `@` at all: nothing to be pinned to.
        None => false,
    }
}
62
/// Reports whether `image` is pinned to a Docker content digest.
///
/// Expected shape: `image@sha256:<digest>`, where the piece that follows the first
/// `@sha256:` marker is exactly 64 ASCII hex digits.
pub fn is_docker_digest_pinned(image: &str) -> bool {
    // Without the marker `split` yields a single piece, so `nth(1)` is `None`.
    match image.split("@sha256:").nth(1) {
        Some(digest) => digest.len() == 64 && digest.bytes().all(|b| b.is_ascii_hexdigit()),
        None => false,
    }
}
73
74// ── Graph-level precision markers ───────────────────────
75
76/// How complete is this authority graph? Parsers set this based on whether
77/// they could fully resolve all authority relationships in the pipeline YAML.
78///
79/// A `Partial` graph is still useful — it just tells the consumer that some
80/// authority paths may be missing. This is better than silent incompleteness.
81#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
82#[serde(rename_all = "snake_case")]
83pub enum AuthorityCompleteness {
84    /// Parser resolved all authority relationships.
85    Complete,
86    /// Parser found constructs it couldn't fully resolve (e.g. secrets in
87    /// shell strings, composite actions, reusable workflows). The graph
88    /// captures what it can, but edges may be missing.
89    Partial,
90    /// Parser couldn't determine completeness.
91    Unknown,
92}
93
94/// How broad is an identity's scope? Classifies the risk surface of tokens,
95/// service principals, and OIDC identities.
96#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
97#[serde(rename_all = "snake_case")]
98pub enum IdentityScope {
99    /// Wide permissions: write-all, admin, or unscoped tokens.
100    Broad,
101    /// Narrow permissions: contents:read, specific scopes.
102    Constrained,
103    /// Scope couldn't be determined — treat as risky.
104    Unknown,
105}
106
107impl IdentityScope {
108    /// Classify an identity scope from a permissions string.
109    pub fn from_permissions(perms: &str) -> Self {
110        let p = perms.to_lowercase();
111        if p.contains("write-all") || p.contains("admin") || p == "{}" || p.is_empty() {
112            IdentityScope::Broad
113        } else if p.contains("write") {
114            // Any write permission = broad (conservative)
115            IdentityScope::Broad
116        } else if p.contains("read") {
117            IdentityScope::Constrained
118        } else {
119            IdentityScope::Unknown
120        }
121    }
122}
123
124// ── Node types ──────────────────────────────────────────
125
126/// Semantic kind of a graph node.
127#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
128#[serde(rename_all = "snake_case")]
129pub enum NodeKind {
130    Step,
131    Secret,
132    Artifact,
133    Identity,
134    Image,
135}
136
137/// Trust classification. Explicit on every node — not inferred from kind.
138#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
139#[serde(rename_all = "snake_case")]
140pub enum TrustZone {
141    /// Code/config authored by the repo owner.
142    FirstParty,
143    /// Marketplace actions, external images (pinned).
144    ThirdParty,
145    /// Unpinned actions, fork PRs, user input.
146    Untrusted,
147}
148
149impl TrustZone {
150    /// Returns true if `self` is a lower trust level than `other`.
151    pub fn is_lower_than(&self, other: &TrustZone) -> bool {
152        self.rank() < other.rank()
153    }
154
155    fn rank(&self) -> u8 {
156        match self {
157            TrustZone::FirstParty => 2,
158            TrustZone::ThirdParty => 1,
159            TrustZone::Untrusted => 0,
160        }
161    }
162}
163
164/// A node in the authority graph.
165#[derive(Debug, Clone, Serialize, Deserialize)]
166pub struct Node {
167    pub id: NodeId,
168    pub kind: NodeKind,
169    pub name: String,
170    pub trust_zone: TrustZone,
171    /// Flexible metadata: pinning status, digest, scope, permissions, etc.
172    pub metadata: HashMap<String, String>,
173}
174
175// ── Edge types ──────────────────────────────────────────
176
177/// Edge semantics model authority/data flow — not syntactic YAML relations.
178/// Design test: "Can authority propagate along this edge?"
179#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
180#[serde(rename_all = "snake_case")]
181pub enum EdgeKind {
182    /// Step -> Secret or Identity (authority granted at runtime).
183    HasAccessTo,
184    /// Step -> Artifact (data flows out).
185    Produces,
186    /// Artifact -> Step (authority flows from artifact to consuming step).
187    Consumes,
188    /// Step -> Image/Action (execution delegation).
189    UsesImage,
190    /// Step -> Step (cross-job or action boundary).
191    DelegatesTo,
192    /// Step -> Secret or Identity (credential written to disk, outliving the step's lifetime).
193    /// Distinct from HasAccessTo: disk persistence is accessible to all subsequent steps
194    /// and processes with filesystem access, not just the step that created it.
195    PersistsTo,
196}
197
198/// A directed edge in the authority graph.
199#[derive(Debug, Clone, Serialize, Deserialize)]
200pub struct Edge {
201    pub id: EdgeId,
202    pub from: NodeId,
203    pub to: NodeId,
204    pub kind: EdgeKind,
205}
206
207// ── Pipeline source ─────────────────────────────────────
208
209/// Where the pipeline definition came from.
210#[derive(Debug, Clone, Serialize, Deserialize)]
211pub struct PipelineSource {
212    pub file: String,
213    #[serde(skip_serializing_if = "Option::is_none")]
214    pub repo: Option<String>,
215    #[serde(skip_serializing_if = "Option::is_none")]
216    pub git_ref: Option<String>,
217}
218
219// ── The graph ───────────────────────────────────────────
220
221/// Directed authority graph. Nodes are pipeline elements (steps, secrets,
222/// artifacts, identities, images). Edges model authority/data flow.
223#[derive(Debug, Clone, Serialize, Deserialize)]
224pub struct AuthorityGraph {
225    pub source: PipelineSource,
226    pub nodes: Vec<Node>,
227    pub edges: Vec<Edge>,
228    /// How complete is this graph? Set by the parser based on what it could resolve.
229    pub completeness: AuthorityCompleteness,
230    /// Human-readable reasons why the graph is Partial (if applicable).
231    #[serde(default, skip_serializing_if = "Vec::is_empty")]
232    pub completeness_gaps: Vec<String>,
233    /// Graph-level metadata set by parsers (e.g. trigger type, platform-specific flags).
234    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
235    pub metadata: HashMap<String, String>,
236}
237
238impl AuthorityGraph {
239    pub fn new(source: PipelineSource) -> Self {
240        Self {
241            source,
242            nodes: Vec::new(),
243            edges: Vec::new(),
244            completeness: AuthorityCompleteness::Complete,
245            completeness_gaps: Vec::new(),
246            metadata: HashMap::new(),
247        }
248    }
249
250    /// Mark the graph as partially complete with a reason.
251    pub fn mark_partial(&mut self, reason: impl Into<String>) {
252        self.completeness = AuthorityCompleteness::Partial;
253        self.completeness_gaps.push(reason.into());
254    }
255
256    /// Add a node, returns its ID.
257    pub fn add_node(
258        &mut self,
259        kind: NodeKind,
260        name: impl Into<String>,
261        trust_zone: TrustZone,
262    ) -> NodeId {
263        let id = self.nodes.len();
264        self.nodes.push(Node {
265            id,
266            kind,
267            name: name.into(),
268            trust_zone,
269            metadata: HashMap::new(),
270        });
271        id
272    }
273
274    /// Add a node with metadata, returns its ID.
275    pub fn add_node_with_metadata(
276        &mut self,
277        kind: NodeKind,
278        name: impl Into<String>,
279        trust_zone: TrustZone,
280        metadata: HashMap<String, String>,
281    ) -> NodeId {
282        let id = self.nodes.len();
283        self.nodes.push(Node {
284            id,
285            kind,
286            name: name.into(),
287            trust_zone,
288            metadata,
289        });
290        id
291    }
292
293    /// Add a directed edge, returns its ID.
294    pub fn add_edge(&mut self, from: NodeId, to: NodeId, kind: EdgeKind) -> EdgeId {
295        let id = self.edges.len();
296        self.edges.push(Edge { id, from, to, kind });
297        id
298    }
299
300    /// Outgoing edges from a node.
301    pub fn edges_from(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
302        self.edges.iter().filter(move |e| e.from == id)
303    }
304
305    /// Incoming edges to a node.
306    pub fn edges_to(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
307        self.edges.iter().filter(move |e| e.to == id)
308    }
309
310    /// All authority-bearing source nodes (Secret + Identity).
311    /// These are the BFS start set for propagation analysis.
312    pub fn authority_sources(&self) -> impl Iterator<Item = &Node> {
313        self.nodes
314            .iter()
315            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
316    }
317
318    /// All nodes of a given kind.
319    pub fn nodes_of_kind(&self, kind: NodeKind) -> impl Iterator<Item = &Node> {
320        self.nodes.iter().filter(move |n| n.kind == kind)
321    }
322
323    /// All nodes in a given trust zone.
324    pub fn nodes_in_zone(&self, zone: TrustZone) -> impl Iterator<Item = &Node> {
325        self.nodes.iter().filter(move |n| n.trust_zone == zone)
326    }
327
328    /// Get a node by ID.
329    pub fn node(&self, id: NodeId) -> Option<&Node> {
330        self.nodes.get(id)
331    }
332
333    /// Get an edge by ID.
334    pub fn edge(&self, id: EdgeId) -> Option<&Edge> {
335        self.edges.get(id)
336    }
337}
338
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh, empty graph whose source names `file` and carries no repo or ref.
    fn graph_for(file: &str) -> AuthorityGraph {
        AuthorityGraph::new(PipelineSource {
            file: file.into(),
            repo: None,
            git_ref: None,
        })
    }

    /// A small build → artifact → deploy pipeline ends up with the expected
    /// node, edge, and authority-source counts.
    #[test]
    fn build_simple_graph() {
        let mut graph = graph_for("deploy.yml");

        let aws_key = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let tarball = graph.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
        let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);

        graph.add_edge(build, aws_key, EdgeKind::HasAccessTo);
        graph.add_edge(build, tarball, EdgeKind::Produces);
        graph.add_edge(tarball, deploy, EdgeKind::Consumes);

        assert_eq!(graph.nodes.len(), 4);
        assert_eq!(graph.edges.len(), 3);
        assert_eq!(graph.authority_sources().count(), 1);
        assert_eq!(graph.edges_from(build).count(), 2);
        // `Consumes` points from the artifact to the consuming step.
        assert_eq!(graph.edges_from(tarball).count(), 1);
    }

    /// A new graph is `Complete` and has no recorded gaps.
    #[test]
    fn completeness_default_is_complete() {
        let graph = graph_for("test.yml");
        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
        assert!(graph.completeness_gaps.is_empty());
    }

    /// `mark_partial` flips the marker and stores the reason.
    #[test]
    fn mark_partial_records_reason() {
        let mut graph = graph_for("test.yml");
        graph.mark_partial("secrets in run: block inferred, not precisely mapped");
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gaps.len(), 1);
    }

    /// Representative permission strings map to the expected scope.
    #[test]
    fn identity_scope_from_permissions() {
        let cases = [
            ("write-all", IdentityScope::Broad),
            ("{ contents: write }", IdentityScope::Broad),
            ("{ contents: read }", IdentityScope::Constrained),
            ("{ id-token: write }", IdentityScope::Broad),
            ("", IdentityScope::Broad),
            ("custom-scope", IdentityScope::Unknown),
        ];
        for &(permissions, expected) in &cases {
            assert_eq!(
                IdentityScope::from_permissions(permissions),
                expected,
                "permissions: {:?}",
                permissions
            );
        }
    }

    /// Trust ordering is Untrusted < ThirdParty < FirstParty, and strict.
    #[test]
    fn trust_zone_ordering() {
        assert!(TrustZone::Untrusted.is_lower_than(&TrustZone::FirstParty));
        assert!(TrustZone::ThirdParty.is_lower_than(&TrustZone::FirstParty));
        assert!(TrustZone::Untrusted.is_lower_than(&TrustZone::ThirdParty));
        assert!(!TrustZone::FirstParty.is_lower_than(&TrustZone::FirstParty));
    }
}