// taudit_core/graph.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4/// Unique identifier for a node in the authority graph.
5pub type NodeId = usize;
6
7/// Unique identifier for an edge in the authority graph.
8pub type EdgeId = usize;
9
10// ── Metadata key constants ─────────────────────────────
11// Avoids stringly-typed bugs across crate boundaries.
12
13pub const META_DIGEST: &str = "digest";
14pub const META_PERMISSIONS: &str = "permissions";
15pub const META_IDENTITY_SCOPE: &str = "identity_scope";
16pub const META_INFERRED: &str = "inferred";
17/// Marks an Image node as a job container (not a `uses:` action).
18pub const META_CONTAINER: &str = "container";
19/// Marks an Identity node as OIDC-capable (`permissions: id-token: write`).
20pub const META_OIDC: &str = "oidc";
21/// Marks a Secret node whose value is interpolated into a CLI flag argument (e.g. `-var "key=$(SECRET)"`).
22/// CLI flag values appear in pipeline log output even when ADO secret masking is active,
23/// because the command string is logged before masking runs and Terraform itself logs `-var` values.
24pub const META_CLI_FLAG_EXPOSED: &str = "cli_flag_exposed";
25
26// ── Shared helpers ─────────────────────────────────────
27
/// Reports whether `ref_str` is an action reference pinned to a commit SHA.
///
/// The reference must contain an `@`, and the text after the *last* `@` must
/// be at least 40 characters long and consist solely of ASCII hex digits.
/// This is the single source of truth shared by the parser and the rules.
pub fn is_sha_pinned(ref_str: &str) -> bool {
    match ref_str.rsplit_once('@') {
        // `rev` is whatever follows the final `@`.
        Some((_, rev)) => rev.len() >= 40 && rev.bytes().all(|b| b.is_ascii_hexdigit()),
        // No `@` at all: nothing to be pinned to.
        None => false,
    }
}
39
/// Returns true if `image` is pinned to a Docker digest.
///
/// Docker digest format: `image@sha256:<64-hex-chars>`. Everything that
/// follows the first `@sha256:` marker must be exactly 64 ASCII hex digits,
/// so a reference carrying anything after the digest (for example a second
/// `@sha256:` segment) is rejected rather than treated as pinned.
pub fn is_docker_digest_pinned(image: &str) -> bool {
    match image.split_once("@sha256:") {
        // Validate the whole remainder, not just the text up to the next marker.
        Some((_, digest)) => digest.len() == 64 && digest.bytes().all(|b| b.is_ascii_hexdigit()),
        None => false,
    }
}
50
51// ── Graph-level precision markers ───────────────────────
52
53/// How complete is this authority graph? Parsers set this based on whether
54/// they could fully resolve all authority relationships in the pipeline YAML.
55///
56/// A `Partial` graph is still useful — it just tells the consumer that some
57/// authority paths may be missing. This is better than silent incompleteness.
58#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
59#[serde(rename_all = "snake_case")]
60pub enum AuthorityCompleteness {
61    /// Parser resolved all authority relationships.
62    Complete,
63    /// Parser found constructs it couldn't fully resolve (e.g. secrets in
64    /// shell strings, composite actions, reusable workflows). The graph
65    /// captures what it can, but edges may be missing.
66    Partial,
67    /// Parser couldn't determine completeness.
68    Unknown,
69}
70
71/// How broad is an identity's scope? Classifies the risk surface of tokens,
72/// service principals, and OIDC identities.
73#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
74#[serde(rename_all = "snake_case")]
75pub enum IdentityScope {
76    /// Wide permissions: write-all, admin, or unscoped tokens.
77    Broad,
78    /// Narrow permissions: contents:read, specific scopes.
79    Constrained,
80    /// Scope couldn't be determined — treat as risky.
81    Unknown,
82}
83
84impl IdentityScope {
85    /// Classify an identity scope from a permissions string.
86    pub fn from_permissions(perms: &str) -> Self {
87        let p = perms.to_lowercase();
88        if p.contains("write-all") || p.contains("admin") || p == "{}" || p.is_empty() {
89            IdentityScope::Broad
90        } else if p.contains("write") {
91            // Any write permission = broad (conservative)
92            IdentityScope::Broad
93        } else if p.contains("read") {
94            IdentityScope::Constrained
95        } else {
96            IdentityScope::Unknown
97        }
98    }
99}
100
101// ── Node types ──────────────────────────────────────────
102
103/// Semantic kind of a graph node.
104#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
105#[serde(rename_all = "snake_case")]
106pub enum NodeKind {
107    Step,
108    Secret,
109    Artifact,
110    Identity,
111    Image,
112}
113
114/// Trust classification. Explicit on every node — not inferred from kind.
115#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
116#[serde(rename_all = "snake_case")]
117pub enum TrustZone {
118    /// Code/config authored by the repo owner.
119    FirstParty,
120    /// Marketplace actions, external images (pinned).
121    ThirdParty,
122    /// Unpinned actions, fork PRs, user input.
123    Untrusted,
124}
125
126impl TrustZone {
127    /// Returns true if `self` is a lower trust level than `other`.
128    pub fn is_lower_than(&self, other: &TrustZone) -> bool {
129        self.rank() < other.rank()
130    }
131
132    fn rank(&self) -> u8 {
133        match self {
134            TrustZone::FirstParty => 2,
135            TrustZone::ThirdParty => 1,
136            TrustZone::Untrusted => 0,
137        }
138    }
139}
140
141/// A node in the authority graph.
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct Node {
144    pub id: NodeId,
145    pub kind: NodeKind,
146    pub name: String,
147    pub trust_zone: TrustZone,
148    /// Flexible metadata: pinning status, digest, scope, permissions, etc.
149    pub metadata: HashMap<String, String>,
150}
151
152// ── Edge types ──────────────────────────────────────────
153
154/// Edge semantics model authority/data flow — not syntactic YAML relations.
155/// Design test: "Can authority propagate along this edge?"
156#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
157#[serde(rename_all = "snake_case")]
158pub enum EdgeKind {
159    /// Step -> Secret or Identity (authority granted at runtime).
160    HasAccessTo,
161    /// Step -> Artifact (data flows out).
162    Produces,
163    /// Artifact -> Step (authority flows from artifact to consuming step).
164    Consumes,
165    /// Step -> Image/Action (execution delegation).
166    UsesImage,
167    /// Step -> Step (cross-job or action boundary).
168    DelegatesTo,
169    /// Step -> Secret or Identity (credential written to disk, outliving the step's lifetime).
170    /// Distinct from HasAccessTo: disk persistence is accessible to all subsequent steps
171    /// and processes with filesystem access, not just the step that created it.
172    PersistsTo,
173}
174
175/// A directed edge in the authority graph.
176#[derive(Debug, Clone, Serialize, Deserialize)]
177pub struct Edge {
178    pub id: EdgeId,
179    pub from: NodeId,
180    pub to: NodeId,
181    pub kind: EdgeKind,
182}
183
184// ── Pipeline source ─────────────────────────────────────
185
186/// Where the pipeline definition came from.
187#[derive(Debug, Clone, Serialize, Deserialize)]
188pub struct PipelineSource {
189    pub file: String,
190    #[serde(skip_serializing_if = "Option::is_none")]
191    pub repo: Option<String>,
192    #[serde(skip_serializing_if = "Option::is_none")]
193    pub git_ref: Option<String>,
194}
195
196// ── The graph ───────────────────────────────────────────
197
198/// Directed authority graph. Nodes are pipeline elements (steps, secrets,
199/// artifacts, identities, images). Edges model authority/data flow.
200#[derive(Debug, Clone, Serialize, Deserialize)]
201pub struct AuthorityGraph {
202    pub source: PipelineSource,
203    pub nodes: Vec<Node>,
204    pub edges: Vec<Edge>,
205    /// How complete is this graph? Set by the parser based on what it could resolve.
206    pub completeness: AuthorityCompleteness,
207    /// Human-readable reasons why the graph is Partial (if applicable).
208    #[serde(default, skip_serializing_if = "Vec::is_empty")]
209    pub completeness_gaps: Vec<String>,
210}
211
212impl AuthorityGraph {
213    pub fn new(source: PipelineSource) -> Self {
214        Self {
215            source,
216            nodes: Vec::new(),
217            edges: Vec::new(),
218            completeness: AuthorityCompleteness::Complete,
219            completeness_gaps: Vec::new(),
220        }
221    }
222
223    /// Mark the graph as partially complete with a reason.
224    pub fn mark_partial(&mut self, reason: impl Into<String>) {
225        self.completeness = AuthorityCompleteness::Partial;
226        self.completeness_gaps.push(reason.into());
227    }
228
229    /// Add a node, returns its ID.
230    pub fn add_node(
231        &mut self,
232        kind: NodeKind,
233        name: impl Into<String>,
234        trust_zone: TrustZone,
235    ) -> NodeId {
236        let id = self.nodes.len();
237        self.nodes.push(Node {
238            id,
239            kind,
240            name: name.into(),
241            trust_zone,
242            metadata: HashMap::new(),
243        });
244        id
245    }
246
247    /// Add a node with metadata, returns its ID.
248    pub fn add_node_with_metadata(
249        &mut self,
250        kind: NodeKind,
251        name: impl Into<String>,
252        trust_zone: TrustZone,
253        metadata: HashMap<String, String>,
254    ) -> NodeId {
255        let id = self.nodes.len();
256        self.nodes.push(Node {
257            id,
258            kind,
259            name: name.into(),
260            trust_zone,
261            metadata,
262        });
263        id
264    }
265
266    /// Add a directed edge, returns its ID.
267    pub fn add_edge(&mut self, from: NodeId, to: NodeId, kind: EdgeKind) -> EdgeId {
268        let id = self.edges.len();
269        self.edges.push(Edge { id, from, to, kind });
270        id
271    }
272
273    /// Outgoing edges from a node.
274    pub fn edges_from(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
275        self.edges.iter().filter(move |e| e.from == id)
276    }
277
278    /// Incoming edges to a node.
279    pub fn edges_to(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
280        self.edges.iter().filter(move |e| e.to == id)
281    }
282
283    /// All authority-bearing source nodes (Secret + Identity).
284    /// These are the BFS start set for propagation analysis.
285    pub fn authority_sources(&self) -> impl Iterator<Item = &Node> {
286        self.nodes
287            .iter()
288            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
289    }
290
291    /// All nodes of a given kind.
292    pub fn nodes_of_kind(&self, kind: NodeKind) -> impl Iterator<Item = &Node> {
293        self.nodes.iter().filter(move |n| n.kind == kind)
294    }
295
296    /// All nodes in a given trust zone.
297    pub fn nodes_in_zone(&self, zone: TrustZone) -> impl Iterator<Item = &Node> {
298        self.nodes.iter().filter(move |n| n.trust_zone == zone)
299    }
300
301    /// Get a node by ID.
302    pub fn node(&self, id: NodeId) -> Option<&Node> {
303        self.nodes.get(id)
304    }
305
306    /// Get an edge by ID.
307    pub fn edge(&self, id: EdgeId) -> Option<&Edge> {
308        self.edges.get(id)
309    }
310}
311
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an empty graph whose source names only `file`.
    fn graph_for(file: &str) -> AuthorityGraph {
        AuthorityGraph::new(PipelineSource {
            file: file.into(),
            repo: None,
            git_ref: None,
        })
    }

    /// A small build -> artifact -> deploy pipeline with one secret.
    #[test]
    fn build_simple_graph() {
        let mut graph = graph_for("deploy.yml");

        let secret = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let step_build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let artifact = graph.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
        let step_deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);

        graph.add_edge(step_build, secret, EdgeKind::HasAccessTo);
        graph.add_edge(step_build, artifact, EdgeKind::Produces);
        graph.add_edge(artifact, step_deploy, EdgeKind::Consumes);

        assert_eq!(graph.nodes.len(), 4);
        assert_eq!(graph.edges.len(), 3);
        assert_eq!(graph.authority_sources().count(), 1);
        assert_eq!(graph.edges_from(step_build).count(), 2);
        // Consumes flows artifact -> step.
        assert_eq!(graph.edges_from(artifact).count(), 1);
    }

    /// A freshly created graph is `Complete` and has no gaps.
    #[test]
    fn completeness_default_is_complete() {
        let graph = graph_for("test.yml");
        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
        assert!(graph.completeness_gaps.is_empty());
    }

    /// `mark_partial` flips the marker and stores the reason.
    #[test]
    fn mark_partial_records_reason() {
        let mut graph = graph_for("test.yml");
        graph.mark_partial("secrets in run: block inferred, not precisely mapped");
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gaps.len(), 1);
    }

    /// Table of permission strings and the scope each must classify to.
    #[test]
    fn identity_scope_from_permissions() {
        let cases = [
            ("write-all", IdentityScope::Broad),
            ("{ contents: write }", IdentityScope::Broad),
            ("{ contents: read }", IdentityScope::Constrained),
            ("{ id-token: write }", IdentityScope::Broad),
            ("", IdentityScope::Broad),
            ("custom-scope", IdentityScope::Unknown),
        ];
        for &(perms, expected) in &cases {
            assert_eq!(IdentityScope::from_permissions(perms), expected);
        }
    }

    /// Untrusted < ThirdParty < FirstParty, and a zone is never lower than itself.
    #[test]
    fn trust_zone_ordering() {
        assert!(TrustZone::Untrusted.is_lower_than(&TrustZone::FirstParty));
        assert!(TrustZone::ThirdParty.is_lower_than(&TrustZone::FirstParty));
        assert!(TrustZone::Untrusted.is_lower_than(&TrustZone::ThirdParty));
        assert!(!TrustZone::FirstParty.is_lower_than(&TrustZone::FirstParty));
    }
}