Skip to main content

taudit_core/
graph.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
/// Identifier of a node in the authority graph.
///
/// `AuthorityGraph::add_node` and `AuthorityGraph::add_node_with_metadata`
/// hand out the node's position in `AuthorityGraph::nodes` as its id.
pub type NodeId = usize;

/// Identifier of an edge in the authority graph.
///
/// `AuthorityGraph::add_edge` hands out the edge's position in
/// `AuthorityGraph::edges` as its id.
pub type EdgeId = usize;
9
10// ── Metadata key constants ─────────────────────────────
11// Avoids stringly-typed bugs across crate boundaries.
12
/// Metadata key for a digest value.
/// NOTE(review): presumably the pinned digest of an image/action — confirm against the parser.
pub const META_DIGEST: &str = "digest";
/// Metadata key for a permissions description.
/// NOTE(review): presumably the text fed to `IdentityScope::from_permissions` — confirm.
pub const META_PERMISSIONS: &str = "permissions";
/// Metadata key for an identity's scope classification.
/// NOTE(review): presumably a serialized `IdentityScope` — confirm against the parser.
pub const META_IDENTITY_SCOPE: &str = "identity_scope";
/// Metadata key flagging something as inferred.
/// NOTE(review): exact semantics are set by the code that writes this key, not visible here.
pub const META_INFERRED: &str = "inferred";
/// Metadata key that tags an Image node as a job container rather than a
/// `uses:` action.
pub const META_CONTAINER: &str = "container";
/// Metadata key that tags an Identity node as OIDC-capable
/// (`permissions: id-token: write`).
pub const META_OIDC: &str = "oidc";
21
22// ── Shared helpers ─────────────────────────────────────
23
/// Reports whether `ref_str` is an action reference pinned to a commit SHA.
///
/// The text after the last `@` must be at least 40 characters long and made
/// up solely of ASCII hex digits. A reference without any `@` is never
/// considered pinned.
///
/// Parser and rules both call this function so they agree on what "pinned"
/// means.
pub fn is_sha_pinned(ref_str: &str) -> bool {
    match ref_str.rsplit_once('@') {
        Some((_, revision)) => {
            revision.len() >= 40 && revision.bytes().all(|b| b.is_ascii_hexdigit())
        }
        None => false,
    }
}
35
/// Reports whether `image` is pinned to a SHA-256 Docker digest.
///
/// Expected form: `name@sha256:<64 hex digits>`. Everything after the first
/// `@sha256:` marker must be the digest and nothing else, so a reference with
/// a truncated digest, non-hex characters, or trailing text (including a
/// second `@sha256:` marker) is reported as unpinned.
pub fn is_docker_digest_pinned(image: &str) -> bool {
    // `split_once` keeps the whole remainder, so anything trailing the digest
    // makes the length check fail instead of being silently ignored.
    image
        .split_once("@sha256:")
        .map_or(false, |(_, digest)| {
            digest.len() == 64 && digest.bytes().all(|b| b.is_ascii_hexdigit())
        })
}
46
47// ── Graph-level precision markers ───────────────────────
48
49/// How complete is this authority graph? Parsers set this based on whether
50/// they could fully resolve all authority relationships in the pipeline YAML.
51///
52/// A `Partial` graph is still useful — it just tells the consumer that some
53/// authority paths may be missing. This is better than silent incompleteness.
54#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
55#[serde(rename_all = "snake_case")]
56pub enum AuthorityCompleteness {
57    /// Parser resolved all authority relationships.
58    Complete,
59    /// Parser found constructs it couldn't fully resolve (e.g. secrets in
60    /// shell strings, composite actions, reusable workflows). The graph
61    /// captures what it can, but edges may be missing.
62    Partial,
63    /// Parser couldn't determine completeness.
64    Unknown,
65}
66
67/// How broad is an identity's scope? Classifies the risk surface of tokens,
68/// service principals, and OIDC identities.
69#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
70#[serde(rename_all = "snake_case")]
71pub enum IdentityScope {
72    /// Wide permissions: write-all, admin, or unscoped tokens.
73    Broad,
74    /// Narrow permissions: contents:read, specific scopes.
75    Constrained,
76    /// Scope couldn't be determined — treat as risky.
77    Unknown,
78}
79
80impl IdentityScope {
81    /// Classify an identity scope from a permissions string.
82    pub fn from_permissions(perms: &str) -> Self {
83        let p = perms.to_lowercase();
84        if p.contains("write-all") || p.contains("admin") || p == "{}" || p.is_empty() {
85            IdentityScope::Broad
86        } else if p.contains("write") {
87            // Any write permission = broad (conservative)
88            IdentityScope::Broad
89        } else if p.contains("read") {
90            IdentityScope::Constrained
91        } else {
92            IdentityScope::Unknown
93        }
94    }
95}
96
97// ── Node types ──────────────────────────────────────────
98
99/// Semantic kind of a graph node.
100#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
101#[serde(rename_all = "snake_case")]
102pub enum NodeKind {
103    Step,
104    Secret,
105    Artifact,
106    Identity,
107    Image,
108}
109
110/// Trust classification. Explicit on every node — not inferred from kind.
111#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
112#[serde(rename_all = "snake_case")]
113pub enum TrustZone {
114    /// Code/config authored by the repo owner.
115    FirstParty,
116    /// Marketplace actions, external images (pinned).
117    ThirdParty,
118    /// Unpinned actions, fork PRs, user input.
119    Untrusted,
120}
121
122impl TrustZone {
123    /// Returns true if `self` is a lower trust level than `other`.
124    pub fn is_lower_than(&self, other: &TrustZone) -> bool {
125        self.rank() < other.rank()
126    }
127
128    fn rank(&self) -> u8 {
129        match self {
130            TrustZone::FirstParty => 2,
131            TrustZone::ThirdParty => 1,
132            TrustZone::Untrusted => 0,
133        }
134    }
135}
136
137/// A node in the authority graph.
138#[derive(Debug, Clone, Serialize, Deserialize)]
139pub struct Node {
140    pub id: NodeId,
141    pub kind: NodeKind,
142    pub name: String,
143    pub trust_zone: TrustZone,
144    /// Flexible metadata: pinning status, digest, scope, permissions, etc.
145    pub metadata: HashMap<String, String>,
146}
147
148// ── Edge types ──────────────────────────────────────────
149
150/// Edge semantics model authority/data flow — not syntactic YAML relations.
151/// Design test: "Can authority propagate along this edge?"
152#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
153#[serde(rename_all = "snake_case")]
154pub enum EdgeKind {
155    /// Step -> Secret or Identity (authority granted).
156    HasAccessTo,
157    /// Step -> Artifact (data flows out).
158    Produces,
159    /// Artifact -> Step (authority flows from artifact to consuming step).
160    Consumes,
161    /// Step -> Image/Action (execution delegation).
162    UsesImage,
163    /// Step -> Step (cross-job or action boundary).
164    DelegatesTo,
165}
166
167/// A directed edge in the authority graph.
168#[derive(Debug, Clone, Serialize, Deserialize)]
169pub struct Edge {
170    pub id: EdgeId,
171    pub from: NodeId,
172    pub to: NodeId,
173    pub kind: EdgeKind,
174}
175
176// ── Pipeline source ─────────────────────────────────────
177
178/// Where the pipeline definition came from.
179#[derive(Debug, Clone, Serialize, Deserialize)]
180pub struct PipelineSource {
181    pub file: String,
182    #[serde(skip_serializing_if = "Option::is_none")]
183    pub repo: Option<String>,
184    #[serde(skip_serializing_if = "Option::is_none")]
185    pub git_ref: Option<String>,
186}
187
188// ── The graph ───────────────────────────────────────────
189
190/// Directed authority graph. Nodes are pipeline elements (steps, secrets,
191/// artifacts, identities, images). Edges model authority/data flow.
192#[derive(Debug, Clone, Serialize, Deserialize)]
193pub struct AuthorityGraph {
194    pub source: PipelineSource,
195    pub nodes: Vec<Node>,
196    pub edges: Vec<Edge>,
197    /// How complete is this graph? Set by the parser based on what it could resolve.
198    pub completeness: AuthorityCompleteness,
199    /// Human-readable reasons why the graph is Partial (if applicable).
200    #[serde(default, skip_serializing_if = "Vec::is_empty")]
201    pub completeness_gaps: Vec<String>,
202}
203
204impl AuthorityGraph {
205    pub fn new(source: PipelineSource) -> Self {
206        Self {
207            source,
208            nodes: Vec::new(),
209            edges: Vec::new(),
210            completeness: AuthorityCompleteness::Complete,
211            completeness_gaps: Vec::new(),
212        }
213    }
214
215    /// Mark the graph as partially complete with a reason.
216    pub fn mark_partial(&mut self, reason: impl Into<String>) {
217        self.completeness = AuthorityCompleteness::Partial;
218        self.completeness_gaps.push(reason.into());
219    }
220
221    /// Add a node, returns its ID.
222    pub fn add_node(
223        &mut self,
224        kind: NodeKind,
225        name: impl Into<String>,
226        trust_zone: TrustZone,
227    ) -> NodeId {
228        let id = self.nodes.len();
229        self.nodes.push(Node {
230            id,
231            kind,
232            name: name.into(),
233            trust_zone,
234            metadata: HashMap::new(),
235        });
236        id
237    }
238
239    /// Add a node with metadata, returns its ID.
240    pub fn add_node_with_metadata(
241        &mut self,
242        kind: NodeKind,
243        name: impl Into<String>,
244        trust_zone: TrustZone,
245        metadata: HashMap<String, String>,
246    ) -> NodeId {
247        let id = self.nodes.len();
248        self.nodes.push(Node {
249            id,
250            kind,
251            name: name.into(),
252            trust_zone,
253            metadata,
254        });
255        id
256    }
257
258    /// Add a directed edge, returns its ID.
259    pub fn add_edge(&mut self, from: NodeId, to: NodeId, kind: EdgeKind) -> EdgeId {
260        let id = self.edges.len();
261        self.edges.push(Edge { id, from, to, kind });
262        id
263    }
264
265    /// Outgoing edges from a node.
266    pub fn edges_from(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
267        self.edges.iter().filter(move |e| e.from == id)
268    }
269
270    /// Incoming edges to a node.
271    pub fn edges_to(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
272        self.edges.iter().filter(move |e| e.to == id)
273    }
274
275    /// All authority-bearing source nodes (Secret + Identity).
276    /// These are the BFS start set for propagation analysis.
277    pub fn authority_sources(&self) -> impl Iterator<Item = &Node> {
278        self.nodes
279            .iter()
280            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
281    }
282
283    /// All nodes of a given kind.
284    pub fn nodes_of_kind(&self, kind: NodeKind) -> impl Iterator<Item = &Node> {
285        self.nodes.iter().filter(move |n| n.kind == kind)
286    }
287
288    /// All nodes in a given trust zone.
289    pub fn nodes_in_zone(&self, zone: TrustZone) -> impl Iterator<Item = &Node> {
290        self.nodes.iter().filter(move |n| n.trust_zone == zone)
291    }
292
293    /// Get a node by ID.
294    pub fn node(&self, id: NodeId) -> Option<&Node> {
295        self.nodes.get(id)
296    }
297
298    /// Get an edge by ID.
299    pub fn edge(&self, id: EdgeId) -> Option<&Edge> {
300        self.edges.get(id)
301    }
302}
303
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an empty graph whose source is `file`, with no repo or git ref.
    fn empty_graph(file: &str) -> AuthorityGraph {
        AuthorityGraph::new(PipelineSource {
            file: file.into(),
            repo: None,
            git_ref: None,
        })
    }

    #[test]
    fn build_simple_graph() {
        let mut graph = empty_graph("deploy.yml");

        let aws_key = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let tarball = graph.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
        let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);

        graph.add_edge(build, aws_key, EdgeKind::HasAccessTo);
        graph.add_edge(build, tarball, EdgeKind::Produces);
        graph.add_edge(tarball, deploy, EdgeKind::Consumes);

        assert_eq!(graph.nodes.len(), 4);
        assert_eq!(graph.edges.len(), 3);
        assert_eq!(graph.authority_sources().count(), 1);
        assert_eq!(graph.edges_from(build).count(), 2);
        // Consumes runs artifact -> step, so the artifact has one outgoing edge.
        assert_eq!(graph.edges_from(tarball).count(), 1);
    }

    #[test]
    fn completeness_default_is_complete() {
        let graph = empty_graph("test.yml");
        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
        assert!(graph.completeness_gaps.is_empty());
    }

    #[test]
    fn mark_partial_records_reason() {
        let mut graph = empty_graph("test.yml");
        graph.mark_partial("secrets in run: block inferred, not precisely mapped");
        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gaps.len(), 1);
    }

    #[test]
    fn identity_scope_from_permissions() {
        let cases = [
            ("write-all", IdentityScope::Broad),
            ("{ contents: write }", IdentityScope::Broad),
            ("{ contents: read }", IdentityScope::Constrained),
            ("{ id-token: write }", IdentityScope::Broad),
            ("", IdentityScope::Broad),
            ("custom-scope", IdentityScope::Unknown),
        ];
        for &(perms, expected) in &cases {
            assert_eq!(
                IdentityScope::from_permissions(perms),
                expected,
                "permissions: {:?}",
                perms
            );
        }
    }

    #[test]
    fn trust_zone_ordering() {
        let (first, third, untrusted) = (
            TrustZone::FirstParty,
            TrustZone::ThirdParty,
            TrustZone::Untrusted,
        );
        assert!(untrusted.is_lower_than(&first));
        assert!(third.is_lower_than(&first));
        assert!(untrusted.is_lower_than(&third));
        assert!(!first.is_lower_than(&first));
    }
}