Skip to main content

taudit_core/
graph.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
/// Identifier of a node in the authority graph.
///
/// `AuthorityGraph::add_node` hands these out as the node's index in the
/// graph's `nodes` vector.
pub type NodeId = usize;

/// Identifier of an edge in the authority graph.
///
/// `AuthorityGraph::add_edge` hands these out as the edge's index in the
/// graph's `edges` vector.
pub type EdgeId = usize;
9
10// ── Metadata key constants ─────────────────────────────
11// Avoids stringly-typed bugs across crate boundaries.
12
/// Metadata key `"digest"`.
/// NOTE(review): presumably stores the pinned digest of an image/action — confirm against the parsers.
pub const META_DIGEST: &str = "digest";
/// Metadata key `"permissions"`.
/// NOTE(review): presumably the raw permissions string of an Identity node — confirm against the parsers.
pub const META_PERMISSIONS: &str = "permissions";
/// Metadata key `"identity_scope"`.
/// NOTE(review): presumably the serialized `IdentityScope` classification — confirm against the parsers.
pub const META_IDENTITY_SCOPE: &str = "identity_scope";
/// Metadata key `"inferred"`.
/// NOTE(review): presumably flags nodes the parser inferred rather than resolved exactly — confirm.
pub const META_INFERRED: &str = "inferred";
/// Flags an Image node that is a job container rather than a `uses:` action.
pub const META_CONTAINER: &str = "container";
/// Flags an Identity node that is OIDC-capable (`permissions: id-token: write`).
pub const META_OIDC: &str = "oidc";
/// Flags a Secret node whose value is interpolated into a CLI flag argument
/// (e.g. `-var "key=$(SECRET)"`).
///
/// Such values show up in pipeline log output even with ADO secret masking enabled:
/// the command string is logged before masking runs, and Terraform itself logs
/// `-var` values.
pub const META_CLI_FLAG_EXPOSED: &str = "cli_flag_exposed";
/// Graph-level key naming the trigger type (e.g. `pull_request_target`, `pr`).
pub const META_TRIGGER: &str = "trigger";
/// Flags a Step that writes to the environment gate
/// (`$GITHUB_ENV`, ADO `##vso[task.setvariable]`).
pub const META_WRITES_ENV_GATE: &str = "writes_env_gate";
/// Flags a Step that performs cryptographic provenance attestation
/// (e.g. `actions/attest-build-provenance`).
pub const META_ATTESTS: &str = "attests";
/// Flags a Secret node that comes from an ADO variable group
/// (as opposed to an inline pipeline variable).
pub const META_VARIABLE_GROUP: &str = "variable_group";
/// Flags an Image node that is a self-hosted agent pool
/// (`pool.name` on ADO; `runs-on: self-hosted` on GHA).
pub const META_SELF_HOSTED: &str = "self_hosted";
/// Flags a Step that performs `checkout: self` (ADO) or a default
/// `actions/checkout` on a PR context.
pub const META_CHECKOUT_SELF: &str = "checkout_self";
/// Flags an Identity node that is an ADO service connection.
pub const META_SERVICE_CONNECTION: &str = "service_connection";
39
40// ── Shared helpers ─────────────────────────────────────
41
/// Reports whether `ref_str` is an action reference pinned to a commit SHA.
///
/// The reference must contain an `@`, and the text after the *last* `@` must
/// be at least 40 characters long and consist solely of ASCII hex digits.
/// Shared by the parser and the rules so both agree on what "pinned" means.
pub fn is_sha_pinned(ref_str: &str) -> bool {
    match ref_str.rsplit_once('@') {
        // No `@` at all: there is no ref part to inspect.
        None => false,
        Some((_, pin)) => pin.len() >= 40 && pin.bytes().all(|b| b.is_ascii_hexdigit()),
    }
}
53
/// Returns true if `image` is pinned to a Docker digest.
///
/// Docker digest format: `image@sha256:<64-hex-chars>`. Everything after the
/// first `@sha256:` marker must be exactly 64 ASCII hex digits, so references
/// with trailing content after the digest (including a second `@sha256:`
/// marker) are rejected rather than being treated as pinned.
pub fn is_docker_digest_pinned(image: &str) -> bool {
    match image.split_once("@sha256:") {
        // No digest marker: tag-only or bare image name.
        None => false,
        // Validate the whole remainder, not just the text up to the next marker.
        Some((_, digest)) => {
            digest.len() == 64 && digest.bytes().all(|b| b.is_ascii_hexdigit())
        }
    }
}
64
65// ── Graph-level precision markers ───────────────────────
66
67/// How complete is this authority graph? Parsers set this based on whether
68/// they could fully resolve all authority relationships in the pipeline YAML.
69///
70/// A `Partial` graph is still useful — it just tells the consumer that some
71/// authority paths may be missing. This is better than silent incompleteness.
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
73#[serde(rename_all = "snake_case")]
74pub enum AuthorityCompleteness {
75    /// Parser resolved all authority relationships.
76    Complete,
77    /// Parser found constructs it couldn't fully resolve (e.g. secrets in
78    /// shell strings, composite actions, reusable workflows). The graph
79    /// captures what it can, but edges may be missing.
80    Partial,
81    /// Parser couldn't determine completeness.
82    Unknown,
83}
84
85/// How broad is an identity's scope? Classifies the risk surface of tokens,
86/// service principals, and OIDC identities.
87#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
88#[serde(rename_all = "snake_case")]
89pub enum IdentityScope {
90    /// Wide permissions: write-all, admin, or unscoped tokens.
91    Broad,
92    /// Narrow permissions: contents:read, specific scopes.
93    Constrained,
94    /// Scope couldn't be determined — treat as risky.
95    Unknown,
96}
97
98impl IdentityScope {
99    /// Classify an identity scope from a permissions string.
100    pub fn from_permissions(perms: &str) -> Self {
101        let p = perms.to_lowercase();
102        if p.contains("write-all") || p.contains("admin") || p == "{}" || p.is_empty() {
103            IdentityScope::Broad
104        } else if p.contains("write") {
105            // Any write permission = broad (conservative)
106            IdentityScope::Broad
107        } else if p.contains("read") {
108            IdentityScope::Constrained
109        } else {
110            IdentityScope::Unknown
111        }
112    }
113}
114
115// ── Node types ──────────────────────────────────────────
116
117/// Semantic kind of a graph node.
118#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
119#[serde(rename_all = "snake_case")]
120pub enum NodeKind {
121    Step,
122    Secret,
123    Artifact,
124    Identity,
125    Image,
126}
127
128/// Trust classification. Explicit on every node — not inferred from kind.
129#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
130#[serde(rename_all = "snake_case")]
131pub enum TrustZone {
132    /// Code/config authored by the repo owner.
133    FirstParty,
134    /// Marketplace actions, external images (pinned).
135    ThirdParty,
136    /// Unpinned actions, fork PRs, user input.
137    Untrusted,
138}
139
140impl TrustZone {
141    /// Returns true if `self` is a lower trust level than `other`.
142    pub fn is_lower_than(&self, other: &TrustZone) -> bool {
143        self.rank() < other.rank()
144    }
145
146    fn rank(&self) -> u8 {
147        match self {
148            TrustZone::FirstParty => 2,
149            TrustZone::ThirdParty => 1,
150            TrustZone::Untrusted => 0,
151        }
152    }
153}
154
155/// A node in the authority graph.
156#[derive(Debug, Clone, Serialize, Deserialize)]
157pub struct Node {
158    pub id: NodeId,
159    pub kind: NodeKind,
160    pub name: String,
161    pub trust_zone: TrustZone,
162    /// Flexible metadata: pinning status, digest, scope, permissions, etc.
163    pub metadata: HashMap<String, String>,
164}
165
166// ── Edge types ──────────────────────────────────────────
167
168/// Edge semantics model authority/data flow — not syntactic YAML relations.
169/// Design test: "Can authority propagate along this edge?"
170#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
171#[serde(rename_all = "snake_case")]
172pub enum EdgeKind {
173    /// Step -> Secret or Identity (authority granted at runtime).
174    HasAccessTo,
175    /// Step -> Artifact (data flows out).
176    Produces,
177    /// Artifact -> Step (authority flows from artifact to consuming step).
178    Consumes,
179    /// Step -> Image/Action (execution delegation).
180    UsesImage,
181    /// Step -> Step (cross-job or action boundary).
182    DelegatesTo,
183    /// Step -> Secret or Identity (credential written to disk, outliving the step's lifetime).
184    /// Distinct from HasAccessTo: disk persistence is accessible to all subsequent steps
185    /// and processes with filesystem access, not just the step that created it.
186    PersistsTo,
187}
188
189/// A directed edge in the authority graph.
190#[derive(Debug, Clone, Serialize, Deserialize)]
191pub struct Edge {
192    pub id: EdgeId,
193    pub from: NodeId,
194    pub to: NodeId,
195    pub kind: EdgeKind,
196}
197
198// ── Pipeline source ─────────────────────────────────────
199
200/// Where the pipeline definition came from.
201#[derive(Debug, Clone, Serialize, Deserialize)]
202pub struct PipelineSource {
203    pub file: String,
204    #[serde(skip_serializing_if = "Option::is_none")]
205    pub repo: Option<String>,
206    #[serde(skip_serializing_if = "Option::is_none")]
207    pub git_ref: Option<String>,
208}
209
210// ── The graph ───────────────────────────────────────────
211
212/// Directed authority graph. Nodes are pipeline elements (steps, secrets,
213/// artifacts, identities, images). Edges model authority/data flow.
214#[derive(Debug, Clone, Serialize, Deserialize)]
215pub struct AuthorityGraph {
216    pub source: PipelineSource,
217    pub nodes: Vec<Node>,
218    pub edges: Vec<Edge>,
219    /// How complete is this graph? Set by the parser based on what it could resolve.
220    pub completeness: AuthorityCompleteness,
221    /// Human-readable reasons why the graph is Partial (if applicable).
222    #[serde(default, skip_serializing_if = "Vec::is_empty")]
223    pub completeness_gaps: Vec<String>,
224    /// Graph-level metadata set by parsers (e.g. trigger type, platform-specific flags).
225    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
226    pub metadata: HashMap<String, String>,
227}
228
229impl AuthorityGraph {
230    pub fn new(source: PipelineSource) -> Self {
231        Self {
232            source,
233            nodes: Vec::new(),
234            edges: Vec::new(),
235            completeness: AuthorityCompleteness::Complete,
236            completeness_gaps: Vec::new(),
237            metadata: HashMap::new(),
238        }
239    }
240
241    /// Mark the graph as partially complete with a reason.
242    pub fn mark_partial(&mut self, reason: impl Into<String>) {
243        self.completeness = AuthorityCompleteness::Partial;
244        self.completeness_gaps.push(reason.into());
245    }
246
247    /// Add a node, returns its ID.
248    pub fn add_node(
249        &mut self,
250        kind: NodeKind,
251        name: impl Into<String>,
252        trust_zone: TrustZone,
253    ) -> NodeId {
254        let id = self.nodes.len();
255        self.nodes.push(Node {
256            id,
257            kind,
258            name: name.into(),
259            trust_zone,
260            metadata: HashMap::new(),
261        });
262        id
263    }
264
265    /// Add a node with metadata, returns its ID.
266    pub fn add_node_with_metadata(
267        &mut self,
268        kind: NodeKind,
269        name: impl Into<String>,
270        trust_zone: TrustZone,
271        metadata: HashMap<String, String>,
272    ) -> NodeId {
273        let id = self.nodes.len();
274        self.nodes.push(Node {
275            id,
276            kind,
277            name: name.into(),
278            trust_zone,
279            metadata,
280        });
281        id
282    }
283
284    /// Add a directed edge, returns its ID.
285    pub fn add_edge(&mut self, from: NodeId, to: NodeId, kind: EdgeKind) -> EdgeId {
286        let id = self.edges.len();
287        self.edges.push(Edge { id, from, to, kind });
288        id
289    }
290
291    /// Outgoing edges from a node.
292    pub fn edges_from(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
293        self.edges.iter().filter(move |e| e.from == id)
294    }
295
296    /// Incoming edges to a node.
297    pub fn edges_to(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
298        self.edges.iter().filter(move |e| e.to == id)
299    }
300
301    /// All authority-bearing source nodes (Secret + Identity).
302    /// These are the BFS start set for propagation analysis.
303    pub fn authority_sources(&self) -> impl Iterator<Item = &Node> {
304        self.nodes
305            .iter()
306            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
307    }
308
309    /// All nodes of a given kind.
310    pub fn nodes_of_kind(&self, kind: NodeKind) -> impl Iterator<Item = &Node> {
311        self.nodes.iter().filter(move |n| n.kind == kind)
312    }
313
314    /// All nodes in a given trust zone.
315    pub fn nodes_in_zone(&self, zone: TrustZone) -> impl Iterator<Item = &Node> {
316        self.nodes.iter().filter(move |n| n.trust_zone == zone)
317    }
318
319    /// Get a node by ID.
320    pub fn node(&self, id: NodeId) -> Option<&Node> {
321        self.nodes.get(id)
322    }
323
324    /// Get an edge by ID.
325    pub fn edge(&self, id: EdgeId) -> Option<&Edge> {
326        self.edges.get(id)
327    }
328}
329
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an empty graph whose source names only `file`.
    fn graph_for(file: &str) -> AuthorityGraph {
        AuthorityGraph::new(PipelineSource {
            file: file.into(),
            repo: None,
            git_ref: None,
        })
    }

    #[test]
    fn build_simple_graph() {
        let mut graph = graph_for("deploy.yml");

        let secret = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let artifact = graph.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
        let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);

        graph.add_edge(build, secret, EdgeKind::HasAccessTo);
        graph.add_edge(build, artifact, EdgeKind::Produces);
        graph.add_edge(artifact, deploy, EdgeKind::Consumes);

        assert_eq!(graph.nodes.len(), 4);
        assert_eq!(graph.edges.len(), 3);
        assert_eq!(graph.authority_sources().count(), 1);
        assert_eq!(graph.edges_from(build).count(), 2);
        // Consumes flows artifact -> step, so the artifact has one outgoing edge.
        assert_eq!(graph.edges_from(artifact).count(), 1);
    }

    #[test]
    fn completeness_default_is_complete() {
        let graph = graph_for("test.yml");

        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
        assert!(graph.completeness_gaps.is_empty());
    }

    #[test]
    fn mark_partial_records_reason() {
        let mut graph = graph_for("test.yml");

        graph.mark_partial("secrets in run: block inferred, not precisely mapped");

        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gaps.len(), 1);
    }

    #[test]
    fn identity_scope_from_permissions() {
        // (permissions string, expected classification)
        let cases = [
            ("write-all", IdentityScope::Broad),
            ("{ contents: write }", IdentityScope::Broad),
            ("{ contents: read }", IdentityScope::Constrained),
            ("{ id-token: write }", IdentityScope::Broad),
            ("", IdentityScope::Broad),
            ("custom-scope", IdentityScope::Unknown),
        ];

        for &(perms, expected) in cases.iter() {
            assert_eq!(IdentityScope::from_permissions(perms), expected);
        }
    }

    #[test]
    fn trust_zone_ordering() {
        let (first, third, untrusted) = (
            TrustZone::FirstParty,
            TrustZone::ThirdParty,
            TrustZone::Untrusted,
        );

        assert!(untrusted.is_lower_than(&first));
        assert!(third.is_lower_than(&first));
        assert!(untrusted.is_lower_than(&third));
        assert!(!first.is_lower_than(&first));
    }
}
413}