Skip to main content

taudit_core/
graph.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
/// Unique identifier for a node in the authority graph.
///
/// `AuthorityGraph::add_node` and `AuthorityGraph::add_node_with_metadata`
/// allocate these as the node's position in `AuthorityGraph::nodes`, and
/// `AuthorityGraph::node` resolves them by indexing into that vector.
pub type NodeId = usize;

/// Unique identifier for an edge in the authority graph.
///
/// `AuthorityGraph::add_edge` allocates these as the edge's position in
/// `AuthorityGraph::edges`, and `AuthorityGraph::edge` resolves them by
/// indexing into that vector.
pub type EdgeId = usize;
9
10// ── Metadata key constants ─────────────────────────────
11// Avoids stringly-typed bugs across crate boundaries.
12
/// Metadata key for a digest value.
/// NOTE(review): which node kinds carry this key, and the value format, are
/// set by the parsers and not visible in this file — confirm there.
pub const META_DIGEST: &str = "digest";
/// Metadata key for a permissions string.
/// NOTE(review): presumably the same text that
/// `IdentityScope::from_permissions` classifies — confirm against the parsers.
pub const META_PERMISSIONS: &str = "permissions";
/// Metadata key for an identity's scope classification.
/// NOTE(review): presumably holds a rendered `IdentityScope` — confirm the
/// value format against the code that writes it.
pub const META_IDENTITY_SCOPE: &str = "identity_scope";
/// Metadata key flagging something as inferred.
/// NOTE(review): presumably marks nodes the parser inferred rather than
/// resolved precisely — confirm against the parsers.
pub const META_INFERRED: &str = "inferred";
/// Marks an Image node as a job container (not a `uses:` action).
pub const META_CONTAINER: &str = "container";
/// Marks an Identity node as OIDC-capable (`permissions: id-token: write`).
pub const META_OIDC: &str = "oidc";
/// Marks a Secret node whose value is interpolated into a CLI flag argument (e.g. `-var "key=$(SECRET)"`).
/// CLI flag values appear in pipeline log output even when ADO secret masking is active,
/// because the command string is logged before masking runs and Terraform itself logs `-var` values.
pub const META_CLI_FLAG_EXPOSED: &str = "cli_flag_exposed";
/// Graph-level metadata: identifies the trigger type (e.g. `pull_request_target`, `pr`).
pub const META_TRIGGER: &str = "trigger";
/// Marks a Step that writes to the environment gate (`$GITHUB_ENV`, ADO `##vso[task.setvariable]`).
pub const META_WRITES_ENV_GATE: &str = "writes_env_gate";
/// Marks a Step that performs cryptographic provenance attestation (e.g. `actions/attest-build-provenance`).
pub const META_ATTESTS: &str = "attests";
/// Marks a Secret node sourced from an ADO variable group (vs inline pipeline variable).
pub const META_VARIABLE_GROUP: &str = "variable_group";
/// Marks an Image node as a self-hosted agent pool (pool.name on ADO; runs-on: self-hosted on GHA).
pub const META_SELF_HOSTED: &str = "self_hosted";
/// Marks a Step that performs a `checkout: self` (ADO) or default `actions/checkout` on a PR context.
pub const META_CHECKOUT_SELF: &str = "checkout_self";
/// Marks an Identity node as an ADO service connection.
pub const META_SERVICE_CONNECTION: &str = "service_connection";
/// Marks an Identity node as implicitly injected by the platform (e.g. ADO System.AccessToken).
/// Implicit tokens are structurally accessible to all tasks by platform design — exposure
/// to untrusted steps is Info-level (structural) rather than Critical (misconfiguration).
pub const META_IMPLICIT: &str = "implicit";
43
44// ── Shared helpers ─────────────────────────────────────
45
/// Reports whether `ref_str` is an action reference pinned to a commit SHA.
///
/// Only the text after the *last* `@` is inspected: it must be at least 40
/// bytes long and consist entirely of ASCII hex digits. A reference with no
/// `@` at all is never considered pinned.
///
/// Single source of truth — used by both parser and rules.
pub fn is_sha_pinned(ref_str: &str) -> bool {
    match ref_str.rsplit_once('@') {
        Some((_, revision)) => {
            // Hex digits are ASCII, so a byte-wise check is equivalent to a
            // char-wise one: any multi-byte character fails it either way.
            revision.len() >= 40 && revision.bytes().all(|b| b.is_ascii_hexdigit())
        }
        None => false,
    }
}
57
/// Returns true if `image` is pinned to a Docker digest.
///
/// Docker digest format: `image@sha256:<64-hex-chars>`. Everything after the
/// first `@sha256:` marker must be exactly 64 ASCII hex digits, so a
/// reference with trailing content after the digest (for example a second
/// `@sha256:` segment) is rejected as malformed rather than reported as
/// pinned.
pub fn is_docker_digest_pinned(image: &str) -> bool {
    match image.split_once("@sha256:") {
        // Validate the whole remainder, not just the text up to the next
        // marker — otherwise `img@sha256:<digest>@sha256:junk` would pass.
        Some((_, digest)) => digest.len() == 64 && digest.bytes().all(|b| b.is_ascii_hexdigit()),
        None => false,
    }
}
68
69// ── Graph-level precision markers ───────────────────────
70
/// How complete is this authority graph? Parsers set this based on whether
/// they could fully resolve all authority relationships in the pipeline YAML.
///
/// A `Partial` graph is still useful — it just tells the consumer that some
/// authority paths may be missing. This is better than silent incompleteness.
///
/// `AuthorityGraph::new` starts a graph at `Complete`;
/// `AuthorityGraph::mark_partial` switches it to `Partial`.
/// Variant names are serialized in `snake_case` (e.g. `partial`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuthorityCompleteness {
    /// Parser resolved all authority relationships.
    Complete,
    /// Parser found constructs it couldn't fully resolve (e.g. secrets in
    /// shell strings, composite actions, reusable workflows). The graph
    /// captures what it can, but edges may be missing.
    Partial,
    /// Parser couldn't determine completeness.
    Unknown,
}
88
/// How broad is an identity's scope? Classifies the risk surface of tokens,
/// service principals, and OIDC identities.
///
/// Variant names are serialized in `snake_case` (e.g. `constrained`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IdentityScope {
    /// Wide permissions: write-all, admin, or unscoped tokens.
    Broad,
    /// Narrow permissions: contents:read, specific scopes.
    Constrained,
    /// Scope couldn't be determined — treat as risky.
    Unknown,
}
101
102impl IdentityScope {
103    /// Classify an identity scope from a permissions string.
104    pub fn from_permissions(perms: &str) -> Self {
105        let p = perms.to_lowercase();
106        if p.contains("write-all") || p.contains("admin") || p == "{}" || p.is_empty() {
107            IdentityScope::Broad
108        } else if p.contains("write") {
109            // Any write permission = broad (conservative)
110            IdentityScope::Broad
111        } else if p.contains("read") {
112            IdentityScope::Constrained
113        } else {
114            IdentityScope::Unknown
115        }
116    }
117}
118
119// ── Node types ──────────────────────────────────────────
120
/// Semantic kind of a graph node.
///
/// Variant names are serialized in `snake_case` (e.g. `secret`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeKind {
    /// A pipeline step; the usual origin of `EdgeKind` edges.
    Step,
    /// A secret value. Counted as an authority source by
    /// `AuthorityGraph::authority_sources`.
    Secret,
    /// An artifact produced by one step and consumed by another.
    Artifact,
    /// An identity (token, service principal, OIDC identity). Counted as an
    /// authority source by `AuthorityGraph::authority_sources`.
    Identity,
    /// An image or action that a step delegates execution to.
    Image,
}
131
/// Trust classification. Explicit on every node — not inferred from kind.
///
/// Ordered from most to least trusted as `FirstParty`, `ThirdParty`,
/// `Untrusted` (see `TrustZone::is_lower_than`).
/// Variant names are serialized in `snake_case` (e.g. `first_party`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TrustZone {
    /// Code/config authored by the repo owner.
    FirstParty,
    /// Marketplace actions, external images (pinned).
    ThirdParty,
    /// Unpinned actions, fork PRs, user input.
    Untrusted,
}
143
144impl TrustZone {
145    /// Returns true if `self` is a lower trust level than `other`.
146    pub fn is_lower_than(&self, other: &TrustZone) -> bool {
147        self.rank() < other.rank()
148    }
149
150    fn rank(&self) -> u8 {
151        match self {
152            TrustZone::FirstParty => 2,
153            TrustZone::ThirdParty => 1,
154            TrustZone::Untrusted => 0,
155        }
156    }
157}
158
/// A node in the authority graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Identifier assigned by `AuthorityGraph::add_node` /
    /// `add_node_with_metadata`: the node's index in `AuthorityGraph::nodes`.
    pub id: NodeId,
    /// What sort of pipeline element this node represents.
    pub kind: NodeKind,
    /// Name supplied by whoever added the node (e.g. a step or secret name).
    pub name: String,
    /// Trust classification, set explicitly when the node is added.
    pub trust_zone: TrustZone,
    /// Flexible metadata: pinning status, digest, scope, permissions, etc.
    /// Empty for nodes created through `AuthorityGraph::add_node`.
    pub metadata: HashMap<String, String>,
}
169
170// ── Edge types ──────────────────────────────────────────
171
/// Edge semantics model authority/data flow — not syntactic YAML relations.
/// Design test: "Can authority propagate along this edge?"
///
/// Variant names are serialized in `snake_case` (e.g. `has_access_to`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeKind {
    /// Step -> Secret or Identity (authority granted at runtime).
    HasAccessTo,
    /// Step -> Artifact (data flows out).
    Produces,
    /// Artifact -> Step (authority flows from artifact to consuming step).
    Consumes,
    /// Step -> Image/Action (execution delegation).
    UsesImage,
    /// Step -> Step (cross-job or action boundary).
    DelegatesTo,
    /// Step -> Secret or Identity (credential written to disk, outliving the step's lifetime).
    /// Distinct from HasAccessTo: disk persistence is accessible to all subsequent steps
    /// and processes with filesystem access, not just the step that created it.
    PersistsTo,
}
192
/// A directed edge in the authority graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Identifier assigned by `AuthorityGraph::add_edge`: the edge's index in
    /// `AuthorityGraph::edges`.
    pub id: EdgeId,
    /// Node the edge starts at (matched by `AuthorityGraph::edges_from`).
    pub from: NodeId,
    /// Node the edge points to (matched by `AuthorityGraph::edges_to`).
    pub to: NodeId,
    /// What kind of authority/data flow this edge models.
    pub kind: EdgeKind,
}
201
202// ── Pipeline source ─────────────────────────────────────
203
/// Where the pipeline definition came from.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineSource {
    /// The pipeline definition file (e.g. `deploy.yml`).
    pub file: String,
    /// Repository the file belongs to, if known. Left out of the serialized
    /// form when `None`.
    /// NOTE(review): the expected format (URL vs `owner/name`) is not visible
    /// here — confirm against the callers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repo: Option<String>,
    /// Git ref the file was read at, if known. Left out of the serialized
    /// form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub git_ref: Option<String>,
}
213
214// ── The graph ───────────────────────────────────────────
215
/// Directed authority graph. Nodes are pipeline elements (steps, secrets,
/// artifacts, identities, images). Edges model authority/data flow.
///
/// Node and edge IDs are indices into `nodes` and `edges`; the `add_*`
/// methods maintain that by only ever appending.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorityGraph {
    /// Where the pipeline definition came from.
    pub source: PipelineSource,
    /// All nodes, in insertion order; a node's `id` is its index here.
    pub nodes: Vec<Node>,
    /// All edges, in insertion order; an edge's `id` is its index here.
    pub edges: Vec<Edge>,
    /// How complete is this graph? Set by the parser based on what it could resolve.
    pub completeness: AuthorityCompleteness,
    /// Human-readable reasons why the graph is Partial (if applicable).
    /// Defaults to empty on deserialization and is omitted from the
    /// serialized form when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub completeness_gaps: Vec<String>,
    /// Graph-level metadata set by parsers (e.g. trigger type, platform-specific flags).
    /// Defaults to empty on deserialization and is omitted from the
    /// serialized form when empty.
    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
    pub metadata: HashMap<String, String>,
}
232
233impl AuthorityGraph {
234    pub fn new(source: PipelineSource) -> Self {
235        Self {
236            source,
237            nodes: Vec::new(),
238            edges: Vec::new(),
239            completeness: AuthorityCompleteness::Complete,
240            completeness_gaps: Vec::new(),
241            metadata: HashMap::new(),
242        }
243    }
244
245    /// Mark the graph as partially complete with a reason.
246    pub fn mark_partial(&mut self, reason: impl Into<String>) {
247        self.completeness = AuthorityCompleteness::Partial;
248        self.completeness_gaps.push(reason.into());
249    }
250
251    /// Add a node, returns its ID.
252    pub fn add_node(
253        &mut self,
254        kind: NodeKind,
255        name: impl Into<String>,
256        trust_zone: TrustZone,
257    ) -> NodeId {
258        let id = self.nodes.len();
259        self.nodes.push(Node {
260            id,
261            kind,
262            name: name.into(),
263            trust_zone,
264            metadata: HashMap::new(),
265        });
266        id
267    }
268
269    /// Add a node with metadata, returns its ID.
270    pub fn add_node_with_metadata(
271        &mut self,
272        kind: NodeKind,
273        name: impl Into<String>,
274        trust_zone: TrustZone,
275        metadata: HashMap<String, String>,
276    ) -> NodeId {
277        let id = self.nodes.len();
278        self.nodes.push(Node {
279            id,
280            kind,
281            name: name.into(),
282            trust_zone,
283            metadata,
284        });
285        id
286    }
287
288    /// Add a directed edge, returns its ID.
289    pub fn add_edge(&mut self, from: NodeId, to: NodeId, kind: EdgeKind) -> EdgeId {
290        let id = self.edges.len();
291        self.edges.push(Edge { id, from, to, kind });
292        id
293    }
294
295    /// Outgoing edges from a node.
296    pub fn edges_from(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
297        self.edges.iter().filter(move |e| e.from == id)
298    }
299
300    /// Incoming edges to a node.
301    pub fn edges_to(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
302        self.edges.iter().filter(move |e| e.to == id)
303    }
304
305    /// All authority-bearing source nodes (Secret + Identity).
306    /// These are the BFS start set for propagation analysis.
307    pub fn authority_sources(&self) -> impl Iterator<Item = &Node> {
308        self.nodes
309            .iter()
310            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
311    }
312
313    /// All nodes of a given kind.
314    pub fn nodes_of_kind(&self, kind: NodeKind) -> impl Iterator<Item = &Node> {
315        self.nodes.iter().filter(move |n| n.kind == kind)
316    }
317
318    /// All nodes in a given trust zone.
319    pub fn nodes_in_zone(&self, zone: TrustZone) -> impl Iterator<Item = &Node> {
320        self.nodes.iter().filter(move |n| n.trust_zone == zone)
321    }
322
323    /// Get a node by ID.
324    pub fn node(&self, id: NodeId) -> Option<&Node> {
325        self.nodes.get(id)
326    }
327
328    /// Get an edge by ID.
329    pub fn edge(&self, id: EdgeId) -> Option<&Edge> {
330        self.edges.get(id)
331    }
332}
333
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an empty graph whose source is just a file name.
    fn graph_for(file: &str) -> AuthorityGraph {
        AuthorityGraph::new(PipelineSource {
            file: file.into(),
            repo: None,
            git_ref: None,
        })
    }

    /// A small build -> artifact -> deploy pipeline with one secret.
    #[test]
    fn build_simple_graph() {
        let mut graph = graph_for("deploy.yml");

        let aws_key = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let tarball = graph.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
        let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);

        graph.add_edge(build, aws_key, EdgeKind::HasAccessTo);
        graph.add_edge(build, tarball, EdgeKind::Produces);
        graph.add_edge(tarball, deploy, EdgeKind::Consumes);

        assert_eq!(graph.nodes.len(), 4);
        assert_eq!(graph.edges.len(), 3);
        assert_eq!(graph.authority_sources().count(), 1);
        assert_eq!(graph.edges_from(build).count(), 2);
        // Consumes flows artifact -> step
        assert_eq!(graph.edges_from(tarball).count(), 1);
    }

    /// A freshly created graph claims completeness and lists no gaps.
    #[test]
    fn completeness_default_is_complete() {
        let graph = graph_for("test.yml");

        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
        assert!(graph.completeness_gaps.is_empty());
    }

    /// Marking a graph partial flips the flag and stores the reason.
    #[test]
    fn mark_partial_records_reason() {
        let mut graph = graph_for("test.yml");

        graph.mark_partial("secrets in run: block inferred, not precisely mapped");

        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gaps.len(), 1);
    }

    /// Permission strings map onto the expected scope classification.
    #[test]
    fn identity_scope_from_permissions() {
        let cases = [
            ("write-all", IdentityScope::Broad),
            ("{ contents: write }", IdentityScope::Broad),
            ("{ contents: read }", IdentityScope::Constrained),
            ("{ id-token: write }", IdentityScope::Broad),
            ("", IdentityScope::Broad),
            ("custom-scope", IdentityScope::Unknown),
        ];

        for &(perms, expected) in cases.iter() {
            assert_eq!(IdentityScope::from_permissions(perms), expected);
        }
    }

    /// Trust decreases FirstParty -> ThirdParty -> Untrusted, strictly.
    #[test]
    fn trust_zone_ordering() {
        let (first, third, untrusted) = (
            TrustZone::FirstParty,
            TrustZone::ThirdParty,
            TrustZone::Untrusted,
        );

        assert!(untrusted.is_lower_than(&first));
        assert!(third.is_lower_than(&first));
        assert!(untrusted.is_lower_than(&third));
        assert!(!first.is_lower_than(&first));
    }
}