// taudit_core/graph.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
/// Identifier of a node within one authority graph.
///
/// `AuthorityGraph::add_node` and `AuthorityGraph::add_node_with_metadata`
/// hand these out as the node's position in `AuthorityGraph::nodes`.
pub type NodeId = usize;

/// Identifier of an edge within one authority graph.
///
/// `AuthorityGraph::add_edge` hands these out as the edge's position in
/// `AuthorityGraph::edges`.
pub type EdgeId = usize;
9
// ── Metadata key constants ─────────────────────────────
// Keys for the string-to-string `metadata` maps. Defining them once avoids
// stringly-typed bugs across crate boundaries.

/// Key for a digest value.
/// NOTE(review): presumably the digest an image/action reference resolves to — confirm against the parsers.
pub const META_DIGEST: &str = "digest";
/// Key for a permissions description.
/// NOTE(review): likely the text later classified by `IdentityScope::from_permissions` — confirm.
pub const META_PERMISSIONS: &str = "permissions";
/// Key for an identity's scope classification.
/// NOTE(review): presumably a serialized `IdentityScope` — confirm.
pub const META_IDENTITY_SCOPE: &str = "identity_scope";
/// Key flagging an element as inferred.
/// NOTE(review): exact semantics are not visible in this file — confirm.
pub const META_INFERRED: &str = "inferred";
/// Set on an Image node that is a job container rather than a `uses:` action.
pub const META_CONTAINER: &str = "container";
/// Set on an Identity node that is OIDC-capable (`permissions: id-token: write`).
pub const META_OIDC: &str = "oidc";
/// Set on a Secret node whose value is interpolated into a CLI flag argument
/// (e.g. `-var "key=$(SECRET)"`).
///
/// Such values show up in pipeline log output even with ADO secret masking
/// enabled: the command string is logged before masking runs, and Terraform
/// itself logs `-var` values.
pub const META_CLI_FLAG_EXPOSED: &str = "cli_flag_exposed";
/// Graph-level key naming the trigger type (e.g. `pull_request_target`, `pr`).
pub const META_TRIGGER: &str = "trigger";
/// Set on a Step that writes to the environment gate
/// (`$GITHUB_ENV`, ADO `##vso[task.setvariable]`).
pub const META_WRITES_ENV_GATE: &str = "writes_env_gate";
/// Set on a Step that performs cryptographic provenance attestation
/// (e.g. `actions/attest-build-provenance`).
pub const META_ATTESTS: &str = "attests";
/// Set on a Secret node that comes from an ADO variable group rather than an
/// inline pipeline variable.
pub const META_VARIABLE_GROUP: &str = "variable_group";
/// Set on an Image node that is a self-hosted agent pool
/// (`pool.name` on ADO; `runs-on: self-hosted` on GHA).
pub const META_SELF_HOSTED: &str = "self_hosted";
/// Set on a Step that performs `checkout: self` (ADO) or a default
/// `actions/checkout` in a PR context.
pub const META_CHECKOUT_SELF: &str = "checkout_self";
/// Set on an Identity node that is an ADO service connection.
pub const META_SERVICE_CONNECTION: &str = "service_connection";
/// Set on an Identity node the platform injects implicitly
/// (e.g. ADO `System.AccessToken`).
///
/// Implicit tokens are reachable by every task by platform design, so their
/// exposure to untrusted steps is Info-level (structural) rather than
/// Critical (misconfiguration).
pub const META_IMPLICIT: &str = "implicit";
/// Set on a Step inside an ADO deployment job whose `environment:` requires
/// approvals.
///
/// The approval is a manual gate that breaks automatic authority propagation.
/// Findings whose path crosses such a node drop one severity step
/// (Critical → High → Medium → Low).
pub const META_ENV_APPROVAL: &str = "env_approval";
/// Name of the parent job, recorded on every Step node.
///
/// Enables per-job subgraph filtering (e.g. `taudit map --job build`) and lets
/// downstream consumers attribute a step to its containing job. Both the GHA
/// and ADO parsers set it on every Step they create within a job's scope.
pub const META_JOB_NAME: &str = "job_name";
53
54// ── Shared helpers ─────────────────────────────────────
55
/// Returns true if `ref_str` is an action reference pinned to a full commit SHA.
///
/// The text after the last `@` must be a complete Git object id: exactly
/// 40 hex digits (SHA-1) or exactly 64 hex digits (SHA-256). Any other
/// length — including hex strings longer than 40 characters that are not
/// 64 long — cannot be a commit id, so it has to be a (mutable) branch or tag
/// name and is not treated as pinned. A string without `@` is never pinned.
///
/// Single source of truth — used by both parser and rules.
pub fn is_sha_pinned(ref_str: &str) -> bool {
    const SHA1_HEX_LEN: usize = 40;
    const SHA256_HEX_LEN: usize = 64;

    match ref_str.rsplit_once('@') {
        Some((_, rev)) => {
            // Length first: it is the cheap check and rejects tags like `v4`.
            matches!(rev.len(), SHA1_HEX_LEN | SHA256_HEX_LEN)
                && rev.bytes().all(|b| b.is_ascii_hexdigit())
        }
        None => false,
    }
}
67
/// Returns true if `image` carries a Docker content digest.
///
/// Expected shape: `image@sha256:<64-hex-chars>`. The text following the first
/// `@sha256:` marker (up to the next marker, if any) must be exactly 64 ASCII
/// hex digits; without a marker the image is not considered pinned.
pub fn is_docker_digest_pinned(image: &str) -> bool {
    const MARKER: &str = "@sha256:";
    const DIGEST_HEX_LEN: usize = 64;

    // Piece #0 is the image name; piece #1 only exists when the marker does.
    match image.split(MARKER).nth(1) {
        Some(digest) => {
            digest.len() == DIGEST_HEX_LEN && digest.bytes().all(|b| b.is_ascii_hexdigit())
        }
        None => false,
    }
}
78
79// ── Graph-level precision markers ───────────────────────
80
81/// How complete is this authority graph? Parsers set this based on whether
82/// they could fully resolve all authority relationships in the pipeline YAML.
83///
84/// A `Partial` graph is still useful — it just tells the consumer that some
85/// authority paths may be missing. This is better than silent incompleteness.
86#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
87#[serde(rename_all = "snake_case")]
88pub enum AuthorityCompleteness {
89    /// Parser resolved all authority relationships.
90    Complete,
91    /// Parser found constructs it couldn't fully resolve (e.g. secrets in
92    /// shell strings, composite actions, reusable workflows). The graph
93    /// captures what it can, but edges may be missing.
94    Partial,
95    /// Parser couldn't determine completeness.
96    Unknown,
97}
98
99/// How broad is an identity's scope? Classifies the risk surface of tokens,
100/// service principals, and OIDC identities.
101#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
102#[serde(rename_all = "snake_case")]
103pub enum IdentityScope {
104    /// Wide permissions: write-all, admin, or unscoped tokens.
105    Broad,
106    /// Narrow permissions: contents:read, specific scopes.
107    Constrained,
108    /// Scope couldn't be determined — treat as risky.
109    Unknown,
110}
111
112impl IdentityScope {
113    /// Classify an identity scope from a permissions string.
114    pub fn from_permissions(perms: &str) -> Self {
115        let p = perms.to_lowercase();
116        if p.contains("write-all") || p.contains("admin") || p == "{}" || p.is_empty() {
117            IdentityScope::Broad
118        } else if p.contains("write") {
119            // Any write permission = broad (conservative)
120            IdentityScope::Broad
121        } else if p.contains("read") {
122            IdentityScope::Constrained
123        } else {
124            IdentityScope::Unknown
125        }
126    }
127}
128
129// ── Node types ──────────────────────────────────────────
130
131/// Semantic kind of a graph node.
132#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
133#[serde(rename_all = "snake_case")]
134pub enum NodeKind {
135    Step,
136    Secret,
137    Artifact,
138    Identity,
139    Image,
140}
141
142/// Trust classification. Explicit on every node — not inferred from kind.
143#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
144#[serde(rename_all = "snake_case")]
145pub enum TrustZone {
146    /// Code/config authored by the repo owner.
147    FirstParty,
148    /// Marketplace actions, external images (pinned).
149    ThirdParty,
150    /// Unpinned actions, fork PRs, user input.
151    Untrusted,
152}
153
154impl TrustZone {
155    /// Returns true if `self` is a lower trust level than `other`.
156    pub fn is_lower_than(&self, other: &TrustZone) -> bool {
157        self.rank() < other.rank()
158    }
159
160    fn rank(&self) -> u8 {
161        match self {
162            TrustZone::FirstParty => 2,
163            TrustZone::ThirdParty => 1,
164            TrustZone::Untrusted => 0,
165        }
166    }
167}
168
169/// A node in the authority graph.
170#[derive(Debug, Clone, Serialize, Deserialize)]
171pub struct Node {
172    pub id: NodeId,
173    pub kind: NodeKind,
174    pub name: String,
175    pub trust_zone: TrustZone,
176    /// Flexible metadata: pinning status, digest, scope, permissions, etc.
177    pub metadata: HashMap<String, String>,
178}
179
180// ── Edge types ──────────────────────────────────────────
181
182/// Edge semantics model authority/data flow — not syntactic YAML relations.
183/// Design test: "Can authority propagate along this edge?"
184#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
185#[serde(rename_all = "snake_case")]
186pub enum EdgeKind {
187    /// Step -> Secret or Identity (authority granted at runtime).
188    HasAccessTo,
189    /// Step -> Artifact (data flows out).
190    Produces,
191    /// Artifact -> Step (authority flows from artifact to consuming step).
192    Consumes,
193    /// Step -> Image/Action (execution delegation).
194    UsesImage,
195    /// Step -> Step (cross-job or action boundary).
196    DelegatesTo,
197    /// Step -> Secret or Identity (credential written to disk, outliving the step's lifetime).
198    /// Distinct from HasAccessTo: disk persistence is accessible to all subsequent steps
199    /// and processes with filesystem access, not just the step that created it.
200    PersistsTo,
201}
202
203/// A directed edge in the authority graph.
204#[derive(Debug, Clone, Serialize, Deserialize)]
205pub struct Edge {
206    pub id: EdgeId,
207    pub from: NodeId,
208    pub to: NodeId,
209    pub kind: EdgeKind,
210}
211
212// ── Pipeline source ─────────────────────────────────────
213
214/// Where the pipeline definition came from.
215#[derive(Debug, Clone, Serialize, Deserialize)]
216pub struct PipelineSource {
217    pub file: String,
218    #[serde(skip_serializing_if = "Option::is_none")]
219    pub repo: Option<String>,
220    #[serde(skip_serializing_if = "Option::is_none")]
221    pub git_ref: Option<String>,
222}
223
224// ── The graph ───────────────────────────────────────────
225
226/// Directed authority graph. Nodes are pipeline elements (steps, secrets,
227/// artifacts, identities, images). Edges model authority/data flow.
228#[derive(Debug, Clone, Serialize, Deserialize)]
229pub struct AuthorityGraph {
230    pub source: PipelineSource,
231    pub nodes: Vec<Node>,
232    pub edges: Vec<Edge>,
233    /// How complete is this graph? Set by the parser based on what it could resolve.
234    pub completeness: AuthorityCompleteness,
235    /// Human-readable reasons why the graph is Partial (if applicable).
236    #[serde(default, skip_serializing_if = "Vec::is_empty")]
237    pub completeness_gaps: Vec<String>,
238    /// Graph-level metadata set by parsers (e.g. trigger type, platform-specific flags).
239    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
240    pub metadata: HashMap<String, String>,
241}
242
243impl AuthorityGraph {
244    pub fn new(source: PipelineSource) -> Self {
245        Self {
246            source,
247            nodes: Vec::new(),
248            edges: Vec::new(),
249            completeness: AuthorityCompleteness::Complete,
250            completeness_gaps: Vec::new(),
251            metadata: HashMap::new(),
252        }
253    }
254
255    /// Mark the graph as partially complete with a reason.
256    pub fn mark_partial(&mut self, reason: impl Into<String>) {
257        self.completeness = AuthorityCompleteness::Partial;
258        self.completeness_gaps.push(reason.into());
259    }
260
261    /// Add a node, returns its ID.
262    pub fn add_node(
263        &mut self,
264        kind: NodeKind,
265        name: impl Into<String>,
266        trust_zone: TrustZone,
267    ) -> NodeId {
268        let id = self.nodes.len();
269        self.nodes.push(Node {
270            id,
271            kind,
272            name: name.into(),
273            trust_zone,
274            metadata: HashMap::new(),
275        });
276        id
277    }
278
279    /// Add a node with metadata, returns its ID.
280    pub fn add_node_with_metadata(
281        &mut self,
282        kind: NodeKind,
283        name: impl Into<String>,
284        trust_zone: TrustZone,
285        metadata: HashMap<String, String>,
286    ) -> NodeId {
287        let id = self.nodes.len();
288        self.nodes.push(Node {
289            id,
290            kind,
291            name: name.into(),
292            trust_zone,
293            metadata,
294        });
295        id
296    }
297
298    /// Add a directed edge, returns its ID.
299    pub fn add_edge(&mut self, from: NodeId, to: NodeId, kind: EdgeKind) -> EdgeId {
300        let id = self.edges.len();
301        self.edges.push(Edge { id, from, to, kind });
302        id
303    }
304
305    /// Outgoing edges from a node.
306    pub fn edges_from(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
307        self.edges.iter().filter(move |e| e.from == id)
308    }
309
310    /// Incoming edges to a node.
311    pub fn edges_to(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
312        self.edges.iter().filter(move |e| e.to == id)
313    }
314
315    /// All authority-bearing source nodes (Secret + Identity).
316    /// These are the BFS start set for propagation analysis.
317    pub fn authority_sources(&self) -> impl Iterator<Item = &Node> {
318        self.nodes
319            .iter()
320            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
321    }
322
323    /// All nodes of a given kind.
324    pub fn nodes_of_kind(&self, kind: NodeKind) -> impl Iterator<Item = &Node> {
325        self.nodes.iter().filter(move |n| n.kind == kind)
326    }
327
328    /// All nodes in a given trust zone.
329    pub fn nodes_in_zone(&self, zone: TrustZone) -> impl Iterator<Item = &Node> {
330        self.nodes.iter().filter(move |n| n.trust_zone == zone)
331    }
332
333    /// Get a node by ID.
334    pub fn node(&self, id: NodeId) -> Option<&Node> {
335        self.nodes.get(id)
336    }
337
338    /// Get an edge by ID.
339    pub fn edge(&self, id: EdgeId) -> Option<&Edge> {
340        self.edges.get(id)
341    }
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh, empty graph whose source is `file`, with no repo or git ref.
    fn empty_graph(file: &str) -> AuthorityGraph {
        AuthorityGraph::new(PipelineSource {
            file: file.into(),
            repo: None,
            git_ref: None,
        })
    }

    #[test]
    fn build_simple_graph() {
        let mut graph = empty_graph("deploy.yml");

        // Nodes: one secret, a producing step, its artifact, and a consuming step.
        let aws_key = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let build = graph.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let tarball = graph.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
        let deploy = graph.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);

        graph.add_edge(build, aws_key, EdgeKind::HasAccessTo);
        graph.add_edge(build, tarball, EdgeKind::Produces);
        graph.add_edge(tarball, deploy, EdgeKind::Consumes);

        assert_eq!(graph.nodes.len(), 4);
        assert_eq!(graph.edges.len(), 3);
        assert_eq!(graph.authority_sources().count(), 1);
        assert_eq!(graph.edges_from(build).count(), 2);
        // Consumes flows artifact -> step.
        assert_eq!(graph.edges_from(tarball).count(), 1);
    }

    #[test]
    fn completeness_default_is_complete() {
        let graph = empty_graph("test.yml");

        assert_eq!(graph.completeness, AuthorityCompleteness::Complete);
        assert!(graph.completeness_gaps.is_empty());
    }

    #[test]
    fn mark_partial_records_reason() {
        let mut graph = empty_graph("test.yml");

        graph.mark_partial("secrets in run: block inferred, not precisely mapped");

        assert_eq!(graph.completeness, AuthorityCompleteness::Partial);
        assert_eq!(graph.completeness_gaps.len(), 1);
    }

    #[test]
    fn identity_scope_from_permissions() {
        let cases = [
            ("write-all", IdentityScope::Broad),
            ("{ contents: write }", IdentityScope::Broad),
            ("{ contents: read }", IdentityScope::Constrained),
            ("{ id-token: write }", IdentityScope::Broad),
            ("", IdentityScope::Broad),
            ("custom-scope", IdentityScope::Unknown),
        ];

        for (perms, expected) in cases {
            assert_eq!(IdentityScope::from_permissions(perms), expected);
        }
    }

    #[test]
    fn trust_zone_ordering() {
        let first = TrustZone::FirstParty;
        let third = TrustZone::ThirdParty;
        let untrusted = TrustZone::Untrusted;

        assert!(untrusted.is_lower_than(&first));
        assert!(third.is_lower_than(&first));
        assert!(untrusted.is_lower_than(&third));
        assert!(!first.is_lower_than(&first));
    }
}