Skip to main content

taudit_core/
graph.rs

//! Authority-graph engine for `taudit-core`.
//!
//! ## What lives here
//!
//! The mutable engine: [`AuthorityGraph`] and its `add_node`, `add_edge`,
//! `mark_partial` and `stamp_edge_authority_summaries` methods, plus the
//! structural / semantic pin-validation helpers ([`is_sha_pinned`],
//! [`is_docker_digest_pinned`], [`is_pin_semantically_valid`]).
//!
//! ## What lives in `taudit-api`
//!
//! The **wire types** that compose the graph ([`Node`], [`Edge`],
//! [`PipelineSource`], [`ParamSpec`], [`AuthorityEdgeSummary`], the
//! [`NodeKind`] / [`EdgeKind`] / [`TrustZone`] / [`AuthorityCompleteness`] /
//! [`GapKind`] / [`IdentityScope`] enums, the [`NodeId`] / [`EdgeId`] type
//! aliases, and every `META_*` metadata-key constant) live in `taudit-api`.
//! They are re-exported below so that every existing in-tree call site
//! (`use taudit_core::graph::NodeKind`) keeps compiling.
//!
//! `taudit-api` is the externally-stable contract surface; `taudit-core` is
//! workspace-internal. See `crates/taudit-core/src/lib.rs` for the API
//! stability docstring.
23
24use serde::{Deserialize, Serialize};
25use std::collections::HashMap;
26
27// ── Re-exports of wire types (now owned by taudit-api) ─────────────────
28
29pub use taudit_api::{
30    serialize_string_map_sorted, AuthorityCompleteness, AuthorityEdgeSummary, Edge, EdgeId,
31    EdgeKind, GapKind, IdentityScope, Node, NodeId, NodeKind, ParamSpec, PipelineSource, TrustZone,
32    AUTHORITY_EDGE_SUMMARY_FIELD_MAX,
33};
34
35pub use taudit_api::{
36    META_ADD_SPN_TO_ENV, META_ATTESTS, META_CACHE_KEY, META_CHECKOUT_REF, META_CHECKOUT_SELF,
37    META_CLI_FLAG_EXPOSED, META_CONDITION, META_CONTAINER, META_DEPENDS_ON, META_DIGEST,
38    META_DISPATCH_INPUTS, META_DOTENV_FILE, META_DOWNLOADS_ARTIFACT, META_ENVIRONMENT_NAME,
39    META_ENVIRONMENT_URL, META_ENV_APPROVAL, META_ENV_GATE_WRITES_SECRET_VALUE, META_FORK_CHECK,
40    META_GHA_ACTION, META_GHA_CONTAINER_OPTIONS, META_GHA_ENV_ASSIGNMENTS, META_GHA_RUNS_ON,
41    META_GHA_WITH_INPUTS, META_GHA_WORKFLOW_CALL_INPUTS, META_GITLAB_ALLOW_FAILURE,
42    META_GITLAB_CACHE_KEY, META_GITLAB_CACHE_POLICY, META_GITLAB_DIND_SERVICE, META_GITLAB_EXTENDS,
43    META_GITLAB_INCLUDES, META_GITLAB_TRIGGER_KIND, META_IDENTITY_SCOPE, META_IMPLICIT,
44    META_INFERRED, META_INTERACTIVE_DEBUG, META_INTERPRETS_ARTIFACT, META_JOB_NAME,
45    META_JOB_OUTPUTS, META_NEEDS, META_NO_WORKFLOW_PERMISSIONS, META_OIDC, META_OIDC_AUDIENCE,
46    META_OIDC_AUDIENCES, META_PERMISSIONS, META_PLATFORM, META_READS_ENV, META_REPOSITORIES,
47    META_RULES_PROTECTED_ONLY, META_SCRIPT_BODY, META_SECRETS_INHERIT, META_SELF_HOSTED,
48    META_SERVICE_CONNECTION, META_SERVICE_CONNECTION_NAME, META_SETVARIABLE_ADO,
49    META_TERRAFORM_AUTO_APPROVE, META_TRIGGER, META_TRIGGERS, META_VARIABLE_GROUP,
50    META_WORKSPACE_CLEAN, META_WRITES_ENV_GATE,
51};
52
53// ── Shared helpers ─────────────────────────────────────
54
/// Returns `true` when `ref_str` is a SHA-pinned action reference.
///
/// The reference must contain an `@`, and the text following the *last* `@`
/// must be at least 40 characters long and consist solely of ASCII hex
/// digits (either case). Single source of truth — used by both parser and
/// rules.
///
/// This is a *structural* check: any 40+ hex-character suffix is accepted
/// without verifying that it names a real commit. For a semantic check that
/// also rejects obviously bogus values such as an all-zero SHA, see
/// [`is_pin_semantically_valid`].
pub fn is_sha_pinned(ref_str: &str) -> bool {
    match ref_str.rsplit_once('@') {
        // Only the revision after the final `@` is inspected.
        Some((_, revision)) => {
            revision.len() >= 40 && revision.bytes().all(|b| b.is_ascii_hexdigit())
        }
        // No `@` at all: nothing that could be a pin.
        None => false,
    }
}
71
/// Returns `true` when `image` is pinned to a Docker digest.
///
/// Docker digest format: `image@sha256:<64-hex-chars-lowercase>`.
///
/// Everything after the first `@sha256:` marker must be exactly 64 lowercase
/// hex characters. Consequently the following are rejected:
///
/// - truncated or over-long digests (e.g. `alpine@sha256:abc`);
/// - uppercase hex — Docker requires the lowercase form;
/// - any trailing text after the digest, including a second `@sha256:`
///   marker. A reference such as `img@sha256:<64 hex>@sha256:junk` is
///   malformed and must not be reported as pinned.
pub fn is_docker_digest_pinned(image: &str) -> bool {
    const MARKER: &str = "@sha256:";
    const DIGEST_HEX_LEN: usize = 64;

    // Validate the *entire* tail after the marker so nothing can be smuggled
    // in behind an otherwise well-formed digest.
    let Some((_, digest)) = image.split_once(MARKER) else {
        return false;
    };
    digest.len() == DIGEST_HEX_LEN
        && digest
            .bytes()
            .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
}
89
90/// Returns true if `ref_str` looks both structurally pinned AND semantically
91/// plausible. Layered on top of [`is_sha_pinned`] / [`is_docker_digest_pinned`]:
92/// a structurally valid pin can still be obviously bogus (e.g. an all-zero SHA
93/// is syntactically a 40-char hex string but does not refer to any real
94/// commit; an attacker could use it to fake a "pinned" appearance).
95///
96/// Rules that want to flag impersonation attempts (rather than just laziness)
97/// should call this in addition to / instead of the structural check.
98///
99/// Rejects:
100/// - All-zero SHA-1 references (`actions/foo@0000…0000`).
101/// - All-zero sha256 docker digests (`image@sha256:0000…0000`).
102///
103/// Anything else that passes the structural check passes here.
104pub fn is_pin_semantically_valid(ref_str: &str) -> bool {
105    // Docker digest form takes priority (the `@sha256:` prefix is unambiguous).
106    if ref_str.contains("@sha256:") {
107        if !is_docker_digest_pinned(ref_str) {
108            return false;
109        }
110        let digest = ref_str.split("@sha256:").nth(1).unwrap_or("");
111        return !digest.chars().all(|c| c == '0');
112    }
113
114    if !is_sha_pinned(ref_str) {
115        return false;
116    }
117    let sha = ref_str.split('@').next_back().unwrap_or("");
118    !sha.chars().all(|c| c == '0')
119}
120
121// ── AuthorityEdgeSummary helpers (engine-side) ─────────────────────────
122
123fn truncate_edge_summary_field(s: &str) -> String {
124    let max = AUTHORITY_EDGE_SUMMARY_FIELD_MAX;
125    let n = s.chars().count();
126    if n <= max {
127        s.to_string()
128    } else {
129        let mut out: String = s.chars().take(max.saturating_sub(1)).collect();
130        out.push('…');
131        out
132    }
133}
134
135fn trust_zone_snake_case(zone: TrustZone) -> String {
136    match zone {
137        TrustZone::FirstParty => "first_party".into(),
138        TrustZone::ThirdParty => "third_party".into(),
139        TrustZone::Untrusted => "untrusted".into(),
140    }
141}
142
143// ── The graph ───────────────────────────────────────────
144
/// Directed authority graph. Nodes are pipeline elements (steps, secrets,
/// artifacts, identities, images). Edges model authority/data flow.
///
/// Nodes and edges are stored in insertion order; the ids handed out by
/// `add_node` / `add_edge` are the element's index in the respective vector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorityGraph {
    /// The pipeline source this graph was built for.
    pub source: PipelineSource,
    /// All nodes, indexed by their `NodeId`.
    pub nodes: Vec<Node>,
    /// All edges, indexed by their `EdgeId`.
    pub edges: Vec<Edge>,
    /// How complete is this graph? Set by the parser based on what it could resolve.
    /// Starts as `Complete`; `mark_partial` downgrades it to `Partial`.
    pub completeness: AuthorityCompleteness,
    /// Human-readable reasons why the graph is Partial (if applicable).
    /// Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub completeness_gaps: Vec<String>,
    /// Typed categories for each completeness gap (parallel to `completeness_gaps`).
    /// `mark_partial` pushes to both vectors together, keeping them index-aligned.
    /// Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub completeness_gap_kinds: Vec<GapKind>,
    /// Graph-level metadata set by parsers (e.g. trigger type, platform-specific flags).
    /// Serialized in sorted-key order — see `Node.metadata` rationale.
    /// Omitted from serialized output when empty.
    #[serde(
        default,
        skip_serializing_if = "HashMap::is_empty",
        serialize_with = "serialize_string_map_sorted"
    )]
    pub metadata: HashMap<String, String>,
    /// Top-level pipeline `parameters:` declarations, keyed by parameter name.
    /// Populated by parsers that surface parameter metadata (currently ADO).
    /// Empty for platforms / pipelines that don't declare parameters.
    /// Serialized in sorted-key order — see `Node.metadata` rationale.
    /// Omitted from serialized output when empty.
    #[serde(
        default,
        skip_serializing_if = "HashMap::is_empty",
        serialize_with = "serialize_string_map_sorted"
    )]
    pub parameters: HashMap<String, ParamSpec>,
}
179
180impl AuthorityGraph {
181    pub fn new(source: PipelineSource) -> Self {
182        Self {
183            source,
184            nodes: Vec::new(),
185            edges: Vec::new(),
186            completeness: AuthorityCompleteness::Complete,
187            completeness_gaps: Vec::new(),
188            completeness_gap_kinds: Vec::new(),
189            metadata: HashMap::new(),
190            parameters: HashMap::new(),
191        }
192    }
193
194    /// Mark the graph as partially complete with a reason.
195    pub fn mark_partial(&mut self, kind: GapKind, reason: impl Into<String>) {
196        self.completeness = AuthorityCompleteness::Partial;
197        self.completeness_gaps.push(reason.into());
198        self.completeness_gap_kinds.push(kind);
199    }
200
201    /// Returns the most severe GapKind present, or None if the graph is complete/unknown.
202    pub fn worst_gap_kind(&self) -> Option<GapKind> {
203        self.completeness_gap_kinds
204            .iter()
205            .max_by_key(|k| match k {
206                GapKind::Expression => 0u8,
207                GapKind::Structural => 1,
208                GapKind::Opaque => 2,
209            })
210            .copied()
211    }
212
213    /// Add a node, returns its ID.
214    pub fn add_node(
215        &mut self,
216        kind: NodeKind,
217        name: impl Into<String>,
218        trust_zone: TrustZone,
219    ) -> NodeId {
220        let id = self.nodes.len();
221        self.nodes.push(Node {
222            id,
223            kind,
224            name: name.into(),
225            trust_zone,
226            metadata: HashMap::new(),
227        });
228        id
229    }
230
231    /// Add a node with metadata, returns its ID.
232    pub fn add_node_with_metadata(
233        &mut self,
234        kind: NodeKind,
235        name: impl Into<String>,
236        trust_zone: TrustZone,
237        metadata: HashMap<String, String>,
238    ) -> NodeId {
239        let id = self.nodes.len();
240        self.nodes.push(Node {
241            id,
242            kind,
243            name: name.into(),
244            trust_zone,
245            metadata,
246        });
247        id
248    }
249
250    /// Add a directed edge, returns its ID.
251    pub fn add_edge(&mut self, from: NodeId, to: NodeId, kind: EdgeKind) -> EdgeId {
252        let id = self.edges.len();
253        self.edges.push(Edge {
254            id,
255            from,
256            to,
257            kind,
258            authority_summary: None,
259        });
260        id
261    }
262
263    /// Populate [`Edge::authority_summary`] for each **`HasAccessTo`** edge whose
264    /// target is an **identity** node, from that node’s trust zone and
265    /// allowlisted metadata (`identity_scope`, `permissions`). Idempotent.
266    ///
267    /// Called automatically at the end of every built-in [`crate::ports::PipelineParser`]
268    /// implementation so `taudit graph --format json` and scan JSON include summaries.
269    pub fn stamp_edge_authority_summaries(&mut self) {
270        for edge in &mut self.edges {
271            if edge.kind != EdgeKind::HasAccessTo {
272                continue;
273            }
274            let Some(to_node) = self.nodes.get(edge.to) else {
275                continue;
276            };
277            if to_node.kind != NodeKind::Identity {
278                continue;
279            }
280            edge.authority_summary = Some(AuthorityEdgeSummary {
281                trust_zone: Some(trust_zone_snake_case(to_node.trust_zone)),
282                identity_scope: to_node
283                    .metadata
284                    .get(META_IDENTITY_SCOPE)
285                    .map(|s| truncate_edge_summary_field(s)),
286                permissions_summary: to_node
287                    .metadata
288                    .get(META_PERMISSIONS)
289                    .map(|s| truncate_edge_summary_field(s)),
290            });
291        }
292    }
293
294    /// Outgoing edges from a node.
295    pub fn edges_from(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
296        self.edges.iter().filter(move |e| e.from == id)
297    }
298
299    /// Incoming edges to a node.
300    pub fn edges_to(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
301        self.edges.iter().filter(move |e| e.to == id)
302    }
303
304    /// All authority-bearing source nodes (Secret + Identity).
305    /// These are the BFS start set for propagation analysis.
306    pub fn authority_sources(&self) -> impl Iterator<Item = &Node> {
307        self.nodes
308            .iter()
309            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
310    }
311
312    /// All nodes of a given kind.
313    pub fn nodes_of_kind(&self, kind: NodeKind) -> impl Iterator<Item = &Node> {
314        self.nodes.iter().filter(move |n| n.kind == kind)
315    }
316
317    /// All nodes in a given trust zone.
318    pub fn nodes_in_zone(&self, zone: TrustZone) -> impl Iterator<Item = &Node> {
319        self.nodes.iter().filter(move |n| n.trust_zone == zone)
320    }
321
322    /// Get a node by ID.
323    pub fn node(&self, id: NodeId) -> Option<&Node> {
324        self.nodes.get(id)
325    }
326
327    /// Get an edge by ID.
328    pub fn edge(&self, id: EdgeId) -> Option<&Edge> {
329        self.edges.get(id)
330    }
331}
332
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal `PipelineSource` for tests that only care about the file name.
    fn source(file: &str) -> PipelineSource {
        PipelineSource {
            file: file.into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        }
    }

    #[test]
    fn build_simple_graph() {
        let mut g = AuthorityGraph::new(source("deploy.yml"));

        let secret = g.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let step_build = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let artifact = g.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
        let step_deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);

        g.add_edge(step_build, secret, EdgeKind::HasAccessTo);
        g.add_edge(step_build, artifact, EdgeKind::Produces);
        g.add_edge(artifact, step_deploy, EdgeKind::Consumes);

        assert_eq!(g.nodes.len(), 4);
        assert_eq!(g.edges.len(), 3);
        assert_eq!(g.authority_sources().count(), 1);
        assert_eq!(g.edges_from(step_build).count(), 2);
        assert_eq!(g.edges_from(artifact).count(), 1); // Consumes flows artifact -> step
    }

    #[test]
    fn stamp_edge_authority_summaries_on_has_access_to_identity() {
        let mut g = AuthorityGraph::new(source("ci.yml"));
        let secret = g.add_node(NodeKind::Secret, "K", TrustZone::FirstParty);
        let mut id_meta = HashMap::new();
        id_meta.insert(META_IDENTITY_SCOPE.into(), "constrained".into());
        id_meta.insert(META_PERMISSIONS.into(), "read-all".into());
        let ident = g.add_node_with_metadata(
            NodeKind::Identity,
            "GITHUB_TOKEN",
            TrustZone::FirstParty,
            id_meta,
        );
        let step = g.add_node(NodeKind::Step, "s", TrustZone::FirstParty);
        let e_secret = g.add_edge(step, secret, EdgeKind::HasAccessTo);
        let e_ident = g.add_edge(step, ident, EdgeKind::HasAccessTo);

        g.stamp_edge_authority_summaries();

        // Only edges that target an identity node receive a summary.
        assert!(g.edges[e_secret].authority_summary.is_none());
        let sum = g.edges[e_ident]
            .authority_summary
            .as_ref()
            .expect("identity edge summary");
        assert_eq!(sum.trust_zone.as_deref(), Some("first_party"));
        assert_eq!(sum.identity_scope.as_deref(), Some("constrained"));
        assert_eq!(sum.permissions_summary.as_deref(), Some("read-all"));
    }

    #[test]
    fn completeness_default_is_complete() {
        let g = AuthorityGraph::new(source("test.yml"));
        assert_eq!(g.completeness, AuthorityCompleteness::Complete);
        assert!(g.completeness_gaps.is_empty());
    }

    #[test]
    fn mark_partial_records_reason() {
        let mut g = AuthorityGraph::new(source("test.yml"));
        g.mark_partial(
            GapKind::Expression,
            "secrets in run: block inferred, not precisely mapped",
        );
        assert_eq!(g.completeness, AuthorityCompleteness::Partial);
        assert_eq!(g.completeness_gaps.len(), 1);
        assert_eq!(g.completeness_gap_kinds.len(), 1);
    }

    #[test]
    fn identity_scope_from_permissions() {
        assert_eq!(
            IdentityScope::from_permissions("write-all"),
            IdentityScope::Broad
        );
        assert_eq!(
            IdentityScope::from_permissions("{ contents: write }"),
            IdentityScope::Broad
        );
        assert_eq!(
            IdentityScope::from_permissions("{ contents: read }"),
            IdentityScope::Constrained
        );
        assert_eq!(
            IdentityScope::from_permissions("{ id-token: write }"),
            IdentityScope::Broad
        );
        assert_eq!(IdentityScope::from_permissions(""), IdentityScope::Broad);
        assert_eq!(
            IdentityScope::from_permissions("custom-scope"),
            IdentityScope::Unknown
        );
    }

    #[test]
    fn trust_zone_ordering() {
        assert!(TrustZone::Untrusted.is_lower_than(&TrustZone::FirstParty));
        assert!(TrustZone::ThirdParty.is_lower_than(&TrustZone::FirstParty));
        assert!(TrustZone::Untrusted.is_lower_than(&TrustZone::ThirdParty));
        assert!(!TrustZone::FirstParty.is_lower_than(&TrustZone::FirstParty));
    }

    // ── Pin validation (fuzz B3 regression) ─────────────────

    #[test]
    fn is_sha_pinned_accepts_lowercase_40_hex() {
        // Exactly 40 lowercase hex — the canonical legitimate form.
        assert!(is_sha_pinned(
            "actions/checkout@abc1234567890abcdef1234567890abcdef12345"
        ));
        // Mixed case (also exactly 40) is still structurally pinned
        // (legitimate — Git accepts both).
        assert!(is_sha_pinned(
            "actions/checkout@ABCDEF1234567890abcdef1234567890ABCDEF12"
        ));
    }

    #[test]
    fn is_sha_pinned_structural_accepts_all_zero() {
        // Structural check is intentionally permissive — semantic rejection
        // happens in is_pin_semantically_valid. Documented in B3.
        let all_zero = format!("actions/setup-python@{}", "0".repeat(40));
        assert!(is_sha_pinned(&all_zero));
    }

    #[test]
    fn is_sha_pinned_rejects_short_or_non_hex() {
        assert!(!is_sha_pinned("actions/checkout@v4"));
        assert!(!is_sha_pinned("actions/setup-node@a1b2c3"));
        // Long enough (52 chars, above the 40-char minimum) but not all hex.
        assert!(!is_sha_pinned(
            "actions/checkout@somethingthatlookslikeashabutisntsha1234567890abcdef"
        ));
    }

    #[test]
    fn is_pin_semantically_valid_rejects_all_zero_sha() {
        // Fuzz B3 reproducer.
        let all_zero = format!("actions/setup-python@{}", "0".repeat(40));
        assert!(!is_pin_semantically_valid(&all_zero));
    }

    #[test]
    fn is_pin_semantically_valid_accepts_real_looking_sha() {
        // Exactly 40 lowercase hex characters, not all zero.
        assert!(is_pin_semantically_valid(
            "actions/checkout@abc1234567890abcdef1234567890abcdef12345"
        ));
    }

    #[test]
    fn is_pin_semantically_valid_rejects_unpinned() {
        assert!(!is_pin_semantically_valid("actions/checkout@v4"));
        assert!(!is_pin_semantically_valid("actions/setup-node@a1b2c3"));
    }

    #[test]
    fn is_docker_digest_pinned_rejects_truncated() {
        // Fuzz B3 reproducer: previously accepted, now rejected.
        assert!(!is_docker_digest_pinned("alpine@sha256:abc"));
        // 65 chars (one too long).
        assert!(!is_docker_digest_pinned(
            "alpine@sha256:abc123def456abc123def456abc123def456abc123def456abc123def456abcde"
        ));
        // 63 chars (one short).
        assert!(!is_docker_digest_pinned(
            "alpine@sha256:abc123def456abc123def456abc123def456abc123def456abc123def456abc"
        ));
    }

    #[test]
    fn is_docker_digest_pinned_accepts_full_64_lowercase() {
        // Exactly 64 lowercase hex chars after `@sha256:`.
        assert!(is_docker_digest_pinned(
            "alpine@sha256:abc123def456abc123def456abc123def456abc123def456abc123def456abcd"
        ));
    }

    #[test]
    fn is_docker_digest_pinned_rejects_uppercase() {
        // Docker requires lowercase — uppercase indicates a hand-crafted /
        // tampered string and should not pass.
        assert!(!is_docker_digest_pinned(
            "alpine@sha256:ABC123DEF456ABC123DEF456ABC123DEF456ABC123DEF456ABC123DEF456ABCD"
        ));
    }

    #[test]
    fn is_pin_semantically_valid_rejects_all_zero_docker_digest() {
        let all_zero = format!("alpine@sha256:{}", "0".repeat(64));
        assert!(!is_pin_semantically_valid(&all_zero));
    }

    #[test]
    fn is_pin_semantically_valid_accepts_real_docker_digest() {
        assert!(is_pin_semantically_valid(
            "alpine@sha256:abc123def456abc123def456abc123def456abc123def456abc123def456abcd"
        ));
    }
}