taudit_core/
graph.rs

1use serde::{Deserialize, Serialize, Serializer};
2use std::collections::{BTreeMap, HashMap};
3
/// Unique identifier for a node in the authority graph.
///
/// `AuthorityGraph::add_node` assigns this as the node's position in
/// `AuthorityGraph::nodes`, and `AuthorityGraph::node` uses it as a direct
/// index into that vector.
pub type NodeId = usize;

/// Unique identifier for an edge in the authority graph.
///
/// `AuthorityGraph::add_edge` assigns this as the edge's position in
/// `AuthorityGraph::edges`, and `AuthorityGraph::edge` uses it as a direct
/// index into that vector.
pub type EdgeId = usize;
9
// ── Metadata key constants ─────────────────────────────
// Avoids stringly-typed bugs across crate boundaries.
// Each constant is a key into `Node.metadata` or `AuthorityGraph.metadata`
// (both are `HashMap<String, String>`).

/// Metadata key `"digest"`.
/// NOTE(review): presumably records the digest an Image node is pinned to —
/// confirm against the parsers that stamp it.
pub const META_DIGEST: &str = "digest";
/// Metadata key `"permissions"`.
/// NOTE(review): presumably holds the raw permissions string that
/// `IdentityScope::from_permissions` classifies — confirm against the parsers.
pub const META_PERMISSIONS: &str = "permissions";
/// Metadata key `"identity_scope"`.
/// NOTE(review): presumably holds the classified `IdentityScope` of an
/// Identity node — confirm against the parsers.
pub const META_IDENTITY_SCOPE: &str = "identity_scope";
/// Metadata key `"inferred"`.
/// NOTE(review): presumably flags a node or relationship the parser inferred
/// rather than resolved precisely — confirm against the parsers.
pub const META_INFERRED: &str = "inferred";
/// Marks an Image node as a job container (not a `uses:` action).
pub const META_CONTAINER: &str = "container";
/// Marks an Identity node as OIDC-capable (`permissions: id-token: write`).
pub const META_OIDC: &str = "oidc";
/// Marks a Secret node whose value is interpolated into a CLI flag argument (e.g. `-var "key=$(SECRET)"`).
/// CLI flag values appear in pipeline log output even when ADO secret masking is active,
/// because the command string is logged before masking runs and Terraform itself logs `-var` values.
pub const META_CLI_FLAG_EXPOSED: &str = "cli_flag_exposed";
/// Graph-level metadata: identifies the trigger type (e.g. `pull_request_target`, `pr`).
pub const META_TRIGGER: &str = "trigger";
/// Marks a Step that writes to the environment gate (`$GITHUB_ENV`, ADO `##vso[task.setvariable]`).
pub const META_WRITES_ENV_GATE: &str = "writes_env_gate";
/// Marks a Step that reads from the runner-managed environment via an
/// `env.<NAME>` template reference — `${{ env.X }}` in a `with:` value,
/// inline script body, or step `env:` mapping. Distinct from `secrets.X`
/// references (which produce a HasAccessTo edge to a Secret node) — `env.X`
/// references can be sourced from the ambient runner environment, including
/// values laundered through `$GITHUB_ENV` by an earlier step. Stamped by
/// the GHA parser so `secret_via_env_gate_to_untrusted_consumer` can find
/// the gate-laundering chain that the explicit-secret rules miss.
pub const META_READS_ENV: &str = "reads_env";
/// Marks a Step that performs cryptographic provenance attestation (e.g. `actions/attest-build-provenance`).
pub const META_ATTESTS: &str = "attests";
/// Marks a Secret node sourced from an ADO variable group (vs inline pipeline variable).
pub const META_VARIABLE_GROUP: &str = "variable_group";
/// Marks an Image node as a self-hosted agent pool (pool.name on ADO; runs-on: self-hosted on GHA).
pub const META_SELF_HOSTED: &str = "self_hosted";
/// Marks a Step that performs a `checkout: self` (ADO) or default `actions/checkout` on a PR context.
pub const META_CHECKOUT_SELF: &str = "checkout_self";
/// Marks an Identity node as an ADO service connection.
pub const META_SERVICE_CONNECTION: &str = "service_connection";
/// Marks an Identity node as implicitly injected by the platform (e.g. ADO System.AccessToken).
/// Implicit tokens are structurally accessible to all tasks by platform design — exposure
/// to untrusted steps is Info-level (structural) rather than Critical (misconfiguration).
pub const META_IMPLICIT: &str = "implicit";
/// Marks a Step that belongs to an ADO deployment job whose `environment:` is
/// configured with required approvals — a manual gate that breaks automatic
/// authority propagation. Findings whose path crosses such a node have their
/// severity reduced by one step (Critical → High → Medium → Low).
pub const META_ENV_APPROVAL: &str = "env_approval";
/// Records the parent job name on every Step node, enabling per-job subgraph
/// filtering (e.g. `taudit map --job build`) and downstream consumers that
/// need to attribute steps back to their containing job. Set by both the GHA
/// and ADO parsers on every Step they create within a job's scope.
pub const META_JOB_NAME: &str = "job_name";
/// Graph-level metadata: JSON-encoded array of `resources.repositories[]`
/// entries declared by the pipeline. Each entry is an object with fields
/// `alias`, `repo_type`, `name`, optional `ref`, and `used` (true when the
/// alias is referenced via `template: x@alias`, `extends: x@alias`, or
/// `checkout: alias` somewhere in the same pipeline file). Set by the ADO
/// parser; consumed by `template_extends_unpinned_branch`.
pub const META_REPOSITORIES: &str = "repositories";
/// Records the raw inline script body of a Step (the text from
/// `script:` / `bash:` / `powershell:` / `pwsh:` / `run:` / task
/// `inputs.script` / `inputs.Inline` / `inputs.inlineScript`). Stamped by
/// parsers when the step has an inline script. Consumed by script-aware
/// rules: `vm_remote_exec_via_pipeline_secret`,
/// `short_lived_sas_in_command_line`, `secret_to_inline_script_env_export`,
/// `secret_materialised_to_workspace_file`, `keyvault_secret_to_plaintext`,
/// `add_spn_with_inline_script`, `parameter_interpolation_into_shell`.
/// Stored verbatim — rules apply their own pattern matching.
pub const META_SCRIPT_BODY: &str = "script_body";
/// Records the name of the ADO service connection a step uses (the value of
/// `inputs.azureSubscription` / `inputs.connectedServiceName*`). Set on the
/// Step node itself (in addition to the Identity node it links to) so rules
/// can pattern-match on the connection name without traversing edges.
pub const META_SERVICE_CONNECTION_NAME: &str = "service_connection_name";
/// Marks a Step as performing `terraform apply ... -auto-approve` (either via
/// an inline script or via a `TerraformCLI` / `TerraformTask` task with
/// `command: apply` and `commandOptions` containing `auto-approve`).
pub const META_TERRAFORM_AUTO_APPROVE: &str = "terraform_auto_approve";
/// Marks a Step task that runs with `addSpnToEnvironment: true`, exposing
/// the federated SPN (idToken / servicePrincipalKey / servicePrincipalId /
/// tenantId) to the inline script body via environment variables.
pub const META_ADD_SPN_TO_ENV: &str = "add_spn_to_environment";
/// Graph-level metadata: identifies the source platform of the parsed
/// pipeline. Set by every parser to its `platform()` value
/// (`"github-actions"`, `"azure-devops"`, `"gitlab"`). Allows platform-scoped
/// rules to gate their detection without parsing the source file path.
pub const META_PLATFORM: &str = "platform";
/// Graph-level metadata: marks a GitHub Actions workflow as having NO
/// top-level `permissions:` block declared. Set by the GHA parser when
/// `workflow.permissions` is absent so rules can detect the negative-space
/// "no permissions block at all" pattern (which leaves `GITHUB_TOKEN` at its
/// broad platform default — `contents: write`, `packages: write`, etc.).
pub const META_NO_WORKFLOW_PERMISSIONS: &str = "no_workflow_permissions";
/// Marks a Step in a GHA workflow as carrying an `if:` condition that
/// references the standard fork-check pattern
/// (`github.event.pull_request.head.repo.fork == false` or the equivalent
/// `head.repo.full_name == github.repository`). Stamped by the GHA parser so
/// rules can credit the step with the compensating control without
/// re-parsing the YAML expression. Bool stored as `"true"`.
pub const META_FORK_CHECK: &str = "fork_check";
/// Marks a GitLab CI job (Step node) whose `rules:` or `only:` clause
/// restricts execution to protected branches — either via an explicit
/// `if: $CI_COMMIT_REF_PROTECTED == "true"` rule, an `if: $CI_COMMIT_BRANCH
/// == $CI_DEFAULT_BRANCH` rule, or an `only: [main, ...]` allowlist of
/// platform-protected refs. Set by the GitLab parser. Absence on a
/// deployment job is a control gap.
pub const META_RULES_PROTECTED_ONLY: &str = "rules_protected_only";
117
118// ── Shared helpers ─────────────────────────────────────
119
120/// Serialize a `HashMap<String, V>` with keys in sorted order. The
121/// in-memory representation stays a `HashMap` (cheaper insertion, hot
122/// path on every parser); only the serialized form is canonicalised.
123/// This is the single point of determinism control for graph metadata
124/// emitted via JSON / SARIF / CloudEvents — without it, HashMap iteration
125/// order leaks per-process randomness into every diff and cache key.
126fn serialize_string_map_sorted<S, V>(
127    map: &HashMap<String, V>,
128    serializer: S,
129) -> Result<S::Ok, S::Error>
130where
131    S: Serializer,
132    V: Serialize,
133{
134    let sorted: BTreeMap<&String, &V> = map.iter().collect();
135    sorted.serialize(serializer)
136}
137
/// Reports whether `ref_str` is an action reference pinned to a commit SHA.
///
/// The reference must contain an `@`, and the text after the *last* `@` must
/// be at least 40 characters long and consist solely of ASCII hex digits
/// (either case). Shared by the parser and the rules so both agree on what
/// "pinned" means.
///
/// This is purely a *structural* test: it does not verify that the SHA names
/// a real commit, and suffixes longer than 40 hex characters are accepted
/// as-is. Use [`is_pin_semantically_valid`] to additionally reject obviously
/// bogus values such as an all-zero SHA.
pub fn is_sha_pinned(ref_str: &str) -> bool {
    matches!(
        ref_str.rsplit_once('@'),
        Some((_, tail)) if tail.len() >= 40 && tail.chars().all(|ch| ch.is_ascii_hexdigit())
    )
}
154
/// Returns true if `image` is pinned to a Docker digest.
/// Docker digest format: `image@sha256:<64-hex-chars-lowercase>`.
///
/// Everything after the first `@sha256:` marker must be exactly 64 lowercase
/// hex characters. Rejected inputs:
/// - references without an `@sha256:` marker;
/// - truncated or over-long digests (e.g. `alpine@sha256:abc`);
/// - uppercase hex — Docker requires the lowercase form;
/// - anything trailing the digest, including a second `@sha256:` segment
///   (`img@sha256:<64 hex>@sha256:junk`), which is not a valid reference and
///   must not be reported as pinned.
pub fn is_docker_digest_pinned(image: &str) -> bool {
    // `split_once` yields the *entire* remainder after the marker, so any
    // trailing text fails the exact-length / charset check below.
    match image.split_once("@sha256:") {
        Some((_, digest)) => {
            digest.len() == 64
                && digest
                    .bytes()
                    .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'))
        }
        None => false,
    }
}
172
173/// Returns true if `ref_str` looks both structurally pinned AND semantically
174/// plausible. Layered on top of [`is_sha_pinned`] / [`is_docker_digest_pinned`]:
175/// a structurally valid pin can still be obviously bogus (e.g. an all-zero SHA
176/// is syntactically a 40-char hex string but does not refer to any real
177/// commit; an attacker could use it to fake a "pinned" appearance).
178///
179/// Rules that want to flag impersonation attempts (rather than just laziness)
180/// should call this in addition to / instead of the structural check.
181///
182/// Rejects:
183/// - All-zero SHA-1 references (`actions/foo@0000…0000`).
184/// - All-zero sha256 docker digests (`image@sha256:0000…0000`).
185///
186/// Anything else that passes the structural check passes here.
187pub fn is_pin_semantically_valid(ref_str: &str) -> bool {
188    // Docker digest form takes priority (the `@sha256:` prefix is unambiguous).
189    if ref_str.contains("@sha256:") {
190        if !is_docker_digest_pinned(ref_str) {
191            return false;
192        }
193        let digest = ref_str.split("@sha256:").nth(1).unwrap_or("");
194        return !digest.chars().all(|c| c == '0');
195    }
196
197    if !is_sha_pinned(ref_str) {
198        return false;
199    }
200    let sha = ref_str.split('@').next_back().unwrap_or("");
201    !sha.chars().all(|c| c == '0')
202}
203
204// ── Graph-level precision markers ───────────────────────
205
/// How complete is this authority graph? Parsers set this based on whether
/// they could fully resolve all authority relationships in the pipeline YAML.
///
/// A `Partial` graph is still useful — it just tells the consumer that some
/// authority paths may be missing. This is better than silent incompleteness.
///
/// `AuthorityGraph::new` starts every graph as `Complete`;
/// `AuthorityGraph::mark_partial` switches it to `Partial` and records the
/// reason. Variants serialize in snake_case (`complete`, `partial`,
/// `unknown`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuthorityCompleteness {
    /// Parser resolved all authority relationships.
    Complete,
    /// Parser found constructs it couldn't fully resolve (e.g. secrets in
    /// shell strings, composite actions, reusable workflows). The graph
    /// captures what it can, but edges may be missing.
    Partial,
    /// Parser couldn't determine completeness.
    Unknown,
}
223
224/// How broad is an identity's scope? Classifies the risk surface of tokens,
225/// service principals, and OIDC identities.
226#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
227#[serde(rename_all = "snake_case")]
228pub enum IdentityScope {
229    /// Wide permissions: write-all, admin, or unscoped tokens.
230    Broad,
231    /// Narrow permissions: contents:read, specific scopes.
232    Constrained,
233    /// Scope couldn't be determined — treat as risky.
234    Unknown,
235}
236
237impl IdentityScope {
238    /// Classify an identity scope from a permissions string.
239    pub fn from_permissions(perms: &str) -> Self {
240        let p = perms.to_lowercase();
241        if p.contains("write-all") || p.contains("admin") || p == "{}" || p.is_empty() {
242            IdentityScope::Broad
243        } else if p.contains("write") {
244            // Any write permission = broad (conservative)
245            IdentityScope::Broad
246        } else if p.contains("read") {
247            IdentityScope::Constrained
248        } else {
249            IdentityScope::Unknown
250        }
251    }
252}
253
254// ── Node types ──────────────────────────────────────────
255
/// Semantic kind of a graph node. Serialized in snake_case (`step`,
/// `secret`, `artifact`, `identity`, `image`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeKind {
    /// A pipeline step; the origin of most edges (see `EdgeKind`).
    Step,
    /// A secret value. Counted as an authority source by
    /// `AuthorityGraph::authority_sources`.
    Secret,
    /// An artifact produced by one step and consumed by another
    /// (`EdgeKind::Produces` / `EdgeKind::Consumes`).
    Artifact,
    /// A token, service connection, or similar identity. Counted as an
    /// authority source by `AuthorityGraph::authority_sources`.
    Identity,
    /// An image or action a step delegates execution to
    /// (`EdgeKind::UsesImage`).
    Image,
}
266
267/// Trust classification. Explicit on every node — not inferred from kind.
268#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
269#[serde(rename_all = "snake_case")]
270pub enum TrustZone {
271    /// Code/config authored by the repo owner.
272    FirstParty,
273    /// Marketplace actions, external images (pinned).
274    ThirdParty,
275    /// Unpinned actions, fork PRs, user input.
276    Untrusted,
277}
278
279impl TrustZone {
280    /// Returns true if `self` is a lower trust level than `other`.
281    pub fn is_lower_than(&self, other: &TrustZone) -> bool {
282        self.rank() < other.rank()
283    }
284
285    fn rank(&self) -> u8 {
286        match self {
287            TrustZone::FirstParty => 2,
288            TrustZone::ThirdParty => 1,
289            TrustZone::Untrusted => 0,
290        }
291    }
292}
293
/// A node in the authority graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Identifier of this node; `AuthorityGraph::add_node` sets it to the
    /// node's index in `AuthorityGraph::nodes`.
    pub id: NodeId,
    /// What kind of pipeline element this node represents.
    pub kind: NodeKind,
    /// Display name supplied by the caller that created the node
    /// (e.g. `"AWS_KEY"`, `"build"`).
    pub name: String,
    /// Trust classification, set explicitly by whoever creates the node.
    pub trust_zone: TrustZone,
    /// Flexible metadata: pinning status, digest, scope, permissions, etc.
    /// Serialized in sorted-key order so JSON / SARIF / CloudEvents output
    /// is byte-deterministic across runs (HashMap iteration is randomised
    /// per process, which would otherwise break diffs and cache keys).
    #[serde(serialize_with = "serialize_string_map_sorted")]
    pub metadata: HashMap<String, String>,
}
308
309// ── Edge types ──────────────────────────────────────────
310
/// Edge semantics model authority/data flow — not syntactic YAML relations.
/// Design test: "Can authority propagate along this edge?"
///
/// Variants serialize in snake_case (e.g. `has_access_to`, `uses_image`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeKind {
    /// Step -> Secret or Identity (authority granted at runtime).
    HasAccessTo,
    /// Step -> Artifact (data flows out).
    Produces,
    /// Artifact -> Step (authority flows from artifact to consuming step).
    Consumes,
    /// Step -> Image/Action (execution delegation).
    UsesImage,
    /// Step -> Step (cross-job or action boundary).
    DelegatesTo,
    /// Step -> Secret or Identity (credential written to disk, outliving the step's lifetime).
    /// Distinct from HasAccessTo: disk persistence is accessible to all subsequent steps
    /// and processes with filesystem access, not just the step that created it.
    PersistsTo,
}
331
/// A directed edge in the authority graph.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Identifier of this edge; `AuthorityGraph::add_edge` sets it to the
    /// edge's index in `AuthorityGraph::edges`.
    pub id: EdgeId,
    /// Node the edge starts at.
    pub from: NodeId,
    /// Node the edge points to.
    pub to: NodeId,
    /// Relationship the edge models; see `EdgeKind` for the direction
    /// convention of each variant.
    pub kind: EdgeKind,
}
340
341// ── Pipeline source ─────────────────────────────────────
342
/// Where the pipeline definition came from.
///
/// The optional fields are omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineSource {
    /// Pipeline definition file (e.g. `"deploy.yml"`).
    /// NOTE(review): whether this is a bare name or a repo-relative path is
    /// decided by the caller — confirm against the parsers.
    pub file: String,
    /// Repository the file belongs to, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repo: Option<String>,
    /// Git ref the file was read at, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub git_ref: Option<String>,
    /// SHA of the commit being analyzed; reproducibility hint when set.
    /// Parsers leave None; CI integrations populate this from the build env.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub commit_sha: Option<String>,
}
356
357// ── The graph ───────────────────────────────────────────
358
/// Pipeline-level parameter declaration captured from a top-level
/// `parameters:` block. Used by rules that need to reason about whether
/// caller-supplied parameter values are constrained (`values:` allowlist)
/// or free-form (no allowlist on a string parameter — shell-injection risk).
///
/// Stored in `AuthorityGraph::parameters`, keyed by parameter name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParamSpec {
    /// Declared parameter type (`string`, `number`, `boolean`, `object`, etc.).
    /// Empty string when the YAML omitted `type:` (ADO defaults to string).
    pub param_type: String,
    /// True when the parameter declares a `values:` allowlist that constrains
    /// the set of acceptable inputs. When true, free-form shell injection is
    /// not possible because the runtime rejects any value outside the list.
    pub has_values_allowlist: bool,
}
373
/// Directed authority graph. Nodes are pipeline elements (steps, secrets,
/// artifacts, identities, images). Edges model authority/data flow.
///
/// Nodes and edges are stored in insertion order; a `NodeId` / `EdgeId` is
/// the element's index in the corresponding vector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorityGraph {
    /// Where the pipeline definition this graph was built from came from.
    pub source: PipelineSource,
    /// All nodes, indexed by `NodeId`.
    pub nodes: Vec<Node>,
    /// All edges, indexed by `EdgeId`.
    pub edges: Vec<Edge>,
    /// How complete is this graph? Set by the parser based on what it could resolve.
    pub completeness: AuthorityCompleteness,
    /// Human-readable reasons why the graph is Partial (if applicable).
    /// Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub completeness_gaps: Vec<String>,
    /// Graph-level metadata set by parsers (e.g. trigger type, platform-specific flags).
    /// Serialized in sorted-key order — see `Node.metadata` rationale.
    /// Omitted from serialized output when empty.
    #[serde(
        default,
        skip_serializing_if = "HashMap::is_empty",
        serialize_with = "serialize_string_map_sorted"
    )]
    pub metadata: HashMap<String, String>,
    /// Top-level pipeline `parameters:` declarations, keyed by parameter name.
    /// Populated by parsers that surface parameter metadata (currently ADO).
    /// Empty for platforms / pipelines that don't declare parameters.
    /// Serialized in sorted-key order — see `Node.metadata` rationale.
    #[serde(
        default,
        skip_serializing_if = "HashMap::is_empty",
        serialize_with = "serialize_string_map_sorted"
    )]
    pub parameters: HashMap<String, ParamSpec>,
}
405
406impl AuthorityGraph {
407    pub fn new(source: PipelineSource) -> Self {
408        Self {
409            source,
410            nodes: Vec::new(),
411            edges: Vec::new(),
412            completeness: AuthorityCompleteness::Complete,
413            completeness_gaps: Vec::new(),
414            metadata: HashMap::new(),
415            parameters: HashMap::new(),
416        }
417    }
418
419    /// Mark the graph as partially complete with a reason.
420    pub fn mark_partial(&mut self, reason: impl Into<String>) {
421        self.completeness = AuthorityCompleteness::Partial;
422        self.completeness_gaps.push(reason.into());
423    }
424
425    /// Add a node, returns its ID.
426    pub fn add_node(
427        &mut self,
428        kind: NodeKind,
429        name: impl Into<String>,
430        trust_zone: TrustZone,
431    ) -> NodeId {
432        let id = self.nodes.len();
433        self.nodes.push(Node {
434            id,
435            kind,
436            name: name.into(),
437            trust_zone,
438            metadata: HashMap::new(),
439        });
440        id
441    }
442
443    /// Add a node with metadata, returns its ID.
444    pub fn add_node_with_metadata(
445        &mut self,
446        kind: NodeKind,
447        name: impl Into<String>,
448        trust_zone: TrustZone,
449        metadata: HashMap<String, String>,
450    ) -> NodeId {
451        let id = self.nodes.len();
452        self.nodes.push(Node {
453            id,
454            kind,
455            name: name.into(),
456            trust_zone,
457            metadata,
458        });
459        id
460    }
461
462    /// Add a directed edge, returns its ID.
463    pub fn add_edge(&mut self, from: NodeId, to: NodeId, kind: EdgeKind) -> EdgeId {
464        let id = self.edges.len();
465        self.edges.push(Edge { id, from, to, kind });
466        id
467    }
468
469    /// Outgoing edges from a node.
470    pub fn edges_from(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
471        self.edges.iter().filter(move |e| e.from == id)
472    }
473
474    /// Incoming edges to a node.
475    pub fn edges_to(&self, id: NodeId) -> impl Iterator<Item = &Edge> {
476        self.edges.iter().filter(move |e| e.to == id)
477    }
478
479    /// All authority-bearing source nodes (Secret + Identity).
480    /// These are the BFS start set for propagation analysis.
481    pub fn authority_sources(&self) -> impl Iterator<Item = &Node> {
482        self.nodes
483            .iter()
484            .filter(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
485    }
486
487    /// All nodes of a given kind.
488    pub fn nodes_of_kind(&self, kind: NodeKind) -> impl Iterator<Item = &Node> {
489        self.nodes.iter().filter(move |n| n.kind == kind)
490    }
491
492    /// All nodes in a given trust zone.
493    pub fn nodes_in_zone(&self, zone: TrustZone) -> impl Iterator<Item = &Node> {
494        self.nodes.iter().filter(move |n| n.trust_zone == zone)
495    }
496
497    /// Get a node by ID.
498    pub fn node(&self, id: NodeId) -> Option<&Node> {
499        self.nodes.get(id)
500    }
501
502    /// Get an edge by ID.
503    pub fn edge(&self, id: EdgeId) -> Option<&Edge> {
504        self.edges.get(id)
505    }
506}
507
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn build_simple_graph() {
        let mut g = AuthorityGraph::new(PipelineSource {
            file: "deploy.yml".into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        });

        let secret = g.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let step_build = g.add_node(NodeKind::Step, "build", TrustZone::FirstParty);
        let artifact = g.add_node(NodeKind::Artifact, "dist.tar.gz", TrustZone::FirstParty);
        let step_deploy = g.add_node(NodeKind::Step, "deploy", TrustZone::ThirdParty);

        g.add_edge(step_build, secret, EdgeKind::HasAccessTo);
        g.add_edge(step_build, artifact, EdgeKind::Produces);
        g.add_edge(artifact, step_deploy, EdgeKind::Consumes);

        assert_eq!(g.nodes.len(), 4);
        assert_eq!(g.edges.len(), 3);
        assert_eq!(g.authority_sources().count(), 1);
        assert_eq!(g.edges_from(step_build).count(), 2);
        assert_eq!(g.edges_from(artifact).count(), 1); // Consumes flows artifact -> step
    }

    #[test]
    fn completeness_default_is_complete() {
        let g = AuthorityGraph::new(PipelineSource {
            file: "test.yml".into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        });
        assert_eq!(g.completeness, AuthorityCompleteness::Complete);
        assert!(g.completeness_gaps.is_empty());
    }

    #[test]
    fn mark_partial_records_reason() {
        let mut g = AuthorityGraph::new(PipelineSource {
            file: "test.yml".into(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        });
        g.mark_partial("secrets in run: block inferred, not precisely mapped");
        assert_eq!(g.completeness, AuthorityCompleteness::Partial);
        assert_eq!(g.completeness_gaps.len(), 1);
    }

    #[test]
    fn identity_scope_from_permissions() {
        assert_eq!(
            IdentityScope::from_permissions("write-all"),
            IdentityScope::Broad
        );
        assert_eq!(
            IdentityScope::from_permissions("{ contents: write }"),
            IdentityScope::Broad
        );
        assert_eq!(
            IdentityScope::from_permissions("{ contents: read }"),
            IdentityScope::Constrained
        );
        assert_eq!(
            IdentityScope::from_permissions("{ id-token: write }"),
            IdentityScope::Broad
        );
        assert_eq!(IdentityScope::from_permissions(""), IdentityScope::Broad);
        assert_eq!(
            IdentityScope::from_permissions("custom-scope"),
            IdentityScope::Unknown
        );
    }

    #[test]
    fn trust_zone_ordering() {
        assert!(TrustZone::Untrusted.is_lower_than(&TrustZone::FirstParty));
        assert!(TrustZone::ThirdParty.is_lower_than(&TrustZone::FirstParty));
        assert!(TrustZone::Untrusted.is_lower_than(&TrustZone::ThirdParty));
        assert!(!TrustZone::FirstParty.is_lower_than(&TrustZone::FirstParty));
    }

    // ── Pin validation (fuzz B3 regression) ─────────────────

    #[test]
    fn is_sha_pinned_accepts_lowercase_40_hex() {
        // Exactly 40 lowercase hex — the canonical legitimate form. The
        // fixture must be exactly 40 characters so the lower length bound is
        // exercised.
        assert!(is_sha_pinned(
            "actions/checkout@abc1234567890abcdef1234567890abcdef12345"
        ));
        // Mixed case is still structurally pinned (legitimate — Git accepts both).
        assert!(is_sha_pinned(
            "actions/checkout@ABCDEF1234567890abcdef1234567890ABCDEF12"
        ));
    }

    #[test]
    fn is_sha_pinned_structural_accepts_all_zero() {
        // Structural check is intentionally permissive — semantic rejection
        // happens in is_pin_semantically_valid. Documented in B3.
        assert!(is_sha_pinned(
            "actions/setup-python@0000000000000000000000000000000000000000"
        ));
    }

    #[test]
    fn is_sha_pinned_rejects_short_or_non_hex() {
        assert!(!is_sha_pinned("actions/checkout@v4"));
        assert!(!is_sha_pinned("actions/setup-node@a1b2c3"));
        // 39 hex chars — one short of a full SHA-1.
        assert!(!is_sha_pinned(
            "actions/checkout@abc1234567890abcdef1234567890abcdef1234"
        ));
        // 52 chars (long enough) but not all hex.
        assert!(!is_sha_pinned(
            "actions/checkout@somethingthatlookslikeashabutisntsha1234567890abcdef"
        ));
    }

    #[test]
    fn is_pin_semantically_valid_rejects_all_zero_sha() {
        // Fuzz B3 reproducer.
        assert!(!is_pin_semantically_valid(
            "actions/setup-python@0000000000000000000000000000000000000000"
        ));
    }

    #[test]
    fn is_pin_semantically_valid_accepts_real_looking_sha() {
        // Exactly 40 hex characters, not all zero.
        assert!(is_pin_semantically_valid(
            "actions/checkout@abc1234567890abcdef1234567890abcdef12345"
        ));
    }

    #[test]
    fn is_pin_semantically_valid_rejects_unpinned() {
        assert!(!is_pin_semantically_valid("actions/checkout@v4"));
        assert!(!is_pin_semantically_valid("actions/setup-node@a1b2c3"));
    }

    #[test]
    fn is_docker_digest_pinned_rejects_truncated() {
        // Fuzz B3 reproducer: previously accepted, now rejected.
        assert!(!is_docker_digest_pinned("alpine@sha256:abc"));
        // 65 chars (one too long).
        assert!(!is_docker_digest_pinned(
            "alpine@sha256:abc123def456abc123def456abc123def456abc123def456abc123def456abcde"
        ));
        // 63 chars (one short).
        assert!(!is_docker_digest_pinned(
            "alpine@sha256:abc123def456abc123def456abc123def456abc123def456abc123def456abc"
        ));
    }

    #[test]
    fn is_docker_digest_pinned_accepts_full_64_lowercase() {
        // Exactly 64 lowercase hex chars after `@sha256:`.
        assert!(is_docker_digest_pinned(
            "alpine@sha256:abc123def456abc123def456abc123def456abc123def456abc123def456abcd"
        ));
    }

    #[test]
    fn is_docker_digest_pinned_rejects_uppercase() {
        // Docker requires lowercase — uppercase indicates a hand-crafted /
        // tampered string and should not pass.
        assert!(!is_docker_digest_pinned(
            "alpine@sha256:ABC123DEF456ABC123DEF456ABC123DEF456ABC123DEF456ABC123DEF456ABCD"
        ));
    }

    #[test]
    fn is_pin_semantically_valid_rejects_all_zero_docker_digest() {
        assert!(!is_pin_semantically_valid(
            "alpine@sha256:0000000000000000000000000000000000000000000000000000000000000000"
        ));
    }

    #[test]
    fn is_pin_semantically_valid_accepts_real_docker_digest() {
        assert!(is_pin_semantically_valid(
            "alpine@sha256:abc123def456abc123def456abc123def456abc123def456abc123def456abcd"
        ));
    }
}
taudit_core/graph.rs

taudit_core/
graph.rs