# rust-sanitize 0.11.0
#
# Deterministic one-way data sanitization engine
# Documentation
# GitLab — token detection patterns sourced from TruffleHog gitlab/v1, v2, v3
# and gitlab-scrubber sensitiveKeyPatterns

# Personal access tokens — v2 (glpat- + 20-22 chars).
# Matches the literal prefix "glpat-" followed by 20-22 characters drawn from
# letters, digits, "-", "=" and "_", with a word boundary on each side.
# Capture group 1 spans the entire token, prefix included.
- kind: regex
  pattern: '\b(glpat-[a-zA-Z0-9\-=_]{20,22})\b'
  category: auth_token
  label: gitlab_personal_token_v2

# Personal access tokens — v3 (extended format with checksum segments).
# Ref: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/169322
# Shape: "glpat-" + 27-300 body characters (letters, digits, "-", "=", "_"),
# then "." + two characters from [0-9a-z], then "." + nine characters from
# [a-z0-9], with a word boundary on each side.
# Capture group 1 spans the entire token, prefix and trailing segments included.
- kind: regex
  pattern: '\b(glpat-[a-zA-Z0-9\-=_]{27,300}\.[0-9a-z]{2}\.[a-z0-9]{9})\b'
  category: auth_token
  label: gitlab_personal_token_v3

# CI/CD job token (short-lived, scoped to a pipeline).
# Shape: "glcbt-" + a 1-5 character alphanumeric segment + one separator +
# exactly 20 characters from [A-Za-z0-9_-], with a word boundary on each side.
# Published detectors (e.g. the gitleaks gitlab-cicd-job-token rule) describe
# the separator as "_" (for example "glcbt-64_<20 chars>"). "-" is accepted as
# well so that every input matched by the earlier two-digit/hyphen form of this
# rule still matches.
# No capture group is defined.
- kind: regex
  pattern: '\bglcbt-[0-9a-zA-Z]{1,5}[_-][A-Za-z0-9_-]{20}\b'
  category: auth_token
  label: gitlab_ci_job_token

# Deploy token.
# Matches the literal prefix "gldt-" followed by exactly 20 characters from
# [A-Za-z0-9_-], with a word boundary on each side.
# No capture group is defined.
- kind: regex
  pattern: '\bgldt-[A-Za-z0-9_-]{20}\b'
  category: auth_token
  label: gitlab_deploy_token

# Feed token (RSS/Atom).
# Matches the literal prefix "glft-" followed by exactly 20 characters from
# [A-Za-z0-9_-], with a word boundary on each side.
# No capture group is defined.
- kind: regex
  pattern: '\bglft-[A-Za-z0-9_-]{20}\b'
  category: auth_token
  label: gitlab_feed_token

# Runner authentication token.
# Matches the literal prefix "glrt-" followed by exactly 20 characters from
# [A-Za-z0-9_-], with a word boundary on each side.
# No capture group is defined.
- kind: regex
  pattern: '\bglrt-[A-Za-z0-9_-]{20}\b'
  category: auth_token
  label: gitlab_runner_token

# SCIM provisioning token.
# Matches the literal prefix "glsoat-" followed by exactly 20 characters from
# [A-Za-z0-9_-], with a word boundary on each side.
# No capture group is defined.
- kind: regex
  pattern: '\bglsoat-[A-Za-z0-9_-]{20}\b'
  category: auth_token
  label: gitlab_scim_token

# GitLab Agent (KAS) token.
# Matches the literal prefix "glagent-" followed by 50 or more characters from
# [A-Za-z0-9_-] (no upper bound), ending at a word boundary.
# No capture group is defined.
- kind: regex
  pattern: '\bglagent-[A-Za-z0-9_-]{50,}\b'
  category: auth_token
  label: gitlab_agent_token

# Legacy context-keyed token (v1 TruffleHog style — 20-22 char alphanumeric
# preceded by "gitlab" context keyword; entropy >= 3.6 in TH).
# Case-insensitive ((?i)). The key must be either "gitlab" + an optional "_"
# or "-" + one of secret|token|key|api|pat, or the literal "gl_token". It must
# be followed immediately by one or more separator characters: whitespace,
# ":", "=", a double quote or a single quote. The doubled '' inside the
# pattern is YAML single-quote escaping for one literal '.
# Capture group 1 is the value only: one alphanumeric character followed by
# 19-21 characters from letters, digits, "-", "=", "_", ending at a word
# boundary.
# NOTE(review): compound key names such as GITLAB_API_TOKEN or
# GITLAB_ACCESS_TOKEN do not match, because a separator must directly follow
# the keyword — confirm whether that is intended.
# NOTE(review): this rule expresses no entropy threshold; the ">= 3.6" above
# describes the upstream detector — confirm whether the engine applies one.
- kind: regex
  pattern: '(?i)(?:gitlab[_-]?(?:secret|token|key|api|pat)|gl_token)[\s:="'']+([a-zA-Z0-9][a-zA-Z0-9\-=_]{19,21})\b'
  category: auth_token
  label: gitlab_context_token

# Sentry DSN with gitlab context (DSN contains an auth key before the @ sign).
# Case-insensitive ((?i)). Requires the key name sentry_dsn or
# sentry_clientside_dsn, then one or more separator characters (whitespace,
# ":", "=", double or single quote), then "http://" or "https://", then
# exactly 32 alphanumeric characters immediately followed by "@".
# Capture group 1 is the 32-character key only; the scheme and host are not
# captured.
# NOTE(review): a DSN of the form key:secret@host does not match, since ":"
# is not permitted between the key and "@" — confirm whether that form needs
# to be covered.
- kind: regex
  pattern: '(?i)(?:sentry_dsn|sentry_clientside_dsn)[\s:="'']+https?://([A-Za-z0-9]{32})@'
  category: auth_token
  label: gitlab_sentry_key

# Allow-list for this rule set.
# NOTE(review): presumably these values are exempt from sanitization even when
# a rule above would otherwise pick them up — confirm against the engine's
# `allow` semantics.
# All values are quoted so that "true", "no", "null", "0" etc. stay strings
# rather than being typed as booleans, nulls or integers by the YAML parser.
- kind: allow
  values:
    # Product, host and project names that contain the word "gitlab".
    - "gitlab"
    - "gitlab.com"
    - "gitlab.example.com"
    - "gitlab-ci"
    - "gitlab-ci.yml"
    - ".gitlab-ci.yml"
    - "gitlab-runner"
    - "gitlab-org"
    - "gitlab-ce"
    - "gitlab-ee"
    # Boolean-like, null-like and trivial numeric literals.
    - "true"
    - "false"
    - "yes"
    - "no"
    - "null"
    - "none"
    - "nil"
    - "0"
    - "1"
    # Loopback and wildcard addresses.
    - "localhost"
    - "127.0.0.1"
    - "0.0.0.0"
    - "::1"
    # Common placeholder/example values used in commented-out config lines.
    # Without these, the augmented scanner would propagate them as literals and
    # replace the words wherever they appear — including inside key names.
    # NOTE(review): entries containing "*" look like wildcard patterns —
    # confirm the engine's matching rules for allow values.
    - "token"
    - "secret"
    - "password"
    - "YOUR-*"
    - "GENERATED_*"
    - "YOUR_*"
    - "CHANGE_*"
    - "REPLACE_*"
    - "ENTER_*"
    - "<*>"
    - "changeme"
    - "example"
    - "sample"
    - "placeholder"
    - "${*}"
    - "{{*}}"
    - "example.com"
    - "example.org"