# rust-sanitize 0.11.0

# Deterministic one-way data sanitization engine
# Documentation
# GitLab — gitlab.rb Omnibus config, .gitlab-ci.yml CI/CD variables, Helm chart values

# --- Omnibus gitlab.rb (Ruby key = 'value' DSL) ---
# Sensitive key substrings mirror gitlab-scrubber/internal/scrubber/unstructured/gitlab_rb.go
# sensitiveKeyPatterns: authorization, aws_access_key, accesskey, secretkey,
# accountkey, key_base, secret, _key, bind_dn, _fingerprint, token, _dsn
# Plus "password" (handled more aggressively in the scrubber).
# gitlab.rb is handled by the key-value processor: '=' is the primary
# delimiter and '#' starts a comment.
- processor: key-value
  extensions:
    - '.rb'
  include:
    - 'gitlab.rb'
    - '*/gitlab.rb'
  options:
    delimiter: '='
    secondary_delimiter: '=>,:'
    comment_prefix: '#'
  # NOTE(review): assuming '*' matches any run of characters, some entries
  # are subsumed by broader ones listed before them ('*secretkey*' by
  # '*secret*', '*aws_access_key*' by '*_key*'). Which label is applied
  # depends on the engine's rule precedence; confirm before reordering.
  fields:
    # Passwords use their own category.
    - pattern: '*password*'
      category: 'custom:password'
      label: gitlab_rb_password
    # Secrets, tokens and key material.
    - pattern: '*secret*'
      category: auth_token
      label: gitlab_rb_secret
    - pattern: '*token*'
      category: auth_token
      label: gitlab_rb_token
    - pattern: '*_key*'
      category: auth_token
      label: gitlab_rb_key
    - pattern: '*key_base*'
      category: auth_token
      label: gitlab_rb_key_base
    - pattern: '*authorization*'
      category: auth_token
      label: gitlab_rb_authorization
    # Object-storage style access/secret/account keys.
    - pattern: '*aws_access_key*'
      category: auth_token
      label: gitlab_rb_aws_access_key
    - pattern: '*accesskey*'
      category: auth_token
      label: gitlab_rb_accesskey
    - pattern: '*secretkey*'
      category: auth_token
      label: gitlab_rb_secretkey
    - pattern: '*accountkey*'
      category: auth_token
      label: gitlab_rb_accountkey
    # Bind DNs and fingerprints.
    - pattern: '*bind_dn*'
      category: auth_token
      label: gitlab_rb_ldap_bind_dn
    - pattern: '*_fingerprint*'
      category: auth_token
      label: gitlab_rb_fingerprint
    # DSN values are categorized as URLs rather than tokens.
    - pattern: '*_dsn*'
      category: url
      label: gitlab_rb_sentry_dsn

# --- .gitlab-ci.yml and GitLab CI config includes ---
# CI pipeline files: every entry under a `variables` mapping is scrubbed
# with the auth_token category.
- processor: yaml
  extensions:
    - '.yml'
    - '.yaml'
  include:
    - '.gitlab-ci.yml'
    - 'gitlab-ci.yml'
    - '*/.gitlab-ci.yml'
    # Dot-less name inside a directory, mirroring the dotted entry above.
    - '*/gitlab-ci.yml'
  fields:
    # Variables declared at the top level of the pipeline file.
    - pattern: 'variables.*'
      category: auth_token
      label: ci_variable
    # Variables declared under a job (e.g. deploy.variables.API_KEY).
    # NOTE(review): relies on a leading '*.' matching any parent path, as the
    # Helm "*.password" rule in this file does; confirm with the engine.
    - pattern: '*.variables.*'
      category: auth_token
      label: ci_job_variable

# --- GitLab Helm chart values (gitlab-specific deep paths) ---
# Note: broad *.password / *.token patterns intentionally catch inlined credentials
# in user override files. K8s secret reference fields (secret: name, key: name)
# are also matched — redacting object names is acceptable in this context.
# Helm values overrides: generic leaf-name rules first, then explicit
# GitLab chart paths.
- processor: yaml
  extensions:
    - '.yml'
    - '.yaml'
  include:
    - 'values.yaml'
    - 'values-*.yaml'
    - 'gitlab-values.yaml'
    - 'gitlab-values-*.yaml'
  fields:
    # Leaf-name rules meant to catch credentials inlined at any path.
    - pattern: '*.password'
      category: 'custom:password'
      label: gitlab_helm_password
    - pattern: '*.token'
      category: auth_token
      label: gitlab_helm_token
    - pattern: '*.authToken'
      category: auth_token
      label: gitlab_helm_auth_token

    # Explicit paths under `global`.
    - pattern: 'global.smtp.password'
      category: 'custom:password'
      label: gitlab_helm_smtp_password
    - pattern: 'global.psql.password'
      category: 'custom:password'
      label: gitlab_helm_psql_password
    - pattern: 'global.redis.auth.secret'
      category: auth_token
      label: gitlab_helm_redis_auth_secret
    - pattern: 'global.gitaly.authToken'
      category: auth_token
      label: gitlab_helm_gitaly_auth_token
    - pattern: 'global.initialRootPassword'
      category: 'custom:password'
      label: gitlab_helm_initial_root_password

    # MinIO object-storage key pair.
    - pattern: 'minio.accessKey'
      category: auth_token
      label: gitlab_helm_minio_access_key
    - pattern: 'minio.secretKey'
      category: auth_token
      label: gitlab_helm_minio_secret_key

    # `secret` entries one level below the KAS and Pages sub-charts.
    - pattern: 'gitlab.kas.*.secret'
      category: auth_token
      label: gitlab_helm_kas_secret
    - pattern: 'gitlab.gitlab-pages.*.secret'
      category: auth_token
      label: gitlab_helm_pages_secret

    # Registry storage backend credentials (S3/GCS/Azure).
    - pattern: 'registry.storage.*.accesskey'
      category: auth_token
      label: gitlab_helm_registry_access_key
    - pattern: 'registry.storage.*.secretkey'
      category: auth_token
      label: gitlab_helm_registry_secret_key

# --- GitLab Rails production JSON log (production_json.log) ---
# Sensitive fields per gitlab-scrubber structured/field_extractors.go:
# keys containing "user"/"author", "ip", "email", project path fields.
# production_json.log: user names, client IPs, caller id and project,
# read from top-level keys and from the `meta` object.
- processor: json
  extensions:
    - '.log'
  include:
    - 'production_json.log'
    - '*/production_json.log'
    - 'log/production_json.log'
  fields:
    # Top-level request fields.
    - pattern: 'username'
      category: name
      label: rails_log_username
    - pattern: 'remote_ip'
      category: ipv4
      label: rails_log_remote_ip
    # Fields nested under `meta`.
    - pattern: 'meta.user'
      category: name
      label: rails_log_meta_user
    - pattern: 'meta.remote_ip'
      category: ipv4
      label: rails_log_meta_remote_ip
    - pattern: 'meta.caller_id'
      category: name
      label: rails_log_meta_caller_id
    - pattern: 'meta.project'
      category: name
      label: rails_log_meta_project

# --- Sidekiq JSON log (sidekiq.log) ---
# "args" is scrubbed entirely — job arguments can contain arbitrary secrets.
# sidekiq.log: job arguments plus the user, project and client IP
# recorded under `meta`.
- processor: json
  extensions:
    - '.log'
  include:
    - 'sidekiq.log'
    - '*/sidekiq.log'
    - 'log/sidekiq.log'
  fields:
    # The whole `args` value is treated as a token.
    - pattern: 'args'
      category: auth_token
      label: sidekiq_log_args
    # Fields nested under `meta`.
    - pattern: 'meta.user'
      category: name
      label: sidekiq_log_meta_user
    - pattern: 'meta.project'
      category: name
      label: sidekiq_log_meta_project
    - pattern: 'meta.remote_ip'
      category: ipv4
      label: sidekiq_log_meta_remote_ip

# --- GitLab Workhorse log (gitlab-workhorse.log / workhorse.log) ---
# Workhorse JSON log: client address, request host and request URI.
- processor: json
  extensions:
    - '.log'
  include:
    - 'gitlab-workhorse.log'
    - 'workhorse.log'
    - '*/gitlab-workhorse.log'
    # Added so the short name is also matched in any directory, as the long
    # name already is; previously only `log/` was covered.
    - '*/workhorse.log'
    - 'log/workhorse.log'
  fields:
    - pattern: 'remote_addr'
      category: ipv4
      label: workhorse_log_remote_addr
    - pattern: 'host'
      category: hostname
      label: workhorse_log_host
    - pattern: 'uri'
      category: url
      label: workhorse_log_uri

# --- Gitaly log (gitaly.log) ---
# grpc.request.glProjectPath contains the group/project path.
# grpc.request.repoPath is a content-addressed hash path — not user data, skip.
# gitaly.log: the project path carried in the gRPC request and the
# peer address.
- processor: json
  extensions:
    - '.log'
  include:
    - 'gitaly.log'
    - '*/gitaly.log'
    - 'log/gitaly.log'
  fields:
    - pattern: 'grpc.request.glProjectPath'
      category: name
      label: gitaly_log_project_path
    - pattern: 'peer.address'
      category: ipv4
      label: gitaly_log_peer_address

# --- GitLab Shell log (gitlab-shell.log / gitlab_shell.log) ---
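# original_command (the key matched below) leaks group/project.git from
# git-upload-pack / git-receive-pack calls.
# NOTE(review): this comment previously spelled the key "originalCommand";
# confirm the exact key name emitted by gitlab-shell.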
# GitLab Shell JSON log: user name, client address and the original
# command string.
- processor: json
  extensions:
    - '.log'
  include:
    - 'gitlab-shell.log'
    - 'gitlab_shell.log'
    - '*/gitlab-shell.log'
    # Added so the underscore spelling is also matched inside a directory,
    # as the hyphenated spelling already is.
    - '*/gitlab_shell.log'
    - 'log/gitlab-shell.log'
  fields:
    - pattern: 'username'
      category: name
      label: shell_log_username
    - pattern: 'remote_addr'
      category: ipv4
      label: shell_log_remote_addr
    - pattern: 'original_command'
      category: name
      label: shell_log_original_command