# rust-sanitize 0.11.0
#
# Deterministic one-way data sanitization engine
# Documentation
# HAR (HTTP Archive) — auth headers, tokens, and API keys in HTTP traffic
#
# These patterns cover the streaming pass — they catch shaped secrets that
# appear in request/response bodies and raw header values where the structured
# profile cannot selectively filter by header name.
#
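# The `kind: field-name` signal after the regex rules handles high-entropy
# `value` fields (Authorization, Cookie, X-Api-Key, etc.) across the entire
# document; the `kind: allow` list at the end enumerates common non-secret
# values (HTTP methods, MIME types, encodings, placeholders).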

# HTTP Basic auth credential — "Basic <base64>".
# The capture ends either on 1–2 `=` padding characters or, for unpadded
# values, on a word boundary. A bare trailing `\b` cannot match between `=`
# and a following quote, space, or end of input, so it forces the regex to
# backtrack and leaves the padding outside the capture.
- kind: regex
  pattern: '(?i)\bBasic\s+([A-Za-z0-9+/]{8,}(?:={1,2}|\b))'
  category: auth_token
  label: har_basic_auth

# HTTP Bearer token — "Bearer <token>".
# The character class is the RFC 6750 b64token alphabet; b64token also allows
# a trailing run of `=`, so the capture ends either on that padding or, when
# there is none, on a word boundary. Without the `=+` branch the padding of a
# base64-padded token would stay outside the capture.
- kind: regex
  pattern: '(?i)\bBearer\s+([A-Za-z0-9\-._~+/]{16,}(?:=+|\b))'
  category: auth_token
  label: har_bearer_token

# JWT — three base64url segments separated by dots (header.payload.signature).
# The header and payload segments must both start with `eyJ`, the base64url
# prefix produced by a JSON object whose first key begins with an ASCII letter
# (`{"a…`). The signature segment must be at least 20 characters long, so a
# token with an empty third segment does not match this rule.
- kind: regex
  pattern: '\b(eyJ[A-Za-z0-9_-]{10,}\.eyJ[A-Za-z0-9_-]{20,}\.[A-Za-z0-9_-]{20,})\b'
  category: jwt
  label: har_jwt

# AWS access key IDs and other IAM unique identifiers: one of the listed
# four-letter prefixes followed by 16 uppercase alphanumerics (20 characters
# in total), delimited by word boundaries.
# Per the AWS IAM identifier documentation, AKIA and ASIA prefix long-term and
# temporary access key IDs; AROA, AIDA, ANPA, ANVA and AIPA prefix role, user,
# managed-policy, policy-version and instance-profile IDs respectively.
- kind: regex
  pattern: '\b((?:AKIA|ASIA|AROA|AIDA|ANPA|ANVA|AIPA)[A-Z0-9]{16})\b'
  category: auth_token
  label: har_aws_access_key

# AWS Signature v4 value: 64 hex characters that follow an `x-amz-signature`
# key and any run of `=`, `:`, whitespace, or quote characters.
# `(?i)` applies to the whole pattern, so both the key name and the hex digits
# are matched case-insensitively (uppercase hex is accepted as well).
# Inside the single-quoted YAML scalar, `''` is one literal `'`.
- kind: regex
  pattern: '(?i)x-amz-signature[=:\s"'']+([0-9a-f]{64})\b'
  category: auth_token
  label: har_aws_signature

# OAuth2 / OIDC tokens in JSON response bodies.
# HAR stores bodies as JSON strings (`content.text`, `postData.text`), so a
# JSON body appears in the raw file with backslash-escaped quotes
# (`\"access_token\":\"…\"`). The closing quote of the key and both quotes
# around the value therefore accept an optional leading backslash; the plain
# unescaped form still matches exactly as before.
# NOTE(review): assumes the streaming pass scans the raw, still-escaped text;
# the extra `\\?` is harmless if the engine unescapes strings first.
- kind: regex
  pattern: '"(?:access_token|refresh_token|id_token|token)\\?"\s*:\s*\\?"([A-Za-z0-9\-._~+/]{16,})\\?"'
  category: auth_token
  label: har_oauth_token

# API key in query string or form body (api_key=, apikey=, access_token=, etc.)
# The parameter name may follow `?` or `&` (query string / later form field),
# or sit at the start of the text, after whitespace, or after a quote — the
# first field of a form-encoded body has no `?`/`&` in front of it.
# `api[-_]?key` already covers `apikey`, so no separate alternative is needed.
# Inside the single-quoted YAML scalar, `''` is one literal `'`.
- kind: regex
  pattern: '(?i)(?:^|[?&"''\s])(?:api[-_]?key|access[-_]?token|auth[-_]?token|client[-_]?secret)=([A-Za-z0-9\-._~+/%]{16,})'
  category: auth_token
  label: har_api_key_param

# Session/auth cookie value patterns inside Cookie or Set-Cookie header strings.
# A cookie name may follow `;` (later cookies), but the first cookie of a
# header follows `:`, whitespace, or an opening quote instead. `^` alone is
# not enough: without multi-line mode it matches only at the very start of the
# scanned text.
# The name must begin with one of the listed stems and may continue with any
# word characters or hyphens before `=`.
# Inside the single-quoted YAML scalar, `''` is one literal `'`.
- kind: regex
  pattern: '(?i)(?:^|[;:"''\s])\s*(?:session(?:id)?|auth|token|sid|__Secure-|__Host-)[-\w]*=([A-Za-z0-9\-._~+/%]{16,})'
  category: auth_token
  label: har_session_cookie

# X-* auth headers in raw strings (postData, content.text, or log lines).
# The header name is followed by any run of separator characters and then the
# credential. The separator class includes a backslash so that escaped-JSON
# forms such as `\"x-api-key\":\"…\"`, which is how JSON inside a HAR string
# field appears in the raw file, are matched as well.
# NOTE(review): assumes the streaming pass scans the raw, still-escaped text;
# the extra `\\` is harmless if the engine unescapes strings first.
# Inside the single-quoted YAML scalar, `''` is one literal `'`.
- kind: regex
  pattern: '(?i)(?:x-api-key|x-auth-token|x-access-token|x-secret-token|x-csrf-token)[:\s"''=\\]+([A-Za-z0-9\-._~+/]{16,})'
  category: auth_token
  label: har_x_auth_header

# Field-name signal: any field named exactly `value` is a redaction candidate,
# wherever it appears in the document. This picks up Authorization, Cookie,
# X-Api-Key, and other header/query/cookie values that the profile's explicit
# rules cannot select by name.
# The 4.0 threshold lets low-entropy values (gzip, text/html, keep-alive,
# etc.) pass through untouched.
- kind: field-name
  pattern: '^value$'
  category: auth_token
  label: har_value_signal
  threshold: 4.0

# Allow list of common non-secret values seen in HTTP traffic.
# NOTE(review): presumably these values are exempt from redaction and `*` in
# the template entries acts as a wildcard — confirm against the engine docs.
# Every entry is quoted so that `true`, `false`, `null`, `0`, and `1` stay
# strings rather than being typed by the YAML parser.
- kind: allow
  values:
    # Request methods
    - 'GET'
    - 'POST'
    - 'PUT'
    - 'DELETE'
    - 'PATCH'
    - 'HEAD'
    - 'OPTIONS'
    # Protocol versions
    - 'HTTP/1.0'
    - 'HTTP/1.1'
    - 'HTTP/2.0'
    - 'HTTP/3.0'
    # Frequently seen media types
    - 'application/json'
    - 'application/x-www-form-urlencoded'
    - 'multipart/form-data'
    - 'text/html'
    - 'text/plain'
    - 'text/css'
    - 'text/javascript'
    - 'application/javascript'
    - 'image/png'
    - 'image/jpeg'
    - 'image/gif'
    - 'image/webp'
    - 'image/svg+xml'
    # Content and transfer codings
    - 'gzip'
    - 'deflate'
    - 'br'
    - 'zstd'
    - 'identity'
    - 'chunked'
    # Cache-control and connection tokens
    - 'no-cache'
    - 'no-store'
    - 'keep-alive'
    - 'close'
    - 'upgrade-insecure-requests'
    # Authentication scheme names (the scheme word only, not the credential)
    - 'Bearer'
    - 'Basic'
    - 'Digest'
    - 'Negotiate'
    - 'NTLM'
    # Literal booleans, nulls, and small integers
    - 'true'
    - 'false'
    - 'null'
    - 'none'
    - '0'
    - '1'
    # Loopback and unspecified addresses
    - 'localhost'
    - '127.0.0.1'
    - '0.0.0.0'
    - '::1'
    # Placeholder words and template expressions
    - 'changeme'
    - 'example'
    - 'sample'
    - 'placeholder'
    - '${*}'
    - '{{*}}'
    # Example domains
    - 'example.com'
    - 'example.org'