# rust-sanitize 0.11.0
#
# Deterministic one-way data sanitization engine
# Documentation
# nginx — access logs, error logs, upstream auth headers
# Sources: gitlab-scrubber nginx.go (access log) / nginx_error.go (error log)

# Access log: client IP — first token of nginx combined log format
# Line format: IP - user [timestamp] "request" status bytes "referer" "agent" extra
# The pattern starts with ^ and requires the trailing `- user [` context, so
# only an address in the leading position is matched; that context sits
# outside capture group 1.
# Group 1 is either a dotted quad or a colon-separated hex form. The hex form
# must begin with a hex digit, so an address starting with "::" (e.g. "::1")
# is not matched by this rule.
# NOTE(review): the IPv6 alternative is also filed under category ipv4 —
# confirm the engine produces an acceptable replacement for IPv6 input.
- kind: regex
  pattern: '^(\d{1,3}(?:\.\d{1,3}){3}|[0-9a-fA-F]{1,4}(?::[0-9a-fA-F]{0,4}){2,7})\s+-\s+\S+\s+\['
  category: ipv4
  label: nginx_access_client_ip

# Access log: HTTP referer URL (second-to-last quoted field in combined format)
# The match is anchored on the preceding quoted request line
# ("METHOD URI HTTP/x.y"), followed by a numeric status and a numeric byte
# count; group 1 is the http:// or https:// URL inside the next pair of quotes.
# A referer that is not an http(s) URL (such as "-") does not match.
- kind: regex
  pattern: '"\S+\s+\S+\s+HTTP/[\d.]+"\s+\d+\s+\d+\s+"(https?://[^"]+)"'
  category: url
  label: nginx_access_referer

# Access/error logs: query params with user or email keys
# Matches the keys user, username and email (case-insensitive) when preceded
# by ? or &. Group 1 is the value, which runs until the next &, whitespace,
# double quote or single quote (the '' in the YAML source is an escaped ').
# NOTE(review): values of the email key are also filed under category name —
# confirm this is intended rather than an email-specific category.
- kind: regex
  pattern: '(?i)[?&](?:user(?:name)?|email)=([^&\s"'']+)'
  category: name
  label: nginx_query_user_email

# Error log: client IP — structured field after "client: "
# Format: ... message, client: IP, server: hostname, request: "...", host: "..."
# The field must be delimited by a leading and a trailing comma. Group 1 is
# either a dotted quad or any run of 2–39 hex digits and colons; the second
# alternative is a loose shape check, not a strict IPv6 validator.
# NOTE(review): the hex/colon alternative is also filed under category ipv4 —
# confirm the engine produces an acceptable replacement for IPv6 input.
- kind: regex
  pattern: ',\s*client:\s*(\d{1,3}(?:\.\d{1,3}){3}|[0-9a-fA-F:]{2,39}),'
  category: ipv4
  label: nginx_error_client_ip

# Error log: server hostname — structured field after "server: "
# The field must be delimited by a leading and a trailing comma; group 1 is
# the hostname. Every dot-separated label may contain letters, digits and
# interior hyphens (1–63 chars); only the final label (the TLD) is restricted
# to two or more letters. This keeps dotted-quad IPs out of this rule while
# still matching names such as api.my-company.com or app.svc2.example.com,
# whose non-leading labels contain hyphens or digits.
# Single-label names (e.g. "localhost", "_") are intentionally not matched.
- kind: regex
  pattern: ',\s*server:\s*((?:[a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}),'
  category: hostname
  label: nginx_error_server_host

# Error log: Host request header — structured field after "host: "
# The value is double-quoted in the log line; group 1 is the hostname only.
# Every dot-separated label may contain letters, digits and interior hyphens
# (1–63 chars); only the final label (the TLD) is restricted to two or more
# letters, so names like api.my-company.com or app.svc2.example.com match.
# A Host header may carry a port ("example.com:8080"); the optional :port is
# accepted outside the capture group so such lines still match while the
# captured value stays a bare hostname.
- kind: regex
  pattern: ',\s*host:\s*"((?:[a-zA-Z0-9](?:[a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})(?::\d{1,5})?"'
  category: hostname
  label: nginx_error_host_header

# Error log: failed basic-auth username from message field
# nginx's basic-auth module logs the attempted username in two messages:
#   user "NAME" was not found in "/path/to/htpasswd"
#   user "NAME": password mismatch
# Both are covered; group 1 is the quoted username (1–64 chars, no embedded
# double quote). Matching is case-insensitive.
- kind: regex
  pattern: '(?i)\buser\s+"([^"]{1,64})"(?:\s+was\s+not\s+found|:\s*password\s+mismatch)'
  category: name
  label: nginx_auth_username

# Upstream backend URL logged on proxy errors
# Matches the quoted value after "upstream:" (case-insensitive). Group 1 is a
# scheme of at least two characters (a letter followed by letters, digits,
# +, . or -), then "://", then everything up to whitespace, a quote
# (double or single — the '' in the YAML source is an escaped ') or < / >.
# The closing double quote is required, so unterminated values do not match.
- kind: regex
  pattern: '(?i)\bupstream:\s*"([a-z][a-z0-9+.\-]+://[^\s"''<>]+)"'
  category: url
  label: nginx_upstream_url

# X-Forwarded-For / X-Real-IP values in debug or access logs
# Header name is matched case-insensitively. Group 1 is the whole address
# list: one or more tokens made of digits/dots or hex digits/colons, separated
# by any mix of commas and whitespace. The list is captured as a single value.
# NOTE(review): because whitespace alone is accepted as a separator and the
# hex alternative matches the letters a–f, a following token that starts with
# hex letters (e.g. "accept") can be partially absorbed into the capture —
# verify against real log samples.
# NOTE(review): IPv6 tokens are also filed under category ipv4 — confirm.
- kind: regex
  pattern: '(?i)(?:x-real-ip|x-forwarded-for):\s*((?:[\d.]+|[0-9a-fA-F:]+)(?:[,\s]+(?:[\d.]+|[0-9a-fA-F:]+))*)'
  category: ipv4
  label: nginx_forwarded_ip

# Allow list: literal values attached to this rule set.
# NOTE(review): presumably these values are exempted from replacement even
# when a pattern above captures them — confirm against the engine's docs.
- kind: allow
  values:
    # nginx product names, config file name and vendor domains
    - "nginx"
    - "nginx.conf"
    - "nginx.org"
    - "nginx.com"
    # Common local Unix socket upstream targets
    - "unix:/var/run/php/php-fpm.sock"
    - "unix:/var/run/fastcgi.sock"
    - "unix:/var/run/nginx.sock"
    # Boolean-, null- and flag-like literals (quoted so they stay strings)
    - "true"
    - "false"
    - "yes"
    - "no"
    - "null"
    - "none"
    - "nil"
    - "0"
    - "1"
    # Loopback and wildcard hosts/addresses
    - "localhost"
    - "127.0.0.1"
    - "0.0.0.0"
    - "::1"
    # Generic placeholder words
    - "changeme"
    - "example"
    - "sample"
    - "placeholder"
    # Template placeholders; the * looks like a wildcard — TODO confirm how
    # the engine interprets it
    - "${*}"
    - "{{*}}"
    # Reserved example domains
    - "example.com"
    - "example.org"