xbp 10.15.4

XBP is a zero-config build pack that can also interact with proxies, Kafka, sockets, and synthetic monitors.
Documentation
import os
import re
from collections import defaultdict
from pathlib import Path

def find_env_variables(project_root):
    """Scan a project for ``process.env.NAME`` references and their defaults.

    Walks ``project_root`` recursively, skipping dependency/build output
    directories, and reads every file with a JavaScript/TypeScript extension.
    A default is recognised when a reference is directly followed by
    ``|| <literal>``, where the literal is a quoted string, a number, or
    ``true``/``false``.

    Directories and files are visited in sorted order, so when several
    different defaults exist for one variable the winner is deterministic:
    the first default encountered is kept. A reference without a default
    never prevents a default found later (in the same or another file) from
    being recorded.

    Args:
        project_root: Directory to scan.

    Returns:
        dict mapping each variable name to its default rendered as text
        (string defaults are wrapped in double quotes; numbers and booleans
        are stored as written), or ``None`` when no default was found.
    """
    env_vars = {}
    pattern = re.compile(r"process\.env\.([A-Z_][A-Z0-9_]*)", re.IGNORECASE)
    default_pattern = re.compile(
        r'process\.env\.([A-Z_][A-Z0-9_]*)\s*\|\|\s*(["\'])((?:(?!\2).)*)\2',
        re.IGNORECASE,
    )
    default_pattern_number = re.compile(
        r"process\.env\.([A-Z_][A-Z0-9_]*)\s*\|\|\s*(\d+(?:\.\d+)?)", re.IGNORECASE
    )
    # The trailing \b stops identifiers such as ``trueish`` or ``falseValue``
    # from being mistaken for boolean literals.
    default_pattern_bool = re.compile(
        r"process\.env\.([A-Z_][A-Z0-9_]*)\s*\|\|\s*(true|false)\b", re.IGNORECASE
    )
    # Each entry pairs a pattern with the way its match is rendered as text.
    # Order matters: it is the precedence used within a single file.
    default_extractors = (
        (default_pattern, lambda m: f'"{m.group(3)}"'),
        (default_pattern_number, lambda m: m.group(2)),
        (default_pattern_bool, lambda m: m.group(2)),
    )
    ignore_dirs = {
        "node_modules",
        ".next",
        ".git",
        "dist",
        "build",
        "__pycache__",
        ".turbo",
        "out",
        ".cache",
    }
    file_extensions = {".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"}

    for root, dirs, files in os.walk(project_root):
        # Prune in place so os.walk does not descend into ignored directories;
        # sorting makes the traversal (and thus the chosen default) stable.
        dirs[:] = sorted(d for d in dirs if d not in ignore_dirs)
        for file in sorted(files):
            if Path(file).suffix not in file_extensions:
                continue
            file_path = os.path.join(root, file)
            try:
                with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                    content = f.read()
            except OSError as e:
                # Best effort: an unreadable file must not abort the scan.
                print(f"Error reading {file_path}: {e}")
                continue

            for extractor, render in default_extractors:
                for match in extractor.finditer(content):
                    var_name = match.group(1)
                    # Fill in a default when the variable is new OR was only
                    # seen so far as a bare reference (stored as None).
                    if env_vars.get(var_name) is None:
                        env_vars[var_name] = render(match)
            for match in pattern.finditer(content):
                env_vars.setdefault(match.group(1), None)
    return env_vars

def categorize_env_vars(env_vars):
    """Bucket variables under a heading chosen from the variable-name prefix.

    Args:
        env_vars: Mapping of variable name to its default (or ``None``).

    Returns:
        ``defaultdict(list)`` mapping a heading to ``(name, default)`` tuples
        in the order the variables were supplied. Names that match no known
        prefix are placed under ``"Other"``.
    """
    heading_by_prefix = {
        "NEXT_PUBLIC_": "Next.js Configuration",
        "NEXT_": "Next.js Configuration",
        "NODE_": "Node.js Configuration",
        "DATABASE_": "Database Configuration",
        "SUPABASE_": "Supabase Configuration",
        "AUTH_": "Authentication",
        "BETTER_AUTH_": "Better Auth Configuration",
        "SMTP_": "Email Configuration",
        "EMAIL_": "Email Configuration",
        "RESEND_": "Email Configuration",
        "AWS_": "AWS Configuration",
        "S3_": "S3 Storage Configuration",
        "MINIO_": "MinIO Storage Configuration",
        "DIGITALOCEAN_": "DigitalOcean Configuration",
        "STRIPE_": "Stripe Configuration",
        "MOLLIE_": "Mollie Payment Configuration",
        "GOOGLE_": "Google Services",
        "APPLE_": "Apple Services",
        "MICROSOFT_": "Microsoft Services",
        "GITHUB_": "GitHub Integration",
        "API_": "API Configuration",
        "APP_": "Application Configuration",
        "REDIS_": "Redis Configuration",
        "WEBHOOK_": "Webhook Configuration",
        "TYPESENSE_": "Typesense Configuration",
        "PASSKEY_": "Passkey Configuration",
        "PDF_": "PDF Configuration",
        "PRESIGNED_": "Presigned URL Configuration",
    }
    # Longest prefixes are tried first, so NEXT_PUBLIC_ is checked before NEXT_.
    longest_first = sorted(
        heading_by_prefix.items(), key=lambda item: len(item[0]), reverse=True
    )

    grouped = defaultdict(list)
    for name, default in env_vars.items():
        heading = next(
            (label for prefix, label in longest_first if name.startswith(prefix)),
            "Other",
        )
        grouped[heading].append((name, default))
    return grouped

def generate_env_example(categories, output_file=".env.example"):
    """Write the grouped variables to an env template file.

    The file starts with a two-line header. Each category then becomes a
    ``# <category>`` section (preceded by a blank line) listing its variables
    alphabetically as ``NAME=``; when a default is known it is appended as a
    trailing ``# default: ...`` comment. Categories in the fixed order below
    come first, followed by any remaining categories in alphabetical order.

    Args:
        categories: Mapping of category name to ``(name, default)`` tuples.
        output_file: Path of the file to (over)write.
    """
    preferred_order = [
        "Next.js Configuration",
        "Node.js Configuration",
        "Application Configuration",
        "Database Configuration",
        "Supabase Configuration",
        "Authentication",
        "Better Auth Configuration",
        "Email Configuration",
        "MinIO Storage Configuration",
        "S3 Storage Configuration",
        "DigitalOcean Configuration",
        "AWS Configuration",
        "Stripe Configuration",
        "Mollie Payment Configuration",
        "Google Services",
        "Apple Services",
        "Microsoft Services",
        "GitHub Integration",
        "API Configuration",
        "Redis Configuration",
        "Webhook Configuration",
        "Typesense Configuration",
        "Passkey Configuration",
        "PDF Configuration",
        "Presigned URL Configuration",
        "Other",
    ]

    def write_section(handle, heading):
        # One section: heading comment, then the variables sorted by name.
        handle.write(f"\n# {heading}\n")
        entries = sorted(categories[heading], key=lambda entry: entry[0])
        for name, default in entries:
            line = f"{name}= # default: {default}\n" if default else f"{name}=\n"
            handle.write(line)

    with open(output_file, "w", encoding="utf-8") as out:
        out.write("# Environment Variables\n")
        out.write("# Copy this file to .env and fill in the values\n")
        # Known categories first, in their curated order.
        for heading in preferred_order:
            if heading in categories:
                write_section(out, heading)
        # Then anything not covered by the curated list, alphabetically.
        for heading in sorted(categories.keys()):
            if heading not in preferred_order:
                write_section(out, heading)

def parse_env_file(file_path):
    """Read ``KEY=VALUE`` pairs from an env file.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Each remaining line is split at its first ``=``; key and value
    are stripped of surrounding whitespace. A later occurrence of a key
    overrides an earlier one.

    Args:
        file_path: Path of the env file.

    Returns:
        dict of key to value. Empty when the file does not exist; if reading
        fails part-way the error is printed and the pairs collected so far
        are returned.
    """
    parsed = {}
    if not os.path.exists(file_path):
        return parsed
    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                # Ignore blanks, comments and anything that is not an assignment.
                if not entry or entry.startswith("#") or "=" not in entry:
                    continue
                name, _, value = entry.partition("=")
                parsed[name.strip()] = value.strip()
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
    return parsed

def clean_env_local(env_example_vars, env_local_path=".env.local"):
    """Remove assignments from a local env file whose keys are no longer used.

    A key is considered unused when it appears in the local env file but not
    in ``env_example_vars``. The file is rewritten in place: comments are
    kept, runs of blank lines are collapsed to a single blank line, and any
    line that is not a recognised ``KEY=VALUE`` assignment (for example the
    continuation of a multi-line value) is left untouched rather than dropped.

    Progress and the list of removed keys are printed to stdout.

    Args:
        env_example_vars: Mapping whose keys are the variables still in use.
        env_local_path: Path of the env file to clean. Defaults to
            ``.env.local`` in the current working directory.
    """
    if not os.path.exists(env_local_path):
        print(f"{env_local_path} not found, skipping cleanup")
        return

    env_local_vars = parse_env_file(env_local_path)
    unused_keys = set(env_local_vars) - set(env_example_vars)
    if not unused_keys:
        print(f"No unused keys found in {env_local_path}")
        return

    print(f"\nFound {len(unused_keys)} unused keys in {env_local_path}:")
    for key in sorted(unused_keys):
        print(f"  - {key}")

    # Re-read the raw lines so comments and formatting survive the rewrite.
    with open(env_local_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    filtered_lines = []
    prev_was_empty = False
    for line in lines:
        stripped = line.strip()

        if not stripped:
            # Collapse consecutive blank lines into one.
            if not prev_was_empty:
                filtered_lines.append(line)
            prev_was_empty = True
            continue

        if not stripped.startswith("#") and "=" in stripped:
            key = stripped.split("=", 1)[0].strip()
            if key in unused_keys:
                # Dropped assignment: prev_was_empty is deliberately left
                # as-is so blank lines on either side merge into one.
                continue

        # Comments, kept assignments and unrecognised lines are preserved.
        filtered_lines.append(line)
        prev_was_empty = False

    with open(env_local_path, "w", encoding="utf-8") as f:
        f.writelines(filtered_lines)
    print(f"\nRemoved {len(unused_keys)} unused keys from {env_local_path}")

def main():
    """Run the full workflow against the current working directory.

    Scans the project for environment variables, groups them, writes
    ``.env.example``, prints a per-category summary, and finally removes keys
    from ``.env.local`` that the scan did not find.
    """
    project_root = os.getcwd()
    print(f"Scanning project: {project_root}")

    print("Finding environment variables...")
    discovered = find_env_variables(project_root)
    print(f"Found {len(discovered)} unique environment variables")

    print("Categorizing variables...")
    grouped = categorize_env_vars(discovered)

    print("Generating .env.example...")
    generate_env_example(grouped)

    print("\nSummary:")
    defaults_total = 0
    for heading in sorted(grouped):
        entries = grouped[heading]
        defaults_here = len([name for name, default in entries if default])
        defaults_total += defaults_here
        print(f"  {heading}: {len(entries)} variables ({defaults_here} with defaults)")
    print(f"\nTotal variables with defaults: {defaults_total}/{len(discovered)}")
    print("\n.env.example generated successfully!")

    print("\nCleaning up .env.local...")
    # The scanned variables are exactly the keys written to .env.example.
    clean_env_local(discovered)

# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()