# embeddenator 0.20.0-alpha.1
# Sparse ternary VSA holographic computing substrate
# GitHub Actions Runner Configuration
# Copy this file to .env and fill in your values

# ============================================================================
# GitHub Repository Configuration
# ============================================================================

# GitHub repository (format: owner/repo)
GITHUB_REPOSITORY=tzervas/embeddenator

# GitHub API URL (default: https://api.github.com)
GITHUB_API_URL=https://api.github.com

# GitHub Personal Access Token (PAT) with repo scope
# Used for obtaining runner registration tokens
# Generate at: https://github.com/settings/tokens
GITHUB_TOKEN=

# ============================================================================
# Runner Registration Configuration
# ============================================================================

# Runner name prefix (will be suffixed with unique ID or index)
RUNNER_NAME_PREFIX=embeddenator-runner

# Runner labels (comma-separated, default: self-hosted,linux,ARM64)
RUNNER_LABELS=self-hosted,linux,ARM64

# Runner group (default: Default)
RUNNER_GROUP=Default

# Runner work directory (default: _work)
RUNNER_WORK_DIR=_work

# ============================================================================
# Lifecycle Management Configuration
# ============================================================================

# Deployment mode: auto | manual
# - auto: Automatically deregister after idle timeout
# - manual: Keep running until manually stopped
RUNNER_MODE=auto

# Auto-deregister idle timeout in seconds (default: 300 = 5 minutes)
# Only used when RUNNER_MODE=auto
# Runner will deregister if no jobs are in queue for this duration
RUNNER_IDLE_TIMEOUT=300

# Job check interval in seconds (default: 30)
# How often to check for jobs in queue when monitoring
RUNNER_CHECK_INTERVAL=30

# Maximum runner lifetime in seconds (default: 0 = unlimited)
# Runner will deregister after this time regardless of activity
# Set to 0 to disable
RUNNER_MAX_LIFETIME=0

# ============================================================================
# Multi-Runner Deployment Configuration
# ============================================================================

# Number of runners to deploy (default: 1)
RUNNER_COUNT=1

# Runner deployment strategy: sequential | parallel
# - sequential: Deploy runners one at a time
# - parallel: Deploy all runners simultaneously
RUNNER_DEPLOYMENT_STRATEGY=sequential

# Stagger delay between runner deployments in seconds (default: 5)
# Only used when RUNNER_DEPLOYMENT_STRATEGY=sequential
RUNNER_DEPLOYMENT_STAGGER=5

# ============================================================================
# Resource Configuration
# ============================================================================

# CPU cores per runner (default: auto-detect)
# Set to limit CPU usage per runner
RUNNER_CPU_CORES=

# Memory limit per runner in GB (default: none)
# Requires systemd or cgroup support
RUNNER_MEMORY_GB=

# Disk space threshold in GB (default: 20)
# Warning will be issued if free space falls below this
RUNNER_DISK_THRESHOLD_GB=20

# ============================================================================
# Architecture and Emulation Configuration
# ============================================================================

# Runner architecture (default: auto-detect)
# Options: x64, arm64, riscv64
RUNNER_ARCH=

# Target architectures to deploy (comma-separated, default: host architecture only)
# Examples: x64,arm64,riscv64
# This allows deploying runners for multiple architectures simultaneously
RUNNER_TARGET_ARCHITECTURES=

# Enable emulation for cross-architecture support (default: true)
# When enabled, ARM64 and RISC-V runners can run on x86_64 hardware via QEMU
RUNNER_ENABLE_EMULATION=true

# Emulation method (default: auto)
# Options: auto, qemu, docker, podman
# - auto: Automatically detect available container runtime (docker/podman), fallback to qemu
# - qemu: Use standalone QEMU (requires QEMU installation)
# - docker: Use Docker for emulation setup (preferred if available)
# - podman: Use Podman for emulation setup (Docker alternative)
RUNNER_EMULATION_METHOD=auto

# Auto-install QEMU if not present (default: false)
# Requires sudo access - will attempt to install qemu-user-static
RUNNER_EMULATION_AUTO_INSTALL=false

# ============================================================================
# Runner Installation Configuration
# ============================================================================

# Runner installation directory (default: ./actions-runner)
RUNNER_INSTALL_DIR=./actions-runner

# Runner version (default: latest)
# Specify a version like "2.319.0" or "latest"
RUNNER_VERSION=latest

# Fallback runner version (default: 2.319.0)
# Used if latest version check fails
RUNNER_FALLBACK_VERSION=2.319.0

# ============================================================================
# Timeout Configuration
# ============================================================================

# GitHub API timeout in seconds (default: 30)
# Timeout for GitHub API requests (registration, removal tokens, etc.)
GITHUB_API_TIMEOUT=30

# Version check timeout in seconds (default: 10)
# Timeout for checking latest runner version from GitHub
GITHUB_VERSION_CHECK_TIMEOUT=10

# ============================================================================
# Logging and Monitoring Configuration
# ============================================================================

# Log level: DEBUG | INFO | WARNING | ERROR | CRITICAL
LOG_LEVEL=INFO

# Log file path (default: ./runner_manager.log)
LOG_FILE=./runner_manager.log

# Enable metrics collection (true | false)
ENABLE_METRICS=false

# Metrics output file (default: ./runner_metrics.json)
METRICS_FILE=./runner_metrics.json

# ============================================================================
# Advanced Configuration
# ============================================================================

# Enable ephemeral runners (true | false)
# Ephemeral runners automatically deregister after completing one job
RUNNER_EPHEMERAL=false

# Replace existing runner with same name (true | false)
RUNNER_REPLACE_EXISTING=false

# Disable automatic updates (true | false)
RUNNER_DISABLE_AUTO_UPDATE=false

# Additional runner configuration flags (space-separated)
# Example: --no-default-labels --disableupdate
RUNNER_ADDITIONAL_FLAGS=

# ============================================================================
# Cleanup Configuration
# ============================================================================

# Clean runner installation on deregister (true | false)
RUNNER_CLEAN_ON_DEREGISTER=true

# Clean Docker resources on deregister (true | false)
RUNNER_CLEAN_DOCKER=true

# Docker cleanup threshold in GB (default: 10)
# Cleanup will run if available space falls below this
DOCKER_CLEANUP_THRESHOLD_GB=10

# ============================================================================
# GPU Configuration
# ============================================================================

# Enable GPU runner support (true | false)
# When enabled, detects and assigns GPUs to runners
RUNNER_ENABLE_GPU=false

# GPU vendor preference (auto | nvidia | amd | intel | apple)
# auto: Automatically detect available GPUs
# nvidia: Use only NVIDIA GPUs
# amd: Use only AMD GPUs
# intel: Use only Intel GPUs (Arc, Flex, Max)
# apple: Use only Apple Silicon GPUs (M1, M2, M3)
RUNNER_GPU_VENDOR=auto

# GPU allocation per runner (auto | specific number)
# auto: Automatically distribute GPUs across runners
# Specific number (e.g., 1): Assign that many GPUs per runner
RUNNER_GPU_PER_RUNNER=auto

# Inference-only mode (true | false)
# When true, only use GPUs capable of inference workloads
# Optimizes for Intel Arc, NVIDIA T4, AMD MI series, Apple Silicon
RUNNER_INFERENCE_ONLY=false

# ============================================================================
# Resource Optimization Configuration
# ============================================================================

# Enable automatic resource optimization (true | false)
# Automatically calculates optimal CPU and memory allocation
# Reserves resources for host system while maximizing runner performance
RUNNER_ENABLE_RESOURCE_OPTIMIZATION=false

# Host CPU reservation percentage (default: 20)
# Percentage of CPU cores to reserve for host system
# Minimum: 2 cores always reserved
RUNNER_HOST_CPU_RESERVE_PERCENT=20

# Host memory reservation percentage (default: 15)
# Percentage of total memory to reserve for host system
# Minimum: 2 GB always reserved
RUNNER_HOST_MEMORY_RESERVE_PERCENT=15

# Use CPU affinity (true | false)
# Pin runners to specific CPU cores for better performance
# Recommended for Xeon E5-2660 v3/v4 and other multi-core systems
RUNNER_USE_CPU_AFFINITY=false

# ============================================================================
# Hardware-Specific Optimizations
# ============================================================================

# These settings are automatically detected and applied:
#
# Intel Xeon E5-26xx (Haswell v3 / Broadwell v4) - up to 14 cores per socket:
#   - Optimized for dual-socket 14-core (28 cores / 56 threads total) configurations
#   - CPU affinity enabled for NUMA optimization
#   - 4-6 physical cores per runner (recommended)
#
# Apple Silicon (M1, M2, M3):
#   - Performance cores prioritized for runners
#   - Efficiency cores reserved for host
#   - Metal acceleration for ML inference
#
# AMD EPYC:
#   - CCX-aware CPU affinity
#   - Memory channel optimization
#
# Raspberry Pi / ARM:
#   - Temperature monitoring
#   - Conservative resource allocation
#   - Inference-optimized settings


# ============================================================================
# Git Platform Configuration
# ============================================================================

# Git platform type (github | gitlab | gitea)
# github: GitHub (cloud or enterprise)
# gitlab: GitLab (cloud or self-hosted)
# gitea: Gitea (self-hosted)
GIT_PLATFORM=github

# API URL for git platform
# GitHub: https://api.github.com (default)
# GitLab: https://gitlab.com (default) or https://your-gitlab.com
# Gitea: https://your-gitea.com (required)
GIT_API_URL=

# Repository or project identifier
# GitHub/Gitea: owner/repo format (e.g., tzervas/embeddenator)
# GitLab: project ID (numeric, e.g., 12345)
# NOTE: GITHUB_REPOSITORY is already set in the "GitHub Repository
# Configuration" section near the top of this file. Re-assigning it here
# (even to an empty value) would override that value in last-wins dotenv
# parsers, so the duplicate is commented out. Uncomment only if you intend
# this value to take precedence.
# GITHUB_REPOSITORY=
GITLAB_PROJECT_ID=

# Authentication token
# Use GIT_TOKEN for all platforms, or GITHUB_TOKEN for backward compatibility
GIT_TOKEN=
# NOTE: GITHUB_TOKEN is already set in the "GitHub Repository Configuration"
# section above; the empty duplicate is commented out to avoid silently
# clobbering it in last-wins dotenv parsers.
# GITHUB_TOKEN=

# ============================================================================
# Dynamic Runner Manager Configuration
# ============================================================================

# Enable dynamic auto-scaling (true | false)
# When enabled, automatically starts/stops runners based on queue
RUNNER_ENABLE_DYNAMIC_SCALING=false

# Minimum number of runners (always running)
# Ensures quick response time for new jobs
RUNNER_MIN_COUNT=1

# Maximum number of runners (scale limit)
# Prevents resource exhaustion
RUNNER_MAX_COUNT=10

# Scale up threshold (number of pending jobs)
# Start new runners when pending jobs exceed this number
RUNNER_SCALE_UP_THRESHOLD=2

# Scale down threshold (number of pending jobs)
# Stop idle runners when queue drops to this level
RUNNER_SCALE_DOWN_THRESHOLD=0

# Scale cooldown period (seconds)
# Minimum time between scaling operations
RUNNER_SCALE_COOLDOWN=60

# ============================================================================
# Load Balancing Configuration
# ============================================================================

# Runners are automatically load-balanced based on:
#   - Architecture (amd64, arm64, riscv64)
#   - GPU capabilities (nvidia, amd, intel, apple)
#   - Workload type (inference, training)
#   - Available resources (CPU, memory, GPU)
#
# Jobs are matched to runners based on required labels
# Manager maintains capability pools for efficient dispatching


# ============================================================================
# Scaling Modes
# ============================================================================

# Scaling mode (lazy | balanced | aggressive)
# 
# LAZY MODE - Relaxed queue handling:
#   - Waits for 5+ pending jobs before scaling up
#   - 2 minute cooldown between scaling operations
#   - Keeps at least 1 runner always ready
#   - Best for: Low-priority jobs, cost optimization, unpredictable workloads
#   - Pros: Minimal resource usage, lower costs
#   - Cons: Slower response to job spikes
#
# BALANCED MODE (default) - Moderate response:
#   - Scales up with 2+ pending jobs
#   - 1 minute cooldown between scaling operations
#   - Good balance between responsiveness and cost
#   - Best for: Most general workloads, typical CI/CD
#   - Pros: Good response time, reasonable costs
#   - Cons: May be too slow for time-critical jobs
#
# AGGRESSIVE MODE - Fast queue handling:
#   - Scales up with just 1 pending job
#   - 30 second cooldown between scaling operations
#   - Maximum responsiveness
#   - Best for: Time-critical jobs, high-priority builds, production deployments
#   - Pros: Fastest response time, minimal queue wait
#   - Cons: Higher resource usage and costs
#
RUNNER_SCALING_MODE=balanced

# Note: You can override individual thresholds by setting:
#   RUNNER_SCALE_UP_THRESHOLD
#   RUNNER_SCALE_DOWN_THRESHOLD
#   RUNNER_SCALE_COOLDOWN
# These will take precedence over the scaling mode presets