# inferno-ai 0.10.3

# Compose file format version, quoted so it stays a string.
# NOTE(review): newer Docker Compose releases reportedly treat this top-level
# key as obsolete and ignore it — confirm against the Compose version in use
# before removing it.
version: '3.8'

services:
  # Inferno AI/ML inference server
  #
  # Built from the Dockerfile in the parent directory. Publishes the HTTP API
  # on 8080 and Prometheus metrics on 9090, and bind-mounts the models, config
  # and logs directories from relative host paths.
  inferno:
    container_name: inferno-server
    build:
      dockerfile: Dockerfile
      context: ..
    restart: unless-stopped
    networks:
      - inferno-network
    ports:
      - "8080:8080"    # HTTP API
      - "9090:9090"    # Prometheus metrics
    volumes:
      - ./models:/app/models
      - ./config:/app/config
      - ./logs:/app/logs
    environment:
      # Logging
      INFERNO_LOG_LEVEL: "info"
      INFERNO_LOG_FORMAT: "json"
      # Storage locations inside the container. /app/models is bind-mounted
      # above; /app/cache has no volume in this file.
      INFERNO_MODELS_DIR: "/app/models"
      INFERNO_CACHE_DIR: "/app/cache"
      # HTTP listener; the port matches the published 8080 mapping.
      INFERNO_BIND_ADDRESS: "0.0.0.0"
      INFERNO_PORT: "8080"
      # Metrics; the port matches the published 9090 mapping.
      INFERNO_PROMETHEUS_ENABLED: "true"
      INFERNO_PROMETHEUS_PORT: "9090"
      # Tracing, sent to the jaeger service over the shared network.
      # NOTE(review): 14268 is labelled "collector HTTP" on the jaeger service;
      # confirm the server's exporter speaks that protocol.
      INFERNO_OTEL_ENABLED: "true"
      INFERNO_OTEL_ENDPOINT: "http://jaeger:14268/api/traces"
      # Grafana integration, addressed by service name.
      INFERNO_GRAFANA_ENABLED: "true"
      INFERNO_GRAFANA_ENDPOINT: "http://grafana:3000"
    healthcheck:
      # Probes the HTTP API's /health path from inside the container.
      # assumes curl is present in the built image — TODO confirm
      test:
        - CMD
        - curl
        - "-f"
        - "http://localhost:8080/health"
      start_period: 40s
      interval: 30s
      timeout: 10s
      retries: 3

  # Prometheus for metrics collection
  #
  # Published on host port 9091 because host port 9090 is already taken by the
  # inferno service's metrics endpoint.
  prometheus:
    container_name: inferno-prometheus
    image: prom/prometheus:latest
    restart: unless-stopped
    networks:
      - inferno-network
    ports:
      - "9091:9090"
    volumes:
      # Scrape configuration from the host, plus a named volume for the TSDB.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus_data:/prometheus
    command:
      # Paths below line up with the two mounts above.
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
      - "--web.console.libraries=/etc/prometheus/console_libraries"
      - "--web.console.templates=/etc/prometheus/consoles"
      # Retention window for stored samples.
      - "--storage.tsdb.retention.time=200h"
      # NOTE(review): this flag is documented as enabling reload/shutdown over
      # HTTP; confirm that is wanted on a port published to the host.
      - "--web.enable-lifecycle"

  # Grafana for visualization
  #
  # Serves the UI on port 3000. Persistent state lives in the grafana_data
  # volume; provisioning files and dashboards are bind-mounted from ./grafana.
  grafana:
    image: grafana/grafana:latest
    container_name: inferno-grafana
    ports:
      - "3000:3000"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning:/etc/grafana/provisioning
      - ./grafana/dashboards:/var/lib/grafana/dashboards
    environment:
      # Admin credentials are read from the shell environment or an .env file
      # (GRAFANA_ADMIN_USER / GRAFANA_ADMIN_PASSWORD) instead of being fixed in
      # this file. The admin/admin fallbacks keep the previous behavior for
      # local use; set a real password before exposing port 3000 more widely.
      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
    networks:
      - inferno-network
    restart: unless-stopped
    # Start after Prometheus (start order only; no readiness condition is set).
    depends_on:
      - prometheus

  # Jaeger for distributed tracing
  #
  # All-in-one image; the inferno service sends traces to port 14268 here.
  jaeger:
    container_name: inferno-jaeger
    image: jaegertracing/all-in-one:latest
    restart: unless-stopped
    networks:
      - inferno-network
    environment:
      COLLECTOR_OTLP_ENABLED: "true"
    ports:
      - "16686:16686"  # Jaeger UI
      - "14268:14268"  # Jaeger collector HTTP
      - "14250:14250"  # Jaeger collector gRPC

  # Redis for caching (optional)
  #
  # Data is kept in the redis_data volume and the server is started with
  # append-only persistence switched on.
  # NOTE(review): 6379 is published to the host and no password is configured
  # in this file — confirm that exposure is intended.
  redis:
    container_name: inferno-redis
    image: redis:alpine
    restart: unless-stopped
    # "yes" is quoted so YAML keeps it as a string argument, not a boolean.
    command: ["redis-server", "--appendonly", "yes"]
    networks:
      - inferno-network
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data

  # Load balancer (for multiple Inferno instances)
  #
  # Publishes 80 and 443; its configuration and the ssl directory are
  # bind-mounted from beside this compose file.
  nginx:
    container_name: inferno-nginx
    image: nginx:alpine
    restart: unless-stopped
    # Started after the inferno service (start order only).
    depends_on:
      - inferno
    networks:
      - inferno-network
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf
      - ./ssl:/etc/nginx/ssl

  # Model downloader (init container)
  #
  # One-shot helper that populates ./models on the host (mounted at /models).
  # It has no restart policy, so it runs its script once and exits.
  model-downloader:
    image: alpine/curl:latest
    container_name: inferno-model-downloader
    volumes:
      - ./models:/models
    # Run the script through an explicit shell entrypoint so it does not
    # depend on whatever ENTRYPOINT the image defines.
    # NOTE(review): if the image's entrypoint is curl itself, a bare
    # `command: sh -c ...` would be handed to curl as arguments — confirm.
    entrypoint: ["sh", "-c"]
    command:
      # Literal block scalar: the script reaches the shell exactly as written,
      # with no nested quoting layer.
      - |
        # Stop at the first failing step so a failed download is not
        # reported as success.
        set -e
        echo 'Downloading sample models...'
        mkdir -p /models
        # Download a sample GGUF model (replace with actual model URLs)
        # curl -L -o /models/llama-2-7b-chat.gguf 'https://example.com/llama-2-7b-chat.gguf'
        echo 'Model download complete'
    networks:
      - inferno-network

# Named volumes used by the prometheus, grafana and redis services.
# Each is declared with an explicit empty mapping, i.e. no custom options.
volumes:
  prometheus_data: {}
  grafana_data: {}
  redis_data: {}

# Shared network that every service in this file attaches to, so services can
# reach one another by service name (e.g. http://jaeger:14268).
networks:
  inferno-network:
    driver: bridge