# Docker Compose stack: the application runtime, an optional GPU-backed
# SGLang inference server, and a Temporal development server.
version: "3.8"

services:
  # Application container, built from the Dockerfile in this directory.
  runtime:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Forwarded from the host environment; empty when EXTRAS is unset.
        - EXTRAS=${EXTRAS:-}
    ports:
      - "8000:8000"
    environment:
      # Taken from the host environment; empty when SGLANG_URL is unset.
      # NOTE(review): presumably meant to point at the `sglang` service
      # below (port 30000) when the `gpu` profile is enabled - confirm.
      - SGLANG_URL=${SGLANG_URL:-}
      # These two paths match the container side of the bind mounts below.
      - LANCEDB_URI=/app/data/lancedb
      - PLUGINS_DIR=/app/data/plugins
      # NOTE(review): inside the container `localhost` is this container
      # itself; presumably the runtime serves its own broker on 8000 - confirm.
      - TELEMETRY_BROKER_URL=http://localhost:8000
      # Service name and port of the `temporal` service defined below.
      - TEMPORAL_HOST=temporal:7233
      # Overridable from the host environment; defaults to port 8080 on the
      # Docker host.
      - COREASON_MASTER_GATEWAY_URL=${COREASON_MASTER_GATEWAY_URL:-http://host.docker.internal:8080}
    volumes:
      # Host directories under ./data mounted at the same names in /app/data.
      - ./data/lancedb:/app/data/lancedb
      - ./data/plugins:/app/data/plugins
      - ./data/bronze:/app/data/bronze
      - ./data/silver:/app/data/silver
      - ./data/gold:/app/data/gold
    depends_on:
      # Short form: controls start order only, not readiness of Temporal.
      - temporal
    restart: unless-stopped

  # SGLang inference server; only started when the `gpu` profile is active.
  sglang:
    profiles:
      - gpu
    image: lmsysorg/sglang:latest
    ports:
      - "30000:30000"
    volumes:
      # Shares the host user's Hugging Face cache directory with the container.
      - ~/.cache/huggingface:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    command:
      - "python3"
      - "-m"
      - "sglang.launch_server"
      - "--model-path"
      - "meta-llama/Meta-Llama-3-8B-Instruct"
      - "--port"
      - "30000"
      - "--host"
      - "0.0.0.0"
    restart: unless-stopped

  # Temporal development server started through the `temporal` CLI.
  temporal:
    image: temporalio/admin-tools:latest
    ports:
      # 7233 is the port the runtime targets via TEMPORAL_HOST.
      - "7233:7233"
      # NOTE(review): presumably the dev server's web UI - confirm.
      - "8233:8233"
      # NOTE(review): container port 8080 is published on host port 8088;
      # what listens on 8080 is not visible from this file - confirm.
      - "8088:8080"
    # `command` is a key of the service, not part of the last port entry.
    command:
      - "temporal"
      - "server"
      - "start-dev"
      - "--ip"
      - "0.0.0.0"
    restart: unless-stopped