# opensourcellmrouter 0.4.0
#
# A fast, local-first LLM router — proxy any OpenAI/Anthropic/Ollama client to your own provider pipeline with classifiers, cost/latency/random routing rules, plugins, a live dashboard, and a TUI.
# opensourcellmrouter configuration
#
# Providers:
#   local-llama — llama.cpp server on :8080 (OpenAI-compatible)
#   ollama      — Ollama on :11434 (native Ollama API)
#   cloudflare  — Cloudflare Workers AI  (CLOUDFLARE_API_TOKEN)
#   openai      — OpenAI                 (OPENAI_API_KEY)
#   anthropic   — Anthropic              (ANTHROPIC_API_KEY)
#
# API keys live in .env — run `source .env` or let demo.sh load it.
#
# Pipeline: classifiers tag the request → routers pick a provider → logging

[server]
# NOTE(review): "0.0.0.0" is the all-interfaces wildcard address. If the
# router uses this as its bind address, the proxy is reachable from other
# hosts on the network — confirm that is intended; "127.0.0.1" would keep it
# local-only.
host = "0.0.0.0"
port = 8090
# Presumably toggles the live dashboard — confirm against the router docs.
dashboard = true

# Request logging (the final stage of the pipeline described in the header).
[logging]
enabled = true
# Relative path with a .jsonl extension.
# NOTE(review): presumably resolved against the process working directory — confirm.
path = "logs/requests.jsonl"

# ── providers ─────────────────────────────────────────────────────────────────

# llama.cpp server on localhost:8080, spoken to via its OpenAI-compatible API.
# No api_key_env is set for this provider.
[[providers]]
name = "local-llama"
format = "openai"
base_url = "http://localhost:8080/v1"
cost_per_1m_tokens = 0.0
# NOTE(review): quality / latency_ms / throughput_tokens_per_sec are static
# figures declared here, presumably consumed by the routing rules — confirm
# their scale and how they are used.
quality = 60
latency_ms = 900
throughput_tokens_per_sec = 20

# Ollama, using its native API — note that base_url carries no /v1 suffix.
# The "discover" router rule asks /api/tags at startup which models have been
# pulled (e.g. llama3.1:8b, deepseek-r1:latest, gemma3:latest).
# NOTE(review): no "discover" rule is configured in this file's routers
# section — confirm whether the statement above still applies.
[[providers]]
name = "ollama"
format = "ollama"
base_url = "http://localhost:11434"
cost_per_1m_tokens = 0.0
quality = 75
latency_ms = 600
throughput_tokens_per_sec = 30

# Cloudflare Workers AI via its OpenAI-compatible endpoint.
# CLOUDFLARE_API_TOKEN must be present in the environment (source .env).
[[providers]]
name = "cloudflare"
format = "openai"
# The hex segment after /accounts/ is a Cloudflare account ID baked into the
# URL; TOML has no variable expansion, so replace it by hand with your own.
base_url = "https://api.cloudflare.com/client/v4/accounts/e4847a910eddf0d11d40af276cc478b7/ai/v1"
api_key_env = "CLOUDFLARE_API_TOKEN"
cost_per_1m_tokens = 0.2
quality = 80
latency_ms = 500
throughput_tokens_per_sec = 60

# OpenAI. OPENAI_API_KEY must be present in the environment.
[[providers]]
name = "openai"
format = "openai"
base_url = "https://api.openai.com/v1"
api_key_env = "OPENAI_API_KEY"
cost_per_1m_tokens = 5.0
quality = 90
latency_ms = 400
throughput_tokens_per_sec = 80

# Anthropic. ANTHROPIC_API_KEY must be present in the environment.
[[providers]]
name = "anthropic"
format = "anthropic"
base_url = "https://api.anthropic.com"
api_key_env = "ANTHROPIC_API_KEY"
cost_per_1m_tokens = 15.0
quality = 95
latency_ms = 600
throughput_tokens_per_sec = 70

# ── classifiers ───────────────────────────────────────────────────────────────

[classifiers.keyword]
enabled = true

# Keyword classifier: each key below is a tag, and its array lists the
# keywords associated with that tag.
#
# NOTE(review): the per-tag comments describe the *intended* routing target.
# The only router rule configured in this file is the catch-all "random" rule,
# which carries no tag condition, so as configured these tags do not appear to
# steer routing. In particular nothing here keeps "nsfw" traffic on
# local-llama: such requests can be sent to any candidate, including the cloud
# providers. Add tag-based rules ahead of the random rule (first match wins)
# if the routing described below is required.
[classifiers.keyword.tags]
# Image/photo mentions — intended target: a vision-capable model.
vision = ["image", "photo", "picture", "screenshot", "visual", "diagram", "chart"]
# References to video content.
video = ["video", "clip", "footage", "frame", "timestamp"]
# Code-heavy requests — intended target: deepseek-r1 (strong at reasoning/code).
# "def " and "fn " deliberately keep their trailing space.
code = [
    "function", "class", "import", "def ", "fn ", "bug", "error", "stack trace",
    "compile", "runtime", "algorithm", "refactor", "debug",
]
# Adult/explicit content — intended to stay on local-llama (private, no
# content policy). Not enforced by the current router configuration; see the
# note above.
nsfw = [
    "nsfw", "adult", "explicit", "erotic", "nude", "naked",
    "sexual", "xxx", "porn", "hentai", "fetish", "lewd",
]

# ── routers (first match wins) ────────────────────────────────────────────────

# Catch-all rule: every request goes to one candidate picked at random from
# the full list below.
# Providers whose API key env var is unset are skipped automatically at startup.
# NOTE(review): this rule has no tag condition, so classifier tags presumably
# have no effect on which candidate is chosen — confirm.
[[routers]]
type = "random"
candidates = [
    # local
    { provider = "local-llama", model = "llama3.2-3b" },
    # ollama
    { provider = "ollama", model = "llama3.1:8b" },
    { provider = "ollama", model = "deepseek-r1:latest" },
    { provider = "ollama", model = "gemma3:latest" },
    # cloudflare workers ai
    { provider = "cloudflare", model = "@cf/meta/llama-3.1-8b-instruct" },
    { provider = "cloudflare", model = "@cf/meta/llama-3.2-3b-instruct" },
    { provider = "cloudflare", model = "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b" },
    { provider = "cloudflare", model = "@cf/google/gemma-3-12b-it" },
    # openai
    { provider = "openai", model = "gpt-4o-mini" },
    { provider = "openai", model = "gpt-4o" },
    # anthropic
    { provider = "anthropic", model = "claude-haiku-4-5-20251001" },
    { provider = "anthropic", model = "claude-sonnet-4-6" },
]

# ── plugins ───────────────────────────────────────────────────────────────────

[plugins.response-healing]
enabled = true

# Currently switched off. Each tier maps to a list of provider names defined
# in the providers section; default_tier names one of the tier keys.
# NOTE(review): "medium" and "high" both resolve to ollama only, although
# higher-quality providers (openai = 90, anthropic = 95) are defined in this
# file — confirm this is intentional before enabling the plugin.
[plugins.pareto-router]
enabled = false
default_tier = "medium"
tiers.low = ["local-llama"]
tiers.medium = ["ollama"]
tiers.high = ["ollama"]