1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# opensourcellmrouter configuration
#
# Two local providers:
# local-llama — llama.cpp server on :8080 (OpenAI-compatible)
# ollama — Ollama on :11434 (native Ollama API)
#
# Pipeline: classifiers tag the request → routers pick a provider → logging
# NOTE(review): in this copy the table header is empty (`[]`) and the values
# have no key names, which is not valid TOML. Restore the header and keys from
# the router's config schema before using this file.
# The first value looks like a listen address and the second a port (the
# providers listen on :8080 and :11434, so 8090 does not collide with them).
# "0.0.0.0" would expose the router on every interface — confirm that is
# intended for a setup that only fronts local providers.
[]
= "0.0.0.0"
= 8090
= true
# Presumably request logging: a toggle plus an output path (JSON Lines,
# judging by the .jsonl extension) — TODO confirm once key names are restored.
[]
= true
= "logs/requests.jsonl"
# ── providers ─────────────────────────────────────────────────────────────────
# llama.cpp server. The second value ("openai") presumably selects the
# OpenAI-compatible API, which would explain the /v1 suffix on the base URL.
# NOTE(review): key names are missing in this copy, so the meaning and units
# of the four numeric values (0.0, 60, 900, 20) cannot be confirmed from here.
[[]]
= "local-llama"
= "openai"
= "http://localhost:8080/v1"
= 0.0
= 60
= 900
= 20
# Ollama native API — base_url has no /v1 suffix.
# The "discover" router rule queries /api/tags at startup to know which
# models are pulled (llama3.1:8b, deepseek-r1:latest, gemma3:latest, etc.).
# NOTE(review): the numeric values (0.0, 75, 600, 30) follow the same order as
# the local-llama entry; confirm their keys and units when restoring the file.
[[]]
= "ollama"
= "ollama"
= "http://localhost:11434"
= 0.0
= 75
= 600
= 30
# ── classifiers ───────────────────────────────────────────────────────────────
# NOTE(review): the header and key for this toggle are missing in this copy,
# so it cannot be told from here which classifier it enables.
[]
= true
# Keyword lists, one per tag. The tag names themselves are missing in this
# copy; the tag routers reference "vision", "video", "code" and "nsfw", which
# presumably correspond to the four lists below in that order — TODO confirm.
# NOTE(review): entries such as "def " and "fn " carry a trailing space, which
# suggests substring matching. If so, short common words ("class", "error",
# "frame", "explicit", "adult") will also fire on unrelated text — e.g.
# "explicit cast" would pick up the adult-content tag. Verify the matcher.
[]
# Matches any message mentioning images/photos → handled by the vision tag rule.
# NOTE(review): that rule currently targets llama3.1:8b on ollama; confirm the
# model actually accepts image input, or point the rule at a multimodal model.
= ["image", "photo", "picture", "screenshot", "visual", "diagram", "chart"]
# Matches video content references
= ["video", "clip", "footage", "frame", "timestamp"]
# Matches code-heavy requests → routed to deepseek-r1 (strong at reasoning/code)
= ["function", "class", "import", "def ", "fn ", "bug", "error", "stack trace",
"compile", "runtime", "algorithm", "refactor", "debug"]
# Adult/explicit content → kept on local-llama (private, no content policy)
= ["nsfw", "adult", "explicit", "erotic", "nude", "naked",
"sexual", "xxx", "porn", "hentai", "fetish", "lewd"]
# ── routers (first match wins) ────────────────────────────────────────────────
# "local/..." always goes to llama.cpp regardless of content.
# NOTE(review): table and key names are missing in this copy. Each rule below
# appears to list: rule type, match value (where applicable), target provider,
# and the model name to rewrite to (presumably the rewrite_model key mentioned
# in the adult-content comment) — TODO confirm against the schema.
# Prefix rule: model names starting with "local/".
[[]]
= "prefix"
= "local/"
= "local-llama"
= "llama3.2-3b"
# Adult content → local-llama (stays off cloud providers, no content policy).
# Swap rewrite_model to whichever GGUF you've loaded for this purpose.
[[]]
= "tag"
= "nsfw"
= "local-llama"
= "llama3.2-3b"
# Code/reasoning → deepseek-r1 (strong at step-by-step reasoning and code)
[[]]
= "tag"
= "code"
= "ollama"
= "deepseek-r1:latest"
# Vision/video → llama3.1:8b (best general capability in the Ollama pool)
# NOTE(review): confirm llama3.1:8b accepts image input; if it is text-only,
# a multimodal model from the pulled set would be a better target here.
[[]]
= "tag"
= "vision"
= "ollama"
= "llama3.1:8b"
# Video-tagged requests use the same provider and model as the vision rule.
[[]]
= "tag"
= "video"
= "ollama"
= "llama3.1:8b"
# Any model Ollama actually has pulled is routed there directly.
# This handles llama3.1:8b, deepseek-r1:latest, gemma3:latest,
# gpt-oss:latest, gpt-oss:20b, phi3:mini, carter-hire:latest, etc.
# Because rules are listed in match order, the prefix and tag rules above are
# considered before this one: a request naming a pulled model that also trips
# a keyword tag is handled by the tag rule instead.
[[]]
= "discover"
= "ollama"
# Catch-all: unknown model names (e.g. "gpt-4", "claude-3") get rewritten to
# llama3.1:8b and routed to the highest-quality local provider (ollama).
# NOTE(review): the key for 0.7 is missing in this copy, so what it weights or
# thresholds cannot be confirmed from here.
[[]]
= "fallback"
= ["local-llama", "ollama"]
= 0.7
= "llama3.1:8b"
# ── plugins ───────────────────────────────────────────────────────────────────
# NOTE(review): plugin table names and keys are missing in this copy (empty
# `[]` headers, values without keys), so which plugin each table configures
# cannot be determined from this file. Restore them from the project's schema.
[]
= true
# Presumably a disabled plugin (first value is false) with a "medium" level or
# threshold setting — TODO confirm once the key names are restored.
[]
= false
= "medium"
# Three provider-name lists. Both names used here ("local-llama", "ollama")
# match the provider entries defined earlier in this file.
[]
= ["local-llama"]
= ["ollama"]
= ["ollama"]