# mcp-proxy configuration
# Proxy identity and global behaviour.
# NOTE(review): the table header and key names in this section were missing
# (bare `[]` / `= value` lines are not valid TOML). `[proxy]`, `name`,
# `version` and `separator` are reconstructed from the values and the
# surrounding comments -- confirm against the proxy's config schema.
[proxy]
name = "my-proxy"
version = "0.1.0"
# Separator placed between the backend namespace and the tool/resource/prompt
# name (with "/" a tool is exposed as e.g. files/read_file)
separator = "/"
# Graceful shutdown timeout (seconds, default: 30)
# shutdown_timeout_seconds = 30
# Watch config file for new backends and add them without restart
# hot_reload = true
# HTTP transport binding: address and port the proxy listens on.
# NOTE(review): header and key names were missing here; `[proxy.listen]`,
# `host` and `port` are reconstructed from the values and the comment above --
# confirm against the proxy's config schema.
[proxy.listen]
host = "127.0.0.1"
port = 8080
# Optional: custom instructions sent to clients on initialize
# NOTE(review): if uncommented in this position, TOML assigns the key to the
# table opened above -- confirm that is where the proxy expects it.
# instructions = "This proxy aggregates multiple MCP backends."
# --- Backends ---
# Each backend is an MCP server the proxy routes to.
# The name becomes the namespace prefix for all tools/resources/prompts.
# Backend "files": a filesystem MCP server launched as a child process.
# NOTE(review): the header and key names of this entry were missing and have
# been restored. `[[backends]]`, `name`, `transport` and `[backends.timeout]` /
# `seconds` match the commented HTTP backend example in this file; `command`,
# `args`, `[backends.concurrency]` and `max_concurrent` are reconstructed from
# context -- confirm against the proxy's config schema.
[[backends]]
name = "files"
# Subprocess backend -- spawns a child process
transport = "stdio"
command = "npx"
args = ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"]

# Per-backend timeout (seconds)
[backends.timeout]
seconds = 30

# Per-backend concurrency limit
[backends.concurrency]
max_concurrent = 10

# Per-backend response caching
# [backends.cache]
# resource_ttl_seconds = 300 # cache resource reads for 5 minutes
# tool_ttl_seconds = 60 # cache tool call results for 1 minute
# max_entries = 1000 # max cached entries (default: 1000)

# --- Tool Aliasing ---
# Rename tools exposed by this backend
# [[backends.aliases]]
# from = "read_file" # backend's original tool name
# to = "read" # exposed as files/read

# --- Capability Filtering ---
# NOTE: a bare key uncommented at this point is parsed as part of the most
# recent [backends.*] table above, not of the backend entry itself. These look
# like backend-level keys (confirm); if so, move them above the first
# [backends.*] header when enabling them.
# Only expose specific tools (allowlist -- mutually exclusive with hide_tools)
# expose_tools = ["read_file", "list_directory", "search_files"]
# Or hide specific tools (denylist)
# hide_tools = ["write_file", "delete_file"]
# Same for resources and prompts
# expose_resources = ["file:///public"]
# hide_resources = ["file:///etc/shadow"]
# expose_prompts = ["help"]
# hide_prompts = ["admin_prompt"]

# --- Argument Injection ---
# Default args merged into all tool calls for this backend (does not overwrite)
# [backends.default_args]
# timeout = 30
# Per-tool argument injection
# [[backends.inject_args]]
# tool = "query"
# args = { read_only = true, max_rows = 1000 }
# Force overwrite existing arguments
# [[backends.inject_args]]
# tool = "dangerous_op"
# args = { dry_run = true }
# overwrite = true
# Backend "github": the GitHub MCP server launched as a child process, with
# resilience settings (timeout, circuit breaker, rate limit).
# NOTE(review): the header and key names of this entry were missing and have
# been restored. `[[backends]]`, `name`, `transport` and `[backends.timeout]` /
# `seconds` match the commented HTTP backend example in this file. `command`,
# `args`, and every table/key name under env, circuit_breaker and rate_limit
# are reconstructed from the comments and values -- confirm against the
# proxy's config schema.
[[backends]]
name = "github"
transport = "stdio"
command = "npx"
args = ["-y", "@modelcontextprotocol/server-github"]

# Per-backend timeout (seconds)
[backends.timeout]
seconds = 60

# Environment variables passed to the subprocess
# TOML does not expand ${...}; the value is the literal string, so any
# substitution is up to the proxy (presumably from its own environment --
# confirm).
# NOTE(review): variable name reconstructed from the placeholder; verify the
# name the GitHub MCP server actually reads.
[backends.env]
GITHUB_TOKEN = "${GITHUB_TOKEN}"

# Circuit breaker -- trips open after 50% failure rate
[backends.circuit_breaker]
failure_rate_threshold = 0.5 # 0.0-1.0
minimum_calls = 5 # min calls before evaluating
wait_duration_seconds = 30 # seconds in open before half-open
# presumably the number of trial calls allowed while half-open -- confirm
permitted_calls_in_half_open = 3

# Rate limit -- max requests per period
# NOTE(review): the period unit is assumed to be seconds -- confirm
[backends.rate_limit]
requests = 30
period_seconds = 1

# Retry with exponential backoff
# [backends.retry]
# max_retries = 3 # max retry attempts (default: 3)
# initial_backoff_ms = 100 # initial backoff (default: 100)
# max_backoff_ms = 5000 # max backoff (default: 5000)
# budget_percent = 20.0 # max % of requests that can be retries
# min_retries_per_sec = 10 # min retries/sec regardless of budget

# Request hedging -- fire parallel requests to reduce tail latency
# [backends.hedging]
# delay_ms = 200 # wait before sending hedge (default: 200, 0 = immediate)
# max_hedges = 1 # max additional hedge requests (default: 1)

# Outlier detection -- passive health checks that eject unhealthy backends
# [backends.outlier_detection]
# consecutive_errors = 5 # errors before ejection (default: 5)
# interval_seconds = 10 # evaluation interval (default: 10)
# base_ejection_seconds = 30 # ejection duration (default: 30)
# max_ejection_percent = 50 # max % of backends ejectable (default: 50)
# HTTP backend -- connects to a remote MCP server
# [[backends]]
# name = "remote"
# transport = "http"
# url = "http://mcp-server:8080"
# # Static bearer token for authenticating to this backend
# bearer_token = "${BACKEND_TOKEN}"
# # Forward the client's inbound auth token to this backend
# forward_auth = true
# [backends.timeout]
# seconds = 60
# --- Traffic Mirroring ---
# Shadow traffic to a canary backend (fire-and-forget).
# The mirror receives a copy of matching requests; responses are discarded.
# [[backends]]
# name = "api-v2"
# transport = "http"
# url = "http://api-v2:8080"
# mirror_of = "api" # name of the source backend to mirror
# mirror_percent = 10 # percentage of requests to mirror (default: 100)
# --- Auth (optional) ---
# Uncomment ONE of the following auth blocks.
# Bearer token auth:
# [auth]
# type = "bearer"
# tokens = ["my-secret-token"]
# JWT/JWKS auth with RBAC:
# [auth]
# type = "jwt"
# issuer = "https://auth.example.com"
# audience = "mcp-proxy"
# jwks_uri = "https://auth.example.com/.well-known/jwks.json"
#
# # Define roles with tool access control
# [[auth.roles]]
# name = "reader"
# allow_tools = ["files/read_file", "files/list_directory"]
#
# [[auth.roles]]
# name = "admin"
# # Empty allow_tools = all tools allowed
#
# # Map JWT claims to roles
# [auth.role_mapping]
# claim = "scope"
# mapping = { "mcp:read" = "reader", "mcp:admin" = "admin" }
# --- Performance ---
# [performance]
# coalesce_requests = true # deduplicate identical concurrent tool calls
# --- Security ---
# [security]
# max_argument_size = 1048576 # 1MB limit on tool call arguments
# --- Observability ---
# Logging, metrics and tracing settings.
# NOTE(review): header and key names were missing in this section.
# `[observability]` and `[observability.metrics]` / `enabled` are inferred from
# the commented `[observability.tracing]` block below; `audit`, `log_level`
# and `json_logs` are reconstructed from the comments -- confirm against the
# proxy's config schema.
[observability]
# Structured audit logging (target: mcp::audit)
audit = true
# Log level (trace, debug, info, warn, error)
log_level = "info"
# JSON structured logging (for production log aggregation)
json_logs = false

# Prometheus metrics (exposed at /admin/metrics)
[observability.metrics]
enabled = true

# OpenTelemetry distributed tracing (OTLP export)
# [observability.tracing]
# enabled = true
# endpoint = "http://localhost:4317"
# service_name = "mcp-proxy"