---
# Auth token; left empty in this file.
# NOTE(review): confirm how the consumer treats an empty token.
auth_token: ""
# Zipper endpoint: host and port, plus TLS settings (unset here).
# NOTE(review): nesting was reconstructed from a flattened source; confirm
# that `tls` belongs under `zipper`.
zipper:
  host: "127.0.0.1"
  port: 9000
  # Explicit null; equivalent to the previous bare `tls:` key.
  tls: null
# HTTP API endpoint: host and port, plus the tool API switch.
http_api:
  host: "127.0.0.1"
  port: 9001
  # The tool API is turned off in this configuration.
  enable_tool_api: false
# List of LLM providers. One active entry is configured below; the
# commented-out entries are ready-to-edit examples for the other types.
llm_providers:
  # LLM provider types:
  # - openai-compatible: OpenAI-compatible /v1/chat/completions upstream.
  #   required params: model, api_key, base_url
  # - vllm_deepseek: vLLM DeepSeek specialized provider.
  #   required params: model, api_key, base_url
  # - tokenhub: Tencent TokenHub /v1/chat/completions provider.
  #   required params: model, api_key
  #   optional params: base_url (default https://tokenhub.tencentmaas.com/v1)
  # - vertexai: Google Vertex AI generateContent/streamGenerateContent.
  #   required params: model, project_id, location, credentials_file
  #
  # Common fields:
  # - model_id: external model name used by requests
  # - default: optional; marks the model used when a request does not
  #   specify one
  # - label: optional display label for logs/ops
  - type: "openai-compatible"
    model_id: "gpt-4o-mini"
    default: true
    params:
      model: "gpt-4o-mini"
      # api_key is a required param for this type; it is empty here and
      # must be supplied.
      api_key: ""
      base_url: "https://api.openai.com/v1"
  # - type: "vllm_deepseek"
  #   model_id: "deepseek-v4-flash"
  #   params:
  #     model: "DeepSeek-V4-Flash"
  #     api_key: ""
  #     base_url: "http://127.0.0.1:8000/v1"
  # - type: "tokenhub"
  #   model_id: "deepseek-v4-flash"
  #   params:
  #     model: "deepseek-v4-flash"
  #     api_key: ""
  #     base_url: "https://tokenhub.tencentmaas.com/v1"
  # - type: "vertexai"
  #   model_id: "gemini-2.5-flash"
  #   params:
  #     model: "gemini-2.5-flash"
  #     project_id: "your-gcp-project-id"
  #     location: "global"
  #     credentials_file: "/abs/path/to/service-account.json"
# Model API settings: the providers that back the API and the endpoint
# paths that expose them.
# NOTE(review): nesting was reconstructed from a flattened source; confirm
# that `providers` and `endpoints` are both children of `model_api`.
model_api:
  # model_api provider types:
  # - passthrough: generic proxy for embeddings/rerank/audio/images endpoints.
  #   required params: api_key, base_url, model
  # - responses: OpenAI /v1/responses adapter.
  #   required params: api_key, base_url, model
  # - bedrock-messages: AWS Bedrock Anthropic Messages adapter.
  #   required params: model, aws_region
  #   optional params: anthropic_version (default bedrock-2023-05-31),
  #                    max_tokens (default 4096), aws_bearer_token
  # - generate_content: Google Vertex generateContent adapter.
  #   required params: model, project_id, credentials_file
  #   optional params: location (default global)
  providers:
    - type: "responses"
      model_id: "gpt-4o-mini"
      params:
        model: "gpt-4o-mini"
        # api_key is a required param for this type; it is empty here and
        # must be supplied.
        api_key: ""
        base_url: "https://api.openai.com/v1"
    # - type: "responses"
    #   model_id: "gpt-5.3-codex"
    #   params:
    #     model: "gpt-5.3-codex"
    #     api_key: ""
    #     base_url: "https://api.openai.com/v1"
    # - type: "bedrock-messages"
    #   model_id: "claude-sonnet-4-6"
    #   params:
    #     model: "global.anthropic.claude-sonnet-4-6"
    #     aws_region: "ap-northeast-1"
    #     aws_bearer_token: ""
    #     anthropic_version: "bedrock-2023-05-31"
    #     max_tokens: "4096"
    # - type: "generate_content"
    #   model_id: "gemini-2.5-flash"
    #   params:
    #     model: "gemini-2.5-flash"
    #     project_id: "your-gcp-project-id"
    #     location: "global"
    #     credentials_file: "/abs/path/to/service-account.json"
  endpoints:
    # Supported endpoint paths:
    #   /responses, /messages, /models/:generateContent,
    #   /embeddings, /rerank,
    #   /audio/speech, /audio/transcriptions,
    #   /images/generations, /images/edits
    - path: "/responses"
      # Each entry matches the model_id of the provider configured above.
      models:
        - "gpt-4o-mini"
      default_model: "gpt-4o-mini"