ai_lib/provider/configs.rs
use super::config::ProviderConfig;

/// Predefined provider configurations for multiple AI services.
///
/// Each provider configuration includes a default chat model and optional
/// multimodal support. Developers can override these defaults or build
/// custom model managers for advanced use cases.
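///
/// A minimal usage sketch (assuming the crate is named `ai_lib` and that
/// `ProviderConfig` exposes the public fields used throughout this module):
///
/// ```ignore
/// use ai_lib::provider::configs::ProviderConfigs;
///
/// // Pick a predefined provider and inspect its defaults.
/// let cfg = ProviderConfigs::groq();
/// assert_eq!(cfg.base_url, "https://api.groq.com/openai/v1");
/// assert_eq!(cfg.chat_model, "llama-3.1-8b-instant");
/// ```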
pub struct ProviderConfigs;

impl ProviderConfigs {
    pub fn groq() -> ProviderConfig {
        ProviderConfig::openai_compatible(
            "https://api.groq.com/openai/v1",
            "GROQ_API_KEY",
            "llama-3.1-8b-instant",       // Default chat model - fast and cost-effective
            Some("llama-3.2-11b-vision"), // Multimodal / vision-capable model
        )
    }

    pub fn openai() -> ProviderConfig {
        ProviderConfig::openai_compatible(
            "https://api.openai.com/v1",
            "OPENAI_API_KEY",
            "gpt-3.5-turbo", // Default chat model
            Some("gpt-4o"),  // Multimodal model
        )
    }

    pub fn deepseek() -> ProviderConfig {
        ProviderConfig::openai_compatible(
            "https://api.deepseek.com/v1",
            "DEEPSEEK_API_KEY",
            "deepseek-chat", // Default chat model
            None,            // No multimodal support yet
        )
    }

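    /// Ollama local configuration - OpenAI-compatible local runtime.
    ///
    /// A sketch of pointing the config at a non-default host before building
    /// it (the address below is a placeholder; `set_var` affects the whole
    /// process, so prefer doing this once at startup):
    ///
    /// ```ignore
    /// std::env::set_var("OLLAMA_BASE_URL", "http://192.168.1.10:11434/v1");
    /// let cfg = ProviderConfigs::ollama();
    /// ```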
    pub fn ollama() -> ProviderConfig {
        // Ollama is commonly run locally and serves its OpenAI-compatible
        // layer under /v1 (its native API lives under /api). Developers can
        // override the local base URL via OLLAMA_BASE_URL; the default is the
        // standard local address used by Ollama.
        let base_url = std::env::var("OLLAMA_BASE_URL")
            .unwrap_or_else(|_| "http://localhost:11434/v1".to_string());

        // Ollama typically doesn't require an API key for local installs. We
        // keep the env variable name for parity, but it's optional for users.
        ProviderConfig::openai_compatible(
            &base_url,
            "OLLAMA_API_KEY",
            "llama3:8b", // Default chat model - common local tag (name:tag form)
            None,        // No multimodal support yet
        )
    }

    /// xAI / Grok configuration - OpenAI-compatible hosted offering
    pub fn xai_grok() -> ProviderConfig {
        // xAI's documented OpenAI-compatible endpoint; override via GROK_BASE_URL.
        let base_url = std::env::var("GROK_BASE_URL")
            .unwrap_or_else(|_| "https://api.x.ai/v1".to_string());
        ProviderConfig::openai_compatible(
            &base_url,
            "GROK_API_KEY",
            "grok-beta", // Default chat model
            None,        // No multimodal support yet
        )
    }

    /// Azure OpenAI configuration - highly compatible, but typically uses a resource-specific base URL
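    ///
    /// A sketch of pointing the config at a concrete resource (the resource
    /// name below is a placeholder):
    ///
    /// ```ignore
    /// std::env::set_var(
    ///     "AZURE_OPENAI_BASE_URL",
    ///     "https://my-resource.openai.azure.com",
    /// );
    /// let cfg = ProviderConfigs::azure_openai();
    /// ```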
    pub fn azure_openai() -> ProviderConfig {
        // Developers are expected to set AZURE_OPENAI_BASE_URL to their
        // resource endpoint (https://<resource>.openai.azure.com); the
        // fallback below is only a generic placeholder.
        let base_url = std::env::var("AZURE_OPENAI_BASE_URL")
            .unwrap_or_else(|_| "https://api.openai.azure.com/v1".to_string());
        ProviderConfig::openai_compatible(
            &base_url,
            "AZURE_OPENAI_API_KEY",
            "gpt-35-turbo", // Default Azure OpenAI deployment name
            Some("gpt-4o"), // Multimodal model
        )
    }

    /// Hugging Face Inference API - reuses the generic adapter; may need per-model adjustments
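    ///
    /// The `{model}` placeholder in `chat_endpoint` is expected to be filled
    /// in by the adapter at request time; a sketch of the substitution
    /// (assuming plain string replacement is how the adapter resolves it):
    ///
    /// ```ignore
    /// let cfg = ProviderConfigs::huggingface();
    /// let path = cfg.chat_endpoint.replace("{model}", &cfg.chat_model);
    /// // -> "/models/microsoft/DialoGPT-medium"
    /// ```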
    pub fn huggingface() -> ProviderConfig {
        // Hugging Face inference API default (used for inference calls)
        let base_url = std::env::var("HUGGINGFACE_BASE_URL")
            .unwrap_or_else(|_| "https://api-inference.huggingface.co".to_string());

        use std::collections::HashMap;
        let mut headers = HashMap::new();
        headers.insert("Content-Type".to_string(), "application/json".to_string());

        // Hugging Face model listing is served from the Hub API under
        // huggingface.co. Use an absolute models endpoint so we can query the
        // Hub independently of the inference base URL (inference and hub are
        // different services).
        ProviderConfig {
            base_url,
            api_key_env: "HUGGINGFACE_API_KEY".to_string(),
            chat_endpoint: "/models/{model}".to_string(), // per-model inference puts the model in the path
            chat_model: "microsoft/DialoGPT-medium".to_string(), // Default chat model
            multimodal_model: None, // No multimodal support yet
            upload_endpoint: None,
            upload_size_limit: None,
            models_endpoint: Some("https://huggingface.co/api/models".to_string()),
            headers,
            field_mapping: crate::provider::config::FieldMapping {
                messages_field: "messages".to_string(),
                model_field: "model".to_string(),
                role_mapping: {
                    let mut role_mapping = std::collections::HashMap::new();
                    role_mapping.insert("System".to_string(), "system".to_string());
                    role_mapping.insert("User".to_string(), "user".to_string());
                    role_mapping.insert("Assistant".to_string(), "assistant".to_string());
                    role_mapping
                },
                response_content_path: "choices.0.message.content".to_string(),
            },
        }
    }

    /// Together AI - OpenAI-compatible chat API
    pub fn together_ai() -> ProviderConfig {
        let base_url = std::env::var("TOGETHER_BASE_URL")
            .unwrap_or_else(|_| "https://api.together.ai".to_string());
        ProviderConfig::openai_compatible(
            &base_url,
            "TOGETHER_API_KEY",
            "meta-llama/Llama-3-8b-chat-hf", // Default chat model
            None,                            // No multimodal support yet
        )
    }

    /// Groq configuration routed through the generic adapter - demonstrates OpenAI compatibility
    pub fn groq_as_generic() -> ProviderConfig {
        ProviderConfig::openai_compatible(
            "https://api.groq.com/openai/v1",
            "GROQ_API_KEY",
            "llama-3.1-8b-instant", // Default chat model
            None,                   // No multimodal support yet
        )
    }

    /// Qwen / Tongyi Qianwen (Alibaba Cloud DashScope) - native text-generation endpoint
    /// Uses DASHSCOPE_API_KEY and optional DASHSCOPE_BASE_URL to override the base URL.
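    ///
    /// Note: the OpenAI-style endpoint from the constructor is replaced below
    /// with DashScope's native generation path. If a fully OpenAI-compatible
    /// surface is preferred, DashScope also serves a compatible mode that can
    /// be selected via the env override (sketch):
    ///
    /// ```ignore
    /// std::env::set_var(
    ///     "DASHSCOPE_BASE_URL",
    ///     "https://dashscope.aliyuncs.com/compatible-mode/v1",
    /// );
    /// let cfg = ProviderConfigs::qwen();
    /// // cfg.chat_endpoint still points at the native generation path here,
    /// // so a caller would also need to reset it for compatible mode.
    /// ```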
    pub fn qwen() -> ProviderConfig {
        // Default to the public DashScope base URL and use the generation endpoint.
        let base_url = std::env::var("DASHSCOPE_BASE_URL")
            .unwrap_or_else(|_| "https://dashscope.aliyuncs.com".to_string());

        // DashScope generation endpoint: /api/v1/services/aigc/text-generation/generation
        let mut cfg = ProviderConfig::openai_compatible(
            &base_url,
            "DASHSCOPE_API_KEY",
            "qwen-turbo", // Default chat model
            None,         // No multimodal support yet
        );
        cfg.chat_endpoint = "/api/v1/services/aigc/text-generation/generation".to_string();
        cfg
    }

    /// Baidu Wenxin (ERNIE) - OpenAI-compatible mode via Qianfan/console configuration
    /// Environment variables: BAIDU_WENXIN_BASE_URL (optional), BAIDU_WENXIN_API_KEY, BAIDU_WENXIN_SECRET
    pub fn baidu_wenxin() -> ProviderConfig {
        let base_url = std::env::var("BAIDU_WENXIN_BASE_URL")
            .unwrap_or_else(|_| "https://aip.baidubce.com".to_string());

        // Baidu Wenxin example path (model-specific):
        // /rpc/2.0/ai_custom/v1/wenxinworkshop/chat/{model_name}
        let mut cfg = ProviderConfig::openai_compatible(
            &base_url,
            "BAIDU_WENXIN_API_KEY",
            "ernie-3.5", // Default chat model
            None,        // No multimodal support yet
        );
        cfg.chat_endpoint = "/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/{model}".to_string();
        // Baidu may use different upload endpoints; keep defaults but don't assume model listing.
        cfg.models_endpoint = None;
        cfg
    }

    /// Tencent Hunyuan - Tencent Cloud OpenAI-compatible endpoint
    /// Environment variables: TENCENT_HUNYUAN_BASE_URL (optional), TENCENT_HUNYUAN_API_KEY
    pub fn tencent_hunyuan() -> ProviderConfig {
        let base_url = std::env::var("TENCENT_HUNYUAN_BASE_URL")
            .unwrap_or_else(|_| "https://hunyuan.tencentcloudapi.com".to_string());

        // Tencent Hunyuan selects the operation with an Action query parameter:
        // ?Action=ChatCompletions
        let mut cfg = ProviderConfig::openai_compatible(
            &base_url,
            "TENCENT_HUNYUAN_API_KEY",
            "hunyuan-standard", // Default chat model
            None,               // No multimodal support yet
        );
        cfg.chat_endpoint = "/?Action=ChatCompletions".to_string();
        cfg.models_endpoint = None;
        cfg
    }

    /// iFlytek Spark - OpenAI-compatible endpoint example
    /// Environment variables: IFLYTEK_BASE_URL (optional), IFLYTEK_API_KEY
    pub fn iflytek_spark() -> ProviderConfig {
        let base_url = std::env::var("IFLYTEK_BASE_URL")
            .unwrap_or_else(|_| "https://api.xf-yun.com".to_string());
        let mut cfg = ProviderConfig::openai_compatible(
            &base_url,
            "IFLYTEK_API_KEY",
            "spark-v3.0", // Default chat model
            None,         // No multimodal support yet
        );
        cfg.chat_endpoint = "/v1/chat".to_string();
        cfg
    }

    /// Moonshot (Kimi) - OpenAI-compatible endpoint
    /// Environment variables: MOONSHOT_BASE_URL (optional), MOONSHOT_API_KEY
    pub fn moonshot() -> ProviderConfig {
        let base_url = std::env::var("MOONSHOT_BASE_URL")
            .unwrap_or_else(|_| "https://api.moonshot.cn".to_string());
        let mut cfg = ProviderConfig::openai_compatible(
            &base_url,
            "MOONSHOT_API_KEY",
            "moonshot-v1-8k", // Default chat model
            None,             // No multimodal support yet
        );
        cfg.chat_endpoint = "/v1/chat/completions".to_string();
        cfg
    }

    /// Anthropic Claude configuration - requires special handling
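    ///
    /// Unlike the OpenAI-compatible providers above, Anthropic expects the key
    /// in an `x-api-key` header (not `Authorization: Bearer`) plus an
    /// `anthropic-version` header. A request sketch using a hypothetical HTTP
    /// client:
    ///
    /// ```ignore
    /// let cfg = ProviderConfigs::anthropic();
    /// let req = client
    ///     .post(format!("{}{}", cfg.base_url, cfg.chat_endpoint))
    ///     .header("x-api-key", std::env::var(&cfg.api_key_env)?)
    ///     .header("anthropic-version", "2023-06-01");
    /// ```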
    pub fn anthropic() -> ProviderConfig {
        use std::collections::HashMap;

        let mut headers = HashMap::new();
        headers.insert("Content-Type".to_string(), "application/json".to_string());
        headers.insert("anthropic-version".to_string(), "2023-06-01".to_string());
        // Note: Anthropic uses x-api-key instead of Authorization: Bearer

        let mut role_mapping = HashMap::new();
        role_mapping.insert("System".to_string(), "system".to_string());
        role_mapping.insert("User".to_string(), "user".to_string());
        role_mapping.insert("Assistant".to_string(), "assistant".to_string());

        ProviderConfig {
            base_url: "https://api.anthropic.com/v1".to_string(),
            api_key_env: "ANTHROPIC_API_KEY".to_string(),
            chat_endpoint: "/messages".to_string(),
            chat_model: "claude-3-5-sonnet-20241022".to_string(), // Default chat model
            multimodal_model: Some("claude-3-5-sonnet-20241022".to_string()), // Multimodal support
            upload_endpoint: None,
            upload_size_limit: None,
            models_endpoint: None, // Model listing is not wired up for Anthropic here
            headers,
            field_mapping: crate::provider::config::FieldMapping {
                messages_field: "messages".to_string(),
                model_field: "model".to_string(),
                role_mapping,
                response_content_path: "content.0.text".to_string(), // Claude returns content blocks, not choices
            },
        }
    }

    /// OpenRouter configuration
    ///
    /// OpenRouter is a unified gateway for many models behind an OpenAI-compatible API.
    /// Base URL: https://openrouter.ai/api/v1
    /// Documentation: https://openrouter.ai/docs/api-reference/overview
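    ///
    /// Model ids are namespaced by upstream vendor (e.g. `openai/gpt-4o`);
    /// switching the routed model is a plain field update (sketch, the model
    /// id below is illustrative):
    ///
    /// ```ignore
    /// let mut cfg = ProviderConfigs::openrouter();
    /// cfg.chat_model = "meta-llama/llama-3.1-8b-instruct".to_string();
    /// ```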
    pub fn openrouter() -> ProviderConfig {
        ProviderConfig::openai_compatible(
            "https://openrouter.ai/api/v1",
            "OPENROUTER_API_KEY",
            "openai/gpt-3.5-turbo", // Default model
            Some("openai/gpt-4o"),  // Multimodal model
        )
    }

    /// Replicate configuration
    ///
    /// Replicate provides access to various AI models with an OpenAI-compatible API.
    /// Base URL: https://api.replicate.com/v1
    /// Documentation: https://replicate.com/docs/reference/http
    pub fn replicate() -> ProviderConfig {
        ProviderConfig::openai_compatible(
            "https://api.replicate.com/v1",
            "REPLICATE_API_TOKEN",
            "meta/llama-2-7b-chat", // Default model
            None,                   // llama-2-7b-chat is text-only; no multimodal default
        )
    }

    /// Zhipu AI (GLM) configuration
    ///
    /// Zhipu AI provides the GLM model series with an OpenAI-compatible API.
    /// Base URL: https://open.bigmodel.cn/api/paas/v4
    /// Documentation: https://docs.bigmodel.cn/cn/api/introduction
    pub fn zhipu_ai() -> ProviderConfig {
        ProviderConfig::openai_compatible(
            "https://open.bigmodel.cn/api/paas/v4",
            "ZHIPU_API_KEY",
            "glm-4",        // Default model
            Some("glm-4v"), // Multimodal model
        )
    }

    /// MiniMax configuration
    ///
    /// MiniMax provides AI models with an OpenAI-compatible API.
    /// Base URL: https://api.minimax.chat/v1
    /// Documentation: https://www.minimax.io/platform/document/ChatCompletion
    pub fn minimax() -> ProviderConfig {
        ProviderConfig::openai_compatible(
            "https://api.minimax.chat/v1",
            "MINIMAX_API_KEY",
            "abab6.5-chat", // Default model
            None,           // abab6.5-chat is a text chat model; no multimodal default
        )
    }
}
313}