vtcode_core/llm/providers/
qwen.rs1use async_trait::async_trait;
2use reqwest::Client as HttpClient;
3use serde_json::{Map, Value};
4
5use crate::config::TimeoutsConfig;
6use crate::config::constants::{env_vars, models, urls};
7use crate::config::core::{AnthropicConfig, ModelConfig, PromptCachingConfig};
8use crate::llm::error_display;
9use crate::llm::provider::{LLMError, LLMProvider, LLMRequest, LLMResponse, LLMStream};
10
11use super::{
12 common::{
13 ensure_model, extract_prompt_cache_settings_default, impl_llm_client, override_base_url,
14 parse_json_response, parse_response_openai_format, resolve_model,
15 serialize_messages_openai_format, serialize_tools_openai_format,
16 spawn_openai_compatible_stream, validate_supported_models,
17 },
18 error_handling::handle_openai_http_error,
19 extract_reasoning_trace,
20};
21
/// Human-readable provider label; passed to the shared error-formatting, HTTP-error,
/// JSON-parsing and streaming helpers used by this provider.
22const PROVIDER_NAME: &str = "Qwen";
/// Lowercase provider identifier; returned from `LLMProvider::name` and passed to the
/// shared message-serialization, tool-choice, prompt-cache and model-validation helpers.
23const PROVIDER_KEY: &str = "qwen";
24
/// LLM provider that sends OpenAI-format chat-completion requests to a Qwen endpoint.
25pub struct QwenProvider {
 /// Credential sent as `Authorization: Bearer <api_key>` on every request.
26 api_key: String,
 /// HTTP client used to POST requests.
27 http_client: HttpClient,
 /// Endpoint base; `/chat/completions` is appended after stripping trailing slashes.
28 base_url: String,
 /// Provider-level model; used when a capability query passes an empty model name and
 /// handed to `ensure_model` before each request (presumably as the fallback — confirm
 /// against `ensure_model`).
29 model: String,
 /// Prompt-cache flag forwarded to the shared OpenAI-format response parser.
30 prompt_cache_enabled: bool,
 /// Optional per-model overrides consulted by `supports_reasoning` and
 /// `supports_reasoning_effort`.
31 model_behavior: Option<ModelConfig>,
32}
33
34impl QwenProvider {
35 pub fn new(api_key: String) -> Self {
36 Self::with_model_internal(
37 api_key,
38 models::qwen::DEFAULT_MODEL.to_string(),
39 None,
40 None,
41 TimeoutsConfig::default(),
42 None,
43 )
44 }
45
46 pub fn with_model(api_key: String, model: String) -> Self {
47 Self::with_model_internal(api_key, model, None, None, TimeoutsConfig::default(), None)
48 }
49
50 pub fn new_with_client(
51 api_key: String,
52 model: String,
53 http_client: reqwest::Client,
54 base_url: String,
55 _timeouts: TimeoutsConfig,
56 ) -> Self {
57 Self {
58 api_key,
59 http_client,
60 base_url,
61 model,
62 prompt_cache_enabled: false,
63 model_behavior: None,
64 }
65 }
66
67 pub fn from_config(
68 api_key: Option<String>,
69 model: Option<String>,
70 base_url: Option<String>,
71 prompt_cache: Option<PromptCachingConfig>,
72 timeouts: Option<TimeoutsConfig>,
73 _anthropic: Option<AnthropicConfig>,
74 model_behavior: Option<ModelConfig>,
75 ) -> Self {
76 let api_key_value = api_key
77 .filter(|k| !k.trim().is_empty())
78 .or_else(|| {
79 std::env::var("QWEN_API_KEY")
80 .ok()
81 .filter(|k| !k.trim().is_empty())
82 })
83 .or_else(|| {
84 std::env::var("DASHSCOPE_API_KEY")
85 .ok()
86 .filter(|k| !k.trim().is_empty())
87 })
88 .unwrap_or_default();
89
90 Self::with_model_internal(
91 api_key_value,
92 resolve_model(model, models::qwen::DEFAULT_MODEL),
93 prompt_cache,
94 base_url,
95 timeouts.unwrap_or_default(),
96 model_behavior,
97 )
98 }
99
100 fn with_model_internal(
101 api_key: String,
102 model: String,
103 prompt_cache: Option<PromptCachingConfig>,
104 base_url: Option<String>,
105 timeouts: TimeoutsConfig,
106 model_behavior: Option<ModelConfig>,
107 ) -> Self {
108 use crate::llm::http_client::HttpClientFactory;
109
110 let (prompt_cache_enabled, _) =
111 extract_prompt_cache_settings_default(prompt_cache, PROVIDER_KEY);
112
113 Self {
114 api_key,
115 http_client: HttpClientFactory::for_llm(&timeouts),
116 base_url: override_base_url(
117 urls::QWEN_API_BASE,
118 base_url,
119 Some(env_vars::QWEN_BASE_URL),
120 ),
121 model,
122 prompt_cache_enabled,
123 model_behavior,
124 }
125 }
126
127 #[must_use]
128 #[inline]
129 fn is_thinking_enabled(request: &LLMRequest) -> bool {
130 request
131 .reasoning_effort
132 .is_some_and(|e| e != crate::config::types::ReasoningEffortLevel::None)
133 }
134
135 fn float_to_json_number(value: f32) -> Result<serde_json::Number, LLMError> {
136 serde_json::Number::from_f64(value as f64).ok_or_else(|| LLMError::InvalidRequest {
137 message: "invalid numeric parameter value (NaN or infinity)".to_string(),
138 metadata: None,
139 })
140 }
141
142 fn convert_to_qwen_format(&self, request: &LLMRequest) -> Result<Value, LLMError> {
143 let mut payload = Map::with_capacity(12);
144
145 payload.insert("model".to_owned(), Value::String(request.model.clone()));
146
147 let mut messages = self.serialize_messages(request)?;
148
149 if let Some(system_prompt) = &request.system_prompt {
150 let trimmed = system_prompt.trim();
151 if !trimmed.is_empty() {
152 messages.insert(0, serde_json::json!({"role": "system", "content": trimmed}));
153 }
154 }
155
156 payload.insert("messages".to_owned(), Value::Array(messages));
157
158 if let Some(max_tokens) = request.max_tokens {
159 payload.insert(
160 "max_tokens".to_owned(),
161 Value::Number(serde_json::Number::from(max_tokens as u64)),
162 );
163 }
164
165 let thinking_enabled = Self::is_thinking_enabled(request);
166
167 if !thinking_enabled {
168 if let Some(temperature) = request.temperature {
169 payload.insert(
170 "temperature".to_owned(),
171 Value::Number(Self::float_to_json_number(temperature)?),
172 );
173 }
174
175 if let Some(top_p) = request.top_p {
176 payload.insert(
177 "top_p".to_owned(),
178 Value::Number(Self::float_to_json_number(top_p)?),
179 );
180 }
181 }
182
183 if request.stream {
184 payload.insert("stream".to_string(), Value::Bool(true));
185 payload.insert(
186 "stream_options".to_string(),
187 serde_json::json!({"include_usage": true}),
188 );
189 }
190
191 if let Some(tools) = &request.tools
192 && let Some(serialized_tools) = serialize_tools_openai_format(tools)
193 {
194 payload.insert("tools".to_string(), Value::Array(serialized_tools));
195 }
196
197 if let Some(choice) = &request.tool_choice {
198 payload.insert(
199 "tool_choice".to_string(),
200 choice.to_provider_format(PROVIDER_KEY),
201 );
202 }
203
204 if let Some(effort) = request.reasoning_effort {
205 let enable_thinking = effort != crate::config::types::ReasoningEffortLevel::None;
206 payload.insert("enable_thinking".to_owned(), Value::Bool(enable_thinking));
207 }
208
209 if let Some(meta) = &request.metadata
210 && let Some(user_id) = meta.get("user_id").and_then(|v| v.as_str())
211 {
212 payload.insert("user_id".to_owned(), Value::String(user_id.to_owned()));
213 }
214
215 Ok(Value::Object(payload))
216 }
217
218 async fn send_request(&self, payload: &Value) -> Result<reqwest::Response, LLMError> {
219 let url = format!("{}/chat/completions", self.base_url.trim_end_matches('/'));
220
221 self.http_client
222 .post(&url)
223 .header("Authorization", format!("Bearer {}", self.api_key))
224 .json(payload)
225 .send()
226 .await
227 .map_err(|e| LLMError::Network {
228 message: error_display::format_llm_error(
229 PROVIDER_NAME,
230 &format!("network error: {}", e),
231 ),
232 metadata: None,
233 })
234 }
235
236 fn serialize_messages(&self, request: &LLMRequest) -> Result<Vec<Value>, LLMError> {
237 serialize_messages_openai_format(request, PROVIDER_KEY)
238 }
239
240 fn parse_response(&self, response_json: Value, model: String) -> Result<LLMResponse, LLMError> {
241 let reasoning_extractor = |message: &Value, choice: &Value| {
242 message
243 .get("reasoning_content")
244 .and_then(extract_reasoning_trace)
245 .or_else(|| {
246 choice
247 .get("reasoning_content")
248 .and_then(extract_reasoning_trace)
249 })
250 };
251
252 parse_response_openai_format(
253 response_json,
254 PROVIDER_NAME,
255 model,
256 self.prompt_cache_enabled,
257 Some(reasoning_extractor),
258 )
259 }
260}
261
262#[async_trait]
263impl LLMProvider for QwenProvider {
264 fn name(&self) -> &str {
265 PROVIDER_KEY
266 }
267
268 fn supports_streaming(&self) -> bool {
269 true
270 }
271
272 fn supports_tools(&self, _model: &str) -> bool {
273 true
274 }
275
276 fn supports_structured_output(&self, _model: &str) -> bool {
277 true
278 }
279
280 fn supports_vision(&self, _model: &str) -> bool {
281 false
282 }
283
284 fn supports_reasoning(&self, model: &str) -> bool {
285 let requested = if model.trim().is_empty() {
286 &self.model
287 } else {
288 model
289 };
290
291 self.model_behavior
292 .as_ref()
293 .and_then(|b| b.model_supports_reasoning)
294 .unwrap_or(false)
295 || requested == models::qwen::QWEN3_7_MAX
296 || requested == models::qwen::QWEN3_6_FLASH
297 || requested == models::qwen::QWEN3_6_PLUS
298 || requested == models::qwen::DEEPSEEK_V4_FLASH
299 || requested == models::qwen::DEEPSEEK_V4_PRO
300 || requested == models::qwen::GLM_5_1
301 }
302
303 fn supports_reasoning_effort(&self, _model: &str) -> bool {
304 self.model_behavior
305 .as_ref()
306 .and_then(|b| b.model_supports_reasoning_effort)
307 .unwrap_or(false)
308 }
309
310 fn effective_context_size(&self, model: &str) -> usize {
311 let requested = if model.trim().is_empty() {
312 &self.model
313 } else {
314 model
315 };
316 match requested {
317 models::qwen::QWEN3_6_FLASH
318 | models::qwen::DEEPSEEK_V4_FLASH
319 | models::qwen::DEEPSEEK_V4_PRO => 1_048_576,
320 models::qwen::QWEN3_7_MAX | models::qwen::QWEN3_6_PLUS | models::qwen::GLM_5_1 => {
321 131_072
322 }
323 _ => 131_072,
324 }
325 }
326
327 async fn generate(&self, mut request: LLMRequest) -> Result<LLMResponse, LLMError> {
328 let model = ensure_model(&mut request, &self.model);
329
330 let payload = self.convert_to_qwen_format(&request)?;
331 let response = self.send_request(&payload).await?;
332 let response = handle_openai_http_error(response, PROVIDER_NAME, "QWEN_API_KEY").await?;
333
334 let response_json = parse_json_response(response, PROVIDER_NAME).await?;
335 self.parse_response(response_json, model)
336 }
337
338 async fn stream(&self, mut request: LLMRequest) -> Result<LLMStream, LLMError> {
339 ensure_model(&mut request, &self.model);
340 self.validate_request(&request)?;
341 request.stream = true;
342 let model = request.model.clone();
343
344 let payload = self.convert_to_qwen_format(&request)?;
345 let response = self.send_request(&payload).await?;
346 let response = handle_openai_http_error(response, PROVIDER_NAME, "QWEN_API_KEY").await?;
347
348 Ok(spawn_openai_compatible_stream(
349 response,
350 PROVIDER_NAME,
351 model,
352 Some("reasoning_content"),
353 super::shared::OpenAiDeltaOrder::ReasoningFirst,
354 ))
355 }
356
357 fn supported_models(&self) -> Vec<String> {
358 models::qwen::SUPPORTED_MODELS
359 .iter()
360 .map(|model| model.to_string())
361 .collect()
362 }
363
364 fn validate_request(&self, request: &LLMRequest) -> Result<(), LLMError> {
365 validate_supported_models(
366 request,
367 PROVIDER_NAME,
368 PROVIDER_KEY,
369 models::qwen::SUPPORTED_MODELS,
370 )
371 }
372}
373
// Expands the `impl_llm_client!` macro (imported from `super::common`) for `QwenProvider`.
// NOTE(review): presumably this generates the client-facing trait impl on top of
// `LLMProvider` — confirm against the macro definition.
374impl_llm_client!(QwenProvider);