Skip to main content

vtcode_core/llm/providers/
qwen.rs

1use async_trait::async_trait;
2use reqwest::Client as HttpClient;
3use serde_json::{Map, Value};
4
5use crate::config::TimeoutsConfig;
6use crate::config::constants::{env_vars, models, urls};
7use crate::config::core::{AnthropicConfig, ModelConfig, PromptCachingConfig};
8use crate::llm::error_display;
9use crate::llm::provider::{LLMError, LLMProvider, LLMRequest, LLMResponse, LLMStream};
10
11use super::{
12    common::{
13        ensure_model, extract_prompt_cache_settings_default, impl_llm_client, override_base_url,
14        parse_json_response, parse_response_openai_format, resolve_model,
15        serialize_messages_openai_format, serialize_tools_openai_format,
16        spawn_openai_compatible_stream, validate_supported_models,
17    },
18    error_handling::handle_openai_http_error,
19    extract_reasoning_trace,
20};
21
/// Display label for this provider; passed to the shared error-formatting,
/// response-parsing and validation helpers.
const PROVIDER_NAME: &str = "Qwen";
/// Lowercase provider identifier; returned by `name()` and passed to the
/// shared serialization, tool-choice and prompt-cache helpers.
const PROVIDER_KEY: &str = "qwen";
24
25pub struct QwenProvider {
26    api_key: String,
27    http_client: HttpClient,
28    base_url: String,
29    model: String,
30    prompt_cache_enabled: bool,
31    model_behavior: Option<ModelConfig>,
32}
33
34impl QwenProvider {
35    pub fn new(api_key: String) -> Self {
36        Self::with_model_internal(
37            api_key,
38            models::qwen::DEFAULT_MODEL.to_string(),
39            None,
40            None,
41            TimeoutsConfig::default(),
42            None,
43        )
44    }
45
46    pub fn with_model(api_key: String, model: String) -> Self {
47        Self::with_model_internal(api_key, model, None, None, TimeoutsConfig::default(), None)
48    }
49
50    pub fn new_with_client(
51        api_key: String,
52        model: String,
53        http_client: reqwest::Client,
54        base_url: String,
55        _timeouts: TimeoutsConfig,
56    ) -> Self {
57        Self {
58            api_key,
59            http_client,
60            base_url,
61            model,
62            prompt_cache_enabled: false,
63            model_behavior: None,
64        }
65    }
66
67    pub fn from_config(
68        api_key: Option<String>,
69        model: Option<String>,
70        base_url: Option<String>,
71        prompt_cache: Option<PromptCachingConfig>,
72        timeouts: Option<TimeoutsConfig>,
73        _anthropic: Option<AnthropicConfig>,
74        model_behavior: Option<ModelConfig>,
75    ) -> Self {
76        let api_key_value = api_key
77            .filter(|k| !k.trim().is_empty())
78            .or_else(|| {
79                std::env::var("QWEN_API_KEY")
80                    .ok()
81                    .filter(|k| !k.trim().is_empty())
82            })
83            .or_else(|| {
84                std::env::var("DASHSCOPE_API_KEY")
85                    .ok()
86                    .filter(|k| !k.trim().is_empty())
87            })
88            .unwrap_or_default();
89
90        Self::with_model_internal(
91            api_key_value,
92            resolve_model(model, models::qwen::DEFAULT_MODEL),
93            prompt_cache,
94            base_url,
95            timeouts.unwrap_or_default(),
96            model_behavior,
97        )
98    }
99
100    fn with_model_internal(
101        api_key: String,
102        model: String,
103        prompt_cache: Option<PromptCachingConfig>,
104        base_url: Option<String>,
105        timeouts: TimeoutsConfig,
106        model_behavior: Option<ModelConfig>,
107    ) -> Self {
108        use crate::llm::http_client::HttpClientFactory;
109
110        let (prompt_cache_enabled, _) =
111            extract_prompt_cache_settings_default(prompt_cache, PROVIDER_KEY);
112
113        Self {
114            api_key,
115            http_client: HttpClientFactory::for_llm(&timeouts),
116            base_url: override_base_url(
117                urls::QWEN_API_BASE,
118                base_url,
119                Some(env_vars::QWEN_BASE_URL),
120            ),
121            model,
122            prompt_cache_enabled,
123            model_behavior,
124        }
125    }
126
127    #[must_use]
128    #[inline]
129    fn is_thinking_enabled(request: &LLMRequest) -> bool {
130        request
131            .reasoning_effort
132            .is_some_and(|e| e != crate::config::types::ReasoningEffortLevel::None)
133    }
134
135    fn float_to_json_number(value: f32) -> Result<serde_json::Number, LLMError> {
136        serde_json::Number::from_f64(value as f64).ok_or_else(|| LLMError::InvalidRequest {
137            message: "invalid numeric parameter value (NaN or infinity)".to_string(),
138            metadata: None,
139        })
140    }
141
142    fn convert_to_qwen_format(&self, request: &LLMRequest) -> Result<Value, LLMError> {
143        let mut payload = Map::with_capacity(12);
144
145        payload.insert("model".to_owned(), Value::String(request.model.clone()));
146
147        let mut messages = self.serialize_messages(request)?;
148
149        if let Some(system_prompt) = &request.system_prompt {
150            let trimmed = system_prompt.trim();
151            if !trimmed.is_empty() {
152                messages.insert(0, serde_json::json!({"role": "system", "content": trimmed}));
153            }
154        }
155
156        payload.insert("messages".to_owned(), Value::Array(messages));
157
158        if let Some(max_tokens) = request.max_tokens {
159            payload.insert(
160                "max_tokens".to_owned(),
161                Value::Number(serde_json::Number::from(max_tokens as u64)),
162            );
163        }
164
165        let thinking_enabled = Self::is_thinking_enabled(request);
166
167        if !thinking_enabled {
168            if let Some(temperature) = request.temperature {
169                payload.insert(
170                    "temperature".to_owned(),
171                    Value::Number(Self::float_to_json_number(temperature)?),
172                );
173            }
174
175            if let Some(top_p) = request.top_p {
176                payload.insert(
177                    "top_p".to_owned(),
178                    Value::Number(Self::float_to_json_number(top_p)?),
179                );
180            }
181        }
182
183        if request.stream {
184            payload.insert("stream".to_string(), Value::Bool(true));
185            payload.insert(
186                "stream_options".to_string(),
187                serde_json::json!({"include_usage": true}),
188            );
189        }
190
191        if let Some(tools) = &request.tools
192            && let Some(serialized_tools) = serialize_tools_openai_format(tools)
193        {
194            payload.insert("tools".to_string(), Value::Array(serialized_tools));
195        }
196
197        if let Some(choice) = &request.tool_choice {
198            payload.insert(
199                "tool_choice".to_string(),
200                choice.to_provider_format(PROVIDER_KEY),
201            );
202        }
203
204        if let Some(effort) = request.reasoning_effort {
205            let enable_thinking = effort != crate::config::types::ReasoningEffortLevel::None;
206            payload.insert("enable_thinking".to_owned(), Value::Bool(enable_thinking));
207        }
208
209        if let Some(meta) = &request.metadata
210            && let Some(user_id) = meta.get("user_id").and_then(|v| v.as_str())
211        {
212            payload.insert("user_id".to_owned(), Value::String(user_id.to_owned()));
213        }
214
215        Ok(Value::Object(payload))
216    }
217
218    async fn send_request(&self, payload: &Value) -> Result<reqwest::Response, LLMError> {
219        let url = format!("{}/chat/completions", self.base_url.trim_end_matches('/'));
220
221        self.http_client
222            .post(&url)
223            .header("Authorization", format!("Bearer {}", self.api_key))
224            .json(payload)
225            .send()
226            .await
227            .map_err(|e| LLMError::Network {
228                message: error_display::format_llm_error(
229                    PROVIDER_NAME,
230                    &format!("network error: {}", e),
231                ),
232                metadata: None,
233            })
234    }
235
236    fn serialize_messages(&self, request: &LLMRequest) -> Result<Vec<Value>, LLMError> {
237        serialize_messages_openai_format(request, PROVIDER_KEY)
238    }
239
240    fn parse_response(&self, response_json: Value, model: String) -> Result<LLMResponse, LLMError> {
241        let reasoning_extractor = |message: &Value, choice: &Value| {
242            message
243                .get("reasoning_content")
244                .and_then(extract_reasoning_trace)
245                .or_else(|| {
246                    choice
247                        .get("reasoning_content")
248                        .and_then(extract_reasoning_trace)
249                })
250        };
251
252        parse_response_openai_format(
253            response_json,
254            PROVIDER_NAME,
255            model,
256            self.prompt_cache_enabled,
257            Some(reasoning_extractor),
258        )
259    }
260}
261
262#[async_trait]
263impl LLMProvider for QwenProvider {
264    fn name(&self) -> &str {
265        PROVIDER_KEY
266    }
267
268    fn supports_streaming(&self) -> bool {
269        true
270    }
271
272    fn supports_tools(&self, _model: &str) -> bool {
273        true
274    }
275
276    fn supports_structured_output(&self, _model: &str) -> bool {
277        true
278    }
279
280    fn supports_vision(&self, _model: &str) -> bool {
281        false
282    }
283
284    fn supports_reasoning(&self, model: &str) -> bool {
285        let requested = if model.trim().is_empty() {
286            &self.model
287        } else {
288            model
289        };
290
291        self.model_behavior
292            .as_ref()
293            .and_then(|b| b.model_supports_reasoning)
294            .unwrap_or(false)
295            || requested == models::qwen::QWEN3_7_MAX
296            || requested == models::qwen::QWEN3_6_FLASH
297            || requested == models::qwen::QWEN3_6_PLUS
298            || requested == models::qwen::DEEPSEEK_V4_FLASH
299            || requested == models::qwen::DEEPSEEK_V4_PRO
300            || requested == models::qwen::GLM_5_1
301    }
302
303    fn supports_reasoning_effort(&self, _model: &str) -> bool {
304        self.model_behavior
305            .as_ref()
306            .and_then(|b| b.model_supports_reasoning_effort)
307            .unwrap_or(false)
308    }
309
310    fn effective_context_size(&self, model: &str) -> usize {
311        let requested = if model.trim().is_empty() {
312            &self.model
313        } else {
314            model
315        };
316        match requested {
317            models::qwen::QWEN3_6_FLASH
318            | models::qwen::DEEPSEEK_V4_FLASH
319            | models::qwen::DEEPSEEK_V4_PRO => 1_048_576,
320            models::qwen::QWEN3_7_MAX | models::qwen::QWEN3_6_PLUS | models::qwen::GLM_5_1 => {
321                131_072
322            }
323            _ => 131_072,
324        }
325    }
326
327    async fn generate(&self, mut request: LLMRequest) -> Result<LLMResponse, LLMError> {
328        let model = ensure_model(&mut request, &self.model);
329
330        let payload = self.convert_to_qwen_format(&request)?;
331        let response = self.send_request(&payload).await?;
332        let response = handle_openai_http_error(response, PROVIDER_NAME, "QWEN_API_KEY").await?;
333
334        let response_json = parse_json_response(response, PROVIDER_NAME).await?;
335        self.parse_response(response_json, model)
336    }
337
338    async fn stream(&self, mut request: LLMRequest) -> Result<LLMStream, LLMError> {
339        ensure_model(&mut request, &self.model);
340        self.validate_request(&request)?;
341        request.stream = true;
342        let model = request.model.clone();
343
344        let payload = self.convert_to_qwen_format(&request)?;
345        let response = self.send_request(&payload).await?;
346        let response = handle_openai_http_error(response, PROVIDER_NAME, "QWEN_API_KEY").await?;
347
348        Ok(spawn_openai_compatible_stream(
349            response,
350            PROVIDER_NAME,
351            model,
352            Some("reasoning_content"),
353            super::shared::OpenAiDeltaOrder::ReasoningFirst,
354        ))
355    }
356
357    fn supported_models(&self) -> Vec<String> {
358        models::qwen::SUPPORTED_MODELS
359            .iter()
360            .map(|model| model.to_string())
361            .collect()
362    }
363
364    fn validate_request(&self, request: &LLMRequest) -> Result<(), LLMError> {
365        validate_supported_models(
366            request,
367            PROVIDER_NAME,
368            PROVIDER_KEY,
369            models::qwen::SUPPORTED_MODELS,
370        )
371    }
372}
373
374impl_llm_client!(QwenProvider);