//! Moonshot AI provider implementation (direct API)
//!
//! For Kimi K2.5 and other Moonshot models via api.moonshot.ai

use super::util;
use super::{
    CompletionRequest, CompletionResponse, ContentPart, FinishReason, Message, ModelInfo, Provider,
    Role, StreamChunk, ToolDefinition, Usage,
};
use anyhow::{Context, Result};
use async_trait::async_trait;
use reqwest::Client;
use serde::Deserialize;
use serde_json::{Value, json};

/// Provider for the Moonshot AI (Kimi) chat-completions API.
///
/// Talks directly to `api.moonshot.ai` over an OpenAI-compatible
/// `/chat/completions` endpoint.
pub struct MoonshotProvider {
    // Shared HTTP client reused across requests.
    client: Client,
    // Bearer token sent in the `Authorization` header.
    api_key: String,
    // API root without trailing slash, e.g. `https://api.moonshot.ai/v1`.
    base_url: String,
}

22impl MoonshotProvider {
23    pub fn new(api_key: String) -> Result<Self> {
24        Ok(Self {
25            client: Client::new(),
26            api_key,
27            base_url: "https://api.moonshot.ai/v1".to_string(),
28        })
29    }
30
31    fn convert_messages(messages: &[Message]) -> Vec<Value> {
32        messages
33            .iter()
34            .map(|msg| {
35                let role = match msg.role {
36                    Role::System => "system",
37                    Role::User => "user",
38                    Role::Assistant => "assistant",
39                    Role::Tool => "tool",
40                };
41
42                match msg.role {
43                    Role::Tool => {
44                        if let Some(ContentPart::ToolResult {
45                            tool_call_id,
46                            content,
47                        }) = msg.content.first()
48                        {
49                            json!({
50                                "role": "tool",
51                                "tool_call_id": tool_call_id,
52                                "content": content
53                            })
54                        } else {
55                            json!({"role": role, "content": ""})
56                        }
57                    }
58                    Role::Assistant => {
59                        let text: String = msg
60                            .content
61                            .iter()
62                            .filter_map(|p| match p {
63                                ContentPart::Text { text } => Some(text.clone()),
64                                _ => None,
65                            })
66                            .collect::<Vec<_>>()
67                            .join("");
68
69                        let tool_calls: Vec<Value> = msg
70                            .content
71                            .iter()
72                            .filter_map(|p| match p {
73                                ContentPart::ToolCall {
74                                    id,
75                                    name,
76                                    arguments,
77                                    ..
78                                } => Some(json!({
79                                    "id": id,
80                                    "type": "function",
81                                    "function": {
82                                        "name": name,
83                                        "arguments": arguments
84                                    }
85                                })),
86                                _ => None,
87                            })
88                            .collect();
89
90                        if tool_calls.is_empty() {
91                            json!({"role": "assistant", "content": text})
92                        } else {
93                            // Moonshot requires reasoning_content for K2.5 thinking models
94                            // Include empty string when we don't have the original
95                            json!({
96                                "role": "assistant",
97                                "content": if text.is_empty() { "".to_string() } else { text },
98                                "reasoning_content": "",
99                                "tool_calls": tool_calls
100                            })
101                        }
102                    }
103                    _ => {
104                        let text: String = msg
105                            .content
106                            .iter()
107                            .filter_map(|p| match p {
108                                ContentPart::Text { text } => Some(text.clone()),
109                                _ => None,
110                            })
111                            .collect::<Vec<_>>()
112                            .join("\n");
113
114                        json!({"role": role, "content": text})
115                    }
116                }
117            })
118            .collect()
119    }
120
121    fn convert_tools(tools: &[ToolDefinition]) -> Vec<Value> {
122        tools
123            .iter()
124            .map(|t| {
125                json!({
126                    "type": "function",
127                    "function": {
128                        "name": t.name,
129                        "description": t.description,
130                        "parameters": t.parameters
131                    }
132                })
133            })
134            .collect()
135    }
136}
137
/// Top-level non-streaming response body from `/chat/completions`.
#[derive(Debug, Deserialize)]
struct MoonshotResponse {
    // Server-assigned response id (logged for debugging).
    id: String,
    // Model that actually served the request.
    model: String,
    choices: Vec<MoonshotChoice>,
    // Token accounting; tolerate its absence.
    #[serde(default)]
    usage: Option<MoonshotUsage>,
}

/// One completion candidate within a response.
#[derive(Debug, Deserialize)]
struct MoonshotChoice {
    message: MoonshotMessage,
    // e.g. "stop", "length", "tool_calls"; may be absent.
    #[serde(default)]
    finish_reason: Option<String>,
}

/// Assistant message payload inside a choice.
#[derive(Debug, Deserialize)]
struct MoonshotMessage {
    #[allow(dead_code)]
    role: String,
    // Plain-text answer; may be absent when only tool calls are returned.
    #[serde(default)]
    content: Option<String>,
    #[serde(default)]
    tool_calls: Option<Vec<MoonshotToolCall>>,
    // Kimi K2.5 reasoning ("thinking") trace, when the model emits one.
    #[serde(default)]
    reasoning_content: Option<String>,
}

/// A tool invocation requested by the model.
#[derive(Debug, Deserialize)]
struct MoonshotToolCall {
    id: String,
    // Wire value of "type" (presumably always "function"); only logged.
    #[serde(rename = "type")]
    call_type: String,
    function: MoonshotFunction,
}

/// Function name plus its arguments.
#[derive(Debug, Deserialize)]
struct MoonshotFunction {
    name: String,
    // Arguments arrive as a JSON-encoded string, not a parsed object.
    arguments: String,
}

/// Token usage block; every field defaults to 0 when omitted.
#[derive(Debug, Deserialize)]
struct MoonshotUsage {
    #[serde(default)]
    prompt_tokens: usize,
    #[serde(default)]
    completion_tokens: usize,
    #[serde(default)]
    total_tokens: usize,
}

/// Error envelope returned alongside non-2xx statuses.
#[derive(Debug, Deserialize)]
struct MoonshotError {
    #[allow(dead_code)]
    error: MoonshotErrorDetail,
}

/// Human-readable error message plus optional machine-readable type tag.
#[derive(Debug, Deserialize)]
struct MoonshotErrorDetail {
    message: String,
    #[serde(default, rename = "type")]
    error_type: Option<String>,
}

204#[async_trait]
205impl Provider for MoonshotProvider {
206    fn name(&self) -> &str {
207        "moonshotai"
208    }
209
210    async fn list_models(&self) -> Result<Vec<ModelInfo>> {
211        Ok(vec![
212            ModelInfo {
213                id: "kimi-k2.5".to_string(),
214                name: "Kimi K2.5".to_string(),
215                provider: "moonshotai".to_string(),
216                context_window: 256_000,
217                max_output_tokens: Some(64_000),
218                supports_vision: true,
219                supports_tools: true,
220                supports_streaming: true,
221                input_cost_per_million: Some(0.56), // ¥4/M tokens
222                output_cost_per_million: Some(2.8), // ¥20/M tokens
223            },
224            ModelInfo {
225                id: "kimi-k2-thinking".to_string(),
226                name: "Kimi K2 Thinking".to_string(),
227                provider: "moonshotai".to_string(),
228                context_window: 128_000,
229                max_output_tokens: Some(64_000),
230                supports_vision: false,
231                supports_tools: true,
232                supports_streaming: true,
233                input_cost_per_million: Some(0.56),
234                output_cost_per_million: Some(2.8),
235            },
236            ModelInfo {
237                id: "kimi-latest".to_string(),
238                name: "Kimi Latest".to_string(),
239                provider: "moonshotai".to_string(),
240                context_window: 128_000,
241                max_output_tokens: Some(64_000),
242                supports_vision: false,
243                supports_tools: true,
244                supports_streaming: true,
245                input_cost_per_million: Some(0.42), // Cheaper
246                output_cost_per_million: Some(1.68),
247            },
248        ])
249    }
250
251    async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
252        let messages = Self::convert_messages(&request.messages);
253        let tools = Self::convert_tools(&request.tools);
254
255        // Kimi K2.5 requires specific temperatures:
256        // - temperature = 1.0 when thinking is enabled
257        // - temperature = 0.6 when thinking is disabled
258        let temperature = if request.model.contains("k2") {
259            0.6 // We disable thinking for tool calling workflows
260        } else {
261            request.temperature.unwrap_or(0.7)
262        };
263
264        let mut body = json!({
265            "model": request.model,
266            "messages": messages,
267            "temperature": temperature,
268        });
269
270        // Disable thinking mode to avoid needing to track reasoning_content
271        // across message roundtrips (required for K2.5)
272        if request.model.contains("k2") {
273            body["thinking"] = json!({"type": "disabled"});
274        }
275
276        if !tools.is_empty() {
277            body["tools"] = json!(tools);
278        }
279        if let Some(max) = request.max_tokens {
280            body["max_tokens"] = json!(max);
281        }
282
283        tracing::debug!("Moonshot request to model {}", request.model);
284
285        let response = self
286            .client
287            .post(format!("{}/chat/completions", self.base_url))
288            .header("Authorization", format!("Bearer {}", self.api_key))
289            .header("Content-Type", "application/json")
290            .json(&body)
291            .send()
292            .await
293            .context("Failed to send request to Moonshot")?;
294
295        let status = response.status();
296        let text = response.text().await.context("Failed to read response")?;
297
298        if !status.is_success() {
299            if let Ok(err) = serde_json::from_str::<MoonshotError>(&text) {
300                anyhow::bail!(
301                    "Moonshot API error: {} ({:?})",
302                    err.error.message,
303                    err.error.error_type
304                );
305            }
306            anyhow::bail!("Moonshot API error: {} {}", status, text);
307        }
308
309        let response: MoonshotResponse = serde_json::from_str(&text).context(format!(
310            "Failed to parse Moonshot response: {}",
311            util::truncate_bytes_safe(&text, 200)
312        ))?;
313
314        // Log response metadata for debugging
315        tracing::debug!(
316            response_id = %response.id,
317            model = %response.model,
318            "Received Moonshot response"
319        );
320
321        let choice = response
322            .choices
323            .first()
324            .ok_or_else(|| anyhow::anyhow!("No choices"))?;
325
326        // Log reasoning/thinking content if present (Kimi K2 models)
327        if let Some(ref reasoning) = choice.message.reasoning_content
328            && !reasoning.is_empty()
329        {
330            tracing::info!(
331                reasoning_len = reasoning.len(),
332                "Model reasoning/thinking content received"
333            );
334            tracing::debug!(
335                reasoning = %reasoning,
336                "Full model reasoning"
337            );
338        }
339
340        let mut content = Vec::new();
341        let mut has_tool_calls = false;
342
343        if let Some(text) = &choice.message.content
344            && !text.is_empty()
345        {
346            content.push(ContentPart::Text { text: text.clone() });
347        }
348
349        if let Some(tool_calls) = &choice.message.tool_calls {
350            has_tool_calls = !tool_calls.is_empty();
351            for tc in tool_calls {
352                // Log tool call details for debugging (uses role and call_type fields)
353                tracing::debug!(
354                    tool_call_id = %tc.id,
355                    call_type = %tc.call_type,
356                    function_name = %tc.function.name,
357                    "Processing tool call"
358                );
359                content.push(ContentPart::ToolCall {
360                    id: tc.id.clone(),
361                    name: tc.function.name.clone(),
362                    arguments: tc.function.arguments.clone(),
363                    thought_signature: None,
364                });
365            }
366        }
367
368        let finish_reason = if has_tool_calls {
369            FinishReason::ToolCalls
370        } else {
371            match choice.finish_reason.as_deref() {
372                Some("stop") => FinishReason::Stop,
373                Some("length") => FinishReason::Length,
374                Some("tool_calls") => FinishReason::ToolCalls,
375                _ => FinishReason::Stop,
376            }
377        };
378
379        Ok(CompletionResponse {
380            message: Message {
381                role: Role::Assistant,
382                content,
383            },
384            usage: Usage {
385                prompt_tokens: response
386                    .usage
387                    .as_ref()
388                    .map(|u| u.prompt_tokens)
389                    .unwrap_or(0),
390                completion_tokens: response
391                    .usage
392                    .as_ref()
393                    .map(|u| u.completion_tokens)
394                    .unwrap_or(0),
395                total_tokens: response.usage.as_ref().map(|u| u.total_tokens).unwrap_or(0),
396                ..Default::default()
397            },
398            finish_reason,
399        })
400    }
401
402    async fn complete_stream(
403        &self,
404        request: CompletionRequest,
405    ) -> Result<futures::stream::BoxStream<'static, StreamChunk>> {
406        tracing::debug!(
407            provider = "moonshotai",
408            model = %request.model,
409            message_count = request.messages.len(),
410            "Starting streaming completion request (falling back to non-streaming)"
411        );
412
413        // Fall back to non-streaming for now
414        let response = self.complete(request).await?;
415        let text = response
416            .message
417            .content
418            .iter()
419            .filter_map(|p| match p {
420                ContentPart::Text { text } => Some(text.clone()),
421                _ => None,
422            })
423            .collect::<Vec<_>>()
424            .join("");
425
426        Ok(Box::pin(futures::stream::once(async move {
427            StreamChunk::Text(text)
428        })))
429    }
430}