// mofa_foundation/llm/client.rs
1//! LLM Client - 高级 LLM 交互封装
2//!
3//! 提供便捷的 LLM 交互 API,包括消息管理、工具调用循环等
4
5use super::provider::{LLMConfig, LLMProvider};
6use super::tool_executor::ToolExecutor;
7use super::types::*;
8use std::sync::Arc;
9
/// LLM client.
///
/// High-level wrapper around an [`LLMProvider`] offering convenience APIs:
/// builder-style chat requests, embeddings, and simple one-shot Q&A.
///
/// # Example
///
/// ```rust,ignore
/// use mofa_foundation::llm::{LLMClient, LLMConfig, ChatMessage};
///
/// // Create the client
/// let client = LLMClient::new(provider);
///
/// // Simple conversation
/// let response = client
///     .chat()
///     .system("You are a helpful assistant.")
///     .user("Hello!")
///     .send()
///     .await?;
///
/// info!("{}", response.content().unwrap_or_default());
/// ```
pub struct LLMClient {
    /// Backend provider that performs the actual API calls (shared, object-safe).
    provider: Arc<dyn LLMProvider>,
    /// Client-level defaults (model, temperature, max tokens) applied to new requests.
    config: LLMConfig,
}
36
37impl LLMClient {
38    /// 使用 Provider 创建客户端
39    pub fn new(provider: Arc<dyn LLMProvider>) -> Self {
40        Self {
41            provider,
42            config: LLMConfig::default(),
43        }
44    }
45
46    /// 使用配置创建客户端
47    pub fn with_config(provider: Arc<dyn LLMProvider>, config: LLMConfig) -> Self {
48        Self { provider, config }
49    }
50
51    /// 获取 Provider
52    pub fn provider(&self) -> &Arc<dyn LLMProvider> {
53        &self.provider
54    }
55
56    /// 获取配置
57    pub fn config(&self) -> &LLMConfig {
58        &self.config
59    }
60
61    /// 创建 Chat 请求构建器
62    pub fn chat(&self) -> ChatRequestBuilder {
63        let model = self
64            .config
65            .default_model
66            .clone()
67            .unwrap_or_else(|| self.provider.default_model().to_string());
68
69        let mut builder = ChatRequestBuilder::new(self.provider.clone(), model);
70
71        if let Some(temp) = self.config.default_temperature {
72            builder = builder.temperature(temp);
73        }
74        if let Some(tokens) = self.config.default_max_tokens {
75            builder = builder.max_tokens(tokens);
76        }
77
78        builder
79    }
80
81    /// 创建 Embedding 请求
82    pub async fn embed(&self, input: impl Into<String>) -> LLMResult<Vec<f32>> {
83        let model = self
84            .config
85            .default_model
86            .clone()
87            .unwrap_or_else(|| "text-embedding-ada-002".to_string());
88
89        let request = EmbeddingRequest {
90            model,
91            input: EmbeddingInput::Single(input.into()),
92            encoding_format: None,
93            dimensions: None,
94            user: None,
95        };
96
97        let response = self.provider.embedding(request).await?;
98        response
99            .data
100            .into_iter()
101            .next()
102            .map(|d| d.embedding)
103            .ok_or_else(|| LLMError::Other("No embedding data returned".to_string()))
104    }
105
106    /// 批量 Embedding
107    pub async fn embed_batch(&self, inputs: Vec<String>) -> LLMResult<Vec<Vec<f32>>> {
108        let model = self
109            .config
110            .default_model
111            .clone()
112            .unwrap_or_else(|| "text-embedding-ada-002".to_string());
113
114        let request = EmbeddingRequest {
115            model,
116            input: EmbeddingInput::Multiple(inputs),
117            encoding_format: None,
118            dimensions: None,
119            user: None,
120        };
121
122        let response = self.provider.embedding(request).await?;
123        Ok(response.data.into_iter().map(|d| d.embedding).collect())
124    }
125
126    /// 简单对话(单次问答)
127    pub async fn ask(&self, question: impl Into<String>) -> LLMResult<String> {
128        let response = self.chat().user(question).send().await?;
129
130        response
131            .content()
132            .map(|s| s.to_string())
133            .ok_or_else(|| LLMError::Other("No content in response".to_string()))
134    }
135
136    /// 带系统提示的简单对话
137    pub async fn ask_with_system(
138        &self,
139        system: impl Into<String>,
140        question: impl Into<String>,
141    ) -> LLMResult<String> {
142        let response = self.chat().system(system).user(question).send().await?;
143
144        response
145            .content()
146            .map(|s| s.to_string())
147            .ok_or_else(|| LLMError::Other("No content in response".to_string()))
148    }
149}
150
/// Builder for chat completion requests.
pub struct ChatRequestBuilder {
    /// Provider that will execute the request.
    provider: Arc<dyn LLMProvider>,
    /// Request payload accumulated by the builder methods.
    request: ChatCompletionRequest,
    /// Optional executor used by `send_with_tools` to run tool calls.
    tool_executor: Option<Arc<dyn ToolExecutor>>,
    /// Upper bound on tool-call rounds in `send_with_tools`.
    max_tool_rounds: u32,
    // Retry configuration
    /// Retry policy applied when `retry_enabled` is set.
    retry_policy: Option<LLMRetryPolicy>,
    /// Whether `send` routes through the retry executor.
    retry_enabled: bool,
}
161
162impl ChatRequestBuilder {
163    /// 创建新的构建器
164    pub fn new(provider: Arc<dyn LLMProvider>, model: impl Into<String>) -> Self {
165        Self {
166            provider,
167            request: ChatCompletionRequest::new(model),
168            tool_executor: None,
169            max_tool_rounds: 10,
170            retry_policy: None,
171            retry_enabled: false,
172        }
173    }
174
175    /// 添加系统消息
176    pub fn system(mut self, content: impl Into<String>) -> Self {
177        self.request.messages.push(ChatMessage::system(content));
178        self
179    }
180
181    /// 添加用户消息
182    pub fn user(mut self, content: impl Into<String>) -> Self {
183        self.request.messages.push(ChatMessage::user(content));
184        self
185    }
186
187    /// 添加用户消息(结构化内容)
188    pub fn user_with_content(mut self, content: MessageContent) -> Self {
189        self.request
190            .messages
191            .push(ChatMessage::user_with_content(content));
192        self
193    }
194
195    /// 添加用户消息(多部分内容)
196    pub fn user_with_parts(mut self, parts: Vec<ContentPart>) -> Self {
197        self.request
198            .messages
199            .push(ChatMessage::user_with_parts(parts));
200        self
201    }
202
203    /// 添加助手消息
204    pub fn assistant(mut self, content: impl Into<String>) -> Self {
205        self.request.messages.push(ChatMessage::assistant(content));
206        self
207    }
208
209    /// 添加消息
210    pub fn message(mut self, message: ChatMessage) -> Self {
211        self.request.messages.push(message);
212        self
213    }
214
215    /// 添加消息列表
216    pub fn messages(mut self, messages: Vec<ChatMessage>) -> Self {
217        self.request.messages.extend(messages);
218        self
219    }
220
221    /// 设置温度
222    pub fn temperature(mut self, temp: f32) -> Self {
223        self.request.temperature = Some(temp);
224        self
225    }
226
227    /// 设置最大 token 数
228    pub fn max_tokens(mut self, tokens: u32) -> Self {
229        self.request.max_tokens = Some(tokens);
230        self
231    }
232
233    /// 添加工具
234    pub fn tool(mut self, tool: Tool) -> Self {
235        self.request.tools.get_or_insert_with(Vec::new).push(tool);
236        self
237    }
238
239    /// 设置工具列表
240    pub fn tools(mut self, tools: Vec<Tool>) -> Self {
241        self.request.tools = Some(tools);
242        self
243    }
244
245    /// 设置工具执行器
246    pub fn with_tool_executor(mut self, executor: Arc<dyn ToolExecutor>) -> Self {
247        self.tool_executor = Some(executor);
248        self
249    }
250
251    /// 设置最大工具调用轮数
252    pub fn max_tool_rounds(mut self, rounds: u32) -> Self {
253        self.max_tool_rounds = rounds;
254        self
255    }
256
257    /// 设置响应格式为 JSON
258    pub fn json_mode(mut self) -> Self {
259        self.request.response_format = Some(ResponseFormat::json());
260        self
261    }
262
263    /// 设置停止序列
264    pub fn stop(mut self, sequences: Vec<String>) -> Self {
265        self.request.stop = Some(sequences);
266        self
267    }
268
269    // ========================================================================
270    // Retry Configuration
271    // ========================================================================
272
273    /// Enable retry with default policy
274    ///
275    /// Uses PromptRetry strategy for serialization errors (best for JSON mode)
276    /// and DirectRetry for network/transient errors.
277    ///
278    /// # Example
279    /// ```rust,ignore
280    /// let response = client.chat()
281    ///     .json_mode()
282    ///     .with_retry()
283    ///     .send()
284    ///     .await?;
285    /// ```
286    pub fn with_retry(mut self) -> Self {
287        self.retry_enabled = true;
288        self.retry_policy = Some(LLMRetryPolicy::default());
289        self
290    }
291
292    /// Enable retry with custom policy
293    ///
294    /// # Example
295    /// ```rust,ignore
296    /// use mofa_foundation::llm::{LLMRetryPolicy, BackoffStrategy};
297    ///
298    /// let custom_policy = LLMRetryPolicy {
299    ///     max_attempts: 5,
300    ///     backoff: BackoffStrategy::ExponentialWithJitter {
301    ///         initial_delay_ms: 500,
302    ///         max_delay_ms: 60000,
303    ///         jitter_ms: 250,
304    ///     },
305    ///     ..Default::default()
306    /// };
307    ///
308    /// let response = client.chat()
309    ///     .with_retry_policy(custom_policy)
310    ///     .send()
311    ///     .await?;
312    /// ```
313    pub fn with_retry_policy(mut self, policy: LLMRetryPolicy) -> Self {
314        self.retry_enabled = true;
315        self.retry_policy = Some(policy);
316        self
317    }
318
319    /// Disable retry (explicit)
320    ///
321    /// This is the default behavior, but can be used to override
322    /// any previously set retry configuration.
323    pub fn without_retry(mut self) -> Self {
324        self.retry_enabled = false;
325        self.retry_policy = None;
326        self
327    }
328
329    /// Set max retry attempts (convenience method)
330    ///
331    /// Shortcut for setting max_attempts in the default retry policy.
332    /// Equivalent to `.with_retry_policy(LLMRetryPolicy::with_max_attempts(n))`
333    ///
334    /// # Example
335    /// ```rust,ignore
336    /// let response = client.chat()
337    ///     .json_mode()
338    ///     .max_retries(3)
339    ///     .send()
340    ///     .await?;
341    /// ```
342    pub fn max_retries(mut self, max: u32) -> Self {
343        if self.retry_policy.is_none() {
344            self.retry_policy = Some(LLMRetryPolicy::default());
345        }
346        if let Some(ref mut policy) = self.retry_policy {
347            policy.max_attempts = max;
348        }
349        self.retry_enabled = true;
350        self
351    }
352
353    /// 发送请求
354    pub async fn send(self) -> LLMResult<ChatCompletionResponse> {
355        if self.retry_enabled {
356            let policy = self.retry_policy.unwrap_or_default();
357            let executor = crate::llm::retry::RetryExecutor::new(self.provider, policy);
358            executor.chat(self.request).await
359        } else {
360            self.provider.chat(self.request).await
361        }
362    }
363
364    /// 发送流式请求
365    pub async fn send_stream(mut self) -> LLMResult<super::provider::ChatStream> {
366        self.request.stream = Some(true);
367        self.provider.chat_stream(self.request).await
368    }
369
370    /// 发送请求并自动执行工具调用
371    ///
372    /// 当 LLM 返回工具调用时,自动执行工具并继续对话,
373    /// 直到 LLM 返回最终响应或达到最大轮数
374    pub async fn send_with_tools(mut self) -> LLMResult<ChatCompletionResponse> {
375        let executor = self
376            .tool_executor
377            .take()
378            .ok_or_else(|| LLMError::ConfigError("Tool executor not set".to_string()))?;
379
380        if self
381            .request
382            .tools
383            .as_ref()
384            .map(|tools| tools.is_empty())
385            .unwrap_or(true)
386        {
387            let tools = executor.available_tools().await?;
388            if !tools.is_empty() {
389                self.request.tools = Some(tools);
390            }
391        }
392
393        let max_rounds = self.max_tool_rounds;
394        let mut round = 0;
395
396        loop {
397            let response = self.provider.chat(self.request.clone()).await?;
398
399            // 检查是否有工具调用
400            if !response.has_tool_calls() {
401                return Ok(response);
402            }
403
404            round += 1;
405            if round >= max_rounds {
406                return Err(LLMError::Other(format!(
407                    "Max tool rounds ({}) exceeded",
408                    max_rounds
409                )));
410            }
411
412            // 添加助手消息(包含工具调用)
413            if let Some(choice) = response.choices.first() {
414                self.request.messages.push(choice.message.clone());
415            }
416
417            // 执行工具调用
418            if let Some(tool_calls) = response.tool_calls() {
419                for tool_call in tool_calls {
420                    let result = executor
421                        .execute(&tool_call.function.name, &tool_call.function.arguments)
422                        .await;
423
424                    let result_str = match result {
425                        Ok(r) => r,
426                        Err(e) => format!("Error: {}", e),
427                    };
428
429                    // 添加工具结果消息
430                    self.request
431                        .messages
432                        .push(ChatMessage::tool_result(&tool_call.id, result_str));
433                }
434            }
435        }
436    }
437}
438
// ============================================================================
// Session management
// ============================================================================

/// A conversational session.
///
/// Manages the message history of a multi-turn conversation, along with an
/// optional system prompt, tools, persistence stores, and a sliding context
/// window.
pub struct ChatSession {
    /// Unique session identifier.
    session_id: uuid::Uuid,
    /// User ID.
    user_id: uuid::Uuid,
    /// Agent ID.
    agent_id: uuid::Uuid,
    /// Tenant ID.
    tenant_id: uuid::Uuid,
    /// LLM client used to send requests.
    client: LLMClient,
    /// Message history (the system prompt is held separately in `system_prompt`).
    messages: Vec<ChatMessage>,
    /// Optional system prompt, prepended to every request.
    system_prompt: Option<String>,
    /// Explicit tool list; when empty, tools are auto-discovered from the executor.
    tools: Vec<Tool>,
    /// Optional tool executor.
    tool_executor: Option<Arc<dyn ToolExecutor>>,
    /// Session creation time (process-local; not restored on `load`).
    created_at: std::time::Instant,
    /// Arbitrary session metadata.
    metadata: std::collections::HashMap<String, String>,
    /// Message persistence store.
    message_store: Arc<dyn crate::persistence::MessageStore>,
    /// Session persistence store.
    session_store: Arc<dyn crate::persistence::SessionStore>,
    /// Sliding context window size, in conversation rounds (None = unlimited).
    context_window_size: Option<usize>,
    /// Metadata from the most recent LLM response.
    last_response_metadata: Option<super::types::LLMResponseMetadata>,
}
478
479impl ChatSession {
480    /// 创建新会话(自动生成 ID)
481    pub fn new(client: LLMClient) -> Self {
482        // 默认使用内存存储
483        let store = Arc::new(crate::persistence::InMemoryStore::new());
484        Self::with_id_and_stores(
485            Self::generate_session_id(),
486            client,
487            uuid::Uuid::now_v7(),
488            uuid::Uuid::now_v7(),
489            uuid::Uuid::now_v7(),
490            store.clone(),
491            store.clone(),
492            None,
493        )
494    }
495
496    /// 创建新会话并指定存储实现
497    pub fn new_with_stores(
498        client: LLMClient,
499        user_id: uuid::Uuid,
500        tenant_id: uuid::Uuid,
501        agent_id: uuid::Uuid,
502        message_store: Arc<dyn crate::persistence::MessageStore>,
503        session_store: Arc<dyn crate::persistence::SessionStore>,
504    ) -> Self {
505        Self::with_id_and_stores(
506            Self::generate_session_id(),
507            client,
508            user_id,
509            tenant_id,
510            agent_id,
511            message_store,
512            session_store,
513            None,
514        )
515    }
516
517    /// 使用指定 UUID 创建会话
518    pub fn with_id(session_id: uuid::Uuid, client: LLMClient) -> Self {
519        // 默认使用内存存储
520        let store = Arc::new(crate::persistence::InMemoryStore::new());
521        Self {
522            session_id,
523            user_id: uuid::Uuid::now_v7(),
524            agent_id: uuid::Uuid::now_v7(),
525            tenant_id: uuid::Uuid::now_v7(),
526            client,
527            messages: Vec::new(),
528            system_prompt: None,
529            tools: Vec::new(),
530            tool_executor: None,
531            created_at: std::time::Instant::now(),
532            metadata: std::collections::HashMap::new(),
533            message_store: store.clone(),
534            session_store: store.clone(),
535            context_window_size: None,
536            last_response_metadata: None,
537        }
538    }
539
540    /// 使用指定字符串 ID 创建会话
541    pub fn with_id_str(session_id: &str, client: LLMClient) -> Self {
542        // 尝试将字符串解析为 UUID,如果失败则生成新的 UUID
543        let session_id = uuid::Uuid::parse_str(session_id).unwrap_or_else(|_| uuid::Uuid::now_v7());
544        Self::with_id(session_id, client)
545    }
546
547    /// 使用指定 ID 和存储实现创建会话
548    pub fn with_id_and_stores(
549        session_id: uuid::Uuid,
550        client: LLMClient,
551        user_id: uuid::Uuid,
552        tenant_id: uuid::Uuid,
553        agent_id: uuid::Uuid,
554        message_store: Arc<dyn crate::persistence::MessageStore>,
555        session_store: Arc<dyn crate::persistence::SessionStore>,
556        context_window_size: Option<usize>,
557    ) -> Self {
558        Self {
559            session_id,
560            user_id,
561            tenant_id,
562            agent_id,
563            client,
564            messages: Vec::new(),
565            system_prompt: None,
566            tools: Vec::new(),
567            tool_executor: None,
568            created_at: std::time::Instant::now(),
569            metadata: std::collections::HashMap::new(),
570            message_store,
571            session_store,
572            context_window_size,
573            last_response_metadata: None,
574        }
575    }
576
    /// Creates a session with explicit ID and stores, persisting it immediately.
    ///
    /// Saves the session record to the database so the session exists as soon
    /// as it is created. This matters when the session ID is used as a foreign
    /// key (e.g. when saving messages).
    ///
    /// # Parameters
    /// - `session_id`: session ID
    /// - `client`: LLM client
    /// - `user_id`: user ID
    /// - `tenant_id`: tenant ID
    /// - `agent_id`: agent ID
    /// - `message_store`: message store
    /// - `session_store`: session store
    /// - `context_window_size`: optional context window size
    ///
    /// # Returns
    /// The created and persisted session.
    ///
    /// # Errors
    /// Returns an error when the database operation fails.
    pub async fn with_id_and_stores_and_persist(
        session_id: uuid::Uuid,
        client: LLMClient,
        user_id: uuid::Uuid,
        tenant_id: uuid::Uuid,
        agent_id: uuid::Uuid,
        message_store: Arc<dyn crate::persistence::MessageStore>,
        session_store: Arc<dyn crate::persistence::SessionStore>,
        context_window_size: Option<usize>,
    ) -> crate::persistence::PersistenceResult<Self> {
        // Build the in-memory session first.
        let session = Self::with_id_and_stores(
            session_id,
            client,
            user_id,
            tenant_id,
            agent_id,
            message_store,
            session_store.clone(),
            context_window_size,
        );

        // Persist the session record to the database.
        // NOTE(review): the persisted record is built from user_id/agent_id
        // only — tenant_id is not stored here; confirm this is intended.
        let db_session =
            crate::persistence::ChatSession::new(user_id, agent_id).with_id(session_id);
        session_store.create_session(&db_session).await?;

        Ok(session)
    }
625
    /// Generates a unique session ID (UUID v7, time-ordered).
    fn generate_session_id() -> uuid::Uuid {
        uuid::Uuid::now_v7()
    }

    /// Returns the session ID.
    pub fn session_id(&self) -> uuid::Uuid {
        self.session_id
    }

    /// Returns the session ID as a string.
    pub fn session_id_str(&self) -> String {
        self.session_id.to_string()
    }

    /// Returns the session creation instant (process-local).
    pub fn created_at(&self) -> std::time::Instant {
        self.created_at
    }
645
    /// Loads a session and its messages from the database.
    ///
    /// Creates a new `ChatSession` instance populated with the stored
    /// messages for `session_id`.
    ///
    /// # Parameters
    /// - `session_id`: session ID
    /// - `client`: LLM client
    /// - `user_id`: user ID
    /// - `tenant_id`: tenant ID
    /// - `agent_id`: agent ID
    /// - `message_store`: message store
    /// - `session_store`: session store
    /// - `context_window_size`: optional window size (in rounds); when set,
    ///   only the most recent N rounds are loaded
    ///
    /// # Errors
    /// Returns `NotFound` when the session does not exist, plus any store error.
    ///
    /// # Notes
    /// When `context_window_size` is set, only the most recent rounds are
    /// loaded into memory. This is useful for long-running conversations,
    /// avoiding the cost of loading the full history.
    pub async fn load(
        session_id: uuid::Uuid,
        client: LLMClient,
        user_id: uuid::Uuid,
        tenant_id: uuid::Uuid,
        agent_id: uuid::Uuid,
        message_store: Arc<dyn crate::persistence::MessageStore>,
        session_store: Arc<dyn crate::persistence::SessionStore>,
        context_window_size: Option<usize>,
    ) -> crate::persistence::PersistenceResult<Self> {
        // Load session from database (currently only to verify existence).
        let _db_session = session_store
            .get_session(session_id)
            .await?
            .ok_or_else(|| {
                crate::persistence::PersistenceError::NotFound("Session not found".to_string())
            })?;

        // Load messages from database.
        // Use pagination when context_window_size is set to avoid loading all messages.
        let db_messages = if context_window_size.is_some() {
            // Use pagination to avoid loading all messages for long-running sessions.
            let total_count = message_store.count_session_messages(session_id).await?;

            // Calculate fetch limit: rounds * 2 (user+assistant per round) + buffer for system messages.
            let rounds = context_window_size.unwrap_or(0);
            let limit = (rounds * 2 + 20) as i64; // 20 message buffer for system messages at beginning

            // Calculate offset to get the most recent messages
            // (clamped to 0 when the session holds fewer messages than the limit).
            let offset = std::cmp::max(0, total_count - limit);

            message_store
                .get_session_messages_paginated(session_id, offset, limit)
                .await?
        } else {
            // No window size specified, fetch all messages (current behavior).
            message_store.get_session_messages(session_id).await?
        };

        // Convert messages to the domain format.
        let mut messages = Vec::new();
        for db_msg in db_messages {
            // Convert MessageRole to Role.
            let domain_role = match db_msg.role {
                crate::persistence::MessageRole::System => crate::llm::types::Role::System,
                crate::persistence::MessageRole::User => crate::llm::types::Role::User,
                crate::persistence::MessageRole::Assistant => crate::llm::types::Role::Assistant,
                crate::persistence::MessageRole::Tool => crate::llm::types::Role::Tool,
            };

            // Convert MessageContent to the domain format
            // (only the text part is restored here).
            let domain_content = db_msg
                .content
                .text
                .map(crate::llm::types::MessageContent::Text);

            // Create the domain message (tool metadata is not persisted).
            let domain_msg = ChatMessage {
                role: domain_role,
                content: domain_content,
                name: None,
                tool_calls: None,
                tool_call_id: None,
            };
            messages.push(domain_msg);
        }

        // Apply the sliding window (when context_window_size was specified).
        let messages = Self::apply_sliding_window_static(&messages, context_window_size);

        // Create and return the ChatSession.
        Ok(Self {
            session_id,
            user_id,
            tenant_id,
            agent_id,
            client,
            messages,
            system_prompt: None, // System prompt is not stored in messages
            tools: Vec::new(),   // Tools are not persisted yet
            tool_executor: None, // Tool executor is not persisted
            created_at: std::time::Instant::now(), // TODO: Convert from db_session.create_time
            metadata: std::collections::HashMap::new(), // TODO: Convert from db_session.metadata
            message_store,
            session_store,
            context_window_size,
            last_response_metadata: None,
        })
    }
751
    /// Returns how long the session has existed (since in-process creation).
    pub fn elapsed(&self) -> std::time::Duration {
        self.created_at.elapsed()
    }

    /// Sets a metadata entry.
    pub fn set_metadata(&mut self, key: impl Into<String>, value: impl Into<String>) {
        self.metadata.insert(key.into(), value.into());
    }

    /// Gets a metadata entry.
    pub fn get_metadata(&self, key: &str) -> Option<&String> {
        self.metadata.get(key)
    }

    /// Returns all metadata.
    pub fn metadata(&self) -> &std::collections::HashMap<String, String> {
        &self.metadata
    }
771
    /// Sets the system prompt (builder style).
    pub fn with_system(mut self, prompt: impl Into<String>) -> Self {
        self.system_prompt = Some(prompt.into());
        self
    }

    /// Sets the sliding context window size (builder style).
    ///
    /// # Parameters
    /// - `size`: maximum number of conversation rounds to keep (None = unlimited)
    ///
    /// # Notes
    /// - The unit is **rounds**, not tokens
    /// - One round ≈ one user message + one assistant reply
    /// - System messages are always kept and do not count toward the limit
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let session = ChatSession::new(client)
    ///     .with_context_window_size(Some(10)); // keep only the last 10 rounds
    /// ```
    pub fn with_context_window_size(mut self, size: Option<usize>) -> Self {
        self.context_window_size = size;
        self
    }

    /// Sets the tool list together with its executor (builder style).
    pub fn with_tools(mut self, tools: Vec<Tool>, executor: Arc<dyn ToolExecutor>) -> Self {
        self.tools = tools;
        self.tool_executor = Some(executor);
        self
    }

    /// Sets only the tool executor; the tool list is auto-discovered.
    pub fn with_tool_executor(mut self, executor: Arc<dyn ToolExecutor>) -> Self {
        self.tool_executor = Some(executor);
        self
    }
811
    /// Sends a plain-text user message and returns the assistant's reply.
    ///
    /// Appends the user message to history, builds a request from the system
    /// prompt and the windowed history (plus tools when an executor is set),
    /// sends it, records response metadata, appends the assistant reply, and
    /// finally re-applies the sliding window to the stored history.
    ///
    /// NOTE(review): this body is nearly identical to `send_with_content`;
    /// consider extracting the shared completion logic.
    pub async fn send(&mut self, content: impl Into<String>) -> LLMResult<String> {
        // Record the user message in the session history.
        self.messages.push(ChatMessage::user(content));

        // Build a request from the client defaults.
        let mut builder = self.client.chat();

        // Prepend the system prompt, if any.
        if let Some(ref system) = self.system_prompt {
            builder = builder.system(system.clone());
        }

        // Add history, trimmed to the configured context window.
        let messages_for_context = self.apply_sliding_window();
        builder = builder.messages(messages_for_context);

        // Attach tools: explicit list when set, otherwise auto-discovered.
        if let Some(ref executor) = self.tool_executor {
            let tools = if self.tools.is_empty() {
                executor.available_tools().await?
            } else {
                self.tools.clone()
            };

            if !tools.is_empty() {
                builder = builder.tools(tools);
            }

            builder = builder.with_tool_executor(executor.clone());
        }

        // Send, driving the tool-call loop when an executor is present.
        let response = if self.tool_executor.is_some() {
            builder.send_with_tools().await?
        } else {
            builder.send().await?
        };

        // Keep the metadata of the latest response.
        self.last_response_metadata = Some(super::types::LLMResponseMetadata::from(&response));

        // Extract the assistant's text content.
        let content = response
            .content()
            .ok_or_else(|| LLMError::Other("No content in response".to_string()))?
            .to_string();

        // Record the assistant reply in the session history.
        self.messages.push(ChatMessage::assistant(&content));

        // Sliding window: trim stored history to the configured size.
        if self.context_window_size.is_some() {
            self.messages =
                Self::apply_sliding_window_static(&self.messages, self.context_window_size);
        }

        Ok(content)
    }
871
    /// Sends a structured (possibly multimodal) user message and returns the
    /// assistant's reply.
    ///
    /// Same flow as `send`, but the user message carries a `MessageContent`
    /// (e.g. text plus images) instead of plain text.
    pub async fn send_with_content(&mut self, content: MessageContent) -> LLMResult<String> {
        // Record the structured user message in the session history.
        self.messages.push(ChatMessage::user_with_content(content));

        // Build a request from the client defaults.
        let mut builder = self.client.chat();

        // Prepend the system prompt, if any.
        if let Some(ref system) = self.system_prompt {
            builder = builder.system(system.clone());
        }

        // Add history, trimmed to the configured context window.
        let messages_for_context = self.apply_sliding_window();
        builder = builder.messages(messages_for_context);

        // Attach tools: explicit list when set, otherwise auto-discovered.
        if let Some(ref executor) = self.tool_executor {
            let tools = if self.tools.is_empty() {
                executor.available_tools().await?
            } else {
                self.tools.clone()
            };

            if !tools.is_empty() {
                builder = builder.tools(tools);
            }

            builder = builder.with_tool_executor(executor.clone());
        }

        // Send, driving the tool-call loop when an executor is present.
        let response = if self.tool_executor.is_some() {
            builder.send_with_tools().await?
        } else {
            builder.send().await?
        };

        // Keep the metadata of the latest response.
        self.last_response_metadata = Some(super::types::LLMResponseMetadata::from(&response));

        // Extract the assistant's text content.
        let content = response
            .content()
            .ok_or_else(|| LLMError::Other("No content in response".to_string()))?
            .to_string();

        // Record the assistant reply in the session history.
        self.messages.push(ChatMessage::assistant(&content));

        // Sliding window: trim stored history to the configured size.
        if self.context_window_size.is_some() {
            self.messages =
                Self::apply_sliding_window_static(&self.messages, self.context_window_size);
        }

        Ok(content)
    }
930
    /// Returns the message history.
    pub fn messages(&self) -> &[ChatMessage] {
        &self.messages
    }

    /// Returns the message history (mutable).
    pub fn messages_mut(&mut self) -> &mut Vec<ChatMessage> {
        &mut self.messages
    }

    /// Clears the message history.
    pub fn clear(&mut self) {
        self.messages.clear();
    }

    /// Returns the number of messages in the history.
    pub fn len(&self) -> usize {
        self.messages.len()
    }

    /// Returns `true` when the history is empty.
    pub fn is_empty(&self) -> bool {
        self.messages.is_empty()
    }

    /// Sets the sliding context window size.
    ///
    /// # Parameters
    /// - `size`: maximum number of conversation rounds to keep (None = unlimited)
    ///
    /// # Notes
    /// - The unit is **rounds**, not tokens
    /// - One round ≈ one user message + one assistant reply (possibly with tool calls)
    /// - System messages are always kept and do not count toward the limit
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// session.set_context_window_size(Some(10)); // keep only the last 10 rounds
    /// session.set_context_window_size(None);     // unlimited
    /// ```
    pub fn set_context_window_size(&mut self, size: Option<usize>) {
        self.context_window_size = size;
    }

    /// Returns the sliding context window size (in rounds).
    pub fn context_window_size(&self) -> Option<usize> {
        self.context_window_size
    }

    /// Returns metadata from the most recent LLM response, if any.
    pub fn last_response_metadata(&self) -> Option<&super::types::LLMResponseMetadata> {
        self.last_response_metadata.as_ref()
    }
985
    /// Applies the sliding window to this session's history and returns the
    /// trimmed message list.
    ///
    /// Keeps the most recent `context_window_size` rounds of conversation
    /// (one round ≈ one user message + one assistant reply); system messages
    /// are always kept and do not count toward the limit.
    ///
    /// Delegates to [`Self::apply_sliding_window_static`].
    fn apply_sliding_window(&self) -> Vec<ChatMessage> {
        Self::apply_sliding_window_static(&self.messages, self.context_window_size)
    }
1011
1012    /// 静态方法:应用滑动窗口到消息列表
1013    ///
1014    /// 这个方法可以在不拥有 ChatSession 实例的情况下使用,
1015    /// 例如在从数据库加载消息时。
1016    ///
1017    /// # 参数
1018    /// - `messages`: 要过滤的消息列表
1019    /// - `window_size`: 保留的最大对话轮数(None 表示不限制)
1020    pub fn apply_sliding_window_static(
1021        messages: &[ChatMessage],
1022        window_size: Option<usize>,
1023    ) -> Vec<ChatMessage> {
1024        let max_rounds = match window_size {
1025            Some(size) if size > 0 => size,
1026            _ => return messages.to_vec(), // 无限制,返回所有消息
1027        };
1028
1029        // 分离系统消息和对话消息
1030        let mut system_messages = Vec::new();
1031        let mut conversation_messages = Vec::new();
1032
1033        for msg in messages {
1034            if msg.role == Role::System {
1035                system_messages.push(msg.clone());
1036            } else {
1037                conversation_messages.push(msg.clone());
1038            }
1039        }
1040
1041        // 计算需要保留的最大消息数(每轮大约2条:用户+助手)
1042        let max_messages = max_rounds * 2;
1043
1044        if conversation_messages.len() <= max_messages {
1045            // 对话消息数量在限制内,返回所有消息
1046            return messages.to_vec();
1047        }
1048
1049        // 保留最后的 N 条对话消息
1050        let start_index = conversation_messages.len() - max_messages;
1051        let limited_conversation: Vec<ChatMessage> = conversation_messages
1052            .into_iter()
1053            .skip(start_index)
1054            .collect();
1055
1056        // 合并系统消息和裁剪后的对话消息
1057        let mut result = system_messages;
1058        result.extend(limited_conversation);
1059
1060        result
1061    }
1062
1063    /// 保存会话和消息到数据库
1064    pub async fn save(&self) -> crate::persistence::PersistenceResult<()> {
1065        // Convert ChatSession to persistence entity
1066        let db_session = crate::persistence::ChatSession::new(self.user_id, self.agent_id)
1067            .with_id(self.session_id)
1068            .with_metadata("client_version", serde_json::json!("0.1.0"));
1069
1070        // Save session
1071        self.session_store.create_session(&db_session).await?;
1072
1073        // Convert and save messages
1074        for msg in self.messages.iter() {
1075            // Convert Role to MessageRole
1076            let persistence_role = match msg.role {
1077                crate::llm::types::Role::System => crate::persistence::MessageRole::System,
1078                crate::llm::types::Role::User => crate::persistence::MessageRole::User,
1079                crate::llm::types::Role::Assistant => crate::persistence::MessageRole::Assistant,
1080                crate::llm::types::Role::Tool => crate::persistence::MessageRole::Tool,
1081            };
1082
1083            // Convert MessageContent to persistence format
1084            let persistence_content = match &msg.content {
1085                Some(crate::llm::types::MessageContent::Text(text)) => {
1086                    crate::persistence::MessageContent::text(text)
1087                }
1088                Some(crate::llm::types::MessageContent::Parts(parts)) => {
1089                    // For now, only handle text parts
1090                    let text = parts
1091                        .iter()
1092                        .filter_map(|part| {
1093                            if let crate::llm::types::ContentPart::Text { text } = part {
1094                                Some(text.clone())
1095                            } else {
1096                                None
1097                            }
1098                        })
1099                        .collect::<Vec<_>>()
1100                        .join("\n");
1101                    crate::persistence::MessageContent::text(text)
1102                }
1103                None => crate::persistence::MessageContent::text(""),
1104            };
1105
1106            let llm_message = crate::persistence::LLMMessage::new(
1107                self.session_id,
1108                self.agent_id,
1109                self.user_id,
1110                self.tenant_id,
1111                persistence_role,
1112                persistence_content,
1113            );
1114
1115            // Save message
1116            self.message_store.save_message(&llm_message).await?;
1117        }
1118
1119        Ok(())
1120    }
1121
1122    /// 从数据库删除会话和消息
1123    pub async fn delete(&self) -> crate::persistence::PersistenceResult<()> {
1124        // Delete all messages for the session
1125        self.message_store
1126            .delete_session_messages(self.session_id)
1127            .await?;
1128
1129        // Delete the session itself
1130        self.session_store.delete_session(self.session_id).await?;
1131
1132        Ok(())
1133    }
1134}
1135
1136// ============================================================================
1137// 便捷函数
1138// ============================================================================
1139
/// Convenience helper for quickly creating a function tool definition.
///
/// Thin wrapper around `Tool::function`.
///
/// # Examples
///
/// ```rust,ignore
/// use mofa_foundation::llm::function_tool;
/// use serde_json::json;
///
/// let tool = function_tool(
///     "get_weather",
///     "Get the current weather for a location",
///     json!({
///         "type": "object",
///         "properties": {
///             "location": {
///                 "type": "string",
///                 "description": "City name"
///             }
///         },
///         "required": ["location"]
///     })
/// );
/// ```
pub fn function_tool(
    name: impl Into<String>,
    description: impl Into<String>,
    parameters: serde_json::Value,
) -> Tool {
    Tool::function(name, description, parameters)
}