// agent-context 0.1.3

//! 后端 trait 定义。
//!
//! [`ContextBackend`] 封装 LLM 后端的消息工厂、格式转换、模型对话和配置信息。
//! [`ContextBackendResponse`] 约束后端 Response 类型，提供统一的响应访问接口。

use crate::error::AgentError;
use crate::message::ContextMessage;

// ---------------------------------------------------------------------------
// ContextBackendResponse — Response 类型约束
// ---------------------------------------------------------------------------

/// Tool-call information extracted from a backend response.
///
/// Consumers use it to execute the requested tool and then build the matching
/// Tool-role message. `PartialEq`/`Eq` are derived so values can be compared
/// directly (for example in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ToolCallInfo {
    /// Unique identifier of the tool call; corresponds to the `tool_call_id`
    /// argument of [`ContextBackend::tool_message`].
    pub id: String,
    /// Function name.
    pub name: String,
    /// Function arguments (a JSON string).
    pub arguments: String,
}

/// Contract for backend response types; both streaming and non-streaming
/// responses must implement it.
///
/// Provides:
/// - [`response_type`](Self::response_type): content classification, used by the
///   default implementation of [`ContextBackend::classify_chunk`]
/// - [`reasoning_content`](Self::reasoning_content): chain-of-thought text
/// - [`content`](Self::content): body text
/// - [`tool_calls`](Self::tool_calls): tool-call information
pub trait ContextBackendResponse {
    /// Returns which kinds of content this response contains.
    fn response_type(&self) -> ResponseType;

    /// Extracts the chain-of-thought text (the delta when streaming, the full
    /// text when non-streaming).
    ///
    /// `None` means the field is absent/null; `Some("")` means an empty string.
    fn reasoning_content(&self) -> Option<String>;

    /// Extracts the body text (the delta when streaming, the full text when
    /// non-streaming).
    ///
    /// `None` means the field is absent/null; `Some("")` means an empty string.
    fn content(&self) -> Option<String>;

    /// Extracts tool-call information (the delta when streaming, the full list
    /// when non-streaming).
    fn tool_calls(&self) -> Vec<ToolCallInfo>;
}

/// Kinds of content a response can contain.
///
/// This is a field-less enum, so it is `Copy` (cheap to pass by value) and
/// `Hash` (usable as a map/set key).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ResponseType {
    /// No incremental content.
    Empty,
    /// Chain-of-thought only.
    Reasoning,
    /// Body text only.
    Content,
    /// Both chain-of-thought and body text.
    ReasoningAndContent,
}

// ---------------------------------------------------------------------------
// 流式输出事件
// ---------------------------------------------------------------------------

/// Streaming output event, produced by [`ContextBackend::classify_chunk`].
///
/// Every variant carries a value of the backend response type `R`, so callers
/// can extract any data from it (content, reasoning, usage, ...).
/// The variant only marks the stream phase; it does not trim the payload.
#[derive(Debug, Clone)]
pub enum StreamEvent<R> {
    /// Chain-of-thought delta.
    Thinking(R),
    /// First body-text delta (the transition point from thinking to content).
    ContentFirst(R),
    /// Subsequent body-text delta.
    Content(R),
    /// Tool-call delta.
    ToolCalls(R),
}

// ---------------------------------------------------------------------------
// CommonOpts — 请求级公共配置
// ---------------------------------------------------------------------------

/// Request-level common configuration, embedded in each backend's `Opts` type.
///
/// [`ContextBackend::Opts`] is bounded by `AsRef<CommonOpts>`, which ensures
/// every backend `Opts` exposes these fields.
/// Callers construct it explicitly for each request; no `Default` is provided.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CommonOpts {
    /// Model identifier (e.g. `"deepseek-v4-pro"`, `"glm-4-flash"`).
    pub model: String,
    /// Size of the model's context window (in tokens); used for automatic
    /// compression detection.
    pub context_window: usize,
    /// Maximum number of output tokens.
    pub max_tokens: usize,
    /// Whether to compress automatically when the context overflows.
    /// When `false`, an overflow returns an error.
    pub auto_compress: bool,
    /// Ephemeral metadata refreshed every turn; when sending, it is appended to
    /// the end of the conversation as a system message and is not stored.
    pub scratch: Option<String>,
}

// ---------------------------------------------------------------------------
// ContextBackend trait
// ---------------------------------------------------------------------------

/// 后端 trait：抽象 LLM 后端的完整接口。
///
/// 实现此 trait 即可让 [`AgentContext`](crate::AgentContext) 对接任意 LLM 后端（DeepSeek、智谱、OpenAI 等）。
///
/// ## 方法分类
///
/// | 类别 | 方法 | 类型 |
/// |------|------|------|
/// | 消息工厂 | [`user_message`](Self::user_message)、[`system_message`](Self::system_message)、[`tool_message`](Self::tool_message) | 实例方法 |
/// | 格式转换 | [`to_system_message`](Self::to_system_message)、[`to_request_messages`](Self::to_request_messages) | 实例方法（默认实现） |
/// | 响应解析 | [`extract_messages`](Self::extract_messages) | 实例方法 |
/// | 模型对话 | [`estimate_tokens`](Self::estimate_tokens)、[`send`](Self::send)、[`send_stream`](Self::send_stream) | 实例方法 |
pub trait ContextBackend: Send + Sync + Clone + 'static {
    /// 后端消息类型，必须实现 [`ContextMessage`]。
    type Message: ContextMessage;
    /// 后端自定义的请求选项类型，须内嵌 [`CommonOpts`] 并实现 `AsRef<CommonOpts>`。
    ///
    /// 典型用途：传递 `model`、`temperature`、`thinking` 等模型参数。
    type Opts: AsRef<CommonOpts> + Clone + Send + Sync;
    /// 后端完整的 API 响应类型。
    ///
    /// - 非流式：ChatCompletion（含 choices + usage）
    /// - 流式：ChatCompletionChunk（含 delta content / reasoning_content）
    type Response: Clone + Send + Sync + ContextBackendResponse;

    // 消息工厂（实例方法）
    /// 构造一条 User 角色消息。
    fn user_message(&self, content: impl Into<String> + Send) -> Self::Message;
    /// 构造一条 System 角色消息。
    fn system_message(&self, content: impl Into<String> + Send) -> Self::Message;
    /// 构造一条 Tool 角色消息（工具调用结果）。
    fn tool_message(
        &self,
        tool_call_id: impl Into<String> + Send,
        content: impl Into<String> + Send,
    ) -> Self::Message;

    // 格式转换（实例方法，含默认实现）
    /// 将消息转换为 System 角色（用于压缩摘要等场景）。
    ///
    /// 默认实现调用 [`ContextMessage::with_role`]。
    fn to_system_message(&self, msg: Self::Message) -> Self::Message {
        msg.with_role(crate::Role::System)
    }

    /// 将后端响应消息转换为请求格式。
    ///
    /// 对 `!preserve_reasoning()` 的消息剥离 `reasoning_content`，减少网络传输和 token 消耗。
    ///
    /// 默认实现调用 [`ContextMessage::without_reasoning`]。
    fn to_request_messages(
        &self,
        messages: Vec<Self::Message>,
    ) -> Result<Vec<Self::Message>, AgentError> {
        Ok(messages
            .into_iter()
            .map(|m| {
                if m.preserve_reasoning() {
                    m
                } else {
                    m.without_reasoning()
                }
            })
            .collect())
    }

    // 响应解析（实例方法）
    /// 将流式分块合并为单条消息。
    ///
    /// 累加 `content`、`reasoning_content`、`tool_calls`，构造完整的 assistant 消息。
    /// 返回 `None` 表示分块中没有有效数据。
    fn merge_chunks(&self, responses: &[Self::Response]) -> Option<Self::Message>;

    /// 从后端非流式响应中提取消息列表。流式场景请用 [`merge_chunks`](Self::merge_chunks)。
    fn extract_messages(
        &self,
        responses: &[Self::Response],
    ) -> Result<Vec<Self::Message>, AgentError>;

    // 模型对话（实例方法）
    /// 估算消息列表的 token 数量。I/O 操作（可能需要调用远程 tokenizer API）。
    fn estimate_tokens(
        &self,
        messages: &[Self::Message],
    ) -> impl std::future::Future<Output = Result<usize, AgentError>> + Send;

    /// 非流式对话。发送全部消息，返回完整 Response（含 usage 等元数据）。
    fn send(
        &self,
        messages: &[Self::Message],
        opts: &Self::Opts,
    ) -> impl std::future::Future<Output = Result<Self::Response, AgentError>> + Send;

    /// 流式对话。参数为 owned（数据已移动），返回 `'static` 流。
    fn send_stream(
        &self,
        messages: Vec<Self::Message>,
        opts: Self::Opts,
    ) -> impl futures_core::Stream<Item = Result<Self::Response, AgentError>> + Send + 'static;

    /// 将流式分块分类为结构化事件，同时更新阶段状态。
    ///
    /// 默认实现基于 [`ContextBackendResponse::response_type`] 判断阶段：
    /// - 含 `reasoning_content` → [`StreamEvent::Thinking`]
    /// - 第一个 `content`（且之前有思维链）→ [`StreamEvent::ContentFirst`]
    /// - 后续 `content` → [`StreamEvent::Content`]
    fn classify_chunk(
        &self,
        response: &Self::Response,
        saw_thinking: &mut bool,
    ) -> Vec<StreamEvent<Self::Response>> {
        let mut events = Vec::new();
        if !response.tool_calls().is_empty() {
            events.push(StreamEvent::ToolCalls(response.clone()));
        }
        match response.response_type() {
            ResponseType::Empty => return events,
            ResponseType::Reasoning => {
                events.push(StreamEvent::Thinking(response.clone()));
                *saw_thinking = true;
            }
            ResponseType::Content => {
                if *saw_thinking {
                    events.push(StreamEvent::ContentFirst(response.clone()));
                    *saw_thinking = false;
                } else {
                    events.push(StreamEvent::Content(response.clone()));
                }
            }
            ResponseType::ReasoningAndContent => {
                events.push(StreamEvent::Thinking(response.clone()));
                *saw_thinking = true;
                events.push(StreamEvent::ContentFirst(response.clone()));
                *saw_thinking = false;
            }
        }
        events
    }

    /// 将消息序列化为 JSONL 行（供应商原生格式）。
    fn message_to_jsonl(&self, msg: &Self::Message) -> Result<String, AgentError> {
        serde_json::to_string(msg).map_err(|e| AgentError::Context(e.to_string()))
    }

    /// 从 JSONL 行反序列化为消息。
    fn message_from_jsonl(&self, line: &str) -> Result<Self::Message, AgentError> {
        serde_json::from_str(line).map_err(|e| AgentError::Context(format!("JSONL 解析失败: {e}")))
    }
}