lellm_core/
request.rs

1//! 请求类型。
2
3use serde::{Deserialize, Serialize};
4
5use crate::tool::ToolDefinition;
6
7/// 统一的聊天请求。
8#[derive(Debug, Clone, Default, Serialize, Deserialize)]
9pub struct ChatRequest {
10    pub model: String,
11    pub messages: Vec<crate::Message>,
12    pub tools: Option<Vec<ToolDefinition>>,
13    pub temperature: Option<f64>,
14    pub max_tokens: Option<u32>,
15    pub top_p: Option<f64>,
16    pub seed: Option<u64>,
17    pub tool_choice: Option<ToolChoice>,
18    pub stop_sequences: Option<Vec<String>>,
19    pub prefill: Option<String>,
20    /// 推理配置 — 控制模型是否进行深度推理。
21    ///
22    /// `None` = 不干预 Provider 默认行为
23    /// `Some(Disabled)` = 显式关闭推理
24    /// `Some(Low/Medium/High)` = 开启对应级别的推理
25    #[serde(skip_serializing_if = "Option::is_none")]
26    pub reasoning: Option<ReasoningConfig>,
27    /// 单次 LLM 调用的推理 Token 上限（可选，默认无限制）。
28    ///
29    /// 与 `max_tokens` 分离：reasoning 是模型内部推理，不计入输出预算。
30    /// 透传给 Provider Adapter，由 Adapter 映射为协议特定字段。
31    ///
32    /// **两种语义：**
33    /// - 流式: Hard limit — 达到限额当场切断 stream，省钱
34    /// - 非流式: Soft limit — response 已完整返回，事后检测并标记
35    ///
36    /// Adapter 映射示例：
37    /// - DeepSeek: `max_thinking_tokens`
38    /// - OpenAI: 无直接对应，由 `reasoning` 级别间接控制
39    /// - 其他: 放入 `extra` 或忽略
40    #[serde(skip_serializing_if = "Option::is_none")]
41    pub max_reasoning_tokens: Option<u32>,
42    /// Provider 特有参数（如 OpenAI 的 presence_penalty），由 Adapter 自行处理。
43    #[serde(skip_serializing_if = "Option::is_none")]
44    pub extra: Option<serde_json::Map<String, serde_json::Value>>,
45}
46
47// Default is derived - all fields have valid default values
48
49impl ChatRequest {
50    /// 便捷构造：单条用户消息
51    pub fn user_prompt(prompt: impl Into<String>) -> Self {
52        Self {
53            messages: vec![crate::Message::User {
54                content: crate::text_block(prompt),
55            }],
56            ..Default::default()
57        }
58    }
59
60    pub fn with_temperature(mut self, temp: f64) -> Self {
61        self.temperature = Some(temp);
62        self
63    }
64
65    pub fn with_max_tokens(mut self, max: u32) -> Self {
66        self.max_tokens = Some(max);
67        self
68    }
69
70    pub fn with_top_p(mut self, top_p: f64) -> Self {
71        self.top_p = Some(top_p);
72        self
73    }
74
75    pub fn with_seed(mut self, seed: u64) -> Self {
76        self.seed = Some(seed);
77        self
78    }
79
80    pub fn with_model(mut self, model: String) -> Self {
81        self.model = model;
82        self
83    }
84
85    pub fn with_tools(mut self, tools: Vec<ToolDefinition>) -> Self {
86        self.tools = Some(tools);
87        self
88    }
89
90    /// 便捷构造：单条系统消息
91    pub fn with_system_prompt(mut self, prompt: String) -> Self {
92        self.messages.insert(
93            0,
94            crate::Message::System {
95                content: crate::text_block(prompt),
96            },
97        );
98        self
99    }
100
101    /// 设置推理配置
102    pub fn with_reasoning(mut self, reasoning: ReasoningConfig) -> Self {
103        self.reasoning = Some(reasoning);
104        self
105    }
106
107    /// 设置单次调用的推理 Token 上限
108    pub fn with_max_reasoning_tokens(mut self, max: u32) -> Self {
109        self.max_reasoning_tokens = Some(max);
110        self
111    }
112
113    /// 设置 Provider 特有参数
114    pub fn with_extra(mut self, extra: serde_json::Map<String, serde_json::Value>) -> Self {
115        self.extra = Some(extra);
116        self
117    }
118}
119
120/// 工具选择策略
121#[derive(Debug, Clone, Serialize, Deserialize)]
122pub enum ToolChoice {
123    Tool { name: String },
124    Any,
125}
126
127/// 推理配置 — 声明式控制模型的深度推理行为。
128///
129/// 四值语义（Option + Enum）：
130/// - `None`（未设置）= 不干预，Provider 自行决定默认行为
131/// - `Some(Disabled)` = 显式关闭推理（尽最大努力）
132/// - `Some(Low)` = 低推理预算（快速、轻量）
133/// - `Some(Medium)` = 中等推理预算
134/// - `Some(High)` = 高推理预算（深度思考）
135///
136/// Adapter 映射示例：
137/// - OpenAI / NVIDIA / vLLM: `Disabled` → 不插字段；`Low` → "low"；`Medium` → "medium"；`High` → "high"
138/// - DeepSeek: `Disabled` → `enable_thinking=false`；其余 → `reasoning_effort=<level>`
139/// - llama.cpp: `Disabled` → `thinking=false`；其余 → `reasoning_effort=<level>`
140/// - Anthropic: `Disabled` → 静默忽略（不支持推理配置）；其余 → `UnsupportedFeature`
141/// - 不支持推理的 Provider: `Disabled` → 静默忽略；其余 → `UnsupportedFeature`
142#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
143pub enum ReasoningConfig {
144    /// 显式关闭推理
145    Disabled,
146    /// 低推理预算
147    Low,
148    /// 中等推理预算
149    Medium,
150    /// 高推理预算
151    High,
152}
153
154impl ReasoningConfig {
155    /// 判断是否为 Disabled
156    pub fn is_disabled(self) -> bool {
157        matches!(self, Self::Disabled)
158    }
159}
lellm_core/request.rs

lellm_core/
request.rs