lellm_core/
request.rs

1//! 请求类型。
2
3use serde::{Deserialize, Serialize};
4
5/// 统一的聊天请求。
6#[derive(Debug, Clone, Default, Serialize, Deserialize)]
7pub struct ChatRequest {
8    pub model: String,
9    pub messages: Vec<crate::Message>,
10    pub tools: Option<Vec<ToolDefinition>>,
11    pub temperature: Option<f64>,
12    pub max_tokens: Option<u32>,
13    pub top_p: Option<f64>,
14    pub seed: Option<u64>,
15    pub tool_choice: Option<ToolChoice>,
16    pub stop_sequences: Option<Vec<String>>,
17    pub prefill: Option<String>,
18    /// 推理配置 — 控制模型是否进行深度推理。
19    ///
20    /// `None` = 不干预 Provider 默认行为
21    /// `Some(Disabled)` = 显式关闭推理
22    /// `Some(Low/Medium/High)` = 开启对应级别的推理
23    #[serde(skip_serializing_if = "Option::is_none")]
24    pub reasoning: Option<ReasoningConfig>,
25    /// 单次 LLM 调用的推理 Token 上限（可选，默认无限制）。
26    ///
27    /// 与 `max_tokens` 分离：reasoning 是模型内部推理，不计入输出预算。
28    /// 透传给 Provider Adapter，由 Adapter 映射为协议特定字段。
29    ///
30    /// **两种语义：**
31    /// - 流式: Hard limit — 达到限额当场切断 stream，省钱
32    /// - 非流式: Soft limit — response 已完整返回，事后检测并标记
33    ///
34    /// Adapter 映射示例：
35    /// - DeepSeek: `max_thinking_tokens`
36    /// - OpenAI: 无直接对应，由 `reasoning` 级别间接控制
37    /// - 其他: 放入 `extra` 或忽略
38    #[serde(skip_serializing_if = "Option::is_none")]
39    pub max_reasoning_tokens: Option<u32>,
40    /// Provider 特有参数（如 OpenAI 的 presence_penalty），由 Adapter 自行处理。
41    #[serde(skip_serializing_if = "Option::is_none")]
42    pub extra: Option<serde_json::Map<String, serde_json::Value>>,
43}
44
45// Default is derived - all fields have valid default values
46
47impl ChatRequest {
48    /// 便捷构造：单条用户消息
49    pub fn user_prompt(prompt: String) -> Self {
50        Self {
51            messages: vec![crate::Message::User {
52                content: crate::text_block(prompt),
53            }],
54            ..Default::default()
55        }
56    }
57
58    pub fn with_temperature(mut self, temp: f64) -> Self {
59        self.temperature = Some(temp);
60        self
61    }
62
63    pub fn with_max_tokens(mut self, max: u32) -> Self {
64        self.max_tokens = Some(max);
65        self
66    }
67
68    pub fn with_top_p(mut self, top_p: f64) -> Self {
69        self.top_p = Some(top_p);
70        self
71    }
72
73    pub fn with_seed(mut self, seed: u64) -> Self {
74        self.seed = Some(seed);
75        self
76    }
77
78    pub fn with_model(mut self, model: String) -> Self {
79        self.model = model;
80        self
81    }
82
83    pub fn with_tools(mut self, tools: Vec<ToolDefinition>) -> Self {
84        self.tools = Some(tools);
85        self
86    }
87
88    /// 便捷构造：单条系统消息
89    pub fn with_system_prompt(mut self, prompt: String) -> Self {
90        self.messages.insert(
91            0,
92            crate::Message::System {
93                content: crate::text_block(prompt),
94            },
95        );
96        self
97    }
98
99    /// 设置推理配置
100    pub fn with_reasoning(mut self, reasoning: ReasoningConfig) -> Self {
101        self.reasoning = Some(reasoning);
102        self
103    }
104
105    /// 设置单次调用的推理 Token 上限
106    pub fn with_max_reasoning_tokens(mut self, max: u32) -> Self {
107        self.max_reasoning_tokens = Some(max);
108        self
109    }
110
111    /// 设置 Provider 特有参数
112    pub fn with_extra(mut self, extra: serde_json::Map<String, serde_json::Value>) -> Self {
113        self.extra = Some(extra);
114        self
115    }
116}
117
118/// 工具选择策略
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub enum ToolChoice {
121    Tool { name: String },
122    Any,
123}
124
125/// 工具定义（输入侧）。
126///
127/// Schema 由 `schemars` 在编译期生成，经 `compute_and_clean_schema` 清洗后
128/// 存入 `parameters` 字段。Codec 层按 Provider 需求进行二次适配。
129#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct ToolDefinition {
131    pub name: String,
132    pub description: String,
133    pub parameters: serde_json::Value,
134
135    /// 缓存控制标记。Anthropic 支持 Tool Definition 级别的缓存。
136    #[serde(skip_serializing_if = "Option::is_none")]
137    pub cache_control: Option<crate::message::CacheControl>,
138}
139
140impl ToolDefinition {
141    /// 克隆并设置缓存标记。
142    pub fn with_cache(self, cache: crate::message::CacheControl) -> Self {
143        Self {
144            cache_control: Some(cache),
145            ..self
146        }
147    }
148
149    /// 从 `schemars::JsonSchema` 类型计算并清洗 JSON Schema。
150    ///
151    /// 供 `#[tool]` 宏生成的 `LazyLock` 调用，不在泛型函数中使用 `LazyLock`。
152    ///
153    /// **清洗规则：** 去除 `$schema`, `$id`, `title`, `description` 等根部元数据，
154    /// 保留 `type`, `properties`, `required`, `definitions` 等核心 JSON Schema 字段。
155    pub fn compute_and_clean_schema<S: schemars::JsonSchema>() -> serde_json::Value {
156        let root = schemars::schema_for!(S);
157        let val = serde_json::to_value(&root)
158            .expect("Failed to serialize JsonSchema; this is a bug in schemars");
159        Self::clean_schema(val)
160    }
161
162    /// 清洗 schemars 生成的 RootSchema，去除根部元数据噪音。
163    ///
164    /// 保留 `type`, `properties`, `required`, `definitions`, `additionalProperties`
165    /// 等核心 JSON Schema 字段。Codec 层在此基础上进行 Provider 特定的二次适配。
166    fn clean_schema(mut value: serde_json::Value) -> serde_json::Value {
167        if let Some(obj) = value.as_object_mut() {
168            // 去除标准 JSON Schema 根部的噪声元数据
169            obj.remove("$schema");
170            obj.remove("$id");
171            obj.remove("title");
172            obj.remove("description");
173        }
174        value
175    }
176}
177
178/// 推理配置 — 声明式控制模型的深度推理行为。
179///
180/// 四值语义（Option + Enum）：
181/// - `None`（未设置）= 不干预，Provider 自行决定默认行为
182/// - `Some(Disabled)` = 显式关闭推理（尽最大努力）
183/// - `Some(Low)` = 低推理预算（快速、轻量）
184/// - `Some(Medium)` = 中等推理预算
185/// - `Some(High)` = 高推理预算（深度思考）
186///
187/// Adapter 映射示例：
188/// - OpenAI / NVIDIA / vLLM: `Disabled` → 不插字段；`Low` → "low"；`Medium` → "medium"；`High` → "high"
189/// - DeepSeek: `Disabled` → `enable_thinking=false`；其余 → `reasoning_effort=<level>`
190/// - llama.cpp: `Disabled` → `thinking=false`；其余 → `reasoning_effort=<level>`
191/// - Anthropic: `Disabled` → 静默忽略（不支持推理配置）；其余 → `UnsupportedFeature`
192/// - 不支持推理的 Provider: `Disabled` → 静默忽略；其余 → `UnsupportedFeature`
193#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
194pub enum ReasoningConfig {
195    /// 显式关闭推理
196    Disabled,
197    /// 低推理预算
198    Low,
199    /// 中等推理预算
200    Medium,
201    /// 高推理预算
202    High,
203}
204
205impl ReasoningConfig {
206    /// 判断是否为 Disabled
207    pub fn is_disabled(self) -> bool {
208        matches!(self, Self::Disabled)
209    }
210}
lellm_core/request.rs

lellm_core/
request.rs