defect_core/llm/request.rs
1//! Provider request parameters.
2
3use std::sync::Arc;
4
5use serde::{Deserialize, Serialize};
6
7use crate::llm::capability::HostedCapabilities;
8use crate::tool::ToolSchema;
9
10/// Input for a single generation request: the model, an optional system prompt, the
/// conversation history, the tool setup, sampling parameters and hosted capabilities.
//
// NOTE(review): serde only implements `Serialize`/`Deserialize` for `Arc<T>` when its
// `rc` feature is enabled — confirm the crate manifest turns it on. Deserializing always
// produces a fresh allocation; sharing between clones is not preserved across a
// serialize/deserialize round trip.
11#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
12pub struct CompletionRequest {
    /// Model to run the request against, carried as an opaque string.
13 pub model: String,
14 /// System prompt. Held as `Arc<str>` rather than `String` because the request is
15 /// `clone`d in the turn main loop (sent to the provider, fanned out with the
16 /// `LlmCallStarted` event); deep-copying a long system prompt each time is expensive,
17 /// whereas cloning an `Arc` is only a reference-count bump.
18 pub system: Option<Arc<str>>,
    /// Conversation history sent with the request (see [`Message`]).
19 pub messages: Vec<Message>,
    /// Schemas of the tools made available for this request.
20 pub tools: Vec<ToolSchema>,
    /// Tool selection strategy (see [`ToolChoice`]).
21 pub tool_choice: ToolChoice,
    /// Sampling parameters (see [`SamplingParams`]).
22 pub sampling: SamplingParams,
23 /// The set of hosted capabilities the provider may use in this turn.
24 ///
25 /// Determined once at session startup, then reused from the session marker when
26 /// assembling each turn's request.
27 ///
28 /// The provider adapter uses this to decide whether to advertise a hosted tool on the
29 /// wire. `#[serde(default)]` lets the field be omitted when deserializing, in which
    /// case `HostedCapabilities::default()` is used.
30 #[serde(default)]
31 pub hosted_capabilities: HostedCapabilities,
32}
33
34/// A single message in the conversation history: a [`Role`] plus its content fragments.
35#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
36pub struct Message {
    /// Which side of the conversation this message belongs to.
37 pub role: Role,
38 /// Content fragments. Stored as `Arc<[_]>` rather than `Vec` because the whole
39 /// message list gets cloned (e.g. for history `snapshot()`, `complete()`, or fan-out
40 /// of `LlmCallStarted` events) and deep copies are expensive under long contexts; an
41 /// `Arc` clone only bumps a reference count. Messages are read-only once they are in
42 /// history, so giving up in-place mutation is acceptable.
43 pub content: Arc<[MessageContent]>,
44}
45
/// The side of the conversation a [`Message`] belongs to.
///
/// Serialized in snake_case (`"user"`, `"assistant"`). There is no system role here: the
/// system prompt travels separately in `CompletionRequest::system`.
46#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
47#[serde(rename_all = "snake_case")]
48pub enum Role {
    /// Message supplied to the model.
49 User,
    /// Message produced by the model.
50 Assistant,
51}
52
53/// A piece of content inside a message body.
54///
55/// Both "the model requesting a tool call in the previous turn" and "the tool result
56/// reported back in the current turn" live in the `messages` array, matching the shape
57/// of the Anthropic Messages API. OpenAI instead uses a separate `assistant message with
58/// tool_calls` + `tool message`; the codec translates between the two during encoding.
///
/// Serialized as an internally tagged enum: the variant name is written in snake_case
/// under the `"type"` key, next to the variant's own fields.
59#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
60#[serde(tag = "type", rename_all = "snake_case")]
61pub enum MessageContent {
    /// Plain text fragment.
62 Text {
63 text: String,
64 },
65 /// The thinking chain produced by the model in the previous turn. Only present in
66 /// [`Role::Assistant`] messages.
67 ///
68 /// `signature` is the anti-forgery signature for Anthropic extended thinking and must
69 /// be kept together with the text it signs. For providers that echo plain text (e.g.
70 /// DeepSeek-v4-pro), this is [`None`].
71 Thinking {
72 text: String,
73 signature: Option<String>,
74 },
75 /// Tool call from a previous turn. When sending a request, both the prior `tool_use`
76 /// and its `tool_result` are included in `messages` so the provider can reconstruct
77 /// the context.
78 ToolUse {
79 id: String,
80 name: String,
81 args: serde_json::Value,
82 },
    /// Result reported back for a tool call. `tool_use_id` presumably carries the `id`
    /// of the matching `ToolUse` (verify against the codec); `is_error` flags a failed
    /// call.
83 ToolResult {
84 tool_use_id: String,
85 output: ToolResultBody,
86 is_error: bool,
87 },
88 /// Multimodal input. *(P2)*
89 Image {
90 mime: String,
91 data: ImageData,
92 },
93 /// Provider-hosted capability activity (e.g. hosted web_search, hosted code execution).
94 ///
95 /// The agent does not interpret `payload`: it is passed through unchanged when retrying
96 /// the same provider, and the codec decides how to degrade it when switching providers.
97 ///
98 /// `payload` is marked `#[serde(skip)]`: it is left out when persisting across
99 /// processes and is restored as its `Default` value on deserialization. On session
100 /// resume, if the model re-triggers the same hosted call, a new hosted call is made
101 /// without relying on the old payload.
102 ProviderActivity {
103 provider_id: String,
104 kind: ProviderActivityKind,
105 #[serde(skip)]
106 payload: serde_json::Value,
107 },
108}
109
110/// The kind of hosted activity. Only appears inside [`MessageContent::ProviderActivity`].
111///
112/// Adding `CodeExecution` / `ImageGeneration` etc. later is a deliberate breaking change:
113/// downstream provider crates that depend on `defect-core` should re-compile and handle
114/// the new variant explicitly rather than silently fall through a wildcard arm.
/// Consistent with that, the enum carries no `#[non_exhaustive]` attribute.
///
/// Variant names are serialized in snake_case (`"search"`).
115#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
116#[serde(rename_all = "snake_case")]
117pub enum ProviderActivityKind {
118 /// Hosted web search.
119 Search,
120}
121
122/// Tool result payload. The codec converts it to the wire shape during serialization;
123/// some wires only support strings, so for those it stringifies [`ToolResultBody::Json`].
///
/// Within this crate the value is serialized as an internally tagged enum: the snake_case
/// variant name is stored under the `"kind"` key.
124#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
125#[serde(tag = "kind", rename_all = "snake_case")]
126pub enum ToolResultBody {
    /// Plain-text result.
127 Text {
128 text: String,
129 },
    /// Structured result kept as an arbitrary JSON value.
130 Json {
131 value: serde_json::Value,
132 },
133 /// Multimodal tool result: a mix of text and image blocks. Used by `read_file` for
134 /// images and by future screenshot tools.
135 ///
136 /// Each provider's codec materializes this differently:
137 /// - Anthropic's `tool_result` block natively supports images; each block is inserted
138 /// as-is.
139 /// - OpenAI's tool message only accepts text — the codec strips image blocks and
140 /// attaches them to the following user message, leaving only text (including
141 /// placeholder hints) in the tool message.
142 Content {
143 blocks: Vec<ToolResultContent>,
144 },
145}
146
147/// A single block inside [`ToolResultBody::Content`]. Text has the same semantics as
148/// [`ToolResultBody::Text`]; images reuse the `(mime, data)` shape of
149/// [`MessageContent::Image`].
///
/// Serialized as an internally tagged enum with the snake_case variant name under `"kind"`.
150#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
151#[serde(tag = "kind", rename_all = "snake_case")]
152pub enum ToolResultContent {
    /// Text block.
153 Text { text: String },
    /// Image block: a MIME type string plus the image payload.
154 Image { mime: String, data: ImageData },
155}
156
157/// Placeholder shape for multimodal image payloads; the exact shape is not yet
158/// finalized.
///
/// Serialized as an internally tagged enum with the snake_case variant name under `"kind"`
/// (`"base64"` or `"url"`).
159#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
160#[serde(tag = "kind", rename_all = "snake_case")]
161pub enum ImageData {
162 /// Image bytes carried inline as a Base64 string.
163 Base64 { encoded: String },
164 /// Image referenced by a remote URL.
165 Url { url: String },
166}
167
168/// Tool selection strategy. Defaults to [`ToolChoice::Auto`].
///
/// Serialized as an internally tagged enum with the snake_case variant name under `"mode"`;
/// `Named` additionally carries its `name` field.
169#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
170#[serde(tag = "mode", rename_all = "snake_case")]
171pub enum ToolChoice {
172 /// Lets the model decide on its own whether to call a tool.
173 #[default]
174 Auto,
175 /// Forces the model to call at least one tool.
176 Required,
177 /// Forces the model to call the tool named `name`.
178 Named { name: String },
179 /// Disables tool calls; only text output is allowed. This is the `ToolChoice::None`
    /// variant, not `Option::None`.
180 None,
181}
182
183/// Sampling parameters. `Default` yields every `Option` as `None`, no stop sequences and
/// [`ThinkingConfig::Disabled`].
//
// NOTE(review): `None` values are presumably left to the provider's own defaults —
// confirm against the codec.
184#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
185pub struct SamplingParams {
186 pub max_tokens: Option<u32>,
187 pub temperature: Option<f32>,
188 pub top_p: Option<f32>,
189 pub top_k: Option<u32>,
190 pub stop_sequences: Vec<String>,
    /// Thinking chain configuration (see [`ThinkingConfig`]).
191 pub thinking: ThinkingConfig,
192 /// The `reasoning_effort` level in the OpenAI-compatible protocol. When `Some(_)`,
193 /// the codec writes it directly to the wire; when `None`, the codec falls back to
194 /// deriving it from [`Self::thinking`].
195 ///
196 /// This is the **runtime authoritative representation** of the value — it can be
197 /// switched per-session (ACP `session/set_config_option`, category=ThoughtLevel). The
198 /// config layer has its own `defect_config::ReasoningEffort` for deserialization,
199 /// which is converted into this enum during assembly and placed into the initial
200 /// `SamplingParams`. Providers that do not support this concept should ignore this
201 /// field. `#[serde(default)]` makes a payload that omits the field read as `None`.
202 #[serde(default)]
203 pub reasoning_effort: Option<ReasoningEffort>,
204}
205
206/// Runtime-level enum for the OpenAI-compatible `reasoning_effort` protocol.
207///
208/// Maps 1:1 to the official OpenAI wire enum: `xhigh` is only supported after
209/// `gpt-5.1-codex-max`, and `none` only after `gpt-5.1`. This layer does not distinguish
210/// between models; the value is passed through as-is and left to upstream validation.
211/// The `defect-llm` wire codec imports this enum for materialization mapping.
///
/// Variant names are serialized in snake_case: `"none"`, `"minimal"`, `"low"`,
/// `"medium"`, `"high"`, `"xhigh"`.
212#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
213#[serde(rename_all = "snake_case")]
214pub enum ReasoningEffort {
215 None,
216 Minimal,
217 Low,
218 Medium,
219 High,
220 Xhigh,
221}
222
223/// Thinking chain configuration. Providers that do not support the concept of a thinking
224/// chain should ignore the budget field of `Enabled`, or report
225/// [`super::FeatureSupport::Unsupported`] in the capability matrix.
///
/// Serialized as an internally tagged enum with the snake_case variant name under `"mode"`.
226#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
227#[serde(tag = "mode", rename_all = "snake_case")]
228pub enum ThinkingConfig {
    /// Thinking chain is off. This is the default.
229 #[default]
230 Disabled,
231 /// Enables the thinking chain; `budget_tokens` is only used by providers that support
232 /// budgets, such as Anthropic.
233 Enabled { budget_tokens: Option<u32> },
234}
234}