defect_agent/llm/request.rs
1//! Provider request parameters.
2
3use std::sync::Arc;
4
5use serde::{Deserialize, Serialize};
6
7use crate::llm::capability::HostedCapabilities;
8use crate::tool::ToolSchema;
9
10/// Input for a single generation request.
11#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
12pub struct CompletionRequest {
13 pub model: String,
14 /// System prompt. Uses `Arc<str>` instead of `String`: the request is `clone`d in the
15 /// turn main loop (sent to the provider, fanned out with the `LlmCallStarted` event),
16 /// and deep-copying a long system prompt repeatedly is expensive; `Arc` reduces clone
17 /// to a reference-count bump.
18 pub system: Option<Arc<str>>,
19 pub messages: Vec<Message>,
20 pub tools: Vec<ToolSchema>,
21 pub tool_choice: ToolChoice,
22 pub sampling: SamplingParams,
23 /// The set of hosted capabilities the provider may use in this turn.
24 ///
25 /// Determined once at session startup (see
26 ///
27 /// Reused from the session marker when assembling each turn's request.
28 /// The provider adapter uses this to decide whether to advertise a hosted tool on the
29 /// wire.
30 #[serde(default)]
31 pub hosted_capabilities: HostedCapabilities,
32}
33
34/// A single message in the conversation history.
35#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
36pub struct Message {
37 pub role: Role,
38 /// Content fragments. Uses `Arc<[_]>` instead of `Vec`: cloning the entire messages
39 /// list (e.g. for history `snapshot()`, `complete()`, or fan-out of `LlmCallStarted`
40 /// events) is expensive with deep copies under long contexts; `Arc` reduces clone to
41 /// reference counting. Messages are read-only once in history, so this is
42 /// appropriate.
43 pub content: Arc<[MessageContent]>,
44}
45
46#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
47#[serde(rename_all = "snake_case")]
48pub enum Role {
49 User,
50 Assistant,
51}
52
53/// A piece of content inside a message body.
54///
55/// Both "the model requesting a tool call in the previous turn" and "the tool result
56/// reported back in the current turn" are placed in the `messages` array, matching the
57/// shape of the Anthropic Messages API. OpenAI uses separate `assistant message with
58/// tool_calls` + `tool message`; the codec translates between the two during encoding.
59#[non_exhaustive]
60#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
61#[serde(tag = "type", rename_all = "snake_case")]
62pub enum MessageContent {
63 Text {
64 text: String,
65 },
66 /// The thinking chain produced by the model in the previous turn. Only present in
67 /// [`Role::Assistant`] messages.
68 ///
69 /// `signature` is the anti-forgery signature for Anthropic extended thinking: it must
70 /// be kept together with the text. For providers that echo plain text (e.g.
71 /// DeepSeek-v4-pro), this is [`None`].
72 Thinking {
73 text: String,
74 signature: Option<String>,
75 },
76 /// Tool call from a previous turn: when sending a request, include both the prior
77 /// `tool_use` and `tool_result` in `messages` so the provider can reconstruct the
78 /// context.
79 ToolUse {
80 id: String,
81 name: String,
82 args: serde_json::Value,
83 },
84 ToolResult {
85 tool_use_id: String,
86 output: ToolResultBody,
87 is_error: bool,
88 },
89 /// Multimodal input. *(P2)*
90 Image {
91 mime: String,
92 data: ImageData,
93 },
94 /// Provider-hosted capability activity (e.g. hosted web_search, hosted code
95 /// execution).
96 /// The agent does not interpret `payload`; it passes it through when retrying the
97 /// same
98 /// provider, or the codec decides how to degrade when switching providers.
99 ///
100 /// `payload` uses `#[serde(skip)]`: it is dropped when persisting across processes;
101 /// on session resume, if the model re-triggers the same hosted call, a new hosted
102 /// call is made without relying on the old payload.
103 ProviderActivity {
104 provider_id: String,
105 kind: ProviderActivityKind,
106 #[serde(skip)]
107 payload: serde_json::Value,
108 },
109}
110
111/// The kind of hosted activity. Only appears inside [`MessageContent::ProviderActivity`].
112///
113/// `#[non_exhaustive]` is used so that adding `CodeExecution` / `ImageGeneration` etc.
114/// later is not a breaking change.
115#[non_exhaustive]
116#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
117#[serde(rename_all = "snake_case")]
118pub enum ProviderActivityKind {
119 /// Hosted web search.
120 Search,
121}
122
123/// Tool result payload. The codec converts it for the wire during serialization: some
124/// wires only support strings, so they stringify [`ToolResultBody::Json`].
125#[non_exhaustive]
126#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
127#[serde(tag = "kind", rename_all = "snake_case")]
128pub enum ToolResultBody {
129 Text {
130 text: String,
131 },
132 Json {
133 value: serde_json::Value,
134 },
135 /// Multimodal tool result: a mix of text and image blocks. Used by `read_file` for
136 /// images and future screenshot tools.
137 ///
138 /// Materialization per provider is handled by the codec, with different shapes:
139 /// - Anthropic's `tool_result` block natively supports images; just insert each block
140 /// as-is.
141 /// - OpenAI's tool message only accepts text — the codec strips image blocks and
142 /// attaches them to the following user message, leaving only text (including
143 /// placeholder hints) in the tool message.
144 Content {
145 blocks: Vec<ToolResultContent>,
146 },
147}
148
149/// A single block inside [`ToolResultBody::Content`]. Text follows the same semantics as
150/// [`ToolResultBody::Text`]; images reuse the `(mime, data)` shape from
151/// [`MessageContent::Image`].
152#[non_exhaustive]
153#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
154#[serde(tag = "kind", rename_all = "snake_case")]
155pub enum ToolResultContent {
156 Text { text: String },
157 Image { mime: String, data: ImageData },
158}
159
160/// Placeholder shape for multimodal image payloads. The exact shape is not yet
161/// finalized.
162#[non_exhaustive]
163#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
164#[serde(tag = "kind", rename_all = "snake_case")]
165pub enum ImageData {
166 /// Base64-encoded image bytes.
167 Base64 { encoded: String },
168 /// A remote URL.
169 Url { url: String },
170}
171
172/// Tool selection strategy.
173#[non_exhaustive]
174#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
175#[serde(tag = "mode", rename_all = "snake_case")]
176pub enum ToolChoice {
177 /// The model decides on its own.
178 #[default]
179 Auto,
180 /// Forces at least one tool to be called.
181 Required,
182 /// Force the model to call the specified tool.
183 Named { name: String },
184 /// Disables tool calls; only text output is allowed.
185 None,
186}
187
188/// Sampling parameters.
189#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
190pub struct SamplingParams {
191 pub max_tokens: Option<u32>,
192 pub temperature: Option<f32>,
193 pub top_p: Option<f32>,
194 pub top_k: Option<u32>,
195 pub stop_sequences: Vec<String>,
196 pub thinking: ThinkingConfig,
197 /// The `reasoning_effort` level in the OpenAI-compatible protocol. When `Some(_)`,
198 /// the codec writes it directly to the wire; when `None`, the codec falls back to
199 /// deriving it from [`Self::thinking`].
200 ///
201 /// This is the **runtime authoritative representation** of the value — it can be
202 /// switched per-session (ACP `session/set_config_option`, category=ThoughtLevel). The
203 /// config layer has its own `defect_config::ReasoningEffort` for deserialization,
204 /// which is converted into this enum during assembly and placed into the initial
205 /// `SamplingParams`. Providers that do not support this concept should ignore this
206 /// field.
207 #[serde(default)]
208 pub reasoning_effort: Option<ReasoningEffort>,
209}
210
211/// Runtime-level enum for the OpenAI-compatible `reasoning_effort` protocol.
212///
213/// Maps 1:1 to the official OpenAI wire enum: `xhigh` is only supported after
214/// `gpt-5.1-codex-max`, and `none` only after `gpt-5.1`. This layer does not distinguish
215/// between models; the value is passed through as-is for upstream validation. The
216/// `defect-llm` wire codec imports this enum for materialization mapping.
217#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
218#[serde(rename_all = "snake_case")]
219pub enum ReasoningEffort {
220 None,
221 Minimal,
222 Low,
223 Medium,
224 High,
225 Xhigh,
226}
227
228/// Thinking chain configuration. Providers that do not support the concept of a thinking
229/// chain should ignore the budget field of `Enabled`, or report
230/// [`super::FeatureSupport::Unsupported`] in the capability matrix.
231#[non_exhaustive]
232#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
233#[serde(tag = "mode", rename_all = "snake_case")]
234pub enum ThinkingConfig {
235 #[default]
236 Disabled,
237 /// Enable thinking chain; `budget_tokens` is only used by providers that support
238 /// budgets, such as Anthropic.
239 Enabled { budget_tokens: Option<u32> },
240}