Skip to main content

defect_agent/llm/
request.rs

1//! Provider request parameters.
2
3use std::sync::Arc;
4
5use serde::{Deserialize, Serialize};
6
7use crate::llm::capability::HostedCapabilities;
8use crate::tool::ToolSchema;
9
10/// Input for a single generation request.
11#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
12pub struct CompletionRequest {
13    pub model: String,
14    /// System prompt. Uses `Arc<str>` instead of `String`: the request is `clone`d in the
15    /// turn main loop (sent to the provider, fanned out with the `LlmCallStarted` event),
16    /// and deep-copying a long system prompt repeatedly is expensive; `Arc` reduces clone
17    /// to a reference-count bump.
18    pub system: Option<Arc<str>>,
19    pub messages: Vec<Message>,
20    pub tools: Vec<ToolSchema>,
21    pub tool_choice: ToolChoice,
22    pub sampling: SamplingParams,
23    /// The set of hosted capabilities the provider may use in this turn.
24    ///
25    /// Determined once at session startup (see
26    ///
27    /// Reused from the session marker when assembling each turn's request.
28    /// The provider adapter uses this to decide whether to advertise a hosted tool on the
29    /// wire.
30    #[serde(default)]
31    pub hosted_capabilities: HostedCapabilities,
32}
33
34/// A single message in the conversation history.
35#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
36pub struct Message {
37    pub role: Role,
38    /// Content fragments. Uses `Arc<[_]>` instead of `Vec`: cloning the entire messages
39    /// list (e.g. for history `snapshot()`, `complete()`, or fan-out of `LlmCallStarted`
40    /// events) is expensive with deep copies under long contexts; `Arc` reduces clone to
41    /// reference counting. Messages are read-only once in history, so this is
42    /// appropriate.
43    pub content: Arc<[MessageContent]>,
44}
45
46#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
47#[serde(rename_all = "snake_case")]
48pub enum Role {
49    User,
50    Assistant,
51}
52
53/// A piece of content inside a message body.
54///
55/// Both "the model requesting a tool call in the previous turn" and "the tool result
56/// reported back in the current turn" are placed in the `messages` array, matching the
57/// shape of the Anthropic Messages API. OpenAI uses separate `assistant message with
58/// tool_calls` + `tool message`; the codec translates between the two during encoding.
59#[non_exhaustive]
60#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
61#[serde(tag = "type", rename_all = "snake_case")]
62pub enum MessageContent {
63    Text {
64        text: String,
65    },
66    /// The thinking chain produced by the model in the previous turn. Only present in
67    /// [`Role::Assistant`] messages.
68    ///
69    /// `signature` is the anti-forgery signature for Anthropic extended thinking: it must
70    /// be kept together with the text. For providers that echo plain text (e.g.
71    /// DeepSeek-v4-pro), this is [`None`].
72    Thinking {
73        text: String,
74        signature: Option<String>,
75    },
76    /// Tool call from a previous turn: when sending a request, include both the prior
77    /// `tool_use` and `tool_result` in `messages` so the provider can reconstruct the
78    /// context.
79    ToolUse {
80        id: String,
81        name: String,
82        args: serde_json::Value,
83    },
84    ToolResult {
85        tool_use_id: String,
86        output: ToolResultBody,
87        is_error: bool,
88    },
89    /// Multimodal input. *(P2)*
90    Image {
91        mime: String,
92        data: ImageData,
93    },
94    /// Provider-hosted capability activity (e.g. hosted web_search, hosted code
95    /// execution).
96    /// The agent does not interpret `payload`; it passes it through when retrying the
97    /// same
98    /// provider, or the codec decides how to degrade when switching providers.
99    ///
100    /// `payload` uses `#[serde(skip)]`: it is dropped when persisting across processes;
101    /// on session resume, if the model re-triggers the same hosted call, a new hosted
102    /// call is made without relying on the old payload.
103    ProviderActivity {
104        provider_id: String,
105        kind: ProviderActivityKind,
106        #[serde(skip)]
107        payload: serde_json::Value,
108    },
109}
110
111/// The kind of hosted activity. Only appears inside [`MessageContent::ProviderActivity`].
112///
113/// `#[non_exhaustive]` is used so that adding `CodeExecution` / `ImageGeneration` etc.
114/// later is not a breaking change.
115#[non_exhaustive]
116#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
117#[serde(rename_all = "snake_case")]
118pub enum ProviderActivityKind {
119    /// Hosted web search.
120    Search,
121}
122
123/// Tool result payload. The codec converts it for the wire during serialization: some
124/// wires only support strings, so they stringify [`ToolResultBody::Json`].
125#[non_exhaustive]
126#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
127#[serde(tag = "kind", rename_all = "snake_case")]
128pub enum ToolResultBody {
129    Text {
130        text: String,
131    },
132    Json {
133        value: serde_json::Value,
134    },
135    /// Multimodal tool result: a mix of text and image blocks. Used by `read_file` for
136    /// images and future screenshot tools.
137    ///
138    /// Materialization per provider is handled by the codec, with different shapes:
139    /// - Anthropic's `tool_result` block natively supports images; just insert each block
140    ///   as-is.
141    /// - OpenAI's tool message only accepts text — the codec strips image blocks and
142    ///   attaches them to the following user message, leaving only text (including
143    ///   placeholder hints) in the tool message.
144    Content {
145        blocks: Vec<ToolResultContent>,
146    },
147}
148
149/// A single block inside [`ToolResultBody::Content`]. Text follows the same semantics as
150/// [`ToolResultBody::Text`]; images reuse the `(mime, data)` shape from
151/// [`MessageContent::Image`].
152#[non_exhaustive]
153#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
154#[serde(tag = "kind", rename_all = "snake_case")]
155pub enum ToolResultContent {
156    Text { text: String },
157    Image { mime: String, data: ImageData },
158}
159
160/// Placeholder shape for multimodal image payloads. The exact shape is not yet
161/// finalized.
162#[non_exhaustive]
163#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
164#[serde(tag = "kind", rename_all = "snake_case")]
165pub enum ImageData {
166    /// Base64-encoded image bytes.
167    Base64 { encoded: String },
168    /// A remote URL.
169    Url { url: String },
170}
171
172/// Tool selection strategy.
173#[non_exhaustive]
174#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
175#[serde(tag = "mode", rename_all = "snake_case")]
176pub enum ToolChoice {
177    /// The model decides on its own.
178    #[default]
179    Auto,
180    /// Forces at least one tool to be called.
181    Required,
182    /// Force the model to call the specified tool.
183    Named { name: String },
184    /// Disables tool calls; only text output is allowed.
185    None,
186}
187
188/// Sampling parameters.
189#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
190pub struct SamplingParams {
191    pub max_tokens: Option<u32>,
192    pub temperature: Option<f32>,
193    pub top_p: Option<f32>,
194    pub top_k: Option<u32>,
195    pub stop_sequences: Vec<String>,
196    pub thinking: ThinkingConfig,
197    /// The `reasoning_effort` level in the OpenAI-compatible protocol. When `Some(_)`,
198    /// the codec writes it directly to the wire; when `None`, the codec falls back to
199    /// deriving it from [`Self::thinking`].
200    ///
201    /// This is the **runtime authoritative representation** of the value — it can be
202    /// switched per-session (ACP `session/set_config_option`, category=ThoughtLevel). The
203    /// config layer has its own `defect_config::ReasoningEffort` for deserialization,
204    /// which is converted into this enum during assembly and placed into the initial
205    /// `SamplingParams`. Providers that do not support this concept should ignore this
206    /// field.
207    #[serde(default)]
208    pub reasoning_effort: Option<ReasoningEffort>,
209}
210
211/// Runtime-level enum for the OpenAI-compatible `reasoning_effort` protocol.
212///
213/// Maps 1:1 to the official OpenAI wire enum: `xhigh` is only supported after
214/// `gpt-5.1-codex-max`, and `none` only after `gpt-5.1`. This layer does not distinguish
215/// between models; the value is passed through as-is for upstream validation. The
216/// `defect-llm` wire codec imports this enum for materialization mapping.
217#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
218#[serde(rename_all = "snake_case")]
219pub enum ReasoningEffort {
220    None,
221    Minimal,
222    Low,
223    Medium,
224    High,
225    Xhigh,
226}
227
228/// Thinking chain configuration. Providers that do not support the concept of a thinking
229/// chain should ignore the budget field of `Enabled`, or report
230/// [`super::FeatureSupport::Unsupported`] in the capability matrix.
231#[non_exhaustive]
232#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
233#[serde(tag = "mode", rename_all = "snake_case")]
234pub enum ThinkingConfig {
235    #[default]
236    Disabled,
237    /// Enable thinking chain; `budget_tokens` is only used by providers that support
238    /// budgets, such as Anthropic.
239    Enabled { budget_tokens: Option<u32> },
240}