Skip to main content

defect_core/llm/
request.rs

1//! Provider request parameters.
2
3use std::sync::Arc;
4
5use serde::{Deserialize, Serialize};
6
7use crate::llm::capability::HostedCapabilities;
8use crate::tool::ToolSchema;
9
10/// Input for a single generation request.
11#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
12pub struct CompletionRequest {
13    pub model: String,
14    /// System prompt. Uses `Arc<str>` instead of `String`: the request is `clone`d in the
15    /// turn main loop (sent to the provider, fanned out with the `LlmCallStarted` event),
16    /// and deep-copying a long system prompt repeatedly is expensive; `Arc` reduces clone
17    /// to a reference-count bump.
18    pub system: Option<Arc<str>>,
19    pub messages: Vec<Message>,
20    pub tools: Vec<ToolSchema>,
21    pub tool_choice: ToolChoice,
22    pub sampling: SamplingParams,
23    /// The set of hosted capabilities the provider may use in this turn.
24    ///
25    /// Determined once at session startup (see
26    ///
27    /// Reused from the session marker when assembling each turn's request.
28    /// The provider adapter uses this to decide whether to advertise a hosted tool on the
29    /// wire.
30    #[serde(default)]
31    pub hosted_capabilities: HostedCapabilities,
32}
33
34/// A single message in the conversation history.
35#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
36pub struct Message {
37    pub role: Role,
38    /// Content fragments. Uses `Arc<[_]>` instead of `Vec`: cloning the entire messages
39    /// list (e.g. for history `snapshot()`, `complete()`, or fan-out of `LlmCallStarted`
40    /// events) is expensive with deep copies under long contexts; `Arc` reduces clone to
41    /// reference counting. Messages are read-only once in history, so this is
42    /// appropriate.
43    pub content: Arc<[MessageContent]>,
44}
45
46#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
47#[serde(rename_all = "snake_case")]
48pub enum Role {
49    User,
50    Assistant,
51}
52
53/// A piece of content inside a message body.
54///
55/// Both "the model requesting a tool call in the previous turn" and "the tool result
56/// reported back in the current turn" are placed in the `messages` array, matching the
57/// shape of the Anthropic Messages API. OpenAI uses separate `assistant message with
58/// tool_calls` + `tool message`; the codec translates between the two during encoding.
59#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
60#[serde(tag = "type", rename_all = "snake_case")]
61pub enum MessageContent {
62    Text {
63        text: String,
64    },
65    /// The thinking chain produced by the model in the previous turn. Only present in
66    /// [`Role::Assistant`] messages.
67    ///
68    /// `signature` is the anti-forgery signature for Anthropic extended thinking: it must
69    /// be kept together with the text. For providers that echo plain text (e.g.
70    /// DeepSeek-v4-pro), this is [`None`].
71    Thinking {
72        text: String,
73        signature: Option<String>,
74    },
75    /// Tool call from a previous turn: when sending a request, include both the prior
76    /// `tool_use` and `tool_result` in `messages` so the provider can reconstruct the
77    /// context.
78    ToolUse {
79        id: String,
80        name: String,
81        args: serde_json::Value,
82    },
83    ToolResult {
84        tool_use_id: String,
85        output: ToolResultBody,
86        is_error: bool,
87    },
88    /// Multimodal input. *(P2)*
89    Image {
90        mime: String,
91        data: ImageData,
92    },
93    /// Provider-hosted capability activity (e.g. hosted web_search, hosted code
94    /// execution).
95    /// The agent does not interpret `payload`; it passes it through when retrying the
96    /// same
97    /// provider, or the codec decides how to degrade when switching providers.
98    ///
99    /// `payload` uses `#[serde(skip)]`: it is dropped when persisting across processes;
100    /// on session resume, if the model re-triggers the same hosted call, a new hosted
101    /// call is made without relying on the old payload.
102    ProviderActivity {
103        provider_id: String,
104        kind: ProviderActivityKind,
105        #[serde(skip)]
106        payload: serde_json::Value,
107    },
108}
109
110/// The kind of hosted activity. Only appears inside [`MessageContent::ProviderActivity`].
111///
112/// Adding `CodeExecution` / `ImageGeneration` etc. later is a deliberate breaking change:
113/// downstream provider crates that depend on `defect-core` should re-compile and handle
114/// the new variant rather than silently fall through a wildcard arm.
115#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
116#[serde(rename_all = "snake_case")]
117pub enum ProviderActivityKind {
118    /// Hosted web search.
119    Search,
120}
121
122/// Tool result payload. The codec converts it for the wire during serialization: some
123/// wires only support strings, so they stringify [`ToolResultBody::Json`].
124#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
125#[serde(tag = "kind", rename_all = "snake_case")]
126pub enum ToolResultBody {
127    Text {
128        text: String,
129    },
130    Json {
131        value: serde_json::Value,
132    },
133    /// Multimodal tool result: a mix of text and image blocks. Used by `read_file` for
134    /// images and future screenshot tools.
135    ///
136    /// Materialization per provider is handled by the codec, with different shapes:
137    /// - Anthropic's `tool_result` block natively supports images; just insert each block
138    ///   as-is.
139    /// - OpenAI's tool message only accepts text — the codec strips image blocks and
140    ///   attaches them to the following user message, leaving only text (including
141    ///   placeholder hints) in the tool message.
142    Content {
143        blocks: Vec<ToolResultContent>,
144    },
145}
146
147/// A single block inside [`ToolResultBody::Content`]. Text follows the same semantics as
148/// [`ToolResultBody::Text`]; images reuse the `(mime, data)` shape from
149/// [`MessageContent::Image`].
150#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
151#[serde(tag = "kind", rename_all = "snake_case")]
152pub enum ToolResultContent {
153    Text { text: String },
154    Image { mime: String, data: ImageData },
155}
156
157/// Placeholder shape for multimodal image payloads. The exact shape is not yet
158/// finalized.
159#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
160#[serde(tag = "kind", rename_all = "snake_case")]
161pub enum ImageData {
162    /// Base64-encoded image bytes.
163    Base64 { encoded: String },
164    /// A remote URL.
165    Url { url: String },
166}
167
168/// Tool selection strategy.
169#[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
170#[serde(tag = "mode", rename_all = "snake_case")]
171pub enum ToolChoice {
172    /// The model decides on its own.
173    #[default]
174    Auto,
175    /// Forces at least one tool to be called.
176    Required,
177    /// Force the model to call the specified tool.
178    Named { name: String },
179    /// Disables tool calls; only text output is allowed.
180    None,
181}
182
183/// Sampling parameters.
184#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize)]
185pub struct SamplingParams {
186    pub max_tokens: Option<u32>,
187    pub temperature: Option<f32>,
188    pub top_p: Option<f32>,
189    pub top_k: Option<u32>,
190    pub stop_sequences: Vec<String>,
191    pub thinking: ThinkingConfig,
192    /// The `reasoning_effort` level in the OpenAI-compatible protocol. When `Some(_)`,
193    /// the codec writes it directly to the wire; when `None`, the codec falls back to
194    /// deriving it from [`Self::thinking`].
195    ///
196    /// This is the **runtime authoritative representation** of the value — it can be
197    /// switched per-session (ACP `session/set_config_option`, category=ThoughtLevel). The
198    /// config layer has its own `defect_config::ReasoningEffort` for deserialization,
199    /// which is converted into this enum during assembly and placed into the initial
200    /// `SamplingParams`. Providers that do not support this concept should ignore this
201    /// field.
202    #[serde(default)]
203    pub reasoning_effort: Option<ReasoningEffort>,
204}
205
206/// Runtime-level enum for the OpenAI-compatible `reasoning_effort` protocol.
207///
208/// Maps 1:1 to the official OpenAI wire enum: `xhigh` is only supported after
209/// `gpt-5.1-codex-max`, and `none` only after `gpt-5.1`. This layer does not distinguish
210/// between models; the value is passed through as-is for upstream validation. The
211/// `defect-llm` wire codec imports this enum for materialization mapping.
212#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
213#[serde(rename_all = "snake_case")]
214pub enum ReasoningEffort {
215    None,
216    Minimal,
217    Low,
218    Medium,
219    High,
220    Xhigh,
221}
222
223/// Thinking chain configuration. Providers that do not support the concept of a thinking
224/// chain should ignore the budget field of `Enabled`, or report
225/// [`super::FeatureSupport::Unsupported`] in the capability matrix.
226#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
227#[serde(tag = "mode", rename_all = "snake_case")]
228pub enum ThinkingConfig {
229    #[default]
230    Disabled,
231    /// Enable thinking chain; `budget_tokens` is only used by providers that support
232    /// budgets, such as Anthropic.
233    Enabled { budget_tokens: Option<u32> },
234}