zeph_llm/
provider.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::future::Future;
5use std::pin::Pin;
6use std::{
7    any::TypeId,
8    collections::HashMap,
9    sync::{LazyLock, Mutex},
10};
11
12use futures_core::Stream;
13use serde::{Deserialize, Serialize};
14
15use zeph_common::ToolName;
16
17pub use zeph_common::ToolDefinition;
18
19use crate::embed::owned_strs;
20use crate::error::LlmError;
21
22static SCHEMA_CACHE: LazyLock<Mutex<HashMap<TypeId, (serde_json::Value, String)>>> =
23    LazyLock::new(|| Mutex::new(HashMap::new()));
24
25/// Return the JSON schema value and pretty-printed string for type `T`, cached by `TypeId`.
26///
27/// # Errors
28///
29/// Returns an error if schema serialization fails.
30pub(crate) fn cached_schema<T: schemars::JsonSchema + 'static>()
31-> Result<(serde_json::Value, String), crate::LlmError> {
32    let type_id = TypeId::of::<T>();
33    if let Ok(cache) = SCHEMA_CACHE.lock()
34        && let Some(entry) = cache.get(&type_id)
35    {
36        return Ok(entry.clone());
37    }
38    let schema = schemars::schema_for!(T);
39    let value = serde_json::to_value(&schema)
40        .map_err(|e| crate::LlmError::StructuredParse(e.to_string()))?;
41    let pretty = serde_json::to_string_pretty(&schema)
42        .map_err(|e| crate::LlmError::StructuredParse(e.to_string()))?;
43    if let Ok(mut cache) = SCHEMA_CACHE.lock() {
44        cache.insert(type_id, (value.clone(), pretty.clone()));
45    }
46    Ok((value, pretty))
47}
48
/// Extract the short (unqualified) type name for schema prompts and tool names.
///
/// Starts from [`std::any::type_name::<T>()`], discards any generic argument list
/// (everything from the first `<` onwards), and returns the last `::` segment of what
/// remains. The arguments are stripped first because splitting a generic name such as
/// `alloc::vec::Vec<alloc::string::String>` on `::` directly would yield `String>`,
/// which is neither the type's own name nor a usable identifier.
///
/// Returns `"Output"` when the resulting segment is empty. The exact text produced by
/// `type_name` is not guaranteed by the standard library, so the result is best-effort.
///
/// # Examples
///
/// ```
/// struct MyOutput;
/// // short_type_name::<MyOutput>() returns "MyOutput"
/// // short_type_name::<Vec<MyOutput>>() returns "Vec"
/// ```
pub(crate) fn short_type_name<T: ?Sized>() -> &'static str {
    let full = std::any::type_name::<T>();
    // `split` always yields at least one item: the text before the first `<`, or the
    // whole name when the type has no generic arguments.
    let base = full.split('<').next().unwrap_or(full);
    base.rsplit("::")
        .next()
        .filter(|segment| !segment.is_empty())
        .unwrap_or("Output")
}
68
/// Per-call extras returned alongside the chat response by [`LlmProvider::chat_with_extras`].
///
/// Always paired 1:1 with a single response — no shared state, no races possible.
/// All optional fields default to `None` so providers that do not expose the
/// underlying API (e.g. Claude, Gemini) can simply return `ChatExtras::default()`.
///
/// Marked `#[non_exhaustive]` so future fields (e.g. `cached_tokens`) can be added
/// without breaking match sites. As a consequence, code outside this crate cannot build
/// the struct with a literal and must go through `Default` or
/// [`ChatExtras::with_entropy`].
#[non_exhaustive]
#[derive(Debug, Clone, Default)]
pub struct ChatExtras {
    /// Mean negative log-probability of the generated tokens, when the provider
    /// was configured to request `logprobs` and the API supplied them.
    /// `None` otherwise.
    ///
    /// Lower = more confident. Typical range: `[0.0, ~6.0]` for natural-language tokens.
    pub entropy: Option<f64>,
}
86
87impl ChatExtras {
88    /// Return a `ChatExtras` with the given entropy value.
89    ///
90    /// Used by `MockProvider` (test-only, enabled via `testing` feature) and OpenAI/Ollama providers.
91    ///
92    /// # Examples
93    ///
94    /// ```
95    /// use zeph_llm::provider::ChatExtras;
96    ///
97    /// let extras = ChatExtras::with_entropy(0.9);
98    /// assert_eq!(extras.entropy, Some(0.9));
99    /// ```
100    #[must_use]
101    pub fn with_entropy(entropy: f64) -> Self {
102        Self {
103            entropy: Some(entropy),
104        }
105    }
106}
107
/// A chunk from an LLM streaming response.
///
/// This is the `Ok` item type yielded by [`ChatStream`].
///
/// Consumers should match all variants: future providers may emit non-`Content` chunks
/// that callers must not silently drop (e.g. thinking blocks that must be echoed back).
#[derive(Debug, Clone)]
pub enum StreamChunk {
    /// Regular response text.
    Content(String),
    /// Internal reasoning/thinking token (e.g. Claude extended thinking, `OpenAI` reasoning).
    Thinking(String),
    /// Server-side compaction summary (Claude compact-2026-01-12 beta).
    /// Delivered when the Claude API automatically summarizes conversation history.
    Compaction(String),
    /// One or more tool calls from the model received during streaming.
    ToolUse(Vec<ToolUseRequest>),
}
124
/// Boxed stream of typed chunks from an LLM provider.
///
/// Every item is either `Ok(`[`StreamChunk`]`)` or `Err(`[`LlmError`]`)`. The stream is
/// pinned on the heap and `Send`, so it can be moved across threads.
///
/// Obtain via [`LlmProvider::chat_stream`]. Drive the stream with
/// `futures::StreamExt::next` or `tokio_stream::StreamExt::next`.
pub type ChatStream = Pin<Box<dyn Stream<Item = Result<StreamChunk, LlmError>> + Send>>;
130
/// Structured tool invocation request from the model.
///
/// Returned by [`LlmProvider::chat_with_tools`] when the model decides to call one or
/// more tools. The caller is responsible for executing the tool and returning results
/// via a [`MessagePart::ToolResult`] in the next turn.
///
/// Also carried by [`StreamChunk::ToolUse`] when tool calls arrive during streaming.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolUseRequest {
    /// Opaque call identifier assigned by the model; must be echoed in `ToolResult.tool_use_id`.
    pub id: String,
    /// Name of the tool to invoke, matching a [`ToolDefinition::name`].
    pub name: ToolName,
    /// JSON arguments the model wants to pass to the tool.
    pub input: serde_json::Value,
}
145
/// Thinking block returned by Claude when extended or adaptive thinking is enabled.
///
/// Both variants must be echoed verbatim in the next turn's `assistant` message so
/// the API can correctly attribute reasoning across turns. Never modify or discard
/// these blocks between turns.
///
/// The variants mirror [`MessagePart::ThinkingBlock`] and
/// [`MessagePart::RedactedThinkingBlock`], which have the same fields.
#[derive(Debug, Clone)]
pub enum ThinkingBlock {
    /// Visible reasoning token with its cryptographic signature.
    ///
    /// `thinking` is the reasoning text; `signature` is the signature that accompanies it.
    Thinking { thinking: String, signature: String },
    /// Redacted reasoning block (API-side privacy redaction). Preserved as opaque data.
    Redacted { data: String },
}
158
/// Marker injected into `ChatResponse::Text` when the LLM response was cut off by the
/// token limit. Consumers can detect this substring to signal `MaxTokens` stop reason.
///
/// Detection is a plain substring match, so the marker text must stay in sync between
/// the code that injects it and the code that looks for it.
pub const MAX_TOKENS_TRUNCATION_MARKER: &str = "max_tokens limit reached";
162
/// Response from [`LlmProvider::chat_with_tools`].
///
/// When the model returns `ToolUse`, the caller must:
/// 1. Execute each tool in `tool_calls`.
/// 2. Append an `assistant` message with the original `tool_calls` and any `thinking_blocks`.
/// 3. Append a `user` message containing [`MessagePart::ToolResult`] entries.
/// 4. Call `chat_with_tools` again to continue the conversation.
#[derive(Debug, Clone)]
pub enum ChatResponse {
    /// Model produced text output only.
    Text(String),
    /// Model requests one or more tool invocations.
    ToolUse {
        /// Any text the model emitted before/alongside tool calls.
        text: Option<String>,
        /// Tool invocations requested by the model, to be executed by the caller.
        tool_calls: Vec<ToolUseRequest>,
        /// Thinking blocks from the model (empty when thinking is disabled).
        /// Must be preserved verbatim in multi-turn requests.
        thinking_blocks: Vec<ThinkingBlock>,
    },
}
184
/// Boxed future returning an embedding vector, returned by [`EmbedFn`].
///
/// Resolves to `Ok(Vec<f32>)` with the embedding, or to an [`LlmError`] on failure.
/// The future is heap-pinned and `Send`.
pub type EmbedFuture = Pin<Box<dyn Future<Output = Result<Vec<f32>, LlmError>> + Send>>;
187
/// A Send + Sync closure that embeds a text slice into a vector.
///
/// Calling the closure with a `&str` returns an [`EmbedFuture`] that must be awaited
/// to obtain the embedding.
///
/// Obtain a provider-backed `EmbedFn` via [`crate::any::AnyProvider::embed_fn`].
/// The closure captures an `Arc`-wrapped provider clone, so it is cheap to clone.
pub type EmbedFn = Box<dyn Fn(&str) -> EmbedFuture + Send + Sync>;
193
/// Sender for emitting human-readable status events (retries, fallbacks) to the UI layer.
///
/// When set on a provider, the provider sends short strings such as
/// `"Retrying after rate limit…"` or `"Falling back to secondary provider"`.
/// The TUI consumes these to show real-time activity spinners.
///
/// This is the sending half of an unbounded Tokio MPSC channel carrying `String`s.
pub type StatusTx = tokio::sync::mpsc::UnboundedSender<String>;
200
201/// Best-effort fallback for debug dump request payloads when a provider does not expose
202/// its concrete API request body.
203#[must_use]
204pub fn default_debug_request_json(
205    messages: &[Message],
206    tools: &[ToolDefinition],
207) -> serde_json::Value {
208    serde_json::json!({
209        "model": serde_json::Value::Null,
210        "max_tokens": serde_json::Value::Null,
211        "messages": serde_json::to_value(messages).unwrap_or(serde_json::Value::Array(vec![])),
212        "tools": serde_json::to_value(tools).unwrap_or(serde_json::Value::Array(vec![])),
213        "temperature": serde_json::Value::Null,
214        "cache_control": serde_json::Value::Null,
215    })
216}
217
/// Partial LLM generation parameter overrides for experiment variation injection.
///
/// Applied by the experiment engine to clone-and-patch a provider before evaluation,
/// so each variation is scored with its specific generation parameters.
///
/// Only `Some` fields are applied; `None` fields leave the provider's configured
/// defaults unchanged. Not all providers support all fields — unsupported fields
/// are silently ignored by each backend.
///
/// `GenerationOverrides::default()` sets every field to `None`, i.e. overrides nothing.
#[derive(Debug, Clone, Default)]
pub struct GenerationOverrides {
    /// Sampling temperature in `[0.0, 2.0]`. Lower = more deterministic.
    pub temperature: Option<f64>,
    /// Nucleus sampling probability in `[0.0, 1.0]`.
    pub top_p: Option<f64>,
    /// Top-K sampling cutoff (number of top tokens to consider).
    pub top_k: Option<usize>,
    /// Penalty for tokens that have already appeared (OpenAI-compatible providers).
    pub frequency_penalty: Option<f64>,
    /// Penalty for topics the model has already covered (OpenAI-compatible providers).
    pub presence_penalty: Option<f64>,
}
239
/// Message role in a conversation.
///
/// Determines how each message is presented to the model:
/// - `System` — global instructions prepended before the conversation
/// - `User` — human turn input
/// - `Assistant` — previous model output (used for multi-turn context)
///
/// Serialized as the lowercase variant name (`"system"`, `"user"`, `"assistant"`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    /// Global instructions prepended before the conversation.
    System,
    /// Human turn input.
    User,
    /// Previous model output, used for multi-turn context.
    Assistant,
}
253
/// A typed content part within a [`Message`].
///
/// Messages may contain zero or more parts that represent heterogeneous content:
/// plain text, tool invocations, memory recall fragments, images, and internal
/// protocol blocks (thinking, compaction). Most providers flatten these into a single
/// string before sending; Claude encodes them as structured content blocks.
///
/// # Serialization
///
/// Internally tagged: each serialized part carries a `kind` field holding the
/// `snake_case` variant name (e.g. `"tool_output"`, `"code_context"`).
///
/// # Ordering invariants
///
/// - `ToolUse` parts must precede their corresponding `ToolResult` parts.
/// - `ThinkingBlock` / `RedactedThinkingBlock` parts must be preserved verbatim in
///   multi-turn requests so the API can correctly attribute reasoning.
/// - `Compaction` parts must be preserved verbatim; the API uses them to prune
///   prior history on subsequent turns (Claude compact-2026-01-12 beta).
#[derive(Clone, Debug, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum MessagePart {
    /// Plain assistant or user text.
    Text { text: String },
    /// Output from a tool execution, optionally compacted.
    ToolOutput {
        /// Name of the tool that produced this output.
        tool_name: zeph_common::ToolName,
        /// Output text. An empty body on a compacted part is rendered as `(pruned)`
        /// in the flat message content.
        body: String,
        /// Set once this output has been compacted; omitted from the serialized form
        /// when `None`.
        /// NOTE(review): presumably a Unix timestamp like
        /// `MessageMetadata::compacted_at` — confirm.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        compacted_at: Option<i64>,
    },
    /// Memory recall fragment injected by the agent's semantic memory layer.
    Recall { text: String },
    /// Repository or file code context injected by the code indexing layer.
    CodeContext { text: String },
    /// Compaction summary replacing pruned conversation history.
    Summary { text: String },
    /// Cross-session memory fragment carried over from a previous conversation.
    CrossSession { text: String },
    /// Model-initiated tool invocation. Pairs with a subsequent [`MessagePart::ToolResult`].
    ToolUse {
        /// Call identifier.
        /// NOTE(review): presumably the value the paired `ToolResult::tool_use_id`
        /// must echo — confirm.
        id: String,
        /// Name of the tool being invoked.
        name: String,
        /// JSON arguments for the tool call.
        input: serde_json::Value,
    },
    /// Tool execution result returned to the model after a [`MessagePart::ToolUse`].
    ToolResult {
        /// Identifier of the tool call this result answers.
        tool_use_id: String,
        /// Textual result of the tool execution.
        content: String,
        /// Whether the tool execution failed; defaults to `false` when the field is
        /// absent during deserialization.
        #[serde(default)]
        is_error: bool,
    },
    /// Inline image payload (vision input).
    Image(Box<ImageData>),
    /// Claude thinking block — must be preserved verbatim in multi-turn requests.
    ThinkingBlock { thinking: String, signature: String },
    /// Claude redacted thinking block — preserved as-is in multi-turn requests.
    RedactedThinkingBlock { data: String },
    /// Claude server-side compaction block — must be preserved verbatim in multi-turn requests
    /// so the API can correctly prune prior history on the next turn.
    Compaction { summary: String },
}
311
312impl MessagePart {
313    /// Return the plain text content if this part is a text-like variant (`Text`, `Recall`,
314    /// `CodeContext`, `Summary`, `CrossSession`), `None` otherwise.
315    #[must_use]
316    pub fn as_plain_text(&self) -> Option<&str> {
317        match self {
318            Self::Text { text }
319            | Self::Recall { text }
320            | Self::CodeContext { text }
321            | Self::Summary { text }
322            | Self::CrossSession { text } => Some(text.as_str()),
323            _ => None,
324        }
325    }
326
327    /// Return the image data if this part is an `Image` variant, `None` otherwise.
328    #[must_use]
329    pub fn as_image(&self) -> Option<&ImageData> {
330        if let Self::Image(img) = self {
331            Some(img)
332        } else {
333            None
334        }
335    }
336}
337
/// Raw image payload for vision-capable providers.
///
/// The `data` field is serialized as a Base64 string. `mime_type` must be a valid
/// image MIME type supported by the target provider (e.g. `"image/png"`, `"image/jpeg"`).
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ImageData {
    /// Raw image bytes; converted to and from a Base64 string by `serde_bytes_base64`.
    #[serde(with = "serde_bytes_base64")]
    pub data: Vec<u8>,
    /// MIME type of `data`, e.g. `"image/png"`.
    pub mime_type: String,
}
348
349mod serde_bytes_base64 {
350    use base64::{Engine, engine::general_purpose::STANDARD};
351    use serde::{Deserialize, Deserializer, Serializer};
352
353    pub fn serialize<S>(bytes: &[u8], s: S) -> Result<S::Ok, S::Error>
354    where
355        S: Serializer,
356    {
357        s.serialize_str(&STANDARD.encode(bytes))
358    }
359
360    pub fn deserialize<'de, D>(d: D) -> Result<Vec<u8>, D::Error>
361    where
362        D: Deserializer<'de>,
363    {
364        let s = String::deserialize(d)?;
365        STANDARD.decode(&s).map_err(serde::de::Error::custom)
366    }
367}
368
/// Visibility of a message to agent and user.
///
/// Replaces the former `(agent_visible: bool, user_visible: bool)` pair, which
/// allowed the semantically invalid `(false, false)` combination. Every variant
/// guarantees at least one consumer can see the message.
///
/// Serialized via serde as the `snake_case` variant name (`"both"`, `"agent_only"`,
/// `"user_only"`).
///
/// # Examples
///
/// ```
/// use zeph_llm::provider::MessageVisibility;
///
/// let v = MessageVisibility::AgentOnly;
/// assert!(v.is_agent_visible());
/// assert!(!v.is_user_visible());
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MessageVisibility {
    /// Visible to both the agent (LLM context) and the user (conversation log).
    Both,
    /// Visible to the agent only (e.g. compaction summaries, internal context).
    AgentOnly,
    /// Visible to the user only (e.g. compacted originals shown in history).
    UserOnly,
}
394
395impl MessageVisibility {
396    /// Returns `true` if this message should be included in the LLM request context.
397    #[must_use]
398    pub fn is_agent_visible(self) -> bool {
399        matches!(self, MessageVisibility::Both | MessageVisibility::AgentOnly)
400    }
401
402    /// Returns `true` if this message should appear in the user-facing conversation log.
403    #[must_use]
404    pub fn is_user_visible(self) -> bool {
405        matches!(self, MessageVisibility::Both | MessageVisibility::UserOnly)
406    }
407}
408
409impl Default for MessageVisibility {
410    /// Defaults to [`Both`](MessageVisibility::Both) — visible to agent and user.
411    fn default() -> Self {
412        MessageVisibility::Both
413    }
414}
415
416impl MessageVisibility {
417    /// Serialize to the SQLite/PostgreSQL text value stored in the `visibility` column.
418    #[must_use]
419    pub fn as_db_str(self) -> &'static str {
420        match self {
421            MessageVisibility::Both => "both",
422            MessageVisibility::AgentOnly => "agent_only",
423            MessageVisibility::UserOnly => "user_only",
424        }
425    }
426
427    /// Deserialize from the SQLite/PostgreSQL text value stored in the `visibility` column.
428    ///
429    /// Unknown values (e.g. from a future migration) default to `Both` for safety.
430    #[must_use]
431    pub fn from_db_str(s: &str) -> Self {
432        match s {
433            "agent_only" => MessageVisibility::AgentOnly,
434            "user_only" => MessageVisibility::UserOnly,
435            _ => MessageVisibility::Both,
436        }
437    }
438}
439
/// Per-message visibility and metadata controlling agent context and user display.
///
/// Constructors [`agent_only`](Self::agent_only), [`user_only`](Self::user_only),
/// and [`focus_pinned`](Self::focus_pinned) cover the most common combinations.
///
/// When serialized, `compacted_at`, `deferred_summary` and `focus_marker_id` are
/// omitted while `None`, `focus_pinned` is omitted while `false`, and `db_id` is never
/// written.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct MessageMetadata {
    /// Who can see this message.
    pub visibility: MessageVisibility,
    /// Unix timestamp (seconds) when this message was compacted, if applicable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub compacted_at: Option<i64>,
    /// Pre-computed tool pair summary, applied lazily when context pressure rises.
    /// Stored on the tool response message; cleared after application.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub deferred_summary: Option<String>,
    /// When true, this message is excluded from all compaction passes (soft pruning,
    /// hard summarization, sidequest eviction). Used for the Focus Knowledge block (#1850).
    #[serde(default, skip_serializing_if = "std::ops::Not::not")]
    pub focus_pinned: bool,
    /// Unique marker UUID set when `start_focus` begins a session. Used by `complete_focus`
    /// to locate the checkpoint without relying on a fragile raw index.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub focus_marker_id: Option<uuid::Uuid>,
    /// `SQLite` row ID for this message. Populated when loading from DB or after persisting.
    /// Never serialized — always re-populated from the database on load.
    #[serde(skip)]
    pub db_id: Option<i64>,
}
468
469impl Default for MessageMetadata {
470    fn default() -> Self {
471        Self {
472            visibility: MessageVisibility::Both,
473            compacted_at: None,
474            deferred_summary: None,
475            focus_pinned: false,
476            focus_marker_id: None,
477            db_id: None,
478        }
479    }
480}
481
482impl MessageMetadata {
483    /// Message visible only to the agent (e.g. compaction summary).
484    #[must_use]
485    pub fn agent_only() -> Self {
486        Self {
487            visibility: MessageVisibility::AgentOnly,
488            compacted_at: None,
489            deferred_summary: None,
490            focus_pinned: false,
491            focus_marker_id: None,
492            db_id: None,
493        }
494    }
495
496    /// Message visible only to the user (e.g. compacted original).
497    #[must_use]
498    pub fn user_only() -> Self {
499        Self {
500            visibility: MessageVisibility::UserOnly,
501            compacted_at: None,
502            deferred_summary: None,
503            focus_pinned: false,
504            focus_marker_id: None,
505            db_id: None,
506        }
507    }
508
509    /// Pinned Knowledge block — excluded from all compaction passes.
510    #[must_use]
511    pub fn focus_pinned() -> Self {
512        Self {
513            visibility: MessageVisibility::AgentOnly,
514            compacted_at: None,
515            deferred_summary: None,
516            focus_pinned: true,
517            focus_marker_id: None,
518            db_id: None,
519        }
520    }
521}
522
/// A single message in a conversation.
///
/// Each message has a [`Role`], a flat `content` string (used when sending to providers
/// that do not support structured parts), and an optional list of [`MessagePart`]s for
/// providers that accept heterogeneous content blocks (e.g. Claude).
///
/// The `content` field is kept in sync with `parts` via [`Message::rebuild_content`].
/// When building messages from structured parts, always use [`Message::from_parts`] —
/// it populates both `parts` and `content`.
///
/// When deserializing, missing `parts` and `metadata` fields take their default values.
///
/// # Examples
///
/// ```
/// use zeph_llm::provider::{Message, MessagePart, Role};
///
/// // Simple text-only message
/// let msg = Message::from_legacy(Role::User, "What is Rust?");
/// assert_eq!(msg.to_llm_content(), "What is Rust?");
///
/// // Structured message with parts
/// let parts = vec![
///     MessagePart::Text { text: "Explain this code.".into() },
/// ];
/// let msg = Message::from_parts(Role::User, parts);
/// assert!(!msg.parts.is_empty());
/// ```
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct Message {
    /// Who authored this message (system, user or assistant).
    pub role: Role,
    /// Flat text representation of this message, derived from `parts` when structured.
    pub content: String,
    /// Structured content parts; empty for plain text-only messages.
    #[serde(default)]
    pub parts: Vec<MessagePart>,
    /// Visibility and bookkeeping metadata for this message.
    #[serde(default)]
    pub metadata: MessageMetadata,
}
559
560impl Default for Message {
561    fn default() -> Self {
562        Self {
563            role: Role::User,
564            content: String::new(),
565            parts: vec![],
566            metadata: MessageMetadata::default(),
567        }
568    }
569}
570
571impl Message {
572    /// Create a simple text-only message without structured parts.
573    ///
574    /// Use this constructor for system prompts, plain user turns, and assistant
575    /// messages produced by providers that return a raw string.
576    #[must_use]
577    pub fn from_legacy(role: Role, content: impl Into<String>) -> Self {
578        Self {
579            role,
580            content: content.into(),
581            parts: vec![],
582            metadata: MessageMetadata::default(),
583        }
584    }
585
586    /// Create a message from structured parts, deriving the flat `content` automatically.
587    ///
588    /// Prefer this constructor when the message contains tool invocations, images,
589    /// or other non-text content that providers need to render as separate content blocks.
590    #[must_use]
591    pub fn from_parts(role: Role, parts: Vec<MessagePart>) -> Self {
592        let content = Self::flatten_parts(&parts);
593        Self {
594            role,
595            content,
596            parts,
597            metadata: MessageMetadata::default(),
598        }
599    }
600
601    /// Return the flat text content of this message, suitable for providers that do
602    /// not support structured content blocks.
603    #[must_use]
604    pub fn to_llm_content(&self) -> &str {
605        &self.content
606    }
607
608    /// Re-synchronize `content` from `parts` after in-place mutation.
609    pub fn rebuild_content(&mut self) {
610        if !self.parts.is_empty() {
611            self.content = Self::flatten_parts(&self.parts);
612        }
613    }
614
615    fn flatten_parts(parts: &[MessagePart]) -> String {
616        use std::fmt::Write;
617        let mut out = String::new();
618        for part in parts {
619            match part {
620                MessagePart::Text { text }
621                | MessagePart::Recall { text }
622                | MessagePart::CodeContext { text }
623                | MessagePart::Summary { text }
624                | MessagePart::CrossSession { text } => out.push_str(text),
625                MessagePart::ToolOutput {
626                    tool_name,
627                    body,
628                    compacted_at,
629                } => {
630                    if compacted_at.is_some() {
631                        if body.is_empty() {
632                            let _ = write!(out, "[tool output: {tool_name}] (pruned)");
633                        } else {
634                            let _ = write!(out, "[tool output: {tool_name}] {body}");
635                        }
636                    } else {
637                        let _ = write!(out, "[tool output: {tool_name}]\n```\n{body}\n```");
638                    }
639                }
640                MessagePart::ToolUse { id, name, .. } => {
641                    let _ = write!(out, "[tool_use: {name}({id})]");
642                }
643                MessagePart::ToolResult {
644                    tool_use_id,
645                    content,
646                    ..
647                } => {
648                    let _ = write!(out, "[tool_result: {tool_use_id}]\n{content}");
649                }
650                MessagePart::Image(img) => {
651                    let _ = write!(out, "[image: {}, {} bytes]", img.mime_type, img.data.len());
652                }
653                // Thinking and compaction blocks are internal API metadata — not rendered in text.
654                MessagePart::ThinkingBlock { .. }
655                | MessagePart::RedactedThinkingBlock { .. }
656                | MessagePart::Compaction { .. } => {}
657            }
658        }
659        out
660    }
661}
662
663/// Core abstraction for all LLM inference backends.
664///
665/// Every backend — `Ollama`, `Claude`, `OpenAI`, `Gemini`, `Candle` — implements this trait.
666/// The [`crate::any::AnyProvider`] enum erases the concrete type so callers can
667/// hold any backend behind a single type, and [`crate::router::RouterProvider`]
668/// implements this trait to multiplex across multiple backends.
669///
670/// # Object safety
671///
672/// This trait is **not** object-safe: 6 methods return `impl Future + Send` (RPIT),
673/// and [`chat_typed`](Self::chat_typed) carries a generic type parameter `T`.
674/// The `where Self: Sized` bound on `chat_typed` is a mitigation — it excludes that
675/// method from the vtable — but the RPIT methods remain and prevent `dyn LlmProvider`.
676/// Attempting `Box<dyn LlmProvider>` will produce a compile error.
677///
678/// For dynamic dispatch, use [`Arc<dyn LlmProviderDyn>`](crate::provider_dyn::LlmProviderDyn)
679/// instead — a blanket impl wires every `LlmProvider` implementor automatically.
680/// See the [`provider_dyn`](crate::provider_dyn) module for details.
681///
682/// # Required methods
683///
684/// Implementors must provide: [`chat`](Self::chat), [`chat_stream`](Self::chat_stream),
685/// [`supports_streaming`](Self::supports_streaming), [`embed`](Self::embed),
686/// [`supports_embeddings`](Self::supports_embeddings), and [`name`](Self::name).
687///
688/// # Optional methods
689///
690/// All other methods have default implementations that are safe to accept:
691/// - [`context_window`](Self::context_window) — returns `None`
692/// - [`embed_batch`](Self::embed_batch) — sequential fallback via [`embed`](Self::embed)
693/// - [`chat_with_tools`](Self::chat_with_tools) — falls back to [`chat`](Self::chat)
694/// - [`chat_typed`](Self::chat_typed) — schema-prompt injection + retry
695///   (requires `Self: Sized`; use [`chat_typed_dyn`](crate::provider_dyn::chat_typed_dyn)
696///   for trait objects)
697/// - [`supports_vision`](Self::supports_vision) — returns `false`
698/// - [`supports_tool_use`](Self::supports_tool_use) — returns `true`
699///
700/// # Examples
701///
702/// ```rust,no_run
703/// use zeph_llm::provider::{LlmProvider, Message, Role, ChatStream};
704/// use zeph_llm::LlmError;
705///
706/// struct EchoProvider;
707///
708/// impl LlmProvider for EchoProvider {
709///     async fn chat(&self, messages: &[Message]) -> Result<String, LlmError> {
710///         Ok(messages.last().map(|m| m.content.clone()).unwrap_or_default())
711///     }
712///
713///     async fn chat_stream(&self, messages: &[Message]) -> Result<ChatStream, LlmError> {
714///         use zeph_llm::provider::StreamChunk;
715///         let text = self.chat(messages).await?;
716///         Ok(Box::pin(tokio_stream::once(Ok(StreamChunk::Content(text)))))
717///     }
718///
719///     fn supports_streaming(&self) -> bool { true }
720///
721///     async fn embed(&self, _text: &str) -> Result<Vec<f32>, LlmError> {
722///         Err(LlmError::EmbedUnsupported { provider: "echo".into() })
723///     }
724///
725///     fn supports_embeddings(&self) -> bool { false }
726///
727///     fn name(&self) -> &str { "echo" }
728/// }
729/// ```
730pub trait LlmProvider: Send + Sync {
731    /// Report the model's context window size in tokens.
732    ///
733    /// Returns `None` if unknown. Used for auto-budget calculation.
734    fn context_window(&self) -> Option<usize> {
735        None
736    }
737
738    /// Send messages to the LLM and return the assistant response.
739    ///
740    /// # Errors
741    ///
742    /// Returns an error if the provider fails to communicate or the response is invalid.
743    fn chat(&self, messages: &[Message]) -> impl Future<Output = Result<String, LlmError>> + Send;
744
745    /// Send messages and return a stream of response chunks.
746    ///
747    /// # Errors
748    ///
749    /// Returns an error if the provider fails to communicate or the response is invalid.
750    fn chat_stream(
751        &self,
752        messages: &[Message],
753    ) -> impl Future<Output = Result<ChatStream, LlmError>> + Send;
754
755    /// Whether this provider supports native streaming.
756    fn supports_streaming(&self) -> bool;
757
758    /// Generate an embedding vector from text.
759    ///
760    /// # Errors
761    ///
762    /// Returns an error if the provider does not support embeddings or the request fails.
763    fn embed(&self, text: &str) -> impl Future<Output = Result<Vec<f32>, LlmError>> + Send;
764
765    /// Embed multiple texts in a single API call.
766    ///
767    /// Default implementation calls [`embed`][Self::embed] sequentially for each input.
768    /// Providers with native batch APIs should override this.
769    ///
770    /// # Errors
771    ///
772    /// Returns an error if any embedding fails. On native batch backends the entire batch
773    /// fails atomically; on the sequential fallback the first error aborts.
774    fn embed_batch(
775        &self,
776        texts: &[&str],
777    ) -> impl Future<Output = Result<Vec<Vec<f32>>, LlmError>> + Send {
778        let owned = owned_strs(texts);
779        async move {
780            let mut results = Vec::with_capacity(owned.len());
781            for text in &owned {
782                results.push(self.embed(text).await?);
783            }
784            Ok(results)
785        }
786    }
787
788    /// Whether this provider supports embedding generation.
789    fn supports_embeddings(&self) -> bool;
790
791    /// Provider name for logging and identification.
792    fn name(&self) -> &str;
793
794    /// Model identifier string (e.g. `gpt-4o-mini`, `claude-sonnet-4-6`).
795    /// Used by cost-estimation heuristics. Returns `""` when not applicable.
796    #[allow(clippy::unnecessary_literal_bound)]
797    fn model_identifier(&self) -> &str {
798        ""
799    }
800
801    /// Whether this provider supports image input (vision).
802    fn supports_vision(&self) -> bool {
803        false
804    }
805
806    /// Whether this provider supports native `tool_use` / function calling.
807    fn supports_tool_use(&self) -> bool {
808        true
809    }
810
811    /// Send messages with tool definitions, returning a structured response.
812    ///
813    /// Default: falls back to `chat()` and wraps the result in `ChatResponse::Text`.
814    ///
815    /// # Errors
816    ///
817    /// Returns an error if the provider fails to communicate or the response is invalid.
818    fn chat_with_tools(
819        &self,
820        messages: &[Message],
821        _tools: &[ToolDefinition],
822    ) -> impl std::future::Future<Output = Result<ChatResponse, LlmError>> + Send {
823        let msgs = messages.to_vec();
824        async move { Ok(ChatResponse::Text(self.chat(&msgs).await?)) }
825    }
826
827    /// Return the cache usage from the last API call, if available.
828    /// Returns `(cache_creation_tokens, cache_read_tokens)`.
829    fn last_cache_usage(&self) -> Option<(u64, u64)> {
830        None
831    }
832
833    /// Return token counts from the last API call, if available.
834    /// Returns `(input_tokens, output_tokens)`.
835    fn last_usage(&self) -> Option<(u64, u64)> {
836        None
837    }
838
839    /// Return reasoning tokens from the last API call, if the provider reports them.
840    ///
841    /// Reasoning tokens are a **subset** of completion tokens (`OpenAI` o-series only).
842    /// Returns `None` for providers that do not expose reasoning token counts.
843    fn last_reasoning_tokens(&self) -> Option<u64> {
844        None
845    }
846
847    /// Return the compaction summary from the most recent API call, if a server-side
848    /// compaction occurred (Claude compact-2026-01-12 beta). Clears the stored value.
849    fn take_compaction_summary(&self) -> Option<String> {
850        None
851    }
852
853    /// Send messages and return the assistant response together with per-call extras.
854    ///
855    /// Default implementation calls [`chat`][Self::chat] and returns [`ChatExtras::default()`],
856    /// keeping every existing implementor source-compatible at zero cost.
857    ///
858    /// Providers that support logprobs (`OpenAI`, `Compatible`, `Ollama`) override this to
859    /// populate [`ChatExtras::entropy`] with the mean negative log-probability.
860    ///
861    /// `CoE` is the only caller of this method; the canonical entry point for the agent
862    /// loop remains [`chat`][Self::chat].
863    ///
864    /// # Errors
865    ///
866    /// Same as [`chat`][Self::chat].
867    fn chat_with_extras(
868        &self,
869        messages: &[Message],
870    ) -> impl Future<Output = Result<(String, ChatExtras), LlmError>> + Send {
871        let msgs = messages.to_vec();
872        async move { Ok((self.chat(&msgs).await?, ChatExtras::default())) }
873    }
874
875    /// Return the request payload that will be sent to the provider, for debug dumps.
876    ///
877    /// Implementations should mirror the provider's request body as closely as practical.
878    #[must_use]
879    fn debug_request_json(
880        &self,
881        messages: &[Message],
882        tools: &[ToolDefinition],
883        _stream: bool,
884    ) -> serde_json::Value {
885        default_debug_request_json(messages, tools)
886    }
887
888    /// Return the list of model identifiers this provider can serve.
889    /// Default: empty (provider does not advertise models).
890    fn list_models(&self) -> Vec<String> {
891        vec![]
892    }
893
894    /// Whether this provider supports native structured output.
895    fn supports_structured_output(&self) -> bool {
896        false
897    }
898
899    /// Send messages and parse the response into a typed value `T`.
900    ///
901    /// Default implementation injects JSON schema into the system prompt and retries once
902    /// on parse failure. Providers with native structured output should override this.
903    ///
904    /// # Object safety
905    ///
906    /// This method requires `Self: Sized` and is therefore unavailable on trait objects.
907    /// Use [`chat_typed_dyn`](crate::provider_dyn::chat_typed_dyn) when working with
908    /// `Arc<dyn LlmProviderDyn>`.
909    #[allow(async_fn_in_trait)]
910    async fn chat_typed<T>(&self, messages: &[Message]) -> Result<T, LlmError>
911    where
912        T: serde::de::DeserializeOwned + schemars::JsonSchema + 'static,
913        Self: Sized,
914    {
915        let (_, schema_json) = cached_schema::<T>()?;
916        let type_name = short_type_name::<T>();
917
918        let mut augmented = messages.to_vec();
919        let instruction = format!(
920            "Respond with a valid JSON object matching this schema. \
921             Output ONLY the JSON, no markdown fences or extra text.\n\n\
922             Type: {type_name}\nSchema:\n```json\n{schema_json}\n```"
923        );
924        augmented.insert(0, Message::from_legacy(Role::System, instruction));
925
926        let raw = self.chat(&augmented).await?;
927        let cleaned = strip_json_fences(&raw);
928        match serde_json::from_str::<T>(cleaned) {
929            Ok(val) => Ok(val),
930            Err(first_err) => {
931                augmented.push(Message::from_legacy(Role::Assistant, &raw));
932                augmented.push(Message::from_legacy(
933                    Role::User,
934                    format!(
935                        "Your response was not valid JSON. Error: {first_err}. \
936                         Please output ONLY valid JSON matching the schema."
937                    ),
938                ));
939                let retry_raw = self.chat(&augmented).await?;
940                let retry_cleaned = strip_json_fences(&retry_raw);
941                serde_json::from_str::<T>(retry_cleaned).map_err(|e| {
942                    LlmError::StructuredParse(format!("parse failed after retry: {e}"))
943                })
944            }
945        }
946    }
947}
948
/// Strip an outer markdown code fence from LLM output.
///
/// The input is trimmed, then at most one leading triple-backtick fence and at most one
/// trailing triple-backtick fence are removed, and the result is trimmed again. A `json`
/// info string directly after the opening fence is dropped as well; it is matched ASCII
/// case-insensitively, so replies fenced with `JSON` or `Json` are cleaned too instead of
/// leaving the tag in front of the payload.
///
/// Text without fences is returned unchanged apart from trimming. Only outer fences are
/// handled; unfenced output that itself ends in triple backticks is still trimmed
/// (acceptable for MVP — see review R2).
fn strip_json_fences(s: &str) -> &str {
    const FENCE: &str = "```";
    const TAG: &str = "json";

    let mut body = s.trim();
    if let Some(rest) = body.strip_prefix(FENCE) {
        // `get` yields `None` when fewer than `TAG.len()` bytes remain or the cut would
        // split a multi-byte character, so the slice below can never panic.
        body = match rest.get(..TAG.len()) {
            Some(tag) if tag.eq_ignore_ascii_case(TAG) => &rest[TAG.len()..],
            _ => rest,
        };
    }
    body.strip_suffix(FENCE).unwrap_or(body).trim()
}
959
960#[cfg(test)]
961mod tests {
962    use tokio_stream::StreamExt;
963
964    use super::*;
965
966    struct StubProvider {
967        response: String,
968    }
969
970    impl LlmProvider for StubProvider {
971        async fn chat(&self, _messages: &[Message]) -> Result<String, LlmError> {
972            Ok(self.response.clone())
973        }
974
975        async fn chat_stream(&self, messages: &[Message]) -> Result<ChatStream, LlmError> {
976            let response = self.chat(messages).await?;
977            Ok(Box::pin(tokio_stream::once(Ok(StreamChunk::Content(
978                response,
979            )))))
980        }
981
982        fn supports_streaming(&self) -> bool {
983            false
984        }
985
986        async fn embed(&self, _text: &str) -> Result<Vec<f32>, LlmError> {
987            Ok(vec![0.1, 0.2, 0.3])
988        }
989
990        fn supports_embeddings(&self) -> bool {
991            false
992        }
993
994        fn name(&self) -> &'static str {
995            "stub"
996        }
997    }
998
999    #[test]
1000    fn context_window_default_returns_none() {
1001        let provider = StubProvider {
1002            response: String::new(),
1003        };
1004        assert!(provider.context_window().is_none());
1005    }
1006
1007    #[test]
1008    fn supports_streaming_default_returns_false() {
1009        let provider = StubProvider {
1010            response: String::new(),
1011        };
1012        assert!(!provider.supports_streaming());
1013    }
1014
1015    #[tokio::test]
1016    async fn chat_stream_default_yields_single_chunk() {
1017        let provider = StubProvider {
1018            response: "hello world".into(),
1019        };
1020        let messages = vec![Message {
1021            role: Role::User,
1022            content: "test".into(),
1023            parts: vec![],
1024            metadata: MessageMetadata::default(),
1025        }];
1026
1027        let mut stream = provider.chat_stream(&messages).await.unwrap();
1028        let chunk = stream.next().await.unwrap().unwrap();
1029        assert!(matches!(chunk, StreamChunk::Content(s) if s == "hello world"));
1030        assert!(stream.next().await.is_none());
1031    }
1032
1033    #[tokio::test]
1034    async fn chat_stream_default_propagates_chat_error() {
1035        struct FailProvider;
1036
1037        impl LlmProvider for FailProvider {
1038            async fn chat(&self, _messages: &[Message]) -> Result<String, LlmError> {
1039                Err(LlmError::Unavailable)
1040            }
1041
1042            async fn chat_stream(&self, messages: &[Message]) -> Result<ChatStream, LlmError> {
1043                let response = self.chat(messages).await?;
1044                Ok(Box::pin(tokio_stream::once(Ok(StreamChunk::Content(
1045                    response,
1046                )))))
1047            }
1048
1049            fn supports_streaming(&self) -> bool {
1050                false
1051            }
1052
1053            async fn embed(&self, _text: &str) -> Result<Vec<f32>, LlmError> {
1054                Err(LlmError::Unavailable)
1055            }
1056
1057            fn supports_embeddings(&self) -> bool {
1058                false
1059            }
1060
1061            fn name(&self) -> &'static str {
1062                "fail"
1063            }
1064        }
1065
1066        let provider = FailProvider;
1067        let messages = vec![Message {
1068            role: Role::User,
1069            content: "test".into(),
1070            parts: vec![],
1071            metadata: MessageMetadata::default(),
1072        }];
1073
1074        let result = provider.chat_stream(&messages).await;
1075        assert!(result.is_err());
1076        if let Err(e) = result {
1077            assert!(e.to_string().contains("provider unavailable"));
1078        }
1079    }
1080
1081    #[tokio::test]
1082    async fn stub_provider_embed_returns_vector() {
1083        let provider = StubProvider {
1084            response: String::new(),
1085        };
1086        let embedding = provider.embed("test").await.unwrap();
1087        assert_eq!(embedding, vec![0.1, 0.2, 0.3]);
1088    }
1089
1090    #[tokio::test]
1091    async fn fail_provider_embed_propagates_error() {
1092        struct FailProvider;
1093
1094        impl LlmProvider for FailProvider {
1095            async fn chat(&self, _messages: &[Message]) -> Result<String, LlmError> {
1096                Err(LlmError::Unavailable)
1097            }
1098
1099            async fn chat_stream(&self, messages: &[Message]) -> Result<ChatStream, LlmError> {
1100                let response = self.chat(messages).await?;
1101                Ok(Box::pin(tokio_stream::once(Ok(StreamChunk::Content(
1102                    response,
1103                )))))
1104            }
1105
1106            fn supports_streaming(&self) -> bool {
1107                false
1108            }
1109
1110            async fn embed(&self, _text: &str) -> Result<Vec<f32>, LlmError> {
1111                Err(LlmError::EmbedUnsupported {
1112                    provider: "fail".into(),
1113                })
1114            }
1115
1116            fn supports_embeddings(&self) -> bool {
1117                false
1118            }
1119
1120            fn name(&self) -> &'static str {
1121                "fail"
1122            }
1123        }
1124
1125        let provider = FailProvider;
1126        let result = provider.embed("test").await;
1127        assert!(result.is_err());
1128        assert!(
1129            result
1130                .unwrap_err()
1131                .to_string()
1132                .contains("embedding not supported")
1133        );
1134    }
1135
1136    #[test]
1137    fn role_serialization() {
1138        let system = Role::System;
1139        let user = Role::User;
1140        let assistant = Role::Assistant;
1141
1142        assert_eq!(serde_json::to_string(&system).unwrap(), "\"system\"");
1143        assert_eq!(serde_json::to_string(&user).unwrap(), "\"user\"");
1144        assert_eq!(serde_json::to_string(&assistant).unwrap(), "\"assistant\"");
1145    }
1146
1147    #[test]
1148    fn role_deserialization() {
1149        let system: Role = serde_json::from_str("\"system\"").unwrap();
1150        let user: Role = serde_json::from_str("\"user\"").unwrap();
1151        let assistant: Role = serde_json::from_str("\"assistant\"").unwrap();
1152
1153        assert_eq!(system, Role::System);
1154        assert_eq!(user, Role::User);
1155        assert_eq!(assistant, Role::Assistant);
1156    }
1157
1158    #[test]
1159    fn message_clone() {
1160        let msg = Message {
1161            role: Role::User,
1162            content: "test".into(),
1163            parts: vec![],
1164            metadata: MessageMetadata::default(),
1165        };
1166        let cloned = msg.clone();
1167        assert_eq!(cloned.role, msg.role);
1168        assert_eq!(cloned.content, msg.content);
1169    }
1170
1171    #[test]
1172    fn message_debug() {
1173        let msg = Message {
1174            role: Role::Assistant,
1175            content: "response".into(),
1176            parts: vec![],
1177            metadata: MessageMetadata::default(),
1178        };
1179        let debug = format!("{msg:?}");
1180        assert!(debug.contains("Assistant"));
1181        assert!(debug.contains("response"));
1182    }
1183
1184    #[test]
1185    fn message_serialization() {
1186        let msg = Message {
1187            role: Role::User,
1188            content: "hello".into(),
1189            parts: vec![],
1190            metadata: MessageMetadata::default(),
1191        };
1192        let json = serde_json::to_string(&msg).unwrap();
1193        assert!(json.contains("\"role\":\"user\""));
1194        assert!(json.contains("\"content\":\"hello\""));
1195    }
1196
1197    #[test]
1198    fn message_part_serde_round_trip() {
1199        let parts = vec![
1200            MessagePart::Text {
1201                text: "hello".into(),
1202            },
1203            MessagePart::ToolOutput {
1204                tool_name: "bash".into(),
1205                body: "output".into(),
1206                compacted_at: None,
1207            },
1208            MessagePart::Recall {
1209                text: "recall".into(),
1210            },
1211            MessagePart::CodeContext {
1212                text: "code".into(),
1213            },
1214            MessagePart::Summary {
1215                text: "summary".into(),
1216            },
1217        ];
1218        let json = serde_json::to_string(&parts).unwrap();
1219        let deserialized: Vec<MessagePart> = serde_json::from_str(&json).unwrap();
1220        assert_eq!(deserialized.len(), 5);
1221    }
1222
1223    #[test]
1224    fn from_legacy_creates_empty_parts() {
1225        let msg = Message::from_legacy(Role::User, "hello");
1226        assert_eq!(msg.role, Role::User);
1227        assert_eq!(msg.content, "hello");
1228        assert!(msg.parts.is_empty());
1229        assert_eq!(msg.to_llm_content(), "hello");
1230    }
1231
1232    #[test]
1233    fn from_parts_flattens_content() {
1234        let msg = Message::from_parts(
1235            Role::System,
1236            vec![MessagePart::Recall {
1237                text: "recalled data".into(),
1238            }],
1239        );
1240        assert_eq!(msg.content, "recalled data");
1241        assert_eq!(msg.to_llm_content(), "recalled data");
1242        assert_eq!(msg.parts.len(), 1);
1243    }
1244
1245    #[test]
1246    fn from_parts_tool_output_format() {
1247        let msg = Message::from_parts(
1248            Role::User,
1249            vec![MessagePart::ToolOutput {
1250                tool_name: "bash".into(),
1251                body: "hello world".into(),
1252                compacted_at: None,
1253            }],
1254        );
1255        assert!(msg.content.contains("[tool output: bash]"));
1256        assert!(msg.content.contains("hello world"));
1257    }
1258
1259    #[test]
1260    fn message_deserializes_without_parts() {
1261        let json = r#"{"role":"user","content":"hello"}"#;
1262        let msg: Message = serde_json::from_str(json).unwrap();
1263        assert_eq!(msg.content, "hello");
1264        assert!(msg.parts.is_empty());
1265    }
1266
1267    #[test]
1268    fn flatten_skips_compacted_tool_output_empty_body() {
1269        // When compacted_at is set and body is empty, renders "(pruned)".
1270        let msg = Message::from_parts(
1271            Role::User,
1272            vec![
1273                MessagePart::Text {
1274                    text: "prefix ".into(),
1275                },
1276                MessagePart::ToolOutput {
1277                    tool_name: "bash".into(),
1278                    body: String::new(),
1279                    compacted_at: Some(1234),
1280                },
1281                MessagePart::Text {
1282                    text: " suffix".into(),
1283                },
1284            ],
1285        );
1286        assert!(msg.content.contains("(pruned)"));
1287        assert!(msg.content.contains("prefix "));
1288        assert!(msg.content.contains(" suffix"));
1289    }
1290
1291    #[test]
1292    fn flatten_compacted_tool_output_with_reference_renders_body() {
1293        // When compacted_at is set and body contains a reference notice, renders the body.
1294        let ref_notice = "[tool output pruned; full content at /tmp/overflow/big.txt]";
1295        let msg = Message::from_parts(
1296            Role::User,
1297            vec![MessagePart::ToolOutput {
1298                tool_name: "bash".into(),
1299                body: ref_notice.into(),
1300                compacted_at: Some(1234),
1301            }],
1302        );
1303        assert!(msg.content.contains(ref_notice));
1304        assert!(!msg.content.contains("(pruned)"));
1305    }
1306
1307    #[test]
1308    fn rebuild_content_syncs_after_mutation() {
1309        let mut msg = Message::from_parts(
1310            Role::User,
1311            vec![MessagePart::ToolOutput {
1312                tool_name: "bash".into(),
1313                body: "original".into(),
1314                compacted_at: None,
1315            }],
1316        );
1317        assert!(msg.content.contains("original"));
1318
1319        if let MessagePart::ToolOutput {
1320            ref mut compacted_at,
1321            ref mut body,
1322            ..
1323        } = msg.parts[0]
1324        {
1325            *compacted_at = Some(999);
1326            body.clear(); // simulate pruning: body cleared, no overflow notice
1327        }
1328        msg.rebuild_content();
1329
1330        assert!(msg.content.contains("(pruned)"));
1331        assert!(!msg.content.contains("original"));
1332    }
1333
1334    #[test]
1335    fn message_part_tool_use_serde_round_trip() {
1336        let part = MessagePart::ToolUse {
1337            id: "toolu_123".into(),
1338            name: "bash".into(),
1339            input: serde_json::json!({"command": "ls"}),
1340        };
1341        let json = serde_json::to_string(&part).unwrap();
1342        let deserialized: MessagePart = serde_json::from_str(&json).unwrap();
1343        if let MessagePart::ToolUse { id, name, input } = deserialized {
1344            assert_eq!(id, "toolu_123");
1345            assert_eq!(name, "bash");
1346            assert_eq!(input["command"], "ls");
1347        } else {
1348            panic!("expected ToolUse");
1349        }
1350    }
1351
1352    #[test]
1353    fn message_part_tool_result_serde_round_trip() {
1354        let part = MessagePart::ToolResult {
1355            tool_use_id: "toolu_123".into(),
1356            content: "file1.rs\nfile2.rs".into(),
1357            is_error: false,
1358        };
1359        let json = serde_json::to_string(&part).unwrap();
1360        let deserialized: MessagePart = serde_json::from_str(&json).unwrap();
1361        if let MessagePart::ToolResult {
1362            tool_use_id,
1363            content,
1364            is_error,
1365        } = deserialized
1366        {
1367            assert_eq!(tool_use_id, "toolu_123");
1368            assert_eq!(content, "file1.rs\nfile2.rs");
1369            assert!(!is_error);
1370        } else {
1371            panic!("expected ToolResult");
1372        }
1373    }
1374
1375    #[test]
1376    fn message_part_tool_result_is_error_default() {
1377        let json = r#"{"kind":"tool_result","tool_use_id":"id","content":"err"}"#;
1378        let part: MessagePart = serde_json::from_str(json).unwrap();
1379        if let MessagePart::ToolResult { is_error, .. } = part {
1380            assert!(!is_error);
1381        } else {
1382            panic!("expected ToolResult");
1383        }
1384    }
1385
1386    #[test]
1387    fn chat_response_construction() {
1388        let text = ChatResponse::Text("hello".into());
1389        assert!(matches!(text, ChatResponse::Text(s) if s == "hello"));
1390
1391        let tool_use = ChatResponse::ToolUse {
1392            text: Some("I'll run that".into()),
1393            tool_calls: vec![ToolUseRequest {
1394                id: "1".into(),
1395                name: "bash".into(),
1396                input: serde_json::json!({}),
1397            }],
1398            thinking_blocks: vec![],
1399        };
1400        assert!(matches!(tool_use, ChatResponse::ToolUse { .. }));
1401    }
1402
1403    #[test]
1404    fn flatten_parts_tool_use() {
1405        let msg = Message::from_parts(
1406            Role::Assistant,
1407            vec![MessagePart::ToolUse {
1408                id: "t1".into(),
1409                name: "bash".into(),
1410                input: serde_json::json!({"command": "ls"}),
1411            }],
1412        );
1413        assert!(msg.content.contains("[tool_use: bash(t1)]"));
1414    }
1415
1416    #[test]
1417    fn flatten_parts_tool_result() {
1418        let msg = Message::from_parts(
1419            Role::User,
1420            vec![MessagePart::ToolResult {
1421                tool_use_id: "t1".into(),
1422                content: "output here".into(),
1423                is_error: false,
1424            }],
1425        );
1426        assert!(msg.content.contains("[tool_result: t1]"));
1427        assert!(msg.content.contains("output here"));
1428    }
1429
1430    #[test]
1431    fn tool_definition_serde_round_trip() {
1432        let def = ToolDefinition {
1433            name: "bash".into(),
1434            description: "Execute a shell command".into(),
1435            parameters: serde_json::json!({"type": "object"}),
1436            output_schema: None,
1437        };
1438        let json = serde_json::to_string(&def).unwrap();
1439        let deserialized: ToolDefinition = serde_json::from_str(&json).unwrap();
1440        assert_eq!(deserialized.name, "bash");
1441        assert_eq!(deserialized.description, "Execute a shell command");
1442    }
1443
1444    #[tokio::test]
1445    async fn chat_with_tools_default_delegates_to_chat() {
1446        let provider = StubProvider {
1447            response: "hello".into(),
1448        };
1449        let messages = vec![Message::from_legacy(Role::User, "test")];
1450        let result = provider.chat_with_tools(&messages, &[]).await.unwrap();
1451        assert!(matches!(result, ChatResponse::Text(s) if s == "hello"));
1452    }
1453
1454    #[test]
1455    fn tool_output_compacted_at_serde_default() {
1456        let json = r#"{"kind":"tool_output","tool_name":"bash","body":"out"}"#;
1457        let part: MessagePart = serde_json::from_str(json).unwrap();
1458        if let MessagePart::ToolOutput { compacted_at, .. } = part {
1459            assert!(compacted_at.is_none());
1460        } else {
1461            panic!("expected ToolOutput");
1462        }
1463    }
1464
1465    // --- M27: strip_json_fences tests ---
1466
1467    #[test]
1468    fn strip_json_fences_plain_json() {
1469        assert_eq!(strip_json_fences(r#"{"a": 1}"#), r#"{"a": 1}"#);
1470    }
1471
1472    #[test]
1473    fn strip_json_fences_with_json_fence() {
1474        assert_eq!(strip_json_fences("```json\n{\"a\": 1}\n```"), r#"{"a": 1}"#);
1475    }
1476
1477    #[test]
1478    fn strip_json_fences_with_plain_fence() {
1479        assert_eq!(strip_json_fences("```\n{\"a\": 1}\n```"), r#"{"a": 1}"#);
1480    }
1481
1482    #[test]
1483    fn strip_json_fences_whitespace() {
1484        assert_eq!(strip_json_fences("  \n  "), "");
1485    }
1486
1487    #[test]
1488    fn strip_json_fences_empty() {
1489        assert_eq!(strip_json_fences(""), "");
1490    }
1491
1492    #[test]
1493    fn strip_json_fences_outer_whitespace() {
1494        assert_eq!(
1495            strip_json_fences("  ```json\n{\"a\": 1}\n```  "),
1496            r#"{"a": 1}"#
1497        );
1498    }
1499
1500    #[test]
1501    fn strip_json_fences_only_opening_fence() {
1502        assert_eq!(strip_json_fences("```json\n{\"a\": 1}"), r#"{"a": 1}"#);
1503    }
1504
1505    // --- M27: chat_typed tests ---
1506
1507    #[derive(Debug, serde::Deserialize, schemars::JsonSchema, PartialEq)]
1508    struct TestOutput {
1509        value: String,
1510    }
1511
1512    struct SequentialStub {
1513        responses: std::sync::Mutex<Vec<Result<String, LlmError>>>,
1514    }
1515
1516    impl SequentialStub {
1517        fn new(responses: Vec<Result<String, LlmError>>) -> Self {
1518            Self {
1519                responses: std::sync::Mutex::new(responses),
1520            }
1521        }
1522    }
1523
1524    impl LlmProvider for SequentialStub {
1525        async fn chat(&self, _messages: &[Message]) -> Result<String, LlmError> {
1526            let mut responses = self.responses.lock().unwrap();
1527            if responses.is_empty() {
1528                return Err(LlmError::Other("no more responses".into()));
1529            }
1530            responses.remove(0)
1531        }
1532
1533        async fn chat_stream(&self, messages: &[Message]) -> Result<ChatStream, LlmError> {
1534            let response = self.chat(messages).await?;
1535            Ok(Box::pin(tokio_stream::once(Ok(StreamChunk::Content(
1536                response,
1537            )))))
1538        }
1539
1540        fn supports_streaming(&self) -> bool {
1541            false
1542        }
1543
1544        async fn embed(&self, _text: &str) -> Result<Vec<f32>, LlmError> {
1545            Err(LlmError::EmbedUnsupported {
1546                provider: "sequential-stub".into(),
1547            })
1548        }
1549
1550        fn supports_embeddings(&self) -> bool {
1551            false
1552        }
1553
1554        fn name(&self) -> &'static str {
1555            "sequential-stub"
1556        }
1557    }
1558
1559    #[tokio::test]
1560    async fn chat_typed_happy_path() {
1561        let provider = StubProvider {
1562            response: r#"{"value": "hello"}"#.into(),
1563        };
1564        let messages = vec![Message::from_legacy(Role::User, "test")];
1565        let result: TestOutput = provider.chat_typed(&messages).await.unwrap();
1566        assert_eq!(
1567            result,
1568            TestOutput {
1569                value: "hello".into()
1570            }
1571        );
1572    }
1573
1574    #[tokio::test]
1575    async fn chat_typed_retry_succeeds() {
1576        let provider = SequentialStub::new(vec![
1577            Ok("not valid json".into()),
1578            Ok(r#"{"value": "ok"}"#.into()),
1579        ]);
1580        let messages = vec![Message::from_legacy(Role::User, "test")];
1581        let result: TestOutput = provider.chat_typed(&messages).await.unwrap();
1582        assert_eq!(result, TestOutput { value: "ok".into() });
1583    }
1584
1585    #[tokio::test]
1586    async fn chat_typed_both_fail() {
1587        let provider = SequentialStub::new(vec![Ok("bad json".into()), Ok("still bad".into())]);
1588        let messages = vec![Message::from_legacy(Role::User, "test")];
1589        let result = provider.chat_typed::<TestOutput>(&messages).await;
1590        let err = result.unwrap_err();
1591        assert!(err.to_string().contains("parse failed after retry"));
1592    }
1593
1594    #[tokio::test]
1595    async fn chat_typed_chat_error_propagates() {
1596        let provider = SequentialStub::new(vec![Err(LlmError::Unavailable)]);
1597        let messages = vec![Message::from_legacy(Role::User, "test")];
1598        let result = provider.chat_typed::<TestOutput>(&messages).await;
1599        assert!(matches!(result, Err(LlmError::Unavailable)));
1600    }
1601
1602    #[tokio::test]
1603    async fn chat_typed_strips_fences() {
1604        let provider = StubProvider {
1605            response: "```json\n{\"value\": \"fenced\"}\n```".into(),
1606        };
1607        let messages = vec![Message::from_legacy(Role::User, "test")];
1608        let result: TestOutput = provider.chat_typed(&messages).await.unwrap();
1609        assert_eq!(
1610            result,
1611            TestOutput {
1612                value: "fenced".into()
1613            }
1614        );
1615    }
1616
1617    #[test]
1618    fn supports_structured_output_default_false() {
1619        let provider = StubProvider {
1620            response: String::new(),
1621        };
1622        assert!(!provider.supports_structured_output());
1623    }
1624
1625    #[test]
1626    fn structured_parse_error_display() {
1627        let err = LlmError::StructuredParse("test error".into());
1628        assert_eq!(
1629            err.to_string(),
1630            "structured output parse failed: test error"
1631        );
1632    }
1633
1634    #[test]
1635    fn message_part_image_roundtrip_json() {
1636        let part = MessagePart::Image(Box::new(ImageData {
1637            data: vec![1, 2, 3, 4],
1638            mime_type: "image/jpeg".into(),
1639        }));
1640        let json = serde_json::to_string(&part).unwrap();
1641        let decoded: MessagePart = serde_json::from_str(&json).unwrap();
1642        match decoded {
1643            MessagePart::Image(img) => {
1644                assert_eq!(img.data, vec![1, 2, 3, 4]);
1645                assert_eq!(img.mime_type, "image/jpeg");
1646            }
1647            _ => panic!("expected Image variant"),
1648        }
1649    }
1650
1651    #[test]
1652    fn flatten_parts_includes_image_placeholder() {
1653        let msg = Message::from_parts(
1654            Role::User,
1655            vec![
1656                MessagePart::Text {
1657                    text: "see this".into(),
1658                },
1659                MessagePart::Image(Box::new(ImageData {
1660                    data: vec![0u8; 100],
1661                    mime_type: "image/png".into(),
1662                })),
1663            ],
1664        );
1665        let content = msg.to_llm_content();
1666        assert!(content.contains("see this"));
1667        assert!(content.contains("[image: image/png"));
1668    }
1669
1670    #[test]
1671    fn supports_vision_default_false() {
1672        let provider = StubProvider {
1673            response: String::new(),
1674        };
1675        assert!(!provider.supports_vision());
1676    }
1677
1678    #[test]
1679    fn message_metadata_default_both_visible() {
1680        let m = MessageMetadata::default();
1681        assert!(m.visibility.is_agent_visible());
1682        assert!(m.visibility.is_user_visible());
1683        assert_eq!(m.visibility, MessageVisibility::Both);
1684        assert!(m.compacted_at.is_none());
1685    }
1686
1687    #[test]
1688    fn message_metadata_agent_only() {
1689        let m = MessageMetadata::agent_only();
1690        assert!(m.visibility.is_agent_visible());
1691        assert!(!m.visibility.is_user_visible());
1692        assert_eq!(m.visibility, MessageVisibility::AgentOnly);
1693    }
1694
1695    #[test]
1696    fn message_metadata_user_only() {
1697        let m = MessageMetadata::user_only();
1698        assert!(!m.visibility.is_agent_visible());
1699        assert!(m.visibility.is_user_visible());
1700        assert_eq!(m.visibility, MessageVisibility::UserOnly);
1701    }
1702
1703    #[test]
1704    fn message_metadata_serde_default() {
1705        let json = r#"{"role":"user","content":"hello"}"#;
1706        let msg: Message = serde_json::from_str(json).unwrap();
1707        assert!(msg.metadata.visibility.is_agent_visible());
1708        assert!(msg.metadata.visibility.is_user_visible());
1709    }
1710
1711    #[test]
1712    fn message_metadata_round_trip() {
1713        let msg = Message {
1714            role: Role::User,
1715            content: "test".into(),
1716            parts: vec![],
1717            metadata: MessageMetadata::agent_only(),
1718        };
1719        let json = serde_json::to_string(&msg).unwrap();
1720        let decoded: Message = serde_json::from_str(&json).unwrap();
1721        assert!(decoded.metadata.visibility.is_agent_visible());
1722        assert!(!decoded.metadata.visibility.is_user_visible());
1723        assert_eq!(decoded.metadata.visibility, MessageVisibility::AgentOnly);
1724    }
1725
1726    #[test]
1727    fn message_part_compaction_round_trip() {
1728        let part = MessagePart::Compaction {
1729            summary: "Context was summarized.".to_owned(),
1730        };
1731        let json = serde_json::to_string(&part).unwrap();
1732        let decoded: MessagePart = serde_json::from_str(&json).unwrap();
1733        assert!(
1734            matches!(decoded, MessagePart::Compaction { summary } if summary == "Context was summarized.")
1735        );
1736    }
1737
1738    #[test]
1739    fn flatten_parts_compaction_contributes_no_text() {
1740        // MessagePart::Compaction must not appear in the flattened content string
1741        // (it's metadata-only; the summary is stored on the Message separately).
1742        let parts = vec![
1743            MessagePart::Text {
1744                text: "Hello".to_owned(),
1745            },
1746            MessagePart::Compaction {
1747                summary: "Summary".to_owned(),
1748            },
1749        ];
1750        let msg = Message::from_parts(Role::Assistant, parts);
1751        // Only the Text part should appear in content.
1752        assert_eq!(msg.content.trim(), "Hello");
1753    }
1754
1755    #[test]
1756    fn stream_chunk_compaction_variant() {
1757        let chunk = StreamChunk::Compaction("A summary".to_owned());
1758        assert!(matches!(chunk, StreamChunk::Compaction(s) if s == "A summary"));
1759    }
1760
1761    #[test]
1762    fn short_type_name_extracts_last_segment() {
1763        struct MyOutput;
1764        assert_eq!(short_type_name::<MyOutput>(), "MyOutput");
1765    }
1766
1767    #[test]
1768    fn short_type_name_primitive_returns_full_name() {
1769        // Primitives have no "::" in their type_name — rsplit returns the full name.
1770        assert_eq!(short_type_name::<u32>(), "u32");
1771        assert_eq!(short_type_name::<bool>(), "bool");
1772    }
1773
1774    #[test]
1775    fn short_type_name_nested_path_returns_last() {
1776        // Use a type whose path contains "::" segments.
1777        assert_eq!(
1778            short_type_name::<std::collections::HashMap<u32, u32>>(),
1779            "HashMap<u32, u32>"
1780        );
1781    }
1782
1783    // Regression test for #2257: `MessagePart::Summary` must serialize to the
1784    // internally-tagged format `{"kind":"summary","text":"..."}` and round-trip correctly.
1785    #[test]
1786    fn summary_roundtrip() {
1787        let part = MessagePart::Summary {
1788            text: "hello".to_string(),
1789        };
1790        let json = serde_json::to_string(&part).expect("serialization must not fail");
1791        assert!(
1792            json.contains("\"kind\":\"summary\""),
1793            "must use internally-tagged format, got: {json}"
1794        );
1795        assert!(
1796            !json.contains("\"Summary\""),
1797            "must not use externally-tagged format, got: {json}"
1798        );
1799        let decoded: MessagePart =
1800            serde_json::from_str(&json).expect("deserialization must not fail");
1801        match decoded {
1802            MessagePart::Summary { text } => assert_eq!(text, "hello"),
1803            other => panic!("expected MessagePart::Summary, got {other:?}"),
1804        }
1805    }
1806
1807    #[tokio::test]
1808    async fn embed_batch_default_empty_returns_empty() {
1809        let provider = StubProvider {
1810            response: String::new(),
1811        };
1812        let result = provider.embed_batch(&[]).await.unwrap();
1813        assert!(result.is_empty());
1814    }
1815
1816    #[tokio::test]
1817    async fn embed_batch_default_calls_embed_sequentially() {
1818        let provider = StubProvider {
1819            response: String::new(),
1820        };
1821        let texts = ["hello", "world", "foo"];
1822        let result = provider.embed_batch(&texts).await.unwrap();
1823        assert_eq!(result.len(), 3);
1824        // StubProvider::embed always returns [0.1, 0.2, 0.3]
1825        for vec in &result {
1826            assert_eq!(vec, &[0.1_f32, 0.2, 0.3]);
1827        }
1828    }
1829
1830    #[test]
1831    fn message_visibility_db_roundtrip_both() {
1832        assert_eq!(MessageVisibility::Both.as_db_str(), "both");
1833        assert_eq!(
1834            MessageVisibility::from_db_str("both"),
1835            MessageVisibility::Both
1836        );
1837    }
1838
1839    #[test]
1840    fn message_visibility_db_roundtrip_agent_only() {
1841        assert_eq!(MessageVisibility::AgentOnly.as_db_str(), "agent_only");
1842        assert_eq!(
1843            MessageVisibility::from_db_str("agent_only"),
1844            MessageVisibility::AgentOnly
1845        );
1846    }
1847
1848    #[test]
1849    fn message_visibility_db_roundtrip_user_only() {
1850        assert_eq!(MessageVisibility::UserOnly.as_db_str(), "user_only");
1851        assert_eq!(
1852            MessageVisibility::from_db_str("user_only"),
1853            MessageVisibility::UserOnly
1854        );
1855    }
1856
1857    #[test]
1858    fn message_visibility_from_db_str_unknown_defaults_to_both() {
1859        assert_eq!(
1860            MessageVisibility::from_db_str("unknown_future_value"),
1861            MessageVisibility::Both
1862        );
1863        assert_eq!(MessageVisibility::from_db_str(""), MessageVisibility::Both);
1864    }
1865}
zeph_llm/provider.rs

zeph_llm/
provider.rs