// edgequake_llm/traits.rs
//! LLM provider traits for text completion and embedding.
//!
//! # Implements
//!
//! @implements FEAT0006 (Vector Embedding Generation via EmbeddingProvider trait)
//! @implements FEAT0017 (Multi-Provider LLM Support via LLMProvider trait)
//! @implements FEAT0018 (Embedding Provider Abstraction)
//!
//! # Enforces
//!
//! - **BR0303**: Token usage tracked in [`LLMResponse`]
//! - **BR0010**: Embedding dimension validated by providers
//!
//! # WHY: Trait-Based Provider Abstraction
//!
//! Using traits instead of concrete types enables:
//! - **Testing**: MockProvider for unit tests (no API calls)
//! - **Flexibility**: Swap providers without code changes
//! - **Cost control**: Route to different providers based on request type
//! - **Resilience**: Fallback providers when primary is unavailable
//!
//! # Key Traits
//!
//! - [`LLMProvider`]: Text completion (chat, extraction prompts)
//! - [`EmbeddingProvider`]: Vector embedding generation

use std::collections::HashMap;

use async_trait::async_trait;
use futures::stream::BoxStream;
use serde::{Deserialize, Serialize};
use serde_json::Value as JsonValue;

use crate::error::Result;
36// ============================================================================
37// Function/Tool Calling Types (OpenAI-compatible)
38// ============================================================================
39
/// Definition of a tool that the model can call.
///
/// Serializes to the OpenAI-compatible wire shape:
/// `{"type": "function", "function": {...}}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolDefinition {
    /// Type of tool (always "function" for function tools); serialized as `type`.
    #[serde(rename = "type")]
    pub tool_type: String,

    /// Function definition (name, description, JSON Schema parameters).
    pub function: FunctionDefinition,
}
50
51impl ToolDefinition {
52    /// Create a new function tool definition.
53    pub fn function(
54        name: impl Into<String>,
55        description: impl Into<String>,
56        parameters: JsonValue,
57    ) -> Self {
58        Self {
59            tool_type: "function".to_string(),
60            function: FunctionDefinition {
61                name: name.into(),
62                description: description.into(),
63                parameters,
64                strict: Some(true),
65            },
66        }
67    }
68}
69
/// Definition of a function that can be called by the model.
///
/// Nested inside [`ToolDefinition`] in the OpenAI-compatible wire format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionDefinition {
    /// Name of the function (the model echoes this in a [`ToolCall`]).
    pub name: String,

    /// Description of what the function does.
    pub description: String,

    /// JSON Schema defining the function parameters.
    pub parameters: JsonValue,

    /// Whether to enforce strict mode for schema validation.
    /// Omitted from serialization when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub strict: Option<bool>,
}
86
/// A tool call request from the model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// Unique identifier for this tool call; echo it back in
    /// [`ToolResult::tool_call_id`] when returning the tool's output.
    pub id: String,

    /// Type of tool (always "function"); serialized as `type`.
    #[serde(rename = "type")]
    pub call_type: String,

    /// Function call details (name plus JSON-encoded arguments).
    pub function: FunctionCall,
}
100
101impl ToolCall {
102    /// Parse the function arguments as JSON.
103    pub fn parse_arguments<T: serde::de::DeserializeOwned>(&self) -> Result<T> {
104        serde_json::from_str(&self.function.arguments).map_err(|e| {
105            crate::error::LlmError::InvalidRequest(format!("Failed to parse tool arguments: {}", e))
106        })
107    }
108
109    /// Get the function name.
110    pub fn name(&self) -> &str {
111        &self.function.name
112    }
113
114    /// Get the raw arguments string.
115    pub fn arguments(&self) -> &str {
116        &self.function.arguments
117    }
118}
119
/// Details of a function call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionCall {
    /// Name of the function to call.
    pub name: String,

    /// JSON-encoded arguments for the function.
    /// Parse with [`ToolCall::parse_arguments`].
    pub arguments: String,
}
129
/// Tool choice configuration.
///
/// `#[serde(untagged)]`: the `Auto`/`Required` variants serialize as bare
/// strings ("auto", "required", or "none"), while `Function` serializes as an
/// object — matching the OpenAI wire format. Prefer the constructor helpers
/// ([`ToolChoice::auto`], [`ToolChoice::required`], [`ToolChoice::function`],
/// [`ToolChoice::none`]) over building variants by hand.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolChoice {
    /// Let the model decide (default). Also carries the literal "none"
    /// produced by [`ToolChoice::none`].
    Auto(String),

    /// Force the model to use tools.
    Required(String),

    /// Force a specific function.
    Function {
        /// Always "function"; serialized as `type`.
        #[serde(rename = "type")]
        choice_type: String,
        /// The function the model must call.
        function: ToolChoiceFunction,
    },
}
147
148impl ToolChoice {
149    /// Auto mode - model decides when to use tools.
150    pub fn auto() -> Self {
151        ToolChoice::Auto("auto".to_string())
152    }
153
154    /// Required mode - model must use at least one tool.
155    pub fn required() -> Self {
156        ToolChoice::Required("required".to_string())
157    }
158
159    /// Force a specific function to be called.
160    pub fn function(name: impl Into<String>) -> Self {
161        ToolChoice::Function {
162            choice_type: "function".to_string(),
163            function: ToolChoiceFunction { name: name.into() },
164        }
165    }
166
167    /// None mode - disable tool calling.
168    pub fn none() -> Self {
169        ToolChoice::Auto("none".to_string())
170    }
171}
172
/// Specific function choice, nested inside [`ToolChoice::Function`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolChoiceFunction {
    /// Name of the function to call.
    pub name: String,
}
179
/// Result of a tool execution to send back to the model.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolResult {
    /// ID of the [`ToolCall`] this result is for.
    pub tool_call_id: String,

    /// Role (always "tool").
    pub role: String,

    /// Content/output of the tool execution.
    pub content: String,
}
192
193impl ToolResult {
194    /// Create a new tool result.
195    pub fn new(tool_call_id: impl Into<String>, content: impl Into<String>) -> Self {
196        Self {
197            tool_call_id: tool_call_id.into(),
198            role: "tool".to_string(),
199            content: content.into(),
200        }
201    }
202
203    /// Create an error result.
204    pub fn error(tool_call_id: impl Into<String>, error: impl std::fmt::Display) -> Self {
205        Self {
206            tool_call_id: tool_call_id.into(),
207            role: "tool".to_string(),
208            content: format!("Error: {}", error),
209        }
210    }
211}
212
213// ============================================================================
214// Streaming Types
215// ============================================================================
216
/// Chunk of a streaming response with tool call support.
///
/// OODA-04: Added ThinkingContent for extended thinking/reasoning streaming.
/// OODA-10: Added `budget_total` for thinking-budget display.
#[derive(Debug, Clone)]
pub enum StreamChunk {
    /// Partial content/reasoning text.
    Content(String),

    /// Extended thinking/reasoning content (OODA-04, OODA-10).
    ///
    /// Emitted by models supporting extended thinking (Claude, Gemini 2.0 Flash Thinking,
    /// DeepSeek R1/V3). Allows real-time display of model reasoning process.
    ThinkingContent {
        /// The thinking/reasoning text fragment
        text: String,
        /// Tokens used for this thinking chunk (if provider reports it)
        tokens_used: Option<usize>,
        /// Total thinking budget (OODA-10: for budget display like "1.2k/10k")
        budget_total: Option<usize>,
    },

    /// Incremental tool call data.
    ///
    /// Fields other than `index` may arrive piecemeal across chunks;
    /// consumers accumulate them keyed by `index`.
    ToolCallDelta {
        /// Index of the tool call (for multiple parallel calls).
        index: usize,
        /// Tool call ID (may be sent once at start).
        id: Option<String>,
        /// Function name (may be sent once at start).
        function_name: Option<String>,
        /// Incremental function arguments (JSON fragment).
        function_arguments: Option<String>,
    },

    /// Stream finished with reason.
    ///
    /// OODA-35: Extended with optional provider metrics.
    Finished {
        /// Finish reason (e.g., "stop", "tool_calls", "length").
        reason: String,
        /// Time to first token in milliseconds (if provider reports it).
        /// OODA-35: Added for provider-native TTFT.
        #[allow(dead_code)]
        ttft_ms: Option<f64>,
    },
}
263
264// ============================================================================
265// LLM Response with Tool Calls
266// ============================================================================
267
/// Response from an LLM completion.
///
/// Enforces BR0303: token usage is tracked on every response
/// (prompt/completion/total counters below).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMResponse {
    /// The generated text content.
    pub content: String,

    /// Number of tokens in the prompt.
    pub prompt_tokens: usize,

    /// Number of tokens in the completion.
    pub completion_tokens: usize,

    /// Total tokens used (prompt + completion; see [`LLMResponse::with_usage`]).
    pub total_tokens: usize,

    /// Model used for the request.
    pub model: String,

    /// Finish reason (e.g., "stop", "length", "content_filter", "tool_calls").
    pub finish_reason: Option<String>,

    /// Tool calls requested by the model (if any). Omitted from
    /// serialization when empty; defaults to empty on deserialization.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tool_calls: Vec<ToolCall>,

    /// Additional metadata from the provider (e.g. response "id",
    /// "system_fingerprint"; see [`LLMResponse::with_metadata`]).
    pub metadata: HashMap<String, serde_json::Value>,

    /// Number of tokens served from cache (if provider supports caching).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_hit_tokens: Option<usize>,

    /// Number of reasoning/thinking tokens used by the model.
    ///
    /// OODA-15: Extended thinking/reasoning mode capture
    ///
    /// OpenAI o-series: Extracted from `output_tokens_details.reasoning_tokens`
    /// Anthropic Claude: Derived from thinking block token count
    ///
    /// These tokens are billed as output tokens but represent internal reasoning
    /// that precedes the visible response.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking_tokens: Option<usize>,

    /// Reasoning/thinking content from the model (if available).
    ///
    /// OODA-15: Extended thinking content capture
    ///
    /// Only populated when:
    /// 1. The model supports visible thinking (e.g., Claude extended thinking)
    /// 2. Content capture is enabled (EDGECODE_CAPTURE_CONTENT=true for tracing)
    ///
    /// OpenAI o-series: Reasoning is hidden (not returned via API)
    /// Anthropic Claude: Thinking content returned in thinking blocks
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking_content: Option<String>,
}
325
326impl LLMResponse {
327    /// Create a new LLM response.
328    pub fn new(content: impl Into<String>, model: impl Into<String>) -> Self {
329        Self {
330            content: content.into(),
331            prompt_tokens: 0,
332            completion_tokens: 0,
333            total_tokens: 0,
334            model: model.into(),
335            finish_reason: None,
336            tool_calls: Vec::new(),
337            metadata: HashMap::new(),
338            cache_hit_tokens: None,
339            thinking_tokens: None,
340            thinking_content: None,
341        }
342    }
343
344    /// Set token usage.
345    pub fn with_usage(mut self, prompt: usize, completion: usize) -> Self {
346        self.prompt_tokens = prompt;
347        self.completion_tokens = completion;
348        self.total_tokens = prompt + completion;
349        self
350    }
351
352    /// Set finish reason.
353    pub fn with_finish_reason(mut self, reason: impl Into<String>) -> Self {
354        self.finish_reason = Some(reason.into());
355        self
356    }
357
358    /// Add tool calls to the response.
359    pub fn with_tool_calls(mut self, calls: Vec<ToolCall>) -> Self {
360        self.tool_calls = calls;
361        self
362    }
363
364    /// Set the number of tokens served from cache.
365    ///
366    /// # Context Engineering Note
367    /// Cache hit tracking is critical for measuring the effectiveness of
368    /// prompt caching strategies. Providers like OpenAI, Anthropic, and Gemini
369    /// support KV-cache and report cached token counts in their responses.
370    ///
371    /// A high cache hit rate (>80%) indicates effective context engineering:
372    /// - Stable prompt prefixes (no timestamps at start)
373    /// - Deterministic message serialization
374    /// - Append-only history patterns
375    pub fn with_cache_hit_tokens(mut self, tokens: usize) -> Self {
376        self.cache_hit_tokens = Some(tokens);
377        self
378    }
379
380    /// Add metadata to the response.
381    ///
382    /// # OODA-13: Response ID Capture
383    /// Providers should call this to add response IDs and other metadata
384    /// for OpenTelemetry GenAI semantic conventions compliance.
385    ///
386    /// Common keys: "id" (response ID), "system_fingerprint", etc.
387    pub fn with_metadata(mut self, key: impl Into<String>, value: serde_json::Value) -> Self {
388        self.metadata.insert(key.into(), value);
389        self
390    }
391
392    /// Set the number of reasoning/thinking tokens.
393    ///
394    /// # OODA-15: Extended Thinking Token Capture
395    /// Use this to record the number of tokens the model used for internal
396    /// reasoning before generating the visible response.
397    ///
398    /// OpenAI o-series: `output_tokens_details.reasoning_tokens`
399    /// Anthropic Claude: Derived from thinking block sizes
400    ///
401    /// These tokens are billed as output tokens but represent hidden reasoning.
402    pub fn with_thinking_tokens(mut self, tokens: usize) -> Self {
403        self.thinking_tokens = Some(tokens);
404        self
405    }
406
407    /// Set the reasoning/thinking content.
408    ///
409    /// # OODA-15: Extended Thinking Content Capture
410    /// Use this to record the model's visible thinking/reasoning text.
411    ///
412    /// Only applicable for models that expose thinking content:
413    /// - Anthropic Claude: Returns thinking blocks with visible reasoning
414    /// - OpenAI o-series: Reasoning is hidden (do not use this method)
415    ///
416    /// Content should be captured only when opt-in is enabled
417    /// (EDGECODE_CAPTURE_CONTENT=true) due to potential sensitivity.
418    pub fn with_thinking_content(mut self, content: impl Into<String>) -> Self {
419        self.thinking_content = Some(content.into());
420        self
421    }
422
423    /// Check if the response has tool calls.
424    pub fn has_tool_calls(&self) -> bool {
425        !self.tool_calls.is_empty()
426    }
427
428    /// Check if the response has thinking/reasoning tokens.
429    ///
430    /// Returns true if the model used extended thinking capabilities.
431    pub fn has_thinking(&self) -> bool {
432        self.thinking_tokens.is_some() || self.thinking_content.is_some()
433    }
434}
435
/// Options for LLM completion requests.
///
/// All fields are optional; `None` leaves the provider's default in effect.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct CompletionOptions {
    /// Maximum number of tokens to generate.
    pub max_tokens: Option<usize>,

    /// Temperature for sampling (0.0 = deterministic, 1.0 = creative).
    pub temperature: Option<f32>,

    /// Top-p (nucleus) sampling.
    pub top_p: Option<f32>,

    /// Stop sequences.
    pub stop: Option<Vec<String>>,

    /// Frequency penalty.
    pub frequency_penalty: Option<f32>,

    /// Presence penalty.
    pub presence_penalty: Option<f32>,

    /// Response format (e.g., "json_object"; see [`CompletionOptions::json_mode`]).
    pub response_format: Option<String>,

    /// System prompt to prepend.
    pub system_prompt: Option<String>,
}
463
464impl CompletionOptions {
465    /// Create options with a specific temperature.
466    pub fn with_temperature(temperature: f32) -> Self {
467        Self {
468            temperature: Some(temperature),
469            ..Default::default()
470        }
471    }
472
473    /// Create options for JSON output.
474    pub fn json_mode() -> Self {
475        Self {
476            response_format: Some("json_object".to_string()),
477            ..Default::default()
478        }
479    }
480}
481
482/// Trait for LLM providers that can generate text completions.
483#[async_trait]
484pub trait LLMProvider: Send + Sync {
485    /// Get the name of this provider.
486    fn name(&self) -> &str;
487
488    /// Get the current model.
489    fn model(&self) -> &str;
490
491    /// Get the maximum context length for the model.
492    fn max_context_length(&self) -> usize;
493
494    /// Generate a completion for the given prompt.
495    async fn complete(&self, prompt: &str) -> Result<LLMResponse>;
496
497    /// Generate a completion with custom options.
498    async fn complete_with_options(
499        &self,
500        prompt: &str,
501        options: &CompletionOptions,
502    ) -> Result<LLMResponse>;
503
504    /// Generate a chat completion with messages.
505    async fn chat(
506        &self,
507        messages: &[ChatMessage],
508        options: Option<&CompletionOptions>,
509    ) -> Result<LLMResponse>;
510
511    /// Generate a chat completion with tool/function calling support.
512    ///
513    /// This method allows the model to call tools/functions defined in the `tools` parameter.
514    /// The model may respond with tool_calls in the response, which should be executed
515    /// and the results sent back via ToolResult messages.
516    ///
517    /// # Arguments
518    /// * `messages` - The conversation messages
519    /// * `tools` - Available tools the model can call
520    /// * `tool_choice` - How the model should select tools (auto, required, or specific)
521    /// * `options` - Additional completion options
522    ///
523    /// # Returns
524    /// An LLMResponse that may contain tool_calls if the model wants to use tools.
525    async fn chat_with_tools(
526        &self,
527        messages: &[ChatMessage],
528        tools: &[ToolDefinition],
529        tool_choice: Option<ToolChoice>,
530        options: Option<&CompletionOptions>,
531    ) -> Result<LLMResponse> {
532        // Default implementation: ignore tools and use regular chat
533        // Providers that support function calling should override this
534        let _ = (tools, tool_choice);
535        self.chat(messages, options).await
536    }
537
538    /// Generate a streaming completion.
539    async fn stream(&self, _prompt: &str) -> Result<BoxStream<'static, Result<String>>> {
540        Err(crate::error::LlmError::NotSupported(
541            "Streaming not supported".to_string(),
542        ))
543    }
544
545    /// Stream chat completion with tool calling support.
546    /// Returns a stream of events containing content chunks, tool call deltas, and finish reasons.
547    ///
548    /// # Arguments
549    /// * `messages` - Chat messages for context
550    /// * `tools` - Available tools the model can call
551    /// * `tool_choice` - How the model should select tools
552    /// * `options` - Additional completion options
553    ///
554    /// # Returns
555    /// A stream of [`StreamChunk`] events that must be accumulated by the consumer.
556    async fn chat_with_tools_stream(
557        &self,
558        _messages: &[ChatMessage],
559        _tools: &[ToolDefinition],
560        _tool_choice: Option<ToolChoice>,
561        _options: Option<&CompletionOptions>,
562    ) -> Result<BoxStream<'static, Result<StreamChunk>>> {
563        Err(crate::error::LlmError::NotSupported(
564            "Streaming tool calls not supported by this provider".to_string(),
565        ))
566    }
567
568    /// Check if the model supports streaming.
569    fn supports_streaming(&self) -> bool {
570        false
571    }
572
573    /// Check if the provider supports streaming with tool calls.
574    fn supports_tool_streaming(&self) -> bool {
575        false
576    }
577
578    /// Check if the model supports JSON mode.
579    fn supports_json_mode(&self) -> bool {
580        false
581    }
582
583    /// Check if the model supports function/tool calling.
584    fn supports_function_calling(&self) -> bool {
585        false
586    }
587
588    /// Get the model name as an `Option<String>`.
589    ///
590    /// This is a convenience method for systems that need an optional model name.
591    /// Returns Some(model_name) if the model is set, None otherwise.
592    ///
593    /// # OODA-27: Model-Specific Edit Format Selection
594    /// This method is used to determine the optimal edit format based on model capabilities:
595    /// - Claude Haiku → WholeFile (format errors common)
596    /// - Claude Sonnet → SearchReplace (excellent reliability)
597    /// - GPT-4 Turbo → UnifiedDiff (reduces lazy coding)
598    fn model_name(&self) -> Option<String> {
599        let m = self.model();
600        if m.is_empty() {
601            None
602        } else {
603            Some(m.to_string())
604        }
605    }
606}
607
608// ============================================================================
609// Image Data for Multimodal Messages (OODA-51)
610// ============================================================================
611
/// Image data for multimodal messages.
///
/// WHY: Vision-capable LLMs (GPT-4V, Claude 3, Gemini Pro Vision) accept images
/// as part of the conversation. This struct provides a provider-agnostic way
/// to attach images to messages, which providers then convert to their specific
/// format (OpenAI: image_url, Anthropic: source.base64).
///
/// # Example
/// ```
/// use edgequake_llm::traits::ImageData;
///
/// let image = ImageData::new("iVBORw0KGgo...", "image/png");
/// assert_eq!(image.mime_type, "image/png");
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct ImageData {
    /// Base64-encoded image data (without data: URI prefix).
    /// For URL-backed images (see [`ImageData::from_url`]) this holds the URL instead.
    pub data: String,

    /// MIME type of the image (e.g., "image/png", "image/jpeg", "image/gif", "image/webp").
    /// [`ImageData::from_url`] stores the sentinel value "url" here.
    pub mime_type: String,

    /// Optional detail level for vision models.
    /// - "auto": Let the model decide (default)
    /// - "low": Lower resolution, faster, cheaper
    /// - "high": Higher resolution, better for detailed images
    #[serde(skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
}
641
642impl ImageData {
643    /// Create new image data from base64 string and MIME type.
644    pub fn new(data: impl Into<String>, mime_type: impl Into<String>) -> Self {
645        Self {
646            data: data.into(),
647            mime_type: mime_type.into(),
648            detail: None,
649        }
650    }
651
652    /// Create image data with specific detail level.
653    pub fn with_detail(mut self, detail: impl Into<String>) -> Self {
654        self.detail = Some(detail.into());
655        self
656    }
657
658    /// Create a data URI for the image (OpenAI format).
659    ///
660    /// Returns: `data:image/png;base64,iVBORw0KGgo...`
661    pub fn to_data_uri(&self) -> String {
662        format!("data:{};base64,{}", self.mime_type, self.data)
663    }
664
665    /// Create image data from a public HTTPS URL.
666    ///
667    /// The URL is passed directly to the vision API instead of being base64-encoded,
668    /// which is more efficient for large images and avoids encoding overhead.
669    ///
670    /// # Example
671    /// ```
672    /// use edgequake_llm::traits::ImageData;
673    /// let img = ImageData::from_url("https://example.com/photo.jpg");
674    /// assert!(img.is_url());
675    /// ```
676    pub fn from_url(url: impl Into<String>) -> Self {
677        Self {
678            data: url.into(),
679            mime_type: "url".to_string(),
680            detail: None,
681        }
682    }
683
684    /// Returns true if this image was constructed from a URL (not base64 data).
685    pub fn is_url(&self) -> bool {
686        self.mime_type == "url"
687    }
688
689    /// Returns the URL string for display/URL images, or the data URI for base64 images.
690    pub fn to_api_url(&self) -> String {
691        if self.is_url() {
692            self.data.clone()
693        } else {
694            self.to_data_uri()
695        }
696    }
697
698    /// Check if MIME type is supported by most vision APIs.
699    pub fn is_supported_mime(&self) -> bool {
700        matches!(
701            self.mime_type.as_str(),
702            "image/png" | "image/jpeg" | "image/gif" | "image/webp" | "url"
703        )
704    }
705}
706
/// Cache control hint for providers that support prompt caching (e.g., Anthropic).
///
/// Some LLM providers (notably Anthropic Claude) support explicit cache breakpoints
/// to optimize KV-cache hits and reduce costs by ~90% for cached tokens.
///
/// # Example
/// ```
/// use edgequake_llm::traits::CacheControl;
///
/// let cache = CacheControl::ephemeral();
/// assert_eq!(cache.cache_type, "ephemeral");
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct CacheControl {
    /// Cache type. Currently supports "ephemeral" (Anthropic's cache_control.type).
    /// Serialized as `type`.
    #[serde(rename = "type")]
    pub cache_type: String,
}
726
727impl CacheControl {
728    /// Create an ephemeral cache control (Anthropic's default).
729    ///
730    /// Ephemeral caches persist for ~5 minutes and are shared across API calls
731    /// with the same prefix.
732    pub fn ephemeral() -> Self {
733        Self {
734            cache_type: "ephemeral".to_string(),
735        }
736    }
737}
738
/// A message in a chat conversation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatMessage {
    /// Role of the message sender.
    pub role: ChatRole,

    /// Content of the message.
    pub content: String,

    /// Optional name for the message sender.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,

    /// Tool calls made by the assistant (only for assistant role).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ToolCall>>,

    /// Tool call ID this message is responding to (only for tool role).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_call_id: Option<String>,

    /// Cache control hint for providers that support prompt caching.
    ///
    /// When set, this tells the provider to establish a cache breakpoint at this message.
    /// Currently supported by Anthropic Claude (cache_control) and Gemini (cachedContent).
    ///
    /// # Example
    /// ```
    /// use edgequake_llm::traits::{ChatMessage, CacheControl};
    ///
    /// let mut msg = ChatMessage::system("You are a helpful assistant");
    /// msg.cache_control = Some(CacheControl::ephemeral());
    /// ```
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_control: Option<CacheControl>,

    /// Optional images for multimodal messages (OODA-51).
    ///
    /// WHY: Vision-capable models accept images alongside text. This field enables
    /// sending images to models like GPT-4V, Claude 3, and Gemini Pro Vision.
    /// Providers convert these to their specific multipart format during serialization.
    ///
    /// # Example
    /// ```
    /// use edgequake_llm::traits::{ChatMessage, ImageData};
    ///
    /// let mut msg = ChatMessage::user("What's in this image?");
    /// msg.images = Some(vec![ImageData::new("iVBORw0...", "image/png")]);
    /// ```
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<ImageData>>,
}
790
791impl ChatMessage {
792    /// Create a system message.
793    pub fn system(content: impl Into<String>) -> Self {
794        Self {
795            role: ChatRole::System,
796            content: content.into(),
797            name: None,
798            tool_calls: None,
799            tool_call_id: None,
800            cache_control: None,
801            images: None,
802        }
803    }
804
805    /// Create a user message.
806    pub fn user(content: impl Into<String>) -> Self {
807        Self {
808            role: ChatRole::User,
809            content: content.into(),
810            name: None,
811            tool_calls: None,
812            tool_call_id: None,
813            cache_control: None,
814            images: None,
815        }
816    }
817
818    /// Create a user message with images (OODA-51).
819    ///
820    /// Use this for multimodal conversations with vision models.
821    pub fn user_with_images(content: impl Into<String>, images: Vec<ImageData>) -> Self {
822        Self {
823            role: ChatRole::User,
824            content: content.into(),
825            name: None,
826            tool_calls: None,
827            tool_call_id: None,
828            cache_control: None,
829            images: if images.is_empty() {
830                None
831            } else {
832                Some(images)
833            },
834        }
835    }
836
837    /// Create an assistant message.
838    pub fn assistant(content: impl Into<String>) -> Self {
839        Self {
840            role: ChatRole::Assistant,
841            content: content.into(),
842            name: None,
843            tool_calls: None,
844            tool_call_id: None,
845            cache_control: None,
846            images: None,
847        }
848    }
849
850    /// Create an assistant message with tool calls.
851    pub fn assistant_with_tools(content: impl Into<String>, tool_calls: Vec<ToolCall>) -> Self {
852        Self {
853            role: ChatRole::Assistant,
854            content: content.into(),
855            name: None,
856            tool_calls: if tool_calls.is_empty() {
857                None
858            } else {
859                Some(tool_calls)
860            },
861            tool_call_id: None,
862            cache_control: None,
863            images: None,
864        }
865    }
866
867    /// Create a tool response message.
868    pub fn tool_result(tool_call_id: impl Into<String>, content: impl Into<String>) -> Self {
869        Self {
870            role: ChatRole::Tool,
871            content: content.into(),
872            name: None,
873            tool_calls: None,
874            tool_call_id: Some(tool_call_id.into()),
875            cache_control: None,
876            images: None,
877        }
878    }
879
880    /// Check if this message has images attached.
881    pub fn has_images(&self) -> bool {
882        self.images.as_ref().map(|v| !v.is_empty()).unwrap_or(false)
883    }
884}
885
/// Role of a chat message sender.
///
/// Serialized in lowercase ("system", "user", …); see also [`ChatRole::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ChatRole {
    /// System message for setting context.
    System,
    /// User input message.
    User,
    /// Assistant response message.
    Assistant,
    /// Tool/function result message.
    Tool,
    /// Function/tool result message (deprecated, use Tool).
    Function,
}
901
902impl ChatRole {
903    /// Convert role to string representation.
904    pub fn as_str(&self) -> &'static str {
905        match self {
906            ChatRole::System => "system",
907            ChatRole::User => "user",
908            ChatRole::Assistant => "assistant",
909            ChatRole::Tool => "tool",
910            ChatRole::Function => "function",
911        }
912    }
913}
914
915/// Trait for providers that can generate text embeddings.
916#[async_trait]
917pub trait EmbeddingProvider: Send + Sync {
918    /// Get the name of this provider.
919    fn name(&self) -> &str;
920
921    /// Get the embedding model.
922    fn model(&self) -> &str;
923
924    /// Get the dimension of the embeddings.
925    fn dimension(&self) -> usize;
926
927    /// Get the maximum number of tokens per input.
928    fn max_tokens(&self) -> usize;
929
930    /// Generate embeddings for a batch of texts.
931    async fn embed(&self, texts: &[String]) -> Result<Vec<Vec<f32>>>;
932
933    /// Generate embedding for a single text.
934    async fn embed_one(&self, text: &str) -> Result<Vec<f32>> {
935        let results = self.embed(&[text.to_string()]).await?;
936        results
937            .into_iter()
938            .next()
939            .ok_or_else(|| crate::error::LlmError::Unknown("Empty embedding result".to_string()))
940    }
941}
942
// Unit tests for the trait-level types: LLMResponse builders, ChatMessage
// constructors, cache-control serialization, image attachments, tool-calling
// types, and StreamChunk variants.
//
// Fix: the StreamChunk `if let` tests previously had no `else` arm, so a
// non-matching variant would let the test pass silently with zero assertions
// executed. They now panic on mismatch, consistent with
// `test_tool_choice_function`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_llm_response_builder() {
        let response = LLMResponse::new("Hello, world!", "gpt-4")
            .with_usage(10, 5)
            .with_finish_reason("stop");

        assert_eq!(response.content, "Hello, world!");
        assert_eq!(response.model, "gpt-4");
        assert_eq!(response.prompt_tokens, 10);
        assert_eq!(response.completion_tokens, 5);
        assert_eq!(response.total_tokens, 15);
        assert_eq!(response.finish_reason, Some("stop".to_string()));
    }

    #[test]
    fn test_llm_response_with_cache_hit_tokens() {
        // Test cache hit tracking for context engineering
        let response = LLMResponse::new("cached response", "gemini-pro")
            .with_usage(1000, 50)
            .with_cache_hit_tokens(800);

        assert_eq!(response.cache_hit_tokens, Some(800));
        assert_eq!(response.prompt_tokens, 1000);
        // Verify 80% cache hit rate
        let cache_rate = response.cache_hit_tokens.unwrap() as f64 / response.prompt_tokens as f64;
        assert!((cache_rate - 0.8).abs() < 0.001);
    }

    #[test]
    fn test_llm_response_no_cache_hit_tokens() {
        // Default should be None when not set
        let response = LLMResponse::new("no cache", "gpt-4").with_usage(100, 20);

        assert_eq!(response.cache_hit_tokens, None);
    }

    #[test]
    fn test_chat_message_constructors() {
        let system = ChatMessage::system("You are helpful");
        assert_eq!(system.role, ChatRole::System);

        let user = ChatMessage::user("Hello");
        assert_eq!(user.role, ChatRole::User);

        let assistant = ChatMessage::assistant("Hi there!");
        assert_eq!(assistant.role, ChatRole::Assistant);
    }

    #[test]
    fn test_cache_control_ephemeral() {
        let cache = CacheControl::ephemeral();
        assert_eq!(cache.cache_type, "ephemeral");
    }

    #[test]
    fn test_cache_control_serialization() {
        let cache = CacheControl::ephemeral();
        let json = serde_json::to_value(&cache).unwrap();

        // Should serialize with "type" key (not "cache_type")
        assert_eq!(json["type"], "ephemeral");
        assert!(!json.as_object().unwrap().contains_key("cache_type"));
    }

    #[test]
    fn test_message_with_cache_control() {
        let mut msg = ChatMessage::system("System prompt");
        msg.cache_control = Some(CacheControl::ephemeral());

        let json = serde_json::to_value(&msg).unwrap();

        // Should include cache_control in JSON
        assert!(json.as_object().unwrap().contains_key("cache_control"));
        assert_eq!(json["cache_control"]["type"], "ephemeral");
    }

    #[test]
    fn test_message_without_cache_control() {
        let msg = ChatMessage::user("Hello");

        let json = serde_json::to_value(&msg).unwrap();

        // Should omit cache_control if None (skip_serializing_if)
        assert!(!json.as_object().unwrap().contains_key("cache_control"));
    }

    #[test]
    fn test_cache_control_roundtrip() {
        let original = CacheControl {
            cache_type: "ephemeral".to_string(),
        };

        // Serialize
        let json_str = serde_json::to_string(&original).unwrap();

        // Deserialize
        let deserialized: CacheControl = serde_json::from_str(&json_str).unwrap();

        assert_eq!(original.cache_type, deserialized.cache_type);
    }

    // =========================================================================
    // ImageData Tests (OODA-51)
    // =========================================================================

    #[test]
    fn test_image_data_new() {
        let image = ImageData::new("iVBORw0KGgo...", "image/png");
        assert_eq!(image.mime_type, "image/png");
        assert_eq!(image.data, "iVBORw0KGgo...");
        assert_eq!(image.detail, None);
    }

    #[test]
    fn test_image_data_with_detail() {
        let image = ImageData::new("data123", "image/jpeg").with_detail("high");
        assert_eq!(image.detail, Some("high".to_string()));
    }

    #[test]
    fn test_image_data_to_data_uri() {
        let image = ImageData::new("base64data", "image/png");
        assert_eq!(image.to_data_uri(), "data:image/png;base64,base64data");
    }

    #[test]
    fn test_image_data_supported_mime() {
        assert!(ImageData::new("", "image/png").is_supported_mime());
        assert!(ImageData::new("", "image/jpeg").is_supported_mime());
        assert!(ImageData::new("", "image/gif").is_supported_mime());
        assert!(ImageData::new("", "image/webp").is_supported_mime());
        assert!(!ImageData::new("", "image/bmp").is_supported_mime());
        assert!(!ImageData::new("", "text/plain").is_supported_mime());
    }

    #[test]
    fn test_chat_message_user_with_images() {
        let images = vec![ImageData::new("data1", "image/png")];
        let msg = ChatMessage::user_with_images("What's this?", images);

        assert_eq!(msg.role, ChatRole::User);
        assert_eq!(msg.content, "What's this?");
        assert!(msg.has_images());
        assert_eq!(msg.images.as_ref().unwrap().len(), 1);
    }

    #[test]
    fn test_chat_message_user_with_empty_images() {
        let msg = ChatMessage::user_with_images("Hello", vec![]);

        assert!(!msg.has_images());
        assert!(msg.images.is_none());
    }

    #[test]
    fn test_image_data_serialization() {
        let image = ImageData::new("base64", "image/png").with_detail("low");
        let json = serde_json::to_value(&image).unwrap();

        assert_eq!(json["data"], "base64");
        assert_eq!(json["mime_type"], "image/png");
        assert_eq!(json["detail"], "low");
    }

    // ---- Iteration 24: Additional traits tests ----

    #[test]
    fn test_tool_definition_function_constructor() {
        let tool = ToolDefinition::function(
            "my_func",
            "Does something",
            serde_json::json!({"type": "object"}),
        );
        assert_eq!(tool.tool_type, "function");
        assert_eq!(tool.function.name, "my_func");
        assert_eq!(tool.function.description, "Does something");
        assert_eq!(tool.function.strict, Some(true));
    }

    #[test]
    fn test_tool_definition_serialization() {
        let tool = ToolDefinition::function(
            "search",
            "Search the web",
            serde_json::json!({"type": "object", "properties": {}}),
        );
        let json = serde_json::to_value(&tool).unwrap();
        assert_eq!(json["type"], "function");
        assert_eq!(json["function"]["name"], "search");
    }

    #[test]
    fn test_tool_call_name_and_arguments() {
        let tc = ToolCall {
            id: "call_1".to_string(),
            call_type: "function".to_string(),
            function: FunctionCall {
                name: "get_weather".to_string(),
                arguments: r#"{"city": "Paris"}"#.to_string(),
            },
        };
        assert_eq!(tc.name(), "get_weather");
        assert_eq!(tc.arguments(), r#"{"city": "Paris"}"#);
    }

    #[test]
    fn test_tool_call_parse_arguments() {
        let tc = ToolCall {
            id: "call_2".to_string(),
            call_type: "function".to_string(),
            function: FunctionCall {
                name: "add".to_string(),
                arguments: r#"{"a": 1, "b": 2}"#.to_string(),
            },
        };
        let parsed: serde_json::Value = tc.parse_arguments().unwrap();
        assert_eq!(parsed["a"], 1);
        assert_eq!(parsed["b"], 2);
    }

    #[test]
    fn test_tool_call_parse_arguments_invalid() {
        let tc = ToolCall {
            id: "call_3".to_string(),
            call_type: "function".to_string(),
            function: FunctionCall {
                name: "bad".to_string(),
                arguments: "not json".to_string(),
            },
        };
        let result: std::result::Result<serde_json::Value, _> = tc.parse_arguments();
        assert!(result.is_err());
    }

    #[test]
    fn test_tool_choice_auto() {
        let tc = ToolChoice::auto();
        let json = serde_json::to_value(&tc).unwrap();
        assert_eq!(json, "auto");
    }

    #[test]
    fn test_tool_choice_required() {
        let tc = ToolChoice::required();
        let json = serde_json::to_value(&tc).unwrap();
        assert_eq!(json, "required");
    }

    #[test]
    fn test_tool_choice_none() {
        let tc = ToolChoice::none();
        let json = serde_json::to_value(&tc).unwrap();
        assert_eq!(json, "none");
    }

    #[test]
    fn test_tool_choice_function() {
        let tc = ToolChoice::function("get_weather");
        if let ToolChoice::Function {
            choice_type,
            function,
        } = tc
        {
            assert_eq!(choice_type, "function");
            assert_eq!(function.name, "get_weather");
        } else {
            panic!("Expected ToolChoice::Function");
        }
    }

    #[test]
    fn test_tool_result_new() {
        let tr = ToolResult::new("call_1", "sunny, 20C");
        assert_eq!(tr.tool_call_id, "call_1");
        assert_eq!(tr.role, "tool");
        assert_eq!(tr.content, "sunny, 20C");
    }

    #[test]
    fn test_tool_result_error() {
        let tr = ToolResult::error("call_2", "City not found");
        assert_eq!(tr.tool_call_id, "call_2");
        assert_eq!(tr.content, "Error: City not found");
    }

    #[test]
    fn test_llm_response_with_tool_calls() {
        let tc = vec![ToolCall {
            id: "c1".to_string(),
            call_type: "function".to_string(),
            function: FunctionCall {
                name: "search".to_string(),
                arguments: "{}".to_string(),
            },
        }];
        let resp = LLMResponse::new("", "gpt-4").with_tool_calls(tc);
        assert!(resp.has_tool_calls());
        assert_eq!(resp.tool_calls.len(), 1);
    }

    #[test]
    fn test_llm_response_no_tool_calls() {
        let resp = LLMResponse::new("hello", "gpt-4");
        assert!(!resp.has_tool_calls());
    }

    #[test]
    fn test_llm_response_with_metadata() {
        let resp =
            LLMResponse::new("hi", "gpt-4").with_metadata("id", serde_json::json!("resp_123"));
        assert_eq!(
            resp.metadata.get("id"),
            Some(&serde_json::json!("resp_123"))
        );
    }

    #[test]
    fn test_llm_response_with_thinking() {
        let resp = LLMResponse::new("answer", "claude-3")
            .with_thinking_tokens(500)
            .with_thinking_content("Let me think...");
        assert!(resp.has_thinking());
        assert_eq!(resp.thinking_tokens, Some(500));
        assert_eq!(resp.thinking_content, Some("Let me think...".to_string()));
    }

    #[test]
    fn test_llm_response_has_thinking_tokens_only() {
        let resp = LLMResponse::new("x", "o1").with_thinking_tokens(100);
        assert!(resp.has_thinking());
    }

    #[test]
    fn test_llm_response_has_thinking_content_only() {
        let resp = LLMResponse::new("x", "claude").with_thinking_content("hmm");
        assert!(resp.has_thinking());
    }

    #[test]
    fn test_llm_response_no_thinking() {
        let resp = LLMResponse::new("x", "gpt-4");
        assert!(!resp.has_thinking());
    }

    #[test]
    fn test_completion_options_default() {
        let opts = CompletionOptions::default();
        assert!(opts.max_tokens.is_none());
        assert!(opts.temperature.is_none());
        assert!(opts.response_format.is_none());
    }

    #[test]
    fn test_completion_options_with_temperature() {
        let opts = CompletionOptions::with_temperature(0.7);
        assert_eq!(opts.temperature, Some(0.7));
        assert!(opts.max_tokens.is_none());
    }

    #[test]
    fn test_completion_options_json_mode() {
        let opts = CompletionOptions::json_mode();
        assert_eq!(opts.response_format, Some("json_object".to_string()));
    }

    #[test]
    fn test_chat_role_as_str() {
        assert_eq!(ChatRole::System.as_str(), "system");
        assert_eq!(ChatRole::User.as_str(), "user");
        assert_eq!(ChatRole::Assistant.as_str(), "assistant");
        assert_eq!(ChatRole::Tool.as_str(), "tool");
        assert_eq!(ChatRole::Function.as_str(), "function");
    }

    #[test]
    fn test_chat_role_serialization() {
        let json = serde_json::to_value(ChatRole::User).unwrap();
        assert_eq!(json, "user");
        let json = serde_json::to_value(ChatRole::Tool).unwrap();
        assert_eq!(json, "tool");
    }

    #[test]
    fn test_chat_message_assistant_with_tools() {
        let tc = vec![ToolCall {
            id: "c1".to_string(),
            call_type: "function".to_string(),
            function: FunctionCall {
                name: "search".to_string(),
                arguments: "{}".to_string(),
            },
        }];
        let msg = ChatMessage::assistant_with_tools("I'll search", tc);
        assert_eq!(msg.role, ChatRole::Assistant);
        assert!(msg.tool_calls.is_some());
        assert_eq!(msg.tool_calls.as_ref().unwrap().len(), 1);
    }

    #[test]
    fn test_chat_message_assistant_with_empty_tools() {
        let msg = ChatMessage::assistant_with_tools("just text", vec![]);
        assert!(msg.tool_calls.is_none());
    }

    #[test]
    fn test_chat_message_tool_result() {
        let msg = ChatMessage::tool_result("call_1", "result data");
        assert_eq!(msg.role, ChatRole::Tool);
        assert_eq!(msg.tool_call_id, Some("call_1".to_string()));
        assert_eq!(msg.content, "result data");
    }

    #[test]
    fn test_chat_message_has_images_false() {
        let msg = ChatMessage::user("hello");
        assert!(!msg.has_images());
    }

    #[test]
    fn test_image_data_equality() {
        let a = ImageData::new("data", "image/png");
        let b = ImageData::new("data", "image/png");
        assert_eq!(a, b);

        let c = ImageData::new("data", "image/jpeg");
        assert_ne!(a, c);
    }

    #[test]
    fn test_stream_chunk_content() {
        let chunk = StreamChunk::Content("hello".to_string());
        if let StreamChunk::Content(text) = chunk {
            assert_eq!(text, "hello");
        } else {
            panic!("Expected Content");
        }
    }

    #[test]
    fn test_stream_chunk_thinking() {
        let chunk = StreamChunk::ThinkingContent {
            text: "reasoning...".to_string(),
            tokens_used: Some(50),
            budget_total: Some(10000),
        };
        if let StreamChunk::ThinkingContent {
            text,
            tokens_used,
            budget_total,
        } = chunk
        {
            assert_eq!(text, "reasoning...");
            assert_eq!(tokens_used, Some(50));
            assert_eq!(budget_total, Some(10000));
        } else {
            // Without this arm a non-matching variant would pass silently.
            panic!("Expected ThinkingContent");
        }
    }

    #[test]
    fn test_stream_chunk_finished() {
        let chunk = StreamChunk::Finished {
            reason: "stop".to_string(),
            ttft_ms: Some(120.5),
        };
        if let StreamChunk::Finished { reason, ttft_ms } = chunk {
            assert_eq!(reason, "stop");
            assert_eq!(ttft_ms, Some(120.5));
        } else {
            // Without this arm a non-matching variant would pass silently.
            panic!("Expected Finished");
        }
    }

    #[test]
    fn test_stream_chunk_tool_call_delta() {
        let chunk = StreamChunk::ToolCallDelta {
            index: 0,
            id: Some("call_1".to_string()),
            function_name: Some("search".to_string()),
            function_arguments: Some(r#"{"q":"#.to_string()),
        };
        if let StreamChunk::ToolCallDelta {
            index,
            id,
            function_name,
            function_arguments,
        } = chunk
        {
            assert_eq!(index, 0);
            assert_eq!(id, Some("call_1".to_string()));
            assert_eq!(function_name, Some("search".to_string()));
            assert!(function_arguments.is_some());
        } else {
            // Without this arm a non-matching variant would pass silently.
            panic!("Expected ToolCallDelta");
        }
    }
}