// ferrous_llm_core/types.rs

1//! Core types for LLM requests and responses.
2//!
3//! This module defines standardized types that are used across all providers,
4//! including request/response structures, messages, and common data types.
5
6use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use serde_json::Value;
9use std::collections::HashMap;
10
/// A chat request containing messages and parameters.
///
/// Provider-agnostic request shape shared by all providers. Usually
/// constructed fluently via [`ChatRequest::builder`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatRequest {
    /// The conversation messages
    pub messages: Vec<Message>,
    /// Request parameters (temperature, max_tokens, etc.)
    pub parameters: Parameters,
    /// Additional metadata and provider-specific extensions
    pub metadata: Metadata,
}
21
/// A completion request for non-chat text generation.
///
/// Unlike [`ChatRequest`], the input is a single raw prompt string rather
/// than a structured list of messages.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompletionRequest {
    /// The text prompt to complete
    pub prompt: String,
    /// Request parameters (same set as used for chat requests)
    pub parameters: Parameters,
    /// Additional metadata
    pub metadata: Metadata,
}
32
33/// Common parameters used across providers.
34#[derive(Debug, Clone, Serialize, Deserialize, Default)]
35pub struct Parameters {
36    /// Controls randomness in the response (0.0 to 2.0)
37    pub temperature: Option<f32>,
38    /// Maximum number of tokens to generate
39    pub max_tokens: Option<u32>,
40    /// Nucleus sampling parameter (0.0 to 1.0)
41    pub top_p: Option<f32>,
42    /// Alternative to temperature, called Top-k sampling
43    pub top_k: Option<u32>,
44    /// Sequences where the API will stop generating further tokens
45    pub stop_sequences: Vec<String>,
46    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency
47    pub frequency_penalty: Option<f32>,
48    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far
49    pub presence_penalty: Option<f32>,
50}
51
52/// Metadata for requests, including provider-specific extensions.
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct Metadata {
55    /// Provider-specific extensions
56    pub extensions: HashMap<String, Value>,
57    /// Optional request ID for tracking
58    pub request_id: Option<String>,
59    /// Optional user ID for tracking
60    pub user_id: Option<String>,
61    /// Timestamp when the request was created
62    pub created_at: DateTime<Utc>,
63}
64
/// A message in a conversation.
///
/// Prefer the convenience constructors ([`Message::user`],
/// [`Message::assistant`], [`Message::system`], ...) over filling the
/// fields by hand.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
    /// The role of the message sender
    pub role: Role,
    /// The content of the message
    pub content: MessageContent,
}
73
/// The role of a message sender.
///
/// Serialized in lowercase: `"user"`, `"assistant"`, `"system"`, `"tool"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    /// Message from the user
    User,
    /// Message from the AI assistant
    Assistant,
    /// System message (instructions, context)
    System,
    /// Message from a tool/function call
    Tool,
}
87
/// Content of a message, which can be text or multimodal.
///
/// `#[serde(untagged)]`: the serialized form carries no variant tag, so
/// deserialization tries variants in declaration order — a JSON string
/// becomes `Text`, an array becomes `Multimodal`, an object becomes `Tool`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum MessageContent {
    /// Simple text content
    Text(String),
    /// Multimodal content with text and other media
    Multimodal(Vec<ContentPart>),
    /// Tool-related content (calls and responses)
    Tool(ToolContent),
}
99
/// Tool-related message content.
///
/// Covers both directions of a tool exchange: an assistant message that
/// proposes calls (`tool_calls` set) and a tool's reply to one of them
/// (`tool_call_id` set).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolContent {
    /// Tool calls made by the assistant
    pub tool_calls: Option<Vec<ToolCall>>,
    /// Tool call ID if this is a tool response
    pub tool_call_id: Option<String>,
    /// Optional text content alongside tool data
    pub text: Option<String>,
}
110
111impl MessageContent {
112    /// Create text content
113    pub fn text(content: impl Into<String>) -> Self {
114        Self::Text(content.into())
115    }
116
117    /// Create multimodal content
118    pub fn multimodal(parts: Vec<ContentPart>) -> Self {
119        Self::Multimodal(parts)
120    }
121
122    /// Create tool content with tool calls
123    pub fn tool_calls(tool_calls: Vec<ToolCall>) -> Self {
124        Self::Tool(ToolContent {
125            tool_calls: Some(tool_calls),
126            tool_call_id: None,
127            text: None,
128        })
129    }
130
131    /// Create tool response content
132    pub fn tool_response(content: impl Into<String>, tool_call_id: impl Into<String>) -> Self {
133        Self::Tool(ToolContent {
134            tool_calls: None,
135            tool_call_id: Some(tool_call_id.into()),
136            text: Some(content.into()),
137        })
138    }
139}
140
/// A part of multimodal message content.
///
/// Serialized internally tagged: a `"type"` field of `"text"`, `"image"`,
/// or `"audio"` selects the variant.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentPart {
    /// Text content
    Text { text: String },
    /// Image content
    Image {
        /// Image data or URL
        image_source: ImageSource,
        /// Optional detail level for image processing
        detail: Option<String>,
    },
    /// Audio content
    Audio {
        /// Audio data or URL
        audio_url: String,
        /// Audio format (mp3, wav, etc.)
        format: Option<String>,
    },
}
162impl ContentPart {
163    /// Create text content part
164    pub fn text(text: impl Into<String>) -> Self {
165        Self::Text { text: text.into() }
166    }
167
168    /// Create image content part
169    pub fn image(source: ImageSource) -> Self {
170        Self::Image {
171            image_source: source,
172            detail: None,
173        }
174    }
175
176    #[cfg(feature = "dynamic-image")]
177    pub fn image_dynamic(image: image::DynamicImage) -> Self {
178        Self::Image {
179            image_source: ImageSource::DynamicImage(image),
180            detail: None,
181        }
182    }
183
184    pub fn image_url(url: impl Into<String>) -> Self {
185        Self::Image {
186            image_source: ImageSource::Url(url.into()),
187            detail: None,
188        }
189    }
190
191    /// Create image content part with detail level
192    pub fn image_with_detail(url: impl Into<String>, detail: impl Into<String>) -> Self {
193        let detail_str = detail.into();
194        Self::Image {
195            image_source: ImageSource::Url(url.into()),
196            detail: Some(detail_str),
197        }
198    }
199
200    /// Create audio content part
201    pub fn audio(url: impl Into<String>, format: Option<String>) -> Self {
202        Self::Audio {
203            audio_url: url.into(),
204            format,
205        }
206    }
207}
208
/// Where an image's data comes from.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ImageSource {
    /// The URL or base64-encoded image data
    Url(String),

    #[cfg(feature = "dynamic-image")]
    #[serde(skip_serializing, skip_deserializing)]
    /// A decoded in-memory image. Feature-gated, and skipped by serde in
    /// both directions because `image::DynamicImage` is not serializable here.
    DynamicImage(image::DynamicImage),
}
219
#[cfg(feature = "dynamic-image")]
impl ImageSource {
    /// Wrap a decoded in-memory image as an image source.
    pub fn dynamic_image(image: image::DynamicImage) -> Self {
        // Delegate to the `From<image::DynamicImage>` conversion.
        image.into()
    }
}
226
#[cfg(feature = "dynamic-image")]
impl From<image::DynamicImage> for ImageSource {
    /// Wrap a decoded image as [`ImageSource::DynamicImage`].
    fn from(value: image::DynamicImage) -> Self {
        ImageSource::DynamicImage(value)
    }
}
233
234/// Converts an ImageSource to a String representation.
235///
236/// - `Url` variants are returned as-is
237/// - `DynamicImage` variants are converted to base64-encoded PNG data URLs
238///
239/// Note: This conversion is lossy - the original type cannot be determined from the resulting string.
240impl From<ImageSource> for String {
241    fn from(source: ImageSource) -> Self {
242        match source {
243            ImageSource::Url(url) => url,
244
245            #[cfg(feature = "dynamic-image")]
246            ImageSource::DynamicImage(image) => crate::util::dynamic_image::image_to_base64(&image),
247
248            #[cfg(not(feature = "dynamic-image"))]
249            #[allow(unreachable_patterns)]
250            _ => panic!(
251                "ImageSource::DynamicImage variant requires the 'dynamic-image' feature to be enabled"
252            ),
253        }
254    }
255}
256
/// A tool/function call made by the AI.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    /// Unique identifier for this tool call; a tool response refers back
    /// to it via `ToolContent::tool_call_id`
    pub id: String,
    /// The type of tool call (usually "function"); serialized as `"type"`
    #[serde(rename = "type")]
    pub call_type: String,
    /// The function being called
    pub function: FunctionCall,
}
268
/// A function call within a tool call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionCall {
    /// Name of the function to call
    pub name: String,
    /// Arguments to pass to the function, as a JSON-encoded string
    /// (not a parsed `Value` — callers must parse it themselves)
    pub arguments: String,
}
277
/// Definition of a tool/function that can be called.
///
/// This is the declaration sent with a request; [`ToolCall`] is the
/// model's invocation of it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tool {
    /// Type of tool (usually "function"); serialized as `"type"`
    #[serde(rename = "type")]
    pub tool_type: String,
    /// The function definition
    pub function: Function,
}
287
/// Definition of a function that can be called.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Function {
    /// Name of the function
    pub name: String,
    /// Description of what the function does (shown to the model)
    pub description: String,
    /// JSON schema for the function parameters, stored as raw JSON
    pub parameters: Value,
}
298
/// Usage statistics for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Usage {
    /// Number of tokens in the prompt
    pub prompt_tokens: u32,
    /// Number of tokens in the completion
    pub completion_tokens: u32,
    /// Total number of tokens used, as reported by the provider
    /// (typically prompt + completion — not recomputed here)
    pub total_tokens: u32,
}
309
/// Reason why the model stopped generating.
///
/// Serialized in snake_case (`"stop"`, `"stop_sequence"`, `"tool_calls"`, ...).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum FinishReason {
    /// The model reached a natural stopping point
    Stop,
    /// The model reached the maximum token limit
    Length,
    /// The model generated a stop sequence
    StopSequence,
    /// The model made a tool call
    ToolCalls,
    /// Content was filtered
    ContentFilter,
    /// An error occurred
    Error,
}
327
/// An embedding vector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Embedding {
    /// The embedding vector
    pub embedding: Vec<f32>,
    /// Index of the input text this embedding corresponds to
    /// (positions in a batched embedding request)
    pub index: usize,
}
336
/// Request for image generation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageRequest {
    /// Text prompt for image generation
    pub prompt: String,
    /// Optional negative prompt (what to avoid)
    pub negative_prompt: Option<String>,
    /// Number of images to generate; `None` leaves it to the provider
    pub n: Option<u32>,
    /// Image size specification (provider-specific string)
    pub size: Option<String>,
    /// Image quality setting (provider-specific string)
    pub quality: Option<String>,
    /// Response format (url or b64_json)
    pub response_format: Option<String>,
    /// Additional metadata
    pub metadata: Metadata,
}
355
/// Request for speech-to-text conversion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpeechToTextRequest {
    /// Audio data (base64 encoded or file path — which one is
    /// provider-dependent; the type does not distinguish them)
    pub audio: String,
    /// Audio format (mp3, wav, etc.)
    pub format: String,
    /// Language of the audio (optional; omitted means auto-detect is up to the provider)
    pub language: Option<String>,
    /// Additional metadata
    pub metadata: Metadata,
}
368
/// Request for text-to-speech conversion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextToSpeechRequest {
    /// Text to convert to speech
    pub text: String,
    /// Voice to use (provider-specific identifier)
    pub voice: String,
    /// Audio format for output
    pub format: Option<String>,
    /// Speed of speech (0.25 to 4.0)
    pub speed: Option<f32>,
    /// Additional metadata
    pub metadata: Metadata,
}
383
384/// Trait for chat response types.
385pub trait ChatResponse: Send + Sync {
386    /// Get the text content of the response
387    fn content(&self) -> String;
388
389    /// Get usage statistics if available
390    fn usage(&self) -> Option<Usage>;
391
392    /// Get the reason why generation finished
393    fn finish_reason(&self) -> Option<FinishReason>;
394
395    /// Get response metadata
396    fn metadata(&self) -> Metadata;
397
398    /// Get tool calls if any were made
399    fn tool_calls(&self) -> Option<Vec<ToolCall>> {
400        None
401    }
402
403    /// Convert response to a Message for conversation history
404    fn as_message(&self) -> Message {
405        let content = if let Some(tool_calls) = self.tool_calls() {
406            MessageContent::Tool(ToolContent {
407                tool_calls: Some(tool_calls),
408                tool_call_id: None,
409                text: if self.content().is_empty() {
410                    None
411                } else {
412                    Some(self.content())
413                },
414            })
415        } else {
416            MessageContent::Text(self.content())
417        };
418
419        Message {
420            role: Role::Assistant,
421            content,
422        }
423    }
424}
425
/// Trait for completion response types.
///
/// Counterpart of [`ChatResponse`] for plain-prompt completions; exposes
/// the same usage/finish/metadata accessors but no tool-call support.
pub trait CompletionResponse: Send + Sync {
    /// Get the completion text
    fn text(&self) -> String;

    /// Get usage statistics if available
    fn usage(&self) -> Option<Usage>;

    /// Get the reason why generation finished
    fn finish_reason(&self) -> Option<FinishReason>;

    /// Get response metadata
    fn metadata(&self) -> Metadata;
}
440
/// Trait for image generation response types.
pub trait ImageResponse: Send + Sync {
    /// Get the generated images (one [`GeneratedImage`] per requested image)
    fn images(&self) -> Vec<GeneratedImage>;

    /// Get response metadata
    fn metadata(&self) -> Metadata;
}
449
/// A generated image.
///
/// Which of `url` / `b64_json` is populated depends on the
/// `response_format` requested in [`ImageRequest`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GeneratedImage {
    /// Image URL or base64 data
    pub url: Option<String>,
    /// Base64 encoded image data
    pub b64_json: Option<String>,
    /// Revised prompt used for generation
    pub revised_prompt: Option<String>,
}
460
/// Trait for speech-to-text response types.
pub trait SpeechToTextResponse: Send + Sync {
    /// Get the transcribed text
    fn text(&self) -> String;

    /// Get the detected language if available
    fn language(&self) -> Option<String>;

    /// Get response metadata
    fn metadata(&self) -> Metadata;
}
472
/// Trait for text-to-speech response types.
pub trait TextToSpeechResponse: Send + Sync {
    /// Get the audio data (encoded bytes; see [`Self::format`])
    fn audio_data(&self) -> Vec<u8>;

    /// Get the audio format
    fn format(&self) -> String;

    /// Get response metadata
    fn metadata(&self) -> Metadata;
}
484
485// Convenience constructors
486impl Message {
487    /// Create a user message with text content
488    pub fn user(content: impl Into<String>) -> Self {
489        Self {
490            role: Role::User,
491            content: MessageContent::Text(content.into()),
492        }
493    }
494
495    /// Create an assistant message with text content
496    pub fn assistant(content: impl Into<String>) -> Self {
497        Self {
498            role: Role::Assistant,
499            content: MessageContent::Text(content.into()),
500        }
501    }
502
503    /// Create a system message with text content
504    pub fn system(content: impl Into<String>) -> Self {
505        Self {
506            role: Role::System,
507            content: MessageContent::Text(content.into()),
508        }
509    }
510
511    /// Create a tool response message
512    pub fn tool_response(content: impl Into<String>, tool_call_id: impl Into<String>) -> Self {
513        Self {
514            role: Role::Tool,
515            content: MessageContent::Tool(ToolContent {
516                tool_calls: None,
517                tool_call_id: Some(tool_call_id.into()),
518                text: Some(content.into()),
519            }),
520        }
521    }
522
523    /// Create an assistant message with tool calls
524    pub fn assistant_with_tools(content: impl Into<String>, tool_calls: Vec<ToolCall>) -> Self {
525        let content_str = content.into();
526        Self {
527            role: Role::Assistant,
528            content: MessageContent::Tool(ToolContent {
529                tool_calls: Some(tool_calls),
530                tool_call_id: None,
531                text: if content_str.is_empty() {
532                    None
533                } else {
534                    Some(content_str)
535                },
536            }),
537        }
538    }
539
540    /// Create a user message with multimodal content
541    pub fn user_multimodal(content: Vec<ContentPart>) -> Self {
542        Self {
543            role: Role::User,
544            content: MessageContent::Multimodal(content),
545        }
546    }
547
548    /// Create an assistant message with multimodal content
549    pub fn assistant_multimodal(content: Vec<ContentPart>) -> Self {
550        Self {
551            role: Role::Assistant,
552            content: MessageContent::Multimodal(content),
553        }
554    }
555}
556
557impl Default for Metadata {
558    fn default() -> Self {
559        Self {
560            extensions: HashMap::new(),
561            request_id: None,
562            user_id: None,
563            created_at: Utc::now(),
564        }
565    }
566}
567
568impl ChatRequest {
569    /// Create a new chat request builder
570    pub fn builder() -> ChatRequestBuilder {
571        ChatRequestBuilder::new()
572    }
573}
574
/// Builder for ChatRequest
///
/// Obtain via [`ChatRequest::builder`] or [`ChatRequestBuilder::new`];
/// finish with [`ChatRequestBuilder::build`].
pub struct ChatRequestBuilder {
    /// Messages accumulated so far
    messages: Vec<Message>,
    /// Sampling parameters; starts at `Parameters::default()`
    parameters: Parameters,
    /// Request metadata; starts at `Metadata::default()`
    metadata: Metadata,
}
581
582impl Default for ChatRequestBuilder {
583    fn default() -> Self {
584        Self::new()
585    }
586}
587
588impl ChatRequestBuilder {
589    pub fn new() -> Self {
590        Self {
591            messages: Vec::new(),
592            parameters: Parameters::default(),
593            metadata: Metadata::default(),
594        }
595    }
596
597    pub fn message(mut self, message: Message) -> Self {
598        self.messages.push(message);
599        self
600    }
601
602    pub fn messages(mut self, messages: Vec<Message>) -> Self {
603        self.messages = messages;
604        self
605    }
606
607    pub fn temperature(mut self, temperature: f32) -> Self {
608        self.parameters.temperature = Some(temperature);
609        self
610    }
611
612    pub fn max_tokens(mut self, max_tokens: u32) -> Self {
613        self.parameters.max_tokens = Some(max_tokens);
614        self
615    }
616
617    pub fn top_p(mut self, top_p: f32) -> Self {
618        self.parameters.top_p = Some(top_p);
619        self
620    }
621
622    pub fn stop_sequences(mut self, stop_sequences: Vec<String>) -> Self {
623        self.parameters.stop_sequences = stop_sequences;
624        self
625    }
626
627    pub fn request_id(mut self, request_id: String) -> Self {
628        self.metadata.request_id = Some(request_id);
629        self
630    }
631
632    pub fn user_id(mut self, user_id: String) -> Self {
633        self.metadata.user_id = Some(user_id);
634        self
635    }
636
637    pub fn extension(mut self, key: String, value: Value) -> Self {
638        self.metadata.extensions.insert(key, value);
639        self
640    }
641    /// Add a user message with text content
642    pub fn user_message(mut self, content: impl Into<String>) -> Self {
643        self.messages.push(Message::user(content));
644        self
645    }
646
647    /// Add an assistant message with text content
648    pub fn assistant_message(mut self, content: impl Into<String>) -> Self {
649        self.messages.push(Message::assistant(content));
650        self
651    }
652
653    /// Add a system message with text content
654    pub fn system_message(mut self, content: impl Into<String>) -> Self {
655        self.messages.push(Message::system(content));
656        self
657    }
658
659    /// Add a tool response message
660    pub fn tool_response(
661        mut self,
662        content: impl Into<String>,
663        tool_call_id: impl Into<String>,
664    ) -> Self {
665        self.messages
666            .push(Message::tool_response(content, tool_call_id));
667        self
668    }
669
670    /// Add an assistant message with tool calls
671    pub fn assistant_with_tools(
672        mut self,
673        content: impl Into<String>,
674        tool_calls: Vec<ToolCall>,
675    ) -> Self {
676        self.messages
677            .push(Message::assistant_with_tools(content, tool_calls));
678        self
679    }
680
681    /// Add a user message with multimodal content
682    pub fn user_multimodal(mut self, content: Vec<ContentPart>) -> Self {
683        self.messages.push(Message::user_multimodal(content));
684        self
685    }
686
687    /// Add an assistant message with multimodal content
688    pub fn assistant_multimodal(mut self, content: Vec<ContentPart>) -> Self {
689        self.messages.push(Message::assistant_multimodal(content));
690        self
691    }
692
693    pub fn build(self) -> ChatRequest {
694        ChatRequest {
695            messages: self.messages,
696            parameters: self.parameters,
697            metadata: self.metadata,
698        }
699    }
700}