use anyhow::Result;
use futures_util::Stream;
use serde::{Deserialize, Serialize};
use std::{
collections::HashMap,
pin::Pin,
time::{Duration, Instant},
};
mod duration_serde {
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::time::Duration;
pub fn serialize<S>(duration: &Duration, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
duration.as_millis().serialize(serializer)
}
pub fn deserialize<'de, D>(deserializer: D) -> Result<Duration, D::Error>
where
D: Deserializer<'de>,
{
let millis = u64::deserialize(deserializer)?;
Ok(Duration::from_millis(millis))
}
}
mod option_duration_serde {
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use std::time::Duration;
pub fn serialize<S>(duration: &Option<Duration>, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
match duration {
Some(d) => Some(d.as_millis()).serialize(serializer),
None => None::<u128>.serialize(serializer),
}
}
pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Duration>, D::Error>
where
D: Deserializer<'de>,
{
let millis_opt = Option::<u64>::deserialize(deserializer)?;
Ok(millis_opt.map(Duration::from_millis))
}
}
/// A request description: the chat messages plus the generation and routing
/// settings that accompany them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMRequest {
    /// Messages that make up the request, in order.
    pub messages: Vec<ChatMessage>,
    /// Optional system prompt text.
    pub system_prompt: Option<String>,
    /// Temperature setting (valid range is not defined in this file).
    pub temperature: f32,
    /// Optional cap on the number of tokens.
    pub max_tokens: Option<usize>,
    /// Category of work this request represents.
    pub use_case: UseCase,
    /// Priority assigned to the request.
    pub priority: Priority,
    /// Optional timeout, encoded as whole milliseconds.
    ///
    /// `default` is required here: a field with `#[serde(with = ...)]` is
    /// otherwise mandatory on input, unlike the other `Option` fields of this
    /// struct, which fall back to `None` when the key is absent.
    #[serde(default, with = "option_duration_serde")]
    pub timeout: Option<Duration>,
}
/// One message in a chat exchange: who said it, what was said, and optional
/// free-form metadata.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ChatMessage {
    /// Role tag for the author of this message.
    pub role: ChatRole,
    /// Text body of the message.
    pub content: String,
    /// Optional string-keyed JSON values attached to the message.
    pub metadata: Option<HashMap<String, serde_json::Value>>,
}
/// Role tag carried by a `ChatMessage`.
///
/// `PartialEq` and `Eq` are derived alongside `Hash` so that roles can be
/// compared and used as keys in hashed collections; `Hash` on its own allows
/// neither.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum ChatRole {
    /// The `System` role.
    System,
    /// The `User` role.
    User,
    /// The `Assistant` role.
    Assistant,
}
/// Category of work a request represents; carried in `LLMRequest::use_case`.
///
/// `Eq` is derived together with `PartialEq` and `Hash` so the type satisfies
/// the key bounds of `HashMap` / `HashSet`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum UseCase {
    SimpleQuery,
    ComplexReasoning,
    SparqlGeneration,
    KnowledgeExtraction,
    Conversation,
    Analysis,
    CodeGeneration,
}
/// Priority level attached to a request; carried in `LLMRequest::priority`.
///
/// Only equality comparison is derived; no ordering between the variants is
/// defined by this type.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum Priority {
    Low,
    Normal,
    High,
    Critical,
}
/// A complete (non-streamed) response together with bookkeeping about how it
/// was produced.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct LLMResponse {
    /// Response text.
    pub content: String,
    /// Name of the model recorded for this response.
    pub model_used: String,
    /// Name of the provider recorded for this response.
    pub provider_used: String,
    /// Token counts and cost recorded for this response.
    pub usage: Usage,
    /// Latency recorded for this response; encoded through `duration_serde`
    /// as whole milliseconds.
    #[serde(with = "duration_serde")]
    pub latency: Duration,
    /// Optional quality score (scale is not defined in this file).
    pub quality_score: Option<f32>,
    /// String-keyed JSON values attached to the response.
    pub metadata: HashMap<String, serde_json::Value>,
}
/// One piece of a streamed response; the item type yielded by
/// `LLMResponseStream::stream`.
#[derive(Clone, Debug)]
pub struct LLMResponseChunk {
    /// Text carried by this chunk.
    pub content: String,
    /// Flag marking a final chunk.
    // NOTE(review): presumably set only on the last chunk of a stream —
    // confirm against the producers.
    pub is_final: bool,
    /// Index of this chunk (starting value is not defined in this file).
    pub chunk_index: usize,
    /// Name of the model recorded for this chunk.
    pub model_used: String,
    /// Name of the provider recorded for this chunk.
    pub provider_used: String,
    /// Latency recorded for this chunk.
    pub latency: Duration,
    /// String-keyed JSON values attached to the chunk.
    pub metadata: HashMap<String, serde_json::Value>,
}
/// Handle to a streamed response: the chunk stream itself plus the model,
/// provider and start instant recorded alongside it.
///
/// No traits are derived for this type.
pub struct LLMResponseStream {
    /// Pinned, boxed, `Send` stream yielding `Result<LLMResponseChunk>` items.
    pub stream: Pin<Box<dyn Stream<Item = Result<LLMResponseChunk>> + Send>>,
    /// Name of the model recorded for this stream.
    pub model_used: String,
    /// Name of the provider recorded for this stream.
    pub provider_used: String,
    /// Instant recorded as the start of the stream.
    pub started_at: Instant,
}
/// Token counts and cost recorded for a response.
///
/// `Default` yields zero for every field.
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
pub struct Usage {
    /// Number of prompt tokens.
    pub prompt_tokens: usize,
    /// Number of completion tokens.
    pub completion_tokens: usize,
    /// Total token count; stored as its own field, not computed by this type.
    pub total_tokens: usize,
    /// Cost figure (currency and units are not defined in this file).
    pub cost: f64,
}
/// A provider/model pair together with the score assigned to it.
#[derive(Clone, Debug)]
pub struct RoutingCandidate {
    /// Provider name.
    pub provider: String,
    /// Model name.
    pub model: String,
    /// Score for this candidate (scale and direction are not defined here).
    pub score: f32,
}