use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use thiserror::Error;
/// Errors produced by [`LlmProvider`] and [`EmbeddingProvider`] implementations.
///
/// `#[error(...)]` attributes (via `thiserror`) supply the `Display` text.
#[derive(Debug, Error)]
pub enum LlmError {
/// Could not reach the backend at all (DNS, TLS, transport failure).
#[error("Connection failed: {0}")]
ConnectionFailed(String),
/// The backend was reached but the request itself failed.
#[error("Request failed: {0}")]
RequestFailed(String),
/// The backend replied, but the payload could not be understood.
#[error("Invalid response: {0}")]
InvalidResponse(String),
/// The backend rejected the request due to rate limiting.
#[error("Rate limited")]
RateLimited,
/// The provider is not currently usable (e.g. not configured or offline).
#[error("Provider not available")]
NotAvailable,
/// Input exceeded a size cap; fields are (actual bytes, maximum bytes).
/// See [`LlmRequest::validate`].
#[error("Input too large: {0} bytes exceeds maximum {1} bytes")]
InputTooLarge(usize, usize),
}
/// Maximum allowed `prompt` size in bytes (100 KiB); enforced by
/// [`LlmRequest::validate`]. Byte length, not character count.
pub const MAX_PROMPT_SIZE: usize = 100 * 1024;
/// Maximum allowed `system` prompt size in bytes (10 KiB); enforced by
/// [`LlmRequest::validate`].
pub const MAX_SYSTEM_SIZE: usize = 10 * 1024;
/// A completion request sent to an [`LlmProvider`].
///
/// Construct via [`LlmRequest::simple`] or [`LlmRequest::with_role`], then
/// optionally call [`LlmRequest::validate`] before dispatch.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmRequest {
// Optional tenant scoping; omitted from serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
pub tenant_id: Option<String>,
// System prompt; size-capped by MAX_SYSTEM_SIZE in validate().
pub system: String,
// User prompt; size-capped by MAX_PROMPT_SIZE in validate().
pub prompt: String,
// Sampling temperature (constructors default this to 0.7).
pub temperature: f32,
// Completion token budget (constructors default this to 1024).
pub max_tokens: u32,
// Optional sampling knobs; None means "use the provider's default".
pub top_p: Option<f32>,
pub presence_penalty: Option<f32>,
pub frequency_penalty: Option<f32>,
}
impl LlmRequest {
    /// Builds a request for `prompt` with a generic assistant system prompt
    /// and the shared sampling defaults (temperature 0.7, 1024 max tokens).
    pub fn simple(prompt: &str) -> Self {
        // Delegate so the defaults live in exactly one place; previously both
        // constructors duplicated the full field list and could drift apart.
        Self::with_role("You are a helpful assistant.", prompt)
    }

    /// Builds a request with an explicit `system` prompt. This is the single
    /// source of truth for the default sampling parameters.
    pub fn with_role(system: &str, prompt: &str) -> Self {
        Self {
            tenant_id: None,
            system: system.to_string(),
            prompt: prompt.to_string(),
            temperature: 0.7,
            max_tokens: 1024,
            top_p: None,
            presence_penalty: None,
            frequency_penalty: None,
        }
    }

    /// Checks that `prompt` and `system` fit within the size caps.
    ///
    /// Sizes are byte lengths (`str::len`), not character counts.
    ///
    /// # Errors
    /// Returns [`LlmError::InputTooLarge`] carrying the actual and maximum
    /// byte counts when either field exceeds its cap.
    pub fn validate(&self) -> Result<(), LlmError> {
        if self.prompt.len() > MAX_PROMPT_SIZE {
            return Err(LlmError::InputTooLarge(self.prompt.len(), MAX_PROMPT_SIZE));
        }
        if self.system.len() > MAX_SYSTEM_SIZE {
            return Err(LlmError::InputTooLarge(self.system.len(), MAX_SYSTEM_SIZE));
        }
        Ok(())
    }
}
/// A completed response returned by [`LlmProvider::complete`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmResponse {
// The generated text.
pub content: String,
// Identifier of the model that produced the response.
pub model: String,
// Token count if the provider reported one; None otherwise.
pub tokens_used: Option<u32>,
// Wall-clock request latency in milliseconds.
pub latency_ms: u64,
// Optional tracing correlation id; omitted from serialized output when absent.
#[serde(skip_serializing_if = "Option::is_none")]
pub trace_root: Option<String>,
}
/// A chat-completion backend.
///
/// Implementors must be `Send + Sync` (shared across tasks) and `Debug`.
#[async_trait]
pub trait LlmProvider: Send + Sync + std::fmt::Debug {
    /// Human-readable identifier for this provider.
    fn name(&self) -> &str;

    /// Whether the provider can currently serve requests.
    async fn is_available(&self) -> bool;

    /// Executes a full completion request.
    async fn complete(&self, request: LlmRequest) -> Result<LlmResponse, LlmError>;

    /// Convenience wrapper: sends `prompt` as an [`LlmRequest::simple`]
    /// request and yields only the response text.
    async fn ask(&self, prompt: &str) -> Result<String, LlmError> {
        self.complete(LlmRequest::simple(prompt))
            .await
            .map(|response| response.content)
    }
}
/// A text-embedding backend.
#[async_trait]
pub trait EmbeddingProvider: Send + Sync + std::fmt::Debug {
/// Embeds `text` into a vector of floats.
///
/// # Errors
/// Returns an [`LlmError`] when the backend cannot produce an embedding.
/// NOTE(review): embedding dimensionality is implementation-defined here —
/// confirm callers don't assume a fixed length.
async fn embed(&self, text: &str) -> Result<Vec<f32>, LlmError>;
}