use crate::api::{ModelAliasSpec, ModelTask};
use crate::error::Result;
use async_trait::async_trait;
use std::any::Any;
/// Capability description returned by [`ModelProvider::capabilities`].
#[derive(Debug, Clone)]
pub struct ProviderCapabilities {
    /// The [`ModelTask`] values this provider lists as supported.
    pub supported_tasks: Vec<ModelTask>,
}
/// Health status reported by [`ModelProvider::health`].
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare states directly
/// (for example `health == ProviderHealth::Healthy`) instead of pattern
/// matching every time.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProviderHealth {
    /// No problem reported.
    Healthy,
    /// Degraded state; the payload is a free-form `String`
    /// (presumably a human-readable reason — confirm with implementors).
    Degraded(String),
    /// Unhealthy state; the payload is a free-form `String`
    /// (presumably a human-readable reason — confirm with implementors).
    Unhealthy(String),
}
/// Interface implemented by a model backend: it identifies itself, reports
/// its capabilities and health, and loads models on request.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait ModelProvider: Send + Sync {
    /// Returns a `'static` string identifying this provider.
    fn provider_id(&self) -> &'static str;

    /// Returns this provider's [`ProviderCapabilities`].
    fn capabilities(&self) -> ProviderCapabilities;

    /// Loads the model described by `spec` and returns it as a type-erased
    /// [`LoadedModelHandle`].
    ///
    /// # Errors
    ///
    /// Returns the crate's error type on failure; the exact conditions are
    /// defined by each implementation.
    // NOTE(review): callers presumably downcast the handle to a concrete
    // model type — confirm against the call sites.
    async fn load(&self, spec: &ModelAliasSpec) -> Result<LoadedModelHandle>;

    /// Reports the provider's current [`ProviderHealth`].
    async fn health(&self) -> ProviderHealth;

    /// Optional warm-up hook.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    async fn warmup(&self) -> Result<()> {
        Ok(())
    }
}
/// Type-erased, reference-counted handle returned by [`ModelProvider::load`].
///
/// The payload is stored as `dyn Any + Send + Sync`, so a consumer has to
/// downcast it to recover a concrete type.
pub type LoadedModelHandle = std::sync::Arc<dyn Any + Send + Sync>;
/// Interface for a model that turns text into `f32` vectors.
///
/// Implementors must be `Send + Sync`; the `Any` supertrait additionally
/// requires them to be `'static`.
#[async_trait]
pub trait EmbeddingModel: Send + Sync + Any {
    /// Embeds a batch of `texts`, returning a list of `f32` vectors.
    ///
    /// # Errors
    ///
    /// Returns the crate's error type on failure; the exact conditions are
    /// defined by each implementation.
    // NOTE(review): presumably one vector per input text, in input order —
    // confirm with implementors.
    async fn embed(&self, texts: Vec<&str>) -> Result<Vec<Vec<f32>>>;

    /// Returns the model's dimension count
    /// (presumably the length of each embedding vector — TODO confirm).
    fn dimensions(&self) -> u32;

    /// Returns the identifier of the underlying model.
    fn model_id(&self) -> &str;

    /// Optional warm-up hook.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    async fn warmup(&self) -> Result<()> {
        Ok(())
    }
}
/// One scored entry in the list returned by [`RerankerModel::rerank`].
///
/// Derives `PartialEq` so results can be compared in tests and by callers.
/// `Eq` is intentionally not derived because `score` is an `f32`.
#[derive(Debug, Clone, PartialEq)]
pub struct ScoredDoc {
    /// Index of the document
    /// (presumably its position in the `docs` slice given to `rerank` — TODO confirm).
    pub index: usize,
    /// Score assigned to the document; scale and direction are
    /// implementation-defined.
    pub score: f32,
    /// Optional document text; `None` when the text is not included.
    pub text: Option<String>,
}
/// Interface for a model that scores documents against a query.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait RerankerModel: Send + Sync {
    /// Scores `docs` against `query` and returns the results as
    /// [`ScoredDoc`] values.
    ///
    /// # Errors
    ///
    /// Returns the crate's error type on failure; the exact conditions are
    /// defined by each implementation.
    // NOTE(review): ordering of the returned list (e.g. best-first) is not
    // fixed by this signature — confirm with implementors.
    async fn rerank(&self, query: &str, docs: &[&str]) -> Result<Vec<ScoredDoc>>;

    /// Optional warm-up hook.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    async fn warmup(&self) -> Result<()> {
        Ok(())
    }
}
/// Optional tuning knobs passed to [`GeneratorModel::generate`].
///
/// Every field is optional and `Default` yields all-`None`. How a `None`
/// value is interpreted is left to the implementation.
///
/// Derives `PartialEq` so option sets can be compared in tests and by
/// callers. `Eq` is intentionally not derived because of the `f32` fields.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct GenerationOptions {
    /// Optional token limit (presumably an upper bound on generated tokens —
    /// TODO confirm).
    pub max_tokens: Option<usize>,
    /// Optional sampling temperature.
    pub temperature: Option<f32>,
    /// Optional `top_p` sampling parameter.
    pub top_p: Option<f32>,
}
/// Value returned by [`GeneratorModel::generate`].
#[derive(Debug, Clone)]
pub struct GenerationResult {
    /// The produced text.
    pub text: String,
    /// Token accounting for the call; `None` when it is not provided.
    pub usage: Option<TokenUsage>,
}
/// Token counters attached to a [`GenerationResult`].
///
/// Derives `Default` (all counters zero) and `PartialEq`/`Eq` so usage
/// records can be built incrementally and compared directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TokenUsage {
    /// Number of tokens counted for the prompt.
    pub prompt_tokens: usize,
    /// Number of tokens counted for the completion.
    pub completion_tokens: usize,
    /// Total token count (presumably prompt + completion; this type does
    /// not enforce that relationship).
    pub total_tokens: usize,
}
/// Interface for a model that produces text from a list of messages.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait GeneratorModel: Send + Sync {
    /// Generates a [`GenerationResult`] from `messages`, tuned by `options`.
    ///
    /// # Errors
    ///
    /// Returns the crate's error type on failure; the exact conditions are
    /// defined by each implementation.
    // NOTE(review): `messages` is a plain list of strings; any role or
    // turn-ordering convention is up to the implementation — confirm.
    async fn generate(
        &self,
        messages: &[String],
        options: GenerationOptions,
    ) -> Result<GenerationResult>;

    /// Optional warm-up hook.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    async fn warmup(&self) -> Result<()> {
        Ok(())
    }
}