use async_trait::async_trait;
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
pub mod llamacpp;
pub mod lmstudio;
pub mod ollama;
pub mod openrouter;
pub mod venice;
pub mod vllm;
mod http;
pub use llamacpp::LlamaCppBackend;
pub use lmstudio::LmStudioBackend;
pub use ollama::OllamaBackend;
pub use openrouter::OpenRouterBackend;
pub use venice::VeniceBackend;
pub use vllm::VllmBackend;
/// Error type shared by every backend implementation in this module.
///
/// The `Display` text of each variant is produced by its `#[error(...)]`
/// attribute; `std::error::Error` is implemented by the `thiserror` derive.
#[derive(Debug, thiserror::Error)]
pub enum BackendError {
    /// The backend could not be reached. The payload is a free-form
    /// description that is appended to the message.
    #[error("backend unreachable: {0}")]
    Unreachable(String),

    /// The backend answered with an HTTP status that was reported as an error.
    ///
    /// `status` is the numeric status code and `body` is text included in the
    /// message. Which statuses are reported this way is decided by the
    /// individual backends (not visible from this definition).
    #[error("backend returned HTTP {status}: {body}")]
    BadStatus { status: u16, body: String },

    /// An API key was missing or invalid. The payload is interpolated after
    /// "for" in the message — presumably a backend/provider name; confirm at
    /// the construction sites.
    #[error("missing or invalid API key for {0}")]
    MissingApiKey(&'static str),

    /// The backend does not have the requested model. The payload is
    /// interpolated into the message — presumably the model identifier.
    #[error("model not found on backend: {0}")]
    ModelNotFound(String),

    /// The backend timed out. Carries no further detail.
    #[error("backend timeout")]
    Timeout,

    /// A `reqwest` error. `#[from]` generates `From<reqwest::Error>`, so the
    /// `?` operator converts such errors into this variant automatically.
    #[error("transport error: {0}")]
    Transport(#[from] reqwest::Error),

    /// A `serde_json` error. `#[from]` generates `From<serde_json::Error>`,
    /// so the `?` operator converts such errors into this variant
    /// automatically.
    #[error("serialization error: {0}")]
    Serde(#[from] serde_json::Error),

    /// Catch-all variant; the payload is displayed verbatim as the message.
    #[error("{0}")]
    Other(String),
}
/// Shorthand for a `Result` whose error type is [`BackendError`].
pub type BackendResult<T> = std::result::Result<T, BackendError>;
/// Description of a single model, as returned (in a `Vec`) by
/// [`Backend::list_models`].
///
/// Serializable and deserializable via serde using the field names below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BackendModel {
    /// Identifier of the model.
    pub model_id: String,

    /// Context window of the model, if one is recorded.
    // NOTE(review): unit is presumably tokens — confirm against the backends.
    pub context_window: Option<u32>,

    /// Flag whose exact meaning is not established by this definition.
    // NOTE(review): presumably marks models served natively by the backend
    // (as opposed to aliased/proxied ones) — confirm where it is set.
    pub native: bool,
}
/// Health snapshot of a backend, as returned by [`Backend::health`].
///
/// Serializable and deserializable via serde using the field names below.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BackendHealth {
    /// Whether the backend was reachable.
    pub reachable: bool,

    /// Latency in milliseconds (per the field name), if one is recorded.
    // NOTE(review): presumably the round-trip time of the health probe —
    // confirm in the backend implementations.
    pub latency_ms: Option<u32>,

    /// Text of the last error, if one is recorded.
    pub last_error: Option<String>,
}
/// Wire format attached to a [`Job`].
///
/// Because of `rename_all = "lowercase"`, serde represents the variants as
/// the strings `"openai"` and `"anthropic"`.
// NOTE(review): presumably selects which API's request/response schema the
// job payload follows — confirm in the backend implementations.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum WireFormat {
    /// Serialized as `"openai"`.
    Openai,

    /// Serialized as `"anthropic"`.
    Anthropic,
}
/// A unit of work passed by reference to [`Backend::execute`].
#[derive(Debug, Clone)]
pub struct Job {
    /// UUID identifying this job.
    pub job_id: Uuid,

    /// Identifier of the model the job refers to.
    pub model_id: String,

    /// Arbitrary JSON value carried with the job.
    // NOTE(review): presumably the request body, shaped according to
    // `format` — confirm against the callers.
    pub request: serde_json::Value,

    /// Wire format associated with this job.
    pub format: WireFormat,

    /// Deadline in milliseconds (per the field name).
    // NOTE(review): whether this is relative to job start or to some other
    // reference point is not visible here — confirm.
    pub deadline_ms: u32,
}
/// Summary returned by [`Backend::execute`] on success.
///
/// The derived `Default` yields `None` for both token counts and `0` for
/// `duration_ms`.
#[derive(Debug, Clone, Default)]
pub struct JobResult {
    /// Input token count, if one is recorded.
    pub input_tokens: Option<u32>,

    /// Output token count, if one is recorded.
    pub output_tokens: Option<u32>,

    /// Duration in milliseconds (per the field name).
    pub duration_ms: u32,
}
/// Destination for byte chunks; [`Backend::execute`] receives one as
/// `&mut dyn JobSink`.
///
/// Implementors must be `Send`. The async method is provided through the
/// `async_trait` macro.
#[async_trait]
pub trait JobSink: Send {
    /// Hands one chunk of bytes to the sink.
    ///
    /// # Errors
    ///
    /// Returns a [`BackendError`] if the sink reports a failure.
    async fn send_chunk(&mut self, bytes: Bytes) -> BackendResult<()>;
}
/// Common interface implemented by the backend types of this module.
///
/// Implementors must be `Send + Sync`. Async methods are provided through the
/// `async_trait` macro.
#[async_trait]
pub trait Backend: Send + Sync {
    /// Returns a static string naming the kind of backend.
    fn kind(&self) -> &'static str;

    /// Returns the identifier of this backend instance.
    fn id(&self) -> &str;

    /// Lists the models known to this backend.
    ///
    /// # Errors
    ///
    /// Returns a [`BackendError`] if the listing fails.
    async fn list_models(&self) -> BackendResult<Vec<BackendModel>>;

    /// Reports the health of this backend.
    ///
    /// # Errors
    ///
    /// Returns a [`BackendError`] if the check itself fails.
    async fn health(&self) -> BackendResult<BackendHealth>;

    /// Executes `job`, with `sink` available to receive byte chunks via
    /// [`JobSink::send_chunk`], and returns a [`JobResult`] on success.
    ///
    /// # Errors
    ///
    /// Returns a [`BackendError`] if execution fails.
    async fn execute(&self, job: &Job, sink: &mut dyn JobSink) -> BackendResult<JobResult>;
}