Skip to main content

provider_agent/backend/
mod.rs

//! Backend abstraction. See `plan/V2_AGENT_SPEC.md` §5.
//!
//! Each adapter implements [`Backend`], exposing a uniform API over the
//! supported inference engines (vLLM, llama.cpp, LM Studio, Ollama) and remote
//! BYOK passthroughs (OpenRouter, Venice). The agent's discovery and (in a
//! later task) job-executor layers consume backends through this trait.
7
8use async_trait::async_trait;
9use bytes::Bytes;
10use serde::{Deserialize, Serialize};
11use uuid::Uuid;
12
13pub mod llamacpp;
14pub mod lmstudio;
15pub mod ollama;
16pub mod openrouter;
17pub mod venice;
18pub mod vllm;
19
20mod http;
21
22pub use llamacpp::LlamaCppBackend;
23pub use lmstudio::LmStudioBackend;
24pub use ollama::OllamaBackend;
25pub use openrouter::OpenRouterBackend;
26pub use venice::VeniceBackend;
27pub use vllm::VllmBackend;
28
29/// Errors that can be returned at the backend boundary.
30#[derive(Debug, thiserror::Error)]
31pub enum BackendError {
32    #[error("backend unreachable: {0}")]
33    Unreachable(String),
34    #[error("backend returned HTTP {status}: {body}")]
35    BadStatus { status: u16, body: String },
36    #[error("missing or invalid API key for {0}")]
37    MissingApiKey(&'static str),
38    #[error("model not found on backend: {0}")]
39    ModelNotFound(String),
40    #[error("backend timeout")]
41    Timeout,
42    #[error("transport error: {0}")]
43    Transport(#[from] reqwest::Error),
44    #[error("serialization error: {0}")]
45    Serde(#[from] serde_json::Error),
46    #[error("{0}")]
47    Other(String),
48}
49
50pub type BackendResult<T> = std::result::Result<T, BackendError>;
51
52/// A model exposed by a backend.
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct BackendModel {
55    pub model_id: String,
56    pub context_window: Option<u32>,
57    /// `true` for local backends; `false` for BYOK passthroughs.
58    pub native: bool,
59}
60
61/// Health-check result used by discovery.
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct BackendHealth {
64    pub reachable: bool,
65    pub latency_ms: Option<u32>,
66    pub last_error: Option<String>,
67}
68
69/// Wire format expected by the upstream client. The agent does not transcode
70/// between these — it picks a backend that natively speaks the required shape.
71#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
72#[serde(rename_all = "lowercase")]
73pub enum WireFormat {
74    Openai,
75    Anthropic,
76}
77
78/// A unit of work received from the coordinator.
79#[derive(Debug, Clone)]
80pub struct Job {
81    pub job_id: Uuid,
82    pub model_id: String,
83    pub request: serde_json::Value,
84    pub format: WireFormat,
85    pub deadline_ms: u32,
86}
87
/// Result returned to the coordinator when a job completes successfully.
///
/// [`Default`] yields `None` for both token counts and `0` for the duration.
#[derive(Debug, Clone, Default)]
pub struct JobResult {
    /// Number of input tokens, if a count is available.
    /// NOTE(review): `None` presumably means the adapter could not determine it.
    pub input_tokens: Option<u32>,
    /// Number of output tokens, if a count is available.
    /// NOTE(review): `None` presumably means the adapter could not determine it.
    pub output_tokens: Option<u32>,
    /// Duration of the job in milliseconds.
    pub duration_ms: u32,
}
95
96/// Sink used by adapters to push streaming chunks back to the coordinator.
97///
98/// Adapters do not parse SSE — they relay raw upstream bytes faithfully. The
99/// coordinator's own tokenizer is the source of truth for token counts.
100#[async_trait]
101pub trait JobSink: Send {
102    async fn send_chunk(&mut self, bytes: Bytes) -> BackendResult<()>;
103}
104
105#[async_trait]
106pub trait Backend: Send + Sync {
107    fn kind(&self) -> &'static str;
108
109    /// A stable identifier for this backend instance — typically `"<kind>:<url>"`
110    /// for local adapters and `"<kind>"` for remote passthroughs.
111    fn id(&self) -> &str;
112
113    async fn list_models(&self) -> BackendResult<Vec<BackendModel>>;
114
115    async fn health(&self) -> BackendResult<BackendHealth>;
116
117    async fn execute(&self, job: &Job, sink: &mut dyn JobSink) -> BackendResult<JobResult>;
118}