/// The type-safe bridge to any Ollama-compatible backend.
///
/// Agent logic talks to this trait rather than to a concrete transport, so an
/// implementation can be a real HTTP client, a `MockOllamaClient` for unit
/// tests (no running server needed), or a middleware wrapper (retry, logging,
/// rate-limiting) around another client.
///
/// Implementors must be `Send + Sync`.
///
/// NOTE(review): the `'life0` / `'async_trait` lifetimes and the
/// `Pin<Box<dyn Future + Send>>` return types look like the expanded form of
/// `async fn` methods under `#[async_trait]` — confirm against the crate source.
pub trait OllamaClient: Send + Sync {
// Required methods
/// Single-turn text completion (`/api/generate`).
///
/// Returns a boxed `Send` future that resolves to the `GenerateResponse`,
/// or to an `OxideError` on failure.
fn generate<'life0, 'async_trait>(
&'life0 self,
req: GenerateRequest,
) -> Pin<Box<dyn Future<Output = Result<GenerateResponse, OxideError>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
/// Multi-turn chat completion (`/api/chat`), including tool/function calling.
///
/// Returns a boxed `Send` future that resolves to the `ChatResponse`,
/// or to an `OxideError` on failure.
fn chat<'life0, 'async_trait>(
&'life0 self,
req: ChatRequest,
) -> Pin<Box<dyn Future<Output = Result<ChatResponse, OxideError>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
/// Produce dense vector embeddings for one or more texts (`/api/embed`).
///
/// Returns a boxed `Send` future that resolves to the `EmbedResponse`,
/// or to an `OxideError` on failure.
fn embed<'life0, 'async_trait>(
&'life0 self,
req: EmbedRequest,
) -> Pin<Box<dyn Future<Output = Result<EmbedResponse, OxideError>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
/// List models available on the Ollama server (`/api/tags`).
///
/// Takes no request payload. Returns a boxed `Send` future that resolves to
/// the `ListModelsResponse`, or to an `OxideError` on failure.
fn list_models<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<ListModelsResponse, OxideError>> + Send + 'async_trait>>
where Self: 'async_trait,
'life0: 'async_trait;
/// Stream a generate response token-by-token.
///
/// Ollama sends newline-delimited JSON (NDJSON) when `stream: true`. Each
/// line is decoded into a `GenerateResponse` without buffering the entire
/// response body, keeping memory overhead minimal regardless of output length.
///
/// Unlike the methods above, this returns the stream directly rather than a
/// future. NOTE(review): `BoxStream` is not defined in this view, so how
/// per-item errors are surfaced is not visible here — confirm at its definition.
fn stream_generate(
&self,
req: GenerateRequest,
) -> BoxStream<GenerateResponse>;
/// Stream a chat response token-by-token.
///
/// Returns the stream directly rather than a future, like `stream_generate`.
fn stream_chat(&self, req: ChatRequest) -> BoxStream<ChatResponse>;
}Expand description
The type-safe bridge to any Ollama-compatible backend.
Decoupling agent logic from transport via a trait means you can:
- Swap in a real HTTP client for production.
- Swap in a `MockOllamaClient` for unit testing — no running server needed.
- Wrap the client with middleware (retry, logging, rate-limiting) transparently.
Required Methods§
fn generate<'life0, 'async_trait>(
&'life0 self,
req: GenerateRequest,
) -> Pin<Box<dyn Future<Output = Result<GenerateResponse, OxideError>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
Single-turn text completion (/api/generate).
fn chat<'life0, 'async_trait>(
&'life0 self,
req: ChatRequest,
) -> Pin<Box<dyn Future<Output = Result<ChatResponse, OxideError>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
Multi-turn chat completion (/api/chat), including tool/function calling.
fn embed<'life0, 'async_trait>(
&'life0 self,
req: EmbedRequest,
) -> Pin<Box<dyn Future<Output = Result<EmbedResponse, OxideError>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
Produce dense vector embeddings for one or more texts (/api/embed).
fn list_models<'life0, 'async_trait>(
&'life0 self,
) -> Pin<Box<dyn Future<Output = Result<ListModelsResponse, OxideError>> + Send + 'async_trait>>
where
Self: 'async_trait,
'life0: 'async_trait,
List models available on the Ollama server (/api/tags).
fn stream_generate(&self, req: GenerateRequest) -> BoxStream<GenerateResponse>
Stream a generate response token-by-token.
Ollama sends newline-delimited JSON (NDJSON) when the request sets `stream: true`.
This method decodes each line into a `GenerateResponse` without buffering
the entire response body, keeping memory overhead minimal regardless of
output length.
fn stream_chat(&self, req: ChatRequest) -> BoxStream<ChatResponse>
Stream a chat response token-by-token.