// ollama-api-rs 0.2.0
//
// An async Rust SDK for the Ollama API with OpenAI compatibility
// Documentation
// Copyright 2026 Cloudflavor GmbH

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! # Library Name Note
//!
//! This library is published as `ollama-api-rs` on crates.io, but it is imported
//! in code under the crate name `oai_sdk` (not `ollama_api_rs`).
//! Users should write `use oai_sdk::{ModelClient, ChatRequest, Message};`
//!
//! # Features
//!
//! - **Async/await support** - Built on top of Tokio for efficient async operations
//! - **Easy configuration** - Simple client setup with `ModelClient::builder()`
//! - **Streaming responses** - Real-time streaming for both chat and generation
//! - **Full Ollama API compatibility** - Complete coverage of all Ollama API endpoints
//! - **Modular design** - Separate modules for chat, generate, embed, and model operations
//! - **Comprehensive error handling** - Custom error types with detailed context
//! - **Tool calling** - Support for function/tool calling in chat completions
//! - **Structured outputs** - JSON schema validation support for responses
//! - **Model lifecycle management** - Load/unload models programmatically
//! - **Blob management** - Push and check model blobs
//! - **Batch embeddings** - Efficient batch processing for embeddings
//!
//! # Examples
//!
//! ## Basic Chat Completion
//!
//! ```no_run
//! use oai_sdk::{ModelClient, ChatRequest, Message};
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let client = ModelClient::builder()
//!         .base_url("http://localhost:11434".to_string())
//!         .build()?;
//!
//!     let request = ChatRequest {
//!         model: "llama3.1:8b".to_string(),
//!         messages: vec![
//!             Message {
//!                 role: "user".to_string(),
//!                 content: "Why is the sky blue?".to_string(),
//!                 images: None,
//!                 tool_calls: None,
//!                 tool_name: None,
//!                 thinking: None,
//!             }
//!         ],
//!         stream: false,
//!         format: None,
//!         options: None,
//!         keep_alive: None,
//!         tools: None,
//!         think: None,
//!     };
//!
//!     let response = client.chat(request).await?;
//!     println!("{}", response.message.content);
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Streaming Chat
//!
//! ```no_run
//! use oai_sdk::{ModelClient, ChatRequest, Message};
//! use tokio_stream::StreamExt;
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let client = ModelClient::builder()
//!         .base_url("http://localhost:11434".to_string())
//!         .build()?;
//!
//!     let request = ChatRequest {
//!         model: "llama3.1:8b".to_string(),
//!         messages: vec![
//!             Message {
//!                 role: "user".to_string(),
//!                 content: "Write a story about Rust".to_string(),
//!                 images: None,
//!                 tool_calls: None,
//!                 tool_name: None,
//!                 thinking: None,
//!             }
//!         ],
//!         stream: true,
//!         format: None,
//!         options: None,
//!         keep_alive: None,
//!         tools: None,
//!         think: None,
//!     };
//!
//!     let mut stream = client.chat_stream(request).await?;
//!     while let Some(result) = stream.next().await {
//!         match result {
//!             Ok(response) => print!("{}", response.message.content),
//!             Err(e) => eprintln!("Error: {}", e),
//!         }
//!     }
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Text Generation
//!
//! ```no_run
//! use oai_sdk::{ModelClient, GenerateRequest};
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let client = ModelClient::builder()
//!         .base_url("http://localhost:11434".to_string())
//!         .build()?;
//!
//!     let request = GenerateRequest {
//!         model: "llama3.1:8b".to_string(),
//!         prompt: "Why is the sky blue?".to_string(),
//!         stream: false,
//!         ..Default::default()
//!     };
//!
//!     let response = client.generate(request).await?;
//!     println!("{}", response.response);
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Embeddings
//!
//! ```no_run
//! use oai_sdk::{ModelClient, EmbedRequest, EmbedInput};
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let client = ModelClient::builder()
//!         .base_url("http://localhost:11434".to_string())
//!         .build()?;
//!
//!     let request = EmbedRequest {
//!         model: "llama3:8b".to_string(),
//!         input: EmbedInput::Single("Hello, world!".to_string()),
//!         truncate: Some(true),
//!         options: None,
//!         keep_alive: None,
//!         dimensions: None,
//!     };
//!
//!     let response = client.embed(request).await?;
//!     println!("Embeddings: {:?}", response.embeddings);
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Tool Calling
//!
//! ```no_run
//! use oai_sdk::{ModelClient, ChatRequest, Message, Tool, ToolFunction};
//! use serde_json::json;
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let client = ModelClient::builder()
//!         .base_url("http://localhost:11434".to_string())
//!         .build()?;
//!
//!     let tools = vec![
//!         Tool {
//!             tool_type: "function".to_string(),
//!             function: ToolFunction {
//!                 name: "get_current_weather".to_string(),
//!                 description: "Get the current weather for a location".to_string(),
//!                 parameters: json!({
//!                     "type": "object",
//!                     "properties": {
//!                         "location": {
//!                             "type": "string",
//!                             "description": "The location to get the weather for"
//!                         },
//!                         "format": {
//!                             "type": "string",
//!                             "enum": ["celsius", "fahrenheit"]
//!                         }
//!                     },
//!                     "required": ["location", "format"]
//!                 }),
//!             }
//!         }
//!     ];
//!
//!     let request = ChatRequest {
//!         model: "llama3.1:8b".to_string(),
//!         messages: vec![
//!             Message {
//!                 role: "user".to_string(),
//!                 content: "What is the weather in Tokyo?".to_string(),
//!                 images: None,
//!                 tool_calls: None,
//!                 tool_name: None,
//!                 thinking: None,
//!             }
//!         ],
//!         stream: false,
//!         format: None,
//!         options: None,
//!         keep_alive: None,
//!         tools: Some(tools),
//!         think: None,
//!     };
//!
//!     let response = client.chat(request).await?;
//!     if let Some(tool_calls) = response.message.tool_calls {
//!         for tool_call in tool_calls {
//!             println!("Tool call: {}", tool_call.function.name);
//!         }
//!     }
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Model Management
//!
//! ```no_run
//! use oai_sdk::{ModelClient, ShowModelRequest, CopyModelRequest, DeleteModelRequest};
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let client = ModelClient::builder()
//!         .base_url("http://localhost:11434".to_string())
//!         .build()?;
//!
//!     // List models
//!     let models = client.list_models().await?;
//!     for model in models {
//!         println!("Model: {}", model.name);
//!     }
//!
//!     // Show model information
//!     let request = ShowModelRequest {
//!         model: "llama3.1:8b".to_string(),
//!         verbose: Some(true),
//!     };
//!     let info = client.show_model(request).await?;
//!     println!("Model info: {:?}", info);
//!
//!     // Copy model
//!     let copy_req = CopyModelRequest {
//!         source: "llama3.1:8b".to_string(),
//!         destination: "llama3-backup".to_string(),
//!     };
//!     client.copy_model(copy_req).await?;
//!
//!     // Delete model
//!     let delete_req = DeleteModelRequest {
//!         model: "llama3-backup".to_string(),
//!     };
//!     client.delete_model(delete_req).await?;
//!
//!     Ok(())
//! }
//! ```
//!
//! ## OpenAI-Compatible Endpoints
//!
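//! Ollama exposes OpenAI-compatible endpoints, so OpenAI-style clients can be
//! pointed at its base URL. With this crate, the same endpoints are called
//! through `ModelClient` using the OpenAI-style request types: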
//!
//! ```no_run
//! use oai_sdk::{ModelClient, ChatCompletionsRequest, ChatMessage};
//!
//! #[tokio::main]
//! async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let client = ModelClient::builder()
//!         .base_url("http://localhost:11434".to_string())
//!         .build()?;
//!
//!     let request = ChatCompletionsRequest {
//!         model: "llama3.1:8b".to_string(),
//!         messages: vec![
//!             ChatMessage {
//!                 role: "user".to_string(),
//!                 content: serde_json::json!("Why is the sky blue?"),
//!             }
//!         ],
//!         stream: Some(false),
//!         ..Default::default()
//!     };
//!
//!     let response = client.chat_completions(request).await?;
//!     println!("{}", response.choices[0].message.content);
//!
//!     Ok(())
//! }
//! ```
//!
//! ## Model Lifecycle (requires `local` feature)
//!
//! ```no_run
//! # #[cfg(feature = "local")]
//! # {
//! use oai_sdk::ModelClient;
//!
//! # #[tokio::main]
//! # async fn main() -> Result<(), Box<dyn std::error::Error>> {
//!     let client = ModelClient::builder()
//!         .base_url("http://localhost:11434".to_string())
//!         .build()?;
//!
//!     // Load model into memory
//!     client.load_model("llama3.1:8b").await?;
//!     println!("Model loaded");
//!
//!     // Unload model from memory
//!     client.unload_model("llama3.1:8b").await?;
//!     println!("Model unloaded");
//!
//!     Ok(())
//! # }
//! # }
//! ```
//!
//! ## API Modules
//!
//! The modules below are private to the crate; their public types are
//! re-exported at the crate root, so import them directly from `oai_sdk`.
//!
//! - `chat` - Chat completion with streaming and tool support
//! - `generate` - Text generation with streaming support
//! - `embed` - Single and batch embeddings
//! - `model` - Model management (CRUD, pull, push, running models)
//! - `openai` - OpenAI-compatible endpoints (chat, embeddings, responses)
//! - `client` - Core client, blob management, model lifecycle
//! - `error` - Error types and handling

// Internal modules. None of them is declared `pub`; the items meant for
// callers are surfaced through the crate-root re-exports further down.
mod chat;
mod client;
mod embed;
mod error;
mod generate;
mod model;
mod openai;

// Chat completion request/response types, message and tool-calling types.
pub use chat::{
    ChatRequest, ChatResponse, Format, Message, Tool, ToolCall, ToolCallFunction, ToolFunction,
};
// The client and its builder (entry point used by every example in the crate docs).
pub use client::{ModelClient, ModelClientBuilder};
// Embedding request/response types.
pub use embed::{EmbedInput, EmbedRequest, EmbedResponse, EmbeddingsRequest, EmbeddingsResponse};
// Crate error type and the accompanying `Result`.
// NOTE(review): `Result` is presumably an alias over `OllamaError` — confirm in `error`.
pub use error::{OllamaError, Result};
// Text generation request/response types.
pub use generate::{GenerateRequest, GenerateResponse};
// Model management request/response types (list, show, copy, create, delete,
// pull, push, running models, version).
pub use model::{
    CopyModelRequest, CreateModelRequest, DeleteModelRequest, License, ListModelsResponse,
    ListRunningModelsResponse, ModelDetails, ModelInfo, PullModelRequest, PushModelRequest,
    RunningModel, ShowModelRequest, ShowModelResponse, StatusResponse, VersionResponse,
};
// Types for the OpenAI-compatible endpoints (chat completions, embeddings, responses).
pub use openai::{
    ChatCompletionsRequest, ChatCompletionsResponse, ChatMessage, OpenAIEmbedding,
    OpenAIEmbeddingsInput, OpenAIEmbeddingsRequest, OpenAIEmbeddingsResponse, ResponsesRequest,
    ResponsesResponse,
};