// ollama-api-rs 0.2.0
//
// An async Rust SDK for the Ollama API with OpenAI compatibility
// Documentation
// Copyright 2026 Cloudflavor GmbH

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::client::ModelClient;
use crate::client::handle_error_response;
use crate::client::json_lines_stream;
use crate::error::{OllamaError, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use tokio_stream::Stream;

/// Request for text generation.
///
/// This is the JSON body that `ModelClient::generate` and
/// `ModelClient::generate_stream` post to `api/generate`.
///
/// Serialization rules, as declared by the serde attributes below:
/// - `model`, `prompt` and `stream` are always written.
/// - Every `Option` field is left out of the output entirely when it is `None`.
/// - When deserializing, a missing `stream` field becomes `false`.
///
/// NOTE(review): the per-field meanings below are inferred from the field names
/// and the upstream Ollama `/api/generate` endpoint, not from code in this
/// file — confirm against the Ollama API documentation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateRequest {
    /// Presumably the name (and tag) of the model to run.
    pub model: String,
    /// Presumably the prompt text to generate a completion for.
    pub prompt: String,
    /// Streaming flag sent to the server; always serialized, `false` if absent
    /// on deserialization.
    #[serde(default)]
    pub stream: bool,
    /// Presumably text that should follow the generated completion — TODO confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub suffix: Option<String>,
    /// Presumably encoded images for multimodal models; the expected encoding
    /// is not visible here — TODO confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
    /// Requested output format, using the `Format` type from `crate::chat`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub format: Option<Format>,
    /// Free-form map of option name to arbitrary JSON value; presumably model
    /// runtime parameters — TODO confirm accepted keys.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub options: Option<HashMap<String, serde_json::Value>>,
    /// Presumably a system message override.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system: Option<String>,
    /// Presumably a prompt template override.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub template: Option<String>,
    /// Presumably disables prompt templating when `true` — TODO confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub raw: Option<bool>,
    /// Presumably how long the model stays loaded after the request; carried as
    /// a string, format not visible here — TODO confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub keep_alive: Option<String>,
    /// Presumably the `context` value returned by an earlier
    /// [`GenerateResponse`], passed back to continue a conversation — TODO confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub context: Option<Vec<u32>>,
    /// Presumably toggles "thinking" output on models that support it — TODO confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub think: Option<bool>,
    /// Presumably an output width for image generation — TODO confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub width: Option<u32>,
    /// Presumably an output height for image generation — TODO confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub height: Option<u32>,
    /// Presumably a step count for image generation — TODO confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub steps: Option<u32>,
}

impl Default for GenerateRequest {
    fn default() -> Self {
        Self {
            model: "llama3.1:8b".to_string(),
            prompt: String::new(),
            stream: false,
            suffix: None,
            images: None,
            format: None,
            options: None,
            system: None,
            template: None,
            raw: None,
            keep_alive: None,
            context: None,
            think: None,
            width: None,
            height: None,
            steps: None,
        }
    }
}

use crate::chat::Format;

/// Response for text generation.
///
/// Returned whole by `ModelClient::generate`, and as the item type of the
/// stream produced by `ModelClient::generate_stream`.
///
/// Deserialization rules, as declared by the serde attributes below:
/// - `model`, `created_at`, `response` and `done` carry no default attribute.
/// - The numeric statistics fall back to `0` when the field is absent.
/// - `done_reason` and `context` are omitted from serialized output when `None`.
///
/// NOTE(review): units and exact semantics of the statistics are not visible
/// in this file; the notes below are assumptions to confirm against the Ollama
/// API documentation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateResponse {
    /// Presumably the model that produced this response.
    pub model: String,
    /// Creation timestamp, kept as the raw string sent by the server (not parsed).
    pub created_at: String,
    /// Generated text; presumably a partial fragment when streaming — TODO confirm.
    pub response: String,
    /// Presumably `true` on the final (or only) response object.
    pub done: bool,
    /// Presumably why generation stopped, when the server reports it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub done_reason: Option<String>,
    /// Presumably an encoding of the conversation that can be passed back via
    /// `GenerateRequest::context` — TODO confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub context: Option<Vec<u32>>,
    /// Presumably total time spent on the request (nanoseconds?) — TODO confirm unit.
    #[serde(default)]
    pub total_duration: u64,
    /// Presumably time spent loading the model — TODO confirm unit.
    #[serde(default)]
    pub load_duration: u64,
    /// Presumably the number of tokens in the prompt.
    #[serde(default)]
    pub prompt_eval_count: u32,
    /// Presumably time spent evaluating the prompt — TODO confirm unit.
    #[serde(default)]
    pub prompt_eval_duration: u64,
    /// Presumably the number of tokens generated.
    #[serde(default)]
    pub eval_count: u32,
    /// Presumably time spent generating the response — TODO confirm unit.
    #[serde(default)]
    pub eval_duration: u64,
}

impl ModelClient {
    /// Generate text from a prompt.
    pub async fn generate(&self, request: GenerateRequest) -> Result<GenerateResponse> {
        let url = self
            .base_url
            .join("api/generate")
            .map_err(OllamaError::UrlError)?;
        let response = self
            .client
            .post(url)
            .json(&request)
            .send()
            .await
            .map_err(OllamaError::RequestError)?;

        self.handle_response(response, Some(&request.model)).await
    }

    /// Generate text from a prompt with streaming.
    pub async fn generate_stream(
        &self,
        request: GenerateRequest,
    ) -> Result<impl Stream<Item = Result<GenerateResponse>> + '_> {
        let url = self
            .base_url
            .join("api/generate")
            .map_err(OllamaError::UrlError)?;
        let response = self
            .client
            .post(url)
            .json(&request)
            .send()
            .await
            .map_err(OllamaError::RequestError)?;

        if !response.status().is_success() {
            return Err(handle_error_response(response, Some(&request.model)).await);
        }

        Ok(json_lines_stream(response))
    }
}