ollama-api-rs 0.1.0

An async Rust SDK for the Ollama API with OpenAI compatibility
Documentation
// Copyright 2026 Cloudflavor GmbH

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::client::ModelClient;
use crate::error::{OllamaError, Result};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// Request body that [`ModelClient::embed`] posts as JSON to `api/embed`.
///
/// Every `Option` field is left out of the serialized JSON when it is `None`,
/// so a request only carries the settings the caller explicitly provided.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbedRequest {
    /// Model identifier. `embed` also forwards it to the response handler.
    pub model: String,
    /// Input to embed: a single string or a list of strings (see [`EmbedInput`]).
    pub input: EmbedInput,
    /// Optional `truncate` flag; omitted from the JSON when `None`.
    // NOTE(review): presumably tells the server whether to truncate over-long
    // input — confirm against the Ollama API documentation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub truncate: Option<bool>,
    /// Optional free-form map of option names to arbitrary JSON values;
    /// omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub options: Option<HashMap<String, serde_json::Value>>,
    /// Optional `keep_alive` setting, sent as a string; omitted when `None`.
    // NOTE(review): presumably how long the model stays loaded after the
    // request — confirm the accepted format against the Ollama API docs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub keep_alive: Option<String>,
    /// Optional `dimensions` value; omitted from the JSON when `None`.
    // NOTE(review): presumably the requested embedding size — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dimensions: Option<u32>,
}

/// Input payload of an [`EmbedRequest`].
///
/// The enum is `#[serde(untagged)]`, so no variant name appears on the wire:
/// `Single` serializes as a plain JSON string and `Multiple` as a JSON array
/// of strings. When deserializing, the variants are tried in declaration order.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum EmbedInput {
    /// One input string.
    Single(String),
    /// A list of input strings.
    Multiple(Vec<String>),
}

/// Response type returned by [`ModelClient::embed`].
///
/// `model` and `embeddings` must be present in the deserialized payload; the
/// numeric fields are marked `#[serde(default)]` and fall back to `0` when the
/// payload does not contain them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbedResponse {
    /// Model identifier reported in the response.
    pub model: String,
    /// Embedding vectors, each a list of `f32` components.
    // NOTE(review): presumably one vector per input string, in input order —
    // confirm against the Ollama API documentation.
    pub embeddings: Vec<Vec<f32>>,
    /// `total_duration` from the response; `0` when absent.
    // NOTE(review): unit is not visible here (presumably nanoseconds) — confirm.
    #[serde(default)]
    pub total_duration: u64,
    /// `load_duration` from the response; `0` when absent.
    // NOTE(review): unit is not visible here (presumably nanoseconds) — confirm.
    #[serde(default)]
    pub load_duration: u64,
    /// `prompt_eval_count` from the response; `0` when absent.
    #[serde(default)]
    pub prompt_eval_count: u32,
}

/// Request body that [`ModelClient::embeddings`] posts as JSON to the legacy
/// `api/embeddings` endpoint.
///
/// Unlike [`EmbedRequest`], this request carries a single `prompt` string.
/// Every `Option` field is left out of the serialized JSON when it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingsRequest {
    /// Model identifier. `embeddings` also forwards it to the response handler.
    pub model: String,
    /// The single prompt string sent with the request.
    pub prompt: String,
    /// Optional free-form map of option names to arbitrary JSON values;
    /// omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub options: Option<HashMap<String, serde_json::Value>>,
    /// Optional `keep_alive` setting, sent as a string; omitted when `None`.
    // NOTE(review): presumably how long the model stays loaded after the
    // request — confirm the accepted format against the Ollama API docs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub keep_alive: Option<String>,
    /// Optional `truncate` flag; omitted from the JSON when `None`.
    // NOTE(review): presumably tells the server whether to truncate over-long
    // input — confirm that the legacy endpoint honours it.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub truncate: Option<bool>,
}

/// Response type returned by [`ModelClient::embeddings`] (legacy endpoint).
///
/// Holds exactly one embedding vector, matching the single `prompt` of
/// [`EmbeddingsRequest`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingsResponse {
    /// The embedding vector as `f32` components; required in the payload.
    pub embedding: Vec<f32>,
}

impl ModelClient {
    /// Requests embeddings from the `api/embed` endpoint.
    ///
    /// The endpoint is resolved against `self.base_url`, `request` is sent as
    /// the JSON body of a POST, and the HTTP response is handed to
    /// `handle_response` together with the request's model name.
    ///
    /// # Errors
    ///
    /// * [`OllamaError::UrlError`] if the endpoint cannot be joined onto the
    ///   base URL.
    /// * [`OllamaError::RequestError`] if sending the request fails.
    /// * Any error produced by `handle_response`.
    pub async fn embed(&self, request: EmbedRequest) -> Result<EmbedResponse> {
        let endpoint = self
            .base_url
            .join("api/embed")
            .map_err(OllamaError::UrlError)?;

        // Build the POST first, then dispatch it; transport failures are
        // mapped into the crate's error type.
        let outgoing = self.client.post(endpoint).json(&request);
        let reply = outgoing
            .send()
            .await
            .map_err(OllamaError::RequestError)?;

        self.handle_response(reply, Some(&request.model)).await
    }

    /// Requests a single embedding from the legacy `api/embeddings` endpoint.
    ///
    /// The endpoint is resolved against `self.base_url`, `request` is sent as
    /// the JSON body of a POST, and the HTTP response is handed to
    /// `handle_response` together with the request's model name.
    ///
    /// # Errors
    ///
    /// * [`OllamaError::UrlError`] if the endpoint cannot be joined onto the
    ///   base URL.
    /// * [`OllamaError::RequestError`] if sending the request fails.
    /// * Any error produced by `handle_response`.
    pub async fn embeddings(&self, request: EmbeddingsRequest) -> Result<EmbeddingsResponse> {
        let endpoint = self
            .base_url
            .join("api/embeddings")
            .map_err(OllamaError::UrlError)?;

        // Build the POST first, then dispatch it; transport failures are
        // mapped into the crate's error type.
        let outgoing = self.client.post(endpoint).json(&request);
        let reply = outgoing
            .send()
            .await
            .map_err(OllamaError::RequestError)?;

        self.handle_response(reply, Some(&request.model)).await
    }
}

impl Default for EmbeddingsRequest {
    /// Creates a request for the `"llama3"` model with an empty prompt and
    /// every optional setting left unset.
    fn default() -> Self {
        let model = String::from("llama3");
        let prompt = String::default();

        Self {
            model,
            prompt,
            options: None,
            keep_alive: None,
            truncate: None,
        }
    }
}

impl Default for EmbedRequest {
    /// Creates a request for the `"llama3"` model whose input is a single
    /// empty string, with every optional setting left unset.
    fn default() -> Self {
        let model = String::from("llama3");
        let input = EmbedInput::Single(String::default());

        Self {
            model,
            input,
            truncate: None,
            options: None,
            keep_alive: None,
            dimensions: None,
        }
    }
}