// ollama-api-rs 0.2.0
//
// An async Rust SDK for the Ollama API with OpenAI compatibility.
// Documentation
// Copyright 2026 Cloudflavor GmbH

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

// http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Model lifecycle example (requires `local` feature)
//!
//! This example demonstrates how to manage model memory lifecycle,
//! including loading and unloading models from memory.
//!
//! Run with: cargo run --example model_lifecycle --features local

// Fallback entry point compiled when the `local` feature is disabled:
// it only tells the user how to re-run the example with the feature on.
#[cfg(not(feature = "local"))]
fn main() {
    let notice = [
        "This example requires the 'local' feature.",
        "Run with: cargo run --example model_lifecycle --features local",
    ];

    for line in notice.iter() {
        println!("{}", line);
    }
}

// Entry point compiled when the `local` feature is enabled.
//
// Walks through three steps against a locally running server: load a model,
// list the models that are currently running, then unload the model again.
// Only a failure to build the client aborts the program; a failure in any
// individual step is printed and the walkthrough continues.
#[cfg(feature = "local")]
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    use oai_sdk::ModelClient;

    // Address the client is pointed at and the model used for the demo.
    const ENDPOINT: &str = "http://localhost:11434";
    const MODEL_NAME: &str = "llama3.1:8b";

    // Build the client; `?` propagates a builder error out of `main`.
    let client = ModelClient::builder()
        .base_url(String::from(ENDPOINT))
        .build()?;

    println!("Ollama Client Model Lifecycle Example");
    println!("This example shows how to load and unload models into memory.\n");

    // Step 1: load the model into memory (per the SDK example notes this goes
    // through the generate API with an empty prompt).
    println!("Loading model '{}' into memory...", MODEL_NAME);
    match client.load_model(MODEL_NAME).await {
        Err(err) => println!("Error loading model: {}", err),
        Ok(loaded) => {
            println!("Model loaded successfully!");
            println!("Response: {}", loaded.response);
        }
    }

    // Step 2: report which models the server says are running.
    println!("\nChecking running models:");
    match client.list_running_models().await {
        Err(err) => println!("Error listing running models: {}", err),
        Ok(running) if running.is_empty() => println!("No models currently running."),
        Ok(running) => {
            for entry in running {
                println!("  - {} (expires at: {})", entry.name, entry.expires_at);
            }
        }
    }

    // Step 3: unload the model again (per the SDK example notes this uses
    // keep_alive: "0").
    println!("\nUnloading model '{}' from memory...", MODEL_NAME);
    match client.unload_model(MODEL_NAME).await {
        Err(err) => println!("Error unloading model: {}", err),
        Ok(unloaded) => {
            println!("Model unloaded successfully!");
            println!("Response: {}", unloaded.response);
        }
    }

    println!("\nModel lifecycle management complete!");
    println!("Note: Loading/unloading may take a few seconds depending on the model size.");

    Ok(())
}