llama-cpp-v3 0.1.6

Safe and ergonomic Rust wrapper for llama.cpp with dynamic loading
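Example: inspect a model's chat template and apply it to a short conversation, falling back to a ChatML-style template when the model does not embed one.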
use llama_cpp_v3::backend::Backend;
use llama_cpp_v3::{ChatMessage, LlamaBackend, LlamaModel, LoadOptions};
use std::env;
use std::path::PathBuf;

fn main() {
    let args: Vec<String> = env::args().collect();
    if args.len() < 3 {
        eprintln!("Usage: chat_template <backend> <model.gguf> [cache_dir]");
        eprintln!("       backend: cpu | cuda | vulkan | hip | sycl | opencl");
        std::process::exit(1);
    }

    let backend_str = &args[1];
    let model_path = &args[2];

    let cache_dir = if args.len() > 3 {
        Some(PathBuf::from(&args[3]))
    } else {
        None
    };

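    // Map the backend name from the command line onto the crate's Backend enum.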
    let backend_type = match backend_str.to_lowercase().as_str() {
        "cpu" => Backend::Cpu,
        "cuda" => Backend::Cuda,
        "vulkan" => Backend::Vulkan,
        "hip" => Backend::Hip,
        "sycl" => Backend::Sycl,
        "opencl" => Backend::OpenCl,
        _ => {
            eprintln!("Unknown backend: {}", backend_str);
            std::process::exit(1);
        }
    };

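    // Options for dynamically loading the selected llama.cpp backend.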
    let options = LoadOptions {
        backend: backend_type,
        app_name: "llama-cpp-v3-chat-test",
        version: None,
        explicit_path: None,
        cache_dir,
    };

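    // Load the backend library, then start from its default model parameters.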
    let backend = LlamaBackend::load(options).expect("Failed to load backend");
    let mut model_params = LlamaModel::default_params(&backend);
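    // Keep every layer on the CPU, even when a GPU backend was requested.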
    model_params.n_gpu_layers = 0;

    let model = LlamaModel::load_from_file(&backend, model_path, model_params)
        .expect("Failed to load model");

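    // A minimal system + user conversation to run through the chat template.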
    let messages = vec![
        ChatMessage {
            role: "system".to_string(),
            content: "You are a helpful assistant.".to_string(),
        },
        ChatMessage {
            role: "user".to_string(),
            content: "Hello! How can you help me today?".to_string(),
        },
    ];

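    // Ask the model for its embedded chat template; None presumably selects the
    // default (unnamed) template rather than a named variant.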
    println!("Inspecting chat template...");
    let template = model.get_chat_template(None);
    match &template {
        Some(t) => println!("Model has template: {}", t),
        None => println!("Model has NO template. Using fallback."),
    }

    // ChatML-like fallback
    let fallback_tmpl = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}";
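    // Use the fallback only when the model ships no template of its own;
    // otherwise pass None so the model's embedded template is used.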
    let tmpl_to_use = if template.is_none() {
        Some(fallback_tmpl)
    } else {
        None
    };

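    // The trailing `true` likely corresponds to add_generation_prompt in the
    // template above, appending the assistant prefix so generation starts there.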
    println!("Applying chat template...");
    let formatted = model
        .apply_chat_template(tmpl_to_use, &messages, true)
        .expect("Failed to apply chat template");

    println!("--- Formatted Output ---");
    println!("{}", formatted);
    println!("-------------------------");

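    // Treat empty output as a failure so the example doubles as a smoke test.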
    if !formatted.is_empty() {
        println!("Success! Chat template applied.");
    } else {
        println!("Error: Formatted output is empty.");
        std::process::exit(1);
    }
}