Skip to main content

Module fit

Module fit 

Source
Expand description

Memory estimation and parameter fitting from llama.cpp common/fit.

§Example — memory estimate

use llama_cpp_4::prelude::*;
use std::path::Path;

/// Estimates per-device memory for `model.gguf` and prints the training
/// context length followed by one summary line per reported device.
fn main() {
    // Bound (but unused) so the backend value lives until `main` returns.
    let _backend = LlamaBackend::init().unwrap();

    // Build the parameter sets in their own scope so they are released as
    // soon as the estimate has been produced.
    let report = {
        let model_params = LlamaModelParams::default().with_n_gpu_layers(99);
        let context_params = LlamaContextParams::default();
        get_device_memory_data(
            Path::new("model.gguf"),
            &model_params,
            &context_params,
            llama_cpp_sys_4::GGML_LOG_LEVEL_ERROR,
        )
        .unwrap()
    };

    println!("training ctx: {}", report.hyperparams.n_ctx_train);

    let devices = report.entries.iter().enumerate();
    for (i, device) in devices {
        println!(
            "device {i}: {} bytes free / {} total (projected {})",
            device.free,
            device.total,
            device.used(),
        );
    }
}

§Example — auto-fit parameters

use llama_cpp_4::fit::{fit_params, FitParams};
use llama_cpp_4::prelude::*;
use std::path::Path;

/// Fits model and context parameters for `model.gguf`, then prints the
/// resulting context size and GPU layer count.
fn main() {
    use std::num::NonZeroU32;

    let backend = LlamaBackend::init().unwrap();

    // Start from the defaults, overriding only the minimum context size.
    let request = FitParams::default().with_n_ctx_min(512);
    let fitted = fit_params(&backend, Path::new("model.gguf"), request).unwrap();

    // A missing context size is printed as 0.
    let n_ctx = fitted.context_params.n_ctx().map_or(0, NonZeroU32::get);
    println!("n_ctx: {}", n_ctx);

    let n_gpu_layers = fitted.model_params.n_gpu_layers();
    println!("n_gpu_layers: {}", n_gpu_layers);
}

Structs§

DeviceMemoryEntry
Per-device memory projection from get_device_memory_data.
DeviceMemoryHyperParams
Hyper-parameters discovered while estimating device memory.
DeviceMemoryReport
Result of get_device_memory_data.
FitParams
Input to fit_params.
FitParamsResult
Fitted model/context parameters plus auxiliary buffers.

Enums§

DeviceMemoryError
Errors from get_device_memory_data.
FitParamsError
Errors from fit_params.

Functions§

fit_params
Adjust model and context parameters to fit available device memory.
get_device_memory_data
Estimate per-device memory for a model path and parameter set.