Struct MtmdContextParams

Source

pub struct MtmdContextParams { /* private fields */ }

Expand description

Parameters used when creating an MtmdContext.

Obtain a default-initialised instance via MtmdContextParams::default().

Implementations§

Source §

impl MtmdContextParams

Source

pub fn use_gpu(self, v: bool) -> Self

Whether to run the vision/audio encoder on the GPU (default: true).

Source

pub fn print_timings(self, v: bool) -> Self

Whether to print timing info after each encode (default: false).

Source

pub fn n_threads(self, n: i32) -> Self

Number of threads used for the vision encoder. The default is the value supplied by the upstream mtmd_context_params_default() function.

Source

pub fn warmup(self, v: bool) -> Self

Whether to run a warm-up encode pass after initialisation.

Source

pub fn image_min_tokens(self, n: i32) -> Self

Minimum number of image tokens (0 = use model default).

Source

pub fn image_max_tokens(self, n: i32) -> Self

Maximum number of image tokens (0 = use model default).

Source

pub fn with_batch_max_tokens(self, n: i32) -> Self

Maximum number of multimodal output tokens per batch.

Maps to mtmd_context_params.batch_max_tokens. The upstream default is 1024; increase this value for large images or long audio segments.

§Examples

use llama_cpp_4::mtmd::MtmdContextParams;
let params = MtmdContextParams::default().with_batch_max_tokens(2048);
assert_eq!(params.batch_max_tokens(), 2048);

Source

pub fn batch_max_tokens(&self) -> i32

Get the configured batch token cap (batch_max_tokens).

Source

pub fn with_flash_attn_type(self, flash_attn_type: LlamaFlashAttnType) -> Self

Set flash-attention mode for the vision encoder.

Maps to mtmd_context_params.flash_attn_type. Uses the same crate::context::params::LlamaFlashAttnType enum as text contexts.

§Examples

use llama_cpp_4::context::params::LlamaFlashAttnType;
use llama_cpp_4::mtmd::MtmdContextParams;
let params = MtmdContextParams::default()
    .with_flash_attn_type(LlamaFlashAttnType::Auto);
assert_eq!(params.flash_attn_type(), LlamaFlashAttnType::Auto);

Source

pub fn flash_attn_type(&self) -> LlamaFlashAttnType

Get flash-attention mode for the vision encoder.

Source

pub fn with_progress_callback( self, callback: Option<MtmdProgressCallback>, user_data: *mut c_void, ) -> Self

Register a callback invoked while mmproj weights load.

Maps to mtmd_context_params.progress_callback. Pass None to disable progress reporting. The callback may return false to abort loading early; see MtmdProgressCallback.

user_data is forwarded to each invocation of the callback and must remain valid until MtmdContext::init_from_file returns.

Source