Skip to main content

Crate llama_crab_sys

Crate llama_crab_sys 

Source
Expand description

Low-level FFI bindings to llama.cpp.

Generated at build time via bindgen over wrapper.h, which in turn includes the public C headers of llama.cpp, ggml and gguf.

This crate is unsafe by design: every public item is a thin extern "C" wrapper around a llama.cpp symbol. Use the safe llama-crab crate instead unless you need fine-grained control.

§Features

| Feature | Description |
| --- | --- |
| common | Compile libcommon.a for chat templates and JSON schema helpers |
| cuda | NVIDIA CUDA backend |
| cuda-no-vmm | CUDA without Virtual Memory Management |
| metal | Apple Metal (default on macOS aarch64) |
| vulkan | Vulkan backend |
| rocm | AMD ROCm/HIP backend |
| openmp | OpenMP parallel CPU backend (default) |
| dynamic-link | Link against libllama as a shared object |
| system-ggml | Use GGML from the system instead of the bundled copy |
| mtmd | Multimodal (vision + audio) helpers |
| llguidance | llguidance sampler (custom C-ABI vtable) |
| dynamic-backends | Load GGML backends as shared objects at runtime |

Structs§

_IO_FILE
_IO_codecvt
_IO_marker
_IO_wide_data
ggml_backend
ggml_backend_buffer
ggml_backend_buffer_type
ggml_backend_dev_caps
ggml_backend_dev_props
ggml_backend_device
ggml_backend_event
ggml_backend_feature
ggml_backend_graph_copy
ggml_backend_meta_split_state
ggml_backend_reg
ggml_backend_sched
ggml_bf16_t
ggml_cgraph
ggml_context
ggml_cplan
ggml_gallocr
ggml_init_params
ggml_object
ggml_opt_context
ggml_opt_dataset
ggml_opt_optimizer_params
ggml_opt_optimizer_params__bindgen_ty_1
ggml_opt_optimizer_params__bindgen_ty_2
ggml_opt_params
ggml_opt_result
ggml_tallocr
ggml_tensor
ggml_threadpool
ggml_threadpool_params
ggml_type_traits
ggml_type_traits_cpu
gguf_context
gguf_init_params
llama_adapter_lora
llama_batch
llama_chat_message
llama_context
llama_context_params
llama_logit_bias
llama_memory_i
llama_model
llama_model_imatrix_data
llama_model_kv_override
llama_model_params
llama_model_quantize_params
llama_model_tensor_buft_override
llama_model_tensor_override
llama_opt_params
llama_perf_context_data
llama_perf_sampler_data
llama_sampler
llama_sampler_chain_params
llama_sampler_data
llama_sampler_i
llama_sampler_seq_config
llama_token_data
llama_token_data_array
llama_vocab
mtmd_batch
mtmd_bitmap
mtmd_caps
mtmd_context
mtmd_context_params
mtmd_decoder_pos
mtmd_helper_bitmap_wrapper
mtmd_helper_video
mtmd_helper_video_info
mtmd_helper_video_init_params
mtmd_image_tokens
mtmd_input_chunk
mtmd_input_chunks
mtmd_input_text

Enums§

ggml_backend_buffer_usage
ggml_backend_dev_type
ggml_backend_meta_split_axis
ggml_ftype
ggml_glu_op
ggml_log_level
ggml_numa_strategy
ggml_object_type
ggml_op
ggml_op_hint
ggml_op_pool
ggml_opt_build_type
ggml_opt_loss_type
ggml_opt_optimizer_type
ggml_prec
ggml_scale_flag
ggml_scale_mode
ggml_sched_priority
ggml_sort_order
ggml_status
ggml_tensor_flag
ggml_tri_type
ggml_type
ggml_unary_op
gguf_type
llama_attention_type
llama_context_type
llama_flash_attn_type
llama_ftype
llama_model_kv_override_type
llama_model_meta_key
llama_pooling_type
llama_rope_scaling_type
llama_rope_type
llama_split_mode
llama_token_attr
llama_token_type
llama_vocab_type
mtmd_input_chunk_type

Constants§

GGML_BACKEND_META_MAX_DEVICES
GGML_DEFAULT_GRAPH_SIZE
GGML_DEFAULT_N_THREADS
GGML_EXIT_ABORTED
GGML_EXIT_SUCCESS
GGML_FILE_MAGIC
GGML_FILE_VERSION
GGML_MAX_DIMS
GGML_MAX_NAME
GGML_MAX_N_THREADS
GGML_MAX_OP_PARAMS
GGML_MAX_PARAMS
GGML_MAX_SRC
GGML_MEM_ALIGN
GGML_MROPE_SECTIONS
GGML_N_TASKS_MAX
GGML_QNT_VERSION
GGML_QNT_VERSION_FACTOR
GGML_ROPE_TYPE_IMROPE
GGML_ROPE_TYPE_MROPE
GGML_ROPE_TYPE_NEOX
GGML_ROPE_TYPE_NORMAL
GGML_ROPE_TYPE_VISION
GGML_TENSOR_SIZE
LLAMA_DEFAULT_SEED
LLAMA_FILE_MAGIC_GGLA
LLAMA_FILE_MAGIC_GGSN
LLAMA_FILE_MAGIC_GGSQ
LLAMA_SESSION_MAGIC
LLAMA_SESSION_VERSION
LLAMA_STATE_SEQ_FLAGS_NONE
LLAMA_STATE_SEQ_FLAGS_ON_DEVICE
LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY
LLAMA_STATE_SEQ_FLAGS_SWA_ONLY
LLAMA_STATE_SEQ_MAGIC
LLAMA_STATE_SEQ_VERSION
LLAMA_TOKEN_NULL

Functions§

ggml_abort
ggml_abs
ggml_abs_inplace
ggml_acc
ggml_acc_inplace
ggml_add
ggml_add1
ggml_add1_inplace
ggml_add_cast
ggml_add_id
ggml_add_inplace
ggml_add_rel_pos
ggml_add_rel_pos_inplace
ggml_arange
ggml_are_same_shape
ggml_are_same_stride
ggml_argmax
ggml_argsort
ggml_argsort_top_k
ggml_backend_alloc_buffer
ggml_backend_alloc_ctx_tensors
ggml_backend_alloc_ctx_tensors_from_buft
ggml_backend_alloc_ctx_tensors_from_buft_size
ggml_backend_buffer_clear
ggml_backend_buffer_free
ggml_backend_buffer_get_alignment
ggml_backend_buffer_get_alloc_size
ggml_backend_buffer_get_base
ggml_backend_buffer_get_max_size
ggml_backend_buffer_get_size
ggml_backend_buffer_get_type
ggml_backend_buffer_get_usage
ggml_backend_buffer_init_tensor
ggml_backend_buffer_is_host
ggml_backend_buffer_name
ggml_backend_buffer_reset
ggml_backend_buffer_set_usage
ggml_backend_buft_alloc_buffer
ggml_backend_buft_get_alignment
ggml_backend_buft_get_alloc_size
ggml_backend_buft_get_device
ggml_backend_buft_get_max_size
ggml_backend_buft_is_host
ggml_backend_buft_name
ggml_backend_compare_graph_backend
ggml_backend_cpu_buffer_from_ptr
ggml_backend_cpu_buffer_type
ggml_backend_cpu_init
ggml_backend_cpu_reg
ggml_backend_cpu_set_abort_callback
ggml_backend_cpu_set_n_threads
ggml_backend_cpu_set_threadpool
ggml_backend_cpu_set_use_ref
ggml_backend_dev_backend_reg
ggml_backend_dev_buffer_from_host_ptr
ggml_backend_dev_buffer_type
ggml_backend_dev_by_name
ggml_backend_dev_by_type
ggml_backend_dev_count
ggml_backend_dev_description
ggml_backend_dev_get
ggml_backend_dev_get_props
ggml_backend_dev_host_buffer_type
ggml_backend_dev_init
ggml_backend_dev_memory
ggml_backend_dev_name
ggml_backend_dev_offload_op
ggml_backend_dev_supports_buft
ggml_backend_dev_supports_op
ggml_backend_dev_type
ggml_backend_device_register
ggml_backend_event_free
ggml_backend_event_new
ggml_backend_event_record
ggml_backend_event_synchronize
ggml_backend_event_wait
ggml_backend_free
ggml_backend_get_alignment
ggml_backend_get_default_buffer_type
ggml_backend_get_device
ggml_backend_get_max_size
ggml_backend_graph_compute
ggml_backend_graph_compute_async
ggml_backend_graph_copy
ggml_backend_graph_copy_free
ggml_backend_graph_plan_compute
ggml_backend_graph_plan_create
ggml_backend_graph_plan_free
ggml_backend_guid
ggml_backend_init_best
ggml_backend_init_by_name
ggml_backend_init_by_type
ggml_backend_is_cpu
ggml_backend_load
ggml_backend_load_all
ggml_backend_load_all_from_path
ggml_backend_meta_device
ggml_backend_meta_split_axis_name
ggml_backend_name
ggml_backend_offload_op
ggml_backend_reg_by_name
ggml_backend_reg_count
ggml_backend_reg_dev_count
ggml_backend_reg_dev_get
ggml_backend_reg_get
ggml_backend_reg_get_proc_address
ggml_backend_reg_name
ggml_backend_register
ggml_backend_sched_alloc_graph
ggml_backend_sched_free
ggml_backend_sched_get_backend
ggml_backend_sched_get_buffer_size
ggml_backend_sched_get_buffer_type
ggml_backend_sched_get_n_backends
ggml_backend_sched_get_n_copies
ggml_backend_sched_get_n_splits
ggml_backend_sched_get_tensor_backend
ggml_backend_sched_graph_compute
ggml_backend_sched_graph_compute_async
ggml_backend_sched_new
ggml_backend_sched_reserve
ggml_backend_sched_reserve_size
ggml_backend_sched_reset
ggml_backend_sched_set_eval_callback
ggml_backend_sched_set_tensor_backend
ggml_backend_sched_split_graph
ggml_backend_sched_synchronize
ggml_backend_supports_buft
ggml_backend_supports_op
ggml_backend_synchronize
ggml_backend_tensor_alloc
ggml_backend_tensor_copy
ggml_backend_tensor_copy_async
ggml_backend_tensor_get
ggml_backend_tensor_get_2d
ggml_backend_tensor_get_2d_async
ggml_backend_tensor_get_async
ggml_backend_tensor_memset
ggml_backend_tensor_set
ggml_backend_tensor_set_2d
ggml_backend_tensor_set_2d_async
ggml_backend_tensor_set_async
ggml_backend_unload
ggml_backend_view_init
ggml_bf16_to_fp32
ggml_bf16_to_fp32_row
ggml_blck_size
ggml_build_backward_expand
ggml_build_forward_expand
ggml_build_forward_select
ggml_can_repeat
ggml_cast
ggml_ceil
ggml_ceil_inplace
ggml_clamp
ggml_col2im_1d
ggml_commit
ggml_concat
ggml_cont
ggml_cont_1d
ggml_cont_2d
ggml_cont_3d
ggml_cont_4d
ggml_conv_1d
ggml_conv_1d_dw
ggml_conv_1d_dw_ph
ggml_conv_1d_ph
ggml_conv_2d
ggml_conv_2d_direct
ggml_conv_2d_dw
ggml_conv_2d_dw_direct
ggml_conv_2d_s1_ph
ggml_conv_2d_sk_p0
ggml_conv_3d
ggml_conv_3d_direct
ggml_conv_transpose_1d
ggml_conv_transpose_2d_p0
ggml_cos
ggml_cos_inplace
ggml_count_equal
ggml_cpu_bf16_to_fp32
ggml_cpu_fp16_to_fp32
ggml_cpu_fp32_to_bf16
ggml_cpu_fp32_to_fp16
ggml_cpu_fp32_to_fp32
ggml_cpu_fp32_to_i32
ggml_cpu_get_rvv_vlen
ggml_cpu_get_sve_cnt
ggml_cpu_has_amx_int8
ggml_cpu_has_arm_fma
ggml_cpu_has_avx
ggml_cpu_has_avx2
ggml_cpu_has_avx512
ggml_cpu_has_avx512_bf16
ggml_cpu_has_avx512_vbmi
ggml_cpu_has_avx512_vnni
ggml_cpu_has_avx_vnni
ggml_cpu_has_bmi2
ggml_cpu_has_dotprod
ggml_cpu_has_f16c
ggml_cpu_has_fma
ggml_cpu_has_fp16_va
ggml_cpu_has_llamafile
ggml_cpu_has_matmul_int8
ggml_cpu_has_neon
ggml_cpu_has_riscv_v
ggml_cpu_has_sme
ggml_cpu_has_sse3
ggml_cpu_has_ssse3
ggml_cpu_has_sve
ggml_cpu_has_vsx
ggml_cpu_has_vxe
ggml_cpu_has_wasm_simd
ggml_cpu_init
ggml_cpy
ggml_cross_entropy_loss
ggml_cross_entropy_loss_back
ggml_cumsum
ggml_custom_4d
ggml_custom_inplace
ggml_cycles
ggml_cycles_per_ms
ggml_diag
ggml_diag_mask_inf
ggml_diag_mask_inf_inplace
ggml_diag_mask_zero
ggml_diag_mask_zero_inplace
ggml_div
ggml_div_inplace
ggml_dup
ggml_dup_inplace
ggml_dup_tensor
ggml_element_size
ggml_elu
ggml_elu_inplace
ggml_exp
ggml_exp_inplace
ggml_expm1
ggml_expm1_inplace
ggml_fill
ggml_fill_inplace
ggml_flash_attn_back
ggml_flash_attn_ext
ggml_flash_attn_ext_add_sinks
ggml_flash_attn_ext_get_prec
ggml_flash_attn_ext_set_prec
ggml_floor
ggml_floor_inplace
ggml_fopen
ggml_format_name
ggml_fp16_to_fp32
ggml_fp16_to_fp32_row
ggml_fp32_to_bf16
ggml_fp32_to_bf16_row
ggml_fp32_to_bf16_row_ref
ggml_fp32_to_fp16
ggml_fp32_to_fp16_row
ggml_free
ggml_ftype_to_ggml_type
ggml_gallocr_alloc_graph
ggml_gallocr_free
ggml_gallocr_get_buffer_size
ggml_gallocr_new
ggml_gallocr_new_n
ggml_gallocr_reserve
ggml_gallocr_reserve_n
ggml_gallocr_reserve_n_size
ggml_gated_delta_net
ggml_gated_linear_attn
ggml_geglu
ggml_geglu_erf
ggml_geglu_erf_split
ggml_geglu_erf_swapped
ggml_geglu_quick
ggml_geglu_quick_split
ggml_geglu_quick_swapped
ggml_geglu_split
ggml_geglu_swapped
ggml_gelu
ggml_gelu_erf
ggml_gelu_erf_inplace
ggml_gelu_inplace
ggml_gelu_quick
ggml_gelu_quick_inplace
ggml_get_data
ggml_get_data_f32
ggml_get_f32_1d
ggml_get_f32_nd
ggml_get_first_tensor
ggml_get_glu_op
ggml_get_i32_1d
ggml_get_i32_nd
ggml_get_max_tensor_size
ggml_get_mem_buffer
ggml_get_mem_size
ggml_get_name
ggml_get_next_tensor
ggml_get_no_alloc
ggml_get_rel_pos
ggml_get_rows
ggml_get_rows_back
ggml_get_tensor
ggml_get_type_traits
ggml_get_type_traits_cpu
ggml_get_unary_op
ggml_glu
ggml_glu_op_name
ggml_glu_split
ggml_graph_add_node
ggml_graph_clear
ggml_graph_compute
ggml_graph_compute_with_ctx
ggml_graph_cpy
ggml_graph_dump_dot
ggml_graph_dup
ggml_graph_get_grad
ggml_graph_get_grad_acc
ggml_graph_get_tensor
ggml_graph_n_nodes
ggml_graph_node
ggml_graph_nodes
ggml_graph_overhead
ggml_graph_overhead_custom
ggml_graph_plan
ggml_graph_print
ggml_graph_reset
ggml_graph_size
ggml_group_norm
ggml_group_norm_inplace
ggml_guid_matches
ggml_hardsigmoid
ggml_hardswish
ggml_im2col
ggml_im2col_3d
ggml_im2col_back
ggml_init
ggml_interpolate
ggml_is_3d
ggml_is_contiguous
ggml_is_contiguous_0
ggml_is_contiguous_1
ggml_is_contiguous_2
ggml_is_contiguous_channels
ggml_is_contiguous_rows
ggml_is_contiguously_allocated
ggml_is_empty
ggml_is_matrix
ggml_is_numa
ggml_is_permuted
ggml_is_quantized
ggml_is_scalar
ggml_is_transposed
ggml_is_vector
ggml_is_view
ggml_l2_norm
ggml_l2_norm_inplace
ggml_leaky_relu
ggml_log
ggml_log_get
ggml_log_inplace
ggml_log_set
ggml_map_custom1
ggml_map_custom2
ggml_map_custom3
ggml_map_custom1_inplace
ggml_map_custom2_inplace
ggml_map_custom3_inplace
ggml_mean
ggml_mul
ggml_mul_inplace
ggml_mul_mat
ggml_mul_mat_id
ggml_mul_mat_set_hint
ggml_mul_mat_set_prec
ggml_n_dims
ggml_nbytes
ggml_nbytes_pad
ggml_neg
ggml_neg_inplace
ggml_nelements
ggml_new_buffer
ggml_new_f32
ggml_new_graph
ggml_new_graph_custom
ggml_new_i32
ggml_new_tensor
ggml_new_tensor_1d
ggml_new_tensor_2d
ggml_new_tensor_3d
ggml_new_tensor_4d
ggml_norm
ggml_norm_inplace
ggml_nrows
ggml_numa_init
ggml_op_desc
ggml_op_name
ggml_op_symbol
ggml_opt_alloc
ggml_opt_context_optimizer_type
ggml_opt_dataset_data
ggml_opt_dataset_free
ggml_opt_dataset_get_batch
ggml_opt_dataset_get_batch_host
ggml_opt_dataset_init
ggml_opt_dataset_labels
ggml_opt_dataset_ndata
ggml_opt_dataset_shuffle
ggml_opt_default_params
ggml_opt_epoch
ggml_opt_epoch_callback_progress_bar
ggml_opt_eval
ggml_opt_fit
ggml_opt_free
ggml_opt_get_constant_optimizer_params
ggml_opt_get_default_optimizer_params
ggml_opt_grad_acc
ggml_opt_init
ggml_opt_inputs
ggml_opt_labels
ggml_opt_loss
ggml_opt_ncorrect
ggml_opt_optimizer_name
ggml_opt_outputs
ggml_opt_pred
ggml_opt_prepare_alloc
ggml_opt_reset
ggml_opt_result_accuracy
ggml_opt_result_free
ggml_opt_result_init
ggml_opt_result_loss
ggml_opt_result_ndata
ggml_opt_result_pred
ggml_opt_result_reset
ggml_opt_static_graphs
ggml_opt_step_adamw
ggml_opt_step_sgd
ggml_out_prod
ggml_pad
ggml_pad_circular
ggml_pad_ext
ggml_pad_ext_circular
ggml_pad_reflect_1d
ggml_permute
ggml_pool_1d
ggml_pool_2d
ggml_pool_2d_back
ggml_print_object
ggml_print_objects
ggml_quantize_chunk
ggml_quantize_free
ggml_quantize_init
ggml_quantize_requires_imatrix
ggml_reglu
ggml_reglu_split
ggml_reglu_swapped
ggml_relu
ggml_relu_inplace
ggml_repeat
ggml_repeat_4d
ggml_repeat_back
ggml_reset
ggml_reshape
ggml_reshape_1d
ggml_reshape_2d
ggml_reshape_3d
ggml_reshape_4d
ggml_rms_norm
ggml_rms_norm_back
ggml_rms_norm_inplace
ggml_roll
ggml_rope
ggml_rope_custom
ggml_rope_custom_inplace
ggml_rope_ext
ggml_rope_ext_back
ggml_rope_ext_inplace
ggml_rope_inplace
ggml_rope_multi
ggml_rope_multi_back
ggml_rope_multi_inplace
ggml_rope_yarn_corr_dims
ggml_round
ggml_round_inplace
ggml_row_size
ggml_rwkv_wkv6
ggml_rwkv_wkv7
ggml_scale
ggml_scale_bias
ggml_scale_bias_inplace
ggml_scale_inplace
ggml_set
ggml_set_1d
ggml_set_1d_inplace
ggml_set_2d
ggml_set_2d_inplace
ggml_set_abort_callback
ggml_set_f32
ggml_set_f32_1d
ggml_set_f32_nd
ggml_set_i32
ggml_set_i32_1d
ggml_set_i32_nd
ggml_set_inplace
ggml_set_input
ggml_set_loss
ggml_set_name
ggml_set_no_alloc
ggml_set_output
ggml_set_param
ggml_set_rows
ggml_set_zero
ggml_sgn
ggml_sgn_inplace
ggml_sigmoid
ggml_sigmoid_inplace
ggml_silu
ggml_silu_back
ggml_silu_inplace
ggml_sin
ggml_sin_inplace
ggml_soft_max
ggml_soft_max_add_sinks
ggml_soft_max_ext
ggml_soft_max_ext_back
ggml_soft_max_ext_back_inplace
ggml_soft_max_ext_inplace
ggml_soft_max_inplace
ggml_softplus
ggml_softplus_inplace
ggml_solve_tri
ggml_sqr
ggml_sqr_inplace
ggml_sqrt
ggml_sqrt_inplace
ggml_ssm_conv
ggml_ssm_scan
ggml_status_to_string
ggml_step
ggml_step_inplace
ggml_sub
ggml_sub_inplace
ggml_sum
ggml_sum_rows
ggml_swiglu
ggml_swiglu_oai
ggml_swiglu_split
ggml_swiglu_swapped
ggml_tallocr_alloc
ggml_tallocr_new
ggml_tanh
ggml_tanh_inplace
ggml_tensor_overhead
ggml_threadpool_free
ggml_threadpool_get_n_threads
ggml_threadpool_new
ggml_threadpool_params_default
ggml_threadpool_params_init
ggml_threadpool_params_match
ggml_threadpool_pause
ggml_threadpool_resume
ggml_time_init
ggml_time_ms
ggml_time_us
ggml_timestep_embedding
ggml_top_k
ggml_transpose
ggml_tri
ggml_trunc
Truncates the fractional part of each element in the tensor (towards zero). For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0. Similar to std::trunc in C/C++.
ggml_trunc_inplace
ggml_type_name
ggml_type_size
ggml_type_sizef
ggml_unary
ggml_unary_inplace
ggml_unary_op_name
ggml_unravel_index
ggml_upscale
ggml_upscale_ext
ggml_used_mem
ggml_validate_row_data
ggml_version
ggml_view_1d
ggml_view_2d
ggml_view_3d
ggml_view_4d
ggml_view_tensor
ggml_win_part
ggml_win_unpart
ggml_xielu
gguf_add_tensor
gguf_find_key
gguf_find_tensor
gguf_free
gguf_get_alignment
gguf_get_arr_data
gguf_get_arr_n
gguf_get_arr_str
gguf_get_arr_type
gguf_get_data_offset
gguf_get_key
gguf_get_kv_type
gguf_get_meta_data
gguf_get_meta_size
gguf_get_n_kv
gguf_get_n_tensors
gguf_get_tensor_name
gguf_get_tensor_offset
gguf_get_tensor_size
gguf_get_tensor_type
gguf_get_val_bool
gguf_get_val_data
gguf_get_val_f32
gguf_get_val_f64
gguf_get_val_i8
gguf_get_val_i16
gguf_get_val_i32
gguf_get_val_i64
gguf_get_val_str
gguf_get_val_u8
gguf_get_val_u16
gguf_get_val_u32
gguf_get_val_u64
gguf_get_version
gguf_init_empty
gguf_init_from_buffer
gguf_init_from_callback
gguf_init_from_file
gguf_init_from_file_ptr
gguf_remove_key
gguf_set_arr_data
gguf_set_arr_str
gguf_set_kv
gguf_set_tensor_data
gguf_set_tensor_type
gguf_set_val_bool
gguf_set_val_f32
gguf_set_val_f64
gguf_set_val_i8
gguf_set_val_i16
gguf_set_val_i32
gguf_set_val_i64
gguf_set_val_str
gguf_set_val_u8
gguf_set_val_u16
gguf_set_val_u32
gguf_set_val_u64
gguf_type_name
gguf_write_to_file
gguf_write_to_file_ptr
llama_adapter_get_alora_invocation_tokens
llama_adapter_get_alora_n_invocation_tokens
llama_adapter_lora_free
llama_adapter_lora_init
llama_adapter_meta_count
llama_adapter_meta_key_by_index
llama_adapter_meta_val_str
llama_adapter_meta_val_str_by_index
llama_add_bos_token
llama_add_eos_token
llama_attach_threadpool
llama_backend_free
llama_backend_init
llama_batch_free
llama_batch_get_one
llama_batch_init
llama_chat_apply_template
Apply chat template. Inspired by hf apply_chat_template() on python.
llama_chat_builtin_templates
llama_context_default_params
llama_copy_state_data
llama_decode
llama_detach_threadpool
llama_detokenize
@details Convert the provided tokens into text (inverse of llama_tokenize()). @param text The char pointer must be large enough to hold the resulting text. @return Returns the number of chars/bytes on success, no more than text_len_max. @return Returns a negative number on failure - the number of chars/bytes that would have been returned. @param remove_special Allow to remove BOS and EOS tokens if model is configured to do so. @param unparse_special If true, special tokens are rendered in the output.
llama_encode
llama_flash_attn_type_name
llama_free
llama_free_model
llama_get_embeddings
llama_get_embeddings_ith
llama_get_embeddings_seq
llama_get_logits
llama_get_logits_ith
llama_get_memory
llama_get_model
llama_get_sampled_candidates_count_ith
llama_get_sampled_candidates_ith
llama_get_sampled_logits_count_ith
llama_get_sampled_logits_ith
llama_get_sampled_probs_count_ith
llama_get_sampled_probs_ith
llama_get_sampled_token_ith
llama_get_state_size
llama_init_from_model
llama_load_model_from_file
llama_load_session_file
llama_log_get
llama_log_set
llama_max_devices
llama_max_parallel_sequences
llama_max_tensor_buft_overrides
llama_memory_can_shift
llama_memory_clear
llama_memory_seq_add
llama_memory_seq_cp
llama_memory_seq_div
llama_memory_seq_keep
llama_memory_seq_pos_max
llama_memory_seq_pos_min
llama_memory_seq_rm
llama_model_chat_template
llama_model_cls_label
llama_model_decoder_start_token
llama_model_default_params
llama_model_desc
llama_model_free
llama_model_get_vocab
llama_model_has_decoder
llama_model_has_encoder
llama_model_init_from_user
llama_model_is_diffusion
llama_model_is_hybrid
llama_model_is_recurrent
llama_model_load_from_file
llama_model_load_from_file_ptr
llama_model_load_from_splits
llama_model_meta_count
llama_model_meta_key_by_index
llama_model_meta_key_str
llama_model_meta_val_str
llama_model_meta_val_str_by_index
llama_model_n_cls_out
llama_model_n_ctx_train
llama_model_n_embd
llama_model_n_embd_inp
llama_model_n_embd_out
llama_model_n_head
llama_model_n_head_kv
llama_model_n_layer
llama_model_n_params
llama_model_n_swa
llama_model_quantize
llama_model_quantize_default_params
llama_model_rope_freq_scale_train
llama_model_rope_type
llama_model_save_to_file
llama_model_size
llama_n_batch
llama_n_ctx
llama_n_ctx_seq
llama_n_ctx_train
llama_n_embd
llama_n_head
llama_n_layer
llama_n_rs_seq
llama_n_seq_max
llama_n_threads
llama_n_threads_batch
llama_n_ubatch
llama_n_vocab
llama_new_context_with_model
llama_numa_init
llama_opt_epoch
llama_opt_init
llama_opt_param_filter_all
llama_perf_context
llama_perf_context_print
llama_perf_context_reset
llama_perf_sampler
llama_perf_sampler_print
llama_perf_sampler_reset
llama_pooling_type
llama_print_system_info
llama_sampler_accept
llama_sampler_apply
llama_sampler_chain_add
llama_sampler_chain_default_params
llama_sampler_chain_get
llama_sampler_chain_init
llama_sampler_chain_n
llama_sampler_chain_remove
llama_sampler_clone
llama_sampler_free
llama_sampler_get_seed
llama_sampler_init
llama_sampler_init_adaptive_p
adaptive-p: select tokens near a configurable target probability over time.
llama_sampler_init_dist
seed == LLAMA_DEFAULT_SEED to use a random seed.
llama_sampler_init_dry
@details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
llama_sampler_init_grammar
@details Initializes a GBNF grammar, see grammars/README.md for details. @param vocab The vocabulary that this grammar will be used with. @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails. @param grammar_root The name of the start symbol for the grammar.
llama_sampler_init_grammar_lazy
llama_sampler_init_grammar_lazy_patterns
@details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639 @param trigger_patterns A list of patterns that will trigger the grammar sampler. Pattern will be matched from the start of the generation output, and grammar sampler will be fed content starting from its first match group. @param trigger_tokens A list of tokens that will trigger the grammar sampler. Grammar sampler will be fed content starting from the trigger token included.
llama_sampler_init_greedy
llama_sampler_init_infill
llama_sampler_init_logit_bias
llama_sampler_init_min_p
@details Minimum P sampling as described in https://github.com/ggml-org/llama.cpp/pull/3841
llama_sampler_init_mirostat
@details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param m The number of tokens considered in the estimation of s_hat. This is an arbitrary value that is used to calculate s_hat, which in turn helps to calculate the value of k. In the paper, they use m = 100, but you can experiment with different values to see how it affects the performance of the algorithm. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sampler_init_mirostat_v2
@details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sampler_init_penalties
NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
llama_sampler_init_temp
@details Updates the logits l_i' = l_i/t. When t <= 0.0f, the maximum logit is kept at its original value, the rest are set to -inf.
llama_sampler_init_temp_ext
@details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
llama_sampler_init_top_k
@details Top-K sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751. Setting k <= 0 makes this a no-op.
llama_sampler_init_top_n_sigma
@details Top n sigma sampling as described in academic paper “Top-nσ: Not All Logits Are You Need” https://arxiv.org/pdf/2411.07641
llama_sampler_init_top_p
@details Nucleus sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751
llama_sampler_init_typical
@details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
llama_sampler_init_xtc
@details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
llama_sampler_name
llama_sampler_reset
llama_sampler_sample
llama_save_session_file
llama_set_abort_callback
llama_set_adapter_cvec
llama_set_adapters_lora
llama_set_causal_attn
llama_set_embeddings
llama_set_n_threads
llama_set_sampler
llama_set_state_data
llama_set_warmup
llama_split_path
@details Build a split GGUF final path for this chunk. llama_split_path(split_path, sizeof(split_path), “/models/ggml-model-q4_0”, 2, 4) => split_path = “/models/ggml-model-q4_0-00002-of-00004.gguf”
llama_split_prefix
@details Extract the path prefix from the split_path if and only if the split_no and split_count match. llama_split_prefix(split_prefix, 64, “/models/ggml-model-q4_0-00002-of-00004.gguf”, 2, 4) => split_prefix = “/models/ggml-model-q4_0”
llama_state_get_data
llama_state_get_size
llama_state_load_file
llama_state_save_file
llama_state_seq_get_data
llama_state_seq_get_data_ext
llama_state_seq_get_size
llama_state_seq_get_size_ext
llama_state_seq_load_file
llama_state_seq_save_file
llama_state_seq_set_data
llama_state_seq_set_data_ext
llama_state_set_data
llama_supports_gpu_offload
llama_supports_mlock
llama_supports_mmap
llama_supports_rpc
llama_synchronize
llama_time_us
llama_token_bos
llama_token_cls
llama_token_eos
llama_token_eot
llama_token_fim_mid
llama_token_fim_pad
llama_token_fim_pre
llama_token_fim_rep
llama_token_fim_sep
llama_token_fim_suf
llama_token_get_attr
llama_token_get_score
llama_token_get_text
llama_token_is_control
llama_token_is_eog
llama_token_nl
llama_token_pad
llama_token_sep
llama_token_to_piece
llama_tokenize
@details Convert the provided text into tokens. @param tokens The tokens pointer must be large enough to hold the resulting tokens. @return Returns the number of tokens on success, no more than n_tokens_max @return Returns a negative number on failure - the number of tokens that would have been returned @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit) @param add_special Allow to add BOS and EOS tokens if model is configured to do so. @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.
llama_vocab_bos
llama_vocab_cls
llama_vocab_eos
llama_vocab_eot
llama_vocab_fim_mid
llama_vocab_fim_pad
llama_vocab_fim_pre
llama_vocab_fim_rep
llama_vocab_fim_sep
llama_vocab_fim_suf
llama_vocab_get_add_bos
llama_vocab_get_add_eos
llama_vocab_get_add_sep
llama_vocab_get_attr
llama_vocab_get_score
llama_vocab_get_text
llama_vocab_is_control
llama_vocab_is_eog
llama_vocab_mask
llama_vocab_n_tokens
llama_vocab_nl
llama_vocab_pad
llama_vocab_sep
llama_vocab_type
mtmd_batch_add_chunk
mtmd_batch_encode
mtmd_batch_free
mtmd_batch_get_output_embd
mtmd_batch_init
mtmd_bitmap_free
mtmd_bitmap_get_data
mtmd_bitmap_get_id
mtmd_bitmap_get_n_bytes
mtmd_bitmap_get_nx
mtmd_bitmap_get_ny
mtmd_bitmap_init
mtmd_bitmap_init_from_audio
mtmd_bitmap_init_lazy
mtmd_bitmap_is_audio
mtmd_bitmap_set_id
mtmd_context_params_default
mtmd_decode_use_mrope
mtmd_decode_use_non_causal
mtmd_default_marker
mtmd_encode
mtmd_encode_chunk
mtmd_free
mtmd_get_audio_sample_rate
mtmd_get_cap_from_file
mtmd_get_marker
mtmd_get_output_embd
mtmd_helper_bitmap_init_from_buf
mtmd_helper_bitmap_init_from_file
mtmd_helper_decode_image_chunk
mtmd_helper_eval_chunk_single
mtmd_helper_eval_chunks
mtmd_helper_get_n_pos
mtmd_helper_get_n_tokens
mtmd_helper_image_get_decoder_pos
mtmd_helper_log_set
mtmd_helper_support_video
mtmd_image_tokens_get_decoder_pos
mtmd_image_tokens_get_id
mtmd_image_tokens_get_n_pos
mtmd_image_tokens_get_n_tokens
mtmd_image_tokens_get_nx
mtmd_image_tokens_get_ny
mtmd_init_from_file
mtmd_input_chunk_copy
mtmd_input_chunk_free
mtmd_input_chunk_get_id
mtmd_input_chunk_get_n_pos
mtmd_input_chunk_get_n_tokens
mtmd_input_chunk_get_tokens_image
mtmd_input_chunk_get_tokens_text
mtmd_input_chunk_get_type
mtmd_input_chunks_free
mtmd_input_chunks_get
mtmd_input_chunks_init
mtmd_input_chunks_size
mtmd_log_set
mtmd_support_audio
mtmd_support_vision
mtmd_test_create_input_chunks
mtmd_tokenize

Type Aliases§

FILE
_IO_lock_t
__off64_t
__off_t
ggml_abort_callback
ggml_abort_callback_t
ggml_backend_buffer_t
ggml_backend_buffer_type_t
ggml_backend_comm_allreduce_tensor_t
ggml_backend_comm_free_t
ggml_backend_comm_init_t
ggml_backend_dev_get_extra_bufts_t
ggml_backend_dev_t
ggml_backend_eval_callback
ggml_backend_event_t
ggml_backend_get_features_t
ggml_backend_graph_plan_t
ggml_backend_meta_get_split_state_t
ggml_backend_reg_t
ggml_backend_sched_eval_callback
ggml_backend_sched_t
ggml_backend_set_abort_callback_t
ggml_backend_set_n_threads_t
ggml_backend_split_buffer_type_t
ggml_backend_t
ggml_custom1_op_t
ggml_custom2_op_t
ggml_custom3_op_t
ggml_custom_op_t
ggml_fp16_t
ggml_from_float_t
ggml_gallocr_t
ggml_guid
ggml_guid_t
ggml_log_callback
ggml_opt_context_t
ggml_opt_dataset_t
ggml_opt_epoch_callback
ggml_opt_get_optimizer_params
ggml_opt_result_t
ggml_threadpool_t
ggml_to_float_t
ggml_vec_dot_t
gguf_reader_callback_t
llama_memory_t
llama_model_set_tensor_data_t
llama_opt_param_filter
llama_pos
llama_progress_callback
llama_sampler_context_t
llama_seq_id
llama_state_seq_flags
llama_token
mtmd_bitmap_lazy_callback

Unions§

llama_model_kv_override__bindgen_ty_1