Skip to main content

Crate llama_crab_sys

Crate llama_crab_sys

Expand description

Low-level FFI bindings to llama.cpp.

Generated at build time via bindgen over wrapper.h, which in turn includes the public C headers of llama.cpp, ggml and gguf.

This crate is unsafe by design: every public item is a thin extern "C" wrapper around a llama.cpp symbol. Use the safe llama-crab crate instead unless you need fine-grained control.

§Features

Feature	Description
`common`	Compile `libcommon.a` for chat templates and JSON schema helpers
`cuda`	NVIDIA CUDA backend
`cuda-no-vmm`	CUDA without Virtual Memory Management
`metal`	Apple Metal (default on macOS aarch64)
`vulkan`	Vulkan backend
`rocm`	AMD ROCm/HIP backend
`openmp`	OpenMP parallel CPU backend (default)
`dynamic-link`	Link against `libllama` as a shared object
`system-ggml`	Use GGML from the system instead of the bundled copy
`mtmd`	Multimodal (vision + audio) helpers
`llguidance`	`llguidance` sampler (custom C-ABI vtable)
`dynamic-backends`	Load GGML backends as shared objects at runtime

Structs§

Enums§

Constants§

Functions§

ggml_abort^⚠
ggml_abs^⚠
ggml_abs_inplace^⚠
ggml_acc^⚠
ggml_acc_inplace^⚠
ggml_add^⚠
ggml_add1^⚠
ggml_add1_inplace^⚠
ggml_add_cast^⚠
ggml_add_id^⚠
ggml_add_inplace^⚠
ggml_add_rel_pos^⚠
ggml_add_rel_pos_inplace^⚠
ggml_arange^⚠
ggml_are_same_shape^⚠
ggml_are_same_stride^⚠
ggml_argmax^⚠
ggml_argsort^⚠
ggml_argsort_top_k^⚠
ggml_backend_alloc_buffer^⚠
ggml_backend_alloc_ctx_tensors^⚠
ggml_backend_alloc_ctx_tensors_from_buft^⚠
ggml_backend_alloc_ctx_tensors_from_buft_size^⚠
ggml_backend_buffer_clear^⚠
ggml_backend_buffer_free^⚠
ggml_backend_buffer_get_alignment^⚠
ggml_backend_buffer_get_alloc_size^⚠
ggml_backend_buffer_get_base^⚠
ggml_backend_buffer_get_max_size^⚠
ggml_backend_buffer_get_size^⚠
ggml_backend_buffer_get_type^⚠
ggml_backend_buffer_get_usage^⚠
ggml_backend_buffer_init_tensor^⚠
ggml_backend_buffer_is_host^⚠
ggml_backend_buffer_name^⚠
ggml_backend_buffer_reset^⚠
ggml_backend_buffer_set_usage^⚠
ggml_backend_buft_alloc_buffer^⚠
ggml_backend_buft_get_alignment^⚠
ggml_backend_buft_get_alloc_size^⚠
ggml_backend_buft_get_device^⚠
ggml_backend_buft_get_max_size^⚠
ggml_backend_buft_is_host^⚠
ggml_backend_buft_name^⚠
ggml_backend_compare_graph_backend^⚠
ggml_backend_cpu_buffer_from_ptr^⚠
ggml_backend_cpu_buffer_type^⚠
ggml_backend_cpu_init^⚠
ggml_backend_cpu_reg^⚠
ggml_backend_cpu_set_abort_callback^⚠
ggml_backend_cpu_set_n_threads^⚠
ggml_backend_cpu_set_threadpool^⚠
ggml_backend_cpu_set_use_ref^⚠
ggml_backend_dev_backend_reg^⚠
ggml_backend_dev_buffer_from_host_ptr^⚠
ggml_backend_dev_buffer_type^⚠
ggml_backend_dev_by_name^⚠
ggml_backend_dev_by_type^⚠
ggml_backend_dev_count^⚠
ggml_backend_dev_description^⚠
ggml_backend_dev_get^⚠
ggml_backend_dev_get_props^⚠
ggml_backend_dev_host_buffer_type^⚠
ggml_backend_dev_init^⚠
ggml_backend_dev_memory^⚠
ggml_backend_dev_name^⚠
ggml_backend_dev_offload_op^⚠
ggml_backend_dev_supports_buft^⚠
ggml_backend_dev_supports_op^⚠
ggml_backend_dev_type^⚠
ggml_backend_device_register^⚠
ggml_backend_event_free^⚠
ggml_backend_event_new^⚠
ggml_backend_event_record^⚠
ggml_backend_event_synchronize^⚠
ggml_backend_event_wait^⚠
ggml_backend_free^⚠
ggml_backend_get_alignment^⚠
ggml_backend_get_default_buffer_type^⚠
ggml_backend_get_device^⚠
ggml_backend_get_max_size^⚠
ggml_backend_graph_compute^⚠
ggml_backend_graph_compute_async^⚠
ggml_backend_graph_copy^⚠
ggml_backend_graph_copy_free^⚠
ggml_backend_graph_plan_compute^⚠
ggml_backend_graph_plan_create^⚠
ggml_backend_graph_plan_free^⚠
ggml_backend_guid^⚠
ggml_backend_init_best^⚠
ggml_backend_init_by_name^⚠
ggml_backend_init_by_type^⚠
ggml_backend_is_cpu^⚠
ggml_backend_load^⚠
ggml_backend_load_all^⚠
ggml_backend_load_all_from_path^⚠
ggml_backend_meta_device^⚠
ggml_backend_meta_split_axis_name^⚠
ggml_backend_name^⚠
ggml_backend_offload_op^⚠
ggml_backend_reg_by_name^⚠
ggml_backend_reg_count^⚠
ggml_backend_reg_dev_count^⚠
ggml_backend_reg_dev_get^⚠
ggml_backend_reg_get^⚠
ggml_backend_reg_get_proc_address^⚠
ggml_backend_reg_name^⚠
ggml_backend_register^⚠
ggml_backend_sched_alloc_graph^⚠
ggml_backend_sched_free^⚠
ggml_backend_sched_get_backend^⚠
ggml_backend_sched_get_buffer_size^⚠
ggml_backend_sched_get_buffer_type^⚠
ggml_backend_sched_get_n_backends^⚠
ggml_backend_sched_get_n_copies^⚠
ggml_backend_sched_get_n_splits^⚠
ggml_backend_sched_get_tensor_backend^⚠
ggml_backend_sched_graph_compute^⚠
ggml_backend_sched_graph_compute_async^⚠
ggml_backend_sched_new^⚠
ggml_backend_sched_reserve^⚠
ggml_backend_sched_reserve_size^⚠
ggml_backend_sched_reset^⚠
ggml_backend_sched_set_eval_callback^⚠
ggml_backend_sched_set_tensor_backend^⚠
ggml_backend_sched_split_graph^⚠
ggml_backend_sched_synchronize^⚠
ggml_backend_supports_buft^⚠
ggml_backend_supports_op^⚠
ggml_backend_synchronize^⚠
ggml_backend_tensor_alloc^⚠
ggml_backend_tensor_copy^⚠
ggml_backend_tensor_copy_async^⚠
ggml_backend_tensor_get^⚠
ggml_backend_tensor_get_2d^⚠
ggml_backend_tensor_get_2d_async^⚠
ggml_backend_tensor_get_async^⚠
ggml_backend_tensor_memset^⚠
ggml_backend_tensor_set^⚠
ggml_backend_tensor_set_2d^⚠
ggml_backend_tensor_set_2d_async^⚠
ggml_backend_tensor_set_async^⚠
ggml_backend_unload^⚠
ggml_backend_view_init^⚠
ggml_bf16_to_fp32^⚠
ggml_bf16_to_fp32_row^⚠
ggml_blck_size^⚠
ggml_build_backward_expand^⚠
ggml_build_forward_expand^⚠
ggml_build_forward_select^⚠
ggml_can_repeat^⚠
ggml_cast^⚠
ggml_ceil^⚠
ggml_ceil_inplace^⚠
ggml_clamp^⚠
ggml_col2im_1d^⚠
ggml_commit^⚠
ggml_concat^⚠
ggml_cont^⚠
ggml_cont_1d^⚠
ggml_cont_2d^⚠
ggml_cont_3d^⚠
ggml_cont_4d^⚠
ggml_conv_1d^⚠
ggml_conv_1d_dw^⚠
ggml_conv_1d_dw_ph^⚠
ggml_conv_1d_ph^⚠
ggml_conv_2d^⚠
ggml_conv_2d_direct^⚠
ggml_conv_2d_dw^⚠
ggml_conv_2d_dw_direct^⚠
ggml_conv_2d_s1_ph^⚠
ggml_conv_2d_sk_p0^⚠
ggml_conv_3d^⚠
ggml_conv_3d_direct^⚠
ggml_conv_transpose_1d^⚠
ggml_conv_transpose_2d_p0^⚠
ggml_cos^⚠
ggml_cos_inplace^⚠
ggml_count_equal^⚠
ggml_cpu_bf16_to_fp32^⚠
ggml_cpu_fp16_to_fp32^⚠
ggml_cpu_fp32_to_bf16^⚠
ggml_cpu_fp32_to_fp16^⚠
ggml_cpu_fp32_to_fp32^⚠
ggml_cpu_fp32_to_i32^⚠
ggml_cpu_get_rvv_vlen^⚠
ggml_cpu_get_sve_cnt^⚠
ggml_cpu_has_amx_int8^⚠
ggml_cpu_has_arm_fma^⚠
ggml_cpu_has_avx^⚠
ggml_cpu_has_avx2^⚠
ggml_cpu_has_avx512^⚠
ggml_cpu_has_avx512_bf16^⚠
ggml_cpu_has_avx512_vbmi^⚠
ggml_cpu_has_avx512_vnni^⚠
ggml_cpu_has_avx_vnni^⚠
ggml_cpu_has_bmi2^⚠
ggml_cpu_has_dotprod^⚠
ggml_cpu_has_f16c^⚠
ggml_cpu_has_fma^⚠
ggml_cpu_has_fp16_va^⚠
ggml_cpu_has_llamafile^⚠
ggml_cpu_has_matmul_int8^⚠
ggml_cpu_has_neon^⚠
ggml_cpu_has_riscv_v^⚠
ggml_cpu_has_sme^⚠
ggml_cpu_has_sse3^⚠
ggml_cpu_has_ssse3^⚠
ggml_cpu_has_sve^⚠
ggml_cpu_has_vsx^⚠
ggml_cpu_has_vxe^⚠
ggml_cpu_has_wasm_simd^⚠
ggml_cpu_init^⚠
ggml_cpy^⚠
ggml_cross_entropy_loss^⚠
ggml_cross_entropy_loss_back^⚠
ggml_cumsum^⚠
ggml_custom_4d^⚠
ggml_custom_inplace^⚠
ggml_cycles^⚠
ggml_cycles_per_ms^⚠
ggml_diag^⚠
ggml_diag_mask_inf^⚠
ggml_diag_mask_inf_inplace^⚠
ggml_diag_mask_zero^⚠
ggml_diag_mask_zero_inplace^⚠
ggml_div^⚠
ggml_div_inplace^⚠
ggml_dup^⚠
ggml_dup_inplace^⚠
ggml_dup_tensor^⚠
ggml_element_size^⚠
ggml_elu^⚠
ggml_elu_inplace^⚠
ggml_exp^⚠
ggml_exp_inplace^⚠
ggml_expm1^⚠
ggml_expm1_inplace^⚠
ggml_fill^⚠
ggml_fill_inplace^⚠
ggml_flash_attn_back^⚠
ggml_flash_attn_ext^⚠
ggml_flash_attn_ext_add_sinks^⚠
ggml_flash_attn_ext_get_prec^⚠
ggml_flash_attn_ext_set_prec^⚠
ggml_floor^⚠
ggml_floor_inplace^⚠
ggml_fopen^⚠
ggml_format_name^⚠
ggml_fp16_to_fp32^⚠
ggml_fp16_to_fp32_row^⚠
ggml_fp32_to_bf16^⚠
ggml_fp32_to_bf16_row^⚠
ggml_fp32_to_bf16_row_ref^⚠
ggml_fp32_to_fp16^⚠
ggml_fp32_to_fp16_row^⚠
ggml_free^⚠
ggml_ftype_to_ggml_type^⚠
ggml_gallocr_alloc_graph^⚠
ggml_gallocr_free^⚠
ggml_gallocr_get_buffer_size^⚠
ggml_gallocr_new^⚠
ggml_gallocr_new_n^⚠
ggml_gallocr_reserve^⚠
ggml_gallocr_reserve_n^⚠
ggml_gallocr_reserve_n_size^⚠
ggml_gated_delta_net^⚠
ggml_gated_linear_attn^⚠
ggml_geglu^⚠
ggml_geglu_erf^⚠
ggml_geglu_erf_split^⚠
ggml_geglu_erf_swapped^⚠
ggml_geglu_quick^⚠
ggml_geglu_quick_split^⚠
ggml_geglu_quick_swapped^⚠
ggml_geglu_split^⚠
ggml_geglu_swapped^⚠
ggml_gelu^⚠
ggml_gelu_erf^⚠
ggml_gelu_erf_inplace^⚠
ggml_gelu_inplace^⚠
ggml_gelu_quick^⚠
ggml_gelu_quick_inplace^⚠
ggml_get_data^⚠
ggml_get_data_f32^⚠
ggml_get_f32_1d^⚠
ggml_get_f32_nd^⚠
ggml_get_first_tensor^⚠
ggml_get_glu_op^⚠
ggml_get_i32_1d^⚠
ggml_get_i32_nd^⚠
ggml_get_max_tensor_size^⚠
ggml_get_mem_buffer^⚠
ggml_get_mem_size^⚠
ggml_get_name^⚠
ggml_get_next_tensor^⚠
ggml_get_no_alloc^⚠
ggml_get_rel_pos^⚠
ggml_get_rows^⚠
ggml_get_rows_back^⚠
ggml_get_tensor^⚠
ggml_get_type_traits^⚠
ggml_get_type_traits_cpu^⚠
ggml_get_unary_op^⚠
ggml_glu^⚠
ggml_glu_op_name^⚠
ggml_glu_split^⚠
ggml_graph_add_node^⚠
ggml_graph_clear^⚠
ggml_graph_compute^⚠
ggml_graph_compute_with_ctx^⚠
ggml_graph_cpy^⚠
ggml_graph_dump_dot^⚠
ggml_graph_dup^⚠
ggml_graph_get_grad^⚠
ggml_graph_get_grad_acc^⚠
ggml_graph_get_tensor^⚠
ggml_graph_n_nodes^⚠
ggml_graph_node^⚠
ggml_graph_nodes^⚠
ggml_graph_overhead^⚠
ggml_graph_overhead_custom^⚠
ggml_graph_plan^⚠
ggml_graph_print^⚠
ggml_graph_reset^⚠
ggml_graph_size^⚠
ggml_group_norm^⚠
ggml_group_norm_inplace^⚠
ggml_guid_matches^⚠
ggml_hardsigmoid^⚠
ggml_hardswish^⚠
ggml_im2col^⚠
ggml_im2col_3d^⚠
ggml_im2col_back^⚠
ggml_init^⚠
ggml_interpolate^⚠
ggml_is_3d^⚠
ggml_is_contiguous^⚠
ggml_is_contiguous_0^⚠
ggml_is_contiguous_1^⚠
ggml_is_contiguous_2^⚠
ggml_is_contiguous_channels^⚠
ggml_is_contiguous_rows^⚠
ggml_is_contiguously_allocated^⚠
ggml_is_empty^⚠
ggml_is_matrix^⚠
ggml_is_numa^⚠
ggml_is_permuted^⚠
ggml_is_quantized^⚠
ggml_is_scalar^⚠
ggml_is_transposed^⚠
ggml_is_vector^⚠
ggml_is_view^⚠
ggml_l2_norm^⚠
ggml_l2_norm_inplace^⚠
ggml_leaky_relu^⚠
ggml_log^⚠
ggml_log_get^⚠
ggml_log_inplace^⚠
ggml_log_set^⚠
ggml_map_custom1^⚠
ggml_map_custom2^⚠
ggml_map_custom3^⚠
ggml_map_custom1_inplace^⚠
ggml_map_custom2_inplace^⚠
ggml_map_custom3_inplace^⚠
ggml_mean^⚠
ggml_mul^⚠
ggml_mul_inplace^⚠
ggml_mul_mat^⚠
ggml_mul_mat_id^⚠
ggml_mul_mat_set_hint^⚠
ggml_mul_mat_set_prec^⚠
ggml_n_dims^⚠
ggml_nbytes^⚠
ggml_nbytes_pad^⚠
ggml_neg^⚠
ggml_neg_inplace^⚠
ggml_nelements^⚠
ggml_new_buffer^⚠
ggml_new_f32^⚠
ggml_new_graph^⚠
ggml_new_graph_custom^⚠
ggml_new_i32^⚠
ggml_new_tensor^⚠
ggml_new_tensor_1d^⚠
ggml_new_tensor_2d^⚠
ggml_new_tensor_3d^⚠
ggml_new_tensor_4d^⚠
ggml_norm^⚠
ggml_norm_inplace^⚠
ggml_nrows^⚠
ggml_numa_init^⚠
ggml_op_desc^⚠
ggml_op_name^⚠
ggml_op_symbol^⚠
ggml_opt_alloc^⚠
ggml_opt_context_optimizer_type^⚠
ggml_opt_dataset_data^⚠
ggml_opt_dataset_free^⚠
ggml_opt_dataset_get_batch^⚠
ggml_opt_dataset_get_batch_host^⚠
ggml_opt_dataset_init^⚠
ggml_opt_dataset_labels^⚠
ggml_opt_dataset_ndata^⚠
ggml_opt_dataset_shuffle^⚠
ggml_opt_default_params^⚠
ggml_opt_epoch^⚠
ggml_opt_epoch_callback_progress_bar^⚠
ggml_opt_eval^⚠
ggml_opt_fit^⚠
ggml_opt_free^⚠
ggml_opt_get_constant_optimizer_params^⚠
ggml_opt_get_default_optimizer_params^⚠
ggml_opt_grad_acc^⚠
ggml_opt_init^⚠
ggml_opt_inputs^⚠
ggml_opt_labels^⚠
ggml_opt_loss^⚠
ggml_opt_ncorrect^⚠
ggml_opt_optimizer_name^⚠
ggml_opt_outputs^⚠
ggml_opt_pred^⚠
ggml_opt_prepare_alloc^⚠
ggml_opt_reset^⚠
ggml_opt_result_accuracy^⚠
ggml_opt_result_free^⚠
ggml_opt_result_init^⚠
ggml_opt_result_loss^⚠
ggml_opt_result_ndata^⚠
ggml_opt_result_pred^⚠
ggml_opt_result_reset^⚠
ggml_opt_static_graphs^⚠
ggml_opt_step_adamw^⚠
ggml_opt_step_sgd^⚠
ggml_out_prod^⚠
ggml_pad^⚠
ggml_pad_circular^⚠
ggml_pad_ext^⚠
ggml_pad_ext_circular^⚠
ggml_pad_reflect_1d^⚠
ggml_permute^⚠
ggml_pool_1d^⚠
ggml_pool_2d^⚠
ggml_pool_2d_back^⚠
ggml_print_object^⚠
ggml_print_objects^⚠
ggml_quantize_chunk^⚠
ggml_quantize_free^⚠
ggml_quantize_init^⚠
ggml_quantize_requires_imatrix^⚠
ggml_reglu^⚠
ggml_reglu_split^⚠
ggml_reglu_swapped^⚠
ggml_relu^⚠
ggml_relu_inplace^⚠
ggml_repeat^⚠
ggml_repeat_4d^⚠
ggml_repeat_back^⚠
ggml_reset^⚠
ggml_reshape^⚠
ggml_reshape_1d^⚠
ggml_reshape_2d^⚠
ggml_reshape_3d^⚠
ggml_reshape_4d^⚠
ggml_rms_norm^⚠
ggml_rms_norm_back^⚠
ggml_rms_norm_inplace^⚠
ggml_roll^⚠
ggml_rope^⚠
ggml_rope_custom^⚠
ggml_rope_custom_inplace^⚠
ggml_rope_ext^⚠
ggml_rope_ext_back^⚠
ggml_rope_ext_inplace^⚠
ggml_rope_inplace^⚠
ggml_rope_multi^⚠
ggml_rope_multi_back^⚠
ggml_rope_multi_inplace^⚠
ggml_rope_yarn_corr_dims^⚠
ggml_round^⚠
ggml_round_inplace^⚠
ggml_row_size^⚠
ggml_rwkv_wkv6^⚠
ggml_rwkv_wkv7^⚠
ggml_scale^⚠
ggml_scale_bias^⚠
ggml_scale_bias_inplace^⚠
ggml_scale_inplace^⚠
ggml_set^⚠
ggml_set_1d^⚠
ggml_set_1d_inplace^⚠
ggml_set_2d^⚠
ggml_set_2d_inplace^⚠
ggml_set_abort_callback^⚠
ggml_set_f32^⚠
ggml_set_f32_1d^⚠
ggml_set_f32_nd^⚠
ggml_set_i32^⚠
ggml_set_i32_1d^⚠
ggml_set_i32_nd^⚠
ggml_set_inplace^⚠
ggml_set_input^⚠
ggml_set_loss^⚠
ggml_set_name^⚠
ggml_set_no_alloc^⚠
ggml_set_output^⚠
ggml_set_param^⚠
ggml_set_rows^⚠
ggml_set_zero^⚠
ggml_sgn^⚠
ggml_sgn_inplace^⚠
ggml_sigmoid^⚠
ggml_sigmoid_inplace^⚠
ggml_silu^⚠
ggml_silu_back^⚠
ggml_silu_inplace^⚠
ggml_sin^⚠
ggml_sin_inplace^⚠
ggml_soft_max^⚠
ggml_soft_max_add_sinks^⚠
ggml_soft_max_ext^⚠
ggml_soft_max_ext_back^⚠
ggml_soft_max_ext_back_inplace^⚠
ggml_soft_max_ext_inplace^⚠
ggml_soft_max_inplace^⚠
ggml_softplus^⚠
ggml_softplus_inplace^⚠
ggml_solve_tri^⚠
ggml_sqr^⚠
ggml_sqr_inplace^⚠
ggml_sqrt^⚠
ggml_sqrt_inplace^⚠
ggml_ssm_conv^⚠
ggml_ssm_scan^⚠
ggml_status_to_string^⚠
ggml_step^⚠
ggml_step_inplace^⚠
ggml_sub^⚠
ggml_sub_inplace^⚠
ggml_sum^⚠
ggml_sum_rows^⚠
ggml_swiglu^⚠
ggml_swiglu_oai^⚠
ggml_swiglu_split^⚠
ggml_swiglu_swapped^⚠
ggml_tallocr_alloc^⚠
ggml_tallocr_new^⚠
ggml_tanh^⚠
ggml_tanh_inplace^⚠
ggml_tensor_overhead^⚠
ggml_threadpool_free^⚠
ggml_threadpool_get_n_threads^⚠
ggml_threadpool_new^⚠
ggml_threadpool_params_default^⚠
ggml_threadpool_params_init^⚠
ggml_threadpool_params_match^⚠
ggml_threadpool_pause^⚠
ggml_threadpool_resume^⚠
ggml_time_init^⚠
ggml_time_ms^⚠
ggml_time_us^⚠
ggml_timestep_embedding^⚠
ggml_top_k^⚠
ggml_transpose^⚠
ggml_tri^⚠
ggml_trunc^⚠: Truncates the fractional part of each element in the tensor (towards zero). For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0. Similar to std::trunc in C/C++.
ggml_trunc_inplace^⚠
ggml_type_name^⚠
ggml_type_size^⚠
ggml_type_sizef^⚠
ggml_unary^⚠
ggml_unary_inplace^⚠
ggml_unary_op_name^⚠
ggml_unravel_index^⚠
ggml_upscale^⚠
ggml_upscale_ext^⚠
ggml_used_mem^⚠
ggml_validate_row_data^⚠
ggml_version^⚠
ggml_view_1d^⚠
ggml_view_2d^⚠
ggml_view_3d^⚠
ggml_view_4d^⚠
ggml_view_tensor^⚠
ggml_win_part^⚠
ggml_win_unpart^⚠
ggml_xielu^⚠
gguf_add_tensor^⚠
gguf_find_key^⚠
gguf_find_tensor^⚠
gguf_free^⚠
gguf_get_alignment^⚠
gguf_get_arr_data^⚠
gguf_get_arr_n^⚠
gguf_get_arr_str^⚠
gguf_get_arr_type^⚠
gguf_get_data_offset^⚠
gguf_get_key^⚠
gguf_get_kv_type^⚠
gguf_get_meta_data^⚠
gguf_get_meta_size^⚠
gguf_get_n_kv^⚠
gguf_get_n_tensors^⚠
gguf_get_tensor_name^⚠
gguf_get_tensor_offset^⚠
gguf_get_tensor_size^⚠
gguf_get_tensor_type^⚠
gguf_get_val_bool^⚠
gguf_get_val_data^⚠
gguf_get_val_f32^⚠
gguf_get_val_f64^⚠
gguf_get_val_i8^⚠
gguf_get_val_i16^⚠
gguf_get_val_i32^⚠
gguf_get_val_i64^⚠
gguf_get_val_str^⚠
gguf_get_val_u8^⚠
gguf_get_val_u16^⚠
gguf_get_val_u32^⚠
gguf_get_val_u64^⚠
gguf_get_version^⚠
gguf_init_empty^⚠
gguf_init_from_buffer^⚠
gguf_init_from_callback^⚠
gguf_init_from_file^⚠
gguf_init_from_file_ptr^⚠
gguf_remove_key^⚠
gguf_set_arr_data^⚠
gguf_set_arr_str^⚠
gguf_set_kv^⚠
gguf_set_tensor_data^⚠
gguf_set_tensor_type^⚠
gguf_set_val_bool^⚠
gguf_set_val_f32^⚠
gguf_set_val_f64^⚠
gguf_set_val_i8^⚠
gguf_set_val_i16^⚠
gguf_set_val_i32^⚠
gguf_set_val_i64^⚠
gguf_set_val_str^⚠
gguf_set_val_u8^⚠
gguf_set_val_u16^⚠
gguf_set_val_u32^⚠
gguf_set_val_u64^⚠
gguf_type_name^⚠
gguf_write_to_file^⚠
gguf_write_to_file_ptr^⚠
llama_adapter_get_alora_invocation_tokens^⚠
llama_adapter_get_alora_n_invocation_tokens^⚠
llama_adapter_lora_free^⚠
llama_adapter_lora_init^⚠
llama_adapter_meta_count^⚠
llama_adapter_meta_key_by_index^⚠
llama_adapter_meta_val_str^⚠
llama_adapter_meta_val_str_by_index^⚠
llama_add_bos_token^⚠
llama_add_eos_token^⚠
llama_attach_threadpool^⚠
llama_backend_free^⚠
llama_backend_init^⚠
llama_batch_free^⚠
llama_batch_get_one^⚠
llama_batch_init^⚠
llama_chat_apply_template^⚠: Apply chat template. Inspired by Hugging Face's apply_chat_template() in Python.
llama_chat_builtin_templates^⚠
llama_context_default_params^⚠
llama_copy_state_data^⚠
llama_decode^⚠
llama_detach_threadpool^⚠
llama_detokenize^⚠: @details Convert the provided tokens into text (inverse of llama_tokenize()). @param text The char pointer must be large enough to hold the resulting text. @return Returns the number of chars/bytes on success, no more than text_len_max. @return Returns a negative number on failure - the number of chars/bytes that would have been returned. @param remove_special Allow to remove BOS and EOS tokens if model is configured to do so. @param unparse_special If true, special tokens are rendered in the output.
llama_encode^⚠
llama_flash_attn_type_name^⚠
llama_free^⚠
llama_free_model^⚠
llama_get_embeddings^⚠
llama_get_embeddings_ith^⚠
llama_get_embeddings_seq^⚠
llama_get_logits^⚠
llama_get_logits_ith^⚠
llama_get_memory^⚠
llama_get_model^⚠
llama_get_sampled_candidates_count_ith^⚠
llama_get_sampled_candidates_ith^⚠
llama_get_sampled_logits_count_ith^⚠
llama_get_sampled_logits_ith^⚠
llama_get_sampled_probs_count_ith^⚠
llama_get_sampled_probs_ith^⚠
llama_get_sampled_token_ith^⚠
llama_get_state_size^⚠
llama_init_from_model^⚠
llama_load_model_from_file^⚠
llama_load_session_file^⚠
llama_log_get^⚠
llama_log_set^⚠
llama_max_devices^⚠
llama_max_parallel_sequences^⚠
llama_max_tensor_buft_overrides^⚠
llama_memory_can_shift^⚠
llama_memory_clear^⚠
llama_memory_seq_add^⚠
llama_memory_seq_cp^⚠
llama_memory_seq_div^⚠
llama_memory_seq_keep^⚠
llama_memory_seq_pos_max^⚠
llama_memory_seq_pos_min^⚠
llama_memory_seq_rm^⚠
llama_model_chat_template^⚠
llama_model_cls_label^⚠
llama_model_decoder_start_token^⚠
llama_model_default_params^⚠
llama_model_desc^⚠
llama_model_free^⚠
llama_model_get_vocab^⚠
llama_model_has_decoder^⚠
llama_model_has_encoder^⚠
llama_model_init_from_user^⚠
llama_model_is_diffusion^⚠
llama_model_is_hybrid^⚠
llama_model_is_recurrent^⚠
llama_model_load_from_file^⚠
llama_model_load_from_file_ptr^⚠
llama_model_load_from_splits^⚠
llama_model_meta_count^⚠
llama_model_meta_key_by_index^⚠
llama_model_meta_key_str^⚠
llama_model_meta_val_str^⚠
llama_model_meta_val_str_by_index^⚠
llama_model_n_cls_out^⚠
llama_model_n_ctx_train^⚠
llama_model_n_embd^⚠
llama_model_n_embd_inp^⚠
llama_model_n_embd_out^⚠
llama_model_n_head^⚠
llama_model_n_head_kv^⚠
llama_model_n_layer^⚠
llama_model_n_params^⚠
llama_model_n_swa^⚠
llama_model_quantize^⚠
llama_model_quantize_default_params^⚠
llama_model_rope_freq_scale_train^⚠
llama_model_rope_type^⚠
llama_model_save_to_file^⚠
llama_model_size^⚠
llama_n_batch^⚠
llama_n_ctx^⚠
llama_n_ctx_seq^⚠
llama_n_ctx_train^⚠
llama_n_embd^⚠
llama_n_head^⚠
llama_n_layer^⚠
llama_n_rs_seq^⚠
llama_n_seq_max^⚠
llama_n_threads^⚠
llama_n_threads_batch^⚠
llama_n_ubatch^⚠
llama_n_vocab^⚠
llama_new_context_with_model^⚠
llama_numa_init^⚠
llama_opt_epoch^⚠
llama_opt_init^⚠
llama_opt_param_filter_all^⚠
llama_perf_context^⚠
llama_perf_context_print^⚠
llama_perf_context_reset^⚠
llama_perf_sampler^⚠
llama_perf_sampler_print^⚠
llama_perf_sampler_reset^⚠
llama_pooling_type^⚠
llama_print_system_info^⚠
llama_sampler_accept^⚠
llama_sampler_apply^⚠
llama_sampler_chain_add^⚠
llama_sampler_chain_default_params^⚠
llama_sampler_chain_get^⚠
llama_sampler_chain_init^⚠
llama_sampler_chain_n^⚠
llama_sampler_chain_remove^⚠
llama_sampler_clone^⚠
llama_sampler_free^⚠
llama_sampler_get_seed^⚠
llama_sampler_init^⚠
llama_sampler_init_adaptive_p^⚠: adaptive-p: select tokens near a configurable target probability over time.
llama_sampler_init_dist^⚠: seed == LLAMA_DEFAULT_SEED to use a random seed.
llama_sampler_init_dry^⚠: @details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
llama_sampler_init_grammar^⚠: @details Initializes a GBNF grammar, see grammars/README.md for details. @param vocab The vocabulary that this grammar will be used with. @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails. @param grammar_root The name of the start symbol for the grammar.
llama_sampler_init_grammar_lazy^⚠
llama_sampler_init_grammar_lazy_patterns^⚠: @details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639 @param trigger_patterns A list of patterns that will trigger the grammar sampler. Pattern will be matched from the start of the generation output, and grammar sampler will be fed content starting from its first match group. @param trigger_tokens A list of tokens that will trigger the grammar sampler. Grammar sampler will be fed content starting from, and including, the trigger token.
llama_sampler_init_greedy^⚠
llama_sampler_init_infill^⚠
llama_sampler_init_logit_bias^⚠
llama_sampler_init_min_p^⚠: @details Minimum P sampling as described in https://github.com/ggml-org/llama.cpp/pull/3841
llama_sampler_init_mirostat^⚠: @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param m The number of tokens considered in the estimation of s_hat. This is an arbitrary value that is used to calculate s_hat, which in turn helps to calculate the value of k. In the paper, they use m = 100, but you can experiment with different values to see how it affects the performance of the algorithm. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sampler_init_mirostat_v2^⚠: @details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sampler_init_penalties^⚠: NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
llama_sampler_init_temp^⚠: @details Updates the logits l_i' = l_i/t. When t <= 0.0f, the maximum logit is kept at its original value, the rest are set to -inf.
llama_sampler_init_temp_ext^⚠: @details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
llama_sampler_init_top_k^⚠: @details Top-K sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751. Setting k <= 0 makes this a no-op.
llama_sampler_init_top_n_sigma^⚠: @details Top n sigma sampling as described in academic paper “Top-nσ: Not All Logits Are You Need” https://arxiv.org/pdf/2411.07641
llama_sampler_init_top_p^⚠: @details Nucleus sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751
llama_sampler_init_typical^⚠: @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
llama_sampler_init_xtc^⚠: @details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
llama_sampler_name^⚠
llama_sampler_reset^⚠
llama_sampler_sample^⚠
llama_save_session_file^⚠
llama_set_abort_callback^⚠
llama_set_adapter_cvec^⚠
llama_set_adapters_lora^⚠
llama_set_causal_attn^⚠
llama_set_embeddings^⚠
llama_set_n_threads^⚠
llama_set_sampler^⚠
llama_set_state_data^⚠
llama_set_warmup^⚠
llama_split_path^⚠: @details Build a split GGUF final path for this chunk. llama_split_path(split_path, sizeof(split_path), “/models/ggml-model-q4_0”, 2, 4) => split_path = “/models/ggml-model-q4_0-00002-of-00004.gguf”
llama_split_prefix^⚠: @details Extract the path prefix from the split_path if and only if the split_no and split_count match. llama_split_prefix(split_prefix, 64, “/models/ggml-model-q4_0-00002-of-00004.gguf”, 2, 4) => split_prefix = “/models/ggml-model-q4_0”
llama_state_get_data^⚠
llama_state_get_size^⚠
llama_state_load_file^⚠
llama_state_save_file^⚠
llama_state_seq_get_data^⚠
llama_state_seq_get_data_ext^⚠
llama_state_seq_get_size^⚠
llama_state_seq_get_size_ext^⚠
llama_state_seq_load_file^⚠
llama_state_seq_save_file^⚠
llama_state_seq_set_data^⚠
llama_state_seq_set_data_ext^⚠
llama_state_set_data^⚠
llama_supports_gpu_offload^⚠
llama_supports_mlock^⚠
llama_supports_mmap^⚠
llama_supports_rpc^⚠
llama_synchronize^⚠
llama_time_us^⚠
llama_token_bos^⚠
llama_token_cls^⚠
llama_token_eos^⚠
llama_token_eot^⚠
llama_token_fim_mid^⚠
llama_token_fim_pad^⚠
llama_token_fim_pre^⚠
llama_token_fim_rep^⚠
llama_token_fim_sep^⚠
llama_token_fim_suf^⚠
llama_token_get_attr^⚠
llama_token_get_score^⚠
llama_token_get_text^⚠
llama_token_is_control^⚠
llama_token_is_eog^⚠
llama_token_nl^⚠
llama_token_pad^⚠
llama_token_sep^⚠
llama_token_to_piece^⚠
llama_tokenize^⚠: @details Convert the provided text into tokens. @param tokens The tokens pointer must be large enough to hold the resulting tokens. @return Returns the number of tokens on success, no more than n_tokens_max @return Returns a negative number on failure - the number of tokens that would have been returned @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit) @param add_special Allow to add BOS and EOS tokens if model is configured to do so. @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.
llama_vocab_bos^⚠
llama_vocab_cls^⚠
llama_vocab_eos^⚠
llama_vocab_eot^⚠
llama_vocab_fim_mid^⚠
llama_vocab_fim_pad^⚠
llama_vocab_fim_pre^⚠
llama_vocab_fim_rep^⚠
llama_vocab_fim_sep^⚠
llama_vocab_fim_suf^⚠
llama_vocab_get_add_bos^⚠
llama_vocab_get_add_eos^⚠
llama_vocab_get_add_sep^⚠
llama_vocab_get_attr^⚠
llama_vocab_get_score^⚠
llama_vocab_get_text^⚠
llama_vocab_is_control^⚠
llama_vocab_is_eog^⚠
llama_vocab_mask^⚠
llama_vocab_n_tokens^⚠
llama_vocab_nl^⚠
llama_vocab_pad^⚠
llama_vocab_sep^⚠
llama_vocab_type^⚠
mtmd_batch_add_chunk^⚠
mtmd_batch_encode^⚠
mtmd_batch_free^⚠
mtmd_batch_get_output_embd^⚠
mtmd_batch_init^⚠
mtmd_bitmap_free^⚠
mtmd_bitmap_get_data^⚠
mtmd_bitmap_get_id^⚠
mtmd_bitmap_get_n_bytes^⚠
mtmd_bitmap_get_nx^⚠
mtmd_bitmap_get_ny^⚠
mtmd_bitmap_init^⚠
mtmd_bitmap_init_from_audio^⚠
mtmd_bitmap_init_lazy^⚠
mtmd_bitmap_is_audio^⚠
mtmd_bitmap_set_id^⚠
mtmd_context_params_default^⚠
mtmd_decode_use_mrope^⚠
mtmd_decode_use_non_causal^⚠
mtmd_default_marker^⚠
mtmd_encode^⚠
mtmd_encode_chunk^⚠
mtmd_free^⚠
mtmd_get_audio_sample_rate^⚠
mtmd_get_cap_from_file^⚠
mtmd_get_marker^⚠
mtmd_get_output_embd^⚠
mtmd_helper_bitmap_init_from_buf^⚠
mtmd_helper_bitmap_init_from_file^⚠
mtmd_helper_decode_image_chunk^⚠
mtmd_helper_eval_chunk_single^⚠
mtmd_helper_eval_chunks^⚠
mtmd_helper_get_n_pos^⚠
mtmd_helper_get_n_tokens^⚠
mtmd_helper_image_get_decoder_pos^⚠
mtmd_helper_log_set^⚠
mtmd_helper_support_video^⚠
mtmd_image_tokens_get_decoder_pos^⚠
mtmd_image_tokens_get_id^⚠
mtmd_image_tokens_get_n_pos^⚠
mtmd_image_tokens_get_n_tokens^⚠
mtmd_image_tokens_get_nx^⚠
mtmd_image_tokens_get_ny^⚠
mtmd_init_from_file^⚠
mtmd_input_chunk_copy^⚠
mtmd_input_chunk_free^⚠
mtmd_input_chunk_get_id^⚠
mtmd_input_chunk_get_n_pos^⚠
mtmd_input_chunk_get_n_tokens^⚠
mtmd_input_chunk_get_tokens_image^⚠
mtmd_input_chunk_get_tokens_text^⚠
mtmd_input_chunk_get_type^⚠
mtmd_input_chunks_free^⚠
mtmd_input_chunks_get^⚠
mtmd_input_chunks_init^⚠
mtmd_input_chunks_size^⚠
mtmd_log_set^⚠
mtmd_support_audio^⚠
mtmd_support_vision^⚠
mtmd_test_create_input_chunks^⚠
mtmd_tokenize^⚠

Type Aliases§

Unions§

llama_model_kv_override__bindgen_ty_1