Crate llama_cpp_sys_2

Source
Expand description

See llama-cpp-2 for a documented and safe API.

Structs§

_IO_FILE
_IO_codecvt
_IO_marker
_IO_wide_data
ggml_backend
ggml_backend_buffer
ggml_backend_buffer_type
ggml_backend_dev_caps
ggml_backend_dev_props
ggml_backend_device
ggml_backend_event
ggml_backend_feature
ggml_backend_graph_copy
ggml_backend_reg
ggml_backend_sched
ggml_bf16_t
ggml_cgraph
ggml_context
ggml_cplan
ggml_gallocr
ggml_init_params
ggml_object
ggml_opt_context
ggml_opt_dataset
ggml_opt_optimizer_params
ggml_opt_optimizer_params__bindgen_ty_1
ggml_opt_params
ggml_opt_result
ggml_tallocr
ggml_tensor
ggml_threadpool
ggml_threadpool_params
ggml_type_traits
ggml_type_traits_cpu
llama_adapter_lora
llama_batch
llama_chat_message
llama_context
llama_context_params
llama_kv_cache
llama_logit_bias
llama_memory_i
llama_model
llama_model_kv_override
llama_model_params
llama_model_quantize_params
llama_model_tensor_buft_override
llama_opt_params
llama_perf_context_data
llama_perf_sampler_data
llama_sampler
llama_sampler_chain_params
llama_sampler_i
llama_token_data
llama_token_data_array
llama_vocab

Constants§

GGML_BACKEND_BUFFER_USAGE_ANY
GGML_BACKEND_BUFFER_USAGE_COMPUTE
GGML_BACKEND_BUFFER_USAGE_WEIGHTS
GGML_BACKEND_DEVICE_TYPE_ACCEL
GGML_BACKEND_DEVICE_TYPE_CPU
GGML_BACKEND_DEVICE_TYPE_GPU
GGML_FTYPE_ALL_F32
GGML_FTYPE_MOSTLY_BF16
GGML_FTYPE_MOSTLY_F16
GGML_FTYPE_MOSTLY_IQ1_M
GGML_FTYPE_MOSTLY_IQ1_S
GGML_FTYPE_MOSTLY_IQ2_S
GGML_FTYPE_MOSTLY_IQ2_XS
GGML_FTYPE_MOSTLY_IQ2_XXS
GGML_FTYPE_MOSTLY_IQ3_S
GGML_FTYPE_MOSTLY_IQ3_XXS
GGML_FTYPE_MOSTLY_IQ4_NL
GGML_FTYPE_MOSTLY_IQ4_XS
GGML_FTYPE_MOSTLY_Q2_K
GGML_FTYPE_MOSTLY_Q3_K
GGML_FTYPE_MOSTLY_Q4_0
GGML_FTYPE_MOSTLY_Q4_1
GGML_FTYPE_MOSTLY_Q4_1_SOME_F16
GGML_FTYPE_MOSTLY_Q4_K
GGML_FTYPE_MOSTLY_Q5_0
GGML_FTYPE_MOSTLY_Q5_1
GGML_FTYPE_MOSTLY_Q5_K
GGML_FTYPE_MOSTLY_Q6_K
GGML_FTYPE_MOSTLY_Q8_0
GGML_FTYPE_UNKNOWN
GGML_LOG_LEVEL_CONT
GGML_LOG_LEVEL_DEBUG
GGML_LOG_LEVEL_ERROR
GGML_LOG_LEVEL_INFO
GGML_LOG_LEVEL_NONE
GGML_LOG_LEVEL_WARN
GGML_NUMA_STRATEGY_COUNT
GGML_NUMA_STRATEGY_DISABLED
GGML_NUMA_STRATEGY_DISTRIBUTE
GGML_NUMA_STRATEGY_ISOLATE
GGML_NUMA_STRATEGY_MIRROR
GGML_NUMA_STRATEGY_NUMACTL
GGML_OBJECT_TYPE_GRAPH
GGML_OBJECT_TYPE_TENSOR
GGML_OBJECT_TYPE_WORK_BUFFER
GGML_OPT_BUILD_TYPE_FORWARD
GGML_OPT_BUILD_TYPE_GRAD
GGML_OPT_BUILD_TYPE_OPT
GGML_OPT_LOSS_TYPE_CROSS_ENTROPY
GGML_OPT_LOSS_TYPE_MEAN
GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR
GGML_OPT_LOSS_TYPE_SUM
GGML_OP_ACC
GGML_OP_ADD
GGML_OP_ADD1
GGML_OP_ADD_REL_POS
GGML_OP_ARANGE
GGML_OP_ARGMAX
GGML_OP_ARGSORT
GGML_OP_CLAMP
GGML_OP_CONCAT
GGML_OP_CONT
GGML_OP_CONV_2D_DW
GGML_OP_CONV_TRANSPOSE_1D
GGML_OP_CONV_TRANSPOSE_2D
GGML_OP_COS
GGML_OP_COUNT
GGML_OP_COUNT_EQUAL
GGML_OP_CPY
GGML_OP_CROSS_ENTROPY_LOSS
GGML_OP_CROSS_ENTROPY_LOSS_BACK
GGML_OP_CUSTOM
GGML_OP_DIAG
GGML_OP_DIAG_MASK_INF
GGML_OP_DIAG_MASK_ZERO
GGML_OP_DIV
GGML_OP_DUP
GGML_OP_FLASH_ATTN_BACK
GGML_OP_FLASH_ATTN_EXT
GGML_OP_GATED_LINEAR_ATTN
GGML_OP_GET_REL_POS
GGML_OP_GET_ROWS
GGML_OP_GET_ROWS_BACK
GGML_OP_GROUP_NORM
GGML_OP_IM2COL
GGML_OP_IM2COL_BACK
GGML_OP_L2_NORM
GGML_OP_LEAKY_RELU
GGML_OP_LOG
GGML_OP_MAP_CUSTOM1
GGML_OP_MAP_CUSTOM2
GGML_OP_MAP_CUSTOM3
GGML_OP_MEAN
GGML_OP_MUL
GGML_OP_MUL_MAT
GGML_OP_MUL_MAT_ID
GGML_OP_NONE
GGML_OP_NORM
GGML_OP_OPT_STEP_ADAMW
GGML_OP_OUT_PROD
GGML_OP_PAD
GGML_OP_PAD_REFLECT_1D
GGML_OP_PERMUTE
GGML_OP_POOL_1D
GGML_OP_POOL_2D
GGML_OP_POOL_2D_BACK
GGML_OP_POOL_AVG
GGML_OP_POOL_COUNT
GGML_OP_POOL_MAX
GGML_OP_REPEAT
GGML_OP_REPEAT_BACK
GGML_OP_RESHAPE
GGML_OP_RMS_NORM
GGML_OP_RMS_NORM_BACK
GGML_OP_ROPE
GGML_OP_ROPE_BACK
GGML_OP_RWKV_WKV6
GGML_OP_RWKV_WKV7
GGML_OP_SCALE
GGML_OP_SET
GGML_OP_SILU_BACK
GGML_OP_SIN
GGML_OP_SOFT_MAX
GGML_OP_SOFT_MAX_BACK
GGML_OP_SQR
GGML_OP_SQRT
GGML_OP_SSM_CONV
GGML_OP_SSM_SCAN
GGML_OP_SUB
GGML_OP_SUM
GGML_OP_SUM_ROWS
GGML_OP_TIMESTEP_EMBEDDING
GGML_OP_TRANSPOSE
GGML_OP_UNARY
GGML_OP_UPSCALE
GGML_OP_VIEW
GGML_OP_WIN_PART
GGML_OP_WIN_UNPART
GGML_PREC_DEFAULT
GGML_PREC_F32
GGML_SCALE_MODE_BILINEAR
GGML_SCALE_MODE_NEAREST
GGML_SCHED_PRIO_HIGH
GGML_SCHED_PRIO_LOW
GGML_SCHED_PRIO_MEDIUM
GGML_SCHED_PRIO_NORMAL
GGML_SCHED_PRIO_REALTIME
GGML_SORT_ORDER_ASC
GGML_SORT_ORDER_DESC
GGML_STATUS_ABORTED
GGML_STATUS_ALLOC_FAILED
GGML_STATUS_FAILED
GGML_STATUS_SUCCESS
GGML_TENSOR_FLAG_INPUT
GGML_TENSOR_FLAG_LOSS
GGML_TENSOR_FLAG_OUTPUT
GGML_TENSOR_FLAG_PARAM
GGML_TYPE_BF16
GGML_TYPE_COUNT
GGML_TYPE_F16
GGML_TYPE_F32
GGML_TYPE_F64
GGML_TYPE_I8
GGML_TYPE_I16
GGML_TYPE_I32
GGML_TYPE_I64
GGML_TYPE_IQ1_M
GGML_TYPE_IQ1_S
GGML_TYPE_IQ2_S
GGML_TYPE_IQ2_XS
GGML_TYPE_IQ2_XXS
GGML_TYPE_IQ3_S
GGML_TYPE_IQ3_XXS
GGML_TYPE_IQ4_NL
GGML_TYPE_IQ4_XS
GGML_TYPE_Q2_K
GGML_TYPE_Q3_K
GGML_TYPE_Q4_0
GGML_TYPE_Q4_1
GGML_TYPE_Q4_K
GGML_TYPE_Q5_0
GGML_TYPE_Q5_1
GGML_TYPE_Q5_K
GGML_TYPE_Q6_K
GGML_TYPE_Q8_0
GGML_TYPE_Q8_1
GGML_TYPE_Q8_K
GGML_TYPE_TQ1_0
GGML_TYPE_TQ2_0
GGML_UNARY_OP_ABS
GGML_UNARY_OP_COUNT
GGML_UNARY_OP_ELU
GGML_UNARY_OP_EXP
GGML_UNARY_OP_GELU
GGML_UNARY_OP_GELU_ERF
GGML_UNARY_OP_GELU_QUICK
GGML_UNARY_OP_HARDSIGMOID
GGML_UNARY_OP_HARDSWISH
GGML_UNARY_OP_NEG
GGML_UNARY_OP_RELU
GGML_UNARY_OP_SGN
GGML_UNARY_OP_SIGMOID
GGML_UNARY_OP_SILU
GGML_UNARY_OP_STEP
GGML_UNARY_OP_TANH
LLAMA_ATTENTION_TYPE_CAUSAL
LLAMA_ATTENTION_TYPE_NON_CAUSAL
LLAMA_ATTENTION_TYPE_UNSPECIFIED
LLAMA_FTYPE_ALL_F32
LLAMA_FTYPE_GUESSED
LLAMA_FTYPE_MOSTLY_BF16
LLAMA_FTYPE_MOSTLY_F16
LLAMA_FTYPE_MOSTLY_IQ1_M
LLAMA_FTYPE_MOSTLY_IQ1_S
LLAMA_FTYPE_MOSTLY_IQ2_M
LLAMA_FTYPE_MOSTLY_IQ2_S
LLAMA_FTYPE_MOSTLY_IQ2_XS
LLAMA_FTYPE_MOSTLY_IQ2_XXS
LLAMA_FTYPE_MOSTLY_IQ3_M
LLAMA_FTYPE_MOSTLY_IQ3_S
LLAMA_FTYPE_MOSTLY_IQ3_XS
LLAMA_FTYPE_MOSTLY_IQ3_XXS
LLAMA_FTYPE_MOSTLY_IQ4_NL
LLAMA_FTYPE_MOSTLY_IQ4_XS
LLAMA_FTYPE_MOSTLY_Q2_K
LLAMA_FTYPE_MOSTLY_Q2_K_S
LLAMA_FTYPE_MOSTLY_Q3_K_L
LLAMA_FTYPE_MOSTLY_Q3_K_M
LLAMA_FTYPE_MOSTLY_Q3_K_S
LLAMA_FTYPE_MOSTLY_Q4_0
LLAMA_FTYPE_MOSTLY_Q4_1
LLAMA_FTYPE_MOSTLY_Q4_K_M
LLAMA_FTYPE_MOSTLY_Q4_K_S
LLAMA_FTYPE_MOSTLY_Q5_0
LLAMA_FTYPE_MOSTLY_Q5_1
LLAMA_FTYPE_MOSTLY_Q5_K_M
LLAMA_FTYPE_MOSTLY_Q5_K_S
LLAMA_FTYPE_MOSTLY_Q6_K
LLAMA_FTYPE_MOSTLY_Q8_0
LLAMA_FTYPE_MOSTLY_TQ1_0
LLAMA_FTYPE_MOSTLY_TQ2_0
LLAMA_KV_OVERRIDE_TYPE_BOOL
LLAMA_KV_OVERRIDE_TYPE_FLOAT
LLAMA_KV_OVERRIDE_TYPE_INT
LLAMA_KV_OVERRIDE_TYPE_STR
LLAMA_POOLING_TYPE_CLS
LLAMA_POOLING_TYPE_LAST
LLAMA_POOLING_TYPE_MEAN
LLAMA_POOLING_TYPE_NONE
LLAMA_POOLING_TYPE_RANK
LLAMA_POOLING_TYPE_UNSPECIFIED
LLAMA_ROPE_SCALING_TYPE_LINEAR
LLAMA_ROPE_SCALING_TYPE_LONGROPE
LLAMA_ROPE_SCALING_TYPE_MAX_VALUE
LLAMA_ROPE_SCALING_TYPE_NONE
LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED
LLAMA_ROPE_SCALING_TYPE_YARN
LLAMA_ROPE_TYPE_MROPE
LLAMA_ROPE_TYPE_NEOX
LLAMA_ROPE_TYPE_NONE
LLAMA_ROPE_TYPE_NORM
LLAMA_ROPE_TYPE_VISION
LLAMA_SPLIT_MODE_LAYER
LLAMA_SPLIT_MODE_NONE
LLAMA_SPLIT_MODE_ROW
LLAMA_TOKEN_ATTR_BYTE
LLAMA_TOKEN_ATTR_CONTROL
LLAMA_TOKEN_ATTR_LSTRIP
LLAMA_TOKEN_ATTR_NORMAL
LLAMA_TOKEN_ATTR_NORMALIZED
LLAMA_TOKEN_ATTR_RSTRIP
LLAMA_TOKEN_ATTR_SINGLE_WORD
LLAMA_TOKEN_ATTR_UNDEFINED
LLAMA_TOKEN_ATTR_UNKNOWN
LLAMA_TOKEN_ATTR_UNUSED
LLAMA_TOKEN_ATTR_USER_DEFINED
LLAMA_TOKEN_TYPE_BYTE
LLAMA_TOKEN_TYPE_CONTROL
LLAMA_TOKEN_TYPE_NORMAL
LLAMA_TOKEN_TYPE_UNDEFINED
LLAMA_TOKEN_TYPE_UNKNOWN
LLAMA_TOKEN_TYPE_UNUSED
LLAMA_TOKEN_TYPE_USER_DEFINED
LLAMA_VOCAB_PRE_TYPE_BAILINGMOE
LLAMA_VOCAB_PRE_TYPE_BLOOM
LLAMA_VOCAB_PRE_TYPE_CHAMELEON
LLAMA_VOCAB_PRE_TYPE_CHATGLM3
LLAMA_VOCAB_PRE_TYPE_CHATGLM4
LLAMA_VOCAB_PRE_TYPE_CODESHELL
LLAMA_VOCAB_PRE_TYPE_COMMAND_R
LLAMA_VOCAB_PRE_TYPE_DBRX
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM
LLAMA_VOCAB_PRE_TYPE_DEFAULT
LLAMA_VOCAB_PRE_TYPE_EXAONE
LLAMA_VOCAB_PRE_TYPE_FALCON
LLAMA_VOCAB_PRE_TYPE_GPT2
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH
LLAMA_VOCAB_PRE_TYPE_GPT4O
LLAMA_VOCAB_PRE_TYPE_JAIS
LLAMA_VOCAB_PRE_TYPE_LLAMA3
LLAMA_VOCAB_PRE_TYPE_LLAMA4
LLAMA_VOCAB_PRE_TYPE_MINERVA
LLAMA_VOCAB_PRE_TYPE_MPT
LLAMA_VOCAB_PRE_TYPE_OLMO
LLAMA_VOCAB_PRE_TYPE_PIXTRAL
LLAMA_VOCAB_PRE_TYPE_PORO
LLAMA_VOCAB_PRE_TYPE_QWEN2
LLAMA_VOCAB_PRE_TYPE_REFACT
LLAMA_VOCAB_PRE_TYPE_SEED_CODER
LLAMA_VOCAB_PRE_TYPE_SMAUG
LLAMA_VOCAB_PRE_TYPE_SMOLLM
LLAMA_VOCAB_PRE_TYPE_STABLELM2
LLAMA_VOCAB_PRE_TYPE_STARCODER
LLAMA_VOCAB_PRE_TYPE_SUPERBPE
LLAMA_VOCAB_PRE_TYPE_TEKKEN
LLAMA_VOCAB_PRE_TYPE_TRILLION
LLAMA_VOCAB_PRE_TYPE_VIKING
LLAMA_VOCAB_TYPE_BPE
LLAMA_VOCAB_TYPE_NONE
LLAMA_VOCAB_TYPE_RWKV
LLAMA_VOCAB_TYPE_SPM
LLAMA_VOCAB_TYPE_UGM
LLAMA_VOCAB_TYPE_WPM

Functions§

ggml_abort
ggml_abs
ggml_abs_inplace
ggml_acc
ggml_acc_inplace
ggml_add
ggml_add1
ggml_add1_inplace
ggml_add_cast
ggml_add_inplace
ggml_add_rel_pos
ggml_add_rel_pos_inplace
ggml_arange
ggml_are_same_shape
ggml_are_same_stride
ggml_argmax
ggml_argsort
ggml_backend_alloc_buffer
ggml_backend_alloc_ctx_tensors
ggml_backend_alloc_ctx_tensors_from_buft
ggml_backend_buffer_clear
ggml_backend_buffer_free
ggml_backend_buffer_get_alignment
ggml_backend_buffer_get_alloc_size
ggml_backend_buffer_get_base
ggml_backend_buffer_get_max_size
ggml_backend_buffer_get_size
ggml_backend_buffer_get_type
ggml_backend_buffer_get_usage
ggml_backend_buffer_init_tensor
ggml_backend_buffer_is_host
ggml_backend_buffer_name
ggml_backend_buffer_reset
ggml_backend_buffer_set_usage
ggml_backend_buft_alloc_buffer
ggml_backend_buft_get_alignment
ggml_backend_buft_get_alloc_size
ggml_backend_buft_get_device
ggml_backend_buft_get_max_size
ggml_backend_buft_is_host
ggml_backend_buft_name
ggml_backend_compare_graph_backend
ggml_backend_cpu_buffer_from_ptr
ggml_backend_cpu_buffer_type
ggml_backend_cpu_init
ggml_backend_cpu_reg
ggml_backend_cpu_set_abort_callback
ggml_backend_cpu_set_n_threads
ggml_backend_cpu_set_threadpool
ggml_backend_dev_backend_reg
ggml_backend_dev_buffer_from_host_ptr
ggml_backend_dev_buffer_type
ggml_backend_dev_by_name
ggml_backend_dev_by_type
ggml_backend_dev_count
ggml_backend_dev_description
ggml_backend_dev_get
ggml_backend_dev_get_props
ggml_backend_dev_host_buffer_type
ggml_backend_dev_init
ggml_backend_dev_memory
ggml_backend_dev_name
ggml_backend_dev_offload_op
ggml_backend_dev_supports_buft
ggml_backend_dev_supports_op
ggml_backend_dev_type
ggml_backend_device_register
ggml_backend_event_free
ggml_backend_event_new
ggml_backend_event_record
ggml_backend_event_synchronize
ggml_backend_event_wait
ggml_backend_free
ggml_backend_get_alignment
ggml_backend_get_default_buffer_type
ggml_backend_get_device
ggml_backend_get_max_size
ggml_backend_graph_compute
ggml_backend_graph_compute_async
ggml_backend_graph_copy
ggml_backend_graph_copy_free
ggml_backend_graph_plan_compute
ggml_backend_graph_plan_create
ggml_backend_graph_plan_free
ggml_backend_guid
ggml_backend_init_best
ggml_backend_init_by_name
ggml_backend_init_by_type
ggml_backend_is_cpu
ggml_backend_load
ggml_backend_load_all
ggml_backend_load_all_from_path
ggml_backend_name
ggml_backend_offload_op
ggml_backend_reg_by_name
ggml_backend_reg_count
ggml_backend_reg_dev_count
ggml_backend_reg_dev_get
ggml_backend_reg_get
ggml_backend_reg_get_proc_address
ggml_backend_reg_name
ggml_backend_sched_alloc_graph
ggml_backend_sched_free
ggml_backend_sched_get_backend
ggml_backend_sched_get_buffer_size
ggml_backend_sched_get_n_backends
ggml_backend_sched_get_n_copies
ggml_backend_sched_get_n_splits
ggml_backend_sched_get_tensor_backend
ggml_backend_sched_graph_compute
ggml_backend_sched_graph_compute_async
ggml_backend_sched_new
ggml_backend_sched_reserve
ggml_backend_sched_reset
ggml_backend_sched_set_eval_callback
ggml_backend_sched_set_tensor_backend
ggml_backend_sched_synchronize
ggml_backend_supports_buft
ggml_backend_supports_op
ggml_backend_synchronize
ggml_backend_tensor_alloc
ggml_backend_tensor_copy
ggml_backend_tensor_copy_async
ggml_backend_tensor_get
ggml_backend_tensor_get_async
ggml_backend_tensor_memset
ggml_backend_tensor_set
ggml_backend_tensor_set_async
ggml_backend_unload
ggml_backend_view_init
ggml_bf16_to_fp32
ggml_bf16_to_fp32_row
ggml_blck_size
ggml_build_backward_expand
ggml_build_forward_expand
ggml_can_repeat
ggml_cast
ggml_clamp
ggml_concat
ggml_cont
ggml_cont_1d
ggml_cont_2d
ggml_cont_3d
ggml_cont_4d
ggml_conv_1d
ggml_conv_1d_dw
ggml_conv_1d_dw_ph
ggml_conv_1d_ph
ggml_conv_2d
ggml_conv_2d_dw
ggml_conv_2d_dw_direct
ggml_conv_2d_s1_ph
ggml_conv_2d_sk_p0
ggml_conv_transpose_1d
ggml_conv_transpose_2d_p0
ggml_cos
ggml_cos_inplace
ggml_count_equal
ggml_cpu_bf16_to_fp32
ggml_cpu_fp16_to_fp32
ggml_cpu_fp32_to_bf16
ggml_cpu_fp32_to_fp16
ggml_cpu_get_sve_cnt
ggml_cpu_has_amx_int8
ggml_cpu_has_arm_fma
ggml_cpu_has_avx
ggml_cpu_has_avx2
ggml_cpu_has_avx512
ggml_cpu_has_avx512_bf16
ggml_cpu_has_avx512_vbmi
ggml_cpu_has_avx512_vnni
ggml_cpu_has_avx_vnni
ggml_cpu_has_bmi2
ggml_cpu_has_dotprod
ggml_cpu_has_f16c
ggml_cpu_has_fma
ggml_cpu_has_fp16_va
ggml_cpu_has_llamafile
ggml_cpu_has_matmul_int8
ggml_cpu_has_neon
ggml_cpu_has_riscv_v
ggml_cpu_has_sme
ggml_cpu_has_sse3
ggml_cpu_has_ssse3
ggml_cpu_has_sve
ggml_cpu_has_vsx
ggml_cpu_has_vxe
ggml_cpu_has_wasm_simd
ggml_cpu_init
ggml_cpy
ggml_cross_entropy_loss
ggml_cross_entropy_loss_back
ggml_custom_4d
ggml_custom_inplace
ggml_cycles
ggml_cycles_per_ms
ggml_diag
ggml_diag_mask_inf
ggml_diag_mask_inf_inplace
ggml_diag_mask_zero
ggml_diag_mask_zero_inplace
ggml_div
ggml_div_inplace
ggml_dup
ggml_dup_inplace
ggml_dup_tensor
ggml_element_size
ggml_elu
ggml_elu_inplace
ggml_exp
ggml_exp_inplace
ggml_flash_attn_back
ggml_flash_attn_ext
ggml_flash_attn_ext_get_prec
ggml_flash_attn_ext_set_prec
ggml_fopen
ggml_format_name
ggml_fp16_to_fp32
ggml_fp16_to_fp32_row
ggml_fp32_to_bf16
ggml_fp32_to_bf16_row
ggml_fp32_to_bf16_row_ref
ggml_fp32_to_fp16
ggml_fp32_to_fp16_row
ggml_free
ggml_ftype_to_ggml_type
ggml_gallocr_alloc_graph
ggml_gallocr_free
ggml_gallocr_get_buffer_size
ggml_gallocr_new
ggml_gallocr_new_n
ggml_gallocr_reserve
ggml_gallocr_reserve_n
ggml_gated_linear_attn
ggml_gelu
ggml_gelu_erf
ggml_gelu_erf_inplace
ggml_gelu_inplace
ggml_gelu_quick
ggml_gelu_quick_inplace
ggml_get_data
ggml_get_data_f32
ggml_get_f32_1d
ggml_get_f32_nd
ggml_get_first_tensor
ggml_get_i32_1d
ggml_get_i32_nd
ggml_get_max_tensor_size
ggml_get_mem_buffer
ggml_get_mem_size
ggml_get_name
ggml_get_next_tensor
ggml_get_no_alloc
ggml_get_rel_pos
ggml_get_rows
ggml_get_rows_back
ggml_get_tensor
ggml_get_type_traits
ggml_get_type_traits_cpu
ggml_get_unary_op
ggml_graph_add_node
ggml_graph_clear
ggml_graph_compute
ggml_graph_compute_with_ctx
ggml_graph_cpy
ggml_graph_dump_dot
ggml_graph_dup
ggml_graph_get_grad
ggml_graph_get_grad_acc
ggml_graph_get_tensor
ggml_graph_n_nodes
ggml_graph_node
ggml_graph_nodes
ggml_graph_overhead
ggml_graph_overhead_custom
ggml_graph_plan
ggml_graph_print
ggml_graph_reset
ggml_graph_size
ggml_group_norm
ggml_group_norm_inplace
ggml_guid_matches
ggml_hardsigmoid
ggml_hardswish
ggml_im2col
ggml_im2col_back
ggml_init
ggml_is_3d
ggml_is_contiguous
ggml_is_contiguous_0
ggml_is_contiguous_1
ggml_is_contiguous_2
ggml_is_contiguous_channels
ggml_is_contiguously_allocated
ggml_is_empty
ggml_is_matrix
ggml_is_numa
ggml_is_permuted
ggml_is_quantized
ggml_is_scalar
ggml_is_transposed
ggml_is_vector
ggml_l2_norm
ggml_l2_norm_inplace
ggml_leaky_relu
ggml_log
ggml_log_inplace
ggml_log_set
ggml_map_custom1
ggml_map_custom2
ggml_map_custom3
ggml_map_custom1_inplace
ggml_map_custom2_inplace
ggml_map_custom3_inplace
ggml_mean
ggml_mul
ggml_mul_inplace
ggml_mul_mat
ggml_mul_mat_id
ggml_mul_mat_set_prec
ggml_n_dims
ggml_nbytes
ggml_nbytes_pad
ggml_neg
ggml_neg_inplace
ggml_nelements
ggml_new_buffer
ggml_new_f32
ggml_new_graph
ggml_new_graph_custom
ggml_new_i32
ggml_new_tensor
ggml_new_tensor_1d
ggml_new_tensor_2d
ggml_new_tensor_3d
ggml_new_tensor_4d
ggml_norm
ggml_norm_inplace
ggml_nrows
ggml_numa_init
ggml_op_desc
ggml_op_name
ggml_op_symbol
ggml_opt_alloc
ggml_opt_dataset_data
ggml_opt_dataset_free
ggml_opt_dataset_get_batch
ggml_opt_dataset_get_batch_host
ggml_opt_dataset_init
ggml_opt_dataset_labels
ggml_opt_dataset_ndata
ggml_opt_dataset_shuffle
ggml_opt_default_params
ggml_opt_epoch
ggml_opt_epoch_callback_progress_bar
ggml_opt_eval
ggml_opt_fit
ggml_opt_free
ggml_opt_get_constant_optimizer_params
ggml_opt_get_default_optimizer_params
ggml_opt_grad_acc
ggml_opt_init
ggml_opt_inputs
ggml_opt_labels
ggml_opt_loss
ggml_opt_ncorrect
ggml_opt_outputs
ggml_opt_pred
ggml_opt_prepare_alloc
ggml_opt_reset
ggml_opt_result_accuracy
ggml_opt_result_free
ggml_opt_result_init
ggml_opt_result_loss
ggml_opt_result_ndata
ggml_opt_result_pred
ggml_opt_result_reset
ggml_opt_static_graphs
ggml_opt_step_adamw
ggml_out_prod
ggml_pad
ggml_pad_reflect_1d
ggml_permute
ggml_pool_1d
ggml_pool_2d
ggml_pool_2d_back
ggml_print_object
ggml_print_objects
ggml_quantize_chunk
ggml_quantize_free
ggml_quantize_init
ggml_quantize_requires_imatrix
ggml_relu
ggml_relu_inplace
ggml_repeat
ggml_repeat_4d
ggml_repeat_back
ggml_reset
ggml_reshape
ggml_reshape_1d
ggml_reshape_2d
ggml_reshape_3d
ggml_reshape_4d
ggml_rms_norm
ggml_rms_norm_back
ggml_rms_norm_inplace
ggml_rope
ggml_rope_custom
ggml_rope_custom_inplace
ggml_rope_ext
ggml_rope_ext_back
ggml_rope_ext_inplace
ggml_rope_inplace
ggml_rope_multi
ggml_rope_multi_back
ggml_rope_yarn_corr_dims
ggml_row_size
ggml_rwkv_wkv6
ggml_rwkv_wkv7
ggml_scale
ggml_scale_inplace
ggml_set
ggml_set_1d
ggml_set_1d_inplace
ggml_set_2d
ggml_set_2d_inplace
ggml_set_f32
ggml_set_f32_1d
ggml_set_f32_nd
ggml_set_i32
ggml_set_i32_1d
ggml_set_i32_nd
ggml_set_inplace
ggml_set_input
ggml_set_loss
ggml_set_name
ggml_set_no_alloc
ggml_set_output
ggml_set_param
ggml_set_zero
ggml_sgn
ggml_sgn_inplace
ggml_sigmoid
ggml_sigmoid_inplace
ggml_silu
ggml_silu_back
ggml_silu_inplace
ggml_sin
ggml_sin_inplace
ggml_soft_max
ggml_soft_max_ext
ggml_soft_max_ext_back
ggml_soft_max_ext_back_inplace
ggml_soft_max_inplace
ggml_sqr
ggml_sqr_inplace
ggml_sqrt
ggml_sqrt_inplace
ggml_ssm_conv
ggml_ssm_scan
ggml_status_to_string
ggml_step
ggml_step_inplace
ggml_sub
ggml_sub_inplace
ggml_sum
ggml_sum_rows
ggml_tallocr_alloc
ggml_tallocr_new
ggml_tanh
ggml_tanh_inplace
ggml_tensor_overhead
ggml_threadpool_free
ggml_threadpool_get_n_threads
ggml_threadpool_new
ggml_threadpool_params_default
ggml_threadpool_params_init
ggml_threadpool_params_match
ggml_threadpool_pause
ggml_threadpool_resume
ggml_time_init
ggml_time_ms
ggml_time_us
ggml_timestep_embedding
ggml_top_k
ggml_transpose
ggml_type_name
ggml_type_size
ggml_type_sizef
ggml_unary
ggml_unary_inplace
ggml_unary_op_name
ggml_unravel_index
ggml_upscale
ggml_upscale_ext
ggml_used_mem
ggml_validate_row_data
ggml_view_1d
ggml_view_2d
ggml_view_3d
ggml_view_4d
ggml_view_tensor
ggml_win_part
ggml_win_unpart
llama_adapter_lora_free
llama_adapter_lora_init
llama_add_bos_token
llama_add_eos_token
llama_apply_adapter_cvec
llama_attach_threadpool
llama_backend_free
llama_backend_init
llama_batch_free
llama_batch_get_one
llama_batch_init
llama_chat_apply_template
Apply chat template. Inspired by hf apply_chat_template() on Python. Both “model” and “custom_template” are optional, but at least one is required. “custom_template” has higher precedence than “model”. NOTE: This function does not use a jinja parser. It only supports a pre-defined list of templates. See more: https://github.com/ggml-org/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template @param tmpl A Jinja template to use for this chat. If this is nullptr, the model’s default chat template will be used instead. @param chat Pointer to a list of multiple llama_chat_message @param n_msg Number of llama_chat_message in this chat @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message. @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages) @param length The size of the allocated buffer @return The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-alloc it and then re-apply the template.
llama_chat_builtin_templates
llama_clear_adapter_lora
llama_context_default_params
llama_copy_state_data
llama_decode
llama_detach_threadpool
llama_detokenize
@details Convert the provided tokens into text (inverse of llama_tokenize()). @param text The char pointer must be large enough to hold the resulting text. @return Returns the number of chars/bytes on success, no more than text_len_max. @return Returns a negative number on failure - the number of chars/bytes that would have been returned. @param remove_special Allow to remove BOS and EOS tokens if model is configured to do so. @param unparse_special If true, special tokens are rendered in the output.
llama_encode
llama_free
llama_free_model
llama_get_embeddings
llama_get_embeddings_ith
llama_get_embeddings_seq
llama_get_kv_self
llama_get_logits
llama_get_logits_ith
llama_get_memory
llama_get_model
llama_get_state_size
llama_init_from_model
llama_kv_self_can_shift
llama_kv_self_clear
llama_kv_self_defrag
llama_kv_self_n_tokens
llama_kv_self_seq_add
llama_kv_self_seq_cp
llama_kv_self_seq_div
llama_kv_self_seq_keep
llama_kv_self_seq_pos_max
llama_kv_self_seq_pos_min
llama_kv_self_seq_rm
llama_kv_self_update
llama_kv_self_used_cells
llama_load_model_from_file
llama_load_session_file
llama_log_set
llama_max_devices
llama_max_parallel_sequences
llama_memory_can_shift
llama_memory_clear
llama_memory_seq_add
llama_memory_seq_cp
llama_memory_seq_div
llama_memory_seq_keep
llama_memory_seq_pos_max
llama_memory_seq_pos_min
llama_memory_seq_rm
llama_model_chat_template
llama_model_cls_label
llama_model_decoder_start_token
llama_model_default_params
llama_model_desc
llama_model_free
llama_model_get_vocab
llama_model_has_decoder
llama_model_has_encoder
llama_model_is_recurrent
llama_model_load_from_file
llama_model_load_from_splits
llama_model_meta_count
llama_model_meta_key_by_index
llama_model_meta_val_str
llama_model_meta_val_str_by_index
llama_model_n_cls_out
llama_model_n_ctx_train
llama_model_n_embd
llama_model_n_head
llama_model_n_head_kv
llama_model_n_layer
llama_model_n_params
llama_model_n_swa
llama_model_quantize
llama_model_quantize_default_params
llama_model_rope_freq_scale_train
llama_model_rope_type
llama_model_save_to_file
llama_model_size
llama_n_batch
llama_n_ctx
llama_n_ctx_train
llama_n_embd
llama_n_head
llama_n_layer
llama_n_seq_max
llama_n_threads
llama_n_threads_batch
llama_n_ubatch
llama_n_vocab
llama_new_context_with_model
llama_numa_init
llama_opt_epoch
llama_opt_init
llama_opt_param_filter_all
llama_perf_context
llama_perf_context_print
llama_perf_context_reset
llama_perf_sampler
llama_perf_sampler_print
llama_perf_sampler_reset
llama_pooling_type
llama_print_system_info
llama_rm_adapter_lora
llama_sampler_accept
llama_sampler_apply
llama_sampler_chain_add
llama_sampler_chain_default_params
llama_sampler_chain_get
llama_sampler_chain_init
llama_sampler_chain_n
llama_sampler_chain_remove
llama_sampler_clone
llama_sampler_free
llama_sampler_get_seed
llama_sampler_init
llama_sampler_init_dist
llama_sampler_init_dry
@details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
llama_sampler_init_grammar
@details Initializes a GBNF grammar, see grammars/README.md for details. @param vocab The vocabulary that this grammar will be used with. @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails. @param grammar_root The name of the start symbol for the grammar.
llama_sampler_init_grammar_lazy
llama_sampler_init_grammar_lazy_patterns
@details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639 @param trigger_patterns A list of patterns that will trigger the grammar sampler. Pattern will be matched from the start of the generation output, and grammar sampler will be fed content starting from its first match group. @param trigger_tokens A list of tokens that will trigger the grammar sampler. Grammar sampler will be fed content starting from the trigger token included.
llama_sampler_init_greedy
llama_sampler_init_infill
llama_sampler_init_logit_bias
llama_sampler_init_min_p
@details Minimum P sampling as described in https://github.com/ggml-org/llama.cpp/pull/3841
llama_sampler_init_mirostat
@details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param m The number of tokens considered in the estimation of s_hat. This is an arbitrary value that is used to calculate s_hat, which in turn helps to calculate the value of k. In the paper, they use m = 100, but you can experiment with different values to see how it affects the performance of the algorithm. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sampler_init_mirostat_v2
@details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sampler_init_penalties
NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
llama_sampler_init_softmax
@details Sorts candidate tokens by their logits in descending order and calculates probabilities based on logits. NOTE: Avoid using on the full vocabulary as the sorting can become slow. For example, apply top-k or top-p sampling first.
llama_sampler_init_temp
@details Updates the logits l_i' = l_i/t. When t <= 0.0f, the maximum logit is kept at its original value, the rest are set to -inf
llama_sampler_init_temp_ext
@details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
llama_sampler_init_top_k
@details Top-K sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751 Setting k <= 0 makes this a noop
llama_sampler_init_top_n_sigma
@details Top n sigma sampling as described in academic paper “Top-nσ: Not All Logits Are You Need” https://arxiv.org/pdf/2411.07641
llama_sampler_init_top_p
@details Nucleus sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751
llama_sampler_init_typical
@details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
llama_sampler_init_xtc
@details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
llama_sampler_name
llama_sampler_reset
llama_sampler_sample
llama_save_session_file
llama_set_abort_callback
llama_set_adapter_lora
llama_set_causal_attn
llama_set_embeddings
llama_set_n_threads
llama_set_state_data
llama_set_warmup
llama_split_path
@details Build a split GGUF final path for this chunk. llama_split_path(split_path, sizeof(split_path), “/models/ggml-model-q4_0”, 2, 4) => split_path = “/models/ggml-model-q4_0-00002-of-00004.gguf”
llama_split_prefix
@details Extract the path prefix from the split_path if and only if the split_no and split_count match. llama_split_prefix(split_prefix, 64, “/models/ggml-model-q4_0-00002-of-00004.gguf”, 2, 4) => split_prefix = “/models/ggml-model-q4_0”
llama_state_get_data
llama_state_get_size
llama_state_load_file
llama_state_save_file
llama_state_seq_get_data
llama_state_seq_get_size
llama_state_seq_load_file
llama_state_seq_save_file
llama_state_seq_set_data
llama_state_set_data
llama_supports_gpu_offload
llama_supports_mlock
llama_supports_mmap
llama_supports_rpc
llama_synchronize
llama_time_us
llama_token_bos
llama_token_cls
llama_token_eos
llama_token_eot
llama_token_fim_mid
llama_token_fim_pad
llama_token_fim_pre
llama_token_fim_rep
llama_token_fim_sep
llama_token_fim_suf
llama_token_get_attr
llama_token_get_score
llama_token_get_text
llama_token_is_control
llama_token_is_eog
llama_token_nl
llama_token_pad
llama_token_sep
llama_token_to_piece
llama_tokenize
@details Convert the provided text into tokens. @param tokens The tokens pointer must be large enough to hold the resulting tokens. @return Returns the number of tokens on success, no more than n_tokens_max @return Returns a negative number on failure - the number of tokens that would have been returned @param add_special Allow to add BOS and EOS tokens if model is configured to do so. @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.
llama_vocab_bos
llama_vocab_cls
llama_vocab_eos
llama_vocab_eot
llama_vocab_fim_mid
llama_vocab_fim_pad
llama_vocab_fim_pre
llama_vocab_fim_rep
llama_vocab_fim_sep
llama_vocab_fim_suf
llama_vocab_get_add_bos
llama_vocab_get_add_eos
llama_vocab_get_attr
llama_vocab_get_score
llama_vocab_get_text
llama_vocab_is_control
llama_vocab_is_eog
llama_vocab_n_tokens
llama_vocab_nl
llama_vocab_pad
llama_vocab_sep
llama_vocab_type

Type Aliases§

FILE
_IO_lock_t
__off64_t
__off_t
ggml_abort_callback
ggml_backend_buffer_t
ggml_backend_buffer_type_t
ggml_backend_buffer_usage
ggml_backend_dev_get_extra_bufts_t
ggml_backend_dev_t
ggml_backend_dev_type
ggml_backend_eval_callback
ggml_backend_event_t
ggml_backend_get_features_t
ggml_backend_graph_plan_t
ggml_backend_reg_t
ggml_backend_sched_eval_callback
ggml_backend_sched_t
ggml_backend_set_abort_callback_t
ggml_backend_set_n_threads_t
ggml_backend_split_buffer_type_t
ggml_backend_t
ggml_custom1_op_t
ggml_custom2_op_t
ggml_custom3_op_t
ggml_custom_op_t
ggml_fp16_t
ggml_from_float_t
ggml_ftype
ggml_gallocr_t
ggml_guid
ggml_guid_t
ggml_log_callback
ggml_log_level
ggml_numa_strategy
ggml_object_type
ggml_op
ggml_op_pool
ggml_opt_build_type
ggml_opt_context_t
ggml_opt_dataset_t
ggml_opt_epoch_callback
ggml_opt_get_optimizer_params
ggml_opt_loss_type
ggml_opt_result_t
ggml_prec
ggml_scale_mode
ggml_sched_priority
ggml_sort_order
ggml_status
ggml_tensor_flag
ggml_threadpool_t
ggml_to_float_t
ggml_type
ggml_unary_op
ggml_vec_dot_t
llama_attention_type
llama_ftype
llama_memory_t
llama_model_kv_override_type
llama_opt_param_filter
llama_pooling_type
llama_pos
llama_progress_callback
llama_rope_scaling_type
llama_rope_type
llama_sampler_context_t
llama_seq_id
llama_split_mode
llama_token
llama_token_attr
llama_token_type
llama_vocab_pre_type
llama_vocab_type

Unions§

llama_model_kv_override__bindgen_ty_1