Crate llama_cpp_sys_2

Expand description

See llama-cpp-2 for a documented and safe API.

Structs§

Constants§

GGML_BACKEND_BUFFER_USAGE_ANY
GGML_BACKEND_BUFFER_USAGE_COMPUTE
GGML_BACKEND_BUFFER_USAGE_WEIGHTS
GGML_BACKEND_DEVICE_TYPE_ACCEL
GGML_BACKEND_DEVICE_TYPE_CPU
GGML_BACKEND_DEVICE_TYPE_GPU
GGML_FTYPE_ALL_F32
GGML_FTYPE_MOSTLY_BF16
GGML_FTYPE_MOSTLY_F16
GGML_FTYPE_MOSTLY_IQ1_M
GGML_FTYPE_MOSTLY_IQ1_S
GGML_FTYPE_MOSTLY_IQ2_S
GGML_FTYPE_MOSTLY_IQ2_XS
GGML_FTYPE_MOSTLY_IQ2_XXS
GGML_FTYPE_MOSTLY_IQ3_S
GGML_FTYPE_MOSTLY_IQ3_XXS
GGML_FTYPE_MOSTLY_IQ4_NL
GGML_FTYPE_MOSTLY_IQ4_XS
GGML_FTYPE_MOSTLY_Q2_K
GGML_FTYPE_MOSTLY_Q3_K
GGML_FTYPE_MOSTLY_Q4_0
GGML_FTYPE_MOSTLY_Q4_1
GGML_FTYPE_MOSTLY_Q4_1_SOME_F16
GGML_FTYPE_MOSTLY_Q4_K
GGML_FTYPE_MOSTLY_Q5_0
GGML_FTYPE_MOSTLY_Q5_1
GGML_FTYPE_MOSTLY_Q5_K
GGML_FTYPE_MOSTLY_Q6_K
GGML_FTYPE_MOSTLY_Q8_0
GGML_FTYPE_UNKNOWN
GGML_LOG_LEVEL_CONT
GGML_LOG_LEVEL_DEBUG
GGML_LOG_LEVEL_ERROR
GGML_LOG_LEVEL_INFO
GGML_LOG_LEVEL_NONE
GGML_LOG_LEVEL_WARN
GGML_NUMA_STRATEGY_COUNT
GGML_NUMA_STRATEGY_DISABLED
GGML_NUMA_STRATEGY_DISTRIBUTE
GGML_NUMA_STRATEGY_ISOLATE
GGML_NUMA_STRATEGY_MIRROR
GGML_NUMA_STRATEGY_NUMACTL
GGML_OBJECT_TYPE_GRAPH
GGML_OBJECT_TYPE_TENSOR
GGML_OBJECT_TYPE_WORK_BUFFER
GGML_OPT_BUILD_TYPE_FORWARD
GGML_OPT_BUILD_TYPE_GRAD
GGML_OPT_BUILD_TYPE_OPT
GGML_OPT_LOSS_TYPE_CROSS_ENTROPY
GGML_OPT_LOSS_TYPE_MEAN
GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR
GGML_OPT_LOSS_TYPE_SUM
GGML_OP_ACC
GGML_OP_ADD
GGML_OP_ADD1
GGML_OP_ADD_REL_POS
GGML_OP_ARANGE
GGML_OP_ARGMAX
GGML_OP_ARGSORT
GGML_OP_CLAMP
GGML_OP_CONCAT
GGML_OP_CONT
GGML_OP_CONV_2D_DW
GGML_OP_CONV_TRANSPOSE_1D
GGML_OP_CONV_TRANSPOSE_2D
GGML_OP_COS
GGML_OP_COUNT
GGML_OP_COUNT_EQUAL
GGML_OP_CPY
GGML_OP_CROSS_ENTROPY_LOSS
GGML_OP_CROSS_ENTROPY_LOSS_BACK
GGML_OP_CUSTOM
GGML_OP_DIAG
GGML_OP_DIAG_MASK_INF
GGML_OP_DIAG_MASK_ZERO
GGML_OP_DIV
GGML_OP_DUP
GGML_OP_FLASH_ATTN_BACK
GGML_OP_FLASH_ATTN_EXT
GGML_OP_GATED_LINEAR_ATTN
GGML_OP_GET_REL_POS
GGML_OP_GET_ROWS
GGML_OP_GET_ROWS_BACK
GGML_OP_GROUP_NORM
GGML_OP_IM2COL
GGML_OP_IM2COL_BACK
GGML_OP_L2_NORM
GGML_OP_LEAKY_RELU
GGML_OP_LOG
GGML_OP_MAP_CUSTOM1
GGML_OP_MAP_CUSTOM2
GGML_OP_MAP_CUSTOM3
GGML_OP_MEAN
GGML_OP_MUL
GGML_OP_MUL_MAT
GGML_OP_MUL_MAT_ID
GGML_OP_NONE
GGML_OP_NORM
GGML_OP_OPT_STEP_ADAMW
GGML_OP_OUT_PROD
GGML_OP_PAD
GGML_OP_PAD_REFLECT_1D
GGML_OP_PERMUTE
GGML_OP_POOL_1D
GGML_OP_POOL_2D
GGML_OP_POOL_2D_BACK
GGML_OP_POOL_AVG
GGML_OP_POOL_COUNT
GGML_OP_POOL_MAX
GGML_OP_REPEAT
GGML_OP_REPEAT_BACK
GGML_OP_RESHAPE
GGML_OP_RMS_NORM
GGML_OP_RMS_NORM_BACK
GGML_OP_ROPE
GGML_OP_ROPE_BACK
GGML_OP_RWKV_WKV6
GGML_OP_RWKV_WKV7
GGML_OP_SCALE
GGML_OP_SET
GGML_OP_SILU_BACK
GGML_OP_SIN
GGML_OP_SOFT_MAX
GGML_OP_SOFT_MAX_BACK
GGML_OP_SQR
GGML_OP_SQRT
GGML_OP_SSM_CONV
GGML_OP_SSM_SCAN
GGML_OP_SUB
GGML_OP_SUM
GGML_OP_SUM_ROWS
GGML_OP_TIMESTEP_EMBEDDING
GGML_OP_TRANSPOSE
GGML_OP_UNARY
GGML_OP_UPSCALE
GGML_OP_VIEW
GGML_OP_WIN_PART
GGML_OP_WIN_UNPART
GGML_PREC_DEFAULT
GGML_PREC_F32
GGML_SCALE_MODE_BILINEAR
GGML_SCALE_MODE_NEAREST
GGML_SCHED_PRIO_HIGH
GGML_SCHED_PRIO_LOW
GGML_SCHED_PRIO_MEDIUM
GGML_SCHED_PRIO_NORMAL
GGML_SCHED_PRIO_REALTIME
GGML_SORT_ORDER_ASC
GGML_SORT_ORDER_DESC
GGML_STATUS_ABORTED
GGML_STATUS_ALLOC_FAILED
GGML_STATUS_FAILED
GGML_STATUS_SUCCESS
GGML_TENSOR_FLAG_INPUT
GGML_TENSOR_FLAG_LOSS
GGML_TENSOR_FLAG_OUTPUT
GGML_TENSOR_FLAG_PARAM
GGML_TYPE_BF16
GGML_TYPE_COUNT
GGML_TYPE_F16
GGML_TYPE_F32
GGML_TYPE_F64
GGML_TYPE_I8
GGML_TYPE_I16
GGML_TYPE_I32
GGML_TYPE_I64
GGML_TYPE_IQ1_M
GGML_TYPE_IQ1_S
GGML_TYPE_IQ2_S
GGML_TYPE_IQ2_XS
GGML_TYPE_IQ2_XXS
GGML_TYPE_IQ3_S
GGML_TYPE_IQ3_XXS
GGML_TYPE_IQ4_NL
GGML_TYPE_IQ4_XS
GGML_TYPE_Q2_K
GGML_TYPE_Q3_K
GGML_TYPE_Q4_0
GGML_TYPE_Q4_1
GGML_TYPE_Q4_K
GGML_TYPE_Q5_0
GGML_TYPE_Q5_1
GGML_TYPE_Q5_K
GGML_TYPE_Q6_K
GGML_TYPE_Q8_0
GGML_TYPE_Q8_1
GGML_TYPE_Q8_K
GGML_TYPE_TQ1_0
GGML_TYPE_TQ2_0
GGML_UNARY_OP_ABS
GGML_UNARY_OP_COUNT
GGML_UNARY_OP_ELU
GGML_UNARY_OP_EXP
GGML_UNARY_OP_GELU
GGML_UNARY_OP_GELU_ERF
GGML_UNARY_OP_GELU_QUICK
GGML_UNARY_OP_HARDSIGMOID
GGML_UNARY_OP_HARDSWISH
GGML_UNARY_OP_NEG
GGML_UNARY_OP_RELU
GGML_UNARY_OP_SGN
GGML_UNARY_OP_SIGMOID
GGML_UNARY_OP_SILU
GGML_UNARY_OP_STEP
GGML_UNARY_OP_TANH
LLAMA_ATTENTION_TYPE_CAUSAL
LLAMA_ATTENTION_TYPE_NON_CAUSAL
LLAMA_ATTENTION_TYPE_UNSPECIFIED
LLAMA_FTYPE_ALL_F32
LLAMA_FTYPE_GUESSED
LLAMA_FTYPE_MOSTLY_BF16
LLAMA_FTYPE_MOSTLY_F16
LLAMA_FTYPE_MOSTLY_IQ1_M
LLAMA_FTYPE_MOSTLY_IQ1_S
LLAMA_FTYPE_MOSTLY_IQ2_M
LLAMA_FTYPE_MOSTLY_IQ2_S
LLAMA_FTYPE_MOSTLY_IQ2_XS
LLAMA_FTYPE_MOSTLY_IQ2_XXS
LLAMA_FTYPE_MOSTLY_IQ3_M
LLAMA_FTYPE_MOSTLY_IQ3_S
LLAMA_FTYPE_MOSTLY_IQ3_XS
LLAMA_FTYPE_MOSTLY_IQ3_XXS
LLAMA_FTYPE_MOSTLY_IQ4_NL
LLAMA_FTYPE_MOSTLY_IQ4_XS
LLAMA_FTYPE_MOSTLY_Q2_K
LLAMA_FTYPE_MOSTLY_Q2_K_S
LLAMA_FTYPE_MOSTLY_Q3_K_L
LLAMA_FTYPE_MOSTLY_Q3_K_M
LLAMA_FTYPE_MOSTLY_Q3_K_S
LLAMA_FTYPE_MOSTLY_Q4_0
LLAMA_FTYPE_MOSTLY_Q4_1
LLAMA_FTYPE_MOSTLY_Q4_K_M
LLAMA_FTYPE_MOSTLY_Q4_K_S
LLAMA_FTYPE_MOSTLY_Q5_0
LLAMA_FTYPE_MOSTLY_Q5_1
LLAMA_FTYPE_MOSTLY_Q5_K_M
LLAMA_FTYPE_MOSTLY_Q5_K_S
LLAMA_FTYPE_MOSTLY_Q6_K
LLAMA_FTYPE_MOSTLY_Q8_0
LLAMA_FTYPE_MOSTLY_TQ1_0
LLAMA_FTYPE_MOSTLY_TQ2_0
LLAMA_KV_OVERRIDE_TYPE_BOOL
LLAMA_KV_OVERRIDE_TYPE_FLOAT
LLAMA_KV_OVERRIDE_TYPE_INT
LLAMA_KV_OVERRIDE_TYPE_STR
LLAMA_POOLING_TYPE_CLS
LLAMA_POOLING_TYPE_LAST
LLAMA_POOLING_TYPE_MEAN
LLAMA_POOLING_TYPE_NONE
LLAMA_POOLING_TYPE_RANK
LLAMA_POOLING_TYPE_UNSPECIFIED
LLAMA_ROPE_SCALING_TYPE_LINEAR
LLAMA_ROPE_SCALING_TYPE_LONGROPE
LLAMA_ROPE_SCALING_TYPE_MAX_VALUE
LLAMA_ROPE_SCALING_TYPE_NONE
LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED
LLAMA_ROPE_SCALING_TYPE_YARN
LLAMA_ROPE_TYPE_MROPE
LLAMA_ROPE_TYPE_NEOX
LLAMA_ROPE_TYPE_NONE
LLAMA_ROPE_TYPE_NORM
LLAMA_ROPE_TYPE_VISION
LLAMA_SPLIT_MODE_LAYER
LLAMA_SPLIT_MODE_NONE
LLAMA_SPLIT_MODE_ROW
LLAMA_TOKEN_ATTR_BYTE
LLAMA_TOKEN_ATTR_CONTROL
LLAMA_TOKEN_ATTR_LSTRIP
LLAMA_TOKEN_ATTR_NORMAL
LLAMA_TOKEN_ATTR_NORMALIZED
LLAMA_TOKEN_ATTR_RSTRIP
LLAMA_TOKEN_ATTR_SINGLE_WORD
LLAMA_TOKEN_ATTR_UNDEFINED
LLAMA_TOKEN_ATTR_UNKNOWN
LLAMA_TOKEN_ATTR_UNUSED
LLAMA_TOKEN_ATTR_USER_DEFINED
LLAMA_TOKEN_TYPE_BYTE
LLAMA_TOKEN_TYPE_CONTROL
LLAMA_TOKEN_TYPE_NORMAL
LLAMA_TOKEN_TYPE_UNDEFINED
LLAMA_TOKEN_TYPE_UNKNOWN
LLAMA_TOKEN_TYPE_UNUSED
LLAMA_TOKEN_TYPE_USER_DEFINED
LLAMA_VOCAB_PRE_TYPE_BAILINGMOE
LLAMA_VOCAB_PRE_TYPE_BLOOM
LLAMA_VOCAB_PRE_TYPE_CHAMELEON
LLAMA_VOCAB_PRE_TYPE_CHATGLM3
LLAMA_VOCAB_PRE_TYPE_CHATGLM4
LLAMA_VOCAB_PRE_TYPE_CODESHELL
LLAMA_VOCAB_PRE_TYPE_COMMAND_R
LLAMA_VOCAB_PRE_TYPE_DBRX
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM
LLAMA_VOCAB_PRE_TYPE_DEFAULT
LLAMA_VOCAB_PRE_TYPE_EXAONE
LLAMA_VOCAB_PRE_TYPE_FALCON
LLAMA_VOCAB_PRE_TYPE_GPT2
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH
LLAMA_VOCAB_PRE_TYPE_GPT4O
LLAMA_VOCAB_PRE_TYPE_JAIS
LLAMA_VOCAB_PRE_TYPE_LLAMA3
LLAMA_VOCAB_PRE_TYPE_LLAMA4
LLAMA_VOCAB_PRE_TYPE_MINERVA
LLAMA_VOCAB_PRE_TYPE_MPT
LLAMA_VOCAB_PRE_TYPE_OLMO
LLAMA_VOCAB_PRE_TYPE_PIXTRAL
LLAMA_VOCAB_PRE_TYPE_PORO
LLAMA_VOCAB_PRE_TYPE_QWEN2
LLAMA_VOCAB_PRE_TYPE_REFACT
LLAMA_VOCAB_PRE_TYPE_SEED_CODER
LLAMA_VOCAB_PRE_TYPE_SMAUG
LLAMA_VOCAB_PRE_TYPE_SMOLLM
LLAMA_VOCAB_PRE_TYPE_STABLELM2
LLAMA_VOCAB_PRE_TYPE_STARCODER
LLAMA_VOCAB_PRE_TYPE_SUPERBPE
LLAMA_VOCAB_PRE_TYPE_TEKKEN
LLAMA_VOCAB_PRE_TYPE_TRILLION
LLAMA_VOCAB_PRE_TYPE_VIKING
LLAMA_VOCAB_TYPE_BPE
LLAMA_VOCAB_TYPE_NONE
LLAMA_VOCAB_TYPE_RWKV
LLAMA_VOCAB_TYPE_SPM
LLAMA_VOCAB_TYPE_UGM
LLAMA_VOCAB_TYPE_WPM

Functions§

ggml_abort^⚠
ggml_abs^⚠
ggml_abs_inplace^⚠
ggml_acc^⚠
ggml_acc_inplace^⚠
ggml_add^⚠
ggml_add1^⚠
ggml_add1_inplace^⚠
ggml_add_cast^⚠
ggml_add_inplace^⚠
ggml_add_rel_pos^⚠
ggml_add_rel_pos_inplace^⚠
ggml_arange^⚠
ggml_are_same_shape^⚠
ggml_are_same_stride^⚠
ggml_argmax^⚠
ggml_argsort^⚠
ggml_backend_alloc_buffer^⚠
ggml_backend_alloc_ctx_tensors^⚠
ggml_backend_alloc_ctx_tensors_from_buft^⚠
ggml_backend_buffer_clear^⚠
ggml_backend_buffer_free^⚠
ggml_backend_buffer_get_alignment^⚠
ggml_backend_buffer_get_alloc_size^⚠
ggml_backend_buffer_get_base^⚠
ggml_backend_buffer_get_max_size^⚠
ggml_backend_buffer_get_size^⚠
ggml_backend_buffer_get_type^⚠
ggml_backend_buffer_get_usage^⚠
ggml_backend_buffer_init_tensor^⚠
ggml_backend_buffer_is_host^⚠
ggml_backend_buffer_name^⚠
ggml_backend_buffer_reset^⚠
ggml_backend_buffer_set_usage^⚠
ggml_backend_buft_alloc_buffer^⚠
ggml_backend_buft_get_alignment^⚠
ggml_backend_buft_get_alloc_size^⚠
ggml_backend_buft_get_device^⚠
ggml_backend_buft_get_max_size^⚠
ggml_backend_buft_is_host^⚠
ggml_backend_buft_name^⚠
ggml_backend_compare_graph_backend^⚠
ggml_backend_cpu_buffer_from_ptr^⚠
ggml_backend_cpu_buffer_type^⚠
ggml_backend_cpu_init^⚠
ggml_backend_cpu_reg^⚠
ggml_backend_cpu_set_abort_callback^⚠
ggml_backend_cpu_set_n_threads^⚠
ggml_backend_cpu_set_threadpool^⚠
ggml_backend_dev_backend_reg^⚠
ggml_backend_dev_buffer_from_host_ptr^⚠
ggml_backend_dev_buffer_type^⚠
ggml_backend_dev_by_name^⚠
ggml_backend_dev_by_type^⚠
ggml_backend_dev_count^⚠
ggml_backend_dev_description^⚠
ggml_backend_dev_get^⚠
ggml_backend_dev_get_props^⚠
ggml_backend_dev_host_buffer_type^⚠
ggml_backend_dev_init^⚠
ggml_backend_dev_memory^⚠
ggml_backend_dev_name^⚠
ggml_backend_dev_offload_op^⚠
ggml_backend_dev_supports_buft^⚠
ggml_backend_dev_supports_op^⚠
ggml_backend_dev_type^⚠
ggml_backend_device_register^⚠
ggml_backend_event_free^⚠
ggml_backend_event_new^⚠
ggml_backend_event_record^⚠
ggml_backend_event_synchronize^⚠
ggml_backend_event_wait^⚠
ggml_backend_free^⚠
ggml_backend_get_alignment^⚠
ggml_backend_get_default_buffer_type^⚠
ggml_backend_get_device^⚠
ggml_backend_get_max_size^⚠
ggml_backend_graph_compute^⚠
ggml_backend_graph_compute_async^⚠
ggml_backend_graph_copy^⚠
ggml_backend_graph_copy_free^⚠
ggml_backend_graph_plan_compute^⚠
ggml_backend_graph_plan_create^⚠
ggml_backend_graph_plan_free^⚠
ggml_backend_guid^⚠
ggml_backend_init_best^⚠
ggml_backend_init_by_name^⚠
ggml_backend_init_by_type^⚠
ggml_backend_is_cpu^⚠
ggml_backend_load^⚠
ggml_backend_load_all^⚠
ggml_backend_load_all_from_path^⚠
ggml_backend_name^⚠
ggml_backend_offload_op^⚠
ggml_backend_reg_by_name^⚠
ggml_backend_reg_count^⚠
ggml_backend_reg_dev_count^⚠
ggml_backend_reg_dev_get^⚠
ggml_backend_reg_get^⚠
ggml_backend_reg_get_proc_address^⚠
ggml_backend_reg_name^⚠
ggml_backend_sched_alloc_graph^⚠
ggml_backend_sched_free^⚠
ggml_backend_sched_get_backend^⚠
ggml_backend_sched_get_buffer_size^⚠
ggml_backend_sched_get_n_backends^⚠
ggml_backend_sched_get_n_copies^⚠
ggml_backend_sched_get_n_splits^⚠
ggml_backend_sched_get_tensor_backend^⚠
ggml_backend_sched_graph_compute^⚠
ggml_backend_sched_graph_compute_async^⚠
ggml_backend_sched_new^⚠
ggml_backend_sched_reserve^⚠
ggml_backend_sched_reset^⚠
ggml_backend_sched_set_eval_callback^⚠
ggml_backend_sched_set_tensor_backend^⚠
ggml_backend_sched_synchronize^⚠
ggml_backend_supports_buft^⚠
ggml_backend_supports_op^⚠
ggml_backend_synchronize^⚠
ggml_backend_tensor_alloc^⚠
ggml_backend_tensor_copy^⚠
ggml_backend_tensor_copy_async^⚠
ggml_backend_tensor_get^⚠
ggml_backend_tensor_get_async^⚠
ggml_backend_tensor_memset^⚠
ggml_backend_tensor_set^⚠
ggml_backend_tensor_set_async^⚠
ggml_backend_unload^⚠
ggml_backend_view_init^⚠
ggml_bf16_to_fp32^⚠
ggml_bf16_to_fp32_row^⚠
ggml_blck_size^⚠
ggml_build_backward_expand^⚠
ggml_build_forward_expand^⚠
ggml_can_repeat^⚠
ggml_cast^⚠
ggml_clamp^⚠
ggml_concat^⚠
ggml_cont^⚠
ggml_cont_1d^⚠
ggml_cont_2d^⚠
ggml_cont_3d^⚠
ggml_cont_4d^⚠
ggml_conv_1d^⚠
ggml_conv_1d_dw^⚠
ggml_conv_1d_dw_ph^⚠
ggml_conv_1d_ph^⚠
ggml_conv_2d^⚠
ggml_conv_2d_dw^⚠
ggml_conv_2d_dw_direct^⚠
ggml_conv_2d_s1_ph^⚠
ggml_conv_2d_sk_p0^⚠
ggml_conv_transpose_1d^⚠
ggml_conv_transpose_2d_p0^⚠
ggml_cos^⚠
ggml_cos_inplace^⚠
ggml_count_equal^⚠
ggml_cpu_bf16_to_fp32^⚠
ggml_cpu_fp16_to_fp32^⚠
ggml_cpu_fp32_to_bf16^⚠
ggml_cpu_fp32_to_fp16^⚠
ggml_cpu_get_sve_cnt^⚠
ggml_cpu_has_amx_int8^⚠
ggml_cpu_has_arm_fma^⚠
ggml_cpu_has_avx^⚠
ggml_cpu_has_avx2^⚠
ggml_cpu_has_avx512^⚠
ggml_cpu_has_avx512_bf16^⚠
ggml_cpu_has_avx512_vbmi^⚠
ggml_cpu_has_avx512_vnni^⚠
ggml_cpu_has_avx_vnni^⚠
ggml_cpu_has_bmi2^⚠
ggml_cpu_has_dotprod^⚠
ggml_cpu_has_f16c^⚠
ggml_cpu_has_fma^⚠
ggml_cpu_has_fp16_va^⚠
ggml_cpu_has_llamafile^⚠
ggml_cpu_has_matmul_int8^⚠
ggml_cpu_has_neon^⚠
ggml_cpu_has_riscv_v^⚠
ggml_cpu_has_sme^⚠
ggml_cpu_has_sse3^⚠
ggml_cpu_has_ssse3^⚠
ggml_cpu_has_sve^⚠
ggml_cpu_has_vsx^⚠
ggml_cpu_has_vxe^⚠
ggml_cpu_has_wasm_simd^⚠
ggml_cpu_init^⚠
ggml_cpy^⚠
ggml_cross_entropy_loss^⚠
ggml_cross_entropy_loss_back^⚠
ggml_custom_4d^⚠
ggml_custom_inplace^⚠
ggml_cycles^⚠
ggml_cycles_per_ms^⚠
ggml_diag^⚠
ggml_diag_mask_inf^⚠
ggml_diag_mask_inf_inplace^⚠
ggml_diag_mask_zero^⚠
ggml_diag_mask_zero_inplace^⚠
ggml_div^⚠
ggml_div_inplace^⚠
ggml_dup^⚠
ggml_dup_inplace^⚠
ggml_dup_tensor^⚠
ggml_element_size^⚠
ggml_elu^⚠
ggml_elu_inplace^⚠
ggml_exp^⚠
ggml_exp_inplace^⚠
ggml_flash_attn_back^⚠
ggml_flash_attn_ext^⚠
ggml_flash_attn_ext_get_prec^⚠
ggml_flash_attn_ext_set_prec^⚠
ggml_fopen^⚠
ggml_format_name^⚠
ggml_fp16_to_fp32^⚠
ggml_fp16_to_fp32_row^⚠
ggml_fp32_to_bf16^⚠
ggml_fp32_to_bf16_row^⚠
ggml_fp32_to_bf16_row_ref^⚠
ggml_fp32_to_fp16^⚠
ggml_fp32_to_fp16_row^⚠
ggml_free^⚠
ggml_ftype_to_ggml_type^⚠
ggml_gallocr_alloc_graph^⚠
ggml_gallocr_free^⚠
ggml_gallocr_get_buffer_size^⚠
ggml_gallocr_new^⚠
ggml_gallocr_new_n^⚠
ggml_gallocr_reserve^⚠
ggml_gallocr_reserve_n^⚠
ggml_gated_linear_attn^⚠
ggml_gelu^⚠
ggml_gelu_erf^⚠
ggml_gelu_erf_inplace^⚠
ggml_gelu_inplace^⚠
ggml_gelu_quick^⚠
ggml_gelu_quick_inplace^⚠
ggml_get_data^⚠
ggml_get_data_f32^⚠
ggml_get_f32_1d^⚠
ggml_get_f32_nd^⚠
ggml_get_first_tensor^⚠
ggml_get_i32_1d^⚠
ggml_get_i32_nd^⚠
ggml_get_max_tensor_size^⚠
ggml_get_mem_buffer^⚠
ggml_get_mem_size^⚠
ggml_get_name^⚠
ggml_get_next_tensor^⚠
ggml_get_no_alloc^⚠
ggml_get_rel_pos^⚠
ggml_get_rows^⚠
ggml_get_rows_back^⚠
ggml_get_tensor^⚠
ggml_get_type_traits^⚠
ggml_get_type_traits_cpu^⚠
ggml_get_unary_op^⚠
ggml_graph_add_node^⚠
ggml_graph_clear^⚠
ggml_graph_compute^⚠
ggml_graph_compute_with_ctx^⚠
ggml_graph_cpy^⚠
ggml_graph_dump_dot^⚠
ggml_graph_dup^⚠
ggml_graph_get_grad^⚠
ggml_graph_get_grad_acc^⚠
ggml_graph_get_tensor^⚠
ggml_graph_n_nodes^⚠
ggml_graph_node^⚠
ggml_graph_nodes^⚠
ggml_graph_overhead^⚠
ggml_graph_overhead_custom^⚠
ggml_graph_plan^⚠
ggml_graph_print^⚠
ggml_graph_reset^⚠
ggml_graph_size^⚠
ggml_group_norm^⚠
ggml_group_norm_inplace^⚠
ggml_guid_matches^⚠
ggml_hardsigmoid^⚠
ggml_hardswish^⚠
ggml_im2col^⚠
ggml_im2col_back^⚠
ggml_init^⚠
ggml_is_3d^⚠
ggml_is_contiguous^⚠
ggml_is_contiguous_0^⚠
ggml_is_contiguous_1^⚠
ggml_is_contiguous_2^⚠
ggml_is_contiguous_channels^⚠
ggml_is_contiguously_allocated^⚠
ggml_is_empty^⚠
ggml_is_matrix^⚠
ggml_is_numa^⚠
ggml_is_permuted^⚠
ggml_is_quantized^⚠
ggml_is_scalar^⚠
ggml_is_transposed^⚠
ggml_is_vector^⚠
ggml_l2_norm^⚠
ggml_l2_norm_inplace^⚠
ggml_leaky_relu^⚠
ggml_log^⚠
ggml_log_inplace^⚠
ggml_log_set^⚠
ggml_map_custom1^⚠
ggml_map_custom2^⚠
ggml_map_custom3^⚠
ggml_map_custom1_inplace^⚠
ggml_map_custom2_inplace^⚠
ggml_map_custom3_inplace^⚠
ggml_mean^⚠
ggml_mul^⚠
ggml_mul_inplace^⚠
ggml_mul_mat^⚠
ggml_mul_mat_id^⚠
ggml_mul_mat_set_prec^⚠
ggml_n_dims^⚠
ggml_nbytes^⚠
ggml_nbytes_pad^⚠
ggml_neg^⚠
ggml_neg_inplace^⚠
ggml_nelements^⚠
ggml_new_buffer^⚠
ggml_new_f32^⚠
ggml_new_graph^⚠
ggml_new_graph_custom^⚠
ggml_new_i32^⚠
ggml_new_tensor^⚠
ggml_new_tensor_1d^⚠
ggml_new_tensor_2d^⚠
ggml_new_tensor_3d^⚠
ggml_new_tensor_4d^⚠
ggml_norm^⚠
ggml_norm_inplace^⚠
ggml_nrows^⚠
ggml_numa_init^⚠
ggml_op_desc^⚠
ggml_op_name^⚠
ggml_op_symbol^⚠
ggml_opt_alloc^⚠
ggml_opt_dataset_data^⚠
ggml_opt_dataset_free^⚠
ggml_opt_dataset_get_batch^⚠
ggml_opt_dataset_get_batch_host^⚠
ggml_opt_dataset_init^⚠
ggml_opt_dataset_labels^⚠
ggml_opt_dataset_ndata^⚠
ggml_opt_dataset_shuffle^⚠
ggml_opt_default_params^⚠
ggml_opt_epoch^⚠
ggml_opt_epoch_callback_progress_bar^⚠
ggml_opt_eval^⚠
ggml_opt_fit^⚠
ggml_opt_free^⚠
ggml_opt_get_constant_optimizer_params^⚠
ggml_opt_get_default_optimizer_params^⚠
ggml_opt_grad_acc^⚠
ggml_opt_init^⚠
ggml_opt_inputs^⚠
ggml_opt_labels^⚠
ggml_opt_loss^⚠
ggml_opt_ncorrect^⚠
ggml_opt_outputs^⚠
ggml_opt_pred^⚠
ggml_opt_prepare_alloc^⚠
ggml_opt_reset^⚠
ggml_opt_result_accuracy^⚠
ggml_opt_result_free^⚠
ggml_opt_result_init^⚠
ggml_opt_result_loss^⚠
ggml_opt_result_ndata^⚠
ggml_opt_result_pred^⚠
ggml_opt_result_reset^⚠
ggml_opt_static_graphs^⚠
ggml_opt_step_adamw^⚠
ggml_out_prod^⚠
ggml_pad^⚠
ggml_pad_reflect_1d^⚠
ggml_permute^⚠
ggml_pool_1d^⚠
ggml_pool_2d^⚠
ggml_pool_2d_back^⚠
ggml_print_object^⚠
ggml_print_objects^⚠
ggml_quantize_chunk^⚠
ggml_quantize_free^⚠
ggml_quantize_init^⚠
ggml_quantize_requires_imatrix^⚠
ggml_relu^⚠
ggml_relu_inplace^⚠
ggml_repeat^⚠
ggml_repeat_4d^⚠
ggml_repeat_back^⚠
ggml_reset^⚠
ggml_reshape^⚠
ggml_reshape_1d^⚠
ggml_reshape_2d^⚠
ggml_reshape_3d^⚠
ggml_reshape_4d^⚠
ggml_rms_norm^⚠
ggml_rms_norm_back^⚠
ggml_rms_norm_inplace^⚠
ggml_rope^⚠
ggml_rope_custom^⚠
ggml_rope_custom_inplace^⚠
ggml_rope_ext^⚠
ggml_rope_ext_back^⚠
ggml_rope_ext_inplace^⚠
ggml_rope_inplace^⚠
ggml_rope_multi^⚠
ggml_rope_multi_back^⚠
ggml_rope_yarn_corr_dims^⚠
ggml_row_size^⚠
ggml_rwkv_wkv6^⚠
ggml_rwkv_wkv7^⚠
ggml_scale^⚠
ggml_scale_inplace^⚠
ggml_set^⚠
ggml_set_1d^⚠
ggml_set_1d_inplace^⚠
ggml_set_2d^⚠
ggml_set_2d_inplace^⚠
ggml_set_f32^⚠
ggml_set_f32_1d^⚠
ggml_set_f32_nd^⚠
ggml_set_i32^⚠
ggml_set_i32_1d^⚠
ggml_set_i32_nd^⚠
ggml_set_inplace^⚠
ggml_set_input^⚠
ggml_set_loss^⚠
ggml_set_name^⚠
ggml_set_no_alloc^⚠
ggml_set_output^⚠
ggml_set_param^⚠
ggml_set_zero^⚠
ggml_sgn^⚠
ggml_sgn_inplace^⚠
ggml_sigmoid^⚠
ggml_sigmoid_inplace^⚠
ggml_silu^⚠
ggml_silu_back^⚠
ggml_silu_inplace^⚠
ggml_sin^⚠
ggml_sin_inplace^⚠
ggml_soft_max^⚠
ggml_soft_max_ext^⚠
ggml_soft_max_ext_back^⚠
ggml_soft_max_ext_back_inplace^⚠
ggml_soft_max_inplace^⚠
ggml_sqr^⚠
ggml_sqr_inplace^⚠
ggml_sqrt^⚠
ggml_sqrt_inplace^⚠
ggml_ssm_conv^⚠
ggml_ssm_scan^⚠
ggml_status_to_string^⚠
ggml_step^⚠
ggml_step_inplace^⚠
ggml_sub^⚠
ggml_sub_inplace^⚠
ggml_sum^⚠
ggml_sum_rows^⚠
ggml_tallocr_alloc^⚠
ggml_tallocr_new^⚠
ggml_tanh^⚠
ggml_tanh_inplace^⚠
ggml_tensor_overhead^⚠
ggml_threadpool_free^⚠
ggml_threadpool_get_n_threads^⚠
ggml_threadpool_new^⚠
ggml_threadpool_params_default^⚠
ggml_threadpool_params_init^⚠
ggml_threadpool_params_match^⚠
ggml_threadpool_pause^⚠
ggml_threadpool_resume^⚠
ggml_time_init^⚠
ggml_time_ms^⚠
ggml_time_us^⚠
ggml_timestep_embedding^⚠
ggml_top_k^⚠
ggml_transpose^⚠
ggml_type_name^⚠
ggml_type_size^⚠
ggml_type_sizef^⚠
ggml_unary^⚠
ggml_unary_inplace^⚠
ggml_unary_op_name^⚠
ggml_unravel_index^⚠
ggml_upscale^⚠
ggml_upscale_ext^⚠
ggml_used_mem^⚠
ggml_validate_row_data^⚠
ggml_view_1d^⚠
ggml_view_2d^⚠
ggml_view_3d^⚠
ggml_view_4d^⚠
ggml_view_tensor^⚠
ggml_win_part^⚠
ggml_win_unpart^⚠
llama_adapter_lora_free^⚠
llama_adapter_lora_init^⚠
llama_add_bos_token^⚠
llama_add_eos_token^⚠
llama_apply_adapter_cvec^⚠
llama_attach_threadpool^⚠
llama_backend_free^⚠
llama_backend_init^⚠
llama_batch_free^⚠
llama_batch_get_one^⚠
llama_batch_init^⚠
llama_chat_apply_template^⚠: Apply chat template. Inspired by hf apply_chat_template() on python. Both “model” and “custom_template” are optional, but at least one is required. “custom_template” has higher precedence than “model”. NOTE: This function does not use a jinja parser. It only supports a pre-defined list of templates. See more: https://github.com/ggml-org/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template @param tmpl A Jinja template to use for this chat. If this is nullptr, the model’s default chat template will be used instead. @param chat Pointer to a list of multiple llama_chat_message @param n_msg Number of llama_chat_message in this chat @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message. @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages) @param length The size of the allocated buffer @return The total number of bytes of the formatted prompt. If it is larger than the size of buffer, you may need to re-alloc it and then re-apply the template.
llama_chat_builtin_templates^⚠
llama_clear_adapter_lora^⚠
llama_context_default_params^⚠
llama_copy_state_data^⚠
llama_decode^⚠
llama_detach_threadpool^⚠
llama_detokenize^⚠: @details Convert the provided tokens into text (inverse of llama_tokenize()). @param text The char pointer must be large enough to hold the resulting text. @return Returns the number of chars/bytes on success, no more than text_len_max. @return Returns a negative number on failure - the number of chars/bytes that would have been returned. @param remove_special Allow to remove BOS and EOS tokens if model is configured to do so. @param unparse_special If true, special tokens are rendered in the output.
llama_encode^⚠
llama_free^⚠
llama_free_model^⚠
llama_get_embeddings^⚠
llama_get_embeddings_ith^⚠
llama_get_embeddings_seq^⚠
llama_get_kv_self^⚠
llama_get_logits^⚠
llama_get_logits_ith^⚠
llama_get_memory^⚠
llama_get_model^⚠
llama_get_state_size^⚠
llama_init_from_model^⚠
llama_kv_self_can_shift^⚠
llama_kv_self_clear^⚠
llama_kv_self_defrag^⚠
llama_kv_self_n_tokens^⚠
llama_kv_self_seq_add^⚠
llama_kv_self_seq_cp^⚠
llama_kv_self_seq_div^⚠
llama_kv_self_seq_keep^⚠
llama_kv_self_seq_pos_max^⚠
llama_kv_self_seq_pos_min^⚠
llama_kv_self_seq_rm^⚠
llama_kv_self_update^⚠
llama_kv_self_used_cells^⚠
llama_load_model_from_file^⚠
llama_load_session_file^⚠
llama_log_set^⚠
llama_max_devices^⚠
llama_max_parallel_sequences^⚠
llama_memory_can_shift^⚠
llama_memory_clear^⚠
llama_memory_seq_add^⚠
llama_memory_seq_cp^⚠
llama_memory_seq_div^⚠
llama_memory_seq_keep^⚠
llama_memory_seq_pos_max^⚠
llama_memory_seq_pos_min^⚠
llama_memory_seq_rm^⚠
llama_model_chat_template^⚠
llama_model_cls_label^⚠
llama_model_decoder_start_token^⚠
llama_model_default_params^⚠
llama_model_desc^⚠
llama_model_free^⚠
llama_model_get_vocab^⚠
llama_model_has_decoder^⚠
llama_model_has_encoder^⚠
llama_model_is_recurrent^⚠
llama_model_load_from_file^⚠
llama_model_load_from_splits^⚠
llama_model_meta_count^⚠
llama_model_meta_key_by_index^⚠
llama_model_meta_val_str^⚠
llama_model_meta_val_str_by_index^⚠
llama_model_n_cls_out^⚠
llama_model_n_ctx_train^⚠
llama_model_n_embd^⚠
llama_model_n_head^⚠
llama_model_n_head_kv^⚠
llama_model_n_layer^⚠
llama_model_n_params^⚠
llama_model_n_swa^⚠
llama_model_quantize^⚠
llama_model_quantize_default_params^⚠
llama_model_rope_freq_scale_train^⚠
llama_model_rope_type^⚠
llama_model_save_to_file^⚠
llama_model_size^⚠
llama_n_batch^⚠
llama_n_ctx^⚠
llama_n_ctx_train^⚠
llama_n_embd^⚠
llama_n_head^⚠
llama_n_layer^⚠
llama_n_seq_max^⚠
llama_n_threads^⚠
llama_n_threads_batch^⚠
llama_n_ubatch^⚠
llama_n_vocab^⚠
llama_new_context_with_model^⚠
llama_numa_init^⚠
llama_opt_epoch^⚠
llama_opt_init^⚠
llama_opt_param_filter_all^⚠
llama_perf_context^⚠
llama_perf_context_print^⚠
llama_perf_context_reset^⚠
llama_perf_sampler^⚠
llama_perf_sampler_print^⚠
llama_perf_sampler_reset^⚠
llama_pooling_type^⚠
llama_print_system_info^⚠
llama_rm_adapter_lora^⚠
llama_sampler_accept^⚠
llama_sampler_apply^⚠
llama_sampler_chain_add^⚠
llama_sampler_chain_default_params^⚠
llama_sampler_chain_get^⚠
llama_sampler_chain_init^⚠
llama_sampler_chain_n^⚠
llama_sampler_chain_remove^⚠
llama_sampler_clone^⚠
llama_sampler_free^⚠
llama_sampler_get_seed^⚠
llama_sampler_init^⚠
llama_sampler_init_dist^⚠
llama_sampler_init_dry^⚠: @details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
llama_sampler_init_grammar^⚠: @details Initializes a GBNF grammar, see grammars/README.md for details. @param vocab The vocabulary that this grammar will be used with. @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails. @param grammar_root The name of the start symbol for the grammar.
llama_sampler_init_grammar_lazy^⚠
llama_sampler_init_grammar_lazy_patterns^⚠: @details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639 @param trigger_patterns A list of patterns that will trigger the grammar sampler. Pattern will be matched from the start of the generation output, and grammar sampler will be fed content starting from its first match group. @param trigger_tokens A list of tokens that will trigger the grammar sampler. Grammar sampler will be fed content starting from the trigger token included.
llama_sampler_init_greedy^⚠
llama_sampler_init_infill^⚠
llama_sampler_init_logit_bias^⚠
llama_sampler_init_min_p^⚠: @details Minimum P sampling as described in https://github.com/ggml-org/llama.cpp/pull/3841
llama_sampler_init_mirostat^⚠: @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param m The number of tokens considered in the estimation of s_hat. This is an arbitrary value that is used to calculate s_hat, which in turn helps to calculate the value of k. In the paper, they use m = 100, but you can experiment with different values to see how it affects the performance of the algorithm. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sampler_init_mirostat_v2^⚠: @details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sampler_init_penalties^⚠: NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first.
llama_sampler_init_softmax^⚠: @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits. NOTE: Avoid using on the full vocabulary as the sorting can become slow. For example, apply top-k or top-p sampling first.
llama_sampler_init_temp^⚠: @details Updates the logits l_i' = l_i/t. When t <= 0.0f, the maximum logit is kept at its original value; the rest are set to -inf.
llama_sampler_init_temp_ext^⚠: @details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
llama_sampler_init_top_k^⚠: @details Top-K sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751 Setting k <= 0 makes this a noop
llama_sampler_init_top_n_sigma^⚠: @details Top n sigma sampling as described in academic paper “Top-nσ: Not All Logits Are You Need” https://arxiv.org/pdf/2411.07641
llama_sampler_init_top_p^⚠: @details Nucleus sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751
llama_sampler_init_typical^⚠: @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
llama_sampler_init_xtc^⚠: @details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
llama_sampler_name^⚠
llama_sampler_reset^⚠
llama_sampler_sample^⚠
llama_save_session_file^⚠
llama_set_abort_callback^⚠
llama_set_adapter_lora^⚠
llama_set_causal_attn^⚠
llama_set_embeddings^⚠
llama_set_n_threads^⚠
llama_set_state_data^⚠
llama_set_warmup^⚠
llama_split_path^⚠: @details Build a split GGUF final path for this chunk. llama_split_path(split_path, sizeof(split_path), “/models/ggml-model-q4_0”, 2, 4) => split_path = “/models/ggml-model-q4_0-00002-of-00004.gguf”
llama_split_prefix^⚠: @details Extract the path prefix from the split_path if and only if the split_no and split_count match. llama_split_prefix(split_prefix, 64, “/models/ggml-model-q4_0-00002-of-00004.gguf”, 2, 4) => split_prefix = “/models/ggml-model-q4_0”
llama_state_get_data^⚠
llama_state_get_size^⚠
llama_state_load_file^⚠
llama_state_save_file^⚠
llama_state_seq_get_data^⚠
llama_state_seq_get_size^⚠
llama_state_seq_load_file^⚠
llama_state_seq_save_file^⚠
llama_state_seq_set_data^⚠
llama_state_set_data^⚠
llama_supports_gpu_offload^⚠
llama_supports_mlock^⚠
llama_supports_mmap^⚠
llama_supports_rpc^⚠
llama_synchronize^⚠
llama_time_us^⚠
llama_token_bos^⚠
llama_token_cls^⚠
llama_token_eos^⚠
llama_token_eot^⚠
llama_token_fim_mid^⚠
llama_token_fim_pad^⚠
llama_token_fim_pre^⚠
llama_token_fim_rep^⚠
llama_token_fim_sep^⚠
llama_token_fim_suf^⚠
llama_token_get_attr^⚠
llama_token_get_score^⚠
llama_token_get_text^⚠
llama_token_is_control^⚠
llama_token_is_eog^⚠
llama_token_nl^⚠
llama_token_pad^⚠
llama_token_sep^⚠
llama_token_to_piece^⚠
llama_tokenize^⚠: @details Convert the provided text into tokens. @param tokens The tokens pointer must be large enough to hold the resulting tokens. @return Returns the number of tokens on success, no more than n_tokens_max @return Returns a negative number on failure - the number of tokens that would have been returned @param add_special Allow to add BOS and EOS tokens if model is configured to do so. @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.
llama_vocab_bos^⚠
llama_vocab_cls^⚠
llama_vocab_eos^⚠
llama_vocab_eot^⚠
llama_vocab_fim_mid^⚠
llama_vocab_fim_pad^⚠
llama_vocab_fim_pre^⚠
llama_vocab_fim_rep^⚠
llama_vocab_fim_sep^⚠
llama_vocab_fim_suf^⚠
llama_vocab_get_add_bos^⚠
llama_vocab_get_add_eos^⚠
llama_vocab_get_attr^⚠
llama_vocab_get_score^⚠
llama_vocab_get_text^⚠
llama_vocab_is_control^⚠
llama_vocab_is_eog^⚠
llama_vocab_n_tokens^⚠
llama_vocab_nl^⚠
llama_vocab_pad^⚠
llama_vocab_sep^⚠
llama_vocab_type^⚠

Type Aliases§

Unions§

llama_model_kv_override__bindgen_ty_1