Crate ggml_sys_bleedingedge Copy item path Source _IO_FILE _IO_codecvt _IO_marker _IO_wide_data ggml_backend_buffer ggml_cgraph ggml_compute_params ggml_context ggml_cplan ggml_hash_set ggml_init_params ggml_object ggml_opt_context ggml_opt_context__bindgen_ty_1 ggml_opt_context__bindgen_ty_2 ggml_opt_params ggml_opt_params__bindgen_ty_1 ggml_opt_params__bindgen_ty_2 ggml_scratch ggml_tensor ggml_type_traits_t gguf_context gguf_init_params llama_batch llama_beam_view llama_beams_state llama_chat_message llama_context llama_context_params llama_grammar llama_grammar_element llama_kv_cache_view llama_kv_cache_view_cell llama_model llama_model_kv_override llama_model_params llama_model_quantize_params llama_timings llama_token_data llama_token_data_array GGMLSYS_VERSION GGML_DEFAULT_GRAPH_SIZE GGML_DEFAULT_N_THREADS GGML_EXIT_ABORTED GGML_EXIT_SUCCESS GGML_FILE_MAGIC GGML_FILE_VERSION GGML_MAX_CONTEXTS GGML_MAX_DIMS GGML_MAX_NAME GGML_MAX_OP_PARAMS GGML_MAX_PARAMS GGML_MAX_SRC GGML_MEM_ALIGN GGML_N_TASKS_MAX GGML_OBJECT_SIZE GGML_QNT_VERSION GGML_QNT_VERSION_FACTOR GGML_TENSOR_SIZE GGUF_DEFAULT_ALIGNMENT GGUF_MAGIC GGUF_VERSION LLAMA_DEFAULT_SEED LLAMA_FILE_MAGIC_GGLA LLAMA_FILE_MAGIC_GGSN LLAMA_FILE_MAGIC_GGSQ LLAMA_MAX_RNG_STATE LLAMA_SESSION_MAGIC LLAMA_SESSION_VERSION LLAMA_STATE_SEQ_MAGIC LLAMA_STATE_SEQ_VERSION ggml_backend_type_GGML_BACKEND_TYPE_CPU ggml_backend_type_GGML_BACKEND_TYPE_GPU ggml_backend_type_GGML_BACKEND_TYPE_GPU_SPLIT ggml_cgraph_eval_order_GGML_CGRAPH_EVAL_ORDER_COUNT ggml_cgraph_eval_order_GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT ggml_cgraph_eval_order_GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT ggml_ftype_GGML_FTYPE_ALL_F32 ggml_ftype_GGML_FTYPE_MOSTLY_F16 ggml_ftype_GGML_FTYPE_MOSTLY_IQ1_M ggml_ftype_GGML_FTYPE_MOSTLY_IQ1_S ggml_ftype_GGML_FTYPE_MOSTLY_IQ2_S ggml_ftype_GGML_FTYPE_MOSTLY_IQ2_XS ggml_ftype_GGML_FTYPE_MOSTLY_IQ2_XXS ggml_ftype_GGML_FTYPE_MOSTLY_IQ3_S ggml_ftype_GGML_FTYPE_MOSTLY_IQ3_XXS ggml_ftype_GGML_FTYPE_MOSTLY_IQ4_NL 
ggml_ftype_GGML_FTYPE_MOSTLY_IQ4_XS ggml_ftype_GGML_FTYPE_MOSTLY_Q2_K ggml_ftype_GGML_FTYPE_MOSTLY_Q3_K ggml_ftype_GGML_FTYPE_MOSTLY_Q4_0 ggml_ftype_GGML_FTYPE_MOSTLY_Q4_1 ggml_ftype_GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 ggml_ftype_GGML_FTYPE_MOSTLY_Q4_K ggml_ftype_GGML_FTYPE_MOSTLY_Q5_0 ggml_ftype_GGML_FTYPE_MOSTLY_Q5_1 ggml_ftype_GGML_FTYPE_MOSTLY_Q5_K ggml_ftype_GGML_FTYPE_MOSTLY_Q6_K ggml_ftype_GGML_FTYPE_MOSTLY_Q8_0 ggml_ftype_GGML_FTYPE_UNKNOWN ggml_linesearch_GGML_LINESEARCH_BACKTRACKING_ARMIJO ggml_linesearch_GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE ggml_linesearch_GGML_LINESEARCH_BACKTRACKING_WOLFE ggml_linesearch_GGML_LINESEARCH_DEFAULT ggml_log_level_GGML_LOG_LEVEL_DEBUG ggml_log_level_GGML_LOG_LEVEL_ERROR ggml_log_level_GGML_LOG_LEVEL_INFO ggml_log_level_GGML_LOG_LEVEL_WARN ggml_numa_strategy_GGML_NUMA_STRATEGY_COUNT ggml_numa_strategy_GGML_NUMA_STRATEGY_DISABLED ggml_numa_strategy_GGML_NUMA_STRATEGY_DISTRIBUTE ggml_numa_strategy_GGML_NUMA_STRATEGY_ISOLATE ggml_numa_strategy_GGML_NUMA_STRATEGY_MIRROR ggml_numa_strategy_GGML_NUMA_STRATEGY_NUMACTL ggml_object_type_GGML_OBJECT_TYPE_GRAPH ggml_object_type_GGML_OBJECT_TYPE_TENSOR ggml_object_type_GGML_OBJECT_TYPE_WORK_BUFFER ggml_op_GGML_OP_ACC ggml_op_GGML_OP_ADD ggml_op_GGML_OP_ADD1 ggml_op_GGML_OP_ADD_REL_POS ggml_op_GGML_OP_ALIBI ggml_op_GGML_OP_ARANGE ggml_op_GGML_OP_ARGMAX ggml_op_GGML_OP_ARGSORT ggml_op_GGML_OP_CLAMP ggml_op_GGML_OP_CONCAT ggml_op_GGML_OP_CONT ggml_op_GGML_OP_CONV_TRANSPOSE_1D ggml_op_GGML_OP_CONV_TRANSPOSE_2D ggml_op_GGML_OP_COUNT ggml_op_GGML_OP_CPY ggml_op_GGML_OP_CROSS_ENTROPY_LOSS ggml_op_GGML_OP_CROSS_ENTROPY_LOSS_BACK ggml_op_GGML_OP_DIAG ggml_op_GGML_OP_DIAG_MASK_INF ggml_op_GGML_OP_DIAG_MASK_ZERO ggml_op_GGML_OP_DIV ggml_op_GGML_OP_DUP ggml_op_GGML_OP_FLASH_ATTN ggml_op_GGML_OP_FLASH_ATTN_BACK ggml_op_GGML_OP_FLASH_FF ggml_op_GGML_OP_GET_REL_POS ggml_op_GGML_OP_GET_ROWS ggml_op_GGML_OP_GET_ROWS_BACK ggml_op_GGML_OP_GROUP_NORM ggml_op_GGML_OP_IM2COL ggml_op_GGML_OP_LEAKY_RELU 
ggml_op_GGML_OP_LOG ggml_op_GGML_OP_MAP_BINARY ggml_op_GGML_OP_MAP_CUSTOM1 ggml_op_GGML_OP_MAP_CUSTOM2 ggml_op_GGML_OP_MAP_CUSTOM3 ggml_op_GGML_OP_MAP_CUSTOM1_F32 ggml_op_GGML_OP_MAP_CUSTOM2_F32 ggml_op_GGML_OP_MAP_CUSTOM3_F32 ggml_op_GGML_OP_MAP_UNARY ggml_op_GGML_OP_MEAN ggml_op_GGML_OP_MUL ggml_op_GGML_OP_MUL_MAT ggml_op_GGML_OP_MUL_MAT_ID ggml_op_GGML_OP_NONE ggml_op_GGML_OP_NORM ggml_op_GGML_OP_OUT_PROD ggml_op_GGML_OP_PAD ggml_op_GGML_OP_PERMUTE ggml_op_GGML_OP_POOL_1D ggml_op_GGML_OP_POOL_2D ggml_op_GGML_OP_REPEAT ggml_op_GGML_OP_REPEAT_BACK ggml_op_GGML_OP_RESHAPE ggml_op_GGML_OP_RMS_NORM ggml_op_GGML_OP_RMS_NORM_BACK ggml_op_GGML_OP_ROPE ggml_op_GGML_OP_ROPE_BACK ggml_op_GGML_OP_SCALE ggml_op_GGML_OP_SET ggml_op_GGML_OP_SILU_BACK ggml_op_GGML_OP_SOFT_MAX ggml_op_GGML_OP_SOFT_MAX_BACK ggml_op_GGML_OP_SQR ggml_op_GGML_OP_SQRT ggml_op_GGML_OP_SSM_CONV ggml_op_GGML_OP_SSM_SCAN ggml_op_GGML_OP_SUB ggml_op_GGML_OP_SUM ggml_op_GGML_OP_SUM_ROWS ggml_op_GGML_OP_TIMESTEP_EMBEDDING ggml_op_GGML_OP_TRANSPOSE ggml_op_GGML_OP_UNARY ggml_op_GGML_OP_UPSCALE ggml_op_GGML_OP_VIEW ggml_op_GGML_OP_WIN_PART ggml_op_GGML_OP_WIN_UNPART ggml_op_pool_GGML_OP_POOL_AVG ggml_op_pool_GGML_OP_POOL_COUNT ggml_op_pool_GGML_OP_POOL_MAX ggml_opt_result_GGML_LINESEARCH_FAIL ggml_opt_result_GGML_LINESEARCH_INVALID_PARAMETERS ggml_opt_result_GGML_LINESEARCH_MAXIMUM_ITERATIONS ggml_opt_result_GGML_LINESEARCH_MAXIMUM_STEP ggml_opt_result_GGML_LINESEARCH_MINIMUM_STEP ggml_opt_result_GGML_OPT_RESULT_CANCEL ggml_opt_result_GGML_OPT_RESULT_DID_NOT_CONVERGE ggml_opt_result_GGML_OPT_RESULT_FAIL ggml_opt_result_GGML_OPT_RESULT_INVALID_WOLFE ggml_opt_result_GGML_OPT_RESULT_NO_CONTEXT ggml_opt_result_GGML_OPT_RESULT_OK ggml_opt_type_GGML_OPT_TYPE_ADAM ggml_opt_type_GGML_OPT_TYPE_LBFGS ggml_prec_GGML_PREC_DEFAULT ggml_prec_GGML_PREC_F32 ggml_sort_order_GGML_SORT_ORDER_ASC ggml_sort_order_GGML_SORT_ORDER_DESC ggml_status_GGML_STATUS_ABORTED ggml_status_GGML_STATUS_ALLOC_FAILED 
ggml_status_GGML_STATUS_FAILED ggml_status_GGML_STATUS_SUCCESS ggml_task_type_GGML_TASK_TYPE_COMPUTE ggml_task_type_GGML_TASK_TYPE_FINALIZE ggml_task_type_GGML_TASK_TYPE_INIT ggml_tensor_flag_GGML_TENSOR_FLAG_INPUT ggml_tensor_flag_GGML_TENSOR_FLAG_OUTPUT ggml_tensor_flag_GGML_TENSOR_FLAG_PARAM ggml_type_GGML_TYPE_COUNT ggml_type_GGML_TYPE_F16 ggml_type_GGML_TYPE_F32 ggml_type_GGML_TYPE_F64 ggml_type_GGML_TYPE_I8 ggml_type_GGML_TYPE_I16 ggml_type_GGML_TYPE_I32 ggml_type_GGML_TYPE_I64 ggml_type_GGML_TYPE_IQ1_M ggml_type_GGML_TYPE_IQ1_S ggml_type_GGML_TYPE_IQ2_S ggml_type_GGML_TYPE_IQ2_XS ggml_type_GGML_TYPE_IQ2_XXS ggml_type_GGML_TYPE_IQ3_S ggml_type_GGML_TYPE_IQ3_XXS ggml_type_GGML_TYPE_IQ4_NL ggml_type_GGML_TYPE_IQ4_XS ggml_type_GGML_TYPE_Q2_K ggml_type_GGML_TYPE_Q3_K ggml_type_GGML_TYPE_Q4_0 ggml_type_GGML_TYPE_Q4_1 ggml_type_GGML_TYPE_Q4_K ggml_type_GGML_TYPE_Q5_0 ggml_type_GGML_TYPE_Q5_1 ggml_type_GGML_TYPE_Q5_K ggml_type_GGML_TYPE_Q6_K ggml_type_GGML_TYPE_Q8_0 ggml_type_GGML_TYPE_Q8_1 ggml_type_GGML_TYPE_Q8_K ggml_unary_op_GGML_UNARY_OP_ABS ggml_unary_op_GGML_UNARY_OP_COUNT ggml_unary_op_GGML_UNARY_OP_ELU ggml_unary_op_GGML_UNARY_OP_GELU ggml_unary_op_GGML_UNARY_OP_GELU_QUICK ggml_unary_op_GGML_UNARY_OP_HARDSIGMOID ggml_unary_op_GGML_UNARY_OP_HARDSWISH ggml_unary_op_GGML_UNARY_OP_NEG ggml_unary_op_GGML_UNARY_OP_RELU ggml_unary_op_GGML_UNARY_OP_SGN ggml_unary_op_GGML_UNARY_OP_SILU ggml_unary_op_GGML_UNARY_OP_STEP ggml_unary_op_GGML_UNARY_OP_TANH gguf_type_GGUF_TYPE_ARRAY gguf_type_GGUF_TYPE_BOOL gguf_type_GGUF_TYPE_COUNT gguf_type_GGUF_TYPE_FLOAT32 gguf_type_GGUF_TYPE_FLOAT64 gguf_type_GGUF_TYPE_INT8 gguf_type_GGUF_TYPE_INT16 gguf_type_GGUF_TYPE_INT32 gguf_type_GGUF_TYPE_INT64 gguf_type_GGUF_TYPE_STRING gguf_type_GGUF_TYPE_UINT8 gguf_type_GGUF_TYPE_UINT16 gguf_type_GGUF_TYPE_UINT32 gguf_type_GGUF_TYPE_UINT64 llama_ftype_LLAMA_FTYPE_ALL_F32 llama_ftype_LLAMA_FTYPE_GUESSED llama_ftype_LLAMA_FTYPE_MOSTLY_F16 llama_ftype_LLAMA_FTYPE_MOSTLY_IQ1_M 
llama_ftype_LLAMA_FTYPE_MOSTLY_IQ1_S llama_ftype_LLAMA_FTYPE_MOSTLY_IQ2_M llama_ftype_LLAMA_FTYPE_MOSTLY_IQ2_S llama_ftype_LLAMA_FTYPE_MOSTLY_IQ2_XS llama_ftype_LLAMA_FTYPE_MOSTLY_IQ2_XXS llama_ftype_LLAMA_FTYPE_MOSTLY_IQ3_M llama_ftype_LLAMA_FTYPE_MOSTLY_IQ3_S llama_ftype_LLAMA_FTYPE_MOSTLY_IQ3_XS llama_ftype_LLAMA_FTYPE_MOSTLY_IQ3_XXS llama_ftype_LLAMA_FTYPE_MOSTLY_IQ4_NL llama_ftype_LLAMA_FTYPE_MOSTLY_IQ4_XS llama_ftype_LLAMA_FTYPE_MOSTLY_Q2_K llama_ftype_LLAMA_FTYPE_MOSTLY_Q2_K_S llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_L llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_M llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_S llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_0 llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_1 llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_K_M llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_K_S llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_0 llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_1 llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_K_M llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_K_S llama_ftype_LLAMA_FTYPE_MOSTLY_Q6_K llama_ftype_LLAMA_FTYPE_MOSTLY_Q8_0 llama_gretype_LLAMA_GRETYPE_ALT llama_gretype_LLAMA_GRETYPE_CHAR llama_gretype_LLAMA_GRETYPE_CHAR_ALT llama_gretype_LLAMA_GRETYPE_CHAR_NOT llama_gretype_LLAMA_GRETYPE_CHAR_RNG_UPPER llama_gretype_LLAMA_GRETYPE_END llama_gretype_LLAMA_GRETYPE_RULE_REF llama_model_kv_override_type_LLAMA_KV_OVERRIDE_TYPE_BOOL llama_model_kv_override_type_LLAMA_KV_OVERRIDE_TYPE_FLOAT llama_model_kv_override_type_LLAMA_KV_OVERRIDE_TYPE_INT llama_model_kv_override_type_LLAMA_KV_OVERRIDE_TYPE_STR llama_pooling_type_LLAMA_POOLING_TYPE_CLS llama_pooling_type_LLAMA_POOLING_TYPE_MEAN llama_pooling_type_LLAMA_POOLING_TYPE_NONE llama_pooling_type_LLAMA_POOLING_TYPE_UNSPECIFIED llama_rope_scaling_type_LLAMA_ROPE_SCALING_TYPE_LINEAR llama_rope_scaling_type_LLAMA_ROPE_SCALING_TYPE_MAX_VALUE llama_rope_scaling_type_LLAMA_ROPE_SCALING_TYPE_NONE llama_rope_scaling_type_LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED llama_rope_scaling_type_LLAMA_ROPE_SCALING_TYPE_YARN llama_rope_type_LLAMA_ROPE_TYPE_GLM 
llama_rope_type_LLAMA_ROPE_TYPE_NEOX llama_rope_type_LLAMA_ROPE_TYPE_NONE llama_rope_type_LLAMA_ROPE_TYPE_NORM llama_split_mode_LLAMA_SPLIT_MODE_LAYER llama_split_mode_LLAMA_SPLIT_MODE_NONE llama_split_mode_LLAMA_SPLIT_MODE_ROW llama_token_type_LLAMA_TOKEN_TYPE_BYTE llama_token_type_LLAMA_TOKEN_TYPE_CONTROL llama_token_type_LLAMA_TOKEN_TYPE_NORMAL llama_token_type_LLAMA_TOKEN_TYPE_UNDEFINED llama_token_type_LLAMA_TOKEN_TYPE_UNKNOWN llama_token_type_LLAMA_TOKEN_TYPE_UNUSED llama_token_type_LLAMA_TOKEN_TYPE_USER_DEFINED llama_vocab_type_LLAMA_VOCAB_TYPE_BPE llama_vocab_type_LLAMA_VOCAB_TYPE_NONE llama_vocab_type_LLAMA_VOCAB_TYPE_SPM llama_vocab_type_LLAMA_VOCAB_TYPE_WPM ggml_abs ⚠ ggml_abs_inplace ⚠ ggml_acc ⚠ ggml_acc_inplace ⚠ ggml_add ⚠ ggml_add1 ⚠ ggml_add1_inplace ⚠ ggml_add_cast ⚠ ggml_add_inplace ⚠ ggml_add_rel_pos ⚠ ggml_add_rel_pos_inplace ⚠ ggml_alibi ⚠ ggml_arange ⚠ ggml_are_same_shape ⚠ ggml_argmax ⚠ ggml_argsort ⚠ ggml_blck_size ⚠ ggml_build_backward_expand ⚠ ggml_build_backward_gradient_checkpointing ⚠ ggml_build_forward_expand ⚠ ggml_cast ⚠ ggml_clamp ⚠ ggml_concat ⚠ ggml_cont ⚠ ggml_cont_1d ⚠ ggml_cont_2d ⚠ ggml_cont_3d ⚠ ggml_cont_4d ⚠ ggml_conv_1d ⚠ ggml_conv_1d_ph ⚠ ggml_conv_2d ⚠ ggml_conv_2d_s1_ph ⚠ ggml_conv_2d_sk_p0 ⚠ ggml_conv_depthwise_2d ⚠ ggml_conv_transpose_1d ⚠ ggml_conv_transpose_2d_p0 ⚠ ggml_cpu_has_arm_fma ⚠ ggml_cpu_has_avx ⚠ ggml_cpu_has_avx2 ⚠ ggml_cpu_has_avx512 ⚠ ggml_cpu_has_avx512_vbmi ⚠ ggml_cpu_has_avx512_vnni ⚠ ggml_cpu_has_avx_vnni ⚠ ggml_cpu_has_blas ⚠ ggml_cpu_has_clblast ⚠ ggml_cpu_has_cuda ⚠ ggml_cpu_has_f16c ⚠ ggml_cpu_has_fma ⚠ ggml_cpu_has_fp16_va ⚠ ggml_cpu_has_gpublas ⚠ ggml_cpu_has_kompute ⚠ ggml_cpu_has_matmul_int8 ⚠ ggml_cpu_has_metal ⚠ ggml_cpu_has_neon ⚠ ggml_cpu_has_sse3 ⚠ ggml_cpu_has_ssse3 ⚠ ggml_cpu_has_sycl ⚠ ggml_cpu_has_vsx ⚠ ggml_cpu_has_vulkan ⚠ ggml_cpu_has_wasm_simd ⚠ ggml_cpy ⚠ ggml_cross_entropy_loss ⚠ ggml_cross_entropy_loss_back ⚠ ggml_cycles ⚠ ggml_cycles_per_ms ⚠ ggml_diag ⚠ ggml_diag_mask_inf 
⚠ ggml_diag_mask_inf_inplace ⚠ ggml_diag_mask_zero ⚠ ggml_diag_mask_zero_inplace ⚠ ggml_div ⚠ ggml_div_inplace ⚠ ggml_dup ⚠ ggml_dup_inplace ⚠ ggml_dup_tensor ⚠ ggml_element_size ⚠ ggml_elu ⚠ ggml_elu_inplace ⚠ ggml_flash_attn ⚠ ggml_flash_attn_back ⚠ ggml_flash_ff ⚠ ggml_fopen ⚠ ggml_format_name ⚠ ggml_fp16_to_fp32 ⚠ ggml_fp16_to_fp32_row ⚠ ggml_fp32_to_fp16 ⚠ ggml_fp32_to_fp16_row ⚠ ggml_free ⚠ ggml_ftype_to_ggml_type ⚠ ggml_gelu ⚠ ggml_gelu_inplace ⚠ ggml_gelu_quick ⚠ ggml_gelu_quick_inplace ⚠ ggml_get_data ⚠ ggml_get_data_f32 ⚠ ggml_get_f32_1d ⚠ ggml_get_f32_nd ⚠ ggml_get_first_tensor ⚠ ggml_get_i32_1d ⚠ ggml_get_i32_nd ⚠ ggml_get_max_tensor_size ⚠ ggml_get_mem_buffer ⚠ ggml_get_mem_size ⚠ ggml_get_name ⚠ ggml_get_next_tensor ⚠ ggml_get_no_alloc ⚠ ggml_get_rel_pos ⚠ ggml_get_rows ⚠ ggml_get_rows_back ⚠ ggml_get_tensor ⚠ ggml_get_unary_op ⚠ ggml_graph_clear ⚠ ggml_graph_compute ⚠ ggml_graph_compute_with_ctx ⚠ ggml_graph_cpy ⚠ ggml_graph_dump_dot ⚠ ggml_graph_dup ⚠ ggml_graph_export ⚠ ggml_graph_get_tensor ⚠ ggml_graph_import ⚠ ggml_graph_overhead ⚠ ggml_graph_overhead_custom ⚠ ggml_graph_plan ⚠ ggml_graph_print ⚠ ggml_graph_reset ⚠ ggml_graph_view ⚠ ggml_group_norm ⚠ ggml_group_norm_inplace ⚠ ggml_guid_matches ⚠ ggml_hardsigmoid ⚠ ggml_hardswish ⚠ ggml_im2col ⚠ ggml_init ⚠ ggml_internal_get_type_traits ⚠ ggml_is_3d ⚠ ggml_is_contiguous ⚠ ggml_is_empty ⚠ ggml_is_matrix ⚠ ggml_is_numa ⚠ ggml_is_permuted ⚠ ggml_is_quantized ⚠ ggml_is_scalar ⚠ ggml_is_transposed ⚠ ggml_is_vector ⚠ ggml_leaky_relu ⚠ ggml_log ⚠ ggml_log_inplace ⚠ ggml_map_binary_f32 ⚠ ggml_map_binary_inplace_f32 ⚠ ggml_map_custom1 ⚠ ggml_map_custom2 ⚠ ggml_map_custom3 ⚠ ggml_map_custom1_f32 ⚠ ggml_map_custom1_inplace ⚠ ggml_map_custom1_inplace_f32 ⚠ ggml_map_custom2_f32 ⚠ ggml_map_custom2_inplace ⚠ ggml_map_custom2_inplace_f32 ⚠ ggml_map_custom3_f32 ⚠ ggml_map_custom3_inplace ⚠ ggml_map_custom3_inplace_f32 ⚠ ggml_map_unary_f32 ⚠ ggml_map_unary_inplace_f32 ⚠ ggml_mean ⚠ ggml_mul ⚠ ggml_mul_inplace ⚠ 
ggml_mul_mat ⚠ ggml_mul_mat_id ⚠ ggml_mul_mat_set_prec ⚠ ggml_n_dims ⚠ ggml_nbytes ⚠ ggml_nbytes_pad ⚠ ggml_neg ⚠ ggml_neg_inplace ⚠ ggml_nelements ⚠ ggml_new_f32 ⚠ ggml_new_graph ⚠ ggml_new_graph_custom ⚠ ggml_new_i32 ⚠ ggml_new_tensor ⚠ ggml_new_tensor_1d ⚠ ggml_new_tensor_2d ⚠ ggml_new_tensor_3d ⚠ ggml_new_tensor_4d ⚠ ggml_norm ⚠ ggml_norm_inplace ⚠ ggml_nrows ⚠ ggml_numa_init ⚠ ggml_op_desc ⚠ ggml_op_name ⚠ ggml_op_symbol ⚠ ggml_opt ⚠ ggml_opt_default_params ⚠ ggml_opt_init ⚠ ggml_opt_resume ⚠ ggml_opt_resume_g ⚠ ggml_out_prod ⚠ ggml_pad ⚠ ggml_permute ⚠ ggml_pool_1d ⚠ ggml_pool_2d ⚠ ggml_print_backtrace ⚠ ggml_print_object ⚠ ggml_print_objects ⚠ ggml_quantize_chunk ⚠ ggml_quantize_free ⚠ ggml_quantize_init ⚠ ggml_quantize_requires_imatrix ⚠ ggml_relu ⚠ ggml_relu_inplace ⚠ ggml_repeat ⚠ ggml_repeat_back ⚠ ggml_reshape ⚠ ggml_reshape_1d ⚠ ggml_reshape_2d ⚠ ggml_reshape_3d ⚠ ggml_reshape_4d ⚠ ggml_rms_norm ⚠ ggml_rms_norm_back ⚠ ggml_rms_norm_inplace ⚠ ggml_rope ⚠ ggml_rope_back ⚠ ggml_rope_custom ⚠ ggml_rope_custom_inplace ⚠ ggml_rope_inplace ⚠ ggml_rope_xpos_inplace ⚠ ggml_rope_yarn_corr_dims ⚠ ggml_row_size ⚠ ggml_scale ⚠ ggml_scale_inplace ⚠ ggml_set ⚠ ggml_set_1d ⚠ ggml_set_1d_inplace ⚠ ggml_set_2d ⚠ ggml_set_2d_inplace ⚠ ggml_set_f32 ⚠ ggml_set_f32_1d ⚠ ggml_set_f32_nd ⚠ ggml_set_i32 ⚠ ggml_set_i32_1d ⚠ ggml_set_i32_nd ⚠ ggml_set_inplace ⚠ ggml_set_input ⚠ ggml_set_name ⚠ ggml_set_no_alloc ⚠ ggml_set_output ⚠ ggml_set_param ⚠ ggml_set_scratch ⚠ ggml_set_zero ⚠ ggml_sgn ⚠ ggml_sgn_inplace ⚠ ggml_silu ⚠ ggml_silu_back ⚠ ggml_silu_inplace ⚠ ggml_soft_max ⚠ ggml_soft_max_back ⚠ ggml_soft_max_back_inplace ⚠ ggml_soft_max_ext ⚠ ggml_soft_max_inplace ⚠ ggml_sqr ⚠ ggml_sqr_inplace ⚠ ggml_sqrt ⚠ ggml_sqrt_inplace ⚠ ggml_ssm_conv ⚠ ggml_ssm_scan ⚠ ggml_status_to_string ⚠ ggml_step ⚠ ggml_step_inplace ⚠ ggml_sub ⚠ ggml_sub_inplace ⚠ ggml_sum ⚠ ggml_sum_rows ⚠ ggml_tanh ⚠ ggml_tanh_inplace ⚠ ggml_tensor_overhead ⚠ ggml_time_init ⚠ ggml_time_ms ⚠ ggml_time_us ⚠ 
ggml_timestep_embedding ⚠ ggml_top_k ⚠ ggml_transpose ⚠ ggml_type_name ⚠ ggml_type_size ⚠ ggml_type_sizef ⚠ ggml_unary ⚠ ggml_unary_inplace ⚠ ggml_unary_op_name ⚠ ggml_unravel_index ⚠ ggml_upscale ⚠ ggml_used_mem ⚠ ggml_validate_row_data ⚠ ggml_view_1d ⚠ ggml_view_2d ⚠ ggml_view_3d ⚠ ggml_view_4d ⚠ ggml_view_tensor ⚠ ggml_win_part ⚠ ggml_win_unpart ⚠ gguf_add_tensor ⚠ gguf_find_key ⚠ gguf_find_tensor ⚠ gguf_free ⚠ gguf_get_alignment ⚠ gguf_get_arr_data ⚠ gguf_get_arr_n ⚠ gguf_get_arr_str ⚠ gguf_get_arr_type ⚠ gguf_get_data ⚠ gguf_get_data_offset ⚠ gguf_get_key ⚠ gguf_get_kv_type ⚠ gguf_get_meta_data ⚠ gguf_get_meta_size ⚠ gguf_get_n_kv ⚠ gguf_get_n_tensors ⚠ gguf_get_tensor_name ⚠ gguf_get_tensor_offset ⚠ gguf_get_tensor_type ⚠ gguf_get_val_bool ⚠ gguf_get_val_data ⚠ gguf_get_val_f32 ⚠ gguf_get_val_f64 ⚠ gguf_get_val_i8 ⚠ gguf_get_val_i16 ⚠ gguf_get_val_i32 ⚠ gguf_get_val_i64 ⚠ gguf_get_val_str ⚠ gguf_get_val_u8 ⚠ gguf_get_val_u16 ⚠ gguf_get_val_u32 ⚠ gguf_get_val_u64 ⚠ gguf_get_version ⚠ gguf_init_empty ⚠ gguf_init_from_file ⚠ gguf_remove_key ⚠ gguf_set_arr_data ⚠ gguf_set_arr_str ⚠ gguf_set_kv ⚠ gguf_set_tensor_data ⚠ gguf_set_tensor_type ⚠ gguf_set_val_bool ⚠ gguf_set_val_f32 ⚠ gguf_set_val_f64 ⚠ gguf_set_val_i8 ⚠ gguf_set_val_i16 ⚠ gguf_set_val_i32 ⚠ gguf_set_val_i64 ⚠ gguf_set_val_str ⚠ gguf_set_val_u8 ⚠ gguf_set_val_u16 ⚠ gguf_set_val_u32 ⚠ gguf_set_val_u64 ⚠ gguf_type_name ⚠ gguf_write_to_file ⚠ llama_add_bos_token ⚠ llama_add_eos_token ⚠ llama_backend_free ⚠ llama_backend_init ⚠ llama_batch_free ⚠ llama_batch_get_one ⚠ llama_batch_init ⚠ llama_beam_search ⚠ @details Deterministically returns entire sentence constructed by a beam search.
@param ctx Pointer to the llama_context.
@param callback Invoked for each iteration of the beam_search loop, passing in beams_state.
@param callback_data A pointer that is simply passed back to callback.
@param n_beams Number of beams to use.
@param n_past Number of tokens already evaluated.
@param n_predict Maximum number of tokens to predict. EOS may occur earlier. llama_chat_apply_template ⚠ Apply chat template. Inspired by HF apply_chat_template() in Python.
Both “model” and “custom_template” are optional, but at least one is required. “custom_template” has higher precedence than “model”
NOTE: This function does not use a jinja parser. It only supports a pre-defined list of templates. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
@param tmpl A Jinja template to use for this chat. If this is nullptr, the model’s default chat template will be used instead.
@param chat Pointer to a list of multiple llama_chat_message
@param n_msg Number of llama_chat_message in this chat
@param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message.
@param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages)
@param length The size of the allocated buffer
@return The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-alloc it and then re-apply the template. llama_context_default_params ⚠ llama_control_vector_apply ⚠ llama_copy_state_data ⚠ llama_decode ⚠ llama_dump_timing_info_yaml ⚠ llama_free ⚠ llama_free_model ⚠ llama_get_embeddings ⚠ llama_get_embeddings_ith ⚠ llama_get_embeddings_seq ⚠ llama_get_kv_cache_token_count ⚠ llama_get_kv_cache_used_cells ⚠ llama_get_logits ⚠ llama_get_logits_ith ⚠ llama_get_model ⚠ llama_get_model_tensor ⚠ llama_get_state_size ⚠ llama_get_timings ⚠ llama_grammar_accept_token ⚠ @details Accepts the sampled token into the grammar llama_grammar_copy ⚠ llama_grammar_free ⚠ llama_grammar_init ⚠ llama_kv_cache_clear ⚠ llama_kv_cache_defrag ⚠ llama_kv_cache_seq_add ⚠ llama_kv_cache_seq_cp ⚠ llama_kv_cache_seq_div ⚠ llama_kv_cache_seq_keep ⚠ llama_kv_cache_seq_pos_max ⚠ llama_kv_cache_seq_rm ⚠ llama_kv_cache_update ⚠ llama_kv_cache_view_free ⚠ llama_kv_cache_view_init ⚠ llama_kv_cache_view_update ⚠ llama_load_model_from_file ⚠ llama_load_session_file ⚠ llama_log_set ⚠ llama_max_devices ⚠ llama_model_apply_lora_from_file ⚠ llama_model_default_params ⚠ llama_model_desc ⚠ llama_model_meta_count ⚠ llama_model_meta_key_by_index ⚠ llama_model_meta_val_str ⚠ llama_model_meta_val_str_by_index ⚠ llama_model_n_params ⚠ llama_model_quantize ⚠ llama_model_quantize_default_params ⚠ llama_model_size ⚠ llama_n_batch ⚠ llama_n_ctx ⚠ llama_n_ctx_train ⚠ llama_n_embd ⚠ llama_n_layer ⚠ llama_n_seq_max ⚠ llama_n_ubatch ⚠ llama_n_vocab ⚠ llama_new_context_with_model ⚠ llama_numa_init ⚠ llama_pooling_type ⚠ llama_print_system_info ⚠ llama_print_timings ⚠ llama_reset_timings ⚠ llama_rope_freq_scale_train ⚠ llama_rope_type ⚠ llama_sample_apply_guidance ⚠ @details Apply classifier-free guidance to the logits as described in academic paper “Stay on topic with Classifier-Free Guidance” https://arxiv.org/abs/2306.17806
@param logits Logits extracted from the original generation context.
@param logits_guidance Logits extracted from a separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context.
@param scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance. llama_sample_entropy ⚠ @details Dynamic temperature implementation described in the paper https://arxiv.org/abs/2309.02772. llama_sample_grammar ⚠ @details Apply constraints from grammar llama_sample_min_p ⚠ @details Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841 llama_sample_repetition_penalties ⚠ @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
@details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details. llama_sample_softmax ⚠ @details Sorts candidate tokens by their logits in descending order and calculates probabilities based on logits. llama_sample_tail_free ⚠ @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/. llama_sample_temp ⚠ llama_sample_token ⚠ @details Randomly selects a token from the candidates based on their probabilities using the RNG of ctx. llama_sample_token_greedy ⚠ @details Selects the token with the highest probability.
Does not compute the token probabilities. Use llama_sample_softmax() instead. llama_sample_token_mirostat ⚠ @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
@param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
@param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
@param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates.
@param m The number of tokens considered in the estimation of s_hat. This is an arbitrary value that is used to calculate s_hat, which in turn helps to calculate the value of k. In the paper, they use m = 100, but you can experiment with different values to see how it affects the performance of the algorithm.
@param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal. llama_sample_token_mirostat_v2 ⚠ @details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
@param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
@param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
@param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates.
@param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal. llama_sample_top_k ⚠ @details Top-K sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751 llama_sample_top_p ⚠ @details Nucleus sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751 llama_sample_typical ⚠ @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666. llama_save_session_file ⚠ llama_set_abort_callback ⚠ llama_set_causal_attn ⚠ llama_set_n_threads ⚠ llama_set_rng_seed ⚠ llama_set_state_data ⚠ llama_split_path ⚠ @details Build a split GGUF final path for this chunk.
llama_split_path(split_path, sizeof(split_path), “/models/ggml-model-q4_0”, 2, 4) => split_path = “/models/ggml-model-q4_0-00002-of-00004.gguf” llama_split_prefix ⚠ @details Extract the path prefix from the split_path if and only if the split_no and split_count match.
llama_split_prefix(split_prefix, 64, “/models/ggml-model-q4_0-00002-of-00004.gguf”, 2, 4) => split_prefix = “/models/ggml-model-q4_0” llama_state_get_data ⚠ llama_state_get_size ⚠ llama_state_load_file ⚠ llama_state_save_file ⚠ llama_state_seq_get_data ⚠ llama_state_seq_get_size ⚠ llama_state_seq_load_file ⚠ llama_state_seq_save_file ⚠ llama_state_seq_set_data ⚠ llama_state_set_data ⚠ llama_supports_gpu_offload ⚠ llama_supports_mlock ⚠ llama_supports_mmap ⚠ llama_synchronize ⚠ llama_time_us ⚠ llama_token_bos ⚠ llama_token_cls ⚠ llama_token_eos ⚠ llama_token_eot ⚠ llama_token_get_score ⚠ llama_token_get_text ⚠ llama_token_get_type ⚠ llama_token_is_eog ⚠ llama_token_middle ⚠ llama_token_nl ⚠ llama_token_prefix ⚠ llama_token_sep ⚠ llama_token_suffix ⚠ llama_token_to_piece ⚠ llama_tokenize ⚠ @details Convert the provided text into tokens.
@param tokens The tokens pointer must be large enough to hold the resulting tokens.
@return Returns the number of tokens on success, no more than n_tokens_max
@return Returns a negative number on failure - the number of tokens that would have been returned
@param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
as plaintext. Does not insert a leading space. llama_vocab_type ⚠ FILE _IO_lock_t __off64_t __off_t ggml_abort_callback ggml_backend_sched_eval_callback ggml_backend_type ggml_binary_op_f32_t ggml_cgraph_eval_order ggml_custom1_op_f32_t ggml_custom1_op_t ggml_custom2_op_f32_t ggml_custom2_op_t ggml_custom3_op_f32_t ggml_custom3_op_t ggml_fp16_t ggml_from_float_t ggml_ftype ggml_guid ggml_guid_t ggml_linesearch ggml_log_callback ggml_log_level ggml_numa_strategy ggml_object_type ggml_op ggml_op_pool ggml_opt_callback ggml_opt_result ggml_opt_type ggml_prec ggml_sort_order ggml_status ggml_task_type ggml_tensor_flag ggml_to_float_t ggml_type ggml_unary_op ggml_unary_op_f32_t ggml_vec_dot_t gguf_type llama_beam_search_callback_fn_t llama_ftype llama_gretype llama_model_kv_override_type llama_pooling_type llama_pos llama_progress_callback llama_rope_scaling_type llama_rope_type llama_seq_id llama_split_mode llama_token llama_token_type llama_vocab_type llama_model_kv_override__bindgen_ty_1