Crate ggml_sys_bleedingedge Copy item path Source _IO_FILE _IO_codecvt _IO_marker _IO_wide_data ggml_backend_buffer ggml_cgraph ggml_compute_params ggml_context ggml_cplan ggml_hash_set ggml_init_params ggml_object ggml_opt_context ggml_opt_context__bindgen_ty_1 ggml_opt_context__bindgen_ty_2 ggml_opt_params ggml_opt_params__bindgen_ty_1 ggml_opt_params__bindgen_ty_2 ggml_scratch ggml_tensor ggml_type_traits_t gguf_context gguf_init_params llama_batch llama_beam_view llama_beams_state llama_chat_message llama_context llama_context_params llama_grammar llama_grammar_element llama_kv_cache_view llama_kv_cache_view_cell llama_model llama_model_kv_override llama_model_params llama_model_quantize_params llama_timings llama_token_data llama_token_data_array GGMLSYS_VERSION GGML_DEFAULT_GRAPH_SIZE GGML_DEFAULT_N_THREADS GGML_EXIT_ABORTED GGML_EXIT_SUCCESS GGML_FILE_MAGIC GGML_FILE_VERSION GGML_MAX_CONTEXTS GGML_MAX_DIMS GGML_MAX_NAME GGML_MAX_OP_PARAMS GGML_MAX_PARAMS GGML_MAX_SRC GGML_MEM_ALIGN GGML_N_TASKS_MAX GGML_OBJECT_SIZE GGML_QNT_VERSION GGML_QNT_VERSION_FACTOR GGML_TENSOR_SIZE GGUF_DEFAULT_ALIGNMENT GGUF_MAGIC GGUF_VERSION LLAMA_DEFAULT_SEED LLAMA_FILE_MAGIC_GGLA LLAMA_FILE_MAGIC_GGSN LLAMA_FILE_MAGIC_GGSQ LLAMA_MAX_RNG_STATE LLAMA_SESSION_MAGIC LLAMA_SESSION_VERSION LLAMA_STATE_SEQ_MAGIC LLAMA_STATE_SEQ_VERSION ggml_backend_type_GGML_BACKEND_TYPE_CPU ggml_backend_type_GGML_BACKEND_TYPE_GPU ggml_backend_type_GGML_BACKEND_TYPE_GPU_SPLIT ggml_cgraph_eval_order_GGML_CGRAPH_EVAL_ORDER_COUNT ggml_cgraph_eval_order_GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT ggml_cgraph_eval_order_GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT ggml_ftype_GGML_FTYPE_ALL_F32 ggml_ftype_GGML_FTYPE_MOSTLY_F16 ggml_ftype_GGML_FTYPE_MOSTLY_IQ1_M ggml_ftype_GGML_FTYPE_MOSTLY_IQ1_S ggml_ftype_GGML_FTYPE_MOSTLY_IQ2_S ggml_ftype_GGML_FTYPE_MOSTLY_IQ2_XS ggml_ftype_GGML_FTYPE_MOSTLY_IQ2_XXS ggml_ftype_GGML_FTYPE_MOSTLY_IQ3_S ggml_ftype_GGML_FTYPE_MOSTLY_IQ3_XXS ggml_ftype_GGML_FTYPE_MOSTLY_IQ4_NL 
ggml_ftype_GGML_FTYPE_MOSTLY_IQ4_XS ggml_ftype_GGML_FTYPE_MOSTLY_Q2_K ggml_ftype_GGML_FTYPE_MOSTLY_Q3_K ggml_ftype_GGML_FTYPE_MOSTLY_Q4_0 ggml_ftype_GGML_FTYPE_MOSTLY_Q4_1 ggml_ftype_GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 ggml_ftype_GGML_FTYPE_MOSTLY_Q4_K ggml_ftype_GGML_FTYPE_MOSTLY_Q5_0 ggml_ftype_GGML_FTYPE_MOSTLY_Q5_1 ggml_ftype_GGML_FTYPE_MOSTLY_Q5_K ggml_ftype_GGML_FTYPE_MOSTLY_Q6_K ggml_ftype_GGML_FTYPE_MOSTLY_Q8_0 ggml_ftype_GGML_FTYPE_UNKNOWN ggml_linesearch_GGML_LINESEARCH_BACKTRACKING_ARMIJO ggml_linesearch_GGML_LINESEARCH_BACKTRACKING_STRONG_WOLFE ggml_linesearch_GGML_LINESEARCH_BACKTRACKING_WOLFE ggml_linesearch_GGML_LINESEARCH_DEFAULT ggml_log_level_GGML_LOG_LEVEL_DEBUG ggml_log_level_GGML_LOG_LEVEL_ERROR ggml_log_level_GGML_LOG_LEVEL_INFO ggml_log_level_GGML_LOG_LEVEL_WARN ggml_numa_strategy_GGML_NUMA_STRATEGY_COUNT ggml_numa_strategy_GGML_NUMA_STRATEGY_DISABLED ggml_numa_strategy_GGML_NUMA_STRATEGY_DISTRIBUTE ggml_numa_strategy_GGML_NUMA_STRATEGY_ISOLATE ggml_numa_strategy_GGML_NUMA_STRATEGY_MIRROR ggml_numa_strategy_GGML_NUMA_STRATEGY_NUMACTL ggml_object_type_GGML_OBJECT_TYPE_GRAPH ggml_object_type_GGML_OBJECT_TYPE_TENSOR ggml_object_type_GGML_OBJECT_TYPE_WORK_BUFFER ggml_op_GGML_OP_ACC ggml_op_GGML_OP_ADD ggml_op_GGML_OP_ADD1 ggml_op_GGML_OP_ADD_REL_POS ggml_op_GGML_OP_ALIBI ggml_op_GGML_OP_ARANGE ggml_op_GGML_OP_ARGMAX ggml_op_GGML_OP_ARGSORT ggml_op_GGML_OP_CLAMP ggml_op_GGML_OP_CONCAT ggml_op_GGML_OP_CONT ggml_op_GGML_OP_CONV_TRANSPOSE_1D ggml_op_GGML_OP_CONV_TRANSPOSE_2D ggml_op_GGML_OP_COUNT ggml_op_GGML_OP_CPY ggml_op_GGML_OP_CROSS_ENTROPY_LOSS ggml_op_GGML_OP_CROSS_ENTROPY_LOSS_BACK ggml_op_GGML_OP_DIAG ggml_op_GGML_OP_DIAG_MASK_INF ggml_op_GGML_OP_DIAG_MASK_ZERO ggml_op_GGML_OP_DIV ggml_op_GGML_OP_DUP ggml_op_GGML_OP_FLASH_ATTN ggml_op_GGML_OP_FLASH_ATTN_BACK ggml_op_GGML_OP_FLASH_FF ggml_op_GGML_OP_GET_REL_POS ggml_op_GGML_OP_GET_ROWS ggml_op_GGML_OP_GET_ROWS_BACK ggml_op_GGML_OP_GROUP_NORM ggml_op_GGML_OP_IM2COL ggml_op_GGML_OP_LEAKY_RELU 
ggml_op_GGML_OP_LOG ggml_op_GGML_OP_MAP_BINARY ggml_op_GGML_OP_MAP_CUSTOM1 ggml_op_GGML_OP_MAP_CUSTOM2 ggml_op_GGML_OP_MAP_CUSTOM3 ggml_op_GGML_OP_MAP_CUSTOM1_F32 ggml_op_GGML_OP_MAP_CUSTOM2_F32 ggml_op_GGML_OP_MAP_CUSTOM3_F32 ggml_op_GGML_OP_MAP_UNARY ggml_op_GGML_OP_MEAN ggml_op_GGML_OP_MUL ggml_op_GGML_OP_MUL_MAT ggml_op_GGML_OP_MUL_MAT_ID ggml_op_GGML_OP_NONE ggml_op_GGML_OP_NORM ggml_op_GGML_OP_OUT_PROD ggml_op_GGML_OP_PAD ggml_op_GGML_OP_PERMUTE ggml_op_GGML_OP_POOL_1D ggml_op_GGML_OP_POOL_2D ggml_op_GGML_OP_REPEAT ggml_op_GGML_OP_REPEAT_BACK ggml_op_GGML_OP_RESHAPE ggml_op_GGML_OP_RMS_NORM ggml_op_GGML_OP_RMS_NORM_BACK ggml_op_GGML_OP_ROPE ggml_op_GGML_OP_ROPE_BACK ggml_op_GGML_OP_SCALE ggml_op_GGML_OP_SET ggml_op_GGML_OP_SILU_BACK ggml_op_GGML_OP_SOFT_MAX ggml_op_GGML_OP_SOFT_MAX_BACK ggml_op_GGML_OP_SQR ggml_op_GGML_OP_SQRT ggml_op_GGML_OP_SSM_CONV ggml_op_GGML_OP_SSM_SCAN ggml_op_GGML_OP_SUB ggml_op_GGML_OP_SUM ggml_op_GGML_OP_SUM_ROWS ggml_op_GGML_OP_TIMESTEP_EMBEDDING ggml_op_GGML_OP_TRANSPOSE ggml_op_GGML_OP_UNARY ggml_op_GGML_OP_UPSCALE ggml_op_GGML_OP_VIEW ggml_op_GGML_OP_WIN_PART ggml_op_GGML_OP_WIN_UNPART ggml_op_pool_GGML_OP_POOL_AVG ggml_op_pool_GGML_OP_POOL_COUNT ggml_op_pool_GGML_OP_POOL_MAX ggml_opt_result_GGML_LINESEARCH_FAIL ggml_opt_result_GGML_LINESEARCH_INVALID_PARAMETERS ggml_opt_result_GGML_LINESEARCH_MAXIMUM_ITERATIONS ggml_opt_result_GGML_LINESEARCH_MAXIMUM_STEP ggml_opt_result_GGML_LINESEARCH_MINIMUM_STEP ggml_opt_result_GGML_OPT_RESULT_CANCEL ggml_opt_result_GGML_OPT_RESULT_DID_NOT_CONVERGE ggml_opt_result_GGML_OPT_RESULT_FAIL ggml_opt_result_GGML_OPT_RESULT_INVALID_WOLFE ggml_opt_result_GGML_OPT_RESULT_NO_CONTEXT ggml_opt_result_GGML_OPT_RESULT_OK ggml_opt_type_GGML_OPT_TYPE_ADAM ggml_opt_type_GGML_OPT_TYPE_LBFGS ggml_prec_GGML_PREC_DEFAULT ggml_prec_GGML_PREC_F32 ggml_sort_order_GGML_SORT_ORDER_ASC ggml_sort_order_GGML_SORT_ORDER_DESC ggml_status_GGML_STATUS_ABORTED ggml_status_GGML_STATUS_ALLOC_FAILED 
ggml_status_GGML_STATUS_FAILED ggml_status_GGML_STATUS_SUCCESS ggml_task_type_GGML_TASK_TYPE_COMPUTE ggml_task_type_GGML_TASK_TYPE_FINALIZE ggml_task_type_GGML_TASK_TYPE_INIT ggml_tensor_flag_GGML_TENSOR_FLAG_INPUT ggml_tensor_flag_GGML_TENSOR_FLAG_OUTPUT ggml_tensor_flag_GGML_TENSOR_FLAG_PARAM ggml_type_GGML_TYPE_COUNT ggml_type_GGML_TYPE_F16 ggml_type_GGML_TYPE_F32 ggml_type_GGML_TYPE_F64 ggml_type_GGML_TYPE_I8 ggml_type_GGML_TYPE_I16 ggml_type_GGML_TYPE_I32 ggml_type_GGML_TYPE_I64 ggml_type_GGML_TYPE_IQ1_M ggml_type_GGML_TYPE_IQ1_S ggml_type_GGML_TYPE_IQ2_S ggml_type_GGML_TYPE_IQ2_XS ggml_type_GGML_TYPE_IQ2_XXS ggml_type_GGML_TYPE_IQ3_S ggml_type_GGML_TYPE_IQ3_XXS ggml_type_GGML_TYPE_IQ4_NL ggml_type_GGML_TYPE_IQ4_XS ggml_type_GGML_TYPE_Q2_K ggml_type_GGML_TYPE_Q3_K ggml_type_GGML_TYPE_Q4_0 ggml_type_GGML_TYPE_Q4_1 ggml_type_GGML_TYPE_Q4_K ggml_type_GGML_TYPE_Q5_0 ggml_type_GGML_TYPE_Q5_1 ggml_type_GGML_TYPE_Q5_K ggml_type_GGML_TYPE_Q6_K ggml_type_GGML_TYPE_Q8_0 ggml_type_GGML_TYPE_Q8_1 ggml_type_GGML_TYPE_Q8_K ggml_unary_op_GGML_UNARY_OP_ABS ggml_unary_op_GGML_UNARY_OP_COUNT ggml_unary_op_GGML_UNARY_OP_ELU ggml_unary_op_GGML_UNARY_OP_GELU ggml_unary_op_GGML_UNARY_OP_GELU_QUICK ggml_unary_op_GGML_UNARY_OP_HARDSIGMOID ggml_unary_op_GGML_UNARY_OP_HARDSWISH ggml_unary_op_GGML_UNARY_OP_NEG ggml_unary_op_GGML_UNARY_OP_RELU ggml_unary_op_GGML_UNARY_OP_SGN ggml_unary_op_GGML_UNARY_OP_SILU ggml_unary_op_GGML_UNARY_OP_STEP ggml_unary_op_GGML_UNARY_OP_TANH gguf_type_GGUF_TYPE_ARRAY gguf_type_GGUF_TYPE_BOOL gguf_type_GGUF_TYPE_COUNT gguf_type_GGUF_TYPE_FLOAT32 gguf_type_GGUF_TYPE_FLOAT64 gguf_type_GGUF_TYPE_INT8 gguf_type_GGUF_TYPE_INT16 gguf_type_GGUF_TYPE_INT32 gguf_type_GGUF_TYPE_INT64 gguf_type_GGUF_TYPE_STRING gguf_type_GGUF_TYPE_UINT8 gguf_type_GGUF_TYPE_UINT16 gguf_type_GGUF_TYPE_UINT32 gguf_type_GGUF_TYPE_UINT64 llama_ftype_LLAMA_FTYPE_ALL_F32 llama_ftype_LLAMA_FTYPE_GUESSED llama_ftype_LLAMA_FTYPE_MOSTLY_F16 llama_ftype_LLAMA_FTYPE_MOSTLY_IQ1_M 
llama_ftype_LLAMA_FTYPE_MOSTLY_IQ1_S llama_ftype_LLAMA_FTYPE_MOSTLY_IQ2_M llama_ftype_LLAMA_FTYPE_MOSTLY_IQ2_S llama_ftype_LLAMA_FTYPE_MOSTLY_IQ2_XS llama_ftype_LLAMA_FTYPE_MOSTLY_IQ2_XXS llama_ftype_LLAMA_FTYPE_MOSTLY_IQ3_M llama_ftype_LLAMA_FTYPE_MOSTLY_IQ3_S llama_ftype_LLAMA_FTYPE_MOSTLY_IQ3_XS llama_ftype_LLAMA_FTYPE_MOSTLY_IQ3_XXS llama_ftype_LLAMA_FTYPE_MOSTLY_IQ4_NL llama_ftype_LLAMA_FTYPE_MOSTLY_IQ4_XS llama_ftype_LLAMA_FTYPE_MOSTLY_Q2_K llama_ftype_LLAMA_FTYPE_MOSTLY_Q2_K_S llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_L llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_M llama_ftype_LLAMA_FTYPE_MOSTLY_Q3_K_S llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_0 llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_1 llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_K_M llama_ftype_LLAMA_FTYPE_MOSTLY_Q4_K_S llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_0 llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_1 llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_K_M llama_ftype_LLAMA_FTYPE_MOSTLY_Q5_K_S llama_ftype_LLAMA_FTYPE_MOSTLY_Q6_K llama_ftype_LLAMA_FTYPE_MOSTLY_Q8_0 llama_gretype_LLAMA_GRETYPE_ALT llama_gretype_LLAMA_GRETYPE_CHAR llama_gretype_LLAMA_GRETYPE_CHAR_ALT llama_gretype_LLAMA_GRETYPE_CHAR_NOT llama_gretype_LLAMA_GRETYPE_CHAR_RNG_UPPER llama_gretype_LLAMA_GRETYPE_END llama_gretype_LLAMA_GRETYPE_RULE_REF llama_model_kv_override_type_LLAMA_KV_OVERRIDE_TYPE_BOOL llama_model_kv_override_type_LLAMA_KV_OVERRIDE_TYPE_FLOAT llama_model_kv_override_type_LLAMA_KV_OVERRIDE_TYPE_INT llama_model_kv_override_type_LLAMA_KV_OVERRIDE_TYPE_STR llama_pooling_type_LLAMA_POOLING_TYPE_CLS llama_pooling_type_LLAMA_POOLING_TYPE_MEAN llama_pooling_type_LLAMA_POOLING_TYPE_NONE llama_pooling_type_LLAMA_POOLING_TYPE_UNSPECIFIED llama_rope_scaling_type_LLAMA_ROPE_SCALING_TYPE_LINEAR llama_rope_scaling_type_LLAMA_ROPE_SCALING_TYPE_MAX_VALUE llama_rope_scaling_type_LLAMA_ROPE_SCALING_TYPE_NONE llama_rope_scaling_type_LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED llama_rope_scaling_type_LLAMA_ROPE_SCALING_TYPE_YARN llama_rope_type_LLAMA_ROPE_TYPE_GLM 
llama_rope_type_LLAMA_ROPE_TYPE_NEOX llama_rope_type_LLAMA_ROPE_TYPE_NONE llama_rope_type_LLAMA_ROPE_TYPE_NORM llama_split_mode_LLAMA_SPLIT_MODE_LAYER llama_split_mode_LLAMA_SPLIT_MODE_NONE llama_split_mode_LLAMA_SPLIT_MODE_ROW llama_token_type_LLAMA_TOKEN_TYPE_BYTE llama_token_type_LLAMA_TOKEN_TYPE_CONTROL llama_token_type_LLAMA_TOKEN_TYPE_NORMAL llama_token_type_LLAMA_TOKEN_TYPE_UNDEFINED llama_token_type_LLAMA_TOKEN_TYPE_UNKNOWN llama_token_type_LLAMA_TOKEN_TYPE_UNUSED llama_token_type_LLAMA_TOKEN_TYPE_USER_DEFINED llama_vocab_type_LLAMA_VOCAB_TYPE_BPE llama_vocab_type_LLAMA_VOCAB_TYPE_NONE llama_vocab_type_LLAMA_VOCAB_TYPE_SPM llama_vocab_type_LLAMA_VOCAB_TYPE_WPM ggml_abs ⚠ ggml_abs_inplace ⚠ ggml_acc ⚠ ggml_acc_inplace ⚠ ggml_add ⚠ ggml_add1 ⚠ ggml_add1_inplace ⚠ ggml_add_cast ⚠ ggml_add_inplace ⚠ ggml_add_rel_pos ⚠ ggml_add_rel_pos_inplace ⚠ ggml_alibi ⚠ ggml_arange ⚠ ggml_are_same_shape ⚠ ggml_argmax ⚠ ggml_argsort ⚠ ggml_blck_size ⚠ ggml_build_backward_expand ⚠ ggml_build_backward_gradient_checkpointing ⚠ ggml_build_forward_expand ⚠ ggml_cast ⚠ ggml_clamp ⚠ ggml_concat ⚠ ggml_cont ⚠ ggml_cont_1d ⚠ ggml_cont_2d ⚠ ggml_cont_3d ⚠ ggml_cont_4d ⚠ ggml_conv_1d ⚠ ggml_conv_1d_ph ⚠ ggml_conv_2d ⚠ ggml_conv_2d_s1_ph ⚠ ggml_conv_2d_sk_p0 ⚠ ggml_conv_depthwise_2d ⚠ ggml_conv_transpose_1d ⚠ ggml_conv_transpose_2d_p0 ⚠ ggml_cpu_has_arm_fma ⚠ ggml_cpu_has_avx ⚠ ggml_cpu_has_avx2 ⚠ ggml_cpu_has_avx512 ⚠ ggml_cpu_has_avx512_vbmi ⚠ ggml_cpu_has_avx512_vnni ⚠ ggml_cpu_has_avx_vnni ⚠ ggml_cpu_has_blas ⚠ ggml_cpu_has_clblast ⚠ ggml_cpu_has_cuda ⚠ ggml_cpu_has_f16c ⚠ ggml_cpu_has_fma ⚠ ggml_cpu_has_fp16_va ⚠ ggml_cpu_has_gpublas ⚠ ggml_cpu_has_kompute ⚠ ggml_cpu_has_matmul_int8 ⚠ ggml_cpu_has_metal ⚠ ggml_cpu_has_neon ⚠ ggml_cpu_has_sse3 ⚠ ggml_cpu_has_ssse3 ⚠ ggml_cpu_has_sycl ⚠ ggml_cpu_has_vsx ⚠ ggml_cpu_has_vulkan ⚠ ggml_cpu_has_wasm_simd ⚠ ggml_cpy ⚠ ggml_cross_entropy_loss ⚠ ggml_cross_entropy_loss_back ⚠ ggml_cycles ⚠ ggml_cycles_per_ms ⚠ ggml_diag ⚠ ggml_diag_mask_inf 
⚠ ggml_diag_mask_inf_inplace ⚠ ggml_diag_mask_zero ⚠ ggml_diag_mask_zero_inplace ⚠ ggml_div ⚠ ggml_div_inplace ⚠ ggml_dup ⚠ ggml_dup_inplace ⚠ ggml_dup_tensor ⚠ ggml_element_size ⚠ ggml_elu ⚠ ggml_elu_inplace ⚠ ggml_flash_attn ⚠ ggml_flash_attn_back ⚠ ggml_flash_ff ⚠ ggml_fopen ⚠ ggml_format_name ⚠ ggml_fp16_to_fp32 ⚠ ggml_fp16_to_fp32_row ⚠ ggml_fp32_to_fp16 ⚠ ggml_fp32_to_fp16_row ⚠ ggml_free ⚠ ggml_ftype_to_ggml_type ⚠ ggml_gelu ⚠ ggml_gelu_inplace ⚠ ggml_gelu_quick ⚠ ggml_gelu_quick_inplace ⚠ ggml_get_data ⚠ ggml_get_data_f32 ⚠ ggml_get_f32_1d ⚠ ggml_get_f32_nd ⚠ ggml_get_first_tensor ⚠ ggml_get_i32_1d ⚠ ggml_get_i32_nd ⚠ ggml_get_max_tensor_size ⚠ ggml_get_mem_buffer ⚠ ggml_get_mem_size ⚠ ggml_get_name ⚠ ggml_get_next_tensor ⚠ ggml_get_no_alloc ⚠ ggml_get_rel_pos ⚠ ggml_get_rows ⚠ ggml_get_rows_back ⚠ ggml_get_tensor ⚠ ggml_get_unary_op ⚠ ggml_graph_clear ⚠ ggml_graph_compute ⚠ ggml_graph_compute_with_ctx ⚠ ggml_graph_cpy ⚠ ggml_graph_dump_dot ⚠ ggml_graph_dup ⚠ ggml_graph_export ⚠ ggml_graph_get_tensor ⚠ ggml_graph_import ⚠ ggml_graph_overhead ⚠ ggml_graph_overhead_custom ⚠ ggml_graph_plan ⚠ ggml_graph_print ⚠ ggml_graph_reset ⚠ ggml_graph_view ⚠ ggml_group_norm ⚠ ggml_group_norm_inplace ⚠ ggml_guid_matches ⚠ ggml_hardsigmoid ⚠ ggml_hardswish ⚠ ggml_im2col ⚠ ggml_init ⚠ ggml_internal_get_type_traits ⚠ ggml_is_3d ⚠ ggml_is_contiguous ⚠ ggml_is_empty ⚠ ggml_is_matrix ⚠ ggml_is_numa ⚠ ggml_is_permuted ⚠ ggml_is_quantized ⚠ ggml_is_scalar ⚠ ggml_is_transposed ⚠ ggml_is_vector ⚠ ggml_leaky_relu ⚠ ggml_log ⚠ ggml_log_inplace ⚠ ggml_map_binary_f32 ⚠ ggml_map_binary_inplace_f32 ⚠ ggml_map_custom1 ⚠ ggml_map_custom2 ⚠ ggml_map_custom3 ⚠ ggml_map_custom1_f32 ⚠ ggml_map_custom1_inplace ⚠ ggml_map_custom1_inplace_f32 ⚠ ggml_map_custom2_f32 ⚠ ggml_map_custom2_inplace ⚠ ggml_map_custom2_inplace_f32 ⚠ ggml_map_custom3_f32 ⚠ ggml_map_custom3_inplace ⚠ ggml_map_custom3_inplace_f32 ⚠ ggml_map_unary_f32 ⚠ ggml_map_unary_inplace_f32 ⚠ ggml_mean ⚠ ggml_mul ⚠ ggml_mul_inplace ⚠ 
ggml_mul_mat ⚠ ggml_mul_mat_id ⚠ ggml_mul_mat_set_prec ⚠ ggml_n_dims ⚠ ggml_nbytes ⚠ ggml_nbytes_pad ⚠ ggml_neg ⚠ ggml_neg_inplace ⚠ ggml_nelements ⚠ ggml_new_f32 ⚠ ggml_new_graph ⚠ ggml_new_graph_custom ⚠ ggml_new_i32 ⚠ ggml_new_tensor ⚠ ggml_new_tensor_1d ⚠ ggml_new_tensor_2d ⚠ ggml_new_tensor_3d ⚠ ggml_new_tensor_4d ⚠ ggml_norm ⚠ ggml_norm_inplace ⚠ ggml_nrows ⚠ ggml_numa_init ⚠ ggml_op_desc ⚠ ggml_op_name ⚠ ggml_op_symbol ⚠ ggml_opt ⚠ ggml_opt_default_params ⚠ ggml_opt_init ⚠ ggml_opt_resume ⚠ ggml_opt_resume_g ⚠ ggml_out_prod ⚠ ggml_pad ⚠ ggml_permute ⚠ ggml_pool_1d ⚠ ggml_pool_2d ⚠ ggml_print_backtrace ⚠ ggml_print_object ⚠ ggml_print_objects ⚠ ggml_quantize_chunk ⚠ ggml_quantize_free ⚠ ggml_quantize_init ⚠ ggml_quantize_requires_imatrix ⚠ ggml_relu ⚠ ggml_relu_inplace ⚠ ggml_repeat ⚠ ggml_repeat_back ⚠ ggml_reshape ⚠ ggml_reshape_1d ⚠ ggml_reshape_2d ⚠ ggml_reshape_3d ⚠ ggml_reshape_4d ⚠ ggml_rms_norm ⚠ ggml_rms_norm_back ⚠ ggml_rms_norm_inplace ⚠ ggml_rope ⚠ ggml_rope_back ⚠ ggml_rope_custom ⚠ ggml_rope_custom_inplace ⚠ ggml_rope_inplace ⚠ ggml_rope_xpos_inplace ⚠ ggml_rope_yarn_corr_dims ⚠ ggml_row_size ⚠ ggml_scale ⚠ ggml_scale_inplace ⚠ ggml_set ⚠ ggml_set_1d ⚠ ggml_set_1d_inplace ⚠ ggml_set_2d ⚠ ggml_set_2d_inplace ⚠ ggml_set_f32 ⚠ ggml_set_f32_1d ⚠ ggml_set_f32_nd ⚠ ggml_set_i32 ⚠ ggml_set_i32_1d ⚠ ggml_set_i32_nd ⚠ ggml_set_inplace ⚠ ggml_set_input ⚠ ggml_set_name ⚠ ggml_set_no_alloc ⚠ ggml_set_output ⚠ ggml_set_param ⚠ ggml_set_scratch ⚠ ggml_set_zero ⚠ ggml_sgn ⚠ ggml_sgn_inplace ⚠ ggml_silu ⚠ ggml_silu_back ⚠ ggml_silu_inplace ⚠ ggml_soft_max ⚠ ggml_soft_max_back ⚠ ggml_soft_max_back_inplace ⚠ ggml_soft_max_ext ⚠ ggml_soft_max_inplace ⚠ ggml_sqr ⚠ ggml_sqr_inplace ⚠ ggml_sqrt ⚠ ggml_sqrt_inplace ⚠ ggml_ssm_conv ⚠ ggml_ssm_scan ⚠ ggml_status_to_string ⚠ ggml_step ⚠ ggml_step_inplace ⚠ ggml_sub ⚠ ggml_sub_inplace ⚠ ggml_sum ⚠ ggml_sum_rows ⚠ ggml_tanh ⚠ ggml_tanh_inplace ⚠ ggml_tensor_overhead ⚠ ggml_time_init ⚠ ggml_time_ms ⚠ ggml_time_us ⚠ 
ggml_timestep_embedding ⚠ ggml_top_k ⚠ ggml_transpose ⚠ ggml_type_name ⚠ ggml_type_size ⚠ ggml_type_sizef ⚠ ggml_unary ⚠ ggml_unary_inplace ⚠ ggml_unary_op_name ⚠ ggml_unravel_index ⚠ ggml_upscale ⚠ ggml_used_mem ⚠ ggml_validate_row_data ⚠ ggml_view_1d ⚠ ggml_view_2d ⚠ ggml_view_3d ⚠ ggml_view_4d ⚠ ggml_view_tensor ⚠ ggml_win_part ⚠ ggml_win_unpart ⚠ gguf_add_tensor ⚠ gguf_find_key ⚠ gguf_find_tensor ⚠ gguf_free ⚠ gguf_get_alignment ⚠ gguf_get_arr_data ⚠ gguf_get_arr_n ⚠ gguf_get_arr_str ⚠ gguf_get_arr_type ⚠ gguf_get_data ⚠ gguf_get_data_offset ⚠ gguf_get_key ⚠ gguf_get_kv_type ⚠ gguf_get_meta_data ⚠ gguf_get_meta_size ⚠ gguf_get_n_kv ⚠ gguf_get_n_tensors ⚠ gguf_get_tensor_name ⚠ gguf_get_tensor_offset ⚠ gguf_get_tensor_type ⚠ gguf_get_val_bool ⚠ gguf_get_val_data ⚠ gguf_get_val_f32 ⚠ gguf_get_val_f64 ⚠ gguf_get_val_i8 ⚠ gguf_get_val_i16 ⚠ gguf_get_val_i32 ⚠ gguf_get_val_i64 ⚠ gguf_get_val_str ⚠ gguf_get_val_u8 ⚠ gguf_get_val_u16 ⚠ gguf_get_val_u32 ⚠ gguf_get_val_u64 ⚠ gguf_get_version ⚠ gguf_init_empty ⚠ gguf_init_from_file ⚠ gguf_remove_key ⚠ gguf_set_arr_data ⚠ gguf_set_arr_str ⚠ gguf_set_kv ⚠ gguf_set_tensor_data ⚠ gguf_set_tensor_type ⚠ gguf_set_val_bool ⚠ gguf_set_val_f32 ⚠ gguf_set_val_f64 ⚠ gguf_set_val_i8 ⚠ gguf_set_val_i16 ⚠ gguf_set_val_i32 ⚠ gguf_set_val_i64 ⚠ gguf_set_val_str ⚠ gguf_set_val_u8 ⚠ gguf_set_val_u16 ⚠ gguf_set_val_u32 ⚠ gguf_set_val_u64 ⚠ gguf_type_name ⚠ gguf_write_to_file ⚠ llama_add_bos_token ⚠ llama_add_eos_token ⚠ llama_backend_free ⚠ llama_backend_init ⚠ llama_batch_free ⚠ llama_batch_get_one ⚠ llama_batch_init ⚠ llama_beam_search ⚠ @details Deterministically returns entire sentence constructed by a beam search.
@param ctx Pointer to the llama_context.
@param callback Invoked for each iteration of the beam_search loop, passing in beams_state.
@param callback_data A pointer that is simply passed back to callback.
@param n_beams Number of beams to use.
@param n_past Number of tokens already evaluated.
@param n_predict Maximum number of tokens to predict. EOS may occur earlier. llama_chat_apply_template ⚠ Apply chat template. Inspired by HF apply_chat_template() in Python.
Both “model” and “custom_template” are optional, but at least one is required. “custom_template” has higher precedence than “model”
NOTE: This function does not use a jinja parser. It only supports a pre-defined list of templates. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
@param tmpl A Jinja template to use for this chat. If this is nullptr, the model’s default chat template will be used instead.
@param chat Pointer to a list of multiple llama_chat_message
@param n_msg Number of llama_chat_message in this chat
@param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message.
@param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages)
@param length The size of the allocated buffer
@return The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-alloc it and then re-apply the template. llama_context_default_params ⚠ llama_control_vector_apply ⚠ llama_copy_state_data ⚠ llama_decode ⚠ llama_dump_timing_info_yaml ⚠ llama_free ⚠ llama_free_model ⚠ llama_get_embeddings ⚠ llama_get_embeddings_ith ⚠ llama_get_embeddings_seq ⚠ llama_get_kv_cache_token_count ⚠ llama_get_kv_cache_used_cells ⚠ llama_get_logits ⚠ llama_get_logits_ith ⚠ llama_get_model ⚠ llama_get_model_tensor ⚠ llama_get_state_size ⚠ llama_get_timings ⚠ llama_grammar_accept_token ⚠ @details Accepts the sampled token into the grammar llama_grammar_copy ⚠ llama_grammar_free ⚠ llama_grammar_init ⚠ llama_kv_cache_clear ⚠ llama_kv_cache_defrag ⚠ llama_kv_cache_seq_add ⚠ llama_kv_cache_seq_cp ⚠ llama_kv_cache_seq_div ⚠ llama_kv_cache_seq_keep ⚠ llama_kv_cache_seq_pos_max ⚠ llama_kv_cache_seq_rm ⚠ llama_kv_cache_update ⚠ llama_kv_cache_view_free ⚠ llama_kv_cache_view_init ⚠ llama_kv_cache_view_update ⚠ llama_load_model_from_file ⚠ llama_load_session_file ⚠ llama_log_set ⚠ llama_max_devices ⚠ llama_model_apply_lora_from_file ⚠ llama_model_default_params ⚠ llama_model_desc ⚠ llama_model_meta_count ⚠ llama_model_meta_key_by_index ⚠ llama_model_meta_val_str ⚠ llama_model_meta_val_str_by_index ⚠ llama_model_n_params ⚠ llama_model_quantize ⚠ llama_model_quantize_default_params ⚠ llama_model_size ⚠ llama_n_batch ⚠ llama_n_ctx ⚠ llama_n_ctx_train ⚠ llama_n_embd ⚠ llama_n_layer ⚠ llama_n_seq_max ⚠ llama_n_ubatch ⚠ llama_n_vocab ⚠ llama_new_context_with_model ⚠ llama_numa_init ⚠ llama_pooling_type ⚠ llama_print_system_info ⚠ llama_print_timings ⚠ llama_reset_timings ⚠ llama_rope_freq_scale_train ⚠ llama_rope_type ⚠ llama_sample_apply_guidance ⚠ @details Apply classifier-free guidance to the logits as described in academic paper “Stay on topic with Classifier-Free Guidance” https://arxiv.org/abs/2306.17806
@param logits Logits extracted from the original generation context.
@param logits_guidance Logits extracted from a separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context.
@param scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance. llama_sample_entropy ⚠ @details Dynamic temperature implementation described in the paper https://arxiv.org/abs/2309.02772. llama_sample_grammar ⚠ @details Apply constraints from grammar llama_sample_min_p ⚠ @details Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841 llama_sample_repetition_penalties ⚠ @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
@details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details. llama_sample_softmax ⚠ @details Sorts candidate tokens by their logits in descending order and calculates probabilities based on logits. llama_sample_tail_free ⚠ @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/. llama_sample_temp ⚠ llama_sample_token ⚠ @details Randomly selects a token from the candidates based on their probabilities using the RNG of ctx. llama_sample_token_greedy ⚠ @details Selects the token with the highest probability.
Does not compute the token probabilities. Use llama_sample_softmax() instead. llama_sample_token_mirostat ⚠ @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
@param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
@param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
@param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates.
@param m The number of tokens considered in the estimation of s_hat. This is an arbitrary value that is used to calculate s_hat, which in turn helps to calculate the value of k. In the paper, they use m = 100, but you can experiment with different values to see how it affects the performance of the algorithm.
@param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal. llama_sample_token_mirostat_v2 ⚠ @details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
@param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
@param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
@param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates.
@param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal. llama_sample_top_k ⚠ @details Top-K sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751 llama_sample_top_p ⚠ @details Nucleus sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751 llama_sample_typical ⚠ @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666. llama_save_session_file ⚠ llama_set_abort_callback ⚠ llama_set_causal_attn ⚠ llama_set_n_threads ⚠ llama_set_rng_seed ⚠ llama_set_state_data ⚠ llama_split_path ⚠ @details Build a split GGUF final path for this chunk.
llama_split_path(split_path, sizeof(split_path), “/models/ggml-model-q4_0”, 2, 4) => split_path = “/models/ggml-model-q4_0-00002-of-00004.gguf” llama_split_prefix ⚠ @details Extract the path prefix from the split_path if and only if the split_no and split_count match.
llama_split_prefix(split_prefix, 64, “/models/ggml-model-q4_0-00002-of-00004.gguf”, 2, 4) => split_prefix = “/models/ggml-model-q4_0” llama_state_get_data ⚠ llama_state_get_size ⚠ llama_state_load_file ⚠ llama_state_save_file ⚠ llama_state_seq_get_data ⚠ llama_state_seq_get_size ⚠ llama_state_seq_load_file ⚠ llama_state_seq_save_file ⚠ llama_state_seq_set_data ⚠ llama_state_set_data ⚠ llama_supports_gpu_offload ⚠ llama_supports_mlock ⚠ llama_supports_mmap ⚠ llama_synchronize ⚠ llama_time_us ⚠ llama_token_bos ⚠ llama_token_cls ⚠ llama_token_eos ⚠ llama_token_eot ⚠ llama_token_get_score ⚠ llama_token_get_text ⚠ llama_token_get_type ⚠ llama_token_is_eog ⚠ llama_token_middle ⚠ llama_token_nl ⚠ llama_token_prefix ⚠ llama_token_sep ⚠ llama_token_suffix ⚠ llama_token_to_piece ⚠ llama_tokenize ⚠ @details Convert the provided text into tokens.
@param tokens The tokens pointer must be large enough to hold the resulting tokens.
@return Returns the number of tokens on success, no more than n_tokens_max
@return Returns a negative number on failure - the number of tokens that would have been returned
@param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
as plaintext. Does not insert a leading space. llama_vocab_type ⚠ FILE _IO_lock_t __off64_t __off_t ggml_abort_callback ggml_backend_sched_eval_callback ggml_backend_type ggml_binary_op_f32_t ggml_cgraph_eval_order ggml_custom1_op_f32_t ggml_custom1_op_t ggml_custom2_op_f32_t ggml_custom2_op_t ggml_custom3_op_f32_t ggml_custom3_op_t ggml_fp16_t ggml_from_float_t ggml_ftype ggml_guid ggml_guid_t ggml_linesearch ggml_log_callback ggml_log_level ggml_numa_strategy ggml_object_type ggml_op ggml_op_pool ggml_opt_callback ggml_opt_result ggml_opt_type ggml_prec ggml_sort_order ggml_status ggml_task_type ggml_tensor_flag ggml_to_float_t ggml_type ggml_unary_op ggml_unary_op_f32_t ggml_vec_dot_t gguf_type llama_beam_search_callback_fn_t llama_ftype llama_gretype llama_model_kv_override_type llama_pooling_type llama_pos llama_progress_callback llama_rope_scaling_type llama_rope_type llama_seq_id llama_split_mode llama_token llama_token_type llama_vocab_type llama_model_kv_override__bindgen_ty_1