Skip to main content Crate llama_cpp_bindings_sys Copy item path Source ggml_backend ggml_backend_buffer ggml_backend_buffer_type ggml_backend_dev_caps ggml_backend_dev_props ggml_backend_device ggml_backend_event ggml_backend_feature ggml_backend_graph_copy ggml_backend_meta_split_state ggml_backend_reg ggml_backend_sched ggml_bf16_t ggml_cgraph ggml_context ggml_cplan ggml_gallocr ggml_init_params ggml_object ggml_opt_context ggml_opt_dataset ggml_opt_optimizer_params ggml_opt_optimizer_params__bindgen_ty_1 ggml_opt_optimizer_params__bindgen_ty_2 ggml_opt_params ggml_opt_result ggml_tallocr ggml_tensor ggml_threadpool ggml_threadpool_params ggml_type_traits ggml_type_traits_cpu gguf_context gguf_init_params llama_adapter_lora llama_batch llama_chat_message llama_context llama_context_params llama_logit_bias llama_memory_i llama_model llama_model_imatrix_data llama_model_kv_override llama_model_params llama_model_quantize_params llama_model_tensor_buft_override llama_model_tensor_override llama_opt_params llama_perf_context_data llama_perf_sampler_data llama_rs_parsed_chat llama_sampler llama_sampler_chain_params llama_sampler_data llama_sampler_i llama_sampler_seq_config llama_token_data llama_token_data_array llama_vocab mtmd_bitmap mtmd_caps mtmd_context mtmd_context_params mtmd_decoder_pos mtmd_image_tokens mtmd_input_chunk mtmd_input_chunks mtmd_input_text GGML_BACKEND_BUFFER_USAGE_ANY GGML_BACKEND_BUFFER_USAGE_COMPUTE GGML_BACKEND_BUFFER_USAGE_WEIGHTS GGML_BACKEND_DEVICE_TYPE_ACCEL GGML_BACKEND_DEVICE_TYPE_CPU GGML_BACKEND_DEVICE_TYPE_GPU GGML_BACKEND_DEVICE_TYPE_IGPU GGML_BACKEND_DEVICE_TYPE_META GGML_BACKEND_SPLIT_AXIS_0 GGML_BACKEND_SPLIT_AXIS_1 GGML_BACKEND_SPLIT_AXIS_2 GGML_BACKEND_SPLIT_AXIS_3 GGML_BACKEND_SPLIT_AXIS_MIRRORED GGML_BACKEND_SPLIT_AXIS_NONE GGML_BACKEND_SPLIT_AXIS_PARTIAL GGML_BACKEND_SPLIT_AXIS_UNKNOWN GGML_FTYPE_ALL_F32 GGML_FTYPE_MOSTLY_BF16 GGML_FTYPE_MOSTLY_F16 GGML_FTYPE_MOSTLY_IQ1_M GGML_FTYPE_MOSTLY_IQ1_S GGML_FTYPE_MOSTLY_IQ2_S 
GGML_FTYPE_MOSTLY_IQ2_XS GGML_FTYPE_MOSTLY_IQ2_XXS GGML_FTYPE_MOSTLY_IQ3_S GGML_FTYPE_MOSTLY_IQ3_XXS GGML_FTYPE_MOSTLY_IQ4_NL GGML_FTYPE_MOSTLY_IQ4_XS GGML_FTYPE_MOSTLY_MXFP4 GGML_FTYPE_MOSTLY_NVFP4 GGML_FTYPE_MOSTLY_Q1_0 GGML_FTYPE_MOSTLY_Q2_K GGML_FTYPE_MOSTLY_Q3_K GGML_FTYPE_MOSTLY_Q4_0 GGML_FTYPE_MOSTLY_Q4_1 GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 GGML_FTYPE_MOSTLY_Q4_K GGML_FTYPE_MOSTLY_Q5_0 GGML_FTYPE_MOSTLY_Q5_1 GGML_FTYPE_MOSTLY_Q5_K GGML_FTYPE_MOSTLY_Q6_K GGML_FTYPE_MOSTLY_Q8_0 GGML_FTYPE_UNKNOWN GGML_GLU_OP_COUNT GGML_GLU_OP_GEGLU GGML_GLU_OP_GEGLU_ERF GGML_GLU_OP_GEGLU_QUICK GGML_GLU_OP_REGLU GGML_GLU_OP_SWIGLU GGML_GLU_OP_SWIGLU_OAI GGML_HINT_NONE GGML_HINT_SRC0_IS_HADAMARD GGML_LOG_LEVEL_CONT GGML_LOG_LEVEL_DEBUG GGML_LOG_LEVEL_ERROR GGML_LOG_LEVEL_INFO GGML_LOG_LEVEL_NONE GGML_LOG_LEVEL_WARN GGML_NUMA_STRATEGY_COUNT GGML_NUMA_STRATEGY_DISABLED GGML_NUMA_STRATEGY_DISTRIBUTE GGML_NUMA_STRATEGY_ISOLATE GGML_NUMA_STRATEGY_MIRROR GGML_NUMA_STRATEGY_NUMACTL GGML_OBJECT_TYPE_GRAPH GGML_OBJECT_TYPE_TENSOR GGML_OBJECT_TYPE_WORK_BUFFER GGML_OPT_BUILD_TYPE_FORWARD GGML_OPT_BUILD_TYPE_GRAD GGML_OPT_BUILD_TYPE_OPT GGML_OPT_LOSS_TYPE_CROSS_ENTROPY GGML_OPT_LOSS_TYPE_MEAN GGML_OPT_LOSS_TYPE_MEAN_SQUARED_ERROR GGML_OPT_LOSS_TYPE_SUM GGML_OPT_OPTIMIZER_TYPE_ADAMW GGML_OPT_OPTIMIZER_TYPE_COUNT GGML_OPT_OPTIMIZER_TYPE_SGD GGML_OP_ACC GGML_OP_ADD GGML_OP_ADD1 GGML_OP_ADD_ID GGML_OP_ADD_REL_POS GGML_OP_ARANGE GGML_OP_ARGMAX GGML_OP_ARGSORT GGML_OP_CLAMP GGML_OP_CONCAT GGML_OP_CONT GGML_OP_CONV_2D GGML_OP_CONV_2D_DW GGML_OP_CONV_3D GGML_OP_CONV_TRANSPOSE_1D GGML_OP_CONV_TRANSPOSE_2D GGML_OP_COS GGML_OP_COUNT GGML_OP_COUNT_EQUAL GGML_OP_CPY GGML_OP_CROSS_ENTROPY_LOSS GGML_OP_CROSS_ENTROPY_LOSS_BACK GGML_OP_CUMSUM GGML_OP_CUSTOM GGML_OP_DIAG GGML_OP_DIAG_MASK_INF GGML_OP_DIAG_MASK_ZERO GGML_OP_DIV GGML_OP_DUP GGML_OP_FILL GGML_OP_FLASH_ATTN_BACK GGML_OP_FLASH_ATTN_EXT GGML_OP_GATED_DELTA_NET GGML_OP_GATED_LINEAR_ATTN GGML_OP_GET_REL_POS GGML_OP_GET_ROWS GGML_OP_GET_ROWS_BACK 
GGML_OP_GLU GGML_OP_GROUP_NORM GGML_OP_IM2COL GGML_OP_IM2COL_3D GGML_OP_IM2COL_BACK GGML_OP_L2_NORM GGML_OP_LEAKY_RELU GGML_OP_LOG GGML_OP_MAP_CUSTOM1 GGML_OP_MAP_CUSTOM2 GGML_OP_MAP_CUSTOM3 GGML_OP_MEAN GGML_OP_MUL GGML_OP_MUL_MAT GGML_OP_MUL_MAT_ID GGML_OP_NONE GGML_OP_NORM GGML_OP_OPT_STEP_ADAMW GGML_OP_OPT_STEP_SGD GGML_OP_OUT_PROD GGML_OP_PAD GGML_OP_PAD_REFLECT_1D GGML_OP_PERMUTE GGML_OP_POOL_1D GGML_OP_POOL_2D GGML_OP_POOL_2D_BACK GGML_OP_POOL_AVG GGML_OP_POOL_COUNT GGML_OP_POOL_MAX GGML_OP_REPEAT GGML_OP_REPEAT_BACK GGML_OP_RESHAPE GGML_OP_RMS_NORM GGML_OP_RMS_NORM_BACK GGML_OP_ROLL GGML_OP_ROPE GGML_OP_ROPE_BACK GGML_OP_RWKV_WKV6 GGML_OP_RWKV_WKV7 GGML_OP_SCALE GGML_OP_SET GGML_OP_SET_ROWS GGML_OP_SILU_BACK GGML_OP_SIN GGML_OP_SOFT_MAX GGML_OP_SOFT_MAX_BACK GGML_OP_SOLVE_TRI GGML_OP_SQR GGML_OP_SQRT GGML_OP_SSM_CONV GGML_OP_SSM_SCAN GGML_OP_SUB GGML_OP_SUM GGML_OP_SUM_ROWS GGML_OP_TIMESTEP_EMBEDDING GGML_OP_TOP_K GGML_OP_TRANSPOSE GGML_OP_TRI GGML_OP_UNARY GGML_OP_UPSCALE GGML_OP_VIEW GGML_OP_WIN_PART GGML_OP_WIN_UNPART GGML_PREC_DEFAULT GGML_PREC_F32 GGML_SCALE_FLAG_ALIGN_CORNERS GGML_SCALE_FLAG_ANTIALIAS GGML_SCALE_MODE_BICUBIC GGML_SCALE_MODE_BILINEAR GGML_SCALE_MODE_COUNT GGML_SCALE_MODE_NEAREST GGML_SCHED_PRIO_HIGH GGML_SCHED_PRIO_LOW GGML_SCHED_PRIO_MEDIUM GGML_SCHED_PRIO_NORMAL GGML_SCHED_PRIO_REALTIME GGML_SORT_ORDER_ASC GGML_SORT_ORDER_DESC GGML_STATUS_ABORTED GGML_STATUS_ALLOC_FAILED GGML_STATUS_FAILED GGML_STATUS_SUCCESS GGML_TENSOR_FLAG_COMPUTE GGML_TENSOR_FLAG_INPUT GGML_TENSOR_FLAG_LOSS GGML_TENSOR_FLAG_OUTPUT GGML_TENSOR_FLAG_PARAM GGML_TRI_TYPE_LOWER GGML_TRI_TYPE_LOWER_DIAG GGML_TRI_TYPE_UPPER GGML_TRI_TYPE_UPPER_DIAG GGML_TYPE_BF16 GGML_TYPE_COUNT GGML_TYPE_F16 GGML_TYPE_F32 GGML_TYPE_F64 GGML_TYPE_I8 GGML_TYPE_I16 GGML_TYPE_I32 GGML_TYPE_I64 GGML_TYPE_IQ1_M GGML_TYPE_IQ1_S GGML_TYPE_IQ2_S GGML_TYPE_IQ2_XS GGML_TYPE_IQ2_XXS GGML_TYPE_IQ3_S GGML_TYPE_IQ3_XXS GGML_TYPE_IQ4_NL GGML_TYPE_IQ4_XS GGML_TYPE_MXFP4 GGML_TYPE_NVFP4 GGML_TYPE_Q1_0 
GGML_TYPE_Q2_K GGML_TYPE_Q3_K GGML_TYPE_Q4_0 GGML_TYPE_Q4_1 GGML_TYPE_Q4_K GGML_TYPE_Q5_0 GGML_TYPE_Q5_1 GGML_TYPE_Q5_K GGML_TYPE_Q6_K GGML_TYPE_Q8_0 GGML_TYPE_Q8_1 GGML_TYPE_Q8_K GGML_TYPE_TQ1_0 GGML_TYPE_TQ2_0 GGML_UNARY_OP_ABS GGML_UNARY_OP_CEIL GGML_UNARY_OP_COUNT GGML_UNARY_OP_ELU GGML_UNARY_OP_EXP GGML_UNARY_OP_EXPM1 GGML_UNARY_OP_FLOOR GGML_UNARY_OP_GELU GGML_UNARY_OP_GELU_ERF GGML_UNARY_OP_GELU_QUICK GGML_UNARY_OP_HARDSIGMOID GGML_UNARY_OP_HARDSWISH GGML_UNARY_OP_NEG GGML_UNARY_OP_RELU GGML_UNARY_OP_ROUND GGML_UNARY_OP_SGN GGML_UNARY_OP_SIGMOID GGML_UNARY_OP_SILU GGML_UNARY_OP_SOFTPLUS GGML_UNARY_OP_STEP GGML_UNARY_OP_TANH GGML_UNARY_OP_TRUNC GGML_UNARY_OP_XIELU GGUF_TYPE_ARRAY GGUF_TYPE_BOOL GGUF_TYPE_COUNT GGUF_TYPE_FLOAT32 GGUF_TYPE_FLOAT64 GGUF_TYPE_INT8 GGUF_TYPE_INT16 GGUF_TYPE_INT32 GGUF_TYPE_INT64 GGUF_TYPE_STRING GGUF_TYPE_UINT8 GGUF_TYPE_UINT16 GGUF_TYPE_UINT32 GGUF_TYPE_UINT64 LLAMA_ATTENTION_TYPE_CAUSAL LLAMA_ATTENTION_TYPE_NON_CAUSAL LLAMA_ATTENTION_TYPE_UNSPECIFIED LLAMA_FLASH_ATTN_TYPE_AUTO LLAMA_FLASH_ATTN_TYPE_DISABLED LLAMA_FLASH_ATTN_TYPE_ENABLED LLAMA_FTYPE_ALL_F32 LLAMA_FTYPE_GUESSED LLAMA_FTYPE_MOSTLY_BF16 LLAMA_FTYPE_MOSTLY_F16 LLAMA_FTYPE_MOSTLY_IQ1_M LLAMA_FTYPE_MOSTLY_IQ1_S LLAMA_FTYPE_MOSTLY_IQ2_M LLAMA_FTYPE_MOSTLY_IQ2_S LLAMA_FTYPE_MOSTLY_IQ2_XS LLAMA_FTYPE_MOSTLY_IQ2_XXS LLAMA_FTYPE_MOSTLY_IQ3_M LLAMA_FTYPE_MOSTLY_IQ3_S LLAMA_FTYPE_MOSTLY_IQ3_XS LLAMA_FTYPE_MOSTLY_IQ3_XXS LLAMA_FTYPE_MOSTLY_IQ4_NL LLAMA_FTYPE_MOSTLY_IQ4_XS LLAMA_FTYPE_MOSTLY_MXFP4_MOE LLAMA_FTYPE_MOSTLY_NVFP4 LLAMA_FTYPE_MOSTLY_Q1_0 LLAMA_FTYPE_MOSTLY_Q2_K LLAMA_FTYPE_MOSTLY_Q2_K_S LLAMA_FTYPE_MOSTLY_Q3_K_L LLAMA_FTYPE_MOSTLY_Q3_K_M LLAMA_FTYPE_MOSTLY_Q3_K_S LLAMA_FTYPE_MOSTLY_Q4_0 LLAMA_FTYPE_MOSTLY_Q4_1 LLAMA_FTYPE_MOSTLY_Q4_K_M LLAMA_FTYPE_MOSTLY_Q4_K_S LLAMA_FTYPE_MOSTLY_Q5_0 LLAMA_FTYPE_MOSTLY_Q5_1 LLAMA_FTYPE_MOSTLY_Q5_K_M LLAMA_FTYPE_MOSTLY_Q5_K_S LLAMA_FTYPE_MOSTLY_Q6_K LLAMA_FTYPE_MOSTLY_Q8_0 LLAMA_FTYPE_MOSTLY_TQ1_0 LLAMA_FTYPE_MOSTLY_TQ2_0 
LLAMA_KV_OVERRIDE_TYPE_BOOL LLAMA_KV_OVERRIDE_TYPE_FLOAT LLAMA_KV_OVERRIDE_TYPE_INT LLAMA_KV_OVERRIDE_TYPE_STR LLAMA_MODEL_META_KEY_SAMPLING_MIN_P LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_ETA LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_TAU LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_LAST_N LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_REPEAT LLAMA_MODEL_META_KEY_SAMPLING_SEQUENCE LLAMA_MODEL_META_KEY_SAMPLING_TEMP LLAMA_MODEL_META_KEY_SAMPLING_TOP_K LLAMA_MODEL_META_KEY_SAMPLING_TOP_P LLAMA_MODEL_META_KEY_SAMPLING_XTC_PROBABILITY LLAMA_MODEL_META_KEY_SAMPLING_XTC_THRESHOLD LLAMA_POOLING_TYPE_CLS LLAMA_POOLING_TYPE_LAST LLAMA_POOLING_TYPE_MEAN LLAMA_POOLING_TYPE_NONE LLAMA_POOLING_TYPE_RANK LLAMA_POOLING_TYPE_UNSPECIFIED LLAMA_ROPE_SCALING_TYPE_LINEAR LLAMA_ROPE_SCALING_TYPE_LONGROPE LLAMA_ROPE_SCALING_TYPE_MAX_VALUE LLAMA_ROPE_SCALING_TYPE_NONE LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED LLAMA_ROPE_SCALING_TYPE_YARN LLAMA_ROPE_TYPE_IMROPE LLAMA_ROPE_TYPE_MROPE LLAMA_ROPE_TYPE_NEOX LLAMA_ROPE_TYPE_NONE LLAMA_ROPE_TYPE_NORM LLAMA_ROPE_TYPE_VISION LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_MODEL_ARG LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_ERROR_ARG LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_NULL_OUT_HAYSTACK_ARG LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_OK LLAMA_RS_COMPUTE_TOOL_CALL_HAYSTACK_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_DECODE_COMPUTE_FAILED LLAMA_RS_DECODE_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_DECODE_NULL_CTX_ARG LLAMA_RS_DECODE_NULL_OUT_ERROR_ARG LLAMA_RS_DECODE_OK LLAMA_RS_DECODE_OUT_OF_MEMORY LLAMA_RS_DECODE_VENDORED_RETURNED_NONZERO_CODE LLAMA_RS_DECODE_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_DETECT_REASONING_MARKERS_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_DETECT_REASONING_MARKERS_NULL_MODEL_ARG LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_CLOSE_ARG LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_ERROR_ARG LLAMA_RS_DETECT_REASONING_MARKERS_NULL_OUT_OPEN_ARG LLAMA_RS_DETECT_REASONING_MARKERS_OK 
LLAMA_RS_DETECT_REASONING_MARKERS_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_MODEL_ARG LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_ERROR_ARG LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_NO_TOOLS_ARG LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_NULL_OUT_WITH_TOOLS_ARG LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_OK LLAMA_RS_DIAGNOSE_TOOL_CALL_SYNTHETIC_RENDERS_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_ENCODE_COMPUTE_FAILED LLAMA_RS_ENCODE_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_ENCODE_MODEL_HAS_NO_ENCODER LLAMA_RS_ENCODE_NULL_CTX_ARG LLAMA_RS_ENCODE_OK LLAMA_RS_ENCODE_OUT_OF_MEMORY LLAMA_RS_ENCODE_VENDORED_RETURNED_NONZERO_CODE LLAMA_RS_ENCODE_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_FIT_PARAMS_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_FIT_PARAMS_OK LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_ERROR LLAMA_RS_FIT_PARAMS_VENDORED_REPORTED_FAILURE LLAMA_RS_FIT_PARAMS_VENDORED_RETURNED_UNRECOGNIZED_STATUS_CODE LLAMA_RS_FIT_PARAMS_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_INVALID_SCHEMA LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_ERROR_ARG LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_OUT_GRAMMAR_ARG LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_NULL_SCHEMA_JSON_ARG LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_OK LLAMA_RS_JSON_SCHEMA_TO_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_LOAD_MODEL_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_ERROR_ARG LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_OUT_MODEL_ARG LLAMA_RS_LOAD_MODEL_FROM_FILE_NULL_PATH_ARG LLAMA_RS_LOAD_MODEL_FROM_FILE_OK LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_RETURNED_NULL LLAMA_RS_LOAD_MODEL_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_MEMORY_SEQ_ADD_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_MEMORY_SEQ_ADD_INCOMPATIBLE_ROPE_TYPE LLAMA_RS_MEMORY_SEQ_ADD_NULL_CTX_ARG LLAMA_RS_MEMORY_SEQ_ADD_NULL_MEM LLAMA_RS_MEMORY_SEQ_ADD_OK 
LLAMA_RS_MEMORY_SEQ_ADD_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_MEMORY_SEQ_DIV_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_MEMORY_SEQ_DIV_INCOMPATIBLE_ROPE_TYPE LLAMA_RS_MEMORY_SEQ_DIV_NULL_CTX_ARG LLAMA_RS_MEMORY_SEQ_DIV_NULL_MEM LLAMA_RS_MEMORY_SEQ_DIV_OK LLAMA_RS_MEMORY_SEQ_DIV_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_CTX_ARG LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_FNAME_ARG LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_NULL_OUT_BITMAP_ARG LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_OK LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_RETURNED_NULL LLAMA_RS_MTMD_BITMAP_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_MTMD_ENCODE_CHUNK_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CHUNK_ARG LLAMA_RS_MTMD_ENCODE_CHUNK_NULL_CTX_ARG LLAMA_RS_MTMD_ENCODE_CHUNK_OK LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_RETURNED_NONZERO_CODE LLAMA_RS_MTMD_ENCODE_CHUNK_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_CHUNK_ARG LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_LLAMA_CTX_ARG LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_MTMD_CTX_ARG LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_NULL_OUT_NEW_N_PAST_ARG LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_OK LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_RETURNED_NONZERO_CODE LLAMA_RS_MTMD_EVAL_CHUNK_SINGLE_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_MTMD_INIT_FROM_FILE_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_MMPROJ_PATH_ARG LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_OUT_CTX_ARG LLAMA_RS_MTMD_INIT_FROM_FILE_NULL_TEXT_MODEL_ARG LLAMA_RS_MTMD_INIT_FROM_FILE_OK LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_RETURNED_NULL LLAMA_RS_MTMD_INIT_FROM_FILE_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_MTMD_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_MTMD_TOKENIZE_NULL_BITMAPS_ARG_WHEN_NUM_BITMAPS_NONZERO LLAMA_RS_MTMD_TOKENIZE_NULL_CTX_ARG LLAMA_RS_MTMD_TOKENIZE_NULL_OUTPUT_ARG LLAMA_RS_MTMD_TOKENIZE_NULL_TEXT_ARG 
LLAMA_RS_MTMD_TOKENIZE_OK LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_BITMAP_COUNT_DOES_NOT_MATCH_MARKER_COUNT LLAMA_RS_MTMD_TOKENIZE_VENDORED_REPORTED_IMAGE_PREPROCESSING_ERROR LLAMA_RS_MTMD_TOKENIZE_VENDORED_RETURNED_UNDOCUMENTED_NONZERO_CODE LLAMA_RS_MTMD_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_NEW_CONTEXT_WITH_MODEL_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_MODEL_ARG LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_CTX_ARG LLAMA_RS_NEW_CONTEXT_WITH_MODEL_NULL_OUT_ERROR_ARG LLAMA_RS_NEW_CONTEXT_WITH_MODEL_OK LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_RETURNED_NULL LLAMA_RS_NEW_CONTEXT_WITH_MODEL_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_PARSED_CHAT_CONTENT_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_PARSED_CHAT_CONTENT_NULL_HANDLE_ARG LLAMA_RS_PARSED_CHAT_CONTENT_NULL_OUT_STRING_ARG LLAMA_RS_PARSED_CHAT_CONTENT_OK LLAMA_RS_PARSED_CHAT_CONTENT_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_PARSED_CHAT_FREE_DESTRUCTOR_THREW_CXX_EXCEPTION LLAMA_RS_PARSED_CHAT_FREE_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_PARSED_CHAT_FREE_OK LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_HANDLE_ARG LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_NULL_OUT_STRING_ARG LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_OK LLAMA_RS_PARSED_CHAT_REASONING_CONTENT_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_INDEX_OUT_OF_BOUNDS LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_HANDLE_ARG LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_NULL_OUT_STRING_ARG LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_OK LLAMA_RS_PARSED_CHAT_TOOL_CALL_ARGUMENTS_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_HANDLE_ARG LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_NULL_OUT_COUNT_ARG LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_OK LLAMA_RS_PARSED_CHAT_TOOL_CALL_COUNT_VENDORED_THREW_CXX_EXCEPTION 
LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_INDEX_OUT_OF_BOUNDS LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_HANDLE_ARG LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_NULL_OUT_STRING_ARG LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_OK LLAMA_RS_PARSED_CHAT_TOOL_CALL_ID_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_INDEX_OUT_OF_BOUNDS LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_HANDLE_ARG LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_NULL_OUT_STRING_ARG LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_OK LLAMA_RS_PARSED_CHAT_TOOL_CALL_NAME_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_PARSE_CHAT_MESSAGE_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_CHAT_TEMPLATE LLAMA_RS_PARSE_CHAT_MESSAGE_MODEL_HAS_NO_VOCAB LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_INPUT_ARG LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_MODEL_ARG LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_ERROR_ARG LLAMA_RS_PARSE_CHAT_MESSAGE_NULL_OUT_HANDLE_ARG LLAMA_RS_PARSE_CHAT_MESSAGE_OK LLAMA_RS_PARSE_CHAT_MESSAGE_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_SAMPLER_ACCEPT_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_SAMPLER_ACCEPT_NULL_OUT_ERROR_ARG LLAMA_RS_SAMPLER_ACCEPT_NULL_SAMPLER_ARG LLAMA_RS_SAMPLER_ACCEPT_OK LLAMA_RS_SAMPLER_ACCEPT_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_SAMPLER_APPLY_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_SAMPLER_APPLY_NULL_DATA_ARRAY_ARG LLAMA_RS_SAMPLER_APPLY_NULL_OUT_ERROR_ARG LLAMA_RS_SAMPLER_APPLY_NULL_SAMPLER_ARG LLAMA_RS_SAMPLER_APPLY_OK LLAMA_RS_SAMPLER_APPLY_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_SAMPLER_INIT_GRAMMAR_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_ERROR_ARG LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_NULL_OUT_SAMPLER_ARG LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_OK LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_INVALID_TRIGGER_PATTERN 
LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_ERROR_ARG LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_NULL_OUT_SAMPLER_ARG LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_OK LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_RETURNED_NULL LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_PATTERNS_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_RETURNED_NULL LLAMA_RS_SAMPLER_INIT_GRAMMAR_LAZY_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_ERROR_ARG LLAMA_RS_SAMPLER_INIT_GRAMMAR_NULL_OUT_SAMPLER_ARG LLAMA_RS_SAMPLER_INIT_GRAMMAR_OK LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_RETURNED_NULL LLAMA_RS_SAMPLER_INIT_GRAMMAR_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_SAMPLER_SAMPLE_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_SAMPLER_SAMPLE_NULL_CTX_ARG LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_ERROR_ARG LLAMA_RS_SAMPLER_SAMPLE_NULL_OUT_TOKEN_ARG LLAMA_RS_SAMPLER_SAMPLE_NULL_SAMPLER_ARG LLAMA_RS_SAMPLER_SAMPLE_OK LLAMA_RS_SAMPLER_SAMPLE_VENDORED_THREW_CXX_EXCEPTION LLAMA_RS_STATUS_ALLOCATION_FAILED LLAMA_RS_STATUS_EXCEPTION LLAMA_RS_STATUS_INVALID_ARGUMENT LLAMA_RS_STATUS_OK LLAMA_RS_TOKENIZE_ERROR_STRING_ALLOCATION_FAILED LLAMA_RS_TOKENIZE_NULL_OUT_ERROR_ARG LLAMA_RS_TOKENIZE_NULL_OUT_RETURNED_COUNT_ARG LLAMA_RS_TOKENIZE_NULL_TEXT_ARG LLAMA_RS_TOKENIZE_NULL_VOCAB_ARG LLAMA_RS_TOKENIZE_OK LLAMA_RS_TOKENIZE_VENDORED_THREW_CXX_EXCEPTION LLAMA_SPLIT_MODE_LAYER LLAMA_SPLIT_MODE_NONE LLAMA_SPLIT_MODE_ROW LLAMA_SPLIT_MODE_TENSOR LLAMA_TOKEN_ATTR_BYTE LLAMA_TOKEN_ATTR_CONTROL LLAMA_TOKEN_ATTR_LSTRIP LLAMA_TOKEN_ATTR_NORMAL LLAMA_TOKEN_ATTR_NORMALIZED LLAMA_TOKEN_ATTR_RSTRIP LLAMA_TOKEN_ATTR_SINGLE_WORD LLAMA_TOKEN_ATTR_UNDEFINED LLAMA_TOKEN_ATTR_UNKNOWN LLAMA_TOKEN_ATTR_UNUSED LLAMA_TOKEN_ATTR_USER_DEFINED LLAMA_TOKEN_TYPE_BYTE LLAMA_TOKEN_TYPE_CONTROL LLAMA_TOKEN_TYPE_NORMAL LLAMA_TOKEN_TYPE_UNDEFINED LLAMA_TOKEN_TYPE_UNKNOWN LLAMA_TOKEN_TYPE_UNUSED LLAMA_TOKEN_TYPE_USER_DEFINED LLAMA_VOCAB_TYPE_BPE LLAMA_VOCAB_TYPE_NONE LLAMA_VOCAB_TYPE_PLAMO2 LLAMA_VOCAB_TYPE_RWKV 
LLAMA_VOCAB_TYPE_SPM LLAMA_VOCAB_TYPE_UGM LLAMA_VOCAB_TYPE_WPM MTMD_INPUT_CHUNK_TYPE_AUDIO MTMD_INPUT_CHUNK_TYPE_IMAGE MTMD_INPUT_CHUNK_TYPE_TEXT ggml_abort ⚠ ggml_abs ⚠ ggml_abs_inplace ⚠ ggml_acc ⚠ ggml_acc_inplace ⚠ ggml_add ⚠ ggml_add1 ⚠ ggml_add1_inplace ⚠ ggml_add_cast ⚠ ggml_add_id ⚠ ggml_add_inplace ⚠ ggml_add_rel_pos ⚠ ggml_add_rel_pos_inplace ⚠ ggml_arange ⚠ ggml_are_same_shape ⚠ ggml_are_same_stride ⚠ ggml_argmax ⚠ ggml_argsort ⚠ ggml_argsort_top_k ⚠ ggml_backend_alloc_buffer ⚠ ggml_backend_alloc_ctx_tensors ⚠ ggml_backend_alloc_ctx_tensors_from_buft ⚠ ggml_backend_alloc_ctx_tensors_from_buft_size ⚠ ggml_backend_buffer_clear ⚠ ggml_backend_buffer_free ⚠ ggml_backend_buffer_get_alignment ⚠ ggml_backend_buffer_get_alloc_size ⚠ ggml_backend_buffer_get_base ⚠ ggml_backend_buffer_get_max_size ⚠ ggml_backend_buffer_get_size ⚠ ggml_backend_buffer_get_type ⚠ ggml_backend_buffer_get_usage ⚠ ggml_backend_buffer_init_tensor ⚠ ggml_backend_buffer_is_host ⚠ ggml_backend_buffer_name ⚠ ggml_backend_buffer_reset ⚠ ggml_backend_buffer_set_usage ⚠ ggml_backend_buft_alloc_buffer ⚠ ggml_backend_buft_get_alignment ⚠ ggml_backend_buft_get_alloc_size ⚠ ggml_backend_buft_get_device ⚠ ggml_backend_buft_get_max_size ⚠ ggml_backend_buft_is_host ⚠ ggml_backend_buft_name ⚠ ggml_backend_compare_graph_backend ⚠ ggml_backend_cpu_buffer_from_ptr ⚠ ggml_backend_cpu_buffer_type ⚠ ggml_backend_cpu_init ⚠ ggml_backend_cpu_reg ⚠ ggml_backend_cpu_set_abort_callback ⚠ ggml_backend_cpu_set_n_threads ⚠ ggml_backend_cpu_set_threadpool ⚠ ggml_backend_cpu_set_use_ref ⚠ ggml_backend_dev_backend_reg ⚠ ggml_backend_dev_buffer_from_host_ptr ⚠ ggml_backend_dev_buffer_type ⚠ ggml_backend_dev_by_name ⚠ ggml_backend_dev_by_type ⚠ ggml_backend_dev_count ⚠ ggml_backend_dev_description ⚠ ggml_backend_dev_get ⚠ ggml_backend_dev_get_props ⚠ ggml_backend_dev_host_buffer_type ⚠ ggml_backend_dev_init ⚠ ggml_backend_dev_memory ⚠ ggml_backend_dev_name ⚠ ggml_backend_dev_offload_op ⚠ ggml_backend_dev_supports_buft ⚠ 
ggml_backend_dev_supports_op ⚠ ggml_backend_dev_type ⚠ ggml_backend_device_register ⚠ ggml_backend_event_free ⚠ ggml_backend_event_new ⚠ ggml_backend_event_record ⚠ ggml_backend_event_synchronize ⚠ ggml_backend_event_wait ⚠ ggml_backend_free ⚠ ggml_backend_get_alignment ⚠ ggml_backend_get_default_buffer_type ⚠ ggml_backend_get_device ⚠ ggml_backend_get_max_size ⚠ ggml_backend_graph_compute ⚠ ggml_backend_graph_compute_async ⚠ ggml_backend_graph_copy ⚠ ggml_backend_graph_copy_free ⚠ ggml_backend_graph_plan_compute ⚠ ggml_backend_graph_plan_create ⚠ ggml_backend_graph_plan_free ⚠ ggml_backend_guid ⚠ ggml_backend_init_best ⚠ ggml_backend_init_by_name ⚠ ggml_backend_init_by_type ⚠ ggml_backend_is_cpu ⚠ ggml_backend_load ⚠ ggml_backend_load_all ⚠ ggml_backend_load_all_from_path ⚠ ggml_backend_meta_device ⚠ ggml_backend_meta_split_axis_name ⚠ ggml_backend_name ⚠ ggml_backend_offload_op ⚠ ggml_backend_reg_by_name ⚠ ggml_backend_reg_count ⚠ ggml_backend_reg_dev_count ⚠ ggml_backend_reg_dev_get ⚠ ggml_backend_reg_get ⚠ ggml_backend_reg_get_proc_address ⚠ ggml_backend_reg_name ⚠ ggml_backend_register ⚠ ggml_backend_sched_alloc_graph ⚠ ggml_backend_sched_free ⚠ ggml_backend_sched_get_backend ⚠ ggml_backend_sched_get_buffer_size ⚠ ggml_backend_sched_get_buffer_type ⚠ ggml_backend_sched_get_n_backends ⚠ ggml_backend_sched_get_n_copies ⚠ ggml_backend_sched_get_n_splits ⚠ ggml_backend_sched_get_tensor_backend ⚠ ggml_backend_sched_graph_compute ⚠ ggml_backend_sched_graph_compute_async ⚠ ggml_backend_sched_new ⚠ ggml_backend_sched_reserve ⚠ ggml_backend_sched_reserve_size ⚠ ggml_backend_sched_reset ⚠ ggml_backend_sched_set_eval_callback ⚠ ggml_backend_sched_set_tensor_backend ⚠ ggml_backend_sched_split_graph ⚠ ggml_backend_sched_synchronize ⚠ ggml_backend_supports_buft ⚠ ggml_backend_supports_op ⚠ ggml_backend_synchronize ⚠ ggml_backend_tensor_alloc ⚠ ggml_backend_tensor_copy ⚠ ggml_backend_tensor_copy_async ⚠ ggml_backend_tensor_get ⚠ ggml_backend_tensor_get_2d ⚠ 
ggml_backend_tensor_get_2d_async ⚠ ggml_backend_tensor_get_async ⚠ ggml_backend_tensor_memset ⚠ ggml_backend_tensor_set ⚠ ggml_backend_tensor_set_2d ⚠ ggml_backend_tensor_set_2d_async ⚠ ggml_backend_tensor_set_async ⚠ ggml_backend_unload ⚠ ggml_backend_view_init ⚠ ggml_bf16_to_fp32 ⚠ ggml_bf16_to_fp32_row ⚠ ggml_blck_size ⚠ ggml_build_backward_expand ⚠ ggml_build_forward_expand ⚠ ggml_build_forward_select ⚠ ggml_can_repeat ⚠ ggml_cast ⚠ ggml_ceil ⚠ ggml_ceil_inplace ⚠ ggml_clamp ⚠ ggml_commit ⚠ ggml_concat ⚠ ggml_cont ⚠ ggml_cont_1d ⚠ ggml_cont_2d ⚠ ggml_cont_3d ⚠ ggml_cont_4d ⚠ ggml_conv_1d ⚠ ggml_conv_1d_dw ⚠ ggml_conv_1d_dw_ph ⚠ ggml_conv_1d_ph ⚠ ggml_conv_2d ⚠ ggml_conv_2d_direct ⚠ ggml_conv_2d_dw ⚠ ggml_conv_2d_dw_direct ⚠ ggml_conv_2d_s1_ph ⚠ ggml_conv_2d_sk_p0 ⚠ ggml_conv_3d ⚠ ggml_conv_3d_direct ⚠ ggml_conv_transpose_1d ⚠ ggml_conv_transpose_2d_p0 ⚠ ggml_cos ⚠ ggml_cos_inplace ⚠ ggml_count_equal ⚠ ggml_cpu_bf16_to_fp32 ⚠ ggml_cpu_fp16_to_fp32 ⚠ ggml_cpu_fp32_to_bf16 ⚠ ggml_cpu_fp32_to_fp16 ⚠ ggml_cpu_fp32_to_fp32 ⚠ ggml_cpu_fp32_to_i32 ⚠ ggml_cpu_get_rvv_vlen ⚠ ggml_cpu_get_sve_cnt ⚠ ggml_cpu_has_amx_int8 ⚠ ggml_cpu_has_arm_fma ⚠ ggml_cpu_has_avx ⚠ ggml_cpu_has_avx2 ⚠ ggml_cpu_has_avx512 ⚠ ggml_cpu_has_avx512_bf16 ⚠ ggml_cpu_has_avx512_vbmi ⚠ ggml_cpu_has_avx512_vnni ⚠ ggml_cpu_has_avx_vnni ⚠ ggml_cpu_has_bmi2 ⚠ ggml_cpu_has_dotprod ⚠ ggml_cpu_has_f16c ⚠ ggml_cpu_has_fma ⚠ ggml_cpu_has_fp16_va ⚠ ggml_cpu_has_llamafile ⚠ ggml_cpu_has_matmul_int8 ⚠ ggml_cpu_has_neon ⚠ ggml_cpu_has_riscv_v ⚠ ggml_cpu_has_sme ⚠ ggml_cpu_has_sse3 ⚠ ggml_cpu_has_ssse3 ⚠ ggml_cpu_has_sve ⚠ ggml_cpu_has_vsx ⚠ ggml_cpu_has_vxe ⚠ ggml_cpu_has_wasm_simd ⚠ ggml_cpu_init ⚠ ggml_cpy ⚠ ggml_cross_entropy_loss ⚠ ggml_cross_entropy_loss_back ⚠ ggml_cumsum ⚠ ggml_custom_4d ⚠ ggml_custom_inplace ⚠ ggml_cycles ⚠ ggml_cycles_per_ms ⚠ ggml_diag ⚠ ggml_diag_mask_inf ⚠ ggml_diag_mask_inf_inplace ⚠ ggml_diag_mask_zero ⚠ ggml_diag_mask_zero_inplace ⚠ ggml_div ⚠ ggml_div_inplace ⚠ ggml_dup ⚠ 
ggml_dup_inplace ⚠ ggml_dup_tensor ⚠ ggml_element_size ⚠ ggml_elu ⚠ ggml_elu_inplace ⚠ ggml_exp ⚠ ggml_exp_inplace ⚠ ggml_expm1 ⚠ ggml_expm1_inplace ⚠ ggml_fill ⚠ ggml_fill_inplace ⚠ ggml_flash_attn_back ⚠ ggml_flash_attn_ext ⚠ ggml_flash_attn_ext_add_sinks ⚠ ggml_flash_attn_ext_get_prec ⚠ ggml_flash_attn_ext_set_prec ⚠ ggml_floor ⚠ ggml_floor_inplace ⚠ ggml_format_name ⚠ ggml_fp16_to_fp32 ⚠ ggml_fp16_to_fp32_row ⚠ ggml_fp32_to_bf16 ⚠ ggml_fp32_to_bf16_row ⚠ ggml_fp32_to_bf16_row_ref ⚠ ggml_fp32_to_fp16 ⚠ ggml_fp32_to_fp16_row ⚠ ggml_free ⚠ ggml_ftype_to_ggml_type ⚠ ggml_gallocr_alloc_graph ⚠ ggml_gallocr_free ⚠ ggml_gallocr_get_buffer_size ⚠ ggml_gallocr_new ⚠ ggml_gallocr_new_n ⚠ ggml_gallocr_reserve ⚠ ggml_gallocr_reserve_n ⚠ ggml_gallocr_reserve_n_size ⚠ ggml_gated_delta_net ⚠ ggml_gated_linear_attn ⚠ ggml_geglu ⚠ ggml_geglu_erf ⚠ ggml_geglu_erf_split ⚠ ggml_geglu_erf_swapped ⚠ ggml_geglu_quick ⚠ ggml_geglu_quick_split ⚠ ggml_geglu_quick_swapped ⚠ ggml_geglu_split ⚠ ggml_geglu_swapped ⚠ ggml_gelu ⚠ ggml_gelu_erf ⚠ ggml_gelu_erf_inplace ⚠ ggml_gelu_inplace ⚠ ggml_gelu_quick ⚠ ggml_gelu_quick_inplace ⚠ ggml_get_data ⚠ ggml_get_data_f32 ⚠ ggml_get_f32_1d ⚠ ggml_get_f32_nd ⚠ ggml_get_first_tensor ⚠ ggml_get_glu_op ⚠ ggml_get_i32_1d ⚠ ggml_get_i32_nd ⚠ ggml_get_max_tensor_size ⚠ ggml_get_mem_buffer ⚠ ggml_get_mem_size ⚠ ggml_get_name ⚠ ggml_get_next_tensor ⚠ ggml_get_no_alloc ⚠ ggml_get_rel_pos ⚠ ggml_get_rows ⚠ ggml_get_rows_back ⚠ ggml_get_tensor ⚠ ggml_get_type_traits ⚠ ggml_get_type_traits_cpu ⚠ ggml_get_unary_op ⚠ ggml_glu ⚠ ggml_glu_op_name ⚠ ggml_glu_split ⚠ ggml_graph_add_node ⚠ ggml_graph_clear ⚠ ggml_graph_compute ⚠ ggml_graph_compute_with_ctx ⚠ ggml_graph_cpy ⚠ ggml_graph_dump_dot ⚠ ggml_graph_dup ⚠ ggml_graph_get_grad ⚠ ggml_graph_get_grad_acc ⚠ ggml_graph_get_tensor ⚠ ggml_graph_n_nodes ⚠ ggml_graph_node ⚠ ggml_graph_nodes ⚠ ggml_graph_overhead ⚠ ggml_graph_overhead_custom ⚠ ggml_graph_plan ⚠ ggml_graph_print ⚠ ggml_graph_reset ⚠ ggml_graph_size ⚠ 
ggml_group_norm ⚠ ggml_group_norm_inplace ⚠ ggml_guid_matches ⚠ ggml_hardsigmoid ⚠ ggml_hardswish ⚠ ggml_im2col ⚠ ggml_im2col_3d ⚠ ggml_im2col_back ⚠ ggml_init ⚠ ggml_interpolate ⚠ ggml_is_3d ⚠ ggml_is_contiguous ⚠ ggml_is_contiguous_0 ⚠ ggml_is_contiguous_1 ⚠ ggml_is_contiguous_2 ⚠ ggml_is_contiguous_channels ⚠ ggml_is_contiguous_rows ⚠ ggml_is_contiguously_allocated ⚠ ggml_is_empty ⚠ ggml_is_matrix ⚠ ggml_is_numa ⚠ ggml_is_permuted ⚠ ggml_is_quantized ⚠ ggml_is_scalar ⚠ ggml_is_transposed ⚠ ggml_is_vector ⚠ ggml_is_view ⚠ ggml_l2_norm ⚠ ggml_l2_norm_inplace ⚠ ggml_leaky_relu ⚠ ggml_log ⚠ ggml_log_get ⚠ ggml_log_inplace ⚠ ggml_log_set ⚠ ggml_map_custom1 ⚠ ggml_map_custom2 ⚠ ggml_map_custom3 ⚠ ggml_map_custom1_inplace ⚠ ggml_map_custom2_inplace ⚠ ggml_map_custom3_inplace ⚠ ggml_mean ⚠ ggml_mul ⚠ ggml_mul_inplace ⚠ ggml_mul_mat ⚠ ggml_mul_mat_id ⚠ ggml_mul_mat_set_hint ⚠ ggml_mul_mat_set_prec ⚠ ggml_n_dims ⚠ ggml_nbytes ⚠ ggml_nbytes_pad ⚠ ggml_neg ⚠ ggml_neg_inplace ⚠ ggml_nelements ⚠ ggml_new_buffer ⚠ ggml_new_f32 ⚠ ggml_new_graph ⚠ ggml_new_graph_custom ⚠ ggml_new_i32 ⚠ ggml_new_tensor ⚠ ggml_new_tensor_1d ⚠ ggml_new_tensor_2d ⚠ ggml_new_tensor_3d ⚠ ggml_new_tensor_4d ⚠ ggml_norm ⚠ ggml_norm_inplace ⚠ ggml_nrows ⚠ ggml_numa_init ⚠ ggml_op_desc ⚠ ggml_op_name ⚠ ggml_op_symbol ⚠ ggml_opt_alloc ⚠ ggml_opt_context_optimizer_type ⚠ ggml_opt_dataset_data ⚠ ggml_opt_dataset_free ⚠ ggml_opt_dataset_get_batch ⚠ ggml_opt_dataset_get_batch_host ⚠ ggml_opt_dataset_init ⚠ ggml_opt_dataset_labels ⚠ ggml_opt_dataset_ndata ⚠ ggml_opt_dataset_shuffle ⚠ ggml_opt_default_params ⚠ ggml_opt_epoch ⚠ ggml_opt_epoch_callback_progress_bar ⚠ ggml_opt_eval ⚠ ggml_opt_fit ⚠ ggml_opt_free ⚠ ggml_opt_get_constant_optimizer_params ⚠ ggml_opt_get_default_optimizer_params ⚠ ggml_opt_grad_acc ⚠ ggml_opt_init ⚠ ggml_opt_inputs ⚠ ggml_opt_labels ⚠ ggml_opt_loss ⚠ ggml_opt_ncorrect ⚠ ggml_opt_optimizer_name ⚠ ggml_opt_outputs ⚠ ggml_opt_pred ⚠ ggml_opt_prepare_alloc ⚠ ggml_opt_reset ⚠ 
ggml_opt_result_accuracy ⚠ ggml_opt_result_free ⚠ ggml_opt_result_init ⚠ ggml_opt_result_loss ⚠ ggml_opt_result_ndata ⚠ ggml_opt_result_pred ⚠ ggml_opt_result_reset ⚠ ggml_opt_static_graphs ⚠ ggml_opt_step_adamw ⚠ ggml_opt_step_sgd ⚠ ggml_out_prod ⚠ ggml_pad ⚠ ggml_pad_circular ⚠ ggml_pad_ext ⚠ ggml_pad_ext_circular ⚠ ggml_pad_reflect_1d ⚠ ggml_permute ⚠ ggml_pool_1d ⚠ ggml_pool_2d ⚠ ggml_pool_2d_back ⚠ ggml_print_object ⚠ ggml_print_objects ⚠ ggml_quantize_chunk ⚠ ggml_quantize_free ⚠ ggml_quantize_init ⚠ ggml_quantize_requires_imatrix ⚠ ggml_reglu ⚠ ggml_reglu_split ⚠ ggml_reglu_swapped ⚠ ggml_relu ⚠ ggml_relu_inplace ⚠ ggml_repeat ⚠ ggml_repeat_4d ⚠ ggml_repeat_back ⚠ ggml_reset ⚠ ggml_reshape ⚠ ggml_reshape_1d ⚠ ggml_reshape_2d ⚠ ggml_reshape_3d ⚠ ggml_reshape_4d ⚠ ggml_rms_norm ⚠ ggml_rms_norm_back ⚠ ggml_rms_norm_inplace ⚠ ggml_roll ⚠ ggml_rope ⚠ ggml_rope_custom ⚠ ggml_rope_custom_inplace ⚠ ggml_rope_ext ⚠ ggml_rope_ext_back ⚠ ggml_rope_ext_inplace ⚠ ggml_rope_inplace ⚠ ggml_rope_multi ⚠ ggml_rope_multi_back ⚠ ggml_rope_multi_inplace ⚠ ggml_rope_yarn_corr_dims ⚠ ggml_round ⚠ ggml_round_inplace ⚠ ggml_row_size ⚠ ggml_rwkv_wkv6 ⚠ ggml_rwkv_wkv7 ⚠ ggml_scale ⚠ ggml_scale_bias ⚠ ggml_scale_bias_inplace ⚠ ggml_scale_inplace ⚠ ggml_set ⚠ ggml_set_1d ⚠ ggml_set_1d_inplace ⚠ ggml_set_2d ⚠ ggml_set_2d_inplace ⚠ ggml_set_abort_callback ⚠ ggml_set_f32 ⚠ ggml_set_f32_1d ⚠ ggml_set_f32_nd ⚠ ggml_set_i32 ⚠ ggml_set_i32_1d ⚠ ggml_set_i32_nd ⚠ ggml_set_inplace ⚠ ggml_set_input ⚠ ggml_set_loss ⚠ ggml_set_name ⚠ ggml_set_no_alloc ⚠ ggml_set_output ⚠ ggml_set_param ⚠ ggml_set_rows ⚠ ggml_set_zero ⚠ ggml_sgn ⚠ ggml_sgn_inplace ⚠ ggml_sigmoid ⚠ ggml_sigmoid_inplace ⚠ ggml_silu ⚠ ggml_silu_back ⚠ ggml_silu_inplace ⚠ ggml_sin ⚠ ggml_sin_inplace ⚠ ggml_soft_max ⚠ ggml_soft_max_add_sinks ⚠ ggml_soft_max_ext ⚠ ggml_soft_max_ext_back ⚠ ggml_soft_max_ext_back_inplace ⚠ ggml_soft_max_ext_inplace ⚠ ggml_soft_max_inplace ⚠ ggml_softplus ⚠ ggml_softplus_inplace ⚠ ggml_solve_tri ⚠ ggml_sqr 
⚠ ggml_sqr_inplace ⚠ ggml_sqrt ⚠ ggml_sqrt_inplace ⚠ ggml_ssm_conv ⚠ ggml_ssm_scan ⚠ ggml_status_to_string ⚠ ggml_step ⚠ ggml_step_inplace ⚠ ggml_sub ⚠ ggml_sub_inplace ⚠ ggml_sum ⚠ ggml_sum_rows ⚠ ggml_swiglu ⚠ ggml_swiglu_oai ⚠ ggml_swiglu_split ⚠ ggml_swiglu_swapped ⚠ ggml_tallocr_alloc ⚠ ggml_tallocr_new ⚠ ggml_tanh ⚠ ggml_tanh_inplace ⚠ ggml_tensor_overhead ⚠ ggml_threadpool_free ⚠ ggml_threadpool_get_n_threads ⚠ ggml_threadpool_new ⚠ ggml_threadpool_params_default ⚠ ggml_threadpool_params_init ⚠ ggml_threadpool_params_match ⚠ ggml_threadpool_pause ⚠ ggml_threadpool_resume ⚠ ggml_time_init ⚠ ggml_time_ms ⚠ ggml_time_us ⚠ ggml_timestep_embedding ⚠ ggml_top_k ⚠ ggml_transpose ⚠ ggml_tri ⚠ ggml_trunc ⚠ Truncates the fractional part of each element in the tensor (towards zero).
For example: trunc(3.7) = 3.0, trunc(-2.9) = -2.0
Similar to std::trunc in C/C++. ggml_trunc_inplace ⚠ ggml_type_name ⚠ ggml_type_size ⚠ ggml_type_sizef ⚠ ggml_unary ⚠ ggml_unary_inplace ⚠ ggml_unary_op_name ⚠ ggml_unravel_index ⚠ ggml_upscale ⚠ ggml_upscale_ext ⚠ ggml_used_mem ⚠ ggml_validate_row_data ⚠ ggml_version ⚠ ggml_view_1d ⚠ ggml_view_2d ⚠ ggml_view_3d ⚠ ggml_view_4d ⚠ ggml_view_tensor ⚠ ggml_win_part ⚠ ggml_win_unpart ⚠ ggml_xielu ⚠ gguf_add_tensor ⚠ gguf_find_key ⚠ gguf_find_tensor ⚠ gguf_free ⚠ gguf_get_alignment ⚠ gguf_get_arr_data ⚠ gguf_get_arr_n ⚠ gguf_get_arr_str ⚠ gguf_get_arr_type ⚠ gguf_get_data_offset ⚠ gguf_get_key ⚠ gguf_get_kv_type ⚠ gguf_get_meta_data ⚠ gguf_get_meta_size ⚠ gguf_get_n_kv ⚠ gguf_get_n_tensors ⚠ gguf_get_tensor_name ⚠ gguf_get_tensor_offset ⚠ gguf_get_tensor_size ⚠ gguf_get_tensor_type ⚠ gguf_get_val_bool ⚠ gguf_get_val_data ⚠ gguf_get_val_f32 ⚠ gguf_get_val_f64 ⚠ gguf_get_val_i8 ⚠ gguf_get_val_i16 ⚠ gguf_get_val_i32 ⚠ gguf_get_val_i64 ⚠ gguf_get_val_str ⚠ gguf_get_val_u8 ⚠ gguf_get_val_u16 ⚠ gguf_get_val_u32 ⚠ gguf_get_val_u64 ⚠ gguf_get_version ⚠ gguf_init_empty ⚠ gguf_init_from_file ⚠ gguf_remove_key ⚠ gguf_set_arr_data ⚠ gguf_set_arr_str ⚠ gguf_set_kv ⚠ gguf_set_tensor_data ⚠ gguf_set_tensor_type ⚠ gguf_set_val_bool ⚠ gguf_set_val_f32 ⚠ gguf_set_val_f64 ⚠ gguf_set_val_i8 ⚠ gguf_set_val_i16 ⚠ gguf_set_val_i32 ⚠ gguf_set_val_i64 ⚠ gguf_set_val_str ⚠ gguf_set_val_u8 ⚠ gguf_set_val_u16 ⚠ gguf_set_val_u32 ⚠ gguf_set_val_u64 ⚠ gguf_type_name ⚠ gguf_write_to_file ⚠ llama_adapter_get_alora_invocation_tokens ⚠ llama_adapter_get_alora_n_invocation_tokens ⚠ llama_adapter_lora_free ⚠ llama_adapter_lora_init ⚠ llama_adapter_meta_count ⚠ llama_adapter_meta_key_by_index ⚠ llama_adapter_meta_val_str ⚠ llama_adapter_meta_val_str_by_index ⚠ llama_add_bos_token ⚠ llama_add_eos_token ⚠ llama_attach_threadpool ⚠ llama_backend_free ⚠ llama_backend_init ⚠ llama_batch_free ⚠ llama_batch_get_one ⚠ llama_batch_init ⚠ llama_chat_apply_template ⚠ Apply chat template. 
Inspired by hf apply_chat_template() on python. llama_chat_builtin_templates ⚠ llama_context_default_params ⚠ llama_copy_state_data ⚠ llama_decode ⚠ llama_detach_threadpool ⚠ llama_detokenize ⚠ @details Convert the provided tokens into text (inverse of llama_tokenize()).
@param text The char pointer must be large enough to hold the resulting text.
@return Returns the number of chars/bytes on success, no more than text_len_max.
@return Returns a negative number on failure - the number of chars/bytes that would have been returned.
@param remove_special Allow removing BOS and EOS tokens if the model is configured to do so.
@param unparse_special If true, special tokens are rendered in the output. llama_encode ⚠ llama_flash_attn_type_name ⚠ llama_free ⚠ llama_free_model ⚠ llama_get_embeddings ⚠ llama_get_embeddings_ith ⚠ llama_get_embeddings_seq ⚠ llama_get_logits ⚠ llama_get_logits_ith ⚠ llama_get_memory ⚠ llama_get_model ⚠ llama_get_sampled_candidates_count_ith ⚠ llama_get_sampled_candidates_ith ⚠ llama_get_sampled_logits_count_ith ⚠ llama_get_sampled_logits_ith ⚠ llama_get_sampled_probs_count_ith ⚠ llama_get_sampled_probs_ith ⚠ llama_get_sampled_token_ith ⚠ llama_get_state_size ⚠ llama_init_from_model ⚠ llama_load_model_from_file ⚠ llama_load_session_file ⚠ llama_log_get ⚠ llama_log_set ⚠ llama_max_devices ⚠ llama_max_parallel_sequences ⚠ llama_max_tensor_buft_overrides ⚠ llama_memory_can_shift ⚠ llama_memory_clear ⚠ llama_memory_seq_add ⚠ llama_memory_seq_cp ⚠ llama_memory_seq_div ⚠ llama_memory_seq_keep ⚠ llama_memory_seq_pos_max ⚠ llama_memory_seq_pos_min ⚠ llama_memory_seq_rm ⚠ llama_model_chat_template ⚠ llama_model_cls_label ⚠ llama_model_decoder_start_token ⚠ llama_model_default_params ⚠ llama_model_desc ⚠ llama_model_free ⚠ llama_model_get_vocab ⚠ llama_model_has_decoder ⚠ llama_model_has_encoder ⚠ llama_model_init_from_user ⚠ llama_model_is_diffusion ⚠ llama_model_is_hybrid ⚠ llama_model_is_recurrent ⚠ llama_model_load_from_file ⚠ llama_model_load_from_splits ⚠ llama_model_meta_count ⚠ llama_model_meta_key_by_index ⚠ llama_model_meta_key_str ⚠ llama_model_meta_val_str ⚠ llama_model_meta_val_str_by_index ⚠ llama_model_n_cls_out ⚠ llama_model_n_ctx_train ⚠ llama_model_n_embd ⚠ llama_model_n_embd_inp ⚠ llama_model_n_embd_out ⚠ llama_model_n_head ⚠ llama_model_n_head_kv ⚠ llama_model_n_layer ⚠ llama_model_n_params ⚠ llama_model_n_swa ⚠ llama_model_quantize ⚠ llama_model_quantize_default_params ⚠ llama_model_rope_freq_scale_train ⚠ llama_model_rope_type ⚠ llama_model_save_to_file ⚠ llama_model_size ⚠ llama_n_batch ⚠ llama_n_ctx ⚠ llama_n_ctx_seq ⚠ llama_n_ctx_train ⚠ 
llama_n_embd ⚠ llama_n_head ⚠ llama_n_layer ⚠ llama_n_seq_max ⚠ llama_n_threads ⚠ llama_n_threads_batch ⚠ llama_n_ubatch ⚠ llama_n_vocab ⚠ llama_new_context_with_model ⚠ llama_numa_init ⚠ llama_opt_epoch ⚠ llama_opt_init ⚠ llama_opt_param_filter_all ⚠ llama_perf_context ⚠ llama_perf_context_print ⚠ llama_perf_context_reset ⚠ llama_perf_sampler ⚠ llama_perf_sampler_print ⚠ llama_perf_sampler_reset ⚠ llama_pooling_type ⚠ llama_print_system_info ⚠ llama_rs_compute_tool_call_haystack ⚠ llama_rs_decode ⚠ llama_rs_detect_reasoning_markers ⚠ llama_rs_diagnose_tool_call_synthetic_renders ⚠ llama_rs_encode ⚠ llama_rs_fit_params ⚠ llama_rs_json_schema_to_grammar ⚠ llama_rs_load_model_from_file ⚠ llama_rs_memory_seq_add ⚠ llama_rs_memory_seq_div ⚠ llama_rs_memory_seq_pos_max ⚠ llama_rs_mtmd_bitmap_init_from_file ⚠ llama_rs_mtmd_encode_chunk ⚠ llama_rs_mtmd_eval_chunk_single ⚠ llama_rs_mtmd_init_from_file ⚠ llama_rs_mtmd_tokenize ⚠ llama_rs_new_context_with_model ⚠ llama_rs_parse_chat_message ⚠ llama_rs_parsed_chat_content ⚠ llama_rs_parsed_chat_free ⚠ llama_rs_parsed_chat_reasoning_content ⚠ llama_rs_parsed_chat_tool_call_arguments ⚠ llama_rs_parsed_chat_tool_call_count ⚠ llama_rs_parsed_chat_tool_call_id ⚠ llama_rs_parsed_chat_tool_call_name ⚠ llama_rs_sampler_accept ⚠ llama_rs_sampler_apply ⚠ llama_rs_sampler_init_grammar ⚠ llama_rs_sampler_init_grammar_lazy ⚠ llama_rs_sampler_init_grammar_lazy_patterns ⚠ llama_rs_sampler_sample ⚠ llama_rs_string_free ⚠ llama_rs_tokenize ⚠ llama_sampler_accept ⚠ llama_sampler_apply ⚠ llama_sampler_chain_add ⚠ llama_sampler_chain_default_params ⚠ llama_sampler_chain_get ⚠ llama_sampler_chain_init ⚠ llama_sampler_chain_n ⚠ llama_sampler_chain_remove ⚠ llama_sampler_clone ⚠ llama_sampler_free ⚠ llama_sampler_get_seed ⚠ llama_sampler_init ⚠ llama_sampler_init_adaptive_p ⚠ adaptive-p: select tokens near a configurable target probability over time. llama_sampler_init_dist ⚠ seed == LLAMA_DEFAULT_SEED to use a random seed. 
llama_sampler_init_dry ⚠ @details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982 llama_sampler_init_grammar ⚠ @details Initializes a GBNF grammar, see grammars/README.md for details.
@param vocab The vocabulary that this grammar will be used with.
@param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails.
@param grammar_root The name of the start symbol for the grammar. llama_sampler_init_grammar_lazy ⚠ llama_sampler_init_grammar_lazy_patterns ⚠ @details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639
@param trigger_patterns A list of patterns that will trigger the grammar sampler. Pattern will be matched from the start of the generation output, and grammar sampler will be fed content starting from its first match group.
@param trigger_tokens A list of tokens that will trigger the grammar sampler. The grammar sampler will be fed content starting from, and including, the trigger token. llama_sampler_init_greedy ⚠ llama_sampler_init_infill ⚠ llama_sampler_init_logit_bias ⚠ llama_sampler_init_min_p ⚠ @details Minimum P sampling as described in https://github.com/ggml-org/llama.cpp/pull/3841 llama_sampler_init_mirostat ⚠ @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
@param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
@param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
@param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates.
@param m The number of tokens considered in the estimation of s_hat. This is an arbitrary value that is used to calculate s_hat, which in turn helps to calculate the value of k. In the paper, they use m = 100, but you can experiment with different values to see how it affects the performance of the algorithm.
@param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal. llama_sampler_init_mirostat_v2 ⚠ @details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
@param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
@param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
@param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates.
@param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal. llama_sampler_init_penalties ⚠ NOTE: Avoid using on the full vocabulary as searching for repeated tokens can become slow. For example, apply top-k or top-p sampling first. llama_sampler_init_temp ⚠ @details Updates the logits l_i' = l_i/t. When t <= 0.0f, the maximum logit is kept at its original value and the rest are set to -inf. llama_sampler_init_temp_ext ⚠ @details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772. llama_sampler_init_top_k ⚠ @details Top-K sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751
Setting k <= 0 makes this a noop llama_sampler_init_top_n_sigma ⚠ @details Top n sigma sampling as described in academic paper “Top-nσ: Not All Logits Are You Need” https://arxiv.org/pdf/2411.07641 llama_sampler_init_top_p ⚠ @details Nucleus sampling described in academic paper “The Curious Case of Neural Text Degeneration” https://arxiv.org/abs/1904.09751 llama_sampler_init_typical ⚠ @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666. llama_sampler_init_xtc ⚠ @details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335 llama_sampler_name ⚠ llama_sampler_reset ⚠ llama_sampler_sample ⚠ llama_save_session_file ⚠ llama_set_abort_callback ⚠ llama_set_adapter_cvec ⚠ llama_set_adapters_lora ⚠ llama_set_causal_attn ⚠ llama_set_embeddings ⚠ llama_set_n_threads ⚠ llama_set_sampler ⚠ llama_set_state_data ⚠ llama_set_warmup ⚠ llama_split_path ⚠ @details Build a split GGUF final path for this chunk.
llama_split_path(split_path, sizeof(split_path), “/models/ggml-model-q4_0”, 2, 4) => split_path = “/models/ggml-model-q4_0-00002-of-00004.gguf” llama_split_prefix ⚠ @details Extract the path prefix from the split_path if and only if the split_no and split_count match.
llama_split_prefix(split_prefix, 64, “/models/ggml-model-q4_0-00002-of-00004.gguf”, 2, 4) => split_prefix = “/models/ggml-model-q4_0” llama_state_get_data ⚠ llama_state_get_size ⚠ llama_state_load_file ⚠ llama_state_save_file ⚠ llama_state_seq_get_data ⚠ llama_state_seq_get_data_ext ⚠ llama_state_seq_get_size ⚠ llama_state_seq_get_size_ext ⚠ llama_state_seq_load_file ⚠ llama_state_seq_save_file ⚠ llama_state_seq_set_data ⚠ llama_state_seq_set_data_ext ⚠ llama_state_set_data ⚠ llama_supports_gpu_offload ⚠ llama_supports_mlock ⚠ llama_supports_mmap ⚠ llama_supports_rpc ⚠ llama_synchronize ⚠ llama_time_us ⚠ llama_token_bos ⚠ llama_token_cls ⚠ llama_token_eos ⚠ llama_token_eot ⚠ llama_token_fim_mid ⚠ llama_token_fim_pad ⚠ llama_token_fim_pre ⚠ llama_token_fim_rep ⚠ llama_token_fim_sep ⚠ llama_token_fim_suf ⚠ llama_token_get_attr ⚠ llama_token_get_score ⚠ llama_token_get_text ⚠ llama_token_is_control ⚠ llama_token_is_eog ⚠ llama_token_nl ⚠ llama_token_pad ⚠ llama_token_sep ⚠ llama_token_to_piece ⚠ llama_tokenize ⚠ @details Convert the provided text into tokens.
@param tokens The tokens pointer must be large enough to hold the resulting tokens.
@return Returns the number of tokens on success, no more than n_tokens_max
@return Returns a negative number on failure - the number of tokens that would have been returned
@return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit)
@param add_special Allow adding BOS and EOS tokens if the model is configured to do so.
@param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated
as plaintext. Does not insert a leading space. llama_vocab_bos ⚠ llama_vocab_cls ⚠ llama_vocab_eos ⚠ llama_vocab_eot ⚠ llama_vocab_fim_mid ⚠ llama_vocab_fim_pad ⚠ llama_vocab_fim_pre ⚠ llama_vocab_fim_rep ⚠ llama_vocab_fim_sep ⚠ llama_vocab_fim_suf ⚠ llama_vocab_get_add_bos ⚠ llama_vocab_get_add_eos ⚠ llama_vocab_get_add_sep ⚠ llama_vocab_get_attr ⚠ llama_vocab_get_score ⚠ llama_vocab_get_text ⚠ llama_vocab_is_control ⚠ llama_vocab_is_eog ⚠ llama_vocab_mask ⚠ llama_vocab_n_tokens ⚠ llama_vocab_nl ⚠ llama_vocab_pad ⚠ llama_vocab_sep ⚠ llama_vocab_type ⚠ mtmd_bitmap_free ⚠ mtmd_bitmap_get_data ⚠ mtmd_bitmap_get_id ⚠ mtmd_bitmap_get_n_bytes ⚠ mtmd_bitmap_get_nx ⚠ mtmd_bitmap_get_ny ⚠ mtmd_bitmap_init ⚠ mtmd_bitmap_init_from_audio ⚠ mtmd_bitmap_is_audio ⚠ mtmd_bitmap_set_id ⚠ mtmd_context_params_default ⚠ mtmd_decode_use_mrope ⚠ mtmd_decode_use_non_causal ⚠ mtmd_default_marker ⚠ mtmd_encode ⚠ mtmd_encode_chunk ⚠ mtmd_free ⚠ mtmd_get_audio_sample_rate ⚠ mtmd_get_cap_from_file ⚠ mtmd_get_output_embd ⚠ mtmd_helper_bitmap_init_from_buf ⚠ mtmd_helper_bitmap_init_from_file ⚠ mtmd_helper_decode_image_chunk ⚠ mtmd_helper_eval_chunk_single ⚠ mtmd_helper_eval_chunks ⚠ mtmd_helper_get_n_pos ⚠ mtmd_helper_get_n_tokens ⚠ mtmd_helper_image_get_decoder_pos ⚠ mtmd_helper_log_set ⚠ mtmd_image_tokens_get_decoder_pos ⚠ mtmd_image_tokens_get_id ⚠ mtmd_image_tokens_get_n_pos ⚠ mtmd_image_tokens_get_n_tokens ⚠ mtmd_image_tokens_get_nx ⚠ mtmd_image_tokens_get_ny ⚠ mtmd_init_from_file ⚠ mtmd_input_chunk_copy ⚠ mtmd_input_chunk_free ⚠ mtmd_input_chunk_get_id ⚠ mtmd_input_chunk_get_n_pos ⚠ mtmd_input_chunk_get_n_tokens ⚠ mtmd_input_chunk_get_tokens_image ⚠ mtmd_input_chunk_get_tokens_text ⚠ mtmd_input_chunk_get_type ⚠ mtmd_input_chunks_free ⚠ mtmd_input_chunks_get ⚠ mtmd_input_chunks_init ⚠ mtmd_input_chunks_size ⚠ mtmd_log_set ⚠ mtmd_support_audio ⚠ mtmd_support_vision ⚠ mtmd_test_create_input_chunks ⚠ mtmd_tokenize ⚠ __off64_t __off_t ggml_abort_callback ggml_abort_callback_t 
ggml_backend_buffer_t ggml_backend_buffer_type_t ggml_backend_buffer_usage ggml_backend_comm_allreduce_tensor_t ggml_backend_comm_free_t ggml_backend_comm_init_t ggml_backend_dev_get_extra_bufts_t ggml_backend_dev_t ggml_backend_dev_type ggml_backend_eval_callback ggml_backend_event_t ggml_backend_get_features_t ggml_backend_graph_plan_t ggml_backend_meta_get_split_state_t ggml_backend_meta_split_axis ggml_backend_reg_t ggml_backend_sched_eval_callback ggml_backend_sched_t ggml_backend_set_abort_callback_t ggml_backend_set_n_threads_t ggml_backend_split_buffer_type_t ggml_backend_t ggml_custom1_op_t ggml_custom2_op_t ggml_custom3_op_t ggml_custom_op_t ggml_fp16_t ggml_from_float_t ggml_ftype ggml_gallocr_t ggml_glu_op ggml_guid ggml_guid_t ggml_log_callback ggml_log_level ggml_numa_strategy ggml_object_type ggml_op ggml_op_hint ggml_op_pool ggml_opt_build_type ggml_opt_context_t ggml_opt_dataset_t ggml_opt_epoch_callback ggml_opt_get_optimizer_params ggml_opt_loss_type ggml_opt_optimizer_type ggml_opt_result_t ggml_prec ggml_scale_flag ggml_scale_mode ggml_sched_priority ggml_sort_order ggml_status ggml_tensor_flag ggml_threadpool_t ggml_to_float_t ggml_tri_type ggml_type ggml_unary_op ggml_vec_dot_t gguf_type llama_attention_type llama_flash_attn_type llama_ftype llama_memory_t llama_model_kv_override_type llama_model_meta_key llama_model_set_tensor_data_t llama_opt_param_filter llama_pooling_type llama_pos llama_progress_callback llama_rope_scaling_type llama_rope_type llama_rs_compute_tool_call_haystack_status llama_rs_decode_status llama_rs_detect_reasoning_markers_status llama_rs_diagnose_tool_call_synthetic_renders_status llama_rs_encode_status llama_rs_fit_params_status llama_rs_json_schema_to_grammar_status llama_rs_load_model_from_file_status llama_rs_memory_seq_add_status llama_rs_memory_seq_div_status llama_rs_mtmd_bitmap_init_from_file_status llama_rs_mtmd_encode_chunk_status llama_rs_mtmd_eval_chunk_single_status llama_rs_mtmd_init_from_file_status 
llama_rs_mtmd_tokenize_status llama_rs_new_context_with_model_status llama_rs_parse_chat_message_status llama_rs_parsed_chat_content_status llama_rs_parsed_chat_free_status llama_rs_parsed_chat_handle llama_rs_parsed_chat_reasoning_content_status llama_rs_parsed_chat_tool_call_arguments_status llama_rs_parsed_chat_tool_call_count_status llama_rs_parsed_chat_tool_call_id_status llama_rs_parsed_chat_tool_call_name_status llama_rs_sampler_accept_status llama_rs_sampler_apply_status llama_rs_sampler_init_grammar_lazy_patterns_status llama_rs_sampler_init_grammar_lazy_status llama_rs_sampler_init_grammar_status llama_rs_sampler_sample_status llama_rs_status llama_rs_tokenize_status llama_sampler_context_t llama_seq_id llama_split_mode llama_state_seq_flags llama_token llama_token_attr llama_token_type llama_vocab_type mtmd_input_chunk_type llama_model_kv_override__bindgen_ty_1