#pragma once
#include "run-inference.h"
#include <stdint.h>
extern "C" {
/**
 * Plain-data parameter bundle taken by value by rs_llama_cpp_run_inference().
 *
 * The struct is declared inside an extern "C" block and consists only of
 * fixed-width integers, floats, bools and raw char pointers. Member order and
 * member types determine the binary layout, so do not reorder or resize
 * members without updating every consumer of this header.
 *
 * NOTE(review): the member names look like they mirror llama.cpp's
 * `gpt_params`; the meaning, defaults and valid range of each value are
 * defined by the implementation, not by this header -- confirm there.
 */
struct gpt_params_c {
    /* Integer settings. */
    int32_t seed;
    int32_t n_threads;
    int32_t n_predict;
    int32_t n_ctx;
    int32_t n_batch;
    int32_t n_keep;
    int32_t n_gpu_layers;
    int32_t main_gpu;

    /* One float per slot; LLAMA_MAX_DEVICES is defined outside this header. */
    float tensor_split[LLAMA_MAX_DEVICES];

    /* Numeric values; judging by the names these are sampling controls. */
    int32_t top_k;
    float top_p;
    float tfs_z;
    float typical_p;
    float temp;
    float repeat_penalty;
    int32_t repeat_last_n;
    float frequency_penalty;
    float presence_penalty;
    /* Fixed-width like the other integer members, so the layout does not
     * depend on the platform's `int` size. */
    int32_t mirostat;
    float mirostat_tau;
    float mirostat_eta;

    /*
     * Raw C string pointers.
     * NOTE(review): this header does not state who owns the pointed-to memory,
     * how long it must stay valid, or whether NULL is accepted -- confirm with
     * the implementation.
     */
    char *model;
    char *model_alias;
    char *prompt;
    char *path_prompt_cache;
    char *input_prefix;
    char *input_suffix;
    char *lora_adapter;
    char *lora_base;

    /* Boolean flags. */
    bool memory_f16;
    bool random_prompt;
    bool use_color;
    bool interactive;
    bool prompt_cache_all;
    bool prompt_cache_ro;
    bool embedding;
    bool interactive_first;
    bool multiline_input;
    bool instruct;
    bool penalize_nl;
    bool perplexity;
    bool use_mmap;
    bool use_mlock;
    bool mem_test;
    bool export_cgraph;
    bool verbose_prompt;
};
/**
 * Entry point declared with C linkage: takes a gpt_params_c by value together
 * with a token_callback, and returns nothing.
 *
 * NOTE(review): token_callback is not declared in this header -- presumably it
 * comes from "run-inference.h"; confirm its signature and when it is invoked.
 */
void rs_llama_cpp_run_inference(gpt_params_c params, token_callback callback);
}