// llama_cpp_sys_v3/types.rs
//
// Hand-written FFI type definitions mirroring llama.cpp's C API
// (presumably `llama.h` — confirm field-for-field against the pinned
// upstream revision when updating).

// Upstream C names are snake_case; keep them verbatim for FFI clarity.
#![allow(non_camel_case_types)]

/// Position of a token within a sequence (C `llama_pos`).
pub type llama_pos = i32;
/// Token id in the model vocabulary (C `llama_token`).
pub type llama_token = i32;
/// Sequence identifier used for batched decoding (C `llama_seq_id`).
pub type llama_seq_id = i32;

/// Sentinel token id meaning "no token" (C `LLAMA_TOKEN_NULL`).
pub const LLAMA_TOKEN_NULL: llama_token = -1;
/// Opaque handle to a llama.cpp vocabulary (C `struct llama_vocab`).
///
/// Zero-sized marker type in the recommended FFI opaque-type pattern:
/// the raw-pointer member of the `PhantomData` makes it `!Send`/`!Sync`,
/// and `PhantomPinned` makes it `!Unpin`, so values can only exist behind
/// pointers handed out by the C library — never constructed in Rust.
#[repr(C)]
pub struct llama_vocab {
    _data: [u8; 0],
    _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
}
/// Opaque handle to a loaded model (C `struct llama_model`).
///
/// Same FFI opaque-type pattern as the other handles in this file:
/// zero-sized, `!Send`/`!Sync` (raw-pointer marker) and `!Unpin`
/// (`PhantomPinned`); only ever used behind C-provided pointers.
#[repr(C)]
pub struct llama_model {
    _data: [u8; 0],
    _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
}
/// Opaque handle to an inference context (C `struct llama_context`).
///
/// FFI opaque-type pattern: zero-sized, `!Send`/`!Sync` via the
/// raw-pointer marker and `!Unpin` via `PhantomPinned`; Rust code only
/// ever holds pointers to it, never values.
#[repr(C)]
pub struct llama_context {
    _data: [u8; 0],
    _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
}
/// Opaque handle to a sampler (C `struct llama_sampler`).
///
/// FFI opaque-type pattern: zero-sized, `!Send`/`!Sync` via the
/// raw-pointer marker and `!Unpin` via `PhantomPinned`; used only
/// behind pointers returned by the C library.
#[repr(C)]
pub struct llama_sampler {
    _data: [u8; 0],
    _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>,
}
/// One message of a chat transcript passed to the chat-template API
/// (C `struct llama_chat_message`).
///
/// Both pointers are presumably NUL-terminated C strings owned by the
/// caller for the duration of the call — confirm lifetime rules against
/// llama.h.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct llama_chat_message {
    /// Role of the speaker (e.g. "user"/"assistant" — confirm accepted values upstream).
    pub role: *const std::ffi::c_char,
    /// Message text.
    pub content: *const std::ffi::c_char,
}
43#[repr(C)]
44#[derive(Debug, Copy, Clone)]
45pub struct llama_batch {
46 pub n_tokens: i32,
47 pub token: *mut llama_token,
48 pub embd: *mut f32,
49 pub pos: *mut llama_pos,
50 pub n_seq_id: *mut i32,
51 pub seq_id: *mut *mut llama_seq_id,
52 pub logits: *mut i8, }
54
/// Parameters for loading a model (C `struct llama_model_params`).
///
/// NOTE(review): the opaque `*void` members stand in for C types this
/// crate does not bind (`ggml_backend_dev_t*`, buft overrides,
/// `llama_model_kv_override*` presumably) — confirm against llama.h.
/// Field order must match the C struct exactly (`repr(C)`).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct llama_model_params {
    /// NULL-terminated device list — type-erased, confirm element type upstream.
    pub devices: *mut std::ffi::c_void,
    /// Tensor buffer-type overrides — type-erased, confirm upstream.
    pub tensor_buft_overrides: *const std::ffi::c_void,
    /// Number of layers to offload to GPU — confirm exact semantics upstream.
    pub n_gpu_layers: i32,
    /// How to split the model across GPUs (C enum passed as i32).
    pub split_mode: i32,
    /// GPU used for the main compute when not fully split.
    pub main_gpu: i32,
    /// Proportion of the model offloaded to each GPU.
    pub tensor_split: *const f32,
    /// Progress callback: receives progress in [0, 1] presumably; returning
    /// false likely cancels the load — confirm against llama.h.
    pub progress_callback: Option<unsafe extern "C" fn(f32, *mut std::ffi::c_void) -> bool>,
    /// User data forwarded to `progress_callback`.
    pub progress_callback_user_data: *mut std::ffi::c_void,
    /// Key-value metadata overrides — type-erased, confirm upstream.
    pub kv_overrides: *const std::ffi::c_void,
    /// Load only the vocabulary, skipping tensor weights.
    pub vocab_only: bool,
    /// Memory-map the model file when possible.
    pub use_mmap: bool,
    /// Use direct (unbuffered) I/O for loading — confirm semantics upstream.
    pub use_direct_io: bool,
    /// mlock() the model in RAM to prevent swapping.
    pub use_mlock: bool,
    /// Validate tensor data on load.
    pub check_tensors: bool,
    /// Use extra buffer types — confirm semantics upstream.
    pub use_extra_bufts: bool,
    /// Skip host-side buffers — confirm semantics upstream.
    pub no_host: bool,
    /// Skip allocation — confirm semantics upstream.
    pub no_alloc: bool,
}
/// Parameters for creating an inference context
/// (C `struct llama_context_params`).
///
/// Field order must match the C struct exactly (`repr(C)`). Enum-typed
/// C fields are carried as raw `i32` discriminants here — confirm the
/// valid values against llama.h.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct llama_context_params {
    /// Context size in tokens (0 presumably means "from model" — confirm).
    pub n_ctx: u32,
    /// Maximum logical batch size submitted to decode.
    pub n_batch: u32,
    /// Maximum physical (micro-)batch size.
    pub n_ubatch: u32,
    /// Maximum number of parallel sequences.
    pub n_seq_max: u32,
    /// Threads used for single-token generation.
    pub n_threads: i32,
    /// Threads used for batch/prompt processing.
    pub n_threads_batch: i32,
    /// RoPE scaling type (C enum as i32).
    pub rope_scaling_type: i32,
    /// Pooling type for embeddings (C enum as i32).
    pub pooling_type: i32,
    /// Attention type (C enum as i32).
    pub attention_type: i32,
    /// Flash-attention type (C enum as i32).
    pub flash_attn_type: i32,
    /// RoPE base frequency (0 presumably means "from model" — confirm).
    pub rope_freq_base: f32,
    /// RoPE frequency scaling factor.
    pub rope_freq_scale: f32,
    /// YaRN extrapolation mix factor.
    pub yarn_ext_factor: f32,
    /// YaRN attention magnitude scale.
    pub yarn_attn_factor: f32,
    /// YaRN low-correction dim.
    pub yarn_beta_fast: f32,
    /// YaRN high-correction dim.
    pub yarn_beta_slow: f32,
    /// YaRN original context size.
    pub yarn_orig_ctx: u32,
    /// KV-defragmentation threshold — confirm semantics upstream.
    pub defrag_thold: f32,
    /// Evaluation callback — confirm the meaning of both pointer args upstream.
    pub cb_eval: Option<unsafe extern "C" fn(*mut std::ffi::c_void, *mut std::ffi::c_void) -> bool>,
    /// User data forwarded to `cb_eval`.
    pub cb_eval_user_data: *mut std::ffi::c_void,
    /// Data type for the K cache (ggml type as i32).
    pub type_k: i32,
    /// Data type for the V cache (ggml type as i32).
    pub type_v: i32,
    /// Abort callback: returning true presumably aborts computation — confirm.
    pub abort_callback: Option<unsafe extern "C" fn(*mut std::ffi::c_void) -> bool>,
    /// User data forwarded to `abort_callback`.
    pub abort_callback_data: *mut std::ffi::c_void,
    /// Extract embeddings (together with logits).
    pub embeddings: bool,
    /// Offload KQV ops (including KV cache) to GPU.
    pub offload_kqv: bool,
    /// Disable performance-counter collection.
    pub no_perf: bool,
    /// Offload host tensor operations — confirm semantics upstream.
    pub op_offload: bool,
    /// Use full-size SWA cache — confirm semantics upstream.
    pub swa_full: bool,
    /// Use a unified KV buffer across sequences — confirm semantics upstream.
    pub kv_unified: bool,
    /// Sampler array — type-erased, confirm element type upstream.
    pub samplers: *mut std::ffi::c_void,
    /// Number of entries in `samplers`.
    pub n_samplers: usize,
}
/// Parameters for constructing a sampler chain
/// (C `struct llama_sampler_chain_params`).
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct llama_sampler_chain_params {
    /// Disable performance-counter collection for the chain — confirm
    /// against llama.h.
    pub no_perf: bool,
}