// llama_cpp_4/llama_backend.rs
//! Representation of an initialized llama backend

use crate::LLamaCppError;
use llama_cpp_sys_4::ggml_log_level;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::SeqCst;
7
/// Representation of an initialized llama backend
/// This is required as a parameter for most llama functions as the backend must be initialized
/// before any llama functions are called. This type is proof of initialization.
#[derive(Eq, PartialEq, Debug)]
pub struct LlamaBackend {}

// Process-wide flag recording whether the llama.cpp backend is currently live.
// `LlamaBackend::init`/`init_numa` flip it false -> true; `Drop` flips it back,
// so a backend can be initialized again after the previous handle is dropped.
static LLAMA_BACKEND_INITIALIZED: AtomicBool = AtomicBool::new(false);
15
16impl LlamaBackend {
17 /// Mark the llama backend as initialized
18 fn mark_init() -> crate::Result<()> {
19 match LLAMA_BACKEND_INITIALIZED.compare_exchange(false, true, SeqCst, SeqCst) {
20 Ok(_) => Ok(()),
21 Err(_) => Err(LLamaCppError::BackendAlreadyInitialized),
22 }
23 }
24
25 /// Initialize the llama backend (without numa).
26 ///
27 /// # Examples
28 ///
29 /// ```
30 ///# use llama_cpp_4::llama_backend::LlamaBackend;
31 ///# use llama_cpp_4::LLamaCppError;
32 ///# use std::error::Error;
33 ///
34 ///# fn main() -> Result<(), Box<dyn Error>> {
35 ///
36 ///
37 /// let backend = LlamaBackend::init()?;
38 /// // the llama backend can only be initialized once
39 /// assert_eq!(Err(LLamaCppError::BackendAlreadyInitialized), LlamaBackend::init());
40 ///
41 ///# Ok(())
42 ///# }
43 /// ```
44 /// # Errors
45 ///
46 /// Returns [`LLamaCppError::BackendAlreadyInitialized`] if the backend has already been initialized.
47 #[tracing::instrument(skip_all)]
48 pub fn init() -> crate::Result<LlamaBackend> {
49 Self::mark_init()?;
50 unsafe { llama_cpp_sys_4::llama_backend_init() }
51 Ok(LlamaBackend {})
52 }
53
54 /// Initialize the llama backend (with numa).
55 /// ```
56 ///# use llama_cpp_4::llama_backend::LlamaBackend;
57 ///# use std::error::Error;
58 ///# use llama_cpp_4::llama_backend::NumaStrategy;
59 ///
60 ///# fn main() -> Result<(), Box<dyn Error>> {
61 ///
62 /// let llama_backend = LlamaBackend::init_numa(NumaStrategy::MIRROR)?;
63 ///
64 ///# Ok(())
65 ///# }
66 /// ```
67 /// # Errors
68 ///
69 /// Returns [`LLamaCppError::BackendAlreadyInitialized`] if the backend has already been initialized.
70 #[tracing::instrument(skip_all)]
71 pub fn init_numa(strategy: NumaStrategy) -> crate::Result<LlamaBackend> {
72 Self::mark_init()?;
73 unsafe {
74 llama_cpp_sys_4::llama_numa_init(llama_cpp_sys_4::ggml_numa_strategy::from(strategy));
75 }
76 Ok(LlamaBackend {})
77 }
78
79 /// Change the output of llama.cpp's logging to be voided instead of pushed to `stderr`.
80 pub fn void_logs(&mut self) {
81 unsafe extern "C" fn void_log(
82 _level: ggml_log_level,
83 _text: *const ::std::os::raw::c_char,
84 _user_data: *mut ::std::os::raw::c_void,
85 ) {
86 }
87
88 unsafe {
89 llama_cpp_sys_4::llama_log_set(Some(void_log), std::ptr::null_mut());
90 }
91 }
92}
93
/// A rusty wrapper around `numa_strategy`.
///
/// ## Description
/// Represents different NUMA (Non-Uniform Memory Access) strategies for memory management
/// in multi-core or multi-processor systems.
///
/// ## See more
/// <https://github.com/ggerganov/llama.cpp/blob/master/ggml/include/ggml-cpu.h#L25-L32>
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub enum NumaStrategy {
    /// The NUMA strategy is disabled. No NUMA-aware optimizations are applied.
    /// Memory allocation will not consider NUMA node locality.
    DISABLED,

    /// Distribute memory across NUMA nodes. This strategy aims to balance memory usage
    /// across all available NUMA nodes, potentially improving load balancing and preventing
    /// memory hotspots on a single node. It may use round-robin or another method to
    /// distribute allocations.
    DISTRIBUTE,

    /// Isolate memory to specific NUMA nodes. Memory allocations will be restricted to
    /// specific NUMA nodes, potentially reducing contention and improving locality for
    /// processes or threads bound to a particular node.
    ISOLATE,

    /// Use `numactl` to manage memory and processor affinities. This strategy utilizes
    /// the `numactl` command or library to bind processes or memory allocations to specific
    /// NUMA nodes or CPUs, providing fine-grained control over memory placement.
    NUMACTL,

    /// Mirror memory across NUMA nodes. This strategy creates duplicate memory copies
    /// on multiple NUMA nodes, which can help with fault tolerance and redundancy,
    /// ensuring that each NUMA node has access to a copy of the memory.
    MIRROR,

    /// A placeholder representing the total number of strategies available.
    /// Typically used for iteration or determining the number of strategies in the enum.
    /// Not a real strategy; mirrors the C enum's `GGML_NUMA_STRATEGY_COUNT` sentinel.
    COUNT,
}
133
/// An invalid numa strategy was provided.
///
/// Returned by [`NumaStrategy::try_from`] when the raw C value does not match
/// any known `GGML_NUMA_STRATEGY_*` constant.
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
pub struct InvalidNumaStrategy(
    /// The invalid numa strategy that was provided.
    pub llama_cpp_sys_4::ggml_numa_strategy,
);
140
141impl TryFrom<llama_cpp_sys_4::ggml_numa_strategy> for NumaStrategy {
142 type Error = InvalidNumaStrategy;
143
144 fn try_from(value: llama_cpp_sys_4::ggml_numa_strategy) -> Result<Self, Self::Error> {
145 match value {
146 llama_cpp_sys_4::GGML_NUMA_STRATEGY_DISABLED => Ok(Self::DISABLED),
147 llama_cpp_sys_4::GGML_NUMA_STRATEGY_DISTRIBUTE => Ok(Self::DISTRIBUTE),
148 llama_cpp_sys_4::GGML_NUMA_STRATEGY_ISOLATE => Ok(Self::ISOLATE),
149 llama_cpp_sys_4::GGML_NUMA_STRATEGY_NUMACTL => Ok(Self::NUMACTL),
150 llama_cpp_sys_4::GGML_NUMA_STRATEGY_MIRROR => Ok(Self::MIRROR),
151 llama_cpp_sys_4::GGML_NUMA_STRATEGY_COUNT => Ok(Self::COUNT),
152 value => Err(InvalidNumaStrategy(value)),
153 }
154 }
155}
156
157impl From<NumaStrategy> for llama_cpp_sys_4::ggml_numa_strategy {
158 fn from(value: NumaStrategy) -> Self {
159 match value {
160 NumaStrategy::DISABLED => llama_cpp_sys_4::GGML_NUMA_STRATEGY_DISABLED,
161 NumaStrategy::DISTRIBUTE => llama_cpp_sys_4::GGML_NUMA_STRATEGY_DISTRIBUTE,
162 NumaStrategy::ISOLATE => llama_cpp_sys_4::GGML_NUMA_STRATEGY_ISOLATE,
163 NumaStrategy::NUMACTL => llama_cpp_sys_4::GGML_NUMA_STRATEGY_NUMACTL,
164 NumaStrategy::MIRROR => llama_cpp_sys_4::GGML_NUMA_STRATEGY_MIRROR,
165 NumaStrategy::COUNT => llama_cpp_sys_4::GGML_NUMA_STRATEGY_COUNT,
166 }
167 }
168}
169
170/// Drops the llama backend.
171/// ```
172///
173///# use llama_cpp_4::llama_backend::LlamaBackend;
174///# use std::error::Error;
175///
176///# fn main() -> Result<(), Box<dyn Error>> {
177/// let backend = LlamaBackend::init()?;
178/// drop(backend);
179/// // can be initialized again after being dropped
180/// let backend = LlamaBackend::init()?;
181///# Ok(())
182///# }
183///
184/// ```
185impl Drop for LlamaBackend {
186 fn drop(&mut self) {
187 match LLAMA_BACKEND_INITIALIZED.compare_exchange(true, false, SeqCst, SeqCst) {
188 Ok(_) => {}
189 Err(_) => {
190 unreachable!("This should not be reachable as the only ways to obtain a llama backend involve marking the backend as initialized.")
191 }
192 }
193 unsafe { llama_cpp_sys_4::llama_backend_free() }
194 }
195}
196
#[cfg(test)]
mod tests {
    use super::*;

    /// Every variant must survive a round trip through the raw C representation.
    #[test]
    fn numa_from_and_to() {
        let numas = [
            NumaStrategy::DISABLED,
            NumaStrategy::DISTRIBUTE,
            NumaStrategy::ISOLATE,
            NumaStrategy::NUMACTL,
            NumaStrategy::MIRROR,
            NumaStrategy::COUNT,
        ];

        for numa in &numas {
            let from = llama_cpp_sys_4::ggml_numa_strategy::from(*numa);
            let to = NumaStrategy::try_from(from).expect("Failed to convert from and to");
            assert_eq!(*numa, to);
        }
    }

    /// An out-of-range raw value must be rejected, echoing the value back.
    #[test]
    fn check_invalid_numa() {
        let invalid = 800;
        // Compare against the literal input. The previous assertion rebuilt
        // the expected error from `invalid.unwrap_err().0`, comparing the
        // result with itself — a tautology that could never catch a wrong
        // error payload.
        assert_eq!(
            NumaStrategy::try_from(invalid),
            Err(InvalidNumaStrategy(invalid))
        );
    }
}