// Source file: llama_cpp_4/llama_backend.rs
//! Representation of an initialized llama backend

use crate::LLamaCppError;
use llama_cpp_sys_4::ggml_log_level;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::SeqCst;

8/// Representation of an initialized llama backend
9/// This is required as a parameter for most llama functions as the backend must be initialized
10/// before any llama functions are called. This type is proof of initialization.
11#[derive(Eq, PartialEq, Debug)]
12pub struct LlamaBackend {}
13
14static LLAMA_BACKEND_INITIALIZED: AtomicBool = AtomicBool::new(false);
15
16impl LlamaBackend {
17    /// Mark the llama backend as initialized
18    fn mark_init() -> crate::Result<()> {
19        match LLAMA_BACKEND_INITIALIZED.compare_exchange(false, true, SeqCst, SeqCst) {
20            Ok(_) => Ok(()),
21            Err(_) => Err(LLamaCppError::BackendAlreadyInitialized),
22        }
23    }
24
25    /// Initialize the llama backend (without numa).
26    ///
27    /// # Examples
28    ///
29    /// ```
30    ///# use llama_cpp_4::llama_backend::LlamaBackend;
31    ///# use llama_cpp_4::LLamaCppError;
32    ///# use std::error::Error;
33    ///
34    ///# fn main() -> Result<(), Box<dyn Error>> {
35    ///
36    ///
37    /// let backend = LlamaBackend::init()?;
38    /// // the llama backend can only be initialized once
39    /// assert_eq!(Err(LLamaCppError::BackendAlreadyInitialized), LlamaBackend::init());
40    ///
41    ///# Ok(())
42    ///# }
43    /// ```
44    /// # Errors
45    ///
46    /// Returns [`LLamaCppError::BackendAlreadyInitialized`] if the backend has already been initialized.
47    #[tracing::instrument(skip_all)]
48    pub fn init() -> crate::Result<LlamaBackend> {
49        Self::mark_init()?;
50        unsafe { llama_cpp_sys_4::llama_backend_init() }
51        Ok(LlamaBackend {})
52    }
53
54    /// Initialize the llama backend (with numa).
55    /// ```
56    ///# use llama_cpp_4::llama_backend::LlamaBackend;
57    ///# use std::error::Error;
58    ///# use llama_cpp_4::llama_backend::NumaStrategy;
59    ///
60    ///# fn main() -> Result<(), Box<dyn Error>> {
61    ///
62    /// let llama_backend = LlamaBackend::init_numa(NumaStrategy::MIRROR)?;
63    ///
64    ///# Ok(())
65    ///# }
66    /// ```
67    /// # Errors
68    ///
69    /// Returns [`LLamaCppError::BackendAlreadyInitialized`] if the backend has already been initialized.
70    #[tracing::instrument(skip_all)]
71    pub fn init_numa(strategy: NumaStrategy) -> crate::Result<LlamaBackend> {
72        Self::mark_init()?;
73        unsafe {
74            llama_cpp_sys_4::llama_numa_init(llama_cpp_sys_4::ggml_numa_strategy::from(strategy));
75        }
76        Ok(LlamaBackend {})
77    }
78
79    /// Change the output of llama.cpp's logging to be voided instead of pushed to `stderr`.
80    pub fn void_logs(&mut self) {
81        unsafe extern "C" fn void_log(
82            _level: ggml_log_level,
83            _text: *const ::std::os::raw::c_char,
84            _user_data: *mut ::std::os::raw::c_void,
85        ) {
86        }
87
88        unsafe {
89            llama_cpp_sys_4::llama_log_set(Some(void_log), std::ptr::null_mut());
90        }
91    }
92}
93
94/// A rusty wrapper around `numa_strategy`.
95///
96/// ## Description
97/// Represents different NUMA (Non-Uniform Memory Access) strategies for memory management
98/// in multi-core or multi-processor systems.
99///
100/// ## See more
101/// <https://github.com/ggerganov/llama.cpp/blob/master/ggml/include/ggml-cpu.h#L25-L32>
102#[derive(Debug, Eq, PartialEq, Copy, Clone)]
103pub enum NumaStrategy {
104    /// The NUMA strategy is disabled. No NUMA-aware optimizations are applied.
105    /// Memory allocation will not consider NUMA node locality.
106    DISABLED,
107
108    /// Distribute memory across NUMA nodes. This strategy aims to balance memory usage
109    /// across all available NUMA nodes, potentially improving load balancing and preventing
110    /// memory hotspots on a single node. It may use round-robin or another method to
111    /// distribute allocations.
112    DISTRIBUTE,
113
114    /// Isolate memory to specific NUMA nodes. Memory allocations will be restricted to
115    /// specific NUMA nodes, potentially reducing contention and improving locality for
116    /// processes or threads bound to a particular node.
117    ISOLATE,
118
119    /// Use `numactl` to manage memory and processor affinities. This strategy utilizes
120    /// the `numactl` command or library to bind processes or memory allocations to specific
121    /// NUMA nodes or CPUs, providing fine-grained control over memory placement.
122    NUMACTL,
123
124    /// Mirror memory across NUMA nodes. This strategy creates duplicate memory copies
125    /// on multiple NUMA nodes, which can help with fault tolerance and redundancy,
126    /// ensuring that each NUMA node has access to a copy of the memory.
127    MIRROR,
128
129    /// A placeholder representing the total number of strategies available.
130    /// Typically used for iteration or determining the number of strategies in the enum.
131    COUNT,
132}
133
134/// An invalid numa strategy was provided.
135#[derive(Debug, Eq, PartialEq, Copy, Clone)]
136pub struct InvalidNumaStrategy(
137    /// The invalid numa strategy that was provided.
138    pub llama_cpp_sys_4::ggml_numa_strategy,
139);
140
141impl TryFrom<llama_cpp_sys_4::ggml_numa_strategy> for NumaStrategy {
142    type Error = InvalidNumaStrategy;
143
144    fn try_from(value: llama_cpp_sys_4::ggml_numa_strategy) -> Result<Self, Self::Error> {
145        match value {
146            llama_cpp_sys_4::GGML_NUMA_STRATEGY_DISABLED => Ok(Self::DISABLED),
147            llama_cpp_sys_4::GGML_NUMA_STRATEGY_DISTRIBUTE => Ok(Self::DISTRIBUTE),
148            llama_cpp_sys_4::GGML_NUMA_STRATEGY_ISOLATE => Ok(Self::ISOLATE),
149            llama_cpp_sys_4::GGML_NUMA_STRATEGY_NUMACTL => Ok(Self::NUMACTL),
150            llama_cpp_sys_4::GGML_NUMA_STRATEGY_MIRROR => Ok(Self::MIRROR),
151            llama_cpp_sys_4::GGML_NUMA_STRATEGY_COUNT => Ok(Self::COUNT),
152            value => Err(InvalidNumaStrategy(value)),
153        }
154    }
155}
156
157impl From<NumaStrategy> for llama_cpp_sys_4::ggml_numa_strategy {
158    fn from(value: NumaStrategy) -> Self {
159        match value {
160            NumaStrategy::DISABLED => llama_cpp_sys_4::GGML_NUMA_STRATEGY_DISABLED,
161            NumaStrategy::DISTRIBUTE => llama_cpp_sys_4::GGML_NUMA_STRATEGY_DISTRIBUTE,
162            NumaStrategy::ISOLATE => llama_cpp_sys_4::GGML_NUMA_STRATEGY_ISOLATE,
163            NumaStrategy::NUMACTL => llama_cpp_sys_4::GGML_NUMA_STRATEGY_NUMACTL,
164            NumaStrategy::MIRROR => llama_cpp_sys_4::GGML_NUMA_STRATEGY_MIRROR,
165            NumaStrategy::COUNT => llama_cpp_sys_4::GGML_NUMA_STRATEGY_COUNT,
166        }
167    }
168}
169
170/// Drops the llama backend.
171/// ```
172///
173///# use llama_cpp_4::llama_backend::LlamaBackend;
174///# use std::error::Error;
175///
176///# fn main() -> Result<(), Box<dyn Error>> {
177/// let backend = LlamaBackend::init()?;
178/// drop(backend);
179/// // can be initialized again after being dropped
180/// let backend = LlamaBackend::init()?;
181///# Ok(())
182///# }
183///
184/// ```
185impl Drop for LlamaBackend {
186    fn drop(&mut self) {
187        match LLAMA_BACKEND_INITIALIZED.compare_exchange(true, false, SeqCst, SeqCst) {
188            Ok(_) => {}
189            Err(_) => {
190                unreachable!("This should not be reachable as the only ways to obtain a llama backend involve marking the backend as initialized.")
191            }
192        }
193        unsafe { llama_cpp_sys_4::llama_backend_free() }
194    }
195}
196
197#[cfg(test)]
198mod tests {
199    use super::*;
200
201    #[test]
202    fn numa_from_and_to() {
203        let numas = [
204            NumaStrategy::DISABLED,
205            NumaStrategy::DISTRIBUTE,
206            NumaStrategy::ISOLATE,
207            NumaStrategy::NUMACTL,
208            NumaStrategy::MIRROR,
209            NumaStrategy::COUNT,
210        ];
211
212        for numa in &numas {
213            let from = llama_cpp_sys_4::ggml_numa_strategy::from(*numa);
214            let to = NumaStrategy::try_from(from).expect("Failed to convert from and to");
215            assert_eq!(*numa, to);
216        }
217    }
218
219    #[test]
220    fn check_invalid_numa() {
221        let invalid = 800;
222        let invalid = NumaStrategy::try_from(invalid);
223        assert_eq!(invalid, Err(InvalidNumaStrategy(invalid.unwrap_err().0)));
224    }
225}