// llama_cpp_bindings/llama_backend.rs
1//! Representation of an initialized llama backend
2
3use crate::LlamaCppError;
4use crate::llama_backend_numa_strategy::NumaStrategy;
5use llama_cpp_bindings_sys::ggml_log_level;
6use std::sync::atomic::AtomicBool;
7use std::sync::atomic::Ordering::SeqCst;
8
/// Representation of an initialized llama backend
/// This is required as a parameter for most llama functions as the backend must be initialized
/// before any llama functions are called. This type is proof of initialization.
///
/// The type holds no data; its existence alone is the capability. Dropping it
/// tears the backend down (see the `Drop` impl), after which a new one may be created.
#[derive(Eq, PartialEq, Debug)]
pub struct LlamaBackend {}

// Process-wide flag guarding single initialization of the llama backend.
// Set true by `LlamaBackend::mark_init`, cleared by `Drop for LlamaBackend`.
static LLAMA_BACKEND_INITIALIZED: AtomicBool = AtomicBool::new(false);
16
17impl LlamaBackend {
18    /// Mark the llama backend as initialized
19    fn mark_init() -> crate::Result<()> {
20        match LLAMA_BACKEND_INITIALIZED.compare_exchange(false, true, SeqCst, SeqCst) {
21            Ok(_) => Ok(()),
22            Err(_) => Err(LlamaCppError::BackendAlreadyInitialized),
23        }
24    }
25
26    /// Initialize the llama backend (without numa).
27    ///
28    /// # Examples
29    ///
30    /// ```
31    ///# use llama_cpp_bindings::llama_backend::LlamaBackend;
32    ///# use llama_cpp_bindings::LlamaCppError;
33    ///# use std::error::Error;
34    ///
35    ///# fn main() -> Result<(), Box<dyn Error>> {
36    ///
37    ///
38    /// let backend = LlamaBackend::init()?;
39    /// // the llama backend can only be initialized once
40    /// assert_eq!(Err(LlamaCppError::BackendAlreadyInitialized), LlamaBackend::init());
41    ///
42    ///# Ok(())
43    ///# }
44    /// ```
45    /// # Errors
46    /// Returns an error if the backend was already initialized.
47    #[tracing::instrument(skip_all)]
48    pub fn init() -> crate::Result<LlamaBackend> {
49        Self::mark_init()?;
50        unsafe { llama_cpp_bindings_sys::llama_backend_init() }
51        Ok(LlamaBackend {})
52    }
53
54    /// Initialize the llama backend (with numa).
55    /// ```
56    ///# use llama_cpp_bindings::llama_backend::LlamaBackend;
57    ///# use std::error::Error;
58    ///# use llama_cpp_bindings::llama_backend_numa_strategy::NumaStrategy;
59    ///
60    ///# fn main() -> Result<(), Box<dyn Error>> {
61    ///
62    /// let llama_backend = LlamaBackend::init_numa(NumaStrategy::Mirror)?;
63    ///
64    ///# Ok(())
65    ///# }
66    /// ```
67    /// # Errors
68    /// Returns an error if the backend was already initialized.
69    #[tracing::instrument(skip_all)]
70    pub fn init_numa(strategy: NumaStrategy) -> crate::Result<LlamaBackend> {
71        Self::mark_init()?;
72        unsafe {
73            llama_cpp_bindings_sys::llama_numa_init(
74                llama_cpp_bindings_sys::ggml_numa_strategy::from(strategy),
75            );
76        }
77        Ok(LlamaBackend {})
78    }
79
80    /// Was the code built for a GPU backend & is a supported one available.
81    #[must_use]
82    pub fn supports_gpu_offload(&self) -> bool {
83        unsafe { llama_cpp_bindings_sys::llama_supports_gpu_offload() }
84    }
85
86    /// Does this platform support loading the model via mmap.
87    #[must_use]
88    pub fn supports_mmap(&self) -> bool {
89        unsafe { llama_cpp_bindings_sys::llama_supports_mmap() }
90    }
91
92    /// Does this platform support locking the model in RAM.
93    #[must_use]
94    pub fn supports_mlock(&self) -> bool {
95        unsafe { llama_cpp_bindings_sys::llama_supports_mlock() }
96    }
97
98    /// Change the output of llama.cpp's logging to be voided instead of pushed to `stderr`.
99    pub fn void_logs(&mut self) {
100        unsafe extern "C" fn void_log(
101            _level: ggml_log_level,
102            _text: *const ::std::os::raw::c_char,
103            _user_data: *mut ::std::os::raw::c_void,
104        ) {
105        }
106
107        unsafe {
108            llama_cpp_bindings_sys::llama_log_set(Some(void_log), std::ptr::null_mut());
109        }
110    }
111}
112
113/// Drops the llama backend.
114/// ```
115///
116///# use llama_cpp_bindings::llama_backend::LlamaBackend;
117///# use std::error::Error;
118///
119///# fn main() -> Result<(), Box<dyn Error>> {
120/// let backend = LlamaBackend::init()?;
121/// drop(backend);
122/// // can be initialized again after being dropped
123/// let backend = LlamaBackend::init()?;
124///# Ok(())
125///# }
126///
127/// ```
128impl Drop for LlamaBackend {
129    fn drop(&mut self) {
130        match LLAMA_BACKEND_INITIALIZED.compare_exchange(true, false, SeqCst, SeqCst) {
131            Ok(_) => {}
132            Err(_) => {
133                unreachable!(
134                    "This should not be reachable as the only ways to obtain a llama backend involve marking the backend as initialized."
135                )
136            }
137        }
138        unsafe { llama_cpp_bindings_sys::llama_backend_free() }
139    }
140}