// llama_cpp_bindings/lib.rs
1//! Bindings to the llama.cpp library.
2//!
3//! As llama.cpp is a very fast moving target, this crate does not attempt to create a stable API
4//! with all the rust idioms. Instead it provided safe wrappers around nearly direct bindings to
5//! llama.cpp. This makes it easier to keep up with the changes in llama.cpp, but does mean that
6//! the API is not as nice as it could be.
7//!
8//! # Feature Flags
9//!
10//! - `cuda` enables CUDA gpu support.
11//! - `sampler` adds the [`context::sample::sampler`] struct for a more rusty way of sampling.
12
13pub mod context;
14pub mod error;
15pub mod ffi_error_reader;
16pub mod ffi_status_is_ok;
17pub mod ffi_status_to_i32;
18pub mod ggml_time_us;
19pub mod gguf_context;
20pub mod gguf_context_error;
21pub mod gguf_type;
22pub mod json_schema_to_grammar;
23pub mod llama_backend;
24pub mod llama_backend_device;
25pub mod llama_backend_numa_strategy;
26pub mod llama_batch;
27pub mod llama_time_us;
28#[cfg(feature = "llguidance")]
29pub mod llguidance_sampler;
30pub mod log;
31pub mod log_options;
32pub mod max_devices;
33pub mod mlock_supported;
34pub mod mmap_supported;
35pub mod model;
36#[cfg(feature = "mtmd")]
37pub mod mtmd;
38pub mod openai;
39pub mod sampling;
40pub mod timing;
41pub mod token;
42pub mod token_type;
43
44pub use error::{
45    ApplyChatTemplateError, ChatParseError, ChatTemplateError, DecodeError, EmbeddingsError,
46    EncodeError, GrammarError, LlamaContextLoadError, LlamaCppError, LlamaLoraAdapterInitError,
47    LlamaLoraAdapterRemoveError, LlamaLoraAdapterSetError, LlamaModelLoadError, LogitsError,
48    MetaValError, ModelParamsError, NewLlamaChatMessageError, Result, SampleError,
49    SamplerAcceptError, SamplingError, StringToTokenError, TokenSamplingError, TokenToStringError,
50};
51
52pub use llama_backend_device::{
53    LlamaBackendDevice, LlamaBackendDeviceType, list_llama_ggml_backend_devices,
54};
55
56pub use ffi_status_is_ok::status_is_ok;
57pub use ffi_status_to_i32::status_to_i32;
58pub use ggml_time_us::ggml_time_us;
59pub use json_schema_to_grammar::json_schema_to_grammar;
60pub use llama_time_us::llama_time_us;
61pub use max_devices::max_devices;
62pub use mlock_supported::mlock_supported;
63pub use mmap_supported::mmap_supported;
64
65pub use log::send_logs_to_tracing;
66pub use log_options::LogOptions;
67
68#[cfg(any(test, feature = "tests_that_use_llms"))]
69pub mod test_model;