llama_cpp_bindings/
lib.rs

1//! Bindings to the llama.cpp library.
2//!
3//! As llama.cpp is a very fast-moving target, this crate does not attempt to create a stable API
4//! with all the Rust idioms. Instead it provides safe wrappers around nearly direct bindings to
5//! llama.cpp. This makes it easier to keep up with the changes in llama.cpp, but does mean that
6//! the API is not as nice as it could be.
7//!
8//! # Feature Flags
9//!
10//! - `cuda` enables CUDA GPU support.
11//! - `sampler` adds the [`context::sample::sampler`] struct for a more idiomatic Rust way of sampling.
12
13pub mod batch_add_error;
14pub mod chat_message_parse_outcome;
15pub mod context;
16pub mod error;
17pub mod extract_tool_call_markers_from_haystack;
18pub mod ffi_error_reader;
19pub mod ffi_status_is_ok;
20pub mod ffi_status_to_i32;
21pub mod ggml_time_us;
22pub mod gguf_context;
23pub mod gguf_context_error;
24pub mod gguf_type;
25pub mod ingest_prompt_chunk;
26pub mod json_schema_to_grammar;
27pub mod llama_backend;
28pub mod llama_backend_device;
29pub mod llama_backend_numa_strategy;
30pub mod llama_batch;
31pub mod llama_time_us;
32pub mod llama_token_attr;
33pub mod llama_token_attrs;
34pub mod llama_token_attrs_from_int_error;
35pub mod llguidance_sampler;
36#[cfg(feature = "dynamic-backends")]
37pub mod load_backends;
38#[cfg(feature = "dynamic-backends")]
39pub mod load_backends_error;
40#[cfg(feature = "dynamic-backends")]
41pub mod load_backends_from_path;
42pub mod log;
43pub mod log_options;
44pub mod max_devices;
45pub mod mlock_supported;
46pub mod mmap_supported;
47pub mod model;
48pub mod mtmd;
49pub mod raw_chat_message;
50pub mod resolved_tool_call_markers;
51pub mod sampled_token;
52pub mod sampled_token_classifier;
53pub mod sampling;
54pub mod streaming_json_probe;
55pub mod timing;
56pub mod token;
57pub mod tool_call_format;
58pub mod tool_call_marker_pair;
59pub mod tool_call_template_overrides;
60
61pub use error::{
62    ApplyChatTemplateError, ChatTemplateError, DecodeError, EmbeddingsError, EncodeError,
63    EvalMultimodalChunksError, GrammarError, LlamaContextLoadError, LlamaCppError,
64    LlamaLoraAdapterInitError, LlamaLoraAdapterRemoveError, LlamaLoraAdapterSetError,
65    LlamaModelLoadError, LogitsError, MarkerDetectionError, MetaValError, ModelParamsError,
66    NewLlamaChatMessageError, ParseChatMessageError, Result, SampleError, SamplerAcceptError,
67    SamplingError, StringToTokenError, TokenSamplingError, TokenToStringError,
68};
69
70pub use chat_message_parse_outcome::ChatMessageParseOutcome;
71pub use llama_backend_device::{
72    LlamaBackendDevice, LlamaBackendDeviceType, list_llama_ggml_backend_devices,
73};
74pub use llama_cpp_bindings_types::{
75    BracketedJsonShape, KeyValueXmlTagsShape, PairedQuoteShape, ParsedChatMessage, ParsedToolCall,
76    ReasoningMarkers, TokenUsage, TokenUsageError, ToolCallArgsShape, ToolCallArguments,
77    ToolCallMarkers, ToolCallValueQuote, XmlTagsShape,
78};
79pub use raw_chat_message::RawChatMessage;
80pub use sampled_token::SampledToken;
81pub use sampled_token_classifier::SampledTokenClassifier;
82pub use sampled_token_classifier::SampledTokenSection;
83
84pub use ffi_status_is_ok::status_is_ok;
85pub use ffi_status_to_i32::status_to_i32;
86pub use ggml_time_us::ggml_time_us;
87pub use ingest_prompt_chunk::ingest_prompt_chunk;
88pub use json_schema_to_grammar::json_schema_to_grammar;
89pub use llama_time_us::llama_time_us;
90pub use max_devices::max_devices;
91pub use mlock_supported::mlock_supported;
92pub use mmap_supported::mmap_supported;
93
94pub use log::send_logs_to_tracing;
95pub use log_options::LogOptions;
llama_cpp_bindings/lib.rs

llama_cpp_bindings/
lib.rs