xybrid_llama/lib.rs
1//! Safe Rust wrappers over [`llama-cpp-sys`].
2//!
3//! Owns the FFI boundary for llama.cpp: RAII handles, typed errors, the
4//! streaming trampoline. Downstream code (the `xybrid-core` adapter, Phase
5//! 2's consumers, and any future backend that wants llama-cpp without the
6//! `xybrid-core` surface) only touches the safe types in this crate.
7//!
8//! # Activation
9//!
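//! The real implementation lives behind the `bindings` cargo feature. A
//! default build — `cargo build -p xybrid-llama` — compiles this crate to
//! a thin shell on every target: only the error types, [`backend_init`],
//! and no-op stand-ins for [`LlamaModel`], [`LlamaContext`],
//! [`set_verbosity`] and [`get_verbosity`] remain. That keeps
//! `cargo clippy --workspace` on Linux CI runners green even without a
//! C++ toolchain.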
14//!
15//! # Public surface
16//!
17//! - [`LlamaModel`] — owning handle to a loaded GGUF model
18//! - [`LlamaContext`] — owning handle to a llama context, with KV-cache
19//! manipulation methods
20//! - [`StreamingCallback`] — closure type alias for streaming generation
//! - [`generate_streaming`] / [`generate_with_stops`] — the autoregressive
//! loops, including the prefix-reuse `n_past_in` knob
//! - [`format_chat`] — helper re-exported from the same generation module
23//! - [`set_verbosity`] / [`get_verbosity`] — llama.cpp log-level control
24//! - [`LlamaError`] / [`LlamaResult`] — error surface
25//!
26//! Zero `unsafe` appears on the public surface. Every `unsafe` block lives
27//! in the [`mod@ffi`] module behind `pub(crate)` with `# Safety` doc
28//! comments, mirroring `xybrid-mlx::ffi`'s discipline.
29
30// Unconditional: callers can spell error variants and stub-call
31// `backend_init` even in a no-bindings build.
32mod error;
33pub use error::{LlamaError, LlamaResult};
34
/// Initialize the llama.cpp backend and apply Xybrid's log policy once.
///
/// The `-sys` crate owns only native backend initialization. This wrapper
/// keeps the Xybrid-specific `XYBRID_LLAMACPP_VERBOSITY` env-var contract
/// in the safe wrapper crate while preserving the historical one-time
/// init timing: the env var is read during the same `Once` closure as
/// `llama_backend_init_c()`.
///
/// This function is compiled with or without the `bindings` feature.
/// Without it, the configure hook handed to the `-sys` crate is an empty
/// function, so the env var is never read.
pub fn backend_init() {
    // The hook is passed to the `-sys` crate instead of being called here,
    // so the `-sys` side controls when it runs.
    llama_cpp_sys::backend_init_with_configure(configure_verbosity_from_env);
}
45
/// Configure hook that [`backend_init`] passes to the `-sys` crate.
///
/// With the `bindings` feature enabled, reads `XYBRID_LLAMACPP_VERBOSITY`
/// and, when it is set and parses as an `i32`, forwards the value to
/// `log_control::set_verbosity`. An unset or unparseable value is ignored.
///
/// Without the `bindings` feature the body compiles to nothing.
fn configure_verbosity_from_env() {
    #[cfg(feature = "bindings")]
    {
        let requested = std::env::var("XYBRID_LLAMACPP_VERBOSITY")
            .ok()
            .and_then(|raw| raw.parse::<i32>().ok());
        if let Some(level) = requested {
            crate::log_control::set_verbosity(level);
        }
    }
}
57
// FFI module: crate-private, compiled only with the `bindings` feature.
#[cfg(feature = "bindings")]
pub(crate) mod ffi;

// Implementation modules. Each is compiled only with the `bindings` feature.
#[cfg(feature = "bindings")]
mod context;
#[cfg(feature = "bindings")]
mod generation;
#[cfg(feature = "bindings")]
mod log_control;
#[cfg(feature = "bindings")]
mod model;

// Re-exports that form the public surface of a `bindings` build.
#[cfg(feature = "bindings")]
pub use context::LlamaContext;
#[cfg(feature = "bindings")]
pub use generation::{format_chat, generate_streaming, generate_with_stops, StreamingCallback};
#[cfg(feature = "bindings")]
pub use log_control::{get_verbosity, set_verbosity};
#[cfg(feature = "bindings")]
pub use model::LlamaModel;
78
79// =========================================================================
80// No-bindings stubs
81// =========================================================================
82//
// These keep the `LlamaModel` / `LlamaContext` type names and the
// verbosity functions present on a default build (no `bindings` feature),
// so `cargo build -p xybrid-llama` and `cargo clippy --workspace` stay
// green on toolchain-free CI runners.
86
/// Placeholder standing in for the real `LlamaModel` when the `bindings`
/// feature is disabled.
///
/// It is a unit struct that carries no data; it exists so the name
/// `LlamaModel` still resolves in a no-bindings build.
#[cfg(not(feature = "bindings"))]
pub struct LlamaModel;
90
/// Placeholder standing in for the real `LlamaContext` when the `bindings`
/// feature is disabled.
///
/// It is a unit struct that carries no data; it exists so the name
/// `LlamaContext` still resolves in a no-bindings build.
#[cfg(not(feature = "bindings"))]
pub struct LlamaContext;
94
/// No-bindings stand-in for `set_verbosity`.
///
/// Accepts the same `i32` argument as the `bindings` build but discards
/// it: nothing is stored, and nothing is forwarded to llama.cpp.
#[cfg(not(feature = "bindings"))]
pub fn set_verbosity(_level: i32) {}
97
/// No-bindings stand-in for `get_verbosity`.
///
/// Always returns `0`, regardless of any earlier `set_verbosity` call,
/// because the stub build keeps no verbosity state.
#[cfg(not(feature = "bindings"))]
pub fn get_verbosity() -> i32 {
    // Fixed value reported by the stub build.
    const STUB_LEVEL: i32 = 0;
    STUB_LEVEL
}