# rig-llama-cpp 0.1.0

# Rig completion provider for local GGUF models via llama.cpp, with streaming, tool calling, reasoning, and multimodal (mtmd) support.
# Documentation: https://docs.rs/rig-llama-cpp
[package]
name = "rig-llama-cpp"
version = "0.1.0"
edition = "2024"
rust-version = "1.88"
description = "Rig completion provider for local GGUF models via llama.cpp, with streaming, tool calling, reasoning, and multimodal (mtmd) support."
license = "MIT"
# `homepage` is intentionally not set: the crate has no site of its own, and
# pointing it at the repository would only duplicate the `repository` link.
repository = "https://github.com/camperking/rig-llama-cpp"
documentation = "https://docs.rs/rig-llama-cpp"
readme = "README.md"
keywords = ["llm", "llama-cpp", "gguf", "inference", "rig"]
categories = ["api-bindings", "science"]
# Keep repository-only artefacts out of the published .crate tarball.
# `tests/` is deliberately NOT excluded: the explicit `[[test]]` entry named
# `e2e` in this manifest points at files in that directory, so excluding it
# would strand the declaration and break `cargo test` on the unpacked crate.
exclude = [
    ".github/",
    "CLAUDE.md",
]

# Settings read by docs.rs when it builds this crate's documentation.
[package.metadata.docs.rs]
# Build the hosted docs with the `mtmd` feature enabled.
features = ["mtmd"]
# Extra rustdoc flags: define the `docsrs` cfg for the docs build.
rustdoc-args = ["--cfg", "docsrs"]

[features]
# Nothing is enabled by default: a build with no GPU feature is CPU-only and
# works out of the box. For GPU acceleration, enable the single backend
# feature below that matches your hardware.
default = []
# GPU backends, each forwarded to the matching `llama-cpp-2` feature.
cuda = ["llama-cpp-2/cuda"]
metal = ["llama-cpp-2/metal"]
rocm = ["llama-cpp-2/rocm"]
vulkan = ["llama-cpp-2/vulkan"]
# Opt into OpenMP threading.
openmp = ["llama-cpp-2/openmp"]
# Multimodal (mtmd) support; also enables the optional `base64` dependency.
mtmd = ["dep:base64", "llama-cpp-2/mtmd"]

[dependencies]
# Optional: only pulled in by the `mtmd` feature.
base64 = { version = "0.22", optional = true }
chrono = "0.4.44"
encoding_rs = "0.8"
# Default features are off for both llama.cpp crates; backend features are
# forwarded to `llama-cpp-2` by this crate's `[features]` table.
llama-cpp-2 = { version = "0.1.146", default-features = false }
llama-cpp-sys-2 = { version = "0.1.146", default-features = false }
log = "0.4"
rig-core = "0.36.0"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
thiserror = "2.0.18"
tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"] }
tokio-stream = "0.1"

[dev-dependencies]
anyhow = "1"
hf-hub = { version = "0.4", default-features = false, features = ["ureq", "native-tls"] }
schemars = "1"
# Every e2e test is marked `#[serial]` so the suite runs one test at a time
# instead of under Cargo's default parallel test runner: loading several GGUF
# models at once OOMs the GPU, and concurrent first-time hf-hub downloads of
# the same file race the cache.
serial_test = { version = "3", default-features = false, features = ["async"] }

# E2E integration tests live under `tests/e2e/` as a single binary so they
# share a `common` helper module (auto-download via hf-hub, RunSummary,
# corpus prompts, etc.). Cargo's target auto-discovery already treats
# `tests/<name>/main.rs` as an integration test named `<name>`, so this
# entry is not strictly required; declaring it explicitly documents the
# target's name and entry point in the manifest.
[[test]]
name = "e2e"
path = "tests/e2e/main.rs"