# llm-kernel 0.8.0
#
# Foundation library for Rust AI-native apps — provider catalog, LLM client, MCP server, search, telemetry, and safety
# Documentation: https://docs.rs/llm-kernel
# Package metadata. Key order: name, version, remaining keys alphabetically,
# description last.
[package]
name = "llm-kernel"
version = "0.8.0"
categories = ["api-bindings", "asynchronous", "science"]
documentation = "https://docs.rs/llm-kernel"
edition = "2024"
exclude = [".github/", "target/"]
homepage = "https://github.com/epicsagas/llm-kernel"
keywords = ["llm", "ai", "mcp", "provider", "embedding"]
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/epicsagas/llm-kernel"
rust-version = "1.92"
description = "Foundation library for Rust AI-native apps — provider catalog, LLM client, MCP server, search, telemetry, and safety"

# Single-member workspace: the only member is this manifest's own directory.
[workspace]
members = ["."]

# Library target: built from src/lib.rs under the crate name `llm_kernel`.
[lib]
path = "src/lib.rs"
name = "llm_kernel"

[features]
default = ["provider"]

# Provider catalog core: ProviderIndex, ServiceDescriptor, catalog.json.
provider = ["dep:serde_json"]

# Dynamic model discovery (models.dev, Ollama, OpenAI-compatible).
discovery = ["provider", "dep:ureq"]

# Async LLM client built on reqwest, with JSON extraction.
client-async = [
    "dep:reqwest",
    "dep:serde_json",
    "dep:async-trait",
    "dep:tokio",
    "dep:tokio-stream",
    "dep:futures-core",
]

# Async model discovery (async DiscoverySource over reqwest).
discovery-async = [
    "discovery",
    "dep:reqwest",
    "dep:async-trait",
    "dep:tokio",
]

# LLM response cache backed by a KvStore; wraps any LLMClient.
cache = ["client-async", "store"]

# Secret vault: dotenv-style credential management.
secrets = ["dep:tempfile"]

# SQLite store helpers: init_schema, PRAGMA, FTS5, KvStore.
store = ["dep:rusqlite"]

# Loader for TOML configuration.
config = ["dep:toml"]

# Knowledge graph: SQLite + FTS5 + smart recall + BFS traversal.
graph = ["store", "dep:serde_json"]

# CJK-aware graph search; segmentation happens on the Rust side, so the
# schema is unchanged.
graph-cjk = ["graph"]

# PostgreSQL GraphBackend (PgGraph) plus the SQLite<->PostgreSQL migration CLI.
# Mirrors the SQLite graph using the synchronous `postgres` driver.
graph-pg = ["graph", "dep:postgres", "dep:clap"]

# MCP server framework: JSON-RPC 2.0, stdio transport, async handlers.
mcp = ["dep:serde_json", "dep:async-trait"]

# Remote MCP transport over HTTP with optional SSE (axum + tokio).
mcp-http = [
    "mcp",
    "dep:axum",
    "dep:tokio",
    "dep:tokio-stream",
    "tokio/net",
    "tokio/io-util",
]

# Token estimation: a heuristic that needs no dependencies.
tokens = []

# Installation wizard for AI tools.
install = ["dep:serde_json"]

# Hybrid search using Reciprocal Rank Fusion.
search = ["dep:serde_json"]

# Embedding provider trait and cosine similarity (plus the async vector index
# trait).
embedding = ["dep:async-trait"]

# TurboQuant compressed vector index (2-bit/4-bit, SIMD ANN search).
vector-index = ["embedding", "dep:turbovec", "dep:serde_json"]

# Qdrant AsyncVectorIndex (QdrantVectorIndex) for remote vector search.
qdrant = ["embedding", "dep:qdrant-client", "dep:serde_json"]

# OpenAI text-embedding provider; uses ureq for synchronous HTTP.
embedding-openai = ["embedding", "dep:ureq", "dep:serde_json"]

# Local ONNX embedding through fastembed-rs (models are downloaded from
# HuggingFace).
embedding-fastembed = ["embedding", "dep:fastembed", "dep:indexmap"]

# Qwen3 embedding through the fastembed-rs candle backend.
embedding-fastembed-qwen3 = [
    "embedding-fastembed",
    "fastembed/qwen3",
    "dep:candle-core",
]

# Nomic V2 MoE embedding through the fastembed-rs candle backend.
embedding-fastembed-nomic-moe = [
    "embedding-fastembed",
    "fastembed/nomic-v2-moe",
    "dep:candle-core",
]

# DirectML GPU execution provider for FastembedProvider (Windows only).
embedding-fastembed-directml = ["embedding-fastembed", "dep:ort"]

# Knowledge graph with async wrappers; requires tokio.
graph-async = ["graph", "dep:tokio"]

# Knowledge graph with a multi-connection async pool (rusqlite + tokio
# semaphore). Offers higher read concurrency than `graph-async` for WAL-mode
# workloads.
graph-pool = ["graph", "dep:tokio"]

# Telemetry framework with enum-gated events.
telemetry = ["dep:serde_json"]

# Safety utilities: secret masking and error classification.
safety = ["dep:regex"]

# Quality evaluation CLI (datasets-based metrics).
eval = [
    "dep:clap",
    "tokens",
    "safety",
    "embedding",
    "search",
]
eval-full = ["eval", "graph"]

# Aggregate feature that turns on most of the crate in one go.
# Features NOT included here:
#   - embedding-fastembed-directml: Windows-only execution backend,
#     intentionally excluded because it pulls in the ort RC dependency, which
#     is not appropriate for cross-platform builds.
#   - eval, eval-full: absent from the list below, so binaries that declare
#     `required-features = ["eval"]` are not built by `--features full` alone.
#     NOTE(review): confirm this omission is intentional.
full = [
    "provider",
    "discovery",
    "discovery-async",
    "client-async",
    "cache",
    "secrets",
    "store",
    "config",
    "graph",
    "graph-async",
    "graph-pool",
    "graph-cjk",
    "graph-pg",
    "mcp",
    "mcp-http",
    "tokens",
    "install",
    "search",
    "embedding",
    "embedding-openai",
    "embedding-fastembed",
    "embedding-fastembed-qwen3",
    "embedding-fastembed-nomic-moe",
    "vector-index",
    "qdrant",
    "telemetry",
    "safety",
]

[dependencies]
# Always required
anyhow = "1"
serde = { version = "1", features = ["derive"] }
thiserror = "2"
tracing = "0.1"

# Feature-gated: every entry below is optional and is enabled through a
# `dep:` reference in [features].
async-trait = { version = "0.1", optional = true }
axum = { version = "0.8", default-features = false, features = ["http1", "json", "tokio"], optional = true }
candle-core = { version = "0.10", optional = true }
clap = { version = "4", features = ["derive"], optional = true }
fastembed = { version = "5", default-features = false, features = ["hf-hub-rustls-tls", "ort-download-binaries-rustls-tls"], optional = true }
futures-core = { version = "0.3", optional = true }
indexmap = { version = "2", optional = true }
# ort is pinned exactly to the newest release candidate. A stable ort 2.0.0 is
# not yet published (2.0.0-rc.12 is the latest on crates.io as of 2026-06), and
# fastembed 5 targets this same RC, so the pin has to move in lockstep with a
# fastembed release built against stable. Once both conditions hold, switch to
# a caret range (`2`).
ort = { version = "=2.0.0-rc.12", default-features = false, optional = true }
postgres = { version = "0.19", optional = true }
qdrant-client = { version = "1", optional = true }
regex = { version = "1", optional = true }
reqwest = { version = "0.13", default-features = false, features = ["json", "rustls", "stream"], optional = true }
rusqlite = { version = "0.40", features = ["bundled"], optional = true }
serde_json = { version = "1", optional = true }
tempfile = { version = "3", optional = true }
tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"], optional = true }
tokio-stream = { version = "0.1", optional = true }
toml = { version = "1", optional = true }
turbovec = { version = "0.9", optional = true }
ureq = { version = "3", features = ["json"], optional = true }

# Dependencies used only by tests, examples, and benchmarks.
[dev-dependencies]
criterion = { version = "0.8", features = ["html_reports"] }
tempfile = "3"
tokio = { version = "1", features = ["macros", "rt", "rt-multi-thread"] }

# Benchmark targets. `harness = false` turns off the default libtest harness,
# so each benchmark provides its own entry point. Each one is built only when
# its required features are enabled.
[[bench]]
name = "graph_bench"
required-features = ["graph"]
harness = false

[[bench]]
name = "compute_bench"
required-features = ["search", "tokens", "embedding"]
harness = false

# Binary targets. Each is built only when its required feature is enabled.
[[bin]]
name = "llm-kernel-eval"
required-features = ["eval"]
path = "src/bin/eval.rs"

[[bin]]
name = "llm-kernel-migrate-graph"
required-features = ["graph-pg"]
path = "src/bin/migrate.rs"

# Development builds: no optimization, limited (level 1) debug info.
[profile.dev]
debug = 1
opt-level = 0

# Release builds: full optimization, thin LTO, one codegen unit, symbols
# stripped, and panics abort rather than unwind.
[profile.release]
codegen-units = 1
lto = "thin"
opt-level = 3
panic = "abort"
strip = "symbols"

# Per-package override for dependencies in release builds: same optimization
# level, but four codegen units.
[profile.release.package."*"]
codegen-units = 4
opt-level = 3

# `dist` profile: inherits from `release` and restates thin LTO explicitly.
[profile.dist]
inherits = "release"
lto = "thin"

# Platform-specific fastembed features (adds `ort-load-dynamic`):
#   - Windows: needed to avoid MSVC linker errors with ort-sys.
#   - Linux: needed because ort's prebuilt static libraries require glibc 2.38+
#     (__isoc23_strtol etc), which is not available on ubuntu-22.04.
#   - macOS: not listed here; it links statically via ort-download-binaries,
#     which works reliably.
[target.'cfg(any(target_os = "windows", target_os = "linux"))'.dependencies]
fastembed = { version = "5", default-features = false, features = ["hf-hub-rustls-tls", "ort-download-binaries-rustls-tls", "ort-load-dynamic"], optional = true }