# Crate identity and crates.io metadata.
[package]
name = "ruvllm"
version = "2.2.0"
authors = ["Ruvector Team"]
edition = "2021"
rust-version = "1.77"
license = "MIT"
readme = "README.md"
repository = "https://github.com/ruvnet/ruvector"
keywords = ["llm", "inference", "paged-attention", "kv-cache", "ruvector"]
categories = ["science", "algorithms"]
description = "LLM serving runtime with Ruvector integration - Paged attention, KV cache, and SONA learning"
# No build script, and target auto-discovery is switched off: the library,
# example, test and bench targets are all declared explicitly in this manifest.
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
[features]
# Enabled unless a consumer opts out with `default-features = false`.
default = [
    "async-runtime",
    "candle",
    "routing-metrics",
    "quantize",
    "hub-download",
]
minimal = ["async-runtime"]

# Async support: pulls in the optional `tokio` and `tokio-stream` crates.
async-runtime = ["tokio", "tokio-stream"]

# Candle stack plus the `tokenizers` crate (all optional dependencies).
candle = [
    "candle-core",
    "candle-nn",
    "candle-transformers",
    "tokenizers",
]
# Forward the GPU backend selection to every candle crate.
cuda = [
    "candle-core/cuda",
    "candle-nn/cuda",
    "candle-transformers/cuda",
]
metal = [
    "candle-core/metal",
    "candle-nn/metal",
    "candle-transformers/metal",
]
# Convenience bundles combining the candle stack with a GPU backend.
inference-cuda = ["candle", "cuda"]
inference-metal = ["candle", "metal"]
inference-metal-native = ["candle", "metal", "metal-compute"]

# Apple-specific features. The `dep:` prefix enables an optional dependency
# without creating an implicit feature of the same name, which is what lets
# the `metal` feature above coexist with the optional `metal` crate.
metal-compute = ["dep:metal", "dep:objc"]
coreml = [
    "dep:objc2",
    "dep:objc2-foundation",
    "dep:objc2-core-ml",
    "dep:block2",
]
hybrid-ane = ["metal-compute", "coreml"]

# Optional Ruvector companion crates.
attention = ["dep:ruvector-attention"]
graph = ["dep:ruvector-graph"]
gnn = ["dep:ruvector-gnn"]
ruvector-full = ["attention", "graph", "gnn"]

# Optional I/O and parallelism helpers.
hub-download = ["dep:hf-hub", "hf-hub/ureq"]
mmap = ["dep:memmap2"]
gguf-mmap = ["mmap"]
parallel = ["dep:rayon"]

# Flag-only features: they enable no dependency and no other feature here.
# NOTE(review): presumably consumed through `cfg(feature = "...")` in the
# crate sources - confirm.
accelerate = []
quantize = []
routing-metrics = []
wasm = []
wasm-simd = []
# Library target, declared explicitly because `[package]` sets `autolib = false`.
[lib]
name = "ruvllm"
path = "src/lib.rs"
crate-type = ["rlib"]
# Example targets, listed explicitly because `autoexamples = false`.
[[example]]
name = "benchmark_model"
path = "examples/benchmark_model.rs"

[[example]]
name = "download_test_model"
path = "examples/download_test_model.rs"

[[example]]
name = "generate_claude_dataset"
path = "examples/generate_claude_dataset.rs"

[[example]]
name = "hub_cli"
path = "examples/hub_cli.rs"

# Only built when the `async-runtime` feature is enabled.
[[example]]
name = "run_eval"
path = "examples/run_eval.rs"
required-features = ["async-runtime"]

[[example]]
name = "train_contrastive"
path = "examples/train_contrastive.rs"
# Integration-test targets, listed explicitly because `autotests = false`.
[[test]]
name = "acceptance_gates"
path = "tests/acceptance_gates.rs"

[[test]]
name = "adapter_integration"
path = "tests/adapter_integration.rs"

[[test]]
name = "ane_integration"
path = "tests/ane_integration.rs"

[[test]]
name = "ane_test_utils"
path = "tests/ane_test_utils.rs"

[[test]]
name = "autodetect_integration"
path = "tests/autodetect_integration.rs"

[[test]]
name = "backend_integration"
path = "tests/backend_integration.rs"

[[test]]
name = "cross_platform"
path = "tests/cross_platform.rs"

[[test]]
name = "cross_platform_v21"
path = "tests/cross_platform_v21.rs"

[[test]]
name = "e2e_integration"
path = "tests/e2e_integration.rs"

[[test]]
name = "e2e_integration_test"
path = "tests/e2e_integration_test.rs"

[[test]]
name = "gguf_integration"
path = "tests/gguf_integration.rs"

[[test]]
name = "gguf_loader_test"
path = "tests/gguf_loader_test.rs"

[[test]]
name = "hadamard_tests"
path = "tests/hadamard_tests.rs"

[[test]]
name = "kernel_integration"
path = "tests/kernel_integration.rs"

[[test]]
name = "lora_integration"
path = "tests/lora_integration.rs"

[[test]]
name = "mistral_backend_test"
path = "tests/mistral_backend_test.rs"

[[test]]
name = "model_arch_integration"
path = "tests/model_arch_integration.rs"

[[test]]
name = "moe_integration"
path = "tests/moe_integration.rs"

[[test]]
name = "pi_quant_tests"
path = "tests/pi_quant_tests.rs"

[[test]]
name = "real_model_test"
path = "tests/real_model_test.rs"

[[test]]
name = "ruvltra_e2e"
path = "tests/ruvltra_e2e.rs"

[[test]]
name = "ruvltra_tests"
path = "tests/ruvltra_tests.rs"

[[test]]
name = "serving_integration"
path = "tests/serving_integration.rs"

[[test]]
name = "simd_equivalence_tests"
path = "tests/simd_equivalence_tests.rs"

[[test]]
name = "sona_integration"
path = "tests/sona_integration.rs"

[[test]]
name = "speculative_integration"
path = "tests/speculative_integration.rs"

[[test]]
name = "ste_tests"
path = "tests/ste_tests.rs"
# Benchmark targets, listed explicitly because `autobenches = false`.
# `harness = false` turns off the built-in libtest harness, so each bench file
# must provide its own entry point.
# NOTE(review): presumably these are Criterion benches (criterion is a
# dev-dependency) - confirm.
[[bench]]
name = "ane_bench"
path = "benches/ane_bench.rs"
harness = false

[[bench]]
name = "attention_bench"
path = "benches/attention_bench.rs"
harness = false

[[bench]]
name = "e2e_bench"
path = "benches/e2e_bench.rs"
harness = false

[[bench]]
name = "lora_bench"
path = "benches/lora_bench.rs"
harness = false

[[bench]]
name = "matmul_bench"
path = "benches/matmul_bench.rs"
harness = false

[[bench]]
name = "metal_bench"
path = "benches/metal_bench.rs"
harness = false

[[bench]]
name = "moe_bench"
path = "benches/moe_bench.rs"
harness = false

[[bench]]
name = "norm_bench"
path = "benches/norm_bench.rs"
harness = false

[[bench]]
name = "pi_quant_bench"
path = "benches/pi_quant_bench.rs"
harness = false

[[bench]]
name = "rope_bench"
path = "benches/rope_bench.rs"
harness = false

[[bench]]
name = "ruvltra_benchmark"
path = "benches/ruvltra_benchmark.rs"
harness = false

[[bench]]
name = "serving_bench"
path = "benches/serving_bench.rs"
harness = false

# Only built when the `quantize` feature is enabled.
[[bench]]
name = "turbo_quant_bench"
path = "benches/turbo_quant_bench.rs"
harness = false
required-features = ["quantize"]
[dependencies.anyhow]
version = "1.0"

[dependencies.async-trait]
version = "0.1"

# NOTE(review): this is a pre-release version requirement - confirm whether a
# stable 2.x requirement is intended.
[dependencies.bincode]
version = "2.0.0-rc.3"
features = ["serde"]

# The three candle crates are optional; the `candle` feature enables them
# together, and the `cuda` / `metal` features forward a backend to each.
[dependencies.candle-core]
optional = true
version = "0.8"

[dependencies.candle-nn]
optional = true
version = "0.8"

[dependencies.candle-transformers]
optional = true
version = "0.8"
# `serde` was listed twice in the feature array; Cargo treats the list as a
# set, so one entry is sufficient.
[dependencies.chrono]
version = "0.4"
features = ["serde"]
[dependencies.dashmap]
version = "6.1"

[dependencies.dirs]
version = "5.0"

[dependencies.futures-core]
version = "0.3"

[dependencies.half]
version = "2.4"

# Optional; enabled by the `hub-download` feature, which additionally turns on
# `hf-hub/ureq`.
[dependencies.hf-hub]
optional = true
version = "0.4"
default-features = false
features = ["tokio", "rustls-tls"]

[dependencies.md5]
version = "0.7"

# Optional; enabled by the `mmap` feature.
[dependencies.memmap2]
optional = true
version = "0.9"

[dependencies.ndarray]
version = "0.16"

[dependencies.once_cell]
version = "1.20"

[dependencies.parking_lot]
version = "0.12"

[dependencies.rand]
version = "0.8"

# Optional; enabled by the `parallel` feature.
[dependencies.rayon]
optional = true
version = "1.10"

[dependencies.regex]
version = "1.10"

# Optional; enabled by the `attention` feature.
[dependencies.ruvector-attention]
optional = true
version = "2.0"

# Always-on core crate with an explicit feature selection.
[dependencies.ruvector-core]
version = "2.0"
default-features = false
features = [
    "storage",
    "hnsw",
    "parallel",
    "simd",
]

# Optional; enabled by the `gnn` feature.
[dependencies.ruvector-gnn]
optional = true
version = "2.0"

# Optional; enabled by the `graph` feature.
[dependencies.ruvector-graph]
optional = true
version = "2.0"
default-features = false

[dependencies.ruvector-sona]
version = "0.2.0"
default-features = false
features = ["serde-support"]
[dependencies.serde]
version = "1.0"
features = ["derive"]

[dependencies.serde_json]
version = "1.0"

[dependencies.sha2]
version = "0.10"

[dependencies.smallvec]
version = "1.13"

[dependencies.thiserror]
version = "2.0"

# Optional; enabled through the `candle` feature.
[dependencies.tokenizers]
optional = true
version = "0.20"
default-features = false
features = ["onig"]

# Optional; enabled through the `async-runtime` feature.
[dependencies.tokio]
optional = true
version = "1.41"
features = [
    "rt-multi-thread",
    "sync",
    "macros",
]

# Optional; enabled through the `async-runtime` feature.
[dependencies.tokio-stream]
optional = true
version = "0.1"

[dependencies.tracing]
version = "0.1"
# `v4` and `serde` were each listed twice in the feature array; Cargo treats
# the list as a set, so every feature is named once here.
[dependencies.uuid]
version = "1.11"
features = ["v4", "serde", "js"]
# Dependencies used only by tests, examples and benches.
[dev-dependencies.criterion]
version = "0.5"
features = ["html_reports"]

[dev-dependencies.tempfile]
version = "3.13"

[dev-dependencies.tracing-subscriber]
version = "0.3"
features = ["env-filter"]
# macOS-only optional dependencies. They are enabled through the `coreml`
# feature (block2, objc2, objc2-core-ml, objc2-foundation) and the
# `metal-compute` feature (metal, objc).
[target.'cfg(target_os = "macos")'.dependencies.block2]
optional = true
version = "0.6"

[target.'cfg(target_os = "macos")'.dependencies.metal]
optional = true
version = "0.29"

[target.'cfg(target_os = "macos")'.dependencies.objc]
optional = true
version = "0.2"

[target.'cfg(target_os = "macos")'.dependencies.objc2]
optional = true
version = "0.6"

[target.'cfg(target_os = "macos")'.dependencies.objc2-core-ml]
optional = true
version = "0.3"
features = [
    "MLModel",
    "MLModelConfiguration",
    "MLFeatureProvider",
    "MLFeatureValue",
    "MLMultiArray",
    "MLPredictionOptions",
    "MLModelDescription",
    "MLFeatureDescription",
    "MLDictionaryFeatureProvider",
    "MLModelError",
]

[target.'cfg(target_os = "macos")'.dependencies.objc2-foundation]
optional = true
version = "0.3"
features = [
    "NSString",
    "NSError",
    "NSURL",
    "NSArray",
    "NSDictionary",
    "NSData",
]
# Crate-wide lint levels. Every entry below silences the named lint.
# NOTE(review): `manual_ok_err` may be newer than the Clippy shipped with the
# declared `rust-version`; older toolchains could report it as an unknown
# lint - confirm.
[lints.clippy]
excessive_precision = "allow"
explicit_auto_deref = "allow"
manual_ok_err = "allow"
manual_range_contains = "allow"
manual_slice_size_calculation = "allow"
needless_range_loop = "allow"
redundant_pattern_matching = "allow"
unnecessary_cast = "allow"
useless_vec = "allow"

# rustc lints silenced for the whole crate.
[lints.rust]
dead_code = "allow"
unreachable_code = "allow"
unused_imports = "allow"
unused_parens = "allow"
unused_variables = "allow"

# rustdoc lints silenced for the whole crate.
[lints.rustdoc]
broken_intra_doc_links = "allow"