# gllm 0.10.6

# Pure Rust library for local embeddings, reranking, and text generation with MoE-optimized inference and aggressive performance tuning
# Documentation
# Benchmark targets. `harness = false` switches off the default libtest
# bench harness, so each source file must supply its own entry point.
[[bench]]
name = "moe_layer"
path = "benches/moe_layer.rs"
harness = false

# Only built when the `quantized` feature is enabled.
[[bench]]
name = "quantized_matmul"
path = "benches/quantized_matmul.rs"
harness = false
required-features = ["quantized"]

# Binary targets, declared explicitly because `autobins = false` is set in
# [package]. Both sources sit at the package root rather than under src/bin/.
[[bin]]
name = "stress_test"
path = "stress_test.rs"

[[bin]]
name = "test_models"
path = "test_models.rs"

# Runtime dependencies. Entries marked `optional = true` are only pulled in
# through the corresponding `dep:` entries in [features].
[dependencies]
burn = { version = "0.20.0-pre.6", default-features = false, features = ["std", "wgpu", "candle", "ndarray"] }
burn-import = { version = "0.20.0-pre.6", default-features = false, features = ["safetensors"] }
dirs = "5"
gllm-kernels = { version = "0.1.3", default-features = false }
half = "2.4"
hf-hub = { version = "0.4.3", default-features = false, features = ["ureq", "rustls-tls"] }
log = "0.4"
memmap2 = "0.9"
pollster = { version = "0.4", optional = true }
rand = "0.8"
safetensors = "0.4"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
thiserror = "2"
tokenizers = { version = "0.22.1", default-features = false, features = ["fancy-regex"] }
tokio = { version = "1", optional = true, features = ["rt-multi-thread", "macros", "sync"] }
wgpu = { version = "26.0", optional = true }

# Dependencies used only by tests, examples and benchmarks.
# NOTE(review): criterion is presumably what the `harness = false` benches
# run on — confirm against the bench sources.
[dev-dependencies]
criterion = "0.5"
tempfile = "3"

# Example targets, listed explicitly because `autoexamples = false` is set in
# [package]. None of them declares `required-features`.
[[example]]
name = "test_all_new_models"
path = "examples/test_all_new_models.rs"

[[example]]
name = "test_download"
path = "examples/test_download.rs"

[[example]]
name = "test_glm4"
path = "examples/test_glm4.rs"

[[example]]
name = "test_moe"
path = "examples/test_moe.rs"

[[example]]
name = "test_quantization"
path = "examples/test_quantization.rs"

[[example]]
name = "test_qwen3_small"
path = "examples/test_qwen3_small.rs"

[features]
# Enabled when the crate is built without an explicit feature selection.
default = ["cpu"]

# Backend selection: each one forwards to the same-named gllm-kernels feature.
# `wgpu` here is an explicit feature; it does NOT enable the optional `wgpu`
# crate, which is only activated through `wgpu-detect` below.
cpu = ["gllm-kernels/cpu"]
cuda = ["gllm-kernels/cuda"]
wgpu = ["gllm-kernels/wgpu"]

# Activates the optional `wgpu` and `pollster` dependencies.
wgpu-detect = ["dep:wgpu", "dep:pollster"]

# Activates the optional `tokio` dependency.
tokio = ["dep:tokio"]

# Flags that enable no dependencies or other features by themselves.
# NOTE(review): presumably consumed via `cfg(feature = ...)` in the sources.
flash-attention = []
paged-attention = []
quantized = []

# Implies `quantized`.
gpu-quantized = ["quantized"]

# Library target, declared explicitly because `autolib = false` is set in
# [package].
[lib]
name = "gllm"
path = "src/lib.rs"

[package]
name = "gllm"
version = "0.10.6"
authors = ["gllm contributors"]
# Target auto-discovery is switched off for every target kind, so each
# lib/bin/example/test/bench must be declared explicitly in this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is used for this package.
build = false
categories = ["algorithms", "science", "text-processing"]
edition = "2021"
homepage = "https://github.com/putao520/gllm"
keywords = ["embeddings", "reranking", "nlp", "ml", "rust"]
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/putao520/gllm"
description = "Pure Rust library for local embeddings, reranking, and text generation with MoE-optimized inference and aggressive performance tuning"

# Integration-test targets, listed explicitly because `autotests = false` is
# set in [package].
[[test]]
name = "api"
path = "tests/api.rs"

[[test]]
name = "code_models"
path = "tests/code_models.rs"

[[test]]
name = "integration"
path = "tests/integration.rs"