# Package metadata for the `model-rs` crate.
[package]
name = "model-rs"
version = "0.1.1"
# NOTE(review): edition 2024 needs Rust 1.85 or newer; no `rust-version` is
# declared here, so older toolchains fail without an MSRV hint - consider adding one.
edition = "2024"
authors = ["Ying Kit WONG"]
license = "Apache-2.0"
repository = "https://github.com/yingkitw/model-rs"
readme = "README.md"
keywords = [
    "llm",
    "machine-learning",
    "huggingface",
    "inference",
    "cli",
]
categories = [
    "command-line-utilities",
    "science",
    "api-bindings",
]
# No build script, and target auto-discovery is switched off: every lib, bin,
# example, test and bench target is declared explicitly further down.
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "A Rust CLI tool for downloading HuggingFace models and running local LLM inference"

# Optional backends. Entries of the form `dep:<crate>` switch on an optional
# dependency; entries of the form `<crate>/<feature>` forward a feature to it.
[features]
# NOTE(review): `metal` is on by default and `candle-metal-kernels` is a
# non-optional dependency; presumably this restricts builds to Apple platforms,
# even with `--no-default-features` - confirm.
default = ["metal"]
accelerate = [
    "dep:accelerate-src",
    "candle-core/accelerate",
    "candle-nn/accelerate",
    "candle-transformers/accelerate",
]
cuda = [
    "dep:candle-kernels",
    "candle-core/cuda",
    "candle-nn/cuda",
    "candle-transformers/cuda",
]
# Implies `cuda`.
cudnn = ["cuda", "candle-core/cudnn"]
gguf = ["dep:llama_cpp", "dep:gguf-rs-lib"]
metal = [
    "candle-core/metal",
    "candle-nn/metal",
    "candle-transformers/metal",
]
# `mlx-rs` is declared with `default-features = false`, so the features it
# needs are enabled explicitly here and in `mlx-metal`.
mlx = ["dep:mlx-rs", "mlx-rs/accelerate"]
# Implies `mlx`.
mlx-metal = ["mlx", "mlx-rs/metal"]
# Implies `cuda`.
nccl = ["cuda", "candle-core/nccl"]

# Library target. Declared explicitly because `autolib = false` in [package];
# the crate is imported as `model_rs` (underscore), unlike the package name.
[lib]
name = "model_rs"
path = "src/lib.rs"
# The `model-rs` executable. Declared explicitly because `autobins = false`
# in [package].
[[bin]]
name = "model-rs"
path = "src/main.rs"
# Example programs. Listed one by one because `autoexamples = false` in
# [package]; a new file under examples/ is not built until it is added here.
[[example]]
name = "library_run_cli"
path = "examples/library_run_cli.rs"
[[example]]
name = "library_sampling"
path = "examples/library_sampling.rs"
# Integration test targets. Listed one by one because `autotests = false` in
# [package]; a new file under tests/ is not run until it is added here.
[[test]]
name = "e2e_test"
path = "tests/e2e_test.rs"
[[test]]
name = "integration_test"
path = "tests/integration_test.rs"
# Benchmark target. Declared explicitly because `autobenches = false` in
# [package].
[[bench]]
name = "throughput"
path = "benches/throughput.rs"
# Disables the built-in libtest harness, so the bench file must provide its own
# entry point (presumably via the `criterion` dev-dependency - confirm).
harness = false
# Runtime dependencies, alphabetical. Crates marked `optional = true` are only
# built when the named entry in [features] enables them.
[dependencies]
accelerate-src = { version = "0.3", optional = true } # feature: accelerate
axum = "0.8"
base64 = "0.22"
bytes = "1.9"
candle-core = "0.10"
candle-kernels = { version = "0.10", optional = true } # feature: cuda
# NOTE(review): unlike `candle-kernels`, this one is not optional and is not
# gated behind the `metal` feature - confirm that is intentional.
candle-metal-kernels = "0.10"
candle-nn = "0.10"
candle-transformers = "0.10"
chrono = "0.4"
clap = { version = "4.5", features = ["derive"] }
directories = "5.0"
dotenvy = "0.15"
futures-util = "0.3"
gguf-rs-lib = { version = "0.2", optional = true } # feature: gguf
hex = "0.4"
hf-hub = "0.5"
indicatif = "0.17"
llama_cpp = { version = "0.3", optional = true } # feature: gguf
mlx-rs = { version = "0.25", optional = true, default-features = false } # feature: mlx
rand = "0.8"
reqwest = { version = "0.13", features = ["json", "stream"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sha2 = "0.10"
syntect = "5.3"
termimad = "0.30"
thiserror = "2.0"
tokenizers = "0.21"
tokio = { version = "1", features = ["macros", "rt-multi-thread", "fs", "io-util", "net", "time", "sync"] }
tokio-stream = "0.1"
tower-http = { version = "0.6", features = ["cors"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
urlencoding = "2.1"

# Dependencies used only by tests, examples and benches.
# `reqwest`, `serde_json` and `tokio` repeat runtime dependencies at the same
# version; the features requested here are already covered by the runtime entries.
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
mockito = "1.6"
reqwest = { version = "0.13", features = ["json", "stream"] }
serde_json = "1.0"
tempfile = "3.14"
tokio = { version = "1", features = ["macros", "rt-multi-thread", "time"] }
tower = "0.5"

# Release builds favour runtime speed and binary size over compile time.
[profile.release]
opt-level = 3
# "fat" is the spelled-out form of `lto = true`: whole-program LTO across all crates.
lto = "fat"
# A single codegen unit gives the optimizer the widest view, at the cost of
# parallel compilation.
codegen-units = 1
# "symbols" is the spelled-out form of `strip = true`.
strip = "symbols"