# ferrum-models 0.5.0
#
# Model architectures (LLaMA, Qwen, BERT) for Ferrum inference
# Documentation
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
name = "ferrum-models"
version = "0.5.0"
authors = ["Ferrum Team"]
edition = "2021"
license = "MIT"
readme = "README.md"
# Every automatic target-discovery switch is off, so the library and test
# targets this crate builds are the ones declared explicitly in this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is used for this crate.
build = false
description = "Model architectures (LLaMA, Qwen, BERT) for Ferrum inference"

# Opt-in capabilities. `default` is empty, so none are active unless requested.
[features]
default = []
# Pulls in the two optional CUDA crates and turns on the `cuda` feature of
# candle-nn and ferrum-cuda-kernels.
cuda = [
    "dep:candle-flash-attn",
    "dep:ferrum-cuda-kernels",
    "candle-nn/cuda",
    "ferrum-cuda-kernels?/cuda",
]
# Both of these imply `cuda` and add no dependencies of their own.
marlin = ["cuda"]
tensor-parallel = ["cuda"]
# Enables the optional ferrum-runtime dependency.
runtime = ["dep:ferrum-runtime"]
# Pure flags: they enable no dependencies or other features.
integration-tests = []
registry = []

# Library target: crate `ferrum_models`, rooted at src/lib.rs.
[lib]
path = "src/lib.rs"
name = "ferrum_models"

# Test targets, each declared explicitly with its source file.
[[test]]
path = "tests/executor_common_test.rs"
name = "executor_common_test"

[[test]]
path = "tests/tp_sharding_test.rs"
name = "tp_sharding_test"

# Runtime dependencies, sorted alphabetically.
#
# Entries with `optional = true` are only compiled when a feature in
# [features] names them via `dep:<crate>`: candle-flash-attn and
# ferrum-cuda-kernels through `cuda`, ferrum-runtime through `runtime`.
[dependencies]
anyhow = "1.0"
async-trait = "0.1"
base64 = "0.22"
bytemuck = { version = "1.23", features = ["derive"] }
candle-core = "0.9.2"
candle-flash-attn = { version = "0.9.2", optional = true }
candle-nn = "0.9.2"
candle-transformers = "0.9.2"
chrono = { version = "0.4", features = ["serde"] }
dirs = "5.0"
ferrum-cuda-kernels = { version = "0.5.0", optional = true }
ferrum-interfaces = "0.5.0"
ferrum-runtime = { version = "0.5.0", optional = true }
ferrum-types = "0.5.0"
futures = "0.3"
futures-util = "0.3"
half = { version = "2.5.0", features = ["num-traits", "use-intrinsics", "rand_distr"] }
hf-hub = "0.4.1"
# Default features are off; only the listed image codecs are enabled.
image = { version = "0.25", default-features = false, features = ["jpeg", "png", "webp"] }
indicatif = "0.17"
once_cell = "1.19"
parking_lot = "0.12"
pathdiff = "0.2"
rand = "0.8"
# Default features are off; each required feature is listed exactly once.
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls", "socks", "stream", "json"] }
safetensors = "0.4"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "1.0"
tokenizers = { version = "0.21", features = ["onig"] }
tokio = { version = "1.35", features = ["full"] }
toml = "0.8"
tracing = "0.1"
uuid = { version = "1.6", features = ["v4", "serde"] }

# Development-only dependencies.
[dev-dependencies]
ferrum-testkit = "0.5.0"
tempfile = "3.8"
tokio-test = "0.4"