# oxillama 0.1.3
#
# Pure Rust LLM inference engine — the sovereign alternative to llama.cpp (meta crate)
# Documentation
#
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

# Crate identity and registry metadata.
[package]
name = "oxillama"
version = "0.1.3"
authors = ["COOLJAPAN OU (Team KitaSan)"]
# Target auto-discovery is disabled for every target kind; the library,
# example and test targets are declared explicitly in this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is used.
build = false
categories = ["science", "algorithms"]
edition = "2021"
homepage = "https://github.com/cool-japan/oxillama"
keywords = ["llm", "inference", "gguf", "llama", "pure-rust"]
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/cool-japan/oxillama"
# Minimum supported Rust toolchain.
rust-version = "1.89"
description = "Pure Rust LLM inference engine — the sovereign alternative to llama.cpp (meta crate)"

# docs.rs build settings: enable every feature, pass `--cfg docsrs` to
# rustdoc, and document the x86_64 Linux target only.
[package.metadata.docs.rs]
all-features = true
rustdoc-args = ["--cfg", "docsrs"]
targets = ["x86_64-unknown-linux-gnu"]

[features]
# Enabled unless the consumer sets `default-features = false`.
default = ["server", "bench"]

# Optional crates: each feature switches on the matching optional dependency.
# `gpu` additionally turns on the `gpu` feature of `oxillama-gpu`.
bench = ["dep:oxillama-bench"]
gpu = ["dep:oxillama-gpu", "oxillama-gpu/gpu"]
server = ["dep:oxillama-server"]

# Features forwarded to both `oxillama-arch` and `oxillama-runtime`.
command-r = ["oxillama-arch/command-r", "oxillama-runtime/command-r"]
gemma = ["oxillama-arch/gemma", "oxillama-runtime/gemma"]
llama = ["oxillama-arch/llama", "oxillama-runtime/llama"]
mistral = ["oxillama-arch/mistral", "oxillama-runtime/mistral"]
phi = ["oxillama-arch/phi", "oxillama-runtime/phi"]
qwen3 = ["oxillama-arch/qwen3", "oxillama-runtime/qwen3"]
starcoder = ["oxillama-arch/starcoder", "oxillama-runtime/starcoder"]

# Features forwarded to `oxillama-arch` only.
# NOTE(review): unlike the group above, these are not forwarded to
# `oxillama-runtime` — confirm that is intentional.
dbrx = ["oxillama-arch/dbrx"]
deepseek = ["oxillama-arch/deepseek"]
grok = ["oxillama-arch/grok"]
jamba = ["oxillama-arch/jamba"]
llava = ["oxillama-arch/llava"]
mamba2 = ["oxillama-arch/mamba2"]

# SIMD features, forwarded to `oxillama-quant`.
simd-avx2 = ["oxillama-quant/simd-avx2"]
simd-avx512 = ["oxillama-quant/simd-avx512"]
simd-neon = ["oxillama-quant/simd-neon"]

# Library target, declared explicitly because `autolib = false` is set in
# [package].
[lib]
name = "oxillama"
path = "src/lib.rs"

# Example targets. Each one is listed explicitly because
# `autoexamples = false` is set in [package].
[[example]]
name = "01_load_model"
path = "examples/01_load_model.rs"

[[example]]
name = "02_inference"
path = "examples/02_inference.rs"

[[example]]
name = "03_streaming"
path = "examples/03_streaming.rs"

[[example]]
name = "04_lora"
path = "examples/04_lora.rs"

[[example]]
name = "05_speculative"
path = "examples/05_speculative.rs"

[[example]]
name = "06_metrics"
path = "examples/06_metrics.rs"

# NOTE(review): no `required-features` here; if this example needs the
# optional `gpu` feature to compile, it should declare it — confirm.
[[example]]
name = "gpu_enabled"
path = "examples/gpu_enabled.rs"

# An empty `required-features` list means no feature has to be enabled for
# this example to be built.
[[example]]
name = "load_and_generate"
path = "examples/load_and_generate.rs"
required-features = []

[[example]]
name = "lora_apply"
path = "examples/lora_apply.rs"
required-features = []

# NOTE(review): no `required-features` here; if this example depends on the
# optional `server` feature, a `--no-default-features` build would fail —
# confirm.
[[example]]
name = "openai_server"
path = "examples/openai_server.rs"

[[example]]
name = "python_bridge"
path = "examples/python_bridge.rs"

[[example]]
name = "speculative"
path = "examples/speculative.rs"
required-features = []

# Integration-test targets, listed explicitly because `autotests = false`
# is set in [package].
[[test]]
name = "error_types"
path = "tests/error_types.rs"

[[test]]
name = "feature_matrix"
path = "tests/feature_matrix.rs"

[[test]]
name = "recipes_doctest"
path = "tests/recipes_doctest.rs"

# Runtime dependencies. The crates marked `optional = true` are only built
# when the `bench`, `gpu` or `server` feature enables them.
[dependencies]
anyhow = "1.0.102"
oxillama-arch = "0.1.3"
oxillama-bench = { version = "0.1.3", optional = true }
oxillama-gguf = { version = "0.1.3", default-features = false, features = ["mmap"] }
oxillama-gpu = { version = "0.1.3", default-features = false, optional = true }
oxillama-quant = { version = "0.1.3", default-features = false, features = ["parallel"] }
oxillama-server = { version = "0.1.3", optional = true }

# Kept as its own table because the feature list is too long for a readable
# single-line inline table.
[dependencies.oxillama-runtime]
version = "0.1.3"
default-features = false
features = [
    "tokenizer-wasm",
    "parallel",
    "native-async",
    "mmap",
]

# Dependencies used only when building examples, tests and benchmarks.
[dev-dependencies]
axum = { version = "0.8.9", features = ["ws", "multipart"] }
clap = { version = "4.6.1", features = ["derive", "env"] }
tokio = { version = "1.52.2", features = ["full"] }