# oxillama-runtime 0.1.3
#
# Inference engine — KV cache, sampling, tokenizer bridge
# Documentation
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

# Registry metadata for the `oxillama-runtime` crate.
[package]
name = "oxillama-runtime"
version = "0.1.3"
authors = ["COOLJAPAN OU (Team KitaSan)"]
edition = "2021"
rust-version = "1.89"
license = "Apache-2.0"
repository = "https://github.com/cool-japan/oxillama"
readme = "README.md"
categories = ["science", "algorithms"]
keywords = ["llm", "inference", "llama", "transformer", "sampling"]
# No build script, and every form of target auto-discovery is switched off;
# the lib, test, and bench targets are spelled out in their own sections.
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "Inference engine — KV cache, sampling, tokenizer bridge"

[features]
# Everything switched on when the crate is built with default features.
default = [
    # model architectures
    "llama",
    "qwen3",
    "mistral",
    "gemma",
    "phi",
    "command-r",
    "starcoder",
    # tokenizer backend
    "tokenizer-wasm",
    # remaining optional capabilities
    "parallel",
    "native-async",
    "mmap",
    "offload",
]

# Architecture features: each one forwards to the same-named feature of
# the `oxillama-arch` dependency.
command-r = ["oxillama-arch/command-r"]
gemma = ["oxillama-arch/gemma"]
llama = ["oxillama-arch/llama"]
mistral = ["oxillama-arch/mistral"]
phi = ["oxillama-arch/phi"]
qwen3 = ["oxillama-arch/qwen3"]
starcoder = ["oxillama-arch/starcoder"]

# Tokenizer features: both activate the optional `tokenizers` dependency and
# differ only in which `tokenizers` feature they enable.
tokenizer-onig = ["dep:tokenizers", "tokenizers/onig"]
tokenizer-wasm = ["dep:tokenizers", "tokenizers/unstable_wasm"]

# Enables `oxillama-gguf/mmap` together with the optional `memmap2` dependency.
mmap = ["oxillama-gguf/mmap", "dep:memmap2"]
# Pulls in the optional `tokio` and `tokio-util` dependencies.
native-async = ["dep:tokio", "dep:tokio-util"]
# Enables nothing at the manifest level.
# NOTE(review): presumably gates code inside this crate only — confirm.
offload = []
# Pulls in the optional `rayon` dependency and enables `oxillama-quant/parallel`.
parallel = ["dep:rayon", "oxillama-quant/parallel"]

# Library target, declared explicitly because `autolib = false` in [package].
[lib]
path = "src/lib.rs"
name = "oxillama_runtime"

# Test target `snapshot`, listed by hand since `autotests = false` in [package].
[[test]]
path = "tests/snapshot.rs"
name = "snapshot"

# Benchmark target `sampling`, listed by hand since `autobenches = false`.
# `harness = false` disables the built-in libtest harness, so the bench file
# must supply its own entry point.
# NOTE(review): presumably driven by the `criterion` dev-dependency — confirm.
[[bench]]
harness = false
path = "benches/sampling.rs"
name = "sampling"

# Regular dependencies. Entries with `optional = true` are only built when a
# feature in [features] names them via `dep:`.
[dependencies]
blake3 = { version = "1.8.5" }
half = { version = "2.7.1", features = ["serde"] }
# Optional: activated by the `mmap` feature.
memmap2 = { version = "0.9.10", optional = true }
oxicode = { version = "0.2.2", features = ["serde"] }
# Individual architectures are selected through this crate's per-model features.
oxillama-arch = { version = "0.1.3" }
# Default features are off for the next two; this crate's `mmap` and `parallel`
# features enable `oxillama-gguf/mmap` and `oxillama-quant/parallel`.
oxillama-gguf = { version = "0.1.3", default-features = false }
oxillama-quant = { version = "0.1.3", default-features = false }
# Optional: activated by the `parallel` feature.
rayon = { version = "1.12.0", optional = true }
serde = { version = "1.0.228", features = ["derive"] }
serde_json = { version = "1.0.149" }
tempfile = { version = "3.27.0" }
thiserror = { version = "2.0.18" }
# Optional: activated by `tokenizer-onig` or `tokenizer-wasm`, each of which
# also picks the `tokenizers` feature to enable (defaults are off here).
tokenizers = { version = "0.23.1", optional = true, default-features = false }
# Optional: both activated by the `native-async` feature.
tokio = { version = "1.52.2", features = ["full"], optional = true }
tokio-util = { version = "0.7.18", features = ["full"], optional = true }
tracing = { version = "0.1.44" }

# Dependencies needed only when building tests, examples, and benchmarks.
[dev-dependencies]
criterion = { version = "0.8.2", features = ["html_reports"] }
# Same crate as the regular dependency, here with its `test-utils` feature on.
oxillama-gguf = { version = "0.1.3", default-features = false, features = ["test-utils"] }