# cortex_rust 0.6.0
#
# High-performance LLM inference with 4-bit quantization and Test-Time Training (TTT)
# Documentation
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

# Crate identity and registry metadata.
[package]
name = "cortex_rust"
version = "0.6.0"
# Target auto-discovery is disabled for every target kind; the library,
# binaries, examples and tests are all declared explicitly in this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = "build.rs"
categories = ["science", "algorithms"]
documentation = "https://docs.rs/cortex_rust"
edition = "2021"
homepage = "https://github.com/imonoonoko/Bit-TTT-Engine"
keywords = ["llm", "rust", "ttt", "quantization", "inference"]
license = "MIT"
# NOTE(review): the README shipped with the crate is the PyPI-flavoured one;
# confirm that is the text intended for the registry page.
readme = "README_PYPI.md"
repository = "https://github.com/imonoonoko/Bit-TTT-Engine"
description = "High-performance LLM inference with 4-bit quantization and Test-Time Training (TTT)"

# Feature flags. A `dep:<name>` entry switches on the optional dependency
# called <name>; a `<crate>/<feature>` entry forwards to a dependency's feature.
[features]
# CUDA support: the CUDA runtime bindings plus candle-core's own `cuda` feature.
cuda = ["dep:cuda-runtime-sys", "candle-core/cuda"]
# Features active when the consumer does not choose any explicitly.
# NOTE(review): `python` is on by default and pyo3 is declared with the
# `extension-module` feature; PyO3's documentation warns that this can cause
# linker errors for test and binary targets. Confirm the [[bin]] and [[test]]
# targets link with default features.
default = ["python", "tokenizers"]
# Optional crates bundled under one switch (safetensors, env_logger,
# windows-sys) together with the `tokenizers` feature.
dev-bins = [
    "dep:safetensors",
    "dep:env_logger",
    "dep:windows-sys",
    "tokenizers",
]
# Enables no dependency and no other feature from this manifest.
# NOTE(review): presumably consumed through `cfg(feature = ...)` in the
# sources; confirm it is still used.
flash-attention = []
python = ["dep:pyo3"]
safetensors = ["dep:safetensors"]
tokenizers = ["dep:tokenizers"]
# Optional crates enabled together for the WebAssembly build.
wasm = [
    "dep:wasm-bindgen",
    "dep:js-sys",
    "dep:web-sys",
    "dep:console_error_panic_hook",
    "dep:getrandom",
]

# Library target, declared explicitly because `autolib = false` in [package].
[lib]
name = "cortex_rust"
path = "src/lib.rs"
# `cdylib` produces a dynamic library loadable from other languages; `rlib`
# keeps the crate usable as an ordinary Rust dependency.
crate-type = ["cdylib", "rlib"]

# Binary targets, listed explicitly because `autobins = false` in [package].
# A target with `required-features = ["tokenizers"]` is only built when the
# `tokenizers` feature is enabled; the others are built under any feature set.
# NOTE(review): no binary lists the `dev-bins` feature in `required-features`;
# confirm which of these targets need the optional crates that feature enables.
[[bin]]
name = "bench_4bit_gpu"
path = "src/bin/bench_4bit_gpu.rs"
required-features = ["tokenizers"]

[[bin]]
name = "bench_cpu_kernel"
path = "src/bin/bench_cpu_kernel.rs"

[[bin]]
name = "bench_gemm_4bit"
path = "src/bin/bench_gemm_4bit.rs"

[[bin]]
name = "bench_sizes"
path = "src/bin/bench_sizes.rs"

[[bin]]
name = "bench_tinyllama"
path = "src/bin/bench_tinyllama.rs"
required-features = ["tokenizers"]

[[bin]]
name = "detect_model"
path = "src/bin/detect_model.rs"

[[bin]]
name = "fast_download"
path = "src/bin/fast_download.rs"

[[bin]]
name = "quick_gen"
path = "src/bin/quick_gen.rs"
required-features = ["tokenizers"]

[[bin]]
name = "run_4bit_llama"
path = "src/bin/run_4bit_llama.rs"
required-features = ["tokenizers"]

[[bin]]
name = "test_13b"
path = "src/bin/test_13b.rs"
required-features = ["tokenizers"]

[[bin]]
name = "test_4bit_inference"
path = "src/bin/test_4bit_inference.rs"
required-features = ["tokenizers"]

# NOTE(review): no `required-features` here although the name suggests CUDA;
# confirm this target builds when the `cuda` feature is off.
[[bin]]
name = "test_cuda_gemm"
path = "src/bin/test_cuda_gemm.rs"

[[bin]]
name = "test_memory"
path = "src/bin/test_memory.rs"

# Example targets, listed explicitly because `autoexamples = false` in
# [package]. None declares `required-features`, so each one is built under
# whatever feature set is active.
[[example]]
name = "basic_generate"
path = "examples/basic_generate.rs"

[[example]]
name = "benchmark"
path = "examples/benchmark.rs"

# NOTE(review): the name suggests CUDA but the `cuda` feature is not required;
# confirm this example builds without it.
[[example]]
name = "cuda_test"
path = "examples/cuda_test.rs"

[[example]]
name = "debug_load"
path = "examples/debug_load.rs"

[[example]]
name = "e2e_benchmark"
path = "examples/e2e_benchmark.rs"

[[example]]
name = "interactive_chat"
path = "examples/interactive_chat.rs"

[[example]]
name = "model_info"
path = "examples/model_info.rs"

[[example]]
name = "ttt_benchmark"
path = "examples/ttt_benchmark.rs"

# Integration-test targets, listed explicitly because `autotests = false` in
# [package]. Each entry is compiled as its own test crate.
[[test]]
name = "accuracy_test"
path = "tests/accuracy_test.rs"

[[test]]
name = "bitllama_e2e"
path = "tests/bitllama_e2e.rs"

# NOTE(review): `common` is registered as a standalone test target; if
# tests/common.rs only holds helpers shared by the other tests, confirm that
# building and running it as its own test crate is intended.
[[test]]
name = "common"
path = "tests/common.rs"

[[test]]
name = "gguf_e2e"
path = "tests/gguf_e2e.rs"

[[test]]
name = "load_direct_benchmark"
path = "tests/load_direct_benchmark.rs"

[[test]]
name = "load_packed_e2e"
path = "tests/load_packed_e2e.rs"

# Runtime dependencies. Every entry marked `optional = true` is pulled in only
# when a feature in [features] names it through `dep:<crate>`.
[dependencies]
anyhow = { version = "1.0" }
byteorder = { version = "1.5.0" }
# Both candle crates are pinned to one exact release and built without their
# default features.
candle-core = { version = "=0.8.4", default-features = false }
candle-nn = { version = "=0.8.4", default-features = false }
console_error_panic_hook = { version = "0.1", optional = true }
cuda-runtime-sys = { version = "0.3.0-alpha.1", optional = true }
env_logger = { version = "0.11", optional = true }
# NOTE(review): the wasm32 target table at the end of this manifest declares a
# second, non-optional getrandom requirement (0.2 with `js`); confirm the two
# declarations are meant to coexist.
getrandom = { version = "0.3", features = ["wasm_js"], optional = true }
half = { version = "2.3" }
js-sys = { version = "0.3", optional = true }
libc = { version = "0.2" }
pyo3 = { version = "0.24", features = ["extension-module", "macros"], optional = true }
rand = { version = "0.8" }
rayon = { version = "1.8" }
reqwest = { version = "0.12", features = ["blocking"] }
safetensors = { version = "0.5", optional = true }
serde = { version = "1.0", features = ["derive"] }
serde_json = { version = "1.0" }
thiserror = { version = "2.0" }
tokenizers = { version = "0.22", optional = true }
tracing = { version = "0.1" }
wasm-bindgen = { version = "0.2", optional = true }
web-sys = { version = "0.3", features = ["console"], optional = true }
windows-sys = { version = "0.59", features = ["Win32_System_ProcessStatus", "Win32_System_Threading"], optional = true }

# Crates available to the build script (`build = "build.rs"` in [package]).
[build-dependencies]
anyhow = { version = "1.0" }
cc = { version = "1.0" }
glob = { version = "0.3" }

# Applies only when compiling for a wasm32 target.
# NOTE(review): the main dependency list already declares getrandom 0.3
# (feature `wasm_js`, optional). This entry adds getrandom 0.2 (feature `js`),
# non-optional, under the same crate name; confirm both are intended.
[target.'cfg(target_arch = "wasm32")'.dependencies]
getrandom = { version = "0.2", features = ["js"] }