# Package metadata for the `cortex_rust` crate.
[package]
name = "cortex_rust"
version = "0.6.0"
# Automatic target discovery is switched off for every target kind, so each
# library, binary, example and test target must be declared explicitly in
# this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = "build.rs"
categories = ["science", "algorithms"]
documentation = "https://docs.rs/cortex_rust"
edition = "2021"
# `homepage` is intentionally not set: it used to repeat the `repository`
# URL, and Cargo's manifest reference asks that `homepage` only be used for
# a dedicated website distinct from the repository and the API docs.
keywords = [
    "llm",
    "rust",
    "ttt",
    "quantization",
    "inference",
]
license = "MIT"
readme = "README_PYPI.md"
repository = "https://github.com/imonoonoko/Bit-TTT-Engine"
description = "High-performance LLM inference with 4-bit quantization and Test-Time Training (TTT)"
# Cargo feature flags. Only `python` and `tokenizers` are enabled by default;
# every other feature is opt-in.
[features]
default = ["python", "tokenizers"]
# Pulls in the CUDA runtime bindings and turns on candle-core's `cuda` feature.
cuda = ["dep:cuda-runtime-sys", "candle-core/cuda"]
# Extra optional dependencies grouped under one switch; also enables the
# `tokenizers` feature.
dev-bins = [
    "dep:safetensors",
    "dep:env_logger",
    "dep:windows-sys",
    "tokenizers",
]
# Enables no dependencies or other features by itself.
# NOTE(review): presumably gates code through `cfg(feature = "flash-attention")` - confirm.
flash-attention = []
# Enables the optional `pyo3` dependency.
python = ["dep:pyo3"]
# Enables the optional `safetensors` dependency.
safetensors = ["dep:safetensors"]
# Enables the optional `tokenizers` dependency.
tokenizers = ["dep:tokenizers"]
# Enables the optional dependencies used for WebAssembly builds.
wasm = [
    "dep:wasm-bindgen",
    "dep:js-sys",
    "dep:web-sys",
    "dep:console_error_panic_hook",
    "dep:getrandom",
]
# Library target. It is built both as a C-compatible dynamic library
# (`cdylib`) and as a regular Rust library (`rlib`) that other Rust targets
# can link against.
# NOTE(review): the `cdylib` output is presumably what the Python and wasm
# builds load - confirm.
[lib]
name = "cortex_rust"
path = "src/lib.rs"
crate-type = ["cdylib", "rlib"]
# Binary targets, declared explicitly because `autobins = false`.
# A binary with `required-features` is skipped by Cargo unless all of the
# listed features are enabled; `tokenizers` is part of the default feature set.
# NOTE(review): none of the binaries require `dev-bins` or `cuda`, although
# names such as `test_cuda_gemm` suggest they might - confirm against the
# sources under src/bin/.
[[bin]]
name = "bench_4bit_gpu"
path = "src/bin/bench_4bit_gpu.rs"
required-features = ["tokenizers"]

[[bin]]
name = "bench_cpu_kernel"
path = "src/bin/bench_cpu_kernel.rs"

[[bin]]
name = "bench_gemm_4bit"
path = "src/bin/bench_gemm_4bit.rs"

[[bin]]
name = "bench_sizes"
path = "src/bin/bench_sizes.rs"

[[bin]]
name = "bench_tinyllama"
path = "src/bin/bench_tinyllama.rs"
required-features = ["tokenizers"]

[[bin]]
name = "detect_model"
path = "src/bin/detect_model.rs"

[[bin]]
name = "fast_download"
path = "src/bin/fast_download.rs"

[[bin]]
name = "quick_gen"
path = "src/bin/quick_gen.rs"
required-features = ["tokenizers"]

[[bin]]
name = "run_4bit_llama"
path = "src/bin/run_4bit_llama.rs"
required-features = ["tokenizers"]

[[bin]]
name = "test_13b"
path = "src/bin/test_13b.rs"
required-features = ["tokenizers"]

[[bin]]
name = "test_4bit_inference"
path = "src/bin/test_4bit_inference.rs"
required-features = ["tokenizers"]

[[bin]]
name = "test_cuda_gemm"
path = "src/bin/test_cuda_gemm.rs"

[[bin]]
name = "test_memory"
path = "src/bin/test_memory.rs"
# Example targets, declared explicitly because `autoexamples = false`.
# None of them declares `required-features`.
[[example]]
name = "basic_generate"
path = "examples/basic_generate.rs"

[[example]]
name = "benchmark"
path = "examples/benchmark.rs"

[[example]]
name = "cuda_test"
path = "examples/cuda_test.rs"

[[example]]
name = "debug_load"
path = "examples/debug_load.rs"

[[example]]
name = "e2e_benchmark"
path = "examples/e2e_benchmark.rs"

[[example]]
name = "interactive_chat"
path = "examples/interactive_chat.rs"

[[example]]
name = "model_info"
path = "examples/model_info.rs"

[[example]]
name = "ttt_benchmark"
path = "examples/ttt_benchmark.rs"
# Integration test targets, declared explicitly because `autotests = false`.
# NOTE(review): `common` is registered as a test target of its own; if
# tests/common.rs only holds helpers shared by the other tests, it may not
# need to be a separate target - confirm.
[[test]]
name = "accuracy_test"
path = "tests/accuracy_test.rs"

[[test]]
name = "bitllama_e2e"
path = "tests/bitllama_e2e.rs"

[[test]]
name = "common"
path = "tests/common.rs"

[[test]]
name = "gguf_e2e"
path = "tests/gguf_e2e.rs"

[[test]]
name = "load_direct_benchmark"
path = "tests/load_direct_benchmark.rs"

[[test]]
name = "load_packed_e2e"
path = "tests/load_packed_e2e.rs"
# Runtime dependencies. Entries marked `optional = true` are only built when
# a feature from the [features] table enables them via `dep:<name>`.
[dependencies]
anyhow = "1.0"
byteorder = "1.5.0"
# Both candle crates are pinned to exactly 0.8.4 with their default features
# turned off; the `cuda` feature re-enables `candle-core/cuda`.
candle-core = { version = "=0.8.4", default-features = false }
candle-nn = { version = "=0.8.4", default-features = false }
# Enabled by the `wasm` feature.
console_error_panic_hook = { version = "0.1", optional = true }
# Enabled by the `cuda` feature.
cuda-runtime-sys = { version = "0.3.0-alpha.1", optional = true }
# Enabled by the `dev-bins` feature.
env_logger = { version = "0.11", optional = true }
# Enabled by the `wasm` feature.
getrandom = { version = "0.3", features = ["wasm_js"], optional = true }
half = "2.3"
# Enabled by the `wasm` feature.
js-sys = { version = "0.3", optional = true }
libc = "0.2"
# Enabled by the `python` feature, which is part of the default feature set.
# NOTE(review): `extension-module` is therefore active in default builds;
# PyO3's documentation warns that this feature can cause linker errors for
# `cargo test` and standalone binaries - confirm this is intended.
pyo3 = { version = "0.24", features = ["extension-module", "macros"], optional = true }
rand = "0.8"
rayon = "1.8"
reqwest = { version = "0.12", features = ["blocking"] }
# Enabled by the `safetensors` and `dev-bins` features.
safetensors = { version = "0.5", optional = true }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "2.0"
# Enabled by the `tokenizers` feature (on by default) and by `dev-bins`.
tokenizers = { version = "0.22", optional = true }
tracing = "0.1"
# Enabled by the `wasm` feature.
wasm-bindgen = { version = "0.2", optional = true }
# Enabled by the `wasm` feature.
web-sys = { version = "0.3", features = ["console"], optional = true }
# Enabled by the `dev-bins` feature.
windows-sys = { version = "0.59", features = ["Win32_System_ProcessStatus", "Win32_System_Threading"], optional = true }
# Crates available to the build script (`build.rs`, set via `package.build`).
[build-dependencies]
anyhow = "1.0"
cc = "1.0"
glob = "0.3"
# Dependency that applies only when compiling for a wasm32 target.
# NOTE(review): this requests getrandom 0.2 (feature `js`) while the general
# dependency table requests getrandom 0.3 (feature `wasm_js`) under the same
# name. Presumably the 0.2 entry exists to enable the JS backend for crates
# that still depend on getrandom 0.2 transitively - confirm, and consider
# renaming one entry with `package = "getrandom"` to keep them distinct.
[target.'cfg(target_arch = "wasm32")'.dependencies]
getrandom = { version = "0.2", features = ["js"] }