# Crate identity and publishing metadata.
[package]
name = "wordchipper"
version = "0.9.2"
authors = [
    "Crutcher Dunnavant <crutcher@gmail.com>",
    "Dilshod Tadjibaev",
]
edition = "2024"
keywords = ["ai", "gpt", "bpe", "tokenizer"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/zspacelabs/wordchipper"
resolver = "2"
rust-version = "1.93.0"
# Build-script detection and target auto-discovery are switched off, so
# every target has to be declared explicitly in this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = false
description = "HPC Rust LLM Tokenizer Library"

# docs.rs builds the documentation with every feature enabled.
[package.metadata.docs.rs]
all-features = true

# Cargo feature flags. Comments here deliberately use a single `#`.
[features]
# Enabled when the crate is pulled in without `default-features = false`.
default = [
    "std",
    "fast-hash",
    "parallel",
]

# Forwards `std` (or `default`) to the dependencies that are declared with
# `default-features = false`.
# NOTE(review): `once_cell` is also declared with `default-features = false`
# (and `critical-section`) but is not forwarded here - confirm intentional.
std = [
    "thiserror/std",
    "base64/std",
    "strum/std",
    "aho-corasick/std",
    "fancy-regex/default",
    "foldhash?/std",
    "log/std",
    "num-traits/std",
    "regex/default",
    "regex-automata/std",
]

# `parallel` pulls in rayon on top of `concurrent`, which itself needs `std`.
concurrent = ["std"]
parallel = ["dep:rayon", "concurrent"]

# Optional foldhash dependency.
fast-hash = ["dep:foldhash"]

# Optional wordchipper-disk-cache dependency, plus the two TLS selections
# that are forwarded to it.
download = ["dep:wordchipper-disk-cache", "std"]
default-tls = ["download", "wordchipper-disk-cache/default-tls"]
rustls-tls = ["download", "wordchipper-disk-cache/rustls-tls"]

# Optional serde_json / tokenizers dependencies.
datagym = ["dep:serde_json", "std"]
huggingface = ["std", "dep:tokenizers"]

# Bundle of the download, datagym, default-tls and huggingface features.
client = [
    "download",
    "datagym",
    "default-tls",
    "huggingface",
]

# Enables no dependencies or other features by itself.
testing = []

# Turns on the optional `tracing` dependency together with its
# `attributes` feature.
tracing = ["tracing/attributes"]

# Library target.
[lib]
path = "src/lib.rs"
name = "wordchipper"

# Explicitly declared test target.
[[test]]
path = "tests/validation.rs"
name = "validation"

# Runtime dependencies. Entries marked `optional = true` are only built when
# a feature in `[features]` enables them.
[dependencies]
aho-corasick = { version = "1.1", default-features = false }
base64 = { version = "0.22.1", default-features = false, features = ["alloc"] }
cfg-if = "1.0.4"
document-features = "0.2.12"
fancy-regex = { version = "0.13.0", default-features = false, features = ["unicode"] }
foldhash = { version = "0.2.0", default-features = false, optional = true }
hashbrown = { version = "0.16.0", features = ["alloc"] }
indoc = "2.0.7"
inventory = "0.3.20"
log = { version = "0.4.20", default-features = false }
logos = { version = "0.16.1", default-features = false, features = ["export_derive"] }
num-traits = { version = "0.2.10", default-features = false }
once_cell = { version = "1.21.0", default-features = false, features = ["alloc", "critical-section"] }
rayon = { version = "1.11", optional = true }
regex = { version = "1.12.3", default-features = false, features = ["unicode"] }
ringbuffer = { version = "0.16", default-features = false }
serde_json = { version = "1.0.140", optional = true }
spin = "0.9.8"
strum = { version = "0.27.0", default-features = false, features = ["derive"] }
thiserror = { version = "2.0.10", default-features = false }
tokenizers = { version = "0.22.2", features = ["http"], optional = true }
tracing = { version = "0.1.40", default-features = false, optional = true }
unicode-general-category = { version = "1.1.0", default-features = false }
wordchipper-disk-cache = { version = "0.9.2", default-features = false, optional = true }

# Kept as its own table because the feature list is too long for one line.
[dependencies.regex-automata]
version = "0.4"
default-features = false
features = [
    "alloc",
    "meta",
    "nfa-thompson",
    "hybrid",
    "unicode",
]

# Development-only dependencies.
[dev-dependencies]
proptest = "1.10.0"
serial_test = "3.4.0"
# NOTE(review): `tempdir` is deprecated upstream in favour of `tempfile`;
# switching needs matching changes in the test code, so it is unchanged here.
tempdir = "0.3.7"
tiktoken-rs = "0.9.1"
tokenizers = { version = "0.22.2", features = ["http"] }

# rustc lints: the whole `warnings` group is set to deny for this package.
[lints.rust]
warnings = "deny"

# Clippy lints: `double_must_use` is allowed, `doc_markdown` is denied.
[lints.clippy]
double_must_use = "allow"
doc_markdown = "deny"