# Crate metadata. Identity keys come first, the remaining keys are sorted
# alphabetically, and the description is kept last.
[package]
name = "splintr"
version = "0.9.0"
# Target auto-discovery is switched off for every target kind; the library
# and integration-test targets are declared explicitly in [lib] and [[test]].
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is associated with this package.
build = false
categories = ["text-processing", "encoding"]
edition = "2021"
homepage = "https://github.com/ml-rust/splintr"
keywords = ["tokenizer", "bpe", "sentencepiece", "wordpiece", "llm"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/ml-rust/splintr"
description = "Fast Rust tokenizer (BPE + SentencePiece + WordPiece) with Python bindings"

# Cargo feature flags.
[features]
# Active unless a consumer opts out with `default-features = false`.
default = ["rayon", "regexr-jit"]
# Each of the next three only activates a single optional dependency.
pcre2 = ["dep:pcre2"]
python = ["dep:pyo3"]
rayon = ["dep:rayon"]
# Turns on the `jit` and `simd` features of the `regexr` dependency.
regexr-jit = ["regexr/jit", "regexr/simd"]
# Activates no dependency and no other feature from this manifest.
# NOTE(review): presumably consumed via `cfg(feature = "wasm")` in the
# sources; confirm.
wasm = []

# Library target, declared explicitly because `autolib` is disabled in
# [package]. It is built both as a C-compatible dynamic library (`cdylib`)
# and as a regular Rust library (`rlib`).
[lib]
name = "splintr"
path = "src/lib.rs"
crate-type = ["cdylib", "rlib"]

# Integration-test targets, listed by hand because `autotests` is disabled
# in [package]. Each entry maps a test name to its source file under tests/.
[[test]]
path = "tests/cl100k.rs"
name = "cl100k"

[[test]]
path = "tests/deepseek_v3.rs"
name = "deepseek_v3"

[[test]]
path = "tests/llama3.rs"
name = "llama3"

[[test]]
path = "tests/mistral_v2.rs"
name = "mistral_v2"

[[test]]
path = "tests/mistral_v3.rs"
name = "mistral_v3"

[[test]]
path = "tests/o200k.rs"
name = "o200k"

# Runtime dependencies, sorted alphabetically. A bare version string is
# shorthand for a table containing only `version`; entries that need more
# than a version use an inline table.
[dependencies]
aho-corasick = "1.1"
base64 = "0.22"
lru = "0.16"
# Optional: activated by the `pcre2` feature.
pcre2 = { version = "0.2", optional = true }
# Optional: activated by the `python` feature.
pyo3 = { version = "0.27", features = ["extension-module"], optional = true }
# Optional: activated by the `rayon` feature, which is in the default set.
rayon = { version = "1.10", optional = true }
# Default features are off; `jit` and `simd` are re-enabled through the
# `regexr-jit` feature of this crate.
regexr = { version = "0.1.0-beta.5", default-features = false }
rustc-hash = "2.0"
thiserror = "2.0"
unicode-general-category = "1.0"
unicode-normalization = "0.1"

# Dependencies used only when building tests, examples and benchmarks.
# `pcre2` is listed here without `optional`, so it is always available to
# those targets even when the `pcre2` feature is off.
[dev-dependencies]
pcre2 = "0.2"

# Release profile: highest optimization level, link-time optimization
# enabled, and a single codegen unit per crate.
[profile.release]
codegen-units = 1
lto = true
opt-level = 3