# Runtime dependencies. Bare version strings are Cargo's default (caret)
# requirements.
[dependencies]
aho-corasick = "1.1"
base64 = "0.22"
lru = "0.16"
# Optional: only built when the `pcre2` feature is enabled.
pcre2 = { version = "0.2", optional = true }
# Optional: only built when the `python` feature is enabled.
pyo3 = { version = "0.27", features = ["extension-module"], optional = true }
rayon = "1.10"
# Pinned to a pre-release; Cargo only selects pre-release versions when the
# requirement names one explicitly.
regexr = { version = "0.1.0-beta.5", features = ["jit", "simd"] }
rustc-hash = "2.0"
thiserror = "2.0"
# `pcre2` is also an optional runtime dependency; listing it here makes it
# available to tests, examples and benches regardless of enabled features.
[dev-dependencies]
pcre2 = "0.2"
# Cargo features. Each non-default feature below maps to one optional
# dependency via the `dep:` syntax, so no implicit feature named after the
# dependency is created.
[features]
# Features enabled unless the consumer passes `default-features = false`.
default = ["pcre2"]
# Enables the optional `pcre2` dependency.
pcre2 = ["dep:pcre2"]
# Enables the optional `pyo3` dependency; not part of `default`.
python = ["dep:pyo3"]
# Library target, declared explicitly because `autolib = false` in [package].
[lib]
name = "splintr"
path = "src/lib.rs"
# `rlib` lets other Rust crates link against the library; `cdylib` produces a
# C-ABI dynamic library (presumably the artifact loaded by the Python
# bindings -- confirm against the build tooling).
crate-type = ["cdylib", "rlib"]
[package]
name = "splintr"
version = "0.8.0"
# Target auto-discovery is switched off for every target kind, so each target
# has to be declared explicitly in this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is used for this package.
build = false
categories = ["text-processing", "encoding"]
edition = "2021"
homepage = "https://github.com/ml-rust/splintr"
keywords = ["tokenizer", "bpe", "tiktoken", "gpt", "llm"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/ml-rust/splintr"
description = "Fast Rust BPE tokenizer with Python bindings"
# Release profile: trades longer compile times for runtime speed.
[profile.release]
opt-level = 3
# `true` selects "fat" link-time optimization across the whole dependency graph.
lto = true
# A single codegen unit gives the optimizer the widest view of the crate, at
# the cost of parallel code generation.
codegen-units = 1
# Integration-test targets. They are listed one by one because
# `autotests = false` in [package] disables discovery of files under `tests/`.
# Each entry builds `path` as its own test binary named `name`.
[[test]]
name = "cl100k"
path = "tests/cl100k.rs"
[[test]]
name = "deepseek_v3"
path = "tests/deepseek_v3.rs"
[[test]]
name = "llama3"
path = "tests/llama3.rs"
[[test]]
name = "mistral_v2"
path = "tests/mistral_v2.rs"
[[test]]
name = "mistral_v3"
path = "tests/mistral_v3.rs"
[[test]]
name = "o200k"
path = "tests/o200k.rs"