# splintr 0.8.0
#
# Fast Rust BPE tokenizer with Python bindings
# Documentation: see the readme and repository listed under [package].
# Runtime dependencies, one inline table per crate.
[dependencies]
aho-corasick = { version = "1.1" }
base64 = { version = "0.22" }
lru = { version = "0.16" }
# Optional: pulled in by the `pcre2` feature, which is part of `default`.
pcre2 = { version = "0.2", optional = true }
# Optional: pulled in by the `python` feature.
pyo3 = { version = "0.27", optional = true, features = ["extension-module"] }
rayon = { version = "1.10" }
# Pinned to a pre-release requirement; the `jit` and `simd` features are requested.
regexr = { version = "0.1.0-beta.5", features = ["jit", "simd"] }
rustc-hash = { version = "2.0" }
thiserror = { version = "2.0" }

# pcre2 is also listed as a dev-dependency, and here it is not marked
# optional (the [dependencies] entry is), so test targets get it regardless
# of whether the `pcre2` feature is enabled.
[dev-dependencies.pcre2]
version = "0.2"

# Cargo features. Each named feature enables exactly one optional dependency
# through the `dep:` syntax.
[features]
# Features enabled when the consumer does not opt out of defaults.
default = ["pcre2"]
# Enables the optional `pcre2` dependency.
pcre2 = ["dep:pcre2"]
# Enables the optional `pyo3` dependency; not part of `default`.
python = ["dep:pyo3"]

# Library target, declared explicitly (`autolib = false` in [package]).
[lib]
# Build both a `cdylib` and an `rlib` artifact.
# NOTE(review): the cdylib is presumably for the Python bindings — confirm.
crate-type = ["cdylib", "rlib"]
name = "splintr"
path = "src/lib.rs"

# Crate metadata. `name` and `version` lead, the remaining keys are
# alphabetical, and `description` comes last.
[package]
name = "splintr"
version = "0.8.0"
# Automatic target discovery is switched off; targets are declared explicitly
# in the [lib] and [[test]] tables of this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script.
build = false
categories = ["text-processing", "encoding"]
edition = "2021"
homepage = "https://github.com/ml-rust/splintr"
keywords = [
    "tokenizer",
    "bpe",
    "tiktoken",
    "gpt",
    "llm",
]
license = "MIT"
readme = "README.md"
repository = "https://github.com/ml-rust/splintr"
description = "Fast Rust BPE tokenizer with Python bindings"

# Release profile: these settings favor runtime speed of the built artifact
# at the cost of longer compile times.
[profile.release]
# A single codegen unit per crate.
codegen-units = 1
# Link-time optimization enabled.
lto = true
opt-level = 3

# Integration test targets, listed explicitly because `autotests = false`
# in [package]. Each entry maps a target name to a file under tests/.
# NOTE(review): names look like one target per tokenizer vocabulary — confirm.
[[test]]
name = "cl100k"
path = "tests/cl100k.rs"

[[test]]
name = "deepseek_v3"
path = "tests/deepseek_v3.rs"

[[test]]
name = "llama3"
path = "tests/llama3.rs"

[[test]]
name = "mistral_v2"
path = "tests/mistral_v2.rs"

[[test]]
name = "mistral_v3"
path = "tests/mistral_v3.rs"

[[test]]
name = "o200k"
path = "tests/o200k.rs"