# Package metadata for the `kitoken` crate. Target auto-discovery and the
# build script are switched off (`auto*` / `build` = false), so targets have
# to be declared explicitly in this manifest.
[package]
name = "kitoken"
version = "0.11.0"
authors = ["Christian Sdunek <me@systemcluster.me>"]
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = false
categories = [
    "text-processing",
    "algorithms",
    "wasm",
    "no-std",
    "parser-implementations",
]
edition = "2024"
homepage = "https://kitoken.dev"
# Only these paths are packaged when the crate is published.
include = [
    "Cargo.toml",
    "src/**/*",
    "LICENCE",
]
keywords = [
    "tokenizer",
    "nlp",
    "bpe",
    "unigram",
    "wordpiece",
]
license = "BSD-2-Clause"
readme = "README.md"
repository = "https://github.com/Systemcluster/kitoken"
# Set explicitly; edition 2024 would otherwise default to resolver "3".
resolver = "2"
rust-version = "1.86.0"
description = "Fast tokenizer for language models, supporting BPE, Unigram and WordPiece tokenization"
# docs.rs build settings: enable the `all` feature and pass `--cfg docsrs`
# to rustdoc.
[package.metadata.docs.rs]
features = ["all"]
rustdoc-args = ["--cfg", "docsrs"]
# Feature flags. Optional dependencies are referenced through `dep:`, so they
# do not create implicit features of the same name.
[features]
# Enabled by default: everything in `all` except `split`, `regex-unicode`
# and `web`.
default = [
    "std",
    "serialization",
    "normalization",
    "convert",
    "regex-perf",
    "multiversion",
]
# Umbrella feature; leaves out `regex-onig` and `unstable`.
all = [
    "std",
    "serialization",
    "normalization",
    "convert",
    "split",
    "regex-unicode",
    "regex-perf",
    "multiversion",
    "web",
]
# Forwards `std` to the listed dependencies. `multiversion?/std` only takes
# effect when the optional `multiversion` dependency is enabled as well.
std = [
    "thiserror/std",
    "orx-priority-queue/std",
    "memchr/std",
    "multiversion?/std",
]
multiversion = ["dep:multiversion"]
serialization = ["dep:serde", "dep:postcard"]
# `web` implies `std` and pulls in the optional `reqwest` dependency.
web = ["std", "dep:reqwest"]
# Enables no other feature or dependency from this manifest.
# NOTE(review): presumably gates code via `cfg` in the sources - confirm.
unstable = []
# `convert` switches on every `convert-*` feature below.
convert = [
    "convert-tiktoken",
    "convert-sentencepiece",
    "convert-tokenizers",
    "convert-tekken",
    "convert-detect",
]
convert-detect = ["serialization"]
convert-sentencepiece = ["dep:sentencepiece-model"]
convert-tekken = ["dep:base64", "dep:serde", "dep:serde_json"]
convert-tiktoken = ["dep:base64"]
convert-tokenizers = [
    "dep:base64",
    "dep:serde",
    "dep:serde_json",
    "hashbrown/serde",
]
# `normalization` switches on both `normalization-*` features.
normalization = ["normalization-unicode", "normalization-charsmap"]
normalization-charsmap = ["bstr/unicode"]
normalization-unicode = ["dep:unicode-normalization"]
# `regex-perf` and `regex-unicode` forward to `fancy-regex` features;
# `regex-onig` pulls in the optional `onig` dependency.
regex-onig = ["dep:onig"]
regex-perf = ["fancy-regex/perf"]
regex-unicode = ["fancy-regex/unicode"]
# `split` currently consists of the single `split-unicode-script` feature.
split = ["split-unicode-script"]
split-unicode-script = ["dep:unicode-script"]
# Library target, declared explicitly because `autolib` is disabled in
# `[package]`.
[lib]
path = "src/lib.rs"
name = "kitoken"
# Runtime dependencies. Entries marked `optional = true` are only pulled in
# through the `dep:` references in `[features]`.
# NOTE(review): `log`, `unicode-normalization` and `unicode-script` are the
# only entries that keep their default features - confirm this is intended,
# given the crate lists the `no-std` category.
[dependencies]
base64 = { version = "0.22", default-features = false, features = ["alloc"], optional = true }
bstr = { version = "1.12", default-features = false, features = ["alloc"] }
derive_more = { version = "2.1", default-features = false, features = ["deref", "deref_mut", "as_ref", "index", "index_mut"] }
fancy-regex = { version = "0.18", default-features = false }
hashbrown = { version = "0.17", default-features = false, features = ["default-hasher", "inline-more", "allocator-api2"] }
log = "0.4"
memchr = { version = "2.8", default-features = false, features = ["alloc"] }
multiversion = { version = "0.8", default-features = false, optional = true }
once_cell = { version = "1.21", default-features = false, features = ["alloc", "race"] }
onig = { version = "6.5", default-features = false, optional = true }
orx-priority-queue = { version = "1.8", default-features = false }
postcard = { version = "1.1", default-features = false, features = ["alloc"], optional = true }
regex-automata = { version = "0.4", default-features = false, features = ["alloc", "syntax", "meta", "nfa", "dfa", "hybrid", "unicode-perl", "unicode-gencat", "unicode-case"] }
regex-syntax = { version = "0.8", default-features = false, features = ["unicode-perl", "unicode-gencat", "unicode-case"] }
reqwest = { version = "0.13", default-features = false, features = ["blocking", "rustls", "system-proxy"], optional = true }
sentencepiece-model = { version = "0.1", default-features = false, optional = true }
serde = { version = "1.0", default-features = false, features = ["alloc", "derive"], optional = true }
serde_json = { version = "1.0", default-features = false, features = ["alloc"], optional = true }
thiserror = { version = "2.0", default-features = false }
unicode-normalization = { version = "0.1", optional = true }
unicode-script = { version = "0.5", optional = true }
# Dependencies used only by tests, examples and benchmarks.
[dev-dependencies]
console = { version = "0.16", features = ["windows-console-colors"] }
criterion = { version = "0.8", default-features = false, features = ["cargo_bench_support"] }
simple_logger = "5.2"
# Benchmark profile: starts from `release`, then raises the optimization
# level to 3 and keeps full, unstripped debug info.
[profile.bench]
inherits = "release"
opt-level = 3
debug = 2
strip = "none"
# Build scripts and proc-macros are compiled at the same optimization level.
build-override.opt-level = 3
# Custom `performance` profile: `release` settings with `opt-level = 3`
# in place of the release profile's `opt-level = "s"`.
[profile.performance]
inherits = "release"
opt-level = 3
build-override.opt-level = 3
# Release profile: size-optimized (`opt-level = "s"`) with fat LTO, a single
# codegen unit, stripped symbols and abort-on-panic.
[profile.release]
opt-level = "s"
codegen-units = 1
lto = "fat"
panic = "abort"
strip = "symbols"
debug = 0
debug-assertions = false
overflow-checks = false
incremental = false
# Build scripts and proc-macros use the same size-oriented level.
build-override.opt-level = "s"
# Test profile: `dev` settings with `opt-level = 1`.
[profile.test]
inherits = "dev"
opt-level = 1