# kitoken 0.10.1
#
# Fast and versatile tokenizer for language models, supporting BPE, Unigram and WordPiece tokenization
# Documentation
# Runtime dependencies, sorted alphabetically. Entries with `optional = true`
# are only pulled in when a feature in `[features]` names them via `dep:`.
[dependencies]
base64 = { version = "0.22", default-features = false, features = ["alloc"], optional = true }
bstr = { version = "1.10", default-features = false, features = ["alloc"] }
derive_more = { version = "1.0", default-features = false, features = ["deref", "deref_mut", "as_ref", "index", "index_mut"] }
fancy-regex = { version = "0.14", default-features = false }
hashbrown = { version = "0.15", default-features = false, features = ["default-hasher", "inline-more", "allocator-api2"] }
log = "0.4"
memchr = { version = "2.7", default-features = false, features = ["alloc"] }
multiversion = { version = "0.8", default-features = false, optional = true }
once_cell = { version = "1.19", default-features = false, features = ["alloc", "race"] }
onig = { version = "6.4", default-features = false, optional = true }
orx-priority-queue = { version = "1.4", default-features = false }
postcard = { version = "1.0", default-features = false, features = ["alloc"], optional = true }
regex-automata = { version = "0.4", default-features = false, features = ["alloc", "syntax", "meta", "nfa", "dfa", "hybrid", "unicode-perl", "unicode-gencat", "unicode-case"] }
regex-syntax = { version = "0.8", default-features = false, features = ["unicode-perl", "unicode-gencat", "unicode-case"] }
sentencepiece-model = { version = "0.1", default-features = false, optional = true }
serde = { version = "1.0", default-features = false, features = ["alloc", "derive"], optional = true }
serde_json = { version = "1.0", default-features = false, features = ["alloc"], optional = true }
thiserror = { version = "1.0", optional = true }
unicode-normalization = { version = "0.1", optional = true }
unicode-script = { version = "0.5", optional = true }

# Dependencies used only by tests, examples and benchmarks.
[dev-dependencies]
console = { version = "0.15", features = ["windows-console-colors"] }
criterion = { version = "0.5", default-features = false, features = ["cargo_bench_support"] }
simple_logger = "5.0"

# Feature flags.
#
# `default` and `all` are umbrella sets: `all` is `default` plus `split` and
# `regex-unicode`. Neither of them enables `regex-onig` or `unstable`.
[features]
default = [
    "std",
    "serialization",
    "normalization",
    "convert",
    "regex-perf",
    "multiversion",
]
all = [
    "std",
    "serialization",
    "normalization",
    "convert",
    "split",
    "regex-unicode",
    "regex-perf",
    "multiversion",
]

# `multiversion?/std` only forwards `std` when the optional `multiversion`
# dependency is enabled; it does not enable that dependency by itself.
std = [
    "dep:thiserror",
    "orx-priority-queue/std",
    "memchr/std",
    "multiversion?/std",
]
multiversion = ["dep:multiversion"]
serialization = ["dep:serde", "dep:postcard"]
unstable = []

# Conversion from other tokenizer formats; `convert` enables every converter.
convert = [
    "convert-tiktoken",
    "convert-sentencepiece",
    "convert-tokenizers",
    "convert-tekken",
    "convert-detect",
]
convert-detect = ["serialization"]
convert-sentencepiece = ["dep:sentencepiece-model"]
convert-tekken = ["dep:base64", "dep:serde", "dep:serde_json"]
convert-tiktoken = ["dep:base64"]
convert-tokenizers = [
    "dep:base64",
    "dep:serde",
    "dep:serde_json",
    "hashbrown/serde",
]

# Normalization; `normalization` enables both variants.
normalization = ["normalization-unicode", "normalization-charsmap"]
normalization-charsmap = ["bstr/unicode"]
normalization-unicode = ["dep:unicode-normalization"]

# Regex backend selection and tuning.
regex-onig = ["dep:onig"]
regex-perf = ["fancy-regex/perf"]
regex-unicode = ["fancy-regex/unicode"]

# Splitting.
split = ["split-unicode-script"]
split-unicode-script = ["dep:unicode-script"]

# Library target, declared explicitly (`autolib = false` is set in `[package]`).
[lib]
path = "src/lib.rs"
name = "kitoken"

# Package metadata: identity first, remaining keys alphabetical, description last.
[package]
name = "kitoken"
version = "0.10.1"
authors = ["Christian Sdunek <me@systemcluster.me>"]
categories = [
    "text-processing",
    "algorithms",
    "wasm",
    "no-std",
    "parser-implementations",
]
edition = "2021"
homepage = "https://kitoken.dev"
# Only these paths are listed for inclusion in the published package.
include = ["Cargo.toml", "src/**/*", "LICENCE"]
keywords = ["tokenizer", "nlp", "bpe", "unigram", "wordpiece"]
license = "BSD-2-Clause"
readme = "README.md"
repository = "https://github.com/Systemcluster/kitoken"
resolver = "2"
rust-version = "1.82.0"
# Target auto-discovery and the build script are all switched off; the only
# target declared in this manifest is the `[lib]` table.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = false
description = "Fast and versatile tokenizer for language models, supporting BPE, Unigram and WordPiece tokenization"

# Extra rustdoc arguments for docs.rs builds: passes `--cfg docsrs`.
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]

# Benchmark profile: starts from `release`, then sets opt-level 3, debug
# level 2 and no stripping. Build-override units also use opt-level 3.
[profile.bench]
inherits = "release"
opt-level = 3
debug = 2
strip = "none"
build-override.opt-level = 3

# Custom `performance` profile: `release` with opt-level 3 instead of "s",
# for both the crate and build-override units.
[profile.performance]
inherits = "release"
opt-level = 3
build-override.opt-level = 3

# Release profile: optimizes for size ("s") with fat LTO and a single codegen
# unit, aborts on panic, and strips symbols.
[profile.release]
opt-level = "s"
lto = "fat"
codegen-units = 1
panic = "abort"
strip = "symbols"
debug = 0
debug-assertions = false
overflow-checks = false
incremental = false
build-override.opt-level = "s"

# Test profile: `dev` settings with opt-level raised to 1.
[profile.test]
opt-level = 1
inherits = "dev"