# kitoken 0.11.0
#
# Fast tokenizer for language models, supporting BPE, Unigram and WordPiece tokenization
# Documentation
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

[package]
name = "kitoken"
version = "0.11.0"
authors = ["Christian Sdunek <me@systemcluster.me>"]
# Target auto-discovery is disabled across the board; the only target is the
# library declared explicitly in `[lib]`.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is used.
build = false
categories = [
    "text-processing",
    "algorithms",
    "wasm",
    "no-std",
    "parser-implementations",
]
edition = "2024"
homepage = "https://kitoken.dev"
# Only these paths are packaged into the published crate.
include = ["Cargo.toml", "src/**/*", "LICENCE"]
keywords = [
    "tokenizer",
    "nlp",
    "bpe",
    "unigram",
    "wordpiece",
]
license = "BSD-2-Clause"
readme = "README.md"
repository = "https://github.com/Systemcluster/kitoken"
# Feature resolver version is pinned explicitly.
resolver = "2"
rust-version = "1.86.0"
description = "Fast tokenizer for language models, supporting BPE, Unigram and WordPiece tokenization"

[package.metadata.docs.rs]
# Documentation is built with the `all` feature set enabled.
features = ["all"]
# Passes `--cfg docsrs` to rustdoc for the documentation build.
rustdoc-args = ["--cfg", "docsrs"]

[features]
# --- Aggregate feature sets ---------------------------------------------
# Enabled when no explicit feature selection is made.
default = [
    "std",
    "serialization",
    "normalization",
    "convert",
    "regex-perf",
    "multiversion",
]
# Everything in `default` plus `split`, `regex-unicode` and `web`.
# `regex-onig` and `unstable` are not part of this set.
all = [
    "std",
    "serialization",
    "normalization",
    "convert",
    "split",
    "regex-unicode",
    "regex-perf",
    "multiversion",
    "web",
]

# --- Platform ------------------------------------------------------------
# Forwards `std` to the listed dependencies; the `?` form only takes effect
# when the optional `multiversion` dependency is itself enabled.
std = [
    "thiserror/std",
    "orx-priority-queue/std",
    "memchr/std",
    "multiversion?/std",
]
# Requires `std` and pulls in the optional `reqwest` dependency.
web = ["std", "dep:reqwest"]
multiversion = ["dep:multiversion"]
# Enables no dependencies or other features by itself.
unstable = []

# --- Serialization -------------------------------------------------------
serialization = ["dep:serde", "dep:postcard"]

# --- Normalization -------------------------------------------------------
normalization = ["normalization-unicode", "normalization-charsmap"]
normalization-charsmap = ["bstr/unicode"]
normalization-unicode = ["dep:unicode-normalization"]

# --- Regex backends and options ------------------------------------------
regex-onig = ["dep:onig"]
regex-perf = ["fancy-regex/perf"]
regex-unicode = ["fancy-regex/unicode"]

# --- Splitting -----------------------------------------------------------
split = ["split-unicode-script"]
split-unicode-script = ["dep:unicode-script"]

# --- Converters ----------------------------------------------------------
# Umbrella feature enabling every individual `convert-*` feature below.
convert = [
    "convert-tiktoken",
    "convert-sentencepiece",
    "convert-tokenizers",
    "convert-tekken",
    "convert-detect",
]
convert-detect = ["serialization"]
convert-sentencepiece = ["dep:sentencepiece-model"]
convert-tekken = ["dep:base64", "dep:serde", "dep:serde_json"]
convert-tiktoken = ["dep:base64"]
convert-tokenizers = [
    "dep:base64",
    "dep:serde",
    "dep:serde_json",
    "hashbrown/serde",
]

[lib]
# The single library target, declared explicitly because `autolib` is
# disabled in `[package]`.
path = "src/lib.rs"
name = "kitoken"

# Runtime dependencies. Entries marked `optional = true` are only compiled
# when a feature in `[features]` enables them via `dep:<name>`.
[dependencies]
base64 = { version = "0.22", default-features = false, features = ["alloc"], optional = true }
bstr = { version = "1.12", default-features = false, features = ["alloc"] }
derive_more = { version = "2.1", default-features = false, features = [
    "deref",
    "deref_mut",
    "as_ref",
    "index",
    "index_mut",
] }
fancy-regex = { version = "0.18", default-features = false }
hashbrown = { version = "0.17", default-features = false, features = [
    "default-hasher",
    "inline-more",
    "allocator-api2",
] }
log = { version = "0.4" }
memchr = { version = "2.8", default-features = false, features = ["alloc"] }
multiversion = { version = "0.8", default-features = false, optional = true }
once_cell = { version = "1.21", default-features = false, features = ["alloc", "race"] }
onig = { version = "6.5", default-features = false, optional = true }
orx-priority-queue = { version = "1.8", default-features = false }
postcard = { version = "1.1", default-features = false, features = ["alloc"], optional = true }
regex-automata = { version = "0.4", default-features = false, features = [
    "alloc",
    "syntax",
    "meta",
    "nfa",
    "dfa",
    "hybrid",
    "unicode-perl",
    "unicode-gencat",
    "unicode-case",
] }
regex-syntax = { version = "0.8", default-features = false, features = [
    "unicode-perl",
    "unicode-gencat",
    "unicode-case",
] }
reqwest = { version = "0.13", default-features = false, features = [
    "blocking",
    "rustls",
    "system-proxy",
], optional = true }
sentencepiece-model = { version = "0.1", default-features = false, optional = true }
serde = { version = "1.0", default-features = false, features = ["alloc", "derive"], optional = true }
serde_json = { version = "1.0", default-features = false, features = ["alloc"], optional = true }
thiserror = { version = "2.0", default-features = false }

[dependencies.unicode-normalization]
# Optional; enabled through the `normalization-unicode` feature.
version = "0.1"
optional = true
# Opt out of the upstream default feature set, as the other dependencies in
# this manifest do. Upstream's defaults enable `std`, which would otherwise be
# pulled in even when this crate's own `std` feature is switched off.
default-features = false

[dependencies.unicode-script]
# Optional; enabled through the `split-unicode-script` feature.
optional = true
version = "0.5"

# Dependencies used only by development targets; not part of normal builds.
[dev-dependencies]
console = { version = "0.16", features = ["windows-console-colors"] }
criterion = { version = "0.8", default-features = false, features = ["cargo_bench_support"] }
simple_logger = { version = "5.2" }

[profile.release]
# Size-oriented release build: `opt-level = "s"`, fat LTO, a single codegen
# unit, aborting panics, and symbols stripped.
opt-level = "s"
codegen-units = 1
lto = "fat"
panic = "abort"
strip = "symbols"
# Debug info and runtime checks are all turned off.
debug = 0
debug-assertions = false
overflow-checks = false
incremental = false

[profile.release.build-override]
opt-level = "s"

[profile.performance]
# Same as `release`, but optimised for speed (`opt-level = 3`) instead of size.
inherits = "release"
opt-level = 3

[profile.performance.build-override]
opt-level = 3

[profile.bench]
# Based on `release`, with `opt-level = 3`, full debug info (`debug = 2`) and
# stripping disabled.
inherits = "release"
opt-level = 3
debug = 2
strip = "none"

[profile.bench.build-override]
opt-level = 3

[profile.test]
# Based on `dev`, with `opt-level = 1`.
inherits = "dev"
opt-level = 1