# tokenizers 0.23.1
#
# Provides an implementation of today's most used tokenizers, with a focus on performance and versatility.
# Documentation: https://docs.rs/tokenizers/
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.

# Crate identity and registry metadata.
[package]
name = "tokenizers"
version = "0.23.1"
authors = [
    "Anthony MOI <m.anthony.moi@gmail.com>",
    "Nicolas Patry <patry.nicolas@protonmail.com>",
]
# Target auto-discovery is switched off for every target kind; the lib,
# example, test and bench targets are all declared explicitly in this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is used.
build = false
documentation = "https://docs.rs/tokenizers/"
edition = "2018"
# Paths left out of the packaged crate.
exclude = [
    "rust-toolchain",
    "target/*",
    "Cargo.lock",
    "benches/*.txt",
    "benches/*.json",
    "data/*",
]
homepage = "https://github.com/huggingface/tokenizers"
keywords = ["tokenizer", "NLP", "huggingface", "BPE", "WordPiece"]
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/huggingface/tokenizers"
description = """
Provides an implementation of today's most used tokenizers,
with a focus on performances and versatility.
"""

# docs.rs build settings: generate the hosted documentation with every
# feature of this crate enabled.
[package.metadata.docs.rs]
all-features = true

[features]
# Enabled unless the dependent crate sets `default-features = false`.
default = ["progressbar", "onig", "esaxx_fast"]
# Turns on the `cpp` feature of the `esaxx-rs` dependency.
esaxx_fast = ["esaxx-rs/cpp"]
# Pulls in the optional `hf-hub` dependency.
http = ["hf-hub"]
# Pulls in the optional `indicatif` dependency.
progressbar = ["indicatif"]
# The `?` makes this a weak dependency feature: `rustls-tls` is forwarded to
# `hf-hub` only when `hf-hub` is enabled by something else (e.g. `http`);
# this feature does not enable `hf-hub` on its own.
rustls-tls = ["hf-hub?/rustls-tls"]
# Pulls in the optional `fancy-regex` dependency and turns on the `wasm_js`
# feature of `getrandom`.
unstable_wasm = ["fancy-regex", "getrandom/wasm_js"]

# The library target, rooted at src/lib.rs.
[lib]
name = "tokenizers"
path = "src/lib.rs"
# Leave the library target out of `cargo bench`.
# NOTE(review): presumably so the default bench harness is not run against
# the lib alongside the custom-harness [[bench]] targets — confirm.
bench = false

# Example targets, declared explicitly because `autoexamples = false`.

# Only built when the `http` feature is enabled.
[[example]]
name = "encode_batch"
path = "examples/encode_batch.rs"
required-features = ["http"]

[[example]]
name = "serialization"
path = "examples/serialization.rs"

# Integration test targets, declared explicitly because `autotests = false`.
# Each entry maps a test name to its source file under tests/.
[[test]]
name = "added_tokens"
path = "tests/added_tokens.rs"

[[test]]
name = "documentation"
path = "tests/documentation.rs"

[[test]]
name = "from_pretrained"
path = "tests/from_pretrained.rs"

[[test]]
name = "offsets"
path = "tests/offsets.rs"

[[test]]
name = "serialization"
path = "tests/serialization.rs"

[[test]]
name = "stream"
path = "tests/stream.rs"

[[test]]
name = "training"
path = "tests/training.rs"

[[test]]
name = "unigram"
path = "tests/unigram.rs"

# Benchmark targets, declared explicitly because `autobenches = false`.
# Every target sets `harness = false`, so the built-in libtest harness is not
# used and each benchmark file supplies its own `main`.
# NOTE(review): presumably driven by the `criterion` dev-dependency — confirm.

# Only built when the `http` feature is enabled.
[[bench]]
name = "added_vocab_deserialize"
path = "benches/added_vocab_deserialize.rs"
harness = false
required-features = ["http"]

[[bench]]
name = "bert_benchmark"
path = "benches/bert_benchmark.rs"
harness = false

[[bench]]
name = "bpe_benchmark"
path = "benches/bpe_benchmark.rs"
harness = false

[[bench]]
name = "ci_benchmark"
path = "benches/ci_benchmark.rs"
harness = false

[[bench]]
name = "layout_benchmark"
path = "benches/layout_benchmark.rs"
harness = false

[[bench]]
name = "llama3_benchmark"
path = "benches/llama3_benchmark.rs"
harness = false

[[bench]]
name = "truncation_benchmark"
path = "benches/truncation_benchmark.rs"
harness = false

[[bench]]
name = "unigram_benchmark"
path = "benches/unigram_benchmark.rs"
harness = false

# Runtime dependencies. A bare string is shorthand for `{ version = "..." }`.
# Entries marked `optional = true` are only compiled when a feature enables them.
[dependencies]
ahash = { version = "0.8.11", features = ["serde"] }
compact_str = { version = "0.9", features = ["serde"] }
daachorse = "1.0.1"
dary_heap = { version = "0.3.6", features = ["serde"] }
derive_builder = "0.20"
esaxx-rs = { version = "0.1.10", default-features = false, features = [] }
fancy-regex = { version = "0.17", optional = true }
getrandom = "0.3"
hf-hub = { version = "0.4.1", default-features = false, features = ["ureq"], optional = true }
indicatif = { version = "0.18", optional = true }
itertools = "0.14"
log = "0.4"
macro_rules_attribute = "0.2.0"
monostate = "0.1.12"
onig = { version = "6.5.1", default-features = false, optional = true }
paste = "1.0.14"
rand = "0.9"
rayon = "1.10"
rayon-cond = "0.4"
regex = "1.10"
regex-syntax = "0.8"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
spm_precompiled = "0.1.3"
thiserror = "2"
unicode-normalization-alignments = "0.1"
unicode-segmentation = "1.11"
unicode_categories = "0.1"

# Dependencies used only when building tests, examples and benchmarks.
[dev-dependencies]
assert_approx_eq = "1.1"
criterion = "0.6"
tempfile = "3.10"
tracing = "0.1"
tracing-subscriber = "0.3.18"

# Release builds use "fat" link-time optimization, which optimizes across
# every crate in the dependency graph.
[profile.release]
lto = "fat"

# Custom profile: the release settings plus full debug info (level 2).
[profile.profiling]
inherits = "release"
debug = 2