tokenizers 0.11.3

Provides an implementation of today's most used tokenizers, with a focus on performance and versatility.
Documentation: https://docs.rs/tokenizers/
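A minimal usage sketch in Rust, assuming the default `http` feature (which enables downloading pretrained tokenizers via `Tokenizer::from_pretrained`) and an example model identifier such as `bert-base-cased`:

```rust
use tokenizers::tokenizer::{Result, Tokenizer};

fn main() -> Result<()> {
    // Requires the default `http` feature: the tokenizer definition is
    // downloaded and cached locally (via `reqwest` + `cached-path`).
    let tokenizer = Tokenizer::from_pretrained("bert-base-cased", None)?;

    // Encode a single sentence; `false` means no special tokens are added.
    let encoding = tokenizer.encode("Hey there!", false)?;
    println!("{:?}", encoding.get_tokens());
    Ok(())
}
```

The crate manifest (Cargo.toml) for this release follows.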
[package]
authors = ["Anthony MOI <m.anthony.moi@gmail.com>"]
edition = "2018"
name = "tokenizers"
version = "0.11.3"
homepage = "https://github.com/huggingface/tokenizers"
repository = "https://github.com/huggingface/tokenizers"
documentation = "https://docs.rs/tokenizers/"
license = "Apache-2.0"
keywords = ["tokenizer", "NLP", "huggingface", "BPE", "WordPiece"]
readme = "./README.md"
description = """
Provides an implementation of today's most used tokenizers,
with a focus on performances and versatility.
"""
exclude = [ "rust-toolchain", "target/*", "Cargo.lock", "benches/*.txt", "benches/*.json", "data/*" ]

[lib]
name = "tokenizers"
path = "src/lib.rs"
bench = false

[[bin]]
name = "cli"
path = "src/cli.rs"
bench = false

[[bench]]
name = "bpe_benchmark"
harness = false

[[bench]]
name = "bert_benchmark"
harness = false
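The two bench targets above set `harness = false` so the default libtest harness is replaced by Criterion (declared under `[dev-dependencies]` below), which supplies its own `main`. A minimal sketch of what such a bench file could look like (the file path and benchmark body are illustrative, not the crate's actual benchmarks):

```rust
// benches/bpe_benchmark.rs (illustrative sketch)
use criterion::{criterion_group, criterion_main, Criterion};
use tokenizers::Tokenizer;

fn bench_encode(c: &mut Criterion) {
    // Assumes a serialized tokenizer is available locally; the path is made up.
    let tokenizer = Tokenizer::from_file("data/tokenizer.json").unwrap();
    c.bench_function("encode_single_sentence", |b| {
        b.iter(|| tokenizer.encode("Hello, y'all! How are you?", false).unwrap())
    });
}

criterion_group!(benches, bench_encode);
criterion_main!(benches);
```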

[dependencies]
lazy_static = "1.4"
rand = "0.7"
onig = { version = "6.0", default-features = false }
regex = "1.3"
regex-syntax = "0.6"
rayon = "1.3"
rayon-cond = "0.1"
serde = { version = "1.0", features = [ "derive" ] }
serde_json = "1.0"
clap = "2.33"
unicode-normalization-alignments = "0.1"
unicode_categories = "0.1"
unicode-segmentation = "1.6"
indicatif = { version = "0.15", optional = true }
itertools = "0.9"
log = "0.4"
esaxx-rs = "0.1"
derive_builder = "0.9"
spm_precompiled = "0.1"
dirs = "3.0"
reqwest = { version = "0.11", optional = true }
cached-path = { version = "0.5", optional = true }
aho-corasick = "0.7"
paste = "1.0.6"
macro_rules_attribute = "0.0.2"

[features]
default = ["progressbar", "http"]
progressbar = ["indicatif"]
http = ["reqwest", "cached-path"]

[dev-dependencies]
criterion = "0.3"
tempfile = "3.1"
assert_approx_eq = "1.1"