# aleph-alpha-tokenizer 0.1.0
#
# A fast implementation of a wordpiece-inspired tokenizer
# Documentation: https://docs.rs/aleph-alpha-tokenizer
[package]
name = "aleph-alpha-tokenizer"
version = "0.1.0"
authors = ["Andre Bogus <andre.bogus@aleph-alpha.de>"]
documentation = "https://docs.rs/aleph-alpha-tokenizer"
edition = "2018"
# Explicit list of files that go into the published crate.
# NOTE(review): benches/bench.rs is not listed here although a `bench`
# target points at it — confirm that leaving it out of the package is intended.
include = [
    "README.md",
    "LICENSE-MIT",
    "LICENSE-APACHE",
    "src/lib.rs",
]
keywords = ["NLP", "tokenizer"]
license = "MIT OR Apache-2.0"
readme = "README.md"
repository = "https://github.com/Aleph-Alpha/aleph-alpha-tokenizer"
description = "A fast implementation of a wordpiece-inspired tokenizer"

[features]
# Nothing is switched on unless the user asks for it.
default = []
# Opt-in: activates the optional `tokenizers` dependency so the tokenizer
# can be used as a `tokenizer::Model`.
huggingface = ["tokenizers"]

[dependencies]
fst = "0.4.1"
# Optional; enabled through the `huggingface` feature.
tokenizers = { version = "0.9.0", optional = true }

# Dependencies needed only for tests, examples and benchmarks of this crate.
# NOTE(review): criterion is presumably what drives the `bench` target
# (which sets `harness = false`) — confirm against benches/bench.rs.
[dev-dependencies]
criterion = "0.3.1"

[[bench]]
name = "bench"
# Turn off the built-in libtest bench harness; the benchmark file is
# expected to supply its own entry point.
harness = false
path = "benches/bench.rs"