# Benchmark target `training`, declared explicitly because `autobenches` is
# disabled in `[package]`.
[[bench]]
name = "training"
path = "benches/training.rs"
# Opt out of the default libtest bench harness; the benchmark file supplies
# its own entry point (presumably via Criterion from dev-dependencies — confirm).
harness = false
# Command-line executable `bbpe`, declared explicitly because `autobins` is
# disabled in `[package]`.
[[bin]]
name = "bbpe"
path = "src/bin/bbpe.rs"
# Cargo skips this target unless the `cli` feature is enabled; `cli` is part
# of the default feature set.
required-features = ["cli"]
# Intentionally empty: `[package]` sets `build = false`, so no build script
# runs and no build-time crates are required.
[build-dependencies]
# Runtime dependencies, sorted alphabetically. Entries marked
# `optional = true` are compiled only when a feature that lists them
# (see `[features]`) is enabled.
[dependencies]
ahash = "0.8.11"
anyhow = "1.0"
clap = { version = "4.5", features = ["derive"], optional = true }
env_logger = { version = "0.11", optional = true }
indicatif = { version = "0.18", features = ["rayon"], optional = true }
log = "0.4"
rayon = "1.10"
rustc-hash = "2.1"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
thiserror = "2.0"
tokenizers = { version = "0.22.1", default-features = true, features = ["onig"] }
walkdir = "2.5"
# Crates used only when compiling tests, examples and benchmarks.
[dev-dependencies]
assert_cmd = "2.0"
criterion = { version = "0.7", default-features = false, features = ["cargo_bench_support"] }
insta = { version = "1.41", features = ["yaml"] }
tempfile = "3.12"
# Feature flags. `cli` switches on the three optional dependencies and is
# required by the `bbpe` binary target; it is enabled by default.
[features]
default = ["cli"]
cli = [
    "clap",
    "indicatif",
    "env_logger",
]
# Library target, declared explicitly because `autolib` is disabled in
# `[package]`. It shares the name `bbpe` with the binary target.
[lib]
name = "bbpe"
path = "src/lib.rs"
# Crate identity and registry metadata.
[package]
name = "bbpe"
version = "0.3.0"
authors = ["Binary BPE Maintainers <michael.bommarito@gmail.com>"]
categories = ["encoding", "text-processing"]
documentation = "https://docs.rs/bbpe"
edition = "2021"
homepage = "https://github.com/mjbommar/binary-bpe"
keywords = [
    "bpe",
    "binary",
    "malware",
    "tokenizer",
    "huggingface",
]
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/mjbommar/binary-bpe"
# NOTE(review): confirm this minimum Rust version still holds for the
# dependency versions pinned in this manifest.
rust-version = "1.74"
# Paths left out of the packaged crate archive.
exclude = [
    "/target",
    "/*.json",
    "/tokenizer*.json",
    "/**/*.swp",
    "/**/*.rs.bk",
    "/test_data",
    "/.gitignore",
    "/.github",
]
# Target auto-discovery is switched off: every lib/bin/test/bench target is
# declared explicitly elsewhere in this manifest.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
# No build script is used.
build = false
description = "Binary byte pair encoding (BPE) trainer and CLI compatible with Hugging Face tokenizers"

# docs.rs build settings: document the crate with every feature enabled.
[package.metadata.docs.rs]
all-features = true
# Integration-test target `cli`, declared explicitly because `autotests` is
# disabled in `[package]`.
# NOTE(review): this target declares no `required-features`; if it drives the
# `bbpe` binary (which needs the `cli` feature), confirm it still behaves
# sensibly under `--no-default-features`.
[[test]]
name = "cli"
path = "tests/cli.rs"