[package]
name = "lance-tokenizer"
version = "6.0.0"
authors = ["Lance Devs <dev@lance.org>"]
# Target auto-discovery and the build script are switched off; the only
# target this manifest declares is the library in the [lib] table.
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = false
categories = [
    "database-implementations",
    "data-structures",
    "development-tools",
    "science",
]
edition = "2024"
keywords = [
    "data-format",
    "data-science",
    "machine-learning",
    "apache-arrow",
    "data-analytics",
]
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/lance-format/lance"
# Minimum supported Rust toolchain for this crate.
rust-version = "1.91.0"
description = "Tokenizer abstractions and implementations for Lance"
[features]
# `tokenizer-*` features each enable the feature named after one optional crate.
tokenizer-jieba = ["jieba-rs"]
tokenizer-lindera = ["lindera"]
# Features named after the optional dependencies; the `dep:` prefix turns on
# the dependency itself.
jieba-rs = ["dep:jieba-rs"]
lindera = ["dep:lindera"]
[lib]
# Declared explicitly because `autolib = false` in [package] disables
# automatic discovery of the library target.
path = "src/lib.rs"
name = "lance_tokenizer"
[dependencies]
# Optional crates: only compiled when the matching `dep:` entry in [features]
# is activated.
jieba-rs = { version = "0.9.0", optional = true, default-features = false }
lindera = { version = "0.44.0", optional = true }
# Unconditional dependencies. Bare version strings are caret requirements,
# so "1" is the same requirement as "^1".
rust-stemmers = "1.2.0"
serde = { version = "1", features = ["derive"] }
unicode-normalization = "0.1.25"
[lints.clippy]
# Lint groups are denied at priority -1, lower than the default priority 0 of
# the individual lints below, so the per-lint levels override the groups.
all = { level = "deny", priority = -1 }
cargo = { level = "deny", priority = -1 }
style = { level = "deny", priority = -1 }
# Individual lints relaxed to "allow" despite the denied groups above.
multiple_crate_versions = "allow"
single_range_in_vec_init = "allow"
# Individual lints denied explicitly.
dbg_macro = "deny"
disallowed_macros = "deny"
fallible_impl_from = "deny"
large_futures = "deny"
manual_let_else = "deny"
print_stderr = "deny"
print_stdout = "deny"
redundant_clone = "deny"
redundant_pub_crate = "deny"
string_add = "deny"
string_add_assign = "deny"
string_lit_as_bytes = "deny"
trait_duplication_in_bounds = "deny"
use_self = "deny"
[lints.rust]
unsafe_op_in_unsafe_fn = "allow"
# `check-cfg` lists the cfg names `coverage` and `coverage_nightly` as expected,
# so the `unexpected_cfgs` lint does not warn when code uses them.
unexpected_cfgs = { level = "warn", priority = 0, check-cfg = ["cfg(coverage,coverage_nightly)"] }