# charabia 0.9.9
#
# A simple library to detect the language, tokenize the text and normalize the tokens.
# Documentation: https://docs.rs/charabia
# Criterion benchmark target; `harness = false` disables libtest's harness
# so criterion can provide its own `main`.
[[bench]]
name = "bench"
path = "benches/bench.rs"
harness = false

# Runtime dependencies, sorted alphabetically. Multi-key specs use inline
# tables per Cargo style convention; `optional = true` entries are enabled
# through the [features] table (`dep:` syntax).
[dependencies]
aho-corasick = "1.1.3"
csv = "1.3.1"
either = "1.15.0"
finl_unicode = { version = "1.3.0", optional = true }
fst = "0.4"
# Pinned exactly with `=` — keep in lockstep with this crate's releases.
irg-kvariants = "=0.1.1"
jieba-rs = { version = "0.8.1", optional = true }
lindera = { version = "0.43.3", default-features = false, optional = true }
pinyin = { version = "0.10", default-features = false, features = ["with_tone"], optional = true }
serde = "1.0.223"
slice-group-by = "0.3.1"
unicode-normalization = "0.1.24"
wana_kana = { version = "4.0.0", optional = true }
whatlang = "0.16.4"

# Test- and benchmark-only dependencies, sorted alphabetically.
[dev-dependencies]
criterion = "0.7"
mimalloc = "0.1.48"
quickcheck = "1"
quickcheck_macros = "1"

[features]
# Umbrella feature: Chinese support = segmentation + normalization.
chinese = ["chinese-segmentation", "chinese-normalization"]
chinese-normalization = []
# Pinyin-based normalization; pulls the optional `pinyin` dependency.
chinese-normalization-pinyin = ["dep:pinyin", "chinese-normalization"]
# Word segmentation via the optional `jieba-rs` dependency.
chinese-segmentation = ["dep:jieba-rs"]
# NOTE(review): `latin-camelcase` / `latin-snakecase` and the ipadic
# segmenter are NOT part of the default set.
default = ["chinese", "hebrew", "japanese", "thai", "korean", "greek", "khmer", "vietnamese", "swedish-recomposition", "turkish", "german-segmentation"]
german-segmentation = []
greek = []
hebrew = []
# Japanese defaults to the UniDic dictionary plus kana transliteration.
japanese = ["japanese-segmentation-unidic", "japanese-transliteration"]
# Mutually alternative lindera dictionaries (IPADIC vs UniDic); both
# enable lindera's dictionary compression.
japanese-segmentation-ipadic = ["lindera/ipadic", "lindera/compress"]
japanese-segmentation-unidic = ["lindera/unidic", "lindera/compress"]
# Kana transliteration via the optional `wana_kana` dependency.
japanese-transliteration = ["dep:wana_kana"]
khmer = []
# Korean segmentation uses lindera's ko-dic dictionary.
korean = ["lindera/ko-dic", "lindera/compress"]
# Both latin case-splitting features rely on the optional `finl_unicode` dep.
latin-camelcase = ["dep:finl_unicode"]
latin-snakecase = ["dep:finl_unicode"]
swedish-recomposition = []
thai = []
turkish = []
vietnamese = []

# Explicit library target (auto-discovery is disabled in [package]).
[lib]
name = "charabia"
path = "src/lib.rs"

# Package metadata. Keys follow the conventional Cargo ordering:
# name and version first, remaining keys alphabetical, description last.
# The `auto*` switches are all off because every target is declared
# explicitly above.
[package]
name = "charabia"
version = "0.9.9"
authors = ["Many <many@meilisearch.com>"]
autobenches = false
autobins = false
autoexamples = false
autolib = false
autotests = false
build = false
categories = ["text-processing"]
documentation = "https://docs.rs/charabia"
edition = "2021"
exclude = ["dictionaries/txt/thai/words.txt"]
keywords = ["segmenter", "tokenizer", "normalize", "language"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/meilisearch/charabia"
description = "A simple library to detect the language, tokenize the text and normalize the tokens"