# lingua 1.6.0
#
# An accurate natural language detection library, suitable for long and short text alike
# Documentation: https://docs.rs/lingua
# Copyright © 2020-present Peter M. Stahl pemistahl@gmail.com
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Workspace definition: every directory matched by the glob below is a
# workspace member. The language model dependencies declared in
# [dependencies] point into the same language-models/ directory via `path`.
[workspace]
members = ["language-models/*"]

[package]
name = "lingua"
version = "1.6.0"
# Remaining metadata keys are sorted alphabetically; the description comes last.
authors = ["Peter M. Stahl <pemistahl@gmail.com>"]
categories = ["text-processing"]
documentation = "https://docs.rs/lingua"
edition = "2021"
homepage = "https://github.com/pemistahl/lingua-rs"
keywords = [
    "language-processing",
    "language-detection",
    "language-recognition",
    "nlp",
]
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/pemistahl/lingua-rs"
# Multi-line basic string: the newline right after the opening quotes is
# trimmed, the one before the closing quotes is part of the value.
description = """
An accurate natural language detection library, suitable for long and short text alike
"""

# Development builds use optimization level 1 (basic optimizations) instead
# of Cargo's default of 0 for the dev profile.
# NOTE(review): presumably chosen so that debug builds and tests run at an
# acceptable speed — confirm with the maintainer.
[profile.dev]
opt-level = 1

# Build the library both as a C-compatible dynamic library (cdylib) and as a
# regular Rust library (rlib).
# NOTE(review): the cdylib output is presumably what the Python (pyo3) and
# WebAssembly (wasm-bindgen) bindings are built from — confirm.
[lib]
crate-type = ["cdylib", "rlib"]

# The accuracy_reports binary is only built when the `accuracy-reports`
# feature is enabled. That feature switches on the optional cld2, indoc,
# titlecase, whatlang and whichlang dependencies (see [features]).
[[bin]]
name = "accuracy_reports"
required-features = ["accuracy-reports"]

# The benchmark target is only built when the `benchmark` feature is enabled.
# `harness = false` turns off the built-in libtest harness, so the benchmark
# source has to supply its own entry point.
# NOTE(review): presumably that entry point comes from criterion, which is
# declared as a dev-dependency for non-wasm targets — confirm.
[[bench]]
name = "benchmark"
harness = false
required-features = ["benchmark"]

[dependencies]
# Third-party crates that are required on every target.
brotli = "3.4.0"
compact_str = "0.7.1"
fraction = "0.14.0"
include_dir = "0.7.3"
itertools = "0.11.0"
maplit = "1.0.2"
once_cell = "1.18.0"
regex = "1.10.2"
serde = { version = "1.0.192", features = ["derive"] }
serde_json = "1.0.108"
strum = "0.25.0"
strum_macros = "0.25.3"
# One optional language model crate per supported language. Each entry has a
# `path` into this workspace's language-models/ directory plus a `version`
# requirement, and is enabled through the feature named after the language
# in [features].
# NOTE(review): the directory names look like ISO 639-1 codes — confirm.
lingua-afrikaans-language-model = { path = "language-models/af", version = "1.1.0", optional = true }
lingua-albanian-language-model = { path = "language-models/sq", version = "1.1.0", optional = true }
lingua-arabic-language-model = { path = "language-models/ar", version = "1.1.0", optional = true }
lingua-armenian-language-model = { path = "language-models/hy", version = "1.1.0", optional = true }
lingua-azerbaijani-language-model = { path = "language-models/az", version = "1.1.0", optional = true }
lingua-basque-language-model = { path = "language-models/eu", version = "1.1.0", optional = true }
lingua-belarusian-language-model = { path = "language-models/be", version = "1.1.0", optional = true }
lingua-bengali-language-model = { path = "language-models/bn", version = "1.1.0", optional = true }
lingua-bokmal-language-model = { path = "language-models/nb", version = "1.1.0", optional = true }
lingua-bosnian-language-model = { path = "language-models/bs", version = "1.1.0", optional = true }
lingua-bulgarian-language-model = { path = "language-models/bg", version = "1.1.0", optional = true }
lingua-catalan-language-model = { path = "language-models/ca", version = "1.1.0", optional = true }
lingua-chinese-language-model = { path = "language-models/zh", version = "1.1.0", optional = true }
lingua-croatian-language-model = { path = "language-models/hr", version = "1.1.0", optional = true }
lingua-czech-language-model = { path = "language-models/cs", version = "1.1.0", optional = true }
lingua-danish-language-model = { path = "language-models/da", version = "1.1.0", optional = true }
lingua-dutch-language-model = { path = "language-models/nl", version = "1.1.0", optional = true }
lingua-english-language-model = { path = "language-models/en", version = "1.1.0", optional = true }
lingua-esperanto-language-model = { path = "language-models/eo", version = "1.1.0", optional = true }
lingua-estonian-language-model = { path = "language-models/et", version = "1.1.0", optional = true }
lingua-finnish-language-model = { path = "language-models/fi", version = "1.1.0", optional = true }
lingua-french-language-model = { path = "language-models/fr", version = "1.1.0", optional = true }
lingua-ganda-language-model = { path = "language-models/lg", version = "1.1.0", optional = true }
lingua-georgian-language-model = { path = "language-models/ka", version = "1.1.0", optional = true }
lingua-german-language-model = { path = "language-models/de", version = "1.1.0", optional = true }
lingua-greek-language-model = { path = "language-models/el", version = "1.1.0", optional = true }
lingua-gujarati-language-model = { path = "language-models/gu", version = "1.1.0", optional = true }
lingua-hebrew-language-model = { path = "language-models/he", version = "1.1.0", optional = true }
lingua-hindi-language-model = { path = "language-models/hi", version = "1.1.0", optional = true }
lingua-hungarian-language-model = { path = "language-models/hu", version = "1.1.0", optional = true }
lingua-icelandic-language-model = { path = "language-models/is", version = "1.1.0", optional = true }
lingua-indonesian-language-model = { path = "language-models/id", version = "1.1.0", optional = true }
lingua-irish-language-model = { path = "language-models/ga", version = "1.1.0", optional = true }
lingua-italian-language-model = { path = "language-models/it", version = "1.1.0", optional = true }
lingua-japanese-language-model = { path = "language-models/ja", version = "1.1.0", optional = true }
lingua-kazakh-language-model = { path = "language-models/kk", version = "1.1.0", optional = true }
lingua-korean-language-model = { path = "language-models/ko", version = "1.1.0", optional = true }
lingua-latin-language-model = { path = "language-models/la", version = "1.1.0", optional = true }
lingua-latvian-language-model = { path = "language-models/lv", version = "1.1.0", optional = true }
lingua-lithuanian-language-model = { path = "language-models/lt", version = "1.1.0", optional = true }
lingua-macedonian-language-model = { path = "language-models/mk", version = "1.1.0", optional = true }
lingua-malay-language-model = { path = "language-models/ms", version = "1.1.0", optional = true }
lingua-maori-language-model = { path = "language-models/mi", version = "1.1.0", optional = true }
lingua-marathi-language-model = { path = "language-models/mr", version = "1.1.0", optional = true }
lingua-mongolian-language-model = { path = "language-models/mn", version = "1.1.0", optional = true }
lingua-nynorsk-language-model = { path = "language-models/nn", version = "1.1.0", optional = true }
lingua-persian-language-model = { path = "language-models/fa", version = "1.1.0", optional = true }
lingua-polish-language-model = { path = "language-models/pl", version = "1.1.0", optional = true }
lingua-portuguese-language-model = { path = "language-models/pt", version = "1.1.0", optional = true }
lingua-punjabi-language-model = { path = "language-models/pa", version = "1.1.0", optional = true }
lingua-romanian-language-model = { path = "language-models/ro", version = "1.1.0", optional = true }
lingua-russian-language-model = { path = "language-models/ru", version = "1.1.0", optional = true }
lingua-serbian-language-model = { path = "language-models/sr", version = "1.1.0", optional = true }
lingua-shona-language-model = { path = "language-models/sn", version = "1.1.0", optional = true }
lingua-slovak-language-model = { path = "language-models/sk", version = "1.1.0", optional = true }
lingua-slovene-language-model = { path = "language-models/sl", version = "1.1.0", optional = true }
lingua-somali-language-model = { path = "language-models/so", version = "1.1.0", optional = true }
lingua-sotho-language-model = { path = "language-models/st", version = "1.1.0", optional = true }
lingua-spanish-language-model = { path = "language-models/es", version = "1.1.0", optional = true }
lingua-swahili-language-model = { path = "language-models/sw", version = "1.1.0", optional = true }
lingua-swedish-language-model = { path = "language-models/sv", version = "1.1.0", optional = true }
lingua-tagalog-language-model = { path = "language-models/tl", version = "1.1.0", optional = true }
lingua-tamil-language-model = { path = "language-models/ta", version = "1.1.0", optional = true }
lingua-telugu-language-model = { path = "language-models/te", version = "1.1.0", optional = true }
lingua-thai-language-model = { path = "language-models/th", version = "1.1.0", optional = true }
lingua-tsonga-language-model = { path = "language-models/ts", version = "1.1.0", optional = true }
lingua-tswana-language-model = { path = "language-models/tn", version = "1.1.0", optional = true }
lingua-turkish-language-model = { path = "language-models/tr", version = "1.1.0", optional = true }
lingua-ukrainian-language-model = { path = "language-models/uk", version = "1.1.0", optional = true }
lingua-urdu-language-model = { path = "language-models/ur", version = "1.1.0", optional = true }
lingua-vietnamese-language-model = { path = "language-models/vi", version = "1.1.0", optional = true }
lingua-welsh-language-model = { path = "language-models/cy", version = "1.1.0", optional = true }
lingua-xhosa-language-model = { path = "language-models/xh", version = "1.1.0", optional = true }
lingua-yoruba-language-model = { path = "language-models/yo", version = "1.1.0", optional = true }
lingua-zulu-language-model = { path = "language-models/zu", version = "1.1.0", optional = true }

# Dependencies that are only compiled when the target is not WebAssembly.
# The optional entries are switched on by the `accuracy-reports`, `benchmark`
# and `python` features declared in [features].
[target.'cfg(not(target_family = "wasm"))'.dependencies]
ahash = "0.8.6"
cld2 = { version = "1.0.2", optional = true }
indoc = { version = "2.0.4", optional = true }
pyo3 = { version = "0.20.0", optional = true }
rayon = "1.8.0"
titlecase = { version = "2.2.0", optional = true }
whatlang = { version = "0.16.3", optional = true }
whichlang = { version = "0.1.0", optional = true }

# Dependencies that are only compiled for WebAssembly targets.
[target.'cfg(target_family = "wasm")'.dependencies]
# ahash is declared once per target family: here its default features are
# switched off and only `std` and `compile-time-rng` are enabled.
# NOTE(review): presumably because ahash's default runtime randomness source
# is not usable on wasm — confirm against the ahash documentation.
ahash = { version = "0.8.6", default-features = false, features = ["std", "compile-time-rng"] }
serde-wasm-bindgen = "0.6.1"
wasm-bindgen = "0.2.88"

# Crates used only by tests, examples and benchmarks, on every target.
[dev-dependencies]
float-cmp = "0.9.0"
# indoc is also declared as an optional regular dependency for non-wasm
# targets, where the `accuracy-reports` feature enables it.
indoc = "2.0.4"
rstest = "0.18.2"
tempfile = "3.8.1"

# Dev-only dependency that is left out of WebAssembly builds.
# NOTE(review): criterion is presumably the harness behind the `benchmark`
# bench target, which sets `harness = false` — confirm.
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
criterion = "0.5.1"

# Dev-only dependency that is pulled in for WebAssembly targets only.
[target.'cfg(target_family = "wasm")'.dev-dependencies]
wasm-bindgen-test = "0.3.38"

[features]
# All language models are enabled by default. Build with
# `default-features = false` and list individual language features to
# include only a subset of the models.
default = [
    "afrikaans",
    "albanian",
    "arabic",
    "armenian",
    "azerbaijani",
    "basque",
    "belarusian",
    "bengali",
    "bokmal",
    "bosnian",
    "bulgarian",
    "catalan",
    "chinese",
    "croatian",
    "czech",
    "danish",
    "dutch",
    "english",
    "esperanto",
    "estonian",
    "finnish",
    "french",
    "ganda",
    "georgian",
    "german",
    "greek",
    "gujarati",
    "hebrew",
    "hindi",
    "hungarian",
    "icelandic",
    "indonesian",
    "irish",
    "italian",
    "japanese",
    "kazakh",
    "korean",
    "latin",
    "latvian",
    "lithuanian",
    "macedonian",
    "malay",
    "maori",
    "marathi",
    "mongolian",
    "nynorsk",
    "persian",
    "polish",
    "portuguese",
    "punjabi",
    "romanian",
    "russian",
    "serbian",
    "shona",
    "slovak",
    "slovene",
    "somali",
    "sotho",
    "spanish",
    "swahili",
    "swedish",
    "tagalog",
    "tamil",
    "telugu",
    "thai",
    "tsonga",
    "tswana",
    "turkish",
    "ukrainian",
    "urdu",
    "vietnamese",
    "welsh",
    "xhosa",
    "yoruba",
    "zulu",
]

# Tooling features. Each one switches on optional dependencies that are
# declared for non-wasm targets; `accuracy-reports` and `benchmark` are also
# the required features of the accuracy_reports binary and the benchmark
# bench target.
accuracy-reports = ["cld2", "indoc", "titlecase", "whatlang", "whichlang"]
benchmark = ["cld2", "whatlang", "whichlang"]
python = ["pyo3"]

# One feature per language; each enables the optional language model crate
# of the same language from [dependencies].
afrikaans = ["lingua-afrikaans-language-model"]
albanian = ["lingua-albanian-language-model"]
arabic = ["lingua-arabic-language-model"]
armenian = ["lingua-armenian-language-model"]
azerbaijani = ["lingua-azerbaijani-language-model"]
basque = ["lingua-basque-language-model"]
belarusian = ["lingua-belarusian-language-model"]
bengali = ["lingua-bengali-language-model"]
bokmal = ["lingua-bokmal-language-model"]
bosnian = ["lingua-bosnian-language-model"]
bulgarian = ["lingua-bulgarian-language-model"]
catalan = ["lingua-catalan-language-model"]
chinese = ["lingua-chinese-language-model"]
croatian = ["lingua-croatian-language-model"]
czech = ["lingua-czech-language-model"]
danish = ["lingua-danish-language-model"]
dutch = ["lingua-dutch-language-model"]
english = ["lingua-english-language-model"]
esperanto = ["lingua-esperanto-language-model"]
estonian = ["lingua-estonian-language-model"]
finnish = ["lingua-finnish-language-model"]
french = ["lingua-french-language-model"]
ganda = ["lingua-ganda-language-model"]
georgian = ["lingua-georgian-language-model"]
german = ["lingua-german-language-model"]
greek = ["lingua-greek-language-model"]
gujarati = ["lingua-gujarati-language-model"]
hebrew = ["lingua-hebrew-language-model"]
hindi = ["lingua-hindi-language-model"]
hungarian = ["lingua-hungarian-language-model"]
icelandic = ["lingua-icelandic-language-model"]
indonesian = ["lingua-indonesian-language-model"]
irish = ["lingua-irish-language-model"]
italian = ["lingua-italian-language-model"]
japanese = ["lingua-japanese-language-model"]
kazakh = ["lingua-kazakh-language-model"]
korean = ["lingua-korean-language-model"]
latin = ["lingua-latin-language-model"]
latvian = ["lingua-latvian-language-model"]
lithuanian = ["lingua-lithuanian-language-model"]
macedonian = ["lingua-macedonian-language-model"]
malay = ["lingua-malay-language-model"]
maori = ["lingua-maori-language-model"]
marathi = ["lingua-marathi-language-model"]
mongolian = ["lingua-mongolian-language-model"]
nynorsk = ["lingua-nynorsk-language-model"]
persian = ["lingua-persian-language-model"]
polish = ["lingua-polish-language-model"]
portuguese = ["lingua-portuguese-language-model"]
punjabi = ["lingua-punjabi-language-model"]
romanian = ["lingua-romanian-language-model"]
russian = ["lingua-russian-language-model"]
serbian = ["lingua-serbian-language-model"]
shona = ["lingua-shona-language-model"]
slovak = ["lingua-slovak-language-model"]
slovene = ["lingua-slovene-language-model"]
somali = ["lingua-somali-language-model"]
sotho = ["lingua-sotho-language-model"]
spanish = ["lingua-spanish-language-model"]
swahili = ["lingua-swahili-language-model"]
swedish = ["lingua-swedish-language-model"]
tagalog = ["lingua-tagalog-language-model"]
tamil = ["lingua-tamil-language-model"]
telugu = ["lingua-telugu-language-model"]
thai = ["lingua-thai-language-model"]
tsonga = ["lingua-tsonga-language-model"]
tswana = ["lingua-tswana-language-model"]
turkish = ["lingua-turkish-language-model"]
ukrainian = ["lingua-ukrainian-language-model"]
urdu = ["lingua-urdu-language-model"]
vietnamese = ["lingua-vietnamese-language-model"]
welsh = ["lingua-welsh-language-model"]
xhosa = ["lingua-xhosa-language-model"]
yoruba = ["lingua-yoruba-language-model"]
zulu = ["lingua-zulu-language-model"]