stop_words/
lib.rs

1#![warn(clippy::all)]
2#![warn(missing_docs)]
3#![warn(clippy::missing_docs_in_private_items)]
4#![doc = include_str!("../README.md")]
5
6mod language_names;
7pub use language_names::LANGUAGE;
8
9/// This function fetches stop words for a language using either a member of the `LANGUAGE` enum,
10/// or a two-character ISO language code as either a `str` or a `String` type. Please note that
11/// constructed languages use either a member of the `LANGUAGE` enum, or a __three__-character ISO
12/// language code as either a `str` or a `String` type
13/// ```ignore
14/// let first_list = stop_words::get("ar");
15/// let second_list = stop_words::get(stop_words::LANGUAGE::Arabic);
16/// assert_eq!(first_list, second_list)
17/// ```
18pub fn get<T: Into<String>>(input_language: T) -> Vec<String> {
19    // Check the input
20    let language_name_as_string = input_language.into();
21
22    // Get the bytes
23    let json_as_bytes: &[u8] = if cfg!(feature = "nltk") {
24        include_bytes!(concat!(env!("OUT_DIR"), "/stopwords-nltk.json"))
25    } else if cfg!(feature = "constructed") {
26        include_bytes!(concat!(env!("OUT_DIR"), "/stopwords-constructed.json"))
27    } else {
28        include_bytes!("iso/stopwords-iso.json")
29    };
30
31    // Get the JSON
32    let mut json: serde_json::Value = serde_json::from_slice(json_as_bytes)
33        .expect("Could not read JSON file from Stopwords ISO.");
34
35    // Get the words
36    json.get_mut(&language_name_as_string)
37        .take()
38        .unwrap_or_else(|| panic!("The '{language_name_as_string}' language is not recognized. Please check the documentation for a supported list of languages."))
39        .as_array_mut()
40        .expect("The referenced value is not a mutable array.")
41        .iter_mut()
42        .map(|x| {
43            let x = x.take();
44            if let serde_json::Value::String(s) = x {
45                s
46            } else {
47                panic!("The referenced value is not a string.")
48            }
49        })
50        .collect()
51}