quickner/
utils.rs

// quickner
//
// NER tool for quick and simple NER annotation
// Copyright (C) 2023, Omar MHAIMDAT
//
// Licensed under Mozilla Public License 2.0
//
8use std::{collections::HashSet, str};
9
10use indicatif::{ProgressBar, ProgressStyle};
11
/// Checks whether a string is non-empty and consists solely of alphanumeric characters.
///
/// Each `char` is tested with [`char::is_alphanumeric`], so Unicode letters and
/// digits are accepted, while whitespace and punctuation are not.
/// The empty string yields `false`.
///
/// # Examples
/// ```
/// use utils::is_alphanumeric;
/// assert_eq!(is_alphanumeric("Hello123"), true);
/// assert_eq!(is_alphanumeric("Hello, world!"), false);
/// assert_eq!(is_alphanumeric(""), false);
/// ```
pub(crate) fn is_alphanumeric(text: &str) -> bool {
    // `all` is vacuously true on an empty iterator, so the emptiness check is required.
    !text.is_empty() && text.chars().all(char::is_alphanumeric)
}
25
/// Checks whether a string contains at least one ASCII punctuation character.
///
/// Each `char` is tested with [`char::is_ascii_punctuation`]; non-ASCII
/// punctuation (for example `¿` or `«`) is not detected.
/// The empty string yields `false`.
///
/// # Examples
/// ```
/// use utils::contains_punctuation;
/// assert_eq!(contains_punctuation("Hello, world!"), true);
/// assert_eq!(contains_punctuation("Hello world"), false);
/// ```
pub(crate) fn contains_punctuation(text: &str) -> bool {
    // `any` already returns false for an empty iterator; no separate guard is needed.
    text.chars().any(|c| c.is_ascii_punctuation())
}
39
/// Checks whether a string contains at least one ASCII digit (`0`-`9`).
///
/// Each `char` is tested with [`char::is_ascii_digit`]; digits from other
/// scripts are not detected. The empty string yields `false`.
///
/// # Examples
/// ```
/// use utils::contains_numbers;
/// assert_eq!(contains_numbers("Hello, world!"), false);
/// assert_eq!(contains_numbers("Hello, 123!"), true);
/// ```
pub(crate) fn contains_numbers(text: &str) -> bool {
    // `any` already returns false for an empty iterator; no separate guard is needed.
    text.chars().any(|c| c.is_ascii_digit())
}
59
/// Checks whether a string contains at least one character from `special_characters`.
///
/// The caller decides what counts as "special" by supplying the set; a `char`
/// matches only if it is a member of that set. The empty string, or an empty
/// set, yields `false`.
///
/// # Examples
/// ```
/// use std::collections::HashSet;
/// use utils::contains_special_characters;
/// let special: HashSet<char> = "@#".chars().collect();
/// assert_eq!(contains_special_characters("Hello, world!", special.clone()), false);
/// assert_eq!(contains_special_characters("Hello, world@!", special), true);
/// ```
pub(crate) fn contains_special_characters(text: &str, special_characters: HashSet<char>) -> bool {
    // `any` already returns false for an empty iterator; no separate guard is needed.
    text.chars().any(|c| special_characters.contains(&c))
}
79
80/// Get a progress bar with a custom style.
81/// # Examples
82/// ```
83/// use utils::get_progress_bar;
84/// let progress_bar = get_progress_bar(100);
85/// ```
86pub(crate) fn get_progress_bar(total: u64) -> ProgressBar {
87    let progress_bar = ProgressBar::new(total);
88
89    progress_bar.set_style(ProgressStyle::with_template("{spinner:.green} [{elapsed_precise}] [{wide_bar:.green/blue}] {human_pos}/{human_len} ({eta})")
90        .unwrap()
91        .progress_chars("##-"));
92    progress_bar
93}
94
/// Hashes `text` and returns the 64-bit digest as a lowercase hexadecimal string.
///
/// The digest is produced by the standard library's `DefaultHasher` and is
/// formatted without zero padding, so the result is between 1 and 16 characters.
///
/// The algorithm behind `DefaultHasher` is unspecified and may change between
/// Rust releases, so the returned value should not be relied on as a stable
/// identifier across differently-compiled builds.
pub fn hash_string(text: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut hasher = DefaultHasher::new();
    text.hash(&mut hasher);
    format!("{:x}", hasher.finish())
}
103
/// Reports whether the bytes of `text` form valid UTF-8.
///
/// A `&str` is guaranteed by the language to hold valid UTF-8, so for any
/// safely constructed argument this returns `true`; the check is kept as an
/// explicit validation step.
pub(crate) fn is_valid_utf8(text: &str) -> bool {
    str::from_utf8(text.as_bytes()).is_ok()
}
112
/// Converts a pair of character indices into the matching byte offsets in `text`.
///
/// `start` and `end` count `char`s from the beginning of `text`. Each one is
/// mapped to the byte offset at which that character begins. An index at or
/// past the number of characters maps to `text.len()`, the end-of-text
/// position, so an empty span at the end of the text stays empty rather than
/// being widened to the whole string.
///
/// Returns `(start_byte, end_byte)`.
pub(crate) fn char_to_byte(text: String, start: usize, end: usize) -> (usize, usize) {
    // Byte offset where the `char_index`-th character starts, or the text length
    // when the index lies past the last character.
    let byte_offset = |char_index: usize| {
        text.char_indices()
            .nth(char_index)
            .map_or(text.len(), |(offset, _)| offset)
    };

    (byte_offset(start), byte_offset(end))
}