voca_rs/
count.rs

//! Functions for counting characters, graphemes, substrings and words in a string.
2
3/// Counts the characters in `subject`.
4///
5/// # Arguments
6///
7/// * `subject` - The string to count characters.
8///
9/// # Example
10///
11/// ```
12/// use voca_rs::*;
13/// count::count("rain");
14/// // => 4
15/// count::count("błąd");
16/// // => 4
17/// use voca_rs::Voca;
18/// "rain"._count();
19/// // => 4
20/// ```
21pub fn count(subject: &str) -> usize {
22    match subject.len() {
23        0 => 0,
24        _ => crate::split::chars(subject).len(),
25    }
26}
27
28/// Counts the graphemes in `subject` taking care of surrogate pairs and combining marks.
29///
30/// # Arguments
31///
32/// * `subject` - The string to count graphemes.
33///
34/// # Example
35///
36/// ```
37/// use voca_rs::*;
38/// count::count_graphemes("cafe\u{0301}"); // or "café"
39/// // => 4
40/// count::count_graphemes("b\u{0142}\u{0105}d"); // or "błąd"
41/// // => 4
42/// count::count_graphemes("a̐éö̲");
43/// // => 3
44/// count::count_graphemes("rain");
45/// // => 4
46/// use voca_rs::Voca;
47/// "cafe\u{0301}"._count_graphemes(); // or "café"
48/// // => 4
49/// ```
50pub fn count_graphemes(subject: &str) -> usize {
51    match subject.len() {
52        0 => 0,
53        _ => crate::split::graphemes(subject).len(),
54    }
55}
56
/// Returns how many times `substring` occurs in `subject`.
///
/// Occurrences are counted without overlap (`"aaaa"` contains `"aa"` twice).
/// The result is `0` when either `subject` or `substring` is empty.
///
/// # Arguments
///
/// * `subject` - The string to search in.
/// * `substring` - The substring whose occurrences are counted.
///
/// # Example
///
/// ```
/// use voca_rs::*;
/// count::count_substrings("bad boys, bad boys whatcha gonna do?", "boys");
/// // => 2
/// count::count_substrings("Cafe\u{0301} del Mar", "Café"); // or "Café del Mar"
/// // => 1
/// count::count_substrings("every dog has its day", "cat");
/// // => 0
/// use voca_rs::Voca;
/// "bad boys, bad boys whatcha gonna do?"._count_substrings("boys");
/// // => 2
/// ```
pub fn count_substrings(subject: &str, substring: &str) -> usize {
    // An empty needle would otherwise match at every character boundary.
    if subject.is_empty() || substring.is_empty() {
        return 0;
    }
    subject.matches(substring).count()
}
91
92/// Counts the characters in `subject` for which `predicate` returns true. This function respects unicode.
93///
94/// # Arguments
95///
96/// * `subject` - The string to count characters.
97/// * `predicate` - The predicate function invoked on each character with a parameter `(string)`.
98///
99/// # Example
100///
101/// ```
102/// use voca_rs::*;
103/// count::count_where("hola!", voca_rs::query::is_alpha);
104/// // => 4
105/// count::count_where("2022", |s: &str| -> bool { s == "2" });
106/// // => 3
107/// use voca_rs::Voca;
108/// "hola!"._count_where(voca_rs::query::is_alpha);
109/// // => 4
110/// ```
111pub fn count_where(subject: &str, f: fn(&str) -> bool) -> usize {
112    match subject.len() {
113        0 => 0,
114        _ => {
115            let mut res = 0;
116            for c in crate::split::graphemes(subject).iter() {
117                if f(c) {
118                    res += 1;
119                }
120            }
121            res
122        }
123    }
124}
125
126/// Counts the number of words in `subject`.
127///
128/// # Arguments
129///
130/// * `subject` - The string where to count.
131/// * `pattern` - The pattern to watch words.
132///
133/// # Example
134///
135/// ```
136/// use voca_rs::*;
137/// count::count_words("Gravity - can cross dimensions!", "");
138/// // => 4
139/// count::count_words("GravityCanCrossDimensions", "");
140/// // => 4
141/// count::count_words("Cafe\u{0301}-del-Mar-andBossaNova1", "-");
142/// // => 4
143/// use voca_rs::Voca;
144/// "Gravity - can cross dimensions!"._count_words("");
145/// // => 4
146/// ```
147pub fn count_words(subject: &str, pattern: &str) -> usize {
148    fn match_substring(subject: &str, pattern: &str) -> usize {
149        match pattern.len() {
150            0 => crate::split::words(subject).len(),
151            _ => subject
152                .split_terminator(pattern)
153                .count(),
154        }
155    }
156    match subject.len() {
157        0 => 0,
158        _ => match_substring(subject, pattern),
159    }
160}
161
162use std::collections::HashMap;
163/// Counting occurrences of unique words in `subject`. This function respects unicode.
164///
165/// # Arguments
166///
167/// * `subject` - The string to count characters.
168/// * `pattern` - The pattern to watch words.
169///
170/// # Example
171///
172/// ```
173/// use voca_rs::*;
174/// count::count_unique_words("hello world wonderful world", "");
175/// // => 3
176/// count::count_unique_words("Arabic: أنا قادر على أكل الزجاج و هذا لا يؤلمني. أنا قادر على أكل الزجاج و.", "");
177/// // => ???????????????????????????????????
178/// use voca_rs::Voca;
179/// "Hebrew: אני יכול לאכול זכוכית וזה לא מזיק לי. אני יכול לאכול זכוכית."._count_words("");
180/// // => 9
181/// ```
182pub fn count_unique_words(subject: &str, pattern: &str) -> usize {
183    let mut unique_words = HashMap::new();
184    let words = match pattern.len() {
185        0 => crate::split::words(subject),
186        _ => subject.split_terminator(pattern).collect::<Vec<_>>(),
187    };
188    if words.is_empty() {
189        return 0;
190    };
191
192    for word in words {
193        unique_words.entry(word).or_insert(0);
194    }
195    unique_words.len()
196}