simple_word_count/lib.rs
//! A simple word-count function that tries to produce the same result as the **Microsoft Office Word** application.
2//!
//! The same result is not guaranteed for special characters. For example, the emoji string "😘😦😒" is counted as `1` by **Microsoft Office Word**, but as `3` by [word_count].
4//!
5//! # Examples
6//!
7//! ``` rust
8//! use simple_word_count::word_count;
9//!
10//! fn main() {
11//! assert_eq!(word_count("helloworld"), 1);
12//! assert_eq!(word_count("hello world"), 2);
13//! assert_eq!(word_count("hello, world"), 2);
14//! assert_eq!(word_count("h e l l o w o r l d"), 10);
15//! assert_eq!(word_count("hi......"), 1);
16//! assert_eq!(word_count("hello, world | 你好,世界"), 8);
17//! assert_eq!(word_count("你好世界"), 4);
18//! assert_eq!(word_count("你好,世界"), 5);
19//! assert_eq!(word_count("你 好 世 界"), 4);
20//! assert_eq!(word_count("你好。。。。"), 6);
21//! assert_eq!(word_count("a=b+c-d"), 1);
22//! assert_eq!(word_count("a = b + c - d"), 7);
23//! assert_eq!(word_count("123"), 1);
24//! assert_eq!(word_count("123.456"), 1);
25//! assert_eq!(word_count("123..456"), 1);
26//! assert_eq!(word_count("123.456."), 1);
27//! assert_eq!(word_count("123 456"), 2);
28//! assert_eq!(word_count("1+1=2"), 1);
29//! assert_eq!(word_count("1 + 1 = 2"), 5);
30//! assert_eq!(word_count("&&%%$$¥¥"), 3);
31//! assert_eq!(word_count("<>《》"), 3);
32//! }
33//! ```
34
/// Counts the words in `text`, approximating the figure reported by
/// **Microsoft Office Word**.
///
/// The counting rules are:
///
/// * whitespace never counts and always ends the current word;
/// * a maximal run of consecutive non-whitespace ASCII characters (letters,
///   digits and punctuation alike) counts as one word;
/// * every non-whitespace, non-ASCII character counts as one word on its own
///   and also ends any ASCII run that precedes it (so `"café"` counts as 2).
///
/// An empty or whitespace-only string yields `0`.
///
/// # Examples
///
/// ```rust
/// use simple_word_count::word_count;
///
/// assert_eq!(word_count("helloworld"), 1);
/// assert_eq!(word_count("hello world"), 2);
/// assert_eq!(word_count("hello, world"), 2);
/// assert_eq!(word_count("h e l l o w o r l d"), 10);
/// assert_eq!(word_count("hi......"), 1);
/// assert_eq!(word_count("hello, world | 你好,世界"), 8);
/// assert_eq!(word_count("你好世界"), 4);
/// assert_eq!(word_count("你好,世界"), 5);
/// assert_eq!(word_count("你 好 世 界"), 4);
/// assert_eq!(word_count("你好。。。。"), 6);
/// assert_eq!(word_count("a=b+c-d"), 1);
/// assert_eq!(word_count("a = b + c - d"), 7);
/// assert_eq!(word_count("123"), 1);
/// assert_eq!(word_count("123.456"), 1);
/// assert_eq!(word_count("123..456"), 1);
/// assert_eq!(word_count("123.456."), 1);
/// assert_eq!(word_count("123 456"), 2);
/// assert_eq!(word_count("1+1=2"), 1);
/// assert_eq!(word_count("1 + 1 = 2"), 5);
/// assert_eq!(word_count("(╯°□°)╯︵ ┻━┻"), 11);
/// assert_eq!(word_count("(●'◡'●)"), 7);
/// assert_eq!(word_count("( ´・・)ノ(._.`)"), 7);
/// assert_eq!(word_count("&&%%$$¥¥"), 3);
/// assert_eq!(word_count("EXH.C-◇"), 2);
/// assert_eq!(word_count("<>《》"), 3);
/// ```
pub fn word_count(text: &str) -> usize {
    // The accumulator is `(words so far, currently inside an ASCII run?)`.
    let (words, _) = text
        .chars()
        .fold((0usize, false), |(words, in_ascii_run), ch| {
            // Whitespace must be tested first: ASCII blanks are also `is_ascii`.
            if ch.is_whitespace() {
                (words, false)
            } else if ch.is_ascii() {
                // Only the first character of an ASCII run opens a new word.
                (words + usize::from(!in_ascii_run), true)
            } else {
                // A non-ASCII character is always a word of its own.
                (words + 1, false)
            }
        });
    words
}
89
#[cfg(test)]
mod tests {
    // Unit tests pinning the counting rules of `word_count`: whitespace
    // separates words, a run of ASCII characters is one word, and every
    // non-ASCII character is a word of its own.
    use super::word_count;

    /// Empty and whitespace-only input contains no words.
    #[test]
    fn empty_and_whitespace() {
        assert_eq!(word_count(""), 0);
        assert_eq!(word_count("   "), 0);
        assert_eq!(word_count(" \t\n"), 0);
    }

    /// Digits glued to ASCII punctuation/operators stay a single word.
    #[test]
    fn number() {
        assert_eq!(word_count("123"), 1);
        assert_eq!(word_count("123.456"), 1);
        assert_eq!(word_count("123..456"), 1);
        assert_eq!(word_count("123.456."), 1);
        assert_eq!(word_count("123 456"), 2);
        assert_eq!(word_count("1+1=2"), 1);
        assert_eq!(word_count("1 + 1 = 2"), 5);
    }

    /// ASCII text is split on whitespace only; punctuation never splits.
    #[test]
    fn english() {
        assert_eq!(word_count("helloworld"), 1);
        assert_eq!(word_count("hello world"), 2);
        assert_eq!(word_count("hello, world"), 2);
        assert_eq!(word_count("hello, world."), 2);
        assert_eq!(word_count("h e l l o w o r l d"), 10);
        assert_eq!(word_count("a = b + c - d"), 7);
        assert_eq!(word_count("a=b+c-d"), 1);
        assert_eq!(word_count("hi......"), 1);
        assert_eq!(word_count("......"), 1);
    }

    /// Every CJK character, including CJK punctuation, is one word.
    #[test]
    fn chinese() {
        assert_eq!(word_count("你好世界"), 4);
        assert_eq!(word_count("你好,世界"), 5);
        assert_eq!(word_count("你 好 世 界"), 4);
        assert_eq!(word_count("。。。。"), 4);
    }

    /// ASCII punctuation runs collapse into one word, whereas each full-width
    /// (non-ASCII) mark counts separately.
    #[test]
    fn punctuation() {
        // The full-width marks are written as escapes so that they cannot be
        // silently normalised into their ASCII look-alikes, which would turn
        // the paired assertions below into contradictions.
        assert_eq!(word_count(":::"), 1);
        assert_eq!(word_count("\u{FF1A}\u{FF1A}\u{FF1A}"), 3); // full-width colon
        assert_eq!(word_count("'''"), 1);
        assert_eq!(word_count("‘’‘"), 3);
        assert_eq!(word_count(";;;"), 1);
        assert_eq!(word_count("\u{FF1B}\u{FF1B}\u{FF1B}"), 3); // full-width semicolon
        // ASCII "()" is one word; the full-width pair adds two more.
        assert_eq!(word_count("()\u{FF08}\u{FF09}"), 3);
        assert_eq!(word_count("<>《》"), 3);
    }

    /// Mixed scripts, symbols and emoticons.
    #[test]
    fn others() {
        assert_eq!(word_count("hello, world | 你好,世界"), 8);
        assert_eq!(word_count("(╯°□°)╯︵ ┻━┻"), 11);
        assert_eq!(word_count("(●'◡'●)"), 7);
        assert_eq!(word_count("( ´・・)ノ(._.`)"), 7);
        assert_eq!(word_count("&&%%$$¥¥"), 3);
        assert_eq!(word_count("EXH.C-◇"), 2);
    }
}
147}