simple_word_count/
lib.rs

1//! A simple word count function that tries to produce the same result as the **Microsoft Office Word** application.
2//!
3//! The result is not guaranteed to match for special characters. For example, the emoji sequence "😘😦😒" is counted as `1` in **Microsoft Office Word**, but as `3` by [word_count].
4//!
5//! # Examples
6//!
7//! ``` rust
8//! use simple_word_count::word_count;
9//!
10//! fn main() {
11//!     assert_eq!(word_count("helloworld"), 1);
12//!     assert_eq!(word_count("hello world"), 2);
13//!     assert_eq!(word_count("hello, world"), 2);
14//!     assert_eq!(word_count("h e l l o	w o r l d"), 10);
15//!     assert_eq!(word_count("hi......"), 1);
16//!     assert_eq!(word_count("hello, world | 你好,世界"), 8);
17//!     assert_eq!(word_count("你好世界"), 4);
18//!     assert_eq!(word_count("你好,世界"), 5);
19//!     assert_eq!(word_count("你 好 世 界"), 4);
20//!     assert_eq!(word_count("你好。。。。"), 6);
21//!     assert_eq!(word_count("a=b+c-d"), 1);
22//!     assert_eq!(word_count("a = b + c - d"), 7);
23//!     assert_eq!(word_count("123"), 1);
24//!     assert_eq!(word_count("123.456"), 1);
25//!     assert_eq!(word_count("123..456"), 1);
26//!     assert_eq!(word_count("123.456."), 1);
27//!     assert_eq!(word_count("123 456"), 2);
28//!     assert_eq!(word_count("1+1=2"), 1);
29//!     assert_eq!(word_count("1 + 1 = 2"), 5);
30//!     assert_eq!(word_count("&&%%$$¥¥"), 3);
31//!     assert_eq!(word_count("<>《》"), 3);
32//! }
33//! ```
34
/// Counts the words in `text`, approximating the figure reported by
/// **Microsoft Office Word**.
///
/// The text is scanned one `char` at a time with these rules:
///
/// - whitespace is never counted and ends the current ASCII run;
/// - a run of consecutive non-whitespace ASCII characters counts as one word;
/// - every non-ASCII, non-whitespace character counts as one word by itself
///   and also ends the current ASCII run.
///
/// Returns `0` for an empty or all-whitespace string.
///
/// # Examples
///
/// ``` rust
/// use simple_word_count::word_count;
///
/// fn main() {
///     assert_eq!(word_count("helloworld"), 1);
///     assert_eq!(word_count("hello world"), 2);
///     assert_eq!(word_count("hello, world"), 2);
///     assert_eq!(word_count("h e l l o\tw o r l d"), 10);
///     assert_eq!(word_count("hi......"), 1);
///     assert_eq!(word_count("hello, world | 你好,世界"), 8);
///     assert_eq!(word_count("你好世界"), 4);
///     assert_eq!(word_count("你好,世界"), 5);
///     assert_eq!(word_count("你 好 世 界"), 4);
///     assert_eq!(word_count("你好。。。。"), 6);
///     assert_eq!(word_count("a=b+c-d"), 1);
///     assert_eq!(word_count("a = b + c - d"), 7);
///     assert_eq!(word_count("123"), 1);
///     assert_eq!(word_count("123.456"), 1);
///     assert_eq!(word_count("123..456"), 1);
///     assert_eq!(word_count("123.456."), 1);
///     assert_eq!(word_count("123 456"), 2);
///     assert_eq!(word_count("1+1=2"), 1);
///     assert_eq!(word_count("1 + 1 = 2"), 5);
///     assert_eq!(word_count("(╯°□°)╯︵ ┻━┻"), 11);
///     assert_eq!(word_count("(●'◡'●)"), 7);
///     assert_eq!(word_count("( ´・・)ノ(._.`)"), 7);
///     assert_eq!(word_count("&&%%$$¥¥"), 3);
///     assert_eq!(word_count("EXH.C-◇"), 2);
///     assert_eq!(word_count("<>《》"), 3);
/// }
/// ```
pub fn word_count(text: &str) -> usize {
    // State carried through the scan: (words so far, currently inside an ASCII run).
    let (words, _in_ascii_run) =
        text.chars()
            .fold((0usize, false), |(words, in_ascii_run), ch| match ch {
                // Whitespace only terminates a run; it is never counted.
                c if c.is_whitespace() => (words, false),
                // Only the first character of an ASCII run adds a word.
                c if c.is_ascii() => (words + usize::from(!in_ascii_run), true),
                // Any other character is a word on its own and breaks the run.
                _ => (words + 1, false),
            });
    words
}
89
// Unit tests for `word_count`, grouped by the kind of text being counted.
//
// Full-width punctuation is written with `\u{...}` escapes so the non-ASCII
// code points cannot be silently folded to their ASCII look-alikes, which
// would change the expected count from one-per-character to one-per-run.
#[cfg(test)]
mod tests {
    use super::word_count;

    /// Digits and ASCII operators without spaces form a single run.
    #[test]
    fn number() {
        assert_eq!(word_count("123"), 1);
        assert_eq!(word_count("123.456"), 1);
        assert_eq!(word_count("123..456"), 1);
        assert_eq!(word_count("123.456."), 1);
        assert_eq!(word_count("123 456"), 2);
        assert_eq!(word_count("1+1=2"), 1);
        assert_eq!(word_count("1 + 1 = 2"), 5);
    }

    /// ASCII text is split on whitespace only; punctuation stays in its run.
    #[test]
    fn english() {
        assert_eq!(word_count("helloworld"), 1);
        assert_eq!(word_count("hello world"), 2);
        assert_eq!(word_count("hello, world"), 2);
        assert_eq!(word_count("hello, world."), 2);
        assert_eq!(word_count("h e l l o\tw o r l d"), 10);
        assert_eq!(word_count("a = b + c - d"), 7);
        assert_eq!(word_count("a=b+c-d"), 1);
        assert_eq!(word_count("hi......"), 1);
        assert_eq!(word_count("......"), 1);
    }

    /// Every non-ASCII character counts on its own, punctuation included.
    #[test]
    fn chinese() {
        assert_eq!(word_count("你好世界"), 4);
        // ASCII comma between CJK characters: a one-character ASCII run.
        assert_eq!(word_count("你好,世界"), 5);
        // Full-width comma (U+FF0C): counted as a non-ASCII character.
        assert_eq!(word_count("你好\u{FF0C}世界"), 5);
        assert_eq!(word_count("你 好 世 界"), 4);
        assert_eq!(word_count("。。。。"), 4);
    }

    /// ASCII punctuation collapses into one run; full-width and typographic
    /// punctuation counts one per character.
    #[test]
    fn punctuation() {
        assert_eq!(word_count(":::"), 1);
        // Full-width colon U+FF1A.
        assert_eq!(word_count("\u{FF1A}\u{FF1A}\u{FF1A}"), 3);
        assert_eq!(word_count("'''"), 1);
        // Curly single quotes U+2018 / U+2019.
        assert_eq!(word_count("\u{2018}\u{2019}\u{2018}"), 3);
        assert_eq!(word_count(";;;"), 1);
        // Full-width semicolon U+FF1B.
        assert_eq!(word_count("\u{FF1B}\u{FF1B}\u{FF1B}"), 3);
        // ASCII "()" is one run, full-width parentheses U+FF08 / U+FF09 are two more.
        assert_eq!(word_count("()\u{FF08}\u{FF09}"), 3);
        // ASCII "<>" is one run, CJK angle brackets U+300A / U+300B are two more.
        assert_eq!(word_count("<>\u{300A}\u{300B}"), 3);
    }

    /// Mixed scripts, kaomoji and symbols.
    #[test]
    fn others() {
        assert_eq!(word_count("hello, world | 你好,世界"), 8);
        assert_eq!(word_count("(╯°□°)╯︵ ┻━┻"), 11);
        assert_eq!(word_count("(●'◡'●)"), 7);
        assert_eq!(word_count("( ´・・)ノ(._.`)"), 7);
        assert_eq!(word_count("&&%%$$¥¥"), 3);
        assert_eq!(word_count("EXH.C-◇"), 2);
    }
}