1#![no_std]
30
31extern crate alloc;
32
33use alloc::collections::BTreeMap;
34use core::{
35 ops::{Add, AddAssign},
36 str::from_utf8_unchecked,
37};
38
/// The result of counting a piece of text.
///
/// All fields are plain tallies, so the type is cheap to copy, can be compared
/// for equality, hashed, and summed with `+` / `+=`.
#[derive(Debug, Clone, Copy, Default, Eq, PartialEq, Hash)]
pub struct WordsCount {
    /// Number of words found.
    pub words: usize,
    /// Number of `char`s seen, whitespace included.
    pub characters: usize,
    /// Number of `char`s for which `char::is_whitespace` is true.
    pub whitespaces: usize,
    /// Number of `char`s classified as CJK.
    pub cjk: usize,
}
46
47pub const NEWLINE: WordsCount =
63 WordsCount {
64 words: 0, characters: 1, whitespaces: 1, cjk: 0
65 };
66
67impl AddAssign for WordsCount {
68 #[inline]
69 fn add_assign(&mut self, other: Self) {
70 *self = Self {
71 words: self.words + other.words,
72 characters: self.characters + other.characters,
73 whitespaces: self.whitespaces + other.whitespaces,
74 cjk: self.cjk + other.cjk,
75 }
76 }
77}
78
79impl Add for WordsCount {
80 type Output = Self;
81
82 #[inline]
83 fn add(mut self, other: Self) -> Self {
84 self += other;
85 self
86 }
87}
88
89pub fn count<S: AsRef<str>>(s: S) -> WordsCount {
91 let mut in_word = false;
92 let mut consecutive_dashes = 0usize;
93
94 let mut count = WordsCount::default();
95
96 for c in s.as_ref().chars() {
97 count.characters += 1;
98
99 if c.is_whitespace() {
100 consecutive_dashes = 0;
101
102 count.whitespaces += 1;
103
104 if in_word {
105 count.words += 1;
106
107 in_word = false;
108 }
109 } else {
110 match c {
111 '-' => {
112 consecutive_dashes += 1;
113
114 if consecutive_dashes > 1 && in_word {
115 if consecutive_dashes == 2 {
116 count.words += 1;
117 }
118
119 in_word = false;
120
121 continue;
122 }
123 },
124 _ => {
125 consecutive_dashes = 0;
126
127 if unicode_blocks::is_cjk(c) {
128 count.words += 1;
129 count.cjk += 1;
130
131 if in_word {
132 count.words += 1;
133
134 in_word = false;
135 }
136
137 continue;
138 }
139 },
140 }
141
142 if !in_word {
143 in_word = true;
144 }
145 }
146 }
147
148 if in_word {
149 count.words += 1;
150 }
151
152 count
153}
154
155pub fn count_separately<S: ?Sized + AsRef<str>>(s: &S) -> BTreeMap<&str, usize> {
157 let mut in_word = false;
158 let mut consecutive_dashes = 0usize;
159
160 let mut count = BTreeMap::new();
161
162 let mut p = 0;
163 let mut pp = 0;
164
165 let s = s.as_ref();
166 let bytes = s.as_bytes();
167
168 for c in s.chars() {
169 let cl = c.len_utf8();
170
171 if c.is_whitespace() {
172 if in_word {
173 inc_or_insert(&mut count, unsafe { from_utf8_unchecked(&bytes[p..pp]) });
174
175 in_word = false;
176 }
177
178 p = pp + cl;
179
180 consecutive_dashes = 0;
181 } else {
182 match c {
183 '-' => {
184 consecutive_dashes += 1;
185
186 if consecutive_dashes > 1 {
187 if in_word {
188 if consecutive_dashes == 2 {
189 inc_or_insert(&mut count, unsafe {
190 from_utf8_unchecked(&bytes[p..(pp - 1)])
191 });
192 }
193
194 in_word = false;
195
196 pp += cl;
197 p = pp;
198 continue;
199 } else {
200 p = pp + cl;
201 }
202 }
203 },
204 _ => {
205 if unicode_blocks::is_cjk(c) {
206 inc_or_insert(&mut count, unsafe {
207 from_utf8_unchecked(&bytes[pp..(pp + cl)])
208 });
209
210 if in_word {
211 inc_or_insert(&mut count, unsafe {
212 from_utf8_unchecked(&bytes[p..pp])
213 });
214
215 in_word = false;
216 }
217
218 consecutive_dashes = 0;
219 pp += cl;
220 p = pp;
221 continue;
222 }
223
224 consecutive_dashes = 0;
225 },
226 }
227
228 if !in_word {
229 in_word = true;
230 }
231 }
232
233 pp += cl;
234 }
235
236 if in_word {
237 inc_or_insert(&mut count, unsafe { from_utf8_unchecked(&bytes[p..pp]) });
238 }
239
240 count
241}
242
/// Adds one occurrence of `s` to `map`, inserting it with a count of 1 if it is
/// not present yet.
#[inline]
fn inc_or_insert<'a>(map: &mut BTreeMap<&'a str, usize>, s: &'a str) {
    // The entry API finds the slot once instead of looking the key up for
    // `get_mut` and then again for `insert`.
    *map.entry(s).or_insert(0) += 1;
}