1use memchr::memchr_iter;
2use rayon::prelude::*;
3
/// Input size in bytes (2 MiB) below which the `*_parallel` functions in this
/// file fall back to their single-threaded counterparts.
const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;
7
/// Aggregate counts for a single input buffer, as produced by `count_all`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct WcCounts {
    /// Number of `\n` bytes in the input.
    pub lines: u64,
    /// Number of words (locale dependent, see `count_words_locale`).
    pub words: u64,
    /// Length of the input in bytes.
    pub bytes: u64,
    /// Number of characters (locale dependent, see `count_chars`).
    pub chars: u64,
    /// Display width of the widest line (see `max_line_length`).
    pub max_line_length: u64,
}
17
/// Builds the byte classification table used for word splitting in UTF-8 mode.
///
/// An entry is `1` when the byte separates words and `0` otherwise. Separators
/// are every byte from 0x00 through 0x20 (ASCII controls and space), DEL
/// (0x7F), and the bytes 0xC0, 0xC1, 0xFE and 0xFF, which never occur in
/// well-formed UTF-8.
const fn make_ws_table_utf8() -> [u8; 256] {
    // Separator bytes outside the contiguous 0x00..=0x20 run.
    const EXTRA_SEPARATORS: [usize; 5] = [0x7F, 0xC0, 0xC1, 0xFE, 0xFF];

    let mut table = [0u8; 256];

    let mut byte = 0usize;
    while byte <= 0x20 {
        table[byte] = 1;
        byte += 1;
    }

    let mut k = 0usize;
    while k < EXTRA_SEPARATORS.len() {
        table[EXTRA_SEPARATORS[k]] = 1;
        k += 1;
    }

    table
}
42
/// Builds the byte classification table used for word splitting in the C
/// (single-byte) locale.
///
/// Only the bytes 0x21 through 0x7E are word constituents (entry `0`); every
/// other byte, including all bytes >= 0x7F, is a separator (entry `1`).
const fn make_ws_table_c() -> [u8; 256] {
    let mut table = [0u8; 256];
    let mut byte = 0usize;
    while byte < 256 {
        let word_byte = byte >= 0x21 && byte <= 0x7E;
        table[byte] = (!word_byte) as u8;
        byte += 1;
    }
    table
}
57
/// Word-separator lookup table for UTF-8 mode, built at compile time by
/// `make_ws_table_utf8`: entry `1` marks a separator byte, `0` a word byte.
const WS_TABLE_UTF8: [u8; 256] = make_ws_table_utf8();

/// Word-separator lookup table for the C locale, built at compile time by
/// `make_ws_table_c`: entry `1` marks a separator byte, `0` a word byte.
const WS_TABLE_C: [u8; 256] = make_ws_table_c();
63
64#[inline]
67pub fn ws_table(utf8: bool) -> &'static [u8; 256] {
68 if utf8 { &WS_TABLE_UTF8 } else { &WS_TABLE_C }
69}
70
/// Builds a lookup table whose entry is `1` for the bytes 0x20 through 0x7E
/// (space and the visible ASCII characters) and `0` for every other byte.
const fn make_printable_table() -> [u8; 256] {
    let mut table = [0u8; 256];
    let mut byte = 0usize;
    while byte < 256 {
        table[byte] = (byte >= 0x20 && byte <= 0x7E) as u8;
        byte += 1;
    }
    table
}
81
/// Lookup table built at compile time by `make_printable_table`; a non-zero
/// entry means the byte advances the column in `max_line_length_c`.
const PRINTABLE_TABLE: [u8; 256] = make_printable_table();
83
84#[inline]
87pub fn count_lines(data: &[u8]) -> u64 {
88 memchr_iter(b'\n', data).count() as u64
89}
90
/// Returns the length of `data` in bytes.
#[inline]
pub fn count_bytes(data: &[u8]) -> u64 {
    let byte_len: usize = data.len();
    byte_len as u64
}
96
97pub fn count_words(data: &[u8]) -> u64 {
106 count_words_locale(data, true)
107}
108
109pub fn count_words_locale(data: &[u8], utf8: bool) -> u64 {
113 if utf8 {
114 count_words_utf8(data)
115 } else {
116 count_words_with_table(data, &WS_TABLE_C)
117 }
118}
119
/// Counts words in `data`, interpreting it as UTF-8.
///
/// Classification per lead byte:
/// * 0x00..=0x20 and 0x7F end the current word;
/// * 0x21..=0x7E are word characters;
/// * a lead byte 0xC2..=0xF4 followed by the right number of continuation
///   bytes (`10xxxxxx`) forms one word character and is consumed as a unit;
/// * anything else (stray continuation bytes, 0xC0/0xC1, 0xF5..=0xFF, or a
///   lead byte with a missing/invalid tail) ends the current word and is
///   consumed as a single byte.
///
/// Decoded code points are not inspected, so overlong or surrogate encodings
/// with well-formed continuation bytes count as word characters.
fn count_words_utf8(data: &[u8]) -> u64 {
    // True when `extra` bytes exist after `start` and all are continuation bytes.
    let has_tail = |start: usize, extra: usize| -> bool {
        match data.get(start + 1..start + 1 + extra) {
            Some(tail) => tail.iter().all(|&c| (c & 0xC0) == 0x80),
            None => false,
        }
    };

    let mut total = 0u64;
    let mut inside_word = false;
    let mut pos = 0usize;

    while let Some(&lead) = data.get(pos) {
        // (bytes consumed, whether they form a word character)
        let (step, is_word_char) = match lead {
            0x00..=0x20 | 0x7F => (1, false),
            0x21..=0x7E => (1, true),
            0x80..=0xC1 => (1, false),
            0xC2..=0xDF if has_tail(pos, 1) => (2, true),
            0xE0..=0xEF if has_tail(pos, 2) => (3, true),
            0xF0..=0xF4 if has_tail(pos, 3) => (4, true),
            _ => (1, false),
        };

        // A word starts on the transition from "outside" to a word character.
        if is_word_char && !inside_word {
            total += 1;
        }
        inside_word = is_word_char;
        pos += step;
    }

    total
}
199
/// Counts words in `data` using a per-byte separator `table`.
///
/// `table[b]` is expected to be `1` for separator bytes and `0` for word
/// bytes (as produced by the `make_ws_table_*` builders). A word start is a
/// word byte whose predecessor is a separator; the start of the buffer counts
/// as being preceded by a separator.
///
/// The input is processed in 64-byte blocks: each block is turned into a
/// 64-bit separator mask so that all word starts in the block can be counted
/// with one `count_ones`. The remaining bytes are handled one at a time.
fn count_words_with_table(data: &[u8], table: &[u8; 256]) -> u64 {
    let mut total = 0u64;
    // Separator flag of the byte just before the current block (1 at start).
    let mut carry = 1u64;

    let mut blocks = data.chunks_exact(64);
    for block in blocks.by_ref() {
        // Bit k is set when byte k of the block is a separator.
        let sep_bits = block
            .iter()
            .enumerate()
            .fold(0u64, |acc, (bit, &byte)| {
                acc | (u64::from(table[usize::from(byte)]) << bit)
            });

        // Bit k is set when the byte *before* position k is a separator.
        let preceded_by_sep = (sep_bits << 1) | carry;
        total += u64::from((preceded_by_sep & !sep_bits).count_ones());
        carry = (sep_bits >> 63) & 1;
    }

    let mut prev_sep = carry as u8;
    for &byte in blocks.remainder() {
        let sep = table[usize::from(byte)];
        total += u64::from(prev_sep & (sep ^ 1));
        prev_sep = sep;
    }

    total
}
237
238#[cfg(target_arch = "x86_64")]
242#[target_feature(enable = "sse2")]
243#[allow(dead_code)]
244unsafe fn count_words_sse2(data: &[u8]) -> u64 {
245 use std::arch::x86_64::*;
246
247 unsafe {
248 let zero = _mm_setzero_si128();
252 let min_ws = _mm_set1_epi8(0x08); let max_ws = _mm_set1_epi8(0x0E); let space = _mm_set1_epi8(0x20);
255
256 let mut words = 0u64;
257 let mut prev_ws_bit = 1u64; let chunks = data.chunks_exact(64);
260 let remainder = chunks.remainder();
261
262 for chunk in chunks {
263 let ptr = chunk.as_ptr();
264
265 let v0 = _mm_loadu_si128(ptr as *const __m128i);
267 let v1 = _mm_loadu_si128(ptr.add(16) as *const __m128i);
268 let v2 = _mm_loadu_si128(ptr.add(32) as *const __m128i);
269 let v3 = _mm_loadu_si128(ptr.add(48) as *const __m128i);
270
271 macro_rules! detect_ws {
273 ($v:expr) => {{
274 let ge_9 = _mm_cmpgt_epi8($v, min_ws);
275 let le_d = _mm_cmpgt_epi8(max_ws, $v);
276 let in_range = _mm_and_si128(ge_9, le_d);
277 let is_sp = _mm_cmpeq_epi8($v, space);
278 let is_null = _mm_cmpeq_epi8($v, zero);
279 let ws = _mm_or_si128(in_range, is_sp);
280 _mm_or_si128(ws, is_null)
281 }};
282 }
283
284 let ws0 = detect_ws!(v0);
285 let ws1 = detect_ws!(v1);
286 let ws2 = detect_ws!(v2);
287 let ws3 = detect_ws!(v3);
288
289 let m0 = (_mm_movemask_epi8(ws0) as u16) as u64;
291 let m1 = (_mm_movemask_epi8(ws1) as u16) as u64;
292 let m2 = (_mm_movemask_epi8(ws2) as u16) as u64;
293 let m3 = (_mm_movemask_epi8(ws3) as u16) as u64;
294 let ws_mask = m0 | (m1 << 16) | (m2 << 32) | (m3 << 48);
295
296 let prev_mask = (ws_mask << 1) | prev_ws_bit;
298 let word_starts = prev_mask & !ws_mask;
299 words += word_starts.count_ones() as u64;
300
301 prev_ws_bit = (ws_mask >> 63) & 1;
302 }
303
304 let sub_chunks = remainder.chunks_exact(16);
306 let sub_remainder = sub_chunks.remainder();
307 let mut prev_ws_u32 = prev_ws_bit as u32;
308
309 for chunk in sub_chunks {
310 let v = _mm_loadu_si128(chunk.as_ptr() as *const __m128i);
311 let ge_9 = _mm_cmpgt_epi8(v, min_ws);
312 let le_d = _mm_cmpgt_epi8(max_ws, v);
313 let in_range = _mm_and_si128(ge_9, le_d);
314 let is_sp = _mm_cmpeq_epi8(v, space);
315 let is_null = _mm_cmpeq_epi8(v, zero);
316 let ws_vec = _mm_or_si128(_mm_or_si128(in_range, is_sp), is_null);
317 let ws_mask = _mm_movemask_epi8(ws_vec) as u32;
318
319 let prev_mask = (ws_mask << 1) | prev_ws_u32;
320 let word_starts = prev_mask & (!ws_mask & 0xFFFF);
321 words += word_starts.count_ones() as u64;
322 prev_ws_u32 = (ws_mask >> 15) & 1;
323 }
324
325 let mut prev_ws = prev_ws_u32 as u8;
327 for &b in sub_remainder {
328 let curr_ws = WS_TABLE_UTF8[b as usize];
329 words += (prev_ws & (curr_ws ^ 1)) as u64;
330 prev_ws = curr_ws;
331 }
332 words
333 }
334}
335
336pub fn count_lines_words(data: &[u8], utf8: bool) -> (u64, u64) {
338 let table = ws_table(utf8);
339 let mut words = 0u64;
340 let mut lines = 0u64;
341 let mut prev_ws_bit = 1u64;
342
343 let chunks = data.chunks_exact(64);
344 let remainder = chunks.remainder();
345
346 for chunk in chunks {
347 let mut ws_mask = 0u64;
348 let mut nl_mask = 0u64;
349 let mut i = 0;
350 while i + 7 < 64 {
351 let b0 = chunk[i];
352 let b1 = chunk[i + 1];
353 let b2 = chunk[i + 2];
354 let b3 = chunk[i + 3];
355 let b4 = chunk[i + 4];
356 let b5 = chunk[i + 5];
357 let b6 = chunk[i + 6];
358 let b7 = chunk[i + 7];
359 ws_mask |= (table[b0 as usize] as u64) << i;
360 ws_mask |= (table[b1 as usize] as u64) << (i + 1);
361 ws_mask |= (table[b2 as usize] as u64) << (i + 2);
362 ws_mask |= (table[b3 as usize] as u64) << (i + 3);
363 ws_mask |= (table[b4 as usize] as u64) << (i + 4);
364 ws_mask |= (table[b5 as usize] as u64) << (i + 5);
365 ws_mask |= (table[b6 as usize] as u64) << (i + 6);
366 ws_mask |= (table[b7 as usize] as u64) << (i + 7);
367 nl_mask |= ((b0 == b'\n') as u64) << i;
368 nl_mask |= ((b1 == b'\n') as u64) << (i + 1);
369 nl_mask |= ((b2 == b'\n') as u64) << (i + 2);
370 nl_mask |= ((b3 == b'\n') as u64) << (i + 3);
371 nl_mask |= ((b4 == b'\n') as u64) << (i + 4);
372 nl_mask |= ((b5 == b'\n') as u64) << (i + 5);
373 nl_mask |= ((b6 == b'\n') as u64) << (i + 6);
374 nl_mask |= ((b7 == b'\n') as u64) << (i + 7);
375 i += 8;
376 }
377
378 let prev_mask = (ws_mask << 1) | prev_ws_bit;
379 let word_starts = prev_mask & !ws_mask;
380 words += word_starts.count_ones() as u64;
381 lines += nl_mask.count_ones() as u64;
382 prev_ws_bit = (ws_mask >> 63) & 1;
383 }
384
385 let mut prev_ws = prev_ws_bit as u8;
386 for &b in remainder {
387 if b == b'\n' {
388 lines += 1;
389 }
390 let curr_ws = table[b as usize];
391 words += (prev_ws & (curr_ws ^ 1)) as u64;
392 prev_ws = curr_ws;
393 }
394 (lines, words)
395}
396
397pub fn count_lines_words_chars(data: &[u8], utf8: bool) -> (u64, u64, u64) {
399 let table = ws_table(utf8);
400 let mut words = 0u64;
401 let mut lines = 0u64;
402 let mut chars = 0u64;
403 let mut prev_ws_bit = 1u64;
404
405 let chunks = data.chunks_exact(64);
406 let remainder = chunks.remainder();
407
408 for chunk in chunks {
409 let mut ws_mask = 0u64;
410 let mut nl_mask = 0u64;
411 let mut char_mask = 0u64;
412 let mut i = 0;
413 while i + 7 < 64 {
414 let b0 = chunk[i];
415 let b1 = chunk[i + 1];
416 let b2 = chunk[i + 2];
417 let b3 = chunk[i + 3];
418 let b4 = chunk[i + 4];
419 let b5 = chunk[i + 5];
420 let b6 = chunk[i + 6];
421 let b7 = chunk[i + 7];
422
423 ws_mask |= (table[b0 as usize] as u64) << i
424 | (table[b1 as usize] as u64) << (i + 1)
425 | (table[b2 as usize] as u64) << (i + 2)
426 | (table[b3 as usize] as u64) << (i + 3)
427 | (table[b4 as usize] as u64) << (i + 4)
428 | (table[b5 as usize] as u64) << (i + 5)
429 | (table[b6 as usize] as u64) << (i + 6)
430 | (table[b7 as usize] as u64) << (i + 7);
431
432 nl_mask |= ((b0 == b'\n') as u64) << i
433 | ((b1 == b'\n') as u64) << (i + 1)
434 | ((b2 == b'\n') as u64) << (i + 2)
435 | ((b3 == b'\n') as u64) << (i + 3)
436 | ((b4 == b'\n') as u64) << (i + 4)
437 | ((b5 == b'\n') as u64) << (i + 5)
438 | ((b6 == b'\n') as u64) << (i + 6)
439 | ((b7 == b'\n') as u64) << (i + 7);
440
441 if utf8 {
442 char_mask |= (((b0 & 0xC0) != 0x80) as u64) << i
443 | (((b1 & 0xC0) != 0x80) as u64) << (i + 1)
444 | (((b2 & 0xC0) != 0x80) as u64) << (i + 2)
445 | (((b3 & 0xC0) != 0x80) as u64) << (i + 3)
446 | (((b4 & 0xC0) != 0x80) as u64) << (i + 4)
447 | (((b5 & 0xC0) != 0x80) as u64) << (i + 5)
448 | (((b6 & 0xC0) != 0x80) as u64) << (i + 6)
449 | (((b7 & 0xC0) != 0x80) as u64) << (i + 7);
450 }
451
452 i += 8;
453 }
454 let prev_mask = (ws_mask << 1) | prev_ws_bit;
455 let word_starts = prev_mask & !ws_mask;
456 words += word_starts.count_ones() as u64;
457 lines += nl_mask.count_ones() as u64;
458 chars += char_mask.count_ones() as u64;
459 prev_ws_bit = (ws_mask >> 63) & 1;
460 }
461
462 let mut prev_ws = prev_ws_bit as u8;
463 for &b in remainder {
464 if b == b'\n' {
465 lines += 1;
466 }
467 let curr_ws = table[b as usize];
468 words += (prev_ws & (curr_ws ^ 1)) as u64;
469 prev_ws = curr_ws;
470 if utf8 {
471 chars += ((b & 0xC0) != 0x80) as u64;
472 }
473 }
474 if !utf8 {
475 chars = data.len() as u64;
476 }
477 (lines, words, chars)
478}
479
/// Counts UTF-8 characters in `data` as the number of bytes that are not
/// continuation bytes (i.e. whose top two bits are not `10`).
///
/// No validation is performed: every non-continuation byte counts as one
/// character, stray continuation bytes count as zero.
pub fn count_chars_utf8(data: &[u8]) -> u64 {
    data.iter()
        .filter(|&&byte| (byte & 0xC0) != 0x80)
        .count() as u64
}
513
/// Counts characters in the C locale, where every byte is one character.
#[inline]
pub fn count_chars_c(data: &[u8]) -> u64 {
    let byte_len: usize = data.len();
    byte_len as u64
}
519
520#[inline]
522pub fn count_chars(data: &[u8], utf8: bool) -> u64 {
523 if utf8 {
524 count_chars_utf8(data)
525 } else {
526 count_chars_c(data)
527 }
528}
529
/// Reports whether the process environment selects a UTF-8 locale.
///
/// `LC_ALL`, `LC_CTYPE` and `LANG` are consulted in that order. The first one
/// that is set to a non-empty (Unicode) value decides: the result is `true`
/// when that value contains `utf-8` or `utf8`, compared case-insensitively.
/// Returns `false` when none of the variables is usable.
pub fn is_utf8_locale() -> bool {
    ["LC_ALL", "LC_CTYPE", "LANG"]
        .iter()
        // Unset or non-Unicode values are skipped.
        .filter_map(|name| std::env::var(name).ok())
        // The iterator is lazy, so lower-priority variables are not read once
        // a non-empty value has been found.
        .find(|value| !value.is_empty())
        .map_or(false, |value| {
            let folded = value.to_ascii_lowercase();
            folded.contains("utf-8") || folded.contains("utf8")
        })
}
542
/// Decodes the UTF-8 sequence at the start of `bytes`.
///
/// Returns `(code_point, bytes_consumed)`. When the lead byte is in
/// 0xC2..=0xF4 and is followed by the required number of continuation bytes
/// (`10xxxxxx`), the payload bits are assembled into a code point. In every
/// other case (ASCII, stray continuation byte, 0xC0/0xC1, 0xF5..=0xFF, or a
/// truncated/invalid tail) the lead byte itself is returned with a length
/// of 1.
///
/// The decoded value is not range-checked, so overlong forms, surrogates and
/// values above U+10FFFF are returned as-is.
///
/// # Panics
///
/// Panics if `bytes` is empty.
#[inline]
fn decode_utf8(bytes: &[u8]) -> (u32, usize) {
    let lead = bytes[0];
    let single = (u32::from(lead), 1usize);

    // (number of continuation bytes, mask selecting the lead byte's payload)
    let (extra, payload_mask) = match lead {
        0xC2..=0xDF => (1usize, 0x1Fu32),
        0xE0..=0xEF => (2, 0x0F),
        0xF0..=0xF4 => (3, 0x07),
        _ => return single,
    };

    match bytes.get(1..=extra) {
        Some(tail) if tail.iter().all(|&b| (b & 0xC0) == 0x80) => {
            // Each continuation byte contributes its low six bits.
            let cp = tail
                .iter()
                .fold(u32::from(lead) & payload_mask, |acc, &b| {
                    (acc << 6) | (u32::from(b) & 0x3F)
                });
            (cp, extra + 1)
        }
        _ => single,
    }
}
586
/// Returns `true` when code point `cp` is treated as occupying two display
/// columns.
///
/// Membership is decided by a fixed list of inclusive code point ranges,
/// searched with a binary search.
#[inline]
fn is_wide_char(cp: u32) -> bool {
    // Inclusive `(first, last)` ranges, sorted ascending and non-overlapping,
    // which the binary search below relies on.
    const WIDE_RANGES: &[(u32, u32)] = &[
        // Basic Multilingual Plane
        (0x1100, 0x115F),
        (0x231A, 0x231B),
        (0x2329, 0x232A),
        (0x23E9, 0x23F3),
        (0x23F8, 0x23FA),
        (0x25FD, 0x25FE),
        (0x2614, 0x2615),
        (0x2648, 0x2653),
        (0x267F, 0x267F),
        (0x2693, 0x2693),
        (0x26A1, 0x26A1),
        (0x26AA, 0x26AB),
        (0x26BD, 0x26BE),
        (0x26C4, 0x26C5),
        (0x26CE, 0x26CE),
        (0x26D4, 0x26D4),
        (0x26EA, 0x26EA),
        (0x26F2, 0x26F3),
        (0x26F5, 0x26F5),
        (0x26FA, 0x26FA),
        (0x26FD, 0x26FD),
        (0x2702, 0x2702),
        (0x2705, 0x2705),
        (0x2708, 0x270D),
        (0x270F, 0x270F),
        (0x2712, 0x2712),
        (0x2714, 0x2714),
        (0x2716, 0x2716),
        (0x271D, 0x271D),
        (0x2721, 0x2721),
        (0x2728, 0x2728),
        (0x2733, 0x2734),
        (0x2744, 0x2744),
        (0x2747, 0x2747),
        (0x274C, 0x274C),
        (0x274E, 0x274E),
        (0x2753, 0x2755),
        (0x2757, 0x2757),
        (0x2763, 0x2764),
        (0x2795, 0x2797),
        (0x27A1, 0x27A1),
        (0x27B0, 0x27B0),
        (0x27BF, 0x27BF),
        (0x2934, 0x2935),
        (0x2B05, 0x2B07),
        (0x2B1B, 0x2B1C),
        (0x2B50, 0x2B50),
        (0x2B55, 0x2B55),
        (0x2E80, 0x303E),
        (0x3041, 0x33BF),
        (0x3400, 0x4DBF),
        (0x4E00, 0xA4CF),
        (0xA960, 0xA97C),
        (0xAC00, 0xD7A3),
        (0xF900, 0xFAFF),
        (0xFE10, 0xFE19),
        (0xFE30, 0xFE6F),
        (0xFF01, 0xFF60),
        (0xFFE0, 0xFFE6),
        // Supplementary planes
        (0x1F004, 0x1F004),
        (0x1F0CF, 0x1F0CF),
        (0x1F170, 0x1F171),
        (0x1F17E, 0x1F17F),
        (0x1F18E, 0x1F18E),
        (0x1F191, 0x1F19A),
        (0x1F1E0, 0x1F1FF),
        (0x1F200, 0x1F202),
        (0x1F210, 0x1F23B),
        (0x1F240, 0x1F248),
        (0x1F250, 0x1F251),
        (0x1F260, 0x1F265),
        (0x1F300, 0x1F64F),
        (0x1F680, 0x1F6FF),
        (0x1F900, 0x1F9FF),
        (0x1FA00, 0x1FA6F),
        (0x1FA70, 0x1FAFF),
        (0x20000, 0x2FFFD),
        (0x30000, 0x3FFFD),
    ];

    WIDE_RANGES
        .binary_search_by(|&(first, last)| {
            if last < cp {
                std::cmp::Ordering::Less
            } else if first > cp {
                std::cmp::Ordering::Greater
            } else {
                std::cmp::Ordering::Equal
            }
        })
        .is_ok()
}
671
672pub fn max_line_length_c(data: &[u8]) -> u64 {
682 let mut max_len: u64 = 0;
683 let mut line_len: u64 = 0; let mut linepos: u64 = 0; for &b in data {
687 match b {
688 b'\n' => {
689 if line_len > max_len {
690 max_len = line_len;
691 }
692 linepos = 0;
693 line_len = 0;
694 }
695 b'\t' => {
696 linepos = (linepos + 8) & !7;
697 if linepos > line_len {
698 line_len = linepos;
699 }
700 }
701 b'\r' => {
702 linepos = 0;
703 }
704 0x0C => {
705 if line_len > max_len {
707 max_len = line_len;
708 }
709 linepos = 0;
710 line_len = 0;
711 }
712 _ => {
713 if PRINTABLE_TABLE[b as usize] != 0 {
714 linepos += 1;
715 if linepos > line_len {
716 line_len = linepos;
717 }
718 }
719 }
721 }
722 }
723
724 if line_len > max_len {
726 max_len = line_len;
727 }
728
729 max_len
730}
731
732pub fn max_line_length_utf8(data: &[u8]) -> u64 {
737 let mut max_len: u64 = 0;
738 let mut line_len: u64 = 0;
739 let mut linepos: u64 = 0;
740 let mut i = 0;
741
742 while i < data.len() {
743 let b = data[i];
744
745 if b < 0x80 {
747 match b {
748 b'\n' => {
749 if line_len > max_len {
750 max_len = line_len;
751 }
752 linepos = 0;
753 line_len = 0;
754 }
755 b'\t' => {
756 linepos = (linepos + 8) & !7;
757 if linepos > line_len {
758 line_len = linepos;
759 }
760 }
761 b'\r' => {
762 linepos = 0;
763 }
764 0x0C => {
765 if line_len > max_len {
767 max_len = line_len;
768 }
769 linepos = 0;
770 line_len = 0;
771 }
772 0x20..=0x7E => {
773 linepos += 1;
775 if linepos > line_len {
776 line_len = linepos;
777 }
778 }
779 _ => {
780 }
782 }
783 i += 1;
784 } else {
785 let (cp, len) = decode_utf8(&data[i..]);
787
788 if cp <= 0x9F {
790 } else if is_wide_char(cp) {
792 linepos += 2;
793 if linepos > line_len {
794 line_len = linepos;
795 }
796 } else {
797 linepos += 1;
799 if linepos > line_len {
800 line_len = linepos;
801 }
802 }
803 i += len;
804 }
805 }
806
807 if line_len > max_len {
809 max_len = line_len;
810 }
811
812 max_len
813}
814
815#[inline]
817pub fn max_line_length(data: &[u8], utf8: bool) -> u64 {
818 if utf8 {
819 max_line_length_utf8(data)
820 } else {
821 max_line_length_c(data)
822 }
823}
824
825pub fn count_all(data: &[u8], utf8: bool) -> WcCounts {
837 WcCounts {
838 lines: count_lines(data),
839 words: count_words_locale(data, utf8),
840 bytes: data.len() as u64,
841 chars: count_chars(data, utf8),
842 max_line_length: max_line_length(data, utf8),
843 }
844}
845
846pub fn count_lines_parallel(data: &[u8]) -> u64 {
852 if data.len() < PARALLEL_THRESHOLD {
853 return count_lines(data);
854 }
855
856 let num_threads = rayon::current_num_threads().max(1);
857 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
858
859 data.par_chunks(chunk_size)
860 .map(|chunk| memchr_iter(b'\n', chunk).count() as u64)
861 .sum()
862}
863
864pub fn count_words_parallel(data: &[u8], utf8: bool) -> u64 {
866 if utf8 || data.len() < PARALLEL_THRESHOLD {
867 return count_words_locale(data, utf8);
870 }
871
872 let table = &WS_TABLE_C;
873 let num_threads = rayon::current_num_threads().max(1);
874 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
875
876 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
877
878 let results: Vec<(u64, bool, bool)> = chunks
879 .par_iter()
880 .map(|chunk| {
881 let words = count_words_with_table(chunk, table);
882 let starts_non_ws = chunk.first().is_some_and(|&b| table[b as usize] == 0);
883 let ends_non_ws = chunk.last().is_some_and(|&b| table[b as usize] == 0);
884 (words, starts_non_ws, ends_non_ws)
885 })
886 .collect();
887
888 let mut total = 0u64;
889 for i in 0..results.len() {
890 total += results[i].0;
891 if i > 0 && results[i].1 && results[i - 1].2 {
892 total -= 1;
893 }
894 }
895 total
896}
897
898pub fn count_chars_parallel(data: &[u8], utf8: bool) -> u64 {
900 if !utf8 {
901 return data.len() as u64;
902 }
903 if data.len() < PARALLEL_THRESHOLD {
904 return count_chars_utf8(data);
905 }
906
907 let num_threads = rayon::current_num_threads().max(1);
908 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
909
910 data.par_chunks(chunk_size).map(count_chars_utf8).sum()
911}
912
913pub fn count_lwc_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
915 if data.len() < PARALLEL_THRESHOLD {
916 let lines = count_lines(data);
917 let words = count_words_locale(data, utf8);
918 let chars = count_chars(data, utf8);
919 return (lines, words, chars);
920 }
921
922 let words = count_words_parallel(data, utf8);
924
925 let num_threads = rayon::current_num_threads().max(1);
927 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
928
929 let lines: u64 = data
930 .par_chunks(chunk_size)
931 .map(|chunk| memchr_iter(b'\n', chunk).count() as u64)
932 .sum();
933
934 let chars = if utf8 {
935 data.par_chunks(chunk_size).map(count_chars_utf8).sum()
936 } else {
937 data.len() as u64
938 };
939
940 (lines, words, chars)
941}