1use memchr::memchr_iter;
2use rayon::prelude::*;
3
/// Input size in bytes (1 MiB) below which the `*_parallel` counters in this file
/// skip rayon and fall back to their sequential counterparts.
const PARALLEL_THRESHOLD: usize = 1024 * 1024;
8
/// The full set of counts for one input buffer, as returned by `count_all`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct WcCounts {
    /// Number of `\n` bytes in the input.
    pub lines: u64,
    /// Number of words, using either the UTF-8 or the byte-oriented word rules.
    pub words: u64,
    /// Length of the input in bytes.
    pub bytes: u64,
    /// Number of characters: non-continuation bytes in UTF-8 mode, otherwise
    /// the same value as `bytes`.
    pub chars: u64,
    /// Largest display width reached by any line of the input.
    pub max_line_length: u64,
}
18
/// Builds the byte-indexed whitespace table at compile time.
///
/// The marked bytes are 0x09..=0x0D (HT, LF, VT, FF, CR), 0x20 (space) and 0xA0.
const fn make_is_space() -> [bool; 256] {
    const SPACE_BYTES: [u8; 7] = [0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x20, 0xA0];

    let mut table = [false; 256];
    let mut k = 0;
    // `for` loops are not usable in `const fn`, hence the manual index loop.
    while k < SPACE_BYTES.len() {
        table[SPACE_BYTES[k] as usize] = true;
        k += 1;
    }
    table
}

/// `IS_SPACE[b]` is `true` when byte `b` is treated as whitespace by the
/// byte-oriented counters.
const IS_SPACE: [bool; 256] = make_is_space();
59
60#[inline]
63pub(crate) fn first_is_word(data: &[u8]) -> bool {
64 !data.is_empty() && !IS_SPACE[data[0] as usize]
65}
66
/// Returns `true` for the non-ASCII code points this module treats as
/// whitespace: U+1680, U+2000..=U+200A, U+2028, U+2029, U+205F and U+3000.
#[inline]
fn is_unicode_space(cp: u32) -> bool {
    match cp {
        0x2000..=0x200A => true,
        0x1680 | 0x2028 | 0x2029 | 0x205F | 0x3000 => true,
        _ => false,
    }
}
86
/// Returns `true` for U+00A0, U+2007, U+202F and U+2060, which this module
/// also treats as word separators (see `is_unicode_word_break`).
#[inline]
fn is_wnbspace(cp: u32) -> bool {
    const NON_BREAKING: [u32; 4] = [0x00A0, 0x2007, 0x202F, 0x2060];
    NON_BREAKING.contains(&cp)
}
94
95#[inline]
97fn is_unicode_word_break(cp: u32) -> bool {
98 is_unicode_space(cp) || is_wnbspace(cp)
99}
100
/// Returns `true` when a non-ASCII code point may start or continue a word.
///
/// Rejected: everything below U+00A0, surrogates (U+D800..=U+DFFF), values
/// above U+10FFFF, U+FDD0..=U+FDEF, and any code point whose low 16 bits are
/// 0xFFFE or 0xFFFF.
#[inline]
fn is_printable_unicode(cp: u32) -> bool {
    let below_latin1_printables = cp < 0xA0;
    let surrogate_or_too_large = (0xD800..=0xDFFF).contains(&cp) || cp > 0x10FFFF;
    let noncharacter = (0xFDD0..=0xFDEF).contains(&cp) || (cp & 0xFFFE) == 0xFFFE;

    !(below_latin1_printables || surrogate_or_too_large || noncharacter)
}
128
129#[inline]
136pub fn count_lines(data: &[u8]) -> u64 {
137 memchr_iter(b'\n', data).count() as u64
138}
139
/// Returns the length of `data` in bytes.
#[inline]
pub fn count_bytes(data: &[u8]) -> u64 {
    let byte_total = data.len();
    byte_total as u64
}
145
146pub fn count_words(data: &[u8]) -> u64 {
148 count_words_locale(data, true)
149}
150
151pub fn count_words_locale(data: &[u8], utf8: bool) -> u64 {
157 if utf8 {
158 count_words_utf8(data)
159 } else {
160 count_words_c(data)
161 }
162}
163
164fn count_words_c(data: &[u8]) -> u64 {
168 let mut words = 0u64;
169 let mut in_word = false;
170 let mut i = 0;
171 let len = data.len();
172
173 while i < len {
174 let b = unsafe { *data.get_unchecked(i) };
175 if IS_SPACE[b as usize] {
176 in_word = false;
177 } else if !in_word {
178 in_word = true;
179 words += 1;
180 }
181 i += 1;
182 }
183 words
184}
185
186#[cfg(target_arch = "x86_64")]
190#[inline(always)]
191fn count_lw_c_scalar_tail(
192 ptr: *const u8,
193 mut i: usize,
194 len: usize,
195 mut total_lines: u64,
196 mut total_words: u64,
197 mut prev_in_word: bool,
198 data: &[u8],
199) -> (u64, u64, bool, bool) {
200 while i < len {
201 let b = unsafe { *ptr.add(i) };
202 if IS_SPACE[b as usize] {
203 if b == b'\n' {
204 total_lines += 1;
205 }
206 prev_in_word = false;
207 } else if !prev_in_word {
208 total_words += 1;
209 prev_in_word = true;
210 }
211 i += 1;
212 }
213 let first_word = first_is_word(data);
214 (total_lines, total_words, first_word, prev_in_word)
215}
216
/// AVX2 kernel for the byte-oriented line/word count.
///
/// Processes `data` 32 bytes per iteration and hands whatever is left to
/// `count_lw_c_scalar_tail`, whose tuple it returns:
/// `(lines, words, first_is_word(data), ends_inside_word)`.
/// Whitespace is 0x09..=0x0D, 0x20 and 0xA0.
///
/// # Safety
///
/// The function is compiled with `#[target_feature(enable = "avx2")]`; the
/// caller must make sure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_lw_c_chunk_avx2(data: &[u8]) -> (u64, u64, bool, bool) {
    use std::arch::x86_64::*;

    let len = data.len();
    let ptr = data.as_ptr();
    let mut i = 0usize;
    let mut total_lines = 0u64;
    let mut total_words = 0u64;
    // Whether the last byte of the previous block was a word byte.
    let mut prev_in_word = false;

    unsafe {
        let nl_byte = _mm256_set1_epi8(b'\n' as i8);
        let zero = _mm256_setzero_si256();
        let ones = _mm256_set1_epi8(1);
        let const_0x09 = _mm256_set1_epi8(0x09u8 as i8);
        let const_0x0d = _mm256_set1_epi8(0x0Du8 as i8);
        let const_0x20 = _mm256_set1_epi8(0x20u8 as i8);
        let const_0xa0 = _mm256_set1_epi8(0xA0u8 as i8);

        // One u8 newline counter per lane; flushed every 255 blocks (see
        // `batch`) so that no lane can wrap around.
        let mut line_acc = _mm256_setzero_si256();
        let mut batch = 0u32;

        while i + 32 <= len {
            let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
            // Add 1 to each lane that holds a newline.
            let is_nl = _mm256_cmpeq_epi8(v, nl_byte);
            line_acc = _mm256_add_epi8(line_acc, _mm256_and_si256(is_nl, ones));

            // Unsigned `v >= 0x09`, expressed as `max(v, 0x09) == v`.
            let ge_09 = _mm256_cmpeq_epi8(_mm256_max_epu8(v, const_0x09), v);
            // Unsigned `v <= 0x0D`, expressed as `min(v, 0x0D) == v`.
            let le_0d = _mm256_cmpeq_epi8(_mm256_min_epu8(v, const_0x0d), v);
            let in_tab_range = _mm256_and_si256(ge_09, le_0d);
            let is_sp = _mm256_cmpeq_epi8(v, const_0x20);
            let is_nbsp = _mm256_cmpeq_epi8(v, const_0xa0);
            let is_space = _mm256_or_si256(_mm256_or_si256(in_tab_range, is_sp), is_nbsp);
            // Bit k is set when byte k of this block is whitespace.
            let space_mask = _mm256_movemask_epi8(is_space) as u32;

            let nonspace_mask = !space_mask;
            // Bit k is set when the byte *before* byte k is whitespace; bit 0
            // is taken from the state carried over from the previous block.
            let prev_space = (space_mask << 1) | if prev_in_word { 0u32 } else { 1u32 };
            // A word starts at each non-space byte preceded by whitespace.
            let starts = nonspace_mask & prev_space;
            total_words += starts.count_ones() as u64;

            // Carry the state of the block's last byte into the next block.
            prev_in_word = (nonspace_mask >> 31) & 1 == 1;

            batch += 1;
            if batch >= 255 {
                // Horizontal sum: SAD against zero adds each group of eight
                // lane counters into a 64-bit lane; the four partial sums are
                // then folded together.
                let sad = _mm256_sad_epu8(line_acc, zero);
                let hi = _mm256_extracti128_si256(sad, 1);
                let lo = _mm256_castsi256_si128(sad);
                let s = _mm_add_epi64(lo, hi);
                let h64 = _mm_unpackhi_epi64(s, s);
                let t = _mm_add_epi64(s, h64);
                total_lines += _mm_cvtsi128_si64(t) as u64;
                line_acc = _mm256_setzero_si256();
                batch = 0;
            }
            i += 32;
        }

        // Flush the newline counters accumulated since the last full batch.
        if batch > 0 {
            let sad = _mm256_sad_epu8(line_acc, zero);
            let hi = _mm256_extracti128_si256(sad, 1);
            let lo = _mm256_castsi256_si128(sad);
            let s = _mm_add_epi64(lo, hi);
            let h64 = _mm_unpackhi_epi64(s, s);
            let t = _mm_add_epi64(s, h64);
            total_lines += _mm_cvtsi128_si64(t) as u64;
        }
    }

    // Fewer than 32 bytes remain; finish them one at a time.
    count_lw_c_scalar_tail(ptr, i, len, total_lines, total_words, prev_in_word, data)
}
300
/// SSE2 kernel for the byte-oriented line/word count.
///
/// Same algorithm as `count_lw_c_chunk_avx2`, but 16 bytes per iteration.
/// The remaining bytes go to `count_lw_c_scalar_tail`, whose tuple is
/// returned: `(lines, words, first_is_word(data), ends_inside_word)`.
///
/// # Safety
///
/// The function is compiled with `#[target_feature(enable = "sse2")]`; the
/// caller must make sure the running CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn count_lw_c_chunk_sse2(data: &[u8]) -> (u64, u64, bool, bool) {
    use std::arch::x86_64::*;

    let len = data.len();
    let ptr = data.as_ptr();
    let mut i = 0usize;
    let mut total_lines = 0u64;
    let mut total_words = 0u64;
    // Whether the last byte of the previous block was a word byte.
    let mut prev_in_word = false;

    unsafe {
        let nl_byte = _mm_set1_epi8(b'\n' as i8);
        let zero = _mm_setzero_si128();
        let ones = _mm_set1_epi8(1);
        let const_0x09 = _mm_set1_epi8(0x09u8 as i8);
        let const_0x0d = _mm_set1_epi8(0x0Du8 as i8);
        let const_0x20 = _mm_set1_epi8(0x20u8 as i8);
        let const_0xa0 = _mm_set1_epi8(0xA0u8 as i8);

        // One u8 newline counter per lane; flushed every 255 blocks (see
        // `batch`) so that no lane can wrap around.
        let mut line_acc = _mm_setzero_si128();
        let mut batch = 0u32;

        while i + 16 <= len {
            let v = _mm_loadu_si128(ptr.add(i) as *const __m128i);
            // Add 1 to each lane that holds a newline.
            let is_nl = _mm_cmpeq_epi8(v, nl_byte);
            line_acc = _mm_add_epi8(line_acc, _mm_and_si128(is_nl, ones));

            // Unsigned `v >= 0x09`, expressed as `max(v, 0x09) == v`.
            let ge_09 = _mm_cmpeq_epi8(_mm_max_epu8(v, const_0x09), v);
            // Unsigned `v <= 0x0D`, expressed as `min(v, 0x0D) == v`.
            let le_0d = _mm_cmpeq_epi8(_mm_min_epu8(v, const_0x0d), v);
            let in_tab_range = _mm_and_si128(ge_09, le_0d);
            let is_sp = _mm_cmpeq_epi8(v, const_0x20);
            let is_nbsp = _mm_cmpeq_epi8(v, const_0xa0);
            let is_space = _mm_or_si128(_mm_or_si128(in_tab_range, is_sp), is_nbsp);
            // Bit k is set when byte k of this block is whitespace; only the
            // low 16 bits are meaningful.
            let space_mask = (_mm_movemask_epi8(is_space) as u32) & 0xFFFF;

            let nonspace_mask = !space_mask & 0xFFFF;
            // Bit k is set when the byte *before* byte k is whitespace; bit 0
            // is taken from the state carried over from the previous block.
            let prev_space = ((space_mask << 1) | if prev_in_word { 0u32 } else { 1u32 }) & 0xFFFF;
            // A word starts at each non-space byte preceded by whitespace.
            let starts = nonspace_mask & prev_space;
            total_words += starts.count_ones() as u64;

            // Carry the state of the block's last byte into the next block.
            prev_in_word = (nonspace_mask >> 15) & 1 == 1;

            batch += 1;
            if batch >= 255 {
                // Horizontal sum: SAD against zero adds each group of eight
                // lane counters into a 64-bit lane; then add the two halves.
                let sad = _mm_sad_epu8(line_acc, zero);
                let hi = _mm_unpackhi_epi64(sad, sad);
                let t = _mm_add_epi64(sad, hi);
                total_lines += _mm_cvtsi128_si64(t) as u64;
                line_acc = _mm_setzero_si128();
                batch = 0;
            }
            i += 16;
        }

        // Flush the newline counters accumulated since the last full batch.
        if batch > 0 {
            let sad = _mm_sad_epu8(line_acc, zero);
            let hi = _mm_unpackhi_epi64(sad, sad);
            let t = _mm_add_epi64(sad, hi);
            total_lines += _mm_cvtsi128_si64(t) as u64;
        }
    }

    // Fewer than 16 bytes remain; finish them one at a time.
    count_lw_c_scalar_tail(ptr, i, len, total_lines, total_words, prev_in_word, data)
}
372
373#[inline]
375fn count_lw_c_chunk_fast(data: &[u8]) -> (u64, u64, bool, bool) {
376 #[cfg(target_arch = "x86_64")]
377 {
378 if is_x86_feature_detected!("avx2") && data.len() >= 64 {
379 return unsafe { count_lw_c_chunk_avx2(data) };
380 }
381 if data.len() >= 32 {
382 return unsafe { count_lw_c_chunk_sse2(data) };
383 }
384 }
385 count_lw_c_chunk(data)
386}
387
388fn count_lw_c_chunk(data: &[u8]) -> (u64, u64, bool, bool) {
392 let mut lines = 0u64;
393 let mut words = 0u64;
394 let mut in_word = false;
395 let mut i = 0;
396 let len = data.len();
397
398 let first_word = first_is_word(data);
399
400 while i < len {
401 let b = unsafe { *data.get_unchecked(i) };
402 if IS_SPACE[b as usize] {
403 if b == b'\n' {
404 lines += 1;
405 }
406 in_word = false;
407 } else if !in_word {
408 in_word = true;
409 words += 1;
410 }
411 i += 1;
412 }
413 (lines, words, first_word, in_word)
414}
415
416fn count_words_utf8(data: &[u8]) -> u64 {
428 let mut words = 0u64;
429 let mut in_word = false;
430 let mut i = 0;
431 let len = data.len();
432
433 while i < len {
434 let b = unsafe { *data.get_unchecked(i) };
435
436 if b < 0x80 {
437 if IS_SPACE[b as usize] {
442 in_word = false;
443 } else if b >= 0x21 && b <= 0x7E {
444 if !in_word {
446 in_word = true;
447 words += 1;
448 }
449 }
450 i += 1;
452 } else if b < 0xC2 {
453 i += 1;
456 } else if b < 0xE0 {
457 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
458 let cp = ((b as u32 & 0x1F) << 6)
459 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
460 if is_unicode_word_break(cp) {
461 in_word = false;
462 } else if is_printable_unicode(cp) {
463 if !in_word {
464 in_word = true;
465 words += 1;
466 }
467 }
468 i += 2;
470 } else {
471 i += 1;
473 }
474 } else if b < 0xF0 {
475 if i + 2 < len
476 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
477 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
478 {
479 let cp = ((b as u32 & 0x0F) << 12)
480 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
481 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
482 if is_unicode_word_break(cp) {
483 in_word = false;
484 } else if is_printable_unicode(cp) {
485 if !in_word {
486 in_word = true;
487 words += 1;
488 }
489 }
490 i += 3;
492 } else {
493 i += 1;
495 }
496 } else if b < 0xF5 {
497 if i + 3 < len
498 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
499 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
500 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
501 {
502 let cp = ((b as u32 & 0x07) << 18)
503 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
504 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
505 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
506 if is_unicode_word_break(cp) {
507 in_word = false;
508 } else if is_printable_unicode(cp) {
509 if !in_word {
510 in_word = true;
511 words += 1;
512 }
513 }
514 i += 4;
516 } else {
517 i += 1;
519 }
520 } else {
521 i += 1;
523 }
524 }
525
526 words
527}
528
529pub fn count_lines_words(data: &[u8], utf8: bool) -> (u64, u64) {
533 if utf8 {
534 count_lines_words_utf8_fused(data)
535 } else {
536 let (lines, words, _, _) = count_lw_c_chunk_fast(data);
537 (lines, words)
538 }
539}
540
541fn count_lines_words_utf8_fused(data: &[u8]) -> (u64, u64) {
549 let mut lines = 0u64;
550 let mut words = 0u64;
551 let mut in_word = false;
552 let mut i = 0;
553 let len = data.len();
554
555 while i < len {
556 let b = unsafe { *data.get_unchecked(i) };
557
558 if b == b'\n' {
559 lines += 1;
560 in_word = false;
561 i += 1;
562 } else if b < 0x80 {
563 if IS_SPACE[b as usize] {
567 in_word = false;
568 } else if b >= 0x21 && b <= 0x7E {
569 if !in_word {
570 in_word = true;
571 words += 1;
572 }
573 }
574 i += 1;
576 } else if b < 0xC2 {
577 i += 1;
579 } else if b < 0xE0 {
580 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
581 let cp = ((b as u32 & 0x1F) << 6)
582 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
583 if is_unicode_word_break(cp) {
584 in_word = false;
585 } else if is_printable_unicode(cp) {
586 if !in_word {
587 in_word = true;
588 words += 1;
589 }
590 }
591 i += 2;
592 } else {
593 i += 1;
595 }
596 } else if b < 0xF0 {
597 if i + 2 < len
598 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
599 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
600 {
601 let cp = ((b as u32 & 0x0F) << 12)
602 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
603 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
604 if is_unicode_word_break(cp) {
605 in_word = false;
606 } else if is_printable_unicode(cp) {
607 if !in_word {
608 in_word = true;
609 words += 1;
610 }
611 }
612 i += 3;
613 } else {
614 i += 1;
616 }
617 } else if b < 0xF5 {
618 if i + 3 < len
619 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
620 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
621 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
622 {
623 let cp = ((b as u32 & 0x07) << 18)
624 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
625 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
626 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
627 if is_unicode_word_break(cp) {
628 in_word = false;
629 } else if is_printable_unicode(cp) {
630 if !in_word {
631 in_word = true;
632 words += 1;
633 }
634 }
635 i += 4;
636 } else {
637 i += 1;
639 }
640 } else {
641 i += 1;
643 }
644 }
645
646 (lines, words)
647}
648
649pub fn count_lines_words_chars(data: &[u8], utf8: bool) -> (u64, u64, u64) {
651 if utf8 {
652 let (lines, words) = count_lines_words_utf8_fused(data);
654 let chars = count_chars_utf8(data);
655 (lines, words, chars)
656 } else {
657 let (lines, words) = count_lines_words(data, false);
659 (lines, words, data.len() as u64)
660 }
661}
662
663pub fn count_chars_utf8(data: &[u8]) -> u64 {
670 #[cfg(target_arch = "x86_64")]
671 {
672 if is_x86_feature_detected!("avx2") {
673 return unsafe { count_chars_utf8_avx2(data) };
674 }
675 }
676 count_chars_utf8_scalar(data)
677}
678
/// AVX2 kernel that counts the bytes of `data` which are not UTF-8
/// continuation bytes (i.e. whose top two bits are not `10`).
///
/// Processes 32 bytes per iteration and finishes the remainder with a scalar
/// loop.
///
/// # Safety
///
/// The function is compiled with `#[target_feature(enable = "avx2")]`; the
/// caller must make sure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_chars_utf8_avx2(data: &[u8]) -> u64 {
    unsafe {
        use std::arch::x86_64::*;

        let mask_c0 = _mm256_set1_epi8(0xC0u8 as i8);
        let val_80 = _mm256_set1_epi8(0x80u8 as i8);
        let ones = _mm256_set1_epi8(1);
        let zero = _mm256_setzero_si256();

        let mut total = 0u64;
        let len = data.len();
        let ptr = data.as_ptr();
        let mut i = 0;
        // One u8 counter per lane; flushed every 255 blocks (see `batch`) so
        // that no lane can wrap around.
        let mut acc = _mm256_setzero_si256();
        let mut batch = 0u32;

        while i + 32 <= len {
            let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
            // A continuation byte satisfies `(byte & 0xC0) == 0x80`.
            let masked = _mm256_and_si256(v, mask_c0);
            let is_cont = _mm256_cmpeq_epi8(masked, val_80);
            // `!is_cont & 1`: 1 in every lane that is NOT a continuation byte.
            let non_cont = _mm256_andnot_si256(is_cont, ones);
            acc = _mm256_add_epi8(acc, non_cont);

            batch += 1;
            if batch >= 255 {
                // Horizontal sum: SAD against zero adds each group of eight
                // lane counters into a 64-bit lane; the four partial sums are
                // then folded together.
                let sad = _mm256_sad_epu8(acc, zero);
                let hi = _mm256_extracti128_si256(sad, 1);
                let lo = _mm256_castsi256_si128(sad);
                let sum = _mm_add_epi64(lo, hi);
                let hi64 = _mm_unpackhi_epi64(sum, sum);
                let t = _mm_add_epi64(sum, hi64);
                total += _mm_cvtsi128_si64(t) as u64;
                acc = _mm256_setzero_si256();
                batch = 0;
            }
            i += 32;
        }

        // Flush the counters accumulated since the last full batch.
        if batch > 0 {
            let sad = _mm256_sad_epu8(acc, zero);
            let hi = _mm256_extracti128_si256(sad, 1);
            let lo = _mm256_castsi256_si128(sad);
            let sum = _mm_add_epi64(lo, hi);
            let hi64 = _mm_unpackhi_epi64(sum, sum);
            let t = _mm_add_epi64(sum, hi64);
            total += _mm_cvtsi128_si64(t) as u64;
        }

        // Scalar tail for the final (fewer than 32) bytes.
        while i < len {
            total += ((*ptr.add(i) & 0xC0) != 0x80) as u64;
            i += 1;
        }

        total
    }
}
742
/// Portable UTF-8 character count: the number of bytes in `data` that are not
/// continuation bytes (`10xxxxxx`).
///
/// Works in 64-byte blocks so that all-ASCII blocks can be counted without
/// inspecting individual bytes.
fn count_chars_utf8_scalar(data: &[u8]) -> u64 {
    /// True for every byte that begins a character (anything but `10xxxxxx`).
    fn starts_char(byte: u8) -> bool {
        (byte & 0xC0) != 0x80
    }

    let blocks = data.chunks_exact(64);
    let tail = blocks.remainder();

    let mut char_total = 0u64;
    for block in blocks {
        char_total += if block.is_ascii() {
            // No high bit set anywhere: every byte is its own character.
            64
        } else {
            block.iter().filter(|&&byte| starts_char(byte)).count() as u64
        };
    }

    char_total + tail.iter().filter(|&&byte| starts_char(byte)).count() as u64
}
794
/// Character count in byte-oriented mode: every byte is one character.
#[inline]
pub fn count_chars_c(data: &[u8]) -> u64 {
    let byte_total = data.len();
    byte_total as u64
}
800
801#[inline]
803pub fn count_chars(data: &[u8], utf8: bool) -> u64 {
804 if utf8 {
805 count_chars_utf8(data)
806 } else {
807 count_chars_c(data)
808 }
809}
810
/// Reports whether the process locale names a UTF-8 encoding.
///
/// Looks at `LC_ALL`, `LC_CTYPE` and `LANG` in that order; the first variable
/// that is set to a non-empty value decides. The match is case-insensitive
/// and accepts both "utf-8" and "utf8". Returns `false` when none of the
/// variables has a usable value.
pub fn is_utf8_locale() -> bool {
    ["LC_ALL", "LC_CTYPE", "LANG"]
        .iter()
        .filter_map(|name| std::env::var(name).ok())
        .find(|value| !value.is_empty())
        .map_or(false, |value| {
            let lowered = value.to_ascii_lowercase();
            lowered.contains("utf-8") || lowered.contains("utf8")
        })
}
823
/// Decodes the UTF-8 sequence at the start of `bytes`.
///
/// Returns `(code_point, bytes_consumed)`. When the lead byte is ASCII, is not
/// a valid multi-byte lead (0x80..=0xC1, 0xF5..=0xFF), or is not followed by
/// enough continuation bytes, the lead byte's own value is returned with a
/// length of 1.
///
/// # Panics
///
/// Panics if `bytes` is empty.
#[inline]
fn decode_utf8(bytes: &[u8]) -> (u32, usize) {
    let lead = bytes[0];
    let single_byte = (u32::from(lead), 1);

    // Sequence length and payload mask implied by the lead byte.
    let (width, lead_mask) = match lead {
        0xC2..=0xDF => (2usize, 0x1Fu8),
        0xE0..=0xEF => (3, 0x0F),
        0xF0..=0xF4 => (4, 0x07),
        _ => return single_byte,
    };
    if bytes.len() < width {
        return single_byte;
    }

    let mut cp = u32::from(lead & lead_mask);
    for &byte in &bytes[1..width] {
        if byte & 0xC0 != 0x80 {
            return single_byte;
        }
        cp = (cp << 6) | u32::from(byte & 0x3F);
    }
    (cp, width)
}
867
/// Returns `true` for code points that `max_line_length_utf8` counts as
/// occupying no display columns.
///
/// NOTE(review): the ranges look like combining marks and format characters
/// in the style of a `wcwidth` table; the table has not been checked against
/// a specific Unicode version here.
#[inline]
fn is_zero_width(cp: u32) -> bool {
    matches!(
        cp,
        0x0300..=0x036F | 0x0483..=0x0489 | 0x0591..=0x05BD | 0x05BF
            | 0x05C1..=0x05C2
            | 0x05C4..=0x05C5
            | 0x05C7
            | 0x0600..=0x0605 | 0x0610..=0x061A | 0x064B..=0x065F | 0x0670
            | 0x06D6..=0x06DD
            | 0x06DF..=0x06E4
            | 0x06E7..=0x06E8
            | 0x06EA..=0x06ED
            | 0x070F
            | 0x0711
            | 0x0730..=0x074A
            | 0x07A6..=0x07B0
            | 0x07EB..=0x07F3
            | 0x07FD
            | 0x0816..=0x0819
            | 0x081B..=0x0823
            | 0x0825..=0x0827
            | 0x0829..=0x082D
            | 0x0859..=0x085B
            | 0x08D3..=0x08E1
            | 0x08E3..=0x0902
            | 0x093A
            | 0x093C
            | 0x0941..=0x0948
            | 0x094D
            | 0x0951..=0x0957
            | 0x0962..=0x0963
            | 0x0981
            | 0x09BC
            | 0x09C1..=0x09C4
            | 0x09CD
            | 0x09E2..=0x09E3
            | 0x09FE
            | 0x0A01..=0x0A02
            | 0x0A3C
            | 0x0A41..=0x0A42
            | 0x0A47..=0x0A48
            | 0x0A4B..=0x0A4D
            | 0x0A51
            | 0x0A70..=0x0A71
            | 0x0A75
            | 0x0A81..=0x0A82
            | 0x0ABC
            | 0x0AC1..=0x0AC5
            | 0x0AC7..=0x0AC8
            | 0x0ACD
            | 0x0AE2..=0x0AE3
            | 0x0AFA..=0x0AFF
            | 0x0B01
            | 0x0B3C
            | 0x0B3F
            | 0x0B41..=0x0B44
            | 0x0B4D
            | 0x0B56
            | 0x0B62..=0x0B63
            | 0x0B82
            | 0x0BC0
            | 0x0BCD
            | 0x0C00
            | 0x0C04
            | 0x0C3E..=0x0C40
            | 0x0C46..=0x0C48
            | 0x0C4A..=0x0C4D
            | 0x0C55..=0x0C56
            | 0x0C62..=0x0C63
            | 0x0C81
            | 0x0CBC
            | 0x0CBF
            | 0x0CC6
            | 0x0CCC..=0x0CCD
            | 0x0CE2..=0x0CE3
            | 0x0D00..=0x0D01
            | 0x0D3B..=0x0D3C
            | 0x0D41..=0x0D44
            | 0x0D4D
            | 0x0D62..=0x0D63
            | 0x0DCA
            | 0x0DD2..=0x0DD4
            | 0x0DD6
            | 0x0E31
            | 0x0E34..=0x0E3A
            | 0x0E47..=0x0E4E
            | 0x0EB1
            | 0x0EB4..=0x0EBC
            | 0x0EC8..=0x0ECD
            | 0x0F18..=0x0F19
            | 0x0F35
            | 0x0F37
            | 0x0F39
            | 0x0F71..=0x0F7E
            | 0x0F80..=0x0F84
            | 0x0F86..=0x0F87
            | 0x0F8D..=0x0F97
            | 0x0F99..=0x0FBC
            | 0x0FC6
            | 0x102D..=0x1030
            | 0x1032..=0x1037
            | 0x1039..=0x103A
            | 0x103D..=0x103E
            | 0x1058..=0x1059
            | 0x105E..=0x1060
            | 0x1071..=0x1074
            | 0x1082
            | 0x1085..=0x1086
            | 0x108D
            | 0x109D
            | 0x1160..=0x11FF | 0x135D..=0x135F
            | 0x1712..=0x1714
            | 0x1732..=0x1734
            | 0x1752..=0x1753
            | 0x1772..=0x1773
            | 0x17B4..=0x17B5
            | 0x17B7..=0x17BD
            | 0x17C6
            | 0x17C9..=0x17D3
            | 0x17DD
            | 0x180B..=0x180D
            | 0x1885..=0x1886
            | 0x18A9
            | 0x1920..=0x1922
            | 0x1927..=0x1928
            | 0x1932
            | 0x1939..=0x193B
            | 0x1A17..=0x1A18
            | 0x1A1B
            | 0x1A56
            | 0x1A58..=0x1A5E
            | 0x1A60
            | 0x1A62
            | 0x1A65..=0x1A6C
            | 0x1A73..=0x1A7C
            | 0x1A7F
            | 0x1AB0..=0x1ABE
            | 0x1B00..=0x1B03
            | 0x1B34
            | 0x1B36..=0x1B3A
            | 0x1B3C
            | 0x1B42
            | 0x1B6B..=0x1B73
            | 0x1B80..=0x1B81
            | 0x1BA2..=0x1BA5
            | 0x1BA8..=0x1BA9
            | 0x1BAB..=0x1BAD
            | 0x1BE6
            | 0x1BE8..=0x1BE9
            | 0x1BED
            | 0x1BEF..=0x1BF1
            | 0x1C2C..=0x1C33
            | 0x1C36..=0x1C37
            | 0x1CD0..=0x1CD2
            | 0x1CD4..=0x1CE0
            | 0x1CE2..=0x1CE8
            | 0x1CED
            | 0x1CF4
            | 0x1CF8..=0x1CF9
            | 0x1DC0..=0x1DF9
            | 0x1DFB..=0x1DFF
            | 0x200B..=0x200F | 0x202A..=0x202E | 0x2060..=0x2064 | 0x2066..=0x206F | 0x20D0..=0x20F0 | 0xFE00..=0xFE0F | 0xFE20..=0xFE2F | 0xFEFF | 0xFFF9..=0xFFFB | 0x1D167..=0x1D169
            | 0x1D173..=0x1D182
            | 0x1D185..=0x1D18B
            | 0x1D1AA..=0x1D1AD
            | 0x1D242..=0x1D244
            | 0xE0001
            | 0xE0020..=0xE007F
            | 0xE0100..=0xE01EF
    )
}
1058
/// Returns `true` for code points that `max_line_length_utf8` counts as
/// occupying two display columns.
///
/// NOTE(review): the ranges look like East Asian wide/fullwidth characters
/// and emoji; the table has not been checked against a specific Unicode
/// version here.
#[inline]
fn is_wide_char(cp: u32) -> bool {
    matches!(
        cp,
        0x1100..=0x115F | 0x231A..=0x231B | 0x2329..=0x232A | 0x23E9..=0x23F3 | 0x23F8..=0x23FA
            | 0x25FD..=0x25FE
            | 0x2614..=0x2615
            | 0x2648..=0x2653
            | 0x267F
            | 0x2693
            | 0x26A1
            | 0x26AA..=0x26AB
            | 0x26BD..=0x26BE
            | 0x26C4..=0x26C5
            | 0x26CE
            | 0x26D4
            | 0x26EA
            | 0x26F2..=0x26F3
            | 0x26F5
            | 0x26FA
            | 0x26FD
            | 0x2702
            | 0x2705
            | 0x2708..=0x270D
            | 0x270F
            | 0x2712
            | 0x2714
            | 0x2716
            | 0x271D
            | 0x2721
            | 0x2728
            | 0x2733..=0x2734
            | 0x2744
            | 0x2747
            | 0x274C
            | 0x274E
            | 0x2753..=0x2755
            | 0x2757
            | 0x2763..=0x2764
            | 0x2795..=0x2797
            | 0x27A1
            | 0x27B0
            | 0x27BF
            | 0x2934..=0x2935
            | 0x2B05..=0x2B07
            | 0x2B1B..=0x2B1C
            | 0x2B50
            | 0x2B55
            | 0x2E80..=0x303E | 0x3040..=0x33BF | 0x3400..=0x4DBF | 0x4E00..=0xA4CF | 0xA960..=0xA97C | 0xAC00..=0xD7A3 | 0xF900..=0xFAFF | 0xFE10..=0xFE19 | 0xFE30..=0xFE6F | 0xFF01..=0xFF60 | 0xFFE0..=0xFFE6 | 0x1F004
            | 0x1F0CF
            | 0x1F170..=0x1F171
            | 0x1F17E..=0x1F17F
            | 0x1F18E
            | 0x1F191..=0x1F19A
            | 0x1F1E0..=0x1F1FF | 0x1F200..=0x1F202
            | 0x1F210..=0x1F23B
            | 0x1F240..=0x1F248
            | 0x1F250..=0x1F251
            | 0x1F260..=0x1F265
            | 0x1F300..=0x1F64F | 0x1F680..=0x1F6FF | 0x1F900..=0x1F9FF | 0x1FA00..=0x1FA6F
            | 0x1FA70..=0x1FAFF
            | 0x20000..=0x2FFFD | 0x30000..=0x3FFFD
    )
}
1145
/// Computes the widest line of `data` in byte-oriented mode.
///
/// Column rules:
/// * 0x21..=0x7E and space advance the column by one.
/// * Tab advances to the next multiple of eight.
/// * `\r` returns to column 0 without ending the line.
/// * `\n` and form feed (0x0C) end the line.
/// * Every other byte has no width.
///
/// A line's width is the furthest column reached on it, so text overwritten
/// after a `\r` still counts.
pub fn max_line_length_c(data: &[u8]) -> u64 {
    let mut widest = 0u64; // widest finished line
    let mut line_width = 0u64; // furthest column reached on the current line
    let mut column = 0u64; // current cursor column

    for &byte in data {
        match byte {
            0x21..=0x7E | b' ' => {
                column += 1;
                line_width = line_width.max(column);
            }
            b'\t' => {
                column = (column + 8) & !7;
                line_width = line_width.max(column);
            }
            b'\r' => column = 0,
            b'\n' | 0x0C => {
                widest = widest.max(line_width);
                column = 0;
                line_width = 0;
            }
            _ => {}
        }
    }

    // The final line may not be newline-terminated.
    widest.max(line_width)
}
1227
1228pub fn max_line_length_utf8(data: &[u8]) -> u64 {
1235 let mut max_len: u64 = 0;
1236 let mut line_len: u64 = 0;
1237 let mut linepos: u64 = 0;
1238 let mut i = 0;
1239 let len = data.len();
1240
1241 while i < len {
1242 let b = unsafe { *data.get_unchecked(i) };
1243
1244 if b >= 0x21 && b <= 0x7E {
1245 i += 1;
1247 let mut run = 1u64;
1248 while i < len {
1249 let b = unsafe { *data.get_unchecked(i) };
1250 if b >= 0x21 && b <= 0x7E {
1251 run += 1;
1252 i += 1;
1253 } else {
1254 break;
1255 }
1256 }
1257 linepos += run;
1258 if linepos > line_len {
1259 line_len = linepos;
1260 }
1261 } else if b < 0x80 {
1262 match b {
1264 b' ' => {
1265 linepos += 1;
1266 if linepos > line_len {
1267 line_len = linepos;
1268 }
1269 }
1270 b'\n' => {
1271 if line_len > max_len {
1272 max_len = line_len;
1273 }
1274 linepos = 0;
1275 line_len = 0;
1276 }
1277 b'\t' => {
1278 linepos = (linepos + 8) & !7;
1279 if linepos > line_len {
1280 line_len = linepos;
1281 }
1282 }
1283 b'\r' => {
1284 linepos = 0;
1285 }
1286 0x0C => {
1287 if line_len > max_len {
1288 max_len = line_len;
1289 }
1290 linepos = 0;
1291 line_len = 0;
1292 }
1293 _ => {} }
1295 i += 1;
1296 } else {
1297 let (cp, blen) = decode_utf8(&data[i..]);
1299
1300 if cp <= 0x9F {
1302 } else if is_zero_width(cp) {
1304 } else if is_wide_char(cp) {
1306 linepos += 2;
1307 if linepos > line_len {
1308 line_len = linepos;
1309 }
1310 } else {
1311 linepos += 1;
1313 if linepos > line_len {
1314 line_len = linepos;
1315 }
1316 }
1317 i += blen;
1318 }
1319 }
1320
1321 if line_len > max_len {
1323 max_len = line_len;
1324 }
1325
1326 max_len
1327}
1328
1329#[inline]
1331pub fn max_line_length(data: &[u8], utf8: bool) -> u64 {
1332 if utf8 {
1333 max_line_length_utf8(data)
1334 } else {
1335 max_line_length_c(data)
1336 }
1337}
1338
1339pub fn count_all(data: &[u8], utf8: bool) -> WcCounts {
1351 if utf8 {
1352 let (lines, words) = count_lines_words_utf8_fused(data);
1353 WcCounts {
1354 lines,
1355 words,
1356 bytes: data.len() as u64,
1357 chars: count_chars_utf8(data),
1358 max_line_length: max_line_length_utf8(data),
1359 }
1360 } else {
1361 WcCounts {
1362 lines: count_lines(data),
1363 words: count_words_locale(data, false),
1364 bytes: data.len() as u64,
1365 chars: data.len() as u64,
1366 max_line_length: max_line_length_c(data),
1367 }
1368 }
1369}
1370
/// Heuristic ASCII check that inspects at most three windows of `data`.
///
/// With `sample = min(256, len)`, the windows are the first `sample` bytes,
/// `sample` bytes around the midpoint (only when `len > 2 * sample`), and the
/// last `sample` bytes (only when `len > sample`). Returns `true` when no
/// inspected byte has its high bit set. Bytes outside the windows are never
/// looked at, so `true` does not prove the whole buffer is ASCII.
#[inline]
fn check_ascii_sample(data: &[u8]) -> bool {
    let len = data.len();
    if len == 0 {
        return true;
    }
    let sample = len.min(256);

    if !data[..sample].is_ascii() {
        return false;
    }

    if len > sample * 2 {
        let mid_start = (len / 2).saturating_sub(sample / 2);
        let mid_end = (mid_start + sample).min(len);
        if !data[mid_start..mid_end].is_ascii() {
            return false;
        }
    }

    len <= sample || data[len - sample..].is_ascii()
}
1429
1430fn split_at_newlines(data: &[u8], num_chunks: usize) -> Vec<&[u8]> {
1439 if data.is_empty() || num_chunks <= 1 {
1440 return vec![data];
1441 }
1442 let chunk_size = data.len() / num_chunks;
1443 let mut chunks = Vec::with_capacity(num_chunks);
1444 let mut pos = 0;
1445
1446 for _ in 0..num_chunks - 1 {
1447 let target = pos + chunk_size;
1448 if target >= data.len() {
1449 break;
1450 }
1451 let boundary = memchr::memchr(b'\n', &data[target..])
1452 .map(|p| target + p + 1)
1453 .unwrap_or(data.len());
1454 if boundary > pos {
1455 chunks.push(&data[pos..boundary]);
1456 }
1457 pos = boundary;
1458 }
1459 if pos < data.len() {
1460 chunks.push(&data[pos..]);
1461 }
1462 chunks
1463}
1464
1465pub fn count_lines_parallel(data: &[u8]) -> u64 {
1468 if data.len() < PARALLEL_THRESHOLD {
1469 return count_lines(data);
1470 }
1471
1472 let num_threads = rayon::current_num_threads().max(1);
1473 let chunk_size = (data.len() / num_threads).max(2 * 1024 * 1024);
1475
1476 data.par_chunks(chunk_size)
1477 .map(|chunk| memchr_iter(b'\n', chunk).count() as u64)
1478 .sum()
1479}
1480
1481pub fn count_words_parallel(data: &[u8], utf8: bool) -> u64 {
1483 if data.len() < PARALLEL_THRESHOLD {
1484 return count_words_locale(data, utf8);
1485 }
1486
1487 let num_threads = rayon::current_num_threads().max(1);
1488
1489 if utf8 {
1490 let chunks = split_at_newlines(data, num_threads);
1493 chunks.par_iter().map(|chunk| count_words_utf8(chunk)).sum()
1494 } else {
1495 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1497
1498 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1499
1500 let results: Vec<(u64, u64, bool, bool)> = chunks
1502 .par_iter()
1503 .map(|chunk| count_lw_c_chunk(chunk))
1504 .collect();
1505
1506 let mut total = 0u64;
1507 for i in 0..results.len() {
1508 total += results[i].1;
1509 if i > 0 && results[i - 1].3 && results[i].2 {
1513 total -= 1;
1514 }
1515 }
1516 total
1517 }
1518}
1519
1520pub fn count_chars_parallel(data: &[u8], utf8: bool) -> u64 {
1522 if !utf8 {
1523 return data.len() as u64;
1524 }
1525 if data.len() < PARALLEL_THRESHOLD {
1526 return count_chars_utf8(data);
1527 }
1528
1529 let num_threads = rayon::current_num_threads().max(1);
1530 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1531
1532 data.par_chunks(chunk_size).map(count_chars_utf8).sum()
1533}
1534
1535pub fn count_lwb(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1538 let (lines, words) = count_lines_words(data, utf8);
1539 (lines, words, data.len() as u64)
1540}
1541
1542pub fn count_lwb_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1548 if data.len() < PARALLEL_THRESHOLD {
1549 return count_lwb(data, utf8);
1551 }
1552
1553 let num_threads = rayon::current_num_threads().max(1);
1554
1555 let (lines, words) = if !utf8 {
1556 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1558
1559 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1560 let results: Vec<(u64, u64, bool, bool)> = chunks
1561 .par_iter()
1562 .map(|chunk| count_lw_c_chunk_fast(chunk))
1563 .collect();
1564
1565 let mut line_total = 0u64;
1566 let mut word_total = 0u64;
1567 for i in 0..results.len() {
1568 line_total += results[i].0;
1569 word_total += results[i].1;
1570 if i > 0 && results[i - 1].3 && results[i].2 {
1571 word_total -= 1;
1572 }
1573 }
1574
1575 (line_total, word_total)
1576 } else {
1577 let is_ascii = check_ascii_sample(data);
1579 if is_ascii {
1580 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1582 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1583 let results: Vec<(u64, u64, bool, bool)> = chunks
1584 .par_iter()
1585 .map(|chunk| count_lw_c_chunk_fast(chunk))
1586 .collect();
1587
1588 let mut line_total = 0u64;
1589 let mut word_total = 0u64;
1590 for i in 0..results.len() {
1591 line_total += results[i].0;
1592 word_total += results[i].1;
1593 if i > 0 && results[i - 1].3 && results[i].2 {
1594 word_total -= 1;
1595 }
1596 }
1597 (line_total, word_total)
1598 } else {
1599 let chunks = split_at_newlines(data, num_threads);
1602 let results: Vec<(u64, u64)> = chunks
1603 .par_iter()
1604 .map(|chunk| count_lines_words_utf8_fused(chunk))
1605 .collect();
1606 let mut line_total = 0u64;
1607 let mut word_total = 0u64;
1608 for (l, w) in results {
1609 line_total += l;
1610 word_total += w;
1611 }
1612 (line_total, word_total)
1613 }
1614 };
1615
1616 (lines, words, data.len() as u64)
1617}
1618
1619pub fn count_lwc_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1623 if data.len() < PARALLEL_THRESHOLD {
1624 let lines = count_lines(data);
1625 let words = count_words_locale(data, utf8);
1626 let chars = count_chars(data, utf8);
1627 return (lines, words, chars);
1628 }
1629
1630 let num_threads = rayon::current_num_threads().max(1);
1631
1632 if utf8 {
1633 let chunks = split_at_newlines(data, num_threads);
1635 let results: Vec<(u64, u64, u64)> = chunks
1636 .par_iter()
1637 .map(|chunk| {
1638 let (lines, words) = count_lines_words_utf8_fused(chunk);
1639 let chars = count_chars_utf8(chunk);
1640 (lines, words, chars)
1641 })
1642 .collect();
1643 let mut lines = 0u64;
1644 let mut words = 0u64;
1645 let mut chars = 0u64;
1646 for (l, w, c) in results {
1647 lines += l;
1648 words += w;
1649 chars += c;
1650 }
1651 (lines, words, chars)
1652 } else {
1653 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1655 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1656 let results: Vec<(u64, u64, bool, bool)> = chunks
1657 .par_iter()
1658 .map(|chunk| count_lw_c_chunk_fast(chunk))
1659 .collect();
1660 let mut lines = 0u64;
1661 let mut words = 0u64;
1662 for i in 0..results.len() {
1663 lines += results[i].0;
1664 words += results[i].1;
1665 if i > 0 && results[i - 1].3 && results[i].2 {
1666 words -= 1;
1667 }
1668 }
1669 (lines, words, data.len() as u64)
1670 }
1671}
1672
1673pub fn max_line_length_parallel(data: &[u8], utf8: bool) -> u64 {
1677 if data.len() < PARALLEL_THRESHOLD {
1678 return max_line_length(data, utf8);
1679 }
1680 let num_threads = rayon::current_num_threads().max(1);
1681 let chunks = split_at_newlines(data, num_threads);
1682 chunks
1683 .par_iter()
1684 .map(|chunk| {
1685 if utf8 {
1686 max_line_length_utf8(chunk)
1687 } else {
1688 max_line_length_c(chunk)
1689 }
1690 })
1691 .max()
1692 .unwrap_or(0)
1693}
1694
1695pub fn count_all_parallel(data: &[u8], utf8: bool) -> WcCounts {
1699 if data.len() < PARALLEL_THRESHOLD {
1700 return count_all(data, utf8);
1701 }
1702
1703 let num_threads = rayon::current_num_threads().max(1);
1704 let chunks = split_at_newlines(data, num_threads);
1705
1706 if utf8 {
1707 let results: Vec<(u64, u64, u64, u64)> = chunks
1708 .par_iter()
1709 .map(|chunk| {
1710 let (lines, words) = count_lines_words_utf8_fused(chunk);
1711 let chars = count_chars_utf8(chunk);
1712 let max_ll = max_line_length_utf8(chunk);
1713 (lines, words, chars, max_ll)
1714 })
1715 .collect();
1716
1717 let mut counts = WcCounts {
1718 bytes: data.len() as u64,
1719 ..Default::default()
1720 };
1721 for (l, w, c, m) in results {
1722 counts.lines += l;
1723 counts.words += w;
1724 counts.chars += c;
1725 if m > counts.max_line_length {
1726 counts.max_line_length = m;
1727 }
1728 }
1729 counts
1730 } else {
1731 let results: Vec<(u64, u64, u64)> = chunks
1733 .par_iter()
1734 .map(|chunk| {
1735 let (lines, words) = count_lines_words(chunk, false);
1736 let max_ll = max_line_length_c(chunk);
1737 (lines, words, max_ll)
1738 })
1739 .collect();
1740
1741 let mut counts = WcCounts {
1742 bytes: data.len() as u64,
1743 chars: data.len() as u64,
1744 ..Default::default()
1745 };
1746 for (l, w, m) in &results {
1747 counts.lines += l;
1748 counts.words += w;
1749 if *m > counts.max_line_length {
1750 counts.max_line_length = *m;
1751 }
1752 }
1753 counts
1754 }
1755}