1use memchr::memchr_iter;
2use rayon::prelude::*;
3
/// Minimum input size, in bytes (1 MiB), at which the `*_parallel` counters
/// split work across threads. Smaller inputs are handed to the sequential
/// counters instead.
const PARALLEL_THRESHOLD: usize = 1024 * 1024;
8
/// The full set of counts for one input buffer, as filled in by `count_all`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct WcCounts {
    /// Number of `\n` bytes in the input.
    pub lines: u64,
    /// Number of whitespace-delimited words.
    pub words: u64,
    /// Length of the input in bytes.
    pub bytes: u64,
    /// Number of characters: non-continuation bytes in UTF-8 mode, otherwise
    /// the same value as `bytes`.
    pub chars: u64,
    /// Display width of the widest line.
    pub max_line_length: u64,
}
18
/// Builds the byte-indexed lookup table behind [`IS_SPACE`].
///
/// An entry is `true` for the six ASCII whitespace bytes: horizontal tab,
/// line feed, vertical tab, form feed, carriage return and space.
const fn make_is_space() -> [bool; 256] {
    const WHITESPACE: [u8; 6] = [0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x20];

    let mut table = [false; 256];
    // `for` is not available in `const fn`, so index through the list by hand.
    let mut k = 0;
    while k < WHITESPACE.len() {
        table[WHITESPACE[k] as usize] = true;
        k += 1;
    }
    table
}

/// `IS_SPACE[b]` is `true` when byte `b` is ASCII whitespace.
const IS_SPACE: [bool; 256] = make_is_space();
59
60#[inline]
63pub(crate) fn first_is_word(data: &[u8]) -> bool {
64 !data.is_empty() && !IS_SPACE[data[0] as usize]
65}
66
/// Returns `true` when `cp` is one of the non-ASCII Unicode space code points
/// recognised as a word separator: U+1680, U+2000..=U+200A, U+2028, U+2029,
/// U+205F or U+3000.
#[inline]
fn is_unicode_space(cp: u32) -> bool {
    const SINGLES: [u32; 5] = [0x1680, 0x2028, 0x2029, 0x205F, 0x3000];

    (0x2000..=0x200A).contains(&cp) || SINGLES.contains(&cp)
}
86
/// Returns `true` for the code points U+00A0, U+2007, U+202F and U+2060,
/// which `is_unicode_word_break` also treats as word separators.
#[inline]
fn is_wnbspace(cp: u32) -> bool {
    const NON_BREAKING: [u32; 4] = [0x00A0, 0x2007, 0x202F, 0x2060];

    NON_BREAKING.contains(&cp)
}
94
/// Returns `true` when the non-ASCII code point `cp` ends the current word,
/// i.e. it is accepted by either `is_unicode_space` or `is_wnbspace`.
#[inline]
fn is_unicode_word_break(cp: u32) -> bool {
    is_unicode_space(cp) || is_wnbspace(cp)
}
100
/// Decides whether a decoded code point counts as word-forming text.
///
/// Rejected are: everything below U+00A0, the surrogate range
/// U+D800..=U+DFFF, values above U+10FFFF, the range U+FDD0..=U+FDEF, and any
/// code point whose low 16 bits are `FFFE` or `FFFF`.
#[inline]
fn is_printable_unicode(cp: u32) -> bool {
    let below_first_printable = cp < 0xA0;
    let surrogate_or_too_large = matches!(cp, 0xD800..=0xDFFF) || cp > 0x10FFFF;
    // Masking with 0xFFFE folds the `..FFFE` / `..FFFF` pair of every plane
    // onto a single comparison.
    let noncharacter = matches!(cp, 0xFDD0..=0xFDEF) || (cp & 0xFFFE) == 0xFFFE;

    !(below_first_printable || surrogate_or_too_large || noncharacter)
}
128
/// Counts the `\n` bytes in `data`.
///
/// A final line without a trailing newline is therefore not counted.
#[inline]
pub fn count_lines(data: &[u8]) -> u64 {
    memchr_iter(b'\n', data).count() as u64
}
139
/// Returns the length of `data` in bytes.
#[inline]
pub fn count_bytes(data: &[u8]) -> u64 {
    data.len() as u64
}
145
/// Counts words in `data` using the UTF-8 rules; shorthand for
/// `count_words_locale(data, true)`.
pub fn count_words(data: &[u8]) -> u64 {
    count_words_locale(data, true)
}
150
/// Counts words in `data`.
///
/// With `utf8 == true` the input is decoded as UTF-8 and Unicode separators
/// are honoured (`count_words_utf8`); otherwise every byte is classified on
/// its own using the ASCII whitespace table (`count_words_c`).
pub fn count_words_locale(data: &[u8], utf8: bool) -> u64 {
    if utf8 {
        count_words_utf8(data)
    } else {
        count_words_c(data)
    }
}
163
164fn count_words_c(data: &[u8]) -> u64 {
168 let mut words = 0u64;
169 let mut in_word = false;
170 let mut i = 0;
171 let len = data.len();
172
173 while i < len {
174 let b = unsafe { *data.get_unchecked(i) };
175 if IS_SPACE[b as usize] {
176 in_word = false;
177 } else if !in_word {
178 in_word = true;
179 words += 1;
180 }
181 i += 1;
182 }
183 words
184}
185
/// Finishes a SIMD line/word count by processing bytes `i..len` one at a time.
///
/// The running totals and the "currently inside a word" flag produced by the
/// vector loop are passed in and carried on. Both SIMD callers pass
/// `data.as_ptr()` as `ptr` and `data.len()` as `len`; the raw reads below
/// rely on that.
///
/// Returns `(lines, words, first_is_word, ends_in_word)`, where
/// `first_is_word` describes the first byte of the whole `data` slice and
/// `ends_in_word` is the word state after its last byte.
#[cfg(target_arch = "x86_64")]
#[inline(always)]
fn count_lw_c_scalar_tail(
    ptr: *const u8,
    mut i: usize,
    len: usize,
    mut total_lines: u64,
    mut total_words: u64,
    mut prev_in_word: bool,
    data: &[u8],
) -> (u64, u64, bool, bool) {
    while i < len {
        // SAFETY: relies on the callers passing `ptr == data.as_ptr()` and
        // `len == data.len()`, so `i < len` keeps the read inside `data`.
        let b = unsafe { *ptr.add(i) };
        if IS_SPACE[b as usize] {
            if b == b'\n' {
                total_lines += 1;
            }
            prev_in_word = false;
        } else if !prev_in_word {
            // Whitespace -> non-whitespace transition: a new word starts.
            total_words += 1;
            prev_in_word = true;
        }
        i += 1;
    }
    let first_word = first_is_word(data);
    (total_lines, total_words, first_word, prev_in_word)
}
216
/// AVX2 line and word counter using byte-wise (C-locale) whitespace rules.
///
/// Processes 32 bytes per iteration and hands any remaining bytes to
/// `count_lw_c_scalar_tail`. Returns
/// `(lines, words, first_is_word, ends_in_word)`.
///
/// # Safety
///
/// The caller must ensure the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_lw_c_chunk_avx2(data: &[u8]) -> (u64, u64, bool, bool) {
    use std::arch::x86_64::*;

    let len = data.len();
    let ptr = data.as_ptr();
    let mut i = 0usize;
    let mut total_lines = 0u64;
    let mut total_words = 0u64;
    let mut prev_in_word = false;

    unsafe {
        let nl_byte = _mm256_set1_epi8(b'\n' as i8);
        let zero = _mm256_setzero_si256();
        let ones = _mm256_set1_epi8(1);
        let const_0x09 = _mm256_set1_epi8(0x09u8 as i8);
        let const_0x0d = _mm256_set1_epi8(0x0Du8 as i8);
        let const_0x20 = _mm256_set1_epi8(0x20u8 as i8);

        // Per-lane 8-bit newline counters; `batch` counts how many blocks
        // have been added since the last flush.
        let mut line_acc = _mm256_setzero_si256();
        let mut batch = 0u32;

        while i + 32 <= len {
            // SAFETY: `i + 32 <= len`, so the unaligned 32-byte load stays
            // inside `data`.
            let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
            // Each newline lane compares to 0xFF; masking with 1 turns that
            // into an increment of 1 for the lane's counter.
            let is_nl = _mm256_cmpeq_epi8(v, nl_byte);
            line_acc = _mm256_add_epi8(line_acc, _mm256_and_si256(is_nl, ones));

            // Unsigned range test 0x09 <= v <= 0x0D:
            // max(v, 0x09) == v  <=>  v >= 0x09, and
            // min(v, 0x0D) == v  <=>  v <= 0x0D.
            let ge_09 = _mm256_cmpeq_epi8(_mm256_max_epu8(v, const_0x09), v);
            let le_0d = _mm256_cmpeq_epi8(_mm256_min_epu8(v, const_0x0d), v);
            let in_tab_range = _mm256_and_si256(ge_09, le_0d);
            let is_sp = _mm256_cmpeq_epi8(v, const_0x20);
            let is_space = _mm256_or_si256(in_tab_range, is_sp);
            // Bit k of `space_mask` is set when byte k of the block is
            // whitespace.
            let space_mask = _mm256_movemask_epi8(is_space) as u32;

            let nonspace_mask = !space_mask;
            // Bit k of `prev_space` is set when the byte before byte k is
            // whitespace; bit 0 is carried in from the previous block.
            let prev_space = (space_mask << 1) | if prev_in_word { 0u32 } else { 1u32 };
            // A word starts at every non-whitespace byte that follows
            // whitespace.
            let starts = nonspace_mask & prev_space;
            total_words += starts.count_ones() as u64;

            // Carry out: was the last byte of this block inside a word?
            prev_in_word = (nonspace_mask >> 31) & 1 == 1;

            batch += 1;
            // Each 8-bit lane gains at most 1 per block, so flush after 255
            // blocks, before a lane can wrap.
            if batch >= 255 {
                // Sum the byte lanes: SAD against zero yields four 64-bit
                // partial sums, which are then folded into one scalar.
                let sad = _mm256_sad_epu8(line_acc, zero);
                let hi = _mm256_extracti128_si256(sad, 1);
                let lo = _mm256_castsi256_si128(sad);
                let s = _mm_add_epi64(lo, hi);
                let h64 = _mm_unpackhi_epi64(s, s);
                let t = _mm_add_epi64(s, h64);
                total_lines += _mm_cvtsi128_si64(t) as u64;
                line_acc = _mm256_setzero_si256();
                batch = 0;
            }
            i += 32;
        }

        // Flush whatever is still held in the lane counters.
        if batch > 0 {
            let sad = _mm256_sad_epu8(line_acc, zero);
            let hi = _mm256_extracti128_si256(sad, 1);
            let lo = _mm256_castsi256_si128(sad);
            let s = _mm_add_epi64(lo, hi);
            let h64 = _mm_unpackhi_epi64(s, s);
            let t = _mm_add_epi64(s, h64);
            total_lines += _mm_cvtsi128_si64(t) as u64;
        }
    }

    // Fewer than 32 bytes remain; finish them with the scalar loop.
    count_lw_c_scalar_tail(ptr, i, len, total_lines, total_words, prev_in_word, data)
}
298
/// SSE2 line and word counter using byte-wise (C-locale) whitespace rules.
///
/// Same algorithm as the AVX2 variant, on 16-byte blocks. Remaining bytes are
/// handed to `count_lw_c_scalar_tail`. Returns
/// `(lines, words, first_is_word, ends_in_word)`.
///
/// # Safety
///
/// The caller must ensure the CPU supports SSE2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn count_lw_c_chunk_sse2(data: &[u8]) -> (u64, u64, bool, bool) {
    use std::arch::x86_64::*;

    let len = data.len();
    let ptr = data.as_ptr();
    let mut i = 0usize;
    let mut total_lines = 0u64;
    let mut total_words = 0u64;
    let mut prev_in_word = false;

    unsafe {
        let nl_byte = _mm_set1_epi8(b'\n' as i8);
        let zero = _mm_setzero_si128();
        let ones = _mm_set1_epi8(1);
        let const_0x09 = _mm_set1_epi8(0x09u8 as i8);
        let const_0x0d = _mm_set1_epi8(0x0Du8 as i8);
        let const_0x20 = _mm_set1_epi8(0x20u8 as i8);

        // Per-lane 8-bit newline counters; `batch` counts how many blocks
        // have been added since the last flush.
        let mut line_acc = _mm_setzero_si128();
        let mut batch = 0u32;

        while i + 16 <= len {
            // SAFETY: `i + 16 <= len`, so the unaligned 16-byte load stays
            // inside `data`.
            let v = _mm_loadu_si128(ptr.add(i) as *const __m128i);
            let is_nl = _mm_cmpeq_epi8(v, nl_byte);
            line_acc = _mm_add_epi8(line_acc, _mm_and_si128(is_nl, ones));

            // Unsigned range test 0x09 <= v <= 0x0D via max/min + equality.
            let ge_09 = _mm_cmpeq_epi8(_mm_max_epu8(v, const_0x09), v);
            let le_0d = _mm_cmpeq_epi8(_mm_min_epu8(v, const_0x0d), v);
            let in_tab_range = _mm_and_si128(ge_09, le_0d);
            let is_sp = _mm_cmpeq_epi8(v, const_0x20);
            let is_space = _mm_or_si128(in_tab_range, is_sp);
            // Only the low 16 bits are meaningful for a 128-bit vector.
            let space_mask = (_mm_movemask_epi8(is_space) as u32) & 0xFFFF;

            let nonspace_mask = !space_mask & 0xFFFF;
            // Bit k is set when the byte before byte k is whitespace; bit 0
            // is carried in from the previous block.
            let prev_space = ((space_mask << 1) | if prev_in_word { 0u32 } else { 1u32 }) & 0xFFFF;
            let starts = nonspace_mask & prev_space;
            total_words += starts.count_ones() as u64;

            // Carry out: was the last byte of this block inside a word?
            prev_in_word = (nonspace_mask >> 15) & 1 == 1;

            batch += 1;
            // Flush before any 8-bit lane counter can wrap.
            if batch >= 255 {
                // SAD against zero gives two 64-bit partial sums; add them.
                let sad = _mm_sad_epu8(line_acc, zero);
                let hi = _mm_unpackhi_epi64(sad, sad);
                let t = _mm_add_epi64(sad, hi);
                total_lines += _mm_cvtsi128_si64(t) as u64;
                line_acc = _mm_setzero_si128();
                batch = 0;
            }
            i += 16;
        }

        // Flush whatever is still held in the lane counters.
        if batch > 0 {
            let sad = _mm_sad_epu8(line_acc, zero);
            let hi = _mm_unpackhi_epi64(sad, sad);
            let t = _mm_add_epi64(sad, hi);
            total_lines += _mm_cvtsi128_si64(t) as u64;
        }
    }

    // Fewer than 16 bytes remain; finish them with the scalar loop.
    count_lw_c_scalar_tail(ptr, i, len, total_lines, total_words, prev_in_word, data)
}
368
/// Counts lines and words in `data` with byte-wise whitespace rules, picking
/// the fastest implementation available.
///
/// On x86_64 it uses the AVX2 kernel when the CPU reports AVX2 and the input
/// has at least 64 bytes, otherwise the SSE2 kernel for inputs of at least
/// 32 bytes. Everything else goes through the scalar `count_lw_c_chunk`.
///
/// Returns `(lines, words, first_is_word, ends_in_word)`.
#[inline]
fn count_lw_c_chunk_fast(data: &[u8]) -> (u64, u64, bool, bool) {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") && data.len() >= 64 {
            // SAFETY: AVX2 support was confirmed by the runtime check above.
            return unsafe { count_lw_c_chunk_avx2(data) };
        }
        if data.len() >= 32 {
            // SAFETY: SSE2 is part of the x86_64 baseline instruction set.
            return unsafe { count_lw_c_chunk_sse2(data) };
        }
    }
    count_lw_c_chunk(data)
}
383
384fn count_lw_c_chunk(data: &[u8]) -> (u64, u64, bool, bool) {
388 let mut lines = 0u64;
389 let mut words = 0u64;
390 let mut in_word = false;
391 let mut i = 0;
392 let len = data.len();
393
394 let first_word = first_is_word(data);
395
396 while i < len {
397 let b = unsafe { *data.get_unchecked(i) };
398 if IS_SPACE[b as usize] {
399 if b == b'\n' {
400 lines += 1;
401 }
402 in_word = false;
403 } else if !in_word {
404 in_word = true;
405 words += 1;
406 }
407 i += 1;
408 }
409 (lines, words, first_word, in_word)
410}
411
412fn count_words_utf8(data: &[u8]) -> u64 {
424 let mut words = 0u64;
425 let mut in_word = false;
426 let mut i = 0;
427 let len = data.len();
428
429 while i < len {
430 let b = unsafe { *data.get_unchecked(i) };
431
432 if b < 0x80 {
433 if IS_SPACE[b as usize] {
438 in_word = false;
439 } else if b >= 0x21 && b <= 0x7E {
440 if !in_word {
442 in_word = true;
443 words += 1;
444 }
445 }
446 i += 1;
448 } else if b < 0xC2 {
449 i += 1;
452 } else if b < 0xE0 {
453 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
454 let cp = ((b as u32 & 0x1F) << 6)
455 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
456 if is_unicode_word_break(cp) {
457 in_word = false;
458 } else if is_printable_unicode(cp) {
459 if !in_word {
460 in_word = true;
461 words += 1;
462 }
463 }
464 i += 2;
466 } else {
467 i += 1;
469 }
470 } else if b < 0xF0 {
471 if i + 2 < len
472 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
473 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
474 {
475 let cp = ((b as u32 & 0x0F) << 12)
476 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
477 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
478 if is_unicode_word_break(cp) {
479 in_word = false;
480 } else if is_printable_unicode(cp) {
481 if !in_word {
482 in_word = true;
483 words += 1;
484 }
485 }
486 i += 3;
488 } else {
489 i += 1;
491 }
492 } else if b < 0xF5 {
493 if i + 3 < len
494 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
495 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
496 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
497 {
498 let cp = ((b as u32 & 0x07) << 18)
499 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
500 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
501 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
502 if is_unicode_word_break(cp) {
503 in_word = false;
504 } else if is_printable_unicode(cp) {
505 if !in_word {
506 in_word = true;
507 words += 1;
508 }
509 }
510 i += 4;
512 } else {
513 i += 1;
515 }
516 } else {
517 i += 1;
519 }
520 }
521
522 words
523}
524
/// Counts lines and words in a single pass over `data`.
///
/// `utf8` selects the UTF-8 aware scanner; otherwise the byte-wise counter
/// (SIMD-accelerated where available) is used. Returns `(lines, words)`.
pub fn count_lines_words(data: &[u8], utf8: bool) -> (u64, u64) {
    if utf8 {
        count_lines_words_utf8_fused(data)
    } else {
        // The chunk-boundary flags are only needed when stitching chunks.
        let (lines, words, _, _) = count_lw_c_chunk_fast(data);
        (lines, words)
    }
}
536
537fn count_lines_words_utf8_fused(data: &[u8]) -> (u64, u64) {
545 let mut lines = 0u64;
546 let mut words = 0u64;
547 let mut in_word = false;
548 let mut i = 0;
549 let len = data.len();
550
551 while i < len {
552 let b = unsafe { *data.get_unchecked(i) };
553
554 if b == b'\n' {
555 lines += 1;
556 in_word = false;
557 i += 1;
558 } else if b < 0x80 {
559 if IS_SPACE[b as usize] {
563 in_word = false;
564 } else if b >= 0x21 && b <= 0x7E {
565 if !in_word {
566 in_word = true;
567 words += 1;
568 }
569 }
570 i += 1;
572 } else if b < 0xC2 {
573 i += 1;
575 } else if b < 0xE0 {
576 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
577 let cp = ((b as u32 & 0x1F) << 6)
578 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
579 if is_unicode_word_break(cp) {
580 in_word = false;
581 } else if is_printable_unicode(cp) {
582 if !in_word {
583 in_word = true;
584 words += 1;
585 }
586 }
587 i += 2;
588 } else {
589 i += 1;
591 }
592 } else if b < 0xF0 {
593 if i + 2 < len
594 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
595 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
596 {
597 let cp = ((b as u32 & 0x0F) << 12)
598 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
599 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
600 if is_unicode_word_break(cp) {
601 in_word = false;
602 } else if is_printable_unicode(cp) {
603 if !in_word {
604 in_word = true;
605 words += 1;
606 }
607 }
608 i += 3;
609 } else {
610 i += 1;
612 }
613 } else if b < 0xF5 {
614 if i + 3 < len
615 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
616 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
617 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
618 {
619 let cp = ((b as u32 & 0x07) << 18)
620 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
621 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
622 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
623 if is_unicode_word_break(cp) {
624 in_word = false;
625 } else if is_printable_unicode(cp) {
626 if !in_word {
627 in_word = true;
628 words += 1;
629 }
630 }
631 i += 4;
632 } else {
633 i += 1;
635 }
636 } else {
637 i += 1;
639 }
640 }
641
642 (lines, words)
643}
644
645pub fn count_lines_words_chars(data: &[u8], utf8: bool) -> (u64, u64, u64) {
647 if utf8 {
648 let (lines, words) = count_lines_words_utf8_fused(data);
650 let chars = count_chars_utf8(data);
651 (lines, words, chars)
652 } else {
653 let (lines, words) = count_lines_words(data, false);
655 (lines, words, data.len() as u64)
656 }
657}
658
/// Counts UTF-8 characters in `data` as the number of bytes that are not
/// continuation bytes (`0b10xx_xxxx`).
///
/// No validation is performed. On x86_64 the AVX2 kernel is used when the CPU
/// reports AVX2; otherwise the scalar implementation runs.
pub fn count_chars_utf8(data: &[u8]) -> u64 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("avx2") {
            // SAFETY: AVX2 support was confirmed by the runtime check above.
            return unsafe { count_chars_utf8_avx2(data) };
        }
    }
    count_chars_utf8_scalar(data)
}
674
/// AVX2 kernel for `count_chars_utf8`: counts the bytes of `data` that are
/// not UTF-8 continuation bytes, 32 bytes per iteration.
///
/// # Safety
///
/// The caller must ensure the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_chars_utf8_avx2(data: &[u8]) -> u64 {
    unsafe {
        use std::arch::x86_64::*;

        // A continuation byte satisfies `(b & 0xC0) == 0x80`.
        let mask_c0 = _mm256_set1_epi8(0xC0u8 as i8);
        let val_80 = _mm256_set1_epi8(0x80u8 as i8);
        let ones = _mm256_set1_epi8(1);
        let zero = _mm256_setzero_si256();

        let mut total = 0u64;
        let len = data.len();
        let ptr = data.as_ptr();
        let mut i = 0;
        // Per-lane 8-bit counters; `batch` counts how many blocks have been
        // added since the last flush.
        let mut acc = _mm256_setzero_si256();
        let mut batch = 0u32;

        while i + 32 <= len {
            // SAFETY: `i + 32 <= len`, so the unaligned 32-byte load stays
            // inside `data`.
            let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
            let masked = _mm256_and_si256(v, mask_c0);
            let is_cont = _mm256_cmpeq_epi8(masked, val_80);
            // `andnot` computes `!is_cont & 1`: 1 in every lane that holds a
            // non-continuation byte, 0 elsewhere.
            let non_cont = _mm256_andnot_si256(is_cont, ones);
            acc = _mm256_add_epi8(acc, non_cont);

            batch += 1;
            // Each lane gains at most 1 per block; flush after 255 blocks,
            // before a lane can wrap.
            if batch >= 255 {
                // SAD against zero sums the byte lanes into four 64-bit
                // partial sums, which are then folded into one scalar.
                let sad = _mm256_sad_epu8(acc, zero);
                let hi = _mm256_extracti128_si256(sad, 1);
                let lo = _mm256_castsi256_si128(sad);
                let sum = _mm_add_epi64(lo, hi);
                let hi64 = _mm_unpackhi_epi64(sum, sum);
                let t = _mm_add_epi64(sum, hi64);
                total += _mm_cvtsi128_si64(t) as u64;
                acc = _mm256_setzero_si256();
                batch = 0;
            }
            i += 32;
        }

        // Flush whatever is still held in the lane counters.
        if batch > 0 {
            let sad = _mm256_sad_epu8(acc, zero);
            let hi = _mm256_extracti128_si256(sad, 1);
            let lo = _mm256_castsi256_si128(sad);
            let sum = _mm_add_epi64(lo, hi);
            let hi64 = _mm_unpackhi_epi64(sum, sum);
            let t = _mm_add_epi64(sum, hi64);
            total += _mm_cvtsi128_si64(t) as u64;
        }

        // Scalar tail for the final `len % 32` bytes.
        while i < len {
            total += ((*ptr.add(i) & 0xC0) != 0x80) as u64;
            i += 1;
        }

        total
    }
}
738
/// Portable kernel for `count_chars_utf8`: counts the bytes of `data` that
/// are not UTF-8 continuation bytes (`0b10xx_xxxx`).
///
/// The input is walked in 64-byte blocks. A block that is entirely ASCII
/// contributes 64 without inspecting the bytes one by one.
fn count_chars_utf8_scalar(data: &[u8]) -> u64 {
    let starts_char = |byte: u8| (byte & 0xC0) != 0x80;

    let blocks = data.chunks_exact(64);
    let leftover = blocks.remainder();

    let mut total = 0u64;
    for block in blocks {
        total += if block.is_ascii() {
            64
        } else {
            block.iter().filter(|&&b| starts_char(b)).count() as u64
        };
    }

    total + leftover.iter().filter(|&&b| starts_char(b)).count() as u64
}
790
/// Counts characters with byte-wise rules: every byte is one character, so
/// this is simply the length of `data`.
#[inline]
pub fn count_chars_c(data: &[u8]) -> u64 {
    data.len() as u64
}
796
/// Counts characters in `data`: UTF-8 characters when `utf8` is set,
/// otherwise bytes.
#[inline]
pub fn count_chars(data: &[u8], utf8: bool) -> u64 {
    if utf8 {
        count_chars_utf8(data)
    } else {
        count_chars_c(data)
    }
}
806
/// Reports whether the process environment selects a UTF-8 locale.
///
/// `LC_ALL`, `LC_CTYPE` and `LANG` are consulted in that order. The first one
/// that is set to a non-empty value decides the answer: it must contain
/// `utf-8` or `utf8`, compared case-insensitively. If none is set, or a
/// variable's value is not valid Unicode, the result is `false`.
pub fn is_utf8_locale() -> bool {
    ["LC_ALL", "LC_CTYPE", "LANG"]
        .iter()
        .filter_map(|name| std::env::var(name).ok())
        .find(|value| !value.is_empty())
        .map_or(false, |value| {
            let lowered = value.to_ascii_lowercase();
            lowered.contains("utf-8") || lowered.contains("utf8")
        })
}
819
/// Decodes the code point at the start of `bytes`.
///
/// Returns `(code_point, bytes_consumed)`. A lead byte in `0xC2..=0xF4`
/// followed by the required number of continuation bytes yields the decoded
/// value and the sequence length. In every other case (ASCII, stray
/// continuation byte, invalid lead byte, truncated or malformed sequence) the
/// first byte itself is returned as the code point with a length of 1.
///
/// # Panics
///
/// Panics if `bytes` is empty.
#[inline]
fn decode_utf8(bytes: &[u8]) -> (u32, usize) {
    let lead = bytes[0];
    let is_cont = |b: u8| b & 0xC0 == 0x80;
    let payload = |b: u8| u32::from(b) & 0x3F;

    match (lead, &bytes[1..]) {
        (0xC2..=0xDF, [b1, ..]) if is_cont(*b1) => {
            (((u32::from(lead) & 0x1F) << 6) | payload(*b1), 2)
        }
        (0xE0..=0xEF, [b1, b2, ..]) if is_cont(*b1) && is_cont(*b2) => (
            ((u32::from(lead) & 0x0F) << 12) | (payload(*b1) << 6) | payload(*b2),
            3,
        ),
        (0xF0..=0xF4, [b1, b2, b3, ..]) if is_cont(*b1) && is_cont(*b2) && is_cont(*b3) => (
            ((u32::from(lead) & 0x07) << 18)
                | (payload(*b1) << 12)
                | (payload(*b2) << 6)
                | payload(*b3),
            4,
        ),
        // ASCII, or anything that is not a complete well-formed sequence.
        _ => (u32::from(lead), 1),
    }
}
863
/// Returns `true` for code points that `max_line_length_utf8` treats as
/// occupying no display columns.
///
/// The table is a hand-maintained list of code point ranges.
/// NOTE(review): it appears to cover combining marks, format controls and
/// variation selectors — confirm against the Unicode data it was derived from
/// before extending it.
#[inline]
fn is_zero_width(cp: u32) -> bool {
    matches!(
        cp,
        0x0300..=0x036F | 0x0483..=0x0489 | 0x0591..=0x05BD | 0x05BF
            | 0x05C1..=0x05C2
            | 0x05C4..=0x05C5
            | 0x05C7
            | 0x0600..=0x0605 | 0x0610..=0x061A | 0x064B..=0x065F | 0x0670
            | 0x06D6..=0x06DD
            | 0x06DF..=0x06E4
            | 0x06E7..=0x06E8
            | 0x06EA..=0x06ED
            | 0x070F
            | 0x0711
            | 0x0730..=0x074A
            | 0x07A6..=0x07B0
            | 0x07EB..=0x07F3
            | 0x07FD
            | 0x0816..=0x0819
            | 0x081B..=0x0823
            | 0x0825..=0x0827
            | 0x0829..=0x082D
            | 0x0859..=0x085B
            | 0x08D3..=0x08E1
            | 0x08E3..=0x0902
            | 0x093A
            | 0x093C
            | 0x0941..=0x0948
            | 0x094D
            | 0x0951..=0x0957
            | 0x0962..=0x0963
            | 0x0981
            | 0x09BC
            | 0x09C1..=0x09C4
            | 0x09CD
            | 0x09E2..=0x09E3
            | 0x09FE
            | 0x0A01..=0x0A02
            | 0x0A3C
            | 0x0A41..=0x0A42
            | 0x0A47..=0x0A48
            | 0x0A4B..=0x0A4D
            | 0x0A51
            | 0x0A70..=0x0A71
            | 0x0A75
            | 0x0A81..=0x0A82
            | 0x0ABC
            | 0x0AC1..=0x0AC5
            | 0x0AC7..=0x0AC8
            | 0x0ACD
            | 0x0AE2..=0x0AE3
            | 0x0AFA..=0x0AFF
            | 0x0B01
            | 0x0B3C
            | 0x0B3F
            | 0x0B41..=0x0B44
            | 0x0B4D
            | 0x0B56
            | 0x0B62..=0x0B63
            | 0x0B82
            | 0x0BC0
            | 0x0BCD
            | 0x0C00
            | 0x0C04
            | 0x0C3E..=0x0C40
            | 0x0C46..=0x0C48
            | 0x0C4A..=0x0C4D
            | 0x0C55..=0x0C56
            | 0x0C62..=0x0C63
            | 0x0C81
            | 0x0CBC
            | 0x0CBF
            | 0x0CC6
            | 0x0CCC..=0x0CCD
            | 0x0CE2..=0x0CE3
            | 0x0D00..=0x0D01
            | 0x0D3B..=0x0D3C
            | 0x0D41..=0x0D44
            | 0x0D4D
            | 0x0D62..=0x0D63
            | 0x0DCA
            | 0x0DD2..=0x0DD4
            | 0x0DD6
            | 0x0E31
            | 0x0E34..=0x0E3A
            | 0x0E47..=0x0E4E
            | 0x0EB1
            | 0x0EB4..=0x0EBC
            | 0x0EC8..=0x0ECD
            | 0x0F18..=0x0F19
            | 0x0F35
            | 0x0F37
            | 0x0F39
            | 0x0F71..=0x0F7E
            | 0x0F80..=0x0F84
            | 0x0F86..=0x0F87
            | 0x0F8D..=0x0F97
            | 0x0F99..=0x0FBC
            | 0x0FC6
            | 0x102D..=0x1030
            | 0x1032..=0x1037
            | 0x1039..=0x103A
            | 0x103D..=0x103E
            | 0x1058..=0x1059
            | 0x105E..=0x1060
            | 0x1071..=0x1074
            | 0x1082
            | 0x1085..=0x1086
            | 0x108D
            | 0x109D
            | 0x1160..=0x11FF | 0x135D..=0x135F
            | 0x1712..=0x1714
            | 0x1732..=0x1734
            | 0x1752..=0x1753
            | 0x1772..=0x1773
            | 0x17B4..=0x17B5
            | 0x17B7..=0x17BD
            | 0x17C6
            | 0x17C9..=0x17D3
            | 0x17DD
            | 0x180B..=0x180D
            | 0x1885..=0x1886
            | 0x18A9
            | 0x1920..=0x1922
            | 0x1927..=0x1928
            | 0x1932
            | 0x1939..=0x193B
            | 0x1A17..=0x1A18
            | 0x1A1B
            | 0x1A56
            | 0x1A58..=0x1A5E
            | 0x1A60
            | 0x1A62
            | 0x1A65..=0x1A6C
            | 0x1A73..=0x1A7C
            | 0x1A7F
            | 0x1AB0..=0x1ABE
            | 0x1B00..=0x1B03
            | 0x1B34
            | 0x1B36..=0x1B3A
            | 0x1B3C
            | 0x1B42
            | 0x1B6B..=0x1B73
            | 0x1B80..=0x1B81
            | 0x1BA2..=0x1BA5
            | 0x1BA8..=0x1BA9
            | 0x1BAB..=0x1BAD
            | 0x1BE6
            | 0x1BE8..=0x1BE9
            | 0x1BED
            | 0x1BEF..=0x1BF1
            | 0x1C2C..=0x1C33
            | 0x1C36..=0x1C37
            | 0x1CD0..=0x1CD2
            | 0x1CD4..=0x1CE0
            | 0x1CE2..=0x1CE8
            | 0x1CED
            | 0x1CF4
            | 0x1CF8..=0x1CF9
            | 0x1DC0..=0x1DF9
            | 0x1DFB..=0x1DFF
            | 0x200B..=0x200F | 0x202A..=0x202E | 0x2060..=0x2064 | 0x2066..=0x206F | 0x20D0..=0x20F0 | 0xFE00..=0xFE0F | 0xFE20..=0xFE2F | 0xFEFF | 0xFFF9..=0xFFFB | 0x1D167..=0x1D169
            | 0x1D173..=0x1D182
            | 0x1D185..=0x1D18B
            | 0x1D1AA..=0x1D1AD
            | 0x1D242..=0x1D244
            | 0xE0001
            | 0xE0020..=0xE007F
            | 0xE0100..=0xE01EF )
}
1054
/// Returns `true` for code points that `max_line_length_utf8` counts as two
/// display columns wide.
///
/// The table is a hand-maintained list of code point ranges.
/// NOTE(review): it appears to cover East Asian wide/fullwidth characters and
/// emoji — confirm against the Unicode data it was derived from before
/// extending it.
#[inline]
fn is_wide_char(cp: u32) -> bool {
    matches!(
        cp,
        0x1100..=0x115F | 0x231A..=0x231B | 0x2329..=0x232A | 0x23E9..=0x23F3 | 0x23F8..=0x23FA
            | 0x25FD..=0x25FE
            | 0x2614..=0x2615
            | 0x2648..=0x2653
            | 0x267F
            | 0x2693
            | 0x26A1
            | 0x26AA..=0x26AB
            | 0x26BD..=0x26BE
            | 0x26C4..=0x26C5
            | 0x26CE
            | 0x26D4
            | 0x26EA
            | 0x26F2..=0x26F3
            | 0x26F5
            | 0x26FA
            | 0x26FD
            | 0x2702
            | 0x2705
            | 0x2708..=0x270D
            | 0x270F
            | 0x2712
            | 0x2714
            | 0x2716
            | 0x271D
            | 0x2721
            | 0x2728
            | 0x2733..=0x2734
            | 0x2744
            | 0x2747
            | 0x274C
            | 0x274E
            | 0x2753..=0x2755
            | 0x2757
            | 0x2763..=0x2764
            | 0x2795..=0x2797
            | 0x27A1
            | 0x27B0
            | 0x27BF
            | 0x2934..=0x2935
            | 0x2B05..=0x2B07
            | 0x2B1B..=0x2B1C
            | 0x2B50
            | 0x2B55
            | 0x2E80..=0x303E | 0x3040..=0x33BF | 0x3400..=0x4DBF | 0x4E00..=0xA4CF | 0xA960..=0xA97C | 0xAC00..=0xD7A3 | 0xF900..=0xFAFF | 0xFE10..=0xFE19 | 0xFE30..=0xFE6F | 0xFF01..=0xFF60 | 0xFFE0..=0xFFE6 | 0x1F004
            | 0x1F0CF
            | 0x1F170..=0x1F171
            | 0x1F17E..=0x1F17F
            | 0x1F18E
            | 0x1F191..=0x1F19A
            | 0x1F1E0..=0x1F1FF | 0x1F200..=0x1F202
            | 0x1F210..=0x1F23B
            | 0x1F240..=0x1F248
            | 0x1F250..=0x1F251
            | 0x1F260..=0x1F265
            | 0x1F300..=0x1F64F | 0x1F680..=0x1F6FF | 0x1F900..=0x1F9FF | 0x1FA00..=0x1FA6F
            | 0x1FA70..=0x1FAFF
            | 0x20000..=0x2FFFD | 0x30000..=0x3FFFD )
}
1141
/// Computes the display width of the widest line using byte-wise rules.
///
/// * Space and printable ASCII (`0x20..=0x7E`) advance the column by one.
/// * Tab advances to the next multiple of eight.
/// * Carriage return moves the column back to zero but keeps the width the
///   line has already reached.
/// * Newline and form feed end the line.
/// * Every other byte, including everything `>= 0x80`, has no width.
///
/// A final line without a terminator still takes part in the maximum.
pub fn max_line_length_c(data: &[u8]) -> u64 {
    let mut longest = 0u64; // widest finished line so far
    let mut widest = 0u64; // furthest column reached on the current line
    let mut col = 0u64; // current column

    for &byte in data {
        match byte {
            0x20..=0x7E => {
                col += 1;
                widest = widest.max(col);
            }
            b'\t' => {
                col = (col + 8) & !7;
                widest = widest.max(col);
            }
            b'\n' | 0x0C => {
                longest = longest.max(widest);
                col = 0;
                widest = 0;
            }
            b'\r' => col = 0,
            _ => {}
        }
    }

    longest.max(widest)
}
1223
1224pub fn max_line_length_utf8(data: &[u8]) -> u64 {
1231 let mut max_len: u64 = 0;
1232 let mut line_len: u64 = 0;
1233 let mut linepos: u64 = 0;
1234 let mut i = 0;
1235 let len = data.len();
1236
1237 while i < len {
1238 let b = unsafe { *data.get_unchecked(i) };
1239
1240 if b >= 0x21 && b <= 0x7E {
1241 i += 1;
1243 let mut run = 1u64;
1244 while i < len {
1245 let b = unsafe { *data.get_unchecked(i) };
1246 if b >= 0x21 && b <= 0x7E {
1247 run += 1;
1248 i += 1;
1249 } else {
1250 break;
1251 }
1252 }
1253 linepos += run;
1254 if linepos > line_len {
1255 line_len = linepos;
1256 }
1257 } else if b < 0x80 {
1258 match b {
1260 b' ' => {
1261 linepos += 1;
1262 if linepos > line_len {
1263 line_len = linepos;
1264 }
1265 }
1266 b'\n' => {
1267 if line_len > max_len {
1268 max_len = line_len;
1269 }
1270 linepos = 0;
1271 line_len = 0;
1272 }
1273 b'\t' => {
1274 linepos = (linepos + 8) & !7;
1275 if linepos > line_len {
1276 line_len = linepos;
1277 }
1278 }
1279 b'\r' => {
1280 linepos = 0;
1281 }
1282 0x0C => {
1283 if line_len > max_len {
1284 max_len = line_len;
1285 }
1286 linepos = 0;
1287 line_len = 0;
1288 }
1289 _ => {} }
1291 i += 1;
1292 } else {
1293 let (cp, blen) = decode_utf8(&data[i..]);
1295
1296 if cp <= 0x9F {
1298 } else if is_zero_width(cp) {
1300 } else if is_wide_char(cp) {
1302 linepos += 2;
1303 if linepos > line_len {
1304 line_len = linepos;
1305 }
1306 } else {
1307 linepos += 1;
1309 if linepos > line_len {
1310 line_len = linepos;
1311 }
1312 }
1313 i += blen;
1314 }
1315 }
1316
1317 if line_len > max_len {
1319 max_len = line_len;
1320 }
1321
1322 max_len
1323}
1324
/// Returns the display width of the widest line in `data`, using the UTF-8
/// aware measurement when `utf8` is set and byte-wise rules otherwise.
#[inline]
pub fn max_line_length(data: &[u8], utf8: bool) -> u64 {
    if utf8 {
        max_line_length_utf8(data)
    } else {
        max_line_length_c(data)
    }
}
1334
1335pub fn count_all(data: &[u8], utf8: bool) -> WcCounts {
1347 if utf8 {
1348 let (lines, words) = count_lines_words_utf8_fused(data);
1349 WcCounts {
1350 lines,
1351 words,
1352 bytes: data.len() as u64,
1353 chars: count_chars_utf8(data),
1354 max_line_length: max_line_length_utf8(data),
1355 }
1356 } else {
1357 WcCounts {
1358 lines: count_lines(data),
1359 words: count_words_locale(data, false),
1360 bytes: data.len() as u64,
1361 chars: data.len() as u64,
1362 max_line_length: max_line_length_c(data),
1363 }
1364 }
1365}
1366
/// Cheap heuristic: inspects up to three 256-byte windows of `data` (start,
/// middle and end) and returns `true` when none of them contains a byte
/// `>= 0x80`.
///
/// This is a sample only. Bytes outside the windows are never looked at, so
/// `true` does not prove the whole buffer is ASCII. Empty input yields `true`.
#[inline]
fn check_ascii_sample(data: &[u8]) -> bool {
    let total = data.len();
    if total == 0 {
        return true;
    }

    let window = total.min(256);

    // Head window.
    if !data[..window].is_ascii() {
        return false;
    }

    // Middle window, centred on the midpoint; only probed when the buffer is
    // larger than two windows.
    if total > 2 * window {
        let from = (total / 2).saturating_sub(window / 2);
        let to = (from + window).min(total);
        if !data[from..to].is_ascii() {
            return false;
        }
    }

    // Tail window; skipped when the head window already covered everything.
    total <= window || data[total - window..].is_ascii()
}
1425
1426fn split_at_newlines(data: &[u8], num_chunks: usize) -> Vec<&[u8]> {
1435 if data.is_empty() || num_chunks <= 1 {
1436 return vec![data];
1437 }
1438 let chunk_size = data.len() / num_chunks;
1439 let mut chunks = Vec::with_capacity(num_chunks);
1440 let mut pos = 0;
1441
1442 for _ in 0..num_chunks - 1 {
1443 let target = pos + chunk_size;
1444 if target >= data.len() {
1445 break;
1446 }
1447 let boundary = memchr::memchr(b'\n', &data[target..])
1448 .map(|p| target + p + 1)
1449 .unwrap_or(data.len());
1450 if boundary > pos {
1451 chunks.push(&data[pos..boundary]);
1452 }
1453 pos = boundary;
1454 }
1455 if pos < data.len() {
1456 chunks.push(&data[pos..]);
1457 }
1458 chunks
1459}
1460
/// Counts `\n` bytes in `data`, splitting the work across the rayon thread
/// pool for inputs of at least `PARALLEL_THRESHOLD` bytes.
///
/// Newline counting is independent per byte, so the buffer can be cut at
/// arbitrary offsets and the per-chunk counts simply added.
pub fn count_lines_parallel(data: &[u8]) -> u64 {
    if data.len() < PARALLEL_THRESHOLD {
        return count_lines(data);
    }

    let num_threads = rayon::current_num_threads().max(1);
    // Roughly one chunk per thread, but never smaller than 2 MiB.
    let chunk_size = (data.len() / num_threads).max(2 * 1024 * 1024);

    data.par_chunks(chunk_size)
        .map(|chunk| memchr_iter(b'\n', chunk).count() as u64)
        .sum()
}
1476
1477pub fn count_words_parallel(data: &[u8], utf8: bool) -> u64 {
1479 if data.len() < PARALLEL_THRESHOLD {
1480 return count_words_locale(data, utf8);
1481 }
1482
1483 let num_threads = rayon::current_num_threads().max(1);
1484
1485 if utf8 {
1486 let chunks = split_at_newlines(data, num_threads);
1489 chunks.par_iter().map(|chunk| count_words_utf8(chunk)).sum()
1490 } else {
1491 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1493
1494 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1495
1496 let results: Vec<(u64, u64, bool, bool)> = chunks
1498 .par_iter()
1499 .map(|chunk| count_lw_c_chunk(chunk))
1500 .collect();
1501
1502 let mut total = 0u64;
1503 for i in 0..results.len() {
1504 total += results[i].1;
1505 if i > 0 && results[i - 1].3 && results[i].2 {
1509 total -= 1;
1510 }
1511 }
1512 total
1513 }
1514}
1515
/// Counts characters in `data`, in parallel for large UTF-8 inputs.
///
/// In byte-wise mode the answer is just the length. In UTF-8 mode characters
/// are counted as non-continuation bytes, a per-byte property, so the buffer
/// can be cut at arbitrary offsets and the per-chunk counts added.
pub fn count_chars_parallel(data: &[u8], utf8: bool) -> u64 {
    if !utf8 {
        return data.len() as u64;
    }
    if data.len() < PARALLEL_THRESHOLD {
        return count_chars_utf8(data);
    }

    let num_threads = rayon::current_num_threads().max(1);
    // Roughly one chunk per thread, but never smaller than 1 MiB.
    let chunk_size = (data.len() / num_threads).max(1024 * 1024);

    data.par_chunks(chunk_size).map(count_chars_utf8).sum()
}
1530
/// Counts lines, words and bytes sequentially. Returns `(lines, words, bytes)`.
pub fn count_lwb(data: &[u8], utf8: bool) -> (u64, u64, u64) {
    let (lines, words) = count_lines_words(data, utf8);
    (lines, words, data.len() as u64)
}
1537
1538pub fn count_lwb_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1544 if data.len() < PARALLEL_THRESHOLD {
1545 return count_lwb(data, utf8);
1547 }
1548
1549 let num_threads = rayon::current_num_threads().max(1);
1550
1551 let (lines, words) = if !utf8 {
1552 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1554
1555 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1556 let results: Vec<(u64, u64, bool, bool)> = chunks
1557 .par_iter()
1558 .map(|chunk| count_lw_c_chunk_fast(chunk))
1559 .collect();
1560
1561 let mut line_total = 0u64;
1562 let mut word_total = 0u64;
1563 for i in 0..results.len() {
1564 line_total += results[i].0;
1565 word_total += results[i].1;
1566 if i > 0 && results[i - 1].3 && results[i].2 {
1567 word_total -= 1;
1568 }
1569 }
1570
1571 (line_total, word_total)
1572 } else {
1573 let is_ascii = check_ascii_sample(data);
1575 if is_ascii {
1576 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1578 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1579 let results: Vec<(u64, u64, bool, bool)> = chunks
1580 .par_iter()
1581 .map(|chunk| count_lw_c_chunk_fast(chunk))
1582 .collect();
1583
1584 let mut line_total = 0u64;
1585 let mut word_total = 0u64;
1586 for i in 0..results.len() {
1587 line_total += results[i].0;
1588 word_total += results[i].1;
1589 if i > 0 && results[i - 1].3 && results[i].2 {
1590 word_total -= 1;
1591 }
1592 }
1593 (line_total, word_total)
1594 } else {
1595 let chunks = split_at_newlines(data, num_threads);
1598 let results: Vec<(u64, u64)> = chunks
1599 .par_iter()
1600 .map(|chunk| count_lines_words_utf8_fused(chunk))
1601 .collect();
1602 let mut line_total = 0u64;
1603 let mut word_total = 0u64;
1604 for (l, w) in results {
1605 line_total += l;
1606 word_total += w;
1607 }
1608 (line_total, word_total)
1609 }
1610 };
1611
1612 (lines, words, data.len() as u64)
1613}
1614
1615pub fn count_lwc_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1619 if data.len() < PARALLEL_THRESHOLD {
1620 let lines = count_lines(data);
1621 let words = count_words_locale(data, utf8);
1622 let chars = count_chars(data, utf8);
1623 return (lines, words, chars);
1624 }
1625
1626 let num_threads = rayon::current_num_threads().max(1);
1627
1628 if utf8 {
1629 let chunks = split_at_newlines(data, num_threads);
1631 let results: Vec<(u64, u64, u64)> = chunks
1632 .par_iter()
1633 .map(|chunk| {
1634 let (lines, words) = count_lines_words_utf8_fused(chunk);
1635 let chars = count_chars_utf8(chunk);
1636 (lines, words, chars)
1637 })
1638 .collect();
1639 let mut lines = 0u64;
1640 let mut words = 0u64;
1641 let mut chars = 0u64;
1642 for (l, w, c) in results {
1643 lines += l;
1644 words += w;
1645 chars += c;
1646 }
1647 (lines, words, chars)
1648 } else {
1649 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1651 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1652 let results: Vec<(u64, u64, bool, bool)> = chunks
1653 .par_iter()
1654 .map(|chunk| count_lw_c_chunk_fast(chunk))
1655 .collect();
1656 let mut lines = 0u64;
1657 let mut words = 0u64;
1658 for i in 0..results.len() {
1659 lines += results[i].0;
1660 words += results[i].1;
1661 if i > 0 && results[i - 1].3 && results[i].2 {
1662 words -= 1;
1663 }
1664 }
1665 (lines, words, data.len() as u64)
1666 }
1667}
1668
1669pub fn max_line_length_parallel(data: &[u8], utf8: bool) -> u64 {
1673 if data.len() < PARALLEL_THRESHOLD {
1674 return max_line_length(data, utf8);
1675 }
1676 let num_threads = rayon::current_num_threads().max(1);
1677 let chunks = split_at_newlines(data, num_threads);
1678 chunks
1679 .par_iter()
1680 .map(|chunk| {
1681 if utf8 {
1682 max_line_length_utf8(chunk)
1683 } else {
1684 max_line_length_c(chunk)
1685 }
1686 })
1687 .max()
1688 .unwrap_or(0)
1689}
1690
1691pub fn count_all_parallel(data: &[u8], utf8: bool) -> WcCounts {
1695 if data.len() < PARALLEL_THRESHOLD {
1696 return count_all(data, utf8);
1697 }
1698
1699 let num_threads = rayon::current_num_threads().max(1);
1700 let chunks = split_at_newlines(data, num_threads);
1701
1702 if utf8 {
1703 let results: Vec<(u64, u64, u64, u64)> = chunks
1704 .par_iter()
1705 .map(|chunk| {
1706 let (lines, words) = count_lines_words_utf8_fused(chunk);
1707 let chars = count_chars_utf8(chunk);
1708 let max_ll = max_line_length_utf8(chunk);
1709 (lines, words, chars, max_ll)
1710 })
1711 .collect();
1712
1713 let mut counts = WcCounts {
1714 bytes: data.len() as u64,
1715 ..Default::default()
1716 };
1717 for (l, w, c, m) in results {
1718 counts.lines += l;
1719 counts.words += w;
1720 counts.chars += c;
1721 if m > counts.max_line_length {
1722 counts.max_line_length = m;
1723 }
1724 }
1725 counts
1726 } else {
1727 let results: Vec<(u64, u64, u64)> = chunks
1729 .par_iter()
1730 .map(|chunk| {
1731 let (lines, words) = count_lines_words(chunk, false);
1732 let max_ll = max_line_length_c(chunk);
1733 (lines, words, max_ll)
1734 })
1735 .collect();
1736
1737 let mut counts = WcCounts {
1738 bytes: data.len() as u64,
1739 chars: data.len() as u64,
1740 ..Default::default()
1741 };
1742 for (l, w, m) in &results {
1743 counts.lines += l;
1744 counts.words += w;
1745 if *m > counts.max_line_length {
1746 counts.max_line_length = *m;
1747 }
1748 }
1749 counts
1750 }
1751}