1use memchr::memchr_iter;
2use rayon::prelude::*;
3
/// Input size, in bytes, below which the `*_parallel` counters skip the
/// thread pool and call their sequential counterpart instead (1 MiB).
const PARALLEL_THRESHOLD: usize = 1 << 20;
8
/// Aggregate `wc`-style totals for a single input buffer, as produced by
/// `count_all`.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct WcCounts {
    /// Number of newline (`\n`) bytes.
    pub lines: u64,
    /// Number of whitespace-delimited words.
    pub words: u64,
    /// Length of the input in bytes.
    pub bytes: u64,
    /// Number of characters; equal to `bytes` when the input is not
    /// interpreted as UTF-8.
    pub chars: u64,
    /// Display width of the widest line.
    pub max_line_length: u64,
}
18
19const fn make_byte_class_c() -> [u8; 256] {
36 let mut t = make_byte_class_utf8();
42 t[0xA0] = 1;
43 t
44}
45const BYTE_CLASS_C: [u8; 256] = make_byte_class_c();
46
/// Builds the whitespace lookup table for the ASCII range of UTF-8 text.
///
/// Entries for tab, line feed, vertical tab, form feed, carriage return and
/// space are `1`; every other entry (including all bytes `>= 0x80`) is `0`.
const fn make_byte_class_utf8() -> [u8; 256] {
    const ASCII_WHITESPACE: [u8; 6] = [0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x20];

    let mut classes = [0u8; 256];
    let mut k = 0;
    // `for` loops are not available in `const fn`, hence the manual index.
    while k < ASCII_WHITESPACE.len() {
        classes[ASCII_WHITESPACE[k] as usize] = 1;
        k += 1;
    }
    classes
}

/// `BYTE_CLASS_UTF8[b] == 1` exactly when ASCII byte `b` separates words in
/// UTF-8 mode.
const BYTE_CLASS_UTF8: [u8; 256] = make_byte_class_utf8();
62
/// Returns `true` when code point `cp` is one of the non-ASCII space
/// characters that end a word in UTF-8 mode.
#[inline]
fn is_unicode_space(cp: u32) -> bool {
    // U+2000..=U+200A is the contiguous run; the rest are isolated.
    const ISOLATED: [u32; 7] = [0x00A0, 0x1680, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000];

    (0x2000..=0x200A).contains(&cp) || ISOLATED.contains(&cp)
}
84
85#[inline]
94pub fn count_lines(data: &[u8]) -> u64 {
95 memchr_iter(b'\n', data).count() as u64
96}
97
/// Returns the length of `data` in bytes.
#[inline]
pub fn count_bytes(data: &[u8]) -> u64 {
    let length = data.len();
    length as u64
}
103
104pub fn count_words(data: &[u8]) -> u64 {
106 count_words_locale(data, true)
107}
108
109pub fn count_words_locale(data: &[u8], utf8: bool) -> u64 {
115 if utf8 {
116 count_words_utf8(data)
117 } else {
118 count_words_c(data)
119 }
120}
121
122fn count_words_c(data: &[u8]) -> u64 {
127 let mut words = 0u64;
128 let mut in_word = false;
129 let mut i = 0;
130 let len = data.len();
131
132 while i < len {
133 let b = unsafe { *data.get_unchecked(i) };
134 let class = unsafe { *BYTE_CLASS_C.get_unchecked(b as usize) };
135 if class == 1 {
136 in_word = false;
138 } else if !in_word {
139 in_word = true;
141 words += 1;
142 }
143 i += 1;
144 }
145 words
146}
147
148#[cfg(target_arch = "x86_64")]
154#[target_feature(enable = "avx2")]
155unsafe fn count_lw_c_chunk_avx2(data: &[u8]) -> (u64, u64, bool, bool) {
156 use std::arch::x86_64::*;
157
158 let len = data.len();
159 let ptr = data.as_ptr();
160 let mut i = 0usize;
161 let mut total_lines = 0u64;
162 let mut total_words = 0u64;
163 let mut prev_in_word = false;
164
165 unsafe {
166 let nl_byte = _mm256_set1_epi8(b'\n' as i8);
167 let zero = _mm256_setzero_si256();
168 let ones = _mm256_set1_epi8(1);
169 let space_char = _mm256_set1_epi8(0x20i8);
171 let tab_lo = _mm256_set1_epi8(0x08i8);
172 let tab_hi = _mm256_set1_epi8(0x0Ei8);
173 let nbsp_char = _mm256_set1_epi8(0xA0u8 as i8);
174
175 let mut line_acc = _mm256_setzero_si256();
176 let mut batch = 0u32;
177
178 while i + 32 <= len {
179 let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
180 let is_nl = _mm256_cmpeq_epi8(v, nl_byte);
181 line_acc = _mm256_add_epi8(line_acc, _mm256_and_si256(is_nl, ones));
182
183 let is_sp = _mm256_cmpeq_epi8(v, space_char);
185 let is_nbsp = _mm256_cmpeq_epi8(v, nbsp_char);
186 let gt_08 = _mm256_cmpgt_epi8(v, tab_lo);
187 let lt_0e = _mm256_cmpgt_epi8(tab_hi, v);
188 let is_tab_range = _mm256_and_si256(gt_08, lt_0e);
189 let is_space = _mm256_or_si256(_mm256_or_si256(is_sp, is_nbsp), is_tab_range);
190
191 let space_mask = _mm256_movemask_epi8(is_space) as u32;
192 let word_mask = !space_mask;
194
195 let prev_mask = (word_mask << 1) | (prev_in_word as u32);
197 total_words += (word_mask & !prev_mask).count_ones() as u64;
198 prev_in_word = (word_mask >> 31) & 1 == 1;
199
200 batch += 1;
201 if batch >= 255 {
202 let sad = _mm256_sad_epu8(line_acc, zero);
203 let hi = _mm256_extracti128_si256(sad, 1);
204 let lo = _mm256_castsi256_si128(sad);
205 let s = _mm_add_epi64(lo, hi);
206 let h64 = _mm_unpackhi_epi64(s, s);
207 let t = _mm_add_epi64(s, h64);
208 total_lines += _mm_cvtsi128_si64(t) as u64;
209 line_acc = _mm256_setzero_si256();
210 batch = 0;
211 }
212 i += 32;
213 }
214
215 if batch > 0 {
216 let sad = _mm256_sad_epu8(line_acc, zero);
217 let hi = _mm256_extracti128_si256(sad, 1);
218 let lo = _mm256_castsi256_si128(sad);
219 let s = _mm_add_epi64(lo, hi);
220 let h64 = _mm_unpackhi_epi64(s, s);
221 let t = _mm_add_epi64(s, h64);
222 total_lines += _mm_cvtsi128_si64(t) as u64;
223 }
224
225 while i < len {
227 let b = *ptr.add(i);
228 if b == b'\n' {
229 total_lines += 1;
230 prev_in_word = false;
231 } else if *BYTE_CLASS_C.get_unchecked(b as usize) == 1 {
232 prev_in_word = false;
234 } else if !prev_in_word {
235 total_words += 1;
237 prev_in_word = true;
238 }
239 i += 1;
240 }
241 }
242
243 let first_is_word = !data.is_empty() && BYTE_CLASS_C[data[0] as usize] != 1;
244 (total_lines, total_words, first_is_word, prev_in_word)
245}
246
247#[cfg(target_arch = "x86_64")]
251#[target_feature(enable = "sse2")]
252unsafe fn count_lw_c_chunk_sse2(data: &[u8]) -> (u64, u64, bool, bool) {
253 use std::arch::x86_64::*;
254
255 let len = data.len();
256 let ptr = data.as_ptr();
257 let mut i = 0usize;
258 let mut total_lines = 0u64;
259 let mut total_words = 0u64;
260 let mut prev_in_word = false;
261
262 unsafe {
263 let nl_byte = _mm_set1_epi8(b'\n' as i8);
264 let zero = _mm_setzero_si128();
265 let ones = _mm_set1_epi8(1);
266 let space_char = _mm_set1_epi8(0x20i8);
268 let tab_lo = _mm_set1_epi8(0x08i8);
269 let tab_hi = _mm_set1_epi8(0x0Ei8);
270 let nbsp_char = _mm_set1_epi8(0xA0u8 as i8);
271
272 let mut line_acc = _mm_setzero_si128();
273 let mut batch = 0u32;
274
275 while i + 16 <= len {
276 let v = _mm_loadu_si128(ptr.add(i) as *const __m128i);
277 let is_nl = _mm_cmpeq_epi8(v, nl_byte);
278 line_acc = _mm_add_epi8(line_acc, _mm_and_si128(is_nl, ones));
279
280 let is_sp = _mm_cmpeq_epi8(v, space_char);
282 let is_nbsp = _mm_cmpeq_epi8(v, nbsp_char);
283 let gt_08 = _mm_cmpgt_epi8(v, tab_lo);
284 let lt_0e = _mm_cmpgt_epi8(tab_hi, v);
285 let is_tab_range = _mm_and_si128(gt_08, lt_0e);
286 let is_space = _mm_or_si128(_mm_or_si128(is_sp, is_nbsp), is_tab_range);
287
288 let space_mask = _mm_movemask_epi8(is_space) as u32;
289 let word_mask = (!space_mask) & 0xFFFF;
291
292 let prev_mask = (word_mask << 1) | (prev_in_word as u32);
294 total_words += (word_mask & !prev_mask).count_ones() as u64;
295 prev_in_word = (word_mask >> 15) & 1 == 1;
296
297 batch += 1;
298 if batch >= 255 {
299 let sad = _mm_sad_epu8(line_acc, zero);
300 let hi = _mm_unpackhi_epi64(sad, sad);
301 let t = _mm_add_epi64(sad, hi);
302 total_lines += _mm_cvtsi128_si64(t) as u64;
303 line_acc = _mm_setzero_si128();
304 batch = 0;
305 }
306 i += 16;
307 }
308
309 if batch > 0 {
310 let sad = _mm_sad_epu8(line_acc, zero);
311 let hi = _mm_unpackhi_epi64(sad, sad);
312 let t = _mm_add_epi64(sad, hi);
313 total_lines += _mm_cvtsi128_si64(t) as u64;
314 }
315
316 while i < len {
318 let b = *ptr.add(i);
319 if b == b'\n' {
320 total_lines += 1;
321 prev_in_word = false;
322 } else if *BYTE_CLASS_C.get_unchecked(b as usize) == 1 {
323 prev_in_word = false;
324 } else if !prev_in_word {
325 total_words += 1;
326 prev_in_word = true;
327 }
328 i += 1;
329 }
330 }
331
332 let first_is_word = !data.is_empty() && BYTE_CLASS_C[data[0] as usize] != 1;
333 (total_lines, total_words, first_is_word, prev_in_word)
334}
335
336#[inline]
338fn count_lw_c_chunk_fast(data: &[u8]) -> (u64, u64, bool, bool) {
339 #[cfg(target_arch = "x86_64")]
340 {
341 if is_x86_feature_detected!("avx2") && data.len() >= 64 {
342 return unsafe { count_lw_c_chunk_avx2(data) };
343 }
344 if data.len() >= 32 {
345 return unsafe { count_lw_c_chunk_sse2(data) };
346 }
347 }
348 count_lw_c_chunk(data)
349}
350
351fn count_lw_c_chunk(data: &[u8]) -> (u64, u64, bool, bool) {
356 let mut lines = 0u64;
357 let mut words = 0u64;
358 let mut in_word = false;
359 let mut i = 0;
360 let len = data.len();
361
362 let first_is_word = !data.is_empty() && BYTE_CLASS_C[data[0] as usize] != 1;
364
365 while i < len {
366 let b = unsafe { *data.get_unchecked(i) };
367 let class = unsafe { *BYTE_CLASS_C.get_unchecked(b as usize) };
368 if class == 1 {
369 if b == b'\n' {
371 lines += 1;
372 }
373 in_word = false;
374 } else if !in_word {
375 in_word = true;
377 words += 1;
378 }
379 i += 1;
380 }
381 (lines, words, first_is_word, in_word)
382}
383
384fn count_words_utf8(data: &[u8]) -> u64 {
395 let mut words = 0u64;
396 let mut in_word = false;
397 let mut i = 0;
398 let len = data.len();
399
400 while i < len {
401 let b = unsafe { *data.get_unchecked(i) };
402
403 if b < 0x80 {
404 let class = unsafe { *BYTE_CLASS_UTF8.get_unchecked(b as usize) };
406 if class == 1 {
407 in_word = false;
408 } else if !in_word {
409 in_word = true;
410 words += 1;
411 }
412 i += 1;
413 } else if b < 0xC2 {
414 if !in_word {
416 in_word = true;
417 words += 1;
418 }
419 i += 1;
420 } else if b < 0xE0 {
421 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
422 let cp = ((b as u32 & 0x1F) << 6)
423 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
424 if is_unicode_space(cp) {
425 in_word = false;
426 } else if !in_word {
427 in_word = true;
428 words += 1;
429 }
430 i += 2;
431 } else {
432 if !in_word {
434 in_word = true;
435 words += 1;
436 }
437 i += 1;
438 }
439 } else if b < 0xF0 {
440 if i + 2 < len
441 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
442 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
443 {
444 let cp = ((b as u32 & 0x0F) << 12)
445 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
446 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
447 if is_unicode_space(cp) {
448 in_word = false;
449 } else if !in_word {
450 in_word = true;
451 words += 1;
452 }
453 i += 3;
454 } else {
455 if !in_word {
456 in_word = true;
457 words += 1;
458 }
459 i += 1;
460 }
461 } else if b < 0xF5 {
462 if i + 3 < len
463 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
464 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
465 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
466 {
467 let cp = ((b as u32 & 0x07) << 18)
468 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
469 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
470 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
471 if is_unicode_space(cp) {
472 in_word = false;
473 } else if !in_word {
474 in_word = true;
475 words += 1;
476 }
477 i += 4;
478 } else {
479 if !in_word {
480 in_word = true;
481 words += 1;
482 }
483 i += 1;
484 }
485 } else {
486 if !in_word {
488 in_word = true;
489 words += 1;
490 }
491 i += 1;
492 }
493 }
494
495 words
496}
497
498pub fn count_lines_words(data: &[u8], utf8: bool) -> (u64, u64) {
502 if utf8 {
503 count_lines_words_utf8_fused(data)
504 } else {
505 let (lines, words, _, _) = count_lw_c_chunk_fast(data);
506 (lines, words)
507 }
508}
509
510fn count_lines_words_utf8_fused(data: &[u8]) -> (u64, u64) {
514 let mut lines = 0u64;
515 let mut words = 0u64;
516 let mut in_word = false;
517 let mut i = 0;
518 let len = data.len();
519
520 while i < len {
521 let b = unsafe { *data.get_unchecked(i) };
522
523 if b == b'\n' {
524 lines += 1;
525 in_word = false;
526 i += 1;
527 } else if b < 0x80 {
528 let class = unsafe { *BYTE_CLASS_UTF8.get_unchecked(b as usize) };
530 if class == 1 {
531 in_word = false;
532 } else if !in_word {
533 in_word = true;
534 words += 1;
535 }
536 i += 1;
537 } else if b < 0xC2 {
538 if !in_word {
540 in_word = true;
541 words += 1;
542 }
543 i += 1;
544 } else if b < 0xE0 {
545 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
546 let cp = ((b as u32 & 0x1F) << 6)
547 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
548 if is_unicode_space(cp) {
549 in_word = false;
550 } else if !in_word {
551 in_word = true;
552 words += 1;
553 }
554 i += 2;
555 } else {
556 if !in_word {
557 in_word = true;
558 words += 1;
559 }
560 i += 1;
561 }
562 } else if b < 0xF0 {
563 if i + 2 < len
564 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
565 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
566 {
567 let cp = ((b as u32 & 0x0F) << 12)
568 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
569 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
570 if is_unicode_space(cp) {
571 in_word = false;
572 } else if !in_word {
573 in_word = true;
574 words += 1;
575 }
576 i += 3;
577 } else {
578 if !in_word {
579 in_word = true;
580 words += 1;
581 }
582 i += 1;
583 }
584 } else if b < 0xF5 {
585 if i + 3 < len
586 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
587 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
588 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
589 {
590 let cp = ((b as u32 & 0x07) << 18)
591 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
592 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
593 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
594 if is_unicode_space(cp) {
595 in_word = false;
596 } else if !in_word {
597 in_word = true;
598 words += 1;
599 }
600 i += 4;
601 } else {
602 if !in_word {
603 in_word = true;
604 words += 1;
605 }
606 i += 1;
607 }
608 } else {
609 if !in_word {
611 in_word = true;
612 words += 1;
613 }
614 i += 1;
615 }
616 }
617
618 (lines, words)
619}
620
621pub fn count_lines_words_chars(data: &[u8], utf8: bool) -> (u64, u64, u64) {
623 if utf8 {
624 let (lines, words) = count_lines_words_utf8_fused(data);
626 let chars = count_chars_utf8(data);
627 (lines, words, chars)
628 } else {
629 let (lines, words) = count_lines_words(data, false);
631 (lines, words, data.len() as u64)
632 }
633}
634
635pub fn count_chars_utf8(data: &[u8]) -> u64 {
642 #[cfg(target_arch = "x86_64")]
643 {
644 if is_x86_feature_detected!("avx2") {
645 return unsafe { count_chars_utf8_avx2(data) };
646 }
647 }
648 count_chars_utf8_scalar(data)
649}
650
/// AVX2 kernel that counts the bytes of `data` which are not UTF-8
/// continuation bytes (`byte & 0xC0 != 0x80`).
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_chars_utf8_avx2(data: &[u8]) -> u64 {
    use std::arch::x86_64::*;

    /// Bytes processed per vector.
    const LANES: usize = 32;
    /// Each byte lane gains at most 1 per vector, so 255 vectors is the most
    /// that can be accumulated before an 8-bit lane could wrap.
    const MAX_BATCH: usize = 255;

    let total_len = data.len();
    let base = data.as_ptr();
    let mut offset = 0usize;
    let mut chars = 0u64;

    // SAFETY: AVX2 availability is the caller's obligation. Every 32-byte
    // load starts at `offset` with `offset + 32 <= total_len`, because the
    // batch size is derived from the bytes that remain.
    unsafe {
        let top_two_bits = _mm256_set1_epi8(0xC0u8 as i8);
        let continuation_tag = _mm256_set1_epi8(0x80u8 as i8);
        let one_per_lane = _mm256_set1_epi8(1);
        let zeroes = _mm256_setzero_si256();

        while offset + LANES <= total_len {
            let vectors = ((total_len - offset) / LANES).min(MAX_BATCH);
            let mut lane_counts = _mm256_setzero_si256();

            for _ in 0..vectors {
                let block = _mm256_loadu_si256(base.add(offset) as *const __m256i);
                let is_continuation =
                    _mm256_cmpeq_epi8(_mm256_and_si256(block, top_two_bits), continuation_tag);
                // andnot keeps a 1 in every lane that is NOT a continuation byte.
                lane_counts = _mm256_add_epi8(
                    lane_counts,
                    _mm256_andnot_si256(is_continuation, one_per_lane),
                );
                offset += LANES;
            }

            // Horizontal sum of the 32 byte lanes.
            let sums = _mm256_sad_epu8(lane_counts, zeroes);
            let folded = _mm_add_epi64(
                _mm256_castsi256_si128(sums),
                _mm256_extracti128_si256(sums, 1),
            );
            let folded = _mm_add_epi64(folded, _mm_unpackhi_epi64(folded, folded));
            chars += _mm_cvtsi128_si64(folded) as u64;
        }
    }

    // Scalar tail for the last `total_len % 32` bytes.
    for &byte in &data[offset..] {
        chars += u64::from(byte & 0xC0 != 0x80);
    }
    chars
}
714
/// Portable character counter: counts the bytes of `data` that are not
/// UTF-8 continuation bytes (`byte & 0xC0 != 0x80`).
///
/// Works in 64-byte blocks. A block whose bytes OR together to less than
/// `0x80` is pure ASCII and contributes 64 without per-byte tests.
fn count_chars_utf8_scalar(data: &[u8]) -> u64 {
    let starts_char = |byte: u8| u64::from(byte & 0xC0 != 0x80);

    let mut blocks = data.chunks_exact(64);
    let mut total = 0u64;

    for block in blocks.by_ref() {
        let combined = block.iter().fold(0u8, |acc, &byte| acc | byte);
        total += if combined < 0x80 {
            64
        } else {
            block.iter().map(|&byte| starts_char(byte)).sum::<u64>()
        };
    }

    let tail: u64 = blocks.remainder().iter().map(|&byte| starts_char(byte)).sum();
    total + tail
}
766
/// Counts characters of single-byte text: one character per byte.
#[inline]
pub fn count_chars_c(data: &[u8]) -> u64 {
    let length = data.len();
    length as u64
}
772
773#[inline]
775pub fn count_chars(data: &[u8], utf8: bool) -> u64 {
776 if utf8 {
777 count_chars_utf8(data)
778 } else {
779 count_chars_c(data)
780 }
781}
782
/// Reports whether the process environment selects a UTF-8 locale.
///
/// `LC_ALL`, `LC_CTYPE` and `LANG` are consulted in that order. The first
/// one that is set to a non-empty (valid Unicode) value decides: the result
/// is `true` when that value contains `utf-8` or `utf8`, ignoring ASCII
/// case. If none of them qualifies the result is `false`.
pub fn is_utf8_locale() -> bool {
    ["LC_ALL", "LC_CTYPE", "LANG"]
        .iter()
        .filter_map(|name| std::env::var(name).ok())
        .find(|value| !value.is_empty())
        .map_or(false, |value| {
            let folded = value.to_ascii_lowercase();
            folded.contains("utf-8") || folded.contains("utf8")
        })
}
795
/// Decodes the UTF-8 sequence at the start of `bytes`.
///
/// Returns `(code_point, bytes_consumed)`. Only the structure is checked
/// (lead byte in `0xC2..=0xF4`, then the right number of `0b10xx_xxxx`
/// bytes); overlong forms and out-of-range values are not rejected. When
/// the structure is wrong or the input is truncated, the first byte is
/// returned as its own value with a length of 1.
///
/// # Panics
///
/// Panics if `bytes` is empty.
#[inline]
fn decode_utf8(bytes: &[u8]) -> (u32, usize) {
    let lead = bytes[0];
    let as_single_byte = (u32::from(lead), 1);

    let width: usize = match lead {
        0xC2..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        // ASCII, stray continuation bytes, 0xC0/0xC1 and 0xF5..=0xFF.
        _ => return as_single_byte,
    };

    match bytes.get(1..width) {
        Some(tail) if tail.iter().all(|&b| b & 0xC0 == 0x80) => {
            // Payload bits of the lead byte: 5, 4 or 3 of them.
            let head = u32::from(lead) & (0x7F >> width);
            let code_point = tail
                .iter()
                .fold(head, |acc, &b| (acc << 6) | u32::from(b & 0x3F));
            (code_point, width)
        }
        _ => as_single_byte,
    }
}
839
/// Returns `true` when code point `cp` is in the table of characters that
/// occupy no display column (combining marks, format controls, variation
/// selectors, tag characters and similar).
#[inline]
fn is_zero_width(cp: u32) -> bool {
    /// Inclusive `(first, last)` ranges, sorted ascending and disjoint so
    /// that the table can be binary-searched.
    const ZERO_WIDTH: &[(u32, u32)] = &[
        (0x0300, 0x036F),
        (0x0483, 0x0489),
        (0x0591, 0x05BD),
        (0x05BF, 0x05BF),
        (0x05C1, 0x05C2),
        (0x05C4, 0x05C5),
        (0x05C7, 0x05C7),
        (0x0600, 0x0605),
        (0x0610, 0x061A),
        (0x064B, 0x065F),
        (0x0670, 0x0670),
        (0x06D6, 0x06DD),
        (0x06DF, 0x06E4),
        (0x06E7, 0x06E8),
        (0x06EA, 0x06ED),
        (0x070F, 0x070F),
        (0x0711, 0x0711),
        (0x0730, 0x074A),
        (0x07A6, 0x07B0),
        (0x07EB, 0x07F3),
        (0x07FD, 0x07FD),
        (0x0816, 0x0819),
        (0x081B, 0x0823),
        (0x0825, 0x0827),
        (0x0829, 0x082D),
        (0x0859, 0x085B),
        (0x08D3, 0x08E1),
        (0x08E3, 0x0902),
        (0x093A, 0x093A),
        (0x093C, 0x093C),
        (0x0941, 0x0948),
        (0x094D, 0x094D),
        (0x0951, 0x0957),
        (0x0962, 0x0963),
        (0x0981, 0x0981),
        (0x09BC, 0x09BC),
        (0x09C1, 0x09C4),
        (0x09CD, 0x09CD),
        (0x09E2, 0x09E3),
        (0x09FE, 0x09FE),
        (0x0A01, 0x0A02),
        (0x0A3C, 0x0A3C),
        (0x0A41, 0x0A42),
        (0x0A47, 0x0A48),
        (0x0A4B, 0x0A4D),
        (0x0A51, 0x0A51),
        (0x0A70, 0x0A71),
        (0x0A75, 0x0A75),
        (0x0A81, 0x0A82),
        (0x0ABC, 0x0ABC),
        (0x0AC1, 0x0AC5),
        (0x0AC7, 0x0AC8),
        (0x0ACD, 0x0ACD),
        (0x0AE2, 0x0AE3),
        (0x0AFA, 0x0AFF),
        (0x0B01, 0x0B01),
        (0x0B3C, 0x0B3C),
        (0x0B3F, 0x0B3F),
        (0x0B41, 0x0B44),
        (0x0B4D, 0x0B4D),
        (0x0B56, 0x0B56),
        (0x0B62, 0x0B63),
        (0x0B82, 0x0B82),
        (0x0BC0, 0x0BC0),
        (0x0BCD, 0x0BCD),
        (0x0C00, 0x0C00),
        (0x0C04, 0x0C04),
        (0x0C3E, 0x0C40),
        (0x0C46, 0x0C48),
        (0x0C4A, 0x0C4D),
        (0x0C55, 0x0C56),
        (0x0C62, 0x0C63),
        (0x0C81, 0x0C81),
        (0x0CBC, 0x0CBC),
        (0x0CBF, 0x0CBF),
        (0x0CC6, 0x0CC6),
        (0x0CCC, 0x0CCD),
        (0x0CE2, 0x0CE3),
        (0x0D00, 0x0D01),
        (0x0D3B, 0x0D3C),
        (0x0D41, 0x0D44),
        (0x0D4D, 0x0D4D),
        (0x0D62, 0x0D63),
        (0x0DCA, 0x0DCA),
        (0x0DD2, 0x0DD4),
        (0x0DD6, 0x0DD6),
        (0x0E31, 0x0E31),
        (0x0E34, 0x0E3A),
        (0x0E47, 0x0E4E),
        (0x0EB1, 0x0EB1),
        (0x0EB4, 0x0EBC),
        (0x0EC8, 0x0ECD),
        (0x0F18, 0x0F19),
        (0x0F35, 0x0F35),
        (0x0F37, 0x0F37),
        (0x0F39, 0x0F39),
        (0x0F71, 0x0F7E),
        (0x0F80, 0x0F84),
        (0x0F86, 0x0F87),
        (0x0F8D, 0x0F97),
        (0x0F99, 0x0FBC),
        (0x0FC6, 0x0FC6),
        (0x102D, 0x1030),
        (0x1032, 0x1037),
        (0x1039, 0x103A),
        (0x103D, 0x103E),
        (0x1058, 0x1059),
        (0x105E, 0x1060),
        (0x1071, 0x1074),
        (0x1082, 0x1082),
        (0x1085, 0x1086),
        (0x108D, 0x108D),
        (0x109D, 0x109D),
        (0x1160, 0x11FF),
        (0x135D, 0x135F),
        (0x1712, 0x1714),
        (0x1732, 0x1734),
        (0x1752, 0x1753),
        (0x1772, 0x1773),
        (0x17B4, 0x17B5),
        (0x17B7, 0x17BD),
        (0x17C6, 0x17C6),
        (0x17C9, 0x17D3),
        (0x17DD, 0x17DD),
        (0x180B, 0x180D),
        (0x1885, 0x1886),
        (0x18A9, 0x18A9),
        (0x1920, 0x1922),
        (0x1927, 0x1928),
        (0x1932, 0x1932),
        (0x1939, 0x193B),
        (0x1A17, 0x1A18),
        (0x1A1B, 0x1A1B),
        (0x1A56, 0x1A56),
        (0x1A58, 0x1A5E),
        (0x1A60, 0x1A60),
        (0x1A62, 0x1A62),
        (0x1A65, 0x1A6C),
        (0x1A73, 0x1A7C),
        (0x1A7F, 0x1A7F),
        (0x1AB0, 0x1ABE),
        (0x1B00, 0x1B03),
        (0x1B34, 0x1B34),
        (0x1B36, 0x1B3A),
        (0x1B3C, 0x1B3C),
        (0x1B42, 0x1B42),
        (0x1B6B, 0x1B73),
        (0x1B80, 0x1B81),
        (0x1BA2, 0x1BA5),
        (0x1BA8, 0x1BA9),
        (0x1BAB, 0x1BAD),
        (0x1BE6, 0x1BE6),
        (0x1BE8, 0x1BE9),
        (0x1BED, 0x1BED),
        (0x1BEF, 0x1BF1),
        (0x1C2C, 0x1C33),
        (0x1C36, 0x1C37),
        (0x1CD0, 0x1CD2),
        (0x1CD4, 0x1CE0),
        (0x1CE2, 0x1CE8),
        (0x1CED, 0x1CED),
        (0x1CF4, 0x1CF4),
        (0x1CF8, 0x1CF9),
        (0x1DC0, 0x1DF9),
        (0x1DFB, 0x1DFF),
        (0x200B, 0x200F),
        (0x202A, 0x202E),
        (0x2060, 0x2064),
        (0x2066, 0x206F),
        (0x20D0, 0x20F0),
        (0xFE00, 0xFE0F),
        (0xFE20, 0xFE2F),
        (0xFEFF, 0xFEFF),
        (0xFFF9, 0xFFFB),
        (0x1D167, 0x1D169),
        (0x1D173, 0x1D182),
        (0x1D185, 0x1D18B),
        (0x1D1AA, 0x1D1AD),
        (0x1D242, 0x1D244),
        (0xE0001, 0xE0001),
        (0xE0020, 0xE007F),
        (0xE0100, 0xE01EF),
    ];

    ZERO_WIDTH
        .binary_search_by(|&(first, last)| {
            if last < cp {
                std::cmp::Ordering::Less
            } else if first > cp {
                std::cmp::Ordering::Greater
            } else {
                std::cmp::Ordering::Equal
            }
        })
        .is_ok()
}
1030
/// Returns `true` when code point `cp` is in the table of characters that
/// occupy two display columns (East Asian wide/fullwidth ranges and the
/// listed emoji/symbol ranges).
#[inline]
fn is_wide_char(cp: u32) -> bool {
    /// Inclusive `(first, last)` ranges, sorted ascending and disjoint so
    /// that the table can be binary-searched.
    const WIDE: &[(u32, u32)] = &[
        (0x1100, 0x115F),
        (0x231A, 0x231B),
        (0x2329, 0x232A),
        (0x23E9, 0x23F3),
        (0x23F8, 0x23FA),
        (0x25FD, 0x25FE),
        (0x2614, 0x2615),
        (0x2648, 0x2653),
        (0x267F, 0x267F),
        (0x2693, 0x2693),
        (0x26A1, 0x26A1),
        (0x26AA, 0x26AB),
        (0x26BD, 0x26BE),
        (0x26C4, 0x26C5),
        (0x26CE, 0x26CE),
        (0x26D4, 0x26D4),
        (0x26EA, 0x26EA),
        (0x26F2, 0x26F3),
        (0x26F5, 0x26F5),
        (0x26FA, 0x26FA),
        (0x26FD, 0x26FD),
        (0x2702, 0x2702),
        (0x2705, 0x2705),
        (0x2708, 0x270D),
        (0x270F, 0x270F),
        (0x2712, 0x2712),
        (0x2714, 0x2714),
        (0x2716, 0x2716),
        (0x271D, 0x271D),
        (0x2721, 0x2721),
        (0x2728, 0x2728),
        (0x2733, 0x2734),
        (0x2744, 0x2744),
        (0x2747, 0x2747),
        (0x274C, 0x274C),
        (0x274E, 0x274E),
        (0x2753, 0x2755),
        (0x2757, 0x2757),
        (0x2763, 0x2764),
        (0x2795, 0x2797),
        (0x27A1, 0x27A1),
        (0x27B0, 0x27B0),
        (0x27BF, 0x27BF),
        (0x2934, 0x2935),
        (0x2B05, 0x2B07),
        (0x2B1B, 0x2B1C),
        (0x2B50, 0x2B50),
        (0x2B55, 0x2B55),
        (0x2E80, 0x303E),
        (0x3040, 0x33BF),
        (0x3400, 0x4DBF),
        (0x4E00, 0xA4CF),
        (0xA960, 0xA97C),
        (0xAC00, 0xD7A3),
        (0xF900, 0xFAFF),
        (0xFE10, 0xFE19),
        (0xFE30, 0xFE6F),
        (0xFF01, 0xFF60),
        (0xFFE0, 0xFFE6),
        (0x1F004, 0x1F004),
        (0x1F0CF, 0x1F0CF),
        (0x1F170, 0x1F171),
        (0x1F17E, 0x1F17F),
        (0x1F18E, 0x1F18E),
        (0x1F191, 0x1F19A),
        (0x1F1E0, 0x1F1FF),
        (0x1F200, 0x1F202),
        (0x1F210, 0x1F23B),
        (0x1F240, 0x1F248),
        (0x1F250, 0x1F251),
        (0x1F260, 0x1F265),
        (0x1F300, 0x1F64F),
        (0x1F680, 0x1F6FF),
        (0x1F900, 0x1F9FF),
        (0x1FA00, 0x1FA6F),
        (0x1FA70, 0x1FAFF),
        (0x20000, 0x2FFFD),
        (0x30000, 0x3FFFD),
    ];

    WIDE.binary_search_by(|&(first, last)| {
        if last < cp {
            std::cmp::Ordering::Less
        } else if first > cp {
            std::cmp::Ordering::Greater
        } else {
            std::cmp::Ordering::Equal
        }
    })
    .is_ok()
}
1117
/// Computes the display width of the widest line in single-byte text.
///
/// Column rules per byte:
/// * `0x20..=0x7E` (space and printable ASCII) advance the column by one;
/// * tab advances to the next multiple of 8;
/// * carriage return moves the column back to 0 without ending the line;
/// * newline and form feed end the line;
/// * every other byte contributes nothing.
pub fn max_line_length_c(data: &[u8]) -> u64 {
    let mut longest = 0u64; // widest finished line
    let mut widest = 0u64; // furthest column reached on the current line
    let mut column = 0u64; // current column

    for &byte in data {
        match byte {
            b'\n' | 0x0C => {
                longest = longest.max(widest);
                column = 0;
                widest = 0;
            }
            b'\r' => column = 0,
            b'\t' => {
                column = (column + 8) & !7;
                widest = widest.max(column);
            }
            0x20..=0x7E => {
                column += 1;
                widest = widest.max(column);
            }
            _ => {}
        }
    }

    // The last line may not be newline-terminated.
    longest.max(widest)
}
1199
1200pub fn max_line_length_utf8(data: &[u8]) -> u64 {
1207 let mut max_len: u64 = 0;
1208 let mut line_len: u64 = 0;
1209 let mut linepos: u64 = 0;
1210 let mut i = 0;
1211 let len = data.len();
1212
1213 while i < len {
1214 let b = unsafe { *data.get_unchecked(i) };
1215
1216 if b >= 0x21 && b <= 0x7E {
1217 i += 1;
1219 let mut run = 1u64;
1220 while i < len {
1221 let b = unsafe { *data.get_unchecked(i) };
1222 if b >= 0x21 && b <= 0x7E {
1223 run += 1;
1224 i += 1;
1225 } else {
1226 break;
1227 }
1228 }
1229 linepos += run;
1230 if linepos > line_len {
1231 line_len = linepos;
1232 }
1233 } else if b < 0x80 {
1234 match b {
1236 b' ' => {
1237 linepos += 1;
1238 if linepos > line_len {
1239 line_len = linepos;
1240 }
1241 }
1242 b'\n' => {
1243 if line_len > max_len {
1244 max_len = line_len;
1245 }
1246 linepos = 0;
1247 line_len = 0;
1248 }
1249 b'\t' => {
1250 linepos = (linepos + 8) & !7;
1251 if linepos > line_len {
1252 line_len = linepos;
1253 }
1254 }
1255 b'\r' => {
1256 linepos = 0;
1257 }
1258 0x0C => {
1259 if line_len > max_len {
1260 max_len = line_len;
1261 }
1262 linepos = 0;
1263 line_len = 0;
1264 }
1265 _ => {} }
1267 i += 1;
1268 } else {
1269 let (cp, len) = decode_utf8(&data[i..]);
1271
1272 if cp <= 0x9F {
1274 } else if is_zero_width(cp) {
1276 } else if is_wide_char(cp) {
1278 linepos += 2;
1279 if linepos > line_len {
1280 line_len = linepos;
1281 }
1282 } else {
1283 linepos += 1;
1285 if linepos > line_len {
1286 line_len = linepos;
1287 }
1288 }
1289 i += len;
1290 }
1291 }
1292
1293 if line_len > max_len {
1295 max_len = line_len;
1296 }
1297
1298 max_len
1299}
1300
1301#[inline]
1303pub fn max_line_length(data: &[u8], utf8: bool) -> u64 {
1304 if utf8 {
1305 max_line_length_utf8(data)
1306 } else {
1307 max_line_length_c(data)
1308 }
1309}
1310
1311pub fn count_all(data: &[u8], utf8: bool) -> WcCounts {
1323 if utf8 {
1324 let (lines, words) = count_lines_words_utf8_fused(data);
1325 WcCounts {
1326 lines,
1327 words,
1328 bytes: data.len() as u64,
1329 chars: count_chars_utf8(data),
1330 max_line_length: max_line_length_utf8(data),
1331 }
1332 } else {
1333 WcCounts {
1334 lines: count_lines(data),
1335 words: count_words_locale(data, false),
1336 bytes: data.len() as u64,
1337 chars: data.len() as u64,
1338 max_line_length: max_line_length_c(data),
1339 }
1340 }
1341}
1342
/// Cheap heuristic: inspects up to three 256-byte windows of `data` (start,
/// middle, end) and returns `true` when all inspected bytes are ASCII.
///
/// This is a sample, not a proof. Bytes outside the windows are never
/// looked at, so `true` does not guarantee that the whole buffer is ASCII.
/// An empty buffer yields `true`.
#[inline]
fn check_ascii_sample(data: &[u8]) -> bool {
    let total = data.len();
    let window = total.min(256);

    // Leading window.
    if !data[..window].is_ascii() {
        return false;
    }

    // Middle window, only when it cannot be covered by the other two.
    if total > 2 * window {
        let from = (total / 2).saturating_sub(window / 2);
        let to = (from + window).min(total);
        if !data[from..to].is_ascii() {
            return false;
        }
    }

    // Trailing window, only when the input is longer than one window.
    total <= window || data[total - window..].is_ascii()
}
1401
1402fn split_at_newlines(data: &[u8], num_chunks: usize) -> Vec<&[u8]> {
1411 if data.is_empty() || num_chunks <= 1 {
1412 return vec![data];
1413 }
1414 let chunk_size = data.len() / num_chunks;
1415 let mut chunks = Vec::with_capacity(num_chunks);
1416 let mut pos = 0;
1417
1418 for _ in 0..num_chunks - 1 {
1419 let target = pos + chunk_size;
1420 if target >= data.len() {
1421 break;
1422 }
1423 let boundary = memchr::memchr(b'\n', &data[target..])
1424 .map(|p| target + p + 1)
1425 .unwrap_or(data.len());
1426 if boundary > pos {
1427 chunks.push(&data[pos..boundary]);
1428 }
1429 pos = boundary;
1430 }
1431 if pos < data.len() {
1432 chunks.push(&data[pos..]);
1433 }
1434 chunks
1435}
1436
1437pub fn count_lines_parallel(data: &[u8]) -> u64 {
1440 if data.len() < PARALLEL_THRESHOLD {
1441 return count_lines(data);
1442 }
1443
1444 let num_threads = rayon::current_num_threads().max(1);
1445 let chunk_size = (data.len() / num_threads).max(2 * 1024 * 1024);
1447
1448 data.par_chunks(chunk_size)
1449 .map(|chunk| memchr_iter(b'\n', chunk).count() as u64)
1450 .sum()
1451}
1452
1453pub fn count_words_parallel(data: &[u8], utf8: bool) -> u64 {
1455 if data.len() < PARALLEL_THRESHOLD {
1456 return count_words_locale(data, utf8);
1457 }
1458
1459 let num_threads = rayon::current_num_threads().max(1);
1460
1461 if utf8 {
1462 let chunks = split_at_newlines(data, num_threads);
1465 chunks.par_iter().map(|chunk| count_words_utf8(chunk)).sum()
1466 } else {
1467 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1469
1470 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1471
1472 let results: Vec<(u64, u64, bool, bool)> = chunks
1474 .par_iter()
1475 .map(|chunk| count_lw_c_chunk(chunk))
1476 .collect();
1477
1478 let mut total = 0u64;
1479 for i in 0..results.len() {
1480 total += results[i].1;
1481 if i > 0 && results[i - 1].3 && results[i].2 {
1485 total -= 1;
1486 }
1487 }
1488 total
1489 }
1490}
1491
1492pub fn count_chars_parallel(data: &[u8], utf8: bool) -> u64 {
1494 if !utf8 {
1495 return data.len() as u64;
1496 }
1497 if data.len() < PARALLEL_THRESHOLD {
1498 return count_chars_utf8(data);
1499 }
1500
1501 let num_threads = rayon::current_num_threads().max(1);
1502 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1503
1504 data.par_chunks(chunk_size).map(count_chars_utf8).sum()
1505}
1506
1507pub fn count_lwb(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1510 let (lines, words) = count_lines_words(data, utf8);
1511 (lines, words, data.len() as u64)
1512}
1513
1514pub fn count_lwb_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1520 if data.len() < PARALLEL_THRESHOLD {
1521 return count_lwb(data, utf8);
1523 }
1524
1525 let num_threads = rayon::current_num_threads().max(1);
1526
1527 let (lines, words) = if !utf8 {
1528 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1530
1531 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1532 let results: Vec<(u64, u64, bool, bool)> = chunks
1533 .par_iter()
1534 .map(|chunk| count_lw_c_chunk_fast(chunk))
1535 .collect();
1536
1537 let mut line_total = 0u64;
1538 let mut word_total = 0u64;
1539 for i in 0..results.len() {
1540 line_total += results[i].0;
1541 word_total += results[i].1;
1542 if i > 0 && results[i - 1].3 && results[i].2 {
1543 word_total -= 1;
1544 }
1545 }
1546
1547 (line_total, word_total)
1548 } else {
1549 let is_ascii = check_ascii_sample(data);
1551 if is_ascii {
1552 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1554 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1555 let results: Vec<(u64, u64, bool, bool)> = chunks
1556 .par_iter()
1557 .map(|chunk| count_lw_c_chunk_fast(chunk))
1558 .collect();
1559
1560 let mut line_total = 0u64;
1561 let mut word_total = 0u64;
1562 for i in 0..results.len() {
1563 line_total += results[i].0;
1564 word_total += results[i].1;
1565 if i > 0 && results[i - 1].3 && results[i].2 {
1566 word_total -= 1;
1567 }
1568 }
1569 (line_total, word_total)
1570 } else {
1571 let chunks = split_at_newlines(data, num_threads);
1574 let results: Vec<(u64, u64)> = chunks
1575 .par_iter()
1576 .map(|chunk| count_lines_words_utf8_fused(chunk))
1577 .collect();
1578 let mut line_total = 0u64;
1579 let mut word_total = 0u64;
1580 for (l, w) in results {
1581 line_total += l;
1582 word_total += w;
1583 }
1584 (line_total, word_total)
1585 }
1586 };
1587
1588 (lines, words, data.len() as u64)
1589}
1590
1591pub fn count_lwc_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1595 if data.len() < PARALLEL_THRESHOLD {
1596 let lines = count_lines(data);
1597 let words = count_words_locale(data, utf8);
1598 let chars = count_chars(data, utf8);
1599 return (lines, words, chars);
1600 }
1601
1602 let num_threads = rayon::current_num_threads().max(1);
1603
1604 if utf8 {
1605 let chunks = split_at_newlines(data, num_threads);
1607 let results: Vec<(u64, u64, u64)> = chunks
1608 .par_iter()
1609 .map(|chunk| {
1610 let (lines, words) = count_lines_words_utf8_fused(chunk);
1611 let chars = count_chars_utf8(chunk);
1612 (lines, words, chars)
1613 })
1614 .collect();
1615 let mut lines = 0u64;
1616 let mut words = 0u64;
1617 let mut chars = 0u64;
1618 for (l, w, c) in results {
1619 lines += l;
1620 words += w;
1621 chars += c;
1622 }
1623 (lines, words, chars)
1624 } else {
1625 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1627 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1628 let results: Vec<(u64, u64, bool, bool)> = chunks
1629 .par_iter()
1630 .map(|chunk| count_lw_c_chunk_fast(chunk))
1631 .collect();
1632 let mut lines = 0u64;
1633 let mut words = 0u64;
1634 for i in 0..results.len() {
1635 lines += results[i].0;
1636 words += results[i].1;
1637 if i > 0 && results[i - 1].3 && results[i].2 {
1638 words -= 1;
1639 }
1640 }
1641 (lines, words, data.len() as u64)
1642 }
1643}
1644
1645pub fn max_line_length_parallel(data: &[u8], utf8: bool) -> u64 {
1649 if data.len() < PARALLEL_THRESHOLD {
1650 return max_line_length(data, utf8);
1651 }
1652 let num_threads = rayon::current_num_threads().max(1);
1653 let chunks = split_at_newlines(data, num_threads);
1654 chunks
1655 .par_iter()
1656 .map(|chunk| {
1657 if utf8 {
1658 max_line_length_utf8(chunk)
1659 } else {
1660 max_line_length_c(chunk)
1661 }
1662 })
1663 .max()
1664 .unwrap_or(0)
1665}
1666
1667pub fn count_all_parallel(data: &[u8], utf8: bool) -> WcCounts {
1671 if data.len() < PARALLEL_THRESHOLD {
1672 return count_all(data, utf8);
1673 }
1674
1675 let num_threads = rayon::current_num_threads().max(1);
1676 let chunks = split_at_newlines(data, num_threads);
1677
1678 if utf8 {
1679 let results: Vec<(u64, u64, u64, u64)> = chunks
1680 .par_iter()
1681 .map(|chunk| {
1682 let (lines, words) = count_lines_words_utf8_fused(chunk);
1683 let chars = count_chars_utf8(chunk);
1684 let max_ll = max_line_length_utf8(chunk);
1685 (lines, words, chars, max_ll)
1686 })
1687 .collect();
1688
1689 let mut counts = WcCounts {
1690 bytes: data.len() as u64,
1691 ..Default::default()
1692 };
1693 for (l, w, c, m) in results {
1694 counts.lines += l;
1695 counts.words += w;
1696 counts.chars += c;
1697 if m > counts.max_line_length {
1698 counts.max_line_length = m;
1699 }
1700 }
1701 counts
1702 } else {
1703 let results: Vec<(u64, u64, u64)> = chunks
1705 .par_iter()
1706 .map(|chunk| {
1707 let (lines, words) = count_lines_words(chunk, false);
1708 let max_ll = max_line_length_c(chunk);
1709 (lines, words, max_ll)
1710 })
1711 .collect();
1712
1713 let mut counts = WcCounts {
1714 bytes: data.len() as u64,
1715 chars: data.len() as u64,
1716 ..Default::default()
1717 };
1718 for (l, w, m) in &results {
1719 counts.lines += l;
1720 counts.words += w;
1721 if *m > counts.max_line_length {
1722 counts.max_line_length = *m;
1723 }
1724 }
1725 counts
1726 }
1727}