1use memchr::memchr_iter;
2use rayon::prelude::*;
3
/// Input size in bytes (1 MiB) below which the `*_parallel` counters in this
/// file fall back to their sequential implementation.
const PARALLEL_THRESHOLD: usize = 1 << 20;
8
/// The five counters reported for one input buffer.
///
/// `Default` yields all zeros.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct WcCounts {
    /// Line count.
    pub lines: u64,
    /// Word count.
    pub words: u64,
    /// Byte count (length of the input).
    pub bytes: u64,
    /// Character count.
    pub chars: u64,
    /// Display width of the widest line.
    pub max_line_length: u64,
}
18
/// Builds the 256-entry lookup table behind [`IS_SPACE`].
///
/// An entry is `true` exactly for the six ASCII whitespace bytes: TAB (0x09),
/// LF (0x0A), VT (0x0B), FF (0x0C), CR (0x0D) and SPACE (0x20).
const fn make_is_space() -> [bool; 256] {
    const WHITESPACE: [u8; 6] = [0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x20];
    let mut table = [false; 256];
    // `for` loops are not available in `const fn`, hence the manual index.
    let mut k = 0;
    while k < WHITESPACE.len() {
        table[WHITESPACE[k] as usize] = true;
        k += 1;
    }
    table
}
/// Byte-indexed whitespace classification table, computed at compile time.
const IS_SPACE: [bool; 256] = make_is_space();
53
54#[inline]
57pub(crate) fn first_is_word(data: &[u8]) -> bool {
58 !data.is_empty() && !IS_SPACE[data[0] as usize]
59}
60
/// Returns `true` for the non-ASCII space code points this module treats as
/// word separators: U+1680, U+2000..=U+200A, U+2028, U+2029, U+205F, U+3000.
#[inline]
fn is_unicode_space(cp: u32) -> bool {
    match cp {
        0x1680 => true,
        0x2000..=0x200A => true,
        0x2028 | 0x2029 => true,
        0x205F | 0x3000 => true,
        _ => false,
    }
}
80
/// Returns `true` for the four non-breaking code points handled here:
/// U+00A0, U+2007, U+202F and U+2060.
#[inline]
fn is_wnbspace(cp: u32) -> bool {
    const NON_BREAKING: [u32; 4] = [0x00A0, 0x2007, 0x202F, 0x2060];
    NON_BREAKING.contains(&cp)
}
88
89#[inline]
91fn is_unicode_word_break(cp: u32) -> bool {
92 is_unicode_space(cp) || is_wnbspace(cp)
93}
94
95#[inline]
102pub fn count_lines(data: &[u8]) -> u64 {
103 memchr_iter(b'\n', data).count() as u64
104}
105
/// Returns the length of `data` in bytes.
#[inline]
pub fn count_bytes(data: &[u8]) -> u64 {
    let byte_len: usize = data.len();
    byte_len as u64
}
111
112pub fn count_words(data: &[u8]) -> u64 {
114 count_words_locale(data, true)
115}
116
117pub fn count_words_locale(data: &[u8], utf8: bool) -> u64 {
123 if utf8 {
124 count_words_utf8(data)
125 } else {
126 count_words_c(data)
127 }
128}
129
130fn count_words_c(data: &[u8]) -> u64 {
134 let mut words = 0u64;
135 let mut in_word = false;
136 let mut i = 0;
137 let len = data.len();
138
139 while i < len {
140 let b = unsafe { *data.get_unchecked(i) };
141 if IS_SPACE[b as usize] {
142 in_word = false;
143 } else if !in_word {
144 in_word = true;
145 words += 1;
146 }
147 i += 1;
148 }
149 words
150}
151
152#[cfg(target_arch = "x86_64")]
156#[inline(always)]
157fn count_lw_c_scalar_tail(
158 ptr: *const u8,
159 mut i: usize,
160 len: usize,
161 mut total_lines: u64,
162 mut total_words: u64,
163 mut prev_in_word: bool,
164 data: &[u8],
165) -> (u64, u64, bool, bool) {
166 while i < len {
167 let b = unsafe { *ptr.add(i) };
168 if IS_SPACE[b as usize] {
169 if b == b'\n' {
170 total_lines += 1;
171 }
172 prev_in_word = false;
173 } else if !prev_in_word {
174 total_words += 1;
175 prev_in_word = true;
176 }
177 i += 1;
178 }
179 let first_word = first_is_word(data);
180 (total_lines, total_words, first_word, prev_in_word)
181}
182
183#[cfg(target_arch = "x86_64")]
189#[target_feature(enable = "avx2")]
190unsafe fn count_lw_c_chunk_avx2(data: &[u8]) -> (u64, u64, bool, bool) {
191 use std::arch::x86_64::*;
192
193 let len = data.len();
194 let ptr = data.as_ptr();
195 let mut i = 0usize;
196 let mut total_lines = 0u64;
197 let mut total_words = 0u64;
198 let mut prev_in_word = false;
199
200 unsafe {
201 let nl_byte = _mm256_set1_epi8(b'\n' as i8);
202 let zero = _mm256_setzero_si256();
203 let ones = _mm256_set1_epi8(1);
204 let const_0x09 = _mm256_set1_epi8(0x09u8 as i8);
206 let const_0x0d = _mm256_set1_epi8(0x0Du8 as i8);
207 let const_0x20 = _mm256_set1_epi8(0x20u8 as i8);
208
209 let mut line_acc = _mm256_setzero_si256();
210 let mut batch = 0u32;
211
212 while i + 32 <= len {
213 let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
214 let is_nl = _mm256_cmpeq_epi8(v, nl_byte);
215 line_acc = _mm256_add_epi8(line_acc, _mm256_and_si256(is_nl, ones));
216
217 let ge_09 = _mm256_cmpeq_epi8(_mm256_max_epu8(v, const_0x09), v);
219 let le_0d = _mm256_cmpeq_epi8(_mm256_min_epu8(v, const_0x0d), v);
220 let in_tab_range = _mm256_and_si256(ge_09, le_0d);
221 let is_sp = _mm256_cmpeq_epi8(v, const_0x20);
222 let is_space = _mm256_or_si256(in_tab_range, is_sp);
223 let space_mask = _mm256_movemask_epi8(is_space) as u32;
224
225 let nonspace_mask = !space_mask;
228 let prev_space = (space_mask << 1) | if prev_in_word { 0u32 } else { 1u32 };
230 let starts = nonspace_mask & prev_space;
231 total_words += starts.count_ones() as u64;
232
233 prev_in_word = (nonspace_mask >> 31) & 1 == 1;
235
236 batch += 1;
237 if batch >= 255 {
238 let sad = _mm256_sad_epu8(line_acc, zero);
239 let hi = _mm256_extracti128_si256(sad, 1);
240 let lo = _mm256_castsi256_si128(sad);
241 let s = _mm_add_epi64(lo, hi);
242 let h64 = _mm_unpackhi_epi64(s, s);
243 let t = _mm_add_epi64(s, h64);
244 total_lines += _mm_cvtsi128_si64(t) as u64;
245 line_acc = _mm256_setzero_si256();
246 batch = 0;
247 }
248 i += 32;
249 }
250
251 if batch > 0 {
252 let sad = _mm256_sad_epu8(line_acc, zero);
253 let hi = _mm256_extracti128_si256(sad, 1);
254 let lo = _mm256_castsi256_si128(sad);
255 let s = _mm_add_epi64(lo, hi);
256 let h64 = _mm_unpackhi_epi64(s, s);
257 let t = _mm_add_epi64(s, h64);
258 total_lines += _mm_cvtsi128_si64(t) as u64;
259 }
260 }
261
262 count_lw_c_scalar_tail(ptr, i, len, total_lines, total_words, prev_in_word, data)
263}
264
265#[cfg(target_arch = "x86_64")]
268#[target_feature(enable = "sse2")]
269unsafe fn count_lw_c_chunk_sse2(data: &[u8]) -> (u64, u64, bool, bool) {
270 use std::arch::x86_64::*;
271
272 let len = data.len();
273 let ptr = data.as_ptr();
274 let mut i = 0usize;
275 let mut total_lines = 0u64;
276 let mut total_words = 0u64;
277 let mut prev_in_word = false;
278
279 unsafe {
280 let nl_byte = _mm_set1_epi8(b'\n' as i8);
281 let zero = _mm_setzero_si128();
282 let ones = _mm_set1_epi8(1);
283 let const_0x09 = _mm_set1_epi8(0x09u8 as i8);
285 let const_0x0d = _mm_set1_epi8(0x0Du8 as i8);
286 let const_0x20 = _mm_set1_epi8(0x20u8 as i8);
287
288 let mut line_acc = _mm_setzero_si128();
289 let mut batch = 0u32;
290
291 while i + 16 <= len {
292 let v = _mm_loadu_si128(ptr.add(i) as *const __m128i);
293 let is_nl = _mm_cmpeq_epi8(v, nl_byte);
294 line_acc = _mm_add_epi8(line_acc, _mm_and_si128(is_nl, ones));
295
296 let ge_09 = _mm_cmpeq_epi8(_mm_max_epu8(v, const_0x09), v);
298 let le_0d = _mm_cmpeq_epi8(_mm_min_epu8(v, const_0x0d), v);
299 let in_tab_range = _mm_and_si128(ge_09, le_0d);
300 let is_sp = _mm_cmpeq_epi8(v, const_0x20);
301 let is_space = _mm_or_si128(in_tab_range, is_sp);
302 let space_mask = (_mm_movemask_epi8(is_space) as u32) & 0xFFFF;
303
304 let nonspace_mask = !space_mask & 0xFFFF;
306 let prev_space = ((space_mask << 1) | if prev_in_word { 0u32 } else { 1u32 }) & 0xFFFF;
307 let starts = nonspace_mask & prev_space;
308 total_words += starts.count_ones() as u64;
309
310 prev_in_word = (nonspace_mask >> 15) & 1 == 1;
311
312 batch += 1;
313 if batch >= 255 {
314 let sad = _mm_sad_epu8(line_acc, zero);
315 let hi = _mm_unpackhi_epi64(sad, sad);
316 let t = _mm_add_epi64(sad, hi);
317 total_lines += _mm_cvtsi128_si64(t) as u64;
318 line_acc = _mm_setzero_si128();
319 batch = 0;
320 }
321 i += 16;
322 }
323
324 if batch > 0 {
325 let sad = _mm_sad_epu8(line_acc, zero);
326 let hi = _mm_unpackhi_epi64(sad, sad);
327 let t = _mm_add_epi64(sad, hi);
328 total_lines += _mm_cvtsi128_si64(t) as u64;
329 }
330 }
331
332 count_lw_c_scalar_tail(ptr, i, len, total_lines, total_words, prev_in_word, data)
333}
334
335#[inline]
337fn count_lw_c_chunk_fast(data: &[u8]) -> (u64, u64, bool, bool) {
338 #[cfg(target_arch = "x86_64")]
339 {
340 if is_x86_feature_detected!("avx2") && data.len() >= 64 {
341 return unsafe { count_lw_c_chunk_avx2(data) };
342 }
343 if data.len() >= 32 {
344 return unsafe { count_lw_c_chunk_sse2(data) };
345 }
346 }
347 count_lw_c_chunk(data)
348}
349
350fn count_lw_c_chunk(data: &[u8]) -> (u64, u64, bool, bool) {
354 let mut lines = 0u64;
355 let mut words = 0u64;
356 let mut in_word = false;
357 let mut i = 0;
358 let len = data.len();
359
360 let first_word = first_is_word(data);
361
362 while i < len {
363 let b = unsafe { *data.get_unchecked(i) };
364 if IS_SPACE[b as usize] {
365 if b == b'\n' {
366 lines += 1;
367 }
368 in_word = false;
369 } else if !in_word {
370 in_word = true;
371 words += 1;
372 }
373 i += 1;
374 }
375 (lines, words, first_word, in_word)
376}
377
378fn count_words_utf8(data: &[u8]) -> u64 {
388 let mut words = 0u64;
389 let mut in_word = false;
390 let mut i = 0;
391 let len = data.len();
392
393 while i < len {
394 let b = unsafe { *data.get_unchecked(i) };
395
396 if b < 0x80 {
397 if IS_SPACE[b as usize] {
399 in_word = false;
400 } else if !in_word {
401 in_word = true;
402 words += 1;
403 }
404 i += 1;
405 } else if b < 0xC2 {
406 if !in_word {
409 in_word = true;
410 words += 1;
411 }
412 i += 1;
413 } else if b < 0xE0 {
414 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
415 let cp = ((b as u32 & 0x1F) << 6)
416 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
417 if is_unicode_word_break(cp) {
418 in_word = false;
419 } else if !in_word {
420 in_word = true;
421 words += 1;
422 }
423 i += 2;
424 } else {
425 if !in_word {
427 in_word = true;
428 words += 1;
429 }
430 i += 1;
431 }
432 } else if b < 0xF0 {
433 if i + 2 < len
434 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
435 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
436 {
437 let cp = ((b as u32 & 0x0F) << 12)
438 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
439 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
440 if is_unicode_word_break(cp) {
441 in_word = false;
442 } else if !in_word {
443 in_word = true;
444 words += 1;
445 }
446 i += 3;
447 } else {
448 if !in_word {
450 in_word = true;
451 words += 1;
452 }
453 i += 1;
454 }
455 } else if b < 0xF5 {
456 if i + 3 < len
457 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
458 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
459 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
460 {
461 let cp = ((b as u32 & 0x07) << 18)
462 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
463 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
464 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
465 if is_unicode_word_break(cp) {
466 in_word = false;
467 } else if !in_word {
468 in_word = true;
469 words += 1;
470 }
471 i += 4;
472 } else {
473 if !in_word {
475 in_word = true;
476 words += 1;
477 }
478 i += 1;
479 }
480 } else {
481 if !in_word {
483 in_word = true;
484 words += 1;
485 }
486 i += 1;
487 }
488 }
489
490 words
491}
492
493pub fn count_lines_words(data: &[u8], utf8: bool) -> (u64, u64) {
497 if utf8 {
498 count_lines_words_utf8_fused(data)
499 } else {
500 let (lines, words, _, _) = count_lw_c_chunk_fast(data);
501 (lines, words)
502 }
503}
504
505fn count_lines_words_utf8_fused(data: &[u8]) -> (u64, u64) {
512 let mut lines = 0u64;
513 let mut words = 0u64;
514 let mut in_word = false;
515 let mut i = 0;
516 let len = data.len();
517
518 while i < len {
519 let b = unsafe { *data.get_unchecked(i) };
520
521 if b == b'\n' {
522 lines += 1;
523 in_word = false;
524 i += 1;
525 } else if b < 0x80 {
526 if IS_SPACE[b as usize] {
528 in_word = false;
529 } else if !in_word {
530 in_word = true;
531 words += 1;
532 }
533 i += 1;
534 } else if b < 0xC2 {
535 if !in_word {
537 in_word = true;
538 words += 1;
539 }
540 i += 1;
541 } else if b < 0xE0 {
542 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
543 let cp = ((b as u32 & 0x1F) << 6)
544 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
545 if is_unicode_word_break(cp) {
546 in_word = false;
547 } else if !in_word {
548 in_word = true;
549 words += 1;
550 }
551 i += 2;
552 } else {
553 if !in_word {
554 in_word = true;
555 words += 1;
556 }
557 i += 1;
558 }
559 } else if b < 0xF0 {
560 if i + 2 < len
561 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
562 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
563 {
564 let cp = ((b as u32 & 0x0F) << 12)
565 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
566 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
567 if is_unicode_word_break(cp) {
568 in_word = false;
569 } else if !in_word {
570 in_word = true;
571 words += 1;
572 }
573 i += 3;
574 } else {
575 if !in_word {
576 in_word = true;
577 words += 1;
578 }
579 i += 1;
580 }
581 } else if b < 0xF5 {
582 if i + 3 < len
583 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
584 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
585 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
586 {
587 let cp = ((b as u32 & 0x07) << 18)
588 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
589 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
590 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
591 if is_unicode_word_break(cp) {
592 in_word = false;
593 } else if !in_word {
594 in_word = true;
595 words += 1;
596 }
597 i += 4;
598 } else {
599 if !in_word {
600 in_word = true;
601 words += 1;
602 }
603 i += 1;
604 }
605 } else {
606 if !in_word {
608 in_word = true;
609 words += 1;
610 }
611 i += 1;
612 }
613 }
614
615 (lines, words)
616}
617
618pub fn count_lines_words_chars(data: &[u8], utf8: bool) -> (u64, u64, u64) {
620 if utf8 {
621 let (lines, words) = count_lines_words_utf8_fused(data);
623 let chars = count_chars_utf8(data);
624 (lines, words, chars)
625 } else {
626 let (lines, words) = count_lines_words(data, false);
628 (lines, words, data.len() as u64)
629 }
630}
631
632pub fn count_chars_utf8(data: &[u8]) -> u64 {
639 #[cfg(target_arch = "x86_64")]
640 {
641 if is_x86_feature_detected!("avx2") {
642 return unsafe { count_chars_utf8_avx2(data) };
643 }
644 }
645 count_chars_utf8_scalar(data)
646}
647
/// AVX2 character counter: counts the bytes of `data` that are not UTF-8
/// continuation bytes (i.e. whose top two bits are not `10`).
///
/// # Safety
///
/// The caller must ensure the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_chars_utf8_avx2(data: &[u8]) -> u64 {
    use std::arch::x86_64::*;

    /// Adds up the 32 unsigned byte lanes of `lanes`.
    #[inline]
    #[target_feature(enable = "avx2")]
    unsafe fn sum_byte_lanes(lanes: __m256i) -> u64 {
        unsafe {
            // SAD against zero yields four 64-bit partial sums.
            let sad = _mm256_sad_epu8(lanes, _mm256_setzero_si256());
            let halves = _mm_add_epi64(
                _mm256_castsi256_si128(sad),
                _mm256_extracti128_si256(sad, 1),
            );
            let folded = _mm_add_epi64(halves, _mm_unpackhi_epi64(halves, halves));
            _mm_cvtsi128_si64(folded) as u64
        }
    }

    // SAFETY: every 32-byte load starts at `offset` with `offset + 32 <= len`,
    // so it stays inside `data`; AVX2 availability is the caller's contract.
    unsafe {
        let top_two_bits = _mm256_set1_epi8(0xC0u8 as i8);
        let continuation_tag = _mm256_set1_epi8(0x80u8 as i8);
        let one = _mm256_set1_epi8(1);

        let len = data.len();
        let base = data.as_ptr();
        let mut chars = 0u64;
        let mut offset = 0usize;
        // Per-lane counters; flushed before any lane can exceed 255.
        let mut lane_counts = _mm256_setzero_si256();
        let mut pending = 0u32;

        while offset + 32 <= len {
            let block = _mm256_loadu_si256(base.add(offset) as *const __m256i);
            let is_continuation =
                _mm256_cmpeq_epi8(_mm256_and_si256(block, top_two_bits), continuation_tag);
            // andnot keeps a 1 in every lane that is NOT a continuation byte.
            lane_counts = _mm256_add_epi8(lane_counts, _mm256_andnot_si256(is_continuation, one));

            offset += 32;
            pending += 1;
            if pending >= 255 {
                chars += sum_byte_lanes(lane_counts);
                lane_counts = _mm256_setzero_si256();
                pending = 0;
            }
        }

        if pending > 0 {
            chars += sum_byte_lanes(lane_counts);
        }

        // Scalar epilogue for the final `len % 32` bytes.
        for &byte in &data[offset..] {
            chars += u64::from((byte & 0xC0) != 0x80);
        }

        chars
    }
}
711
/// Portable character counter: counts the bytes of `data` that are not UTF-8
/// continuation bytes.
///
/// Works in 64-byte blocks. A block with no byte >= 0x80 is pure ASCII and
/// contributes 64 without inspecting bytes individually.
fn count_chars_utf8_scalar(data: &[u8]) -> u64 {
    let starts_char = |byte: &&u8| (**byte & 0xC0) != 0x80;

    let mut blocks = data.chunks_exact(64);
    let mut total = 0u64;

    for block in blocks.by_ref() {
        // OR of all bytes has its top bit set iff any byte is non-ASCII.
        let combined = block.iter().fold(0u8, |acc, &byte| acc | byte);
        if combined < 0x80 {
            total += 64;
        } else {
            total += block.iter().filter(starts_char).count() as u64;
        }
    }

    total += blocks.remainder().iter().filter(starts_char).count() as u64;
    total
}
763
/// Character count in the C locale, where every byte is one character.
#[inline]
pub fn count_chars_c(data: &[u8]) -> u64 {
    let byte_len: usize = data.len();
    byte_len as u64
}
769
770#[inline]
772pub fn count_chars(data: &[u8], utf8: bool) -> u64 {
773 if utf8 {
774 count_chars_utf8(data)
775 } else {
776 count_chars_c(data)
777 }
778}
779
/// Reports whether the process environment selects a UTF-8 locale.
///
/// `LC_ALL`, `LC_CTYPE` and `LANG` are consulted in that order. The first one
/// that is set to a non-empty (valid Unicode) value decides the answer: `true`
/// when it contains `utf-8` or `utf8`, compared case-insensitively. Returns
/// `false` when none of the three is usable.
pub fn is_utf8_locale() -> bool {
    ["LC_ALL", "LC_CTYPE", "LANG"]
        .iter()
        .filter_map(|name| std::env::var(name).ok())
        // Lazy: later variables are not read once a non-empty one is found.
        .find(|value| !value.is_empty())
        .map_or(false, |value| {
            let folded = value.to_ascii_lowercase();
            folded.contains("utf-8") || folded.contains("utf8")
        })
}
792
/// Decodes the UTF-8 unit at the start of `bytes`.
///
/// Returns `(code_point, bytes_consumed)`. A lead byte in 0xC2..=0xF4 followed
/// by the required number of continuation bytes is decoded as a 2-, 3- or
/// 4-byte sequence. In every other case (ASCII, stray continuation byte,
/// 0xC0/0xC1, truncated or broken sequence, 0xF5..=0xFF) the first byte's own
/// value is returned with a length of 1. No overlong or surrogate checks are
/// performed.
///
/// # Panics
///
/// Panics if `bytes` is empty.
#[inline]
fn decode_utf8(bytes: &[u8]) -> (u32, usize) {
    let lead = bytes[0];
    let is_cont = |b: u8| b & 0xC0 == 0x80;
    let low6 = |b: u8| u32::from(b & 0x3F);

    match (lead, &bytes[1..]) {
        (0xC2..=0xDF, &[b1, ..]) if is_cont(b1) => {
            ((u32::from(lead & 0x1F) << 6) | low6(b1), 2)
        }
        (0xE0..=0xEF, &[b1, b2, ..]) if is_cont(b1) && is_cont(b2) => {
            ((u32::from(lead & 0x0F) << 12) | (low6(b1) << 6) | low6(b2), 3)
        }
        (0xF0..=0xF4, &[b1, b2, b3, ..]) if is_cont(b1) && is_cont(b2) && is_cont(b3) => (
            (u32::from(lead & 0x07) << 18) | (low6(b1) << 12) | (low6(b2) << 6) | low6(b3),
            4,
        ),
        _ => (u32::from(lead), 1),
    }
}
836
/// Returns `true` when `cp` is in this module's table of code points that
/// occupy no display column.
///
/// The table is a fixed list of ranges, grouped below by code point interval
/// for readability only.
#[inline]
fn is_zero_width(cp: u32) -> bool {
    match cp {
        // U+0300 ..= U+05FF
        0x0300..=0x036F | 0x0483..=0x0489 | 0x0591..=0x05BD | 0x05BF | 0x05C1..=0x05C2
        | 0x05C4..=0x05C5 | 0x05C7 => true,

        // U+0600 ..= U+08FF
        0x0600..=0x0605 | 0x0610..=0x061A | 0x064B..=0x065F | 0x0670 | 0x06D6..=0x06DD
        | 0x06DF..=0x06E4 | 0x06E7..=0x06E8 | 0x06EA..=0x06ED | 0x070F | 0x0711
        | 0x0730..=0x074A | 0x07A6..=0x07B0 | 0x07EB..=0x07F3 | 0x07FD | 0x0816..=0x0819
        | 0x081B..=0x0823 | 0x0825..=0x0827 | 0x0829..=0x082D | 0x0859..=0x085B
        | 0x08D3..=0x08E1 | 0x08E3..=0x0902 => true,

        // U+0900 ..= U+09FF
        0x093A | 0x093C | 0x0941..=0x0948 | 0x094D | 0x0951..=0x0957 | 0x0962..=0x0963
        | 0x0981 | 0x09BC | 0x09C1..=0x09C4 | 0x09CD | 0x09E2..=0x09E3 | 0x09FE => true,

        // U+0A00 ..= U+0AFF
        0x0A01..=0x0A02 | 0x0A3C | 0x0A41..=0x0A42 | 0x0A47..=0x0A48 | 0x0A4B..=0x0A4D
        | 0x0A51 | 0x0A70..=0x0A71 | 0x0A75 | 0x0A81..=0x0A82 | 0x0ABC | 0x0AC1..=0x0AC5
        | 0x0AC7..=0x0AC8 | 0x0ACD | 0x0AE2..=0x0AE3 | 0x0AFA..=0x0AFF => true,

        // U+0B00 ..= U+0BFF
        0x0B01 | 0x0B3C | 0x0B3F | 0x0B41..=0x0B44 | 0x0B4D | 0x0B56 | 0x0B62..=0x0B63
        | 0x0B82 | 0x0BC0 | 0x0BCD => true,

        // U+0C00 ..= U+0CFF
        0x0C00 | 0x0C04 | 0x0C3E..=0x0C40 | 0x0C46..=0x0C48 | 0x0C4A..=0x0C4D
        | 0x0C55..=0x0C56 | 0x0C62..=0x0C63 | 0x0C81 | 0x0CBC | 0x0CBF | 0x0CC6
        | 0x0CCC..=0x0CCD | 0x0CE2..=0x0CE3 => true,

        // U+0D00 ..= U+0EFF
        0x0D00..=0x0D01 | 0x0D3B..=0x0D3C | 0x0D41..=0x0D44 | 0x0D4D | 0x0D62..=0x0D63
        | 0x0DCA | 0x0DD2..=0x0DD4 | 0x0DD6 | 0x0E31 | 0x0E34..=0x0E3A | 0x0E47..=0x0E4E
        | 0x0EB1 | 0x0EB4..=0x0EBC | 0x0EC8..=0x0ECD => true,

        // U+0F00 ..= U+0FFF
        0x0F18..=0x0F19 | 0x0F35 | 0x0F37 | 0x0F39 | 0x0F71..=0x0F7E | 0x0F80..=0x0F84
        | 0x0F86..=0x0F87 | 0x0F8D..=0x0F97 | 0x0F99..=0x0FBC | 0x0FC6 => true,

        // U+1000 ..= U+13FF
        0x102D..=0x1030 | 0x1032..=0x1037 | 0x1039..=0x103A | 0x103D..=0x103E
        | 0x1058..=0x1059 | 0x105E..=0x1060 | 0x1071..=0x1074 | 0x1082 | 0x1085..=0x1086
        | 0x108D | 0x109D | 0x1160..=0x11FF | 0x135D..=0x135F => true,

        // U+1700 ..= U+19FF
        0x1712..=0x1714 | 0x1732..=0x1734 | 0x1752..=0x1753 | 0x1772..=0x1773
        | 0x17B4..=0x17B5 | 0x17B7..=0x17BD | 0x17C6 | 0x17C9..=0x17D3 | 0x17DD
        | 0x180B..=0x180D | 0x1885..=0x1886 | 0x18A9 | 0x1920..=0x1922 | 0x1927..=0x1928
        | 0x1932 | 0x1939..=0x193B => true,

        // U+1A00 ..= U+1AFF
        0x1A17..=0x1A18 | 0x1A1B | 0x1A56 | 0x1A58..=0x1A5E | 0x1A60 | 0x1A62
        | 0x1A65..=0x1A6C | 0x1A73..=0x1A7C | 0x1A7F | 0x1AB0..=0x1ABE => true,

        // U+1B00 ..= U+1BFF
        0x1B00..=0x1B03 | 0x1B34 | 0x1B36..=0x1B3A | 0x1B3C | 0x1B42 | 0x1B6B..=0x1B73
        | 0x1B80..=0x1B81 | 0x1BA2..=0x1BA5 | 0x1BA8..=0x1BA9 | 0x1BAB..=0x1BAD | 0x1BE6
        | 0x1BE8..=0x1BE9 | 0x1BED | 0x1BEF..=0x1BF1 => true,

        // U+1C00 ..= U+1DFF
        0x1C2C..=0x1C33 | 0x1C36..=0x1C37 | 0x1CD0..=0x1CD2 | 0x1CD4..=0x1CE0
        | 0x1CE2..=0x1CE8 | 0x1CED | 0x1CF4 | 0x1CF8..=0x1CF9 | 0x1DC0..=0x1DF9
        | 0x1DFB..=0x1DFF => true,

        // U+2000 ..= U+20FF
        0x200B..=0x200F | 0x202A..=0x202E | 0x2060..=0x2064 | 0x2066..=0x206F
        | 0x20D0..=0x20F0 => true,

        // U+FE00 ..= U+FFFF
        0xFE00..=0xFE0F | 0xFE20..=0xFE2F | 0xFEFF | 0xFFF9..=0xFFFB => true,

        // Supplementary planes
        0x1D167..=0x1D169 | 0x1D173..=0x1D182 | 0x1D185..=0x1D18B | 0x1D1AA..=0x1D1AD
        | 0x1D242..=0x1D244 | 0xE0001 | 0xE0020..=0xE007F | 0xE0100..=0xE01EF => true,

        _ => false,
    }
}
1027
/// Returns `true` when `cp` is in this module's table of code points that
/// occupy two display columns.
///
/// The table is a fixed list of ranges, grouped below by code point interval
/// for readability only.
#[inline]
fn is_wide_char(cp: u32) -> bool {
    match cp {
        // U+1100 ..= U+25FF
        0x1100..=0x115F | 0x231A..=0x231B | 0x2329..=0x232A | 0x23E9..=0x23F3
        | 0x23F8..=0x23FA | 0x25FD..=0x25FE => true,

        // U+2600 ..= U+26FF
        0x2614..=0x2615 | 0x2648..=0x2653 | 0x267F | 0x2693 | 0x26A1 | 0x26AA..=0x26AB
        | 0x26BD..=0x26BE | 0x26C4..=0x26C5 | 0x26CE | 0x26D4 | 0x26EA | 0x26F2..=0x26F3
        | 0x26F5 | 0x26FA | 0x26FD => true,

        // U+2700 ..= U+27FF
        0x2702 | 0x2705 | 0x2708..=0x270D | 0x270F | 0x2712 | 0x2714 | 0x2716 | 0x271D
        | 0x2721 | 0x2728 | 0x2733..=0x2734 | 0x2744 | 0x2747 | 0x274C | 0x274E
        | 0x2753..=0x2755 | 0x2757 | 0x2763..=0x2764 | 0x2795..=0x2797 | 0x27A1 | 0x27B0
        | 0x27BF => true,

        // U+2900 ..= U+2BFF
        0x2934..=0x2935 | 0x2B05..=0x2B07 | 0x2B1B..=0x2B1C | 0x2B50 | 0x2B55 => true,

        // U+2E80 ..= U+FFFF
        0x2E80..=0x303E | 0x3040..=0x33BF | 0x3400..=0x4DBF | 0x4E00..=0xA4CF
        | 0xA960..=0xA97C | 0xAC00..=0xD7A3 | 0xF900..=0xFAFF | 0xFE10..=0xFE19
        | 0xFE30..=0xFE6F | 0xFF01..=0xFF60 | 0xFFE0..=0xFFE6 => true,

        // U+1F000 ..= U+1F2FF
        0x1F004 | 0x1F0CF | 0x1F170..=0x1F171 | 0x1F17E..=0x1F17F | 0x1F18E
        | 0x1F191..=0x1F19A | 0x1F1E0..=0x1F1FF | 0x1F200..=0x1F202 | 0x1F210..=0x1F23B
        | 0x1F240..=0x1F248 | 0x1F250..=0x1F251 | 0x1F260..=0x1F265 => true,

        // U+1F300 ..= U+1FAFF
        0x1F300..=0x1F64F | 0x1F680..=0x1F6FF | 0x1F900..=0x1F9FF | 0x1FA00..=0x1FA6F
        | 0x1FA70..=0x1FAFF => true,

        // Planes 2 and 3
        0x20000..=0x2FFFD | 0x30000..=0x3FFFD => true,

        _ => false,
    }
}
1114
/// Computes the widest line of `data` in display columns, byte-oriented.
///
/// Column rules:
/// - bytes 0x21..=0x7E and SPACE advance the column by one;
/// - TAB advances to the next multiple of 8;
/// - CR moves the column back to 0 without ending the line;
/// - LF and FF end the line and reset both column and line width;
/// - every other byte (controls, bytes >= 0x80) has zero width.
///
/// A final line without a terminator is included.
pub fn max_line_length_c(data: &[u8]) -> u64 {
    let is_graphic = |byte: &u8| (0x21..=0x7E).contains(byte);

    let mut longest = 0u64; // widest finished line so far
    let mut widest = 0u64; // widest column reached on the current line
    let mut column = 0u64; // current column on the current line
    let mut pos = 0usize;

    while pos < data.len() {
        // Fast path: swallow a whole run of printable, non-space bytes.
        let run = data[pos..].iter().take_while(|&byte| is_graphic(byte)).count();
        if run > 0 {
            column += run as u64;
            widest = widest.max(column);
            pos += run;
            continue;
        }

        match data[pos] {
            b' ' => {
                column += 1;
                widest = widest.max(column);
            }
            b'\t' => {
                column = (column + 8) & !7;
                widest = widest.max(column);
            }
            b'\r' => column = 0,
            b'\n' | 0x0C => {
                longest = longest.max(widest);
                column = 0;
                widest = 0;
            }
            _ => {}
        }
        pos += 1;
    }

    longest.max(widest)
}
1196
1197pub fn max_line_length_utf8(data: &[u8]) -> u64 {
1204 let mut max_len: u64 = 0;
1205 let mut line_len: u64 = 0;
1206 let mut linepos: u64 = 0;
1207 let mut i = 0;
1208 let len = data.len();
1209
1210 while i < len {
1211 let b = unsafe { *data.get_unchecked(i) };
1212
1213 if b >= 0x21 && b <= 0x7E {
1214 i += 1;
1216 let mut run = 1u64;
1217 while i < len {
1218 let b = unsafe { *data.get_unchecked(i) };
1219 if b >= 0x21 && b <= 0x7E {
1220 run += 1;
1221 i += 1;
1222 } else {
1223 break;
1224 }
1225 }
1226 linepos += run;
1227 if linepos > line_len {
1228 line_len = linepos;
1229 }
1230 } else if b < 0x80 {
1231 match b {
1233 b' ' => {
1234 linepos += 1;
1235 if linepos > line_len {
1236 line_len = linepos;
1237 }
1238 }
1239 b'\n' => {
1240 if line_len > max_len {
1241 max_len = line_len;
1242 }
1243 linepos = 0;
1244 line_len = 0;
1245 }
1246 b'\t' => {
1247 linepos = (linepos + 8) & !7;
1248 if linepos > line_len {
1249 line_len = linepos;
1250 }
1251 }
1252 b'\r' => {
1253 linepos = 0;
1254 }
1255 0x0C => {
1256 if line_len > max_len {
1257 max_len = line_len;
1258 }
1259 linepos = 0;
1260 line_len = 0;
1261 }
1262 _ => {} }
1264 i += 1;
1265 } else {
1266 let (cp, blen) = decode_utf8(&data[i..]);
1268
1269 if cp <= 0x9F {
1271 } else if is_zero_width(cp) {
1273 } else if is_wide_char(cp) {
1275 linepos += 2;
1276 if linepos > line_len {
1277 line_len = linepos;
1278 }
1279 } else {
1280 linepos += 1;
1282 if linepos > line_len {
1283 line_len = linepos;
1284 }
1285 }
1286 i += blen;
1287 }
1288 }
1289
1290 if line_len > max_len {
1292 max_len = line_len;
1293 }
1294
1295 max_len
1296}
1297
1298#[inline]
1300pub fn max_line_length(data: &[u8], utf8: bool) -> u64 {
1301 if utf8 {
1302 max_line_length_utf8(data)
1303 } else {
1304 max_line_length_c(data)
1305 }
1306}
1307
1308pub fn count_all(data: &[u8], utf8: bool) -> WcCounts {
1320 if utf8 {
1321 let (lines, words) = count_lines_words_utf8_fused(data);
1322 WcCounts {
1323 lines,
1324 words,
1325 bytes: data.len() as u64,
1326 chars: count_chars_utf8(data),
1327 max_line_length: max_line_length_utf8(data),
1328 }
1329 } else {
1330 WcCounts {
1331 lines: count_lines(data),
1332 words: count_words_locale(data, false),
1333 bytes: data.len() as u64,
1334 chars: data.len() as u64,
1335 max_line_length: max_line_length_c(data),
1336 }
1337 }
1338}
1339
/// Heuristic ASCII probe: inspects up to three windows of at most 256 bytes
/// (start, middle, end) and returns `true` when none of them contains a byte
/// >= 0x80.
///
/// This is a sample, not a proof: non-ASCII bytes outside the inspected
/// windows are not detected. Empty input yields `true`.
#[inline]
fn check_ascii_sample(data: &[u8]) -> bool {
    let len = data.len();
    let window = len.min(256);

    // Head window.
    if !data[..window].is_ascii() {
        return false;
    }

    // Middle window, only when it cannot be fully covered by head + tail.
    if len > window * 2 {
        let start = (len / 2).saturating_sub(window / 2);
        let end = (start + window).min(len);
        if !data[start..end].is_ascii() {
            return false;
        }
    }

    // Tail window, only when the head did not already cover everything.
    if len > window && !data[len - window..].is_ascii() {
        return false;
    }

    true
}
1398
1399fn split_at_newlines(data: &[u8], num_chunks: usize) -> Vec<&[u8]> {
1408 if data.is_empty() || num_chunks <= 1 {
1409 return vec![data];
1410 }
1411 let chunk_size = data.len() / num_chunks;
1412 let mut chunks = Vec::with_capacity(num_chunks);
1413 let mut pos = 0;
1414
1415 for _ in 0..num_chunks - 1 {
1416 let target = pos + chunk_size;
1417 if target >= data.len() {
1418 break;
1419 }
1420 let boundary = memchr::memchr(b'\n', &data[target..])
1421 .map(|p| target + p + 1)
1422 .unwrap_or(data.len());
1423 if boundary > pos {
1424 chunks.push(&data[pos..boundary]);
1425 }
1426 pos = boundary;
1427 }
1428 if pos < data.len() {
1429 chunks.push(&data[pos..]);
1430 }
1431 chunks
1432}
1433
1434pub fn count_lines_parallel(data: &[u8]) -> u64 {
1437 if data.len() < PARALLEL_THRESHOLD {
1438 return count_lines(data);
1439 }
1440
1441 let num_threads = rayon::current_num_threads().max(1);
1442 let chunk_size = (data.len() / num_threads).max(2 * 1024 * 1024);
1444
1445 data.par_chunks(chunk_size)
1446 .map(|chunk| memchr_iter(b'\n', chunk).count() as u64)
1447 .sum()
1448}
1449
1450pub fn count_words_parallel(data: &[u8], utf8: bool) -> u64 {
1452 if data.len() < PARALLEL_THRESHOLD {
1453 return count_words_locale(data, utf8);
1454 }
1455
1456 let num_threads = rayon::current_num_threads().max(1);
1457
1458 if utf8 {
1459 let chunks = split_at_newlines(data, num_threads);
1462 chunks.par_iter().map(|chunk| count_words_utf8(chunk)).sum()
1463 } else {
1464 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1466
1467 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1468
1469 let results: Vec<(u64, u64, bool, bool)> = chunks
1471 .par_iter()
1472 .map(|chunk| count_lw_c_chunk(chunk))
1473 .collect();
1474
1475 let mut total = 0u64;
1476 for i in 0..results.len() {
1477 total += results[i].1;
1478 if i > 0 && results[i - 1].3 && results[i].2 {
1482 total -= 1;
1483 }
1484 }
1485 total
1486 }
1487}
1488
1489pub fn count_chars_parallel(data: &[u8], utf8: bool) -> u64 {
1491 if !utf8 {
1492 return data.len() as u64;
1493 }
1494 if data.len() < PARALLEL_THRESHOLD {
1495 return count_chars_utf8(data);
1496 }
1497
1498 let num_threads = rayon::current_num_threads().max(1);
1499 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1500
1501 data.par_chunks(chunk_size).map(count_chars_utf8).sum()
1502}
1503
1504pub fn count_lwb(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1507 let (lines, words) = count_lines_words(data, utf8);
1508 (lines, words, data.len() as u64)
1509}
1510
1511pub fn count_lwb_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1517 if data.len() < PARALLEL_THRESHOLD {
1518 return count_lwb(data, utf8);
1520 }
1521
1522 let num_threads = rayon::current_num_threads().max(1);
1523
1524 let (lines, words) = if !utf8 {
1525 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1527
1528 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1529 let results: Vec<(u64, u64, bool, bool)> = chunks
1530 .par_iter()
1531 .map(|chunk| count_lw_c_chunk_fast(chunk))
1532 .collect();
1533
1534 let mut line_total = 0u64;
1535 let mut word_total = 0u64;
1536 for i in 0..results.len() {
1537 line_total += results[i].0;
1538 word_total += results[i].1;
1539 if i > 0 && results[i - 1].3 && results[i].2 {
1540 word_total -= 1;
1541 }
1542 }
1543
1544 (line_total, word_total)
1545 } else {
1546 let is_ascii = check_ascii_sample(data);
1548 if is_ascii {
1549 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1551 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1552 let results: Vec<(u64, u64, bool, bool)> = chunks
1553 .par_iter()
1554 .map(|chunk| count_lw_c_chunk_fast(chunk))
1555 .collect();
1556
1557 let mut line_total = 0u64;
1558 let mut word_total = 0u64;
1559 for i in 0..results.len() {
1560 line_total += results[i].0;
1561 word_total += results[i].1;
1562 if i > 0 && results[i - 1].3 && results[i].2 {
1563 word_total -= 1;
1564 }
1565 }
1566 (line_total, word_total)
1567 } else {
1568 let chunks = split_at_newlines(data, num_threads);
1571 let results: Vec<(u64, u64)> = chunks
1572 .par_iter()
1573 .map(|chunk| count_lines_words_utf8_fused(chunk))
1574 .collect();
1575 let mut line_total = 0u64;
1576 let mut word_total = 0u64;
1577 for (l, w) in results {
1578 line_total += l;
1579 word_total += w;
1580 }
1581 (line_total, word_total)
1582 }
1583 };
1584
1585 (lines, words, data.len() as u64)
1586}
1587
1588pub fn count_lwc_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1592 if data.len() < PARALLEL_THRESHOLD {
1593 let lines = count_lines(data);
1594 let words = count_words_locale(data, utf8);
1595 let chars = count_chars(data, utf8);
1596 return (lines, words, chars);
1597 }
1598
1599 let num_threads = rayon::current_num_threads().max(1);
1600
1601 if utf8 {
1602 let chunks = split_at_newlines(data, num_threads);
1604 let results: Vec<(u64, u64, u64)> = chunks
1605 .par_iter()
1606 .map(|chunk| {
1607 let (lines, words) = count_lines_words_utf8_fused(chunk);
1608 let chars = count_chars_utf8(chunk);
1609 (lines, words, chars)
1610 })
1611 .collect();
1612 let mut lines = 0u64;
1613 let mut words = 0u64;
1614 let mut chars = 0u64;
1615 for (l, w, c) in results {
1616 lines += l;
1617 words += w;
1618 chars += c;
1619 }
1620 (lines, words, chars)
1621 } else {
1622 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1624 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1625 let results: Vec<(u64, u64, bool, bool)> = chunks
1626 .par_iter()
1627 .map(|chunk| count_lw_c_chunk_fast(chunk))
1628 .collect();
1629 let mut lines = 0u64;
1630 let mut words = 0u64;
1631 for i in 0..results.len() {
1632 lines += results[i].0;
1633 words += results[i].1;
1634 if i > 0 && results[i - 1].3 && results[i].2 {
1635 words -= 1;
1636 }
1637 }
1638 (lines, words, data.len() as u64)
1639 }
1640}
1641
1642pub fn max_line_length_parallel(data: &[u8], utf8: bool) -> u64 {
1646 if data.len() < PARALLEL_THRESHOLD {
1647 return max_line_length(data, utf8);
1648 }
1649 let num_threads = rayon::current_num_threads().max(1);
1650 let chunks = split_at_newlines(data, num_threads);
1651 chunks
1652 .par_iter()
1653 .map(|chunk| {
1654 if utf8 {
1655 max_line_length_utf8(chunk)
1656 } else {
1657 max_line_length_c(chunk)
1658 }
1659 })
1660 .max()
1661 .unwrap_or(0)
1662}
1663
1664pub fn count_all_parallel(data: &[u8], utf8: bool) -> WcCounts {
1668 if data.len() < PARALLEL_THRESHOLD {
1669 return count_all(data, utf8);
1670 }
1671
1672 let num_threads = rayon::current_num_threads().max(1);
1673 let chunks = split_at_newlines(data, num_threads);
1674
1675 if utf8 {
1676 let results: Vec<(u64, u64, u64, u64)> = chunks
1677 .par_iter()
1678 .map(|chunk| {
1679 let (lines, words) = count_lines_words_utf8_fused(chunk);
1680 let chars = count_chars_utf8(chunk);
1681 let max_ll = max_line_length_utf8(chunk);
1682 (lines, words, chars, max_ll)
1683 })
1684 .collect();
1685
1686 let mut counts = WcCounts {
1687 bytes: data.len() as u64,
1688 ..Default::default()
1689 };
1690 for (l, w, c, m) in results {
1691 counts.lines += l;
1692 counts.words += w;
1693 counts.chars += c;
1694 if m > counts.max_line_length {
1695 counts.max_line_length = m;
1696 }
1697 }
1698 counts
1699 } else {
1700 let results: Vec<(u64, u64, u64)> = chunks
1702 .par_iter()
1703 .map(|chunk| {
1704 let (lines, words) = count_lines_words(chunk, false);
1705 let max_ll = max_line_length_c(chunk);
1706 (lines, words, max_ll)
1707 })
1708 .collect();
1709
1710 let mut counts = WcCounts {
1711 bytes: data.len() as u64,
1712 chars: data.len() as u64,
1713 ..Default::default()
1714 };
1715 for (l, w, m) in &results {
1716 counts.lines += l;
1717 counts.words += w;
1718 if *m > counts.max_line_length {
1719 counts.max_line_length = *m;
1720 }
1721 }
1722 counts
1723 }
1724}