1use memchr::memchr_iter;
2use rayon::prelude::*;
3
/// Input size in bytes (1 MiB) below which the `*_parallel` counters in this file skip rayon
/// and count on the calling thread.
const PARALLEL_THRESHOLD: usize = 1024 * 1024;
8
/// Bundle of all counters produced for one input buffer (see `count_all`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct WcCounts {
    /// Number of `\n` bytes in the input.
    pub lines: u64,
    /// Number of words under the selected locale rules.
    pub words: u64,
    /// Length of the input in bytes.
    pub bytes: u64,
    /// Number of characters; `count_all` sets this to the byte length in the non-UTF-8 case.
    pub chars: u64,
    /// Largest line width found in the input, as computed by the `max_line_length_*` functions.
    pub max_line_length: u64,
}
18
/// Builds the C-locale byte classification table.
///
/// Entry `b` is `1` when byte `b` is ASCII whitespace (TAB, LF, VT, FF, CR or space) and `0`
/// for every other byte value.
const fn make_byte_class_c() -> [u8; 256] {
    const WHITESPACE: [u8; 6] = [0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x20];

    let mut table = [0u8; 256];
    let mut k = 0;
    // `for` loops are not available in `const fn`, hence the manual index loop.
    while k < WHITESPACE.len() {
        table[WHITESPACE[k] as usize] = 1;
        k += 1;
    }
    table
}

/// Compile-time instance of the C-locale table: `1` = whitespace, `0` = word byte.
const BYTE_CLASS_C: [u8; 256] = make_byte_class_c();
51
/// Builds the UTF-8-mode byte classification table.
///
/// Classes: `0` = printable ASCII (`0x21..=0x7E`), `1` = ASCII whitespace (TAB, LF, VT, FF, CR,
/// space), `2` = everything else (ASCII control bytes, `0x7F`, and all bytes `>= 0x80`).
const fn make_byte_class_utf8() -> [u8; 256] {
    // Start from "everything is class 2" and only reclassify the ASCII half.
    let mut table = [2u8; 256];
    let mut b = 0usize;
    while b < 0x80 {
        if b == 0x20 || (b >= 0x09 && b <= 0x0D) {
            table[b] = 1;
        } else if b >= 0x21 && b <= 0x7E {
            table[b] = 0;
        }
        b += 1;
    }
    table
}

/// Compile-time instance of the UTF-8-mode table (see `make_byte_class_utf8` for the classes).
const BYTE_CLASS_UTF8: [u8; 256] = make_byte_class_utf8();
73
/// Returns `true` when `cp` is one of the non-ASCII code points this module treats as a word
/// separator: U+00A0, U+1680, U+2000..=U+200A, U+2028, U+2029, U+202F, U+205F and U+3000.
#[inline]
fn is_unicode_space(cp: u32) -> bool {
    const ISOLATED: [u32; 7] = [0x00A0, 0x1680, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000];

    (0x2000..=0x200A).contains(&cp) || ISOLATED.contains(&cp)
}
95
/// Returns `true` for code points at or above U+00A0; the word counters only call this for
/// decoded multi-byte sequences, so everything below U+00A0 is treated as non-printable there.
#[inline]
fn is_unicode_printable(cp: u32) -> bool {
    const FIRST_PRINTABLE: u32 = 0xA0;
    FIRST_PRINTABLE <= cp
}
103
104#[inline]
111pub fn count_lines(data: &[u8]) -> u64 {
112 memchr_iter(b'\n', data).count() as u64
113}
114
/// Returns the length of `data` in bytes.
#[inline]
pub fn count_bytes(data: &[u8]) -> u64 {
    let byte_len: usize = data.len();
    byte_len as u64
}
120
121pub fn count_words(data: &[u8]) -> u64 {
123 count_words_locale(data, true)
124}
125
126pub fn count_words_locale(data: &[u8], utf8: bool) -> u64 {
133 if utf8 {
134 count_words_utf8(data)
135 } else {
136 count_words_c(data)
137 }
138}
139
140fn count_words_c(data: &[u8]) -> u64 {
146 let mut words = 0u64;
147 let mut in_word = false;
148 let mut i = 0;
149 let len = data.len();
150
151 while i < len {
152 let b = unsafe { *data.get_unchecked(i) };
153 let class = unsafe { *BYTE_CLASS_C.get_unchecked(b as usize) };
154 if class == 1 {
155 in_word = false;
157 i += 1;
158 } else {
159 if !in_word {
161 in_word = true;
162 words += 1;
163 }
164 i += 1;
165 while i < len {
167 let b = unsafe { *data.get_unchecked(i) };
168 let class = unsafe { *BYTE_CLASS_C.get_unchecked(b as usize) };
169 if class == 0 {
170 i += 1;
171 } else {
172 break;
173 }
174 }
175 }
176 }
177 words
178}
179
/// AVX2 kernel that counts newlines and C-locale words in `data`, 32 bytes per iteration.
///
/// Returns `(lines, words, first_is_word, ends_in_word)`, where the two flags say whether the
/// first byte is a word byte and whether the last processed byte was one. The parallel callers
/// in this file use those flags to merge a word that straddles two adjacent chunks.
///
/// # Safety
///
/// The CPU must support AVX2. `count_lw_c_chunk_fast` checks this with
/// `is_x86_feature_detected!("avx2")` before calling.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_lw_c_chunk_avx2(data: &[u8]) -> (u64, u64, bool, bool) {
    use std::arch::x86_64::*;

    let len = data.len();
    let ptr = data.as_ptr();
    let mut i = 0usize;
    let mut total_lines = 0u64;
    let mut total_words = 0u64;
    // Whether the byte just before the current position was a word byte; carried across vectors.
    let mut prev_was_word = false;

    // SAFETY: every load and pointer read below is guarded by `i + 32 <= len` or `i < len`,
    // so it stays inside `data`; table lookups index a 256-entry array with a `u8`.
    unsafe {
        let nl_byte = _mm256_set1_epi8(b'\n' as i8);
        let zero = _mm256_setzero_si256();
        let ones = _mm256_set1_epi8(1);
        let space_char = _mm256_set1_epi8(0x20i8);
        // Exclusive bounds for the 0x09..=0x0D whitespace range (signed byte compares).
        let tab_lo = _mm256_set1_epi8(0x08i8);
        let tab_hi = _mm256_set1_epi8(0x0Ei8);
        // Per-lane 8-bit newline counters; flushed before any lane can exceed 255.
        let mut line_acc = _mm256_setzero_si256();
        let mut batch = 0u32;

        while i + 32 <= len {
            let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
            let is_nl = _mm256_cmpeq_epi8(v, nl_byte);
            // Each matching lane contributes exactly 1 to its counter.
            line_acc = _mm256_add_epi8(line_acc, _mm256_and_si256(is_nl, ones));

            let is_sp = _mm256_cmpeq_epi8(v, space_char);
            // The compares are signed, so bytes >= 0x80 are negative, fail `gt_08`,
            // and end up classified as word bytes.
            let gt_08 = _mm256_cmpgt_epi8(v, tab_lo);
            let lt_0e = _mm256_cmpgt_epi8(tab_hi, v);
            let is_tab_range = _mm256_and_si256(gt_08, lt_0e);
            let is_space = _mm256_or_si256(is_sp, is_tab_range);
            let is_word = _mm256_andnot_si256(is_space, _mm256_set1_epi8(-1));

            // Bit k of `word_mask` is set when byte k is a word byte. A word starts at every
            // set bit whose predecessor (bit k-1, or the carry for k == 0) is clear.
            let word_mask = _mm256_movemask_epi8(is_word) as u32;
            let prev_mask = (word_mask << 1) | (prev_was_word as u32);
            total_words += (word_mask & !prev_mask).count_ones() as u64;
            prev_was_word = (word_mask >> 31) & 1 == 1;

            batch += 1;
            if batch >= 255 {
                // Horizontal sum of the 32 byte counters into one 64-bit total.
                let sad = _mm256_sad_epu8(line_acc, zero);
                let hi = _mm256_extracti128_si256(sad, 1);
                let lo = _mm256_castsi256_si128(sad);
                let s = _mm_add_epi64(lo, hi);
                let h64 = _mm_unpackhi_epi64(s, s);
                let t = _mm_add_epi64(s, h64);
                total_lines += _mm_cvtsi128_si64(t) as u64;
                line_acc = _mm256_setzero_si256();
                batch = 0;
            }
            i += 32;
        }

        // Flush whatever is left in the lane counters.
        if batch > 0 {
            let sad = _mm256_sad_epu8(line_acc, zero);
            let hi = _mm256_extracti128_si256(sad, 1);
            let lo = _mm256_castsi256_si128(sad);
            let s = _mm_add_epi64(lo, hi);
            let h64 = _mm_unpackhi_epi64(s, s);
            let t = _mm_add_epi64(s, h64);
            total_lines += _mm_cvtsi128_si64(t) as u64;
        }

        // Scalar tail for the final < 32 bytes, continuing from the SIMD word state.
        while i < len {
            let b = *ptr.add(i);
            if b == b'\n' {
                total_lines += 1;
                prev_was_word = false;
            } else if *BYTE_CLASS_C.get_unchecked(b as usize) == 1 {
                prev_was_word = false;
            } else {
                if !prev_was_word {
                    total_words += 1;
                }
                prev_was_word = true;
            }
            i += 1;
        }
    }

    let first_is_word = !data.is_empty() && BYTE_CLASS_C[data[0] as usize] == 0;
    (total_lines, total_words, first_is_word, prev_was_word)
}
276
/// SSE2 kernel that counts newlines and C-locale words in `data`, 16 bytes per iteration.
///
/// Same contract as `count_lw_c_chunk_avx2`: returns
/// `(lines, words, first_is_word, ends_in_word)`.
///
/// # Safety
///
/// The CPU must support SSE2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn count_lw_c_chunk_sse2(data: &[u8]) -> (u64, u64, bool, bool) {
    use std::arch::x86_64::*;

    let len = data.len();
    let ptr = data.as_ptr();
    let mut i = 0usize;
    let mut total_lines = 0u64;
    let mut total_words = 0u64;
    // Whether the byte just before the current position was a word byte; carried across vectors.
    let mut prev_was_word = false;

    // SAFETY: every load and pointer read below is guarded by `i + 16 <= len` or `i < len`,
    // so it stays inside `data`; table lookups index a 256-entry array with a `u8`.
    unsafe {
        let nl_byte = _mm_set1_epi8(b'\n' as i8);
        let zero = _mm_setzero_si128();
        let ones = _mm_set1_epi8(1);
        let space_char = _mm_set1_epi8(0x20i8);
        // Exclusive bounds for the 0x09..=0x0D whitespace range (signed byte compares).
        let tab_lo = _mm_set1_epi8(0x08i8);
        let tab_hi = _mm_set1_epi8(0x0Ei8);

        // Per-lane 8-bit newline counters; flushed before any lane can exceed 255.
        let mut line_acc = _mm_setzero_si128();
        let mut batch = 0u32;

        while i + 16 <= len {
            let v = _mm_loadu_si128(ptr.add(i) as *const __m128i);
            let is_nl = _mm_cmpeq_epi8(v, nl_byte);
            line_acc = _mm_add_epi8(line_acc, _mm_and_si128(is_nl, ones));

            let is_sp = _mm_cmpeq_epi8(v, space_char);
            // Signed compares: bytes >= 0x80 fail `gt_08` and are classified as word bytes.
            let gt_08 = _mm_cmpgt_epi8(v, tab_lo);
            let lt_0e = _mm_cmpgt_epi8(tab_hi, v);
            let is_tab_range = _mm_and_si128(gt_08, lt_0e);
            let is_space = _mm_or_si128(is_sp, is_tab_range);
            let is_word = _mm_andnot_si128(is_space, _mm_set1_epi8(-1));

            // Only the low 16 bits of `word_mask` are populated. A word starts at every set
            // bit whose predecessor (bit k-1, or the carry for k == 0) is clear.
            let word_mask = _mm_movemask_epi8(is_word) as u32;
            let prev_mask = (word_mask << 1) | (prev_was_word as u32);
            total_words += (word_mask & !prev_mask).count_ones() as u64;
            prev_was_word = (word_mask >> 15) & 1 == 1;

            batch += 1;
            if batch >= 255 {
                // Horizontal sum of the 16 byte counters into one 64-bit total.
                let sad = _mm_sad_epu8(line_acc, zero);
                let hi = _mm_unpackhi_epi64(sad, sad);
                let t = _mm_add_epi64(sad, hi);
                total_lines += _mm_cvtsi128_si64(t) as u64;
                line_acc = _mm_setzero_si128();
                batch = 0;
            }
            i += 16;
        }

        // Flush whatever is left in the lane counters.
        if batch > 0 {
            let sad = _mm_sad_epu8(line_acc, zero);
            let hi = _mm_unpackhi_epi64(sad, sad);
            let t = _mm_add_epi64(sad, hi);
            total_lines += _mm_cvtsi128_si64(t) as u64;
        }

        // Scalar tail for the final < 16 bytes, continuing from the SIMD word state.
        while i < len {
            let b = *ptr.add(i);
            if b == b'\n' {
                total_lines += 1;
                prev_was_word = false;
            } else if *BYTE_CLASS_C.get_unchecked(b as usize) == 1 {
                prev_was_word = false;
            } else {
                if !prev_was_word {
                    total_words += 1;
                }
                prev_was_word = true;
            }
            i += 1;
        }
    }

    let first_is_word = !data.is_empty() && BYTE_CLASS_C[data[0] as usize] == 0;
    (total_lines, total_words, first_is_word, prev_was_word)
}
364
365#[inline]
367fn count_lw_c_chunk_fast(data: &[u8]) -> (u64, u64, bool, bool) {
368 #[cfg(target_arch = "x86_64")]
369 {
370 if is_x86_feature_detected!("avx2") && data.len() >= 64 {
371 return unsafe { count_lw_c_chunk_avx2(data) };
372 }
373 if data.len() >= 32 {
374 return unsafe { count_lw_c_chunk_sse2(data) };
375 }
376 }
377 count_lw_c_chunk(data)
378}
379
380fn count_lw_c_chunk(data: &[u8]) -> (u64, u64, bool, bool) {
385 let mut lines = 0u64;
386 let mut words = 0u64;
387 let mut in_word = false;
388 let mut first_is_word = false;
389 let mut seen_first = false;
390 let mut i = 0;
391 let len = data.len();
392
393 while i < len {
394 let b = unsafe { *data.get_unchecked(i) };
395 let class = unsafe { *BYTE_CLASS_C.get_unchecked(b as usize) };
396 if class == 1 {
397 if !seen_first {
399 seen_first = true;
400 }
402 if b == b'\n' {
403 lines += 1;
404 }
405 in_word = false;
406 i += 1;
407 } else {
408 if !seen_first {
410 seen_first = true;
411 first_is_word = true;
412 }
413 if !in_word {
414 in_word = true;
415 words += 1;
416 }
417 i += 1;
418 while i < len {
420 let b = unsafe { *data.get_unchecked(i) };
421 let class = unsafe { *BYTE_CLASS_C.get_unchecked(b as usize) };
422 if class == 0 {
423 i += 1;
424 } else {
425 break;
426 }
427 }
428 }
429 }
430 (lines, words, first_is_word, in_word)
431}
432
433fn count_words_utf8(data: &[u8]) -> u64 {
446 let mut words = 0u64;
447 let mut in_word = false;
448 let mut i = 0;
449 let len = data.len();
450
451 while i < len {
452 let b = unsafe { *data.get_unchecked(i) };
453
454 if b >= 0x21 && b <= 0x7E {
455 if !in_word {
457 in_word = true;
458 words += 1;
459 }
460 i += 1;
461 while i < len {
463 let b = unsafe { *data.get_unchecked(i) };
464 if b >= 0x21 && b <= 0x7E {
465 i += 1;
466 } else {
467 break;
468 }
469 }
470 } else if b < 0x80 {
471 let class = unsafe { *BYTE_CLASS_UTF8.get_unchecked(b as usize) };
473 if class == 1 {
474 in_word = false;
475 }
476 i += 1;
478 } else if b < 0xC2 {
479 i += 1;
480 } else if b < 0xE0 {
481 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
482 let cp = ((b as u32 & 0x1F) << 6)
483 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
484 if is_unicode_space(cp) {
485 in_word = false;
486 } else if is_unicode_printable(cp) {
487 if !in_word {
488 in_word = true;
489 words += 1;
490 }
491 }
492 i += 2;
493 } else {
494 i += 1;
495 }
496 } else if b < 0xF0 {
497 if i + 2 < len
498 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
499 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
500 {
501 let cp = ((b as u32 & 0x0F) << 12)
502 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
503 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
504 if is_unicode_space(cp) {
505 in_word = false;
506 } else if is_unicode_printable(cp) {
507 if !in_word {
508 in_word = true;
509 words += 1;
510 }
511 }
512 i += 3;
513 } else {
514 i += 1;
515 }
516 } else if b < 0xF5 {
517 if i + 3 < len
518 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
519 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
520 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
521 {
522 let cp = ((b as u32 & 0x07) << 18)
523 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
524 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
525 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
526 if is_unicode_space(cp) {
527 in_word = false;
528 } else if is_unicode_printable(cp) {
529 if !in_word {
530 in_word = true;
531 words += 1;
532 }
533 }
534 i += 4;
535 } else {
536 i += 1;
537 }
538 } else {
539 i += 1;
540 }
541 }
542
543 words
544}
545
546pub fn count_lines_words(data: &[u8], utf8: bool) -> (u64, u64) {
550 if utf8 {
551 count_lines_words_utf8_fused(data)
552 } else {
553 let (lines, words, _, _) = count_lw_c_chunk_fast(data);
554 (lines, words)
555 }
556}
557
558fn count_lines_words_utf8_fused(data: &[u8]) -> (u64, u64) {
565 let mut lines = 0u64;
566 let mut words = 0u64;
567 let mut in_word = false;
568 let mut i = 0;
569 let len = data.len();
570
571 while i < len {
572 let b = unsafe { *data.get_unchecked(i) };
573
574 if b >= 0x21 && b <= 0x7E {
575 if !in_word {
577 in_word = true;
578 words += 1;
579 }
580 i += 1;
581 while i < len {
583 let b = unsafe { *data.get_unchecked(i) };
584 if b >= 0x21 && b <= 0x7E {
585 i += 1;
586 } else {
587 break;
588 }
589 }
590 } else if b == b'\n' {
591 lines += 1;
592 in_word = false;
593 i += 1;
594 } else if b == b' ' {
595 in_word = false;
596 i += 1;
597 } else if b < 0x80 {
598 let class = unsafe { *BYTE_CLASS_UTF8.get_unchecked(b as usize) };
600 if class == 1 {
601 in_word = false;
602 }
603 i += 1;
605 } else if b < 0xC2 {
606 i += 1;
607 } else if b < 0xE0 {
608 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
609 let cp = ((b as u32 & 0x1F) << 6)
610 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
611 if is_unicode_space(cp) {
612 in_word = false;
613 } else if is_unicode_printable(cp) {
614 if !in_word {
615 in_word = true;
616 words += 1;
617 }
618 }
619 i += 2;
620 } else {
621 i += 1;
622 }
623 } else if b < 0xF0 {
624 if i + 2 < len
625 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
626 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
627 {
628 let cp = ((b as u32 & 0x0F) << 12)
629 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
630 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
631 if is_unicode_space(cp) {
632 in_word = false;
633 } else if is_unicode_printable(cp) {
634 if !in_word {
635 in_word = true;
636 words += 1;
637 }
638 }
639 i += 3;
640 } else {
641 i += 1;
642 }
643 } else if b < 0xF5 {
644 if i + 3 < len
645 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
646 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
647 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
648 {
649 let cp = ((b as u32 & 0x07) << 18)
650 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
651 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
652 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
653 if is_unicode_space(cp) {
654 in_word = false;
655 } else if is_unicode_printable(cp) {
656 if !in_word {
657 in_word = true;
658 words += 1;
659 }
660 }
661 i += 4;
662 } else {
663 i += 1;
664 }
665 } else {
666 i += 1;
667 }
668 }
669
670 (lines, words)
671}
672
673pub fn count_lines_words_chars(data: &[u8], utf8: bool) -> (u64, u64, u64) {
675 if utf8 {
676 let (lines, words) = count_lines_words_utf8_fused(data);
678 let chars = count_chars_utf8(data);
679 (lines, words, chars)
680 } else {
681 let (lines, words) = count_lines_words(data, false);
683 (lines, words, data.len() as u64)
684 }
685}
686
687pub fn count_chars_utf8(data: &[u8]) -> u64 {
694 #[cfg(target_arch = "x86_64")]
695 {
696 if is_x86_feature_detected!("avx2") {
697 return unsafe { count_chars_utf8_avx2(data) };
698 }
699 }
700 count_chars_utf8_scalar(data)
701}
702
/// AVX2 kernel that counts the bytes of `data` that are not UTF-8 continuation bytes
/// (`byte & 0xC0 != 0x80`), 32 bytes per iteration.
///
/// # Safety
///
/// The CPU must support AVX2. `count_chars_utf8` checks this with
/// `is_x86_feature_detected!("avx2")` before calling.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_chars_utf8_avx2(data: &[u8]) -> u64 {
    // SAFETY: every load and pointer read below is guarded by `i + 32 <= len` or `i < len`.
    unsafe {
        use std::arch::x86_64::*;

        let mask_c0 = _mm256_set1_epi8(0xC0u8 as i8);
        let val_80 = _mm256_set1_epi8(0x80u8 as i8);
        let ones = _mm256_set1_epi8(1);
        let zero = _mm256_setzero_si256();

        let mut total = 0u64;
        let len = data.len();
        let ptr = data.as_ptr();
        let mut i = 0;
        // Per-lane 8-bit counters; flushed before any lane can exceed 255.
        let mut acc = _mm256_setzero_si256();
        let mut batch = 0u32;

        while i + 32 <= len {
            let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
            let masked = _mm256_and_si256(v, mask_c0);
            let is_cont = _mm256_cmpeq_epi8(masked, val_80);
            // 1 in every lane that is NOT a continuation byte, 0 otherwise.
            let non_cont = _mm256_andnot_si256(is_cont, ones);
            acc = _mm256_add_epi8(acc, non_cont);

            batch += 1;
            if batch >= 255 {
                // Horizontal sum of the 32 byte counters into one 64-bit total.
                let sad = _mm256_sad_epu8(acc, zero);
                let hi = _mm256_extracti128_si256(sad, 1);
                let lo = _mm256_castsi256_si128(sad);
                let sum = _mm_add_epi64(lo, hi);
                let hi64 = _mm_unpackhi_epi64(sum, sum);
                let t = _mm_add_epi64(sum, hi64);
                total += _mm_cvtsi128_si64(t) as u64;
                acc = _mm256_setzero_si256();
                batch = 0;
            }
            i += 32;
        }

        // Flush whatever is left in the lane counters.
        if batch > 0 {
            let sad = _mm256_sad_epu8(acc, zero);
            let hi = _mm256_extracti128_si256(sad, 1);
            let lo = _mm256_castsi256_si128(sad);
            let sum = _mm_add_epi64(lo, hi);
            let hi64 = _mm_unpackhi_epi64(sum, sum);
            let t = _mm_add_epi64(sum, hi64);
            total += _mm_cvtsi128_si64(t) as u64;
        }

        // Scalar tail for the final < 32 bytes.
        while i < len {
            total += ((*ptr.add(i) & 0xC0) != 0x80) as u64;
            i += 1;
        }

        total
    }
}
766
/// Portable character counter: counts the bytes of `data` that are not UTF-8 continuation
/// bytes (`byte & 0xC0 != 0x80`).
///
/// Works in 64-byte blocks so that all-ASCII blocks can be credited with 64 characters
/// without inspecting each byte's top bits individually.
fn count_chars_utf8_scalar(data: &[u8]) -> u64 {
    const BLOCK: usize = 64;
    let starts_char = |byte: &u8| (*byte & 0xC0) != 0x80;

    let mut blocks = data.chunks_exact(BLOCK);
    let mut total = 0u64;

    for block in blocks.by_ref() {
        total += if block.is_ascii() {
            // No high bit anywhere in the block: every byte is its own character.
            BLOCK as u64
        } else {
            block.iter().filter(|b| starts_char(b)).count() as u64
        };
    }

    let tail = blocks.remainder();
    total + tail.iter().filter(|b| starts_char(b)).count() as u64
}
818
/// Character count in the C locale, where every byte is exactly one character.
#[inline]
pub fn count_chars_c(data: &[u8]) -> u64 {
    let one_char_per_byte = data.len();
    one_char_per_byte as u64
}
824
825#[inline]
827pub fn count_chars(data: &[u8], utf8: bool) -> u64 {
828 if utf8 {
829 count_chars_utf8(data)
830 } else {
831 count_chars_c(data)
832 }
833}
834
/// Reports whether the process locale selects UTF-8.
///
/// Looks at `LC_ALL`, `LC_CTYPE` and `LANG` in that order. The first variable that is set to
/// a non-empty (valid Unicode) value decides the answer: `true` when it contains `utf-8` or
/// `utf8`, compared case-insensitively. Returns `false` when none of them is usable.
pub fn is_utf8_locale() -> bool {
    let deciding_value = ["LC_ALL", "LC_CTYPE", "LANG"]
        .iter()
        .filter_map(|name| std::env::var(name).ok())
        .find(|value| !value.is_empty());

    match deciding_value {
        Some(value) => {
            let lower = value.to_ascii_lowercase();
            lower.contains("utf-8") || lower.contains("utf8")
        }
        None => false,
    }
}
847
/// Decodes the UTF-8 sequence at the start of `bytes`, returning `(code_point, length)`.
///
/// A lead byte in `0xC2..=0xF4` followed by the required number of continuation bytes yields
/// the decoded value and a length of 2, 3 or 4. Everything else (ASCII, stray continuation
/// bytes, `0xC0`/`0xC1`, `0xF5..`, truncated or broken sequences) yields the first byte's
/// value with length 1. Only the structure is checked: overlong forms, surrogates and values
/// above U+10FFFF are not rejected.
///
/// # Panics
///
/// Panics if `bytes` is empty.
#[inline]
fn decode_utf8(bytes: &[u8]) -> (u32, usize) {
    let lead = bytes[0];
    let is_cont = |idx: usize| matches!(bytes.get(idx), Some(&b) if b & 0xC0 == 0x80);
    // Low six payload bits of the continuation byte at `idx`.
    let payload = |idx: usize| u32::from(bytes[idx] & 0x3F);

    match lead {
        0xC2..=0xDF if is_cont(1) => {
            let cp = (u32::from(lead & 0x1F) << 6) | payload(1);
            (cp, 2)
        }
        0xE0..=0xEF if is_cont(1) && is_cont(2) => {
            let cp = (u32::from(lead & 0x0F) << 12) | (payload(1) << 6) | payload(2);
            (cp, 3)
        }
        0xF0..=0xF4 if is_cont(1) && is_cont(2) && is_cont(3) => {
            let cp = (u32::from(lead & 0x07) << 18)
                | (payload(1) << 12)
                | (payload(2) << 6)
                | payload(3);
            (cp, 4)
        }
        // ASCII, invalid lead bytes, and incomplete sequences all fall back to one raw byte.
        _ => (u32::from(lead), 1),
    }
}
891
/// Returns `true` when `cp` falls in one of the code-point ranges that
/// `max_line_length_utf8` treats as occupying no display columns.
///
/// NOTE(review): the table looks like it lists combining marks and format/control code
/// points; which Unicode version or `wcwidth` source it was derived from is not visible here.
#[inline]
fn is_zero_width(cp: u32) -> bool {
    matches!(
        cp,
        0x0300..=0x036F | 0x0483..=0x0489 | 0x0591..=0x05BD | 0x05BF
            | 0x05C1..=0x05C2
            | 0x05C4..=0x05C5
            | 0x05C7
            | 0x0600..=0x0605 | 0x0610..=0x061A | 0x064B..=0x065F | 0x0670
            | 0x06D6..=0x06DD
            | 0x06DF..=0x06E4
            | 0x06E7..=0x06E8
            | 0x06EA..=0x06ED
            | 0x070F
            | 0x0711
            | 0x0730..=0x074A
            | 0x07A6..=0x07B0
            | 0x07EB..=0x07F3
            | 0x07FD
            | 0x0816..=0x0819
            | 0x081B..=0x0823
            | 0x0825..=0x0827
            | 0x0829..=0x082D
            | 0x0859..=0x085B
            | 0x08D3..=0x08E1
            | 0x08E3..=0x0902
            | 0x093A
            | 0x093C
            | 0x0941..=0x0948
            | 0x094D
            | 0x0951..=0x0957
            | 0x0962..=0x0963
            | 0x0981
            | 0x09BC
            | 0x09C1..=0x09C4
            | 0x09CD
            | 0x09E2..=0x09E3
            | 0x09FE
            | 0x0A01..=0x0A02
            | 0x0A3C
            | 0x0A41..=0x0A42
            | 0x0A47..=0x0A48
            | 0x0A4B..=0x0A4D
            | 0x0A51
            | 0x0A70..=0x0A71
            | 0x0A75
            | 0x0A81..=0x0A82
            | 0x0ABC
            | 0x0AC1..=0x0AC5
            | 0x0AC7..=0x0AC8
            | 0x0ACD
            | 0x0AE2..=0x0AE3
            | 0x0AFA..=0x0AFF
            | 0x0B01
            | 0x0B3C
            | 0x0B3F
            | 0x0B41..=0x0B44
            | 0x0B4D
            | 0x0B56
            | 0x0B62..=0x0B63
            | 0x0B82
            | 0x0BC0
            | 0x0BCD
            | 0x0C00
            | 0x0C04
            | 0x0C3E..=0x0C40
            | 0x0C46..=0x0C48
            | 0x0C4A..=0x0C4D
            | 0x0C55..=0x0C56
            | 0x0C62..=0x0C63
            | 0x0C81
            | 0x0CBC
            | 0x0CBF
            | 0x0CC6
            | 0x0CCC..=0x0CCD
            | 0x0CE2..=0x0CE3
            | 0x0D00..=0x0D01
            | 0x0D3B..=0x0D3C
            | 0x0D41..=0x0D44
            | 0x0D4D
            | 0x0D62..=0x0D63
            | 0x0DCA
            | 0x0DD2..=0x0DD4
            | 0x0DD6
            | 0x0E31
            | 0x0E34..=0x0E3A
            | 0x0E47..=0x0E4E
            | 0x0EB1
            | 0x0EB4..=0x0EBC
            | 0x0EC8..=0x0ECD
            | 0x0F18..=0x0F19
            | 0x0F35
            | 0x0F37
            | 0x0F39
            | 0x0F71..=0x0F7E
            | 0x0F80..=0x0F84
            | 0x0F86..=0x0F87
            | 0x0F8D..=0x0F97
            | 0x0F99..=0x0FBC
            | 0x0FC6
            | 0x102D..=0x1030
            | 0x1032..=0x1037
            | 0x1039..=0x103A
            | 0x103D..=0x103E
            | 0x1058..=0x1059
            | 0x105E..=0x1060
            | 0x1071..=0x1074
            | 0x1082
            | 0x1085..=0x1086
            | 0x108D
            | 0x109D
            | 0x1160..=0x11FF | 0x135D..=0x135F
            | 0x1712..=0x1714
            | 0x1732..=0x1734
            | 0x1752..=0x1753
            | 0x1772..=0x1773
            | 0x17B4..=0x17B5
            | 0x17B7..=0x17BD
            | 0x17C6
            | 0x17C9..=0x17D3
            | 0x17DD
            | 0x180B..=0x180D
            | 0x1885..=0x1886
            | 0x18A9
            | 0x1920..=0x1922
            | 0x1927..=0x1928
            | 0x1932
            | 0x1939..=0x193B
            | 0x1A17..=0x1A18
            | 0x1A1B
            | 0x1A56
            | 0x1A58..=0x1A5E
            | 0x1A60
            | 0x1A62
            | 0x1A65..=0x1A6C
            | 0x1A73..=0x1A7C
            | 0x1A7F
            | 0x1AB0..=0x1ABE
            | 0x1B00..=0x1B03
            | 0x1B34
            | 0x1B36..=0x1B3A
            | 0x1B3C
            | 0x1B42
            | 0x1B6B..=0x1B73
            | 0x1B80..=0x1B81
            | 0x1BA2..=0x1BA5
            | 0x1BA8..=0x1BA9
            | 0x1BAB..=0x1BAD
            | 0x1BE6
            | 0x1BE8..=0x1BE9
            | 0x1BED
            | 0x1BEF..=0x1BF1
            | 0x1C2C..=0x1C33
            | 0x1C36..=0x1C37
            | 0x1CD0..=0x1CD2
            | 0x1CD4..=0x1CE0
            | 0x1CE2..=0x1CE8
            | 0x1CED
            | 0x1CF4
            | 0x1CF8..=0x1CF9
            | 0x1DC0..=0x1DF9
            | 0x1DFB..=0x1DFF
            | 0x200B..=0x200F | 0x202A..=0x202E | 0x2060..=0x2064 | 0x2066..=0x206F | 0x20D0..=0x20F0 | 0xFE00..=0xFE0F | 0xFE20..=0xFE2F | 0xFEFF | 0xFFF9..=0xFFFB | 0x1D167..=0x1D169
            | 0x1D173..=0x1D182
            | 0x1D185..=0x1D18B
            | 0x1D1AA..=0x1D1AD
            | 0x1D242..=0x1D244
            | 0xE0001
            | 0xE0020..=0xE007F
            | 0xE0100..=0xE01EF )
}
1082
/// Returns `true` when `cp` falls in one of the code-point ranges that
/// `max_line_length_utf8` counts as two display columns.
///
/// NOTE(review): the table looks like East Asian wide/fullwidth ranges plus emoji blocks;
/// the Unicode version it tracks is not visible here.
#[inline]
fn is_wide_char(cp: u32) -> bool {
    matches!(
        cp,
        0x1100..=0x115F | 0x231A..=0x231B | 0x2329..=0x232A | 0x23E9..=0x23F3 | 0x23F8..=0x23FA
            | 0x25FD..=0x25FE
            | 0x2614..=0x2615
            | 0x2648..=0x2653
            | 0x267F
            | 0x2693
            | 0x26A1
            | 0x26AA..=0x26AB
            | 0x26BD..=0x26BE
            | 0x26C4..=0x26C5
            | 0x26CE
            | 0x26D4
            | 0x26EA
            | 0x26F2..=0x26F3
            | 0x26F5
            | 0x26FA
            | 0x26FD
            | 0x2702
            | 0x2705
            | 0x2708..=0x270D
            | 0x270F
            | 0x2712
            | 0x2714
            | 0x2716
            | 0x271D
            | 0x2721
            | 0x2728
            | 0x2733..=0x2734
            | 0x2744
            | 0x2747
            | 0x274C
            | 0x274E
            | 0x2753..=0x2755
            | 0x2757
            | 0x2763..=0x2764
            | 0x2795..=0x2797
            | 0x27A1
            | 0x27B0
            | 0x27BF
            | 0x2934..=0x2935
            | 0x2B05..=0x2B07
            | 0x2B1B..=0x2B1C
            | 0x2B50
            | 0x2B55
            | 0x2E80..=0x303E | 0x3040..=0x33BF | 0x3400..=0x4DBF | 0x4E00..=0xA4CF | 0xA960..=0xA97C | 0xAC00..=0xD7A3 | 0xF900..=0xFAFF | 0xFE10..=0xFE19 | 0xFE30..=0xFE6F | 0xFF01..=0xFF60 | 0xFFE0..=0xFFE6 | 0x1F004
            | 0x1F0CF
            | 0x1F170..=0x1F171
            | 0x1F17E..=0x1F17F
            | 0x1F18E
            | 0x1F191..=0x1F19A
            | 0x1F1E0..=0x1F1FF | 0x1F200..=0x1F202
            | 0x1F210..=0x1F23B
            | 0x1F240..=0x1F248
            | 0x1F250..=0x1F251
            | 0x1F260..=0x1F265
            | 0x1F300..=0x1F64F | 0x1F680..=0x1F6FF | 0x1F900..=0x1F9FF | 0x1FA00..=0x1FA6F
            | 0x1FA70..=0x1FAFF
            | 0x20000..=0x2FFFD | 0x30000..=0x3FFFD )
}
1169
/// Computes the widest line in `data` under C-locale rules.
///
/// Width rules per byte:
/// * space and printable ASCII (`0x20..=0x7E`) advance the column by one;
/// * TAB advances to the next multiple of 8;
/// * CR returns the column to 0 without ending the line;
/// * LF and FF end the line;
/// * every other byte (controls, VT, DEL, bytes `>= 0x80`) has no width.
///
/// A final line without a terminator is included.
pub fn max_line_length_c(data: &[u8]) -> u64 {
    let mut widest_line = 0u64;
    // Furthest column reached on the current line (CR can move `column` back below this).
    let mut line_extent = 0u64;
    let mut column = 0u64;

    for &byte in data {
        match byte {
            0x20..=0x7E => {
                column += 1;
                line_extent = line_extent.max(column);
            }
            b'\t' => {
                column = (column + 8) & !7;
                line_extent = line_extent.max(column);
            }
            b'\n' | 0x0C => {
                widest_line = widest_line.max(line_extent);
                column = 0;
                line_extent = 0;
            }
            b'\r' => column = 0,
            _ => {}
        }
    }

    widest_line.max(line_extent)
}
1251
1252pub fn max_line_length_utf8(data: &[u8]) -> u64 {
1259 let mut max_len: u64 = 0;
1260 let mut line_len: u64 = 0;
1261 let mut linepos: u64 = 0;
1262 let mut i = 0;
1263 let len = data.len();
1264
1265 while i < len {
1266 let b = unsafe { *data.get_unchecked(i) };
1267
1268 if b >= 0x21 && b <= 0x7E {
1269 i += 1;
1271 let mut run = 1u64;
1272 while i < len {
1273 let b = unsafe { *data.get_unchecked(i) };
1274 if b >= 0x21 && b <= 0x7E {
1275 run += 1;
1276 i += 1;
1277 } else {
1278 break;
1279 }
1280 }
1281 linepos += run;
1282 if linepos > line_len {
1283 line_len = linepos;
1284 }
1285 } else if b < 0x80 {
1286 match b {
1288 b' ' => {
1289 linepos += 1;
1290 if linepos > line_len {
1291 line_len = linepos;
1292 }
1293 }
1294 b'\n' => {
1295 if line_len > max_len {
1296 max_len = line_len;
1297 }
1298 linepos = 0;
1299 line_len = 0;
1300 }
1301 b'\t' => {
1302 linepos = (linepos + 8) & !7;
1303 if linepos > line_len {
1304 line_len = linepos;
1305 }
1306 }
1307 b'\r' => {
1308 linepos = 0;
1309 }
1310 0x0C => {
1311 if line_len > max_len {
1312 max_len = line_len;
1313 }
1314 linepos = 0;
1315 line_len = 0;
1316 }
1317 _ => {} }
1319 i += 1;
1320 } else {
1321 let (cp, len) = decode_utf8(&data[i..]);
1323
1324 if cp <= 0x9F {
1326 } else if is_zero_width(cp) {
1328 } else if is_wide_char(cp) {
1330 linepos += 2;
1331 if linepos > line_len {
1332 line_len = linepos;
1333 }
1334 } else {
1335 linepos += 1;
1337 if linepos > line_len {
1338 line_len = linepos;
1339 }
1340 }
1341 i += len;
1342 }
1343 }
1344
1345 if line_len > max_len {
1347 max_len = line_len;
1348 }
1349
1350 max_len
1351}
1352
1353#[inline]
1355pub fn max_line_length(data: &[u8], utf8: bool) -> u64 {
1356 if utf8 {
1357 max_line_length_utf8(data)
1358 } else {
1359 max_line_length_c(data)
1360 }
1361}
1362
1363pub fn count_all(data: &[u8], utf8: bool) -> WcCounts {
1375 if utf8 {
1376 let (lines, words) = count_lines_words_utf8_fused(data);
1377 WcCounts {
1378 lines,
1379 words,
1380 bytes: data.len() as u64,
1381 chars: count_chars_utf8(data),
1382 max_line_length: max_line_length_utf8(data),
1383 }
1384 } else {
1385 WcCounts {
1386 lines: count_lines(data),
1387 words: count_words_locale(data, false),
1388 bytes: data.len() as u64,
1389 chars: data.len() as u64,
1390 max_line_length: max_line_length_c(data),
1391 }
1392 }
1393}
1394
/// Heuristic ASCII probe: inspects up to three windows of at most 256 bytes (start, middle,
/// end) and returns `true` when none of them contains a byte `>= 0x80`.
///
/// This is a sample, not a proof: bytes outside the windows are never looked at, so a `true`
/// result does not guarantee that `data` is entirely ASCII. Empty input yields `true`.
#[inline]
fn check_ascii_sample(data: &[u8]) -> bool {
    const WINDOW: usize = 256;

    let total = data.len();
    let window = WINDOW.min(total);

    // Leading window (empty for empty input, which is trivially ASCII).
    if !data[..window].is_ascii() {
        return false;
    }

    // Middle window, only when it cannot be fully covered by the head and tail windows.
    if total > 2 * window {
        let from = (total / 2).saturating_sub(window / 2);
        let to = (from + window).min(total);
        if !data[from..to].is_ascii() {
            return false;
        }
    }

    // Trailing window, skipped when the head window already covered everything.
    if total > window && !data[total - window..].is_ascii() {
        return false;
    }

    true
}
1453
1454fn split_at_newlines(data: &[u8], num_chunks: usize) -> Vec<&[u8]> {
1463 if data.is_empty() || num_chunks <= 1 {
1464 return vec![data];
1465 }
1466 let chunk_size = data.len() / num_chunks;
1467 let mut chunks = Vec::with_capacity(num_chunks);
1468 let mut pos = 0;
1469
1470 for _ in 0..num_chunks - 1 {
1471 let target = pos + chunk_size;
1472 if target >= data.len() {
1473 break;
1474 }
1475 let boundary = memchr::memchr(b'\n', &data[target..])
1476 .map(|p| target + p + 1)
1477 .unwrap_or(data.len());
1478 if boundary > pos {
1479 chunks.push(&data[pos..boundary]);
1480 }
1481 pos = boundary;
1482 }
1483 if pos < data.len() {
1484 chunks.push(&data[pos..]);
1485 }
1486 chunks
1487}
1488
1489pub fn count_lines_parallel(data: &[u8]) -> u64 {
1492 if data.len() < PARALLEL_THRESHOLD {
1493 return count_lines(data);
1494 }
1495
1496 let num_threads = rayon::current_num_threads().max(1);
1497 let chunk_size = (data.len() / num_threads).max(2 * 1024 * 1024);
1499
1500 data.par_chunks(chunk_size)
1501 .map(|chunk| memchr_iter(b'\n', chunk).count() as u64)
1502 .sum()
1503}
1504
1505pub fn count_words_parallel(data: &[u8], utf8: bool) -> u64 {
1507 if data.len() < PARALLEL_THRESHOLD {
1508 return count_words_locale(data, utf8);
1509 }
1510
1511 let num_threads = rayon::current_num_threads().max(1);
1512
1513 if utf8 {
1514 let chunks = split_at_newlines(data, num_threads);
1517 chunks.par_iter().map(|chunk| count_words_utf8(chunk)).sum()
1518 } else {
1519 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1521
1522 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1523
1524 let results: Vec<(u64, u64, bool, bool)> = chunks
1526 .par_iter()
1527 .map(|chunk| count_lw_c_chunk(chunk))
1528 .collect();
1529
1530 let mut total = 0u64;
1531 for i in 0..results.len() {
1532 total += results[i].1;
1533 if i > 0 && results[i - 1].3 && results[i].2 {
1537 total -= 1;
1538 }
1539 }
1540 total
1541 }
1542}
1543
1544pub fn count_chars_parallel(data: &[u8], utf8: bool) -> u64 {
1546 if !utf8 {
1547 return data.len() as u64;
1548 }
1549 if data.len() < PARALLEL_THRESHOLD {
1550 return count_chars_utf8(data);
1551 }
1552
1553 let num_threads = rayon::current_num_threads().max(1);
1554 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1555
1556 data.par_chunks(chunk_size).map(count_chars_utf8).sum()
1557}
1558
1559pub fn count_lwb(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1562 let (lines, words) = count_lines_words(data, utf8);
1563 (lines, words, data.len() as u64)
1564}
1565
1566pub fn count_lwb_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1572 if data.len() < PARALLEL_THRESHOLD {
1573 return count_lwb(data, utf8);
1575 }
1576
1577 let num_threads = rayon::current_num_threads().max(1);
1578
1579 let (lines, words) = if !utf8 {
1580 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1582
1583 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1584 let results: Vec<(u64, u64, bool, bool)> = chunks
1585 .par_iter()
1586 .map(|chunk| count_lw_c_chunk_fast(chunk))
1587 .collect();
1588
1589 let mut line_total = 0u64;
1590 let mut word_total = 0u64;
1591 for i in 0..results.len() {
1592 line_total += results[i].0;
1593 word_total += results[i].1;
1594 if i > 0 && results[i - 1].3 && results[i].2 {
1595 word_total -= 1;
1596 }
1597 }
1598
1599 (line_total, word_total)
1600 } else {
1601 let is_ascii = check_ascii_sample(data);
1603 if is_ascii {
1604 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1606 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1607 let results: Vec<(u64, u64, bool, bool)> = chunks
1608 .par_iter()
1609 .map(|chunk| count_lw_c_chunk_fast(chunk))
1610 .collect();
1611
1612 let mut line_total = 0u64;
1613 let mut word_total = 0u64;
1614 for i in 0..results.len() {
1615 line_total += results[i].0;
1616 word_total += results[i].1;
1617 if i > 0 && results[i - 1].3 && results[i].2 {
1618 word_total -= 1;
1619 }
1620 }
1621 (line_total, word_total)
1622 } else {
1623 let chunks = split_at_newlines(data, num_threads);
1626 let results: Vec<(u64, u64)> = chunks
1627 .par_iter()
1628 .map(|chunk| count_lines_words_utf8_fused(chunk))
1629 .collect();
1630 let mut line_total = 0u64;
1631 let mut word_total = 0u64;
1632 for (l, w) in results {
1633 line_total += l;
1634 word_total += w;
1635 }
1636 (line_total, word_total)
1637 }
1638 };
1639
1640 (lines, words, data.len() as u64)
1641}
1642
1643pub fn count_lwc_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1647 if data.len() < PARALLEL_THRESHOLD {
1648 let lines = count_lines(data);
1649 let words = count_words_locale(data, utf8);
1650 let chars = count_chars(data, utf8);
1651 return (lines, words, chars);
1652 }
1653
1654 let num_threads = rayon::current_num_threads().max(1);
1655
1656 if utf8 {
1657 let chunks = split_at_newlines(data, num_threads);
1659 let results: Vec<(u64, u64, u64)> = chunks
1660 .par_iter()
1661 .map(|chunk| {
1662 let (lines, words) = count_lines_words_utf8_fused(chunk);
1663 let chars = count_chars_utf8(chunk);
1664 (lines, words, chars)
1665 })
1666 .collect();
1667 let mut lines = 0u64;
1668 let mut words = 0u64;
1669 let mut chars = 0u64;
1670 for (l, w, c) in results {
1671 lines += l;
1672 words += w;
1673 chars += c;
1674 }
1675 (lines, words, chars)
1676 } else {
1677 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1679 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1680 let results: Vec<(u64, u64, bool, bool)> = chunks
1681 .par_iter()
1682 .map(|chunk| count_lw_c_chunk_fast(chunk))
1683 .collect();
1684 let mut lines = 0u64;
1685 let mut words = 0u64;
1686 for i in 0..results.len() {
1687 lines += results[i].0;
1688 words += results[i].1;
1689 if i > 0 && results[i - 1].3 && results[i].2 {
1690 words -= 1;
1691 }
1692 }
1693 (lines, words, data.len() as u64)
1694 }
1695}
1696
1697pub fn max_line_length_parallel(data: &[u8], utf8: bool) -> u64 {
1701 if data.len() < PARALLEL_THRESHOLD {
1702 return max_line_length(data, utf8);
1703 }
1704 let num_threads = rayon::current_num_threads().max(1);
1705 let chunks = split_at_newlines(data, num_threads);
1706 chunks
1707 .par_iter()
1708 .map(|chunk| {
1709 if utf8 {
1710 max_line_length_utf8(chunk)
1711 } else {
1712 max_line_length_c(chunk)
1713 }
1714 })
1715 .max()
1716 .unwrap_or(0)
1717}
1718
1719pub fn count_all_parallel(data: &[u8], utf8: bool) -> WcCounts {
1723 if data.len() < PARALLEL_THRESHOLD {
1724 return count_all(data, utf8);
1725 }
1726
1727 let num_threads = rayon::current_num_threads().max(1);
1728 let chunks = split_at_newlines(data, num_threads);
1729
1730 if utf8 {
1731 let results: Vec<(u64, u64, u64, u64)> = chunks
1732 .par_iter()
1733 .map(|chunk| {
1734 let (lines, words) = count_lines_words_utf8_fused(chunk);
1735 let chars = count_chars_utf8(chunk);
1736 let max_ll = max_line_length_utf8(chunk);
1737 (lines, words, chars, max_ll)
1738 })
1739 .collect();
1740
1741 let mut counts = WcCounts {
1742 bytes: data.len() as u64,
1743 ..Default::default()
1744 };
1745 for (l, w, c, m) in results {
1746 counts.lines += l;
1747 counts.words += w;
1748 counts.chars += c;
1749 if m > counts.max_line_length {
1750 counts.max_line_length = m;
1751 }
1752 }
1753 counts
1754 } else {
1755 let results: Vec<(u64, u64, u64)> = chunks
1757 .par_iter()
1758 .map(|chunk| {
1759 let (lines, words) = count_lines_words(chunk, false);
1760 let max_ll = max_line_length_c(chunk);
1761 (lines, words, max_ll)
1762 })
1763 .collect();
1764
1765 let mut counts = WcCounts {
1766 bytes: data.len() as u64,
1767 chars: data.len() as u64,
1768 ..Default::default()
1769 };
1770 for (l, w, m) in &results {
1771 counts.lines += l;
1772 counts.words += w;
1773 if *m > counts.max_line_length {
1774 counts.max_line_length = *m;
1775 }
1776 }
1777 counts
1778 }
1779}