1use memchr::memchr_iter;
2use rayon::prelude::*;
3
/// Input size in bytes (1 MiB) below which the `*_parallel` counters hand the
/// work to their single-threaded counterparts instead of splitting it up.
const PARALLEL_THRESHOLD: usize = 1 << 20;
8
/// The set of counters produced for one input buffer.
///
/// Every field is a plain `u64`, so the struct is `Copy`: results can be
/// passed around and accumulated without explicit `.clone()` calls.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct WcCounts {
    /// Number of newline (`\n`) bytes.
    pub lines: u64,
    /// Number of words.
    pub words: u64,
    /// Length of the input in bytes.
    pub bytes: u64,
    /// Number of characters (equal to `bytes` when the input is not treated as UTF-8).
    pub chars: u64,
    /// Display width of the widest line.
    pub max_line_length: u64,
}
18
/// Builds the 256-entry byte classification table used by the C-locale counters.
///
/// Classes:
/// * `1` — word break: 0x09..=0x0D (tab, LF, VT, FF, CR) and 0x20 (space)
/// * `2` — word byte: printable ASCII 0x21..=0x7E
/// * `0` — everything else
const fn make_byte_class_c() -> [u8; 256] {
    let mut table = [0u8; 256];
    let mut byte = 0usize;
    // `for` is not available in `const fn`, hence the manual index loop.
    while byte < 256 {
        table[byte] = match byte {
            0x09..=0x0D | 0x20 => 1,
            0x21..=0x7E => 2,
            _ => 0,
        };
        byte += 1;
    }
    table
}

/// Compile-time byte classification table; see [`make_byte_class_c`].
const BYTE_CLASS_C: [u8; 256] = make_byte_class_c();
59
60#[inline]
65pub(crate) fn first_is_word_c(data: &[u8]) -> bool {
66 for &b in data {
67 let class = BYTE_CLASS_C[b as usize];
68 if class != 0 {
69 return class == 2;
70 }
71 }
72 false }
74
/// Returns `true` for the non-ASCII code points that the UTF-8 word counters
/// treat as word separators: U+00A0, U+1680, U+2000..=U+200A, U+2028, U+2029,
/// U+202F, U+205F and U+3000.
#[inline]
fn is_unicode_space(cp: u32) -> bool {
    const ISOLATED: [u32; 7] = [0x00A0, 0x1680, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000];
    (0x2000..=0x200A).contains(&cp) || ISOLATED.contains(&cp)
}
96
97#[inline]
106pub fn count_lines(data: &[u8]) -> u64 {
107 memchr_iter(b'\n', data).count() as u64
108}
109
/// Returns the length of `data` in bytes.
#[inline]
pub fn count_bytes(data: &[u8]) -> u64 {
    let byte_len: usize = data.len();
    byte_len as u64
}
115
116pub fn count_words(data: &[u8]) -> u64 {
118 count_words_locale(data, true)
119}
120
121pub fn count_words_locale(data: &[u8], utf8: bool) -> u64 {
127 if utf8 {
128 count_words_utf8(data)
129 } else {
130 count_words_c(data)
131 }
132}
133
134fn count_words_c(data: &[u8]) -> u64 {
140 let mut words = 0u64;
141 let mut in_word = false;
142 let mut i = 0;
143 let len = data.len();
144
145 while i < len {
146 let b = unsafe { *data.get_unchecked(i) };
147 let class = unsafe { *BYTE_CLASS_C.get_unchecked(b as usize) };
148 if class == 1 {
149 in_word = false;
151 } else if class == 2 {
152 if !in_word {
154 in_word = true;
155 words += 1;
156 }
157 }
158 i += 1;
160 }
161 words
162}
163
164#[cfg(target_arch = "x86_64")]
168#[inline(always)]
169fn count_lw_c_scalar_tail(
170 ptr: *const u8,
171 mut i: usize,
172 len: usize,
173 mut total_lines: u64,
174 mut total_words: u64,
175 mut prev_in_word: bool,
176 data: &[u8],
177) -> (u64, u64, bool, bool) {
178 while i < len {
179 let b = unsafe { *ptr.add(i) };
180 let class = unsafe { *BYTE_CLASS_C.get_unchecked(b as usize) };
181 if class == 1 {
182 if b == b'\n' {
184 total_lines += 1;
185 }
186 prev_in_word = false;
187 } else if class == 2 && !prev_in_word {
188 total_words += 1;
190 prev_in_word = true;
191 }
192 i += 1;
194 }
195 let first_is_word = first_is_word_c(data);
196 (total_lines, total_words, first_is_word, prev_in_word)
197}
198
/// Counts newlines and words in `data` 32 bytes at a time with AVX2.
///
/// Bytes are classified with vector compares: 0x21..=0x7E are word bytes,
/// 0x09..=0x0D and 0x20 are word breaks, and every other byte is
/// "transparent" (it keeps whatever in-word state preceded it). Bytes left
/// over after the last full vector are handled by `count_lw_c_scalar_tail`,
/// whose return value `(lines, words, first_is_word, ends_in_word)` is
/// returned unchanged.
///
/// # Safety
///
/// The function is compiled with the `avx2` target feature enabled; the
/// caller must only invoke it on a CPU that supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_lw_c_chunk_avx2(data: &[u8]) -> (u64, u64, bool, bool) {
    use std::arch::x86_64::*;

    let len = data.len();
    let ptr = data.as_ptr();
    let mut i = 0usize;
    let mut total_lines = 0u64;
    let mut total_words = 0u64;
    // In-word state after the last byte processed so far.
    let mut prev_in_word = false;

    unsafe {
        let nl_byte = _mm256_set1_epi8(b'\n' as i8);
        let zero = _mm256_setzero_si256();
        let ones = _mm256_set1_epi8(1);
        let const_0x21 = _mm256_set1_epi8(0x21u8 as i8);
        let const_0x7e = _mm256_set1_epi8(0x7Eu8 as i8);
        let const_0x09 = _mm256_set1_epi8(0x09u8 as i8);
        let const_0x0d = _mm256_set1_epi8(0x0Du8 as i8);
        let const_0x20 = _mm256_set1_epi8(0x20u8 as i8);

        // Per-lane 8-bit newline counters; `batch` counts the vectors added
        // since the last flush.
        let mut line_acc = _mm256_setzero_si256();
        let mut batch = 0u32;

        // The loop condition keeps every 32-byte load inside `data`.
        while i + 32 <= len {
            let v = _mm256_loadu_si256(ptr.add(i) as *const __m256i);
            // Add 1 to each lane that holds a newline.
            let is_nl = _mm256_cmpeq_epi8(v, nl_byte);
            line_acc = _mm256_add_epi8(line_acc, _mm256_and_si256(is_nl, ones));

            // Unsigned range test: max(v, lo) == v means v >= lo, and
            // min(v, hi) == v means v <= hi.
            let ge_21 = _mm256_cmpeq_epi8(_mm256_max_epu8(v, const_0x21), v);
            let le_7e = _mm256_cmpeq_epi8(_mm256_min_epu8(v, const_0x7e), v);
            let is_printable = _mm256_and_si256(ge_21, le_7e);
            // Bit k set => byte k is a word byte.
            let word_mask = _mm256_movemask_epi8(is_printable) as u32;

            let ge_09 = _mm256_cmpeq_epi8(_mm256_max_epu8(v, const_0x09), v);
            let le_0d = _mm256_cmpeq_epi8(_mm256_min_epu8(v, const_0x0d), v);
            let in_tab_range = _mm256_and_si256(ge_09, le_0d);
            let is_space = _mm256_cmpeq_epi8(v, const_0x20);
            let is_break = _mm256_or_si256(in_tab_range, is_space);
            // Bit k set => byte k is a word break.
            let break_mask = _mm256_movemask_epi8(is_break) as u32;

            // Bytes that are neither word bytes nor breaks.
            let transparent = !break_mask & !word_mask;
            // `carry` bit k becomes "in a word after byte k". Seed it with the
            // word bytes plus the incoming state at bit 0, then clear breaks.
            let mut carry = word_mask | if prev_in_word { 1u32 } else { 0u32 };
            carry &= !break_mask;
            // Spread the in-word state forward through runs of transparent
            // bytes with doubling shifts (1, 2, 4, 8, 16 cover all 32 bits).
            let mut pass = transparent;
            carry |= (carry << 1) & pass;
            pass &= pass << 1;
            carry |= (carry << 2) & pass;
            pass &= pass << 2;
            carry |= (carry << 4) & pass;
            pass &= pass << 4;
            carry |= (carry << 8) & pass;
            pass &= pass << 8;
            carry |= (carry << 16) & pass;
            // State immediately before each byte: the state after the previous
            // byte, with the incoming state feeding bit 0.
            let prev_carry = (carry << 1) | if prev_in_word { 1u32 } else { 0u32 };
            // A word starts at a word byte whose preceding state is "not in a word".
            let starts = word_mask & !prev_carry;
            total_words += starts.count_ones() as u64;
            // The state after byte 31 carries into the next vector.
            prev_in_word = (carry >> 31) & 1 == 1;

            batch += 1;
            // Each lane grows by at most 1 per vector, so flush after 255
            // vectors, before an 8-bit lane could wrap.
            if batch >= 255 {
                // Sum the 32 lanes: SAD against zero gives four 64-bit partial
                // sums, which are then folded into one.
                let sad = _mm256_sad_epu8(line_acc, zero);
                let hi = _mm256_extracti128_si256(sad, 1);
                let lo = _mm256_castsi256_si128(sad);
                let s = _mm_add_epi64(lo, hi);
                let h64 = _mm_unpackhi_epi64(s, s);
                let t = _mm_add_epi64(s, h64);
                total_lines += _mm_cvtsi128_si64(t) as u64;
                line_acc = _mm256_setzero_si256();
                batch = 0;
            }
            i += 32;
        }

        // Flush whatever is left in the lane counters.
        if batch > 0 {
            let sad = _mm256_sad_epu8(line_acc, zero);
            let hi = _mm256_extracti128_si256(sad, 1);
            let lo = _mm256_castsi256_si128(sad);
            let s = _mm_add_epi64(lo, hi);
            let h64 = _mm_unpackhi_epi64(s, s);
            let t = _mm_add_epi64(s, h64);
            total_lines += _mm_cvtsi128_si64(t) as u64;
        }
    }

    // Fewer than 32 bytes remain from offset `i`; finish them byte by byte.
    count_lw_c_scalar_tail(ptr, i, len, total_lines, total_words, prev_in_word, data)
}
301
/// Counts newlines and words in `data` 16 bytes at a time with SSE2.
///
/// Uses the same classification as the AVX2 variant: 0x21..=0x7E are word
/// bytes, 0x09..=0x0D and 0x20 are word breaks, and every other byte is
/// "transparent" (it keeps whatever in-word state preceded it). Bytes left
/// over after the last full vector are handled by `count_lw_c_scalar_tail`,
/// whose return value `(lines, words, first_is_word, ends_in_word)` is
/// returned unchanged.
///
/// # Safety
///
/// The function is compiled with the `sse2` target feature enabled; the
/// caller must only invoke it on a CPU that supports SSE2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse2")]
unsafe fn count_lw_c_chunk_sse2(data: &[u8]) -> (u64, u64, bool, bool) {
    use std::arch::x86_64::*;

    let len = data.len();
    let ptr = data.as_ptr();
    let mut i = 0usize;
    let mut total_lines = 0u64;
    let mut total_words = 0u64;
    // In-word state after the last byte processed so far.
    let mut prev_in_word = false;

    unsafe {
        let nl_byte = _mm_set1_epi8(b'\n' as i8);
        let zero = _mm_setzero_si128();
        let ones = _mm_set1_epi8(1);
        let const_0x21 = _mm_set1_epi8(0x21u8 as i8);
        let const_0x7e = _mm_set1_epi8(0x7Eu8 as i8);
        let const_0x09 = _mm_set1_epi8(0x09u8 as i8);
        let const_0x0d = _mm_set1_epi8(0x0Du8 as i8);
        let const_0x20 = _mm_set1_epi8(0x20u8 as i8);

        // Per-lane 8-bit newline counters; `batch` counts the vectors added
        // since the last flush.
        let mut line_acc = _mm_setzero_si128();
        let mut batch = 0u32;

        // The loop condition keeps every 16-byte load inside `data`.
        while i + 16 <= len {
            let v = _mm_loadu_si128(ptr.add(i) as *const __m128i);
            // Add 1 to each lane that holds a newline.
            let is_nl = _mm_cmpeq_epi8(v, nl_byte);
            line_acc = _mm_add_epi8(line_acc, _mm_and_si128(is_nl, ones));

            // Unsigned range test: max(v, lo) == v means v >= lo, and
            // min(v, hi) == v means v <= hi.
            let ge_21 = _mm_cmpeq_epi8(_mm_max_epu8(v, const_0x21), v);
            let le_7e = _mm_cmpeq_epi8(_mm_min_epu8(v, const_0x7e), v);
            let is_printable = _mm_and_si128(ge_21, le_7e);
            // Bit k set => byte k is a word byte. Only the low 16 bits are used.
            let word_mask = (_mm_movemask_epi8(is_printable) as u32) & 0xFFFF;

            let ge_09 = _mm_cmpeq_epi8(_mm_max_epu8(v, const_0x09), v);
            let le_0d = _mm_cmpeq_epi8(_mm_min_epu8(v, const_0x0d), v);
            let in_tab_range = _mm_and_si128(ge_09, le_0d);
            let is_space = _mm_cmpeq_epi8(v, const_0x20);
            let is_break = _mm_or_si128(in_tab_range, is_space);
            // Bit k set => byte k is a word break.
            let break_mask = (_mm_movemask_epi8(is_break) as u32) & 0xFFFF;

            // Bytes that are neither word bytes nor breaks.
            let transparent = !break_mask & !word_mask & 0xFFFF;
            // `carry` bit k becomes "in a word after byte k". Seed it with the
            // word bytes plus the incoming state at bit 0, then clear breaks.
            let mut carry = (word_mask | if prev_in_word { 1u32 } else { 0u32 }) & 0xFFFF;
            carry &= !break_mask;
            // Spread the in-word state forward through runs of transparent
            // bytes with doubling shifts (1, 2, 4, 8 cover all 16 bits).
            let mut pass = transparent;
            carry |= (carry << 1) & pass;
            pass &= pass << 1;
            carry |= (carry << 2) & pass;
            pass &= pass << 2;
            carry |= (carry << 4) & pass;
            pass &= pass << 4;
            carry |= (carry << 8) & pass;
            // State immediately before each byte: the state after the previous
            // byte, with the incoming state feeding bit 0.
            let prev_carry = ((carry << 1) | if prev_in_word { 1u32 } else { 0u32 }) & 0xFFFF;
            // A word starts at a word byte whose preceding state is "not in a word".
            let starts = word_mask & !prev_carry & 0xFFFF;
            total_words += starts.count_ones() as u64;
            // The state after byte 15 carries into the next vector.
            prev_in_word = (carry >> 15) & 1 == 1;

            batch += 1;
            // Each lane grows by at most 1 per vector, so flush after 255
            // vectors, before an 8-bit lane could wrap.
            if batch >= 255 {
                // Sum the 16 lanes: SAD against zero gives two 64-bit partial
                // sums, which are then added together.
                let sad = _mm_sad_epu8(line_acc, zero);
                let hi = _mm_unpackhi_epi64(sad, sad);
                let t = _mm_add_epi64(sad, hi);
                total_lines += _mm_cvtsi128_si64(t) as u64;
                line_acc = _mm_setzero_si128();
                batch = 0;
            }
            i += 16;
        }

        // Flush whatever is left in the lane counters.
        if batch > 0 {
            let sad = _mm_sad_epu8(line_acc, zero);
            let hi = _mm_unpackhi_epi64(sad, sad);
            let t = _mm_add_epi64(sad, hi);
            total_lines += _mm_cvtsi128_si64(t) as u64;
        }
    }

    // Fewer than 16 bytes remain from offset `i`; finish them byte by byte.
    count_lw_c_scalar_tail(ptr, i, len, total_lines, total_words, prev_in_word, data)
}
389
390#[inline]
392fn count_lw_c_chunk_fast(data: &[u8]) -> (u64, u64, bool, bool) {
393 #[cfg(target_arch = "x86_64")]
394 {
395 if is_x86_feature_detected!("avx2") && data.len() >= 64 {
396 return unsafe { count_lw_c_chunk_avx2(data) };
397 }
398 if data.len() >= 32 {
399 return unsafe { count_lw_c_chunk_sse2(data) };
400 }
401 }
402 count_lw_c_chunk(data)
403}
404
405fn count_lw_c_chunk(data: &[u8]) -> (u64, u64, bool, bool) {
410 let mut lines = 0u64;
411 let mut words = 0u64;
412 let mut in_word = false;
413 let mut i = 0;
414 let len = data.len();
415
416 let first_is_word = first_is_word_c(data);
418
419 while i < len {
420 let b = unsafe { *data.get_unchecked(i) };
421 let class = unsafe { *BYTE_CLASS_C.get_unchecked(b as usize) };
422 if class == 1 {
423 if b == b'\n' {
425 lines += 1;
426 }
427 in_word = false;
428 } else if class == 2 && !in_word {
429 in_word = true;
431 words += 1;
432 }
433 i += 1;
435 }
436 (lines, words, first_is_word, in_word)
437}
438
439fn count_words_utf8(data: &[u8]) -> u64 {
452 let mut words = 0u64;
453 let mut in_word = false;
454 let mut i = 0;
455 let len = data.len();
456
457 while i < len {
458 let b = unsafe { *data.get_unchecked(i) };
459
460 if b < 0x80 {
461 let class = unsafe { *BYTE_CLASS_C.get_unchecked(b as usize) };
464 if class == 1 {
465 in_word = false;
466 } else if class == 2 && !in_word {
467 in_word = true;
468 words += 1;
469 }
470 i += 1;
472 } else if b < 0xC2 {
473 i += 1;
477 } else if b < 0xE0 {
478 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
479 let cp = ((b as u32 & 0x1F) << 6)
480 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
481 if is_unicode_space(cp) {
482 in_word = false;
483 } else if !in_word {
484 in_word = true;
485 words += 1;
486 }
487 i += 2;
488 } else {
489 i += 1;
491 }
492 } else if b < 0xF0 {
493 if i + 2 < len
494 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
495 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
496 {
497 let cp = ((b as u32 & 0x0F) << 12)
498 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
499 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
500 if is_unicode_space(cp) {
501 in_word = false;
502 } else if !in_word {
503 in_word = true;
504 words += 1;
505 }
506 i += 3;
507 } else {
508 i += 1;
510 }
511 } else if b < 0xF5 {
512 if i + 3 < len
513 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
514 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
515 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
516 {
517 let cp = ((b as u32 & 0x07) << 18)
518 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
519 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
520 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
521 if is_unicode_space(cp) {
522 in_word = false;
523 } else if !in_word {
524 in_word = true;
525 words += 1;
526 }
527 i += 4;
528 } else {
529 i += 1;
531 }
532 } else {
533 i += 1;
535 }
536 }
537
538 words
539}
540
541pub fn count_lines_words(data: &[u8], utf8: bool) -> (u64, u64) {
545 if utf8 {
546 count_lines_words_utf8_fused(data)
547 } else {
548 let (lines, words, _, _) = count_lw_c_chunk_fast(data);
549 (lines, words)
550 }
551}
552
553fn count_lines_words_utf8_fused(data: &[u8]) -> (u64, u64) {
561 let mut lines = 0u64;
562 let mut words = 0u64;
563 let mut in_word = false;
564 let mut i = 0;
565 let len = data.len();
566
567 while i < len {
568 let b = unsafe { *data.get_unchecked(i) };
569
570 if b == b'\n' {
571 lines += 1;
572 in_word = false;
573 i += 1;
574 } else if b < 0x80 {
575 let class = unsafe { *BYTE_CLASS_C.get_unchecked(b as usize) };
577 if class == 1 {
578 in_word = false;
579 } else if class == 2 && !in_word {
580 in_word = true;
581 words += 1;
582 }
583 i += 1;
585 } else if b < 0xC2 {
586 i += 1;
588 } else if b < 0xE0 {
589 if i + 1 < len && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80 {
590 let cp = ((b as u32 & 0x1F) << 6)
591 | (unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F);
592 if is_unicode_space(cp) {
593 in_word = false;
594 } else if !in_word {
595 in_word = true;
596 words += 1;
597 }
598 i += 2;
599 } else {
600 i += 1;
602 }
603 } else if b < 0xF0 {
604 if i + 2 < len
605 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
606 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
607 {
608 let cp = ((b as u32 & 0x0F) << 12)
609 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 6)
610 | (unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F);
611 if is_unicode_space(cp) {
612 in_word = false;
613 } else if !in_word {
614 in_word = true;
615 words += 1;
616 }
617 i += 3;
618 } else {
619 i += 1;
621 }
622 } else if b < 0xF5 {
623 if i + 3 < len
624 && (unsafe { *data.get_unchecked(i + 1) } & 0xC0) == 0x80
625 && (unsafe { *data.get_unchecked(i + 2) } & 0xC0) == 0x80
626 && (unsafe { *data.get_unchecked(i + 3) } & 0xC0) == 0x80
627 {
628 let cp = ((b as u32 & 0x07) << 18)
629 | ((unsafe { *data.get_unchecked(i + 1) } as u32 & 0x3F) << 12)
630 | ((unsafe { *data.get_unchecked(i + 2) } as u32 & 0x3F) << 6)
631 | (unsafe { *data.get_unchecked(i + 3) } as u32 & 0x3F);
632 if is_unicode_space(cp) {
633 in_word = false;
634 } else if !in_word {
635 in_word = true;
636 words += 1;
637 }
638 i += 4;
639 } else {
640 i += 1;
642 }
643 } else {
644 i += 1;
646 }
647 }
648
649 (lines, words)
650}
651
652pub fn count_lines_words_chars(data: &[u8], utf8: bool) -> (u64, u64, u64) {
654 if utf8 {
655 let (lines, words) = count_lines_words_utf8_fused(data);
657 let chars = count_chars_utf8(data);
658 (lines, words, chars)
659 } else {
660 let (lines, words) = count_lines_words(data, false);
662 (lines, words, data.len() as u64)
663 }
664}
665
666pub fn count_chars_utf8(data: &[u8]) -> u64 {
673 #[cfg(target_arch = "x86_64")]
674 {
675 if is_x86_feature_detected!("avx2") {
676 return unsafe { count_chars_utf8_avx2(data) };
677 }
678 }
679 count_chars_utf8_scalar(data)
680}
681
/// Counts the bytes of `data` that are not UTF-8 continuation bytes
/// (`0b10xx_xxxx`), 32 bytes at a time with AVX2.
///
/// # Safety
///
/// The function is compiled with the `avx2` target feature enabled; the
/// caller must only invoke it on a CPU that supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn count_chars_utf8_avx2(data: &[u8]) -> u64 {
    use std::arch::x86_64::*;

    const LANES: usize = 32;
    // Each 8-bit lane grows by at most 1 per vector, so at most 255 vectors
    // may be accumulated before the lanes are summed and reset.
    const MAX_BATCH: usize = 255;

    let vector_count = data.len() / LANES;
    let base = data.as_ptr();
    let mut total = 0u64;

    unsafe {
        let top_two_bits = _mm256_set1_epi8(0xC0u8 as i8);
        let continuation_tag = _mm256_set1_epi8(0x80u8 as i8);
        let one_per_lane = _mm256_set1_epi8(1);
        let zero = _mm256_setzero_si256();

        let mut next = 0usize;
        while next < vector_count {
            let batch_end = vector_count.min(next + MAX_BATCH);
            let mut lane_counts = zero;

            for index in next..batch_end {
                // `index < data.len() / 32`, so the 32-byte load stays inside `data`.
                let block = _mm256_loadu_si256(base.add(index * LANES) as *const __m256i);
                let is_continuation =
                    _mm256_cmpeq_epi8(_mm256_and_si256(block, top_two_bits), continuation_tag);
                // andnot(mask, 1) leaves 1 in every lane that is NOT a continuation byte.
                lane_counts =
                    _mm256_add_epi8(lane_counts, _mm256_andnot_si256(is_continuation, one_per_lane));
            }

            // Sum the 32 lanes: SAD against zero gives four 64-bit partial
            // sums, which are then folded into one.
            let sums = _mm256_sad_epu8(lane_counts, zero);
            let folded = _mm_add_epi64(
                _mm256_castsi256_si128(sums),
                _mm256_extracti128_si256(sums, 1),
            );
            let scalar = _mm_add_epi64(folded, _mm_unpackhi_epi64(folded, folded));
            total += _mm_cvtsi128_si64(scalar) as u64;

            next = batch_end;
        }
    }

    // Fewer than 32 bytes remain; count them one at a time.
    let tail = &data[vector_count * LANES..];
    total + tail.iter().filter(|&&byte| (byte & 0xC0) != 0x80).count() as u64
}
745
/// Counts the bytes of `data` that are not UTF-8 continuation bytes
/// (`0b10xx_xxxx`), without SIMD intrinsics.
///
/// The input is processed in 64-byte blocks; a block made only of ASCII bytes
/// contributes 64 without inspecting each byte individually.
fn count_chars_utf8_scalar(data: &[u8]) -> u64 {
    /// `true` unless `byte` has the continuation tag in its top two bits.
    fn starts_char(byte: u8) -> bool {
        (byte & 0xC0) != 0x80
    }

    let blocks = data.chunks_exact(64);
    let tail_total = blocks
        .remainder()
        .iter()
        .filter(|&&byte| starts_char(byte))
        .count() as u64;

    let block_total: u64 = blocks
        .map(|block| {
            if block.is_ascii() {
                64
            } else {
                block.iter().filter(|&&byte| starts_char(byte)).count() as u64
            }
        })
        .sum();

    block_total + tail_total
}
797
/// Counts characters when every byte is one character: the byte length of `data`.
#[inline]
pub fn count_chars_c(data: &[u8]) -> u64 {
    let byte_len: usize = data.len();
    byte_len as u64
}
803
804#[inline]
806pub fn count_chars(data: &[u8], utf8: bool) -> u64 {
807 if utf8 {
808 count_chars_utf8(data)
809 } else {
810 count_chars_c(data)
811 }
812}
813
/// Reports whether the environment selects a UTF-8 locale.
///
/// `LC_ALL`, `LC_CTYPE` and `LANG` are consulted in that order; the first one
/// that is set to a non-empty value decides. The value counts as UTF-8 when it
/// contains `utf-8` or `utf8`, ignoring ASCII case. Returns `false` when none
/// of the variables has a usable value.
pub fn is_utf8_locale() -> bool {
    ["LC_ALL", "LC_CTYPE", "LANG"]
        .iter()
        .filter_map(|name| std::env::var(name).ok())
        .find(|value| !value.is_empty())
        .map_or(false, |value| {
            let lowered = value.to_ascii_lowercase();
            lowered.contains("utf-8") || lowered.contains("utf8")
        })
}
826
/// Decodes the UTF-8 sequence at the start of `bytes`.
///
/// Returns `(code_point, bytes_consumed)`. When the first byte is ASCII, is
/// not a valid lead byte (0x80..=0xC1 or 0xF5..), or is not followed by the
/// required continuation bytes, the first byte itself is returned as the
/// value with a length of 1. Overlong and surrogate encodings are not
/// rejected.
///
/// # Panics
///
/// Panics if `bytes` is empty.
#[inline]
fn decode_utf8(bytes: &[u8]) -> (u32, usize) {
    let lead = bytes[0];
    let single = (u32::from(lead), 1);

    let width = match lead {
        0xC2..=0xDF => 2usize,
        0xE0..=0xEF => 3,
        0xF0..=0xF4 => 4,
        _ => return single,
    };

    match bytes.get(1..width) {
        Some(tail) if tail.iter().all(|&b| (b & 0xC0) == 0x80) => {
            // Payload bits of the lead byte: 5, 4 or 3 for widths 2, 3 or 4.
            let lead_bits = u32::from(lead) & (0xFFu32 >> (width + 1));
            let cp = tail
                .iter()
                .fold(lead_bits, |acc, &b| (acc << 6) | u32::from(b & 0x3F));
            (cp, width)
        }
        _ => single,
    }
}
870
/// Returns `true` when `cp` is in the table of code points that contribute no
/// columns to a line's display width.
///
/// The table is a list of inclusive `(first, last)` ranges sorted by code
/// point and non-overlapping, which is what the binary search relies on.
#[inline]
fn is_zero_width(cp: u32) -> bool {
    const ZERO_WIDTH: &[(u32, u32)] = &[
        (0x0300, 0x036F),
        (0x0483, 0x0489),
        (0x0591, 0x05BD),
        (0x05BF, 0x05BF),
        (0x05C1, 0x05C2),
        (0x05C4, 0x05C5),
        (0x05C7, 0x05C7),
        (0x0600, 0x0605),
        (0x0610, 0x061A),
        (0x064B, 0x065F),
        (0x0670, 0x0670),
        (0x06D6, 0x06DD),
        (0x06DF, 0x06E4),
        (0x06E7, 0x06E8),
        (0x06EA, 0x06ED),
        (0x070F, 0x070F),
        (0x0711, 0x0711),
        (0x0730, 0x074A),
        (0x07A6, 0x07B0),
        (0x07EB, 0x07F3),
        (0x07FD, 0x07FD),
        (0x0816, 0x0819),
        (0x081B, 0x0823),
        (0x0825, 0x0827),
        (0x0829, 0x082D),
        (0x0859, 0x085B),
        (0x08D3, 0x08E1),
        (0x08E3, 0x0902),
        (0x093A, 0x093A),
        (0x093C, 0x093C),
        (0x0941, 0x0948),
        (0x094D, 0x094D),
        (0x0951, 0x0957),
        (0x0962, 0x0963),
        (0x0981, 0x0981),
        (0x09BC, 0x09BC),
        (0x09C1, 0x09C4),
        (0x09CD, 0x09CD),
        (0x09E2, 0x09E3),
        (0x09FE, 0x09FE),
        (0x0A01, 0x0A02),
        (0x0A3C, 0x0A3C),
        (0x0A41, 0x0A42),
        (0x0A47, 0x0A48),
        (0x0A4B, 0x0A4D),
        (0x0A51, 0x0A51),
        (0x0A70, 0x0A71),
        (0x0A75, 0x0A75),
        (0x0A81, 0x0A82),
        (0x0ABC, 0x0ABC),
        (0x0AC1, 0x0AC5),
        (0x0AC7, 0x0AC8),
        (0x0ACD, 0x0ACD),
        (0x0AE2, 0x0AE3),
        (0x0AFA, 0x0AFF),
        (0x0B01, 0x0B01),
        (0x0B3C, 0x0B3C),
        (0x0B3F, 0x0B3F),
        (0x0B41, 0x0B44),
        (0x0B4D, 0x0B4D),
        (0x0B56, 0x0B56),
        (0x0B62, 0x0B63),
        (0x0B82, 0x0B82),
        (0x0BC0, 0x0BC0),
        (0x0BCD, 0x0BCD),
        (0x0C00, 0x0C00),
        (0x0C04, 0x0C04),
        (0x0C3E, 0x0C40),
        (0x0C46, 0x0C48),
        (0x0C4A, 0x0C4D),
        (0x0C55, 0x0C56),
        (0x0C62, 0x0C63),
        (0x0C81, 0x0C81),
        (0x0CBC, 0x0CBC),
        (0x0CBF, 0x0CBF),
        (0x0CC6, 0x0CC6),
        (0x0CCC, 0x0CCD),
        (0x0CE2, 0x0CE3),
        (0x0D00, 0x0D01),
        (0x0D3B, 0x0D3C),
        (0x0D41, 0x0D44),
        (0x0D4D, 0x0D4D),
        (0x0D62, 0x0D63),
        (0x0DCA, 0x0DCA),
        (0x0DD2, 0x0DD4),
        (0x0DD6, 0x0DD6),
        (0x0E31, 0x0E31),
        (0x0E34, 0x0E3A),
        (0x0E47, 0x0E4E),
        (0x0EB1, 0x0EB1),
        (0x0EB4, 0x0EBC),
        (0x0EC8, 0x0ECD),
        (0x0F18, 0x0F19),
        (0x0F35, 0x0F35),
        (0x0F37, 0x0F37),
        (0x0F39, 0x0F39),
        (0x0F71, 0x0F7E),
        (0x0F80, 0x0F84),
        (0x0F86, 0x0F87),
        (0x0F8D, 0x0F97),
        (0x0F99, 0x0FBC),
        (0x0FC6, 0x0FC6),
        (0x102D, 0x1030),
        (0x1032, 0x1037),
        (0x1039, 0x103A),
        (0x103D, 0x103E),
        (0x1058, 0x1059),
        (0x105E, 0x1060),
        (0x1071, 0x1074),
        (0x1082, 0x1082),
        (0x1085, 0x1086),
        (0x108D, 0x108D),
        (0x109D, 0x109D),
        (0x1160, 0x11FF),
        (0x135D, 0x135F),
        (0x1712, 0x1714),
        (0x1732, 0x1734),
        (0x1752, 0x1753),
        (0x1772, 0x1773),
        (0x17B4, 0x17B5),
        (0x17B7, 0x17BD),
        (0x17C6, 0x17C6),
        (0x17C9, 0x17D3),
        (0x17DD, 0x17DD),
        (0x180B, 0x180D),
        (0x1885, 0x1886),
        (0x18A9, 0x18A9),
        (0x1920, 0x1922),
        (0x1927, 0x1928),
        (0x1932, 0x1932),
        (0x1939, 0x193B),
        (0x1A17, 0x1A18),
        (0x1A1B, 0x1A1B),
        (0x1A56, 0x1A56),
        (0x1A58, 0x1A5E),
        (0x1A60, 0x1A60),
        (0x1A62, 0x1A62),
        (0x1A65, 0x1A6C),
        (0x1A73, 0x1A7C),
        (0x1A7F, 0x1A7F),
        (0x1AB0, 0x1ABE),
        (0x1B00, 0x1B03),
        (0x1B34, 0x1B34),
        (0x1B36, 0x1B3A),
        (0x1B3C, 0x1B3C),
        (0x1B42, 0x1B42),
        (0x1B6B, 0x1B73),
        (0x1B80, 0x1B81),
        (0x1BA2, 0x1BA5),
        (0x1BA8, 0x1BA9),
        (0x1BAB, 0x1BAD),
        (0x1BE6, 0x1BE6),
        (0x1BE8, 0x1BE9),
        (0x1BED, 0x1BED),
        (0x1BEF, 0x1BF1),
        (0x1C2C, 0x1C33),
        (0x1C36, 0x1C37),
        (0x1CD0, 0x1CD2),
        (0x1CD4, 0x1CE0),
        (0x1CE2, 0x1CE8),
        (0x1CED, 0x1CED),
        (0x1CF4, 0x1CF4),
        (0x1CF8, 0x1CF9),
        (0x1DC0, 0x1DF9),
        (0x1DFB, 0x1DFF),
        (0x200B, 0x200F),
        (0x202A, 0x202E),
        (0x2060, 0x2064),
        (0x2066, 0x206F),
        (0x20D0, 0x20F0),
        (0xFE00, 0xFE0F),
        (0xFE20, 0xFE2F),
        (0xFEFF, 0xFEFF),
        (0xFFF9, 0xFFFB),
        (0x1D167, 0x1D169),
        (0x1D173, 0x1D182),
        (0x1D185, 0x1D18B),
        (0x1D1AA, 0x1D1AD),
        (0x1D242, 0x1D244),
        (0xE0001, 0xE0001),
        (0xE0020, 0xE007F),
        (0xE0100, 0xE01EF),
    ];

    ZERO_WIDTH
        .binary_search_by(|&(first, last)| {
            if last < cp {
                std::cmp::Ordering::Less
            } else if first > cp {
                std::cmp::Ordering::Greater
            } else {
                std::cmp::Ordering::Equal
            }
        })
        .is_ok()
}
1061
/// Returns `true` when `cp` is in the table of code points that occupy two
/// columns in a line's display width.
///
/// The table is a list of inclusive `(first, last)` ranges sorted by code
/// point and non-overlapping, which is what the binary search relies on.
#[inline]
fn is_wide_char(cp: u32) -> bool {
    const WIDE: &[(u32, u32)] = &[
        (0x1100, 0x115F),
        (0x231A, 0x231B),
        (0x2329, 0x232A),
        (0x23E9, 0x23F3),
        (0x23F8, 0x23FA),
        (0x25FD, 0x25FE),
        (0x2614, 0x2615),
        (0x2648, 0x2653),
        (0x267F, 0x267F),
        (0x2693, 0x2693),
        (0x26A1, 0x26A1),
        (0x26AA, 0x26AB),
        (0x26BD, 0x26BE),
        (0x26C4, 0x26C5),
        (0x26CE, 0x26CE),
        (0x26D4, 0x26D4),
        (0x26EA, 0x26EA),
        (0x26F2, 0x26F3),
        (0x26F5, 0x26F5),
        (0x26FA, 0x26FA),
        (0x26FD, 0x26FD),
        (0x2702, 0x2702),
        (0x2705, 0x2705),
        (0x2708, 0x270D),
        (0x270F, 0x270F),
        (0x2712, 0x2712),
        (0x2714, 0x2714),
        (0x2716, 0x2716),
        (0x271D, 0x271D),
        (0x2721, 0x2721),
        (0x2728, 0x2728),
        (0x2733, 0x2734),
        (0x2744, 0x2744),
        (0x2747, 0x2747),
        (0x274C, 0x274C),
        (0x274E, 0x274E),
        (0x2753, 0x2755),
        (0x2757, 0x2757),
        (0x2763, 0x2764),
        (0x2795, 0x2797),
        (0x27A1, 0x27A1),
        (0x27B0, 0x27B0),
        (0x27BF, 0x27BF),
        (0x2934, 0x2935),
        (0x2B05, 0x2B07),
        (0x2B1B, 0x2B1C),
        (0x2B50, 0x2B50),
        (0x2B55, 0x2B55),
        (0x2E80, 0x303E),
        (0x3040, 0x33BF),
        (0x3400, 0x4DBF),
        (0x4E00, 0xA4CF),
        (0xA960, 0xA97C),
        (0xAC00, 0xD7A3),
        (0xF900, 0xFAFF),
        (0xFE10, 0xFE19),
        (0xFE30, 0xFE6F),
        (0xFF01, 0xFF60),
        (0xFFE0, 0xFFE6),
        (0x1F004, 0x1F004),
        (0x1F0CF, 0x1F0CF),
        (0x1F170, 0x1F171),
        (0x1F17E, 0x1F17F),
        (0x1F18E, 0x1F18E),
        (0x1F191, 0x1F19A),
        (0x1F1E0, 0x1F1FF),
        (0x1F200, 0x1F202),
        (0x1F210, 0x1F23B),
        (0x1F240, 0x1F248),
        (0x1F250, 0x1F251),
        (0x1F260, 0x1F265),
        (0x1F300, 0x1F64F),
        (0x1F680, 0x1F6FF),
        (0x1F900, 0x1F9FF),
        (0x1FA00, 0x1FA6F),
        (0x1FA70, 0x1FAFF),
        (0x20000, 0x2FFFD),
        (0x30000, 0x3FFFD),
    ];

    WIDE.binary_search_by(|&(first, last)| {
        if last < cp {
            std::cmp::Ordering::Less
        } else if first > cp {
            std::cmp::Ordering::Greater
        } else {
            std::cmp::Ordering::Equal
        }
    })
    .is_ok()
}
1148
/// Computes the display width of the widest line, looking at single bytes only.
///
/// Column rules:
/// * space and printable ASCII (0x20..=0x7E) advance the column by one;
/// * tab advances to the next multiple of 8;
/// * carriage return moves the column back to 0 without ending the line;
/// * newline and form feed (0x0C) end the line;
/// * every other byte is ignored.
///
/// The width of a line is the largest column reached on it, and a final line
/// without a terminator is included.
pub fn max_line_length_c(data: &[u8]) -> u64 {
    let mut widest_line = 0u64;
    let mut widest_on_line = 0u64;
    let mut column = 0u64;

    for &byte in data {
        match byte {
            0x20..=0x7E => {
                column += 1;
                widest_on_line = widest_on_line.max(column);
            }
            b'\t' => {
                column = (column + 8) & !7;
                widest_on_line = widest_on_line.max(column);
            }
            b'\n' | 0x0C => {
                widest_line = widest_line.max(widest_on_line);
                column = 0;
                widest_on_line = 0;
            }
            b'\r' => column = 0,
            _ => {}
        }
    }

    widest_line.max(widest_on_line)
}
1230
1231pub fn max_line_length_utf8(data: &[u8]) -> u64 {
1238 let mut max_len: u64 = 0;
1239 let mut line_len: u64 = 0;
1240 let mut linepos: u64 = 0;
1241 let mut i = 0;
1242 let len = data.len();
1243
1244 while i < len {
1245 let b = unsafe { *data.get_unchecked(i) };
1246
1247 if b >= 0x21 && b <= 0x7E {
1248 i += 1;
1250 let mut run = 1u64;
1251 while i < len {
1252 let b = unsafe { *data.get_unchecked(i) };
1253 if b >= 0x21 && b <= 0x7E {
1254 run += 1;
1255 i += 1;
1256 } else {
1257 break;
1258 }
1259 }
1260 linepos += run;
1261 if linepos > line_len {
1262 line_len = linepos;
1263 }
1264 } else if b < 0x80 {
1265 match b {
1267 b' ' => {
1268 linepos += 1;
1269 if linepos > line_len {
1270 line_len = linepos;
1271 }
1272 }
1273 b'\n' => {
1274 if line_len > max_len {
1275 max_len = line_len;
1276 }
1277 linepos = 0;
1278 line_len = 0;
1279 }
1280 b'\t' => {
1281 linepos = (linepos + 8) & !7;
1282 if linepos > line_len {
1283 line_len = linepos;
1284 }
1285 }
1286 b'\r' => {
1287 linepos = 0;
1288 }
1289 0x0C => {
1290 if line_len > max_len {
1291 max_len = line_len;
1292 }
1293 linepos = 0;
1294 line_len = 0;
1295 }
1296 _ => {} }
1298 i += 1;
1299 } else {
1300 let (cp, len) = decode_utf8(&data[i..]);
1302
1303 if cp <= 0x9F {
1305 } else if is_zero_width(cp) {
1307 } else if is_wide_char(cp) {
1309 linepos += 2;
1310 if linepos > line_len {
1311 line_len = linepos;
1312 }
1313 } else {
1314 linepos += 1;
1316 if linepos > line_len {
1317 line_len = linepos;
1318 }
1319 }
1320 i += len;
1321 }
1322 }
1323
1324 if line_len > max_len {
1326 max_len = line_len;
1327 }
1328
1329 max_len
1330}
1331
1332#[inline]
1334pub fn max_line_length(data: &[u8], utf8: bool) -> u64 {
1335 if utf8 {
1336 max_line_length_utf8(data)
1337 } else {
1338 max_line_length_c(data)
1339 }
1340}
1341
1342pub fn count_all(data: &[u8], utf8: bool) -> WcCounts {
1354 if utf8 {
1355 let (lines, words) = count_lines_words_utf8_fused(data);
1356 WcCounts {
1357 lines,
1358 words,
1359 bytes: data.len() as u64,
1360 chars: count_chars_utf8(data),
1361 max_line_length: max_line_length_utf8(data),
1362 }
1363 } else {
1364 WcCounts {
1365 lines: count_lines(data),
1366 words: count_words_locale(data, false),
1367 bytes: data.len() as u64,
1368 chars: data.len() as u64,
1369 max_line_length: max_line_length_c(data),
1370 }
1371 }
1372}
1373
/// Heuristic ASCII check that inspects at most three windows of `data`.
///
/// The windows are up to 256 bytes each: the start of the buffer, a window
/// centred on the midpoint (only when the buffer is more than twice the window
/// size), and the end of the buffer. Returns `true` when every inspected byte
/// is below 0x80 — bytes outside the windows are never looked at, so `true`
/// does not prove the whole buffer is ASCII. Empty input yields `true`.
#[inline]
fn check_ascii_sample(data: &[u8]) -> bool {
    let total = data.len();
    let window = total.min(256);

    if !data[..window].is_ascii() {
        return false;
    }

    if total > 2 * window {
        let start = (total / 2).saturating_sub(window / 2);
        let end = (start + window).min(total);
        if !data[start..end].is_ascii() {
            return false;
        }
    }

    // When the buffer fits in one window the head check already covered it.
    total <= window || data[total - window..].is_ascii()
}
1432
1433fn split_at_newlines(data: &[u8], num_chunks: usize) -> Vec<&[u8]> {
1442 if data.is_empty() || num_chunks <= 1 {
1443 return vec![data];
1444 }
1445 let chunk_size = data.len() / num_chunks;
1446 let mut chunks = Vec::with_capacity(num_chunks);
1447 let mut pos = 0;
1448
1449 for _ in 0..num_chunks - 1 {
1450 let target = pos + chunk_size;
1451 if target >= data.len() {
1452 break;
1453 }
1454 let boundary = memchr::memchr(b'\n', &data[target..])
1455 .map(|p| target + p + 1)
1456 .unwrap_or(data.len());
1457 if boundary > pos {
1458 chunks.push(&data[pos..boundary]);
1459 }
1460 pos = boundary;
1461 }
1462 if pos < data.len() {
1463 chunks.push(&data[pos..]);
1464 }
1465 chunks
1466}
1467
1468pub fn count_lines_parallel(data: &[u8]) -> u64 {
1471 if data.len() < PARALLEL_THRESHOLD {
1472 return count_lines(data);
1473 }
1474
1475 let num_threads = rayon::current_num_threads().max(1);
1476 let chunk_size = (data.len() / num_threads).max(2 * 1024 * 1024);
1478
1479 data.par_chunks(chunk_size)
1480 .map(|chunk| memchr_iter(b'\n', chunk).count() as u64)
1481 .sum()
1482}
1483
1484pub fn count_words_parallel(data: &[u8], utf8: bool) -> u64 {
1486 if data.len() < PARALLEL_THRESHOLD {
1487 return count_words_locale(data, utf8);
1488 }
1489
1490 let num_threads = rayon::current_num_threads().max(1);
1491
1492 if utf8 {
1493 let chunks = split_at_newlines(data, num_threads);
1496 chunks.par_iter().map(|chunk| count_words_utf8(chunk)).sum()
1497 } else {
1498 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1500
1501 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1502
1503 let results: Vec<(u64, u64, bool, bool)> = chunks
1505 .par_iter()
1506 .map(|chunk| count_lw_c_chunk(chunk))
1507 .collect();
1508
1509 let mut total = 0u64;
1510 for i in 0..results.len() {
1511 total += results[i].1;
1512 if i > 0 && results[i - 1].3 && results[i].2 {
1516 total -= 1;
1517 }
1518 }
1519 total
1520 }
1521}
1522
1523pub fn count_chars_parallel(data: &[u8], utf8: bool) -> u64 {
1525 if !utf8 {
1526 return data.len() as u64;
1527 }
1528 if data.len() < PARALLEL_THRESHOLD {
1529 return count_chars_utf8(data);
1530 }
1531
1532 let num_threads = rayon::current_num_threads().max(1);
1533 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1534
1535 data.par_chunks(chunk_size).map(count_chars_utf8).sum()
1536}
1537
1538pub fn count_lwb(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1541 let (lines, words) = count_lines_words(data, utf8);
1542 (lines, words, data.len() as u64)
1543}
1544
1545pub fn count_lwb_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1551 if data.len() < PARALLEL_THRESHOLD {
1552 return count_lwb(data, utf8);
1554 }
1555
1556 let num_threads = rayon::current_num_threads().max(1);
1557
1558 let (lines, words) = if !utf8 {
1559 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1561
1562 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1563 let results: Vec<(u64, u64, bool, bool)> = chunks
1564 .par_iter()
1565 .map(|chunk| count_lw_c_chunk_fast(chunk))
1566 .collect();
1567
1568 let mut line_total = 0u64;
1569 let mut word_total = 0u64;
1570 for i in 0..results.len() {
1571 line_total += results[i].0;
1572 word_total += results[i].1;
1573 if i > 0 && results[i - 1].3 && results[i].2 {
1574 word_total -= 1;
1575 }
1576 }
1577
1578 (line_total, word_total)
1579 } else {
1580 let is_ascii = check_ascii_sample(data);
1582 if is_ascii {
1583 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1585 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1586 let results: Vec<(u64, u64, bool, bool)> = chunks
1587 .par_iter()
1588 .map(|chunk| count_lw_c_chunk_fast(chunk))
1589 .collect();
1590
1591 let mut line_total = 0u64;
1592 let mut word_total = 0u64;
1593 for i in 0..results.len() {
1594 line_total += results[i].0;
1595 word_total += results[i].1;
1596 if i > 0 && results[i - 1].3 && results[i].2 {
1597 word_total -= 1;
1598 }
1599 }
1600 (line_total, word_total)
1601 } else {
1602 let chunks = split_at_newlines(data, num_threads);
1605 let results: Vec<(u64, u64)> = chunks
1606 .par_iter()
1607 .map(|chunk| count_lines_words_utf8_fused(chunk))
1608 .collect();
1609 let mut line_total = 0u64;
1610 let mut word_total = 0u64;
1611 for (l, w) in results {
1612 line_total += l;
1613 word_total += w;
1614 }
1615 (line_total, word_total)
1616 }
1617 };
1618
1619 (lines, words, data.len() as u64)
1620}
1621
1622pub fn count_lwc_parallel(data: &[u8], utf8: bool) -> (u64, u64, u64) {
1626 if data.len() < PARALLEL_THRESHOLD {
1627 let lines = count_lines(data);
1628 let words = count_words_locale(data, utf8);
1629 let chars = count_chars(data, utf8);
1630 return (lines, words, chars);
1631 }
1632
1633 let num_threads = rayon::current_num_threads().max(1);
1634
1635 if utf8 {
1636 let chunks = split_at_newlines(data, num_threads);
1638 let results: Vec<(u64, u64, u64)> = chunks
1639 .par_iter()
1640 .map(|chunk| {
1641 let (lines, words) = count_lines_words_utf8_fused(chunk);
1642 let chars = count_chars_utf8(chunk);
1643 (lines, words, chars)
1644 })
1645 .collect();
1646 let mut lines = 0u64;
1647 let mut words = 0u64;
1648 let mut chars = 0u64;
1649 for (l, w, c) in results {
1650 lines += l;
1651 words += w;
1652 chars += c;
1653 }
1654 (lines, words, chars)
1655 } else {
1656 let chunk_size = (data.len() / num_threads).max(1024 * 1024);
1658 let chunks: Vec<&[u8]> = data.chunks(chunk_size).collect();
1659 let results: Vec<(u64, u64, bool, bool)> = chunks
1660 .par_iter()
1661 .map(|chunk| count_lw_c_chunk_fast(chunk))
1662 .collect();
1663 let mut lines = 0u64;
1664 let mut words = 0u64;
1665 for i in 0..results.len() {
1666 lines += results[i].0;
1667 words += results[i].1;
1668 if i > 0 && results[i - 1].3 && results[i].2 {
1669 words -= 1;
1670 }
1671 }
1672 (lines, words, data.len() as u64)
1673 }
1674}
1675
1676pub fn max_line_length_parallel(data: &[u8], utf8: bool) -> u64 {
1680 if data.len() < PARALLEL_THRESHOLD {
1681 return max_line_length(data, utf8);
1682 }
1683 let num_threads = rayon::current_num_threads().max(1);
1684 let chunks = split_at_newlines(data, num_threads);
1685 chunks
1686 .par_iter()
1687 .map(|chunk| {
1688 if utf8 {
1689 max_line_length_utf8(chunk)
1690 } else {
1691 max_line_length_c(chunk)
1692 }
1693 })
1694 .max()
1695 .unwrap_or(0)
1696}
1697
1698pub fn count_all_parallel(data: &[u8], utf8: bool) -> WcCounts {
1702 if data.len() < PARALLEL_THRESHOLD {
1703 return count_all(data, utf8);
1704 }
1705
1706 let num_threads = rayon::current_num_threads().max(1);
1707 let chunks = split_at_newlines(data, num_threads);
1708
1709 if utf8 {
1710 let results: Vec<(u64, u64, u64, u64)> = chunks
1711 .par_iter()
1712 .map(|chunk| {
1713 let (lines, words) = count_lines_words_utf8_fused(chunk);
1714 let chars = count_chars_utf8(chunk);
1715 let max_ll = max_line_length_utf8(chunk);
1716 (lines, words, chars, max_ll)
1717 })
1718 .collect();
1719
1720 let mut counts = WcCounts {
1721 bytes: data.len() as u64,
1722 ..Default::default()
1723 };
1724 for (l, w, c, m) in results {
1725 counts.lines += l;
1726 counts.words += w;
1727 counts.chars += c;
1728 if m > counts.max_line_length {
1729 counts.max_line_length = m;
1730 }
1731 }
1732 counts
1733 } else {
1734 let results: Vec<(u64, u64, u64)> = chunks
1736 .par_iter()
1737 .map(|chunk| {
1738 let (lines, words) = count_lines_words(chunk, false);
1739 let max_ll = max_line_length_c(chunk);
1740 (lines, words, max_ll)
1741 })
1742 .collect();
1743
1744 let mut counts = WcCounts {
1745 bytes: data.len() as u64,
1746 chars: data.len() as u64,
1747 ..Default::default()
1748 };
1749 for (l, w, m) in &results {
1750 counts.lines += l;
1751 counts.words += w;
1752 if *m > counts.max_line_length {
1753 counts.max_line_length = *m;
1754 }
1755 }
1756 counts
1757 }
1758}