1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, IoSlice, Write};
4
/// Inputs of at least this many bytes are split into chunks and processed in parallel.
const PARALLEL_THRESHOLD: usize = 512 * 1024;

/// Largest number of `IoSlice`s handed to a single `write_vectored` call.
const MAX_IOV: usize = 1024;
13
/// Options controlling one `cut` run.
pub struct CutConfig<'a> {
    /// Selection mode. Not read by the code visible in this section; see `CutMode`.
    pub mode: CutMode,
    /// Selected ranges (1-based, inclusive).
    /// NOTE(review): the fast paths look like they assume the sorted, merged list that
    /// `parse_ranges` returns — confirm against callers.
    pub ranges: &'a [Range],
    /// When true, everything *not* covered by `ranges` is selected.
    pub complement: bool,
    /// Input field delimiter byte.
    pub delim: u8,
    /// Bytes written between selected fields on output.
    pub output_delim: &'a [u8],
    /// When true, lines that contain no `delim` are dropped instead of passed through.
    pub suppress_no_delim: bool,
    /// Byte that terminates each input line and is written after each output line.
    pub line_delim: u8,
}
24
/// An inclusive, 1-based range of fields or positions.
///
/// `parse_ranges` uses `end == usize::MAX` for an open-ended range such as `3-`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected index (1-based).
    pub start: usize,
    /// Last selected index (inclusive); `usize::MAX` means "to the end of the line".
    pub end: usize,
}
31
32pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
35 let mut ranges = Vec::new();
36
37 for part in spec.split(',') {
38 let part = part.trim();
39 if part.is_empty() {
40 continue;
41 }
42
43 if let Some(idx) = part.find('-') {
44 let left = &part[..idx];
45 let right = &part[idx + 1..];
46
47 let start = if left.is_empty() {
48 1
49 } else {
50 left.parse::<usize>()
51 .map_err(|_| format!("invalid range: '{}'", part))?
52 };
53
54 let end = if right.is_empty() {
55 usize::MAX
56 } else {
57 right
58 .parse::<usize>()
59 .map_err(|_| format!("invalid range: '{}'", part))?
60 };
61
62 if start == 0 {
63 return Err("fields and positions are numbered from 1".to_string());
64 }
65 if start > end {
66 return Err(format!("invalid decreasing range: '{}'", part));
67 }
68
69 ranges.push(Range { start, end });
70 } else {
71 let n = part
72 .parse::<usize>()
73 .map_err(|_| format!("invalid field: '{}'", part))?;
74 if n == 0 {
75 return Err("fields and positions are numbered from 1".to_string());
76 }
77 ranges.push(Range { start: n, end: n });
78 }
79 }
80
81 if ranges.is_empty() {
82 return Err("you must specify a list of bytes, characters, or fields".to_string());
83 }
84
85 ranges.sort_by_key(|r| (r.start, r.end));
87 let mut merged = vec![ranges[0].clone()];
88 for r in &ranges[1..] {
89 let last = merged.last_mut().unwrap();
90 if r.start <= last.end.saturating_add(1) {
91 last.end = last.end.max(r.end);
92 } else {
93 merged.push(r.clone());
94 }
95 }
96
97 Ok(merged)
98}
99
100#[inline(always)]
103fn in_ranges(ranges: &[Range], pos: usize) -> bool {
104 for r in ranges {
105 if pos < r.start {
106 return false;
107 }
108 if pos <= r.end {
109 return true;
110 }
111 }
112 false
113}
114
115#[inline]
118fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
119 let mut mask: u64 = 0;
120 for i in 1..=64u32 {
121 let in_range = in_ranges(ranges, i as usize);
122 if in_range != complement {
123 mask |= 1u64 << (i - 1);
124 }
125 }
126 mask
127}
128
129#[inline(always)]
131fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
132 if field_num <= 64 {
133 (mask >> (field_num - 1)) & 1 == 1
134 } else {
135 in_ranges(ranges, field_num) != complement
136 }
137}
138
/// Appends `data` to `buf` without checking or growing capacity.
///
/// # Safety
///
/// `buf` must already have at least `data.len()` bytes of spare capacity
/// (`buf.capacity() - buf.len() >= data.len()`); otherwise this writes past
/// the end of the allocation.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    // Catch a violated contract in debug builds instead of corrupting the heap.
    debug_assert!(
        buf.capacity() - buf.len() >= data.len(),
        "buf_extend: insufficient spare capacity"
    );
    // SAFETY: the caller guarantees the spare capacity; `data` cannot overlap
    // `buf` because `buf` is borrowed mutably.
    unsafe {
        let len = buf.len();
        std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr().add(len), data.len());
        buf.set_len(len + data.len());
    }
}
151
/// Appends the single byte `b` to `buf` without checking or growing capacity.
///
/// # Safety
///
/// `buf` must already have at least one byte of spare capacity
/// (`buf.len() < buf.capacity()`); otherwise this writes past the end of the
/// allocation.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    // Catch a violated contract in debug builds instead of corrupting the heap.
    debug_assert!(
        buf.len() < buf.capacity(),
        "buf_push: insufficient spare capacity"
    );
    // SAFETY: the caller guarantees one spare byte past `len`.
    unsafe {
        let len = buf.len();
        buf.as_mut_ptr().add(len).write(b);
        buf.set_len(len + 1);
    }
}
162
163#[inline]
167fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
168 if slices.is_empty() {
169 return Ok(());
170 }
171 for batch in slices.chunks(MAX_IOV) {
172 let total: usize = batch.iter().map(|s| s.len()).sum();
173 let written = out.write_vectored(batch)?;
174 if written >= total {
175 continue;
176 }
177 if written == 0 {
178 return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
179 }
180 write_ioslices_slow(out, batch, written)?;
181 }
182 Ok(())
183}
184
/// Finishes a partially completed vectored write.
///
/// `skip` is the number of leading bytes of `slices` that were already
/// written; everything after that is written with `write_all`.
#[cold]
#[inline(never)]
fn write_ioslices_slow(
    out: &mut impl Write,
    slices: &[IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for slice in slices {
        match skip.checked_sub(slice.len()) {
            // This slice was written in full; consume it from the skip count.
            Some(remaining) => skip = remaining,
            // The skip count ends inside this slice: write its unwritten tail.
            None => {
                out.write_all(&slice[skip..])?;
                skip = 0;
            }
        }
    }
    Ok(())
}
204
205fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
209 let num_threads = rayon::current_num_threads().max(1);
210 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
211 return vec![data];
212 }
213
214 let chunk_size = data.len() / num_threads;
215 let mut chunks = Vec::with_capacity(num_threads);
216 let mut pos = 0;
217
218 for _ in 0..num_threads - 1 {
219 let target = pos + chunk_size;
220 if target >= data.len() {
221 break;
222 }
223 let boundary = memchr::memchr(line_delim, &data[target..])
224 .map(|p| target + p + 1)
225 .unwrap_or(data.len());
226 if boundary > pos {
227 chunks.push(&data[pos..boundary]);
228 }
229 pos = boundary;
230 }
231
232 if pos < data.len() {
233 chunks.push(&data[pos..]);
234 }
235
236 chunks
237}
238
239fn process_fields_multi_select(
246 data: &[u8],
247 delim: u8,
248 line_delim: u8,
249 ranges: &[Range],
250 suppress: bool,
251 out: &mut impl Write,
252) -> io::Result<()> {
253 let max_field = ranges.last().map_or(0, |r| r.end);
254
255 if data.len() >= PARALLEL_THRESHOLD {
256 let chunks = split_into_chunks(data, line_delim);
257 let results: Vec<Vec<u8>> = chunks
258 .par_iter()
259 .map(|chunk| {
260 let mut buf = Vec::with_capacity(chunk.len() * 3 / 4);
262 multi_select_chunk(
263 chunk, delim, line_delim, ranges, max_field, suppress, &mut buf,
264 );
265 buf
266 })
267 .collect();
268 let slices: Vec<IoSlice> = results
269 .iter()
270 .filter(|r| !r.is_empty())
271 .map(|r| IoSlice::new(r))
272 .collect();
273 write_ioslices(out, &slices)?;
274 } else {
275 let mut buf = Vec::with_capacity(data.len() * 3 / 4);
276 multi_select_chunk(
277 data, delim, line_delim, ranges, max_field, suppress, &mut buf,
278 );
279 if !buf.is_empty() {
280 out.write_all(&buf)?;
281 }
282 }
283 Ok(())
284}
285
286fn multi_select_chunk(
292 data: &[u8],
293 delim: u8,
294 line_delim: u8,
295 ranges: &[Range],
296 max_field: usize,
297 suppress: bool,
298 buf: &mut Vec<u8>,
299) {
300 if delim == line_delim {
302 buf.reserve(data.len());
303 let base = data.as_ptr();
304 let mut start = 0;
305 for end_pos in memchr_iter(line_delim, data) {
306 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
307 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
308 start = end_pos + 1;
309 }
310 if start < data.len() {
311 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
312 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
313 }
314 return;
315 }
316
317 buf.reserve(data.len());
318 let base = data.as_ptr();
319 let data_len = data.len();
320
321 let mut line_start: usize = 0;
323 let mut delim_pos = [0usize; 64];
324 let mut num_delims: usize = 0;
325 let max_delims = max_field.min(64);
326 let mut at_max = false;
327
328 for pos in memchr::memchr2_iter(delim, line_delim, data) {
330 let byte = unsafe { *base.add(pos) };
331
332 if byte == line_delim {
333 let line_len = pos - line_start;
335 if num_delims == 0 {
336 if !suppress {
338 unsafe {
339 buf_extend(
340 buf,
341 std::slice::from_raw_parts(base.add(line_start), line_len),
342 );
343 buf_push(buf, line_delim);
344 }
345 }
346 } else {
347 let total_fields = num_delims + 1;
349 let mut first_output = true;
350
351 for r in ranges {
352 let range_start = r.start;
353 let range_end = r.end.min(total_fields);
354 if range_start > total_fields {
355 break;
356 }
357 for field_num in range_start..=range_end {
358 if field_num > total_fields {
359 break;
360 }
361
362 let field_start = if field_num == 1 {
363 line_start
364 } else if field_num - 2 < num_delims {
365 delim_pos[field_num - 2] + 1
366 } else {
367 continue;
368 };
369 let field_end = if field_num <= num_delims {
370 delim_pos[field_num - 1]
371 } else {
372 pos
373 };
374
375 if !first_output {
376 unsafe { buf_push(buf, delim) };
377 }
378 unsafe {
379 buf_extend(
380 buf,
381 std::slice::from_raw_parts(
382 base.add(field_start),
383 field_end - field_start,
384 ),
385 );
386 }
387 first_output = false;
388 }
389 }
390
391 unsafe { buf_push(buf, line_delim) };
392 }
393
394 line_start = pos + 1;
396 num_delims = 0;
397 at_max = false;
398 } else {
399 if !at_max && num_delims < max_delims {
401 delim_pos[num_delims] = pos;
402 num_delims += 1;
403 if num_delims >= max_delims {
404 at_max = true;
405 }
406 }
407 }
408 }
409
410 if line_start < data_len {
412 if num_delims == 0 {
413 if !suppress {
414 unsafe {
415 buf_extend(
416 buf,
417 std::slice::from_raw_parts(base.add(line_start), data_len - line_start),
418 );
419 buf_push(buf, line_delim);
420 }
421 }
422 } else {
423 let total_fields = num_delims + 1;
424 let mut first_output = true;
425
426 for r in ranges {
427 let range_start = r.start;
428 let range_end = r.end.min(total_fields);
429 if range_start > total_fields {
430 break;
431 }
432 for field_num in range_start..=range_end {
433 if field_num > total_fields {
434 break;
435 }
436
437 let field_start = if field_num == 1 {
438 line_start
439 } else if field_num - 2 < num_delims {
440 delim_pos[field_num - 2] + 1
441 } else {
442 continue;
443 };
444 let field_end = if field_num <= num_delims {
445 delim_pos[field_num - 1]
446 } else {
447 data_len
448 };
449
450 if !first_output {
451 unsafe { buf_push(buf, delim) };
452 }
453 unsafe {
454 buf_extend(
455 buf,
456 std::slice::from_raw_parts(
457 base.add(field_start),
458 field_end - field_start,
459 ),
460 );
461 }
462 first_output = false;
463 }
464 }
465
466 unsafe { buf_push(buf, line_delim) };
467 }
468 }
469}
470
471#[inline(always)]
476fn multi_select_line(
477 line: &[u8],
478 delim: u8,
479 line_delim: u8,
480 ranges: &[Range],
481 max_field: usize,
482 suppress: bool,
483 buf: &mut Vec<u8>,
484) {
485 let len = line.len();
486 if len == 0 {
487 if !suppress {
488 unsafe { buf_push(buf, line_delim) };
489 }
490 return;
491 }
492
493 let base = line.as_ptr();
495
496 let mut delim_pos = [0usize; 64];
499 let mut num_delims: usize = 0;
500 let max_delims = max_field.min(64);
501
502 for pos in memchr_iter(delim, line) {
503 if num_delims < max_delims {
504 delim_pos[num_delims] = pos;
505 num_delims += 1;
506 if num_delims >= max_delims {
507 break;
508 }
509 }
510 }
511
512 if num_delims == 0 {
513 if !suppress {
514 unsafe {
515 buf_extend(buf, line);
516 buf_push(buf, line_delim);
517 }
518 }
519 return;
520 }
521
522 let total_fields = num_delims + 1;
526 let mut first_output = true;
527
528 for r in ranges {
529 let range_start = r.start;
530 let range_end = r.end.min(total_fields);
531 if range_start > total_fields {
532 break;
533 }
534 for field_num in range_start..=range_end {
535 if field_num > total_fields {
536 break;
537 }
538
539 let field_start = if field_num == 1 {
540 0
541 } else if field_num - 2 < num_delims {
542 delim_pos[field_num - 2] + 1
543 } else {
544 continue;
545 };
546 let field_end = if field_num <= num_delims {
547 delim_pos[field_num - 1]
548 } else {
549 len
550 };
551
552 if !first_output {
553 unsafe { buf_push(buf, delim) };
554 }
555 unsafe {
556 buf_extend(
557 buf,
558 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
559 );
560 }
561 first_output = false;
562 }
563 }
564
565 unsafe { buf_push(buf, line_delim) };
566}
567
568fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
572 let delim = cfg.delim;
573 let line_delim = cfg.line_delim;
574 let ranges = cfg.ranges;
575 let complement = cfg.complement;
576 let output_delim = cfg.output_delim;
577 let suppress = cfg.suppress_no_delim;
578
579 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
587 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
588 }
589
590 if complement
592 && ranges.len() == 1
593 && output_delim.len() == 1
594 && output_delim[0] == delim
595 && ranges[0].start == ranges[0].end
596 {
597 return process_complement_single_field(
598 data,
599 delim,
600 line_delim,
601 ranges[0].start,
602 suppress,
603 out,
604 );
605 }
606
607 if complement
610 && ranges.len() == 1
611 && ranges[0].start > 1
612 && ranges[0].end < usize::MAX
613 && output_delim.len() == 1
614 && output_delim[0] == delim
615 {
616 return process_complement_range(
617 data,
618 delim,
619 line_delim,
620 ranges[0].start,
621 ranges[0].end,
622 suppress,
623 out,
624 );
625 }
626
627 if !complement
629 && ranges.len() == 1
630 && ranges[0].start == 1
631 && output_delim.len() == 1
632 && output_delim[0] == delim
633 && ranges[0].end < usize::MAX
634 {
635 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
636 }
637
638 if !complement
640 && ranges.len() == 1
641 && ranges[0].end == usize::MAX
642 && ranges[0].start > 1
643 && output_delim.len() == 1
644 && output_delim[0] == delim
645 {
646 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
647 }
648
649 if !complement
651 && ranges.len() == 1
652 && ranges[0].start > 1
653 && ranges[0].end < usize::MAX
654 && output_delim.len() == 1
655 && output_delim[0] == delim
656 {
657 return process_fields_mid_range(
658 data,
659 delim,
660 line_delim,
661 ranges[0].start,
662 ranges[0].end,
663 suppress,
664 out,
665 );
666 }
667
668 if !complement
674 && ranges.len() > 1
675 && ranges.last().map_or(false, |r| r.end < usize::MAX)
676 && output_delim.len() == 1
677 && output_delim[0] == delim
678 && delim != line_delim
679 {
680 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
681 }
682
683 let max_field = if complement {
685 usize::MAX
686 } else {
687 ranges.last().map(|r| r.end).unwrap_or(0)
688 };
689 let field_mask = compute_field_mask(ranges, complement);
690
691 if data.len() >= PARALLEL_THRESHOLD {
692 let chunks = split_into_chunks(data, line_delim);
693 let results: Vec<Vec<u8>> = chunks
694 .par_iter()
695 .map(|chunk| {
696 let mut buf = Vec::with_capacity(chunk.len());
697 process_fields_chunk(
698 chunk,
699 delim,
700 ranges,
701 output_delim,
702 suppress,
703 max_field,
704 field_mask,
705 line_delim,
706 complement,
707 &mut buf,
708 );
709 buf
710 })
711 .collect();
712 let slices: Vec<IoSlice> = results
714 .iter()
715 .filter(|r| !r.is_empty())
716 .map(|r| IoSlice::new(r))
717 .collect();
718 write_ioslices(out, &slices)?;
719 } else {
720 let mut buf = Vec::with_capacity(data.len());
721 process_fields_chunk(
722 data,
723 delim,
724 ranges,
725 output_delim,
726 suppress,
727 max_field,
728 field_mask,
729 line_delim,
730 complement,
731 &mut buf,
732 );
733 if !buf.is_empty() {
734 out.write_all(&buf)?;
735 }
736 }
737 Ok(())
738}
739
740fn process_fields_chunk(
745 data: &[u8],
746 delim: u8,
747 ranges: &[Range],
748 output_delim: &[u8],
749 suppress: bool,
750 max_field: usize,
751 field_mask: u64,
752 line_delim: u8,
753 complement: bool,
754 buf: &mut Vec<u8>,
755) {
756 if delim != line_delim && max_field < usize::MAX && !complement {
763 buf.reserve(data.len());
764 let mut start = 0;
765 for end_pos in memchr_iter(line_delim, data) {
766 let line = &data[start..end_pos];
767 extract_fields_to_buf(
768 line,
769 delim,
770 ranges,
771 output_delim,
772 suppress,
773 max_field,
774 field_mask,
775 line_delim,
776 buf,
777 complement,
778 );
779 start = end_pos + 1;
780 }
781 if start < data.len() {
782 extract_fields_to_buf(
783 &data[start..],
784 delim,
785 ranges,
786 output_delim,
787 suppress,
788 max_field,
789 field_mask,
790 line_delim,
791 buf,
792 complement,
793 );
794 }
795 return;
796 }
797
798 if delim != line_delim {
802 buf.reserve(data.len());
803
804 let data_len = data.len();
805 let base = data.as_ptr();
806 let mut line_start: usize = 0;
807 let mut field_start: usize = 0;
808 let mut field_num: usize = 1;
809 let mut first_output = true;
810 let mut has_delim = false;
811
812 for pos in memchr::memchr2_iter(delim, line_delim, data) {
813 let byte = unsafe { *base.add(pos) };
814
815 if byte == line_delim {
816 if (field_num <= max_field || complement)
818 && has_delim
819 && is_selected(field_num, field_mask, ranges, complement)
820 {
821 if !first_output {
822 unsafe { buf_extend(buf, output_delim) };
823 }
824 unsafe {
825 buf_extend(
826 buf,
827 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
828 )
829 };
830 first_output = false;
831 }
832
833 if !first_output {
834 unsafe { buf_push(buf, line_delim) };
835 } else if !has_delim {
836 if !suppress {
837 unsafe {
838 buf_extend(
839 buf,
840 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
841 );
842 buf_push(buf, line_delim);
843 }
844 }
845 } else {
846 unsafe { buf_push(buf, line_delim) };
847 }
848
849 line_start = pos + 1;
851 field_start = pos + 1;
852 field_num = 1;
853 first_output = true;
854 has_delim = false;
855 } else {
856 has_delim = true;
858
859 if is_selected(field_num, field_mask, ranges, complement) {
860 if !first_output {
861 unsafe { buf_extend(buf, output_delim) };
862 }
863 unsafe {
864 buf_extend(
865 buf,
866 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
867 )
868 };
869 first_output = false;
870 }
871
872 field_num += 1;
873 field_start = pos + 1;
874 }
875 }
876
877 if line_start < data_len {
879 if line_start < data_len {
880 if (field_num <= max_field || complement)
881 && has_delim
882 && is_selected(field_num, field_mask, ranges, complement)
883 {
884 if !first_output {
885 unsafe { buf_extend(buf, output_delim) };
886 }
887 unsafe {
888 buf_extend(
889 buf,
890 std::slice::from_raw_parts(
891 base.add(field_start),
892 data_len - field_start,
893 ),
894 )
895 };
896 first_output = false;
897 }
898
899 if !first_output {
900 unsafe { buf_push(buf, line_delim) };
901 } else if !has_delim {
902 if !suppress {
903 unsafe {
904 buf_extend(
905 buf,
906 std::slice::from_raw_parts(
907 base.add(line_start),
908 data_len - line_start,
909 ),
910 );
911 buf_push(buf, line_delim);
912 }
913 }
914 } else {
915 unsafe { buf_push(buf, line_delim) };
916 }
917 }
918 }
919
920 return;
921 }
922
923 let mut start = 0;
925 for end_pos in memchr_iter(line_delim, data) {
926 let line = &data[start..end_pos];
927 extract_fields_to_buf(
928 line,
929 delim,
930 ranges,
931 output_delim,
932 suppress,
933 max_field,
934 field_mask,
935 line_delim,
936 buf,
937 complement,
938 );
939 start = end_pos + 1;
940 }
941 if start < data.len() {
942 extract_fields_to_buf(
943 &data[start..],
944 delim,
945 ranges,
946 output_delim,
947 suppress,
948 max_field,
949 field_mask,
950 line_delim,
951 buf,
952 complement,
953 );
954 }
955}
956
957fn process_single_field(
963 data: &[u8],
964 delim: u8,
965 line_delim: u8,
966 target: usize,
967 suppress: bool,
968 out: &mut impl Write,
969) -> io::Result<()> {
970 let target_idx = target - 1;
971
972 if delim != line_delim {
973 if target_idx == 0 && !suppress {
979 if data.len() >= PARALLEL_THRESHOLD {
980 return single_field1_parallel(data, delim, line_delim, out);
981 }
982 return single_field1_zerocopy(data, delim, line_delim, out);
983 }
984
985 if data.len() >= PARALLEL_THRESHOLD {
989 let chunks = split_into_chunks(data, line_delim);
990 let results: Vec<Vec<u8>> = chunks
991 .par_iter()
992 .map(|chunk| {
993 let mut buf = Vec::with_capacity(chunk.len() / 2);
994 process_single_field_chunk(
995 chunk, delim, target_idx, line_delim, suppress, &mut buf,
996 );
997 buf
998 })
999 .collect();
1000 let slices: Vec<IoSlice> = results
1001 .iter()
1002 .filter(|r| !r.is_empty())
1003 .map(|r| IoSlice::new(r))
1004 .collect();
1005 write_ioslices(out, &slices)?;
1006 } else {
1007 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
1008 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1009 if !buf.is_empty() {
1010 out.write_all(&buf)?;
1011 }
1012 }
1013 return Ok(());
1014 }
1015
1016 if data.len() >= PARALLEL_THRESHOLD {
1018 let chunks = split_into_chunks(data, line_delim);
1019 let results: Vec<Vec<u8>> = chunks
1020 .par_iter()
1021 .map(|chunk| {
1022 let mut buf = Vec::with_capacity(chunk.len() / 4);
1023 process_single_field_chunk(
1024 chunk, delim, target_idx, line_delim, suppress, &mut buf,
1025 );
1026 buf
1027 })
1028 .collect();
1029 let slices: Vec<IoSlice> = results
1031 .iter()
1032 .filter(|r| !r.is_empty())
1033 .map(|r| IoSlice::new(r))
1034 .collect();
1035 write_ioslices(out, &slices)?;
1036 } else {
1037 let mut buf = Vec::with_capacity(data.len() / 4);
1038 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1039 if !buf.is_empty() {
1040 out.write_all(&buf)?;
1041 }
1042 }
1043 Ok(())
1044}
1045
1046fn process_complement_range(
1049 data: &[u8],
1050 delim: u8,
1051 line_delim: u8,
1052 skip_start: usize,
1053 skip_end: usize,
1054 suppress: bool,
1055 out: &mut impl Write,
1056) -> io::Result<()> {
1057 if data.len() >= PARALLEL_THRESHOLD {
1058 let chunks = split_into_chunks(data, line_delim);
1059 let results: Vec<Vec<u8>> = chunks
1060 .par_iter()
1061 .map(|chunk| {
1062 let mut buf = Vec::with_capacity(chunk.len());
1063 complement_range_chunk(
1064 chunk, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
1065 );
1066 buf
1067 })
1068 .collect();
1069 let slices: Vec<IoSlice> = results
1070 .iter()
1071 .filter(|r| !r.is_empty())
1072 .map(|r| IoSlice::new(r))
1073 .collect();
1074 write_ioslices(out, &slices)?;
1075 } else {
1076 let mut buf = Vec::with_capacity(data.len());
1077 complement_range_chunk(
1078 data, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
1079 );
1080 if !buf.is_empty() {
1081 out.write_all(&buf)?;
1082 }
1083 }
1084 Ok(())
1085}
1086
1087fn complement_range_chunk(
1089 data: &[u8],
1090 delim: u8,
1091 skip_start: usize,
1092 skip_end: usize,
1093 line_delim: u8,
1094 suppress: bool,
1095 buf: &mut Vec<u8>,
1096) {
1097 let mut start = 0;
1098 for end_pos in memchr_iter(line_delim, data) {
1099 let line = &data[start..end_pos];
1100 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
1101 start = end_pos + 1;
1102 }
1103 if start < data.len() {
1104 complement_range_line(
1105 &data[start..],
1106 delim,
1107 skip_start,
1108 skip_end,
1109 line_delim,
1110 suppress,
1111 buf,
1112 );
1113 }
1114}
1115
1116#[inline(always)]
1123fn complement_range_line(
1124 line: &[u8],
1125 delim: u8,
1126 skip_start: usize,
1127 skip_end: usize,
1128 line_delim: u8,
1129 suppress: bool,
1130 buf: &mut Vec<u8>,
1131) {
1132 let len = line.len();
1133 if len == 0 {
1134 if !suppress {
1135 buf.push(line_delim);
1136 }
1137 return;
1138 }
1139
1140 buf.reserve(len + 1);
1141 let base = line.as_ptr();
1142
1143 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
1153
1154 let mut delim_count: usize = 0;
1156 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
1160 delim_count += 1;
1161 if delim_count == need_prefix_delims {
1162 prefix_end_pos = pos;
1163 }
1164 if delim_count == total_need {
1165 suffix_start_pos = pos + 1;
1166 break;
1167 }
1168 }
1169
1170 if delim_count == 0 {
1171 if !suppress {
1173 unsafe {
1174 buf_extend(buf, line);
1175 buf_push(buf, line_delim);
1176 }
1177 }
1178 return;
1179 }
1180
1181 if delim_count < need_prefix_delims {
1187 unsafe {
1189 buf_extend(buf, line);
1190 buf_push(buf, line_delim);
1191 }
1192 return;
1193 }
1194
1195 let has_prefix = need_prefix_delims > 0;
1196 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
1197
1198 if has_prefix && has_suffix {
1199 unsafe {
1201 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1202 buf_push(buf, delim);
1203 buf_extend(
1204 buf,
1205 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1206 );
1207 buf_push(buf, line_delim);
1208 }
1209 } else if has_prefix {
1210 unsafe {
1212 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1213 buf_push(buf, line_delim);
1214 }
1215 } else if has_suffix {
1216 unsafe {
1218 buf_extend(
1219 buf,
1220 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1221 );
1222 buf_push(buf, line_delim);
1223 }
1224 } else {
1225 unsafe { buf_push(buf, line_delim) };
1227 }
1228}
1229
1230fn process_complement_single_field(
1232 data: &[u8],
1233 delim: u8,
1234 line_delim: u8,
1235 skip_field: usize,
1236 suppress: bool,
1237 out: &mut impl Write,
1238) -> io::Result<()> {
1239 let skip_idx = skip_field - 1;
1240
1241 if data.len() >= PARALLEL_THRESHOLD {
1242 let chunks = split_into_chunks(data, line_delim);
1243 let results: Vec<Vec<u8>> = chunks
1244 .par_iter()
1245 .map(|chunk| {
1246 let mut buf = Vec::with_capacity(chunk.len());
1247 complement_single_field_chunk(
1248 chunk, delim, skip_idx, line_delim, suppress, &mut buf,
1249 );
1250 buf
1251 })
1252 .collect();
1253 let slices: Vec<IoSlice> = results
1255 .iter()
1256 .filter(|r| !r.is_empty())
1257 .map(|r| IoSlice::new(r))
1258 .collect();
1259 write_ioslices(out, &slices)?;
1260 } else {
1261 let mut buf = Vec::with_capacity(data.len());
1262 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
1263 if !buf.is_empty() {
1264 out.write_all(&buf)?;
1265 }
1266 }
1267 Ok(())
1268}
1269
1270fn complement_single_field_chunk(
1272 data: &[u8],
1273 delim: u8,
1274 skip_idx: usize,
1275 line_delim: u8,
1276 suppress: bool,
1277 buf: &mut Vec<u8>,
1278) {
1279 let mut start = 0;
1280 for end_pos in memchr_iter(line_delim, data) {
1281 let line = &data[start..end_pos];
1282 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1283 start = end_pos + 1;
1284 }
1285 if start < data.len() {
1286 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
1287 }
1288}
1289
1290#[inline(always)]
1295fn complement_single_field_line(
1296 line: &[u8],
1297 delim: u8,
1298 skip_idx: usize,
1299 line_delim: u8,
1300 suppress: bool,
1301 buf: &mut Vec<u8>,
1302) {
1303 let len = line.len();
1304 if len == 0 {
1305 if !suppress {
1306 buf.push(line_delim);
1307 }
1308 return;
1309 }
1310
1311 buf.reserve(len + 1);
1312 let base = line.as_ptr();
1313
1314 let need_before = skip_idx; let need_total = skip_idx + 1; let mut delim_count: usize = 0;
1323 let mut skip_start_pos: usize = 0; let mut skip_end_pos: usize = len; let mut found_end = false;
1326
1327 for pos in memchr_iter(delim, line) {
1328 delim_count += 1;
1329 if delim_count == need_before {
1330 skip_start_pos = pos + 1;
1331 }
1332 if delim_count == need_total {
1333 skip_end_pos = pos;
1334 found_end = true;
1335 break;
1336 }
1337 }
1338
1339 if delim_count == 0 {
1340 if !suppress {
1342 unsafe {
1343 buf_extend(buf, line);
1344 buf_push(buf, line_delim);
1345 }
1346 }
1347 return;
1348 }
1349
1350 if delim_count < need_before {
1352 unsafe {
1353 buf_extend(buf, line);
1354 buf_push(buf, line_delim);
1355 }
1356 return;
1357 }
1358
1359 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1362 let has_suffix = found_end && skip_end_pos < len;
1363
1364 if has_prefix && has_suffix {
1365 unsafe {
1368 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1369 buf_push(buf, delim);
1370 buf_extend(
1371 buf,
1372 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1373 );
1374 buf_push(buf, line_delim);
1375 }
1376 } else if has_prefix {
1377 unsafe {
1379 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1380 buf_push(buf, line_delim);
1381 }
1382 } else if has_suffix {
1383 unsafe {
1385 buf_extend(
1386 buf,
1387 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1388 );
1389 buf_push(buf, line_delim);
1390 }
1391 } else {
1392 unsafe { buf_push(buf, line_delim) };
1394 }
1395}
1396
1397fn process_fields_prefix(
1401 data: &[u8],
1402 delim: u8,
1403 line_delim: u8,
1404 last_field: usize,
1405 suppress: bool,
1406 out: &mut impl Write,
1407) -> io::Result<()> {
1408 if data.len() >= PARALLEL_THRESHOLD {
1409 let chunks = split_into_chunks(data, line_delim);
1410 let results: Vec<Vec<u8>> = chunks
1411 .par_iter()
1412 .map(|chunk| {
1413 let mut buf = Vec::with_capacity(chunk.len());
1414 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
1415 buf
1416 })
1417 .collect();
1418 let slices: Vec<IoSlice> = results
1420 .iter()
1421 .filter(|r| !r.is_empty())
1422 .map(|r| IoSlice::new(r))
1423 .collect();
1424 write_ioslices(out, &slices)?;
1425 } else if !suppress {
1426 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1430 } else {
1431 let mut buf = Vec::with_capacity(data.len());
1432 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
1433 if !buf.is_empty() {
1434 out.write_all(&buf)?;
1435 }
1436 }
1437 Ok(())
1438}
1439
1440#[inline]
1446fn fields_prefix_zerocopy(
1447 data: &[u8],
1448 delim: u8,
1449 line_delim: u8,
1450 last_field: usize,
1451 out: &mut impl Write,
1452) -> io::Result<()> {
1453 let newline_buf: [u8; 1] = [line_delim];
1454 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1455 let mut start = 0;
1456 let mut run_start: usize = 0;
1457
1458 for end_pos in memchr_iter(line_delim, data) {
1459 let line = &data[start..end_pos];
1460 let mut field_count = 1;
1461 let mut truncate_at: Option<usize> = None;
1462 for dpos in memchr_iter(delim, line) {
1463 if field_count >= last_field {
1464 truncate_at = Some(start + dpos);
1465 break;
1466 }
1467 field_count += 1;
1468 }
1469
1470 if let Some(trunc_pos) = truncate_at {
1471 if run_start < start {
1472 iov.push(IoSlice::new(&data[run_start..start]));
1473 }
1474 iov.push(IoSlice::new(&data[start..trunc_pos]));
1475 iov.push(IoSlice::new(&newline_buf));
1476 run_start = end_pos + 1;
1477
1478 if iov.len() >= MAX_IOV - 2 {
1479 write_ioslices(out, &iov)?;
1480 iov.clear();
1481 }
1482 }
1483 start = end_pos + 1;
1484 }
1485 if start < data.len() {
1487 let line = &data[start..];
1488 let mut field_count = 1;
1489 let mut truncate_at: Option<usize> = None;
1490 for dpos in memchr_iter(delim, line) {
1491 if field_count >= last_field {
1492 truncate_at = Some(start + dpos);
1493 break;
1494 }
1495 field_count += 1;
1496 }
1497 if let Some(trunc_pos) = truncate_at {
1498 if run_start < start {
1499 iov.push(IoSlice::new(&data[run_start..start]));
1500 }
1501 iov.push(IoSlice::new(&data[start..trunc_pos]));
1502 iov.push(IoSlice::new(&newline_buf));
1503 if !iov.is_empty() {
1504 write_ioslices(out, &iov)?;
1505 }
1506 return Ok(());
1507 }
1508 }
1509 if run_start < data.len() {
1511 iov.push(IoSlice::new(&data[run_start..]));
1512 if !data.is_empty() && *data.last().unwrap() != line_delim {
1513 iov.push(IoSlice::new(&newline_buf));
1514 }
1515 }
1516 if !iov.is_empty() {
1517 write_ioslices(out, &iov)?;
1518 }
1519 Ok(())
1520}
1521
1522fn fields_prefix_chunk(
1524 data: &[u8],
1525 delim: u8,
1526 line_delim: u8,
1527 last_field: usize,
1528 suppress: bool,
1529 buf: &mut Vec<u8>,
1530) {
1531 let mut start = 0;
1532 for end_pos in memchr_iter(line_delim, data) {
1533 let line = &data[start..end_pos];
1534 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1535 start = end_pos + 1;
1536 }
1537 if start < data.len() {
1538 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1539 }
1540}
1541
1542#[inline(always)]
1545fn fields_prefix_line(
1546 line: &[u8],
1547 delim: u8,
1548 line_delim: u8,
1549 last_field: usize,
1550 suppress: bool,
1551 buf: &mut Vec<u8>,
1552) {
1553 let len = line.len();
1554 if len == 0 {
1555 if !suppress {
1556 buf.push(line_delim);
1557 }
1558 return;
1559 }
1560
1561 buf.reserve(len + 1);
1562 let base = line.as_ptr();
1563
1564 let mut field_count = 1usize;
1565 let mut has_delim = false;
1566
1567 for pos in memchr_iter(delim, line) {
1568 has_delim = true;
1569 if field_count >= last_field {
1570 unsafe {
1571 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1572 buf_push(buf, line_delim);
1573 }
1574 return;
1575 }
1576 field_count += 1;
1577 }
1578
1579 if !has_delim {
1580 if !suppress {
1581 unsafe {
1582 buf_extend(buf, line);
1583 buf_push(buf, line_delim);
1584 }
1585 }
1586 return;
1587 }
1588
1589 unsafe {
1590 buf_extend(buf, line);
1591 buf_push(buf, line_delim);
1592 }
1593}
1594
1595fn process_fields_suffix(
1597 data: &[u8],
1598 delim: u8,
1599 line_delim: u8,
1600 start_field: usize,
1601 suppress: bool,
1602 out: &mut impl Write,
1603) -> io::Result<()> {
1604 if data.len() >= PARALLEL_THRESHOLD {
1605 let chunks = split_into_chunks(data, line_delim);
1606 let results: Vec<Vec<u8>> = chunks
1607 .par_iter()
1608 .map(|chunk| {
1609 let mut buf = Vec::with_capacity(chunk.len());
1610 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
1611 buf
1612 })
1613 .collect();
1614 let slices: Vec<IoSlice> = results
1616 .iter()
1617 .filter(|r| !r.is_empty())
1618 .map(|r| IoSlice::new(r))
1619 .collect();
1620 write_ioslices(out, &slices)?;
1621 } else {
1622 let mut buf = Vec::with_capacity(data.len());
1623 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
1624 if !buf.is_empty() {
1625 out.write_all(&buf)?;
1626 }
1627 }
1628 Ok(())
1629}
1630
1631fn fields_suffix_chunk(
1633 data: &[u8],
1634 delim: u8,
1635 line_delim: u8,
1636 start_field: usize,
1637 suppress: bool,
1638 buf: &mut Vec<u8>,
1639) {
1640 let mut start = 0;
1641 for end_pos in memchr_iter(line_delim, data) {
1642 let line = &data[start..end_pos];
1643 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1644 start = end_pos + 1;
1645 }
1646 if start < data.len() {
1647 fields_suffix_line(
1648 &data[start..],
1649 delim,
1650 line_delim,
1651 start_field,
1652 suppress,
1653 buf,
1654 );
1655 }
1656}
1657
1658#[inline(always)]
1661fn fields_suffix_line(
1662 line: &[u8],
1663 delim: u8,
1664 line_delim: u8,
1665 start_field: usize,
1666 suppress: bool,
1667 buf: &mut Vec<u8>,
1668) {
1669 let len = line.len();
1670 if len == 0 {
1671 if !suppress {
1672 buf.push(line_delim);
1673 }
1674 return;
1675 }
1676
1677 buf.reserve(len + 1);
1678 let base = line.as_ptr();
1679
1680 let skip_delims = start_field - 1;
1681 let mut delim_count = 0usize;
1682 let mut has_delim = false;
1683
1684 for pos in memchr_iter(delim, line) {
1685 has_delim = true;
1686 delim_count += 1;
1687 if delim_count >= skip_delims {
1688 unsafe {
1689 buf_extend(
1690 buf,
1691 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1692 );
1693 buf_push(buf, line_delim);
1694 }
1695 return;
1696 }
1697 }
1698
1699 if !has_delim {
1700 if !suppress {
1701 unsafe {
1702 buf_extend(buf, line);
1703 buf_push(buf, line_delim);
1704 }
1705 }
1706 return;
1707 }
1708
1709 unsafe { buf_push(buf, line_delim) };
1711}
1712
1713fn process_fields_mid_range(
1716 data: &[u8],
1717 delim: u8,
1718 line_delim: u8,
1719 start_field: usize,
1720 end_field: usize,
1721 suppress: bool,
1722 out: &mut impl Write,
1723) -> io::Result<()> {
1724 if data.len() >= PARALLEL_THRESHOLD {
1725 let chunks = split_into_chunks(data, line_delim);
1726 let results: Vec<Vec<u8>> = chunks
1727 .par_iter()
1728 .map(|chunk| {
1729 let mut buf = Vec::with_capacity(chunk.len());
1730 fields_mid_range_chunk(
1731 chunk,
1732 delim,
1733 line_delim,
1734 start_field,
1735 end_field,
1736 suppress,
1737 &mut buf,
1738 );
1739 buf
1740 })
1741 .collect();
1742 let slices: Vec<IoSlice> = results
1743 .iter()
1744 .filter(|r| !r.is_empty())
1745 .map(|r| IoSlice::new(r))
1746 .collect();
1747 write_ioslices(out, &slices)?;
1748 } else {
1749 let mut buf = Vec::with_capacity(data.len());
1750 fields_mid_range_chunk(
1751 data,
1752 delim,
1753 line_delim,
1754 start_field,
1755 end_field,
1756 suppress,
1757 &mut buf,
1758 );
1759 if !buf.is_empty() {
1760 out.write_all(&buf)?;
1761 }
1762 }
1763 Ok(())
1764}
1765
1766fn fields_mid_range_chunk(
1768 data: &[u8],
1769 delim: u8,
1770 line_delim: u8,
1771 start_field: usize,
1772 end_field: usize,
1773 suppress: bool,
1774 buf: &mut Vec<u8>,
1775) {
1776 let mut start = 0;
1777 for end_pos in memchr_iter(line_delim, data) {
1778 let line = &data[start..end_pos];
1779 fields_mid_range_line(
1780 line,
1781 delim,
1782 line_delim,
1783 start_field,
1784 end_field,
1785 suppress,
1786 buf,
1787 );
1788 start = end_pos + 1;
1789 }
1790 if start < data.len() {
1791 fields_mid_range_line(
1792 &data[start..],
1793 delim,
1794 line_delim,
1795 start_field,
1796 end_field,
1797 suppress,
1798 buf,
1799 );
1800 }
1801}
1802
1803#[inline(always)]
1807fn fields_mid_range_line(
1808 line: &[u8],
1809 delim: u8,
1810 line_delim: u8,
1811 start_field: usize,
1812 end_field: usize,
1813 suppress: bool,
1814 buf: &mut Vec<u8>,
1815) {
1816 let len = line.len();
1817 if len == 0 {
1818 if !suppress {
1819 buf.push(line_delim);
1820 }
1821 return;
1822 }
1823
1824 buf.reserve(len + 1);
1825 let base = line.as_ptr();
1826
1827 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
1831 let mut delim_count = 0;
1832 let mut range_start = 0;
1833 let mut has_delim = false;
1834
1835 for pos in memchr_iter(delim, line) {
1836 has_delim = true;
1837 delim_count += 1;
1838 if delim_count == skip_before {
1839 range_start = pos + 1;
1840 }
1841 if delim_count == target_end_delim {
1842 if skip_before == 0 {
1843 range_start = 0;
1844 }
1845 unsafe {
1846 buf_extend(
1847 buf,
1848 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
1849 );
1850 buf_push(buf, line_delim);
1851 }
1852 return;
1853 }
1854 }
1855
1856 if !has_delim {
1857 if !suppress {
1858 unsafe {
1859 buf_extend(buf, line);
1860 buf_push(buf, line_delim);
1861 }
1862 }
1863 return;
1864 }
1865
1866 if delim_count >= skip_before {
1868 if skip_before == 0 {
1870 range_start = 0;
1871 }
1872 unsafe {
1873 buf_extend(
1874 buf,
1875 std::slice::from_raw_parts(base.add(range_start), len - range_start),
1876 );
1877 buf_push(buf, line_delim);
1878 }
1879 } else {
1880 unsafe { buf_push(buf, line_delim) };
1882 }
1883}
1884
1885fn single_field1_parallel(
1896 data: &[u8],
1897 delim: u8,
1898 line_delim: u8,
1899 out: &mut impl Write,
1900) -> io::Result<()> {
1901 let chunks = split_into_chunks(data, line_delim);
1902 let results: Vec<Vec<u8>> = chunks
1903 .par_iter()
1904 .map(|chunk| {
1905 let mut buf = Vec::with_capacity(chunk.len());
1906 single_field1_to_buf(chunk, delim, line_delim, &mut buf);
1907 buf
1908 })
1909 .collect();
1910 let slices: Vec<IoSlice> = results
1911 .iter()
1912 .filter(|r| !r.is_empty())
1913 .map(|r| IoSlice::new(r))
1914 .collect();
1915 write_ioslices(out, &slices)
1916}
1917
1918#[inline]
1924fn single_field1_to_buf(data: &[u8], delim: u8, line_delim: u8, buf: &mut Vec<u8>) {
1925 use memchr::memchr2;
1926 buf.reserve(data.len());
1927 let mut pos = 0;
1928 while pos < data.len() {
1929 match memchr2(delim, line_delim, &data[pos..]) {
1930 None => {
1931 unsafe {
1933 buf_extend(buf, &data[pos..]);
1934 }
1935 break;
1936 }
1937 Some(offset) => {
1938 let actual = pos + offset;
1939 if data[actual] == line_delim {
1940 unsafe {
1942 buf_extend(buf, &data[pos..actual + 1]);
1943 }
1944 pos = actual + 1;
1945 } else {
1946 unsafe {
1948 buf_extend(buf, &data[pos..actual]);
1949 buf_push(buf, line_delim);
1950 }
1951 match memchr::memchr(line_delim, &data[actual + 1..]) {
1953 None => {
1954 pos = data.len();
1955 }
1956 Some(nl_off) => {
1957 pos = actual + 1 + nl_off + 1;
1958 }
1959 }
1960 }
1961 }
1962 }
1963 }
1964}
1965
1966#[inline]
1975fn single_field1_zerocopy(
1976 data: &[u8],
1977 delim: u8,
1978 line_delim: u8,
1979 out: &mut impl Write,
1980) -> io::Result<()> {
1981 let newline_buf: [u8; 1] = [line_delim];
1982
1983 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1984 let mut run_start: usize = 0;
1985 let mut start = 0;
1986
1987 for end_pos in memchr_iter(line_delim, data) {
1988 let line = &data[start..end_pos];
1989 if let Some(dp) = memchr::memchr(delim, line) {
1990 if run_start < start {
1993 iov.push(IoSlice::new(&data[run_start..start]));
1994 }
1995 iov.push(IoSlice::new(&data[start..start + dp]));
1996 iov.push(IoSlice::new(&newline_buf));
1997 run_start = end_pos + 1;
1998
1999 if iov.len() >= MAX_IOV - 2 {
2000 write_ioslices(out, &iov)?;
2001 iov.clear();
2002 }
2003 }
2004 start = end_pos + 1;
2006 }
2007
2008 if start < data.len() {
2010 let line = &data[start..];
2011 if let Some(dp) = memchr::memchr(delim, line) {
2012 if run_start < start {
2013 iov.push(IoSlice::new(&data[run_start..start]));
2014 }
2015 iov.push(IoSlice::new(&data[start..start + dp]));
2016 iov.push(IoSlice::new(&newline_buf));
2017 if !iov.is_empty() {
2018 write_ioslices(out, &iov)?;
2019 }
2020 return Ok(());
2021 }
2022 }
2023
2024 if run_start < data.len() {
2026 iov.push(IoSlice::new(&data[run_start..]));
2027 if !data.is_empty() && *data.last().unwrap() != line_delim {
2028 iov.push(IoSlice::new(&newline_buf));
2029 }
2030 }
2031 if !iov.is_empty() {
2032 write_ioslices(out, &iov)?;
2033 }
2034 Ok(())
2035}
2036
2037fn process_single_field_chunk(
2039 data: &[u8],
2040 delim: u8,
2041 target_idx: usize,
2042 line_delim: u8,
2043 suppress: bool,
2044 buf: &mut Vec<u8>,
2045) {
2046 let mut start = 0;
2047 for end_pos in memchr_iter(line_delim, data) {
2048 let line = &data[start..end_pos];
2049 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
2050 start = end_pos + 1;
2051 }
2052 if start < data.len() {
2053 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
2054 }
2055}
2056
2057#[inline(always)]
2062fn extract_single_field_line(
2063 line: &[u8],
2064 delim: u8,
2065 target_idx: usize,
2066 line_delim: u8,
2067 suppress: bool,
2068 buf: &mut Vec<u8>,
2069) {
2070 let len = line.len();
2071 if len == 0 {
2072 if !suppress {
2073 buf.push(line_delim);
2074 }
2075 return;
2076 }
2077
2078 buf.reserve(len + 1);
2080
2081 let base = line.as_ptr();
2082
2083 if target_idx == 0 {
2085 match memchr::memchr(delim, line) {
2086 Some(pos) => unsafe {
2087 buf_extend(buf, std::slice::from_raw_parts(base, pos));
2088 buf_push(buf, line_delim);
2089 },
2090 None => {
2091 if !suppress {
2092 unsafe {
2093 buf_extend(buf, line);
2094 buf_push(buf, line_delim);
2095 }
2096 }
2097 }
2098 }
2099 return;
2100 }
2101
2102 let mut field_start = 0;
2104 let mut field_idx = 0;
2105 let mut has_delim = false;
2106
2107 for pos in memchr_iter(delim, line) {
2108 has_delim = true;
2109 if field_idx == target_idx {
2110 unsafe {
2111 buf_extend(
2112 buf,
2113 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
2114 );
2115 buf_push(buf, line_delim);
2116 }
2117 return;
2118 }
2119 field_idx += 1;
2120 field_start = pos + 1;
2121 }
2122
2123 if !has_delim {
2124 if !suppress {
2125 unsafe {
2126 buf_extend(buf, line);
2127 buf_push(buf, line_delim);
2128 }
2129 }
2130 return;
2131 }
2132
2133 if field_idx == target_idx {
2134 unsafe {
2135 buf_extend(
2136 buf,
2137 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2138 );
2139 buf_push(buf, line_delim);
2140 }
2141 } else {
2142 unsafe { buf_push(buf, line_delim) };
2143 }
2144}
2145
2146#[inline(always)]
2150fn extract_fields_to_buf(
2151 line: &[u8],
2152 delim: u8,
2153 ranges: &[Range],
2154 output_delim: &[u8],
2155 suppress: bool,
2156 max_field: usize,
2157 field_mask: u64,
2158 line_delim: u8,
2159 buf: &mut Vec<u8>,
2160 complement: bool,
2161) {
2162 let len = line.len();
2163
2164 if len == 0 {
2165 if !suppress {
2166 buf.push(line_delim);
2167 }
2168 return;
2169 }
2170
2171 let needed = len + output_delim.len() * 16 + 1;
2174 if buf.capacity() - buf.len() < needed {
2175 buf.reserve(needed);
2176 }
2177
2178 let base = line.as_ptr();
2179 let mut field_num: usize = 1;
2180 let mut field_start: usize = 0;
2181 let mut first_output = true;
2182 let mut has_delim = false;
2183
2184 for delim_pos in memchr_iter(delim, line) {
2186 has_delim = true;
2187
2188 if is_selected(field_num, field_mask, ranges, complement) {
2189 if !first_output {
2190 unsafe { buf_extend(buf, output_delim) };
2191 }
2192 unsafe {
2193 buf_extend(
2194 buf,
2195 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2196 )
2197 };
2198 first_output = false;
2199 }
2200
2201 field_num += 1;
2202 field_start = delim_pos + 1;
2203
2204 if field_num > max_field {
2205 break;
2206 }
2207 }
2208
2209 if (field_num <= max_field || complement)
2211 && has_delim
2212 && is_selected(field_num, field_mask, ranges, complement)
2213 {
2214 if !first_output {
2215 unsafe { buf_extend(buf, output_delim) };
2216 }
2217 unsafe {
2218 buf_extend(
2219 buf,
2220 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2221 )
2222 };
2223 first_output = false;
2224 }
2225
2226 if !first_output {
2227 unsafe { buf_push(buf, line_delim) };
2228 } else if !has_delim {
2229 if !suppress {
2230 unsafe {
2231 buf_extend(buf, line);
2232 buf_push(buf, line_delim);
2233 }
2234 }
2235 } else {
2236 unsafe { buf_push(buf, line_delim) };
2237 }
2238}
2239
2240fn process_bytes_from_start(
2247 data: &[u8],
2248 max_bytes: usize,
2249 line_delim: u8,
2250 out: &mut impl Write,
2251) -> io::Result<()> {
2252 if max_bytes > 0 && max_bytes < usize::MAX {
2257 let mut start = 0;
2258 let mut all_fit = true;
2259 for pos in memchr_iter(line_delim, data) {
2260 if pos - start > max_bytes {
2261 all_fit = false;
2262 break;
2263 }
2264 start = pos + 1;
2265 }
2266 if all_fit && start < data.len() && data.len() - start > max_bytes {
2268 all_fit = false;
2269 }
2270 if all_fit {
2271 if !data.is_empty() && data[data.len() - 1] == line_delim {
2273 return out.write_all(data);
2274 } else if !data.is_empty() {
2275 out.write_all(data)?;
2276 return out.write_all(&[line_delim]);
2277 }
2278 return Ok(());
2279 }
2280 }
2281
2282 if data.len() >= PARALLEL_THRESHOLD {
2283 let chunks = split_into_chunks(data, line_delim);
2284 let results: Vec<Vec<u8>> = chunks
2285 .par_iter()
2286 .map(|chunk| {
2287 let est_out = (chunk.len() / 4).max(max_bytes + 2);
2293 let mut buf = Vec::with_capacity(est_out.min(chunk.len()));
2294 bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
2295 buf
2296 })
2297 .collect();
2298 let slices: Vec<IoSlice> = results
2300 .iter()
2301 .filter(|r| !r.is_empty())
2302 .map(|r| IoSlice::new(r))
2303 .collect();
2304 write_ioslices(out, &slices)?;
2305 } else {
2306 if max_bytes <= 512 {
2312 let est_out = (data.len() / 4).max(max_bytes + 2);
2315 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2316 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2317 if !buf.is_empty() {
2318 out.write_all(&buf)?;
2319 }
2320 } else {
2321 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2325 }
2326 }
2327 Ok(())
2328}
2329
2330#[inline]
2335fn bytes_from_start_zerocopy(
2336 data: &[u8],
2337 max_bytes: usize,
2338 line_delim: u8,
2339 out: &mut impl Write,
2340) -> io::Result<()> {
2341 let newline_buf: [u8; 1] = [line_delim];
2342 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2343 let mut start = 0;
2344 let mut run_start: usize = 0;
2345
2346 for pos in memchr_iter(line_delim, data) {
2347 let line_len = pos - start;
2348 if line_len > max_bytes {
2349 if run_start < start {
2351 iov.push(IoSlice::new(&data[run_start..start]));
2352 }
2353 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2354 iov.push(IoSlice::new(&newline_buf));
2355 run_start = pos + 1;
2356
2357 if iov.len() >= MAX_IOV - 2 {
2358 write_ioslices(out, &iov)?;
2359 iov.clear();
2360 }
2361 }
2362 start = pos + 1;
2363 }
2364 if start < data.len() {
2366 let line_len = data.len() - start;
2367 if line_len > max_bytes {
2368 if run_start < start {
2369 iov.push(IoSlice::new(&data[run_start..start]));
2370 }
2371 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2372 iov.push(IoSlice::new(&newline_buf));
2373 if !iov.is_empty() {
2374 write_ioslices(out, &iov)?;
2375 }
2376 return Ok(());
2377 }
2378 }
2379 if run_start < data.len() {
2381 iov.push(IoSlice::new(&data[run_start..]));
2382 if !data.is_empty() && *data.last().unwrap() != line_delim {
2383 iov.push(IoSlice::new(&newline_buf));
2384 }
2385 }
2386 if !iov.is_empty() {
2387 write_ioslices(out, &iov)?;
2388 }
2389 Ok(())
2390}
2391
2392#[inline]
2397fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2398 buf.reserve(data.len());
2401
2402 let src = data.as_ptr();
2403 let dst_base = buf.as_mut_ptr();
2404 let mut wp = buf.len();
2405 let mut start = 0;
2406
2407 for pos in memchr_iter(line_delim, data) {
2408 let line_len = pos - start;
2409 let take = line_len.min(max_bytes);
2410 unsafe {
2411 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2412 *dst_base.add(wp + take) = line_delim;
2413 }
2414 wp += take + 1;
2415 start = pos + 1;
2416 }
2417 if start < data.len() {
2419 let line_len = data.len() - start;
2420 let take = line_len.min(max_bytes);
2421 unsafe {
2422 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2423 *dst_base.add(wp + take) = line_delim;
2424 }
2425 wp += take + 1;
2426 }
2427 unsafe { buf.set_len(wp) };
2428}
2429
2430fn process_bytes_from_offset(
2432 data: &[u8],
2433 skip_bytes: usize,
2434 line_delim: u8,
2435 out: &mut impl Write,
2436) -> io::Result<()> {
2437 if data.len() >= PARALLEL_THRESHOLD {
2438 let chunks = split_into_chunks(data, line_delim);
2439 let results: Vec<Vec<u8>> = chunks
2440 .par_iter()
2441 .map(|chunk| {
2442 let mut buf = Vec::with_capacity(chunk.len());
2443 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
2444 buf
2445 })
2446 .collect();
2447 let slices: Vec<IoSlice> = results
2449 .iter()
2450 .filter(|r| !r.is_empty())
2451 .map(|r| IoSlice::new(r))
2452 .collect();
2453 write_ioslices(out, &slices)?;
2454 } else {
2455 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2457 }
2458 Ok(())
2459}
2460
2461#[inline]
2465fn bytes_from_offset_zerocopy(
2466 data: &[u8],
2467 skip_bytes: usize,
2468 line_delim: u8,
2469 out: &mut impl Write,
2470) -> io::Result<()> {
2471 let delim_buf = [line_delim];
2472 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2473
2474 let mut start = 0;
2475 for pos in memchr_iter(line_delim, data) {
2476 let line_len = pos - start;
2477 if line_len > skip_bytes {
2478 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2479 }
2480 iov.push(IoSlice::new(&delim_buf));
2481 if iov.len() >= MAX_IOV - 1 {
2483 write_ioslices(out, &iov)?;
2484 iov.clear();
2485 }
2486 start = pos + 1;
2487 }
2488 if start < data.len() {
2489 let line_len = data.len() - start;
2490 if line_len > skip_bytes {
2491 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2492 }
2493 iov.push(IoSlice::new(&delim_buf));
2494 }
2495 if !iov.is_empty() {
2496 write_ioslices(out, &iov)?;
2497 }
2498 Ok(())
2499}
2500
2501#[inline]
2504fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2505 buf.reserve(data.len());
2506
2507 let src = data.as_ptr();
2508 let dst_base = buf.as_mut_ptr();
2509 let mut wp = buf.len();
2510 let mut start = 0;
2511
2512 for pos in memchr_iter(line_delim, data) {
2513 let line_len = pos - start;
2514 if line_len > skip_bytes {
2515 let take = line_len - skip_bytes;
2516 unsafe {
2517 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2518 }
2519 wp += take;
2520 }
2521 unsafe {
2522 *dst_base.add(wp) = line_delim;
2523 }
2524 wp += 1;
2525 start = pos + 1;
2526 }
2527 if start < data.len() {
2528 let line_len = data.len() - start;
2529 if line_len > skip_bytes {
2530 let take = line_len - skip_bytes;
2531 unsafe {
2532 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2533 }
2534 wp += take;
2535 }
2536 unsafe {
2537 *dst_base.add(wp) = line_delim;
2538 }
2539 wp += 1;
2540 }
2541 unsafe { buf.set_len(wp) };
2542}
2543
2544fn process_bytes_mid_range(
2546 data: &[u8],
2547 start_byte: usize,
2548 end_byte: usize,
2549 line_delim: u8,
2550 out: &mut impl Write,
2551) -> io::Result<()> {
2552 let skip = start_byte.saturating_sub(1);
2553
2554 if data.len() >= PARALLEL_THRESHOLD {
2555 let chunks = split_into_chunks(data, line_delim);
2556 let results: Vec<Vec<u8>> = chunks
2557 .par_iter()
2558 .map(|chunk| {
2559 let mut buf = Vec::with_capacity(chunk.len());
2560 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, &mut buf);
2561 buf
2562 })
2563 .collect();
2564 let slices: Vec<IoSlice> = results
2565 .iter()
2566 .filter(|r| !r.is_empty())
2567 .map(|r| IoSlice::new(r))
2568 .collect();
2569 write_ioslices(out, &slices)?;
2570 } else {
2571 let mut buf = Vec::with_capacity(data.len());
2572 bytes_mid_range_chunk(data, skip, end_byte, line_delim, &mut buf);
2573 if !buf.is_empty() {
2574 out.write_all(&buf)?;
2575 }
2576 }
2577 Ok(())
2578}
2579
2580#[inline]
2584fn bytes_mid_range_chunk(
2585 data: &[u8],
2586 skip: usize,
2587 end_byte: usize,
2588 line_delim: u8,
2589 buf: &mut Vec<u8>,
2590) {
2591 buf.reserve(data.len());
2592
2593 let src = data.as_ptr();
2594 let dst_base = buf.as_mut_ptr();
2595 let mut wp = buf.len();
2596 let mut start = 0;
2597
2598 for pos in memchr_iter(line_delim, data) {
2599 let line_len = pos - start;
2600 if line_len > skip {
2601 let take_end = line_len.min(end_byte);
2602 let take = take_end - skip;
2603 unsafe {
2604 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2605 }
2606 wp += take;
2607 }
2608 unsafe {
2609 *dst_base.add(wp) = line_delim;
2610 }
2611 wp += 1;
2612 start = pos + 1;
2613 }
2614 if start < data.len() {
2615 let line_len = data.len() - start;
2616 if line_len > skip {
2617 let take_end = line_len.min(end_byte);
2618 let take = take_end - skip;
2619 unsafe {
2620 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2621 }
2622 wp += take;
2623 }
2624 unsafe {
2625 *dst_base.add(wp) = line_delim;
2626 }
2627 wp += 1;
2628 }
2629 unsafe { buf.set_len(wp) };
2630}
2631
2632fn process_bytes_complement_mid(
2634 data: &[u8],
2635 skip_start: usize,
2636 skip_end: usize,
2637 line_delim: u8,
2638 out: &mut impl Write,
2639) -> io::Result<()> {
2640 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
2642 let chunks = split_into_chunks(data, line_delim);
2643 let results: Vec<Vec<u8>> = chunks
2644 .par_iter()
2645 .map(|chunk| {
2646 let mut buf = Vec::with_capacity(chunk.len());
2647 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, &mut buf);
2648 buf
2649 })
2650 .collect();
2651 let slices: Vec<IoSlice> = results
2652 .iter()
2653 .filter(|r| !r.is_empty())
2654 .map(|r| IoSlice::new(r))
2655 .collect();
2656 write_ioslices(out, &slices)?;
2657 } else {
2658 let mut buf = Vec::with_capacity(data.len());
2659 bytes_complement_mid_chunk(data, prefix_bytes, skip_end, line_delim, &mut buf);
2660 if !buf.is_empty() {
2661 out.write_all(&buf)?;
2662 }
2663 }
2664 Ok(())
2665}
2666
2667#[inline]
2670fn bytes_complement_mid_chunk(
2671 data: &[u8],
2672 prefix_bytes: usize,
2673 skip_end: usize,
2674 line_delim: u8,
2675 buf: &mut Vec<u8>,
2676) {
2677 buf.reserve(data.len());
2678
2679 let src = data.as_ptr();
2680 let dst_base = buf.as_mut_ptr();
2681 let mut wp = buf.len();
2682 let mut start = 0;
2683
2684 for pos in memchr_iter(line_delim, data) {
2685 let line_len = pos - start;
2686 let take_prefix = prefix_bytes.min(line_len);
2688 if take_prefix > 0 {
2689 unsafe {
2690 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2691 }
2692 wp += take_prefix;
2693 }
2694 if line_len > skip_end {
2696 let suffix_len = line_len - skip_end;
2697 unsafe {
2698 std::ptr::copy_nonoverlapping(
2699 src.add(start + skip_end),
2700 dst_base.add(wp),
2701 suffix_len,
2702 );
2703 }
2704 wp += suffix_len;
2705 }
2706 unsafe {
2707 *dst_base.add(wp) = line_delim;
2708 }
2709 wp += 1;
2710 start = pos + 1;
2711 }
2712 if start < data.len() {
2713 let line_len = data.len() - start;
2714 let take_prefix = prefix_bytes.min(line_len);
2715 if take_prefix > 0 {
2716 unsafe {
2717 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2718 }
2719 wp += take_prefix;
2720 }
2721 if line_len > skip_end {
2722 let suffix_len = line_len - skip_end;
2723 unsafe {
2724 std::ptr::copy_nonoverlapping(
2725 src.add(start + skip_end),
2726 dst_base.add(wp),
2727 suffix_len,
2728 );
2729 }
2730 wp += suffix_len;
2731 }
2732 unsafe {
2733 *dst_base.add(wp) = line_delim;
2734 }
2735 wp += 1;
2736 }
2737 unsafe { buf.set_len(wp) };
2738}
2739
2740fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
2742 let line_delim = cfg.line_delim;
2743 let ranges = cfg.ranges;
2744 let complement = cfg.complement;
2745 let output_delim = cfg.output_delim;
2746
2747 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
2749 let max_bytes = ranges[0].end;
2750 if max_bytes < usize::MAX {
2751 return process_bytes_from_start(data, max_bytes, line_delim, out);
2752 }
2753 }
2754
2755 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
2757 let skip_bytes = ranges[0].start.saturating_sub(1);
2758 if skip_bytes > 0 {
2759 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
2760 }
2761 }
2762
2763 if !complement
2765 && ranges.len() == 1
2766 && ranges[0].start > 1
2767 && ranges[0].end < usize::MAX
2768 && output_delim.is_empty()
2769 {
2770 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
2771 }
2772
2773 if complement
2775 && ranges.len() == 1
2776 && ranges[0].start == 1
2777 && ranges[0].end < usize::MAX
2778 && output_delim.is_empty()
2779 {
2780 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
2781 }
2782
2783 if complement
2785 && ranges.len() == 1
2786 && ranges[0].end == usize::MAX
2787 && ranges[0].start > 1
2788 && output_delim.is_empty()
2789 {
2790 let max_bytes = ranges[0].start - 1;
2791 return process_bytes_from_start(data, max_bytes, line_delim, out);
2792 }
2793
2794 if complement
2796 && ranges.len() == 1
2797 && ranges[0].start > 1
2798 && ranges[0].end < usize::MAX
2799 && output_delim.is_empty()
2800 {
2801 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
2802 }
2803
2804 if data.len() >= PARALLEL_THRESHOLD {
2805 let chunks = split_into_chunks(data, line_delim);
2806 let results: Vec<Vec<u8>> = chunks
2807 .par_iter()
2808 .map(|chunk| {
2809 let mut buf = Vec::with_capacity(chunk.len());
2810 process_bytes_chunk(
2811 chunk,
2812 ranges,
2813 complement,
2814 output_delim,
2815 line_delim,
2816 &mut buf,
2817 );
2818 buf
2819 })
2820 .collect();
2821 let slices: Vec<IoSlice> = results
2823 .iter()
2824 .filter(|r| !r.is_empty())
2825 .map(|r| IoSlice::new(r))
2826 .collect();
2827 write_ioslices(out, &slices)?;
2828 } else {
2829 let mut buf = Vec::with_capacity(data.len());
2830 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
2831 if !buf.is_empty() {
2832 out.write_all(&buf)?;
2833 }
2834 }
2835 Ok(())
2836}
2837
2838fn process_bytes_chunk(
2843 data: &[u8],
2844 ranges: &[Range],
2845 complement: bool,
2846 output_delim: &[u8],
2847 line_delim: u8,
2848 buf: &mut Vec<u8>,
2849) {
2850 buf.reserve(data.len());
2851 let base = data.as_ptr();
2852 let mut start = 0;
2853 for end_pos in memchr_iter(line_delim, data) {
2854 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
2855 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2856 unsafe { buf_push(buf, line_delim) };
2857 start = end_pos + 1;
2858 }
2859 if start < data.len() {
2860 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2861 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2862 unsafe { buf_push(buf, line_delim) };
2863 }
2864}
2865
2866#[inline(always)]
2870fn cut_bytes_to_buf(
2871 line: &[u8],
2872 ranges: &[Range],
2873 complement: bool,
2874 output_delim: &[u8],
2875 buf: &mut Vec<u8>,
2876) {
2877 let len = line.len();
2878 let base = line.as_ptr();
2879 let mut first_range = true;
2880
2881 let needed = len + output_delim.len() * ranges.len() + 1;
2883 if buf.capacity() - buf.len() < needed {
2884 buf.reserve(needed);
2885 }
2886
2887 if complement {
2888 let mut pos: usize = 1;
2889 for r in ranges {
2890 let rs = r.start;
2891 let re = r.end.min(len);
2892 if pos < rs {
2893 if !first_range && !output_delim.is_empty() {
2894 unsafe { buf_extend(buf, output_delim) };
2895 }
2896 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
2897 first_range = false;
2898 }
2899 pos = re + 1;
2900 if pos > len {
2901 break;
2902 }
2903 }
2904 if pos <= len {
2905 if !first_range && !output_delim.is_empty() {
2906 unsafe { buf_extend(buf, output_delim) };
2907 }
2908 unsafe {
2909 buf_extend(
2910 buf,
2911 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
2912 )
2913 };
2914 }
2915 } else if output_delim.is_empty() && ranges.len() == 1 {
2916 let start = ranges[0].start.saturating_sub(1);
2918 let end = ranges[0].end.min(len);
2919 if start < len {
2920 unsafe {
2921 buf_extend(
2922 buf,
2923 std::slice::from_raw_parts(base.add(start), end - start),
2924 )
2925 };
2926 }
2927 } else {
2928 for r in ranges {
2929 let start = r.start.saturating_sub(1);
2930 let end = r.end.min(len);
2931 if start >= len {
2932 break;
2933 }
2934 if !first_range && !output_delim.is_empty() {
2935 unsafe { buf_extend(buf, output_delim) };
2936 }
2937 unsafe {
2938 buf_extend(
2939 buf,
2940 std::slice::from_raw_parts(base.add(start), end - start),
2941 )
2942 };
2943 first_range = false;
2944 }
2945 }
2946}
2947
2948#[inline]
2952pub fn cut_fields(
2953 line: &[u8],
2954 delim: u8,
2955 ranges: &[Range],
2956 complement: bool,
2957 output_delim: &[u8],
2958 suppress_no_delim: bool,
2959 out: &mut impl Write,
2960) -> io::Result<bool> {
2961 if memchr::memchr(delim, line).is_none() {
2962 if !suppress_no_delim {
2963 out.write_all(line)?;
2964 return Ok(true);
2965 }
2966 return Ok(false);
2967 }
2968
2969 let mut field_num: usize = 1;
2970 let mut field_start: usize = 0;
2971 let mut first_output = true;
2972
2973 for delim_pos in memchr_iter(delim, line) {
2974 let selected = in_ranges(ranges, field_num) != complement;
2975 if selected {
2976 if !first_output {
2977 out.write_all(output_delim)?;
2978 }
2979 out.write_all(&line[field_start..delim_pos])?;
2980 first_output = false;
2981 }
2982 field_start = delim_pos + 1;
2983 field_num += 1;
2984 }
2985
2986 let selected = in_ranges(ranges, field_num) != complement;
2987 if selected {
2988 if !first_output {
2989 out.write_all(output_delim)?;
2990 }
2991 out.write_all(&line[field_start..])?;
2992 }
2993
2994 Ok(true)
2995}
2996
2997#[inline]
2999pub fn cut_bytes(
3000 line: &[u8],
3001 ranges: &[Range],
3002 complement: bool,
3003 output_delim: &[u8],
3004 out: &mut impl Write,
3005) -> io::Result<bool> {
3006 let mut first_range = true;
3007
3008 if complement {
3009 let len = line.len();
3010 let mut comp_ranges = Vec::new();
3011 let mut pos: usize = 1;
3012 for r in ranges {
3013 let rs = r.start;
3014 let re = r.end.min(len);
3015 if pos < rs {
3016 comp_ranges.push((pos, rs - 1));
3017 }
3018 pos = re + 1;
3019 if pos > len {
3020 break;
3021 }
3022 }
3023 if pos <= len {
3024 comp_ranges.push((pos, len));
3025 }
3026 for &(s, e) in &comp_ranges {
3027 if !first_range && !output_delim.is_empty() {
3028 out.write_all(output_delim)?;
3029 }
3030 out.write_all(&line[s - 1..e])?;
3031 first_range = false;
3032 }
3033 } else {
3034 for r in ranges {
3035 let start = r.start.saturating_sub(1);
3036 let end = r.end.min(line.len());
3037 if start >= line.len() {
3038 break;
3039 }
3040 if !first_range && !output_delim.is_empty() {
3041 out.write_all(output_delim)?;
3042 }
3043 out.write_all(&line[start..end])?;
3044 first_range = false;
3045 }
3046 }
3047 Ok(true)
3048}
3049
3050pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3052 match cfg.mode {
3053 CutMode::Fields => process_fields_fast(data, cfg, out),
3054 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
3055 }
3056}
3057
3058pub fn process_cut_reader<R: BufRead>(
3063 mut reader: R,
3064 cfg: &CutConfig,
3065 out: &mut impl Write,
3066) -> io::Result<()> {
3067 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3069
3070 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3073
3074 loop {
3075 buf.reserve(CHUNK_SIZE);
3077 let read_start = buf.len();
3078 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3079 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3080 buf.truncate(read_start + n);
3081
3082 if buf.is_empty() {
3083 break;
3084 }
3085
3086 if n == 0 {
3087 process_cut_data(&buf, cfg, out)?;
3089 break;
3090 }
3091
3092 let process_end = match memchr::memrchr(line_delim, &buf) {
3094 Some(pos) => pos + 1,
3095 None => {
3096 continue;
3098 }
3099 };
3100
3101 process_cut_data(&buf[..process_end], cfg, out)?;
3103
3104 let leftover_len = buf.len() - process_end;
3106 if leftover_len > 0 {
3107 buf.copy_within(process_end.., 0);
3108 }
3109 buf.truncate(leftover_len);
3110 }
3111
3112 Ok(())
3113}
3114
/// Reads from `reader` until `buf` is full or end of input is reached.
///
/// Short reads are retried, and `ErrorKind::Interrupted` is retried on every
/// call, including the very first one. An empty `buf` returns `Ok(0)` without
/// touching the reader.
///
/// # Returns
///
/// The number of bytes stored at the front of `buf`; a value smaller than
/// `buf.len()` means the reader reported end of input.
///
/// # Errors
///
/// Any read error other than `Interrupted` is returned immediately; bytes
/// read before the error are left in `buf` but their count is not reported.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3134
/// Selects which kind of unit a cut operation extracts from each line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Select by byte position.
    Bytes,
    /// Select by character position; `process_cut_data` routes this to the
    /// same byte-oriented routine as [`CutMode::Bytes`].
    Characters,
    /// Select delimiter-separated fields.
    Fields,
}