1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, IoSlice, Write};
4
/// Input size in bytes (512 KiB) at or above which the field processors in
/// this file split the data into line-aligned chunks and process them on the
/// rayon thread pool; smaller inputs are handled on the calling thread.
const PARALLEL_THRESHOLD: usize = 512 * 1024;
10
/// Maximum number of `IoSlice` entries passed to a single `write_vectored`
/// call; `write_ioslices` splits longer slice lists into batches of this size.
const MAX_IOV: usize = 1024;
13
/// Settings for one cutting pass, as consumed by `process_fields_fast`.
pub struct CutConfig<'a> {
    /// Selection mode (see `CutMode`).
    pub mode: CutMode,
    /// Selected ranges, 1-based and inclusive.
    // NOTE(review): the fast paths read `ranges.last()` as the highest
    // selected field and stop scanning at the first range that starts too
    // late, so they presumably expect the sorted, merged list returned by
    // `parse_ranges` — confirm every caller builds `ranges` that way.
    pub ranges: &'a [Range],
    /// When true, the fields NOT covered by `ranges` are output.
    pub complement: bool,
    /// Byte that separates fields in the input.
    pub delim: u8,
    /// Bytes written between output fields.
    pub output_delim: &'a [u8],
    /// When true, lines containing no `delim` are dropped instead of being
    /// copied to the output unchanged.
    pub suppress_no_delim: bool,
    /// Byte that terminates lines in the input and is written after every
    /// output line.
    pub line_delim: u8,
}
24
/// An inclusive, 1-based span of field (or byte/character) positions.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position; `parse_ranges` never produces 0.
    pub start: usize,
    /// Last selected position (inclusive); `usize::MAX` stands for an
    /// open-ended range such as `3-`.
    pub end: usize,
}
31
32pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
35 let mut ranges = Vec::new();
36
37 for part in spec.split(',') {
38 let part = part.trim();
39 if part.is_empty() {
40 continue;
41 }
42
43 if let Some(idx) = part.find('-') {
44 let left = &part[..idx];
45 let right = &part[idx + 1..];
46
47 let start = if left.is_empty() {
48 1
49 } else {
50 left.parse::<usize>()
51 .map_err(|_| format!("invalid range: '{}'", part))?
52 };
53
54 let end = if right.is_empty() {
55 usize::MAX
56 } else {
57 right
58 .parse::<usize>()
59 .map_err(|_| format!("invalid range: '{}'", part))?
60 };
61
62 if start == 0 {
63 return Err("fields and positions are numbered from 1".to_string());
64 }
65 if start > end {
66 return Err(format!("invalid decreasing range: '{}'", part));
67 }
68
69 ranges.push(Range { start, end });
70 } else {
71 let n = part
72 .parse::<usize>()
73 .map_err(|_| format!("invalid field: '{}'", part))?;
74 if n == 0 {
75 return Err("fields and positions are numbered from 1".to_string());
76 }
77 ranges.push(Range { start: n, end: n });
78 }
79 }
80
81 if ranges.is_empty() {
82 return Err("you must specify a list of bytes, characters, or fields".to_string());
83 }
84
85 ranges.sort_by_key(|r| (r.start, r.end));
87 let mut merged = vec![ranges[0].clone()];
88 for r in &ranges[1..] {
89 let last = merged.last_mut().unwrap();
90 if r.start <= last.end.saturating_add(1) {
91 last.end = last.end.max(r.end);
92 } else {
93 merged.push(r.clone());
94 }
95 }
96
97 Ok(merged)
98}
99
100#[inline(always)]
103fn in_ranges(ranges: &[Range], pos: usize) -> bool {
104 for r in ranges {
105 if pos < r.start {
106 return false;
107 }
108 if pos <= r.end {
109 return true;
110 }
111 }
112 false
113}
114
115#[inline]
118fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
119 let mut mask: u64 = 0;
120 for i in 1..=64u32 {
121 let in_range = in_ranges(ranges, i as usize);
122 if in_range != complement {
123 mask |= 1u64 << (i - 1);
124 }
125 }
126 mask
127}
128
129#[inline(always)]
131fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
132 if field_num <= 64 {
133 (mask >> (field_num - 1)) & 1 == 1
134 } else {
135 in_ranges(ranges, field_num) != complement
136 }
137}
138
/// Appends `data` to `buf` without checking or growing the capacity.
///
/// # Safety
///
/// `buf` must already have at least `data.len()` bytes of spare capacity,
/// i.e. `buf.capacity() - buf.len() >= data.len()`. Writing past the
/// allocation is undefined behaviour; debug builds assert the precondition.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    let len = buf.len();
    debug_assert!(
        buf.capacity() - len >= data.len(),
        "buf_extend: insufficient spare capacity"
    );
    // SAFETY: the caller guarantees `data.len()` bytes of spare capacity, so
    // the destination range lies inside the allocation; `data` is a shared
    // borrow and therefore cannot overlap the exclusively borrowed `buf`.
    unsafe {
        std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr().add(len), data.len());
        buf.set_len(len + data.len());
    }
}
151
/// Appends the single byte `b` to `buf` without checking or growing the
/// capacity.
///
/// # Safety
///
/// `buf` must already have at least one byte of spare capacity, i.e.
/// `buf.len() < buf.capacity()`. Writing past the allocation is undefined
/// behaviour; debug builds assert the precondition.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    let len = buf.len();
    debug_assert!(len < buf.capacity(), "buf_push: insufficient spare capacity");
    // SAFETY: the caller guarantees one byte of spare capacity, so index `len`
    // lies inside the allocation and becomes initialized before `set_len`.
    unsafe {
        buf.as_mut_ptr().add(len).write(b);
        buf.set_len(len + 1);
    }
}
162
163#[inline]
166fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
167 if slices.is_empty() {
168 return Ok(());
169 }
170 for batch in slices.chunks(MAX_IOV) {
171 let total: usize = batch.iter().map(|s| s.len()).sum();
172 match out.write_vectored(batch) {
173 Ok(n) if n >= total => continue,
174 Ok(mut written) => {
175 for slice in batch {
177 let slen = slice.len();
178 if written >= slen {
179 written -= slen;
180 continue;
181 }
182 if written > 0 {
183 out.write_all(&slice[written..])?;
184 written = 0;
185 } else {
186 out.write_all(slice)?;
187 }
188 }
189 }
190 Err(e) => return Err(e),
191 }
192 }
193 Ok(())
194}
195
196fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
200 let num_threads = rayon::current_num_threads().max(1);
201 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
202 return vec![data];
203 }
204
205 let chunk_size = data.len() / num_threads;
206 let mut chunks = Vec::with_capacity(num_threads);
207 let mut pos = 0;
208
209 for _ in 0..num_threads - 1 {
210 let target = pos + chunk_size;
211 if target >= data.len() {
212 break;
213 }
214 let boundary = memchr::memchr(line_delim, &data[target..])
215 .map(|p| target + p + 1)
216 .unwrap_or(data.len());
217 if boundary > pos {
218 chunks.push(&data[pos..boundary]);
219 }
220 pos = boundary;
221 }
222
223 if pos < data.len() {
224 chunks.push(&data[pos..]);
225 }
226
227 chunks
228}
229
230fn process_fields_multi_select(
237 data: &[u8],
238 delim: u8,
239 line_delim: u8,
240 ranges: &[Range],
241 suppress: bool,
242 out: &mut impl Write,
243) -> io::Result<()> {
244 let max_field = ranges.last().map_or(0, |r| r.end);
245
246 if data.len() >= PARALLEL_THRESHOLD {
247 let chunks = split_into_chunks(data, line_delim);
248 let results: Vec<Vec<u8>> = chunks
249 .par_iter()
250 .map(|chunk| {
251 let mut buf = Vec::with_capacity(chunk.len());
252 multi_select_chunk(
253 chunk, delim, line_delim, ranges, max_field, suppress, &mut buf,
254 );
255 buf
256 })
257 .collect();
258 let slices: Vec<IoSlice> = results
259 .iter()
260 .filter(|r| !r.is_empty())
261 .map(|r| IoSlice::new(r))
262 .collect();
263 write_ioslices(out, &slices)?;
264 } else {
265 let mut buf = Vec::with_capacity(data.len());
266 multi_select_chunk(
267 data, delim, line_delim, ranges, max_field, suppress, &mut buf,
268 );
269 if !buf.is_empty() {
270 out.write_all(&buf)?;
271 }
272 }
273 Ok(())
274}
275
276fn multi_select_chunk(
282 data: &[u8],
283 delim: u8,
284 line_delim: u8,
285 ranges: &[Range],
286 max_field: usize,
287 suppress: bool,
288 buf: &mut Vec<u8>,
289) {
290 if delim == line_delim {
292 buf.reserve(data.len());
293 let base = data.as_ptr();
294 let mut start = 0;
295 for end_pos in memchr_iter(line_delim, data) {
296 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
297 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
298 start = end_pos + 1;
299 }
300 if start < data.len() {
301 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
302 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
303 }
304 return;
305 }
306
307 buf.reserve(data.len());
308 let base = data.as_ptr();
309 let data_len = data.len();
310
311 let mut line_start: usize = 0;
313 let mut delim_pos = [0usize; 64];
314 let mut num_delims: usize = 0;
315 let max_delims = max_field.min(64);
316 let mut at_max = false;
317
318 for pos in memchr::memchr2_iter(delim, line_delim, data) {
320 let byte = unsafe { *base.add(pos) };
321
322 if byte == line_delim {
323 let line_len = pos - line_start;
325 if num_delims == 0 {
326 if !suppress {
328 unsafe {
329 buf_extend(
330 buf,
331 std::slice::from_raw_parts(base.add(line_start), line_len),
332 );
333 buf_push(buf, line_delim);
334 }
335 }
336 } else {
337 let total_fields = num_delims + 1;
339 let mut first_output = true;
340
341 for r in ranges {
342 let range_start = r.start;
343 let range_end = r.end.min(total_fields);
344 if range_start > total_fields {
345 break;
346 }
347 for field_num in range_start..=range_end {
348 if field_num > total_fields {
349 break;
350 }
351
352 let field_start = if field_num == 1 {
353 line_start
354 } else if field_num - 2 < num_delims {
355 delim_pos[field_num - 2] + 1
356 } else {
357 continue;
358 };
359 let field_end = if field_num <= num_delims {
360 delim_pos[field_num - 1]
361 } else {
362 pos
363 };
364
365 if !first_output {
366 unsafe { buf_push(buf, delim) };
367 }
368 unsafe {
369 buf_extend(
370 buf,
371 std::slice::from_raw_parts(
372 base.add(field_start),
373 field_end - field_start,
374 ),
375 );
376 }
377 first_output = false;
378 }
379 }
380
381 unsafe { buf_push(buf, line_delim) };
382 }
383
384 line_start = pos + 1;
386 num_delims = 0;
387 at_max = false;
388 } else {
389 if !at_max && num_delims < max_delims {
391 delim_pos[num_delims] = pos;
392 num_delims += 1;
393 if num_delims >= max_delims {
394 at_max = true;
395 }
396 }
397 }
398 }
399
400 if line_start < data_len {
402 if num_delims == 0 {
403 if !suppress {
404 unsafe {
405 buf_extend(
406 buf,
407 std::slice::from_raw_parts(base.add(line_start), data_len - line_start),
408 );
409 buf_push(buf, line_delim);
410 }
411 }
412 } else {
413 let total_fields = num_delims + 1;
414 let mut first_output = true;
415
416 for r in ranges {
417 let range_start = r.start;
418 let range_end = r.end.min(total_fields);
419 if range_start > total_fields {
420 break;
421 }
422 for field_num in range_start..=range_end {
423 if field_num > total_fields {
424 break;
425 }
426
427 let field_start = if field_num == 1 {
428 line_start
429 } else if field_num - 2 < num_delims {
430 delim_pos[field_num - 2] + 1
431 } else {
432 continue;
433 };
434 let field_end = if field_num <= num_delims {
435 delim_pos[field_num - 1]
436 } else {
437 data_len
438 };
439
440 if !first_output {
441 unsafe { buf_push(buf, delim) };
442 }
443 unsafe {
444 buf_extend(
445 buf,
446 std::slice::from_raw_parts(
447 base.add(field_start),
448 field_end - field_start,
449 ),
450 );
451 }
452 first_output = false;
453 }
454 }
455
456 unsafe { buf_push(buf, line_delim) };
457 }
458 }
459}
460
461#[inline(always)]
466fn multi_select_line(
467 line: &[u8],
468 delim: u8,
469 line_delim: u8,
470 ranges: &[Range],
471 max_field: usize,
472 suppress: bool,
473 buf: &mut Vec<u8>,
474) {
475 let len = line.len();
476 if len == 0 {
477 if !suppress {
478 unsafe { buf_push(buf, line_delim) };
479 }
480 return;
481 }
482
483 let base = line.as_ptr();
485
486 let mut delim_pos = [0usize; 64];
489 let mut num_delims: usize = 0;
490 let max_delims = max_field.min(64);
491
492 for pos in memchr_iter(delim, line) {
493 if num_delims < max_delims {
494 delim_pos[num_delims] = pos;
495 num_delims += 1;
496 if num_delims >= max_delims {
497 break;
498 }
499 }
500 }
501
502 if num_delims == 0 {
503 if !suppress {
504 unsafe {
505 buf_extend(buf, line);
506 buf_push(buf, line_delim);
507 }
508 }
509 return;
510 }
511
512 let total_fields = num_delims + 1;
516 let mut first_output = true;
517
518 for r in ranges {
519 let range_start = r.start;
520 let range_end = r.end.min(total_fields);
521 if range_start > total_fields {
522 break;
523 }
524 for field_num in range_start..=range_end {
525 if field_num > total_fields {
526 break;
527 }
528
529 let field_start = if field_num == 1 {
530 0
531 } else if field_num - 2 < num_delims {
532 delim_pos[field_num - 2] + 1
533 } else {
534 continue;
535 };
536 let field_end = if field_num <= num_delims {
537 delim_pos[field_num - 1]
538 } else {
539 len
540 };
541
542 if !first_output {
543 unsafe { buf_push(buf, delim) };
544 }
545 unsafe {
546 buf_extend(
547 buf,
548 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
549 );
550 }
551 first_output = false;
552 }
553 }
554
555 unsafe { buf_push(buf, line_delim) };
556}
557
558fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
562 let delim = cfg.delim;
563 let line_delim = cfg.line_delim;
564 let ranges = cfg.ranges;
565 let complement = cfg.complement;
566 let output_delim = cfg.output_delim;
567 let suppress = cfg.suppress_no_delim;
568
569 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
577 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
578 }
579
580 if complement
582 && ranges.len() == 1
583 && output_delim.len() == 1
584 && output_delim[0] == delim
585 && ranges[0].start == ranges[0].end
586 {
587 return process_complement_single_field(
588 data,
589 delim,
590 line_delim,
591 ranges[0].start,
592 suppress,
593 out,
594 );
595 }
596
597 if complement
600 && ranges.len() == 1
601 && ranges[0].start > 1
602 && ranges[0].end < usize::MAX
603 && output_delim.len() == 1
604 && output_delim[0] == delim
605 {
606 return process_complement_range(
607 data,
608 delim,
609 line_delim,
610 ranges[0].start,
611 ranges[0].end,
612 suppress,
613 out,
614 );
615 }
616
617 if !complement
619 && ranges.len() == 1
620 && ranges[0].start == 1
621 && output_delim.len() == 1
622 && output_delim[0] == delim
623 && ranges[0].end < usize::MAX
624 {
625 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
626 }
627
628 if !complement
630 && ranges.len() == 1
631 && ranges[0].end == usize::MAX
632 && ranges[0].start > 1
633 && output_delim.len() == 1
634 && output_delim[0] == delim
635 {
636 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
637 }
638
639 if !complement
641 && ranges.len() == 1
642 && ranges[0].start > 1
643 && ranges[0].end < usize::MAX
644 && output_delim.len() == 1
645 && output_delim[0] == delim
646 {
647 return process_fields_mid_range(
648 data,
649 delim,
650 line_delim,
651 ranges[0].start,
652 ranges[0].end,
653 suppress,
654 out,
655 );
656 }
657
658 if !complement
664 && ranges.len() > 1
665 && ranges.last().map_or(false, |r| r.end < usize::MAX)
666 && output_delim.len() == 1
667 && output_delim[0] == delim
668 && delim != line_delim
669 {
670 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
671 }
672
673 let max_field = if complement {
675 usize::MAX
676 } else {
677 ranges.last().map(|r| r.end).unwrap_or(0)
678 };
679 let field_mask = compute_field_mask(ranges, complement);
680
681 if data.len() >= PARALLEL_THRESHOLD {
682 let chunks = split_into_chunks(data, line_delim);
683 let results: Vec<Vec<u8>> = chunks
684 .par_iter()
685 .map(|chunk| {
686 let mut buf = Vec::with_capacity(chunk.len());
687 process_fields_chunk(
688 chunk,
689 delim,
690 ranges,
691 output_delim,
692 suppress,
693 max_field,
694 field_mask,
695 line_delim,
696 complement,
697 &mut buf,
698 );
699 buf
700 })
701 .collect();
702 let slices: Vec<IoSlice> = results
704 .iter()
705 .filter(|r| !r.is_empty())
706 .map(|r| IoSlice::new(r))
707 .collect();
708 write_ioslices(out, &slices)?;
709 } else {
710 let mut buf = Vec::with_capacity(data.len());
711 process_fields_chunk(
712 data,
713 delim,
714 ranges,
715 output_delim,
716 suppress,
717 max_field,
718 field_mask,
719 line_delim,
720 complement,
721 &mut buf,
722 );
723 if !buf.is_empty() {
724 out.write_all(&buf)?;
725 }
726 }
727 Ok(())
728}
729
730fn process_fields_chunk(
735 data: &[u8],
736 delim: u8,
737 ranges: &[Range],
738 output_delim: &[u8],
739 suppress: bool,
740 max_field: usize,
741 field_mask: u64,
742 line_delim: u8,
743 complement: bool,
744 buf: &mut Vec<u8>,
745) {
746 if delim != line_delim && max_field < usize::MAX && !complement {
753 buf.reserve(data.len());
754 let mut start = 0;
755 for end_pos in memchr_iter(line_delim, data) {
756 let line = &data[start..end_pos];
757 extract_fields_to_buf(
758 line,
759 delim,
760 ranges,
761 output_delim,
762 suppress,
763 max_field,
764 field_mask,
765 line_delim,
766 buf,
767 complement,
768 );
769 start = end_pos + 1;
770 }
771 if start < data.len() {
772 extract_fields_to_buf(
773 &data[start..],
774 delim,
775 ranges,
776 output_delim,
777 suppress,
778 max_field,
779 field_mask,
780 line_delim,
781 buf,
782 complement,
783 );
784 }
785 return;
786 }
787
788 if delim != line_delim {
792 buf.reserve(data.len());
793
794 let data_len = data.len();
795 let base = data.as_ptr();
796 let mut line_start: usize = 0;
797 let mut field_start: usize = 0;
798 let mut field_num: usize = 1;
799 let mut first_output = true;
800 let mut has_delim = false;
801
802 for pos in memchr::memchr2_iter(delim, line_delim, data) {
803 let byte = unsafe { *base.add(pos) };
804
805 if byte == line_delim {
806 if (field_num <= max_field || complement)
808 && has_delim
809 && is_selected(field_num, field_mask, ranges, complement)
810 {
811 if !first_output {
812 unsafe { buf_extend(buf, output_delim) };
813 }
814 unsafe {
815 buf_extend(
816 buf,
817 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
818 )
819 };
820 first_output = false;
821 }
822
823 if !first_output {
824 unsafe { buf_push(buf, line_delim) };
825 } else if !has_delim {
826 if !suppress {
827 unsafe {
828 buf_extend(
829 buf,
830 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
831 );
832 buf_push(buf, line_delim);
833 }
834 }
835 } else {
836 unsafe { buf_push(buf, line_delim) };
837 }
838
839 line_start = pos + 1;
841 field_start = pos + 1;
842 field_num = 1;
843 first_output = true;
844 has_delim = false;
845 } else {
846 has_delim = true;
848
849 if is_selected(field_num, field_mask, ranges, complement) {
850 if !first_output {
851 unsafe { buf_extend(buf, output_delim) };
852 }
853 unsafe {
854 buf_extend(
855 buf,
856 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
857 )
858 };
859 first_output = false;
860 }
861
862 field_num += 1;
863 field_start = pos + 1;
864 }
865 }
866
867 if line_start < data_len {
869 if line_start < data_len {
870 if (field_num <= max_field || complement)
871 && has_delim
872 && is_selected(field_num, field_mask, ranges, complement)
873 {
874 if !first_output {
875 unsafe { buf_extend(buf, output_delim) };
876 }
877 unsafe {
878 buf_extend(
879 buf,
880 std::slice::from_raw_parts(
881 base.add(field_start),
882 data_len - field_start,
883 ),
884 )
885 };
886 first_output = false;
887 }
888
889 if !first_output {
890 unsafe { buf_push(buf, line_delim) };
891 } else if !has_delim {
892 if !suppress {
893 unsafe {
894 buf_extend(
895 buf,
896 std::slice::from_raw_parts(
897 base.add(line_start),
898 data_len - line_start,
899 ),
900 );
901 buf_push(buf, line_delim);
902 }
903 }
904 } else {
905 unsafe { buf_push(buf, line_delim) };
906 }
907 }
908 }
909
910 return;
911 }
912
913 let mut start = 0;
915 for end_pos in memchr_iter(line_delim, data) {
916 let line = &data[start..end_pos];
917 extract_fields_to_buf(
918 line,
919 delim,
920 ranges,
921 output_delim,
922 suppress,
923 max_field,
924 field_mask,
925 line_delim,
926 buf,
927 complement,
928 );
929 start = end_pos + 1;
930 }
931 if start < data.len() {
932 extract_fields_to_buf(
933 &data[start..],
934 delim,
935 ranges,
936 output_delim,
937 suppress,
938 max_field,
939 field_mask,
940 line_delim,
941 buf,
942 complement,
943 );
944 }
945}
946
947fn process_single_field(
953 data: &[u8],
954 delim: u8,
955 line_delim: u8,
956 target: usize,
957 suppress: bool,
958 out: &mut impl Write,
959) -> io::Result<()> {
960 let target_idx = target - 1;
961
962 if delim != line_delim {
964 if data.len() >= PARALLEL_THRESHOLD {
965 let chunks = split_into_chunks(data, line_delim);
966 let results: Vec<Vec<u8>> = chunks
967 .par_iter()
968 .map(|chunk| {
969 let mut buf = Vec::with_capacity(chunk.len());
970 process_nth_field_combined(
971 chunk, delim, line_delim, target_idx, suppress, &mut buf,
972 );
973 buf
974 })
975 .collect();
976 let slices: Vec<IoSlice> = results
978 .iter()
979 .filter(|r| !r.is_empty())
980 .map(|r| IoSlice::new(r))
981 .collect();
982 write_ioslices(out, &slices)?;
983 } else if target_idx == 0 && !suppress {
984 single_field1_zerocopy(data, delim, line_delim, out)?;
989 } else {
990 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
995 process_nth_field_combined(data, delim, line_delim, target_idx, suppress, &mut buf);
996 if !buf.is_empty() {
997 out.write_all(&buf)?;
998 }
999 }
1000 return Ok(());
1001 }
1002
1003 if data.len() >= PARALLEL_THRESHOLD {
1005 let chunks = split_into_chunks(data, line_delim);
1006 let results: Vec<Vec<u8>> = chunks
1007 .par_iter()
1008 .map(|chunk| {
1009 let mut buf = Vec::with_capacity(chunk.len() / 4);
1010 process_single_field_chunk(
1011 chunk, delim, target_idx, line_delim, suppress, &mut buf,
1012 );
1013 buf
1014 })
1015 .collect();
1016 let slices: Vec<IoSlice> = results
1018 .iter()
1019 .filter(|r| !r.is_empty())
1020 .map(|r| IoSlice::new(r))
1021 .collect();
1022 write_ioslices(out, &slices)?;
1023 } else {
1024 let mut buf = Vec::with_capacity(data.len() / 4);
1025 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1026 if !buf.is_empty() {
1027 out.write_all(&buf)?;
1028 }
1029 }
1030 Ok(())
1031}
1032
1033fn process_complement_range(
1036 data: &[u8],
1037 delim: u8,
1038 line_delim: u8,
1039 skip_start: usize,
1040 skip_end: usize,
1041 suppress: bool,
1042 out: &mut impl Write,
1043) -> io::Result<()> {
1044 if data.len() >= PARALLEL_THRESHOLD {
1045 let chunks = split_into_chunks(data, line_delim);
1046 let results: Vec<Vec<u8>> = chunks
1047 .par_iter()
1048 .map(|chunk| {
1049 let mut buf = Vec::with_capacity(chunk.len());
1050 complement_range_chunk(
1051 chunk, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
1052 );
1053 buf
1054 })
1055 .collect();
1056 let slices: Vec<IoSlice> = results
1057 .iter()
1058 .filter(|r| !r.is_empty())
1059 .map(|r| IoSlice::new(r))
1060 .collect();
1061 write_ioslices(out, &slices)?;
1062 } else {
1063 let mut buf = Vec::with_capacity(data.len());
1064 complement_range_chunk(
1065 data, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
1066 );
1067 if !buf.is_empty() {
1068 out.write_all(&buf)?;
1069 }
1070 }
1071 Ok(())
1072}
1073
1074fn complement_range_chunk(
1076 data: &[u8],
1077 delim: u8,
1078 skip_start: usize,
1079 skip_end: usize,
1080 line_delim: u8,
1081 suppress: bool,
1082 buf: &mut Vec<u8>,
1083) {
1084 let mut start = 0;
1085 for end_pos in memchr_iter(line_delim, data) {
1086 let line = &data[start..end_pos];
1087 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
1088 start = end_pos + 1;
1089 }
1090 if start < data.len() {
1091 complement_range_line(
1092 &data[start..],
1093 delim,
1094 skip_start,
1095 skip_end,
1096 line_delim,
1097 suppress,
1098 buf,
1099 );
1100 }
1101}
1102
1103#[inline(always)]
1110fn complement_range_line(
1111 line: &[u8],
1112 delim: u8,
1113 skip_start: usize,
1114 skip_end: usize,
1115 line_delim: u8,
1116 suppress: bool,
1117 buf: &mut Vec<u8>,
1118) {
1119 let len = line.len();
1120 if len == 0 {
1121 if !suppress {
1122 buf.push(line_delim);
1123 }
1124 return;
1125 }
1126
1127 buf.reserve(len + 1);
1128 let base = line.as_ptr();
1129
1130 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
1140
1141 let mut delim_count: usize = 0;
1143 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
1147 delim_count += 1;
1148 if delim_count == need_prefix_delims {
1149 prefix_end_pos = pos;
1150 }
1151 if delim_count == total_need {
1152 suffix_start_pos = pos + 1;
1153 break;
1154 }
1155 }
1156
1157 if delim_count == 0 {
1158 if !suppress {
1160 unsafe {
1161 buf_extend(buf, line);
1162 buf_push(buf, line_delim);
1163 }
1164 }
1165 return;
1166 }
1167
1168 if delim_count < need_prefix_delims {
1174 unsafe {
1176 buf_extend(buf, line);
1177 buf_push(buf, line_delim);
1178 }
1179 return;
1180 }
1181
1182 let has_prefix = need_prefix_delims > 0;
1183 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
1184
1185 if has_prefix && has_suffix {
1186 unsafe {
1188 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1189 buf_push(buf, delim);
1190 buf_extend(
1191 buf,
1192 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1193 );
1194 buf_push(buf, line_delim);
1195 }
1196 } else if has_prefix {
1197 unsafe {
1199 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1200 buf_push(buf, line_delim);
1201 }
1202 } else if has_suffix {
1203 unsafe {
1205 buf_extend(
1206 buf,
1207 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1208 );
1209 buf_push(buf, line_delim);
1210 }
1211 } else {
1212 unsafe { buf_push(buf, line_delim) };
1214 }
1215}
1216
1217fn process_complement_single_field(
1219 data: &[u8],
1220 delim: u8,
1221 line_delim: u8,
1222 skip_field: usize,
1223 suppress: bool,
1224 out: &mut impl Write,
1225) -> io::Result<()> {
1226 let skip_idx = skip_field - 1;
1227
1228 if data.len() >= PARALLEL_THRESHOLD {
1229 let chunks = split_into_chunks(data, line_delim);
1230 let results: Vec<Vec<u8>> = chunks
1231 .par_iter()
1232 .map(|chunk| {
1233 let mut buf = Vec::with_capacity(chunk.len());
1234 complement_single_field_chunk(
1235 chunk, delim, skip_idx, line_delim, suppress, &mut buf,
1236 );
1237 buf
1238 })
1239 .collect();
1240 let slices: Vec<IoSlice> = results
1242 .iter()
1243 .filter(|r| !r.is_empty())
1244 .map(|r| IoSlice::new(r))
1245 .collect();
1246 write_ioslices(out, &slices)?;
1247 } else {
1248 let mut buf = Vec::with_capacity(data.len());
1249 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
1250 if !buf.is_empty() {
1251 out.write_all(&buf)?;
1252 }
1253 }
1254 Ok(())
1255}
1256
1257fn complement_single_field_chunk(
1259 data: &[u8],
1260 delim: u8,
1261 skip_idx: usize,
1262 line_delim: u8,
1263 suppress: bool,
1264 buf: &mut Vec<u8>,
1265) {
1266 let mut start = 0;
1267 for end_pos in memchr_iter(line_delim, data) {
1268 let line = &data[start..end_pos];
1269 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1270 start = end_pos + 1;
1271 }
1272 if start < data.len() {
1273 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
1274 }
1275}
1276
1277#[inline(always)]
1282fn complement_single_field_line(
1283 line: &[u8],
1284 delim: u8,
1285 skip_idx: usize,
1286 line_delim: u8,
1287 suppress: bool,
1288 buf: &mut Vec<u8>,
1289) {
1290 let len = line.len();
1291 if len == 0 {
1292 if !suppress {
1293 buf.push(line_delim);
1294 }
1295 return;
1296 }
1297
1298 buf.reserve(len + 1);
1299 let base = line.as_ptr();
1300
1301 let need_before = skip_idx; let need_total = skip_idx + 1; let mut delim_count: usize = 0;
1310 let mut skip_start_pos: usize = 0; let mut skip_end_pos: usize = len; let mut found_end = false;
1313
1314 for pos in memchr_iter(delim, line) {
1315 delim_count += 1;
1316 if delim_count == need_before {
1317 skip_start_pos = pos + 1;
1318 }
1319 if delim_count == need_total {
1320 skip_end_pos = pos;
1321 found_end = true;
1322 break;
1323 }
1324 }
1325
1326 if delim_count == 0 {
1327 if !suppress {
1329 unsafe {
1330 buf_extend(buf, line);
1331 buf_push(buf, line_delim);
1332 }
1333 }
1334 return;
1335 }
1336
1337 if delim_count < need_before {
1339 unsafe {
1340 buf_extend(buf, line);
1341 buf_push(buf, line_delim);
1342 }
1343 return;
1344 }
1345
1346 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1349 let has_suffix = found_end && skip_end_pos < len;
1350
1351 if has_prefix && has_suffix {
1352 unsafe {
1355 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1356 buf_push(buf, delim);
1357 buf_extend(
1358 buf,
1359 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1360 );
1361 buf_push(buf, line_delim);
1362 }
1363 } else if has_prefix {
1364 unsafe {
1366 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1367 buf_push(buf, line_delim);
1368 }
1369 } else if has_suffix {
1370 unsafe {
1372 buf_extend(
1373 buf,
1374 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1375 );
1376 buf_push(buf, line_delim);
1377 }
1378 } else {
1379 unsafe { buf_push(buf, line_delim) };
1381 }
1382}
1383
1384fn process_fields_prefix(
1388 data: &[u8],
1389 delim: u8,
1390 line_delim: u8,
1391 last_field: usize,
1392 suppress: bool,
1393 out: &mut impl Write,
1394) -> io::Result<()> {
1395 if data.len() >= PARALLEL_THRESHOLD {
1396 let chunks = split_into_chunks(data, line_delim);
1397 let results: Vec<Vec<u8>> = chunks
1398 .par_iter()
1399 .map(|chunk| {
1400 let mut buf = Vec::with_capacity(chunk.len());
1401 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
1402 buf
1403 })
1404 .collect();
1405 let slices: Vec<IoSlice> = results
1407 .iter()
1408 .filter(|r| !r.is_empty())
1409 .map(|r| IoSlice::new(r))
1410 .collect();
1411 write_ioslices(out, &slices)?;
1412 } else if !suppress {
1413 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1417 } else {
1418 let mut buf = Vec::with_capacity(data.len());
1419 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
1420 if !buf.is_empty() {
1421 out.write_all(&buf)?;
1422 }
1423 }
1424 Ok(())
1425}
1426
1427#[inline]
1433fn fields_prefix_zerocopy(
1434 data: &[u8],
1435 delim: u8,
1436 line_delim: u8,
1437 last_field: usize,
1438 out: &mut impl Write,
1439) -> io::Result<()> {
1440 let newline_buf: [u8; 1] = [line_delim];
1441 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1442 let mut start = 0;
1443 let mut run_start: usize = 0;
1444
1445 for end_pos in memchr_iter(line_delim, data) {
1446 let line = &data[start..end_pos];
1447 let mut field_count = 1;
1448 let mut truncate_at: Option<usize> = None;
1449 for dpos in memchr_iter(delim, line) {
1450 if field_count >= last_field {
1451 truncate_at = Some(start + dpos);
1452 break;
1453 }
1454 field_count += 1;
1455 }
1456
1457 if let Some(trunc_pos) = truncate_at {
1458 if run_start < start {
1459 iov.push(IoSlice::new(&data[run_start..start]));
1460 }
1461 iov.push(IoSlice::new(&data[start..trunc_pos]));
1462 iov.push(IoSlice::new(&newline_buf));
1463 run_start = end_pos + 1;
1464
1465 if iov.len() >= MAX_IOV - 2 {
1466 write_ioslices(out, &iov)?;
1467 iov.clear();
1468 }
1469 }
1470 start = end_pos + 1;
1471 }
1472 if start < data.len() {
1474 let line = &data[start..];
1475 let mut field_count = 1;
1476 let mut truncate_at: Option<usize> = None;
1477 for dpos in memchr_iter(delim, line) {
1478 if field_count >= last_field {
1479 truncate_at = Some(start + dpos);
1480 break;
1481 }
1482 field_count += 1;
1483 }
1484 if let Some(trunc_pos) = truncate_at {
1485 if run_start < start {
1486 iov.push(IoSlice::new(&data[run_start..start]));
1487 }
1488 iov.push(IoSlice::new(&data[start..trunc_pos]));
1489 iov.push(IoSlice::new(&newline_buf));
1490 if !iov.is_empty() {
1491 write_ioslices(out, &iov)?;
1492 }
1493 return Ok(());
1494 }
1495 }
1496 if run_start < data.len() {
1498 iov.push(IoSlice::new(&data[run_start..]));
1499 if !data.is_empty() && *data.last().unwrap() != line_delim {
1500 iov.push(IoSlice::new(&newline_buf));
1501 }
1502 }
1503 if !iov.is_empty() {
1504 write_ioslices(out, &iov)?;
1505 }
1506 Ok(())
1507}
1508
1509fn fields_prefix_chunk(
1511 data: &[u8],
1512 delim: u8,
1513 line_delim: u8,
1514 last_field: usize,
1515 suppress: bool,
1516 buf: &mut Vec<u8>,
1517) {
1518 let mut start = 0;
1519 for end_pos in memchr_iter(line_delim, data) {
1520 let line = &data[start..end_pos];
1521 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1522 start = end_pos + 1;
1523 }
1524 if start < data.len() {
1525 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1526 }
1527}
1528
1529#[inline(always)]
1532fn fields_prefix_line(
1533 line: &[u8],
1534 delim: u8,
1535 line_delim: u8,
1536 last_field: usize,
1537 suppress: bool,
1538 buf: &mut Vec<u8>,
1539) {
1540 let len = line.len();
1541 if len == 0 {
1542 if !suppress {
1543 buf.push(line_delim);
1544 }
1545 return;
1546 }
1547
1548 buf.reserve(len + 1);
1549 let base = line.as_ptr();
1550
1551 let mut field_count = 1usize;
1552 let mut has_delim = false;
1553
1554 for pos in memchr_iter(delim, line) {
1555 has_delim = true;
1556 if field_count >= last_field {
1557 unsafe {
1558 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1559 buf_push(buf, line_delim);
1560 }
1561 return;
1562 }
1563 field_count += 1;
1564 }
1565
1566 if !has_delim {
1567 if !suppress {
1568 unsafe {
1569 buf_extend(buf, line);
1570 buf_push(buf, line_delim);
1571 }
1572 }
1573 return;
1574 }
1575
1576 unsafe {
1577 buf_extend(buf, line);
1578 buf_push(buf, line_delim);
1579 }
1580}
1581
1582fn process_fields_suffix(
1584 data: &[u8],
1585 delim: u8,
1586 line_delim: u8,
1587 start_field: usize,
1588 suppress: bool,
1589 out: &mut impl Write,
1590) -> io::Result<()> {
1591 if data.len() >= PARALLEL_THRESHOLD {
1592 let chunks = split_into_chunks(data, line_delim);
1593 let results: Vec<Vec<u8>> = chunks
1594 .par_iter()
1595 .map(|chunk| {
1596 let mut buf = Vec::with_capacity(chunk.len());
1597 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
1598 buf
1599 })
1600 .collect();
1601 let slices: Vec<IoSlice> = results
1603 .iter()
1604 .filter(|r| !r.is_empty())
1605 .map(|r| IoSlice::new(r))
1606 .collect();
1607 write_ioslices(out, &slices)?;
1608 } else {
1609 let mut buf = Vec::with_capacity(data.len());
1610 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
1611 if !buf.is_empty() {
1612 out.write_all(&buf)?;
1613 }
1614 }
1615 Ok(())
1616}
1617
1618fn fields_suffix_chunk(
1620 data: &[u8],
1621 delim: u8,
1622 line_delim: u8,
1623 start_field: usize,
1624 suppress: bool,
1625 buf: &mut Vec<u8>,
1626) {
1627 let mut start = 0;
1628 for end_pos in memchr_iter(line_delim, data) {
1629 let line = &data[start..end_pos];
1630 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1631 start = end_pos + 1;
1632 }
1633 if start < data.len() {
1634 fields_suffix_line(
1635 &data[start..],
1636 delim,
1637 line_delim,
1638 start_field,
1639 suppress,
1640 buf,
1641 );
1642 }
1643}
1644
1645#[inline(always)]
1648fn fields_suffix_line(
1649 line: &[u8],
1650 delim: u8,
1651 line_delim: u8,
1652 start_field: usize,
1653 suppress: bool,
1654 buf: &mut Vec<u8>,
1655) {
1656 let len = line.len();
1657 if len == 0 {
1658 if !suppress {
1659 buf.push(line_delim);
1660 }
1661 return;
1662 }
1663
1664 buf.reserve(len + 1);
1665 let base = line.as_ptr();
1666
1667 let skip_delims = start_field - 1;
1668 let mut delim_count = 0usize;
1669 let mut has_delim = false;
1670
1671 for pos in memchr_iter(delim, line) {
1672 has_delim = true;
1673 delim_count += 1;
1674 if delim_count >= skip_delims {
1675 unsafe {
1676 buf_extend(
1677 buf,
1678 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1679 );
1680 buf_push(buf, line_delim);
1681 }
1682 return;
1683 }
1684 }
1685
1686 if !has_delim {
1687 if !suppress {
1688 unsafe {
1689 buf_extend(buf, line);
1690 buf_push(buf, line_delim);
1691 }
1692 }
1693 return;
1694 }
1695
1696 unsafe { buf_push(buf, line_delim) };
1698}
1699
1700fn process_fields_mid_range(
1703 data: &[u8],
1704 delim: u8,
1705 line_delim: u8,
1706 start_field: usize,
1707 end_field: usize,
1708 suppress: bool,
1709 out: &mut impl Write,
1710) -> io::Result<()> {
1711 if data.len() >= PARALLEL_THRESHOLD {
1712 let chunks = split_into_chunks(data, line_delim);
1713 let results: Vec<Vec<u8>> = chunks
1714 .par_iter()
1715 .map(|chunk| {
1716 let mut buf = Vec::with_capacity(chunk.len());
1717 fields_mid_range_chunk(
1718 chunk,
1719 delim,
1720 line_delim,
1721 start_field,
1722 end_field,
1723 suppress,
1724 &mut buf,
1725 );
1726 buf
1727 })
1728 .collect();
1729 let slices: Vec<IoSlice> = results
1730 .iter()
1731 .filter(|r| !r.is_empty())
1732 .map(|r| IoSlice::new(r))
1733 .collect();
1734 write_ioslices(out, &slices)?;
1735 } else {
1736 let mut buf = Vec::with_capacity(data.len());
1737 fields_mid_range_chunk(
1738 data,
1739 delim,
1740 line_delim,
1741 start_field,
1742 end_field,
1743 suppress,
1744 &mut buf,
1745 );
1746 if !buf.is_empty() {
1747 out.write_all(&buf)?;
1748 }
1749 }
1750 Ok(())
1751}
1752
1753fn fields_mid_range_chunk(
1755 data: &[u8],
1756 delim: u8,
1757 line_delim: u8,
1758 start_field: usize,
1759 end_field: usize,
1760 suppress: bool,
1761 buf: &mut Vec<u8>,
1762) {
1763 let mut start = 0;
1764 for end_pos in memchr_iter(line_delim, data) {
1765 let line = &data[start..end_pos];
1766 fields_mid_range_line(
1767 line,
1768 delim,
1769 line_delim,
1770 start_field,
1771 end_field,
1772 suppress,
1773 buf,
1774 );
1775 start = end_pos + 1;
1776 }
1777 if start < data.len() {
1778 fields_mid_range_line(
1779 &data[start..],
1780 delim,
1781 line_delim,
1782 start_field,
1783 end_field,
1784 suppress,
1785 buf,
1786 );
1787 }
1788}
1789
1790#[inline(always)]
1794fn fields_mid_range_line(
1795 line: &[u8],
1796 delim: u8,
1797 line_delim: u8,
1798 start_field: usize,
1799 end_field: usize,
1800 suppress: bool,
1801 buf: &mut Vec<u8>,
1802) {
1803 let len = line.len();
1804 if len == 0 {
1805 if !suppress {
1806 buf.push(line_delim);
1807 }
1808 return;
1809 }
1810
1811 buf.reserve(len + 1);
1812 let base = line.as_ptr();
1813
1814 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
1818 let mut delim_count = 0;
1819 let mut range_start = 0;
1820 let mut has_delim = false;
1821
1822 for pos in memchr_iter(delim, line) {
1823 has_delim = true;
1824 delim_count += 1;
1825 if delim_count == skip_before {
1826 range_start = pos + 1;
1827 }
1828 if delim_count == target_end_delim {
1829 if skip_before == 0 {
1830 range_start = 0;
1831 }
1832 unsafe {
1833 buf_extend(
1834 buf,
1835 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
1836 );
1837 buf_push(buf, line_delim);
1838 }
1839 return;
1840 }
1841 }
1842
1843 if !has_delim {
1844 if !suppress {
1845 unsafe {
1846 buf_extend(buf, line);
1847 buf_push(buf, line_delim);
1848 }
1849 }
1850 return;
1851 }
1852
1853 if delim_count >= skip_before {
1855 if skip_before == 0 {
1857 range_start = 0;
1858 }
1859 unsafe {
1860 buf_extend(
1861 buf,
1862 std::slice::from_raw_parts(base.add(range_start), len - range_start),
1863 );
1864 buf_push(buf, line_delim);
1865 }
1866 } else {
1867 unsafe { buf_push(buf, line_delim) };
1869 }
1870}
1871
1872fn process_nth_field_combined(
1880 data: &[u8],
1881 delim: u8,
1882 line_delim: u8,
1883 target_idx: usize,
1884 suppress: bool,
1885 buf: &mut Vec<u8>,
1886) {
1887 buf.reserve(data.len());
1888
1889 let data_len = data.len();
1890 let base = data.as_ptr();
1891 let mut line_start: usize = 0;
1892 let mut field_start: usize = 0;
1893 let mut field_idx: usize = 0;
1894 let mut has_delim = false;
1895 let mut emitted = false;
1896
1897 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1898 let byte = unsafe { *base.add(pos) };
1899
1900 if byte == line_delim {
1901 if !emitted {
1903 if has_delim && field_idx == target_idx {
1904 unsafe {
1906 buf_extend(
1907 buf,
1908 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
1909 );
1910 buf_push(buf, line_delim);
1911 }
1912 } else if has_delim {
1913 unsafe {
1915 buf_push(buf, line_delim);
1916 }
1917 } else if !suppress {
1918 unsafe {
1920 buf_extend(
1921 buf,
1922 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1923 );
1924 buf_push(buf, line_delim);
1925 }
1926 }
1927 }
1928 line_start = pos + 1;
1930 field_start = pos + 1;
1931 field_idx = 0;
1932 has_delim = false;
1933 emitted = false;
1934 } else {
1935 has_delim = true;
1937 if field_idx == target_idx {
1938 unsafe {
1939 buf_extend(
1940 buf,
1941 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
1942 );
1943 buf_push(buf, line_delim);
1944 }
1945 emitted = true;
1946 }
1947 field_idx += 1;
1948 field_start = pos + 1;
1949 }
1950 }
1951
1952 if line_start < data_len && !emitted {
1954 if has_delim && field_idx == target_idx {
1955 unsafe {
1956 buf_extend(
1957 buf,
1958 std::slice::from_raw_parts(base.add(field_start), data_len - field_start),
1959 );
1960 buf_push(buf, line_delim);
1961 }
1962 } else if has_delim {
1963 unsafe {
1964 buf_push(buf, line_delim);
1965 }
1966 } else if !suppress {
1967 unsafe {
1968 buf_extend(
1969 buf,
1970 std::slice::from_raw_parts(base.add(line_start), data_len - line_start),
1971 );
1972 buf_push(buf, line_delim);
1973 }
1974 }
1975 }
1976}
1977
1978#[inline]
1987fn single_field1_zerocopy(
1988 data: &[u8],
1989 delim: u8,
1990 line_delim: u8,
1991 out: &mut impl Write,
1992) -> io::Result<()> {
1993 let newline_buf: [u8; 1] = [line_delim];
1995
1996 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1997 let mut line_start: usize = 0;
1998 let mut run_start: usize = 0;
1999 let mut first_delim: Option<usize> = None;
2000
2001 for pos in memchr::memchr2_iter(delim, line_delim, data) {
2002 let byte = unsafe { *data.get_unchecked(pos) };
2003
2004 if byte == line_delim {
2005 if let Some(dp) = first_delim {
2007 if run_start < line_start {
2010 iov.push(IoSlice::new(&data[run_start..line_start]));
2011 }
2012 iov.push(IoSlice::new(&data[line_start..dp]));
2013 iov.push(IoSlice::new(&newline_buf));
2014 run_start = pos + 1;
2015
2016 if iov.len() >= MAX_IOV - 2 {
2018 write_ioslices(out, &iov)?;
2019 iov.clear();
2020 }
2021 }
2022 line_start = pos + 1;
2024 first_delim = None;
2025 } else {
2026 if first_delim.is_none() {
2028 first_delim = Some(pos);
2029 }
2030 }
2031 }
2032
2033 if line_start < data.len() {
2035 if let Some(dp) = first_delim {
2036 if run_start < line_start {
2037 iov.push(IoSlice::new(&data[run_start..line_start]));
2038 }
2039 iov.push(IoSlice::new(&data[line_start..dp]));
2040 iov.push(IoSlice::new(&newline_buf));
2041 if !iov.is_empty() {
2042 write_ioslices(out, &iov)?;
2043 }
2044 return Ok(());
2045 }
2046 }
2047
2048 if run_start < data.len() {
2050 iov.push(IoSlice::new(&data[run_start..]));
2051 if !data.is_empty() && *data.last().unwrap() != line_delim {
2052 iov.push(IoSlice::new(&newline_buf));
2053 }
2054 }
2055 if !iov.is_empty() {
2056 write_ioslices(out, &iov)?;
2057 }
2058 Ok(())
2059}
2060
2061fn process_single_field_chunk(
2063 data: &[u8],
2064 delim: u8,
2065 target_idx: usize,
2066 line_delim: u8,
2067 suppress: bool,
2068 buf: &mut Vec<u8>,
2069) {
2070 let mut start = 0;
2071 for end_pos in memchr_iter(line_delim, data) {
2072 let line = &data[start..end_pos];
2073 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
2074 start = end_pos + 1;
2075 }
2076 if start < data.len() {
2077 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
2078 }
2079}
2080
2081#[inline(always)]
2086fn extract_single_field_line(
2087 line: &[u8],
2088 delim: u8,
2089 target_idx: usize,
2090 line_delim: u8,
2091 suppress: bool,
2092 buf: &mut Vec<u8>,
2093) {
2094 let len = line.len();
2095 if len == 0 {
2096 if !suppress {
2097 buf.push(line_delim);
2098 }
2099 return;
2100 }
2101
2102 buf.reserve(len + 1);
2104
2105 let base = line.as_ptr();
2106
2107 if target_idx == 0 {
2109 match memchr::memchr(delim, line) {
2110 Some(pos) => unsafe {
2111 buf_extend(buf, std::slice::from_raw_parts(base, pos));
2112 buf_push(buf, line_delim);
2113 },
2114 None => {
2115 if !suppress {
2116 unsafe {
2117 buf_extend(buf, line);
2118 buf_push(buf, line_delim);
2119 }
2120 }
2121 }
2122 }
2123 return;
2124 }
2125
2126 let mut field_start = 0;
2128 let mut field_idx = 0;
2129 let mut has_delim = false;
2130
2131 for pos in memchr_iter(delim, line) {
2132 has_delim = true;
2133 if field_idx == target_idx {
2134 unsafe {
2135 buf_extend(
2136 buf,
2137 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
2138 );
2139 buf_push(buf, line_delim);
2140 }
2141 return;
2142 }
2143 field_idx += 1;
2144 field_start = pos + 1;
2145 }
2146
2147 if !has_delim {
2148 if !suppress {
2149 unsafe {
2150 buf_extend(buf, line);
2151 buf_push(buf, line_delim);
2152 }
2153 }
2154 return;
2155 }
2156
2157 if field_idx == target_idx {
2158 unsafe {
2159 buf_extend(
2160 buf,
2161 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2162 );
2163 buf_push(buf, line_delim);
2164 }
2165 } else {
2166 unsafe { buf_push(buf, line_delim) };
2167 }
2168}
2169
2170#[inline(always)]
2174fn extract_fields_to_buf(
2175 line: &[u8],
2176 delim: u8,
2177 ranges: &[Range],
2178 output_delim: &[u8],
2179 suppress: bool,
2180 max_field: usize,
2181 field_mask: u64,
2182 line_delim: u8,
2183 buf: &mut Vec<u8>,
2184 complement: bool,
2185) {
2186 let len = line.len();
2187
2188 if len == 0 {
2189 if !suppress {
2190 buf.push(line_delim);
2191 }
2192 return;
2193 }
2194
2195 let needed = len + output_delim.len() * 16 + 1;
2198 if buf.capacity() - buf.len() < needed {
2199 buf.reserve(needed);
2200 }
2201
2202 let base = line.as_ptr();
2203 let mut field_num: usize = 1;
2204 let mut field_start: usize = 0;
2205 let mut first_output = true;
2206 let mut has_delim = false;
2207
2208 for delim_pos in memchr_iter(delim, line) {
2210 has_delim = true;
2211
2212 if is_selected(field_num, field_mask, ranges, complement) {
2213 if !first_output {
2214 unsafe { buf_extend(buf, output_delim) };
2215 }
2216 unsafe {
2217 buf_extend(
2218 buf,
2219 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2220 )
2221 };
2222 first_output = false;
2223 }
2224
2225 field_num += 1;
2226 field_start = delim_pos + 1;
2227
2228 if field_num > max_field {
2229 break;
2230 }
2231 }
2232
2233 if (field_num <= max_field || complement)
2235 && has_delim
2236 && is_selected(field_num, field_mask, ranges, complement)
2237 {
2238 if !first_output {
2239 unsafe { buf_extend(buf, output_delim) };
2240 }
2241 unsafe {
2242 buf_extend(
2243 buf,
2244 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2245 )
2246 };
2247 first_output = false;
2248 }
2249
2250 if !first_output {
2251 unsafe { buf_push(buf, line_delim) };
2252 } else if !has_delim {
2253 if !suppress {
2254 unsafe {
2255 buf_extend(buf, line);
2256 buf_push(buf, line_delim);
2257 }
2258 }
2259 } else {
2260 unsafe { buf_push(buf, line_delim) };
2261 }
2262}
2263
2264fn process_bytes_from_start(
2271 data: &[u8],
2272 max_bytes: usize,
2273 line_delim: u8,
2274 out: &mut impl Write,
2275) -> io::Result<()> {
2276 if data.len() >= PARALLEL_THRESHOLD {
2277 let chunks = split_into_chunks(data, line_delim);
2278 let results: Vec<Vec<u8>> = chunks
2279 .par_iter()
2280 .map(|chunk| {
2281 let est_out = (chunk.len() / 4).max(max_bytes + 2);
2287 let mut buf = Vec::with_capacity(est_out.min(chunk.len()));
2288 bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
2289 buf
2290 })
2291 .collect();
2292 let slices: Vec<IoSlice> = results
2294 .iter()
2295 .filter(|r| !r.is_empty())
2296 .map(|r| IoSlice::new(r))
2297 .collect();
2298 write_ioslices(out, &slices)?;
2299 } else {
2300 if max_bytes <= 512 {
2306 let est_out = (data.len() / 4).max(max_bytes + 2);
2309 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2310 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2311 if !buf.is_empty() {
2312 out.write_all(&buf)?;
2313 }
2314 } else {
2315 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2319 }
2320 }
2321 Ok(())
2322}
2323
2324#[inline]
2329fn bytes_from_start_zerocopy(
2330 data: &[u8],
2331 max_bytes: usize,
2332 line_delim: u8,
2333 out: &mut impl Write,
2334) -> io::Result<()> {
2335 let newline_buf: [u8; 1] = [line_delim];
2336 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2337 let mut start = 0;
2338 let mut run_start: usize = 0;
2339
2340 for pos in memchr_iter(line_delim, data) {
2341 let line_len = pos - start;
2342 if line_len > max_bytes {
2343 if run_start < start {
2345 iov.push(IoSlice::new(&data[run_start..start]));
2346 }
2347 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2348 iov.push(IoSlice::new(&newline_buf));
2349 run_start = pos + 1;
2350
2351 if iov.len() >= MAX_IOV - 2 {
2352 write_ioslices(out, &iov)?;
2353 iov.clear();
2354 }
2355 }
2356 start = pos + 1;
2357 }
2358 if start < data.len() {
2360 let line_len = data.len() - start;
2361 if line_len > max_bytes {
2362 if run_start < start {
2363 iov.push(IoSlice::new(&data[run_start..start]));
2364 }
2365 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2366 iov.push(IoSlice::new(&newline_buf));
2367 if !iov.is_empty() {
2368 write_ioslices(out, &iov)?;
2369 }
2370 return Ok(());
2371 }
2372 }
2373 if run_start < data.len() {
2375 iov.push(IoSlice::new(&data[run_start..]));
2376 if !data.is_empty() && *data.last().unwrap() != line_delim {
2377 iov.push(IoSlice::new(&newline_buf));
2378 }
2379 }
2380 if !iov.is_empty() {
2381 write_ioslices(out, &iov)?;
2382 }
2383 Ok(())
2384}
2385
2386#[inline]
2391fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2392 buf.reserve(data.len());
2395
2396 let src = data.as_ptr();
2397 let dst_base = buf.as_mut_ptr();
2398 let mut wp = buf.len();
2399 let mut start = 0;
2400
2401 for pos in memchr_iter(line_delim, data) {
2402 let line_len = pos - start;
2403 let take = line_len.min(max_bytes);
2404 unsafe {
2405 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2406 *dst_base.add(wp + take) = line_delim;
2407 }
2408 wp += take + 1;
2409 start = pos + 1;
2410 }
2411 if start < data.len() {
2413 let line_len = data.len() - start;
2414 let take = line_len.min(max_bytes);
2415 unsafe {
2416 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2417 *dst_base.add(wp + take) = line_delim;
2418 }
2419 wp += take + 1;
2420 }
2421 unsafe { buf.set_len(wp) };
2422}
2423
2424fn process_bytes_from_offset(
2426 data: &[u8],
2427 skip_bytes: usize,
2428 line_delim: u8,
2429 out: &mut impl Write,
2430) -> io::Result<()> {
2431 if data.len() >= PARALLEL_THRESHOLD {
2432 let chunks = split_into_chunks(data, line_delim);
2433 let results: Vec<Vec<u8>> = chunks
2434 .par_iter()
2435 .map(|chunk| {
2436 let mut buf = Vec::with_capacity(chunk.len());
2437 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
2438 buf
2439 })
2440 .collect();
2441 let slices: Vec<IoSlice> = results
2443 .iter()
2444 .filter(|r| !r.is_empty())
2445 .map(|r| IoSlice::new(r))
2446 .collect();
2447 write_ioslices(out, &slices)?;
2448 } else {
2449 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2451 }
2452 Ok(())
2453}
2454
2455#[inline]
2459fn bytes_from_offset_zerocopy(
2460 data: &[u8],
2461 skip_bytes: usize,
2462 line_delim: u8,
2463 out: &mut impl Write,
2464) -> io::Result<()> {
2465 let delim_buf = [line_delim];
2466 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2467
2468 let mut start = 0;
2469 for pos in memchr_iter(line_delim, data) {
2470 let line_len = pos - start;
2471 if line_len > skip_bytes {
2472 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2473 }
2474 iov.push(IoSlice::new(&delim_buf));
2475 if iov.len() >= MAX_IOV - 1 {
2477 write_ioslices(out, &iov)?;
2478 iov.clear();
2479 }
2480 start = pos + 1;
2481 }
2482 if start < data.len() {
2483 let line_len = data.len() - start;
2484 if line_len > skip_bytes {
2485 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2486 }
2487 iov.push(IoSlice::new(&delim_buf));
2488 }
2489 if !iov.is_empty() {
2490 write_ioslices(out, &iov)?;
2491 }
2492 Ok(())
2493}
2494
2495#[inline]
2498fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2499 buf.reserve(data.len());
2500
2501 let src = data.as_ptr();
2502 let dst_base = buf.as_mut_ptr();
2503 let mut wp = buf.len();
2504 let mut start = 0;
2505
2506 for pos in memchr_iter(line_delim, data) {
2507 let line_len = pos - start;
2508 if line_len > skip_bytes {
2509 let take = line_len - skip_bytes;
2510 unsafe {
2511 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2512 }
2513 wp += take;
2514 }
2515 unsafe {
2516 *dst_base.add(wp) = line_delim;
2517 }
2518 wp += 1;
2519 start = pos + 1;
2520 }
2521 if start < data.len() {
2522 let line_len = data.len() - start;
2523 if line_len > skip_bytes {
2524 let take = line_len - skip_bytes;
2525 unsafe {
2526 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2527 }
2528 wp += take;
2529 }
2530 unsafe {
2531 *dst_base.add(wp) = line_delim;
2532 }
2533 wp += 1;
2534 }
2535 unsafe { buf.set_len(wp) };
2536}
2537
2538fn process_bytes_mid_range(
2540 data: &[u8],
2541 start_byte: usize,
2542 end_byte: usize,
2543 line_delim: u8,
2544 out: &mut impl Write,
2545) -> io::Result<()> {
2546 let skip = start_byte.saturating_sub(1);
2547
2548 if data.len() >= PARALLEL_THRESHOLD {
2549 let chunks = split_into_chunks(data, line_delim);
2550 let results: Vec<Vec<u8>> = chunks
2551 .par_iter()
2552 .map(|chunk| {
2553 let mut buf = Vec::with_capacity(chunk.len());
2554 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, &mut buf);
2555 buf
2556 })
2557 .collect();
2558 let slices: Vec<IoSlice> = results
2559 .iter()
2560 .filter(|r| !r.is_empty())
2561 .map(|r| IoSlice::new(r))
2562 .collect();
2563 write_ioslices(out, &slices)?;
2564 } else {
2565 let mut buf = Vec::with_capacity(data.len());
2566 bytes_mid_range_chunk(data, skip, end_byte, line_delim, &mut buf);
2567 if !buf.is_empty() {
2568 out.write_all(&buf)?;
2569 }
2570 }
2571 Ok(())
2572}
2573
2574#[inline]
2578fn bytes_mid_range_chunk(
2579 data: &[u8],
2580 skip: usize,
2581 end_byte: usize,
2582 line_delim: u8,
2583 buf: &mut Vec<u8>,
2584) {
2585 buf.reserve(data.len());
2586
2587 let src = data.as_ptr();
2588 let dst_base = buf.as_mut_ptr();
2589 let mut wp = buf.len();
2590 let mut start = 0;
2591
2592 for pos in memchr_iter(line_delim, data) {
2593 let line_len = pos - start;
2594 if line_len > skip {
2595 let take_end = line_len.min(end_byte);
2596 let take = take_end - skip;
2597 unsafe {
2598 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2599 }
2600 wp += take;
2601 }
2602 unsafe {
2603 *dst_base.add(wp) = line_delim;
2604 }
2605 wp += 1;
2606 start = pos + 1;
2607 }
2608 if start < data.len() {
2609 let line_len = data.len() - start;
2610 if line_len > skip {
2611 let take_end = line_len.min(end_byte);
2612 let take = take_end - skip;
2613 unsafe {
2614 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2615 }
2616 wp += take;
2617 }
2618 unsafe {
2619 *dst_base.add(wp) = line_delim;
2620 }
2621 wp += 1;
2622 }
2623 unsafe { buf.set_len(wp) };
2624}
2625
2626fn process_bytes_complement_mid(
2628 data: &[u8],
2629 skip_start: usize,
2630 skip_end: usize,
2631 line_delim: u8,
2632 out: &mut impl Write,
2633) -> io::Result<()> {
2634 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
2636 let chunks = split_into_chunks(data, line_delim);
2637 let results: Vec<Vec<u8>> = chunks
2638 .par_iter()
2639 .map(|chunk| {
2640 let mut buf = Vec::with_capacity(chunk.len());
2641 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, &mut buf);
2642 buf
2643 })
2644 .collect();
2645 let slices: Vec<IoSlice> = results
2646 .iter()
2647 .filter(|r| !r.is_empty())
2648 .map(|r| IoSlice::new(r))
2649 .collect();
2650 write_ioslices(out, &slices)?;
2651 } else {
2652 let mut buf = Vec::with_capacity(data.len());
2653 bytes_complement_mid_chunk(data, prefix_bytes, skip_end, line_delim, &mut buf);
2654 if !buf.is_empty() {
2655 out.write_all(&buf)?;
2656 }
2657 }
2658 Ok(())
2659}
2660
2661#[inline]
2664fn bytes_complement_mid_chunk(
2665 data: &[u8],
2666 prefix_bytes: usize,
2667 skip_end: usize,
2668 line_delim: u8,
2669 buf: &mut Vec<u8>,
2670) {
2671 buf.reserve(data.len());
2672
2673 let src = data.as_ptr();
2674 let dst_base = buf.as_mut_ptr();
2675 let mut wp = buf.len();
2676 let mut start = 0;
2677
2678 for pos in memchr_iter(line_delim, data) {
2679 let line_len = pos - start;
2680 let take_prefix = prefix_bytes.min(line_len);
2682 if take_prefix > 0 {
2683 unsafe {
2684 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2685 }
2686 wp += take_prefix;
2687 }
2688 if line_len > skip_end {
2690 let suffix_len = line_len - skip_end;
2691 unsafe {
2692 std::ptr::copy_nonoverlapping(
2693 src.add(start + skip_end),
2694 dst_base.add(wp),
2695 suffix_len,
2696 );
2697 }
2698 wp += suffix_len;
2699 }
2700 unsafe {
2701 *dst_base.add(wp) = line_delim;
2702 }
2703 wp += 1;
2704 start = pos + 1;
2705 }
2706 if start < data.len() {
2707 let line_len = data.len() - start;
2708 let take_prefix = prefix_bytes.min(line_len);
2709 if take_prefix > 0 {
2710 unsafe {
2711 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2712 }
2713 wp += take_prefix;
2714 }
2715 if line_len > skip_end {
2716 let suffix_len = line_len - skip_end;
2717 unsafe {
2718 std::ptr::copy_nonoverlapping(
2719 src.add(start + skip_end),
2720 dst_base.add(wp),
2721 suffix_len,
2722 );
2723 }
2724 wp += suffix_len;
2725 }
2726 unsafe {
2727 *dst_base.add(wp) = line_delim;
2728 }
2729 wp += 1;
2730 }
2731 unsafe { buf.set_len(wp) };
2732}
2733
2734fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
2736 let line_delim = cfg.line_delim;
2737 let ranges = cfg.ranges;
2738 let complement = cfg.complement;
2739 let output_delim = cfg.output_delim;
2740
2741 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
2743 let max_bytes = ranges[0].end;
2744 if max_bytes < usize::MAX {
2745 return process_bytes_from_start(data, max_bytes, line_delim, out);
2746 }
2747 }
2748
2749 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
2751 let skip_bytes = ranges[0].start.saturating_sub(1);
2752 if skip_bytes > 0 {
2753 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
2754 }
2755 }
2756
2757 if !complement
2759 && ranges.len() == 1
2760 && ranges[0].start > 1
2761 && ranges[0].end < usize::MAX
2762 && output_delim.is_empty()
2763 {
2764 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
2765 }
2766
2767 if complement
2769 && ranges.len() == 1
2770 && ranges[0].start == 1
2771 && ranges[0].end < usize::MAX
2772 && output_delim.is_empty()
2773 {
2774 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
2775 }
2776
2777 if complement
2779 && ranges.len() == 1
2780 && ranges[0].end == usize::MAX
2781 && ranges[0].start > 1
2782 && output_delim.is_empty()
2783 {
2784 let max_bytes = ranges[0].start - 1;
2785 return process_bytes_from_start(data, max_bytes, line_delim, out);
2786 }
2787
2788 if complement
2790 && ranges.len() == 1
2791 && ranges[0].start > 1
2792 && ranges[0].end < usize::MAX
2793 && output_delim.is_empty()
2794 {
2795 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
2796 }
2797
2798 if data.len() >= PARALLEL_THRESHOLD {
2799 let chunks = split_into_chunks(data, line_delim);
2800 let results: Vec<Vec<u8>> = chunks
2801 .par_iter()
2802 .map(|chunk| {
2803 let mut buf = Vec::with_capacity(chunk.len());
2804 process_bytes_chunk(
2805 chunk,
2806 ranges,
2807 complement,
2808 output_delim,
2809 line_delim,
2810 &mut buf,
2811 );
2812 buf
2813 })
2814 .collect();
2815 let slices: Vec<IoSlice> = results
2817 .iter()
2818 .filter(|r| !r.is_empty())
2819 .map(|r| IoSlice::new(r))
2820 .collect();
2821 write_ioslices(out, &slices)?;
2822 } else {
2823 let mut buf = Vec::with_capacity(data.len());
2824 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
2825 if !buf.is_empty() {
2826 out.write_all(&buf)?;
2827 }
2828 }
2829 Ok(())
2830}
2831
2832fn process_bytes_chunk(
2837 data: &[u8],
2838 ranges: &[Range],
2839 complement: bool,
2840 output_delim: &[u8],
2841 line_delim: u8,
2842 buf: &mut Vec<u8>,
2843) {
2844 buf.reserve(data.len());
2845 let base = data.as_ptr();
2846 let mut start = 0;
2847 for end_pos in memchr_iter(line_delim, data) {
2848 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
2849 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2850 unsafe { buf_push(buf, line_delim) };
2851 start = end_pos + 1;
2852 }
2853 if start < data.len() {
2854 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2855 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2856 unsafe { buf_push(buf, line_delim) };
2857 }
2858}
2859
2860#[inline(always)]
2864fn cut_bytes_to_buf(
2865 line: &[u8],
2866 ranges: &[Range],
2867 complement: bool,
2868 output_delim: &[u8],
2869 buf: &mut Vec<u8>,
2870) {
2871 let len = line.len();
2872 let base = line.as_ptr();
2873 let mut first_range = true;
2874
2875 let needed = len + output_delim.len() * ranges.len() + 1;
2877 if buf.capacity() - buf.len() < needed {
2878 buf.reserve(needed);
2879 }
2880
2881 if complement {
2882 let mut pos: usize = 1;
2883 for r in ranges {
2884 let rs = r.start;
2885 let re = r.end.min(len);
2886 if pos < rs {
2887 if !first_range && !output_delim.is_empty() {
2888 unsafe { buf_extend(buf, output_delim) };
2889 }
2890 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
2891 first_range = false;
2892 }
2893 pos = re + 1;
2894 if pos > len {
2895 break;
2896 }
2897 }
2898 if pos <= len {
2899 if !first_range && !output_delim.is_empty() {
2900 unsafe { buf_extend(buf, output_delim) };
2901 }
2902 unsafe {
2903 buf_extend(
2904 buf,
2905 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
2906 )
2907 };
2908 }
2909 } else if output_delim.is_empty() && ranges.len() == 1 {
2910 let start = ranges[0].start.saturating_sub(1);
2912 let end = ranges[0].end.min(len);
2913 if start < len {
2914 unsafe {
2915 buf_extend(
2916 buf,
2917 std::slice::from_raw_parts(base.add(start), end - start),
2918 )
2919 };
2920 }
2921 } else {
2922 for r in ranges {
2923 let start = r.start.saturating_sub(1);
2924 let end = r.end.min(len);
2925 if start >= len {
2926 break;
2927 }
2928 if !first_range && !output_delim.is_empty() {
2929 unsafe { buf_extend(buf, output_delim) };
2930 }
2931 unsafe {
2932 buf_extend(
2933 buf,
2934 std::slice::from_raw_parts(base.add(start), end - start),
2935 )
2936 };
2937 first_range = false;
2938 }
2939 }
2940}
2941
2942#[inline]
2946pub fn cut_fields(
2947 line: &[u8],
2948 delim: u8,
2949 ranges: &[Range],
2950 complement: bool,
2951 output_delim: &[u8],
2952 suppress_no_delim: bool,
2953 out: &mut impl Write,
2954) -> io::Result<bool> {
2955 if memchr::memchr(delim, line).is_none() {
2956 if !suppress_no_delim {
2957 out.write_all(line)?;
2958 return Ok(true);
2959 }
2960 return Ok(false);
2961 }
2962
2963 let mut field_num: usize = 1;
2964 let mut field_start: usize = 0;
2965 let mut first_output = true;
2966
2967 for delim_pos in memchr_iter(delim, line) {
2968 let selected = in_ranges(ranges, field_num) != complement;
2969 if selected {
2970 if !first_output {
2971 out.write_all(output_delim)?;
2972 }
2973 out.write_all(&line[field_start..delim_pos])?;
2974 first_output = false;
2975 }
2976 field_start = delim_pos + 1;
2977 field_num += 1;
2978 }
2979
2980 let selected = in_ranges(ranges, field_num) != complement;
2981 if selected {
2982 if !first_output {
2983 out.write_all(output_delim)?;
2984 }
2985 out.write_all(&line[field_start..])?;
2986 }
2987
2988 Ok(true)
2989}
2990
2991#[inline]
2993pub fn cut_bytes(
2994 line: &[u8],
2995 ranges: &[Range],
2996 complement: bool,
2997 output_delim: &[u8],
2998 out: &mut impl Write,
2999) -> io::Result<bool> {
3000 let mut first_range = true;
3001
3002 if complement {
3003 let len = line.len();
3004 let mut comp_ranges = Vec::new();
3005 let mut pos: usize = 1;
3006 for r in ranges {
3007 let rs = r.start;
3008 let re = r.end.min(len);
3009 if pos < rs {
3010 comp_ranges.push((pos, rs - 1));
3011 }
3012 pos = re + 1;
3013 if pos > len {
3014 break;
3015 }
3016 }
3017 if pos <= len {
3018 comp_ranges.push((pos, len));
3019 }
3020 for &(s, e) in &comp_ranges {
3021 if !first_range && !output_delim.is_empty() {
3022 out.write_all(output_delim)?;
3023 }
3024 out.write_all(&line[s - 1..e])?;
3025 first_range = false;
3026 }
3027 } else {
3028 for r in ranges {
3029 let start = r.start.saturating_sub(1);
3030 let end = r.end.min(line.len());
3031 if start >= line.len() {
3032 break;
3033 }
3034 if !first_range && !output_delim.is_empty() {
3035 out.write_all(output_delim)?;
3036 }
3037 out.write_all(&line[start..end])?;
3038 first_range = false;
3039 }
3040 }
3041 Ok(true)
3042}
3043
3044pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3046 match cfg.mode {
3047 CutMode::Fields => process_fields_fast(data, cfg, out),
3048 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
3049 }
3050}
3051
3052pub fn process_cut_reader<R: BufRead>(
3057 mut reader: R,
3058 cfg: &CutConfig,
3059 out: &mut impl Write,
3060) -> io::Result<()> {
3061 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3063
3064 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3067
3068 loop {
3069 buf.reserve(CHUNK_SIZE);
3071 let read_start = buf.len();
3072 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3073 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3074 buf.truncate(read_start + n);
3075
3076 if buf.is_empty() {
3077 break;
3078 }
3079
3080 if n == 0 {
3081 process_cut_data(&buf, cfg, out)?;
3083 break;
3084 }
3085
3086 let process_end = match memchr::memrchr(line_delim, &buf) {
3088 Some(pos) => pos + 1,
3089 None => {
3090 continue;
3092 }
3093 };
3094
3095 process_cut_data(&buf[..process_end], cfg, out)?;
3097
3098 let leftover_len = buf.len() - process_end;
3100 if leftover_len > 0 {
3101 buf.copy_within(process_end.., 0);
3102 }
3103 buf.truncate(leftover_len);
3104 }
3105
3106 Ok(())
3107}
3108
/// Reads from `reader` until `buf` is full or the reader reports end of input.
///
/// Returns the number of bytes stored at the front of `buf`. A result smaller
/// than `buf.len()` means a read returned `Ok(0)` before the buffer was full.
///
/// # Errors
///
/// Reads failing with `ErrorKind::Interrupted` are retried, including the
/// very first one; any other error is returned immediately.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3128
/// Selection mode used by `process_cut_data` to pick a processing path.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Select by byte position.
    Bytes,
    /// Select by character position; `process_cut_data` routes this through
    /// the same byte-oriented path as [`CutMode::Bytes`].
    Characters,
    /// Select delimiter-separated fields.
    Fields,
}