1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, IoSlice, Write};
4
/// Minimum input size, in bytes, at which the field processors split the data into
/// line-aligned chunks and process them through rayon instead of in a single pass.
const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;

/// Maximum number of `IoSlice`s handed to a single `write_vectored` call by `write_ioslices`.
// NOTE(review): presumably mirrors the platform IOV_MAX limit (1024 on Linux) — confirm.
const MAX_IOV: usize = 1024;
10
/// Options controlling a single cut operation.
pub struct CutConfig<'a> {
    /// Selection mode.
    // NOTE(review): `CutMode` is declared outside this section; presumably it distinguishes
    // bytes / characters / fields — confirm.
    pub mode: CutMode,
    /// Selected 1-based ranges. The lookup helpers stop scanning at the first range that
    /// starts after the queried position, so the slice must be in ascending order (as
    /// returned by `parse_ranges`).
    pub ranges: &'a [Range],
    /// When `true`, the selection is inverted: positions *not* covered by `ranges` are kept.
    pub complement: bool,
    /// Input field delimiter byte.
    pub delim: u8,
    /// Bytes written between two selected fields in the output.
    pub output_delim: &'a [u8],
    /// When `true`, lines that contain no `delim` byte are dropped instead of being
    /// copied through unchanged.
    pub suppress_no_delim: bool,
    /// Byte that terminates a line in the input and is written after every output record.
    pub line_delim: u8,
}
21
/// An inclusive, 1-based range of positions (fields, bytes or characters).
///
/// An open-ended range such as `3-` is stored with `end == usize::MAX`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position (1-based, inclusive).
    pub start: usize,
    /// Last selected position (inclusive); `usize::MAX` means "through the end of the line".
    pub end: usize,
}

/// Parses a `cut`-style list specification such as `1,3-5,7-` into ranges.
///
/// Accepted items, separated by commas (surrounding whitespace and empty items are ignored):
/// `N`, `N-M`, `N-` (through end of line) and `-M` (from position 1).
///
/// The result is sorted by start position and overlapping or adjacent ranges are merged,
/// so it is suitable for the early-exit scan performed by `in_ranges`.
///
/// # Errors
///
/// Returns a message when
/// * an item is not a number or a well-formed range,
/// * an item is a bare `-` (a range with neither endpoint, which GNU `cut` rejects),
/// * a position is `0` (positions are numbered from 1),
/// * a range is decreasing (`5-2`),
/// * the list selects nothing at all.
pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
    let mut ranges: Vec<Range> = Vec::new();

    for part in spec.split(',').map(str::trim).filter(|p| !p.is_empty()) {
        let (start, end) = match part.split_once('-') {
            // "-" names no endpoint at all; treating it as "1-" would silently select
            // every field, so reject it the way GNU cut does.
            Some(("", "")) => {
                return Err(format!("invalid range with no endpoint: '{}'", part));
            }
            Some((left, right)) => {
                let bound = |text: &str| {
                    text.parse::<usize>()
                        .map_err(|_| format!("invalid range: '{}'", part))
                };
                let start = if left.is_empty() { 1 } else { bound(left)? };
                let end = if right.is_empty() {
                    usize::MAX
                } else {
                    bound(right)?
                };
                (start, end)
            }
            None => {
                let n = part
                    .parse::<usize>()
                    .map_err(|_| format!("invalid field: '{}'", part))?;
                (n, n)
            }
        };

        if start == 0 {
            return Err("fields and positions are numbered from 1".to_string());
        }
        if start > end {
            return Err(format!("invalid decreasing range: '{}'", part));
        }
        ranges.push(Range { start, end });
    }

    if ranges.is_empty() {
        return Err("you must specify a list of bytes, characters, or fields".to_string());
    }

    // Equal keys denote identical ranges, so an unstable sort is sufficient.
    ranges.sort_unstable_by_key(|r| (r.start, r.end));

    let mut merged: Vec<Range> = Vec::with_capacity(ranges.len());
    for range in ranges {
        match merged.last_mut() {
            // `saturating_add` keeps an open-ended range (`end == usize::MAX`) from wrapping.
            Some(last) if range.start <= last.end.saturating_add(1) => {
                last.end = last.end.max(range.end);
            }
            _ => merged.push(range),
        }
    }

    Ok(merged)
}
96
97#[inline(always)]
100fn in_ranges(ranges: &[Range], pos: usize) -> bool {
101 for r in ranges {
102 if pos < r.start {
103 return false;
104 }
105 if pos <= r.end {
106 return true;
107 }
108 }
109 false
110}
111
112#[inline]
115fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
116 let mut mask: u64 = 0;
117 for i in 1..=64u32 {
118 let in_range = in_ranges(ranges, i as usize);
119 if in_range != complement {
120 mask |= 1u64 << (i - 1);
121 }
122 }
123 mask
124}
125
126#[inline(always)]
128fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
129 if field_num <= 64 {
130 (mask >> (field_num - 1)) & 1 == 1
131 } else {
132 in_ranges(ranges, field_num) != complement
133 }
134}
135
/// Appends `data` to `buf` without checking or growing the buffer's capacity.
///
/// # Safety
///
/// The caller must guarantee that `buf` has at least `data.len()` bytes of spare capacity
/// (`buf.capacity() - buf.len() >= data.len()`), typically by calling `Vec::reserve`
/// beforehand. Violating this writes past the end of the allocation.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    // Catch contract violations in debug/test builds; compiled out in release builds.
    debug_assert!(
        buf.capacity() - buf.len() >= data.len(),
        "buf_extend: insufficient spare capacity"
    );
    let len = buf.len();
    // SAFETY: the caller guarantees the spare capacity, so the destination range lies inside
    // the allocation; `data` cannot alias it because `buf` is borrowed exclusively. After the
    // copy the first `len + data.len()` bytes are initialised.
    unsafe {
        std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr().add(len), data.len());
        buf.set_len(len + data.len());
    }
}
148
/// Appends the single byte `b` to `buf` without checking or growing the buffer's capacity.
///
/// # Safety
///
/// The caller must guarantee that `buf` has at least one byte of spare capacity
/// (`buf.capacity() > buf.len()`). Violating this writes past the end of the allocation.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    // Catch contract violations in debug/test builds; compiled out in release builds.
    debug_assert!(
        buf.capacity() > buf.len(),
        "buf_push: insufficient spare capacity"
    );
    let len = buf.len();
    // SAFETY: the caller guarantees one spare byte, so index `len` is inside the allocation;
    // after the write the first `len + 1` bytes are initialised.
    unsafe {
        buf.as_mut_ptr().add(len).write(b);
        buf.set_len(len + 1);
    }
}
159
160#[inline]
163fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
164 if slices.is_empty() {
165 return Ok(());
166 }
167 for batch in slices.chunks(MAX_IOV) {
168 let total: usize = batch.iter().map(|s| s.len()).sum();
169 match out.write_vectored(batch) {
170 Ok(n) if n >= total => continue,
171 Ok(mut written) => {
172 for slice in batch {
174 let slen = slice.len();
175 if written >= slen {
176 written -= slen;
177 continue;
178 }
179 if written > 0 {
180 out.write_all(&slice[written..])?;
181 written = 0;
182 } else {
183 out.write_all(slice)?;
184 }
185 }
186 }
187 Err(e) => return Err(e),
188 }
189 }
190 Ok(())
191}
192
193fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
197 let num_threads = rayon::current_num_threads().max(1);
198 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
199 return vec![data];
200 }
201
202 let chunk_size = data.len() / num_threads;
203 let mut chunks = Vec::with_capacity(num_threads);
204 let mut pos = 0;
205
206 for _ in 0..num_threads - 1 {
207 let target = pos + chunk_size;
208 if target >= data.len() {
209 break;
210 }
211 let boundary = memchr::memchr(line_delim, &data[target..])
212 .map(|p| target + p + 1)
213 .unwrap_or(data.len());
214 if boundary > pos {
215 chunks.push(&data[pos..boundary]);
216 }
217 pos = boundary;
218 }
219
220 if pos < data.len() {
221 chunks.push(&data[pos..]);
222 }
223
224 chunks
225}
226
227fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
231 let delim = cfg.delim;
232 let line_delim = cfg.line_delim;
233 let ranges = cfg.ranges;
234 let complement = cfg.complement;
235 let output_delim = cfg.output_delim;
236 let suppress = cfg.suppress_no_delim;
237
238 if !complement && memchr::memchr(delim, data).is_none() {
240 if suppress {
241 return Ok(());
242 }
243 out.write_all(data)?;
244 if !data.is_empty() && *data.last().unwrap() != line_delim {
245 out.write_all(&[line_delim])?;
246 }
247 return Ok(());
248 }
249
250 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
252 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
253 }
254
255 if complement
257 && ranges.len() == 1
258 && ranges[0].start == ranges[0].end
259 && output_delim.len() == 1
260 && output_delim[0] == delim
261 {
262 return process_complement_single_field(
263 data,
264 delim,
265 line_delim,
266 ranges[0].start,
267 suppress,
268 out,
269 );
270 }
271
272 if !complement
274 && ranges.len() == 1
275 && ranges[0].start == 1
276 && output_delim.len() == 1
277 && output_delim[0] == delim
278 && ranges[0].end < usize::MAX
279 {
280 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
281 }
282
283 if !complement
285 && ranges.len() == 1
286 && ranges[0].end == usize::MAX
287 && ranges[0].start > 1
288 && output_delim.len() == 1
289 && output_delim[0] == delim
290 {
291 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
292 }
293
294 if !complement
296 && ranges.len() == 1
297 && ranges[0].start > 1
298 && ranges[0].end < usize::MAX
299 && output_delim.len() == 1
300 && output_delim[0] == delim
301 {
302 return process_fields_mid_range(
303 data,
304 delim,
305 line_delim,
306 ranges[0].start,
307 ranges[0].end,
308 suppress,
309 out,
310 );
311 }
312
313 let max_field = if complement {
315 usize::MAX
316 } else {
317 ranges.last().map(|r| r.end).unwrap_or(0)
318 };
319 let field_mask = compute_field_mask(ranges, complement);
320
321 if data.len() >= PARALLEL_THRESHOLD {
322 let chunks = split_into_chunks(data, line_delim);
323 let results: Vec<Vec<u8>> = chunks
324 .par_iter()
325 .map(|chunk| {
326 let mut buf = Vec::with_capacity(chunk.len());
327 process_fields_chunk(
328 chunk,
329 delim,
330 ranges,
331 output_delim,
332 suppress,
333 max_field,
334 field_mask,
335 line_delim,
336 complement,
337 &mut buf,
338 );
339 buf
340 })
341 .collect();
342 let slices: Vec<IoSlice> = results
344 .iter()
345 .filter(|r| !r.is_empty())
346 .map(|r| IoSlice::new(r))
347 .collect();
348 write_ioslices(out, &slices)?;
349 } else {
350 let mut buf = Vec::with_capacity(data.len());
351 process_fields_chunk(
352 data,
353 delim,
354 ranges,
355 output_delim,
356 suppress,
357 max_field,
358 field_mask,
359 line_delim,
360 complement,
361 &mut buf,
362 );
363 if !buf.is_empty() {
364 out.write_all(&buf)?;
365 }
366 }
367 Ok(())
368}
369
370fn process_fields_chunk(
375 data: &[u8],
376 delim: u8,
377 ranges: &[Range],
378 output_delim: &[u8],
379 suppress: bool,
380 max_field: usize,
381 field_mask: u64,
382 line_delim: u8,
383 complement: bool,
384 buf: &mut Vec<u8>,
385) {
386 if delim != line_delim && max_field < usize::MAX && !complement {
393 buf.reserve(data.len());
394 let mut start = 0;
395 for end_pos in memchr_iter(line_delim, data) {
396 let line = &data[start..end_pos];
397 extract_fields_to_buf(
398 line,
399 delim,
400 ranges,
401 output_delim,
402 suppress,
403 max_field,
404 field_mask,
405 line_delim,
406 buf,
407 complement,
408 );
409 start = end_pos + 1;
410 }
411 if start < data.len() {
412 extract_fields_to_buf(
413 &data[start..],
414 delim,
415 ranges,
416 output_delim,
417 suppress,
418 max_field,
419 field_mask,
420 line_delim,
421 buf,
422 complement,
423 );
424 }
425 return;
426 }
427
428 if delim != line_delim {
431 buf.reserve(data.len());
432
433 let mut line_start: usize = 0;
434 let mut field_start: usize = 0;
435 let mut field_num: usize = 1;
436 let mut first_output = true;
437 let mut has_delim = false;
438
439 for pos in memchr::memchr2_iter(delim, line_delim, data) {
440 let byte = unsafe { *data.get_unchecked(pos) };
441
442 if byte == line_delim {
443 if (field_num <= max_field || complement)
445 && has_delim
446 && is_selected(field_num, field_mask, ranges, complement)
447 {
448 if !first_output {
449 unsafe { buf_extend(buf, output_delim) };
450 }
451 unsafe { buf_extend(buf, &data[field_start..pos]) };
452 first_output = false;
453 }
454
455 if !first_output {
456 unsafe { buf_push(buf, line_delim) };
457 } else if !has_delim {
458 if !suppress {
459 unsafe {
460 buf_extend(buf, &data[line_start..pos]);
461 buf_push(buf, line_delim);
462 }
463 }
464 } else {
465 unsafe { buf_push(buf, line_delim) };
466 }
467
468 line_start = pos + 1;
470 field_start = pos + 1;
471 field_num = 1;
472 first_output = true;
473 has_delim = false;
474 } else {
475 has_delim = true;
477
478 if is_selected(field_num, field_mask, ranges, complement) {
479 if !first_output {
480 unsafe { buf_extend(buf, output_delim) };
481 }
482 unsafe { buf_extend(buf, &data[field_start..pos]) };
483 first_output = false;
484 }
485
486 field_num += 1;
487 field_start = pos + 1;
488 }
489 }
490
491 if line_start < data.len() {
493 let line = &data[line_start..];
494 if !line.is_empty() {
495 if (field_num <= max_field || complement)
496 && has_delim
497 && is_selected(field_num, field_mask, ranges, complement)
498 {
499 if !first_output {
500 unsafe { buf_extend(buf, output_delim) };
501 }
502 unsafe { buf_extend(buf, &data[field_start..data.len()]) };
503 first_output = false;
504 }
505
506 if !first_output {
507 unsafe { buf_push(buf, line_delim) };
508 } else if !has_delim {
509 if !suppress {
510 unsafe {
511 buf_extend(buf, &data[line_start..data.len()]);
512 buf_push(buf, line_delim);
513 }
514 }
515 } else {
516 unsafe { buf_push(buf, line_delim) };
517 }
518 }
519 }
520
521 return;
522 }
523
524 let mut start = 0;
526 for end_pos in memchr_iter(line_delim, data) {
527 let line = &data[start..end_pos];
528 extract_fields_to_buf(
529 line,
530 delim,
531 ranges,
532 output_delim,
533 suppress,
534 max_field,
535 field_mask,
536 line_delim,
537 buf,
538 complement,
539 );
540 start = end_pos + 1;
541 }
542 if start < data.len() {
543 extract_fields_to_buf(
544 &data[start..],
545 delim,
546 ranges,
547 output_delim,
548 suppress,
549 max_field,
550 field_mask,
551 line_delim,
552 buf,
553 complement,
554 );
555 }
556}
557
558fn process_single_field(
564 data: &[u8],
565 delim: u8,
566 line_delim: u8,
567 target: usize,
568 suppress: bool,
569 out: &mut impl Write,
570) -> io::Result<()> {
571 let target_idx = target - 1;
572
573 if delim != line_delim {
575 if data.len() >= PARALLEL_THRESHOLD {
576 let chunks = split_into_chunks(data, line_delim);
577 let results: Vec<Vec<u8>> = chunks
578 .par_iter()
579 .map(|chunk| {
580 let mut buf = Vec::with_capacity(chunk.len());
581 process_nth_field_combined(
582 chunk, delim, line_delim, target_idx, suppress, &mut buf,
583 );
584 buf
585 })
586 .collect();
587 let slices: Vec<IoSlice> = results
589 .iter()
590 .filter(|r| !r.is_empty())
591 .map(|r| IoSlice::new(r))
592 .collect();
593 write_ioslices(out, &slices)?;
594 } else if target_idx == 0 && !suppress {
595 single_field1_zerocopy(data, delim, line_delim, out)?;
600 } else if target_idx <= 3 && !suppress {
601 let mut buf = Vec::with_capacity(data.len());
606 process_small_field_combined(data, delim, line_delim, target_idx, &mut buf);
607 if !buf.is_empty() {
608 out.write_all(&buf)?;
609 }
610 } else {
611 let mut buf = Vec::with_capacity(data.len());
612 process_nth_field_combined(data, delim, line_delim, target_idx, suppress, &mut buf);
613 if !buf.is_empty() {
614 out.write_all(&buf)?;
615 }
616 }
617 return Ok(());
618 }
619
620 if data.len() >= PARALLEL_THRESHOLD {
622 let chunks = split_into_chunks(data, line_delim);
623 let results: Vec<Vec<u8>> = chunks
624 .par_iter()
625 .map(|chunk| {
626 let mut buf = Vec::with_capacity(chunk.len() / 4);
627 process_single_field_chunk(
628 chunk, delim, target_idx, line_delim, suppress, &mut buf,
629 );
630 buf
631 })
632 .collect();
633 let slices: Vec<IoSlice> = results
635 .iter()
636 .filter(|r| !r.is_empty())
637 .map(|r| IoSlice::new(r))
638 .collect();
639 write_ioslices(out, &slices)?;
640 } else {
641 let mut buf = Vec::with_capacity(data.len() / 4);
642 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
643 if !buf.is_empty() {
644 out.write_all(&buf)?;
645 }
646 }
647 Ok(())
648}
649
650fn process_complement_single_field(
652 data: &[u8],
653 delim: u8,
654 line_delim: u8,
655 skip_field: usize,
656 suppress: bool,
657 out: &mut impl Write,
658) -> io::Result<()> {
659 let skip_idx = skip_field - 1;
660
661 if data.len() >= PARALLEL_THRESHOLD {
662 let chunks = split_into_chunks(data, line_delim);
663 let results: Vec<Vec<u8>> = chunks
664 .par_iter()
665 .map(|chunk| {
666 let mut buf = Vec::with_capacity(chunk.len());
667 complement_single_field_chunk(
668 chunk, delim, skip_idx, line_delim, suppress, &mut buf,
669 );
670 buf
671 })
672 .collect();
673 let slices: Vec<IoSlice> = results
675 .iter()
676 .filter(|r| !r.is_empty())
677 .map(|r| IoSlice::new(r))
678 .collect();
679 write_ioslices(out, &slices)?;
680 } else {
681 let mut buf = Vec::with_capacity(data.len());
682 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
683 if !buf.is_empty() {
684 out.write_all(&buf)?;
685 }
686 }
687 Ok(())
688}
689
690fn complement_single_field_chunk(
692 data: &[u8],
693 delim: u8,
694 skip_idx: usize,
695 line_delim: u8,
696 suppress: bool,
697 buf: &mut Vec<u8>,
698) {
699 let mut start = 0;
700 for end_pos in memchr_iter(line_delim, data) {
701 let line = &data[start..end_pos];
702 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
703 start = end_pos + 1;
704 }
705 if start < data.len() {
706 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
707 }
708}
709
710#[inline(always)]
712fn complement_single_field_line(
713 line: &[u8],
714 delim: u8,
715 skip_idx: usize,
716 line_delim: u8,
717 suppress: bool,
718 buf: &mut Vec<u8>,
719) {
720 if line.is_empty() {
721 if !suppress {
722 buf.push(line_delim);
723 }
724 return;
725 }
726
727 buf.reserve(line.len() + 1);
728
729 let mut field_idx = 0;
730 let mut field_start = 0;
731 let mut first_output = true;
732 let mut has_delim = false;
733
734 for pos in memchr_iter(delim, line) {
735 has_delim = true;
736 if field_idx != skip_idx {
737 if !first_output {
738 unsafe { buf_push(buf, delim) };
739 }
740 unsafe { buf_extend(buf, &line[field_start..pos]) };
741 first_output = false;
742 }
743 field_idx += 1;
744 field_start = pos + 1;
745 }
746
747 if !has_delim {
748 if !suppress {
749 unsafe {
750 buf_extend(buf, line);
751 buf_push(buf, line_delim);
752 }
753 }
754 return;
755 }
756
757 if field_idx != skip_idx {
759 if !first_output {
760 unsafe { buf_push(buf, delim) };
761 }
762 unsafe { buf_extend(buf, &line[field_start..]) };
763 }
764
765 unsafe { buf_push(buf, line_delim) };
766}
767
768fn process_fields_prefix(
772 data: &[u8],
773 delim: u8,
774 line_delim: u8,
775 last_field: usize,
776 suppress: bool,
777 out: &mut impl Write,
778) -> io::Result<()> {
779 if data.len() >= PARALLEL_THRESHOLD {
780 let chunks = split_into_chunks(data, line_delim);
781 let results: Vec<Vec<u8>> = chunks
782 .par_iter()
783 .map(|chunk| {
784 let mut buf = Vec::with_capacity(chunk.len());
785 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
786 buf
787 })
788 .collect();
789 let slices: Vec<IoSlice> = results
791 .iter()
792 .filter(|r| !r.is_empty())
793 .map(|r| IoSlice::new(r))
794 .collect();
795 write_ioslices(out, &slices)?;
796 } else if !suppress {
797 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
801 } else {
802 let mut buf = Vec::with_capacity(data.len());
803 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
804 if !buf.is_empty() {
805 out.write_all(&buf)?;
806 }
807 }
808 Ok(())
809}
810
811#[inline]
816fn fields_prefix_zerocopy(
817 data: &[u8],
818 delim: u8,
819 line_delim: u8,
820 last_field: usize,
821 out: &mut impl Write,
822) -> io::Result<()> {
823 let mut start = 0;
824 let mut run_start: usize = 0;
825
826 for end_pos in memchr_iter(line_delim, data) {
827 let line = &data[start..end_pos];
828 let mut field_count = 1;
830 let mut truncate_at: Option<usize> = None;
831 for dpos in memchr_iter(delim, line) {
832 if field_count >= last_field {
833 truncate_at = Some(start + dpos);
834 break;
835 }
836 field_count += 1;
837 }
838
839 if let Some(trunc_pos) = truncate_at {
840 if run_start < start {
842 out.write_all(&data[run_start..start])?;
843 }
844 out.write_all(&data[start..trunc_pos])?;
845 out.write_all(&[line_delim])?;
846 run_start = end_pos + 1;
847 }
848 start = end_pos + 1;
850 }
851 if start < data.len() {
853 let line = &data[start..];
854 let mut field_count = 1;
855 let mut truncate_at: Option<usize> = None;
856 for dpos in memchr_iter(delim, line) {
857 if field_count >= last_field {
858 truncate_at = Some(start + dpos);
859 break;
860 }
861 field_count += 1;
862 }
863 if let Some(trunc_pos) = truncate_at {
864 if run_start < start {
865 out.write_all(&data[run_start..start])?;
866 }
867 out.write_all(&data[start..trunc_pos])?;
868 out.write_all(&[line_delim])?;
869 return Ok(());
870 }
871 }
872 if run_start < data.len() {
874 out.write_all(&data[run_start..])?;
875 if !data.is_empty() && *data.last().unwrap() != line_delim {
876 out.write_all(&[line_delim])?;
877 }
878 }
879 Ok(())
880}
881
882fn fields_prefix_chunk(
884 data: &[u8],
885 delim: u8,
886 line_delim: u8,
887 last_field: usize,
888 suppress: bool,
889 buf: &mut Vec<u8>,
890) {
891 let mut start = 0;
892 for end_pos in memchr_iter(line_delim, data) {
893 let line = &data[start..end_pos];
894 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
895 start = end_pos + 1;
896 }
897 if start < data.len() {
898 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
899 }
900}
901
902#[inline(always)]
904fn fields_prefix_line(
905 line: &[u8],
906 delim: u8,
907 line_delim: u8,
908 last_field: usize,
909 suppress: bool,
910 buf: &mut Vec<u8>,
911) {
912 if line.is_empty() {
913 if !suppress {
914 buf.push(line_delim);
915 }
916 return;
917 }
918
919 buf.reserve(line.len() + 1);
920
921 let mut field_count = 1;
922 let mut has_delim = false;
923
924 for pos in memchr_iter(delim, line) {
925 has_delim = true;
926 if field_count >= last_field {
927 unsafe {
928 buf_extend(buf, &line[..pos]);
929 buf_push(buf, line_delim);
930 }
931 return;
932 }
933 field_count += 1;
934 }
935
936 if !has_delim {
937 if !suppress {
938 unsafe {
939 buf_extend(buf, line);
940 buf_push(buf, line_delim);
941 }
942 }
943 return;
944 }
945
946 unsafe {
947 buf_extend(buf, line);
948 buf_push(buf, line_delim);
949 }
950}
951
952fn process_fields_suffix(
954 data: &[u8],
955 delim: u8,
956 line_delim: u8,
957 start_field: usize,
958 suppress: bool,
959 out: &mut impl Write,
960) -> io::Result<()> {
961 if data.len() >= PARALLEL_THRESHOLD {
962 let chunks = split_into_chunks(data, line_delim);
963 let results: Vec<Vec<u8>> = chunks
964 .par_iter()
965 .map(|chunk| {
966 let mut buf = Vec::with_capacity(chunk.len());
967 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
968 buf
969 })
970 .collect();
971 let slices: Vec<IoSlice> = results
973 .iter()
974 .filter(|r| !r.is_empty())
975 .map(|r| IoSlice::new(r))
976 .collect();
977 write_ioslices(out, &slices)?;
978 } else {
979 let mut buf = Vec::with_capacity(data.len());
980 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
981 if !buf.is_empty() {
982 out.write_all(&buf)?;
983 }
984 }
985 Ok(())
986}
987
988fn fields_suffix_chunk(
990 data: &[u8],
991 delim: u8,
992 line_delim: u8,
993 start_field: usize,
994 suppress: bool,
995 buf: &mut Vec<u8>,
996) {
997 let mut start = 0;
998 for end_pos in memchr_iter(line_delim, data) {
999 let line = &data[start..end_pos];
1000 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1001 start = end_pos + 1;
1002 }
1003 if start < data.len() {
1004 fields_suffix_line(
1005 &data[start..],
1006 delim,
1007 line_delim,
1008 start_field,
1009 suppress,
1010 buf,
1011 );
1012 }
1013}
1014
1015#[inline(always)]
1017fn fields_suffix_line(
1018 line: &[u8],
1019 delim: u8,
1020 line_delim: u8,
1021 start_field: usize,
1022 suppress: bool,
1023 buf: &mut Vec<u8>,
1024) {
1025 if line.is_empty() {
1026 if !suppress {
1027 buf.push(line_delim);
1028 }
1029 return;
1030 }
1031
1032 buf.reserve(line.len() + 1);
1033
1034 let skip_delims = start_field - 1;
1035 let mut delim_count = 0;
1036 let mut has_delim = false;
1037
1038 for pos in memchr_iter(delim, line) {
1039 has_delim = true;
1040 delim_count += 1;
1041 if delim_count >= skip_delims {
1042 unsafe {
1043 buf_extend(buf, &line[pos + 1..]);
1044 buf_push(buf, line_delim);
1045 }
1046 return;
1047 }
1048 }
1049
1050 if !has_delim {
1051 if !suppress {
1052 unsafe {
1053 buf_extend(buf, line);
1054 buf_push(buf, line_delim);
1055 }
1056 }
1057 return;
1058 }
1059
1060 unsafe { buf_push(buf, line_delim) };
1062}
1063
1064fn process_fields_mid_range(
1067 data: &[u8],
1068 delim: u8,
1069 line_delim: u8,
1070 start_field: usize,
1071 end_field: usize,
1072 suppress: bool,
1073 out: &mut impl Write,
1074) -> io::Result<()> {
1075 if data.len() >= PARALLEL_THRESHOLD {
1076 let chunks = split_into_chunks(data, line_delim);
1077 let results: Vec<Vec<u8>> = chunks
1078 .par_iter()
1079 .map(|chunk| {
1080 let mut buf = Vec::with_capacity(chunk.len());
1081 fields_mid_range_chunk(
1082 chunk,
1083 delim,
1084 line_delim,
1085 start_field,
1086 end_field,
1087 suppress,
1088 &mut buf,
1089 );
1090 buf
1091 })
1092 .collect();
1093 let slices: Vec<IoSlice> = results
1094 .iter()
1095 .filter(|r| !r.is_empty())
1096 .map(|r| IoSlice::new(r))
1097 .collect();
1098 write_ioslices(out, &slices)?;
1099 } else {
1100 let mut buf = Vec::with_capacity(data.len());
1101 fields_mid_range_chunk(
1102 data,
1103 delim,
1104 line_delim,
1105 start_field,
1106 end_field,
1107 suppress,
1108 &mut buf,
1109 );
1110 if !buf.is_empty() {
1111 out.write_all(&buf)?;
1112 }
1113 }
1114 Ok(())
1115}
1116
1117fn fields_mid_range_chunk(
1119 data: &[u8],
1120 delim: u8,
1121 line_delim: u8,
1122 start_field: usize,
1123 end_field: usize,
1124 suppress: bool,
1125 buf: &mut Vec<u8>,
1126) {
1127 let mut start = 0;
1128 for end_pos in memchr_iter(line_delim, data) {
1129 let line = &data[start..end_pos];
1130 fields_mid_range_line(
1131 line,
1132 delim,
1133 line_delim,
1134 start_field,
1135 end_field,
1136 suppress,
1137 buf,
1138 );
1139 start = end_pos + 1;
1140 }
1141 if start < data.len() {
1142 fields_mid_range_line(
1143 &data[start..],
1144 delim,
1145 line_delim,
1146 start_field,
1147 end_field,
1148 suppress,
1149 buf,
1150 );
1151 }
1152}
1153
1154#[inline(always)]
1157fn fields_mid_range_line(
1158 line: &[u8],
1159 delim: u8,
1160 line_delim: u8,
1161 start_field: usize,
1162 end_field: usize,
1163 suppress: bool,
1164 buf: &mut Vec<u8>,
1165) {
1166 if line.is_empty() {
1167 if !suppress {
1168 buf.push(line_delim);
1169 }
1170 return;
1171 }
1172
1173 buf.reserve(line.len() + 1);
1174
1175 let skip_before = start_field - 1; let field_span = end_field - start_field; let mut delim_count = 0;
1179 let mut range_start = 0;
1180 let mut has_delim = false;
1181
1182 for pos in memchr_iter(delim, line) {
1183 has_delim = true;
1184 delim_count += 1;
1185 if delim_count == skip_before {
1186 range_start = pos + 1;
1187 }
1188 if delim_count == skip_before + field_span + 1 {
1189 if skip_before == 0 {
1191 range_start = 0;
1192 }
1193 unsafe {
1194 buf_extend(buf, &line[range_start..pos]);
1195 buf_push(buf, line_delim);
1196 }
1197 return;
1198 }
1199 }
1200
1201 if !has_delim {
1202 if !suppress {
1203 unsafe {
1204 buf_extend(buf, line);
1205 buf_push(buf, line_delim);
1206 }
1207 }
1208 return;
1209 }
1210
1211 if delim_count >= skip_before {
1213 if skip_before == 0 {
1215 range_start = 0;
1216 }
1217 unsafe {
1218 buf_extend(buf, &line[range_start..]);
1219 buf_push(buf, line_delim);
1220 }
1221 } else {
1222 unsafe { buf_push(buf, line_delim) };
1224 }
1225}
1226
1227fn process_nth_field_combined(
1232 data: &[u8],
1233 delim: u8,
1234 line_delim: u8,
1235 target_idx: usize,
1236 suppress: bool,
1237 buf: &mut Vec<u8>,
1238) {
1239 buf.reserve(data.len());
1240
1241 let mut line_start: usize = 0;
1242 let mut field_start: usize = 0;
1243 let mut field_idx: usize = 0;
1244 let mut has_delim = false;
1245 let mut emitted = false;
1246
1247 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1248 let byte = unsafe { *data.get_unchecked(pos) };
1249
1250 if byte == line_delim {
1251 if !emitted {
1253 if has_delim && field_idx == target_idx {
1254 unsafe {
1256 buf_extend(buf, &data[field_start..pos]);
1257 buf_push(buf, line_delim);
1258 }
1259 } else if has_delim {
1260 unsafe {
1262 buf_push(buf, line_delim);
1263 }
1264 } else if !suppress {
1265 unsafe {
1267 buf_extend(buf, &data[line_start..pos]);
1268 buf_push(buf, line_delim);
1269 }
1270 }
1271 }
1272 line_start = pos + 1;
1274 field_start = pos + 1;
1275 field_idx = 0;
1276 has_delim = false;
1277 emitted = false;
1278 } else {
1279 has_delim = true;
1281 if field_idx == target_idx {
1282 unsafe {
1283 buf_extend(buf, &data[field_start..pos]);
1284 buf_push(buf, line_delim);
1285 }
1286 emitted = true;
1287 }
1288 field_idx += 1;
1289 field_start = pos + 1;
1290 }
1291 }
1292
1293 if line_start < data.len() && !emitted {
1295 if has_delim && field_idx == target_idx {
1296 unsafe {
1297 buf_extend(buf, &data[field_start..data.len()]);
1298 buf_push(buf, line_delim);
1299 }
1300 } else if has_delim {
1301 unsafe {
1302 buf_push(buf, line_delim);
1303 }
1304 } else if !suppress {
1305 unsafe {
1306 buf_extend(buf, &data[line_start..data.len()]);
1307 buf_push(buf, line_delim);
1308 }
1309 }
1310 }
1311}
1312
1313#[inline]
1317fn single_field1_zerocopy(
1318 data: &[u8],
1319 delim: u8,
1320 line_delim: u8,
1321 out: &mut impl Write,
1322) -> io::Result<()> {
1323 let mut line_start: usize = 0;
1324 let mut run_start: usize = 0;
1325 let mut first_delim: Option<usize> = None;
1326
1327 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1328 let byte = unsafe { *data.get_unchecked(pos) };
1329
1330 if byte == line_delim {
1331 if let Some(dp) = first_delim {
1333 if run_start < line_start {
1336 out.write_all(&data[run_start..line_start])?;
1337 }
1338 out.write_all(&data[line_start..dp])?;
1339 out.write_all(&[line_delim])?;
1340 run_start = pos + 1;
1341 }
1342 line_start = pos + 1;
1344 first_delim = None;
1345 } else {
1346 if first_delim.is_none() {
1348 first_delim = Some(pos);
1349 }
1350 }
1351 }
1352
1353 if line_start < data.len() {
1355 if let Some(dp) = first_delim {
1356 if run_start < line_start {
1357 out.write_all(&data[run_start..line_start])?;
1358 }
1359 out.write_all(&data[line_start..dp])?;
1360 out.write_all(&[line_delim])?;
1361 return Ok(());
1362 }
1363 }
1364
1365 if run_start < data.len() {
1367 out.write_all(&data[run_start..])?;
1368 if !data.is_empty() && *data.last().unwrap() != line_delim {
1369 out.write_all(&[line_delim])?;
1370 }
1371 }
1372 Ok(())
1373}
1374
1375fn process_small_field_combined(
1380 data: &[u8],
1381 delim: u8,
1382 line_delim: u8,
1383 target_idx: usize,
1384 buf: &mut Vec<u8>,
1385) {
1386 buf.reserve(data.len());
1387 let mut start = 0;
1388 for end_pos in memchr_iter(line_delim, data) {
1389 let line = &data[start..end_pos];
1390 let mut field_start = 0;
1392 let mut found_start = target_idx == 0;
1393 let mut delim_count = 0;
1394 if !found_start {
1395 let mut search_start = 0;
1396 while let Some(pos) = memchr::memchr(delim, &line[search_start..]) {
1397 delim_count += 1;
1398 if delim_count == target_idx {
1399 field_start = search_start + pos + 1;
1400 found_start = true;
1401 break;
1402 }
1403 search_start = search_start + pos + 1;
1404 }
1405 }
1406 if !found_start {
1407 unsafe {
1409 buf_extend(buf, line);
1410 buf_push(buf, line_delim);
1411 }
1412 } else if field_start >= line.len() {
1413 unsafe { buf_push(buf, line_delim) };
1415 } else {
1416 match memchr::memchr(delim, &line[field_start..]) {
1418 Some(pos) => unsafe {
1419 buf_extend(buf, &line[field_start..field_start + pos]);
1420 buf_push(buf, line_delim);
1421 },
1422 None => unsafe {
1423 buf_extend(buf, &line[field_start..]);
1424 buf_push(buf, line_delim);
1425 },
1426 }
1427 }
1428 start = end_pos + 1;
1429 }
1430 if start < data.len() {
1432 let line = &data[start..];
1433 let mut field_start = 0;
1434 let mut found_start = target_idx == 0;
1435 let mut delim_count = 0;
1436 if !found_start {
1437 let mut search_start = 0;
1438 while let Some(pos) = memchr::memchr(delim, &line[search_start..]) {
1439 delim_count += 1;
1440 if delim_count == target_idx {
1441 field_start = search_start + pos + 1;
1442 found_start = true;
1443 break;
1444 }
1445 search_start = search_start + pos + 1;
1446 }
1447 }
1448 if !found_start {
1449 unsafe {
1450 buf_extend(buf, line);
1451 buf_push(buf, line_delim);
1452 }
1453 } else if field_start >= line.len() {
1454 unsafe { buf_push(buf, line_delim) };
1455 } else {
1456 match memchr::memchr(delim, &line[field_start..]) {
1457 Some(pos) => unsafe {
1458 buf_extend(buf, &line[field_start..field_start + pos]);
1459 buf_push(buf, line_delim);
1460 },
1461 None => unsafe {
1462 buf_extend(buf, &line[field_start..]);
1463 buf_push(buf, line_delim);
1464 },
1465 }
1466 }
1467 }
1468}
1469
1470fn process_single_field_chunk(
1472 data: &[u8],
1473 delim: u8,
1474 target_idx: usize,
1475 line_delim: u8,
1476 suppress: bool,
1477 buf: &mut Vec<u8>,
1478) {
1479 let mut start = 0;
1480 for end_pos in memchr_iter(line_delim, data) {
1481 let line = &data[start..end_pos];
1482 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
1483 start = end_pos + 1;
1484 }
1485 if start < data.len() {
1486 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
1487 }
1488}
1489
1490#[inline(always)]
1493fn extract_single_field_line(
1494 line: &[u8],
1495 delim: u8,
1496 target_idx: usize,
1497 line_delim: u8,
1498 suppress: bool,
1499 buf: &mut Vec<u8>,
1500) {
1501 if line.is_empty() {
1502 if !suppress {
1503 buf.push(line_delim);
1504 }
1505 return;
1506 }
1507
1508 buf.reserve(line.len() + 1);
1510
1511 if target_idx == 0 {
1513 match memchr::memchr(delim, line) {
1514 Some(pos) => unsafe {
1515 buf_extend(buf, &line[..pos]);
1516 buf_push(buf, line_delim);
1517 },
1518 None => {
1519 if !suppress {
1520 unsafe {
1521 buf_extend(buf, line);
1522 buf_push(buf, line_delim);
1523 }
1524 }
1525 }
1526 }
1527 return;
1528 }
1529
1530 let mut field_start = 0;
1531 let mut field_idx = 0;
1532 let mut has_delim = false;
1533
1534 for pos in memchr_iter(delim, line) {
1535 has_delim = true;
1536 if field_idx == target_idx {
1537 unsafe {
1538 buf_extend(buf, &line[field_start..pos]);
1539 buf_push(buf, line_delim);
1540 }
1541 return;
1542 }
1543 field_idx += 1;
1544 field_start = pos + 1;
1545 }
1546
1547 if !has_delim {
1548 if !suppress {
1549 unsafe {
1550 buf_extend(buf, line);
1551 buf_push(buf, line_delim);
1552 }
1553 }
1554 return;
1555 }
1556
1557 if field_idx == target_idx {
1558 unsafe {
1559 buf_extend(buf, &line[field_start..]);
1560 buf_push(buf, line_delim);
1561 }
1562 } else {
1563 unsafe { buf_push(buf, line_delim) };
1564 }
1565}
1566
1567#[inline(always)]
1570fn extract_fields_to_buf(
1571 line: &[u8],
1572 delim: u8,
1573 ranges: &[Range],
1574 output_delim: &[u8],
1575 suppress: bool,
1576 max_field: usize,
1577 field_mask: u64,
1578 line_delim: u8,
1579 buf: &mut Vec<u8>,
1580 complement: bool,
1581) {
1582 let len = line.len();
1583
1584 if len == 0 {
1585 if !suppress {
1586 buf.push(line_delim);
1587 }
1588 return;
1589 }
1590
1591 let needed = len + output_delim.len() * 16 + 1;
1594 if buf.capacity() - buf.len() < needed {
1595 buf.reserve(needed);
1596 }
1597
1598 let mut field_num: usize = 1;
1599 let mut field_start: usize = 0;
1600 let mut first_output = true;
1601 let mut has_delim = false;
1602
1603 for delim_pos in memchr_iter(delim, line) {
1604 has_delim = true;
1605
1606 if is_selected(field_num, field_mask, ranges, complement) {
1607 if !first_output {
1608 unsafe { buf_extend(buf, output_delim) };
1609 }
1610 unsafe { buf_extend(buf, &line[field_start..delim_pos]) };
1611 first_output = false;
1612 }
1613
1614 field_num += 1;
1615 field_start = delim_pos + 1;
1616
1617 if field_num > max_field {
1618 break;
1619 }
1620 }
1621
1622 if (field_num <= max_field || complement)
1624 && has_delim
1625 && is_selected(field_num, field_mask, ranges, complement)
1626 {
1627 if !first_output {
1628 unsafe { buf_extend(buf, output_delim) };
1629 }
1630 unsafe { buf_extend(buf, &line[field_start..len]) };
1631 first_output = false;
1632 }
1633
1634 if !first_output {
1635 unsafe { buf_push(buf, line_delim) };
1636 } else if !has_delim {
1637 if !suppress {
1638 unsafe {
1639 buf_extend(buf, line);
1640 buf_push(buf, line_delim);
1641 }
1642 }
1643 } else {
1644 unsafe { buf_push(buf, line_delim) };
1645 }
1646}
1647
1648fn process_bytes_from_start(
1655 data: &[u8],
1656 max_bytes: usize,
1657 line_delim: u8,
1658 out: &mut impl Write,
1659) -> io::Result<()> {
1660 if data.len() >= PARALLEL_THRESHOLD {
1661 let chunks = split_into_chunks(data, line_delim);
1662 let results: Vec<Vec<u8>> = chunks
1663 .par_iter()
1664 .map(|chunk| {
1665 let mut buf = Vec::with_capacity(chunk.len());
1666 bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
1667 buf
1668 })
1669 .collect();
1670 let slices: Vec<IoSlice> = results
1672 .iter()
1673 .filter(|r| !r.is_empty())
1674 .map(|r| IoSlice::new(r))
1675 .collect();
1676 write_ioslices(out, &slices)?;
1677 } else {
1678 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
1682 }
1683 Ok(())
1684}
1685
1686#[inline]
1689fn bytes_from_start_zerocopy(
1690 data: &[u8],
1691 max_bytes: usize,
1692 line_delim: u8,
1693 out: &mut impl Write,
1694) -> io::Result<()> {
1695 let mut start = 0;
1696 let mut run_start: usize = 0;
1697
1698 for pos in memchr_iter(line_delim, data) {
1699 let line_len = pos - start;
1700 if line_len > max_bytes {
1701 if run_start < start {
1703 out.write_all(&data[run_start..start])?;
1704 }
1705 out.write_all(&data[start..start + max_bytes])?;
1706 out.write_all(&[line_delim])?;
1707 run_start = pos + 1;
1708 }
1709 start = pos + 1;
1711 }
1712 if start < data.len() {
1714 let line_len = data.len() - start;
1715 if line_len > max_bytes {
1716 if run_start < start {
1717 out.write_all(&data[run_start..start])?;
1718 }
1719 out.write_all(&data[start..start + max_bytes])?;
1720 out.write_all(&[line_delim])?;
1721 return Ok(());
1722 }
1723 }
1724 if run_start < data.len() {
1726 out.write_all(&data[run_start..])?;
1727 if !data.is_empty() && *data.last().unwrap() != line_delim {
1729 out.write_all(&[line_delim])?;
1730 }
1731 }
1732 Ok(())
1733}
1734
1735#[inline]
1738fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1739 buf.reserve(data.len());
1741
1742 let mut start = 0;
1743 for pos in memchr_iter(line_delim, data) {
1744 let line_len = pos - start;
1745 let take = line_len.min(max_bytes);
1746 unsafe {
1747 buf_extend(buf, &data[start..start + take]);
1748 buf_push(buf, line_delim);
1749 }
1750 start = pos + 1;
1751 }
1752 if start < data.len() {
1754 let line_len = data.len() - start;
1755 let take = line_len.min(max_bytes);
1756 unsafe {
1757 buf_extend(buf, &data[start..start + take]);
1758 buf_push(buf, line_delim);
1759 }
1760 }
1761}
1762
1763fn process_bytes_from_offset(
1765 data: &[u8],
1766 skip_bytes: usize,
1767 line_delim: u8,
1768 out: &mut impl Write,
1769) -> io::Result<()> {
1770 if data.len() >= PARALLEL_THRESHOLD {
1771 let chunks = split_into_chunks(data, line_delim);
1772 let results: Vec<Vec<u8>> = chunks
1773 .par_iter()
1774 .map(|chunk| {
1775 let mut buf = Vec::with_capacity(chunk.len());
1776 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
1777 buf
1778 })
1779 .collect();
1780 let slices: Vec<IoSlice> = results
1782 .iter()
1783 .filter(|r| !r.is_empty())
1784 .map(|r| IoSlice::new(r))
1785 .collect();
1786 write_ioslices(out, &slices)?;
1787 } else {
1788 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
1790 }
1791 Ok(())
1792}
1793
1794#[inline]
1798fn bytes_from_offset_zerocopy(
1799 data: &[u8],
1800 skip_bytes: usize,
1801 line_delim: u8,
1802 out: &mut impl Write,
1803) -> io::Result<()> {
1804 let delim_buf = [line_delim];
1805 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
1806
1807 let mut start = 0;
1808 for pos in memchr_iter(line_delim, data) {
1809 let line_len = pos - start;
1810 if line_len > skip_bytes {
1811 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
1812 }
1813 iov.push(IoSlice::new(&delim_buf));
1814 if iov.len() >= MAX_IOV - 1 {
1816 write_ioslices(out, &iov)?;
1817 iov.clear();
1818 }
1819 start = pos + 1;
1820 }
1821 if start < data.len() {
1822 let line_len = data.len() - start;
1823 if line_len > skip_bytes {
1824 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
1825 }
1826 iov.push(IoSlice::new(&delim_buf));
1827 }
1828 if !iov.is_empty() {
1829 write_ioslices(out, &iov)?;
1830 }
1831 Ok(())
1832}
1833
1834#[inline]
1837fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1838 buf.reserve(data.len());
1839
1840 let mut start = 0;
1841 for pos in memchr_iter(line_delim, data) {
1842 let line_len = pos - start;
1843 if line_len > skip_bytes {
1844 unsafe {
1845 buf_extend(buf, &data[start + skip_bytes..pos]);
1846 }
1847 }
1848 unsafe {
1849 buf_push(buf, line_delim);
1850 }
1851 start = pos + 1;
1852 }
1853 if start < data.len() {
1854 let line_len = data.len() - start;
1855 if line_len > skip_bytes {
1856 unsafe {
1857 buf_extend(buf, &data[start + skip_bytes..data.len()]);
1858 }
1859 }
1860 unsafe {
1861 buf_push(buf, line_delim);
1862 }
1863 }
1864}
1865
1866fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1868 let line_delim = cfg.line_delim;
1869 let ranges = cfg.ranges;
1870 let complement = cfg.complement;
1871 let output_delim = cfg.output_delim;
1872
1873 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
1875 let max_bytes = ranges[0].end;
1876 if max_bytes < usize::MAX {
1877 return process_bytes_from_start(data, max_bytes, line_delim, out);
1878 }
1879 }
1880
1881 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
1883 let skip_bytes = ranges[0].start.saturating_sub(1);
1884 if skip_bytes > 0 {
1885 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
1886 }
1887 }
1888
1889 if data.len() >= PARALLEL_THRESHOLD {
1890 let chunks = split_into_chunks(data, line_delim);
1891 let results: Vec<Vec<u8>> = chunks
1892 .par_iter()
1893 .map(|chunk| {
1894 let mut buf = Vec::with_capacity(chunk.len());
1895 process_bytes_chunk(
1896 chunk,
1897 ranges,
1898 complement,
1899 output_delim,
1900 line_delim,
1901 &mut buf,
1902 );
1903 buf
1904 })
1905 .collect();
1906 let slices: Vec<IoSlice> = results
1908 .iter()
1909 .filter(|r| !r.is_empty())
1910 .map(|r| IoSlice::new(r))
1911 .collect();
1912 write_ioslices(out, &slices)?;
1913 } else {
1914 let mut buf = Vec::with_capacity(data.len());
1915 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
1916 if !buf.is_empty() {
1917 out.write_all(&buf)?;
1918 }
1919 }
1920 Ok(())
1921}
1922
1923fn process_bytes_chunk(
1925 data: &[u8],
1926 ranges: &[Range],
1927 complement: bool,
1928 output_delim: &[u8],
1929 line_delim: u8,
1930 buf: &mut Vec<u8>,
1931) {
1932 let mut start = 0;
1933 for end_pos in memchr_iter(line_delim, data) {
1934 let line = &data[start..end_pos];
1935 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
1936 buf.push(line_delim);
1937 start = end_pos + 1;
1938 }
1939 if start < data.len() {
1940 cut_bytes_to_buf(&data[start..], ranges, complement, output_delim, buf);
1941 buf.push(line_delim);
1942 }
1943}
1944
1945#[inline(always)]
1948fn cut_bytes_to_buf(
1949 line: &[u8],
1950 ranges: &[Range],
1951 complement: bool,
1952 output_delim: &[u8],
1953 buf: &mut Vec<u8>,
1954) {
1955 let len = line.len();
1956 let mut first_range = true;
1957
1958 buf.reserve(len + output_delim.len() * ranges.len() + 1);
1960
1961 if complement {
1962 let mut pos: usize = 1;
1963 for r in ranges {
1964 let rs = r.start;
1965 let re = r.end.min(len);
1966 if pos < rs {
1967 if !first_range && !output_delim.is_empty() {
1968 unsafe { buf_extend(buf, output_delim) };
1969 }
1970 unsafe { buf_extend(buf, &line[pos - 1..rs - 1]) };
1971 first_range = false;
1972 }
1973 pos = re + 1;
1974 if pos > len {
1975 break;
1976 }
1977 }
1978 if pos <= len {
1979 if !first_range && !output_delim.is_empty() {
1980 unsafe { buf_extend(buf, output_delim) };
1981 }
1982 unsafe { buf_extend(buf, &line[pos - 1..len]) };
1983 }
1984 } else if output_delim.is_empty() && ranges.len() == 1 {
1985 let start = ranges[0].start.saturating_sub(1);
1987 let end = ranges[0].end.min(len);
1988 if start < len {
1989 unsafe { buf_extend(buf, &line[start..end]) };
1990 }
1991 } else {
1992 for r in ranges {
1993 let start = r.start.saturating_sub(1);
1994 let end = r.end.min(len);
1995 if start >= len {
1996 break;
1997 }
1998 if !first_range && !output_delim.is_empty() {
1999 unsafe { buf_extend(buf, output_delim) };
2000 }
2001 unsafe { buf_extend(buf, &line[start..end]) };
2002 first_range = false;
2003 }
2004 }
2005}
2006
2007#[inline]
2011pub fn cut_fields(
2012 line: &[u8],
2013 delim: u8,
2014 ranges: &[Range],
2015 complement: bool,
2016 output_delim: &[u8],
2017 suppress_no_delim: bool,
2018 out: &mut impl Write,
2019) -> io::Result<bool> {
2020 if memchr::memchr(delim, line).is_none() {
2021 if !suppress_no_delim {
2022 out.write_all(line)?;
2023 return Ok(true);
2024 }
2025 return Ok(false);
2026 }
2027
2028 let mut field_num: usize = 1;
2029 let mut field_start: usize = 0;
2030 let mut first_output = true;
2031
2032 for delim_pos in memchr_iter(delim, line) {
2033 let selected = in_ranges(ranges, field_num) != complement;
2034 if selected {
2035 if !first_output {
2036 out.write_all(output_delim)?;
2037 }
2038 out.write_all(&line[field_start..delim_pos])?;
2039 first_output = false;
2040 }
2041 field_start = delim_pos + 1;
2042 field_num += 1;
2043 }
2044
2045 let selected = in_ranges(ranges, field_num) != complement;
2046 if selected {
2047 if !first_output {
2048 out.write_all(output_delim)?;
2049 }
2050 out.write_all(&line[field_start..])?;
2051 }
2052
2053 Ok(true)
2054}
2055
2056#[inline]
2058pub fn cut_bytes(
2059 line: &[u8],
2060 ranges: &[Range],
2061 complement: bool,
2062 output_delim: &[u8],
2063 out: &mut impl Write,
2064) -> io::Result<bool> {
2065 let mut first_range = true;
2066
2067 if complement {
2068 let len = line.len();
2069 let mut comp_ranges = Vec::new();
2070 let mut pos: usize = 1;
2071 for r in ranges {
2072 let rs = r.start;
2073 let re = r.end.min(len);
2074 if pos < rs {
2075 comp_ranges.push((pos, rs - 1));
2076 }
2077 pos = re + 1;
2078 if pos > len {
2079 break;
2080 }
2081 }
2082 if pos <= len {
2083 comp_ranges.push((pos, len));
2084 }
2085 for &(s, e) in &comp_ranges {
2086 if !first_range && !output_delim.is_empty() {
2087 out.write_all(output_delim)?;
2088 }
2089 out.write_all(&line[s - 1..e])?;
2090 first_range = false;
2091 }
2092 } else {
2093 for r in ranges {
2094 let start = r.start.saturating_sub(1);
2095 let end = r.end.min(line.len());
2096 if start >= line.len() {
2097 break;
2098 }
2099 if !first_range && !output_delim.is_empty() {
2100 out.write_all(output_delim)?;
2101 }
2102 out.write_all(&line[start..end])?;
2103 first_range = false;
2104 }
2105 }
2106 Ok(true)
2107}
2108
2109pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
2111 match cfg.mode {
2112 CutMode::Fields => process_fields_fast(data, cfg, out),
2113 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
2114 }
2115}
2116
2117pub fn process_cut_reader<R: BufRead>(
2122 mut reader: R,
2123 cfg: &CutConfig,
2124 out: &mut impl Write,
2125) -> io::Result<()> {
2126 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
2128
2129 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
2132
2133 loop {
2134 buf.reserve(CHUNK_SIZE);
2136 let read_start = buf.len();
2137 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
2138 let n = read_fully(&mut reader, &mut buf[read_start..])?;
2139 buf.truncate(read_start + n);
2140
2141 if buf.is_empty() {
2142 break;
2143 }
2144
2145 if n == 0 {
2146 process_cut_data(&buf, cfg, out)?;
2148 break;
2149 }
2150
2151 let process_end = match memchr::memrchr(line_delim, &buf) {
2153 Some(pos) => pos + 1,
2154 None => {
2155 continue;
2157 }
2158 };
2159
2160 process_cut_data(&buf[..process_end], cfg, out)?;
2162
2163 let leftover_len = buf.len() - process_end;
2165 if leftover_len > 0 {
2166 buf.copy_within(process_end.., 0);
2167 }
2168 buf.truncate(leftover_len);
2169 }
2170
2171 Ok(())
2172}
2173
/// Reads from `reader` until `buf` is full or end of input is reached.
///
/// Returns the number of bytes placed in `buf`; a value smaller than
/// `buf.len()` means the reader reported end of input. Reads that fail with
/// `ErrorKind::Interrupted` are retried, including the very first one.
///
/// # Errors
///
/// Any other read error is returned as-is.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
2193
/// Which unit a cut operation selects.
///
/// `process_cut_data` routes `Fields` to the field implementation and both
/// `Bytes` and `Characters` to the byte implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Select by byte position.
    Bytes,
    /// Select by character position (processed by the same path as `Bytes`).
    Characters,
    /// Select delimiter-separated fields.
    Fields,
}