1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, IoSlice, Write};
4
/// Inputs of at least this many bytes (2 MiB) take the chunked, parallel code paths.
const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;

/// Maximum number of `IoSlice`s passed to a single `write_vectored` call.
const MAX_IOV: usize = 1024;
10
/// Borrowed settings that drive the field-cutting functions in this file.
pub struct CutConfig<'a> {
    /// Selection mode. NOTE(review): `CutMode` is declared outside this section.
    pub mode: CutMode,
    /// Selected positions; `in_ranges` stops at the first range that starts
    /// after the probed position, so these must be sorted ascending.
    pub ranges: &'a [Range],
    /// When true, positions *not* covered by `ranges` are selected.
    pub complement: bool,
    /// Input field delimiter byte.
    pub delim: u8,
    /// Bytes written between selected fields on output.
    pub output_delim: &'a [u8],
    /// When true, lines containing no `delim` are dropped instead of passed through.
    pub suppress_no_delim: bool,
    /// Record terminator byte.
    pub line_delim: u8,
}
21
/// An inclusive, 1-based span of selected positions.
///
/// `parse_ranges` stores `usize::MAX` in `end` for an open-ended range such as `3-`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position (1-based).
    pub start: usize,
    /// Last selected position, inclusive.
    pub end: usize,
}
28
29pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
32 let mut ranges = Vec::new();
33
34 for part in spec.split(',') {
35 let part = part.trim();
36 if part.is_empty() {
37 continue;
38 }
39
40 if let Some(idx) = part.find('-') {
41 let left = &part[..idx];
42 let right = &part[idx + 1..];
43
44 let start = if left.is_empty() {
45 1
46 } else {
47 left.parse::<usize>()
48 .map_err(|_| format!("invalid range: '{}'", part))?
49 };
50
51 let end = if right.is_empty() {
52 usize::MAX
53 } else {
54 right
55 .parse::<usize>()
56 .map_err(|_| format!("invalid range: '{}'", part))?
57 };
58
59 if start == 0 {
60 return Err("fields and positions are numbered from 1".to_string());
61 }
62 if start > end {
63 return Err(format!("invalid decreasing range: '{}'", part));
64 }
65
66 ranges.push(Range { start, end });
67 } else {
68 let n = part
69 .parse::<usize>()
70 .map_err(|_| format!("invalid field: '{}'", part))?;
71 if n == 0 {
72 return Err("fields and positions are numbered from 1".to_string());
73 }
74 ranges.push(Range { start: n, end: n });
75 }
76 }
77
78 if ranges.is_empty() {
79 return Err("you must specify a list of bytes, characters, or fields".to_string());
80 }
81
82 ranges.sort_by_key(|r| (r.start, r.end));
84 let mut merged = vec![ranges[0].clone()];
85 for r in &ranges[1..] {
86 let last = merged.last_mut().unwrap();
87 if r.start <= last.end.saturating_add(1) {
88 last.end = last.end.max(r.end);
89 } else {
90 merged.push(r.clone());
91 }
92 }
93
94 Ok(merged)
95}
96
97#[inline(always)]
100fn in_ranges(ranges: &[Range], pos: usize) -> bool {
101 for r in ranges {
102 if pos < r.start {
103 return false;
104 }
105 if pos <= r.end {
106 return true;
107 }
108 }
109 false
110}
111
112#[inline]
115fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
116 let mut mask: u64 = 0;
117 for i in 1..=64u32 {
118 let in_range = in_ranges(ranges, i as usize);
119 if in_range != complement {
120 mask |= 1u64 << (i - 1);
121 }
122 }
123 mask
124}
125
126#[inline(always)]
128fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
129 if field_num <= 64 {
130 (mask >> (field_num - 1)) & 1 == 1
131 } else {
132 in_ranges(ranges, field_num) != complement
133 }
134}
135
/// Appends `data` to `buf` without a capacity check.
///
/// # Safety
///
/// `buf` must already have at least `data.len()` bytes of spare capacity
/// (`buf.capacity() - buf.len() >= data.len()`); otherwise this writes past
/// the end of the allocation. `data` must not alias `buf`'s storage.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    // Catches violated capacity contracts in debug builds at no release cost.
    debug_assert!(
        buf.capacity() - buf.len() >= data.len(),
        "buf_extend: insufficient spare capacity"
    );
    // SAFETY: the caller guarantees the spare capacity, so the destination
    // range is inside the allocation; `data` is a distinct borrow.
    unsafe {
        let len = buf.len();
        std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr().add(len), data.len());
        buf.set_len(len + data.len());
    }
}
148
/// Appends one byte to `buf` without a capacity check.
///
/// # Safety
///
/// `buf` must already have at least one byte of spare capacity
/// (`buf.len() < buf.capacity()`); otherwise this writes past the end of the
/// allocation.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    // Catches violated capacity contracts in debug builds at no release cost.
    debug_assert!(buf.len() < buf.capacity(), "buf_push: insufficient spare capacity");
    // SAFETY: the caller guarantees one spare byte, so `len` is in bounds of
    // the allocation and becomes initialized before `set_len` exposes it.
    unsafe {
        let len = buf.len();
        buf.as_mut_ptr().add(len).write(b);
        buf.set_len(len + 1);
    }
}
159
160#[inline]
163fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
164 if slices.is_empty() {
165 return Ok(());
166 }
167 for batch in slices.chunks(MAX_IOV) {
168 let total: usize = batch.iter().map(|s| s.len()).sum();
169 match out.write_vectored(batch) {
170 Ok(n) if n >= total => continue,
171 Ok(mut written) => {
172 for slice in batch {
174 let slen = slice.len();
175 if written >= slen {
176 written -= slen;
177 continue;
178 }
179 if written > 0 {
180 out.write_all(&slice[written..])?;
181 written = 0;
182 } else {
183 out.write_all(slice)?;
184 }
185 }
186 }
187 Err(e) => return Err(e),
188 }
189 }
190 Ok(())
191}
192
193fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
197 let num_threads = rayon::current_num_threads().max(1);
198 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
199 return vec![data];
200 }
201
202 let chunk_size = data.len() / num_threads;
203 let mut chunks = Vec::with_capacity(num_threads);
204 let mut pos = 0;
205
206 for _ in 0..num_threads - 1 {
207 let target = pos + chunk_size;
208 if target >= data.len() {
209 break;
210 }
211 let boundary = memchr::memchr(line_delim, &data[target..])
212 .map(|p| target + p + 1)
213 .unwrap_or(data.len());
214 if boundary > pos {
215 chunks.push(&data[pos..boundary]);
216 }
217 pos = boundary;
218 }
219
220 if pos < data.len() {
221 chunks.push(&data[pos..]);
222 }
223
224 chunks
225}
226
227fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
231 let delim = cfg.delim;
232 let line_delim = cfg.line_delim;
233 let ranges = cfg.ranges;
234 let complement = cfg.complement;
235 let output_delim = cfg.output_delim;
236 let suppress = cfg.suppress_no_delim;
237
238 if !complement && memchr::memchr(delim, data).is_none() {
240 if suppress {
241 return Ok(());
242 }
243 out.write_all(data)?;
244 if !data.is_empty() && *data.last().unwrap() != line_delim {
245 out.write_all(&[line_delim])?;
246 }
247 return Ok(());
248 }
249
250 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
252 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
253 }
254
255 if complement
257 && ranges.len() == 1
258 && ranges[0].start == ranges[0].end
259 && output_delim.len() == 1
260 && output_delim[0] == delim
261 {
262 return process_complement_single_field(
263 data,
264 delim,
265 line_delim,
266 ranges[0].start,
267 suppress,
268 out,
269 );
270 }
271
272 if !complement
274 && ranges.len() == 1
275 && ranges[0].start == 1
276 && output_delim.len() == 1
277 && output_delim[0] == delim
278 && ranges[0].end < usize::MAX
279 {
280 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
281 }
282
283 if !complement
285 && ranges.len() == 1
286 && ranges[0].end == usize::MAX
287 && ranges[0].start > 1
288 && output_delim.len() == 1
289 && output_delim[0] == delim
290 {
291 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
292 }
293
294 if !complement
296 && ranges.len() == 1
297 && ranges[0].start > 1
298 && ranges[0].end < usize::MAX
299 && output_delim.len() == 1
300 && output_delim[0] == delim
301 {
302 return process_fields_mid_range(
303 data,
304 delim,
305 line_delim,
306 ranges[0].start,
307 ranges[0].end,
308 suppress,
309 out,
310 );
311 }
312
313 let max_field = if complement {
315 usize::MAX
316 } else {
317 ranges.last().map(|r| r.end).unwrap_or(0)
318 };
319 let field_mask = compute_field_mask(ranges, complement);
320
321 if data.len() >= PARALLEL_THRESHOLD {
322 let chunks = split_into_chunks(data, line_delim);
323 let results: Vec<Vec<u8>> = chunks
324 .par_iter()
325 .map(|chunk| {
326 let mut buf = Vec::with_capacity(chunk.len());
327 process_fields_chunk(
328 chunk,
329 delim,
330 ranges,
331 output_delim,
332 suppress,
333 max_field,
334 field_mask,
335 line_delim,
336 complement,
337 &mut buf,
338 );
339 buf
340 })
341 .collect();
342 let slices: Vec<IoSlice> = results
344 .iter()
345 .filter(|r| !r.is_empty())
346 .map(|r| IoSlice::new(r))
347 .collect();
348 write_ioslices(out, &slices)?;
349 } else {
350 let mut buf = Vec::with_capacity(data.len());
351 process_fields_chunk(
352 data,
353 delim,
354 ranges,
355 output_delim,
356 suppress,
357 max_field,
358 field_mask,
359 line_delim,
360 complement,
361 &mut buf,
362 );
363 if !buf.is_empty() {
364 out.write_all(&buf)?;
365 }
366 }
367 Ok(())
368}
369
370fn process_fields_chunk(
375 data: &[u8],
376 delim: u8,
377 ranges: &[Range],
378 output_delim: &[u8],
379 suppress: bool,
380 max_field: usize,
381 field_mask: u64,
382 line_delim: u8,
383 complement: bool,
384 buf: &mut Vec<u8>,
385) {
386 if delim != line_delim && max_field < usize::MAX && !complement {
393 buf.reserve(data.len());
394 let mut start = 0;
395 for end_pos in memchr_iter(line_delim, data) {
396 let line = &data[start..end_pos];
397 extract_fields_to_buf(
398 line,
399 delim,
400 ranges,
401 output_delim,
402 suppress,
403 max_field,
404 field_mask,
405 line_delim,
406 buf,
407 complement,
408 );
409 start = end_pos + 1;
410 }
411 if start < data.len() {
412 extract_fields_to_buf(
413 &data[start..],
414 delim,
415 ranges,
416 output_delim,
417 suppress,
418 max_field,
419 field_mask,
420 line_delim,
421 buf,
422 complement,
423 );
424 }
425 return;
426 }
427
428 if delim != line_delim {
431 buf.reserve(data.len());
432
433 let mut line_start: usize = 0;
434 let mut field_start: usize = 0;
435 let mut field_num: usize = 1;
436 let mut first_output = true;
437 let mut has_delim = false;
438
439 for pos in memchr::memchr2_iter(delim, line_delim, data) {
440 let byte = unsafe { *data.get_unchecked(pos) };
441
442 if byte == line_delim {
443 if (field_num <= max_field || complement)
445 && has_delim
446 && is_selected(field_num, field_mask, ranges, complement)
447 {
448 if !first_output {
449 unsafe { buf_extend(buf, output_delim) };
450 }
451 unsafe { buf_extend(buf, &data[field_start..pos]) };
452 first_output = false;
453 }
454
455 if !first_output {
456 unsafe { buf_push(buf, line_delim) };
457 } else if !has_delim {
458 if !suppress {
459 unsafe {
460 buf_extend(buf, &data[line_start..pos]);
461 buf_push(buf, line_delim);
462 }
463 }
464 } else {
465 unsafe { buf_push(buf, line_delim) };
466 }
467
468 line_start = pos + 1;
470 field_start = pos + 1;
471 field_num = 1;
472 first_output = true;
473 has_delim = false;
474 } else {
475 has_delim = true;
477
478 if is_selected(field_num, field_mask, ranges, complement) {
479 if !first_output {
480 unsafe { buf_extend(buf, output_delim) };
481 }
482 unsafe { buf_extend(buf, &data[field_start..pos]) };
483 first_output = false;
484 }
485
486 field_num += 1;
487 field_start = pos + 1;
488 }
489 }
490
491 if line_start < data.len() {
493 let line = &data[line_start..];
494 if !line.is_empty() {
495 if (field_num <= max_field || complement)
496 && has_delim
497 && is_selected(field_num, field_mask, ranges, complement)
498 {
499 if !first_output {
500 unsafe { buf_extend(buf, output_delim) };
501 }
502 unsafe { buf_extend(buf, &data[field_start..data.len()]) };
503 first_output = false;
504 }
505
506 if !first_output {
507 unsafe { buf_push(buf, line_delim) };
508 } else if !has_delim {
509 if !suppress {
510 unsafe {
511 buf_extend(buf, &data[line_start..data.len()]);
512 buf_push(buf, line_delim);
513 }
514 }
515 } else {
516 unsafe { buf_push(buf, line_delim) };
517 }
518 }
519 }
520
521 return;
522 }
523
524 let mut start = 0;
526 for end_pos in memchr_iter(line_delim, data) {
527 let line = &data[start..end_pos];
528 extract_fields_to_buf(
529 line,
530 delim,
531 ranges,
532 output_delim,
533 suppress,
534 max_field,
535 field_mask,
536 line_delim,
537 buf,
538 complement,
539 );
540 start = end_pos + 1;
541 }
542 if start < data.len() {
543 extract_fields_to_buf(
544 &data[start..],
545 delim,
546 ranges,
547 output_delim,
548 suppress,
549 max_field,
550 field_mask,
551 line_delim,
552 buf,
553 complement,
554 );
555 }
556}
557
558fn process_single_field(
564 data: &[u8],
565 delim: u8,
566 line_delim: u8,
567 target: usize,
568 suppress: bool,
569 out: &mut impl Write,
570) -> io::Result<()> {
571 let target_idx = target - 1;
572
573 if delim != line_delim {
575 if data.len() >= PARALLEL_THRESHOLD {
576 let chunks = split_into_chunks(data, line_delim);
577 let results: Vec<Vec<u8>> = chunks
578 .par_iter()
579 .map(|chunk| {
580 let mut buf = Vec::with_capacity(chunk.len());
581 process_nth_field_combined(
582 chunk, delim, line_delim, target_idx, suppress, &mut buf,
583 );
584 buf
585 })
586 .collect();
587 for result in &results {
588 if !result.is_empty() {
589 out.write_all(result)?;
590 }
591 }
592 } else if target_idx == 0 && !suppress {
593 single_field1_zerocopy(data, delim, line_delim, out)?;
598 } else if target_idx <= 3 && !suppress {
599 let mut buf = Vec::with_capacity(data.len());
604 process_small_field_combined(data, delim, line_delim, target_idx, &mut buf);
605 if !buf.is_empty() {
606 out.write_all(&buf)?;
607 }
608 } else {
609 let mut buf = Vec::with_capacity(data.len());
610 process_nth_field_combined(data, delim, line_delim, target_idx, suppress, &mut buf);
611 if !buf.is_empty() {
612 out.write_all(&buf)?;
613 }
614 }
615 return Ok(());
616 }
617
618 if data.len() >= PARALLEL_THRESHOLD {
620 let chunks = split_into_chunks(data, line_delim);
621 let results: Vec<Vec<u8>> = chunks
622 .par_iter()
623 .map(|chunk| {
624 let mut buf = Vec::with_capacity(chunk.len() / 4);
625 process_single_field_chunk(
626 chunk, delim, target_idx, line_delim, suppress, &mut buf,
627 );
628 buf
629 })
630 .collect();
631 let slices: Vec<IoSlice> = results
633 .iter()
634 .filter(|r| !r.is_empty())
635 .map(|r| IoSlice::new(r))
636 .collect();
637 write_ioslices(out, &slices)?;
638 } else {
639 let mut buf = Vec::with_capacity(data.len() / 4);
640 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
641 if !buf.is_empty() {
642 out.write_all(&buf)?;
643 }
644 }
645 Ok(())
646}
647
648fn process_complement_single_field(
650 data: &[u8],
651 delim: u8,
652 line_delim: u8,
653 skip_field: usize,
654 suppress: bool,
655 out: &mut impl Write,
656) -> io::Result<()> {
657 let skip_idx = skip_field - 1;
658
659 if data.len() >= PARALLEL_THRESHOLD {
660 let chunks = split_into_chunks(data, line_delim);
661 let results: Vec<Vec<u8>> = chunks
662 .par_iter()
663 .map(|chunk| {
664 let mut buf = Vec::with_capacity(chunk.len());
665 complement_single_field_chunk(
666 chunk, delim, skip_idx, line_delim, suppress, &mut buf,
667 );
668 buf
669 })
670 .collect();
671 let slices: Vec<IoSlice> = results
673 .iter()
674 .filter(|r| !r.is_empty())
675 .map(|r| IoSlice::new(r))
676 .collect();
677 write_ioslices(out, &slices)?;
678 } else {
679 let mut buf = Vec::with_capacity(data.len());
680 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
681 if !buf.is_empty() {
682 out.write_all(&buf)?;
683 }
684 }
685 Ok(())
686}
687
688fn complement_single_field_chunk(
690 data: &[u8],
691 delim: u8,
692 skip_idx: usize,
693 line_delim: u8,
694 suppress: bool,
695 buf: &mut Vec<u8>,
696) {
697 let mut start = 0;
698 for end_pos in memchr_iter(line_delim, data) {
699 let line = &data[start..end_pos];
700 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
701 start = end_pos + 1;
702 }
703 if start < data.len() {
704 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
705 }
706}
707
708#[inline(always)]
710fn complement_single_field_line(
711 line: &[u8],
712 delim: u8,
713 skip_idx: usize,
714 line_delim: u8,
715 suppress: bool,
716 buf: &mut Vec<u8>,
717) {
718 if line.is_empty() {
719 if !suppress {
720 buf.push(line_delim);
721 }
722 return;
723 }
724
725 buf.reserve(line.len() + 1);
726
727 let mut field_idx = 0;
728 let mut field_start = 0;
729 let mut first_output = true;
730 let mut has_delim = false;
731
732 for pos in memchr_iter(delim, line) {
733 has_delim = true;
734 if field_idx != skip_idx {
735 if !first_output {
736 unsafe { buf_push(buf, delim) };
737 }
738 unsafe { buf_extend(buf, &line[field_start..pos]) };
739 first_output = false;
740 }
741 field_idx += 1;
742 field_start = pos + 1;
743 }
744
745 if !has_delim {
746 if !suppress {
747 unsafe {
748 buf_extend(buf, line);
749 buf_push(buf, line_delim);
750 }
751 }
752 return;
753 }
754
755 if field_idx != skip_idx {
757 if !first_output {
758 unsafe { buf_push(buf, delim) };
759 }
760 unsafe { buf_extend(buf, &line[field_start..]) };
761 }
762
763 unsafe { buf_push(buf, line_delim) };
764}
765
766fn process_fields_prefix(
770 data: &[u8],
771 delim: u8,
772 line_delim: u8,
773 last_field: usize,
774 suppress: bool,
775 out: &mut impl Write,
776) -> io::Result<()> {
777 if data.len() >= PARALLEL_THRESHOLD {
778 let chunks = split_into_chunks(data, line_delim);
779 let results: Vec<Vec<u8>> = chunks
780 .par_iter()
781 .map(|chunk| {
782 let mut buf = Vec::with_capacity(chunk.len());
783 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
784 buf
785 })
786 .collect();
787 let slices: Vec<IoSlice> = results
789 .iter()
790 .filter(|r| !r.is_empty())
791 .map(|r| IoSlice::new(r))
792 .collect();
793 write_ioslices(out, &slices)?;
794 } else if !suppress {
795 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
799 } else {
800 let mut buf = Vec::with_capacity(data.len());
801 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
802 if !buf.is_empty() {
803 out.write_all(&buf)?;
804 }
805 }
806 Ok(())
807}
808
809#[inline]
814fn fields_prefix_zerocopy(
815 data: &[u8],
816 delim: u8,
817 line_delim: u8,
818 last_field: usize,
819 out: &mut impl Write,
820) -> io::Result<()> {
821 let mut start = 0;
822 let mut run_start: usize = 0;
823
824 for end_pos in memchr_iter(line_delim, data) {
825 let line = &data[start..end_pos];
826 let mut field_count = 1;
828 let mut truncate_at: Option<usize> = None;
829 for dpos in memchr_iter(delim, line) {
830 if field_count >= last_field {
831 truncate_at = Some(start + dpos);
832 break;
833 }
834 field_count += 1;
835 }
836
837 if let Some(trunc_pos) = truncate_at {
838 if run_start < start {
840 out.write_all(&data[run_start..start])?;
841 }
842 out.write_all(&data[start..trunc_pos])?;
843 out.write_all(&[line_delim])?;
844 run_start = end_pos + 1;
845 }
846 start = end_pos + 1;
848 }
849 if start < data.len() {
851 let line = &data[start..];
852 let mut field_count = 1;
853 let mut truncate_at: Option<usize> = None;
854 for dpos in memchr_iter(delim, line) {
855 if field_count >= last_field {
856 truncate_at = Some(start + dpos);
857 break;
858 }
859 field_count += 1;
860 }
861 if let Some(trunc_pos) = truncate_at {
862 if run_start < start {
863 out.write_all(&data[run_start..start])?;
864 }
865 out.write_all(&data[start..trunc_pos])?;
866 out.write_all(&[line_delim])?;
867 return Ok(());
868 }
869 }
870 if run_start < data.len() {
872 out.write_all(&data[run_start..])?;
873 if !data.is_empty() && *data.last().unwrap() != line_delim {
874 out.write_all(&[line_delim])?;
875 }
876 }
877 Ok(())
878}
879
880fn fields_prefix_chunk(
882 data: &[u8],
883 delim: u8,
884 line_delim: u8,
885 last_field: usize,
886 suppress: bool,
887 buf: &mut Vec<u8>,
888) {
889 let mut start = 0;
890 for end_pos in memchr_iter(line_delim, data) {
891 let line = &data[start..end_pos];
892 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
893 start = end_pos + 1;
894 }
895 if start < data.len() {
896 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
897 }
898}
899
900#[inline(always)]
902fn fields_prefix_line(
903 line: &[u8],
904 delim: u8,
905 line_delim: u8,
906 last_field: usize,
907 suppress: bool,
908 buf: &mut Vec<u8>,
909) {
910 if line.is_empty() {
911 if !suppress {
912 buf.push(line_delim);
913 }
914 return;
915 }
916
917 buf.reserve(line.len() + 1);
918
919 let mut field_count = 1;
920 let mut has_delim = false;
921
922 for pos in memchr_iter(delim, line) {
923 has_delim = true;
924 if field_count >= last_field {
925 unsafe {
926 buf_extend(buf, &line[..pos]);
927 buf_push(buf, line_delim);
928 }
929 return;
930 }
931 field_count += 1;
932 }
933
934 if !has_delim {
935 if !suppress {
936 unsafe {
937 buf_extend(buf, line);
938 buf_push(buf, line_delim);
939 }
940 }
941 return;
942 }
943
944 unsafe {
945 buf_extend(buf, line);
946 buf_push(buf, line_delim);
947 }
948}
949
950fn process_fields_suffix(
952 data: &[u8],
953 delim: u8,
954 line_delim: u8,
955 start_field: usize,
956 suppress: bool,
957 out: &mut impl Write,
958) -> io::Result<()> {
959 if data.len() >= PARALLEL_THRESHOLD {
960 let chunks = split_into_chunks(data, line_delim);
961 let results: Vec<Vec<u8>> = chunks
962 .par_iter()
963 .map(|chunk| {
964 let mut buf = Vec::with_capacity(chunk.len());
965 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
966 buf
967 })
968 .collect();
969 let slices: Vec<IoSlice> = results
971 .iter()
972 .filter(|r| !r.is_empty())
973 .map(|r| IoSlice::new(r))
974 .collect();
975 write_ioslices(out, &slices)?;
976 } else {
977 let mut buf = Vec::with_capacity(data.len());
978 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
979 if !buf.is_empty() {
980 out.write_all(&buf)?;
981 }
982 }
983 Ok(())
984}
985
986fn fields_suffix_chunk(
988 data: &[u8],
989 delim: u8,
990 line_delim: u8,
991 start_field: usize,
992 suppress: bool,
993 buf: &mut Vec<u8>,
994) {
995 let mut start = 0;
996 for end_pos in memchr_iter(line_delim, data) {
997 let line = &data[start..end_pos];
998 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
999 start = end_pos + 1;
1000 }
1001 if start < data.len() {
1002 fields_suffix_line(
1003 &data[start..],
1004 delim,
1005 line_delim,
1006 start_field,
1007 suppress,
1008 buf,
1009 );
1010 }
1011}
1012
1013#[inline(always)]
1015fn fields_suffix_line(
1016 line: &[u8],
1017 delim: u8,
1018 line_delim: u8,
1019 start_field: usize,
1020 suppress: bool,
1021 buf: &mut Vec<u8>,
1022) {
1023 if line.is_empty() {
1024 if !suppress {
1025 buf.push(line_delim);
1026 }
1027 return;
1028 }
1029
1030 buf.reserve(line.len() + 1);
1031
1032 let skip_delims = start_field - 1;
1033 let mut delim_count = 0;
1034 let mut has_delim = false;
1035
1036 for pos in memchr_iter(delim, line) {
1037 has_delim = true;
1038 delim_count += 1;
1039 if delim_count >= skip_delims {
1040 unsafe {
1041 buf_extend(buf, &line[pos + 1..]);
1042 buf_push(buf, line_delim);
1043 }
1044 return;
1045 }
1046 }
1047
1048 if !has_delim {
1049 if !suppress {
1050 unsafe {
1051 buf_extend(buf, line);
1052 buf_push(buf, line_delim);
1053 }
1054 }
1055 return;
1056 }
1057
1058 unsafe { buf_push(buf, line_delim) };
1060}
1061
1062fn process_fields_mid_range(
1065 data: &[u8],
1066 delim: u8,
1067 line_delim: u8,
1068 start_field: usize,
1069 end_field: usize,
1070 suppress: bool,
1071 out: &mut impl Write,
1072) -> io::Result<()> {
1073 if data.len() >= PARALLEL_THRESHOLD {
1074 let chunks = split_into_chunks(data, line_delim);
1075 let results: Vec<Vec<u8>> = chunks
1076 .par_iter()
1077 .map(|chunk| {
1078 let mut buf = Vec::with_capacity(chunk.len());
1079 fields_mid_range_chunk(
1080 chunk,
1081 delim,
1082 line_delim,
1083 start_field,
1084 end_field,
1085 suppress,
1086 &mut buf,
1087 );
1088 buf
1089 })
1090 .collect();
1091 let slices: Vec<IoSlice> = results
1092 .iter()
1093 .filter(|r| !r.is_empty())
1094 .map(|r| IoSlice::new(r))
1095 .collect();
1096 write_ioslices(out, &slices)?;
1097 } else {
1098 let mut buf = Vec::with_capacity(data.len());
1099 fields_mid_range_chunk(
1100 data,
1101 delim,
1102 line_delim,
1103 start_field,
1104 end_field,
1105 suppress,
1106 &mut buf,
1107 );
1108 if !buf.is_empty() {
1109 out.write_all(&buf)?;
1110 }
1111 }
1112 Ok(())
1113}
1114
1115fn fields_mid_range_chunk(
1117 data: &[u8],
1118 delim: u8,
1119 line_delim: u8,
1120 start_field: usize,
1121 end_field: usize,
1122 suppress: bool,
1123 buf: &mut Vec<u8>,
1124) {
1125 let mut start = 0;
1126 for end_pos in memchr_iter(line_delim, data) {
1127 let line = &data[start..end_pos];
1128 fields_mid_range_line(
1129 line,
1130 delim,
1131 line_delim,
1132 start_field,
1133 end_field,
1134 suppress,
1135 buf,
1136 );
1137 start = end_pos + 1;
1138 }
1139 if start < data.len() {
1140 fields_mid_range_line(
1141 &data[start..],
1142 delim,
1143 line_delim,
1144 start_field,
1145 end_field,
1146 suppress,
1147 buf,
1148 );
1149 }
1150}
1151
1152#[inline(always)]
1155fn fields_mid_range_line(
1156 line: &[u8],
1157 delim: u8,
1158 line_delim: u8,
1159 start_field: usize,
1160 end_field: usize,
1161 suppress: bool,
1162 buf: &mut Vec<u8>,
1163) {
1164 if line.is_empty() {
1165 if !suppress {
1166 buf.push(line_delim);
1167 }
1168 return;
1169 }
1170
1171 buf.reserve(line.len() + 1);
1172
1173 let skip_before = start_field - 1; let field_span = end_field - start_field; let mut delim_count = 0;
1177 let mut range_start = 0;
1178 let mut has_delim = false;
1179
1180 for pos in memchr_iter(delim, line) {
1181 has_delim = true;
1182 delim_count += 1;
1183 if delim_count == skip_before {
1184 range_start = pos + 1;
1185 }
1186 if delim_count == skip_before + field_span + 1 {
1187 if skip_before == 0 {
1189 range_start = 0;
1190 }
1191 unsafe {
1192 buf_extend(buf, &line[range_start..pos]);
1193 buf_push(buf, line_delim);
1194 }
1195 return;
1196 }
1197 }
1198
1199 if !has_delim {
1200 if !suppress {
1201 unsafe {
1202 buf_extend(buf, line);
1203 buf_push(buf, line_delim);
1204 }
1205 }
1206 return;
1207 }
1208
1209 if delim_count >= skip_before {
1211 if skip_before == 0 {
1213 range_start = 0;
1214 }
1215 unsafe {
1216 buf_extend(buf, &line[range_start..]);
1217 buf_push(buf, line_delim);
1218 }
1219 } else {
1220 unsafe { buf_push(buf, line_delim) };
1222 }
1223}
1224
1225fn process_nth_field_combined(
1230 data: &[u8],
1231 delim: u8,
1232 line_delim: u8,
1233 target_idx: usize,
1234 suppress: bool,
1235 buf: &mut Vec<u8>,
1236) {
1237 buf.reserve(data.len());
1238
1239 let mut line_start: usize = 0;
1240 let mut field_start: usize = 0;
1241 let mut field_idx: usize = 0;
1242 let mut has_delim = false;
1243 let mut emitted = false;
1244
1245 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1246 let byte = unsafe { *data.get_unchecked(pos) };
1247
1248 if byte == line_delim {
1249 if !emitted {
1251 if has_delim && field_idx == target_idx {
1252 unsafe {
1254 buf_extend(buf, &data[field_start..pos]);
1255 buf_push(buf, line_delim);
1256 }
1257 } else if has_delim {
1258 unsafe {
1260 buf_push(buf, line_delim);
1261 }
1262 } else if !suppress {
1263 unsafe {
1265 buf_extend(buf, &data[line_start..pos]);
1266 buf_push(buf, line_delim);
1267 }
1268 }
1269 }
1270 line_start = pos + 1;
1272 field_start = pos + 1;
1273 field_idx = 0;
1274 has_delim = false;
1275 emitted = false;
1276 } else {
1277 has_delim = true;
1279 if field_idx == target_idx {
1280 unsafe {
1281 buf_extend(buf, &data[field_start..pos]);
1282 buf_push(buf, line_delim);
1283 }
1284 emitted = true;
1285 }
1286 field_idx += 1;
1287 field_start = pos + 1;
1288 }
1289 }
1290
1291 if line_start < data.len() && !emitted {
1293 if has_delim && field_idx == target_idx {
1294 unsafe {
1295 buf_extend(buf, &data[field_start..data.len()]);
1296 buf_push(buf, line_delim);
1297 }
1298 } else if has_delim {
1299 unsafe {
1300 buf_push(buf, line_delim);
1301 }
1302 } else if !suppress {
1303 unsafe {
1304 buf_extend(buf, &data[line_start..data.len()]);
1305 buf_push(buf, line_delim);
1306 }
1307 }
1308 }
1309}
1310
1311#[inline]
1315fn single_field1_zerocopy(
1316 data: &[u8],
1317 delim: u8,
1318 line_delim: u8,
1319 out: &mut impl Write,
1320) -> io::Result<()> {
1321 let mut line_start: usize = 0;
1322 let mut run_start: usize = 0;
1323 let mut first_delim: Option<usize> = None;
1324
1325 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1326 let byte = unsafe { *data.get_unchecked(pos) };
1327
1328 if byte == line_delim {
1329 if let Some(dp) = first_delim {
1331 if run_start < line_start {
1334 out.write_all(&data[run_start..line_start])?;
1335 }
1336 out.write_all(&data[line_start..dp])?;
1337 out.write_all(&[line_delim])?;
1338 run_start = pos + 1;
1339 }
1340 line_start = pos + 1;
1342 first_delim = None;
1343 } else {
1344 if first_delim.is_none() {
1346 first_delim = Some(pos);
1347 }
1348 }
1349 }
1350
1351 if line_start < data.len() {
1353 if let Some(dp) = first_delim {
1354 if run_start < line_start {
1355 out.write_all(&data[run_start..line_start])?;
1356 }
1357 out.write_all(&data[line_start..dp])?;
1358 out.write_all(&[line_delim])?;
1359 return Ok(());
1360 }
1361 }
1362
1363 if run_start < data.len() {
1365 out.write_all(&data[run_start..])?;
1366 if !data.is_empty() && *data.last().unwrap() != line_delim {
1367 out.write_all(&[line_delim])?;
1368 }
1369 }
1370 Ok(())
1371}
1372
1373fn process_small_field_combined(
1378 data: &[u8],
1379 delim: u8,
1380 line_delim: u8,
1381 target_idx: usize,
1382 buf: &mut Vec<u8>,
1383) {
1384 buf.reserve(data.len());
1385 let mut start = 0;
1386 for end_pos in memchr_iter(line_delim, data) {
1387 let line = &data[start..end_pos];
1388 let mut field_start = 0;
1390 let mut found_start = target_idx == 0;
1391 let mut delim_count = 0;
1392 if !found_start {
1393 let mut search_start = 0;
1394 while let Some(pos) = memchr::memchr(delim, &line[search_start..]) {
1395 delim_count += 1;
1396 if delim_count == target_idx {
1397 field_start = search_start + pos + 1;
1398 found_start = true;
1399 break;
1400 }
1401 search_start = search_start + pos + 1;
1402 }
1403 }
1404 if !found_start {
1405 unsafe {
1407 buf_extend(buf, line);
1408 buf_push(buf, line_delim);
1409 }
1410 } else if field_start >= line.len() {
1411 unsafe { buf_push(buf, line_delim) };
1413 } else {
1414 match memchr::memchr(delim, &line[field_start..]) {
1416 Some(pos) => unsafe {
1417 buf_extend(buf, &line[field_start..field_start + pos]);
1418 buf_push(buf, line_delim);
1419 },
1420 None => unsafe {
1421 buf_extend(buf, &line[field_start..]);
1422 buf_push(buf, line_delim);
1423 },
1424 }
1425 }
1426 start = end_pos + 1;
1427 }
1428 if start < data.len() {
1430 let line = &data[start..];
1431 let mut field_start = 0;
1432 let mut found_start = target_idx == 0;
1433 let mut delim_count = 0;
1434 if !found_start {
1435 let mut search_start = 0;
1436 while let Some(pos) = memchr::memchr(delim, &line[search_start..]) {
1437 delim_count += 1;
1438 if delim_count == target_idx {
1439 field_start = search_start + pos + 1;
1440 found_start = true;
1441 break;
1442 }
1443 search_start = search_start + pos + 1;
1444 }
1445 }
1446 if !found_start {
1447 unsafe {
1448 buf_extend(buf, line);
1449 buf_push(buf, line_delim);
1450 }
1451 } else if field_start >= line.len() {
1452 unsafe { buf_push(buf, line_delim) };
1453 } else {
1454 match memchr::memchr(delim, &line[field_start..]) {
1455 Some(pos) => unsafe {
1456 buf_extend(buf, &line[field_start..field_start + pos]);
1457 buf_push(buf, line_delim);
1458 },
1459 None => unsafe {
1460 buf_extend(buf, &line[field_start..]);
1461 buf_push(buf, line_delim);
1462 },
1463 }
1464 }
1465 }
1466}
1467
1468fn process_single_field_chunk(
1470 data: &[u8],
1471 delim: u8,
1472 target_idx: usize,
1473 line_delim: u8,
1474 suppress: bool,
1475 buf: &mut Vec<u8>,
1476) {
1477 let mut start = 0;
1478 for end_pos in memchr_iter(line_delim, data) {
1479 let line = &data[start..end_pos];
1480 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
1481 start = end_pos + 1;
1482 }
1483 if start < data.len() {
1484 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
1485 }
1486}
1487
1488#[inline(always)]
1491fn extract_single_field_line(
1492 line: &[u8],
1493 delim: u8,
1494 target_idx: usize,
1495 line_delim: u8,
1496 suppress: bool,
1497 buf: &mut Vec<u8>,
1498) {
1499 if line.is_empty() {
1500 if !suppress {
1501 buf.push(line_delim);
1502 }
1503 return;
1504 }
1505
1506 buf.reserve(line.len() + 1);
1508
1509 if target_idx == 0 {
1511 match memchr::memchr(delim, line) {
1512 Some(pos) => unsafe {
1513 buf_extend(buf, &line[..pos]);
1514 buf_push(buf, line_delim);
1515 },
1516 None => {
1517 if !suppress {
1518 unsafe {
1519 buf_extend(buf, line);
1520 buf_push(buf, line_delim);
1521 }
1522 }
1523 }
1524 }
1525 return;
1526 }
1527
1528 let mut field_start = 0;
1529 let mut field_idx = 0;
1530 let mut has_delim = false;
1531
1532 for pos in memchr_iter(delim, line) {
1533 has_delim = true;
1534 if field_idx == target_idx {
1535 unsafe {
1536 buf_extend(buf, &line[field_start..pos]);
1537 buf_push(buf, line_delim);
1538 }
1539 return;
1540 }
1541 field_idx += 1;
1542 field_start = pos + 1;
1543 }
1544
1545 if !has_delim {
1546 if !suppress {
1547 unsafe {
1548 buf_extend(buf, line);
1549 buf_push(buf, line_delim);
1550 }
1551 }
1552 return;
1553 }
1554
1555 if field_idx == target_idx {
1556 unsafe {
1557 buf_extend(buf, &line[field_start..]);
1558 buf_push(buf, line_delim);
1559 }
1560 } else {
1561 unsafe { buf_push(buf, line_delim) };
1562 }
1563}
1564
1565#[inline(always)]
1568fn extract_fields_to_buf(
1569 line: &[u8],
1570 delim: u8,
1571 ranges: &[Range],
1572 output_delim: &[u8],
1573 suppress: bool,
1574 max_field: usize,
1575 field_mask: u64,
1576 line_delim: u8,
1577 buf: &mut Vec<u8>,
1578 complement: bool,
1579) {
1580 let len = line.len();
1581
1582 if len == 0 {
1583 if !suppress {
1584 buf.push(line_delim);
1585 }
1586 return;
1587 }
1588
1589 let needed = len + output_delim.len() * 16 + 1;
1592 if buf.capacity() - buf.len() < needed {
1593 buf.reserve(needed);
1594 }
1595
1596 let mut field_num: usize = 1;
1597 let mut field_start: usize = 0;
1598 let mut first_output = true;
1599 let mut has_delim = false;
1600
1601 for delim_pos in memchr_iter(delim, line) {
1602 has_delim = true;
1603
1604 if is_selected(field_num, field_mask, ranges, complement) {
1605 if !first_output {
1606 unsafe { buf_extend(buf, output_delim) };
1607 }
1608 unsafe { buf_extend(buf, &line[field_start..delim_pos]) };
1609 first_output = false;
1610 }
1611
1612 field_num += 1;
1613 field_start = delim_pos + 1;
1614
1615 if field_num > max_field {
1616 break;
1617 }
1618 }
1619
1620 if (field_num <= max_field || complement)
1622 && has_delim
1623 && is_selected(field_num, field_mask, ranges, complement)
1624 {
1625 if !first_output {
1626 unsafe { buf_extend(buf, output_delim) };
1627 }
1628 unsafe { buf_extend(buf, &line[field_start..len]) };
1629 first_output = false;
1630 }
1631
1632 if !first_output {
1633 unsafe { buf_push(buf, line_delim) };
1634 } else if !has_delim {
1635 if !suppress {
1636 unsafe {
1637 buf_extend(buf, line);
1638 buf_push(buf, line_delim);
1639 }
1640 }
1641 } else {
1642 unsafe { buf_push(buf, line_delim) };
1643 }
1644}
1645
1646fn process_bytes_from_start(
1653 data: &[u8],
1654 max_bytes: usize,
1655 line_delim: u8,
1656 out: &mut impl Write,
1657) -> io::Result<()> {
1658 if data.len() >= PARALLEL_THRESHOLD {
1659 let chunks = split_into_chunks(data, line_delim);
1660 let results: Vec<Vec<u8>> = chunks
1661 .par_iter()
1662 .map(|chunk| {
1663 let mut buf = Vec::with_capacity(chunk.len());
1664 bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
1665 buf
1666 })
1667 .collect();
1668 let slices: Vec<IoSlice> = results
1670 .iter()
1671 .filter(|r| !r.is_empty())
1672 .map(|r| IoSlice::new(r))
1673 .collect();
1674 write_ioslices(out, &slices)?;
1675 } else {
1676 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
1680 }
1681 Ok(())
1682}
1683
1684#[inline]
1687fn bytes_from_start_zerocopy(
1688 data: &[u8],
1689 max_bytes: usize,
1690 line_delim: u8,
1691 out: &mut impl Write,
1692) -> io::Result<()> {
1693 let mut start = 0;
1694 let mut run_start: usize = 0;
1695
1696 for pos in memchr_iter(line_delim, data) {
1697 let line_len = pos - start;
1698 if line_len > max_bytes {
1699 if run_start < start {
1701 out.write_all(&data[run_start..start])?;
1702 }
1703 out.write_all(&data[start..start + max_bytes])?;
1704 out.write_all(&[line_delim])?;
1705 run_start = pos + 1;
1706 }
1707 start = pos + 1;
1709 }
1710 if start < data.len() {
1712 let line_len = data.len() - start;
1713 if line_len > max_bytes {
1714 if run_start < start {
1715 out.write_all(&data[run_start..start])?;
1716 }
1717 out.write_all(&data[start..start + max_bytes])?;
1718 out.write_all(&[line_delim])?;
1719 return Ok(());
1720 }
1721 }
1722 if run_start < data.len() {
1724 out.write_all(&data[run_start..])?;
1725 if !data.is_empty() && *data.last().unwrap() != line_delim {
1727 out.write_all(&[line_delim])?;
1728 }
1729 }
1730 Ok(())
1731}
1732
1733#[inline]
1736fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1737 buf.reserve(data.len());
1739
1740 let mut start = 0;
1741 for pos in memchr_iter(line_delim, data) {
1742 let line_len = pos - start;
1743 let take = line_len.min(max_bytes);
1744 unsafe {
1745 buf_extend(buf, &data[start..start + take]);
1746 buf_push(buf, line_delim);
1747 }
1748 start = pos + 1;
1749 }
1750 if start < data.len() {
1752 let line_len = data.len() - start;
1753 let take = line_len.min(max_bytes);
1754 unsafe {
1755 buf_extend(buf, &data[start..start + take]);
1756 buf_push(buf, line_delim);
1757 }
1758 }
1759}
1760
1761fn process_bytes_from_offset(
1763 data: &[u8],
1764 skip_bytes: usize,
1765 line_delim: u8,
1766 out: &mut impl Write,
1767) -> io::Result<()> {
1768 if data.len() >= PARALLEL_THRESHOLD {
1769 let chunks = split_into_chunks(data, line_delim);
1770 let results: Vec<Vec<u8>> = chunks
1771 .par_iter()
1772 .map(|chunk| {
1773 let mut buf = Vec::with_capacity(chunk.len());
1774 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
1775 buf
1776 })
1777 .collect();
1778 let slices: Vec<IoSlice> = results
1780 .iter()
1781 .filter(|r| !r.is_empty())
1782 .map(|r| IoSlice::new(r))
1783 .collect();
1784 write_ioslices(out, &slices)?;
1785 } else {
1786 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
1788 }
1789 Ok(())
1790}
1791
1792#[inline]
1796fn bytes_from_offset_zerocopy(
1797 data: &[u8],
1798 skip_bytes: usize,
1799 line_delim: u8,
1800 out: &mut impl Write,
1801) -> io::Result<()> {
1802 let delim_buf = [line_delim];
1803 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
1804
1805 let mut start = 0;
1806 for pos in memchr_iter(line_delim, data) {
1807 let line_len = pos - start;
1808 if line_len > skip_bytes {
1809 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
1810 }
1811 iov.push(IoSlice::new(&delim_buf));
1812 if iov.len() >= MAX_IOV - 1 {
1814 write_ioslices(out, &iov)?;
1815 iov.clear();
1816 }
1817 start = pos + 1;
1818 }
1819 if start < data.len() {
1820 let line_len = data.len() - start;
1821 if line_len > skip_bytes {
1822 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
1823 }
1824 iov.push(IoSlice::new(&delim_buf));
1825 }
1826 if !iov.is_empty() {
1827 write_ioslices(out, &iov)?;
1828 }
1829 Ok(())
1830}
1831
1832#[inline]
1835fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1836 buf.reserve(data.len());
1837
1838 let mut start = 0;
1839 for pos in memchr_iter(line_delim, data) {
1840 let line_len = pos - start;
1841 if line_len > skip_bytes {
1842 unsafe {
1843 buf_extend(buf, &data[start + skip_bytes..pos]);
1844 }
1845 }
1846 unsafe {
1847 buf_push(buf, line_delim);
1848 }
1849 start = pos + 1;
1850 }
1851 if start < data.len() {
1852 let line_len = data.len() - start;
1853 if line_len > skip_bytes {
1854 unsafe {
1855 buf_extend(buf, &data[start + skip_bytes..data.len()]);
1856 }
1857 }
1858 unsafe {
1859 buf_push(buf, line_delim);
1860 }
1861 }
1862}
1863
1864fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1866 let line_delim = cfg.line_delim;
1867 let ranges = cfg.ranges;
1868 let complement = cfg.complement;
1869 let output_delim = cfg.output_delim;
1870
1871 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
1873 let max_bytes = ranges[0].end;
1874 if max_bytes < usize::MAX {
1875 return process_bytes_from_start(data, max_bytes, line_delim, out);
1876 }
1877 }
1878
1879 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
1881 let skip_bytes = ranges[0].start.saturating_sub(1);
1882 if skip_bytes > 0 {
1883 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
1884 }
1885 }
1886
1887 if data.len() >= PARALLEL_THRESHOLD {
1888 let chunks = split_into_chunks(data, line_delim);
1889 let results: Vec<Vec<u8>> = chunks
1890 .par_iter()
1891 .map(|chunk| {
1892 let mut buf = Vec::with_capacity(chunk.len());
1893 process_bytes_chunk(
1894 chunk,
1895 ranges,
1896 complement,
1897 output_delim,
1898 line_delim,
1899 &mut buf,
1900 );
1901 buf
1902 })
1903 .collect();
1904 let slices: Vec<IoSlice> = results
1906 .iter()
1907 .filter(|r| !r.is_empty())
1908 .map(|r| IoSlice::new(r))
1909 .collect();
1910 write_ioslices(out, &slices)?;
1911 } else {
1912 let mut buf = Vec::with_capacity(data.len());
1913 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
1914 if !buf.is_empty() {
1915 out.write_all(&buf)?;
1916 }
1917 }
1918 Ok(())
1919}
1920
1921fn process_bytes_chunk(
1923 data: &[u8],
1924 ranges: &[Range],
1925 complement: bool,
1926 output_delim: &[u8],
1927 line_delim: u8,
1928 buf: &mut Vec<u8>,
1929) {
1930 let mut start = 0;
1931 for end_pos in memchr_iter(line_delim, data) {
1932 let line = &data[start..end_pos];
1933 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
1934 buf.push(line_delim);
1935 start = end_pos + 1;
1936 }
1937 if start < data.len() {
1938 cut_bytes_to_buf(&data[start..], ranges, complement, output_delim, buf);
1939 buf.push(line_delim);
1940 }
1941}
1942
1943#[inline(always)]
1946fn cut_bytes_to_buf(
1947 line: &[u8],
1948 ranges: &[Range],
1949 complement: bool,
1950 output_delim: &[u8],
1951 buf: &mut Vec<u8>,
1952) {
1953 let len = line.len();
1954 let mut first_range = true;
1955
1956 buf.reserve(len + output_delim.len() * ranges.len() + 1);
1958
1959 if complement {
1960 let mut pos: usize = 1;
1961 for r in ranges {
1962 let rs = r.start;
1963 let re = r.end.min(len);
1964 if pos < rs {
1965 if !first_range && !output_delim.is_empty() {
1966 unsafe { buf_extend(buf, output_delim) };
1967 }
1968 unsafe { buf_extend(buf, &line[pos - 1..rs - 1]) };
1969 first_range = false;
1970 }
1971 pos = re + 1;
1972 if pos > len {
1973 break;
1974 }
1975 }
1976 if pos <= len {
1977 if !first_range && !output_delim.is_empty() {
1978 unsafe { buf_extend(buf, output_delim) };
1979 }
1980 unsafe { buf_extend(buf, &line[pos - 1..len]) };
1981 }
1982 } else if output_delim.is_empty() && ranges.len() == 1 {
1983 let start = ranges[0].start.saturating_sub(1);
1985 let end = ranges[0].end.min(len);
1986 if start < len {
1987 unsafe { buf_extend(buf, &line[start..end]) };
1988 }
1989 } else {
1990 for r in ranges {
1991 let start = r.start.saturating_sub(1);
1992 let end = r.end.min(len);
1993 if start >= len {
1994 break;
1995 }
1996 if !first_range && !output_delim.is_empty() {
1997 unsafe { buf_extend(buf, output_delim) };
1998 }
1999 unsafe { buf_extend(buf, &line[start..end]) };
2000 first_range = false;
2001 }
2002 }
2003}
2004
2005#[inline]
2009pub fn cut_fields(
2010 line: &[u8],
2011 delim: u8,
2012 ranges: &[Range],
2013 complement: bool,
2014 output_delim: &[u8],
2015 suppress_no_delim: bool,
2016 out: &mut impl Write,
2017) -> io::Result<bool> {
2018 if memchr::memchr(delim, line).is_none() {
2019 if !suppress_no_delim {
2020 out.write_all(line)?;
2021 return Ok(true);
2022 }
2023 return Ok(false);
2024 }
2025
2026 let mut field_num: usize = 1;
2027 let mut field_start: usize = 0;
2028 let mut first_output = true;
2029
2030 for delim_pos in memchr_iter(delim, line) {
2031 let selected = in_ranges(ranges, field_num) != complement;
2032 if selected {
2033 if !first_output {
2034 out.write_all(output_delim)?;
2035 }
2036 out.write_all(&line[field_start..delim_pos])?;
2037 first_output = false;
2038 }
2039 field_start = delim_pos + 1;
2040 field_num += 1;
2041 }
2042
2043 let selected = in_ranges(ranges, field_num) != complement;
2044 if selected {
2045 if !first_output {
2046 out.write_all(output_delim)?;
2047 }
2048 out.write_all(&line[field_start..])?;
2049 }
2050
2051 Ok(true)
2052}
2053
2054#[inline]
2056pub fn cut_bytes(
2057 line: &[u8],
2058 ranges: &[Range],
2059 complement: bool,
2060 output_delim: &[u8],
2061 out: &mut impl Write,
2062) -> io::Result<bool> {
2063 let mut first_range = true;
2064
2065 if complement {
2066 let len = line.len();
2067 let mut comp_ranges = Vec::new();
2068 let mut pos: usize = 1;
2069 for r in ranges {
2070 let rs = r.start;
2071 let re = r.end.min(len);
2072 if pos < rs {
2073 comp_ranges.push((pos, rs - 1));
2074 }
2075 pos = re + 1;
2076 if pos > len {
2077 break;
2078 }
2079 }
2080 if pos <= len {
2081 comp_ranges.push((pos, len));
2082 }
2083 for &(s, e) in &comp_ranges {
2084 if !first_range && !output_delim.is_empty() {
2085 out.write_all(output_delim)?;
2086 }
2087 out.write_all(&line[s - 1..e])?;
2088 first_range = false;
2089 }
2090 } else {
2091 for r in ranges {
2092 let start = r.start.saturating_sub(1);
2093 let end = r.end.min(line.len());
2094 if start >= line.len() {
2095 break;
2096 }
2097 if !first_range && !output_delim.is_empty() {
2098 out.write_all(output_delim)?;
2099 }
2100 out.write_all(&line[start..end])?;
2101 first_range = false;
2102 }
2103 }
2104 Ok(true)
2105}
2106
2107pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
2109 match cfg.mode {
2110 CutMode::Fields => process_fields_fast(data, cfg, out),
2111 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
2112 }
2113}
2114
2115pub fn process_cut_reader<R: BufRead>(
2119 mut reader: R,
2120 cfg: &CutConfig,
2121 out: &mut impl Write,
2122) -> io::Result<()> {
2123 const CHUNK_SIZE: usize = 4 * 1024 * 1024; let line_delim = cfg.line_delim;
2125
2126 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
2129
2130 loop {
2131 buf.reserve(CHUNK_SIZE);
2133 let read_start = buf.len();
2134 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
2135 let n = read_fully(&mut reader, &mut buf[read_start..])?;
2136 buf.truncate(read_start + n);
2137
2138 if buf.is_empty() {
2139 break;
2140 }
2141
2142 if n == 0 {
2143 process_cut_data(&buf, cfg, out)?;
2145 break;
2146 }
2147
2148 let process_end = match memchr::memrchr(line_delim, &buf) {
2150 Some(pos) => pos + 1,
2151 None => {
2152 continue;
2154 }
2155 };
2156
2157 process_cut_data(&buf[..process_end], cfg, out)?;
2159
2160 let leftover_len = buf.len() - process_end;
2162 if leftover_len > 0 {
2163 buf.copy_within(process_end.., 0);
2164 }
2165 buf.truncate(leftover_len);
2166 }
2167
2168 Ok(())
2169}
2170
/// Reads from `reader` until `buf` is full or end of input is reached.
///
/// Returns the number of bytes placed at the start of `buf`; a value smaller
/// than `buf.len()` means the reader reported end of input. Reads that fail
/// with `ErrorKind::Interrupted` are retried, including the very first one.
///
/// # Errors
///
/// Returns the first non-`Interrupted` error reported by the reader.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
2190
/// Which kind of selection a cut operation performs.
///
/// `process_cut_data` dispatches on this value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CutMode {
    /// Byte mode; handled by the byte-range routines.
    Bytes,
    /// Character mode; currently dispatched to the same routines as
    /// [`CutMode::Bytes`].
    Characters,
    /// Field mode; handled by the delimiter-based field routines.
    Fields,
}