1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, IoSlice, Write};
4
/// Minimum input size, in bytes, at which the processing routines split the
/// data into chunks and work on them in parallel.
const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;

/// Maximum number of `IoSlice`s handed to a single `write_vectored` call.
const MAX_IOV: usize = 1024;
10
/// Settings for one cut operation.
pub struct CutConfig<'a> {
    /// Selection mode.
    /// NOTE(review): `CutMode` is declared outside this section — confirm its variants there.
    pub mode: CutMode,
    /// Selected positions as 1-based inclusive ranges; the lookup helpers
    /// assume they are sorted by start, as `parse_ranges` returns them.
    pub ranges: &'a [Range],
    /// When true the selection is inverted: positions outside `ranges` are kept.
    pub complement: bool,
    /// Byte that separates fields on input.
    pub delim: u8,
    /// Bytes written between selected fields on output.
    pub output_delim: &'a [u8],
    /// When true, lines that contain no `delim` are dropped instead of being
    /// passed through unchanged.
    pub suppress_no_delim: bool,
    /// Byte that terminates each line.
    pub line_delim: u8,
}
21
/// Inclusive range of 1-based field or byte positions.
///
/// `parse_ranges` stores `usize::MAX` in `end` for an open-ended range such as `3-`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position (never 0 when produced by `parse_ranges`).
    pub start: usize,
    /// Last selected position, inclusive.
    pub end: usize,
}
28
29pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
32 let mut ranges = Vec::new();
33
34 for part in spec.split(',') {
35 let part = part.trim();
36 if part.is_empty() {
37 continue;
38 }
39
40 if let Some(idx) = part.find('-') {
41 let left = &part[..idx];
42 let right = &part[idx + 1..];
43
44 let start = if left.is_empty() {
45 1
46 } else {
47 left.parse::<usize>()
48 .map_err(|_| format!("invalid range: '{}'", part))?
49 };
50
51 let end = if right.is_empty() {
52 usize::MAX
53 } else {
54 right
55 .parse::<usize>()
56 .map_err(|_| format!("invalid range: '{}'", part))?
57 };
58
59 if start == 0 {
60 return Err("fields and positions are numbered from 1".to_string());
61 }
62 if start > end {
63 return Err(format!("invalid decreasing range: '{}'", part));
64 }
65
66 ranges.push(Range { start, end });
67 } else {
68 let n = part
69 .parse::<usize>()
70 .map_err(|_| format!("invalid field: '{}'", part))?;
71 if n == 0 {
72 return Err("fields and positions are numbered from 1".to_string());
73 }
74 ranges.push(Range { start: n, end: n });
75 }
76 }
77
78 if ranges.is_empty() {
79 return Err("you must specify a list of bytes, characters, or fields".to_string());
80 }
81
82 ranges.sort_by_key(|r| (r.start, r.end));
84 let mut merged = vec![ranges[0].clone()];
85 for r in &ranges[1..] {
86 let last = merged.last_mut().unwrap();
87 if r.start <= last.end.saturating_add(1) {
88 last.end = last.end.max(r.end);
89 } else {
90 merged.push(r.clone());
91 }
92 }
93
94 Ok(merged)
95}
96
97#[inline(always)]
100fn in_ranges(ranges: &[Range], pos: usize) -> bool {
101 for r in ranges {
102 if pos < r.start {
103 return false;
104 }
105 if pos <= r.end {
106 return true;
107 }
108 }
109 false
110}
111
112#[inline]
115fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
116 let mut mask: u64 = 0;
117 for i in 1..=64u32 {
118 let in_range = in_ranges(ranges, i as usize);
119 if in_range != complement {
120 mask |= 1u64 << (i - 1);
121 }
122 }
123 mask
124}
125
126#[inline(always)]
128fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
129 if field_num <= 64 {
130 (mask >> (field_num - 1)) & 1 == 1
131 } else {
132 in_ranges(ranges, field_num) != complement
133 }
134}
135
/// Appends `data` to `buf` without checking or growing its capacity.
///
/// # Safety
///
/// The caller must guarantee `buf.capacity() - buf.len() >= data.len()`;
/// otherwise the copy writes past the end of the allocation. `data` must not
/// alias `buf`'s spare capacity.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    // Catch under-reservation in debug builds instead of corrupting the heap.
    debug_assert!(
        buf.capacity() - buf.len() >= data.len(),
        "buf_extend: insufficient spare capacity"
    );
    // SAFETY: the caller guarantees room for `data.len()` more bytes, so the
    // destination range lies inside the allocation and the new length covers
    // only initialized bytes.
    unsafe {
        let len = buf.len();
        std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr().add(len), data.len());
        buf.set_len(len + data.len());
    }
}
148
/// Appends the single byte `b` to `buf` without checking or growing its capacity.
///
/// # Safety
///
/// The caller must guarantee `buf.len() < buf.capacity()`; otherwise the
/// write lands past the end of the allocation.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    // Catch under-reservation in debug builds instead of corrupting the heap.
    debug_assert!(
        buf.len() < buf.capacity(),
        "buf_push: insufficient spare capacity"
    );
    // SAFETY: the caller guarantees at least one spare byte, so the write is
    // in bounds and the new length covers only initialized bytes.
    unsafe {
        let len = buf.len();
        *buf.as_mut_ptr().add(len) = b;
        buf.set_len(len + 1);
    }
}
159
160#[inline]
163fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
164 if slices.is_empty() {
165 return Ok(());
166 }
167 for batch in slices.chunks(MAX_IOV) {
168 let total: usize = batch.iter().map(|s| s.len()).sum();
169 match out.write_vectored(batch) {
170 Ok(n) if n >= total => continue,
171 Ok(mut written) => {
172 for slice in batch {
174 let slen = slice.len();
175 if written >= slen {
176 written -= slen;
177 continue;
178 }
179 if written > 0 {
180 out.write_all(&slice[written..])?;
181 written = 0;
182 } else {
183 out.write_all(slice)?;
184 }
185 }
186 }
187 Err(e) => return Err(e),
188 }
189 }
190 Ok(())
191}
192
193fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
197 let num_threads = rayon::current_num_threads().max(1);
198 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
199 return vec![data];
200 }
201
202 let chunk_size = data.len() / num_threads;
203 let mut chunks = Vec::with_capacity(num_threads);
204 let mut pos = 0;
205
206 for _ in 0..num_threads - 1 {
207 let target = pos + chunk_size;
208 if target >= data.len() {
209 break;
210 }
211 let boundary = memchr::memchr(line_delim, &data[target..])
212 .map(|p| target + p + 1)
213 .unwrap_or(data.len());
214 if boundary > pos {
215 chunks.push(&data[pos..boundary]);
216 }
217 pos = boundary;
218 }
219
220 if pos < data.len() {
221 chunks.push(&data[pos..]);
222 }
223
224 chunks
225}
226
227fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
231 let delim = cfg.delim;
232 let line_delim = cfg.line_delim;
233 let ranges = cfg.ranges;
234 let complement = cfg.complement;
235 let output_delim = cfg.output_delim;
236 let suppress = cfg.suppress_no_delim;
237
238 if !complement && memchr::memchr(delim, data).is_none() {
240 if suppress {
241 return Ok(());
242 }
243 out.write_all(data)?;
244 if !data.is_empty() && *data.last().unwrap() != line_delim {
245 out.write_all(&[line_delim])?;
246 }
247 return Ok(());
248 }
249
250 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
252 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
253 }
254
255 if complement
257 && ranges.len() == 1
258 && ranges[0].start == ranges[0].end
259 && output_delim.len() == 1
260 && output_delim[0] == delim
261 {
262 return process_complement_single_field(
263 data,
264 delim,
265 line_delim,
266 ranges[0].start,
267 suppress,
268 out,
269 );
270 }
271
272 if !complement
274 && ranges.len() == 1
275 && ranges[0].start == 1
276 && output_delim.len() == 1
277 && output_delim[0] == delim
278 && ranges[0].end < usize::MAX
279 {
280 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
281 }
282
283 if !complement
285 && ranges.len() == 1
286 && ranges[0].end == usize::MAX
287 && ranges[0].start > 1
288 && output_delim.len() == 1
289 && output_delim[0] == delim
290 {
291 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
292 }
293
294 if !complement
296 && ranges.len() == 1
297 && ranges[0].start > 1
298 && ranges[0].end < usize::MAX
299 && output_delim.len() == 1
300 && output_delim[0] == delim
301 {
302 return process_fields_mid_range(
303 data,
304 delim,
305 line_delim,
306 ranges[0].start,
307 ranges[0].end,
308 suppress,
309 out,
310 );
311 }
312
313 let max_field = if complement {
315 usize::MAX
316 } else {
317 ranges.last().map(|r| r.end).unwrap_or(0)
318 };
319 let field_mask = compute_field_mask(ranges, complement);
320
321 if data.len() >= PARALLEL_THRESHOLD {
322 let chunks = split_into_chunks(data, line_delim);
323 let results: Vec<Vec<u8>> = chunks
324 .par_iter()
325 .map(|chunk| {
326 let mut buf = Vec::with_capacity(chunk.len());
327 process_fields_chunk(
328 chunk,
329 delim,
330 ranges,
331 output_delim,
332 suppress,
333 max_field,
334 field_mask,
335 line_delim,
336 complement,
337 &mut buf,
338 );
339 buf
340 })
341 .collect();
342 let slices: Vec<IoSlice> = results
344 .iter()
345 .filter(|r| !r.is_empty())
346 .map(|r| IoSlice::new(r))
347 .collect();
348 write_ioslices(out, &slices)?;
349 } else {
350 let mut buf = Vec::with_capacity(data.len());
351 process_fields_chunk(
352 data,
353 delim,
354 ranges,
355 output_delim,
356 suppress,
357 max_field,
358 field_mask,
359 line_delim,
360 complement,
361 &mut buf,
362 );
363 if !buf.is_empty() {
364 out.write_all(&buf)?;
365 }
366 }
367 Ok(())
368}
369
370fn process_fields_chunk(
375 data: &[u8],
376 delim: u8,
377 ranges: &[Range],
378 output_delim: &[u8],
379 suppress: bool,
380 max_field: usize,
381 field_mask: u64,
382 line_delim: u8,
383 complement: bool,
384 buf: &mut Vec<u8>,
385) {
386 if delim != line_delim && max_field < usize::MAX && !complement {
393 buf.reserve(data.len());
394 let mut start = 0;
395 for end_pos in memchr_iter(line_delim, data) {
396 let line = &data[start..end_pos];
397 extract_fields_to_buf(
398 line,
399 delim,
400 ranges,
401 output_delim,
402 suppress,
403 max_field,
404 field_mask,
405 line_delim,
406 buf,
407 complement,
408 );
409 start = end_pos + 1;
410 }
411 if start < data.len() {
412 extract_fields_to_buf(
413 &data[start..],
414 delim,
415 ranges,
416 output_delim,
417 suppress,
418 max_field,
419 field_mask,
420 line_delim,
421 buf,
422 complement,
423 );
424 }
425 return;
426 }
427
428 if delim != line_delim {
431 buf.reserve(data.len());
432
433 let mut line_start: usize = 0;
434 let mut field_start: usize = 0;
435 let mut field_num: usize = 1;
436 let mut first_output = true;
437 let mut has_delim = false;
438
439 for pos in memchr::memchr2_iter(delim, line_delim, data) {
440 let byte = unsafe { *data.get_unchecked(pos) };
441
442 if byte == line_delim {
443 if (field_num <= max_field || complement)
445 && has_delim
446 && is_selected(field_num, field_mask, ranges, complement)
447 {
448 if !first_output {
449 unsafe { buf_extend(buf, output_delim) };
450 }
451 unsafe { buf_extend(buf, &data[field_start..pos]) };
452 first_output = false;
453 }
454
455 if !first_output {
456 unsafe { buf_push(buf, line_delim) };
457 } else if !has_delim {
458 if !suppress {
459 unsafe {
460 buf_extend(buf, &data[line_start..pos]);
461 buf_push(buf, line_delim);
462 }
463 }
464 } else {
465 unsafe { buf_push(buf, line_delim) };
466 }
467
468 line_start = pos + 1;
470 field_start = pos + 1;
471 field_num = 1;
472 first_output = true;
473 has_delim = false;
474 } else {
475 has_delim = true;
477
478 if is_selected(field_num, field_mask, ranges, complement) {
479 if !first_output {
480 unsafe { buf_extend(buf, output_delim) };
481 }
482 unsafe { buf_extend(buf, &data[field_start..pos]) };
483 first_output = false;
484 }
485
486 field_num += 1;
487 field_start = pos + 1;
488 }
489 }
490
491 if line_start < data.len() {
493 let line = &data[line_start..];
494 if !line.is_empty() {
495 if (field_num <= max_field || complement)
496 && has_delim
497 && is_selected(field_num, field_mask, ranges, complement)
498 {
499 if !first_output {
500 unsafe { buf_extend(buf, output_delim) };
501 }
502 unsafe { buf_extend(buf, &data[field_start..data.len()]) };
503 first_output = false;
504 }
505
506 if !first_output {
507 unsafe { buf_push(buf, line_delim) };
508 } else if !has_delim {
509 if !suppress {
510 unsafe {
511 buf_extend(buf, &data[line_start..data.len()]);
512 buf_push(buf, line_delim);
513 }
514 }
515 } else {
516 unsafe { buf_push(buf, line_delim) };
517 }
518 }
519 }
520
521 return;
522 }
523
524 let mut start = 0;
526 for end_pos in memchr_iter(line_delim, data) {
527 let line = &data[start..end_pos];
528 extract_fields_to_buf(
529 line,
530 delim,
531 ranges,
532 output_delim,
533 suppress,
534 max_field,
535 field_mask,
536 line_delim,
537 buf,
538 complement,
539 );
540 start = end_pos + 1;
541 }
542 if start < data.len() {
543 extract_fields_to_buf(
544 &data[start..],
545 delim,
546 ranges,
547 output_delim,
548 suppress,
549 max_field,
550 field_mask,
551 line_delim,
552 buf,
553 complement,
554 );
555 }
556}
557
558fn process_single_field(
564 data: &[u8],
565 delim: u8,
566 line_delim: u8,
567 target: usize,
568 suppress: bool,
569 out: &mut impl Write,
570) -> io::Result<()> {
571 let target_idx = target - 1;
572
573 if delim != line_delim {
575 if data.len() >= PARALLEL_THRESHOLD {
576 let chunks = split_into_chunks(data, line_delim);
577 let results: Vec<Vec<u8>> = chunks
578 .par_iter()
579 .map(|chunk| {
580 let mut buf = Vec::with_capacity(chunk.len());
581 process_nth_field_combined(
582 chunk, delim, line_delim, target_idx, suppress, &mut buf,
583 );
584 buf
585 })
586 .collect();
587 for result in &results {
588 if !result.is_empty() {
589 out.write_all(result)?;
590 }
591 }
592 } else if target_idx == 0 && !suppress {
593 single_field1_zerocopy(data, delim, line_delim, out)?;
598 } else {
599 let mut buf = Vec::with_capacity(data.len());
600 process_nth_field_combined(data, delim, line_delim, target_idx, suppress, &mut buf);
601 if !buf.is_empty() {
602 out.write_all(&buf)?;
603 }
604 }
605 return Ok(());
606 }
607
608 if data.len() >= PARALLEL_THRESHOLD {
610 let chunks = split_into_chunks(data, line_delim);
611 let results: Vec<Vec<u8>> = chunks
612 .par_iter()
613 .map(|chunk| {
614 let mut buf = Vec::with_capacity(chunk.len() / 4);
615 process_single_field_chunk(
616 chunk, delim, target_idx, line_delim, suppress, &mut buf,
617 );
618 buf
619 })
620 .collect();
621 let slices: Vec<IoSlice> = results
623 .iter()
624 .filter(|r| !r.is_empty())
625 .map(|r| IoSlice::new(r))
626 .collect();
627 write_ioslices(out, &slices)?;
628 } else {
629 let mut buf = Vec::with_capacity(data.len() / 4);
630 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
631 if !buf.is_empty() {
632 out.write_all(&buf)?;
633 }
634 }
635 Ok(())
636}
637
638fn process_complement_single_field(
640 data: &[u8],
641 delim: u8,
642 line_delim: u8,
643 skip_field: usize,
644 suppress: bool,
645 out: &mut impl Write,
646) -> io::Result<()> {
647 let skip_idx = skip_field - 1;
648
649 if data.len() >= PARALLEL_THRESHOLD {
650 let chunks = split_into_chunks(data, line_delim);
651 let results: Vec<Vec<u8>> = chunks
652 .par_iter()
653 .map(|chunk| {
654 let mut buf = Vec::with_capacity(chunk.len());
655 complement_single_field_chunk(
656 chunk, delim, skip_idx, line_delim, suppress, &mut buf,
657 );
658 buf
659 })
660 .collect();
661 let slices: Vec<IoSlice> = results
663 .iter()
664 .filter(|r| !r.is_empty())
665 .map(|r| IoSlice::new(r))
666 .collect();
667 write_ioslices(out, &slices)?;
668 } else {
669 let mut buf = Vec::with_capacity(data.len());
670 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
671 if !buf.is_empty() {
672 out.write_all(&buf)?;
673 }
674 }
675 Ok(())
676}
677
678fn complement_single_field_chunk(
680 data: &[u8],
681 delim: u8,
682 skip_idx: usize,
683 line_delim: u8,
684 suppress: bool,
685 buf: &mut Vec<u8>,
686) {
687 let mut start = 0;
688 for end_pos in memchr_iter(line_delim, data) {
689 let line = &data[start..end_pos];
690 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
691 start = end_pos + 1;
692 }
693 if start < data.len() {
694 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
695 }
696}
697
698#[inline(always)]
700fn complement_single_field_line(
701 line: &[u8],
702 delim: u8,
703 skip_idx: usize,
704 line_delim: u8,
705 suppress: bool,
706 buf: &mut Vec<u8>,
707) {
708 if line.is_empty() {
709 if !suppress {
710 buf.push(line_delim);
711 }
712 return;
713 }
714
715 buf.reserve(line.len() + 1);
716
717 let mut field_idx = 0;
718 let mut field_start = 0;
719 let mut first_output = true;
720 let mut has_delim = false;
721
722 for pos in memchr_iter(delim, line) {
723 has_delim = true;
724 if field_idx != skip_idx {
725 if !first_output {
726 unsafe { buf_push(buf, delim) };
727 }
728 unsafe { buf_extend(buf, &line[field_start..pos]) };
729 first_output = false;
730 }
731 field_idx += 1;
732 field_start = pos + 1;
733 }
734
735 if !has_delim {
736 if !suppress {
737 unsafe {
738 buf_extend(buf, line);
739 buf_push(buf, line_delim);
740 }
741 }
742 return;
743 }
744
745 if field_idx != skip_idx {
747 if !first_output {
748 unsafe { buf_push(buf, delim) };
749 }
750 unsafe { buf_extend(buf, &line[field_start..]) };
751 }
752
753 unsafe { buf_push(buf, line_delim) };
754}
755
756fn process_fields_prefix(
760 data: &[u8],
761 delim: u8,
762 line_delim: u8,
763 last_field: usize,
764 suppress: bool,
765 out: &mut impl Write,
766) -> io::Result<()> {
767 if data.len() >= PARALLEL_THRESHOLD {
768 let chunks = split_into_chunks(data, line_delim);
769 let results: Vec<Vec<u8>> = chunks
770 .par_iter()
771 .map(|chunk| {
772 let mut buf = Vec::with_capacity(chunk.len());
773 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
774 buf
775 })
776 .collect();
777 let slices: Vec<IoSlice> = results
779 .iter()
780 .filter(|r| !r.is_empty())
781 .map(|r| IoSlice::new(r))
782 .collect();
783 write_ioslices(out, &slices)?;
784 } else if !suppress {
785 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
789 } else {
790 let mut buf = Vec::with_capacity(data.len());
791 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
792 if !buf.is_empty() {
793 out.write_all(&buf)?;
794 }
795 }
796 Ok(())
797}
798
799#[inline]
804fn fields_prefix_zerocopy(
805 data: &[u8],
806 delim: u8,
807 line_delim: u8,
808 last_field: usize,
809 out: &mut impl Write,
810) -> io::Result<()> {
811 let mut start = 0;
812 let mut run_start: usize = 0;
813
814 for end_pos in memchr_iter(line_delim, data) {
815 let line = &data[start..end_pos];
816 let mut field_count = 1;
818 let mut truncate_at: Option<usize> = None;
819 for dpos in memchr_iter(delim, line) {
820 if field_count >= last_field {
821 truncate_at = Some(start + dpos);
822 break;
823 }
824 field_count += 1;
825 }
826
827 if let Some(trunc_pos) = truncate_at {
828 if run_start < start {
830 out.write_all(&data[run_start..start])?;
831 }
832 out.write_all(&data[start..trunc_pos])?;
833 out.write_all(&[line_delim])?;
834 run_start = end_pos + 1;
835 }
836 start = end_pos + 1;
838 }
839 if start < data.len() {
841 let line = &data[start..];
842 let mut field_count = 1;
843 let mut truncate_at: Option<usize> = None;
844 for dpos in memchr_iter(delim, line) {
845 if field_count >= last_field {
846 truncate_at = Some(start + dpos);
847 break;
848 }
849 field_count += 1;
850 }
851 if let Some(trunc_pos) = truncate_at {
852 if run_start < start {
853 out.write_all(&data[run_start..start])?;
854 }
855 out.write_all(&data[start..trunc_pos])?;
856 out.write_all(&[line_delim])?;
857 return Ok(());
858 }
859 }
860 if run_start < data.len() {
862 out.write_all(&data[run_start..])?;
863 if !data.is_empty() && *data.last().unwrap() != line_delim {
864 out.write_all(&[line_delim])?;
865 }
866 }
867 Ok(())
868}
869
870fn fields_prefix_chunk(
872 data: &[u8],
873 delim: u8,
874 line_delim: u8,
875 last_field: usize,
876 suppress: bool,
877 buf: &mut Vec<u8>,
878) {
879 let mut start = 0;
880 for end_pos in memchr_iter(line_delim, data) {
881 let line = &data[start..end_pos];
882 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
883 start = end_pos + 1;
884 }
885 if start < data.len() {
886 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
887 }
888}
889
890#[inline(always)]
892fn fields_prefix_line(
893 line: &[u8],
894 delim: u8,
895 line_delim: u8,
896 last_field: usize,
897 suppress: bool,
898 buf: &mut Vec<u8>,
899) {
900 if line.is_empty() {
901 if !suppress {
902 buf.push(line_delim);
903 }
904 return;
905 }
906
907 buf.reserve(line.len() + 1);
908
909 let mut field_count = 1;
910 let mut has_delim = false;
911
912 for pos in memchr_iter(delim, line) {
913 has_delim = true;
914 if field_count >= last_field {
915 unsafe {
916 buf_extend(buf, &line[..pos]);
917 buf_push(buf, line_delim);
918 }
919 return;
920 }
921 field_count += 1;
922 }
923
924 if !has_delim {
925 if !suppress {
926 unsafe {
927 buf_extend(buf, line);
928 buf_push(buf, line_delim);
929 }
930 }
931 return;
932 }
933
934 unsafe {
935 buf_extend(buf, line);
936 buf_push(buf, line_delim);
937 }
938}
939
940fn process_fields_suffix(
942 data: &[u8],
943 delim: u8,
944 line_delim: u8,
945 start_field: usize,
946 suppress: bool,
947 out: &mut impl Write,
948) -> io::Result<()> {
949 if data.len() >= PARALLEL_THRESHOLD {
950 let chunks = split_into_chunks(data, line_delim);
951 let results: Vec<Vec<u8>> = chunks
952 .par_iter()
953 .map(|chunk| {
954 let mut buf = Vec::with_capacity(chunk.len());
955 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
956 buf
957 })
958 .collect();
959 let slices: Vec<IoSlice> = results
961 .iter()
962 .filter(|r| !r.is_empty())
963 .map(|r| IoSlice::new(r))
964 .collect();
965 write_ioslices(out, &slices)?;
966 } else {
967 let mut buf = Vec::with_capacity(data.len());
968 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
969 if !buf.is_empty() {
970 out.write_all(&buf)?;
971 }
972 }
973 Ok(())
974}
975
976fn fields_suffix_chunk(
978 data: &[u8],
979 delim: u8,
980 line_delim: u8,
981 start_field: usize,
982 suppress: bool,
983 buf: &mut Vec<u8>,
984) {
985 let mut start = 0;
986 for end_pos in memchr_iter(line_delim, data) {
987 let line = &data[start..end_pos];
988 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
989 start = end_pos + 1;
990 }
991 if start < data.len() {
992 fields_suffix_line(
993 &data[start..],
994 delim,
995 line_delim,
996 start_field,
997 suppress,
998 buf,
999 );
1000 }
1001}
1002
1003#[inline(always)]
1005fn fields_suffix_line(
1006 line: &[u8],
1007 delim: u8,
1008 line_delim: u8,
1009 start_field: usize,
1010 suppress: bool,
1011 buf: &mut Vec<u8>,
1012) {
1013 if line.is_empty() {
1014 if !suppress {
1015 buf.push(line_delim);
1016 }
1017 return;
1018 }
1019
1020 buf.reserve(line.len() + 1);
1021
1022 let skip_delims = start_field - 1;
1023 let mut delim_count = 0;
1024 let mut has_delim = false;
1025
1026 for pos in memchr_iter(delim, line) {
1027 has_delim = true;
1028 delim_count += 1;
1029 if delim_count >= skip_delims {
1030 unsafe {
1031 buf_extend(buf, &line[pos + 1..]);
1032 buf_push(buf, line_delim);
1033 }
1034 return;
1035 }
1036 }
1037
1038 if !has_delim {
1039 if !suppress {
1040 unsafe {
1041 buf_extend(buf, line);
1042 buf_push(buf, line_delim);
1043 }
1044 }
1045 return;
1046 }
1047
1048 unsafe { buf_push(buf, line_delim) };
1050}
1051
1052fn process_fields_mid_range(
1055 data: &[u8],
1056 delim: u8,
1057 line_delim: u8,
1058 start_field: usize,
1059 end_field: usize,
1060 suppress: bool,
1061 out: &mut impl Write,
1062) -> io::Result<()> {
1063 if data.len() >= PARALLEL_THRESHOLD {
1064 let chunks = split_into_chunks(data, line_delim);
1065 let results: Vec<Vec<u8>> = chunks
1066 .par_iter()
1067 .map(|chunk| {
1068 let mut buf = Vec::with_capacity(chunk.len());
1069 fields_mid_range_chunk(
1070 chunk,
1071 delim,
1072 line_delim,
1073 start_field,
1074 end_field,
1075 suppress,
1076 &mut buf,
1077 );
1078 buf
1079 })
1080 .collect();
1081 let slices: Vec<IoSlice> = results
1082 .iter()
1083 .filter(|r| !r.is_empty())
1084 .map(|r| IoSlice::new(r))
1085 .collect();
1086 write_ioslices(out, &slices)?;
1087 } else {
1088 let mut buf = Vec::with_capacity(data.len());
1089 fields_mid_range_chunk(
1090 data,
1091 delim,
1092 line_delim,
1093 start_field,
1094 end_field,
1095 suppress,
1096 &mut buf,
1097 );
1098 if !buf.is_empty() {
1099 out.write_all(&buf)?;
1100 }
1101 }
1102 Ok(())
1103}
1104
1105fn fields_mid_range_chunk(
1107 data: &[u8],
1108 delim: u8,
1109 line_delim: u8,
1110 start_field: usize,
1111 end_field: usize,
1112 suppress: bool,
1113 buf: &mut Vec<u8>,
1114) {
1115 let mut start = 0;
1116 for end_pos in memchr_iter(line_delim, data) {
1117 let line = &data[start..end_pos];
1118 fields_mid_range_line(
1119 line,
1120 delim,
1121 line_delim,
1122 start_field,
1123 end_field,
1124 suppress,
1125 buf,
1126 );
1127 start = end_pos + 1;
1128 }
1129 if start < data.len() {
1130 fields_mid_range_line(
1131 &data[start..],
1132 delim,
1133 line_delim,
1134 start_field,
1135 end_field,
1136 suppress,
1137 buf,
1138 );
1139 }
1140}
1141
1142#[inline(always)]
1145fn fields_mid_range_line(
1146 line: &[u8],
1147 delim: u8,
1148 line_delim: u8,
1149 start_field: usize,
1150 end_field: usize,
1151 suppress: bool,
1152 buf: &mut Vec<u8>,
1153) {
1154 if line.is_empty() {
1155 if !suppress {
1156 buf.push(line_delim);
1157 }
1158 return;
1159 }
1160
1161 buf.reserve(line.len() + 1);
1162
1163 let skip_before = start_field - 1; let field_span = end_field - start_field; let mut delim_count = 0;
1167 let mut range_start = 0;
1168 let mut has_delim = false;
1169
1170 for pos in memchr_iter(delim, line) {
1171 has_delim = true;
1172 delim_count += 1;
1173 if delim_count == skip_before {
1174 range_start = pos + 1;
1175 }
1176 if delim_count == skip_before + field_span + 1 {
1177 if skip_before == 0 {
1179 range_start = 0;
1180 }
1181 unsafe {
1182 buf_extend(buf, &line[range_start..pos]);
1183 buf_push(buf, line_delim);
1184 }
1185 return;
1186 }
1187 }
1188
1189 if !has_delim {
1190 if !suppress {
1191 unsafe {
1192 buf_extend(buf, line);
1193 buf_push(buf, line_delim);
1194 }
1195 }
1196 return;
1197 }
1198
1199 if delim_count >= skip_before {
1201 if skip_before == 0 {
1203 range_start = 0;
1204 }
1205 unsafe {
1206 buf_extend(buf, &line[range_start..]);
1207 buf_push(buf, line_delim);
1208 }
1209 } else {
1210 unsafe { buf_push(buf, line_delim) };
1212 }
1213}
1214
1215fn process_nth_field_combined(
1220 data: &[u8],
1221 delim: u8,
1222 line_delim: u8,
1223 target_idx: usize,
1224 suppress: bool,
1225 buf: &mut Vec<u8>,
1226) {
1227 buf.reserve(data.len());
1228
1229 let mut line_start: usize = 0;
1230 let mut field_start: usize = 0;
1231 let mut field_idx: usize = 0;
1232 let mut has_delim = false;
1233 let mut emitted = false;
1234
1235 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1236 let byte = unsafe { *data.get_unchecked(pos) };
1237
1238 if byte == line_delim {
1239 if !emitted {
1241 if has_delim && field_idx == target_idx {
1242 unsafe {
1244 buf_extend(buf, &data[field_start..pos]);
1245 buf_push(buf, line_delim);
1246 }
1247 } else if has_delim {
1248 unsafe {
1250 buf_push(buf, line_delim);
1251 }
1252 } else if !suppress {
1253 unsafe {
1255 buf_extend(buf, &data[line_start..pos]);
1256 buf_push(buf, line_delim);
1257 }
1258 }
1259 }
1260 line_start = pos + 1;
1262 field_start = pos + 1;
1263 field_idx = 0;
1264 has_delim = false;
1265 emitted = false;
1266 } else {
1267 has_delim = true;
1269 if field_idx == target_idx {
1270 unsafe {
1271 buf_extend(buf, &data[field_start..pos]);
1272 buf_push(buf, line_delim);
1273 }
1274 emitted = true;
1275 }
1276 field_idx += 1;
1277 field_start = pos + 1;
1278 }
1279 }
1280
1281 if line_start < data.len() && !emitted {
1283 if has_delim && field_idx == target_idx {
1284 unsafe {
1285 buf_extend(buf, &data[field_start..data.len()]);
1286 buf_push(buf, line_delim);
1287 }
1288 } else if has_delim {
1289 unsafe {
1290 buf_push(buf, line_delim);
1291 }
1292 } else if !suppress {
1293 unsafe {
1294 buf_extend(buf, &data[line_start..data.len()]);
1295 buf_push(buf, line_delim);
1296 }
1297 }
1298 }
1299}
1300
1301#[inline]
1305fn single_field1_zerocopy(
1306 data: &[u8],
1307 delim: u8,
1308 line_delim: u8,
1309 out: &mut impl Write,
1310) -> io::Result<()> {
1311 let mut line_start: usize = 0;
1312 let mut run_start: usize = 0;
1313 let mut first_delim: Option<usize> = None;
1314
1315 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1316 let byte = unsafe { *data.get_unchecked(pos) };
1317
1318 if byte == line_delim {
1319 if let Some(dp) = first_delim {
1321 if run_start < line_start {
1324 out.write_all(&data[run_start..line_start])?;
1325 }
1326 out.write_all(&data[line_start..dp])?;
1327 out.write_all(&[line_delim])?;
1328 run_start = pos + 1;
1329 }
1330 line_start = pos + 1;
1332 first_delim = None;
1333 } else {
1334 if first_delim.is_none() {
1336 first_delim = Some(pos);
1337 }
1338 }
1339 }
1340
1341 if line_start < data.len() {
1343 if let Some(dp) = first_delim {
1344 if run_start < line_start {
1345 out.write_all(&data[run_start..line_start])?;
1346 }
1347 out.write_all(&data[line_start..dp])?;
1348 out.write_all(&[line_delim])?;
1349 return Ok(());
1350 }
1351 }
1352
1353 if run_start < data.len() {
1355 out.write_all(&data[run_start..])?;
1356 if !data.is_empty() && *data.last().unwrap() != line_delim {
1357 out.write_all(&[line_delim])?;
1358 }
1359 }
1360 Ok(())
1361}
1362
1363fn process_single_field_chunk(
1365 data: &[u8],
1366 delim: u8,
1367 target_idx: usize,
1368 line_delim: u8,
1369 suppress: bool,
1370 buf: &mut Vec<u8>,
1371) {
1372 let mut start = 0;
1373 for end_pos in memchr_iter(line_delim, data) {
1374 let line = &data[start..end_pos];
1375 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
1376 start = end_pos + 1;
1377 }
1378 if start < data.len() {
1379 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
1380 }
1381}
1382
1383#[inline(always)]
1386fn extract_single_field_line(
1387 line: &[u8],
1388 delim: u8,
1389 target_idx: usize,
1390 line_delim: u8,
1391 suppress: bool,
1392 buf: &mut Vec<u8>,
1393) {
1394 if line.is_empty() {
1395 if !suppress {
1396 buf.push(line_delim);
1397 }
1398 return;
1399 }
1400
1401 buf.reserve(line.len() + 1);
1403
1404 if target_idx == 0 {
1406 match memchr::memchr(delim, line) {
1407 Some(pos) => unsafe {
1408 buf_extend(buf, &line[..pos]);
1409 buf_push(buf, line_delim);
1410 },
1411 None => {
1412 if !suppress {
1413 unsafe {
1414 buf_extend(buf, line);
1415 buf_push(buf, line_delim);
1416 }
1417 }
1418 }
1419 }
1420 return;
1421 }
1422
1423 let mut field_start = 0;
1424 let mut field_idx = 0;
1425 let mut has_delim = false;
1426
1427 for pos in memchr_iter(delim, line) {
1428 has_delim = true;
1429 if field_idx == target_idx {
1430 unsafe {
1431 buf_extend(buf, &line[field_start..pos]);
1432 buf_push(buf, line_delim);
1433 }
1434 return;
1435 }
1436 field_idx += 1;
1437 field_start = pos + 1;
1438 }
1439
1440 if !has_delim {
1441 if !suppress {
1442 unsafe {
1443 buf_extend(buf, line);
1444 buf_push(buf, line_delim);
1445 }
1446 }
1447 return;
1448 }
1449
1450 if field_idx == target_idx {
1451 unsafe {
1452 buf_extend(buf, &line[field_start..]);
1453 buf_push(buf, line_delim);
1454 }
1455 } else {
1456 unsafe { buf_push(buf, line_delim) };
1457 }
1458}
1459
1460#[inline(always)]
1463fn extract_fields_to_buf(
1464 line: &[u8],
1465 delim: u8,
1466 ranges: &[Range],
1467 output_delim: &[u8],
1468 suppress: bool,
1469 max_field: usize,
1470 field_mask: u64,
1471 line_delim: u8,
1472 buf: &mut Vec<u8>,
1473 complement: bool,
1474) {
1475 let len = line.len();
1476
1477 if len == 0 {
1478 if !suppress {
1479 buf.push(line_delim);
1480 }
1481 return;
1482 }
1483
1484 let needed = len + output_delim.len() * 16 + 1;
1487 if buf.capacity() - buf.len() < needed {
1488 buf.reserve(needed);
1489 }
1490
1491 let mut field_num: usize = 1;
1492 let mut field_start: usize = 0;
1493 let mut first_output = true;
1494 let mut has_delim = false;
1495
1496 for delim_pos in memchr_iter(delim, line) {
1497 has_delim = true;
1498
1499 if is_selected(field_num, field_mask, ranges, complement) {
1500 if !first_output {
1501 unsafe { buf_extend(buf, output_delim) };
1502 }
1503 unsafe { buf_extend(buf, &line[field_start..delim_pos]) };
1504 first_output = false;
1505 }
1506
1507 field_num += 1;
1508 field_start = delim_pos + 1;
1509
1510 if field_num > max_field {
1511 break;
1512 }
1513 }
1514
1515 if (field_num <= max_field || complement)
1517 && has_delim
1518 && is_selected(field_num, field_mask, ranges, complement)
1519 {
1520 if !first_output {
1521 unsafe { buf_extend(buf, output_delim) };
1522 }
1523 unsafe { buf_extend(buf, &line[field_start..len]) };
1524 first_output = false;
1525 }
1526
1527 if !first_output {
1528 unsafe { buf_push(buf, line_delim) };
1529 } else if !has_delim {
1530 if !suppress {
1531 unsafe {
1532 buf_extend(buf, line);
1533 buf_push(buf, line_delim);
1534 }
1535 }
1536 } else {
1537 unsafe { buf_push(buf, line_delim) };
1538 }
1539}
1540
1541fn process_bytes_from_start(
1548 data: &[u8],
1549 max_bytes: usize,
1550 line_delim: u8,
1551 out: &mut impl Write,
1552) -> io::Result<()> {
1553 if data.len() >= PARALLEL_THRESHOLD {
1554 let chunks = split_into_chunks(data, line_delim);
1555 let results: Vec<Vec<u8>> = chunks
1556 .par_iter()
1557 .map(|chunk| {
1558 let mut buf = Vec::with_capacity(chunk.len());
1559 bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
1560 buf
1561 })
1562 .collect();
1563 let slices: Vec<IoSlice> = results
1565 .iter()
1566 .filter(|r| !r.is_empty())
1567 .map(|r| IoSlice::new(r))
1568 .collect();
1569 write_ioslices(out, &slices)?;
1570 } else {
1571 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
1575 }
1576 Ok(())
1577}
1578
1579#[inline]
1582fn bytes_from_start_zerocopy(
1583 data: &[u8],
1584 max_bytes: usize,
1585 line_delim: u8,
1586 out: &mut impl Write,
1587) -> io::Result<()> {
1588 let mut start = 0;
1589 let mut run_start: usize = 0;
1590
1591 for pos in memchr_iter(line_delim, data) {
1592 let line_len = pos - start;
1593 if line_len > max_bytes {
1594 if run_start < start {
1596 out.write_all(&data[run_start..start])?;
1597 }
1598 out.write_all(&data[start..start + max_bytes])?;
1599 out.write_all(&[line_delim])?;
1600 run_start = pos + 1;
1601 }
1602 start = pos + 1;
1604 }
1605 if start < data.len() {
1607 let line_len = data.len() - start;
1608 if line_len > max_bytes {
1609 if run_start < start {
1610 out.write_all(&data[run_start..start])?;
1611 }
1612 out.write_all(&data[start..start + max_bytes])?;
1613 out.write_all(&[line_delim])?;
1614 return Ok(());
1615 }
1616 }
1617 if run_start < data.len() {
1619 out.write_all(&data[run_start..])?;
1620 if !data.is_empty() && *data.last().unwrap() != line_delim {
1622 out.write_all(&[line_delim])?;
1623 }
1624 }
1625 Ok(())
1626}
1627
1628#[inline]
1631fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1632 buf.reserve(data.len());
1634
1635 let mut start = 0;
1636 for pos in memchr_iter(line_delim, data) {
1637 let line_len = pos - start;
1638 let take = line_len.min(max_bytes);
1639 unsafe {
1640 buf_extend(buf, &data[start..start + take]);
1641 buf_push(buf, line_delim);
1642 }
1643 start = pos + 1;
1644 }
1645 if start < data.len() {
1647 let line_len = data.len() - start;
1648 let take = line_len.min(max_bytes);
1649 unsafe {
1650 buf_extend(buf, &data[start..start + take]);
1651 buf_push(buf, line_delim);
1652 }
1653 }
1654}
1655
1656fn process_bytes_from_offset(
1658 data: &[u8],
1659 skip_bytes: usize,
1660 line_delim: u8,
1661 out: &mut impl Write,
1662) -> io::Result<()> {
1663 if data.len() >= PARALLEL_THRESHOLD {
1664 let chunks = split_into_chunks(data, line_delim);
1665 let results: Vec<Vec<u8>> = chunks
1666 .par_iter()
1667 .map(|chunk| {
1668 let mut buf = Vec::with_capacity(chunk.len());
1669 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
1670 buf
1671 })
1672 .collect();
1673 let slices: Vec<IoSlice> = results
1675 .iter()
1676 .filter(|r| !r.is_empty())
1677 .map(|r| IoSlice::new(r))
1678 .collect();
1679 write_ioslices(out, &slices)?;
1680 } else {
1681 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
1683 }
1684 Ok(())
1685}
1686
1687#[inline]
1691fn bytes_from_offset_zerocopy(
1692 data: &[u8],
1693 skip_bytes: usize,
1694 line_delim: u8,
1695 out: &mut impl Write,
1696) -> io::Result<()> {
1697 let delim_buf = [line_delim];
1698 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
1699
1700 let mut start = 0;
1701 for pos in memchr_iter(line_delim, data) {
1702 let line_len = pos - start;
1703 if line_len > skip_bytes {
1704 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
1705 }
1706 iov.push(IoSlice::new(&delim_buf));
1707 if iov.len() >= MAX_IOV - 1 {
1709 write_ioslices(out, &iov)?;
1710 iov.clear();
1711 }
1712 start = pos + 1;
1713 }
1714 if start < data.len() {
1715 let line_len = data.len() - start;
1716 if line_len > skip_bytes {
1717 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
1718 }
1719 iov.push(IoSlice::new(&delim_buf));
1720 }
1721 if !iov.is_empty() {
1722 write_ioslices(out, &iov)?;
1723 }
1724 Ok(())
1725}
1726
1727#[inline]
1730fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1731 buf.reserve(data.len());
1732
1733 let mut start = 0;
1734 for pos in memchr_iter(line_delim, data) {
1735 let line_len = pos - start;
1736 if line_len > skip_bytes {
1737 unsafe {
1738 buf_extend(buf, &data[start + skip_bytes..pos]);
1739 }
1740 }
1741 unsafe {
1742 buf_push(buf, line_delim);
1743 }
1744 start = pos + 1;
1745 }
1746 if start < data.len() {
1747 let line_len = data.len() - start;
1748 if line_len > skip_bytes {
1749 unsafe {
1750 buf_extend(buf, &data[start + skip_bytes..data.len()]);
1751 }
1752 }
1753 unsafe {
1754 buf_push(buf, line_delim);
1755 }
1756 }
1757}
1758
1759fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1761 let line_delim = cfg.line_delim;
1762 let ranges = cfg.ranges;
1763 let complement = cfg.complement;
1764 let output_delim = cfg.output_delim;
1765
1766 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
1768 let max_bytes = ranges[0].end;
1769 if max_bytes < usize::MAX {
1770 return process_bytes_from_start(data, max_bytes, line_delim, out);
1771 }
1772 }
1773
1774 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
1776 let skip_bytes = ranges[0].start.saturating_sub(1);
1777 if skip_bytes > 0 {
1778 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
1779 }
1780 }
1781
1782 if data.len() >= PARALLEL_THRESHOLD {
1783 let chunks = split_into_chunks(data, line_delim);
1784 let results: Vec<Vec<u8>> = chunks
1785 .par_iter()
1786 .map(|chunk| {
1787 let mut buf = Vec::with_capacity(chunk.len());
1788 process_bytes_chunk(
1789 chunk,
1790 ranges,
1791 complement,
1792 output_delim,
1793 line_delim,
1794 &mut buf,
1795 );
1796 buf
1797 })
1798 .collect();
1799 let slices: Vec<IoSlice> = results
1801 .iter()
1802 .filter(|r| !r.is_empty())
1803 .map(|r| IoSlice::new(r))
1804 .collect();
1805 write_ioslices(out, &slices)?;
1806 } else {
1807 let mut buf = Vec::with_capacity(data.len());
1808 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
1809 if !buf.is_empty() {
1810 out.write_all(&buf)?;
1811 }
1812 }
1813 Ok(())
1814}
1815
1816fn process_bytes_chunk(
1818 data: &[u8],
1819 ranges: &[Range],
1820 complement: bool,
1821 output_delim: &[u8],
1822 line_delim: u8,
1823 buf: &mut Vec<u8>,
1824) {
1825 let mut start = 0;
1826 for end_pos in memchr_iter(line_delim, data) {
1827 let line = &data[start..end_pos];
1828 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
1829 buf.push(line_delim);
1830 start = end_pos + 1;
1831 }
1832 if start < data.len() {
1833 cut_bytes_to_buf(&data[start..], ranges, complement, output_delim, buf);
1834 buf.push(line_delim);
1835 }
1836}
1837
1838#[inline(always)]
1841fn cut_bytes_to_buf(
1842 line: &[u8],
1843 ranges: &[Range],
1844 complement: bool,
1845 output_delim: &[u8],
1846 buf: &mut Vec<u8>,
1847) {
1848 let len = line.len();
1849 let mut first_range = true;
1850
1851 buf.reserve(len + output_delim.len() * ranges.len() + 1);
1853
1854 if complement {
1855 let mut pos: usize = 1;
1856 for r in ranges {
1857 let rs = r.start;
1858 let re = r.end.min(len);
1859 if pos < rs {
1860 if !first_range && !output_delim.is_empty() {
1861 unsafe { buf_extend(buf, output_delim) };
1862 }
1863 unsafe { buf_extend(buf, &line[pos - 1..rs - 1]) };
1864 first_range = false;
1865 }
1866 pos = re + 1;
1867 if pos > len {
1868 break;
1869 }
1870 }
1871 if pos <= len {
1872 if !first_range && !output_delim.is_empty() {
1873 unsafe { buf_extend(buf, output_delim) };
1874 }
1875 unsafe { buf_extend(buf, &line[pos - 1..len]) };
1876 }
1877 } else if output_delim.is_empty() && ranges.len() == 1 {
1878 let start = ranges[0].start.saturating_sub(1);
1880 let end = ranges[0].end.min(len);
1881 if start < len {
1882 unsafe { buf_extend(buf, &line[start..end]) };
1883 }
1884 } else {
1885 for r in ranges {
1886 let start = r.start.saturating_sub(1);
1887 let end = r.end.min(len);
1888 if start >= len {
1889 break;
1890 }
1891 if !first_range && !output_delim.is_empty() {
1892 unsafe { buf_extend(buf, output_delim) };
1893 }
1894 unsafe { buf_extend(buf, &line[start..end]) };
1895 first_range = false;
1896 }
1897 }
1898}
1899
1900#[inline]
1904pub fn cut_fields(
1905 line: &[u8],
1906 delim: u8,
1907 ranges: &[Range],
1908 complement: bool,
1909 output_delim: &[u8],
1910 suppress_no_delim: bool,
1911 out: &mut impl Write,
1912) -> io::Result<bool> {
1913 if memchr::memchr(delim, line).is_none() {
1914 if !suppress_no_delim {
1915 out.write_all(line)?;
1916 return Ok(true);
1917 }
1918 return Ok(false);
1919 }
1920
1921 let mut field_num: usize = 1;
1922 let mut field_start: usize = 0;
1923 let mut first_output = true;
1924
1925 for delim_pos in memchr_iter(delim, line) {
1926 let selected = in_ranges(ranges, field_num) != complement;
1927 if selected {
1928 if !first_output {
1929 out.write_all(output_delim)?;
1930 }
1931 out.write_all(&line[field_start..delim_pos])?;
1932 first_output = false;
1933 }
1934 field_start = delim_pos + 1;
1935 field_num += 1;
1936 }
1937
1938 let selected = in_ranges(ranges, field_num) != complement;
1939 if selected {
1940 if !first_output {
1941 out.write_all(output_delim)?;
1942 }
1943 out.write_all(&line[field_start..])?;
1944 }
1945
1946 Ok(true)
1947}
1948
1949#[inline]
1951pub fn cut_bytes(
1952 line: &[u8],
1953 ranges: &[Range],
1954 complement: bool,
1955 output_delim: &[u8],
1956 out: &mut impl Write,
1957) -> io::Result<bool> {
1958 let mut first_range = true;
1959
1960 if complement {
1961 let len = line.len();
1962 let mut comp_ranges = Vec::new();
1963 let mut pos: usize = 1;
1964 for r in ranges {
1965 let rs = r.start;
1966 let re = r.end.min(len);
1967 if pos < rs {
1968 comp_ranges.push((pos, rs - 1));
1969 }
1970 pos = re + 1;
1971 if pos > len {
1972 break;
1973 }
1974 }
1975 if pos <= len {
1976 comp_ranges.push((pos, len));
1977 }
1978 for &(s, e) in &comp_ranges {
1979 if !first_range && !output_delim.is_empty() {
1980 out.write_all(output_delim)?;
1981 }
1982 out.write_all(&line[s - 1..e])?;
1983 first_range = false;
1984 }
1985 } else {
1986 for r in ranges {
1987 let start = r.start.saturating_sub(1);
1988 let end = r.end.min(line.len());
1989 if start >= line.len() {
1990 break;
1991 }
1992 if !first_range && !output_delim.is_empty() {
1993 out.write_all(output_delim)?;
1994 }
1995 out.write_all(&line[start..end])?;
1996 first_range = false;
1997 }
1998 }
1999 Ok(true)
2000}
2001
2002pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
2004 match cfg.mode {
2005 CutMode::Fields => process_fields_fast(data, cfg, out),
2006 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
2007 }
2008}
2009
2010pub fn process_cut_reader<R: BufRead>(
2014 mut reader: R,
2015 cfg: &CutConfig,
2016 out: &mut impl Write,
2017) -> io::Result<()> {
2018 const CHUNK_SIZE: usize = 4 * 1024 * 1024; let line_delim = cfg.line_delim;
2020
2021 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
2024
2025 loop {
2026 buf.reserve(CHUNK_SIZE);
2028 let read_start = buf.len();
2029 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
2030 let n = read_fully(&mut reader, &mut buf[read_start..])?;
2031 buf.truncate(read_start + n);
2032
2033 if buf.is_empty() {
2034 break;
2035 }
2036
2037 if n == 0 {
2038 process_cut_data(&buf, cfg, out)?;
2040 break;
2041 }
2042
2043 let process_end = match memchr::memrchr(line_delim, &buf) {
2045 Some(pos) => pos + 1,
2046 None => {
2047 continue;
2049 }
2050 };
2051
2052 process_cut_data(&buf[..process_end], cfg, out)?;
2054
2055 let leftover_len = buf.len() - process_end;
2057 if leftover_len > 0 {
2058 buf.copy_within(process_end.., 0);
2059 }
2060 buf.truncate(leftover_len);
2061 }
2062
2063 Ok(())
2064}
2065
/// Reads from `reader` until `buf` is full or end of input is reached.
///
/// Returns the number of bytes placed at the start of `buf`; a value smaller
/// than `buf.len()` means end of input was hit. Reads that fail with
/// `ErrorKind::Interrupted` are retried, including the very first one.
///
/// # Errors
///
/// Returns any other I/O error from `reader`.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            // End of input.
            Ok(0) => break,
            Ok(n) => total += n,
            // Interrupted reads are transient; try again.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
2085
/// What unit a cut operation selects by.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Select by byte position within each line.
    Bytes,
    /// Select by character position; `process_cut_data` currently routes
    /// this through the same byte-oriented path as [`CutMode::Bytes`].
    Characters,
    /// Select delimiter-separated fields.
    Fields,
}