1use memchr::memchr_iter;
2use std::io::{self, BufRead, IoSlice, Write};
3
/// Minimum input size in bytes (2 MiB) at which the input is split into
/// line-aligned chunks and processed on several threads.
const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;

/// Maximum number of `IoSlice`s handed to a single `write_vectored` call.
// NOTE(review): 1024 presumably mirrors the common IOV_MAX limit — confirm.
const MAX_IOV: usize = 1024;
12
/// Options for one cut operation, borrowed from the caller.
pub struct CutConfig<'a> {
    /// Selection mode.
    // NOTE(review): `CutMode` is declared outside this view; presumably
    // bytes / characters / fields — confirm.
    pub mode: CutMode,
    /// Selected ranges (1-based, inclusive). The field fast paths read
    /// `ranges[0]` and `ranges.last()`, so they assume the sorted, merged
    /// form that `parse_ranges` returns.
    pub ranges: &'a [Range],
    /// When true, everything *not* covered by `ranges` is selected.
    pub complement: bool,
    /// Input field delimiter byte.
    pub delim: u8,
    /// Bytes written between selected fields in the output.
    pub output_delim: &'a [u8],
    /// When true, lines that contain no delimiter are dropped instead of
    /// being echoed unchanged.
    pub suppress_no_delim: bool,
    /// Byte that terminates each input and output line.
    pub line_delim: u8,
}
23
/// An inclusive, 1-based range of positions or fields.
///
/// `parse_ranges` stores `usize::MAX` in `end` for an open-ended range
/// such as `3-`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position (1-based).
    pub start: usize,
    /// Last selected position, inclusive.
    pub end: usize,
}
30
31pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
34 let mut ranges = Vec::new();
35
36 for part in spec.split(',') {
37 let part = part.trim();
38 if part.is_empty() {
39 continue;
40 }
41
42 if let Some(idx) = part.find('-') {
43 let left = &part[..idx];
44 let right = &part[idx + 1..];
45
46 let start = if left.is_empty() {
47 1
48 } else {
49 left.parse::<usize>()
50 .map_err(|_| format!("invalid range: '{}'", part))?
51 };
52
53 let end = if right.is_empty() {
54 usize::MAX
55 } else {
56 right
57 .parse::<usize>()
58 .map_err(|_| format!("invalid range: '{}'", part))?
59 };
60
61 if start == 0 {
62 return Err("fields and positions are numbered from 1".to_string());
63 }
64 if start > end {
65 return Err(format!("invalid decreasing range: '{}'", part));
66 }
67
68 ranges.push(Range { start, end });
69 } else {
70 let n = part
71 .parse::<usize>()
72 .map_err(|_| format!("invalid field: '{}'", part))?;
73 if n == 0 {
74 return Err("fields and positions are numbered from 1".to_string());
75 }
76 ranges.push(Range { start: n, end: n });
77 }
78 }
79
80 if ranges.is_empty() {
81 return Err("you must specify a list of bytes, characters, or fields".to_string());
82 }
83
84 ranges.sort_by_key(|r| (r.start, r.end));
86 let mut merged = vec![ranges[0].clone()];
87 for r in &ranges[1..] {
88 let last = merged.last_mut().unwrap();
89 if r.start <= last.end.saturating_add(1) {
90 last.end = last.end.max(r.end);
91 } else {
92 merged.push(r.clone());
93 }
94 }
95
96 Ok(merged)
97}
98
99#[inline(always)]
102fn in_ranges(ranges: &[Range], pos: usize) -> bool {
103 for r in ranges {
104 if pos < r.start {
105 return false;
106 }
107 if pos <= r.end {
108 return true;
109 }
110 }
111 false
112}
113
114#[inline]
117fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
118 let mut mask: u64 = 0;
119 for i in 1..=64u32 {
120 let in_range = in_ranges(ranges, i as usize);
121 if in_range != complement {
122 mask |= 1u64 << (i - 1);
123 }
124 }
125 mask
126}
127
128#[inline(always)]
130fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
131 if field_num <= 64 {
132 (mask >> (field_num - 1)) & 1 == 1
133 } else {
134 in_ranges(ranges, field_num) != complement
135 }
136}
137
/// Appends `data` to `buf` without checking or growing its capacity.
///
/// # Safety
///
/// The caller must guarantee that `buf` has at least `data.len()` bytes of
/// spare capacity (`buf.capacity() - buf.len() >= data.len()`); otherwise
/// this writes past the end of the allocation.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    unsafe {
        let old_len = buf.len();
        let dst = buf.as_mut_ptr().add(old_len);
        dst.copy_from_nonoverlapping(data.as_ptr(), data.len());
        buf.set_len(old_len + data.len());
    }
}
150
/// Appends the single byte `b` to `buf` without checking or growing capacity.
///
/// # Safety
///
/// The caller must guarantee that `buf` has at least one byte of spare
/// capacity (`buf.len() < buf.capacity()`).
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    unsafe {
        let old_len = buf.len();
        buf.as_mut_ptr().add(old_len).write(b);
        buf.set_len(old_len + 1);
    }
}
161
162#[inline]
166fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
167 if slices.is_empty() {
168 return Ok(());
169 }
170 for batch in slices.chunks(MAX_IOV) {
171 let total: usize = batch.iter().map(|s| s.len()).sum();
172 let written = out.write_vectored(batch)?;
173 if written >= total {
174 continue;
175 }
176 if written == 0 {
177 return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
178 }
179 write_ioslices_slow(out, batch, written)?;
180 }
181 Ok(())
182}
183
/// Finishes a partially completed vectored write.
///
/// The first `skip` bytes of `slices` (counted across slice boundaries) are
/// treated as already written; everything after them is sent with
/// `write_all`, one slice at a time.
#[cold]
#[inline(never)]
fn write_ioslices_slow(
    out: &mut impl Write,
    slices: &[IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for slice in slices {
        match skip.checked_sub(slice.len()) {
            // The whole slice was already written; keep consuming `skip`.
            Some(still_to_skip) => skip = still_to_skip,
            // This slice is the first one with unwritten bytes.
            None => {
                out.write_all(&slice[skip..])?;
                skip = 0;
            }
        }
    }
    Ok(())
}
203
/// Returns the parallelism reported by the standard library, or 1 when it
/// cannot be determined.
#[inline]
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    }
}
213
/// Applies `f` to every chunk and returns the results in chunk order.
///
/// Zero or one chunk is handled on the calling thread. Otherwise one scoped
/// thread is spawned per chunk and the results are collected by joining the
/// threads in spawn order.
///
/// # Panics
///
/// Panics if any worker thread panics.
fn par_process<'a, F>(chunks: &[&'a [u8]], f: F) -> Vec<Vec<u8>>
where
    F: Fn(&'a [u8]) -> Vec<u8> + Sync,
{
    match chunks {
        [] => Vec::new(),
        [only] => vec![f(*only)],
        _ => std::thread::scope(|scope| {
            let worker = &f;

            let mut handles = Vec::with_capacity(chunks.len());
            for &chunk in chunks {
                handles.push(scope.spawn(move || worker(chunk)));
            }

            let mut outputs = Vec::with_capacity(handles.len());
            for handle in handles {
                outputs.push(handle.join().unwrap());
            }
            outputs
        }),
    }
}
233
234fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
236 let num_threads = num_cpus();
237 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
238 return vec![data];
239 }
240
241 let chunk_size = data.len() / num_threads;
242 let mut chunks = Vec::with_capacity(num_threads);
243 let mut pos = 0;
244
245 for _ in 0..num_threads - 1 {
246 let target = pos + chunk_size;
247 if target >= data.len() {
248 break;
249 }
250 let boundary = memchr::memchr(line_delim, &data[target..])
251 .map(|p| target + p + 1)
252 .unwrap_or(data.len());
253 if boundary > pos {
254 chunks.push(&data[pos..boundary]);
255 }
256 pos = boundary;
257 }
258
259 if pos < data.len() {
260 chunks.push(&data[pos..]);
261 }
262
263 chunks
264}
265
266fn process_fields_multi_select(
273 data: &[u8],
274 delim: u8,
275 line_delim: u8,
276 ranges: &[Range],
277 suppress: bool,
278 out: &mut impl Write,
279) -> io::Result<()> {
280 let max_field = ranges.last().map_or(0, |r| r.end);
281
282 if data.len() >= PARALLEL_THRESHOLD {
283 let chunks = split_into_chunks(data, line_delim);
284 let results = par_process(&chunks, |chunk| {
285 let mut buf = Vec::with_capacity(chunk.len() * 3 / 4);
286 multi_select_chunk(
287 chunk, delim, line_delim, ranges, max_field, suppress, &mut buf,
288 );
289 buf
290 });
291 let slices: Vec<IoSlice> = results
292 .iter()
293 .filter(|r| !r.is_empty())
294 .map(|r| IoSlice::new(r))
295 .collect();
296 write_ioslices(out, &slices)?;
297 } else {
298 let mut buf = Vec::with_capacity(data.len() * 3 / 4);
299 multi_select_chunk(
300 data, delim, line_delim, ranges, max_field, suppress, &mut buf,
301 );
302 if !buf.is_empty() {
303 out.write_all(&buf)?;
304 }
305 }
306 Ok(())
307}
308
309fn multi_select_chunk(
317 data: &[u8],
318 delim: u8,
319 line_delim: u8,
320 ranges: &[Range],
321 max_field: usize,
322 suppress: bool,
323 buf: &mut Vec<u8>,
324) {
325 buf.reserve(data.len());
326 let base = data.as_ptr();
327 let mut start = 0;
328 for end_pos in memchr_iter(line_delim, data) {
329 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
330 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
331 start = end_pos + 1;
332 }
333 if start < data.len() {
334 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
335 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
336 }
337}
338
339#[inline(always)]
344fn multi_select_line(
345 line: &[u8],
346 delim: u8,
347 line_delim: u8,
348 ranges: &[Range],
349 max_field: usize,
350 suppress: bool,
351 buf: &mut Vec<u8>,
352) {
353 let len = line.len();
354 if len == 0 {
355 if !suppress {
356 unsafe { buf_push(buf, line_delim) };
357 }
358 return;
359 }
360
361 let base = line.as_ptr();
363
364 let mut delim_pos = [0usize; 64];
367 let mut num_delims: usize = 0;
368 let max_delims = max_field.min(64);
369
370 for pos in memchr_iter(delim, line) {
371 if num_delims < max_delims {
372 delim_pos[num_delims] = pos;
373 num_delims += 1;
374 if num_delims >= max_delims {
375 break;
376 }
377 }
378 }
379
380 if num_delims == 0 {
381 if !suppress {
382 unsafe {
383 buf_extend(buf, line);
384 buf_push(buf, line_delim);
385 }
386 }
387 return;
388 }
389
390 let total_fields = num_delims + 1;
394 let mut first_output = true;
395
396 for r in ranges {
397 let range_start = r.start;
398 let range_end = r.end.min(total_fields);
399 if range_start > total_fields {
400 break;
401 }
402 for field_num in range_start..=range_end {
403 if field_num > total_fields {
404 break;
405 }
406
407 let field_start = if field_num == 1 {
408 0
409 } else if field_num - 2 < num_delims {
410 delim_pos[field_num - 2] + 1
411 } else {
412 continue;
413 };
414 let field_end = if field_num <= num_delims {
415 delim_pos[field_num - 1]
416 } else {
417 len
418 };
419
420 if !first_output {
421 unsafe { buf_push(buf, delim) };
422 }
423 unsafe {
424 buf_extend(
425 buf,
426 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
427 );
428 }
429 first_output = false;
430 }
431 }
432
433 unsafe { buf_push(buf, line_delim) };
434}
435
436fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
440 let delim = cfg.delim;
441 let line_delim = cfg.line_delim;
442 let ranges = cfg.ranges;
443 let complement = cfg.complement;
444 let output_delim = cfg.output_delim;
445 let suppress = cfg.suppress_no_delim;
446
447 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
455 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
456 }
457
458 if complement
460 && ranges.len() == 1
461 && output_delim.len() == 1
462 && output_delim[0] == delim
463 && ranges[0].start == ranges[0].end
464 {
465 return process_complement_single_field(
466 data,
467 delim,
468 line_delim,
469 ranges[0].start,
470 suppress,
471 out,
472 );
473 }
474
475 if complement
478 && ranges.len() == 1
479 && ranges[0].start > 1
480 && ranges[0].end < usize::MAX
481 && output_delim.len() == 1
482 && output_delim[0] == delim
483 {
484 return process_complement_range(
485 data,
486 delim,
487 line_delim,
488 ranges[0].start,
489 ranges[0].end,
490 suppress,
491 out,
492 );
493 }
494
495 if !complement
497 && ranges.len() == 1
498 && ranges[0].start == 1
499 && output_delim.len() == 1
500 && output_delim[0] == delim
501 && ranges[0].end < usize::MAX
502 {
503 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
504 }
505
506 if !complement
508 && ranges.len() == 1
509 && ranges[0].end == usize::MAX
510 && ranges[0].start > 1
511 && output_delim.len() == 1
512 && output_delim[0] == delim
513 {
514 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
515 }
516
517 if !complement
519 && ranges.len() == 1
520 && ranges[0].start > 1
521 && ranges[0].end < usize::MAX
522 && output_delim.len() == 1
523 && output_delim[0] == delim
524 {
525 return process_fields_mid_range(
526 data,
527 delim,
528 line_delim,
529 ranges[0].start,
530 ranges[0].end,
531 suppress,
532 out,
533 );
534 }
535
536 if !complement
542 && ranges.len() > 1
543 && ranges.last().map_or(false, |r| r.end < usize::MAX)
544 && output_delim.len() == 1
545 && output_delim[0] == delim
546 && delim != line_delim
547 {
548 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
549 }
550
551 let max_field = if complement {
553 usize::MAX
554 } else {
555 ranges.last().map(|r| r.end).unwrap_or(0)
556 };
557 let field_mask = compute_field_mask(ranges, complement);
558
559 if data.len() >= PARALLEL_THRESHOLD {
560 let chunks = split_into_chunks(data, line_delim);
561 let results = par_process(&chunks, |chunk| {
562 let mut buf = Vec::with_capacity(chunk.len());
563 process_fields_chunk(
564 chunk,
565 delim,
566 ranges,
567 output_delim,
568 suppress,
569 max_field,
570 field_mask,
571 line_delim,
572 complement,
573 &mut buf,
574 );
575 buf
576 });
577 let slices: Vec<IoSlice> = results
578 .iter()
579 .filter(|r| !r.is_empty())
580 .map(|r| IoSlice::new(r))
581 .collect();
582 write_ioslices(out, &slices)?;
583 } else {
584 let mut buf = Vec::with_capacity(data.len());
585 process_fields_chunk(
586 data,
587 delim,
588 ranges,
589 output_delim,
590 suppress,
591 max_field,
592 field_mask,
593 line_delim,
594 complement,
595 &mut buf,
596 );
597 if !buf.is_empty() {
598 out.write_all(&buf)?;
599 }
600 }
601 Ok(())
602}
603
604fn process_fields_chunk(
609 data: &[u8],
610 delim: u8,
611 ranges: &[Range],
612 output_delim: &[u8],
613 suppress: bool,
614 max_field: usize,
615 field_mask: u64,
616 line_delim: u8,
617 complement: bool,
618 buf: &mut Vec<u8>,
619) {
620 if delim != line_delim && max_field < usize::MAX && !complement {
627 buf.reserve(data.len());
628 let mut start = 0;
629 for end_pos in memchr_iter(line_delim, data) {
630 let line = &data[start..end_pos];
631 extract_fields_to_buf(
632 line,
633 delim,
634 ranges,
635 output_delim,
636 suppress,
637 max_field,
638 field_mask,
639 line_delim,
640 buf,
641 complement,
642 );
643 start = end_pos + 1;
644 }
645 if start < data.len() {
646 extract_fields_to_buf(
647 &data[start..],
648 delim,
649 ranges,
650 output_delim,
651 suppress,
652 max_field,
653 field_mask,
654 line_delim,
655 buf,
656 complement,
657 );
658 }
659 return;
660 }
661
662 if delim != line_delim {
666 buf.reserve(data.len());
667
668 let data_len = data.len();
669 let base = data.as_ptr();
670 let mut line_start: usize = 0;
671 let mut field_start: usize = 0;
672 let mut field_num: usize = 1;
673 let mut first_output = true;
674 let mut has_delim = false;
675
676 for pos in memchr::memchr2_iter(delim, line_delim, data) {
677 let byte = unsafe { *base.add(pos) };
678
679 if byte == line_delim {
680 if (field_num <= max_field || complement)
682 && has_delim
683 && is_selected(field_num, field_mask, ranges, complement)
684 {
685 if !first_output {
686 unsafe { buf_extend(buf, output_delim) };
687 }
688 unsafe {
689 buf_extend(
690 buf,
691 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
692 )
693 };
694 first_output = false;
695 }
696
697 if !first_output {
698 unsafe { buf_push(buf, line_delim) };
699 } else if !has_delim {
700 if !suppress {
701 unsafe {
702 buf_extend(
703 buf,
704 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
705 );
706 buf_push(buf, line_delim);
707 }
708 }
709 } else {
710 unsafe { buf_push(buf, line_delim) };
711 }
712
713 line_start = pos + 1;
715 field_start = pos + 1;
716 field_num = 1;
717 first_output = true;
718 has_delim = false;
719 } else {
720 has_delim = true;
722
723 if is_selected(field_num, field_mask, ranges, complement) {
724 if !first_output {
725 unsafe { buf_extend(buf, output_delim) };
726 }
727 unsafe {
728 buf_extend(
729 buf,
730 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
731 )
732 };
733 first_output = false;
734 }
735
736 field_num += 1;
737 field_start = pos + 1;
738 }
739 }
740
741 if line_start < data_len {
743 if line_start < data_len {
744 if (field_num <= max_field || complement)
745 && has_delim
746 && is_selected(field_num, field_mask, ranges, complement)
747 {
748 if !first_output {
749 unsafe { buf_extend(buf, output_delim) };
750 }
751 unsafe {
752 buf_extend(
753 buf,
754 std::slice::from_raw_parts(
755 base.add(field_start),
756 data_len - field_start,
757 ),
758 )
759 };
760 first_output = false;
761 }
762
763 if !first_output {
764 unsafe { buf_push(buf, line_delim) };
765 } else if !has_delim {
766 if !suppress {
767 unsafe {
768 buf_extend(
769 buf,
770 std::slice::from_raw_parts(
771 base.add(line_start),
772 data_len - line_start,
773 ),
774 );
775 buf_push(buf, line_delim);
776 }
777 }
778 } else {
779 unsafe { buf_push(buf, line_delim) };
780 }
781 }
782 }
783
784 return;
785 }
786
787 let mut start = 0;
789 for end_pos in memchr_iter(line_delim, data) {
790 let line = &data[start..end_pos];
791 extract_fields_to_buf(
792 line,
793 delim,
794 ranges,
795 output_delim,
796 suppress,
797 max_field,
798 field_mask,
799 line_delim,
800 buf,
801 complement,
802 );
803 start = end_pos + 1;
804 }
805 if start < data.len() {
806 extract_fields_to_buf(
807 &data[start..],
808 delim,
809 ranges,
810 output_delim,
811 suppress,
812 max_field,
813 field_mask,
814 line_delim,
815 buf,
816 complement,
817 );
818 }
819}
820
821fn process_single_field(
827 data: &[u8],
828 delim: u8,
829 line_delim: u8,
830 target: usize,
831 suppress: bool,
832 out: &mut impl Write,
833) -> io::Result<()> {
834 let target_idx = target - 1;
835
836 const FIELD_PARALLEL_MIN: usize = 2 * 1024 * 1024;
839
840 if delim != line_delim {
841 if target_idx == 0 && !suppress {
845 if data.len() >= FIELD_PARALLEL_MIN {
846 return single_field1_parallel(data, delim, line_delim, out);
847 }
848 let mut buf = Vec::with_capacity(data.len());
853 single_field1_to_buf(data, delim, line_delim, &mut buf);
854 if !buf.is_empty() {
855 out.write_all(&buf)?;
856 }
857 return Ok(());
858 }
859
860 if data.len() >= FIELD_PARALLEL_MIN {
864 let chunks = split_into_chunks(data, line_delim);
865 let results = par_process(&chunks, |chunk| {
866 let mut buf = Vec::with_capacity(chunk.len() / 2);
867 process_single_field_chunk(
868 chunk, delim, target_idx, line_delim, suppress, &mut buf,
869 );
870 buf
871 });
872 let slices: Vec<IoSlice> = results
873 .iter()
874 .filter(|r| !r.is_empty())
875 .map(|r| IoSlice::new(r))
876 .collect();
877 write_ioslices(out, &slices)?;
878 } else {
879 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
880 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
881 if !buf.is_empty() {
882 out.write_all(&buf)?;
883 }
884 }
885 return Ok(());
886 }
887
888 if data.len() >= FIELD_PARALLEL_MIN {
890 let chunks = split_into_chunks(data, line_delim);
891 let results = par_process(&chunks, |chunk| {
892 let mut buf = Vec::with_capacity(chunk.len() / 4);
893 process_single_field_chunk(chunk, delim, target_idx, line_delim, suppress, &mut buf);
894 buf
895 });
896 let slices: Vec<IoSlice> = results
897 .iter()
898 .filter(|r| !r.is_empty())
899 .map(|r| IoSlice::new(r))
900 .collect();
901 write_ioslices(out, &slices)?;
902 } else {
903 let mut buf = Vec::with_capacity(data.len() / 4);
904 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
905 if !buf.is_empty() {
906 out.write_all(&buf)?;
907 }
908 }
909 Ok(())
910}
911
912fn process_complement_range(
915 data: &[u8],
916 delim: u8,
917 line_delim: u8,
918 skip_start: usize,
919 skip_end: usize,
920 suppress: bool,
921 out: &mut impl Write,
922) -> io::Result<()> {
923 if data.len() >= PARALLEL_THRESHOLD {
924 let chunks = split_into_chunks(data, line_delim);
925 let results = par_process(&chunks, |chunk| {
926 let mut buf = Vec::with_capacity(chunk.len());
927 complement_range_chunk(
928 chunk, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
929 );
930 buf
931 });
932 let slices: Vec<IoSlice> = results
933 .iter()
934 .filter(|r| !r.is_empty())
935 .map(|r| IoSlice::new(r))
936 .collect();
937 write_ioslices(out, &slices)?;
938 } else {
939 let mut buf = Vec::with_capacity(data.len());
940 complement_range_chunk(
941 data, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
942 );
943 if !buf.is_empty() {
944 out.write_all(&buf)?;
945 }
946 }
947 Ok(())
948}
949
950fn complement_range_chunk(
952 data: &[u8],
953 delim: u8,
954 skip_start: usize,
955 skip_end: usize,
956 line_delim: u8,
957 suppress: bool,
958 buf: &mut Vec<u8>,
959) {
960 buf.reserve(data.len());
962 let mut start = 0;
963 for end_pos in memchr_iter(line_delim, data) {
964 let line = &data[start..end_pos];
965 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
966 start = end_pos + 1;
967 }
968 if start < data.len() {
969 complement_range_line(
970 &data[start..],
971 delim,
972 skip_start,
973 skip_end,
974 line_delim,
975 suppress,
976 buf,
977 );
978 }
979}
980
981#[inline(always)]
988fn complement_range_line(
989 line: &[u8],
990 delim: u8,
991 skip_start: usize,
992 skip_end: usize,
993 line_delim: u8,
994 suppress: bool,
995 buf: &mut Vec<u8>,
996) {
997 let len = line.len();
998 if len == 0 {
999 if !suppress {
1000 unsafe { buf_push(buf, line_delim) };
1001 }
1002 return;
1003 }
1004
1005 let base = line.as_ptr();
1007
1008 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
1018
1019 let mut delim_count: usize = 0;
1021 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
1025 delim_count += 1;
1026 if delim_count == need_prefix_delims {
1027 prefix_end_pos = pos;
1028 }
1029 if delim_count == total_need {
1030 suffix_start_pos = pos + 1;
1031 break;
1032 }
1033 }
1034
1035 if delim_count == 0 {
1036 if !suppress {
1038 unsafe {
1039 buf_extend(buf, line);
1040 buf_push(buf, line_delim);
1041 }
1042 }
1043 return;
1044 }
1045
1046 if delim_count < need_prefix_delims {
1052 unsafe {
1054 buf_extend(buf, line);
1055 buf_push(buf, line_delim);
1056 }
1057 return;
1058 }
1059
1060 let has_prefix = need_prefix_delims > 0;
1061 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
1062
1063 if has_prefix && has_suffix {
1064 unsafe {
1066 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1067 buf_push(buf, delim);
1068 buf_extend(
1069 buf,
1070 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1071 );
1072 buf_push(buf, line_delim);
1073 }
1074 } else if has_prefix {
1075 unsafe {
1077 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1078 buf_push(buf, line_delim);
1079 }
1080 } else if has_suffix {
1081 unsafe {
1083 buf_extend(
1084 buf,
1085 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1086 );
1087 buf_push(buf, line_delim);
1088 }
1089 } else {
1090 unsafe { buf_push(buf, line_delim) };
1092 }
1093}
1094
1095fn process_complement_single_field(
1097 data: &[u8],
1098 delim: u8,
1099 line_delim: u8,
1100 skip_field: usize,
1101 suppress: bool,
1102 out: &mut impl Write,
1103) -> io::Result<()> {
1104 let skip_idx = skip_field - 1;
1105
1106 if data.len() >= PARALLEL_THRESHOLD {
1107 let chunks = split_into_chunks(data, line_delim);
1108 let results = par_process(&chunks, |chunk| {
1109 let mut buf = Vec::with_capacity(chunk.len());
1110 complement_single_field_chunk(chunk, delim, skip_idx, line_delim, suppress, &mut buf);
1111 buf
1112 });
1113 let slices: Vec<IoSlice> = results
1114 .iter()
1115 .filter(|r| !r.is_empty())
1116 .map(|r| IoSlice::new(r))
1117 .collect();
1118 write_ioslices(out, &slices)?;
1119 } else {
1120 let mut buf = Vec::with_capacity(data.len());
1121 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
1122 if !buf.is_empty() {
1123 out.write_all(&buf)?;
1124 }
1125 }
1126 Ok(())
1127}
1128
1129fn complement_single_field_chunk(
1135 data: &[u8],
1136 delim: u8,
1137 skip_idx: usize,
1138 line_delim: u8,
1139 suppress: bool,
1140 buf: &mut Vec<u8>,
1141) {
1142 if delim == line_delim {
1144 buf.reserve(data.len());
1145 let mut start = 0;
1146 for end_pos in memchr_iter(line_delim, data) {
1147 let line = &data[start..end_pos];
1148 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1149 start = end_pos + 1;
1150 }
1151 if start < data.len() {
1152 complement_single_field_line(
1153 &data[start..],
1154 delim,
1155 skip_idx,
1156 line_delim,
1157 suppress,
1158 buf,
1159 );
1160 }
1161 return;
1162 }
1163
1164 buf.reserve(data.len());
1165 let base = data.as_ptr();
1166 let data_len = data.len();
1167 let need_before = skip_idx; let need_total = skip_idx + 1; let mut line_start: usize = 0;
1172 let mut delim_count: usize = 0;
1173 let mut skip_start_pos: usize = 0;
1174 let mut skip_end_pos: usize = 0;
1175 let mut found_start = need_before == 0; let mut found_end = false;
1177
1178 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1179 let byte = unsafe { *base.add(pos) };
1180
1181 if byte == line_delim {
1182 if delim_count == 0 {
1184 if !suppress {
1186 unsafe {
1187 buf_extend(
1188 buf,
1189 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1190 );
1191 buf_push(buf, line_delim);
1192 }
1193 }
1194 } else if !found_start || delim_count < need_before {
1195 unsafe {
1197 buf_extend(
1198 buf,
1199 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1200 );
1201 buf_push(buf, line_delim);
1202 }
1203 } else {
1204 let has_prefix = skip_idx > 0;
1205 let has_suffix = found_end && skip_end_pos < pos;
1206
1207 if has_prefix && has_suffix {
1208 unsafe {
1209 buf_extend(
1210 buf,
1211 std::slice::from_raw_parts(
1212 base.add(line_start),
1213 skip_start_pos - 1 - line_start,
1214 ),
1215 );
1216 buf_push(buf, delim);
1217 buf_extend(
1218 buf,
1219 std::slice::from_raw_parts(
1220 base.add(skip_end_pos + 1),
1221 pos - skip_end_pos - 1,
1222 ),
1223 );
1224 buf_push(buf, line_delim);
1225 }
1226 } else if has_prefix {
1227 unsafe {
1228 buf_extend(
1229 buf,
1230 std::slice::from_raw_parts(
1231 base.add(line_start),
1232 skip_start_pos - 1 - line_start,
1233 ),
1234 );
1235 buf_push(buf, line_delim);
1236 }
1237 } else if has_suffix {
1238 unsafe {
1239 buf_extend(
1240 buf,
1241 std::slice::from_raw_parts(
1242 base.add(skip_end_pos + 1),
1243 pos - skip_end_pos - 1,
1244 ),
1245 );
1246 buf_push(buf, line_delim);
1247 }
1248 } else {
1249 unsafe { buf_push(buf, line_delim) };
1250 }
1251 }
1252
1253 line_start = pos + 1;
1255 delim_count = 0;
1256 skip_start_pos = 0;
1257 skip_end_pos = 0;
1258 found_start = need_before == 0;
1259 found_end = false;
1260 } else {
1261 delim_count += 1;
1263 if delim_count == need_before {
1264 skip_start_pos = pos + 1;
1265 found_start = true;
1266 }
1267 if delim_count == need_total {
1268 skip_end_pos = pos;
1269 found_end = true;
1270 }
1271 }
1272 }
1273
1274 if line_start < data_len {
1276 let pos = data_len;
1277 if delim_count == 0 {
1278 if !suppress {
1279 unsafe {
1280 buf_extend(
1281 buf,
1282 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1283 );
1284 buf_push(buf, line_delim);
1285 }
1286 }
1287 } else if !found_start || delim_count < need_before {
1288 unsafe {
1289 buf_extend(
1290 buf,
1291 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1292 );
1293 buf_push(buf, line_delim);
1294 }
1295 } else {
1296 let has_prefix = skip_idx > 0;
1297 let has_suffix = found_end && skip_end_pos < pos;
1298
1299 if has_prefix && has_suffix {
1300 unsafe {
1301 buf_extend(
1302 buf,
1303 std::slice::from_raw_parts(
1304 base.add(line_start),
1305 skip_start_pos - 1 - line_start,
1306 ),
1307 );
1308 buf_push(buf, delim);
1309 buf_extend(
1310 buf,
1311 std::slice::from_raw_parts(
1312 base.add(skip_end_pos + 1),
1313 pos - skip_end_pos - 1,
1314 ),
1315 );
1316 buf_push(buf, line_delim);
1317 }
1318 } else if has_prefix {
1319 unsafe {
1320 buf_extend(
1321 buf,
1322 std::slice::from_raw_parts(
1323 base.add(line_start),
1324 skip_start_pos - 1 - line_start,
1325 ),
1326 );
1327 buf_push(buf, line_delim);
1328 }
1329 } else if has_suffix {
1330 unsafe {
1331 buf_extend(
1332 buf,
1333 std::slice::from_raw_parts(
1334 base.add(skip_end_pos + 1),
1335 pos - skip_end_pos - 1,
1336 ),
1337 );
1338 buf_push(buf, line_delim);
1339 }
1340 } else {
1341 unsafe { buf_push(buf, line_delim) };
1342 }
1343 }
1344 }
1345}
1346
1347#[inline(always)]
1349fn complement_single_field_line(
1350 line: &[u8],
1351 delim: u8,
1352 skip_idx: usize,
1353 line_delim: u8,
1354 suppress: bool,
1355 buf: &mut Vec<u8>,
1356) {
1357 let len = line.len();
1358 if len == 0 {
1359 if !suppress {
1360 unsafe { buf_push(buf, line_delim) };
1361 }
1362 return;
1363 }
1364
1365 let base = line.as_ptr();
1366 let need_before = skip_idx;
1367 let need_total = skip_idx + 1;
1368
1369 let mut delim_count: usize = 0;
1370 let mut skip_start_pos: usize = 0;
1371 let mut skip_end_pos: usize = len;
1372 let mut found_end = false;
1373
1374 for pos in memchr_iter(delim, line) {
1375 delim_count += 1;
1376 if delim_count == need_before {
1377 skip_start_pos = pos + 1;
1378 }
1379 if delim_count == need_total {
1380 skip_end_pos = pos;
1381 found_end = true;
1382 break;
1383 }
1384 }
1385
1386 if delim_count == 0 {
1387 if !suppress {
1388 unsafe {
1389 buf_extend(buf, line);
1390 buf_push(buf, line_delim);
1391 }
1392 }
1393 return;
1394 }
1395
1396 if delim_count < need_before {
1397 unsafe {
1398 buf_extend(buf, line);
1399 buf_push(buf, line_delim);
1400 }
1401 return;
1402 }
1403
1404 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1405 let has_suffix = found_end && skip_end_pos < len;
1406
1407 if has_prefix && has_suffix {
1408 unsafe {
1409 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1410 buf_push(buf, delim);
1411 buf_extend(
1412 buf,
1413 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1414 );
1415 buf_push(buf, line_delim);
1416 }
1417 } else if has_prefix {
1418 unsafe {
1419 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1420 buf_push(buf, line_delim);
1421 }
1422 } else if has_suffix {
1423 unsafe {
1424 buf_extend(
1425 buf,
1426 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1427 );
1428 buf_push(buf, line_delim);
1429 }
1430 } else {
1431 unsafe { buf_push(buf, line_delim) };
1432 }
1433}
1434
1435fn process_fields_prefix(
1439 data: &[u8],
1440 delim: u8,
1441 line_delim: u8,
1442 last_field: usize,
1443 suppress: bool,
1444 out: &mut impl Write,
1445) -> io::Result<()> {
1446 if data.len() >= PARALLEL_THRESHOLD {
1447 let chunks = split_into_chunks(data, line_delim);
1448 let results = par_process(&chunks, |chunk| {
1449 let mut buf = Vec::with_capacity(chunk.len());
1450 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
1451 buf
1452 });
1453 let slices: Vec<IoSlice> = results
1454 .iter()
1455 .filter(|r| !r.is_empty())
1456 .map(|r| IoSlice::new(r))
1457 .collect();
1458 write_ioslices(out, &slices)?;
1459 } else if !suppress {
1460 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1464 } else {
1465 let mut buf = Vec::with_capacity(data.len());
1466 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
1467 if !buf.is_empty() {
1468 out.write_all(&buf)?;
1469 }
1470 }
1471 Ok(())
1472}
1473
1474#[inline]
1480fn fields_prefix_zerocopy(
1481 data: &[u8],
1482 delim: u8,
1483 line_delim: u8,
1484 last_field: usize,
1485 out: &mut impl Write,
1486) -> io::Result<()> {
1487 let newline_buf: [u8; 1] = [line_delim];
1488 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1489 let mut start = 0;
1490 let mut run_start: usize = 0;
1491
1492 for end_pos in memchr_iter(line_delim, data) {
1493 let line = &data[start..end_pos];
1494 let mut field_count = 1;
1495 let mut truncate_at: Option<usize> = None;
1496 for dpos in memchr_iter(delim, line) {
1497 if field_count >= last_field {
1498 truncate_at = Some(start + dpos);
1499 break;
1500 }
1501 field_count += 1;
1502 }
1503
1504 if let Some(trunc_pos) = truncate_at {
1505 if run_start < start {
1506 iov.push(IoSlice::new(&data[run_start..start]));
1507 }
1508 iov.push(IoSlice::new(&data[start..trunc_pos]));
1509 iov.push(IoSlice::new(&newline_buf));
1510 run_start = end_pos + 1;
1511
1512 if iov.len() >= MAX_IOV - 2 {
1513 write_ioslices(out, &iov)?;
1514 iov.clear();
1515 }
1516 }
1517 start = end_pos + 1;
1518 }
1519 if start < data.len() {
1521 let line = &data[start..];
1522 let mut field_count = 1;
1523 let mut truncate_at: Option<usize> = None;
1524 for dpos in memchr_iter(delim, line) {
1525 if field_count >= last_field {
1526 truncate_at = Some(start + dpos);
1527 break;
1528 }
1529 field_count += 1;
1530 }
1531 if let Some(trunc_pos) = truncate_at {
1532 if run_start < start {
1533 iov.push(IoSlice::new(&data[run_start..start]));
1534 }
1535 iov.push(IoSlice::new(&data[start..trunc_pos]));
1536 iov.push(IoSlice::new(&newline_buf));
1537 if !iov.is_empty() {
1538 write_ioslices(out, &iov)?;
1539 }
1540 return Ok(());
1541 }
1542 }
1543 if run_start < data.len() {
1545 iov.push(IoSlice::new(&data[run_start..]));
1546 if !data.is_empty() && *data.last().unwrap() != line_delim {
1547 iov.push(IoSlice::new(&newline_buf));
1548 }
1549 }
1550 if !iov.is_empty() {
1551 write_ioslices(out, &iov)?;
1552 }
1553 Ok(())
1554}
1555
1556fn fields_prefix_chunk(
1558 data: &[u8],
1559 delim: u8,
1560 line_delim: u8,
1561 last_field: usize,
1562 suppress: bool,
1563 buf: &mut Vec<u8>,
1564) {
1565 buf.reserve(data.len());
1566 let mut start = 0;
1567 for end_pos in memchr_iter(line_delim, data) {
1568 let line = &data[start..end_pos];
1569 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1570 start = end_pos + 1;
1571 }
1572 if start < data.len() {
1573 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1574 }
1575}
1576
1577#[inline(always)]
1580fn fields_prefix_line(
1581 line: &[u8],
1582 delim: u8,
1583 line_delim: u8,
1584 last_field: usize,
1585 suppress: bool,
1586 buf: &mut Vec<u8>,
1587) {
1588 let len = line.len();
1589 if len == 0 {
1590 if !suppress {
1591 unsafe { buf_push(buf, line_delim) };
1592 }
1593 return;
1594 }
1595
1596 let base = line.as_ptr();
1598
1599 let mut field_count = 1usize;
1600 let mut has_delim = false;
1601
1602 for pos in memchr_iter(delim, line) {
1603 has_delim = true;
1604 if field_count >= last_field {
1605 unsafe {
1606 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1607 buf_push(buf, line_delim);
1608 }
1609 return;
1610 }
1611 field_count += 1;
1612 }
1613
1614 if !has_delim {
1615 if !suppress {
1616 unsafe {
1617 buf_extend(buf, line);
1618 buf_push(buf, line_delim);
1619 }
1620 }
1621 return;
1622 }
1623
1624 unsafe {
1625 buf_extend(buf, line);
1626 buf_push(buf, line_delim);
1627 }
1628}
1629
1630fn process_fields_suffix(
1632 data: &[u8],
1633 delim: u8,
1634 line_delim: u8,
1635 start_field: usize,
1636 suppress: bool,
1637 out: &mut impl Write,
1638) -> io::Result<()> {
1639 if data.len() >= PARALLEL_THRESHOLD {
1640 let chunks = split_into_chunks(data, line_delim);
1641 let results = par_process(&chunks, |chunk| {
1642 let mut buf = Vec::with_capacity(chunk.len());
1643 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
1644 buf
1645 });
1646 let slices: Vec<IoSlice> = results
1647 .iter()
1648 .filter(|r| !r.is_empty())
1649 .map(|r| IoSlice::new(r))
1650 .collect();
1651 write_ioslices(out, &slices)?;
1652 } else {
1653 let mut buf = Vec::with_capacity(data.len());
1654 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
1655 if !buf.is_empty() {
1656 out.write_all(&buf)?;
1657 }
1658 }
1659 Ok(())
1660}
1661
1662fn fields_suffix_chunk(
1664 data: &[u8],
1665 delim: u8,
1666 line_delim: u8,
1667 start_field: usize,
1668 suppress: bool,
1669 buf: &mut Vec<u8>,
1670) {
1671 buf.reserve(data.len());
1672 let mut start = 0;
1673 for end_pos in memchr_iter(line_delim, data) {
1674 let line = &data[start..end_pos];
1675 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1676 start = end_pos + 1;
1677 }
1678 if start < data.len() {
1679 fields_suffix_line(
1680 &data[start..],
1681 delim,
1682 line_delim,
1683 start_field,
1684 suppress,
1685 buf,
1686 );
1687 }
1688}
1689
1690#[inline(always)]
1693fn fields_suffix_line(
1694 line: &[u8],
1695 delim: u8,
1696 line_delim: u8,
1697 start_field: usize,
1698 suppress: bool,
1699 buf: &mut Vec<u8>,
1700) {
1701 let len = line.len();
1702 if len == 0 {
1703 if !suppress {
1704 unsafe { buf_push(buf, line_delim) };
1705 }
1706 return;
1707 }
1708
1709 let base = line.as_ptr();
1711
1712 let skip_delims = start_field - 1;
1713 let mut delim_count = 0usize;
1714 let mut has_delim = false;
1715
1716 for pos in memchr_iter(delim, line) {
1717 has_delim = true;
1718 delim_count += 1;
1719 if delim_count >= skip_delims {
1720 unsafe {
1721 buf_extend(
1722 buf,
1723 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1724 );
1725 buf_push(buf, line_delim);
1726 }
1727 return;
1728 }
1729 }
1730
1731 if !has_delim {
1732 if !suppress {
1733 unsafe {
1734 buf_extend(buf, line);
1735 buf_push(buf, line_delim);
1736 }
1737 }
1738 return;
1739 }
1740
1741 unsafe { buf_push(buf, line_delim) };
1743}
1744
1745fn process_fields_mid_range(
1748 data: &[u8],
1749 delim: u8,
1750 line_delim: u8,
1751 start_field: usize,
1752 end_field: usize,
1753 suppress: bool,
1754 out: &mut impl Write,
1755) -> io::Result<()> {
1756 if data.len() >= PARALLEL_THRESHOLD {
1757 let chunks = split_into_chunks(data, line_delim);
1758 let results = par_process(&chunks, |chunk| {
1759 let mut buf = Vec::with_capacity(chunk.len());
1760 fields_mid_range_chunk(
1761 chunk,
1762 delim,
1763 line_delim,
1764 start_field,
1765 end_field,
1766 suppress,
1767 &mut buf,
1768 );
1769 buf
1770 });
1771 let slices: Vec<IoSlice> = results
1772 .iter()
1773 .filter(|r| !r.is_empty())
1774 .map(|r| IoSlice::new(r))
1775 .collect();
1776 write_ioslices(out, &slices)?;
1777 } else {
1778 let mut buf = Vec::with_capacity(data.len());
1779 fields_mid_range_chunk(
1780 data,
1781 delim,
1782 line_delim,
1783 start_field,
1784 end_field,
1785 suppress,
1786 &mut buf,
1787 );
1788 if !buf.is_empty() {
1789 out.write_all(&buf)?;
1790 }
1791 }
1792 Ok(())
1793}
1794
1795fn fields_mid_range_chunk(
1797 data: &[u8],
1798 delim: u8,
1799 line_delim: u8,
1800 start_field: usize,
1801 end_field: usize,
1802 suppress: bool,
1803 buf: &mut Vec<u8>,
1804) {
1805 buf.reserve(data.len());
1806 let mut start = 0;
1807 for end_pos in memchr_iter(line_delim, data) {
1808 let line = &data[start..end_pos];
1809 fields_mid_range_line(
1810 line,
1811 delim,
1812 line_delim,
1813 start_field,
1814 end_field,
1815 suppress,
1816 buf,
1817 );
1818 start = end_pos + 1;
1819 }
1820 if start < data.len() {
1821 fields_mid_range_line(
1822 &data[start..],
1823 delim,
1824 line_delim,
1825 start_field,
1826 end_field,
1827 suppress,
1828 buf,
1829 );
1830 }
1831}
1832
1833#[inline(always)]
1837fn fields_mid_range_line(
1838 line: &[u8],
1839 delim: u8,
1840 line_delim: u8,
1841 start_field: usize,
1842 end_field: usize,
1843 suppress: bool,
1844 buf: &mut Vec<u8>,
1845) {
1846 let len = line.len();
1847 if len == 0 {
1848 if !suppress {
1849 unsafe { buf_push(buf, line_delim) };
1850 }
1851 return;
1852 }
1853
1854 let base = line.as_ptr();
1856
1857 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
1861 let mut delim_count = 0;
1862 let mut range_start = 0;
1863 let mut has_delim = false;
1864
1865 for pos in memchr_iter(delim, line) {
1866 has_delim = true;
1867 delim_count += 1;
1868 if delim_count == skip_before {
1869 range_start = pos + 1;
1870 }
1871 if delim_count == target_end_delim {
1872 if skip_before == 0 {
1873 range_start = 0;
1874 }
1875 unsafe {
1876 buf_extend(
1877 buf,
1878 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
1879 );
1880 buf_push(buf, line_delim);
1881 }
1882 return;
1883 }
1884 }
1885
1886 if !has_delim {
1887 if !suppress {
1888 unsafe {
1889 buf_extend(buf, line);
1890 buf_push(buf, line_delim);
1891 }
1892 }
1893 return;
1894 }
1895
1896 if delim_count >= skip_before {
1898 if skip_before == 0 {
1900 range_start = 0;
1901 }
1902 unsafe {
1903 buf_extend(
1904 buf,
1905 std::slice::from_raw_parts(base.add(range_start), len - range_start),
1906 );
1907 buf_push(buf, line_delim);
1908 }
1909 } else {
1910 unsafe { buf_push(buf, line_delim) };
1912 }
1913}
1914
1915fn single_field1_parallel(
1926 data: &[u8],
1927 delim: u8,
1928 line_delim: u8,
1929 out: &mut impl Write,
1930) -> io::Result<()> {
1931 let chunks = split_into_chunks(data, line_delim);
1932 let results = par_process(&chunks, |chunk| {
1933 let mut buf = Vec::with_capacity(chunk.len());
1934 single_field1_to_buf(chunk, delim, line_delim, &mut buf);
1935 buf
1936 });
1937 let slices: Vec<IoSlice> = results
1938 .iter()
1939 .filter(|r| !r.is_empty())
1940 .map(|r| IoSlice::new(r))
1941 .collect();
1942 write_ioslices(out, &slices)
1943}
1944
1945#[inline]
1951fn single_field1_to_buf(data: &[u8], delim: u8, line_delim: u8, buf: &mut Vec<u8>) {
1952 use memchr::memchr2;
1953 buf.reserve(data.len());
1954 let mut pos = 0;
1955 while pos < data.len() {
1956 match memchr2(delim, line_delim, &data[pos..]) {
1957 None => {
1958 unsafe {
1960 buf_extend(buf, &data[pos..]);
1961 }
1962 break;
1963 }
1964 Some(offset) => {
1965 let actual = pos + offset;
1966 if data[actual] == line_delim {
1967 unsafe {
1969 buf_extend(buf, &data[pos..actual + 1]);
1970 }
1971 pos = actual + 1;
1972 } else {
1973 unsafe {
1975 buf_extend(buf, &data[pos..actual]);
1976 buf_push(buf, line_delim);
1977 }
1978 match memchr::memchr(line_delim, &data[actual + 1..]) {
1980 None => {
1981 pos = data.len();
1982 }
1983 Some(nl_off) => {
1984 pos = actual + 1 + nl_off + 1;
1985 }
1986 }
1987 }
1988 }
1989 }
1990 }
1991}
1992
1993#[inline]
2002#[allow(dead_code)]
2003fn single_field1_zerocopy(
2004 data: &[u8],
2005 delim: u8,
2006 line_delim: u8,
2007 out: &mut impl Write,
2008) -> io::Result<()> {
2009 let newline_buf: [u8; 1] = [line_delim];
2010
2011 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2012 let mut run_start: usize = 0;
2013 let mut start = 0;
2014
2015 for end_pos in memchr_iter(line_delim, data) {
2016 let line = &data[start..end_pos];
2017 if let Some(dp) = memchr::memchr(delim, line) {
2018 if run_start < start {
2021 iov.push(IoSlice::new(&data[run_start..start]));
2022 }
2023 iov.push(IoSlice::new(&data[start..start + dp]));
2024 iov.push(IoSlice::new(&newline_buf));
2025 run_start = end_pos + 1;
2026
2027 if iov.len() >= MAX_IOV - 2 {
2028 write_ioslices(out, &iov)?;
2029 iov.clear();
2030 }
2031 }
2032 start = end_pos + 1;
2034 }
2035
2036 if start < data.len() {
2038 let line = &data[start..];
2039 if let Some(dp) = memchr::memchr(delim, line) {
2040 if run_start < start {
2041 iov.push(IoSlice::new(&data[run_start..start]));
2042 }
2043 iov.push(IoSlice::new(&data[start..start + dp]));
2044 iov.push(IoSlice::new(&newline_buf));
2045 if !iov.is_empty() {
2046 write_ioslices(out, &iov)?;
2047 }
2048 return Ok(());
2049 }
2050 }
2051
2052 if run_start < data.len() {
2054 iov.push(IoSlice::new(&data[run_start..]));
2055 if !data.is_empty() && *data.last().unwrap() != line_delim {
2056 iov.push(IoSlice::new(&newline_buf));
2057 }
2058 }
2059 if !iov.is_empty() {
2060 write_ioslices(out, &iov)?;
2061 }
2062 Ok(())
2063}
2064
2065fn process_single_field_chunk(
2067 data: &[u8],
2068 delim: u8,
2069 target_idx: usize,
2070 line_delim: u8,
2071 suppress: bool,
2072 buf: &mut Vec<u8>,
2073) {
2074 buf.reserve(data.len());
2076 let mut start = 0;
2077 for end_pos in memchr_iter(line_delim, data) {
2078 let line = &data[start..end_pos];
2079 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
2080 start = end_pos + 1;
2081 }
2082 if start < data.len() {
2083 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
2084 }
2085}
2086
2087#[inline(always)]
2092fn extract_single_field_line(
2093 line: &[u8],
2094 delim: u8,
2095 target_idx: usize,
2096 line_delim: u8,
2097 suppress: bool,
2098 buf: &mut Vec<u8>,
2099) {
2100 let len = line.len();
2101 if len == 0 {
2102 if !suppress {
2103 unsafe { buf_push(buf, line_delim) };
2104 }
2105 return;
2106 }
2107
2108 let base = line.as_ptr();
2110
2111 if target_idx == 0 {
2113 match memchr::memchr(delim, line) {
2114 Some(pos) => unsafe {
2115 buf_extend(buf, std::slice::from_raw_parts(base, pos));
2116 buf_push(buf, line_delim);
2117 },
2118 None => {
2119 if !suppress {
2120 unsafe {
2121 buf_extend(buf, line);
2122 buf_push(buf, line_delim);
2123 }
2124 }
2125 }
2126 }
2127 return;
2128 }
2129
2130 let mut field_start = 0;
2132 let mut field_idx = 0;
2133 let mut has_delim = false;
2134
2135 for pos in memchr_iter(delim, line) {
2136 has_delim = true;
2137 if field_idx == target_idx {
2138 unsafe {
2139 buf_extend(
2140 buf,
2141 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
2142 );
2143 buf_push(buf, line_delim);
2144 }
2145 return;
2146 }
2147 field_idx += 1;
2148 field_start = pos + 1;
2149 }
2150
2151 if !has_delim {
2152 if !suppress {
2153 unsafe {
2154 buf_extend(buf, line);
2155 buf_push(buf, line_delim);
2156 }
2157 }
2158 return;
2159 }
2160
2161 if field_idx == target_idx {
2162 unsafe {
2163 buf_extend(
2164 buf,
2165 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2166 );
2167 buf_push(buf, line_delim);
2168 }
2169 } else {
2170 unsafe { buf_push(buf, line_delim) };
2171 }
2172}
2173
2174#[inline(always)]
2178fn extract_fields_to_buf(
2179 line: &[u8],
2180 delim: u8,
2181 ranges: &[Range],
2182 output_delim: &[u8],
2183 suppress: bool,
2184 max_field: usize,
2185 field_mask: u64,
2186 line_delim: u8,
2187 buf: &mut Vec<u8>,
2188 complement: bool,
2189) {
2190 let len = line.len();
2191
2192 if len == 0 {
2193 if !suppress {
2194 buf.push(line_delim);
2195 }
2196 return;
2197 }
2198
2199 let needed = len + output_delim.len() * 16 + 1;
2202 if buf.capacity() - buf.len() < needed {
2203 buf.reserve(needed);
2204 }
2205
2206 let base = line.as_ptr();
2207 let mut field_num: usize = 1;
2208 let mut field_start: usize = 0;
2209 let mut first_output = true;
2210 let mut has_delim = false;
2211
2212 for delim_pos in memchr_iter(delim, line) {
2214 has_delim = true;
2215
2216 if is_selected(field_num, field_mask, ranges, complement) {
2217 if !first_output {
2218 unsafe { buf_extend(buf, output_delim) };
2219 }
2220 unsafe {
2221 buf_extend(
2222 buf,
2223 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2224 )
2225 };
2226 first_output = false;
2227 }
2228
2229 field_num += 1;
2230 field_start = delim_pos + 1;
2231
2232 if field_num > max_field {
2233 break;
2234 }
2235 }
2236
2237 if (field_num <= max_field || complement)
2239 && has_delim
2240 && is_selected(field_num, field_mask, ranges, complement)
2241 {
2242 if !first_output {
2243 unsafe { buf_extend(buf, output_delim) };
2244 }
2245 unsafe {
2246 buf_extend(
2247 buf,
2248 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2249 )
2250 };
2251 first_output = false;
2252 }
2253
2254 if !first_output {
2255 unsafe { buf_push(buf, line_delim) };
2256 } else if !has_delim {
2257 if !suppress {
2258 unsafe {
2259 buf_extend(buf, line);
2260 buf_push(buf, line_delim);
2261 }
2262 }
2263 } else {
2264 unsafe { buf_push(buf, line_delim) };
2265 }
2266}
2267
2268fn process_bytes_from_start(
2275 data: &[u8],
2276 max_bytes: usize,
2277 line_delim: u8,
2278 out: &mut impl Write,
2279) -> io::Result<()> {
2280 if max_bytes > 0 && max_bytes < usize::MAX {
2285 let mut start = 0;
2286 let mut all_fit = true;
2287 for pos in memchr_iter(line_delim, data) {
2288 if pos - start > max_bytes {
2289 all_fit = false;
2290 break;
2291 }
2292 start = pos + 1;
2293 }
2294 if all_fit && start < data.len() && data.len() - start > max_bytes {
2296 all_fit = false;
2297 }
2298 if all_fit {
2299 if !data.is_empty() && data[data.len() - 1] == line_delim {
2301 return out.write_all(data);
2302 } else if !data.is_empty() {
2303 out.write_all(data)?;
2304 return out.write_all(&[line_delim]);
2305 }
2306 return Ok(());
2307 }
2308 }
2309
2310 if data.len() >= PARALLEL_THRESHOLD {
2311 let chunks = split_into_chunks(data, line_delim);
2312 let results = par_process(&chunks, |chunk| {
2313 let est_out = (chunk.len() / 4).max(max_bytes + 2);
2314 let mut buf = Vec::with_capacity(est_out.min(chunk.len()));
2315 bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
2316 buf
2317 });
2318 let slices: Vec<IoSlice> = results
2319 .iter()
2320 .filter(|r| !r.is_empty())
2321 .map(|r| IoSlice::new(r))
2322 .collect();
2323 write_ioslices(out, &slices)?;
2324 } else {
2325 if max_bytes <= 512 {
2331 let est_out = (data.len() / 4).max(max_bytes + 2);
2334 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2335 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2336 if !buf.is_empty() {
2337 out.write_all(&buf)?;
2338 }
2339 } else {
2340 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2344 }
2345 }
2346 Ok(())
2347}
2348
2349#[inline]
2354fn bytes_from_start_zerocopy(
2355 data: &[u8],
2356 max_bytes: usize,
2357 line_delim: u8,
2358 out: &mut impl Write,
2359) -> io::Result<()> {
2360 let newline_buf: [u8; 1] = [line_delim];
2361 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2362 let mut start = 0;
2363 let mut run_start: usize = 0;
2364
2365 for pos in memchr_iter(line_delim, data) {
2366 let line_len = pos - start;
2367 if line_len > max_bytes {
2368 if run_start < start {
2370 iov.push(IoSlice::new(&data[run_start..start]));
2371 }
2372 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2373 iov.push(IoSlice::new(&newline_buf));
2374 run_start = pos + 1;
2375
2376 if iov.len() >= MAX_IOV - 2 {
2377 write_ioslices(out, &iov)?;
2378 iov.clear();
2379 }
2380 }
2381 start = pos + 1;
2382 }
2383 if start < data.len() {
2385 let line_len = data.len() - start;
2386 if line_len > max_bytes {
2387 if run_start < start {
2388 iov.push(IoSlice::new(&data[run_start..start]));
2389 }
2390 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2391 iov.push(IoSlice::new(&newline_buf));
2392 if !iov.is_empty() {
2393 write_ioslices(out, &iov)?;
2394 }
2395 return Ok(());
2396 }
2397 }
2398 if run_start < data.len() {
2400 iov.push(IoSlice::new(&data[run_start..]));
2401 if !data.is_empty() && *data.last().unwrap() != line_delim {
2402 iov.push(IoSlice::new(&newline_buf));
2403 }
2404 }
2405 if !iov.is_empty() {
2406 write_ioslices(out, &iov)?;
2407 }
2408 Ok(())
2409}
2410
2411#[inline]
2416fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2417 buf.reserve(data.len());
2420
2421 let src = data.as_ptr();
2422 let dst_base = buf.as_mut_ptr();
2423 let mut wp = buf.len();
2424 let mut start = 0;
2425
2426 for pos in memchr_iter(line_delim, data) {
2427 let line_len = pos - start;
2428 let take = line_len.min(max_bytes);
2429 unsafe {
2430 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2431 *dst_base.add(wp + take) = line_delim;
2432 }
2433 wp += take + 1;
2434 start = pos + 1;
2435 }
2436 if start < data.len() {
2438 let line_len = data.len() - start;
2439 let take = line_len.min(max_bytes);
2440 unsafe {
2441 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2442 *dst_base.add(wp + take) = line_delim;
2443 }
2444 wp += take + 1;
2445 }
2446 unsafe { buf.set_len(wp) };
2447}
2448
2449fn process_bytes_from_offset(
2451 data: &[u8],
2452 skip_bytes: usize,
2453 line_delim: u8,
2454 out: &mut impl Write,
2455) -> io::Result<()> {
2456 if data.len() >= PARALLEL_THRESHOLD {
2457 let chunks = split_into_chunks(data, line_delim);
2458 let results = par_process(&chunks, |chunk| {
2459 let mut buf = Vec::with_capacity(chunk.len());
2460 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
2461 buf
2462 });
2463 let slices: Vec<IoSlice> = results
2464 .iter()
2465 .filter(|r| !r.is_empty())
2466 .map(|r| IoSlice::new(r))
2467 .collect();
2468 write_ioslices(out, &slices)?;
2469 } else {
2470 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2472 }
2473 Ok(())
2474}
2475
2476#[inline]
2480fn bytes_from_offset_zerocopy(
2481 data: &[u8],
2482 skip_bytes: usize,
2483 line_delim: u8,
2484 out: &mut impl Write,
2485) -> io::Result<()> {
2486 let delim_buf = [line_delim];
2487 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2488
2489 let mut start = 0;
2490 for pos in memchr_iter(line_delim, data) {
2491 let line_len = pos - start;
2492 if line_len > skip_bytes {
2493 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2494 }
2495 iov.push(IoSlice::new(&delim_buf));
2496 if iov.len() >= MAX_IOV - 1 {
2498 write_ioslices(out, &iov)?;
2499 iov.clear();
2500 }
2501 start = pos + 1;
2502 }
2503 if start < data.len() {
2504 let line_len = data.len() - start;
2505 if line_len > skip_bytes {
2506 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2507 }
2508 iov.push(IoSlice::new(&delim_buf));
2509 }
2510 if !iov.is_empty() {
2511 write_ioslices(out, &iov)?;
2512 }
2513 Ok(())
2514}
2515
2516#[inline]
2519fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2520 buf.reserve(data.len());
2521
2522 let src = data.as_ptr();
2523 let dst_base = buf.as_mut_ptr();
2524 let mut wp = buf.len();
2525 let mut start = 0;
2526
2527 for pos in memchr_iter(line_delim, data) {
2528 let line_len = pos - start;
2529 if line_len > skip_bytes {
2530 let take = line_len - skip_bytes;
2531 unsafe {
2532 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2533 }
2534 wp += take;
2535 }
2536 unsafe {
2537 *dst_base.add(wp) = line_delim;
2538 }
2539 wp += 1;
2540 start = pos + 1;
2541 }
2542 if start < data.len() {
2543 let line_len = data.len() - start;
2544 if line_len > skip_bytes {
2545 let take = line_len - skip_bytes;
2546 unsafe {
2547 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2548 }
2549 wp += take;
2550 }
2551 unsafe {
2552 *dst_base.add(wp) = line_delim;
2553 }
2554 wp += 1;
2555 }
2556 unsafe { buf.set_len(wp) };
2557}
2558
2559fn process_bytes_mid_range(
2561 data: &[u8],
2562 start_byte: usize,
2563 end_byte: usize,
2564 line_delim: u8,
2565 out: &mut impl Write,
2566) -> io::Result<()> {
2567 let skip = start_byte.saturating_sub(1);
2568
2569 if data.len() >= PARALLEL_THRESHOLD {
2570 let chunks = split_into_chunks(data, line_delim);
2571 let results = par_process(&chunks, |chunk| {
2572 let mut buf = Vec::with_capacity(chunk.len());
2573 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, &mut buf);
2574 buf
2575 });
2576 let slices: Vec<IoSlice> = results
2577 .iter()
2578 .filter(|r| !r.is_empty())
2579 .map(|r| IoSlice::new(r))
2580 .collect();
2581 write_ioslices(out, &slices)?;
2582 } else {
2583 let mut buf = Vec::with_capacity(data.len());
2584 bytes_mid_range_chunk(data, skip, end_byte, line_delim, &mut buf);
2585 if !buf.is_empty() {
2586 out.write_all(&buf)?;
2587 }
2588 }
2589 Ok(())
2590}
2591
2592#[inline]
2596fn bytes_mid_range_chunk(
2597 data: &[u8],
2598 skip: usize,
2599 end_byte: usize,
2600 line_delim: u8,
2601 buf: &mut Vec<u8>,
2602) {
2603 buf.reserve(data.len());
2604
2605 let src = data.as_ptr();
2606 let dst_base = buf.as_mut_ptr();
2607 let mut wp = buf.len();
2608 let mut start = 0;
2609
2610 for pos in memchr_iter(line_delim, data) {
2611 let line_len = pos - start;
2612 if line_len > skip {
2613 let take_end = line_len.min(end_byte);
2614 let take = take_end - skip;
2615 unsafe {
2616 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2617 }
2618 wp += take;
2619 }
2620 unsafe {
2621 *dst_base.add(wp) = line_delim;
2622 }
2623 wp += 1;
2624 start = pos + 1;
2625 }
2626 if start < data.len() {
2627 let line_len = data.len() - start;
2628 if line_len > skip {
2629 let take_end = line_len.min(end_byte);
2630 let take = take_end - skip;
2631 unsafe {
2632 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2633 }
2634 wp += take;
2635 }
2636 unsafe {
2637 *dst_base.add(wp) = line_delim;
2638 }
2639 wp += 1;
2640 }
2641 unsafe { buf.set_len(wp) };
2642}
2643
2644fn process_bytes_complement_mid(
2646 data: &[u8],
2647 skip_start: usize,
2648 skip_end: usize,
2649 line_delim: u8,
2650 out: &mut impl Write,
2651) -> io::Result<()> {
2652 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
2654 let chunks = split_into_chunks(data, line_delim);
2655 let results = par_process(&chunks, |chunk| {
2656 let mut buf = Vec::with_capacity(chunk.len());
2657 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, &mut buf);
2658 buf
2659 });
2660 let slices: Vec<IoSlice> = results
2661 .iter()
2662 .filter(|r| !r.is_empty())
2663 .map(|r| IoSlice::new(r))
2664 .collect();
2665 write_ioslices(out, &slices)?;
2666 } else {
2667 let mut buf = Vec::with_capacity(data.len());
2668 bytes_complement_mid_chunk(data, prefix_bytes, skip_end, line_delim, &mut buf);
2669 if !buf.is_empty() {
2670 out.write_all(&buf)?;
2671 }
2672 }
2673 Ok(())
2674}
2675
2676#[inline]
2679fn bytes_complement_mid_chunk(
2680 data: &[u8],
2681 prefix_bytes: usize,
2682 skip_end: usize,
2683 line_delim: u8,
2684 buf: &mut Vec<u8>,
2685) {
2686 buf.reserve(data.len());
2687
2688 let src = data.as_ptr();
2689 let dst_base = buf.as_mut_ptr();
2690 let mut wp = buf.len();
2691 let mut start = 0;
2692
2693 for pos in memchr_iter(line_delim, data) {
2694 let line_len = pos - start;
2695 let take_prefix = prefix_bytes.min(line_len);
2697 if take_prefix > 0 {
2698 unsafe {
2699 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2700 }
2701 wp += take_prefix;
2702 }
2703 if line_len > skip_end {
2705 let suffix_len = line_len - skip_end;
2706 unsafe {
2707 std::ptr::copy_nonoverlapping(
2708 src.add(start + skip_end),
2709 dst_base.add(wp),
2710 suffix_len,
2711 );
2712 }
2713 wp += suffix_len;
2714 }
2715 unsafe {
2716 *dst_base.add(wp) = line_delim;
2717 }
2718 wp += 1;
2719 start = pos + 1;
2720 }
2721 if start < data.len() {
2722 let line_len = data.len() - start;
2723 let take_prefix = prefix_bytes.min(line_len);
2724 if take_prefix > 0 {
2725 unsafe {
2726 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2727 }
2728 wp += take_prefix;
2729 }
2730 if line_len > skip_end {
2731 let suffix_len = line_len - skip_end;
2732 unsafe {
2733 std::ptr::copy_nonoverlapping(
2734 src.add(start + skip_end),
2735 dst_base.add(wp),
2736 suffix_len,
2737 );
2738 }
2739 wp += suffix_len;
2740 }
2741 unsafe {
2742 *dst_base.add(wp) = line_delim;
2743 }
2744 wp += 1;
2745 }
2746 unsafe { buf.set_len(wp) };
2747}
2748
2749fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
2751 let line_delim = cfg.line_delim;
2752 let ranges = cfg.ranges;
2753 let complement = cfg.complement;
2754 let output_delim = cfg.output_delim;
2755
2756 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
2758 let max_bytes = ranges[0].end;
2759 if max_bytes < usize::MAX {
2760 return process_bytes_from_start(data, max_bytes, line_delim, out);
2761 }
2762 }
2763
2764 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
2766 let skip_bytes = ranges[0].start.saturating_sub(1);
2767 if skip_bytes > 0 {
2768 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
2769 }
2770 }
2771
2772 if !complement
2774 && ranges.len() == 1
2775 && ranges[0].start > 1
2776 && ranges[0].end < usize::MAX
2777 && output_delim.is_empty()
2778 {
2779 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
2780 }
2781
2782 if complement
2784 && ranges.len() == 1
2785 && ranges[0].start == 1
2786 && ranges[0].end < usize::MAX
2787 && output_delim.is_empty()
2788 {
2789 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
2790 }
2791
2792 if complement
2794 && ranges.len() == 1
2795 && ranges[0].end == usize::MAX
2796 && ranges[0].start > 1
2797 && output_delim.is_empty()
2798 {
2799 let max_bytes = ranges[0].start - 1;
2800 return process_bytes_from_start(data, max_bytes, line_delim, out);
2801 }
2802
2803 if complement
2805 && ranges.len() == 1
2806 && ranges[0].start > 1
2807 && ranges[0].end < usize::MAX
2808 && output_delim.is_empty()
2809 {
2810 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
2811 }
2812
2813 if data.len() >= PARALLEL_THRESHOLD {
2814 let chunks = split_into_chunks(data, line_delim);
2815 let results = par_process(&chunks, |chunk| {
2816 let mut buf = Vec::with_capacity(chunk.len());
2817 process_bytes_chunk(
2818 chunk,
2819 ranges,
2820 complement,
2821 output_delim,
2822 line_delim,
2823 &mut buf,
2824 );
2825 buf
2826 });
2827 let slices: Vec<IoSlice> = results
2828 .iter()
2829 .filter(|r| !r.is_empty())
2830 .map(|r| IoSlice::new(r))
2831 .collect();
2832 write_ioslices(out, &slices)?;
2833 } else {
2834 let mut buf = Vec::with_capacity(data.len());
2835 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
2836 if !buf.is_empty() {
2837 out.write_all(&buf)?;
2838 }
2839 }
2840 Ok(())
2841}
2842
2843fn process_bytes_chunk(
2848 data: &[u8],
2849 ranges: &[Range],
2850 complement: bool,
2851 output_delim: &[u8],
2852 line_delim: u8,
2853 buf: &mut Vec<u8>,
2854) {
2855 buf.reserve(data.len());
2856 let base = data.as_ptr();
2857 let mut start = 0;
2858 for end_pos in memchr_iter(line_delim, data) {
2859 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
2860 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2861 unsafe { buf_push(buf, line_delim) };
2862 start = end_pos + 1;
2863 }
2864 if start < data.len() {
2865 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2866 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2867 unsafe { buf_push(buf, line_delim) };
2868 }
2869}
2870
2871#[inline(always)]
2875fn cut_bytes_to_buf(
2876 line: &[u8],
2877 ranges: &[Range],
2878 complement: bool,
2879 output_delim: &[u8],
2880 buf: &mut Vec<u8>,
2881) {
2882 let len = line.len();
2883 let base = line.as_ptr();
2884 let mut first_range = true;
2885
2886 let needed = len + output_delim.len() * ranges.len() + 1;
2888 if buf.capacity() - buf.len() < needed {
2889 buf.reserve(needed);
2890 }
2891
2892 if complement {
2893 let mut pos: usize = 1;
2894 for r in ranges {
2895 let rs = r.start;
2896 let re = r.end.min(len);
2897 if pos < rs {
2898 if !first_range && !output_delim.is_empty() {
2899 unsafe { buf_extend(buf, output_delim) };
2900 }
2901 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
2902 first_range = false;
2903 }
2904 pos = re + 1;
2905 if pos > len {
2906 break;
2907 }
2908 }
2909 if pos <= len {
2910 if !first_range && !output_delim.is_empty() {
2911 unsafe { buf_extend(buf, output_delim) };
2912 }
2913 unsafe {
2914 buf_extend(
2915 buf,
2916 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
2917 )
2918 };
2919 }
2920 } else if output_delim.is_empty() && ranges.len() == 1 {
2921 let start = ranges[0].start.saturating_sub(1);
2923 let end = ranges[0].end.min(len);
2924 if start < len {
2925 unsafe {
2926 buf_extend(
2927 buf,
2928 std::slice::from_raw_parts(base.add(start), end - start),
2929 )
2930 };
2931 }
2932 } else {
2933 for r in ranges {
2934 let start = r.start.saturating_sub(1);
2935 let end = r.end.min(len);
2936 if start >= len {
2937 break;
2938 }
2939 if !first_range && !output_delim.is_empty() {
2940 unsafe { buf_extend(buf, output_delim) };
2941 }
2942 unsafe {
2943 buf_extend(
2944 buf,
2945 std::slice::from_raw_parts(base.add(start), end - start),
2946 )
2947 };
2948 first_range = false;
2949 }
2950 }
2951}
2952
2953#[inline]
2957pub fn cut_fields(
2958 line: &[u8],
2959 delim: u8,
2960 ranges: &[Range],
2961 complement: bool,
2962 output_delim: &[u8],
2963 suppress_no_delim: bool,
2964 out: &mut impl Write,
2965) -> io::Result<bool> {
2966 if memchr::memchr(delim, line).is_none() {
2967 if !suppress_no_delim {
2968 out.write_all(line)?;
2969 return Ok(true);
2970 }
2971 return Ok(false);
2972 }
2973
2974 let mut field_num: usize = 1;
2975 let mut field_start: usize = 0;
2976 let mut first_output = true;
2977
2978 for delim_pos in memchr_iter(delim, line) {
2979 let selected = in_ranges(ranges, field_num) != complement;
2980 if selected {
2981 if !first_output {
2982 out.write_all(output_delim)?;
2983 }
2984 out.write_all(&line[field_start..delim_pos])?;
2985 first_output = false;
2986 }
2987 field_start = delim_pos + 1;
2988 field_num += 1;
2989 }
2990
2991 let selected = in_ranges(ranges, field_num) != complement;
2992 if selected {
2993 if !first_output {
2994 out.write_all(output_delim)?;
2995 }
2996 out.write_all(&line[field_start..])?;
2997 }
2998
2999 Ok(true)
3000}
3001
3002#[inline]
3004pub fn cut_bytes(
3005 line: &[u8],
3006 ranges: &[Range],
3007 complement: bool,
3008 output_delim: &[u8],
3009 out: &mut impl Write,
3010) -> io::Result<bool> {
3011 let mut first_range = true;
3012
3013 if complement {
3014 let len = line.len();
3015 let mut comp_ranges = Vec::new();
3016 let mut pos: usize = 1;
3017 for r in ranges {
3018 let rs = r.start;
3019 let re = r.end.min(len);
3020 if pos < rs {
3021 comp_ranges.push((pos, rs - 1));
3022 }
3023 pos = re + 1;
3024 if pos > len {
3025 break;
3026 }
3027 }
3028 if pos <= len {
3029 comp_ranges.push((pos, len));
3030 }
3031 for &(s, e) in &comp_ranges {
3032 if !first_range && !output_delim.is_empty() {
3033 out.write_all(output_delim)?;
3034 }
3035 out.write_all(&line[s - 1..e])?;
3036 first_range = false;
3037 }
3038 } else {
3039 for r in ranges {
3040 let start = r.start.saturating_sub(1);
3041 let end = r.end.min(line.len());
3042 if start >= line.len() {
3043 break;
3044 }
3045 if !first_range && !output_delim.is_empty() {
3046 out.write_all(output_delim)?;
3047 }
3048 out.write_all(&line[start..end])?;
3049 first_range = false;
3050 }
3051 }
3052 Ok(true)
3053}
3054
3055pub fn cut_field1_inplace(data: &mut [u8], delim: u8, line_delim: u8, suppress: bool) -> usize {
3063 let len = data.len();
3064 let mut wp: usize = 0;
3065 let mut rp: usize = 0;
3066
3067 while rp < len {
3068 match memchr::memchr2(delim, line_delim, &data[rp..]) {
3069 None => {
3070 if suppress {
3072 break;
3074 }
3075 let remaining = len - rp;
3076 if wp != rp {
3077 data.copy_within(rp..len, wp);
3078 }
3079 wp += remaining;
3080 break;
3081 }
3082 Some(offset) => {
3083 let actual = rp + offset;
3084 if data[actual] == line_delim {
3085 if suppress {
3087 rp = actual + 1;
3089 } else {
3090 let chunk_len = actual + 1 - rp;
3092 if wp != rp {
3093 data.copy_within(rp..actual + 1, wp);
3094 }
3095 wp += chunk_len;
3096 rp = actual + 1;
3097 }
3098 } else {
3099 let field_len = actual - rp;
3101 if wp != rp && field_len > 0 {
3102 data.copy_within(rp..actual, wp);
3103 }
3104 wp += field_len;
3105 data[wp] = line_delim;
3106 wp += 1;
3107 match memchr::memchr(line_delim, &data[actual + 1..]) {
3109 None => {
3110 rp = len;
3111 }
3112 Some(nl_off) => {
3113 rp = actual + 1 + nl_off + 1;
3114 }
3115 }
3116 }
3117 }
3118 }
3119 }
3120 wp
3121}
3122
3123pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3125 match cfg.mode {
3126 CutMode::Fields => process_fields_fast(data, cfg, out),
3127 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
3128 }
3129}
3130
3131pub fn process_cut_reader<R: BufRead>(
3136 mut reader: R,
3137 cfg: &CutConfig,
3138 out: &mut impl Write,
3139) -> io::Result<()> {
3140 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3142
3143 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3146
3147 loop {
3148 buf.reserve(CHUNK_SIZE);
3150 let read_start = buf.len();
3151 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3152 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3153 buf.truncate(read_start + n);
3154
3155 if buf.is_empty() {
3156 break;
3157 }
3158
3159 if n == 0 {
3160 process_cut_data(&buf, cfg, out)?;
3162 break;
3163 }
3164
3165 let process_end = match memchr::memrchr(line_delim, &buf) {
3167 Some(pos) => pos + 1,
3168 None => {
3169 continue;
3171 }
3172 };
3173
3174 process_cut_data(&buf[..process_end], cfg, out)?;
3176
3177 let leftover_len = buf.len() - process_end;
3179 if leftover_len > 0 {
3180 buf.copy_within(process_end.., 0);
3181 }
3182 buf.truncate(leftover_len);
3183 }
3184
3185 Ok(())
3186}
3187
/// Reads from `reader` until `buf` is full or end of input is reached, and
/// returns the number of bytes placed at the start of `buf`.
///
/// Short reads are accumulated, and reads failing with
/// `io::ErrorKind::Interrupted` are retried — including the very first one.
/// A return value smaller than `buf.len()` means the reader reported end of
/// input.
///
/// # Errors
///
/// Returns the first read error that is not `io::ErrorKind::Interrupted`.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3207
/// Selection unit used by a cut operation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Select by byte position.
    Bytes,
    /// Select by character position. `process_cut_data` routes this through
    /// the same byte-oriented path as [`CutMode::Bytes`].
    Characters,
    /// Select delimiter-separated fields.
    Fields,
}