1use memchr::memchr_iter;
2use std::io::{self, BufRead, IoSlice, Write};
3
/// Input size, in bytes, at or above which several of the field-cutting routines in this
/// file split the input into line-aligned chunks and process them under `rayon::scope`
/// instead of making a single sequential pass.
const PARALLEL_THRESHOLD: usize = 8 * 1024 * 1024;
8
/// Maximum number of `IoSlice`s passed to one `write_vectored` call; longer slice lists
/// are written in batches of this size.
const MAX_IOV: usize = 1024;
11
/// Borrowed settings describing one cut operation.
pub struct CutConfig<'a> {
    /// Selection mode (`CutMode` is declared outside this section).
    pub mode: CutMode,
    /// Ranges to select.
    // NOTE(review): the field fast paths index `ranges[0]` and treat `ranges.last()` as the
    // highest selected field, so this is presumably the sorted, merged, non-empty output of
    // `parse_ranges` — confirm against the caller.
    pub ranges: &'a [Range],
    /// When true, the selection is inverted: positions outside `ranges` are kept.
    pub complement: bool,
    /// Byte that separates fields within a line.
    pub delim: u8,
    /// Output field separator. The specialised field paths are only taken when this is
    /// exactly the single byte `delim`.
    pub output_delim: &'a [u8],
    /// When true, lines that contain no `delim` are dropped instead of being copied through.
    pub suppress_no_delim: bool,
    /// Byte that terminates each line.
    pub line_delim: u8,
}
22
/// An inclusive, 1-based span of byte, character, or field positions.
///
/// `parse_ranges` stores `usize::MAX` in `end` for an open-ended spec such as `3-`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position (1-based, inclusive).
    pub start: usize,
    /// Last selected position (inclusive); `usize::MAX` stands for "through the end".
    pub end: usize,
}
29
30pub fn parse_ranges(spec: &str, no_merge_adjacent: bool) -> Result<Vec<Range>, String> {
37 let mut ranges = Vec::new();
38
39 for part in spec.split(',') {
40 let part = part.trim();
41 if part.is_empty() {
42 continue;
43 }
44
45 if let Some(idx) = part.find('-') {
46 let left = &part[..idx];
47 let right = &part[idx + 1..];
48
49 if left.is_empty() && right.is_empty() {
51 return Err("invalid range with no endpoint: -".to_string());
52 }
53
54 let start = if left.is_empty() {
55 1
56 } else {
57 left.parse::<usize>()
58 .map_err(|_| format!("invalid range: '{}'", part))?
59 };
60
61 let end = if right.is_empty() {
62 usize::MAX
63 } else {
64 right
65 .parse::<usize>()
66 .map_err(|_| format!("invalid range: '{}'", part))?
67 };
68
69 if start == 0 {
70 return Err("fields and positions are numbered from 1".to_string());
71 }
72 if start > end {
73 return Err(format!("invalid decreasing range: '{}'", part));
74 }
75
76 ranges.push(Range { start, end });
77 } else {
78 let n = part
79 .parse::<usize>()
80 .map_err(|_| format!("invalid field: '{}'", part))?;
81 if n == 0 {
82 return Err("fields and positions are numbered from 1".to_string());
83 }
84 ranges.push(Range { start: n, end: n });
85 }
86 }
87
88 if ranges.is_empty() {
89 return Err("you must specify a list of bytes, characters, or fields".to_string());
90 }
91
92 ranges.sort_by_key(|r| (r.start, r.end));
94 let mut merged = vec![ranges[0].clone()];
95 for r in &ranges[1..] {
96 let last = merged.last_mut().unwrap();
97 if no_merge_adjacent {
98 if r.start <= last.end {
100 last.end = last.end.max(r.end);
101 } else {
102 merged.push(r.clone());
103 }
104 } else {
105 if r.start <= last.end.saturating_add(1) {
107 last.end = last.end.max(r.end);
108 } else {
109 merged.push(r.clone());
110 }
111 }
112 }
113
114 Ok(merged)
115}
116
117#[inline(always)]
120fn in_ranges(ranges: &[Range], pos: usize) -> bool {
121 for r in ranges {
122 if pos < r.start {
123 return false;
124 }
125 if pos <= r.end {
126 return true;
127 }
128 }
129 false
130}
131
132#[inline]
135fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
136 let mut mask: u64 = 0;
137 for i in 1..=64u32 {
138 let in_range = in_ranges(ranges, i as usize);
139 if in_range != complement {
140 mask |= 1u64 << (i - 1);
141 }
142 }
143 mask
144}
145
146#[inline(always)]
148fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
149 if field_num <= 64 {
150 (mask >> (field_num - 1)) & 1 == 1
151 } else {
152 in_ranges(ranges, field_num) != complement
153 }
154}
155
/// Appends `data` to `buf` without checking or growing the vector's capacity.
///
/// # Safety
/// The caller must have reserved enough room beforehand:
/// `buf.capacity() - buf.len() >= data.len()`. Otherwise the copy writes past the end of
/// the allocation, which is undefined behaviour. Debug builds assert this precondition.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    debug_assert!(
        buf.capacity() - buf.len() >= data.len(),
        "buf_extend: insufficient reserved capacity"
    );
    // SAFETY: the caller guarantees spare capacity for `data.len()` bytes, so the
    // destination range is inside the allocation; `data` is a shared borrow and therefore
    // cannot overlap the exclusively borrowed `buf`. The bytes are initialised by the copy
    // before `set_len` exposes them.
    unsafe {
        let len = buf.len();
        std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr().add(len), data.len());
        buf.set_len(len + data.len());
    }
}
168
/// Appends the single byte `b` to `buf` without checking or growing the vector's capacity.
///
/// # Safety
/// The caller must have reserved at least one spare byte: `buf.len() < buf.capacity()`.
/// Otherwise the store lands past the end of the allocation, which is undefined behaviour.
/// Debug builds assert this precondition.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    debug_assert!(
        buf.len() < buf.capacity(),
        "buf_push: insufficient reserved capacity"
    );
    // SAFETY: the caller guarantees one spare byte, so `len` is an in-bounds offset of the
    // allocation; the byte is written before `set_len` exposes it.
    unsafe {
        let len = buf.len();
        *buf.as_mut_ptr().add(len) = b;
        buf.set_len(len + 1);
    }
}
179
180#[inline]
184fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
185 if slices.is_empty() {
186 return Ok(());
187 }
188 for batch in slices.chunks(MAX_IOV) {
189 let total: usize = batch.iter().map(|s| s.len()).sum();
190 let written = out.write_vectored(batch)?;
191 if written >= total {
192 continue;
193 }
194 if written == 0 {
195 return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
196 }
197 write_ioslices_slow(out, batch, written)?;
198 }
199 Ok(())
200}
201
/// Writes the concatenation of `slices` to `out`, omitting its first `skip` bytes.
///
/// Used after a partial vectored write: slices that were already fully written are passed
/// over, the slice containing the resume point is written from that offset, and every
/// later slice is written whole with `write_all`.
#[cold]
#[inline(never)]
fn write_ioslices_slow(
    out: &mut impl Write,
    slices: &[IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for piece in slices.iter() {
        let bytes: &[u8] = piece;
        match skip.checked_sub(bytes.len()) {
            // The whole slice lies inside the already-written prefix.
            Some(remaining) => skip = remaining,
            // The resume point is inside this slice (or skipping is already finished).
            None => {
                out.write_all(&bytes[skip..])?;
                skip = 0;
            }
        }
    }
    Ok(())
}
221
/// Returns the parallelism reported by `std::thread::available_parallelism`, or 1 when
/// that query fails.
#[inline]
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    }
}
233
234fn split_for_scope<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
237 let num_threads = num_cpus().max(1);
238 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
239 return vec![data];
240 }
241
242 let chunk_size = data.len() / num_threads;
243 let mut chunks = Vec::with_capacity(num_threads);
244 let mut pos = 0;
245
246 for _ in 0..num_threads - 1 {
247 let target = pos + chunk_size;
248 if target >= data.len() {
249 break;
250 }
251 let boundary = memchr::memchr(line_delim, &data[target..])
252 .map(|p| target + p + 1)
253 .unwrap_or(data.len());
254 if boundary > pos {
255 chunks.push(&data[pos..boundary]);
256 }
257 pos = boundary;
258 }
259
260 if pos < data.len() {
261 chunks.push(&data[pos..]);
262 }
263
264 chunks
265}
266
267fn process_fields_multi_select(
274 data: &[u8],
275 delim: u8,
276 line_delim: u8,
277 ranges: &[Range],
278 suppress: bool,
279 out: &mut impl Write,
280) -> io::Result<()> {
281 let max_field = ranges.last().map_or(0, |r| r.end);
282
283 if data.len() >= PARALLEL_THRESHOLD {
284 let chunks = split_for_scope(data, line_delim);
285 let n = chunks.len();
286 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
287 rayon::scope(|s| {
288 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
289 s.spawn(move |_| {
290 result.reserve(chunk.len() * 3 / 4);
291 multi_select_chunk(
292 chunk, delim, line_delim, ranges, max_field, suppress, result,
293 );
294 });
295 }
296 });
297 let slices: Vec<IoSlice> = results
298 .iter()
299 .filter(|r| !r.is_empty())
300 .map(|r| IoSlice::new(r))
301 .collect();
302 write_ioslices(out, &slices)?;
303 } else {
304 let mut buf = Vec::with_capacity(data.len() * 3 / 4);
305 multi_select_chunk(
306 data, delim, line_delim, ranges, max_field, suppress, &mut buf,
307 );
308 if !buf.is_empty() {
309 out.write_all(&buf)?;
310 }
311 }
312 Ok(())
313}
314
315fn multi_select_chunk(
321 data: &[u8],
322 delim: u8,
323 line_delim: u8,
324 ranges: &[Range],
325 max_field: usize,
326 suppress: bool,
327 buf: &mut Vec<u8>,
328) {
329 buf.reserve(data.len());
330 let base = data.as_ptr();
331 let mut start = 0;
332 let max_delims = max_field.min(64);
333
334 for end_pos in memchr_iter(line_delim, data) {
335 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
336 multi_select_line_fast(
337 line, delim, line_delim, ranges, max_delims, suppress, buf, start, base,
338 );
339 start = end_pos + 1;
340 }
341 if start < data.len() {
342 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
343 multi_select_line_fast(
344 line, delim, line_delim, ranges, max_delims, suppress, buf, start, base,
345 );
346 }
347}
348
349#[inline(always)]
353fn multi_select_line_fast(
354 line: &[u8],
355 delim: u8,
356 line_delim: u8,
357 ranges: &[Range],
358 max_delims: usize,
359 suppress: bool,
360 buf: &mut Vec<u8>,
361 _line_abs_start: usize,
362 _data_base: *const u8,
363) {
364 let len = line.len();
365 if len == 0 {
366 if !suppress {
367 unsafe { buf_push(buf, line_delim) };
368 }
369 return;
370 }
371
372 let base = line.as_ptr();
373
374 let mut delim_pos = [0usize; 64];
376 let mut num_delims: usize = 0;
377
378 for pos in memchr_iter(delim, line) {
379 if num_delims < max_delims {
380 delim_pos[num_delims] = pos;
381 num_delims += 1;
382 if num_delims >= max_delims {
383 break;
384 }
385 }
386 }
387
388 if num_delims == 0 {
389 if !suppress {
390 unsafe {
391 buf_extend(buf, line);
392 buf_push(buf, line_delim);
393 }
394 }
395 return;
396 }
397
398 let total_fields = num_delims + 1;
399 let mut first_output = true;
400
401 for r in ranges {
402 let range_start = r.start;
403 let range_end = r.end.min(total_fields);
404 if range_start > total_fields {
405 break;
406 }
407 for field_num in range_start..=range_end {
408 if field_num > total_fields {
409 break;
410 }
411
412 let field_start = if field_num == 1 {
413 0
414 } else if field_num - 2 < num_delims {
415 delim_pos[field_num - 2] + 1
416 } else {
417 continue;
418 };
419 let field_end = if field_num <= num_delims {
420 delim_pos[field_num - 1]
421 } else {
422 len
423 };
424
425 if !first_output {
426 unsafe { buf_push(buf, delim) };
427 }
428 unsafe {
429 buf_extend(
430 buf,
431 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
432 );
433 }
434 first_output = false;
435 }
436 }
437
438 unsafe { buf_push(buf, line_delim) };
439}
440
441fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
445 let delim = cfg.delim;
446 let line_delim = cfg.line_delim;
447 let ranges = cfg.ranges;
448 let complement = cfg.complement;
449 let output_delim = cfg.output_delim;
450 let suppress = cfg.suppress_no_delim;
451
452 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
460 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
461 }
462
463 if complement
465 && ranges.len() == 1
466 && output_delim.len() == 1
467 && output_delim[0] == delim
468 && ranges[0].start == ranges[0].end
469 {
470 return process_complement_single_field(
471 data,
472 delim,
473 line_delim,
474 ranges[0].start,
475 suppress,
476 out,
477 );
478 }
479
480 if complement
483 && ranges.len() == 1
484 && ranges[0].start > 1
485 && ranges[0].end < usize::MAX
486 && output_delim.len() == 1
487 && output_delim[0] == delim
488 {
489 return process_complement_range(
490 data,
491 delim,
492 line_delim,
493 ranges[0].start,
494 ranges[0].end,
495 suppress,
496 out,
497 );
498 }
499
500 if !complement
502 && ranges.len() == 1
503 && ranges[0].start == 1
504 && output_delim.len() == 1
505 && output_delim[0] == delim
506 && ranges[0].end < usize::MAX
507 {
508 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
509 }
510
511 if !complement
513 && ranges.len() == 1
514 && ranges[0].end == usize::MAX
515 && ranges[0].start > 1
516 && output_delim.len() == 1
517 && output_delim[0] == delim
518 {
519 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
520 }
521
522 if !complement
524 && ranges.len() == 1
525 && ranges[0].start > 1
526 && ranges[0].end < usize::MAX
527 && output_delim.len() == 1
528 && output_delim[0] == delim
529 {
530 return process_fields_mid_range(
531 data,
532 delim,
533 line_delim,
534 ranges[0].start,
535 ranges[0].end,
536 suppress,
537 out,
538 );
539 }
540
541 if !complement
547 && ranges.len() > 1
548 && ranges.last().map_or(false, |r| r.end < usize::MAX)
549 && output_delim.len() == 1
550 && output_delim[0] == delim
551 && delim != line_delim
552 {
553 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
554 }
555
556 let max_field = if complement {
558 usize::MAX
559 } else {
560 ranges.last().map(|r| r.end).unwrap_or(0)
561 };
562 let field_mask = compute_field_mask(ranges, complement);
563
564 if data.len() >= PARALLEL_THRESHOLD {
565 let chunks = split_for_scope(data, line_delim);
566 let n = chunks.len();
567 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
568 rayon::scope(|s| {
569 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
570 s.spawn(move |_| {
571 result.reserve(chunk.len() + 1);
572 process_fields_chunk(
573 chunk,
574 delim,
575 ranges,
576 output_delim,
577 suppress,
578 max_field,
579 field_mask,
580 line_delim,
581 complement,
582 result,
583 );
584 });
585 }
586 });
587 let slices: Vec<IoSlice> = results
588 .iter()
589 .filter(|r| !r.is_empty())
590 .map(|r| IoSlice::new(r))
591 .collect();
592 write_ioslices(out, &slices)?;
593 } else {
594 let mut buf = Vec::with_capacity(data.len() + 1);
596 process_fields_chunk(
597 data,
598 delim,
599 ranges,
600 output_delim,
601 suppress,
602 max_field,
603 field_mask,
604 line_delim,
605 complement,
606 &mut buf,
607 );
608 if !buf.is_empty() {
609 out.write_all(&buf)?;
610 }
611 }
612 Ok(())
613}
614
615fn process_fields_chunk(
620 data: &[u8],
621 delim: u8,
622 ranges: &[Range],
623 output_delim: &[u8],
624 suppress: bool,
625 max_field: usize,
626 field_mask: u64,
627 line_delim: u8,
628 complement: bool,
629 buf: &mut Vec<u8>,
630) {
631 if delim != line_delim {
636 buf.reserve(data.len());
637 let mut start = 0;
638 for end_pos in memchr_iter(line_delim, data) {
639 let line = &data[start..end_pos];
640 extract_fields_to_buf(
641 line,
642 delim,
643 ranges,
644 output_delim,
645 suppress,
646 max_field,
647 field_mask,
648 line_delim,
649 buf,
650 complement,
651 );
652 start = end_pos + 1;
653 }
654 if start < data.len() {
655 extract_fields_to_buf(
656 &data[start..],
657 delim,
658 ranges,
659 output_delim,
660 suppress,
661 max_field,
662 field_mask,
663 line_delim,
664 buf,
665 complement,
666 );
667 }
668 return;
669 }
670
671 let mut start = 0;
673 for end_pos in memchr_iter(line_delim, data) {
674 let line = &data[start..end_pos];
675 extract_fields_to_buf(
676 line,
677 delim,
678 ranges,
679 output_delim,
680 suppress,
681 max_field,
682 field_mask,
683 line_delim,
684 buf,
685 complement,
686 );
687 start = end_pos + 1;
688 }
689 if start < data.len() {
690 extract_fields_to_buf(
691 &data[start..],
692 delim,
693 ranges,
694 output_delim,
695 suppress,
696 max_field,
697 field_mask,
698 line_delim,
699 buf,
700 complement,
701 );
702 }
703}
704
705fn process_single_field(
711 data: &[u8],
712 delim: u8,
713 line_delim: u8,
714 target: usize,
715 suppress: bool,
716 out: &mut impl Write,
717) -> io::Result<()> {
718 let target_idx = target - 1;
719
720 const FIELD_PARALLEL_MIN: usize = 16 * 1024 * 1024;
722
723 if delim != line_delim {
724 if target_idx == 0 && !suppress {
728 if data.len() >= FIELD_PARALLEL_MIN {
729 return single_field1_parallel(data, delim, line_delim, out);
730 }
731 let mut buf = Vec::with_capacity(data.len() + 1);
736 single_field1_to_buf(data, delim, line_delim, &mut buf);
737 if !buf.is_empty() {
738 out.write_all(&buf)?;
739 }
740 return Ok(());
741 }
742
743 if data.len() >= FIELD_PARALLEL_MIN {
747 let chunks = split_for_scope(data, line_delim);
748 let n = chunks.len();
749 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
750 rayon::scope(|s| {
751 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
752 s.spawn(move |_| {
753 result.reserve(chunk.len() / 2);
754 process_single_field_chunk(
755 chunk, delim, target_idx, line_delim, suppress, result,
756 );
757 });
758 }
759 });
760 let slices: Vec<IoSlice> = results
761 .iter()
762 .filter(|r| !r.is_empty())
763 .map(|r| IoSlice::new(r))
764 .collect();
765 write_ioslices(out, &slices)?;
766 } else {
767 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
768 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
769 if !buf.is_empty() {
770 out.write_all(&buf)?;
771 }
772 }
773 return Ok(());
774 }
775
776 if data.len() >= FIELD_PARALLEL_MIN {
778 let chunks = split_for_scope(data, line_delim);
779 let n = chunks.len();
780 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
781 rayon::scope(|s| {
782 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
783 s.spawn(move |_| {
784 result.reserve(chunk.len() / 4);
785 process_single_field_chunk(
786 chunk, delim, target_idx, line_delim, suppress, result,
787 );
788 });
789 }
790 });
791 let slices: Vec<IoSlice> = results
792 .iter()
793 .filter(|r| !r.is_empty())
794 .map(|r| IoSlice::new(r))
795 .collect();
796 write_ioslices(out, &slices)?;
797 } else {
798 let mut buf = Vec::with_capacity(data.len() / 4);
799 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
800 if !buf.is_empty() {
801 out.write_all(&buf)?;
802 }
803 }
804 Ok(())
805}
806
807fn process_complement_range(
810 data: &[u8],
811 delim: u8,
812 line_delim: u8,
813 skip_start: usize,
814 skip_end: usize,
815 suppress: bool,
816 out: &mut impl Write,
817) -> io::Result<()> {
818 if data.len() >= PARALLEL_THRESHOLD {
819 let chunks = split_for_scope(data, line_delim);
820 let n = chunks.len();
821 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
822 rayon::scope(|s| {
823 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
824 s.spawn(move |_| {
825 result.reserve(chunk.len());
826 complement_range_chunk(
827 chunk, delim, skip_start, skip_end, line_delim, suppress, result,
828 );
829 });
830 }
831 });
832 let slices: Vec<IoSlice> = results
833 .iter()
834 .filter(|r| !r.is_empty())
835 .map(|r| IoSlice::new(r))
836 .collect();
837 write_ioslices(out, &slices)?;
838 } else {
839 let mut buf = Vec::with_capacity(data.len());
840 complement_range_chunk(
841 data, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
842 );
843 if !buf.is_empty() {
844 out.write_all(&buf)?;
845 }
846 }
847 Ok(())
848}
849
850fn complement_range_chunk(
852 data: &[u8],
853 delim: u8,
854 skip_start: usize,
855 skip_end: usize,
856 line_delim: u8,
857 suppress: bool,
858 buf: &mut Vec<u8>,
859) {
860 buf.reserve(data.len());
862 let mut start = 0;
863 for end_pos in memchr_iter(line_delim, data) {
864 let line = &data[start..end_pos];
865 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
866 start = end_pos + 1;
867 }
868 if start < data.len() {
869 complement_range_line(
870 &data[start..],
871 delim,
872 skip_start,
873 skip_end,
874 line_delim,
875 suppress,
876 buf,
877 );
878 }
879}
880
881#[inline(always)]
888fn complement_range_line(
889 line: &[u8],
890 delim: u8,
891 skip_start: usize,
892 skip_end: usize,
893 line_delim: u8,
894 suppress: bool,
895 buf: &mut Vec<u8>,
896) {
897 let len = line.len();
898 if len == 0 {
899 if !suppress {
900 unsafe { buf_push(buf, line_delim) };
901 }
902 return;
903 }
904
905 let base = line.as_ptr();
907
908 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
918
919 let mut delim_count: usize = 0;
921 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
925 delim_count += 1;
926 if delim_count == need_prefix_delims {
927 prefix_end_pos = pos;
928 }
929 if delim_count == total_need {
930 suffix_start_pos = pos + 1;
931 break;
932 }
933 }
934
935 if delim_count == 0 {
936 if !suppress {
938 unsafe {
939 buf_extend(buf, line);
940 buf_push(buf, line_delim);
941 }
942 }
943 return;
944 }
945
946 if delim_count < need_prefix_delims {
952 unsafe {
954 buf_extend(buf, line);
955 buf_push(buf, line_delim);
956 }
957 return;
958 }
959
960 let has_prefix = need_prefix_delims > 0;
961 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
962
963 if has_prefix && has_suffix {
964 unsafe {
966 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
967 buf_push(buf, delim);
968 buf_extend(
969 buf,
970 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
971 );
972 buf_push(buf, line_delim);
973 }
974 } else if has_prefix {
975 unsafe {
977 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
978 buf_push(buf, line_delim);
979 }
980 } else if has_suffix {
981 unsafe {
983 buf_extend(
984 buf,
985 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
986 );
987 buf_push(buf, line_delim);
988 }
989 } else {
990 unsafe { buf_push(buf, line_delim) };
992 }
993}
994
995fn process_complement_single_field(
997 data: &[u8],
998 delim: u8,
999 line_delim: u8,
1000 skip_field: usize,
1001 suppress: bool,
1002 out: &mut impl Write,
1003) -> io::Result<()> {
1004 let skip_idx = skip_field - 1;
1005
1006 if data.len() >= PARALLEL_THRESHOLD {
1007 let chunks = split_for_scope(data, line_delim);
1008 let n = chunks.len();
1009 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1010 rayon::scope(|s| {
1011 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1012 s.spawn(move |_| {
1013 result.reserve(chunk.len());
1014 complement_single_field_chunk(
1015 chunk, delim, skip_idx, line_delim, suppress, result,
1016 );
1017 });
1018 }
1019 });
1020 let slices: Vec<IoSlice> = results
1021 .iter()
1022 .filter(|r| !r.is_empty())
1023 .map(|r| IoSlice::new(r))
1024 .collect();
1025 write_ioslices(out, &slices)?;
1026 } else {
1027 let mut buf = Vec::with_capacity(data.len());
1028 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
1029 if !buf.is_empty() {
1030 out.write_all(&buf)?;
1031 }
1032 }
1033 Ok(())
1034}
1035
1036fn complement_single_field_chunk(
1041 data: &[u8],
1042 delim: u8,
1043 skip_idx: usize,
1044 line_delim: u8,
1045 suppress: bool,
1046 buf: &mut Vec<u8>,
1047) {
1048 buf.reserve(data.len());
1049 let mut start = 0;
1050 for end_pos in memchr_iter(line_delim, data) {
1051 let line = &data[start..end_pos];
1052 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1053 start = end_pos + 1;
1054 }
1055 if start < data.len() {
1056 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
1057 }
1058}
1059
1060#[inline(always)]
1062fn complement_single_field_line(
1063 line: &[u8],
1064 delim: u8,
1065 skip_idx: usize,
1066 line_delim: u8,
1067 suppress: bool,
1068 buf: &mut Vec<u8>,
1069) {
1070 let len = line.len();
1071 if len == 0 {
1072 if !suppress {
1073 unsafe { buf_push(buf, line_delim) };
1074 }
1075 return;
1076 }
1077
1078 let base = line.as_ptr();
1079 let need_before = skip_idx;
1080 let need_total = skip_idx + 1;
1081
1082 let mut delim_count: usize = 0;
1083 let mut skip_start_pos: usize = 0;
1084 let mut skip_end_pos: usize = len;
1085 let mut found_end = false;
1086
1087 for pos in memchr_iter(delim, line) {
1088 delim_count += 1;
1089 if delim_count == need_before {
1090 skip_start_pos = pos + 1;
1091 }
1092 if delim_count == need_total {
1093 skip_end_pos = pos;
1094 found_end = true;
1095 break;
1096 }
1097 }
1098
1099 if delim_count == 0 {
1100 if !suppress {
1101 unsafe {
1102 buf_extend(buf, line);
1103 buf_push(buf, line_delim);
1104 }
1105 }
1106 return;
1107 }
1108
1109 if delim_count < need_before {
1110 unsafe {
1111 buf_extend(buf, line);
1112 buf_push(buf, line_delim);
1113 }
1114 return;
1115 }
1116
1117 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1118 let has_suffix = found_end && skip_end_pos < len;
1119
1120 if has_prefix && has_suffix {
1121 unsafe {
1122 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1123 buf_push(buf, delim);
1124 buf_extend(
1125 buf,
1126 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1127 );
1128 buf_push(buf, line_delim);
1129 }
1130 } else if has_prefix {
1131 unsafe {
1132 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1133 buf_push(buf, line_delim);
1134 }
1135 } else if has_suffix {
1136 unsafe {
1137 buf_extend(
1138 buf,
1139 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1140 );
1141 buf_push(buf, line_delim);
1142 }
1143 } else {
1144 unsafe { buf_push(buf, line_delim) };
1145 }
1146}
1147
1148fn process_fields_prefix(
1152 data: &[u8],
1153 delim: u8,
1154 line_delim: u8,
1155 last_field: usize,
1156 suppress: bool,
1157 out: &mut impl Write,
1158) -> io::Result<()> {
1159 if data.len() >= PARALLEL_THRESHOLD {
1160 let chunks = split_for_scope(data, line_delim);
1161 let n = chunks.len();
1162 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1163 rayon::scope(|s| {
1164 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1165 s.spawn(move |_| {
1166 result.reserve(chunk.len());
1167 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, result);
1168 });
1169 }
1170 });
1171 let slices: Vec<IoSlice> = results
1172 .iter()
1173 .filter(|r| !r.is_empty())
1174 .map(|r| IoSlice::new(r))
1175 .collect();
1176 write_ioslices(out, &slices)?;
1177 } else if !suppress {
1178 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1182 } else {
1183 let mut buf = Vec::with_capacity(data.len());
1184 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
1185 if !buf.is_empty() {
1186 out.write_all(&buf)?;
1187 }
1188 }
1189 Ok(())
1190}
1191
1192#[inline]
1198fn fields_prefix_zerocopy(
1199 data: &[u8],
1200 delim: u8,
1201 line_delim: u8,
1202 last_field: usize,
1203 out: &mut impl Write,
1204) -> io::Result<()> {
1205 let newline_buf: [u8; 1] = [line_delim];
1206 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1207 let mut start = 0;
1208 let mut run_start: usize = 0;
1209
1210 for end_pos in memchr_iter(line_delim, data) {
1211 let line = &data[start..end_pos];
1212 let mut field_count = 1;
1213 let mut truncate_at: Option<usize> = None;
1214 for dpos in memchr_iter(delim, line) {
1215 if field_count >= last_field {
1216 truncate_at = Some(start + dpos);
1217 break;
1218 }
1219 field_count += 1;
1220 }
1221
1222 if let Some(trunc_pos) = truncate_at {
1223 if run_start < start {
1224 iov.push(IoSlice::new(&data[run_start..start]));
1225 }
1226 iov.push(IoSlice::new(&data[start..trunc_pos]));
1227 iov.push(IoSlice::new(&newline_buf));
1228 run_start = end_pos + 1;
1229
1230 if iov.len() >= MAX_IOV - 2 {
1231 write_ioslices(out, &iov)?;
1232 iov.clear();
1233 }
1234 }
1235 start = end_pos + 1;
1236 }
1237 if start < data.len() {
1239 let line = &data[start..];
1240 let mut field_count = 1;
1241 let mut truncate_at: Option<usize> = None;
1242 for dpos in memchr_iter(delim, line) {
1243 if field_count >= last_field {
1244 truncate_at = Some(start + dpos);
1245 break;
1246 }
1247 field_count += 1;
1248 }
1249 if let Some(trunc_pos) = truncate_at {
1250 if run_start < start {
1251 iov.push(IoSlice::new(&data[run_start..start]));
1252 }
1253 iov.push(IoSlice::new(&data[start..trunc_pos]));
1254 iov.push(IoSlice::new(&newline_buf));
1255 if !iov.is_empty() {
1256 write_ioslices(out, &iov)?;
1257 }
1258 return Ok(());
1259 }
1260 }
1261 if run_start < data.len() {
1263 iov.push(IoSlice::new(&data[run_start..]));
1264 if !data.is_empty() && *data.last().unwrap() != line_delim {
1265 iov.push(IoSlice::new(&newline_buf));
1266 }
1267 }
1268 if !iov.is_empty() {
1269 write_ioslices(out, &iov)?;
1270 }
1271 Ok(())
1272}
1273
1274fn fields_prefix_chunk(
1276 data: &[u8],
1277 delim: u8,
1278 line_delim: u8,
1279 last_field: usize,
1280 suppress: bool,
1281 buf: &mut Vec<u8>,
1282) {
1283 buf.reserve(data.len());
1284 let mut start = 0;
1285 for end_pos in memchr_iter(line_delim, data) {
1286 let line = &data[start..end_pos];
1287 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1288 start = end_pos + 1;
1289 }
1290 if start < data.len() {
1291 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1292 }
1293}
1294
1295#[inline(always)]
1298fn fields_prefix_line(
1299 line: &[u8],
1300 delim: u8,
1301 line_delim: u8,
1302 last_field: usize,
1303 suppress: bool,
1304 buf: &mut Vec<u8>,
1305) {
1306 let len = line.len();
1307 if len == 0 {
1308 if !suppress {
1309 unsafe { buf_push(buf, line_delim) };
1310 }
1311 return;
1312 }
1313
1314 let base = line.as_ptr();
1316
1317 let mut field_count = 1usize;
1318 let mut has_delim = false;
1319
1320 for pos in memchr_iter(delim, line) {
1321 has_delim = true;
1322 if field_count >= last_field {
1323 unsafe {
1324 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1325 buf_push(buf, line_delim);
1326 }
1327 return;
1328 }
1329 field_count += 1;
1330 }
1331
1332 if !has_delim {
1333 if !suppress {
1334 unsafe {
1335 buf_extend(buf, line);
1336 buf_push(buf, line_delim);
1337 }
1338 }
1339 return;
1340 }
1341
1342 unsafe {
1343 buf_extend(buf, line);
1344 buf_push(buf, line_delim);
1345 }
1346}
1347
1348fn process_fields_suffix(
1350 data: &[u8],
1351 delim: u8,
1352 line_delim: u8,
1353 start_field: usize,
1354 suppress: bool,
1355 out: &mut impl Write,
1356) -> io::Result<()> {
1357 if data.len() >= PARALLEL_THRESHOLD {
1358 let chunks = split_for_scope(data, line_delim);
1359 let n = chunks.len();
1360 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1361 rayon::scope(|s| {
1362 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1363 s.spawn(move |_| {
1364 result.reserve(chunk.len());
1365 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, result);
1366 });
1367 }
1368 });
1369 let slices: Vec<IoSlice> = results
1370 .iter()
1371 .filter(|r| !r.is_empty())
1372 .map(|r| IoSlice::new(r))
1373 .collect();
1374 write_ioslices(out, &slices)?;
1375 } else {
1376 let mut buf = Vec::with_capacity(data.len());
1377 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
1378 if !buf.is_empty() {
1379 out.write_all(&buf)?;
1380 }
1381 }
1382 Ok(())
1383}
1384
1385fn fields_suffix_chunk(
1387 data: &[u8],
1388 delim: u8,
1389 line_delim: u8,
1390 start_field: usize,
1391 suppress: bool,
1392 buf: &mut Vec<u8>,
1393) {
1394 buf.reserve(data.len());
1395 let mut start = 0;
1396 for end_pos in memchr_iter(line_delim, data) {
1397 let line = &data[start..end_pos];
1398 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1399 start = end_pos + 1;
1400 }
1401 if start < data.len() {
1402 fields_suffix_line(
1403 &data[start..],
1404 delim,
1405 line_delim,
1406 start_field,
1407 suppress,
1408 buf,
1409 );
1410 }
1411}
1412
1413#[inline(always)]
1416fn fields_suffix_line(
1417 line: &[u8],
1418 delim: u8,
1419 line_delim: u8,
1420 start_field: usize,
1421 suppress: bool,
1422 buf: &mut Vec<u8>,
1423) {
1424 let len = line.len();
1425 if len == 0 {
1426 if !suppress {
1427 unsafe { buf_push(buf, line_delim) };
1428 }
1429 return;
1430 }
1431
1432 let base = line.as_ptr();
1434
1435 let skip_delims = start_field - 1;
1436 let mut delim_count = 0usize;
1437 let mut has_delim = false;
1438
1439 for pos in memchr_iter(delim, line) {
1440 has_delim = true;
1441 delim_count += 1;
1442 if delim_count >= skip_delims {
1443 unsafe {
1444 buf_extend(
1445 buf,
1446 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1447 );
1448 buf_push(buf, line_delim);
1449 }
1450 return;
1451 }
1452 }
1453
1454 if !has_delim {
1455 if !suppress {
1456 unsafe {
1457 buf_extend(buf, line);
1458 buf_push(buf, line_delim);
1459 }
1460 }
1461 return;
1462 }
1463
1464 unsafe { buf_push(buf, line_delim) };
1466}
1467
1468fn process_fields_mid_range(
1471 data: &[u8],
1472 delim: u8,
1473 line_delim: u8,
1474 start_field: usize,
1475 end_field: usize,
1476 suppress: bool,
1477 out: &mut impl Write,
1478) -> io::Result<()> {
1479 if data.len() >= PARALLEL_THRESHOLD {
1480 let chunks = split_for_scope(data, line_delim);
1481 let n = chunks.len();
1482 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1483 rayon::scope(|s| {
1484 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1485 s.spawn(move |_| {
1486 result.reserve(chunk.len());
1487 fields_mid_range_chunk(
1488 chunk,
1489 delim,
1490 line_delim,
1491 start_field,
1492 end_field,
1493 suppress,
1494 result,
1495 );
1496 });
1497 }
1498 });
1499 let slices: Vec<IoSlice> = results
1500 .iter()
1501 .filter(|r| !r.is_empty())
1502 .map(|r| IoSlice::new(r))
1503 .collect();
1504 write_ioslices(out, &slices)?;
1505 } else {
1506 let mut buf = Vec::with_capacity(data.len());
1507 fields_mid_range_chunk(
1508 data,
1509 delim,
1510 line_delim,
1511 start_field,
1512 end_field,
1513 suppress,
1514 &mut buf,
1515 );
1516 if !buf.is_empty() {
1517 out.write_all(&buf)?;
1518 }
1519 }
1520 Ok(())
1521}
1522
/// Runs [`fields_mid_range_line`] over every line of `data`, appending to `buf`.
///
/// Lines are separated by `line_delim`; a final line that is not followed by
/// `line_delim` is processed as well.
fn fields_mid_range_chunk(
    data: &[u8],
    delim: u8,
    line_delim: u8,
    start_field: usize,
    end_field: usize,
    suppress: bool,
    buf: &mut Vec<u8>,
) {
    // Every line emits at most its own bytes plus one terminator. When the last
    // line has no terminator in `data` and is emitted whole, the total is
    // `data.len() + 1`, one byte more than `reserve(data.len())` provides.
    // The per-line routine appends through the unsafe `buf_push`/`buf_extend`
    // helpers (presumably relying on pre-reserved capacity), so reserve the extra
    // byte here, as the single-field paths already do.
    buf.reserve(data.len() + 1);

    let mut line_start = 0;
    for line_end in memchr_iter(line_delim, data) {
        fields_mid_range_line(
            &data[line_start..line_end],
            delim,
            line_delim,
            start_field,
            end_field,
            suppress,
            buf,
        );
        line_start = line_end + 1;
    }

    // Unterminated final line.
    if line_start < data.len() {
        fields_mid_range_line(
            &data[line_start..],
            delim,
            line_delim,
            start_field,
            end_field,
            suppress,
            buf,
        );
    }
}
1563
1564#[inline(always)]
1568fn fields_mid_range_line(
1569 line: &[u8],
1570 delim: u8,
1571 line_delim: u8,
1572 start_field: usize,
1573 end_field: usize,
1574 suppress: bool,
1575 buf: &mut Vec<u8>,
1576) {
1577 let len = line.len();
1578 if len == 0 {
1579 if !suppress {
1580 unsafe { buf_push(buf, line_delim) };
1581 }
1582 return;
1583 }
1584
1585 let base = line.as_ptr();
1587
1588 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
1592 let mut delim_count = 0;
1593 let mut range_start = 0;
1594 let mut has_delim = false;
1595
1596 for pos in memchr_iter(delim, line) {
1597 has_delim = true;
1598 delim_count += 1;
1599 if delim_count == skip_before {
1600 range_start = pos + 1;
1601 }
1602 if delim_count == target_end_delim {
1603 if skip_before == 0 {
1604 range_start = 0;
1605 }
1606 unsafe {
1607 buf_extend(
1608 buf,
1609 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
1610 );
1611 buf_push(buf, line_delim);
1612 }
1613 return;
1614 }
1615 }
1616
1617 if !has_delim {
1618 if !suppress {
1619 unsafe {
1620 buf_extend(buf, line);
1621 buf_push(buf, line_delim);
1622 }
1623 }
1624 return;
1625 }
1626
1627 if delim_count >= skip_before {
1629 if skip_before == 0 {
1631 range_start = 0;
1632 }
1633 unsafe {
1634 buf_extend(
1635 buf,
1636 std::slice::from_raw_parts(base.add(range_start), len - range_start),
1637 );
1638 buf_push(buf, line_delim);
1639 }
1640 } else {
1641 unsafe { buf_push(buf, line_delim) };
1643 }
1644}
1645
1646fn single_field1_parallel(
1657 data: &[u8],
1658 delim: u8,
1659 line_delim: u8,
1660 out: &mut impl Write,
1661) -> io::Result<()> {
1662 let chunks = split_for_scope(data, line_delim);
1663 let n = chunks.len();
1664 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1665 rayon::scope(|s| {
1666 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1667 s.spawn(move |_| {
1668 result.reserve(chunk.len() + 1);
1669 single_field1_to_buf(chunk, delim, line_delim, result);
1670 });
1671 }
1672 });
1673 let slices: Vec<IoSlice> = results
1674 .iter()
1675 .filter(|r| !r.is_empty())
1676 .map(|r| IoSlice::new(r))
1677 .collect();
1678 write_ioslices(out, &slices)
1679}
1680
1681#[inline]
1692fn single_field1_to_buf(data: &[u8], delim: u8, line_delim: u8, buf: &mut Vec<u8>) {
1693 debug_assert_ne!(delim, line_delim, "delim and line_delim must differ");
1694 buf.reserve(data.len() + 1);
1697
1698 let base = data.as_ptr();
1699 let initial_len = buf.len();
1700 let mut out_ptr = unsafe { buf.as_mut_ptr().add(initial_len) };
1701 let mut start = 0;
1702 let mut run_start: usize = 0;
1704 let mut in_run = true; for end_pos in memchr_iter(line_delim, data) {
1707 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
1708 match memchr::memchr(delim, line) {
1709 Some(dp) => {
1710 if in_run && run_start < start {
1712 let run_len = start - run_start;
1714 unsafe {
1715 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
1716 out_ptr = out_ptr.add(run_len);
1717 }
1718 }
1719 unsafe {
1721 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, dp);
1722 out_ptr = out_ptr.add(dp);
1723 *out_ptr = line_delim;
1724 out_ptr = out_ptr.add(1);
1725 }
1726 run_start = end_pos + 1;
1727 in_run = true;
1728 }
1729 None => {
1730 if !in_run {
1732 run_start = start;
1733 in_run = true;
1734 }
1735 }
1736 }
1737 start = end_pos + 1;
1738 }
1739
1740 if in_run && run_start < start {
1742 let run_len = start - run_start;
1743 unsafe {
1744 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
1745 out_ptr = out_ptr.add(run_len);
1746 }
1747 }
1748
1749 if start < data.len() {
1751 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
1752 match memchr::memchr(delim, line) {
1753 Some(dp) => {
1754 unsafe {
1756 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, dp);
1757 out_ptr = out_ptr.add(dp);
1758 *out_ptr = line_delim;
1759 out_ptr = out_ptr.add(1);
1760 }
1761 }
1762 None => {
1763 let len = data.len() - start;
1765 unsafe {
1766 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, len);
1767 out_ptr = out_ptr.add(len);
1768 *out_ptr = line_delim;
1769 out_ptr = out_ptr.add(1);
1770 }
1771 }
1772 }
1773 }
1774
1775 unsafe {
1776 let new_len = out_ptr as usize - buf.as_ptr() as usize;
1777 debug_assert!(new_len >= initial_len && new_len <= buf.capacity());
1778 buf.set_len(new_len);
1779 }
1780}
1781
1782#[inline]
1791#[allow(dead_code)]
1792fn single_field1_zerocopy(
1793 data: &[u8],
1794 delim: u8,
1795 line_delim: u8,
1796 out: &mut impl Write,
1797) -> io::Result<()> {
1798 let newline_buf: [u8; 1] = [line_delim];
1799
1800 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1801 let mut run_start: usize = 0;
1802 let mut start = 0;
1803
1804 for end_pos in memchr_iter(line_delim, data) {
1805 let line = &data[start..end_pos];
1806 if let Some(dp) = memchr::memchr(delim, line) {
1807 if run_start < start {
1810 iov.push(IoSlice::new(&data[run_start..start]));
1811 }
1812 iov.push(IoSlice::new(&data[start..start + dp]));
1813 iov.push(IoSlice::new(&newline_buf));
1814 run_start = end_pos + 1;
1815
1816 if iov.len() >= MAX_IOV - 2 {
1817 write_ioslices(out, &iov)?;
1818 iov.clear();
1819 }
1820 }
1821 start = end_pos + 1;
1823 }
1824
1825 if start < data.len() {
1827 let line = &data[start..];
1828 if let Some(dp) = memchr::memchr(delim, line) {
1829 if run_start < start {
1830 iov.push(IoSlice::new(&data[run_start..start]));
1831 }
1832 iov.push(IoSlice::new(&data[start..start + dp]));
1833 iov.push(IoSlice::new(&newline_buf));
1834 if !iov.is_empty() {
1835 write_ioslices(out, &iov)?;
1836 }
1837 return Ok(());
1838 }
1839 }
1840
1841 if run_start < data.len() {
1843 iov.push(IoSlice::new(&data[run_start..]));
1844 if !data.is_empty() && *data.last().unwrap() != line_delim {
1845 iov.push(IoSlice::new(&newline_buf));
1846 }
1847 }
1848 if !iov.is_empty() {
1849 write_ioslices(out, &iov)?;
1850 }
1851 Ok(())
1852}
1853
1854fn process_single_field_chunk(
1858 data: &[u8],
1859 delim: u8,
1860 target_idx: usize,
1861 line_delim: u8,
1862 suppress: bool,
1863 buf: &mut Vec<u8>,
1864) {
1865 buf.reserve(data.len() + 1);
1867
1868 let base = data.as_ptr();
1869 let initial_len = buf.len();
1870 let mut out_ptr = unsafe { buf.as_mut_ptr().add(initial_len) };
1871 let mut start = 0;
1872 let mut run_start: usize = 0;
1874 let mut in_run = !suppress; for end_pos in memchr_iter(line_delim, data) {
1877 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
1878 let line_len = end_pos - start;
1879
1880 if line_len == 0 {
1881 if !suppress {
1882 if !in_run {
1884 run_start = start;
1885 in_run = true;
1886 }
1887 }
1888 start = end_pos + 1;
1889 continue;
1890 }
1891
1892 let mut field_start_offset = 0;
1894 let mut field_idx = 0;
1895 let mut found = false;
1896 let mut has_delim = false;
1897
1898 for pos in memchr_iter(delim, line) {
1899 has_delim = true;
1900 if field_idx == target_idx {
1901 if in_run && run_start < start {
1904 let run_len = start - run_start;
1905 unsafe {
1906 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
1907 out_ptr = out_ptr.add(run_len);
1908 }
1909 }
1910 let field_len = pos - field_start_offset;
1911 unsafe {
1912 std::ptr::copy_nonoverlapping(
1913 base.add(start + field_start_offset),
1914 out_ptr,
1915 field_len,
1916 );
1917 out_ptr = out_ptr.add(field_len);
1918 *out_ptr = line_delim;
1919 out_ptr = out_ptr.add(1);
1920 }
1921 run_start = end_pos + 1;
1922 in_run = true;
1923 found = true;
1924 break;
1925 }
1926 field_idx += 1;
1927 field_start_offset = pos + 1;
1928 }
1929
1930 if !found {
1931 if !has_delim {
1932 if !suppress {
1934 if !in_run {
1936 run_start = start;
1937 in_run = true;
1938 }
1939 } else {
1940 if in_run && run_start < start {
1942 let run_len = start - run_start;
1943 unsafe {
1944 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
1945 out_ptr = out_ptr.add(run_len);
1946 }
1947 }
1948 in_run = false;
1949 run_start = end_pos + 1;
1950 }
1951 } else if field_idx == target_idx {
1952 if in_run && run_start < start {
1954 let run_len = start - run_start;
1955 unsafe {
1956 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
1957 out_ptr = out_ptr.add(run_len);
1958 }
1959 }
1960 let field_len = line_len - field_start_offset;
1961 unsafe {
1962 std::ptr::copy_nonoverlapping(
1963 base.add(start + field_start_offset),
1964 out_ptr,
1965 field_len,
1966 );
1967 out_ptr = out_ptr.add(field_len);
1968 *out_ptr = line_delim;
1969 out_ptr = out_ptr.add(1);
1970 }
1971 run_start = end_pos + 1;
1972 in_run = true;
1973 } else {
1974 if in_run && run_start < start {
1976 let run_len = start - run_start;
1977 unsafe {
1978 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
1979 out_ptr = out_ptr.add(run_len);
1980 }
1981 }
1982 unsafe {
1983 *out_ptr = line_delim;
1984 out_ptr = out_ptr.add(1);
1985 }
1986 run_start = end_pos + 1;
1987 in_run = true;
1988 }
1989 }
1990
1991 start = end_pos + 1;
1992 }
1993
1994 if in_run && run_start < start {
1996 let run_len = start - run_start;
1997 unsafe {
1998 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
1999 out_ptr = out_ptr.add(run_len);
2000 }
2001 }
2002
2003 if start < data.len() {
2005 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2006 let line_len = data.len() - start;
2007
2008 if line_len == 0 {
2009 if !suppress {
2010 unsafe {
2011 *out_ptr = line_delim;
2012 out_ptr = out_ptr.add(1);
2013 }
2014 }
2015 } else {
2016 let mut field_start_offset = 0;
2017 let mut field_idx = 0;
2018 let mut found = false;
2019 let mut has_delim = false;
2020
2021 for pos in memchr_iter(delim, line) {
2022 has_delim = true;
2023 if field_idx == target_idx {
2024 let field_len = pos - field_start_offset;
2025 unsafe {
2026 std::ptr::copy_nonoverlapping(
2027 base.add(start + field_start_offset),
2028 out_ptr,
2029 field_len,
2030 );
2031 out_ptr = out_ptr.add(field_len);
2032 *out_ptr = line_delim;
2033 out_ptr = out_ptr.add(1);
2034 }
2035 found = true;
2036 break;
2037 }
2038 field_idx += 1;
2039 field_start_offset = pos + 1;
2040 }
2041
2042 if !found {
2043 if !has_delim {
2044 if !suppress {
2045 unsafe {
2046 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, line_len);
2047 out_ptr = out_ptr.add(line_len);
2048 *out_ptr = line_delim;
2049 out_ptr = out_ptr.add(1);
2050 }
2051 }
2052 } else if field_idx == target_idx {
2053 let field_len = line_len - field_start_offset;
2054 unsafe {
2055 std::ptr::copy_nonoverlapping(
2056 base.add(start + field_start_offset),
2057 out_ptr,
2058 field_len,
2059 );
2060 out_ptr = out_ptr.add(field_len);
2061 *out_ptr = line_delim;
2062 out_ptr = out_ptr.add(1);
2063 }
2064 } else {
2065 unsafe {
2066 *out_ptr = line_delim;
2067 out_ptr = out_ptr.add(1);
2068 }
2069 }
2070 }
2071 }
2072 }
2073
2074 unsafe {
2075 let new_len = out_ptr as usize - buf.as_ptr() as usize;
2076 debug_assert!(new_len >= initial_len && new_len <= buf.capacity());
2077 buf.set_len(new_len);
2078 }
2079}
2080
2081#[inline(always)]
2085fn extract_fields_to_buf(
2086 line: &[u8],
2087 delim: u8,
2088 ranges: &[Range],
2089 output_delim: &[u8],
2090 suppress: bool,
2091 max_field: usize,
2092 field_mask: u64,
2093 line_delim: u8,
2094 buf: &mut Vec<u8>,
2095 complement: bool,
2096) {
2097 let len = line.len();
2098
2099 if len == 0 {
2100 if !suppress {
2101 buf.push(line_delim);
2102 }
2103 return;
2104 }
2105
2106 let needed = len + output_delim.len() * 16 + 1;
2109 if buf.capacity() - buf.len() < needed {
2110 buf.reserve(needed);
2111 }
2112
2113 let base = line.as_ptr();
2114 let mut field_num: usize = 1;
2115 let mut field_start: usize = 0;
2116 let mut first_output = true;
2117 let mut has_delim = false;
2118
2119 for delim_pos in memchr_iter(delim, line) {
2121 has_delim = true;
2122
2123 if is_selected(field_num, field_mask, ranges, complement) {
2124 if !first_output {
2125 unsafe { buf_extend(buf, output_delim) };
2126 }
2127 unsafe {
2128 buf_extend(
2129 buf,
2130 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2131 )
2132 };
2133 first_output = false;
2134 }
2135
2136 field_num += 1;
2137 field_start = delim_pos + 1;
2138
2139 if field_num > max_field {
2140 break;
2141 }
2142 }
2143
2144 if (field_num <= max_field || complement)
2146 && has_delim
2147 && is_selected(field_num, field_mask, ranges, complement)
2148 {
2149 if !first_output {
2150 unsafe { buf_extend(buf, output_delim) };
2151 }
2152 unsafe {
2153 buf_extend(
2154 buf,
2155 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2156 )
2157 };
2158 first_output = false;
2159 }
2160
2161 if !first_output {
2162 unsafe { buf_push(buf, line_delim) };
2163 } else if !has_delim {
2164 if !suppress {
2165 unsafe {
2166 buf_extend(buf, line);
2167 buf_push(buf, line_delim);
2168 }
2169 }
2170 } else {
2171 unsafe { buf_push(buf, line_delim) };
2172 }
2173}
2174
2175fn process_bytes_from_start(
2182 data: &[u8],
2183 max_bytes: usize,
2184 line_delim: u8,
2185 out: &mut impl Write,
2186) -> io::Result<()> {
2187 if data.len() < PARALLEL_THRESHOLD && max_bytes > 0 && max_bytes < usize::MAX {
2195 let mut start = 0;
2196 let mut all_fit = true;
2197 for pos in memchr_iter(line_delim, data) {
2198 if pos - start > max_bytes {
2199 all_fit = false;
2200 break;
2201 }
2202 start = pos + 1;
2203 }
2204 if all_fit && start < data.len() && data.len() - start > max_bytes {
2206 all_fit = false;
2207 }
2208 if all_fit {
2209 if !data.is_empty() && data[data.len() - 1] == line_delim {
2211 return out.write_all(data);
2212 } else if !data.is_empty() {
2213 out.write_all(data)?;
2214 return out.write_all(&[line_delim]);
2215 }
2216 return Ok(());
2217 }
2218 }
2219
2220 if data.len() >= PARALLEL_THRESHOLD {
2221 let chunks = split_for_scope(data, line_delim);
2222 let n = chunks.len();
2223 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2224 rayon::scope(|s| {
2225 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2226 s.spawn(move |_| {
2227 result.reserve(chunk.len());
2230 bytes_from_start_chunk(chunk, max_bytes, line_delim, result);
2231 });
2232 }
2233 });
2234 let slices: Vec<IoSlice> = results
2236 .iter()
2237 .filter(|r| !r.is_empty())
2238 .map(|r| IoSlice::new(r))
2239 .collect();
2240 write_ioslices(out, &slices)?;
2241 } else {
2242 if max_bytes <= 512 {
2248 let est_out = (data.len() / 4).max(max_bytes + 2);
2251 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2252 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2253 if !buf.is_empty() {
2254 out.write_all(&buf)?;
2255 }
2256 } else {
2257 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2261 }
2262 }
2263 Ok(())
2264}
2265
2266#[inline]
2271fn bytes_from_start_zerocopy(
2272 data: &[u8],
2273 max_bytes: usize,
2274 line_delim: u8,
2275 out: &mut impl Write,
2276) -> io::Result<()> {
2277 let newline_buf: [u8; 1] = [line_delim];
2278 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2279 let mut start = 0;
2280 let mut run_start: usize = 0;
2281
2282 for pos in memchr_iter(line_delim, data) {
2283 let line_len = pos - start;
2284 if line_len > max_bytes {
2285 if run_start < start {
2287 iov.push(IoSlice::new(&data[run_start..start]));
2288 }
2289 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2290 iov.push(IoSlice::new(&newline_buf));
2291 run_start = pos + 1;
2292
2293 if iov.len() >= MAX_IOV - 2 {
2294 write_ioslices(out, &iov)?;
2295 iov.clear();
2296 }
2297 }
2298 start = pos + 1;
2299 }
2300 if start < data.len() {
2302 let line_len = data.len() - start;
2303 if line_len > max_bytes {
2304 if run_start < start {
2305 iov.push(IoSlice::new(&data[run_start..start]));
2306 }
2307 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2308 iov.push(IoSlice::new(&newline_buf));
2309 if !iov.is_empty() {
2310 write_ioslices(out, &iov)?;
2311 }
2312 return Ok(());
2313 }
2314 }
2315 if run_start < data.len() {
2317 iov.push(IoSlice::new(&data[run_start..]));
2318 if !data.is_empty() && *data.last().unwrap() != line_delim {
2319 iov.push(IoSlice::new(&newline_buf));
2320 }
2321 }
2322 if !iov.is_empty() {
2323 write_ioslices(out, &iov)?;
2324 }
2325 Ok(())
2326}
2327
2328#[inline]
2333fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2334 buf.reserve(data.len());
2337
2338 let src = data.as_ptr();
2339 let dst_base = buf.as_mut_ptr();
2340 let mut wp = buf.len();
2341 let mut start = 0;
2342
2343 for pos in memchr_iter(line_delim, data) {
2344 let line_len = pos - start;
2345 let take = line_len.min(max_bytes);
2346 unsafe {
2347 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2348 *dst_base.add(wp + take) = line_delim;
2349 }
2350 wp += take + 1;
2351 start = pos + 1;
2352 }
2353 if start < data.len() {
2355 let line_len = data.len() - start;
2356 let take = line_len.min(max_bytes);
2357 unsafe {
2358 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2359 *dst_base.add(wp + take) = line_delim;
2360 }
2361 wp += take + 1;
2362 }
2363 unsafe { buf.set_len(wp) };
2364}
2365
2366fn process_bytes_from_offset(
2368 data: &[u8],
2369 skip_bytes: usize,
2370 line_delim: u8,
2371 out: &mut impl Write,
2372) -> io::Result<()> {
2373 if data.len() >= PARALLEL_THRESHOLD {
2374 let chunks = split_for_scope(data, line_delim);
2375 let n = chunks.len();
2376 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2377 rayon::scope(|s| {
2378 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2379 s.spawn(move |_| {
2380 result.reserve(chunk.len());
2381 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, result);
2382 });
2383 }
2384 });
2385 let slices: Vec<IoSlice> = results
2387 .iter()
2388 .filter(|r| !r.is_empty())
2389 .map(|r| IoSlice::new(r))
2390 .collect();
2391 write_ioslices(out, &slices)?;
2392 } else {
2393 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2395 }
2396 Ok(())
2397}
2398
2399#[inline]
2403fn bytes_from_offset_zerocopy(
2404 data: &[u8],
2405 skip_bytes: usize,
2406 line_delim: u8,
2407 out: &mut impl Write,
2408) -> io::Result<()> {
2409 let delim_buf = [line_delim];
2410 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2411
2412 let mut start = 0;
2413 for pos in memchr_iter(line_delim, data) {
2414 let line_len = pos - start;
2415 if line_len > skip_bytes {
2416 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2417 }
2418 iov.push(IoSlice::new(&delim_buf));
2419 if iov.len() >= MAX_IOV - 1 {
2421 write_ioslices(out, &iov)?;
2422 iov.clear();
2423 }
2424 start = pos + 1;
2425 }
2426 if start < data.len() {
2427 let line_len = data.len() - start;
2428 if line_len > skip_bytes {
2429 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2430 }
2431 iov.push(IoSlice::new(&delim_buf));
2432 }
2433 if !iov.is_empty() {
2434 write_ioslices(out, &iov)?;
2435 }
2436 Ok(())
2437}
2438
2439#[inline]
2442fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2443 buf.reserve(data.len());
2444
2445 let src = data.as_ptr();
2446 let dst_base = buf.as_mut_ptr();
2447 let mut wp = buf.len();
2448 let mut start = 0;
2449
2450 for pos in memchr_iter(line_delim, data) {
2451 let line_len = pos - start;
2452 if line_len > skip_bytes {
2453 let take = line_len - skip_bytes;
2454 unsafe {
2455 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2456 }
2457 wp += take;
2458 }
2459 unsafe {
2460 *dst_base.add(wp) = line_delim;
2461 }
2462 wp += 1;
2463 start = pos + 1;
2464 }
2465 if start < data.len() {
2466 let line_len = data.len() - start;
2467 if line_len > skip_bytes {
2468 let take = line_len - skip_bytes;
2469 unsafe {
2470 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2471 }
2472 wp += take;
2473 }
2474 unsafe {
2475 *dst_base.add(wp) = line_delim;
2476 }
2477 wp += 1;
2478 }
2479 unsafe { buf.set_len(wp) };
2480}
2481
2482fn process_bytes_mid_range(
2484 data: &[u8],
2485 start_byte: usize,
2486 end_byte: usize,
2487 line_delim: u8,
2488 out: &mut impl Write,
2489) -> io::Result<()> {
2490 let skip = start_byte.saturating_sub(1);
2491
2492 if data.len() >= PARALLEL_THRESHOLD {
2493 let chunks = split_for_scope(data, line_delim);
2494 let n = chunks.len();
2495 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2496 rayon::scope(|s| {
2497 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2498 s.spawn(move |_| {
2499 result.reserve(chunk.len());
2500 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, result);
2501 });
2502 }
2503 });
2504 let slices: Vec<IoSlice> = results
2505 .iter()
2506 .filter(|r| !r.is_empty())
2507 .map(|r| IoSlice::new(r))
2508 .collect();
2509 write_ioslices(out, &slices)?;
2510 } else {
2511 let mut buf = Vec::with_capacity(data.len());
2512 bytes_mid_range_chunk(data, skip, end_byte, line_delim, &mut buf);
2513 if !buf.is_empty() {
2514 out.write_all(&buf)?;
2515 }
2516 }
2517 Ok(())
2518}
2519
2520#[inline]
2524fn bytes_mid_range_chunk(
2525 data: &[u8],
2526 skip: usize,
2527 end_byte: usize,
2528 line_delim: u8,
2529 buf: &mut Vec<u8>,
2530) {
2531 buf.reserve(data.len());
2532
2533 let src = data.as_ptr();
2534 let dst_base = buf.as_mut_ptr();
2535 let mut wp = buf.len();
2536 let mut start = 0;
2537
2538 for pos in memchr_iter(line_delim, data) {
2539 let line_len = pos - start;
2540 if line_len > skip {
2541 let take_end = line_len.min(end_byte);
2542 let take = take_end - skip;
2543 unsafe {
2544 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2545 }
2546 wp += take;
2547 }
2548 unsafe {
2549 *dst_base.add(wp) = line_delim;
2550 }
2551 wp += 1;
2552 start = pos + 1;
2553 }
2554 if start < data.len() {
2555 let line_len = data.len() - start;
2556 if line_len > skip {
2557 let take_end = line_len.min(end_byte);
2558 let take = take_end - skip;
2559 unsafe {
2560 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2561 }
2562 wp += take;
2563 }
2564 unsafe {
2565 *dst_base.add(wp) = line_delim;
2566 }
2567 wp += 1;
2568 }
2569 unsafe { buf.set_len(wp) };
2570}
2571
2572fn process_bytes_complement_mid(
2574 data: &[u8],
2575 skip_start: usize,
2576 skip_end: usize,
2577 line_delim: u8,
2578 out: &mut impl Write,
2579) -> io::Result<()> {
2580 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
2582 let chunks = split_for_scope(data, line_delim);
2583 let n = chunks.len();
2584 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2585 rayon::scope(|s| {
2586 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2587 s.spawn(move |_| {
2588 result.reserve(chunk.len());
2589 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, result);
2590 });
2591 }
2592 });
2593 let slices: Vec<IoSlice> = results
2594 .iter()
2595 .filter(|r| !r.is_empty())
2596 .map(|r| IoSlice::new(r))
2597 .collect();
2598 write_ioslices(out, &slices)?;
2599 } else {
2600 let mut buf = Vec::with_capacity(data.len());
2601 bytes_complement_mid_chunk(data, prefix_bytes, skip_end, line_delim, &mut buf);
2602 if !buf.is_empty() {
2603 out.write_all(&buf)?;
2604 }
2605 }
2606 Ok(())
2607}
2608
2609#[inline]
2612fn bytes_complement_mid_chunk(
2613 data: &[u8],
2614 prefix_bytes: usize,
2615 skip_end: usize,
2616 line_delim: u8,
2617 buf: &mut Vec<u8>,
2618) {
2619 buf.reserve(data.len());
2620
2621 let src = data.as_ptr();
2622 let dst_base = buf.as_mut_ptr();
2623 let mut wp = buf.len();
2624 let mut start = 0;
2625
2626 for pos in memchr_iter(line_delim, data) {
2627 let line_len = pos - start;
2628 let take_prefix = prefix_bytes.min(line_len);
2630 if take_prefix > 0 {
2631 unsafe {
2632 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2633 }
2634 wp += take_prefix;
2635 }
2636 if line_len > skip_end {
2638 let suffix_len = line_len - skip_end;
2639 unsafe {
2640 std::ptr::copy_nonoverlapping(
2641 src.add(start + skip_end),
2642 dst_base.add(wp),
2643 suffix_len,
2644 );
2645 }
2646 wp += suffix_len;
2647 }
2648 unsafe {
2649 *dst_base.add(wp) = line_delim;
2650 }
2651 wp += 1;
2652 start = pos + 1;
2653 }
2654 if start < data.len() {
2655 let line_len = data.len() - start;
2656 let take_prefix = prefix_bytes.min(line_len);
2657 if take_prefix > 0 {
2658 unsafe {
2659 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2660 }
2661 wp += take_prefix;
2662 }
2663 if line_len > skip_end {
2664 let suffix_len = line_len - skip_end;
2665 unsafe {
2666 std::ptr::copy_nonoverlapping(
2667 src.add(start + skip_end),
2668 dst_base.add(wp),
2669 suffix_len,
2670 );
2671 }
2672 wp += suffix_len;
2673 }
2674 unsafe {
2675 *dst_base.add(wp) = line_delim;
2676 }
2677 wp += 1;
2678 }
2679 unsafe { buf.set_len(wp) };
2680}
2681
2682fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
2684 let line_delim = cfg.line_delim;
2685 let ranges = cfg.ranges;
2686 let complement = cfg.complement;
2687 let output_delim = cfg.output_delim;
2688
2689 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
2691 let max_bytes = ranges[0].end;
2692 if max_bytes < usize::MAX {
2693 return process_bytes_from_start(data, max_bytes, line_delim, out);
2694 }
2695 }
2696
2697 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
2699 let skip_bytes = ranges[0].start.saturating_sub(1);
2700 if skip_bytes > 0 {
2701 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
2702 }
2703 }
2704
2705 if !complement
2707 && ranges.len() == 1
2708 && ranges[0].start > 1
2709 && ranges[0].end < usize::MAX
2710 && output_delim.is_empty()
2711 {
2712 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
2713 }
2714
2715 if complement
2717 && ranges.len() == 1
2718 && ranges[0].start == 1
2719 && ranges[0].end < usize::MAX
2720 && output_delim.is_empty()
2721 {
2722 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
2723 }
2724
2725 if complement
2727 && ranges.len() == 1
2728 && ranges[0].end == usize::MAX
2729 && ranges[0].start > 1
2730 && output_delim.is_empty()
2731 {
2732 let max_bytes = ranges[0].start - 1;
2733 return process_bytes_from_start(data, max_bytes, line_delim, out);
2734 }
2735
2736 if complement
2738 && ranges.len() == 1
2739 && ranges[0].start > 1
2740 && ranges[0].end < usize::MAX
2741 && output_delim.is_empty()
2742 {
2743 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
2744 }
2745
2746 if data.len() >= PARALLEL_THRESHOLD {
2747 let chunks = split_for_scope(data, line_delim);
2748 let n = chunks.len();
2749 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2750 rayon::scope(|s| {
2751 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2752 s.spawn(move |_| {
2753 result.reserve(chunk.len() + 1);
2754 process_bytes_chunk(
2755 chunk,
2756 ranges,
2757 complement,
2758 output_delim,
2759 line_delim,
2760 result,
2761 );
2762 });
2763 }
2764 });
2765 let slices: Vec<IoSlice> = results
2766 .iter()
2767 .filter(|r| !r.is_empty())
2768 .map(|r| IoSlice::new(r))
2769 .collect();
2770 write_ioslices(out, &slices)?;
2771 } else {
2772 let mut buf = Vec::with_capacity(data.len() + 1);
2774 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
2775 if !buf.is_empty() {
2776 out.write_all(&buf)?;
2777 }
2778 }
2779 Ok(())
2780}
2781
2782fn process_bytes_chunk(
2787 data: &[u8],
2788 ranges: &[Range],
2789 complement: bool,
2790 output_delim: &[u8],
2791 line_delim: u8,
2792 buf: &mut Vec<u8>,
2793) {
2794 buf.reserve(data.len());
2795 let base = data.as_ptr();
2796 let mut start = 0;
2797 for end_pos in memchr_iter(line_delim, data) {
2798 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
2799 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2800 unsafe { buf_push(buf, line_delim) };
2801 start = end_pos + 1;
2802 }
2803 if start < data.len() {
2804 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2805 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2806 unsafe { buf_push(buf, line_delim) };
2807 }
2808}
2809
2810#[inline(always)]
2814fn cut_bytes_to_buf(
2815 line: &[u8],
2816 ranges: &[Range],
2817 complement: bool,
2818 output_delim: &[u8],
2819 buf: &mut Vec<u8>,
2820) {
2821 let len = line.len();
2822 let base = line.as_ptr();
2823 let mut first_range = true;
2824
2825 let needed = len + output_delim.len() * ranges.len() + 1;
2827 if buf.capacity() - buf.len() < needed {
2828 buf.reserve(needed);
2829 }
2830
2831 if complement {
2832 let mut pos: usize = 1;
2833 for r in ranges {
2834 let rs = r.start;
2835 let re = r.end.min(len);
2836 if pos < rs {
2837 if !first_range && !output_delim.is_empty() {
2838 unsafe { buf_extend(buf, output_delim) };
2839 }
2840 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
2841 first_range = false;
2842 }
2843 pos = re + 1;
2844 if pos > len {
2845 break;
2846 }
2847 }
2848 if pos <= len {
2849 if !first_range && !output_delim.is_empty() {
2850 unsafe { buf_extend(buf, output_delim) };
2851 }
2852 unsafe {
2853 buf_extend(
2854 buf,
2855 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
2856 )
2857 };
2858 }
2859 } else if output_delim.is_empty() && ranges.len() == 1 {
2860 let start = ranges[0].start.saturating_sub(1);
2862 let end = ranges[0].end.min(len);
2863 if start < len {
2864 unsafe {
2865 buf_extend(
2866 buf,
2867 std::slice::from_raw_parts(base.add(start), end - start),
2868 )
2869 };
2870 }
2871 } else {
2872 for r in ranges {
2873 let start = r.start.saturating_sub(1);
2874 let end = r.end.min(len);
2875 if start >= len {
2876 break;
2877 }
2878 if !first_range && !output_delim.is_empty() {
2879 unsafe { buf_extend(buf, output_delim) };
2880 }
2881 unsafe {
2882 buf_extend(
2883 buf,
2884 std::slice::from_raw_parts(base.add(start), end - start),
2885 )
2886 };
2887 first_range = false;
2888 }
2889 }
2890}
2891
2892#[inline]
2896pub fn cut_fields(
2897 line: &[u8],
2898 delim: u8,
2899 ranges: &[Range],
2900 complement: bool,
2901 output_delim: &[u8],
2902 suppress_no_delim: bool,
2903 out: &mut impl Write,
2904) -> io::Result<bool> {
2905 if memchr::memchr(delim, line).is_none() {
2906 if !suppress_no_delim {
2907 out.write_all(line)?;
2908 return Ok(true);
2909 }
2910 return Ok(false);
2911 }
2912
2913 let mut field_num: usize = 1;
2914 let mut field_start: usize = 0;
2915 let mut first_output = true;
2916
2917 for delim_pos in memchr_iter(delim, line) {
2918 let selected = in_ranges(ranges, field_num) != complement;
2919 if selected {
2920 if !first_output {
2921 out.write_all(output_delim)?;
2922 }
2923 out.write_all(&line[field_start..delim_pos])?;
2924 first_output = false;
2925 }
2926 field_start = delim_pos + 1;
2927 field_num += 1;
2928 }
2929
2930 let selected = in_ranges(ranges, field_num) != complement;
2931 if selected {
2932 if !first_output {
2933 out.write_all(output_delim)?;
2934 }
2935 out.write_all(&line[field_start..])?;
2936 }
2937
2938 Ok(true)
2939}
2940
2941#[inline]
2943pub fn cut_bytes(
2944 line: &[u8],
2945 ranges: &[Range],
2946 complement: bool,
2947 output_delim: &[u8],
2948 out: &mut impl Write,
2949) -> io::Result<bool> {
2950 let mut first_range = true;
2951
2952 if complement {
2953 let len = line.len();
2954 let mut comp_ranges = Vec::new();
2955 let mut pos: usize = 1;
2956 for r in ranges {
2957 let rs = r.start;
2958 let re = r.end.min(len);
2959 if pos < rs {
2960 comp_ranges.push((pos, rs - 1));
2961 }
2962 pos = re + 1;
2963 if pos > len {
2964 break;
2965 }
2966 }
2967 if pos <= len {
2968 comp_ranges.push((pos, len));
2969 }
2970 for &(s, e) in &comp_ranges {
2971 if !first_range && !output_delim.is_empty() {
2972 out.write_all(output_delim)?;
2973 }
2974 out.write_all(&line[s - 1..e])?;
2975 first_range = false;
2976 }
2977 } else {
2978 for r in ranges {
2979 let start = r.start.saturating_sub(1);
2980 let end = r.end.min(line.len());
2981 if start >= line.len() {
2982 break;
2983 }
2984 if !first_range && !output_delim.is_empty() {
2985 out.write_all(output_delim)?;
2986 }
2987 out.write_all(&line[start..end])?;
2988 first_range = false;
2989 }
2990 }
2991 Ok(true)
2992}
2993
2994pub fn cut_field1_inplace(data: &mut [u8], delim: u8, line_delim: u8, suppress: bool) -> usize {
3002 let len = data.len();
3003 let mut wp: usize = 0;
3004 let mut rp: usize = 0;
3005
3006 while rp < len {
3007 match memchr::memchr2(delim, line_delim, &data[rp..]) {
3008 None => {
3009 if suppress {
3011 break;
3013 }
3014 let remaining = len - rp;
3015 if wp != rp {
3016 data.copy_within(rp..len, wp);
3017 }
3018 wp += remaining;
3019 break;
3020 }
3021 Some(offset) => {
3022 let actual = rp + offset;
3023 if data[actual] == line_delim {
3024 if suppress {
3026 rp = actual + 1;
3028 } else {
3029 let chunk_len = actual + 1 - rp;
3031 if wp != rp {
3032 data.copy_within(rp..actual + 1, wp);
3033 }
3034 wp += chunk_len;
3035 rp = actual + 1;
3036 }
3037 } else {
3038 let field_len = actual - rp;
3040 if wp != rp && field_len > 0 {
3041 data.copy_within(rp..actual, wp);
3042 }
3043 wp += field_len;
3044 data[wp] = line_delim;
3045 wp += 1;
3046 match memchr::memchr(line_delim, &data[actual + 1..]) {
3048 None => {
3049 rp = len;
3050 }
3051 Some(nl_off) => {
3052 rp = actual + 1 + nl_off + 1;
3053 }
3054 }
3055 }
3056 }
3057 }
3058 }
3059 wp
3060}
3061
3062pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3064 match cfg.mode {
3065 CutMode::Fields => process_fields_fast(data, cfg, out),
3066 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
3067 }
3068}
3069
3070pub fn process_cut_reader<R: BufRead>(
3075 mut reader: R,
3076 cfg: &CutConfig,
3077 out: &mut impl Write,
3078) -> io::Result<()> {
3079 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3081
3082 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3085
3086 loop {
3087 buf.reserve(CHUNK_SIZE);
3089 let read_start = buf.len();
3090 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3091 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3092 buf.truncate(read_start + n);
3093
3094 if buf.is_empty() {
3095 break;
3096 }
3097
3098 if n == 0 {
3099 process_cut_data(&buf, cfg, out)?;
3101 break;
3102 }
3103
3104 let process_end = match memchr::memrchr(line_delim, &buf) {
3106 Some(pos) => pos + 1,
3107 None => {
3108 continue;
3110 }
3111 };
3112
3113 process_cut_data(&buf[..process_end], cfg, out)?;
3115
3116 let leftover_len = buf.len() - process_end;
3118 if leftover_len > 0 {
3119 buf.copy_within(process_end.., 0);
3120 }
3121 buf.truncate(leftover_len);
3122 }
3123
3124 Ok(())
3125}
3126
/// Reads from `reader` until `buf` is full or the reader reports end of input.
///
/// Returns the number of bytes placed at the start of `buf`; a value smaller
/// than `buf.len()` means end of input was reached. Reads that fail with
/// `ErrorKind::Interrupted` are retried, including the very first one.
///
/// # Errors
///
/// Returns the first read error other than `ErrorKind::Interrupted`.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            // A signal arrived before any data was transferred; just try again.
            Err(e) if e.kind() == io::ErrorKind::Interrupted => {}
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3146
3147pub fn process_cut_data_mut(data: &mut [u8], cfg: &CutConfig) -> Option<usize> {
3159 if cfg.complement {
3160 return None;
3161 }
3162 if data.is_empty() || data[data.len() - 1] != cfg.line_delim {
3166 return None;
3167 }
3168
3169 match cfg.mode {
3170 CutMode::Fields => {
3171 if cfg.output_delim.len() != 1 || cfg.output_delim[0] != cfg.delim {
3173 return None;
3174 }
3175 if cfg.delim == cfg.line_delim {
3176 return None;
3177 }
3178 Some(cut_fields_inplace_general(
3179 data,
3180 cfg.delim,
3181 cfg.line_delim,
3182 cfg.ranges,
3183 cfg.suppress_no_delim,
3184 ))
3185 }
3186 CutMode::Bytes | CutMode::Characters => {
3187 if !cfg.output_delim.is_empty() {
3188 return None;
3189 }
3190 Some(cut_bytes_inplace_general(data, cfg.line_delim, cfg.ranges))
3191 }
3192 }
3193}
3194
3195fn cut_fields_inplace_general(
3198 data: &mut [u8],
3199 delim: u8,
3200 line_delim: u8,
3201 ranges: &[Range],
3202 suppress: bool,
3203) -> usize {
3204 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == 1 {
3206 return cut_field1_inplace(data, delim, line_delim, suppress);
3207 }
3208
3209 let len = data.len();
3210 if len == 0 {
3211 return 0;
3212 }
3213
3214 let max_field = ranges.last().map_or(0, |r| r.end);
3215 let max_delims = max_field.min(64);
3216 let mut wp: usize = 0;
3217 let mut rp: usize = 0;
3218
3219 while rp < len {
3220 let line_end = memchr::memchr(line_delim, &data[rp..])
3221 .map(|p| rp + p)
3222 .unwrap_or(len);
3223 let line_len = line_end - rp;
3224
3225 let mut delim_pos = [0usize; 64];
3227 let mut num_delims: usize = 0;
3228
3229 for pos in memchr_iter(delim, &data[rp..line_end]) {
3230 if num_delims < max_delims {
3231 delim_pos[num_delims] = pos;
3232 num_delims += 1;
3233 if num_delims >= max_delims {
3234 break;
3235 }
3236 }
3237 }
3238
3239 if num_delims == 0 {
3240 if !suppress {
3242 if wp != rp {
3243 data.copy_within(rp..line_end, wp);
3244 }
3245 wp += line_len;
3246 if line_end < len {
3247 data[wp] = line_delim;
3248 wp += 1;
3249 }
3250 }
3251 } else {
3252 let total_fields = num_delims + 1;
3253 let mut first_output = true;
3254
3255 for r in ranges {
3256 let range_start = r.start;
3257 let range_end = r.end.min(total_fields);
3258 if range_start > total_fields {
3259 break;
3260 }
3261 for field_num in range_start..=range_end {
3262 if field_num > total_fields {
3263 break;
3264 }
3265
3266 let field_start = if field_num == 1 {
3267 0
3268 } else if field_num - 2 < num_delims {
3269 delim_pos[field_num - 2] + 1
3270 } else {
3271 continue;
3272 };
3273 let field_end = if field_num <= num_delims {
3274 delim_pos[field_num - 1]
3275 } else {
3276 line_len
3277 };
3278
3279 if !first_output {
3280 data[wp] = delim;
3281 wp += 1;
3282 }
3283 let flen = field_end - field_start;
3284 if flen > 0 {
3285 data.copy_within(rp + field_start..rp + field_start + flen, wp);
3286 wp += flen;
3287 }
3288 first_output = false;
3289 }
3290 }
3291
3292 if !first_output && line_end < len {
3293 data[wp] = line_delim;
3294 wp += 1;
3295 } else if first_output && line_end < len {
3296 data[wp] = line_delim;
3298 wp += 1;
3299 }
3300 }
3301
3302 rp = if line_end < len { line_end + 1 } else { len };
3303 }
3304
3305 wp
3306}
3307
3308fn cut_bytes_inplace_general(data: &mut [u8], line_delim: u8, ranges: &[Range]) -> usize {
3310 let len = data.len();
3311 if len == 0 {
3312 return 0;
3313 }
3314
3315 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == usize::MAX {
3317 return len;
3318 }
3319
3320 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end < usize::MAX {
3322 return cut_bytes_from_start_inplace(data, line_delim, ranges[0].end);
3323 }
3324
3325 let mut wp: usize = 0;
3326 let mut rp: usize = 0;
3327
3328 while rp < len {
3329 let line_end = memchr::memchr(line_delim, &data[rp..])
3330 .map(|p| rp + p)
3331 .unwrap_or(len);
3332 let line_len = line_end - rp;
3333
3334 for r in ranges {
3335 let start = r.start.saturating_sub(1);
3336 let end = r.end.min(line_len);
3337 if start >= line_len {
3338 break;
3339 }
3340 let flen = end - start;
3341 if flen > 0 {
3342 data.copy_within(rp + start..rp + start + flen, wp);
3343 wp += flen;
3344 }
3345 }
3346
3347 if line_end < len {
3348 data[wp] = line_delim;
3349 wp += 1;
3350 }
3351
3352 rp = if line_end < len { line_end + 1 } else { len };
3353 }
3354
3355 wp
3356}
3357
3358fn cut_bytes_from_start_inplace(data: &mut [u8], line_delim: u8, max_bytes: usize) -> usize {
3360 let len = data.len();
3361
3362 let mut all_fit = true;
3364 let mut start = 0;
3365 for pos in memchr_iter(line_delim, data) {
3366 if pos - start > max_bytes {
3367 all_fit = false;
3368 break;
3369 }
3370 start = pos + 1;
3371 }
3372 if all_fit && start < len && len - start > max_bytes {
3373 all_fit = false;
3374 }
3375 if all_fit {
3376 return len;
3377 }
3378
3379 let mut wp: usize = 0;
3381 let mut rp: usize = 0;
3382
3383 while rp < len {
3384 let line_end = memchr::memchr(line_delim, &data[rp..])
3385 .map(|p| rp + p)
3386 .unwrap_or(len);
3387 let line_len = line_end - rp;
3388
3389 let take = line_len.min(max_bytes);
3390 if take > 0 && wp != rp {
3391 data.copy_within(rp..rp + take, wp);
3392 }
3393 wp += take;
3394
3395 if line_end < len {
3396 data[wp] = line_delim;
3397 wp += 1;
3398 }
3399
3400 rp = if line_end < len { line_end + 1 } else { len };
3401 }
3402
3403 wp
3404}
3405
/// Which unit a cut operation selects on.
///
/// Equality is total for this field-less enum, so `Eq` is derived alongside
/// `PartialEq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Select by byte position.
    Bytes,
    /// Select by character position. `process_cut_data` and
    /// `process_cut_data_mut` route this to the same routines as [`CutMode::Bytes`].
    Characters,
    /// Select by delimiter-separated field.
    Fields,
}