1use memchr::memchr_iter;
2use std::io::{self, BufRead, IoSlice, Write};
3
/// Inputs of at least this many bytes (32 MiB) are split into chunks and
/// processed on multiple threads instead of sequentially.
const PARALLEL_THRESHOLD: usize = 32 << 20;

/// Largest number of `IoSlice`s passed to a single vectored write.
const MAX_IOV: usize = 1 << 10;

/// Target size in bytes (4 MiB) of each chunk on the sequential path.
const SEQ_CHUNK: usize = 4 << 20;
17
18fn process_chunked(
21 data: &[u8],
22 line_delim: u8,
23 out: &mut impl Write,
24 mut process_fn: impl FnMut(&[u8], &mut Vec<u8>),
25) -> io::Result<()> {
26 let mut buf = Vec::with_capacity(SEQ_CHUNK * 2);
27 let mut start = 0;
28 while start < data.len() {
29 let end = if start + SEQ_CHUNK >= data.len() {
30 data.len()
31 } else {
32 match memchr::memrchr(line_delim, &data[start..start + SEQ_CHUNK]) {
33 Some(pos) => start + pos + 1,
34 None => (start + SEQ_CHUNK).min(data.len()),
35 }
36 };
37 buf.clear();
38 process_fn(&data[start..end], &mut buf);
39 if !buf.is_empty() {
40 out.write_all(&buf)?;
41 }
42 start = end;
43 }
44 Ok(())
45}
46
47pub struct CutConfig<'a> {
49 pub mode: CutMode,
50 pub ranges: &'a [Range],
51 pub complement: bool,
52 pub delim: u8,
53 pub output_delim: &'a [u8],
54 pub suppress_no_delim: bool,
55 pub line_delim: u8,
56}
57
/// An inclusive span of positions, `start..=end`.
///
/// `parse_ranges` produces 1-based values and uses `usize::MAX` as `end` for
/// open-ended specs such as `5-`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First position covered by the range.
    pub start: usize,
    /// Last position covered by the range (inclusive).
    pub end: usize,
}
64
65pub fn parse_ranges(spec: &str, no_merge_adjacent: bool) -> Result<Vec<Range>, String> {
72 let mut ranges = Vec::new();
73
74 for part in spec.split(',') {
75 let part = part.trim();
76 if part.is_empty() {
77 continue;
78 }
79
80 if let Some(idx) = part.find('-') {
81 let left = &part[..idx];
82 let right = &part[idx + 1..];
83
84 if left.is_empty() && right.is_empty() {
86 return Err("invalid range with no endpoint: -".to_string());
87 }
88
89 let start = if left.is_empty() {
90 1
91 } else {
92 left.parse::<usize>()
93 .map_err(|_| format!("invalid range: '{}'", part))?
94 };
95
96 let end = if right.is_empty() {
97 usize::MAX
98 } else {
99 right
100 .parse::<usize>()
101 .map_err(|_| format!("invalid range: '{}'", part))?
102 };
103
104 if start == 0 {
105 return Err("fields and positions are numbered from 1".to_string());
106 }
107 if start > end {
108 return Err(format!("invalid decreasing range: '{}'", part));
109 }
110
111 ranges.push(Range { start, end });
112 } else {
113 let n = part
114 .parse::<usize>()
115 .map_err(|_| format!("invalid field: '{}'", part))?;
116 if n == 0 {
117 return Err("fields and positions are numbered from 1".to_string());
118 }
119 ranges.push(Range { start: n, end: n });
120 }
121 }
122
123 if ranges.is_empty() {
124 return Err("you must specify a list of bytes, characters, or fields".to_string());
125 }
126
127 ranges.sort_by_key(|r| (r.start, r.end));
129 let mut merged = vec![ranges[0].clone()];
130 for r in &ranges[1..] {
131 let last = merged.last_mut().unwrap();
132 if no_merge_adjacent {
133 if r.start <= last.end {
135 last.end = last.end.max(r.end);
136 } else {
137 merged.push(r.clone());
138 }
139 } else {
140 if r.start <= last.end.saturating_add(1) {
142 last.end = last.end.max(r.end);
143 } else {
144 merged.push(r.clone());
145 }
146 }
147 }
148
149 Ok(merged)
150}
151
152#[inline(always)]
155fn in_ranges(ranges: &[Range], pos: usize) -> bool {
156 for r in ranges {
157 if pos < r.start {
158 return false;
159 }
160 if pos <= r.end {
161 return true;
162 }
163 }
164 false
165}
166
167#[inline]
170fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
171 let mut mask: u64 = 0;
172 for i in 1..=64u32 {
173 let in_range = in_ranges(ranges, i as usize);
174 if in_range != complement {
175 mask |= 1u64 << (i - 1);
176 }
177 }
178 mask
179}
180
181#[inline(always)]
183fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
184 if field_num <= 64 {
185 (mask >> (field_num - 1)) & 1 == 1
186 } else {
187 in_ranges(ranges, field_num) != complement
188 }
189}
190
/// Appends `data` to `buf` without checking or growing its capacity.
///
/// # Safety
/// `buf` must already have at least `data.len()` bytes of spare capacity
/// (`buf.capacity() - buf.len() >= data.len()`); otherwise this writes past
/// the allocation.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    let old_len = buf.len();
    let extra = data.len();
    unsafe {
        let dst = buf.as_mut_ptr().add(old_len);
        dst.copy_from_nonoverlapping(data.as_ptr(), extra);
        buf.set_len(old_len + extra);
    }
}
203
/// Appends the single byte `b` to `buf` without checking or growing its
/// capacity.
///
/// # Safety
/// `buf` must already have at least one byte of spare capacity
/// (`buf.len() < buf.capacity()`).
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    let old_len = buf.len();
    unsafe {
        buf.as_mut_ptr().add(old_len).write(b);
        buf.set_len(old_len + 1);
    }
}
214
215#[inline]
219fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
220 if slices.is_empty() {
221 return Ok(());
222 }
223 for batch in slices.chunks(MAX_IOV) {
224 let total: usize = batch.iter().map(|s| s.len()).sum();
225 let written = out.write_vectored(batch)?;
226 if written >= total {
227 continue;
228 }
229 if written == 0 {
230 return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
231 }
232 write_ioslices_slow(out, batch, written)?;
233 }
234 Ok(())
235}
236
/// Completes a partially written batch: skips the first `skip` bytes of
/// `slices` (already written) and writes everything after them with
/// `write_all`.
///
/// # Errors
/// Passes on the first error from `out.write_all`.
#[cold]
#[inline(never)]
fn write_ioslices_slow(
    out: &mut impl Write,
    slices: &[IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for slice in slices {
        let bytes: &[u8] = slice;
        match bytes.get(skip..) {
            // Part of this slice is still unwritten.
            Some(rest) if !rest.is_empty() => {
                out.write_all(rest)?;
                skip = 0;
            }
            // Slice lies entirely inside the already-written prefix.
            _ => skip -= bytes.len(),
        }
    }
    Ok(())
}
256
/// Returns the parallelism reported by `std::thread::available_parallelism`,
/// or 1 when that query fails.
#[inline]
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    }
}
268
269fn split_for_scope<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
272 let num_threads = num_cpus().max(1);
273 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
274 return vec![data];
275 }
276
277 let chunk_size = data.len() / num_threads;
278 let mut chunks = Vec::with_capacity(num_threads);
279 let mut pos = 0;
280
281 for _ in 0..num_threads - 1 {
282 let target = pos + chunk_size;
283 if target >= data.len() {
284 break;
285 }
286 let boundary = memchr::memchr(line_delim, &data[target..])
287 .map(|p| target + p + 1)
288 .unwrap_or(data.len());
289 if boundary > pos {
290 chunks.push(&data[pos..boundary]);
291 }
292 pos = boundary;
293 }
294
295 if pos < data.len() {
296 chunks.push(&data[pos..]);
297 }
298
299 chunks
300}
301
302fn process_fields_multi_select(
309 data: &[u8],
310 delim: u8,
311 line_delim: u8,
312 ranges: &[Range],
313 suppress: bool,
314 out: &mut impl Write,
315) -> io::Result<()> {
316 let max_field = ranges.last().map_or(0, |r| r.end);
317
318 if data.len() >= PARALLEL_THRESHOLD {
319 let chunks = split_for_scope(data, line_delim);
320 let n = chunks.len();
321 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
322 rayon::scope(|s| {
323 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
324 s.spawn(move |_| {
325 result.reserve(chunk.len() * 3 / 4);
326 multi_select_chunk(
327 chunk, delim, line_delim, ranges, max_field, suppress, result,
328 );
329 });
330 }
331 });
332 let slices: Vec<IoSlice> = results
333 .iter()
334 .filter(|r| !r.is_empty())
335 .map(|r| IoSlice::new(r))
336 .collect();
337 write_ioslices(out, &slices)?;
338 } else {
339 process_chunked(data, line_delim, out, |chunk, buf| {
340 multi_select_chunk(chunk, delim, line_delim, ranges, max_field, suppress, buf);
341 })?;
342 }
343 Ok(())
344}
345
346fn multi_select_chunk(
350 data: &[u8],
351 delim: u8,
352 line_delim: u8,
353 ranges: &[Range],
354 max_field: usize,
355 suppress: bool,
356 buf: &mut Vec<u8>,
357) {
358 if max_field <= 64 && delim != line_delim {
364 let mut mask: u64 = 0;
365 for r in ranges {
366 let s = r.start.max(1);
367 let e = r.end.min(64);
368 for f in s..=e {
369 mask |= 1u64 << (f - 1);
370 }
371 }
372 multi_select_twolevel(data, delim, line_delim, mask, max_field, suppress, buf);
373 return;
374 }
375
376 buf.reserve(data.len());
378 let base = data.as_ptr();
379 let mut start = 0;
380 let max_delims = max_field.min(128);
381
382 for end_pos in memchr_iter(line_delim, data) {
383 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
384 multi_select_line_fast(
385 line, delim, line_delim, ranges, max_delims, suppress, buf, start, base,
386 );
387 start = end_pos + 1;
388 }
389 if start < data.len() {
390 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
391 multi_select_line_fast(
392 line, delim, line_delim, ranges, max_delims, suppress, buf, start, base,
393 );
394 }
395}
396
397#[allow(dead_code)]
401fn multi_select_chunk_bitmask(
402 data: &[u8],
403 delim: u8,
404 line_delim: u8,
405 mask: u64,
406 max_field: usize,
407 suppress: bool,
408 buf: &mut Vec<u8>,
409) {
410 buf.reserve(data.len() + 1);
414 let initial_len = buf.len();
415 let out_base = unsafe { buf.as_mut_ptr().add(initial_len) };
416 let src = data.as_ptr();
417 let mut wp: usize = 0;
418
419 let mut field_num: usize = 1; let mut field_start: usize = 0; let mut first_output = true; let mut has_delim = false; for pos in memchr::memchr2_iter(delim, line_delim, data) {
425 if data[pos] == line_delim {
426 if !has_delim {
428 if !suppress {
430 let len = pos - field_start;
431 unsafe {
432 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
433 }
434 wp += len;
435 unsafe {
436 *out_base.add(wp) = line_delim;
437 }
438 wp += 1;
439 }
440 } else {
441 if field_num <= 64 && (mask & (1u64 << (field_num - 1))) != 0 {
443 if !first_output {
444 unsafe {
445 *out_base.add(wp) = delim;
446 }
447 wp += 1;
448 }
449 let len = pos - field_start;
450 unsafe {
451 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
452 }
453 wp += len;
454 }
455 unsafe {
456 *out_base.add(wp) = line_delim;
457 }
458 wp += 1;
459 }
460 field_num = 1;
462 field_start = pos + 1;
463 first_output = true;
464 has_delim = false;
465 } else {
466 has_delim = true;
468 if field_num <= max_field && (mask & (1u64 << (field_num - 1))) != 0 {
469 if !first_output {
470 unsafe {
471 *out_base.add(wp) = delim;
472 }
473 wp += 1;
474 }
475 let len = pos - field_start;
476 unsafe {
477 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
478 }
479 wp += len;
480 first_output = false;
481 }
482 field_num += 1;
483 field_start = pos + 1;
484 }
485 }
486
487 if field_start < data.len() {
489 if !has_delim {
490 if !suppress {
491 let len = data.len() - field_start;
492 unsafe {
493 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
494 }
495 wp += len;
496 unsafe {
497 *out_base.add(wp) = line_delim;
498 }
499 wp += 1;
500 }
501 } else {
502 if field_num <= 64 && (mask & (1u64 << (field_num - 1))) != 0 {
503 if !first_output {
504 unsafe {
505 *out_base.add(wp) = delim;
506 }
507 wp += 1;
508 }
509 let len = data.len() - field_start;
510 unsafe {
511 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
512 }
513 wp += len;
514 }
515 unsafe {
516 *out_base.add(wp) = line_delim;
517 }
518 wp += 1;
519 }
520 }
521
522 unsafe {
523 buf.set_len(initial_len + wp);
524 }
525}
526
527fn multi_select_twolevel(
532 data: &[u8],
533 delim: u8,
534 line_delim: u8,
535 mask: u64,
536 max_field: usize,
537 suppress: bool,
538 buf: &mut Vec<u8>,
539) {
540 buf.reserve(data.len() + 1);
541 let initial_len = buf.len();
542 let out_base = unsafe { buf.as_mut_ptr().add(initial_len) };
543 let src = data.as_ptr();
544 let mut wp: usize = 0;
545 let mut line_start: usize = 0;
546
547 for nl_pos in memchr_iter(line_delim, data) {
548 let line_len = nl_pos - line_start;
549 let line = &data[line_start..nl_pos];
550
551 if line_len == 0 {
552 if !suppress {
553 unsafe {
554 *out_base.add(wp) = line_delim;
555 }
556 wp += 1;
557 }
558 line_start = nl_pos + 1;
559 continue;
560 }
561
562 let mut field_num: usize = 1;
565 let mut field_start: usize = 0;
566 let mut first_output = true;
567 let mut has_delim = false;
568
569 for dp in memchr::memchr_iter(delim, line) {
570 has_delim = true;
571 if (mask >> (field_num - 1)) & 1 == 1 {
572 if !first_output {
573 unsafe {
574 *out_base.add(wp) = delim;
575 }
576 wp += 1;
577 }
578 let flen = dp - field_start;
579 unsafe {
580 std::ptr::copy_nonoverlapping(
581 src.add(line_start + field_start),
582 out_base.add(wp),
583 flen,
584 );
585 }
586 wp += flen;
587 first_output = false;
588 }
589 field_num += 1;
590 field_start = dp + 1;
591 if field_num > max_field {
592 break;
593 }
594 }
595
596 if !has_delim {
597 if !suppress {
599 unsafe {
600 std::ptr::copy_nonoverlapping(src.add(line_start), out_base.add(wp), line_len);
601 }
602 wp += line_len;
603 unsafe {
604 *out_base.add(wp) = line_delim;
605 }
606 wp += 1;
607 }
608 } else {
609 if field_num <= 64 && (mask >> (field_num - 1)) & 1 == 1 {
611 if !first_output {
612 unsafe {
613 *out_base.add(wp) = delim;
614 }
615 wp += 1;
616 }
617 let flen = line_len - field_start;
618 unsafe {
619 std::ptr::copy_nonoverlapping(
620 src.add(line_start + field_start),
621 out_base.add(wp),
622 flen,
623 );
624 }
625 wp += flen;
626 }
627 unsafe {
628 *out_base.add(wp) = line_delim;
629 }
630 wp += 1;
631 }
632
633 line_start = nl_pos + 1;
634 }
635
636 if line_start < data.len() {
638 let line = &data[line_start..];
639 let line_len = line.len();
640 let mut field_num: usize = 1;
641 let mut field_start: usize = 0;
642 let mut first_output = true;
643 let mut has_delim = false;
644
645 for dp in memchr::memchr_iter(delim, line) {
646 has_delim = true;
647 if (mask >> (field_num - 1)) & 1 == 1 {
648 if !first_output {
649 unsafe {
650 *out_base.add(wp) = delim;
651 }
652 wp += 1;
653 }
654 let flen = dp - field_start;
655 unsafe {
656 std::ptr::copy_nonoverlapping(
657 src.add(line_start + field_start),
658 out_base.add(wp),
659 flen,
660 );
661 }
662 wp += flen;
663 first_output = false;
664 }
665 field_num += 1;
666 field_start = dp + 1;
667 if field_num > max_field {
668 break;
669 }
670 }
671
672 if !has_delim {
673 if !suppress {
674 unsafe {
675 std::ptr::copy_nonoverlapping(src.add(line_start), out_base.add(wp), line_len);
676 }
677 wp += line_len;
678 unsafe {
679 *out_base.add(wp) = line_delim;
680 }
681 wp += 1;
682 }
683 } else {
684 if field_num <= 64 && (mask >> (field_num - 1)) & 1 == 1 {
685 if !first_output {
686 unsafe {
687 *out_base.add(wp) = delim;
688 }
689 wp += 1;
690 }
691 let flen = line_len - field_start;
692 unsafe {
693 std::ptr::copy_nonoverlapping(
694 src.add(line_start + field_start),
695 out_base.add(wp),
696 flen,
697 );
698 }
699 wp += flen;
700 }
701 unsafe {
702 *out_base.add(wp) = line_delim;
703 }
704 wp += 1;
705 }
706 }
707
708 debug_assert!(
709 wp <= data.len() + 1,
710 "wp={} exceeded reservation data.len()+1={}",
711 wp,
712 data.len() + 1
713 );
714 unsafe {
715 buf.set_len(initial_len + wp);
716 }
717}
718
719#[inline(always)]
723fn multi_select_line_fast(
724 line: &[u8],
725 delim: u8,
726 line_delim: u8,
727 ranges: &[Range],
728 max_delims: usize,
729 suppress: bool,
730 buf: &mut Vec<u8>,
731 _line_abs_start: usize,
732 _data_base: *const u8,
733) {
734 let len = line.len();
735 if len == 0 {
736 if !suppress {
737 unsafe { buf_push(buf, line_delim) };
738 }
739 return;
740 }
741
742 let base = line.as_ptr();
743
744 let mut delim_pos = [0usize; 128];
746 let mut num_delims: usize = 0;
747
748 for pos in memchr_iter(delim, line) {
749 if num_delims < max_delims {
750 delim_pos[num_delims] = pos;
751 num_delims += 1;
752 if num_delims >= max_delims {
753 break;
754 }
755 }
756 }
757
758 if num_delims == 0 {
759 if !suppress {
760 unsafe {
761 buf_extend(buf, line);
762 buf_push(buf, line_delim);
763 }
764 }
765 return;
766 }
767
768 let total_fields = num_delims + 1;
769 let mut first_output = true;
770
771 for r in ranges {
772 let range_start = r.start;
773 let range_end = r.end.min(total_fields);
774 if range_start > total_fields {
775 break;
776 }
777 for field_num in range_start..=range_end {
778 if field_num > total_fields {
779 break;
780 }
781
782 let field_start = if field_num == 1 {
783 0
784 } else if field_num - 2 < num_delims {
785 delim_pos[field_num - 2] + 1
786 } else {
787 continue;
788 };
789 let field_end = if field_num <= num_delims {
790 delim_pos[field_num - 1]
791 } else {
792 len
793 };
794
795 if !first_output {
796 unsafe { buf_push(buf, delim) };
797 }
798 unsafe {
799 buf_extend(
800 buf,
801 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
802 );
803 }
804 first_output = false;
805 }
806 }
807
808 unsafe { buf_push(buf, line_delim) };
809}
810
811fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
815 let delim = cfg.delim;
816 let line_delim = cfg.line_delim;
817 let ranges = cfg.ranges;
818 let complement = cfg.complement;
819 let output_delim = cfg.output_delim;
820 let suppress = cfg.suppress_no_delim;
821
822 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
830 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
831 }
832
833 if complement
835 && ranges.len() == 1
836 && output_delim.len() == 1
837 && output_delim[0] == delim
838 && ranges[0].start == ranges[0].end
839 {
840 return process_complement_single_field(
841 data,
842 delim,
843 line_delim,
844 ranges[0].start,
845 suppress,
846 out,
847 );
848 }
849
850 if complement
853 && ranges.len() == 1
854 && ranges[0].start > 1
855 && ranges[0].end < usize::MAX
856 && output_delim.len() == 1
857 && output_delim[0] == delim
858 {
859 return process_complement_range(
860 data,
861 delim,
862 line_delim,
863 ranges[0].start,
864 ranges[0].end,
865 suppress,
866 out,
867 );
868 }
869
870 if !complement
872 && ranges.len() == 1
873 && ranges[0].start == 1
874 && output_delim.len() == 1
875 && output_delim[0] == delim
876 && ranges[0].end < usize::MAX
877 {
878 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
879 }
880
881 if !complement
883 && ranges.len() == 1
884 && ranges[0].end == usize::MAX
885 && ranges[0].start > 1
886 && output_delim.len() == 1
887 && output_delim[0] == delim
888 {
889 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
890 }
891
892 if !complement
894 && ranges.len() == 1
895 && ranges[0].start > 1
896 && ranges[0].end < usize::MAX
897 && output_delim.len() == 1
898 && output_delim[0] == delim
899 {
900 return process_fields_mid_range(
901 data,
902 delim,
903 line_delim,
904 ranges[0].start,
905 ranges[0].end,
906 suppress,
907 out,
908 );
909 }
910
911 if !complement
917 && ranges.len() > 1
918 && ranges.last().map_or(false, |r| r.end < usize::MAX)
919 && output_delim.len() == 1
920 && output_delim[0] == delim
921 && delim != line_delim
922 {
923 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
924 }
925
926 let max_field = if complement {
928 usize::MAX
929 } else {
930 ranges.last().map(|r| r.end).unwrap_or(0)
931 };
932 let field_mask = compute_field_mask(ranges, complement);
933
934 if data.len() >= PARALLEL_THRESHOLD {
935 let chunks = split_for_scope(data, line_delim);
936 let n = chunks.len();
937 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
938 rayon::scope(|s| {
939 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
940 s.spawn(move |_| {
941 result.reserve(chunk.len() + 1);
942 process_fields_chunk(
943 chunk,
944 delim,
945 ranges,
946 output_delim,
947 suppress,
948 max_field,
949 field_mask,
950 line_delim,
951 complement,
952 result,
953 );
954 });
955 }
956 });
957 let slices: Vec<IoSlice> = results
958 .iter()
959 .filter(|r| !r.is_empty())
960 .map(|r| IoSlice::new(r))
961 .collect();
962 write_ioslices(out, &slices)?;
963 } else {
964 process_chunked(data, line_delim, out, |chunk, buf| {
965 process_fields_chunk(
966 chunk,
967 delim,
968 ranges,
969 output_delim,
970 suppress,
971 max_field,
972 field_mask,
973 line_delim,
974 complement,
975 buf,
976 );
977 })?;
978 }
979 Ok(())
980}
981
982fn process_fields_chunk(
987 data: &[u8],
988 delim: u8,
989 ranges: &[Range],
990 output_delim: &[u8],
991 suppress: bool,
992 max_field: usize,
993 field_mask: u64,
994 line_delim: u8,
995 complement: bool,
996 buf: &mut Vec<u8>,
997) {
998 if delim != line_delim {
1003 buf.reserve(data.len());
1004 let mut start = 0;
1005 for end_pos in memchr_iter(line_delim, data) {
1006 let line = &data[start..end_pos];
1007 extract_fields_to_buf(
1008 line,
1009 delim,
1010 ranges,
1011 output_delim,
1012 suppress,
1013 max_field,
1014 field_mask,
1015 line_delim,
1016 buf,
1017 complement,
1018 );
1019 start = end_pos + 1;
1020 }
1021 if start < data.len() {
1022 extract_fields_to_buf(
1023 &data[start..],
1024 delim,
1025 ranges,
1026 output_delim,
1027 suppress,
1028 max_field,
1029 field_mask,
1030 line_delim,
1031 buf,
1032 complement,
1033 );
1034 }
1035 return;
1036 }
1037
1038 let mut start = 0;
1040 for end_pos in memchr_iter(line_delim, data) {
1041 let line = &data[start..end_pos];
1042 extract_fields_to_buf(
1043 line,
1044 delim,
1045 ranges,
1046 output_delim,
1047 suppress,
1048 max_field,
1049 field_mask,
1050 line_delim,
1051 buf,
1052 complement,
1053 );
1054 start = end_pos + 1;
1055 }
1056 if start < data.len() {
1057 extract_fields_to_buf(
1058 &data[start..],
1059 delim,
1060 ranges,
1061 output_delim,
1062 suppress,
1063 max_field,
1064 field_mask,
1065 line_delim,
1066 buf,
1067 complement,
1068 );
1069 }
1070}
1071
1072fn process_single_field(
1078 data: &[u8],
1079 delim: u8,
1080 line_delim: u8,
1081 target: usize,
1082 suppress: bool,
1083 out: &mut impl Write,
1084) -> io::Result<()> {
1085 let target_idx = target - 1;
1086
1087 const FIELD_PARALLEL_MIN: usize = 16 * 1024 * 1024;
1089
1090 if delim != line_delim {
1091 if target_idx == 0 && !suppress {
1095 if data.len() >= FIELD_PARALLEL_MIN {
1096 return single_field1_parallel(data, delim, line_delim, out);
1097 }
1098 return process_chunked(data, line_delim, out, |chunk, buf| {
1099 single_field1_to_buf(chunk, delim, line_delim, buf);
1100 });
1101 }
1102
1103 if data.len() >= FIELD_PARALLEL_MIN {
1107 let chunks = split_for_scope(data, line_delim);
1108 let n = chunks.len();
1109 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1110 rayon::scope(|s| {
1111 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1112 s.spawn(move |_| {
1113 result.reserve(chunk.len() / 2);
1114 process_single_field_chunk(
1115 chunk, delim, target_idx, line_delim, suppress, result,
1116 );
1117 });
1118 }
1119 });
1120 let slices: Vec<IoSlice> = results
1121 .iter()
1122 .filter(|r| !r.is_empty())
1123 .map(|r| IoSlice::new(r))
1124 .collect();
1125 write_ioslices(out, &slices)?;
1126 } else {
1127 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
1128 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1129 if !buf.is_empty() {
1130 out.write_all(&buf)?;
1131 }
1132 }
1133 return Ok(());
1134 }
1135
1136 if data.len() >= FIELD_PARALLEL_MIN {
1138 let chunks = split_for_scope(data, line_delim);
1139 let n = chunks.len();
1140 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1141 rayon::scope(|s| {
1142 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1143 s.spawn(move |_| {
1144 result.reserve(chunk.len() / 4);
1145 process_single_field_chunk(
1146 chunk, delim, target_idx, line_delim, suppress, result,
1147 );
1148 });
1149 }
1150 });
1151 let slices: Vec<IoSlice> = results
1152 .iter()
1153 .filter(|r| !r.is_empty())
1154 .map(|r| IoSlice::new(r))
1155 .collect();
1156 write_ioslices(out, &slices)?;
1157 } else {
1158 let mut buf = Vec::with_capacity(data.len() / 4);
1159 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1160 if !buf.is_empty() {
1161 out.write_all(&buf)?;
1162 }
1163 }
1164 Ok(())
1165}
1166
1167fn process_complement_range(
1170 data: &[u8],
1171 delim: u8,
1172 line_delim: u8,
1173 skip_start: usize,
1174 skip_end: usize,
1175 suppress: bool,
1176 out: &mut impl Write,
1177) -> io::Result<()> {
1178 if data.len() >= PARALLEL_THRESHOLD {
1179 let chunks = split_for_scope(data, line_delim);
1180 let n = chunks.len();
1181 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1182 rayon::scope(|s| {
1183 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1184 s.spawn(move |_| {
1185 result.reserve(chunk.len());
1186 complement_range_chunk(
1187 chunk, delim, skip_start, skip_end, line_delim, suppress, result,
1188 );
1189 });
1190 }
1191 });
1192 let slices: Vec<IoSlice> = results
1193 .iter()
1194 .filter(|r| !r.is_empty())
1195 .map(|r| IoSlice::new(r))
1196 .collect();
1197 write_ioslices(out, &slices)?;
1198 } else {
1199 process_chunked(data, line_delim, out, |chunk, buf| {
1200 complement_range_chunk(
1201 chunk, delim, skip_start, skip_end, line_delim, suppress, buf,
1202 );
1203 })?;
1204 }
1205 Ok(())
1206}
1207
1208fn complement_range_chunk(
1210 data: &[u8],
1211 delim: u8,
1212 skip_start: usize,
1213 skip_end: usize,
1214 line_delim: u8,
1215 suppress: bool,
1216 buf: &mut Vec<u8>,
1217) {
1218 buf.reserve(data.len());
1220 let mut start = 0;
1221 for end_pos in memchr_iter(line_delim, data) {
1222 let line = &data[start..end_pos];
1223 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
1224 start = end_pos + 1;
1225 }
1226 if start < data.len() {
1227 complement_range_line(
1228 &data[start..],
1229 delim,
1230 skip_start,
1231 skip_end,
1232 line_delim,
1233 suppress,
1234 buf,
1235 );
1236 }
1237}
1238
1239#[inline(always)]
1246fn complement_range_line(
1247 line: &[u8],
1248 delim: u8,
1249 skip_start: usize,
1250 skip_end: usize,
1251 line_delim: u8,
1252 suppress: bool,
1253 buf: &mut Vec<u8>,
1254) {
1255 let len = line.len();
1256 if len == 0 {
1257 if !suppress {
1258 unsafe { buf_push(buf, line_delim) };
1259 }
1260 return;
1261 }
1262
1263 let base = line.as_ptr();
1265
1266 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
1276
1277 let mut delim_count: usize = 0;
1279 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
1283 delim_count += 1;
1284 if delim_count == need_prefix_delims {
1285 prefix_end_pos = pos;
1286 }
1287 if delim_count == total_need {
1288 suffix_start_pos = pos + 1;
1289 break;
1290 }
1291 }
1292
1293 if delim_count == 0 {
1294 if !suppress {
1296 unsafe {
1297 buf_extend(buf, line);
1298 buf_push(buf, line_delim);
1299 }
1300 }
1301 return;
1302 }
1303
1304 if delim_count < need_prefix_delims {
1310 unsafe {
1312 buf_extend(buf, line);
1313 buf_push(buf, line_delim);
1314 }
1315 return;
1316 }
1317
1318 let has_prefix = need_prefix_delims > 0;
1319 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
1320
1321 if has_prefix && has_suffix {
1322 unsafe {
1324 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1325 buf_push(buf, delim);
1326 buf_extend(
1327 buf,
1328 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1329 );
1330 buf_push(buf, line_delim);
1331 }
1332 } else if has_prefix {
1333 unsafe {
1335 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1336 buf_push(buf, line_delim);
1337 }
1338 } else if has_suffix {
1339 unsafe {
1341 buf_extend(
1342 buf,
1343 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1344 );
1345 buf_push(buf, line_delim);
1346 }
1347 } else {
1348 unsafe { buf_push(buf, line_delim) };
1350 }
1351}
1352
1353fn process_complement_single_field(
1355 data: &[u8],
1356 delim: u8,
1357 line_delim: u8,
1358 skip_field: usize,
1359 suppress: bool,
1360 out: &mut impl Write,
1361) -> io::Result<()> {
1362 let skip_idx = skip_field - 1;
1363
1364 if data.len() >= PARALLEL_THRESHOLD {
1365 let chunks = split_for_scope(data, line_delim);
1366 let n = chunks.len();
1367 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1368 rayon::scope(|s| {
1369 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1370 s.spawn(move |_| {
1371 result.reserve(chunk.len());
1372 complement_single_field_chunk(
1373 chunk, delim, skip_idx, line_delim, suppress, result,
1374 );
1375 });
1376 }
1377 });
1378 let slices: Vec<IoSlice> = results
1379 .iter()
1380 .filter(|r| !r.is_empty())
1381 .map(|r| IoSlice::new(r))
1382 .collect();
1383 write_ioslices(out, &slices)?;
1384 } else {
1385 process_chunked(data, line_delim, out, |chunk, buf| {
1386 complement_single_field_chunk(chunk, delim, skip_idx, line_delim, suppress, buf);
1387 })?;
1388 }
1389 Ok(())
1390}
1391
1392fn complement_single_field_chunk(
1397 data: &[u8],
1398 delim: u8,
1399 skip_idx: usize,
1400 line_delim: u8,
1401 suppress: bool,
1402 buf: &mut Vec<u8>,
1403) {
1404 buf.reserve(data.len());
1405 let mut start = 0;
1406 for end_pos in memchr_iter(line_delim, data) {
1407 let line = &data[start..end_pos];
1408 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1409 start = end_pos + 1;
1410 }
1411 if start < data.len() {
1412 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
1413 }
1414}
1415
1416#[inline(always)]
1418fn complement_single_field_line(
1419 line: &[u8],
1420 delim: u8,
1421 skip_idx: usize,
1422 line_delim: u8,
1423 suppress: bool,
1424 buf: &mut Vec<u8>,
1425) {
1426 let len = line.len();
1427 if len == 0 {
1428 if !suppress {
1429 unsafe { buf_push(buf, line_delim) };
1430 }
1431 return;
1432 }
1433
1434 let base = line.as_ptr();
1435 let need_before = skip_idx;
1436 let need_total = skip_idx + 1;
1437
1438 let mut delim_count: usize = 0;
1439 let mut skip_start_pos: usize = 0;
1440 let mut skip_end_pos: usize = len;
1441 let mut found_end = false;
1442
1443 for pos in memchr_iter(delim, line) {
1444 delim_count += 1;
1445 if delim_count == need_before {
1446 skip_start_pos = pos + 1;
1447 }
1448 if delim_count == need_total {
1449 skip_end_pos = pos;
1450 found_end = true;
1451 break;
1452 }
1453 }
1454
1455 if delim_count == 0 {
1456 if !suppress {
1457 unsafe {
1458 buf_extend(buf, line);
1459 buf_push(buf, line_delim);
1460 }
1461 }
1462 return;
1463 }
1464
1465 if delim_count < need_before {
1466 unsafe {
1467 buf_extend(buf, line);
1468 buf_push(buf, line_delim);
1469 }
1470 return;
1471 }
1472
1473 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1474 let has_suffix = found_end && skip_end_pos < len;
1475
1476 if has_prefix && has_suffix {
1477 unsafe {
1478 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1479 buf_push(buf, delim);
1480 buf_extend(
1481 buf,
1482 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1483 );
1484 buf_push(buf, line_delim);
1485 }
1486 } else if has_prefix {
1487 unsafe {
1488 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1489 buf_push(buf, line_delim);
1490 }
1491 } else if has_suffix {
1492 unsafe {
1493 buf_extend(
1494 buf,
1495 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1496 );
1497 buf_push(buf, line_delim);
1498 }
1499 } else {
1500 unsafe { buf_push(buf, line_delim) };
1501 }
1502}
1503
1504fn process_fields_prefix(
1508 data: &[u8],
1509 delim: u8,
1510 line_delim: u8,
1511 last_field: usize,
1512 suppress: bool,
1513 out: &mut impl Write,
1514) -> io::Result<()> {
1515 if data.len() >= PARALLEL_THRESHOLD {
1516 let chunks = split_for_scope(data, line_delim);
1517 let n = chunks.len();
1518 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1519 rayon::scope(|s| {
1520 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1521 s.spawn(move |_| {
1522 result.reserve(chunk.len());
1523 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, result);
1524 });
1525 }
1526 });
1527 let slices: Vec<IoSlice> = results
1528 .iter()
1529 .filter(|r| !r.is_empty())
1530 .map(|r| IoSlice::new(r))
1531 .collect();
1532 write_ioslices(out, &slices)?;
1533 } else if !suppress {
1534 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1538 } else {
1539 process_chunked(data, line_delim, out, |chunk, buf| {
1540 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, buf);
1541 })?;
1542 }
1543 Ok(())
1544}
1545
1546#[inline]
1552fn fields_prefix_zerocopy(
1553 data: &[u8],
1554 delim: u8,
1555 line_delim: u8,
1556 last_field: usize,
1557 out: &mut impl Write,
1558) -> io::Result<()> {
1559 let newline_buf: [u8; 1] = [line_delim];
1560 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1561 let mut start = 0;
1562 let mut run_start: usize = 0;
1563
1564 for end_pos in memchr_iter(line_delim, data) {
1565 let line = &data[start..end_pos];
1566 let mut field_count = 1;
1567 let mut truncate_at: Option<usize> = None;
1568 for dpos in memchr_iter(delim, line) {
1569 if field_count >= last_field {
1570 truncate_at = Some(start + dpos);
1571 break;
1572 }
1573 field_count += 1;
1574 }
1575
1576 if let Some(trunc_pos) = truncate_at {
1577 if run_start < start {
1578 iov.push(IoSlice::new(&data[run_start..start]));
1579 }
1580 iov.push(IoSlice::new(&data[start..trunc_pos]));
1581 iov.push(IoSlice::new(&newline_buf));
1582 run_start = end_pos + 1;
1583
1584 if iov.len() >= MAX_IOV - 2 {
1585 write_ioslices(out, &iov)?;
1586 iov.clear();
1587 }
1588 }
1589 start = end_pos + 1;
1590 }
1591 if start < data.len() {
1593 let line = &data[start..];
1594 let mut field_count = 1;
1595 let mut truncate_at: Option<usize> = None;
1596 for dpos in memchr_iter(delim, line) {
1597 if field_count >= last_field {
1598 truncate_at = Some(start + dpos);
1599 break;
1600 }
1601 field_count += 1;
1602 }
1603 if let Some(trunc_pos) = truncate_at {
1604 if run_start < start {
1605 iov.push(IoSlice::new(&data[run_start..start]));
1606 }
1607 iov.push(IoSlice::new(&data[start..trunc_pos]));
1608 iov.push(IoSlice::new(&newline_buf));
1609 if !iov.is_empty() {
1610 write_ioslices(out, &iov)?;
1611 }
1612 return Ok(());
1613 }
1614 }
1615 if run_start < data.len() {
1617 iov.push(IoSlice::new(&data[run_start..]));
1618 if !data.is_empty() && *data.last().unwrap() != line_delim {
1619 iov.push(IoSlice::new(&newline_buf));
1620 }
1621 }
1622 if !iov.is_empty() {
1623 write_ioslices(out, &iov)?;
1624 }
1625 Ok(())
1626}
1627
1628fn fields_prefix_chunk(
1630 data: &[u8],
1631 delim: u8,
1632 line_delim: u8,
1633 last_field: usize,
1634 suppress: bool,
1635 buf: &mut Vec<u8>,
1636) {
1637 buf.reserve(data.len());
1638 let mut start = 0;
1639 for end_pos in memchr_iter(line_delim, data) {
1640 let line = &data[start..end_pos];
1641 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1642 start = end_pos + 1;
1643 }
1644 if start < data.len() {
1645 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1646 }
1647}
1648
1649#[inline(always)]
1652fn fields_prefix_line(
1653 line: &[u8],
1654 delim: u8,
1655 line_delim: u8,
1656 last_field: usize,
1657 suppress: bool,
1658 buf: &mut Vec<u8>,
1659) {
1660 let len = line.len();
1661 if len == 0 {
1662 if !suppress {
1663 unsafe { buf_push(buf, line_delim) };
1664 }
1665 return;
1666 }
1667
1668 let base = line.as_ptr();
1670
1671 let mut field_count = 1usize;
1672 let mut has_delim = false;
1673
1674 for pos in memchr_iter(delim, line) {
1675 has_delim = true;
1676 if field_count >= last_field {
1677 unsafe {
1678 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1679 buf_push(buf, line_delim);
1680 }
1681 return;
1682 }
1683 field_count += 1;
1684 }
1685
1686 if !has_delim {
1687 if !suppress {
1688 unsafe {
1689 buf_extend(buf, line);
1690 buf_push(buf, line_delim);
1691 }
1692 }
1693 return;
1694 }
1695
1696 unsafe {
1697 buf_extend(buf, line);
1698 buf_push(buf, line_delim);
1699 }
1700}
1701
1702fn process_fields_suffix(
1704 data: &[u8],
1705 delim: u8,
1706 line_delim: u8,
1707 start_field: usize,
1708 suppress: bool,
1709 out: &mut impl Write,
1710) -> io::Result<()> {
1711 if data.len() >= PARALLEL_THRESHOLD {
1712 let chunks = split_for_scope(data, line_delim);
1713 let n = chunks.len();
1714 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1715 rayon::scope(|s| {
1716 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1717 s.spawn(move |_| {
1718 result.reserve(chunk.len());
1719 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, result);
1720 });
1721 }
1722 });
1723 let slices: Vec<IoSlice> = results
1724 .iter()
1725 .filter(|r| !r.is_empty())
1726 .map(|r| IoSlice::new(r))
1727 .collect();
1728 write_ioslices(out, &slices)?;
1729 } else {
1730 process_chunked(data, line_delim, out, |chunk, buf| {
1731 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, buf);
1732 })?;
1733 }
1734 Ok(())
1735}
1736
1737fn fields_suffix_chunk(
1739 data: &[u8],
1740 delim: u8,
1741 line_delim: u8,
1742 start_field: usize,
1743 suppress: bool,
1744 buf: &mut Vec<u8>,
1745) {
1746 buf.reserve(data.len());
1747 let mut start = 0;
1748 for end_pos in memchr_iter(line_delim, data) {
1749 let line = &data[start..end_pos];
1750 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1751 start = end_pos + 1;
1752 }
1753 if start < data.len() {
1754 fields_suffix_line(
1755 &data[start..],
1756 delim,
1757 line_delim,
1758 start_field,
1759 suppress,
1760 buf,
1761 );
1762 }
1763}
1764
1765#[inline(always)]
1768fn fields_suffix_line(
1769 line: &[u8],
1770 delim: u8,
1771 line_delim: u8,
1772 start_field: usize,
1773 suppress: bool,
1774 buf: &mut Vec<u8>,
1775) {
1776 let len = line.len();
1777 if len == 0 {
1778 if !suppress {
1779 unsafe { buf_push(buf, line_delim) };
1780 }
1781 return;
1782 }
1783
1784 let base = line.as_ptr();
1786
1787 let skip_delims = start_field - 1;
1788 let mut delim_count = 0usize;
1789 let mut has_delim = false;
1790
1791 for pos in memchr_iter(delim, line) {
1792 has_delim = true;
1793 delim_count += 1;
1794 if delim_count >= skip_delims {
1795 unsafe {
1796 buf_extend(
1797 buf,
1798 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1799 );
1800 buf_push(buf, line_delim);
1801 }
1802 return;
1803 }
1804 }
1805
1806 if !has_delim {
1807 if !suppress {
1808 unsafe {
1809 buf_extend(buf, line);
1810 buf_push(buf, line_delim);
1811 }
1812 }
1813 return;
1814 }
1815
1816 unsafe { buf_push(buf, line_delim) };
1818}
1819
1820fn process_fields_mid_range(
1823 data: &[u8],
1824 delim: u8,
1825 line_delim: u8,
1826 start_field: usize,
1827 end_field: usize,
1828 suppress: bool,
1829 out: &mut impl Write,
1830) -> io::Result<()> {
1831 if data.len() >= PARALLEL_THRESHOLD {
1832 let chunks = split_for_scope(data, line_delim);
1833 let n = chunks.len();
1834 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1835 rayon::scope(|s| {
1836 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1837 s.spawn(move |_| {
1838 result.reserve(chunk.len());
1839 fields_mid_range_chunk(
1840 chunk,
1841 delim,
1842 line_delim,
1843 start_field,
1844 end_field,
1845 suppress,
1846 result,
1847 );
1848 });
1849 }
1850 });
1851 let slices: Vec<IoSlice> = results
1852 .iter()
1853 .filter(|r| !r.is_empty())
1854 .map(|r| IoSlice::new(r))
1855 .collect();
1856 write_ioslices(out, &slices)?;
1857 } else {
1858 process_chunked(data, line_delim, out, |chunk, buf| {
1859 fields_mid_range_chunk(
1860 chunk,
1861 delim,
1862 line_delim,
1863 start_field,
1864 end_field,
1865 suppress,
1866 buf,
1867 );
1868 })?;
1869 }
1870 Ok(())
1871}
1872
1873fn fields_mid_range_chunk(
1878 data: &[u8],
1879 delim: u8,
1880 line_delim: u8,
1881 start_field: usize,
1882 end_field: usize,
1883 suppress: bool,
1884 buf: &mut Vec<u8>,
1885) {
1886 buf.reserve(data.len());
1887 let mut start = 0;
1888 for end_pos in memchr_iter(line_delim, data) {
1889 let line = &data[start..end_pos];
1890 fields_mid_range_line(
1891 line,
1892 delim,
1893 line_delim,
1894 start_field,
1895 end_field,
1896 suppress,
1897 buf,
1898 );
1899 start = end_pos + 1;
1900 }
1901 if start < data.len() {
1902 fields_mid_range_line(
1903 &data[start..],
1904 delim,
1905 line_delim,
1906 start_field,
1907 end_field,
1908 suppress,
1909 buf,
1910 );
1911 }
1912}
1913
1914#[inline(always)]
1918fn fields_mid_range_line(
1919 line: &[u8],
1920 delim: u8,
1921 line_delim: u8,
1922 start_field: usize,
1923 end_field: usize,
1924 suppress: bool,
1925 buf: &mut Vec<u8>,
1926) {
1927 let len = line.len();
1928 if len == 0 {
1929 if !suppress {
1930 unsafe { buf_push(buf, line_delim) };
1931 }
1932 return;
1933 }
1934
1935 let base = line.as_ptr();
1937
1938 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
1942 let mut delim_count = 0;
1943 let mut range_start = 0;
1944 let mut has_delim = false;
1945
1946 for pos in memchr_iter(delim, line) {
1947 has_delim = true;
1948 delim_count += 1;
1949 if delim_count == skip_before {
1950 range_start = pos + 1;
1951 }
1952 if delim_count == target_end_delim {
1953 if skip_before == 0 {
1954 range_start = 0;
1955 }
1956 unsafe {
1957 buf_extend(
1958 buf,
1959 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
1960 );
1961 buf_push(buf, line_delim);
1962 }
1963 return;
1964 }
1965 }
1966
1967 if !has_delim {
1968 if !suppress {
1969 unsafe {
1970 buf_extend(buf, line);
1971 buf_push(buf, line_delim);
1972 }
1973 }
1974 return;
1975 }
1976
1977 if delim_count >= skip_before {
1979 if skip_before == 0 {
1981 range_start = 0;
1982 }
1983 unsafe {
1984 buf_extend(
1985 buf,
1986 std::slice::from_raw_parts(base.add(range_start), len - range_start),
1987 );
1988 buf_push(buf, line_delim);
1989 }
1990 } else {
1991 unsafe { buf_push(buf, line_delim) };
1993 }
1994}
1995
1996fn single_field1_parallel(
2007 data: &[u8],
2008 delim: u8,
2009 line_delim: u8,
2010 out: &mut impl Write,
2011) -> io::Result<()> {
2012 let chunks = split_for_scope(data, line_delim);
2013 let n = chunks.len();
2014 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2015 rayon::scope(|s| {
2016 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2017 s.spawn(move |_| {
2018 result.reserve(chunk.len() + 1);
2019 single_field1_to_buf(chunk, delim, line_delim, result);
2020 });
2021 }
2022 });
2023 let slices: Vec<IoSlice> = results
2024 .iter()
2025 .filter(|r| !r.is_empty())
2026 .map(|r| IoSlice::new(r))
2027 .collect();
2028 write_ioslices(out, &slices)
2029}
2030
2031#[inline]
2042fn single_field1_to_buf(data: &[u8], delim: u8, line_delim: u8, buf: &mut Vec<u8>) {
2043 debug_assert_ne!(delim, line_delim, "delim and line_delim must differ");
2044 buf.reserve(data.len() + 1);
2047
2048 let base = data.as_ptr();
2049 let initial_len = buf.len();
2050 let mut out_ptr = unsafe { buf.as_mut_ptr().add(initial_len) };
2051 let mut start = 0;
2052 let mut run_start: usize = 0;
2054 let mut in_run = true; for end_pos in memchr_iter(line_delim, data) {
2057 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
2058 match memchr::memchr(delim, line) {
2059 Some(dp) => {
2060 if in_run && run_start < start {
2062 let run_len = start - run_start;
2064 unsafe {
2065 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2066 out_ptr = out_ptr.add(run_len);
2067 }
2068 }
2069 unsafe {
2071 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, dp);
2072 out_ptr = out_ptr.add(dp);
2073 *out_ptr = line_delim;
2074 out_ptr = out_ptr.add(1);
2075 }
2076 run_start = end_pos + 1;
2077 in_run = true;
2078 }
2079 None => {
2080 if !in_run {
2082 run_start = start;
2083 in_run = true;
2084 }
2085 }
2086 }
2087 start = end_pos + 1;
2088 }
2089
2090 if in_run && run_start < start {
2092 let run_len = start - run_start;
2093 unsafe {
2094 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2095 out_ptr = out_ptr.add(run_len);
2096 }
2097 }
2098
2099 if start < data.len() {
2101 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2102 match memchr::memchr(delim, line) {
2103 Some(dp) => {
2104 unsafe {
2106 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, dp);
2107 out_ptr = out_ptr.add(dp);
2108 *out_ptr = line_delim;
2109 out_ptr = out_ptr.add(1);
2110 }
2111 }
2112 None => {
2113 let len = data.len() - start;
2115 unsafe {
2116 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, len);
2117 out_ptr = out_ptr.add(len);
2118 *out_ptr = line_delim;
2119 out_ptr = out_ptr.add(1);
2120 }
2121 }
2122 }
2123 }
2124
2125 unsafe {
2126 let new_len = out_ptr as usize - buf.as_ptr() as usize;
2127 debug_assert!(new_len >= initial_len && new_len <= buf.capacity());
2128 buf.set_len(new_len);
2129 }
2130}
2131
2132#[inline]
2141#[allow(dead_code)]
2142fn single_field1_zerocopy(
2143 data: &[u8],
2144 delim: u8,
2145 line_delim: u8,
2146 out: &mut impl Write,
2147) -> io::Result<()> {
2148 let newline_buf: [u8; 1] = [line_delim];
2149
2150 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2151 let mut run_start: usize = 0;
2152 let mut start = 0;
2153
2154 for end_pos in memchr_iter(line_delim, data) {
2155 let line = &data[start..end_pos];
2156 if let Some(dp) = memchr::memchr(delim, line) {
2157 if run_start < start {
2160 iov.push(IoSlice::new(&data[run_start..start]));
2161 }
2162 iov.push(IoSlice::new(&data[start..start + dp]));
2163 iov.push(IoSlice::new(&newline_buf));
2164 run_start = end_pos + 1;
2165
2166 if iov.len() >= MAX_IOV - 2 {
2167 write_ioslices(out, &iov)?;
2168 iov.clear();
2169 }
2170 }
2171 start = end_pos + 1;
2173 }
2174
2175 if start < data.len() {
2177 let line = &data[start..];
2178 if let Some(dp) = memchr::memchr(delim, line) {
2179 if run_start < start {
2180 iov.push(IoSlice::new(&data[run_start..start]));
2181 }
2182 iov.push(IoSlice::new(&data[start..start + dp]));
2183 iov.push(IoSlice::new(&newline_buf));
2184 if !iov.is_empty() {
2185 write_ioslices(out, &iov)?;
2186 }
2187 return Ok(());
2188 }
2189 }
2190
2191 if run_start < data.len() {
2193 iov.push(IoSlice::new(&data[run_start..]));
2194 if !data.is_empty() && *data.last().unwrap() != line_delim {
2195 iov.push(IoSlice::new(&newline_buf));
2196 }
2197 }
2198 if !iov.is_empty() {
2199 write_ioslices(out, &iov)?;
2200 }
2201 Ok(())
2202}
2203
2204fn process_single_field_chunk(
2208 data: &[u8],
2209 delim: u8,
2210 target_idx: usize,
2211 line_delim: u8,
2212 suppress: bool,
2213 buf: &mut Vec<u8>,
2214) {
2215 buf.reserve(data.len() + 1);
2217
2218 let base = data.as_ptr();
2219 let initial_len = buf.len();
2220 let mut out_ptr = unsafe { buf.as_mut_ptr().add(initial_len) };
2221 let mut start = 0;
2222 let mut run_start: usize = 0;
2224 let mut in_run = !suppress; for end_pos in memchr_iter(line_delim, data) {
2227 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
2228 let line_len = end_pos - start;
2229
2230 if line_len == 0 {
2231 if !suppress {
2232 if !in_run {
2234 run_start = start;
2235 in_run = true;
2236 }
2237 }
2238 start = end_pos + 1;
2239 continue;
2240 }
2241
2242 let mut field_start_offset = 0;
2244 let mut field_idx = 0;
2245 let mut found = false;
2246 let mut has_delim = false;
2247
2248 for pos in memchr_iter(delim, line) {
2249 has_delim = true;
2250 if field_idx == target_idx {
2251 if in_run && run_start < start {
2254 let run_len = start - run_start;
2255 unsafe {
2256 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2257 out_ptr = out_ptr.add(run_len);
2258 }
2259 }
2260 let field_len = pos - field_start_offset;
2261 unsafe {
2262 std::ptr::copy_nonoverlapping(
2263 base.add(start + field_start_offset),
2264 out_ptr,
2265 field_len,
2266 );
2267 out_ptr = out_ptr.add(field_len);
2268 *out_ptr = line_delim;
2269 out_ptr = out_ptr.add(1);
2270 }
2271 run_start = end_pos + 1;
2272 in_run = true;
2273 found = true;
2274 break;
2275 }
2276 field_idx += 1;
2277 field_start_offset = pos + 1;
2278 }
2279
2280 if !found {
2281 if !has_delim {
2282 if !suppress {
2284 if !in_run {
2286 run_start = start;
2287 in_run = true;
2288 }
2289 } else {
2290 if in_run && run_start < start {
2292 let run_len = start - run_start;
2293 unsafe {
2294 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2295 out_ptr = out_ptr.add(run_len);
2296 }
2297 }
2298 in_run = false;
2299 run_start = end_pos + 1;
2300 }
2301 } else if field_idx == target_idx {
2302 if in_run && run_start < start {
2304 let run_len = start - run_start;
2305 unsafe {
2306 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2307 out_ptr = out_ptr.add(run_len);
2308 }
2309 }
2310 let field_len = line_len - field_start_offset;
2311 unsafe {
2312 std::ptr::copy_nonoverlapping(
2313 base.add(start + field_start_offset),
2314 out_ptr,
2315 field_len,
2316 );
2317 out_ptr = out_ptr.add(field_len);
2318 *out_ptr = line_delim;
2319 out_ptr = out_ptr.add(1);
2320 }
2321 run_start = end_pos + 1;
2322 in_run = true;
2323 } else {
2324 if in_run && run_start < start {
2326 let run_len = start - run_start;
2327 unsafe {
2328 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2329 out_ptr = out_ptr.add(run_len);
2330 }
2331 }
2332 unsafe {
2333 *out_ptr = line_delim;
2334 out_ptr = out_ptr.add(1);
2335 }
2336 run_start = end_pos + 1;
2337 in_run = true;
2338 }
2339 }
2340
2341 start = end_pos + 1;
2342 }
2343
2344 if in_run && run_start < start {
2346 let run_len = start - run_start;
2347 unsafe {
2348 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2349 out_ptr = out_ptr.add(run_len);
2350 }
2351 }
2352
2353 if start < data.len() {
2355 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2356 let line_len = data.len() - start;
2357
2358 if line_len == 0 {
2359 if !suppress {
2360 unsafe {
2361 *out_ptr = line_delim;
2362 out_ptr = out_ptr.add(1);
2363 }
2364 }
2365 } else {
2366 let mut field_start_offset = 0;
2367 let mut field_idx = 0;
2368 let mut found = false;
2369 let mut has_delim = false;
2370
2371 for pos in memchr_iter(delim, line) {
2372 has_delim = true;
2373 if field_idx == target_idx {
2374 let field_len = pos - field_start_offset;
2375 unsafe {
2376 std::ptr::copy_nonoverlapping(
2377 base.add(start + field_start_offset),
2378 out_ptr,
2379 field_len,
2380 );
2381 out_ptr = out_ptr.add(field_len);
2382 *out_ptr = line_delim;
2383 out_ptr = out_ptr.add(1);
2384 }
2385 found = true;
2386 break;
2387 }
2388 field_idx += 1;
2389 field_start_offset = pos + 1;
2390 }
2391
2392 if !found {
2393 if !has_delim {
2394 if !suppress {
2395 unsafe {
2396 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, line_len);
2397 out_ptr = out_ptr.add(line_len);
2398 *out_ptr = line_delim;
2399 out_ptr = out_ptr.add(1);
2400 }
2401 }
2402 } else if field_idx == target_idx {
2403 let field_len = line_len - field_start_offset;
2404 unsafe {
2405 std::ptr::copy_nonoverlapping(
2406 base.add(start + field_start_offset),
2407 out_ptr,
2408 field_len,
2409 );
2410 out_ptr = out_ptr.add(field_len);
2411 *out_ptr = line_delim;
2412 out_ptr = out_ptr.add(1);
2413 }
2414 } else {
2415 unsafe {
2416 *out_ptr = line_delim;
2417 out_ptr = out_ptr.add(1);
2418 }
2419 }
2420 }
2421 }
2422 }
2423
2424 unsafe {
2425 let new_len = out_ptr as usize - buf.as_ptr() as usize;
2426 debug_assert!(new_len >= initial_len && new_len <= buf.capacity());
2427 buf.set_len(new_len);
2428 }
2429}
2430
2431#[inline(always)]
2435fn extract_fields_to_buf(
2436 line: &[u8],
2437 delim: u8,
2438 ranges: &[Range],
2439 output_delim: &[u8],
2440 suppress: bool,
2441 max_field: usize,
2442 field_mask: u64,
2443 line_delim: u8,
2444 buf: &mut Vec<u8>,
2445 complement: bool,
2446) {
2447 let len = line.len();
2448
2449 if len == 0 {
2450 if !suppress {
2451 buf.push(line_delim);
2452 }
2453 return;
2454 }
2455
2456 let needed = len + output_delim.len() * 16 + 1;
2459 if buf.capacity() - buf.len() < needed {
2460 buf.reserve(needed);
2461 }
2462
2463 let base = line.as_ptr();
2464 let mut field_num: usize = 1;
2465 let mut field_start: usize = 0;
2466 let mut first_output = true;
2467 let mut has_delim = false;
2468
2469 for delim_pos in memchr_iter(delim, line) {
2471 has_delim = true;
2472
2473 if is_selected(field_num, field_mask, ranges, complement) {
2474 if !first_output {
2475 unsafe { buf_extend(buf, output_delim) };
2476 }
2477 unsafe {
2478 buf_extend(
2479 buf,
2480 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2481 )
2482 };
2483 first_output = false;
2484 }
2485
2486 field_num += 1;
2487 field_start = delim_pos + 1;
2488
2489 if field_num > max_field {
2490 break;
2491 }
2492 }
2493
2494 if (field_num <= max_field || complement)
2496 && has_delim
2497 && is_selected(field_num, field_mask, ranges, complement)
2498 {
2499 if !first_output {
2500 unsafe { buf_extend(buf, output_delim) };
2501 }
2502 unsafe {
2503 buf_extend(
2504 buf,
2505 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2506 )
2507 };
2508 first_output = false;
2509 }
2510
2511 if !first_output {
2512 unsafe { buf_push(buf, line_delim) };
2513 } else if !has_delim {
2514 if !suppress {
2515 unsafe {
2516 buf_extend(buf, line);
2517 buf_push(buf, line_delim);
2518 }
2519 }
2520 } else {
2521 unsafe { buf_push(buf, line_delim) };
2522 }
2523}
2524
2525fn process_bytes_from_start(
2532 data: &[u8],
2533 max_bytes: usize,
2534 line_delim: u8,
2535 out: &mut impl Write,
2536) -> io::Result<()> {
2537 if data.len() < PARALLEL_THRESHOLD && max_bytes > 0 && max_bytes < usize::MAX {
2545 let mut start = 0;
2546 let mut all_fit = true;
2547 for pos in memchr_iter(line_delim, data) {
2548 if pos - start > max_bytes {
2549 all_fit = false;
2550 break;
2551 }
2552 start = pos + 1;
2553 }
2554 if all_fit && start < data.len() && data.len() - start > max_bytes {
2556 all_fit = false;
2557 }
2558 if all_fit {
2559 if !data.is_empty() && data[data.len() - 1] == line_delim {
2561 return out.write_all(data);
2562 } else if !data.is_empty() {
2563 out.write_all(data)?;
2564 return out.write_all(&[line_delim]);
2565 }
2566 return Ok(());
2567 }
2568 }
2569
2570 if data.len() >= PARALLEL_THRESHOLD {
2571 let chunks = split_for_scope(data, line_delim);
2572 let n = chunks.len();
2573 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2574 rayon::scope(|s| {
2575 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2576 s.spawn(move |_| {
2577 result.reserve(chunk.len());
2580 bytes_from_start_chunk(chunk, max_bytes, line_delim, result);
2581 });
2582 }
2583 });
2584 let slices: Vec<IoSlice> = results
2586 .iter()
2587 .filter(|r| !r.is_empty())
2588 .map(|r| IoSlice::new(r))
2589 .collect();
2590 write_ioslices(out, &slices)?;
2591 } else {
2592 if max_bytes <= 512 {
2598 let est_out = (data.len() / 4).max(max_bytes + 2);
2601 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2602 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2603 if !buf.is_empty() {
2604 out.write_all(&buf)?;
2605 }
2606 } else {
2607 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2611 }
2612 }
2613 Ok(())
2614}
2615
2616#[inline]
2621fn bytes_from_start_zerocopy(
2622 data: &[u8],
2623 max_bytes: usize,
2624 line_delim: u8,
2625 out: &mut impl Write,
2626) -> io::Result<()> {
2627 let newline_buf: [u8; 1] = [line_delim];
2628 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2629 let mut start = 0;
2630 let mut run_start: usize = 0;
2631
2632 for pos in memchr_iter(line_delim, data) {
2633 let line_len = pos - start;
2634 if line_len > max_bytes {
2635 if run_start < start {
2637 iov.push(IoSlice::new(&data[run_start..start]));
2638 }
2639 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2640 iov.push(IoSlice::new(&newline_buf));
2641 run_start = pos + 1;
2642
2643 if iov.len() >= MAX_IOV - 2 {
2644 write_ioslices(out, &iov)?;
2645 iov.clear();
2646 }
2647 }
2648 start = pos + 1;
2649 }
2650 if start < data.len() {
2652 let line_len = data.len() - start;
2653 if line_len > max_bytes {
2654 if run_start < start {
2655 iov.push(IoSlice::new(&data[run_start..start]));
2656 }
2657 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2658 iov.push(IoSlice::new(&newline_buf));
2659 if !iov.is_empty() {
2660 write_ioslices(out, &iov)?;
2661 }
2662 return Ok(());
2663 }
2664 }
2665 if run_start < data.len() {
2667 iov.push(IoSlice::new(&data[run_start..]));
2668 if !data.is_empty() && *data.last().unwrap() != line_delim {
2669 iov.push(IoSlice::new(&newline_buf));
2670 }
2671 }
2672 if !iov.is_empty() {
2673 write_ioslices(out, &iov)?;
2674 }
2675 Ok(())
2676}
2677
2678#[inline]
2683fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2684 buf.reserve(data.len());
2687
2688 let src = data.as_ptr();
2689 let dst_base = buf.as_mut_ptr();
2690 let mut wp = buf.len();
2691 let mut start = 0;
2692
2693 for pos in memchr_iter(line_delim, data) {
2694 let line_len = pos - start;
2695 let take = line_len.min(max_bytes);
2696 unsafe {
2697 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2698 *dst_base.add(wp + take) = line_delim;
2699 }
2700 wp += take + 1;
2701 start = pos + 1;
2702 }
2703 if start < data.len() {
2705 let line_len = data.len() - start;
2706 let take = line_len.min(max_bytes);
2707 unsafe {
2708 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2709 *dst_base.add(wp + take) = line_delim;
2710 }
2711 wp += take + 1;
2712 }
2713 unsafe { buf.set_len(wp) };
2714}
2715
2716fn process_bytes_from_offset(
2718 data: &[u8],
2719 skip_bytes: usize,
2720 line_delim: u8,
2721 out: &mut impl Write,
2722) -> io::Result<()> {
2723 if data.len() >= PARALLEL_THRESHOLD {
2724 let chunks = split_for_scope(data, line_delim);
2725 let n = chunks.len();
2726 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2727 rayon::scope(|s| {
2728 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2729 s.spawn(move |_| {
2730 result.reserve(chunk.len());
2731 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, result);
2732 });
2733 }
2734 });
2735 let slices: Vec<IoSlice> = results
2737 .iter()
2738 .filter(|r| !r.is_empty())
2739 .map(|r| IoSlice::new(r))
2740 .collect();
2741 write_ioslices(out, &slices)?;
2742 } else {
2743 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2745 }
2746 Ok(())
2747}
2748
2749#[inline]
2753fn bytes_from_offset_zerocopy(
2754 data: &[u8],
2755 skip_bytes: usize,
2756 line_delim: u8,
2757 out: &mut impl Write,
2758) -> io::Result<()> {
2759 let delim_buf = [line_delim];
2760 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2761
2762 let mut start = 0;
2763 for pos in memchr_iter(line_delim, data) {
2764 let line_len = pos - start;
2765 if line_len > skip_bytes {
2766 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2767 }
2768 iov.push(IoSlice::new(&delim_buf));
2769 if iov.len() >= MAX_IOV - 1 {
2771 write_ioslices(out, &iov)?;
2772 iov.clear();
2773 }
2774 start = pos + 1;
2775 }
2776 if start < data.len() {
2777 let line_len = data.len() - start;
2778 if line_len > skip_bytes {
2779 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2780 }
2781 iov.push(IoSlice::new(&delim_buf));
2782 }
2783 if !iov.is_empty() {
2784 write_ioslices(out, &iov)?;
2785 }
2786 Ok(())
2787}
2788
2789#[inline]
2792fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2793 buf.reserve(data.len());
2794
2795 let src = data.as_ptr();
2796 let dst_base = buf.as_mut_ptr();
2797 let mut wp = buf.len();
2798 let mut start = 0;
2799
2800 for pos in memchr_iter(line_delim, data) {
2801 let line_len = pos - start;
2802 if line_len > skip_bytes {
2803 let take = line_len - skip_bytes;
2804 unsafe {
2805 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2806 }
2807 wp += take;
2808 }
2809 unsafe {
2810 *dst_base.add(wp) = line_delim;
2811 }
2812 wp += 1;
2813 start = pos + 1;
2814 }
2815 if start < data.len() {
2816 let line_len = data.len() - start;
2817 if line_len > skip_bytes {
2818 let take = line_len - skip_bytes;
2819 unsafe {
2820 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2821 }
2822 wp += take;
2823 }
2824 unsafe {
2825 *dst_base.add(wp) = line_delim;
2826 }
2827 wp += 1;
2828 }
2829 unsafe { buf.set_len(wp) };
2830}
2831
2832fn process_bytes_mid_range(
2834 data: &[u8],
2835 start_byte: usize,
2836 end_byte: usize,
2837 line_delim: u8,
2838 out: &mut impl Write,
2839) -> io::Result<()> {
2840 let skip = start_byte.saturating_sub(1);
2841
2842 if data.len() >= PARALLEL_THRESHOLD {
2843 let chunks = split_for_scope(data, line_delim);
2844 let n = chunks.len();
2845 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2846 rayon::scope(|s| {
2847 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2848 s.spawn(move |_| {
2849 result.reserve(chunk.len());
2850 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, result);
2851 });
2852 }
2853 });
2854 let slices: Vec<IoSlice> = results
2855 .iter()
2856 .filter(|r| !r.is_empty())
2857 .map(|r| IoSlice::new(r))
2858 .collect();
2859 write_ioslices(out, &slices)?;
2860 } else {
2861 process_chunked(data, line_delim, out, |chunk, buf| {
2862 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, buf);
2863 })?;
2864 }
2865 Ok(())
2866}
2867
2868#[inline]
2872fn bytes_mid_range_chunk(
2873 data: &[u8],
2874 skip: usize,
2875 end_byte: usize,
2876 line_delim: u8,
2877 buf: &mut Vec<u8>,
2878) {
2879 buf.reserve(data.len());
2880
2881 let src = data.as_ptr();
2882 let dst_base = buf.as_mut_ptr();
2883 let mut wp = buf.len();
2884 let mut start = 0;
2885
2886 for pos in memchr_iter(line_delim, data) {
2887 let line_len = pos - start;
2888 if line_len > skip {
2889 let take_end = line_len.min(end_byte);
2890 let take = take_end - skip;
2891 unsafe {
2892 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2893 }
2894 wp += take;
2895 }
2896 unsafe {
2897 *dst_base.add(wp) = line_delim;
2898 }
2899 wp += 1;
2900 start = pos + 1;
2901 }
2902 if start < data.len() {
2903 let line_len = data.len() - start;
2904 if line_len > skip {
2905 let take_end = line_len.min(end_byte);
2906 let take = take_end - skip;
2907 unsafe {
2908 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2909 }
2910 wp += take;
2911 }
2912 unsafe {
2913 *dst_base.add(wp) = line_delim;
2914 }
2915 wp += 1;
2916 }
2917 unsafe { buf.set_len(wp) };
2918}
2919
2920fn process_bytes_complement_mid(
2922 data: &[u8],
2923 skip_start: usize,
2924 skip_end: usize,
2925 line_delim: u8,
2926 out: &mut impl Write,
2927) -> io::Result<()> {
2928 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
2930 let chunks = split_for_scope(data, line_delim);
2931 let n = chunks.len();
2932 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2933 rayon::scope(|s| {
2934 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2935 s.spawn(move |_| {
2936 result.reserve(chunk.len());
2937 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, result);
2938 });
2939 }
2940 });
2941 let slices: Vec<IoSlice> = results
2942 .iter()
2943 .filter(|r| !r.is_empty())
2944 .map(|r| IoSlice::new(r))
2945 .collect();
2946 write_ioslices(out, &slices)?;
2947 } else {
2948 process_chunked(data, line_delim, out, |chunk, buf| {
2949 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, buf);
2950 })?;
2951 }
2952 Ok(())
2953}
2954
2955#[inline]
2958fn bytes_complement_mid_chunk(
2959 data: &[u8],
2960 prefix_bytes: usize,
2961 skip_end: usize,
2962 line_delim: u8,
2963 buf: &mut Vec<u8>,
2964) {
2965 buf.reserve(data.len());
2966
2967 let src = data.as_ptr();
2968 let dst_base = buf.as_mut_ptr();
2969 let mut wp = buf.len();
2970 let mut start = 0;
2971
2972 for pos in memchr_iter(line_delim, data) {
2973 let line_len = pos - start;
2974 let take_prefix = prefix_bytes.min(line_len);
2976 if take_prefix > 0 {
2977 unsafe {
2978 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2979 }
2980 wp += take_prefix;
2981 }
2982 if line_len > skip_end {
2984 let suffix_len = line_len - skip_end;
2985 unsafe {
2986 std::ptr::copy_nonoverlapping(
2987 src.add(start + skip_end),
2988 dst_base.add(wp),
2989 suffix_len,
2990 );
2991 }
2992 wp += suffix_len;
2993 }
2994 unsafe {
2995 *dst_base.add(wp) = line_delim;
2996 }
2997 wp += 1;
2998 start = pos + 1;
2999 }
3000 if start < data.len() {
3001 let line_len = data.len() - start;
3002 let take_prefix = prefix_bytes.min(line_len);
3003 if take_prefix > 0 {
3004 unsafe {
3005 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
3006 }
3007 wp += take_prefix;
3008 }
3009 if line_len > skip_end {
3010 let suffix_len = line_len - skip_end;
3011 unsafe {
3012 std::ptr::copy_nonoverlapping(
3013 src.add(start + skip_end),
3014 dst_base.add(wp),
3015 suffix_len,
3016 );
3017 }
3018 wp += suffix_len;
3019 }
3020 unsafe {
3021 *dst_base.add(wp) = line_delim;
3022 }
3023 wp += 1;
3024 }
3025 unsafe { buf.set_len(wp) };
3026}
3027
3028fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3030 let line_delim = cfg.line_delim;
3031 let ranges = cfg.ranges;
3032 let complement = cfg.complement;
3033 let output_delim = cfg.output_delim;
3034
3035 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
3037 let max_bytes = ranges[0].end;
3038 if max_bytes < usize::MAX {
3039 return process_bytes_from_start(data, max_bytes, line_delim, out);
3040 }
3041 }
3042
3043 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
3045 let skip_bytes = ranges[0].start.saturating_sub(1);
3046 if skip_bytes > 0 {
3047 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
3048 }
3049 }
3050
3051 if !complement
3053 && ranges.len() == 1
3054 && ranges[0].start > 1
3055 && ranges[0].end < usize::MAX
3056 && output_delim.is_empty()
3057 {
3058 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
3059 }
3060
3061 if complement
3063 && ranges.len() == 1
3064 && ranges[0].start == 1
3065 && ranges[0].end < usize::MAX
3066 && output_delim.is_empty()
3067 {
3068 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
3069 }
3070
3071 if complement
3073 && ranges.len() == 1
3074 && ranges[0].end == usize::MAX
3075 && ranges[0].start > 1
3076 && output_delim.is_empty()
3077 {
3078 let max_bytes = ranges[0].start - 1;
3079 return process_bytes_from_start(data, max_bytes, line_delim, out);
3080 }
3081
3082 if complement
3084 && ranges.len() == 1
3085 && ranges[0].start > 1
3086 && ranges[0].end < usize::MAX
3087 && output_delim.is_empty()
3088 {
3089 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
3090 }
3091
3092 if data.len() >= PARALLEL_THRESHOLD {
3093 let chunks = split_for_scope(data, line_delim);
3094 let n = chunks.len();
3095 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
3096 rayon::scope(|s| {
3097 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
3098 s.spawn(move |_| {
3099 result.reserve(chunk.len() + 1);
3100 process_bytes_chunk(
3101 chunk,
3102 ranges,
3103 complement,
3104 output_delim,
3105 line_delim,
3106 result,
3107 );
3108 });
3109 }
3110 });
3111 let slices: Vec<IoSlice> = results
3112 .iter()
3113 .filter(|r| !r.is_empty())
3114 .map(|r| IoSlice::new(r))
3115 .collect();
3116 write_ioslices(out, &slices)?;
3117 } else {
3118 process_chunked(data, line_delim, out, |chunk, buf| {
3119 process_bytes_chunk(chunk, ranges, complement, output_delim, line_delim, buf);
3120 })?;
3121 }
3122 Ok(())
3123}
3124
3125fn process_bytes_chunk(
3130 data: &[u8],
3131 ranges: &[Range],
3132 complement: bool,
3133 output_delim: &[u8],
3134 line_delim: u8,
3135 buf: &mut Vec<u8>,
3136) {
3137 buf.reserve(data.len());
3138 let base = data.as_ptr();
3139 let mut start = 0;
3140 for end_pos in memchr_iter(line_delim, data) {
3141 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
3142 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3143 unsafe { buf_push(buf, line_delim) };
3144 start = end_pos + 1;
3145 }
3146 if start < data.len() {
3147 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
3148 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3149 unsafe { buf_push(buf, line_delim) };
3150 }
3151}
3152
3153#[inline(always)]
3157fn cut_bytes_to_buf(
3158 line: &[u8],
3159 ranges: &[Range],
3160 complement: bool,
3161 output_delim: &[u8],
3162 buf: &mut Vec<u8>,
3163) {
3164 let len = line.len();
3165 let base = line.as_ptr();
3166 let mut first_range = true;
3167
3168 let needed = len + output_delim.len() * ranges.len() + 1;
3170 if buf.capacity() - buf.len() < needed {
3171 buf.reserve(needed);
3172 }
3173
3174 if complement {
3175 let mut pos: usize = 1;
3176 for r in ranges {
3177 let rs = r.start;
3178 let re = r.end.min(len);
3179 if pos < rs {
3180 if !first_range && !output_delim.is_empty() {
3181 unsafe { buf_extend(buf, output_delim) };
3182 }
3183 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
3184 first_range = false;
3185 }
3186 pos = re + 1;
3187 if pos > len {
3188 break;
3189 }
3190 }
3191 if pos <= len {
3192 if !first_range && !output_delim.is_empty() {
3193 unsafe { buf_extend(buf, output_delim) };
3194 }
3195 unsafe {
3196 buf_extend(
3197 buf,
3198 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
3199 )
3200 };
3201 }
3202 } else if output_delim.is_empty() && ranges.len() == 1 {
3203 let start = ranges[0].start.saturating_sub(1);
3205 let end = ranges[0].end.min(len);
3206 if start < len {
3207 unsafe {
3208 buf_extend(
3209 buf,
3210 std::slice::from_raw_parts(base.add(start), end - start),
3211 )
3212 };
3213 }
3214 } else {
3215 for r in ranges {
3216 let start = r.start.saturating_sub(1);
3217 let end = r.end.min(len);
3218 if start >= len {
3219 break;
3220 }
3221 if !first_range && !output_delim.is_empty() {
3222 unsafe { buf_extend(buf, output_delim) };
3223 }
3224 unsafe {
3225 buf_extend(
3226 buf,
3227 std::slice::from_raw_parts(base.add(start), end - start),
3228 )
3229 };
3230 first_range = false;
3231 }
3232 }
3233}
3234
3235#[inline]
3239pub fn cut_fields(
3240 line: &[u8],
3241 delim: u8,
3242 ranges: &[Range],
3243 complement: bool,
3244 output_delim: &[u8],
3245 suppress_no_delim: bool,
3246 out: &mut impl Write,
3247) -> io::Result<bool> {
3248 if memchr::memchr(delim, line).is_none() {
3249 if !suppress_no_delim {
3250 out.write_all(line)?;
3251 return Ok(true);
3252 }
3253 return Ok(false);
3254 }
3255
3256 let mut field_num: usize = 1;
3257 let mut field_start: usize = 0;
3258 let mut first_output = true;
3259
3260 for delim_pos in memchr_iter(delim, line) {
3261 let selected = in_ranges(ranges, field_num) != complement;
3262 if selected {
3263 if !first_output {
3264 out.write_all(output_delim)?;
3265 }
3266 out.write_all(&line[field_start..delim_pos])?;
3267 first_output = false;
3268 }
3269 field_start = delim_pos + 1;
3270 field_num += 1;
3271 }
3272
3273 let selected = in_ranges(ranges, field_num) != complement;
3274 if selected {
3275 if !first_output {
3276 out.write_all(output_delim)?;
3277 }
3278 out.write_all(&line[field_start..])?;
3279 }
3280
3281 Ok(true)
3282}
3283
3284#[inline]
3286pub fn cut_bytes(
3287 line: &[u8],
3288 ranges: &[Range],
3289 complement: bool,
3290 output_delim: &[u8],
3291 out: &mut impl Write,
3292) -> io::Result<bool> {
3293 let mut first_range = true;
3294
3295 if complement {
3296 let len = line.len();
3297 let mut comp_ranges = Vec::new();
3298 let mut pos: usize = 1;
3299 for r in ranges {
3300 let rs = r.start;
3301 let re = r.end.min(len);
3302 if pos < rs {
3303 comp_ranges.push((pos, rs - 1));
3304 }
3305 pos = re + 1;
3306 if pos > len {
3307 break;
3308 }
3309 }
3310 if pos <= len {
3311 comp_ranges.push((pos, len));
3312 }
3313 for &(s, e) in &comp_ranges {
3314 if !first_range && !output_delim.is_empty() {
3315 out.write_all(output_delim)?;
3316 }
3317 out.write_all(&line[s - 1..e])?;
3318 first_range = false;
3319 }
3320 } else {
3321 for r in ranges {
3322 let start = r.start.saturating_sub(1);
3323 let end = r.end.min(line.len());
3324 if start >= line.len() {
3325 break;
3326 }
3327 if !first_range && !output_delim.is_empty() {
3328 out.write_all(output_delim)?;
3329 }
3330 out.write_all(&line[start..end])?;
3331 first_range = false;
3332 }
3333 }
3334 Ok(true)
3335}
3336
3337pub fn cut_field1_inplace(data: &mut [u8], delim: u8, line_delim: u8, suppress: bool) -> usize {
3345 let len = data.len();
3346 let mut wp: usize = 0;
3347 let mut rp: usize = 0;
3348
3349 while rp < len {
3350 match memchr::memchr2(delim, line_delim, &data[rp..]) {
3351 None => {
3352 if suppress {
3354 break;
3356 }
3357 let remaining = len - rp;
3358 if wp != rp {
3359 data.copy_within(rp..len, wp);
3360 }
3361 wp += remaining;
3362 break;
3363 }
3364 Some(offset) => {
3365 let actual = rp + offset;
3366 if data[actual] == line_delim {
3367 if suppress {
3369 rp = actual + 1;
3371 } else {
3372 let chunk_len = actual + 1 - rp;
3374 if wp != rp {
3375 data.copy_within(rp..actual + 1, wp);
3376 }
3377 wp += chunk_len;
3378 rp = actual + 1;
3379 }
3380 } else {
3381 let field_len = actual - rp;
3383 if wp != rp && field_len > 0 {
3384 data.copy_within(rp..actual, wp);
3385 }
3386 wp += field_len;
3387 data[wp] = line_delim;
3388 wp += 1;
3389 match memchr::memchr(line_delim, &data[actual + 1..]) {
3391 None => {
3392 rp = len;
3393 }
3394 Some(nl_off) => {
3395 rp = actual + 1 + nl_off + 1;
3396 }
3397 }
3398 }
3399 }
3400 }
3401 }
3402 wp
3403}
3404
3405pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3407 match cfg.mode {
3408 CutMode::Fields => process_fields_fast(data, cfg, out),
3409 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
3410 }
3411}
3412
3413pub fn process_cut_reader<R: BufRead>(
3418 mut reader: R,
3419 cfg: &CutConfig,
3420 out: &mut impl Write,
3421) -> io::Result<()> {
3422 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3424
3425 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3428
3429 loop {
3430 buf.reserve(CHUNK_SIZE);
3432 let read_start = buf.len();
3433 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3434 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3435 buf.truncate(read_start + n);
3436
3437 if buf.is_empty() {
3438 break;
3439 }
3440
3441 if n == 0 {
3442 process_cut_data(&buf, cfg, out)?;
3444 break;
3445 }
3446
3447 let process_end = match memchr::memrchr(line_delim, &buf) {
3449 Some(pos) => pos + 1,
3450 None => {
3451 continue;
3453 }
3454 };
3455
3456 process_cut_data(&buf[..process_end], cfg, out)?;
3458
3459 let leftover_len = buf.len() - process_end;
3461 if leftover_len > 0 {
3462 buf.copy_within(process_end.., 0);
3463 }
3464 buf.truncate(leftover_len);
3465 }
3466
3467 Ok(())
3468}
3469
/// Reads from `reader` until `buf` is full or end of input is reached, and
/// returns the number of bytes stored in `buf`.
///
/// A result shorter than `buf.len()` therefore means the reader hit end of
/// input. Reads that fail with [`io::ErrorKind::Interrupted`] are retried,
/// including the very first one.
///
/// # Errors
/// Returns any other I/O error reported by `reader`.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3489
3490pub fn process_cut_data_mut(data: &mut [u8], cfg: &CutConfig) -> Option<usize> {
3502 if cfg.complement {
3503 return None;
3504 }
3505 if data.is_empty() || data[data.len() - 1] != cfg.line_delim {
3509 return None;
3510 }
3511
3512 match cfg.mode {
3513 CutMode::Fields => {
3514 if cfg.output_delim.len() != 1 || cfg.output_delim[0] != cfg.delim {
3516 return None;
3517 }
3518 if cfg.delim == cfg.line_delim {
3519 return None;
3520 }
3521 Some(cut_fields_inplace_general(
3522 data,
3523 cfg.delim,
3524 cfg.line_delim,
3525 cfg.ranges,
3526 cfg.suppress_no_delim,
3527 ))
3528 }
3529 CutMode::Bytes | CutMode::Characters => {
3530 if !cfg.output_delim.is_empty() {
3531 return None;
3532 }
3533 Some(cut_bytes_inplace_general(data, cfg.line_delim, cfg.ranges))
3534 }
3535 }
3536}
3537
3538fn cut_fields_inplace_general(
3541 data: &mut [u8],
3542 delim: u8,
3543 line_delim: u8,
3544 ranges: &[Range],
3545 suppress: bool,
3546) -> usize {
3547 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == 1 {
3549 return cut_field1_inplace(data, delim, line_delim, suppress);
3550 }
3551
3552 let len = data.len();
3553 if len == 0 {
3554 return 0;
3555 }
3556
3557 let max_field = ranges.last().map_or(0, |r| r.end);
3558 let max_delims = max_field.min(128);
3559 let mut wp: usize = 0;
3560 let mut rp: usize = 0;
3561
3562 while rp < len {
3563 let line_end = memchr::memchr(line_delim, &data[rp..])
3564 .map(|p| rp + p)
3565 .unwrap_or(len);
3566 let line_len = line_end - rp;
3567
3568 let mut delim_pos = [0usize; 128];
3570 let mut num_delims: usize = 0;
3571
3572 for pos in memchr_iter(delim, &data[rp..line_end]) {
3573 if num_delims < max_delims {
3574 delim_pos[num_delims] = pos;
3575 num_delims += 1;
3576 if num_delims >= max_delims {
3577 break;
3578 }
3579 }
3580 }
3581
3582 if num_delims == 0 {
3583 if !suppress {
3585 if wp != rp {
3586 data.copy_within(rp..line_end, wp);
3587 }
3588 wp += line_len;
3589 if line_end < len {
3590 data[wp] = line_delim;
3591 wp += 1;
3592 }
3593 }
3594 } else {
3595 let total_fields = num_delims + 1;
3596 let mut first_output = true;
3597
3598 for r in ranges {
3599 let range_start = r.start;
3600 let range_end = r.end.min(total_fields);
3601 if range_start > total_fields {
3602 break;
3603 }
3604 for field_num in range_start..=range_end {
3605 if field_num > total_fields {
3606 break;
3607 }
3608
3609 let field_start = if field_num == 1 {
3610 0
3611 } else if field_num - 2 < num_delims {
3612 delim_pos[field_num - 2] + 1
3613 } else {
3614 continue;
3615 };
3616 let field_end = if field_num <= num_delims {
3617 delim_pos[field_num - 1]
3618 } else {
3619 line_len
3620 };
3621
3622 if !first_output {
3623 data[wp] = delim;
3624 wp += 1;
3625 }
3626 let flen = field_end - field_start;
3627 if flen > 0 {
3628 data.copy_within(rp + field_start..rp + field_start + flen, wp);
3629 wp += flen;
3630 }
3631 first_output = false;
3632 }
3633 }
3634
3635 if !first_output && line_end < len {
3636 data[wp] = line_delim;
3637 wp += 1;
3638 } else if first_output && line_end < len {
3639 data[wp] = line_delim;
3641 wp += 1;
3642 }
3643 }
3644
3645 rp = if line_end < len { line_end + 1 } else { len };
3646 }
3647
3648 wp
3649}
3650
3651fn cut_bytes_inplace_general(data: &mut [u8], line_delim: u8, ranges: &[Range]) -> usize {
3653 let len = data.len();
3654 if len == 0 {
3655 return 0;
3656 }
3657
3658 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == usize::MAX {
3660 return len;
3661 }
3662
3663 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end < usize::MAX {
3665 return cut_bytes_from_start_inplace(data, line_delim, ranges[0].end);
3666 }
3667
3668 let mut wp: usize = 0;
3669 let mut rp: usize = 0;
3670
3671 while rp < len {
3672 let line_end = memchr::memchr(line_delim, &data[rp..])
3673 .map(|p| rp + p)
3674 .unwrap_or(len);
3675 let line_len = line_end - rp;
3676
3677 for r in ranges {
3678 let start = r.start.saturating_sub(1);
3679 let end = r.end.min(line_len);
3680 if start >= line_len {
3681 break;
3682 }
3683 let flen = end - start;
3684 if flen > 0 {
3685 data.copy_within(rp + start..rp + start + flen, wp);
3686 wp += flen;
3687 }
3688 }
3689
3690 if line_end < len {
3691 data[wp] = line_delim;
3692 wp += 1;
3693 }
3694
3695 rp = if line_end < len { line_end + 1 } else { len };
3696 }
3697
3698 wp
3699}
3700
3701fn cut_bytes_from_start_inplace(data: &mut [u8], line_delim: u8, max_bytes: usize) -> usize {
3703 let len = data.len();
3704
3705 let mut all_fit = true;
3707 let mut start = 0;
3708 for pos in memchr_iter(line_delim, data) {
3709 if pos - start > max_bytes {
3710 all_fit = false;
3711 break;
3712 }
3713 start = pos + 1;
3714 }
3715 if all_fit && start < len && len - start > max_bytes {
3716 all_fit = false;
3717 }
3718 if all_fit {
3719 return len;
3720 }
3721
3722 let mut wp: usize = 0;
3724 let mut rp: usize = 0;
3725
3726 while rp < len {
3727 let line_end = memchr::memchr(line_delim, &data[rp..])
3728 .map(|p| rp + p)
3729 .unwrap_or(len);
3730 let line_len = line_end - rp;
3731
3732 let take = line_len.min(max_bytes);
3733 if take > 0 && wp != rp {
3734 data.copy_within(rp..rp + take, wp);
3735 }
3736 wp += take;
3737
3738 if line_end < len {
3739 data[wp] = line_delim;
3740 wp += 1;
3741 }
3742
3743 rp = if line_end < len { line_end + 1 } else { len };
3744 }
3745
3746 wp
3747}
3748
/// Which unit a cut operation selects by.
///
/// `Bytes` and `Characters` are dispatched to the same byte-oriented
/// implementation by the processing entry points in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Select by byte position.
    Bytes,
    /// Select by character position (processed like `Bytes` in this file).
    Characters,
    /// Select delimiter-separated fields.
    Fields,
}