1use memchr::memchr_iter;
2use std::io::{self, BufRead, IoSlice, Write};
3
/// Input size in bytes (2 MiB) at or above which the field-processing functions
/// split the input into line-aligned chunks and process them on multiple threads.
const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;

/// Maximum number of `IoSlice`s passed to a single `write_vectored` call by
/// `write_ioslices`.
// NOTE(review): presumably mirrors the platform IOV_MAX limit — confirm.
const MAX_IOV: usize = 1024;
11
/// Borrowed configuration for one `cut` invocation.
pub struct CutConfig<'a> {
    /// Selection mode (`CutMode` is declared elsewhere in the crate).
    pub mode: CutMode,
    /// Selected ranges.
    // NOTE(review): the processing code assumes these are sorted and merged,
    // as returned by `parse_ranges` — confirm at the call sites.
    pub ranges: &'a [Range],
    /// When true, the positions NOT covered by `ranges` are selected.
    pub complement: bool,
    /// Input field delimiter byte.
    pub delim: u8,
    /// Bytes written between selected fields in the output.
    pub output_delim: &'a [u8],
    /// When true, lines that contain no field delimiter are dropped instead of
    /// being copied through unchanged.
    pub suppress_no_delim: bool,
    /// Line terminator byte, used for both input splitting and output.
    pub line_delim: u8,
}
22
/// An inclusive range of 1-based positions (fields, bytes or characters).
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position; `parse_ranges` never produces 0.
    pub start: usize,
    /// Last selected position (inclusive); `parse_ranges` uses `usize::MAX`
    /// for an open-ended range such as `3-`.
    pub end: usize,
}
29
30pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
33 let mut ranges = Vec::new();
34
35 for part in spec.split(',') {
36 let part = part.trim();
37 if part.is_empty() {
38 continue;
39 }
40
41 if let Some(idx) = part.find('-') {
42 let left = &part[..idx];
43 let right = &part[idx + 1..];
44
45 let start = if left.is_empty() {
46 1
47 } else {
48 left.parse::<usize>()
49 .map_err(|_| format!("invalid range: '{}'", part))?
50 };
51
52 let end = if right.is_empty() {
53 usize::MAX
54 } else {
55 right
56 .parse::<usize>()
57 .map_err(|_| format!("invalid range: '{}'", part))?
58 };
59
60 if start == 0 {
61 return Err("fields and positions are numbered from 1".to_string());
62 }
63 if start > end {
64 return Err(format!("invalid decreasing range: '{}'", part));
65 }
66
67 ranges.push(Range { start, end });
68 } else {
69 let n = part
70 .parse::<usize>()
71 .map_err(|_| format!("invalid field: '{}'", part))?;
72 if n == 0 {
73 return Err("fields and positions are numbered from 1".to_string());
74 }
75 ranges.push(Range { start: n, end: n });
76 }
77 }
78
79 if ranges.is_empty() {
80 return Err("you must specify a list of bytes, characters, or fields".to_string());
81 }
82
83 ranges.sort_by_key(|r| (r.start, r.end));
85 let mut merged = vec![ranges[0].clone()];
86 for r in &ranges[1..] {
87 let last = merged.last_mut().unwrap();
88 if r.start <= last.end.saturating_add(1) {
89 last.end = last.end.max(r.end);
90 } else {
91 merged.push(r.clone());
92 }
93 }
94
95 Ok(merged)
96}
97
98#[inline(always)]
101fn in_ranges(ranges: &[Range], pos: usize) -> bool {
102 for r in ranges {
103 if pos < r.start {
104 return false;
105 }
106 if pos <= r.end {
107 return true;
108 }
109 }
110 false
111}
112
113#[inline]
116fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
117 let mut mask: u64 = 0;
118 for i in 1..=64u32 {
119 let in_range = in_ranges(ranges, i as usize);
120 if in_range != complement {
121 mask |= 1u64 << (i - 1);
122 }
123 }
124 mask
125}
126
127#[inline(always)]
129fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
130 if field_num <= 64 {
131 (mask >> (field_num - 1)) & 1 == 1
132 } else {
133 in_ranges(ranges, field_num) != complement
134 }
135}
136
/// Appends `data` to `buf` with a raw `memcpy`.
///
/// The capacity is now ensured here before the copy. Callers in this file
/// reserve only `data.len()` bytes up front but can produce more output than
/// that (an added terminator on an unterminated last line, or an output
/// delimiter longer than the input delimiter), which previously wrote past the
/// allocation. `reserve` is a cheap branch when the space is already there.
///
/// # Safety
///
/// No caller-side precondition remains; the function stays `unsafe` only so
/// existing call sites keep compiling unchanged.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    buf.reserve(data.len());
    // SAFETY: `reserve` guarantees at least `data.len()` bytes of spare
    // capacity after `len`, and `data` cannot alias `buf` because `buf` is
    // borrowed mutably.
    unsafe {
        let len = buf.len();
        std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr().add(len), data.len());
        buf.set_len(len + data.len());
    }
}
149
/// Appends the single byte `b` to `buf` with a raw write.
///
/// The capacity is ensured here first: callers reserve `data.len()` bytes but
/// append a line terminator even when the input's last line has none, so the
/// final push could previously land one byte past the allocation.
///
/// # Safety
///
/// No caller-side precondition remains; the function stays `unsafe` only so
/// existing call sites keep compiling unchanged.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    buf.reserve(1);
    // SAFETY: `reserve(1)` guarantees one byte of spare capacity after `len`.
    unsafe {
        let len = buf.len();
        *buf.as_mut_ptr().add(len) = b;
        buf.set_len(len + 1);
    }
}
160
161#[inline]
165fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
166 if slices.is_empty() {
167 return Ok(());
168 }
169 for batch in slices.chunks(MAX_IOV) {
170 let total: usize = batch.iter().map(|s| s.len()).sum();
171 let written = out.write_vectored(batch)?;
172 if written >= total {
173 continue;
174 }
175 if written == 0 {
176 return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
177 }
178 write_ioslices_slow(out, batch, written)?;
179 }
180 Ok(())
181}
182
/// Finishes a partially completed vectored write.
///
/// The first `skip` bytes of the concatenated `slices` are treated as already
/// written; everything after them is written with `write_all`.
#[cold]
#[inline(never)]
fn write_ioslices_slow(
    out: &mut impl Write,
    slices: &[IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for piece in slices {
        let bytes: &[u8] = piece;
        match bytes.get(skip..) {
            // Part (or all) of this slice is still pending.
            Some(pending) if !pending.is_empty() => {
                out.write_all(pending)?;
                skip = 0;
            }
            // The whole slice was already written; consume it from `skip`.
            _ => skip -= bytes.len(),
        }
    }
    Ok(())
}
202
/// Returns the parallelism reported by the standard library, or 1 when it
/// cannot be determined.
#[inline]
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    }
}
214
215fn split_for_scope<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
218 let num_threads = num_cpus().max(1);
219 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
220 return vec![data];
221 }
222
223 let chunk_size = data.len() / num_threads;
224 let mut chunks = Vec::with_capacity(num_threads);
225 let mut pos = 0;
226
227 for _ in 0..num_threads - 1 {
228 let target = pos + chunk_size;
229 if target >= data.len() {
230 break;
231 }
232 let boundary = memchr::memchr(line_delim, &data[target..])
233 .map(|p| target + p + 1)
234 .unwrap_or(data.len());
235 if boundary > pos {
236 chunks.push(&data[pos..boundary]);
237 }
238 pos = boundary;
239 }
240
241 if pos < data.len() {
242 chunks.push(&data[pos..]);
243 }
244
245 chunks
246}
247
248fn process_fields_multi_select(
255 data: &[u8],
256 delim: u8,
257 line_delim: u8,
258 ranges: &[Range],
259 suppress: bool,
260 out: &mut impl Write,
261) -> io::Result<()> {
262 let max_field = ranges.last().map_or(0, |r| r.end);
263
264 if data.len() >= PARALLEL_THRESHOLD {
265 let chunks = split_for_scope(data, line_delim);
266 let n = chunks.len();
267 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
268 rayon::scope(|s| {
269 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
270 s.spawn(move |_| {
271 result.reserve(chunk.len() * 3 / 4);
272 multi_select_chunk(
273 chunk, delim, line_delim, ranges, max_field, suppress, result,
274 );
275 });
276 }
277 });
278 let slices: Vec<IoSlice> = results
279 .iter()
280 .filter(|r| !r.is_empty())
281 .map(|r| IoSlice::new(r))
282 .collect();
283 write_ioslices(out, &slices)?;
284 } else {
285 let mut buf = Vec::with_capacity(data.len() * 3 / 4);
286 multi_select_chunk(
287 data, delim, line_delim, ranges, max_field, suppress, &mut buf,
288 );
289 if !buf.is_empty() {
290 out.write_all(&buf)?;
291 }
292 }
293 Ok(())
294}
295
296fn multi_select_chunk(
302 data: &[u8],
303 delim: u8,
304 line_delim: u8,
305 ranges: &[Range],
306 max_field: usize,
307 suppress: bool,
308 buf: &mut Vec<u8>,
309) {
310 if delim == line_delim {
312 buf.reserve(data.len());
313 let base = data.as_ptr();
314 let mut start = 0;
315 for end_pos in memchr_iter(line_delim, data) {
316 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
317 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
318 start = end_pos + 1;
319 }
320 if start < data.len() {
321 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
322 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
323 }
324 return;
325 }
326
327 buf.reserve(data.len());
328 let base = data.as_ptr();
329 let data_len = data.len();
330
331 let mut line_start: usize = 0;
333 let mut delim_pos = [0usize; 64];
334 let mut num_delims: usize = 0;
335 let max_delims = max_field.min(64);
336 let mut at_max = false;
337
338 for pos in memchr::memchr2_iter(delim, line_delim, data) {
340 let byte = unsafe { *base.add(pos) };
341
342 if byte == line_delim {
343 let line_len = pos - line_start;
345 if num_delims == 0 {
346 if !suppress {
348 unsafe {
349 buf_extend(
350 buf,
351 std::slice::from_raw_parts(base.add(line_start), line_len),
352 );
353 buf_push(buf, line_delim);
354 }
355 }
356 } else {
357 let total_fields = num_delims + 1;
359 let mut first_output = true;
360
361 for r in ranges {
362 let range_start = r.start;
363 let range_end = r.end.min(total_fields);
364 if range_start > total_fields {
365 break;
366 }
367 for field_num in range_start..=range_end {
368 if field_num > total_fields {
369 break;
370 }
371
372 let field_start = if field_num == 1 {
373 line_start
374 } else if field_num - 2 < num_delims {
375 delim_pos[field_num - 2] + 1
376 } else {
377 continue;
378 };
379 let field_end = if field_num <= num_delims {
380 delim_pos[field_num - 1]
381 } else {
382 pos
383 };
384
385 if !first_output {
386 unsafe { buf_push(buf, delim) };
387 }
388 unsafe {
389 buf_extend(
390 buf,
391 std::slice::from_raw_parts(
392 base.add(field_start),
393 field_end - field_start,
394 ),
395 );
396 }
397 first_output = false;
398 }
399 }
400
401 unsafe { buf_push(buf, line_delim) };
402 }
403
404 line_start = pos + 1;
406 num_delims = 0;
407 at_max = false;
408 } else {
409 if !at_max && num_delims < max_delims {
411 delim_pos[num_delims] = pos;
412 num_delims += 1;
413 if num_delims >= max_delims {
414 at_max = true;
415 }
416 }
417 }
418 }
419
420 if line_start < data_len {
422 if num_delims == 0 {
423 if !suppress {
424 unsafe {
425 buf_extend(
426 buf,
427 std::slice::from_raw_parts(base.add(line_start), data_len - line_start),
428 );
429 buf_push(buf, line_delim);
430 }
431 }
432 } else {
433 let total_fields = num_delims + 1;
434 let mut first_output = true;
435
436 for r in ranges {
437 let range_start = r.start;
438 let range_end = r.end.min(total_fields);
439 if range_start > total_fields {
440 break;
441 }
442 for field_num in range_start..=range_end {
443 if field_num > total_fields {
444 break;
445 }
446
447 let field_start = if field_num == 1 {
448 line_start
449 } else if field_num - 2 < num_delims {
450 delim_pos[field_num - 2] + 1
451 } else {
452 continue;
453 };
454 let field_end = if field_num <= num_delims {
455 delim_pos[field_num - 1]
456 } else {
457 data_len
458 };
459
460 if !first_output {
461 unsafe { buf_push(buf, delim) };
462 }
463 unsafe {
464 buf_extend(
465 buf,
466 std::slice::from_raw_parts(
467 base.add(field_start),
468 field_end - field_start,
469 ),
470 );
471 }
472 first_output = false;
473 }
474 }
475
476 unsafe { buf_push(buf, line_delim) };
477 }
478 }
479}
480
481#[inline(always)]
486fn multi_select_line(
487 line: &[u8],
488 delim: u8,
489 line_delim: u8,
490 ranges: &[Range],
491 max_field: usize,
492 suppress: bool,
493 buf: &mut Vec<u8>,
494) {
495 let len = line.len();
496 if len == 0 {
497 if !suppress {
498 unsafe { buf_push(buf, line_delim) };
499 }
500 return;
501 }
502
503 let base = line.as_ptr();
505
506 let mut delim_pos = [0usize; 64];
509 let mut num_delims: usize = 0;
510 let max_delims = max_field.min(64);
511
512 for pos in memchr_iter(delim, line) {
513 if num_delims < max_delims {
514 delim_pos[num_delims] = pos;
515 num_delims += 1;
516 if num_delims >= max_delims {
517 break;
518 }
519 }
520 }
521
522 if num_delims == 0 {
523 if !suppress {
524 unsafe {
525 buf_extend(buf, line);
526 buf_push(buf, line_delim);
527 }
528 }
529 return;
530 }
531
532 let total_fields = num_delims + 1;
536 let mut first_output = true;
537
538 for r in ranges {
539 let range_start = r.start;
540 let range_end = r.end.min(total_fields);
541 if range_start > total_fields {
542 break;
543 }
544 for field_num in range_start..=range_end {
545 if field_num > total_fields {
546 break;
547 }
548
549 let field_start = if field_num == 1 {
550 0
551 } else if field_num - 2 < num_delims {
552 delim_pos[field_num - 2] + 1
553 } else {
554 continue;
555 };
556 let field_end = if field_num <= num_delims {
557 delim_pos[field_num - 1]
558 } else {
559 len
560 };
561
562 if !first_output {
563 unsafe { buf_push(buf, delim) };
564 }
565 unsafe {
566 buf_extend(
567 buf,
568 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
569 );
570 }
571 first_output = false;
572 }
573 }
574
575 unsafe { buf_push(buf, line_delim) };
576}
577
578fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
582 let delim = cfg.delim;
583 let line_delim = cfg.line_delim;
584 let ranges = cfg.ranges;
585 let complement = cfg.complement;
586 let output_delim = cfg.output_delim;
587 let suppress = cfg.suppress_no_delim;
588
589 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
597 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
598 }
599
600 if complement
602 && ranges.len() == 1
603 && output_delim.len() == 1
604 && output_delim[0] == delim
605 && ranges[0].start == ranges[0].end
606 {
607 return process_complement_single_field(
608 data,
609 delim,
610 line_delim,
611 ranges[0].start,
612 suppress,
613 out,
614 );
615 }
616
617 if complement
620 && ranges.len() == 1
621 && ranges[0].start > 1
622 && ranges[0].end < usize::MAX
623 && output_delim.len() == 1
624 && output_delim[0] == delim
625 {
626 return process_complement_range(
627 data,
628 delim,
629 line_delim,
630 ranges[0].start,
631 ranges[0].end,
632 suppress,
633 out,
634 );
635 }
636
637 if !complement
639 && ranges.len() == 1
640 && ranges[0].start == 1
641 && output_delim.len() == 1
642 && output_delim[0] == delim
643 && ranges[0].end < usize::MAX
644 {
645 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
646 }
647
648 if !complement
650 && ranges.len() == 1
651 && ranges[0].end == usize::MAX
652 && ranges[0].start > 1
653 && output_delim.len() == 1
654 && output_delim[0] == delim
655 {
656 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
657 }
658
659 if !complement
661 && ranges.len() == 1
662 && ranges[0].start > 1
663 && ranges[0].end < usize::MAX
664 && output_delim.len() == 1
665 && output_delim[0] == delim
666 {
667 return process_fields_mid_range(
668 data,
669 delim,
670 line_delim,
671 ranges[0].start,
672 ranges[0].end,
673 suppress,
674 out,
675 );
676 }
677
678 if !complement
684 && ranges.len() > 1
685 && ranges.last().map_or(false, |r| r.end < usize::MAX)
686 && output_delim.len() == 1
687 && output_delim[0] == delim
688 && delim != line_delim
689 {
690 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
691 }
692
693 let max_field = if complement {
695 usize::MAX
696 } else {
697 ranges.last().map(|r| r.end).unwrap_or(0)
698 };
699 let field_mask = compute_field_mask(ranges, complement);
700
701 if data.len() >= PARALLEL_THRESHOLD {
702 let chunks = split_for_scope(data, line_delim);
703 let n = chunks.len();
704 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
705 rayon::scope(|s| {
706 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
707 s.spawn(move |_| {
708 result.reserve(chunk.len());
709 process_fields_chunk(
710 chunk,
711 delim,
712 ranges,
713 output_delim,
714 suppress,
715 max_field,
716 field_mask,
717 line_delim,
718 complement,
719 result,
720 );
721 });
722 }
723 });
724 let slices: Vec<IoSlice> = results
725 .iter()
726 .filter(|r| !r.is_empty())
727 .map(|r| IoSlice::new(r))
728 .collect();
729 write_ioslices(out, &slices)?;
730 } else {
731 let mut buf = Vec::with_capacity(data.len());
732 process_fields_chunk(
733 data,
734 delim,
735 ranges,
736 output_delim,
737 suppress,
738 max_field,
739 field_mask,
740 line_delim,
741 complement,
742 &mut buf,
743 );
744 if !buf.is_empty() {
745 out.write_all(&buf)?;
746 }
747 }
748 Ok(())
749}
750
751fn process_fields_chunk(
756 data: &[u8],
757 delim: u8,
758 ranges: &[Range],
759 output_delim: &[u8],
760 suppress: bool,
761 max_field: usize,
762 field_mask: u64,
763 line_delim: u8,
764 complement: bool,
765 buf: &mut Vec<u8>,
766) {
767 if delim != line_delim && max_field < usize::MAX && !complement {
774 buf.reserve(data.len());
775 let mut start = 0;
776 for end_pos in memchr_iter(line_delim, data) {
777 let line = &data[start..end_pos];
778 extract_fields_to_buf(
779 line,
780 delim,
781 ranges,
782 output_delim,
783 suppress,
784 max_field,
785 field_mask,
786 line_delim,
787 buf,
788 complement,
789 );
790 start = end_pos + 1;
791 }
792 if start < data.len() {
793 extract_fields_to_buf(
794 &data[start..],
795 delim,
796 ranges,
797 output_delim,
798 suppress,
799 max_field,
800 field_mask,
801 line_delim,
802 buf,
803 complement,
804 );
805 }
806 return;
807 }
808
809 if delim != line_delim {
813 buf.reserve(data.len());
814
815 let data_len = data.len();
816 let base = data.as_ptr();
817 let mut line_start: usize = 0;
818 let mut field_start: usize = 0;
819 let mut field_num: usize = 1;
820 let mut first_output = true;
821 let mut has_delim = false;
822
823 for pos in memchr::memchr2_iter(delim, line_delim, data) {
824 let byte = unsafe { *base.add(pos) };
825
826 if byte == line_delim {
827 if (field_num <= max_field || complement)
829 && has_delim
830 && is_selected(field_num, field_mask, ranges, complement)
831 {
832 if !first_output {
833 unsafe { buf_extend(buf, output_delim) };
834 }
835 unsafe {
836 buf_extend(
837 buf,
838 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
839 )
840 };
841 first_output = false;
842 }
843
844 if !first_output {
845 unsafe { buf_push(buf, line_delim) };
846 } else if !has_delim {
847 if !suppress {
848 unsafe {
849 buf_extend(
850 buf,
851 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
852 );
853 buf_push(buf, line_delim);
854 }
855 }
856 } else {
857 unsafe { buf_push(buf, line_delim) };
858 }
859
860 line_start = pos + 1;
862 field_start = pos + 1;
863 field_num = 1;
864 first_output = true;
865 has_delim = false;
866 } else {
867 has_delim = true;
869
870 if is_selected(field_num, field_mask, ranges, complement) {
871 if !first_output {
872 unsafe { buf_extend(buf, output_delim) };
873 }
874 unsafe {
875 buf_extend(
876 buf,
877 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
878 )
879 };
880 first_output = false;
881 }
882
883 field_num += 1;
884 field_start = pos + 1;
885 }
886 }
887
888 if line_start < data_len {
890 if line_start < data_len {
891 if (field_num <= max_field || complement)
892 && has_delim
893 && is_selected(field_num, field_mask, ranges, complement)
894 {
895 if !first_output {
896 unsafe { buf_extend(buf, output_delim) };
897 }
898 unsafe {
899 buf_extend(
900 buf,
901 std::slice::from_raw_parts(
902 base.add(field_start),
903 data_len - field_start,
904 ),
905 )
906 };
907 first_output = false;
908 }
909
910 if !first_output {
911 unsafe { buf_push(buf, line_delim) };
912 } else if !has_delim {
913 if !suppress {
914 unsafe {
915 buf_extend(
916 buf,
917 std::slice::from_raw_parts(
918 base.add(line_start),
919 data_len - line_start,
920 ),
921 );
922 buf_push(buf, line_delim);
923 }
924 }
925 } else {
926 unsafe { buf_push(buf, line_delim) };
927 }
928 }
929 }
930
931 return;
932 }
933
934 let mut start = 0;
936 for end_pos in memchr_iter(line_delim, data) {
937 let line = &data[start..end_pos];
938 extract_fields_to_buf(
939 line,
940 delim,
941 ranges,
942 output_delim,
943 suppress,
944 max_field,
945 field_mask,
946 line_delim,
947 buf,
948 complement,
949 );
950 start = end_pos + 1;
951 }
952 if start < data.len() {
953 extract_fields_to_buf(
954 &data[start..],
955 delim,
956 ranges,
957 output_delim,
958 suppress,
959 max_field,
960 field_mask,
961 line_delim,
962 buf,
963 complement,
964 );
965 }
966}
967
968fn process_single_field(
974 data: &[u8],
975 delim: u8,
976 line_delim: u8,
977 target: usize,
978 suppress: bool,
979 out: &mut impl Write,
980) -> io::Result<()> {
981 let target_idx = target - 1;
982
983 const FIELD_PARALLEL_MIN: usize = 2 * 1024 * 1024;
986
987 if delim != line_delim {
988 if target_idx == 0 && !suppress {
992 if data.len() >= FIELD_PARALLEL_MIN {
993 return single_field1_parallel(data, delim, line_delim, out);
994 }
995 let mut buf = Vec::with_capacity(data.len());
1000 single_field1_to_buf(data, delim, line_delim, &mut buf);
1001 if !buf.is_empty() {
1002 out.write_all(&buf)?;
1003 }
1004 return Ok(());
1005 }
1006
1007 if data.len() >= FIELD_PARALLEL_MIN {
1011 let chunks = split_for_scope(data, line_delim);
1012 let n = chunks.len();
1013 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1014 rayon::scope(|s| {
1015 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1016 s.spawn(move |_| {
1017 result.reserve(chunk.len() / 2);
1018 process_single_field_chunk(
1019 chunk, delim, target_idx, line_delim, suppress, result,
1020 );
1021 });
1022 }
1023 });
1024 let slices: Vec<IoSlice> = results
1025 .iter()
1026 .filter(|r| !r.is_empty())
1027 .map(|r| IoSlice::new(r))
1028 .collect();
1029 write_ioslices(out, &slices)?;
1030 } else {
1031 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
1032 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1033 if !buf.is_empty() {
1034 out.write_all(&buf)?;
1035 }
1036 }
1037 return Ok(());
1038 }
1039
1040 if data.len() >= FIELD_PARALLEL_MIN {
1042 let chunks = split_for_scope(data, line_delim);
1043 let n = chunks.len();
1044 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1045 rayon::scope(|s| {
1046 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1047 s.spawn(move |_| {
1048 result.reserve(chunk.len() / 4);
1049 process_single_field_chunk(
1050 chunk, delim, target_idx, line_delim, suppress, result,
1051 );
1052 });
1053 }
1054 });
1055 let slices: Vec<IoSlice> = results
1056 .iter()
1057 .filter(|r| !r.is_empty())
1058 .map(|r| IoSlice::new(r))
1059 .collect();
1060 write_ioslices(out, &slices)?;
1061 } else {
1062 let mut buf = Vec::with_capacity(data.len() / 4);
1063 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1064 if !buf.is_empty() {
1065 out.write_all(&buf)?;
1066 }
1067 }
1068 Ok(())
1069}
1070
1071fn process_complement_range(
1074 data: &[u8],
1075 delim: u8,
1076 line_delim: u8,
1077 skip_start: usize,
1078 skip_end: usize,
1079 suppress: bool,
1080 out: &mut impl Write,
1081) -> io::Result<()> {
1082 if data.len() >= PARALLEL_THRESHOLD {
1083 let chunks = split_for_scope(data, line_delim);
1084 let n = chunks.len();
1085 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1086 rayon::scope(|s| {
1087 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1088 s.spawn(move |_| {
1089 result.reserve(chunk.len());
1090 complement_range_chunk(
1091 chunk, delim, skip_start, skip_end, line_delim, suppress, result,
1092 );
1093 });
1094 }
1095 });
1096 let slices: Vec<IoSlice> = results
1097 .iter()
1098 .filter(|r| !r.is_empty())
1099 .map(|r| IoSlice::new(r))
1100 .collect();
1101 write_ioslices(out, &slices)?;
1102 } else {
1103 let mut buf = Vec::with_capacity(data.len());
1104 complement_range_chunk(
1105 data, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
1106 );
1107 if !buf.is_empty() {
1108 out.write_all(&buf)?;
1109 }
1110 }
1111 Ok(())
1112}
1113
1114fn complement_range_chunk(
1116 data: &[u8],
1117 delim: u8,
1118 skip_start: usize,
1119 skip_end: usize,
1120 line_delim: u8,
1121 suppress: bool,
1122 buf: &mut Vec<u8>,
1123) {
1124 buf.reserve(data.len());
1126 let mut start = 0;
1127 for end_pos in memchr_iter(line_delim, data) {
1128 let line = &data[start..end_pos];
1129 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
1130 start = end_pos + 1;
1131 }
1132 if start < data.len() {
1133 complement_range_line(
1134 &data[start..],
1135 delim,
1136 skip_start,
1137 skip_end,
1138 line_delim,
1139 suppress,
1140 buf,
1141 );
1142 }
1143}
1144
1145#[inline(always)]
1152fn complement_range_line(
1153 line: &[u8],
1154 delim: u8,
1155 skip_start: usize,
1156 skip_end: usize,
1157 line_delim: u8,
1158 suppress: bool,
1159 buf: &mut Vec<u8>,
1160) {
1161 let len = line.len();
1162 if len == 0 {
1163 if !suppress {
1164 unsafe { buf_push(buf, line_delim) };
1165 }
1166 return;
1167 }
1168
1169 let base = line.as_ptr();
1171
1172 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
1182
1183 let mut delim_count: usize = 0;
1185 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
1189 delim_count += 1;
1190 if delim_count == need_prefix_delims {
1191 prefix_end_pos = pos;
1192 }
1193 if delim_count == total_need {
1194 suffix_start_pos = pos + 1;
1195 break;
1196 }
1197 }
1198
1199 if delim_count == 0 {
1200 if !suppress {
1202 unsafe {
1203 buf_extend(buf, line);
1204 buf_push(buf, line_delim);
1205 }
1206 }
1207 return;
1208 }
1209
1210 if delim_count < need_prefix_delims {
1216 unsafe {
1218 buf_extend(buf, line);
1219 buf_push(buf, line_delim);
1220 }
1221 return;
1222 }
1223
1224 let has_prefix = need_prefix_delims > 0;
1225 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
1226
1227 if has_prefix && has_suffix {
1228 unsafe {
1230 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1231 buf_push(buf, delim);
1232 buf_extend(
1233 buf,
1234 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1235 );
1236 buf_push(buf, line_delim);
1237 }
1238 } else if has_prefix {
1239 unsafe {
1241 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1242 buf_push(buf, line_delim);
1243 }
1244 } else if has_suffix {
1245 unsafe {
1247 buf_extend(
1248 buf,
1249 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1250 );
1251 buf_push(buf, line_delim);
1252 }
1253 } else {
1254 unsafe { buf_push(buf, line_delim) };
1256 }
1257}
1258
1259fn process_complement_single_field(
1261 data: &[u8],
1262 delim: u8,
1263 line_delim: u8,
1264 skip_field: usize,
1265 suppress: bool,
1266 out: &mut impl Write,
1267) -> io::Result<()> {
1268 let skip_idx = skip_field - 1;
1269
1270 if data.len() >= PARALLEL_THRESHOLD {
1271 let chunks = split_for_scope(data, line_delim);
1272 let n = chunks.len();
1273 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1274 rayon::scope(|s| {
1275 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1276 s.spawn(move |_| {
1277 result.reserve(chunk.len());
1278 complement_single_field_chunk(
1279 chunk, delim, skip_idx, line_delim, suppress, result,
1280 );
1281 });
1282 }
1283 });
1284 let slices: Vec<IoSlice> = results
1285 .iter()
1286 .filter(|r| !r.is_empty())
1287 .map(|r| IoSlice::new(r))
1288 .collect();
1289 write_ioslices(out, &slices)?;
1290 } else {
1291 let mut buf = Vec::with_capacity(data.len());
1292 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
1293 if !buf.is_empty() {
1294 out.write_all(&buf)?;
1295 }
1296 }
1297 Ok(())
1298}
1299
1300fn complement_single_field_chunk(
1306 data: &[u8],
1307 delim: u8,
1308 skip_idx: usize,
1309 line_delim: u8,
1310 suppress: bool,
1311 buf: &mut Vec<u8>,
1312) {
1313 if delim == line_delim {
1315 buf.reserve(data.len());
1316 let mut start = 0;
1317 for end_pos in memchr_iter(line_delim, data) {
1318 let line = &data[start..end_pos];
1319 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1320 start = end_pos + 1;
1321 }
1322 if start < data.len() {
1323 complement_single_field_line(
1324 &data[start..],
1325 delim,
1326 skip_idx,
1327 line_delim,
1328 suppress,
1329 buf,
1330 );
1331 }
1332 return;
1333 }
1334
1335 buf.reserve(data.len());
1336 let base = data.as_ptr();
1337 let data_len = data.len();
1338 let need_before = skip_idx; let need_total = skip_idx + 1; let mut line_start: usize = 0;
1343 let mut delim_count: usize = 0;
1344 let mut skip_start_pos: usize = 0;
1345 let mut skip_end_pos: usize = 0;
1346 let mut found_start = need_before == 0; let mut found_end = false;
1348
1349 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1350 let byte = unsafe { *base.add(pos) };
1351
1352 if byte == line_delim {
1353 if delim_count == 0 {
1355 if !suppress {
1357 unsafe {
1358 buf_extend(
1359 buf,
1360 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1361 );
1362 buf_push(buf, line_delim);
1363 }
1364 }
1365 } else if !found_start || delim_count < need_before {
1366 unsafe {
1368 buf_extend(
1369 buf,
1370 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1371 );
1372 buf_push(buf, line_delim);
1373 }
1374 } else {
1375 let has_prefix = skip_idx > 0;
1376 let has_suffix = found_end && skip_end_pos < pos;
1377
1378 if has_prefix && has_suffix {
1379 unsafe {
1380 buf_extend(
1381 buf,
1382 std::slice::from_raw_parts(
1383 base.add(line_start),
1384 skip_start_pos - 1 - line_start,
1385 ),
1386 );
1387 buf_push(buf, delim);
1388 buf_extend(
1389 buf,
1390 std::slice::from_raw_parts(
1391 base.add(skip_end_pos + 1),
1392 pos - skip_end_pos - 1,
1393 ),
1394 );
1395 buf_push(buf, line_delim);
1396 }
1397 } else if has_prefix {
1398 unsafe {
1399 buf_extend(
1400 buf,
1401 std::slice::from_raw_parts(
1402 base.add(line_start),
1403 skip_start_pos - 1 - line_start,
1404 ),
1405 );
1406 buf_push(buf, line_delim);
1407 }
1408 } else if has_suffix {
1409 unsafe {
1410 buf_extend(
1411 buf,
1412 std::slice::from_raw_parts(
1413 base.add(skip_end_pos + 1),
1414 pos - skip_end_pos - 1,
1415 ),
1416 );
1417 buf_push(buf, line_delim);
1418 }
1419 } else {
1420 unsafe { buf_push(buf, line_delim) };
1421 }
1422 }
1423
1424 line_start = pos + 1;
1426 delim_count = 0;
1427 skip_start_pos = 0;
1428 skip_end_pos = 0;
1429 found_start = need_before == 0;
1430 found_end = false;
1431 } else {
1432 delim_count += 1;
1434 if delim_count == need_before {
1435 skip_start_pos = pos + 1;
1436 found_start = true;
1437 }
1438 if delim_count == need_total {
1439 skip_end_pos = pos;
1440 found_end = true;
1441 }
1442 }
1443 }
1444
1445 if line_start < data_len {
1447 let pos = data_len;
1448 if delim_count == 0 {
1449 if !suppress {
1450 unsafe {
1451 buf_extend(
1452 buf,
1453 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1454 );
1455 buf_push(buf, line_delim);
1456 }
1457 }
1458 } else if !found_start || delim_count < need_before {
1459 unsafe {
1460 buf_extend(
1461 buf,
1462 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1463 );
1464 buf_push(buf, line_delim);
1465 }
1466 } else {
1467 let has_prefix = skip_idx > 0;
1468 let has_suffix = found_end && skip_end_pos < pos;
1469
1470 if has_prefix && has_suffix {
1471 unsafe {
1472 buf_extend(
1473 buf,
1474 std::slice::from_raw_parts(
1475 base.add(line_start),
1476 skip_start_pos - 1 - line_start,
1477 ),
1478 );
1479 buf_push(buf, delim);
1480 buf_extend(
1481 buf,
1482 std::slice::from_raw_parts(
1483 base.add(skip_end_pos + 1),
1484 pos - skip_end_pos - 1,
1485 ),
1486 );
1487 buf_push(buf, line_delim);
1488 }
1489 } else if has_prefix {
1490 unsafe {
1491 buf_extend(
1492 buf,
1493 std::slice::from_raw_parts(
1494 base.add(line_start),
1495 skip_start_pos - 1 - line_start,
1496 ),
1497 );
1498 buf_push(buf, line_delim);
1499 }
1500 } else if has_suffix {
1501 unsafe {
1502 buf_extend(
1503 buf,
1504 std::slice::from_raw_parts(
1505 base.add(skip_end_pos + 1),
1506 pos - skip_end_pos - 1,
1507 ),
1508 );
1509 buf_push(buf, line_delim);
1510 }
1511 } else {
1512 unsafe { buf_push(buf, line_delim) };
1513 }
1514 }
1515 }
1516}
1517
1518#[inline(always)]
1520fn complement_single_field_line(
1521 line: &[u8],
1522 delim: u8,
1523 skip_idx: usize,
1524 line_delim: u8,
1525 suppress: bool,
1526 buf: &mut Vec<u8>,
1527) {
1528 let len = line.len();
1529 if len == 0 {
1530 if !suppress {
1531 unsafe { buf_push(buf, line_delim) };
1532 }
1533 return;
1534 }
1535
1536 let base = line.as_ptr();
1537 let need_before = skip_idx;
1538 let need_total = skip_idx + 1;
1539
1540 let mut delim_count: usize = 0;
1541 let mut skip_start_pos: usize = 0;
1542 let mut skip_end_pos: usize = len;
1543 let mut found_end = false;
1544
1545 for pos in memchr_iter(delim, line) {
1546 delim_count += 1;
1547 if delim_count == need_before {
1548 skip_start_pos = pos + 1;
1549 }
1550 if delim_count == need_total {
1551 skip_end_pos = pos;
1552 found_end = true;
1553 break;
1554 }
1555 }
1556
1557 if delim_count == 0 {
1558 if !suppress {
1559 unsafe {
1560 buf_extend(buf, line);
1561 buf_push(buf, line_delim);
1562 }
1563 }
1564 return;
1565 }
1566
1567 if delim_count < need_before {
1568 unsafe {
1569 buf_extend(buf, line);
1570 buf_push(buf, line_delim);
1571 }
1572 return;
1573 }
1574
1575 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1576 let has_suffix = found_end && skip_end_pos < len;
1577
1578 if has_prefix && has_suffix {
1579 unsafe {
1580 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1581 buf_push(buf, delim);
1582 buf_extend(
1583 buf,
1584 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1585 );
1586 buf_push(buf, line_delim);
1587 }
1588 } else if has_prefix {
1589 unsafe {
1590 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1591 buf_push(buf, line_delim);
1592 }
1593 } else if has_suffix {
1594 unsafe {
1595 buf_extend(
1596 buf,
1597 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1598 );
1599 buf_push(buf, line_delim);
1600 }
1601 } else {
1602 unsafe { buf_push(buf, line_delim) };
1603 }
1604}
1605
1606fn process_fields_prefix(
1610 data: &[u8],
1611 delim: u8,
1612 line_delim: u8,
1613 last_field: usize,
1614 suppress: bool,
1615 out: &mut impl Write,
1616) -> io::Result<()> {
1617 if data.len() >= PARALLEL_THRESHOLD {
1618 let chunks = split_for_scope(data, line_delim);
1619 let n = chunks.len();
1620 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1621 rayon::scope(|s| {
1622 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1623 s.spawn(move |_| {
1624 result.reserve(chunk.len());
1625 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, result);
1626 });
1627 }
1628 });
1629 let slices: Vec<IoSlice> = results
1630 .iter()
1631 .filter(|r| !r.is_empty())
1632 .map(|r| IoSlice::new(r))
1633 .collect();
1634 write_ioslices(out, &slices)?;
1635 } else if !suppress {
1636 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1640 } else {
1641 let mut buf = Vec::with_capacity(data.len());
1642 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
1643 if !buf.is_empty() {
1644 out.write_all(&buf)?;
1645 }
1646 }
1647 Ok(())
1648}
1649
1650#[inline]
1656fn fields_prefix_zerocopy(
1657 data: &[u8],
1658 delim: u8,
1659 line_delim: u8,
1660 last_field: usize,
1661 out: &mut impl Write,
1662) -> io::Result<()> {
1663 let newline_buf: [u8; 1] = [line_delim];
1664 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1665 let mut start = 0;
1666 let mut run_start: usize = 0;
1667
1668 for end_pos in memchr_iter(line_delim, data) {
1669 let line = &data[start..end_pos];
1670 let mut field_count = 1;
1671 let mut truncate_at: Option<usize> = None;
1672 for dpos in memchr_iter(delim, line) {
1673 if field_count >= last_field {
1674 truncate_at = Some(start + dpos);
1675 break;
1676 }
1677 field_count += 1;
1678 }
1679
1680 if let Some(trunc_pos) = truncate_at {
1681 if run_start < start {
1682 iov.push(IoSlice::new(&data[run_start..start]));
1683 }
1684 iov.push(IoSlice::new(&data[start..trunc_pos]));
1685 iov.push(IoSlice::new(&newline_buf));
1686 run_start = end_pos + 1;
1687
1688 if iov.len() >= MAX_IOV - 2 {
1689 write_ioslices(out, &iov)?;
1690 iov.clear();
1691 }
1692 }
1693 start = end_pos + 1;
1694 }
1695 if start < data.len() {
1697 let line = &data[start..];
1698 let mut field_count = 1;
1699 let mut truncate_at: Option<usize> = None;
1700 for dpos in memchr_iter(delim, line) {
1701 if field_count >= last_field {
1702 truncate_at = Some(start + dpos);
1703 break;
1704 }
1705 field_count += 1;
1706 }
1707 if let Some(trunc_pos) = truncate_at {
1708 if run_start < start {
1709 iov.push(IoSlice::new(&data[run_start..start]));
1710 }
1711 iov.push(IoSlice::new(&data[start..trunc_pos]));
1712 iov.push(IoSlice::new(&newline_buf));
1713 if !iov.is_empty() {
1714 write_ioslices(out, &iov)?;
1715 }
1716 return Ok(());
1717 }
1718 }
1719 if run_start < data.len() {
1721 iov.push(IoSlice::new(&data[run_start..]));
1722 if !data.is_empty() && *data.last().unwrap() != line_delim {
1723 iov.push(IoSlice::new(&newline_buf));
1724 }
1725 }
1726 if !iov.is_empty() {
1727 write_ioslices(out, &iov)?;
1728 }
1729 Ok(())
1730}
1731
1732fn fields_prefix_chunk(
1734 data: &[u8],
1735 delim: u8,
1736 line_delim: u8,
1737 last_field: usize,
1738 suppress: bool,
1739 buf: &mut Vec<u8>,
1740) {
1741 buf.reserve(data.len());
1742 let mut start = 0;
1743 for end_pos in memchr_iter(line_delim, data) {
1744 let line = &data[start..end_pos];
1745 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1746 start = end_pos + 1;
1747 }
1748 if start < data.len() {
1749 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1750 }
1751}
1752
1753#[inline(always)]
1756fn fields_prefix_line(
1757 line: &[u8],
1758 delim: u8,
1759 line_delim: u8,
1760 last_field: usize,
1761 suppress: bool,
1762 buf: &mut Vec<u8>,
1763) {
1764 let len = line.len();
1765 if len == 0 {
1766 if !suppress {
1767 unsafe { buf_push(buf, line_delim) };
1768 }
1769 return;
1770 }
1771
1772 let base = line.as_ptr();
1774
1775 let mut field_count = 1usize;
1776 let mut has_delim = false;
1777
1778 for pos in memchr_iter(delim, line) {
1779 has_delim = true;
1780 if field_count >= last_field {
1781 unsafe {
1782 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1783 buf_push(buf, line_delim);
1784 }
1785 return;
1786 }
1787 field_count += 1;
1788 }
1789
1790 if !has_delim {
1791 if !suppress {
1792 unsafe {
1793 buf_extend(buf, line);
1794 buf_push(buf, line_delim);
1795 }
1796 }
1797 return;
1798 }
1799
1800 unsafe {
1801 buf_extend(buf, line);
1802 buf_push(buf, line_delim);
1803 }
1804}
1805
1806fn process_fields_suffix(
1808 data: &[u8],
1809 delim: u8,
1810 line_delim: u8,
1811 start_field: usize,
1812 suppress: bool,
1813 out: &mut impl Write,
1814) -> io::Result<()> {
1815 if data.len() >= PARALLEL_THRESHOLD {
1816 let chunks = split_for_scope(data, line_delim);
1817 let n = chunks.len();
1818 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1819 rayon::scope(|s| {
1820 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1821 s.spawn(move |_| {
1822 result.reserve(chunk.len());
1823 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, result);
1824 });
1825 }
1826 });
1827 let slices: Vec<IoSlice> = results
1828 .iter()
1829 .filter(|r| !r.is_empty())
1830 .map(|r| IoSlice::new(r))
1831 .collect();
1832 write_ioslices(out, &slices)?;
1833 } else {
1834 let mut buf = Vec::with_capacity(data.len());
1835 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
1836 if !buf.is_empty() {
1837 out.write_all(&buf)?;
1838 }
1839 }
1840 Ok(())
1841}
1842
1843fn fields_suffix_chunk(
1845 data: &[u8],
1846 delim: u8,
1847 line_delim: u8,
1848 start_field: usize,
1849 suppress: bool,
1850 buf: &mut Vec<u8>,
1851) {
1852 buf.reserve(data.len());
1853 let mut start = 0;
1854 for end_pos in memchr_iter(line_delim, data) {
1855 let line = &data[start..end_pos];
1856 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1857 start = end_pos + 1;
1858 }
1859 if start < data.len() {
1860 fields_suffix_line(
1861 &data[start..],
1862 delim,
1863 line_delim,
1864 start_field,
1865 suppress,
1866 buf,
1867 );
1868 }
1869}
1870
1871#[inline(always)]
1874fn fields_suffix_line(
1875 line: &[u8],
1876 delim: u8,
1877 line_delim: u8,
1878 start_field: usize,
1879 suppress: bool,
1880 buf: &mut Vec<u8>,
1881) {
1882 let len = line.len();
1883 if len == 0 {
1884 if !suppress {
1885 unsafe { buf_push(buf, line_delim) };
1886 }
1887 return;
1888 }
1889
1890 let base = line.as_ptr();
1892
1893 let skip_delims = start_field - 1;
1894 let mut delim_count = 0usize;
1895 let mut has_delim = false;
1896
1897 for pos in memchr_iter(delim, line) {
1898 has_delim = true;
1899 delim_count += 1;
1900 if delim_count >= skip_delims {
1901 unsafe {
1902 buf_extend(
1903 buf,
1904 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1905 );
1906 buf_push(buf, line_delim);
1907 }
1908 return;
1909 }
1910 }
1911
1912 if !has_delim {
1913 if !suppress {
1914 unsafe {
1915 buf_extend(buf, line);
1916 buf_push(buf, line_delim);
1917 }
1918 }
1919 return;
1920 }
1921
1922 unsafe { buf_push(buf, line_delim) };
1924}
1925
1926fn process_fields_mid_range(
1929 data: &[u8],
1930 delim: u8,
1931 line_delim: u8,
1932 start_field: usize,
1933 end_field: usize,
1934 suppress: bool,
1935 out: &mut impl Write,
1936) -> io::Result<()> {
1937 if data.len() >= PARALLEL_THRESHOLD {
1938 let chunks = split_for_scope(data, line_delim);
1939 let n = chunks.len();
1940 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1941 rayon::scope(|s| {
1942 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1943 s.spawn(move |_| {
1944 result.reserve(chunk.len());
1945 fields_mid_range_chunk(
1946 chunk,
1947 delim,
1948 line_delim,
1949 start_field,
1950 end_field,
1951 suppress,
1952 result,
1953 );
1954 });
1955 }
1956 });
1957 let slices: Vec<IoSlice> = results
1958 .iter()
1959 .filter(|r| !r.is_empty())
1960 .map(|r| IoSlice::new(r))
1961 .collect();
1962 write_ioslices(out, &slices)?;
1963 } else {
1964 let mut buf = Vec::with_capacity(data.len());
1965 fields_mid_range_chunk(
1966 data,
1967 delim,
1968 line_delim,
1969 start_field,
1970 end_field,
1971 suppress,
1972 &mut buf,
1973 );
1974 if !buf.is_empty() {
1975 out.write_all(&buf)?;
1976 }
1977 }
1978 Ok(())
1979}
1980
1981fn fields_mid_range_chunk(
1985 data: &[u8],
1986 delim: u8,
1987 line_delim: u8,
1988 start_field: usize,
1989 end_field: usize,
1990 suppress: bool,
1991 buf: &mut Vec<u8>,
1992) {
1993 if delim == line_delim {
1995 buf.reserve(data.len());
1996 let mut start = 0;
1997 for end_pos in memchr_iter(line_delim, data) {
1998 let line = &data[start..end_pos];
1999 fields_mid_range_line(
2000 line,
2001 delim,
2002 line_delim,
2003 start_field,
2004 end_field,
2005 suppress,
2006 buf,
2007 );
2008 start = end_pos + 1;
2009 }
2010 if start < data.len() {
2011 fields_mid_range_line(
2012 &data[start..],
2013 delim,
2014 line_delim,
2015 start_field,
2016 end_field,
2017 suppress,
2018 buf,
2019 );
2020 }
2021 return;
2022 }
2023
2024 buf.reserve(data.len());
2025 let base = data.as_ptr();
2026 let skip_before = start_field - 1; let target_end_delim = skip_before + (end_field - start_field) + 1;
2028
2029 let mut line_start: usize = 0;
2030 let mut delim_count: usize = 0;
2031 let mut range_start: usize = 0;
2032 let mut has_delim = false;
2033 let mut found_end = false; for pos in memchr::memchr2_iter(delim, line_delim, data) {
2036 let byte = unsafe { *base.add(pos) };
2037 if byte == line_delim {
2038 if found_end {
2040 } else if !has_delim {
2042 if !suppress {
2044 unsafe {
2045 buf_extend(
2046 buf,
2047 std::slice::from_raw_parts(base.add(line_start), pos + 1 - line_start),
2048 );
2049 }
2050 }
2051 } else if delim_count >= skip_before {
2052 if skip_before == 0 {
2054 range_start = line_start;
2055 }
2056 unsafe {
2057 buf_extend(
2058 buf,
2059 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2060 );
2061 buf_push(buf, line_delim);
2062 }
2063 } else {
2064 unsafe { buf_push(buf, line_delim) };
2066 }
2067 line_start = pos + 1;
2068 delim_count = 0;
2069 has_delim = false;
2070 found_end = false;
2071 } else if !found_end {
2072 has_delim = true;
2074 delim_count += 1;
2075 if delim_count == skip_before {
2076 range_start = pos + 1;
2077 }
2078 if delim_count == target_end_delim {
2079 if skip_before == 0 {
2080 range_start = line_start;
2081 }
2082 unsafe {
2083 buf_extend(
2084 buf,
2085 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2086 );
2087 buf_push(buf, line_delim);
2088 }
2089 found_end = true;
2090 }
2091 }
2092 }
2093 if line_start < data.len() && !found_end {
2095 if !has_delim {
2096 if !suppress {
2097 unsafe {
2098 buf_extend(
2099 buf,
2100 std::slice::from_raw_parts(base.add(line_start), data.len() - line_start),
2101 );
2102 }
2103 }
2104 } else if delim_count >= skip_before {
2105 if skip_before == 0 {
2106 range_start = line_start;
2107 }
2108 unsafe {
2109 buf_extend(
2110 buf,
2111 std::slice::from_raw_parts(base.add(range_start), data.len() - range_start),
2112 );
2113 }
2114 }
2115 }
2116}
2117
2118#[inline(always)]
2122fn fields_mid_range_line(
2123 line: &[u8],
2124 delim: u8,
2125 line_delim: u8,
2126 start_field: usize,
2127 end_field: usize,
2128 suppress: bool,
2129 buf: &mut Vec<u8>,
2130) {
2131 let len = line.len();
2132 if len == 0 {
2133 if !suppress {
2134 unsafe { buf_push(buf, line_delim) };
2135 }
2136 return;
2137 }
2138
2139 let base = line.as_ptr();
2141
2142 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
2146 let mut delim_count = 0;
2147 let mut range_start = 0;
2148 let mut has_delim = false;
2149
2150 for pos in memchr_iter(delim, line) {
2151 has_delim = true;
2152 delim_count += 1;
2153 if delim_count == skip_before {
2154 range_start = pos + 1;
2155 }
2156 if delim_count == target_end_delim {
2157 if skip_before == 0 {
2158 range_start = 0;
2159 }
2160 unsafe {
2161 buf_extend(
2162 buf,
2163 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2164 );
2165 buf_push(buf, line_delim);
2166 }
2167 return;
2168 }
2169 }
2170
2171 if !has_delim {
2172 if !suppress {
2173 unsafe {
2174 buf_extend(buf, line);
2175 buf_push(buf, line_delim);
2176 }
2177 }
2178 return;
2179 }
2180
2181 if delim_count >= skip_before {
2183 if skip_before == 0 {
2185 range_start = 0;
2186 }
2187 unsafe {
2188 buf_extend(
2189 buf,
2190 std::slice::from_raw_parts(base.add(range_start), len - range_start),
2191 );
2192 buf_push(buf, line_delim);
2193 }
2194 } else {
2195 unsafe { buf_push(buf, line_delim) };
2197 }
2198}
2199
2200fn single_field1_parallel(
2211 data: &[u8],
2212 delim: u8,
2213 line_delim: u8,
2214 out: &mut impl Write,
2215) -> io::Result<()> {
2216 let chunks = split_for_scope(data, line_delim);
2217 let n = chunks.len();
2218 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2219 rayon::scope(|s| {
2220 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2221 s.spawn(move |_| {
2222 result.reserve(chunk.len());
2223 single_field1_to_buf(chunk, delim, line_delim, result);
2224 });
2225 }
2226 });
2227 let slices: Vec<IoSlice> = results
2228 .iter()
2229 .filter(|r| !r.is_empty())
2230 .map(|r| IoSlice::new(r))
2231 .collect();
2232 write_ioslices(out, &slices)
2233}
2234
2235#[inline]
2241fn single_field1_to_buf(data: &[u8], delim: u8, line_delim: u8, buf: &mut Vec<u8>) {
2242 buf.reserve(data.len());
2243 let base = data.as_ptr();
2244 let mut line_start: usize = 0;
2245 let mut found_delim = false;
2246
2247 for pos in memchr::memchr2_iter(delim, line_delim, data) {
2248 let byte = unsafe { *base.add(pos) };
2249 if byte == line_delim {
2250 if !found_delim {
2251 unsafe {
2253 buf_extend(
2254 buf,
2255 std::slice::from_raw_parts(base.add(line_start), pos + 1 - line_start),
2256 );
2257 }
2258 } else {
2259 unsafe { buf_push(buf, line_delim) };
2261 }
2262 line_start = pos + 1;
2263 found_delim = false;
2264 } else if !found_delim {
2265 unsafe {
2267 buf_extend(
2268 buf,
2269 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
2270 );
2271 }
2272 found_delim = true;
2273 }
2274 }
2276 if line_start < data.len() {
2278 if !found_delim {
2279 unsafe {
2280 buf_extend(
2281 buf,
2282 std::slice::from_raw_parts(base.add(line_start), data.len() - line_start),
2283 );
2284 }
2285 }
2286 }
2287}
2288
2289#[inline]
2298#[allow(dead_code)]
2299fn single_field1_zerocopy(
2300 data: &[u8],
2301 delim: u8,
2302 line_delim: u8,
2303 out: &mut impl Write,
2304) -> io::Result<()> {
2305 let newline_buf: [u8; 1] = [line_delim];
2306
2307 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2308 let mut run_start: usize = 0;
2309 let mut start = 0;
2310
2311 for end_pos in memchr_iter(line_delim, data) {
2312 let line = &data[start..end_pos];
2313 if let Some(dp) = memchr::memchr(delim, line) {
2314 if run_start < start {
2317 iov.push(IoSlice::new(&data[run_start..start]));
2318 }
2319 iov.push(IoSlice::new(&data[start..start + dp]));
2320 iov.push(IoSlice::new(&newline_buf));
2321 run_start = end_pos + 1;
2322
2323 if iov.len() >= MAX_IOV - 2 {
2324 write_ioslices(out, &iov)?;
2325 iov.clear();
2326 }
2327 }
2328 start = end_pos + 1;
2330 }
2331
2332 if start < data.len() {
2334 let line = &data[start..];
2335 if let Some(dp) = memchr::memchr(delim, line) {
2336 if run_start < start {
2337 iov.push(IoSlice::new(&data[run_start..start]));
2338 }
2339 iov.push(IoSlice::new(&data[start..start + dp]));
2340 iov.push(IoSlice::new(&newline_buf));
2341 if !iov.is_empty() {
2342 write_ioslices(out, &iov)?;
2343 }
2344 return Ok(());
2345 }
2346 }
2347
2348 if run_start < data.len() {
2350 iov.push(IoSlice::new(&data[run_start..]));
2351 if !data.is_empty() && *data.last().unwrap() != line_delim {
2352 iov.push(IoSlice::new(&newline_buf));
2353 }
2354 }
2355 if !iov.is_empty() {
2356 write_ioslices(out, &iov)?;
2357 }
2358 Ok(())
2359}
2360
2361fn process_single_field_chunk(
2363 data: &[u8],
2364 delim: u8,
2365 target_idx: usize,
2366 line_delim: u8,
2367 suppress: bool,
2368 buf: &mut Vec<u8>,
2369) {
2370 buf.reserve(data.len());
2372 let mut start = 0;
2373 for end_pos in memchr_iter(line_delim, data) {
2374 let line = &data[start..end_pos];
2375 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
2376 start = end_pos + 1;
2377 }
2378 if start < data.len() {
2379 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
2380 }
2381}
2382
2383#[inline(always)]
2388fn extract_single_field_line(
2389 line: &[u8],
2390 delim: u8,
2391 target_idx: usize,
2392 line_delim: u8,
2393 suppress: bool,
2394 buf: &mut Vec<u8>,
2395) {
2396 let len = line.len();
2397 if len == 0 {
2398 if !suppress {
2399 unsafe { buf_push(buf, line_delim) };
2400 }
2401 return;
2402 }
2403
2404 let base = line.as_ptr();
2406
2407 if target_idx == 0 {
2409 match memchr::memchr(delim, line) {
2410 Some(pos) => unsafe {
2411 buf_extend(buf, std::slice::from_raw_parts(base, pos));
2412 buf_push(buf, line_delim);
2413 },
2414 None => {
2415 if !suppress {
2416 unsafe {
2417 buf_extend(buf, line);
2418 buf_push(buf, line_delim);
2419 }
2420 }
2421 }
2422 }
2423 return;
2424 }
2425
2426 let mut field_start = 0;
2428 let mut field_idx = 0;
2429 let mut has_delim = false;
2430
2431 for pos in memchr_iter(delim, line) {
2432 has_delim = true;
2433 if field_idx == target_idx {
2434 unsafe {
2435 buf_extend(
2436 buf,
2437 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
2438 );
2439 buf_push(buf, line_delim);
2440 }
2441 return;
2442 }
2443 field_idx += 1;
2444 field_start = pos + 1;
2445 }
2446
2447 if !has_delim {
2448 if !suppress {
2449 unsafe {
2450 buf_extend(buf, line);
2451 buf_push(buf, line_delim);
2452 }
2453 }
2454 return;
2455 }
2456
2457 if field_idx == target_idx {
2458 unsafe {
2459 buf_extend(
2460 buf,
2461 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2462 );
2463 buf_push(buf, line_delim);
2464 }
2465 } else {
2466 unsafe { buf_push(buf, line_delim) };
2467 }
2468}
2469
2470#[inline(always)]
2474fn extract_fields_to_buf(
2475 line: &[u8],
2476 delim: u8,
2477 ranges: &[Range],
2478 output_delim: &[u8],
2479 suppress: bool,
2480 max_field: usize,
2481 field_mask: u64,
2482 line_delim: u8,
2483 buf: &mut Vec<u8>,
2484 complement: bool,
2485) {
2486 let len = line.len();
2487
2488 if len == 0 {
2489 if !suppress {
2490 buf.push(line_delim);
2491 }
2492 return;
2493 }
2494
2495 let needed = len + output_delim.len() * 16 + 1;
2498 if buf.capacity() - buf.len() < needed {
2499 buf.reserve(needed);
2500 }
2501
2502 let base = line.as_ptr();
2503 let mut field_num: usize = 1;
2504 let mut field_start: usize = 0;
2505 let mut first_output = true;
2506 let mut has_delim = false;
2507
2508 for delim_pos in memchr_iter(delim, line) {
2510 has_delim = true;
2511
2512 if is_selected(field_num, field_mask, ranges, complement) {
2513 if !first_output {
2514 unsafe { buf_extend(buf, output_delim) };
2515 }
2516 unsafe {
2517 buf_extend(
2518 buf,
2519 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2520 )
2521 };
2522 first_output = false;
2523 }
2524
2525 field_num += 1;
2526 field_start = delim_pos + 1;
2527
2528 if field_num > max_field {
2529 break;
2530 }
2531 }
2532
2533 if (field_num <= max_field || complement)
2535 && has_delim
2536 && is_selected(field_num, field_mask, ranges, complement)
2537 {
2538 if !first_output {
2539 unsafe { buf_extend(buf, output_delim) };
2540 }
2541 unsafe {
2542 buf_extend(
2543 buf,
2544 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2545 )
2546 };
2547 first_output = false;
2548 }
2549
2550 if !first_output {
2551 unsafe { buf_push(buf, line_delim) };
2552 } else if !has_delim {
2553 if !suppress {
2554 unsafe {
2555 buf_extend(buf, line);
2556 buf_push(buf, line_delim);
2557 }
2558 }
2559 } else {
2560 unsafe { buf_push(buf, line_delim) };
2561 }
2562}
2563
2564fn process_bytes_from_start(
2571 data: &[u8],
2572 max_bytes: usize,
2573 line_delim: u8,
2574 out: &mut impl Write,
2575) -> io::Result<()> {
2576 if data.len() < PARALLEL_THRESHOLD && max_bytes > 0 && max_bytes < usize::MAX {
2584 let mut start = 0;
2585 let mut all_fit = true;
2586 for pos in memchr_iter(line_delim, data) {
2587 if pos - start > max_bytes {
2588 all_fit = false;
2589 break;
2590 }
2591 start = pos + 1;
2592 }
2593 if all_fit && start < data.len() && data.len() - start > max_bytes {
2595 all_fit = false;
2596 }
2597 if all_fit {
2598 if !data.is_empty() && data[data.len() - 1] == line_delim {
2600 return out.write_all(data);
2601 } else if !data.is_empty() {
2602 out.write_all(data)?;
2603 return out.write_all(&[line_delim]);
2604 }
2605 return Ok(());
2606 }
2607 }
2608
2609 if data.len() >= PARALLEL_THRESHOLD {
2610 let chunks = split_for_scope(data, line_delim);
2611 let n = chunks.len();
2612 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2613 rayon::scope(|s| {
2614 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2615 s.spawn(move |_| {
2616 result.reserve(chunk.len());
2619 bytes_from_start_chunk(chunk, max_bytes, line_delim, result);
2620 });
2621 }
2622 });
2623 let slices: Vec<IoSlice> = results
2625 .iter()
2626 .filter(|r| !r.is_empty())
2627 .map(|r| IoSlice::new(r))
2628 .collect();
2629 write_ioslices(out, &slices)?;
2630 } else {
2631 if max_bytes <= 512 {
2637 let est_out = (data.len() / 4).max(max_bytes + 2);
2640 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2641 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2642 if !buf.is_empty() {
2643 out.write_all(&buf)?;
2644 }
2645 } else {
2646 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2650 }
2651 }
2652 Ok(())
2653}
2654
2655#[inline]
2660fn bytes_from_start_zerocopy(
2661 data: &[u8],
2662 max_bytes: usize,
2663 line_delim: u8,
2664 out: &mut impl Write,
2665) -> io::Result<()> {
2666 let newline_buf: [u8; 1] = [line_delim];
2667 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2668 let mut start = 0;
2669 let mut run_start: usize = 0;
2670
2671 for pos in memchr_iter(line_delim, data) {
2672 let line_len = pos - start;
2673 if line_len > max_bytes {
2674 if run_start < start {
2676 iov.push(IoSlice::new(&data[run_start..start]));
2677 }
2678 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2679 iov.push(IoSlice::new(&newline_buf));
2680 run_start = pos + 1;
2681
2682 if iov.len() >= MAX_IOV - 2 {
2683 write_ioslices(out, &iov)?;
2684 iov.clear();
2685 }
2686 }
2687 start = pos + 1;
2688 }
2689 if start < data.len() {
2691 let line_len = data.len() - start;
2692 if line_len > max_bytes {
2693 if run_start < start {
2694 iov.push(IoSlice::new(&data[run_start..start]));
2695 }
2696 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2697 iov.push(IoSlice::new(&newline_buf));
2698 if !iov.is_empty() {
2699 write_ioslices(out, &iov)?;
2700 }
2701 return Ok(());
2702 }
2703 }
2704 if run_start < data.len() {
2706 iov.push(IoSlice::new(&data[run_start..]));
2707 if !data.is_empty() && *data.last().unwrap() != line_delim {
2708 iov.push(IoSlice::new(&newline_buf));
2709 }
2710 }
2711 if !iov.is_empty() {
2712 write_ioslices(out, &iov)?;
2713 }
2714 Ok(())
2715}
2716
2717#[inline]
2722fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2723 buf.reserve(data.len());
2726
2727 let src = data.as_ptr();
2728 let dst_base = buf.as_mut_ptr();
2729 let mut wp = buf.len();
2730 let mut start = 0;
2731
2732 for pos in memchr_iter(line_delim, data) {
2733 let line_len = pos - start;
2734 let take = line_len.min(max_bytes);
2735 unsafe {
2736 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2737 *dst_base.add(wp + take) = line_delim;
2738 }
2739 wp += take + 1;
2740 start = pos + 1;
2741 }
2742 if start < data.len() {
2744 let line_len = data.len() - start;
2745 let take = line_len.min(max_bytes);
2746 unsafe {
2747 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2748 *dst_base.add(wp + take) = line_delim;
2749 }
2750 wp += take + 1;
2751 }
2752 unsafe { buf.set_len(wp) };
2753}
2754
2755fn process_bytes_from_offset(
2757 data: &[u8],
2758 skip_bytes: usize,
2759 line_delim: u8,
2760 out: &mut impl Write,
2761) -> io::Result<()> {
2762 if data.len() >= PARALLEL_THRESHOLD {
2763 let chunks = split_for_scope(data, line_delim);
2764 let n = chunks.len();
2765 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2766 rayon::scope(|s| {
2767 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2768 s.spawn(move |_| {
2769 result.reserve(chunk.len());
2770 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, result);
2771 });
2772 }
2773 });
2774 let slices: Vec<IoSlice> = results
2776 .iter()
2777 .filter(|r| !r.is_empty())
2778 .map(|r| IoSlice::new(r))
2779 .collect();
2780 write_ioslices(out, &slices)?;
2781 } else {
2782 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2784 }
2785 Ok(())
2786}
2787
2788#[inline]
2792fn bytes_from_offset_zerocopy(
2793 data: &[u8],
2794 skip_bytes: usize,
2795 line_delim: u8,
2796 out: &mut impl Write,
2797) -> io::Result<()> {
2798 let delim_buf = [line_delim];
2799 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2800
2801 let mut start = 0;
2802 for pos in memchr_iter(line_delim, data) {
2803 let line_len = pos - start;
2804 if line_len > skip_bytes {
2805 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2806 }
2807 iov.push(IoSlice::new(&delim_buf));
2808 if iov.len() >= MAX_IOV - 1 {
2810 write_ioslices(out, &iov)?;
2811 iov.clear();
2812 }
2813 start = pos + 1;
2814 }
2815 if start < data.len() {
2816 let line_len = data.len() - start;
2817 if line_len > skip_bytes {
2818 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2819 }
2820 iov.push(IoSlice::new(&delim_buf));
2821 }
2822 if !iov.is_empty() {
2823 write_ioslices(out, &iov)?;
2824 }
2825 Ok(())
2826}
2827
2828#[inline]
2831fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2832 buf.reserve(data.len());
2833
2834 let src = data.as_ptr();
2835 let dst_base = buf.as_mut_ptr();
2836 let mut wp = buf.len();
2837 let mut start = 0;
2838
2839 for pos in memchr_iter(line_delim, data) {
2840 let line_len = pos - start;
2841 if line_len > skip_bytes {
2842 let take = line_len - skip_bytes;
2843 unsafe {
2844 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2845 }
2846 wp += take;
2847 }
2848 unsafe {
2849 *dst_base.add(wp) = line_delim;
2850 }
2851 wp += 1;
2852 start = pos + 1;
2853 }
2854 if start < data.len() {
2855 let line_len = data.len() - start;
2856 if line_len > skip_bytes {
2857 let take = line_len - skip_bytes;
2858 unsafe {
2859 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2860 }
2861 wp += take;
2862 }
2863 unsafe {
2864 *dst_base.add(wp) = line_delim;
2865 }
2866 wp += 1;
2867 }
2868 unsafe { buf.set_len(wp) };
2869}
2870
2871fn process_bytes_mid_range(
2873 data: &[u8],
2874 start_byte: usize,
2875 end_byte: usize,
2876 line_delim: u8,
2877 out: &mut impl Write,
2878) -> io::Result<()> {
2879 let skip = start_byte.saturating_sub(1);
2880
2881 if data.len() >= PARALLEL_THRESHOLD {
2882 let chunks = split_for_scope(data, line_delim);
2883 let n = chunks.len();
2884 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2885 rayon::scope(|s| {
2886 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2887 s.spawn(move |_| {
2888 result.reserve(chunk.len());
2889 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, result);
2890 });
2891 }
2892 });
2893 let slices: Vec<IoSlice> = results
2894 .iter()
2895 .filter(|r| !r.is_empty())
2896 .map(|r| IoSlice::new(r))
2897 .collect();
2898 write_ioslices(out, &slices)?;
2899 } else {
2900 let mut buf = Vec::with_capacity(data.len());
2901 bytes_mid_range_chunk(data, skip, end_byte, line_delim, &mut buf);
2902 if !buf.is_empty() {
2903 out.write_all(&buf)?;
2904 }
2905 }
2906 Ok(())
2907}
2908
2909#[inline]
2913fn bytes_mid_range_chunk(
2914 data: &[u8],
2915 skip: usize,
2916 end_byte: usize,
2917 line_delim: u8,
2918 buf: &mut Vec<u8>,
2919) {
2920 buf.reserve(data.len());
2921
2922 let src = data.as_ptr();
2923 let dst_base = buf.as_mut_ptr();
2924 let mut wp = buf.len();
2925 let mut start = 0;
2926
2927 for pos in memchr_iter(line_delim, data) {
2928 let line_len = pos - start;
2929 if line_len > skip {
2930 let take_end = line_len.min(end_byte);
2931 let take = take_end - skip;
2932 unsafe {
2933 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2934 }
2935 wp += take;
2936 }
2937 unsafe {
2938 *dst_base.add(wp) = line_delim;
2939 }
2940 wp += 1;
2941 start = pos + 1;
2942 }
2943 if start < data.len() {
2944 let line_len = data.len() - start;
2945 if line_len > skip {
2946 let take_end = line_len.min(end_byte);
2947 let take = take_end - skip;
2948 unsafe {
2949 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2950 }
2951 wp += take;
2952 }
2953 unsafe {
2954 *dst_base.add(wp) = line_delim;
2955 }
2956 wp += 1;
2957 }
2958 unsafe { buf.set_len(wp) };
2959}
2960
2961fn process_bytes_complement_mid(
2963 data: &[u8],
2964 skip_start: usize,
2965 skip_end: usize,
2966 line_delim: u8,
2967 out: &mut impl Write,
2968) -> io::Result<()> {
2969 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
2971 let chunks = split_for_scope(data, line_delim);
2972 let n = chunks.len();
2973 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2974 rayon::scope(|s| {
2975 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2976 s.spawn(move |_| {
2977 result.reserve(chunk.len());
2978 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, result);
2979 });
2980 }
2981 });
2982 let slices: Vec<IoSlice> = results
2983 .iter()
2984 .filter(|r| !r.is_empty())
2985 .map(|r| IoSlice::new(r))
2986 .collect();
2987 write_ioslices(out, &slices)?;
2988 } else {
2989 let mut buf = Vec::with_capacity(data.len());
2990 bytes_complement_mid_chunk(data, prefix_bytes, skip_end, line_delim, &mut buf);
2991 if !buf.is_empty() {
2992 out.write_all(&buf)?;
2993 }
2994 }
2995 Ok(())
2996}
2997
2998#[inline]
3001fn bytes_complement_mid_chunk(
3002 data: &[u8],
3003 prefix_bytes: usize,
3004 skip_end: usize,
3005 line_delim: u8,
3006 buf: &mut Vec<u8>,
3007) {
3008 buf.reserve(data.len());
3009
3010 let src = data.as_ptr();
3011 let dst_base = buf.as_mut_ptr();
3012 let mut wp = buf.len();
3013 let mut start = 0;
3014
3015 for pos in memchr_iter(line_delim, data) {
3016 let line_len = pos - start;
3017 let take_prefix = prefix_bytes.min(line_len);
3019 if take_prefix > 0 {
3020 unsafe {
3021 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
3022 }
3023 wp += take_prefix;
3024 }
3025 if line_len > skip_end {
3027 let suffix_len = line_len - skip_end;
3028 unsafe {
3029 std::ptr::copy_nonoverlapping(
3030 src.add(start + skip_end),
3031 dst_base.add(wp),
3032 suffix_len,
3033 );
3034 }
3035 wp += suffix_len;
3036 }
3037 unsafe {
3038 *dst_base.add(wp) = line_delim;
3039 }
3040 wp += 1;
3041 start = pos + 1;
3042 }
3043 if start < data.len() {
3044 let line_len = data.len() - start;
3045 let take_prefix = prefix_bytes.min(line_len);
3046 if take_prefix > 0 {
3047 unsafe {
3048 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
3049 }
3050 wp += take_prefix;
3051 }
3052 if line_len > skip_end {
3053 let suffix_len = line_len - skip_end;
3054 unsafe {
3055 std::ptr::copy_nonoverlapping(
3056 src.add(start + skip_end),
3057 dst_base.add(wp),
3058 suffix_len,
3059 );
3060 }
3061 wp += suffix_len;
3062 }
3063 unsafe {
3064 *dst_base.add(wp) = line_delim;
3065 }
3066 wp += 1;
3067 }
3068 unsafe { buf.set_len(wp) };
3069}
3070
3071fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3073 let line_delim = cfg.line_delim;
3074 let ranges = cfg.ranges;
3075 let complement = cfg.complement;
3076 let output_delim = cfg.output_delim;
3077
3078 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
3080 let max_bytes = ranges[0].end;
3081 if max_bytes < usize::MAX {
3082 return process_bytes_from_start(data, max_bytes, line_delim, out);
3083 }
3084 }
3085
3086 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
3088 let skip_bytes = ranges[0].start.saturating_sub(1);
3089 if skip_bytes > 0 {
3090 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
3091 }
3092 }
3093
3094 if !complement
3096 && ranges.len() == 1
3097 && ranges[0].start > 1
3098 && ranges[0].end < usize::MAX
3099 && output_delim.is_empty()
3100 {
3101 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
3102 }
3103
3104 if complement
3106 && ranges.len() == 1
3107 && ranges[0].start == 1
3108 && ranges[0].end < usize::MAX
3109 && output_delim.is_empty()
3110 {
3111 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
3112 }
3113
3114 if complement
3116 && ranges.len() == 1
3117 && ranges[0].end == usize::MAX
3118 && ranges[0].start > 1
3119 && output_delim.is_empty()
3120 {
3121 let max_bytes = ranges[0].start - 1;
3122 return process_bytes_from_start(data, max_bytes, line_delim, out);
3123 }
3124
3125 if complement
3127 && ranges.len() == 1
3128 && ranges[0].start > 1
3129 && ranges[0].end < usize::MAX
3130 && output_delim.is_empty()
3131 {
3132 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
3133 }
3134
3135 if data.len() >= PARALLEL_THRESHOLD {
3136 let chunks = split_for_scope(data, line_delim);
3137 let n = chunks.len();
3138 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
3139 rayon::scope(|s| {
3140 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
3141 s.spawn(move |_| {
3142 result.reserve(chunk.len());
3143 process_bytes_chunk(
3144 chunk,
3145 ranges,
3146 complement,
3147 output_delim,
3148 line_delim,
3149 result,
3150 );
3151 });
3152 }
3153 });
3154 let slices: Vec<IoSlice> = results
3155 .iter()
3156 .filter(|r| !r.is_empty())
3157 .map(|r| IoSlice::new(r))
3158 .collect();
3159 write_ioslices(out, &slices)?;
3160 } else {
3161 let mut buf = Vec::with_capacity(data.len());
3162 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
3163 if !buf.is_empty() {
3164 out.write_all(&buf)?;
3165 }
3166 }
3167 Ok(())
3168}
3169
3170fn process_bytes_chunk(
3175 data: &[u8],
3176 ranges: &[Range],
3177 complement: bool,
3178 output_delim: &[u8],
3179 line_delim: u8,
3180 buf: &mut Vec<u8>,
3181) {
3182 buf.reserve(data.len());
3183 let base = data.as_ptr();
3184 let mut start = 0;
3185 for end_pos in memchr_iter(line_delim, data) {
3186 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
3187 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3188 unsafe { buf_push(buf, line_delim) };
3189 start = end_pos + 1;
3190 }
3191 if start < data.len() {
3192 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
3193 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3194 unsafe { buf_push(buf, line_delim) };
3195 }
3196}
3197
3198#[inline(always)]
3202fn cut_bytes_to_buf(
3203 line: &[u8],
3204 ranges: &[Range],
3205 complement: bool,
3206 output_delim: &[u8],
3207 buf: &mut Vec<u8>,
3208) {
3209 let len = line.len();
3210 let base = line.as_ptr();
3211 let mut first_range = true;
3212
3213 let needed = len + output_delim.len() * ranges.len() + 1;
3215 if buf.capacity() - buf.len() < needed {
3216 buf.reserve(needed);
3217 }
3218
3219 if complement {
3220 let mut pos: usize = 1;
3221 for r in ranges {
3222 let rs = r.start;
3223 let re = r.end.min(len);
3224 if pos < rs {
3225 if !first_range && !output_delim.is_empty() {
3226 unsafe { buf_extend(buf, output_delim) };
3227 }
3228 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
3229 first_range = false;
3230 }
3231 pos = re + 1;
3232 if pos > len {
3233 break;
3234 }
3235 }
3236 if pos <= len {
3237 if !first_range && !output_delim.is_empty() {
3238 unsafe { buf_extend(buf, output_delim) };
3239 }
3240 unsafe {
3241 buf_extend(
3242 buf,
3243 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
3244 )
3245 };
3246 }
3247 } else if output_delim.is_empty() && ranges.len() == 1 {
3248 let start = ranges[0].start.saturating_sub(1);
3250 let end = ranges[0].end.min(len);
3251 if start < len {
3252 unsafe {
3253 buf_extend(
3254 buf,
3255 std::slice::from_raw_parts(base.add(start), end - start),
3256 )
3257 };
3258 }
3259 } else {
3260 for r in ranges {
3261 let start = r.start.saturating_sub(1);
3262 let end = r.end.min(len);
3263 if start >= len {
3264 break;
3265 }
3266 if !first_range && !output_delim.is_empty() {
3267 unsafe { buf_extend(buf, output_delim) };
3268 }
3269 unsafe {
3270 buf_extend(
3271 buf,
3272 std::slice::from_raw_parts(base.add(start), end - start),
3273 )
3274 };
3275 first_range = false;
3276 }
3277 }
3278}
3279
3280#[inline]
3284pub fn cut_fields(
3285 line: &[u8],
3286 delim: u8,
3287 ranges: &[Range],
3288 complement: bool,
3289 output_delim: &[u8],
3290 suppress_no_delim: bool,
3291 out: &mut impl Write,
3292) -> io::Result<bool> {
3293 if memchr::memchr(delim, line).is_none() {
3294 if !suppress_no_delim {
3295 out.write_all(line)?;
3296 return Ok(true);
3297 }
3298 return Ok(false);
3299 }
3300
3301 let mut field_num: usize = 1;
3302 let mut field_start: usize = 0;
3303 let mut first_output = true;
3304
3305 for delim_pos in memchr_iter(delim, line) {
3306 let selected = in_ranges(ranges, field_num) != complement;
3307 if selected {
3308 if !first_output {
3309 out.write_all(output_delim)?;
3310 }
3311 out.write_all(&line[field_start..delim_pos])?;
3312 first_output = false;
3313 }
3314 field_start = delim_pos + 1;
3315 field_num += 1;
3316 }
3317
3318 let selected = in_ranges(ranges, field_num) != complement;
3319 if selected {
3320 if !first_output {
3321 out.write_all(output_delim)?;
3322 }
3323 out.write_all(&line[field_start..])?;
3324 }
3325
3326 Ok(true)
3327}
3328
3329#[inline]
3331pub fn cut_bytes(
3332 line: &[u8],
3333 ranges: &[Range],
3334 complement: bool,
3335 output_delim: &[u8],
3336 out: &mut impl Write,
3337) -> io::Result<bool> {
3338 let mut first_range = true;
3339
3340 if complement {
3341 let len = line.len();
3342 let mut comp_ranges = Vec::new();
3343 let mut pos: usize = 1;
3344 for r in ranges {
3345 let rs = r.start;
3346 let re = r.end.min(len);
3347 if pos < rs {
3348 comp_ranges.push((pos, rs - 1));
3349 }
3350 pos = re + 1;
3351 if pos > len {
3352 break;
3353 }
3354 }
3355 if pos <= len {
3356 comp_ranges.push((pos, len));
3357 }
3358 for &(s, e) in &comp_ranges {
3359 if !first_range && !output_delim.is_empty() {
3360 out.write_all(output_delim)?;
3361 }
3362 out.write_all(&line[s - 1..e])?;
3363 first_range = false;
3364 }
3365 } else {
3366 for r in ranges {
3367 let start = r.start.saturating_sub(1);
3368 let end = r.end.min(line.len());
3369 if start >= line.len() {
3370 break;
3371 }
3372 if !first_range && !output_delim.is_empty() {
3373 out.write_all(output_delim)?;
3374 }
3375 out.write_all(&line[start..end])?;
3376 first_range = false;
3377 }
3378 }
3379 Ok(true)
3380}
3381
3382pub fn cut_field1_inplace(data: &mut [u8], delim: u8, line_delim: u8, suppress: bool) -> usize {
3390 let len = data.len();
3391 let mut wp: usize = 0;
3392 let mut rp: usize = 0;
3393
3394 while rp < len {
3395 match memchr::memchr2(delim, line_delim, &data[rp..]) {
3396 None => {
3397 if suppress {
3399 break;
3401 }
3402 let remaining = len - rp;
3403 if wp != rp {
3404 data.copy_within(rp..len, wp);
3405 }
3406 wp += remaining;
3407 break;
3408 }
3409 Some(offset) => {
3410 let actual = rp + offset;
3411 if data[actual] == line_delim {
3412 if suppress {
3414 rp = actual + 1;
3416 } else {
3417 let chunk_len = actual + 1 - rp;
3419 if wp != rp {
3420 data.copy_within(rp..actual + 1, wp);
3421 }
3422 wp += chunk_len;
3423 rp = actual + 1;
3424 }
3425 } else {
3426 let field_len = actual - rp;
3428 if wp != rp && field_len > 0 {
3429 data.copy_within(rp..actual, wp);
3430 }
3431 wp += field_len;
3432 data[wp] = line_delim;
3433 wp += 1;
3434 match memchr::memchr(line_delim, &data[actual + 1..]) {
3436 None => {
3437 rp = len;
3438 }
3439 Some(nl_off) => {
3440 rp = actual + 1 + nl_off + 1;
3441 }
3442 }
3443 }
3444 }
3445 }
3446 }
3447 wp
3448}
3449
/// Runs the cut operation described by `cfg` over `data`, writing the result to `out`.
///
/// Dispatches on `cfg.mode`: `Fields` goes to `process_fields_fast`, while `Bytes` and
/// `Characters` both go to `process_bytes_fast`.
///
/// # Errors
/// Propagates whatever error the selected routine returns.
pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
    match cfg.mode {
        CutMode::Fields => process_fields_fast(data, cfg, out),
        // Characters share the byte path; this dispatcher does no multi-byte decoding.
        CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
    }
}
3457
3458pub fn process_cut_reader<R: BufRead>(
3463 mut reader: R,
3464 cfg: &CutConfig,
3465 out: &mut impl Write,
3466) -> io::Result<()> {
3467 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3469
3470 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3473
3474 loop {
3475 buf.reserve(CHUNK_SIZE);
3477 let read_start = buf.len();
3478 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3479 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3480 buf.truncate(read_start + n);
3481
3482 if buf.is_empty() {
3483 break;
3484 }
3485
3486 if n == 0 {
3487 process_cut_data(&buf, cfg, out)?;
3489 break;
3490 }
3491
3492 let process_end = match memchr::memrchr(line_delim, &buf) {
3494 Some(pos) => pos + 1,
3495 None => {
3496 continue;
3498 }
3499 };
3500
3501 process_cut_data(&buf[..process_end], cfg, out)?;
3503
3504 let leftover_len = buf.len() - process_end;
3506 if leftover_len > 0 {
3507 buf.copy_within(process_end.., 0);
3508 }
3509 buf.truncate(leftover_len);
3510 }
3511
3512 Ok(())
3513}
3514
/// Reads from `reader` until `buf` is full or the reader reports end of input, and returns
/// the number of bytes placed in `buf`.
///
/// Short reads are continued and `ErrorKind::Interrupted` is retried on every read,
/// including the first one. A return value smaller than `buf.len()` therefore means end of
/// input was reached.
///
/// # Errors
/// Returns the first non-`Interrupted` error from `reader`; bytes read before the error are
/// not reported.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3534
3535pub fn process_cut_data_mut(data: &mut [u8], cfg: &CutConfig) -> Option<usize> {
3543 if cfg.complement {
3544 return None;
3545 }
3546
3547 match cfg.mode {
3548 CutMode::Fields => {
3549 if cfg.output_delim.len() != 1 || cfg.output_delim[0] != cfg.delim {
3551 return None;
3552 }
3553 if cfg.delim == cfg.line_delim {
3554 return None;
3555 }
3556 Some(cut_fields_inplace_general(
3557 data,
3558 cfg.delim,
3559 cfg.line_delim,
3560 cfg.ranges,
3561 cfg.suppress_no_delim,
3562 ))
3563 }
3564 CutMode::Bytes | CutMode::Characters => {
3565 if !cfg.output_delim.is_empty() {
3566 return None;
3567 }
3568 Some(cut_bytes_inplace_general(data, cfg.line_delim, cfg.ranges))
3569 }
3570 }
3571}
3572
3573fn cut_fields_inplace_general(
3576 data: &mut [u8],
3577 delim: u8,
3578 line_delim: u8,
3579 ranges: &[Range],
3580 suppress: bool,
3581) -> usize {
3582 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == 1 {
3584 return cut_field1_inplace(data, delim, line_delim, suppress);
3585 }
3586
3587 let len = data.len();
3588 if len == 0 {
3589 return 0;
3590 }
3591
3592 let max_field = ranges.last().map_or(0, |r| r.end);
3593 let max_delims = max_field.min(64);
3594 let mut wp: usize = 0;
3595 let mut rp: usize = 0;
3596
3597 while rp < len {
3598 let line_end = memchr::memchr(line_delim, &data[rp..])
3599 .map(|p| rp + p)
3600 .unwrap_or(len);
3601 let line_len = line_end - rp;
3602
3603 let mut delim_pos = [0usize; 64];
3605 let mut num_delims: usize = 0;
3606
3607 for pos in memchr_iter(delim, &data[rp..line_end]) {
3608 if num_delims < max_delims {
3609 delim_pos[num_delims] = pos;
3610 num_delims += 1;
3611 if num_delims >= max_delims {
3612 break;
3613 }
3614 }
3615 }
3616
3617 if num_delims == 0 {
3618 if !suppress {
3620 if wp != rp {
3621 data.copy_within(rp..line_end, wp);
3622 }
3623 wp += line_len;
3624 if line_end < len {
3625 data[wp] = line_delim;
3626 wp += 1;
3627 }
3628 }
3629 } else {
3630 let total_fields = num_delims + 1;
3631 let mut first_output = true;
3632
3633 for r in ranges {
3634 let range_start = r.start;
3635 let range_end = r.end.min(total_fields);
3636 if range_start > total_fields {
3637 break;
3638 }
3639 for field_num in range_start..=range_end {
3640 if field_num > total_fields {
3641 break;
3642 }
3643
3644 let field_start = if field_num == 1 {
3645 0
3646 } else if field_num - 2 < num_delims {
3647 delim_pos[field_num - 2] + 1
3648 } else {
3649 continue;
3650 };
3651 let field_end = if field_num <= num_delims {
3652 delim_pos[field_num - 1]
3653 } else {
3654 line_len
3655 };
3656
3657 if !first_output {
3658 data[wp] = delim;
3659 wp += 1;
3660 }
3661 let flen = field_end - field_start;
3662 if flen > 0 {
3663 data.copy_within(rp + field_start..rp + field_start + flen, wp);
3664 wp += flen;
3665 }
3666 first_output = false;
3667 }
3668 }
3669
3670 if !first_output && line_end < len {
3671 data[wp] = line_delim;
3672 wp += 1;
3673 } else if first_output && line_end < len {
3674 data[wp] = line_delim;
3676 wp += 1;
3677 }
3678 }
3679
3680 rp = if line_end < len { line_end + 1 } else { len };
3681 }
3682
3683 wp
3684}
3685
3686fn cut_bytes_inplace_general(data: &mut [u8], line_delim: u8, ranges: &[Range]) -> usize {
3688 let len = data.len();
3689 if len == 0 {
3690 return 0;
3691 }
3692
3693 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == usize::MAX {
3695 return len;
3696 }
3697
3698 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end < usize::MAX {
3700 return cut_bytes_from_start_inplace(data, line_delim, ranges[0].end);
3701 }
3702
3703 let mut wp: usize = 0;
3704 let mut rp: usize = 0;
3705
3706 while rp < len {
3707 let line_end = memchr::memchr(line_delim, &data[rp..])
3708 .map(|p| rp + p)
3709 .unwrap_or(len);
3710 let line_len = line_end - rp;
3711
3712 for r in ranges {
3713 let start = r.start.saturating_sub(1);
3714 let end = r.end.min(line_len);
3715 if start >= line_len {
3716 break;
3717 }
3718 let flen = end - start;
3719 if flen > 0 {
3720 data.copy_within(rp + start..rp + start + flen, wp);
3721 wp += flen;
3722 }
3723 }
3724
3725 if line_end < len {
3726 data[wp] = line_delim;
3727 wp += 1;
3728 }
3729
3730 rp = if line_end < len { line_end + 1 } else { len };
3731 }
3732
3733 wp
3734}
3735
3736fn cut_bytes_from_start_inplace(data: &mut [u8], line_delim: u8, max_bytes: usize) -> usize {
3738 let len = data.len();
3739
3740 let mut all_fit = true;
3742 let mut start = 0;
3743 for pos in memchr_iter(line_delim, data) {
3744 if pos - start > max_bytes {
3745 all_fit = false;
3746 break;
3747 }
3748 start = pos + 1;
3749 }
3750 if all_fit && start < len && len - start > max_bytes {
3751 all_fit = false;
3752 }
3753 if all_fit {
3754 return len;
3755 }
3756
3757 let mut wp: usize = 0;
3759 let mut rp: usize = 0;
3760
3761 while rp < len {
3762 let line_end = memchr::memchr(line_delim, &data[rp..])
3763 .map(|p| rp + p)
3764 .unwrap_or(len);
3765 let line_len = line_end - rp;
3766
3767 let take = line_len.min(max_bytes);
3768 if take > 0 && wp != rp {
3769 data.copy_within(rp..rp + take, wp);
3770 }
3771 wp += take;
3772
3773 if line_end < len {
3774 data[wp] = line_delim;
3775 wp += 1;
3776 }
3777
3778 rp = if line_end < len { line_end + 1 } else { len };
3779 }
3780
3781 wp
3782}
3783
/// Selection mode for a cut operation.
///
/// The dispatchers in this file send `Bytes` and `Characters` to the same byte-oriented
/// routines; only `Fields` takes the delimiter-splitting path.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CutMode {
    /// Select by byte position within each line.
    Bytes,
    /// Select by character position; handled identically to `Bytes` by the dispatchers in
    /// this file.
    Characters,
    /// Select delimiter-separated fields.
    Fields,
}