1use memchr::memchr_iter;
2use std::io::{self, BufRead, IoSlice, Write};
3
/// Input size in bytes (16 MiB) at or above which the field processors in this file split
/// the data with `split_for_scope` and process the chunks on worker threads.
const PARALLEL_THRESHOLD: usize = 16 * 1024 * 1024;

/// Largest number of `IoSlice`s that `write_ioslices` passes to one `write_vectored` call.
const MAX_IOV: usize = 1024;
11
/// Borrowed settings describing one cut operation; read by `process_fields_fast`.
pub struct CutConfig<'a> {
    /// Selection mode. `CutMode` is declared outside this excerpt.
    pub mode: CutMode,
    /// Ranges to select. NOTE(review): the field fast paths appear to assume these are sorted
    /// by start and non-overlapping, as `parse_ranges` returns them — confirm for other callers.
    pub ranges: &'a [Range],
    /// When true, the selection is inverted (positions outside `ranges` are kept).
    pub complement: bool,
    /// Byte that separates fields in the input.
    pub delim: u8,
    /// Bytes written between selected fields in the output.
    pub output_delim: &'a [u8],
    /// When true, lines that contain no `delim` are dropped instead of being echoed.
    pub suppress_no_delim: bool,
    /// Byte that terminates each input line and each output line.
    pub line_delim: u8,
}
22
/// An inclusive, 1-based span of positions as produced by `parse_ranges`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position (1-based).
    pub start: usize,
    /// Last selected position (inclusive); `usize::MAX` stands for "to the end of the line".
    pub end: usize,
}
29
30pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
33 let mut ranges = Vec::new();
34
35 for part in spec.split(',') {
36 let part = part.trim();
37 if part.is_empty() {
38 continue;
39 }
40
41 if let Some(idx) = part.find('-') {
42 let left = &part[..idx];
43 let right = &part[idx + 1..];
44
45 let start = if left.is_empty() {
46 1
47 } else {
48 left.parse::<usize>()
49 .map_err(|_| format!("invalid range: '{}'", part))?
50 };
51
52 let end = if right.is_empty() {
53 usize::MAX
54 } else {
55 right
56 .parse::<usize>()
57 .map_err(|_| format!("invalid range: '{}'", part))?
58 };
59
60 if start == 0 {
61 return Err("fields and positions are numbered from 1".to_string());
62 }
63 if start > end {
64 return Err(format!("invalid decreasing range: '{}'", part));
65 }
66
67 ranges.push(Range { start, end });
68 } else {
69 let n = part
70 .parse::<usize>()
71 .map_err(|_| format!("invalid field: '{}'", part))?;
72 if n == 0 {
73 return Err("fields and positions are numbered from 1".to_string());
74 }
75 ranges.push(Range { start: n, end: n });
76 }
77 }
78
79 if ranges.is_empty() {
80 return Err("you must specify a list of bytes, characters, or fields".to_string());
81 }
82
83 ranges.sort_by_key(|r| (r.start, r.end));
85 let mut merged = vec![ranges[0].clone()];
86 for r in &ranges[1..] {
87 let last = merged.last_mut().unwrap();
88 if r.start <= last.end.saturating_add(1) {
89 last.end = last.end.max(r.end);
90 } else {
91 merged.push(r.clone());
92 }
93 }
94
95 Ok(merged)
96}
97
98#[inline(always)]
101fn in_ranges(ranges: &[Range], pos: usize) -> bool {
102 for r in ranges {
103 if pos < r.start {
104 return false;
105 }
106 if pos <= r.end {
107 return true;
108 }
109 }
110 false
111}
112
113#[inline]
116fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
117 let mut mask: u64 = 0;
118 for i in 1..=64u32 {
119 let in_range = in_ranges(ranges, i as usize);
120 if in_range != complement {
121 mask |= 1u64 << (i - 1);
122 }
123 }
124 mask
125}
126
127#[inline(always)]
129fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
130 if field_num <= 64 {
131 (mask >> (field_num - 1)) & 1 == 1
132 } else {
133 in_ranges(ranges, field_num) != complement
134 }
135}
136
/// Appends `data` to `buf` without checking capacity.
///
/// # Safety
///
/// `buf` must already have at least `data.len()` bytes of spare capacity, and `data` must not
/// overlap the spare region of `buf`.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    let old_len = buf.len();
    let added = data.len();
    // SAFETY: the caller guarantees room for `added` bytes past `old_len`, so the destination
    // is inside the allocation and every byte below the new length is initialised by the copy.
    unsafe {
        let dst = buf.as_mut_ptr().add(old_len);
        dst.copy_from_nonoverlapping(data.as_ptr(), added);
        buf.set_len(old_len + added);
    }
}
149
/// Appends the single byte `b` to `buf` without checking capacity.
///
/// # Safety
///
/// `buf` must already have at least one byte of spare capacity.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    let at = buf.len();
    // SAFETY: the caller guarantees one spare byte, so index `at` is inside the allocation
    // and is initialised before the length is bumped.
    unsafe {
        buf.as_mut_ptr().add(at).write(b);
        buf.set_len(at + 1);
    }
}
160
/// Appends `data` followed by the single byte `b` to `buf` without checking capacity.
///
/// # Safety
///
/// `buf` must already have at least `data.len() + 1` bytes of spare capacity, and `data` must
/// not overlap the spare region of `buf`.
#[inline(always)]
unsafe fn buf_extend_byte(buf: &mut Vec<u8>, data: &[u8], b: u8) {
    let old_len = buf.len();
    let added = data.len();
    // SAFETY: the caller guarantees `added + 1` spare bytes, so both the copied block and the
    // trailing byte land inside the allocation and are initialised before `set_len`.
    unsafe {
        let dst = buf.as_mut_ptr().add(old_len);
        dst.copy_from_nonoverlapping(data.as_ptr(), added);
        dst.add(added).write(b);
        buf.set_len(old_len + added + 1);
    }
}
175
176#[inline]
180fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
181 if slices.is_empty() {
182 return Ok(());
183 }
184 for batch in slices.chunks(MAX_IOV) {
185 let total: usize = batch.iter().map(|s| s.len()).sum();
186 let written = out.write_vectored(batch)?;
187 if written >= total {
188 continue;
189 }
190 if written == 0 {
191 return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
192 }
193 write_ioslices_slow(out, batch, written)?;
194 }
195 Ok(())
196}
197
/// Finishes a partially written batch: skips the first `skip` bytes of `slices` (the part a
/// vectored write already delivered) and writes everything after that with `write_all`.
///
/// Empty slices are never passed to `write_all`.
#[cold]
#[inline(never)]
fn write_ioslices_slow(
    out: &mut impl Write,
    slices: &[IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for slice in slices {
        match slice.get(skip..) {
            // Some bytes of this slice are still unwritten: flush them and stop skipping.
            Some(rest) if !rest.is_empty() => {
                out.write_all(rest)?;
                skip = 0;
            }
            // Slice fully covered by the bytes already written (or empty): consume it.
            _ => skip -= slice.len(),
        }
    }
    Ok(())
}
217
/// Returns the parallelism reported by `std::thread::available_parallelism`, or 1 when that
/// query fails.
#[inline]
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    }
}
229
230fn split_for_scope<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
233 let num_threads = num_cpus().max(1);
234 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
235 return vec![data];
236 }
237
238 let chunk_size = data.len() / num_threads;
239 let mut chunks = Vec::with_capacity(num_threads);
240 let mut pos = 0;
241
242 for _ in 0..num_threads - 1 {
243 let target = pos + chunk_size;
244 if target >= data.len() {
245 break;
246 }
247 let boundary = memchr::memchr(line_delim, &data[target..])
248 .map(|p| target + p + 1)
249 .unwrap_or(data.len());
250 if boundary > pos {
251 chunks.push(&data[pos..boundary]);
252 }
253 pos = boundary;
254 }
255
256 if pos < data.len() {
257 chunks.push(&data[pos..]);
258 }
259
260 chunks
261}
262
263fn process_fields_multi_select(
270 data: &[u8],
271 delim: u8,
272 line_delim: u8,
273 ranges: &[Range],
274 suppress: bool,
275 out: &mut impl Write,
276) -> io::Result<()> {
277 let max_field = ranges.last().map_or(0, |r| r.end);
278
279 if data.len() >= PARALLEL_THRESHOLD {
280 let chunks = split_for_scope(data, line_delim);
281 let n = chunks.len();
282 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
283 rayon::scope(|s| {
284 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
285 s.spawn(move |_| {
286 result.reserve(chunk.len() * 3 / 4);
287 multi_select_chunk(
288 chunk, delim, line_delim, ranges, max_field, suppress, result,
289 );
290 });
291 }
292 });
293 let slices: Vec<IoSlice> = results
294 .iter()
295 .filter(|r| !r.is_empty())
296 .map(|r| IoSlice::new(r))
297 .collect();
298 write_ioslices(out, &slices)?;
299 } else {
300 let mut buf = Vec::with_capacity(data.len() * 3 / 4);
301 multi_select_chunk(
302 data, delim, line_delim, ranges, max_field, suppress, &mut buf,
303 );
304 if !buf.is_empty() {
305 out.write_all(&buf)?;
306 }
307 }
308 Ok(())
309}
310
311fn multi_select_chunk(
317 data: &[u8],
318 delim: u8,
319 line_delim: u8,
320 ranges: &[Range],
321 max_field: usize,
322 suppress: bool,
323 buf: &mut Vec<u8>,
324) {
325 if delim == line_delim {
327 buf.reserve(data.len());
328 let base = data.as_ptr();
329 let mut start = 0;
330 for end_pos in memchr_iter(line_delim, data) {
331 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
332 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
333 start = end_pos + 1;
334 }
335 if start < data.len() {
336 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
337 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
338 }
339 return;
340 }
341
342 buf.reserve(data.len());
343 let base = data.as_ptr();
344 let data_len = data.len();
345
346 let mut line_start: usize = 0;
348 let mut delim_pos = [0usize; 64];
349 let mut num_delims: usize = 0;
350 let max_delims = max_field.min(64);
351 let mut at_max = false;
352
353 for pos in memchr::memchr2_iter(delim, line_delim, data) {
355 let byte = unsafe { *base.add(pos) };
356
357 if byte == line_delim {
358 let line_len = pos - line_start;
360 if num_delims == 0 {
361 if !suppress {
363 unsafe {
364 buf_extend(
365 buf,
366 std::slice::from_raw_parts(base.add(line_start), line_len),
367 );
368 buf_push(buf, line_delim);
369 }
370 }
371 } else {
372 let total_fields = num_delims + 1;
374 let mut first_output = true;
375
376 for r in ranges {
377 let range_start = r.start;
378 let range_end = r.end.min(total_fields);
379 if range_start > total_fields {
380 break;
381 }
382 for field_num in range_start..=range_end {
383 if field_num > total_fields {
384 break;
385 }
386
387 let field_start = if field_num == 1 {
388 line_start
389 } else if field_num - 2 < num_delims {
390 delim_pos[field_num - 2] + 1
391 } else {
392 continue;
393 };
394 let field_end = if field_num <= num_delims {
395 delim_pos[field_num - 1]
396 } else {
397 pos
398 };
399
400 if !first_output {
401 unsafe { buf_push(buf, delim) };
402 }
403 unsafe {
404 buf_extend(
405 buf,
406 std::slice::from_raw_parts(
407 base.add(field_start),
408 field_end - field_start,
409 ),
410 );
411 }
412 first_output = false;
413 }
414 }
415
416 unsafe { buf_push(buf, line_delim) };
417 }
418
419 line_start = pos + 1;
421 num_delims = 0;
422 at_max = false;
423 } else {
424 if !at_max && num_delims < max_delims {
426 delim_pos[num_delims] = pos;
427 num_delims += 1;
428 if num_delims >= max_delims {
429 at_max = true;
430 }
431 }
432 }
433 }
434
435 if line_start < data_len {
437 if num_delims == 0 {
438 if !suppress {
439 unsafe {
440 buf_extend(
441 buf,
442 std::slice::from_raw_parts(base.add(line_start), data_len - line_start),
443 );
444 buf_push(buf, line_delim);
445 }
446 }
447 } else {
448 let total_fields = num_delims + 1;
449 let mut first_output = true;
450
451 for r in ranges {
452 let range_start = r.start;
453 let range_end = r.end.min(total_fields);
454 if range_start > total_fields {
455 break;
456 }
457 for field_num in range_start..=range_end {
458 if field_num > total_fields {
459 break;
460 }
461
462 let field_start = if field_num == 1 {
463 line_start
464 } else if field_num - 2 < num_delims {
465 delim_pos[field_num - 2] + 1
466 } else {
467 continue;
468 };
469 let field_end = if field_num <= num_delims {
470 delim_pos[field_num - 1]
471 } else {
472 data_len
473 };
474
475 if !first_output {
476 unsafe { buf_push(buf, delim) };
477 }
478 unsafe {
479 buf_extend(
480 buf,
481 std::slice::from_raw_parts(
482 base.add(field_start),
483 field_end - field_start,
484 ),
485 );
486 }
487 first_output = false;
488 }
489 }
490
491 unsafe { buf_push(buf, line_delim) };
492 }
493 }
494}
495
496#[inline(always)]
501fn multi_select_line(
502 line: &[u8],
503 delim: u8,
504 line_delim: u8,
505 ranges: &[Range],
506 max_field: usize,
507 suppress: bool,
508 buf: &mut Vec<u8>,
509) {
510 let len = line.len();
511 if len == 0 {
512 if !suppress {
513 unsafe { buf_push(buf, line_delim) };
514 }
515 return;
516 }
517
518 let base = line.as_ptr();
520
521 let mut delim_pos = [0usize; 64];
524 let mut num_delims: usize = 0;
525 let max_delims = max_field.min(64);
526
527 for pos in memchr_iter(delim, line) {
528 if num_delims < max_delims {
529 delim_pos[num_delims] = pos;
530 num_delims += 1;
531 if num_delims >= max_delims {
532 break;
533 }
534 }
535 }
536
537 if num_delims == 0 {
538 if !suppress {
539 unsafe {
540 buf_extend(buf, line);
541 buf_push(buf, line_delim);
542 }
543 }
544 return;
545 }
546
547 let total_fields = num_delims + 1;
551 let mut first_output = true;
552
553 for r in ranges {
554 let range_start = r.start;
555 let range_end = r.end.min(total_fields);
556 if range_start > total_fields {
557 break;
558 }
559 for field_num in range_start..=range_end {
560 if field_num > total_fields {
561 break;
562 }
563
564 let field_start = if field_num == 1 {
565 0
566 } else if field_num - 2 < num_delims {
567 delim_pos[field_num - 2] + 1
568 } else {
569 continue;
570 };
571 let field_end = if field_num <= num_delims {
572 delim_pos[field_num - 1]
573 } else {
574 len
575 };
576
577 if !first_output {
578 unsafe { buf_push(buf, delim) };
579 }
580 unsafe {
581 buf_extend(
582 buf,
583 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
584 );
585 }
586 first_output = false;
587 }
588 }
589
590 unsafe { buf_push(buf, line_delim) };
591}
592
593fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
597 let delim = cfg.delim;
598 let line_delim = cfg.line_delim;
599 let ranges = cfg.ranges;
600 let complement = cfg.complement;
601 let output_delim = cfg.output_delim;
602 let suppress = cfg.suppress_no_delim;
603
604 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
612 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
613 }
614
615 if complement
617 && ranges.len() == 1
618 && output_delim.len() == 1
619 && output_delim[0] == delim
620 && ranges[0].start == ranges[0].end
621 {
622 return process_complement_single_field(
623 data,
624 delim,
625 line_delim,
626 ranges[0].start,
627 suppress,
628 out,
629 );
630 }
631
632 if complement
635 && ranges.len() == 1
636 && ranges[0].start > 1
637 && ranges[0].end < usize::MAX
638 && output_delim.len() == 1
639 && output_delim[0] == delim
640 {
641 return process_complement_range(
642 data,
643 delim,
644 line_delim,
645 ranges[0].start,
646 ranges[0].end,
647 suppress,
648 out,
649 );
650 }
651
652 if !complement
654 && ranges.len() == 1
655 && ranges[0].start == 1
656 && output_delim.len() == 1
657 && output_delim[0] == delim
658 && ranges[0].end < usize::MAX
659 {
660 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
661 }
662
663 if !complement
665 && ranges.len() == 1
666 && ranges[0].end == usize::MAX
667 && ranges[0].start > 1
668 && output_delim.len() == 1
669 && output_delim[0] == delim
670 {
671 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
672 }
673
674 if !complement
676 && ranges.len() == 1
677 && ranges[0].start > 1
678 && ranges[0].end < usize::MAX
679 && output_delim.len() == 1
680 && output_delim[0] == delim
681 {
682 return process_fields_mid_range(
683 data,
684 delim,
685 line_delim,
686 ranges[0].start,
687 ranges[0].end,
688 suppress,
689 out,
690 );
691 }
692
693 if !complement
699 && ranges.len() > 1
700 && ranges.last().map_or(false, |r| r.end < usize::MAX)
701 && output_delim.len() == 1
702 && output_delim[0] == delim
703 && delim != line_delim
704 {
705 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
706 }
707
708 let max_field = if complement {
710 usize::MAX
711 } else {
712 ranges.last().map(|r| r.end).unwrap_or(0)
713 };
714 let field_mask = compute_field_mask(ranges, complement);
715
716 if data.len() >= PARALLEL_THRESHOLD {
717 let chunks = split_for_scope(data, line_delim);
718 let n = chunks.len();
719 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
720 rayon::scope(|s| {
721 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
722 s.spawn(move |_| {
723 result.reserve(chunk.len());
724 process_fields_chunk(
725 chunk,
726 delim,
727 ranges,
728 output_delim,
729 suppress,
730 max_field,
731 field_mask,
732 line_delim,
733 complement,
734 result,
735 );
736 });
737 }
738 });
739 let slices: Vec<IoSlice> = results
740 .iter()
741 .filter(|r| !r.is_empty())
742 .map(|r| IoSlice::new(r))
743 .collect();
744 write_ioslices(out, &slices)?;
745 } else {
746 let mut buf = Vec::with_capacity(data.len());
747 process_fields_chunk(
748 data,
749 delim,
750 ranges,
751 output_delim,
752 suppress,
753 max_field,
754 field_mask,
755 line_delim,
756 complement,
757 &mut buf,
758 );
759 if !buf.is_empty() {
760 out.write_all(&buf)?;
761 }
762 }
763 Ok(())
764}
765
766fn process_fields_chunk(
771 data: &[u8],
772 delim: u8,
773 ranges: &[Range],
774 output_delim: &[u8],
775 suppress: bool,
776 max_field: usize,
777 field_mask: u64,
778 line_delim: u8,
779 complement: bool,
780 buf: &mut Vec<u8>,
781) {
782 if delim != line_delim && max_field < usize::MAX && !complement {
789 buf.reserve(data.len());
790 let mut start = 0;
791 for end_pos in memchr_iter(line_delim, data) {
792 let line = &data[start..end_pos];
793 extract_fields_to_buf(
794 line,
795 delim,
796 ranges,
797 output_delim,
798 suppress,
799 max_field,
800 field_mask,
801 line_delim,
802 buf,
803 complement,
804 );
805 start = end_pos + 1;
806 }
807 if start < data.len() {
808 extract_fields_to_buf(
809 &data[start..],
810 delim,
811 ranges,
812 output_delim,
813 suppress,
814 max_field,
815 field_mask,
816 line_delim,
817 buf,
818 complement,
819 );
820 }
821 return;
822 }
823
824 if delim != line_delim {
828 buf.reserve(data.len());
829
830 let data_len = data.len();
831 let base = data.as_ptr();
832 let mut line_start: usize = 0;
833 let mut field_start: usize = 0;
834 let mut field_num: usize = 1;
835 let mut first_output = true;
836 let mut has_delim = false;
837
838 for pos in memchr::memchr2_iter(delim, line_delim, data) {
839 let byte = unsafe { *base.add(pos) };
840
841 if byte == line_delim {
842 if (field_num <= max_field || complement)
844 && has_delim
845 && is_selected(field_num, field_mask, ranges, complement)
846 {
847 if !first_output {
848 unsafe { buf_extend(buf, output_delim) };
849 }
850 unsafe {
851 buf_extend(
852 buf,
853 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
854 )
855 };
856 first_output = false;
857 }
858
859 if !first_output {
860 unsafe { buf_push(buf, line_delim) };
861 } else if !has_delim {
862 if !suppress {
863 unsafe {
864 buf_extend(
865 buf,
866 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
867 );
868 buf_push(buf, line_delim);
869 }
870 }
871 } else {
872 unsafe { buf_push(buf, line_delim) };
873 }
874
875 line_start = pos + 1;
877 field_start = pos + 1;
878 field_num = 1;
879 first_output = true;
880 has_delim = false;
881 } else {
882 has_delim = true;
884
885 if is_selected(field_num, field_mask, ranges, complement) {
886 if !first_output {
887 unsafe { buf_extend(buf, output_delim) };
888 }
889 unsafe {
890 buf_extend(
891 buf,
892 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
893 )
894 };
895 first_output = false;
896 }
897
898 field_num += 1;
899 field_start = pos + 1;
900 }
901 }
902
903 if line_start < data_len {
905 if line_start < data_len {
906 if (field_num <= max_field || complement)
907 && has_delim
908 && is_selected(field_num, field_mask, ranges, complement)
909 {
910 if !first_output {
911 unsafe { buf_extend(buf, output_delim) };
912 }
913 unsafe {
914 buf_extend(
915 buf,
916 std::slice::from_raw_parts(
917 base.add(field_start),
918 data_len - field_start,
919 ),
920 )
921 };
922 first_output = false;
923 }
924
925 if !first_output {
926 unsafe { buf_push(buf, line_delim) };
927 } else if !has_delim {
928 if !suppress {
929 unsafe {
930 buf_extend(
931 buf,
932 std::slice::from_raw_parts(
933 base.add(line_start),
934 data_len - line_start,
935 ),
936 );
937 buf_push(buf, line_delim);
938 }
939 }
940 } else {
941 unsafe { buf_push(buf, line_delim) };
942 }
943 }
944 }
945
946 return;
947 }
948
949 let mut start = 0;
951 for end_pos in memchr_iter(line_delim, data) {
952 let line = &data[start..end_pos];
953 extract_fields_to_buf(
954 line,
955 delim,
956 ranges,
957 output_delim,
958 suppress,
959 max_field,
960 field_mask,
961 line_delim,
962 buf,
963 complement,
964 );
965 start = end_pos + 1;
966 }
967 if start < data.len() {
968 extract_fields_to_buf(
969 &data[start..],
970 delim,
971 ranges,
972 output_delim,
973 suppress,
974 max_field,
975 field_mask,
976 line_delim,
977 buf,
978 complement,
979 );
980 }
981}
982
983fn process_single_field(
989 data: &[u8],
990 delim: u8,
991 line_delim: u8,
992 target: usize,
993 suppress: bool,
994 out: &mut impl Write,
995) -> io::Result<()> {
996 let target_idx = target - 1;
997
998 const FIELD_PARALLEL_MIN: usize = 16 * 1024 * 1024;
1000
1001 if delim != line_delim {
1002 if target_idx == 0 && !suppress {
1006 if data.len() >= FIELD_PARALLEL_MIN {
1007 return single_field1_parallel(data, delim, line_delim, out);
1008 }
1009 let mut buf = Vec::with_capacity(data.len() + 1);
1014 single_field1_to_buf(data, delim, line_delim, &mut buf);
1015 if !buf.is_empty() {
1016 out.write_all(&buf)?;
1017 }
1018 return Ok(());
1019 }
1020
1021 if data.len() >= FIELD_PARALLEL_MIN {
1025 let chunks = split_for_scope(data, line_delim);
1026 let n = chunks.len();
1027 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1028 rayon::scope(|s| {
1029 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1030 s.spawn(move |_| {
1031 result.reserve(chunk.len() / 2);
1032 process_single_field_chunk(
1033 chunk, delim, target_idx, line_delim, suppress, result,
1034 );
1035 });
1036 }
1037 });
1038 let slices: Vec<IoSlice> = results
1039 .iter()
1040 .filter(|r| !r.is_empty())
1041 .map(|r| IoSlice::new(r))
1042 .collect();
1043 write_ioslices(out, &slices)?;
1044 } else {
1045 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
1046 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1047 if !buf.is_empty() {
1048 out.write_all(&buf)?;
1049 }
1050 }
1051 return Ok(());
1052 }
1053
1054 if data.len() >= FIELD_PARALLEL_MIN {
1056 let chunks = split_for_scope(data, line_delim);
1057 let n = chunks.len();
1058 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1059 rayon::scope(|s| {
1060 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1061 s.spawn(move |_| {
1062 result.reserve(chunk.len() / 4);
1063 process_single_field_chunk(
1064 chunk, delim, target_idx, line_delim, suppress, result,
1065 );
1066 });
1067 }
1068 });
1069 let slices: Vec<IoSlice> = results
1070 .iter()
1071 .filter(|r| !r.is_empty())
1072 .map(|r| IoSlice::new(r))
1073 .collect();
1074 write_ioslices(out, &slices)?;
1075 } else {
1076 let mut buf = Vec::with_capacity(data.len() / 4);
1077 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1078 if !buf.is_empty() {
1079 out.write_all(&buf)?;
1080 }
1081 }
1082 Ok(())
1083}
1084
1085fn process_complement_range(
1088 data: &[u8],
1089 delim: u8,
1090 line_delim: u8,
1091 skip_start: usize,
1092 skip_end: usize,
1093 suppress: bool,
1094 out: &mut impl Write,
1095) -> io::Result<()> {
1096 if data.len() >= PARALLEL_THRESHOLD {
1097 let chunks = split_for_scope(data, line_delim);
1098 let n = chunks.len();
1099 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1100 rayon::scope(|s| {
1101 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1102 s.spawn(move |_| {
1103 result.reserve(chunk.len());
1104 complement_range_chunk(
1105 chunk, delim, skip_start, skip_end, line_delim, suppress, result,
1106 );
1107 });
1108 }
1109 });
1110 let slices: Vec<IoSlice> = results
1111 .iter()
1112 .filter(|r| !r.is_empty())
1113 .map(|r| IoSlice::new(r))
1114 .collect();
1115 write_ioslices(out, &slices)?;
1116 } else {
1117 let mut buf = Vec::with_capacity(data.len());
1118 complement_range_chunk(
1119 data, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
1120 );
1121 if !buf.is_empty() {
1122 out.write_all(&buf)?;
1123 }
1124 }
1125 Ok(())
1126}
1127
1128fn complement_range_chunk(
1130 data: &[u8],
1131 delim: u8,
1132 skip_start: usize,
1133 skip_end: usize,
1134 line_delim: u8,
1135 suppress: bool,
1136 buf: &mut Vec<u8>,
1137) {
1138 buf.reserve(data.len());
1140 let mut start = 0;
1141 for end_pos in memchr_iter(line_delim, data) {
1142 let line = &data[start..end_pos];
1143 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
1144 start = end_pos + 1;
1145 }
1146 if start < data.len() {
1147 complement_range_line(
1148 &data[start..],
1149 delim,
1150 skip_start,
1151 skip_end,
1152 line_delim,
1153 suppress,
1154 buf,
1155 );
1156 }
1157}
1158
1159#[inline(always)]
1166fn complement_range_line(
1167 line: &[u8],
1168 delim: u8,
1169 skip_start: usize,
1170 skip_end: usize,
1171 line_delim: u8,
1172 suppress: bool,
1173 buf: &mut Vec<u8>,
1174) {
1175 let len = line.len();
1176 if len == 0 {
1177 if !suppress {
1178 unsafe { buf_push(buf, line_delim) };
1179 }
1180 return;
1181 }
1182
1183 let base = line.as_ptr();
1185
1186 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
1196
1197 let mut delim_count: usize = 0;
1199 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
1203 delim_count += 1;
1204 if delim_count == need_prefix_delims {
1205 prefix_end_pos = pos;
1206 }
1207 if delim_count == total_need {
1208 suffix_start_pos = pos + 1;
1209 break;
1210 }
1211 }
1212
1213 if delim_count == 0 {
1214 if !suppress {
1216 unsafe {
1217 buf_extend(buf, line);
1218 buf_push(buf, line_delim);
1219 }
1220 }
1221 return;
1222 }
1223
1224 if delim_count < need_prefix_delims {
1230 unsafe {
1232 buf_extend(buf, line);
1233 buf_push(buf, line_delim);
1234 }
1235 return;
1236 }
1237
1238 let has_prefix = need_prefix_delims > 0;
1239 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
1240
1241 if has_prefix && has_suffix {
1242 unsafe {
1244 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1245 buf_push(buf, delim);
1246 buf_extend(
1247 buf,
1248 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1249 );
1250 buf_push(buf, line_delim);
1251 }
1252 } else if has_prefix {
1253 unsafe {
1255 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1256 buf_push(buf, line_delim);
1257 }
1258 } else if has_suffix {
1259 unsafe {
1261 buf_extend(
1262 buf,
1263 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1264 );
1265 buf_push(buf, line_delim);
1266 }
1267 } else {
1268 unsafe { buf_push(buf, line_delim) };
1270 }
1271}
1272
1273fn process_complement_single_field(
1275 data: &[u8],
1276 delim: u8,
1277 line_delim: u8,
1278 skip_field: usize,
1279 suppress: bool,
1280 out: &mut impl Write,
1281) -> io::Result<()> {
1282 let skip_idx = skip_field - 1;
1283
1284 if data.len() >= PARALLEL_THRESHOLD {
1285 let chunks = split_for_scope(data, line_delim);
1286 let n = chunks.len();
1287 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1288 rayon::scope(|s| {
1289 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1290 s.spawn(move |_| {
1291 result.reserve(chunk.len());
1292 complement_single_field_chunk(
1293 chunk, delim, skip_idx, line_delim, suppress, result,
1294 );
1295 });
1296 }
1297 });
1298 let slices: Vec<IoSlice> = results
1299 .iter()
1300 .filter(|r| !r.is_empty())
1301 .map(|r| IoSlice::new(r))
1302 .collect();
1303 write_ioslices(out, &slices)?;
1304 } else {
1305 let mut buf = Vec::with_capacity(data.len());
1306 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
1307 if !buf.is_empty() {
1308 out.write_all(&buf)?;
1309 }
1310 }
1311 Ok(())
1312}
1313
1314fn complement_single_field_chunk(
1320 data: &[u8],
1321 delim: u8,
1322 skip_idx: usize,
1323 line_delim: u8,
1324 suppress: bool,
1325 buf: &mut Vec<u8>,
1326) {
1327 if delim == line_delim {
1329 buf.reserve(data.len());
1330 let mut start = 0;
1331 for end_pos in memchr_iter(line_delim, data) {
1332 let line = &data[start..end_pos];
1333 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1334 start = end_pos + 1;
1335 }
1336 if start < data.len() {
1337 complement_single_field_line(
1338 &data[start..],
1339 delim,
1340 skip_idx,
1341 line_delim,
1342 suppress,
1343 buf,
1344 );
1345 }
1346 return;
1347 }
1348
1349 buf.reserve(data.len());
1350 let base = data.as_ptr();
1351 let data_len = data.len();
1352 let need_before = skip_idx; let need_total = skip_idx + 1; let mut line_start: usize = 0;
1357 let mut delim_count: usize = 0;
1358 let mut skip_start_pos: usize = 0;
1359 let mut skip_end_pos: usize = 0;
1360 let mut found_start = need_before == 0; let mut found_end = false;
1362
1363 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1364 let byte = unsafe { *base.add(pos) };
1365
1366 if byte == line_delim {
1367 if delim_count == 0 {
1369 if !suppress {
1371 unsafe {
1372 buf_extend(
1373 buf,
1374 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1375 );
1376 buf_push(buf, line_delim);
1377 }
1378 }
1379 } else if !found_start || delim_count < need_before {
1380 unsafe {
1382 buf_extend(
1383 buf,
1384 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1385 );
1386 buf_push(buf, line_delim);
1387 }
1388 } else {
1389 let has_prefix = skip_idx > 0;
1390 let has_suffix = found_end && skip_end_pos < pos;
1391
1392 if has_prefix && has_suffix {
1393 unsafe {
1394 buf_extend(
1395 buf,
1396 std::slice::from_raw_parts(
1397 base.add(line_start),
1398 skip_start_pos - 1 - line_start,
1399 ),
1400 );
1401 buf_push(buf, delim);
1402 buf_extend(
1403 buf,
1404 std::slice::from_raw_parts(
1405 base.add(skip_end_pos + 1),
1406 pos - skip_end_pos - 1,
1407 ),
1408 );
1409 buf_push(buf, line_delim);
1410 }
1411 } else if has_prefix {
1412 unsafe {
1413 buf_extend(
1414 buf,
1415 std::slice::from_raw_parts(
1416 base.add(line_start),
1417 skip_start_pos - 1 - line_start,
1418 ),
1419 );
1420 buf_push(buf, line_delim);
1421 }
1422 } else if has_suffix {
1423 unsafe {
1424 buf_extend(
1425 buf,
1426 std::slice::from_raw_parts(
1427 base.add(skip_end_pos + 1),
1428 pos - skip_end_pos - 1,
1429 ),
1430 );
1431 buf_push(buf, line_delim);
1432 }
1433 } else {
1434 unsafe { buf_push(buf, line_delim) };
1435 }
1436 }
1437
1438 line_start = pos + 1;
1440 delim_count = 0;
1441 skip_start_pos = 0;
1442 skip_end_pos = 0;
1443 found_start = need_before == 0;
1444 found_end = false;
1445 } else {
1446 delim_count += 1;
1448 if delim_count == need_before {
1449 skip_start_pos = pos + 1;
1450 found_start = true;
1451 }
1452 if delim_count == need_total {
1453 skip_end_pos = pos;
1454 found_end = true;
1455 }
1456 }
1457 }
1458
1459 if line_start < data_len {
1461 let pos = data_len;
1462 if delim_count == 0 {
1463 if !suppress {
1464 unsafe {
1465 buf_extend(
1466 buf,
1467 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1468 );
1469 buf_push(buf, line_delim);
1470 }
1471 }
1472 } else if !found_start || delim_count < need_before {
1473 unsafe {
1474 buf_extend(
1475 buf,
1476 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1477 );
1478 buf_push(buf, line_delim);
1479 }
1480 } else {
1481 let has_prefix = skip_idx > 0;
1482 let has_suffix = found_end && skip_end_pos < pos;
1483
1484 if has_prefix && has_suffix {
1485 unsafe {
1486 buf_extend(
1487 buf,
1488 std::slice::from_raw_parts(
1489 base.add(line_start),
1490 skip_start_pos - 1 - line_start,
1491 ),
1492 );
1493 buf_push(buf, delim);
1494 buf_extend(
1495 buf,
1496 std::slice::from_raw_parts(
1497 base.add(skip_end_pos + 1),
1498 pos - skip_end_pos - 1,
1499 ),
1500 );
1501 buf_push(buf, line_delim);
1502 }
1503 } else if has_prefix {
1504 unsafe {
1505 buf_extend(
1506 buf,
1507 std::slice::from_raw_parts(
1508 base.add(line_start),
1509 skip_start_pos - 1 - line_start,
1510 ),
1511 );
1512 buf_push(buf, line_delim);
1513 }
1514 } else if has_suffix {
1515 unsafe {
1516 buf_extend(
1517 buf,
1518 std::slice::from_raw_parts(
1519 base.add(skip_end_pos + 1),
1520 pos - skip_end_pos - 1,
1521 ),
1522 );
1523 buf_push(buf, line_delim);
1524 }
1525 } else {
1526 unsafe { buf_push(buf, line_delim) };
1527 }
1528 }
1529 }
1530}
1531
1532#[inline(always)]
1534fn complement_single_field_line(
1535 line: &[u8],
1536 delim: u8,
1537 skip_idx: usize,
1538 line_delim: u8,
1539 suppress: bool,
1540 buf: &mut Vec<u8>,
1541) {
1542 let len = line.len();
1543 if len == 0 {
1544 if !suppress {
1545 unsafe { buf_push(buf, line_delim) };
1546 }
1547 return;
1548 }
1549
1550 let base = line.as_ptr();
1551 let need_before = skip_idx;
1552 let need_total = skip_idx + 1;
1553
1554 let mut delim_count: usize = 0;
1555 let mut skip_start_pos: usize = 0;
1556 let mut skip_end_pos: usize = len;
1557 let mut found_end = false;
1558
1559 for pos in memchr_iter(delim, line) {
1560 delim_count += 1;
1561 if delim_count == need_before {
1562 skip_start_pos = pos + 1;
1563 }
1564 if delim_count == need_total {
1565 skip_end_pos = pos;
1566 found_end = true;
1567 break;
1568 }
1569 }
1570
1571 if delim_count == 0 {
1572 if !suppress {
1573 unsafe {
1574 buf_extend(buf, line);
1575 buf_push(buf, line_delim);
1576 }
1577 }
1578 return;
1579 }
1580
1581 if delim_count < need_before {
1582 unsafe {
1583 buf_extend(buf, line);
1584 buf_push(buf, line_delim);
1585 }
1586 return;
1587 }
1588
1589 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1590 let has_suffix = found_end && skip_end_pos < len;
1591
1592 if has_prefix && has_suffix {
1593 unsafe {
1594 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1595 buf_push(buf, delim);
1596 buf_extend(
1597 buf,
1598 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1599 );
1600 buf_push(buf, line_delim);
1601 }
1602 } else if has_prefix {
1603 unsafe {
1604 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1605 buf_push(buf, line_delim);
1606 }
1607 } else if has_suffix {
1608 unsafe {
1609 buf_extend(
1610 buf,
1611 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1612 );
1613 buf_push(buf, line_delim);
1614 }
1615 } else {
1616 unsafe { buf_push(buf, line_delim) };
1617 }
1618}
1619
1620fn process_fields_prefix(
1624 data: &[u8],
1625 delim: u8,
1626 line_delim: u8,
1627 last_field: usize,
1628 suppress: bool,
1629 out: &mut impl Write,
1630) -> io::Result<()> {
1631 if data.len() >= PARALLEL_THRESHOLD {
1632 let chunks = split_for_scope(data, line_delim);
1633 let n = chunks.len();
1634 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1635 rayon::scope(|s| {
1636 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1637 s.spawn(move |_| {
1638 result.reserve(chunk.len());
1639 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, result);
1640 });
1641 }
1642 });
1643 let slices: Vec<IoSlice> = results
1644 .iter()
1645 .filter(|r| !r.is_empty())
1646 .map(|r| IoSlice::new(r))
1647 .collect();
1648 write_ioslices(out, &slices)?;
1649 } else if !suppress {
1650 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1654 } else {
1655 let mut buf = Vec::with_capacity(data.len());
1656 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
1657 if !buf.is_empty() {
1658 out.write_all(&buf)?;
1659 }
1660 }
1661 Ok(())
1662}
1663
1664#[inline]
1670fn fields_prefix_zerocopy(
1671 data: &[u8],
1672 delim: u8,
1673 line_delim: u8,
1674 last_field: usize,
1675 out: &mut impl Write,
1676) -> io::Result<()> {
1677 let newline_buf: [u8; 1] = [line_delim];
1678 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1679 let mut start = 0;
1680 let mut run_start: usize = 0;
1681
1682 for end_pos in memchr_iter(line_delim, data) {
1683 let line = &data[start..end_pos];
1684 let mut field_count = 1;
1685 let mut truncate_at: Option<usize> = None;
1686 for dpos in memchr_iter(delim, line) {
1687 if field_count >= last_field {
1688 truncate_at = Some(start + dpos);
1689 break;
1690 }
1691 field_count += 1;
1692 }
1693
1694 if let Some(trunc_pos) = truncate_at {
1695 if run_start < start {
1696 iov.push(IoSlice::new(&data[run_start..start]));
1697 }
1698 iov.push(IoSlice::new(&data[start..trunc_pos]));
1699 iov.push(IoSlice::new(&newline_buf));
1700 run_start = end_pos + 1;
1701
1702 if iov.len() >= MAX_IOV - 2 {
1703 write_ioslices(out, &iov)?;
1704 iov.clear();
1705 }
1706 }
1707 start = end_pos + 1;
1708 }
1709 if start < data.len() {
1711 let line = &data[start..];
1712 let mut field_count = 1;
1713 let mut truncate_at: Option<usize> = None;
1714 for dpos in memchr_iter(delim, line) {
1715 if field_count >= last_field {
1716 truncate_at = Some(start + dpos);
1717 break;
1718 }
1719 field_count += 1;
1720 }
1721 if let Some(trunc_pos) = truncate_at {
1722 if run_start < start {
1723 iov.push(IoSlice::new(&data[run_start..start]));
1724 }
1725 iov.push(IoSlice::new(&data[start..trunc_pos]));
1726 iov.push(IoSlice::new(&newline_buf));
1727 if !iov.is_empty() {
1728 write_ioslices(out, &iov)?;
1729 }
1730 return Ok(());
1731 }
1732 }
1733 if run_start < data.len() {
1735 iov.push(IoSlice::new(&data[run_start..]));
1736 if !data.is_empty() && *data.last().unwrap() != line_delim {
1737 iov.push(IoSlice::new(&newline_buf));
1738 }
1739 }
1740 if !iov.is_empty() {
1741 write_ioslices(out, &iov)?;
1742 }
1743 Ok(())
1744}
1745
1746fn fields_prefix_chunk(
1748 data: &[u8],
1749 delim: u8,
1750 line_delim: u8,
1751 last_field: usize,
1752 suppress: bool,
1753 buf: &mut Vec<u8>,
1754) {
1755 buf.reserve(data.len());
1756 let mut start = 0;
1757 for end_pos in memchr_iter(line_delim, data) {
1758 let line = &data[start..end_pos];
1759 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1760 start = end_pos + 1;
1761 }
1762 if start < data.len() {
1763 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1764 }
1765}
1766
1767#[inline(always)]
1770fn fields_prefix_line(
1771 line: &[u8],
1772 delim: u8,
1773 line_delim: u8,
1774 last_field: usize,
1775 suppress: bool,
1776 buf: &mut Vec<u8>,
1777) {
1778 let len = line.len();
1779 if len == 0 {
1780 if !suppress {
1781 unsafe { buf_push(buf, line_delim) };
1782 }
1783 return;
1784 }
1785
1786 let base = line.as_ptr();
1788
1789 let mut field_count = 1usize;
1790 let mut has_delim = false;
1791
1792 for pos in memchr_iter(delim, line) {
1793 has_delim = true;
1794 if field_count >= last_field {
1795 unsafe {
1796 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1797 buf_push(buf, line_delim);
1798 }
1799 return;
1800 }
1801 field_count += 1;
1802 }
1803
1804 if !has_delim {
1805 if !suppress {
1806 unsafe {
1807 buf_extend(buf, line);
1808 buf_push(buf, line_delim);
1809 }
1810 }
1811 return;
1812 }
1813
1814 unsafe {
1815 buf_extend(buf, line);
1816 buf_push(buf, line_delim);
1817 }
1818}
1819
1820fn process_fields_suffix(
1822 data: &[u8],
1823 delim: u8,
1824 line_delim: u8,
1825 start_field: usize,
1826 suppress: bool,
1827 out: &mut impl Write,
1828) -> io::Result<()> {
1829 if data.len() >= PARALLEL_THRESHOLD {
1830 let chunks = split_for_scope(data, line_delim);
1831 let n = chunks.len();
1832 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1833 rayon::scope(|s| {
1834 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1835 s.spawn(move |_| {
1836 result.reserve(chunk.len());
1837 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, result);
1838 });
1839 }
1840 });
1841 let slices: Vec<IoSlice> = results
1842 .iter()
1843 .filter(|r| !r.is_empty())
1844 .map(|r| IoSlice::new(r))
1845 .collect();
1846 write_ioslices(out, &slices)?;
1847 } else {
1848 let mut buf = Vec::with_capacity(data.len());
1849 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
1850 if !buf.is_empty() {
1851 out.write_all(&buf)?;
1852 }
1853 }
1854 Ok(())
1855}
1856
1857fn fields_suffix_chunk(
1859 data: &[u8],
1860 delim: u8,
1861 line_delim: u8,
1862 start_field: usize,
1863 suppress: bool,
1864 buf: &mut Vec<u8>,
1865) {
1866 buf.reserve(data.len());
1867 let mut start = 0;
1868 for end_pos in memchr_iter(line_delim, data) {
1869 let line = &data[start..end_pos];
1870 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1871 start = end_pos + 1;
1872 }
1873 if start < data.len() {
1874 fields_suffix_line(
1875 &data[start..],
1876 delim,
1877 line_delim,
1878 start_field,
1879 suppress,
1880 buf,
1881 );
1882 }
1883}
1884
1885#[inline(always)]
1888fn fields_suffix_line(
1889 line: &[u8],
1890 delim: u8,
1891 line_delim: u8,
1892 start_field: usize,
1893 suppress: bool,
1894 buf: &mut Vec<u8>,
1895) {
1896 let len = line.len();
1897 if len == 0 {
1898 if !suppress {
1899 unsafe { buf_push(buf, line_delim) };
1900 }
1901 return;
1902 }
1903
1904 let base = line.as_ptr();
1906
1907 let skip_delims = start_field - 1;
1908 let mut delim_count = 0usize;
1909 let mut has_delim = false;
1910
1911 for pos in memchr_iter(delim, line) {
1912 has_delim = true;
1913 delim_count += 1;
1914 if delim_count >= skip_delims {
1915 unsafe {
1916 buf_extend(
1917 buf,
1918 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1919 );
1920 buf_push(buf, line_delim);
1921 }
1922 return;
1923 }
1924 }
1925
1926 if !has_delim {
1927 if !suppress {
1928 unsafe {
1929 buf_extend(buf, line);
1930 buf_push(buf, line_delim);
1931 }
1932 }
1933 return;
1934 }
1935
1936 unsafe { buf_push(buf, line_delim) };
1938}
1939
1940fn process_fields_mid_range(
1943 data: &[u8],
1944 delim: u8,
1945 line_delim: u8,
1946 start_field: usize,
1947 end_field: usize,
1948 suppress: bool,
1949 out: &mut impl Write,
1950) -> io::Result<()> {
1951 if data.len() >= PARALLEL_THRESHOLD {
1952 let chunks = split_for_scope(data, line_delim);
1953 let n = chunks.len();
1954 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1955 rayon::scope(|s| {
1956 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1957 s.spawn(move |_| {
1958 result.reserve(chunk.len());
1959 fields_mid_range_chunk(
1960 chunk,
1961 delim,
1962 line_delim,
1963 start_field,
1964 end_field,
1965 suppress,
1966 result,
1967 );
1968 });
1969 }
1970 });
1971 let slices: Vec<IoSlice> = results
1972 .iter()
1973 .filter(|r| !r.is_empty())
1974 .map(|r| IoSlice::new(r))
1975 .collect();
1976 write_ioslices(out, &slices)?;
1977 } else {
1978 let mut buf = Vec::with_capacity(data.len());
1979 fields_mid_range_chunk(
1980 data,
1981 delim,
1982 line_delim,
1983 start_field,
1984 end_field,
1985 suppress,
1986 &mut buf,
1987 );
1988 if !buf.is_empty() {
1989 out.write_all(&buf)?;
1990 }
1991 }
1992 Ok(())
1993}
1994
1995fn fields_mid_range_chunk(
1999 data: &[u8],
2000 delim: u8,
2001 line_delim: u8,
2002 start_field: usize,
2003 end_field: usize,
2004 suppress: bool,
2005 buf: &mut Vec<u8>,
2006) {
2007 if delim == line_delim {
2009 buf.reserve(data.len());
2010 let mut start = 0;
2011 for end_pos in memchr_iter(line_delim, data) {
2012 let line = &data[start..end_pos];
2013 fields_mid_range_line(
2014 line,
2015 delim,
2016 line_delim,
2017 start_field,
2018 end_field,
2019 suppress,
2020 buf,
2021 );
2022 start = end_pos + 1;
2023 }
2024 if start < data.len() {
2025 fields_mid_range_line(
2026 &data[start..],
2027 delim,
2028 line_delim,
2029 start_field,
2030 end_field,
2031 suppress,
2032 buf,
2033 );
2034 }
2035 return;
2036 }
2037
2038 buf.reserve(data.len());
2039 let base = data.as_ptr();
2040 let skip_before = start_field - 1; let target_end_delim = skip_before + (end_field - start_field) + 1;
2042
2043 let mut line_start: usize = 0;
2044 let mut delim_count: usize = 0;
2045 let mut range_start: usize = 0;
2046 let mut has_delim = false;
2047 let mut found_end = false; for pos in memchr::memchr2_iter(delim, line_delim, data) {
2050 let byte = unsafe { *base.add(pos) };
2051 if byte == line_delim {
2052 if found_end {
2054 } else if !has_delim {
2056 if !suppress {
2058 unsafe {
2059 buf_extend(
2060 buf,
2061 std::slice::from_raw_parts(base.add(line_start), pos + 1 - line_start),
2062 );
2063 }
2064 }
2065 } else if delim_count >= skip_before {
2066 if skip_before == 0 {
2068 range_start = line_start;
2069 }
2070 unsafe {
2071 buf_extend(
2072 buf,
2073 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2074 );
2075 buf_push(buf, line_delim);
2076 }
2077 } else {
2078 unsafe { buf_push(buf, line_delim) };
2080 }
2081 line_start = pos + 1;
2082 delim_count = 0;
2083 has_delim = false;
2084 found_end = false;
2085 } else if !found_end {
2086 has_delim = true;
2088 delim_count += 1;
2089 if delim_count == skip_before {
2090 range_start = pos + 1;
2091 }
2092 if delim_count == target_end_delim {
2093 if skip_before == 0 {
2094 range_start = line_start;
2095 }
2096 unsafe {
2097 buf_extend(
2098 buf,
2099 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2100 );
2101 buf_push(buf, line_delim);
2102 }
2103 found_end = true;
2104 }
2105 }
2106 }
2107 if line_start < data.len() && !found_end {
2109 if !has_delim {
2110 if !suppress {
2111 unsafe {
2112 buf_extend(
2113 buf,
2114 std::slice::from_raw_parts(base.add(line_start), data.len() - line_start),
2115 );
2116 }
2117 }
2118 } else if delim_count >= skip_before {
2119 if skip_before == 0 {
2120 range_start = line_start;
2121 }
2122 unsafe {
2123 buf_extend(
2124 buf,
2125 std::slice::from_raw_parts(base.add(range_start), data.len() - range_start),
2126 );
2127 }
2128 }
2129 }
2130}
2131
2132#[inline(always)]
2136fn fields_mid_range_line(
2137 line: &[u8],
2138 delim: u8,
2139 line_delim: u8,
2140 start_field: usize,
2141 end_field: usize,
2142 suppress: bool,
2143 buf: &mut Vec<u8>,
2144) {
2145 let len = line.len();
2146 if len == 0 {
2147 if !suppress {
2148 unsafe { buf_push(buf, line_delim) };
2149 }
2150 return;
2151 }
2152
2153 let base = line.as_ptr();
2155
2156 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
2160 let mut delim_count = 0;
2161 let mut range_start = 0;
2162 let mut has_delim = false;
2163
2164 for pos in memchr_iter(delim, line) {
2165 has_delim = true;
2166 delim_count += 1;
2167 if delim_count == skip_before {
2168 range_start = pos + 1;
2169 }
2170 if delim_count == target_end_delim {
2171 if skip_before == 0 {
2172 range_start = 0;
2173 }
2174 unsafe {
2175 buf_extend(
2176 buf,
2177 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2178 );
2179 buf_push(buf, line_delim);
2180 }
2181 return;
2182 }
2183 }
2184
2185 if !has_delim {
2186 if !suppress {
2187 unsafe {
2188 buf_extend(buf, line);
2189 buf_push(buf, line_delim);
2190 }
2191 }
2192 return;
2193 }
2194
2195 if delim_count >= skip_before {
2197 if skip_before == 0 {
2199 range_start = 0;
2200 }
2201 unsafe {
2202 buf_extend(
2203 buf,
2204 std::slice::from_raw_parts(base.add(range_start), len - range_start),
2205 );
2206 buf_push(buf, line_delim);
2207 }
2208 } else {
2209 unsafe { buf_push(buf, line_delim) };
2211 }
2212}
2213
2214fn single_field1_parallel(
2225 data: &[u8],
2226 delim: u8,
2227 line_delim: u8,
2228 out: &mut impl Write,
2229) -> io::Result<()> {
2230 let chunks = split_for_scope(data, line_delim);
2231 let n = chunks.len();
2232 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2233 rayon::scope(|s| {
2234 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2235 s.spawn(move |_| {
2236 result.reserve(chunk.len() + 1);
2237 single_field1_to_buf(chunk, delim, line_delim, result);
2238 });
2239 }
2240 });
2241 let slices: Vec<IoSlice> = results
2242 .iter()
2243 .filter(|r| !r.is_empty())
2244 .map(|r| IoSlice::new(r))
2245 .collect();
2246 write_ioslices(out, &slices)
2247}
2248
2249#[inline]
2258fn single_field1_to_buf(data: &[u8], delim: u8, line_delim: u8, buf: &mut Vec<u8>) {
2259 buf.reserve(data.len() + 1);
2262 let base = data.as_ptr();
2263 let mut line_start: usize = 0;
2264 let mut found_delim = false;
2265
2266 for pos in memchr::memchr2_iter(delim, line_delim, data) {
2267 let byte = unsafe { *base.add(pos) };
2268 if byte == line_delim {
2269 if !found_delim {
2270 unsafe {
2272 buf_extend(
2273 buf,
2274 std::slice::from_raw_parts(base.add(line_start), pos + 1 - line_start),
2275 );
2276 }
2277 } else {
2278 unsafe { buf_push(buf, line_delim) };
2280 }
2281 line_start = pos + 1;
2282 found_delim = false;
2283 } else if !found_delim {
2284 found_delim = true;
2286 unsafe {
2287 buf_extend(
2288 buf,
2289 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
2290 );
2291 }
2292 }
2293 }
2295
2296 if line_start < data.len() {
2298 if !found_delim {
2299 unsafe {
2301 buf_extend_byte(
2302 buf,
2303 std::slice::from_raw_parts(base.add(line_start), data.len() - line_start),
2304 line_delim,
2305 );
2306 }
2307 } else {
2308 unsafe { buf_push(buf, line_delim) };
2310 }
2311 }
2312}
2313
2314#[inline]
2323#[allow(dead_code)]
2324fn single_field1_zerocopy(
2325 data: &[u8],
2326 delim: u8,
2327 line_delim: u8,
2328 out: &mut impl Write,
2329) -> io::Result<()> {
2330 let newline_buf: [u8; 1] = [line_delim];
2331
2332 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2333 let mut run_start: usize = 0;
2334 let mut start = 0;
2335
2336 for end_pos in memchr_iter(line_delim, data) {
2337 let line = &data[start..end_pos];
2338 if let Some(dp) = memchr::memchr(delim, line) {
2339 if run_start < start {
2342 iov.push(IoSlice::new(&data[run_start..start]));
2343 }
2344 iov.push(IoSlice::new(&data[start..start + dp]));
2345 iov.push(IoSlice::new(&newline_buf));
2346 run_start = end_pos + 1;
2347
2348 if iov.len() >= MAX_IOV - 2 {
2349 write_ioslices(out, &iov)?;
2350 iov.clear();
2351 }
2352 }
2353 start = end_pos + 1;
2355 }
2356
2357 if start < data.len() {
2359 let line = &data[start..];
2360 if let Some(dp) = memchr::memchr(delim, line) {
2361 if run_start < start {
2362 iov.push(IoSlice::new(&data[run_start..start]));
2363 }
2364 iov.push(IoSlice::new(&data[start..start + dp]));
2365 iov.push(IoSlice::new(&newline_buf));
2366 if !iov.is_empty() {
2367 write_ioslices(out, &iov)?;
2368 }
2369 return Ok(());
2370 }
2371 }
2372
2373 if run_start < data.len() {
2375 iov.push(IoSlice::new(&data[run_start..]));
2376 if !data.is_empty() && *data.last().unwrap() != line_delim {
2377 iov.push(IoSlice::new(&newline_buf));
2378 }
2379 }
2380 if !iov.is_empty() {
2381 write_ioslices(out, &iov)?;
2382 }
2383 Ok(())
2384}
2385
2386fn process_single_field_chunk(
2388 data: &[u8],
2389 delim: u8,
2390 target_idx: usize,
2391 line_delim: u8,
2392 suppress: bool,
2393 buf: &mut Vec<u8>,
2394) {
2395 buf.reserve(data.len());
2397 let mut start = 0;
2398 for end_pos in memchr_iter(line_delim, data) {
2399 let line = &data[start..end_pos];
2400 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
2401 start = end_pos + 1;
2402 }
2403 if start < data.len() {
2404 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
2405 }
2406}
2407
2408#[inline(always)]
2413fn extract_single_field_line(
2414 line: &[u8],
2415 delim: u8,
2416 target_idx: usize,
2417 line_delim: u8,
2418 suppress: bool,
2419 buf: &mut Vec<u8>,
2420) {
2421 let len = line.len();
2422 if len == 0 {
2423 if !suppress {
2424 unsafe { buf_push(buf, line_delim) };
2425 }
2426 return;
2427 }
2428
2429 let base = line.as_ptr();
2431
2432 if target_idx == 0 {
2434 match memchr::memchr(delim, line) {
2435 Some(pos) => unsafe {
2436 buf_extend_byte(buf, std::slice::from_raw_parts(base, pos), line_delim);
2437 },
2438 None => {
2439 if !suppress {
2440 unsafe {
2441 buf_extend_byte(buf, line, line_delim);
2442 }
2443 }
2444 }
2445 }
2446 return;
2447 }
2448
2449 let mut field_start = 0;
2451 let mut field_idx = 0;
2452 let mut has_delim = false;
2453
2454 for pos in memchr_iter(delim, line) {
2455 has_delim = true;
2456 if field_idx == target_idx {
2457 unsafe {
2458 buf_extend_byte(
2459 buf,
2460 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
2461 line_delim,
2462 );
2463 }
2464 return;
2465 }
2466 field_idx += 1;
2467 field_start = pos + 1;
2468 }
2469
2470 if !has_delim {
2471 if !suppress {
2472 unsafe {
2473 buf_extend_byte(buf, line, line_delim);
2474 }
2475 }
2476 return;
2477 }
2478
2479 if field_idx == target_idx {
2480 unsafe {
2481 buf_extend_byte(
2482 buf,
2483 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2484 line_delim,
2485 );
2486 }
2487 } else {
2488 unsafe { buf_push(buf, line_delim) };
2489 }
2490}
2491
2492#[inline(always)]
2496fn extract_fields_to_buf(
2497 line: &[u8],
2498 delim: u8,
2499 ranges: &[Range],
2500 output_delim: &[u8],
2501 suppress: bool,
2502 max_field: usize,
2503 field_mask: u64,
2504 line_delim: u8,
2505 buf: &mut Vec<u8>,
2506 complement: bool,
2507) {
2508 let len = line.len();
2509
2510 if len == 0 {
2511 if !suppress {
2512 buf.push(line_delim);
2513 }
2514 return;
2515 }
2516
2517 let needed = len + output_delim.len() * 16 + 1;
2520 if buf.capacity() - buf.len() < needed {
2521 buf.reserve(needed);
2522 }
2523
2524 let base = line.as_ptr();
2525 let mut field_num: usize = 1;
2526 let mut field_start: usize = 0;
2527 let mut first_output = true;
2528 let mut has_delim = false;
2529
2530 for delim_pos in memchr_iter(delim, line) {
2532 has_delim = true;
2533
2534 if is_selected(field_num, field_mask, ranges, complement) {
2535 if !first_output {
2536 unsafe { buf_extend(buf, output_delim) };
2537 }
2538 unsafe {
2539 buf_extend(
2540 buf,
2541 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2542 )
2543 };
2544 first_output = false;
2545 }
2546
2547 field_num += 1;
2548 field_start = delim_pos + 1;
2549
2550 if field_num > max_field {
2551 break;
2552 }
2553 }
2554
2555 if (field_num <= max_field || complement)
2557 && has_delim
2558 && is_selected(field_num, field_mask, ranges, complement)
2559 {
2560 if !first_output {
2561 unsafe { buf_extend(buf, output_delim) };
2562 }
2563 unsafe {
2564 buf_extend(
2565 buf,
2566 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2567 )
2568 };
2569 first_output = false;
2570 }
2571
2572 if !first_output {
2573 unsafe { buf_push(buf, line_delim) };
2574 } else if !has_delim {
2575 if !suppress {
2576 unsafe {
2577 buf_extend(buf, line);
2578 buf_push(buf, line_delim);
2579 }
2580 }
2581 } else {
2582 unsafe { buf_push(buf, line_delim) };
2583 }
2584}
2585
2586fn process_bytes_from_start(
2593 data: &[u8],
2594 max_bytes: usize,
2595 line_delim: u8,
2596 out: &mut impl Write,
2597) -> io::Result<()> {
2598 if data.len() < PARALLEL_THRESHOLD && max_bytes > 0 && max_bytes < usize::MAX {
2606 let mut start = 0;
2607 let mut all_fit = true;
2608 for pos in memchr_iter(line_delim, data) {
2609 if pos - start > max_bytes {
2610 all_fit = false;
2611 break;
2612 }
2613 start = pos + 1;
2614 }
2615 if all_fit && start < data.len() && data.len() - start > max_bytes {
2617 all_fit = false;
2618 }
2619 if all_fit {
2620 if !data.is_empty() && data[data.len() - 1] == line_delim {
2622 return out.write_all(data);
2623 } else if !data.is_empty() {
2624 out.write_all(data)?;
2625 return out.write_all(&[line_delim]);
2626 }
2627 return Ok(());
2628 }
2629 }
2630
2631 if data.len() >= PARALLEL_THRESHOLD {
2632 let chunks = split_for_scope(data, line_delim);
2633 let n = chunks.len();
2634 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2635 rayon::scope(|s| {
2636 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2637 s.spawn(move |_| {
2638 result.reserve(chunk.len());
2641 bytes_from_start_chunk(chunk, max_bytes, line_delim, result);
2642 });
2643 }
2644 });
2645 let slices: Vec<IoSlice> = results
2647 .iter()
2648 .filter(|r| !r.is_empty())
2649 .map(|r| IoSlice::new(r))
2650 .collect();
2651 write_ioslices(out, &slices)?;
2652 } else {
2653 if max_bytes <= 512 {
2659 let est_out = (data.len() / 4).max(max_bytes + 2);
2662 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2663 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2664 if !buf.is_empty() {
2665 out.write_all(&buf)?;
2666 }
2667 } else {
2668 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2672 }
2673 }
2674 Ok(())
2675}
2676
2677#[inline]
2682fn bytes_from_start_zerocopy(
2683 data: &[u8],
2684 max_bytes: usize,
2685 line_delim: u8,
2686 out: &mut impl Write,
2687) -> io::Result<()> {
2688 let newline_buf: [u8; 1] = [line_delim];
2689 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2690 let mut start = 0;
2691 let mut run_start: usize = 0;
2692
2693 for pos in memchr_iter(line_delim, data) {
2694 let line_len = pos - start;
2695 if line_len > max_bytes {
2696 if run_start < start {
2698 iov.push(IoSlice::new(&data[run_start..start]));
2699 }
2700 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2701 iov.push(IoSlice::new(&newline_buf));
2702 run_start = pos + 1;
2703
2704 if iov.len() >= MAX_IOV - 2 {
2705 write_ioslices(out, &iov)?;
2706 iov.clear();
2707 }
2708 }
2709 start = pos + 1;
2710 }
2711 if start < data.len() {
2713 let line_len = data.len() - start;
2714 if line_len > max_bytes {
2715 if run_start < start {
2716 iov.push(IoSlice::new(&data[run_start..start]));
2717 }
2718 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2719 iov.push(IoSlice::new(&newline_buf));
2720 if !iov.is_empty() {
2721 write_ioslices(out, &iov)?;
2722 }
2723 return Ok(());
2724 }
2725 }
2726 if run_start < data.len() {
2728 iov.push(IoSlice::new(&data[run_start..]));
2729 if !data.is_empty() && *data.last().unwrap() != line_delim {
2730 iov.push(IoSlice::new(&newline_buf));
2731 }
2732 }
2733 if !iov.is_empty() {
2734 write_ioslices(out, &iov)?;
2735 }
2736 Ok(())
2737}
2738
2739#[inline]
2744fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2745 buf.reserve(data.len());
2748
2749 let src = data.as_ptr();
2750 let dst_base = buf.as_mut_ptr();
2751 let mut wp = buf.len();
2752 let mut start = 0;
2753
2754 for pos in memchr_iter(line_delim, data) {
2755 let line_len = pos - start;
2756 let take = line_len.min(max_bytes);
2757 unsafe {
2758 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2759 *dst_base.add(wp + take) = line_delim;
2760 }
2761 wp += take + 1;
2762 start = pos + 1;
2763 }
2764 if start < data.len() {
2766 let line_len = data.len() - start;
2767 let take = line_len.min(max_bytes);
2768 unsafe {
2769 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2770 *dst_base.add(wp + take) = line_delim;
2771 }
2772 wp += take + 1;
2773 }
2774 unsafe { buf.set_len(wp) };
2775}
2776
2777fn process_bytes_from_offset(
2779 data: &[u8],
2780 skip_bytes: usize,
2781 line_delim: u8,
2782 out: &mut impl Write,
2783) -> io::Result<()> {
2784 if data.len() >= PARALLEL_THRESHOLD {
2785 let chunks = split_for_scope(data, line_delim);
2786 let n = chunks.len();
2787 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2788 rayon::scope(|s| {
2789 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2790 s.spawn(move |_| {
2791 result.reserve(chunk.len());
2792 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, result);
2793 });
2794 }
2795 });
2796 let slices: Vec<IoSlice> = results
2798 .iter()
2799 .filter(|r| !r.is_empty())
2800 .map(|r| IoSlice::new(r))
2801 .collect();
2802 write_ioslices(out, &slices)?;
2803 } else {
2804 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2806 }
2807 Ok(())
2808}
2809
2810#[inline]
2814fn bytes_from_offset_zerocopy(
2815 data: &[u8],
2816 skip_bytes: usize,
2817 line_delim: u8,
2818 out: &mut impl Write,
2819) -> io::Result<()> {
2820 let delim_buf = [line_delim];
2821 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2822
2823 let mut start = 0;
2824 for pos in memchr_iter(line_delim, data) {
2825 let line_len = pos - start;
2826 if line_len > skip_bytes {
2827 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2828 }
2829 iov.push(IoSlice::new(&delim_buf));
2830 if iov.len() >= MAX_IOV - 1 {
2832 write_ioslices(out, &iov)?;
2833 iov.clear();
2834 }
2835 start = pos + 1;
2836 }
2837 if start < data.len() {
2838 let line_len = data.len() - start;
2839 if line_len > skip_bytes {
2840 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2841 }
2842 iov.push(IoSlice::new(&delim_buf));
2843 }
2844 if !iov.is_empty() {
2845 write_ioslices(out, &iov)?;
2846 }
2847 Ok(())
2848}
2849
2850#[inline]
2853fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2854 buf.reserve(data.len());
2855
2856 let src = data.as_ptr();
2857 let dst_base = buf.as_mut_ptr();
2858 let mut wp = buf.len();
2859 let mut start = 0;
2860
2861 for pos in memchr_iter(line_delim, data) {
2862 let line_len = pos - start;
2863 if line_len > skip_bytes {
2864 let take = line_len - skip_bytes;
2865 unsafe {
2866 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2867 }
2868 wp += take;
2869 }
2870 unsafe {
2871 *dst_base.add(wp) = line_delim;
2872 }
2873 wp += 1;
2874 start = pos + 1;
2875 }
2876 if start < data.len() {
2877 let line_len = data.len() - start;
2878 if line_len > skip_bytes {
2879 let take = line_len - skip_bytes;
2880 unsafe {
2881 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2882 }
2883 wp += take;
2884 }
2885 unsafe {
2886 *dst_base.add(wp) = line_delim;
2887 }
2888 wp += 1;
2889 }
2890 unsafe { buf.set_len(wp) };
2891}
2892
2893fn process_bytes_mid_range(
2895 data: &[u8],
2896 start_byte: usize,
2897 end_byte: usize,
2898 line_delim: u8,
2899 out: &mut impl Write,
2900) -> io::Result<()> {
2901 let skip = start_byte.saturating_sub(1);
2902
2903 if data.len() >= PARALLEL_THRESHOLD {
2904 let chunks = split_for_scope(data, line_delim);
2905 let n = chunks.len();
2906 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2907 rayon::scope(|s| {
2908 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2909 s.spawn(move |_| {
2910 result.reserve(chunk.len());
2911 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, result);
2912 });
2913 }
2914 });
2915 let slices: Vec<IoSlice> = results
2916 .iter()
2917 .filter(|r| !r.is_empty())
2918 .map(|r| IoSlice::new(r))
2919 .collect();
2920 write_ioslices(out, &slices)?;
2921 } else {
2922 let mut buf = Vec::with_capacity(data.len());
2923 bytes_mid_range_chunk(data, skip, end_byte, line_delim, &mut buf);
2924 if !buf.is_empty() {
2925 out.write_all(&buf)?;
2926 }
2927 }
2928 Ok(())
2929}
2930
2931#[inline]
2935fn bytes_mid_range_chunk(
2936 data: &[u8],
2937 skip: usize,
2938 end_byte: usize,
2939 line_delim: u8,
2940 buf: &mut Vec<u8>,
2941) {
2942 buf.reserve(data.len());
2943
2944 let src = data.as_ptr();
2945 let dst_base = buf.as_mut_ptr();
2946 let mut wp = buf.len();
2947 let mut start = 0;
2948
2949 for pos in memchr_iter(line_delim, data) {
2950 let line_len = pos - start;
2951 if line_len > skip {
2952 let take_end = line_len.min(end_byte);
2953 let take = take_end - skip;
2954 unsafe {
2955 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2956 }
2957 wp += take;
2958 }
2959 unsafe {
2960 *dst_base.add(wp) = line_delim;
2961 }
2962 wp += 1;
2963 start = pos + 1;
2964 }
2965 if start < data.len() {
2966 let line_len = data.len() - start;
2967 if line_len > skip {
2968 let take_end = line_len.min(end_byte);
2969 let take = take_end - skip;
2970 unsafe {
2971 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2972 }
2973 wp += take;
2974 }
2975 unsafe {
2976 *dst_base.add(wp) = line_delim;
2977 }
2978 wp += 1;
2979 }
2980 unsafe { buf.set_len(wp) };
2981}
2982
2983fn process_bytes_complement_mid(
2985 data: &[u8],
2986 skip_start: usize,
2987 skip_end: usize,
2988 line_delim: u8,
2989 out: &mut impl Write,
2990) -> io::Result<()> {
2991 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
2993 let chunks = split_for_scope(data, line_delim);
2994 let n = chunks.len();
2995 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2996 rayon::scope(|s| {
2997 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2998 s.spawn(move |_| {
2999 result.reserve(chunk.len());
3000 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, result);
3001 });
3002 }
3003 });
3004 let slices: Vec<IoSlice> = results
3005 .iter()
3006 .filter(|r| !r.is_empty())
3007 .map(|r| IoSlice::new(r))
3008 .collect();
3009 write_ioslices(out, &slices)?;
3010 } else {
3011 let mut buf = Vec::with_capacity(data.len());
3012 bytes_complement_mid_chunk(data, prefix_bytes, skip_end, line_delim, &mut buf);
3013 if !buf.is_empty() {
3014 out.write_all(&buf)?;
3015 }
3016 }
3017 Ok(())
3018}
3019
3020#[inline]
3023fn bytes_complement_mid_chunk(
3024 data: &[u8],
3025 prefix_bytes: usize,
3026 skip_end: usize,
3027 line_delim: u8,
3028 buf: &mut Vec<u8>,
3029) {
3030 buf.reserve(data.len());
3031
3032 let src = data.as_ptr();
3033 let dst_base = buf.as_mut_ptr();
3034 let mut wp = buf.len();
3035 let mut start = 0;
3036
3037 for pos in memchr_iter(line_delim, data) {
3038 let line_len = pos - start;
3039 let take_prefix = prefix_bytes.min(line_len);
3041 if take_prefix > 0 {
3042 unsafe {
3043 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
3044 }
3045 wp += take_prefix;
3046 }
3047 if line_len > skip_end {
3049 let suffix_len = line_len - skip_end;
3050 unsafe {
3051 std::ptr::copy_nonoverlapping(
3052 src.add(start + skip_end),
3053 dst_base.add(wp),
3054 suffix_len,
3055 );
3056 }
3057 wp += suffix_len;
3058 }
3059 unsafe {
3060 *dst_base.add(wp) = line_delim;
3061 }
3062 wp += 1;
3063 start = pos + 1;
3064 }
3065 if start < data.len() {
3066 let line_len = data.len() - start;
3067 let take_prefix = prefix_bytes.min(line_len);
3068 if take_prefix > 0 {
3069 unsafe {
3070 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
3071 }
3072 wp += take_prefix;
3073 }
3074 if line_len > skip_end {
3075 let suffix_len = line_len - skip_end;
3076 unsafe {
3077 std::ptr::copy_nonoverlapping(
3078 src.add(start + skip_end),
3079 dst_base.add(wp),
3080 suffix_len,
3081 );
3082 }
3083 wp += suffix_len;
3084 }
3085 unsafe {
3086 *dst_base.add(wp) = line_delim;
3087 }
3088 wp += 1;
3089 }
3090 unsafe { buf.set_len(wp) };
3091}
3092
3093fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3095 let line_delim = cfg.line_delim;
3096 let ranges = cfg.ranges;
3097 let complement = cfg.complement;
3098 let output_delim = cfg.output_delim;
3099
3100 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
3102 let max_bytes = ranges[0].end;
3103 if max_bytes < usize::MAX {
3104 return process_bytes_from_start(data, max_bytes, line_delim, out);
3105 }
3106 }
3107
3108 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
3110 let skip_bytes = ranges[0].start.saturating_sub(1);
3111 if skip_bytes > 0 {
3112 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
3113 }
3114 }
3115
3116 if !complement
3118 && ranges.len() == 1
3119 && ranges[0].start > 1
3120 && ranges[0].end < usize::MAX
3121 && output_delim.is_empty()
3122 {
3123 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
3124 }
3125
3126 if complement
3128 && ranges.len() == 1
3129 && ranges[0].start == 1
3130 && ranges[0].end < usize::MAX
3131 && output_delim.is_empty()
3132 {
3133 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
3134 }
3135
3136 if complement
3138 && ranges.len() == 1
3139 && ranges[0].end == usize::MAX
3140 && ranges[0].start > 1
3141 && output_delim.is_empty()
3142 {
3143 let max_bytes = ranges[0].start - 1;
3144 return process_bytes_from_start(data, max_bytes, line_delim, out);
3145 }
3146
3147 if complement
3149 && ranges.len() == 1
3150 && ranges[0].start > 1
3151 && ranges[0].end < usize::MAX
3152 && output_delim.is_empty()
3153 {
3154 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
3155 }
3156
3157 if data.len() >= PARALLEL_THRESHOLD {
3158 let chunks = split_for_scope(data, line_delim);
3159 let n = chunks.len();
3160 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
3161 rayon::scope(|s| {
3162 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
3163 s.spawn(move |_| {
3164 result.reserve(chunk.len());
3165 process_bytes_chunk(
3166 chunk,
3167 ranges,
3168 complement,
3169 output_delim,
3170 line_delim,
3171 result,
3172 );
3173 });
3174 }
3175 });
3176 let slices: Vec<IoSlice> = results
3177 .iter()
3178 .filter(|r| !r.is_empty())
3179 .map(|r| IoSlice::new(r))
3180 .collect();
3181 write_ioslices(out, &slices)?;
3182 } else {
3183 let mut buf = Vec::with_capacity(data.len());
3184 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
3185 if !buf.is_empty() {
3186 out.write_all(&buf)?;
3187 }
3188 }
3189 Ok(())
3190}
3191
3192fn process_bytes_chunk(
3197 data: &[u8],
3198 ranges: &[Range],
3199 complement: bool,
3200 output_delim: &[u8],
3201 line_delim: u8,
3202 buf: &mut Vec<u8>,
3203) {
3204 buf.reserve(data.len());
3205 let base = data.as_ptr();
3206 let mut start = 0;
3207 for end_pos in memchr_iter(line_delim, data) {
3208 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
3209 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3210 unsafe { buf_push(buf, line_delim) };
3211 start = end_pos + 1;
3212 }
3213 if start < data.len() {
3214 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
3215 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3216 unsafe { buf_push(buf, line_delim) };
3217 }
3218}
3219
3220#[inline(always)]
3224fn cut_bytes_to_buf(
3225 line: &[u8],
3226 ranges: &[Range],
3227 complement: bool,
3228 output_delim: &[u8],
3229 buf: &mut Vec<u8>,
3230) {
3231 let len = line.len();
3232 let base = line.as_ptr();
3233 let mut first_range = true;
3234
3235 let needed = len + output_delim.len() * ranges.len() + 1;
3237 if buf.capacity() - buf.len() < needed {
3238 buf.reserve(needed);
3239 }
3240
3241 if complement {
3242 let mut pos: usize = 1;
3243 for r in ranges {
3244 let rs = r.start;
3245 let re = r.end.min(len);
3246 if pos < rs {
3247 if !first_range && !output_delim.is_empty() {
3248 unsafe { buf_extend(buf, output_delim) };
3249 }
3250 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
3251 first_range = false;
3252 }
3253 pos = re + 1;
3254 if pos > len {
3255 break;
3256 }
3257 }
3258 if pos <= len {
3259 if !first_range && !output_delim.is_empty() {
3260 unsafe { buf_extend(buf, output_delim) };
3261 }
3262 unsafe {
3263 buf_extend(
3264 buf,
3265 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
3266 )
3267 };
3268 }
3269 } else if output_delim.is_empty() && ranges.len() == 1 {
3270 let start = ranges[0].start.saturating_sub(1);
3272 let end = ranges[0].end.min(len);
3273 if start < len {
3274 unsafe {
3275 buf_extend(
3276 buf,
3277 std::slice::from_raw_parts(base.add(start), end - start),
3278 )
3279 };
3280 }
3281 } else {
3282 for r in ranges {
3283 let start = r.start.saturating_sub(1);
3284 let end = r.end.min(len);
3285 if start >= len {
3286 break;
3287 }
3288 if !first_range && !output_delim.is_empty() {
3289 unsafe { buf_extend(buf, output_delim) };
3290 }
3291 unsafe {
3292 buf_extend(
3293 buf,
3294 std::slice::from_raw_parts(base.add(start), end - start),
3295 )
3296 };
3297 first_range = false;
3298 }
3299 }
3300}
3301
3302#[inline]
3306pub fn cut_fields(
3307 line: &[u8],
3308 delim: u8,
3309 ranges: &[Range],
3310 complement: bool,
3311 output_delim: &[u8],
3312 suppress_no_delim: bool,
3313 out: &mut impl Write,
3314) -> io::Result<bool> {
3315 if memchr::memchr(delim, line).is_none() {
3316 if !suppress_no_delim {
3317 out.write_all(line)?;
3318 return Ok(true);
3319 }
3320 return Ok(false);
3321 }
3322
3323 let mut field_num: usize = 1;
3324 let mut field_start: usize = 0;
3325 let mut first_output = true;
3326
3327 for delim_pos in memchr_iter(delim, line) {
3328 let selected = in_ranges(ranges, field_num) != complement;
3329 if selected {
3330 if !first_output {
3331 out.write_all(output_delim)?;
3332 }
3333 out.write_all(&line[field_start..delim_pos])?;
3334 first_output = false;
3335 }
3336 field_start = delim_pos + 1;
3337 field_num += 1;
3338 }
3339
3340 let selected = in_ranges(ranges, field_num) != complement;
3341 if selected {
3342 if !first_output {
3343 out.write_all(output_delim)?;
3344 }
3345 out.write_all(&line[field_start..])?;
3346 }
3347
3348 Ok(true)
3349}
3350
3351#[inline]
3353pub fn cut_bytes(
3354 line: &[u8],
3355 ranges: &[Range],
3356 complement: bool,
3357 output_delim: &[u8],
3358 out: &mut impl Write,
3359) -> io::Result<bool> {
3360 let mut first_range = true;
3361
3362 if complement {
3363 let len = line.len();
3364 let mut comp_ranges = Vec::new();
3365 let mut pos: usize = 1;
3366 for r in ranges {
3367 let rs = r.start;
3368 let re = r.end.min(len);
3369 if pos < rs {
3370 comp_ranges.push((pos, rs - 1));
3371 }
3372 pos = re + 1;
3373 if pos > len {
3374 break;
3375 }
3376 }
3377 if pos <= len {
3378 comp_ranges.push((pos, len));
3379 }
3380 for &(s, e) in &comp_ranges {
3381 if !first_range && !output_delim.is_empty() {
3382 out.write_all(output_delim)?;
3383 }
3384 out.write_all(&line[s - 1..e])?;
3385 first_range = false;
3386 }
3387 } else {
3388 for r in ranges {
3389 let start = r.start.saturating_sub(1);
3390 let end = r.end.min(line.len());
3391 if start >= line.len() {
3392 break;
3393 }
3394 if !first_range && !output_delim.is_empty() {
3395 out.write_all(output_delim)?;
3396 }
3397 out.write_all(&line[start..end])?;
3398 first_range = false;
3399 }
3400 }
3401 Ok(true)
3402}
3403
3404pub fn cut_field1_inplace(data: &mut [u8], delim: u8, line_delim: u8, suppress: bool) -> usize {
3412 let len = data.len();
3413 let mut wp: usize = 0;
3414 let mut rp: usize = 0;
3415
3416 while rp < len {
3417 match memchr::memchr2(delim, line_delim, &data[rp..]) {
3418 None => {
3419 if suppress {
3421 break;
3423 }
3424 let remaining = len - rp;
3425 if wp != rp {
3426 data.copy_within(rp..len, wp);
3427 }
3428 wp += remaining;
3429 break;
3430 }
3431 Some(offset) => {
3432 let actual = rp + offset;
3433 if data[actual] == line_delim {
3434 if suppress {
3436 rp = actual + 1;
3438 } else {
3439 let chunk_len = actual + 1 - rp;
3441 if wp != rp {
3442 data.copy_within(rp..actual + 1, wp);
3443 }
3444 wp += chunk_len;
3445 rp = actual + 1;
3446 }
3447 } else {
3448 let field_len = actual - rp;
3450 if wp != rp && field_len > 0 {
3451 data.copy_within(rp..actual, wp);
3452 }
3453 wp += field_len;
3454 data[wp] = line_delim;
3455 wp += 1;
3456 match memchr::memchr(line_delim, &data[actual + 1..]) {
3458 None => {
3459 rp = len;
3460 }
3461 Some(nl_off) => {
3462 rp = actual + 1 + nl_off + 1;
3463 }
3464 }
3465 }
3466 }
3467 }
3468 }
3469 wp
3470}
3471
3472pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3474 match cfg.mode {
3475 CutMode::Fields => process_fields_fast(data, cfg, out),
3476 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
3477 }
3478}
3479
3480pub fn process_cut_reader<R: BufRead>(
3485 mut reader: R,
3486 cfg: &CutConfig,
3487 out: &mut impl Write,
3488) -> io::Result<()> {
3489 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3491
3492 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3495
3496 loop {
3497 buf.reserve(CHUNK_SIZE);
3499 let read_start = buf.len();
3500 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3501 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3502 buf.truncate(read_start + n);
3503
3504 if buf.is_empty() {
3505 break;
3506 }
3507
3508 if n == 0 {
3509 process_cut_data(&buf, cfg, out)?;
3511 break;
3512 }
3513
3514 let process_end = match memchr::memrchr(line_delim, &buf) {
3516 Some(pos) => pos + 1,
3517 None => {
3518 continue;
3520 }
3521 };
3522
3523 process_cut_data(&buf[..process_end], cfg, out)?;
3525
3526 let leftover_len = buf.len() - process_end;
3528 if leftover_len > 0 {
3529 buf.copy_within(process_end.., 0);
3530 }
3531 buf.truncate(leftover_len);
3532 }
3533
3534 Ok(())
3535}
3536
/// Reads from `reader` until `buf` is full or the reader reports end of input.
///
/// Returns the number of bytes placed at the front of `buf`; a value smaller than
/// `buf.len()` means end of input was reached. `ErrorKind::Interrupted` is retried on
/// every read, including the first one.
///
/// # Errors
/// Returns the first read error that is not `ErrorKind::Interrupted`.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3556
3557pub fn process_cut_data_mut(data: &mut [u8], cfg: &CutConfig) -> Option<usize> {
3565 if cfg.complement {
3566 return None;
3567 }
3568
3569 match cfg.mode {
3570 CutMode::Fields => {
3571 if cfg.output_delim.len() != 1 || cfg.output_delim[0] != cfg.delim {
3573 return None;
3574 }
3575 if cfg.delim == cfg.line_delim {
3576 return None;
3577 }
3578 Some(cut_fields_inplace_general(
3579 data,
3580 cfg.delim,
3581 cfg.line_delim,
3582 cfg.ranges,
3583 cfg.suppress_no_delim,
3584 ))
3585 }
3586 CutMode::Bytes | CutMode::Characters => {
3587 if !cfg.output_delim.is_empty() {
3588 return None;
3589 }
3590 Some(cut_bytes_inplace_general(data, cfg.line_delim, cfg.ranges))
3591 }
3592 }
3593}
3594
3595fn cut_fields_inplace_general(
3598 data: &mut [u8],
3599 delim: u8,
3600 line_delim: u8,
3601 ranges: &[Range],
3602 suppress: bool,
3603) -> usize {
3604 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == 1 {
3606 return cut_field1_inplace(data, delim, line_delim, suppress);
3607 }
3608
3609 let len = data.len();
3610 if len == 0 {
3611 return 0;
3612 }
3613
3614 let max_field = ranges.last().map_or(0, |r| r.end);
3615 let max_delims = max_field.min(64);
3616 let mut wp: usize = 0;
3617 let mut rp: usize = 0;
3618
3619 while rp < len {
3620 let line_end = memchr::memchr(line_delim, &data[rp..])
3621 .map(|p| rp + p)
3622 .unwrap_or(len);
3623 let line_len = line_end - rp;
3624
3625 let mut delim_pos = [0usize; 64];
3627 let mut num_delims: usize = 0;
3628
3629 for pos in memchr_iter(delim, &data[rp..line_end]) {
3630 if num_delims < max_delims {
3631 delim_pos[num_delims] = pos;
3632 num_delims += 1;
3633 if num_delims >= max_delims {
3634 break;
3635 }
3636 }
3637 }
3638
3639 if num_delims == 0 {
3640 if !suppress {
3642 if wp != rp {
3643 data.copy_within(rp..line_end, wp);
3644 }
3645 wp += line_len;
3646 if line_end < len {
3647 data[wp] = line_delim;
3648 wp += 1;
3649 }
3650 }
3651 } else {
3652 let total_fields = num_delims + 1;
3653 let mut first_output = true;
3654
3655 for r in ranges {
3656 let range_start = r.start;
3657 let range_end = r.end.min(total_fields);
3658 if range_start > total_fields {
3659 break;
3660 }
3661 for field_num in range_start..=range_end {
3662 if field_num > total_fields {
3663 break;
3664 }
3665
3666 let field_start = if field_num == 1 {
3667 0
3668 } else if field_num - 2 < num_delims {
3669 delim_pos[field_num - 2] + 1
3670 } else {
3671 continue;
3672 };
3673 let field_end = if field_num <= num_delims {
3674 delim_pos[field_num - 1]
3675 } else {
3676 line_len
3677 };
3678
3679 if !first_output {
3680 data[wp] = delim;
3681 wp += 1;
3682 }
3683 let flen = field_end - field_start;
3684 if flen > 0 {
3685 data.copy_within(rp + field_start..rp + field_start + flen, wp);
3686 wp += flen;
3687 }
3688 first_output = false;
3689 }
3690 }
3691
3692 if !first_output && line_end < len {
3693 data[wp] = line_delim;
3694 wp += 1;
3695 } else if first_output && line_end < len {
3696 data[wp] = line_delim;
3698 wp += 1;
3699 }
3700 }
3701
3702 rp = if line_end < len { line_end + 1 } else { len };
3703 }
3704
3705 wp
3706}
3707
3708fn cut_bytes_inplace_general(data: &mut [u8], line_delim: u8, ranges: &[Range]) -> usize {
3710 let len = data.len();
3711 if len == 0 {
3712 return 0;
3713 }
3714
3715 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == usize::MAX {
3717 return len;
3718 }
3719
3720 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end < usize::MAX {
3722 return cut_bytes_from_start_inplace(data, line_delim, ranges[0].end);
3723 }
3724
3725 let mut wp: usize = 0;
3726 let mut rp: usize = 0;
3727
3728 while rp < len {
3729 let line_end = memchr::memchr(line_delim, &data[rp..])
3730 .map(|p| rp + p)
3731 .unwrap_or(len);
3732 let line_len = line_end - rp;
3733
3734 for r in ranges {
3735 let start = r.start.saturating_sub(1);
3736 let end = r.end.min(line_len);
3737 if start >= line_len {
3738 break;
3739 }
3740 let flen = end - start;
3741 if flen > 0 {
3742 data.copy_within(rp + start..rp + start + flen, wp);
3743 wp += flen;
3744 }
3745 }
3746
3747 if line_end < len {
3748 data[wp] = line_delim;
3749 wp += 1;
3750 }
3751
3752 rp = if line_end < len { line_end + 1 } else { len };
3753 }
3754
3755 wp
3756}
3757
3758fn cut_bytes_from_start_inplace(data: &mut [u8], line_delim: u8, max_bytes: usize) -> usize {
3760 let len = data.len();
3761
3762 let mut all_fit = true;
3764 let mut start = 0;
3765 for pos in memchr_iter(line_delim, data) {
3766 if pos - start > max_bytes {
3767 all_fit = false;
3768 break;
3769 }
3770 start = pos + 1;
3771 }
3772 if all_fit && start < len && len - start > max_bytes {
3773 all_fit = false;
3774 }
3775 if all_fit {
3776 return len;
3777 }
3778
3779 let mut wp: usize = 0;
3781 let mut rp: usize = 0;
3782
3783 while rp < len {
3784 let line_end = memchr::memchr(line_delim, &data[rp..])
3785 .map(|p| rp + p)
3786 .unwrap_or(len);
3787 let line_len = line_end - rp;
3788
3789 let take = line_len.min(max_bytes);
3790 if take > 0 && wp != rp {
3791 data.copy_within(rp..rp + take, wp);
3792 }
3793 wp += take;
3794
3795 if line_end < len {
3796 data[wp] = line_delim;
3797 wp += 1;
3798 }
3799
3800 rp = if line_end < len { line_end + 1 } else { len };
3801 }
3802
3803 wp
3804}
3805
/// What a selection list refers to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Positions are byte offsets within a line.
    Bytes,
    /// Positions are characters; `process_cut_data` and `process_cut_data_mut` currently
    /// handle this exactly like [`CutMode::Bytes`].
    Characters,
    /// Positions are delimiter-separated fields.
    Fields,
}