1use memchr::memchr_iter;
2use std::io::{self, BufRead, IoSlice, Write};
3
/// Input size in bytes (16 MiB) from which the field kernels split the data
/// into per-thread chunks instead of processing it in a single pass.
const PARALLEL_THRESHOLD: usize = 16 << 20;

/// Upper bound on the number of `IoSlice`s passed to one `write_vectored` call.
// NOTE(review): presumably mirrors the platform IOV_MAX limit — confirm.
const MAX_IOV: usize = 1 << 10;
11
12pub struct CutConfig<'a> {
14 pub mode: CutMode,
15 pub ranges: &'a [Range],
16 pub complement: bool,
17 pub delim: u8,
18 pub output_delim: &'a [u8],
19 pub suppress_no_delim: bool,
20 pub line_delim: u8,
21}
22
/// An inclusive span of 1-based field or position numbers.
///
/// `parse_ranges` stores `usize::MAX` in `end` for an open-ended span
/// such as `3-`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position (1-based).
    pub start: usize,
    /// Last selected position, inclusive.
    pub end: usize,
}
29
30pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
33 let mut ranges = Vec::new();
34
35 for part in spec.split(',') {
36 let part = part.trim();
37 if part.is_empty() {
38 continue;
39 }
40
41 if let Some(idx) = part.find('-') {
42 let left = &part[..idx];
43 let right = &part[idx + 1..];
44
45 let start = if left.is_empty() {
46 1
47 } else {
48 left.parse::<usize>()
49 .map_err(|_| format!("invalid range: '{}'", part))?
50 };
51
52 let end = if right.is_empty() {
53 usize::MAX
54 } else {
55 right
56 .parse::<usize>()
57 .map_err(|_| format!("invalid range: '{}'", part))?
58 };
59
60 if start == 0 {
61 return Err("fields and positions are numbered from 1".to_string());
62 }
63 if start > end {
64 return Err(format!("invalid decreasing range: '{}'", part));
65 }
66
67 ranges.push(Range { start, end });
68 } else {
69 let n = part
70 .parse::<usize>()
71 .map_err(|_| format!("invalid field: '{}'", part))?;
72 if n == 0 {
73 return Err("fields and positions are numbered from 1".to_string());
74 }
75 ranges.push(Range { start: n, end: n });
76 }
77 }
78
79 if ranges.is_empty() {
80 return Err("you must specify a list of bytes, characters, or fields".to_string());
81 }
82
83 ranges.sort_by_key(|r| (r.start, r.end));
85 let mut merged = vec![ranges[0].clone()];
86 for r in &ranges[1..] {
87 let last = merged.last_mut().unwrap();
88 if r.start <= last.end.saturating_add(1) {
89 last.end = last.end.max(r.end);
90 } else {
91 merged.push(r.clone());
92 }
93 }
94
95 Ok(merged)
96}
97
98#[inline(always)]
101fn in_ranges(ranges: &[Range], pos: usize) -> bool {
102 for r in ranges {
103 if pos < r.start {
104 return false;
105 }
106 if pos <= r.end {
107 return true;
108 }
109 }
110 false
111}
112
113#[inline]
116fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
117 let mut mask: u64 = 0;
118 for i in 1..=64u32 {
119 let in_range = in_ranges(ranges, i as usize);
120 if in_range != complement {
121 mask |= 1u64 << (i - 1);
122 }
123 }
124 mask
125}
126
127#[inline(always)]
129fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
130 if field_num <= 64 {
131 (mask >> (field_num - 1)) & 1 == 1
132 } else {
133 in_ranges(ranges, field_num) != complement
134 }
135}
136
/// Appends `data` to `buf` without checking or growing its capacity.
///
/// # Safety
///
/// `buf` must already have at least `data.len()` bytes of spare capacity
/// (`buf.capacity() - buf.len() >= data.len()`).
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    let old_len = buf.len();
    let extra = data.len();
    // SAFETY: the caller guarantees `extra` bytes of spare capacity, so the
    // destination is inside the allocation; `data` is a shared borrow and
    // therefore cannot overlap the exclusively borrowed `buf`.
    unsafe {
        let dst = buf.as_mut_ptr().add(old_len);
        dst.copy_from_nonoverlapping(data.as_ptr(), extra);
        buf.set_len(old_len + extra);
    }
}
149
/// Appends the single byte `b` to `buf` without checking or growing its capacity.
///
/// # Safety
///
/// `buf` must already have at least one byte of spare capacity.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    let at = buf.len();
    // SAFETY: the caller guarantees one spare byte, so index `at` is inside
    // the allocation and the new length does not exceed the capacity.
    unsafe {
        buf.as_mut_ptr().add(at).write(b);
        buf.set_len(at + 1);
    }
}
160
/// Appends `data` followed by the byte `b` to `buf` without checking or
/// growing its capacity.
///
/// # Safety
///
/// `buf` must already have at least `data.len() + 1` bytes of spare capacity.
#[inline(always)]
unsafe fn buf_extend_byte(buf: &mut Vec<u8>, data: &[u8], b: u8) {
    let old_len = buf.len();
    let extra = data.len();
    // SAFETY: the caller guarantees `extra + 1` spare bytes, so both the copy
    // and the trailing byte land inside the allocation; `data` cannot overlap
    // the exclusively borrowed `buf`.
    unsafe {
        let dst = buf.as_mut_ptr().add(old_len);
        dst.copy_from_nonoverlapping(data.as_ptr(), extra);
        dst.add(extra).write(b);
        buf.set_len(old_len + extra + 1);
    }
}
175
176#[inline]
180fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
181 if slices.is_empty() {
182 return Ok(());
183 }
184 for batch in slices.chunks(MAX_IOV) {
185 let total: usize = batch.iter().map(|s| s.len()).sum();
186 let written = out.write_vectored(batch)?;
187 if written >= total {
188 continue;
189 }
190 if written == 0 {
191 return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
192 }
193 write_ioslices_slow(out, batch, written)?;
194 }
195 Ok(())
196}
197
/// Finishes a batch after a short vectored write.
///
/// The first `skip` bytes of `slices` (counted across slice boundaries) are
/// treated as already written; everything after them is written with
/// `write_all`, one slice at a time.
#[cold]
#[inline(never)]
fn write_ioslices_slow(
    out: &mut impl Write,
    slices: &[IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for slice in slices {
        let available = slice.len();
        if available <= skip {
            // This slice went out completely during the vectored write.
            skip -= available;
            continue;
        }
        // Partially written (or untouched) slice: emit what is left of it.
        let (_, rest) = slice.split_at(skip);
        out.write_all(rest)?;
        skip = 0;
    }
    Ok(())
}
217
/// Number of threads the process may usefully run in parallel, or `1` when
/// the standard library cannot determine it.
#[inline]
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    }
}
229
230fn split_for_scope<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
233 let num_threads = num_cpus().max(1);
234 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
235 return vec![data];
236 }
237
238 let chunk_size = data.len() / num_threads;
239 let mut chunks = Vec::with_capacity(num_threads);
240 let mut pos = 0;
241
242 for _ in 0..num_threads - 1 {
243 let target = pos + chunk_size;
244 if target >= data.len() {
245 break;
246 }
247 let boundary = memchr::memchr(line_delim, &data[target..])
248 .map(|p| target + p + 1)
249 .unwrap_or(data.len());
250 if boundary > pos {
251 chunks.push(&data[pos..boundary]);
252 }
253 pos = boundary;
254 }
255
256 if pos < data.len() {
257 chunks.push(&data[pos..]);
258 }
259
260 chunks
261}
262
263fn process_fields_multi_select(
270 data: &[u8],
271 delim: u8,
272 line_delim: u8,
273 ranges: &[Range],
274 suppress: bool,
275 out: &mut impl Write,
276) -> io::Result<()> {
277 let max_field = ranges.last().map_or(0, |r| r.end);
278
279 if data.len() >= PARALLEL_THRESHOLD {
280 let chunks = split_for_scope(data, line_delim);
281 let n = chunks.len();
282 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
283 rayon::scope(|s| {
284 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
285 s.spawn(move |_| {
286 result.reserve(chunk.len() * 3 / 4);
287 multi_select_chunk(
288 chunk, delim, line_delim, ranges, max_field, suppress, result,
289 );
290 });
291 }
292 });
293 let slices: Vec<IoSlice> = results
294 .iter()
295 .filter(|r| !r.is_empty())
296 .map(|r| IoSlice::new(r))
297 .collect();
298 write_ioslices(out, &slices)?;
299 } else {
300 let mut buf = Vec::with_capacity(data.len() * 3 / 4);
301 multi_select_chunk(
302 data, delim, line_delim, ranges, max_field, suppress, &mut buf,
303 );
304 if !buf.is_empty() {
305 out.write_all(&buf)?;
306 }
307 }
308 Ok(())
309}
310
311fn multi_select_chunk(
317 data: &[u8],
318 delim: u8,
319 line_delim: u8,
320 ranges: &[Range],
321 max_field: usize,
322 suppress: bool,
323 buf: &mut Vec<u8>,
324) {
325 if delim == line_delim {
327 buf.reserve(data.len());
328 let base = data.as_ptr();
329 let mut start = 0;
330 for end_pos in memchr_iter(line_delim, data) {
331 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
332 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
333 start = end_pos + 1;
334 }
335 if start < data.len() {
336 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
337 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
338 }
339 return;
340 }
341
342 buf.reserve(data.len());
343 let base = data.as_ptr();
344 let data_len = data.len();
345
346 let mut line_start: usize = 0;
348 let mut delim_pos = [0usize; 64];
349 let mut num_delims: usize = 0;
350 let max_delims = max_field.min(64);
351 let mut at_max = false;
352
353 for pos in memchr::memchr2_iter(delim, line_delim, data) {
355 let byte = unsafe { *base.add(pos) };
356
357 if byte == line_delim {
358 let line_len = pos - line_start;
360 if num_delims == 0 {
361 if !suppress {
363 unsafe {
364 buf_extend(
365 buf,
366 std::slice::from_raw_parts(base.add(line_start), line_len),
367 );
368 buf_push(buf, line_delim);
369 }
370 }
371 } else {
372 let total_fields = num_delims + 1;
374 let mut first_output = true;
375
376 for r in ranges {
377 let range_start = r.start;
378 let range_end = r.end.min(total_fields);
379 if range_start > total_fields {
380 break;
381 }
382 for field_num in range_start..=range_end {
383 if field_num > total_fields {
384 break;
385 }
386
387 let field_start = if field_num == 1 {
388 line_start
389 } else if field_num - 2 < num_delims {
390 delim_pos[field_num - 2] + 1
391 } else {
392 continue;
393 };
394 let field_end = if field_num <= num_delims {
395 delim_pos[field_num - 1]
396 } else {
397 pos
398 };
399
400 if !first_output {
401 unsafe { buf_push(buf, delim) };
402 }
403 unsafe {
404 buf_extend(
405 buf,
406 std::slice::from_raw_parts(
407 base.add(field_start),
408 field_end - field_start,
409 ),
410 );
411 }
412 first_output = false;
413 }
414 }
415
416 unsafe { buf_push(buf, line_delim) };
417 }
418
419 line_start = pos + 1;
421 num_delims = 0;
422 at_max = false;
423 } else {
424 if !at_max && num_delims < max_delims {
426 delim_pos[num_delims] = pos;
427 num_delims += 1;
428 if num_delims >= max_delims {
429 at_max = true;
430 }
431 }
432 }
433 }
434
435 if line_start < data_len {
437 if num_delims == 0 {
438 if !suppress {
439 unsafe {
440 buf_extend(
441 buf,
442 std::slice::from_raw_parts(base.add(line_start), data_len - line_start),
443 );
444 buf_push(buf, line_delim);
445 }
446 }
447 } else {
448 let total_fields = num_delims + 1;
449 let mut first_output = true;
450
451 for r in ranges {
452 let range_start = r.start;
453 let range_end = r.end.min(total_fields);
454 if range_start > total_fields {
455 break;
456 }
457 for field_num in range_start..=range_end {
458 if field_num > total_fields {
459 break;
460 }
461
462 let field_start = if field_num == 1 {
463 line_start
464 } else if field_num - 2 < num_delims {
465 delim_pos[field_num - 2] + 1
466 } else {
467 continue;
468 };
469 let field_end = if field_num <= num_delims {
470 delim_pos[field_num - 1]
471 } else {
472 data_len
473 };
474
475 if !first_output {
476 unsafe { buf_push(buf, delim) };
477 }
478 unsafe {
479 buf_extend(
480 buf,
481 std::slice::from_raw_parts(
482 base.add(field_start),
483 field_end - field_start,
484 ),
485 );
486 }
487 first_output = false;
488 }
489 }
490
491 unsafe { buf_push(buf, line_delim) };
492 }
493 }
494}
495
496#[inline(always)]
501fn multi_select_line(
502 line: &[u8],
503 delim: u8,
504 line_delim: u8,
505 ranges: &[Range],
506 max_field: usize,
507 suppress: bool,
508 buf: &mut Vec<u8>,
509) {
510 let len = line.len();
511 if len == 0 {
512 if !suppress {
513 unsafe { buf_push(buf, line_delim) };
514 }
515 return;
516 }
517
518 let base = line.as_ptr();
520
521 let mut delim_pos = [0usize; 64];
524 let mut num_delims: usize = 0;
525 let max_delims = max_field.min(64);
526
527 for pos in memchr_iter(delim, line) {
528 if num_delims < max_delims {
529 delim_pos[num_delims] = pos;
530 num_delims += 1;
531 if num_delims >= max_delims {
532 break;
533 }
534 }
535 }
536
537 if num_delims == 0 {
538 if !suppress {
539 unsafe {
540 buf_extend(buf, line);
541 buf_push(buf, line_delim);
542 }
543 }
544 return;
545 }
546
547 let total_fields = num_delims + 1;
551 let mut first_output = true;
552
553 for r in ranges {
554 let range_start = r.start;
555 let range_end = r.end.min(total_fields);
556 if range_start > total_fields {
557 break;
558 }
559 for field_num in range_start..=range_end {
560 if field_num > total_fields {
561 break;
562 }
563
564 let field_start = if field_num == 1 {
565 0
566 } else if field_num - 2 < num_delims {
567 delim_pos[field_num - 2] + 1
568 } else {
569 continue;
570 };
571 let field_end = if field_num <= num_delims {
572 delim_pos[field_num - 1]
573 } else {
574 len
575 };
576
577 if !first_output {
578 unsafe { buf_push(buf, delim) };
579 }
580 unsafe {
581 buf_extend(
582 buf,
583 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
584 );
585 }
586 first_output = false;
587 }
588 }
589
590 unsafe { buf_push(buf, line_delim) };
591}
592
593fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
597 let delim = cfg.delim;
598 let line_delim = cfg.line_delim;
599 let ranges = cfg.ranges;
600 let complement = cfg.complement;
601 let output_delim = cfg.output_delim;
602 let suppress = cfg.suppress_no_delim;
603
604 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
612 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
613 }
614
615 if complement
617 && ranges.len() == 1
618 && output_delim.len() == 1
619 && output_delim[0] == delim
620 && ranges[0].start == ranges[0].end
621 {
622 return process_complement_single_field(
623 data,
624 delim,
625 line_delim,
626 ranges[0].start,
627 suppress,
628 out,
629 );
630 }
631
632 if complement
635 && ranges.len() == 1
636 && ranges[0].start > 1
637 && ranges[0].end < usize::MAX
638 && output_delim.len() == 1
639 && output_delim[0] == delim
640 {
641 return process_complement_range(
642 data,
643 delim,
644 line_delim,
645 ranges[0].start,
646 ranges[0].end,
647 suppress,
648 out,
649 );
650 }
651
652 if !complement
654 && ranges.len() == 1
655 && ranges[0].start == 1
656 && output_delim.len() == 1
657 && output_delim[0] == delim
658 && ranges[0].end < usize::MAX
659 {
660 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
661 }
662
663 if !complement
665 && ranges.len() == 1
666 && ranges[0].end == usize::MAX
667 && ranges[0].start > 1
668 && output_delim.len() == 1
669 && output_delim[0] == delim
670 {
671 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
672 }
673
674 if !complement
676 && ranges.len() == 1
677 && ranges[0].start > 1
678 && ranges[0].end < usize::MAX
679 && output_delim.len() == 1
680 && output_delim[0] == delim
681 {
682 return process_fields_mid_range(
683 data,
684 delim,
685 line_delim,
686 ranges[0].start,
687 ranges[0].end,
688 suppress,
689 out,
690 );
691 }
692
693 if !complement
699 && ranges.len() > 1
700 && ranges.last().map_or(false, |r| r.end < usize::MAX)
701 && output_delim.len() == 1
702 && output_delim[0] == delim
703 && delim != line_delim
704 {
705 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
706 }
707
708 let max_field = if complement {
710 usize::MAX
711 } else {
712 ranges.last().map(|r| r.end).unwrap_or(0)
713 };
714 let field_mask = compute_field_mask(ranges, complement);
715
716 if data.len() >= PARALLEL_THRESHOLD {
717 let chunks = split_for_scope(data, line_delim);
718 let n = chunks.len();
719 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
720 rayon::scope(|s| {
721 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
722 s.spawn(move |_| {
723 result.reserve(chunk.len());
724 process_fields_chunk(
725 chunk,
726 delim,
727 ranges,
728 output_delim,
729 suppress,
730 max_field,
731 field_mask,
732 line_delim,
733 complement,
734 result,
735 );
736 });
737 }
738 });
739 let slices: Vec<IoSlice> = results
740 .iter()
741 .filter(|r| !r.is_empty())
742 .map(|r| IoSlice::new(r))
743 .collect();
744 write_ioslices(out, &slices)?;
745 } else {
746 let mut buf = Vec::with_capacity(data.len());
747 process_fields_chunk(
748 data,
749 delim,
750 ranges,
751 output_delim,
752 suppress,
753 max_field,
754 field_mask,
755 line_delim,
756 complement,
757 &mut buf,
758 );
759 if !buf.is_empty() {
760 out.write_all(&buf)?;
761 }
762 }
763 Ok(())
764}
765
766fn process_fields_chunk(
771 data: &[u8],
772 delim: u8,
773 ranges: &[Range],
774 output_delim: &[u8],
775 suppress: bool,
776 max_field: usize,
777 field_mask: u64,
778 line_delim: u8,
779 complement: bool,
780 buf: &mut Vec<u8>,
781) {
782 if delim != line_delim && max_field < usize::MAX && !complement {
789 buf.reserve(data.len());
790 let mut start = 0;
791 for end_pos in memchr_iter(line_delim, data) {
792 let line = &data[start..end_pos];
793 extract_fields_to_buf(
794 line,
795 delim,
796 ranges,
797 output_delim,
798 suppress,
799 max_field,
800 field_mask,
801 line_delim,
802 buf,
803 complement,
804 );
805 start = end_pos + 1;
806 }
807 if start < data.len() {
808 extract_fields_to_buf(
809 &data[start..],
810 delim,
811 ranges,
812 output_delim,
813 suppress,
814 max_field,
815 field_mask,
816 line_delim,
817 buf,
818 complement,
819 );
820 }
821 return;
822 }
823
824 if delim != line_delim {
828 buf.reserve(data.len());
829
830 let data_len = data.len();
831 let base = data.as_ptr();
832 let mut line_start: usize = 0;
833 let mut field_start: usize = 0;
834 let mut field_num: usize = 1;
835 let mut first_output = true;
836 let mut has_delim = false;
837
838 for pos in memchr::memchr2_iter(delim, line_delim, data) {
839 let byte = unsafe { *base.add(pos) };
840
841 if byte == line_delim {
842 if (field_num <= max_field || complement)
844 && has_delim
845 && is_selected(field_num, field_mask, ranges, complement)
846 {
847 if !first_output {
848 unsafe { buf_extend(buf, output_delim) };
849 }
850 unsafe {
851 buf_extend(
852 buf,
853 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
854 )
855 };
856 first_output = false;
857 }
858
859 if !first_output {
860 unsafe { buf_push(buf, line_delim) };
861 } else if !has_delim {
862 if !suppress {
863 unsafe {
864 buf_extend(
865 buf,
866 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
867 );
868 buf_push(buf, line_delim);
869 }
870 }
871 } else {
872 unsafe { buf_push(buf, line_delim) };
873 }
874
875 line_start = pos + 1;
877 field_start = pos + 1;
878 field_num = 1;
879 first_output = true;
880 has_delim = false;
881 } else {
882 has_delim = true;
884
885 if is_selected(field_num, field_mask, ranges, complement) {
886 if !first_output {
887 unsafe { buf_extend(buf, output_delim) };
888 }
889 unsafe {
890 buf_extend(
891 buf,
892 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
893 )
894 };
895 first_output = false;
896 }
897
898 field_num += 1;
899 field_start = pos + 1;
900 }
901 }
902
903 if line_start < data_len {
905 if line_start < data_len {
906 if (field_num <= max_field || complement)
907 && has_delim
908 && is_selected(field_num, field_mask, ranges, complement)
909 {
910 if !first_output {
911 unsafe { buf_extend(buf, output_delim) };
912 }
913 unsafe {
914 buf_extend(
915 buf,
916 std::slice::from_raw_parts(
917 base.add(field_start),
918 data_len - field_start,
919 ),
920 )
921 };
922 first_output = false;
923 }
924
925 if !first_output {
926 unsafe { buf_push(buf, line_delim) };
927 } else if !has_delim {
928 if !suppress {
929 unsafe {
930 buf_extend(
931 buf,
932 std::slice::from_raw_parts(
933 base.add(line_start),
934 data_len - line_start,
935 ),
936 );
937 buf_push(buf, line_delim);
938 }
939 }
940 } else {
941 unsafe { buf_push(buf, line_delim) };
942 }
943 }
944 }
945
946 return;
947 }
948
949 let mut start = 0;
951 for end_pos in memchr_iter(line_delim, data) {
952 let line = &data[start..end_pos];
953 extract_fields_to_buf(
954 line,
955 delim,
956 ranges,
957 output_delim,
958 suppress,
959 max_field,
960 field_mask,
961 line_delim,
962 buf,
963 complement,
964 );
965 start = end_pos + 1;
966 }
967 if start < data.len() {
968 extract_fields_to_buf(
969 &data[start..],
970 delim,
971 ranges,
972 output_delim,
973 suppress,
974 max_field,
975 field_mask,
976 line_delim,
977 buf,
978 complement,
979 );
980 }
981}
982
983fn process_single_field(
989 data: &[u8],
990 delim: u8,
991 line_delim: u8,
992 target: usize,
993 suppress: bool,
994 out: &mut impl Write,
995) -> io::Result<()> {
996 let target_idx = target - 1;
997
998 const FIELD_PARALLEL_MIN: usize = 16 * 1024 * 1024;
1000
1001 if delim != line_delim {
1002 if target_idx == 0 && !suppress {
1006 if data.len() >= FIELD_PARALLEL_MIN {
1007 return single_field1_parallel(data, delim, line_delim, out);
1008 }
1009 let mut buf = Vec::with_capacity(data.len() + 1);
1014 single_field1_to_buf(data, delim, line_delim, &mut buf);
1015 if !buf.is_empty() {
1016 out.write_all(&buf)?;
1017 }
1018 return Ok(());
1019 }
1020
1021 if data.len() >= FIELD_PARALLEL_MIN {
1025 let chunks = split_for_scope(data, line_delim);
1026 let n = chunks.len();
1027 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1028 rayon::scope(|s| {
1029 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1030 s.spawn(move |_| {
1031 result.reserve(chunk.len() / 2);
1032 process_single_field_chunk(
1033 chunk, delim, target_idx, line_delim, suppress, result,
1034 );
1035 });
1036 }
1037 });
1038 let slices: Vec<IoSlice> = results
1039 .iter()
1040 .filter(|r| !r.is_empty())
1041 .map(|r| IoSlice::new(r))
1042 .collect();
1043 write_ioslices(out, &slices)?;
1044 } else {
1045 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
1046 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1047 if !buf.is_empty() {
1048 out.write_all(&buf)?;
1049 }
1050 }
1051 return Ok(());
1052 }
1053
1054 if data.len() >= FIELD_PARALLEL_MIN {
1056 let chunks = split_for_scope(data, line_delim);
1057 let n = chunks.len();
1058 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1059 rayon::scope(|s| {
1060 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1061 s.spawn(move |_| {
1062 result.reserve(chunk.len() / 4);
1063 process_single_field_chunk(
1064 chunk, delim, target_idx, line_delim, suppress, result,
1065 );
1066 });
1067 }
1068 });
1069 let slices: Vec<IoSlice> = results
1070 .iter()
1071 .filter(|r| !r.is_empty())
1072 .map(|r| IoSlice::new(r))
1073 .collect();
1074 write_ioslices(out, &slices)?;
1075 } else {
1076 let mut buf = Vec::with_capacity(data.len() / 4);
1077 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1078 if !buf.is_empty() {
1079 out.write_all(&buf)?;
1080 }
1081 }
1082 Ok(())
1083}
1084
1085fn process_complement_range(
1088 data: &[u8],
1089 delim: u8,
1090 line_delim: u8,
1091 skip_start: usize,
1092 skip_end: usize,
1093 suppress: bool,
1094 out: &mut impl Write,
1095) -> io::Result<()> {
1096 if data.len() >= PARALLEL_THRESHOLD {
1097 let chunks = split_for_scope(data, line_delim);
1098 let n = chunks.len();
1099 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1100 rayon::scope(|s| {
1101 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1102 s.spawn(move |_| {
1103 result.reserve(chunk.len());
1104 complement_range_chunk(
1105 chunk, delim, skip_start, skip_end, line_delim, suppress, result,
1106 );
1107 });
1108 }
1109 });
1110 let slices: Vec<IoSlice> = results
1111 .iter()
1112 .filter(|r| !r.is_empty())
1113 .map(|r| IoSlice::new(r))
1114 .collect();
1115 write_ioslices(out, &slices)?;
1116 } else {
1117 let mut buf = Vec::with_capacity(data.len());
1118 complement_range_chunk(
1119 data, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
1120 );
1121 if !buf.is_empty() {
1122 out.write_all(&buf)?;
1123 }
1124 }
1125 Ok(())
1126}
1127
1128fn complement_range_chunk(
1130 data: &[u8],
1131 delim: u8,
1132 skip_start: usize,
1133 skip_end: usize,
1134 line_delim: u8,
1135 suppress: bool,
1136 buf: &mut Vec<u8>,
1137) {
1138 buf.reserve(data.len());
1140 let mut start = 0;
1141 for end_pos in memchr_iter(line_delim, data) {
1142 let line = &data[start..end_pos];
1143 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
1144 start = end_pos + 1;
1145 }
1146 if start < data.len() {
1147 complement_range_line(
1148 &data[start..],
1149 delim,
1150 skip_start,
1151 skip_end,
1152 line_delim,
1153 suppress,
1154 buf,
1155 );
1156 }
1157}
1158
1159#[inline(always)]
1166fn complement_range_line(
1167 line: &[u8],
1168 delim: u8,
1169 skip_start: usize,
1170 skip_end: usize,
1171 line_delim: u8,
1172 suppress: bool,
1173 buf: &mut Vec<u8>,
1174) {
1175 let len = line.len();
1176 if len == 0 {
1177 if !suppress {
1178 unsafe { buf_push(buf, line_delim) };
1179 }
1180 return;
1181 }
1182
1183 let base = line.as_ptr();
1185
1186 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
1196
1197 let mut delim_count: usize = 0;
1199 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
1203 delim_count += 1;
1204 if delim_count == need_prefix_delims {
1205 prefix_end_pos = pos;
1206 }
1207 if delim_count == total_need {
1208 suffix_start_pos = pos + 1;
1209 break;
1210 }
1211 }
1212
1213 if delim_count == 0 {
1214 if !suppress {
1216 unsafe {
1217 buf_extend(buf, line);
1218 buf_push(buf, line_delim);
1219 }
1220 }
1221 return;
1222 }
1223
1224 if delim_count < need_prefix_delims {
1230 unsafe {
1232 buf_extend(buf, line);
1233 buf_push(buf, line_delim);
1234 }
1235 return;
1236 }
1237
1238 let has_prefix = need_prefix_delims > 0;
1239 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
1240
1241 if has_prefix && has_suffix {
1242 unsafe {
1244 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1245 buf_push(buf, delim);
1246 buf_extend(
1247 buf,
1248 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1249 );
1250 buf_push(buf, line_delim);
1251 }
1252 } else if has_prefix {
1253 unsafe {
1255 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1256 buf_push(buf, line_delim);
1257 }
1258 } else if has_suffix {
1259 unsafe {
1261 buf_extend(
1262 buf,
1263 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1264 );
1265 buf_push(buf, line_delim);
1266 }
1267 } else {
1268 unsafe { buf_push(buf, line_delim) };
1270 }
1271}
1272
1273fn process_complement_single_field(
1275 data: &[u8],
1276 delim: u8,
1277 line_delim: u8,
1278 skip_field: usize,
1279 suppress: bool,
1280 out: &mut impl Write,
1281) -> io::Result<()> {
1282 let skip_idx = skip_field - 1;
1283
1284 if data.len() >= PARALLEL_THRESHOLD {
1285 let chunks = split_for_scope(data, line_delim);
1286 let n = chunks.len();
1287 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1288 rayon::scope(|s| {
1289 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1290 s.spawn(move |_| {
1291 result.reserve(chunk.len());
1292 complement_single_field_chunk(
1293 chunk, delim, skip_idx, line_delim, suppress, result,
1294 );
1295 });
1296 }
1297 });
1298 let slices: Vec<IoSlice> = results
1299 .iter()
1300 .filter(|r| !r.is_empty())
1301 .map(|r| IoSlice::new(r))
1302 .collect();
1303 write_ioslices(out, &slices)?;
1304 } else {
1305 let mut buf = Vec::with_capacity(data.len());
1306 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
1307 if !buf.is_empty() {
1308 out.write_all(&buf)?;
1309 }
1310 }
1311 Ok(())
1312}
1313
1314fn complement_single_field_chunk(
1320 data: &[u8],
1321 delim: u8,
1322 skip_idx: usize,
1323 line_delim: u8,
1324 suppress: bool,
1325 buf: &mut Vec<u8>,
1326) {
1327 if delim == line_delim {
1329 buf.reserve(data.len());
1330 let mut start = 0;
1331 for end_pos in memchr_iter(line_delim, data) {
1332 let line = &data[start..end_pos];
1333 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1334 start = end_pos + 1;
1335 }
1336 if start < data.len() {
1337 complement_single_field_line(
1338 &data[start..],
1339 delim,
1340 skip_idx,
1341 line_delim,
1342 suppress,
1343 buf,
1344 );
1345 }
1346 return;
1347 }
1348
1349 buf.reserve(data.len());
1350 let base = data.as_ptr();
1351 let data_len = data.len();
1352 let need_before = skip_idx; let need_total = skip_idx + 1; let mut line_start: usize = 0;
1357 let mut delim_count: usize = 0;
1358 let mut skip_start_pos: usize = 0;
1359 let mut skip_end_pos: usize = 0;
1360 let mut found_start = need_before == 0; let mut found_end = false;
1362
1363 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1364 let byte = unsafe { *base.add(pos) };
1365
1366 if byte == line_delim {
1367 if delim_count == 0 {
1369 if !suppress {
1371 unsafe {
1372 buf_extend(
1373 buf,
1374 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1375 );
1376 buf_push(buf, line_delim);
1377 }
1378 }
1379 } else if !found_start || delim_count < need_before {
1380 unsafe {
1382 buf_extend(
1383 buf,
1384 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1385 );
1386 buf_push(buf, line_delim);
1387 }
1388 } else {
1389 let has_prefix = skip_idx > 0;
1390 let has_suffix = found_end && skip_end_pos < pos;
1391
1392 if has_prefix && has_suffix {
1393 unsafe {
1394 buf_extend(
1395 buf,
1396 std::slice::from_raw_parts(
1397 base.add(line_start),
1398 skip_start_pos - 1 - line_start,
1399 ),
1400 );
1401 buf_push(buf, delim);
1402 buf_extend(
1403 buf,
1404 std::slice::from_raw_parts(
1405 base.add(skip_end_pos + 1),
1406 pos - skip_end_pos - 1,
1407 ),
1408 );
1409 buf_push(buf, line_delim);
1410 }
1411 } else if has_prefix {
1412 unsafe {
1413 buf_extend(
1414 buf,
1415 std::slice::from_raw_parts(
1416 base.add(line_start),
1417 skip_start_pos - 1 - line_start,
1418 ),
1419 );
1420 buf_push(buf, line_delim);
1421 }
1422 } else if has_suffix {
1423 unsafe {
1424 buf_extend(
1425 buf,
1426 std::slice::from_raw_parts(
1427 base.add(skip_end_pos + 1),
1428 pos - skip_end_pos - 1,
1429 ),
1430 );
1431 buf_push(buf, line_delim);
1432 }
1433 } else {
1434 unsafe { buf_push(buf, line_delim) };
1435 }
1436 }
1437
1438 line_start = pos + 1;
1440 delim_count = 0;
1441 skip_start_pos = 0;
1442 skip_end_pos = 0;
1443 found_start = need_before == 0;
1444 found_end = false;
1445 } else {
1446 delim_count += 1;
1448 if delim_count == need_before {
1449 skip_start_pos = pos + 1;
1450 found_start = true;
1451 }
1452 if delim_count == need_total {
1453 skip_end_pos = pos;
1454 found_end = true;
1455 }
1456 }
1457 }
1458
1459 if line_start < data_len {
1461 let pos = data_len;
1462 if delim_count == 0 {
1463 if !suppress {
1464 unsafe {
1465 buf_extend(
1466 buf,
1467 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1468 );
1469 buf_push(buf, line_delim);
1470 }
1471 }
1472 } else if !found_start || delim_count < need_before {
1473 unsafe {
1474 buf_extend(
1475 buf,
1476 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1477 );
1478 buf_push(buf, line_delim);
1479 }
1480 } else {
1481 let has_prefix = skip_idx > 0;
1482 let has_suffix = found_end && skip_end_pos < pos;
1483
1484 if has_prefix && has_suffix {
1485 unsafe {
1486 buf_extend(
1487 buf,
1488 std::slice::from_raw_parts(
1489 base.add(line_start),
1490 skip_start_pos - 1 - line_start,
1491 ),
1492 );
1493 buf_push(buf, delim);
1494 buf_extend(
1495 buf,
1496 std::slice::from_raw_parts(
1497 base.add(skip_end_pos + 1),
1498 pos - skip_end_pos - 1,
1499 ),
1500 );
1501 buf_push(buf, line_delim);
1502 }
1503 } else if has_prefix {
1504 unsafe {
1505 buf_extend(
1506 buf,
1507 std::slice::from_raw_parts(
1508 base.add(line_start),
1509 skip_start_pos - 1 - line_start,
1510 ),
1511 );
1512 buf_push(buf, line_delim);
1513 }
1514 } else if has_suffix {
1515 unsafe {
1516 buf_extend(
1517 buf,
1518 std::slice::from_raw_parts(
1519 base.add(skip_end_pos + 1),
1520 pos - skip_end_pos - 1,
1521 ),
1522 );
1523 buf_push(buf, line_delim);
1524 }
1525 } else {
1526 unsafe { buf_push(buf, line_delim) };
1527 }
1528 }
1529 }
1530}
1531
1532#[inline(always)]
1534fn complement_single_field_line(
1535 line: &[u8],
1536 delim: u8,
1537 skip_idx: usize,
1538 line_delim: u8,
1539 suppress: bool,
1540 buf: &mut Vec<u8>,
1541) {
1542 let len = line.len();
1543 if len == 0 {
1544 if !suppress {
1545 unsafe { buf_push(buf, line_delim) };
1546 }
1547 return;
1548 }
1549
1550 let base = line.as_ptr();
1551 let need_before = skip_idx;
1552 let need_total = skip_idx + 1;
1553
1554 let mut delim_count: usize = 0;
1555 let mut skip_start_pos: usize = 0;
1556 let mut skip_end_pos: usize = len;
1557 let mut found_end = false;
1558
1559 for pos in memchr_iter(delim, line) {
1560 delim_count += 1;
1561 if delim_count == need_before {
1562 skip_start_pos = pos + 1;
1563 }
1564 if delim_count == need_total {
1565 skip_end_pos = pos;
1566 found_end = true;
1567 break;
1568 }
1569 }
1570
1571 if delim_count == 0 {
1572 if !suppress {
1573 unsafe {
1574 buf_extend(buf, line);
1575 buf_push(buf, line_delim);
1576 }
1577 }
1578 return;
1579 }
1580
1581 if delim_count < need_before {
1582 unsafe {
1583 buf_extend(buf, line);
1584 buf_push(buf, line_delim);
1585 }
1586 return;
1587 }
1588
1589 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1590 let has_suffix = found_end && skip_end_pos < len;
1591
1592 if has_prefix && has_suffix {
1593 unsafe {
1594 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1595 buf_push(buf, delim);
1596 buf_extend(
1597 buf,
1598 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1599 );
1600 buf_push(buf, line_delim);
1601 }
1602 } else if has_prefix {
1603 unsafe {
1604 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1605 buf_push(buf, line_delim);
1606 }
1607 } else if has_suffix {
1608 unsafe {
1609 buf_extend(
1610 buf,
1611 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1612 );
1613 buf_push(buf, line_delim);
1614 }
1615 } else {
1616 unsafe { buf_push(buf, line_delim) };
1617 }
1618}
1619
1620fn process_fields_prefix(
1624 data: &[u8],
1625 delim: u8,
1626 line_delim: u8,
1627 last_field: usize,
1628 suppress: bool,
1629 out: &mut impl Write,
1630) -> io::Result<()> {
1631 if data.len() >= PARALLEL_THRESHOLD {
1632 let chunks = split_for_scope(data, line_delim);
1633 let n = chunks.len();
1634 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1635 rayon::scope(|s| {
1636 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1637 s.spawn(move |_| {
1638 result.reserve(chunk.len());
1639 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, result);
1640 });
1641 }
1642 });
1643 let slices: Vec<IoSlice> = results
1644 .iter()
1645 .filter(|r| !r.is_empty())
1646 .map(|r| IoSlice::new(r))
1647 .collect();
1648 write_ioslices(out, &slices)?;
1649 } else if !suppress {
1650 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1654 } else {
1655 let mut buf = Vec::with_capacity(data.len());
1656 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
1657 if !buf.is_empty() {
1658 out.write_all(&buf)?;
1659 }
1660 }
1661 Ok(())
1662}
1663
1664#[inline]
1670fn fields_prefix_zerocopy(
1671 data: &[u8],
1672 delim: u8,
1673 line_delim: u8,
1674 last_field: usize,
1675 out: &mut impl Write,
1676) -> io::Result<()> {
1677 let newline_buf: [u8; 1] = [line_delim];
1678 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1679 let mut start = 0;
1680 let mut run_start: usize = 0;
1681
1682 for end_pos in memchr_iter(line_delim, data) {
1683 let line = &data[start..end_pos];
1684 let mut field_count = 1;
1685 let mut truncate_at: Option<usize> = None;
1686 for dpos in memchr_iter(delim, line) {
1687 if field_count >= last_field {
1688 truncate_at = Some(start + dpos);
1689 break;
1690 }
1691 field_count += 1;
1692 }
1693
1694 if let Some(trunc_pos) = truncate_at {
1695 if run_start < start {
1696 iov.push(IoSlice::new(&data[run_start..start]));
1697 }
1698 iov.push(IoSlice::new(&data[start..trunc_pos]));
1699 iov.push(IoSlice::new(&newline_buf));
1700 run_start = end_pos + 1;
1701
1702 if iov.len() >= MAX_IOV - 2 {
1703 write_ioslices(out, &iov)?;
1704 iov.clear();
1705 }
1706 }
1707 start = end_pos + 1;
1708 }
1709 if start < data.len() {
1711 let line = &data[start..];
1712 let mut field_count = 1;
1713 let mut truncate_at: Option<usize> = None;
1714 for dpos in memchr_iter(delim, line) {
1715 if field_count >= last_field {
1716 truncate_at = Some(start + dpos);
1717 break;
1718 }
1719 field_count += 1;
1720 }
1721 if let Some(trunc_pos) = truncate_at {
1722 if run_start < start {
1723 iov.push(IoSlice::new(&data[run_start..start]));
1724 }
1725 iov.push(IoSlice::new(&data[start..trunc_pos]));
1726 iov.push(IoSlice::new(&newline_buf));
1727 if !iov.is_empty() {
1728 write_ioslices(out, &iov)?;
1729 }
1730 return Ok(());
1731 }
1732 }
1733 if run_start < data.len() {
1735 iov.push(IoSlice::new(&data[run_start..]));
1736 if !data.is_empty() && *data.last().unwrap() != line_delim {
1737 iov.push(IoSlice::new(&newline_buf));
1738 }
1739 }
1740 if !iov.is_empty() {
1741 write_ioslices(out, &iov)?;
1742 }
1743 Ok(())
1744}
1745
1746fn fields_prefix_chunk(
1748 data: &[u8],
1749 delim: u8,
1750 line_delim: u8,
1751 last_field: usize,
1752 suppress: bool,
1753 buf: &mut Vec<u8>,
1754) {
1755 buf.reserve(data.len());
1756 let mut start = 0;
1757 for end_pos in memchr_iter(line_delim, data) {
1758 let line = &data[start..end_pos];
1759 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1760 start = end_pos + 1;
1761 }
1762 if start < data.len() {
1763 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1764 }
1765}
1766
1767#[inline(always)]
1770fn fields_prefix_line(
1771 line: &[u8],
1772 delim: u8,
1773 line_delim: u8,
1774 last_field: usize,
1775 suppress: bool,
1776 buf: &mut Vec<u8>,
1777) {
1778 let len = line.len();
1779 if len == 0 {
1780 if !suppress {
1781 unsafe { buf_push(buf, line_delim) };
1782 }
1783 return;
1784 }
1785
1786 let base = line.as_ptr();
1788
1789 let mut field_count = 1usize;
1790 let mut has_delim = false;
1791
1792 for pos in memchr_iter(delim, line) {
1793 has_delim = true;
1794 if field_count >= last_field {
1795 unsafe {
1796 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1797 buf_push(buf, line_delim);
1798 }
1799 return;
1800 }
1801 field_count += 1;
1802 }
1803
1804 if !has_delim {
1805 if !suppress {
1806 unsafe {
1807 buf_extend(buf, line);
1808 buf_push(buf, line_delim);
1809 }
1810 }
1811 return;
1812 }
1813
1814 unsafe {
1815 buf_extend(buf, line);
1816 buf_push(buf, line_delim);
1817 }
1818}
1819
1820fn process_fields_suffix(
1822 data: &[u8],
1823 delim: u8,
1824 line_delim: u8,
1825 start_field: usize,
1826 suppress: bool,
1827 out: &mut impl Write,
1828) -> io::Result<()> {
1829 if data.len() >= PARALLEL_THRESHOLD {
1830 let chunks = split_for_scope(data, line_delim);
1831 let n = chunks.len();
1832 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1833 rayon::scope(|s| {
1834 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1835 s.spawn(move |_| {
1836 result.reserve(chunk.len());
1837 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, result);
1838 });
1839 }
1840 });
1841 let slices: Vec<IoSlice> = results
1842 .iter()
1843 .filter(|r| !r.is_empty())
1844 .map(|r| IoSlice::new(r))
1845 .collect();
1846 write_ioslices(out, &slices)?;
1847 } else {
1848 let mut buf = Vec::with_capacity(data.len());
1849 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
1850 if !buf.is_empty() {
1851 out.write_all(&buf)?;
1852 }
1853 }
1854 Ok(())
1855}
1856
1857fn fields_suffix_chunk(
1859 data: &[u8],
1860 delim: u8,
1861 line_delim: u8,
1862 start_field: usize,
1863 suppress: bool,
1864 buf: &mut Vec<u8>,
1865) {
1866 buf.reserve(data.len());
1867 let mut start = 0;
1868 for end_pos in memchr_iter(line_delim, data) {
1869 let line = &data[start..end_pos];
1870 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1871 start = end_pos + 1;
1872 }
1873 if start < data.len() {
1874 fields_suffix_line(
1875 &data[start..],
1876 delim,
1877 line_delim,
1878 start_field,
1879 suppress,
1880 buf,
1881 );
1882 }
1883}
1884
1885#[inline(always)]
1888fn fields_suffix_line(
1889 line: &[u8],
1890 delim: u8,
1891 line_delim: u8,
1892 start_field: usize,
1893 suppress: bool,
1894 buf: &mut Vec<u8>,
1895) {
1896 let len = line.len();
1897 if len == 0 {
1898 if !suppress {
1899 unsafe { buf_push(buf, line_delim) };
1900 }
1901 return;
1902 }
1903
1904 let base = line.as_ptr();
1906
1907 let skip_delims = start_field - 1;
1908 let mut delim_count = 0usize;
1909 let mut has_delim = false;
1910
1911 for pos in memchr_iter(delim, line) {
1912 has_delim = true;
1913 delim_count += 1;
1914 if delim_count >= skip_delims {
1915 unsafe {
1916 buf_extend(
1917 buf,
1918 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1919 );
1920 buf_push(buf, line_delim);
1921 }
1922 return;
1923 }
1924 }
1925
1926 if !has_delim {
1927 if !suppress {
1928 unsafe {
1929 buf_extend(buf, line);
1930 buf_push(buf, line_delim);
1931 }
1932 }
1933 return;
1934 }
1935
1936 unsafe { buf_push(buf, line_delim) };
1938}
1939
1940fn process_fields_mid_range(
1943 data: &[u8],
1944 delim: u8,
1945 line_delim: u8,
1946 start_field: usize,
1947 end_field: usize,
1948 suppress: bool,
1949 out: &mut impl Write,
1950) -> io::Result<()> {
1951 if data.len() >= PARALLEL_THRESHOLD {
1952 let chunks = split_for_scope(data, line_delim);
1953 let n = chunks.len();
1954 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1955 rayon::scope(|s| {
1956 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1957 s.spawn(move |_| {
1958 result.reserve(chunk.len());
1959 fields_mid_range_chunk(
1960 chunk,
1961 delim,
1962 line_delim,
1963 start_field,
1964 end_field,
1965 suppress,
1966 result,
1967 );
1968 });
1969 }
1970 });
1971 let slices: Vec<IoSlice> = results
1972 .iter()
1973 .filter(|r| !r.is_empty())
1974 .map(|r| IoSlice::new(r))
1975 .collect();
1976 write_ioslices(out, &slices)?;
1977 } else {
1978 let mut buf = Vec::with_capacity(data.len());
1979 fields_mid_range_chunk(
1980 data,
1981 delim,
1982 line_delim,
1983 start_field,
1984 end_field,
1985 suppress,
1986 &mut buf,
1987 );
1988 if !buf.is_empty() {
1989 out.write_all(&buf)?;
1990 }
1991 }
1992 Ok(())
1993}
1994
1995fn fields_mid_range_chunk(
1999 data: &[u8],
2000 delim: u8,
2001 line_delim: u8,
2002 start_field: usize,
2003 end_field: usize,
2004 suppress: bool,
2005 buf: &mut Vec<u8>,
2006) {
2007 if delim == line_delim {
2009 buf.reserve(data.len());
2010 let mut start = 0;
2011 for end_pos in memchr_iter(line_delim, data) {
2012 let line = &data[start..end_pos];
2013 fields_mid_range_line(
2014 line,
2015 delim,
2016 line_delim,
2017 start_field,
2018 end_field,
2019 suppress,
2020 buf,
2021 );
2022 start = end_pos + 1;
2023 }
2024 if start < data.len() {
2025 fields_mid_range_line(
2026 &data[start..],
2027 delim,
2028 line_delim,
2029 start_field,
2030 end_field,
2031 suppress,
2032 buf,
2033 );
2034 }
2035 return;
2036 }
2037
2038 buf.reserve(data.len());
2039 let base = data.as_ptr();
2040 let skip_before = start_field - 1; let target_end_delim = skip_before + (end_field - start_field) + 1;
2042
2043 let mut line_start: usize = 0;
2044 let mut delim_count: usize = 0;
2045 let mut range_start: usize = 0;
2046 let mut has_delim = false;
2047 let mut found_end = false; for pos in memchr::memchr2_iter(delim, line_delim, data) {
2050 let byte = unsafe { *base.add(pos) };
2051 if byte == line_delim {
2052 if found_end {
2054 } else if !has_delim {
2056 if !suppress {
2058 unsafe {
2059 buf_extend(
2060 buf,
2061 std::slice::from_raw_parts(base.add(line_start), pos + 1 - line_start),
2062 );
2063 }
2064 }
2065 } else if delim_count >= skip_before {
2066 if skip_before == 0 {
2068 range_start = line_start;
2069 }
2070 unsafe {
2071 buf_extend(
2072 buf,
2073 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2074 );
2075 buf_push(buf, line_delim);
2076 }
2077 } else {
2078 unsafe { buf_push(buf, line_delim) };
2080 }
2081 line_start = pos + 1;
2082 delim_count = 0;
2083 has_delim = false;
2084 found_end = false;
2085 } else if !found_end {
2086 has_delim = true;
2088 delim_count += 1;
2089 if delim_count == skip_before {
2090 range_start = pos + 1;
2091 }
2092 if delim_count == target_end_delim {
2093 if skip_before == 0 {
2094 range_start = line_start;
2095 }
2096 unsafe {
2097 buf_extend(
2098 buf,
2099 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2100 );
2101 buf_push(buf, line_delim);
2102 }
2103 found_end = true;
2104 }
2105 }
2106 }
2107 if line_start < data.len() && !found_end {
2109 if !has_delim {
2110 if !suppress {
2111 unsafe {
2112 buf_extend(
2113 buf,
2114 std::slice::from_raw_parts(base.add(line_start), data.len() - line_start),
2115 );
2116 }
2117 }
2118 } else if delim_count >= skip_before {
2119 if skip_before == 0 {
2120 range_start = line_start;
2121 }
2122 unsafe {
2123 buf_extend(
2124 buf,
2125 std::slice::from_raw_parts(base.add(range_start), data.len() - range_start),
2126 );
2127 }
2128 }
2129 }
2130}
2131
2132#[inline(always)]
2136fn fields_mid_range_line(
2137 line: &[u8],
2138 delim: u8,
2139 line_delim: u8,
2140 start_field: usize,
2141 end_field: usize,
2142 suppress: bool,
2143 buf: &mut Vec<u8>,
2144) {
2145 let len = line.len();
2146 if len == 0 {
2147 if !suppress {
2148 unsafe { buf_push(buf, line_delim) };
2149 }
2150 return;
2151 }
2152
2153 let base = line.as_ptr();
2155
2156 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
2160 let mut delim_count = 0;
2161 let mut range_start = 0;
2162 let mut has_delim = false;
2163
2164 for pos in memchr_iter(delim, line) {
2165 has_delim = true;
2166 delim_count += 1;
2167 if delim_count == skip_before {
2168 range_start = pos + 1;
2169 }
2170 if delim_count == target_end_delim {
2171 if skip_before == 0 {
2172 range_start = 0;
2173 }
2174 unsafe {
2175 buf_extend(
2176 buf,
2177 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2178 );
2179 buf_push(buf, line_delim);
2180 }
2181 return;
2182 }
2183 }
2184
2185 if !has_delim {
2186 if !suppress {
2187 unsafe {
2188 buf_extend(buf, line);
2189 buf_push(buf, line_delim);
2190 }
2191 }
2192 return;
2193 }
2194
2195 if delim_count >= skip_before {
2197 if skip_before == 0 {
2199 range_start = 0;
2200 }
2201 unsafe {
2202 buf_extend(
2203 buf,
2204 std::slice::from_raw_parts(base.add(range_start), len - range_start),
2205 );
2206 buf_push(buf, line_delim);
2207 }
2208 } else {
2209 unsafe { buf_push(buf, line_delim) };
2211 }
2212}
2213
2214fn single_field1_parallel(
2225 data: &[u8],
2226 delim: u8,
2227 line_delim: u8,
2228 out: &mut impl Write,
2229) -> io::Result<()> {
2230 let chunks = split_for_scope(data, line_delim);
2231 let n = chunks.len();
2232 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2233 rayon::scope(|s| {
2234 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2235 s.spawn(move |_| {
2236 result.reserve(chunk.len() + 1);
2237 single_field1_to_buf(chunk, delim, line_delim, result);
2238 });
2239 }
2240 });
2241 let slices: Vec<IoSlice> = results
2242 .iter()
2243 .filter(|r| !r.is_empty())
2244 .map(|r| IoSlice::new(r))
2245 .collect();
2246 write_ioslices(out, &slices)
2247}
2248
2249#[inline]
2260fn single_field1_to_buf(data: &[u8], delim: u8, line_delim: u8, buf: &mut Vec<u8>) {
2261 debug_assert_ne!(delim, line_delim, "delim and line_delim must differ");
2262 buf.reserve(data.len() + 1);
2265
2266 let base = data.as_ptr();
2269 let initial_len = buf.len();
2270 let mut out_ptr = unsafe { buf.as_mut_ptr().add(initial_len) };
2271 let mut line_start: usize = 0;
2272 let mut found_delim = false;
2273 let mut delim_pos: usize = 0; for pos in memchr::memchr2_iter(delim, line_delim, data) {
2282 let byte = unsafe { *base.add(pos) };
2283 if byte == line_delim {
2284 if !found_delim {
2285 let len = pos + 1 - line_start;
2287 unsafe {
2288 std::ptr::copy_nonoverlapping(base.add(line_start), out_ptr, len);
2289 out_ptr = out_ptr.add(len);
2290 }
2291 } else {
2292 let field_len = delim_pos - line_start;
2296 unsafe {
2297 std::ptr::copy_nonoverlapping(base.add(line_start), out_ptr, field_len);
2298 out_ptr = out_ptr.add(field_len);
2299 *out_ptr = line_delim;
2300 out_ptr = out_ptr.add(1);
2301 }
2302 }
2303 line_start = pos + 1;
2304 found_delim = false;
2305 } else if !found_delim {
2306 found_delim = true;
2308 delim_pos = pos;
2309 }
2310 }
2312
2313 if line_start < data.len() {
2315 if !found_delim {
2316 let len = data.len() - line_start;
2318 unsafe {
2319 std::ptr::copy_nonoverlapping(base.add(line_start), out_ptr, len);
2320 out_ptr = out_ptr.add(len);
2321 *out_ptr = line_delim;
2322 out_ptr = out_ptr.add(1);
2323 }
2324 } else {
2325 let field_len = delim_pos - line_start;
2327 unsafe {
2328 std::ptr::copy_nonoverlapping(base.add(line_start), out_ptr, field_len);
2329 out_ptr = out_ptr.add(field_len);
2330 *out_ptr = line_delim;
2331 out_ptr = out_ptr.add(1);
2332 }
2333 }
2334 }
2335
2336 unsafe {
2342 let new_len = out_ptr as usize - buf.as_ptr() as usize;
2343 debug_assert!(new_len >= initial_len && new_len <= buf.capacity());
2344 buf.set_len(new_len);
2345 }
2346}
2347
2348#[inline]
2357#[allow(dead_code)]
2358fn single_field1_zerocopy(
2359 data: &[u8],
2360 delim: u8,
2361 line_delim: u8,
2362 out: &mut impl Write,
2363) -> io::Result<()> {
2364 let newline_buf: [u8; 1] = [line_delim];
2365
2366 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2367 let mut run_start: usize = 0;
2368 let mut start = 0;
2369
2370 for end_pos in memchr_iter(line_delim, data) {
2371 let line = &data[start..end_pos];
2372 if let Some(dp) = memchr::memchr(delim, line) {
2373 if run_start < start {
2376 iov.push(IoSlice::new(&data[run_start..start]));
2377 }
2378 iov.push(IoSlice::new(&data[start..start + dp]));
2379 iov.push(IoSlice::new(&newline_buf));
2380 run_start = end_pos + 1;
2381
2382 if iov.len() >= MAX_IOV - 2 {
2383 write_ioslices(out, &iov)?;
2384 iov.clear();
2385 }
2386 }
2387 start = end_pos + 1;
2389 }
2390
2391 if start < data.len() {
2393 let line = &data[start..];
2394 if let Some(dp) = memchr::memchr(delim, line) {
2395 if run_start < start {
2396 iov.push(IoSlice::new(&data[run_start..start]));
2397 }
2398 iov.push(IoSlice::new(&data[start..start + dp]));
2399 iov.push(IoSlice::new(&newline_buf));
2400 if !iov.is_empty() {
2401 write_ioslices(out, &iov)?;
2402 }
2403 return Ok(());
2404 }
2405 }
2406
2407 if run_start < data.len() {
2409 iov.push(IoSlice::new(&data[run_start..]));
2410 if !data.is_empty() && *data.last().unwrap() != line_delim {
2411 iov.push(IoSlice::new(&newline_buf));
2412 }
2413 }
2414 if !iov.is_empty() {
2415 write_ioslices(out, &iov)?;
2416 }
2417 Ok(())
2418}
2419
2420fn process_single_field_chunk(
2422 data: &[u8],
2423 delim: u8,
2424 target_idx: usize,
2425 line_delim: u8,
2426 suppress: bool,
2427 buf: &mut Vec<u8>,
2428) {
2429 buf.reserve(data.len());
2431 let mut start = 0;
2432 for end_pos in memchr_iter(line_delim, data) {
2433 let line = &data[start..end_pos];
2434 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
2435 start = end_pos + 1;
2436 }
2437 if start < data.len() {
2438 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
2439 }
2440}
2441
2442#[inline(always)]
2447fn extract_single_field_line(
2448 line: &[u8],
2449 delim: u8,
2450 target_idx: usize,
2451 line_delim: u8,
2452 suppress: bool,
2453 buf: &mut Vec<u8>,
2454) {
2455 let len = line.len();
2456 if len == 0 {
2457 if !suppress {
2458 unsafe { buf_push(buf, line_delim) };
2459 }
2460 return;
2461 }
2462
2463 let base = line.as_ptr();
2465
2466 if target_idx == 0 {
2468 match memchr::memchr(delim, line) {
2469 Some(pos) => unsafe {
2470 buf_extend_byte(buf, std::slice::from_raw_parts(base, pos), line_delim);
2471 },
2472 None => {
2473 if !suppress {
2474 unsafe {
2475 buf_extend_byte(buf, line, line_delim);
2476 }
2477 }
2478 }
2479 }
2480 return;
2481 }
2482
2483 let mut field_start = 0;
2485 let mut field_idx = 0;
2486 let mut has_delim = false;
2487
2488 for pos in memchr_iter(delim, line) {
2489 has_delim = true;
2490 if field_idx == target_idx {
2491 unsafe {
2492 buf_extend_byte(
2493 buf,
2494 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
2495 line_delim,
2496 );
2497 }
2498 return;
2499 }
2500 field_idx += 1;
2501 field_start = pos + 1;
2502 }
2503
2504 if !has_delim {
2505 if !suppress {
2506 unsafe {
2507 buf_extend_byte(buf, line, line_delim);
2508 }
2509 }
2510 return;
2511 }
2512
2513 if field_idx == target_idx {
2514 unsafe {
2515 buf_extend_byte(
2516 buf,
2517 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2518 line_delim,
2519 );
2520 }
2521 } else {
2522 unsafe { buf_push(buf, line_delim) };
2523 }
2524}
2525
2526#[inline(always)]
2530fn extract_fields_to_buf(
2531 line: &[u8],
2532 delim: u8,
2533 ranges: &[Range],
2534 output_delim: &[u8],
2535 suppress: bool,
2536 max_field: usize,
2537 field_mask: u64,
2538 line_delim: u8,
2539 buf: &mut Vec<u8>,
2540 complement: bool,
2541) {
2542 let len = line.len();
2543
2544 if len == 0 {
2545 if !suppress {
2546 buf.push(line_delim);
2547 }
2548 return;
2549 }
2550
2551 let needed = len + output_delim.len() * 16 + 1;
2554 if buf.capacity() - buf.len() < needed {
2555 buf.reserve(needed);
2556 }
2557
2558 let base = line.as_ptr();
2559 let mut field_num: usize = 1;
2560 let mut field_start: usize = 0;
2561 let mut first_output = true;
2562 let mut has_delim = false;
2563
2564 for delim_pos in memchr_iter(delim, line) {
2566 has_delim = true;
2567
2568 if is_selected(field_num, field_mask, ranges, complement) {
2569 if !first_output {
2570 unsafe { buf_extend(buf, output_delim) };
2571 }
2572 unsafe {
2573 buf_extend(
2574 buf,
2575 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2576 )
2577 };
2578 first_output = false;
2579 }
2580
2581 field_num += 1;
2582 field_start = delim_pos + 1;
2583
2584 if field_num > max_field {
2585 break;
2586 }
2587 }
2588
2589 if (field_num <= max_field || complement)
2591 && has_delim
2592 && is_selected(field_num, field_mask, ranges, complement)
2593 {
2594 if !first_output {
2595 unsafe { buf_extend(buf, output_delim) };
2596 }
2597 unsafe {
2598 buf_extend(
2599 buf,
2600 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2601 )
2602 };
2603 first_output = false;
2604 }
2605
2606 if !first_output {
2607 unsafe { buf_push(buf, line_delim) };
2608 } else if !has_delim {
2609 if !suppress {
2610 unsafe {
2611 buf_extend(buf, line);
2612 buf_push(buf, line_delim);
2613 }
2614 }
2615 } else {
2616 unsafe { buf_push(buf, line_delim) };
2617 }
2618}
2619
2620fn process_bytes_from_start(
2627 data: &[u8],
2628 max_bytes: usize,
2629 line_delim: u8,
2630 out: &mut impl Write,
2631) -> io::Result<()> {
2632 if data.len() < PARALLEL_THRESHOLD && max_bytes > 0 && max_bytes < usize::MAX {
2640 let mut start = 0;
2641 let mut all_fit = true;
2642 for pos in memchr_iter(line_delim, data) {
2643 if pos - start > max_bytes {
2644 all_fit = false;
2645 break;
2646 }
2647 start = pos + 1;
2648 }
2649 if all_fit && start < data.len() && data.len() - start > max_bytes {
2651 all_fit = false;
2652 }
2653 if all_fit {
2654 if !data.is_empty() && data[data.len() - 1] == line_delim {
2656 return out.write_all(data);
2657 } else if !data.is_empty() {
2658 out.write_all(data)?;
2659 return out.write_all(&[line_delim]);
2660 }
2661 return Ok(());
2662 }
2663 }
2664
2665 if data.len() >= PARALLEL_THRESHOLD {
2666 let chunks = split_for_scope(data, line_delim);
2667 let n = chunks.len();
2668 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2669 rayon::scope(|s| {
2670 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2671 s.spawn(move |_| {
2672 result.reserve(chunk.len());
2675 bytes_from_start_chunk(chunk, max_bytes, line_delim, result);
2676 });
2677 }
2678 });
2679 let slices: Vec<IoSlice> = results
2681 .iter()
2682 .filter(|r| !r.is_empty())
2683 .map(|r| IoSlice::new(r))
2684 .collect();
2685 write_ioslices(out, &slices)?;
2686 } else {
2687 if max_bytes <= 512 {
2693 let est_out = (data.len() / 4).max(max_bytes + 2);
2696 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2697 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2698 if !buf.is_empty() {
2699 out.write_all(&buf)?;
2700 }
2701 } else {
2702 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2706 }
2707 }
2708 Ok(())
2709}
2710
2711#[inline]
2716fn bytes_from_start_zerocopy(
2717 data: &[u8],
2718 max_bytes: usize,
2719 line_delim: u8,
2720 out: &mut impl Write,
2721) -> io::Result<()> {
2722 let newline_buf: [u8; 1] = [line_delim];
2723 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2724 let mut start = 0;
2725 let mut run_start: usize = 0;
2726
2727 for pos in memchr_iter(line_delim, data) {
2728 let line_len = pos - start;
2729 if line_len > max_bytes {
2730 if run_start < start {
2732 iov.push(IoSlice::new(&data[run_start..start]));
2733 }
2734 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2735 iov.push(IoSlice::new(&newline_buf));
2736 run_start = pos + 1;
2737
2738 if iov.len() >= MAX_IOV - 2 {
2739 write_ioslices(out, &iov)?;
2740 iov.clear();
2741 }
2742 }
2743 start = pos + 1;
2744 }
2745 if start < data.len() {
2747 let line_len = data.len() - start;
2748 if line_len > max_bytes {
2749 if run_start < start {
2750 iov.push(IoSlice::new(&data[run_start..start]));
2751 }
2752 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2753 iov.push(IoSlice::new(&newline_buf));
2754 if !iov.is_empty() {
2755 write_ioslices(out, &iov)?;
2756 }
2757 return Ok(());
2758 }
2759 }
2760 if run_start < data.len() {
2762 iov.push(IoSlice::new(&data[run_start..]));
2763 if !data.is_empty() && *data.last().unwrap() != line_delim {
2764 iov.push(IoSlice::new(&newline_buf));
2765 }
2766 }
2767 if !iov.is_empty() {
2768 write_ioslices(out, &iov)?;
2769 }
2770 Ok(())
2771}
2772
2773#[inline]
2778fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2779 buf.reserve(data.len());
2782
2783 let src = data.as_ptr();
2784 let dst_base = buf.as_mut_ptr();
2785 let mut wp = buf.len();
2786 let mut start = 0;
2787
2788 for pos in memchr_iter(line_delim, data) {
2789 let line_len = pos - start;
2790 let take = line_len.min(max_bytes);
2791 unsafe {
2792 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2793 *dst_base.add(wp + take) = line_delim;
2794 }
2795 wp += take + 1;
2796 start = pos + 1;
2797 }
2798 if start < data.len() {
2800 let line_len = data.len() - start;
2801 let take = line_len.min(max_bytes);
2802 unsafe {
2803 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2804 *dst_base.add(wp + take) = line_delim;
2805 }
2806 wp += take + 1;
2807 }
2808 unsafe { buf.set_len(wp) };
2809}
2810
2811fn process_bytes_from_offset(
2813 data: &[u8],
2814 skip_bytes: usize,
2815 line_delim: u8,
2816 out: &mut impl Write,
2817) -> io::Result<()> {
2818 if data.len() >= PARALLEL_THRESHOLD {
2819 let chunks = split_for_scope(data, line_delim);
2820 let n = chunks.len();
2821 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2822 rayon::scope(|s| {
2823 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2824 s.spawn(move |_| {
2825 result.reserve(chunk.len());
2826 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, result);
2827 });
2828 }
2829 });
2830 let slices: Vec<IoSlice> = results
2832 .iter()
2833 .filter(|r| !r.is_empty())
2834 .map(|r| IoSlice::new(r))
2835 .collect();
2836 write_ioslices(out, &slices)?;
2837 } else {
2838 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2840 }
2841 Ok(())
2842}
2843
2844#[inline]
2848fn bytes_from_offset_zerocopy(
2849 data: &[u8],
2850 skip_bytes: usize,
2851 line_delim: u8,
2852 out: &mut impl Write,
2853) -> io::Result<()> {
2854 let delim_buf = [line_delim];
2855 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2856
2857 let mut start = 0;
2858 for pos in memchr_iter(line_delim, data) {
2859 let line_len = pos - start;
2860 if line_len > skip_bytes {
2861 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2862 }
2863 iov.push(IoSlice::new(&delim_buf));
2864 if iov.len() >= MAX_IOV - 1 {
2866 write_ioslices(out, &iov)?;
2867 iov.clear();
2868 }
2869 start = pos + 1;
2870 }
2871 if start < data.len() {
2872 let line_len = data.len() - start;
2873 if line_len > skip_bytes {
2874 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2875 }
2876 iov.push(IoSlice::new(&delim_buf));
2877 }
2878 if !iov.is_empty() {
2879 write_ioslices(out, &iov)?;
2880 }
2881 Ok(())
2882}
2883
2884#[inline]
2887fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2888 buf.reserve(data.len());
2889
2890 let src = data.as_ptr();
2891 let dst_base = buf.as_mut_ptr();
2892 let mut wp = buf.len();
2893 let mut start = 0;
2894
2895 for pos in memchr_iter(line_delim, data) {
2896 let line_len = pos - start;
2897 if line_len > skip_bytes {
2898 let take = line_len - skip_bytes;
2899 unsafe {
2900 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2901 }
2902 wp += take;
2903 }
2904 unsafe {
2905 *dst_base.add(wp) = line_delim;
2906 }
2907 wp += 1;
2908 start = pos + 1;
2909 }
2910 if start < data.len() {
2911 let line_len = data.len() - start;
2912 if line_len > skip_bytes {
2913 let take = line_len - skip_bytes;
2914 unsafe {
2915 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2916 }
2917 wp += take;
2918 }
2919 unsafe {
2920 *dst_base.add(wp) = line_delim;
2921 }
2922 wp += 1;
2923 }
2924 unsafe { buf.set_len(wp) };
2925}
2926
2927fn process_bytes_mid_range(
2929 data: &[u8],
2930 start_byte: usize,
2931 end_byte: usize,
2932 line_delim: u8,
2933 out: &mut impl Write,
2934) -> io::Result<()> {
2935 let skip = start_byte.saturating_sub(1);
2936
2937 if data.len() >= PARALLEL_THRESHOLD {
2938 let chunks = split_for_scope(data, line_delim);
2939 let n = chunks.len();
2940 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2941 rayon::scope(|s| {
2942 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2943 s.spawn(move |_| {
2944 result.reserve(chunk.len());
2945 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, result);
2946 });
2947 }
2948 });
2949 let slices: Vec<IoSlice> = results
2950 .iter()
2951 .filter(|r| !r.is_empty())
2952 .map(|r| IoSlice::new(r))
2953 .collect();
2954 write_ioslices(out, &slices)?;
2955 } else {
2956 let mut buf = Vec::with_capacity(data.len());
2957 bytes_mid_range_chunk(data, skip, end_byte, line_delim, &mut buf);
2958 if !buf.is_empty() {
2959 out.write_all(&buf)?;
2960 }
2961 }
2962 Ok(())
2963}
2964
2965#[inline]
2969fn bytes_mid_range_chunk(
2970 data: &[u8],
2971 skip: usize,
2972 end_byte: usize,
2973 line_delim: u8,
2974 buf: &mut Vec<u8>,
2975) {
2976 buf.reserve(data.len());
2977
2978 let src = data.as_ptr();
2979 let dst_base = buf.as_mut_ptr();
2980 let mut wp = buf.len();
2981 let mut start = 0;
2982
2983 for pos in memchr_iter(line_delim, data) {
2984 let line_len = pos - start;
2985 if line_len > skip {
2986 let take_end = line_len.min(end_byte);
2987 let take = take_end - skip;
2988 unsafe {
2989 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2990 }
2991 wp += take;
2992 }
2993 unsafe {
2994 *dst_base.add(wp) = line_delim;
2995 }
2996 wp += 1;
2997 start = pos + 1;
2998 }
2999 if start < data.len() {
3000 let line_len = data.len() - start;
3001 if line_len > skip {
3002 let take_end = line_len.min(end_byte);
3003 let take = take_end - skip;
3004 unsafe {
3005 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
3006 }
3007 wp += take;
3008 }
3009 unsafe {
3010 *dst_base.add(wp) = line_delim;
3011 }
3012 wp += 1;
3013 }
3014 unsafe { buf.set_len(wp) };
3015}
3016
3017fn process_bytes_complement_mid(
3019 data: &[u8],
3020 skip_start: usize,
3021 skip_end: usize,
3022 line_delim: u8,
3023 out: &mut impl Write,
3024) -> io::Result<()> {
3025 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
3027 let chunks = split_for_scope(data, line_delim);
3028 let n = chunks.len();
3029 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
3030 rayon::scope(|s| {
3031 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
3032 s.spawn(move |_| {
3033 result.reserve(chunk.len());
3034 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, result);
3035 });
3036 }
3037 });
3038 let slices: Vec<IoSlice> = results
3039 .iter()
3040 .filter(|r| !r.is_empty())
3041 .map(|r| IoSlice::new(r))
3042 .collect();
3043 write_ioslices(out, &slices)?;
3044 } else {
3045 let mut buf = Vec::with_capacity(data.len());
3046 bytes_complement_mid_chunk(data, prefix_bytes, skip_end, line_delim, &mut buf);
3047 if !buf.is_empty() {
3048 out.write_all(&buf)?;
3049 }
3050 }
3051 Ok(())
3052}
3053
3054#[inline]
3057fn bytes_complement_mid_chunk(
3058 data: &[u8],
3059 prefix_bytes: usize,
3060 skip_end: usize,
3061 line_delim: u8,
3062 buf: &mut Vec<u8>,
3063) {
3064 buf.reserve(data.len());
3065
3066 let src = data.as_ptr();
3067 let dst_base = buf.as_mut_ptr();
3068 let mut wp = buf.len();
3069 let mut start = 0;
3070
3071 for pos in memchr_iter(line_delim, data) {
3072 let line_len = pos - start;
3073 let take_prefix = prefix_bytes.min(line_len);
3075 if take_prefix > 0 {
3076 unsafe {
3077 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
3078 }
3079 wp += take_prefix;
3080 }
3081 if line_len > skip_end {
3083 let suffix_len = line_len - skip_end;
3084 unsafe {
3085 std::ptr::copy_nonoverlapping(
3086 src.add(start + skip_end),
3087 dst_base.add(wp),
3088 suffix_len,
3089 );
3090 }
3091 wp += suffix_len;
3092 }
3093 unsafe {
3094 *dst_base.add(wp) = line_delim;
3095 }
3096 wp += 1;
3097 start = pos + 1;
3098 }
3099 if start < data.len() {
3100 let line_len = data.len() - start;
3101 let take_prefix = prefix_bytes.min(line_len);
3102 if take_prefix > 0 {
3103 unsafe {
3104 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
3105 }
3106 wp += take_prefix;
3107 }
3108 if line_len > skip_end {
3109 let suffix_len = line_len - skip_end;
3110 unsafe {
3111 std::ptr::copy_nonoverlapping(
3112 src.add(start + skip_end),
3113 dst_base.add(wp),
3114 suffix_len,
3115 );
3116 }
3117 wp += suffix_len;
3118 }
3119 unsafe {
3120 *dst_base.add(wp) = line_delim;
3121 }
3122 wp += 1;
3123 }
3124 unsafe { buf.set_len(wp) };
3125}
3126
3127fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3129 let line_delim = cfg.line_delim;
3130 let ranges = cfg.ranges;
3131 let complement = cfg.complement;
3132 let output_delim = cfg.output_delim;
3133
3134 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
3136 let max_bytes = ranges[0].end;
3137 if max_bytes < usize::MAX {
3138 return process_bytes_from_start(data, max_bytes, line_delim, out);
3139 }
3140 }
3141
3142 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
3144 let skip_bytes = ranges[0].start.saturating_sub(1);
3145 if skip_bytes > 0 {
3146 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
3147 }
3148 }
3149
3150 if !complement
3152 && ranges.len() == 1
3153 && ranges[0].start > 1
3154 && ranges[0].end < usize::MAX
3155 && output_delim.is_empty()
3156 {
3157 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
3158 }
3159
3160 if complement
3162 && ranges.len() == 1
3163 && ranges[0].start == 1
3164 && ranges[0].end < usize::MAX
3165 && output_delim.is_empty()
3166 {
3167 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
3168 }
3169
3170 if complement
3172 && ranges.len() == 1
3173 && ranges[0].end == usize::MAX
3174 && ranges[0].start > 1
3175 && output_delim.is_empty()
3176 {
3177 let max_bytes = ranges[0].start - 1;
3178 return process_bytes_from_start(data, max_bytes, line_delim, out);
3179 }
3180
3181 if complement
3183 && ranges.len() == 1
3184 && ranges[0].start > 1
3185 && ranges[0].end < usize::MAX
3186 && output_delim.is_empty()
3187 {
3188 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
3189 }
3190
3191 if data.len() >= PARALLEL_THRESHOLD {
3192 let chunks = split_for_scope(data, line_delim);
3193 let n = chunks.len();
3194 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
3195 rayon::scope(|s| {
3196 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
3197 s.spawn(move |_| {
3198 result.reserve(chunk.len());
3199 process_bytes_chunk(
3200 chunk,
3201 ranges,
3202 complement,
3203 output_delim,
3204 line_delim,
3205 result,
3206 );
3207 });
3208 }
3209 });
3210 let slices: Vec<IoSlice> = results
3211 .iter()
3212 .filter(|r| !r.is_empty())
3213 .map(|r| IoSlice::new(r))
3214 .collect();
3215 write_ioslices(out, &slices)?;
3216 } else {
3217 let mut buf = Vec::with_capacity(data.len());
3218 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
3219 if !buf.is_empty() {
3220 out.write_all(&buf)?;
3221 }
3222 }
3223 Ok(())
3224}
3225
3226fn process_bytes_chunk(
3231 data: &[u8],
3232 ranges: &[Range],
3233 complement: bool,
3234 output_delim: &[u8],
3235 line_delim: u8,
3236 buf: &mut Vec<u8>,
3237) {
3238 buf.reserve(data.len());
3239 let base = data.as_ptr();
3240 let mut start = 0;
3241 for end_pos in memchr_iter(line_delim, data) {
3242 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
3243 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3244 unsafe { buf_push(buf, line_delim) };
3245 start = end_pos + 1;
3246 }
3247 if start < data.len() {
3248 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
3249 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3250 unsafe { buf_push(buf, line_delim) };
3251 }
3252}
3253
3254#[inline(always)]
3258fn cut_bytes_to_buf(
3259 line: &[u8],
3260 ranges: &[Range],
3261 complement: bool,
3262 output_delim: &[u8],
3263 buf: &mut Vec<u8>,
3264) {
3265 let len = line.len();
3266 let base = line.as_ptr();
3267 let mut first_range = true;
3268
3269 let needed = len + output_delim.len() * ranges.len() + 1;
3271 if buf.capacity() - buf.len() < needed {
3272 buf.reserve(needed);
3273 }
3274
3275 if complement {
3276 let mut pos: usize = 1;
3277 for r in ranges {
3278 let rs = r.start;
3279 let re = r.end.min(len);
3280 if pos < rs {
3281 if !first_range && !output_delim.is_empty() {
3282 unsafe { buf_extend(buf, output_delim) };
3283 }
3284 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
3285 first_range = false;
3286 }
3287 pos = re + 1;
3288 if pos > len {
3289 break;
3290 }
3291 }
3292 if pos <= len {
3293 if !first_range && !output_delim.is_empty() {
3294 unsafe { buf_extend(buf, output_delim) };
3295 }
3296 unsafe {
3297 buf_extend(
3298 buf,
3299 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
3300 )
3301 };
3302 }
3303 } else if output_delim.is_empty() && ranges.len() == 1 {
3304 let start = ranges[0].start.saturating_sub(1);
3306 let end = ranges[0].end.min(len);
3307 if start < len {
3308 unsafe {
3309 buf_extend(
3310 buf,
3311 std::slice::from_raw_parts(base.add(start), end - start),
3312 )
3313 };
3314 }
3315 } else {
3316 for r in ranges {
3317 let start = r.start.saturating_sub(1);
3318 let end = r.end.min(len);
3319 if start >= len {
3320 break;
3321 }
3322 if !first_range && !output_delim.is_empty() {
3323 unsafe { buf_extend(buf, output_delim) };
3324 }
3325 unsafe {
3326 buf_extend(
3327 buf,
3328 std::slice::from_raw_parts(base.add(start), end - start),
3329 )
3330 };
3331 first_range = false;
3332 }
3333 }
3334}
3335
3336#[inline]
3340pub fn cut_fields(
3341 line: &[u8],
3342 delim: u8,
3343 ranges: &[Range],
3344 complement: bool,
3345 output_delim: &[u8],
3346 suppress_no_delim: bool,
3347 out: &mut impl Write,
3348) -> io::Result<bool> {
3349 if memchr::memchr(delim, line).is_none() {
3350 if !suppress_no_delim {
3351 out.write_all(line)?;
3352 return Ok(true);
3353 }
3354 return Ok(false);
3355 }
3356
3357 let mut field_num: usize = 1;
3358 let mut field_start: usize = 0;
3359 let mut first_output = true;
3360
3361 for delim_pos in memchr_iter(delim, line) {
3362 let selected = in_ranges(ranges, field_num) != complement;
3363 if selected {
3364 if !first_output {
3365 out.write_all(output_delim)?;
3366 }
3367 out.write_all(&line[field_start..delim_pos])?;
3368 first_output = false;
3369 }
3370 field_start = delim_pos + 1;
3371 field_num += 1;
3372 }
3373
3374 let selected = in_ranges(ranges, field_num) != complement;
3375 if selected {
3376 if !first_output {
3377 out.write_all(output_delim)?;
3378 }
3379 out.write_all(&line[field_start..])?;
3380 }
3381
3382 Ok(true)
3383}
3384
3385#[inline]
3387pub fn cut_bytes(
3388 line: &[u8],
3389 ranges: &[Range],
3390 complement: bool,
3391 output_delim: &[u8],
3392 out: &mut impl Write,
3393) -> io::Result<bool> {
3394 let mut first_range = true;
3395
3396 if complement {
3397 let len = line.len();
3398 let mut comp_ranges = Vec::new();
3399 let mut pos: usize = 1;
3400 for r in ranges {
3401 let rs = r.start;
3402 let re = r.end.min(len);
3403 if pos < rs {
3404 comp_ranges.push((pos, rs - 1));
3405 }
3406 pos = re + 1;
3407 if pos > len {
3408 break;
3409 }
3410 }
3411 if pos <= len {
3412 comp_ranges.push((pos, len));
3413 }
3414 for &(s, e) in &comp_ranges {
3415 if !first_range && !output_delim.is_empty() {
3416 out.write_all(output_delim)?;
3417 }
3418 out.write_all(&line[s - 1..e])?;
3419 first_range = false;
3420 }
3421 } else {
3422 for r in ranges {
3423 let start = r.start.saturating_sub(1);
3424 let end = r.end.min(line.len());
3425 if start >= line.len() {
3426 break;
3427 }
3428 if !first_range && !output_delim.is_empty() {
3429 out.write_all(output_delim)?;
3430 }
3431 out.write_all(&line[start..end])?;
3432 first_range = false;
3433 }
3434 }
3435 Ok(true)
3436}
3437
3438pub fn cut_field1_inplace(data: &mut [u8], delim: u8, line_delim: u8, suppress: bool) -> usize {
3446 let len = data.len();
3447 let mut wp: usize = 0;
3448 let mut rp: usize = 0;
3449
3450 while rp < len {
3451 match memchr::memchr2(delim, line_delim, &data[rp..]) {
3452 None => {
3453 if suppress {
3455 break;
3457 }
3458 let remaining = len - rp;
3459 if wp != rp {
3460 data.copy_within(rp..len, wp);
3461 }
3462 wp += remaining;
3463 break;
3464 }
3465 Some(offset) => {
3466 let actual = rp + offset;
3467 if data[actual] == line_delim {
3468 if suppress {
3470 rp = actual + 1;
3472 } else {
3473 let chunk_len = actual + 1 - rp;
3475 if wp != rp {
3476 data.copy_within(rp..actual + 1, wp);
3477 }
3478 wp += chunk_len;
3479 rp = actual + 1;
3480 }
3481 } else {
3482 let field_len = actual - rp;
3484 if wp != rp && field_len > 0 {
3485 data.copy_within(rp..actual, wp);
3486 }
3487 wp += field_len;
3488 data[wp] = line_delim;
3489 wp += 1;
3490 match memchr::memchr(line_delim, &data[actual + 1..]) {
3492 None => {
3493 rp = len;
3494 }
3495 Some(nl_off) => {
3496 rp = actual + 1 + nl_off + 1;
3497 }
3498 }
3499 }
3500 }
3501 }
3502 }
3503 wp
3504}
3505
3506pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3508 match cfg.mode {
3509 CutMode::Fields => process_fields_fast(data, cfg, out),
3510 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
3511 }
3512}
3513
3514pub fn process_cut_reader<R: BufRead>(
3519 mut reader: R,
3520 cfg: &CutConfig,
3521 out: &mut impl Write,
3522) -> io::Result<()> {
3523 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3525
3526 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3529
3530 loop {
3531 buf.reserve(CHUNK_SIZE);
3533 let read_start = buf.len();
3534 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3535 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3536 buf.truncate(read_start + n);
3537
3538 if buf.is_empty() {
3539 break;
3540 }
3541
3542 if n == 0 {
3543 process_cut_data(&buf, cfg, out)?;
3545 break;
3546 }
3547
3548 let process_end = match memchr::memrchr(line_delim, &buf) {
3550 Some(pos) => pos + 1,
3551 None => {
3552 continue;
3554 }
3555 };
3556
3557 process_cut_data(&buf[..process_end], cfg, out)?;
3559
3560 let leftover_len = buf.len() - process_end;
3562 if leftover_len > 0 {
3563 buf.copy_within(process_end.., 0);
3564 }
3565 buf.truncate(leftover_len);
3566 }
3567
3568 Ok(())
3569}
3570
/// Reads from `reader` until `buf` is full or end of input is reached.
///
/// Returns the number of bytes stored at the start of `buf`; a value smaller
/// than `buf.len()` means end of input was hit. Reads that fail with
/// `ErrorKind::Interrupted` are retried, including the very first one.
///
/// # Errors
///
/// Returns the first non-`Interrupted` error reported by `reader`.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3590
3591pub fn process_cut_data_mut(data: &mut [u8], cfg: &CutConfig) -> Option<usize> {
3599 if cfg.complement {
3600 return None;
3601 }
3602
3603 match cfg.mode {
3604 CutMode::Fields => {
3605 if cfg.output_delim.len() != 1 || cfg.output_delim[0] != cfg.delim {
3607 return None;
3608 }
3609 if cfg.delim == cfg.line_delim {
3610 return None;
3611 }
3612 Some(cut_fields_inplace_general(
3613 data,
3614 cfg.delim,
3615 cfg.line_delim,
3616 cfg.ranges,
3617 cfg.suppress_no_delim,
3618 ))
3619 }
3620 CutMode::Bytes | CutMode::Characters => {
3621 if !cfg.output_delim.is_empty() {
3622 return None;
3623 }
3624 Some(cut_bytes_inplace_general(data, cfg.line_delim, cfg.ranges))
3625 }
3626 }
3627}
3628
3629fn cut_fields_inplace_general(
3632 data: &mut [u8],
3633 delim: u8,
3634 line_delim: u8,
3635 ranges: &[Range],
3636 suppress: bool,
3637) -> usize {
3638 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == 1 {
3640 return cut_field1_inplace(data, delim, line_delim, suppress);
3641 }
3642
3643 let len = data.len();
3644 if len == 0 {
3645 return 0;
3646 }
3647
3648 let max_field = ranges.last().map_or(0, |r| r.end);
3649 let max_delims = max_field.min(64);
3650 let mut wp: usize = 0;
3651 let mut rp: usize = 0;
3652
3653 while rp < len {
3654 let line_end = memchr::memchr(line_delim, &data[rp..])
3655 .map(|p| rp + p)
3656 .unwrap_or(len);
3657 let line_len = line_end - rp;
3658
3659 let mut delim_pos = [0usize; 64];
3661 let mut num_delims: usize = 0;
3662
3663 for pos in memchr_iter(delim, &data[rp..line_end]) {
3664 if num_delims < max_delims {
3665 delim_pos[num_delims] = pos;
3666 num_delims += 1;
3667 if num_delims >= max_delims {
3668 break;
3669 }
3670 }
3671 }
3672
3673 if num_delims == 0 {
3674 if !suppress {
3676 if wp != rp {
3677 data.copy_within(rp..line_end, wp);
3678 }
3679 wp += line_len;
3680 if line_end < len {
3681 data[wp] = line_delim;
3682 wp += 1;
3683 }
3684 }
3685 } else {
3686 let total_fields = num_delims + 1;
3687 let mut first_output = true;
3688
3689 for r in ranges {
3690 let range_start = r.start;
3691 let range_end = r.end.min(total_fields);
3692 if range_start > total_fields {
3693 break;
3694 }
3695 for field_num in range_start..=range_end {
3696 if field_num > total_fields {
3697 break;
3698 }
3699
3700 let field_start = if field_num == 1 {
3701 0
3702 } else if field_num - 2 < num_delims {
3703 delim_pos[field_num - 2] + 1
3704 } else {
3705 continue;
3706 };
3707 let field_end = if field_num <= num_delims {
3708 delim_pos[field_num - 1]
3709 } else {
3710 line_len
3711 };
3712
3713 if !first_output {
3714 data[wp] = delim;
3715 wp += 1;
3716 }
3717 let flen = field_end - field_start;
3718 if flen > 0 {
3719 data.copy_within(rp + field_start..rp + field_start + flen, wp);
3720 wp += flen;
3721 }
3722 first_output = false;
3723 }
3724 }
3725
3726 if !first_output && line_end < len {
3727 data[wp] = line_delim;
3728 wp += 1;
3729 } else if first_output && line_end < len {
3730 data[wp] = line_delim;
3732 wp += 1;
3733 }
3734 }
3735
3736 rp = if line_end < len { line_end + 1 } else { len };
3737 }
3738
3739 wp
3740}
3741
3742fn cut_bytes_inplace_general(data: &mut [u8], line_delim: u8, ranges: &[Range]) -> usize {
3744 let len = data.len();
3745 if len == 0 {
3746 return 0;
3747 }
3748
3749 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == usize::MAX {
3751 return len;
3752 }
3753
3754 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end < usize::MAX {
3756 return cut_bytes_from_start_inplace(data, line_delim, ranges[0].end);
3757 }
3758
3759 let mut wp: usize = 0;
3760 let mut rp: usize = 0;
3761
3762 while rp < len {
3763 let line_end = memchr::memchr(line_delim, &data[rp..])
3764 .map(|p| rp + p)
3765 .unwrap_or(len);
3766 let line_len = line_end - rp;
3767
3768 for r in ranges {
3769 let start = r.start.saturating_sub(1);
3770 let end = r.end.min(line_len);
3771 if start >= line_len {
3772 break;
3773 }
3774 let flen = end - start;
3775 if flen > 0 {
3776 data.copy_within(rp + start..rp + start + flen, wp);
3777 wp += flen;
3778 }
3779 }
3780
3781 if line_end < len {
3782 data[wp] = line_delim;
3783 wp += 1;
3784 }
3785
3786 rp = if line_end < len { line_end + 1 } else { len };
3787 }
3788
3789 wp
3790}
3791
3792fn cut_bytes_from_start_inplace(data: &mut [u8], line_delim: u8, max_bytes: usize) -> usize {
3794 let len = data.len();
3795
3796 let mut all_fit = true;
3798 let mut start = 0;
3799 for pos in memchr_iter(line_delim, data) {
3800 if pos - start > max_bytes {
3801 all_fit = false;
3802 break;
3803 }
3804 start = pos + 1;
3805 }
3806 if all_fit && start < len && len - start > max_bytes {
3807 all_fit = false;
3808 }
3809 if all_fit {
3810 return len;
3811 }
3812
3813 let mut wp: usize = 0;
3815 let mut rp: usize = 0;
3816
3817 while rp < len {
3818 let line_end = memchr::memchr(line_delim, &data[rp..])
3819 .map(|p| rp + p)
3820 .unwrap_or(len);
3821 let line_len = line_end - rp;
3822
3823 let take = line_len.min(max_bytes);
3824 if take > 0 && wp != rp {
3825 data.copy_within(rp..rp + take, wp);
3826 }
3827 wp += take;
3828
3829 if line_end < len {
3830 data[wp] = line_delim;
3831 wp += 1;
3832 }
3833
3834 rp = if line_end < len { line_end + 1 } else { len };
3835 }
3836
3837 wp
3838}
3839
/// Selects what a cut operation counts when applying its ranges.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Ranges address byte positions within each line.
    Bytes,
    /// Ranges address character positions; `process_cut_data` currently routes
    /// this through the same byte-oriented path as [`CutMode::Bytes`].
    Characters,
    /// Ranges address delimiter-separated fields within each line.
    Fields,
}