1use memchr::memchr_iter;
2use std::io::{self, BufRead, IoSlice, Write};
3
/// Input size in bytes (8 MiB) from which the field processors split the
/// data into line-aligned chunks and process them in parallel.
const PARALLEL_THRESHOLD: usize = 8 << 20;
8
/// Largest number of `IoSlice`s passed to a single `write_vectored` call.
// NOTE(review): presumably chosen to match the platform's IOV_MAX — confirm.
const MAX_IOV: usize = 1 << 10;
11
12pub struct CutConfig<'a> {
14 pub mode: CutMode,
15 pub ranges: &'a [Range],
16 pub complement: bool,
17 pub delim: u8,
18 pub output_delim: &'a [u8],
19 pub suppress_no_delim: bool,
20 pub line_delim: u8,
21}
22
/// Inclusive span of 1-based positions.
///
/// `parse_ranges` stores an open-ended upper bound (`N-`) as `usize::MAX`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position (1-based, inclusive).
    pub start: usize,
    /// Last selected position (inclusive).
    pub end: usize,
}
29
30pub fn parse_ranges(spec: &str, no_merge_adjacent: bool) -> Result<Vec<Range>, String> {
37 let mut ranges = Vec::new();
38
39 for part in spec.split(',') {
40 let part = part.trim();
41 if part.is_empty() {
42 continue;
43 }
44
45 if let Some(idx) = part.find('-') {
46 let left = &part[..idx];
47 let right = &part[idx + 1..];
48
49 if left.is_empty() && right.is_empty() {
51 return Err("invalid range with no endpoint: -".to_string());
52 }
53
54 let start = if left.is_empty() {
55 1
56 } else {
57 left.parse::<usize>()
58 .map_err(|_| format!("invalid range: '{}'", part))?
59 };
60
61 let end = if right.is_empty() {
62 usize::MAX
63 } else {
64 right
65 .parse::<usize>()
66 .map_err(|_| format!("invalid range: '{}'", part))?
67 };
68
69 if start == 0 {
70 return Err("fields and positions are numbered from 1".to_string());
71 }
72 if start > end {
73 return Err(format!("invalid decreasing range: '{}'", part));
74 }
75
76 ranges.push(Range { start, end });
77 } else {
78 let n = part
79 .parse::<usize>()
80 .map_err(|_| format!("invalid field: '{}'", part))?;
81 if n == 0 {
82 return Err("fields and positions are numbered from 1".to_string());
83 }
84 ranges.push(Range { start: n, end: n });
85 }
86 }
87
88 if ranges.is_empty() {
89 return Err("you must specify a list of bytes, characters, or fields".to_string());
90 }
91
92 ranges.sort_by_key(|r| (r.start, r.end));
94 let mut merged = vec![ranges[0].clone()];
95 for r in &ranges[1..] {
96 let last = merged.last_mut().unwrap();
97 if no_merge_adjacent {
98 if r.start <= last.end {
100 last.end = last.end.max(r.end);
101 } else {
102 merged.push(r.clone());
103 }
104 } else {
105 if r.start <= last.end.saturating_add(1) {
107 last.end = last.end.max(r.end);
108 } else {
109 merged.push(r.clone());
110 }
111 }
112 }
113
114 Ok(merged)
115}
116
117#[inline(always)]
120fn in_ranges(ranges: &[Range], pos: usize) -> bool {
121 for r in ranges {
122 if pos < r.start {
123 return false;
124 }
125 if pos <= r.end {
126 return true;
127 }
128 }
129 false
130}
131
132#[inline]
135fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
136 let mut mask: u64 = 0;
137 for i in 1..=64u32 {
138 let in_range = in_ranges(ranges, i as usize);
139 if in_range != complement {
140 mask |= 1u64 << (i - 1);
141 }
142 }
143 mask
144}
145
146#[inline(always)]
148fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
149 if field_num <= 64 {
150 (mask >> (field_num - 1)) & 1 == 1
151 } else {
152 in_ranges(ranges, field_num) != complement
153 }
154}
155
/// Appends `data` to `buf` without checking or growing the capacity.
///
/// # Safety
/// `buf` must already have at least `data.len()` bytes of spare capacity
/// (`buf.capacity() - buf.len() >= data.len()`), and `data` must not point
/// into `buf`'s spare capacity.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    let len = buf.len();
    // Catches contract violations in debug builds; compiled out in release.
    debug_assert!(
        buf.capacity() - len >= data.len(),
        "buf_extend: insufficient spare capacity"
    );
    // SAFETY: the caller guarantees room for `data.len()` more bytes, so the
    // destination is inside the allocation and is fully initialised by the
    // copy before `set_len` exposes it.
    unsafe {
        std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr().add(len), data.len());
        buf.set_len(len + data.len());
    }
}
168
/// Appends the single byte `b` to `buf` without checking or growing the capacity.
///
/// # Safety
/// `buf` must already have at least one byte of spare capacity
/// (`buf.len() < buf.capacity()`).
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    let len = buf.len();
    // Catches contract violations in debug builds; compiled out in release.
    debug_assert!(len < buf.capacity(), "buf_push: no spare capacity");
    // SAFETY: the caller guarantees one spare byte, so index `len` is inside
    // the allocation and is written before `set_len` exposes it.
    unsafe {
        buf.as_mut_ptr().add(len).write(b);
        buf.set_len(len + 1);
    }
}
179
/// Appends `data` followed by the byte `b` to `buf` without checking or
/// growing the capacity.
///
/// # Safety
/// `buf` must already have at least `data.len() + 1` bytes of spare capacity,
/// and `data` must not point into `buf`'s spare capacity.
#[inline(always)]
unsafe fn buf_extend_byte(buf: &mut Vec<u8>, data: &[u8], b: u8) {
    let len = buf.len();
    // Catches contract violations in debug builds; compiled out in release.
    debug_assert!(
        buf.capacity() - len > data.len(),
        "buf_extend_byte: insufficient spare capacity"
    );
    // SAFETY: the caller guarantees room for `data.len() + 1` more bytes, so
    // both writes stay inside the allocation and initialise every byte that
    // `set_len` exposes.
    unsafe {
        let dst = buf.as_mut_ptr().add(len);
        std::ptr::copy_nonoverlapping(data.as_ptr(), dst, data.len());
        dst.add(data.len()).write(b);
        buf.set_len(len + data.len() + 1);
    }
}
194
195#[inline]
199fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
200 if slices.is_empty() {
201 return Ok(());
202 }
203 for batch in slices.chunks(MAX_IOV) {
204 let total: usize = batch.iter().map(|s| s.len()).sum();
205 let written = out.write_vectored(batch)?;
206 if written >= total {
207 continue;
208 }
209 if written == 0 {
210 return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
211 }
212 write_ioslices_slow(out, batch, written)?;
213 }
214 Ok(())
215}
216
/// Writes `slices` to `out` after skipping the first `skip` bytes of their
/// concatenation, using `write_all` for each remaining piece.
///
/// Used to finish a batch after a partial vectored write.
#[cold]
#[inline(never)]
fn write_ioslices_slow(
    out: &mut impl Write,
    slices: &[IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for slice in slices {
        let available = slice.len();
        if available > skip {
            // Only the first partially written slice has a non-zero offset.
            out.write_all(&slice[skip..])?;
            skip = 0;
        } else {
            skip -= available;
        }
    }
    Ok(())
}
236
/// Returns the parallelism reported by the standard library, or 1 when it
/// cannot be determined.
#[inline]
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    }
}
248
249fn split_for_scope<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
252 let num_threads = num_cpus().max(1);
253 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
254 return vec![data];
255 }
256
257 let chunk_size = data.len() / num_threads;
258 let mut chunks = Vec::with_capacity(num_threads);
259 let mut pos = 0;
260
261 for _ in 0..num_threads - 1 {
262 let target = pos + chunk_size;
263 if target >= data.len() {
264 break;
265 }
266 let boundary = memchr::memchr(line_delim, &data[target..])
267 .map(|p| target + p + 1)
268 .unwrap_or(data.len());
269 if boundary > pos {
270 chunks.push(&data[pos..boundary]);
271 }
272 pos = boundary;
273 }
274
275 if pos < data.len() {
276 chunks.push(&data[pos..]);
277 }
278
279 chunks
280}
281
282fn process_fields_multi_select(
289 data: &[u8],
290 delim: u8,
291 line_delim: u8,
292 ranges: &[Range],
293 suppress: bool,
294 out: &mut impl Write,
295) -> io::Result<()> {
296 let max_field = ranges.last().map_or(0, |r| r.end);
297
298 if data.len() >= PARALLEL_THRESHOLD {
299 let chunks = split_for_scope(data, line_delim);
300 let n = chunks.len();
301 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
302 rayon::scope(|s| {
303 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
304 s.spawn(move |_| {
305 result.reserve(chunk.len() * 3 / 4);
306 multi_select_chunk(
307 chunk, delim, line_delim, ranges, max_field, suppress, result,
308 );
309 });
310 }
311 });
312 let slices: Vec<IoSlice> = results
313 .iter()
314 .filter(|r| !r.is_empty())
315 .map(|r| IoSlice::new(r))
316 .collect();
317 write_ioslices(out, &slices)?;
318 } else {
319 let mut buf = Vec::with_capacity(data.len() * 3 / 4);
320 multi_select_chunk(
321 data, delim, line_delim, ranges, max_field, suppress, &mut buf,
322 );
323 if !buf.is_empty() {
324 out.write_all(&buf)?;
325 }
326 }
327 Ok(())
328}
329
330fn multi_select_chunk(
336 data: &[u8],
337 delim: u8,
338 line_delim: u8,
339 ranges: &[Range],
340 max_field: usize,
341 suppress: bool,
342 buf: &mut Vec<u8>,
343) {
344 if delim == line_delim {
346 buf.reserve(data.len());
347 let base = data.as_ptr();
348 let mut start = 0;
349 for end_pos in memchr_iter(line_delim, data) {
350 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
351 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
352 start = end_pos + 1;
353 }
354 if start < data.len() {
355 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
356 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
357 }
358 return;
359 }
360
361 buf.reserve(data.len());
362 let base = data.as_ptr();
363 let data_len = data.len();
364
365 let mut line_start: usize = 0;
367 let mut delim_pos = [0usize; 64];
368 let mut num_delims: usize = 0;
369 let max_delims = max_field.min(64);
370 let mut at_max = false;
371
372 for pos in memchr::memchr2_iter(delim, line_delim, data) {
374 let byte = unsafe { *base.add(pos) };
375
376 if byte == line_delim {
377 let line_len = pos - line_start;
379 if num_delims == 0 {
380 if !suppress {
382 unsafe {
383 buf_extend(
384 buf,
385 std::slice::from_raw_parts(base.add(line_start), line_len),
386 );
387 buf_push(buf, line_delim);
388 }
389 }
390 } else {
391 let total_fields = num_delims + 1;
393 let mut first_output = true;
394
395 for r in ranges {
396 let range_start = r.start;
397 let range_end = r.end.min(total_fields);
398 if range_start > total_fields {
399 break;
400 }
401 for field_num in range_start..=range_end {
402 if field_num > total_fields {
403 break;
404 }
405
406 let field_start = if field_num == 1 {
407 line_start
408 } else if field_num - 2 < num_delims {
409 delim_pos[field_num - 2] + 1
410 } else {
411 continue;
412 };
413 let field_end = if field_num <= num_delims {
414 delim_pos[field_num - 1]
415 } else {
416 pos
417 };
418
419 if !first_output {
420 unsafe { buf_push(buf, delim) };
421 }
422 unsafe {
423 buf_extend(
424 buf,
425 std::slice::from_raw_parts(
426 base.add(field_start),
427 field_end - field_start,
428 ),
429 );
430 }
431 first_output = false;
432 }
433 }
434
435 unsafe { buf_push(buf, line_delim) };
436 }
437
438 line_start = pos + 1;
440 num_delims = 0;
441 at_max = false;
442 } else {
443 if !at_max && num_delims < max_delims {
445 delim_pos[num_delims] = pos;
446 num_delims += 1;
447 if num_delims >= max_delims {
448 at_max = true;
449 }
450 }
451 }
452 }
453
454 if line_start < data_len {
456 if num_delims == 0 {
457 if !suppress {
458 unsafe {
459 buf_extend(
460 buf,
461 std::slice::from_raw_parts(base.add(line_start), data_len - line_start),
462 );
463 buf_push(buf, line_delim);
464 }
465 }
466 } else {
467 let total_fields = num_delims + 1;
468 let mut first_output = true;
469
470 for r in ranges {
471 let range_start = r.start;
472 let range_end = r.end.min(total_fields);
473 if range_start > total_fields {
474 break;
475 }
476 for field_num in range_start..=range_end {
477 if field_num > total_fields {
478 break;
479 }
480
481 let field_start = if field_num == 1 {
482 line_start
483 } else if field_num - 2 < num_delims {
484 delim_pos[field_num - 2] + 1
485 } else {
486 continue;
487 };
488 let field_end = if field_num <= num_delims {
489 delim_pos[field_num - 1]
490 } else {
491 data_len
492 };
493
494 if !first_output {
495 unsafe { buf_push(buf, delim) };
496 }
497 unsafe {
498 buf_extend(
499 buf,
500 std::slice::from_raw_parts(
501 base.add(field_start),
502 field_end - field_start,
503 ),
504 );
505 }
506 first_output = false;
507 }
508 }
509
510 unsafe { buf_push(buf, line_delim) };
511 }
512 }
513}
514
515#[inline(always)]
520fn multi_select_line(
521 line: &[u8],
522 delim: u8,
523 line_delim: u8,
524 ranges: &[Range],
525 max_field: usize,
526 suppress: bool,
527 buf: &mut Vec<u8>,
528) {
529 let len = line.len();
530 if len == 0 {
531 if !suppress {
532 unsafe { buf_push(buf, line_delim) };
533 }
534 return;
535 }
536
537 let base = line.as_ptr();
539
540 let mut delim_pos = [0usize; 64];
543 let mut num_delims: usize = 0;
544 let max_delims = max_field.min(64);
545
546 for pos in memchr_iter(delim, line) {
547 if num_delims < max_delims {
548 delim_pos[num_delims] = pos;
549 num_delims += 1;
550 if num_delims >= max_delims {
551 break;
552 }
553 }
554 }
555
556 if num_delims == 0 {
557 if !suppress {
558 unsafe {
559 buf_extend(buf, line);
560 buf_push(buf, line_delim);
561 }
562 }
563 return;
564 }
565
566 let total_fields = num_delims + 1;
570 let mut first_output = true;
571
572 for r in ranges {
573 let range_start = r.start;
574 let range_end = r.end.min(total_fields);
575 if range_start > total_fields {
576 break;
577 }
578 for field_num in range_start..=range_end {
579 if field_num > total_fields {
580 break;
581 }
582
583 let field_start = if field_num == 1 {
584 0
585 } else if field_num - 2 < num_delims {
586 delim_pos[field_num - 2] + 1
587 } else {
588 continue;
589 };
590 let field_end = if field_num <= num_delims {
591 delim_pos[field_num - 1]
592 } else {
593 len
594 };
595
596 if !first_output {
597 unsafe { buf_push(buf, delim) };
598 }
599 unsafe {
600 buf_extend(
601 buf,
602 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
603 );
604 }
605 first_output = false;
606 }
607 }
608
609 unsafe { buf_push(buf, line_delim) };
610}
611
612fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
616 let delim = cfg.delim;
617 let line_delim = cfg.line_delim;
618 let ranges = cfg.ranges;
619 let complement = cfg.complement;
620 let output_delim = cfg.output_delim;
621 let suppress = cfg.suppress_no_delim;
622
623 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
631 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
632 }
633
634 if complement
636 && ranges.len() == 1
637 && output_delim.len() == 1
638 && output_delim[0] == delim
639 && ranges[0].start == ranges[0].end
640 {
641 return process_complement_single_field(
642 data,
643 delim,
644 line_delim,
645 ranges[0].start,
646 suppress,
647 out,
648 );
649 }
650
651 if complement
654 && ranges.len() == 1
655 && ranges[0].start > 1
656 && ranges[0].end < usize::MAX
657 && output_delim.len() == 1
658 && output_delim[0] == delim
659 {
660 return process_complement_range(
661 data,
662 delim,
663 line_delim,
664 ranges[0].start,
665 ranges[0].end,
666 suppress,
667 out,
668 );
669 }
670
671 if !complement
673 && ranges.len() == 1
674 && ranges[0].start == 1
675 && output_delim.len() == 1
676 && output_delim[0] == delim
677 && ranges[0].end < usize::MAX
678 {
679 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
680 }
681
682 if !complement
684 && ranges.len() == 1
685 && ranges[0].end == usize::MAX
686 && ranges[0].start > 1
687 && output_delim.len() == 1
688 && output_delim[0] == delim
689 {
690 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
691 }
692
693 if !complement
695 && ranges.len() == 1
696 && ranges[0].start > 1
697 && ranges[0].end < usize::MAX
698 && output_delim.len() == 1
699 && output_delim[0] == delim
700 {
701 return process_fields_mid_range(
702 data,
703 delim,
704 line_delim,
705 ranges[0].start,
706 ranges[0].end,
707 suppress,
708 out,
709 );
710 }
711
712 if !complement
718 && ranges.len() > 1
719 && ranges.last().map_or(false, |r| r.end < usize::MAX)
720 && output_delim.len() == 1
721 && output_delim[0] == delim
722 && delim != line_delim
723 {
724 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
725 }
726
727 let max_field = if complement {
729 usize::MAX
730 } else {
731 ranges.last().map(|r| r.end).unwrap_or(0)
732 };
733 let field_mask = compute_field_mask(ranges, complement);
734
735 if data.len() >= PARALLEL_THRESHOLD {
736 let chunks = split_for_scope(data, line_delim);
737 let n = chunks.len();
738 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
739 rayon::scope(|s| {
740 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
741 s.spawn(move |_| {
742 result.reserve(chunk.len() + 1);
743 process_fields_chunk(
744 chunk,
745 delim,
746 ranges,
747 output_delim,
748 suppress,
749 max_field,
750 field_mask,
751 line_delim,
752 complement,
753 result,
754 );
755 });
756 }
757 });
758 let slices: Vec<IoSlice> = results
759 .iter()
760 .filter(|r| !r.is_empty())
761 .map(|r| IoSlice::new(r))
762 .collect();
763 write_ioslices(out, &slices)?;
764 } else {
765 let mut buf = Vec::with_capacity(data.len() + 1);
767 process_fields_chunk(
768 data,
769 delim,
770 ranges,
771 output_delim,
772 suppress,
773 max_field,
774 field_mask,
775 line_delim,
776 complement,
777 &mut buf,
778 );
779 if !buf.is_empty() {
780 out.write_all(&buf)?;
781 }
782 }
783 Ok(())
784}
785
786fn process_fields_chunk(
791 data: &[u8],
792 delim: u8,
793 ranges: &[Range],
794 output_delim: &[u8],
795 suppress: bool,
796 max_field: usize,
797 field_mask: u64,
798 line_delim: u8,
799 complement: bool,
800 buf: &mut Vec<u8>,
801) {
802 if delim != line_delim && max_field < usize::MAX && !complement {
809 buf.reserve(data.len());
810 let mut start = 0;
811 for end_pos in memchr_iter(line_delim, data) {
812 let line = &data[start..end_pos];
813 extract_fields_to_buf(
814 line,
815 delim,
816 ranges,
817 output_delim,
818 suppress,
819 max_field,
820 field_mask,
821 line_delim,
822 buf,
823 complement,
824 );
825 start = end_pos + 1;
826 }
827 if start < data.len() {
828 extract_fields_to_buf(
829 &data[start..],
830 delim,
831 ranges,
832 output_delim,
833 suppress,
834 max_field,
835 field_mask,
836 line_delim,
837 buf,
838 complement,
839 );
840 }
841 return;
842 }
843
844 if delim != line_delim {
848 buf.reserve(data.len());
849
850 let data_len = data.len();
851 let base = data.as_ptr();
852 let mut line_start: usize = 0;
853 let mut field_start: usize = 0;
854 let mut field_num: usize = 1;
855 let mut first_output = true;
856 let mut has_delim = false;
857
858 for pos in memchr::memchr2_iter(delim, line_delim, data) {
859 let byte = unsafe { *base.add(pos) };
860
861 if byte == line_delim {
862 if (field_num <= max_field || complement)
864 && has_delim
865 && is_selected(field_num, field_mask, ranges, complement)
866 {
867 if !first_output {
868 unsafe { buf_extend(buf, output_delim) };
869 }
870 unsafe {
871 buf_extend(
872 buf,
873 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
874 )
875 };
876 first_output = false;
877 }
878
879 if !first_output {
880 unsafe { buf_push(buf, line_delim) };
881 } else if !has_delim {
882 if !suppress {
883 unsafe {
884 buf_extend(
885 buf,
886 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
887 );
888 buf_push(buf, line_delim);
889 }
890 }
891 } else {
892 unsafe { buf_push(buf, line_delim) };
893 }
894
895 line_start = pos + 1;
897 field_start = pos + 1;
898 field_num = 1;
899 first_output = true;
900 has_delim = false;
901 } else {
902 has_delim = true;
904
905 if is_selected(field_num, field_mask, ranges, complement) {
906 if !first_output {
907 unsafe { buf_extend(buf, output_delim) };
908 }
909 unsafe {
910 buf_extend(
911 buf,
912 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
913 )
914 };
915 first_output = false;
916 }
917
918 field_num += 1;
919 field_start = pos + 1;
920 }
921 }
922
923 if line_start < data_len {
925 if line_start < data_len {
926 if (field_num <= max_field || complement)
927 && has_delim
928 && is_selected(field_num, field_mask, ranges, complement)
929 {
930 if !first_output {
931 unsafe { buf_extend(buf, output_delim) };
932 }
933 unsafe {
934 buf_extend(
935 buf,
936 std::slice::from_raw_parts(
937 base.add(field_start),
938 data_len - field_start,
939 ),
940 )
941 };
942 first_output = false;
943 }
944
945 if !first_output {
946 unsafe { buf_push(buf, line_delim) };
947 } else if !has_delim {
948 if !suppress {
949 unsafe {
950 buf_extend(
951 buf,
952 std::slice::from_raw_parts(
953 base.add(line_start),
954 data_len - line_start,
955 ),
956 );
957 buf_push(buf, line_delim);
958 }
959 }
960 } else {
961 unsafe { buf_push(buf, line_delim) };
962 }
963 }
964 }
965
966 return;
967 }
968
969 let mut start = 0;
971 for end_pos in memchr_iter(line_delim, data) {
972 let line = &data[start..end_pos];
973 extract_fields_to_buf(
974 line,
975 delim,
976 ranges,
977 output_delim,
978 suppress,
979 max_field,
980 field_mask,
981 line_delim,
982 buf,
983 complement,
984 );
985 start = end_pos + 1;
986 }
987 if start < data.len() {
988 extract_fields_to_buf(
989 &data[start..],
990 delim,
991 ranges,
992 output_delim,
993 suppress,
994 max_field,
995 field_mask,
996 line_delim,
997 buf,
998 complement,
999 );
1000 }
1001}
1002
1003fn process_single_field(
1009 data: &[u8],
1010 delim: u8,
1011 line_delim: u8,
1012 target: usize,
1013 suppress: bool,
1014 out: &mut impl Write,
1015) -> io::Result<()> {
1016 let target_idx = target - 1;
1017
1018 const FIELD_PARALLEL_MIN: usize = 16 * 1024 * 1024;
1020
1021 if delim != line_delim {
1022 if target_idx == 0 && !suppress {
1026 if data.len() >= FIELD_PARALLEL_MIN {
1027 return single_field1_parallel(data, delim, line_delim, out);
1028 }
1029 let mut buf = Vec::with_capacity(data.len() + 1);
1034 single_field1_to_buf(data, delim, line_delim, &mut buf);
1035 if !buf.is_empty() {
1036 out.write_all(&buf)?;
1037 }
1038 return Ok(());
1039 }
1040
1041 if data.len() >= FIELD_PARALLEL_MIN {
1045 let chunks = split_for_scope(data, line_delim);
1046 let n = chunks.len();
1047 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1048 rayon::scope(|s| {
1049 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1050 s.spawn(move |_| {
1051 result.reserve(chunk.len() / 2);
1052 process_single_field_chunk(
1053 chunk, delim, target_idx, line_delim, suppress, result,
1054 );
1055 });
1056 }
1057 });
1058 let slices: Vec<IoSlice> = results
1059 .iter()
1060 .filter(|r| !r.is_empty())
1061 .map(|r| IoSlice::new(r))
1062 .collect();
1063 write_ioslices(out, &slices)?;
1064 } else {
1065 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
1066 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1067 if !buf.is_empty() {
1068 out.write_all(&buf)?;
1069 }
1070 }
1071 return Ok(());
1072 }
1073
1074 if data.len() >= FIELD_PARALLEL_MIN {
1076 let chunks = split_for_scope(data, line_delim);
1077 let n = chunks.len();
1078 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1079 rayon::scope(|s| {
1080 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1081 s.spawn(move |_| {
1082 result.reserve(chunk.len() / 4);
1083 process_single_field_chunk(
1084 chunk, delim, target_idx, line_delim, suppress, result,
1085 );
1086 });
1087 }
1088 });
1089 let slices: Vec<IoSlice> = results
1090 .iter()
1091 .filter(|r| !r.is_empty())
1092 .map(|r| IoSlice::new(r))
1093 .collect();
1094 write_ioslices(out, &slices)?;
1095 } else {
1096 let mut buf = Vec::with_capacity(data.len() / 4);
1097 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1098 if !buf.is_empty() {
1099 out.write_all(&buf)?;
1100 }
1101 }
1102 Ok(())
1103}
1104
1105fn process_complement_range(
1108 data: &[u8],
1109 delim: u8,
1110 line_delim: u8,
1111 skip_start: usize,
1112 skip_end: usize,
1113 suppress: bool,
1114 out: &mut impl Write,
1115) -> io::Result<()> {
1116 if data.len() >= PARALLEL_THRESHOLD {
1117 let chunks = split_for_scope(data, line_delim);
1118 let n = chunks.len();
1119 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1120 rayon::scope(|s| {
1121 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1122 s.spawn(move |_| {
1123 result.reserve(chunk.len());
1124 complement_range_chunk(
1125 chunk, delim, skip_start, skip_end, line_delim, suppress, result,
1126 );
1127 });
1128 }
1129 });
1130 let slices: Vec<IoSlice> = results
1131 .iter()
1132 .filter(|r| !r.is_empty())
1133 .map(|r| IoSlice::new(r))
1134 .collect();
1135 write_ioslices(out, &slices)?;
1136 } else {
1137 let mut buf = Vec::with_capacity(data.len());
1138 complement_range_chunk(
1139 data, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
1140 );
1141 if !buf.is_empty() {
1142 out.write_all(&buf)?;
1143 }
1144 }
1145 Ok(())
1146}
1147
1148fn complement_range_chunk(
1150 data: &[u8],
1151 delim: u8,
1152 skip_start: usize,
1153 skip_end: usize,
1154 line_delim: u8,
1155 suppress: bool,
1156 buf: &mut Vec<u8>,
1157) {
1158 buf.reserve(data.len());
1160 let mut start = 0;
1161 for end_pos in memchr_iter(line_delim, data) {
1162 let line = &data[start..end_pos];
1163 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
1164 start = end_pos + 1;
1165 }
1166 if start < data.len() {
1167 complement_range_line(
1168 &data[start..],
1169 delim,
1170 skip_start,
1171 skip_end,
1172 line_delim,
1173 suppress,
1174 buf,
1175 );
1176 }
1177}
1178
1179#[inline(always)]
1186fn complement_range_line(
1187 line: &[u8],
1188 delim: u8,
1189 skip_start: usize,
1190 skip_end: usize,
1191 line_delim: u8,
1192 suppress: bool,
1193 buf: &mut Vec<u8>,
1194) {
1195 let len = line.len();
1196 if len == 0 {
1197 if !suppress {
1198 unsafe { buf_push(buf, line_delim) };
1199 }
1200 return;
1201 }
1202
1203 let base = line.as_ptr();
1205
1206 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
1216
1217 let mut delim_count: usize = 0;
1219 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
1223 delim_count += 1;
1224 if delim_count == need_prefix_delims {
1225 prefix_end_pos = pos;
1226 }
1227 if delim_count == total_need {
1228 suffix_start_pos = pos + 1;
1229 break;
1230 }
1231 }
1232
1233 if delim_count == 0 {
1234 if !suppress {
1236 unsafe {
1237 buf_extend(buf, line);
1238 buf_push(buf, line_delim);
1239 }
1240 }
1241 return;
1242 }
1243
1244 if delim_count < need_prefix_delims {
1250 unsafe {
1252 buf_extend(buf, line);
1253 buf_push(buf, line_delim);
1254 }
1255 return;
1256 }
1257
1258 let has_prefix = need_prefix_delims > 0;
1259 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
1260
1261 if has_prefix && has_suffix {
1262 unsafe {
1264 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1265 buf_push(buf, delim);
1266 buf_extend(
1267 buf,
1268 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1269 );
1270 buf_push(buf, line_delim);
1271 }
1272 } else if has_prefix {
1273 unsafe {
1275 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1276 buf_push(buf, line_delim);
1277 }
1278 } else if has_suffix {
1279 unsafe {
1281 buf_extend(
1282 buf,
1283 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1284 );
1285 buf_push(buf, line_delim);
1286 }
1287 } else {
1288 unsafe { buf_push(buf, line_delim) };
1290 }
1291}
1292
1293fn process_complement_single_field(
1295 data: &[u8],
1296 delim: u8,
1297 line_delim: u8,
1298 skip_field: usize,
1299 suppress: bool,
1300 out: &mut impl Write,
1301) -> io::Result<()> {
1302 let skip_idx = skip_field - 1;
1303
1304 if data.len() >= PARALLEL_THRESHOLD {
1305 let chunks = split_for_scope(data, line_delim);
1306 let n = chunks.len();
1307 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1308 rayon::scope(|s| {
1309 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1310 s.spawn(move |_| {
1311 result.reserve(chunk.len());
1312 complement_single_field_chunk(
1313 chunk, delim, skip_idx, line_delim, suppress, result,
1314 );
1315 });
1316 }
1317 });
1318 let slices: Vec<IoSlice> = results
1319 .iter()
1320 .filter(|r| !r.is_empty())
1321 .map(|r| IoSlice::new(r))
1322 .collect();
1323 write_ioslices(out, &slices)?;
1324 } else {
1325 let mut buf = Vec::with_capacity(data.len());
1326 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
1327 if !buf.is_empty() {
1328 out.write_all(&buf)?;
1329 }
1330 }
1331 Ok(())
1332}
1333
1334fn complement_single_field_chunk(
1340 data: &[u8],
1341 delim: u8,
1342 skip_idx: usize,
1343 line_delim: u8,
1344 suppress: bool,
1345 buf: &mut Vec<u8>,
1346) {
1347 if delim == line_delim {
1349 buf.reserve(data.len());
1350 let mut start = 0;
1351 for end_pos in memchr_iter(line_delim, data) {
1352 let line = &data[start..end_pos];
1353 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1354 start = end_pos + 1;
1355 }
1356 if start < data.len() {
1357 complement_single_field_line(
1358 &data[start..],
1359 delim,
1360 skip_idx,
1361 line_delim,
1362 suppress,
1363 buf,
1364 );
1365 }
1366 return;
1367 }
1368
1369 buf.reserve(data.len());
1370 let base = data.as_ptr();
1371 let data_len = data.len();
1372 let need_before = skip_idx; let need_total = skip_idx + 1; let mut line_start: usize = 0;
1377 let mut delim_count: usize = 0;
1378 let mut skip_start_pos: usize = 0;
1379 let mut skip_end_pos: usize = 0;
1380 let mut found_start = need_before == 0; let mut found_end = false;
1382
1383 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1384 let byte = unsafe { *base.add(pos) };
1385
1386 if byte == line_delim {
1387 if delim_count == 0 {
1389 if !suppress {
1391 unsafe {
1392 buf_extend(
1393 buf,
1394 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1395 );
1396 buf_push(buf, line_delim);
1397 }
1398 }
1399 } else if !found_start || delim_count < need_before {
1400 unsafe {
1402 buf_extend(
1403 buf,
1404 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1405 );
1406 buf_push(buf, line_delim);
1407 }
1408 } else {
1409 let has_prefix = skip_idx > 0;
1410 let has_suffix = found_end && skip_end_pos < pos;
1411
1412 if has_prefix && has_suffix {
1413 unsafe {
1414 buf_extend(
1415 buf,
1416 std::slice::from_raw_parts(
1417 base.add(line_start),
1418 skip_start_pos - 1 - line_start,
1419 ),
1420 );
1421 buf_push(buf, delim);
1422 buf_extend(
1423 buf,
1424 std::slice::from_raw_parts(
1425 base.add(skip_end_pos + 1),
1426 pos - skip_end_pos - 1,
1427 ),
1428 );
1429 buf_push(buf, line_delim);
1430 }
1431 } else if has_prefix {
1432 unsafe {
1433 buf_extend(
1434 buf,
1435 std::slice::from_raw_parts(
1436 base.add(line_start),
1437 skip_start_pos - 1 - line_start,
1438 ),
1439 );
1440 buf_push(buf, line_delim);
1441 }
1442 } else if has_suffix {
1443 unsafe {
1444 buf_extend(
1445 buf,
1446 std::slice::from_raw_parts(
1447 base.add(skip_end_pos + 1),
1448 pos - skip_end_pos - 1,
1449 ),
1450 );
1451 buf_push(buf, line_delim);
1452 }
1453 } else {
1454 unsafe { buf_push(buf, line_delim) };
1455 }
1456 }
1457
1458 line_start = pos + 1;
1460 delim_count = 0;
1461 skip_start_pos = 0;
1462 skip_end_pos = 0;
1463 found_start = need_before == 0;
1464 found_end = false;
1465 } else {
1466 delim_count += 1;
1468 if delim_count == need_before {
1469 skip_start_pos = pos + 1;
1470 found_start = true;
1471 }
1472 if delim_count == need_total {
1473 skip_end_pos = pos;
1474 found_end = true;
1475 }
1476 }
1477 }
1478
1479 if line_start < data_len {
1481 let pos = data_len;
1482 if delim_count == 0 {
1483 if !suppress {
1484 unsafe {
1485 buf_extend(
1486 buf,
1487 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1488 );
1489 buf_push(buf, line_delim);
1490 }
1491 }
1492 } else if !found_start || delim_count < need_before {
1493 unsafe {
1494 buf_extend(
1495 buf,
1496 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1497 );
1498 buf_push(buf, line_delim);
1499 }
1500 } else {
1501 let has_prefix = skip_idx > 0;
1502 let has_suffix = found_end && skip_end_pos < pos;
1503
1504 if has_prefix && has_suffix {
1505 unsafe {
1506 buf_extend(
1507 buf,
1508 std::slice::from_raw_parts(
1509 base.add(line_start),
1510 skip_start_pos - 1 - line_start,
1511 ),
1512 );
1513 buf_push(buf, delim);
1514 buf_extend(
1515 buf,
1516 std::slice::from_raw_parts(
1517 base.add(skip_end_pos + 1),
1518 pos - skip_end_pos - 1,
1519 ),
1520 );
1521 buf_push(buf, line_delim);
1522 }
1523 } else if has_prefix {
1524 unsafe {
1525 buf_extend(
1526 buf,
1527 std::slice::from_raw_parts(
1528 base.add(line_start),
1529 skip_start_pos - 1 - line_start,
1530 ),
1531 );
1532 buf_push(buf, line_delim);
1533 }
1534 } else if has_suffix {
1535 unsafe {
1536 buf_extend(
1537 buf,
1538 std::slice::from_raw_parts(
1539 base.add(skip_end_pos + 1),
1540 pos - skip_end_pos - 1,
1541 ),
1542 );
1543 buf_push(buf, line_delim);
1544 }
1545 } else {
1546 unsafe { buf_push(buf, line_delim) };
1547 }
1548 }
1549 }
1550}
1551
1552#[inline(always)]
1554fn complement_single_field_line(
1555 line: &[u8],
1556 delim: u8,
1557 skip_idx: usize,
1558 line_delim: u8,
1559 suppress: bool,
1560 buf: &mut Vec<u8>,
1561) {
1562 let len = line.len();
1563 if len == 0 {
1564 if !suppress {
1565 unsafe { buf_push(buf, line_delim) };
1566 }
1567 return;
1568 }
1569
1570 let base = line.as_ptr();
1571 let need_before = skip_idx;
1572 let need_total = skip_idx + 1;
1573
1574 let mut delim_count: usize = 0;
1575 let mut skip_start_pos: usize = 0;
1576 let mut skip_end_pos: usize = len;
1577 let mut found_end = false;
1578
1579 for pos in memchr_iter(delim, line) {
1580 delim_count += 1;
1581 if delim_count == need_before {
1582 skip_start_pos = pos + 1;
1583 }
1584 if delim_count == need_total {
1585 skip_end_pos = pos;
1586 found_end = true;
1587 break;
1588 }
1589 }
1590
1591 if delim_count == 0 {
1592 if !suppress {
1593 unsafe {
1594 buf_extend(buf, line);
1595 buf_push(buf, line_delim);
1596 }
1597 }
1598 return;
1599 }
1600
1601 if delim_count < need_before {
1602 unsafe {
1603 buf_extend(buf, line);
1604 buf_push(buf, line_delim);
1605 }
1606 return;
1607 }
1608
1609 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1610 let has_suffix = found_end && skip_end_pos < len;
1611
1612 if has_prefix && has_suffix {
1613 unsafe {
1614 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1615 buf_push(buf, delim);
1616 buf_extend(
1617 buf,
1618 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1619 );
1620 buf_push(buf, line_delim);
1621 }
1622 } else if has_prefix {
1623 unsafe {
1624 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1625 buf_push(buf, line_delim);
1626 }
1627 } else if has_suffix {
1628 unsafe {
1629 buf_extend(
1630 buf,
1631 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1632 );
1633 buf_push(buf, line_delim);
1634 }
1635 } else {
1636 unsafe { buf_push(buf, line_delim) };
1637 }
1638}
1639
1640fn process_fields_prefix(
1644 data: &[u8],
1645 delim: u8,
1646 line_delim: u8,
1647 last_field: usize,
1648 suppress: bool,
1649 out: &mut impl Write,
1650) -> io::Result<()> {
1651 if data.len() >= PARALLEL_THRESHOLD {
1652 let chunks = split_for_scope(data, line_delim);
1653 let n = chunks.len();
1654 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1655 rayon::scope(|s| {
1656 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1657 s.spawn(move |_| {
1658 result.reserve(chunk.len());
1659 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, result);
1660 });
1661 }
1662 });
1663 let slices: Vec<IoSlice> = results
1664 .iter()
1665 .filter(|r| !r.is_empty())
1666 .map(|r| IoSlice::new(r))
1667 .collect();
1668 write_ioslices(out, &slices)?;
1669 } else if !suppress {
1670 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1674 } else {
1675 let mut buf = Vec::with_capacity(data.len());
1676 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
1677 if !buf.is_empty() {
1678 out.write_all(&buf)?;
1679 }
1680 }
1681 Ok(())
1682}
1683
1684#[inline]
1690fn fields_prefix_zerocopy(
1691 data: &[u8],
1692 delim: u8,
1693 line_delim: u8,
1694 last_field: usize,
1695 out: &mut impl Write,
1696) -> io::Result<()> {
1697 let newline_buf: [u8; 1] = [line_delim];
1698 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1699 let mut start = 0;
1700 let mut run_start: usize = 0;
1701
1702 for end_pos in memchr_iter(line_delim, data) {
1703 let line = &data[start..end_pos];
1704 let mut field_count = 1;
1705 let mut truncate_at: Option<usize> = None;
1706 for dpos in memchr_iter(delim, line) {
1707 if field_count >= last_field {
1708 truncate_at = Some(start + dpos);
1709 break;
1710 }
1711 field_count += 1;
1712 }
1713
1714 if let Some(trunc_pos) = truncate_at {
1715 if run_start < start {
1716 iov.push(IoSlice::new(&data[run_start..start]));
1717 }
1718 iov.push(IoSlice::new(&data[start..trunc_pos]));
1719 iov.push(IoSlice::new(&newline_buf));
1720 run_start = end_pos + 1;
1721
1722 if iov.len() >= MAX_IOV - 2 {
1723 write_ioslices(out, &iov)?;
1724 iov.clear();
1725 }
1726 }
1727 start = end_pos + 1;
1728 }
1729 if start < data.len() {
1731 let line = &data[start..];
1732 let mut field_count = 1;
1733 let mut truncate_at: Option<usize> = None;
1734 for dpos in memchr_iter(delim, line) {
1735 if field_count >= last_field {
1736 truncate_at = Some(start + dpos);
1737 break;
1738 }
1739 field_count += 1;
1740 }
1741 if let Some(trunc_pos) = truncate_at {
1742 if run_start < start {
1743 iov.push(IoSlice::new(&data[run_start..start]));
1744 }
1745 iov.push(IoSlice::new(&data[start..trunc_pos]));
1746 iov.push(IoSlice::new(&newline_buf));
1747 if !iov.is_empty() {
1748 write_ioslices(out, &iov)?;
1749 }
1750 return Ok(());
1751 }
1752 }
1753 if run_start < data.len() {
1755 iov.push(IoSlice::new(&data[run_start..]));
1756 if !data.is_empty() && *data.last().unwrap() != line_delim {
1757 iov.push(IoSlice::new(&newline_buf));
1758 }
1759 }
1760 if !iov.is_empty() {
1761 write_ioslices(out, &iov)?;
1762 }
1763 Ok(())
1764}
1765
1766fn fields_prefix_chunk(
1768 data: &[u8],
1769 delim: u8,
1770 line_delim: u8,
1771 last_field: usize,
1772 suppress: bool,
1773 buf: &mut Vec<u8>,
1774) {
1775 buf.reserve(data.len());
1776 let mut start = 0;
1777 for end_pos in memchr_iter(line_delim, data) {
1778 let line = &data[start..end_pos];
1779 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1780 start = end_pos + 1;
1781 }
1782 if start < data.len() {
1783 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1784 }
1785}
1786
1787#[inline(always)]
1790fn fields_prefix_line(
1791 line: &[u8],
1792 delim: u8,
1793 line_delim: u8,
1794 last_field: usize,
1795 suppress: bool,
1796 buf: &mut Vec<u8>,
1797) {
1798 let len = line.len();
1799 if len == 0 {
1800 if !suppress {
1801 unsafe { buf_push(buf, line_delim) };
1802 }
1803 return;
1804 }
1805
1806 let base = line.as_ptr();
1808
1809 let mut field_count = 1usize;
1810 let mut has_delim = false;
1811
1812 for pos in memchr_iter(delim, line) {
1813 has_delim = true;
1814 if field_count >= last_field {
1815 unsafe {
1816 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1817 buf_push(buf, line_delim);
1818 }
1819 return;
1820 }
1821 field_count += 1;
1822 }
1823
1824 if !has_delim {
1825 if !suppress {
1826 unsafe {
1827 buf_extend(buf, line);
1828 buf_push(buf, line_delim);
1829 }
1830 }
1831 return;
1832 }
1833
1834 unsafe {
1835 buf_extend(buf, line);
1836 buf_push(buf, line_delim);
1837 }
1838}
1839
1840fn process_fields_suffix(
1842 data: &[u8],
1843 delim: u8,
1844 line_delim: u8,
1845 start_field: usize,
1846 suppress: bool,
1847 out: &mut impl Write,
1848) -> io::Result<()> {
1849 if data.len() >= PARALLEL_THRESHOLD {
1850 let chunks = split_for_scope(data, line_delim);
1851 let n = chunks.len();
1852 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1853 rayon::scope(|s| {
1854 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1855 s.spawn(move |_| {
1856 result.reserve(chunk.len());
1857 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, result);
1858 });
1859 }
1860 });
1861 let slices: Vec<IoSlice> = results
1862 .iter()
1863 .filter(|r| !r.is_empty())
1864 .map(|r| IoSlice::new(r))
1865 .collect();
1866 write_ioslices(out, &slices)?;
1867 } else {
1868 let mut buf = Vec::with_capacity(data.len());
1869 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
1870 if !buf.is_empty() {
1871 out.write_all(&buf)?;
1872 }
1873 }
1874 Ok(())
1875}
1876
1877fn fields_suffix_chunk(
1879 data: &[u8],
1880 delim: u8,
1881 line_delim: u8,
1882 start_field: usize,
1883 suppress: bool,
1884 buf: &mut Vec<u8>,
1885) {
1886 buf.reserve(data.len());
1887 let mut start = 0;
1888 for end_pos in memchr_iter(line_delim, data) {
1889 let line = &data[start..end_pos];
1890 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1891 start = end_pos + 1;
1892 }
1893 if start < data.len() {
1894 fields_suffix_line(
1895 &data[start..],
1896 delim,
1897 line_delim,
1898 start_field,
1899 suppress,
1900 buf,
1901 );
1902 }
1903}
1904
1905#[inline(always)]
1908fn fields_suffix_line(
1909 line: &[u8],
1910 delim: u8,
1911 line_delim: u8,
1912 start_field: usize,
1913 suppress: bool,
1914 buf: &mut Vec<u8>,
1915) {
1916 let len = line.len();
1917 if len == 0 {
1918 if !suppress {
1919 unsafe { buf_push(buf, line_delim) };
1920 }
1921 return;
1922 }
1923
1924 let base = line.as_ptr();
1926
1927 let skip_delims = start_field - 1;
1928 let mut delim_count = 0usize;
1929 let mut has_delim = false;
1930
1931 for pos in memchr_iter(delim, line) {
1932 has_delim = true;
1933 delim_count += 1;
1934 if delim_count >= skip_delims {
1935 unsafe {
1936 buf_extend(
1937 buf,
1938 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1939 );
1940 buf_push(buf, line_delim);
1941 }
1942 return;
1943 }
1944 }
1945
1946 if !has_delim {
1947 if !suppress {
1948 unsafe {
1949 buf_extend(buf, line);
1950 buf_push(buf, line_delim);
1951 }
1952 }
1953 return;
1954 }
1955
1956 unsafe { buf_push(buf, line_delim) };
1958}
1959
1960fn process_fields_mid_range(
1963 data: &[u8],
1964 delim: u8,
1965 line_delim: u8,
1966 start_field: usize,
1967 end_field: usize,
1968 suppress: bool,
1969 out: &mut impl Write,
1970) -> io::Result<()> {
1971 if data.len() >= PARALLEL_THRESHOLD {
1972 let chunks = split_for_scope(data, line_delim);
1973 let n = chunks.len();
1974 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1975 rayon::scope(|s| {
1976 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1977 s.spawn(move |_| {
1978 result.reserve(chunk.len());
1979 fields_mid_range_chunk(
1980 chunk,
1981 delim,
1982 line_delim,
1983 start_field,
1984 end_field,
1985 suppress,
1986 result,
1987 );
1988 });
1989 }
1990 });
1991 let slices: Vec<IoSlice> = results
1992 .iter()
1993 .filter(|r| !r.is_empty())
1994 .map(|r| IoSlice::new(r))
1995 .collect();
1996 write_ioslices(out, &slices)?;
1997 } else {
1998 let mut buf = Vec::with_capacity(data.len());
1999 fields_mid_range_chunk(
2000 data,
2001 delim,
2002 line_delim,
2003 start_field,
2004 end_field,
2005 suppress,
2006 &mut buf,
2007 );
2008 if !buf.is_empty() {
2009 out.write_all(&buf)?;
2010 }
2011 }
2012 Ok(())
2013}
2014
2015fn fields_mid_range_chunk(
2019 data: &[u8],
2020 delim: u8,
2021 line_delim: u8,
2022 start_field: usize,
2023 end_field: usize,
2024 suppress: bool,
2025 buf: &mut Vec<u8>,
2026) {
2027 if delim == line_delim {
2029 buf.reserve(data.len());
2030 let mut start = 0;
2031 for end_pos in memchr_iter(line_delim, data) {
2032 let line = &data[start..end_pos];
2033 fields_mid_range_line(
2034 line,
2035 delim,
2036 line_delim,
2037 start_field,
2038 end_field,
2039 suppress,
2040 buf,
2041 );
2042 start = end_pos + 1;
2043 }
2044 if start < data.len() {
2045 fields_mid_range_line(
2046 &data[start..],
2047 delim,
2048 line_delim,
2049 start_field,
2050 end_field,
2051 suppress,
2052 buf,
2053 );
2054 }
2055 return;
2056 }
2057
2058 buf.reserve(data.len());
2059 let base = data.as_ptr();
2060 let skip_before = start_field - 1; let target_end_delim = skip_before + (end_field - start_field) + 1;
2062
2063 let mut line_start: usize = 0;
2064 let mut delim_count: usize = 0;
2065 let mut range_start: usize = 0;
2066 let mut has_delim = false;
2067 let mut found_end = false; for pos in memchr::memchr2_iter(delim, line_delim, data) {
2070 let byte = unsafe { *base.add(pos) };
2071 if byte == line_delim {
2072 if found_end {
2074 } else if !has_delim {
2076 if !suppress {
2078 unsafe {
2079 buf_extend(
2080 buf,
2081 std::slice::from_raw_parts(base.add(line_start), pos + 1 - line_start),
2082 );
2083 }
2084 }
2085 } else if delim_count >= skip_before {
2086 if skip_before == 0 {
2088 range_start = line_start;
2089 }
2090 unsafe {
2091 buf_extend(
2092 buf,
2093 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2094 );
2095 buf_push(buf, line_delim);
2096 }
2097 } else {
2098 unsafe { buf_push(buf, line_delim) };
2100 }
2101 line_start = pos + 1;
2102 delim_count = 0;
2103 has_delim = false;
2104 found_end = false;
2105 } else if !found_end {
2106 has_delim = true;
2108 delim_count += 1;
2109 if delim_count == skip_before {
2110 range_start = pos + 1;
2111 }
2112 if delim_count == target_end_delim {
2113 if skip_before == 0 {
2114 range_start = line_start;
2115 }
2116 unsafe {
2117 buf_extend(
2118 buf,
2119 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2120 );
2121 buf_push(buf, line_delim);
2122 }
2123 found_end = true;
2124 }
2125 }
2126 }
2127 if line_start < data.len() && !found_end {
2129 if !has_delim {
2130 if !suppress {
2131 unsafe {
2132 buf_extend(
2133 buf,
2134 std::slice::from_raw_parts(base.add(line_start), data.len() - line_start),
2135 );
2136 }
2137 }
2138 } else if delim_count >= skip_before {
2139 if skip_before == 0 {
2140 range_start = line_start;
2141 }
2142 unsafe {
2143 buf_extend(
2144 buf,
2145 std::slice::from_raw_parts(base.add(range_start), data.len() - range_start),
2146 );
2147 }
2148 }
2149 }
2150}
2151
2152#[inline(always)]
2156fn fields_mid_range_line(
2157 line: &[u8],
2158 delim: u8,
2159 line_delim: u8,
2160 start_field: usize,
2161 end_field: usize,
2162 suppress: bool,
2163 buf: &mut Vec<u8>,
2164) {
2165 let len = line.len();
2166 if len == 0 {
2167 if !suppress {
2168 unsafe { buf_push(buf, line_delim) };
2169 }
2170 return;
2171 }
2172
2173 let base = line.as_ptr();
2175
2176 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
2180 let mut delim_count = 0;
2181 let mut range_start = 0;
2182 let mut has_delim = false;
2183
2184 for pos in memchr_iter(delim, line) {
2185 has_delim = true;
2186 delim_count += 1;
2187 if delim_count == skip_before {
2188 range_start = pos + 1;
2189 }
2190 if delim_count == target_end_delim {
2191 if skip_before == 0 {
2192 range_start = 0;
2193 }
2194 unsafe {
2195 buf_extend(
2196 buf,
2197 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2198 );
2199 buf_push(buf, line_delim);
2200 }
2201 return;
2202 }
2203 }
2204
2205 if !has_delim {
2206 if !suppress {
2207 unsafe {
2208 buf_extend(buf, line);
2209 buf_push(buf, line_delim);
2210 }
2211 }
2212 return;
2213 }
2214
2215 if delim_count >= skip_before {
2217 if skip_before == 0 {
2219 range_start = 0;
2220 }
2221 unsafe {
2222 buf_extend(
2223 buf,
2224 std::slice::from_raw_parts(base.add(range_start), len - range_start),
2225 );
2226 buf_push(buf, line_delim);
2227 }
2228 } else {
2229 unsafe { buf_push(buf, line_delim) };
2231 }
2232}
2233
2234fn single_field1_parallel(
2245 data: &[u8],
2246 delim: u8,
2247 line_delim: u8,
2248 out: &mut impl Write,
2249) -> io::Result<()> {
2250 let chunks = split_for_scope(data, line_delim);
2251 let n = chunks.len();
2252 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2253 rayon::scope(|s| {
2254 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2255 s.spawn(move |_| {
2256 result.reserve(chunk.len() + 1);
2257 single_field1_to_buf(chunk, delim, line_delim, result);
2258 });
2259 }
2260 });
2261 let slices: Vec<IoSlice> = results
2262 .iter()
2263 .filter(|r| !r.is_empty())
2264 .map(|r| IoSlice::new(r))
2265 .collect();
2266 write_ioslices(out, &slices)
2267}
2268
2269#[inline]
2280fn single_field1_to_buf(data: &[u8], delim: u8, line_delim: u8, buf: &mut Vec<u8>) {
2281 debug_assert_ne!(delim, line_delim, "delim and line_delim must differ");
2282 buf.reserve(data.len() + 1);
2285
2286 let base = data.as_ptr();
2289 let initial_len = buf.len();
2290 let mut out_ptr = unsafe { buf.as_mut_ptr().add(initial_len) };
2291 let mut line_start: usize = 0;
2292 let mut found_delim = false;
2293 let mut delim_pos: usize = 0; for pos in memchr::memchr2_iter(delim, line_delim, data) {
2302 let byte = unsafe { *base.add(pos) };
2303 if byte == line_delim {
2304 if !found_delim {
2305 let len = pos + 1 - line_start;
2307 unsafe {
2308 std::ptr::copy_nonoverlapping(base.add(line_start), out_ptr, len);
2309 out_ptr = out_ptr.add(len);
2310 }
2311 } else {
2312 let field_len = delim_pos - line_start;
2316 unsafe {
2317 std::ptr::copy_nonoverlapping(base.add(line_start), out_ptr, field_len);
2318 out_ptr = out_ptr.add(field_len);
2319 *out_ptr = line_delim;
2320 out_ptr = out_ptr.add(1);
2321 }
2322 }
2323 line_start = pos + 1;
2324 found_delim = false;
2325 } else if !found_delim {
2326 found_delim = true;
2328 delim_pos = pos;
2329 }
2330 }
2332
2333 if line_start < data.len() {
2335 if !found_delim {
2336 let len = data.len() - line_start;
2338 unsafe {
2339 std::ptr::copy_nonoverlapping(base.add(line_start), out_ptr, len);
2340 out_ptr = out_ptr.add(len);
2341 *out_ptr = line_delim;
2342 out_ptr = out_ptr.add(1);
2343 }
2344 } else {
2345 let field_len = delim_pos - line_start;
2347 unsafe {
2348 std::ptr::copy_nonoverlapping(base.add(line_start), out_ptr, field_len);
2349 out_ptr = out_ptr.add(field_len);
2350 *out_ptr = line_delim;
2351 out_ptr = out_ptr.add(1);
2352 }
2353 }
2354 }
2355
2356 unsafe {
2362 let new_len = out_ptr as usize - buf.as_ptr() as usize;
2363 debug_assert!(new_len >= initial_len && new_len <= buf.capacity());
2364 buf.set_len(new_len);
2365 }
2366}
2367
2368#[inline]
2377#[allow(dead_code)]
2378fn single_field1_zerocopy(
2379 data: &[u8],
2380 delim: u8,
2381 line_delim: u8,
2382 out: &mut impl Write,
2383) -> io::Result<()> {
2384 let newline_buf: [u8; 1] = [line_delim];
2385
2386 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2387 let mut run_start: usize = 0;
2388 let mut start = 0;
2389
2390 for end_pos in memchr_iter(line_delim, data) {
2391 let line = &data[start..end_pos];
2392 if let Some(dp) = memchr::memchr(delim, line) {
2393 if run_start < start {
2396 iov.push(IoSlice::new(&data[run_start..start]));
2397 }
2398 iov.push(IoSlice::new(&data[start..start + dp]));
2399 iov.push(IoSlice::new(&newline_buf));
2400 run_start = end_pos + 1;
2401
2402 if iov.len() >= MAX_IOV - 2 {
2403 write_ioslices(out, &iov)?;
2404 iov.clear();
2405 }
2406 }
2407 start = end_pos + 1;
2409 }
2410
2411 if start < data.len() {
2413 let line = &data[start..];
2414 if let Some(dp) = memchr::memchr(delim, line) {
2415 if run_start < start {
2416 iov.push(IoSlice::new(&data[run_start..start]));
2417 }
2418 iov.push(IoSlice::new(&data[start..start + dp]));
2419 iov.push(IoSlice::new(&newline_buf));
2420 if !iov.is_empty() {
2421 write_ioslices(out, &iov)?;
2422 }
2423 return Ok(());
2424 }
2425 }
2426
2427 if run_start < data.len() {
2429 iov.push(IoSlice::new(&data[run_start..]));
2430 if !data.is_empty() && *data.last().unwrap() != line_delim {
2431 iov.push(IoSlice::new(&newline_buf));
2432 }
2433 }
2434 if !iov.is_empty() {
2435 write_ioslices(out, &iov)?;
2436 }
2437 Ok(())
2438}
2439
2440fn process_single_field_chunk(
2442 data: &[u8],
2443 delim: u8,
2444 target_idx: usize,
2445 line_delim: u8,
2446 suppress: bool,
2447 buf: &mut Vec<u8>,
2448) {
2449 buf.reserve(data.len());
2451 let mut start = 0;
2452 for end_pos in memchr_iter(line_delim, data) {
2453 let line = &data[start..end_pos];
2454 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
2455 start = end_pos + 1;
2456 }
2457 if start < data.len() {
2458 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
2459 }
2460}
2461
2462#[inline(always)]
2467fn extract_single_field_line(
2468 line: &[u8],
2469 delim: u8,
2470 target_idx: usize,
2471 line_delim: u8,
2472 suppress: bool,
2473 buf: &mut Vec<u8>,
2474) {
2475 let len = line.len();
2476 if len == 0 {
2477 if !suppress {
2478 unsafe { buf_push(buf, line_delim) };
2479 }
2480 return;
2481 }
2482
2483 let base = line.as_ptr();
2485
2486 if target_idx == 0 {
2488 match memchr::memchr(delim, line) {
2489 Some(pos) => unsafe {
2490 buf_extend_byte(buf, std::slice::from_raw_parts(base, pos), line_delim);
2491 },
2492 None => {
2493 if !suppress {
2494 unsafe {
2495 buf_extend_byte(buf, line, line_delim);
2496 }
2497 }
2498 }
2499 }
2500 return;
2501 }
2502
2503 let mut field_start = 0;
2505 let mut field_idx = 0;
2506 let mut has_delim = false;
2507
2508 for pos in memchr_iter(delim, line) {
2509 has_delim = true;
2510 if field_idx == target_idx {
2511 unsafe {
2512 buf_extend_byte(
2513 buf,
2514 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
2515 line_delim,
2516 );
2517 }
2518 return;
2519 }
2520 field_idx += 1;
2521 field_start = pos + 1;
2522 }
2523
2524 if !has_delim {
2525 if !suppress {
2526 unsafe {
2527 buf_extend_byte(buf, line, line_delim);
2528 }
2529 }
2530 return;
2531 }
2532
2533 if field_idx == target_idx {
2534 unsafe {
2535 buf_extend_byte(
2536 buf,
2537 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2538 line_delim,
2539 );
2540 }
2541 } else {
2542 unsafe { buf_push(buf, line_delim) };
2543 }
2544}
2545
2546#[inline(always)]
2550fn extract_fields_to_buf(
2551 line: &[u8],
2552 delim: u8,
2553 ranges: &[Range],
2554 output_delim: &[u8],
2555 suppress: bool,
2556 max_field: usize,
2557 field_mask: u64,
2558 line_delim: u8,
2559 buf: &mut Vec<u8>,
2560 complement: bool,
2561) {
2562 let len = line.len();
2563
2564 if len == 0 {
2565 if !suppress {
2566 buf.push(line_delim);
2567 }
2568 return;
2569 }
2570
2571 let needed = len + output_delim.len() * 16 + 1;
2574 if buf.capacity() - buf.len() < needed {
2575 buf.reserve(needed);
2576 }
2577
2578 let base = line.as_ptr();
2579 let mut field_num: usize = 1;
2580 let mut field_start: usize = 0;
2581 let mut first_output = true;
2582 let mut has_delim = false;
2583
2584 for delim_pos in memchr_iter(delim, line) {
2586 has_delim = true;
2587
2588 if is_selected(field_num, field_mask, ranges, complement) {
2589 if !first_output {
2590 unsafe { buf_extend(buf, output_delim) };
2591 }
2592 unsafe {
2593 buf_extend(
2594 buf,
2595 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2596 )
2597 };
2598 first_output = false;
2599 }
2600
2601 field_num += 1;
2602 field_start = delim_pos + 1;
2603
2604 if field_num > max_field {
2605 break;
2606 }
2607 }
2608
2609 if (field_num <= max_field || complement)
2611 && has_delim
2612 && is_selected(field_num, field_mask, ranges, complement)
2613 {
2614 if !first_output {
2615 unsafe { buf_extend(buf, output_delim) };
2616 }
2617 unsafe {
2618 buf_extend(
2619 buf,
2620 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2621 )
2622 };
2623 first_output = false;
2624 }
2625
2626 if !first_output {
2627 unsafe { buf_push(buf, line_delim) };
2628 } else if !has_delim {
2629 if !suppress {
2630 unsafe {
2631 buf_extend(buf, line);
2632 buf_push(buf, line_delim);
2633 }
2634 }
2635 } else {
2636 unsafe { buf_push(buf, line_delim) };
2637 }
2638}
2639
2640fn process_bytes_from_start(
2647 data: &[u8],
2648 max_bytes: usize,
2649 line_delim: u8,
2650 out: &mut impl Write,
2651) -> io::Result<()> {
2652 if data.len() < PARALLEL_THRESHOLD && max_bytes > 0 && max_bytes < usize::MAX {
2660 let mut start = 0;
2661 let mut all_fit = true;
2662 for pos in memchr_iter(line_delim, data) {
2663 if pos - start > max_bytes {
2664 all_fit = false;
2665 break;
2666 }
2667 start = pos + 1;
2668 }
2669 if all_fit && start < data.len() && data.len() - start > max_bytes {
2671 all_fit = false;
2672 }
2673 if all_fit {
2674 if !data.is_empty() && data[data.len() - 1] == line_delim {
2676 return out.write_all(data);
2677 } else if !data.is_empty() {
2678 out.write_all(data)?;
2679 return out.write_all(&[line_delim]);
2680 }
2681 return Ok(());
2682 }
2683 }
2684
2685 if data.len() >= PARALLEL_THRESHOLD {
2686 let chunks = split_for_scope(data, line_delim);
2687 let n = chunks.len();
2688 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2689 rayon::scope(|s| {
2690 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2691 s.spawn(move |_| {
2692 result.reserve(chunk.len());
2695 bytes_from_start_chunk(chunk, max_bytes, line_delim, result);
2696 });
2697 }
2698 });
2699 let slices: Vec<IoSlice> = results
2701 .iter()
2702 .filter(|r| !r.is_empty())
2703 .map(|r| IoSlice::new(r))
2704 .collect();
2705 write_ioslices(out, &slices)?;
2706 } else {
2707 if max_bytes <= 512 {
2713 let est_out = (data.len() / 4).max(max_bytes + 2);
2716 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2717 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2718 if !buf.is_empty() {
2719 out.write_all(&buf)?;
2720 }
2721 } else {
2722 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2726 }
2727 }
2728 Ok(())
2729}
2730
2731#[inline]
2736fn bytes_from_start_zerocopy(
2737 data: &[u8],
2738 max_bytes: usize,
2739 line_delim: u8,
2740 out: &mut impl Write,
2741) -> io::Result<()> {
2742 let newline_buf: [u8; 1] = [line_delim];
2743 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2744 let mut start = 0;
2745 let mut run_start: usize = 0;
2746
2747 for pos in memchr_iter(line_delim, data) {
2748 let line_len = pos - start;
2749 if line_len > max_bytes {
2750 if run_start < start {
2752 iov.push(IoSlice::new(&data[run_start..start]));
2753 }
2754 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2755 iov.push(IoSlice::new(&newline_buf));
2756 run_start = pos + 1;
2757
2758 if iov.len() >= MAX_IOV - 2 {
2759 write_ioslices(out, &iov)?;
2760 iov.clear();
2761 }
2762 }
2763 start = pos + 1;
2764 }
2765 if start < data.len() {
2767 let line_len = data.len() - start;
2768 if line_len > max_bytes {
2769 if run_start < start {
2770 iov.push(IoSlice::new(&data[run_start..start]));
2771 }
2772 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2773 iov.push(IoSlice::new(&newline_buf));
2774 if !iov.is_empty() {
2775 write_ioslices(out, &iov)?;
2776 }
2777 return Ok(());
2778 }
2779 }
2780 if run_start < data.len() {
2782 iov.push(IoSlice::new(&data[run_start..]));
2783 if !data.is_empty() && *data.last().unwrap() != line_delim {
2784 iov.push(IoSlice::new(&newline_buf));
2785 }
2786 }
2787 if !iov.is_empty() {
2788 write_ioslices(out, &iov)?;
2789 }
2790 Ok(())
2791}
2792
2793#[inline]
2798fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2799 buf.reserve(data.len());
2802
2803 let src = data.as_ptr();
2804 let dst_base = buf.as_mut_ptr();
2805 let mut wp = buf.len();
2806 let mut start = 0;
2807
2808 for pos in memchr_iter(line_delim, data) {
2809 let line_len = pos - start;
2810 let take = line_len.min(max_bytes);
2811 unsafe {
2812 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2813 *dst_base.add(wp + take) = line_delim;
2814 }
2815 wp += take + 1;
2816 start = pos + 1;
2817 }
2818 if start < data.len() {
2820 let line_len = data.len() - start;
2821 let take = line_len.min(max_bytes);
2822 unsafe {
2823 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2824 *dst_base.add(wp + take) = line_delim;
2825 }
2826 wp += take + 1;
2827 }
2828 unsafe { buf.set_len(wp) };
2829}
2830
2831fn process_bytes_from_offset(
2833 data: &[u8],
2834 skip_bytes: usize,
2835 line_delim: u8,
2836 out: &mut impl Write,
2837) -> io::Result<()> {
2838 if data.len() >= PARALLEL_THRESHOLD {
2839 let chunks = split_for_scope(data, line_delim);
2840 let n = chunks.len();
2841 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2842 rayon::scope(|s| {
2843 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2844 s.spawn(move |_| {
2845 result.reserve(chunk.len());
2846 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, result);
2847 });
2848 }
2849 });
2850 let slices: Vec<IoSlice> = results
2852 .iter()
2853 .filter(|r| !r.is_empty())
2854 .map(|r| IoSlice::new(r))
2855 .collect();
2856 write_ioslices(out, &slices)?;
2857 } else {
2858 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2860 }
2861 Ok(())
2862}
2863
2864#[inline]
2868fn bytes_from_offset_zerocopy(
2869 data: &[u8],
2870 skip_bytes: usize,
2871 line_delim: u8,
2872 out: &mut impl Write,
2873) -> io::Result<()> {
2874 let delim_buf = [line_delim];
2875 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2876
2877 let mut start = 0;
2878 for pos in memchr_iter(line_delim, data) {
2879 let line_len = pos - start;
2880 if line_len > skip_bytes {
2881 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2882 }
2883 iov.push(IoSlice::new(&delim_buf));
2884 if iov.len() >= MAX_IOV - 1 {
2886 write_ioslices(out, &iov)?;
2887 iov.clear();
2888 }
2889 start = pos + 1;
2890 }
2891 if start < data.len() {
2892 let line_len = data.len() - start;
2893 if line_len > skip_bytes {
2894 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2895 }
2896 iov.push(IoSlice::new(&delim_buf));
2897 }
2898 if !iov.is_empty() {
2899 write_ioslices(out, &iov)?;
2900 }
2901 Ok(())
2902}
2903
2904#[inline]
2907fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2908 buf.reserve(data.len());
2909
2910 let src = data.as_ptr();
2911 let dst_base = buf.as_mut_ptr();
2912 let mut wp = buf.len();
2913 let mut start = 0;
2914
2915 for pos in memchr_iter(line_delim, data) {
2916 let line_len = pos - start;
2917 if line_len > skip_bytes {
2918 let take = line_len - skip_bytes;
2919 unsafe {
2920 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2921 }
2922 wp += take;
2923 }
2924 unsafe {
2925 *dst_base.add(wp) = line_delim;
2926 }
2927 wp += 1;
2928 start = pos + 1;
2929 }
2930 if start < data.len() {
2931 let line_len = data.len() - start;
2932 if line_len > skip_bytes {
2933 let take = line_len - skip_bytes;
2934 unsafe {
2935 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2936 }
2937 wp += take;
2938 }
2939 unsafe {
2940 *dst_base.add(wp) = line_delim;
2941 }
2942 wp += 1;
2943 }
2944 unsafe { buf.set_len(wp) };
2945}
2946
2947fn process_bytes_mid_range(
2949 data: &[u8],
2950 start_byte: usize,
2951 end_byte: usize,
2952 line_delim: u8,
2953 out: &mut impl Write,
2954) -> io::Result<()> {
2955 let skip = start_byte.saturating_sub(1);
2956
2957 if data.len() >= PARALLEL_THRESHOLD {
2958 let chunks = split_for_scope(data, line_delim);
2959 let n = chunks.len();
2960 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2961 rayon::scope(|s| {
2962 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2963 s.spawn(move |_| {
2964 result.reserve(chunk.len());
2965 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, result);
2966 });
2967 }
2968 });
2969 let slices: Vec<IoSlice> = results
2970 .iter()
2971 .filter(|r| !r.is_empty())
2972 .map(|r| IoSlice::new(r))
2973 .collect();
2974 write_ioslices(out, &slices)?;
2975 } else {
2976 let mut buf = Vec::with_capacity(data.len());
2977 bytes_mid_range_chunk(data, skip, end_byte, line_delim, &mut buf);
2978 if !buf.is_empty() {
2979 out.write_all(&buf)?;
2980 }
2981 }
2982 Ok(())
2983}
2984
2985#[inline]
2989fn bytes_mid_range_chunk(
2990 data: &[u8],
2991 skip: usize,
2992 end_byte: usize,
2993 line_delim: u8,
2994 buf: &mut Vec<u8>,
2995) {
2996 buf.reserve(data.len());
2997
2998 let src = data.as_ptr();
2999 let dst_base = buf.as_mut_ptr();
3000 let mut wp = buf.len();
3001 let mut start = 0;
3002
3003 for pos in memchr_iter(line_delim, data) {
3004 let line_len = pos - start;
3005 if line_len > skip {
3006 let take_end = line_len.min(end_byte);
3007 let take = take_end - skip;
3008 unsafe {
3009 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
3010 }
3011 wp += take;
3012 }
3013 unsafe {
3014 *dst_base.add(wp) = line_delim;
3015 }
3016 wp += 1;
3017 start = pos + 1;
3018 }
3019 if start < data.len() {
3020 let line_len = data.len() - start;
3021 if line_len > skip {
3022 let take_end = line_len.min(end_byte);
3023 let take = take_end - skip;
3024 unsafe {
3025 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
3026 }
3027 wp += take;
3028 }
3029 unsafe {
3030 *dst_base.add(wp) = line_delim;
3031 }
3032 wp += 1;
3033 }
3034 unsafe { buf.set_len(wp) };
3035}
3036
3037fn process_bytes_complement_mid(
3039 data: &[u8],
3040 skip_start: usize,
3041 skip_end: usize,
3042 line_delim: u8,
3043 out: &mut impl Write,
3044) -> io::Result<()> {
3045 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
3047 let chunks = split_for_scope(data, line_delim);
3048 let n = chunks.len();
3049 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
3050 rayon::scope(|s| {
3051 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
3052 s.spawn(move |_| {
3053 result.reserve(chunk.len());
3054 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, result);
3055 });
3056 }
3057 });
3058 let slices: Vec<IoSlice> = results
3059 .iter()
3060 .filter(|r| !r.is_empty())
3061 .map(|r| IoSlice::new(r))
3062 .collect();
3063 write_ioslices(out, &slices)?;
3064 } else {
3065 let mut buf = Vec::with_capacity(data.len());
3066 bytes_complement_mid_chunk(data, prefix_bytes, skip_end, line_delim, &mut buf);
3067 if !buf.is_empty() {
3068 out.write_all(&buf)?;
3069 }
3070 }
3071 Ok(())
3072}
3073
3074#[inline]
3077fn bytes_complement_mid_chunk(
3078 data: &[u8],
3079 prefix_bytes: usize,
3080 skip_end: usize,
3081 line_delim: u8,
3082 buf: &mut Vec<u8>,
3083) {
3084 buf.reserve(data.len());
3085
3086 let src = data.as_ptr();
3087 let dst_base = buf.as_mut_ptr();
3088 let mut wp = buf.len();
3089 let mut start = 0;
3090
3091 for pos in memchr_iter(line_delim, data) {
3092 let line_len = pos - start;
3093 let take_prefix = prefix_bytes.min(line_len);
3095 if take_prefix > 0 {
3096 unsafe {
3097 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
3098 }
3099 wp += take_prefix;
3100 }
3101 if line_len > skip_end {
3103 let suffix_len = line_len - skip_end;
3104 unsafe {
3105 std::ptr::copy_nonoverlapping(
3106 src.add(start + skip_end),
3107 dst_base.add(wp),
3108 suffix_len,
3109 );
3110 }
3111 wp += suffix_len;
3112 }
3113 unsafe {
3114 *dst_base.add(wp) = line_delim;
3115 }
3116 wp += 1;
3117 start = pos + 1;
3118 }
3119 if start < data.len() {
3120 let line_len = data.len() - start;
3121 let take_prefix = prefix_bytes.min(line_len);
3122 if take_prefix > 0 {
3123 unsafe {
3124 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
3125 }
3126 wp += take_prefix;
3127 }
3128 if line_len > skip_end {
3129 let suffix_len = line_len - skip_end;
3130 unsafe {
3131 std::ptr::copy_nonoverlapping(
3132 src.add(start + skip_end),
3133 dst_base.add(wp),
3134 suffix_len,
3135 );
3136 }
3137 wp += suffix_len;
3138 }
3139 unsafe {
3140 *dst_base.add(wp) = line_delim;
3141 }
3142 wp += 1;
3143 }
3144 unsafe { buf.set_len(wp) };
3145}
3146
3147fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3149 let line_delim = cfg.line_delim;
3150 let ranges = cfg.ranges;
3151 let complement = cfg.complement;
3152 let output_delim = cfg.output_delim;
3153
3154 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
3156 let max_bytes = ranges[0].end;
3157 if max_bytes < usize::MAX {
3158 return process_bytes_from_start(data, max_bytes, line_delim, out);
3159 }
3160 }
3161
3162 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
3164 let skip_bytes = ranges[0].start.saturating_sub(1);
3165 if skip_bytes > 0 {
3166 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
3167 }
3168 }
3169
3170 if !complement
3172 && ranges.len() == 1
3173 && ranges[0].start > 1
3174 && ranges[0].end < usize::MAX
3175 && output_delim.is_empty()
3176 {
3177 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
3178 }
3179
3180 if complement
3182 && ranges.len() == 1
3183 && ranges[0].start == 1
3184 && ranges[0].end < usize::MAX
3185 && output_delim.is_empty()
3186 {
3187 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
3188 }
3189
3190 if complement
3192 && ranges.len() == 1
3193 && ranges[0].end == usize::MAX
3194 && ranges[0].start > 1
3195 && output_delim.is_empty()
3196 {
3197 let max_bytes = ranges[0].start - 1;
3198 return process_bytes_from_start(data, max_bytes, line_delim, out);
3199 }
3200
3201 if complement
3203 && ranges.len() == 1
3204 && ranges[0].start > 1
3205 && ranges[0].end < usize::MAX
3206 && output_delim.is_empty()
3207 {
3208 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
3209 }
3210
3211 if data.len() >= PARALLEL_THRESHOLD {
3212 let chunks = split_for_scope(data, line_delim);
3213 let n = chunks.len();
3214 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
3215 rayon::scope(|s| {
3216 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
3217 s.spawn(move |_| {
3218 result.reserve(chunk.len() + 1);
3219 process_bytes_chunk(
3220 chunk,
3221 ranges,
3222 complement,
3223 output_delim,
3224 line_delim,
3225 result,
3226 );
3227 });
3228 }
3229 });
3230 let slices: Vec<IoSlice> = results
3231 .iter()
3232 .filter(|r| !r.is_empty())
3233 .map(|r| IoSlice::new(r))
3234 .collect();
3235 write_ioslices(out, &slices)?;
3236 } else {
3237 let mut buf = Vec::with_capacity(data.len() + 1);
3239 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
3240 if !buf.is_empty() {
3241 out.write_all(&buf)?;
3242 }
3243 }
3244 Ok(())
3245}
3246
3247fn process_bytes_chunk(
3252 data: &[u8],
3253 ranges: &[Range],
3254 complement: bool,
3255 output_delim: &[u8],
3256 line_delim: u8,
3257 buf: &mut Vec<u8>,
3258) {
3259 buf.reserve(data.len());
3260 let base = data.as_ptr();
3261 let mut start = 0;
3262 for end_pos in memchr_iter(line_delim, data) {
3263 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
3264 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3265 unsafe { buf_push(buf, line_delim) };
3266 start = end_pos + 1;
3267 }
3268 if start < data.len() {
3269 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
3270 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3271 unsafe { buf_push(buf, line_delim) };
3272 }
3273}
3274
3275#[inline(always)]
3279fn cut_bytes_to_buf(
3280 line: &[u8],
3281 ranges: &[Range],
3282 complement: bool,
3283 output_delim: &[u8],
3284 buf: &mut Vec<u8>,
3285) {
3286 let len = line.len();
3287 let base = line.as_ptr();
3288 let mut first_range = true;
3289
3290 let needed = len + output_delim.len() * ranges.len() + 1;
3292 if buf.capacity() - buf.len() < needed {
3293 buf.reserve(needed);
3294 }
3295
3296 if complement {
3297 let mut pos: usize = 1;
3298 for r in ranges {
3299 let rs = r.start;
3300 let re = r.end.min(len);
3301 if pos < rs {
3302 if !first_range && !output_delim.is_empty() {
3303 unsafe { buf_extend(buf, output_delim) };
3304 }
3305 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
3306 first_range = false;
3307 }
3308 pos = re + 1;
3309 if pos > len {
3310 break;
3311 }
3312 }
3313 if pos <= len {
3314 if !first_range && !output_delim.is_empty() {
3315 unsafe { buf_extend(buf, output_delim) };
3316 }
3317 unsafe {
3318 buf_extend(
3319 buf,
3320 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
3321 )
3322 };
3323 }
3324 } else if output_delim.is_empty() && ranges.len() == 1 {
3325 let start = ranges[0].start.saturating_sub(1);
3327 let end = ranges[0].end.min(len);
3328 if start < len {
3329 unsafe {
3330 buf_extend(
3331 buf,
3332 std::slice::from_raw_parts(base.add(start), end - start),
3333 )
3334 };
3335 }
3336 } else {
3337 for r in ranges {
3338 let start = r.start.saturating_sub(1);
3339 let end = r.end.min(len);
3340 if start >= len {
3341 break;
3342 }
3343 if !first_range && !output_delim.is_empty() {
3344 unsafe { buf_extend(buf, output_delim) };
3345 }
3346 unsafe {
3347 buf_extend(
3348 buf,
3349 std::slice::from_raw_parts(base.add(start), end - start),
3350 )
3351 };
3352 first_range = false;
3353 }
3354 }
3355}
3356
3357#[inline]
3361pub fn cut_fields(
3362 line: &[u8],
3363 delim: u8,
3364 ranges: &[Range],
3365 complement: bool,
3366 output_delim: &[u8],
3367 suppress_no_delim: bool,
3368 out: &mut impl Write,
3369) -> io::Result<bool> {
3370 if memchr::memchr(delim, line).is_none() {
3371 if !suppress_no_delim {
3372 out.write_all(line)?;
3373 return Ok(true);
3374 }
3375 return Ok(false);
3376 }
3377
3378 let mut field_num: usize = 1;
3379 let mut field_start: usize = 0;
3380 let mut first_output = true;
3381
3382 for delim_pos in memchr_iter(delim, line) {
3383 let selected = in_ranges(ranges, field_num) != complement;
3384 if selected {
3385 if !first_output {
3386 out.write_all(output_delim)?;
3387 }
3388 out.write_all(&line[field_start..delim_pos])?;
3389 first_output = false;
3390 }
3391 field_start = delim_pos + 1;
3392 field_num += 1;
3393 }
3394
3395 let selected = in_ranges(ranges, field_num) != complement;
3396 if selected {
3397 if !first_output {
3398 out.write_all(output_delim)?;
3399 }
3400 out.write_all(&line[field_start..])?;
3401 }
3402
3403 Ok(true)
3404}
3405
3406#[inline]
3408pub fn cut_bytes(
3409 line: &[u8],
3410 ranges: &[Range],
3411 complement: bool,
3412 output_delim: &[u8],
3413 out: &mut impl Write,
3414) -> io::Result<bool> {
3415 let mut first_range = true;
3416
3417 if complement {
3418 let len = line.len();
3419 let mut comp_ranges = Vec::new();
3420 let mut pos: usize = 1;
3421 for r in ranges {
3422 let rs = r.start;
3423 let re = r.end.min(len);
3424 if pos < rs {
3425 comp_ranges.push((pos, rs - 1));
3426 }
3427 pos = re + 1;
3428 if pos > len {
3429 break;
3430 }
3431 }
3432 if pos <= len {
3433 comp_ranges.push((pos, len));
3434 }
3435 for &(s, e) in &comp_ranges {
3436 if !first_range && !output_delim.is_empty() {
3437 out.write_all(output_delim)?;
3438 }
3439 out.write_all(&line[s - 1..e])?;
3440 first_range = false;
3441 }
3442 } else {
3443 for r in ranges {
3444 let start = r.start.saturating_sub(1);
3445 let end = r.end.min(line.len());
3446 if start >= line.len() {
3447 break;
3448 }
3449 if !first_range && !output_delim.is_empty() {
3450 out.write_all(output_delim)?;
3451 }
3452 out.write_all(&line[start..end])?;
3453 first_range = false;
3454 }
3455 }
3456 Ok(true)
3457}
3458
3459pub fn cut_field1_inplace(data: &mut [u8], delim: u8, line_delim: u8, suppress: bool) -> usize {
3467 let len = data.len();
3468 let mut wp: usize = 0;
3469 let mut rp: usize = 0;
3470
3471 while rp < len {
3472 match memchr::memchr2(delim, line_delim, &data[rp..]) {
3473 None => {
3474 if suppress {
3476 break;
3478 }
3479 let remaining = len - rp;
3480 if wp != rp {
3481 data.copy_within(rp..len, wp);
3482 }
3483 wp += remaining;
3484 break;
3485 }
3486 Some(offset) => {
3487 let actual = rp + offset;
3488 if data[actual] == line_delim {
3489 if suppress {
3491 rp = actual + 1;
3493 } else {
3494 let chunk_len = actual + 1 - rp;
3496 if wp != rp {
3497 data.copy_within(rp..actual + 1, wp);
3498 }
3499 wp += chunk_len;
3500 rp = actual + 1;
3501 }
3502 } else {
3503 let field_len = actual - rp;
3505 if wp != rp && field_len > 0 {
3506 data.copy_within(rp..actual, wp);
3507 }
3508 wp += field_len;
3509 data[wp] = line_delim;
3510 wp += 1;
3511 match memchr::memchr(line_delim, &data[actual + 1..]) {
3513 None => {
3514 rp = len;
3515 }
3516 Some(nl_off) => {
3517 rp = actual + 1 + nl_off + 1;
3518 }
3519 }
3520 }
3521 }
3522 }
3523 }
3524 wp
3525}
3526
3527pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3529 match cfg.mode {
3530 CutMode::Fields => process_fields_fast(data, cfg, out),
3531 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
3532 }
3533}
3534
3535pub fn process_cut_reader<R: BufRead>(
3540 mut reader: R,
3541 cfg: &CutConfig,
3542 out: &mut impl Write,
3543) -> io::Result<()> {
3544 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3546
3547 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3550
3551 loop {
3552 buf.reserve(CHUNK_SIZE);
3554 let read_start = buf.len();
3555 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3556 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3557 buf.truncate(read_start + n);
3558
3559 if buf.is_empty() {
3560 break;
3561 }
3562
3563 if n == 0 {
3564 process_cut_data(&buf, cfg, out)?;
3566 break;
3567 }
3568
3569 let process_end = match memchr::memrchr(line_delim, &buf) {
3571 Some(pos) => pos + 1,
3572 None => {
3573 continue;
3575 }
3576 };
3577
3578 process_cut_data(&buf[..process_end], cfg, out)?;
3580
3581 let leftover_len = buf.len() - process_end;
3583 if leftover_len > 0 {
3584 buf.copy_within(process_end.., 0);
3585 }
3586 buf.truncate(leftover_len);
3587 }
3588
3589 Ok(())
3590}
3591
/// Reads from `reader` until `buf` is full or end of input is reached and
/// returns the number of bytes read.
///
/// A return value smaller than `buf.len()` therefore means end of input.
/// `ErrorKind::Interrupted` is retried on every read, including the first one.
///
/// # Errors
///
/// Returns any other I/O error reported by the reader.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3611
3612pub fn process_cut_data_mut(data: &mut [u8], cfg: &CutConfig) -> Option<usize> {
3624 if cfg.complement {
3625 return None;
3626 }
3627 if data.is_empty() || data[data.len() - 1] != cfg.line_delim {
3631 return None;
3632 }
3633
3634 match cfg.mode {
3635 CutMode::Fields => {
3636 if cfg.output_delim.len() != 1 || cfg.output_delim[0] != cfg.delim {
3638 return None;
3639 }
3640 if cfg.delim == cfg.line_delim {
3641 return None;
3642 }
3643 Some(cut_fields_inplace_general(
3644 data,
3645 cfg.delim,
3646 cfg.line_delim,
3647 cfg.ranges,
3648 cfg.suppress_no_delim,
3649 ))
3650 }
3651 CutMode::Bytes | CutMode::Characters => {
3652 if !cfg.output_delim.is_empty() {
3653 return None;
3654 }
3655 Some(cut_bytes_inplace_general(data, cfg.line_delim, cfg.ranges))
3656 }
3657 }
3658}
3659
3660fn cut_fields_inplace_general(
3663 data: &mut [u8],
3664 delim: u8,
3665 line_delim: u8,
3666 ranges: &[Range],
3667 suppress: bool,
3668) -> usize {
3669 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == 1 {
3671 return cut_field1_inplace(data, delim, line_delim, suppress);
3672 }
3673
3674 let len = data.len();
3675 if len == 0 {
3676 return 0;
3677 }
3678
3679 let max_field = ranges.last().map_or(0, |r| r.end);
3680 let max_delims = max_field.min(64);
3681 let mut wp: usize = 0;
3682 let mut rp: usize = 0;
3683
3684 while rp < len {
3685 let line_end = memchr::memchr(line_delim, &data[rp..])
3686 .map(|p| rp + p)
3687 .unwrap_or(len);
3688 let line_len = line_end - rp;
3689
3690 let mut delim_pos = [0usize; 64];
3692 let mut num_delims: usize = 0;
3693
3694 for pos in memchr_iter(delim, &data[rp..line_end]) {
3695 if num_delims < max_delims {
3696 delim_pos[num_delims] = pos;
3697 num_delims += 1;
3698 if num_delims >= max_delims {
3699 break;
3700 }
3701 }
3702 }
3703
3704 if num_delims == 0 {
3705 if !suppress {
3707 if wp != rp {
3708 data.copy_within(rp..line_end, wp);
3709 }
3710 wp += line_len;
3711 if line_end < len {
3712 data[wp] = line_delim;
3713 wp += 1;
3714 }
3715 }
3716 } else {
3717 let total_fields = num_delims + 1;
3718 let mut first_output = true;
3719
3720 for r in ranges {
3721 let range_start = r.start;
3722 let range_end = r.end.min(total_fields);
3723 if range_start > total_fields {
3724 break;
3725 }
3726 for field_num in range_start..=range_end {
3727 if field_num > total_fields {
3728 break;
3729 }
3730
3731 let field_start = if field_num == 1 {
3732 0
3733 } else if field_num - 2 < num_delims {
3734 delim_pos[field_num - 2] + 1
3735 } else {
3736 continue;
3737 };
3738 let field_end = if field_num <= num_delims {
3739 delim_pos[field_num - 1]
3740 } else {
3741 line_len
3742 };
3743
3744 if !first_output {
3745 data[wp] = delim;
3746 wp += 1;
3747 }
3748 let flen = field_end - field_start;
3749 if flen > 0 {
3750 data.copy_within(rp + field_start..rp + field_start + flen, wp);
3751 wp += flen;
3752 }
3753 first_output = false;
3754 }
3755 }
3756
3757 if !first_output && line_end < len {
3758 data[wp] = line_delim;
3759 wp += 1;
3760 } else if first_output && line_end < len {
3761 data[wp] = line_delim;
3763 wp += 1;
3764 }
3765 }
3766
3767 rp = if line_end < len { line_end + 1 } else { len };
3768 }
3769
3770 wp
3771}
3772
3773fn cut_bytes_inplace_general(data: &mut [u8], line_delim: u8, ranges: &[Range]) -> usize {
3775 let len = data.len();
3776 if len == 0 {
3777 return 0;
3778 }
3779
3780 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == usize::MAX {
3782 return len;
3783 }
3784
3785 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end < usize::MAX {
3787 return cut_bytes_from_start_inplace(data, line_delim, ranges[0].end);
3788 }
3789
3790 let mut wp: usize = 0;
3791 let mut rp: usize = 0;
3792
3793 while rp < len {
3794 let line_end = memchr::memchr(line_delim, &data[rp..])
3795 .map(|p| rp + p)
3796 .unwrap_or(len);
3797 let line_len = line_end - rp;
3798
3799 for r in ranges {
3800 let start = r.start.saturating_sub(1);
3801 let end = r.end.min(line_len);
3802 if start >= line_len {
3803 break;
3804 }
3805 let flen = end - start;
3806 if flen > 0 {
3807 data.copy_within(rp + start..rp + start + flen, wp);
3808 wp += flen;
3809 }
3810 }
3811
3812 if line_end < len {
3813 data[wp] = line_delim;
3814 wp += 1;
3815 }
3816
3817 rp = if line_end < len { line_end + 1 } else { len };
3818 }
3819
3820 wp
3821}
3822
3823fn cut_bytes_from_start_inplace(data: &mut [u8], line_delim: u8, max_bytes: usize) -> usize {
3825 let len = data.len();
3826
3827 let mut all_fit = true;
3829 let mut start = 0;
3830 for pos in memchr_iter(line_delim, data) {
3831 if pos - start > max_bytes {
3832 all_fit = false;
3833 break;
3834 }
3835 start = pos + 1;
3836 }
3837 if all_fit && start < len && len - start > max_bytes {
3838 all_fit = false;
3839 }
3840 if all_fit {
3841 return len;
3842 }
3843
3844 let mut wp: usize = 0;
3846 let mut rp: usize = 0;
3847
3848 while rp < len {
3849 let line_end = memchr::memchr(line_delim, &data[rp..])
3850 .map(|p| rp + p)
3851 .unwrap_or(len);
3852 let line_len = line_end - rp;
3853
3854 let take = line_len.min(max_bytes);
3855 if take > 0 && wp != rp {
3856 data.copy_within(rp..rp + take, wp);
3857 }
3858 wp += take;
3859
3860 if line_end < len {
3861 data[wp] = line_delim;
3862 wp += 1;
3863 }
3864
3865 rp = if line_end < len { line_end + 1 } else { len };
3866 }
3867
3868 wp
3869}
3870
/// Selection unit used when cutting a line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Select by 1-based byte position.
    Bytes,
    /// Select by character position; the processing functions in this file
    /// route this mode through the same byte-oriented code paths as
    /// [`CutMode::Bytes`].
    Characters,
    /// Select delimiter-separated fields.
    Fields,
}