1use memchr::memchr_iter;
2use std::io::{self, BufRead, IoSlice, Write};
3
/// Input size, in bytes, at or above which the dispatchers split the data
/// into per-thread chunks instead of processing it sequentially.
const PARALLEL_THRESHOLD: usize = 32 * 1024 * 1024;

/// Maximum number of `IoSlice`s handed to a single `write_vectored` call.
// NOTE(review): 1024 presumably mirrors the platform IOV_MAX limit — confirm.
const MAX_IOV: usize = 1024;

/// Target size, in bytes, of the line-aligned pieces produced by the
/// sequential chunked path (`process_chunked`).
const SEQ_CHUNK: usize = 256 * 1024;
16
17fn process_chunked(
20 data: &[u8],
21 line_delim: u8,
22 out: &mut impl Write,
23 mut process_fn: impl FnMut(&[u8], &mut Vec<u8>),
24) -> io::Result<()> {
25 let mut buf = Vec::with_capacity(SEQ_CHUNK * 2);
26 let mut start = 0;
27 while start < data.len() {
28 let end = if start + SEQ_CHUNK >= data.len() {
29 data.len()
30 } else {
31 match memchr::memrchr(line_delim, &data[start..start + SEQ_CHUNK]) {
32 Some(pos) => start + pos + 1,
33 None => (start + SEQ_CHUNK).min(data.len()),
34 }
35 };
36 buf.clear();
37 process_fn(&data[start..end], &mut buf);
38 if !buf.is_empty() {
39 out.write_all(&buf)?;
40 }
41 start = end;
42 }
43 Ok(())
44}
45
/// Options describing one cut operation.
pub struct CutConfig<'a> {
    /// Selection mode.
    // NOTE(review): `CutMode` is declared outside this section — see its definition for the variants.
    pub mode: CutMode,
    /// Selected ranges (1-based, inclusive). The field fast paths read the
    /// last element as the highest selected field, so the list is expected to
    /// be sorted as returned by `parse_ranges`.
    pub ranges: &'a [Range],
    /// When true, everything *not* covered by `ranges` is selected.
    pub complement: bool,
    /// Input field delimiter byte.
    pub delim: u8,
    /// Bytes written between selected fields on output.
    pub output_delim: &'a [u8],
    /// When true, lines that contain no field delimiter are not printed.
    pub suppress_no_delim: bool,
    /// Byte that terminates each input line and each output line.
    pub line_delim: u8,
}
56
/// An inclusive, 1-based range of positions or fields.
///
/// `parse_ranges` stores an open-ended range (`N-`) as `end == usize::MAX`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position (1-based).
    pub start: usize,
    /// Last selected position, inclusive.
    pub end: usize,
}
63
64pub fn parse_ranges(spec: &str, no_merge_adjacent: bool) -> Result<Vec<Range>, String> {
71 let mut ranges = Vec::new();
72
73 for part in spec.split(',') {
74 let part = part.trim();
75 if part.is_empty() {
76 continue;
77 }
78
79 if let Some(idx) = part.find('-') {
80 let left = &part[..idx];
81 let right = &part[idx + 1..];
82
83 if left.is_empty() && right.is_empty() {
85 return Err("invalid range with no endpoint: -".to_string());
86 }
87
88 let start = if left.is_empty() {
89 1
90 } else {
91 left.parse::<usize>()
92 .map_err(|_| format!("invalid range: '{}'", part))?
93 };
94
95 let end = if right.is_empty() {
96 usize::MAX
97 } else {
98 right
99 .parse::<usize>()
100 .map_err(|_| format!("invalid range: '{}'", part))?
101 };
102
103 if start == 0 {
104 return Err("fields and positions are numbered from 1".to_string());
105 }
106 if start > end {
107 return Err(format!("invalid decreasing range: '{}'", part));
108 }
109
110 ranges.push(Range { start, end });
111 } else {
112 let n = part
113 .parse::<usize>()
114 .map_err(|_| format!("invalid field: '{}'", part))?;
115 if n == 0 {
116 return Err("fields and positions are numbered from 1".to_string());
117 }
118 ranges.push(Range { start: n, end: n });
119 }
120 }
121
122 if ranges.is_empty() {
123 return Err("you must specify a list of bytes, characters, or fields".to_string());
124 }
125
126 ranges.sort_by_key(|r| (r.start, r.end));
128 let mut merged = vec![ranges[0].clone()];
129 for r in &ranges[1..] {
130 let last = merged.last_mut().unwrap();
131 if no_merge_adjacent {
132 if r.start <= last.end {
134 last.end = last.end.max(r.end);
135 } else {
136 merged.push(r.clone());
137 }
138 } else {
139 if r.start <= last.end.saturating_add(1) {
141 last.end = last.end.max(r.end);
142 } else {
143 merged.push(r.clone());
144 }
145 }
146 }
147
148 Ok(merged)
149}
150
151#[inline(always)]
154fn in_ranges(ranges: &[Range], pos: usize) -> bool {
155 for r in ranges {
156 if pos < r.start {
157 return false;
158 }
159 if pos <= r.end {
160 return true;
161 }
162 }
163 false
164}
165
166#[inline]
169fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
170 let mut mask: u64 = 0;
171 for i in 1..=64u32 {
172 let in_range = in_ranges(ranges, i as usize);
173 if in_range != complement {
174 mask |= 1u64 << (i - 1);
175 }
176 }
177 mask
178}
179
180#[inline(always)]
182fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
183 if field_num <= 64 {
184 (mask >> (field_num - 1)) & 1 == 1
185 } else {
186 in_ranges(ranges, field_num) != complement
187 }
188}
189
/// Appends `data` to `buf` without checking or growing the capacity.
///
/// # Safety
/// The caller must guarantee `buf.capacity() - buf.len() >= data.len()`;
/// otherwise the copy writes past the end of the allocation.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    let old_len = buf.len();
    let added = data.len();
    // SAFETY: the caller promises enough spare capacity for `added` bytes,
    // and `data` cannot alias the uniquely borrowed `buf`.
    unsafe {
        let dst = buf.as_mut_ptr().add(old_len);
        dst.copy_from_nonoverlapping(data.as_ptr(), added);
        buf.set_len(old_len + added);
    }
}
202
/// Appends the single byte `b` to `buf` without checking or growing the
/// capacity.
///
/// # Safety
/// The caller must guarantee `buf.capacity() > buf.len()`.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    let idx = buf.len();
    // SAFETY: the caller promises at least one byte of spare capacity.
    unsafe {
        buf.as_mut_ptr().add(idx).write(b);
        buf.set_len(idx + 1);
    }
}
213
214#[inline]
218fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
219 if slices.is_empty() {
220 return Ok(());
221 }
222 for batch in slices.chunks(MAX_IOV) {
223 let total: usize = batch.iter().map(|s| s.len()).sum();
224 let written = out.write_vectored(batch)?;
225 if written >= total {
226 continue;
227 }
228 if written == 0 {
229 return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
230 }
231 write_ioslices_slow(out, batch, written)?;
232 }
233 Ok(())
234}
235
/// Finishes a partially written batch: skips the first `skip` bytes of
/// `slices` (already accepted by the writer) and sends the rest with
/// `write_all`.
///
/// # Errors
/// Returns the first error reported by `out.write_all`.
#[cold]
#[inline(never)]
fn write_ioslices_slow(
    out: &mut impl Write,
    slices: &[IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for slice in slices {
        let bytes: &[u8] = slice;
        match bytes.get(skip..) {
            // Part of this slice is still unwritten.
            Some(tail) if !tail.is_empty() => {
                out.write_all(tail)?;
                skip = 0;
            }
            // Slice was fully written already; account for it and move on.
            _ => skip -= bytes.len(),
        }
    }
    Ok(())
}
255
/// Returns the parallelism reported by the standard library, or 1 when it
/// cannot be determined.
#[inline]
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    }
}
267
268fn split_for_scope<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
271 let num_threads = num_cpus().max(1);
272 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
273 return vec![data];
274 }
275
276 let chunk_size = data.len() / num_threads;
277 let mut chunks = Vec::with_capacity(num_threads);
278 let mut pos = 0;
279
280 for _ in 0..num_threads - 1 {
281 let target = pos + chunk_size;
282 if target >= data.len() {
283 break;
284 }
285 let boundary = memchr::memchr(line_delim, &data[target..])
286 .map(|p| target + p + 1)
287 .unwrap_or(data.len());
288 if boundary > pos {
289 chunks.push(&data[pos..boundary]);
290 }
291 pos = boundary;
292 }
293
294 if pos < data.len() {
295 chunks.push(&data[pos..]);
296 }
297
298 chunks
299}
300
301fn process_fields_multi_select(
308 data: &[u8],
309 delim: u8,
310 line_delim: u8,
311 ranges: &[Range],
312 suppress: bool,
313 out: &mut impl Write,
314) -> io::Result<()> {
315 let max_field = ranges.last().map_or(0, |r| r.end);
316
317 if data.len() >= PARALLEL_THRESHOLD {
318 let chunks = split_for_scope(data, line_delim);
319 let n = chunks.len();
320 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
321 rayon::scope(|s| {
322 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
323 s.spawn(move |_| {
324 result.reserve(chunk.len() * 3 / 4);
325 multi_select_chunk(
326 chunk, delim, line_delim, ranges, max_field, suppress, result,
327 );
328 });
329 }
330 });
331 let slices: Vec<IoSlice> = results
332 .iter()
333 .filter(|r| !r.is_empty())
334 .map(|r| IoSlice::new(r))
335 .collect();
336 write_ioslices(out, &slices)?;
337 } else {
338 process_chunked(data, line_delim, out, |chunk, buf| {
339 multi_select_chunk(chunk, delim, line_delim, ranges, max_field, suppress, buf);
340 })?;
341 }
342 Ok(())
343}
344
345fn multi_select_chunk(
349 data: &[u8],
350 delim: u8,
351 line_delim: u8,
352 ranges: &[Range],
353 max_field: usize,
354 suppress: bool,
355 buf: &mut Vec<u8>,
356) {
357 if max_field <= 64 && delim != line_delim {
361 let mut mask: u64 = 0;
362 for r in ranges {
363 let s = r.start.max(1);
364 let e = r.end.min(64);
365 for f in s..=e {
366 mask |= 1u64 << (f - 1);
367 }
368 }
369 multi_select_chunk_bitmask(data, delim, line_delim, mask, max_field, suppress, buf);
370 return;
371 }
372
373 buf.reserve(data.len());
375 let base = data.as_ptr();
376 let mut start = 0;
377 let max_delims = max_field.min(128);
378
379 for end_pos in memchr_iter(line_delim, data) {
380 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
381 multi_select_line_fast(
382 line, delim, line_delim, ranges, max_delims, suppress, buf, start, base,
383 );
384 start = end_pos + 1;
385 }
386 if start < data.len() {
387 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
388 multi_select_line_fast(
389 line, delim, line_delim, ranges, max_delims, suppress, buf, start, base,
390 );
391 }
392}
393
394fn multi_select_chunk_bitmask(
398 data: &[u8],
399 delim: u8,
400 line_delim: u8,
401 mask: u64,
402 max_field: usize,
403 suppress: bool,
404 buf: &mut Vec<u8>,
405) {
406 buf.reserve(data.len() + 1);
410 let initial_len = buf.len();
411 let out_base = unsafe { buf.as_mut_ptr().add(initial_len) };
412 let src = data.as_ptr();
413 let mut wp: usize = 0;
414
415 let mut field_num: usize = 1; let mut field_start: usize = 0; let mut first_output = true; let mut has_delim = false; for pos in memchr::memchr2_iter(delim, line_delim, data) {
421 if data[pos] == line_delim {
422 if !has_delim {
424 if !suppress {
426 let len = pos - field_start;
427 unsafe {
428 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
429 }
430 wp += len;
431 unsafe {
432 *out_base.add(wp) = line_delim;
433 }
434 wp += 1;
435 }
436 } else {
437 if field_num <= 64 && (mask & (1u64 << (field_num - 1))) != 0 {
439 if !first_output {
440 unsafe {
441 *out_base.add(wp) = delim;
442 }
443 wp += 1;
444 }
445 let len = pos - field_start;
446 unsafe {
447 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
448 }
449 wp += len;
450 }
451 unsafe {
452 *out_base.add(wp) = line_delim;
453 }
454 wp += 1;
455 }
456 field_num = 1;
458 field_start = pos + 1;
459 first_output = true;
460 has_delim = false;
461 } else {
462 has_delim = true;
464 if field_num <= max_field && (mask & (1u64 << (field_num - 1))) != 0 {
465 if !first_output {
466 unsafe {
467 *out_base.add(wp) = delim;
468 }
469 wp += 1;
470 }
471 let len = pos - field_start;
472 unsafe {
473 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
474 }
475 wp += len;
476 first_output = false;
477 }
478 field_num += 1;
479 field_start = pos + 1;
480 }
481 }
482
483 if field_start < data.len() {
485 if !has_delim {
486 if !suppress {
487 let len = data.len() - field_start;
488 unsafe {
489 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
490 }
491 wp += len;
492 unsafe {
493 *out_base.add(wp) = line_delim;
494 }
495 wp += 1;
496 }
497 } else {
498 if field_num <= 64 && (mask & (1u64 << (field_num - 1))) != 0 {
499 if !first_output {
500 unsafe {
501 *out_base.add(wp) = delim;
502 }
503 wp += 1;
504 }
505 let len = data.len() - field_start;
506 unsafe {
507 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
508 }
509 wp += len;
510 }
511 unsafe {
512 *out_base.add(wp) = line_delim;
513 }
514 wp += 1;
515 }
516 }
517
518 unsafe {
519 buf.set_len(initial_len + wp);
520 }
521}
522
523#[inline(always)]
527fn multi_select_line_fast(
528 line: &[u8],
529 delim: u8,
530 line_delim: u8,
531 ranges: &[Range],
532 max_delims: usize,
533 suppress: bool,
534 buf: &mut Vec<u8>,
535 _line_abs_start: usize,
536 _data_base: *const u8,
537) {
538 let len = line.len();
539 if len == 0 {
540 if !suppress {
541 unsafe { buf_push(buf, line_delim) };
542 }
543 return;
544 }
545
546 let base = line.as_ptr();
547
548 let mut delim_pos = [0usize; 128];
550 let mut num_delims: usize = 0;
551
552 for pos in memchr_iter(delim, line) {
553 if num_delims < max_delims {
554 delim_pos[num_delims] = pos;
555 num_delims += 1;
556 if num_delims >= max_delims {
557 break;
558 }
559 }
560 }
561
562 if num_delims == 0 {
563 if !suppress {
564 unsafe {
565 buf_extend(buf, line);
566 buf_push(buf, line_delim);
567 }
568 }
569 return;
570 }
571
572 let total_fields = num_delims + 1;
573 let mut first_output = true;
574
575 for r in ranges {
576 let range_start = r.start;
577 let range_end = r.end.min(total_fields);
578 if range_start > total_fields {
579 break;
580 }
581 for field_num in range_start..=range_end {
582 if field_num > total_fields {
583 break;
584 }
585
586 let field_start = if field_num == 1 {
587 0
588 } else if field_num - 2 < num_delims {
589 delim_pos[field_num - 2] + 1
590 } else {
591 continue;
592 };
593 let field_end = if field_num <= num_delims {
594 delim_pos[field_num - 1]
595 } else {
596 len
597 };
598
599 if !first_output {
600 unsafe { buf_push(buf, delim) };
601 }
602 unsafe {
603 buf_extend(
604 buf,
605 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
606 );
607 }
608 first_output = false;
609 }
610 }
611
612 unsafe { buf_push(buf, line_delim) };
613}
614
615fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
619 let delim = cfg.delim;
620 let line_delim = cfg.line_delim;
621 let ranges = cfg.ranges;
622 let complement = cfg.complement;
623 let output_delim = cfg.output_delim;
624 let suppress = cfg.suppress_no_delim;
625
626 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
634 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
635 }
636
637 if complement
639 && ranges.len() == 1
640 && output_delim.len() == 1
641 && output_delim[0] == delim
642 && ranges[0].start == ranges[0].end
643 {
644 return process_complement_single_field(
645 data,
646 delim,
647 line_delim,
648 ranges[0].start,
649 suppress,
650 out,
651 );
652 }
653
654 if complement
657 && ranges.len() == 1
658 && ranges[0].start > 1
659 && ranges[0].end < usize::MAX
660 && output_delim.len() == 1
661 && output_delim[0] == delim
662 {
663 return process_complement_range(
664 data,
665 delim,
666 line_delim,
667 ranges[0].start,
668 ranges[0].end,
669 suppress,
670 out,
671 );
672 }
673
674 if !complement
676 && ranges.len() == 1
677 && ranges[0].start == 1
678 && output_delim.len() == 1
679 && output_delim[0] == delim
680 && ranges[0].end < usize::MAX
681 {
682 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
683 }
684
685 if !complement
687 && ranges.len() == 1
688 && ranges[0].end == usize::MAX
689 && ranges[0].start > 1
690 && output_delim.len() == 1
691 && output_delim[0] == delim
692 {
693 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
694 }
695
696 if !complement
698 && ranges.len() == 1
699 && ranges[0].start > 1
700 && ranges[0].end < usize::MAX
701 && output_delim.len() == 1
702 && output_delim[0] == delim
703 {
704 return process_fields_mid_range(
705 data,
706 delim,
707 line_delim,
708 ranges[0].start,
709 ranges[0].end,
710 suppress,
711 out,
712 );
713 }
714
715 if !complement
721 && ranges.len() > 1
722 && ranges.last().map_or(false, |r| r.end < usize::MAX)
723 && output_delim.len() == 1
724 && output_delim[0] == delim
725 && delim != line_delim
726 {
727 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
728 }
729
730 let max_field = if complement {
732 usize::MAX
733 } else {
734 ranges.last().map(|r| r.end).unwrap_or(0)
735 };
736 let field_mask = compute_field_mask(ranges, complement);
737
738 if data.len() >= PARALLEL_THRESHOLD {
739 let chunks = split_for_scope(data, line_delim);
740 let n = chunks.len();
741 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
742 rayon::scope(|s| {
743 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
744 s.spawn(move |_| {
745 result.reserve(chunk.len() + 1);
746 process_fields_chunk(
747 chunk,
748 delim,
749 ranges,
750 output_delim,
751 suppress,
752 max_field,
753 field_mask,
754 line_delim,
755 complement,
756 result,
757 );
758 });
759 }
760 });
761 let slices: Vec<IoSlice> = results
762 .iter()
763 .filter(|r| !r.is_empty())
764 .map(|r| IoSlice::new(r))
765 .collect();
766 write_ioslices(out, &slices)?;
767 } else {
768 process_chunked(data, line_delim, out, |chunk, buf| {
769 process_fields_chunk(
770 chunk,
771 delim,
772 ranges,
773 output_delim,
774 suppress,
775 max_field,
776 field_mask,
777 line_delim,
778 complement,
779 buf,
780 );
781 })?;
782 }
783 Ok(())
784}
785
786fn process_fields_chunk(
791 data: &[u8],
792 delim: u8,
793 ranges: &[Range],
794 output_delim: &[u8],
795 suppress: bool,
796 max_field: usize,
797 field_mask: u64,
798 line_delim: u8,
799 complement: bool,
800 buf: &mut Vec<u8>,
801) {
802 if delim != line_delim {
807 buf.reserve(data.len());
808 let mut start = 0;
809 for end_pos in memchr_iter(line_delim, data) {
810 let line = &data[start..end_pos];
811 extract_fields_to_buf(
812 line,
813 delim,
814 ranges,
815 output_delim,
816 suppress,
817 max_field,
818 field_mask,
819 line_delim,
820 buf,
821 complement,
822 );
823 start = end_pos + 1;
824 }
825 if start < data.len() {
826 extract_fields_to_buf(
827 &data[start..],
828 delim,
829 ranges,
830 output_delim,
831 suppress,
832 max_field,
833 field_mask,
834 line_delim,
835 buf,
836 complement,
837 );
838 }
839 return;
840 }
841
842 let mut start = 0;
844 for end_pos in memchr_iter(line_delim, data) {
845 let line = &data[start..end_pos];
846 extract_fields_to_buf(
847 line,
848 delim,
849 ranges,
850 output_delim,
851 suppress,
852 max_field,
853 field_mask,
854 line_delim,
855 buf,
856 complement,
857 );
858 start = end_pos + 1;
859 }
860 if start < data.len() {
861 extract_fields_to_buf(
862 &data[start..],
863 delim,
864 ranges,
865 output_delim,
866 suppress,
867 max_field,
868 field_mask,
869 line_delim,
870 buf,
871 complement,
872 );
873 }
874}
875
876fn process_single_field(
882 data: &[u8],
883 delim: u8,
884 line_delim: u8,
885 target: usize,
886 suppress: bool,
887 out: &mut impl Write,
888) -> io::Result<()> {
889 let target_idx = target - 1;
890
891 const FIELD_PARALLEL_MIN: usize = 16 * 1024 * 1024;
893
894 if delim != line_delim {
895 if target_idx == 0 && !suppress {
899 if data.len() >= FIELD_PARALLEL_MIN {
900 return single_field1_parallel(data, delim, line_delim, out);
901 }
902 return process_chunked(data, line_delim, out, |chunk, buf| {
903 single_field1_to_buf(chunk, delim, line_delim, buf);
904 });
905 }
906
907 if data.len() >= FIELD_PARALLEL_MIN {
911 let chunks = split_for_scope(data, line_delim);
912 let n = chunks.len();
913 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
914 rayon::scope(|s| {
915 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
916 s.spawn(move |_| {
917 result.reserve(chunk.len() / 2);
918 process_single_field_chunk(
919 chunk, delim, target_idx, line_delim, suppress, result,
920 );
921 });
922 }
923 });
924 let slices: Vec<IoSlice> = results
925 .iter()
926 .filter(|r| !r.is_empty())
927 .map(|r| IoSlice::new(r))
928 .collect();
929 write_ioslices(out, &slices)?;
930 } else {
931 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
932 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
933 if !buf.is_empty() {
934 out.write_all(&buf)?;
935 }
936 }
937 return Ok(());
938 }
939
940 if data.len() >= FIELD_PARALLEL_MIN {
942 let chunks = split_for_scope(data, line_delim);
943 let n = chunks.len();
944 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
945 rayon::scope(|s| {
946 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
947 s.spawn(move |_| {
948 result.reserve(chunk.len() / 4);
949 process_single_field_chunk(
950 chunk, delim, target_idx, line_delim, suppress, result,
951 );
952 });
953 }
954 });
955 let slices: Vec<IoSlice> = results
956 .iter()
957 .filter(|r| !r.is_empty())
958 .map(|r| IoSlice::new(r))
959 .collect();
960 write_ioslices(out, &slices)?;
961 } else {
962 let mut buf = Vec::with_capacity(data.len() / 4);
963 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
964 if !buf.is_empty() {
965 out.write_all(&buf)?;
966 }
967 }
968 Ok(())
969}
970
971fn process_complement_range(
974 data: &[u8],
975 delim: u8,
976 line_delim: u8,
977 skip_start: usize,
978 skip_end: usize,
979 suppress: bool,
980 out: &mut impl Write,
981) -> io::Result<()> {
982 if data.len() >= PARALLEL_THRESHOLD {
983 let chunks = split_for_scope(data, line_delim);
984 let n = chunks.len();
985 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
986 rayon::scope(|s| {
987 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
988 s.spawn(move |_| {
989 result.reserve(chunk.len());
990 complement_range_chunk(
991 chunk, delim, skip_start, skip_end, line_delim, suppress, result,
992 );
993 });
994 }
995 });
996 let slices: Vec<IoSlice> = results
997 .iter()
998 .filter(|r| !r.is_empty())
999 .map(|r| IoSlice::new(r))
1000 .collect();
1001 write_ioslices(out, &slices)?;
1002 } else {
1003 process_chunked(data, line_delim, out, |chunk, buf| {
1004 complement_range_chunk(
1005 chunk, delim, skip_start, skip_end, line_delim, suppress, buf,
1006 );
1007 })?;
1008 }
1009 Ok(())
1010}
1011
1012fn complement_range_chunk(
1014 data: &[u8],
1015 delim: u8,
1016 skip_start: usize,
1017 skip_end: usize,
1018 line_delim: u8,
1019 suppress: bool,
1020 buf: &mut Vec<u8>,
1021) {
1022 buf.reserve(data.len());
1024 let mut start = 0;
1025 for end_pos in memchr_iter(line_delim, data) {
1026 let line = &data[start..end_pos];
1027 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
1028 start = end_pos + 1;
1029 }
1030 if start < data.len() {
1031 complement_range_line(
1032 &data[start..],
1033 delim,
1034 skip_start,
1035 skip_end,
1036 line_delim,
1037 suppress,
1038 buf,
1039 );
1040 }
1041}
1042
1043#[inline(always)]
1050fn complement_range_line(
1051 line: &[u8],
1052 delim: u8,
1053 skip_start: usize,
1054 skip_end: usize,
1055 line_delim: u8,
1056 suppress: bool,
1057 buf: &mut Vec<u8>,
1058) {
1059 let len = line.len();
1060 if len == 0 {
1061 if !suppress {
1062 unsafe { buf_push(buf, line_delim) };
1063 }
1064 return;
1065 }
1066
1067 let base = line.as_ptr();
1069
1070 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
1080
1081 let mut delim_count: usize = 0;
1083 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
1087 delim_count += 1;
1088 if delim_count == need_prefix_delims {
1089 prefix_end_pos = pos;
1090 }
1091 if delim_count == total_need {
1092 suffix_start_pos = pos + 1;
1093 break;
1094 }
1095 }
1096
1097 if delim_count == 0 {
1098 if !suppress {
1100 unsafe {
1101 buf_extend(buf, line);
1102 buf_push(buf, line_delim);
1103 }
1104 }
1105 return;
1106 }
1107
1108 if delim_count < need_prefix_delims {
1114 unsafe {
1116 buf_extend(buf, line);
1117 buf_push(buf, line_delim);
1118 }
1119 return;
1120 }
1121
1122 let has_prefix = need_prefix_delims > 0;
1123 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
1124
1125 if has_prefix && has_suffix {
1126 unsafe {
1128 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1129 buf_push(buf, delim);
1130 buf_extend(
1131 buf,
1132 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1133 );
1134 buf_push(buf, line_delim);
1135 }
1136 } else if has_prefix {
1137 unsafe {
1139 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1140 buf_push(buf, line_delim);
1141 }
1142 } else if has_suffix {
1143 unsafe {
1145 buf_extend(
1146 buf,
1147 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1148 );
1149 buf_push(buf, line_delim);
1150 }
1151 } else {
1152 unsafe { buf_push(buf, line_delim) };
1154 }
1155}
1156
1157fn process_complement_single_field(
1159 data: &[u8],
1160 delim: u8,
1161 line_delim: u8,
1162 skip_field: usize,
1163 suppress: bool,
1164 out: &mut impl Write,
1165) -> io::Result<()> {
1166 let skip_idx = skip_field - 1;
1167
1168 if data.len() >= PARALLEL_THRESHOLD {
1169 let chunks = split_for_scope(data, line_delim);
1170 let n = chunks.len();
1171 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1172 rayon::scope(|s| {
1173 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1174 s.spawn(move |_| {
1175 result.reserve(chunk.len());
1176 complement_single_field_chunk(
1177 chunk, delim, skip_idx, line_delim, suppress, result,
1178 );
1179 });
1180 }
1181 });
1182 let slices: Vec<IoSlice> = results
1183 .iter()
1184 .filter(|r| !r.is_empty())
1185 .map(|r| IoSlice::new(r))
1186 .collect();
1187 write_ioslices(out, &slices)?;
1188 } else {
1189 process_chunked(data, line_delim, out, |chunk, buf| {
1190 complement_single_field_chunk(chunk, delim, skip_idx, line_delim, suppress, buf);
1191 })?;
1192 }
1193 Ok(())
1194}
1195
1196fn complement_single_field_chunk(
1201 data: &[u8],
1202 delim: u8,
1203 skip_idx: usize,
1204 line_delim: u8,
1205 suppress: bool,
1206 buf: &mut Vec<u8>,
1207) {
1208 buf.reserve(data.len());
1209 let mut start = 0;
1210 for end_pos in memchr_iter(line_delim, data) {
1211 let line = &data[start..end_pos];
1212 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1213 start = end_pos + 1;
1214 }
1215 if start < data.len() {
1216 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
1217 }
1218}
1219
1220#[inline(always)]
1222fn complement_single_field_line(
1223 line: &[u8],
1224 delim: u8,
1225 skip_idx: usize,
1226 line_delim: u8,
1227 suppress: bool,
1228 buf: &mut Vec<u8>,
1229) {
1230 let len = line.len();
1231 if len == 0 {
1232 if !suppress {
1233 unsafe { buf_push(buf, line_delim) };
1234 }
1235 return;
1236 }
1237
1238 let base = line.as_ptr();
1239 let need_before = skip_idx;
1240 let need_total = skip_idx + 1;
1241
1242 let mut delim_count: usize = 0;
1243 let mut skip_start_pos: usize = 0;
1244 let mut skip_end_pos: usize = len;
1245 let mut found_end = false;
1246
1247 for pos in memchr_iter(delim, line) {
1248 delim_count += 1;
1249 if delim_count == need_before {
1250 skip_start_pos = pos + 1;
1251 }
1252 if delim_count == need_total {
1253 skip_end_pos = pos;
1254 found_end = true;
1255 break;
1256 }
1257 }
1258
1259 if delim_count == 0 {
1260 if !suppress {
1261 unsafe {
1262 buf_extend(buf, line);
1263 buf_push(buf, line_delim);
1264 }
1265 }
1266 return;
1267 }
1268
1269 if delim_count < need_before {
1270 unsafe {
1271 buf_extend(buf, line);
1272 buf_push(buf, line_delim);
1273 }
1274 return;
1275 }
1276
1277 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1278 let has_suffix = found_end && skip_end_pos < len;
1279
1280 if has_prefix && has_suffix {
1281 unsafe {
1282 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1283 buf_push(buf, delim);
1284 buf_extend(
1285 buf,
1286 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1287 );
1288 buf_push(buf, line_delim);
1289 }
1290 } else if has_prefix {
1291 unsafe {
1292 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1293 buf_push(buf, line_delim);
1294 }
1295 } else if has_suffix {
1296 unsafe {
1297 buf_extend(
1298 buf,
1299 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1300 );
1301 buf_push(buf, line_delim);
1302 }
1303 } else {
1304 unsafe { buf_push(buf, line_delim) };
1305 }
1306}
1307
1308fn process_fields_prefix(
1312 data: &[u8],
1313 delim: u8,
1314 line_delim: u8,
1315 last_field: usize,
1316 suppress: bool,
1317 out: &mut impl Write,
1318) -> io::Result<()> {
1319 if data.len() >= PARALLEL_THRESHOLD {
1320 let chunks = split_for_scope(data, line_delim);
1321 let n = chunks.len();
1322 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1323 rayon::scope(|s| {
1324 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1325 s.spawn(move |_| {
1326 result.reserve(chunk.len());
1327 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, result);
1328 });
1329 }
1330 });
1331 let slices: Vec<IoSlice> = results
1332 .iter()
1333 .filter(|r| !r.is_empty())
1334 .map(|r| IoSlice::new(r))
1335 .collect();
1336 write_ioslices(out, &slices)?;
1337 } else if !suppress {
1338 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1342 } else {
1343 process_chunked(data, line_delim, out, |chunk, buf| {
1344 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, buf);
1345 })?;
1346 }
1347 Ok(())
1348}
1349
1350#[inline]
1356fn fields_prefix_zerocopy(
1357 data: &[u8],
1358 delim: u8,
1359 line_delim: u8,
1360 last_field: usize,
1361 out: &mut impl Write,
1362) -> io::Result<()> {
1363 let newline_buf: [u8; 1] = [line_delim];
1364 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1365 let mut start = 0;
1366 let mut run_start: usize = 0;
1367
1368 for end_pos in memchr_iter(line_delim, data) {
1369 let line = &data[start..end_pos];
1370 let mut field_count = 1;
1371 let mut truncate_at: Option<usize> = None;
1372 for dpos in memchr_iter(delim, line) {
1373 if field_count >= last_field {
1374 truncate_at = Some(start + dpos);
1375 break;
1376 }
1377 field_count += 1;
1378 }
1379
1380 if let Some(trunc_pos) = truncate_at {
1381 if run_start < start {
1382 iov.push(IoSlice::new(&data[run_start..start]));
1383 }
1384 iov.push(IoSlice::new(&data[start..trunc_pos]));
1385 iov.push(IoSlice::new(&newline_buf));
1386 run_start = end_pos + 1;
1387
1388 if iov.len() >= MAX_IOV - 2 {
1389 write_ioslices(out, &iov)?;
1390 iov.clear();
1391 }
1392 }
1393 start = end_pos + 1;
1394 }
1395 if start < data.len() {
1397 let line = &data[start..];
1398 let mut field_count = 1;
1399 let mut truncate_at: Option<usize> = None;
1400 for dpos in memchr_iter(delim, line) {
1401 if field_count >= last_field {
1402 truncate_at = Some(start + dpos);
1403 break;
1404 }
1405 field_count += 1;
1406 }
1407 if let Some(trunc_pos) = truncate_at {
1408 if run_start < start {
1409 iov.push(IoSlice::new(&data[run_start..start]));
1410 }
1411 iov.push(IoSlice::new(&data[start..trunc_pos]));
1412 iov.push(IoSlice::new(&newline_buf));
1413 if !iov.is_empty() {
1414 write_ioslices(out, &iov)?;
1415 }
1416 return Ok(());
1417 }
1418 }
1419 if run_start < data.len() {
1421 iov.push(IoSlice::new(&data[run_start..]));
1422 if !data.is_empty() && *data.last().unwrap() != line_delim {
1423 iov.push(IoSlice::new(&newline_buf));
1424 }
1425 }
1426 if !iov.is_empty() {
1427 write_ioslices(out, &iov)?;
1428 }
1429 Ok(())
1430}
1431
1432fn fields_prefix_chunk(
1434 data: &[u8],
1435 delim: u8,
1436 line_delim: u8,
1437 last_field: usize,
1438 suppress: bool,
1439 buf: &mut Vec<u8>,
1440) {
1441 buf.reserve(data.len());
1442 let mut start = 0;
1443 for end_pos in memchr_iter(line_delim, data) {
1444 let line = &data[start..end_pos];
1445 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1446 start = end_pos + 1;
1447 }
1448 if start < data.len() {
1449 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1450 }
1451}
1452
1453#[inline(always)]
1456fn fields_prefix_line(
1457 line: &[u8],
1458 delim: u8,
1459 line_delim: u8,
1460 last_field: usize,
1461 suppress: bool,
1462 buf: &mut Vec<u8>,
1463) {
1464 let len = line.len();
1465 if len == 0 {
1466 if !suppress {
1467 unsafe { buf_push(buf, line_delim) };
1468 }
1469 return;
1470 }
1471
1472 let base = line.as_ptr();
1474
1475 let mut field_count = 1usize;
1476 let mut has_delim = false;
1477
1478 for pos in memchr_iter(delim, line) {
1479 has_delim = true;
1480 if field_count >= last_field {
1481 unsafe {
1482 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1483 buf_push(buf, line_delim);
1484 }
1485 return;
1486 }
1487 field_count += 1;
1488 }
1489
1490 if !has_delim {
1491 if !suppress {
1492 unsafe {
1493 buf_extend(buf, line);
1494 buf_push(buf, line_delim);
1495 }
1496 }
1497 return;
1498 }
1499
1500 unsafe {
1501 buf_extend(buf, line);
1502 buf_push(buf, line_delim);
1503 }
1504}
1505
1506fn process_fields_suffix(
1508 data: &[u8],
1509 delim: u8,
1510 line_delim: u8,
1511 start_field: usize,
1512 suppress: bool,
1513 out: &mut impl Write,
1514) -> io::Result<()> {
1515 if data.len() >= PARALLEL_THRESHOLD {
1516 let chunks = split_for_scope(data, line_delim);
1517 let n = chunks.len();
1518 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1519 rayon::scope(|s| {
1520 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1521 s.spawn(move |_| {
1522 result.reserve(chunk.len());
1523 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, result);
1524 });
1525 }
1526 });
1527 let slices: Vec<IoSlice> = results
1528 .iter()
1529 .filter(|r| !r.is_empty())
1530 .map(|r| IoSlice::new(r))
1531 .collect();
1532 write_ioslices(out, &slices)?;
1533 } else {
1534 process_chunked(data, line_delim, out, |chunk, buf| {
1535 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, buf);
1536 })?;
1537 }
1538 Ok(())
1539}
1540
1541fn fields_suffix_chunk(
1543 data: &[u8],
1544 delim: u8,
1545 line_delim: u8,
1546 start_field: usize,
1547 suppress: bool,
1548 buf: &mut Vec<u8>,
1549) {
1550 buf.reserve(data.len());
1551 let mut start = 0;
1552 for end_pos in memchr_iter(line_delim, data) {
1553 let line = &data[start..end_pos];
1554 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1555 start = end_pos + 1;
1556 }
1557 if start < data.len() {
1558 fields_suffix_line(
1559 &data[start..],
1560 delim,
1561 line_delim,
1562 start_field,
1563 suppress,
1564 buf,
1565 );
1566 }
1567}
1568
1569#[inline(always)]
1572fn fields_suffix_line(
1573 line: &[u8],
1574 delim: u8,
1575 line_delim: u8,
1576 start_field: usize,
1577 suppress: bool,
1578 buf: &mut Vec<u8>,
1579) {
1580 let len = line.len();
1581 if len == 0 {
1582 if !suppress {
1583 unsafe { buf_push(buf, line_delim) };
1584 }
1585 return;
1586 }
1587
1588 let base = line.as_ptr();
1590
1591 let skip_delims = start_field - 1;
1592 let mut delim_count = 0usize;
1593 let mut has_delim = false;
1594
1595 for pos in memchr_iter(delim, line) {
1596 has_delim = true;
1597 delim_count += 1;
1598 if delim_count >= skip_delims {
1599 unsafe {
1600 buf_extend(
1601 buf,
1602 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1603 );
1604 buf_push(buf, line_delim);
1605 }
1606 return;
1607 }
1608 }
1609
1610 if !has_delim {
1611 if !suppress {
1612 unsafe {
1613 buf_extend(buf, line);
1614 buf_push(buf, line_delim);
1615 }
1616 }
1617 return;
1618 }
1619
1620 unsafe { buf_push(buf, line_delim) };
1622}
1623
1624fn process_fields_mid_range(
1627 data: &[u8],
1628 delim: u8,
1629 line_delim: u8,
1630 start_field: usize,
1631 end_field: usize,
1632 suppress: bool,
1633 out: &mut impl Write,
1634) -> io::Result<()> {
1635 if data.len() >= PARALLEL_THRESHOLD {
1636 let chunks = split_for_scope(data, line_delim);
1637 let n = chunks.len();
1638 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1639 rayon::scope(|s| {
1640 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1641 s.spawn(move |_| {
1642 result.reserve(chunk.len());
1643 fields_mid_range_chunk(
1644 chunk,
1645 delim,
1646 line_delim,
1647 start_field,
1648 end_field,
1649 suppress,
1650 result,
1651 );
1652 });
1653 }
1654 });
1655 let slices: Vec<IoSlice> = results
1656 .iter()
1657 .filter(|r| !r.is_empty())
1658 .map(|r| IoSlice::new(r))
1659 .collect();
1660 write_ioslices(out, &slices)?;
1661 } else {
1662 process_chunked(data, line_delim, out, |chunk, buf| {
1663 fields_mid_range_chunk(
1664 chunk,
1665 delim,
1666 line_delim,
1667 start_field,
1668 end_field,
1669 suppress,
1670 buf,
1671 );
1672 })?;
1673 }
1674 Ok(())
1675}
1676
1677fn fields_mid_range_chunk(
1682 data: &[u8],
1683 delim: u8,
1684 line_delim: u8,
1685 start_field: usize,
1686 end_field: usize,
1687 suppress: bool,
1688 buf: &mut Vec<u8>,
1689) {
1690 buf.reserve(data.len());
1691 let mut start = 0;
1692 for end_pos in memchr_iter(line_delim, data) {
1693 let line = &data[start..end_pos];
1694 fields_mid_range_line(
1695 line,
1696 delim,
1697 line_delim,
1698 start_field,
1699 end_field,
1700 suppress,
1701 buf,
1702 );
1703 start = end_pos + 1;
1704 }
1705 if start < data.len() {
1706 fields_mid_range_line(
1707 &data[start..],
1708 delim,
1709 line_delim,
1710 start_field,
1711 end_field,
1712 suppress,
1713 buf,
1714 );
1715 }
1716}
1717
1718#[inline(always)]
1722fn fields_mid_range_line(
1723 line: &[u8],
1724 delim: u8,
1725 line_delim: u8,
1726 start_field: usize,
1727 end_field: usize,
1728 suppress: bool,
1729 buf: &mut Vec<u8>,
1730) {
1731 let len = line.len();
1732 if len == 0 {
1733 if !suppress {
1734 unsafe { buf_push(buf, line_delim) };
1735 }
1736 return;
1737 }
1738
1739 let base = line.as_ptr();
1741
1742 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
1746 let mut delim_count = 0;
1747 let mut range_start = 0;
1748 let mut has_delim = false;
1749
1750 for pos in memchr_iter(delim, line) {
1751 has_delim = true;
1752 delim_count += 1;
1753 if delim_count == skip_before {
1754 range_start = pos + 1;
1755 }
1756 if delim_count == target_end_delim {
1757 if skip_before == 0 {
1758 range_start = 0;
1759 }
1760 unsafe {
1761 buf_extend(
1762 buf,
1763 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
1764 );
1765 buf_push(buf, line_delim);
1766 }
1767 return;
1768 }
1769 }
1770
1771 if !has_delim {
1772 if !suppress {
1773 unsafe {
1774 buf_extend(buf, line);
1775 buf_push(buf, line_delim);
1776 }
1777 }
1778 return;
1779 }
1780
1781 if delim_count >= skip_before {
1783 if skip_before == 0 {
1785 range_start = 0;
1786 }
1787 unsafe {
1788 buf_extend(
1789 buf,
1790 std::slice::from_raw_parts(base.add(range_start), len - range_start),
1791 );
1792 buf_push(buf, line_delim);
1793 }
1794 } else {
1795 unsafe { buf_push(buf, line_delim) };
1797 }
1798}
1799
1800fn single_field1_parallel(
1811 data: &[u8],
1812 delim: u8,
1813 line_delim: u8,
1814 out: &mut impl Write,
1815) -> io::Result<()> {
1816 let chunks = split_for_scope(data, line_delim);
1817 let n = chunks.len();
1818 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1819 rayon::scope(|s| {
1820 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1821 s.spawn(move |_| {
1822 result.reserve(chunk.len() + 1);
1823 single_field1_to_buf(chunk, delim, line_delim, result);
1824 });
1825 }
1826 });
1827 let slices: Vec<IoSlice> = results
1828 .iter()
1829 .filter(|r| !r.is_empty())
1830 .map(|r| IoSlice::new(r))
1831 .collect();
1832 write_ioslices(out, &slices)
1833}
1834
1835#[inline]
1846fn single_field1_to_buf(data: &[u8], delim: u8, line_delim: u8, buf: &mut Vec<u8>) {
1847 debug_assert_ne!(delim, line_delim, "delim and line_delim must differ");
1848 buf.reserve(data.len() + 1);
1851
1852 let base = data.as_ptr();
1853 let initial_len = buf.len();
1854 let mut out_ptr = unsafe { buf.as_mut_ptr().add(initial_len) };
1855 let mut start = 0;
1856 let mut run_start: usize = 0;
1858 let mut in_run = true; for end_pos in memchr_iter(line_delim, data) {
1861 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
1862 match memchr::memchr(delim, line) {
1863 Some(dp) => {
1864 if in_run && run_start < start {
1866 let run_len = start - run_start;
1868 unsafe {
1869 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
1870 out_ptr = out_ptr.add(run_len);
1871 }
1872 }
1873 unsafe {
1875 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, dp);
1876 out_ptr = out_ptr.add(dp);
1877 *out_ptr = line_delim;
1878 out_ptr = out_ptr.add(1);
1879 }
1880 run_start = end_pos + 1;
1881 in_run = true;
1882 }
1883 None => {
1884 if !in_run {
1886 run_start = start;
1887 in_run = true;
1888 }
1889 }
1890 }
1891 start = end_pos + 1;
1892 }
1893
1894 if in_run && run_start < start {
1896 let run_len = start - run_start;
1897 unsafe {
1898 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
1899 out_ptr = out_ptr.add(run_len);
1900 }
1901 }
1902
1903 if start < data.len() {
1905 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
1906 match memchr::memchr(delim, line) {
1907 Some(dp) => {
1908 unsafe {
1910 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, dp);
1911 out_ptr = out_ptr.add(dp);
1912 *out_ptr = line_delim;
1913 out_ptr = out_ptr.add(1);
1914 }
1915 }
1916 None => {
1917 let len = data.len() - start;
1919 unsafe {
1920 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, len);
1921 out_ptr = out_ptr.add(len);
1922 *out_ptr = line_delim;
1923 out_ptr = out_ptr.add(1);
1924 }
1925 }
1926 }
1927 }
1928
1929 unsafe {
1930 let new_len = out_ptr as usize - buf.as_ptr() as usize;
1931 debug_assert!(new_len >= initial_len && new_len <= buf.capacity());
1932 buf.set_len(new_len);
1933 }
1934}
1935
1936#[inline]
1945#[allow(dead_code)]
1946fn single_field1_zerocopy(
1947 data: &[u8],
1948 delim: u8,
1949 line_delim: u8,
1950 out: &mut impl Write,
1951) -> io::Result<()> {
1952 let newline_buf: [u8; 1] = [line_delim];
1953
1954 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1955 let mut run_start: usize = 0;
1956 let mut start = 0;
1957
1958 for end_pos in memchr_iter(line_delim, data) {
1959 let line = &data[start..end_pos];
1960 if let Some(dp) = memchr::memchr(delim, line) {
1961 if run_start < start {
1964 iov.push(IoSlice::new(&data[run_start..start]));
1965 }
1966 iov.push(IoSlice::new(&data[start..start + dp]));
1967 iov.push(IoSlice::new(&newline_buf));
1968 run_start = end_pos + 1;
1969
1970 if iov.len() >= MAX_IOV - 2 {
1971 write_ioslices(out, &iov)?;
1972 iov.clear();
1973 }
1974 }
1975 start = end_pos + 1;
1977 }
1978
1979 if start < data.len() {
1981 let line = &data[start..];
1982 if let Some(dp) = memchr::memchr(delim, line) {
1983 if run_start < start {
1984 iov.push(IoSlice::new(&data[run_start..start]));
1985 }
1986 iov.push(IoSlice::new(&data[start..start + dp]));
1987 iov.push(IoSlice::new(&newline_buf));
1988 if !iov.is_empty() {
1989 write_ioslices(out, &iov)?;
1990 }
1991 return Ok(());
1992 }
1993 }
1994
1995 if run_start < data.len() {
1997 iov.push(IoSlice::new(&data[run_start..]));
1998 if !data.is_empty() && *data.last().unwrap() != line_delim {
1999 iov.push(IoSlice::new(&newline_buf));
2000 }
2001 }
2002 if !iov.is_empty() {
2003 write_ioslices(out, &iov)?;
2004 }
2005 Ok(())
2006}
2007
2008fn process_single_field_chunk(
2012 data: &[u8],
2013 delim: u8,
2014 target_idx: usize,
2015 line_delim: u8,
2016 suppress: bool,
2017 buf: &mut Vec<u8>,
2018) {
2019 buf.reserve(data.len() + 1);
2021
2022 let base = data.as_ptr();
2023 let initial_len = buf.len();
2024 let mut out_ptr = unsafe { buf.as_mut_ptr().add(initial_len) };
2025 let mut start = 0;
2026 let mut run_start: usize = 0;
2028 let mut in_run = !suppress; for end_pos in memchr_iter(line_delim, data) {
2031 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
2032 let line_len = end_pos - start;
2033
2034 if line_len == 0 {
2035 if !suppress {
2036 if !in_run {
2038 run_start = start;
2039 in_run = true;
2040 }
2041 }
2042 start = end_pos + 1;
2043 continue;
2044 }
2045
2046 let mut field_start_offset = 0;
2048 let mut field_idx = 0;
2049 let mut found = false;
2050 let mut has_delim = false;
2051
2052 for pos in memchr_iter(delim, line) {
2053 has_delim = true;
2054 if field_idx == target_idx {
2055 if in_run && run_start < start {
2058 let run_len = start - run_start;
2059 unsafe {
2060 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2061 out_ptr = out_ptr.add(run_len);
2062 }
2063 }
2064 let field_len = pos - field_start_offset;
2065 unsafe {
2066 std::ptr::copy_nonoverlapping(
2067 base.add(start + field_start_offset),
2068 out_ptr,
2069 field_len,
2070 );
2071 out_ptr = out_ptr.add(field_len);
2072 *out_ptr = line_delim;
2073 out_ptr = out_ptr.add(1);
2074 }
2075 run_start = end_pos + 1;
2076 in_run = true;
2077 found = true;
2078 break;
2079 }
2080 field_idx += 1;
2081 field_start_offset = pos + 1;
2082 }
2083
2084 if !found {
2085 if !has_delim {
2086 if !suppress {
2088 if !in_run {
2090 run_start = start;
2091 in_run = true;
2092 }
2093 } else {
2094 if in_run && run_start < start {
2096 let run_len = start - run_start;
2097 unsafe {
2098 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2099 out_ptr = out_ptr.add(run_len);
2100 }
2101 }
2102 in_run = false;
2103 run_start = end_pos + 1;
2104 }
2105 } else if field_idx == target_idx {
2106 if in_run && run_start < start {
2108 let run_len = start - run_start;
2109 unsafe {
2110 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2111 out_ptr = out_ptr.add(run_len);
2112 }
2113 }
2114 let field_len = line_len - field_start_offset;
2115 unsafe {
2116 std::ptr::copy_nonoverlapping(
2117 base.add(start + field_start_offset),
2118 out_ptr,
2119 field_len,
2120 );
2121 out_ptr = out_ptr.add(field_len);
2122 *out_ptr = line_delim;
2123 out_ptr = out_ptr.add(1);
2124 }
2125 run_start = end_pos + 1;
2126 in_run = true;
2127 } else {
2128 if in_run && run_start < start {
2130 let run_len = start - run_start;
2131 unsafe {
2132 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2133 out_ptr = out_ptr.add(run_len);
2134 }
2135 }
2136 unsafe {
2137 *out_ptr = line_delim;
2138 out_ptr = out_ptr.add(1);
2139 }
2140 run_start = end_pos + 1;
2141 in_run = true;
2142 }
2143 }
2144
2145 start = end_pos + 1;
2146 }
2147
2148 if in_run && run_start < start {
2150 let run_len = start - run_start;
2151 unsafe {
2152 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2153 out_ptr = out_ptr.add(run_len);
2154 }
2155 }
2156
2157 if start < data.len() {
2159 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2160 let line_len = data.len() - start;
2161
2162 if line_len == 0 {
2163 if !suppress {
2164 unsafe {
2165 *out_ptr = line_delim;
2166 out_ptr = out_ptr.add(1);
2167 }
2168 }
2169 } else {
2170 let mut field_start_offset = 0;
2171 let mut field_idx = 0;
2172 let mut found = false;
2173 let mut has_delim = false;
2174
2175 for pos in memchr_iter(delim, line) {
2176 has_delim = true;
2177 if field_idx == target_idx {
2178 let field_len = pos - field_start_offset;
2179 unsafe {
2180 std::ptr::copy_nonoverlapping(
2181 base.add(start + field_start_offset),
2182 out_ptr,
2183 field_len,
2184 );
2185 out_ptr = out_ptr.add(field_len);
2186 *out_ptr = line_delim;
2187 out_ptr = out_ptr.add(1);
2188 }
2189 found = true;
2190 break;
2191 }
2192 field_idx += 1;
2193 field_start_offset = pos + 1;
2194 }
2195
2196 if !found {
2197 if !has_delim {
2198 if !suppress {
2199 unsafe {
2200 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, line_len);
2201 out_ptr = out_ptr.add(line_len);
2202 *out_ptr = line_delim;
2203 out_ptr = out_ptr.add(1);
2204 }
2205 }
2206 } else if field_idx == target_idx {
2207 let field_len = line_len - field_start_offset;
2208 unsafe {
2209 std::ptr::copy_nonoverlapping(
2210 base.add(start + field_start_offset),
2211 out_ptr,
2212 field_len,
2213 );
2214 out_ptr = out_ptr.add(field_len);
2215 *out_ptr = line_delim;
2216 out_ptr = out_ptr.add(1);
2217 }
2218 } else {
2219 unsafe {
2220 *out_ptr = line_delim;
2221 out_ptr = out_ptr.add(1);
2222 }
2223 }
2224 }
2225 }
2226 }
2227
2228 unsafe {
2229 let new_len = out_ptr as usize - buf.as_ptr() as usize;
2230 debug_assert!(new_len >= initial_len && new_len <= buf.capacity());
2231 buf.set_len(new_len);
2232 }
2233}
2234
2235#[inline(always)]
2239fn extract_fields_to_buf(
2240 line: &[u8],
2241 delim: u8,
2242 ranges: &[Range],
2243 output_delim: &[u8],
2244 suppress: bool,
2245 max_field: usize,
2246 field_mask: u64,
2247 line_delim: u8,
2248 buf: &mut Vec<u8>,
2249 complement: bool,
2250) {
2251 let len = line.len();
2252
2253 if len == 0 {
2254 if !suppress {
2255 buf.push(line_delim);
2256 }
2257 return;
2258 }
2259
2260 let needed = len + output_delim.len() * 16 + 1;
2263 if buf.capacity() - buf.len() < needed {
2264 buf.reserve(needed);
2265 }
2266
2267 let base = line.as_ptr();
2268 let mut field_num: usize = 1;
2269 let mut field_start: usize = 0;
2270 let mut first_output = true;
2271 let mut has_delim = false;
2272
2273 for delim_pos in memchr_iter(delim, line) {
2275 has_delim = true;
2276
2277 if is_selected(field_num, field_mask, ranges, complement) {
2278 if !first_output {
2279 unsafe { buf_extend(buf, output_delim) };
2280 }
2281 unsafe {
2282 buf_extend(
2283 buf,
2284 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2285 )
2286 };
2287 first_output = false;
2288 }
2289
2290 field_num += 1;
2291 field_start = delim_pos + 1;
2292
2293 if field_num > max_field {
2294 break;
2295 }
2296 }
2297
2298 if (field_num <= max_field || complement)
2300 && has_delim
2301 && is_selected(field_num, field_mask, ranges, complement)
2302 {
2303 if !first_output {
2304 unsafe { buf_extend(buf, output_delim) };
2305 }
2306 unsafe {
2307 buf_extend(
2308 buf,
2309 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2310 )
2311 };
2312 first_output = false;
2313 }
2314
2315 if !first_output {
2316 unsafe { buf_push(buf, line_delim) };
2317 } else if !has_delim {
2318 if !suppress {
2319 unsafe {
2320 buf_extend(buf, line);
2321 buf_push(buf, line_delim);
2322 }
2323 }
2324 } else {
2325 unsafe { buf_push(buf, line_delim) };
2326 }
2327}
2328
2329fn process_bytes_from_start(
2336 data: &[u8],
2337 max_bytes: usize,
2338 line_delim: u8,
2339 out: &mut impl Write,
2340) -> io::Result<()> {
2341 if data.len() < PARALLEL_THRESHOLD && max_bytes > 0 && max_bytes < usize::MAX {
2349 let mut start = 0;
2350 let mut all_fit = true;
2351 for pos in memchr_iter(line_delim, data) {
2352 if pos - start > max_bytes {
2353 all_fit = false;
2354 break;
2355 }
2356 start = pos + 1;
2357 }
2358 if all_fit && start < data.len() && data.len() - start > max_bytes {
2360 all_fit = false;
2361 }
2362 if all_fit {
2363 if !data.is_empty() && data[data.len() - 1] == line_delim {
2365 return out.write_all(data);
2366 } else if !data.is_empty() {
2367 out.write_all(data)?;
2368 return out.write_all(&[line_delim]);
2369 }
2370 return Ok(());
2371 }
2372 }
2373
2374 if data.len() >= PARALLEL_THRESHOLD {
2375 let chunks = split_for_scope(data, line_delim);
2376 let n = chunks.len();
2377 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2378 rayon::scope(|s| {
2379 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2380 s.spawn(move |_| {
2381 result.reserve(chunk.len());
2384 bytes_from_start_chunk(chunk, max_bytes, line_delim, result);
2385 });
2386 }
2387 });
2388 let slices: Vec<IoSlice> = results
2390 .iter()
2391 .filter(|r| !r.is_empty())
2392 .map(|r| IoSlice::new(r))
2393 .collect();
2394 write_ioslices(out, &slices)?;
2395 } else {
2396 if max_bytes <= 512 {
2402 let est_out = (data.len() / 4).max(max_bytes + 2);
2405 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2406 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2407 if !buf.is_empty() {
2408 out.write_all(&buf)?;
2409 }
2410 } else {
2411 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2415 }
2416 }
2417 Ok(())
2418}
2419
2420#[inline]
2425fn bytes_from_start_zerocopy(
2426 data: &[u8],
2427 max_bytes: usize,
2428 line_delim: u8,
2429 out: &mut impl Write,
2430) -> io::Result<()> {
2431 let newline_buf: [u8; 1] = [line_delim];
2432 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2433 let mut start = 0;
2434 let mut run_start: usize = 0;
2435
2436 for pos in memchr_iter(line_delim, data) {
2437 let line_len = pos - start;
2438 if line_len > max_bytes {
2439 if run_start < start {
2441 iov.push(IoSlice::new(&data[run_start..start]));
2442 }
2443 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2444 iov.push(IoSlice::new(&newline_buf));
2445 run_start = pos + 1;
2446
2447 if iov.len() >= MAX_IOV - 2 {
2448 write_ioslices(out, &iov)?;
2449 iov.clear();
2450 }
2451 }
2452 start = pos + 1;
2453 }
2454 if start < data.len() {
2456 let line_len = data.len() - start;
2457 if line_len > max_bytes {
2458 if run_start < start {
2459 iov.push(IoSlice::new(&data[run_start..start]));
2460 }
2461 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2462 iov.push(IoSlice::new(&newline_buf));
2463 if !iov.is_empty() {
2464 write_ioslices(out, &iov)?;
2465 }
2466 return Ok(());
2467 }
2468 }
2469 if run_start < data.len() {
2471 iov.push(IoSlice::new(&data[run_start..]));
2472 if !data.is_empty() && *data.last().unwrap() != line_delim {
2473 iov.push(IoSlice::new(&newline_buf));
2474 }
2475 }
2476 if !iov.is_empty() {
2477 write_ioslices(out, &iov)?;
2478 }
2479 Ok(())
2480}
2481
2482#[inline]
2487fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2488 buf.reserve(data.len());
2491
2492 let src = data.as_ptr();
2493 let dst_base = buf.as_mut_ptr();
2494 let mut wp = buf.len();
2495 let mut start = 0;
2496
2497 for pos in memchr_iter(line_delim, data) {
2498 let line_len = pos - start;
2499 let take = line_len.min(max_bytes);
2500 unsafe {
2501 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2502 *dst_base.add(wp + take) = line_delim;
2503 }
2504 wp += take + 1;
2505 start = pos + 1;
2506 }
2507 if start < data.len() {
2509 let line_len = data.len() - start;
2510 let take = line_len.min(max_bytes);
2511 unsafe {
2512 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2513 *dst_base.add(wp + take) = line_delim;
2514 }
2515 wp += take + 1;
2516 }
2517 unsafe { buf.set_len(wp) };
2518}
2519
2520fn process_bytes_from_offset(
2522 data: &[u8],
2523 skip_bytes: usize,
2524 line_delim: u8,
2525 out: &mut impl Write,
2526) -> io::Result<()> {
2527 if data.len() >= PARALLEL_THRESHOLD {
2528 let chunks = split_for_scope(data, line_delim);
2529 let n = chunks.len();
2530 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2531 rayon::scope(|s| {
2532 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2533 s.spawn(move |_| {
2534 result.reserve(chunk.len());
2535 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, result);
2536 });
2537 }
2538 });
2539 let slices: Vec<IoSlice> = results
2541 .iter()
2542 .filter(|r| !r.is_empty())
2543 .map(|r| IoSlice::new(r))
2544 .collect();
2545 write_ioslices(out, &slices)?;
2546 } else {
2547 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2549 }
2550 Ok(())
2551}
2552
2553#[inline]
2557fn bytes_from_offset_zerocopy(
2558 data: &[u8],
2559 skip_bytes: usize,
2560 line_delim: u8,
2561 out: &mut impl Write,
2562) -> io::Result<()> {
2563 let delim_buf = [line_delim];
2564 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2565
2566 let mut start = 0;
2567 for pos in memchr_iter(line_delim, data) {
2568 let line_len = pos - start;
2569 if line_len > skip_bytes {
2570 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2571 }
2572 iov.push(IoSlice::new(&delim_buf));
2573 if iov.len() >= MAX_IOV - 1 {
2575 write_ioslices(out, &iov)?;
2576 iov.clear();
2577 }
2578 start = pos + 1;
2579 }
2580 if start < data.len() {
2581 let line_len = data.len() - start;
2582 if line_len > skip_bytes {
2583 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2584 }
2585 iov.push(IoSlice::new(&delim_buf));
2586 }
2587 if !iov.is_empty() {
2588 write_ioslices(out, &iov)?;
2589 }
2590 Ok(())
2591}
2592
2593#[inline]
2596fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2597 buf.reserve(data.len());
2598
2599 let src = data.as_ptr();
2600 let dst_base = buf.as_mut_ptr();
2601 let mut wp = buf.len();
2602 let mut start = 0;
2603
2604 for pos in memchr_iter(line_delim, data) {
2605 let line_len = pos - start;
2606 if line_len > skip_bytes {
2607 let take = line_len - skip_bytes;
2608 unsafe {
2609 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2610 }
2611 wp += take;
2612 }
2613 unsafe {
2614 *dst_base.add(wp) = line_delim;
2615 }
2616 wp += 1;
2617 start = pos + 1;
2618 }
2619 if start < data.len() {
2620 let line_len = data.len() - start;
2621 if line_len > skip_bytes {
2622 let take = line_len - skip_bytes;
2623 unsafe {
2624 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2625 }
2626 wp += take;
2627 }
2628 unsafe {
2629 *dst_base.add(wp) = line_delim;
2630 }
2631 wp += 1;
2632 }
2633 unsafe { buf.set_len(wp) };
2634}
2635
2636fn process_bytes_mid_range(
2638 data: &[u8],
2639 start_byte: usize,
2640 end_byte: usize,
2641 line_delim: u8,
2642 out: &mut impl Write,
2643) -> io::Result<()> {
2644 let skip = start_byte.saturating_sub(1);
2645
2646 if data.len() >= PARALLEL_THRESHOLD {
2647 let chunks = split_for_scope(data, line_delim);
2648 let n = chunks.len();
2649 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2650 rayon::scope(|s| {
2651 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2652 s.spawn(move |_| {
2653 result.reserve(chunk.len());
2654 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, result);
2655 });
2656 }
2657 });
2658 let slices: Vec<IoSlice> = results
2659 .iter()
2660 .filter(|r| !r.is_empty())
2661 .map(|r| IoSlice::new(r))
2662 .collect();
2663 write_ioslices(out, &slices)?;
2664 } else {
2665 process_chunked(data, line_delim, out, |chunk, buf| {
2666 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, buf);
2667 })?;
2668 }
2669 Ok(())
2670}
2671
2672#[inline]
2676fn bytes_mid_range_chunk(
2677 data: &[u8],
2678 skip: usize,
2679 end_byte: usize,
2680 line_delim: u8,
2681 buf: &mut Vec<u8>,
2682) {
2683 buf.reserve(data.len());
2684
2685 let src = data.as_ptr();
2686 let dst_base = buf.as_mut_ptr();
2687 let mut wp = buf.len();
2688 let mut start = 0;
2689
2690 for pos in memchr_iter(line_delim, data) {
2691 let line_len = pos - start;
2692 if line_len > skip {
2693 let take_end = line_len.min(end_byte);
2694 let take = take_end - skip;
2695 unsafe {
2696 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2697 }
2698 wp += take;
2699 }
2700 unsafe {
2701 *dst_base.add(wp) = line_delim;
2702 }
2703 wp += 1;
2704 start = pos + 1;
2705 }
2706 if start < data.len() {
2707 let line_len = data.len() - start;
2708 if line_len > skip {
2709 let take_end = line_len.min(end_byte);
2710 let take = take_end - skip;
2711 unsafe {
2712 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2713 }
2714 wp += take;
2715 }
2716 unsafe {
2717 *dst_base.add(wp) = line_delim;
2718 }
2719 wp += 1;
2720 }
2721 unsafe { buf.set_len(wp) };
2722}
2723
2724fn process_bytes_complement_mid(
2726 data: &[u8],
2727 skip_start: usize,
2728 skip_end: usize,
2729 line_delim: u8,
2730 out: &mut impl Write,
2731) -> io::Result<()> {
2732 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
2734 let chunks = split_for_scope(data, line_delim);
2735 let n = chunks.len();
2736 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2737 rayon::scope(|s| {
2738 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2739 s.spawn(move |_| {
2740 result.reserve(chunk.len());
2741 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, result);
2742 });
2743 }
2744 });
2745 let slices: Vec<IoSlice> = results
2746 .iter()
2747 .filter(|r| !r.is_empty())
2748 .map(|r| IoSlice::new(r))
2749 .collect();
2750 write_ioslices(out, &slices)?;
2751 } else {
2752 process_chunked(data, line_delim, out, |chunk, buf| {
2753 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, buf);
2754 })?;
2755 }
2756 Ok(())
2757}
2758
2759#[inline]
2762fn bytes_complement_mid_chunk(
2763 data: &[u8],
2764 prefix_bytes: usize,
2765 skip_end: usize,
2766 line_delim: u8,
2767 buf: &mut Vec<u8>,
2768) {
2769 buf.reserve(data.len());
2770
2771 let src = data.as_ptr();
2772 let dst_base = buf.as_mut_ptr();
2773 let mut wp = buf.len();
2774 let mut start = 0;
2775
2776 for pos in memchr_iter(line_delim, data) {
2777 let line_len = pos - start;
2778 let take_prefix = prefix_bytes.min(line_len);
2780 if take_prefix > 0 {
2781 unsafe {
2782 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2783 }
2784 wp += take_prefix;
2785 }
2786 if line_len > skip_end {
2788 let suffix_len = line_len - skip_end;
2789 unsafe {
2790 std::ptr::copy_nonoverlapping(
2791 src.add(start + skip_end),
2792 dst_base.add(wp),
2793 suffix_len,
2794 );
2795 }
2796 wp += suffix_len;
2797 }
2798 unsafe {
2799 *dst_base.add(wp) = line_delim;
2800 }
2801 wp += 1;
2802 start = pos + 1;
2803 }
2804 if start < data.len() {
2805 let line_len = data.len() - start;
2806 let take_prefix = prefix_bytes.min(line_len);
2807 if take_prefix > 0 {
2808 unsafe {
2809 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2810 }
2811 wp += take_prefix;
2812 }
2813 if line_len > skip_end {
2814 let suffix_len = line_len - skip_end;
2815 unsafe {
2816 std::ptr::copy_nonoverlapping(
2817 src.add(start + skip_end),
2818 dst_base.add(wp),
2819 suffix_len,
2820 );
2821 }
2822 wp += suffix_len;
2823 }
2824 unsafe {
2825 *dst_base.add(wp) = line_delim;
2826 }
2827 wp += 1;
2828 }
2829 unsafe { buf.set_len(wp) };
2830}
2831
2832fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
2834 let line_delim = cfg.line_delim;
2835 let ranges = cfg.ranges;
2836 let complement = cfg.complement;
2837 let output_delim = cfg.output_delim;
2838
2839 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
2841 let max_bytes = ranges[0].end;
2842 if max_bytes < usize::MAX {
2843 return process_bytes_from_start(data, max_bytes, line_delim, out);
2844 }
2845 }
2846
2847 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
2849 let skip_bytes = ranges[0].start.saturating_sub(1);
2850 if skip_bytes > 0 {
2851 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
2852 }
2853 }
2854
2855 if !complement
2857 && ranges.len() == 1
2858 && ranges[0].start > 1
2859 && ranges[0].end < usize::MAX
2860 && output_delim.is_empty()
2861 {
2862 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
2863 }
2864
2865 if complement
2867 && ranges.len() == 1
2868 && ranges[0].start == 1
2869 && ranges[0].end < usize::MAX
2870 && output_delim.is_empty()
2871 {
2872 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
2873 }
2874
2875 if complement
2877 && ranges.len() == 1
2878 && ranges[0].end == usize::MAX
2879 && ranges[0].start > 1
2880 && output_delim.is_empty()
2881 {
2882 let max_bytes = ranges[0].start - 1;
2883 return process_bytes_from_start(data, max_bytes, line_delim, out);
2884 }
2885
2886 if complement
2888 && ranges.len() == 1
2889 && ranges[0].start > 1
2890 && ranges[0].end < usize::MAX
2891 && output_delim.is_empty()
2892 {
2893 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
2894 }
2895
2896 if data.len() >= PARALLEL_THRESHOLD {
2897 let chunks = split_for_scope(data, line_delim);
2898 let n = chunks.len();
2899 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2900 rayon::scope(|s| {
2901 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2902 s.spawn(move |_| {
2903 result.reserve(chunk.len() + 1);
2904 process_bytes_chunk(
2905 chunk,
2906 ranges,
2907 complement,
2908 output_delim,
2909 line_delim,
2910 result,
2911 );
2912 });
2913 }
2914 });
2915 let slices: Vec<IoSlice> = results
2916 .iter()
2917 .filter(|r| !r.is_empty())
2918 .map(|r| IoSlice::new(r))
2919 .collect();
2920 write_ioslices(out, &slices)?;
2921 } else {
2922 process_chunked(data, line_delim, out, |chunk, buf| {
2923 process_bytes_chunk(chunk, ranges, complement, output_delim, line_delim, buf);
2924 })?;
2925 }
2926 Ok(())
2927}
2928
2929fn process_bytes_chunk(
2934 data: &[u8],
2935 ranges: &[Range],
2936 complement: bool,
2937 output_delim: &[u8],
2938 line_delim: u8,
2939 buf: &mut Vec<u8>,
2940) {
2941 buf.reserve(data.len());
2942 let base = data.as_ptr();
2943 let mut start = 0;
2944 for end_pos in memchr_iter(line_delim, data) {
2945 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
2946 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2947 unsafe { buf_push(buf, line_delim) };
2948 start = end_pos + 1;
2949 }
2950 if start < data.len() {
2951 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2952 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2953 unsafe { buf_push(buf, line_delim) };
2954 }
2955}
2956
2957#[inline(always)]
2961fn cut_bytes_to_buf(
2962 line: &[u8],
2963 ranges: &[Range],
2964 complement: bool,
2965 output_delim: &[u8],
2966 buf: &mut Vec<u8>,
2967) {
2968 let len = line.len();
2969 let base = line.as_ptr();
2970 let mut first_range = true;
2971
2972 let needed = len + output_delim.len() * ranges.len() + 1;
2974 if buf.capacity() - buf.len() < needed {
2975 buf.reserve(needed);
2976 }
2977
2978 if complement {
2979 let mut pos: usize = 1;
2980 for r in ranges {
2981 let rs = r.start;
2982 let re = r.end.min(len);
2983 if pos < rs {
2984 if !first_range && !output_delim.is_empty() {
2985 unsafe { buf_extend(buf, output_delim) };
2986 }
2987 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
2988 first_range = false;
2989 }
2990 pos = re + 1;
2991 if pos > len {
2992 break;
2993 }
2994 }
2995 if pos <= len {
2996 if !first_range && !output_delim.is_empty() {
2997 unsafe { buf_extend(buf, output_delim) };
2998 }
2999 unsafe {
3000 buf_extend(
3001 buf,
3002 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
3003 )
3004 };
3005 }
3006 } else if output_delim.is_empty() && ranges.len() == 1 {
3007 let start = ranges[0].start.saturating_sub(1);
3009 let end = ranges[0].end.min(len);
3010 if start < len {
3011 unsafe {
3012 buf_extend(
3013 buf,
3014 std::slice::from_raw_parts(base.add(start), end - start),
3015 )
3016 };
3017 }
3018 } else {
3019 for r in ranges {
3020 let start = r.start.saturating_sub(1);
3021 let end = r.end.min(len);
3022 if start >= len {
3023 break;
3024 }
3025 if !first_range && !output_delim.is_empty() {
3026 unsafe { buf_extend(buf, output_delim) };
3027 }
3028 unsafe {
3029 buf_extend(
3030 buf,
3031 std::slice::from_raw_parts(base.add(start), end - start),
3032 )
3033 };
3034 first_range = false;
3035 }
3036 }
3037}
3038
3039#[inline]
3043pub fn cut_fields(
3044 line: &[u8],
3045 delim: u8,
3046 ranges: &[Range],
3047 complement: bool,
3048 output_delim: &[u8],
3049 suppress_no_delim: bool,
3050 out: &mut impl Write,
3051) -> io::Result<bool> {
3052 if memchr::memchr(delim, line).is_none() {
3053 if !suppress_no_delim {
3054 out.write_all(line)?;
3055 return Ok(true);
3056 }
3057 return Ok(false);
3058 }
3059
3060 let mut field_num: usize = 1;
3061 let mut field_start: usize = 0;
3062 let mut first_output = true;
3063
3064 for delim_pos in memchr_iter(delim, line) {
3065 let selected = in_ranges(ranges, field_num) != complement;
3066 if selected {
3067 if !first_output {
3068 out.write_all(output_delim)?;
3069 }
3070 out.write_all(&line[field_start..delim_pos])?;
3071 first_output = false;
3072 }
3073 field_start = delim_pos + 1;
3074 field_num += 1;
3075 }
3076
3077 let selected = in_ranges(ranges, field_num) != complement;
3078 if selected {
3079 if !first_output {
3080 out.write_all(output_delim)?;
3081 }
3082 out.write_all(&line[field_start..])?;
3083 }
3084
3085 Ok(true)
3086}
3087
3088#[inline]
3090pub fn cut_bytes(
3091 line: &[u8],
3092 ranges: &[Range],
3093 complement: bool,
3094 output_delim: &[u8],
3095 out: &mut impl Write,
3096) -> io::Result<bool> {
3097 let mut first_range = true;
3098
3099 if complement {
3100 let len = line.len();
3101 let mut comp_ranges = Vec::new();
3102 let mut pos: usize = 1;
3103 for r in ranges {
3104 let rs = r.start;
3105 let re = r.end.min(len);
3106 if pos < rs {
3107 comp_ranges.push((pos, rs - 1));
3108 }
3109 pos = re + 1;
3110 if pos > len {
3111 break;
3112 }
3113 }
3114 if pos <= len {
3115 comp_ranges.push((pos, len));
3116 }
3117 for &(s, e) in &comp_ranges {
3118 if !first_range && !output_delim.is_empty() {
3119 out.write_all(output_delim)?;
3120 }
3121 out.write_all(&line[s - 1..e])?;
3122 first_range = false;
3123 }
3124 } else {
3125 for r in ranges {
3126 let start = r.start.saturating_sub(1);
3127 let end = r.end.min(line.len());
3128 if start >= line.len() {
3129 break;
3130 }
3131 if !first_range && !output_delim.is_empty() {
3132 out.write_all(output_delim)?;
3133 }
3134 out.write_all(&line[start..end])?;
3135 first_range = false;
3136 }
3137 }
3138 Ok(true)
3139}
3140
3141pub fn cut_field1_inplace(data: &mut [u8], delim: u8, line_delim: u8, suppress: bool) -> usize {
3149 let len = data.len();
3150 let mut wp: usize = 0;
3151 let mut rp: usize = 0;
3152
3153 while rp < len {
3154 match memchr::memchr2(delim, line_delim, &data[rp..]) {
3155 None => {
3156 if suppress {
3158 break;
3160 }
3161 let remaining = len - rp;
3162 if wp != rp {
3163 data.copy_within(rp..len, wp);
3164 }
3165 wp += remaining;
3166 break;
3167 }
3168 Some(offset) => {
3169 let actual = rp + offset;
3170 if data[actual] == line_delim {
3171 if suppress {
3173 rp = actual + 1;
3175 } else {
3176 let chunk_len = actual + 1 - rp;
3178 if wp != rp {
3179 data.copy_within(rp..actual + 1, wp);
3180 }
3181 wp += chunk_len;
3182 rp = actual + 1;
3183 }
3184 } else {
3185 let field_len = actual - rp;
3187 if wp != rp && field_len > 0 {
3188 data.copy_within(rp..actual, wp);
3189 }
3190 wp += field_len;
3191 data[wp] = line_delim;
3192 wp += 1;
3193 match memchr::memchr(line_delim, &data[actual + 1..]) {
3195 None => {
3196 rp = len;
3197 }
3198 Some(nl_off) => {
3199 rp = actual + 1 + nl_off + 1;
3200 }
3201 }
3202 }
3203 }
3204 }
3205 }
3206 wp
3207}
3208
3209pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3211 match cfg.mode {
3212 CutMode::Fields => process_fields_fast(data, cfg, out),
3213 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
3214 }
3215}
3216
3217pub fn process_cut_reader<R: BufRead>(
3222 mut reader: R,
3223 cfg: &CutConfig,
3224 out: &mut impl Write,
3225) -> io::Result<()> {
3226 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3228
3229 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3232
3233 loop {
3234 buf.reserve(CHUNK_SIZE);
3236 let read_start = buf.len();
3237 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3238 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3239 buf.truncate(read_start + n);
3240
3241 if buf.is_empty() {
3242 break;
3243 }
3244
3245 if n == 0 {
3246 process_cut_data(&buf, cfg, out)?;
3248 break;
3249 }
3250
3251 let process_end = match memchr::memrchr(line_delim, &buf) {
3253 Some(pos) => pos + 1,
3254 None => {
3255 continue;
3257 }
3258 };
3259
3260 process_cut_data(&buf[..process_end], cfg, out)?;
3262
3263 let leftover_len = buf.len() - process_end;
3265 if leftover_len > 0 {
3266 buf.copy_within(process_end.., 0);
3267 }
3268 buf.truncate(leftover_len);
3269 }
3270
3271 Ok(())
3272}
3273
/// Reads from `reader` until `buf` is full or end of input is reached.
///
/// Returns the number of bytes placed at the start of `buf`; a value smaller than
/// `buf.len()` means end of input was hit. `ErrorKind::Interrupted` is retried on
/// every read, including the first one.
///
/// # Errors
/// Returns the first read error that is not `ErrorKind::Interrupted`.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3293
3294pub fn process_cut_data_mut(data: &mut [u8], cfg: &CutConfig) -> Option<usize> {
3306 if cfg.complement {
3307 return None;
3308 }
3309 if data.is_empty() || data[data.len() - 1] != cfg.line_delim {
3313 return None;
3314 }
3315
3316 match cfg.mode {
3317 CutMode::Fields => {
3318 if cfg.output_delim.len() != 1 || cfg.output_delim[0] != cfg.delim {
3320 return None;
3321 }
3322 if cfg.delim == cfg.line_delim {
3323 return None;
3324 }
3325 Some(cut_fields_inplace_general(
3326 data,
3327 cfg.delim,
3328 cfg.line_delim,
3329 cfg.ranges,
3330 cfg.suppress_no_delim,
3331 ))
3332 }
3333 CutMode::Bytes | CutMode::Characters => {
3334 if !cfg.output_delim.is_empty() {
3335 return None;
3336 }
3337 Some(cut_bytes_inplace_general(data, cfg.line_delim, cfg.ranges))
3338 }
3339 }
3340}
3341
3342fn cut_fields_inplace_general(
3345 data: &mut [u8],
3346 delim: u8,
3347 line_delim: u8,
3348 ranges: &[Range],
3349 suppress: bool,
3350) -> usize {
3351 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == 1 {
3353 return cut_field1_inplace(data, delim, line_delim, suppress);
3354 }
3355
3356 let len = data.len();
3357 if len == 0 {
3358 return 0;
3359 }
3360
3361 let max_field = ranges.last().map_or(0, |r| r.end);
3362 let max_delims = max_field.min(128);
3363 let mut wp: usize = 0;
3364 let mut rp: usize = 0;
3365
3366 while rp < len {
3367 let line_end = memchr::memchr(line_delim, &data[rp..])
3368 .map(|p| rp + p)
3369 .unwrap_or(len);
3370 let line_len = line_end - rp;
3371
3372 let mut delim_pos = [0usize; 128];
3374 let mut num_delims: usize = 0;
3375
3376 for pos in memchr_iter(delim, &data[rp..line_end]) {
3377 if num_delims < max_delims {
3378 delim_pos[num_delims] = pos;
3379 num_delims += 1;
3380 if num_delims >= max_delims {
3381 break;
3382 }
3383 }
3384 }
3385
3386 if num_delims == 0 {
3387 if !suppress {
3389 if wp != rp {
3390 data.copy_within(rp..line_end, wp);
3391 }
3392 wp += line_len;
3393 if line_end < len {
3394 data[wp] = line_delim;
3395 wp += 1;
3396 }
3397 }
3398 } else {
3399 let total_fields = num_delims + 1;
3400 let mut first_output = true;
3401
3402 for r in ranges {
3403 let range_start = r.start;
3404 let range_end = r.end.min(total_fields);
3405 if range_start > total_fields {
3406 break;
3407 }
3408 for field_num in range_start..=range_end {
3409 if field_num > total_fields {
3410 break;
3411 }
3412
3413 let field_start = if field_num == 1 {
3414 0
3415 } else if field_num - 2 < num_delims {
3416 delim_pos[field_num - 2] + 1
3417 } else {
3418 continue;
3419 };
3420 let field_end = if field_num <= num_delims {
3421 delim_pos[field_num - 1]
3422 } else {
3423 line_len
3424 };
3425
3426 if !first_output {
3427 data[wp] = delim;
3428 wp += 1;
3429 }
3430 let flen = field_end - field_start;
3431 if flen > 0 {
3432 data.copy_within(rp + field_start..rp + field_start + flen, wp);
3433 wp += flen;
3434 }
3435 first_output = false;
3436 }
3437 }
3438
3439 if !first_output && line_end < len {
3440 data[wp] = line_delim;
3441 wp += 1;
3442 } else if first_output && line_end < len {
3443 data[wp] = line_delim;
3445 wp += 1;
3446 }
3447 }
3448
3449 rp = if line_end < len { line_end + 1 } else { len };
3450 }
3451
3452 wp
3453}
3454
3455fn cut_bytes_inplace_general(data: &mut [u8], line_delim: u8, ranges: &[Range]) -> usize {
3457 let len = data.len();
3458 if len == 0 {
3459 return 0;
3460 }
3461
3462 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == usize::MAX {
3464 return len;
3465 }
3466
3467 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end < usize::MAX {
3469 return cut_bytes_from_start_inplace(data, line_delim, ranges[0].end);
3470 }
3471
3472 let mut wp: usize = 0;
3473 let mut rp: usize = 0;
3474
3475 while rp < len {
3476 let line_end = memchr::memchr(line_delim, &data[rp..])
3477 .map(|p| rp + p)
3478 .unwrap_or(len);
3479 let line_len = line_end - rp;
3480
3481 for r in ranges {
3482 let start = r.start.saturating_sub(1);
3483 let end = r.end.min(line_len);
3484 if start >= line_len {
3485 break;
3486 }
3487 let flen = end - start;
3488 if flen > 0 {
3489 data.copy_within(rp + start..rp + start + flen, wp);
3490 wp += flen;
3491 }
3492 }
3493
3494 if line_end < len {
3495 data[wp] = line_delim;
3496 wp += 1;
3497 }
3498
3499 rp = if line_end < len { line_end + 1 } else { len };
3500 }
3501
3502 wp
3503}
3504
3505fn cut_bytes_from_start_inplace(data: &mut [u8], line_delim: u8, max_bytes: usize) -> usize {
3507 let len = data.len();
3508
3509 let mut all_fit = true;
3511 let mut start = 0;
3512 for pos in memchr_iter(line_delim, data) {
3513 if pos - start > max_bytes {
3514 all_fit = false;
3515 break;
3516 }
3517 start = pos + 1;
3518 }
3519 if all_fit && start < len && len - start > max_bytes {
3520 all_fit = false;
3521 }
3522 if all_fit {
3523 return len;
3524 }
3525
3526 let mut wp: usize = 0;
3528 let mut rp: usize = 0;
3529
3530 while rp < len {
3531 let line_end = memchr::memchr(line_delim, &data[rp..])
3532 .map(|p| rp + p)
3533 .unwrap_or(len);
3534 let line_len = line_end - rp;
3535
3536 let take = line_len.min(max_bytes);
3537 if take > 0 && wp != rp {
3538 data.copy_within(rp..rp + take, wp);
3539 }
3540 wp += take;
3541
3542 if line_end < len {
3543 data[wp] = line_delim;
3544 wp += 1;
3545 }
3546
3547 rp = if line_end < len { line_end + 1 } else { len };
3548 }
3549
3550 wp
3551}
3552
/// The unit that a cut operation selects by.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CutMode {
    /// Select by byte position.
    Bytes,
    /// Select by character position. `process_cut_data` and `process_cut_data_mut`
    /// route this to the same byte-oriented routines as `Bytes`.
    Characters,
    /// Select delimiter-separated fields.
    Fields,
}