1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, IoSlice, Write};
4
/// Input size in bytes (2 MiB) from which the field processors in this file split the
/// input with `split_into_chunks` and process the pieces in parallel; smaller inputs are
/// handled as a single chunk.
const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;

/// Maximum number of `IoSlice`s that `write_ioslices` passes to one `write_vectored` call.
const MAX_IOV: usize = 1024;
14
/// Borrowed settings for one cut operation, as consumed by `process_fields_fast`.
pub struct CutConfig<'a> {
    /// Selection mode. `CutMode` is declared outside this section.
    // NOTE(review): presumably distinguishes bytes/characters/fields — confirm at the definition.
    pub mode: CutMode,
    /// Ranges to select. The field code walks them in order and stops early, so they are
    /// expected to be sorted and non-overlapping, as `parse_ranges` produces them.
    pub ranges: &'a [Range],
    /// When `true`, the selection is inverted: every field *not* covered by `ranges` is kept.
    pub complement: bool,
    /// Byte that separates fields within an input line.
    pub delim: u8,
    /// Bytes written between selected fields in the output.
    pub output_delim: &'a [u8],
    /// When `true`, lines that contain no field delimiter are dropped instead of being
    /// copied through unchanged.
    pub suppress_no_delim: bool,
    /// Byte that terminates each line, used for both input splitting and output.
    pub line_delim: u8,
}
25
/// An inclusive, 1-based range of positions (fields or bytes) to select.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position; `parse_ranges` never produces 0.
    pub start: usize,
    /// Last selected position (inclusive). `parse_ranges` stores `usize::MAX` here for an
    /// open-ended range such as `3-`.
    pub end: usize,
}
32
33pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
36 let mut ranges = Vec::new();
37
38 for part in spec.split(',') {
39 let part = part.trim();
40 if part.is_empty() {
41 continue;
42 }
43
44 if let Some(idx) = part.find('-') {
45 let left = &part[..idx];
46 let right = &part[idx + 1..];
47
48 let start = if left.is_empty() {
49 1
50 } else {
51 left.parse::<usize>()
52 .map_err(|_| format!("invalid range: '{}'", part))?
53 };
54
55 let end = if right.is_empty() {
56 usize::MAX
57 } else {
58 right
59 .parse::<usize>()
60 .map_err(|_| format!("invalid range: '{}'", part))?
61 };
62
63 if start == 0 {
64 return Err("fields and positions are numbered from 1".to_string());
65 }
66 if start > end {
67 return Err(format!("invalid decreasing range: '{}'", part));
68 }
69
70 ranges.push(Range { start, end });
71 } else {
72 let n = part
73 .parse::<usize>()
74 .map_err(|_| format!("invalid field: '{}'", part))?;
75 if n == 0 {
76 return Err("fields and positions are numbered from 1".to_string());
77 }
78 ranges.push(Range { start: n, end: n });
79 }
80 }
81
82 if ranges.is_empty() {
83 return Err("you must specify a list of bytes, characters, or fields".to_string());
84 }
85
86 ranges.sort_by_key(|r| (r.start, r.end));
88 let mut merged = vec![ranges[0].clone()];
89 for r in &ranges[1..] {
90 let last = merged.last_mut().unwrap();
91 if r.start <= last.end.saturating_add(1) {
92 last.end = last.end.max(r.end);
93 } else {
94 merged.push(r.clone());
95 }
96 }
97
98 Ok(merged)
99}
100
101#[inline(always)]
104fn in_ranges(ranges: &[Range], pos: usize) -> bool {
105 for r in ranges {
106 if pos < r.start {
107 return false;
108 }
109 if pos <= r.end {
110 return true;
111 }
112 }
113 false
114}
115
116#[inline]
119fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
120 let mut mask: u64 = 0;
121 for i in 1..=64u32 {
122 let in_range = in_ranges(ranges, i as usize);
123 if in_range != complement {
124 mask |= 1u64 << (i - 1);
125 }
126 }
127 mask
128}
129
130#[inline(always)]
132fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
133 if field_num <= 64 {
134 (mask >> (field_num - 1)) & 1 == 1
135 } else {
136 in_ranges(ranges, field_num) != complement
137 }
138}
139
/// Appends `data` to `buf` without checking or growing the capacity.
///
/// # Safety
///
/// `buf` must already have at least `data.len()` bytes of spare capacity
/// (`buf.capacity() - buf.len() >= data.len()`); otherwise the copy writes past the
/// end of the allocation. Debug builds assert this precondition.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    let len = buf.len();
    debug_assert!(
        buf.capacity() - len >= data.len(),
        "buf_extend: insufficient spare capacity"
    );
    // SAFETY: the caller guarantees room for `data.len()` more bytes, and `data` cannot
    // alias `buf` because `buf` is borrowed mutably.
    unsafe {
        std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr().add(len), data.len());
        buf.set_len(len + data.len());
    }
}
152
/// Appends the single byte `b` to `buf` without checking or growing the capacity.
///
/// # Safety
///
/// `buf` must already have at least one byte of spare capacity
/// (`buf.len() < buf.capacity()`); otherwise the write lands past the end of the
/// allocation. Debug builds assert this precondition.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    let len = buf.len();
    debug_assert!(len < buf.capacity(), "buf_push: insufficient spare capacity");
    // SAFETY: the caller guarantees one byte of spare capacity, so `len` is inside the
    // allocation and the new length does not exceed the capacity.
    unsafe {
        buf.as_mut_ptr().add(len).write(b);
        buf.set_len(len + 1);
    }
}
163
164#[inline]
168fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
169 if slices.is_empty() {
170 return Ok(());
171 }
172 for batch in slices.chunks(MAX_IOV) {
173 let total: usize = batch.iter().map(|s| s.len()).sum();
174 let written = out.write_vectored(batch)?;
175 if written >= total {
176 continue;
177 }
178 if written == 0 {
179 return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
180 }
181 write_ioslices_slow(out, batch, written)?;
182 }
183 Ok(())
184}
185
/// Finishes a partially written batch: skips the first `skip` bytes of `slices`
/// (counted across slice boundaries) and writes everything after them with
/// `write_all`.
#[cold]
#[inline(never)]
fn write_ioslices_slow(
    out: &mut impl Write,
    slices: &[IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for slice in slices {
        let bytes: &[u8] = slice;
        match bytes.get(skip..) {
            // Part of this slice is still unwritten; everything after it is too.
            Some(rest) if !rest.is_empty() => {
                out.write_all(rest)?;
                skip = 0;
            }
            // The whole slice was already written (or it is empty): consume it.
            _ => skip -= bytes.len(),
        }
    }
    Ok(())
}
205
206fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
210 let num_threads = rayon::current_num_threads().max(1);
211 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
212 return vec![data];
213 }
214
215 let chunk_size = data.len() / num_threads;
216 let mut chunks = Vec::with_capacity(num_threads);
217 let mut pos = 0;
218
219 for _ in 0..num_threads - 1 {
220 let target = pos + chunk_size;
221 if target >= data.len() {
222 break;
223 }
224 let boundary = memchr::memchr(line_delim, &data[target..])
225 .map(|p| target + p + 1)
226 .unwrap_or(data.len());
227 if boundary > pos {
228 chunks.push(&data[pos..boundary]);
229 }
230 pos = boundary;
231 }
232
233 if pos < data.len() {
234 chunks.push(&data[pos..]);
235 }
236
237 chunks
238}
239
240fn process_fields_multi_select(
247 data: &[u8],
248 delim: u8,
249 line_delim: u8,
250 ranges: &[Range],
251 suppress: bool,
252 out: &mut impl Write,
253) -> io::Result<()> {
254 let max_field = ranges.last().map_or(0, |r| r.end);
255
256 if data.len() >= PARALLEL_THRESHOLD {
257 let chunks = split_into_chunks(data, line_delim);
258 let results: Vec<Vec<u8>> = chunks
259 .par_iter()
260 .map(|chunk| {
261 let mut buf = Vec::with_capacity(chunk.len() * 3 / 4);
263 multi_select_chunk(
264 chunk, delim, line_delim, ranges, max_field, suppress, &mut buf,
265 );
266 buf
267 })
268 .collect();
269 let slices: Vec<IoSlice> = results
270 .iter()
271 .filter(|r| !r.is_empty())
272 .map(|r| IoSlice::new(r))
273 .collect();
274 write_ioslices(out, &slices)?;
275 } else {
276 let mut buf = Vec::with_capacity(data.len() * 3 / 4);
277 multi_select_chunk(
278 data, delim, line_delim, ranges, max_field, suppress, &mut buf,
279 );
280 if !buf.is_empty() {
281 out.write_all(&buf)?;
282 }
283 }
284 Ok(())
285}
286
287fn multi_select_chunk(
293 data: &[u8],
294 delim: u8,
295 line_delim: u8,
296 ranges: &[Range],
297 max_field: usize,
298 suppress: bool,
299 buf: &mut Vec<u8>,
300) {
301 if delim == line_delim {
303 buf.reserve(data.len());
304 let base = data.as_ptr();
305 let mut start = 0;
306 for end_pos in memchr_iter(line_delim, data) {
307 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
308 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
309 start = end_pos + 1;
310 }
311 if start < data.len() {
312 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
313 multi_select_line(line, delim, line_delim, ranges, max_field, suppress, buf);
314 }
315 return;
316 }
317
318 buf.reserve(data.len());
319 let base = data.as_ptr();
320 let data_len = data.len();
321
322 let mut line_start: usize = 0;
324 let mut delim_pos = [0usize; 64];
325 let mut num_delims: usize = 0;
326 let max_delims = max_field.min(64);
327 let mut at_max = false;
328
329 for pos in memchr::memchr2_iter(delim, line_delim, data) {
331 let byte = unsafe { *base.add(pos) };
332
333 if byte == line_delim {
334 let line_len = pos - line_start;
336 if num_delims == 0 {
337 if !suppress {
339 unsafe {
340 buf_extend(
341 buf,
342 std::slice::from_raw_parts(base.add(line_start), line_len),
343 );
344 buf_push(buf, line_delim);
345 }
346 }
347 } else {
348 let total_fields = num_delims + 1;
350 let mut first_output = true;
351
352 for r in ranges {
353 let range_start = r.start;
354 let range_end = r.end.min(total_fields);
355 if range_start > total_fields {
356 break;
357 }
358 for field_num in range_start..=range_end {
359 if field_num > total_fields {
360 break;
361 }
362
363 let field_start = if field_num == 1 {
364 line_start
365 } else if field_num - 2 < num_delims {
366 delim_pos[field_num - 2] + 1
367 } else {
368 continue;
369 };
370 let field_end = if field_num <= num_delims {
371 delim_pos[field_num - 1]
372 } else {
373 pos
374 };
375
376 if !first_output {
377 unsafe { buf_push(buf, delim) };
378 }
379 unsafe {
380 buf_extend(
381 buf,
382 std::slice::from_raw_parts(
383 base.add(field_start),
384 field_end - field_start,
385 ),
386 );
387 }
388 first_output = false;
389 }
390 }
391
392 unsafe { buf_push(buf, line_delim) };
393 }
394
395 line_start = pos + 1;
397 num_delims = 0;
398 at_max = false;
399 } else {
400 if !at_max && num_delims < max_delims {
402 delim_pos[num_delims] = pos;
403 num_delims += 1;
404 if num_delims >= max_delims {
405 at_max = true;
406 }
407 }
408 }
409 }
410
411 if line_start < data_len {
413 if num_delims == 0 {
414 if !suppress {
415 unsafe {
416 buf_extend(
417 buf,
418 std::slice::from_raw_parts(base.add(line_start), data_len - line_start),
419 );
420 buf_push(buf, line_delim);
421 }
422 }
423 } else {
424 let total_fields = num_delims + 1;
425 let mut first_output = true;
426
427 for r in ranges {
428 let range_start = r.start;
429 let range_end = r.end.min(total_fields);
430 if range_start > total_fields {
431 break;
432 }
433 for field_num in range_start..=range_end {
434 if field_num > total_fields {
435 break;
436 }
437
438 let field_start = if field_num == 1 {
439 line_start
440 } else if field_num - 2 < num_delims {
441 delim_pos[field_num - 2] + 1
442 } else {
443 continue;
444 };
445 let field_end = if field_num <= num_delims {
446 delim_pos[field_num - 1]
447 } else {
448 data_len
449 };
450
451 if !first_output {
452 unsafe { buf_push(buf, delim) };
453 }
454 unsafe {
455 buf_extend(
456 buf,
457 std::slice::from_raw_parts(
458 base.add(field_start),
459 field_end - field_start,
460 ),
461 );
462 }
463 first_output = false;
464 }
465 }
466
467 unsafe { buf_push(buf, line_delim) };
468 }
469 }
470}
471
472#[inline(always)]
477fn multi_select_line(
478 line: &[u8],
479 delim: u8,
480 line_delim: u8,
481 ranges: &[Range],
482 max_field: usize,
483 suppress: bool,
484 buf: &mut Vec<u8>,
485) {
486 let len = line.len();
487 if len == 0 {
488 if !suppress {
489 unsafe { buf_push(buf, line_delim) };
490 }
491 return;
492 }
493
494 let base = line.as_ptr();
496
497 let mut delim_pos = [0usize; 64];
500 let mut num_delims: usize = 0;
501 let max_delims = max_field.min(64);
502
503 for pos in memchr_iter(delim, line) {
504 if num_delims < max_delims {
505 delim_pos[num_delims] = pos;
506 num_delims += 1;
507 if num_delims >= max_delims {
508 break;
509 }
510 }
511 }
512
513 if num_delims == 0 {
514 if !suppress {
515 unsafe {
516 buf_extend(buf, line);
517 buf_push(buf, line_delim);
518 }
519 }
520 return;
521 }
522
523 let total_fields = num_delims + 1;
527 let mut first_output = true;
528
529 for r in ranges {
530 let range_start = r.start;
531 let range_end = r.end.min(total_fields);
532 if range_start > total_fields {
533 break;
534 }
535 for field_num in range_start..=range_end {
536 if field_num > total_fields {
537 break;
538 }
539
540 let field_start = if field_num == 1 {
541 0
542 } else if field_num - 2 < num_delims {
543 delim_pos[field_num - 2] + 1
544 } else {
545 continue;
546 };
547 let field_end = if field_num <= num_delims {
548 delim_pos[field_num - 1]
549 } else {
550 len
551 };
552
553 if !first_output {
554 unsafe { buf_push(buf, delim) };
555 }
556 unsafe {
557 buf_extend(
558 buf,
559 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
560 );
561 }
562 first_output = false;
563 }
564 }
565
566 unsafe { buf_push(buf, line_delim) };
567}
568
569fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
573 let delim = cfg.delim;
574 let line_delim = cfg.line_delim;
575 let ranges = cfg.ranges;
576 let complement = cfg.complement;
577 let output_delim = cfg.output_delim;
578 let suppress = cfg.suppress_no_delim;
579
580 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
588 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
589 }
590
591 if complement
593 && ranges.len() == 1
594 && output_delim.len() == 1
595 && output_delim[0] == delim
596 && ranges[0].start == ranges[0].end
597 {
598 return process_complement_single_field(
599 data,
600 delim,
601 line_delim,
602 ranges[0].start,
603 suppress,
604 out,
605 );
606 }
607
608 if complement
611 && ranges.len() == 1
612 && ranges[0].start > 1
613 && ranges[0].end < usize::MAX
614 && output_delim.len() == 1
615 && output_delim[0] == delim
616 {
617 return process_complement_range(
618 data,
619 delim,
620 line_delim,
621 ranges[0].start,
622 ranges[0].end,
623 suppress,
624 out,
625 );
626 }
627
628 if !complement
630 && ranges.len() == 1
631 && ranges[0].start == 1
632 && output_delim.len() == 1
633 && output_delim[0] == delim
634 && ranges[0].end < usize::MAX
635 {
636 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
637 }
638
639 if !complement
641 && ranges.len() == 1
642 && ranges[0].end == usize::MAX
643 && ranges[0].start > 1
644 && output_delim.len() == 1
645 && output_delim[0] == delim
646 {
647 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
648 }
649
650 if !complement
652 && ranges.len() == 1
653 && ranges[0].start > 1
654 && ranges[0].end < usize::MAX
655 && output_delim.len() == 1
656 && output_delim[0] == delim
657 {
658 return process_fields_mid_range(
659 data,
660 delim,
661 line_delim,
662 ranges[0].start,
663 ranges[0].end,
664 suppress,
665 out,
666 );
667 }
668
669 if !complement
675 && ranges.len() > 1
676 && ranges.last().map_or(false, |r| r.end < usize::MAX)
677 && output_delim.len() == 1
678 && output_delim[0] == delim
679 && delim != line_delim
680 {
681 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
682 }
683
684 let max_field = if complement {
686 usize::MAX
687 } else {
688 ranges.last().map(|r| r.end).unwrap_or(0)
689 };
690 let field_mask = compute_field_mask(ranges, complement);
691
692 if data.len() >= PARALLEL_THRESHOLD {
693 let chunks = split_into_chunks(data, line_delim);
694 let results: Vec<Vec<u8>> = chunks
695 .par_iter()
696 .map(|chunk| {
697 let mut buf = Vec::with_capacity(chunk.len());
698 process_fields_chunk(
699 chunk,
700 delim,
701 ranges,
702 output_delim,
703 suppress,
704 max_field,
705 field_mask,
706 line_delim,
707 complement,
708 &mut buf,
709 );
710 buf
711 })
712 .collect();
713 let slices: Vec<IoSlice> = results
715 .iter()
716 .filter(|r| !r.is_empty())
717 .map(|r| IoSlice::new(r))
718 .collect();
719 write_ioslices(out, &slices)?;
720 } else {
721 let mut buf = Vec::with_capacity(data.len());
722 process_fields_chunk(
723 data,
724 delim,
725 ranges,
726 output_delim,
727 suppress,
728 max_field,
729 field_mask,
730 line_delim,
731 complement,
732 &mut buf,
733 );
734 if !buf.is_empty() {
735 out.write_all(&buf)?;
736 }
737 }
738 Ok(())
739}
740
741fn process_fields_chunk(
746 data: &[u8],
747 delim: u8,
748 ranges: &[Range],
749 output_delim: &[u8],
750 suppress: bool,
751 max_field: usize,
752 field_mask: u64,
753 line_delim: u8,
754 complement: bool,
755 buf: &mut Vec<u8>,
756) {
757 if delim != line_delim && max_field < usize::MAX && !complement {
764 buf.reserve(data.len());
765 let mut start = 0;
766 for end_pos in memchr_iter(line_delim, data) {
767 let line = &data[start..end_pos];
768 extract_fields_to_buf(
769 line,
770 delim,
771 ranges,
772 output_delim,
773 suppress,
774 max_field,
775 field_mask,
776 line_delim,
777 buf,
778 complement,
779 );
780 start = end_pos + 1;
781 }
782 if start < data.len() {
783 extract_fields_to_buf(
784 &data[start..],
785 delim,
786 ranges,
787 output_delim,
788 suppress,
789 max_field,
790 field_mask,
791 line_delim,
792 buf,
793 complement,
794 );
795 }
796 return;
797 }
798
799 if delim != line_delim {
803 buf.reserve(data.len());
804
805 let data_len = data.len();
806 let base = data.as_ptr();
807 let mut line_start: usize = 0;
808 let mut field_start: usize = 0;
809 let mut field_num: usize = 1;
810 let mut first_output = true;
811 let mut has_delim = false;
812
813 for pos in memchr::memchr2_iter(delim, line_delim, data) {
814 let byte = unsafe { *base.add(pos) };
815
816 if byte == line_delim {
817 if (field_num <= max_field || complement)
819 && has_delim
820 && is_selected(field_num, field_mask, ranges, complement)
821 {
822 if !first_output {
823 unsafe { buf_extend(buf, output_delim) };
824 }
825 unsafe {
826 buf_extend(
827 buf,
828 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
829 )
830 };
831 first_output = false;
832 }
833
834 if !first_output {
835 unsafe { buf_push(buf, line_delim) };
836 } else if !has_delim {
837 if !suppress {
838 unsafe {
839 buf_extend(
840 buf,
841 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
842 );
843 buf_push(buf, line_delim);
844 }
845 }
846 } else {
847 unsafe { buf_push(buf, line_delim) };
848 }
849
850 line_start = pos + 1;
852 field_start = pos + 1;
853 field_num = 1;
854 first_output = true;
855 has_delim = false;
856 } else {
857 has_delim = true;
859
860 if is_selected(field_num, field_mask, ranges, complement) {
861 if !first_output {
862 unsafe { buf_extend(buf, output_delim) };
863 }
864 unsafe {
865 buf_extend(
866 buf,
867 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
868 )
869 };
870 first_output = false;
871 }
872
873 field_num += 1;
874 field_start = pos + 1;
875 }
876 }
877
878 if line_start < data_len {
880 if line_start < data_len {
881 if (field_num <= max_field || complement)
882 && has_delim
883 && is_selected(field_num, field_mask, ranges, complement)
884 {
885 if !first_output {
886 unsafe { buf_extend(buf, output_delim) };
887 }
888 unsafe {
889 buf_extend(
890 buf,
891 std::slice::from_raw_parts(
892 base.add(field_start),
893 data_len - field_start,
894 ),
895 )
896 };
897 first_output = false;
898 }
899
900 if !first_output {
901 unsafe { buf_push(buf, line_delim) };
902 } else if !has_delim {
903 if !suppress {
904 unsafe {
905 buf_extend(
906 buf,
907 std::slice::from_raw_parts(
908 base.add(line_start),
909 data_len - line_start,
910 ),
911 );
912 buf_push(buf, line_delim);
913 }
914 }
915 } else {
916 unsafe { buf_push(buf, line_delim) };
917 }
918 }
919 }
920
921 return;
922 }
923
924 let mut start = 0;
926 for end_pos in memchr_iter(line_delim, data) {
927 let line = &data[start..end_pos];
928 extract_fields_to_buf(
929 line,
930 delim,
931 ranges,
932 output_delim,
933 suppress,
934 max_field,
935 field_mask,
936 line_delim,
937 buf,
938 complement,
939 );
940 start = end_pos + 1;
941 }
942 if start < data.len() {
943 extract_fields_to_buf(
944 &data[start..],
945 delim,
946 ranges,
947 output_delim,
948 suppress,
949 max_field,
950 field_mask,
951 line_delim,
952 buf,
953 complement,
954 );
955 }
956}
957
958fn process_single_field(
964 data: &[u8],
965 delim: u8,
966 line_delim: u8,
967 target: usize,
968 suppress: bool,
969 out: &mut impl Write,
970) -> io::Result<()> {
971 let target_idx = target - 1;
972
973 const FIELD_PARALLEL_MIN: usize = 2 * 1024 * 1024;
976
977 if delim != line_delim {
978 if target_idx == 0 && !suppress {
982 if data.len() >= FIELD_PARALLEL_MIN {
983 return single_field1_parallel(data, delim, line_delim, out);
984 }
985 let mut buf = Vec::with_capacity(data.len());
990 single_field1_to_buf(data, delim, line_delim, &mut buf);
991 if !buf.is_empty() {
992 out.write_all(&buf)?;
993 }
994 return Ok(());
995 }
996
997 if data.len() >= FIELD_PARALLEL_MIN {
1001 let chunks = split_into_chunks(data, line_delim);
1002 let results: Vec<Vec<u8>> = chunks
1003 .par_iter()
1004 .map(|chunk| {
1005 let mut buf = Vec::with_capacity(chunk.len() / 2);
1006 process_single_field_chunk(
1007 chunk, delim, target_idx, line_delim, suppress, &mut buf,
1008 );
1009 buf
1010 })
1011 .collect();
1012 let slices: Vec<IoSlice> = results
1013 .iter()
1014 .filter(|r| !r.is_empty())
1015 .map(|r| IoSlice::new(r))
1016 .collect();
1017 write_ioslices(out, &slices)?;
1018 } else {
1019 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
1020 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1021 if !buf.is_empty() {
1022 out.write_all(&buf)?;
1023 }
1024 }
1025 return Ok(());
1026 }
1027
1028 if data.len() >= FIELD_PARALLEL_MIN {
1030 let chunks = split_into_chunks(data, line_delim);
1031 let results: Vec<Vec<u8>> = chunks
1032 .par_iter()
1033 .map(|chunk| {
1034 let mut buf = Vec::with_capacity(chunk.len() / 4);
1035 process_single_field_chunk(
1036 chunk, delim, target_idx, line_delim, suppress, &mut buf,
1037 );
1038 buf
1039 })
1040 .collect();
1041 let slices: Vec<IoSlice> = results
1042 .iter()
1043 .filter(|r| !r.is_empty())
1044 .map(|r| IoSlice::new(r))
1045 .collect();
1046 write_ioslices(out, &slices)?;
1047 } else {
1048 let mut buf = Vec::with_capacity(data.len() / 4);
1049 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1050 if !buf.is_empty() {
1051 out.write_all(&buf)?;
1052 }
1053 }
1054 Ok(())
1055}
1056
1057fn process_complement_range(
1060 data: &[u8],
1061 delim: u8,
1062 line_delim: u8,
1063 skip_start: usize,
1064 skip_end: usize,
1065 suppress: bool,
1066 out: &mut impl Write,
1067) -> io::Result<()> {
1068 if data.len() >= PARALLEL_THRESHOLD {
1069 let chunks = split_into_chunks(data, line_delim);
1070 let results: Vec<Vec<u8>> = chunks
1071 .par_iter()
1072 .map(|chunk| {
1073 let mut buf = Vec::with_capacity(chunk.len());
1074 complement_range_chunk(
1075 chunk, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
1076 );
1077 buf
1078 })
1079 .collect();
1080 let slices: Vec<IoSlice> = results
1081 .iter()
1082 .filter(|r| !r.is_empty())
1083 .map(|r| IoSlice::new(r))
1084 .collect();
1085 write_ioslices(out, &slices)?;
1086 } else {
1087 let mut buf = Vec::with_capacity(data.len());
1088 complement_range_chunk(
1089 data, delim, skip_start, skip_end, line_delim, suppress, &mut buf,
1090 );
1091 if !buf.is_empty() {
1092 out.write_all(&buf)?;
1093 }
1094 }
1095 Ok(())
1096}
1097
1098fn complement_range_chunk(
1100 data: &[u8],
1101 delim: u8,
1102 skip_start: usize,
1103 skip_end: usize,
1104 line_delim: u8,
1105 suppress: bool,
1106 buf: &mut Vec<u8>,
1107) {
1108 buf.reserve(data.len());
1110 let mut start = 0;
1111 for end_pos in memchr_iter(line_delim, data) {
1112 let line = &data[start..end_pos];
1113 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
1114 start = end_pos + 1;
1115 }
1116 if start < data.len() {
1117 complement_range_line(
1118 &data[start..],
1119 delim,
1120 skip_start,
1121 skip_end,
1122 line_delim,
1123 suppress,
1124 buf,
1125 );
1126 }
1127}
1128
1129#[inline(always)]
1136fn complement_range_line(
1137 line: &[u8],
1138 delim: u8,
1139 skip_start: usize,
1140 skip_end: usize,
1141 line_delim: u8,
1142 suppress: bool,
1143 buf: &mut Vec<u8>,
1144) {
1145 let len = line.len();
1146 if len == 0 {
1147 if !suppress {
1148 unsafe { buf_push(buf, line_delim) };
1149 }
1150 return;
1151 }
1152
1153 let base = line.as_ptr();
1155
1156 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
1166
1167 let mut delim_count: usize = 0;
1169 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
1173 delim_count += 1;
1174 if delim_count == need_prefix_delims {
1175 prefix_end_pos = pos;
1176 }
1177 if delim_count == total_need {
1178 suffix_start_pos = pos + 1;
1179 break;
1180 }
1181 }
1182
1183 if delim_count == 0 {
1184 if !suppress {
1186 unsafe {
1187 buf_extend(buf, line);
1188 buf_push(buf, line_delim);
1189 }
1190 }
1191 return;
1192 }
1193
1194 if delim_count < need_prefix_delims {
1200 unsafe {
1202 buf_extend(buf, line);
1203 buf_push(buf, line_delim);
1204 }
1205 return;
1206 }
1207
1208 let has_prefix = need_prefix_delims > 0;
1209 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
1210
1211 if has_prefix && has_suffix {
1212 unsafe {
1214 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1215 buf_push(buf, delim);
1216 buf_extend(
1217 buf,
1218 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1219 );
1220 buf_push(buf, line_delim);
1221 }
1222 } else if has_prefix {
1223 unsafe {
1225 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1226 buf_push(buf, line_delim);
1227 }
1228 } else if has_suffix {
1229 unsafe {
1231 buf_extend(
1232 buf,
1233 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1234 );
1235 buf_push(buf, line_delim);
1236 }
1237 } else {
1238 unsafe { buf_push(buf, line_delim) };
1240 }
1241}
1242
1243fn process_complement_single_field(
1245 data: &[u8],
1246 delim: u8,
1247 line_delim: u8,
1248 skip_field: usize,
1249 suppress: bool,
1250 out: &mut impl Write,
1251) -> io::Result<()> {
1252 let skip_idx = skip_field - 1;
1253
1254 if data.len() >= PARALLEL_THRESHOLD {
1255 let chunks = split_into_chunks(data, line_delim);
1256 let results: Vec<Vec<u8>> = chunks
1257 .par_iter()
1258 .map(|chunk| {
1259 let mut buf = Vec::with_capacity(chunk.len());
1260 complement_single_field_chunk(
1261 chunk, delim, skip_idx, line_delim, suppress, &mut buf,
1262 );
1263 buf
1264 })
1265 .collect();
1266 let slices: Vec<IoSlice> = results
1268 .iter()
1269 .filter(|r| !r.is_empty())
1270 .map(|r| IoSlice::new(r))
1271 .collect();
1272 write_ioslices(out, &slices)?;
1273 } else {
1274 let mut buf = Vec::with_capacity(data.len());
1275 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
1276 if !buf.is_empty() {
1277 out.write_all(&buf)?;
1278 }
1279 }
1280 Ok(())
1281}
1282
1283fn complement_single_field_chunk(
1289 data: &[u8],
1290 delim: u8,
1291 skip_idx: usize,
1292 line_delim: u8,
1293 suppress: bool,
1294 buf: &mut Vec<u8>,
1295) {
1296 if delim == line_delim {
1298 buf.reserve(data.len());
1299 let mut start = 0;
1300 for end_pos in memchr_iter(line_delim, data) {
1301 let line = &data[start..end_pos];
1302 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1303 start = end_pos + 1;
1304 }
1305 if start < data.len() {
1306 complement_single_field_line(
1307 &data[start..],
1308 delim,
1309 skip_idx,
1310 line_delim,
1311 suppress,
1312 buf,
1313 );
1314 }
1315 return;
1316 }
1317
1318 buf.reserve(data.len());
1319 let base = data.as_ptr();
1320 let data_len = data.len();
1321 let need_before = skip_idx; let need_total = skip_idx + 1; let mut line_start: usize = 0;
1326 let mut delim_count: usize = 0;
1327 let mut skip_start_pos: usize = 0;
1328 let mut skip_end_pos: usize = 0;
1329 let mut found_start = need_before == 0; let mut found_end = false;
1331
1332 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1333 let byte = unsafe { *base.add(pos) };
1334
1335 if byte == line_delim {
1336 if delim_count == 0 {
1338 if !suppress {
1340 unsafe {
1341 buf_extend(
1342 buf,
1343 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1344 );
1345 buf_push(buf, line_delim);
1346 }
1347 }
1348 } else if !found_start || delim_count < need_before {
1349 unsafe {
1351 buf_extend(
1352 buf,
1353 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1354 );
1355 buf_push(buf, line_delim);
1356 }
1357 } else {
1358 let has_prefix = skip_idx > 0;
1359 let has_suffix = found_end && skip_end_pos < pos;
1360
1361 if has_prefix && has_suffix {
1362 unsafe {
1363 buf_extend(
1364 buf,
1365 std::slice::from_raw_parts(
1366 base.add(line_start),
1367 skip_start_pos - 1 - line_start,
1368 ),
1369 );
1370 buf_push(buf, delim);
1371 buf_extend(
1372 buf,
1373 std::slice::from_raw_parts(
1374 base.add(skip_end_pos + 1),
1375 pos - skip_end_pos - 1,
1376 ),
1377 );
1378 buf_push(buf, line_delim);
1379 }
1380 } else if has_prefix {
1381 unsafe {
1382 buf_extend(
1383 buf,
1384 std::slice::from_raw_parts(
1385 base.add(line_start),
1386 skip_start_pos - 1 - line_start,
1387 ),
1388 );
1389 buf_push(buf, line_delim);
1390 }
1391 } else if has_suffix {
1392 unsafe {
1393 buf_extend(
1394 buf,
1395 std::slice::from_raw_parts(
1396 base.add(skip_end_pos + 1),
1397 pos - skip_end_pos - 1,
1398 ),
1399 );
1400 buf_push(buf, line_delim);
1401 }
1402 } else {
1403 unsafe { buf_push(buf, line_delim) };
1404 }
1405 }
1406
1407 line_start = pos + 1;
1409 delim_count = 0;
1410 skip_start_pos = 0;
1411 skip_end_pos = 0;
1412 found_start = need_before == 0;
1413 found_end = false;
1414 } else {
1415 delim_count += 1;
1417 if delim_count == need_before {
1418 skip_start_pos = pos + 1;
1419 found_start = true;
1420 }
1421 if delim_count == need_total {
1422 skip_end_pos = pos;
1423 found_end = true;
1424 }
1425 }
1426 }
1427
1428 if line_start < data_len {
1430 let pos = data_len;
1431 if delim_count == 0 {
1432 if !suppress {
1433 unsafe {
1434 buf_extend(
1435 buf,
1436 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1437 );
1438 buf_push(buf, line_delim);
1439 }
1440 }
1441 } else if !found_start || delim_count < need_before {
1442 unsafe {
1443 buf_extend(
1444 buf,
1445 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1446 );
1447 buf_push(buf, line_delim);
1448 }
1449 } else {
1450 let has_prefix = skip_idx > 0;
1451 let has_suffix = found_end && skip_end_pos < pos;
1452
1453 if has_prefix && has_suffix {
1454 unsafe {
1455 buf_extend(
1456 buf,
1457 std::slice::from_raw_parts(
1458 base.add(line_start),
1459 skip_start_pos - 1 - line_start,
1460 ),
1461 );
1462 buf_push(buf, delim);
1463 buf_extend(
1464 buf,
1465 std::slice::from_raw_parts(
1466 base.add(skip_end_pos + 1),
1467 pos - skip_end_pos - 1,
1468 ),
1469 );
1470 buf_push(buf, line_delim);
1471 }
1472 } else if has_prefix {
1473 unsafe {
1474 buf_extend(
1475 buf,
1476 std::slice::from_raw_parts(
1477 base.add(line_start),
1478 skip_start_pos - 1 - line_start,
1479 ),
1480 );
1481 buf_push(buf, line_delim);
1482 }
1483 } else if has_suffix {
1484 unsafe {
1485 buf_extend(
1486 buf,
1487 std::slice::from_raw_parts(
1488 base.add(skip_end_pos + 1),
1489 pos - skip_end_pos - 1,
1490 ),
1491 );
1492 buf_push(buf, line_delim);
1493 }
1494 } else {
1495 unsafe { buf_push(buf, line_delim) };
1496 }
1497 }
1498 }
1499}
1500
1501#[inline(always)]
1503fn complement_single_field_line(
1504 line: &[u8],
1505 delim: u8,
1506 skip_idx: usize,
1507 line_delim: u8,
1508 suppress: bool,
1509 buf: &mut Vec<u8>,
1510) {
1511 let len = line.len();
1512 if len == 0 {
1513 if !suppress {
1514 unsafe { buf_push(buf, line_delim) };
1515 }
1516 return;
1517 }
1518
1519 let base = line.as_ptr();
1520 let need_before = skip_idx;
1521 let need_total = skip_idx + 1;
1522
1523 let mut delim_count: usize = 0;
1524 let mut skip_start_pos: usize = 0;
1525 let mut skip_end_pos: usize = len;
1526 let mut found_end = false;
1527
1528 for pos in memchr_iter(delim, line) {
1529 delim_count += 1;
1530 if delim_count == need_before {
1531 skip_start_pos = pos + 1;
1532 }
1533 if delim_count == need_total {
1534 skip_end_pos = pos;
1535 found_end = true;
1536 break;
1537 }
1538 }
1539
1540 if delim_count == 0 {
1541 if !suppress {
1542 unsafe {
1543 buf_extend(buf, line);
1544 buf_push(buf, line_delim);
1545 }
1546 }
1547 return;
1548 }
1549
1550 if delim_count < need_before {
1551 unsafe {
1552 buf_extend(buf, line);
1553 buf_push(buf, line_delim);
1554 }
1555 return;
1556 }
1557
1558 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1559 let has_suffix = found_end && skip_end_pos < len;
1560
1561 if has_prefix && has_suffix {
1562 unsafe {
1563 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1564 buf_push(buf, delim);
1565 buf_extend(
1566 buf,
1567 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1568 );
1569 buf_push(buf, line_delim);
1570 }
1571 } else if has_prefix {
1572 unsafe {
1573 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1574 buf_push(buf, line_delim);
1575 }
1576 } else if has_suffix {
1577 unsafe {
1578 buf_extend(
1579 buf,
1580 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1581 );
1582 buf_push(buf, line_delim);
1583 }
1584 } else {
1585 unsafe { buf_push(buf, line_delim) };
1586 }
1587}
1588
1589fn process_fields_prefix(
1593 data: &[u8],
1594 delim: u8,
1595 line_delim: u8,
1596 last_field: usize,
1597 suppress: bool,
1598 out: &mut impl Write,
1599) -> io::Result<()> {
1600 if data.len() >= PARALLEL_THRESHOLD {
1601 let chunks = split_into_chunks(data, line_delim);
1602 let results: Vec<Vec<u8>> = chunks
1603 .par_iter()
1604 .map(|chunk| {
1605 let mut buf = Vec::with_capacity(chunk.len());
1606 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
1607 buf
1608 })
1609 .collect();
1610 let slices: Vec<IoSlice> = results
1612 .iter()
1613 .filter(|r| !r.is_empty())
1614 .map(|r| IoSlice::new(r))
1615 .collect();
1616 write_ioslices(out, &slices)?;
1617 } else if !suppress {
1618 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1622 } else {
1623 let mut buf = Vec::with_capacity(data.len());
1624 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
1625 if !buf.is_empty() {
1626 out.write_all(&buf)?;
1627 }
1628 }
1629 Ok(())
1630}
1631
1632#[inline]
1638fn fields_prefix_zerocopy(
1639 data: &[u8],
1640 delim: u8,
1641 line_delim: u8,
1642 last_field: usize,
1643 out: &mut impl Write,
1644) -> io::Result<()> {
1645 let newline_buf: [u8; 1] = [line_delim];
1646 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1647 let mut start = 0;
1648 let mut run_start: usize = 0;
1649
1650 for end_pos in memchr_iter(line_delim, data) {
1651 let line = &data[start..end_pos];
1652 let mut field_count = 1;
1653 let mut truncate_at: Option<usize> = None;
1654 for dpos in memchr_iter(delim, line) {
1655 if field_count >= last_field {
1656 truncate_at = Some(start + dpos);
1657 break;
1658 }
1659 field_count += 1;
1660 }
1661
1662 if let Some(trunc_pos) = truncate_at {
1663 if run_start < start {
1664 iov.push(IoSlice::new(&data[run_start..start]));
1665 }
1666 iov.push(IoSlice::new(&data[start..trunc_pos]));
1667 iov.push(IoSlice::new(&newline_buf));
1668 run_start = end_pos + 1;
1669
1670 if iov.len() >= MAX_IOV - 2 {
1671 write_ioslices(out, &iov)?;
1672 iov.clear();
1673 }
1674 }
1675 start = end_pos + 1;
1676 }
1677 if start < data.len() {
1679 let line = &data[start..];
1680 let mut field_count = 1;
1681 let mut truncate_at: Option<usize> = None;
1682 for dpos in memchr_iter(delim, line) {
1683 if field_count >= last_field {
1684 truncate_at = Some(start + dpos);
1685 break;
1686 }
1687 field_count += 1;
1688 }
1689 if let Some(trunc_pos) = truncate_at {
1690 if run_start < start {
1691 iov.push(IoSlice::new(&data[run_start..start]));
1692 }
1693 iov.push(IoSlice::new(&data[start..trunc_pos]));
1694 iov.push(IoSlice::new(&newline_buf));
1695 if !iov.is_empty() {
1696 write_ioslices(out, &iov)?;
1697 }
1698 return Ok(());
1699 }
1700 }
1701 if run_start < data.len() {
1703 iov.push(IoSlice::new(&data[run_start..]));
1704 if !data.is_empty() && *data.last().unwrap() != line_delim {
1705 iov.push(IoSlice::new(&newline_buf));
1706 }
1707 }
1708 if !iov.is_empty() {
1709 write_ioslices(out, &iov)?;
1710 }
1711 Ok(())
1712}
1713
1714fn fields_prefix_chunk(
1716 data: &[u8],
1717 delim: u8,
1718 line_delim: u8,
1719 last_field: usize,
1720 suppress: bool,
1721 buf: &mut Vec<u8>,
1722) {
1723 buf.reserve(data.len());
1724 let mut start = 0;
1725 for end_pos in memchr_iter(line_delim, data) {
1726 let line = &data[start..end_pos];
1727 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1728 start = end_pos + 1;
1729 }
1730 if start < data.len() {
1731 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1732 }
1733}
1734
1735#[inline(always)]
1738fn fields_prefix_line(
1739 line: &[u8],
1740 delim: u8,
1741 line_delim: u8,
1742 last_field: usize,
1743 suppress: bool,
1744 buf: &mut Vec<u8>,
1745) {
1746 let len = line.len();
1747 if len == 0 {
1748 if !suppress {
1749 unsafe { buf_push(buf, line_delim) };
1750 }
1751 return;
1752 }
1753
1754 let base = line.as_ptr();
1756
1757 let mut field_count = 1usize;
1758 let mut has_delim = false;
1759
1760 for pos in memchr_iter(delim, line) {
1761 has_delim = true;
1762 if field_count >= last_field {
1763 unsafe {
1764 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1765 buf_push(buf, line_delim);
1766 }
1767 return;
1768 }
1769 field_count += 1;
1770 }
1771
1772 if !has_delim {
1773 if !suppress {
1774 unsafe {
1775 buf_extend(buf, line);
1776 buf_push(buf, line_delim);
1777 }
1778 }
1779 return;
1780 }
1781
1782 unsafe {
1783 buf_extend(buf, line);
1784 buf_push(buf, line_delim);
1785 }
1786}
1787
1788fn process_fields_suffix(
1790 data: &[u8],
1791 delim: u8,
1792 line_delim: u8,
1793 start_field: usize,
1794 suppress: bool,
1795 out: &mut impl Write,
1796) -> io::Result<()> {
1797 if data.len() >= PARALLEL_THRESHOLD {
1798 let chunks = split_into_chunks(data, line_delim);
1799 let results: Vec<Vec<u8>> = chunks
1800 .par_iter()
1801 .map(|chunk| {
1802 let mut buf = Vec::with_capacity(chunk.len());
1803 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
1804 buf
1805 })
1806 .collect();
1807 let slices: Vec<IoSlice> = results
1809 .iter()
1810 .filter(|r| !r.is_empty())
1811 .map(|r| IoSlice::new(r))
1812 .collect();
1813 write_ioslices(out, &slices)?;
1814 } else {
1815 let mut buf = Vec::with_capacity(data.len());
1816 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
1817 if !buf.is_empty() {
1818 out.write_all(&buf)?;
1819 }
1820 }
1821 Ok(())
1822}
1823
1824fn fields_suffix_chunk(
1826 data: &[u8],
1827 delim: u8,
1828 line_delim: u8,
1829 start_field: usize,
1830 suppress: bool,
1831 buf: &mut Vec<u8>,
1832) {
1833 buf.reserve(data.len());
1834 let mut start = 0;
1835 for end_pos in memchr_iter(line_delim, data) {
1836 let line = &data[start..end_pos];
1837 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1838 start = end_pos + 1;
1839 }
1840 if start < data.len() {
1841 fields_suffix_line(
1842 &data[start..],
1843 delim,
1844 line_delim,
1845 start_field,
1846 suppress,
1847 buf,
1848 );
1849 }
1850}
1851
1852#[inline(always)]
1855fn fields_suffix_line(
1856 line: &[u8],
1857 delim: u8,
1858 line_delim: u8,
1859 start_field: usize,
1860 suppress: bool,
1861 buf: &mut Vec<u8>,
1862) {
1863 let len = line.len();
1864 if len == 0 {
1865 if !suppress {
1866 unsafe { buf_push(buf, line_delim) };
1867 }
1868 return;
1869 }
1870
1871 let base = line.as_ptr();
1873
1874 let skip_delims = start_field - 1;
1875 let mut delim_count = 0usize;
1876 let mut has_delim = false;
1877
1878 for pos in memchr_iter(delim, line) {
1879 has_delim = true;
1880 delim_count += 1;
1881 if delim_count >= skip_delims {
1882 unsafe {
1883 buf_extend(
1884 buf,
1885 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1886 );
1887 buf_push(buf, line_delim);
1888 }
1889 return;
1890 }
1891 }
1892
1893 if !has_delim {
1894 if !suppress {
1895 unsafe {
1896 buf_extend(buf, line);
1897 buf_push(buf, line_delim);
1898 }
1899 }
1900 return;
1901 }
1902
1903 unsafe { buf_push(buf, line_delim) };
1905}
1906
1907fn process_fields_mid_range(
1910 data: &[u8],
1911 delim: u8,
1912 line_delim: u8,
1913 start_field: usize,
1914 end_field: usize,
1915 suppress: bool,
1916 out: &mut impl Write,
1917) -> io::Result<()> {
1918 if data.len() >= PARALLEL_THRESHOLD {
1919 let chunks = split_into_chunks(data, line_delim);
1920 let results: Vec<Vec<u8>> = chunks
1921 .par_iter()
1922 .map(|chunk| {
1923 let mut buf = Vec::with_capacity(chunk.len());
1924 fields_mid_range_chunk(
1925 chunk,
1926 delim,
1927 line_delim,
1928 start_field,
1929 end_field,
1930 suppress,
1931 &mut buf,
1932 );
1933 buf
1934 })
1935 .collect();
1936 let slices: Vec<IoSlice> = results
1937 .iter()
1938 .filter(|r| !r.is_empty())
1939 .map(|r| IoSlice::new(r))
1940 .collect();
1941 write_ioslices(out, &slices)?;
1942 } else {
1943 let mut buf = Vec::with_capacity(data.len());
1944 fields_mid_range_chunk(
1945 data,
1946 delim,
1947 line_delim,
1948 start_field,
1949 end_field,
1950 suppress,
1951 &mut buf,
1952 );
1953 if !buf.is_empty() {
1954 out.write_all(&buf)?;
1955 }
1956 }
1957 Ok(())
1958}
1959
1960fn fields_mid_range_chunk(
1962 data: &[u8],
1963 delim: u8,
1964 line_delim: u8,
1965 start_field: usize,
1966 end_field: usize,
1967 suppress: bool,
1968 buf: &mut Vec<u8>,
1969) {
1970 buf.reserve(data.len());
1971 let mut start = 0;
1972 for end_pos in memchr_iter(line_delim, data) {
1973 let line = &data[start..end_pos];
1974 fields_mid_range_line(
1975 line,
1976 delim,
1977 line_delim,
1978 start_field,
1979 end_field,
1980 suppress,
1981 buf,
1982 );
1983 start = end_pos + 1;
1984 }
1985 if start < data.len() {
1986 fields_mid_range_line(
1987 &data[start..],
1988 delim,
1989 line_delim,
1990 start_field,
1991 end_field,
1992 suppress,
1993 buf,
1994 );
1995 }
1996}
1997
1998#[inline(always)]
2002fn fields_mid_range_line(
2003 line: &[u8],
2004 delim: u8,
2005 line_delim: u8,
2006 start_field: usize,
2007 end_field: usize,
2008 suppress: bool,
2009 buf: &mut Vec<u8>,
2010) {
2011 let len = line.len();
2012 if len == 0 {
2013 if !suppress {
2014 unsafe { buf_push(buf, line_delim) };
2015 }
2016 return;
2017 }
2018
2019 let base = line.as_ptr();
2021
2022 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
2026 let mut delim_count = 0;
2027 let mut range_start = 0;
2028 let mut has_delim = false;
2029
2030 for pos in memchr_iter(delim, line) {
2031 has_delim = true;
2032 delim_count += 1;
2033 if delim_count == skip_before {
2034 range_start = pos + 1;
2035 }
2036 if delim_count == target_end_delim {
2037 if skip_before == 0 {
2038 range_start = 0;
2039 }
2040 unsafe {
2041 buf_extend(
2042 buf,
2043 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
2044 );
2045 buf_push(buf, line_delim);
2046 }
2047 return;
2048 }
2049 }
2050
2051 if !has_delim {
2052 if !suppress {
2053 unsafe {
2054 buf_extend(buf, line);
2055 buf_push(buf, line_delim);
2056 }
2057 }
2058 return;
2059 }
2060
2061 if delim_count >= skip_before {
2063 if skip_before == 0 {
2065 range_start = 0;
2066 }
2067 unsafe {
2068 buf_extend(
2069 buf,
2070 std::slice::from_raw_parts(base.add(range_start), len - range_start),
2071 );
2072 buf_push(buf, line_delim);
2073 }
2074 } else {
2075 unsafe { buf_push(buf, line_delim) };
2077 }
2078}
2079
2080fn single_field1_parallel(
2091 data: &[u8],
2092 delim: u8,
2093 line_delim: u8,
2094 out: &mut impl Write,
2095) -> io::Result<()> {
2096 let chunks = split_into_chunks(data, line_delim);
2097 let results: Vec<Vec<u8>> = chunks
2098 .par_iter()
2099 .map(|chunk| {
2100 let mut buf = Vec::with_capacity(chunk.len());
2101 single_field1_to_buf(chunk, delim, line_delim, &mut buf);
2102 buf
2103 })
2104 .collect();
2105 let slices: Vec<IoSlice> = results
2106 .iter()
2107 .filter(|r| !r.is_empty())
2108 .map(|r| IoSlice::new(r))
2109 .collect();
2110 write_ioslices(out, &slices)
2111}
2112
2113#[inline]
2119fn single_field1_to_buf(data: &[u8], delim: u8, line_delim: u8, buf: &mut Vec<u8>) {
2120 use memchr::memchr2;
2121 buf.reserve(data.len());
2122 let mut pos = 0;
2123 while pos < data.len() {
2124 match memchr2(delim, line_delim, &data[pos..]) {
2125 None => {
2126 unsafe {
2128 buf_extend(buf, &data[pos..]);
2129 }
2130 break;
2131 }
2132 Some(offset) => {
2133 let actual = pos + offset;
2134 if data[actual] == line_delim {
2135 unsafe {
2137 buf_extend(buf, &data[pos..actual + 1]);
2138 }
2139 pos = actual + 1;
2140 } else {
2141 unsafe {
2143 buf_extend(buf, &data[pos..actual]);
2144 buf_push(buf, line_delim);
2145 }
2146 match memchr::memchr(line_delim, &data[actual + 1..]) {
2148 None => {
2149 pos = data.len();
2150 }
2151 Some(nl_off) => {
2152 pos = actual + 1 + nl_off + 1;
2153 }
2154 }
2155 }
2156 }
2157 }
2158 }
2159}
2160
2161#[inline]
2170#[allow(dead_code)]
2171fn single_field1_zerocopy(
2172 data: &[u8],
2173 delim: u8,
2174 line_delim: u8,
2175 out: &mut impl Write,
2176) -> io::Result<()> {
2177 let newline_buf: [u8; 1] = [line_delim];
2178
2179 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2180 let mut run_start: usize = 0;
2181 let mut start = 0;
2182
2183 for end_pos in memchr_iter(line_delim, data) {
2184 let line = &data[start..end_pos];
2185 if let Some(dp) = memchr::memchr(delim, line) {
2186 if run_start < start {
2189 iov.push(IoSlice::new(&data[run_start..start]));
2190 }
2191 iov.push(IoSlice::new(&data[start..start + dp]));
2192 iov.push(IoSlice::new(&newline_buf));
2193 run_start = end_pos + 1;
2194
2195 if iov.len() >= MAX_IOV - 2 {
2196 write_ioslices(out, &iov)?;
2197 iov.clear();
2198 }
2199 }
2200 start = end_pos + 1;
2202 }
2203
2204 if start < data.len() {
2206 let line = &data[start..];
2207 if let Some(dp) = memchr::memchr(delim, line) {
2208 if run_start < start {
2209 iov.push(IoSlice::new(&data[run_start..start]));
2210 }
2211 iov.push(IoSlice::new(&data[start..start + dp]));
2212 iov.push(IoSlice::new(&newline_buf));
2213 if !iov.is_empty() {
2214 write_ioslices(out, &iov)?;
2215 }
2216 return Ok(());
2217 }
2218 }
2219
2220 if run_start < data.len() {
2222 iov.push(IoSlice::new(&data[run_start..]));
2223 if !data.is_empty() && *data.last().unwrap() != line_delim {
2224 iov.push(IoSlice::new(&newline_buf));
2225 }
2226 }
2227 if !iov.is_empty() {
2228 write_ioslices(out, &iov)?;
2229 }
2230 Ok(())
2231}
2232
2233fn process_single_field_chunk(
2235 data: &[u8],
2236 delim: u8,
2237 target_idx: usize,
2238 line_delim: u8,
2239 suppress: bool,
2240 buf: &mut Vec<u8>,
2241) {
2242 buf.reserve(data.len());
2244 let mut start = 0;
2245 for end_pos in memchr_iter(line_delim, data) {
2246 let line = &data[start..end_pos];
2247 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
2248 start = end_pos + 1;
2249 }
2250 if start < data.len() {
2251 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
2252 }
2253}
2254
2255#[inline(always)]
2260fn extract_single_field_line(
2261 line: &[u8],
2262 delim: u8,
2263 target_idx: usize,
2264 line_delim: u8,
2265 suppress: bool,
2266 buf: &mut Vec<u8>,
2267) {
2268 let len = line.len();
2269 if len == 0 {
2270 if !suppress {
2271 unsafe { buf_push(buf, line_delim) };
2272 }
2273 return;
2274 }
2275
2276 let base = line.as_ptr();
2278
2279 if target_idx == 0 {
2281 match memchr::memchr(delim, line) {
2282 Some(pos) => unsafe {
2283 buf_extend(buf, std::slice::from_raw_parts(base, pos));
2284 buf_push(buf, line_delim);
2285 },
2286 None => {
2287 if !suppress {
2288 unsafe {
2289 buf_extend(buf, line);
2290 buf_push(buf, line_delim);
2291 }
2292 }
2293 }
2294 }
2295 return;
2296 }
2297
2298 let mut field_start = 0;
2300 let mut field_idx = 0;
2301 let mut has_delim = false;
2302
2303 for pos in memchr_iter(delim, line) {
2304 has_delim = true;
2305 if field_idx == target_idx {
2306 unsafe {
2307 buf_extend(
2308 buf,
2309 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
2310 );
2311 buf_push(buf, line_delim);
2312 }
2313 return;
2314 }
2315 field_idx += 1;
2316 field_start = pos + 1;
2317 }
2318
2319 if !has_delim {
2320 if !suppress {
2321 unsafe {
2322 buf_extend(buf, line);
2323 buf_push(buf, line_delim);
2324 }
2325 }
2326 return;
2327 }
2328
2329 if field_idx == target_idx {
2330 unsafe {
2331 buf_extend(
2332 buf,
2333 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2334 );
2335 buf_push(buf, line_delim);
2336 }
2337 } else {
2338 unsafe { buf_push(buf, line_delim) };
2339 }
2340}
2341
2342#[inline(always)]
2346fn extract_fields_to_buf(
2347 line: &[u8],
2348 delim: u8,
2349 ranges: &[Range],
2350 output_delim: &[u8],
2351 suppress: bool,
2352 max_field: usize,
2353 field_mask: u64,
2354 line_delim: u8,
2355 buf: &mut Vec<u8>,
2356 complement: bool,
2357) {
2358 let len = line.len();
2359
2360 if len == 0 {
2361 if !suppress {
2362 buf.push(line_delim);
2363 }
2364 return;
2365 }
2366
2367 let needed = len + output_delim.len() * 16 + 1;
2370 if buf.capacity() - buf.len() < needed {
2371 buf.reserve(needed);
2372 }
2373
2374 let base = line.as_ptr();
2375 let mut field_num: usize = 1;
2376 let mut field_start: usize = 0;
2377 let mut first_output = true;
2378 let mut has_delim = false;
2379
2380 for delim_pos in memchr_iter(delim, line) {
2382 has_delim = true;
2383
2384 if is_selected(field_num, field_mask, ranges, complement) {
2385 if !first_output {
2386 unsafe { buf_extend(buf, output_delim) };
2387 }
2388 unsafe {
2389 buf_extend(
2390 buf,
2391 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2392 )
2393 };
2394 first_output = false;
2395 }
2396
2397 field_num += 1;
2398 field_start = delim_pos + 1;
2399
2400 if field_num > max_field {
2401 break;
2402 }
2403 }
2404
2405 if (field_num <= max_field || complement)
2407 && has_delim
2408 && is_selected(field_num, field_mask, ranges, complement)
2409 {
2410 if !first_output {
2411 unsafe { buf_extend(buf, output_delim) };
2412 }
2413 unsafe {
2414 buf_extend(
2415 buf,
2416 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2417 )
2418 };
2419 first_output = false;
2420 }
2421
2422 if !first_output {
2423 unsafe { buf_push(buf, line_delim) };
2424 } else if !has_delim {
2425 if !suppress {
2426 unsafe {
2427 buf_extend(buf, line);
2428 buf_push(buf, line_delim);
2429 }
2430 }
2431 } else {
2432 unsafe { buf_push(buf, line_delim) };
2433 }
2434}
2435
2436fn process_bytes_from_start(
2443 data: &[u8],
2444 max_bytes: usize,
2445 line_delim: u8,
2446 out: &mut impl Write,
2447) -> io::Result<()> {
2448 if max_bytes > 0 && max_bytes < usize::MAX {
2453 let mut start = 0;
2454 let mut all_fit = true;
2455 for pos in memchr_iter(line_delim, data) {
2456 if pos - start > max_bytes {
2457 all_fit = false;
2458 break;
2459 }
2460 start = pos + 1;
2461 }
2462 if all_fit && start < data.len() && data.len() - start > max_bytes {
2464 all_fit = false;
2465 }
2466 if all_fit {
2467 if !data.is_empty() && data[data.len() - 1] == line_delim {
2469 return out.write_all(data);
2470 } else if !data.is_empty() {
2471 out.write_all(data)?;
2472 return out.write_all(&[line_delim]);
2473 }
2474 return Ok(());
2475 }
2476 }
2477
2478 if data.len() >= PARALLEL_THRESHOLD {
2479 let chunks = split_into_chunks(data, line_delim);
2480 let results: Vec<Vec<u8>> = chunks
2481 .par_iter()
2482 .map(|chunk| {
2483 let est_out = (chunk.len() / 4).max(max_bytes + 2);
2489 let mut buf = Vec::with_capacity(est_out.min(chunk.len()));
2490 bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
2491 buf
2492 })
2493 .collect();
2494 let slices: Vec<IoSlice> = results
2496 .iter()
2497 .filter(|r| !r.is_empty())
2498 .map(|r| IoSlice::new(r))
2499 .collect();
2500 write_ioslices(out, &slices)?;
2501 } else {
2502 if max_bytes <= 512 {
2508 let est_out = (data.len() / 4).max(max_bytes + 2);
2511 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2512 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2513 if !buf.is_empty() {
2514 out.write_all(&buf)?;
2515 }
2516 } else {
2517 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2521 }
2522 }
2523 Ok(())
2524}
2525
2526#[inline]
2531fn bytes_from_start_zerocopy(
2532 data: &[u8],
2533 max_bytes: usize,
2534 line_delim: u8,
2535 out: &mut impl Write,
2536) -> io::Result<()> {
2537 let newline_buf: [u8; 1] = [line_delim];
2538 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2539 let mut start = 0;
2540 let mut run_start: usize = 0;
2541
2542 for pos in memchr_iter(line_delim, data) {
2543 let line_len = pos - start;
2544 if line_len > max_bytes {
2545 if run_start < start {
2547 iov.push(IoSlice::new(&data[run_start..start]));
2548 }
2549 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2550 iov.push(IoSlice::new(&newline_buf));
2551 run_start = pos + 1;
2552
2553 if iov.len() >= MAX_IOV - 2 {
2554 write_ioslices(out, &iov)?;
2555 iov.clear();
2556 }
2557 }
2558 start = pos + 1;
2559 }
2560 if start < data.len() {
2562 let line_len = data.len() - start;
2563 if line_len > max_bytes {
2564 if run_start < start {
2565 iov.push(IoSlice::new(&data[run_start..start]));
2566 }
2567 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2568 iov.push(IoSlice::new(&newline_buf));
2569 if !iov.is_empty() {
2570 write_ioslices(out, &iov)?;
2571 }
2572 return Ok(());
2573 }
2574 }
2575 if run_start < data.len() {
2577 iov.push(IoSlice::new(&data[run_start..]));
2578 if !data.is_empty() && *data.last().unwrap() != line_delim {
2579 iov.push(IoSlice::new(&newline_buf));
2580 }
2581 }
2582 if !iov.is_empty() {
2583 write_ioslices(out, &iov)?;
2584 }
2585 Ok(())
2586}
2587
2588#[inline]
2593fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2594 buf.reserve(data.len());
2597
2598 let src = data.as_ptr();
2599 let dst_base = buf.as_mut_ptr();
2600 let mut wp = buf.len();
2601 let mut start = 0;
2602
2603 for pos in memchr_iter(line_delim, data) {
2604 let line_len = pos - start;
2605 let take = line_len.min(max_bytes);
2606 unsafe {
2607 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2608 *dst_base.add(wp + take) = line_delim;
2609 }
2610 wp += take + 1;
2611 start = pos + 1;
2612 }
2613 if start < data.len() {
2615 let line_len = data.len() - start;
2616 let take = line_len.min(max_bytes);
2617 unsafe {
2618 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2619 *dst_base.add(wp + take) = line_delim;
2620 }
2621 wp += take + 1;
2622 }
2623 unsafe { buf.set_len(wp) };
2624}
2625
2626fn process_bytes_from_offset(
2628 data: &[u8],
2629 skip_bytes: usize,
2630 line_delim: u8,
2631 out: &mut impl Write,
2632) -> io::Result<()> {
2633 if data.len() >= PARALLEL_THRESHOLD {
2634 let chunks = split_into_chunks(data, line_delim);
2635 let results: Vec<Vec<u8>> = chunks
2636 .par_iter()
2637 .map(|chunk| {
2638 let mut buf = Vec::with_capacity(chunk.len());
2639 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
2640 buf
2641 })
2642 .collect();
2643 let slices: Vec<IoSlice> = results
2645 .iter()
2646 .filter(|r| !r.is_empty())
2647 .map(|r| IoSlice::new(r))
2648 .collect();
2649 write_ioslices(out, &slices)?;
2650 } else {
2651 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2653 }
2654 Ok(())
2655}
2656
2657#[inline]
2661fn bytes_from_offset_zerocopy(
2662 data: &[u8],
2663 skip_bytes: usize,
2664 line_delim: u8,
2665 out: &mut impl Write,
2666) -> io::Result<()> {
2667 let delim_buf = [line_delim];
2668 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2669
2670 let mut start = 0;
2671 for pos in memchr_iter(line_delim, data) {
2672 let line_len = pos - start;
2673 if line_len > skip_bytes {
2674 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2675 }
2676 iov.push(IoSlice::new(&delim_buf));
2677 if iov.len() >= MAX_IOV - 1 {
2679 write_ioslices(out, &iov)?;
2680 iov.clear();
2681 }
2682 start = pos + 1;
2683 }
2684 if start < data.len() {
2685 let line_len = data.len() - start;
2686 if line_len > skip_bytes {
2687 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2688 }
2689 iov.push(IoSlice::new(&delim_buf));
2690 }
2691 if !iov.is_empty() {
2692 write_ioslices(out, &iov)?;
2693 }
2694 Ok(())
2695}
2696
2697#[inline]
2700fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2701 buf.reserve(data.len());
2702
2703 let src = data.as_ptr();
2704 let dst_base = buf.as_mut_ptr();
2705 let mut wp = buf.len();
2706 let mut start = 0;
2707
2708 for pos in memchr_iter(line_delim, data) {
2709 let line_len = pos - start;
2710 if line_len > skip_bytes {
2711 let take = line_len - skip_bytes;
2712 unsafe {
2713 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2714 }
2715 wp += take;
2716 }
2717 unsafe {
2718 *dst_base.add(wp) = line_delim;
2719 }
2720 wp += 1;
2721 start = pos + 1;
2722 }
2723 if start < data.len() {
2724 let line_len = data.len() - start;
2725 if line_len > skip_bytes {
2726 let take = line_len - skip_bytes;
2727 unsafe {
2728 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2729 }
2730 wp += take;
2731 }
2732 unsafe {
2733 *dst_base.add(wp) = line_delim;
2734 }
2735 wp += 1;
2736 }
2737 unsafe { buf.set_len(wp) };
2738}
2739
2740fn process_bytes_mid_range(
2742 data: &[u8],
2743 start_byte: usize,
2744 end_byte: usize,
2745 line_delim: u8,
2746 out: &mut impl Write,
2747) -> io::Result<()> {
2748 let skip = start_byte.saturating_sub(1);
2749
2750 if data.len() >= PARALLEL_THRESHOLD {
2751 let chunks = split_into_chunks(data, line_delim);
2752 let results: Vec<Vec<u8>> = chunks
2753 .par_iter()
2754 .map(|chunk| {
2755 let mut buf = Vec::with_capacity(chunk.len());
2756 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, &mut buf);
2757 buf
2758 })
2759 .collect();
2760 let slices: Vec<IoSlice> = results
2761 .iter()
2762 .filter(|r| !r.is_empty())
2763 .map(|r| IoSlice::new(r))
2764 .collect();
2765 write_ioslices(out, &slices)?;
2766 } else {
2767 let mut buf = Vec::with_capacity(data.len());
2768 bytes_mid_range_chunk(data, skip, end_byte, line_delim, &mut buf);
2769 if !buf.is_empty() {
2770 out.write_all(&buf)?;
2771 }
2772 }
2773 Ok(())
2774}
2775
2776#[inline]
2780fn bytes_mid_range_chunk(
2781 data: &[u8],
2782 skip: usize,
2783 end_byte: usize,
2784 line_delim: u8,
2785 buf: &mut Vec<u8>,
2786) {
2787 buf.reserve(data.len());
2788
2789 let src = data.as_ptr();
2790 let dst_base = buf.as_mut_ptr();
2791 let mut wp = buf.len();
2792 let mut start = 0;
2793
2794 for pos in memchr_iter(line_delim, data) {
2795 let line_len = pos - start;
2796 if line_len > skip {
2797 let take_end = line_len.min(end_byte);
2798 let take = take_end - skip;
2799 unsafe {
2800 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2801 }
2802 wp += take;
2803 }
2804 unsafe {
2805 *dst_base.add(wp) = line_delim;
2806 }
2807 wp += 1;
2808 start = pos + 1;
2809 }
2810 if start < data.len() {
2811 let line_len = data.len() - start;
2812 if line_len > skip {
2813 let take_end = line_len.min(end_byte);
2814 let take = take_end - skip;
2815 unsafe {
2816 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2817 }
2818 wp += take;
2819 }
2820 unsafe {
2821 *dst_base.add(wp) = line_delim;
2822 }
2823 wp += 1;
2824 }
2825 unsafe { buf.set_len(wp) };
2826}
2827
2828fn process_bytes_complement_mid(
2830 data: &[u8],
2831 skip_start: usize,
2832 skip_end: usize,
2833 line_delim: u8,
2834 out: &mut impl Write,
2835) -> io::Result<()> {
2836 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
2838 let chunks = split_into_chunks(data, line_delim);
2839 let results: Vec<Vec<u8>> = chunks
2840 .par_iter()
2841 .map(|chunk| {
2842 let mut buf = Vec::with_capacity(chunk.len());
2843 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, &mut buf);
2844 buf
2845 })
2846 .collect();
2847 let slices: Vec<IoSlice> = results
2848 .iter()
2849 .filter(|r| !r.is_empty())
2850 .map(|r| IoSlice::new(r))
2851 .collect();
2852 write_ioslices(out, &slices)?;
2853 } else {
2854 let mut buf = Vec::with_capacity(data.len());
2855 bytes_complement_mid_chunk(data, prefix_bytes, skip_end, line_delim, &mut buf);
2856 if !buf.is_empty() {
2857 out.write_all(&buf)?;
2858 }
2859 }
2860 Ok(())
2861}
2862
2863#[inline]
2866fn bytes_complement_mid_chunk(
2867 data: &[u8],
2868 prefix_bytes: usize,
2869 skip_end: usize,
2870 line_delim: u8,
2871 buf: &mut Vec<u8>,
2872) {
2873 buf.reserve(data.len());
2874
2875 let src = data.as_ptr();
2876 let dst_base = buf.as_mut_ptr();
2877 let mut wp = buf.len();
2878 let mut start = 0;
2879
2880 for pos in memchr_iter(line_delim, data) {
2881 let line_len = pos - start;
2882 let take_prefix = prefix_bytes.min(line_len);
2884 if take_prefix > 0 {
2885 unsafe {
2886 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2887 }
2888 wp += take_prefix;
2889 }
2890 if line_len > skip_end {
2892 let suffix_len = line_len - skip_end;
2893 unsafe {
2894 std::ptr::copy_nonoverlapping(
2895 src.add(start + skip_end),
2896 dst_base.add(wp),
2897 suffix_len,
2898 );
2899 }
2900 wp += suffix_len;
2901 }
2902 unsafe {
2903 *dst_base.add(wp) = line_delim;
2904 }
2905 wp += 1;
2906 start = pos + 1;
2907 }
2908 if start < data.len() {
2909 let line_len = data.len() - start;
2910 let take_prefix = prefix_bytes.min(line_len);
2911 if take_prefix > 0 {
2912 unsafe {
2913 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2914 }
2915 wp += take_prefix;
2916 }
2917 if line_len > skip_end {
2918 let suffix_len = line_len - skip_end;
2919 unsafe {
2920 std::ptr::copy_nonoverlapping(
2921 src.add(start + skip_end),
2922 dst_base.add(wp),
2923 suffix_len,
2924 );
2925 }
2926 wp += suffix_len;
2927 }
2928 unsafe {
2929 *dst_base.add(wp) = line_delim;
2930 }
2931 wp += 1;
2932 }
2933 unsafe { buf.set_len(wp) };
2934}
2935
2936fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
2938 let line_delim = cfg.line_delim;
2939 let ranges = cfg.ranges;
2940 let complement = cfg.complement;
2941 let output_delim = cfg.output_delim;
2942
2943 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
2945 let max_bytes = ranges[0].end;
2946 if max_bytes < usize::MAX {
2947 return process_bytes_from_start(data, max_bytes, line_delim, out);
2948 }
2949 }
2950
2951 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
2953 let skip_bytes = ranges[0].start.saturating_sub(1);
2954 if skip_bytes > 0 {
2955 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
2956 }
2957 }
2958
2959 if !complement
2961 && ranges.len() == 1
2962 && ranges[0].start > 1
2963 && ranges[0].end < usize::MAX
2964 && output_delim.is_empty()
2965 {
2966 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
2967 }
2968
2969 if complement
2971 && ranges.len() == 1
2972 && ranges[0].start == 1
2973 && ranges[0].end < usize::MAX
2974 && output_delim.is_empty()
2975 {
2976 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
2977 }
2978
2979 if complement
2981 && ranges.len() == 1
2982 && ranges[0].end == usize::MAX
2983 && ranges[0].start > 1
2984 && output_delim.is_empty()
2985 {
2986 let max_bytes = ranges[0].start - 1;
2987 return process_bytes_from_start(data, max_bytes, line_delim, out);
2988 }
2989
2990 if complement
2992 && ranges.len() == 1
2993 && ranges[0].start > 1
2994 && ranges[0].end < usize::MAX
2995 && output_delim.is_empty()
2996 {
2997 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
2998 }
2999
3000 if data.len() >= PARALLEL_THRESHOLD {
3001 let chunks = split_into_chunks(data, line_delim);
3002 let results: Vec<Vec<u8>> = chunks
3003 .par_iter()
3004 .map(|chunk| {
3005 let mut buf = Vec::with_capacity(chunk.len());
3006 process_bytes_chunk(
3007 chunk,
3008 ranges,
3009 complement,
3010 output_delim,
3011 line_delim,
3012 &mut buf,
3013 );
3014 buf
3015 })
3016 .collect();
3017 let slices: Vec<IoSlice> = results
3019 .iter()
3020 .filter(|r| !r.is_empty())
3021 .map(|r| IoSlice::new(r))
3022 .collect();
3023 write_ioslices(out, &slices)?;
3024 } else {
3025 let mut buf = Vec::with_capacity(data.len());
3026 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
3027 if !buf.is_empty() {
3028 out.write_all(&buf)?;
3029 }
3030 }
3031 Ok(())
3032}
3033
3034fn process_bytes_chunk(
3039 data: &[u8],
3040 ranges: &[Range],
3041 complement: bool,
3042 output_delim: &[u8],
3043 line_delim: u8,
3044 buf: &mut Vec<u8>,
3045) {
3046 buf.reserve(data.len());
3047 let base = data.as_ptr();
3048 let mut start = 0;
3049 for end_pos in memchr_iter(line_delim, data) {
3050 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
3051 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3052 unsafe { buf_push(buf, line_delim) };
3053 start = end_pos + 1;
3054 }
3055 if start < data.len() {
3056 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
3057 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3058 unsafe { buf_push(buf, line_delim) };
3059 }
3060}
3061
3062#[inline(always)]
3066fn cut_bytes_to_buf(
3067 line: &[u8],
3068 ranges: &[Range],
3069 complement: bool,
3070 output_delim: &[u8],
3071 buf: &mut Vec<u8>,
3072) {
3073 let len = line.len();
3074 let base = line.as_ptr();
3075 let mut first_range = true;
3076
3077 let needed = len + output_delim.len() * ranges.len() + 1;
3079 if buf.capacity() - buf.len() < needed {
3080 buf.reserve(needed);
3081 }
3082
3083 if complement {
3084 let mut pos: usize = 1;
3085 for r in ranges {
3086 let rs = r.start;
3087 let re = r.end.min(len);
3088 if pos < rs {
3089 if !first_range && !output_delim.is_empty() {
3090 unsafe { buf_extend(buf, output_delim) };
3091 }
3092 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
3093 first_range = false;
3094 }
3095 pos = re + 1;
3096 if pos > len {
3097 break;
3098 }
3099 }
3100 if pos <= len {
3101 if !first_range && !output_delim.is_empty() {
3102 unsafe { buf_extend(buf, output_delim) };
3103 }
3104 unsafe {
3105 buf_extend(
3106 buf,
3107 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
3108 )
3109 };
3110 }
3111 } else if output_delim.is_empty() && ranges.len() == 1 {
3112 let start = ranges[0].start.saturating_sub(1);
3114 let end = ranges[0].end.min(len);
3115 if start < len {
3116 unsafe {
3117 buf_extend(
3118 buf,
3119 std::slice::from_raw_parts(base.add(start), end - start),
3120 )
3121 };
3122 }
3123 } else {
3124 for r in ranges {
3125 let start = r.start.saturating_sub(1);
3126 let end = r.end.min(len);
3127 if start >= len {
3128 break;
3129 }
3130 if !first_range && !output_delim.is_empty() {
3131 unsafe { buf_extend(buf, output_delim) };
3132 }
3133 unsafe {
3134 buf_extend(
3135 buf,
3136 std::slice::from_raw_parts(base.add(start), end - start),
3137 )
3138 };
3139 first_range = false;
3140 }
3141 }
3142}
3143
3144#[inline]
3148pub fn cut_fields(
3149 line: &[u8],
3150 delim: u8,
3151 ranges: &[Range],
3152 complement: bool,
3153 output_delim: &[u8],
3154 suppress_no_delim: bool,
3155 out: &mut impl Write,
3156) -> io::Result<bool> {
3157 if memchr::memchr(delim, line).is_none() {
3158 if !suppress_no_delim {
3159 out.write_all(line)?;
3160 return Ok(true);
3161 }
3162 return Ok(false);
3163 }
3164
3165 let mut field_num: usize = 1;
3166 let mut field_start: usize = 0;
3167 let mut first_output = true;
3168
3169 for delim_pos in memchr_iter(delim, line) {
3170 let selected = in_ranges(ranges, field_num) != complement;
3171 if selected {
3172 if !first_output {
3173 out.write_all(output_delim)?;
3174 }
3175 out.write_all(&line[field_start..delim_pos])?;
3176 first_output = false;
3177 }
3178 field_start = delim_pos + 1;
3179 field_num += 1;
3180 }
3181
3182 let selected = in_ranges(ranges, field_num) != complement;
3183 if selected {
3184 if !first_output {
3185 out.write_all(output_delim)?;
3186 }
3187 out.write_all(&line[field_start..])?;
3188 }
3189
3190 Ok(true)
3191}
3192
3193#[inline]
3195pub fn cut_bytes(
3196 line: &[u8],
3197 ranges: &[Range],
3198 complement: bool,
3199 output_delim: &[u8],
3200 out: &mut impl Write,
3201) -> io::Result<bool> {
3202 let mut first_range = true;
3203
3204 if complement {
3205 let len = line.len();
3206 let mut comp_ranges = Vec::new();
3207 let mut pos: usize = 1;
3208 for r in ranges {
3209 let rs = r.start;
3210 let re = r.end.min(len);
3211 if pos < rs {
3212 comp_ranges.push((pos, rs - 1));
3213 }
3214 pos = re + 1;
3215 if pos > len {
3216 break;
3217 }
3218 }
3219 if pos <= len {
3220 comp_ranges.push((pos, len));
3221 }
3222 for &(s, e) in &comp_ranges {
3223 if !first_range && !output_delim.is_empty() {
3224 out.write_all(output_delim)?;
3225 }
3226 out.write_all(&line[s - 1..e])?;
3227 first_range = false;
3228 }
3229 } else {
3230 for r in ranges {
3231 let start = r.start.saturating_sub(1);
3232 let end = r.end.min(line.len());
3233 if start >= line.len() {
3234 break;
3235 }
3236 if !first_range && !output_delim.is_empty() {
3237 out.write_all(output_delim)?;
3238 }
3239 out.write_all(&line[start..end])?;
3240 first_range = false;
3241 }
3242 }
3243 Ok(true)
3244}
3245
3246pub fn cut_field1_inplace(data: &mut [u8], delim: u8, line_delim: u8, suppress: bool) -> usize {
3254 let len = data.len();
3255 let mut wp: usize = 0;
3256 let mut rp: usize = 0;
3257
3258 while rp < len {
3259 match memchr::memchr2(delim, line_delim, &data[rp..]) {
3260 None => {
3261 if suppress {
3263 break;
3265 }
3266 let remaining = len - rp;
3267 if wp != rp {
3268 data.copy_within(rp..len, wp);
3269 }
3270 wp += remaining;
3271 break;
3272 }
3273 Some(offset) => {
3274 let actual = rp + offset;
3275 if data[actual] == line_delim {
3276 if suppress {
3278 rp = actual + 1;
3280 } else {
3281 let chunk_len = actual + 1 - rp;
3283 if wp != rp {
3284 data.copy_within(rp..actual + 1, wp);
3285 }
3286 wp += chunk_len;
3287 rp = actual + 1;
3288 }
3289 } else {
3290 let field_len = actual - rp;
3292 if wp != rp && field_len > 0 {
3293 data.copy_within(rp..actual, wp);
3294 }
3295 wp += field_len;
3296 data[wp] = line_delim;
3297 wp += 1;
3298 match memchr::memchr(line_delim, &data[actual + 1..]) {
3300 None => {
3301 rp = len;
3302 }
3303 Some(nl_off) => {
3304 rp = actual + 1 + nl_off + 1;
3305 }
3306 }
3307 }
3308 }
3309 }
3310 }
3311 wp
3312}
3313
3314pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3316 match cfg.mode {
3317 CutMode::Fields => process_fields_fast(data, cfg, out),
3318 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
3319 }
3320}
3321
3322pub fn process_cut_reader<R: BufRead>(
3327 mut reader: R,
3328 cfg: &CutConfig,
3329 out: &mut impl Write,
3330) -> io::Result<()> {
3331 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3333
3334 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3337
3338 loop {
3339 buf.reserve(CHUNK_SIZE);
3341 let read_start = buf.len();
3342 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3343 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3344 buf.truncate(read_start + n);
3345
3346 if buf.is_empty() {
3347 break;
3348 }
3349
3350 if n == 0 {
3351 process_cut_data(&buf, cfg, out)?;
3353 break;
3354 }
3355
3356 let process_end = match memchr::memrchr(line_delim, &buf) {
3358 Some(pos) => pos + 1,
3359 None => {
3360 continue;
3362 }
3363 };
3364
3365 process_cut_data(&buf[..process_end], cfg, out)?;
3367
3368 let leftover_len = buf.len() - process_end;
3370 if leftover_len > 0 {
3371 buf.copy_within(process_end.., 0);
3372 }
3373 buf.truncate(leftover_len);
3374 }
3375
3376 Ok(())
3377}
3378
/// Reads from `reader` until `buf` is full or end of input is reached, and
/// returns the number of bytes stored at the front of `buf`.
///
/// A return value smaller than `buf.len()` means the reader reported end of
/// input (a zero-length read). Reads failing with
/// `io::ErrorKind::Interrupted` are retried; this now includes the very first
/// read, which previously propagated the interruption as an error.
///
/// # Errors
/// Returns the first non-`Interrupted` error reported by `reader`.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3398
3399pub fn process_cut_data_mut(data: &mut [u8], cfg: &CutConfig) -> Option<usize> {
3407 if cfg.complement {
3408 return None;
3409 }
3410
3411 match cfg.mode {
3412 CutMode::Fields => {
3413 if cfg.output_delim.len() != 1 || cfg.output_delim[0] != cfg.delim {
3415 return None;
3416 }
3417 if cfg.delim == cfg.line_delim {
3418 return None;
3419 }
3420 Some(cut_fields_inplace_general(
3421 data,
3422 cfg.delim,
3423 cfg.line_delim,
3424 cfg.ranges,
3425 cfg.suppress_no_delim,
3426 ))
3427 }
3428 CutMode::Bytes | CutMode::Characters => {
3429 if !cfg.output_delim.is_empty() {
3430 return None;
3431 }
3432 Some(cut_bytes_inplace_general(data, cfg.line_delim, cfg.ranges))
3433 }
3434 }
3435}
3436
3437fn cut_fields_inplace_general(
3440 data: &mut [u8],
3441 delim: u8,
3442 line_delim: u8,
3443 ranges: &[Range],
3444 suppress: bool,
3445) -> usize {
3446 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == 1 {
3448 return cut_field1_inplace(data, delim, line_delim, suppress);
3449 }
3450
3451 let len = data.len();
3452 if len == 0 {
3453 return 0;
3454 }
3455
3456 let max_field = ranges.last().map_or(0, |r| r.end);
3457 let max_delims = max_field.min(64);
3458 let mut wp: usize = 0;
3459 let mut rp: usize = 0;
3460
3461 while rp < len {
3462 let line_end = memchr::memchr(line_delim, &data[rp..])
3463 .map(|p| rp + p)
3464 .unwrap_or(len);
3465 let line_len = line_end - rp;
3466
3467 let mut delim_pos = [0usize; 64];
3469 let mut num_delims: usize = 0;
3470
3471 for pos in memchr_iter(delim, &data[rp..line_end]) {
3472 if num_delims < max_delims {
3473 delim_pos[num_delims] = pos;
3474 num_delims += 1;
3475 if num_delims >= max_delims {
3476 break;
3477 }
3478 }
3479 }
3480
3481 if num_delims == 0 {
3482 if !suppress {
3484 if wp != rp {
3485 data.copy_within(rp..line_end, wp);
3486 }
3487 wp += line_len;
3488 if line_end < len {
3489 data[wp] = line_delim;
3490 wp += 1;
3491 }
3492 }
3493 } else {
3494 let total_fields = num_delims + 1;
3495 let mut first_output = true;
3496
3497 for r in ranges {
3498 let range_start = r.start;
3499 let range_end = r.end.min(total_fields);
3500 if range_start > total_fields {
3501 break;
3502 }
3503 for field_num in range_start..=range_end {
3504 if field_num > total_fields {
3505 break;
3506 }
3507
3508 let field_start = if field_num == 1 {
3509 0
3510 } else if field_num - 2 < num_delims {
3511 delim_pos[field_num - 2] + 1
3512 } else {
3513 continue;
3514 };
3515 let field_end = if field_num <= num_delims {
3516 delim_pos[field_num - 1]
3517 } else {
3518 line_len
3519 };
3520
3521 if !first_output {
3522 data[wp] = delim;
3523 wp += 1;
3524 }
3525 let flen = field_end - field_start;
3526 if flen > 0 {
3527 data.copy_within(rp + field_start..rp + field_start + flen, wp);
3528 wp += flen;
3529 }
3530 first_output = false;
3531 }
3532 }
3533
3534 if !first_output && line_end < len {
3535 data[wp] = line_delim;
3536 wp += 1;
3537 } else if first_output && line_end < len {
3538 data[wp] = line_delim;
3540 wp += 1;
3541 }
3542 }
3543
3544 rp = if line_end < len { line_end + 1 } else { len };
3545 }
3546
3547 wp
3548}
3549
3550fn cut_bytes_inplace_general(data: &mut [u8], line_delim: u8, ranges: &[Range]) -> usize {
3552 let len = data.len();
3553 if len == 0 {
3554 return 0;
3555 }
3556
3557 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == usize::MAX {
3559 return len;
3560 }
3561
3562 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end < usize::MAX {
3564 return cut_bytes_from_start_inplace(data, line_delim, ranges[0].end);
3565 }
3566
3567 let mut wp: usize = 0;
3568 let mut rp: usize = 0;
3569
3570 while rp < len {
3571 let line_end = memchr::memchr(line_delim, &data[rp..])
3572 .map(|p| rp + p)
3573 .unwrap_or(len);
3574 let line_len = line_end - rp;
3575
3576 for r in ranges {
3577 let start = r.start.saturating_sub(1);
3578 let end = r.end.min(line_len);
3579 if start >= line_len {
3580 break;
3581 }
3582 let flen = end - start;
3583 if flen > 0 {
3584 data.copy_within(rp + start..rp + start + flen, wp);
3585 wp += flen;
3586 }
3587 }
3588
3589 if line_end < len {
3590 data[wp] = line_delim;
3591 wp += 1;
3592 }
3593
3594 rp = if line_end < len { line_end + 1 } else { len };
3595 }
3596
3597 wp
3598}
3599
3600fn cut_bytes_from_start_inplace(data: &mut [u8], line_delim: u8, max_bytes: usize) -> usize {
3602 let len = data.len();
3603
3604 let mut all_fit = true;
3606 let mut start = 0;
3607 for pos in memchr_iter(line_delim, data) {
3608 if pos - start > max_bytes {
3609 all_fit = false;
3610 break;
3611 }
3612 start = pos + 1;
3613 }
3614 if all_fit && start < len && len - start > max_bytes {
3615 all_fit = false;
3616 }
3617 if all_fit {
3618 return len;
3619 }
3620
3621 let mut wp: usize = 0;
3623 let mut rp: usize = 0;
3624
3625 while rp < len {
3626 let line_end = memchr::memchr(line_delim, &data[rp..])
3627 .map(|p| rp + p)
3628 .unwrap_or(len);
3629 let line_len = line_end - rp;
3630
3631 let take = line_len.min(max_bytes);
3632 if take > 0 && wp != rp {
3633 data.copy_within(rp..rp + take, wp);
3634 }
3635 wp += take;
3636
3637 if line_end < len {
3638 data[wp] = line_delim;
3639 wp += 1;
3640 }
3641
3642 rp = if line_end < len { line_end + 1 } else { len };
3643 }
3644
3645 wp
3646}
3647
/// Selection mode of a cut operation, stored in `CutConfig::mode`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CutMode {
    /// Select by byte position within each line.
    Bytes,
    /// Select by character position. `process_cut_data` and
    /// `process_cut_data_mut` handle this exactly like `Bytes`.
    Characters,
    /// Select delimiter-separated fields of each line.
    Fields,
}