1use memchr::memchr_iter;
2use std::io::{self, BufRead, IoSlice, Write};
3
/// Input size in bytes at or above which the `process_*` entry points split
/// the data with `split_for_scope` and run the pieces on rayon tasks.
const PARALLEL_THRESHOLD: usize = 4 * 1024 * 1024;

/// Maximum number of `IoSlice`s that `write_ioslices` hands to a single
/// `write_vectored` call.
// NOTE(review): presumably mirrors the platform IOV_MAX limit — confirm.
const MAX_IOV: usize = 1024;

/// Window size in bytes used by `process_chunked` when it walks an input that
/// is larger than this value.
const SEQ_CHUNK: usize = 8 * 1024 * 1024;
17
18fn process_chunked(
21 data: &[u8],
22 line_delim: u8,
23 out: &mut impl Write,
24 mut process_fn: impl FnMut(&[u8], &mut Vec<u8>),
25) -> io::Result<()> {
26 if data.len() <= SEQ_CHUNK {
28 let mut buf = Vec::with_capacity(data.len() + 256);
29 process_fn(data, &mut buf);
30 if !buf.is_empty() {
31 out.write_all(&buf)?;
32 }
33 return Ok(());
34 }
35 let mut buf = Vec::with_capacity(SEQ_CHUNK * 2);
36 let mut start = 0;
37 while start < data.len() {
38 let end = if start + SEQ_CHUNK >= data.len() {
39 data.len()
40 } else {
41 match memchr::memrchr(line_delim, &data[start..start + SEQ_CHUNK]) {
42 Some(pos) => start + pos + 1,
43 None => (start + SEQ_CHUNK).min(data.len()),
44 }
45 };
46 buf.clear();
47 process_fn(&data[start..end], &mut buf);
48 if !buf.is_empty() {
49 out.write_all(&buf)?;
50 }
51 start = end;
52 }
53 Ok(())
54}
55
/// Settings for one cut run, borrowed by the processing functions.
pub struct CutConfig<'a> {
    /// Selection mode.
    // NOTE(review): presumably bytes vs. characters vs. fields — confirm against `CutMode`.
    pub mode: CutMode,
    /// Selected ranges. The field fast paths read `ranges.last()` as the highest
    /// selected field, so they expect the sorted output of `parse_ranges`.
    pub ranges: &'a [Range],
    /// When `true`, the selection is inverted (everything *not* in `ranges`).
    pub complement: bool,
    /// Input field delimiter byte.
    pub delim: u8,
    /// Bytes written between output fields.
    pub output_delim: &'a [u8],
    /// When `true`, lines that contain no `delim` are dropped instead of being
    /// copied through unchanged.
    pub suppress_no_delim: bool,
    /// Byte that terminates each input and output line.
    pub line_delim: u8,
}
66
/// An inclusive, 1-based range of field or byte positions.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position; `parse_ranges` rejects 0.
    pub start: usize,
    /// Last selected position (inclusive); `parse_ranges` stores `usize::MAX`
    /// for an open-ended range such as `3-`.
    pub end: usize,
}
73
74pub fn parse_ranges(spec: &str, no_merge_adjacent: bool) -> Result<Vec<Range>, String> {
81 let mut ranges = Vec::new();
82
83 for part in spec.split(',') {
84 let part = part.trim();
85 if part.is_empty() {
86 continue;
87 }
88
89 if let Some(idx) = part.find('-') {
90 let left = &part[..idx];
91 let right = &part[idx + 1..];
92
93 if left.is_empty() && right.is_empty() {
95 return Err("invalid range with no endpoint: -".to_string());
96 }
97
98 let start = if left.is_empty() {
99 1
100 } else {
101 left.parse::<usize>()
102 .map_err(|_| format!("invalid range: '{}'", part))?
103 };
104
105 let end = if right.is_empty() {
106 usize::MAX
107 } else {
108 right
109 .parse::<usize>()
110 .map_err(|_| format!("invalid range: '{}'", part))?
111 };
112
113 if start == 0 {
114 return Err("fields and positions are numbered from 1".to_string());
115 }
116 if start > end {
117 return Err(format!("invalid decreasing range: '{}'", part));
118 }
119
120 ranges.push(Range { start, end });
121 } else {
122 let n = part
123 .parse::<usize>()
124 .map_err(|_| format!("invalid field: '{}'", part))?;
125 if n == 0 {
126 return Err("fields and positions are numbered from 1".to_string());
127 }
128 ranges.push(Range { start: n, end: n });
129 }
130 }
131
132 if ranges.is_empty() {
133 return Err("you must specify a list of bytes, characters, or fields".to_string());
134 }
135
136 ranges.sort_by_key(|r| (r.start, r.end));
138 let mut merged = vec![ranges[0].clone()];
139 for r in &ranges[1..] {
140 let last = merged.last_mut().unwrap();
141 if no_merge_adjacent {
142 if r.start <= last.end {
144 last.end = last.end.max(r.end);
145 } else {
146 merged.push(r.clone());
147 }
148 } else {
149 if r.start <= last.end.saturating_add(1) {
151 last.end = last.end.max(r.end);
152 } else {
153 merged.push(r.clone());
154 }
155 }
156 }
157
158 Ok(merged)
159}
160
161#[inline(always)]
164fn in_ranges(ranges: &[Range], pos: usize) -> bool {
165 for r in ranges {
166 if pos < r.start {
167 return false;
168 }
169 if pos <= r.end {
170 return true;
171 }
172 }
173 false
174}
175
176#[inline]
179fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
180 let mut mask: u64 = 0;
181 for i in 1..=64u32 {
182 let in_range = in_ranges(ranges, i as usize);
183 if in_range != complement {
184 mask |= 1u64 << (i - 1);
185 }
186 }
187 mask
188}
189
190#[inline(always)]
192fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
193 if field_num <= 64 {
194 (mask >> (field_num - 1)) & 1 == 1
195 } else {
196 in_ranges(ranges, field_num) != complement
197 }
198}
199
/// Appends `data` to `buf` without checking or growing its capacity.
///
/// # Safety
/// The caller must have reserved enough room beforehand:
/// `buf.capacity() - buf.len() >= data.len()`. Violating this writes past the
/// end of the allocation. Debug builds check the precondition and panic.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    let len = buf.len();
    debug_assert!(
        buf.capacity() - len >= data.len(),
        "buf_extend: caller did not reserve enough capacity"
    );
    // SAFETY: the caller guarantees `data.len()` spare bytes after `len`, and
    // `data` cannot overlap the Vec's storage while `buf` is borrowed mutably.
    unsafe {
        std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr().add(len), data.len());
        buf.set_len(len + data.len());
    }
}
212
/// Appends the single byte `b` to `buf` without checking or growing its capacity.
///
/// # Safety
/// The caller must have reserved at least one spare byte:
/// `buf.len() < buf.capacity()`. Violating this writes past the end of the
/// allocation. Debug builds check the precondition and panic.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    let len = buf.len();
    debug_assert!(
        len < buf.capacity(),
        "buf_push: caller did not reserve enough capacity"
    );
    // SAFETY: the caller guarantees one spare byte at offset `len`.
    unsafe {
        buf.as_mut_ptr().add(len).write(b);
        buf.set_len(len + 1);
    }
}
223
224#[inline]
228fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
229 if slices.is_empty() {
230 return Ok(());
231 }
232 for batch in slices.chunks(MAX_IOV) {
233 let total: usize = batch.iter().map(|s| s.len()).sum();
234 let written = out.write_vectored(batch)?;
235 if written >= total {
236 continue;
237 }
238 if written == 0 {
239 return Err(io::Error::new(io::ErrorKind::WriteZero, "write zero"));
240 }
241 write_ioslices_slow(out, batch, written)?;
242 }
243 Ok(())
244}
245
/// Finishes a partially written batch: skips the first `skip` bytes of
/// `slices` (already written) and writes everything after them with
/// `write_all`.
///
/// # Errors
/// Returns the first error reported by `out.write_all`.
#[cold]
#[inline(never)]
fn write_ioslices_slow(
    out: &mut impl Write,
    slices: &[IoSlice],
    mut skip: usize,
) -> io::Result<()> {
    for piece in slices.iter().map(|s| &**s) {
        if skip < piece.len() {
            // First slice that was not fully written: emit its unwritten tail.
            out.write_all(&piece[skip..])?;
            skip = 0;
        } else {
            skip -= piece.len();
        }
    }
    Ok(())
}
265
/// Returns the parallelism reported by `std::thread::available_parallelism`,
/// or 1 when that query fails.
#[inline]
fn num_cpus() -> usize {
    match std::thread::available_parallelism() {
        Ok(count) => count.get(),
        Err(_) => 1,
    }
}
277
278fn split_for_scope<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
281 let num_threads = num_cpus().max(1);
282 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
283 return vec![data];
284 }
285
286 let chunk_size = data.len() / num_threads;
287 let mut chunks = Vec::with_capacity(num_threads);
288 let mut pos = 0;
289
290 for _ in 0..num_threads - 1 {
291 let target = pos + chunk_size;
292 if target >= data.len() {
293 break;
294 }
295 let boundary = memchr::memchr(line_delim, &data[target..])
296 .map(|p| target + p + 1)
297 .unwrap_or(data.len());
298 if boundary > pos {
299 chunks.push(&data[pos..boundary]);
300 }
301 pos = boundary;
302 }
303
304 if pos < data.len() {
305 chunks.push(&data[pos..]);
306 }
307
308 chunks
309}
310
311fn process_fields_multi_select(
318 data: &[u8],
319 delim: u8,
320 line_delim: u8,
321 ranges: &[Range],
322 suppress: bool,
323 out: &mut impl Write,
324) -> io::Result<()> {
325 let max_field = ranges.last().map_or(0, |r| r.end);
326
327 if data.len() >= PARALLEL_THRESHOLD {
328 let chunks = split_for_scope(data, line_delim);
329 let n = chunks.len();
330 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
331 rayon::scope(|s| {
332 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
333 s.spawn(move |_| {
334 result.reserve(chunk.len() * 3 / 4);
335 multi_select_chunk(
336 chunk, delim, line_delim, ranges, max_field, suppress, result,
337 );
338 });
339 }
340 });
341 let slices: Vec<IoSlice> = results
342 .iter()
343 .filter(|r| !r.is_empty())
344 .map(|r| IoSlice::new(r))
345 .collect();
346 write_ioslices(out, &slices)?;
347 } else {
348 process_chunked(data, line_delim, out, |chunk, buf| {
349 multi_select_chunk(chunk, delim, line_delim, ranges, max_field, suppress, buf);
350 })?;
351 }
352 Ok(())
353}
354
355fn multi_select_chunk(
359 data: &[u8],
360 delim: u8,
361 line_delim: u8,
362 ranges: &[Range],
363 max_field: usize,
364 suppress: bool,
365 buf: &mut Vec<u8>,
366) {
367 if max_field <= 64 && delim != line_delim {
373 let mut mask: u64 = 0;
374 for r in ranges {
375 let s = r.start.max(1);
376 let e = r.end.min(64);
377 for f in s..=e {
378 mask |= 1u64 << (f - 1);
379 }
380 }
381 if max_field <= 8 {
389 multi_select_chunk_bitmask(data, delim, line_delim, mask, max_field, suppress, buf);
390 } else {
391 multi_select_twolevel(data, delim, line_delim, mask, max_field, suppress, buf);
392 }
393 return;
394 }
395
396 buf.reserve(data.len());
398 let base = data.as_ptr();
399 let mut start = 0;
400 let max_delims = max_field.min(128);
401
402 for end_pos in memchr_iter(line_delim, data) {
403 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
404 multi_select_line_fast(
405 line, delim, line_delim, ranges, max_delims, suppress, buf, start, base,
406 );
407 start = end_pos + 1;
408 }
409 if start < data.len() {
410 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
411 multi_select_line_fast(
412 line, delim, line_delim, ranges, max_delims, suppress, buf, start, base,
413 );
414 }
415}
416
417fn multi_select_chunk_bitmask(
422 data: &[u8],
423 delim: u8,
424 line_delim: u8,
425 mask: u64,
426 max_field: usize,
427 suppress: bool,
428 buf: &mut Vec<u8>,
429) {
430 buf.reserve(data.len() + 1);
434 let initial_len = buf.len();
435 let out_base = unsafe { buf.as_mut_ptr().add(initial_len) };
436 let src = data.as_ptr();
437 let mut wp: usize = 0;
438
439 let mut field_num: usize = 1; let mut field_start: usize = 0; let mut first_output = true; let mut has_delim = false; for pos in memchr::memchr2_iter(delim, line_delim, data) {
445 if data[pos] == line_delim {
446 if !has_delim {
448 if !suppress {
450 let len = pos - field_start;
451 unsafe {
452 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
453 }
454 wp += len;
455 unsafe {
456 *out_base.add(wp) = line_delim;
457 }
458 wp += 1;
459 }
460 } else {
461 if field_num <= 64 && (mask & (1u64 << (field_num - 1))) != 0 {
463 if !first_output {
464 unsafe {
465 *out_base.add(wp) = delim;
466 }
467 wp += 1;
468 }
469 let len = pos - field_start;
470 unsafe {
471 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
472 }
473 wp += len;
474 }
475 unsafe {
476 *out_base.add(wp) = line_delim;
477 }
478 wp += 1;
479 }
480 field_num = 1;
482 field_start = pos + 1;
483 first_output = true;
484 has_delim = false;
485 } else {
486 has_delim = true;
488 if field_num <= max_field && (mask & (1u64 << (field_num - 1))) != 0 {
489 if !first_output {
490 unsafe {
491 *out_base.add(wp) = delim;
492 }
493 wp += 1;
494 }
495 let len = pos - field_start;
496 unsafe {
497 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
498 }
499 wp += len;
500 first_output = false;
501 }
502 field_num += 1;
503 field_start = pos + 1;
504 }
505 }
506
507 if field_start < data.len() {
509 if !has_delim {
510 if !suppress {
511 let len = data.len() - field_start;
512 unsafe {
513 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
514 }
515 wp += len;
516 unsafe {
517 *out_base.add(wp) = line_delim;
518 }
519 wp += 1;
520 }
521 } else {
522 if field_num <= 64 && (mask & (1u64 << (field_num - 1))) != 0 {
523 if !first_output {
524 unsafe {
525 *out_base.add(wp) = delim;
526 }
527 wp += 1;
528 }
529 let len = data.len() - field_start;
530 unsafe {
531 std::ptr::copy_nonoverlapping(src.add(field_start), out_base.add(wp), len);
532 }
533 wp += len;
534 }
535 unsafe {
536 *out_base.add(wp) = line_delim;
537 }
538 wp += 1;
539 }
540 }
541
542 debug_assert!(wp <= data.len() + 1);
543 unsafe {
544 buf.set_len(initial_len + wp);
545 }
546}
547
548fn multi_select_twolevel(
553 data: &[u8],
554 delim: u8,
555 line_delim: u8,
556 mask: u64,
557 max_field: usize,
558 suppress: bool,
559 buf: &mut Vec<u8>,
560) {
561 buf.reserve(data.len() + 1);
562 let initial_len = buf.len();
563 let out_base = unsafe { buf.as_mut_ptr().add(initial_len) };
564 let src = data.as_ptr();
565 let mut wp: usize = 0;
566 let mut line_start: usize = 0;
567
568 for nl_pos in memchr_iter(line_delim, data) {
569 let line_len = nl_pos - line_start;
570 let line = &data[line_start..nl_pos];
571
572 if line_len == 0 {
573 if !suppress {
574 unsafe {
575 *out_base.add(wp) = line_delim;
576 }
577 wp += 1;
578 }
579 line_start = nl_pos + 1;
580 continue;
581 }
582
583 let mut field_num: usize = 1;
586 let mut field_start: usize = 0;
587 let mut first_output = true;
588 let mut has_delim = false;
589
590 for dp in memchr::memchr_iter(delim, line) {
591 has_delim = true;
592 if (mask >> (field_num - 1)) & 1 == 1 {
593 if !first_output {
594 unsafe {
595 *out_base.add(wp) = delim;
596 }
597 wp += 1;
598 }
599 let flen = dp - field_start;
600 unsafe {
601 std::ptr::copy_nonoverlapping(
602 src.add(line_start + field_start),
603 out_base.add(wp),
604 flen,
605 );
606 }
607 wp += flen;
608 first_output = false;
609 }
610 field_num += 1;
611 field_start = dp + 1;
612 if field_num > max_field {
613 break;
614 }
615 }
616
617 if !has_delim {
618 if !suppress {
620 unsafe {
621 std::ptr::copy_nonoverlapping(src.add(line_start), out_base.add(wp), line_len);
622 }
623 wp += line_len;
624 unsafe {
625 *out_base.add(wp) = line_delim;
626 }
627 wp += 1;
628 }
629 } else {
630 if field_num <= 64 && (mask >> (field_num - 1)) & 1 == 1 {
632 if !first_output {
633 unsafe {
634 *out_base.add(wp) = delim;
635 }
636 wp += 1;
637 }
638 let flen = line_len - field_start;
639 unsafe {
640 std::ptr::copy_nonoverlapping(
641 src.add(line_start + field_start),
642 out_base.add(wp),
643 flen,
644 );
645 }
646 wp += flen;
647 }
648 unsafe {
649 *out_base.add(wp) = line_delim;
650 }
651 wp += 1;
652 }
653
654 line_start = nl_pos + 1;
655 }
656
657 if line_start < data.len() {
659 let line = &data[line_start..];
660 let line_len = line.len();
661 let mut field_num: usize = 1;
662 let mut field_start: usize = 0;
663 let mut first_output = true;
664 let mut has_delim = false;
665
666 for dp in memchr::memchr_iter(delim, line) {
667 has_delim = true;
668 if (mask >> (field_num - 1)) & 1 == 1 {
669 if !first_output {
670 unsafe {
671 *out_base.add(wp) = delim;
672 }
673 wp += 1;
674 }
675 let flen = dp - field_start;
676 unsafe {
677 std::ptr::copy_nonoverlapping(
678 src.add(line_start + field_start),
679 out_base.add(wp),
680 flen,
681 );
682 }
683 wp += flen;
684 first_output = false;
685 }
686 field_num += 1;
687 field_start = dp + 1;
688 if field_num > max_field {
689 break;
690 }
691 }
692
693 if !has_delim {
694 if !suppress {
695 unsafe {
696 std::ptr::copy_nonoverlapping(src.add(line_start), out_base.add(wp), line_len);
697 }
698 wp += line_len;
699 unsafe {
700 *out_base.add(wp) = line_delim;
701 }
702 wp += 1;
703 }
704 } else {
705 if field_num <= 64 && (mask >> (field_num - 1)) & 1 == 1 {
706 if !first_output {
707 unsafe {
708 *out_base.add(wp) = delim;
709 }
710 wp += 1;
711 }
712 let flen = line_len - field_start;
713 unsafe {
714 std::ptr::copy_nonoverlapping(
715 src.add(line_start + field_start),
716 out_base.add(wp),
717 flen,
718 );
719 }
720 wp += flen;
721 }
722 unsafe {
723 *out_base.add(wp) = line_delim;
724 }
725 wp += 1;
726 }
727 }
728
729 debug_assert!(
730 wp <= data.len() + 1,
731 "wp={} exceeded reservation data.len()+1={}",
732 wp,
733 data.len() + 1
734 );
735 unsafe {
736 buf.set_len(initial_len + wp);
737 }
738}
739
740#[inline(always)]
744fn multi_select_line_fast(
745 line: &[u8],
746 delim: u8,
747 line_delim: u8,
748 ranges: &[Range],
749 max_delims: usize,
750 suppress: bool,
751 buf: &mut Vec<u8>,
752 _line_abs_start: usize,
753 _data_base: *const u8,
754) {
755 let len = line.len();
756 if len == 0 {
757 if !suppress {
758 unsafe { buf_push(buf, line_delim) };
759 }
760 return;
761 }
762
763 let base = line.as_ptr();
764
765 let mut delim_pos = [0usize; 128];
767 let mut num_delims: usize = 0;
768
769 for pos in memchr_iter(delim, line) {
770 if num_delims < max_delims {
771 delim_pos[num_delims] = pos;
772 num_delims += 1;
773 if num_delims >= max_delims {
774 break;
775 }
776 }
777 }
778
779 if num_delims == 0 {
780 if !suppress {
781 unsafe {
782 buf_extend(buf, line);
783 buf_push(buf, line_delim);
784 }
785 }
786 return;
787 }
788
789 let total_fields = num_delims + 1;
790 let mut first_output = true;
791
792 for r in ranges {
793 let range_start = r.start;
794 let range_end = r.end.min(total_fields);
795 if range_start > total_fields {
796 break;
797 }
798 for field_num in range_start..=range_end {
799 if field_num > total_fields {
800 break;
801 }
802
803 let field_start = if field_num == 1 {
804 0
805 } else if field_num - 2 < num_delims {
806 delim_pos[field_num - 2] + 1
807 } else {
808 continue;
809 };
810 let field_end = if field_num <= num_delims {
811 delim_pos[field_num - 1]
812 } else {
813 len
814 };
815
816 if !first_output {
817 unsafe { buf_push(buf, delim) };
818 }
819 unsafe {
820 buf_extend(
821 buf,
822 std::slice::from_raw_parts(base.add(field_start), field_end - field_start),
823 );
824 }
825 first_output = false;
826 }
827 }
828
829 unsafe { buf_push(buf, line_delim) };
830}
831
832fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
836 let delim = cfg.delim;
837 let line_delim = cfg.line_delim;
838 let ranges = cfg.ranges;
839 let complement = cfg.complement;
840 let output_delim = cfg.output_delim;
841 let suppress = cfg.suppress_no_delim;
842
843 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
851 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
852 }
853
854 if complement
856 && ranges.len() == 1
857 && output_delim.len() == 1
858 && output_delim[0] == delim
859 && ranges[0].start == ranges[0].end
860 {
861 return process_complement_single_field(
862 data,
863 delim,
864 line_delim,
865 ranges[0].start,
866 suppress,
867 out,
868 );
869 }
870
871 if complement
874 && ranges.len() == 1
875 && ranges[0].start > 1
876 && ranges[0].end < usize::MAX
877 && output_delim.len() == 1
878 && output_delim[0] == delim
879 {
880 return process_complement_range(
881 data,
882 delim,
883 line_delim,
884 ranges[0].start,
885 ranges[0].end,
886 suppress,
887 out,
888 );
889 }
890
891 if !complement
893 && ranges.len() == 1
894 && ranges[0].start == 1
895 && output_delim.len() == 1
896 && output_delim[0] == delim
897 && ranges[0].end < usize::MAX
898 {
899 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
900 }
901
902 if !complement
904 && ranges.len() == 1
905 && ranges[0].end == usize::MAX
906 && ranges[0].start > 1
907 && output_delim.len() == 1
908 && output_delim[0] == delim
909 {
910 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
911 }
912
913 if !complement
915 && ranges.len() == 1
916 && ranges[0].start > 1
917 && ranges[0].end < usize::MAX
918 && output_delim.len() == 1
919 && output_delim[0] == delim
920 {
921 return process_fields_mid_range(
922 data,
923 delim,
924 line_delim,
925 ranges[0].start,
926 ranges[0].end,
927 suppress,
928 out,
929 );
930 }
931
932 if !complement
938 && ranges.len() > 1
939 && ranges.last().map_or(false, |r| r.end < usize::MAX)
940 && output_delim.len() == 1
941 && output_delim[0] == delim
942 && delim != line_delim
943 {
944 return process_fields_multi_select(data, delim, line_delim, ranges, suppress, out);
945 }
946
947 let max_field = if complement {
949 usize::MAX
950 } else {
951 ranges.last().map(|r| r.end).unwrap_or(0)
952 };
953 let field_mask = compute_field_mask(ranges, complement);
954
955 if data.len() >= PARALLEL_THRESHOLD {
956 let chunks = split_for_scope(data, line_delim);
957 let n = chunks.len();
958 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
959 rayon::scope(|s| {
960 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
961 s.spawn(move |_| {
962 result.reserve(chunk.len() + 1);
963 process_fields_chunk(
964 chunk,
965 delim,
966 ranges,
967 output_delim,
968 suppress,
969 max_field,
970 field_mask,
971 line_delim,
972 complement,
973 result,
974 );
975 });
976 }
977 });
978 let slices: Vec<IoSlice> = results
979 .iter()
980 .filter(|r| !r.is_empty())
981 .map(|r| IoSlice::new(r))
982 .collect();
983 write_ioslices(out, &slices)?;
984 } else {
985 process_chunked(data, line_delim, out, |chunk, buf| {
986 process_fields_chunk(
987 chunk,
988 delim,
989 ranges,
990 output_delim,
991 suppress,
992 max_field,
993 field_mask,
994 line_delim,
995 complement,
996 buf,
997 );
998 })?;
999 }
1000 Ok(())
1001}
1002
1003fn process_fields_chunk(
1008 data: &[u8],
1009 delim: u8,
1010 ranges: &[Range],
1011 output_delim: &[u8],
1012 suppress: bool,
1013 max_field: usize,
1014 field_mask: u64,
1015 line_delim: u8,
1016 complement: bool,
1017 buf: &mut Vec<u8>,
1018) {
1019 if delim != line_delim {
1024 buf.reserve(data.len());
1025 let mut start = 0;
1026 for end_pos in memchr_iter(line_delim, data) {
1027 let line = &data[start..end_pos];
1028 extract_fields_to_buf(
1029 line,
1030 delim,
1031 ranges,
1032 output_delim,
1033 suppress,
1034 max_field,
1035 field_mask,
1036 line_delim,
1037 buf,
1038 complement,
1039 );
1040 start = end_pos + 1;
1041 }
1042 if start < data.len() {
1043 extract_fields_to_buf(
1044 &data[start..],
1045 delim,
1046 ranges,
1047 output_delim,
1048 suppress,
1049 max_field,
1050 field_mask,
1051 line_delim,
1052 buf,
1053 complement,
1054 );
1055 }
1056 return;
1057 }
1058
1059 let mut start = 0;
1061 for end_pos in memchr_iter(line_delim, data) {
1062 let line = &data[start..end_pos];
1063 extract_fields_to_buf(
1064 line,
1065 delim,
1066 ranges,
1067 output_delim,
1068 suppress,
1069 max_field,
1070 field_mask,
1071 line_delim,
1072 buf,
1073 complement,
1074 );
1075 start = end_pos + 1;
1076 }
1077 if start < data.len() {
1078 extract_fields_to_buf(
1079 &data[start..],
1080 delim,
1081 ranges,
1082 output_delim,
1083 suppress,
1084 max_field,
1085 field_mask,
1086 line_delim,
1087 buf,
1088 complement,
1089 );
1090 }
1091}
1092
1093fn process_single_field(
1099 data: &[u8],
1100 delim: u8,
1101 line_delim: u8,
1102 target: usize,
1103 suppress: bool,
1104 out: &mut impl Write,
1105) -> io::Result<()> {
1106 let target_idx = target - 1;
1107
1108 if delim != line_delim {
1109 if target_idx == 0 && !suppress {
1113 if data.len() >= PARALLEL_THRESHOLD {
1114 return single_field1_parallel(data, delim, line_delim, out);
1115 }
1116 return process_chunked(data, line_delim, out, |chunk, buf| {
1117 single_field1_to_buf(chunk, delim, line_delim, buf);
1118 });
1119 }
1120
1121 if data.len() >= PARALLEL_THRESHOLD {
1125 let chunks = split_for_scope(data, line_delim);
1126 let n = chunks.len();
1127 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1128 rayon::scope(|s| {
1129 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1130 s.spawn(move |_| {
1131 result.reserve(chunk.len() / 2);
1132 process_single_field_chunk(
1133 chunk, delim, target_idx, line_delim, suppress, result,
1134 );
1135 });
1136 }
1137 });
1138 let slices: Vec<IoSlice> = results
1139 .iter()
1140 .filter(|r| !r.is_empty())
1141 .map(|r| IoSlice::new(r))
1142 .collect();
1143 write_ioslices(out, &slices)?;
1144 } else {
1145 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
1146 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1147 if !buf.is_empty() {
1148 out.write_all(&buf)?;
1149 }
1150 }
1151 return Ok(());
1152 }
1153
1154 if data.len() >= PARALLEL_THRESHOLD {
1156 let chunks = split_for_scope(data, line_delim);
1157 let n = chunks.len();
1158 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1159 rayon::scope(|s| {
1160 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1161 s.spawn(move |_| {
1162 result.reserve(chunk.len() / 4);
1163 process_single_field_chunk(
1164 chunk, delim, target_idx, line_delim, suppress, result,
1165 );
1166 });
1167 }
1168 });
1169 let slices: Vec<IoSlice> = results
1170 .iter()
1171 .filter(|r| !r.is_empty())
1172 .map(|r| IoSlice::new(r))
1173 .collect();
1174 write_ioslices(out, &slices)?;
1175 } else {
1176 let mut buf = Vec::with_capacity(data.len() / 4);
1177 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
1178 if !buf.is_empty() {
1179 out.write_all(&buf)?;
1180 }
1181 }
1182 Ok(())
1183}
1184
1185fn process_complement_range(
1188 data: &[u8],
1189 delim: u8,
1190 line_delim: u8,
1191 skip_start: usize,
1192 skip_end: usize,
1193 suppress: bool,
1194 out: &mut impl Write,
1195) -> io::Result<()> {
1196 if data.len() >= PARALLEL_THRESHOLD {
1197 let chunks = split_for_scope(data, line_delim);
1198 let n = chunks.len();
1199 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1200 rayon::scope(|s| {
1201 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1202 s.spawn(move |_| {
1203 result.reserve(chunk.len());
1204 complement_range_chunk(
1205 chunk, delim, skip_start, skip_end, line_delim, suppress, result,
1206 );
1207 });
1208 }
1209 });
1210 let slices: Vec<IoSlice> = results
1211 .iter()
1212 .filter(|r| !r.is_empty())
1213 .map(|r| IoSlice::new(r))
1214 .collect();
1215 write_ioslices(out, &slices)?;
1216 } else {
1217 process_chunked(data, line_delim, out, |chunk, buf| {
1218 complement_range_chunk(
1219 chunk, delim, skip_start, skip_end, line_delim, suppress, buf,
1220 );
1221 })?;
1222 }
1223 Ok(())
1224}
1225
1226fn complement_range_chunk(
1228 data: &[u8],
1229 delim: u8,
1230 skip_start: usize,
1231 skip_end: usize,
1232 line_delim: u8,
1233 suppress: bool,
1234 buf: &mut Vec<u8>,
1235) {
1236 buf.reserve(data.len());
1238 let mut start = 0;
1239 for end_pos in memchr_iter(line_delim, data) {
1240 let line = &data[start..end_pos];
1241 complement_range_line(line, delim, skip_start, skip_end, line_delim, suppress, buf);
1242 start = end_pos + 1;
1243 }
1244 if start < data.len() {
1245 complement_range_line(
1246 &data[start..],
1247 delim,
1248 skip_start,
1249 skip_end,
1250 line_delim,
1251 suppress,
1252 buf,
1253 );
1254 }
1255}
1256
1257#[inline(always)]
1264fn complement_range_line(
1265 line: &[u8],
1266 delim: u8,
1267 skip_start: usize,
1268 skip_end: usize,
1269 line_delim: u8,
1270 suppress: bool,
1271 buf: &mut Vec<u8>,
1272) {
1273 let len = line.len();
1274 if len == 0 {
1275 if !suppress {
1276 unsafe { buf_push(buf, line_delim) };
1277 }
1278 return;
1279 }
1280
1281 let base = line.as_ptr();
1283
1284 let need_prefix_delims = skip_start - 1; let need_skip_delims = skip_end - skip_start + 1; let total_need = need_prefix_delims + need_skip_delims;
1294
1295 let mut delim_count: usize = 0;
1297 let mut prefix_end_pos: usize = usize::MAX; let mut suffix_start_pos: usize = usize::MAX; for pos in memchr_iter(delim, line) {
1301 delim_count += 1;
1302 if delim_count == need_prefix_delims {
1303 prefix_end_pos = pos;
1304 }
1305 if delim_count == total_need {
1306 suffix_start_pos = pos + 1;
1307 break;
1308 }
1309 }
1310
1311 if delim_count == 0 {
1312 if !suppress {
1314 unsafe {
1315 buf_extend(buf, line);
1316 buf_push(buf, line_delim);
1317 }
1318 }
1319 return;
1320 }
1321
1322 if delim_count < need_prefix_delims {
1328 unsafe {
1330 buf_extend(buf, line);
1331 buf_push(buf, line_delim);
1332 }
1333 return;
1334 }
1335
1336 let has_prefix = need_prefix_delims > 0;
1337 let has_suffix = suffix_start_pos != usize::MAX && suffix_start_pos < len;
1338
1339 if has_prefix && has_suffix {
1340 unsafe {
1342 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1343 buf_push(buf, delim);
1344 buf_extend(
1345 buf,
1346 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1347 );
1348 buf_push(buf, line_delim);
1349 }
1350 } else if has_prefix {
1351 unsafe {
1353 buf_extend(buf, std::slice::from_raw_parts(base, prefix_end_pos));
1354 buf_push(buf, line_delim);
1355 }
1356 } else if has_suffix {
1357 unsafe {
1359 buf_extend(
1360 buf,
1361 std::slice::from_raw_parts(base.add(suffix_start_pos), len - suffix_start_pos),
1362 );
1363 buf_push(buf, line_delim);
1364 }
1365 } else {
1366 unsafe { buf_push(buf, line_delim) };
1368 }
1369}
1370
1371fn process_complement_single_field(
1373 data: &[u8],
1374 delim: u8,
1375 line_delim: u8,
1376 skip_field: usize,
1377 suppress: bool,
1378 out: &mut impl Write,
1379) -> io::Result<()> {
1380 let skip_idx = skip_field - 1;
1381
1382 if data.len() >= PARALLEL_THRESHOLD {
1383 let chunks = split_for_scope(data, line_delim);
1384 let n = chunks.len();
1385 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1386 rayon::scope(|s| {
1387 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1388 s.spawn(move |_| {
1389 result.reserve(chunk.len());
1390 complement_single_field_chunk(
1391 chunk, delim, skip_idx, line_delim, suppress, result,
1392 );
1393 });
1394 }
1395 });
1396 let slices: Vec<IoSlice> = results
1397 .iter()
1398 .filter(|r| !r.is_empty())
1399 .map(|r| IoSlice::new(r))
1400 .collect();
1401 write_ioslices(out, &slices)?;
1402 } else {
1403 process_chunked(data, line_delim, out, |chunk, buf| {
1404 complement_single_field_chunk(chunk, delim, skip_idx, line_delim, suppress, buf);
1405 })?;
1406 }
1407 Ok(())
1408}
1409
1410fn complement_single_field_chunk(
1415 data: &[u8],
1416 delim: u8,
1417 skip_idx: usize,
1418 line_delim: u8,
1419 suppress: bool,
1420 buf: &mut Vec<u8>,
1421) {
1422 buf.reserve(data.len());
1423 let mut start = 0;
1424 for end_pos in memchr_iter(line_delim, data) {
1425 let line = &data[start..end_pos];
1426 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
1427 start = end_pos + 1;
1428 }
1429 if start < data.len() {
1430 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
1431 }
1432}
1433
1434#[inline(always)]
1436fn complement_single_field_line(
1437 line: &[u8],
1438 delim: u8,
1439 skip_idx: usize,
1440 line_delim: u8,
1441 suppress: bool,
1442 buf: &mut Vec<u8>,
1443) {
1444 let len = line.len();
1445 if len == 0 {
1446 if !suppress {
1447 unsafe { buf_push(buf, line_delim) };
1448 }
1449 return;
1450 }
1451
1452 let base = line.as_ptr();
1453 let need_before = skip_idx;
1454 let need_total = skip_idx + 1;
1455
1456 let mut delim_count: usize = 0;
1457 let mut skip_start_pos: usize = 0;
1458 let mut skip_end_pos: usize = len;
1459 let mut found_end = false;
1460
1461 for pos in memchr_iter(delim, line) {
1462 delim_count += 1;
1463 if delim_count == need_before {
1464 skip_start_pos = pos + 1;
1465 }
1466 if delim_count == need_total {
1467 skip_end_pos = pos;
1468 found_end = true;
1469 break;
1470 }
1471 }
1472
1473 if delim_count == 0 {
1474 if !suppress {
1475 unsafe {
1476 buf_extend(buf, line);
1477 buf_push(buf, line_delim);
1478 }
1479 }
1480 return;
1481 }
1482
1483 if delim_count < need_before {
1484 unsafe {
1485 buf_extend(buf, line);
1486 buf_push(buf, line_delim);
1487 }
1488 return;
1489 }
1490
1491 let has_prefix = skip_idx > 0 && skip_start_pos > 0;
1492 let has_suffix = found_end && skip_end_pos < len;
1493
1494 if has_prefix && has_suffix {
1495 unsafe {
1496 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1497 buf_push(buf, delim);
1498 buf_extend(
1499 buf,
1500 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1501 );
1502 buf_push(buf, line_delim);
1503 }
1504 } else if has_prefix {
1505 unsafe {
1506 buf_extend(buf, std::slice::from_raw_parts(base, skip_start_pos - 1));
1507 buf_push(buf, line_delim);
1508 }
1509 } else if has_suffix {
1510 unsafe {
1511 buf_extend(
1512 buf,
1513 std::slice::from_raw_parts(base.add(skip_end_pos + 1), len - skip_end_pos - 1),
1514 );
1515 buf_push(buf, line_delim);
1516 }
1517 } else {
1518 unsafe { buf_push(buf, line_delim) };
1519 }
1520}
1521
1522fn process_fields_prefix(
1526 data: &[u8],
1527 delim: u8,
1528 line_delim: u8,
1529 last_field: usize,
1530 suppress: bool,
1531 out: &mut impl Write,
1532) -> io::Result<()> {
1533 if data.len() >= PARALLEL_THRESHOLD {
1534 let chunks = split_for_scope(data, line_delim);
1535 let n = chunks.len();
1536 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1537 rayon::scope(|s| {
1538 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1539 s.spawn(move |_| {
1540 result.reserve(chunk.len());
1541 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, result);
1542 });
1543 }
1544 });
1545 let slices: Vec<IoSlice> = results
1546 .iter()
1547 .filter(|r| !r.is_empty())
1548 .map(|r| IoSlice::new(r))
1549 .collect();
1550 write_ioslices(out, &slices)?;
1551 } else if !suppress {
1552 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
1556 } else {
1557 process_chunked(data, line_delim, out, |chunk, buf| {
1558 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, buf);
1559 })?;
1560 }
1561 Ok(())
1562}
1563
1564#[inline]
1570fn fields_prefix_zerocopy(
1571 data: &[u8],
1572 delim: u8,
1573 line_delim: u8,
1574 last_field: usize,
1575 out: &mut impl Write,
1576) -> io::Result<()> {
1577 let newline_buf: [u8; 1] = [line_delim];
1578 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
1579 let mut start = 0;
1580 let mut run_start: usize = 0;
1581
1582 for end_pos in memchr_iter(line_delim, data) {
1583 let line = &data[start..end_pos];
1584 let mut field_count = 1;
1585 let mut truncate_at: Option<usize> = None;
1586 for dpos in memchr_iter(delim, line) {
1587 if field_count >= last_field {
1588 truncate_at = Some(start + dpos);
1589 break;
1590 }
1591 field_count += 1;
1592 }
1593
1594 if let Some(trunc_pos) = truncate_at {
1595 if run_start < start {
1596 iov.push(IoSlice::new(&data[run_start..start]));
1597 }
1598 iov.push(IoSlice::new(&data[start..trunc_pos]));
1599 iov.push(IoSlice::new(&newline_buf));
1600 run_start = end_pos + 1;
1601
1602 if iov.len() >= MAX_IOV - 2 {
1603 write_ioslices(out, &iov)?;
1604 iov.clear();
1605 }
1606 }
1607 start = end_pos + 1;
1608 }
1609 if start < data.len() {
1611 let line = &data[start..];
1612 let mut field_count = 1;
1613 let mut truncate_at: Option<usize> = None;
1614 for dpos in memchr_iter(delim, line) {
1615 if field_count >= last_field {
1616 truncate_at = Some(start + dpos);
1617 break;
1618 }
1619 field_count += 1;
1620 }
1621 if let Some(trunc_pos) = truncate_at {
1622 if run_start < start {
1623 iov.push(IoSlice::new(&data[run_start..start]));
1624 }
1625 iov.push(IoSlice::new(&data[start..trunc_pos]));
1626 iov.push(IoSlice::new(&newline_buf));
1627 if !iov.is_empty() {
1628 write_ioslices(out, &iov)?;
1629 }
1630 return Ok(());
1631 }
1632 }
1633 if run_start < data.len() {
1635 iov.push(IoSlice::new(&data[run_start..]));
1636 if !data.is_empty() && *data.last().unwrap() != line_delim {
1637 iov.push(IoSlice::new(&newline_buf));
1638 }
1639 }
1640 if !iov.is_empty() {
1641 write_ioslices(out, &iov)?;
1642 }
1643 Ok(())
1644}
1645
1646fn fields_prefix_chunk(
1648 data: &[u8],
1649 delim: u8,
1650 line_delim: u8,
1651 last_field: usize,
1652 suppress: bool,
1653 buf: &mut Vec<u8>,
1654) {
1655 buf.reserve(data.len());
1656 let mut start = 0;
1657 for end_pos in memchr_iter(line_delim, data) {
1658 let line = &data[start..end_pos];
1659 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
1660 start = end_pos + 1;
1661 }
1662 if start < data.len() {
1663 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
1664 }
1665}
1666
1667#[inline(always)]
1670fn fields_prefix_line(
1671 line: &[u8],
1672 delim: u8,
1673 line_delim: u8,
1674 last_field: usize,
1675 suppress: bool,
1676 buf: &mut Vec<u8>,
1677) {
1678 let len = line.len();
1679 if len == 0 {
1680 if !suppress {
1681 unsafe { buf_push(buf, line_delim) };
1682 }
1683 return;
1684 }
1685
1686 let base = line.as_ptr();
1688
1689 let mut field_count = 1usize;
1690 let mut has_delim = false;
1691
1692 for pos in memchr_iter(delim, line) {
1693 has_delim = true;
1694 if field_count >= last_field {
1695 unsafe {
1696 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1697 buf_push(buf, line_delim);
1698 }
1699 return;
1700 }
1701 field_count += 1;
1702 }
1703
1704 if !has_delim {
1705 if !suppress {
1706 unsafe {
1707 buf_extend(buf, line);
1708 buf_push(buf, line_delim);
1709 }
1710 }
1711 return;
1712 }
1713
1714 unsafe {
1715 buf_extend(buf, line);
1716 buf_push(buf, line_delim);
1717 }
1718}
1719
1720fn process_fields_suffix(
1722 data: &[u8],
1723 delim: u8,
1724 line_delim: u8,
1725 start_field: usize,
1726 suppress: bool,
1727 out: &mut impl Write,
1728) -> io::Result<()> {
1729 if data.len() >= PARALLEL_THRESHOLD {
1730 let chunks = split_for_scope(data, line_delim);
1731 let n = chunks.len();
1732 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1733 rayon::scope(|s| {
1734 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1735 s.spawn(move |_| {
1736 result.reserve(chunk.len());
1737 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, result);
1738 });
1739 }
1740 });
1741 let slices: Vec<IoSlice> = results
1742 .iter()
1743 .filter(|r| !r.is_empty())
1744 .map(|r| IoSlice::new(r))
1745 .collect();
1746 write_ioslices(out, &slices)?;
1747 } else {
1748 process_chunked(data, line_delim, out, |chunk, buf| {
1749 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, buf);
1750 })?;
1751 }
1752 Ok(())
1753}
1754
1755fn fields_suffix_chunk(
1757 data: &[u8],
1758 delim: u8,
1759 line_delim: u8,
1760 start_field: usize,
1761 suppress: bool,
1762 buf: &mut Vec<u8>,
1763) {
1764 buf.reserve(data.len());
1765 let mut start = 0;
1766 for end_pos in memchr_iter(line_delim, data) {
1767 let line = &data[start..end_pos];
1768 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1769 start = end_pos + 1;
1770 }
1771 if start < data.len() {
1772 fields_suffix_line(
1773 &data[start..],
1774 delim,
1775 line_delim,
1776 start_field,
1777 suppress,
1778 buf,
1779 );
1780 }
1781}
1782
1783#[inline(always)]
1786fn fields_suffix_line(
1787 line: &[u8],
1788 delim: u8,
1789 line_delim: u8,
1790 start_field: usize,
1791 suppress: bool,
1792 buf: &mut Vec<u8>,
1793) {
1794 let len = line.len();
1795 if len == 0 {
1796 if !suppress {
1797 unsafe { buf_push(buf, line_delim) };
1798 }
1799 return;
1800 }
1801
1802 let base = line.as_ptr();
1804
1805 let skip_delims = start_field - 1;
1806 let mut delim_count = 0usize;
1807 let mut has_delim = false;
1808
1809 for pos in memchr_iter(delim, line) {
1810 has_delim = true;
1811 delim_count += 1;
1812 if delim_count >= skip_delims {
1813 unsafe {
1814 buf_extend(
1815 buf,
1816 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1817 );
1818 buf_push(buf, line_delim);
1819 }
1820 return;
1821 }
1822 }
1823
1824 if !has_delim {
1825 if !suppress {
1826 unsafe {
1827 buf_extend(buf, line);
1828 buf_push(buf, line_delim);
1829 }
1830 }
1831 return;
1832 }
1833
1834 unsafe { buf_push(buf, line_delim) };
1836}
1837
1838fn process_fields_mid_range(
1841 data: &[u8],
1842 delim: u8,
1843 line_delim: u8,
1844 start_field: usize,
1845 end_field: usize,
1846 suppress: bool,
1847 out: &mut impl Write,
1848) -> io::Result<()> {
1849 if data.len() >= PARALLEL_THRESHOLD {
1850 let chunks = split_for_scope(data, line_delim);
1851 let n = chunks.len();
1852 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
1853 rayon::scope(|s| {
1854 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
1855 s.spawn(move |_| {
1856 result.reserve(chunk.len());
1857 fields_mid_range_chunk(
1858 chunk,
1859 delim,
1860 line_delim,
1861 start_field,
1862 end_field,
1863 suppress,
1864 result,
1865 );
1866 });
1867 }
1868 });
1869 let slices: Vec<IoSlice> = results
1870 .iter()
1871 .filter(|r| !r.is_empty())
1872 .map(|r| IoSlice::new(r))
1873 .collect();
1874 write_ioslices(out, &slices)?;
1875 } else {
1876 process_chunked(data, line_delim, out, |chunk, buf| {
1877 fields_mid_range_chunk(
1878 chunk,
1879 delim,
1880 line_delim,
1881 start_field,
1882 end_field,
1883 suppress,
1884 buf,
1885 );
1886 })?;
1887 }
1888 Ok(())
1889}
1890
1891fn fields_mid_range_chunk(
1896 data: &[u8],
1897 delim: u8,
1898 line_delim: u8,
1899 start_field: usize,
1900 end_field: usize,
1901 suppress: bool,
1902 buf: &mut Vec<u8>,
1903) {
1904 buf.reserve(data.len());
1905 let mut start = 0;
1906 for end_pos in memchr_iter(line_delim, data) {
1907 let line = &data[start..end_pos];
1908 fields_mid_range_line(
1909 line,
1910 delim,
1911 line_delim,
1912 start_field,
1913 end_field,
1914 suppress,
1915 buf,
1916 );
1917 start = end_pos + 1;
1918 }
1919 if start < data.len() {
1920 fields_mid_range_line(
1921 &data[start..],
1922 delim,
1923 line_delim,
1924 start_field,
1925 end_field,
1926 suppress,
1927 buf,
1928 );
1929 }
1930}
1931
1932#[inline(always)]
1936fn fields_mid_range_line(
1937 line: &[u8],
1938 delim: u8,
1939 line_delim: u8,
1940 start_field: usize,
1941 end_field: usize,
1942 suppress: bool,
1943 buf: &mut Vec<u8>,
1944) {
1945 let len = line.len();
1946 if len == 0 {
1947 if !suppress {
1948 unsafe { buf_push(buf, line_delim) };
1949 }
1950 return;
1951 }
1952
1953 let base = line.as_ptr();
1955
1956 let skip_before = start_field - 1; let field_span = end_field - start_field; let target_end_delim = skip_before + field_span + 1;
1960 let mut delim_count = 0;
1961 let mut range_start = 0;
1962 let mut has_delim = false;
1963
1964 for pos in memchr_iter(delim, line) {
1965 has_delim = true;
1966 delim_count += 1;
1967 if delim_count == skip_before {
1968 range_start = pos + 1;
1969 }
1970 if delim_count == target_end_delim {
1971 if skip_before == 0 {
1972 range_start = 0;
1973 }
1974 unsafe {
1975 buf_extend(
1976 buf,
1977 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
1978 );
1979 buf_push(buf, line_delim);
1980 }
1981 return;
1982 }
1983 }
1984
1985 if !has_delim {
1986 if !suppress {
1987 unsafe {
1988 buf_extend(buf, line);
1989 buf_push(buf, line_delim);
1990 }
1991 }
1992 return;
1993 }
1994
1995 if delim_count >= skip_before {
1997 if skip_before == 0 {
1999 range_start = 0;
2000 }
2001 unsafe {
2002 buf_extend(
2003 buf,
2004 std::slice::from_raw_parts(base.add(range_start), len - range_start),
2005 );
2006 buf_push(buf, line_delim);
2007 }
2008 } else {
2009 unsafe { buf_push(buf, line_delim) };
2011 }
2012}
2013
2014fn single_field1_parallel(
2025 data: &[u8],
2026 delim: u8,
2027 line_delim: u8,
2028 out: &mut impl Write,
2029) -> io::Result<()> {
2030 let chunks = split_for_scope(data, line_delim);
2031 let n = chunks.len();
2032 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2033 rayon::scope(|s| {
2034 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2035 s.spawn(move |_| {
2036 result.reserve(chunk.len() + 1);
2037 single_field1_to_buf(chunk, delim, line_delim, result);
2038 });
2039 }
2040 });
2041 let slices: Vec<IoSlice> = results
2042 .iter()
2043 .filter(|r| !r.is_empty())
2044 .map(|r| IoSlice::new(r))
2045 .collect();
2046 write_ioslices(out, &slices)
2047}
2048
2049#[inline]
2060fn single_field1_to_buf(data: &[u8], delim: u8, line_delim: u8, buf: &mut Vec<u8>) {
2061 debug_assert_ne!(delim, line_delim, "delim and line_delim must differ");
2062 buf.reserve(data.len() + 1);
2065
2066 let base = data.as_ptr();
2067 let initial_len = buf.len();
2068 let mut out_ptr = unsafe { buf.as_mut_ptr().add(initial_len) };
2069 let mut start = 0;
2070 let mut run_start: usize = 0;
2072 let mut in_run = true; for end_pos in memchr_iter(line_delim, data) {
2075 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
2076 match memchr::memchr(delim, line) {
2077 Some(dp) => {
2078 if in_run && run_start < start {
2080 let run_len = start - run_start;
2082 unsafe {
2083 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2084 out_ptr = out_ptr.add(run_len);
2085 }
2086 }
2087 unsafe {
2089 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, dp);
2090 out_ptr = out_ptr.add(dp);
2091 *out_ptr = line_delim;
2092 out_ptr = out_ptr.add(1);
2093 }
2094 run_start = end_pos + 1;
2095 in_run = true;
2096 }
2097 None => {
2098 if !in_run {
2100 run_start = start;
2101 in_run = true;
2102 }
2103 }
2104 }
2105 start = end_pos + 1;
2106 }
2107
2108 if in_run && run_start < start {
2110 let run_len = start - run_start;
2111 unsafe {
2112 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2113 out_ptr = out_ptr.add(run_len);
2114 }
2115 }
2116
2117 if start < data.len() {
2119 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2120 match memchr::memchr(delim, line) {
2121 Some(dp) => {
2122 unsafe {
2124 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, dp);
2125 out_ptr = out_ptr.add(dp);
2126 *out_ptr = line_delim;
2127 out_ptr = out_ptr.add(1);
2128 }
2129 }
2130 None => {
2131 let len = data.len() - start;
2133 unsafe {
2134 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, len);
2135 out_ptr = out_ptr.add(len);
2136 *out_ptr = line_delim;
2137 out_ptr = out_ptr.add(1);
2138 }
2139 }
2140 }
2141 }
2142
2143 unsafe {
2144 let new_len = out_ptr as usize - buf.as_ptr() as usize;
2145 debug_assert!(new_len >= initial_len && new_len <= buf.capacity());
2146 buf.set_len(new_len);
2147 }
2148}
2149
2150#[inline]
2159#[allow(dead_code)]
2160fn single_field1_zerocopy(
2161 data: &[u8],
2162 delim: u8,
2163 line_delim: u8,
2164 out: &mut impl Write,
2165) -> io::Result<()> {
2166 let newline_buf: [u8; 1] = [line_delim];
2167
2168 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2169 let mut run_start: usize = 0;
2170 let mut start = 0;
2171
2172 for end_pos in memchr_iter(line_delim, data) {
2173 let line = &data[start..end_pos];
2174 if let Some(dp) = memchr::memchr(delim, line) {
2175 if run_start < start {
2178 iov.push(IoSlice::new(&data[run_start..start]));
2179 }
2180 iov.push(IoSlice::new(&data[start..start + dp]));
2181 iov.push(IoSlice::new(&newline_buf));
2182 run_start = end_pos + 1;
2183
2184 if iov.len() >= MAX_IOV - 2 {
2185 write_ioslices(out, &iov)?;
2186 iov.clear();
2187 }
2188 }
2189 start = end_pos + 1;
2191 }
2192
2193 if start < data.len() {
2195 let line = &data[start..];
2196 if let Some(dp) = memchr::memchr(delim, line) {
2197 if run_start < start {
2198 iov.push(IoSlice::new(&data[run_start..start]));
2199 }
2200 iov.push(IoSlice::new(&data[start..start + dp]));
2201 iov.push(IoSlice::new(&newline_buf));
2202 if !iov.is_empty() {
2203 write_ioslices(out, &iov)?;
2204 }
2205 return Ok(());
2206 }
2207 }
2208
2209 if run_start < data.len() {
2211 iov.push(IoSlice::new(&data[run_start..]));
2212 if !data.is_empty() && *data.last().unwrap() != line_delim {
2213 iov.push(IoSlice::new(&newline_buf));
2214 }
2215 }
2216 if !iov.is_empty() {
2217 write_ioslices(out, &iov)?;
2218 }
2219 Ok(())
2220}
2221
2222fn process_single_field_chunk(
2226 data: &[u8],
2227 delim: u8,
2228 target_idx: usize,
2229 line_delim: u8,
2230 suppress: bool,
2231 buf: &mut Vec<u8>,
2232) {
2233 buf.reserve(data.len() + 1);
2235
2236 let base = data.as_ptr();
2237 let initial_len = buf.len();
2238 let mut out_ptr = unsafe { buf.as_mut_ptr().add(initial_len) };
2239 let mut start = 0;
2240 let mut run_start: usize = 0;
2242 let mut in_run = !suppress; for end_pos in memchr_iter(line_delim, data) {
2245 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
2246 let line_len = end_pos - start;
2247
2248 if line_len == 0 {
2249 if !suppress {
2250 if !in_run {
2252 run_start = start;
2253 in_run = true;
2254 }
2255 }
2256 start = end_pos + 1;
2257 continue;
2258 }
2259
2260 let mut field_start_offset = 0;
2262 let mut field_idx = 0;
2263 let mut found = false;
2264 let mut has_delim = false;
2265
2266 for pos in memchr_iter(delim, line) {
2267 has_delim = true;
2268 if field_idx == target_idx {
2269 if in_run && run_start < start {
2272 let run_len = start - run_start;
2273 unsafe {
2274 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2275 out_ptr = out_ptr.add(run_len);
2276 }
2277 }
2278 let field_len = pos - field_start_offset;
2279 unsafe {
2280 std::ptr::copy_nonoverlapping(
2281 base.add(start + field_start_offset),
2282 out_ptr,
2283 field_len,
2284 );
2285 out_ptr = out_ptr.add(field_len);
2286 *out_ptr = line_delim;
2287 out_ptr = out_ptr.add(1);
2288 }
2289 run_start = end_pos + 1;
2290 in_run = true;
2291 found = true;
2292 break;
2293 }
2294 field_idx += 1;
2295 field_start_offset = pos + 1;
2296 }
2297
2298 if !found {
2299 if !has_delim {
2300 if !suppress {
2302 if !in_run {
2304 run_start = start;
2305 in_run = true;
2306 }
2307 } else {
2308 if in_run && run_start < start {
2310 let run_len = start - run_start;
2311 unsafe {
2312 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2313 out_ptr = out_ptr.add(run_len);
2314 }
2315 }
2316 in_run = false;
2317 run_start = end_pos + 1;
2318 }
2319 } else if field_idx == target_idx {
2320 if in_run && run_start < start {
2322 let run_len = start - run_start;
2323 unsafe {
2324 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2325 out_ptr = out_ptr.add(run_len);
2326 }
2327 }
2328 let field_len = line_len - field_start_offset;
2329 unsafe {
2330 std::ptr::copy_nonoverlapping(
2331 base.add(start + field_start_offset),
2332 out_ptr,
2333 field_len,
2334 );
2335 out_ptr = out_ptr.add(field_len);
2336 *out_ptr = line_delim;
2337 out_ptr = out_ptr.add(1);
2338 }
2339 run_start = end_pos + 1;
2340 in_run = true;
2341 } else {
2342 if in_run && run_start < start {
2344 let run_len = start - run_start;
2345 unsafe {
2346 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2347 out_ptr = out_ptr.add(run_len);
2348 }
2349 }
2350 unsafe {
2351 *out_ptr = line_delim;
2352 out_ptr = out_ptr.add(1);
2353 }
2354 run_start = end_pos + 1;
2355 in_run = true;
2356 }
2357 }
2358
2359 start = end_pos + 1;
2360 }
2361
2362 if in_run && run_start < start {
2364 let run_len = start - run_start;
2365 unsafe {
2366 std::ptr::copy_nonoverlapping(base.add(run_start), out_ptr, run_len);
2367 out_ptr = out_ptr.add(run_len);
2368 }
2369 }
2370
2371 if start < data.len() {
2373 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2374 let line_len = data.len() - start;
2375
2376 if line_len == 0 {
2377 if !suppress {
2378 unsafe {
2379 *out_ptr = line_delim;
2380 out_ptr = out_ptr.add(1);
2381 }
2382 }
2383 } else {
2384 let mut field_start_offset = 0;
2385 let mut field_idx = 0;
2386 let mut found = false;
2387 let mut has_delim = false;
2388
2389 for pos in memchr_iter(delim, line) {
2390 has_delim = true;
2391 if field_idx == target_idx {
2392 let field_len = pos - field_start_offset;
2393 unsafe {
2394 std::ptr::copy_nonoverlapping(
2395 base.add(start + field_start_offset),
2396 out_ptr,
2397 field_len,
2398 );
2399 out_ptr = out_ptr.add(field_len);
2400 *out_ptr = line_delim;
2401 out_ptr = out_ptr.add(1);
2402 }
2403 found = true;
2404 break;
2405 }
2406 field_idx += 1;
2407 field_start_offset = pos + 1;
2408 }
2409
2410 if !found {
2411 if !has_delim {
2412 if !suppress {
2413 unsafe {
2414 std::ptr::copy_nonoverlapping(base.add(start), out_ptr, line_len);
2415 out_ptr = out_ptr.add(line_len);
2416 *out_ptr = line_delim;
2417 out_ptr = out_ptr.add(1);
2418 }
2419 }
2420 } else if field_idx == target_idx {
2421 let field_len = line_len - field_start_offset;
2422 unsafe {
2423 std::ptr::copy_nonoverlapping(
2424 base.add(start + field_start_offset),
2425 out_ptr,
2426 field_len,
2427 );
2428 out_ptr = out_ptr.add(field_len);
2429 *out_ptr = line_delim;
2430 out_ptr = out_ptr.add(1);
2431 }
2432 } else {
2433 unsafe {
2434 *out_ptr = line_delim;
2435 out_ptr = out_ptr.add(1);
2436 }
2437 }
2438 }
2439 }
2440 }
2441
2442 unsafe {
2443 let new_len = out_ptr as usize - buf.as_ptr() as usize;
2444 debug_assert!(new_len >= initial_len && new_len <= buf.capacity());
2445 buf.set_len(new_len);
2446 }
2447}
2448
2449#[inline(always)]
2453fn extract_fields_to_buf(
2454 line: &[u8],
2455 delim: u8,
2456 ranges: &[Range],
2457 output_delim: &[u8],
2458 suppress: bool,
2459 max_field: usize,
2460 field_mask: u64,
2461 line_delim: u8,
2462 buf: &mut Vec<u8>,
2463 complement: bool,
2464) {
2465 let len = line.len();
2466
2467 if len == 0 {
2468 if !suppress {
2469 buf.push(line_delim);
2470 }
2471 return;
2472 }
2473
2474 let needed = len + output_delim.len() * 16 + 1;
2477 if buf.capacity() - buf.len() < needed {
2478 buf.reserve(needed);
2479 }
2480
2481 let base = line.as_ptr();
2482 let mut field_num: usize = 1;
2483 let mut field_start: usize = 0;
2484 let mut first_output = true;
2485 let mut has_delim = false;
2486
2487 for delim_pos in memchr_iter(delim, line) {
2489 has_delim = true;
2490
2491 if is_selected(field_num, field_mask, ranges, complement) {
2492 if !first_output {
2493 unsafe { buf_extend(buf, output_delim) };
2494 }
2495 unsafe {
2496 buf_extend(
2497 buf,
2498 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
2499 )
2500 };
2501 first_output = false;
2502 }
2503
2504 field_num += 1;
2505 field_start = delim_pos + 1;
2506
2507 if field_num > max_field {
2508 break;
2509 }
2510 }
2511
2512 if (field_num <= max_field || complement)
2514 && has_delim
2515 && is_selected(field_num, field_mask, ranges, complement)
2516 {
2517 if !first_output {
2518 unsafe { buf_extend(buf, output_delim) };
2519 }
2520 unsafe {
2521 buf_extend(
2522 buf,
2523 std::slice::from_raw_parts(base.add(field_start), len - field_start),
2524 )
2525 };
2526 first_output = false;
2527 }
2528
2529 if !first_output {
2530 unsafe { buf_push(buf, line_delim) };
2531 } else if !has_delim {
2532 if !suppress {
2533 unsafe {
2534 buf_extend(buf, line);
2535 buf_push(buf, line_delim);
2536 }
2537 }
2538 } else {
2539 unsafe { buf_push(buf, line_delim) };
2540 }
2541}
2542
2543fn process_bytes_from_start(
2550 data: &[u8],
2551 max_bytes: usize,
2552 line_delim: u8,
2553 out: &mut impl Write,
2554) -> io::Result<()> {
2555 if data.len() < 64 * 1024 * 1024 && max_bytes > 0 && max_bytes < usize::MAX {
2565 let mut start = 0;
2566 let mut all_fit = true;
2567 for pos in memchr_iter(line_delim, data) {
2568 if pos - start > max_bytes {
2569 all_fit = false;
2570 break;
2571 }
2572 start = pos + 1;
2573 }
2574 if all_fit && start < data.len() && data.len() - start > max_bytes {
2576 all_fit = false;
2577 }
2578 if all_fit {
2579 if !data.is_empty() && data[data.len() - 1] == line_delim {
2581 return out.write_all(data);
2582 } else if !data.is_empty() {
2583 out.write_all(data)?;
2584 return out.write_all(&[line_delim]);
2585 }
2586 return Ok(());
2587 }
2588 }
2589
2590 if data.len() >= PARALLEL_THRESHOLD {
2591 let chunks = split_for_scope(data, line_delim);
2592 let n = chunks.len();
2593 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2594 rayon::scope(|s| {
2595 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2596 s.spawn(move |_| {
2597 result.reserve(chunk.len());
2600 bytes_from_start_chunk(chunk, max_bytes, line_delim, result);
2601 });
2602 }
2603 });
2604 let slices: Vec<IoSlice> = results
2606 .iter()
2607 .filter(|r| !r.is_empty())
2608 .map(|r| IoSlice::new(r))
2609 .collect();
2610 write_ioslices(out, &slices)?;
2611 } else {
2612 if max_bytes <= 512 {
2618 let est_out = (data.len() / 4).max(max_bytes + 2);
2621 let mut buf = Vec::with_capacity(est_out.min(data.len()));
2622 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
2623 if !buf.is_empty() {
2624 out.write_all(&buf)?;
2625 }
2626 } else {
2627 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
2631 }
2632 }
2633 Ok(())
2634}
2635
2636#[inline]
2641fn bytes_from_start_zerocopy(
2642 data: &[u8],
2643 max_bytes: usize,
2644 line_delim: u8,
2645 out: &mut impl Write,
2646) -> io::Result<()> {
2647 let newline_buf: [u8; 1] = [line_delim];
2648 let mut iov: Vec<IoSlice> = Vec::with_capacity(MAX_IOV);
2649 let mut start = 0;
2650 let mut run_start: usize = 0;
2651
2652 for pos in memchr_iter(line_delim, data) {
2653 let line_len = pos - start;
2654 if line_len > max_bytes {
2655 if run_start < start {
2657 iov.push(IoSlice::new(&data[run_start..start]));
2658 }
2659 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2660 iov.push(IoSlice::new(&newline_buf));
2661 run_start = pos + 1;
2662
2663 if iov.len() >= MAX_IOV - 2 {
2664 write_ioslices(out, &iov)?;
2665 iov.clear();
2666 }
2667 }
2668 start = pos + 1;
2669 }
2670 if start < data.len() {
2672 let line_len = data.len() - start;
2673 if line_len > max_bytes {
2674 if run_start < start {
2675 iov.push(IoSlice::new(&data[run_start..start]));
2676 }
2677 iov.push(IoSlice::new(&data[start..start + max_bytes]));
2678 iov.push(IoSlice::new(&newline_buf));
2679 if !iov.is_empty() {
2680 write_ioslices(out, &iov)?;
2681 }
2682 return Ok(());
2683 }
2684 }
2685 if run_start < data.len() {
2687 iov.push(IoSlice::new(&data[run_start..]));
2688 if !data.is_empty() && *data.last().unwrap() != line_delim {
2689 iov.push(IoSlice::new(&newline_buf));
2690 }
2691 }
2692 if !iov.is_empty() {
2693 write_ioslices(out, &iov)?;
2694 }
2695 Ok(())
2696}
2697
2698#[inline]
2703fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2704 buf.reserve(data.len());
2707
2708 let src = data.as_ptr();
2709 let dst_base = buf.as_mut_ptr();
2710 let mut wp = buf.len();
2711 let mut start = 0;
2712
2713 for pos in memchr_iter(line_delim, data) {
2714 let line_len = pos - start;
2715 let take = line_len.min(max_bytes);
2716 unsafe {
2717 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2718 *dst_base.add(wp + take) = line_delim;
2719 }
2720 wp += take + 1;
2721 start = pos + 1;
2722 }
2723 if start < data.len() {
2725 let line_len = data.len() - start;
2726 let take = line_len.min(max_bytes);
2727 unsafe {
2728 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take);
2729 *dst_base.add(wp + take) = line_delim;
2730 }
2731 wp += take + 1;
2732 }
2733 unsafe { buf.set_len(wp) };
2734}
2735
2736fn process_bytes_from_offset(
2738 data: &[u8],
2739 skip_bytes: usize,
2740 line_delim: u8,
2741 out: &mut impl Write,
2742) -> io::Result<()> {
2743 if data.len() >= PARALLEL_THRESHOLD {
2744 let chunks = split_for_scope(data, line_delim);
2745 let n = chunks.len();
2746 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2747 rayon::scope(|s| {
2748 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2749 s.spawn(move |_| {
2750 result.reserve(chunk.len());
2751 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, result);
2752 });
2753 }
2754 });
2755 let slices: Vec<IoSlice> = results
2757 .iter()
2758 .filter(|r| !r.is_empty())
2759 .map(|r| IoSlice::new(r))
2760 .collect();
2761 write_ioslices(out, &slices)?;
2762 } else {
2763 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
2765 }
2766 Ok(())
2767}
2768
2769#[inline]
2773fn bytes_from_offset_zerocopy(
2774 data: &[u8],
2775 skip_bytes: usize,
2776 line_delim: u8,
2777 out: &mut impl Write,
2778) -> io::Result<()> {
2779 let delim_buf = [line_delim];
2780 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
2781
2782 let mut start = 0;
2783 for pos in memchr_iter(line_delim, data) {
2784 let line_len = pos - start;
2785 if line_len > skip_bytes {
2786 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
2787 }
2788 iov.push(IoSlice::new(&delim_buf));
2789 if iov.len() >= MAX_IOV - 1 {
2791 write_ioslices(out, &iov)?;
2792 iov.clear();
2793 }
2794 start = pos + 1;
2795 }
2796 if start < data.len() {
2797 let line_len = data.len() - start;
2798 if line_len > skip_bytes {
2799 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
2800 }
2801 iov.push(IoSlice::new(&delim_buf));
2802 }
2803 if !iov.is_empty() {
2804 write_ioslices(out, &iov)?;
2805 }
2806 Ok(())
2807}
2808
2809#[inline]
2812fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
2813 buf.reserve(data.len());
2814
2815 let src = data.as_ptr();
2816 let dst_base = buf.as_mut_ptr();
2817 let mut wp = buf.len();
2818 let mut start = 0;
2819
2820 for pos in memchr_iter(line_delim, data) {
2821 let line_len = pos - start;
2822 if line_len > skip_bytes {
2823 let take = line_len - skip_bytes;
2824 unsafe {
2825 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2826 }
2827 wp += take;
2828 }
2829 unsafe {
2830 *dst_base.add(wp) = line_delim;
2831 }
2832 wp += 1;
2833 start = pos + 1;
2834 }
2835 if start < data.len() {
2836 let line_len = data.len() - start;
2837 if line_len > skip_bytes {
2838 let take = line_len - skip_bytes;
2839 unsafe {
2840 std::ptr::copy_nonoverlapping(src.add(start + skip_bytes), dst_base.add(wp), take);
2841 }
2842 wp += take;
2843 }
2844 unsafe {
2845 *dst_base.add(wp) = line_delim;
2846 }
2847 wp += 1;
2848 }
2849 unsafe { buf.set_len(wp) };
2850}
2851
2852fn process_bytes_mid_range(
2854 data: &[u8],
2855 start_byte: usize,
2856 end_byte: usize,
2857 line_delim: u8,
2858 out: &mut impl Write,
2859) -> io::Result<()> {
2860 let skip = start_byte.saturating_sub(1);
2861
2862 if data.len() >= PARALLEL_THRESHOLD {
2863 let chunks = split_for_scope(data, line_delim);
2864 let n = chunks.len();
2865 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2866 rayon::scope(|s| {
2867 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2868 s.spawn(move |_| {
2869 result.reserve(chunk.len());
2870 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, result);
2871 });
2872 }
2873 });
2874 let slices: Vec<IoSlice> = results
2875 .iter()
2876 .filter(|r| !r.is_empty())
2877 .map(|r| IoSlice::new(r))
2878 .collect();
2879 write_ioslices(out, &slices)?;
2880 } else {
2881 process_chunked(data, line_delim, out, |chunk, buf| {
2882 bytes_mid_range_chunk(chunk, skip, end_byte, line_delim, buf);
2883 })?;
2884 }
2885 Ok(())
2886}
2887
2888#[inline]
2892fn bytes_mid_range_chunk(
2893 data: &[u8],
2894 skip: usize,
2895 end_byte: usize,
2896 line_delim: u8,
2897 buf: &mut Vec<u8>,
2898) {
2899 buf.reserve(data.len());
2900
2901 let src = data.as_ptr();
2902 let dst_base = buf.as_mut_ptr();
2903 let mut wp = buf.len();
2904 let mut start = 0;
2905
2906 for pos in memchr_iter(line_delim, data) {
2907 let line_len = pos - start;
2908 if line_len > skip {
2909 let take_end = line_len.min(end_byte);
2910 let take = take_end - skip;
2911 unsafe {
2912 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2913 }
2914 wp += take;
2915 }
2916 unsafe {
2917 *dst_base.add(wp) = line_delim;
2918 }
2919 wp += 1;
2920 start = pos + 1;
2921 }
2922 if start < data.len() {
2923 let line_len = data.len() - start;
2924 if line_len > skip {
2925 let take_end = line_len.min(end_byte);
2926 let take = take_end - skip;
2927 unsafe {
2928 std::ptr::copy_nonoverlapping(src.add(start + skip), dst_base.add(wp), take);
2929 }
2930 wp += take;
2931 }
2932 unsafe {
2933 *dst_base.add(wp) = line_delim;
2934 }
2935 wp += 1;
2936 }
2937 unsafe { buf.set_len(wp) };
2938}
2939
2940fn process_bytes_complement_mid(
2942 data: &[u8],
2943 skip_start: usize,
2944 skip_end: usize,
2945 line_delim: u8,
2946 out: &mut impl Write,
2947) -> io::Result<()> {
2948 let prefix_bytes = skip_start - 1; if data.len() >= PARALLEL_THRESHOLD {
2950 let chunks = split_for_scope(data, line_delim);
2951 let n = chunks.len();
2952 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
2953 rayon::scope(|s| {
2954 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
2955 s.spawn(move |_| {
2956 result.reserve(chunk.len());
2957 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, result);
2958 });
2959 }
2960 });
2961 let slices: Vec<IoSlice> = results
2962 .iter()
2963 .filter(|r| !r.is_empty())
2964 .map(|r| IoSlice::new(r))
2965 .collect();
2966 write_ioslices(out, &slices)?;
2967 } else {
2968 process_chunked(data, line_delim, out, |chunk, buf| {
2969 bytes_complement_mid_chunk(chunk, prefix_bytes, skip_end, line_delim, buf);
2970 })?;
2971 }
2972 Ok(())
2973}
2974
2975#[inline]
2978fn bytes_complement_mid_chunk(
2979 data: &[u8],
2980 prefix_bytes: usize,
2981 skip_end: usize,
2982 line_delim: u8,
2983 buf: &mut Vec<u8>,
2984) {
2985 buf.reserve(data.len());
2986
2987 let src = data.as_ptr();
2988 let dst_base = buf.as_mut_ptr();
2989 let mut wp = buf.len();
2990 let mut start = 0;
2991
2992 for pos in memchr_iter(line_delim, data) {
2993 let line_len = pos - start;
2994 let take_prefix = prefix_bytes.min(line_len);
2996 if take_prefix > 0 {
2997 unsafe {
2998 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
2999 }
3000 wp += take_prefix;
3001 }
3002 if line_len > skip_end {
3004 let suffix_len = line_len - skip_end;
3005 unsafe {
3006 std::ptr::copy_nonoverlapping(
3007 src.add(start + skip_end),
3008 dst_base.add(wp),
3009 suffix_len,
3010 );
3011 }
3012 wp += suffix_len;
3013 }
3014 unsafe {
3015 *dst_base.add(wp) = line_delim;
3016 }
3017 wp += 1;
3018 start = pos + 1;
3019 }
3020 if start < data.len() {
3021 let line_len = data.len() - start;
3022 let take_prefix = prefix_bytes.min(line_len);
3023 if take_prefix > 0 {
3024 unsafe {
3025 std::ptr::copy_nonoverlapping(src.add(start), dst_base.add(wp), take_prefix);
3026 }
3027 wp += take_prefix;
3028 }
3029 if line_len > skip_end {
3030 let suffix_len = line_len - skip_end;
3031 unsafe {
3032 std::ptr::copy_nonoverlapping(
3033 src.add(start + skip_end),
3034 dst_base.add(wp),
3035 suffix_len,
3036 );
3037 }
3038 wp += suffix_len;
3039 }
3040 unsafe {
3041 *dst_base.add(wp) = line_delim;
3042 }
3043 wp += 1;
3044 }
3045 unsafe { buf.set_len(wp) };
3046}
3047
3048fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3050 let line_delim = cfg.line_delim;
3051 let ranges = cfg.ranges;
3052 let complement = cfg.complement;
3053 let output_delim = cfg.output_delim;
3054
3055 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
3057 let max_bytes = ranges[0].end;
3058 if max_bytes < usize::MAX {
3059 return process_bytes_from_start(data, max_bytes, line_delim, out);
3060 }
3061 }
3062
3063 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
3065 let skip_bytes = ranges[0].start.saturating_sub(1);
3066 if skip_bytes > 0 {
3067 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
3068 }
3069 }
3070
3071 if !complement
3073 && ranges.len() == 1
3074 && ranges[0].start > 1
3075 && ranges[0].end < usize::MAX
3076 && output_delim.is_empty()
3077 {
3078 return process_bytes_mid_range(data, ranges[0].start, ranges[0].end, line_delim, out);
3079 }
3080
3081 if complement
3083 && ranges.len() == 1
3084 && ranges[0].start == 1
3085 && ranges[0].end < usize::MAX
3086 && output_delim.is_empty()
3087 {
3088 return process_bytes_from_offset(data, ranges[0].end, line_delim, out);
3089 }
3090
3091 if complement
3093 && ranges.len() == 1
3094 && ranges[0].end == usize::MAX
3095 && ranges[0].start > 1
3096 && output_delim.is_empty()
3097 {
3098 let max_bytes = ranges[0].start - 1;
3099 return process_bytes_from_start(data, max_bytes, line_delim, out);
3100 }
3101
3102 if complement
3104 && ranges.len() == 1
3105 && ranges[0].start > 1
3106 && ranges[0].end < usize::MAX
3107 && output_delim.is_empty()
3108 {
3109 return process_bytes_complement_mid(data, ranges[0].start, ranges[0].end, line_delim, out);
3110 }
3111
3112 if data.len() >= PARALLEL_THRESHOLD {
3113 let chunks = split_for_scope(data, line_delim);
3114 let n = chunks.len();
3115 let mut results: Vec<Vec<u8>> = (0..n).map(|_| Vec::new()).collect();
3116 rayon::scope(|s| {
3117 for (chunk, result) in chunks.iter().zip(results.iter_mut()) {
3118 s.spawn(move |_| {
3119 result.reserve(chunk.len() + 1);
3120 process_bytes_chunk(
3121 chunk,
3122 ranges,
3123 complement,
3124 output_delim,
3125 line_delim,
3126 result,
3127 );
3128 });
3129 }
3130 });
3131 let slices: Vec<IoSlice> = results
3132 .iter()
3133 .filter(|r| !r.is_empty())
3134 .map(|r| IoSlice::new(r))
3135 .collect();
3136 write_ioslices(out, &slices)?;
3137 } else {
3138 process_chunked(data, line_delim, out, |chunk, buf| {
3139 process_bytes_chunk(chunk, ranges, complement, output_delim, line_delim, buf);
3140 })?;
3141 }
3142 Ok(())
3143}
3144
3145fn process_bytes_chunk(
3150 data: &[u8],
3151 ranges: &[Range],
3152 complement: bool,
3153 output_delim: &[u8],
3154 line_delim: u8,
3155 buf: &mut Vec<u8>,
3156) {
3157 buf.reserve(data.len());
3158 let base = data.as_ptr();
3159 let mut start = 0;
3160 for end_pos in memchr_iter(line_delim, data) {
3161 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
3162 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3163 unsafe { buf_push(buf, line_delim) };
3164 start = end_pos + 1;
3165 }
3166 if start < data.len() {
3167 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
3168 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
3169 unsafe { buf_push(buf, line_delim) };
3170 }
3171}
3172
3173#[inline(always)]
3177fn cut_bytes_to_buf(
3178 line: &[u8],
3179 ranges: &[Range],
3180 complement: bool,
3181 output_delim: &[u8],
3182 buf: &mut Vec<u8>,
3183) {
3184 let len = line.len();
3185 let base = line.as_ptr();
3186 let mut first_range = true;
3187
3188 let needed = len + output_delim.len() * ranges.len() + 1;
3190 if buf.capacity() - buf.len() < needed {
3191 buf.reserve(needed);
3192 }
3193
3194 if complement {
3195 let mut pos: usize = 1;
3196 for r in ranges {
3197 let rs = r.start;
3198 let re = r.end.min(len);
3199 if pos < rs {
3200 if !first_range && !output_delim.is_empty() {
3201 unsafe { buf_extend(buf, output_delim) };
3202 }
3203 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
3204 first_range = false;
3205 }
3206 pos = re + 1;
3207 if pos > len {
3208 break;
3209 }
3210 }
3211 if pos <= len {
3212 if !first_range && !output_delim.is_empty() {
3213 unsafe { buf_extend(buf, output_delim) };
3214 }
3215 unsafe {
3216 buf_extend(
3217 buf,
3218 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
3219 )
3220 };
3221 }
3222 } else if output_delim.is_empty() && ranges.len() == 1 {
3223 let start = ranges[0].start.saturating_sub(1);
3225 let end = ranges[0].end.min(len);
3226 if start < len {
3227 unsafe {
3228 buf_extend(
3229 buf,
3230 std::slice::from_raw_parts(base.add(start), end - start),
3231 )
3232 };
3233 }
3234 } else {
3235 for r in ranges {
3236 let start = r.start.saturating_sub(1);
3237 let end = r.end.min(len);
3238 if start >= len {
3239 break;
3240 }
3241 if !first_range && !output_delim.is_empty() {
3242 unsafe { buf_extend(buf, output_delim) };
3243 }
3244 unsafe {
3245 buf_extend(
3246 buf,
3247 std::slice::from_raw_parts(base.add(start), end - start),
3248 )
3249 };
3250 first_range = false;
3251 }
3252 }
3253}
3254
3255#[inline]
3259pub fn cut_fields(
3260 line: &[u8],
3261 delim: u8,
3262 ranges: &[Range],
3263 complement: bool,
3264 output_delim: &[u8],
3265 suppress_no_delim: bool,
3266 out: &mut impl Write,
3267) -> io::Result<bool> {
3268 if memchr::memchr(delim, line).is_none() {
3269 if !suppress_no_delim {
3270 out.write_all(line)?;
3271 return Ok(true);
3272 }
3273 return Ok(false);
3274 }
3275
3276 let mut field_num: usize = 1;
3277 let mut field_start: usize = 0;
3278 let mut first_output = true;
3279
3280 for delim_pos in memchr_iter(delim, line) {
3281 let selected = in_ranges(ranges, field_num) != complement;
3282 if selected {
3283 if !first_output {
3284 out.write_all(output_delim)?;
3285 }
3286 out.write_all(&line[field_start..delim_pos])?;
3287 first_output = false;
3288 }
3289 field_start = delim_pos + 1;
3290 field_num += 1;
3291 }
3292
3293 let selected = in_ranges(ranges, field_num) != complement;
3294 if selected {
3295 if !first_output {
3296 out.write_all(output_delim)?;
3297 }
3298 out.write_all(&line[field_start..])?;
3299 }
3300
3301 Ok(true)
3302}
3303
3304#[inline]
3306pub fn cut_bytes(
3307 line: &[u8],
3308 ranges: &[Range],
3309 complement: bool,
3310 output_delim: &[u8],
3311 out: &mut impl Write,
3312) -> io::Result<bool> {
3313 let mut first_range = true;
3314
3315 if complement {
3316 let len = line.len();
3317 let mut comp_ranges = Vec::new();
3318 let mut pos: usize = 1;
3319 for r in ranges {
3320 let rs = r.start;
3321 let re = r.end.min(len);
3322 if pos < rs {
3323 comp_ranges.push((pos, rs - 1));
3324 }
3325 pos = re + 1;
3326 if pos > len {
3327 break;
3328 }
3329 }
3330 if pos <= len {
3331 comp_ranges.push((pos, len));
3332 }
3333 for &(s, e) in &comp_ranges {
3334 if !first_range && !output_delim.is_empty() {
3335 out.write_all(output_delim)?;
3336 }
3337 out.write_all(&line[s - 1..e])?;
3338 first_range = false;
3339 }
3340 } else {
3341 for r in ranges {
3342 let start = r.start.saturating_sub(1);
3343 let end = r.end.min(line.len());
3344 if start >= line.len() {
3345 break;
3346 }
3347 if !first_range && !output_delim.is_empty() {
3348 out.write_all(output_delim)?;
3349 }
3350 out.write_all(&line[start..end])?;
3351 first_range = false;
3352 }
3353 }
3354 Ok(true)
3355}
3356
3357pub fn cut_field1_inplace(data: &mut [u8], delim: u8, line_delim: u8, suppress: bool) -> usize {
3365 let len = data.len();
3366 let mut wp: usize = 0;
3367 let mut rp: usize = 0;
3368
3369 while rp < len {
3370 match memchr::memchr2(delim, line_delim, &data[rp..]) {
3371 None => {
3372 if suppress {
3374 break;
3376 }
3377 let remaining = len - rp;
3378 if wp != rp {
3379 data.copy_within(rp..len, wp);
3380 }
3381 wp += remaining;
3382 break;
3383 }
3384 Some(offset) => {
3385 let actual = rp + offset;
3386 if data[actual] == line_delim {
3387 if suppress {
3389 rp = actual + 1;
3391 } else {
3392 let chunk_len = actual + 1 - rp;
3394 if wp != rp {
3395 data.copy_within(rp..actual + 1, wp);
3396 }
3397 wp += chunk_len;
3398 rp = actual + 1;
3399 }
3400 } else {
3401 let field_len = actual - rp;
3403 if wp != rp && field_len > 0 {
3404 data.copy_within(rp..actual, wp);
3405 }
3406 wp += field_len;
3407 data[wp] = line_delim;
3408 wp += 1;
3409 match memchr::memchr(line_delim, &data[actual + 1..]) {
3411 None => {
3412 rp = len;
3413 }
3414 Some(nl_off) => {
3415 rp = actual + 1 + nl_off + 1;
3416 }
3417 }
3418 }
3419 }
3420 }
3421 }
3422 wp
3423}
3424
3425pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
3427 match cfg.mode {
3428 CutMode::Fields => process_fields_fast(data, cfg, out),
3429 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
3430 }
3431}
3432
3433pub fn process_cut_reader<R: BufRead>(
3438 mut reader: R,
3439 cfg: &CutConfig,
3440 out: &mut impl Write,
3441) -> io::Result<()> {
3442 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
3444
3445 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
3448
3449 loop {
3450 buf.reserve(CHUNK_SIZE);
3452 let read_start = buf.len();
3453 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
3454 let n = read_fully(&mut reader, &mut buf[read_start..])?;
3455 buf.truncate(read_start + n);
3456
3457 if buf.is_empty() {
3458 break;
3459 }
3460
3461 if n == 0 {
3462 process_cut_data(&buf, cfg, out)?;
3464 break;
3465 }
3466
3467 let process_end = match memchr::memrchr(line_delim, &buf) {
3469 Some(pos) => pos + 1,
3470 None => {
3471 continue;
3473 }
3474 };
3475
3476 process_cut_data(&buf[..process_end], cfg, out)?;
3478
3479 let leftover_len = buf.len() - process_end;
3481 if leftover_len > 0 {
3482 buf.copy_within(process_end.., 0);
3483 }
3484 buf.truncate(leftover_len);
3485 }
3486
3487 Ok(())
3488}
3489
/// Reads from `reader` until `buf` is full or end of input is reached.
///
/// Returns the number of bytes placed at the front of `buf`; a value smaller than
/// `buf.len()` means end of input was hit. Reads that fail with
/// `ErrorKind::Interrupted` are retried, including the very first one.
///
/// # Errors
/// Returns the first non-`Interrupted` error reported by `reader`.
#[inline]
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
3509
3510pub fn process_cut_data_mut(data: &mut [u8], cfg: &CutConfig) -> Option<usize> {
3522 if cfg.complement {
3523 return None;
3524 }
3525 if data.is_empty() || data[data.len() - 1] != cfg.line_delim {
3529 return None;
3530 }
3531
3532 match cfg.mode {
3533 CutMode::Fields => {
3534 if cfg.output_delim.len() != 1 || cfg.output_delim[0] != cfg.delim {
3536 return None;
3537 }
3538 if cfg.delim == cfg.line_delim {
3539 return None;
3540 }
3541 Some(cut_fields_inplace_general(
3542 data,
3543 cfg.delim,
3544 cfg.line_delim,
3545 cfg.ranges,
3546 cfg.suppress_no_delim,
3547 ))
3548 }
3549 CutMode::Bytes | CutMode::Characters => {
3550 if !cfg.output_delim.is_empty() {
3551 return None;
3552 }
3553 Some(cut_bytes_inplace_general(data, cfg.line_delim, cfg.ranges))
3554 }
3555 }
3556}
3557
3558fn cut_fields_inplace_general(
3561 data: &mut [u8],
3562 delim: u8,
3563 line_delim: u8,
3564 ranges: &[Range],
3565 suppress: bool,
3566) -> usize {
3567 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == 1 {
3569 return cut_field1_inplace(data, delim, line_delim, suppress);
3570 }
3571
3572 let len = data.len();
3573 if len == 0 {
3574 return 0;
3575 }
3576
3577 let max_field = ranges.last().map_or(0, |r| r.end);
3578 let max_delims = max_field.min(128);
3579 let mut wp: usize = 0;
3580 let mut rp: usize = 0;
3581
3582 while rp < len {
3583 let line_end = memchr::memchr(line_delim, &data[rp..])
3584 .map(|p| rp + p)
3585 .unwrap_or(len);
3586 let line_len = line_end - rp;
3587
3588 let mut delim_pos = [0usize; 128];
3590 let mut num_delims: usize = 0;
3591
3592 for pos in memchr_iter(delim, &data[rp..line_end]) {
3593 if num_delims < max_delims {
3594 delim_pos[num_delims] = pos;
3595 num_delims += 1;
3596 if num_delims >= max_delims {
3597 break;
3598 }
3599 }
3600 }
3601
3602 if num_delims == 0 {
3603 if !suppress {
3605 if wp != rp {
3606 data.copy_within(rp..line_end, wp);
3607 }
3608 wp += line_len;
3609 if line_end < len {
3610 data[wp] = line_delim;
3611 wp += 1;
3612 }
3613 }
3614 } else {
3615 let total_fields = num_delims + 1;
3616 let mut first_output = true;
3617
3618 for r in ranges {
3619 let range_start = r.start;
3620 let range_end = r.end.min(total_fields);
3621 if range_start > total_fields {
3622 break;
3623 }
3624 for field_num in range_start..=range_end {
3625 if field_num > total_fields {
3626 break;
3627 }
3628
3629 let field_start = if field_num == 1 {
3630 0
3631 } else if field_num - 2 < num_delims {
3632 delim_pos[field_num - 2] + 1
3633 } else {
3634 continue;
3635 };
3636 let field_end = if field_num <= num_delims {
3637 delim_pos[field_num - 1]
3638 } else {
3639 line_len
3640 };
3641
3642 if !first_output {
3643 data[wp] = delim;
3644 wp += 1;
3645 }
3646 let flen = field_end - field_start;
3647 if flen > 0 {
3648 data.copy_within(rp + field_start..rp + field_start + flen, wp);
3649 wp += flen;
3650 }
3651 first_output = false;
3652 }
3653 }
3654
3655 if !first_output && line_end < len {
3656 data[wp] = line_delim;
3657 wp += 1;
3658 } else if first_output && line_end < len {
3659 data[wp] = line_delim;
3661 wp += 1;
3662 }
3663 }
3664
3665 rp = if line_end < len { line_end + 1 } else { len };
3666 }
3667
3668 wp
3669}
3670
3671fn cut_bytes_inplace_general(data: &mut [u8], line_delim: u8, ranges: &[Range]) -> usize {
3673 let len = data.len();
3674 if len == 0 {
3675 return 0;
3676 }
3677
3678 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end == usize::MAX {
3680 return len;
3681 }
3682
3683 if ranges.len() == 1 && ranges[0].start == 1 && ranges[0].end < usize::MAX {
3685 return cut_bytes_from_start_inplace(data, line_delim, ranges[0].end);
3686 }
3687
3688 let mut wp: usize = 0;
3689 let mut rp: usize = 0;
3690
3691 while rp < len {
3692 let line_end = memchr::memchr(line_delim, &data[rp..])
3693 .map(|p| rp + p)
3694 .unwrap_or(len);
3695 let line_len = line_end - rp;
3696
3697 for r in ranges {
3698 let start = r.start.saturating_sub(1);
3699 let end = r.end.min(line_len);
3700 if start >= line_len {
3701 break;
3702 }
3703 let flen = end - start;
3704 if flen > 0 {
3705 data.copy_within(rp + start..rp + start + flen, wp);
3706 wp += flen;
3707 }
3708 }
3709
3710 if line_end < len {
3711 data[wp] = line_delim;
3712 wp += 1;
3713 }
3714
3715 rp = if line_end < len { line_end + 1 } else { len };
3716 }
3717
3718 wp
3719}
3720
3721fn cut_bytes_from_start_inplace(data: &mut [u8], line_delim: u8, max_bytes: usize) -> usize {
3723 let len = data.len();
3724
3725 let mut all_fit = true;
3727 let mut start = 0;
3728 for pos in memchr_iter(line_delim, data) {
3729 if pos - start > max_bytes {
3730 all_fit = false;
3731 break;
3732 }
3733 start = pos + 1;
3734 }
3735 if all_fit && start < len && len - start > max_bytes {
3736 all_fit = false;
3737 }
3738 if all_fit {
3739 return len;
3740 }
3741
3742 let mut wp: usize = 0;
3744 let mut rp: usize = 0;
3745
3746 while rp < len {
3747 let line_end = memchr::memchr(line_delim, &data[rp..])
3748 .map(|p| rp + p)
3749 .unwrap_or(len);
3750 let line_len = line_end - rp;
3751
3752 let take = line_len.min(max_bytes);
3753 if take > 0 && wp != rp {
3754 data.copy_within(rp..rp + take, wp);
3755 }
3756 wp += take;
3757
3758 if line_end < len {
3759 data[wp] = line_delim;
3760 wp += 1;
3761 }
3762
3763 rp = if line_end < len { line_end + 1 } else { len };
3764 }
3765
3766 wp
3767}
3768
/// Selection unit used when cutting a line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Select by byte position.
    Bytes,
    /// Select by character position; `process_cut_data` and `process_cut_data_mut`
    /// handle this exactly like [`CutMode::Bytes`].
    Characters,
    /// Select delimiter-separated fields.
    Fields,
}