1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, IoSlice, Write};
4
/// Inputs of at least this many bytes (1 MiB) are split into chunks and
/// processed in parallel; smaller inputs are handled on the calling thread.
const PARALLEL_THRESHOLD: usize = 1024 * 1024;

/// Maximum number of `IoSlice`s passed to a single `write_vectored` call
/// by `write_ioslices`.
// NOTE(review): presumably mirrors the common OS `IOV_MAX` of 1024 — confirm.
const MAX_IOV: usize = 1024;
11
/// Settings for one `cut` run; slice-typed fields are borrowed from the caller.
pub struct CutConfig<'a> {
    /// Selection mode. `CutMode` is declared outside this section.
    pub mode: CutMode,
    /// Selected positions, 1-based and inclusive.
    // NOTE(review): the range lookup stops at the first range that starts past
    // the queried position, so this presumably must be the sorted, merged
    // output of `parse_ranges` — confirm at the call sites.
    pub ranges: &'a [Range],
    /// When true, everything *not* covered by `ranges` is selected.
    pub complement: bool,
    /// Input field delimiter byte.
    pub delim: u8,
    /// Bytes written between selected fields on output.
    pub output_delim: &'a [u8],
    /// When true, lines that contain no `delim` are dropped instead of being
    /// copied to the output unchanged.
    pub suppress_no_delim: bool,
    /// Byte that terminates each input and output line.
    pub line_delim: u8,
}
22
/// An inclusive span of 1-based field or byte positions.
///
/// `end == usize::MAX` is how `parse_ranges` encodes an open-ended span
/// such as `3-`.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position.
    pub start: usize,
    /// Last selected position (inclusive).
    pub end: usize,
}
29
30pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
33 let mut ranges = Vec::new();
34
35 for part in spec.split(',') {
36 let part = part.trim();
37 if part.is_empty() {
38 continue;
39 }
40
41 if let Some(idx) = part.find('-') {
42 let left = &part[..idx];
43 let right = &part[idx + 1..];
44
45 let start = if left.is_empty() {
46 1
47 } else {
48 left.parse::<usize>()
49 .map_err(|_| format!("invalid range: '{}'", part))?
50 };
51
52 let end = if right.is_empty() {
53 usize::MAX
54 } else {
55 right
56 .parse::<usize>()
57 .map_err(|_| format!("invalid range: '{}'", part))?
58 };
59
60 if start == 0 {
61 return Err("fields and positions are numbered from 1".to_string());
62 }
63 if start > end {
64 return Err(format!("invalid decreasing range: '{}'", part));
65 }
66
67 ranges.push(Range { start, end });
68 } else {
69 let n = part
70 .parse::<usize>()
71 .map_err(|_| format!("invalid field: '{}'", part))?;
72 if n == 0 {
73 return Err("fields and positions are numbered from 1".to_string());
74 }
75 ranges.push(Range { start: n, end: n });
76 }
77 }
78
79 if ranges.is_empty() {
80 return Err("you must specify a list of bytes, characters, or fields".to_string());
81 }
82
83 ranges.sort_by_key(|r| (r.start, r.end));
85 let mut merged = vec![ranges[0].clone()];
86 for r in &ranges[1..] {
87 let last = merged.last_mut().unwrap();
88 if r.start <= last.end.saturating_add(1) {
89 last.end = last.end.max(r.end);
90 } else {
91 merged.push(r.clone());
92 }
93 }
94
95 Ok(merged)
96}
97
98#[inline(always)]
101fn in_ranges(ranges: &[Range], pos: usize) -> bool {
102 for r in ranges {
103 if pos < r.start {
104 return false;
105 }
106 if pos <= r.end {
107 return true;
108 }
109 }
110 false
111}
112
113#[inline]
116fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
117 let mut mask: u64 = 0;
118 for i in 1..=64u32 {
119 let in_range = in_ranges(ranges, i as usize);
120 if in_range != complement {
121 mask |= 1u64 << (i - 1);
122 }
123 }
124 mask
125}
126
127#[inline(always)]
129fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
130 if field_num <= 64 {
131 (mask >> (field_num - 1)) & 1 == 1
132 } else {
133 in_ranges(ranges, field_num) != complement
134 }
135}
136
/// Appends `data` to `buf` without checking or growing the capacity.
///
/// # Safety
///
/// The caller must guarantee that `buf` already has room for `data`, i.e.
/// `buf.capacity() - buf.len() >= data.len()`. Otherwise this writes past the
/// end of the allocation. Debug builds assert the condition.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    let len = buf.len();
    debug_assert!(
        buf.capacity() - len >= data.len(),
        "buf_extend: spare capacity too small"
    );
    // SAFETY: the caller guarantees `data.len()` bytes of spare capacity, so
    // the destination range lies inside the allocation; `data` cannot overlap
    // it because `buf` is exclusively borrowed. The bytes exposed by
    // `set_len` have just been initialised by the copy.
    unsafe {
        std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr().add(len), data.len());
        buf.set_len(len + data.len());
    }
}
149
/// Appends the single byte `b` to `buf` without checking or growing the
/// capacity.
///
/// # Safety
///
/// The caller must guarantee `buf.len() < buf.capacity()`. Otherwise this
/// writes one byte past the end of the allocation. Debug builds assert the
/// condition.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    let len = buf.len();
    debug_assert!(len < buf.capacity(), "buf_push: no spare capacity");
    // SAFETY: the caller guarantees one byte of spare capacity, so index
    // `len` is inside the allocation; it is initialised before `set_len`
    // exposes it.
    unsafe {
        buf.as_mut_ptr().add(len).write(b);
        buf.set_len(len + 1);
    }
}
160
161#[inline]
164fn write_ioslices(out: &mut impl Write, slices: &[IoSlice]) -> io::Result<()> {
165 if slices.is_empty() {
166 return Ok(());
167 }
168 for batch in slices.chunks(MAX_IOV) {
169 let total: usize = batch.iter().map(|s| s.len()).sum();
170 match out.write_vectored(batch) {
171 Ok(n) if n >= total => continue,
172 Ok(mut written) => {
173 for slice in batch {
175 let slen = slice.len();
176 if written >= slen {
177 written -= slen;
178 continue;
179 }
180 if written > 0 {
181 out.write_all(&slice[written..])?;
182 written = 0;
183 } else {
184 out.write_all(slice)?;
185 }
186 }
187 }
188 Err(e) => return Err(e),
189 }
190 }
191 Ok(())
192}
193
194fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
198 let num_threads = rayon::current_num_threads().max(1);
199 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
200 return vec![data];
201 }
202
203 let chunk_size = data.len() / num_threads;
204 let mut chunks = Vec::with_capacity(num_threads);
205 let mut pos = 0;
206
207 for _ in 0..num_threads - 1 {
208 let target = pos + chunk_size;
209 if target >= data.len() {
210 break;
211 }
212 let boundary = memchr::memchr(line_delim, &data[target..])
213 .map(|p| target + p + 1)
214 .unwrap_or(data.len());
215 if boundary > pos {
216 chunks.push(&data[pos..boundary]);
217 }
218 pos = boundary;
219 }
220
221 if pos < data.len() {
222 chunks.push(&data[pos..]);
223 }
224
225 chunks
226}
227
228fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
232 let delim = cfg.delim;
233 let line_delim = cfg.line_delim;
234 let ranges = cfg.ranges;
235 let complement = cfg.complement;
236 let output_delim = cfg.output_delim;
237 let suppress = cfg.suppress_no_delim;
238
239 if !complement && memchr::memchr(delim, data).is_none() {
241 if suppress {
242 return Ok(());
243 }
244 out.write_all(data)?;
245 if !data.is_empty() && *data.last().unwrap() != line_delim {
246 out.write_all(&[line_delim])?;
247 }
248 return Ok(());
249 }
250
251 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
253 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
254 }
255
256 if complement
258 && ranges.len() == 1
259 && ranges[0].start == ranges[0].end
260 && output_delim.len() == 1
261 && output_delim[0] == delim
262 {
263 return process_complement_single_field(
264 data,
265 delim,
266 line_delim,
267 ranges[0].start,
268 suppress,
269 out,
270 );
271 }
272
273 if !complement
275 && ranges.len() == 1
276 && ranges[0].start == 1
277 && output_delim.len() == 1
278 && output_delim[0] == delim
279 && ranges[0].end < usize::MAX
280 {
281 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
282 }
283
284 if !complement
286 && ranges.len() == 1
287 && ranges[0].end == usize::MAX
288 && ranges[0].start > 1
289 && output_delim.len() == 1
290 && output_delim[0] == delim
291 {
292 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
293 }
294
295 if !complement
297 && ranges.len() == 1
298 && ranges[0].start > 1
299 && ranges[0].end < usize::MAX
300 && output_delim.len() == 1
301 && output_delim[0] == delim
302 {
303 return process_fields_mid_range(
304 data,
305 delim,
306 line_delim,
307 ranges[0].start,
308 ranges[0].end,
309 suppress,
310 out,
311 );
312 }
313
314 let max_field = if complement {
316 usize::MAX
317 } else {
318 ranges.last().map(|r| r.end).unwrap_or(0)
319 };
320 let field_mask = compute_field_mask(ranges, complement);
321
322 if data.len() >= PARALLEL_THRESHOLD {
323 let chunks = split_into_chunks(data, line_delim);
324 let results: Vec<Vec<u8>> = chunks
325 .par_iter()
326 .map(|chunk| {
327 let mut buf = Vec::with_capacity(chunk.len());
328 process_fields_chunk(
329 chunk,
330 delim,
331 ranges,
332 output_delim,
333 suppress,
334 max_field,
335 field_mask,
336 line_delim,
337 complement,
338 &mut buf,
339 );
340 buf
341 })
342 .collect();
343 let slices: Vec<IoSlice> = results
345 .iter()
346 .filter(|r| !r.is_empty())
347 .map(|r| IoSlice::new(r))
348 .collect();
349 write_ioslices(out, &slices)?;
350 } else {
351 let mut buf = Vec::with_capacity(data.len());
352 process_fields_chunk(
353 data,
354 delim,
355 ranges,
356 output_delim,
357 suppress,
358 max_field,
359 field_mask,
360 line_delim,
361 complement,
362 &mut buf,
363 );
364 if !buf.is_empty() {
365 out.write_all(&buf)?;
366 }
367 }
368 Ok(())
369}
370
371fn process_fields_chunk(
376 data: &[u8],
377 delim: u8,
378 ranges: &[Range],
379 output_delim: &[u8],
380 suppress: bool,
381 max_field: usize,
382 field_mask: u64,
383 line_delim: u8,
384 complement: bool,
385 buf: &mut Vec<u8>,
386) {
387 if delim != line_delim && max_field < usize::MAX && !complement {
394 buf.reserve(data.len());
395 let mut start = 0;
396 for end_pos in memchr_iter(line_delim, data) {
397 let line = &data[start..end_pos];
398 extract_fields_to_buf(
399 line,
400 delim,
401 ranges,
402 output_delim,
403 suppress,
404 max_field,
405 field_mask,
406 line_delim,
407 buf,
408 complement,
409 );
410 start = end_pos + 1;
411 }
412 if start < data.len() {
413 extract_fields_to_buf(
414 &data[start..],
415 delim,
416 ranges,
417 output_delim,
418 suppress,
419 max_field,
420 field_mask,
421 line_delim,
422 buf,
423 complement,
424 );
425 }
426 return;
427 }
428
429 if delim != line_delim {
433 buf.reserve(data.len());
434
435 let data_len = data.len();
436 let base = data.as_ptr();
437 let mut line_start: usize = 0;
438 let mut field_start: usize = 0;
439 let mut field_num: usize = 1;
440 let mut first_output = true;
441 let mut has_delim = false;
442
443 for pos in memchr::memchr2_iter(delim, line_delim, data) {
444 let byte = unsafe { *base.add(pos) };
445
446 if byte == line_delim {
447 if (field_num <= max_field || complement)
449 && has_delim
450 && is_selected(field_num, field_mask, ranges, complement)
451 {
452 if !first_output {
453 unsafe { buf_extend(buf, output_delim) };
454 }
455 unsafe {
456 buf_extend(
457 buf,
458 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
459 )
460 };
461 first_output = false;
462 }
463
464 if !first_output {
465 unsafe { buf_push(buf, line_delim) };
466 } else if !has_delim {
467 if !suppress {
468 unsafe {
469 buf_extend(
470 buf,
471 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
472 );
473 buf_push(buf, line_delim);
474 }
475 }
476 } else {
477 unsafe { buf_push(buf, line_delim) };
478 }
479
480 line_start = pos + 1;
482 field_start = pos + 1;
483 field_num = 1;
484 first_output = true;
485 has_delim = false;
486 } else {
487 has_delim = true;
489
490 if is_selected(field_num, field_mask, ranges, complement) {
491 if !first_output {
492 unsafe { buf_extend(buf, output_delim) };
493 }
494 unsafe {
495 buf_extend(
496 buf,
497 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
498 )
499 };
500 first_output = false;
501 }
502
503 field_num += 1;
504 field_start = pos + 1;
505 }
506 }
507
508 if line_start < data_len {
510 if line_start < data_len {
511 if (field_num <= max_field || complement)
512 && has_delim
513 && is_selected(field_num, field_mask, ranges, complement)
514 {
515 if !first_output {
516 unsafe { buf_extend(buf, output_delim) };
517 }
518 unsafe {
519 buf_extend(
520 buf,
521 std::slice::from_raw_parts(
522 base.add(field_start),
523 data_len - field_start,
524 ),
525 )
526 };
527 first_output = false;
528 }
529
530 if !first_output {
531 unsafe { buf_push(buf, line_delim) };
532 } else if !has_delim {
533 if !suppress {
534 unsafe {
535 buf_extend(
536 buf,
537 std::slice::from_raw_parts(
538 base.add(line_start),
539 data_len - line_start,
540 ),
541 );
542 buf_push(buf, line_delim);
543 }
544 }
545 } else {
546 unsafe { buf_push(buf, line_delim) };
547 }
548 }
549 }
550
551 return;
552 }
553
554 let mut start = 0;
556 for end_pos in memchr_iter(line_delim, data) {
557 let line = &data[start..end_pos];
558 extract_fields_to_buf(
559 line,
560 delim,
561 ranges,
562 output_delim,
563 suppress,
564 max_field,
565 field_mask,
566 line_delim,
567 buf,
568 complement,
569 );
570 start = end_pos + 1;
571 }
572 if start < data.len() {
573 extract_fields_to_buf(
574 &data[start..],
575 delim,
576 ranges,
577 output_delim,
578 suppress,
579 max_field,
580 field_mask,
581 line_delim,
582 buf,
583 complement,
584 );
585 }
586}
587
588fn process_single_field(
594 data: &[u8],
595 delim: u8,
596 line_delim: u8,
597 target: usize,
598 suppress: bool,
599 out: &mut impl Write,
600) -> io::Result<()> {
601 let target_idx = target - 1;
602
603 if delim != line_delim {
605 if data.len() >= PARALLEL_THRESHOLD {
606 let chunks = split_into_chunks(data, line_delim);
607 let results: Vec<Vec<u8>> = chunks
608 .par_iter()
609 .map(|chunk| {
610 let mut buf = Vec::with_capacity(chunk.len());
611 process_nth_field_combined(
612 chunk, delim, line_delim, target_idx, suppress, &mut buf,
613 );
614 buf
615 })
616 .collect();
617 let slices: Vec<IoSlice> = results
619 .iter()
620 .filter(|r| !r.is_empty())
621 .map(|r| IoSlice::new(r))
622 .collect();
623 write_ioslices(out, &slices)?;
624 } else if target_idx == 0 && !suppress {
625 single_field1_zerocopy(data, delim, line_delim, out)?;
630 } else if target_idx <= 3 && !suppress {
631 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
637 process_small_field_combined(data, delim, line_delim, target_idx, &mut buf);
638 if !buf.is_empty() {
639 out.write_all(&buf)?;
640 }
641 } else {
642 let mut buf = Vec::with_capacity(data.len().min(4 * 1024 * 1024));
646 process_nth_field_combined(data, delim, line_delim, target_idx, suppress, &mut buf);
647 if !buf.is_empty() {
648 out.write_all(&buf)?;
649 }
650 }
651 return Ok(());
652 }
653
654 if data.len() >= PARALLEL_THRESHOLD {
656 let chunks = split_into_chunks(data, line_delim);
657 let results: Vec<Vec<u8>> = chunks
658 .par_iter()
659 .map(|chunk| {
660 let mut buf = Vec::with_capacity(chunk.len() / 4);
661 process_single_field_chunk(
662 chunk, delim, target_idx, line_delim, suppress, &mut buf,
663 );
664 buf
665 })
666 .collect();
667 let slices: Vec<IoSlice> = results
669 .iter()
670 .filter(|r| !r.is_empty())
671 .map(|r| IoSlice::new(r))
672 .collect();
673 write_ioslices(out, &slices)?;
674 } else {
675 let mut buf = Vec::with_capacity(data.len() / 4);
676 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
677 if !buf.is_empty() {
678 out.write_all(&buf)?;
679 }
680 }
681 Ok(())
682}
683
684fn process_complement_single_field(
686 data: &[u8],
687 delim: u8,
688 line_delim: u8,
689 skip_field: usize,
690 suppress: bool,
691 out: &mut impl Write,
692) -> io::Result<()> {
693 let skip_idx = skip_field - 1;
694
695 if data.len() >= PARALLEL_THRESHOLD {
696 let chunks = split_into_chunks(data, line_delim);
697 let results: Vec<Vec<u8>> = chunks
698 .par_iter()
699 .map(|chunk| {
700 let mut buf = Vec::with_capacity(chunk.len());
701 complement_single_field_chunk(
702 chunk, delim, skip_idx, line_delim, suppress, &mut buf,
703 );
704 buf
705 })
706 .collect();
707 let slices: Vec<IoSlice> = results
709 .iter()
710 .filter(|r| !r.is_empty())
711 .map(|r| IoSlice::new(r))
712 .collect();
713 write_ioslices(out, &slices)?;
714 } else {
715 let mut buf = Vec::with_capacity(data.len());
716 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
717 if !buf.is_empty() {
718 out.write_all(&buf)?;
719 }
720 }
721 Ok(())
722}
723
724fn complement_single_field_chunk(
726 data: &[u8],
727 delim: u8,
728 skip_idx: usize,
729 line_delim: u8,
730 suppress: bool,
731 buf: &mut Vec<u8>,
732) {
733 let mut start = 0;
734 for end_pos in memchr_iter(line_delim, data) {
735 let line = &data[start..end_pos];
736 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
737 start = end_pos + 1;
738 }
739 if start < data.len() {
740 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
741 }
742}
743
744#[inline(always)]
747fn complement_single_field_line(
748 line: &[u8],
749 delim: u8,
750 skip_idx: usize,
751 line_delim: u8,
752 suppress: bool,
753 buf: &mut Vec<u8>,
754) {
755 let len = line.len();
756 if len == 0 {
757 if !suppress {
758 buf.push(line_delim);
759 }
760 return;
761 }
762
763 buf.reserve(len + 1);
764 let base = line.as_ptr();
765
766 let mut field_idx = 0;
767 let mut field_start = 0;
768 let mut first_output = true;
769 let mut has_delim = false;
770
771 for pos in memchr_iter(delim, line) {
772 has_delim = true;
773 if field_idx != skip_idx {
774 if !first_output {
775 unsafe { buf_push(buf, delim) };
776 }
777 unsafe {
778 buf_extend(
779 buf,
780 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
781 )
782 };
783 first_output = false;
784 }
785 field_idx += 1;
786 field_start = pos + 1;
787 }
788
789 if !has_delim {
790 if !suppress {
791 unsafe {
792 buf_extend(buf, line);
793 buf_push(buf, line_delim);
794 }
795 }
796 return;
797 }
798
799 if field_idx != skip_idx {
801 if !first_output {
802 unsafe { buf_push(buf, delim) };
803 }
804 unsafe {
805 buf_extend(
806 buf,
807 std::slice::from_raw_parts(base.add(field_start), len - field_start),
808 )
809 };
810 }
811
812 unsafe { buf_push(buf, line_delim) };
813}
814
815fn process_fields_prefix(
819 data: &[u8],
820 delim: u8,
821 line_delim: u8,
822 last_field: usize,
823 suppress: bool,
824 out: &mut impl Write,
825) -> io::Result<()> {
826 if data.len() >= PARALLEL_THRESHOLD {
827 let chunks = split_into_chunks(data, line_delim);
828 let results: Vec<Vec<u8>> = chunks
829 .par_iter()
830 .map(|chunk| {
831 let mut buf = Vec::with_capacity(chunk.len());
832 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
833 buf
834 })
835 .collect();
836 let slices: Vec<IoSlice> = results
838 .iter()
839 .filter(|r| !r.is_empty())
840 .map(|r| IoSlice::new(r))
841 .collect();
842 write_ioslices(out, &slices)?;
843 } else if !suppress {
844 fields_prefix_zerocopy(data, delim, line_delim, last_field, out)?;
848 } else {
849 let mut buf = Vec::with_capacity(data.len());
850 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
851 if !buf.is_empty() {
852 out.write_all(&buf)?;
853 }
854 }
855 Ok(())
856}
857
858#[inline]
863fn fields_prefix_zerocopy(
864 data: &[u8],
865 delim: u8,
866 line_delim: u8,
867 last_field: usize,
868 out: &mut impl Write,
869) -> io::Result<()> {
870 let mut start = 0;
871 let mut run_start: usize = 0;
872
873 for end_pos in memchr_iter(line_delim, data) {
874 let line = &data[start..end_pos];
875 let mut field_count = 1;
877 let mut truncate_at: Option<usize> = None;
878 for dpos in memchr_iter(delim, line) {
879 if field_count >= last_field {
880 truncate_at = Some(start + dpos);
881 break;
882 }
883 field_count += 1;
884 }
885
886 if let Some(trunc_pos) = truncate_at {
887 if run_start < start {
889 out.write_all(&data[run_start..start])?;
890 }
891 out.write_all(&data[start..trunc_pos])?;
892 out.write_all(&[line_delim])?;
893 run_start = end_pos + 1;
894 }
895 start = end_pos + 1;
897 }
898 if start < data.len() {
900 let line = &data[start..];
901 let mut field_count = 1;
902 let mut truncate_at: Option<usize> = None;
903 for dpos in memchr_iter(delim, line) {
904 if field_count >= last_field {
905 truncate_at = Some(start + dpos);
906 break;
907 }
908 field_count += 1;
909 }
910 if let Some(trunc_pos) = truncate_at {
911 if run_start < start {
912 out.write_all(&data[run_start..start])?;
913 }
914 out.write_all(&data[start..trunc_pos])?;
915 out.write_all(&[line_delim])?;
916 return Ok(());
917 }
918 }
919 if run_start < data.len() {
921 out.write_all(&data[run_start..])?;
922 if !data.is_empty() && *data.last().unwrap() != line_delim {
923 out.write_all(&[line_delim])?;
924 }
925 }
926 Ok(())
927}
928
929fn fields_prefix_chunk(
931 data: &[u8],
932 delim: u8,
933 line_delim: u8,
934 last_field: usize,
935 suppress: bool,
936 buf: &mut Vec<u8>,
937) {
938 let mut start = 0;
939 for end_pos in memchr_iter(line_delim, data) {
940 let line = &data[start..end_pos];
941 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
942 start = end_pos + 1;
943 }
944 if start < data.len() {
945 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
946 }
947}
948
949#[inline(always)]
952fn fields_prefix_line(
953 line: &[u8],
954 delim: u8,
955 line_delim: u8,
956 last_field: usize,
957 suppress: bool,
958 buf: &mut Vec<u8>,
959) {
960 let len = line.len();
961 if len == 0 {
962 if !suppress {
963 buf.push(line_delim);
964 }
965 return;
966 }
967
968 buf.reserve(len + 1);
969 let base = line.as_ptr();
970
971 let mut field_count = 1;
972 let mut has_delim = false;
973
974 for pos in memchr_iter(delim, line) {
975 has_delim = true;
976 if field_count >= last_field {
977 unsafe {
978 buf_extend(buf, std::slice::from_raw_parts(base, pos));
979 buf_push(buf, line_delim);
980 }
981 return;
982 }
983 field_count += 1;
984 }
985
986 if !has_delim {
987 if !suppress {
988 unsafe {
989 buf_extend(buf, line);
990 buf_push(buf, line_delim);
991 }
992 }
993 return;
994 }
995
996 unsafe {
997 buf_extend(buf, line);
998 buf_push(buf, line_delim);
999 }
1000}
1001
1002fn process_fields_suffix(
1004 data: &[u8],
1005 delim: u8,
1006 line_delim: u8,
1007 start_field: usize,
1008 suppress: bool,
1009 out: &mut impl Write,
1010) -> io::Result<()> {
1011 if data.len() >= PARALLEL_THRESHOLD {
1012 let chunks = split_into_chunks(data, line_delim);
1013 let results: Vec<Vec<u8>> = chunks
1014 .par_iter()
1015 .map(|chunk| {
1016 let mut buf = Vec::with_capacity(chunk.len());
1017 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
1018 buf
1019 })
1020 .collect();
1021 let slices: Vec<IoSlice> = results
1023 .iter()
1024 .filter(|r| !r.is_empty())
1025 .map(|r| IoSlice::new(r))
1026 .collect();
1027 write_ioslices(out, &slices)?;
1028 } else {
1029 let mut buf = Vec::with_capacity(data.len());
1030 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
1031 if !buf.is_empty() {
1032 out.write_all(&buf)?;
1033 }
1034 }
1035 Ok(())
1036}
1037
1038fn fields_suffix_chunk(
1040 data: &[u8],
1041 delim: u8,
1042 line_delim: u8,
1043 start_field: usize,
1044 suppress: bool,
1045 buf: &mut Vec<u8>,
1046) {
1047 let mut start = 0;
1048 for end_pos in memchr_iter(line_delim, data) {
1049 let line = &data[start..end_pos];
1050 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
1051 start = end_pos + 1;
1052 }
1053 if start < data.len() {
1054 fields_suffix_line(
1055 &data[start..],
1056 delim,
1057 line_delim,
1058 start_field,
1059 suppress,
1060 buf,
1061 );
1062 }
1063}
1064
1065#[inline(always)]
1068fn fields_suffix_line(
1069 line: &[u8],
1070 delim: u8,
1071 line_delim: u8,
1072 start_field: usize,
1073 suppress: bool,
1074 buf: &mut Vec<u8>,
1075) {
1076 let len = line.len();
1077 if len == 0 {
1078 if !suppress {
1079 buf.push(line_delim);
1080 }
1081 return;
1082 }
1083
1084 buf.reserve(len + 1);
1085 let base = line.as_ptr();
1086
1087 let skip_delims = start_field - 1;
1088 let mut delim_count = 0;
1089 let mut has_delim = false;
1090
1091 for pos in memchr_iter(delim, line) {
1092 has_delim = true;
1093 delim_count += 1;
1094 if delim_count >= skip_delims {
1095 unsafe {
1096 buf_extend(
1097 buf,
1098 std::slice::from_raw_parts(base.add(pos + 1), len - pos - 1),
1099 );
1100 buf_push(buf, line_delim);
1101 }
1102 return;
1103 }
1104 }
1105
1106 if !has_delim {
1107 if !suppress {
1108 unsafe {
1109 buf_extend(buf, line);
1110 buf_push(buf, line_delim);
1111 }
1112 }
1113 return;
1114 }
1115
1116 unsafe { buf_push(buf, line_delim) };
1118}
1119
1120fn process_fields_mid_range(
1123 data: &[u8],
1124 delim: u8,
1125 line_delim: u8,
1126 start_field: usize,
1127 end_field: usize,
1128 suppress: bool,
1129 out: &mut impl Write,
1130) -> io::Result<()> {
1131 if data.len() >= PARALLEL_THRESHOLD {
1132 let chunks = split_into_chunks(data, line_delim);
1133 let results: Vec<Vec<u8>> = chunks
1134 .par_iter()
1135 .map(|chunk| {
1136 let mut buf = Vec::with_capacity(chunk.len());
1137 fields_mid_range_chunk(
1138 chunk,
1139 delim,
1140 line_delim,
1141 start_field,
1142 end_field,
1143 suppress,
1144 &mut buf,
1145 );
1146 buf
1147 })
1148 .collect();
1149 let slices: Vec<IoSlice> = results
1150 .iter()
1151 .filter(|r| !r.is_empty())
1152 .map(|r| IoSlice::new(r))
1153 .collect();
1154 write_ioslices(out, &slices)?;
1155 } else {
1156 let mut buf = Vec::with_capacity(data.len());
1157 fields_mid_range_chunk(
1158 data,
1159 delim,
1160 line_delim,
1161 start_field,
1162 end_field,
1163 suppress,
1164 &mut buf,
1165 );
1166 if !buf.is_empty() {
1167 out.write_all(&buf)?;
1168 }
1169 }
1170 Ok(())
1171}
1172
1173fn fields_mid_range_chunk(
1175 data: &[u8],
1176 delim: u8,
1177 line_delim: u8,
1178 start_field: usize,
1179 end_field: usize,
1180 suppress: bool,
1181 buf: &mut Vec<u8>,
1182) {
1183 let mut start = 0;
1184 for end_pos in memchr_iter(line_delim, data) {
1185 let line = &data[start..end_pos];
1186 fields_mid_range_line(
1187 line,
1188 delim,
1189 line_delim,
1190 start_field,
1191 end_field,
1192 suppress,
1193 buf,
1194 );
1195 start = end_pos + 1;
1196 }
1197 if start < data.len() {
1198 fields_mid_range_line(
1199 &data[start..],
1200 delim,
1201 line_delim,
1202 start_field,
1203 end_field,
1204 suppress,
1205 buf,
1206 );
1207 }
1208}
1209
1210#[inline(always)]
1214fn fields_mid_range_line(
1215 line: &[u8],
1216 delim: u8,
1217 line_delim: u8,
1218 start_field: usize,
1219 end_field: usize,
1220 suppress: bool,
1221 buf: &mut Vec<u8>,
1222) {
1223 let len = line.len();
1224 if len == 0 {
1225 if !suppress {
1226 buf.push(line_delim);
1227 }
1228 return;
1229 }
1230
1231 buf.reserve(len + 1);
1232 let base = line.as_ptr();
1233
1234 let skip_before = start_field - 1; let field_span = end_field - start_field; let mut delim_count = 0;
1238 let mut range_start = 0;
1239 let mut has_delim = false;
1240
1241 for pos in memchr_iter(delim, line) {
1242 has_delim = true;
1243 delim_count += 1;
1244 if delim_count == skip_before {
1245 range_start = pos + 1;
1246 }
1247 if delim_count == skip_before + field_span + 1 {
1248 if skip_before == 0 {
1250 range_start = 0;
1251 }
1252 unsafe {
1253 buf_extend(
1254 buf,
1255 std::slice::from_raw_parts(base.add(range_start), pos - range_start),
1256 );
1257 buf_push(buf, line_delim);
1258 }
1259 return;
1260 }
1261 }
1262
1263 if !has_delim {
1264 if !suppress {
1265 unsafe {
1266 buf_extend(buf, line);
1267 buf_push(buf, line_delim);
1268 }
1269 }
1270 return;
1271 }
1272
1273 if delim_count >= skip_before {
1275 if skip_before == 0 {
1277 range_start = 0;
1278 }
1279 unsafe {
1280 buf_extend(
1281 buf,
1282 std::slice::from_raw_parts(base.add(range_start), len - range_start),
1283 );
1284 buf_push(buf, line_delim);
1285 }
1286 } else {
1287 unsafe { buf_push(buf, line_delim) };
1289 }
1290}
1291
1292fn process_nth_field_combined(
1300 data: &[u8],
1301 delim: u8,
1302 line_delim: u8,
1303 target_idx: usize,
1304 suppress: bool,
1305 buf: &mut Vec<u8>,
1306) {
1307 buf.reserve(data.len());
1308
1309 let data_len = data.len();
1310 let base = data.as_ptr();
1311 let mut line_start: usize = 0;
1312 let mut field_start: usize = 0;
1313 let mut field_idx: usize = 0;
1314 let mut has_delim = false;
1315 let mut emitted = false;
1316
1317 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1318 let byte = unsafe { *base.add(pos) };
1319
1320 if byte == line_delim {
1321 if !emitted {
1323 if has_delim && field_idx == target_idx {
1324 unsafe {
1326 buf_extend(
1327 buf,
1328 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
1329 );
1330 buf_push(buf, line_delim);
1331 }
1332 } else if has_delim {
1333 unsafe {
1335 buf_push(buf, line_delim);
1336 }
1337 } else if !suppress {
1338 unsafe {
1340 buf_extend(
1341 buf,
1342 std::slice::from_raw_parts(base.add(line_start), pos - line_start),
1343 );
1344 buf_push(buf, line_delim);
1345 }
1346 }
1347 }
1348 line_start = pos + 1;
1350 field_start = pos + 1;
1351 field_idx = 0;
1352 has_delim = false;
1353 emitted = false;
1354 } else {
1355 has_delim = true;
1357 if field_idx == target_idx {
1358 unsafe {
1359 buf_extend(
1360 buf,
1361 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
1362 );
1363 buf_push(buf, line_delim);
1364 }
1365 emitted = true;
1366 }
1367 field_idx += 1;
1368 field_start = pos + 1;
1369 }
1370 }
1371
1372 if line_start < data_len && !emitted {
1374 if has_delim && field_idx == target_idx {
1375 unsafe {
1376 buf_extend(
1377 buf,
1378 std::slice::from_raw_parts(base.add(field_start), data_len - field_start),
1379 );
1380 buf_push(buf, line_delim);
1381 }
1382 } else if has_delim {
1383 unsafe {
1384 buf_push(buf, line_delim);
1385 }
1386 } else if !suppress {
1387 unsafe {
1388 buf_extend(
1389 buf,
1390 std::slice::from_raw_parts(base.add(line_start), data_len - line_start),
1391 );
1392 buf_push(buf, line_delim);
1393 }
1394 }
1395 }
1396}
1397
1398#[inline]
1402fn single_field1_zerocopy(
1403 data: &[u8],
1404 delim: u8,
1405 line_delim: u8,
1406 out: &mut impl Write,
1407) -> io::Result<()> {
1408 let mut line_start: usize = 0;
1409 let mut run_start: usize = 0;
1410 let mut first_delim: Option<usize> = None;
1411
1412 for pos in memchr::memchr2_iter(delim, line_delim, data) {
1413 let byte = unsafe { *data.get_unchecked(pos) };
1414
1415 if byte == line_delim {
1416 if let Some(dp) = first_delim {
1418 if run_start < line_start {
1421 out.write_all(&data[run_start..line_start])?;
1422 }
1423 out.write_all(&data[line_start..dp])?;
1424 out.write_all(&[line_delim])?;
1425 run_start = pos + 1;
1426 }
1427 line_start = pos + 1;
1429 first_delim = None;
1430 } else {
1431 if first_delim.is_none() {
1433 first_delim = Some(pos);
1434 }
1435 }
1436 }
1437
1438 if line_start < data.len() {
1440 if let Some(dp) = first_delim {
1441 if run_start < line_start {
1442 out.write_all(&data[run_start..line_start])?;
1443 }
1444 out.write_all(&data[line_start..dp])?;
1445 out.write_all(&[line_delim])?;
1446 return Ok(());
1447 }
1448 }
1449
1450 if run_start < data.len() {
1452 out.write_all(&data[run_start..])?;
1453 if !data.is_empty() && *data.last().unwrap() != line_delim {
1454 out.write_all(&[line_delim])?;
1455 }
1456 }
1457 Ok(())
1458}
1459
1460fn process_small_field_combined(
1466 data: &[u8],
1467 delim: u8,
1468 line_delim: u8,
1469 target_idx: usize,
1470 buf: &mut Vec<u8>,
1471) {
1472 buf.reserve(data.len());
1473 let base = data.as_ptr();
1474 let data_len = data.len();
1475 let mut start = 0;
1476 for end_pos in memchr_iter(line_delim, data) {
1477 let line_len = end_pos - start;
1478 let line = unsafe { std::slice::from_raw_parts(base.add(start), line_len) };
1479 let line_base = line.as_ptr();
1480 let mut field_start = 0;
1482 let mut found_start = target_idx == 0;
1483 let mut delim_count = 0;
1484 if !found_start {
1485 let mut search_start = 0;
1486 while let Some(pos) = memchr::memchr(delim, unsafe {
1487 std::slice::from_raw_parts(line_base.add(search_start), line_len - search_start)
1488 }) {
1489 delim_count += 1;
1490 if delim_count == target_idx {
1491 field_start = search_start + pos + 1;
1492 found_start = true;
1493 break;
1494 }
1495 search_start = search_start + pos + 1;
1496 }
1497 }
1498 if !found_start {
1499 unsafe {
1501 buf_extend(buf, line);
1502 buf_push(buf, line_delim);
1503 }
1504 } else if field_start >= line_len {
1505 unsafe { buf_push(buf, line_delim) };
1507 } else {
1508 match memchr::memchr(delim, unsafe {
1510 std::slice::from_raw_parts(line_base.add(field_start), line_len - field_start)
1511 }) {
1512 Some(pos) => unsafe {
1513 buf_extend(
1514 buf,
1515 std::slice::from_raw_parts(line_base.add(field_start), pos),
1516 );
1517 buf_push(buf, line_delim);
1518 },
1519 None => unsafe {
1520 buf_extend(
1521 buf,
1522 std::slice::from_raw_parts(
1523 line_base.add(field_start),
1524 line_len - field_start,
1525 ),
1526 );
1527 buf_push(buf, line_delim);
1528 },
1529 }
1530 }
1531 start = end_pos + 1;
1532 }
1533 if start < data_len {
1535 let line_len = data_len - start;
1536 let line = unsafe { std::slice::from_raw_parts(base.add(start), line_len) };
1537 let line_base = line.as_ptr();
1538 let mut field_start = 0;
1539 let mut found_start = target_idx == 0;
1540 let mut delim_count = 0;
1541 if !found_start {
1542 let mut search_start = 0;
1543 while let Some(pos) = memchr::memchr(delim, unsafe {
1544 std::slice::from_raw_parts(line_base.add(search_start), line_len - search_start)
1545 }) {
1546 delim_count += 1;
1547 if delim_count == target_idx {
1548 field_start = search_start + pos + 1;
1549 found_start = true;
1550 break;
1551 }
1552 search_start = search_start + pos + 1;
1553 }
1554 }
1555 if !found_start {
1556 unsafe {
1557 buf_extend(buf, line);
1558 buf_push(buf, line_delim);
1559 }
1560 } else if field_start >= line_len {
1561 unsafe { buf_push(buf, line_delim) };
1562 } else {
1563 match memchr::memchr(delim, unsafe {
1564 std::slice::from_raw_parts(line_base.add(field_start), line_len - field_start)
1565 }) {
1566 Some(pos) => unsafe {
1567 buf_extend(
1568 buf,
1569 std::slice::from_raw_parts(line_base.add(field_start), pos),
1570 );
1571 buf_push(buf, line_delim);
1572 },
1573 None => unsafe {
1574 buf_extend(
1575 buf,
1576 std::slice::from_raw_parts(
1577 line_base.add(field_start),
1578 line_len - field_start,
1579 ),
1580 );
1581 buf_push(buf, line_delim);
1582 },
1583 }
1584 }
1585 }
1586}
1587
1588fn process_single_field_chunk(
1590 data: &[u8],
1591 delim: u8,
1592 target_idx: usize,
1593 line_delim: u8,
1594 suppress: bool,
1595 buf: &mut Vec<u8>,
1596) {
1597 let mut start = 0;
1598 for end_pos in memchr_iter(line_delim, data) {
1599 let line = &data[start..end_pos];
1600 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
1601 start = end_pos + 1;
1602 }
1603 if start < data.len() {
1604 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
1605 }
1606}
1607
1608#[inline(always)]
1612fn extract_single_field_line(
1613 line: &[u8],
1614 delim: u8,
1615 target_idx: usize,
1616 line_delim: u8,
1617 suppress: bool,
1618 buf: &mut Vec<u8>,
1619) {
1620 let len = line.len();
1621 if len == 0 {
1622 if !suppress {
1623 buf.push(line_delim);
1624 }
1625 return;
1626 }
1627
1628 buf.reserve(len + 1);
1630
1631 let base = line.as_ptr();
1632
1633 if target_idx == 0 {
1635 match memchr::memchr(delim, line) {
1636 Some(pos) => unsafe {
1637 buf_extend(buf, std::slice::from_raw_parts(base, pos));
1638 buf_push(buf, line_delim);
1639 },
1640 None => {
1641 if !suppress {
1642 unsafe {
1643 buf_extend(buf, line);
1644 buf_push(buf, line_delim);
1645 }
1646 }
1647 }
1648 }
1649 return;
1650 }
1651
1652 let mut field_start = 0;
1653 let mut field_idx = 0;
1654 let mut has_delim = false;
1655
1656 for pos in memchr_iter(delim, line) {
1657 has_delim = true;
1658 if field_idx == target_idx {
1659 unsafe {
1660 buf_extend(
1661 buf,
1662 std::slice::from_raw_parts(base.add(field_start), pos - field_start),
1663 );
1664 buf_push(buf, line_delim);
1665 }
1666 return;
1667 }
1668 field_idx += 1;
1669 field_start = pos + 1;
1670 }
1671
1672 if !has_delim {
1673 if !suppress {
1674 unsafe {
1675 buf_extend(buf, line);
1676 buf_push(buf, line_delim);
1677 }
1678 }
1679 return;
1680 }
1681
1682 if field_idx == target_idx {
1683 unsafe {
1684 buf_extend(
1685 buf,
1686 std::slice::from_raw_parts(base.add(field_start), len - field_start),
1687 );
1688 buf_push(buf, line_delim);
1689 }
1690 } else {
1691 unsafe { buf_push(buf, line_delim) };
1692 }
1693}
1694
1695#[inline(always)]
1699fn extract_fields_to_buf(
1700 line: &[u8],
1701 delim: u8,
1702 ranges: &[Range],
1703 output_delim: &[u8],
1704 suppress: bool,
1705 max_field: usize,
1706 field_mask: u64,
1707 line_delim: u8,
1708 buf: &mut Vec<u8>,
1709 complement: bool,
1710) {
1711 let len = line.len();
1712
1713 if len == 0 {
1714 if !suppress {
1715 buf.push(line_delim);
1716 }
1717 return;
1718 }
1719
1720 let needed = len + output_delim.len() * 16 + 1;
1723 if buf.capacity() - buf.len() < needed {
1724 buf.reserve(needed);
1725 }
1726
1727 let base = line.as_ptr();
1728 let mut field_num: usize = 1;
1729 let mut field_start: usize = 0;
1730 let mut first_output = true;
1731 let mut has_delim = false;
1732
1733 for delim_pos in memchr_iter(delim, line) {
1734 has_delim = true;
1735
1736 if is_selected(field_num, field_mask, ranges, complement) {
1737 if !first_output {
1738 unsafe { buf_extend(buf, output_delim) };
1739 }
1740 unsafe {
1741 buf_extend(
1742 buf,
1743 std::slice::from_raw_parts(base.add(field_start), delim_pos - field_start),
1744 )
1745 };
1746 first_output = false;
1747 }
1748
1749 field_num += 1;
1750 field_start = delim_pos + 1;
1751
1752 if field_num > max_field {
1753 break;
1754 }
1755 }
1756
1757 if (field_num <= max_field || complement)
1759 && has_delim
1760 && is_selected(field_num, field_mask, ranges, complement)
1761 {
1762 if !first_output {
1763 unsafe { buf_extend(buf, output_delim) };
1764 }
1765 unsafe {
1766 buf_extend(
1767 buf,
1768 std::slice::from_raw_parts(base.add(field_start), len - field_start),
1769 )
1770 };
1771 first_output = false;
1772 }
1773
1774 if !first_output {
1775 unsafe { buf_push(buf, line_delim) };
1776 } else if !has_delim {
1777 if !suppress {
1778 unsafe {
1779 buf_extend(buf, line);
1780 buf_push(buf, line_delim);
1781 }
1782 }
1783 } else {
1784 unsafe { buf_push(buf, line_delim) };
1785 }
1786}
1787
1788fn process_bytes_from_start(
1795 data: &[u8],
1796 max_bytes: usize,
1797 line_delim: u8,
1798 out: &mut impl Write,
1799) -> io::Result<()> {
1800 if data.len() >= PARALLEL_THRESHOLD {
1801 let chunks = split_into_chunks(data, line_delim);
1802 let results: Vec<Vec<u8>> = chunks
1803 .par_iter()
1804 .map(|chunk| {
1805 let mut buf = Vec::with_capacity(chunk.len());
1806 bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
1807 buf
1808 })
1809 .collect();
1810 let slices: Vec<IoSlice> = results
1812 .iter()
1813 .filter(|r| !r.is_empty())
1814 .map(|r| IoSlice::new(r))
1815 .collect();
1816 write_ioslices(out, &slices)?;
1817 } else {
1818 bytes_from_start_zerocopy(data, max_bytes, line_delim, out)?;
1822 }
1823 Ok(())
1824}
1825
1826#[inline]
1829fn bytes_from_start_zerocopy(
1830 data: &[u8],
1831 max_bytes: usize,
1832 line_delim: u8,
1833 out: &mut impl Write,
1834) -> io::Result<()> {
1835 let mut start = 0;
1836 let mut run_start: usize = 0;
1837
1838 for pos in memchr_iter(line_delim, data) {
1839 let line_len = pos - start;
1840 if line_len > max_bytes {
1841 if run_start < start {
1843 out.write_all(&data[run_start..start])?;
1844 }
1845 out.write_all(&data[start..start + max_bytes])?;
1846 out.write_all(&[line_delim])?;
1847 run_start = pos + 1;
1848 }
1849 start = pos + 1;
1851 }
1852 if start < data.len() {
1854 let line_len = data.len() - start;
1855 if line_len > max_bytes {
1856 if run_start < start {
1857 out.write_all(&data[run_start..start])?;
1858 }
1859 out.write_all(&data[start..start + max_bytes])?;
1860 out.write_all(&[line_delim])?;
1861 return Ok(());
1862 }
1863 }
1864 if run_start < data.len() {
1866 out.write_all(&data[run_start..])?;
1867 if !data.is_empty() && *data.last().unwrap() != line_delim {
1869 out.write_all(&[line_delim])?;
1870 }
1871 }
1872 Ok(())
1873}
1874
1875#[inline]
1878fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1879 buf.reserve(data.len());
1881
1882 let mut start = 0;
1883 for pos in memchr_iter(line_delim, data) {
1884 let line_len = pos - start;
1885 let take = line_len.min(max_bytes);
1886 unsafe {
1887 buf_extend(buf, &data[start..start + take]);
1888 buf_push(buf, line_delim);
1889 }
1890 start = pos + 1;
1891 }
1892 if start < data.len() {
1894 let line_len = data.len() - start;
1895 let take = line_len.min(max_bytes);
1896 unsafe {
1897 buf_extend(buf, &data[start..start + take]);
1898 buf_push(buf, line_delim);
1899 }
1900 }
1901}
1902
1903fn process_bytes_from_offset(
1905 data: &[u8],
1906 skip_bytes: usize,
1907 line_delim: u8,
1908 out: &mut impl Write,
1909) -> io::Result<()> {
1910 if data.len() >= PARALLEL_THRESHOLD {
1911 let chunks = split_into_chunks(data, line_delim);
1912 let results: Vec<Vec<u8>> = chunks
1913 .par_iter()
1914 .map(|chunk| {
1915 let mut buf = Vec::with_capacity(chunk.len());
1916 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
1917 buf
1918 })
1919 .collect();
1920 let slices: Vec<IoSlice> = results
1922 .iter()
1923 .filter(|r| !r.is_empty())
1924 .map(|r| IoSlice::new(r))
1925 .collect();
1926 write_ioslices(out, &slices)?;
1927 } else {
1928 bytes_from_offset_zerocopy(data, skip_bytes, line_delim, out)?;
1930 }
1931 Ok(())
1932}
1933
1934#[inline]
1938fn bytes_from_offset_zerocopy(
1939 data: &[u8],
1940 skip_bytes: usize,
1941 line_delim: u8,
1942 out: &mut impl Write,
1943) -> io::Result<()> {
1944 let delim_buf = [line_delim];
1945 let mut iov: Vec<IoSlice> = Vec::with_capacity(256);
1946
1947 let mut start = 0;
1948 for pos in memchr_iter(line_delim, data) {
1949 let line_len = pos - start;
1950 if line_len > skip_bytes {
1951 iov.push(IoSlice::new(&data[start + skip_bytes..pos]));
1952 }
1953 iov.push(IoSlice::new(&delim_buf));
1954 if iov.len() >= MAX_IOV - 1 {
1956 write_ioslices(out, &iov)?;
1957 iov.clear();
1958 }
1959 start = pos + 1;
1960 }
1961 if start < data.len() {
1962 let line_len = data.len() - start;
1963 if line_len > skip_bytes {
1964 iov.push(IoSlice::new(&data[start + skip_bytes..data.len()]));
1965 }
1966 iov.push(IoSlice::new(&delim_buf));
1967 }
1968 if !iov.is_empty() {
1969 write_ioslices(out, &iov)?;
1970 }
1971 Ok(())
1972}
1973
1974#[inline]
1977fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1978 buf.reserve(data.len());
1979
1980 let mut start = 0;
1981 for pos in memchr_iter(line_delim, data) {
1982 let line_len = pos - start;
1983 if line_len > skip_bytes {
1984 unsafe {
1985 buf_extend(buf, &data[start + skip_bytes..pos]);
1986 }
1987 }
1988 unsafe {
1989 buf_push(buf, line_delim);
1990 }
1991 start = pos + 1;
1992 }
1993 if start < data.len() {
1994 let line_len = data.len() - start;
1995 if line_len > skip_bytes {
1996 unsafe {
1997 buf_extend(buf, &data[start + skip_bytes..data.len()]);
1998 }
1999 }
2000 unsafe {
2001 buf_push(buf, line_delim);
2002 }
2003 }
2004}
2005
2006fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
2008 let line_delim = cfg.line_delim;
2009 let ranges = cfg.ranges;
2010 let complement = cfg.complement;
2011 let output_delim = cfg.output_delim;
2012
2013 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
2015 let max_bytes = ranges[0].end;
2016 if max_bytes < usize::MAX {
2017 return process_bytes_from_start(data, max_bytes, line_delim, out);
2018 }
2019 }
2020
2021 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
2023 let skip_bytes = ranges[0].start.saturating_sub(1);
2024 if skip_bytes > 0 {
2025 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
2026 }
2027 }
2028
2029 if data.len() >= PARALLEL_THRESHOLD {
2030 let chunks = split_into_chunks(data, line_delim);
2031 let results: Vec<Vec<u8>> = chunks
2032 .par_iter()
2033 .map(|chunk| {
2034 let mut buf = Vec::with_capacity(chunk.len());
2035 process_bytes_chunk(
2036 chunk,
2037 ranges,
2038 complement,
2039 output_delim,
2040 line_delim,
2041 &mut buf,
2042 );
2043 buf
2044 })
2045 .collect();
2046 let slices: Vec<IoSlice> = results
2048 .iter()
2049 .filter(|r| !r.is_empty())
2050 .map(|r| IoSlice::new(r))
2051 .collect();
2052 write_ioslices(out, &slices)?;
2053 } else {
2054 let mut buf = Vec::with_capacity(data.len());
2055 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
2056 if !buf.is_empty() {
2057 out.write_all(&buf)?;
2058 }
2059 }
2060 Ok(())
2061}
2062
2063fn process_bytes_chunk(
2066 data: &[u8],
2067 ranges: &[Range],
2068 complement: bool,
2069 output_delim: &[u8],
2070 line_delim: u8,
2071 buf: &mut Vec<u8>,
2072) {
2073 buf.reserve(data.len());
2074 let base = data.as_ptr();
2075 let mut start = 0;
2076 for end_pos in memchr_iter(line_delim, data) {
2077 let line = unsafe { std::slice::from_raw_parts(base.add(start), end_pos - start) };
2078 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2079 unsafe { buf_push(buf, line_delim) };
2080 start = end_pos + 1;
2081 }
2082 if start < data.len() {
2083 let line = unsafe { std::slice::from_raw_parts(base.add(start), data.len() - start) };
2084 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
2085 unsafe { buf_push(buf, line_delim) };
2086 }
2087}
2088
2089#[inline(always)]
2093fn cut_bytes_to_buf(
2094 line: &[u8],
2095 ranges: &[Range],
2096 complement: bool,
2097 output_delim: &[u8],
2098 buf: &mut Vec<u8>,
2099) {
2100 let len = line.len();
2101 let base = line.as_ptr();
2102 let mut first_range = true;
2103
2104 let needed = len + output_delim.len() * ranges.len() + 1;
2106 if buf.capacity() - buf.len() < needed {
2107 buf.reserve(needed);
2108 }
2109
2110 if complement {
2111 let mut pos: usize = 1;
2112 for r in ranges {
2113 let rs = r.start;
2114 let re = r.end.min(len);
2115 if pos < rs {
2116 if !first_range && !output_delim.is_empty() {
2117 unsafe { buf_extend(buf, output_delim) };
2118 }
2119 unsafe { buf_extend(buf, std::slice::from_raw_parts(base.add(pos - 1), rs - pos)) };
2120 first_range = false;
2121 }
2122 pos = re + 1;
2123 if pos > len {
2124 break;
2125 }
2126 }
2127 if pos <= len {
2128 if !first_range && !output_delim.is_empty() {
2129 unsafe { buf_extend(buf, output_delim) };
2130 }
2131 unsafe {
2132 buf_extend(
2133 buf,
2134 std::slice::from_raw_parts(base.add(pos - 1), len - pos + 1),
2135 )
2136 };
2137 }
2138 } else if output_delim.is_empty() && ranges.len() == 1 {
2139 let start = ranges[0].start.saturating_sub(1);
2141 let end = ranges[0].end.min(len);
2142 if start < len {
2143 unsafe {
2144 buf_extend(
2145 buf,
2146 std::slice::from_raw_parts(base.add(start), end - start),
2147 )
2148 };
2149 }
2150 } else {
2151 for r in ranges {
2152 let start = r.start.saturating_sub(1);
2153 let end = r.end.min(len);
2154 if start >= len {
2155 break;
2156 }
2157 if !first_range && !output_delim.is_empty() {
2158 unsafe { buf_extend(buf, output_delim) };
2159 }
2160 unsafe {
2161 buf_extend(
2162 buf,
2163 std::slice::from_raw_parts(base.add(start), end - start),
2164 )
2165 };
2166 first_range = false;
2167 }
2168 }
2169}
2170
2171#[inline]
2175pub fn cut_fields(
2176 line: &[u8],
2177 delim: u8,
2178 ranges: &[Range],
2179 complement: bool,
2180 output_delim: &[u8],
2181 suppress_no_delim: bool,
2182 out: &mut impl Write,
2183) -> io::Result<bool> {
2184 if memchr::memchr(delim, line).is_none() {
2185 if !suppress_no_delim {
2186 out.write_all(line)?;
2187 return Ok(true);
2188 }
2189 return Ok(false);
2190 }
2191
2192 let mut field_num: usize = 1;
2193 let mut field_start: usize = 0;
2194 let mut first_output = true;
2195
2196 for delim_pos in memchr_iter(delim, line) {
2197 let selected = in_ranges(ranges, field_num) != complement;
2198 if selected {
2199 if !first_output {
2200 out.write_all(output_delim)?;
2201 }
2202 out.write_all(&line[field_start..delim_pos])?;
2203 first_output = false;
2204 }
2205 field_start = delim_pos + 1;
2206 field_num += 1;
2207 }
2208
2209 let selected = in_ranges(ranges, field_num) != complement;
2210 if selected {
2211 if !first_output {
2212 out.write_all(output_delim)?;
2213 }
2214 out.write_all(&line[field_start..])?;
2215 }
2216
2217 Ok(true)
2218}
2219
2220#[inline]
2222pub fn cut_bytes(
2223 line: &[u8],
2224 ranges: &[Range],
2225 complement: bool,
2226 output_delim: &[u8],
2227 out: &mut impl Write,
2228) -> io::Result<bool> {
2229 let mut first_range = true;
2230
2231 if complement {
2232 let len = line.len();
2233 let mut comp_ranges = Vec::new();
2234 let mut pos: usize = 1;
2235 for r in ranges {
2236 let rs = r.start;
2237 let re = r.end.min(len);
2238 if pos < rs {
2239 comp_ranges.push((pos, rs - 1));
2240 }
2241 pos = re + 1;
2242 if pos > len {
2243 break;
2244 }
2245 }
2246 if pos <= len {
2247 comp_ranges.push((pos, len));
2248 }
2249 for &(s, e) in &comp_ranges {
2250 if !first_range && !output_delim.is_empty() {
2251 out.write_all(output_delim)?;
2252 }
2253 out.write_all(&line[s - 1..e])?;
2254 first_range = false;
2255 }
2256 } else {
2257 for r in ranges {
2258 let start = r.start.saturating_sub(1);
2259 let end = r.end.min(line.len());
2260 if start >= line.len() {
2261 break;
2262 }
2263 if !first_range && !output_delim.is_empty() {
2264 out.write_all(output_delim)?;
2265 }
2266 out.write_all(&line[start..end])?;
2267 first_range = false;
2268 }
2269 }
2270 Ok(true)
2271}
2272
2273pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
2275 match cfg.mode {
2276 CutMode::Fields => process_fields_fast(data, cfg, out),
2277 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
2278 }
2279}
2280
2281pub fn process_cut_reader<R: BufRead>(
2286 mut reader: R,
2287 cfg: &CutConfig,
2288 out: &mut impl Write,
2289) -> io::Result<()> {
2290 const CHUNK_SIZE: usize = 16 * 1024 * 1024; let line_delim = cfg.line_delim;
2292
2293 let mut buf = Vec::with_capacity(CHUNK_SIZE + 4096);
2296
2297 loop {
2298 buf.reserve(CHUNK_SIZE);
2300 let read_start = buf.len();
2301 unsafe { buf.set_len(read_start + CHUNK_SIZE) };
2302 let n = read_fully(&mut reader, &mut buf[read_start..])?;
2303 buf.truncate(read_start + n);
2304
2305 if buf.is_empty() {
2306 break;
2307 }
2308
2309 if n == 0 {
2310 process_cut_data(&buf, cfg, out)?;
2312 break;
2313 }
2314
2315 let process_end = match memchr::memrchr(line_delim, &buf) {
2317 Some(pos) => pos + 1,
2318 None => {
2319 continue;
2321 }
2322 };
2323
2324 process_cut_data(&buf[..process_end], cfg, out)?;
2326
2327 let leftover_len = buf.len() - process_end;
2329 if leftover_len > 0 {
2330 buf.copy_within(process_end.., 0);
2331 }
2332 buf.truncate(leftover_len);
2333 }
2334
2335 Ok(())
2336}
2337
#[inline]
/// Reads from `reader` until `buf` is full or end of input is reached.
///
/// Returns the number of bytes placed at the start of `buf`; a value smaller
/// than `buf.len()` means the reader reported end of input. Reads that fail
/// with `ErrorKind::Interrupted` are retried, including the very first one.
///
/// # Errors
///
/// Any other read error is returned as-is; bytes read before the error are
/// not reported.
fn read_fully<R: BufRead>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut total = 0;
    while total < buf.len() {
        match reader.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(total)
}
2357
/// Which unit of a line the cut operates on.
///
/// `process_cut_data` sends [`CutMode::Fields`] to the field routines and
/// handles [`CutMode::Bytes`] and [`CutMode::Characters`] identically, through
/// the byte routines.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CutMode {
    /// Select by byte position within each line.
    Bytes,
    /// Select by character position; currently processed the same way as
    /// [`CutMode::Bytes`].
    Characters,
    /// Select delimiter-separated fields.
    Fields,
}