1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, Write};
4
/// Input size in bytes (4 MiB) at or above which the bulk processors split the
/// data into line-aligned chunks and hand them to rayon instead of processing
/// everything in a single buffer.
const PARALLEL_THRESHOLD: usize = 4 * 1024 * 1024;
8
/// Settings for one cut operation, shared by the bulk (`process_cut_data`) and
/// streaming (`process_cut_reader`) entry points.
pub struct CutConfig<'a> {
    /// Selects field-based or byte/character-based cutting.
    pub mode: CutMode,
    /// 1-based inclusive ranges to select. The processors scan them front to
    /// back and stop early, so they expect ascending, non-overlapping ranges
    /// (the form `parse_ranges` returns).
    pub ranges: &'a [Range],
    /// When true, output everything that `ranges` does NOT select.
    pub complement: bool,
    /// Input field delimiter byte (field mode).
    pub delim: u8,
    /// Bytes written between selected fields or byte ranges in the output.
    pub output_delim: &'a [u8],
    /// Field mode only: drop lines containing no `delim` instead of echoing them.
    pub suppress_no_delim: bool,
    /// Byte that terminates each input line and each output line.
    pub line_delim: u8,
}
19
/// A 1-based, inclusive range of byte or field positions.
#[derive(Debug, Clone)]
pub struct Range {
    /// First selected position (1-based).
    pub start: usize,
    /// Last selected position, inclusive; `parse_ranges` uses `usize::MAX` for
    /// an open-ended range such as `3-`.
    pub end: usize,
}
26
27pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
30 let mut ranges = Vec::new();
31
32 for part in spec.split(',') {
33 let part = part.trim();
34 if part.is_empty() {
35 continue;
36 }
37
38 if let Some(idx) = part.find('-') {
39 let left = &part[..idx];
40 let right = &part[idx + 1..];
41
42 let start = if left.is_empty() {
43 1
44 } else {
45 left.parse::<usize>()
46 .map_err(|_| format!("invalid range: '{}'", part))?
47 };
48
49 let end = if right.is_empty() {
50 usize::MAX
51 } else {
52 right
53 .parse::<usize>()
54 .map_err(|_| format!("invalid range: '{}'", part))?
55 };
56
57 if start == 0 {
58 return Err("fields and positions are numbered from 1".to_string());
59 }
60 if start > end {
61 return Err(format!("invalid decreasing range: '{}'", part));
62 }
63
64 ranges.push(Range { start, end });
65 } else {
66 let n = part
67 .parse::<usize>()
68 .map_err(|_| format!("invalid field: '{}'", part))?;
69 if n == 0 {
70 return Err("fields and positions are numbered from 1".to_string());
71 }
72 ranges.push(Range { start: n, end: n });
73 }
74 }
75
76 if ranges.is_empty() {
77 return Err("you must specify a list of bytes, characters, or fields".to_string());
78 }
79
80 ranges.sort_by_key(|r| (r.start, r.end));
82 let mut merged = vec![ranges[0].clone()];
83 for r in &ranges[1..] {
84 let last = merged.last_mut().unwrap();
85 if r.start <= last.end.saturating_add(1) {
86 last.end = last.end.max(r.end);
87 } else {
88 merged.push(r.clone());
89 }
90 }
91
92 Ok(merged)
93}
94
95#[inline(always)]
98fn in_ranges(ranges: &[Range], pos: usize) -> bool {
99 for r in ranges {
100 if pos < r.start {
101 return false;
102 }
103 if pos <= r.end {
104 return true;
105 }
106 }
107 false
108}
109
110#[inline]
113fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
114 let mut mask: u64 = 0;
115 for i in 1..=64u32 {
116 let in_range = in_ranges(ranges, i as usize);
117 if in_range != complement {
118 mask |= 1u64 << (i - 1);
119 }
120 }
121 mask
122}
123
124#[inline(always)]
126fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
127 if field_num <= 64 {
128 (mask >> (field_num - 1)) & 1 == 1
129 } else {
130 in_ranges(ranges, field_num) != complement
131 }
132}
133
/// Appends `data` to `buf` without checking or growing capacity.
///
/// # Safety
/// The caller must guarantee `buf.capacity() - buf.len() >= data.len()`;
/// otherwise this writes past the end of the allocation. `data` must not
/// alias `buf`'s spare capacity.
#[inline(always)]
unsafe fn buf_extend(buf: &mut Vec<u8>, data: &[u8]) {
    unsafe {
        let old_len = buf.len();
        let count = data.len();
        let dst = buf.as_mut_ptr().add(old_len);
        dst.copy_from_nonoverlapping(data.as_ptr(), count);
        buf.set_len(old_len + count);
    }
}
146
/// Appends the single byte `b` to `buf` without checking or growing capacity.
///
/// # Safety
/// The caller must guarantee `buf.len() < buf.capacity()`; otherwise this
/// writes past the end of the allocation.
#[inline(always)]
unsafe fn buf_push(buf: &mut Vec<u8>, b: u8) {
    unsafe {
        let old_len = buf.len();
        buf.as_mut_ptr().add(old_len).write(b);
        buf.set_len(old_len + 1);
    }
}
157
158fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
162 let num_threads = rayon::current_num_threads().max(1);
163 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
164 return vec![data];
165 }
166
167 let chunk_size = data.len() / num_threads;
168 let mut chunks = Vec::with_capacity(num_threads);
169 let mut pos = 0;
170
171 for _ in 0..num_threads - 1 {
172 let target = pos + chunk_size;
173 if target >= data.len() {
174 break;
175 }
176 let boundary = memchr::memchr(line_delim, &data[target..])
177 .map(|p| target + p + 1)
178 .unwrap_or(data.len());
179 if boundary > pos {
180 chunks.push(&data[pos..boundary]);
181 }
182 pos = boundary;
183 }
184
185 if pos < data.len() {
186 chunks.push(&data[pos..]);
187 }
188
189 chunks
190}
191
192fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
196 let delim = cfg.delim;
197 let line_delim = cfg.line_delim;
198 let ranges = cfg.ranges;
199 let complement = cfg.complement;
200 let output_delim = cfg.output_delim;
201 let suppress = cfg.suppress_no_delim;
202
203 if !complement && memchr::memchr(delim, data).is_none() {
205 if suppress {
206 return Ok(());
207 }
208 out.write_all(data)?;
209 if !data.is_empty() && *data.last().unwrap() != line_delim {
210 out.write_all(&[line_delim])?;
211 }
212 return Ok(());
213 }
214
215 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
217 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
218 }
219
220 if complement
222 && ranges.len() == 1
223 && ranges[0].start == ranges[0].end
224 && output_delim.len() == 1
225 && output_delim[0] == delim
226 {
227 return process_complement_single_field(
228 data,
229 delim,
230 line_delim,
231 ranges[0].start,
232 suppress,
233 out,
234 );
235 }
236
237 if !complement
239 && ranges.len() == 1
240 && ranges[0].start == 1
241 && output_delim.len() == 1
242 && output_delim[0] == delim
243 && ranges[0].end < usize::MAX
244 {
245 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
246 }
247
248 if !complement
250 && ranges.len() == 1
251 && ranges[0].end == usize::MAX
252 && ranges[0].start > 1
253 && output_delim.len() == 1
254 && output_delim[0] == delim
255 {
256 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
257 }
258
259 let max_field = if complement {
261 usize::MAX
262 } else {
263 ranges.last().map(|r| r.end).unwrap_or(0)
264 };
265 let field_mask = compute_field_mask(ranges, complement);
266
267 if data.len() >= PARALLEL_THRESHOLD {
268 let chunks = split_into_chunks(data, line_delim);
269 let results: Vec<Vec<u8>> = chunks
270 .par_iter()
271 .map(|chunk| {
272 let mut buf = Vec::with_capacity(chunk.len());
273 process_fields_chunk(
274 chunk,
275 delim,
276 ranges,
277 output_delim,
278 suppress,
279 max_field,
280 field_mask,
281 line_delim,
282 complement,
283 &mut buf,
284 );
285 buf
286 })
287 .collect();
288 for result in &results {
289 if !result.is_empty() {
290 out.write_all(result)?;
291 }
292 }
293 } else {
294 let mut buf = Vec::with_capacity(data.len());
295 process_fields_chunk(
296 data,
297 delim,
298 ranges,
299 output_delim,
300 suppress,
301 max_field,
302 field_mask,
303 line_delim,
304 complement,
305 &mut buf,
306 );
307 if !buf.is_empty() {
308 out.write_all(&buf)?;
309 }
310 }
311 Ok(())
312}
313
314fn process_fields_chunk(
316 data: &[u8],
317 delim: u8,
318 ranges: &[Range],
319 output_delim: &[u8],
320 suppress: bool,
321 max_field: usize,
322 field_mask: u64,
323 line_delim: u8,
324 complement: bool,
325 buf: &mut Vec<u8>,
326) {
327 let mut start = 0;
328 for end_pos in memchr_iter(line_delim, data) {
329 let line = &data[start..end_pos];
330 extract_fields_to_buf(
331 line,
332 delim,
333 ranges,
334 output_delim,
335 suppress,
336 max_field,
337 field_mask,
338 line_delim,
339 buf,
340 complement,
341 );
342 start = end_pos + 1;
343 }
344 if start < data.len() {
345 extract_fields_to_buf(
346 &data[start..],
347 delim,
348 ranges,
349 output_delim,
350 suppress,
351 max_field,
352 field_mask,
353 line_delim,
354 buf,
355 complement,
356 );
357 }
358}
359
360fn process_single_field(
364 data: &[u8],
365 delim: u8,
366 line_delim: u8,
367 target: usize,
368 suppress: bool,
369 out: &mut impl Write,
370) -> io::Result<()> {
371 let target_idx = target - 1;
372
373 if target_idx == 0 && delim != line_delim {
375 if data.len() >= PARALLEL_THRESHOLD {
376 let chunks = split_into_chunks(data, line_delim);
377 let results: Vec<Vec<u8>> = chunks
378 .par_iter()
379 .map(|chunk| {
380 let mut buf = Vec::with_capacity(chunk.len());
381 process_first_field_combined(chunk, delim, line_delim, suppress, &mut buf);
382 buf
383 })
384 .collect();
385 for result in &results {
386 if !result.is_empty() {
387 out.write_all(result)?;
388 }
389 }
390 } else {
391 let mut buf = Vec::with_capacity(data.len());
392 process_first_field_combined(data, delim, line_delim, suppress, &mut buf);
393 if !buf.is_empty() {
394 out.write_all(&buf)?;
395 }
396 }
397 return Ok(());
398 }
399
400 if data.len() >= PARALLEL_THRESHOLD {
401 let chunks = split_into_chunks(data, line_delim);
402 let results: Vec<Vec<u8>> = chunks
403 .par_iter()
404 .map(|chunk| {
405 let mut buf = Vec::with_capacity(chunk.len() / 4);
406 process_single_field_chunk(
407 chunk, delim, target_idx, line_delim, suppress, &mut buf,
408 );
409 buf
410 })
411 .collect();
412 for result in &results {
413 if !result.is_empty() {
414 out.write_all(result)?;
415 }
416 }
417 } else {
418 let mut buf = Vec::with_capacity(data.len() / 4);
419 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
420 if !buf.is_empty() {
421 out.write_all(&buf)?;
422 }
423 }
424 Ok(())
425}
426
427fn process_complement_single_field(
429 data: &[u8],
430 delim: u8,
431 line_delim: u8,
432 skip_field: usize,
433 suppress: bool,
434 out: &mut impl Write,
435) -> io::Result<()> {
436 let skip_idx = skip_field - 1;
437
438 if data.len() >= PARALLEL_THRESHOLD {
439 let chunks = split_into_chunks(data, line_delim);
440 let results: Vec<Vec<u8>> = chunks
441 .par_iter()
442 .map(|chunk| {
443 let mut buf = Vec::with_capacity(chunk.len());
444 complement_single_field_chunk(
445 chunk, delim, skip_idx, line_delim, suppress, &mut buf,
446 );
447 buf
448 })
449 .collect();
450 for result in &results {
451 if !result.is_empty() {
452 out.write_all(result)?;
453 }
454 }
455 } else {
456 let mut buf = Vec::with_capacity(data.len());
457 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
458 if !buf.is_empty() {
459 out.write_all(&buf)?;
460 }
461 }
462 Ok(())
463}
464
465fn complement_single_field_chunk(
467 data: &[u8],
468 delim: u8,
469 skip_idx: usize,
470 line_delim: u8,
471 suppress: bool,
472 buf: &mut Vec<u8>,
473) {
474 let mut start = 0;
475 for end_pos in memchr_iter(line_delim, data) {
476 let line = &data[start..end_pos];
477 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
478 start = end_pos + 1;
479 }
480 if start < data.len() {
481 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
482 }
483}
484
485#[inline(always)]
487fn complement_single_field_line(
488 line: &[u8],
489 delim: u8,
490 skip_idx: usize,
491 line_delim: u8,
492 suppress: bool,
493 buf: &mut Vec<u8>,
494) {
495 if line.is_empty() {
496 if !suppress {
497 buf.push(line_delim);
498 }
499 return;
500 }
501
502 let mut field_idx = 0;
503 let mut field_start = 0;
504 let mut first_output = true;
505 let mut has_delim = false;
506
507 for pos in memchr_iter(delim, line) {
508 has_delim = true;
509 if field_idx != skip_idx {
510 if !first_output {
511 buf.push(delim);
512 }
513 buf.extend_from_slice(&line[field_start..pos]);
514 first_output = false;
515 }
516 field_idx += 1;
517 field_start = pos + 1;
518 }
519
520 if !has_delim {
521 if !suppress {
522 buf.extend_from_slice(line);
523 buf.push(line_delim);
524 }
525 return;
526 }
527
528 if field_idx != skip_idx {
530 if !first_output {
531 buf.push(delim);
532 }
533 buf.extend_from_slice(&line[field_start..]);
534 }
535
536 buf.push(line_delim);
537}
538
539fn process_fields_prefix(
541 data: &[u8],
542 delim: u8,
543 line_delim: u8,
544 last_field: usize,
545 suppress: bool,
546 out: &mut impl Write,
547) -> io::Result<()> {
548 if data.len() >= PARALLEL_THRESHOLD {
549 let chunks = split_into_chunks(data, line_delim);
550 let results: Vec<Vec<u8>> = chunks
551 .par_iter()
552 .map(|chunk| {
553 let mut buf = Vec::with_capacity(chunk.len());
554 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
555 buf
556 })
557 .collect();
558 for result in &results {
559 if !result.is_empty() {
560 out.write_all(result)?;
561 }
562 }
563 } else {
564 let mut buf = Vec::with_capacity(data.len());
565 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
566 if !buf.is_empty() {
567 out.write_all(&buf)?;
568 }
569 }
570 Ok(())
571}
572
573fn fields_prefix_chunk(
575 data: &[u8],
576 delim: u8,
577 line_delim: u8,
578 last_field: usize,
579 suppress: bool,
580 buf: &mut Vec<u8>,
581) {
582 let mut start = 0;
583 for end_pos in memchr_iter(line_delim, data) {
584 let line = &data[start..end_pos];
585 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
586 start = end_pos + 1;
587 }
588 if start < data.len() {
589 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
590 }
591}
592
593#[inline(always)]
595fn fields_prefix_line(
596 line: &[u8],
597 delim: u8,
598 line_delim: u8,
599 last_field: usize,
600 suppress: bool,
601 buf: &mut Vec<u8>,
602) {
603 if line.is_empty() {
604 if !suppress {
605 buf.push(line_delim);
606 }
607 return;
608 }
609
610 let mut field_count = 1;
611 let mut has_delim = false;
612
613 for pos in memchr_iter(delim, line) {
614 has_delim = true;
615 if field_count >= last_field {
616 buf.extend_from_slice(&line[..pos]);
617 buf.push(line_delim);
618 return;
619 }
620 field_count += 1;
621 }
622
623 if !has_delim {
624 if !suppress {
625 buf.extend_from_slice(line);
626 buf.push(line_delim);
627 }
628 return;
629 }
630
631 buf.extend_from_slice(line);
632 buf.push(line_delim);
633}
634
635fn process_fields_suffix(
637 data: &[u8],
638 delim: u8,
639 line_delim: u8,
640 start_field: usize,
641 suppress: bool,
642 out: &mut impl Write,
643) -> io::Result<()> {
644 if data.len() >= PARALLEL_THRESHOLD {
645 let chunks = split_into_chunks(data, line_delim);
646 let results: Vec<Vec<u8>> = chunks
647 .par_iter()
648 .map(|chunk| {
649 let mut buf = Vec::with_capacity(chunk.len());
650 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
651 buf
652 })
653 .collect();
654 for result in &results {
655 if !result.is_empty() {
656 out.write_all(result)?;
657 }
658 }
659 } else {
660 let mut buf = Vec::with_capacity(data.len());
661 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
662 if !buf.is_empty() {
663 out.write_all(&buf)?;
664 }
665 }
666 Ok(())
667}
668
669fn fields_suffix_chunk(
671 data: &[u8],
672 delim: u8,
673 line_delim: u8,
674 start_field: usize,
675 suppress: bool,
676 buf: &mut Vec<u8>,
677) {
678 let mut start = 0;
679 for end_pos in memchr_iter(line_delim, data) {
680 let line = &data[start..end_pos];
681 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
682 start = end_pos + 1;
683 }
684 if start < data.len() {
685 fields_suffix_line(
686 &data[start..],
687 delim,
688 line_delim,
689 start_field,
690 suppress,
691 buf,
692 );
693 }
694}
695
696#[inline(always)]
698fn fields_suffix_line(
699 line: &[u8],
700 delim: u8,
701 line_delim: u8,
702 start_field: usize,
703 suppress: bool,
704 buf: &mut Vec<u8>,
705) {
706 if line.is_empty() {
707 if !suppress {
708 buf.push(line_delim);
709 }
710 return;
711 }
712
713 let skip_delims = start_field - 1;
714 let mut delim_count = 0;
715 let mut has_delim = false;
716
717 for pos in memchr_iter(delim, line) {
718 has_delim = true;
719 delim_count += 1;
720 if delim_count >= skip_delims {
721 buf.extend_from_slice(&line[pos + 1..]);
722 buf.push(line_delim);
723 return;
724 }
725 }
726
727 if !has_delim {
728 if !suppress {
729 buf.extend_from_slice(line);
730 buf.push(line_delim);
731 }
732 return;
733 }
734
735 buf.push(line_delim);
737}
738
739fn process_first_field_combined(
741 data: &[u8],
742 delim: u8,
743 line_delim: u8,
744 suppress: bool,
745 buf: &mut Vec<u8>,
746) {
747 let mut line_start = 0;
748 let mut found_delim = false;
749
750 for pos in memchr::memchr2_iter(delim, line_delim, data) {
751 let byte = data[pos];
752 if byte == line_delim {
753 if !found_delim {
754 if !suppress {
755 buf.extend_from_slice(&data[line_start..pos]);
756 buf.push(line_delim);
757 }
758 }
759 line_start = pos + 1;
760 found_delim = false;
761 } else if !found_delim {
762 buf.extend_from_slice(&data[line_start..pos]);
763 buf.push(line_delim);
764 found_delim = true;
765 }
766 }
767
768 if line_start < data.len() {
770 if !found_delim {
771 if !suppress {
772 match memchr::memchr(delim, &data[line_start..]) {
773 Some(offset) => {
774 buf.extend_from_slice(&data[line_start..line_start + offset]);
775 buf.push(line_delim);
776 }
777 None => {
778 buf.extend_from_slice(&data[line_start..]);
779 buf.push(line_delim);
780 }
781 }
782 }
783 }
784 }
785}
786
787fn process_single_field_chunk(
789 data: &[u8],
790 delim: u8,
791 target_idx: usize,
792 line_delim: u8,
793 suppress: bool,
794 buf: &mut Vec<u8>,
795) {
796 let mut start = 0;
797 for end_pos in memchr_iter(line_delim, data) {
798 let line = &data[start..end_pos];
799 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
800 start = end_pos + 1;
801 }
802 if start < data.len() {
803 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
804 }
805}
806
807#[inline(always)]
809fn extract_single_field_line(
810 line: &[u8],
811 delim: u8,
812 target_idx: usize,
813 line_delim: u8,
814 suppress: bool,
815 buf: &mut Vec<u8>,
816) {
817 if line.is_empty() {
818 if !suppress {
819 buf.push(line_delim);
820 }
821 return;
822 }
823
824 if target_idx == 0 {
826 match memchr::memchr(delim, line) {
827 Some(pos) => {
828 buf.extend_from_slice(&line[..pos]);
829 buf.push(line_delim);
830 }
831 None => {
832 if !suppress {
833 buf.extend_from_slice(line);
834 buf.push(line_delim);
835 }
836 }
837 }
838 return;
839 }
840
841 let mut field_start = 0;
842 let mut field_idx = 0;
843 let mut has_delim = false;
844
845 for pos in memchr_iter(delim, line) {
846 has_delim = true;
847 if field_idx == target_idx {
848 buf.extend_from_slice(&line[field_start..pos]);
849 buf.push(line_delim);
850 return;
851 }
852 field_idx += 1;
853 field_start = pos + 1;
854 }
855
856 if !has_delim {
857 if !suppress {
858 buf.extend_from_slice(line);
859 buf.push(line_delim);
860 }
861 return;
862 }
863
864 if field_idx == target_idx {
865 buf.extend_from_slice(&line[field_start..]);
866 buf.push(line_delim);
867 } else {
868 buf.push(line_delim);
869 }
870}
871
872#[inline(always)]
874fn extract_fields_to_buf(
875 line: &[u8],
876 delim: u8,
877 ranges: &[Range],
878 output_delim: &[u8],
879 suppress: bool,
880 max_field: usize,
881 field_mask: u64,
882 line_delim: u8,
883 buf: &mut Vec<u8>,
884 complement: bool,
885) {
886 let len = line.len();
887
888 if len == 0 {
889 if !suppress {
890 buf.push(line_delim);
891 }
892 return;
893 }
894
895 let mut field_num: usize = 1;
896 let mut field_start: usize = 0;
897 let mut first_output = true;
898 let mut has_delim = false;
899
900 for delim_pos in memchr_iter(delim, line) {
901 has_delim = true;
902
903 if is_selected(field_num, field_mask, ranges, complement) {
904 if !first_output {
905 buf.extend_from_slice(output_delim);
906 }
907 buf.extend_from_slice(&line[field_start..delim_pos]);
908 first_output = false;
909 }
910
911 field_num += 1;
912 field_start = delim_pos + 1;
913
914 if field_num > max_field {
915 break;
916 }
917 }
918
919 if (field_num <= max_field || complement)
921 && has_delim
922 && is_selected(field_num, field_mask, ranges, complement)
923 {
924 if !first_output {
925 buf.extend_from_slice(output_delim);
926 }
927 buf.extend_from_slice(&line[field_start..len]);
928 first_output = false;
929 }
930
931 if !first_output {
932 buf.push(line_delim);
933 } else if !has_delim {
934 if !suppress {
935 buf.extend_from_slice(line);
936 buf.push(line_delim);
937 }
938 } else {
939 buf.push(line_delim);
940 }
941}
942
943fn process_bytes_from_start(
948 data: &[u8],
949 max_bytes: usize,
950 line_delim: u8,
951 out: &mut impl Write,
952) -> io::Result<()> {
953 if data.len() >= PARALLEL_THRESHOLD {
954 let chunks = split_into_chunks(data, line_delim);
955 let results: Vec<Vec<u8>> = chunks
956 .par_iter()
957 .map(|chunk| {
958 let mut buf = Vec::with_capacity(chunk.len());
960 bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
961 buf
962 })
963 .collect();
964 for result in &results {
965 if !result.is_empty() {
966 out.write_all(result)?;
967 }
968 }
969 } else {
970 let mut buf = Vec::with_capacity(data.len());
971 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
972 if !buf.is_empty() {
973 out.write_all(&buf)?;
974 }
975 }
976 Ok(())
977}
978
979#[inline]
982fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
983 buf.reserve(data.len());
985
986 let mut start = 0;
987 for pos in memchr_iter(line_delim, data) {
988 let line_len = pos - start;
989 let take = line_len.min(max_bytes);
990 unsafe {
991 buf_extend(buf, &data[start..start + take]);
992 buf_push(buf, line_delim);
993 }
994 start = pos + 1;
995 }
996 if start < data.len() {
998 let line_len = data.len() - start;
999 let take = line_len.min(max_bytes);
1000 unsafe {
1001 buf_extend(buf, &data[start..start + take]);
1002 buf_push(buf, line_delim);
1003 }
1004 }
1005}
1006
1007fn process_bytes_from_offset(
1009 data: &[u8],
1010 skip_bytes: usize,
1011 line_delim: u8,
1012 out: &mut impl Write,
1013) -> io::Result<()> {
1014 if data.len() >= PARALLEL_THRESHOLD {
1015 let chunks = split_into_chunks(data, line_delim);
1016 let results: Vec<Vec<u8>> = chunks
1017 .par_iter()
1018 .map(|chunk| {
1019 let mut buf = Vec::with_capacity(chunk.len());
1020 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
1021 buf
1022 })
1023 .collect();
1024 for result in &results {
1025 if !result.is_empty() {
1026 out.write_all(result)?;
1027 }
1028 }
1029 } else {
1030 let mut buf = Vec::with_capacity(data.len());
1031 bytes_from_offset_chunk(data, skip_bytes, line_delim, &mut buf);
1032 if !buf.is_empty() {
1033 out.write_all(&buf)?;
1034 }
1035 }
1036 Ok(())
1037}
1038
1039#[inline]
1042fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1043 buf.reserve(data.len());
1044
1045 let mut start = 0;
1046 for pos in memchr_iter(line_delim, data) {
1047 let line_len = pos - start;
1048 if line_len > skip_bytes {
1049 unsafe {
1050 buf_extend(buf, &data[start + skip_bytes..pos]);
1051 }
1052 }
1053 unsafe {
1054 buf_push(buf, line_delim);
1055 }
1056 start = pos + 1;
1057 }
1058 if start < data.len() {
1059 let line_len = data.len() - start;
1060 if line_len > skip_bytes {
1061 unsafe {
1062 buf_extend(buf, &data[start + skip_bytes..data.len()]);
1063 }
1064 }
1065 unsafe {
1066 buf_push(buf, line_delim);
1067 }
1068 }
1069}
1070
1071fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1073 let line_delim = cfg.line_delim;
1074 let ranges = cfg.ranges;
1075 let complement = cfg.complement;
1076 let output_delim = cfg.output_delim;
1077
1078 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
1080 let max_bytes = ranges[0].end;
1081 if max_bytes < usize::MAX {
1082 return process_bytes_from_start(data, max_bytes, line_delim, out);
1083 }
1084 }
1085
1086 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
1088 let skip_bytes = ranges[0].start.saturating_sub(1);
1089 if skip_bytes > 0 {
1090 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
1091 }
1092 }
1093
1094 if data.len() >= PARALLEL_THRESHOLD {
1095 let chunks = split_into_chunks(data, line_delim);
1096 let results: Vec<Vec<u8>> = chunks
1097 .par_iter()
1098 .map(|chunk| {
1099 let mut buf = Vec::with_capacity(chunk.len());
1100 process_bytes_chunk(
1101 chunk,
1102 ranges,
1103 complement,
1104 output_delim,
1105 line_delim,
1106 &mut buf,
1107 );
1108 buf
1109 })
1110 .collect();
1111 for result in &results {
1112 if !result.is_empty() {
1113 out.write_all(result)?;
1114 }
1115 }
1116 } else {
1117 let mut buf = Vec::with_capacity(data.len());
1118 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
1119 if !buf.is_empty() {
1120 out.write_all(&buf)?;
1121 }
1122 }
1123 Ok(())
1124}
1125
1126fn process_bytes_chunk(
1128 data: &[u8],
1129 ranges: &[Range],
1130 complement: bool,
1131 output_delim: &[u8],
1132 line_delim: u8,
1133 buf: &mut Vec<u8>,
1134) {
1135 let mut start = 0;
1136 for end_pos in memchr_iter(line_delim, data) {
1137 let line = &data[start..end_pos];
1138 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
1139 buf.push(line_delim);
1140 start = end_pos + 1;
1141 }
1142 if start < data.len() {
1143 cut_bytes_to_buf(&data[start..], ranges, complement, output_delim, buf);
1144 buf.push(line_delim);
1145 }
1146}
1147
1148#[inline(always)]
1150fn cut_bytes_to_buf(
1151 line: &[u8],
1152 ranges: &[Range],
1153 complement: bool,
1154 output_delim: &[u8],
1155 buf: &mut Vec<u8>,
1156) {
1157 let len = line.len();
1158 let mut first_range = true;
1159
1160 if complement {
1161 let mut pos: usize = 1;
1162 for r in ranges {
1163 let rs = r.start;
1164 let re = r.end.min(len);
1165 if pos < rs {
1166 if !first_range && !output_delim.is_empty() {
1167 buf.extend_from_slice(output_delim);
1168 }
1169 buf.extend_from_slice(&line[pos - 1..rs - 1]);
1170 first_range = false;
1171 }
1172 pos = re + 1;
1173 if pos > len {
1174 break;
1175 }
1176 }
1177 if pos <= len {
1178 if !first_range && !output_delim.is_empty() {
1179 buf.extend_from_slice(output_delim);
1180 }
1181 buf.extend_from_slice(&line[pos - 1..len]);
1182 }
1183 } else if output_delim.is_empty() && ranges.len() == 1 {
1184 let start = ranges[0].start.saturating_sub(1);
1186 let end = ranges[0].end.min(len);
1187 if start < len {
1188 buf.extend_from_slice(&line[start..end]);
1189 }
1190 } else {
1191 for r in ranges {
1192 let start = r.start.saturating_sub(1);
1193 let end = r.end.min(len);
1194 if start >= len {
1195 break;
1196 }
1197 if !first_range && !output_delim.is_empty() {
1198 buf.extend_from_slice(output_delim);
1199 }
1200 buf.extend_from_slice(&line[start..end]);
1201 first_range = false;
1202 }
1203 }
1204}
1205
1206#[inline]
1210pub fn cut_fields(
1211 line: &[u8],
1212 delim: u8,
1213 ranges: &[Range],
1214 complement: bool,
1215 output_delim: &[u8],
1216 suppress_no_delim: bool,
1217 out: &mut impl Write,
1218) -> io::Result<bool> {
1219 if memchr::memchr(delim, line).is_none() {
1220 if !suppress_no_delim {
1221 out.write_all(line)?;
1222 return Ok(true);
1223 }
1224 return Ok(false);
1225 }
1226
1227 let mut field_num: usize = 1;
1228 let mut field_start: usize = 0;
1229 let mut first_output = true;
1230
1231 for delim_pos in memchr_iter(delim, line) {
1232 let selected = in_ranges(ranges, field_num) != complement;
1233 if selected {
1234 if !first_output {
1235 out.write_all(output_delim)?;
1236 }
1237 out.write_all(&line[field_start..delim_pos])?;
1238 first_output = false;
1239 }
1240 field_start = delim_pos + 1;
1241 field_num += 1;
1242 }
1243
1244 let selected = in_ranges(ranges, field_num) != complement;
1245 if selected {
1246 if !first_output {
1247 out.write_all(output_delim)?;
1248 }
1249 out.write_all(&line[field_start..])?;
1250 }
1251
1252 Ok(true)
1253}
1254
1255#[inline]
1257pub fn cut_bytes(
1258 line: &[u8],
1259 ranges: &[Range],
1260 complement: bool,
1261 output_delim: &[u8],
1262 out: &mut impl Write,
1263) -> io::Result<bool> {
1264 let mut first_range = true;
1265
1266 if complement {
1267 let len = line.len();
1268 let mut comp_ranges = Vec::new();
1269 let mut pos: usize = 1;
1270 for r in ranges {
1271 let rs = r.start;
1272 let re = r.end.min(len);
1273 if pos < rs {
1274 comp_ranges.push((pos, rs - 1));
1275 }
1276 pos = re + 1;
1277 if pos > len {
1278 break;
1279 }
1280 }
1281 if pos <= len {
1282 comp_ranges.push((pos, len));
1283 }
1284 for &(s, e) in &comp_ranges {
1285 if !first_range && !output_delim.is_empty() {
1286 out.write_all(output_delim)?;
1287 }
1288 out.write_all(&line[s - 1..e])?;
1289 first_range = false;
1290 }
1291 } else {
1292 for r in ranges {
1293 let start = r.start.saturating_sub(1);
1294 let end = r.end.min(line.len());
1295 if start >= line.len() {
1296 break;
1297 }
1298 if !first_range && !output_delim.is_empty() {
1299 out.write_all(output_delim)?;
1300 }
1301 out.write_all(&line[start..end])?;
1302 first_range = false;
1303 }
1304 }
1305 Ok(true)
1306}
1307
1308pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1310 match cfg.mode {
1311 CutMode::Fields => process_fields_fast(data, cfg, out),
1312 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
1313 }
1314}
1315
1316pub fn process_cut_reader<R: BufRead>(
1318 mut reader: R,
1319 cfg: &CutConfig,
1320 out: &mut impl Write,
1321) -> io::Result<()> {
1322 let mut buf = Vec::new();
1323
1324 loop {
1325 buf.clear();
1326 let n = reader.read_until(cfg.line_delim, &mut buf)?;
1327 if n == 0 {
1328 break;
1329 }
1330
1331 let has_line_delim = buf.last() == Some(&cfg.line_delim);
1332 let line = if has_line_delim {
1333 &buf[..buf.len() - 1]
1334 } else {
1335 &buf[..]
1336 };
1337
1338 let wrote = process_one_line(line, cfg, out)?;
1339
1340 if wrote {
1341 out.write_all(&[cfg.line_delim])?;
1342 }
1343 }
1344
1345 Ok(())
1346}
1347
1348#[inline]
1350fn process_one_line(line: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<bool> {
1351 match cfg.mode {
1352 CutMode::Fields => cut_fields(
1353 line,
1354 cfg.delim,
1355 cfg.ranges,
1356 cfg.complement,
1357 cfg.output_delim,
1358 cfg.suppress_no_delim,
1359 out,
1360 ),
1361 CutMode::Bytes | CutMode::Characters => {
1362 cut_bytes(line, cfg.ranges, cfg.complement, cfg.output_delim, out)
1363 }
1364 }
1365}
1366
/// What unit the selected ranges refer to.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum CutMode {
    /// Ranges are byte positions within each line.
    Bytes,
    /// Ranges are character positions; the processors in this file route this
    /// mode through the same code path as `Bytes`.
    Characters,
    /// Ranges are field numbers, with fields separated by the configured
    /// delimiter byte.
    Fields,
}