1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, Write};
4
5const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;
9
10pub struct CutConfig<'a> {
12 pub mode: CutMode,
13 pub ranges: &'a [Range],
14 pub complement: bool,
15 pub delim: u8,
16 pub output_delim: &'a [u8],
17 pub suppress_no_delim: bool,
18 pub line_delim: u8,
19}
20
21#[derive(Debug, Clone)]
23pub struct Range {
24 pub start: usize, pub end: usize, }
27
28pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
31 let mut ranges = Vec::new();
32
33 for part in spec.split(',') {
34 let part = part.trim();
35 if part.is_empty() {
36 continue;
37 }
38
39 if let Some(idx) = part.find('-') {
40 let left = &part[..idx];
41 let right = &part[idx + 1..];
42
43 let start = if left.is_empty() {
44 1
45 } else {
46 left.parse::<usize>()
47 .map_err(|_| format!("invalid range: '{}'", part))?
48 };
49
50 let end = if right.is_empty() {
51 usize::MAX
52 } else {
53 right
54 .parse::<usize>()
55 .map_err(|_| format!("invalid range: '{}'", part))?
56 };
57
58 if start == 0 {
59 return Err("fields and positions are numbered from 1".to_string());
60 }
61 if start > end {
62 return Err(format!("invalid decreasing range: '{}'", part));
63 }
64
65 ranges.push(Range { start, end });
66 } else {
67 let n = part
68 .parse::<usize>()
69 .map_err(|_| format!("invalid field: '{}'", part))?;
70 if n == 0 {
71 return Err("fields and positions are numbered from 1".to_string());
72 }
73 ranges.push(Range { start: n, end: n });
74 }
75 }
76
77 if ranges.is_empty() {
78 return Err("you must specify a list of bytes, characters, or fields".to_string());
79 }
80
81 ranges.sort_by_key(|r| (r.start, r.end));
83 let mut merged = vec![ranges[0].clone()];
84 for r in &ranges[1..] {
85 let last = merged.last_mut().unwrap();
86 if r.start <= last.end.saturating_add(1) {
87 last.end = last.end.max(r.end);
88 } else {
89 merged.push(r.clone());
90 }
91 }
92
93 Ok(merged)
94}
95
96#[inline(always)]
99fn in_ranges(ranges: &[Range], pos: usize) -> bool {
100 for r in ranges {
101 if pos < r.start {
102 return false; }
104 if pos <= r.end {
105 return true;
106 }
107 }
108 false
109}
110
111#[inline]
114fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
115 let mut mask: u64 = 0;
116 for i in 1..=64u32 {
117 let in_range = in_ranges(ranges, i as usize);
118 if in_range != complement {
119 mask |= 1u64 << (i - 1);
120 }
121 }
122 mask
123}
124
125#[inline(always)]
127fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
128 if field_num <= 64 {
129 (mask >> (field_num - 1)) & 1 == 1
130 } else {
131 in_ranges(ranges, field_num) != complement
132 }
133}
134
135fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
140 let num_threads = rayon::current_num_threads().max(1);
141 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
142 return vec![data];
143 }
144
145 let chunk_size = data.len() / num_threads;
146 let mut chunks = Vec::with_capacity(num_threads);
147 let mut pos = 0;
148
149 for _ in 0..num_threads - 1 {
150 let target = pos + chunk_size;
151 if target >= data.len() {
152 break;
153 }
154 let boundary = memchr::memchr(line_delim, &data[target..])
156 .map(|p| target + p + 1)
157 .unwrap_or(data.len());
158 if boundary > pos {
159 chunks.push(&data[pos..boundary]);
160 }
161 pos = boundary;
162 }
163
164 if pos < data.len() {
166 chunks.push(&data[pos..]);
167 }
168
169 chunks
170}
171
172fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
177 let delim = cfg.delim;
178 let line_delim = cfg.line_delim;
179 let ranges = cfg.ranges;
180 let complement = cfg.complement;
181 let output_delim = cfg.output_delim;
182 let suppress = cfg.suppress_no_delim;
183
184 if !complement && memchr::memchr(delim, data).is_none() {
187 if suppress {
188 return Ok(()); }
190 out.write_all(data)?;
192 if !data.is_empty() && *data.last().unwrap() != line_delim {
194 out.write_all(&[line_delim])?;
195 }
196 return Ok(());
197 }
198
199 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
201 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
202 }
203
204 if complement
207 && ranges.len() == 1
208 && ranges[0].start == ranges[0].end
209 && output_delim.len() == 1
210 && output_delim[0] == delim
211 {
212 return process_complement_single_field(
213 data,
214 delim,
215 line_delim,
216 ranges[0].start,
217 suppress,
218 out,
219 );
220 }
221
222 if !complement
225 && ranges.len() == 1
226 && ranges[0].start == 1
227 && output_delim.len() == 1
228 && output_delim[0] == delim
229 && ranges[0].end < usize::MAX
230 {
231 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
232 }
233
234 if !complement
237 && ranges.len() == 1
238 && ranges[0].end == usize::MAX
239 && ranges[0].start > 1
240 && output_delim.len() == 1
241 && output_delim[0] == delim
242 {
243 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
244 }
245
246 let max_field = if complement {
248 usize::MAX
249 } else {
250 ranges.last().map(|r| r.end).unwrap_or(0)
251 };
252 let field_mask = compute_field_mask(ranges, complement);
253
254 if data.len() >= PARALLEL_THRESHOLD {
255 let chunks = split_into_chunks(data, line_delim);
257 let results: Vec<Vec<u8>> = chunks
258 .par_iter()
259 .map(|chunk| {
260 let mut buf = Vec::with_capacity(chunk.len());
261 process_fields_chunk(
262 chunk,
263 delim,
264 ranges,
265 output_delim,
266 suppress,
267 max_field,
268 field_mask,
269 line_delim,
270 complement,
271 &mut buf,
272 );
273 buf
274 })
275 .collect();
276 for result in &results {
277 if !result.is_empty() {
278 out.write_all(result)?;
279 }
280 }
281 } else {
282 let mut buf = Vec::with_capacity(data.len());
284 process_fields_chunk(
285 data,
286 delim,
287 ranges,
288 output_delim,
289 suppress,
290 max_field,
291 field_mask,
292 line_delim,
293 complement,
294 &mut buf,
295 );
296 if !buf.is_empty() {
297 out.write_all(&buf)?;
298 }
299 }
300 Ok(())
301}
302
303fn process_fields_chunk(
305 data: &[u8],
306 delim: u8,
307 ranges: &[Range],
308 output_delim: &[u8],
309 suppress: bool,
310 max_field: usize,
311 field_mask: u64,
312 line_delim: u8,
313 complement: bool,
314 buf: &mut Vec<u8>,
315) {
316 let mut start = 0;
317 for end_pos in memchr_iter(line_delim, data) {
318 let line = &data[start..end_pos];
319 extract_fields_to_buf(
320 line,
321 delim,
322 ranges,
323 output_delim,
324 suppress,
325 max_field,
326 field_mask,
327 line_delim,
328 buf,
329 complement,
330 );
331 start = end_pos + 1;
332 }
333 if start < data.len() {
334 extract_fields_to_buf(
335 &data[start..],
336 delim,
337 ranges,
338 output_delim,
339 suppress,
340 max_field,
341 field_mask,
342 line_delim,
343 buf,
344 complement,
345 );
346 }
347}
348
349fn process_single_field(
355 data: &[u8],
356 delim: u8,
357 line_delim: u8,
358 target: usize,
359 suppress: bool,
360 out: &mut impl Write,
361) -> io::Result<()> {
362 let target_idx = target - 1;
363
364 if target_idx == 0 && delim != line_delim {
368 if data.len() >= PARALLEL_THRESHOLD {
369 let chunks = split_into_chunks(data, line_delim);
370 let results: Vec<Vec<u8>> = chunks
371 .par_iter()
372 .map(|chunk| {
373 let mut buf = Vec::with_capacity(chunk.len());
374 process_first_field_combined(chunk, delim, line_delim, suppress, &mut buf);
375 buf
376 })
377 .collect();
378 for result in &results {
379 if !result.is_empty() {
380 out.write_all(result)?;
381 }
382 }
383 } else {
384 let mut buf = Vec::with_capacity(data.len());
385 process_first_field_combined(data, delim, line_delim, suppress, &mut buf);
386 if !buf.is_empty() {
387 out.write_all(&buf)?;
388 }
389 }
390 return Ok(());
391 }
392
393 if data.len() >= PARALLEL_THRESHOLD {
394 let chunks = split_into_chunks(data, line_delim);
396 let results: Vec<Vec<u8>> = chunks
397 .par_iter()
398 .map(|chunk| {
399 let mut buf = Vec::with_capacity(chunk.len() / 4);
400 process_single_field_chunk(
401 chunk, delim, target_idx, line_delim, suppress, &mut buf,
402 );
403 buf
404 })
405 .collect();
406 for result in &results {
407 if !result.is_empty() {
408 out.write_all(result)?;
409 }
410 }
411 } else {
412 let mut buf = Vec::with_capacity(data.len() / 4);
414 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
415 if !buf.is_empty() {
416 out.write_all(&buf)?;
417 }
418 }
419 Ok(())
420}
421
422fn process_complement_single_field(
427 data: &[u8],
428 delim: u8,
429 line_delim: u8,
430 skip_field: usize,
431 suppress: bool,
432 out: &mut impl Write,
433) -> io::Result<()> {
434 let skip_idx = skip_field - 1;
435
436 if data.len() >= PARALLEL_THRESHOLD {
437 let chunks = split_into_chunks(data, line_delim);
438 let results: Vec<Vec<u8>> = chunks
439 .par_iter()
440 .map(|chunk| {
441 let mut buf = Vec::with_capacity(chunk.len());
442 complement_single_field_chunk(
443 chunk, delim, skip_idx, line_delim, suppress, &mut buf,
444 );
445 buf
446 })
447 .collect();
448 for result in &results {
449 if !result.is_empty() {
450 out.write_all(result)?;
451 }
452 }
453 } else {
454 let mut buf = Vec::with_capacity(data.len());
455 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
456 if !buf.is_empty() {
457 out.write_all(&buf)?;
458 }
459 }
460 Ok(())
461}
462
463fn complement_single_field_chunk(
465 data: &[u8],
466 delim: u8,
467 skip_idx: usize,
468 line_delim: u8,
469 suppress: bool,
470 buf: &mut Vec<u8>,
471) {
472 let mut start = 0;
473 for end_pos in memchr_iter(line_delim, data) {
474 let line = &data[start..end_pos];
475 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
476 start = end_pos + 1;
477 }
478 if start < data.len() {
479 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
480 }
481}
482
483#[inline(always)]
485fn complement_single_field_line(
486 line: &[u8],
487 delim: u8,
488 skip_idx: usize,
489 line_delim: u8,
490 suppress: bool,
491 buf: &mut Vec<u8>,
492) {
493 if line.is_empty() {
494 if !suppress {
495 buf.push(line_delim);
496 }
497 return;
498 }
499
500 let mut field_idx = 0;
502 let mut field_start = 0;
503 let mut first_output = true;
504 let mut has_delim = false;
505
506 for pos in memchr_iter(delim, line) {
507 has_delim = true;
508 if field_idx != skip_idx {
509 if !first_output {
510 buf.push(delim);
511 }
512 buf.extend_from_slice(&line[field_start..pos]);
513 first_output = false;
514 }
515 field_idx += 1;
516 field_start = pos + 1;
517 }
518
519 if !has_delim {
520 if !suppress {
521 buf.extend_from_slice(line);
522 buf.push(line_delim);
523 }
524 return;
525 }
526
527 if field_idx != skip_idx {
529 if !first_output {
530 buf.push(delim);
531 }
532 buf.extend_from_slice(&line[field_start..]);
533 }
534
535 if !first_output || field_idx != skip_idx {
536 buf.push(line_delim);
537 } else {
538 buf.push(line_delim);
540 }
541}
542
543fn process_fields_prefix(
547 data: &[u8],
548 delim: u8,
549 line_delim: u8,
550 last_field: usize,
551 suppress: bool,
552 out: &mut impl Write,
553) -> io::Result<()> {
554 if data.len() >= PARALLEL_THRESHOLD {
555 let chunks = split_into_chunks(data, line_delim);
556 let results: Vec<Vec<u8>> = chunks
557 .par_iter()
558 .map(|chunk| {
559 let mut buf = Vec::with_capacity(chunk.len());
560 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
561 buf
562 })
563 .collect();
564 for result in &results {
565 if !result.is_empty() {
566 out.write_all(result)?;
567 }
568 }
569 } else {
570 let mut buf = Vec::with_capacity(data.len());
571 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
572 if !buf.is_empty() {
573 out.write_all(&buf)?;
574 }
575 }
576 Ok(())
577}
578
579fn fields_prefix_chunk(
581 data: &[u8],
582 delim: u8,
583 line_delim: u8,
584 last_field: usize,
585 suppress: bool,
586 buf: &mut Vec<u8>,
587) {
588 let mut start = 0;
589 for end_pos in memchr_iter(line_delim, data) {
590 let line = &data[start..end_pos];
591 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
592 start = end_pos + 1;
593 }
594 if start < data.len() {
595 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
596 }
597}
598
599#[inline(always)]
602fn fields_prefix_line(
603 line: &[u8],
604 delim: u8,
605 line_delim: u8,
606 last_field: usize,
607 suppress: bool,
608 buf: &mut Vec<u8>,
609) {
610 if line.is_empty() {
611 if !suppress {
612 buf.push(line_delim);
613 }
614 return;
615 }
616
617 let mut field_count = 1;
619 let mut has_delim = false;
620
621 for pos in memchr_iter(delim, line) {
622 has_delim = true;
623 if field_count >= last_field {
624 buf.extend_from_slice(&line[..pos]);
626 buf.push(line_delim);
627 return;
628 }
629 field_count += 1;
630 }
631
632 if !has_delim {
633 if !suppress {
634 buf.extend_from_slice(line);
635 buf.push(line_delim);
636 }
637 return;
638 }
639
640 buf.extend_from_slice(line);
643 buf.push(line_delim);
644}
645
646fn process_fields_suffix(
649 data: &[u8],
650 delim: u8,
651 line_delim: u8,
652 start_field: usize,
653 suppress: bool,
654 out: &mut impl Write,
655) -> io::Result<()> {
656 if data.len() >= PARALLEL_THRESHOLD {
657 let chunks = split_into_chunks(data, line_delim);
658 let results: Vec<Vec<u8>> = chunks
659 .par_iter()
660 .map(|chunk| {
661 let mut buf = Vec::with_capacity(chunk.len());
662 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
663 buf
664 })
665 .collect();
666 for result in &results {
667 if !result.is_empty() {
668 out.write_all(result)?;
669 }
670 }
671 } else {
672 let mut buf = Vec::with_capacity(data.len());
673 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
674 if !buf.is_empty() {
675 out.write_all(&buf)?;
676 }
677 }
678 Ok(())
679}
680
681fn fields_suffix_chunk(
683 data: &[u8],
684 delim: u8,
685 line_delim: u8,
686 start_field: usize,
687 suppress: bool,
688 buf: &mut Vec<u8>,
689) {
690 let mut start = 0;
691 for end_pos in memchr_iter(line_delim, data) {
692 let line = &data[start..end_pos];
693 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
694 start = end_pos + 1;
695 }
696 if start < data.len() {
697 fields_suffix_line(
698 &data[start..],
699 delim,
700 line_delim,
701 start_field,
702 suppress,
703 buf,
704 );
705 }
706}
707
708#[inline(always)]
711fn fields_suffix_line(
712 line: &[u8],
713 delim: u8,
714 line_delim: u8,
715 start_field: usize,
716 suppress: bool,
717 buf: &mut Vec<u8>,
718) {
719 if line.is_empty() {
720 if !suppress {
721 buf.push(line_delim);
722 }
723 return;
724 }
725
726 let skip_delims = start_field - 1;
727 let mut delim_count = 0;
728 let mut has_delim = false;
729
730 for pos in memchr_iter(delim, line) {
731 has_delim = true;
732 delim_count += 1;
733 if delim_count >= skip_delims {
734 buf.extend_from_slice(&line[pos + 1..]);
736 buf.push(line_delim);
737 return;
738 }
739 }
740
741 if !has_delim {
742 if !suppress {
744 buf.extend_from_slice(line);
745 buf.push(line_delim);
746 }
747 return;
748 }
749
750 buf.push(line_delim);
752}
753
754fn process_first_field_combined(
758 data: &[u8],
759 delim: u8,
760 line_delim: u8,
761 suppress: bool,
762 buf: &mut Vec<u8>,
763) {
764 let mut line_start = 0;
765 let mut found_delim = false; for pos in memchr::memchr2_iter(delim, line_delim, data) {
768 let byte = data[pos];
769 if byte == line_delim {
770 if !found_delim {
772 if !suppress {
774 buf.extend_from_slice(&data[line_start..pos]);
775 buf.push(line_delim);
776 }
777 }
778 line_start = pos + 1;
779 found_delim = false;
780 } else {
781 if !found_delim {
783 buf.extend_from_slice(&data[line_start..pos]);
785 buf.push(line_delim);
786 found_delim = true;
787 }
788 }
790 }
791
792 if line_start < data.len() {
794 if !found_delim {
795 if !suppress {
797 match memchr::memchr(delim, &data[line_start..]) {
799 Some(offset) => {
800 buf.extend_from_slice(&data[line_start..line_start + offset]);
801 buf.push(line_delim);
802 }
803 None => {
804 buf.extend_from_slice(&data[line_start..]);
805 buf.push(line_delim);
806 }
807 }
808 }
809 }
810 }
811}
812
813fn process_single_field_chunk(
815 data: &[u8],
816 delim: u8,
817 target_idx: usize,
818 line_delim: u8,
819 suppress: bool,
820 buf: &mut Vec<u8>,
821) {
822 let mut start = 0;
823 for end_pos in memchr_iter(line_delim, data) {
824 let line = &data[start..end_pos];
825 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
826 start = end_pos + 1;
827 }
828 if start < data.len() {
830 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
831 }
832}
833
834#[inline(always)]
838fn extract_single_field_line(
839 line: &[u8],
840 delim: u8,
841 target_idx: usize,
842 line_delim: u8,
843 suppress: bool,
844 buf: &mut Vec<u8>,
845) {
846 if line.is_empty() {
847 if !suppress {
848 buf.push(line_delim);
849 }
850 return;
851 }
852
853 if target_idx == 0 {
855 match memchr::memchr(delim, line) {
856 Some(pos) => {
857 buf.extend_from_slice(&line[..pos]);
858 buf.push(line_delim);
859 }
860 None => {
861 if !suppress {
863 buf.extend_from_slice(line);
864 buf.push(line_delim);
865 }
866 }
867 }
868 return;
869 }
870
871 let mut field_start = 0;
872 let mut field_idx = 0;
873 let mut has_delim = false;
874
875 for pos in memchr_iter(delim, line) {
876 has_delim = true;
877 if field_idx == target_idx {
878 buf.extend_from_slice(&line[field_start..pos]);
880 buf.push(line_delim);
881 return;
882 }
883 field_idx += 1;
884 field_start = pos + 1;
885 }
886
887 if !has_delim {
888 if !suppress {
890 buf.extend_from_slice(line);
891 buf.push(line_delim);
892 }
893 return;
894 }
895
896 if field_idx == target_idx {
897 buf.extend_from_slice(&line[field_start..]);
899 buf.push(line_delim);
900 } else {
901 buf.push(line_delim);
903 }
904}
905
906#[inline(always)]
909fn extract_fields_to_buf(
910 line: &[u8],
911 delim: u8,
912 ranges: &[Range],
913 output_delim: &[u8],
914 suppress: bool,
915 max_field: usize,
916 field_mask: u64,
917 line_delim: u8,
918 buf: &mut Vec<u8>,
919 complement: bool,
920) {
921 let len = line.len();
922
923 if len == 0 {
925 if !suppress {
926 buf.push(line_delim);
927 }
928 return;
929 }
930
931 let mut field_num: usize = 1;
932 let mut field_start: usize = 0;
933 let mut first_output = true;
934 let mut has_delim = false;
935
936 for delim_pos in memchr_iter(delim, line) {
938 has_delim = true;
939
940 if is_selected(field_num, field_mask, ranges, complement) {
941 if !first_output {
942 buf.extend_from_slice(output_delim);
943 }
944 buf.extend_from_slice(&line[field_start..delim_pos]);
945 first_output = false;
946 }
947
948 field_num += 1;
949 field_start = delim_pos + 1;
950
951 if field_num > max_field {
953 break;
954 }
955 }
956
957 if (field_num <= max_field || complement)
959 && has_delim
960 && is_selected(field_num, field_mask, ranges, complement)
961 {
962 if !first_output {
963 buf.extend_from_slice(output_delim);
964 }
965 buf.extend_from_slice(&line[field_start..len]);
966 first_output = false;
967 }
968
969 if !first_output {
971 buf.push(line_delim);
973 } else if !has_delim {
974 if !suppress {
976 buf.extend_from_slice(line);
977 buf.push(line_delim);
978 }
979 } else {
980 buf.push(line_delim);
982 }
983}
984
985fn process_bytes_from_start(
991 data: &[u8],
992 max_bytes: usize,
993 line_delim: u8,
994 out: &mut impl Write,
995) -> io::Result<()> {
996 if data.len() >= PARALLEL_THRESHOLD {
997 let chunks = split_into_chunks(data, line_delim);
998 let results: Vec<Vec<u8>> = chunks
999 .par_iter()
1000 .map(|chunk| {
1001 let mut buf = Vec::with_capacity(chunk.len());
1002 bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
1003 buf
1004 })
1005 .collect();
1006 for result in &results {
1007 if !result.is_empty() {
1008 out.write_all(result)?;
1009 }
1010 }
1011 } else {
1012 let mut buf = Vec::with_capacity(data.len());
1013 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
1014 if !buf.is_empty() {
1015 out.write_all(&buf)?;
1016 }
1017 }
1018 Ok(())
1019}
1020
1021#[inline]
1024fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1025 let mut start = 0;
1026 for pos in memchr_iter(line_delim, data) {
1027 let line_len = pos - start;
1028 let take = line_len.min(max_bytes);
1029 buf.extend_from_slice(&data[start..start + take]);
1030 buf.push(line_delim);
1031 start = pos + 1;
1032 }
1033 if start < data.len() {
1035 let line_len = data.len() - start;
1036 let take = line_len.min(max_bytes);
1037 buf.extend_from_slice(&data[start..start + take]);
1038 buf.push(line_delim);
1039 }
1040}
1041
1042fn process_bytes_from_offset(
1045 data: &[u8],
1046 skip_bytes: usize,
1047 line_delim: u8,
1048 out: &mut impl Write,
1049) -> io::Result<()> {
1050 if data.len() >= PARALLEL_THRESHOLD {
1051 let chunks = split_into_chunks(data, line_delim);
1052 let results: Vec<Vec<u8>> = chunks
1053 .par_iter()
1054 .map(|chunk| {
1055 let mut buf = Vec::with_capacity(chunk.len());
1056 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
1057 buf
1058 })
1059 .collect();
1060 for result in &results {
1061 if !result.is_empty() {
1062 out.write_all(result)?;
1063 }
1064 }
1065 } else {
1066 let mut buf = Vec::with_capacity(data.len());
1067 bytes_from_offset_chunk(data, skip_bytes, line_delim, &mut buf);
1068 if !buf.is_empty() {
1069 out.write_all(&buf)?;
1070 }
1071 }
1072 Ok(())
1073}
1074
1075#[inline]
1077fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1078 let mut start = 0;
1079 for pos in memchr_iter(line_delim, data) {
1080 let line_len = pos - start;
1081 if line_len > skip_bytes {
1082 buf.extend_from_slice(&data[start + skip_bytes..pos]);
1083 }
1084 buf.push(line_delim);
1085 start = pos + 1;
1086 }
1087 if start < data.len() {
1089 let line_len = data.len() - start;
1090 if line_len > skip_bytes {
1091 buf.extend_from_slice(&data[start + skip_bytes..data.len()]);
1092 }
1093 buf.push(line_delim);
1094 }
1095}
1096
1097fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1099 let line_delim = cfg.line_delim;
1100 let ranges = cfg.ranges;
1101 let complement = cfg.complement;
1102 let output_delim = cfg.output_delim;
1103
1104 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
1107 let max_bytes = ranges[0].end;
1108 if max_bytes < usize::MAX {
1109 return process_bytes_from_start(data, max_bytes, line_delim, out);
1110 }
1111 }
1112
1113 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
1116 let skip_bytes = ranges[0].start.saturating_sub(1);
1117 if skip_bytes > 0 {
1118 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
1119 }
1120 }
1121
1122 if data.len() >= PARALLEL_THRESHOLD {
1123 let chunks = split_into_chunks(data, line_delim);
1124 let results: Vec<Vec<u8>> = chunks
1125 .par_iter()
1126 .map(|chunk| {
1127 let mut buf = Vec::with_capacity(chunk.len());
1128 process_bytes_chunk(
1129 chunk,
1130 ranges,
1131 complement,
1132 output_delim,
1133 line_delim,
1134 &mut buf,
1135 );
1136 buf
1137 })
1138 .collect();
1139 for result in &results {
1140 if !result.is_empty() {
1141 out.write_all(result)?;
1142 }
1143 }
1144 } else {
1145 let mut buf = Vec::with_capacity(data.len());
1146 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
1147 if !buf.is_empty() {
1148 out.write_all(&buf)?;
1149 }
1150 }
1151 Ok(())
1152}
1153
1154fn process_bytes_chunk(
1156 data: &[u8],
1157 ranges: &[Range],
1158 complement: bool,
1159 output_delim: &[u8],
1160 line_delim: u8,
1161 buf: &mut Vec<u8>,
1162) {
1163 let mut start = 0;
1164 for end_pos in memchr_iter(line_delim, data) {
1165 let line = &data[start..end_pos];
1166 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
1167 buf.push(line_delim);
1168 start = end_pos + 1;
1169 }
1170 if start < data.len() {
1171 cut_bytes_to_buf(&data[start..], ranges, complement, output_delim, buf);
1172 buf.push(line_delim);
1173 }
1174}
1175
1176#[inline(always)]
1179fn cut_bytes_to_buf(
1180 line: &[u8],
1181 ranges: &[Range],
1182 complement: bool,
1183 output_delim: &[u8],
1184 buf: &mut Vec<u8>,
1185) {
1186 let len = line.len();
1187 let mut first_range = true;
1188
1189 if complement {
1190 let mut pos: usize = 1;
1191 for r in ranges {
1192 let rs = r.start;
1193 let re = r.end.min(len);
1194 if pos < rs {
1195 if !first_range && !output_delim.is_empty() {
1196 buf.extend_from_slice(output_delim);
1197 }
1198 buf.extend_from_slice(&line[pos - 1..rs - 1]);
1199 first_range = false;
1200 }
1201 pos = re + 1;
1202 if pos > len {
1203 break;
1204 }
1205 }
1206 if pos <= len {
1207 if !first_range && !output_delim.is_empty() {
1208 buf.extend_from_slice(output_delim);
1209 }
1210 buf.extend_from_slice(&line[pos - 1..len]);
1211 }
1212 } else if output_delim.is_empty() && ranges.len() == 1 {
1213 let start = ranges[0].start.saturating_sub(1);
1215 let end = ranges[0].end.min(len);
1216 if start < len {
1217 buf.extend_from_slice(&line[start..end]);
1218 }
1219 } else {
1220 for r in ranges {
1221 let start = r.start.saturating_sub(1);
1222 let end = r.end.min(len);
1223 if start >= len {
1224 break;
1225 }
1226 if !first_range && !output_delim.is_empty() {
1227 buf.extend_from_slice(output_delim);
1228 }
1229 buf.extend_from_slice(&line[start..end]);
1230 first_range = false;
1231 }
1232 }
1233}
1234
1235#[inline]
1241pub fn cut_fields(
1242 line: &[u8],
1243 delim: u8,
1244 ranges: &[Range],
1245 complement: bool,
1246 output_delim: &[u8],
1247 suppress_no_delim: bool,
1248 out: &mut impl Write,
1249) -> io::Result<bool> {
1250 if memchr::memchr(delim, line).is_none() {
1252 if !suppress_no_delim {
1253 out.write_all(line)?;
1254 return Ok(true);
1255 }
1256 return Ok(false); }
1258
1259 let mut field_num: usize = 1;
1261 let mut field_start: usize = 0;
1262 let mut first_output = true;
1263
1264 for delim_pos in memchr_iter(delim, line) {
1265 let selected = in_ranges(ranges, field_num) != complement;
1266 if selected {
1267 if !first_output {
1268 out.write_all(output_delim)?;
1269 }
1270 out.write_all(&line[field_start..delim_pos])?;
1271 first_output = false;
1272 }
1273 field_start = delim_pos + 1;
1274 field_num += 1;
1275 }
1276
1277 let selected = in_ranges(ranges, field_num) != complement;
1279 if selected {
1280 if !first_output {
1281 out.write_all(output_delim)?;
1282 }
1283 out.write_all(&line[field_start..])?;
1284 }
1285
1286 Ok(true)
1287}
1288
1289#[inline]
1292pub fn cut_bytes(
1293 line: &[u8],
1294 ranges: &[Range],
1295 complement: bool,
1296 output_delim: &[u8],
1297 out: &mut impl Write,
1298) -> io::Result<bool> {
1299 let mut first_range = true;
1300
1301 if complement {
1302 let len = line.len();
1303 let mut comp_ranges = Vec::new();
1304 let mut pos: usize = 1;
1305 for r in ranges {
1306 let rs = r.start;
1307 let re = r.end.min(len);
1308 if pos < rs {
1309 comp_ranges.push((pos, rs - 1));
1310 }
1311 pos = re + 1;
1312 if pos > len {
1313 break;
1314 }
1315 }
1316 if pos <= len {
1317 comp_ranges.push((pos, len));
1318 }
1319 for &(s, e) in &comp_ranges {
1320 if !first_range && !output_delim.is_empty() {
1321 out.write_all(output_delim)?;
1322 }
1323 out.write_all(&line[s - 1..e])?;
1324 first_range = false;
1325 }
1326 } else {
1327 for r in ranges {
1328 let start = r.start.saturating_sub(1);
1329 let end = r.end.min(line.len());
1330 if start >= line.len() {
1331 break;
1332 }
1333 if !first_range && !output_delim.is_empty() {
1334 out.write_all(output_delim)?;
1335 }
1336 out.write_all(&line[start..end])?;
1337 first_range = false;
1338 }
1339 }
1340 Ok(true)
1341}
1342
1343pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1346 match cfg.mode {
1347 CutMode::Fields => process_fields_fast(data, cfg, out),
1348 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
1349 }
1350}
1351
1352pub fn process_cut_reader<R: BufRead>(
1354 mut reader: R,
1355 cfg: &CutConfig,
1356 out: &mut impl Write,
1357) -> io::Result<()> {
1358 let mut buf = Vec::new();
1359
1360 loop {
1361 buf.clear();
1362 let n = reader.read_until(cfg.line_delim, &mut buf)?;
1363 if n == 0 {
1364 break;
1365 }
1366
1367 let has_line_delim = buf.last() == Some(&cfg.line_delim);
1368 let line = if has_line_delim {
1369 &buf[..buf.len() - 1]
1370 } else {
1371 &buf[..]
1372 };
1373
1374 let wrote = process_one_line(line, cfg, out)?;
1375
1376 if wrote {
1378 out.write_all(&[cfg.line_delim])?;
1379 }
1380 }
1381
1382 Ok(())
1383}
1384
1385#[inline]
1387fn process_one_line(line: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<bool> {
1388 match cfg.mode {
1389 CutMode::Fields => cut_fields(
1390 line,
1391 cfg.delim,
1392 cfg.ranges,
1393 cfg.complement,
1394 cfg.output_delim,
1395 cfg.suppress_no_delim,
1396 out,
1397 ),
1398 CutMode::Bytes | CutMode::Characters => {
1399 cut_bytes(line, cfg.ranges, cfg.complement, cfg.output_delim, out)
1400 }
1401 }
1402}
1403
1404#[derive(Debug, Clone, Copy, PartialEq)]
1406pub enum CutMode {
1407 Bytes,
1408 Characters,
1409 Fields,
1410}