1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, Write};
4
5const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;
9
10pub struct CutConfig<'a> {
12 pub mode: CutMode,
13 pub ranges: &'a [Range],
14 pub complement: bool,
15 pub delim: u8,
16 pub output_delim: &'a [u8],
17 pub suppress_no_delim: bool,
18 pub line_delim: u8,
19}
20
21#[derive(Debug, Clone)]
23pub struct Range {
24 pub start: usize, pub end: usize, }
27
28pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
31 let mut ranges = Vec::new();
32
33 for part in spec.split(',') {
34 let part = part.trim();
35 if part.is_empty() {
36 continue;
37 }
38
39 if let Some(idx) = part.find('-') {
40 let left = &part[..idx];
41 let right = &part[idx + 1..];
42
43 let start = if left.is_empty() {
44 1
45 } else {
46 left.parse::<usize>()
47 .map_err(|_| format!("invalid range: '{}'", part))?
48 };
49
50 let end = if right.is_empty() {
51 usize::MAX
52 } else {
53 right
54 .parse::<usize>()
55 .map_err(|_| format!("invalid range: '{}'", part))?
56 };
57
58 if start == 0 {
59 return Err("fields and positions are numbered from 1".to_string());
60 }
61 if start > end {
62 return Err(format!("invalid decreasing range: '{}'", part));
63 }
64
65 ranges.push(Range { start, end });
66 } else {
67 let n = part
68 .parse::<usize>()
69 .map_err(|_| format!("invalid field: '{}'", part))?;
70 if n == 0 {
71 return Err("fields and positions are numbered from 1".to_string());
72 }
73 ranges.push(Range { start: n, end: n });
74 }
75 }
76
77 if ranges.is_empty() {
78 return Err("you must specify a list of bytes, characters, or fields".to_string());
79 }
80
81 ranges.sort_by_key(|r| (r.start, r.end));
83 let mut merged = vec![ranges[0].clone()];
84 for r in &ranges[1..] {
85 let last = merged.last_mut().unwrap();
86 if r.start <= last.end.saturating_add(1) {
87 last.end = last.end.max(r.end);
88 } else {
89 merged.push(r.clone());
90 }
91 }
92
93 Ok(merged)
94}
95
96#[inline(always)]
99fn in_ranges(ranges: &[Range], pos: usize) -> bool {
100 for r in ranges {
101 if pos < r.start {
102 return false; }
104 if pos <= r.end {
105 return true;
106 }
107 }
108 false
109}
110
111#[inline]
114fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
115 let mut mask: u64 = 0;
116 for i in 1..=64u32 {
117 let in_range = in_ranges(ranges, i as usize);
118 if in_range != complement {
119 mask |= 1u64 << (i - 1);
120 }
121 }
122 mask
123}
124
125#[inline(always)]
127fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
128 if field_num <= 64 {
129 (mask >> (field_num - 1)) & 1 == 1
130 } else {
131 in_ranges(ranges, field_num) != complement
132 }
133}
134
135fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
140 let num_threads = rayon::current_num_threads().max(1);
141 if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
142 return vec![data];
143 }
144
145 let chunk_size = data.len() / num_threads;
146 let mut chunks = Vec::with_capacity(num_threads);
147 let mut pos = 0;
148
149 for _ in 0..num_threads - 1 {
150 let target = pos + chunk_size;
151 if target >= data.len() {
152 break;
153 }
154 let boundary = memchr::memchr(line_delim, &data[target..])
156 .map(|p| target + p + 1)
157 .unwrap_or(data.len());
158 if boundary > pos {
159 chunks.push(&data[pos..boundary]);
160 }
161 pos = boundary;
162 }
163
164 if pos < data.len() {
166 chunks.push(&data[pos..]);
167 }
168
169 chunks
170}
171
172fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
177 let delim = cfg.delim;
178 let line_delim = cfg.line_delim;
179 let ranges = cfg.ranges;
180 let complement = cfg.complement;
181 let output_delim = cfg.output_delim;
182 let suppress = cfg.suppress_no_delim;
183
184 if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
186 return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
187 }
188
189 if complement
192 && ranges.len() == 1
193 && ranges[0].start == ranges[0].end
194 && output_delim.len() == 1
195 && output_delim[0] == delim
196 {
197 return process_complement_single_field(
198 data,
199 delim,
200 line_delim,
201 ranges[0].start,
202 suppress,
203 out,
204 );
205 }
206
207 if !complement
210 && ranges.len() == 1
211 && ranges[0].start == 1
212 && output_delim.len() == 1
213 && output_delim[0] == delim
214 && ranges[0].end < usize::MAX
215 {
216 return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
217 }
218
219 if !complement
222 && ranges.len() == 1
223 && ranges[0].end == usize::MAX
224 && ranges[0].start > 1
225 && output_delim.len() == 1
226 && output_delim[0] == delim
227 {
228 return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
229 }
230
231 let max_field = if complement {
233 usize::MAX
234 } else {
235 ranges.last().map(|r| r.end).unwrap_or(0)
236 };
237 let field_mask = compute_field_mask(ranges, complement);
238
239 if data.len() >= PARALLEL_THRESHOLD {
240 let chunks = split_into_chunks(data, line_delim);
242 let results: Vec<Vec<u8>> = chunks
243 .par_iter()
244 .map(|chunk| {
245 let mut buf = Vec::with_capacity(chunk.len());
246 process_fields_chunk(
247 chunk,
248 delim,
249 ranges,
250 output_delim,
251 suppress,
252 max_field,
253 field_mask,
254 line_delim,
255 complement,
256 &mut buf,
257 );
258 buf
259 })
260 .collect();
261 for result in &results {
262 if !result.is_empty() {
263 out.write_all(result)?;
264 }
265 }
266 } else {
267 let mut buf = Vec::with_capacity(data.len());
269 process_fields_chunk(
270 data,
271 delim,
272 ranges,
273 output_delim,
274 suppress,
275 max_field,
276 field_mask,
277 line_delim,
278 complement,
279 &mut buf,
280 );
281 if !buf.is_empty() {
282 out.write_all(&buf)?;
283 }
284 }
285 Ok(())
286}
287
288fn process_fields_chunk(
290 data: &[u8],
291 delim: u8,
292 ranges: &[Range],
293 output_delim: &[u8],
294 suppress: bool,
295 max_field: usize,
296 field_mask: u64,
297 line_delim: u8,
298 complement: bool,
299 buf: &mut Vec<u8>,
300) {
301 let mut start = 0;
302 for end_pos in memchr_iter(line_delim, data) {
303 let line = &data[start..end_pos];
304 extract_fields_to_buf(
305 line,
306 delim,
307 ranges,
308 output_delim,
309 suppress,
310 max_field,
311 field_mask,
312 line_delim,
313 buf,
314 complement,
315 );
316 start = end_pos + 1;
317 }
318 if start < data.len() {
319 extract_fields_to_buf(
320 &data[start..],
321 delim,
322 ranges,
323 output_delim,
324 suppress,
325 max_field,
326 field_mask,
327 line_delim,
328 buf,
329 complement,
330 );
331 }
332}
333
334fn process_single_field(
340 data: &[u8],
341 delim: u8,
342 line_delim: u8,
343 target: usize,
344 suppress: bool,
345 out: &mut impl Write,
346) -> io::Result<()> {
347 let target_idx = target - 1;
348
349 if target_idx == 0 && delim != line_delim {
353 if data.len() >= PARALLEL_THRESHOLD {
354 let chunks = split_into_chunks(data, line_delim);
355 let results: Vec<Vec<u8>> = chunks
356 .par_iter()
357 .map(|chunk| {
358 let mut buf = Vec::with_capacity(chunk.len());
359 process_first_field_combined(chunk, delim, line_delim, suppress, &mut buf);
360 buf
361 })
362 .collect();
363 for result in &results {
364 if !result.is_empty() {
365 out.write_all(result)?;
366 }
367 }
368 } else {
369 let mut buf = Vec::with_capacity(data.len());
370 process_first_field_combined(data, delim, line_delim, suppress, &mut buf);
371 if !buf.is_empty() {
372 out.write_all(&buf)?;
373 }
374 }
375 return Ok(());
376 }
377
378 if data.len() >= PARALLEL_THRESHOLD {
379 let chunks = split_into_chunks(data, line_delim);
381 let results: Vec<Vec<u8>> = chunks
382 .par_iter()
383 .map(|chunk| {
384 let mut buf = Vec::with_capacity(chunk.len() / 4);
385 process_single_field_chunk(
386 chunk, delim, target_idx, line_delim, suppress, &mut buf,
387 );
388 buf
389 })
390 .collect();
391 for result in &results {
392 if !result.is_empty() {
393 out.write_all(result)?;
394 }
395 }
396 } else {
397 let mut buf = Vec::with_capacity(data.len() / 4);
399 process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
400 if !buf.is_empty() {
401 out.write_all(&buf)?;
402 }
403 }
404 Ok(())
405}
406
407fn process_complement_single_field(
412 data: &[u8],
413 delim: u8,
414 line_delim: u8,
415 skip_field: usize,
416 suppress: bool,
417 out: &mut impl Write,
418) -> io::Result<()> {
419 let skip_idx = skip_field - 1;
420
421 if data.len() >= PARALLEL_THRESHOLD {
422 let chunks = split_into_chunks(data, line_delim);
423 let results: Vec<Vec<u8>> = chunks
424 .par_iter()
425 .map(|chunk| {
426 let mut buf = Vec::with_capacity(chunk.len());
427 complement_single_field_chunk(
428 chunk, delim, skip_idx, line_delim, suppress, &mut buf,
429 );
430 buf
431 })
432 .collect();
433 for result in &results {
434 if !result.is_empty() {
435 out.write_all(result)?;
436 }
437 }
438 } else {
439 let mut buf = Vec::with_capacity(data.len());
440 complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
441 if !buf.is_empty() {
442 out.write_all(&buf)?;
443 }
444 }
445 Ok(())
446}
447
448fn complement_single_field_chunk(
450 data: &[u8],
451 delim: u8,
452 skip_idx: usize,
453 line_delim: u8,
454 suppress: bool,
455 buf: &mut Vec<u8>,
456) {
457 let mut start = 0;
458 for end_pos in memchr_iter(line_delim, data) {
459 let line = &data[start..end_pos];
460 complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
461 start = end_pos + 1;
462 }
463 if start < data.len() {
464 complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
465 }
466}
467
468#[inline(always)]
470fn complement_single_field_line(
471 line: &[u8],
472 delim: u8,
473 skip_idx: usize,
474 line_delim: u8,
475 suppress: bool,
476 buf: &mut Vec<u8>,
477) {
478 if line.is_empty() {
479 if !suppress {
480 buf.push(line_delim);
481 }
482 return;
483 }
484
485 let mut field_idx = 0;
487 let mut field_start = 0;
488 let mut first_output = true;
489 let mut has_delim = false;
490
491 for pos in memchr_iter(delim, line) {
492 has_delim = true;
493 if field_idx != skip_idx {
494 if !first_output {
495 buf.push(delim);
496 }
497 buf.extend_from_slice(&line[field_start..pos]);
498 first_output = false;
499 }
500 field_idx += 1;
501 field_start = pos + 1;
502 }
503
504 if !has_delim {
505 if !suppress {
506 buf.extend_from_slice(line);
507 buf.push(line_delim);
508 }
509 return;
510 }
511
512 if field_idx != skip_idx {
514 if !first_output {
515 buf.push(delim);
516 }
517 buf.extend_from_slice(&line[field_start..]);
518 }
519
520 if !first_output || field_idx != skip_idx {
521 buf.push(line_delim);
522 } else {
523 buf.push(line_delim);
525 }
526}
527
528fn process_fields_prefix(
532 data: &[u8],
533 delim: u8,
534 line_delim: u8,
535 last_field: usize,
536 suppress: bool,
537 out: &mut impl Write,
538) -> io::Result<()> {
539 if data.len() >= PARALLEL_THRESHOLD {
540 let chunks = split_into_chunks(data, line_delim);
541 let results: Vec<Vec<u8>> = chunks
542 .par_iter()
543 .map(|chunk| {
544 let mut buf = Vec::with_capacity(chunk.len());
545 fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
546 buf
547 })
548 .collect();
549 for result in &results {
550 if !result.is_empty() {
551 out.write_all(result)?;
552 }
553 }
554 } else {
555 let mut buf = Vec::with_capacity(data.len());
556 fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
557 if !buf.is_empty() {
558 out.write_all(&buf)?;
559 }
560 }
561 Ok(())
562}
563
564fn fields_prefix_chunk(
566 data: &[u8],
567 delim: u8,
568 line_delim: u8,
569 last_field: usize,
570 suppress: bool,
571 buf: &mut Vec<u8>,
572) {
573 let mut start = 0;
574 for end_pos in memchr_iter(line_delim, data) {
575 let line = &data[start..end_pos];
576 fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
577 start = end_pos + 1;
578 }
579 if start < data.len() {
580 fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
581 }
582}
583
584#[inline(always)]
587fn fields_prefix_line(
588 line: &[u8],
589 delim: u8,
590 line_delim: u8,
591 last_field: usize,
592 suppress: bool,
593 buf: &mut Vec<u8>,
594) {
595 if line.is_empty() {
596 if !suppress {
597 buf.push(line_delim);
598 }
599 return;
600 }
601
602 let mut field_count = 1;
604 let mut has_delim = false;
605
606 for pos in memchr_iter(delim, line) {
607 has_delim = true;
608 if field_count >= last_field {
609 buf.extend_from_slice(&line[..pos]);
611 buf.push(line_delim);
612 return;
613 }
614 field_count += 1;
615 }
616
617 if !has_delim {
618 if !suppress {
619 buf.extend_from_slice(line);
620 buf.push(line_delim);
621 }
622 return;
623 }
624
625 buf.extend_from_slice(line);
628 buf.push(line_delim);
629}
630
631fn process_fields_suffix(
634 data: &[u8],
635 delim: u8,
636 line_delim: u8,
637 start_field: usize,
638 suppress: bool,
639 out: &mut impl Write,
640) -> io::Result<()> {
641 if data.len() >= PARALLEL_THRESHOLD {
642 let chunks = split_into_chunks(data, line_delim);
643 let results: Vec<Vec<u8>> = chunks
644 .par_iter()
645 .map(|chunk| {
646 let mut buf = Vec::with_capacity(chunk.len());
647 fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
648 buf
649 })
650 .collect();
651 for result in &results {
652 if !result.is_empty() {
653 out.write_all(result)?;
654 }
655 }
656 } else {
657 let mut buf = Vec::with_capacity(data.len());
658 fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
659 if !buf.is_empty() {
660 out.write_all(&buf)?;
661 }
662 }
663 Ok(())
664}
665
666fn fields_suffix_chunk(
668 data: &[u8],
669 delim: u8,
670 line_delim: u8,
671 start_field: usize,
672 suppress: bool,
673 buf: &mut Vec<u8>,
674) {
675 let mut start = 0;
676 for end_pos in memchr_iter(line_delim, data) {
677 let line = &data[start..end_pos];
678 fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
679 start = end_pos + 1;
680 }
681 if start < data.len() {
682 fields_suffix_line(
683 &data[start..],
684 delim,
685 line_delim,
686 start_field,
687 suppress,
688 buf,
689 );
690 }
691}
692
693#[inline(always)]
696fn fields_suffix_line(
697 line: &[u8],
698 delim: u8,
699 line_delim: u8,
700 start_field: usize,
701 suppress: bool,
702 buf: &mut Vec<u8>,
703) {
704 if line.is_empty() {
705 if !suppress {
706 buf.push(line_delim);
707 }
708 return;
709 }
710
711 let skip_delims = start_field - 1;
712 let mut delim_count = 0;
713 let mut has_delim = false;
714
715 for pos in memchr_iter(delim, line) {
716 has_delim = true;
717 delim_count += 1;
718 if delim_count >= skip_delims {
719 buf.extend_from_slice(&line[pos + 1..]);
721 buf.push(line_delim);
722 return;
723 }
724 }
725
726 if !has_delim {
727 if !suppress {
729 buf.extend_from_slice(line);
730 buf.push(line_delim);
731 }
732 return;
733 }
734
735 buf.push(line_delim);
737}
738
739fn process_first_field_combined(
743 data: &[u8],
744 delim: u8,
745 line_delim: u8,
746 suppress: bool,
747 buf: &mut Vec<u8>,
748) {
749 let mut line_start = 0;
750 let mut found_delim = false; for pos in memchr::memchr2_iter(delim, line_delim, data) {
753 let byte = data[pos];
754 if byte == line_delim {
755 if !found_delim {
757 if !suppress {
759 buf.extend_from_slice(&data[line_start..pos]);
760 buf.push(line_delim);
761 }
762 }
763 line_start = pos + 1;
764 found_delim = false;
765 } else {
766 if !found_delim {
768 buf.extend_from_slice(&data[line_start..pos]);
770 buf.push(line_delim);
771 found_delim = true;
772 }
773 }
775 }
776
777 if line_start < data.len() {
779 if !found_delim {
780 if !suppress {
782 match memchr::memchr(delim, &data[line_start..]) {
784 Some(offset) => {
785 buf.extend_from_slice(&data[line_start..line_start + offset]);
786 buf.push(line_delim);
787 }
788 None => {
789 buf.extend_from_slice(&data[line_start..]);
790 buf.push(line_delim);
791 }
792 }
793 }
794 }
795 }
796}
797
798fn process_single_field_chunk(
800 data: &[u8],
801 delim: u8,
802 target_idx: usize,
803 line_delim: u8,
804 suppress: bool,
805 buf: &mut Vec<u8>,
806) {
807 let mut start = 0;
808 for end_pos in memchr_iter(line_delim, data) {
809 let line = &data[start..end_pos];
810 extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
811 start = end_pos + 1;
812 }
813 if start < data.len() {
815 extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
816 }
817}
818
819#[inline(always)]
823fn extract_single_field_line(
824 line: &[u8],
825 delim: u8,
826 target_idx: usize,
827 line_delim: u8,
828 suppress: bool,
829 buf: &mut Vec<u8>,
830) {
831 if line.is_empty() {
832 if !suppress {
833 buf.push(line_delim);
834 }
835 return;
836 }
837
838 if target_idx == 0 {
840 match memchr::memchr(delim, line) {
841 Some(pos) => {
842 buf.extend_from_slice(&line[..pos]);
843 buf.push(line_delim);
844 }
845 None => {
846 if !suppress {
848 buf.extend_from_slice(line);
849 buf.push(line_delim);
850 }
851 }
852 }
853 return;
854 }
855
856 let mut field_start = 0;
857 let mut field_idx = 0;
858 let mut has_delim = false;
859
860 for pos in memchr_iter(delim, line) {
861 has_delim = true;
862 if field_idx == target_idx {
863 buf.extend_from_slice(&line[field_start..pos]);
865 buf.push(line_delim);
866 return;
867 }
868 field_idx += 1;
869 field_start = pos + 1;
870 }
871
872 if !has_delim {
873 if !suppress {
875 buf.extend_from_slice(line);
876 buf.push(line_delim);
877 }
878 return;
879 }
880
881 if field_idx == target_idx {
882 buf.extend_from_slice(&line[field_start..]);
884 buf.push(line_delim);
885 } else {
886 buf.push(line_delim);
888 }
889}
890
891#[inline(always)]
894fn extract_fields_to_buf(
895 line: &[u8],
896 delim: u8,
897 ranges: &[Range],
898 output_delim: &[u8],
899 suppress: bool,
900 max_field: usize,
901 field_mask: u64,
902 line_delim: u8,
903 buf: &mut Vec<u8>,
904 complement: bool,
905) {
906 let len = line.len();
907
908 if len == 0 {
910 if !suppress {
911 buf.push(line_delim);
912 }
913 return;
914 }
915
916 let mut field_num: usize = 1;
917 let mut field_start: usize = 0;
918 let mut first_output = true;
919 let mut has_delim = false;
920
921 for delim_pos in memchr_iter(delim, line) {
923 has_delim = true;
924
925 if is_selected(field_num, field_mask, ranges, complement) {
926 if !first_output {
927 buf.extend_from_slice(output_delim);
928 }
929 buf.extend_from_slice(&line[field_start..delim_pos]);
930 first_output = false;
931 }
932
933 field_num += 1;
934 field_start = delim_pos + 1;
935
936 if field_num > max_field {
938 break;
939 }
940 }
941
942 if (field_num <= max_field || complement)
944 && has_delim
945 && is_selected(field_num, field_mask, ranges, complement)
946 {
947 if !first_output {
948 buf.extend_from_slice(output_delim);
949 }
950 buf.extend_from_slice(&line[field_start..len]);
951 first_output = false;
952 }
953
954 if !first_output {
956 buf.push(line_delim);
958 } else if !has_delim {
959 if !suppress {
961 buf.extend_from_slice(line);
962 buf.push(line_delim);
963 }
964 } else {
965 buf.push(line_delim);
967 }
968}
969
970fn process_bytes_from_start(
976 data: &[u8],
977 max_bytes: usize,
978 line_delim: u8,
979 out: &mut impl Write,
980) -> io::Result<()> {
981 if data.len() >= PARALLEL_THRESHOLD {
982 let chunks = split_into_chunks(data, line_delim);
983 let results: Vec<Vec<u8>> = chunks
984 .par_iter()
985 .map(|chunk| {
986 let mut buf = Vec::with_capacity(chunk.len());
987 bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
988 buf
989 })
990 .collect();
991 for result in &results {
992 if !result.is_empty() {
993 out.write_all(result)?;
994 }
995 }
996 } else {
997 let mut buf = Vec::with_capacity(data.len());
998 bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
999 if !buf.is_empty() {
1000 out.write_all(&buf)?;
1001 }
1002 }
1003 Ok(())
1004}
1005
1006#[inline]
1009fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1010 let mut start = 0;
1011 for pos in memchr_iter(line_delim, data) {
1012 let line_len = pos - start;
1013 let take = line_len.min(max_bytes);
1014 buf.extend_from_slice(&data[start..start + take]);
1015 buf.push(line_delim);
1016 start = pos + 1;
1017 }
1018 if start < data.len() {
1020 let line_len = data.len() - start;
1021 let take = line_len.min(max_bytes);
1022 buf.extend_from_slice(&data[start..start + take]);
1023 buf.push(line_delim);
1024 }
1025}
1026
1027fn process_bytes_from_offset(
1030 data: &[u8],
1031 skip_bytes: usize,
1032 line_delim: u8,
1033 out: &mut impl Write,
1034) -> io::Result<()> {
1035 if data.len() >= PARALLEL_THRESHOLD {
1036 let chunks = split_into_chunks(data, line_delim);
1037 let results: Vec<Vec<u8>> = chunks
1038 .par_iter()
1039 .map(|chunk| {
1040 let mut buf = Vec::with_capacity(chunk.len());
1041 bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
1042 buf
1043 })
1044 .collect();
1045 for result in &results {
1046 if !result.is_empty() {
1047 out.write_all(result)?;
1048 }
1049 }
1050 } else {
1051 let mut buf = Vec::with_capacity(data.len());
1052 bytes_from_offset_chunk(data, skip_bytes, line_delim, &mut buf);
1053 if !buf.is_empty() {
1054 out.write_all(&buf)?;
1055 }
1056 }
1057 Ok(())
1058}
1059
1060#[inline]
1062fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1063 let mut start = 0;
1064 for pos in memchr_iter(line_delim, data) {
1065 let line_len = pos - start;
1066 if line_len > skip_bytes {
1067 buf.extend_from_slice(&data[start + skip_bytes..pos]);
1068 }
1069 buf.push(line_delim);
1070 start = pos + 1;
1071 }
1072 if start < data.len() {
1074 let line_len = data.len() - start;
1075 if line_len > skip_bytes {
1076 buf.extend_from_slice(&data[start + skip_bytes..data.len()]);
1077 }
1078 buf.push(line_delim);
1079 }
1080}
1081
1082fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1084 let line_delim = cfg.line_delim;
1085 let ranges = cfg.ranges;
1086 let complement = cfg.complement;
1087 let output_delim = cfg.output_delim;
1088
1089 if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
1092 let max_bytes = ranges[0].end;
1093 if max_bytes < usize::MAX {
1094 return process_bytes_from_start(data, max_bytes, line_delim, out);
1095 }
1096 }
1097
1098 if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
1101 let skip_bytes = ranges[0].start.saturating_sub(1);
1102 if skip_bytes > 0 {
1103 return process_bytes_from_offset(data, skip_bytes, line_delim, out);
1104 }
1105 }
1106
1107 if data.len() >= PARALLEL_THRESHOLD {
1108 let chunks = split_into_chunks(data, line_delim);
1109 let results: Vec<Vec<u8>> = chunks
1110 .par_iter()
1111 .map(|chunk| {
1112 let mut buf = Vec::with_capacity(chunk.len());
1113 process_bytes_chunk(
1114 chunk,
1115 ranges,
1116 complement,
1117 output_delim,
1118 line_delim,
1119 &mut buf,
1120 );
1121 buf
1122 })
1123 .collect();
1124 for result in &results {
1125 if !result.is_empty() {
1126 out.write_all(result)?;
1127 }
1128 }
1129 } else {
1130 let mut buf = Vec::with_capacity(data.len());
1131 process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
1132 if !buf.is_empty() {
1133 out.write_all(&buf)?;
1134 }
1135 }
1136 Ok(())
1137}
1138
1139fn process_bytes_chunk(
1141 data: &[u8],
1142 ranges: &[Range],
1143 complement: bool,
1144 output_delim: &[u8],
1145 line_delim: u8,
1146 buf: &mut Vec<u8>,
1147) {
1148 let mut start = 0;
1149 for end_pos in memchr_iter(line_delim, data) {
1150 let line = &data[start..end_pos];
1151 cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
1152 buf.push(line_delim);
1153 start = end_pos + 1;
1154 }
1155 if start < data.len() {
1156 cut_bytes_to_buf(&data[start..], ranges, complement, output_delim, buf);
1157 buf.push(line_delim);
1158 }
1159}
1160
1161#[inline(always)]
1164fn cut_bytes_to_buf(
1165 line: &[u8],
1166 ranges: &[Range],
1167 complement: bool,
1168 output_delim: &[u8],
1169 buf: &mut Vec<u8>,
1170) {
1171 let len = line.len();
1172 let mut first_range = true;
1173
1174 if complement {
1175 let mut pos: usize = 1;
1176 for r in ranges {
1177 let rs = r.start;
1178 let re = r.end.min(len);
1179 if pos < rs {
1180 if !first_range && !output_delim.is_empty() {
1181 buf.extend_from_slice(output_delim);
1182 }
1183 buf.extend_from_slice(&line[pos - 1..rs - 1]);
1184 first_range = false;
1185 }
1186 pos = re + 1;
1187 if pos > len {
1188 break;
1189 }
1190 }
1191 if pos <= len {
1192 if !first_range && !output_delim.is_empty() {
1193 buf.extend_from_slice(output_delim);
1194 }
1195 buf.extend_from_slice(&line[pos - 1..len]);
1196 }
1197 } else if output_delim.is_empty() && ranges.len() == 1 {
1198 let start = ranges[0].start.saturating_sub(1);
1200 let end = ranges[0].end.min(len);
1201 if start < len {
1202 buf.extend_from_slice(&line[start..end]);
1203 }
1204 } else {
1205 for r in ranges {
1206 let start = r.start.saturating_sub(1);
1207 let end = r.end.min(len);
1208 if start >= len {
1209 break;
1210 }
1211 if !first_range && !output_delim.is_empty() {
1212 buf.extend_from_slice(output_delim);
1213 }
1214 buf.extend_from_slice(&line[start..end]);
1215 first_range = false;
1216 }
1217 }
1218}
1219
1220#[inline]
1226pub fn cut_fields(
1227 line: &[u8],
1228 delim: u8,
1229 ranges: &[Range],
1230 complement: bool,
1231 output_delim: &[u8],
1232 suppress_no_delim: bool,
1233 out: &mut impl Write,
1234) -> io::Result<bool> {
1235 if memchr::memchr(delim, line).is_none() {
1237 if !suppress_no_delim {
1238 out.write_all(line)?;
1239 return Ok(true);
1240 }
1241 return Ok(false); }
1243
1244 let mut field_num: usize = 1;
1246 let mut field_start: usize = 0;
1247 let mut first_output = true;
1248
1249 for delim_pos in memchr_iter(delim, line) {
1250 let selected = in_ranges(ranges, field_num) != complement;
1251 if selected {
1252 if !first_output {
1253 out.write_all(output_delim)?;
1254 }
1255 out.write_all(&line[field_start..delim_pos])?;
1256 first_output = false;
1257 }
1258 field_start = delim_pos + 1;
1259 field_num += 1;
1260 }
1261
1262 let selected = in_ranges(ranges, field_num) != complement;
1264 if selected {
1265 if !first_output {
1266 out.write_all(output_delim)?;
1267 }
1268 out.write_all(&line[field_start..])?;
1269 }
1270
1271 Ok(true)
1272}
1273
1274#[inline]
1277pub fn cut_bytes(
1278 line: &[u8],
1279 ranges: &[Range],
1280 complement: bool,
1281 output_delim: &[u8],
1282 out: &mut impl Write,
1283) -> io::Result<bool> {
1284 let mut first_range = true;
1285
1286 if complement {
1287 let len = line.len();
1288 let mut comp_ranges = Vec::new();
1289 let mut pos: usize = 1;
1290 for r in ranges {
1291 let rs = r.start;
1292 let re = r.end.min(len);
1293 if pos < rs {
1294 comp_ranges.push((pos, rs - 1));
1295 }
1296 pos = re + 1;
1297 if pos > len {
1298 break;
1299 }
1300 }
1301 if pos <= len {
1302 comp_ranges.push((pos, len));
1303 }
1304 for &(s, e) in &comp_ranges {
1305 if !first_range && !output_delim.is_empty() {
1306 out.write_all(output_delim)?;
1307 }
1308 out.write_all(&line[s - 1..e])?;
1309 first_range = false;
1310 }
1311 } else {
1312 for r in ranges {
1313 let start = r.start.saturating_sub(1);
1314 let end = r.end.min(line.len());
1315 if start >= line.len() {
1316 break;
1317 }
1318 if !first_range && !output_delim.is_empty() {
1319 out.write_all(output_delim)?;
1320 }
1321 out.write_all(&line[start..end])?;
1322 first_range = false;
1323 }
1324 }
1325 Ok(true)
1326}
1327
1328pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1331 match cfg.mode {
1332 CutMode::Fields => process_fields_fast(data, cfg, out),
1333 CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
1334 }
1335}
1336
1337pub fn process_cut_reader<R: BufRead>(
1339 mut reader: R,
1340 cfg: &CutConfig,
1341 out: &mut impl Write,
1342) -> io::Result<()> {
1343 let mut buf = Vec::new();
1344
1345 loop {
1346 buf.clear();
1347 let n = reader.read_until(cfg.line_delim, &mut buf)?;
1348 if n == 0 {
1349 break;
1350 }
1351
1352 let has_line_delim = buf.last() == Some(&cfg.line_delim);
1353 let line = if has_line_delim {
1354 &buf[..buf.len() - 1]
1355 } else {
1356 &buf[..]
1357 };
1358
1359 let wrote = process_one_line(line, cfg, out)?;
1360
1361 if wrote {
1363 out.write_all(&[cfg.line_delim])?;
1364 }
1365 }
1366
1367 Ok(())
1368}
1369
1370#[inline]
1372fn process_one_line(line: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<bool> {
1373 match cfg.mode {
1374 CutMode::Fields => cut_fields(
1375 line,
1376 cfg.delim,
1377 cfg.ranges,
1378 cfg.complement,
1379 cfg.output_delim,
1380 cfg.suppress_no_delim,
1381 out,
1382 ),
1383 CutMode::Bytes | CutMode::Characters => {
1384 cut_bytes(line, cfg.ranges, cfg.complement, cfg.output_delim, out)
1385 }
1386 }
1387}
1388
1389#[derive(Debug, Clone, Copy, PartialEq)]
1391pub enum CutMode {
1392 Bytes,
1393 Characters,
1394 Fields,
1395}