Skip to main content

coreutils_rs/cut/
core.rs

1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, Write};
4
5/// Minimum file size for parallel processing (2MB).
6/// Rayon overhead is ~5-10μs per task; at 2MB per chunk,
7/// each chunk takes ~200μs+ to process, so overhead is < 5%.
8const PARALLEL_THRESHOLD: usize = 2 * 1024 * 1024;
9
10/// Configuration for cut operations.
11pub struct CutConfig<'a> {
12    pub mode: CutMode,
13    pub ranges: &'a [Range],
14    pub complement: bool,
15    pub delim: u8,
16    pub output_delim: &'a [u8],
17    pub suppress_no_delim: bool,
18    pub line_delim: u8,
19}
20
21/// A range specification like 1, 3-5, -3, 4-
22#[derive(Debug, Clone)]
23pub struct Range {
24    pub start: usize, // 1-based, 0 means "from beginning"
25    pub end: usize,   // 1-based, usize::MAX means "to end"
26}
27
28/// Parse a LIST specification like "1,3-5,7-" into ranges.
29/// Each range is 1-based. Returns sorted, merged ranges.
30pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
31    let mut ranges = Vec::new();
32
33    for part in spec.split(',') {
34        let part = part.trim();
35        if part.is_empty() {
36            continue;
37        }
38
39        if let Some(idx) = part.find('-') {
40            let left = &part[..idx];
41            let right = &part[idx + 1..];
42
43            let start = if left.is_empty() {
44                1
45            } else {
46                left.parse::<usize>()
47                    .map_err(|_| format!("invalid range: '{}'", part))?
48            };
49
50            let end = if right.is_empty() {
51                usize::MAX
52            } else {
53                right
54                    .parse::<usize>()
55                    .map_err(|_| format!("invalid range: '{}'", part))?
56            };
57
58            if start == 0 {
59                return Err("fields and positions are numbered from 1".to_string());
60            }
61            if start > end {
62                return Err(format!("invalid decreasing range: '{}'", part));
63            }
64
65            ranges.push(Range { start, end });
66        } else {
67            let n = part
68                .parse::<usize>()
69                .map_err(|_| format!("invalid field: '{}'", part))?;
70            if n == 0 {
71                return Err("fields and positions are numbered from 1".to_string());
72            }
73            ranges.push(Range { start: n, end: n });
74        }
75    }
76
77    if ranges.is_empty() {
78        return Err("you must specify a list of bytes, characters, or fields".to_string());
79    }
80
81    // Sort and merge overlapping ranges
82    ranges.sort_by_key(|r| (r.start, r.end));
83    let mut merged = vec![ranges[0].clone()];
84    for r in &ranges[1..] {
85        let last = merged.last_mut().unwrap();
86        if r.start <= last.end.saturating_add(1) {
87            last.end = last.end.max(r.end);
88        } else {
89            merged.push(r.clone());
90        }
91    }
92
93    Ok(merged)
94}
95
96/// Check if a 1-based position is in any range.
97/// Ranges must be sorted. Uses early exit since ranges are sorted.
98#[inline(always)]
99fn in_ranges(ranges: &[Range], pos: usize) -> bool {
100    for r in ranges {
101        if pos < r.start {
102            return false; // ranges are sorted, no point checking further
103        }
104        if pos <= r.end {
105            return true;
106        }
107    }
108    false
109}
110
111/// Pre-compute a 64-bit mask for field selection.
112/// Bit i-1 is set if field i should be output.
113#[inline]
114fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
115    let mut mask: u64 = 0;
116    for i in 1..=64u32 {
117        let in_range = in_ranges(ranges, i as usize);
118        if in_range != complement {
119            mask |= 1u64 << (i - 1);
120        }
121    }
122    mask
123}
124
125/// Check if a field should be selected, using bitset for first 64 fields.
126#[inline(always)]
127fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
128    if field_num <= 64 {
129        (mask >> (field_num - 1)) & 1 == 1
130    } else {
131        in_ranges(ranges, field_num) != complement
132    }
133}
134
135// ── Chunk splitting for parallel processing ──────────────────────────────
136
137/// Split data into chunks aligned to line boundaries for parallel processing.
138/// Returns slices that each end on a line_delim boundary (except possibly the last).
139fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
140    let num_threads = rayon::current_num_threads().max(1);
141    if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
142        return vec![data];
143    }
144
145    let chunk_size = data.len() / num_threads;
146    let mut chunks = Vec::with_capacity(num_threads);
147    let mut pos = 0;
148
149    for _ in 0..num_threads - 1 {
150        let target = pos + chunk_size;
151        if target >= data.len() {
152            break;
153        }
154        // Align to next line boundary
155        let boundary = memchr::memchr(line_delim, &data[target..])
156            .map(|p| target + p + 1)
157            .unwrap_or(data.len());
158        if boundary > pos {
159            chunks.push(&data[pos..boundary]);
160        }
161        pos = boundary;
162    }
163
164    // Last chunk gets the remainder
165    if pos < data.len() {
166        chunks.push(&data[pos..]);
167    }
168
169    chunks
170}
171
172// ── Fast path: field extraction with batched output ──────────────────────
173
174/// Optimized field extraction with early exit and batched output.
175/// Uses SIMD newline scanning + inline byte scan per line.
176fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
177    let delim = cfg.delim;
178    let line_delim = cfg.line_delim;
179    let ranges = cfg.ranges;
180    let complement = cfg.complement;
181    let output_delim = cfg.output_delim;
182    let suppress = cfg.suppress_no_delim;
183
184    // Zero-copy fast path: if delimiter never appears in data, output = input unchanged.
185    // Single SIMD memchr scan of entire data (~10 GB/s) to detect no-delimiter case.
186    if !complement && memchr::memchr(delim, data).is_none() {
187        if suppress {
188            return Ok(()); // All lines suppressed (no delimiter found on any line)
189        }
190        // Output is identical to input (every line has no delimiter -> output whole line)
191        out.write_all(data)?;
192        // GNU cut ensures trailing newline
193        if !data.is_empty() && *data.last().unwrap() != line_delim {
194            out.write_all(&[line_delim])?;
195        }
196        return Ok(());
197    }
198
199    // Ultra-fast path: single field extraction (e.g., cut -f5)
200    if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
201        return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
202    }
203
204    // Fast path: complement of single field with default output delimiter.
205    // e.g., cut --complement -f1: skip first field, output rest unchanged.
206    if complement
207        && ranges.len() == 1
208        && ranges[0].start == ranges[0].end
209        && output_delim.len() == 1
210        && output_delim[0] == delim
211    {
212        return process_complement_single_field(
213            data,
214            delim,
215            line_delim,
216            ranges[0].start,
217            suppress,
218            out,
219        );
220    }
221
222    // Fast path: contiguous from-start field range (e.g., cut -f1-5, cut -f-3)
223    // with default output delimiter. Just find the N-th delimiter and copy.
224    if !complement
225        && ranges.len() == 1
226        && ranges[0].start == 1
227        && output_delim.len() == 1
228        && output_delim[0] == delim
229        && ranges[0].end < usize::MAX
230    {
231        return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
232    }
233
234    // Fast path: open-ended field range from field N (e.g., cut -f3-)
235    // Skip first N-1 delimiters per line instead of per-field selection.
236    if !complement
237        && ranges.len() == 1
238        && ranges[0].end == usize::MAX
239        && ranges[0].start > 1
240        && output_delim.len() == 1
241        && output_delim[0] == delim
242    {
243        return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
244    }
245
246    // Pre-compute for general field extraction
247    let max_field = if complement {
248        usize::MAX
249    } else {
250        ranges.last().map(|r| r.end).unwrap_or(0)
251    };
252    let field_mask = compute_field_mask(ranges, complement);
253
254    if data.len() >= PARALLEL_THRESHOLD {
255        // Parallel path
256        let chunks = split_into_chunks(data, line_delim);
257        let results: Vec<Vec<u8>> = chunks
258            .par_iter()
259            .map(|chunk| {
260                let mut buf = Vec::with_capacity(chunk.len());
261                process_fields_chunk(
262                    chunk,
263                    delim,
264                    ranges,
265                    output_delim,
266                    suppress,
267                    max_field,
268                    field_mask,
269                    line_delim,
270                    complement,
271                    &mut buf,
272                );
273                buf
274            })
275            .collect();
276        for result in &results {
277            if !result.is_empty() {
278                out.write_all(result)?;
279            }
280        }
281    } else {
282        // Sequential path
283        let mut buf = Vec::with_capacity(data.len());
284        process_fields_chunk(
285            data,
286            delim,
287            ranges,
288            output_delim,
289            suppress,
290            max_field,
291            field_mask,
292            line_delim,
293            complement,
294            &mut buf,
295        );
296        if !buf.is_empty() {
297            out.write_all(&buf)?;
298        }
299    }
300    Ok(())
301}
302
303/// Process a chunk of data for general field extraction.
304fn process_fields_chunk(
305    data: &[u8],
306    delim: u8,
307    ranges: &[Range],
308    output_delim: &[u8],
309    suppress: bool,
310    max_field: usize,
311    field_mask: u64,
312    line_delim: u8,
313    complement: bool,
314    buf: &mut Vec<u8>,
315) {
316    let mut start = 0;
317    for end_pos in memchr_iter(line_delim, data) {
318        let line = &data[start..end_pos];
319        extract_fields_to_buf(
320            line,
321            delim,
322            ranges,
323            output_delim,
324            suppress,
325            max_field,
326            field_mask,
327            line_delim,
328            buf,
329            complement,
330        );
331        start = end_pos + 1;
332    }
333    if start < data.len() {
334        extract_fields_to_buf(
335            &data[start..],
336            delim,
337            ranges,
338            output_delim,
339            suppress,
340            max_field,
341            field_mask,
342            line_delim,
343            buf,
344            complement,
345        );
346    }
347}
348
349// ── Ultra-fast single field extraction ───────────────────────────────────
350
351/// Specialized path for extracting exactly one field (e.g., `cut -f5`).
352/// Uses SIMD memchr for newline scanning + minimal inline byte scan per line.
353/// For large files, splits work across multiple threads with rayon.
354fn process_single_field(
355    data: &[u8],
356    delim: u8,
357    line_delim: u8,
358    target: usize,
359    suppress: bool,
360    out: &mut impl Write,
361) -> io::Result<()> {
362    let target_idx = target - 1;
363
364    // Ultra-fast path: first field with combined delimiter+newline scan.
365    // Uses memchr2_iter to find both delimiter and newline in a single SIMD pass,
366    // eliminating the per-line memchr call overhead.
367    if target_idx == 0 && delim != line_delim {
368        if data.len() >= PARALLEL_THRESHOLD {
369            let chunks = split_into_chunks(data, line_delim);
370            let results: Vec<Vec<u8>> = chunks
371                .par_iter()
372                .map(|chunk| {
373                    let mut buf = Vec::with_capacity(chunk.len());
374                    process_first_field_combined(chunk, delim, line_delim, suppress, &mut buf);
375                    buf
376                })
377                .collect();
378            for result in &results {
379                if !result.is_empty() {
380                    out.write_all(result)?;
381                }
382            }
383        } else {
384            let mut buf = Vec::with_capacity(data.len());
385            process_first_field_combined(data, delim, line_delim, suppress, &mut buf);
386            if !buf.is_empty() {
387                out.write_all(&buf)?;
388            }
389        }
390        return Ok(());
391    }
392
393    if data.len() >= PARALLEL_THRESHOLD {
394        // Parallel path: split into chunks aligned to line boundaries
395        let chunks = split_into_chunks(data, line_delim);
396        let results: Vec<Vec<u8>> = chunks
397            .par_iter()
398            .map(|chunk| {
399                let mut buf = Vec::with_capacity(chunk.len() / 4);
400                process_single_field_chunk(
401                    chunk, delim, target_idx, line_delim, suppress, &mut buf,
402                );
403                buf
404            })
405            .collect();
406        for result in &results {
407            if !result.is_empty() {
408                out.write_all(result)?;
409            }
410        }
411    } else {
412        // Sequential path for small data
413        let mut buf = Vec::with_capacity(data.len() / 4);
414        process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
415        if !buf.is_empty() {
416            out.write_all(&buf)?;
417        }
418    }
419    Ok(())
420}
421
422/// Complement single-field extraction: skip one field, output rest unchanged.
423/// For `--complement -f1`: find first delimiter, output everything after.
424/// For `--complement -fN`: output fields 1..N-1, skip N, output N+1...
425/// Uses combined SIMD scan for first-field complement (most common case).
426fn process_complement_single_field(
427    data: &[u8],
428    delim: u8,
429    line_delim: u8,
430    skip_field: usize,
431    suppress: bool,
432    out: &mut impl Write,
433) -> io::Result<()> {
434    let skip_idx = skip_field - 1;
435
436    if data.len() >= PARALLEL_THRESHOLD {
437        let chunks = split_into_chunks(data, line_delim);
438        let results: Vec<Vec<u8>> = chunks
439            .par_iter()
440            .map(|chunk| {
441                let mut buf = Vec::with_capacity(chunk.len());
442                complement_single_field_chunk(
443                    chunk, delim, skip_idx, line_delim, suppress, &mut buf,
444                );
445                buf
446            })
447            .collect();
448        for result in &results {
449            if !result.is_empty() {
450                out.write_all(result)?;
451            }
452        }
453    } else {
454        let mut buf = Vec::with_capacity(data.len());
455        complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
456        if !buf.is_empty() {
457            out.write_all(&buf)?;
458        }
459    }
460    Ok(())
461}
462
463/// Process a chunk for complement single-field extraction.
464fn complement_single_field_chunk(
465    data: &[u8],
466    delim: u8,
467    skip_idx: usize,
468    line_delim: u8,
469    suppress: bool,
470    buf: &mut Vec<u8>,
471) {
472    let mut start = 0;
473    for end_pos in memchr_iter(line_delim, data) {
474        let line = &data[start..end_pos];
475        complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
476        start = end_pos + 1;
477    }
478    if start < data.len() {
479        complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
480    }
481}
482
483/// Extract all fields except skip_idx from one line.
484#[inline(always)]
485fn complement_single_field_line(
486    line: &[u8],
487    delim: u8,
488    skip_idx: usize,
489    line_delim: u8,
490    suppress: bool,
491    buf: &mut Vec<u8>,
492) {
493    if line.is_empty() {
494        if !suppress {
495            buf.push(line_delim);
496        }
497        return;
498    }
499
500    // Find all delimiter positions
501    let mut field_idx = 0;
502    let mut field_start = 0;
503    let mut first_output = true;
504    let mut has_delim = false;
505
506    for pos in memchr_iter(delim, line) {
507        has_delim = true;
508        if field_idx != skip_idx {
509            if !first_output {
510                buf.push(delim);
511            }
512            buf.extend_from_slice(&line[field_start..pos]);
513            first_output = false;
514        }
515        field_idx += 1;
516        field_start = pos + 1;
517    }
518
519    if !has_delim {
520        if !suppress {
521            buf.extend_from_slice(line);
522            buf.push(line_delim);
523        }
524        return;
525    }
526
527    // Last field
528    if field_idx != skip_idx {
529        if !first_output {
530            buf.push(delim);
531        }
532        buf.extend_from_slice(&line[field_start..]);
533    }
534
535    if !first_output || field_idx != skip_idx {
536        buf.push(line_delim);
537    } else {
538        // Only the skipped field existed — output empty line
539        buf.push(line_delim);
540    }
541}
542
543/// Contiguous from-start field range extraction (e.g., `cut -f1-5`).
544/// Finds the N-th delimiter and copies everything before it (preserving original delimiters).
545/// Much faster than per-field selection: one memchr_iter scan with early exit.
546fn process_fields_prefix(
547    data: &[u8],
548    delim: u8,
549    line_delim: u8,
550    last_field: usize,
551    suppress: bool,
552    out: &mut impl Write,
553) -> io::Result<()> {
554    if data.len() >= PARALLEL_THRESHOLD {
555        let chunks = split_into_chunks(data, line_delim);
556        let results: Vec<Vec<u8>> = chunks
557            .par_iter()
558            .map(|chunk| {
559                let mut buf = Vec::with_capacity(chunk.len());
560                fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
561                buf
562            })
563            .collect();
564        for result in &results {
565            if !result.is_empty() {
566                out.write_all(result)?;
567            }
568        }
569    } else {
570        let mut buf = Vec::with_capacity(data.len());
571        fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
572        if !buf.is_empty() {
573            out.write_all(&buf)?;
574        }
575    }
576    Ok(())
577}
578
579/// Process a chunk for contiguous from-start field range extraction.
580fn fields_prefix_chunk(
581    data: &[u8],
582    delim: u8,
583    line_delim: u8,
584    last_field: usize,
585    suppress: bool,
586    buf: &mut Vec<u8>,
587) {
588    let mut start = 0;
589    for end_pos in memchr_iter(line_delim, data) {
590        let line = &data[start..end_pos];
591        fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
592        start = end_pos + 1;
593    }
594    if start < data.len() {
595        fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
596    }
597}
598
599/// Extract first N fields from one line (contiguous from-start range).
600/// Finds the N-th delimiter using memchr_iter with early exit.
601#[inline(always)]
602fn fields_prefix_line(
603    line: &[u8],
604    delim: u8,
605    line_delim: u8,
606    last_field: usize,
607    suppress: bool,
608    buf: &mut Vec<u8>,
609) {
610    if line.is_empty() {
611        if !suppress {
612            buf.push(line_delim);
613        }
614        return;
615    }
616
617    // Count delimiters; stop at last_field (we need fields 1..last_field)
618    let mut field_count = 1;
619    let mut has_delim = false;
620
621    for pos in memchr_iter(delim, line) {
622        has_delim = true;
623        if field_count >= last_field {
624            // Found enough fields — output up to this delimiter
625            buf.extend_from_slice(&line[..pos]);
626            buf.push(line_delim);
627            return;
628        }
629        field_count += 1;
630    }
631
632    if !has_delim {
633        if !suppress {
634            buf.extend_from_slice(line);
635            buf.push(line_delim);
636        }
637        return;
638    }
639
640    // Fewer fields than requested — output the entire line
641    // (all fields are within range since we have < last_field fields)
642    buf.extend_from_slice(line);
643    buf.push(line_delim);
644}
645
646/// Open-ended field suffix extraction (e.g., `cut -f3-`).
647/// Skip the first N-1 delimiters, then output everything from field N to end.
648fn process_fields_suffix(
649    data: &[u8],
650    delim: u8,
651    line_delim: u8,
652    start_field: usize,
653    suppress: bool,
654    out: &mut impl Write,
655) -> io::Result<()> {
656    if data.len() >= PARALLEL_THRESHOLD {
657        let chunks = split_into_chunks(data, line_delim);
658        let results: Vec<Vec<u8>> = chunks
659            .par_iter()
660            .map(|chunk| {
661                let mut buf = Vec::with_capacity(chunk.len());
662                fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
663                buf
664            })
665            .collect();
666        for result in &results {
667            if !result.is_empty() {
668                out.write_all(result)?;
669            }
670        }
671    } else {
672        let mut buf = Vec::with_capacity(data.len());
673        fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
674        if !buf.is_empty() {
675            out.write_all(&buf)?;
676        }
677    }
678    Ok(())
679}
680
681/// Process a chunk for open-ended field suffix extraction.
682fn fields_suffix_chunk(
683    data: &[u8],
684    delim: u8,
685    line_delim: u8,
686    start_field: usize,
687    suppress: bool,
688    buf: &mut Vec<u8>,
689) {
690    let mut start = 0;
691    for end_pos in memchr_iter(line_delim, data) {
692        let line = &data[start..end_pos];
693        fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
694        start = end_pos + 1;
695    }
696    if start < data.len() {
697        fields_suffix_line(
698            &data[start..],
699            delim,
700            line_delim,
701            start_field,
702            suppress,
703            buf,
704        );
705    }
706}
707
708/// Extract fields from start_field to end from one line.
709/// Skips the first (start_field - 1) delimiters, outputs the rest.
710#[inline(always)]
711fn fields_suffix_line(
712    line: &[u8],
713    delim: u8,
714    line_delim: u8,
715    start_field: usize,
716    suppress: bool,
717    buf: &mut Vec<u8>,
718) {
719    if line.is_empty() {
720        if !suppress {
721            buf.push(line_delim);
722        }
723        return;
724    }
725
726    let skip_delims = start_field - 1;
727    let mut delim_count = 0;
728    let mut has_delim = false;
729
730    for pos in memchr_iter(delim, line) {
731        has_delim = true;
732        delim_count += 1;
733        if delim_count >= skip_delims {
734            // Output from after this delimiter to end of line
735            buf.extend_from_slice(&line[pos + 1..]);
736            buf.push(line_delim);
737            return;
738        }
739    }
740
741    if !has_delim {
742        // No delimiter — output whole line or suppress
743        if !suppress {
744            buf.extend_from_slice(line);
745            buf.push(line_delim);
746        }
747        return;
748    }
749
750    // Fewer delimiters than needed — field doesn't exist, output empty
751    buf.push(line_delim);
752}
753
754/// First-field extraction using combined delimiter+newline SIMD scan.
755/// Single memchr2_iter pass finds both delimiter and newline positions,
756/// eliminating per-line memchr overhead (saves ~250K function calls for 10MB).
757fn process_first_field_combined(
758    data: &[u8],
759    delim: u8,
760    line_delim: u8,
761    suppress: bool,
762    buf: &mut Vec<u8>,
763) {
764    let mut line_start = 0;
765    let mut found_delim = false; // true if we already found delimiter for current line
766
767    for pos in memchr::memchr2_iter(delim, line_delim, data) {
768        let byte = data[pos];
769        if byte == line_delim {
770            // End of line
771            if !found_delim {
772                // No delimiter on this line — output whole line or suppress
773                if !suppress {
774                    buf.extend_from_slice(&data[line_start..pos]);
775                    buf.push(line_delim);
776                }
777            }
778            line_start = pos + 1;
779            found_delim = false;
780        } else {
781            // Delimiter found
782            if !found_delim {
783                // First delimiter — output field before it
784                buf.extend_from_slice(&data[line_start..pos]);
785                buf.push(line_delim);
786                found_delim = true;
787            }
788            // Subsequent delimiters on same line — ignore
789        }
790    }
791
792    // Handle last line without trailing newline
793    if line_start < data.len() {
794        if !found_delim {
795            // No delimiter found — output whole line or suppress
796            if !suppress {
797                // Check if there's a delimiter in the remaining data
798                match memchr::memchr(delim, &data[line_start..]) {
799                    Some(offset) => {
800                        buf.extend_from_slice(&data[line_start..line_start + offset]);
801                        buf.push(line_delim);
802                    }
803                    None => {
804                        buf.extend_from_slice(&data[line_start..]);
805                        buf.push(line_delim);
806                    }
807                }
808            }
809        }
810    }
811}
812
813/// Process a chunk of data for single-field extraction.
814fn process_single_field_chunk(
815    data: &[u8],
816    delim: u8,
817    target_idx: usize,
818    line_delim: u8,
819    suppress: bool,
820    buf: &mut Vec<u8>,
821) {
822    let mut start = 0;
823    for end_pos in memchr_iter(line_delim, data) {
824        let line = &data[start..end_pos];
825        extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
826        start = end_pos + 1;
827    }
828    // Handle final line without terminator
829    if start < data.len() {
830        extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
831    }
832}
833
834/// Extract a single field from one line.
835/// For target_idx == 0 (first field), uses single memchr instead of memchr_iter.
836/// For other fields, uses SIMD memchr_iter with early exit.
837#[inline(always)]
838fn extract_single_field_line(
839    line: &[u8],
840    delim: u8,
841    target_idx: usize,
842    line_delim: u8,
843    suppress: bool,
844    buf: &mut Vec<u8>,
845) {
846    if line.is_empty() {
847        if !suppress {
848            buf.push(line_delim);
849        }
850        return;
851    }
852
853    // Ultra-fast path for first field (target_idx == 0): single memchr
854    if target_idx == 0 {
855        match memchr::memchr(delim, line) {
856            Some(pos) => {
857                buf.extend_from_slice(&line[..pos]);
858                buf.push(line_delim);
859            }
860            None => {
861                // No delimiter — output whole line or suppress
862                if !suppress {
863                    buf.extend_from_slice(line);
864                    buf.push(line_delim);
865                }
866            }
867        }
868        return;
869    }
870
871    let mut field_start = 0;
872    let mut field_idx = 0;
873    let mut has_delim = false;
874
875    for pos in memchr_iter(delim, line) {
876        has_delim = true;
877        if field_idx == target_idx {
878            // Found end of target field — output and return
879            buf.extend_from_slice(&line[field_start..pos]);
880            buf.push(line_delim);
881            return;
882        }
883        field_idx += 1;
884        field_start = pos + 1;
885    }
886
887    if !has_delim {
888        // No delimiters found — output whole line or suppress
889        if !suppress {
890            buf.extend_from_slice(line);
891            buf.push(line_delim);
892        }
893        return;
894    }
895
896    if field_idx == target_idx {
897        // Target is the last field (no trailing delimiter)
898        buf.extend_from_slice(&line[field_start..]);
899        buf.push(line_delim);
900    } else {
901        // Not enough fields — output empty line
902        buf.push(line_delim);
903    }
904}
905
906/// Extract fields from a single line into the output buffer.
907/// Uses inline byte scanning with early exit for maximum performance.
908#[inline(always)]
909fn extract_fields_to_buf(
910    line: &[u8],
911    delim: u8,
912    ranges: &[Range],
913    output_delim: &[u8],
914    suppress: bool,
915    max_field: usize,
916    field_mask: u64,
917    line_delim: u8,
918    buf: &mut Vec<u8>,
919    complement: bool,
920) {
921    let len = line.len();
922
923    // Empty line: no delimiter possible
924    if len == 0 {
925        if !suppress {
926            buf.push(line_delim);
927        }
928        return;
929    }
930
931    let mut field_num: usize = 1;
932    let mut field_start: usize = 0;
933    let mut first_output = true;
934    let mut has_delim = false;
935
936    // SIMD-accelerated delimiter scanning with early exit
937    for delim_pos in memchr_iter(delim, line) {
938        has_delim = true;
939
940        if is_selected(field_num, field_mask, ranges, complement) {
941            if !first_output {
942                buf.extend_from_slice(output_delim);
943            }
944            buf.extend_from_slice(&line[field_start..delim_pos]);
945            first_output = false;
946        }
947
948        field_num += 1;
949        field_start = delim_pos + 1;
950
951        // Early exit: past the last needed field
952        if field_num > max_field {
953            break;
954        }
955    }
956
957    // Last field (only if we didn't early-exit past it)
958    if (field_num <= max_field || complement)
959        && has_delim
960        && is_selected(field_num, field_mask, ranges, complement)
961    {
962        if !first_output {
963            buf.extend_from_slice(output_delim);
964        }
965        buf.extend_from_slice(&line[field_start..len]);
966        first_output = false;
967    }
968
969    // Output line terminator
970    if !first_output {
971        // Had output — add line delimiter
972        buf.push(line_delim);
973    } else if !has_delim {
974        // No delimiter found — output whole line or suppress
975        if !suppress {
976            buf.extend_from_slice(line);
977            buf.push(line_delim);
978        }
979    } else {
980        // Had delimiter but no selected field — output empty line (GNU compat)
981        buf.push(line_delim);
982    }
983}
984
985// ── Fast path: byte/char extraction with batched output ──────────────────
986
987/// Ultra-fast path for `cut -b1-N`: single from-start byte range.
988/// Scans for newlines with SIMD memchr, truncates each line to N bytes.
989/// Avoids per-line function call + range check overhead entirely.
990fn process_bytes_from_start(
991    data: &[u8],
992    max_bytes: usize,
993    line_delim: u8,
994    out: &mut impl Write,
995) -> io::Result<()> {
996    if data.len() >= PARALLEL_THRESHOLD {
997        let chunks = split_into_chunks(data, line_delim);
998        let results: Vec<Vec<u8>> = chunks
999            .par_iter()
1000            .map(|chunk| {
1001                let mut buf = Vec::with_capacity(chunk.len());
1002                bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
1003                buf
1004            })
1005            .collect();
1006        for result in &results {
1007            if !result.is_empty() {
1008                out.write_all(result)?;
1009            }
1010        }
1011    } else {
1012        let mut buf = Vec::with_capacity(data.len());
1013        bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
1014        if !buf.is_empty() {
1015            out.write_all(&buf)?;
1016        }
1017    }
1018    Ok(())
1019}
1020
1021/// Process a chunk for from-start byte range extraction.
1022/// For each line, outputs min(line_len, max_bytes) bytes + line delimiter.
1023#[inline]
1024fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1025    let mut start = 0;
1026    for pos in memchr_iter(line_delim, data) {
1027        let line_len = pos - start;
1028        let take = line_len.min(max_bytes);
1029        buf.extend_from_slice(&data[start..start + take]);
1030        buf.push(line_delim);
1031        start = pos + 1;
1032    }
1033    // Handle last line without terminator
1034    if start < data.len() {
1035        let line_len = data.len() - start;
1036        let take = line_len.min(max_bytes);
1037        buf.extend_from_slice(&data[start..start + take]);
1038        buf.push(line_delim);
1039    }
1040}
1041
1042/// Fast path for `cut -bN-`: skip first N-1 bytes per line.
1043/// For each line, outputs bytes from position N to end of line.
1044fn process_bytes_from_offset(
1045    data: &[u8],
1046    skip_bytes: usize,
1047    line_delim: u8,
1048    out: &mut impl Write,
1049) -> io::Result<()> {
1050    if data.len() >= PARALLEL_THRESHOLD {
1051        let chunks = split_into_chunks(data, line_delim);
1052        let results: Vec<Vec<u8>> = chunks
1053            .par_iter()
1054            .map(|chunk| {
1055                let mut buf = Vec::with_capacity(chunk.len());
1056                bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
1057                buf
1058            })
1059            .collect();
1060        for result in &results {
1061            if !result.is_empty() {
1062                out.write_all(result)?;
1063            }
1064        }
1065    } else {
1066        let mut buf = Vec::with_capacity(data.len());
1067        bytes_from_offset_chunk(data, skip_bytes, line_delim, &mut buf);
1068        if !buf.is_empty() {
1069            out.write_all(&buf)?;
1070        }
1071    }
1072    Ok(())
1073}
1074
1075/// Process a chunk for from-offset byte range extraction.
1076#[inline]
1077fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1078    let mut start = 0;
1079    for pos in memchr_iter(line_delim, data) {
1080        let line_len = pos - start;
1081        if line_len > skip_bytes {
1082            buf.extend_from_slice(&data[start + skip_bytes..pos]);
1083        }
1084        buf.push(line_delim);
1085        start = pos + 1;
1086    }
1087    // Handle last line without terminator
1088    if start < data.len() {
1089        let line_len = data.len() - start;
1090        if line_len > skip_bytes {
1091            buf.extend_from_slice(&data[start + skip_bytes..data.len()]);
1092        }
1093        buf.push(line_delim);
1094    }
1095}
1096
1097/// Optimized byte/char extraction with batched output and parallel processing.
1098fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1099    let line_delim = cfg.line_delim;
1100    let ranges = cfg.ranges;
1101    let complement = cfg.complement;
1102    let output_delim = cfg.output_delim;
1103
1104    // Ultra-fast path: single range from byte 1 (e.g., cut -b1-10, cut -b-20)
1105    // Avoids per-line function call overhead by scanning newlines and truncating inline.
1106    if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
1107        let max_bytes = ranges[0].end;
1108        if max_bytes < usize::MAX {
1109            return process_bytes_from_start(data, max_bytes, line_delim, out);
1110        }
1111    }
1112
1113    // Fast path: single open-ended range from byte N (e.g., cut -b5-)
1114    // Skip first N-1 bytes per line instead of per-byte range checking.
1115    if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
1116        let skip_bytes = ranges[0].start.saturating_sub(1);
1117        if skip_bytes > 0 {
1118            return process_bytes_from_offset(data, skip_bytes, line_delim, out);
1119        }
1120    }
1121
1122    if data.len() >= PARALLEL_THRESHOLD {
1123        let chunks = split_into_chunks(data, line_delim);
1124        let results: Vec<Vec<u8>> = chunks
1125            .par_iter()
1126            .map(|chunk| {
1127                let mut buf = Vec::with_capacity(chunk.len());
1128                process_bytes_chunk(
1129                    chunk,
1130                    ranges,
1131                    complement,
1132                    output_delim,
1133                    line_delim,
1134                    &mut buf,
1135                );
1136                buf
1137            })
1138            .collect();
1139        for result in &results {
1140            if !result.is_empty() {
1141                out.write_all(result)?;
1142            }
1143        }
1144    } else {
1145        let mut buf = Vec::with_capacity(data.len());
1146        process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
1147        if !buf.is_empty() {
1148            out.write_all(&buf)?;
1149        }
1150    }
1151    Ok(())
1152}
1153
1154/// Process a chunk of data for byte/char extraction.
1155fn process_bytes_chunk(
1156    data: &[u8],
1157    ranges: &[Range],
1158    complement: bool,
1159    output_delim: &[u8],
1160    line_delim: u8,
1161    buf: &mut Vec<u8>,
1162) {
1163    let mut start = 0;
1164    for end_pos in memchr_iter(line_delim, data) {
1165        let line = &data[start..end_pos];
1166        cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
1167        buf.push(line_delim);
1168        start = end_pos + 1;
1169    }
1170    if start < data.len() {
1171        cut_bytes_to_buf(&data[start..], ranges, complement, output_delim, buf);
1172        buf.push(line_delim);
1173    }
1174}
1175
1176/// Extract byte ranges from a line into the output buffer.
1177/// For the common non-complement case with contiguous ranges, uses bulk copy.
1178#[inline(always)]
1179fn cut_bytes_to_buf(
1180    line: &[u8],
1181    ranges: &[Range],
1182    complement: bool,
1183    output_delim: &[u8],
1184    buf: &mut Vec<u8>,
1185) {
1186    let len = line.len();
1187    let mut first_range = true;
1188
1189    if complement {
1190        let mut pos: usize = 1;
1191        for r in ranges {
1192            let rs = r.start;
1193            let re = r.end.min(len);
1194            if pos < rs {
1195                if !first_range && !output_delim.is_empty() {
1196                    buf.extend_from_slice(output_delim);
1197                }
1198                buf.extend_from_slice(&line[pos - 1..rs - 1]);
1199                first_range = false;
1200            }
1201            pos = re + 1;
1202            if pos > len {
1203                break;
1204            }
1205        }
1206        if pos <= len {
1207            if !first_range && !output_delim.is_empty() {
1208                buf.extend_from_slice(output_delim);
1209            }
1210            buf.extend_from_slice(&line[pos - 1..len]);
1211        }
1212    } else if output_delim.is_empty() && ranges.len() == 1 {
1213        // Ultra-fast path: single range, no output delimiter
1214        let start = ranges[0].start.saturating_sub(1);
1215        let end = ranges[0].end.min(len);
1216        if start < len {
1217            buf.extend_from_slice(&line[start..end]);
1218        }
1219    } else {
1220        for r in ranges {
1221            let start = r.start.saturating_sub(1);
1222            let end = r.end.min(len);
1223            if start >= len {
1224                break;
1225            }
1226            if !first_range && !output_delim.is_empty() {
1227                buf.extend_from_slice(output_delim);
1228            }
1229            buf.extend_from_slice(&line[start..end]);
1230            first_range = false;
1231        }
1232    }
1233}
1234
1235// ── Public API ───────────────────────────────────────────────────────────
1236
1237/// Cut fields from a line using a delimiter. Writes to `out`.
1238/// Returns true if any output was written, false if suppressed.
1239/// Used by process_cut_reader (stdin path) and unit tests.
1240#[inline]
1241pub fn cut_fields(
1242    line: &[u8],
1243    delim: u8,
1244    ranges: &[Range],
1245    complement: bool,
1246    output_delim: &[u8],
1247    suppress_no_delim: bool,
1248    out: &mut impl Write,
1249) -> io::Result<bool> {
1250    // Check if line contains delimiter at all
1251    if memchr::memchr(delim, line).is_none() {
1252        if !suppress_no_delim {
1253            out.write_all(line)?;
1254            return Ok(true);
1255        }
1256        return Ok(false); // suppressed
1257    }
1258
1259    // Walk through fields using memchr, output selected ones
1260    let mut field_num: usize = 1;
1261    let mut field_start: usize = 0;
1262    let mut first_output = true;
1263
1264    for delim_pos in memchr_iter(delim, line) {
1265        let selected = in_ranges(ranges, field_num) != complement;
1266        if selected {
1267            if !first_output {
1268                out.write_all(output_delim)?;
1269            }
1270            out.write_all(&line[field_start..delim_pos])?;
1271            first_output = false;
1272        }
1273        field_start = delim_pos + 1;
1274        field_num += 1;
1275    }
1276
1277    // Last field (after last delimiter)
1278    let selected = in_ranges(ranges, field_num) != complement;
1279    if selected {
1280        if !first_output {
1281            out.write_all(output_delim)?;
1282        }
1283        out.write_all(&line[field_start..])?;
1284    }
1285
1286    Ok(true)
1287}
1288
1289/// Cut bytes/chars from a line. Writes selected bytes to `out`.
1290/// Used by process_cut_reader (stdin path) and unit tests.
1291#[inline]
1292pub fn cut_bytes(
1293    line: &[u8],
1294    ranges: &[Range],
1295    complement: bool,
1296    output_delim: &[u8],
1297    out: &mut impl Write,
1298) -> io::Result<bool> {
1299    let mut first_range = true;
1300
1301    if complement {
1302        let len = line.len();
1303        let mut comp_ranges = Vec::new();
1304        let mut pos: usize = 1;
1305        for r in ranges {
1306            let rs = r.start;
1307            let re = r.end.min(len);
1308            if pos < rs {
1309                comp_ranges.push((pos, rs - 1));
1310            }
1311            pos = re + 1;
1312            if pos > len {
1313                break;
1314            }
1315        }
1316        if pos <= len {
1317            comp_ranges.push((pos, len));
1318        }
1319        for &(s, e) in &comp_ranges {
1320            if !first_range && !output_delim.is_empty() {
1321                out.write_all(output_delim)?;
1322            }
1323            out.write_all(&line[s - 1..e])?;
1324            first_range = false;
1325        }
1326    } else {
1327        for r in ranges {
1328            let start = r.start.saturating_sub(1);
1329            let end = r.end.min(line.len());
1330            if start >= line.len() {
1331                break;
1332            }
1333            if !first_range && !output_delim.is_empty() {
1334                out.write_all(output_delim)?;
1335            }
1336            out.write_all(&line[start..end])?;
1337            first_range = false;
1338        }
1339    }
1340    Ok(true)
1341}
1342
1343/// Process a full data buffer (from mmap or read) with cut operation.
1344/// Dispatches to optimized fast paths for field and byte modes.
1345pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1346    match cfg.mode {
1347        CutMode::Fields => process_fields_fast(data, cfg, out),
1348        CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
1349    }
1350}
1351
1352/// Process input from a reader (for stdin).
1353pub fn process_cut_reader<R: BufRead>(
1354    mut reader: R,
1355    cfg: &CutConfig,
1356    out: &mut impl Write,
1357) -> io::Result<()> {
1358    let mut buf = Vec::new();
1359
1360    loop {
1361        buf.clear();
1362        let n = reader.read_until(cfg.line_delim, &mut buf)?;
1363        if n == 0 {
1364            break;
1365        }
1366
1367        let has_line_delim = buf.last() == Some(&cfg.line_delim);
1368        let line = if has_line_delim {
1369            &buf[..buf.len() - 1]
1370        } else {
1371            &buf[..]
1372        };
1373
1374        let wrote = process_one_line(line, cfg, out)?;
1375
1376        // GNU always terminates output lines, even if input had no trailing delimiter
1377        if wrote {
1378            out.write_all(&[cfg.line_delim])?;
1379        }
1380    }
1381
1382    Ok(())
1383}
1384
1385/// Process one line according to the cut config (used by stdin reader path).
1386#[inline]
1387fn process_one_line(line: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<bool> {
1388    match cfg.mode {
1389        CutMode::Fields => cut_fields(
1390            line,
1391            cfg.delim,
1392            cfg.ranges,
1393            cfg.complement,
1394            cfg.output_delim,
1395            cfg.suppress_no_delim,
1396            out,
1397        ),
1398        CutMode::Bytes | CutMode::Characters => {
1399            cut_bytes(line, cfg.ranges, cfg.complement, cfg.output_delim, out)
1400        }
1401    }
1402}
1403
1404/// Cut operation mode
1405#[derive(Debug, Clone, Copy, PartialEq)]
1406pub enum CutMode {
1407    Bytes,
1408    Characters,
1409    Fields,
1410}