Skip to main content

coreutils_rs/cut/
core.rs

1use memchr::memchr_iter;
2use rayon::prelude::*;
3use std::io::{self, BufRead, Write};
4
5/// Minimum file size for parallel processing (1MB).
6/// Rayon overhead is ~5-10μs per task; at 1MB per chunk,
7/// each chunk takes ~100μs+ to process, so overhead is < 10%.
8const PARALLEL_THRESHOLD: usize = 1024 * 1024;
9
10/// Configuration for cut operations.
11pub struct CutConfig<'a> {
12    pub mode: CutMode,
13    pub ranges: &'a [Range],
14    pub complement: bool,
15    pub delim: u8,
16    pub output_delim: &'a [u8],
17    pub suppress_no_delim: bool,
18    pub line_delim: u8,
19}
20
21/// A range specification like 1, 3-5, -3, 4-
22#[derive(Debug, Clone)]
23pub struct Range {
24    pub start: usize, // 1-based, 0 means "from beginning"
25    pub end: usize,   // 1-based, usize::MAX means "to end"
26}
27
28/// Parse a LIST specification like "1,3-5,7-" into ranges.
29/// Each range is 1-based. Returns sorted, merged ranges.
30pub fn parse_ranges(spec: &str) -> Result<Vec<Range>, String> {
31    let mut ranges = Vec::new();
32
33    for part in spec.split(',') {
34        let part = part.trim();
35        if part.is_empty() {
36            continue;
37        }
38
39        if let Some(idx) = part.find('-') {
40            let left = &part[..idx];
41            let right = &part[idx + 1..];
42
43            let start = if left.is_empty() {
44                1
45            } else {
46                left.parse::<usize>()
47                    .map_err(|_| format!("invalid range: '{}'", part))?
48            };
49
50            let end = if right.is_empty() {
51                usize::MAX
52            } else {
53                right
54                    .parse::<usize>()
55                    .map_err(|_| format!("invalid range: '{}'", part))?
56            };
57
58            if start == 0 {
59                return Err("fields and positions are numbered from 1".to_string());
60            }
61            if start > end {
62                return Err(format!("invalid decreasing range: '{}'", part));
63            }
64
65            ranges.push(Range { start, end });
66        } else {
67            let n = part
68                .parse::<usize>()
69                .map_err(|_| format!("invalid field: '{}'", part))?;
70            if n == 0 {
71                return Err("fields and positions are numbered from 1".to_string());
72            }
73            ranges.push(Range { start: n, end: n });
74        }
75    }
76
77    if ranges.is_empty() {
78        return Err("you must specify a list of bytes, characters, or fields".to_string());
79    }
80
81    // Sort and merge overlapping ranges
82    ranges.sort_by_key(|r| (r.start, r.end));
83    let mut merged = vec![ranges[0].clone()];
84    for r in &ranges[1..] {
85        let last = merged.last_mut().unwrap();
86        if r.start <= last.end.saturating_add(1) {
87            last.end = last.end.max(r.end);
88        } else {
89            merged.push(r.clone());
90        }
91    }
92
93    Ok(merged)
94}
95
96/// Check if a 1-based position is in any range.
97/// Ranges must be sorted. Uses early exit since ranges are sorted.
98#[inline(always)]
99fn in_ranges(ranges: &[Range], pos: usize) -> bool {
100    for r in ranges {
101        if pos < r.start {
102            return false; // ranges are sorted, no point checking further
103        }
104        if pos <= r.end {
105            return true;
106        }
107    }
108    false
109}
110
111/// Pre-compute a 64-bit mask for field selection.
112/// Bit i-1 is set if field i should be output.
113#[inline]
114fn compute_field_mask(ranges: &[Range], complement: bool) -> u64 {
115    let mut mask: u64 = 0;
116    for i in 1..=64u32 {
117        let in_range = in_ranges(ranges, i as usize);
118        if in_range != complement {
119            mask |= 1u64 << (i - 1);
120        }
121    }
122    mask
123}
124
125/// Check if a field should be selected, using bitset for first 64 fields.
126#[inline(always)]
127fn is_selected(field_num: usize, mask: u64, ranges: &[Range], complement: bool) -> bool {
128    if field_num <= 64 {
129        (mask >> (field_num - 1)) & 1 == 1
130    } else {
131        in_ranges(ranges, field_num) != complement
132    }
133}
134
135// ── Chunk splitting for parallel processing ──────────────────────────────
136
137/// Split data into chunks aligned to line boundaries for parallel processing.
138/// Returns slices that each end on a line_delim boundary (except possibly the last).
139fn split_into_chunks<'a>(data: &'a [u8], line_delim: u8) -> Vec<&'a [u8]> {
140    let num_threads = rayon::current_num_threads().max(1);
141    if data.len() < PARALLEL_THRESHOLD || num_threads <= 1 {
142        return vec![data];
143    }
144
145    let chunk_size = data.len() / num_threads;
146    let mut chunks = Vec::with_capacity(num_threads);
147    let mut pos = 0;
148
149    for _ in 0..num_threads - 1 {
150        let target = pos + chunk_size;
151        if target >= data.len() {
152            break;
153        }
154        // Align to next line boundary
155        let boundary = memchr::memchr(line_delim, &data[target..])
156            .map(|p| target + p + 1)
157            .unwrap_or(data.len());
158        if boundary > pos {
159            chunks.push(&data[pos..boundary]);
160        }
161        pos = boundary;
162    }
163
164    // Last chunk gets the remainder
165    if pos < data.len() {
166        chunks.push(&data[pos..]);
167    }
168
169    chunks
170}
171
172// ── Fast path: field extraction with batched output ──────────────────────
173
174/// Optimized field extraction with early exit and batched output.
175/// Uses SIMD newline scanning + inline byte scan per line.
176fn process_fields_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
177    let delim = cfg.delim;
178    let line_delim = cfg.line_delim;
179    let ranges = cfg.ranges;
180    let complement = cfg.complement;
181    let output_delim = cfg.output_delim;
182    let suppress = cfg.suppress_no_delim;
183
184    // Ultra-fast path: single field extraction (e.g., cut -f5)
185    if !complement && ranges.len() == 1 && ranges[0].start == ranges[0].end {
186        return process_single_field(data, delim, line_delim, ranges[0].start, suppress, out);
187    }
188
189    // Fast path: complement of single field with default output delimiter.
190    // e.g., cut --complement -f1: skip first field, output rest unchanged.
191    if complement
192        && ranges.len() == 1
193        && ranges[0].start == ranges[0].end
194        && output_delim.len() == 1
195        && output_delim[0] == delim
196    {
197        return process_complement_single_field(
198            data,
199            delim,
200            line_delim,
201            ranges[0].start,
202            suppress,
203            out,
204        );
205    }
206
207    // Fast path: contiguous from-start field range (e.g., cut -f1-5, cut -f-3)
208    // with default output delimiter. Just find the N-th delimiter and copy.
209    if !complement
210        && ranges.len() == 1
211        && ranges[0].start == 1
212        && output_delim.len() == 1
213        && output_delim[0] == delim
214        && ranges[0].end < usize::MAX
215    {
216        return process_fields_prefix(data, delim, line_delim, ranges[0].end, suppress, out);
217    }
218
219    // Fast path: open-ended field range from field N (e.g., cut -f3-)
220    // Skip first N-1 delimiters per line instead of per-field selection.
221    if !complement
222        && ranges.len() == 1
223        && ranges[0].end == usize::MAX
224        && ranges[0].start > 1
225        && output_delim.len() == 1
226        && output_delim[0] == delim
227    {
228        return process_fields_suffix(data, delim, line_delim, ranges[0].start, suppress, out);
229    }
230
231    // Pre-compute for general field extraction
232    let max_field = if complement {
233        usize::MAX
234    } else {
235        ranges.last().map(|r| r.end).unwrap_or(0)
236    };
237    let field_mask = compute_field_mask(ranges, complement);
238
239    if data.len() >= PARALLEL_THRESHOLD {
240        // Parallel path
241        let chunks = split_into_chunks(data, line_delim);
242        let results: Vec<Vec<u8>> = chunks
243            .par_iter()
244            .map(|chunk| {
245                let mut buf = Vec::with_capacity(chunk.len() / 2);
246                process_fields_chunk(
247                    chunk,
248                    delim,
249                    ranges,
250                    output_delim,
251                    suppress,
252                    max_field,
253                    field_mask,
254                    line_delim,
255                    complement,
256                    &mut buf,
257                );
258                buf
259            })
260            .collect();
261        for result in &results {
262            if !result.is_empty() {
263                out.write_all(result)?;
264            }
265        }
266    } else {
267        // Sequential path
268        let mut buf = Vec::with_capacity(data.len() / 2);
269        process_fields_chunk(
270            data,
271            delim,
272            ranges,
273            output_delim,
274            suppress,
275            max_field,
276            field_mask,
277            line_delim,
278            complement,
279            &mut buf,
280        );
281        if !buf.is_empty() {
282            out.write_all(&buf)?;
283        }
284    }
285    Ok(())
286}
287
288/// Process a chunk of data for general field extraction.
289fn process_fields_chunk(
290    data: &[u8],
291    delim: u8,
292    ranges: &[Range],
293    output_delim: &[u8],
294    suppress: bool,
295    max_field: usize,
296    field_mask: u64,
297    line_delim: u8,
298    complement: bool,
299    buf: &mut Vec<u8>,
300) {
301    let mut start = 0;
302    for end_pos in memchr_iter(line_delim, data) {
303        let line = &data[start..end_pos];
304        extract_fields_to_buf(
305            line,
306            delim,
307            ranges,
308            output_delim,
309            suppress,
310            max_field,
311            field_mask,
312            line_delim,
313            buf,
314            complement,
315        );
316        start = end_pos + 1;
317    }
318    if start < data.len() {
319        extract_fields_to_buf(
320            &data[start..],
321            delim,
322            ranges,
323            output_delim,
324            suppress,
325            max_field,
326            field_mask,
327            line_delim,
328            buf,
329            complement,
330        );
331    }
332}
333
334// ── Ultra-fast single field extraction ───────────────────────────────────
335
336/// Specialized path for extracting exactly one field (e.g., `cut -f5`).
337/// Uses SIMD memchr for newline scanning + minimal inline byte scan per line.
338/// For large files, splits work across multiple threads with rayon.
339fn process_single_field(
340    data: &[u8],
341    delim: u8,
342    line_delim: u8,
343    target: usize,
344    suppress: bool,
345    out: &mut impl Write,
346) -> io::Result<()> {
347    let target_idx = target - 1;
348
349    // Ultra-fast path: first field with combined delimiter+newline scan.
350    // Uses memchr2_iter to find both delimiter and newline in a single SIMD pass,
351    // eliminating the per-line memchr call overhead.
352    if target_idx == 0 && delim != line_delim {
353        if data.len() >= PARALLEL_THRESHOLD {
354            let chunks = split_into_chunks(data, line_delim);
355            let results: Vec<Vec<u8>> = chunks
356                .par_iter()
357                .map(|chunk| {
358                    let mut buf = Vec::with_capacity(chunk.len() / 4);
359                    process_first_field_combined(chunk, delim, line_delim, suppress, &mut buf);
360                    buf
361                })
362                .collect();
363            for result in &results {
364                if !result.is_empty() {
365                    out.write_all(result)?;
366                }
367            }
368        } else {
369            let mut buf = Vec::with_capacity(data.len() / 4);
370            process_first_field_combined(data, delim, line_delim, suppress, &mut buf);
371            if !buf.is_empty() {
372                out.write_all(&buf)?;
373            }
374        }
375        return Ok(());
376    }
377
378    if data.len() >= PARALLEL_THRESHOLD {
379        // Parallel path: split into chunks aligned to line boundaries
380        let chunks = split_into_chunks(data, line_delim);
381        let results: Vec<Vec<u8>> = chunks
382            .par_iter()
383            .map(|chunk| {
384                let mut buf = Vec::with_capacity(chunk.len() / 4);
385                process_single_field_chunk(
386                    chunk, delim, target_idx, line_delim, suppress, &mut buf,
387                );
388                buf
389            })
390            .collect();
391        for result in &results {
392            if !result.is_empty() {
393                out.write_all(result)?;
394            }
395        }
396    } else {
397        // Sequential path for small data
398        let mut buf = Vec::with_capacity(data.len() / 4);
399        process_single_field_chunk(data, delim, target_idx, line_delim, suppress, &mut buf);
400        if !buf.is_empty() {
401            out.write_all(&buf)?;
402        }
403    }
404    Ok(())
405}
406
407/// Complement single-field extraction: skip one field, output rest unchanged.
408/// For `--complement -f1`: find first delimiter, output everything after.
409/// For `--complement -fN`: output fields 1..N-1, skip N, output N+1...
410/// Uses combined SIMD scan for first-field complement (most common case).
411fn process_complement_single_field(
412    data: &[u8],
413    delim: u8,
414    line_delim: u8,
415    skip_field: usize,
416    suppress: bool,
417    out: &mut impl Write,
418) -> io::Result<()> {
419    let skip_idx = skip_field - 1;
420
421    if data.len() >= PARALLEL_THRESHOLD {
422        let chunks = split_into_chunks(data, line_delim);
423        let results: Vec<Vec<u8>> = chunks
424            .par_iter()
425            .map(|chunk| {
426                let mut buf = Vec::with_capacity(chunk.len());
427                complement_single_field_chunk(
428                    chunk, delim, skip_idx, line_delim, suppress, &mut buf,
429                );
430                buf
431            })
432            .collect();
433        for result in &results {
434            if !result.is_empty() {
435                out.write_all(result)?;
436            }
437        }
438    } else {
439        let mut buf = Vec::with_capacity(data.len());
440        complement_single_field_chunk(data, delim, skip_idx, line_delim, suppress, &mut buf);
441        if !buf.is_empty() {
442            out.write_all(&buf)?;
443        }
444    }
445    Ok(())
446}
447
448/// Process a chunk for complement single-field extraction.
449fn complement_single_field_chunk(
450    data: &[u8],
451    delim: u8,
452    skip_idx: usize,
453    line_delim: u8,
454    suppress: bool,
455    buf: &mut Vec<u8>,
456) {
457    let mut start = 0;
458    for end_pos in memchr_iter(line_delim, data) {
459        let line = &data[start..end_pos];
460        complement_single_field_line(line, delim, skip_idx, line_delim, suppress, buf);
461        start = end_pos + 1;
462    }
463    if start < data.len() {
464        complement_single_field_line(&data[start..], delim, skip_idx, line_delim, suppress, buf);
465    }
466}
467
468/// Extract all fields except skip_idx from one line.
469#[inline(always)]
470fn complement_single_field_line(
471    line: &[u8],
472    delim: u8,
473    skip_idx: usize,
474    line_delim: u8,
475    suppress: bool,
476    buf: &mut Vec<u8>,
477) {
478    if line.is_empty() {
479        if !suppress {
480            buf.push(line_delim);
481        }
482        return;
483    }
484
485    // Find all delimiter positions
486    let mut field_idx = 0;
487    let mut field_start = 0;
488    let mut first_output = true;
489    let mut has_delim = false;
490
491    for pos in memchr_iter(delim, line) {
492        has_delim = true;
493        if field_idx != skip_idx {
494            if !first_output {
495                buf.push(delim);
496            }
497            buf.extend_from_slice(&line[field_start..pos]);
498            first_output = false;
499        }
500        field_idx += 1;
501        field_start = pos + 1;
502    }
503
504    if !has_delim {
505        if !suppress {
506            buf.extend_from_slice(line);
507            buf.push(line_delim);
508        }
509        return;
510    }
511
512    // Last field
513    if field_idx != skip_idx {
514        if !first_output {
515            buf.push(delim);
516        }
517        buf.extend_from_slice(&line[field_start..]);
518    }
519
520    if !first_output || field_idx != skip_idx {
521        buf.push(line_delim);
522    } else {
523        // Only the skipped field existed — output empty line
524        buf.push(line_delim);
525    }
526}
527
528/// Contiguous from-start field range extraction (e.g., `cut -f1-5`).
529/// Finds the N-th delimiter and copies everything before it (preserving original delimiters).
530/// Much faster than per-field selection: one memchr_iter scan with early exit.
531fn process_fields_prefix(
532    data: &[u8],
533    delim: u8,
534    line_delim: u8,
535    last_field: usize,
536    suppress: bool,
537    out: &mut impl Write,
538) -> io::Result<()> {
539    if data.len() >= PARALLEL_THRESHOLD {
540        let chunks = split_into_chunks(data, line_delim);
541        let results: Vec<Vec<u8>> = chunks
542            .par_iter()
543            .map(|chunk| {
544                let mut buf = Vec::with_capacity(chunk.len());
545                fields_prefix_chunk(chunk, delim, line_delim, last_field, suppress, &mut buf);
546                buf
547            })
548            .collect();
549        for result in &results {
550            if !result.is_empty() {
551                out.write_all(result)?;
552            }
553        }
554    } else {
555        let mut buf = Vec::with_capacity(data.len());
556        fields_prefix_chunk(data, delim, line_delim, last_field, suppress, &mut buf);
557        if !buf.is_empty() {
558            out.write_all(&buf)?;
559        }
560    }
561    Ok(())
562}
563
564/// Process a chunk for contiguous from-start field range extraction.
565fn fields_prefix_chunk(
566    data: &[u8],
567    delim: u8,
568    line_delim: u8,
569    last_field: usize,
570    suppress: bool,
571    buf: &mut Vec<u8>,
572) {
573    let mut start = 0;
574    for end_pos in memchr_iter(line_delim, data) {
575        let line = &data[start..end_pos];
576        fields_prefix_line(line, delim, line_delim, last_field, suppress, buf);
577        start = end_pos + 1;
578    }
579    if start < data.len() {
580        fields_prefix_line(&data[start..], delim, line_delim, last_field, suppress, buf);
581    }
582}
583
584/// Extract first N fields from one line (contiguous from-start range).
585/// Finds the N-th delimiter using memchr_iter with early exit.
586#[inline(always)]
587fn fields_prefix_line(
588    line: &[u8],
589    delim: u8,
590    line_delim: u8,
591    last_field: usize,
592    suppress: bool,
593    buf: &mut Vec<u8>,
594) {
595    if line.is_empty() {
596        if !suppress {
597            buf.push(line_delim);
598        }
599        return;
600    }
601
602    // Count delimiters; stop at last_field (we need fields 1..last_field)
603    let mut field_count = 1;
604    let mut has_delim = false;
605
606    for pos in memchr_iter(delim, line) {
607        has_delim = true;
608        if field_count >= last_field {
609            // Found enough fields — output up to this delimiter
610            buf.extend_from_slice(&line[..pos]);
611            buf.push(line_delim);
612            return;
613        }
614        field_count += 1;
615    }
616
617    if !has_delim {
618        if !suppress {
619            buf.extend_from_slice(line);
620            buf.push(line_delim);
621        }
622        return;
623    }
624
625    // Fewer fields than requested — output the entire line
626    // (all fields are within range since we have < last_field fields)
627    buf.extend_from_slice(line);
628    buf.push(line_delim);
629}
630
631/// Open-ended field suffix extraction (e.g., `cut -f3-`).
632/// Skip the first N-1 delimiters, then output everything from field N to end.
633fn process_fields_suffix(
634    data: &[u8],
635    delim: u8,
636    line_delim: u8,
637    start_field: usize,
638    suppress: bool,
639    out: &mut impl Write,
640) -> io::Result<()> {
641    if data.len() >= PARALLEL_THRESHOLD {
642        let chunks = split_into_chunks(data, line_delim);
643        let results: Vec<Vec<u8>> = chunks
644            .par_iter()
645            .map(|chunk| {
646                let mut buf = Vec::with_capacity(chunk.len());
647                fields_suffix_chunk(chunk, delim, line_delim, start_field, suppress, &mut buf);
648                buf
649            })
650            .collect();
651        for result in &results {
652            if !result.is_empty() {
653                out.write_all(result)?;
654            }
655        }
656    } else {
657        let mut buf = Vec::with_capacity(data.len());
658        fields_suffix_chunk(data, delim, line_delim, start_field, suppress, &mut buf);
659        if !buf.is_empty() {
660            out.write_all(&buf)?;
661        }
662    }
663    Ok(())
664}
665
666/// Process a chunk for open-ended field suffix extraction.
667fn fields_suffix_chunk(
668    data: &[u8],
669    delim: u8,
670    line_delim: u8,
671    start_field: usize,
672    suppress: bool,
673    buf: &mut Vec<u8>,
674) {
675    let mut start = 0;
676    for end_pos in memchr_iter(line_delim, data) {
677        let line = &data[start..end_pos];
678        fields_suffix_line(line, delim, line_delim, start_field, suppress, buf);
679        start = end_pos + 1;
680    }
681    if start < data.len() {
682        fields_suffix_line(
683            &data[start..],
684            delim,
685            line_delim,
686            start_field,
687            suppress,
688            buf,
689        );
690    }
691}
692
693/// Extract fields from start_field to end from one line.
694/// Skips the first (start_field - 1) delimiters, outputs the rest.
695#[inline(always)]
696fn fields_suffix_line(
697    line: &[u8],
698    delim: u8,
699    line_delim: u8,
700    start_field: usize,
701    suppress: bool,
702    buf: &mut Vec<u8>,
703) {
704    if line.is_empty() {
705        if !suppress {
706            buf.push(line_delim);
707        }
708        return;
709    }
710
711    let skip_delims = start_field - 1;
712    let mut delim_count = 0;
713    let mut has_delim = false;
714
715    for pos in memchr_iter(delim, line) {
716        has_delim = true;
717        delim_count += 1;
718        if delim_count >= skip_delims {
719            // Output from after this delimiter to end of line
720            buf.extend_from_slice(&line[pos + 1..]);
721            buf.push(line_delim);
722            return;
723        }
724    }
725
726    if !has_delim {
727        // No delimiter — output whole line or suppress
728        if !suppress {
729            buf.extend_from_slice(line);
730            buf.push(line_delim);
731        }
732        return;
733    }
734
735    // Fewer delimiters than needed — field doesn't exist, output empty
736    buf.push(line_delim);
737}
738
739/// First-field extraction using combined delimiter+newline SIMD scan.
740/// Single memchr2_iter pass finds both delimiter and newline positions,
741/// eliminating per-line memchr overhead (saves ~250K function calls for 10MB).
742fn process_first_field_combined(
743    data: &[u8],
744    delim: u8,
745    line_delim: u8,
746    suppress: bool,
747    buf: &mut Vec<u8>,
748) {
749    let mut line_start = 0;
750    let mut found_delim = false; // true if we already found delimiter for current line
751
752    for pos in memchr::memchr2_iter(delim, line_delim, data) {
753        let byte = data[pos];
754        if byte == line_delim {
755            // End of line
756            if !found_delim {
757                // No delimiter on this line — output whole line or suppress
758                if !suppress {
759                    buf.extend_from_slice(&data[line_start..pos]);
760                    buf.push(line_delim);
761                }
762            }
763            line_start = pos + 1;
764            found_delim = false;
765        } else {
766            // Delimiter found
767            if !found_delim {
768                // First delimiter — output field before it
769                buf.extend_from_slice(&data[line_start..pos]);
770                buf.push(line_delim);
771                found_delim = true;
772            }
773            // Subsequent delimiters on same line — ignore
774        }
775    }
776
777    // Handle last line without trailing newline
778    if line_start < data.len() {
779        if !found_delim {
780            // No delimiter found — output whole line or suppress
781            if !suppress {
782                // Check if there's a delimiter in the remaining data
783                match memchr::memchr(delim, &data[line_start..]) {
784                    Some(offset) => {
785                        buf.extend_from_slice(&data[line_start..line_start + offset]);
786                        buf.push(line_delim);
787                    }
788                    None => {
789                        buf.extend_from_slice(&data[line_start..]);
790                        buf.push(line_delim);
791                    }
792                }
793            }
794        }
795    }
796}
797
798/// Process a chunk of data for single-field extraction.
799fn process_single_field_chunk(
800    data: &[u8],
801    delim: u8,
802    target_idx: usize,
803    line_delim: u8,
804    suppress: bool,
805    buf: &mut Vec<u8>,
806) {
807    let mut start = 0;
808    for end_pos in memchr_iter(line_delim, data) {
809        let line = &data[start..end_pos];
810        extract_single_field_line(line, delim, target_idx, line_delim, suppress, buf);
811        start = end_pos + 1;
812    }
813    // Handle final line without terminator
814    if start < data.len() {
815        extract_single_field_line(&data[start..], delim, target_idx, line_delim, suppress, buf);
816    }
817}
818
819/// Extract a single field from one line.
820/// For target_idx == 0 (first field), uses single memchr instead of memchr_iter.
821/// For other fields, uses SIMD memchr_iter with early exit.
822#[inline(always)]
823fn extract_single_field_line(
824    line: &[u8],
825    delim: u8,
826    target_idx: usize,
827    line_delim: u8,
828    suppress: bool,
829    buf: &mut Vec<u8>,
830) {
831    if line.is_empty() {
832        if !suppress {
833            buf.push(line_delim);
834        }
835        return;
836    }
837
838    // Ultra-fast path for first field (target_idx == 0): single memchr
839    if target_idx == 0 {
840        match memchr::memchr(delim, line) {
841            Some(pos) => {
842                buf.extend_from_slice(&line[..pos]);
843                buf.push(line_delim);
844            }
845            None => {
846                // No delimiter — output whole line or suppress
847                if !suppress {
848                    buf.extend_from_slice(line);
849                    buf.push(line_delim);
850                }
851            }
852        }
853        return;
854    }
855
856    let mut field_start = 0;
857    let mut field_idx = 0;
858    let mut has_delim = false;
859
860    for pos in memchr_iter(delim, line) {
861        has_delim = true;
862        if field_idx == target_idx {
863            // Found end of target field — output and return
864            buf.extend_from_slice(&line[field_start..pos]);
865            buf.push(line_delim);
866            return;
867        }
868        field_idx += 1;
869        field_start = pos + 1;
870    }
871
872    if !has_delim {
873        // No delimiters found — output whole line or suppress
874        if !suppress {
875            buf.extend_from_slice(line);
876            buf.push(line_delim);
877        }
878        return;
879    }
880
881    if field_idx == target_idx {
882        // Target is the last field (no trailing delimiter)
883        buf.extend_from_slice(&line[field_start..]);
884        buf.push(line_delim);
885    } else {
886        // Not enough fields — output empty line
887        buf.push(line_delim);
888    }
889}
890
891/// Extract fields from a single line into the output buffer.
892/// Uses inline byte scanning with early exit for maximum performance.
893#[inline(always)]
894fn extract_fields_to_buf(
895    line: &[u8],
896    delim: u8,
897    ranges: &[Range],
898    output_delim: &[u8],
899    suppress: bool,
900    max_field: usize,
901    field_mask: u64,
902    line_delim: u8,
903    buf: &mut Vec<u8>,
904    complement: bool,
905) {
906    let len = line.len();
907
908    // Empty line: no delimiter possible
909    if len == 0 {
910        if !suppress {
911            buf.push(line_delim);
912        }
913        return;
914    }
915
916    let mut field_num: usize = 1;
917    let mut field_start: usize = 0;
918    let mut first_output = true;
919    let mut has_delim = false;
920
921    // SIMD-accelerated delimiter scanning with early exit
922    for delim_pos in memchr_iter(delim, line) {
923        has_delim = true;
924
925        if is_selected(field_num, field_mask, ranges, complement) {
926            if !first_output {
927                buf.extend_from_slice(output_delim);
928            }
929            buf.extend_from_slice(&line[field_start..delim_pos]);
930            first_output = false;
931        }
932
933        field_num += 1;
934        field_start = delim_pos + 1;
935
936        // Early exit: past the last needed field
937        if field_num > max_field {
938            break;
939        }
940    }
941
942    // Last field (only if we didn't early-exit past it)
943    if (field_num <= max_field || complement)
944        && has_delim
945        && is_selected(field_num, field_mask, ranges, complement)
946    {
947        if !first_output {
948            buf.extend_from_slice(output_delim);
949        }
950        buf.extend_from_slice(&line[field_start..len]);
951        first_output = false;
952    }
953
954    // Output line terminator
955    if !first_output {
956        // Had output — add line delimiter
957        buf.push(line_delim);
958    } else if !has_delim {
959        // No delimiter found — output whole line or suppress
960        if !suppress {
961            buf.extend_from_slice(line);
962            buf.push(line_delim);
963        }
964    } else {
965        // Had delimiter but no selected field — output empty line (GNU compat)
966        buf.push(line_delim);
967    }
968}
969
970// ── Fast path: byte/char extraction with batched output ──────────────────
971
972/// Ultra-fast path for `cut -b1-N`: single from-start byte range.
973/// Scans for newlines with SIMD memchr, truncates each line to N bytes.
974/// Avoids per-line function call + range check overhead entirely.
975fn process_bytes_from_start(
976    data: &[u8],
977    max_bytes: usize,
978    line_delim: u8,
979    out: &mut impl Write,
980) -> io::Result<()> {
981    if data.len() >= PARALLEL_THRESHOLD {
982        let chunks = split_into_chunks(data, line_delim);
983        let results: Vec<Vec<u8>> = chunks
984            .par_iter()
985            .map(|chunk| {
986                let mut buf = Vec::with_capacity(chunk.len());
987                bytes_from_start_chunk(chunk, max_bytes, line_delim, &mut buf);
988                buf
989            })
990            .collect();
991        for result in &results {
992            if !result.is_empty() {
993                out.write_all(result)?;
994            }
995        }
996    } else {
997        let mut buf = Vec::with_capacity(data.len());
998        bytes_from_start_chunk(data, max_bytes, line_delim, &mut buf);
999        if !buf.is_empty() {
1000            out.write_all(&buf)?;
1001        }
1002    }
1003    Ok(())
1004}
1005
1006/// Process a chunk for from-start byte range extraction.
1007/// For each line, outputs min(line_len, max_bytes) bytes + line delimiter.
1008#[inline]
1009fn bytes_from_start_chunk(data: &[u8], max_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1010    let mut start = 0;
1011    for pos in memchr_iter(line_delim, data) {
1012        let line_len = pos - start;
1013        let take = line_len.min(max_bytes);
1014        buf.extend_from_slice(&data[start..start + take]);
1015        buf.push(line_delim);
1016        start = pos + 1;
1017    }
1018    // Handle last line without terminator
1019    if start < data.len() {
1020        let line_len = data.len() - start;
1021        let take = line_len.min(max_bytes);
1022        buf.extend_from_slice(&data[start..start + take]);
1023        buf.push(line_delim);
1024    }
1025}
1026
1027/// Fast path for `cut -bN-`: skip first N-1 bytes per line.
1028/// For each line, outputs bytes from position N to end of line.
1029fn process_bytes_from_offset(
1030    data: &[u8],
1031    skip_bytes: usize,
1032    line_delim: u8,
1033    out: &mut impl Write,
1034) -> io::Result<()> {
1035    if data.len() >= PARALLEL_THRESHOLD {
1036        let chunks = split_into_chunks(data, line_delim);
1037        let results: Vec<Vec<u8>> = chunks
1038            .par_iter()
1039            .map(|chunk| {
1040                let mut buf = Vec::with_capacity(chunk.len());
1041                bytes_from_offset_chunk(chunk, skip_bytes, line_delim, &mut buf);
1042                buf
1043            })
1044            .collect();
1045        for result in &results {
1046            if !result.is_empty() {
1047                out.write_all(result)?;
1048            }
1049        }
1050    } else {
1051        let mut buf = Vec::with_capacity(data.len());
1052        bytes_from_offset_chunk(data, skip_bytes, line_delim, &mut buf);
1053        if !buf.is_empty() {
1054            out.write_all(&buf)?;
1055        }
1056    }
1057    Ok(())
1058}
1059
1060/// Process a chunk for from-offset byte range extraction.
1061#[inline]
1062fn bytes_from_offset_chunk(data: &[u8], skip_bytes: usize, line_delim: u8, buf: &mut Vec<u8>) {
1063    let mut start = 0;
1064    for pos in memchr_iter(line_delim, data) {
1065        let line_len = pos - start;
1066        if line_len > skip_bytes {
1067            buf.extend_from_slice(&data[start + skip_bytes..pos]);
1068        }
1069        buf.push(line_delim);
1070        start = pos + 1;
1071    }
1072    // Handle last line without terminator
1073    if start < data.len() {
1074        let line_len = data.len() - start;
1075        if line_len > skip_bytes {
1076            buf.extend_from_slice(&data[start + skip_bytes..data.len()]);
1077        }
1078        buf.push(line_delim);
1079    }
1080}
1081
1082/// Optimized byte/char extraction with batched output and parallel processing.
1083fn process_bytes_fast(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1084    let line_delim = cfg.line_delim;
1085    let ranges = cfg.ranges;
1086    let complement = cfg.complement;
1087    let output_delim = cfg.output_delim;
1088
1089    // Ultra-fast path: single range from byte 1 (e.g., cut -b1-10, cut -b-20)
1090    // Avoids per-line function call overhead by scanning newlines and truncating inline.
1091    if !complement && ranges.len() == 1 && ranges[0].start == 1 && output_delim.is_empty() {
1092        let max_bytes = ranges[0].end;
1093        if max_bytes < usize::MAX {
1094            return process_bytes_from_start(data, max_bytes, line_delim, out);
1095        }
1096    }
1097
1098    // Fast path: single open-ended range from byte N (e.g., cut -b5-)
1099    // Skip first N-1 bytes per line instead of per-byte range checking.
1100    if !complement && ranges.len() == 1 && ranges[0].end == usize::MAX && output_delim.is_empty() {
1101        let skip_bytes = ranges[0].start.saturating_sub(1);
1102        if skip_bytes > 0 {
1103            return process_bytes_from_offset(data, skip_bytes, line_delim, out);
1104        }
1105    }
1106
1107    if data.len() >= PARALLEL_THRESHOLD {
1108        let chunks = split_into_chunks(data, line_delim);
1109        let results: Vec<Vec<u8>> = chunks
1110            .par_iter()
1111            .map(|chunk| {
1112                let mut buf = Vec::with_capacity(chunk.len() / 2);
1113                process_bytes_chunk(
1114                    chunk,
1115                    ranges,
1116                    complement,
1117                    output_delim,
1118                    line_delim,
1119                    &mut buf,
1120                );
1121                buf
1122            })
1123            .collect();
1124        for result in &results {
1125            if !result.is_empty() {
1126                out.write_all(result)?;
1127            }
1128        }
1129    } else {
1130        let mut buf = Vec::with_capacity(data.len() / 2);
1131        process_bytes_chunk(data, ranges, complement, output_delim, line_delim, &mut buf);
1132        if !buf.is_empty() {
1133            out.write_all(&buf)?;
1134        }
1135    }
1136    Ok(())
1137}
1138
1139/// Process a chunk of data for byte/char extraction.
1140fn process_bytes_chunk(
1141    data: &[u8],
1142    ranges: &[Range],
1143    complement: bool,
1144    output_delim: &[u8],
1145    line_delim: u8,
1146    buf: &mut Vec<u8>,
1147) {
1148    let mut start = 0;
1149    for end_pos in memchr_iter(line_delim, data) {
1150        let line = &data[start..end_pos];
1151        cut_bytes_to_buf(line, ranges, complement, output_delim, buf);
1152        buf.push(line_delim);
1153        start = end_pos + 1;
1154    }
1155    if start < data.len() {
1156        cut_bytes_to_buf(&data[start..], ranges, complement, output_delim, buf);
1157        buf.push(line_delim);
1158    }
1159}
1160
1161/// Extract byte ranges from a line into the output buffer.
1162/// For the common non-complement case with contiguous ranges, uses bulk copy.
1163#[inline(always)]
1164fn cut_bytes_to_buf(
1165    line: &[u8],
1166    ranges: &[Range],
1167    complement: bool,
1168    output_delim: &[u8],
1169    buf: &mut Vec<u8>,
1170) {
1171    let len = line.len();
1172    let mut first_range = true;
1173
1174    if complement {
1175        let mut pos: usize = 1;
1176        for r in ranges {
1177            let rs = r.start;
1178            let re = r.end.min(len);
1179            if pos < rs {
1180                if !first_range && !output_delim.is_empty() {
1181                    buf.extend_from_slice(output_delim);
1182                }
1183                buf.extend_from_slice(&line[pos - 1..rs - 1]);
1184                first_range = false;
1185            }
1186            pos = re + 1;
1187            if pos > len {
1188                break;
1189            }
1190        }
1191        if pos <= len {
1192            if !first_range && !output_delim.is_empty() {
1193                buf.extend_from_slice(output_delim);
1194            }
1195            buf.extend_from_slice(&line[pos - 1..len]);
1196        }
1197    } else if output_delim.is_empty() && ranges.len() == 1 {
1198        // Ultra-fast path: single range, no output delimiter
1199        let start = ranges[0].start.saturating_sub(1);
1200        let end = ranges[0].end.min(len);
1201        if start < len {
1202            buf.extend_from_slice(&line[start..end]);
1203        }
1204    } else {
1205        for r in ranges {
1206            let start = r.start.saturating_sub(1);
1207            let end = r.end.min(len);
1208            if start >= len {
1209                break;
1210            }
1211            if !first_range && !output_delim.is_empty() {
1212                buf.extend_from_slice(output_delim);
1213            }
1214            buf.extend_from_slice(&line[start..end]);
1215            first_range = false;
1216        }
1217    }
1218}
1219
1220// ── Public API ───────────────────────────────────────────────────────────
1221
1222/// Cut fields from a line using a delimiter. Writes to `out`.
1223/// Returns true if any output was written, false if suppressed.
1224/// Used by process_cut_reader (stdin path) and unit tests.
1225#[inline]
1226pub fn cut_fields(
1227    line: &[u8],
1228    delim: u8,
1229    ranges: &[Range],
1230    complement: bool,
1231    output_delim: &[u8],
1232    suppress_no_delim: bool,
1233    out: &mut impl Write,
1234) -> io::Result<bool> {
1235    // Check if line contains delimiter at all
1236    if memchr::memchr(delim, line).is_none() {
1237        if !suppress_no_delim {
1238            out.write_all(line)?;
1239            return Ok(true);
1240        }
1241        return Ok(false); // suppressed
1242    }
1243
1244    // Walk through fields using memchr, output selected ones
1245    let mut field_num: usize = 1;
1246    let mut field_start: usize = 0;
1247    let mut first_output = true;
1248
1249    for delim_pos in memchr_iter(delim, line) {
1250        let selected = in_ranges(ranges, field_num) != complement;
1251        if selected {
1252            if !first_output {
1253                out.write_all(output_delim)?;
1254            }
1255            out.write_all(&line[field_start..delim_pos])?;
1256            first_output = false;
1257        }
1258        field_start = delim_pos + 1;
1259        field_num += 1;
1260    }
1261
1262    // Last field (after last delimiter)
1263    let selected = in_ranges(ranges, field_num) != complement;
1264    if selected {
1265        if !first_output {
1266            out.write_all(output_delim)?;
1267        }
1268        out.write_all(&line[field_start..])?;
1269    }
1270
1271    Ok(true)
1272}
1273
1274/// Cut bytes/chars from a line. Writes selected bytes to `out`.
1275/// Used by process_cut_reader (stdin path) and unit tests.
1276#[inline]
1277pub fn cut_bytes(
1278    line: &[u8],
1279    ranges: &[Range],
1280    complement: bool,
1281    output_delim: &[u8],
1282    out: &mut impl Write,
1283) -> io::Result<bool> {
1284    let mut first_range = true;
1285
1286    if complement {
1287        let len = line.len();
1288        let mut comp_ranges = Vec::new();
1289        let mut pos: usize = 1;
1290        for r in ranges {
1291            let rs = r.start;
1292            let re = r.end.min(len);
1293            if pos < rs {
1294                comp_ranges.push((pos, rs - 1));
1295            }
1296            pos = re + 1;
1297            if pos > len {
1298                break;
1299            }
1300        }
1301        if pos <= len {
1302            comp_ranges.push((pos, len));
1303        }
1304        for &(s, e) in &comp_ranges {
1305            if !first_range && !output_delim.is_empty() {
1306                out.write_all(output_delim)?;
1307            }
1308            out.write_all(&line[s - 1..e])?;
1309            first_range = false;
1310        }
1311    } else {
1312        for r in ranges {
1313            let start = r.start.saturating_sub(1);
1314            let end = r.end.min(line.len());
1315            if start >= line.len() {
1316                break;
1317            }
1318            if !first_range && !output_delim.is_empty() {
1319                out.write_all(output_delim)?;
1320            }
1321            out.write_all(&line[start..end])?;
1322            first_range = false;
1323        }
1324    }
1325    Ok(true)
1326}
1327
1328/// Process a full data buffer (from mmap or read) with cut operation.
1329/// Dispatches to optimized fast paths for field and byte modes.
1330pub fn process_cut_data(data: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<()> {
1331    match cfg.mode {
1332        CutMode::Fields => process_fields_fast(data, cfg, out),
1333        CutMode::Bytes | CutMode::Characters => process_bytes_fast(data, cfg, out),
1334    }
1335}
1336
1337/// Process input from a reader (for stdin).
1338pub fn process_cut_reader<R: BufRead>(
1339    mut reader: R,
1340    cfg: &CutConfig,
1341    out: &mut impl Write,
1342) -> io::Result<()> {
1343    let mut buf = Vec::new();
1344
1345    loop {
1346        buf.clear();
1347        let n = reader.read_until(cfg.line_delim, &mut buf)?;
1348        if n == 0 {
1349            break;
1350        }
1351
1352        let has_line_delim = buf.last() == Some(&cfg.line_delim);
1353        let line = if has_line_delim {
1354            &buf[..buf.len() - 1]
1355        } else {
1356            &buf[..]
1357        };
1358
1359        let wrote = process_one_line(line, cfg, out)?;
1360
1361        // GNU always terminates output lines, even if input had no trailing delimiter
1362        if wrote {
1363            out.write_all(&[cfg.line_delim])?;
1364        }
1365    }
1366
1367    Ok(())
1368}
1369
1370/// Process one line according to the cut config (used by stdin reader path).
1371#[inline]
1372fn process_one_line(line: &[u8], cfg: &CutConfig, out: &mut impl Write) -> io::Result<bool> {
1373    match cfg.mode {
1374        CutMode::Fields => cut_fields(
1375            line,
1376            cfg.delim,
1377            cfg.ranges,
1378            cfg.complement,
1379            cfg.output_delim,
1380            cfg.suppress_no_delim,
1381            out,
1382        ),
1383        CutMode::Bytes | CutMode::Characters => {
1384            cut_bytes(line, cfg.ranges, cfg.complement, cfg.output_delim, out)
1385        }
1386    }
1387}
1388
1389/// Cut operation mode
1390#[derive(Debug, Clone, Copy, PartialEq)]
1391pub enum CutMode {
1392    Bytes,
1393    Characters,
1394    Fields,
1395}