Skip to main content

coreutils_rs/expand/
core.rs

1use std::io::Write;
2
3/// Tab stop specification
4#[derive(Clone, Debug)]
5pub enum TabStops {
6    /// Regular interval (default 8)
7    Regular(usize),
8    /// Explicit list of tab stop positions (0-indexed columns)
9    List(Vec<usize>),
10}
11
12impl TabStops {
13    /// Calculate the number of spaces to the next tab stop from the given column.
14    #[inline]
15    fn spaces_to_next(&self, column: usize) -> usize {
16        match self {
17            TabStops::Regular(n) => {
18                if *n == 0 {
19                    return 0;
20                }
21                *n - (column % *n)
22            }
23            TabStops::List(stops) => {
24                // Find the first tab stop > current column
25                match stops.binary_search(&(column + 1)) {
26                    Ok(idx) => stops[idx] - column,
27                    Err(idx) => {
28                        if idx < stops.len() {
29                            stops[idx] - column
30                        } else {
31                            // Past all tab stops: GNU uses 1 space
32                            1
33                        }
34                    }
35                }
36            }
37        }
38    }
39
40    /// Get the next tab stop position after the given column.
41    #[inline]
42    fn next_tab_stop(&self, column: usize) -> usize {
43        column + self.spaces_to_next(column)
44    }
45}
46
47/// Parse a tab specification string (e.g., "4", "4,8,12", "4 8 12").
48pub fn parse_tab_stops(spec: &str) -> Result<TabStops, String> {
49    let spec = spec.trim();
50    if spec.is_empty() {
51        return Ok(TabStops::Regular(8));
52    }
53
54    // Check if it's a single number (regular interval)
55    if let Ok(n) = spec.parse::<usize>() {
56        if n == 0 {
57            return Err("tab size cannot be 0".to_string());
58        }
59        return Ok(TabStops::Regular(n));
60    }
61
62    // Parse as comma or space-separated list
63    let mut stops: Vec<usize> = Vec::new();
64    for part in spec.split([',', ' ']) {
65        let part = part.trim();
66        if part.is_empty() {
67            continue;
68        }
69        // Handle / prefix for repeating tab stops
70        if let Some(rest) = part.strip_prefix('/') {
71            let n: usize = rest
72                .parse()
73                .map_err(|_| format!("'{}' is not a valid number", part))?;
74            if n == 0 {
75                return Err("tab size cannot be 0".to_string());
76            }
77            let last = stops.last().copied().unwrap_or(0);
78            let mut pos = last + n;
79            while pos < 10000 {
80                stops.push(pos);
81                pos += n;
82            }
83            continue;
84        }
85        match part.parse::<usize>() {
86            Ok(n) => {
87                if !stops.is_empty() && n <= *stops.last().unwrap() {
88                    return Err("tab sizes must be ascending".to_string());
89                }
90                stops.push(n);
91            }
92            Err(_) => return Err(format!("'{}' is not a valid number", part)),
93        }
94    }
95
96    if stops.is_empty() {
97        return Err("tab specification is empty".to_string());
98    }
99
100    if stops.len() == 1 {
101        return Ok(TabStops::Regular(stops[0]));
102    }
103
104    Ok(TabStops::List(stops))
105}
106
107// Pre-computed spaces buffer for fast tab expansion (avoids per-tab allocation)
108// 4KB buffer covers even very large tab stops in a single memcpy
109const SPACES: [u8; 4096] = [b' '; 4096];
110
111/// Write N spaces to a Vec efficiently using pre-computed buffer.
112#[inline]
113fn push_spaces(output: &mut Vec<u8>, n: usize) {
114    let mut remaining = n;
115    while remaining > 0 {
116        let chunk = remaining.min(SPACES.len());
117        output.extend_from_slice(&SPACES[..chunk]);
118        remaining -= chunk;
119    }
120}
121
122/// Write N spaces to a Write stream using pre-computed buffer.
123#[inline]
124fn write_spaces(out: &mut impl Write, n: usize) -> std::io::Result<()> {
125    let mut remaining = n;
126    while remaining > 0 {
127        let chunk = remaining.min(SPACES.len());
128        out.write_all(&SPACES[..chunk])?;
129        remaining -= chunk;
130    }
131    Ok(())
132}
133
134/// Expand tabs to spaces using SIMD scanning.
135/// Dispatches to the optimal path based on tab configuration and data content.
136pub fn expand_bytes(
137    data: &[u8],
138    tabs: &TabStops,
139    initial_only: bool,
140    out: &mut impl Write,
141) -> std::io::Result<()> {
142    if data.is_empty() {
143        return Ok(());
144    }
145
146    // For regular tab stops, use fast SIMD paths.
147    // We combine the no-tabs and no-backspace checks to minimize full-data scans.
148    if let TabStops::Regular(tab_size) = tabs {
149        if initial_only {
150            // --initial mode: check for tabs first (cheap) to skip processing
151            if memchr::memchr(b'\t', data).is_none() {
152                return out.write_all(data);
153            }
154            return expand_initial_fast(data, *tab_size, out);
155        }
156
157        // Fast path: no tabs → write-through (avoids backspace scan too)
158        if memchr::memchr(b'\t', data).is_none() {
159            return out.write_all(data);
160        }
161
162        // Check for backspaces. If none, use the fast SIMD path.
163        if memchr::memchr(b'\x08', data).is_none() {
164            return expand_regular_fast(data, *tab_size, out);
165        }
166
167        // Has backspaces → fall through to generic
168        return expand_generic(data, tabs, initial_only, true, out);
169    }
170
171    // Tab list path: check for tabs first
172    if memchr::memchr(b'\t', data).is_none() {
173        return out.write_all(data);
174    }
175    let has_backspace = memchr::memchr(b'\x08', data).is_some();
176    expand_generic(data, tabs, initial_only, has_backspace, out)
177}
178
179/// Parallel threshold: files above this size use multi-threaded expand.
180/// Below this, single-threaded is faster (avoids thread pool overhead).
181const PARALLEL_THRESHOLD: usize = 4 * 1024 * 1024; // 4MB
182
183/// Fast expand for regular tab stops without -i flag.
184/// For large files (>4MB), uses rayon to process chunks in parallel.
185/// For smaller files, uses single-threaded SIMD processing.
186fn expand_regular_fast(data: &[u8], tab_size: usize, out: &mut impl Write) -> std::io::Result<()> {
187    debug_assert!(tab_size > 0, "tab_size must be > 0");
188
189    if data.len() >= PARALLEL_THRESHOLD {
190        expand_regular_parallel(data, tab_size, out)
191    } else {
192        expand_regular_single(data, tab_size, out)
193    }
194}
195
196/// Single-threaded expand: SIMD memchr2_iter scan of entire data.
197fn expand_regular_single(
198    data: &[u8],
199    tab_size: usize,
200    out: &mut impl Write,
201) -> std::io::Result<()> {
202    let is_pow2 = tab_size.is_power_of_two();
203    let mask = tab_size.wrapping_sub(1);
204
205    const INPUT_CHUNK: usize = 256 * 1024;
206    let worst_output = INPUT_CHUNK * tab_size + tab_size;
207    let mut output: Vec<u8> = Vec::with_capacity(worst_output);
208
209    let mut column: usize = 0;
210    let mut data_pos: usize = 0;
211
212    while data_pos < data.len() {
213        let chunk_end = if data_pos + INPUT_CHUNK >= data.len() {
214            data.len()
215        } else {
216            let search_end = data_pos + INPUT_CHUNK;
217            match memchr::memrchr(b'\n', &data[data_pos..search_end]) {
218                Some(off) => data_pos + off + 1,
219                None => search_end,
220            }
221        };
222
223        let chunk = &data[data_pos..chunk_end];
224        output.clear();
225
226        let out_ptr = output.as_mut_ptr();
227        let src = chunk.as_ptr();
228        let mut wp: usize = 0;
229        let mut pos: usize = 0;
230
231        for hit in memchr::memchr2_iter(b'\t', b'\n', chunk) {
232            let seg_len = hit - pos;
233            if seg_len > 0 {
234                unsafe {
235                    std::ptr::copy_nonoverlapping(src.add(pos), out_ptr.add(wp), seg_len);
236                }
237                wp += seg_len;
238                column += seg_len;
239            }
240
241            if unsafe { *src.add(hit) } == b'\n' {
242                unsafe {
243                    *out_ptr.add(wp) = b'\n';
244                }
245                wp += 1;
246                column = 0;
247            } else {
248                let rem = if is_pow2 {
249                    column & mask
250                } else {
251                    column % tab_size
252                };
253                let spaces = tab_size - rem;
254                unsafe {
255                    std::ptr::write_bytes(out_ptr.add(wp), b' ', spaces);
256                }
257                wp += spaces;
258                column += spaces;
259            }
260
261            pos = hit + 1;
262        }
263
264        if pos < chunk.len() {
265            let tail = chunk.len() - pos;
266            unsafe {
267                std::ptr::copy_nonoverlapping(src.add(pos), out_ptr.add(wp), tail);
268            }
269            wp += tail;
270            column += tail;
271        }
272
273        unsafe {
274            output.set_len(wp);
275        }
276        if wp > 0 {
277            out.write_all(&output)?;
278        }
279
280        data_pos = chunk_end;
281    }
282
283    Ok(())
284}
285
286/// Expand a chunk of data (must start/end on newline boundaries, except possibly
287/// the last chunk). Column starts at 0 for each chunk since chunks are line-aligned.
288/// Returns the expanded output as a Vec<u8>.
289/// Pre-allocates worst-case output to eliminate all capacity checks in the hot loop.
290fn expand_chunk(chunk: &[u8], tab_size: usize, is_pow2: bool, mask: usize) -> Vec<u8> {
291    // Worst case: every byte is a tab → tab_size× expansion.
292    // For most real data the expansion ratio is ~1.5x, so this over-allocates,
293    // but it eliminates all capacity checks from the hot loop.
294    let worst_case = chunk.len() * tab_size;
295    let mut output: Vec<u8> = Vec::with_capacity(worst_case);
296
297    let out_ptr = output.as_mut_ptr();
298    let src = chunk.as_ptr();
299    let mut wp: usize = 0;
300    let mut column: usize = 0;
301    let mut pos: usize = 0;
302
303    // Inner loop: zero capacity checks, zero Vec::len() reads, zero set_len() calls.
304    // Just raw pointer arithmetic.
305    for hit in memchr::memchr2_iter(b'\t', b'\n', chunk) {
306        let seg_len = hit - pos;
307        if seg_len > 0 {
308            unsafe {
309                std::ptr::copy_nonoverlapping(src.add(pos), out_ptr.add(wp), seg_len);
310            }
311            wp += seg_len;
312            column += seg_len;
313        }
314
315        if unsafe { *src.add(hit) } == b'\n' {
316            unsafe {
317                *out_ptr.add(wp) = b'\n';
318            }
319            wp += 1;
320            column = 0;
321        } else {
322            let rem = if is_pow2 {
323                column & mask
324            } else {
325                column % tab_size
326            };
327            let spaces = tab_size - rem;
328            unsafe {
329                std::ptr::write_bytes(out_ptr.add(wp), b' ', spaces);
330            }
331            wp += spaces;
332            column += spaces;
333        }
334
335        pos = hit + 1;
336    }
337
338    // Copy tail
339    if pos < chunk.len() {
340        let tail = chunk.len() - pos;
341        unsafe {
342            std::ptr::copy_nonoverlapping(src.add(pos), out_ptr.add(wp), tail);
343        }
344        wp += tail;
345    }
346
347    unsafe {
348        output.set_len(wp);
349    }
350    output
351}
352
353/// Parallel expand for large files (>4MB). Splits data into line-aligned chunks
354/// and processes them concurrently using rayon. Each chunk is expanded independently
355/// (column tracking resets at newline boundaries), then results are written in order.
356fn expand_regular_parallel(
357    data: &[u8],
358    tab_size: usize,
359    out: &mut impl Write,
360) -> std::io::Result<()> {
361    use rayon::prelude::*;
362
363    let is_pow2 = tab_size.is_power_of_two();
364    let mask = tab_size.wrapping_sub(1);
365
366    // Split data into line-aligned chunks for parallel processing.
367    // Use ~N chunks where N = number of CPUs for optimal load balance.
368    let num_chunks = rayon::current_num_threads().max(2);
369    let target_chunk_size = data.len() / num_chunks;
370    let mut chunks: Vec<&[u8]> = Vec::with_capacity(num_chunks + 1);
371    let mut pos: usize = 0;
372
373    for _ in 0..num_chunks - 1 {
374        if pos >= data.len() {
375            break;
376        }
377        let target_end = (pos + target_chunk_size).min(data.len());
378        // Find the next newline at or after target_end
379        let chunk_end = if target_end >= data.len() {
380            data.len()
381        } else {
382            match memchr::memchr(b'\n', &data[target_end..]) {
383                Some(off) => target_end + off + 1,
384                None => data.len(),
385            }
386        };
387        chunks.push(&data[pos..chunk_end]);
388        pos = chunk_end;
389    }
390    // Last chunk gets everything remaining
391    if pos < data.len() {
392        chunks.push(&data[pos..]);
393    }
394
395    // Process all chunks in parallel
396    let results: Vec<Vec<u8>> = chunks
397        .par_iter()
398        .map(|chunk| expand_chunk(chunk, tab_size, is_pow2, mask))
399        .collect();
400
401    // Write results in order
402    for result in &results {
403        if !result.is_empty() {
404            out.write_all(result)?;
405        }
406    }
407
408    Ok(())
409}
410
411/// Fast expand for --initial mode with regular tab stops.
412/// Only expands tabs in the leading whitespace of each line, bulk-copying the rest.
413/// Uses memchr (SIMD) to find line boundaries. Leading-whitespace expansion is scalar.
414/// Handles backspace per-line: lines containing \x08 fall back to generic expand.
415fn expand_initial_fast(data: &[u8], tab_size: usize, out: &mut impl Write) -> std::io::Result<()> {
416    debug_assert!(tab_size > 0, "tab_size must be > 0");
417    let tabs = TabStops::Regular(tab_size);
418    let mut pos: usize = 0;
419
420    while pos < data.len() {
421        // Find end of this line
422        let line_end = memchr::memchr(b'\n', &data[pos..])
423            .map(|off| pos + off + 1)
424            .unwrap_or(data.len());
425
426        let line = &data[pos..line_end];
427        debug_assert!(!line.is_empty());
428
429        // Fast skip: if line doesn't start with tab or space, write it whole
430        let first = line[0];
431        if first != b'\t' && first != b' ' {
432            out.write_all(line)?;
433            pos = line_end;
434            continue;
435        }
436
437        // If this line contains a backspace, fall back to generic for this line only
438        if memchr::memchr(b'\x08', line).is_some() {
439            expand_generic(line, &tabs, true, true, out)?;
440            pos = line_end;
441            continue;
442        }
443
444        // Expand only leading tabs/spaces in this line
445        let mut column: usize = 0;
446        let mut i = 0; // offset within line
447        while i < line.len() {
448            let byte = line[i];
449            if byte == b'\t' {
450                let spaces = tab_size - (column % tab_size);
451                write_spaces(out, spaces)?;
452                column += spaces;
453                i += 1;
454            } else if byte == b' ' {
455                // Batch consecutive spaces from source data
456                let space_start = i;
457                while i < line.len() && line[i] == b' ' {
458                    i += 1;
459                }
460                out.write_all(&line[space_start..i])?;
461                column += i - space_start;
462            } else {
463                // First non-blank: write the rest of the line unchanged
464                break;
465            }
466        }
467
468        // Write remainder of line as-is (zero-copy)
469        if i < line.len() {
470            out.write_all(&line[i..])?;
471        }
472
473        pos = line_end;
474    }
475
476    Ok(())
477}
478
479/// Generic expand with support for -i flag and tab lists.
480/// Uses memchr2 SIMD scanning when no backspaces are present.
481/// `has_backspace` hint avoids a redundant O(n) scan when the caller already knows.
482fn expand_generic(
483    data: &[u8],
484    tabs: &TabStops,
485    initial_only: bool,
486    has_backspace: bool,
487    out: &mut impl Write,
488) -> std::io::Result<()> {
489    const FLUSH_THRESHOLD: usize = 256 * 1024;
490    let cap = data.len().min(FLUSH_THRESHOLD) + data.len().min(FLUSH_THRESHOLD) / 8;
491    let mut output = Vec::with_capacity(cap);
492
493    // If no backspaces present and not initial-only, use SIMD bulk scanning
494    if !initial_only && !has_backspace {
495        let mut column: usize = 0;
496        let mut pos: usize = 0;
497
498        while pos < data.len() {
499            match memchr::memchr2(b'\t', b'\n', &data[pos..]) {
500                Some(offset) => {
501                    if offset > 0 {
502                        output.extend_from_slice(&data[pos..pos + offset]);
503                        column += offset;
504                    }
505                    let byte = data[pos + offset];
506                    pos += offset + 1;
507
508                    if byte == b'\n' {
509                        output.push(b'\n');
510                        column = 0;
511                    } else {
512                        let spaces = tabs.spaces_to_next(column);
513                        push_spaces(&mut output, spaces);
514                        column += spaces;
515                    }
516                    if output.len() >= FLUSH_THRESHOLD {
517                        out.write_all(&output)?;
518                        output.clear();
519                    }
520                }
521                None => {
522                    output.extend_from_slice(&data[pos..]);
523                    break;
524                }
525            }
526        }
527    } else {
528        // Byte-by-byte fallback for backspace handling or initial-only mode
529        let mut column: usize = 0;
530        let mut in_initial = true;
531
532        for &byte in data {
533            match byte {
534                b'\t' => {
535                    if initial_only && !in_initial {
536                        output.push(b'\t');
537                        column = tabs.next_tab_stop(column);
538                    } else {
539                        let spaces = tabs.spaces_to_next(column);
540                        push_spaces(&mut output, spaces);
541                        column += spaces;
542                    }
543                }
544                b'\n' => {
545                    output.push(b'\n');
546                    column = 0;
547                    in_initial = true;
548                    if output.len() >= FLUSH_THRESHOLD {
549                        out.write_all(&output)?;
550                        output.clear();
551                    }
552                }
553                b'\x08' => {
554                    output.push(b'\x08');
555                    if column > 0 {
556                        column -= 1;
557                    }
558                }
559                _ => {
560                    if initial_only && in_initial && byte != b' ' {
561                        in_initial = false;
562                    }
563                    output.push(byte);
564                    column += 1;
565                }
566            }
567        }
568    }
569
570    if !output.is_empty() {
571        out.write_all(&output)?;
572    }
573    Ok(())
574}
575
576/// Check if unexpand would produce output identical to input (passthrough case).
577/// Used by the binary to bypass BufWriter and write directly for maximum throughput.
578pub fn unexpand_is_passthrough(data: &[u8], tabs: &TabStops, all: bool) -> bool {
579    if data.is_empty() {
580        return true;
581    }
582    // No spaces or tabs at all → passthrough
583    if memchr::memchr2(b' ', b'\t', data).is_none() {
584        return true;
585    }
586    if let TabStops::Regular(_) = tabs {
587        if all {
588            memchr::memchr(b'\t', data).is_none() && memchr::memmem::find(data, b"  ").is_none()
589        } else {
590            !unexpand_default_needs_processing(data)
591        }
592    } else {
593        false
594    }
595}
596
597/// Unexpand spaces to tabs.
598/// If `all` is true, convert all sequences of spaces; otherwise only leading spaces.
599pub fn unexpand_bytes(
600    data: &[u8],
601    tabs: &TabStops,
602    all: bool,
603    out: &mut impl Write,
604) -> std::io::Result<()> {
605    if data.is_empty() {
606        return Ok(());
607    }
608
609    // Fast path: no spaces or tabs → just copy through
610    if memchr::memchr2(b' ', b'\t', data).is_none() {
611        return out.write_all(data);
612    }
613
614    // For regular tab stops, check passthrough BEFORE expensive backspace scan
615    if let TabStops::Regular(tab_size) = tabs {
616        if all {
617            // -a mode: if no tabs and no consecutive spaces, output = input
618            if memchr::memchr(b'\t', data).is_none() && memchr::memmem::find(data, b"  ").is_none()
619            {
620                return out.write_all(data);
621            }
622        } else {
623            // Default mode: only leading whitespace matters.
624            // If no line starts with space or tab, output = input.
625            if !unexpand_default_needs_processing(data) {
626                return out.write_all(data);
627            }
628        }
629
630        if memchr::memchr(b'\x08', data).is_none() {
631            return unexpand_regular_fast(data, *tab_size, all, out);
632        }
633    }
634
635    // Generic path for tab lists or data with backspaces
636    unexpand_generic(data, tabs, all, out)
637}
638
639/// Check if default-mode unexpand needs to process any data.
640/// Returns true if any line starts with a space or tab.
641/// Uses memmem for 2-byte pattern search — one SIMD pass per pattern,
642/// much faster than memchr_iter + per-match branching when no matches exist.
643#[inline]
644fn unexpand_default_needs_processing(data: &[u8]) -> bool {
645    if data[0] == b' ' || data[0] == b'\t' {
646        return true;
647    }
648    memchr::memmem::find(data, b"\n ").is_some() || memchr::memmem::find(data, b"\n\t").is_some()
649}
650
651/// Emit a run of blanks as the optimal combination of tabs and spaces.
652/// Matches GNU unexpand behavior: a single blank at a tab stop is only converted
653/// to a tab if more blanks follow, otherwise it stays as a space.
654#[inline]
655fn emit_blanks(
656    out: &mut impl Write,
657    start_col: usize,
658    count: usize,
659    tab_size: usize,
660) -> std::io::Result<()> {
661    if count == 0 {
662        return Ok(());
663    }
664    let end_col = start_col + count;
665    let mut col = start_col;
666
667    // Emit tabs for each tab stop we can reach
668    loop {
669        let next_tab = col + (tab_size - col % tab_size);
670        if next_tab > end_col {
671            break;
672        }
673        let blanks_consumed = next_tab - col;
674        if blanks_consumed >= 2 || next_tab < end_col {
675            // 2+ blanks to tab stop, OR 1 blank but more follow → emit tab
676            out.write_all(b"\t")?;
677            col = next_tab;
678        } else {
679            // 1 blank at tab stop with nothing after → keep as space
680            break;
681        }
682    }
683
684    // Emit remaining spaces
685    let remaining = end_col - col;
686    if remaining > 0 {
687        let mut r = remaining;
688        while r > 0 {
689            let chunk = r.min(SPACES.len());
690            out.write_all(&SPACES[..chunk])?;
691            r -= chunk;
692        }
693    }
694    Ok(())
695}
696
697/// Fast unexpand for regular tab stops without backspaces.
698/// Uses memchr SIMD scanning to skip non-special bytes in bulk.
699/// Passthrough checks are done by the caller (unexpand_bytes).
700fn unexpand_regular_fast(
701    data: &[u8],
702    tab_size: usize,
703    all: bool,
704    out: &mut impl Write,
705) -> std::io::Result<()> {
706    if all {
707        return unexpand_regular_fast_all(data, tab_size, out);
708    }
709
710    let mut column: usize = 0;
711    let mut pos: usize = 0;
712    let mut in_initial = true;
713
714    while pos < data.len() {
715        if in_initial {
716            // Check for blanks to convert
717            if data[pos] == b' ' || data[pos] == b'\t' {
718                // Count consecutive blanks, tracking column advancement
719                let blank_start_col = column;
720                while pos < data.len() && (data[pos] == b' ' || data[pos] == b'\t') {
721                    if data[pos] == b'\t' {
722                        column += tab_size - column % tab_size;
723                    } else {
724                        column += 1;
725                    }
726                    pos += 1;
727                }
728                // Emit blanks as optimal tabs+spaces
729                emit_blanks(out, blank_start_col, column - blank_start_col, tab_size)?;
730                continue;
731            }
732            if data[pos] == b'\n' {
733                out.write_all(b"\n")?;
734                column = 0;
735                in_initial = true;
736                pos += 1;
737                continue;
738            }
739            // Non-blank: fall through to body mode below.
740        }
741
742        // Body of line: bulk copy until newline (default mode)
743        match memchr::memchr(b'\n', &data[pos..]) {
744            Some(offset) => {
745                out.write_all(&data[pos..pos + offset + 1])?;
746                column = 0;
747                in_initial = true;
748                pos += offset + 1;
749            }
750            None => {
751                out.write_all(&data[pos..])?;
752                return Ok(());
753            }
754        }
755    }
756
757    Ok(())
758}
759
760/// Fast unexpand -a for regular tab stops without backspaces.
761/// Per-line processing with SIMD scanning for blank runs.
762/// Passthrough checks are done by the caller (unexpand_bytes).
763fn unexpand_regular_fast_all(
764    data: &[u8],
765    tab_size: usize,
766    out: &mut impl Write,
767) -> std::io::Result<()> {
768    let mut output: Vec<u8> = Vec::with_capacity(data.len());
769    let mut pos: usize = 0;
770
771    for nl_pos in memchr::memchr_iter(b'\n', data) {
772        let line = &data[pos..nl_pos];
773        // Per-line fast check: no tabs and no double-spaces → copy through
774        if memchr::memchr(b'\t', line).is_none() && memchr::memmem::find(line, b"  ").is_none() {
775            output.extend_from_slice(line);
776        } else {
777            unexpand_line_all_fast(line, tab_size, &mut output);
778        }
779        output.push(b'\n');
780
781        if output.len() >= 1024 * 1024 {
782            out.write_all(&output)?;
783            output.clear();
784        }
785        pos = nl_pos + 1;
786    }
787
788    // Handle final line without trailing newline
789    if pos < data.len() {
790        let line = &data[pos..];
791        if memchr::memchr(b'\t', line).is_none() && memchr::memmem::find(line, b"  ").is_none() {
792            output.extend_from_slice(line);
793        } else {
794            unexpand_line_all_fast(line, tab_size, &mut output);
795        }
796    }
797
798    if !output.is_empty() {
799        out.write_all(&output)?;
800    }
801    Ok(())
802}
803
804/// Process a single line for unexpand -a with SIMD-accelerated blank detection.
805/// Uses memchr2 to skip non-blank bytes in bulk, only processing blank runs.
806/// Single spaces (not followed by another blank) are included in the copy path.
807#[inline]
808fn unexpand_line_all_fast(line: &[u8], tab_size: usize, output: &mut Vec<u8>) {
809    let mut column: usize = 0;
810    let mut pos: usize = 0;
811
812    loop {
813        // Find next tab or convertible blank run using SIMD scan.
814        // Skip single spaces (most common) by continuing past them.
815        let blank_pos = {
816            let mut search = pos;
817            loop {
818                match memchr::memchr2(b' ', b'\t', &line[search..]) {
819                    Some(off) => {
820                        let abs = search + off;
821                        if line[abs] == b'\t' {
822                            break Some(abs);
823                        }
824                        // Space: check if followed by another blank
825                        if abs + 1 < line.len() && (line[abs + 1] == b' ' || line[abs + 1] == b'\t')
826                        {
827                            break Some(abs);
828                        }
829                        // Single space: skip and keep searching
830                        search = abs + 1;
831                    }
832                    None => break None,
833                }
834            }
835        };
836
837        match blank_pos {
838            Some(bp) => {
839                // Copy non-blank prefix (including single spaces)
840                if bp > pos {
841                    output.extend_from_slice(&line[pos..bp]);
842                    column += bp - pos;
843                }
844
845                // Process blank run
846                let blank_start_col = column;
847                let blank_start = bp;
848                pos = bp;
849                while pos < line.len() && (line[pos] == b' ' || line[pos] == b'\t') {
850                    if line[pos] == b'\t' {
851                        column += tab_size - column % tab_size;
852                    } else {
853                        column += 1;
854                    }
855                    pos += 1;
856                }
857                emit_blank_run_vec(output, &line[blank_start..pos], blank_start_col, tab_size);
858            }
859            None => {
860                // Rest of line has no convertible blanks
861                if pos < line.len() {
862                    output.extend_from_slice(&line[pos..]);
863                }
864                break;
865            }
866        }
867    }
868}
869
870/// Emit a blank run character-by-character, matching GNU unexpand behavior:
871/// - Spaces are accumulated and converted to tabs at tab stops (but a single
872///   space at a tab stop stays as space unless more blanks follow).
873/// - Input tabs are always emitted as tabs. Pending spaces before a tab are
874///   merged into the tab's column range and emitted as optimal tabs.
875#[inline(always)]
876fn emit_blank_run_vec(output: &mut Vec<u8>, blanks: &[u8], start_col: usize, tab_size: usize) {
877    let mut col = start_col;
878    let mut pending_spaces: usize = 0;
879    let mut pending_start_col = start_col;
880
881    for (idx, &b) in blanks.iter().enumerate() {
882        if b == b'\t' {
883            let new_col = col + (tab_size - col % tab_size);
884
885            if pending_spaces > 0 {
886                // Merge pending spaces with this tab: emit tabs for all tab stops
887                // in the range [pending_start_col .. new_col]
888                let mut tc = pending_start_col;
889                loop {
890                    let next_ts = tc + (tab_size - tc % tab_size);
891                    if next_ts > new_col {
892                        break;
893                    }
894                    output.push(b'\t');
895                    tc = next_ts;
896                }
897                // Any remaining gap (shouldn't happen for well-formed tab runs)
898                let gap = new_col - tc;
899                if gap > 0 {
900                    let len = output.len();
901                    output.resize(len + gap, b' ');
902                }
903                pending_spaces = 0;
904            } else {
905                // No pending spaces: emit the input tab directly
906                output.push(b'\t');
907            }
908
909            col = new_col;
910            pending_start_col = col;
911        } else {
912            // Space
913            if pending_spaces == 0 {
914                pending_start_col = col;
915            }
916            pending_spaces += 1;
917            col += 1;
918            // Check if we've reached a tab stop
919            if col.is_multiple_of(tab_size) {
920                let more_follow = idx + 1 < blanks.len();
921                if pending_spaces >= 2 || more_follow {
922                    output.push(b'\t');
923                    pending_spaces = 0;
924                    pending_start_col = col;
925                }
926                // else: single space at tab stop with nothing after → keep as space
927            }
928        }
929    }
930
931    // Flush remaining pending spaces as literal spaces
932    if pending_spaces > 0 {
933        let len = output.len();
934        output.resize(len + pending_spaces, b' ');
935    }
936}
937
938/// Generic unexpand with support for tab lists and backspaces.
939fn unexpand_generic(
940    data: &[u8],
941    tabs: &TabStops,
942    all: bool,
943    out: &mut impl Write,
944) -> std::io::Result<()> {
945    let tab_size = match tabs {
946        TabStops::Regular(n) => *n,
947        TabStops::List(_) => 0, // handled by is_tab_stop/next_tab_stop
948    };
949    let mut column: usize = 0;
950    let mut space_start_col: Option<usize> = None;
951    let mut in_initial = true;
952
953    for &byte in data {
954        match byte {
955            b' ' => {
956                if !all && !in_initial {
957                    out.write_all(b" ")?;
958                    column += 1;
959                } else {
960                    if space_start_col.is_none() {
961                        space_start_col = Some(column);
962                    }
963                    column += 1;
964                    // Don't convert to tab here — wait for end of blank run
965                }
966            }
967            b'\t' => {
968                if !all && !in_initial {
969                    // In non-converting mode, just emit the tab
970                    if let Some(start_col) = space_start_col.take() {
971                        let n = column - start_col;
972                        out.write_all(&SPACES[..n.min(SPACES.len())])?;
973                    }
974                    out.write_all(b"\t")?;
975                    column = tabs.next_tab_stop(column);
976                } else {
977                    if space_start_col.is_none() {
978                        space_start_col = Some(column);
979                    }
980                    column = tabs.next_tab_stop(column);
981                }
982            }
983            _ => {
984                // Flush pending blanks
985                if let Some(start_col) = space_start_col.take() {
986                    let count = column - start_col;
987                    if tab_size > 0 {
988                        emit_blanks(out, start_col, count, tab_size)?;
989                    } else {
990                        // Tab list: use is_tab_stop for conversion
991                        emit_blanks_tablist(out, start_col, count, tabs)?;
992                    }
993                }
994
995                if byte == b'\n' {
996                    out.write_all(b"\n")?;
997                    column = 0;
998                    in_initial = true;
999                } else if byte == b'\x08' {
1000                    out.write_all(b"\x08")?;
1001                    if column > 0 {
1002                        column -= 1;
1003                    }
1004                } else {
1005                    if in_initial {
1006                        in_initial = false;
1007                    }
1008                    out.write_all(&[byte])?;
1009                    column += 1;
1010                }
1011            }
1012        }
1013    }
1014
1015    if let Some(start_col) = space_start_col {
1016        let count = column - start_col;
1017        if tab_size > 0 {
1018            emit_blanks(out, start_col, count, tab_size)?;
1019        } else {
1020            emit_blanks_tablist(out, start_col, count, tabs)?;
1021        }
1022    }
1023
1024    Ok(())
1025}
1026
1027/// Emit blanks using a tab list (non-regular tab stops).
1028/// After the last defined tab stop, only spaces are emitted (no more tabs).
1029fn emit_blanks_tablist(
1030    out: &mut impl Write,
1031    start_col: usize,
1032    count: usize,
1033    tabs: &TabStops,
1034) -> std::io::Result<()> {
1035    if count == 0 {
1036        return Ok(());
1037    }
1038    let end_col = start_col + count;
1039    let mut col = start_col;
1040
1041    // Get the last defined tab stop to know when to stop converting to tabs
1042    let last_stop = match tabs {
1043        TabStops::List(stops) => stops.last().copied().unwrap_or(0),
1044        TabStops::Regular(_) => usize::MAX,
1045    };
1046
1047    while col < last_stop {
1048        let next_tab = tabs.next_tab_stop(col);
1049        if next_tab > end_col || next_tab > last_stop {
1050            break;
1051        }
1052        let blanks_consumed = next_tab - col;
1053        if blanks_consumed >= 2 || next_tab < end_col {
1054            out.write_all(b"\t")?;
1055            col = next_tab;
1056        } else {
1057            break;
1058        }
1059    }
1060
1061    let remaining = end_col - col;
1062    if remaining > 0 {
1063        let mut r = remaining;
1064        while r > 0 {
1065            let chunk = r.min(SPACES.len());
1066            out.write_all(&SPACES[..chunk])?;
1067            r -= chunk;
1068        }
1069    }
1070    Ok(())
1071}