Skip to main content

coreutils_rs/expand/
core.rs

1use std::io::Write;
2
3/// Tab stop specification
4#[derive(Clone, Debug)]
5pub enum TabStops {
6    /// Regular interval (default 8)
7    Regular(usize),
8    /// Explicit list of tab stop positions (0-indexed columns)
9    List(Vec<usize>),
10}
11
12impl TabStops {
13    /// Calculate the number of spaces to the next tab stop from the given column.
14    #[inline]
15    fn spaces_to_next(&self, column: usize) -> usize {
16        match self {
17            TabStops::Regular(n) => {
18                if *n == 0 {
19                    return 0;
20                }
21                *n - (column % *n)
22            }
23            TabStops::List(stops) => {
24                // Find the first tab stop > current column
25                match stops.binary_search(&(column + 1)) {
26                    Ok(idx) => stops[idx] - column,
27                    Err(idx) => {
28                        if idx < stops.len() {
29                            stops[idx] - column
30                        } else {
31                            // Past all tab stops: GNU uses 1 space
32                            1
33                        }
34                    }
35                }
36            }
37        }
38    }
39
40    /// Get the next tab stop position after the given column.
41    #[inline]
42    fn next_tab_stop(&self, column: usize) -> usize {
43        column + self.spaces_to_next(column)
44    }
45}
46
47/// Parse a tab specification string (e.g., "4", "4,8,12", "4 8 12").
48pub fn parse_tab_stops(spec: &str) -> Result<TabStops, String> {
49    let spec = spec.trim();
50    if spec.is_empty() {
51        return Ok(TabStops::Regular(8));
52    }
53
54    // Check if it's a single number (regular interval)
55    if let Ok(n) = spec.parse::<usize>() {
56        if n == 0 {
57            return Err("tab size cannot be 0".to_string());
58        }
59        return Ok(TabStops::Regular(n));
60    }
61
62    // Parse as comma or space-separated list
63    let mut stops: Vec<usize> = Vec::new();
64    for part in spec.split([',', ' ']) {
65        let part = part.trim();
66        if part.is_empty() {
67            continue;
68        }
69        // Handle / prefix for repeating tab stops
70        if let Some(rest) = part.strip_prefix('/') {
71            let n: usize = rest
72                .parse()
73                .map_err(|_| format!("'{}' is not a valid number", part))?;
74            if n == 0 {
75                return Err("tab size cannot be 0".to_string());
76            }
77            let last = stops.last().copied().unwrap_or(0);
78            let mut pos = last + n;
79            while pos < 10000 {
80                stops.push(pos);
81                pos += n;
82            }
83            continue;
84        }
85        match part.parse::<usize>() {
86            Ok(n) => {
87                if !stops.is_empty() && n <= *stops.last().unwrap() {
88                    return Err("tab sizes must be ascending".to_string());
89                }
90                stops.push(n);
91            }
92            Err(_) => return Err(format!("'{}' is not a valid number", part)),
93        }
94    }
95
96    if stops.is_empty() {
97        return Err("tab specification is empty".to_string());
98    }
99
100    if stops.len() == 1 {
101        return Ok(TabStops::Regular(stops[0]));
102    }
103
104    Ok(TabStops::List(stops))
105}
106
107// Pre-computed spaces buffer for fast tab expansion (avoids per-tab allocation)
108// 4KB buffer covers even very large tab stops in a single memcpy
109const SPACES: [u8; 4096] = [b' '; 4096];
110
111/// Write N spaces to a Vec efficiently using pre-computed buffer.
112#[inline]
113fn push_spaces(output: &mut Vec<u8>, n: usize) {
114    let mut remaining = n;
115    while remaining > 0 {
116        let chunk = remaining.min(SPACES.len());
117        output.extend_from_slice(&SPACES[..chunk]);
118        remaining -= chunk;
119    }
120}
121
122/// Write N spaces to a Write stream using pre-computed buffer.
123#[inline]
124fn write_spaces(out: &mut impl Write, n: usize) -> std::io::Result<()> {
125    let mut remaining = n;
126    while remaining > 0 {
127        let chunk = remaining.min(SPACES.len());
128        out.write_all(&SPACES[..chunk])?;
129        remaining -= chunk;
130    }
131    Ok(())
132}
133
134/// Expand tabs to spaces using SIMD scanning.
135/// Dispatches to the optimal path based on tab configuration and data content.
136pub fn expand_bytes(
137    data: &[u8],
138    tabs: &TabStops,
139    initial_only: bool,
140    out: &mut impl Write,
141) -> std::io::Result<()> {
142    if data.is_empty() {
143        return Ok(());
144    }
145
146    // For regular tab stops, use fast SIMD paths.
147    // We combine the no-tabs and no-backspace checks to minimize full-data scans.
148    if let TabStops::Regular(tab_size) = tabs {
149        if initial_only {
150            // --initial mode: check for tabs first (cheap) to skip processing
151            if memchr::memchr(b'\t', data).is_none() {
152                return out.write_all(data);
153            }
154            return expand_initial_fast(data, *tab_size, out);
155        }
156
157        // Fast path: no tabs → write-through (avoids backspace scan too)
158        if memchr::memchr(b'\t', data).is_none() {
159            return out.write_all(data);
160        }
161
162        // Check for backspaces. If none, use the fast SIMD path.
163        if memchr::memchr(b'\x08', data).is_none() {
164            return expand_regular_fast(data, *tab_size, out);
165        }
166
167        // Has backspaces → fall through to generic
168        return expand_generic(data, tabs, initial_only, true, out);
169    }
170
171    // Tab list path: check for tabs first
172    if memchr::memchr(b'\t', data).is_none() {
173        return out.write_all(data);
174    }
175    let has_backspace = memchr::memchr(b'\x08', data).is_some();
176    expand_generic(data, tabs, initial_only, has_backspace, out)
177}
178
179/// Parallel threshold: files above this size use multi-threaded expand.
180/// Below this, single-threaded is faster (avoids thread pool overhead).
181const PARALLEL_THRESHOLD: usize = 4 * 1024 * 1024; // 4MB
182
183/// Fast expand for regular tab stops without -i flag.
184/// For large files (>4MB), uses rayon to process chunks in parallel.
185/// For smaller files, uses single-threaded SIMD processing.
186fn expand_regular_fast(data: &[u8], tab_size: usize, out: &mut impl Write) -> std::io::Result<()> {
187    debug_assert!(tab_size > 0, "tab_size must be > 0");
188
189    if data.len() >= PARALLEL_THRESHOLD {
190        expand_regular_parallel(data, tab_size, out)
191    } else {
192        expand_regular_single(data, tab_size, out)
193    }
194}
195
196/// Single-threaded expand: one-pass SIMD memchr2_iter scan with periodic flush.
197/// Processes the entire file in a single memchr2_iter pass (no chunk boundary
198/// logic) while using a modest output buffer that's flushed periodically to
199/// maintain a small page-fault footprint.
200fn expand_regular_single(
201    data: &[u8],
202    tab_size: usize,
203    out: &mut impl Write,
204) -> std::io::Result<()> {
205    let is_pow2 = tab_size.is_power_of_two();
206    let mask = tab_size.wrapping_sub(1);
207
208    // Output buffer: 2MB capacity with 64KB headroom for in-progress writes.
209    const BUF_CAP: usize = 2 * 1024 * 1024;
210    const FLUSH_AT: usize = BUF_CAP - 64 * 1024;
211    let mut output: Vec<u8> = Vec::with_capacity(BUF_CAP);
212
213    let src = data.as_ptr();
214    let mut column: usize = 0;
215    let mut pos: usize = 0;
216    let mut wp: usize = 0;
217
218    for hit in memchr::memchr2_iter(b'\t', b'\n', data) {
219        let seg_len = hit - pos;
220
221        // Flush if segment + max expansion would exceed buffer.
222        if wp + seg_len + tab_size > BUF_CAP {
223            unsafe {
224                output.set_len(wp);
225            }
226            out.write_all(&output)?;
227            output.clear();
228            wp = 0;
229            // If the segment alone exceeds BUF_CAP, write it directly to output
230            // instead of copying into the intermediate buffer.
231            if seg_len > BUF_CAP {
232                out.write_all(&data[pos..pos + seg_len])?;
233                column += seg_len;
234                pos = hit + 1;
235                // Still need to handle the tab/newline at `hit`
236                let out_ptr = output.as_mut_ptr();
237                if unsafe { *src.add(hit) } == b'\n' {
238                    unsafe {
239                        *out_ptr.add(wp) = b'\n';
240                    }
241                    wp += 1;
242                    column = 0;
243                } else {
244                    let rem = if is_pow2 {
245                        column & mask
246                    } else {
247                        column % tab_size
248                    };
249                    let spaces = tab_size - rem;
250                    unsafe {
251                        std::ptr::write_bytes(out_ptr.add(wp), b' ', spaces);
252                    }
253                    wp += spaces;
254                    column += spaces;
255                }
256                continue;
257            }
258        }
259
260        let out_ptr = output.as_mut_ptr();
261
262        if seg_len > 0 {
263            unsafe {
264                std::ptr::copy_nonoverlapping(src.add(pos), out_ptr.add(wp), seg_len);
265            }
266            wp += seg_len;
267            column += seg_len;
268        }
269
270        if unsafe { *src.add(hit) } == b'\n' {
271            unsafe {
272                *out_ptr.add(wp) = b'\n';
273            }
274            wp += 1;
275            column = 0;
276        } else {
277            let rem = if is_pow2 {
278                column & mask
279            } else {
280                column % tab_size
281            };
282            let spaces = tab_size - rem;
283            unsafe {
284                std::ptr::write_bytes(out_ptr.add(wp), b' ', spaces);
285            }
286            wp += spaces;
287            column += spaces;
288        }
289
290        pos = hit + 1;
291
292        if wp >= FLUSH_AT {
293            unsafe {
294                output.set_len(wp);
295            }
296            out.write_all(&output)?;
297            output.clear();
298            wp = 0;
299        }
300    }
301
302    if pos < data.len() {
303        let tail = data.len() - pos;
304        if wp + tail > BUF_CAP {
305            unsafe {
306                output.set_len(wp);
307            }
308            out.write_all(&output)?;
309            output.clear();
310            wp = 0;
311            // If the tail alone exceeds BUF_CAP, write it directly.
312            if tail > BUF_CAP {
313                out.write_all(&data[pos..])?;
314                return Ok(());
315            }
316        }
317        unsafe {
318            std::ptr::copy_nonoverlapping(src.add(pos), output.as_mut_ptr().add(wp), tail);
319        }
320        wp += tail;
321    }
322
323    if wp > 0 {
324        unsafe {
325            output.set_len(wp);
326        }
327        out.write_all(&output)?;
328    }
329
330    Ok(())
331}
332
333/// Expand a chunk of data (must start/end on newline boundaries, except possibly
334/// the last chunk). Column starts at 0 for each chunk since chunks are line-aligned.
335/// Returns the expanded output as a Vec<u8>.
336/// Pre-allocates worst-case output to eliminate all capacity checks in the hot loop.
337fn expand_chunk(chunk: &[u8], tab_size: usize, is_pow2: bool, mask: usize) -> Vec<u8> {
338    // Worst case: every byte is a tab → tab_size× expansion.
339    // For most real data the expansion ratio is ~1.5x, so this over-allocates,
340    // but it eliminates all capacity checks from the hot loop.
341    let worst_case = chunk.len() * tab_size;
342    let mut output: Vec<u8> = Vec::with_capacity(worst_case);
343
344    let out_ptr = output.as_mut_ptr();
345    let src = chunk.as_ptr();
346    let mut wp: usize = 0;
347    let mut column: usize = 0;
348    let mut pos: usize = 0;
349
350    // Inner loop: zero capacity checks, zero Vec::len() reads, zero set_len() calls.
351    // Just raw pointer arithmetic.
352    for hit in memchr::memchr2_iter(b'\t', b'\n', chunk) {
353        let seg_len = hit - pos;
354        if seg_len > 0 {
355            unsafe {
356                std::ptr::copy_nonoverlapping(src.add(pos), out_ptr.add(wp), seg_len);
357            }
358            wp += seg_len;
359            column += seg_len;
360        }
361
362        if unsafe { *src.add(hit) } == b'\n' {
363            unsafe {
364                *out_ptr.add(wp) = b'\n';
365            }
366            wp += 1;
367            column = 0;
368        } else {
369            let rem = if is_pow2 {
370                column & mask
371            } else {
372                column % tab_size
373            };
374            let spaces = tab_size - rem;
375            unsafe {
376                std::ptr::write_bytes(out_ptr.add(wp), b' ', spaces);
377            }
378            wp += spaces;
379            column += spaces;
380        }
381
382        pos = hit + 1;
383    }
384
385    // Copy tail
386    if pos < chunk.len() {
387        let tail = chunk.len() - pos;
388        unsafe {
389            std::ptr::copy_nonoverlapping(src.add(pos), out_ptr.add(wp), tail);
390        }
391        wp += tail;
392    }
393
394    unsafe {
395        output.set_len(wp);
396    }
397    output
398}
399
400/// Parallel expand for large files (>4MB). Splits data into line-aligned chunks
401/// and processes them concurrently using rayon. Each chunk is expanded independently
402/// (column tracking resets at newline boundaries), then results are written in order.
403fn expand_regular_parallel(
404    data: &[u8],
405    tab_size: usize,
406    out: &mut impl Write,
407) -> std::io::Result<()> {
408    use rayon::prelude::*;
409
410    let is_pow2 = tab_size.is_power_of_two();
411    let mask = tab_size.wrapping_sub(1);
412
413    // Split data into line-aligned chunks for parallel processing.
414    // Use ~N chunks where N = number of CPUs for optimal load balance.
415    let num_chunks = rayon::current_num_threads().max(2);
416    let target_chunk_size = data.len() / num_chunks;
417    let mut chunks: Vec<&[u8]> = Vec::with_capacity(num_chunks + 1);
418    let mut pos: usize = 0;
419
420    for _ in 0..num_chunks - 1 {
421        if pos >= data.len() {
422            break;
423        }
424        let target_end = (pos + target_chunk_size).min(data.len());
425        // Find the next newline at or after target_end
426        let chunk_end = if target_end >= data.len() {
427            data.len()
428        } else {
429            match memchr::memchr(b'\n', &data[target_end..]) {
430                Some(off) => target_end + off + 1,
431                None => data.len(),
432            }
433        };
434        chunks.push(&data[pos..chunk_end]);
435        pos = chunk_end;
436    }
437    // Last chunk gets everything remaining
438    if pos < data.len() {
439        chunks.push(&data[pos..]);
440    }
441
442    // Process all chunks in parallel
443    let results: Vec<Vec<u8>> = chunks
444        .par_iter()
445        .map(|chunk| expand_chunk(chunk, tab_size, is_pow2, mask))
446        .collect();
447
448    // Write results in order
449    for result in &results {
450        if !result.is_empty() {
451            out.write_all(result)?;
452        }
453    }
454
455    Ok(())
456}
457
458/// Fast expand for --initial mode with regular tab stops.
459/// Only expands tabs in the leading whitespace of each line, bulk-copying the rest.
460/// Uses memchr (SIMD) to find line boundaries. Leading-whitespace expansion is scalar.
461/// Handles backspace per-line: lines containing \x08 fall back to generic expand.
462fn expand_initial_fast(data: &[u8], tab_size: usize, out: &mut impl Write) -> std::io::Result<()> {
463    debug_assert!(tab_size > 0, "tab_size must be > 0");
464    let tabs = TabStops::Regular(tab_size);
465    let mut pos: usize = 0;
466
467    while pos < data.len() {
468        // Find end of this line
469        let line_end = memchr::memchr(b'\n', &data[pos..])
470            .map(|off| pos + off + 1)
471            .unwrap_or(data.len());
472
473        let line = &data[pos..line_end];
474        debug_assert!(!line.is_empty());
475
476        // Fast skip: if line doesn't start with tab or space, write it whole
477        let first = line[0];
478        if first != b'\t' && first != b' ' {
479            out.write_all(line)?;
480            pos = line_end;
481            continue;
482        }
483
484        // If this line contains a backspace, fall back to generic for this line only
485        if memchr::memchr(b'\x08', line).is_some() {
486            expand_generic(line, &tabs, true, true, out)?;
487            pos = line_end;
488            continue;
489        }
490
491        // Expand only leading tabs/spaces in this line
492        let mut column: usize = 0;
493        let mut i = 0; // offset within line
494        while i < line.len() {
495            let byte = line[i];
496            if byte == b'\t' {
497                let spaces = tab_size - (column % tab_size);
498                write_spaces(out, spaces)?;
499                column += spaces;
500                i += 1;
501            } else if byte == b' ' {
502                // Batch consecutive spaces from source data
503                let space_start = i;
504                while i < line.len() && line[i] == b' ' {
505                    i += 1;
506                }
507                out.write_all(&line[space_start..i])?;
508                column += i - space_start;
509            } else {
510                // First non-blank: write the rest of the line unchanged
511                break;
512            }
513        }
514
515        // Write remainder of line as-is (zero-copy)
516        if i < line.len() {
517            out.write_all(&line[i..])?;
518        }
519
520        pos = line_end;
521    }
522
523    Ok(())
524}
525
526/// Generic expand with support for -i flag and tab lists.
527/// Uses memchr2 SIMD scanning when no backspaces are present.
528/// `has_backspace` hint avoids a redundant O(n) scan when the caller already knows.
529fn expand_generic(
530    data: &[u8],
531    tabs: &TabStops,
532    initial_only: bool,
533    has_backspace: bool,
534    out: &mut impl Write,
535) -> std::io::Result<()> {
536    const FLUSH_THRESHOLD: usize = 256 * 1024;
537    let cap = data.len().min(FLUSH_THRESHOLD) + data.len().min(FLUSH_THRESHOLD) / 8;
538    let mut output = Vec::with_capacity(cap);
539
540    // If no backspaces present and not initial-only, use SIMD bulk scanning
541    if !initial_only && !has_backspace {
542        let mut column: usize = 0;
543        let mut pos: usize = 0;
544
545        while pos < data.len() {
546            match memchr::memchr2(b'\t', b'\n', &data[pos..]) {
547                Some(offset) => {
548                    if offset > 0 {
549                        output.extend_from_slice(&data[pos..pos + offset]);
550                        column += offset;
551                    }
552                    let byte = data[pos + offset];
553                    pos += offset + 1;
554
555                    if byte == b'\n' {
556                        output.push(b'\n');
557                        column = 0;
558                    } else {
559                        let spaces = tabs.spaces_to_next(column);
560                        push_spaces(&mut output, spaces);
561                        column += spaces;
562                    }
563                    if output.len() >= FLUSH_THRESHOLD {
564                        out.write_all(&output)?;
565                        output.clear();
566                    }
567                }
568                None => {
569                    output.extend_from_slice(&data[pos..]);
570                    break;
571                }
572            }
573        }
574    } else {
575        // Byte-by-byte fallback for backspace handling or initial-only mode
576        let mut column: usize = 0;
577        let mut in_initial = true;
578
579        for &byte in data {
580            match byte {
581                b'\t' => {
582                    if initial_only && !in_initial {
583                        output.push(b'\t');
584                        column = tabs.next_tab_stop(column);
585                    } else {
586                        let spaces = tabs.spaces_to_next(column);
587                        push_spaces(&mut output, spaces);
588                        column += spaces;
589                    }
590                }
591                b'\n' => {
592                    output.push(b'\n');
593                    column = 0;
594                    in_initial = true;
595                    if output.len() >= FLUSH_THRESHOLD {
596                        out.write_all(&output)?;
597                        output.clear();
598                    }
599                }
600                b'\x08' => {
601                    output.push(b'\x08');
602                    if column > 0 {
603                        column -= 1;
604                    }
605                }
606                _ => {
607                    if initial_only && in_initial && byte != b' ' {
608                        in_initial = false;
609                    }
610                    output.push(byte);
611                    column += 1;
612                }
613            }
614        }
615    }
616
617    if !output.is_empty() {
618        out.write_all(&output)?;
619    }
620    Ok(())
621}
622
623/// Check if unexpand would produce output identical to input (passthrough case).
624/// Used by the binary to bypass BufWriter and write directly for maximum throughput.
625pub fn unexpand_is_passthrough(data: &[u8], tabs: &TabStops, all: bool) -> bool {
626    if data.is_empty() {
627        return true;
628    }
629    // No spaces or tabs at all → passthrough
630    if memchr::memchr2(b' ', b'\t', data).is_none() {
631        return true;
632    }
633    if let TabStops::Regular(_) = tabs {
634        if all {
635            memchr::memchr(b'\t', data).is_none() && memchr::memmem::find(data, b"  ").is_none()
636        } else {
637            !unexpand_default_needs_processing(data)
638        }
639    } else {
640        false
641    }
642}
643
644/// Unexpand spaces to tabs.
645/// If `all` is true, convert all sequences of spaces; otherwise only leading spaces.
646pub fn unexpand_bytes(
647    data: &[u8],
648    tabs: &TabStops,
649    all: bool,
650    out: &mut impl Write,
651) -> std::io::Result<()> {
652    if data.is_empty() {
653        return Ok(());
654    }
655
656    // Fast path: no spaces or tabs → just copy through
657    if memchr::memchr2(b' ', b'\t', data).is_none() {
658        return out.write_all(data);
659    }
660
661    // For regular tab stops, check passthrough BEFORE expensive backspace scan
662    if let TabStops::Regular(tab_size) = tabs {
663        if all {
664            // -a mode: if no tabs and no consecutive spaces, output = input
665            if memchr::memchr(b'\t', data).is_none() && memchr::memmem::find(data, b"  ").is_none()
666            {
667                return out.write_all(data);
668            }
669        } else {
670            // Default mode: only leading whitespace matters.
671            // If no line starts with space or tab, output = input.
672            if !unexpand_default_needs_processing(data) {
673                return out.write_all(data);
674            }
675        }
676
677        if memchr::memchr(b'\x08', data).is_none() {
678            return unexpand_regular_fast(data, *tab_size, all, out);
679        }
680    }
681
682    // Generic path for tab lists or data with backspaces
683    unexpand_generic(data, tabs, all, out)
684}
685
686/// Check if default-mode unexpand needs to process any data.
687/// Returns true if any line starts with a space or tab.
688/// Uses memmem for 2-byte pattern search — one SIMD pass per pattern,
689/// much faster than memchr_iter + per-match branching when no matches exist.
690#[inline]
691fn unexpand_default_needs_processing(data: &[u8]) -> bool {
692    if data[0] == b' ' || data[0] == b'\t' {
693        return true;
694    }
695    memchr::memmem::find(data, b"\n ").is_some() || memchr::memmem::find(data, b"\n\t").is_some()
696}
697
698/// Emit a run of blanks as the optimal combination of tabs and spaces.
699/// Matches GNU unexpand behavior: a single blank at a tab stop is only converted
700/// to a tab if more blanks follow, otherwise it stays as a space.
701#[inline]
702fn emit_blanks(
703    out: &mut impl Write,
704    start_col: usize,
705    count: usize,
706    tab_size: usize,
707) -> std::io::Result<()> {
708    if count == 0 {
709        return Ok(());
710    }
711    let end_col = start_col + count;
712    let mut col = start_col;
713
714    // Emit tabs for each tab stop we can reach
715    loop {
716        let next_tab = col + (tab_size - col % tab_size);
717        if next_tab > end_col {
718            break;
719        }
720        let blanks_consumed = next_tab - col;
721        if blanks_consumed >= 2 || next_tab < end_col {
722            // 2+ blanks to tab stop, OR 1 blank but more follow → emit tab
723            out.write_all(b"\t")?;
724            col = next_tab;
725        } else {
726            // 1 blank at tab stop with nothing after → keep as space
727            break;
728        }
729    }
730
731    // Emit remaining spaces
732    let remaining = end_col - col;
733    if remaining > 0 {
734        let mut r = remaining;
735        while r > 0 {
736            let chunk = r.min(SPACES.len());
737            out.write_all(&SPACES[..chunk])?;
738            r -= chunk;
739        }
740    }
741    Ok(())
742}
743
744/// Fast unexpand for regular tab stops without backspaces.
745/// Uses memchr SIMD scanning to skip non-special bytes in bulk.
746/// Passthrough checks are done by the caller (unexpand_bytes).
747fn unexpand_regular_fast(
748    data: &[u8],
749    tab_size: usize,
750    all: bool,
751    out: &mut impl Write,
752) -> std::io::Result<()> {
753    if all {
754        return unexpand_regular_fast_all(data, tab_size, out);
755    }
756
757    let mut column: usize = 0;
758    let mut pos: usize = 0;
759    let mut in_initial = true;
760
761    while pos < data.len() {
762        if in_initial {
763            // Check for blanks to convert
764            if data[pos] == b' ' || data[pos] == b'\t' {
765                // Count consecutive blanks, tracking column advancement
766                let blank_start_col = column;
767                while pos < data.len() && (data[pos] == b' ' || data[pos] == b'\t') {
768                    if data[pos] == b'\t' {
769                        column += tab_size - column % tab_size;
770                    } else {
771                        column += 1;
772                    }
773                    pos += 1;
774                }
775                // Emit blanks as optimal tabs+spaces
776                emit_blanks(out, blank_start_col, column - blank_start_col, tab_size)?;
777                continue;
778            }
779            if data[pos] == b'\n' {
780                out.write_all(b"\n")?;
781                column = 0;
782                in_initial = true;
783                pos += 1;
784                continue;
785            }
786            // Non-blank: fall through to body mode below.
787        }
788
789        // Body of line: bulk copy until newline (default mode)
790        match memchr::memchr(b'\n', &data[pos..]) {
791            Some(offset) => {
792                out.write_all(&data[pos..pos + offset + 1])?;
793                column = 0;
794                in_initial = true;
795                pos += offset + 1;
796            }
797            None => {
798                out.write_all(&data[pos..])?;
799                return Ok(());
800            }
801        }
802    }
803
804    Ok(())
805}
806
807/// Fast unexpand -a for regular tab stops without backspaces.
808/// Per-line processing with SIMD scanning for blank runs.
809/// Passthrough checks are done by the caller (unexpand_bytes).
810fn unexpand_regular_fast_all(
811    data: &[u8],
812    tab_size: usize,
813    out: &mut impl Write,
814) -> std::io::Result<()> {
815    let mut output: Vec<u8> = Vec::with_capacity(data.len());
816    let mut pos: usize = 0;
817
818    for nl_pos in memchr::memchr_iter(b'\n', data) {
819        let line = &data[pos..nl_pos];
820        // Per-line fast check: no tabs and no double-spaces → copy through
821        if memchr::memchr(b'\t', line).is_none() && memchr::memmem::find(line, b"  ").is_none() {
822            output.extend_from_slice(line);
823        } else {
824            unexpand_line_all_fast(line, tab_size, &mut output);
825        }
826        output.push(b'\n');
827
828        if output.len() >= 1024 * 1024 {
829            out.write_all(&output)?;
830            output.clear();
831        }
832        pos = nl_pos + 1;
833    }
834
835    // Handle final line without trailing newline
836    if pos < data.len() {
837        let line = &data[pos..];
838        if memchr::memchr(b'\t', line).is_none() && memchr::memmem::find(line, b"  ").is_none() {
839            output.extend_from_slice(line);
840        } else {
841            unexpand_line_all_fast(line, tab_size, &mut output);
842        }
843    }
844
845    if !output.is_empty() {
846        out.write_all(&output)?;
847    }
848    Ok(())
849}
850
851/// Process a single line for unexpand -a with SIMD-accelerated blank detection.
852/// Uses memchr2 to skip non-blank bytes in bulk, only processing blank runs.
853/// Single spaces (not followed by another blank) are included in the copy path.
854#[inline]
855fn unexpand_line_all_fast(line: &[u8], tab_size: usize, output: &mut Vec<u8>) {
856    let mut column: usize = 0;
857    let mut pos: usize = 0;
858
859    loop {
860        // Find next tab or convertible blank run using SIMD scan.
861        // Skip single spaces (most common) by continuing past them.
862        let blank_pos = {
863            let mut search = pos;
864            loop {
865                match memchr::memchr2(b' ', b'\t', &line[search..]) {
866                    Some(off) => {
867                        let abs = search + off;
868                        if line[abs] == b'\t' {
869                            break Some(abs);
870                        }
871                        // Space: check if followed by another blank
872                        if abs + 1 < line.len() && (line[abs + 1] == b' ' || line[abs + 1] == b'\t')
873                        {
874                            break Some(abs);
875                        }
876                        // Single space: skip and keep searching
877                        search = abs + 1;
878                    }
879                    None => break None,
880                }
881            }
882        };
883
884        match blank_pos {
885            Some(bp) => {
886                // Copy non-blank prefix (including single spaces)
887                if bp > pos {
888                    output.extend_from_slice(&line[pos..bp]);
889                    column += bp - pos;
890                }
891
892                // Process blank run
893                let blank_start_col = column;
894                let blank_start = bp;
895                pos = bp;
896                // Fast count: check if it's all spaces (common case)
897                let mut has_tab = false;
898                while pos < line.len() && (line[pos] == b' ' || line[pos] == b'\t') {
899                    if line[pos] == b'\t' {
900                        has_tab = true;
901                        column += tab_size - column % tab_size;
902                    } else {
903                        column += 1;
904                    }
905                    pos += 1;
906                }
907                if has_tab {
908                    emit_blank_run_vec(output, &line[blank_start..pos], blank_start_col, tab_size);
909                } else {
910                    // Spaces only: use arithmetic fast path
911                    let more_follow = pos < line.len();
912                    emit_spaces_only_vec(
913                        output,
914                        blank_start_col,
915                        pos - blank_start,
916                        tab_size,
917                        more_follow,
918                    );
919                }
920            }
921            None => {
922                // Rest of line has no convertible blanks
923                if pos < line.len() {
924                    output.extend_from_slice(&line[pos..]);
925                }
926                break;
927            }
928        }
929    }
930}
931
932/// Fast path for space-only blank runs: compute tabs+spaces arithmetically.
933/// Avoids per-byte iteration for the common case.
934#[inline(always)]
935fn emit_spaces_only_vec(
936    output: &mut Vec<u8>,
937    start_col: usize,
938    count: usize,
939    tab_size: usize,
940    more_follow: bool,
941) {
942    if count == 0 {
943        return;
944    }
945    let end_col = start_col + count;
946    let mut col = start_col;
947
948    // Emit tabs for each tab stop boundary we cross
949    loop {
950        let next_tab = col + (tab_size - col % tab_size);
951        if next_tab > end_col {
952            break;
953        }
954        let blanks_to_tab = next_tab - col;
955        if blanks_to_tab >= 2 || next_tab < end_col || more_follow {
956            output.push(b'\t');
957            col = next_tab;
958        } else {
959            // Single space at tab stop, nothing follows → keep as space
960            break;
961        }
962    }
963
964    // Remaining spaces
965    let remaining = end_col - col;
966    if remaining > 0 {
967        let len = output.len();
968        output.resize(len + remaining, b' ');
969    }
970}
971
972/// Emit a blank run character-by-character, matching GNU unexpand behavior:
973/// - Spaces are accumulated and converted to tabs at tab stops (but a single
974///   space at a tab stop stays as space unless more blanks follow).
975/// - Input tabs are always emitted as tabs. Pending spaces before a tab are
976///   merged into the tab's column range and emitted as optimal tabs.
977#[inline(always)]
978fn emit_blank_run_vec(output: &mut Vec<u8>, blanks: &[u8], start_col: usize, tab_size: usize) {
979    // Fast path: all spaces, no tabs
980    if memchr::memchr(b'\t', blanks).is_none() {
981        emit_spaces_only_vec(output, start_col, blanks.len(), tab_size, false);
982        return;
983    }
984    let mut col = start_col;
985    let mut pending_spaces: usize = 0;
986    let mut pending_start_col = start_col;
987
988    for (idx, &b) in blanks.iter().enumerate() {
989        if b == b'\t' {
990            let new_col = col + (tab_size - col % tab_size);
991
992            if pending_spaces > 0 {
993                // Merge pending spaces with this tab: emit tabs for all tab stops
994                // in the range [pending_start_col .. new_col]
995                let mut tc = pending_start_col;
996                loop {
997                    let next_ts = tc + (tab_size - tc % tab_size);
998                    if next_ts > new_col {
999                        break;
1000                    }
1001                    output.push(b'\t');
1002                    tc = next_ts;
1003                }
1004                // Any remaining gap (shouldn't happen for well-formed tab runs)
1005                let gap = new_col - tc;
1006                if gap > 0 {
1007                    let len = output.len();
1008                    output.resize(len + gap, b' ');
1009                }
1010                pending_spaces = 0;
1011            } else {
1012                // No pending spaces: emit the input tab directly
1013                output.push(b'\t');
1014            }
1015
1016            col = new_col;
1017            pending_start_col = col;
1018        } else {
1019            // Space
1020            if pending_spaces == 0 {
1021                pending_start_col = col;
1022            }
1023            pending_spaces += 1;
1024            col += 1;
1025            // Check if we've reached a tab stop
1026            if col.is_multiple_of(tab_size) {
1027                let more_follow = idx + 1 < blanks.len();
1028                if pending_spaces >= 2 || more_follow {
1029                    output.push(b'\t');
1030                    pending_spaces = 0;
1031                    pending_start_col = col;
1032                }
1033                // else: single space at tab stop with nothing after → keep as space
1034            }
1035        }
1036    }
1037
1038    // Flush remaining pending spaces as literal spaces
1039    if pending_spaces > 0 {
1040        let len = output.len();
1041        output.resize(len + pending_spaces, b' ');
1042    }
1043}
1044
1045/// Generic unexpand with support for tab lists and backspaces.
1046fn unexpand_generic(
1047    data: &[u8],
1048    tabs: &TabStops,
1049    all: bool,
1050    out: &mut impl Write,
1051) -> std::io::Result<()> {
1052    let tab_size = match tabs {
1053        TabStops::Regular(n) => *n,
1054        TabStops::List(_) => 0, // handled by is_tab_stop/next_tab_stop
1055    };
1056    let mut column: usize = 0;
1057    let mut space_start_col: Option<usize> = None;
1058    let mut in_initial = true;
1059
1060    for &byte in data {
1061        match byte {
1062            b' ' => {
1063                if !all && !in_initial {
1064                    out.write_all(b" ")?;
1065                    column += 1;
1066                } else {
1067                    if space_start_col.is_none() {
1068                        space_start_col = Some(column);
1069                    }
1070                    column += 1;
1071                    // Don't convert to tab here — wait for end of blank run
1072                }
1073            }
1074            b'\t' => {
1075                if !all && !in_initial {
1076                    // In non-converting mode, just emit the tab
1077                    if let Some(start_col) = space_start_col.take() {
1078                        let n = column - start_col;
1079                        out.write_all(&SPACES[..n.min(SPACES.len())])?;
1080                    }
1081                    out.write_all(b"\t")?;
1082                    column = tabs.next_tab_stop(column);
1083                } else {
1084                    if space_start_col.is_none() {
1085                        space_start_col = Some(column);
1086                    }
1087                    column = tabs.next_tab_stop(column);
1088                }
1089            }
1090            _ => {
1091                // Flush pending blanks
1092                if let Some(start_col) = space_start_col.take() {
1093                    let count = column - start_col;
1094                    if tab_size > 0 {
1095                        emit_blanks(out, start_col, count, tab_size)?;
1096                    } else {
1097                        // Tab list: use is_tab_stop for conversion
1098                        emit_blanks_tablist(out, start_col, count, tabs)?;
1099                    }
1100                }
1101
1102                if byte == b'\n' {
1103                    out.write_all(b"\n")?;
1104                    column = 0;
1105                    in_initial = true;
1106                } else if byte == b'\x08' {
1107                    out.write_all(b"\x08")?;
1108                    if column > 0 {
1109                        column -= 1;
1110                    }
1111                } else {
1112                    if in_initial {
1113                        in_initial = false;
1114                    }
1115                    out.write_all(&[byte])?;
1116                    column += 1;
1117                }
1118            }
1119        }
1120    }
1121
1122    if let Some(start_col) = space_start_col {
1123        let count = column - start_col;
1124        if tab_size > 0 {
1125            emit_blanks(out, start_col, count, tab_size)?;
1126        } else {
1127            emit_blanks_tablist(out, start_col, count, tabs)?;
1128        }
1129    }
1130
1131    Ok(())
1132}
1133
1134/// Emit blanks using a tab list (non-regular tab stops).
1135/// After the last defined tab stop, only spaces are emitted (no more tabs).
1136fn emit_blanks_tablist(
1137    out: &mut impl Write,
1138    start_col: usize,
1139    count: usize,
1140    tabs: &TabStops,
1141) -> std::io::Result<()> {
1142    if count == 0 {
1143        return Ok(());
1144    }
1145    let end_col = start_col + count;
1146    let mut col = start_col;
1147
1148    // Get the last defined tab stop to know when to stop converting to tabs
1149    let last_stop = match tabs {
1150        TabStops::List(stops) => stops.last().copied().unwrap_or(0),
1151        TabStops::Regular(_) => usize::MAX,
1152    };
1153
1154    while col < last_stop {
1155        let next_tab = tabs.next_tab_stop(col);
1156        if next_tab > end_col || next_tab > last_stop {
1157            break;
1158        }
1159        let blanks_consumed = next_tab - col;
1160        if blanks_consumed >= 2 || next_tab < end_col {
1161            out.write_all(b"\t")?;
1162            col = next_tab;
1163        } else {
1164            break;
1165        }
1166    }
1167
1168    let remaining = end_col - col;
1169    if remaining > 0 {
1170        let mut r = remaining;
1171        while r > 0 {
1172            let chunk = r.min(SPACES.len());
1173            out.write_all(&SPACES[..chunk])?;
1174            r -= chunk;
1175        }
1176    }
1177    Ok(())
1178}